From ed5476bcd885b1003d79727ebd7f9ede6e7972d6 Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Mon, 3 Oct 2022 14:23:03 +0200 Subject: [PATCH 001/157] Add basic pre-commit hooks and run them --- .pre-commit-config.yaml | 11 + .readthedocs.yaml | 2 +- CHANGELOG.md | 2 +- CONTRIBUTING.md | 8 +- docs/source/_static/css/s5defs-roles.css | 2 +- docs/source/_static/imgs/manual_trainer.svg | 2 +- .../source/_static/imgs/model_categorical.svg | 2 +- .../_static/imgs/model_deterministic.svg | 2 +- docs/source/_static/imgs/model_gaussian.svg | 2 +- .../imgs/model_multivariate_gaussian.svg | 2 +- docs/source/_static/imgs/parallel_trainer.svg | 2 +- docs/source/_static/imgs/rl_schema.svg | 2 +- .../_static/imgs/sequential_trainer.svg | 2 +- .../imgs/utils_tensorboard_file_iterator.svg | 11056 ++++++++-------- docs/source/_static/imgs/wrapping.svg | 2 +- .../deepmind/dm_manipulation_stack_sac.py | 54 +- .../dm_suite_cartpole_swingup_ddpg.py | 10 +- docs/source/examples/gym/gym_cartpole_cem.py | 8 +- .../examples/gym/gym_cartpole_cem_eval.py | 8 +- docs/source/examples/gym/gym_cartpole_dqn.py | 12 +- .../examples/gym/gym_cartpole_dqn_eval.py | 12 +- .../gym/gym_frozen_lake_q_learning.py | 16 +- .../gym/gym_frozen_lake_q_learning_eval.py | 14 +- docs/source/examples/gym/gym_pendulum_ddpg.py | 10 +- .../examples/gym/gym_pendulum_ddpg_eval.py | 10 +- docs/source/examples/gym/gym_taxi_sarsa.py | 16 +- .../examples/gym/gym_taxi_sarsa_eval.py | 14 +- .../examples/gym/gym_vector_cartpole_dqn.py | 12 +- .../gym/gym_vector_frozen_lake_q_learning.py | 16 +- .../examples/gym/gym_vector_pendulum_ddpg.py | 10 +- .../examples/gym/gym_vector_taxi_sarsa.py | 16 +- docs/source/examples/isaacgym/amp_humanoid.py | 8 +- .../isaacgym_parallel_no_shared_memory.py | 30 +- ...isaacgym_parallel_no_shared_memory_eval.py | 28 +- .../isaacgym_sequential_no_shared_memory.py | 30 +- ...aacgym_sequential_no_shared_memory_eval.py | 28 +- .../isaacgym_sequential_shared_memory.py | 30 +- .../isaacgym_sequential_shared_memory_eval.py | 28 +- .../examples/isaacgym/ppo_allegro_hand.py | 16 +- docs/source/examples/isaacgym/ppo_ant.py | 10 +- docs/source/examples/isaacgym/ppo_anymal.py | 10 +- .../examples/isaacgym/ppo_anymal_terrain.py | 6 +- .../examples/isaacgym/ppo_ball_balance.py | 4 +- docs/source/examples/isaacgym/ppo_cartpole.py | 4 +- .../examples/isaacgym/ppo_cartpole_eval.py | 10 +- .../examples/isaacgym/ppo_franka_cabinet.py | 12 +- docs/source/examples/isaacgym/ppo_humanoid.py | 10 +- .../source/examples/isaacgym/ppo_ingenuity.py | 16 +- .../examples/isaacgym/ppo_quadcopter.py | 4 +- .../examples/isaacgym/ppo_shadow_hand.py | 10 +- .../source/examples/isaacgym/ppo_trifinger.py | 12 +- .../isaacsim/cartpole_example_skrl.py | 10 +- .../examples/omniisaacgym/ppo_allegro_hand.py | 10 +- docs/source/examples/omniisaacgym/ppo_ant.py | 10 +- .../examples/omniisaacgym/ppo_ant_mt.py | 12 +- .../examples/omniisaacgym/ppo_anymal.py | 10 +- .../omniisaacgym/ppo_anymal_terrain.py | 6 +- .../examples/omniisaacgym/ppo_ball_balance.py | 4 +- .../examples/omniisaacgym/ppo_cartpole.py | 4 +- .../examples/omniisaacgym/ppo_cartpole_mt.py | 4 +- .../examples/omniisaacgym/ppo_crazy_flie.py | 12 +- .../omniisaacgym/ppo_franka_cabinet.py | 12 +- .../examples/omniisaacgym/ppo_humanoid.py | 12 +- .../examples/omniisaacgym/ppo_ingenuity.py | 10 +- .../examples/omniisaacgym/ppo_quadcopter.py | 4 +- .../examples/omniisaacgym/ppo_shadow_hand.py | 12 +- .../reaching_franka_isaacgym_env.py | 104 +- .../reaching_franka_isaacgym_skrl_eval.py | 8 +- 
.../reaching_franka_isaacgym_skrl_train.py | 8 +- .../reaching_franka_omniverse_isaacgym_env.py | 128 +- ...ing_franka_omniverse_isaacgym_skrl_eval.py | 10 +- ...ng_franka_omniverse_isaacgym_skrl_train.py | 10 +- .../reaching_franka_real_env.py | 14 +- .../reaching_franka_real_skrl_eval.py | 10 +- .../utils/tensorboard_file_iterator.py | 2 +- docs/source/index.rst | 10 +- docs/source/intro/data.rst | 10 +- docs/source/intro/examples.rst | 204 +- docs/source/intro/getting_started.rst | 48 +- docs/source/intro/installation.rst | 12 +- docs/source/modules/skrl.agents.a2c.rst | 4 +- docs/source/modules/skrl.agents.amp.rst | 6 +- .../source/modules/skrl.agents.base_class.rst | 4 +- docs/source/modules/skrl.agents.cem.rst | 2 +- docs/source/modules/skrl.agents.ddpg.rst | 2 +- docs/source/modules/skrl.agents.ddqn.rst | 2 +- docs/source/modules/skrl.agents.dqn.rst | 4 +- docs/source/modules/skrl.agents.ppo.rst | 6 +- .../source/modules/skrl.agents.q_learning.rst | 2 +- docs/source/modules/skrl.agents.sac.rst | 2 +- docs/source/modules/skrl.agents.sarsa.rst | 2 +- docs/source/modules/skrl.agents.td3.rst | 6 +- docs/source/modules/skrl.agents.trpo.rst | 6 +- docs/source/modules/skrl.envs.isaac_gym.rst | 12 +- .../modules/skrl.envs.omniverse_isaac_gym.rst | 4 +- docs/source/modules/skrl.envs.wrapping.rst | 32 +- .../modules/skrl.memories.base_class.rst | 6 +- docs/source/modules/skrl.memories.random.rst | 4 +- .../source/modules/skrl.models.base_class.rst | 2 +- .../modules/skrl.models.categorical.rst | 2 +- .../modules/skrl.models.deterministic.rst | 2 +- docs/source/modules/skrl.models.gaussian.rst | 6 +- .../skrl.models.multivariate_gaussian.rst | 6 +- docs/source/modules/skrl.models.tabular.rst | 2 +- docs/source/modules/skrl.resources.noises.rst | 10 +- .../modules/skrl.resources.preprocessors.rst | 8 +- .../modules/skrl.resources.schedulers.rst | 4 +- .../modules/skrl.trainers.base_class.rst | 4 +- docs/source/modules/skrl.trainers.manual.rst | 2 +- .../source/modules/skrl.trainers.parallel.rst | 2 +- .../modules/skrl.trainers.sequential.rst | 2 +- .../modules/skrl.utils.isaacgym_utils.rst | 6 +- .../skrl.utils.model_instantiators.rst | 12 +- .../skrl.utils.omniverse_isaacgym_utils.rst | 2 +- .../modules/skrl.utils.postprocessing.rst | 10 +- docs/source/snippets/agent.py | 40 +- docs/source/snippets/categorical_model.py | 14 +- docs/source/snippets/deterministic_model.py | 14 +- docs/source/snippets/gaussian_model.py | 20 +- docs/source/snippets/isaacgym_utils.py | 8 +- docs/source/snippets/memory.py | 2 +- docs/source/snippets/model_mixin.py | 26 +- .../snippets/multivariate_gaussian_model.py | 20 +- docs/source/snippets/noise.py | 2 +- docs/source/snippets/shared_model.py | 2 +- docs/source/snippets/tabular_model.py | 10 +- docs/source/snippets/trainer.py | 8 +- docs/source/snippets/utils_postprocessing.py | 18 +- setup.py | 1 + skrl/agents/torch/__init__.py | 2 +- skrl/agents/torch/a2c/__init__.py | 2 +- skrl/agents/torch/a2c/a2c.py | 70 +- skrl/agents/torch/amp/__init__.py | 2 +- skrl/agents/torch/amp/amp.py | 102 +- skrl/agents/torch/base.py | 56 +- skrl/agents/torch/cem/__init__.py | 2 +- skrl/agents/torch/cem/cem.py | 62 +- skrl/agents/torch/ddpg/__init__.py | 2 +- skrl/agents/torch/ddpg/ddpg.py | 78 +- skrl/agents/torch/dqn/__init__.py | 2 +- skrl/agents/torch/dqn/ddqn.py | 70 +- skrl/agents/torch/dqn/dqn.py | 72 +- skrl/agents/torch/ppo/__init__.py | 2 +- skrl/agents/torch/ppo/ppo.py | 82 +- skrl/agents/torch/q_learning/__init__.py | 2 +- skrl/agents/torch/q_learning/q_learning.py | 49 
+- skrl/agents/torch/sac/__init__.py | 2 +- skrl/agents/torch/sac/sac.py | 70 +- skrl/agents/torch/sarsa/__init__.py | 2 +- skrl/agents/torch/sarsa/sarsa.py | 51 +- skrl/agents/torch/td3/__init__.py | 2 +- skrl/agents/torch/td3/td3.py | 86 +- skrl/agents/torch/trpo/__init__.py | 2 +- skrl/agents/torch/trpo/trpo.py | 100 +- skrl/envs/torch/loaders.py | 56 +- skrl/memories/torch/__init__.py | 2 +- skrl/memories/torch/base.py | 42 +- skrl/memories/torch/prioritized.py | 6 +- skrl/memories/torch/random.py | 18 +- skrl/models/torch/base.py | 70 +- skrl/models/torch/categorical.py | 18 +- skrl/models/torch/deterministic.py | 17 +- skrl/models/torch/gaussian.py | 36 +- skrl/models/torch/multivariate_gaussian.py | 30 +- skrl/models/torch/tabular.py | 24 +- skrl/resources/noises/torch/base.py | 6 +- skrl/resources/noises/torch/gaussian.py | 4 +- .../noises/torch/ornstein_uhlenbeck.py | 18 +- .../torch/running_standard_scaler.py | 18 +- .../resources/schedulers/torch/kl_adaptive.py | 20 +- skrl/trainers/torch/manual.py | 2 +- skrl/utils/__init__.py | 6 +- skrl/utils/control.py | 11 +- skrl/utils/isaacgym_utils.py | 66 +- skrl/utils/model_instantiators.py | 178 +- skrl/utils/omniverse_isaacgym_utils.py | 88 +- skrl/utils/postprocessing.py | 18 +- skrl/version.txt | 2 +- tests/test_noises_gaussian.py | 2 +- tests/test_noises_ornstein_uhlenbeck.py | 2 +- 180 files changed, 7234 insertions(+), 7224 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..17d0cb62 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,11 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.3.0 + hooks: + - id: check-ast + - id: check-case-conflict + - id: check-docstring-first + - id: check-merge-conflict + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace diff --git a/.readthedocs.yaml b/.readthedocs.yaml index f8924d00..7aed1192 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -23,4 +23,4 @@ build: # Python requirements required to build your docs python: install: - - requirements: docs/requirements.txt \ No newline at end of file + - requirements: docs/requirements.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b66df84..6cd6e659 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -100,7 +100,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - More examples and contents in the documentation ### Fixed -- Clip actions using the whole space's limits +- Clip actions using the whole space's limits ## [0.2.0] - 2022-01-18 ### Added diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 53004b68..1017b999 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -18,7 +18,7 @@ If you have a question, please do not open an issue for this. Instead, use the f Open an issue on [skrl's GitHub issues](https://github.com/Toni-SM/skrl/issues) and describe the bug. If possible, please provide some of the following items: - Minimum code that reproduces the bug... -- or the exact steps to reproduce it +- or the exact steps to reproduce it - The error log or a screenshot of it - A link to the source code of the library that you are using (some problems may be due to the use of older versions. 
If possible, always use the latest version) - Any other information that you think may be useful or help to reproduce/describe the problem @@ -31,7 +31,9 @@ There is a [board](https://github.com/users/Toni-SM/projects/2/views/8) containi - Try to **communicate your change first** to [discuss](https://github.com/Toni-SM/skrl/discussions) the implementation if you want to add a new feature or change an existing one - Modify only the minimum amount of code required and the files needed to make the change +- Use the provided [pre-commit](https://pre-commit.com/) hooks to format the code. Install it by running `pre-commit install` in the root of the repository, running it periodically using `pre-commit run --all` helps reducing commit errors - Changes that are cosmetic in nature (code formatting, removing whitespace, etc.) or that correct grammatical, spelling or typo errors, and that do not add anything substantial to the functionality of the library will generally not be accepted as a pull request + - The only exception are changes that results from the use of the pre-commit hooks #### Coding conventions @@ -51,7 +53,7 @@ Read the code a little bit and you will understand it at first glance... Also ```ini function annotation (e.g. typing) - # insert an empty line + # insert an empty line python libraries and other libraries (e.g. gym, numpy, time, etc.) # insert an empty line machine learning framework modules (e.g. torch, torch.nn) @@ -63,4 +65,4 @@ Read the code a little bit and you will understand it at first glance... Also Thank you once again, -Toni \ No newline at end of file +Toni diff --git a/docs/source/_static/css/s5defs-roles.css b/docs/source/_static/css/s5defs-roles.css index 05c5dfdb..00feb4b9 100644 --- a/docs/source/_static/css/s5defs-roles.css +++ b/docs/source/_static/css/s5defs-roles.css @@ -81,4 +81,4 @@ .purple { color: purple; -} \ No newline at end of file +} diff --git a/docs/source/_static/imgs/manual_trainer.svg b/docs/source/_static/imgs/manual_trainer.svg index 460497a3..a7218f5d 100755 --- a/docs/source/_static/imgs/manual_trainer.svg +++ b/docs/source/_static/imgs/manual_trainer.svg @@ -1 +1 @@ -step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrddddrenderenv.render(…)agent.record_transitions(…)agent.act(…)record transitionscompute actions. . .. . ...............................Execute each agent method sequentially (one agent after the other) in the same processscope0scope1scope2scope3scopem𝒂𝒕𝒅𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏A1A2A3. . .Amagentenvagent.post_interaction(…)agent.pre_interaction(…)post-interactionpre-interaction𝒔𝒕resetenv.reset(…)training / evaluationiteration \ No newline at end of file +step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrddddrenderenv.render(…)agent.record_transitions(…)agent.act(…)record transitionscompute actions. . .. . ...............................Execute each agent method sequentially (one agent after the other) in the same processscope0scope1scope2scope3scopem𝒂𝒕𝒅𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏A1A2A3. . 
.Amagentenvagent.post_interaction(…)agent.pre_interaction(…)post-interactionpre-interaction𝒔𝒕resetenv.reset(…)training / evaluationiteration diff --git a/docs/source/_static/imgs/model_categorical.svg b/docs/source/_static/imgs/model_categorical.svg index 8e48383e..d25f4c53 100644 --- a/docs/source/_static/imgs/model_categorical.svg +++ b/docs/source/_static/imgs/model_categorical.svg @@ -1 +1 @@ -inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)unnormalized_log_problog probabilities(logits)probabilities(probs)categoricaldistribution1 2 3 … n𝑃(𝑥)actions (𝒂𝒕+𝟏)log probevaluated at𝑎𝑡+1networkoutput \ No newline at end of file +inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)unnormalized_log_problog probabilities(logits)probabilities(probs)categoricaldistribution1 2 3 … n𝑃(𝑥)actions (𝒂𝒕+𝟏)log probevaluated at𝑎𝑡+1networkoutput diff --git a/docs/source/_static/imgs/model_deterministic.svg b/docs/source/_static/imgs/model_deterministic.svg index ee65e6d5..5b69af0a 100644 --- a/docs/source/_static/imgs/model_deterministic.svg +++ b/docs/source/_static/imgs/model_deterministic.svg @@ -1 +1 @@ -inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)actions (𝒂𝒕+𝟏)NoneNoneclip_actions \ No newline at end of file +inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)actions (𝒂𝒕+𝟏)NoneNoneclip_actions diff --git a/docs/source/_static/imgs/model_gaussian.svg b/docs/source/_static/imgs/model_gaussian.svg index 92fa89ca..475293ae 100755 --- a/docs/source/_static/imgs/model_gaussian.svg +++ b/docs/source/_static/imgs/model_gaussian.svg @@ -1 +1 @@ -inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)log standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)gaussiandistribution𝒩(𝜇,𝜎)actions (𝒂𝒕+𝟏)log prob evaluated at𝑎𝑡+1mean actions (𝒂𝒕+𝟏)paramclip_log_stdclip_actionsreduction \ No newline at end of file +inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)log standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)gaussiandistribution𝒩(𝜇,𝜎)actions (𝒂𝒕+𝟏)log prob evaluated at𝑎𝑡+1mean actions (𝒂𝒕+𝟏)paramclip_log_stdclip_actionsreduction diff --git a/docs/source/_static/imgs/model_multivariate_gaussian.svg b/docs/source/_static/imgs/model_multivariate_gaussian.svg index 19bd5771..043753f1 100644 --- a/docs/source/_static/imgs/model_multivariate_gaussian.svg +++ b/docs/source/_static/imgs/model_multivariate_gaussian.svg @@ -1 +1 @@ -inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)log standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)multivariategaussian distribution𝒩(𝜇,𝛴)actions (𝒂𝒕+𝟏)log prob evaluated at𝑎𝑡+1mean actions (𝒂𝒕+𝟏)paramclip_log_stdclip_actions \ No newline at end of file +inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)log standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)multivariategaussian distribution𝒩(𝜇,𝛴)actions (𝒂𝒕+𝟏)log prob evaluated at𝑎𝑡+1mean actions (𝒂𝒕+𝟏)paramclip_log_stdclip_actions diff --git a/docs/source/_static/imgs/parallel_trainer.svg b/docs/source/_static/imgs/parallel_trainer.svg index 07d82364..26c6a8db 100644 --- a/docs/source/_static/imgs/parallel_trainer.svg +++ b/docs/source/_static/imgs/parallel_trainer.svg @@ -1 +1 @@ -step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrddddrenderenv.render(…)agent.record_transitions(…)agent.post_interaction(…)agent.pre_interaction(…)agent.act(…)record transitionspost-interactionpre-interactioncompute actionsA0A1A2Ambarrier. . .. . .. . 
...............................Execute each agent method in a different process and control synchronization through a multiprocessing.Barrierobjectscope0scope1scope2scope3scopem𝒂𝒕𝒅𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏𝒔𝒕agentenvresetenv.reset(…)init \ No newline at end of file +step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrddddrenderenv.render(…)agent.record_transitions(…)agent.post_interaction(…)agent.pre_interaction(…)agent.act(…)record transitionspost-interactionpre-interactioncompute actionsA0A1A2Ambarrier. . .. . .. . ...............................Execute each agent method in a different process and control synchronization through a multiprocessing.Barrierobjectscope0scope1scope2scope3scopem𝒂𝒕𝒅𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏𝒔𝒕agentenvresetenv.reset(…)init diff --git a/docs/source/_static/imgs/rl_schema.svg b/docs/source/_static/imgs/rl_schema.svg index aac7fe7a..0a45da66 100755 --- a/docs/source/_static/imgs/rl_schema.svg +++ b/docs/source/_static/imgs/rl_schema.svg @@ -1 +1 @@ -action (𝒂𝒕)state (𝒔𝒕+𝟏)reward (𝒓𝒕+𝟏)state (𝒔𝒕)reward (𝒓𝒕)agentenv17234586 \ No newline at end of file +action (𝒂𝒕)state (𝒔𝒕+𝟏)reward (𝒓𝒕+𝟏)state (𝒔𝒕)reward (𝒓𝒕)agentenv17234586 diff --git a/docs/source/_static/imgs/sequential_trainer.svg b/docs/source/_static/imgs/sequential_trainer.svg index 3495aa86..cce68bb5 100644 --- a/docs/source/_static/imgs/sequential_trainer.svg +++ b/docs/source/_static/imgs/sequential_trainer.svg @@ -1 +1 @@ -step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrddddrenderenv.render(…)agent.record_transitions(…)agent.act(…)record transitionscompute actions. . .. . ...............................Execute each agent method sequentially (one agent after the other) in the same processscope0scope1scope2scope3scopem𝒂𝒕𝒅𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏A1A2A3. . .Amagentenvagent.post_interaction(…)agent.pre_interaction(…)post-interactionpre-interaction𝒔𝒕resetenv.reset(…)init \ No newline at end of file +step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrddddrenderenv.render(…)agent.record_transitions(…)agent.act(…)record transitionscompute actions. . .. . ...............................Execute each agent method sequentially (one agent after the other) in the same processscope0scope1scope2scope3scopem𝒂𝒕𝒅𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏A1A2A3. . 
.Amagentenvagent.post_interaction(…)agent.pre_interaction(…)post-interactionpre-interaction𝒔𝒕resetenv.reset(…)init
diff --git a/docs/source/_static/imgs/utils_tensorboard_file_iterator.svg b/docs/source/_static/imgs/utils_tensorboard_file_iterator.svg
index ac016140..43925923 100644
--- a/docs/source/_static/imgs/utils_tensorboard_file_iterator.svg
+++ b/docs/source/_static/imgs/utils_tensorboard_file_iterator.svg
[SVG path data hunks omitted: no recoverable figure content]
diff --git a/docs/source/_static/imgs/wrapping.svg b/docs/source/_static/imgs/wrapping.svg
index 02138b44..694f8c2f 100755
--- a/docs/source/_static/imgs/wrapping.svg
+++ b/docs/source/_static/imgs/wrapping.svg
@@ -1 +1 @@
-DeepMindIsaac GymGymnum_envs: intdevice: ML framework specific devicestate_space: gym.Spaceobservation_space: gym.Spaceaction_space: gym.Spacereset()-> observationsstep(actions)-> observations, rewards, dones, infosrender()close()propertiesmethodswrap_env()Omniverse
\ No newline at end of file
+DeepMindIsaac GymGymnum_envs: intdevice: ML framework specific devicestate_space: gym.Spaceobservation_space: gym.Spaceaction_space: gym.Spacereset()-> observationsstep(actions)-> observations, rewards, dones, infosrender()close()propertiesmethodswrap_env()Omniverse
diff --git a/docs/source/examples/deepmind/dm_manipulation_stack_sac.py
b/docs/source/examples/deepmind/dm_manipulation_stack_sac.py index 55cc071f..eb83fde9 100644 --- a/docs/source/examples/deepmind/dm_manipulation_stack_sac.py +++ b/docs/source/examples/deepmind/dm_manipulation_stack_sac.py @@ -42,30 +42,30 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, def compute(self, states, taken_actions, role): # The dm_control.manipulation tasks have as observation/state spec a `collections.OrderedDict` object as follows: - # OrderedDict([('front_close', BoundedArray(shape=(1, 84, 84, 3), dtype=dtype('uint8'), name='front_close', minimum=0, maximum=255)), - # ('jaco_arm/joints_pos', Array(shape=(1, 6, 2), dtype=dtype('float64'), name='jaco_arm/joints_pos')), - # ('jaco_arm/joints_torque', Array(shape=(1, 6), dtype=dtype('float64'), name='jaco_arm/joints_torque')), - # ('jaco_arm/joints_vel', Array(shape=(1, 6), dtype=dtype('float64'), name='jaco_arm/joints_vel')), - # ('jaco_arm/jaco_hand/joints_pos', Array(shape=(1, 3), dtype=dtype('float64'), name='jaco_arm/jaco_hand/joints_pos')), - # ('jaco_arm/jaco_hand/joints_vel', Array(shape=(1, 3), dtype=dtype('float64'), name='jaco_arm/jaco_hand/joints_vel')), - # ('jaco_arm/jaco_hand/pinch_site_pos', Array(shape=(1, 3), dtype=dtype('float64'), name='jaco_arm/jaco_hand/pinch_site_pos')), + # OrderedDict([('front_close', BoundedArray(shape=(1, 84, 84, 3), dtype=dtype('uint8'), name='front_close', minimum=0, maximum=255)), + # ('jaco_arm/joints_pos', Array(shape=(1, 6, 2), dtype=dtype('float64'), name='jaco_arm/joints_pos')), + # ('jaco_arm/joints_torque', Array(shape=(1, 6), dtype=dtype('float64'), name='jaco_arm/joints_torque')), + # ('jaco_arm/joints_vel', Array(shape=(1, 6), dtype=dtype('float64'), name='jaco_arm/joints_vel')), + # ('jaco_arm/jaco_hand/joints_pos', Array(shape=(1, 3), dtype=dtype('float64'), name='jaco_arm/jaco_hand/joints_pos')), + # ('jaco_arm/jaco_hand/joints_vel', Array(shape=(1, 3), dtype=dtype('float64'), name='jaco_arm/jaco_hand/joints_vel')), + # ('jaco_arm/jaco_hand/pinch_site_pos', Array(shape=(1, 3), dtype=dtype('float64'), name='jaco_arm/jaco_hand/pinch_site_pos')), # ('jaco_arm/jaco_hand/pinch_site_rmat', Array(shape=(1, 9), dtype=dtype('float64'), name='jaco_arm/jaco_hand/pinch_site_rmat'))]) # This spec is converted to a `gym.spaces.Dict` space by the `wrap_env` function as follows: - # Dict(front_close: Box(0, 255, (1, 84, 84, 3), uint8), - # jaco_arm/jaco_hand/joints_pos: Box(-inf, inf, (1, 3), float64), - # jaco_arm/jaco_hand/joints_vel: Box(-inf, inf, (1, 3), float64), - # jaco_arm/jaco_hand/pinch_site_pos: Box(-inf, inf, (1, 3), float64), - # jaco_arm/jaco_hand/pinch_site_rmat: Box(-inf, inf, (1, 9), float64), - # jaco_arm/joints_pos: Box(-inf, inf, (1, 6, 2), float64), - # jaco_arm/joints_torque: Box(-inf, inf, (1, 6), float64), + # Dict(front_close: Box(0, 255, (1, 84, 84, 3), uint8), + # jaco_arm/jaco_hand/joints_pos: Box(-inf, inf, (1, 3), float64), + # jaco_arm/jaco_hand/joints_vel: Box(-inf, inf, (1, 3), float64), + # jaco_arm/jaco_hand/pinch_site_pos: Box(-inf, inf, (1, 3), float64), + # jaco_arm/jaco_hand/pinch_site_rmat: Box(-inf, inf, (1, 9), float64), + # jaco_arm/joints_pos: Box(-inf, inf, (1, 6, 2), float64), + # jaco_arm/joints_torque: Box(-inf, inf, (1, 6), float64), # jaco_arm/joints_vel: Box(-inf, inf, (1, 6), float64)) - + # The `spaces` parameter is a flat tensor of the flattened observation/state space with shape (batch_size, size_of_flat_space). 
# Using the model's method `tensor_to_space` we can convert the flattened tensor to the original space. # https://skrl.readthedocs.io/en/latest/modules/skrl.models.base_class.html#skrl.models.torch.base.Model.tensor_to_space input = self.tensor_to_space(states, self.observation_space) - + # For this case, the `input` variable is a Python dictionary with the following structure and shapes: # {'front_close': torch.Tensor(shape=[batch_size, 1, 84, 84, 3], dtype=torch.float32), # 'jaco_arm/jaco_hand/joints_pos': torch.Tensor(shape=[batch_size, 1, 3], dtype=torch.float32) @@ -79,10 +79,10 @@ def compute(self, states, taken_actions, role): # permute and normalize the images (samples, width, height, channels) -> (samples, channels, width, height) features = self.features_extractor(input['front_close'][:,0].permute(0, 3, 1, 2) / 255.0) - return torch.tanh(self.net(torch.cat([features, - input["jaco_arm/joints_pos"].view(states.shape[0], -1), + return torch.tanh(self.net(torch.cat([features, + input["jaco_arm/joints_pos"].view(states.shape[0], -1), input["jaco_arm/joints_vel"].view(states.shape[0], -1)], dim=-1))), self.log_std_parameter - + class Critic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): Model.__init__(self, observation_space, action_space, device) @@ -107,15 +107,15 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.Linear(32, 1)) def compute(self, states, taken_actions, role): - # map the observations/states to the original space. + # map the observations/states to the original space. # See the explanation above (StochasticActor.compute) input = self.tensor_to_space(states, self.observation_space) - + # permute and normalize the images (samples, width, height, channels) -> (samples, channels, width, height) features = self.features_extractor(input['front_close'][:,0].permute(0, 3, 1, 2) / 255.0) - return self.net(torch.cat([features, - input["jaco_arm/joints_pos"].view(states.shape[0], -1), + return self.net(torch.cat([features, + input["jaco_arm/joints_pos"].view(states.shape[0], -1), input["jaco_arm/joints_vel"].view(states.shape[0], -1), taken_actions], dim=-1)) @@ -160,10 +160,10 @@ def compute(self, states, taken_actions, role): cfg_sac["experiment"]["checkpoint_interval"] = 5000 -agent_sac = SAC(models=models_sac, - memory=memory, - cfg=cfg_sac, - observation_space=env.observation_space, +agent_sac = SAC(models=models_sac, + memory=memory, + cfg=cfg_sac, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py b/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py index f0469fb3..95e35900 100644 --- a/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py +++ b/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py @@ -16,7 +16,7 @@ # Define the models (deterministic models) for the DDPG agent using mixins # and programming with two approaches (torch functional and torch.nn.Sequential class). 
# - Actor (policy): takes as input the environment's observation/state and returns an action -# - Critic: takes the state and action as input and provides a value to guide the policy +# - Critic: takes the state and action as input and provides a value to guide the policy class DeterministicActor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): Model.__init__(self, observation_space, action_space, device) @@ -83,10 +83,10 @@ def compute(self, states, taken_actions, role): cfg_ddpg["experiment"]["write_interval"] = 1000 cfg_ddpg["experiment"]["checkpoint_interval"] = 5000 -agent_ddpg = DDPG(models=models_ddpg, - memory=memory, - cfg=cfg_ddpg, - observation_space=env.observation_space, +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_cartpole_cem.py b/docs/source/examples/gym/gym_cartpole_cem.py index ac78940b..b1bba349 100644 --- a/docs/source/examples/gym/gym_cartpole_cem.py +++ b/docs/source/examples/gym/gym_cartpole_cem.py @@ -66,10 +66,10 @@ def compute(self, states, taken_actions, role): cfg_cem["experiment"]["write_interval"] = 1000 cfg_cem["experiment"]["checkpoint_interval"] = 5000 -agent_cem = CEM(models=models_cem, - memory=memory, - cfg=cfg_cem, - observation_space=env.observation_space, +agent_cem = CEM(models=models_cem, + memory=memory, + cfg=cfg_cem, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_cartpole_cem_eval.py b/docs/source/examples/gym/gym_cartpole_cem_eval.py index 72a3633e..ab0b9d5e 100644 --- a/docs/source/examples/gym/gym_cartpole_cem_eval.py +++ b/docs/source/examples/gym/gym_cartpole_cem_eval.py @@ -57,10 +57,10 @@ def compute(self, states, taken_actions, role): cfg_cem["experiment"]["write_interval"] = 1000 cfg_cem["experiment"]["checkpoint_interval"] = 0 -agent_cem = CEM(models=models_cem, - memory=None, - cfg=cfg_cem, - observation_space=env.observation_space, +agent_cem = CEM(models=models_cem, + memory=None, + cfg=cfg_cem, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_cartpole_dqn.py b/docs/source/examples/gym/gym_cartpole_dqn.py index 98c7f6fe..1e574283 100644 --- a/docs/source/examples/gym/gym_cartpole_dqn.py +++ b/docs/source/examples/gym/gym_cartpole_dqn.py @@ -29,10 +29,10 @@ # DQN requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#spaces-and-models models_dqn = {} -models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, +models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, action_space=env.action_space, device=device, - clip_actions=False, + clip_actions=False, input_shape=Shape.OBSERVATIONS, hiddens=[64, 64], hidden_activation=["relu", "relu"], @@ -66,10 +66,10 @@ cfg_dqn["experiment"]["write_interval"] = 1000 cfg_dqn["experiment"]["checkpoint_interval"] = 5000 -agent_dqn = DQN(models=models_dqn, - memory=memory, - cfg=cfg_dqn, - observation_space=env.observation_space, +agent_dqn = DQN(models=models_dqn, + memory=memory, + cfg=cfg_dqn, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_cartpole_dqn_eval.py 
b/docs/source/examples/gym/gym_cartpole_dqn_eval.py index 1cd9c90a..baa5902d 100644 --- a/docs/source/examples/gym/gym_cartpole_dqn_eval.py +++ b/docs/source/examples/gym/gym_cartpole_dqn_eval.py @@ -24,10 +24,10 @@ # DQN requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#spaces-and-models models_dqn = {} -models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, +models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, action_space=env.action_space, device=device, - clip_actions=False, + clip_actions=False, input_shape=Shape.OBSERVATIONS, hiddens=[64, 64], hidden_activation=["relu", "relu"], @@ -45,10 +45,10 @@ cfg_dqn["experiment"]["write_interval"] = 1000 cfg_dqn["experiment"]["checkpoint_interval"] = 0 -agent_dqn = DQN(models=models_dqn, - memory=None, - cfg=cfg_dqn, - observation_space=env.observation_space, +agent_dqn = DQN(models=models_dqn, + memory=None, + cfg=cfg_dqn, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_frozen_lake_q_learning.py b/docs/source/examples/gym/gym_frozen_lake_q_learning.py index c3141f38..c0289717 100644 --- a/docs/source/examples/gym/gym_frozen_lake_q_learning.py +++ b/docs/source/examples/gym/gym_frozen_lake_q_learning.py @@ -16,13 +16,13 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - + def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) - + # choose random actions for exploration according to epsilon indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): @@ -55,15 +55,15 @@ def compute(self, states, taken_actions, role): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#configuration-and-hyperparameters cfg_q_learning = Q_LEARNING_DEFAULT_CONFIG.copy() cfg_q_learning["discount_factor"] = 0.999 -cfg_q_learning["alpha"] = 0.4 +cfg_q_learning["alpha"] = 0.4 # logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively cfg_q_learning["experiment"]["write_interval"] = 1600 cfg_q_learning["experiment"]["checkpoint_interval"] = 8000 agent_q_learning = Q_LEARNING(models=models_q_learning, - memory=None, - cfg=cfg_q_learning, - observation_space=env.observation_space, + memory=None, + cfg=cfg_q_learning, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py b/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py index bfdc9452..65c7680a 100644 --- a/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py +++ b/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py @@ -16,13 +16,13 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + self.q_table 
= torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - + def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) - + # choose random actions for exploration according to epsilon indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): @@ -60,9 +60,9 @@ def compute(self, states, taken_actions, role): cfg_q_learning["experiment"]["checkpoint_interval"] = 0 agent_q_learning = Q_LEARNING(models=models_q_learning, - memory=None, - cfg=cfg_q_learning, - observation_space=env.observation_space, + memory=None, + cfg=cfg_q_learning, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_pendulum_ddpg.py b/docs/source/examples/gym/gym_pendulum_ddpg.py index 7ac71623..c1d089fd 100644 --- a/docs/source/examples/gym/gym_pendulum_ddpg.py +++ b/docs/source/examples/gym/gym_pendulum_ddpg.py @@ -15,7 +15,7 @@ # Define the models (deterministic models) for the DDPG agent using mixin # - Actor (policy): takes as input the environment's observation/state and returns an action -# - Critic: takes the state and action as input and provides a value to guide the policy +# - Critic: takes the state and action as input and provides a value to guide the policy class DeterministicActor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): Model.__init__(self, observation_space, action_space, device) @@ -88,10 +88,10 @@ def compute(self, states, taken_actions, role): cfg_ddpg["experiment"]["write_interval"] = 300 cfg_ddpg["experiment"]["checkpoint_interval"] = 1500 -agent_ddpg = DDPG(models=models_ddpg, - memory=memory, - cfg=cfg_ddpg, - observation_space=env.observation_space, +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_pendulum_ddpg_eval.py b/docs/source/examples/gym/gym_pendulum_ddpg_eval.py index 350cc85c..e87c3283 100644 --- a/docs/source/examples/gym/gym_pendulum_ddpg_eval.py +++ b/docs/source/examples/gym/gym_pendulum_ddpg_eval.py @@ -11,7 +11,7 @@ from skrl.envs.torch import wrap_env -# Define only the policy for evaluation +# Define only the policy for evaluation class DeterministicActor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): Model.__init__(self, observation_space, action_space, device) @@ -56,10 +56,10 @@ def compute(self, states, taken_actions, role): cfg_ddpg["experiment"]["write_interval"] = 300 cfg_ddpg["experiment"]["checkpoint_interval"] = 0 -agent_ddpg = DDPG(models=models_ddpg, - memory=None, - cfg=cfg_ddpg, - observation_space=env.observation_space, +agent_ddpg = DDPG(models=models_ddpg, + memory=None, + cfg=cfg_ddpg, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_taxi_sarsa.py b/docs/source/examples/gym/gym_taxi_sarsa.py index 5f7ba8dd..28509dd3 100644 --- a/docs/source/examples/gym/gym_taxi_sarsa.py +++ b/docs/source/examples/gym/gym_taxi_sarsa.py @@ -16,13 +16,13 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= 
TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - + def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) - + # choose random actions for exploration according to epsilon indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): @@ -55,15 +55,15 @@ def compute(self, states, taken_actions, role): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#configuration-and-hyperparameters cfg_sarsa = SARSA_DEFAULT_CONFIG.copy() cfg_sarsa["discount_factor"] = 0.999 -cfg_sarsa["alpha"] = 0.4 +cfg_sarsa["alpha"] = 0.4 # logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively cfg_sarsa["experiment"]["write_interval"] = 1600 cfg_sarsa["experiment"]["checkpoint_interval"] = 8000 agent_sarsa = SARSA(models=models_sarsa, - memory=None, - cfg=cfg_sarsa, - observation_space=env.observation_space, + memory=None, + cfg=cfg_sarsa, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_taxi_sarsa_eval.py b/docs/source/examples/gym/gym_taxi_sarsa_eval.py index 4cd64ac3..39ab5c24 100644 --- a/docs/source/examples/gym/gym_taxi_sarsa_eval.py +++ b/docs/source/examples/gym/gym_taxi_sarsa_eval.py @@ -16,13 +16,13 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - + def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) - + # choose random actions for exploration according to epsilon indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): @@ -60,9 +60,9 @@ def compute(self, states, taken_actions, role): cfg_sarsa["experiment"]["checkpoint_interval"] = 0 agent_sarsa = SARSA(models=models_sarsa, - memory=None, - cfg=cfg_sarsa, - observation_space=env.observation_space, + memory=None, + cfg=cfg_sarsa, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_vector_cartpole_dqn.py b/docs/source/examples/gym/gym_vector_cartpole_dqn.py index 8e1ea656..56f4a0ef 100644 --- a/docs/source/examples/gym/gym_vector_cartpole_dqn.py +++ b/docs/source/examples/gym/gym_vector_cartpole_dqn.py @@ -29,10 +29,10 @@ # DQN requires 2 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#spaces-and-models models_dqn = {} -models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, +models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, action_space=env.action_space, device=device, - clip_actions=False, + 
clip_actions=False, input_shape=Shape.OBSERVATIONS, hiddens=[64, 64], hidden_activation=["relu", "relu"], @@ -66,10 +66,10 @@ cfg_dqn["experiment"]["write_interval"] = 1000 cfg_dqn["experiment"]["checkpoint_interval"] = 5000 -agent_dqn = DQN(models=models_dqn, - memory=memory, - cfg=cfg_dqn, - observation_space=env.observation_space, +agent_dqn = DQN(models=models_dqn, + memory=memory, + cfg=cfg_dqn, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py b/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py index efc912be..d4fe3c3e 100644 --- a/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py +++ b/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py @@ -16,13 +16,13 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - + def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) - + # choose random actions for exploration according to epsilon indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): @@ -55,15 +55,15 @@ def compute(self, states, taken_actions, role): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#configuration-and-hyperparameters cfg_q_learning = Q_LEARNING_DEFAULT_CONFIG.copy() cfg_q_learning["discount_factor"] = 0.999 -cfg_q_learning["alpha"] = 0.4 +cfg_q_learning["alpha"] = 0.4 # logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively cfg_q_learning["experiment"]["write_interval"] = 1600 cfg_q_learning["experiment"]["checkpoint_interval"] = 8000 agent_q_learning = Q_LEARNING(models=models_q_learning, - memory=None, - cfg=cfg_q_learning, - observation_space=env.observation_space, + memory=None, + cfg=cfg_q_learning, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_vector_pendulum_ddpg.py b/docs/source/examples/gym/gym_vector_pendulum_ddpg.py index c6bf729f..7572c217 100644 --- a/docs/source/examples/gym/gym_vector_pendulum_ddpg.py +++ b/docs/source/examples/gym/gym_vector_pendulum_ddpg.py @@ -15,7 +15,7 @@ # Define the models (deterministic models) for the DDPG agent using mixin # - Actor (policy): takes as input the environment's observation/state and returns an action -# - Critic: takes the state and action as input and provides a value to guide the policy +# - Critic: takes the state and action as input and provides a value to guide the policy class DeterministicActor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): Model.__init__(self, observation_space, action_space, device) @@ -88,10 +88,10 @@ def compute(self, states, taken_actions, role): cfg_ddpg["experiment"]["write_interval"] = 1000 cfg_ddpg["experiment"]["checkpoint_interval"] = 1000 -agent_ddpg = DDPG(models=models_ddpg, - memory=memory, - cfg=cfg_ddpg, - observation_space=env.observation_space, +agent_ddpg = 
DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/gym/gym_vector_taxi_sarsa.py b/docs/source/examples/gym/gym_vector_taxi_sarsa.py index 581489b2..eb79abcb 100644 --- a/docs/source/examples/gym/gym_vector_taxi_sarsa.py +++ b/docs/source/examples/gym/gym_vector_taxi_sarsa.py @@ -16,13 +16,13 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= TabularMixin.__init__(self, num_envs) self.epsilon = epsilon - self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - + def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) - + # choose random actions for exploration according to epsilon indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): @@ -55,15 +55,15 @@ def compute(self, states, taken_actions, role): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#configuration-and-hyperparameters cfg_sarsa = SARSA_DEFAULT_CONFIG.copy() cfg_sarsa["discount_factor"] = 0.999 -cfg_sarsa["alpha"] = 0.4 +cfg_sarsa["alpha"] = 0.4 # logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively cfg_sarsa["experiment"]["write_interval"] = 1600 cfg_sarsa["experiment"]["checkpoint_interval"] = 8000 agent_sarsa = SARSA(models=models_sarsa, - memory=None, - cfg=cfg_sarsa, - observation_space=env.observation_space, + memory=None, + cfg=cfg_sarsa, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacgym/amp_humanoid.py b/docs/source/examples/isaacgym/amp_humanoid.py index 11a3e580..3cbff0f8 100644 --- a/docs/source/examples/isaacgym/amp_humanoid.py +++ b/docs/source/examples/isaacgym/amp_humanoid.py @@ -33,7 +33,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(1024, 512), nn.ReLU(), nn.Linear(512, self.num_actions)) - + # set a fixed log standard deviation for the policy self.log_std_parameter = nn.Parameter(torch.full((self.num_actions,), fill_value=-2.9), requires_grad=False) @@ -127,9 +127,9 @@ def compute(self, states, taken_actions, role): cfg_amp["experiment"]["checkpoint_interval"] = 4000 agent = AMP(models=models_amp, - memory=memory, - cfg=cfg_amp, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_amp, + observation_space=env.observation_space, action_space=env.action_space, device=device, amp_observation_space=env.amp_observation_space, diff --git a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py index 6a1d8cf9..9c306802 100644 --- a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py +++ b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py @@ -112,7 +112,7 @@ def compute(self, states, taken_actions, role): model.init_parameters(method_name="normal_", mean=0.0, std=0.1) for model in models_sac.values(): model.init_parameters(method_name="normal_", mean=0.0, std=0.1) - + # Configure and instantiate the agent. 
# Only modify some of the default configuration, visit its documentation to see all the options @@ -149,32 +149,32 @@ def compute(self, states, taken_actions, role): cfg_sac["experiment"]["write_interval"] = 25 cfg_sac["experiment"]["checkpoint_interval"] = 1000 - agent_ddpg = DDPG(models=models_ddpg, - memory=memory_ddpg, - cfg=cfg_ddpg, - observation_space=env.observation_space, + agent_ddpg = DDPG(models=models_ddpg, + memory=memory_ddpg, + cfg=cfg_ddpg, + observation_space=env.observation_space, action_space=env.action_space, device=device) - agent_td3 = TD3(models=models_td3, - memory=memory_td3, - cfg=cfg_td3, - observation_space=env.observation_space, + agent_td3 = TD3(models=models_td3, + memory=memory_td3, + cfg=cfg_td3, + observation_space=env.observation_space, action_space=env.action_space, device=device) - agent_sac = SAC(models=models_sac, - memory=memory_sac, - cfg=cfg_sac, - observation_space=env.observation_space, + agent_sac = SAC(models=models_sac, + memory=memory_sac, + cfg=cfg_sac, + observation_space=env.observation_space, action_space=env.action_space, device=device) # Configure and instantiate the RL trainer and define the agent scopes cfg = {"timesteps": 8000, "headless": True} - trainer = ParallelTrainer(cfg=cfg, - env=env, + trainer = ParallelTrainer(cfg=cfg, + env=env, agents=[agent_ddpg, agent_td3, agent_sac], agents_scope=[100, 200, 212]) # agent scopes diff --git a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py index a935e9c9..0d311d76 100644 --- a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py +++ b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py @@ -90,24 +90,24 @@ def compute(self, states, taken_actions, role): cfg_sac["experiment"]["write_interval"] = 25 cfg_sac["experiment"]["checkpoint_interval"] = 0 - agent_ddpg = DDPG(models=models_ddpg, - memory=None, - cfg=cfg_ddpg, - observation_space=env.observation_space, + agent_ddpg = DDPG(models=models_ddpg, + memory=None, + cfg=cfg_ddpg, + observation_space=env.observation_space, action_space=env.action_space, device=device) - agent_td3 = TD3(models=models_td3, - memory=None, - cfg=cfg_td3, - observation_space=env.observation_space, + agent_td3 = TD3(models=models_td3, + memory=None, + cfg=cfg_td3, + observation_space=env.observation_space, action_space=env.action_space, device=device) - agent_sac = SAC(models=models_sac, - memory=None, - cfg=cfg_sac, - observation_space=env.observation_space, + agent_sac = SAC(models=models_sac, + memory=None, + cfg=cfg_sac, + observation_space=env.observation_space, action_space=env.action_space, device=device) @@ -119,8 +119,8 @@ def compute(self, states, taken_actions, role): # Configure and instantiate the RL trainer and define the agent scopes cfg = {"timesteps": 8000, "headless": True} - trainer = ParallelTrainer(cfg=cfg, - env=env, + trainer = ParallelTrainer(cfg=cfg, + env=env, agents=[agent_ddpg, agent_td3, agent_sac], agents_scope=[100, 200, 212]) # agent scopes diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py index 3cb0dc54..4cbe05a2 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py @@ -110,7 +110,7 @@ def compute(self, states, taken_actions, role): model.init_parameters(method_name="normal_", mean=0.0, 
std=0.1) for model in models_sac.values(): model.init_parameters(method_name="normal_", mean=0.0, std=0.1) - + # Configure and instantiate the agent. # Only modify some of the default configuration, visit its documentation to see all the options @@ -147,32 +147,32 @@ def compute(self, states, taken_actions, role): cfg_sac["experiment"]["write_interval"] = 25 cfg_sac["experiment"]["checkpoint_interval"] = 1000 -agent_ddpg = DDPG(models=models_ddpg, - memory=memory_ddpg, - cfg=cfg_ddpg, - observation_space=env.observation_space, +agent_ddpg = DDPG(models=models_ddpg, + memory=memory_ddpg, + cfg=cfg_ddpg, + observation_space=env.observation_space, action_space=env.action_space, device=device) -agent_td3 = TD3(models=models_td3, - memory=memory_td3, - cfg=cfg_td3, - observation_space=env.observation_space, +agent_td3 = TD3(models=models_td3, + memory=memory_td3, + cfg=cfg_td3, + observation_space=env.observation_space, action_space=env.action_space, device=device) -agent_sac = SAC(models=models_sac, - memory=memory_sac, - cfg=cfg_sac, - observation_space=env.observation_space, +agent_sac = SAC(models=models_sac, + memory=memory_sac, + cfg=cfg_sac, + observation_space=env.observation_space, action_space=env.action_space, device=device) # Configure and instantiate the RL trainer and define the agent scopes cfg = {"timesteps": 8000, "headless": True} -trainer = SequentialTrainer(cfg=cfg, - env=env, +trainer = SequentialTrainer(cfg=cfg, + env=env, agents=[agent_ddpg, agent_td3, agent_sac], agents_scope=[100, 200, 212]) # agent scopes diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py index e775c3c7..1776e219 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py @@ -88,24 +88,24 @@ def compute(self, states, taken_actions, role): cfg_sac["experiment"]["write_interval"] = 25 cfg_sac["experiment"]["checkpoint_interval"] = 0 -agent_ddpg = DDPG(models=models_ddpg, - memory=None, - cfg=cfg_ddpg, - observation_space=env.observation_space, +agent_ddpg = DDPG(models=models_ddpg, + memory=None, + cfg=cfg_ddpg, + observation_space=env.observation_space, action_space=env.action_space, device=device) -agent_td3 = TD3(models=models_td3, - memory=None, - cfg=cfg_td3, - observation_space=env.observation_space, +agent_td3 = TD3(models=models_td3, + memory=None, + cfg=cfg_td3, + observation_space=env.observation_space, action_space=env.action_space, device=device) -agent_sac = SAC(models=models_sac, - memory=None, - cfg=cfg_sac, - observation_space=env.observation_space, +agent_sac = SAC(models=models_sac, + memory=None, + cfg=cfg_sac, + observation_space=env.observation_space, action_space=env.action_space, device=device) @@ -117,8 +117,8 @@ def compute(self, states, taken_actions, role): # Configure and instantiate the RL trainer cfg = {"timesteps": 8000, "headless": True} -trainer = SequentialTrainer(cfg=cfg, - env=env, +trainer = SequentialTrainer(cfg=cfg, + env=env, agents=[agent_ddpg, agent_td3, agent_sac], agents_scope=[100, 200, 212]) diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py index ef93756e..5d0fe627 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py @@ -108,7 +108,7 @@ def 
compute(self, states, taken_actions, role): model.init_parameters(method_name="normal_", mean=0.0, std=0.1) for model in models_sac.values(): model.init_parameters(method_name="normal_", mean=0.0, std=0.1) - + # Configure and instantiate the agent. # Only modify some of the default configuration, visit its documentation to see all the options @@ -145,32 +145,32 @@ def compute(self, states, taken_actions, role): cfg_sac["experiment"]["write_interval"] = 25 cfg_sac["experiment"]["checkpoint_interval"] = 1000 -agent_ddpg = DDPG(models=models_ddpg, - memory=memory, - cfg=cfg_ddpg, - observation_space=env.observation_space, +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, action_space=env.action_space, device=device) -agent_td3 = TD3(models=models_td3, - memory=memory, - cfg=cfg_td3, - observation_space=env.observation_space, +agent_td3 = TD3(models=models_td3, + memory=memory, + cfg=cfg_td3, + observation_space=env.observation_space, action_space=env.action_space, device=device) -agent_sac = SAC(models=models_sac, - memory=memory, - cfg=cfg_sac, - observation_space=env.observation_space, +agent_sac = SAC(models=models_sac, + memory=memory, + cfg=cfg_sac, + observation_space=env.observation_space, action_space=env.action_space, device=device) # Configure and instantiate the RL trainer cfg = {"timesteps": 8000, "headless": True} -trainer = SequentialTrainer(cfg=cfg, - env=env, +trainer = SequentialTrainer(cfg=cfg, + env=env, agents=[agent_ddpg, agent_td3, agent_sac], agents_scope=[]) diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py index 209a5d1c..a8608b38 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py @@ -88,24 +88,24 @@ def compute(self, states, taken_actions, role): cfg_sac["experiment"]["write_interval"] = 25 cfg_sac["experiment"]["checkpoint_interval"] = 0 -agent_ddpg = DDPG(models=models_ddpg, - memory=None, - cfg=cfg_ddpg, - observation_space=env.observation_space, +agent_ddpg = DDPG(models=models_ddpg, + memory=None, + cfg=cfg_ddpg, + observation_space=env.observation_space, action_space=env.action_space, device=device) -agent_td3 = TD3(models=models_td3, - memory=None, - cfg=cfg_td3, - observation_space=env.observation_space, +agent_td3 = TD3(models=models_td3, + memory=None, + cfg=cfg_td3, + observation_space=env.observation_space, action_space=env.action_space, device=device) -agent_sac = SAC(models=models_sac, - memory=None, - cfg=cfg_sac, - observation_space=env.observation_space, +agent_sac = SAC(models=models_sac, + memory=None, + cfg=cfg_sac, + observation_space=env.observation_space, action_space=env.action_space, device=device) @@ -117,8 +117,8 @@ def compute(self, states, taken_actions, role): # Configure and instantiate the RL trainer cfg = {"timesteps": 8000, "headless": True} -trainer = SequentialTrainer(cfg=cfg, - env=env, +trainer = SequentialTrainer(cfg=cfg, + env=env, agents=[agent_ddpg, agent_td3, agent_sac], agents_scope=[]) diff --git a/docs/source/examples/isaacgym/ppo_allegro_hand.py b/docs/source/examples/isaacgym/ppo_allegro_hand.py index 2214df9b..2938df7b 100644 --- a/docs/source/examples/isaacgym/ppo_allegro_hand.py +++ b/docs/source/examples/isaacgym/ppo_allegro_hand.py @@ -33,10 +33,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, 
nn.ELU(), nn.Linear(256, 128), nn.ELU()) - + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(128, 1) def act(self, states, taken_actions, role): @@ -53,9 +53,9 @@ def compute(self, states, taken_actions, role): # Load and wrap the Isaac Gym environment using the easy-to-use API from NVIDIA -env = isaacgymenvs.make(seed=seed, - task="AllegroHand", - num_envs=16384, +env = isaacgymenvs.make(seed=seed, + task="AllegroHand", + num_envs=16384, sim_device="cuda:0", rl_device="cuda:0", graphics_device_id=0, @@ -108,9 +108,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 2000 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacgym/ppo_ant.py b/docs/source/examples/isaacgym/ppo_ant.py index 4bbc11f1..d1c061aa 100644 --- a/docs/source/examples/isaacgym/ppo_ant.py +++ b/docs/source/examples/isaacgym/ppo_ant.py @@ -33,10 +33,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(128, 64), nn.ELU()) - + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(64, 1) def act(self, states, taken_actions, role): @@ -102,9 +102,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 400 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacgym/ppo_anymal.py b/docs/source/examples/isaacgym/ppo_anymal.py index 1de573e8..3a4dbc2f 100644 --- a/docs/source/examples/isaacgym/ppo_anymal.py +++ b/docs/source/examples/isaacgym/ppo_anymal.py @@ -33,10 +33,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(128, 64), nn.ELU()) - + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(64, 1) def act(self, states, taken_actions, role): @@ -102,9 +102,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 1200 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacgym/ppo_anymal_terrain.py b/docs/source/examples/isaacgym/ppo_anymal_terrain.py index f83e0c9f..bf55cf04 100644 --- a/docs/source/examples/isaacgym/ppo_anymal_terrain.py +++ b/docs/source/examples/isaacgym/ppo_anymal_terrain.py @@ -107,9 +107,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 1800 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacgym/ppo_ball_balance.py b/docs/source/examples/isaacgym/ppo_ball_balance.py index 
e3428f60..ffd29199 100644 --- a/docs/source/examples/isaacgym/ppo_ball_balance.py +++ b/docs/source/examples/isaacgym/ppo_ball_balance.py @@ -33,10 +33,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(64, 32), nn.ELU()) - + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(32, 1) def act(self, states, taken_actions, role): diff --git a/docs/source/examples/isaacgym/ppo_cartpole.py b/docs/source/examples/isaacgym/ppo_cartpole.py index e3f8c253..b716259e 100644 --- a/docs/source/examples/isaacgym/ppo_cartpole.py +++ b/docs/source/examples/isaacgym/ppo_cartpole.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(32, 32), nn.ELU()) - + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(32, 1) def act(self, states, taken_actions, role): diff --git a/docs/source/examples/isaacgym/ppo_cartpole_eval.py b/docs/source/examples/isaacgym/ppo_cartpole_eval.py index dde950f6..3123b6ee 100644 --- a/docs/source/examples/isaacgym/ppo_cartpole_eval.py +++ b/docs/source/examples/isaacgym/ppo_cartpole_eval.py @@ -24,10 +24,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(32, 32), nn.ELU()) - + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(32, 1) def act(self, states, taken_actions, role): @@ -68,9 +68,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 0 agent = PPO(models=models_ppo, - memory=None, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=None, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacgym/ppo_franka_cabinet.py b/docs/source/examples/isaacgym/ppo_franka_cabinet.py index 3fd9e834..383b5fa6 100644 --- a/docs/source/examples/isaacgym/ppo_franka_cabinet.py +++ b/docs/source/examples/isaacgym/ppo_franka_cabinet.py @@ -33,10 +33,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(128, 64), nn.ELU()) - + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(64, 1) def act(self, states, taken_actions, role): @@ -77,7 +77,7 @@ def compute(self, states, taken_actions, role): cfg_ppo = PPO_DEFAULT_CONFIG.copy() cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 8 -cfg_ppo["mini_batches"] = 8 # 16 * 4096 / 8192 +cfg_ppo["mini_batches"] = 8 # 16 * 4096 / 8192 cfg_ppo["discount_factor"] = 0.99 cfg_ppo["lambda"] = 0.95 cfg_ppo["learning_rate"] = 5e-4 @@ -102,9 +102,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 1200 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacgym/ppo_humanoid.py b/docs/source/examples/isaacgym/ppo_humanoid.py index 19893771..e1405d84 100644 --- a/docs/source/examples/isaacgym/ppo_humanoid.py +++ 
b/docs/source/examples/isaacgym/ppo_humanoid.py @@ -33,10 +33,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(200, 100), nn.ELU()) - + self.mean_layer = nn.Linear(100, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(100, 1) def act(self, states, taken_actions, role): @@ -102,9 +102,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 1600 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacgym/ppo_ingenuity.py b/docs/source/examples/isaacgym/ppo_ingenuity.py index f60d46aa..f30466a0 100644 --- a/docs/source/examples/isaacgym/ppo_ingenuity.py +++ b/docs/source/examples/isaacgym/ppo_ingenuity.py @@ -33,10 +33,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(256, 128), nn.ELU()) - + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(128, 1) def act(self, states, taken_actions, role): @@ -53,9 +53,9 @@ def compute(self, states, taken_actions, role): # Load and wrap the Isaac Gym environment using the easy-to-use API from NVIDIA -env = isaacgymenvs.make(seed=seed, - task="Ingenuity", - num_envs=4096, +env = isaacgymenvs.make(seed=seed, + task="Ingenuity", + num_envs=4096, sim_device="cuda:0", rl_device="cuda:0", graphics_device_id=0, @@ -108,9 +108,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 400 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacgym/ppo_quadcopter.py b/docs/source/examples/isaacgym/ppo_quadcopter.py index 34ed47ed..4a168729 100644 --- a/docs/source/examples/isaacgym/ppo_quadcopter.py +++ b/docs/source/examples/isaacgym/ppo_quadcopter.py @@ -33,10 +33,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(256, 128), nn.ELU()) - + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(128, 1) def act(self, states, taken_actions, role): diff --git a/docs/source/examples/isaacgym/ppo_shadow_hand.py b/docs/source/examples/isaacgym/ppo_shadow_hand.py index c7e8636a..9827ae04 100644 --- a/docs/source/examples/isaacgym/ppo_shadow_hand.py +++ b/docs/source/examples/isaacgym/ppo_shadow_hand.py @@ -35,10 +35,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(256, 128), nn.ELU()) - + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(128, 1) def act(self, states, taken_actions, role): @@ -104,9 +104,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 2000 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, 
action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacgym/ppo_trifinger.py b/docs/source/examples/isaacgym/ppo_trifinger.py index 87591dcf..65a27b17 100644 --- a/docs/source/examples/isaacgym/ppo_trifinger.py +++ b/docs/source/examples/isaacgym/ppo_trifinger.py @@ -35,10 +35,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(128, 128), nn.ELU()) - + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(128, 1) def act(self, states, taken_actions, role): @@ -70,7 +70,7 @@ def compute(self, states, taken_actions, role): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) -models_ppo["value"] = models_ppo["policy"] # same instance: shared model +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. @@ -102,9 +102,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 8000 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/isaacsim/cartpole_example_skrl.py b/docs/source/examples/isaacsim/cartpole_example_skrl.py index d31c56e2..048fba58 100644 --- a/docs/source/examples/isaacsim/cartpole_example_skrl.py +++ b/docs/source/examples/isaacsim/cartpole_example_skrl.py @@ -1,4 +1,4 @@ -# Omniverse Isaac Sim tutorial: Creating New RL Environment +# Omniverse Isaac Sim tutorial: Creating New RL Environment # https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/tutorial_gym_new_rl_example.html # Instance of VecEnvBase and create the task @@ -75,7 +75,7 @@ def compute(self, states, taken_actions, role): # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ppo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) # Configure and instantiate the agent. 
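# NOTE (illustrative sketch, not part of this patch): several of the PPO examples in this diff
# (e.g. ppo_trifinger.py, ppo_ant_mt.py, ppo_humanoid.py, ppo_shadow_hand.py) register the same
# model instance for both roles via `models_ppo["value"] = models_ppo["policy"]`. The class
# definition itself is elided by these hunks; the sketch below shows the general shape of such a
# shared model with the skrl Model/GaussianMixin/DeterministicMixin API used by these scripts.
# Layer sizes and the class name "Shared" follow the examples; treat the exact mixin keyword
# arguments as assumptions to be checked against the skrl version in use.
import torch
import torch.nn as nn

from skrl.models.torch import Model, GaussianMixin, DeterministicMixin


class Shared(GaussianMixin, DeterministicMixin, Model):
    def __init__(self, observation_space, action_space, device, clip_actions=False,
                 clip_log_std=True, min_log_std=-20, max_log_std=2):
        Model.__init__(self, observation_space, action_space, device)
        GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, role="policy")
        DeterministicMixin.__init__(self, clip_actions, role="value")

        # common trunk shared by the policy and the value function
        self.net = nn.Sequential(nn.Linear(self.num_observations, 256),
                                 nn.ELU(),
                                 nn.Linear(256, 128),
                                 nn.ELU())
        # policy head
        self.mean_layer = nn.Linear(128, self.num_actions)
        self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions))
        # value head
        self.value_layer = nn.Linear(128, 1)

    def act(self, states, taken_actions, role):
        # dispatch to the mixin that matches the requested role
        if role == "policy":
            return GaussianMixin.act(self, states, taken_actions, role)
        elif role == "value":
            return DeterministicMixin.act(self, states, taken_actions, role)

    def compute(self, states, taken_actions, role):
        # policy: mean actions and log standard deviation; value: state value
        if role == "policy":
            return self.mean_layer(self.net(states)), self.log_std_parameter
        elif role == "value":
            return self.value_layer(self.net(states))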
@@ -101,9 +101,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 10000 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py index b83b0933..cab3bf10 100644 --- a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(256, 128), nn.ELU()) - + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(128, 1) def act(self, states, taken_actions, role): @@ -100,9 +100,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 8000 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/omniisaacgym/ppo_ant.py b/docs/source/examples/omniisaacgym/ppo_ant.py index 4282b76d..2b005ab4 100644 --- a/docs/source/examples/omniisaacgym/ppo_ant.py +++ b/docs/source/examples/omniisaacgym/ppo_ant.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(128, 64), nn.ELU()) - + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(64, 1) def act(self, states, taken_actions, role): @@ -100,9 +100,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 400 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/omniisaacgym/ppo_ant_mt.py b/docs/source/examples/omniisaacgym/ppo_ant_mt.py index d3bdbc50..5c50d2db 100644 --- a/docs/source/examples/omniisaacgym/ppo_ant_mt.py +++ b/docs/source/examples/omniisaacgym/ppo_ant_mt.py @@ -33,10 +33,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(128, 64), nn.ELU()) - + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(64, 1) def act(self, states, taken_actions, role): @@ -68,7 +68,7 @@ def compute(self, states, taken_actions, role): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) -models_ppo["value"] = models_ppo["policy"] # same instance: shared model +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. 
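# NOTE (illustrative sketch, not part of this patch): the *_eval.py scripts in this diff
# (e.g. ppo_cartpole_eval.py, reaching_franka_isaacgym_skrl_eval.py) share one pattern:
# define only the policy, pass memory=None, disable checkpointing, restore a trained
# checkpoint and run the trainer in evaluation mode. The sketch below condenses that
# pattern; `env`, `device` and `models_ppo` are assumed to be defined as in the surrounding
# examples, and the checkpoint path is a placeholder rather than a real file.
from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG
from skrl.trainers.torch import SequentialTrainer

cfg_ppo = PPO_DEFAULT_CONFIG.copy()
cfg_ppo["experiment"]["checkpoint_interval"] = 0   # do not write checkpoints while evaluating

agent = PPO(models=models_ppo,                     # only the policy is required for evaluation
            memory=None,                           # no rollout memory is needed without learning
            cfg=cfg_ppo,
            observation_space=env.observation_space,
            action_space=env.action_space,
            device=device)

# restore a previously trained checkpoint (placeholder path)
agent.load("./runs/<experiment_name>/checkpoints/best_agent.pt")

# run the evaluation loop with the same trainer API used for training
trainer = SequentialTrainer(cfg={"timesteps": 8000, "headless": True}, env=env, agents=agent)
trainer.eval()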
@@ -102,9 +102,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 400 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/omniisaacgym/ppo_anymal.py b/docs/source/examples/omniisaacgym/ppo_anymal.py index 72505e7e..0d73c249 100644 --- a/docs/source/examples/omniisaacgym/ppo_anymal.py +++ b/docs/source/examples/omniisaacgym/ppo_anymal.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(128, 64), nn.ELU()) - + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(64, 1) def act(self, states, taken_actions, role): @@ -100,9 +100,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 1200 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py b/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py index 5b6aaf80..2982a5da 100644 --- a/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py +++ b/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py @@ -105,9 +105,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 4800 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/omniisaacgym/ppo_ball_balance.py b/docs/source/examples/omniisaacgym/ppo_ball_balance.py index f63dbe3a..92d92773 100644 --- a/docs/source/examples/omniisaacgym/ppo_ball_balance.py +++ b/docs/source/examples/omniisaacgym/ppo_ball_balance.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(64, 32), nn.ELU()) - + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(32, 1) def act(self, states, taken_actions, role): diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole.py b/docs/source/examples/omniisaacgym/ppo_cartpole.py index f091fdf9..a9387ab2 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole.py @@ -29,10 +29,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(32, 32), nn.ELU()) - + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(32, 1) def act(self, states, taken_actions, role): diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py index 9838c6ca..d154c76a 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(32, 32), 
nn.ELU()) - + self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(32, 1) def act(self, states, taken_actions, role): diff --git a/docs/source/examples/omniisaacgym/ppo_crazy_flie.py b/docs/source/examples/omniisaacgym/ppo_crazy_flie.py index b8c16fd6..78b1b7af 100644 --- a/docs/source/examples/omniisaacgym/ppo_crazy_flie.py +++ b/docs/source/examples/omniisaacgym/ppo_crazy_flie.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Tanh(), nn.Linear(256, 128), nn.Tanh()) - + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(128, 1) def act(self, states, taken_actions, role): @@ -75,7 +75,7 @@ def compute(self, states, taken_actions, role): cfg_ppo = PPO_DEFAULT_CONFIG.copy() cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 8 -cfg_ppo["mini_batches"] = 4 # 16 * 4096 / 16384 +cfg_ppo["mini_batches"] = 4 # 16 * 4096 / 16384 cfg_ppo["discount_factor"] = 0.99 cfg_ppo["lambda"] = 0.95 cfg_ppo["learning_rate"] = 1e-4 @@ -100,9 +100,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 800 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py b/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py index 310c2e2e..882fed2a 100644 --- a/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py +++ b/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(128, 64), nn.ELU()) - + self.mean_layer = nn.Linear(64, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(64, 1) def act(self, states, taken_actions, role): @@ -75,7 +75,7 @@ def compute(self, states, taken_actions, role): cfg_ppo = PPO_DEFAULT_CONFIG.copy() cfg_ppo["rollouts"] = 16 # memory_size cfg_ppo["learning_epochs"] = 8 -cfg_ppo["mini_batches"] = 8 # 16 * 4096 / 8192 +cfg_ppo["mini_batches"] = 8 # 16 * 4096 / 8192 cfg_ppo["discount_factor"] = 0.99 cfg_ppo["lambda"] = 0.95 cfg_ppo["learning_rate"] = 5e-4 @@ -100,9 +100,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 1200 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/omniisaacgym/ppo_humanoid.py b/docs/source/examples/omniisaacgym/ppo_humanoid.py index cf45a8a1..1678a659 100644 --- a/docs/source/examples/omniisaacgym/ppo_humanoid.py +++ b/docs/source/examples/omniisaacgym/ppo_humanoid.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(200, 100), nn.ELU()) - + self.mean_layer = nn.Linear(100, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(100, 1) def act(self, states, taken_actions, role): @@ -66,7 +66,7 @@ def compute(self, states, taken_actions, role): 
# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) -models_ppo["value"] = models_ppo["policy"] # same instance: shared model +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. @@ -100,9 +100,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 1600 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/omniisaacgym/ppo_ingenuity.py b/docs/source/examples/omniisaacgym/ppo_ingenuity.py index 345ec93c..e167bedc 100644 --- a/docs/source/examples/omniisaacgym/ppo_ingenuity.py +++ b/docs/source/examples/omniisaacgym/ppo_ingenuity.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(256, 128), nn.ELU()) - + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(128, 1) def act(self, states, taken_actions, role): @@ -100,9 +100,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 320 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/omniisaacgym/ppo_quadcopter.py b/docs/source/examples/omniisaacgym/ppo_quadcopter.py index 2425c4f2..9eb8c88a 100644 --- a/docs/source/examples/omniisaacgym/ppo_quadcopter.py +++ b/docs/source/examples/omniisaacgym/ppo_quadcopter.py @@ -31,10 +31,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(256, 128), nn.ELU()) - + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(128, 1) def act(self, states, taken_actions, role): diff --git a/docs/source/examples/omniisaacgym/ppo_shadow_hand.py b/docs/source/examples/omniisaacgym/ppo_shadow_hand.py index 4908a92c..35b20ee4 100644 --- a/docs/source/examples/omniisaacgym/ppo_shadow_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_shadow_hand.py @@ -33,10 +33,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(256, 128), nn.ELU()) - + self.mean_layer = nn.Linear(128, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + self.value_layer = nn.Linear(128, 1) def act(self, states, taken_actions, role): @@ -68,7 +68,7 @@ def compute(self, states, taken_actions, role): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models models_ppo = {} models_ppo["policy"] = Shared(env.observation_space, env.action_space, device) -models_ppo["value"] = models_ppo["policy"] # same instance: shared model +models_ppo["value"] = models_ppo["policy"] # same instance: shared model # Configure and instantiate the agent. 
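# NOTE (illustrative sketch, not part of this patch): the "mini_batches" comments in these
# examples (e.g. cfg_ppo["mini_batches"] = 8  # 16 * 4096 / 8192) encode a simple rule:
# rollouts * num_envs samples are collected per update, and dividing that total by the desired
# mini-batch size gives the number of mini-batches. The figures below reproduce the
# FrankaCabinet case from this diff purely to make the arithmetic explicit.
rollouts = 16            # cfg_ppo["rollouts"], i.e. the memory size per environment
num_envs = 4096          # environments simulated in parallel
mini_batch_size = 8192   # samples consumed per gradient step
mini_batches = (rollouts * num_envs) // mini_batch_size  # -> 8, matching cfg_ppo["mini_batches"]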
@@ -102,9 +102,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 8000 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_env.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_env.py index b4a19b7a..848edbb5 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_env.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_env.py @@ -13,43 +13,43 @@ from skrl.utils import isaacgym_utils -TASK_CFG = {"name": "ReachingFranka", - "physics_engine": "physx", - "rl_device": "cuda:0", - "sim_device": "cuda:0", - "graphics_device_id": 0, - "headless": False, - "virtual_screen_capture": False, +TASK_CFG = {"name": "ReachingFranka", + "physics_engine": "physx", + "rl_device": "cuda:0", + "sim_device": "cuda:0", + "graphics_device_id": 0, + "headless": False, + "virtual_screen_capture": False, "force_render": True, - "env": {"numEnvs": 1024, - "envSpacing": 1.5, - "episodeLength": 100, - "enableDebugVis": False, - "clipObservations": 1000.0, - "clipActions": 1.0, - "controlFrequencyInv": 4, - "actionScale": 2.5, - "dofVelocityScale": 0.1, + "env": {"numEnvs": 1024, + "envSpacing": 1.5, + "episodeLength": 100, + "enableDebugVis": False, + "clipObservations": 1000.0, + "clipActions": 1.0, + "controlFrequencyInv": 4, + "actionScale": 2.5, + "dofVelocityScale": 0.1, "controlSpace": "cartesian", - "enableCameraSensors": False}, + "enableCameraSensors": False}, "sim": {"dt": 0.0083, # 1 / 120 - "substeps": 1, - "up_axis": "z", - "use_gpu_pipeline": True, - "gravity": [0.0, 0.0, -9.81], - "physx": {"num_threads": 4, - "solver_type": 1, - "use_gpu": True, - "num_position_iterations": 4, - "num_velocity_iterations": 1, - "contact_offset": 0.005, - "rest_offset": 0.0, - "bounce_threshold_velocity": 0.2, - "max_depenetration_velocity": 1000.0, - "default_buffer_size_multiplier": 5.0, - "max_gpu_contact_pairs": 1048576, - "num_subscenes": 4, - "contact_collection": 0}}, + "substeps": 1, + "up_axis": "z", + "use_gpu_pipeline": True, + "gravity": [0.0, 0.0, -9.81], + "physx": {"num_threads": 4, + "solver_type": 1, + "use_gpu": True, + "num_position_iterations": 4, + "num_velocity_iterations": 1, + "contact_offset": 0.005, + "rest_offset": 0.0, + "bounce_threshold_velocity": 0.2, + "max_depenetration_velocity": 1000.0, + "default_buffer_size_multiplier": 5.0, + "max_gpu_contact_pairs": 1048576, + "num_subscenes": 4, + "contact_collection": 0}}, "task": {"randomize": False}} @@ -84,12 +84,12 @@ def __init__(self, cfg): self._end_effector_link = "panda_leftfinger" # setup VecTask - super().__init__(config=self.cfg, - rl_device=rl_device, - sim_device=sim_device, - graphics_device_id=graphics_device_id, - headless=headless, - virtual_screen_capture=virtual_screen_capture, + super().__init__(config=self.cfg, + rl_device=rl_device, + sim_device=sim_device, + graphics_device_id=graphics_device_id, + headless=headless, + virtual_screen_capture=virtual_screen_capture, force_render=force_render) # tensors and views: DOFs, roots, rigid bodies @@ -195,7 +195,7 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.handle_targets = [] self.handle_robots = [] self.handle_envs = [] - + indexes_sim_robot = [] 
indexes_sim_target = [] @@ -208,10 +208,10 @@ def _create_envs(self, num_envs, spacing, num_per_row): pose.p = gymapi.Vec3(0.0, 0.0, 0.0) pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1) - robot_actor = self.gym.create_actor(env=env_ptr, - asset=robot_asset, + robot_actor = self.gym.create_actor(env=env_ptr, + asset=robot_asset, pose=pose, - name="robot", + name="robot", group=i, # collision group filter=1, # mask off collision segmentationId=0) @@ -224,9 +224,9 @@ def _create_envs(self, num_envs, spacing, num_per_row): pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1) target_actor = self.gym.create_actor(env=env_ptr, - asset=target_asset, + asset=target_asset, pose=pose, - name="target", + name="target", group=i + 1, # collision group filter=1, # mask off collision segmentationId=1) @@ -240,7 +240,7 @@ def _create_envs(self, num_envs, spacing, num_per_row): self.indexes_sim_robot = torch.tensor(indexes_sim_robot, dtype=torch.int32, device=self.device) self.indexes_sim_target = torch.tensor(indexes_sim_target, dtype=torch.int32, device=self.device) - + self.num_robot_dofs = self.gym.get_asset_dof_count(robot_asset) self.rigid_body_dict_robot = self.gym.get_asset_rigid_body_dict(robot_asset) @@ -301,7 +301,7 @@ def reset_idx(self, env_ids): pos = torch.clamp(self.robot_default_dof_pos.unsqueeze(0) + 0.25 * (torch.rand((len(env_ids), self.num_robot_dofs), device=self.device) - 0.5), self.robot_dof_lower_limits, self.robot_dof_upper_limits) pos[:, 7:] = 0 - + self.robot_dof_targets[env_ids, :] = pos[:] self.dof_pos[env_ids, :] = pos[:] self.dof_vel[env_ids, :] = 0 @@ -309,14 +309,14 @@ def reset_idx(self, env_ids): indexes = self.indexes_sim_robot[env_ids] self.gym.set_dof_position_target_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self.robot_dof_targets), - gymtorch.unwrap_tensor(indexes), + gymtorch.unwrap_tensor(indexes), len(env_ids)) self.gym.set_dof_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self.dof_state), - gymtorch.unwrap_tensor(indexes), + gymtorch.unwrap_tensor(indexes), len(env_ids)) - + # reset targets pos = (torch.rand((len(env_ids), 3), device=self.device) - 0.5) * 2 pos[:, 0] = 0.50 + pos[:, 0] * 0.25 @@ -328,7 +328,7 @@ def reset_idx(self, env_ids): indexes = self.indexes_sim_target[env_ids] self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self.root_state), - gymtorch.unwrap_tensor(indexes), + gymtorch.unwrap_tensor(indexes), len(env_ids)) # bookkeeping diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py index cf30f686..60bfad69 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py @@ -11,7 +11,7 @@ from skrl.envs.torch import wrap_env -# Define only the policy for evaluation +# Define only the policy for evaluation class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): @@ -68,9 +68,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 0 agent = PPO(models=models_ppo, - memory=None, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=None, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git 
a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py index 841129aa..74290246 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py @@ -91,7 +91,7 @@ def compute(self, states, taken_actions, role): cfg_ppo = PPO_DEFAULT_CONFIG.copy() cfg_ppo["rollouts"] = 16 cfg_ppo["learning_epochs"] = 8 -cfg_ppo["mini_batches"] = 8 +cfg_ppo["mini_batches"] = 8 cfg_ppo["discount_factor"] = 0.99 cfg_ppo["lambda"] = 0.95 cfg_ppo["learning_rate"] = 5e-4 @@ -115,9 +115,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 250 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py index d530abc7..97a6460d 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py @@ -19,66 +19,66 @@ # - get_extras() -TASK_CFG = {"test": False, - "device_id": 0, +TASK_CFG = {"test": False, + "device_id": 0, "headless": True, - "sim_device": "gpu", - "task": {"name": "ReachingFranka", - "physics_engine": "physx", - "env": {"numEnvs": 1024, - "envSpacing": 1.5, - "episodeLength": 100, - "enableDebugVis": False, - "clipObservations": 1000.0, - "clipActions": 1.0, - "controlFrequencyInv": 4, - "actionScale": 2.5, - "dofVelocityScale": 0.1, - "controlSpace": "cartesian"}, + "sim_device": "gpu", + "task": {"name": "ReachingFranka", + "physics_engine": "physx", + "env": {"numEnvs": 1024, + "envSpacing": 1.5, + "episodeLength": 100, + "enableDebugVis": False, + "clipObservations": 1000.0, + "clipActions": 1.0, + "controlFrequencyInv": 4, + "actionScale": 2.5, + "dofVelocityScale": 0.1, + "controlSpace": "cartesian"}, "sim": {"dt": 0.0083, # 1 / 120 - "use_gpu_pipeline": True, - "gravity": [0.0, 0.0, -9.81], - "add_ground_plane": True, - "use_flatcache": True, - "enable_scene_query_support": False, - "enable_cameras": False, - "default_physics_material": {"static_friction": 1.0, - "dynamic_friction": 1.0, - "restitution": 0.0}, - "physx": {"worker_thread_count": 4, - "solver_type": 1, - "use_gpu": True, - "solver_position_iteration_count": 4, - "solver_velocity_iteration_count": 1, - "contact_offset": 0.005, - "rest_offset": 0.0, - "bounce_threshold_velocity": 0.2, - "friction_offset_threshold": 0.04, - "friction_correlation_distance": 0.025, - "enable_sleeping": True, - "enable_stabilization": True, - "max_depenetration_velocity": 1000.0, - "gpu_max_rigid_contact_count": 524288, - "gpu_max_rigid_patch_count": 33554432, - "gpu_found_lost_pairs_capacity": 524288, - "gpu_found_lost_aggregate_pairs_capacity": 262144, - "gpu_total_aggregate_pairs_capacity": 1048576, - "gpu_max_soft_body_contacts": 1048576, - "gpu_max_particle_contacts": 1048576, - "gpu_heap_capacity": 33554432, - "gpu_temp_buffer_capacity": 16777216, - "gpu_max_num_partitions": 8}, - "robot": {"override_usd_defaults": False, - "fixed_base": False, - 
"enable_self_collisions": False, - "enable_gyroscopic_forces": True, - "solver_position_iteration_count": 4, - "solver_velocity_iteration_count": 1, - "sleep_threshold": 0.005, - "stabilization_threshold": 0.001, - "density": -1, - "max_depenetration_velocity": 1000.0, - "contact_offset": 0.005, + "use_gpu_pipeline": True, + "gravity": [0.0, 0.0, -9.81], + "add_ground_plane": True, + "use_flatcache": True, + "enable_scene_query_support": False, + "enable_cameras": False, + "default_physics_material": {"static_friction": 1.0, + "dynamic_friction": 1.0, + "restitution": 0.0}, + "physx": {"worker_thread_count": 4, + "solver_type": 1, + "use_gpu": True, + "solver_position_iteration_count": 4, + "solver_velocity_iteration_count": 1, + "contact_offset": 0.005, + "rest_offset": 0.0, + "bounce_threshold_velocity": 0.2, + "friction_offset_threshold": 0.04, + "friction_correlation_distance": 0.025, + "enable_sleeping": True, + "enable_stabilization": True, + "max_depenetration_velocity": 1000.0, + "gpu_max_rigid_contact_count": 524288, + "gpu_max_rigid_patch_count": 33554432, + "gpu_found_lost_pairs_capacity": 524288, + "gpu_found_lost_aggregate_pairs_capacity": 262144, + "gpu_total_aggregate_pairs_capacity": 1048576, + "gpu_max_soft_body_contacts": 1048576, + "gpu_max_particle_contacts": 1048576, + "gpu_heap_capacity": 33554432, + "gpu_temp_buffer_capacity": 16777216, + "gpu_max_num_partitions": 8}, + "robot": {"override_usd_defaults": False, + "fixed_base": False, + "enable_self_collisions": False, + "enable_gyroscopic_forces": True, + "solver_position_iteration_count": 4, + "solver_velocity_iteration_count": 1, + "sleep_threshold": 0.005, + "stabilization_threshold": 0.001, + "density": -1, + "max_depenetration_velocity": 1000.0, + "contact_offset": 0.005, "rest_offset": 0.0}}}} @@ -118,7 +118,7 @@ def __init__(self, name, sim_config, env, offset=None) -> None: def set_up_scene(self, scene) -> None: self.get_robot() self.get_target() - + super().set_up_scene(scene) # robot view @@ -134,18 +134,18 @@ def set_up_scene(self, scene) -> None: # target view self._targets = XFormPrimView(prim_paths_expr="/World/envs/.*/target", name="target_view", reset_xform_properties=False) scene.add(self._targets) - + self.init_data() def get_robot(self): - robot = Robot(prim_path=self.default_zero_env_path + "/robot", - translation=torch.tensor([0.0, 0.0, 0.0]), + robot = Robot(prim_path=self.default_zero_env_path + "/robot", + translation=torch.tensor([0.0, 0.0, 0.0]), orientation=torch.tensor([1.0, 0.0, 0.0, 0.0]), name="robot") self._sim_config.apply_articulation_settings("robot", get_prim_at_path(robot.prim_path), self._sim_config.parse_actor_config("robot")) def get_target(self): - target = VisualSphere(prim_path=self.default_zero_env_path + "/target", + target = VisualSphere(prim_path=self.default_zero_env_path + "/target", name="target", radius=0.025, color=torch.tensor([1, 0, 0])) @@ -195,7 +195,7 @@ def pre_physics_step(self, actions) -> None: if self._control_space == "joint": targets = self.robot_dof_targets[:, :7] + self.robot_dof_speed_scales[:7] * self.dt * self.actions * self._action_scale - + elif self._control_space == "cartesian": goal_position = self.hand_pos + actions / 100.0 delta_dof_pos = omniverse_isaacgym_utils.ik(jacobian_end_effector=self.jacobians[:, 8 - 1, :, :7], # franka hand index: 8 diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py 
b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py index f9283779..7d14c040 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py @@ -10,7 +10,7 @@ from skrl.envs.torch import wrap_env -# Define only the policy for evaluation +# Define only the policy for evaluation class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): @@ -32,7 +32,7 @@ def compute(self, states, taken_actions, role): # instance VecEnvBase and setup task headless = True # set headless to False for rendering -env = get_env_instance(headless=headless) +env = get_env_instance(headless=headless) from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig from reaching_franka_sim_env import ReachingFrankaTask, TASK_CFG @@ -71,9 +71,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 0 agent = PPO(models=models_ppo, - memory=None, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=None, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py index b9a31852..15c34eb6 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py @@ -57,7 +57,7 @@ def compute(self, states, taken_actions, role): # instance VecEnvBase and setup task headless = True # set headless to False for rendering -env = get_env_instance(headless=headless) +env = get_env_instance(headless=headless) from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig from reaching_franka_sim_env import ReachingFrankaTask, TASK_CFG @@ -94,7 +94,7 @@ def compute(self, states, taken_actions, role): cfg_ppo = PPO_DEFAULT_CONFIG.copy() cfg_ppo["rollouts"] = 16 cfg_ppo["learning_epochs"] = 8 -cfg_ppo["mini_batches"] = 8 +cfg_ppo["mini_batches"] = 8 cfg_ppo["discount_factor"] = 0.99 cfg_ppo["lambda"] = 0.95 cfg_ppo["learning_rate"] = 5e-4 @@ -118,9 +118,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 250 agent = PPO(models=models_ppo, - memory=memory, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py index 8afb649d..06c0d20c 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_env.py @@ -17,7 +17,7 @@ def __init__(self, robot_ip="172.16.0.2", device="cuda:0", control_space="joint" self.motion_type = motion_type # waypoint or impedance if self.control_space == "cartesian" and self.motion_type == "impedance": - # The operation of this mode (Cartesian-impedance) was adjusted later without being able to test 
it on the real robot. + # The operation of this mode (Cartesian-impedance) was adjusted later without being able to test it on the real robot. # Dangerous movements may occur for the operator and the robot. # Comment the following line of code if you want to proceed with this mode. raise ValueError("See comment in the code to proceed with this mode") @@ -42,7 +42,7 @@ def __init__(self, robot_ip="172.16.0.2", device="cuda:0", control_space="joint" self.robot = frankx.Robot(robot_ip) self.robot.set_default_behavior() self.robot.recover_from_errors() - + # the robot's response can be better managed by independently setting the following properties, for example: # - self.robot.velocity_rel = 0.2 # - self.robot.acceleration_rel = 0.1 @@ -149,12 +149,12 @@ def reset(self): self.motion_thread.join() self.motion = None self.motion_thread = None - + # open/close gripper # self.gripper.open() # self.gripper.clamp() - # go to 1) safe position, 2) random position + # go to 1) safe position, 2) random position self.robot.move(frankx.JointMotion(self.robot_default_dof_pos.tolist())) dof_pos = self.robot_default_dof_pos + 0.25 * (np.random.rand(7) - 0.5) self.robot.move(frankx.JointMotion(dof_pos.tolist())) @@ -178,7 +178,7 @@ def reset(self): # initial pose affine = frankx.Affine(frankx.Kinematics.forward(dof_pos.tolist())) affine = affine * frankx.Affine(x=0, y=0, z=-0.10335, a=np.pi/2) - + # motion type if self.motion_type == "waypoint": self.motion = frankx.WaypointMotion([frankx.Waypoint(affine)], return_when_finished=False) @@ -186,7 +186,7 @@ def reset(self): self.motion = frankx.ImpedanceMotion(500, 50) else: raise ValueError("Invalid motion type:", self.motion_type) - + self.motion_thread = self.robot.move_async(self.motion) if self.motion_type == "impedance": self.motion.target = affine @@ -200,7 +200,7 @@ def reset(self): return observation else: return observation, {} - + def step(self, action): self.progress_buf += 1 diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py index 0d64487e..8c872d9b 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py @@ -9,7 +9,7 @@ from skrl.envs.torch import wrap_env -# Define only the policy for evaluation +# Define only the policy for evaluation class Policy(GaussianMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): @@ -34,7 +34,7 @@ def compute(self, states, taken_actions, role): control_space = "joint" # joint or cartesian motion_type = "waypoint" # waypoint or impedance -camera_tracking = False # True for USB-camera tracking +camera_tracking = False # True for USB-camera tracking env = ReachingFranka(robot_ip="172.16.0.2", device="cpu", @@ -67,9 +67,9 @@ def compute(self, states, taken_actions, role): cfg_ppo["experiment"]["checkpoint_interval"] = 0 agent = PPO(models=models_ppo, - memory=None, - cfg=cfg_ppo, - observation_space=env.observation_space, + memory=None, + cfg=cfg_ppo, + observation_space=env.observation_space, action_space=env.action_space, device=device) diff --git a/docs/source/examples/utils/tensorboard_file_iterator.py b/docs/source/examples/utils/tensorboard_file_iterator.py index e8ba7179..01ffb04c 100644 --- a/docs/source/examples/utils/tensorboard_file_iterator.py +++ 
b/docs/source/examples/utils/tensorboard_file_iterator.py @@ -8,7 +8,7 @@ rewards = [] # load the Tensorboard files and iterate over them (tag: "Reward / Total reward (mean)") -tensorboard_iterator = postprocessing.TensorboardFileIterator("runs/*/events.out.tfevents.*", +tensorboard_iterator = postprocessing.TensorboardFileIterator("runs/*/events.out.tfevents.*", tags=["Reward / Total reward (mean)"]) for dirname, data in tensorboard_iterator: rewards.append(data["Reward / Total reward (mean)"]) diff --git a/docs/source/index.rst b/docs/source/index.rst index 52ca475d..749144c1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -14,8 +14,8 @@ SKRL - Reinforcement Learning library (|version|) **skrl** is under **active continuous development**. Make sure you always have the latest version. Visit the `develop `_ branch or its `documentation `_ to access the latest updates to be released. -| **GitHub repository:** https://github.com/Toni-SM/skrl -| **Questions or discussions:** https://github.com/Toni-SM/skrl/discussions +| **GitHub repository:** https://github.com/Toni-SM/skrl +| **Questions or discussions:** https://github.com/Toni-SM/skrl/discussions **Citing skrl:** To cite this library (created at `Mondragon Unibertsitatea `_) use the following reference to its `article `_: *"skrl: Modular and Flexible Library for Reinforcement Learning"* @@ -157,7 +157,7 @@ Trainers :maxdepth: 1 :caption: Trainers :hidden: - + modules/skrl.trainers.base_class modules/skrl.trainers.sequential modules/skrl.trainers.parallel @@ -185,7 +185,7 @@ Resources :maxdepth: 2 :caption: Resources :hidden: - + modules/skrl.resources.noises modules/skrl.resources.schedulers modules/skrl.resources.preprocessors @@ -205,7 +205,7 @@ Utils :maxdepth: 1 :caption: Utils :hidden: - + modules/skrl.utils.utilities modules/skrl.utils.model_instantiators modules/skrl.utils.postprocessing diff --git a/docs/source/intro/data.rst b/docs/source/intro/data.rst index eb09f06c..0ffbba8d 100644 --- a/docs/source/intro/data.rst +++ b/docs/source/intro/data.rst @@ -18,7 +18,7 @@ Each agent offers the following parameters under the :literal:`"experiment"` key DEFAULT_CONFIG = { ... - + "experiment": { "directory": "", # experiment's parent directory "experiment_name": "", # experiment name @@ -143,7 +143,7 @@ The checkpoint management, as in the previous case, is the responsibility of the DEFAULT_CONFIG = { ... - + "experiment": { "directory": "", # experiment's parent directory "experiment_name": "", # experiment name @@ -305,9 +305,9 @@ Memories can be automatically exported to files at each filling cycle (before da from skrl.memories.torch import RandomMemory # Instantiate a memory and enable its export - memory = RandomMemory(memory_size=16, - num_envs=env.num_envs, - device=device, + memory = RandomMemory(memory_size=16, + num_envs=env.num_envs, + device=device, export=True, export_format="pt", export_directory="./memories") diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index c15e6c92..5bed5d54 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -12,7 +12,7 @@ Examples
-Learning in an OpenAI Gym environment +Learning in an OpenAI Gym environment ------------------------------------- These examples perform the training of one agent in an OpenAI Gym environment (**one agent, one environment**) @@ -34,11 +34,11 @@ The following components or practices are exemplified (highlighted): - Load a checkpoint during evaluation: **Pendulum (DDPG)**, **CartPole (CEM)**, **CartPole (DQN)**, **Taxi (SARSA)**, **FrozenLake (Q-Learning)** .. tabs:: - + .. tab:: Pendulum (DDPG) .. tabs:: - + .. group-tab:: Training :download:`gym_pendulum_ddpg.py <../examples/gym/gym_pendulum_ddpg.py>` @@ -48,7 +48,7 @@ The following components or practices are exemplified (highlighted): :emphasize-lines: 1, 13, 50-56 .. group-tab:: Evaluation - + :download:`gym_pendulum_ddpg_eval.py <../examples/gym/gym_pendulum_ddpg_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments @@ -86,19 +86,19 @@ The following components or practices are exemplified (highlighted): .. tab:: CartPole (DQN) .. tabs:: - + .. group-tab:: Training - + :download:`gym_cartpole_dqn.py <../examples/gym/gym_cartpole_dqn.py>` .. literalinclude:: ../examples/gym/gym_cartpole_dqn.py :language: python :emphasize-lines: 4, 31-51 - + .. group-tab:: Evaluation - + :download:`gym_cartpole_dqn_eval.py <../examples/gym/gym_cartpole_dqn_eval.py>` - + **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined @@ -106,23 +106,23 @@ The following components or practices are exemplified (highlighted): .. literalinclude:: ../examples/gym/gym_cartpole_dqn_eval.py :language: python :emphasize-lines: 56 - + .. tab:: Taxi (SARSA) .. tabs:: - + .. group-tab:: Training - + :download:`gym_taxi_sarsa.py <../examples/gym/gym_taxi_sarsa.py>` .. literalinclude:: ../examples/gym/gym_taxi_sarsa.py :language: python :emphasize-lines: 6, 13-30 - + .. group-tab:: Evaluation - + :download:`gym_taxi_sarsa_eval.py <../examples/gym/gym_taxi_sarsa_eval.py>` - + **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined @@ -130,23 +130,23 @@ The following components or practices are exemplified (highlighted): .. literalinclude:: ../examples/gym/gym_taxi_sarsa_eval.py :language: python :emphasize-lines: 70 - + .. tab:: FrozenLake (Q-learning) .. tabs:: - + .. group-tab:: Training - + :download:`gym_frozen_lake_q_learning.py <../examples/gym/gym_frozen_lake_q_learning.py>` .. literalinclude:: ../examples/gym/gym_frozen_lake_q_learning.py :language: python :emphasize-lines: 6, 13-30 - + .. 
group-tab:: Evaluation - + :download:`gym_frozen_lake_q_learning_eval.py <../examples/gym/gym_frozen_lake_q_learning_eval.py>` - + **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined @@ -169,11 +169,11 @@ The following components or practices are exemplified (highlighted): - Load and wrap an OpenAI Gym vectorized environment: **Pendulum (DDPG)**, **CartPole (DQN)**, **Taxi (SARSA)**, **FrozenLake (Q-Learning)** .. tabs:: - + .. tab:: Pendulum (DDPG) .. tabs:: - + .. group-tab:: Training :download:`gym_vector_pendulum_ddpg.py <../examples/gym/gym_vector_pendulum_ddpg.py>` @@ -185,33 +185,33 @@ The following components or practices are exemplified (highlighted): .. tab:: CartPole (DQN) .. tabs:: - + .. group-tab:: Training - + :download:`gym_vector_cartpole_dqn.py <../examples/gym/gym_vector_cartpole_dqn.py>` .. literalinclude:: ../examples/gym/gym_vector_cartpole_dqn.py :language: python :emphasize-lines: 1, 8, 13-19 - + .. tab:: Taxi (SARSA) .. tabs:: - + .. group-tab:: Training - + :download:`gym_vector_taxi_sarsa.py <../examples/gym/gym_vector_taxi_sarsa.py>` .. literalinclude:: ../examples/gym/gym_vector_taxi_sarsa.py :language: python :emphasize-lines: 1, 9, 35-41 - + .. tab:: FrozenLake (Q-learning) .. tabs:: - + .. group-tab:: Training - + :download:`gym_vector_frozen_lake_q_learning.py <../examples/gym/gym_vector_frozen_lake_q_learning.py>` .. literalinclude:: ../examples/gym/gym_vector_frozen_lake_q_learning.py @@ -242,11 +242,11 @@ The following components or practices are exemplified (highlighted): - Map the observation/state space (flat tensor) to the original environment space to be used by the model: **reach_site_vision (SAC)** .. tabs:: - + .. tab:: suite:cartpole (DDPG) .. tabs:: - + .. group-tab:: Training :download:`dm_suite_cartpole_swingup_ddpg.py <../examples/deepmind/dm_suite_cartpole_swingup_ddpg.py>` @@ -254,11 +254,11 @@ The following components or practices are exemplified (highlighted): .. literalinclude:: ../examples/deepmind/dm_suite_cartpole_swingup_ddpg.py :language: python :emphasize-lines: 1, 13, 50-51 - + .. tab:: manipulation:reach_site_vision (SAC) .. tabs:: - + .. group-tab:: Training :download:`dm_manipulation_stack_sac.py <../examples/deepmind/dm_manipulation_stack_sac.py>` @@ -333,13 +333,13 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 Isaac Gym environments implement a functionality to get their configuration from the command line. Because of this feature, setting the :literal:`headless` option from the trainer configuration will not work. In this case, it is necessary to invoke the scripts as follows: :literal:`python script.py headless=True` for Isaac Gym environments (preview 3 and preview 4) or :literal:`python script.py --headless` for Isaac Gym environments (preview 2) .. tabs:: - + .. tab:: Isaac Gym environments (training) .. tabs:: - + .. tab:: AllegroHand - + :download:`ppo_allegro_hand.py <../examples/isaacgym/ppo_allegro_hand.py>` .. literalinclude:: ../examples/isaacgym/ppo_allegro_hand.py @@ -347,7 +347,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 2, 19, 56-62 .. 
tab:: Ant - + :download:`ppo_ant.py <../examples/isaacgym/ppo_ant.py>` .. literalinclude:: ../examples/isaacgym/ppo_ant.py @@ -355,7 +355,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 13-14, 56-57 .. tab:: Anymal - + :download:`ppo_anymal.py <../examples/isaacgym/ppo_anymal.py>` .. literalinclude:: ../examples/isaacgym/ppo_anymal.py @@ -363,7 +363,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 13-14, 56-57 .. tab:: AnymalTerrain - + :download:`ppo_anymal_terrain.py <../examples/isaacgym/ppo_anymal_terrain.py>` .. literalinclude:: ../examples/isaacgym/ppo_anymal_terrain.py @@ -371,7 +371,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 11, 101-104 .. tab:: BallBalance - + :download:`ppo_ball_balance.py <../examples/isaacgym/ppo_ball_balance.py>` .. literalinclude:: ../examples/isaacgym/ppo_ball_balance.py @@ -379,7 +379,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 11, 96-99 .. tab:: Cartpole - + :download:`ppo_cartpole.py <../examples/isaacgym/ppo_cartpole.py>` .. literalinclude:: ../examples/isaacgym/ppo_cartpole.py @@ -387,7 +387,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 15, 19 .. tab:: Cartpole (TRPO) - + :download:`trpo_cartpole.py <../examples/isaacgym/trpo_cartpole.py>` .. literalinclude:: ../examples/isaacgym/trpo_cartpole.py @@ -395,7 +395,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 14, 18 .. tab:: FrankaCabinet - + :download:`ppo_franka_cabinet.py <../examples/isaacgym/ppo_franka_cabinet.py>` .. literalinclude:: ../examples/isaacgym/ppo_franka_cabinet.py @@ -403,7 +403,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 10, 84-85 .. tab:: Humanoid - + :download:`ppo_humanoid.py <../examples/isaacgym/ppo_humanoid.py>` .. literalinclude:: ../examples/isaacgym/ppo_humanoid.py @@ -411,7 +411,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 10, 84-85 .. tab:: Humanoid (AMP) - + :download:`amp_humanoid.py <../examples/isaacgym/amp_humanoid.py>` .. literalinclude:: ../examples/isaacgym/amp_humanoid.py @@ -419,7 +419,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 89, 124, 135, 138-139 .. tab:: Ingenuity - + :download:`ppo_ingenuity.py <../examples/isaacgym/ppo_ingenuity.py>` .. literalinclude:: ../examples/isaacgym/ppo_ingenuity.py @@ -427,7 +427,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 2, 19, 56-62 .. tab:: Quadcopter - + :download:`ppo_quadcopter.py <../examples/isaacgym/ppo_quadcopter.py>` .. literalinclude:: ../examples/isaacgym/ppo_quadcopter.py @@ -435,7 +435,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 95 .. tab:: ShadowHand - + :download:`ppo_shadow_hand.py <../examples/isaacgym/ppo_shadow_hand.py>` .. literalinclude:: ../examples/isaacgym/ppo_shadow_hand.py @@ -443,7 +443,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 97 .. tab:: Trifinger - + :download:`ppo_trifinger.py <../examples/isaacgym/ppo_trifinger.py>` .. 
literalinclude:: ../examples/isaacgym/ppo_trifinger.py @@ -453,11 +453,11 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. tab:: Isaac Gym environments (evaluation) .. tabs:: - + .. tab:: Cartpole - + :download:`ppo_cartpole_eval.py <../examples/isaacgym/ppo_cartpole_eval.py>` - + **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined @@ -500,15 +500,15 @@ The following components or practices are exemplified (highlighted): .. note:: Isaac Gym environments implement a functionality to get their configuration from the command line. Because of this feature, setting the :literal:`headless` option from the trainer configuration will not work. In this case, it is necessary to invoke the scripts as follows: :literal:`python script.py headless=True` for Isaac Gym environments (preview 3 and preview 4) or :literal:`python script.py --headless` for Isaac Gym environments (preview 2) - + .. tabs:: - + .. tab:: Shared memory .. tabs:: - + .. tab:: Sequential training - + :download:`isaacgym_sequential_shared_memory.py <../examples/isaacgym/isaacgym_sequential_shared_memory.py>` .. literalinclude:: ../examples/isaacgym/isaacgym_sequential_shared_memory.py @@ -516,9 +516,9 @@ The following components or practices are exemplified (highlighted): :emphasize-lines: 75, 149, 156, 163, 174-175 .. tab:: Sequential evaluation - + :download:`isaacgym_sequential_shared_memory_eval.py <../examples/isaacgym/isaacgym_sequential_shared_memory_eval.py>` - + **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined @@ -530,9 +530,9 @@ The following components or practices are exemplified (highlighted): .. tab:: No shared memory .. tabs:: - + .. tab:: Sequential training - + :download:`isaacgym_sequential_no_shared_memory.py <../examples/isaacgym/isaacgym_sequential_no_shared_memory.py>` .. literalinclude:: ../examples/isaacgym/isaacgym_sequential_no_shared_memory.py @@ -540,7 +540,7 @@ The following components or practices are exemplified (highlighted): :emphasize-lines: 75-77, 151, 158, 165, 176-177 .. tab:: Parallel training - + :download:`isaacgym_parallel_no_shared_memory.py <../examples/isaacgym/isaacgym_parallel_no_shared_memory.py>` .. literalinclude:: ../examples/isaacgym/isaacgym_parallel_no_shared_memory.py @@ -548,9 +548,9 @@ The following components or practices are exemplified (highlighted): :emphasize-lines: 13, 67, 176-179 .. tab:: Sequential eval... - + :download:`isaacgym_sequential_no_shared_memory_eval.py <../examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py>` - + **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. 
The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined @@ -560,9 +560,9 @@ The following components or practices are exemplified (highlighted): :emphasize-lines: 113-115, 126 .. tab:: Parallel eval... - + :download:`isaacgym_parallel_no_shared_memory_eval.py <../examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py>` - + **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined @@ -641,15 +641,15 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 .. tabs:: .. tab:: AllegroHand - + :download:`ppo_allegro_hand.py <../examples/omniisaacgym/ppo_allegro_hand.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_allegro_hand.py :language: python :emphasize-lines: 11-12, 54-55 - + .. tab:: Ant - + :download:`ppo_ant.py <../examples/omniisaacgym/ppo_ant.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_ant.py @@ -657,15 +657,15 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 11-12, 54-55 .. tab:: Ant (multi-threaded) - + :download:`ppo_ant_mt.py <../examples/omniisaacgym/ppo_ant_mt.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_ant_mt.py :language: python :emphasize-lines: 1, 13-14, 56-57, 117, 121 - + .. tab:: Anymal - + :download:`ppo_anymal.py <../examples/omniisaacgym/ppo_anymal.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_anymal.py @@ -673,7 +673,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 11-12, 54-55 .. tab:: AnymalTerrain - + :download:`ppo_anymal_terrain.py <../examples/omniisaacgym/ppo_anymal_terrain.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_anymal_terrain.py @@ -689,7 +689,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 9, 94-97 .. tab:: Cartpole - + :download:`ppo_cartpole.py <../examples/omniisaacgym/ppo_cartpole.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_cartpole.py @@ -697,7 +697,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 13, 17 .. tab:: Cartpole (multi-threaded) - + :download:`ppo_cartpole_mt.py <../examples/omniisaacgym/ppo_cartpole_mt.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_cartpole_mt.py @@ -705,7 +705,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 1, 13-14, 54-55, 115, 119 .. tab:: Crazyflie - + :download:`ppo_crazy_flie.py <../examples/omniisaacgym/ppo_crazy_flie.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_crazy_flie.py @@ -713,7 +713,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 13, 17 .. tab:: FrankaCabinet - + :download:`ppo_franka_cabinet.py <../examples/omniisaacgym/ppo_franka_cabinet.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_franka_cabinet.py @@ -721,15 +721,15 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 8, 82-83 .. tab:: Humanoid - + :download:`ppo_humanoid.py <../examples/omniisaacgym/ppo_humanoid.py>` .. 
literalinclude:: ../examples/omniisaacgym/ppo_humanoid.py :language: python :emphasize-lines: 8, 82-83 - + .. tab:: Ingenuity - + :download:`ppo_ingenuity.py <../examples/omniisaacgym/ppo_ingenuity.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_ingenuity.py @@ -737,7 +737,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 93 .. tab:: Quadcopter - + :download:`ppo_quadcopter.py <../examples/omniisaacgym/ppo_quadcopter.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_quadcopter.py @@ -745,7 +745,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 :emphasize-lines: 93 .. tab:: ShadowHand - + :download:`ppo_shadow_hand.py <../examples/omniisaacgym/ppo_shadow_hand.py>` .. literalinclude:: ../examples/omniisaacgym/ppo_shadow_hand.py @@ -770,7 +770,7 @@ These examples show how to train an agent in an Omniverse Isaac Sim environment Use the steps described below to setup and launch the experiment after follow the tutorial .. code-block:: bash - + # download the sample code from GitHub in the directory containing the cartpole_task.py script wget https://raw.githubusercontent.com/Toni-SM/skrl/main/docs/source/examples/isaacsim/cartpole_example_skrl.py @@ -787,7 +787,7 @@ These examples show how to train an agent in an Omniverse Isaac Sim environment :language: python .. tab:: Isaac Sim 2021.2.1 (JetBot) - + This example performs the training of an agent in the Isaac Sim's JetBot environment. The following components or practices are exemplified (highlighted): - Define and instantiate Convolutional Neural Networks (CNN) to learn from 128 X 128 RGB images @@ -797,12 +797,12 @@ These examples show how to train an agent in an Omniverse Isaac Sim environment .. tabs:: .. tab:: Local workstation (setup) - + .. code-block:: bash # create a working directory and change to it - mkdir ~/.local/share/ov/pkg/isaac_sim-2021.2.1/standalone_examples/api/omni.isaac.jetbot/skrl_example - cd ~/.local/share/ov/pkg/isaac_sim-2021.2.1/standalone_examples/api/omni.isaac.jetbot/skrl_example + mkdir ~/.local/share/ov/pkg/isaac_sim-2021.2.1/standalone_examples/api/omni.isaac.jetbot/skrl_example + cd ~/.local/share/ov/pkg/isaac_sim-2021.2.1/standalone_examples/api/omni.isaac.jetbot/skrl_example # install the skrl library in editable mode from the working directory ~/.local/share/ov/pkg/isaac_sim-2021.2.1/python.sh -m pip install -e git+https://github.com/Toni-SM/skrl.git#egg=skrl @@ -821,7 +821,7 @@ These examples show how to train an agent in an Omniverse Isaac Sim environment .. code-block:: bash # create a working directory and change to it - mkdir /isaac-sim/standalone_examples/api/omni.isaac.jetbot/skrl_example + mkdir /isaac-sim/standalone_examples/api/omni.isaac.jetbot/skrl_example cd /isaac-sim/standalone_examples/api/omni.isaac.jetbot/skrl_example # install the skrl library in editable mode from the working directory @@ -835,7 +835,7 @@ These examples show how to train an agent in an Omniverse Isaac Sim environment # run the experiment /isaac-sim/python.sh isaacsim_jetbot_ppo.py - + .. raw:: html
@@ -860,13 +860,13 @@ These examples show basic real-world use cases to guide and support advanced RL .. raw:: html
- + **Implementation** (see details in the table below): * The observation space is composed of the episode's normalized progress, the robot joints' normalized positions (:math:`q`) in the interval -1 to 1, the robot joints' velocities (:math:`\dot{q}`) affected by a random uniform scale for generalization, and the target's position in space (:math:`target_{_{XYZ}}`) with respect to the robot's base - + * The action space, bounded in the range -1 to 1, consists of the following. For the joint control it's robot joints' position scaled change. For the Cartesian control it's the end-effector's position (:math:`ee_{_{XYZ}}`) scaled change. The end-effector position frame corresponds to the point where the left finger connects to the gripper base in simulation, whereas in the real world it corresponds to the end of the fingers. The gripper fingers remain closed all the time in both cases - + * The instantaneous reward is the negative value of the Euclidean distance (:math:`\text{d}`) between the robot end-effector and the target point position. The episode terminates when this distance is less than 0.035 meters in simulation (0.075 meters in real-world) or when the defined maximum timestep is reached * The target position lies within a rectangular cuboid of dimensions 0.5 x 0.5 x 0.2 meters centered at 0.5, 0.0, 0.2 meters with respect to the robot's base. The robot joints' positions are drawn from an initial configuration [0º, -45º, 0º, -135º, 0º, 90º, 45º] modified with uniform random values between -7º and 7º approximately @@ -878,7 +878,7 @@ These examples show basic real-world use cases to guide and support advanced RL - Formula / value - Size * - Observation space - - :math:`\dfrac{t}{t_{max}},\; 2 \dfrac{q - q_{min}}{q_{max} - q_{min}} - 1,\; 0.1\,\dot{q}\,U(0.5,1.5),\; target_{_{XYZ}}` + - :math:`\dfrac{t}{t_{max}},\; 2 \dfrac{q - q_{min}}{q_{max} - q_{min}} - 1,\; 0.1\,\dot{q}\,U(0.5,1.5),\; target_{_{XYZ}}` - 18 * - Action space (joint) - :math:`\dfrac{2.5}{120} \, \Delta q` @@ -888,13 +888,13 @@ These examples show basic real-world use cases to guide and support advanced RL - 3 * - Reward - :math:`-\text{d}(ee_{_{XYZ}},\; target_{_{XYZ}})` - - + - * - Episode termination - - :math:`\text{d}(ee_{_{XYZ}},\; target_{_{XYZ}}) \le 0.035 \quad` or :math:`\quad t \ge t_{max} - 1` - - + - :math:`\text{d}(ee_{_{XYZ}},\; target_{_{XYZ}}) \le 0.035 \quad` or :math:`\quad t \ge t_{max} - 1` + - * - Maximum timesteps (:math:`t_{max}`) - 100 - - + - .. raw:: html @@ -1061,19 +1061,19 @@ Library utilities (skrl.utils module) This example shows how to use the library utilities to carry out the post-processing of files and data generated by the experiments .. tabs:: - + .. tab:: Tensorboard files - + .. image:: ../_static/imgs/utils_tensorboard_file_iterator.svg :width: 100% :alt: Tensorboard file iterator - + .. raw:: html

Example of a figure, generated by the code, showing the total reward (left) and the mean and standard deviation (right) of all experiments located in the runs folder - + :download:`tensorboard_file_iterator.py <../examples/utils/tensorboard_file_iterator.py>` **Note:** The code will load all the Tensorboard files of the experiments located in the :literal:`runs` folder. It is necessary to adjust the iterator's parameters for other paths diff --git a/docs/source/intro/getting_started.rst b/docs/source/intro/getting_started.rst index 112a80ef..ee60e249 100644 --- a/docs/source/intro/getting_started.rst +++ b/docs/source/intro/getting_started.rst @@ -14,7 +14,7 @@ Getting Started At each step (also called timestep) of interaction with the environment, the agent sees an observation :math:`o_t` of the complete description of the state :math:`s_t \in S` of the environment. Then, it decides which action :math:`a_t \in A` to take from the action space using a policy. The environment, which changes in response to the agent's action (or by itself), returns a reward signal :math:`r_t = R(s_t, a_t, s_{t+1})` as a measure of how good or bad the action was that moved it to its new state :math:`s_{t+1}`. The agent aims to maximize the cumulative reward (discounted or not by a factor :math:`\gamma \in (0,1]`) by adjusting the policy's behaviour via some optimization algorithm. -**From this schema, this section is intended to guide in the creation of a RL system using skrl**. Visit the :ref:`Examples ` section for training and evaluation demonstrations with different environment interfaces and highlighted practices, among others. +**From this schema, this section is intended to guide in the creation of a RL system using skrl**. Visit the :ref:`Examples ` section for training and evaluation demonstrations with different environment interfaces and highlighted practices, among others. 1. Environments --------------- @@ -69,7 +69,7 @@ Within the methods and properties defined in the wrapped environment, the observ .. tabs:: .. tab:: Preview 4 (isaacgymenvs.make) - + .. code-block:: python import isaacgymenvs @@ -78,9 +78,9 @@ Within the methods and properties defined in the wrapped environment, the observ from skrl.envs.torch import wrap_env # create/load the environment using the easy-to-use API from NVIDIA - env = isaacgymenvs.make(seed=0, - task="Cartpole", - num_envs=512, + env = isaacgymenvs.make(seed=0, + task="Cartpole", + num_envs=512, sim_device="cuda:0", rl_device="cuda:0", graphics_device_id=0, @@ -90,7 +90,7 @@ Within the methods and properties defined in the wrapped environment, the observ env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview4")' .. tab:: Preview 4 - + .. code-block:: python # import the environment wrapper and loader @@ -104,7 +104,7 @@ Within the methods and properties defined in the wrapped environment, the observ env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview4")' .. tab:: Preview 3 - + .. code-block:: python # import the environment wrapper and loader @@ -118,7 +118,7 @@ Within the methods and properties defined in the wrapped environment, the observ env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview3")' .. tab:: Preview 2 - + .. code-block:: python # import the environment wrapper and loader @@ -132,7 +132,7 @@ Within the methods and properties defined in the wrapped environment, the observ env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview2")' .. tab:: OpenAI Gym - + .. tabs:: .. 
tab:: Single environment @@ -166,7 +166,7 @@ Within the methods and properties defined in the wrapped environment, the observ env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' .. tab:: DeepMind - + .. code-block:: python # import the environment wrapper and the deepmind suite @@ -262,21 +262,21 @@ The following code snippets show how to define a model, based on the concept of import torch import torch.nn as nn from skrl.models.torch import Model, GaussianMixin - + # define the model class Policy(GaussianMixin, Model): - def __init__(self, observation_space, action_space, device="cuda:0", + def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) - + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), nn.Linear(32, 32), nn.ELU(), nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter @@ -297,20 +297,20 @@ The following code snippets show how to define a model, based on the concept of import torch.nn as nn from skrl.models.torch import Model, MultivariateGaussianMixin - # define the model + # define the model class Policy(MultivariateGaussianMixin, Model): - def __init__(self, observation_space, action_space, device="cuda:0", + def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): Model.__init__(self, observation_space, action_space, device) MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) - + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), nn.Linear(32, 32), nn.ELU(), nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - + def compute(self, states, taken_actions, role): return self.net(states), self.log_std_parameter @@ -330,19 +330,19 @@ The following code snippets show how to define a model, based on the concept of import torch import torch.nn as nn from skrl.models.torch import Model, DeterministicMixin - + # define the model class Policy(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False): Model.__init__(self, observation_space, action_space, device) DeterministicMixin.__init__(self, clip_actions) - + self.net = nn.Sequential(nn.Linear(self.num_observations, 32), nn.ELU(), nn.Linear(32, 32), nn.ELU(), nn.Linear(32, self.num_actions)) - + def compute(self, states, taken_actions, role): return self.net(states) @@ -359,11 +359,11 @@ The following code snippets show how to define a model, based on the concept of Model.__init__(self, observation_space, action_space, device) TabularMixin.__init__(self, num_envs) - self.table = torch.ones((num_envs, self.num_observations, self.num_actions), + self.table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) def compute(self, states, taken_actions, role): - actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), states], + actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) Models must be collected in a dictionary 
and passed to the agent constructor during its instantiation under the argument :literal:`models`. The dictionary keys are specific to each agent. Visit their respective documentation for more details (under *Spaces and models* section). For example, the PPO agent requires the policy and value models as shown below: @@ -379,7 +379,7 @@ Models can be saved and loaded to and from the file system. However, the recomme 4. Noises --------- -Noise plays a fundamental role in the exploration stage, especially in agents of a deterministic nature, such as DDPG or TD3, for example. +Noise plays a fundamental role in the exploration stage, especially in agents of a deterministic nature, such as DDPG or TD3, for example. skrl provides, as part of its resources, **classes for instantiating noises** as shown in the following code snippets. Refer to :ref:`Noises ` documentation for more information. diff --git a/docs/source/intro/installation.rst b/docs/source/intro/installation.rst index cd5e6bdb..b60e40f1 100644 --- a/docs/source/intro/installation.rst +++ b/docs/source/intro/installation.rst @@ -4,7 +4,7 @@ Installation .. raw:: html
- + Prerequisites ------------- @@ -29,7 +29,7 @@ Python Package Index (PyPI) To install **skrl** with pip, execute: .. code-block:: bash - + pip install skrl GitHub repository @@ -38,20 +38,20 @@ GitHub repository Clone or download the library from its GitHub repository (https://github.com/Toni-SM/skrl) .. code-block:: bash - + git clone https://github.com/Toni-SM/skrl.git cd skrl * **Install in editable/development mode** (links the package to its original location allowing any modifications to be reflected directly in its Python environment) .. code-block:: bash - + pip install -e . * **Install in the current Python site-packages directory** (modifications to the code downloaded from GitHub will not be reflected in your Python environment) .. code-block:: bash - + pip install . .. raw:: html @@ -77,7 +77,7 @@ Known issues See PyTorch issue `#80831 `_ .. code-block:: text - + AttributeError: 'Adam' object has no attribute '_warned_capturable_if_run_uncaptured' Changelog diff --git a/docs/source/modules/skrl.agents.a2c.rst b/docs/source/modules/skrl.agents.a2c.rst index bcbf2a0e..02ded175 100644 --- a/docs/source/modules/skrl.agents.a2c.rst +++ b/docs/source/modules/skrl.agents.a2c.rst @@ -62,7 +62,7 @@ Algorithm implementation | :green:`# optimization step` | reset :math:`\text{optimizer}_{\theta, \phi}` | :math:`\nabla_{\theta, \, \phi} (L_{\pi_\theta} + {L}_{entropy} + L_{V_\phi})` -| :math:`\text{clip}(\lVert \nabla_{\theta, \, \phi} \rVert)` with :guilabel:`grad_norm_clip` +| :math:`\text{clip}(\lVert \nabla_{\theta, \, \phi} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_{\theta, \phi}` | :green:`# update learning rate` | **IF** there is a :guilabel:`learning_rate_scheduler` **THEN** @@ -132,5 +132,5 @@ API :show-inheritance: :private-members: _update :members: - + .. 
automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.amp.rst b/docs/source/modules/skrl.agents.amp.rst index 2c337b77..2d75f89e 100644 --- a/docs/source/modules/skrl.agents.amp.rst +++ b/docs/source/modules/skrl.agents.amp.rst @@ -71,7 +71,7 @@ Algorithm implementation | :math:`{logit}_{_{AMP}}^{^B} \leftarrow D_\psi(s_{_{AMP}}^{^B}) \qquad` with :math:`s_{_{AMP}}^{^B}` of size :guilabel:`discriminator_batch_size` | :math:`{logit}_{_{AMP}}^{^M} \leftarrow D_\psi(s_{_{AMP}}^{^M}) \qquad` with :math:`s_{_{AMP}}^{^M}` of size :guilabel:`discriminator_batch_size` | :green:`# discriminator prediction loss` -| :math:`L_{D_\psi} \leftarrow \dfrac{1}{2}(BCE({logit}_{_{AMP}}` ++ :math:`{logit}_{_{AMP}}^{^B}, \, 0) + BCE({logit}_{_{AMP}}^{^M}, \, 1))` +| :math:`L_{D_\psi} \leftarrow \dfrac{1}{2}(BCE({logit}_{_{AMP}}` ++ :math:`{logit}_{_{AMP}}^{^B}, \, 0) + BCE({logit}_{_{AMP}}^{^M}, \, 1))` | with :math:`\; BCE(x,y)=-\frac{1}{N} \sum_{i=1}^N [y \; log(\hat{y}) + (1-y) \, log(1-\hat{y})] \;` and :math:`\; \hat{y} = \dfrac{1}{1 + e^{-x}}` | :green:`# discriminator logit regularization` | :math:`L_{D_\psi} \leftarrow L_{D_\psi} +` :guilabel:`discriminator_logit_regularization_scale` :math:`\sum_{i=1}^N \text{flatten}(\psi_w[-1])^2` @@ -82,7 +82,7 @@ Algorithm implementation | :green:`# optimization step` | reset :math:`\text{optimizer}_{\theta, \phi, \psi}` | :math:`\nabla_{\theta, \, \phi, \, \psi} (L^{clip}_{\pi_\theta} + {L}_{entropy} + L_{V_\phi} + L_{D_\psi})` -| :math:`\text{clip}(\lVert \nabla_{\theta, \, \phi, \, \psi} \rVert)` with :guilabel:`grad_norm_clip` +| :math:`\text{clip}(\lVert \nabla_{\theta, \, \phi, \, \psi} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_{\theta, \phi, \psi}` | :green:`# update learning rate` | **IF** there is a :guilabel:`learning_rate_scheduler` **THEN** @@ -163,5 +163,5 @@ API :show-inheritance: :private-members: _update :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.base_class.rst b/docs/source/modules/skrl.agents.base_class.rst index 4fea0caf..32764bbe 100644 --- a/docs/source/modules/skrl.agents.base_class.rst +++ b/docs/source/modules/skrl.agents.base_class.rst @@ -11,7 +11,7 @@ Basic inheritance usage ^^^^^^^^^^^^^^^^^^^^^^^ .. tabs:: - + .. tab:: Inheritance .. literalinclude:: ../snippets/agent.py @@ -26,6 +26,6 @@ API :inherited-members: :private-members: _update, _empty_preprocessor, _get_internal_value :members: - + .. automethod:: __init__ .. automethod:: __str__ diff --git a/docs/source/modules/skrl.agents.cem.rst b/docs/source/modules/skrl.agents.cem.rst index 0a774635..a2c44f9b 100644 --- a/docs/source/modules/skrl.agents.cem.rst +++ b/docs/source/modules/skrl.agents.cem.rst @@ -82,5 +82,5 @@ API :show-inheritance: :private-members: _update :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.ddpg.rst b/docs/source/modules/skrl.agents.ddpg.rst index f7547713..4c197ac4 100644 --- a/docs/source/modules/skrl.agents.ddpg.rst +++ b/docs/source/modules/skrl.agents.ddpg.rst @@ -128,5 +128,5 @@ API :show-inheritance: :private-members: _update :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.ddqn.rst b/docs/source/modules/skrl.agents.ddqn.rst index 4af5ea0f..e9f6661b 100644 --- a/docs/source/modules/skrl.agents.ddqn.rst +++ b/docs/source/modules/skrl.agents.ddqn.rst @@ -95,5 +95,5 @@ API :show-inheritance: :private-members: _update :members: - + .. 
automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.dqn.rst b/docs/source/modules/skrl.agents.dqn.rst index 200baf9a..b003f4ca 100644 --- a/docs/source/modules/skrl.agents.dqn.rst +++ b/docs/source/modules/skrl.agents.dqn.rst @@ -1,7 +1,7 @@ Deep Q-Network (DQN) ==================== -DQN is a **model-free**, **off-policy** algorithm that trains a control policies directly from high-dimensional sensory using a deep function approximator to represent the Q-value function +DQN is a **model-free**, **off-policy** algorithm that trains a control policies directly from high-dimensional sensory using a deep function approximator to represent the Q-value function Paper: `Playing Atari with Deep Reinforcement Learning `_ @@ -95,5 +95,5 @@ API :show-inheritance: :private-members: _update :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.ppo.rst b/docs/source/modules/skrl.agents.ppo.rst index 11fdb703..eb226309 100644 --- a/docs/source/modules/skrl.agents.ppo.rst +++ b/docs/source/modules/skrl.agents.ppo.rst @@ -9,7 +9,7 @@ Algorithm ^^^^^^^^^ | For each iteration do: -| :math:`\bullet \;` Collect, in a rollout memory, a set of states :math:`s`, actions :math:`a`, rewards :math:`r`, dones :math:`d`, log probabilities :math:`logp` and values :math:`V` on policy using :math:`\pi_\theta` and :math:`V_\phi` +| :math:`\bullet \;` Collect, in a rollout memory, a set of states :math:`s`, actions :math:`a`, rewards :math:`r`, dones :math:`d`, log probabilities :math:`logp` and values :math:`V` on policy using :math:`\pi_\theta` and :math:`V_\phi` | :math:`\bullet \;` Estimate returns :math:`R` and advantages :math:`A` using Generalized Advantage Estimation (GAE(:math:`\lambda`)) from the collected data [:math:`r, d, V`] | :math:`\bullet \;` Compute the entropy loss :math:`{L}_{entropy}` | :math:`\bullet \;` Compute the clipped surrogate objective (policy loss) with :math:`ratio` as the probability ratio between the action under the current policy and the action under the previous policy: :math:`L^{clip}_{\pi_\theta} = \mathbb{E}[\min(A \; ratio, A \; \text{clip}(ratio, 1-c, 1+c))]` @@ -79,7 +79,7 @@ Algorithm implementation | :green:`# optimization step` | reset :math:`\text{optimizer}_{\theta, \phi}` | :math:`\nabla_{\theta, \, \phi} (L^{clip}_{\pi_\theta} + {L}_{entropy} + L_{V_\phi})` -| :math:`\text{clip}(\lVert \nabla_{\theta, \, \phi} \rVert)` with :guilabel:`grad_norm_clip` +| :math:`\text{clip}(\lVert \nabla_{\theta, \, \phi} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_{\theta, \phi}` | :green:`# update learning rate` | **IF** there is a :guilabel:`learning_rate_scheduler` **THEN** @@ -148,5 +148,5 @@ API :show-inheritance: :private-members: _update :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.q_learning.rst b/docs/source/modules/skrl.agents.q_learning.rst index 2ea947f6..1182e0dd 100644 --- a/docs/source/modules/skrl.agents.q_learning.rst +++ b/docs/source/modules/skrl.agents.q_learning.rst @@ -80,5 +80,5 @@ API :show-inheritance: :private-members: _update :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.sac.rst b/docs/source/modules/skrl.agents.sac.rst index 0af74b70..1370fb6f 100644 --- a/docs/source/modules/skrl.agents.sac.rst +++ b/docs/source/modules/skrl.agents.sac.rst @@ -141,5 +141,5 @@ API :show-inheritance: :private-members: _update :members: - + .. 
automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.sarsa.rst b/docs/source/modules/skrl.agents.sarsa.rst index 9bb4f23f..2420c4f7 100644 --- a/docs/source/modules/skrl.agents.sarsa.rst +++ b/docs/source/modules/skrl.agents.sarsa.rst @@ -79,5 +79,5 @@ API :show-inheritance: :private-members: _update :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.td3.rst b/docs/source/modules/skrl.agents.td3.rst index eb30fb80..4c0ad6b5 100644 --- a/docs/source/modules/skrl.agents.td3.rst +++ b/docs/source/modules/skrl.agents.td3.rst @@ -1,7 +1,7 @@ Twin-Delayed DDPG (TD3) ======================= -TD3 is a **model-free**, **deterministic** **off-policy** **actor-critic** algorithm (based on DDPG) that relies on double Q-learning, target policy smoothing and delayed policy updates to address the problems introduced by overestimation bias in actor-critic algorithms +TD3 is a **model-free**, **deterministic** **off-policy** **actor-critic** algorithm (based on DDPG) that relies on double Q-learning, target policy smoothing and delayed policy updates to address the problems introduced by overestimation bias in actor-critic algorithms Paper: `Addressing Function Approximation Error in Actor-Critic Methods `_ @@ -28,7 +28,7 @@ Algorithm implementation | **FOR** each gradient step up to :guilabel:`gradient_steps` **DO** | :green:`# target policy smoothing` | :math:`a' \leftarrow \mu_{\theta_{target}}(s')` -| :math:`noise \leftarrow \text{clip}(` :guilabel:`smooth_regularization_noise` :math:`, -c, c) \qquad` with :math:`c` as :guilabel:`smooth_regularization_clip` +| :math:`noise \leftarrow \text{clip}(` :guilabel:`smooth_regularization_noise` :math:`, -c, c) \qquad` with :math:`c` as :guilabel:`smooth_regularization_clip` | :math:`a' \leftarrow a' + noise` | :math:`a' \leftarrow \text{clip}(a', {a'}_{Low}, {a'}_{High})` | :green:`# compute target values` @@ -150,5 +150,5 @@ API :show-inheritance: :private-members: _update :members: - + .. 
automethod:: __init__ diff --git a/docs/source/modules/skrl.agents.trpo.rst b/docs/source/modules/skrl.agents.trpo.rst index bd2b0ad6..ca375f3d 100644 --- a/docs/source/modules/skrl.agents.trpo.rst +++ b/docs/source/modules/skrl.agents.trpo.rst @@ -9,7 +9,7 @@ Algorithm ^^^^^^^^^ | For each iteration do -| :math:`\bullet \;` Collect, in a rollout memory, a set of states :math:`s`, actions :math:`a`, rewards :math:`r`, dones :math:`d`, log probabilities :math:`logp` and values :math:`V` on policy using :math:`\pi_\theta` and :math:`V_\phi` +| :math:`\bullet \;` Collect, in a rollout memory, a set of states :math:`s`, actions :math:`a`, rewards :math:`r`, dones :math:`d`, log probabilities :math:`logp` and values :math:`V` on policy using :math:`\pi_\theta` and :math:`V_\phi` | :math:`\bullet \;` Estimate returns :math:`R` and advantages :math:`A` using Generalized Advantage Estimation (GAE(:math:`\lambda`)) from the collected data [:math:`r, d, V`] | :math:`\bullet \;` Compute the surrogate objective (policy loss) gradient :math:`g` and the Hessian :math:`H` of :math:`KL` divergence with respect to the policy parameters :math:`\theta` | :math:`\bullet \;` Compute the search direction :math:`\; x \approx H^{-1}g \;` using the conjugate gradient method @@ -117,7 +117,7 @@ Algorithm implementation | :green:`# optimization step (value)` | reset :math:`\text{optimizer}_\phi` | :math:`\nabla_{\phi} L_{V_\phi}` -| :math:`\text{clip}(\lVert \nabla_{\phi} \rVert)` with :guilabel:`grad_norm_clip` +| :math:`\text{clip}(\lVert \nabla_{\phi} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_\phi` | :green:`# update learning rate` | **IF** there is a :guilabel:`learning_rate_scheduler` **THEN** @@ -186,5 +186,5 @@ API :show-inheritance: :private-members: _update :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.envs.isaac_gym.rst b/docs/source/modules/skrl.envs.isaac_gym.rst index 6a386f24..a99c2ef9 100644 --- a/docs/source/modules/skrl.envs.isaac_gym.rst +++ b/docs/source/modules/skrl.envs.isaac_gym.rst @@ -26,9 +26,9 @@ With the release of Isaac Gym (preview 4), NVIDIA developers provide an easy-to- import isaacgymenvs - env = isaacgymenvs.make(seed=0, - task="Cartpole", - num_envs=2000, + env = isaacgymenvs.make(seed=0, + task="Cartpole", + num_envs=2000, sim_device="cuda:0", rl_device="cuda:0", graphics_device_id=0, @@ -61,7 +61,7 @@ Basic usage env = load_isaacgym_env_preview4(task_name="Cartpole") .. tab:: Command line arguments (priority) - + .. code-block:: python :linenos: @@ -118,7 +118,7 @@ Basic usage env = load_isaacgym_env_preview3(task_name="Cartpole") .. tab:: Command line arguments (priority) - + .. code-block:: python :linenos: @@ -171,7 +171,7 @@ Basic usage env = load_isaacgym_env_preview2(task_name="Cartpole") .. tab:: Command line arguments (priority) - + .. code-block:: python :linenos: diff --git a/docs/source/modules/skrl.envs.omniverse_isaac_gym.rst b/docs/source/modules/skrl.envs.omniverse_isaac_gym.rst index 4a6b3b34..c1b13a56 100644 --- a/docs/source/modules/skrl.envs.omniverse_isaac_gym.rst +++ b/docs/source/modules/skrl.envs.omniverse_isaac_gym.rst @@ -53,7 +53,7 @@ In this approach, the RL algorithm maintains the main execution loop env = load_omniverse_isaacgym_env(task_name="Cartpole") .. tab:: Command line arguments (priority) - + .. code-block:: python :linenos: @@ -98,7 +98,7 @@ In this approach, the RL algorithm is executed on a secondary thread while the s env.run() .. tab:: Command line arguments (priority) - + .. 
code-block:: python :linenos: diff --git a/docs/source/modules/skrl.envs.wrapping.rst b/docs/source/modules/skrl.envs.wrapping.rst index 3f0a7be5..2c905907 100644 --- a/docs/source/modules/skrl.envs.wrapping.rst +++ b/docs/source/modules/skrl.envs.wrapping.rst @@ -63,7 +63,7 @@ Basic usage .. tabs:: .. tab:: Preview 4 (isaacgymenvs.make) - + .. code-block:: python :linenos: @@ -73,9 +73,9 @@ Basic usage from skrl.envs.torch import wrap_env # create/load the environment using the easy-to-use API from NVIDIA - env = isaacgymenvs.make(seed=0, - task="Cartpole", - num_envs=512, + env = isaacgymenvs.make(seed=0, + task="Cartpole", + num_envs=512, sim_device="cuda:0", rl_device="cuda:0", graphics_device_id=0, @@ -85,7 +85,7 @@ Basic usage env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview4")' .. tab:: Preview 4 - + .. code-block:: python :linenos: @@ -100,7 +100,7 @@ Basic usage env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview4")' .. tab:: Preview 3 - + .. code-block:: python :linenos: @@ -115,7 +115,7 @@ Basic usage env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview3")' .. tab:: Preview 2 - + .. code-block:: python :linenos: @@ -130,7 +130,7 @@ Basic usage env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview2")' .. tab:: OpenAI Gym - + .. tabs:: .. tab:: Single environment @@ -166,7 +166,7 @@ Basic usage env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' .. tab:: DeepMind - + .. code-block:: python :linenos: @@ -200,41 +200,41 @@ Internal API :undoc-members: :show-inheritance: :members: - + .. automethod:: __init__ .. py:property:: device The device used by the environment - If the wrapped environment does not have the ``device`` property, the value of this property will be ``"cuda:0"`` or ``"cpu"`` depending on the device availability + If the wrapped environment does not have the ``device`` property, the value of this property will be ``"cuda:0"`` or ``"cpu"`` depending on the device availability .. autoclass:: skrl.envs.torch.wrappers.OmniverseIsaacGymWrapper :undoc-members: :show-inheritance: :members: - + .. automethod:: __init__ .. autoclass:: skrl.envs.torch.wrappers.IsaacGymPreview3Wrapper :undoc-members: :show-inheritance: :members: - + .. automethod:: __init__ .. autoclass:: skrl.envs.torch.wrappers.IsaacGymPreview2Wrapper :undoc-members: :show-inheritance: :members: - + .. automethod:: __init__ .. autoclass:: skrl.envs.torch.wrappers.GymWrapper :undoc-members: :show-inheritance: :members: - + .. automethod:: __init__ .. autoclass:: skrl.envs.torch.wrappers.DeepMindWrapper @@ -242,5 +242,5 @@ Internal API :show-inheritance: :private-members: _spec_to_space, _observation_to_tensor, _tensor_to_action :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.memories.base_class.rst b/docs/source/modules/skrl.memories.base_class.rst index 101e98dc..99177175 100644 --- a/docs/source/modules/skrl.memories.base_class.rst +++ b/docs/source/modules/skrl.memories.base_class.rst @@ -11,13 +11,13 @@ Basic inheritance usage ^^^^^^^^^^^^^^^^^^^^^^^ .. tabs:: - + .. tab:: Inheritance .. literalinclude:: ../snippets/memory.py :language: python :linenos: - + API ^^^ @@ -25,6 +25,6 @@ API :undoc-members: :show-inheritance: :members: - + .. automethod:: __init__ .. 
automethod:: __len__ diff --git a/docs/source/modules/skrl.memories.random.rst b/docs/source/modules/skrl.memories.random.rst index 36842732..cce722d0 100644 --- a/docs/source/modules/skrl.memories.random.rst +++ b/docs/source/modules/skrl.memories.random.rst @@ -12,7 +12,7 @@ Basic usage # create a random memory object memory = RandomMemory(memory_size=1000, num_envs=1, replacement=False) - + # create tensors in memory memory.create_tensor(name="states", size=(64, 64, 3), dtype=torch.float32) memory.create_tensor(name="actions", size=(4,1), dtype=torch.float32) @@ -39,6 +39,6 @@ API :show-inheritance: :inherited-members: :members: - + .. automethod:: __init__ .. automethod:: __len__ diff --git a/docs/source/modules/skrl.models.base_class.rst b/docs/source/modules/skrl.models.base_class.rst index 8422448e..d4cbc0cf 100644 --- a/docs/source/modules/skrl.models.base_class.rst +++ b/docs/source/modules/skrl.models.base_class.rst @@ -36,7 +36,7 @@ API :show-inheritance: :private-members: _get_space_size :members: - + .. automethod:: __init__ .. py:property:: device diff --git a/docs/source/modules/skrl.models.categorical.rst b/docs/source/modules/skrl.models.categorical.rst index 6dc185c2..2c2158f0 100644 --- a/docs/source/modules/skrl.models.categorical.rst +++ b/docs/source/modules/skrl.models.categorical.rst @@ -39,7 +39,7 @@ Basic usage ----------- .. tabs:: - + .. tab:: Multi-Layer Perceptron (MLP) .. literalinclude:: ../snippets/categorical_model.py diff --git a/docs/source/modules/skrl.models.deterministic.rst b/docs/source/modules/skrl.models.deterministic.rst index bccbf334..2d92d702 100644 --- a/docs/source/modules/skrl.models.deterministic.rst +++ b/docs/source/modules/skrl.models.deterministic.rst @@ -39,7 +39,7 @@ Basic usage ----------- .. tabs:: - + .. tab:: Multi-Layer Perceptron (MLP) .. literalinclude:: ../snippets/deterministic_model.py diff --git a/docs/source/modules/skrl.models.gaussian.rst b/docs/source/modules/skrl.models.gaussian.rst index 0b7dd56a..e3dff3cb 100644 --- a/docs/source/modules/skrl.models.gaussian.rst +++ b/docs/source/modules/skrl.models.gaussian.rst @@ -13,7 +13,7 @@ skrl provides a Python mixin (:literal:`GaussianMixin`) to assist in the creatio :emphasize-lines: 1 class GaussianModel(GaussianMixin, Model): - def __init__(self, observation_space, action_space, device="cuda:0", + def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) @@ -24,7 +24,7 @@ skrl provides a Python mixin (:literal:`GaussianMixin`) to assist in the creatio :emphasize-lines: 4-5 class GaussianModel(GaussianMixin, Model): - def __init__(self, observation_space, action_space, device="cuda:0", + def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) @@ -41,7 +41,7 @@ Basic usage ----------- .. tabs:: - + .. tab:: Multi-Layer Perceptron (MLP) .. 
literalinclude:: ../snippets/gaussian_model.py diff --git a/docs/source/modules/skrl.models.multivariate_gaussian.rst b/docs/source/modules/skrl.models.multivariate_gaussian.rst index 13661349..df4204bc 100644 --- a/docs/source/modules/skrl.models.multivariate_gaussian.rst +++ b/docs/source/modules/skrl.models.multivariate_gaussian.rst @@ -13,7 +13,7 @@ skrl provides a Python mixin (:literal:`MultivariateGaussianMixin`) to assist in :emphasize-lines: 1 class MultivariateGaussianModel(MultivariateGaussianMixin, Model): - def __init__(self, observation_space, action_space, device="cuda:0", + def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): Model.__init__(self, observation_space, action_space, device) MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) @@ -24,7 +24,7 @@ skrl provides a Python mixin (:literal:`MultivariateGaussianMixin`) to assist in :emphasize-lines: 4-5 class MultivariateGaussianModel(MultivariateGaussianMixin, Model): - def __init__(self, observation_space, action_space, device="cuda:0", + def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): Model.__init__(self, observation_space, action_space, device) MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) @@ -41,7 +41,7 @@ Basic usage ----------- .. tabs:: - + .. tab:: Multi-Layer Perceptron (MLP) .. literalinclude:: ../snippets/multivariate_gaussian_model.py diff --git a/docs/source/modules/skrl.models.tabular.rst b/docs/source/modules/skrl.models.tabular.rst index 1190c245..54ab3452 100644 --- a/docs/source/modules/skrl.models.tabular.rst +++ b/docs/source/modules/skrl.models.tabular.rst @@ -31,7 +31,7 @@ Basic usage ----------- .. tabs:: - + .. tab:: :math:`\epsilon`-greedy policy .. literalinclude:: ../snippets/tabular_model.py diff --git a/docs/source/modules/skrl.resources.noises.rst b/docs/source/modules/skrl.resources.noises.rst index e25c87c4..c4e74014 100644 --- a/docs/source/modules/skrl.resources.noises.rst +++ b/docs/source/modules/skrl.resources.noises.rst @@ -16,7 +16,7 @@ Basic usage The noise usage is defined in each agent's configuration dictionary. A noise instance is set under the :literal:`"noise"` sub-key. The following examples show how to set the noise for an agent: .. tabs:: - + .. tab:: Gaussian noise .. image:: ../_static/imgs/noise_gaussian.png @@ -73,7 +73,7 @@ API :inherited-members: :private-members: _update :members: - + .. automethod:: __init__ .. raw:: html @@ -94,7 +94,7 @@ API :inherited-members: :private-members: _update :members: - + .. automethod:: __init__ .. raw:: html @@ -116,7 +116,7 @@ Basic inheritance usage ^^^^^^^^^^^^^^^^^^^^^^^ .. tabs:: - + .. tab:: Inheritance .. literalinclude:: ../snippets/noise.py @@ -132,5 +132,5 @@ API :inherited-members: :private-members: _update :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.resources.preprocessors.rst b/docs/source/modules/skrl.resources.preprocessors.rst index 21c44a10..ae237b71 100644 --- a/docs/source/modules/skrl.resources.preprocessors.rst +++ b/docs/source/modules/skrl.resources.preprocessors.rst @@ -10,7 +10,7 @@ Preprocessors Basic usage ----------- -The preprocessors usage is defined in each agent's configuration dictionary. +The preprocessors usage is defined in each agent's configuration dictionary. 
The preprocessor class is set under the :literal:`"_preprocessor"` key and its arguments are set under the :literal:`"_preprocessor_kwargs"` key as a keyword argument dictionary. The following examples show how to set the preprocessors for an agent: @@ -48,11 +48,11 @@ Algorithm implementation **Standardization by centering and scaling** -| :math:`\text{clip}((x - \bar{x}_t) / (\sqrt{\sigma^2} \;+` :guilabel:`epsilon` :math:`), -c, c) \qquad` with :math:`c` as :guilabel:`clip_threshold` +| :math:`\text{clip}((x - \bar{x}_t) / (\sqrt{\sigma^2} \;+` :guilabel:`epsilon` :math:`), -c, c) \qquad` with :math:`c` as :guilabel:`clip_threshold` **Scale back the data to the original representation (inverse transform)** -| :math:`\sqrt{\sigma^2_t} \; \text{clip}(x, -c, c) + \bar{x}_t \qquad` with :math:`c` as :guilabel:`clip_threshold` +| :math:`\sqrt{\sigma^2_t} \; \text{clip}(x, -c, c) + \bar{x}_t \qquad` with :math:`c` as :guilabel:`clip_threshold` **Update the running mean and variance** (See `parallel algorithm `_) @@ -69,5 +69,5 @@ API .. autoclass:: skrl.resources.preprocessors.torch.running_standard_scaler.RunningStandardScaler :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.resources.schedulers.rst b/docs/source/modules/skrl.resources.schedulers.rst index a879dae6..a3a76071 100644 --- a/docs/source/modules/skrl.resources.schedulers.rst +++ b/docs/source/modules/skrl.resources.schedulers.rst @@ -15,7 +15,7 @@ Basic usage The learning rate scheduler usage is defined in each agent's configuration dictionary. The scheduler class is set under the :literal:`"learning_rate_scheduler"` key and its arguments are set under the :literal:`"learning_rate_scheduler_kwargs"` key as a keyword argument dictionary, without specifying the optimizer (first argument). The following examples show how to set the scheduler for an agent: .. tabs:: - + .. tab:: PyTorch scheduler .. code-block:: python @@ -66,5 +66,5 @@ API :show-inheritance: :inherited-members: :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.trainers.base_class.rst b/docs/source/modules/skrl.trainers.base_class.rst index 7b76d574..660c4b6b 100644 --- a/docs/source/modules/skrl.trainers.base_class.rst +++ b/docs/source/modules/skrl.trainers.base_class.rst @@ -11,7 +11,7 @@ Basic inheritance usage ^^^^^^^^^^^^^^^^^^^^^^^ .. tabs:: - + .. tab:: Inheritance .. literalinclude:: ../snippets/trainer.py @@ -29,6 +29,6 @@ API :inherited-members: :private-members: _setup_agents :members: - + .. automethod:: __init__ .. automethod:: __str__ diff --git a/docs/source/modules/skrl.trainers.manual.rst b/docs/source/modules/skrl.trainers.manual.rst index 61c43c86..b6a60e21 100644 --- a/docs/source/modules/skrl.trainers.manual.rst +++ b/docs/source/modules/skrl.trainers.manual.rst @@ -13,7 +13,7 @@ Basic usage ^^^^^^^^^^^ .. tabs:: - + .. tab:: Snippet .. literalinclude:: ../snippets/trainer.py diff --git a/docs/source/modules/skrl.trainers.parallel.rst b/docs/source/modules/skrl.trainers.parallel.rst index 4e3751e3..489875d8 100644 --- a/docs/source/modules/skrl.trainers.parallel.rst +++ b/docs/source/modules/skrl.trainers.parallel.rst @@ -21,7 +21,7 @@ Basic usage At the moment, only simultaneous training and evaluation of agents with local memory (no memory sharing) is implemented .. tabs:: - + .. tab:: Snippet .. 
literalinclude:: ../snippets/trainer.py diff --git a/docs/source/modules/skrl.trainers.sequential.rst b/docs/source/modules/skrl.trainers.sequential.rst index 9866a420..5f847508 100644 --- a/docs/source/modules/skrl.trainers.sequential.rst +++ b/docs/source/modules/skrl.trainers.sequential.rst @@ -13,7 +13,7 @@ Basic usage ^^^^^^^^^^^ .. tabs:: - + .. tab:: Snippet .. literalinclude:: ../snippets/trainer.py diff --git a/docs/source/modules/skrl.utils.isaacgym_utils.rst b/docs/source/modules/skrl.utils.isaacgym_utils.rst index 6ccd74cc..6df0454f 100644 --- a/docs/source/modules/skrl.utils.isaacgym_utils.rst +++ b/docs/source/modules/skrl.utils.isaacgym_utils.rst @@ -39,7 +39,7 @@ API Web viewer for development without X server ------------------------------------------- -This library provides an API for instantiating a lightweight web viewer useful, mostly, for designing Isaac Gym environments in remote workstations or docker containers without X server +This library provides an API for instantiating a lightweight web viewer useful, mostly, for designing Isaac Gym environments in remote workstations or docker containers without X server Gestures and actions ^^^^^^^^^^^^^^^^^^^^ @@ -85,7 +85,7 @@ Basic usage ^^^^^^^^^^^ .. tabs:: - + .. tab:: Snippet .. literalinclude:: ../snippets/isaacgym_utils.py @@ -102,5 +102,5 @@ API :inherited-members: :private-members: _route_index, _route_stream, _route_input_event, _stream :members: - + .. automethod:: __init__ diff --git a/docs/source/modules/skrl.utils.model_instantiators.rst b/docs/source/modules/skrl.utils.model_instantiators.rst index 536f4d04..56c62e90 100644 --- a/docs/source/modules/skrl.utils.model_instantiators.rst +++ b/docs/source/modules/skrl.utils.model_instantiators.rst @@ -10,26 +10,26 @@ API ^^^ .. autoclass:: skrl.utils.model_instantiators.Shape - + .. py:property:: ONE Flag to indicate that the model's input/output has shape (1,) - - This flag is useful for the definition of critic models, where the critic's output is a scalar + + This flag is useful for the definition of critic models, where the critic's output is a scalar .. py:property:: STATES Flag to indicate that the model's input/output is the state (observation) space of the environment It is an alias for :py:attr:`OBSERVATIONS` - + .. py:property:: OBSERVATIONS Flag to indicate that the model's input/output is the observation space of the environment - + .. py:property:: ACTIONS Flag to indicate that the model's input/output is the action space of the environment - + .. py:property:: STATES_ACTIONS Flag to indicate that the model's input/output is the combination (concatenation) of the state (observation) and action spaces of the environment diff --git a/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst b/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst index 8b4102e5..b3ea9380 100644 --- a/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst +++ b/docs/source/modules/skrl.utils.omniverse_isaacgym_utils.rst @@ -42,4 +42,4 @@ OmniIsaacGymEnvs-like environment instance API """ -.. autofunction:: skrl.utils.omniverse_isaacgym_utils.get_env_instance \ No newline at end of file +.. autofunction:: skrl.utils.omniverse_isaacgym_utils.get_env_instance diff --git a/docs/source/modules/skrl.utils.postprocessing.rst b/docs/source/modules/skrl.utils.postprocessing.rst index 98341e1a..d4914d82 100644 --- a/docs/source/modules/skrl.utils.postprocessing.rst +++ b/docs/source/modules/skrl.utils.postprocessing.rst @@ -10,7 +10,7 @@ Basic usage ^^^^^^^^^^^ .. 
tabs:: - + .. tab:: PyTorch (.pt) .. literalinclude:: ../snippets/utils_postprocessing.py @@ -47,7 +47,7 @@ API :inherited-members: :private-members: _format_numpy, _format_torch, _format_csv :members: - + .. automethod:: __init__ .. automethod:: __iter__ .. automethod:: __next__ @@ -70,7 +70,7 @@ Basic usage ^^^^^^^^^^^ .. tabs:: - + .. tab:: Tensorboard (events.out.tfevents.*) .. literalinclude:: ../snippets/utils_postprocessing.py @@ -88,7 +88,7 @@ API :show-inheritance: :inherited-members: :members: - + .. automethod:: __init__ .. automethod:: __iter__ - .. automethod:: __next__ \ No newline at end of file + .. automethod:: __next__ diff --git a/docs/source/snippets/agent.py b/docs/source/snippets/agent.py index 1f27f789..95252f81 100644 --- a/docs/source/snippets/agent.py +++ b/docs/source/snippets/agent.py @@ -23,12 +23,12 @@ class CUSTOM(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """ :param models: Models used by the agent @@ -45,11 +45,11 @@ def __init__(self, :type cfg: dict """ CUSTOM_DEFAULT_CONFIG.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=CUSTOM_DEFAULT_CONFIG) # ===================================================================== # - get and process models from self.models @@ -89,17 +89,17 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample and return agent's actions # ====================================== - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent diff --git a/docs/source/snippets/categorical_model.py b/docs/source/snippets/categorical_model.py index 66fcc47b..918a0fdc 100644 --- a/docs/source/snippets/categorical_model.py +++ b/docs/source/snippets/categorical_model.py @@ -22,9 +22,9 @@ def compute(self, states, taken_actions, role): # instantiate the model (assumes there is a wrapped environment: env) -policy = MLP(observation_space=env.observation_space, - action_space=env.action_space, - device=env.device, +policy = MLP(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, unnormalized_log_prob=True) # [end-mlp] @@ -60,13 +60,13 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro nn.Linear(32, self.num_actions)) def compute(self, states, taken_actions, 
role): - # permute (samples, width, height, channels) -> (samples, channels, width, height) + # permute (samples, width, height, channels) -> (samples, channels, width, height) return self.net(states.permute(0, 3, 1, 2)) # instantiate the model (assumes there is a wrapped environment: env) -policy = CNN(observation_space=env.observation_space, - action_space=env.action_space, - device=env.device, +policy = CNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, unnormalized_log_prob=True) # [end-cnn] diff --git a/docs/source/snippets/deterministic_model.py b/docs/source/snippets/deterministic_model.py index d735ef63..84714306 100644 --- a/docs/source/snippets/deterministic_model.py +++ b/docs/source/snippets/deterministic_model.py @@ -22,9 +22,9 @@ def compute(self, states, taken_actions, role): # instantiate the model (assumes there is a wrapped environment: env) -policy = MLP(observation_space=env.observation_space, - action_space=env.action_space, - device=env.device, +policy = MLP(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, clip_actions=False) # [end-mlp] @@ -61,14 +61,14 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.Linear(32, 1)) def compute(self, states, taken_actions, role): - # permute (samples, width, height, channels) -> (samples, channels, width, height) + # permute (samples, width, height, channels) -> (samples, channels, width, height) x = self.features_extractor(states.permute(0, 3, 1, 2)) return self.net(torch.cat([x, taken_actions], dim=1)) # instantiate the model (assumes there is a wrapped environment: env) -policy = CNN(observation_space=env.observation_space, - action_space=env.action_space, - device=env.device, +policy = CNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, clip_actions=False) # [end-cnn] diff --git a/docs/source/snippets/gaussian_model.py b/docs/source/snippets/gaussian_model.py index 50ef6afe..f66a6071 100644 --- a/docs/source/snippets/gaussian_model.py +++ b/docs/source/snippets/gaussian_model.py @@ -8,7 +8,7 @@ # define the model class MLP(GaussianMixin, Model): - def __init__(self, observation_space, action_space, device, + def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) @@ -27,9 +27,9 @@ def compute(self, states, taken_actions, role): return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter # instantiate the model (assumes there is a wrapped environment: env) -policy = MLP(observation_space=env.observation_space, - action_space=env.action_space, - device=env.device, +policy = MLP(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, clip_actions=True, clip_log_std=True, min_log_std=-20, @@ -49,7 +49,7 @@ def compute(self, states, taken_actions, role): # define the model class CNN(GaussianMixin, Model): - def __init__(self, observation_space, action_space, device, + def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): Model.__init__(self, observation_space, action_space, device) GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) 
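# The "permute" comments in these CNN snippets perform an NHWC -> NCHW conversion, because
# torch.nn.Conv2d expects channels-first input. A standalone sanity check (a minimal sketch;
# shapes are arbitrary and only for illustration):

import torch

states = torch.rand(8, 64, 64, 3)                # (samples, width, height, channels) as given by the env
x = states.permute(0, 3, 1, 2)                   # -> (samples, channels, width, height)
assert x.shape == (8, 3, 64, 64)
features = torch.nn.Conv2d(3, 32, kernel_size=8, stride=4)(x)   # channels-first, as Conv2d requires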
@@ -70,18 +70,18 @@ def __init__(self, observation_space, action_space, device, nn.Linear(16, 32), nn.Tanh(), nn.Linear(32, self.num_actions)) - + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) def compute(self, states, taken_actions, role): - # permute (samples, width, height, channels) -> (samples, channels, width, height) + # permute (samples, width, height, channels) -> (samples, channels, width, height) return self.net(states.permute(0, 3, 1, 2)), self.log_std_parameter # instantiate the model (assumes there is a wrapped environment: env) -policy = CNN(observation_space=env.observation_space, - action_space=env.action_space, - device=env.device, +policy = CNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, clip_actions=True, clip_log_std=True, min_log_std=-20, diff --git a/docs/source/snippets/isaacgym_utils.py b/docs/source/snippets/isaacgym_utils.py index 14b04cde..b5dd68b7 100644 --- a/docs/source/snippets/isaacgym_utils.py +++ b/docs/source/snippets/isaacgym_utils.py @@ -48,7 +48,7 @@ cam_props.width, cam_props.height = 300, 300 cam_handle = gym.create_camera_sensor(env, cam_props) gym.set_camera_location(cam_handle, env, gymapi.Vec3(1, 1, 1), gymapi.Vec3(0, 0, 0)) - + envs.append(env) cameras.append(cam_handle) @@ -62,7 +62,7 @@ gym.simulate(sim) # render the scene - web_viewer.render(fetch_results=True, - step_graphics=True, - render_all_camera_sensors=True, + web_viewer.render(fetch_results=True, + step_graphics=True, + render_all_camera_sensors=True, wait_for_page_load=True) diff --git a/docs/source/snippets/memory.py b/docs/source/snippets/memory.py index 42bbab11..5fec7f78 100644 --- a/docs/source/snippets/memory.py +++ b/docs/source/snippets/memory.py @@ -32,6 +32,6 @@ def sample(self, names: Tuple[str], batch_size: int, mini_batches: int = 1) -> L :rtype: list of torch.Tensor list """ # ================================ - # - sample a batch from memory. + # - sample a batch from memory. # It is possible to generate only the sampling indexes and call self.sample_by_index(...) # ================================ diff --git a/docs/source/snippets/model_mixin.py b/docs/source/snippets/model_mixin.py index 85c0af9e..3cfcb2b3 100644 --- a/docs/source/snippets/model_mixin.py +++ b/docs/source/snippets/model_mixin.py @@ -9,9 +9,9 @@ class CustomModel(Model): - def __init__(self, - observation_space: Union[int, Sequence[int], gym.Space], - action_space: Union[int, Sequence[int], gym.Space], + def __init__(self, + observation_space: Union[int, Sequence[int], gym.Space], + action_space: Union[int, Sequence[int], gym.Space], device: Union[str, torch.device] = "cuda:0") -> None: """ :param observation_space: Observation/state space or shape. @@ -24,10 +24,10 @@ def __init__(self, :type device: str or torch.device, optional """ super().__init__(observation_space, action_space, device) - - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, + + def act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, role: str = "") -> Sequence[torch.Tensor]: """Act according to the specified behavior @@ -40,7 +40,7 @@ def act(self, :type role: str, optional :raises NotImplementedError: Child class must implement this method - + :return: Action to be taken by the agent given the state of the environment. The typical sequence's components are the actions, the log of the probability density function and mean actions. 
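# The log_std_parameter used by the Gaussian snippets above is a single state-independent
# vector of log standard deviations. Roughly (a simplified sketch, not the mixin's actual code),
# the mixin turns (mean_actions, log_std) into a distribution and log-probabilities like this:

import torch

mean_actions = torch.zeros(4, 2)                       # e.g. network output: 4 samples, 2 actions
log_std = torch.nn.Parameter(torch.zeros(2))           # shared across samples, learned jointly
dist = torch.distributions.Normal(mean_actions, log_std.exp())
actions = dist.sample()
log_prob = dist.log_prob(actions).sum(dim=-1, keepdim=True)   # "sum" reduction over the action dimension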
Deterministic agents must ignore the last two components and return empty tensors or None for them @@ -74,9 +74,9 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: self._custom_clip_actions = {} self._custom_clip_actions[role] - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, + def act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, role: str = "") -> Sequence[torch.Tensor]: """Act according to the specified behavior @@ -89,7 +89,7 @@ def act(self, :type role: str, optional :raises NotImplementedError: Child class must implement this method - + :return: Action to be taken by the agent given the state of the environment. The typical sequence's components are the actions, the log of the probability density function and mean actions. Deterministic agents must ignore the last two components and return empty tensors or None for them @@ -101,4 +101,4 @@ def act(self, # e.g. retrieve clip actions according to role clip_actions = self._custom_clip_actions[role] if role in self._custom_clip_actions else self._custom_clip_actions[""] -# [end-mixin] \ No newline at end of file +# [end-mixin] diff --git a/docs/source/snippets/multivariate_gaussian_model.py b/docs/source/snippets/multivariate_gaussian_model.py index e53f7fe1..610f4ac6 100644 --- a/docs/source/snippets/multivariate_gaussian_model.py +++ b/docs/source/snippets/multivariate_gaussian_model.py @@ -8,7 +8,7 @@ # define the model class MLP(MultivariateGaussianMixin, Model): - def __init__(self, observation_space, action_space, device, + def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): Model.__init__(self, observation_space, action_space, device) MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) @@ -27,9 +27,9 @@ def compute(self, states, taken_actions, role): return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter # instantiate the model (assumes there is a wrapped environment: env) -policy = MLP(observation_space=env.observation_space, - action_space=env.action_space, - device=env.device, +policy = MLP(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, clip_actions=True, clip_log_std=True, min_log_std=-20, @@ -48,7 +48,7 @@ def compute(self, states, taken_actions, role): # define the model class CNN(MultivariateGaussianMixin, Model): - def __init__(self, observation_space, action_space, device, + def __init__(self, observation_space, action_space, device, clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): Model.__init__(self, observation_space, action_space, device) MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) @@ -69,18 +69,18 @@ def __init__(self, observation_space, action_space, device, nn.Linear(16, 32), nn.Tanh(), nn.Linear(32, self.num_actions)) - + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) def compute(self, states, taken_actions, role): - # permute (samples, width, height, channels) -> (samples, channels, width, height) + # permute (samples, width, height, channels) -> (samples, channels, width, height) return self.net(states.permute(0, 3, 1, 2)), self.log_std_parameter # instantiate the model (assumes there is a wrapped environment: env) -policy = CNN(observation_space=env.observation_space, - action_space=env.action_space, - device=env.device, +policy = 
CNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, clip_actions=True, clip_log_std=True, min_log_std=-20, diff --git a/docs/source/snippets/noise.py b/docs/source/snippets/noise.py index df55430f..b0da68be 100644 --- a/docs/source/snippets/noise.py +++ b/docs/source/snippets/noise.py @@ -18,7 +18,7 @@ def sample(self, size: Union[Tuple[int], torch.Size]) -> torch.Tensor: :param size: Shape of the sampled tensor :type size: tuple or list of integers, or torch.Size - + :return: Sampled noise :rtype: torch.Tensor """ diff --git a/docs/source/snippets/shared_model.py b/docs/source/snippets/shared_model.py index 40182f61..fe1c054d 100644 --- a/docs/source/snippets/shared_model.py +++ b/docs/source/snippets/shared_model.py @@ -18,7 +18,7 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.ELU(), nn.Linear(32, 32), nn.ELU()) - + # separated layers ("policy") self.mean_layer = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) diff --git a/docs/source/snippets/tabular_model.py b/docs/source/snippets/tabular_model.py index 4b94c69c..5afdbb49 100644 --- a/docs/source/snippets/tabular_model.py +++ b/docs/source/snippets/tabular_model.py @@ -14,9 +14,9 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32) def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) - + indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) @@ -24,9 +24,9 @@ def compute(self, states, taken_actions, role): # instantiate the model (assumes there is a wrapped environment: env) -policy = EpilonGreedyPolicy(observation_space=env.observation_space, - action_space=env.action_space, - device=env.device, +policy = EpilonGreedyPolicy(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, num_envs=env.num_envs, epsilon=0.15) # [end-epsilon-greedy] diff --git a/docs/source/snippets/trainer.py b/docs/source/snippets/trainer.py index 2e54ad44..57043263 100644 --- a/docs/source/snippets/trainer.py +++ b/docs/source/snippets/trainer.py @@ -16,9 +16,9 @@ class CustomTrainer(Trainer): - def __init__(self, - env: Wrapper, - agents: Union[Agent, List[Agent], List[List[Agent]]], + def __init__(self, + env: Wrapper, + agents: Union[Agent, List[Agent], List[List[Agent]]], agents_scope : List[int] = [], cfg: dict = {}) -> None: """ @@ -123,4 +123,4 @@ def eval(self) -> None: # evaluate the agent(s) for timestep in range(cfg["timesteps"]): trainer.eval(timestep=timestep) -# [end-manual] \ No newline at end of file +# [end-manual] diff --git a/docs/source/snippets/utils_postprocessing.py b/docs/source/snippets/utils_postprocessing.py index 1a204c2a..477f459f 100644 --- a/docs/source/snippets/utils_postprocessing.py +++ b/docs/source/snippets/utils_postprocessing.py @@ -6,10 +6,10 @@ memory_iterator = postprocessing.MemoryFileIterator("memories/*.pt") for filename, data in memory_iterator: filename # str: basename of the current file - data # dict: keys are the names of the memory tensors in the file. 
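# The Q-table lookup in the tabular snippet above relies on advanced indexing: pairing an
# environment index with that environment's (integer) state selects one row of Q-values per
# environment. A small worked example of just that indexing (toy values for illustration):

import torch

num_envs, num_states, num_actions = 2, 5, 3
q_table = torch.arange(num_envs * num_states * num_actions, dtype=torch.float32)
q_table = q_table.reshape(num_envs, num_states, num_actions)
states = torch.tensor([[4], [0]])                                     # current state of each environment
rows = q_table[torch.arange(num_envs).view(-1, 1), states]            # shape: (num_envs, 1, num_actions)
greedy_actions = torch.argmax(rows, dim=-1, keepdim=True).view(-1, 1) # one greedy action per environment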
+ data # dict: keys are the names of the memory tensors in the file. # Tensor shapes are (memory size, number of envs, specific content size) - - # example of simple usage: + + # example of simple usage: # print the filenames of all memories and their tensor shapes print("\nfilename:", filename) print(" |-- states:", data['states'].shape) @@ -30,8 +30,8 @@ filename # str: basename of the current file data # dict: keys are the names of the memory arrays in the file. # Array shapes are (memory size, number of envs, specific content size) - - # example of simple usage: + + # example of simple usage: # print the filenames of all memories and their array shapes print("\nfilename:", filename) print(" |-- states:", data['states'].shape) @@ -51,10 +51,10 @@ for filename, data in memory_iterator: filename # str: basename of the current file data # dict: keys are the names of the memory list of lists extracted from the file. - # List lengths are (memory size * number of envs) and + # List lengths are (memory size * number of envs) and # sublist lengths are (specific content size) - - # example of simple usage: + + # example of simple usage: # print the filenames of all memories and their list lengths print("\nfilename:", filename) print(" |-- states:", len(data['states'])) @@ -76,7 +76,7 @@ dirname # str: path of the directory (experiment name) containing the Tensorboard file data # dict: keys are the tags, values are lists of [step, value] pairs - # example of simple usage: + # example of simple usage: # print the directory name and the value length for the "Reward / Total reward (mean)" tag print("\ndirname:", dirname) for tag, values in data.items(): diff --git a/setup.py b/setup.py index bbf87912..52de3de0 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ "tensorboard", "tqdm", "packaging", + "pre-commit", ] # installation diff --git a/skrl/agents/torch/__init__.py b/skrl/agents/torch/__init__.py index ebbc5c75..8e83aa5e 100644 --- a/skrl/agents/torch/__init__.py +++ b/skrl/agents/torch/__init__.py @@ -1 +1 @@ -from .base import Agent \ No newline at end of file +from .base import Agent diff --git a/skrl/agents/torch/a2c/__init__.py b/skrl/agents/torch/a2c/__init__.py index e8cc76e2..42f75ba4 100644 --- a/skrl/agents/torch/a2c/__init__.py +++ b/skrl/agents/torch/a2c/__init__.py @@ -1 +1 @@ -from .a2c import A2C, A2C_DEFAULT_CONFIG \ No newline at end of file +from .a2c import A2C, A2C_DEFAULT_CONFIG diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 0693e0d5..c4f2c08c 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -17,10 +17,10 @@ A2C_DEFAULT_CONFIG = { "rollouts": 16, # number of rollouts before updating "mini_batches": 1, # number of mini batches to use for updating - + "discount_factor": 0.99, # discount factor (gamma) "lambda": 0.95, # TD(lambda) coefficient (lam) for computing returns and advantages - + "learning_rate": 1e-3, # learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) "learning_rate_scheduler_kwargs": {}, # learning rate scheduler's kwargs (e.g. 
{"step_size": 1e-3}) @@ -51,21 +51,21 @@ class A2C(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Advantage Actor Critic (A2C) https://arxiv.org/abs/1602.01783 - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. - If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -81,11 +81,11 @@ def __init__(self, """ _cfg = copy.deepcopy(A2C_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -124,7 +124,7 @@ def __init__(self, if self.policy is self.value: self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=self._learning_rate) else: - self.optimizer = torch.optim.Adam(itertools.chain(self.policy.parameters(), self.value.parameters()), + self.optimizer = torch.optim.Adam(itertools.chain(self.policy.parameters(), self.value.parameters()), lr=self._learning_rate) if self._learning_rate_scheduler is not None: self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) @@ -149,7 +149,7 @@ def init(self) -> None: """ super().init() self.set_mode("eval") - + # create tensors in memory if self.memory is not None: self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) @@ -188,17 +188,17 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample stochastic actions return self.policy.act(states, taken_actions=None, role="policy") - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -229,10 +229,10 @@ def record_transition(self, values, _, _ = self.value.act(self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + self.memory.add_samples(states=states, actions=actions, 
rewards=rewards, next_states=next_states, dones=dones, values=values) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, values=values) def pre_interaction(self, timestep: int, timesteps: int) -> None: @@ -270,11 +270,11 @@ def _update(self, timestep: int, timesteps: int) -> None: :param timesteps: Number of timesteps :type timesteps: int """ - def compute_gae(rewards: torch.Tensor, - dones: torch.Tensor, - values: torch.Tensor, - next_values: torch.Tensor, - discount_factor: float = 0.99, + def compute_gae(rewards: torch.Tensor, + dones: torch.Tensor, + values: torch.Tensor, + next_values: torch.Tensor, + discount_factor: float = 0.99, lambda_coefficient: float = 0.95) -> torch.Tensor: """Compute the Generalized Advantage Estimator (GAE) @@ -347,7 +347,7 @@ def compute_gae(rewards: torch.Tensor, entropy_loss = -self._entropy_loss_scale * self.policy.get_entropy(role="policy").mean() else: entropy_loss = 0 - + # compute policy loss policy_loss = -(sampled_advantages * next_log_prob).mean() @@ -379,7 +379,7 @@ def compute_gae(rewards: torch.Tensor, # record data self.track_data("Loss / Policy loss", cumulative_policy_loss / len(sampled_batches)) self.track_data("Loss / Value loss", cumulative_value_loss / len(sampled_batches)) - + if self._entropy_loss_scale: self.track_data("Loss / Entropy loss", cumulative_entropy_loss / len(sampled_batches)) diff --git a/skrl/agents/torch/amp/__init__.py b/skrl/agents/torch/amp/__init__.py index 9a6ca76e..53617227 100644 --- a/skrl/agents/torch/amp/__init__.py +++ b/skrl/agents/torch/amp/__init__.py @@ -1 +1 @@ -from .amp import AMP, AMP_DEFAULT_CONFIG \ No newline at end of file +from .amp import AMP, AMP_DEFAULT_CONFIG diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 4af3af05..349ee6b7 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -19,10 +19,10 @@ "rollouts": 16, # number of rollouts before updating "learning_epochs": 6, # number of learning epochs during each update "mini_batches": 2, # number of mini batches during each learning epoch - + "discount_factor": 0.99, # discount factor (gamma) "lambda": 0.95, # TD(lambda) coefficient (lam) for computing returns and advantages - + "learning_rate": 5e-5, # learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) "learning_rate_scheduler_kwargs": {}, # learning rate scheduler's kwargs (e.g. 
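# The compute_gae helper in the A2C update above (and its twin in the AMP update further below)
# implements the standard GAE recursion: advantage_t = delta_t + gamma * lambda * (1 - done_t) * advantage_{t+1},
# with delta_t = r_t + gamma * (1 - done_t) * V(s_{t+1}) - V(s_t), and returns = advantages + values.
# A minimal single-environment sketch of that recursion (toy numbers, not the agent's code):

import torch

rewards = torch.tensor([1.0, 0.0, 1.0])
values = torch.tensor([0.5, 0.4, 0.3])
next_values = torch.tensor([0.4, 0.3, 0.0])     # V(s_{t+1}); zero after the terminal step
dones = torch.tensor([0.0, 0.0, 1.0])
gamma, lam = 0.99, 0.95

advantages = torch.zeros(3)
gae = 0.0
for t in reversed(range(3)):
    delta = rewards[t] + gamma * (1 - dones[t]) * next_values[t] - values[t]
    gae = delta + gamma * lam * (1 - dones[t]) * gae
    advantages[t] = gae
returns = advantages + values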
{"step_size": 1e-3}) @@ -69,14 +69,14 @@ class AMP(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}, - amp_observation_space: Union[int, Tuple[int], gym.Space, None] = None, + amp_observation_space: Union[int, Tuple[int], gym.Space, None] = None, motion_dataset: Union[Memory, None] = None, reply_buffer: Union[Memory, None] = None, collect_reference_motions: Union[Callable[[int], torch.Tensor], None] = None, @@ -84,14 +84,14 @@ def __init__(self, """Adversarial Motion Priors (AMP) https://arxiv.org/abs/2104.02180 - + The implementation is adapted from the NVIDIA IsaacGymEnvs (https://github.com/NVIDIA-Omniverse/IsaacGymEnvs/blob/main/isaacgymenvs/learning/amp_continuous.py) :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. - If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -104,7 +104,7 @@ def __init__(self, :type cfg: dict :param amp_observation_space: AMP observation/state space or shape (default: None) :type amp_observation_space: int, tuple or list of integers, gym.Space or None - :param motion_dataset: Reference motion dataset: M (default: None) + :param motion_dataset: Reference motion dataset: M (default: None) :type motion_dataset: skrl.memory.torch.Memory or None :param reply_buffer: Reply buffer for preventing discriminator overfitting: B (default: None) :type reply_buffer: skrl.memory.torch.Memory or None @@ -117,11 +117,11 @@ def __init__(self, """ _cfg = copy.deepcopy(AMP_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) self.amp_observation_space = amp_observation_space @@ -169,7 +169,7 @@ def __init__(self, self._learning_starts = self.cfg["learning_starts"] self._amp_batch_size = self.cfg["amp_batch_size"] - self._task_reward_weight = self.cfg["task_reward_weight"] + self._task_reward_weight = self.cfg["task_reward_weight"] self._style_reward_weight = self.cfg["style_reward_weight"] self._discriminator_batch_size = self.cfg["discriminator_batch_size"] @@ -182,9 +182,9 @@ def __init__(self, # set up optimizer and learning rate scheduler if self.policy is not None and self.value is not None and self.discriminator is not None: - self.optimizer = torch.optim.Adam(itertools.chain(self.policy.parameters(), + self.optimizer = torch.optim.Adam(itertools.chain(self.policy.parameters(), self.value.parameters(), - self.discriminator.parameters()), + self.discriminator.parameters()), lr=self._learning_rate) if 
self._learning_rate_scheduler is not None: self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) @@ -215,7 +215,7 @@ def init(self) -> None: """ super().init() self.set_mode("eval") - + # create tensors in memory if self.memory is not None: self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) @@ -276,17 +276,17 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens return actions, log_prob, actions_mean - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -307,7 +307,7 @@ def record_transition(self, # use collected states if self._current_states is not None: states = self._current_states - + super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) # reward shaping @@ -326,12 +326,12 @@ def record_transition(self, next_values = self._value_preprocessor(next_values, inverse=True) next_values *= infos['terminate'].view(-1, 1).logical_not() - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, log_prob=self._current_log_prob, values=values, amp_states=amp_states, next_values=next_values) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, log_prob=self._current_log_prob, values=values, amp_states=amp_states, next_values=next_values) - + def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -368,11 +368,11 @@ def _update(self, timestep: int, timesteps: int) -> None: :param timesteps: Number of timesteps :type timesteps: int """ - def compute_gae(rewards: torch.Tensor, - dones: torch.Tensor, - values: torch.Tensor, - next_values: torch.Tensor, - discount_factor: float = 0.99, + def compute_gae(rewards: torch.Tensor, + dones: torch.Tensor, + values: torch.Tensor, + next_values: torch.Tensor, + discount_factor: float = 0.99, lambda_coefficient: float = 0.95) -> torch.Tensor: """Compute the Generalized Advantage Estimator (GAE) @@ -419,7 +419,7 @@ def compute_gae(rewards: torch.Tensor, amp_logits, _, _ = self.discriminator.act(self._amp_state_preprocessor(amp_states), taken_actions=None, role="discriminator") style_reward = -torch.log(torch.maximum(1 - 1 / (1 + torch.exp(-amp_logits)), torch.tensor(0.0001, device=self.device))) style_reward *= self._discriminator_reward_scale - + combined_rewards = self._task_reward_weight * rewards + self._style_reward_weight * style_reward # compute returns and advantages @@ -462,7 +462,7 @@ def compute_gae(rewards: torch.Tensor, sampled_amp_states, _) in enumerate(sampled_batches): sampled_states = 
self._state_preprocessor(sampled_states, train=True) - + _, next_log_prob, _ = self.policy.act(states=sampled_states, taken_actions=sampled_actions, role="policy") # compute entropy loss @@ -470,20 +470,20 @@ def compute_gae(rewards: torch.Tensor, entropy_loss = -self._entropy_loss_scale * self.policy.get_entropy(role="policy").mean() else: entropy_loss = 0 - + # compute policy loss ratio = torch.exp(next_log_prob - sampled_log_prob) surrogate = sampled_advantages * ratio surrogate_clipped = sampled_advantages * torch.clip(ratio, 1.0 - self._ratio_clip, 1.0 + self._ratio_clip) - + policy_loss = -torch.min(surrogate, surrogate_clipped).mean() # compute value loss predicted_values, _, _ = self.value.act(states=sampled_states, taken_actions=None, role="value") if self._clip_predicted_values: - predicted_values = sampled_values + torch.clip(predicted_values - sampled_values, - min=-self._value_clip, + predicted_values = sampled_values + torch.clip(predicted_values - sampled_values, + min=-self._value_clip, max=self._value_clip) value_loss = self._value_loss_scale * F.mse_loss(sampled_returns, predicted_values) @@ -517,11 +517,11 @@ def compute_gae(rewards: torch.Tensor, # discriminator gradient penalty if self._discriminator_gradient_penalty_scale: - amp_motion_gradient = torch.autograd.grad(amp_motion_logits, - sampled_amp_motion_states, + amp_motion_gradient = torch.autograd.grad(amp_motion_logits, + sampled_amp_motion_states, grad_outputs=torch.ones_like(amp_motion_logits), - create_graph=True, - retain_graph=True, + create_graph=True, + retain_graph=True, only_inputs=True) gradient_penalty = torch.sum(torch.square(amp_motion_gradient[0]), dim=-1).mean() discriminator_loss += self._discriminator_gradient_penalty_scale * gradient_penalty @@ -539,8 +539,8 @@ def compute_gae(rewards: torch.Tensor, self.optimizer.zero_grad() (policy_loss + entropy_loss + value_loss + discriminator_loss).backward() if self._grad_norm_clip > 0: - nn.utils.clip_grad_norm_(itertools.chain(self.policy.parameters(), - self.value.parameters(), + nn.utils.clip_grad_norm_(itertools.chain(self.policy.parameters(), + self.value.parameters(), self.discriminator.parameters()), self._grad_norm_clip) self.optimizer.step() diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 3483a994..44476e78 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -16,19 +16,19 @@ class Agent: - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Base class that represent a RL agent :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. 
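# The ratio clipping in the AMP update above is the PPO-style clipped surrogate: the probability
# ratio is clipped to [1 - ratio_clip, 1 + ratio_clip] and the pessimistic (minimum) objective is
# taken. A tiny numeric restatement of that loss (toy values, only for illustration):

import torch

ratio_clip = 0.2
sampled_advantages = torch.tensor([1.0, -0.5])
ratio = torch.exp(torch.tensor([0.3, -0.4]))          # exp(new_log_prob - old_log_prob)
surrogate = sampled_advantages * ratio
surrogate_clipped = sampled_advantages * torch.clip(ratio, 1.0 - ratio_clip, 1.0 + ratio_clip)
policy_loss = -torch.min(surrogate, surrogate_clipped).mean()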
- If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -52,7 +52,7 @@ def __init__(self, else: self.memory = memory self.secondary_memories = [] - + # convert the models to their respective device for model in self.models.values(): if model is not None: @@ -126,7 +126,7 @@ def init(self) -> None: if not experiment_name: experiment_name = "{}_{}".format(datetime.datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f"), self.__class__.__name__) self.experiment_dir = os.path.join(directory, experiment_name) - + # main entry to log data for consumption and visualization by TensorBoard self.writer = SummaryWriter(log_dir=self.experiment_dir) @@ -193,7 +193,7 @@ def write_checkpoint(self, timestep: int, timesteps: int) -> None: # separated modules if self.checkpoint_store_separately: for name, module in self.checkpoint_modules.items(): - torch.save(self.checkpoint_best_modules["modules"][name], + torch.save(self.checkpoint_best_modules["modules"][name], os.path.join(self.experiment_dir, "checkpoints", "best_{}.pt".format(name))) # whole agent else: @@ -203,9 +203,9 @@ def write_checkpoint(self, timestep: int, timesteps: int) -> None: torch.save(modules, os.path.join(self.experiment_dir, "checkpoints", "best_{}.pt".format("agent"))) self.checkpoint_best_modules["saved"] = True - def act(self, - states: torch.Tensor, - timestep: int, + def act(self, + states: torch.Tensor, + timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy @@ -223,20 +223,20 @@ def act(self, """ raise NotImplementedError - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory (to be implemented by the inheriting classes) Inheriting classes must call this method to record episode information (rewards, timesteps, etc.). In addition to recording environment transition (such as states, rewards, etc.), agent information can be recorded. 
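# write_checkpoint above stores either one file per module ("best_<name>.pt" when
# checkpoint_store_separately is enabled) or a single dictionary of modules ("best_agent.pt")
# under <experiment_dir>/checkpoints. A rough sketch of inspecting the whole-agent file
# (the path is a placeholder; the available keys depend on the agent's checkpoint_modules):

import torch

modules = torch.load("runs/<experiment_name>/checkpoints/best_agent.pt", map_location="cpu")
print(list(modules.keys()))    # e.g. "policy", "value", "optimizer", ... depending on the agent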
- + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -258,7 +258,7 @@ def record_transition(self, if self._cumulative_rewards is None: self._cumulative_rewards = torch.zeros_like(rewards, dtype=torch.float32) self._cumulative_timesteps = torch.zeros_like(rewards, dtype=torch.int32) - + self._cumulative_rewards.add_(rewards) self._cumulative_timesteps.add_(1) @@ -273,7 +273,7 @@ def record_transition(self, # reset the cumulative rewards and timesteps self._cumulative_rewards[finished_episodes] = 0 self._cumulative_timesteps[finished_episodes] = 0 - + # record data if self.write_interval > 0: self.tracking_data["Reward / Instantaneous reward (max)"].append(torch.max(rewards).item()) @@ -565,9 +565,9 @@ def migrate(self, if module not in ["state_preprocessor", "value_preprocessor", "optimizer"] and hasattr(module, "migrate"): if verbose: logger.info("Model: {} ({})".format(name, type(module).__name__)) - status *= module.migrate(state_dict=checkpoint["model"], - name_map=name_map.get(name, {}), - auto_mapping=auto_mapping, + status *= module.migrate(state_dict=checkpoint["model"], + name_map=name_map.get(name, {}), + auto_mapping=auto_mapping, verbose=verbose) self.set_mode("eval") @@ -620,4 +620,4 @@ def _update(self, timestep: int, timesteps: int) -> None: :raises NotImplementedError: The method is not implemented by the inheriting classes """ - raise NotImplementedError \ No newline at end of file + raise NotImplementedError diff --git a/skrl/agents/torch/cem/__init__.py b/skrl/agents/torch/cem/__init__.py index dba62a05..29427ff9 100644 --- a/skrl/agents/torch/cem/__init__.py +++ b/skrl/agents/torch/cem/__init__.py @@ -1 +1 @@ -from .cem import CEM, CEM_DEFAULT_CONFIG \ No newline at end of file +from .cem import CEM, CEM_DEFAULT_CONFIG diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 6168ed02..96055f0c 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -17,7 +17,7 @@ "percentile": 0.70, # percentile to compute the reward bound [0, 1] "discount_factor": 0.99, # discount factor (gamma) - + "learning_rate": 1e-2, # learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) "learning_rate_scheduler_kwargs": {}, # learning rate scheduler's kwargs (e.g. {"step_size": 1e-3}) @@ -42,21 +42,21 @@ class CEM(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Cross-Entropy Method (CEM) https://ieeexplore.ieee.org/abstract/document/6796865/ - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. 
- If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -72,11 +72,11 @@ def __init__(self, """ _cfg = copy.deepcopy(CEM_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -84,7 +84,7 @@ def __init__(self, # checkpoint models self.checkpoint_modules["policy"] = self.policy - + # configuration: self._rollouts = self.cfg["rollouts"] self._rollout = 0 @@ -99,7 +99,7 @@ def __init__(self, self._random_timesteps = self.cfg["random_timesteps"] self._learning_starts = self.cfg["learning_starts"] - + self._rewards_shaper = self.cfg["rewards_shaper"] self._episode_tracking = [] @@ -123,7 +123,7 @@ def init(self) -> None: """Initialize the agent """ super().init() - + # create tensors in memory if self.memory is not None: self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) @@ -154,20 +154,20 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens if timestep < self._random_timesteps: return self.policy.random_act(states, taken_actions=None, role="policy") - # sample stochastic actions + # sample stochastic actions return self.policy.act(states, taken_actions=None, role="policy") - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -190,7 +190,7 @@ def record_transition(self, # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - + if self.memory is not None: self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: @@ -258,10 +258,10 @@ def _update(self, timestep: int, timesteps: int) -> None: if not len(returns): print("[WARNING] No returns to update. 
Consider increasing the number of rollouts") return - + returns = torch.tensor(returns) return_threshold = torch.quantile(returns, self._percentile, dim=-1) - + # get elite states and actions indexes = torch.nonzero(returns >= return_threshold) elite_states = torch.cat([sampled_states[limits[i][0]:limits[i][1]] for i in indexes[:, 0]], dim=0) @@ -287,6 +287,6 @@ def _update(self, timestep: int, timesteps: int) -> None: self.track_data("Coefficient / Return threshold", return_threshold.item()) self.track_data("Coefficient / Mean discounted returns", torch.mean(returns).item()) - + if self._learning_rate_scheduler: self.track_data("Learning / Learning rate", self.scheduler.get_last_lr()[0]) diff --git a/skrl/agents/torch/ddpg/__init__.py b/skrl/agents/torch/ddpg/__init__.py index 84b4d294..da2ee7a0 100644 --- a/skrl/agents/torch/ddpg/__init__.py +++ b/skrl/agents/torch/ddpg/__init__.py @@ -1 +1 @@ -from .ddpg import DDPG, DDPG_DEFAULT_CONFIG \ No newline at end of file +from .ddpg import DDPG, DDPG_DEFAULT_CONFIG diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index dc7235ab..33f3174a 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -15,10 +15,10 @@ DDPG_DEFAULT_CONFIG = { "gradient_steps": 1, # gradient steps "batch_size": 64, # training batch size - + "discount_factor": 0.99, # discount factor (gamma) "polyak": 0.005, # soft update hyperparameter (tau) - + "actor_learning_rate": 1e-3, # actor learning rate "critic_learning_rate": 1e-3, # critic learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) @@ -51,21 +51,21 @@ class DDPG(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Deep Deterministic Policy Gradient (DDPG) https://arxiv.org/abs/1509.02971 - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. 
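# The CEM update above keeps only the "elite" episodes: a return threshold is taken at the
# configured percentile and episodes at or above it provide the states/actions for the
# cross-entropy fit. The thresholding step in isolation (toy returns for illustration):

import torch

returns = torch.tensor([1.0, 5.0, 3.0, 9.0, 7.0])           # one discounted return per tracked episode
return_threshold = torch.quantile(returns, 0.70, dim=-1)    # "percentile" in CEM_DEFAULT_CONFIG
elite_indexes = torch.nonzero(returns >= return_threshold)  # here: the episodes with returns 9.0 and 7.0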
- If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -81,11 +81,11 @@ def __init__(self, """ _cfg = copy.deepcopy(DDPG_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -99,7 +99,7 @@ def __init__(self, self.checkpoint_modules["target_policy"] = self.target_policy self.checkpoint_modules["critic"] = self.critic self.checkpoint_modules["target_critic"] = self.target_critic - + if self.target_policy is not None and self.target_critic is not None: # freeze target networks with respect to optimizers (update via .update_parameters()) self.target_policy.freeze_parameters(True) @@ -112,7 +112,7 @@ def __init__(self, # configuration self._gradient_steps = self.cfg["gradient_steps"] self._batch_size = self.cfg["batch_size"] - + self._discount_factor = self.cfg["discount_factor"] self._polyak = self.cfg["polyak"] @@ -121,7 +121,7 @@ def __init__(self, self._learning_rate_scheduler = self.cfg["learning_rate_scheduler"] self._state_preprocessor = self.cfg["state_preprocessor"] - + self._random_timesteps = self.cfg["random_timesteps"] self._learning_starts = self.cfg["learning_starts"] @@ -131,7 +131,7 @@ def __init__(self, self._exploration_timesteps = self.cfg["exploration"]["timesteps"] self._rewards_shaper = self.cfg["rewards_shaper"] - + # set up optimizers and learning rate schedulers if self.policy is not None and self.critic is not None: self.policy_optimizer = torch.optim.Adam(self.policy.parameters(), lr=self._actor_learning_rate) @@ -154,7 +154,7 @@ def init(self) -> None: """Initialize the agent """ super().init() - + # create tensors in memory if self.memory is not None: self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) @@ -198,12 +198,12 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens if self._exploration_noise is not None: # sample noises noises = self._exploration_noise.sample(actions[0].shape) - + # define exploration timesteps scale = self._exploration_final_scale if self._exploration_timesteps is None: self._exploration_timesteps = timesteps - + # apply exploration noise if timestep <= self._exploration_timesteps: scale = (1 - timestep / self._exploration_timesteps) \ @@ -214,8 +214,8 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # modify actions actions[0].add_(noises) if self._backward_compatibility: - actions = (torch.max(torch.min(actions[0], self.clip_actions_max), self.clip_actions_min), - actions[1], + actions = (torch.max(torch.min(actions[0], self.clip_actions_max), self.clip_actions_min), + actions[1], actions[2]) else: actions[0].clamp_(min=self.clip_actions_min, max=self.clip_actions_max) @@ -224,26 +224,26 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens self.track_data("Exploration / Exploration noise (max)", torch.max(noises).item()) self.track_data("Exploration / Exploration noise (min)", torch.min(noises).item()) self.track_data("Exploration / 
Exploration noise (mean)", torch.mean(noises).item()) - + else: # record noises self.track_data("Exploration / Exploration noise (max)", 0) self.track_data("Exploration / Exploration noise (min)", 0) self.track_data("Exploration / Exploration noise (mean)", 0) - + return actions - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -266,7 +266,7 @@ def record_transition(self, # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - + if self.memory is not None: self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: @@ -317,15 +317,15 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): next_actions, _, _ = self.target_policy.act(states=sampled_next_states, taken_actions=None, role="target_policy") - + target_q_values, _, _ = self.target_critic.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic") target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute critic loss critic_values, _, _ = self.critic.act(states=sampled_states, taken_actions=sampled_actions, role="critic") - + critic_loss = F.mse_loss(critic_values, target_values) - + # optimization step (critic) self.critic_optimizer.zero_grad() critic_loss.backward() diff --git a/skrl/agents/torch/dqn/__init__.py b/skrl/agents/torch/dqn/__init__.py index a6b63418..eaecb47b 100644 --- a/skrl/agents/torch/dqn/__init__.py +++ b/skrl/agents/torch/dqn/__init__.py @@ -1,2 +1,2 @@ from .dqn import DQN, DQN_DEFAULT_CONFIG -from .ddqn import DDQN, DDQN_DEFAULT_CONFIG \ No newline at end of file +from .ddqn import DDQN, DDQN_DEFAULT_CONFIG diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 6ad0901e..65a64f38 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -16,10 +16,10 @@ DDQN_DEFAULT_CONFIG = { "gradient_steps": 1, # gradient steps "batch_size": 64, # training batch size - + "discount_factor": 0.99, # discount factor (gamma) "polyak": 0.005, # soft update hyperparameter (tau) - + "learning_rate": 1e-3, # learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) "learning_rate_scheduler_kwargs": {}, # learning rate scheduler's kwargs (e.g. 
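# Both the DDPG and DDQN configs above expose "polyak" as the soft-update coefficient (tau) for
# the target networks, applied through the models' update_parameters(). Its arithmetic amounts to
# an exponential moving average of the online parameters (a minimal sketch with stand-in networks;
# tau = 0.005 as in the default configs):

import torch

critic = torch.nn.Linear(4, 1)           # stand-ins for the online and target critics
target_critic = torch.nn.Linear(4, 1)
tau = 0.005

with torch.no_grad():
    for target_param, param in zip(target_critic.parameters(), critic.parameters()):
        target_param.mul_(1.0 - tau).add_(tau * param)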
{"step_size": 1e-3}) @@ -36,7 +36,7 @@ "exploration": { "initial_epsilon": 1.0, # initial epsilon for epsilon-greedy exploration "final_epsilon": 0.05, # final epsilon for epsilon-greedy exploration - "timesteps": 1000, # timesteps for epsilon-greedy decay + "timesteps": 1000, # timesteps for epsilon-greedy decay }, "rewards_shaper": None, # rewards shaping function: Callable(reward, timestep, timesteps) -> reward @@ -53,21 +53,21 @@ class DDQN(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Double Deep Q-Network (DDQN) https://ojs.aaai.org/index.php/AAAI/article/view/10295 - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. - If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -83,11 +83,11 @@ def __init__(self, """ _cfg = copy.deepcopy(DDQN_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -108,7 +108,7 @@ def __init__(self, # configuration self._gradient_steps = self.cfg["gradient_steps"] self._batch_size = self.cfg["batch_size"] - + self._discount_factor = self.cfg["discount_factor"] self._polyak = self.cfg["polyak"] @@ -116,7 +116,7 @@ def __init__(self, self._learning_rate_scheduler = self.cfg["learning_rate_scheduler"] self._state_preprocessor = self.cfg["state_preprocessor"] - + self._random_timesteps = self.cfg["random_timesteps"] self._learning_starts = self.cfg["learning_starts"] @@ -128,7 +128,7 @@ def __init__(self, self._exploration_timesteps = self.cfg["exploration"]["timesteps"] self._rewards_shaper = self.cfg["rewards_shaper"] - + # set up optimizer and learning rate scheduler if self.q_network is not None: self.optimizer = torch.optim.Adam(self.q_network.parameters(), lr=self._learning_rate) @@ -176,7 +176,7 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens if not self._exploration_timesteps: return torch.argmax(self.q_network.act(states, taken_actions=None, role="q_network")[0], dim=1, keepdim=True), None, None - + # sample random actions actions = self.q_network.random_act(states, taken_actions=None, role="q_network")[0] if timestep < self._random_timesteps: @@ -189,23 +189,23 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens indexes = (torch.rand(states.shape[0], device=self.device) >= epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.argmax(self.q_network.act(states[indexes], 
taken_actions=None, role="q_network")[0], dim=1, keepdim=True) - + # record epsilon self.track_data("Exploration / Exploration epsilon", epsilon) - + return actions, None, None - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -228,7 +228,7 @@ def record_transition(self, # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - + if self.memory is not None: self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: @@ -279,17 +279,17 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): next_q_values, _, _ = self.target_q_network.act(states=sampled_next_states, taken_actions=None, role="target_q_network") - + target_q_values = torch.gather(next_q_values, dim=1, index=torch.argmax(self.q_network.act(states=sampled_next_states, \ taken_actions=None, role="q_network")[0], dim=1, keepdim=True)) target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute Q-network loss - q_values = torch.gather(self.q_network.act(states=sampled_states, taken_actions=None, role="q_network")[0], + q_values = torch.gather(self.q_network.act(states=sampled_states, taken_actions=None, role="q_network")[0], dim=1, index=sampled_actions.long()) q_network_loss = F.mse_loss(q_values, target_values) - + # optimize Q-network self.optimizer.zero_grad() q_network_loss.backward() diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index 0a18ece2..929aa024 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -16,10 +16,10 @@ DQN_DEFAULT_CONFIG = { "gradient_steps": 1, # gradient steps "batch_size": 64, # training batch size - + "discount_factor": 0.99, # discount factor (gamma) "polyak": 0.005, # soft update hyperparameter (tau) - + "learning_rate": 1e-3, # learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) "learning_rate_scheduler_kwargs": {}, # learning rate scheduler's kwargs (e.g. 
{"step_size": 1e-3}) @@ -36,7 +36,7 @@ "exploration": { "initial_epsilon": 1.0, # initial epsilon for epsilon-greedy exploration "final_epsilon": 0.05, # final epsilon for epsilon-greedy exploration - "timesteps": 1000, # timesteps for epsilon-greedy decay + "timesteps": 1000, # timesteps for epsilon-greedy decay }, "rewards_shaper": None, # rewards shaping function: Callable(reward, timestep, timesteps) -> reward @@ -53,21 +53,21 @@ class DQN(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Deep Q-Network (DQN) https://arxiv.org/abs/1312.5602 - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. - If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -83,11 +83,11 @@ def __init__(self, """ _cfg = copy.deepcopy(DQN_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -97,7 +97,7 @@ def __init__(self, # checkpoint models self.checkpoint_modules["q_network"] = self.q_network self.checkpoint_modules["target_q_network"] = self.target_q_network - + if self.target_q_network is not None: # freeze target networks with respect to optimizers (update via .update_parameters()) self.target_q_network.freeze_parameters(True) @@ -108,13 +108,13 @@ def __init__(self, # configuration self._gradient_steps = self.cfg["gradient_steps"] self._batch_size = self.cfg["batch_size"] - + self._discount_factor = self.cfg["discount_factor"] self._polyak = self.cfg["polyak"] self._learning_rate = self.cfg["learning_rate"] self._learning_rate_scheduler = self.cfg["learning_rate_scheduler"] - + self._state_preprocessor = self.cfg["state_preprocessor"] self._random_timesteps = self.cfg["random_timesteps"] @@ -128,7 +128,7 @@ def __init__(self, self._exploration_timesteps = self.cfg["exploration"]["timesteps"] self._rewards_shaper = self.cfg["rewards_shaper"] - + # set up optimizer and learning rate scheduler if self.q_network is not None: self.optimizer = torch.optim.Adam(self.q_network.parameters(), lr=self._learning_rate) @@ -148,7 +148,7 @@ def init(self) -> None: """Initialize the agent """ super().init() - + # create tensors in memory if self.memory is not None: self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) @@ -189,23 +189,23 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens indexes = (torch.rand(states.shape[0], device=self.device) >= 
epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.argmax(self.q_network.act(states[indexes], taken_actions=None, role="q_network")[0], dim=1, keepdim=True) - + # record epsilon self.track_data("Exploration / Exploration epsilon", epsilon) - + return actions, None, None - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -228,7 +228,7 @@ def record_transition(self, # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - + if self.memory is not None: self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: @@ -279,16 +279,16 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): next_q_values, _, _ = self.target_q_network.act(states=sampled_next_states, taken_actions=None, role="target_q_network") - + target_q_values = torch.max(next_q_values, dim=-1, keepdim=True)[0] target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute Q-network loss - q_values = torch.gather(self.q_network.act(states=sampled_states, taken_actions=None, role="q_network")[0], + q_values = torch.gather(self.q_network.act(states=sampled_states, taken_actions=None, role="q_network")[0], dim=1, index=sampled_actions.long()) q_network_loss = F.mse_loss(q_values, target_values) - + # optimize Q-network self.optimizer.zero_grad() q_network_loss.backward() diff --git a/skrl/agents/torch/ppo/__init__.py b/skrl/agents/torch/ppo/__init__.py index e439116a..04f40be3 100644 --- a/skrl/agents/torch/ppo/__init__.py +++ b/skrl/agents/torch/ppo/__init__.py @@ -1 +1 @@ -from .ppo import PPO, PPO_DEFAULT_CONFIG \ No newline at end of file +from .ppo import PPO, PPO_DEFAULT_CONFIG diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 2fa6081d..f00c9a99 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -19,10 +19,10 @@ "rollouts": 16, # number of rollouts before updating "learning_epochs": 8, # number of learning epochs during each update "mini_batches": 2, # number of mini batches during each learning epoch - + "discount_factor": 0.99, # discount factor (gamma) "lambda": 0.95, # TD(lambda) coefficient (lam) for computing returns and advantages - + "learning_rate": 1e-3, # learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) "learning_rate_scheduler_kwargs": {}, # learning rate scheduler's kwargs (e.g. 
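# Sketch of the batched epsilon-greedy selection used in the DQN/DDQN hunks above:
# start from random actions, then overwrite with the greedy action for the subset
# of environments whose random draw exceeds epsilon. `q_net` is an assumed callable
# returning (batch, num_actions); `random_actions` has shape (batch, 1).
import torch

def epsilon_greedy(q_net, states, random_actions, epsilon):
    actions = random_actions.clone()
    indexes = (torch.rand(states.shape[0], device=states.device) >= epsilon).nonzero().view(-1)
    if indexes.numel():
        actions[indexes] = torch.argmax(q_net(states[indexes]), dim=1, keepdim=True)
    return actions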
{"step_size": 1e-3}) @@ -59,21 +59,21 @@ class PPO(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Proximal Policy Optimization (PPO) https://arxiv.org/abs/1707.06347 - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. - If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -89,11 +89,11 @@ def __init__(self, """ _cfg = copy.deepcopy(PPO_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -139,7 +139,7 @@ def __init__(self, if self.policy is self.value: self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=self._learning_rate) else: - self.optimizer = torch.optim.Adam(itertools.chain(self.policy.parameters(), self.value.parameters()), + self.optimizer = torch.optim.Adam(itertools.chain(self.policy.parameters(), self.value.parameters()), lr=self._learning_rate) if self._learning_rate_scheduler is not None: self.scheduler = self._learning_rate_scheduler(self.optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) @@ -164,7 +164,7 @@ def init(self) -> None: """ super().init() self.set_mode("eval") - + # create tensors in memory if self.memory is not None: self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) @@ -208,17 +208,17 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens return actions, log_prob, actions_mean - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -249,10 +249,10 @@ def record_transition(self, values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, 
dones=dones, log_prob=self._current_log_prob, values=values) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, log_prob=self._current_log_prob, values=values) def pre_interaction(self, timestep: int, timesteps: int) -> None: @@ -290,11 +290,11 @@ def _update(self, timestep: int, timesteps: int) -> None: :param timesteps: Number of timesteps :type timesteps: int """ - def compute_gae(rewards: torch.Tensor, - dones: torch.Tensor, - values: torch.Tensor, - next_values: torch.Tensor, - discount_factor: float = 0.99, + def compute_gae(rewards: torch.Tensor, + dones: torch.Tensor, + values: torch.Tensor, + next_values: torch.Tensor, + discount_factor: float = 0.99, lambda_coefficient: float = 0.95) -> torch.Tensor: """Compute the Generalized Advantage Estimator (GAE) @@ -330,7 +330,7 @@ def compute_gae(rewards: torch.Tensor, advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) return returns, advantages - + # compute returns and advantages with torch.no_grad(): last_values, _, _ = self.value.act(self._state_preprocessor(self._current_next_states.float()), taken_actions=None, role="value") @@ -364,7 +364,7 @@ def compute_gae(rewards: torch.Tensor, in sampled_batches: sampled_states = self._state_preprocessor(sampled_states, train=not epoch) - + _, next_log_prob, _ = self.policy.act(states=sampled_states, taken_actions=sampled_actions, role="policy") # compute aproximate KL divergence @@ -382,20 +382,20 @@ def compute_gae(rewards: torch.Tensor, entropy_loss = -self._entropy_loss_scale * self.policy.get_entropy(role="policy").mean() else: entropy_loss = 0 - + # compute policy loss ratio = torch.exp(next_log_prob - sampled_log_prob) surrogate = sampled_advantages * ratio surrogate_clipped = sampled_advantages * torch.clip(ratio, 1.0 - self._ratio_clip, 1.0 + self._ratio_clip) - + policy_loss = -torch.min(surrogate, surrogate_clipped).mean() # compute value loss predicted_values, _, _ = self.value.act(states=sampled_states, taken_actions=None, role="value") if self._clip_predicted_values: - predicted_values = sampled_values + torch.clip(predicted_values - sampled_values, - min=-self._value_clip, + predicted_values = sampled_values + torch.clip(predicted_values - sampled_values, + min=-self._value_clip, max=self._value_clip) value_loss = self._value_loss_scale * F.mse_loss(sampled_returns, predicted_values) @@ -414,10 +414,10 @@ def compute_gae(rewards: torch.Tensor, cumulative_value_loss += value_loss.item() if self._entropy_loss_scale: cumulative_entropy_loss += entropy_loss.item() - + # update learning rate if self._learning_rate_scheduler: - if isinstance(self.scheduler, KLAdaptiveRL): + if isinstance(self.scheduler, KLAdaptiveRL): self.scheduler.step(torch.tensor(kl_divergences).mean()) else: self.scheduler.step() diff --git a/skrl/agents/torch/q_learning/__init__.py b/skrl/agents/torch/q_learning/__init__.py index dd2a6413..85ba23ea 100644 --- a/skrl/agents/torch/q_learning/__init__.py +++ b/skrl/agents/torch/q_learning/__init__.py @@ -1 +1 @@ -from .q_learning import Q_LEARNING, Q_LEARNING_DEFAULT_CONFIG \ No newline at end of file +from .q_learning import Q_LEARNING, Q_LEARNING_DEFAULT_CONFIG diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index f13a0c4a..facb094f 100644 --- a/skrl/agents/torch/q_learning/q_learning.py 
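# Sketch of the PPO clipped-surrogate objective and value clipping reformatted in
# the hunks above, written with plain tensors; `ratio_clip` and `value_clip` mirror
# the roles of the agent's _ratio_clip / _value_clip settings (names here are
# illustrative, not the agent's exact code).
import torch
import torch.nn.functional as F

def ppo_losses(new_log_prob, old_log_prob, advantages,
               predicted_values, old_values, returns,
               ratio_clip=0.2, value_clip=0.2):
    ratio = torch.exp(new_log_prob - old_log_prob)
    surrogate = advantages * ratio
    surrogate_clipped = advantages * torch.clip(ratio, 1.0 - ratio_clip, 1.0 + ratio_clip)
    policy_loss = -torch.min(surrogate, surrogate_clipped).mean()

    # clip the value update around the values recorded at rollout time
    clipped_values = old_values + torch.clip(predicted_values - old_values,
                                             min=-value_clip, max=value_clip)
    value_loss = F.mse_loss(returns, clipped_values)
    return policy_loss, value_loss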
+++ b/skrl/agents/torch/q_learning/q_learning.py @@ -33,21 +33,21 @@ class Q_LEARNING(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Q-learning https://www.academia.edu/3294050/Learning_from_delayed_rewards - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. - If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -63,11 +63,11 @@ def __init__(self, """ _cfg = copy.deepcopy(Q_LEARNING_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -75,10 +75,10 @@ def __init__(self, # checkpoint models self.checkpoint_modules["policy"] = self.policy - + # configuration self._discount_factor = self.cfg["discount_factor"] - + self._random_timesteps = self.cfg["random_timesteps"] self._learning_starts = self.cfg["learning_starts"] @@ -118,17 +118,17 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample actions from policy return self.policy.act(states, taken_actions=None, role="policy") - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -206,4 +206,3 @@ def _update(self, timestep: int, timesteps: int) -> None: * (self._current_rewards + self._discount_factor * self._current_dones.logical_not() \ * q_table[env_ids, self._current_next_states, next_actions] \ - q_table[env_ids, self._current_states, self._current_actions]) - \ No newline at end of file diff --git a/skrl/agents/torch/sac/__init__.py b/skrl/agents/torch/sac/__init__.py index 704d9b8d..1859419e 100644 --- a/skrl/agents/torch/sac/__init__.py +++ b/skrl/agents/torch/sac/__init__.py @@ -1 +1 @@ -from .sac import SAC, SAC_DEFAULT_CONFIG \ No newline at end of file +from .sac import SAC, SAC_DEFAULT_CONFIG diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index 26145f59..d76ceb97 100644 --- a/skrl/agents/torch/sac/sac.py +++ 
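# Sketch of a tabular Q-learning backup in the spirit of the Q_LEARNING hunks above
# (the agent itself maintains a per-environment Q-table through its policy model;
# this plain NumPy, single-environment version is an illustration, not its code).
import numpy as np

def q_learning_update(q_table, state, action, reward, next_state, done,
                      learning_rate=0.5, discount_factor=0.99):
    target = reward + (0.0 if done else discount_factor * np.max(q_table[next_state]))
    q_table[state, action] += learning_rate * (target - q_table[state, action])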
b/skrl/agents/torch/sac/sac.py @@ -17,10 +17,10 @@ SAC_DEFAULT_CONFIG = { "gradient_steps": 1, # gradient steps "batch_size": 64, # training batch size - + "discount_factor": 0.99, # discount factor (gamma) "polyak": 0.005, # soft update hyperparameter (tau) - + "actor_learning_rate": 1e-3, # actor learning rate "critic_learning_rate": 1e-3, # critic learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) @@ -51,21 +51,21 @@ class SAC(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Soft Actor-Critic (SAC) https://arxiv.org/abs/1801.01290 - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. - If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -81,11 +81,11 @@ def __init__(self, """ _cfg = copy.deepcopy(SAC_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -117,11 +117,11 @@ def __init__(self, self._discount_factor = self.cfg["discount_factor"] self._polyak = self.cfg["polyak"] - + self._actor_learning_rate = self.cfg["actor_learning_rate"] self._critic_learning_rate = self.cfg["critic_learning_rate"] self._learning_rate_scheduler = self.cfg["learning_rate_scheduler"] - + self._state_preprocessor = self.cfg["state_preprocessor"] self._random_timesteps = self.cfg["random_timesteps"] @@ -138,7 +138,7 @@ def __init__(self, self._target_entropy = self.cfg["target_entropy"] if self._target_entropy is None: self._target_entropy = -np.prod(self.action_space.shape).astype(np.float32) - + self.log_entropy_coefficient = torch.log(torch.ones(1, device=self.device) * self._entropy_coefficient).requires_grad_(True) self.entropy_optimizer = torch.optim.Adam([self.log_entropy_coefficient], lr=self._entropy_learning_rate) @@ -147,7 +147,7 @@ def __init__(self, # set up optimizers and learning rate schedulers if self.policy is not None and self.critic_1 is not None and self.critic_2 is not None: self.policy_optimizer = torch.optim.Adam(self.policy.parameters(), lr=self._actor_learning_rate) - self.critic_optimizer = torch.optim.Adam(itertools.chain(self.critic_1.parameters(), self.critic_2.parameters()), + self.critic_optimizer = torch.optim.Adam(itertools.chain(self.critic_1.parameters(), self.critic_2.parameters()), lr=self._critic_learning_rate) if self._learning_rate_scheduler is not None: self.policy_scheduler = 
self._learning_rate_scheduler(self.policy_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) @@ -201,17 +201,17 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample stochastic actions return self.policy.act(states, taken_actions=None, role="policy") - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -234,7 +234,7 @@ def record_transition(self, # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - + if self.memory is not None: self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: @@ -260,10 +260,10 @@ def post_interaction(self, timestep: int, timesteps: int) -> None: """ if timestep >= self._learning_starts: self._update(timestep, timesteps) - + # write tracking data and checkpoints super().post_interaction(timestep, timesteps) - + def _update(self, timestep: int, timesteps: int) -> None: """Algorithm's main update step @@ -278,7 +278,7 @@ def _update(self, timestep: int, timesteps: int) -> None: # gradient steps for gradient_step in range(self._gradient_steps): - + sampled_states = self._state_preprocessor(sampled_states, train=not gradient_step) sampled_next_states = self._state_preprocessor(sampled_next_states) @@ -294,9 +294,9 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute critic loss critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=sampled_actions, role="critic_1") critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=sampled_actions, role="critic_2") - + critic_loss = (F.mse_loss(critic_1_values, target_values) + F.mse_loss(critic_2_values, target_values)) / 2 - + # optimization step (critic) self.critic_optimizer.zero_grad() critic_loss.backward() @@ -348,7 +348,7 @@ def _update(self, timestep: int, timesteps: int) -> None: self.track_data("Q-network / Q2 (max)", torch.max(critic_2_values).item()) self.track_data("Q-network / Q2 (min)", torch.min(critic_2_values).item()) self.track_data("Q-network / Q2 (mean)", torch.mean(critic_2_values).item()) - + self.track_data("Target / Target (max)", torch.max(target_values).item()) self.track_data("Target / Target (min)", torch.min(target_values).item()) self.track_data("Target / Target (mean)", torch.mean(target_values).item()) diff --git a/skrl/agents/torch/sarsa/__init__.py b/skrl/agents/torch/sarsa/__init__.py index c7c0a6e5..56a71e30 100644 --- a/skrl/agents/torch/sarsa/__init__.py +++ b/skrl/agents/torch/sarsa/__init__.py @@ -1 +1 @@ -from .sarsa import SARSA, SARSA_DEFAULT_CONFIG \ No newline at end of file +from .sarsa import SARSA, SARSA_DEFAULT_CONFIG diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index fdad6030..7d68793b 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -33,21 +33,21 @@ class SARSA(Agent): - def __init__(self, - models: 
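# Sketch of the twin-critic loss from the SAC hunks above, combined with the
# standard entropy-regularized target (the target computation itself sits between
# the hunks shown, so its exact form here is an assumption). All networks are
# assumed plain callables; `policy` returns (actions, log_prob).
import torch
import torch.nn.functional as F

def sac_critic_loss(critic_1, critic_2, target_critic_1, target_critic_2, policy,
                    states, actions, rewards, next_states, dones,
                    entropy_coefficient, discount_factor=0.99):
    with torch.no_grad():
        next_actions, next_log_prob = policy(next_states)
        target_q = torch.min(target_critic_1(next_states, next_actions),
                             target_critic_2(next_states, next_actions))
        target_values = rewards + discount_factor * dones.logical_not() \
                        * (target_q - entropy_coefficient * next_log_prob)
    critic_1_values = critic_1(states, actions)
    critic_2_values = critic_2(states, actions)
    return (F.mse_loss(critic_1_values, target_values)
            + F.mse_loss(critic_2_values, target_values)) / 2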
Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """State Action Reward State Action (SARSA) https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.17.2539 - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. - If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -63,11 +63,11 @@ def __init__(self, """ _cfg = copy.deepcopy(SARSA_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -75,10 +75,10 @@ def __init__(self, # checkpoint models self.checkpoint_modules["policy"] = self.policy - + # configuration self._discount_factor = self.cfg["discount_factor"] - + self._random_timesteps = self.cfg["random_timesteps"] self._learning_starts = self.cfg["learning_starts"] @@ -118,17 +118,17 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample actions from policy return self.policy.act(states, taken_actions=None, role="policy") - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -197,7 +197,7 @@ def _update(self, timestep: int, timesteps: int) -> None: """ q_table = self.policy.table() env_ids = torch.arange(self._current_rewards.shape[0]).view(-1, 1) - + # compute next actions next_actions = self.policy.act(self._current_next_states, taken_actions=None, role="policy")[0] @@ -206,4 +206,3 @@ def _update(self, timestep: int, timesteps: int) -> None: * (self._current_rewards + self._discount_factor * self._current_dones.logical_not() \ * q_table[env_ids, self._current_next_states, next_actions] \ - q_table[env_ids, self._current_states, self._current_actions]) - \ No newline at end of file diff --git a/skrl/agents/torch/td3/__init__.py b/skrl/agents/torch/td3/__init__.py index 85f09b4f..7b5409a6 100644 --- a/skrl/agents/torch/td3/__init__.py +++ b/skrl/agents/torch/td3/__init__.py @@ -1 +1 @@ -from .td3 import TD3, TD3_DEFAULT_CONFIG \ No newline at end of file +from 
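# Sketch of the tabular SARSA backup from the hunk above: unlike Q-learning, the
# bootstrap term uses the action the policy actually selects in the next state
# rather than the greedy maximum. Plain NumPy, single environment, for
# illustration only.
import numpy as np

def sarsa_update(q_table, state, action, reward, next_state, next_action, done,
                 learning_rate=0.5, discount_factor=0.99):
    target = reward + (0.0 if done else discount_factor * q_table[next_state, next_action])
    q_table[state, action] += learning_rate * (target - q_table[state, action])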
.td3 import TD3, TD3_DEFAULT_CONFIG diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index ae62cbf9..bc7a5f22 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -16,10 +16,10 @@ TD3_DEFAULT_CONFIG = { "gradient_steps": 1, # gradient steps "batch_size": 64, # training batch size - + "discount_factor": 0.99, # discount factor (gamma) "polyak": 0.005, # soft update hyperparameter (tau) - + "actor_learning_rate": 1e-3, # actor learning rate "critic_learning_rate": 1e-3, # critic learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) @@ -56,21 +56,21 @@ class TD3(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Twin Delayed DDPG (TD3) https://arxiv.org/abs/1802.09477 - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. - If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -86,11 +86,11 @@ def __init__(self, """ _cfg = copy.deepcopy(TD3_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -100,7 +100,7 @@ def __init__(self, self.critic_2 = self.models.get("critic_2", None) self.target_critic_1 = self.models.get("target_critic_1", None) self.target_critic_2 = self.models.get("target_critic_2", None) - + # checkpoint models self.checkpoint_modules["policy"] = self.policy self.checkpoint_modules["target_policy"] = self.target_policy @@ -126,13 +126,13 @@ def __init__(self, self._discount_factor = self.cfg["discount_factor"] self._polyak = self.cfg["polyak"] - + self._actor_learning_rate = self.cfg["actor_learning_rate"] self._critic_learning_rate = self.cfg["critic_learning_rate"] self._learning_rate_scheduler = self.cfg["learning_rate_scheduler"] self._state_preprocessor = self.cfg["state_preprocessor"] - + self._random_timesteps = self.cfg["random_timesteps"] self._learning_starts = self.cfg["learning_starts"] @@ -152,7 +152,7 @@ def __init__(self, # set up optimizers and learning rate schedulers if self.policy is not None and self.critic_1 is not None and self.critic_2 is not None: self.policy_optimizer = torch.optim.Adam(self.policy.parameters(), lr=self._actor_learning_rate) - self.critic_optimizer = torch.optim.Adam(itertools.chain(self.critic_1.parameters(), self.critic_2.parameters()), + self.critic_optimizer = torch.optim.Adam(itertools.chain(self.critic_1.parameters(), 
self.critic_2.parameters()), lr=self._critic_learning_rate) if self._learning_rate_scheduler is not None: self.policy_scheduler = self._learning_rate_scheduler(self.policy_optimizer, **self.cfg["learning_rate_scheduler_kwargs"]) @@ -211,17 +211,17 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample deterministic actions actions = self.policy.act(states, taken_actions=None, role="policy") - + # add noise if self._exploration_noise is not None: # sample noises noises = self._exploration_noise.sample(actions[0].shape) - + # define exploration timesteps scale = self._exploration_final_scale if self._exploration_timesteps is None: self._exploration_timesteps = timesteps - + # apply exploration noise if timestep <= self._exploration_timesteps: scale = (1 - timestep / self._exploration_timesteps) \ @@ -233,8 +233,8 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens actions[0].add_(noises) if self._backward_compatibility: - actions = (torch.max(torch.min(actions[0], self.clip_actions_max), self.clip_actions_min), - actions[1], + actions = (torch.max(torch.min(actions[0], self.clip_actions_max), self.clip_actions_min), + actions[1], actions[2]) else: actions[0].clamp_(min=self.clip_actions_min, max=self.clip_actions_max) @@ -243,7 +243,7 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens self.track_data("Exploration / Exploration noise (max)", torch.max(noises).item()) self.track_data("Exploration / Exploration noise (min)", torch.min(noises).item()) self.track_data("Exploration / Exploration noise (mean)", torch.mean(noises).item()) - + else: # record noises self.track_data("Exploration / Exploration noise (max)", 0) @@ -252,17 +252,17 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens return actions - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -285,7 +285,7 @@ def record_transition(self, # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - + if self.memory is not None: self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: @@ -311,10 +311,10 @@ def post_interaction(self, timestep: int, timesteps: int) -> None: """ if timestep >= self._learning_starts: self._update(timestep, timesteps) - + # write tracking data and checkpoints super().post_interaction(timestep, timesteps) - + def _update(self, timestep: int, timesteps: int) -> None: """Algorithm's main update step @@ -332,12 +332,12 @@ def _update(self, timestep: int, timesteps: int) -> None: sampled_states = self._state_preprocessor(sampled_states, train=not gradient_step) sampled_next_states = self._state_preprocessor(sampled_next_states) - + with torch.no_grad(): # target policy smoothing next_actions, _, _ = self.target_policy.act(states=sampled_next_states, taken_actions=None, role="target_policy") - 
noises = torch.clamp(self._smooth_regularization_noise.sample(next_actions.shape), - min=-self._smooth_regularization_clip, + noises = torch.clamp(self._smooth_regularization_noise.sample(next_actions.shape), + min=-self._smooth_regularization_clip, max=self._smooth_regularization_clip) next_actions.add_(noises) @@ -355,9 +355,9 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute critic loss critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=sampled_actions, role="critic_1") critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=sampled_actions, role="critic_2") - + critic_loss = F.mse_loss(critic_1_values, target_values) + F.mse_loss(critic_2_values, target_values) - + # optimization step (critic) self.critic_optimizer.zero_grad() critic_loss.backward() @@ -400,7 +400,7 @@ def _update(self, timestep: int, timesteps: int) -> None: self.track_data("Q-network / Q2 (max)", torch.max(critic_2_values).item()) self.track_data("Q-network / Q2 (min)", torch.min(critic_2_values).item()) self.track_data("Q-network / Q2 (mean)", torch.mean(critic_2_values).item()) - + self.track_data("Target / Target (max)", torch.max(target_values).item()) self.track_data("Target / Target (min)", torch.min(target_values).item()) self.track_data("Target / Target (mean)", torch.mean(target_values).item()) diff --git a/skrl/agents/torch/trpo/__init__.py b/skrl/agents/torch/trpo/__init__.py index 9fcdb9e3..152ee2a0 100644 --- a/skrl/agents/torch/trpo/__init__.py +++ b/skrl/agents/torch/trpo/__init__.py @@ -1 +1 @@ -from .trpo import TRPO, TRPO_DEFAULT_CONFIG \ No newline at end of file +from .trpo import TRPO, TRPO_DEFAULT_CONFIG diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index fa9d65d0..e0259a6b 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -19,10 +19,10 @@ "rollouts": 16, # number of rollouts before updating "learning_epochs": 8, # number of learning epochs during each update "mini_batches": 2, # number of mini batches during each learning epoch - + "discount_factor": 0.99, # discount factor (gamma) "lambda": 0.99, # TD(lambda) coefficient (lam) for computing returns and advantages - + "value_learning_rate": 1e-3, # value learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) "learning_rate_scheduler_kwargs": {}, # learning rate scheduler's kwargs (e.g. {"step_size": 1e-3}) @@ -59,21 +59,21 @@ class TRPO(Agent): - def __init__(self, - models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", + def __init__(self, + models: Dict[str, Model], + memory: Union[Memory, Tuple[Memory], None] = None, + observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", cfg: dict = {}) -> None: """Trust Region Policy Optimization (TRPO) https://arxiv.org/abs/1502.05477 - + :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model :param memory: Memory to storage the transitions. 
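# Sketch of TD3's target-policy smoothing as seen in the hunks above, combined with
# the standard clipped double-Q target (the min over the two target critics lies
# between the hunks shown, so that part is an assumption). Models are plain
# callables; `noise_sampler(shape)` stands in for the smooth-regularization noise.
import torch

def td3_target(target_policy, target_critic_1, target_critic_2, noise_sampler,
               rewards, next_states, dones,
               noise_clip=0.5, discount_factor=0.99):
    with torch.no_grad():
        next_actions = target_policy(next_states)
        noises = torch.clamp(noise_sampler(next_actions.shape),
                             min=-noise_clip, max=noise_clip)
        next_actions = next_actions + noises
        target_q = torch.min(target_critic_1(next_states, next_actions),
                             target_critic_2(next_states, next_actions))
        return rewards + discount_factor * dones.logical_not() * target_q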
- If it is a tuple, the first element will be used for training and + If it is a tuple, the first element will be used for training and for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) @@ -89,11 +89,11 @@ def __init__(self, """ _cfg = copy.deepcopy(TRPO_DEFAULT_CONFIG) _cfg.update(cfg) - super().__init__(models=models, - memory=memory, - observation_space=observation_space, - action_space=action_space, - device=device, + super().__init__(models=models, + memory=memory, + observation_space=observation_space, + action_space=action_space, + device=device, cfg=_cfg) # models @@ -161,7 +161,7 @@ def init(self) -> None: """Initialize the agent """ super().init() - + # create tensors in memory if self.memory is not None: self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) @@ -205,17 +205,17 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens return actions, log_prob, actions_mean - def record_transition(self, - states: torch.Tensor, - actions: torch.Tensor, - rewards: torch.Tensor, - next_states: torch.Tensor, - dones: torch.Tensor, - infos: Any, - timestep: int, + def record_transition(self, + states: torch.Tensor, + actions: torch.Tensor, + rewards: torch.Tensor, + next_states: torch.Tensor, + dones: torch.Tensor, + infos: Any, + timestep: int, timesteps: int) -> None: """Record an environment transition in memory - + :param states: Observations/states of the environment used to make the decision :type states: torch.Tensor :param actions: Actions taken by the agent @@ -246,10 +246,10 @@ def record_transition(self, values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, log_prob=self._current_log_prob, values=values) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, log_prob=self._current_log_prob, values=values) def pre_interaction(self, timestep: int, timesteps: int) -> None: @@ -285,11 +285,11 @@ def _update(self, timestep: int, timesteps: int) -> None: :param timesteps: Number of timesteps :type timesteps: int """ - def compute_gae(rewards: torch.Tensor, - dones: torch.Tensor, - values: torch.Tensor, - next_values: torch.Tensor, - discount_factor: float = 0.99, + def compute_gae(rewards: torch.Tensor, + dones: torch.Tensor, + values: torch.Tensor, + next_values: torch.Tensor, + discount_factor: float = 0.99, lambda_coefficient: float = 0.95) -> torch.Tensor: """Compute the Generalized Advantage Estimator (GAE) @@ -326,10 +326,10 @@ def compute_gae(rewards: torch.Tensor, return returns, advantages - def surrogate_loss(policy: Model, - states: torch.Tensor, - actions: torch.Tensor, - log_prob: torch.Tensor, + def surrogate_loss(policy: Model, + states: torch.Tensor, + actions: torch.Tensor, + log_prob: torch.Tensor, advantages: torch.Tensor) -> torch.Tensor: """Compute the surrogate objective (policy loss) @@ -350,10 +350,10 @@ def 
surrogate_loss(policy: Model, _, new_log_prob, _ = policy.act(states, taken_actions=actions, role="policy") return (advantages * torch.exp(new_log_prob - log_prob.detach())).mean() - def conjugate_gradient(policy: Model, - states: torch.Tensor, - b: torch.Tensor, - num_iterations: float = 10, + def conjugate_gradient(policy: Model, + states: torch.Tensor, + b: torch.Tensor, + num_iterations: float = 10, residual_tolerance: float = 1e-10) -> torch.Tensor: """Conjugate gradient algorithm to solve Ax = b using the iterative method @@ -363,7 +363,7 @@ def conjugate_gradient(policy: Model, :type policy: Model :param states: States :type states: torch.Tensor - :param b: Vector b + :param b: Vector b :type b: torch.Tensor :param num_iterations: Number of iterations (default: 10) :type num_iterations: float, optional @@ -389,12 +389,12 @@ def conjugate_gradient(policy: Model, rr_old = rr_new return x - def fisher_vector_product(policy: Model, - states: torch.Tensor, - vector: torch.Tensor, + def fisher_vector_product(policy: Model, + states: torch.Tensor, + vector: torch.Tensor, damping: float = 0.1) -> torch.Tensor: """Compute the Fisher vector product (direct method) - + https://www.telesens.co/2018/06/09/efficiently-computing-the-fisher-vector-product-in-trpo/ :param policy: Policy @@ -437,7 +437,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor _, _, mu_2 = policy_2.act(states, taken_actions=None, role="policy") logstd_2 = policy_2.get_log_std(role="policy") - + kl = logstd_1 - logstd_2 + 0.5 * (torch.square(logstd_1.exp()) + torch.square(mu_1 - mu_2)) \ / torch.square(logstd_2.exp()) - 0.5 return torch.sum(kl, dim=-1).mean() @@ -446,7 +446,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor with torch.no_grad(): last_values, _, _ = self.value.act(self._state_preprocessor(self._current_next_states.float()), taken_actions=None, role="value") last_values = self._value_preprocessor(last_values, inverse=True) - + values = self.memory.get_tensor_by_name("values") returns, advantages = compute_gae(rewards=self.memory.get_tensor_by_name("rewards"), dones=self.memory.get_tensor_by_name("dones"), @@ -467,7 +467,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor # learning epochs for epoch in range(self._learning_epochs): - + # mini-batches loop for sampled_states, sampled_actions, sampled_log_prob, sampled_returns, sampled_advantages in sampled_batches: @@ -479,7 +479,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor flat_policy_loss_gradient = torch.cat([gradient.view(-1) for gradient in policy_loss_gradient]) # compute the search direction using the conjugate gradient algorithm - search_direction = conjugate_gradient(self.policy, sampled_states, flat_policy_loss_gradient.data, + search_direction = conjugate_gradient(self.policy, sampled_states, flat_policy_loss_gradient.data, num_iterations=self._conjugate_gradient_steps) # compute step size and full step @@ -533,7 +533,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor # record data self.track_data("Loss / Policy loss", cumulative_policy_loss / (self._learning_epochs * self._mini_batches)) self.track_data("Loss / Value loss", cumulative_value_loss / (self._learning_epochs * self._mini_batches)) - + self.track_data("Policy / Standard deviation", self.policy.distribution(role="policy").stddev.mean().item()) if self._learning_rate_scheduler: diff --git 
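# Sketch of the conjugate-gradient solver whose signature appears in the TRPO hunks
# above: it solves A x = b given only matrix-vector products A v, supplied by a
# caller-provided function playing the role of the Fisher-vector product. A generic
# standalone version, not the agent's exact code; `b` is a 1-D tensor.
import torch

def conjugate_gradient(matrix_vector_product, b, num_iterations=10,
                       residual_tolerance=1e-10):
    x = torch.zeros_like(b)
    r = b.clone()          # residual b - A x (x starts at zero)
    p = b.clone()          # search direction
    rr_old = torch.dot(r, r)
    for _ in range(num_iterations):
        Ap = matrix_vector_product(p)
        alpha = rr_old / torch.dot(p, Ap)
        x += alpha * p
        r -= alpha * Ap
        rr_new = torch.dot(r, r)
        if rr_new < residual_tolerance:
            break
        p = r + (rr_new / rr_old) * p
        rr_old = rr_new
    return x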
a/skrl/envs/torch/loaders.py b/skrl/envs/torch/loaders.py index 234a9826..80eff009 100644 --- a/skrl/envs/torch/loaders.py +++ b/skrl/envs/torch/loaders.py @@ -3,9 +3,9 @@ import queue from contextlib import contextmanager -__all__ = ["load_isaacgym_env_preview2", - "load_isaacgym_env_preview3", - "load_isaacgym_env_preview4", +__all__ = ["load_isaacgym_env_preview2", + "load_isaacgym_env_preview3", + "load_isaacgym_env_preview4", "load_omniverse_isaacgym_env"] @@ -27,7 +27,7 @@ def cwd(new_path: str) -> None: def _omegaconf_to_dict(config) -> dict: """Convert OmegaConf config to dict - + :param config: The OmegaConf config :type config: OmegaConf.Config @@ -69,8 +69,8 @@ def load_isaacgym_env_preview2(task_name: str = "", isaacgymenvs_path: str = "", :type isaacgymenvs_path: str, optional :param show_cfg: Whether to print the configuration (default: True) :type show_cfg: bool, optional - - :raises ValueError: The task name has not been defined, + + :raises ValueError: The task name has not been defined, neither by the function parameter nor by the command line arguments :raises RuntimeError: The isaacgym package is not installed or the path is wrong @@ -99,7 +99,7 @@ def load_isaacgym_env_preview2(task_name: str = "", isaacgymenvs_path: str = "", sys.argv.append(task_name) else: raise ValueError("No task name defined. Set the task_name parameter or use --task as command line argument") - + # get isaacgym envs path from isaacgym package metadata if not isaacgymenvs_path: if not hasattr(isaacgym, "__path__"): @@ -129,7 +129,7 @@ def load_isaacgym_env_preview2(task_name: str = "", isaacgymenvs_path: str = "", if show_cfg: print("\nIsaac Gym environment ({})".format(args.task)) _print_cfg(vars(args)) - + # update task arguments args.cfg_train = os.path.join(path, args.cfg_train) args.cfg_env = os.path.join(path, args.cfg_env) @@ -139,12 +139,12 @@ def load_isaacgym_env_preview2(task_name: str = "", isaacgymenvs_path: str = "", cfg, cfg_train, _ = load_cfg(args) sim_params = parse_sim_params(args, cfg, cfg_train) task, env = parse_task(args, cfg, cfg_train, sim_params) - + return env def load_isaacgym_env_preview3(task_name: str = "", isaacgymenvs_path: str = "", show_cfg: bool = True): - """Load an Isaac Gym environment (preview 3) - + """Load an Isaac Gym environment (preview 3) + Isaac Gym benchmark environments: https://github.com/NVIDIA-Omniverse/IsaacGymEnvs :param task_name: The name of the task (default: ""). 
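# The loaders above temporarily change the working directory through a `cwd`
# context manager; its body is not part of these hunks, but the usual pattern is
# sketched below (an assumption, not skrl's exact implementation).
import os
from contextlib import contextmanager

@contextmanager
def cwd(new_path: str):
    current_path = os.getcwd()
    os.chdir(new_path)          # enter the requested directory
    try:
        yield
    finally:
        os.chdir(current_path)  # always restore the previous directory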
@@ -156,7 +156,7 @@ def load_isaacgym_env_preview3(task_name: str = "", isaacgymenvs_path: str = "", :type isaacgymenvs_path: str, optional :param show_cfg: Whether to print the configuration (default: True) :type show_cfg: bool, optional - + :raises ValueError: The task name has not been defined, neither by the function parameter nor by the command line arguments :raises RuntimeError: The isaacgymenvs package is not installed or the path is wrong @@ -171,7 +171,7 @@ def load_isaacgym_env_preview3(task_name: str = "", isaacgymenvs_path: str = "", import isaacgym import isaacgymenvs - + # check task from command line arguments defined = False for arg in sys.argv: @@ -233,12 +233,12 @@ def load_isaacgym_env_preview3(task_name: str = "", isaacgymenvs_path: str = "", sys.path.append(isaacgymenvs_path) from tasks import isaacgym_task_map try: - env = isaacgym_task_map[config.task.name](cfg=cfg, + env = isaacgym_task_map[config.task.name](cfg=cfg, sim_device=config.sim_device, graphics_device_id=config.graphics_device_id, headless=config.headless) except TypeError as e: - env = isaacgym_task_map[config.task.name](cfg=cfg, + env = isaacgym_task_map[config.task.name](cfg=cfg, rl_device=config.rl_device, sim_device=config.sim_device, graphics_device_id=config.graphics_device_id, @@ -249,8 +249,8 @@ def load_isaacgym_env_preview3(task_name: str = "", isaacgymenvs_path: str = "", return env def load_isaacgym_env_preview4(task_name: str = "", isaacgymenvs_path: str = "", show_cfg: bool = True): - """Load an Isaac Gym environment (preview 4) - + """Load an Isaac Gym environment (preview 4) + Isaac Gym benchmark environments: https://github.com/NVIDIA-Omniverse/IsaacGymEnvs :param task_name: The name of the task (default: ""). @@ -262,7 +262,7 @@ def load_isaacgym_env_preview4(task_name: str = "", isaacgymenvs_path: str = "", :type isaacgymenvs_path: str, optional :param show_cfg: Whether to print the configuration (default: True) :type show_cfg: bool, optional - + :raises ValueError: The task name has not been defined, neither by the function parameter nor by the command line arguments :raises RuntimeError: The isaacgymenvs package is not installed or the path is wrong @@ -271,10 +271,10 @@ def load_isaacgym_env_preview4(task_name: str = "", isaacgymenvs_path: str = "", """ return load_isaacgym_env_preview3(task_name, isaacgymenvs_path, show_cfg) -def load_omniverse_isaacgym_env(task_name: str = "", - omniisaacgymenvs_path: str = "", - show_cfg: bool = True, - multi_threaded: bool = False, +def load_omniverse_isaacgym_env(task_name: str = "", + omniisaacgymenvs_path: str = "", + show_cfg: bool = True, + multi_threaded: bool = False, timeout: int = 30): """Load an Omniverse Isaac Gym environment @@ -293,7 +293,7 @@ def load_omniverse_isaacgym_env(task_name: str = "", :type multi_threaded: bool, optional :param timeout: Seconds to wait for data when queue is empty in multi-threaded environment (default: 30) :type timeout: int, optional - + :raises ValueError: The task name has not been defined, neither by the function parameter nor by the command line arguments :raises RuntimeError: The omniisaacgymenvs package is not installed or the path is wrong @@ -306,12 +306,12 @@ def load_omniverse_isaacgym_env(task_name: str = "", from hydra._internal.utils import create_automatic_config_search_path, get_args_parser from omegaconf import OmegaConf - + from omni.isaac.gym.vec_env import VecEnvBase, VecEnvMT, TaskStopException from omni.isaac.gym.vec_env.vec_env_mt import TrainerMT import omniisaacgymenvs - + # 
check task from command line arguments defined = False for arg in sys.argv: @@ -350,7 +350,7 @@ def load_omniverse_isaacgym_env(task_name: str = "", hydra_object = Hydra.create_main_hydra2(task_name='load_omniisaacgymenv', config_search_path=search_path) config = hydra_object.compose_config(config_file, args.overrides, run_mode=RunMode.RUN) - cfg = {} + cfg = {} cfg["task"] = _omegaconf_to_dict(config.task) cfg["task_name"] = config.task_name cfg["experiment"] = config.experiment @@ -374,7 +374,7 @@ def load_omniverse_isaacgym_env(task_name: str = "", print("\nOmniverse Isaac Gym environment ({})".format(config.task.name)) _print_cfg(cfg) - # internal classes + # internal classes class _OmniIsaacGymVecEnv(VecEnvBase): def step(self, actions): actions = torch.clamp(actions, -self._task.clip_actions, self._task.clip_actions).to(self._task.device).clone() @@ -404,7 +404,7 @@ def stop(self): class _OmniIsaacGymVecEnvMT(VecEnvMT): def __init__(self, headless): super().__init__(headless) - + self.action_queue = queue.Queue(1) self.data_queue = queue.Queue(1) diff --git a/skrl/memories/torch/__init__.py b/skrl/memories/torch/__init__.py index cad85aaf..184c1a29 100644 --- a/skrl/memories/torch/__init__.py +++ b/skrl/memories/torch/__init__.py @@ -1,4 +1,4 @@ from .base import Memory from .random import RandomMemory -from .prioritized import PrioritizedMemory \ No newline at end of file +from .prioritized import PrioritizedMemory diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py index aaa18ab5..70f37bd0 100644 --- a/skrl/memories/torch/base.py +++ b/skrl/memories/torch/base.py @@ -13,18 +13,18 @@ class Memory: - def __init__(self, - memory_size: int, - num_envs: int = 1, - device: Union[str, torch.device] = "cuda:0", - export: bool = False, - export_format: str = "pt", + def __init__(self, + memory_size: int, + num_envs: int = 1, + device: Union[str, torch.device] = "cuda:0", + export: bool = False, + export_format: str = "pt", export_directory: str = "") -> None: """Base class representing a memory with circular buffers Buffers are torch tensors with shape (memory size, number of environments, data size). Circular buffers are implemented with two integers: a memory index and an environment index - + :param memory_size: Maximum number of elements in the first dimension of each internal storage :type memory_size: int :param num_envs: Number of parallel environments (default: 1) @@ -65,7 +65,7 @@ def __init__(self, def __len__(self) -> int: """Compute and return the current (valid) size of the memory - + The valid size is calculated as the ``memory_size * num_envs`` if the memory is full (filled). Otherwise, the ``memory_index * num_envs + env_index`` is returned @@ -73,7 +73,7 @@ def __len__(self) -> int: :rtype: int """ return self.memory_size * self.num_envs if self.filled else self.memory_index * self.num_envs + self.env_index - + def _get_space_size(self, space: Union[int, Tuple[int], gym.Space]) -> int: """Get the size (number of elements) of a space @@ -144,7 +144,7 @@ def set_tensor_by_name(self, name: str, tensor: torch.Tensor) -> None: def create_tensor(self, name: str, size: Union[int, Tuple[int], gym.Space], dtype: Union[torch.dtype, None] = None) -> bool: """Create a new internal tensor in memory - + The tensor will have a 3-components shape (memory size, number of environments, size). 
The internal representation will use _tensor_ as the name of the class property @@ -156,7 +156,7 @@ def create_tensor(self, name: str, size: Union[int, Tuple[int], gym.Space], dtyp :param dtype: Data type (torch.dtype). If None, the global default torch data type will be used (default) :type dtype: torch.dtype or None, optional - + :raises ValueError: The tensor name exists already but the size or dtype are different :return: True if the tensor was created, otherwise False @@ -210,7 +210,7 @@ def add_samples(self, **tensors: torch.Tensor) -> None: - number of environments less than num_envs: Store the samples and increment the environment index (second index) by the number of the environments - + - number of environments equals num_envs: Store the samples and increment the memory index (first index) by one @@ -282,9 +282,9 @@ def sample(self, names: Tuple[str], batch_size: int, mini_batches: int = 1) -> L :type batch_size: int :param mini_batches: Number of mini-batches to sample (default: 1) :type mini_batches: int, optional - + :raises NotImplementedError: The method has not been implemented - + :return: Sampled data from tensors sorted according to their position in the list of names. The sampled tensors will have the following shape: (batch size, data size) :rtype: list of torch.Tensor list @@ -312,7 +312,7 @@ def sample_by_index(self, names: Tuple[str], indexes: Union[tuple, np.ndarray, t def sample_all(self, names: Tuple[str], mini_batches: int = 1) -> List[List[torch.Tensor]]: """Sample all data from memory - + :param names: Tensors names from which to obtain the samples :type names: tuple or list of strings :param mini_batches: Number of mini-batches to sample (default: 1) @@ -327,12 +327,12 @@ def sample_all(self, names: Tuple[str], mini_batches: int = 1) -> List[List[torc batches = BatchSampler(indexes, batch_size=len(indexes) // mini_batches, drop_last=True) return [[self.tensors_view[name][batch] for name in names] for batch in batches] return [[self.tensors_view[name] for name in names]] - + def save(self, directory: str = "", format: str = "pt") -> None: """Save the memory to a file Supported formats: - + - PyTorch (pt) - NumPy (npz) - Comma-separated values (csv) @@ -350,7 +350,7 @@ def save(self, directory: str = "", format: str = "pt") -> None: os.makedirs(os.path.join(directory, "memories"), exist_ok=True) memory_path = os.path.join(directory, "memories", \ "{}_memory_{}.{}".format(datetime.datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f"), hex(id(self)), format)) - + # torch if format == "pt": torch.save({name: self.tensors[name] for name in self.get_tensor_names()}, memory_path) @@ -391,18 +391,18 @@ def load(self, path: str) -> None: data = torch.load(path) for name in self.get_tensor_names(): setattr(self, "_tensor_{}".format(name), data[name]) - + # numpy elif path.endswith(".npz"): data = np.load(path) for name in data: setattr(self, "_tensor_{}".format(name), torch.tensor(data[name])) - + # comma-separated values elif path.endswith(".csv"): # TODO: load the memory from a csv pass - + # unsupported format else: raise ValueError("Unsupported format: {}".format(path)) diff --git a/skrl/memories/torch/prioritized.py b/skrl/memories/torch/prioritized.py index 926ab084..4a78062a 100644 --- a/skrl/memories/torch/prioritized.py +++ b/skrl/memories/torch/prioritized.py @@ -21,7 +21,7 @@ def __init__(self, memory_size: int, num_envs: int = 1, device: Union[str, torch :type device: str or torch.device, optional :param preallocate: If true, preallocate memory for efficient use 
(default: True) :type preallocate: bool, optional - :param replacement: Flag to indicate whether the sample is with or without replacement (default: True). + :param replacement: Flag to indicate whether the sample is with or without replacement (default: True). Replacement implies that a value can be selected multiple times (the batch size is always guaranteed). Sampling without replacement will return a batch of maximum memory size if the memory size is less than the requested batch size :type replacement: bool, optional @@ -49,8 +49,8 @@ def sample(self, batch_size: int, names: Tuple[str]) -> Tuple[torch.Tensor]: :return: Sampled data from tensors sorted according to their position in the list of names. The sampled tensors will have the following shape: (batch size, data size) :rtype: tuple of torch.Tensor - """ + """ # generate random indexes indexes = np.random.choice(len(self), size=batch_size, replace=True) - + return self.sample_by_index(indexes=indexes, names=names) diff --git a/skrl/memories/torch/random.py b/skrl/memories/torch/random.py index 827d3b04..e8d3f259 100644 --- a/skrl/memories/torch/random.py +++ b/skrl/memories/torch/random.py @@ -6,13 +6,13 @@ class RandomMemory(Memory): - def __init__(self, - memory_size: int, - num_envs: int = 1, - device: Union[str, torch.device] = "cuda:0", - export: bool = False, - export_format: str = "pt", - export_directory: str = "", + def __init__(self, + memory_size: int, + num_envs: int = 1, + device: Union[str, torch.device] = "cuda:0", + export: bool = False, + export_format: str = "pt", + export_directory: str = "", replacement=True) -> None: """Random sampling memory @@ -33,7 +33,7 @@ def __init__(self, :param export_directory: Directory where the memory will be exported (default: ""). If empty, the agent's experiment directory will be used :type export_directory: str, optional - :param replacement: Flag to indicate whether the sample is with or without replacement (default: True). + :param replacement: Flag to indicate whether the sample is with or without replacement (default: True). Replacement implies that a value can be selected multiple times (the batch size is always guaranteed). 
Sampling without replacement will return a batch of maximum memory size if the memory size is less than the requested batch size :type replacement: bool, optional @@ -62,7 +62,7 @@ def sample(self, names: Tuple[str], batch_size: int, mini_batches: int = 1) -> L if self._replacement: indexes = torch.randint(0, len(self), (batch_size,), device=self.device) else: - # details about the random sampling performance can be found here: + # details about the random sampling performance can be found here: # https://discuss.pytorch.org/t/torch-equivalent-of-numpy-random-choice/16146/19 indexes = torch.randperm(len(self), dtype=torch.long, device=self.device)[:batch_size] diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 06cfadf6..32c3780e 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -10,9 +10,9 @@ class Model(torch.nn.Module): - def __init__(self, - observation_space: Union[int, Sequence[int], gym.Space], - action_space: Union[int, Sequence[int], gym.Space], + def __init__(self, + observation_space: Union[int, Sequence[int], gym.Space], + action_space: Union[int, Sequence[int], gym.Space], device: Union[str, torch.device] = "cuda:0") -> None: """Base class representing a function approximator @@ -23,7 +23,7 @@ def __init__(self, - ``action_space`` (int, sequence of int, gym.Space): Action space - ``num_observations`` (int): Number of elements in the observation/state space - ``num_actions`` (int): Number of elements in the action space - + :param observation_space: Observation/state space or shape. The ``num_observations`` property will contain the size of that space :type observation_space: int, sequence of int, gym.Space @@ -61,14 +61,14 @@ def act(self, states, taken_actions=None, role=""): self._random_distribution = None - def _get_space_size(self, + def _get_space_size(self, space: Union[int, Sequence[int], gym.Space], number_of_elements: bool = True) -> int: """Get the size (number of elements) of a space :param space: Space or shape from which to obtain the number of elements :type space: int, sequence of int, or gym.Space - :param number_of_elements: Whether the number of elements occupied by the space is returned (default: ``True``). + :param number_of_elements: Whether the number of elements occupied by the space is returned (default: ``True``). If ``False``, the shape of the space is returned. It only affects Discrete spaces :type number_of_elements: bool, optional @@ -100,7 +100,7 @@ def _get_space_size(self, 1 # Dict space - >>> space = gym.spaces.Dict({'a': gym.spaces.Box(low=-1, high=1, shape=(2, 3)), + >>> space = gym.spaces.Dict({'a': gym.spaces.Box(low=-1, high=1, shape=(2, 3)), ... 
'b': gym.spaces.Discrete(4)}) >>> model._get_space_size(space) 10 @@ -126,16 +126,16 @@ def _get_space_size(self, raise ValueError("Space type {} not supported".format(type(space))) return int(size) - def tensor_to_space(self, - tensor: torch.Tensor, - space: gym.Space, + def tensor_to_space(self, + tensor: torch.Tensor, + space: gym.Space, start: int = 0) -> Union[torch.Tensor, dict]: """Map a flat tensor to a Gym space The mapping is done in the following way: - Tensors belonging to Discrete spaces are returned without modification - - Tensors belonging to Box spaces are reshaped to the corresponding space shape + - Tensors belonging to Box spaces are reshaped to the corresponding space shape keeping the first dimension (number of samples) as they are - Tensors belonging to Dict spaces are mapped into a dictionary with the same keys as the original space @@ -153,7 +153,7 @@ def tensor_to_space(self, Example:: - >>> space = gym.spaces.Dict({'a': gym.spaces.Box(low=-1, high=1, shape=(2, 3)), + >>> space = gym.spaces.Dict({'a': gym.spaces.Box(low=-1, high=1, shape=(2, 3)), ... 'b': gym.spaces.Discrete(4)}) >>> tensor = torch.tensor([[-0.3, -0.2, -0.1, 0.1, 0.2, 0.3, 2]]) >>> @@ -175,9 +175,9 @@ def tensor_to_space(self, return output raise ValueError("Space type {} not supported".format(type(space))) - def random_act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, + def random_act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, role: str = "") -> Sequence[torch.Tensor]: """Act randomly according to the action space @@ -203,7 +203,7 @@ def random_act(self, self._random_distribution = torch.distributions.uniform.Uniform( low=torch.tensor(self.action_space.low[0], device=self.device, dtype=torch.float32), high=torch.tensor(self.action_space.high[0], device=self.device, dtype=torch.float32)) - + return self._random_distribution.sample(sample_shape=(states.shape[0], self.num_actions)), None, None else: raise NotImplementedError("Action space type ({}) not supported".format(type(self.action_space))) @@ -211,7 +211,7 @@ def random_act(self, def init_parameters(self, method_name: str = "normal_", *args, **kwargs) -> None: """Initialize the model parameters according to the specified method name - Method names are from the `torch.nn.init `_ module. + Method names are from the `torch.nn.init `_ module. Allowed method names are *uniform_*, *normal_*, *constant_*, etc. :param method_name: `torch.nn.init `_ method name (default: ``"normal_"``) @@ -234,13 +234,13 @@ def init_parameters(self, method_name: str = "normal_", *args, **kwargs) -> None def init_weights(self, method_name: str = "orthogonal_", *args, **kwargs) -> None: """Initialize the model weights according to the specified method name - - Method names are from the `torch.nn.init `_ module. + + Method names are from the `torch.nn.init `_ module. Allowed method names are *uniform_*, *normal_*, *constant_*, etc. 
The following layers will be initialized: - torch.nn.Linear - + :param method_name: `torch.nn.init `_ method name (default: ``"orthogonal_"``) :type method_name: str, optional :param args: Positional arguments of the method to be called @@ -262,7 +262,7 @@ def _update_weights(module, method_name, args, kwargs): _update_weights(layer, method_name, args, kwargs) elif isinstance(layer, torch.nn.Linear): exec("torch.nn.init.{}(layer.weight, *args, **kwargs)".format(method_name)) - + _update_weights(self.children(), method_name, args, kwargs) def forward(self): @@ -272,8 +272,8 @@ def forward(self): """ raise NotImplementedError("Implement .act() and .compute() methods instead of this") - def compute(self, - states: torch.Tensor, + def compute(self, + states: torch.Tensor, taken_actions: Optional[torch.Tensor] = None, role: str = "") -> Union[torch.Tensor, Sequence[torch.Tensor]]: """Define the computation performed (to be implemented by the inheriting classes) by the models @@ -287,15 +287,15 @@ def compute(self, :type role: str, optional :raises NotImplementedError: Child class must implement this method - + :return: Computation performed by the models :rtype: torch.Tensor or sequence of torch.Tensor """ raise NotImplementedError("The computation performed by the models (.compute()) is not implemented") - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, + def act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, role: str = "") -> Sequence[torch.Tensor]: """Act according to the specified behavior (to be implemented by the inheriting classes) @@ -312,7 +312,7 @@ def act(self, :type role: str, optional :raises NotImplementedError: Child class must implement this method - + :return: Action to be taken by the agent given the state of the environment. The typical sequence's components are the actions, the log of the probability density function and mean actions. Deterministic agents must ignore the last two components and return empty tensors or None for them @@ -320,11 +320,11 @@ def act(self, """ logger.warning("Make sure to place Mixins before Model during model definition") raise NotImplementedError("The action to be taken by the agent (.act()) is not implemented") - + def set_mode(self, mode: str) -> None: """Set the model mode (training or evaluation) - :param mode: Mode: ``"train"`` for training or ``"eval"`` for evaluation. + :param mode: Mode: ``"train"`` for training or ``"eval"`` for evaluation. See `torch.nn.Module.train `_ :type mode: str @@ -339,7 +339,7 @@ def set_mode(self, mode: str) -> None: def save(self, path: str, state_dict: Optional[dict] = None) -> None: """Save the model to the specified path - + :param path: Path to save the model to :type path: str :param state_dict: State dictionary to save (default: ``None``). @@ -390,7 +390,7 @@ def migrate(self, The final storage device is determined by the constructor of the model Only one of ``state_dict`` or ``path`` can be specified. 
- The ``path`` parameter allows automatic loading the ``state_dict`` only from files generated + The ``path`` parameter allows automatic loading the ``state_dict`` only from files generated by the *rl_games* and *stable-baselines3* libraries at the moment For ambiguous models (where 2 or more parameters, for source or current model, have equal shape) @@ -580,13 +580,13 @@ def migrate(self, self.eval() return status - + def freeze_parameters(self, freeze: bool = True) -> None: """Freeze or unfreeze internal parameters - Freeze: disable gradient computation (``parameters.requires_grad = False``) - - Unfreeze: enable gradient computation (``parameters.requires_grad = True``) - + - Unfreeze: enable gradient computation (``parameters.requires_grad = True``) + :param freeze: Freeze the internal parameters if True, otherwise unfreeze them (default: ``True``) :type freeze: bool, optional diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index 19f94f10..ad5fa4da 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -9,8 +9,8 @@ def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None: """Categorical mixin model (stochastic model) :param unnormalized_log_prob: Flag to indicate how to be interpreted the model's output (default: ``True``). - If True, the model's output is interpreted as unnormalized log probabilities - (it can be any real number), otherwise as normalized probabilities + If True, the model's output is interpreted as unnormalized log probabilities + (it can be any real number), otherwise as normalized probabilities (the output must be non-negative, finite and have a non-zero sum) :type unnormalized_log_prob: bool, optional :param role: Role play by the model (default: ``""``) @@ -22,7 +22,7 @@ def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None: >>> import torch >>> import torch.nn as nn >>> from skrl.models.torch import Model, CategoricalMixin - >>> + >>> >>> class Policy(CategoricalMixin, Model): ... def __init__(self, observation_space, action_space, device="cuda:0", unnormalized_log_prob=True): ... 
Model.__init__(self, observation_space, action_space, device) @@ -40,7 +40,7 @@ def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None: >>> # given an observation_space: gym.spaces.Box with shape (4,) >>> # and an action_space: gym.spaces.Discrete with n = 2 >>> model = Policy(observation_space, action_space) - >>> + >>> >>> print(model) Policy( (net): Sequential( @@ -60,9 +60,9 @@ def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None: self._c_distribution = {} self._c_distribution[role] = None - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, + def act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment @@ -86,7 +86,7 @@ def act(self, torch.Size([4096, 1]) torch.Size([4096, 1]) torch.Size([4096, 2]) """ # map from states/observations to normalized probabilities or unnormalized log probabilities - output = self.compute(states.to(self.device), + output = self.compute(states.to(self.device), taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) # unnormalized log probabilities @@ -95,7 +95,7 @@ def act(self, # normalized probabilities else: self._c_distribution[role] = Categorical(probs=output) - + # actions and log of the probability density function actions = self._c_distribution[role].sample() log_prob = self._c_distribution[role].log_prob(actions if taken_actions is None else taken_actions.view(-1)) diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index a90cfaff..2aa2e669 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -20,7 +20,7 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: >>> import torch >>> import torch.nn as nn >>> from skrl.models.torch import Model, DeterministicMixin - >>> + >>> >>> class Value(DeterministicMixin, Model): ... def __init__(self, observation_space, action_space, device="cuda:0", clip_actions=False): ... 
Model.__init__(self, observation_space, action_space, device) @@ -38,7 +38,7 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: >>> # given an observation_space: gym.spaces.Box with shape (60,) >>> # and an action_space: gym.spaces.Box with shape (8,) >>> model = Value(observation_space, action_space) - >>> + >>> >>> print(model) Value( (net): Sequential( @@ -60,10 +60,10 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) - - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, + + def act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, role: str = "") -> Sequence[torch.Tensor]: """Act deterministically in response to the state of the environment @@ -87,10 +87,10 @@ def act(self, torch.Size([4096, 1]) None None """ # map from observations/states to actions - actions = self.compute(states.to(self.device), + actions = self.compute(states.to(self.device), taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) - # clip actions + # clip actions if self._d_clip_actions[role] if role in self._d_clip_actions else self._d_clip_actions[""]: if self._backward_compatibility: actions = torch.max(torch.min(actions, self.clip_actions_max), self.clip_actions_min) @@ -98,4 +98,3 @@ def act(self, actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max) return actions, None, None - \ No newline at end of file diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index 53fa04b9..4beb9936 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -7,10 +7,10 @@ class GaussianMixin: - def __init__(self, - clip_actions: bool = False, - clip_log_std: bool = True, - min_log_std: float = -20, + def __init__(self, + clip_actions: bool = False, + clip_log_std: bool = True, + min_log_std: float = -20, max_log_std: float = 2, reduction: str = "sum", role: str = "") -> None: @@ -25,7 +25,7 @@ def __init__(self, :param max_log_std: Maximum value of the log standard deviation if ``clip_log_std`` is True (default: ``2``) :type max_log_std: float, optional :param reduction: Reduction method for returning the log probability density function: (default: ``"sum"``). - Supported values are ``"mean"``, ``"sum"``, ``"prod"`` and ``"none"``. If "``none"``, the log probability density + Supported values are ``"mean"``, ``"sum"``, ``"prod"`` and ``"none"``. If "``none"``, the log probability density function is returned as a tensor of shape ``(num_samples, num_actions)`` instead of ``(num_samples, 1)`` :type reduction: str, optional :param role: Role play by the model (default: ``""``) @@ -39,9 +39,9 @@ def __init__(self, >>> import torch >>> import torch.nn as nn >>> from skrl.models.torch import Model, GaussianMixin - >>> + >>> >>> class Policy(GaussianMixin, Model): - ... def __init__(self, observation_space, action_space, device="cuda:0", + ... def __init__(self, observation_space, action_space, device="cuda:0", ... clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): ... Model.__init__(self, observation_space, action_space, device) ... 
GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) @@ -59,7 +59,7 @@ def __init__(self, >>> # given an observation_space: gym.spaces.Box with shape (60,) >>> # and an action_space: gym.spaces.Box with shape (8,) >>> model = Policy(observation_space, action_space) - >>> + >>> >>> print(model) Policy( (net): Sequential( @@ -78,7 +78,7 @@ def __init__(self, if self._g_clip_actions[role]: self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32) self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32) - + # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) @@ -101,7 +101,7 @@ def __init__(self, if not hasattr(self, "_g_distribution"): self._g_distribution = {} self._g_distribution[role] = None - + if reduction not in ["mean", "sum", "prod", "none"]: raise ValueError("reduction must be one of 'mean', 'sum', 'prod' or 'none'") if not hasattr(self, "_g_reduction"): @@ -109,9 +109,9 @@ def __init__(self, self._g_reduction[role] = torch.mean if reduction == "mean" else torch.sum if reduction == "sum" \ else torch.prod if reduction == "prod" else None - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, + def act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment @@ -122,7 +122,7 @@ def act(self, :type taken_actions: torch.Tensor, optional :param role: Role play by the model (default: ``""``) :type role: str, optional - + :return: Action to be taken by the agent given the state of the environment. 
The sequence's components are the actions, the log of the probability density function and mean actions :rtype: sequence of torch.Tensor @@ -135,12 +135,12 @@ def act(self, torch.Size([4096, 8]) torch.Size([4096, 1]) torch.Size([4096, 8]) """ # map from states/observations to mean actions and log standard deviations - actions_mean, log_std = self.compute(states.to(self.device), + actions_mean, log_std = self.compute(states.to(self.device), taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) # clamp log standard deviations if self._g_clip_log_std[role] if role in self._g_clip_log_std else self._g_clip_log_std[""]: - log_std = torch.clamp(log_std, + log_std = torch.clamp(log_std, self._g_log_std_min[role] if role in self._g_log_std_min else self._g_log_std_min[""], self._g_log_std_max[role] if role in self._g_log_std_max else self._g_log_std_max[""]) @@ -159,7 +159,7 @@ def act(self, actions = torch.max(torch.min(actions, self.clip_actions_max), self.clip_actions_min) else: actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max) - + # log of the probability density function log_prob = self._g_distribution[role].log_prob(actions if taken_actions is None else taken_actions) reduction = self._g_reduction[role] if role in self._g_reduction else self._g_reduction[""] @@ -205,7 +205,7 @@ def get_log_std(self, role: str = "") -> torch.Tensor: """ return (self._g_log_std[role] if role in self._g_log_std else self._g_log_std[""]) \ .repeat(self._g_num_samples[role] if role in self._g_num_samples else self._g_num_samples[""], 1) - + def distribution(self, role: str = "") -> torch.distributions.Normal: """Get the current distribution of the model diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index 4ca63e26..6ee66662 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -7,10 +7,10 @@ class MultivariateGaussianMixin: - def __init__(self, - clip_actions: bool = False, - clip_log_std: bool = True, - min_log_std: float = -20, + def __init__(self, + clip_actions: bool = False, + clip_log_std: bool = True, + min_log_std: float = -20, max_log_std: float = 2, role: str = "") -> None: """Multivariate Gaussian mixin model (stochastic model) @@ -32,7 +32,7 @@ def __init__(self, >>> import torch >>> import torch.nn as nn >>> from skrl.models.torch import Model, MultivariateGaussianMixin - >>> + >>> >>> class Policy(MultivariateGaussianMixin, Model): ... def __init__(self, observation_space, action_space, device="cuda:0", ... 
clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): @@ -52,7 +52,7 @@ def __init__(self, >>> # given an observation_space: gym.spaces.Box with shape (60,) >>> # and an action_space: gym.spaces.Box with shape (8,) >>> model = Policy(observation_space, action_space) - >>> + >>> >>> print(model) Policy( (net): Sequential( @@ -71,7 +71,7 @@ def __init__(self, if self._mg_clip_actions[role]: self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32) self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device, dtype=torch.float32) - + # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) @@ -94,10 +94,10 @@ def __init__(self, if not hasattr(self, "_mg_distribution"): self._mg_distribution = {} self._mg_distribution[role] = None - - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, + + def act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment @@ -108,7 +108,7 @@ def act(self, :type taken_actions: torch.Tensor, optional :param role: Role play by the model (default: ``""``) :type role: str, optional - + :return: Action to be taken by the agent given the state of the environment. The sequence's components are the actions, the log of the probability density function and mean actions :rtype: sequence of torch.Tensor @@ -121,12 +121,12 @@ def act(self, torch.Size([4096, 8]) torch.Size([4096, 1]) torch.Size([4096, 8]) """ # map from states/observations to mean actions and log standard deviations - actions_mean, log_std = self.compute(states.to(self.device), + actions_mean, log_std = self.compute(states.to(self.device), taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) # clamp log standard deviations if self._mg_clip_log_std[role] if role in self._mg_clip_log_std else self._mg_clip_log_std[""]: - log_std = torch.clamp(log_std, + log_std = torch.clamp(log_std, self._mg_log_std_min[role] if role in self._mg_log_std_min else self._mg_log_std_min[""], self._mg_log_std_max[role] if role in self._mg_log_std_max else self._mg_log_std_max[""]) @@ -146,7 +146,7 @@ def act(self, actions = torch.max(torch.min(actions, self.clip_actions_max), self.clip_actions_min) else: actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max) - + # log of the probability density function log_prob = self._mg_distribution[role].log_prob(actions if taken_actions is None else taken_actions) if log_prob.dim() != actions.dim(): diff --git a/skrl/models/torch/tabular.py b/skrl/models/torch/tabular.py index 8fe88279..36eb2d76 100644 --- a/skrl/models/torch/tabular.py +++ b/skrl/models/torch/tabular.py @@ -19,23 +19,23 @@ def __init__(self, num_envs: int = 1, role: str = "") -> None: # define the model >>> import torch >>> from skrl.models.torch import Model, TabularMixin - >>> + >>> >>> class GreedyPolicy(TabularMixin, Model): ... def __init__(self, observation_space, action_space, device="cuda:0", num_envs=1): ... Model.__init__(self, observation_space, action_space, device) ... TabularMixin.__init__(self, num_envs) ... - ... self.table = torch.ones((num_envs, self.num_observations, self.num_actions), + ... self.table = torch.ones((num_envs, self.num_observations, self.num_actions), ... 
dtype=torch.float32, device=self.device) ... ... def compute(self, states, taken_actions, role): - ... actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), states], + ... actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), states], ... dim=-1, keepdim=True).view(-1,1) ... >>> # given an observation_space: gym.spaces.Discrete with n=100 >>> # and an action_space: gym.spaces.Discrete with n=5 >>> model = GreedyPolicy(observation_space, action_space, num_envs=1) - >>> + >>> >>> print(model) GreedyPolicy( (table): Tensor(shape=[1, 100, 5]) @@ -69,9 +69,9 @@ def _get_tensor_names(self) -> Sequence[str]: tensors.append(attr) return sorted(tensors) - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, + def act(self, + states: torch.Tensor, + taken_actions: Optional[torch.Tensor] = None, role: str = "") -> Sequence[torch.Tensor]: """Act in response to the state of the environment @@ -94,10 +94,10 @@ def act(self, >>> print(output[0], output[1], output[2]) tensor([[3]], device='cuda:0') None None """ - actions = self.compute(states.to(self.device), + actions = self.compute(states.to(self.device), taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) return actions, None, None - + def table(self) -> torch.Tensor: """Return the Q-table @@ -143,12 +143,12 @@ def load_state_dict(self, state_dict: Mapping, strict: bool = True) -> None: :param state_dict: A dict containing parameters and persistent buffers :type state_dict: dict - :param strict: Whether to strictly enforce that the keys in state_dict match the keys + :param strict: Whether to strictly enforce that the keys in state_dict match the keys returned by this module's state_dict() function (default: ``True``) :type strict: bool, optional """ Model.load_state_dict(self, state_dict, strict=False) - + for name, tensor in state_dict.items(): if hasattr(self, name) and isinstance(getattr(self, name), torch.Tensor): _tensor = getattr(self, name) @@ -163,7 +163,7 @@ def load_state_dict(self, state_dict: Mapping, strict: bool = True) -> None: def save(self, path: str, state_dict: Optional[dict] = None) -> None: """Save the model to the specified path - + :param path: Path to save the model to :type path: str :param state_dict: State dictionary to save (default: ``None``). 
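The tabular mixin touched by the hunks above is meant to be combined with the base ``Model`` class, as its own docstring example shows. A minimal, self-contained sketch of that usage follows; the discrete spaces, the CPU device and the single environment are illustrative choices, not part of the patch::

    import gym
    import torch

    from skrl.models.torch import Model, TabularMixin

    class GreedyPolicy(TabularMixin, Model):
        def __init__(self, observation_space, action_space, device="cpu", num_envs=1):
            Model.__init__(self, observation_space, action_space, device)
            TabularMixin.__init__(self, num_envs)
            # Q-table with shape (number of environments, observations, actions)
            self.table = torch.ones((num_envs, self.num_observations, self.num_actions),
                                    dtype=torch.float32, device=self.device)

        def compute(self, states, taken_actions=None, role=""):
            # greedy action: index of the largest Q-value for each given state
            return torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), states],
                                dim=-1, keepdim=True).view(-1, 1)

    policy = GreedyPolicy(gym.spaces.Discrete(100), gym.spaces.Discrete(5), num_envs=1)
    actions, _, _ = policy.act(torch.tensor([[53]]))  # TabularMixin.act returns (actions, None, None)
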
diff --git a/skrl/resources/noises/torch/base.py b/skrl/resources/noises/torch/base.py index dac95119..a51356c2 100644 --- a/skrl/resources/noises/torch/base.py +++ b/skrl/resources/noises/torch/base.py @@ -19,7 +19,7 @@ def sample_like(self, tensor: torch.Tensor) -> torch.Tensor: :param tensor: Input tensor used to determine output tensor size (shape) :type tensor: torch.Tensor - + :return: Sampled noise :rtype: torch.Tensor """ @@ -30,10 +30,10 @@ def sample(self, size: Union[Tuple[int], torch.Size]) -> torch.Tensor: :param size: Shape of the sampled tensor :type size: tuple or list of integers, or torch.Size - + :raises NotImplementedError: The method is not implemented by the inheriting classes :return: Sampled noise :rtype: torch.Tensor """ - raise NotImplementedError("The sampling method (.sample()) is not implemented") \ No newline at end of file + raise NotImplementedError("The sampling method (.sample()) is not implemented") diff --git a/skrl/resources/noises/torch/gaussian.py b/skrl/resources/noises/torch/gaussian.py index 443741f4..0fb3e074 100644 --- a/skrl/resources/noises/torch/gaussian.py +++ b/skrl/resources/noises/torch/gaussian.py @@ -21,13 +21,13 @@ def __init__(self, mean: float, std: float, device: Union[str, torch.device] = " self.distribution = Normal(loc=torch.tensor(mean, device=self.device, dtype=torch.float32), scale=torch.tensor(std, device=self.device, dtype=torch.float32)) - + def sample(self, size: Union[Tuple[int], torch.Size]) -> torch.Tensor: """Sample a Gaussian noise :param size: Shape of the sampled tensor :type size: tuple or list of integers, or torch.Size - + :return: Sampled noise :rtype: torch.Tensor """ diff --git a/skrl/resources/noises/torch/ornstein_uhlenbeck.py b/skrl/resources/noises/torch/ornstein_uhlenbeck.py index eeb0973c..53762294 100644 --- a/skrl/resources/noises/torch/ornstein_uhlenbeck.py +++ b/skrl/resources/noises/torch/ornstein_uhlenbeck.py @@ -7,12 +7,12 @@ class OrnsteinUhlenbeckNoise(Noise): - def __init__(self, - theta: float, - sigma: float, - base_scale: float, - mean: float = 0, - std: float = 1, + def __init__(self, + theta: float, + sigma: float, + base_scale: float, + mean: float = 0, + std: float = 1, device: Union[str, torch.device] = "cuda:0") -> None: """Class representing an Ornstein-Uhlenbeck noise @@ -38,18 +38,18 @@ def __init__(self, self.distribution = Normal(loc=torch.tensor(mean, device=self.device, dtype=torch.float32), scale=torch.tensor(std, device=self.device, dtype=torch.float32)) - + def sample(self, size: Union[Tuple[int], torch.Size]) -> torch.Tensor: """Sample an Ornstein-Uhlenbeck noise :param size: Shape of the sampled tensor :type size: tuple or list of integers, or torch.Size - + :return: Sampled noise :rtype: torch.Tensor """ if isinstance(self.state, torch.Tensor) and self.state.size() != torch.Size(size): self.state = 0 self.state += -self.state * self.theta + self.sigma * self.distribution.sample(size) - + return self.base_scale * self.state diff --git a/skrl/resources/preprocessors/torch/running_standard_scaler.py b/skrl/resources/preprocessors/torch/running_standard_scaler.py index 12c8fe47..2f673aea 100644 --- a/skrl/resources/preprocessors/torch/running_standard_scaler.py +++ b/skrl/resources/preprocessors/torch/running_standard_scaler.py @@ -8,14 +8,14 @@ class RunningStandardScaler(nn.Module): - def __init__(self, - size: Union[int, Tuple[int], gym.Space], - epsilon: float = 1e-8, + def __init__(self, + size: Union[int, Tuple[int], gym.Space], + epsilon: float = 1e-8, clip_threshold: 
float = 5.0, device: Union[str, torch.device] = "cuda:0") -> None: """Standardize the input data by removing the mean and scaling by the standard deviation - The implementation is adapted from the rl_games library + The implementation is adapted from the rl_games library (https://github.com/Denys88/rl_games/blob/master/rl_games/algos_torch/running_mean_std.py) Example:: @@ -39,7 +39,7 @@ def __init__(self, self.clip_threshold = clip_threshold size = self._get_space_size(size) - + self.register_buffer("running_mean", torch.zeros(size, dtype = torch.float64, device=device)) self.register_buffer("running_variance", torch.ones(size, dtype = torch.float64, device=device)) self.register_buffer("current_count", torch.ones((), dtype = torch.float64, device=device)) @@ -69,7 +69,7 @@ def _get_space_size(self, space: Union[int, Tuple[int], gym.Space]) -> int: raise ValueError("Space type {} not supported".format(type(space))) def _parallel_variance(self, input_mean: torch.Tensor, input_var: torch.Tensor, input_count: int) -> None: - """Update internal variables using the parallel algorithm for computing variance + """Update internal variables using the parallel algorithm for computing variance https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm @@ -112,7 +112,7 @@ def _compute(self, x: torch.Tensor, train: bool = False, inverse: bool = False) * torch.clamp(x, min=-self.clip_threshold, max=self.clip_threshold) + self.running_mean.float() # standardization by centering and scaling else: - return torch.clamp((x - self.running_mean.float()) / (torch.sqrt(self.running_variance.float()) + self.epsilon), + return torch.clamp((x - self.running_mean.float()) / (torch.sqrt(self.running_variance.float()) + self.epsilon), min=-self.clip_threshold, max=self.clip_threshold) def forward(self, x: torch.Tensor, train: bool = False, inverse: bool = False, no_grad: bool = True) -> torch.Tensor: @@ -125,7 +125,7 @@ def forward(self, x: torch.Tensor, train: bool = False, inverse: bool = False, n tensor([[0.6933, 0.1905], [0.3806, 0.3162], [0.1140, 0.0272]], device='cuda:0') - + >>> running_standard_scaler(x, train=True) tensor([[ 0.8681, -0.6731], [ 0.0560, -0.3684], @@ -149,4 +149,4 @@ def forward(self, x: torch.Tensor, train: bool = False, inverse: bool = False, n with torch.no_grad(): return self._compute(x, train, inverse) else: - return self._compute(x, train, inverse) \ No newline at end of file + return self._compute(x, train, inverse) diff --git a/skrl/resources/schedulers/torch/kl_adaptive.py b/skrl/resources/schedulers/torch/kl_adaptive.py index 6f7035f2..d6d54a3e 100644 --- a/skrl/resources/schedulers/torch/kl_adaptive.py +++ b/skrl/resources/schedulers/torch/kl_adaptive.py @@ -5,24 +5,24 @@ class KLAdaptiveRL(_LRScheduler): - def __init__(self, - optimizer: torch.optim.Optimizer, - kl_threshold: float = 0.008, - min_lr: float = 1e-6, + def __init__(self, + optimizer: torch.optim.Optimizer, + kl_threshold: float = 0.008, + min_lr: float = 1e-6, max_lr: float = 1e-2, kl_factor: float = 2, lr_factor: float = 1.5, - last_epoch: int = -1, + last_epoch: int = -1, verbose: bool = False) -> None: """Adaptive KL scheduler - + Adjusts the learning rate according to the KL divergence. - The implementation is adapted from the rl_games library + The implementation is adapted from the rl_games library (https://github.com/Denys88/rl_games/blob/master/rl_games/common/schedulers.py) .. note:: - This scheduler is only available for PPO at the moment. 
+ This scheduler is only available for PPO at the moment. Applying it to other agents will not change the learning rate Example:: @@ -64,7 +64,7 @@ def __init__(self, def step(self, kl: Union[torch.Tensor, float, None] = None, epoch: Union[int, None] = None) -> None: """ Step scheduler - + Example:: >>> kl = torch.distributions.kl_divergence(p, q) @@ -88,5 +88,5 @@ def step(self, kl: Union[torch.Tensor, float, None] = None, epoch: Union[int, No group['lr'] = max(group['lr'] / self._lr_factor, self.min_lr) elif kl < self.kl_threshold / self._kl_factor: group['lr'] = min(group['lr'] * self._lr_factor, self.max_lr) - + self._last_lr = [group['lr'] for group in self.optimizer.param_groups] diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index d82fb26e..2e414fec 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -171,7 +171,7 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: # reset env if self.states is None: self.states = self.env.reset() - + with torch.no_grad(): if self.num_agents == 1: # compute actions diff --git a/skrl/utils/__init__.py b/skrl/utils/__init__.py index aaef12ab..1689e22c 100644 --- a/skrl/utils/__init__.py +++ b/skrl/utils/__init__.py @@ -14,8 +14,8 @@ def set_seed(seed: Optional[int] = None, deterministic: bool = False) -> int: """ Set the seed for the random number generators - Due to NumPy's legacy seeding constraint the seed must be between 0 and 2**32 - 1. - Otherwise a NumPy exception (``ValueError: Seed must be between 0 and 2**32 - 1``) will be raised + Due to NumPy's legacy seeding constraint the seed must be between 0 and 2**32 - 1. + Otherwise a NumPy exception (``ValueError: Seed must be between 0 and 2**32 - 1``) will be raised Modified packages: @@ -49,7 +49,7 @@ def set_seed(seed: Optional[int] = None, deterministic: bool = False) -> int: :param seed: The seed to set. Is None, a random seed will be generated (default: ``None``) :type seed: int, optional :param deterministic: Whether PyTorch is configured to use deterministic algorithms (default: ``False``). - The following environment variables should be established for CUDA 10.1 (``CUDA_LAUNCH_BLOCKING=1``) + The following environment variables should be established for CUDA 10.1 (``CUDA_LAUNCH_BLOCKING=1``) and for CUDA 10.2 or later (``CUBLAS_WORKSPACE_CONFIG=:16:8`` or ``CUBLAS_WORKSPACE_CONFIG=:4096:2``). 
See PyTorch `Reproducibility `_ for details :type deterministic: bool, optional diff --git a/skrl/utils/control.py b/skrl/utils/control.py index d04a1508..d12591a8 100644 --- a/skrl/utils/control.py +++ b/skrl/utils/control.py @@ -2,8 +2,8 @@ import torch -def ik(jacobian_end_effector, - current_position, current_orientation, +def ik(jacobian_end_effector, + current_position, current_orientation, goal_position, goal_orientation, damping_factor=0.05): """ @@ -22,15 +22,15 @@ def ik(jacobian_end_effector, lmbda = torch.eye(6).to(jacobian_end_effector.device) * (damping_factor ** 2) return (transpose @ torch.inverse(jacobian_end_effector @ transpose + lmbda) @ dpose) -def osc(jacobian_end_effector, mass_matrix, - current_position, current_orientation, +def osc(jacobian_end_effector, mass_matrix, + current_position, current_orientation, goal_position, goal_orientation, current_dof_velocities, kp=5, kv=2): """ https://studywolf.wordpress.com/2013/09/17/robot-control-4-operation-space-control/ """ - + mass_matrix_end_effector = torch.inverse(jacobian_end_effector @ torch.inverse(mass_matrix) @ torch.transpose(jacobian_end_effector, 1, 2)) # compute position and orientation error @@ -41,4 +41,3 @@ def osc(jacobian_end_effector, mass_matrix, dpose = torch.cat([position_error, orientation_error], -1) return torch.transpose(jacobian_end_effector, 1, 2) @ mass_matrix_end_effector @ (kp * dpose).unsqueeze(-1) - kv * mass_matrix @ current_dof_velocities - \ No newline at end of file diff --git a/skrl/utils/isaacgym_utils.py b/skrl/utils/isaacgym_utils.py index eb7aa2b6..9cc1fae8 100644 --- a/skrl/utils/isaacgym_utils.py +++ b/skrl/utils/isaacgym_utils.py @@ -69,9 +69,9 @@ def _route_index(self) -> 'flask.Response': @@ -114,7 +114,7 @@ def _route_index(self) -> 'flask.Response': if(event.keyCode != 18) sendInputRequest({key: event.keyCode}); }, false); - + canvas.addEventListener('mousemove', function(event){ if(event.buttons){ let data = {dx: event.movementX, dy: event.movementY}; @@ -132,7 +132,7 @@ def _route_index(self) -> 'flask.Response': } }, false); - canvas.addEventListener('wheel', function(event){ + canvas.addEventListener('wheel', function(event){ sendInputRequest({mouse: "wheel", dz: Math.sign(event.deltaY)}); }, false); } @@ -145,12 +145,12 @@ def _route_index(self) -> 'flask.Response': def _route_stream(self) -> 'flask.Response': """Stream the image to the web page - + :return: Flask response :rtype: flask.Response """ return flask.Response(self._stream(), mimetype='multipart/x-mixed-replace; boundary=frame') - + def _route_input_event(self) -> 'flask.Response': """Handle keyboard and mouse input @@ -182,22 +182,22 @@ def p_target(p, q, a=0, b=0, c=1, d=0): t = -(a * p[0] + b * p[1] + c * p[2] + d) / denominator return [p[0] + t * (p1[0] - p[0]), p[1] + t * (p1[1] - p[1]), p[2] + t * (p1[2] - p[2])] return v - + # get keyboard and mouse inputs data = flask.request.get_json() key, mouse = data.get("key", None), data.get("mouse", None) dx, dy, dz = data.get("dx", None), data.get("dy", None), data.get("dz", None) - transform = self._gym.get_camera_transform(self._sim, + transform = self._gym.get_camera_transform(self._sim, self._envs[self._camera_id], self._cameras[self._camera_id]) # zoom in/out if mouse == "wheel": # compute zoom vector - vector = qv_mult([transform.r.w, transform.r.x, transform.r.y, transform.r.z], + vector = qv_mult([transform.r.w, transform.r.x, transform.r.y, transform.r.z], [-0.025 * dz, 0, 0]) - + # update transform transform.p.x += vector[0] transform.p.y += 
vector[1] @@ -214,7 +214,7 @@ def p_target(p, q, a=0, b=0, c=1, d=0): q = q_mult(q, q_from_angle_axis(dy, [1, 0, 0])) # apply rotation - t = p_target([transform.p.x, transform.p.y, transform.p.z], + t = p_target([transform.p.x, transform.p.y, transform.p.z], [transform.r.w, transform.r.x, transform.r.y, transform.r.z]) p = qv_mult(q, [transform.p.x - t[0], transform.p.y - t[1], transform.p.z - t[2]]) q = q_mult(q, [transform.r.w, transform.r.x, transform.r.y, transform.r.z]) @@ -240,7 +240,7 @@ def p_target(p, q, a=0, b=0, c=1, d=0): # update transform transform.r.w, transform.r.x, transform.r.y, transform.r.z = q - + # walk camera elif mouse == "middle": # compute displacement @@ -264,12 +264,12 @@ def p_target(p, q, a=0, b=0, c=1, d=0): elif self._camera_type == gymapi.IMAGE_DEPTH: self._camera_type = gymapi.IMAGE_COLOR return flask.Response(status=200) - + else: return flask.Response(status=200) - self._gym.set_camera_transform(self._cameras[self._camera_id], - self._envs[self._camera_id], + self._gym.set_camera_transform(self._cameras[self._camera_id], + self._envs[self._camera_id], transform) return flask.Response(status=200) @@ -310,9 +310,9 @@ def setup(self, gym: 'isaacgym.gymapi.Gym', sim: 'isaacgym.gymapi.Sim', envs: Li self._envs = envs self._cameras = cameras - def render(self, - fetch_results: bool = True, - step_graphics: bool = True, + def render(self, + fetch_results: bool = True, + step_graphics: bool = True, render_all_camera_sensors: bool = True, wait_for_page_load: bool = True) -> None: """Render and get the image from the current camera @@ -320,11 +320,11 @@ def render(self, This function must be called after the simulation is stepped (post_physics_step). The following Isaac Gym functions are called before get the image. Their calling can be skipped by setting the corresponding argument to False - + - fetch_results - step_graphics - render_all_camera_sensors - + :param fetch_results: Call Gym.fetch_results method (default: True) :type fetch_results: bool :param step_graphics: Call Gym.step_graphics method (default: True) @@ -357,29 +357,29 @@ def render(self, self._gym.step_graphics(self._sim) if render_all_camera_sensors: self._gym.render_all_camera_sensors(self._sim) - + # get image - image = self._gym.get_camera_image(self._sim, + image = self._gym.get_camera_image(self._sim, self._envs[self._camera_id], - self._cameras[self._camera_id], + self._cameras[self._camera_id], self._camera_type) if self._camera_type == gymapi.IMAGE_COLOR: self._image = image.reshape(image.shape[0], -1, 4)[..., :3] elif self._camera_type == gymapi.IMAGE_DEPTH: - self._image = -image.reshape(image.shape[0], -1) + self._image = -image.reshape(image.shape[0], -1) minimum = 0 if np.isinf(np.min(self._image)) else np.min(self._image) maximum = 5 if np.isinf(np.max(self._image)) else np.max(self._image) self._image = np.clip(1 - (self._image - minimum) / (maximum - minimum), 0, 1) self._image = np.uint8(255 * self._image) else: raise ValueError("Unsupported camera type") - + # notify stream thread self._event_stream.set() self._notified = True -def ik(jacobian_end_effector: torch.Tensor, +def ik(jacobian_end_effector: torch.Tensor, current_position: torch.Tensor, current_orientation: torch.Tensor, goal_position: torch.Tensor, @@ -431,18 +431,18 @@ def print_arguments(args): print(" |-- {}: {}".format(a, args.__getattribute__(a))) def print_asset_options(asset_options: 'isaacgym.gymapi.AssetOptions', asset_name: str = ""): - attrs = ["angular_damping", "armature", "collapse_fixed_joints", 
"convex_decomposition_from_submeshes", - "default_dof_drive_mode", "density", "disable_gravity", "fix_base_link", "flip_visual_attachments", - "linear_damping", "max_angular_velocity", "max_linear_velocity", "mesh_normal_mode", "min_particle_mass", - "override_com", "override_inertia", "replace_cylinder_with_capsule", "tendon_limit_stiffness", "thickness", + attrs = ["angular_damping", "armature", "collapse_fixed_joints", "convex_decomposition_from_submeshes", + "default_dof_drive_mode", "density", "disable_gravity", "fix_base_link", "flip_visual_attachments", + "linear_damping", "max_angular_velocity", "max_linear_velocity", "mesh_normal_mode", "min_particle_mass", + "override_com", "override_inertia", "replace_cylinder_with_capsule", "tendon_limit_stiffness", "thickness", "use_mesh_materials", "use_physx_armature", "vhacd_enabled"] # vhacd_params print("\nAsset options{}".format(" ({})".format(asset_name) if asset_name else "")) for attr in attrs: print(" |-- {}: {}".format(attr, getattr(asset_options, attr) if hasattr(asset_options, attr) else "--")) # vhacd attributes if attr == "vhacd_enabled" and hasattr(asset_options, attr) and getattr(asset_options, attr): - vhacd_attrs = ["alpha", "beta", "concavity", "convex_hull_approximation", "convex_hull_downsampling", - "max_convex_hulls", "max_num_vertices_per_ch", "min_volume_per_ch", "mode", "ocl_acceleration", + vhacd_attrs = ["alpha", "beta", "concavity", "convex_hull_approximation", "convex_hull_downsampling", + "max_convex_hulls", "max_num_vertices_per_ch", "min_volume_per_ch", "mode", "ocl_acceleration", "pca", "plane_downsampling", "project_hull_vertices", "resolution"] print(" |-- vhacd_params:") for vhacd_attr in vhacd_attrs: diff --git a/skrl/utils/model_instantiators.py b/skrl/utils/model_instantiators.py index 86b64294..87aa26fb 100644 --- a/skrl/utils/model_instantiators.py +++ b/skrl/utils/model_instantiators.py @@ -82,17 +82,17 @@ def _get_num_units_by_shape(model: Model, shape: Shape) -> int: :rtype: int """ num_units = {Shape.ONE: 1, - Shape.STATES: model.num_observations, + Shape.STATES: model.num_observations, Shape.ACTIONS: model.num_actions, Shape.STATES_ACTIONS: model.num_observations + model.num_actions} return num_units[shape] -def _generate_sequential(model: Model, - input_shape: Shape = Shape.STATES, - hiddens: list = [256, 256], - hidden_activation: list = ["relu", "relu"], - output_shape: Shape = Shape.ACTIONS, - output_activation: Union[str, None] = "tanh", +def _generate_sequential(model: Model, + input_shape: Shape = Shape.STATES, + hiddens: list = [256, 256], + hidden_activation: list = ["relu", "relu"], + output_shape: Shape = Shape.ACTIONS, + output_activation: Union[str, None] = "tanh", output_scale: int = None) -> nn.Sequential: """Generate a sequential model @@ -127,22 +127,22 @@ def _generate_sequential(model: Model, output_layer = [nn.Linear(hiddens[-1], _get_num_units_by_shape(model, output_shape))] if output_activation is not None: output_layer.append(_get_activation_function(output_activation)) - + return nn.Sequential(*input_layer, *hidden_layers, *output_layer) -def gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, +def gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", - clip_actions: bool = False, - clip_log_std: bool = True, - min_log_std: float = -20, - max_log_std: float = 2, - input_shape: Shape = Shape.STATES, - hiddens: 
list = [256, 256], - hidden_activation: list = ["relu", "relu"], - output_shape: Shape = Shape.ACTIONS, - output_activation: Union[str, None] = "tanh", - output_scale: float = 1.0) -> Model: + device: Union[str, torch.device] = "cuda:0", + clip_actions: bool = False, + clip_log_std: bool = True, + min_log_std: float = -20, + max_log_std: float = 2, + input_shape: Shape = Shape.STATES, + hiddens: list = [256, 256], + hidden_activation: list = ["relu", "relu"], + output_shape: Shape = Shape.ACTIONS, + output_activation: Union[str, None] = "tanh", + output_scale: float = 1.0) -> Model: """Instantiate a Gaussian model :param observation_space: Observation/state space or shape (default: None). @@ -195,7 +195,7 @@ def __init__(self, observation_space, action_space, device, clip_actions, output_activation=metadata["output_activation"], output_scale=metadata["output_scale"]) self.log_std_parameter = nn.Parameter(torch.zeros(_get_num_units_by_shape(self, metadata["output_shape"]))) - + def compute(self, states, taken_actions=None, role=""): if self.instantiator_input_type == 0: output = self.net(states) @@ -206,35 +206,35 @@ def compute(self, states, taken_actions=None, role=""): return output * self.instantiator_output_scale, self.log_std_parameter - metadata = {"input_shape": input_shape, - "hiddens": hiddens, - "hidden_activation": hidden_activation, - "output_shape": output_shape, - "output_activation": output_activation, + metadata = {"input_shape": input_shape, + "hiddens": hiddens, + "hidden_activation": hidden_activation, + "output_shape": output_shape, + "output_activation": output_activation, "output_scale": output_scale} return GaussianModel(observation_space=observation_space, - action_space=action_space, - device=device, - clip_actions=clip_actions, - clip_log_std=clip_log_std, + action_space=action_space, + device=device, + clip_actions=clip_actions, + clip_log_std=clip_log_std, min_log_std=min_log_std, max_log_std=max_log_std, metadata=metadata) - -def multivariate_gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, + +def multivariate_gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", - clip_actions: bool = False, - clip_log_std: bool = True, - min_log_std: float = -20, - max_log_std: float = 2, - input_shape: Shape = Shape.STATES, - hiddens: list = [256, 256], - hidden_activation: list = ["relu", "relu"], - output_shape: Shape = Shape.ACTIONS, - output_activation: Union[str, None] = "tanh", - output_scale: float = 1.0) -> Model: + device: Union[str, torch.device] = "cuda:0", + clip_actions: bool = False, + clip_log_std: bool = True, + min_log_std: float = -20, + max_log_std: float = 2, + input_shape: Shape = Shape.STATES, + hiddens: list = [256, 256], + hidden_activation: list = ["relu", "relu"], + output_shape: Shape = Shape.ACTIONS, + output_activation: Union[str, None] = "tanh", + output_scale: float = 1.0) -> Model: """Instantiate a multivariate Gaussian model :param observation_space: Observation/state space or shape (default: None). 
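For reference, a minimal sketch of how the instantiators reformatted in this file can be called to build a policy/value pair. The Box spaces, layer sizes and CPU device are illustrative assumptions, and ``Shape`` is assumed to be importable from the same module; only the keyword names come from the signatures above::

    import gym

    from skrl.utils.model_instantiators import Shape, deterministic_model, gaussian_model

    observation_space = gym.spaces.Box(low=-1, high=1, shape=(60,))
    action_space = gym.spaces.Box(low=-1, high=1, shape=(8,))

    # stochastic policy: MLP with two hidden layers and a tanh-bounded mean
    policy = gaussian_model(observation_space=observation_space,
                            action_space=action_space,
                            device="cpu",
                            clip_actions=True,
                            input_shape=Shape.STATES,
                            hiddens=[256, 256],
                            hidden_activation=["relu", "relu"],
                            output_shape=Shape.ACTIONS,
                            output_activation="tanh")

    # value function: same MLP body with a single, unbounded output
    value = deterministic_model(observation_space=observation_space,
                                action_space=action_space,
                                device="cpu",
                                input_shape=Shape.STATES,
                                hiddens=[256, 256],
                                hidden_activation=["relu", "relu"],
                                output_shape=Shape.ONE,
                                output_activation=None)
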
@@ -287,7 +287,7 @@ def __init__(self, observation_space, action_space, device, clip_actions, output_activation=metadata["output_activation"], output_scale=metadata["output_scale"]) self.log_std_parameter = nn.Parameter(torch.zeros(_get_num_units_by_shape(self, metadata["output_shape"]))) - + def compute(self, states, taken_actions=None, role=""): if self.instantiator_input_type == 0: output = self.net(states) @@ -298,31 +298,31 @@ def compute(self, states, taken_actions=None, role=""): return output * self.instantiator_output_scale, self.log_std_parameter - metadata = {"input_shape": input_shape, - "hiddens": hiddens, - "hidden_activation": hidden_activation, - "output_shape": output_shape, - "output_activation": output_activation, + metadata = {"input_shape": input_shape, + "hiddens": hiddens, + "hidden_activation": hidden_activation, + "output_shape": output_shape, + "output_activation": output_activation, "output_scale": output_scale} return MultivariateGaussianModel(observation_space=observation_space, - action_space=action_space, - device=device, - clip_actions=clip_actions, - clip_log_std=clip_log_std, + action_space=action_space, + device=device, + clip_actions=clip_actions, + clip_log_std=clip_log_std, min_log_std=min_log_std, max_log_std=max_log_std, metadata=metadata) -def deterministic_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", - clip_actions: bool = False, - input_shape: Shape = Shape.STATES, - hiddens: list = [256, 256], - hidden_activation: list = ["relu", "relu"], - output_shape: Shape = Shape.ACTIONS, - output_activation: Union[str, None] = "tanh", +def deterministic_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", + clip_actions: bool = False, + input_shape: Shape = Shape.STATES, + hiddens: list = [256, 256], + hidden_activation: list = ["relu", "relu"], + output_shape: Shape = Shape.ACTIONS, + output_activation: Union[str, None] = "tanh", output_scale: float = 1.0) -> Model: """Instantiate a deterministic model @@ -368,7 +368,7 @@ def __init__(self, observation_space, action_space, device, clip_actions, metada output_shape=metadata["output_shape"], output_activation=metadata["output_activation"], output_scale=metadata["output_scale"]) - + def compute(self, states, taken_actions=None, role=""): if self.instantiator_input_type == 0: output = self.net(states) @@ -379,27 +379,27 @@ def compute(self, states, taken_actions=None, role=""): return output * self.instantiator_output_scale - metadata = {"input_shape": input_shape, - "hiddens": hiddens, - "hidden_activation": hidden_activation, - "output_shape": output_shape, - "output_activation": output_activation, + metadata = {"input_shape": input_shape, + "hiddens": hiddens, + "hidden_activation": hidden_activation, + "output_shape": output_shape, + "output_activation": output_activation, "output_scale": output_scale} return DeterministicModel(observation_space=observation_space, - action_space=action_space, - device=device, - clip_actions=clip_actions, + action_space=action_space, + device=device, + clip_actions=clip_actions, metadata=metadata) -def categorical_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, - device: Union[str, torch.device] = "cuda:0", - 
unnormalized_log_prob: bool = False, - input_shape: Shape = Shape.STATES, - hiddens: list = [256, 256], - hidden_activation: list = ["relu", "relu"], - output_shape: Shape = Shape.ACTIONS, +def categorical_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, + action_space: Union[int, Tuple[int], gym.Space, None] = None, + device: Union[str, torch.device] = "cuda:0", + unnormalized_log_prob: bool = False, + input_shape: Shape = Shape.STATES, + hiddens: list = [256, 256], + hidden_activation: list = ["relu", "relu"], + output_shape: Shape = Shape.ACTIONS, output_activation: Union[str, None] = None) -> Model: """Instantiate a categorical model @@ -412,8 +412,8 @@ def categorical_model(observation_space: Union[int, Tuple[int], gym.Space, None] :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") :type device: str or torch.device, optional :param unnormalized_log_prob: Flag to indicate how to be interpreted the model's output (default: True). - If True, the model's output is interpreted as unnormalized log probabilities - (it can be any real number), otherwise as normalized probabilities + If True, the model's output is interpreted as unnormalized log probabilities + (it can be any real number), otherwise as normalized probabilities (the output must be non-negative, finite and have a non-zero sum) :type unnormalized_log_prob: bool, optional :param input_shape: Shape of the input (default: Shape.STATES) @@ -443,7 +443,7 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro hidden_activation=metadata["hidden_activation"], output_shape=metadata["output_shape"], output_activation=metadata["output_activation"]) - + def compute(self, states, taken_actions=None, role=""): if self.instantiator_input_type == 0: output = self.net(states) @@ -454,14 +454,14 @@ def compute(self, states, taken_actions=None, role=""): return output - metadata = {"input_shape": input_shape, - "hiddens": hiddens, - "hidden_activation": hidden_activation, - "output_shape": output_shape, + metadata = {"input_shape": input_shape, + "hiddens": hiddens, + "hidden_activation": hidden_activation, + "output_shape": output_shape, "output_activation": output_activation} return CategoricalModel(observation_space=observation_space, - action_space=action_space, - device=device, - unnormalized_log_prob=unnormalized_log_prob, + action_space=action_space, + device=device, + unnormalized_log_prob=unnormalized_log_prob, metadata=metadata) diff --git a/skrl/utils/omniverse_isaacgym_utils.py b/skrl/utils/omniverse_isaacgym_utils.py index 42d5da67..5a8f4f7e 100644 --- a/skrl/utils/omniverse_isaacgym_utils.py +++ b/skrl/utils/omniverse_isaacgym_utils.py @@ -54,7 +54,7 @@ def _torch_quat_conjugate(a): # wxyz a = a.reshape(-1, 4) return torch.cat((a[:, :1], -a[:, 1:]), dim=-1).view(shape) -def ik(jacobian_end_effector: torch.Tensor, +def ik(jacobian_end_effector: torch.Tensor, current_position: torch.Tensor, current_orientation: torch.Tensor, goal_position: torch.Tensor, @@ -139,50 +139,50 @@ def get_env_instance(headless: bool = True, multi_threaded: bool = False) -> "om # parse sim configuration from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig - sim_config = SimConfig({"test": False, - "device_id": 0, + sim_config = SimConfig({"test": False, + "device_id": 0, "headless": True, - "sim_device": "gpu", - "task": {"name": "CustomTask", - "physics_engine": "physx", - "env": {"numEnvs": 512, - "envSpacing": 1.5, - "enableDebugVis": False, - 
"clipObservations": 1000.0, - "clipActions": 1.0, - "controlFrequencyInv": 4}, + "sim_device": "gpu", + "task": {"name": "CustomTask", + "physics_engine": "physx", + "env": {"numEnvs": 512, + "envSpacing": 1.5, + "enableDebugVis": False, + "clipObservations": 1000.0, + "clipActions": 1.0, + "controlFrequencyInv": 4}, "sim": {"dt": 0.0083, # 1 / 120 - "use_gpu_pipeline": True, - "gravity": [0.0, 0.0, -9.81], - "add_ground_plane": True, - "use_flatcache": True, - "enable_scene_query_support": False, - "enable_cameras": False, - "default_physics_material": {"static_friction": 1.0, - "dynamic_friction": 1.0, - "restitution": 0.0}, - "physx": {"worker_thread_count": 4, - "solver_type": 1, - "use_gpu": True, - "solver_position_iteration_count": 4, - "solver_velocity_iteration_count": 1, - "contact_offset": 0.005, - "rest_offset": 0.0, - "bounce_threshold_velocity": 0.2, - "friction_offset_threshold": 0.04, - "friction_correlation_distance": 0.025, - "enable_sleeping": True, - "enable_stabilization": True, - "max_depenetration_velocity": 1000.0, - "gpu_max_rigid_contact_count": 524288, - "gpu_max_rigid_patch_count": 33554432, - "gpu_found_lost_pairs_capacity": 524288, - "gpu_found_lost_aggregate_pairs_capacity": 262144, - "gpu_total_aggregate_pairs_capacity": 1048576, - "gpu_max_soft_body_contacts": 1048576, - "gpu_max_particle_contacts": 1048576, - "gpu_heap_capacity": 33554432, - "gpu_temp_buffer_capacity": 16777216, + "use_gpu_pipeline": True, + "gravity": [0.0, 0.0, -9.81], + "add_ground_plane": True, + "use_flatcache": True, + "enable_scene_query_support": False, + "enable_cameras": False, + "default_physics_material": {"static_friction": 1.0, + "dynamic_friction": 1.0, + "restitution": 0.0}, + "physx": {"worker_thread_count": 4, + "solver_type": 1, + "use_gpu": True, + "solver_position_iteration_count": 4, + "solver_velocity_iteration_count": 1, + "contact_offset": 0.005, + "rest_offset": 0.0, + "bounce_threshold_velocity": 0.2, + "friction_offset_threshold": 0.04, + "friction_correlation_distance": 0.025, + "enable_sleeping": True, + "enable_stabilization": True, + "max_depenetration_velocity": 1000.0, + "gpu_max_rigid_contact_count": 524288, + "gpu_max_rigid_patch_count": 33554432, + "gpu_found_lost_pairs_capacity": 524288, + "gpu_found_lost_aggregate_pairs_capacity": 262144, + "gpu_total_aggregate_pairs_capacity": 1048576, + "gpu_max_soft_body_contacts": 1048576, + "gpu_max_particle_contacts": 1048576, + "gpu_heap_capacity": 33554432, + "gpu_temp_buffer_capacity": 16777216, "gpu_max_num_partitions": 8}}}}) # import and setup custom task @@ -225,7 +225,7 @@ def stop(self): class _OmniIsaacGymVecEnvMT(VecEnvMT): def __init__(self, headless): super().__init__(headless) - + self.action_queue = queue.Queue(1) self.data_queue = queue.Queue(1) diff --git a/skrl/utils/postprocessing.py b/skrl/utils/postprocessing.py index 7192ee66..f35f48fc 100644 --- a/skrl/utils/postprocessing.py +++ b/skrl/utils/postprocessing.py @@ -12,9 +12,9 @@ class MemoryFileIterator(): def __init__(self, pathname: str) -> None: """Python iterator for loading data from exported memories - + The iterator will load the next memory file in the list of path names. 
- The output of the iterator is a tuple of the filename and the memory data + The output of the iterator is a tuple of the filename and the memory data where the memory data is a dictionary of torch.Tensor (PyTorch), numpy.ndarray (NumPy) or lists (CSV) depending on the format and the keys of the dictionary are the names of the variables @@ -31,7 +31,7 @@ def __init__(self, pathname: str) -> None: - Comma-separated values: (memory_size * num_envs, data_size) :param pathname: String containing a path specification for the exported memories. - Python `glob `_ method + Python `glob `_ method is used to find all files matching the path specification :type pathname: str """ @@ -50,7 +50,7 @@ def __next__(self) -> Tuple[str, dict]: """ if self.n >= len(self.file_paths): raise StopIteration - + if self.file_paths[self.n].endswith(".pt"): return self._format_torch() elif self.file_paths[self.n].endswith(".npz"): @@ -62,7 +62,7 @@ def __next__(self) -> Tuple[str, dict]: def _format_numpy(self) -> Tuple[str, dict]: """Load numpy array from file - + :return: Tuple of file name and data :rtype: tuple """ @@ -94,7 +94,7 @@ def _format_csv(self) -> Tuple[str, dict]: with open(self.file_paths[self.n], 'r') as f: reader = csv.reader(f) - + # parse header try: header = next(reader, None) @@ -123,13 +123,13 @@ def _format_csv(self) -> Tuple[str, dict]: class TensorboardFileIterator(): def __init__(self, pathname: str, tags: Union[str, List[str]]) -> None: """Python iterator for loading data from Tensorboard files - + The iterator will load the next Tensorboard file in the list of path names. The iterator's output is a tuple of the directory name and the Tensorboard variables selected by the tags. The Tensorboard data is returned as a dictionary with the tag as the key and a list of steps and values as the value :param pathname: String containing a path specification for the Tensorboard files. 
- Python `glob `_ method + Python `glob `_ method is used to find all files matching the path specification :type pathname: str :param tags: String or list of strings containing the tags of the variables to load @@ -150,7 +150,7 @@ def __next__(self) -> Tuple[str, dict]: :rtype: tuple """ from tensorflow.python.summary.summary_iterator import summary_iterator - + if self.n >= len(self.file_paths): raise StopIteration diff --git a/skrl/version.txt b/skrl/version.txt index 8adc70fd..a3df0a69 100644 --- a/skrl/version.txt +++ b/skrl/version.txt @@ -1 +1 @@ -0.8.0 \ No newline at end of file +0.8.0 diff --git a/tests/test_noises_gaussian.py b/tests/test_noises_gaussian.py index f2dfc0b2..b838604e 100644 --- a/tests/test_noises_gaussian.py +++ b/tests/test_noises_gaussian.py @@ -66,7 +66,7 @@ def test_method_sample_like(self): if not sys.argv[-1] == '--debug': raise RuntimeError('Test can only be runned manually with --debug flag') - + test = TestCase() test.setUp() for method in dir(test): diff --git a/tests/test_noises_ornstein_uhlenbeck.py b/tests/test_noises_ornstein_uhlenbeck.py index 6c7241b4..fa24c168 100644 --- a/tests/test_noises_ornstein_uhlenbeck.py +++ b/tests/test_noises_ornstein_uhlenbeck.py @@ -57,7 +57,7 @@ def test_method_sample_like(self): if not sys.argv[-1] == '--debug': raise RuntimeError('Test can only be runned manually with --debug flag') - + test = TestCase() test.setUp() for method in dir(test): From b7f934fcd9aff803d2af35940feee430ba79532a Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Thu, 29 Sep 2022 16:09:49 +0200 Subject: [PATCH 002/157] Replace mutable default arguments Mutable default arguments are "a source of evil" and "Python's number one antipattern". They can lead to nasty and hard to track down bugs. Also, removing empty space at end of line.
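For context, a minimal sketch of the pitfall this patch removes and of the None-sentinel pattern it introduces (illustrative only; the function below is hypothetical and not part of skrl):

    # A mutable default is evaluated once, at function definition time,
    # so every call that omits `cfg` shares (and mutates) the same dict.
    def update_config_buggy(key, cfg={}):
        cfg[key] = True
        return cfg

    a = update_config_buggy("agent_a")
    b = update_config_buggy("agent_b")
    assert a is b and "agent_a" in b    # state leaked between unrelated calls

    # The pattern applied throughout this patch: default to None,
    # then create a fresh dict inside the function body.
    def update_config_fixed(key, cfg=None):
        cfg = cfg if cfg is not None else {}
        cfg[key] = True
        return cfg

    assert update_config_fixed("agent_a") is not update_config_fixed("agent_b")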
--- docs/source/snippets/agent.py | 6 +++--- docs/source/snippets/trainer.py | 9 +++++---- skrl/agents/torch/a2c/a2c.py | 6 +++--- skrl/agents/torch/amp/amp.py | 6 +++--- skrl/agents/torch/base.py | 6 +++--- skrl/agents/torch/cem/cem.py | 6 +++--- skrl/agents/torch/ddpg/ddpg.py | 6 +++--- skrl/agents/torch/dqn/ddqn.py | 6 +++--- skrl/agents/torch/dqn/dqn.py | 6 +++--- skrl/agents/torch/ppo/ppo.py | 6 +++--- skrl/agents/torch/q_learning/q_learning.py | 6 +++--- skrl/agents/torch/sac/sac.py | 6 +++--- skrl/agents/torch/sarsa/sarsa.py | 6 +++--- skrl/agents/torch/td3/td3.py | 6 +++--- skrl/agents/torch/trpo/trpo.py | 6 +++--- skrl/trainers/torch/base.py | 14 +++++++------- skrl/trainers/torch/manual.py | 7 ++++--- skrl/trainers/torch/parallel.py | 9 +++++---- skrl/trainers/torch/sequential.py | 9 +++++---- 19 files changed, 68 insertions(+), 64 deletions(-) diff --git a/docs/source/snippets/agent.py b/docs/source/snippets/agent.py index 95252f81..466b7a44 100644 --- a/docs/source/snippets/agent.py +++ b/docs/source/snippets/agent.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym @@ -29,7 +29,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """ :param models: Models used by the agent :type models: dictionary of skrl.models.torch.Model @@ -44,7 +44,7 @@ def __init__(self, :param cfg: Configuration dictionary :type cfg: dict """ - CUSTOM_DEFAULT_CONFIG.update(cfg) + CUSTOM_DEFAULT_CONFIG.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/docs/source/snippets/trainer.py b/docs/source/snippets/trainer.py index 57043263..b4555acc 100644 --- a/docs/source/snippets/trainer.py +++ b/docs/source/snippets/trainer.py @@ -1,5 +1,5 @@ # [start-base] -from typing import Union, List +from typing import Union, List, Optional import copy @@ -19,8 +19,8 @@ class CustomTrainer(Trainer): def __init__(self, env: Wrapper, agents: Union[Agent, List[Agent], List[List[Agent]]], - agents_scope : List[int] = [], - cfg: dict = {}) -> None: + agents_scope: Optional[List[int]] = None, + cfg: Optional[dict] = None) -> None: """ :param env: Environment to train on :type env: skrl.env.torch.Wrapper @@ -32,7 +32,8 @@ def __init__(self, :type cfg: dict, optional """ _cfg = copy.deepcopy(CUSTOM_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) + agents_scope = agents_scope if agents_scope is not None else [] super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) # ================================ diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index c4f2c08c..314a36b0 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -57,7 +57,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """Advantage Actor Critic (A2C) https://arxiv.org/abs/1602.01783 @@ -80,7 +80,7 @@ def __init__(self, :raises KeyError: If the models 
dictionary is missing a required key """ _cfg = copy.deepcopy(A2C_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 349ee6b7..05b6e609 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -1,4 +1,4 @@ -from typing import Callable, Union, Tuple, Dict, Any +from typing import Callable, Union, Tuple, Dict, Any, Optional import gym import math @@ -75,7 +75,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}, + cfg: Optional[dict] = None, amp_observation_space: Union[int, Tuple[int], gym.Space, None] = None, motion_dataset: Union[Memory, None] = None, reply_buffer: Union[Memory, None] = None, @@ -116,7 +116,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(AMP_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 44476e78..0aec7b7d 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -1,4 +1,4 @@ -from typing import Union, Mapping, Tuple, Dict, Any +from typing import Union, Mapping, Tuple, Dict, Any, Optional import os import gym @@ -22,7 +22,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """Base class that represent a RL agent :param models: Models used by the agent @@ -44,7 +44,7 @@ def __init__(self, self.observation_space = observation_space self.action_space = action_space self.device = torch.device(device) - self.cfg = cfg + self.cfg = cfg if cfg is not None else {} if type(memory) is list: self.memory = memory[0] diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 96055f0c..86022a69 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -48,7 +48,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """Cross-Entropy Method (CEM) https://ieeexplore.ieee.org/abstract/document/6796865/ @@ -71,7 +71,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(CEM_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 33f3174a..264b1858 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -57,7 +57,7 @@ def __init__(self, 
observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """Deep Deterministic Policy Gradient (DDPG) https://arxiv.org/abs/1509.02971 @@ -80,7 +80,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(DDPG_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 65a64f38..cea703ff 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -59,7 +59,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """Double Deep Q-Network (DDQN) https://ojs.aaai.org/index.php/AAAI/article/view/10295 @@ -82,7 +82,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(DDQN_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index 929aa024..1dd08e71 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -59,7 +59,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """Deep Q-Network (DQN) https://arxiv.org/abs/1312.5602 @@ -82,7 +82,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(DQN_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index f00c9a99..844e317f 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -65,7 +65,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """Proximal Policy Optimization (PPO) https://arxiv.org/abs/1707.06347 @@ -88,7 +88,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(PPO_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git 
a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index facb094f..90788e08 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -39,7 +39,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """Q-learning https://www.academia.edu/3294050/Learning_from_delayed_rewards @@ -62,7 +62,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(Q_LEARNING_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index d76ceb97..e9f7cfd5 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -57,7 +57,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """Soft Actor-Critic (SAC) https://arxiv.org/abs/1801.01290 @@ -80,7 +80,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(SAC_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index 7d68793b..372e7c59 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -39,7 +39,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """State Action Reward State Action (SARSA) https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.17.2539 @@ -62,7 +62,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(SARSA_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index bc7a5f22..5b370f40 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -62,7 +62,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] 
= None) -> None: """Twin Delayed DDPG (TD3) https://arxiv.org/abs/1802.09477 @@ -85,7 +85,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(TD3_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index e0259a6b..ef45bd06 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, Dict, Any +from typing import Union, Tuple, Dict, Any, Optional import gym import copy @@ -65,7 +65,7 @@ def __init__(self, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, device: Union[str, torch.device] = "cuda:0", - cfg: dict = {}) -> None: + cfg: Optional[dict] = None) -> None: """Trust Region Policy Optimization (TRPO) https://arxiv.org/abs/1502.05477 @@ -88,7 +88,7 @@ def __init__(self, :raises KeyError: If the models dictionary is missing a required key """ _cfg = copy.deepcopy(TRPO_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) super().__init__(models=models, memory=memory, observation_space=observation_space, diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index 7565b090..297b5cae 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -1,4 +1,4 @@ -from typing import Union, List +from typing import Union, List, Optional import tqdm @@ -30,12 +30,12 @@ def generate_equally_spaced_scopes(num_envs: int, num_agents: int) -> List[int]: return scopes -class Trainer(): +class Trainer: def __init__(self, env: Wrapper, agents: Union[Agent, List[Agent]], - agents_scope : List[int] = [], - cfg: dict = {}) -> None: + agents_scope: Optional[List[int]] = None, + cfg: Optional[dict] = None) -> None: """Base class for trainers :param env: Environment to train on @@ -47,13 +47,13 @@ def __init__(self, :param cfg: Configuration dictionary (default: {}) :type cfg: dict, optional """ - self.cfg = cfg + self.cfg = cfg if cfg is not None else {} self.env = env self.agents = agents - self.agents_scope = agents_scope + self.agents_scope = agents_scope if agents_scope is not None else [] # get configuration - self.timesteps = self.cfg.get('timesteps', 0) + self.timesteps = self.cfg.get("timesteps", 0) self.headless = self.cfg.get("headless", False) self.initial_timestep = 0 diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index 2e414fec..e067314e 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -21,8 +21,8 @@ class ManualTrainer(Trainer): def __init__(self, env: Wrapper, agents: Union[Agent, List[Agent]], - agents_scope : List[int] = [], - cfg: dict = {}) -> None: + agents_scope: Optional[List[int]] = None, + cfg: Optional[dict] = None) -> None: """Manual trainer Train agents by manually controlling the training/evaluation loop @@ -38,7 +38,8 @@ def __init__(self, :type cfg: dict, optional """ _cfg = copy.deepcopy(MANUAL_TRAINER_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) + agents_scope = agents_scope if agents_scope is not None else [] super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) # init agents diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index c02957a7..2363d2b4 100644 --- 
a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -1,4 +1,4 @@ -from typing import Union, List +from typing import Union, List, Optional import copy import tqdm @@ -100,8 +100,8 @@ class ParallelTrainer(Trainer): def __init__(self, env: Wrapper, agents: Union[Agent, List[Agent]], - agents_scope : List[int] = [], - cfg: dict = {}) -> None: + agents_scope: Optional[List[int]] = None, + cfg: Optional[dict] = None) -> None: """Parallel trainer Train agents in parallel using multiple processes @@ -117,7 +117,8 @@ def __init__(self, :type cfg: dict, optional """ _cfg = copy.deepcopy(PARALLEL_TRAINER_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) + agents_scope = agents_scope if agents_scope is not None else [] super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) mp.set_start_method(method='spawn', force=True) diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index 910b6079..aad8dea9 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -1,4 +1,4 @@ -from typing import Union, List +from typing import Union, List, Optional import copy import tqdm @@ -21,8 +21,8 @@ class SequentialTrainer(Trainer): def __init__(self, env: Wrapper, agents: Union[Agent, List[Agent]], - agents_scope : List[int] = [], - cfg: dict = {}) -> None: + agents_scope: Optional[List[int]] = None, + cfg: Optional[dict] = None) -> None: """Sequential trainer Train agents sequentially (i.e., one after the other in each interaction with the environment) @@ -38,7 +38,8 @@ def __init__(self, :type cfg: dict, optional """ _cfg = copy.deepcopy(SEQUENTIAL_TRAINER_DEFAULT_CONFIG) - _cfg.update(cfg) + _cfg.update(cfg if cfg is not None else {}) + agents_scope = agents_scope if agents_scope is not None else [] super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) # init agents From f00e7527f5b43fd0c3e627e0b7bff06fd06e83c4 Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Thu, 29 Sep 2022 16:17:10 +0200 Subject: [PATCH 003/157] Improve typing with Optional type hint Replace all Union[..., None] = None type hints with Optional[...] = None. This makes it clear that this argument is optional. As an added bonus, it is often more compact. Also, removing empty space at end of line. 
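As a brief aside (illustrative sketch only, with hypothetical function names), `Optional[X]` is defined in the `typing` module as `Union[X, None]`, so this rewrite changes readability and intent, not behavior:

    from typing import Optional, Union

    # The two spellings denote the same type; the equality below holds at runtime.
    assert Optional[dict] == Union[dict, None]

    # Equivalent annotations for an argument that may be omitted,
    # mirroring the `cfg` parameters touched in this patch:
    def old_style(cfg: Union[dict, None] = None) -> None:
        ...

    def new_style(cfg: Optional[dict] = None) -> None:
        ...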
--- docs/source/snippets/agent.py | 6 +-- skrl/agents/torch/a2c/a2c.py | 6 +-- skrl/agents/torch/amp/amp.py | 16 +++---- skrl/agents/torch/base.py | 6 +++ skrl/agents/torch/cem/cem.py | 6 +-- skrl/agents/torch/ddpg/ddpg.py | 6 +-- skrl/agents/torch/dqn/ddqn.py | 6 +-- skrl/agents/torch/dqn/dqn.py | 6 +-- skrl/agents/torch/ppo/ppo.py | 6 +-- skrl/agents/torch/q_learning/q_learning.py | 6 +-- skrl/agents/torch/sac/sac.py | 6 +-- skrl/agents/torch/sarsa/sarsa.py | 6 +-- skrl/agents/torch/td3/td3.py | 6 +-- skrl/agents/torch/trpo/trpo.py | 6 +-- skrl/envs/torch/wrappers.py | 4 +- skrl/memories/torch/base.py | 4 +- .../resources/schedulers/torch/kl_adaptive.py | 4 +- skrl/utils/isaacgym_utils.py | 4 +- skrl/utils/model_instantiators.py | 42 ++++++++++++++++++- 19 files changed, 99 insertions(+), 53 deletions(-) diff --git a/docs/source/snippets/agent.py b/docs/source/snippets/agent.py index 466b7a44..8e9c5d72 100644 --- a/docs/source/snippets/agent.py +++ b/docs/source/snippets/agent.py @@ -25,9 +25,9 @@ class CUSTOM(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Memory] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """ diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 314a36b0..982b1765 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -53,9 +53,9 @@ class A2C(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Advantage Actor Critic (A2C) diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 05b6e609..1ccba9ed 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -71,16 +71,16 @@ class AMP(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None, - amp_observation_space: Union[int, Tuple[int], gym.Space, None] = None, - motion_dataset: Union[Memory, None] = None, - reply_buffer: Union[Memory, None] = None, - collect_reference_motions: Union[Callable[[int], torch.Tensor], None] = None, - collect_observation: Union[Callable[[], torch.Tensor], None] = None) -> None: + amp_observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + motion_dataset: Optional[Memory] = None, + reply_buffer: Optional[Memory] = None, + collect_reference_motions: Optional[Callable[[int], torch.Tensor]] = None, + 
collect_observation: Optional[Callable[[], torch.Tensor]] = None) -> None: """Adversarial Motion Priors (AMP) https://arxiv.org/abs/2104.02180 diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 0aec7b7d..48da3ff3 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -18,9 +18,15 @@ class Agent: def __init__(self, models: Dict[str, Model], +<<<<<<< HEAD memory: Union[Memory, Tuple[Memory], None] = None, observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, +======= + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, +>>>>>>> 6be1f67 (Improve typing with Optional type hint) device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Base class that represent a RL agent diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 86022a69..d7176a79 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -44,9 +44,9 @@ class CEM(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Cross-Entropy Method (CEM) diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 264b1858..534cd75a 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -53,9 +53,9 @@ class DDPG(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Deep Deterministic Policy Gradient (DDPG) diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index cea703ff..bb053fe8 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -55,9 +55,9 @@ class DDQN(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Double Deep Q-Network (DDQN) diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index 1dd08e71..e0d5d672 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -55,9 +55,9 @@ class DQN(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = 
None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Deep Q-Network (DQN) diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 844e317f..1aa552f2 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -61,9 +61,9 @@ class PPO(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Proximal Policy Optimization (PPO) diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 90788e08..30aa0932 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -35,9 +35,9 @@ class Q_LEARNING(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Q-learning diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index e9f7cfd5..728bd64a 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -53,9 +53,9 @@ class SAC(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Soft Actor-Critic (SAC) diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index 372e7c59..eb7ea58a 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -35,9 +35,9 @@ class SARSA(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """State Action Reward State Action (SARSA) diff --git 
a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index 5b370f40..03848d39 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -58,9 +58,9 @@ class TD3(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Twin Delayed DDPG (TD3) diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index ef45bd06..8cdaacd9 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -61,9 +61,9 @@ class TRPO(Agent): def __init__(self, models: Dict[str, Model], - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, + memory: Optional[Union[Memory, Tuple[Memory]]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Trust Region Policy Optimization (TRPO) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index 5f903e58..6de4616d 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -304,7 +304,7 @@ def action_space(self) -> gym.Space: return self._env.single_action_space return self._env.action_space - def _observation_to_tensor(self, observation: Any, space: Union[gym.Space, None] = None) -> torch.Tensor: + def _observation_to_tensor(self, observation: Any, space: Optional[gym.Space] = None) -> torch.Tensor: """Convert the OpenAI Gym observation to a flat tensor :param observation: The OpenAI Gym observation to convert to a tensor @@ -472,7 +472,7 @@ def _spec_to_space(self, spec: Any) -> gym.Space: else: raise ValueError("Spec type {} not supported. Please report this issue".format(type(spec))) - def _observation_to_tensor(self, observation: Any, spec: Union[Any, None] = None) -> torch.Tensor: + def _observation_to_tensor(self, observation: Any, spec: Optional[Any] = None) -> torch.Tensor: """Convert the DeepMind observation to a flat tensor :param observation: The DeepMind observation to convert to a tensor diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py index 70f37bd0..41c4dff3 100644 --- a/skrl/memories/torch/base.py +++ b/skrl/memories/torch/base.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, List +from typing import Union, Tuple, List, Optional import os import csv @@ -142,7 +142,7 @@ def set_tensor_by_name(self, name: str, tensor: torch.Tensor) -> None: with torch.no_grad(): self.tensors[name].copy_(tensor) - def create_tensor(self, name: str, size: Union[int, Tuple[int], gym.Space], dtype: Union[torch.dtype, None] = None) -> bool: + def create_tensor(self, name: str, size: Union[int, Tuple[int], gym.Space], dtype: Optional[torch.dtype] = None) -> bool: """Create a new internal tensor in memory The tensor will have a 3-components shape (memory size, number of environments, size). 
diff --git a/skrl/resources/schedulers/torch/kl_adaptive.py b/skrl/resources/schedulers/torch/kl_adaptive.py index d6d54a3e..efb14a28 100644 --- a/skrl/resources/schedulers/torch/kl_adaptive.py +++ b/skrl/resources/schedulers/torch/kl_adaptive.py @@ -1,4 +1,4 @@ -from typing import Union +from typing import Union, Optional import torch from torch.optim.lr_scheduler import _LRScheduler @@ -61,7 +61,7 @@ def __init__(self, self._last_lr = [group['lr'] for group in self.optimizer.param_groups] - def step(self, kl: Union[torch.Tensor, float, None] = None, epoch: Union[int, None] = None) -> None: + def step(self, kl: Optional[Union[torch.Tensor, float]] = None, epoch: Optional[int] = None) -> None: """ Step scheduler diff --git a/skrl/utils/isaacgym_utils.py b/skrl/utils/isaacgym_utils.py index 9cc1fae8..928d3636 100644 --- a/skrl/utils/isaacgym_utils.py +++ b/skrl/utils/isaacgym_utils.py @@ -1,4 +1,4 @@ -from typing import Union, List +from typing import List, Optional import math import logging @@ -383,7 +383,7 @@ def ik(jacobian_end_effector: torch.Tensor, current_position: torch.Tensor, current_orientation: torch.Tensor, goal_position: torch.Tensor, - goal_orientation: Union[torch.Tensor, None] = None, + goal_orientation: Optional[torch.Tensor] = None, damping_factor: float = 0.05, squeeze_output: bool = True) -> torch.Tensor: """ diff --git a/skrl/utils/model_instantiators.py b/skrl/utils/model_instantiators.py index 87aa26fb..78128635 100644 --- a/skrl/utils/model_instantiators.py +++ b/skrl/utils/model_instantiators.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple +from typing import Union, Tuple, Optional import gym from enum import Enum @@ -93,7 +93,11 @@ def _generate_sequential(model: Model, hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, output_activation: Union[str, None] = "tanh", +<<<<<<< HEAD output_scale: int = None) -> nn.Sequential: +======= + output_scale: Optional[int] = None) -> nn.Sequential: +>>>>>>> 6be1f67 (Improve typing with Optional type hint) """Generate a sequential model :param model: model to generate sequential model for @@ -130,8 +134,13 @@ def _generate_sequential(model: Model, return nn.Sequential(*input_layer, *hidden_layers, *output_layer) +<<<<<<< HEAD def gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, +======= +def gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, +>>>>>>> 6be1f67 (Improve typing with Optional type hint) device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, clip_log_std: bool = True, @@ -141,7 +150,11 @@ def gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = hiddens: list = [256, 256], hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, +<<<<<<< HEAD output_activation: Union[str, None] = "tanh", +======= + output_activation: Optional[str] = "tanh", +>>>>>>> 6be1f67 (Improve typing with Optional type hint) output_scale: float = 1.0) -> Model: """Instantiate a Gaussian model @@ -222,8 +235,13 @@ def compute(self, states, taken_actions=None, role=""): max_log_std=max_log_std, metadata=metadata) +<<<<<<< HEAD def multivariate_gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, +======= +def multivariate_gaussian_model(observation_space: 
Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, +>>>>>>> 6be1f67 (Improve typing with Optional type hint) device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, clip_log_std: bool = True, @@ -233,7 +251,11 @@ def multivariate_gaussian_model(observation_space: Union[int, Tuple[int], gym.Sp hiddens: list = [256, 256], hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, +<<<<<<< HEAD output_activation: Union[str, None] = "tanh", +======= + output_activation: Optional[str] = "tanh", +>>>>>>> 6be1f67 (Improve typing with Optional type hint) output_scale: float = 1.0) -> Model: """Instantiate a multivariate Gaussian model @@ -314,15 +336,24 @@ def compute(self, states, taken_actions=None, role=""): max_log_std=max_log_std, metadata=metadata) +<<<<<<< HEAD def deterministic_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, +======= +def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, +>>>>>>> 6be1f67 (Improve typing with Optional type hint) device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, input_shape: Shape = Shape.STATES, hiddens: list = [256, 256], hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, +<<<<<<< HEAD output_activation: Union[str, None] = "tanh", +======= + output_activation: Optional[str] = "tanh", +>>>>>>> 6be1f67 (Improve typing with Optional type hint) output_scale: float = 1.0) -> Model: """Instantiate a deterministic model @@ -392,15 +423,24 @@ def compute(self, states, taken_actions=None, role=""): clip_actions=clip_actions, metadata=metadata) +<<<<<<< HEAD def categorical_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, action_space: Union[int, Tuple[int], gym.Space, None] = None, +======= +def categorical_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, +>>>>>>> 6be1f67 (Improve typing with Optional type hint) device: Union[str, torch.device] = "cuda:0", unnormalized_log_prob: bool = False, input_shape: Shape = Shape.STATES, hiddens: list = [256, 256], hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, +<<<<<<< HEAD output_activation: Union[str, None] = None) -> Model: +======= + output_activation: Optional[str] = None) -> Model: +>>>>>>> 6be1f67 (Improve typing with Optional type hint) """Instantiate a categorical model :param observation_space: Observation/state space or shape (default: None). 
From 50e9522eb39e215d25277a0e9c6f324df1a9ab45 Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Thu, 29 Sep 2022 16:17:25 +0200 Subject: [PATCH 004/157] Add WandB config to the config dict --- skrl/agents/torch/a2c/a2c.py | 8 ++++++++ skrl/agents/torch/amp/amp.py | 8 ++++++++ skrl/agents/torch/cem/cem.py | 8 ++++++++ skrl/agents/torch/ddpg/ddpg.py | 8 ++++++++ skrl/agents/torch/dqn/ddqn.py | 8 ++++++++ skrl/agents/torch/dqn/dqn.py | 8 ++++++++ skrl/agents/torch/ppo/ppo.py | 8 ++++++++ skrl/agents/torch/q_learning/q_learning.py | 8 ++++++++ skrl/agents/torch/sac/sac.py | 8 ++++++++ skrl/agents/torch/sarsa/sarsa.py | 8 ++++++++ skrl/agents/torch/td3/td3.py | 8 ++++++++ skrl/agents/torch/trpo/trpo.py | 8 ++++++++ 12 files changed, 96 insertions(+) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 982b1765..c99c93ae 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -46,6 +46,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 1ccba9ed..9781e70d 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -64,6 +64,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index d7176a79..582ed2c7 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -37,6 +37,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 534cd75a..1a122dfe 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -46,6 +46,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index bb053fe8..735fea2d 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -48,6 +48,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index e0d5d672..4e0ea6b5 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ 
b/skrl/agents/torch/dqn/dqn.py @@ -48,6 +48,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 1aa552f2..fc734bee 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -54,6 +54,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 30aa0932..29bfb516 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -28,6 +28,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index 728bd64a..83cdec21 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -46,6 +46,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index eb7ea58a..2aa1b866 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -28,6 +28,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index 03848d39..22bd63eb 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -51,6 +51,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + "group": None, # group name + "tags": [], # tags + } } } diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index 8cdaacd9..25fa8a65 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -54,6 +54,14 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": { + "enabled": False, # whether to use Weights & Biases + "project": None, # project name + "entity": None, # entity name + 
"group": None, # group name + "tags": [], # tags + } } } From 9d5fa633dc86782b456a59d5a83696bf4905846f Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Thu, 29 Sep 2022 16:22:25 +0200 Subject: [PATCH 005/157] Create WandB setup function and call it The WandB setup function creates the connection to WandB, it runs whenever the config of the agent says so. This way, multiple agents can sync to wandb.ai independently. --- setup.py | 1 + skrl/trainers/torch/base.py | 33 +++++++++++++++++++++++++++++++ skrl/trainers/torch/manual.py | 3 +++ skrl/trainers/torch/parallel.py | 3 +++ skrl/trainers/torch/sequential.py | 3 +++ 5 files changed, 43 insertions(+) diff --git a/setup.py b/setup.py index 52de3de0..98f03c57 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,7 @@ "tqdm", "packaging", "pre-commit", + "wandb", ] # installation diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index 297b5cae..d06ded7d 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -121,6 +121,39 @@ def _setup_agents(self) -> None: else: self.num_agents = 1 + def _setup_wandb(self) -> None: + """Setup Weights & Biases""" + for n_agent in range(self.num_agents): + if self.num_agents == 1: + agent = self.agents + else: + agent = self.agents[n_agent] + + wandb_cfg = agent.cfg.get("experiment", {}).get("wandb", {}) + + if wandb_cfg.get("enabled", False): + import wandb + dir = agent.experiment_dir + run_name = dir.split("/")[-1] + try: + _net_cfg = {k: v.net._modules for (k, v) in agent.models.items()} + except AttributeError: + _net_cfg = {k: v._modules for (k, v) in agent.models.items()} + _cfg = { + **self.cfg, + **agent.cfg, + **_net_cfg + } + wandb.init( + project=wandb_cfg.get("project", None), + group=wandb_cfg.get("group", None), + entity=wandb_cfg.get("entity", None), + name=run_name, + sync_tensorboard=True, + resume="allow", + config=_cfg + ) + def train(self) -> None: """Train the agents diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index e067314e..993e9082 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -42,6 +42,9 @@ def __init__(self, agents_scope = agents_scope if agents_scope is not None else [] super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) + # Setup weights and biases + self._setup_wandb() + # init agents if self.num_agents > 1: for agent in self.agents: diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index 2363d2b4..df9ee2d2 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -121,6 +121,9 @@ def __init__(self, agents_scope = agents_scope if agents_scope is not None else [] super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) + # Setup weights and biases + self._setup_wandb() + mp.set_start_method(method='spawn', force=True) def train(self) -> None: diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index aad8dea9..f721ac21 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -42,6 +42,9 @@ def __init__(self, agents_scope = agents_scope if agents_scope is not None else [] super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) + # Setup weights and biases + self._setup_wandb() + # init agents if self.num_agents > 1: for agent in self.agents: From aa76d81e7f6466bd540fc51e45837ca054e26290 Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Thu, 29 Sep 2022 16:23:54 +0200 Subject: 
[PATCH 006/157] Fix no data upload The tensorboard writer has to be created AFTER wandb was initialized. Thus, we moved some stuff around to make it work. --- skrl/agents/torch/base.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 48da3ff3..da685e22 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -18,15 +18,9 @@ class Agent: def __init__(self, models: Dict[str, Model], -<<<<<<< HEAD - memory: Union[Memory, Tuple[Memory], None] = None, - observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, -======= memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, ->>>>>>> 6be1f67 (Improve typing with Optional type hint) device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Base class that represent a RL agent @@ -78,6 +72,15 @@ def __init__(self, self.checkpoint_store_separately = self.cfg.get("experiment", {}).get("store_separately", False) self.checkpoint_best_modules = {"timestep": 0, "reward": -2 ** 31, "saved": False, "modules": {}} + # experiment directory + directory = self.cfg.get("experiment", {}).get("directory", "") + experiment_name = self.cfg.get("experiment", {}).get("experiment_name", "") + if not directory: + directory = os.path.join(os.getcwd(), "runs") + if not experiment_name: + experiment_name = "{}_{}".format(datetime.datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f"), self.__class__.__name__) + self.experiment_dir = os.path.join(directory, experiment_name) + def __str__(self) -> str: """Generate a representation of the agent as string @@ -124,15 +127,6 @@ def init(self) -> None: This method should be called before the agent is used. 
It will initialize the TensoBoard writer and checkpoint directory """ - # experiment directory - directory = self.cfg.get("experiment", {}).get("directory", "") - experiment_name = self.cfg.get("experiment", {}).get("experiment_name", "") - if not directory: - directory = os.path.join(os.getcwd(), "runs") - if not experiment_name: - experiment_name = "{}_{}".format(datetime.datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f"), self.__class__.__name__) - self.experiment_dir = os.path.join(directory, experiment_name) - # main entry to log data for consumption and visualization by TensorBoard self.writer = SummaryWriter(log_dir=self.experiment_dir) From d001b5d9e0bc7d9ee7641556411f5c634bbd622d Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Thu, 29 Sep 2022 16:34:46 +0200 Subject: [PATCH 007/157] Fix README linting errors --- docs/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/README.md b/docs/README.md index 2e5da518..d06d59ce 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,6 +1,6 @@ -## Documentation +# Documentation -### Install Sphinx and Read the Docs Sphinx Theme +## Install Sphinx and Read the Docs Sphinx Theme ```bash pip install sphinx @@ -9,7 +9,7 @@ pip install sphinx-autobuild pip install sphinx-tabs==3.2.0 ``` -### Building the documentation +## Building the documentation ```bash cd docs From 994adf563fbc244d554ccb7285ac1cf306237abf Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Thu, 29 Sep 2022 16:35:18 +0200 Subject: [PATCH 008/157] Add a small section about using WandB --- docs/source/modules/skrl.utils.utilities.rst | 31 ++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/docs/source/modules/skrl.utils.utilities.rst b/docs/source/modules/skrl.utils.utilities.rst index 6a095267..d7614ab4 100644 --- a/docs/source/modules/skrl.utils.utilities.rst +++ b/docs/source/modules/skrl.utils.utilities.rst @@ -17,3 +17,34 @@ API """ .. autofunction:: skrl.utils.set_seed + +Weights and Biases +------------------ + +Integration +""""""""""" + +You can use `Weights & Biases `_ to easily track your experiments. +Please login to your account and create a new project. +Afterwards, log into Weights & Biases from your terminal: + +.. code-block:: bash + + wandb login + +Usage +""""" + +Change your agents config to enable Weights & Biases, also, at a minimum, enter your project name and entity name: + +.. 
code-block:: python + + from skrl.agents.torch.dqn import DQN_DEFAULT_CONFIG + + cfg_dqn = DQN_DEFAULT_CONFIG.copy() + + cfg_dqn["experiment"]["wandb"] = { + "enabled": True, + "project": "skrl", + "entity": "Toni-SM", + } From 2013f2d0bb7434b8a845e513a8081ff5086730bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 12 Oct 2022 12:17:46 +0200 Subject: [PATCH 009/157] Replace XFormPrimView by RigidPrimView to speed up simulation --- .../reaching_franka_omniverse_isaacgym_env.py | 47 ++++++++++++------- ...ing_franka_omniverse_isaacgym_skrl_eval.py | 2 +- ...ng_franka_omniverse_isaacgym_skrl_train.py | 2 +- 3 files changed, 32 insertions(+), 19 deletions(-) diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py index 97a6460d..2ee17f87 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_env.py @@ -4,9 +4,9 @@ from omniisaacgymenvs.tasks.base.rl_task import RLTask from omniisaacgymenvs.robots.articulations.franka import Franka as Robot -from omni.isaac.core.prims import RigidPrimView, XFormPrimView +from omni.isaac.core.prims import RigidPrimView from omni.isaac.core.articulations import ArticulationView -from omni.isaac.core.objects import VisualSphere +from omni.isaac.core.objects import DynamicSphere from omni.isaac.core.utils.prims import get_prim_at_path from skrl.utils import omniverse_isaacgym_utils @@ -79,7 +79,19 @@ "density": -1, "max_depenetration_velocity": 1000.0, "contact_offset": 0.005, - "rest_offset": 0.0}}}} + "rest_offset": 0.0}, + "target": {"override_usd_defaults": False, + "fixed_base": True, + "enable_self_collisions": False, + "enable_gyroscopic_forces": True, + "solver_position_iteration_count": 4, + "solver_velocity_iteration_count": 1, + "sleep_threshold": 0.005, + "stabilization_threshold": 0.001, + "density": -1, + "max_depenetration_velocity": 1000.0, + "contact_offset": 0.005, + "rest_offset": 0.0}}}} class RobotView(ArticulationView): @@ -132,7 +144,7 @@ def set_up_scene(self, scene) -> None: self._hands = RigidPrimView(prim_paths_expr="/World/envs/.*/robot/panda_hand", name="hand_view", reset_xform_properties=False) scene.add(self._hands) # target view - self._targets = XFormPrimView(prim_paths_expr="/World/envs/.*/target", name="target_view", reset_xform_properties=False) + self._targets = RigidPrimView(prim_paths_expr="/World/envs/.*/target", name="target_view", reset_xform_properties=False) scene.add(self._targets) self.init_data() @@ -145,10 +157,11 @@ def get_robot(self): self._sim_config.apply_articulation_settings("robot", get_prim_at_path(robot.prim_path), self._sim_config.parse_actor_config("robot")) def get_target(self): - target = VisualSphere(prim_path=self.default_zero_env_path + "/target", - name="target", - radius=0.025, - color=torch.tensor([1, 0, 0])) + target = DynamicSphere(prim_path=self.default_zero_env_path + "/target", + name="target", + radius=0.025, + color=torch.tensor([1, 0, 0])) + self._sim_config.apply_articulation_settings("target", get_prim_at_path(target.prim_path), self._sim_config.parse_actor_config("target")) target.set_collision_enabled(False) def init_data(self) -> None: @@ -162,8 +175,8 @@ def init_data(self) -> None: def get_observations(self) -> dict: robot_dof_pos = 
self._robots.get_joint_positions(clone=False) robot_dof_vel = self._robots.get_joint_velocities(clone=False) - end_effector_pos, end_effector_rot = self._end_effectors.get_local_poses() - target_pos, target_rot = self._targets.get_local_poses() + end_effector_pos, end_effector_rot = self._end_effectors.get_world_poses(clone=False) + target_pos, target_rot = self._targets.get_world_poses(clone=False) dof_pos_scaled = 2.0 * (robot_dof_pos - self.robot_dof_lower_limits) \ / (self.robot_dof_upper_limits - self.robot_dof_lower_limits) - 1.0 @@ -174,14 +187,15 @@ def get_observations(self) -> dict: self.obs_buf[:, 0] = self.progress_buf / self._max_episode_length self.obs_buf[:, 1:8] = dof_pos_scaled[:, :7] self.obs_buf[:, 8:15] = dof_vel_scaled[:, :7] * generalization_noise - self.obs_buf[:, 15:18] = target_pos + self.obs_buf[:, 15:18] = target_pos - self._env_pos # compute distance for calculate_metrics() and is_done() self._computed_distance = torch.norm(end_effector_pos - target_pos, dim=-1) if self._control_space == "cartesian": self.jacobians = self._robots.get_jacobians(clone=False) - self.hand_pos, self.hand_rot = self._hands.get_local_poses() + self.hand_pos, self.hand_rot = self._hands.get_world_poses(clone=False) + self.hand_pos -= self._env_pos return {self._robots.name: {"obs_buf": self.obs_buf}} @@ -227,12 +241,11 @@ def reset_idx(self, env_ids) -> None: self._robots.set_joint_velocities(dof_vel, indices=indices) # reset target - pos = (torch.rand((len(env_ids), 3), device=self._device) - 0.5) * 2 - pos[:, 0] = 0.50 + pos[:, 0] * 0.25 - pos[:, 1] = 0.00 + pos[:, 1] * 0.25 - pos[:, 2] = 0.20 + pos[:, 2] * 0.10 + pos = (torch.rand((len(env_ids), 3), device=self._device) - 0.5) * 2 \ + * torch.tensor([0.25, 0.25, 0.10], device=self._device) \ + + torch.tensor([0.50, 0.00, 0.20], device=self._device) - self._targets.set_local_poses(pos, indices=indices) + self._targets.set_world_poses(pos + self._env_pos[env_ids], indices=indices) # bookkeeping self.reset_buf[env_ids] = 0 diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py index 7d14c040..cac6186c 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py @@ -35,7 +35,7 @@ def compute(self, states, taken_actions, role): env = get_env_instance(headless=headless) from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig -from reaching_franka_sim_env import ReachingFrankaTask, TASK_CFG +from reaching_franka_omniverse_isaacgym_env import ReachingFrankaTask, TASK_CFG TASK_CFG["headless"] = headless TASK_CFG["task"]["env"]["numEnvs"] = 64 diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py index 15c34eb6..3a40fcf6 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py @@ -60,7 +60,7 @@ def compute(self, states, taken_actions, role): env = get_env_instance(headless=headless) from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig -from reaching_franka_sim_env import ReachingFrankaTask, 
TASK_CFG +from reaching_franka_omniverse_isaacgym_env import ReachingFrankaTask, TASK_CFG TASK_CFG["headless"] = headless TASK_CFG["task"]["env"]["numEnvs"] = 1024 From 947b36cca57ae16d33217c25661c5eda6d1e0ffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 12 Oct 2022 12:34:43 +0200 Subject: [PATCH 010/157] Update CHANGELOG and increase MINOR version --- CHANGELOG.md | 4 ++++ skrl/version.txt | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6cd6e659..7504c19e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [0.9.0] - Unreleased +### Fixed +- Omniverse Isaac Gym simulation speed for the Franka Emika real-world example + ## [0.8.0] - 2022-10-03 ### Added - AMP agent for physics-based character animation diff --git a/skrl/version.txt b/skrl/version.txt index a3df0a69..ac39a106 100644 --- a/skrl/version.txt +++ b/skrl/version.txt @@ -1 +1 @@ -0.8.0 +0.9.0 From 357382726e045414efbda2920a8d8cbc2e9f66ce Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Sat, 15 Oct 2022 16:15:51 +0200 Subject: [PATCH 011/157] Fix missed merge conflicts --- skrl/utils/model_instantiators.py | 40 ------------------------------- 1 file changed, 40 deletions(-) diff --git a/skrl/utils/model_instantiators.py b/skrl/utils/model_instantiators.py index 78128635..792b0eb9 100644 --- a/skrl/utils/model_instantiators.py +++ b/skrl/utils/model_instantiators.py @@ -93,11 +93,7 @@ def _generate_sequential(model: Model, hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, output_activation: Union[str, None] = "tanh", -<<<<<<< HEAD - output_scale: int = None) -> nn.Sequential: -======= output_scale: Optional[int] = None) -> nn.Sequential: ->>>>>>> 6be1f67 (Improve typing with Optional type hint) """Generate a sequential model :param model: model to generate sequential model for @@ -134,13 +130,8 @@ def _generate_sequential(model: Model, return nn.Sequential(*input_layer, *hidden_layers, *output_layer) -<<<<<<< HEAD -def gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, -======= def gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, ->>>>>>> 6be1f67 (Improve typing with Optional type hint) device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, clip_log_std: bool = True, @@ -150,11 +141,7 @@ def gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space] hiddens: list = [256, 256], hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, -<<<<<<< HEAD - output_activation: Union[str, None] = "tanh", -======= output_activation: Optional[str] = "tanh", ->>>>>>> 6be1f67 (Improve typing with Optional type hint) output_scale: float = 1.0) -> Model: """Instantiate a Gaussian model @@ -235,13 +222,8 @@ def compute(self, states, taken_actions=None, role=""): max_log_std=max_log_std, metadata=metadata) -<<<<<<< HEAD -def multivariate_gaussian_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, -======= def multivariate_gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, ->>>>>>> 6be1f67 
(Improve typing with Optional type hint) device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, clip_log_std: bool = True, @@ -251,11 +233,7 @@ def multivariate_gaussian_model(observation_space: Optional[Union[int, Tuple[int hiddens: list = [256, 256], hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, -<<<<<<< HEAD - output_activation: Union[str, None] = "tanh", -======= output_activation: Optional[str] = "tanh", ->>>>>>> 6be1f67 (Improve typing with Optional type hint) output_scale: float = 1.0) -> Model: """Instantiate a multivariate Gaussian model @@ -336,24 +314,15 @@ def compute(self, states, taken_actions=None, role=""): max_log_std=max_log_std, metadata=metadata) -<<<<<<< HEAD -def deterministic_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, -======= def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, ->>>>>>> 6be1f67 (Improve typing with Optional type hint) device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, input_shape: Shape = Shape.STATES, hiddens: list = [256, 256], hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, -<<<<<<< HEAD - output_activation: Union[str, None] = "tanh", -======= output_activation: Optional[str] = "tanh", ->>>>>>> 6be1f67 (Improve typing with Optional type hint) output_scale: float = 1.0) -> Model: """Instantiate a deterministic model @@ -423,24 +392,15 @@ def compute(self, states, taken_actions=None, role=""): clip_actions=clip_actions, metadata=metadata) -<<<<<<< HEAD -def categorical_model(observation_space: Union[int, Tuple[int], gym.Space, None] = None, - action_space: Union[int, Tuple[int], gym.Space, None] = None, -======= def categorical_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, ->>>>>>> 6be1f67 (Improve typing with Optional type hint) device: Union[str, torch.device] = "cuda:0", unnormalized_log_prob: bool = False, input_shape: Shape = Shape.STATES, hiddens: list = [256, 256], hidden_activation: list = ["relu", "relu"], output_shape: Shape = Shape.ACTIONS, -<<<<<<< HEAD - output_activation: Union[str, None] = None) -> Model: -======= output_activation: Optional[str] = None) -> Model: ->>>>>>> 6be1f67 (Improve typing with Optional type hint) """Instantiate a categorical model :param observation_space: Observation/state space or shape (default: None). 
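Note on the resolved hunks above: the two annotation styles that collided are interchangeable at runtime, because ``typing.Optional[X]`` is defined as ``Union[X, None]``; the resolution simply standardizes on the shorter ``Optional`` spelling, which keeps the long ``model_instantiators.py`` signatures readable without changing which arguments the functions accept. A minimal sketch of that equivalence (the alias names below are illustrative only, not part of skrl):

.. code-block:: python

    from typing import Optional, Tuple, Union

    import gym

    # Optional[X] is shorthand for Union[X, None], so both hints accept exactly
    # the same values: an int, a tuple of ints, a gym.Space, or None
    SpaceHintOld = Union[int, Tuple[int], gym.Space, None]
    SpaceHintNew = Optional[Union[int, Tuple[int], gym.Space]]

    # typing flattens nested unions, so the two hints compare equal
    assert SpaceHintOld == SpaceHintNew
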
From a13aba025fdc3518b784fd84bc31df5e9a548047 Mon Sep 17 00:00:00 2001 From: Johann Christensen Date: Sat, 15 Oct 2022 16:35:19 +0200 Subject: [PATCH 012/157] Move wandb initialization to the agent --- docs/source/snippets/agent.py | 4 +-- skrl/agents/torch/a2c/a2c.py | 4 +-- skrl/agents/torch/amp/amp.py | 4 +-- skrl/agents/torch/base.py | 33 +++++++++++++++++++++- skrl/agents/torch/cem/cem.py | 4 +-- skrl/agents/torch/ddpg/ddpg.py | 4 +-- skrl/agents/torch/dqn/ddqn.py | 4 +-- skrl/agents/torch/dqn/dqn.py | 4 +-- skrl/agents/torch/ppo/ppo.py | 4 +-- skrl/agents/torch/q_learning/q_learning.py | 4 +-- skrl/agents/torch/sac/sac.py | 4 +-- skrl/agents/torch/sarsa/sarsa.py | 4 +-- skrl/agents/torch/td3/td3.py | 4 +-- skrl/agents/torch/trpo/trpo.py | 4 +-- skrl/trainers/torch/base.py | 33 ---------------------- skrl/trainers/torch/manual.py | 7 ++--- skrl/trainers/torch/parallel.py | 12 ++++---- skrl/trainers/torch/sequential.py | 7 ++--- 18 files changed, 67 insertions(+), 77 deletions(-) diff --git a/docs/source/snippets/agent.py b/docs/source/snippets/agent.py index 8e9c5d72..77497a2c 100644 --- a/docs/source/snippets/agent.py +++ b/docs/source/snippets/agent.py @@ -59,10 +59,10 @@ def __init__(self, # - set up preprocessors # ===================================================================== - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) self.set_mode("eval") # ================================================================= # - create tensors in memory if required diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index c99c93ae..37476ae4 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -152,10 +152,10 @@ def __init__(self, else: self._value_preprocessor = self._empty_preprocessor - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) self.set_mode("eval") # create tensors in memory diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 9781e70d..1dde4353 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -218,10 +218,10 @@ def __init__(self, else: self._amp_state_preprocessor = self._empty_preprocessor - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) self.set_mode("eval") # create tensors in memory diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index da685e22..731e42e3 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -121,12 +121,15 @@ def _get_internal_value(self, _module: Any) -> Any: """ return _module.state_dict() if hasattr(_module, "state_dict") else _module - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent This method should be called before the agent is used. 
It will initialize the TensoBoard writer and checkpoint directory """ + # Setup Weight and Biases + self._setup_wandb(trainer_cfg=trainer_cfg) + # main entry to log data for consumption and visualization by TensorBoard self.writer = SummaryWriter(log_dir=self.experiment_dir) @@ -621,3 +624,31 @@ def _update(self, timestep: int, timesteps: int) -> None: :raises NotImplementedError: The method is not implemented by the inheriting classes """ raise NotImplementedError + + def _setup_wandb(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: + """Setup Weights & Biases""" + wandb_cfg = self.cfg.get("experiment", {}).get("wandb", {}) + + if wandb_cfg.get("enabled", False): + import wandb + dir = self.experiment_dir + run_name = dir.split("/")[-1] + trainer_cfg = trainer_cfg if trainer_cfg is not None else {} + try: + _net_cfg = {k: v.net._modules for (k, v) in self.models.items()} + except AttributeError: + _net_cfg = {k: v._modules for (k, v) in self.models.items()} + _cfg = { + **self.cfg, + **trainer_cfg, + **_net_cfg + } + wandb.init( + project=wandb_cfg.get("project", None), + group=wandb_cfg.get("group", None), + entity=wandb_cfg.get("entity", None), + name=run_name, + sync_tensorboard=True, + resume="allow", + config=_cfg + ) \ No newline at end of file diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 582ed2c7..3c1cb4d0 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -127,10 +127,10 @@ def __init__(self, else: self._state_preprocessor = self._empty_preprocessor - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) # create tensors in memory if self.memory is not None: diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 1a122dfe..60b3e1c5 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -158,10 +158,10 @@ def __init__(self, else: self._state_preprocessor = self._empty_preprocessor - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) # create tensors in memory if self.memory is not None: diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 735fea2d..9cb982c8 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -152,10 +152,10 @@ def __init__(self, else: self._state_preprocessor = self._empty_preprocessor - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) # create tensors in memory if self.memory is not None: diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index 4e0ea6b5..9fa8709a 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -152,10 +152,10 @@ def __init__(self, else: self._state_preprocessor = self._empty_preprocessor - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) # create tensors in memory if self.memory is not None: diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index fc734bee..110e98d9 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -167,10 +167,10 @@ def 
__init__(self, else: self._value_preprocessor = self._empty_preprocessor - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) self.set_mode("eval") # create tensors in memory diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 29bfb516..6dedf63a 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -101,10 +101,10 @@ def __init__(self, self._current_next_states = None self._current_dones = None - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index 83cdec21..0d6eabb4 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -171,10 +171,10 @@ def __init__(self, else: self._state_preprocessor = self._empty_preprocessor - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) # create tensors in memory if self.memory is not None: diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index 2aa1b866..e1fc7206 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -101,10 +101,10 @@ def __init__(self, self._current_next_states = None self._current_dones = None - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index 22bd63eb..bd34d3e9 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -176,10 +176,10 @@ def __init__(self, else: self._state_preprocessor = self._empty_preprocessor - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) # create tensors in memory if self.memory is not None: diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index 25fa8a65..3acb2031 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -165,10 +165,10 @@ def __init__(self, else: self._value_preprocessor = self._empty_preprocessor - def init(self) -> None: + def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ - super().init() + super().init(trainer_cfg=trainer_cfg) # create tensors in memory if self.memory is not None: diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index d06ded7d..297b5cae 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -121,39 +121,6 @@ def _setup_agents(self) -> None: else: self.num_agents = 1 - def _setup_wandb(self) -> None: - """Setup Weights & Biases""" - for n_agent in range(self.num_agents): - if 
self.num_agents == 1: - agent = self.agents - else: - agent = self.agents[n_agent] - - wandb_cfg = agent.cfg.get("experiment", {}).get("wandb", {}) - - if wandb_cfg.get("enabled", False): - import wandb - dir = agent.experiment_dir - run_name = dir.split("/")[-1] - try: - _net_cfg = {k: v.net._modules for (k, v) in agent.models.items()} - except AttributeError: - _net_cfg = {k: v._modules for (k, v) in agent.models.items()} - _cfg = { - **self.cfg, - **agent.cfg, - **_net_cfg - } - wandb.init( - project=wandb_cfg.get("project", None), - group=wandb_cfg.get("group", None), - entity=wandb_cfg.get("entity", None), - name=run_name, - sync_tensorboard=True, - resume="allow", - config=_cfg - ) - def train(self) -> None: """Train the agents diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index 993e9082..79f775b2 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -42,15 +42,12 @@ def __init__(self, agents_scope = agents_scope if agents_scope is not None else [] super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) - # Setup weights and biases - self._setup_wandb() - # init agents if self.num_agents > 1: for agent in self.agents: - agent.init() + agent.init(trainer_cfg=self.cfg) else: - self.agents.init() + self.agents.init(trainer_cfg=self.cfg) self._progress = None diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index df9ee2d2..ea1094fd 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -25,6 +25,7 @@ def fn_processor(process_index, *args): queue = args[1][process_index] barrier = args[2] scope = args[3][process_index] + trainer_cfg = scope = args[34][process_index] agent = None _states = None @@ -44,7 +45,7 @@ def fn_processor(process_index, *args): # initialize agent elif task == 'init': agent = queue.get() - agent.init() + agent.init(trainer_cfg=trainer_cfg) print("[INFO] Processor {}: init agent {} with scope {}".format(process_index, type(agent).__name__, scope)) barrier.wait() @@ -121,9 +122,6 @@ def __init__(self, agents_scope = agents_scope if agents_scope is not None else [] super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) - # Setup weights and biases - self._setup_wandb() - mp.set_start_method(method='spawn', force=True) def train(self) -> None: @@ -141,7 +139,7 @@ def train(self) -> None: """ # single agent if self.num_agents == 1: - self.agents.init() + self.agents.init(trainer_cfg=self.cfg) self.single_agent_train() return @@ -171,7 +169,7 @@ def train(self) -> None: # spawn and wait for all processes to start for i in range(self.num_agents): process = mp.Process(target=fn_processor, - args=(i, consumer_pipes, queues, barrier, self.agents_scope), + args=(i, consumer_pipes, queues, barrier, self.agents_scope, self.cfg), daemon=True) processes.append(process) process.start() @@ -265,7 +263,7 @@ def eval(self) -> None: """ # single agent if self.num_agents == 1: - self.agents.init() + self.agents.init(trainer_cfg=self.cfg) self.single_agent_eval() return diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index f721ac21..a06fabd8 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -42,15 +42,12 @@ def __init__(self, agents_scope = agents_scope if agents_scope is not None else [] super().__init__(env=env, agents=agents, agents_scope=agents_scope, cfg=_cfg) - # Setup weights and biases - self._setup_wandb() - # init agents if self.num_agents > 1: 
for agent in self.agents: - agent.init() + agent.init(trainer_cfg=self.cfg) else: - self.agents.init() + self.agents.init(trainer_cfg=self.cfg) def train(self) -> None: """Train the agents sequentially From 3a5dccd166fb9f50eac6ecf701f5b70e41badfce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 15 Oct 2022 23:22:45 +0200 Subject: [PATCH 013/157] Set agents running mode --- skrl/agents/torch/base.py | 13 +++++++++++++ skrl/trainers/torch/manual.py | 14 ++++++++++++++ skrl/trainers/torch/parallel.py | 14 ++++++++++++++ skrl/trainers/torch/sequential.py | 14 ++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 44476e78..57c943a0 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -66,6 +66,8 @@ def __init__(self, self._cumulative_rewards = None self._cumulative_timesteps = None + self.training = True + # checkpoint self.checkpoint_modules = {} self.checkpoint_interval = self.cfg.get("experiment", {}).get("checkpoint_interval", 1000) @@ -302,6 +304,17 @@ def set_mode(self, mode: str) -> None: if model is not None: model.set_mode(mode) + def set_running_mode(self, mode: str) -> None: + """Set the current running mode (training or evaluation) + + This method sets the value of the ``training`` property (boolean). + This property can be used to know if the agent is running in training or evaluation mode. + + :param mode: Mode: 'train' for training or 'eval' for evaluation + :type mode: str + """ + self.training = mode == "train" + def save(self, path: str) -> None: """Save the agent to the specified path diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index 2e414fec..04acffa0 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -77,6 +77,13 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: self._progress = tqdm.tqdm(total=timesteps) self._progress.update(n=1) + # set running mode + if self.num_agents > 1: + for agent in self.agents: + agent.set_running_mode("train") + else: + self.agents.set_running_mode("train") + # reset env if self.states is None: self.states = self.env.reset() @@ -168,6 +175,13 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: self._progress = tqdm.tqdm(total=timesteps) self._progress.update(n=1) + # set running mode + if self.num_agents > 1: + for agent in self.agents: + agent.set_running_mode("eval") + else: + self.agents.set_running_mode("eval") + # reset env if self.states is None: self.states = self.env.reset() diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index c02957a7..2609657d 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -135,6 +135,13 @@ def train(self) -> None: - Post-interaction (in parallel) - Reset environments """ + # set running mode + if self.num_agents > 1: + for agent in self.agents: + agent.set_running_mode("train") + else: + self.agents.set_running_mode("train") + # single agent if self.num_agents == 1: self.agents.init() @@ -259,6 +266,13 @@ def eval(self) -> None: - Render scene - Reset environments """ + # set running mode + if self.num_agents > 1: + for agent in self.agents: + agent.set_running_mode("eval") + else: + self.agents.set_running_mode("eval") + # single agent if self.num_agents == 1: self.agents.init() diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index 910b6079..d0a45739 100644 --- 
a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -61,6 +61,13 @@ def train(self) -> None: - Post-interaction (sequentially) - Reset environments """ + # set running mode + if self.num_agents > 1: + for agent in self.agents: + agent.set_running_mode("train") + else: + self.agents.set_running_mode("train") + # single agent if self.num_agents == 1: self.single_agent_train() @@ -123,6 +130,13 @@ def eval(self) -> None: - Render scene - Reset environments """ + # set running mode + if self.num_agents > 1: + for agent in self.agents: + agent.set_running_mode("eval") + else: + self.agents.set_running_mode("eval") + # single agent if self.num_agents == 1: self.single_agent_eval() From 27f4be8e4191c4f7d097d63fb40d3ce4c94dda5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 15 Oct 2022 23:43:57 +0200 Subject: [PATCH 014/157] Remove deprecated method in trainers --- skrl/trainers/torch/base.py | 8 -------- skrl/trainers/torch/sequential.py | 8 -------- 2 files changed, 16 deletions(-) diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index 7565b090..4845e6eb 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -135,14 +135,6 @@ def eval(self) -> None: """ raise NotImplementedError - def start(self) -> None: - """Start training - - This method is deprecated in favour of the '.train()' method - """ - # TODO: remove this method in future versions - print("[WARNING] Trainer.start() method is deprecated in favour of the '.train()' method") - def single_agent_train(self) -> None: """Train a single agent diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index d0a45739..a700a16d 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -180,11 +180,3 @@ def eval(self) -> None: # close the environment self.env.close() - - def start(self) -> None: - """Start training - - This method is deprecated in favour of the '.train()' method - """ - super().start() - self.train() From 397ec952925d2ea659be5c76ebbca0a51cd2a8d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 16 Oct 2022 15:56:57 +0200 Subject: [PATCH 015/157] Get wandb init arguments from kwargs dict --- skrl/agents/torch/base.py | 57 +++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index cbb27ac7..8afcbe43 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -115,8 +115,8 @@ def _empty_preprocessor(self, _input: Any, *args, **kwargs) -> Any: def _get_internal_value(self, _module: Any) -> Any: """Get internal module/variable state/value - :param _input: Module or variable - :type _input: Any + :param _module: Module or variable + :type _module: Any :return: Module/variable state/value :rtype: Any @@ -127,10 +127,29 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent This method should be called before the agent is used. 
- It will initialize the TensoBoard writer and checkpoint directory + It will initialize the TensoBoard writer (and optionally Weights & Biases) and create the checkpoints directory + + :param trainer_cfg: Trainer configuration + :type trainer_cfg: dict, optional """ - # Setup Weight and Biases - self._setup_wandb(trainer_cfg=trainer_cfg) + # setup Weights & Biases + if self.cfg.get("experiment", {}).get("wandb", False): + # save experiment config + trainer_cfg = trainer_cfg if trainer_cfg is not None else {} + try: + models_cfg = {k: v.net._modules for (k, v) in self.models.items()} + except AttributeError: + models_cfg = {k: v._modules for (k, v) in self.models.items()} + config={**self.cfg, **trainer_cfg, **models_cfg} + # set default values + wandb_kwargs = copy.deepcopy(self.cfg.get("experiment", {}).get("wandb_kwargs", {})) + wandb_kwargs.setdefault("name", os.path.split(self.experiment_dir)[-1]) + wandb_kwargs.setdefault("sync_tensorboard", True) + wandb_kwargs.setdefault("config", {}) + wandb_kwargs["config"].update(config) + # init Weights & Biases + import wandb + wandb.init(**wandb_kwargs) # main entry to log data for consumption and visualization by TensorBoard self.writer = SummaryWriter(log_dir=self.experiment_dir) @@ -637,31 +656,3 @@ def _update(self, timestep: int, timesteps: int) -> None: :raises NotImplementedError: The method is not implemented by the inheriting classes """ raise NotImplementedError - - def _setup_wandb(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: - """Setup Weights & Biases""" - wandb_cfg = self.cfg.get("experiment", {}).get("wandb", {}) - - if wandb_cfg.get("enabled", False): - import wandb - dir = self.experiment_dir - run_name = dir.split("/")[-1] - trainer_cfg = trainer_cfg if trainer_cfg is not None else {} - try: - _net_cfg = {k: v.net._modules for (k, v) in self.models.items()} - except AttributeError: - _net_cfg = {k: v._modules for (k, v) in self.models.items()} - _cfg = { - **self.cfg, - **trainer_cfg, - **_net_cfg - } - wandb.init( - project=wandb_cfg.get("project", None), - group=wandb_cfg.get("group", None), - entity=wandb_cfg.get("entity", None), - name=run_name, - sync_tensorboard=True, - resume="allow", - config=_cfg - ) \ No newline at end of file From c1590d4047ad8a6dc2a45a4aaeb32120c744b757 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 16 Oct 2022 16:03:33 +0200 Subject: [PATCH 016/157] Define wandb init arguments as kwargs dict --- skrl/agents/torch/a2c/a2c.py | 9 ++------- skrl/agents/torch/amp/amp.py | 9 ++------- skrl/agents/torch/cem/cem.py | 9 ++------- skrl/agents/torch/ddpg/ddpg.py | 9 ++------- skrl/agents/torch/dqn/ddqn.py | 9 ++------- skrl/agents/torch/dqn/dqn.py | 9 ++------- skrl/agents/torch/ppo/ppo.py | 9 ++------- skrl/agents/torch/q_learning/q_learning.py | 9 ++------- skrl/agents/torch/sac/sac.py | 9 ++------- skrl/agents/torch/sarsa/sarsa.py | 9 ++------- skrl/agents/torch/td3/td3.py | 9 ++------- skrl/agents/torch/trpo/trpo.py | 9 ++------- 12 files changed, 24 insertions(+), 84 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 37476ae4..cf02ffb9 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -47,13 +47,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": 
None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 1dde4353..0f77d29c 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -65,13 +65,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 3c1cb4d0..28551f82 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -38,13 +38,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 60b3e1c5..51e17f52 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -47,13 +47,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 9cb982c8..10d12fc1 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -49,13 +49,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index 9fa8709a..d0e767c4 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -49,13 +49,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 
110e98d9..8375e627 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -55,13 +55,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 6dedf63a..950fb350 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -29,13 +29,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index 0d6eabb4..912bc8ff 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -47,13 +47,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index e1fc7206..00702291 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -29,13 +29,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index bd34d3e9..b21c1b69 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -52,13 +52,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index 3acb2031..a5a31b37 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -55,13 +55,8 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) 
"store_separately": False, # whether to store checkpoints separately - "wandb": { - "enabled": False, # whether to use Weights & Biases - "project": None, # project name - "entity": None, # entity name - "group": None, # group name - "tags": [], # tags - } + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } From 1e846bffa64ba3db1760bbb24483256440ddeae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 16 Oct 2022 18:19:21 +0200 Subject: [PATCH 017/157] Move wandb documentation from utilities to data.rst file --- docs/source/intro/data.rst | 55 ++++++++++++++++++-- docs/source/modules/skrl.utils.utilities.rst | 31 ----------- 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/docs/source/intro/data.rst b/docs/source/intro/data.rst index 0ffbba8d..fb2f5659 100644 --- a/docs/source/intro/data.rst +++ b/docs/source/intro/data.rst @@ -6,15 +6,15 @@ Saving, loading and logging Tracking metrics (TensorBoard) ------------------------------ +`TensorBoard `_ is used for tracking and visualizing metrics and scalars (coefficients, losses, etc.). The tracking and writing of metrics and scalars is the responsibility of the agents (**can be customized independently for each agent using its configuration dictionary**). + Configuration ^^^^^^^^^^^^^ -`TensorBoard `_ is used for tracking and visualizing metrics and scalars (coefficients, losses, etc.). The tracking and writing of metrics and scalars is the responsibility of the agents (**can be customized independently for each agent using its configuration dictionary**). - Each agent offers the following parameters under the :literal:`"experiment"` key: .. code-block:: python - :emphasize-lines: 5,6,7 + :emphasize-lines: 5-7 DEFAULT_CONFIG = { ... @@ -26,6 +26,9 @@ Each agent offers the following parameters under the :literal:`"experiment"` key "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } @@ -128,6 +131,49 @@ Tracking custom metrics/scales ---------------- +Tracking metrics (Weights and Biases) +------------------------------------- + +`Weights & Biases `_ is also supported for tracking and visualizing metrics and scalars. Its configuration is responsibility of the agents (**can be customized independently for each agent using its configuration dictionary**). + +Follow the steps described in Weights & Biases documentation (`Set up wandb `_) to login to the :literal:`wandb` library on the current machine. + +Configuration +^^^^^^^^^^^^^ + +Each agent offers the following parameters under the :literal:`experiment` key. Visit the Weights & Biases documentation () for more details about the configuration parameters. + +.. code-block:: python + :emphasize-lines: 12-13 + + DEFAULT_CONFIG = { + ... 
+ + "experiment": { + "directory": "", # experiment's parent directory + "experiment_name": "", # experiment name + "write_interval": 250, # TensorBoard writing interval (timesteps) + + "checkpoint_interval": 1000, # interval for checkpoints (timesteps) + "store_separately": False, # whether to store checkpoints separately + + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) + } + } + +* **wandb**: whether to enable support for Weights & Biases. + +* **wandb_kwargs**: keyword argument dictionary used to parameterize the `wandb.init `_ function. If no values are provided for the following parameters, the following values will be set for them: + + * :literal:`name``: will be set to the name of the experiment directory. + + * :literal:`sync_tensorboard`: will be set to :literal:`True`. + + * :literal:`config`: will be updated with the configuration dictionaries of both the agent (and its models) and the trainer. The update will be done even if a value has been set for the parameter. + +---------------- + Checkpoints ----------- @@ -151,6 +197,9 @@ The checkpoint management, as in the previous case, is the responsibility of the "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } diff --git a/docs/source/modules/skrl.utils.utilities.rst b/docs/source/modules/skrl.utils.utilities.rst index d7614ab4..6a095267 100644 --- a/docs/source/modules/skrl.utils.utilities.rst +++ b/docs/source/modules/skrl.utils.utilities.rst @@ -17,34 +17,3 @@ API """ .. autofunction:: skrl.utils.set_seed - -Weights and Biases ------------------- - -Integration -""""""""""" - -You can use `Weights & Biases `_ to easily track your experiments. -Please login to your account and create a new project. -Afterwards, log into Weights & Biases from your terminal: - -.. code-block:: bash - - wandb login - -Usage -""""" - -Change your agents config to enable Weights & Biases, also, at a minimum, enter your project name and entity name: - -.. 
code-block:: python - - from skrl.agents.torch.dqn import DQN_DEFAULT_CONFIG - - cfg_dqn = DQN_DEFAULT_CONFIG.copy() - - cfg_dqn["experiment"]["wandb"] = { - "enabled": True, - "project": "skrl", - "entity": "Toni-SM", - } From 1daf79261e1b093e043cfd305dd65c3ce7efd4ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 16 Oct 2022 18:51:13 +0200 Subject: [PATCH 018/157] Fix function processor arguments indexing in parallel trainer --- skrl/trainers/torch/parallel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index c92155e6..c99e2456 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -25,7 +25,7 @@ def fn_processor(process_index, *args): queue = args[1][process_index] barrier = args[2] scope = args[3][process_index] - trainer_cfg = scope = args[34][process_index] + trainer_cfg = args[4] agent = None _states = None @@ -308,7 +308,7 @@ def eval(self) -> None: # spawn and wait for all processes to start for i in range(self.num_agents): process = mp.Process(target=fn_processor, - args=(i, consumer_pipes, queues, barrier, self.agents_scope), + args=(i, consumer_pipes, queues, barrier, self.agents_scope, self.cfg), daemon=True) processes.append(process) process.start() From e688f5de035b95be714e821617575fec7eea8013 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 16 Oct 2022 19:42:46 +0200 Subject: [PATCH 019/157] Move extra computation to memory's block of code in record_transition method --- skrl/agents/torch/a2c/a2c.py | 10 +++++----- skrl/agents/torch/amp/amp.py | 10 +++++----- skrl/agents/torch/ddpg/ddpg.py | 8 ++++---- skrl/agents/torch/dqn/ddqn.py | 8 ++++---- skrl/agents/torch/dqn/dqn.py | 8 ++++---- skrl/agents/torch/ppo/ppo.py | 10 +++++----- skrl/agents/torch/sac/sac.py | 8 ++++---- skrl/agents/torch/td3/td3.py | 8 ++++---- skrl/agents/torch/trpo/trpo.py | 10 +++++----- 9 files changed, 40 insertions(+), 40 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index cf02ffb9..8e8705e7 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -221,13 +221,13 @@ def record_transition(self, """ super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) - # reward shaping - if self._rewards_shaper is not None: - rewards = self._rewards_shaper(rewards, timestep, timesteps) + if self.memory is not None: + self._current_next_states = next_states - self._current_next_states = next_states + # reward shaping + if self._rewards_shaper is not None: + rewards = self._rewards_shaper(rewards, timestep, timesteps) - if self.memory is not None: with torch.no_grad(): values, _, _ = self.value.act(self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 0f77d29c..3b6577df 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -313,13 +313,13 @@ def record_transition(self, super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) - # reward shaping - if self._rewards_shaper is not None: - rewards = self._rewards_shaper(rewards, timestep, timesteps) + if self.memory is not None: + amp_states = infos["amp_obs"] - amp_states = infos["amp_obs"] + # reward shaping + if self._rewards_shaper is not None: + rewards = 
self._rewards_shaper(rewards, timestep, timesteps) - if self.memory is not None: with torch.no_grad(): values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 51e17f52..d7c34ed2 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -266,11 +266,11 @@ def record_transition(self, """ super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) - # reward shaping - if self._rewards_shaper is not None: - rewards = self._rewards_shaper(rewards, timestep, timesteps) - if self.memory is not None: + # reward shaping + if self._rewards_shaper is not None: + rewards = self._rewards_shaper(rewards, timestep, timesteps) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 10d12fc1..3394efb7 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -228,11 +228,11 @@ def record_transition(self, """ super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) - # reward shaping - if self._rewards_shaper is not None: - rewards = self._rewards_shaper(rewards, timestep, timesteps) - if self.memory is not None: + # reward shaping + if self._rewards_shaper is not None: + rewards = self._rewards_shaper(rewards, timestep, timesteps) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index d0e767c4..b5a56a4e 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -228,11 +228,11 @@ def record_transition(self, """ super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) - # reward shaping - if self._rewards_shaper is not None: - rewards = self._rewards_shaper(rewards, timestep, timesteps) - if self.memory is not None: + # reward shaping + if self._rewards_shaper is not None: + rewards = self._rewards_shaper(rewards, timestep, timesteps) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 8375e627..86c34297 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -241,13 +241,13 @@ def record_transition(self, """ super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) - # reward shaping - if self._rewards_shaper is not None: - rewards = self._rewards_shaper(rewards, timestep, timesteps) + if self.memory is not None: + self._current_next_states = next_states - self._current_next_states = next_states + # reward shaping + if self._rewards_shaper is not None: + rewards = self._rewards_shaper(rewards, timestep, timesteps) - if self.memory 
is not None: with torch.no_grad(): values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index 912bc8ff..d0c2263d 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -234,11 +234,11 @@ def record_transition(self, """ super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) - # reward shaping - if self._rewards_shaper is not None: - rewards = self._rewards_shaper(rewards, timestep, timesteps) - if self.memory is not None: + # reward shaping + if self._rewards_shaper is not None: + rewards = self._rewards_shaper(rewards, timestep, timesteps) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index b21c1b69..ba9b4f55 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -285,11 +285,11 @@ def record_transition(self, """ super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) - # reward shaping - if self._rewards_shaper is not None: - rewards = self._rewards_shaper(rewards, timestep, timesteps) - if self.memory is not None: + # reward shaping + if self._rewards_shaper is not None: + rewards = self._rewards_shaper(rewards, timestep, timesteps) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index a5a31b37..36184555 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -238,13 +238,13 @@ def record_transition(self, """ super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) - # reward shaping - if self._rewards_shaper is not None: - rewards = self._rewards_shaper(rewards, timestep, timesteps) + if self.memory is not None: + self._current_next_states = next_states - self._current_next_states = next_states + # reward shaping + if self._rewards_shaper is not None: + rewards = self._rewards_shaper(rewards, timestep, timesteps) - if self.memory is not None: with torch.no_grad(): values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) From d179270ebc9a09982f30931f42a86944492b76ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 16 Oct 2022 19:46:57 +0200 Subject: [PATCH 020/157] Call agents' record_transition method during evaluation --- skrl/trainers/torch/base.py | 16 ++++++++-------- skrl/trainers/torch/manual.py | 32 +++++++++++++++---------------- skrl/trainers/torch/parallel.py | 16 ++++++++-------- skrl/trainers/torch/sequential.py | 16 ++++++++-------- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index e67c1741..e0e206a8 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -223,14 +223,14 @@ def 
single_agent_eval(self) -> None: with torch.no_grad(): # write data to TensorBoard - super(type(self.agents), self.agents).record_transition(states=states, - actions=actions, - rewards=rewards, - next_states=next_states, - dones=dones, - infos=infos, - timestep=timestep, - timesteps=self.timesteps) + self.agents.record_transition(states=states, + actions=actions, + rewards=rewards, + next_states=next_states, + dones=dones, + infos=infos, + timestep=timestep, + timesteps=self.timesteps) super(type(self.agents), self.agents).post_interaction(timestep=timestep, timesteps=self.timesteps) # reset environments diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index 13a4f621..0a9420e0 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -207,27 +207,27 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: with torch.no_grad(): if self.num_agents == 1: # write data to TensorBoard - super(type(self.agents), self.agents).record_transition(states=self.states, - actions=actions, - rewards=rewards, - next_states=next_states, - dones=dones, - infos=infos, - timestep=timestep, - timesteps=timesteps) + self.agents.record_transition(states=self.states, + actions=actions, + rewards=rewards, + next_states=next_states, + dones=dones, + infos=infos, + timestep=timestep, + timesteps=timesteps) super(type(self.agents), self.agents).post_interaction(timestep=timestep, timesteps=timesteps) else: # write data to TensorBoard for agent, scope in zip(self.agents, self.agents_scope): - super(type(agent), agent).record_transition(states=self.states[scope[0]:scope[1]], - actions=actions[scope[0]:scope[1]], - rewards=rewards[scope[0]:scope[1]], - next_states=next_states[scope[0]:scope[1]], - dones=dones[scope[0]:scope[1]], - infos=infos, - timestep=timestep, - timesteps=timesteps) + agent.record_transition(states=self.states[scope[0]:scope[1]], + actions=actions[scope[0]:scope[1]], + rewards=rewards[scope[0]:scope[1]], + next_states=next_states[scope[0]:scope[1]], + dones=dones[scope[0]:scope[1]], + infos=infos, + timestep=timestep, + timesteps=timesteps) super(type(agent), agent).post_interaction(timestep=timestep, timesteps=timesteps) # reset environments diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index c99e2456..74987d11 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -85,14 +85,14 @@ def fn_processor(process_index, *args): # write data to TensorBoard (evaluation) elif task == "eval-record_transition-post_interaction": with torch.no_grad(): - super(type(agent), agent).record_transition(states=_states, - actions=_actions, - rewards=queue.get()[scope[0]:scope[1]], - next_states=queue.get()[scope[0]:scope[1]], - dones=queue.get()[scope[0]:scope[1]], - infos=queue.get(), - timestep=msg['timestep'], - timesteps=msg['timesteps']) + agent.record_transition(states=_states, + actions=_actions, + rewards=queue.get()[scope[0]:scope[1]], + next_states=queue.get()[scope[0]:scope[1]], + dones=queue.get()[scope[0]:scope[1]], + infos=queue.get(), + timestep=msg['timestep'], + timesteps=msg['timesteps']) super(type(agent), agent).post_interaction(timestep=msg['timestep'], timesteps=msg['timesteps']) barrier.wait() diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index 67eddb46..0f11342e 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -163,14 +163,14 @@ def eval(self) -> None: with torch.no_grad(): # write data to 
TensorBoard for agent, scope in zip(self.agents, self.agents_scope): - super(type(agent), agent).record_transition(states=states[scope[0]:scope[1]], - actions=actions[scope[0]:scope[1]], - rewards=rewards[scope[0]:scope[1]], - next_states=next_states[scope[0]:scope[1]], - dones=dones[scope[0]:scope[1]], - infos=infos, - timestep=timestep, - timesteps=self.timesteps) + agent.record_transition(states=states[scope[0]:scope[1]], + actions=actions[scope[0]:scope[1]], + rewards=rewards[scope[0]:scope[1]], + next_states=next_states[scope[0]:scope[1]], + dones=dones[scope[0]:scope[1]], + infos=infos, + timestep=timestep, + timesteps=self.timesteps) super(type(agent), agent).post_interaction(timestep=timestep, timesteps=self.timesteps) # reset environments From 6783d84d546866522d536fe8228f0135e8592cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 16 Oct 2022 19:53:50 +0200 Subject: [PATCH 021/157] Update CHANGELOG --- CHANGELOG.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7504c19e..655818c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,8 +3,17 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [0.9.0] - Unreleased +### Added +- Set the running mode (training or evaluation) of the agents +- Weights & Biases integration (by @juhannc) + ### Fixed - Omniverse Isaac Gym simulation speed for the Franka Emika real-world example +- Call agents' method `record_transition` instead of parent method +to allow storing samples in memories during evaluation + +### Removed +- Deprecated method `start` in trainers ## [0.8.0] - 2022-10-03 ### Added @@ -13,7 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Gaussian model mixin - Support for creating shared models - Parameter `role` to model methods -- Wrapper compatibility with the new OpenAI Gym environment API (by @JohannLange) +- Wrapper compatibility with the new OpenAI Gym environment API (by @juhannc) - Internal library colored logger - Migrate checkpoints/models from other RL libraries to skrl models/agents - Configuration parameter `store_separately` to agent configuration dict @@ -26,7 +35,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
- Models implementation as Python mixin [**breaking change**] - Multivariate Gaussian model (`GaussianModel` until 0.7.0) to `MultivariateGaussianMixin` - Trainer's `cfg` parameter position and default values -- Show training/evaluation display progress using `tqdm` (by @JohannLange) +- Show training/evaluation display progress using `tqdm` (by @juhannc) - Update Isaac Gym and Omniverse Isaac Gym examples ### Fixed From 4e016addba1211fe0739b16d1cfd59b33fda6a98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 16 Oct 2022 20:28:26 +0200 Subject: [PATCH 022/157] Update agents' config in docs --- docs/source/modules/skrl.agents.a2c.rst | 2 +- docs/source/modules/skrl.agents.amp.rst | 2 +- docs/source/modules/skrl.agents.cem.rst | 2 +- docs/source/modules/skrl.agents.ddpg.rst | 2 +- docs/source/modules/skrl.agents.ddqn.rst | 2 +- docs/source/modules/skrl.agents.dqn.rst | 2 +- docs/source/modules/skrl.agents.ppo.rst | 2 +- docs/source/modules/skrl.agents.q_learning.rst | 2 +- docs/source/modules/skrl.agents.sac.rst | 2 +- docs/source/modules/skrl.agents.sarsa.rst | 2 +- docs/source/modules/skrl.agents.td3.rst | 2 +- docs/source/modules/skrl.agents.trpo.rst | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/source/modules/skrl.agents.a2c.rst b/docs/source/modules/skrl.agents.a2c.rst index 02ded175..d0952b78 100644 --- a/docs/source/modules/skrl.agents.a2c.rst +++ b/docs/source/modules/skrl.agents.a2c.rst @@ -76,7 +76,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/a2c/a2c.py :language: python - :lines: 17-50 + :lines: 17-53 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.amp.rst b/docs/source/modules/skrl.agents.amp.rst index 2d75f89e..87d36ee3 100644 --- a/docs/source/modules/skrl.agents.amp.rst +++ b/docs/source/modules/skrl.agents.amp.rst @@ -97,7 +97,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/amp/amp.py :language: python - :lines: 18-68 + :lines: 18-71 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.cem.rst b/docs/source/modules/skrl.agents.cem.rst index a2c44f9b..64314b4b 100644 --- a/docs/source/modules/skrl.agents.cem.rst +++ b/docs/source/modules/skrl.agents.cem.rst @@ -32,7 +32,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/cem/cem.py :language: python - :lines: 15-41 + :lines: 15-44 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.ddpg.rst b/docs/source/modules/skrl.agents.ddpg.rst index 4c197ac4..5c7d55a6 100644 --- a/docs/source/modules/skrl.agents.ddpg.rst +++ b/docs/source/modules/skrl.agents.ddpg.rst @@ -60,7 +60,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/ddpg/ddpg.py :language: python - :lines: 15-50 + :lines: 15-53 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.ddqn.rst b/docs/source/modules/skrl.agents.ddqn.rst index e9f6661b..0577c971 100644 --- a/docs/source/modules/skrl.agents.ddqn.rst +++ b/docs/source/modules/skrl.agents.ddqn.rst @@ -39,7 +39,7 @@ Configuration and hyperparameters .. 
literalinclude:: ../../../skrl/agents/torch/dqn/ddqn.py :language: python - :lines: 16-52 + :lines: 16-55 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.dqn.rst b/docs/source/modules/skrl.agents.dqn.rst index b003f4ca..490fff3f 100644 --- a/docs/source/modules/skrl.agents.dqn.rst +++ b/docs/source/modules/skrl.agents.dqn.rst @@ -39,7 +39,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/dqn/dqn.py :language: python - :lines: 16-52 + :lines: 16-55 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.ppo.rst b/docs/source/modules/skrl.agents.ppo.rst index eb226309..5e7df5f7 100644 --- a/docs/source/modules/skrl.agents.ppo.rst +++ b/docs/source/modules/skrl.agents.ppo.rst @@ -92,7 +92,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/ppo/ppo.py :language: python - :lines: 18-58 + :lines: 18-61 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.q_learning.rst b/docs/source/modules/skrl.agents.q_learning.rst index 1182e0dd..a42aa6c2 100644 --- a/docs/source/modules/skrl.agents.q_learning.rst +++ b/docs/source/modules/skrl.agents.q_learning.rst @@ -30,7 +30,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/q_learning/q_learning.py :language: python - :lines: 14-32 + :lines: 14-35 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.sac.rst b/docs/source/modules/skrl.agents.sac.rst index 1370fb6f..c5b7cb31 100644 --- a/docs/source/modules/skrl.agents.sac.rst +++ b/docs/source/modules/skrl.agents.sac.rst @@ -67,7 +67,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/sac/sac.py :language: python - :lines: 17-50 + :lines: 17-53 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.sarsa.rst b/docs/source/modules/skrl.agents.sarsa.rst index 2420c4f7..77bede3f 100644 --- a/docs/source/modules/skrl.agents.sarsa.rst +++ b/docs/source/modules/skrl.agents.sarsa.rst @@ -29,7 +29,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/sarsa/sarsa.py :language: python - :lines: 14-32 + :lines: 14-35 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.td3.rst b/docs/source/modules/skrl.agents.td3.rst index 4c0ad6b5..a95b8156 100644 --- a/docs/source/modules/skrl.agents.td3.rst +++ b/docs/source/modules/skrl.agents.td3.rst @@ -70,7 +70,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/td3/td3.py :language: python - :lines: 16-55 + :lines: 16-58 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.trpo.rst b/docs/source/modules/skrl.agents.trpo.rst index ca375f3d..7f97a800 100644 --- a/docs/source/modules/skrl.agents.trpo.rst +++ b/docs/source/modules/skrl.agents.trpo.rst @@ -130,7 +130,7 @@ Configuration and hyperparameters .. 
literalinclude:: ../../../skrl/agents/torch/trpo/trpo.py :language: python - :lines: 18-58 + :lines: 18-61 :linenos: Spaces and models From 1f7c6559ae22cc606beac66f89061f2b9ea15297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 16 Oct 2022 20:32:38 +0200 Subject: [PATCH 023/157] Fix wandb docs --- docs/source/intro/data.rst | 8 ++++---- docs/source/snippets/agent.py | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/source/intro/data.rst b/docs/source/intro/data.rst index fb2f5659..df94a19e 100644 --- a/docs/source/intro/data.rst +++ b/docs/source/intro/data.rst @@ -141,7 +141,7 @@ Follow the steps described in Weights & Biases documentation (`Set up wandb `_ function. If no values are provided for the following parameters, the following values will be set for them: - * :literal:`name``: will be set to the name of the experiment directory. + * :literal:`"name"`: will be set to the name of the experiment directory. - * :literal:`sync_tensorboard`: will be set to :literal:`True`. + * :literal:`"sync_tensorboard"`: will be set to :literal:`True`. - * :literal:`config`: will be updated with the configuration dictionaries of both the agent (and its models) and the trainer. The update will be done even if a value has been set for the parameter. + * :literal:`"config"`: will be updated with the configuration dictionaries of both the agent (and its models) and the trainer. The update will be done even if a value has been set for the parameter. ---------------- diff --git a/docs/source/snippets/agent.py b/docs/source/snippets/agent.py index 77497a2c..2b16d5e9 100644 --- a/docs/source/snippets/agent.py +++ b/docs/source/snippets/agent.py @@ -18,6 +18,9 @@ "checkpoint_interval": 1000, # interval for checkpoints (timesteps) "store_separately": False, # whether to store checkpoints separately + + "wandb": False, # whether to use Weights & Biases + "wandb_kwargs": {} # wandb kwargs (see https://docs.wandb.ai/ref/python/init) } } From 1f2e8234d55abf149b20e11ad760eb31f9ab0f8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 25 Oct 2022 23:48:09 +0200 Subject: [PATCH 024/157] Add Gymnasium support for envs, resources and utils --- skrl/envs/torch/wrappers.py | 159 +++++++++++++++++- .../torch/running_standard_scaler.py | 16 +- skrl/utils/model_instantiators.py | 33 ++-- 3 files changed, 187 insertions(+), 21 deletions(-) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index 6de4616d..154e08c4 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -1,6 +1,7 @@ from typing import Union, Tuple, Any, Optional import gym +import gymnasium import collections import numpy as np from packaging import version @@ -408,6 +409,152 @@ def close(self) -> None: self._env.close() +class GymnasiumWrapper(Wrapper): + def __init__(self, env: Any) -> None: + """Gymnasium environment wrapper + + :param env: The environment to wrap + :type env: Any supported Gymnasium environment + """ + super().__init__(env) + + self._vectorized = False + try: + if isinstance(env, gymnasium.vector.SyncVectorEnv) or isinstance(env, gymnasium.vector.AsyncVectorEnv): + self._vectorized = True + except Exception as e: + print("[WARNING] Failed to check for a vectorized environment: {}".format(e)) + + @property + def state_space(self) -> gymnasium.Space: + """State space + + An alias for the ``observation_space`` property + """ + if self._vectorized: + return self._env.single_observation_space + return 
self._env.observation_space + + @property + def observation_space(self) -> gymnasium.Space: + """Observation space + """ + if self._vectorized: + return self._env.single_observation_space + return self._env.observation_space + + @property + def action_space(self) -> gymnasium.Space: + """Action space + """ + if self._vectorized: + return self._env.single_action_space + return self._env.action_space + + def _observation_to_tensor(self, observation: Any, space: Optional[gymnasium.Space] = None) -> torch.Tensor: + """Convert the Gymnasium observation to a flat tensor + + :param observation: The Gymnasium observation to convert to a tensor + :type observation: Any supported Gymnasium observation space + + :raises: ValueError if the observation space type is not supported + + :return: The observation as a flat tensor + :rtype: torch.Tensor + """ + observation_space = self._env.observation_space if self._vectorized else self.observation_space + space = space if space is not None else observation_space + + if self._vectorized and isinstance(space, gymnasium.spaces.MultiDiscrete): + return torch.tensor(observation, device=self.device, dtype=torch.int64).view(self.num_envs, -1) + elif isinstance(observation, int): + return torch.tensor(observation, device=self.device, dtype=torch.int64).view(self.num_envs, -1) + elif isinstance(observation, np.ndarray): + return torch.tensor(observation, device=self.device, dtype=torch.float32).view(self.num_envs, -1) + elif isinstance(space, gymnasium.spaces.Discrete): + return torch.tensor(observation, device=self.device, dtype=torch.float32).view(self.num_envs, -1) + elif isinstance(space, gymnasium.spaces.Box): + return torch.tensor(observation, device=self.device, dtype=torch.float32).view(self.num_envs, -1) + elif isinstance(space, gymnasium.spaces.Dict): + tmp = torch.cat([self._observation_to_tensor(observation[k], space[k]) \ + for k in sorted(space.keys())], dim=-1).view(self.num_envs, -1) + return tmp + else: + raise ValueError("Observation space type {} not supported. Please report this issue".format(type(space))) + + def _tensor_to_action(self, actions: torch.Tensor) -> Any: + """Convert the action to the Gymnasium expected format + + :param actions: The actions to perform + :type actions: torch.Tensor + + :raise ValueError: If the action space type is not supported + + :return: The action in the Gymnasium format + :rtype: Any supported Gymnasium action space + """ + space = self._env.action_space if self._vectorized else self.action_space + + if self._vectorized: + if isinstance(space, gymnasium.spaces.MultiDiscrete): + return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape) + elif isinstance(space, gymnasium.spaces.Tuple): + if isinstance(space[0], gymnasium.spaces.Box): + return np.array(actions.cpu().numpy(), dtype=space[0].dtype).reshape(space.shape) + elif isinstance(space[0], gymnasium.spaces.Discrete): + return np.array(actions.cpu().numpy(), dtype=space[0].dtype).reshape(-1) + elif isinstance(space, gymnasium.spaces.Discrete): + return actions.item() + elif isinstance(space, gymnasium.spaces.Box): + return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape) + raise ValueError("Action space type {} not supported. 
Please report this issue".format(type(space))) + + def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Any]: + """Perform a step in the environment + + :param actions: The actions to perform + :type actions: torch.Tensor + + :return: The state, the reward, the done flag, and the info + :rtype: tuple of torch.Tensor and any other info + """ + if self._drepecated_api: + observation, reward, done, info = self._env.step(self._tensor_to_action(actions)) + else: + observation, reward, termination, truncation, info = self._env.step(self._tensor_to_action(actions)) + if type(termination) is bool: + done = termination or truncation + else: + done = np.logical_or(termination, truncation) + # convert response to torch + return self._observation_to_tensor(observation), \ + torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ + torch.tensor(done, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ + info + + def reset(self) -> torch.Tensor: + """Reset the environment + + :return: The state of the environment + :rtype: torch.Tensor + """ + if self._drepecated_api: + observation = self._env.reset() + else: + observation, info = self._env.reset() + return self._observation_to_tensor(observation) + + def render(self, *args, **kwargs) -> None: + """Render the environment + """ + self._env.render(*args, **kwargs) + + def close(self) -> None: + """Close the environment + """ + self._env.close() + + class DeepMindWrapper(Wrapper): def __init__(self, env: Any) -> None: """DeepMind environment wrapper @@ -576,7 +723,7 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: >>> env = wrap_env(env) :param env: The environment to be wrapped - :type env: gym.Env, dm_env.Environment or VecTask + :type env: gym.Env, gymnasium.Env, dm_env.Environment or VecTask :param wrapper: The type of wrapper to use (default: "auto"). If ``"auto"``, the wrapper will be automatically selected based on the environment class. 
The supported wrappers are described in the following table: @@ -590,6 +737,8 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: +====================+=========================+ |OpenAI Gym |``"gym"`` | +--------------------+-------------------------+ + |Gymnasium |``"gymnasium"`` | + +--------------------+-------------------------+ |DeepMind |``"dm"`` | +--------------------+-------------------------+ |Isaac Gym preview 2 |``"isaacgym-preview2"`` | @@ -623,6 +772,10 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: if verbose: logger.info("Environment wrapper: Gym") return GymWrapper(env) + elif isinstance(env, gymnasium.core.Env) or isinstance(env, gymnasium.core.Wrapper): + if verbose: + logger.info("Environment wrapper: Gymnasium") + return GymnasiumWrapper(env) elif "" in base_classes: if verbose: logger.info("Environment wrapper: DeepMind") @@ -638,6 +791,10 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: if verbose: logger.info("Environment wrapper: Gym") return GymWrapper(env) + elif wrapper == "gymnasium": + if verbose: + logger.info("Environment wrapper: gymnasium") + return GymnasiumWrapper(env) elif wrapper == "dm": if verbose: logger.info("Environment wrapper: DeepMind") diff --git a/skrl/resources/preprocessors/torch/running_standard_scaler.py b/skrl/resources/preprocessors/torch/running_standard_scaler.py index 2f673aea..413a5013 100644 --- a/skrl/resources/preprocessors/torch/running_standard_scaler.py +++ b/skrl/resources/preprocessors/torch/running_standard_scaler.py @@ -1,6 +1,7 @@ from typing import Union, Tuple import gym +import gymnasium import numpy as np import torch @@ -9,7 +10,7 @@ class RunningStandardScaler(nn.Module): def __init__(self, - size: Union[int, Tuple[int], gym.Space], + size: Union[int, Tuple[int], gym.Space, gymnasium.Space], epsilon: float = 1e-8, clip_threshold: float = 5.0, device: Union[str, torch.device] = "cuda:0") -> None: @@ -25,7 +26,7 @@ def __init__(self, >>> running_standard_scaler(data) :param size: Size of the input space - :type size: int, tuple or list of integers, or gym.Space + :type size: int, tuple or list of integers, gym.Space, or gymnasium.Space :param epsilon: Small number to avoid division by zero (default: 1e-8) :type epsilon: float :param clip_threshold: Threshold to clip the data (default: 5.0) @@ -44,11 +45,11 @@ def __init__(self, self.register_buffer("running_variance", torch.ones(size, dtype = torch.float64, device=device)) self.register_buffer("current_count", torch.ones((), dtype = torch.float64, device=device)) - def _get_space_size(self, space: Union[int, Tuple[int], gym.Space]) -> int: + def _get_space_size(self, space: Union[int, Tuple[int], gym.Space, gymnasium.Space]) -> int: """Get the size (number of elements) of a space :param space: Space or shape from which to obtain the number of elements - :type space: int, tuple or list of integers, or gym.Space + :type space: int, tuple or list of integers, gym.Space, or gymnasium.Space :raises ValueError: If the space is not supported @@ -66,6 +67,13 @@ def _get_space_size(self, space: Union[int, Tuple[int], gym.Space]) -> int: return np.prod(space.shape) elif issubclass(type(space), gym.spaces.Dict): return sum([self._get_space_size(space.spaces[key]) for key in space.spaces]) + elif issubclass(type(space), gymnasium.Space): + if issubclass(type(space), gymnasium.spaces.Discrete): + return 1 + elif issubclass(type(space), gymnasium.spaces.Box): + return 
np.prod(space.shape) + elif issubclass(type(space), gymnasium.spaces.Dict): + return sum([self._get_space_size(space.spaces[key]) for key in space.spaces]) raise ValueError("Space type {} not supported".format(type(space))) def _parallel_variance(self, input_mean: torch.Tensor, input_var: torch.Tensor, input_count: int) -> None: diff --git a/skrl/utils/model_instantiators.py b/skrl/utils/model_instantiators.py index 792b0eb9..645f5953 100644 --- a/skrl/utils/model_instantiators.py +++ b/skrl/utils/model_instantiators.py @@ -1,6 +1,7 @@ from typing import Union, Tuple, Optional import gym +import gymnasium from enum import Enum import torch @@ -130,8 +131,8 @@ def _generate_sequential(model: Model, return nn.Sequential(*input_layer, *hidden_layers, *output_layer) -def gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, +def gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, clip_log_std: bool = True, @@ -147,10 +148,10 @@ def gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space] :param observation_space: Observation/state space or shape (default: None). If it is not None, the num_observations property will contain the size of that space - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None). If it is not None, the num_actions property will contain the size of that space - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Device on which the model will be trained (default: "cuda:0") :type device: str or torch.device, optional :param clip_actions: Flag to indicate whether the actions should be clipped (default: False) @@ -222,8 +223,8 @@ def compute(self, states, taken_actions=None, role=""): max_log_std=max_log_std, metadata=metadata) -def multivariate_gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, +def multivariate_gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, clip_log_std: bool = True, @@ -239,10 +240,10 @@ def multivariate_gaussian_model(observation_space: Optional[Union[int, Tuple[int :param observation_space: Observation/state space or shape (default: None). If it is not None, the num_observations property will contain the size of that space - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None). 
If it is not None, the num_actions property will contain the size of that space - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Device on which the model will be trained (default: "cuda:0") :type device: str or torch.device, optional :param clip_actions: Flag to indicate whether the actions should be clipped (default: False) @@ -314,8 +315,8 @@ def compute(self, states, taken_actions=None, role=""): max_log_std=max_log_std, metadata=metadata) -def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, +def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", clip_actions: bool = False, input_shape: Shape = Shape.STATES, @@ -328,10 +329,10 @@ def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.S :param observation_space: Observation/state space or shape (default: None). If it is not None, the num_observations property will contain the size of that space - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None). If it is not None, the num_actions property will contain the size of that space - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") :type device: str or torch.device, optional :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: False) @@ -392,8 +393,8 @@ def compute(self, states, taken_actions=None, role=""): clip_actions=clip_actions, metadata=metadata) -def categorical_model(observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, +def categorical_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", unnormalized_log_prob: bool = False, input_shape: Shape = Shape.STATES, @@ -405,10 +406,10 @@ def categorical_model(observation_space: Optional[Union[int, Tuple[int], gym.Spa :param observation_space: Observation/state space or shape (default: None). If it is not None, the num_observations property will contain the size of that space - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None). 
If it is not None, the num_actions property will contain the size of that space - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") :type device: str or torch.device, optional :param unnormalized_log_prob: Flag to indicate how to be interpreted the model's output (default: True). From 0484767248f17397581110be161deabbedc99f5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 26 Oct 2022 09:12:31 +0200 Subject: [PATCH 025/157] Add Gymnasium support for memories and models --- skrl/memories/torch/base.py | 21 +++++-- skrl/models/torch/base.py | 72 ++++++++++++++-------- skrl/models/torch/deterministic.py | 4 +- skrl/models/torch/gaussian.py | 4 +- skrl/models/torch/multivariate_gaussian.py | 4 +- 5 files changed, 73 insertions(+), 32 deletions(-) diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py index 41c4dff3..11f4f35b 100644 --- a/skrl/memories/torch/base.py +++ b/skrl/memories/torch/base.py @@ -3,6 +3,7 @@ import os import csv import gym +import gymnasium import operator import datetime import functools @@ -74,11 +75,11 @@ def __len__(self) -> int: """ return self.memory_size * self.num_envs if self.filled else self.memory_index * self.num_envs + self.env_index - def _get_space_size(self, space: Union[int, Tuple[int], gym.Space]) -> int: + def _get_space_size(self, space: Union[int, Tuple[int], gym.Space, gymnasium.Space]) -> int: """Get the size (number of elements) of a space :param space: Space or shape from which to obtain the number of elements - :type space: int, tuple or list of integers, or gym.Space + :type space: int, tuple or list of integers, gym.Space, or gymnasium.Space :raises ValueError: If the space is not supported @@ -96,6 +97,13 @@ def _get_space_size(self, space: Union[int, Tuple[int], gym.Space]) -> int: return np.prod(space.shape) elif issubclass(type(space), gym.spaces.Dict): return sum([self._get_space_size(space.spaces[key]) for key in space.spaces]) + elif issubclass(type(space), gymnasium.Space): + if issubclass(type(space), gymnasium.spaces.Discrete): + return 1 + elif issubclass(type(space), gymnasium.spaces.Box): + return np.prod(space.shape) + elif issubclass(type(space), gymnasium.spaces.Dict): + return sum([self._get_space_size(space.spaces[key]) for key in space.spaces]) raise ValueError("Space type {} not supported".format(type(space))) def share_memory(self) -> None: @@ -142,7 +150,10 @@ def set_tensor_by_name(self, name: str, tensor: torch.Tensor) -> None: with torch.no_grad(): self.tensors[name].copy_(tensor) - def create_tensor(self, name: str, size: Union[int, Tuple[int], gym.Space], dtype: Optional[torch.dtype] = None) -> bool: + def create_tensor(self, + name: str, + size: Union[int, Tuple[int], gym.Space, gymnasium.Space], + dtype: Optional[torch.dtype] = None) -> bool: """Create a new internal tensor in memory The tensor will have a 3-components shape (memory size, number of environments, size). @@ -151,8 +162,8 @@ def create_tensor(self, name: str, size: Union[int, Tuple[int], gym.Space], dtyp :param name: Tensor name (the name has to follow the python PEP 8 style) :type name: str :param size: Number of elements in the last dimension (effective data size). 
- The product of the elements will be computed for collections or gym spaces types - :type size: int, tuple or list of integers or gym.Space + The product of the elements will be computed for collections or gym/gymnasium spaces types + :type size: int, tuple or list of integers, gym.Space, or gymnasium.Space :param dtype: Data type (torch.dtype). If None, the global default torch data type will be used (default) :type dtype: torch.dtype or None, optional diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 32c3780e..d860b359 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -1,6 +1,7 @@ from typing import Optional, Union, Mapping, Sequence import gym +import gymnasium import collections import numpy as np @@ -11,25 +12,25 @@ class Model(torch.nn.Module): def __init__(self, - observation_space: Union[int, Sequence[int], gym.Space], - action_space: Union[int, Sequence[int], gym.Space], + observation_space: Union[int, Sequence[int], gym.Space, gymnasium.Space], + action_space: Union[int, Sequence[int], gym.Space, gymnasium.Space], device: Union[str, torch.device] = "cuda:0") -> None: """Base class representing a function approximator The following properties are defined: - ``device`` (torch.device): Device to be used for the computations - - ``observation_space`` (int, sequence of int, gym.Space): Observation/state space - - ``action_space`` (int, sequence of int, gym.Space): Action space + - ``observation_space`` (int, sequence of int, gym.Space, gymnasium.Space): Observation/state space + - ``action_space`` (int, sequence of int, gym.Space, gymnasium.Space): Action space - ``num_observations`` (int): Number of elements in the observation/state space - ``num_actions`` (int): Number of elements in the action space :param observation_space: Observation/state space or shape. The ``num_observations`` property will contain the size of that space - :type observation_space: int, sequence of int, gym.Space + :type observation_space: int, sequence of int, gym.Space, gymnasium.Space :param action_space: Action space or shape. The ``num_actions`` property will contain the size of that space - :type action_space: int, sequence of int, gym.Space + :type action_space: int, sequence of int, gym.Space, gymnasium.Space :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) :type device: str or torch.device, optional @@ -62,12 +63,12 @@ def act(self, states, taken_actions=None, role=""): self._random_distribution = None def _get_space_size(self, - space: Union[int, Sequence[int], gym.Space], + space: Union[int, Sequence[int], gym.Space, gymnasium.Space], number_of_elements: bool = True) -> int: """Get the size (number of elements) of a space :param space: Space or shape from which to obtain the number of elements - :type space: int, sequence of int, or gym.Space + :type space: int, sequence of int, gym.Space, or gymnasium.Space :param number_of_elements: Whether the number of elements occupied by the space is returned (default: ``True``). If ``False``, the shape of the space is returned. 
It only affects Discrete spaces :type number_of_elements: bool, optional @@ -122,15 +123,25 @@ def _get_space_size(self, size = np.prod(space.shape) elif issubclass(type(space), gym.spaces.Dict): size = sum([self._get_space_size(space.spaces[key], number_of_elements) for key in space.spaces]) + elif issubclass(type(space), gymnasium.Space): + if issubclass(type(space), gymnasium.spaces.Discrete): + if number_of_elements: + size = space.n + else: + size = 1 + elif issubclass(type(space), gymnasium.spaces.Box): + size = np.prod(space.shape) + elif issubclass(type(space), gymnasium.spaces.Dict): + size = sum([self._get_space_size(space.spaces[key], number_of_elements) for key in space.spaces]) if size is None: raise ValueError("Space type {} not supported".format(type(space))) return int(size) def tensor_to_space(self, tensor: torch.Tensor, - space: gym.Space, + space: Union[gym.Space, gymnasium.Space], start: int = 0) -> Union[torch.Tensor, dict]: - """Map a flat tensor to a Gym space + """Map a flat tensor to a Gym/Gymnasium space The mapping is done in the following way: @@ -142,7 +153,7 @@ def tensor_to_space(self, :param tensor: Tensor to map from :type tensor: torch.Tensor :param space: Space to map the tensor to - :type space: gym.Space + :type space: gym.Space or gymnasium.Space :param start: Index of the first element of the tensor to map (default: ``0``) :type start: int, optional @@ -162,17 +173,30 @@ def tensor_to_space(self, [ 0.1000, 0.2000, 0.3000]]]), 'b': tensor([[2.]])} """ - if issubclass(type(space), gym.spaces.Discrete): - return tensor - elif issubclass(type(space), gym.spaces.Box): - return tensor.view(tensor.shape[0], *space.shape) - elif issubclass(type(space), gym.spaces.Dict): - output = {} - for k in sorted(space.keys()): - end = start + self._get_space_size(space[k], number_of_elements=False) - output[k] = self.tensor_to_space(tensor[:, start:end], space[k], end) - start = end - return output + if issubclass(type(space), gym.Space): + if issubclass(type(space), gym.spaces.Discrete): + return tensor + elif issubclass(type(space), gym.spaces.Box): + return tensor.view(tensor.shape[0], *space.shape) + elif issubclass(type(space), gym.spaces.Dict): + output = {} + for k in sorted(space.keys()): + end = start + self._get_space_size(space[k], number_of_elements=False) + output[k] = self.tensor_to_space(tensor[:, start:end], space[k], end) + start = end + return output + else: + if issubclass(type(space), gymnasium.spaces.Discrete): + return tensor + elif issubclass(type(space), gymnasium.spaces.Box): + return tensor.view(tensor.shape[0], *space.shape) + elif issubclass(type(space), gymnasium.spaces.Dict): + output = {} + for k in sorted(space.keys()): + end = start + self._get_space_size(space[k], number_of_elements=False) + output[k] = self.tensor_to_space(tensor[:, start:end], space[k], end) + start = end + return output raise ValueError("Space type {} not supported".format(type(space))) def random_act(self, @@ -195,10 +219,10 @@ def random_act(self, :rtype: sequence of torch.Tensor """ # discrete action space (Discrete) - if issubclass(type(self.action_space), gym.spaces.Discrete): + if issubclass(type(self.action_space), gym.spaces.Discrete) or issubclass(type(self.action_space), gymnasium.spaces.Discrete): return torch.randint(self.action_space.n, (states.shape[0], 1), device=self.device), None, None # continuous action space (Box) - elif issubclass(type(self.action_space), gym.spaces.Box): + elif issubclass(type(self.action_space), gym.spaces.Box) or 
issubclass(type(self.action_space), gymnasium.spaces.Box): if self._random_distribution is None: self._random_distribution = torch.distributions.uniform.Uniform( low=torch.tensor(self.action_space.low[0], device=self.device, dtype=torch.float32), diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index 2aa2e669..486fb75d 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -1,6 +1,7 @@ from typing import Optional, Sequence import gym +import gymnasium import torch @@ -52,7 +53,8 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: """ if not hasattr(self, "_d_clip_actions"): self._d_clip_actions = {} - self._d_clip_actions[role] = clip_actions and issubclass(type(self.action_space), gym.Space) + self._d_clip_actions[role] = clip_actions and (issubclass(type(self.action_space), gym.Space) or \ + issubclass(type(self.action_space), gymnasium.Space)) if self._d_clip_actions[role]: self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32) diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index 4beb9936..c7120292 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -1,6 +1,7 @@ from typing import Optional, Sequence import gym +import gymnasium import torch from torch.distributions import Normal @@ -73,7 +74,8 @@ def __init__(self, """ if not hasattr(self, "_g_clip_actions"): self._g_clip_actions = {} - self._g_clip_actions[role] = clip_actions and issubclass(type(self.action_space), gym.Space) + self._g_clip_actions[role] = clip_actions and (issubclass(type(self.action_space), gym.Space) or \ + issubclass(type(self.action_space), gymnasium.Space)) if self._g_clip_actions[role]: self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32) diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index 6ee66662..06212d34 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -1,6 +1,7 @@ from typing import Optional, Sequence import gym +import gymnasium import torch from torch.distributions import MultivariateNormal @@ -66,7 +67,8 @@ def __init__(self, """ if not hasattr(self, "_mg_clip_actions"): self._mg_clip_actions = {} - self._mg_clip_actions[role] = clip_actions and issubclass(type(self.action_space), gym.Space) + self._mg_clip_actions[role] = clip_actions and (issubclass(type(self.action_space), gym.Space) or \ + issubclass(type(self.action_space), gymnasium.Space)) if self._mg_clip_actions[role]: self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device, dtype=torch.float32) From 7fec5901e1a99e4d5ada418c3b16a314711f0aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 26 Oct 2022 09:24:32 +0200 Subject: [PATCH 026/157] Add Gymnasium support for agents --- skrl/agents/torch/a2c/a2c.py | 10 +++++----- skrl/agents/torch/amp/amp.py | 14 +++++++------- skrl/agents/torch/base.py | 10 +++++----- skrl/agents/torch/cem/cem.py | 10 +++++----- skrl/agents/torch/ddpg/ddpg.py | 10 +++++----- skrl/agents/torch/dqn/ddqn.py | 10 +++++----- skrl/agents/torch/dqn/dqn.py | 10 +++++----- skrl/agents/torch/ppo/ppo.py | 10 +++++----- skrl/agents/torch/q_learning/q_learning.py | 10 +++++----- skrl/agents/torch/sac/sac.py | 10 +++++----- skrl/agents/torch/sarsa/sarsa.py | 10 +++++----- skrl/agents/torch/td3/td3.py | 10 +++++----- 
skrl/agents/torch/trpo/trpo.py | 10 +++++----- 13 files changed, 67 insertions(+), 67 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 8e8705e7..3c414b46 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import itertools @@ -57,8 +57,8 @@ class A2C(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Advantage Actor Critic (A2C) @@ -72,9 +72,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 3b6577df..5b8915ca 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -1,6 +1,6 @@ from typing import Callable, Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import math import copy import itertools @@ -75,11 +75,11 @@ class AMP(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None, - amp_observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + amp_observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, motion_dataset: Optional[Memory] = None, reply_buffer: Optional[Memory] = None, collect_reference_motions: Optional[Callable[[int], torch.Tensor]] = None, @@ -98,15 +98,15 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, 
optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict :param amp_observation_space: AMP observation/state space or shape (default: None) - :type amp_observation_space: int, tuple or list of integers, gym.Space or None + :type amp_observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None :param motion_dataset: Reference motion dataset: M (default: None) :type motion_dataset: skrl.memory.torch.Memory or None :param reply_buffer: Reply buffer for preventing discriminator overfitting: B (default: None) diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 8afcbe43..c03068fe 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -1,7 +1,7 @@ from typing import Union, Mapping, Tuple, Dict, Any, Optional import os -import gym +import gym, gymnasium import copy import datetime import collections @@ -19,8 +19,8 @@ class Agent: def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Base class that represent a RL agent @@ -32,9 +32,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 28551f82..8a02c7a2 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import torch @@ -48,8 +48,8 @@ class CEM(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Cross-Entropy Method (CEM) @@ -63,9 +63,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or 
shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index d7c34ed2..1557d00d 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import torch @@ -57,8 +57,8 @@ class DDPG(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Deep Deterministic Policy Gradient (DDPG) @@ -72,9 +72,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 3394efb7..02b252eb 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import math @@ -59,8 +59,8 @@ class DDQN(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Double Deep Q-Network (DDQN) @@ -74,9 +74,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list 
of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index b5a56a4e..c2171247 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import math @@ -59,8 +59,8 @@ class DQN(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Deep Q-Network (DQN) @@ -74,9 +74,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 86c34297..35885119 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import itertools @@ -65,8 +65,8 @@ class PPO(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Proximal Policy Optimization (PPO) @@ -80,9 +80,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, 
gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 950fb350..15beb8c8 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import torch @@ -39,8 +39,8 @@ class Q_LEARNING(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Q-learning @@ -54,9 +54,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index d0c2263d..f48f61c3 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import itertools import numpy as np @@ -57,8 +57,8 @@ class SAC(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Soft Actor-Critic (SAC) @@ -72,9 +72,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, 
optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index 00702291..c0243f55 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import torch @@ -39,8 +39,8 @@ class SARSA(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """State Action Reward State Action (SARSA) @@ -54,9 +54,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index ba9b4f55..f1de6330 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import itertools @@ -62,8 +62,8 @@ class TD3(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Twin Delayed DDPG (TD3) @@ -77,9 +77,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary diff 
--git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index 36184555..ec76f085 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -1,6 +1,6 @@ from typing import Union, Tuple, Dict, Any, Optional -import gym +import gym, gymnasium import copy import torch @@ -65,8 +65,8 @@ class TRPO(Agent): def __init__(self, models: Dict[str, Model], memory: Optional[Union[Memory, Tuple[Memory]]] = None, - observation_space: Optional[Union[int, Tuple[int], gym.Space]] = None, - action_space: Optional[Union[int, Tuple[int], gym.Space]] = None, + observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, + action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, device: Union[str, torch.device] = "cuda:0", cfg: Optional[dict] = None) -> None: """Trust Region Policy Optimization (TRPO) @@ -80,9 +80,9 @@ def __init__(self, for the rest only the environment transitions will be added :type memory: skrl.memory.torch.Memory, list of skrl.memory.torch.Memory or None :param observation_space: Observation/state space or shape (default: None) - :type observation_space: int, tuple or list of integers, gym.Space or None, optional + :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) - :type action_space: int, tuple or list of integers, gym.Space or None, optional + :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param device: Computing device (default: "cuda:0") :type device: str or torch.device, optional :param cfg: Configuration dictionary From 0c932513652aa653b953a88eef4999738569f098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 26 Oct 2022 10:13:52 +0200 Subject: [PATCH 027/157] Add Gymnasium to requirements --- docs/requirements.txt | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index dc80cf40..c8ca4fdc 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,6 +3,7 @@ sphinx_rtd_theme sphinx-autobuild sphinx-tabs==3.2.0 gym +gymnasium torch tensorboard tqdm diff --git a/setup.py b/setup.py index 98f03c57..d301bf3c 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,7 @@ # dependencies INSTALL_REQUIRES = [ "gym", + "gymnasium", "torch", "tensorboard", "tqdm", From 4fa464bde2ff15763ba18de067bd878aa9dcb8b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 26 Oct 2022 10:22:16 +0200 Subject: [PATCH 028/157] Add support for Gymnasium to docs --- docs/source/index.rst | 10 +-- docs/source/intro/getting_started.rst | 82 +++++++++++++----- docs/source/intro/installation.rst | 2 +- docs/source/modules/skrl.agents.a2c.rst | 4 +- docs/source/modules/skrl.agents.amp.rst | 4 +- docs/source/modules/skrl.agents.cem.rst | 4 +- docs/source/modules/skrl.agents.ddpg.rst | 4 +- docs/source/modules/skrl.agents.ddqn.rst | 4 +- docs/source/modules/skrl.agents.dqn.rst | 4 +- docs/source/modules/skrl.agents.ppo.rst | 4 +- .../source/modules/skrl.agents.q_learning.rst | 4 +- docs/source/modules/skrl.agents.sac.rst | 4 +- docs/source/modules/skrl.agents.sarsa.rst | 4 +- docs/source/modules/skrl.agents.td3.rst | 4 +- docs/source/modules/skrl.agents.trpo.rst | 4 +- docs/source/modules/skrl.envs.wrapping.rst | 86 ++++++++++++++----- 16 files changed, 153 insertions(+), 75 deletions(-) diff --git a/docs/source/index.rst 
b/docs/source/index.rst index 749144c1..f19f357c 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,14 +1,14 @@ SKRL - Reinforcement Learning library (|version|) ================================================= -**skrl** is an open-source modular library for Reinforcement Learning written in Python (using `PyTorch `_) and designed with a focus on readability, simplicity, and transparency of algorithm implementation. In addition to supporting the `OpenAI Gym `_ and `DeepMind `_ environment interfaces, it allows loading and configuring `NVIDIA Isaac Gym `_ and `NVIDIA Omniverse Isaac Gym `_ environments, enabling agents' simultaneous training by scopes (subsets of environments among all available environments), which may or may not share resources, in the same run +**skrl** is an open-source modular library for Reinforcement Learning written in Python (using `PyTorch `_) and designed with a focus on readability, simplicity, and transparency of algorithm implementation. In addition to supporting the OpenAI `Gym `_ / Farama `Gymnasium `_ and `DeepMind `_ environment interfaces, it allows loading and configuring `NVIDIA Isaac Gym `_ and `NVIDIA Omniverse Isaac Gym `_ environments, enabling agents' simultaneous training by scopes (subsets of environments among all available environments), which may or may not share resources, in the same run **Main features:** * Clean code * Modularity and reusability * Documented library, code and implementations - * Support for OpenAI Gym (single and vectorized), DeepMind, NVIDIA Isaac Gym (preview 2, 3 and 4) and NVIDIA Omniverse Isaac Gym environments - * Simultaneous learning by scopes in OpenAI Gym (vectorized), NVIDIA Isaac Gym and NVIDIA Omniverse Isaac Gym + * Support for Gym/Gymnasium (single and vectorized), DeepMind, NVIDIA Isaac Gym (preview 2, 3 and 4) and NVIDIA Omniverse Isaac Gym environments + * Simultaneous learning by scopes in Gym/Gymnasium (vectorized), NVIDIA Isaac Gym and NVIDIA Omniverse Isaac Gym .. warning:: @@ -90,9 +90,9 @@ Agents Environments ^^^^^^^^^^^^ - Definition of the Isaac Gym (preview 2, 3 and 4) and Omniverse Isaac Gym environment loaders, and wrappers for the OpenAI Gym, DeepMind, Isaac Gym and Omniverse Isaac Gym environments + Definition of the Isaac Gym (preview 2, 3 and 4) and Omniverse Isaac Gym environment loaders, and wrappers for the Gym/Gymnasium, DeepMind, Isaac Gym and Omniverse Isaac Gym environments - * :doc:`Wrapping ` **OpenAI Gym**, **DeepMind**, **Isaac Gym** and **Omniverse Isaac Gym** environments + * :doc:`Wrapping ` **Gym/Gymnasium**, **DeepMind**, **Isaac Gym** and **Omniverse Isaac Gym** environments * Loading :doc:`Isaac Gym environments ` * Loading :doc:`Omniverse Isaac Gym environments ` diff --git a/docs/source/intro/getting_started.rst b/docs/source/intro/getting_started.rst index ee60e249..7ca7c12e 100644 --- a/docs/source/intro/getting_started.rst +++ b/docs/source/intro/getting_started.rst @@ -19,9 +19,9 @@ At each step (also called timestep) of interaction with the environment, the age 1. Environments --------------- -The environment plays a fundamental role in the definition of the RL schema. For example, the selection of the agent depends strongly on the observation and action space nature. There are several interfaces to interact with the environments such as OpenAI Gym or DeepMind. However, each of them has a different API and work with non-compatible data types. +The environment plays a fundamental role in the definition of the RL schema. 
For example, the selection of the agent depends strongly on the observation and action space nature. There are several interfaces to interact with the environments such as OpenAI Gym / Farama Gymnasium or DeepMind. However, each of them has a different API and work with non-compatible data types. -skrl offers a function to **wrap environments** based on the OpenAI Gym, DeepMind, Isaac Gym and Omniverse Isaac Gym interfaces (the last two have slight differences with OpenAI Gym) and offer, for library components, a common interface (based on OpenAI Gym) as shown in the following figure. Refer to the :doc:`Wrapping <../modules/skrl.envs.wrapping>` section for more information. +skrl offers a function to **wrap environments** based on the Gym/Gymnasium, DeepMind, Isaac Gym and Omniverse Isaac Gym interfaces (the last two have slight differences with Gym) and offer, for library components, a common interface (based on Gym/Gymnasium) as shown in the following figure. Refer to the :doc:`Wrapping <../modules/skrl.envs.wrapping>` section for more information. .. image:: ../_static/imgs/wrapping.svg :width: 100% @@ -131,39 +131,77 @@ Within the methods and properties defined in the wrapped environment, the observ # wrap the environment env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview2")' - .. tab:: OpenAI Gym + .. tab:: Gym / Gymnasium .. tabs:: - .. tab:: Single environment + .. tab:: Gym - .. code-block:: python + .. tabs:: - # import the environment wrapper and gym - from skrl.envs.torch import wrap_env - import gym + .. tab:: Single environment - # load environment - env = gym.make('Pendulum-v1') + .. code-block:: python - # wrap the environment - env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' + # import the environment wrapper and gym + from skrl.envs.torch import wrap_env + import gym - .. tab:: Vectorized environment + # load environment + env = gym.make('Pendulum-v1') - Visit the OpenAI Gym documentation (`Vector API `_) for more information about the creation and usage of vectorized environments. + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' - .. code-block:: python + .. tab:: Vectorized environment - # import the environment wrapper and gym - from skrl.envs.torch import wrap_env - import gym + Visit the OpenAI Gym documentation (`Vector `__) for more information about the creation and usage of vectorized environments. - # load a vectorized environment - env = gym.vector.make("Pendulum-v1", num_envs=10, asynchronous=False) + .. code-block:: python - # wrap the environment - env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' + # import the environment wrapper and gym + from skrl.envs.torch import wrap_env + import gym + + # load a vectorized environment + env = gym.vector.make("Pendulum-v1", num_envs=10, asynchronous=False) + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' + + .. tab:: Gymnasium + + .. tabs:: + + .. tab:: Single environment + + .. code-block:: python + + # import the environment wrapper and gymnasium + from skrl.envs.torch import wrap_env + import gymnasium as gym + + # load environment + env = gym.make('Pendulum-v1') + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gymnasium")' + + .. tab:: Vectorized environment + + Visit the Gymnasium documentation (`Vector `__) for more information about the creation and usage of vectorized environments. + + .. 
code-block:: python + + # import the environment wrapper and gymnasium + from skrl.envs.torch import wrap_env + import gymnasium as gym + + # load a vectorized environment + env = gym.vector.make("Pendulum-v1", num_envs=10, asynchronous=False) + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gymnasium")' .. tab:: DeepMind diff --git a/docs/source/intro/installation.rst b/docs/source/intro/installation.rst index b60e40f1..f7fbbd7c 100644 --- a/docs/source/intro/installation.rst +++ b/docs/source/intro/installation.rst @@ -10,7 +10,7 @@ Prerequisites **skrl** requires Python 3.6 or higher and the following libraries (they will be installed automatically): - * `gym `_ + * `gym `_ / `gymnasium `_ * `tqdm `_ * `packaging `_ * `torch `_ 1.8.0 or higher diff --git a/docs/source/modules/skrl.agents.a2c.rst b/docs/source/modules/skrl.agents.a2c.rst index d0952b78..38c1c6df 100644 --- a/docs/source/modules/skrl.agents.a2c.rst +++ b/docs/source/modules/skrl.agents.a2c.rst @@ -82,12 +82,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: Observation - .. centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.agents.amp.rst b/docs/source/modules/skrl.agents.amp.rst index 87d36ee3..b44170e3 100644 --- a/docs/source/modules/skrl.agents.amp.rst +++ b/docs/source/modules/skrl.agents.amp.rst @@ -103,12 +103,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: AMP observation - .. centered:: Observation - .. centered:: Action diff --git a/docs/source/modules/skrl.agents.cem.rst b/docs/source/modules/skrl.agents.cem.rst index 64314b4b..1b13f5f8 100644 --- a/docs/source/modules/skrl.agents.cem.rst +++ b/docs/source/modules/skrl.agents.cem.rst @@ -38,12 +38,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: Observation - .. centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.agents.ddpg.rst b/docs/source/modules/skrl.agents.ddpg.rst index 5c7d55a6..33bcfc9e 100644 --- a/docs/source/modules/skrl.agents.ddpg.rst +++ b/docs/source/modules/skrl.agents.ddpg.rst @@ -66,12 +66,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: Observation - .. 
centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.agents.ddqn.rst b/docs/source/modules/skrl.agents.ddqn.rst index 0577c971..3b2e12ca 100644 --- a/docs/source/modules/skrl.agents.ddqn.rst +++ b/docs/source/modules/skrl.agents.ddqn.rst @@ -45,12 +45,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: Observation - .. centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.agents.dqn.rst b/docs/source/modules/skrl.agents.dqn.rst index 490fff3f..02213752 100644 --- a/docs/source/modules/skrl.agents.dqn.rst +++ b/docs/source/modules/skrl.agents.dqn.rst @@ -45,12 +45,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: Observation - .. centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.agents.ppo.rst b/docs/source/modules/skrl.agents.ppo.rst index 5e7df5f7..d6c1e11e 100644 --- a/docs/source/modules/skrl.agents.ppo.rst +++ b/docs/source/modules/skrl.agents.ppo.rst @@ -98,12 +98,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: Observation - .. centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.agents.q_learning.rst b/docs/source/modules/skrl.agents.q_learning.rst index a42aa6c2..4c01c747 100644 --- a/docs/source/modules/skrl.agents.q_learning.rst +++ b/docs/source/modules/skrl.agents.q_learning.rst @@ -36,12 +36,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: Observation - .. centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.agents.sac.rst b/docs/source/modules/skrl.agents.sac.rst index c5b7cb31..6fee4b41 100644 --- a/docs/source/modules/skrl.agents.sac.rst +++ b/docs/source/modules/skrl.agents.sac.rst @@ -73,12 +73,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: Observation - .. centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.agents.sarsa.rst b/docs/source/modules/skrl.agents.sarsa.rst index 77bede3f..c880cb7c 100644 --- a/docs/source/modules/skrl.agents.sarsa.rst +++ b/docs/source/modules/skrl.agents.sarsa.rst @@ -35,12 +35,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. 
centered:: Observation - .. centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.agents.td3.rst b/docs/source/modules/skrl.agents.td3.rst index a95b8156..d532f211 100644 --- a/docs/source/modules/skrl.agents.td3.rst +++ b/docs/source/modules/skrl.agents.td3.rst @@ -76,12 +76,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: Observation - .. centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.agents.trpo.rst b/docs/source/modules/skrl.agents.trpo.rst index 7f97a800..b9df0b3b 100644 --- a/docs/source/modules/skrl.agents.trpo.rst +++ b/docs/source/modules/skrl.agents.trpo.rst @@ -136,12 +136,12 @@ Configuration and hyperparameters Spaces and models ^^^^^^^^^^^^^^^^^ -The implementation supports the following `Gym spaces `_ +The implementation supports the following `Gym spaces `_ / `Gymnasium spaces `_ .. list-table:: :header-rows: 1 - * - Gym spaces + * - Gym/Gymnasium spaces - .. centered:: Observation - .. centered:: Action * - Discrete diff --git a/docs/source/modules/skrl.envs.wrapping.rst b/docs/source/modules/skrl.envs.wrapping.rst index 2c905907..17e1800d 100644 --- a/docs/source/modules/skrl.envs.wrapping.rst +++ b/docs/source/modules/skrl.envs.wrapping.rst @@ -3,7 +3,7 @@ Wrapping This library works with a common API to interact with the following RL environments: -* `OpenAI Gym `_ (single and vectorized environments) +* OpenAI `Gym `_ / Farama `Gymnasium `_ (single and vectorized environments) * `DeepMind `_ * `NVIDIA Isaac Gym `_ (preview 2, 3 and 4) * `NVIDIA Omniverse Isaac Gym `_ @@ -129,41 +129,81 @@ Basic usage # wrap the environment env = wrap_env(env) # or 'env = wrap_env(env, wrapper="isaacgym-preview2")' - .. tab:: OpenAI Gym + .. tab:: Gym / Gymnasium .. tabs:: - .. tab:: Single environment + .. tab:: Gym - .. code-block:: python - :linenos: + .. tabs:: - # import the environment wrapper and gym - from skrl.envs.torch import wrap_env - import gym + .. tab:: Single environment - # load environment - env = gym.make('Pendulum-v1') + .. code-block:: python + :linenos: - # wrap the environment - env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' + # import the environment wrapper and gym + from skrl.envs.torch import wrap_env + import gym - .. tab:: Vectorized environment + # load environment + env = gym.make('Pendulum-v1') - Visit the OpenAI Gym documentation (`Vector API `_) for more information about the creation and usage of vectorized environments + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' - .. code-block:: python - :linenos: + .. tab:: Vectorized environment - # import the environment wrapper and gym - from skrl.envs.torch import wrap_env - import gym + Visit the Gym documentation (`Vector `__) for more information about the creation and usage of vectorized environments - # load a vectorized environment - env = gym.vector.make("Pendulum-v1", num_envs=10, asynchronous=False) + .. 
code-block:: python + :linenos: - # wrap the environment - env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' + # import the environment wrapper and gym + from skrl.envs.torch import wrap_env + import gym + + # load a vectorized environment + env = gym.vector.make("Pendulum-v1", num_envs=10, asynchronous=False) + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gym")' + + .. tab:: Gymnasium + + .. tabs:: + + .. tab:: Single environment + + .. code-block:: python + :linenos: + + # import the environment wrapper and gymnasium + from skrl.envs.torch import wrap_env + import gymnasium as gym + + # load environment + env = gym.make('Pendulum-v1') + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gymnasium")' + + .. tab:: Vectorized environment + + Visit the Gymnasium documentation (`Vector `__) for more information about the creation and usage of vectorized environments + + .. code-block:: python + :linenos: + + # import the environment wrapper and gymnasium + from skrl.envs.torch import wrap_env + import gymnasium as gym + + # load a vectorized environment + env = gym.vector.make("Pendulum-v1", num_envs=10, asynchronous=False) + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="gymnasium")' .. tab:: DeepMind From 7a2bd7831a85e7e0b630261dcea42249514ab860 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 26 Oct 2022 12:21:02 +0200 Subject: [PATCH 029/157] Add Gymnasium example files --- .../gymnasium/gymnasium_cartpole_cem.py | 82 ++++++++++++++ .../gymnasium/gymnasium_cartpole_cem_eval.py | 75 +++++++++++++ .../gymnasium/gymnasium_cartpole_dqn.py | 82 ++++++++++++++ .../gymnasium/gymnasium_cartpole_dqn_eval.py | 64 +++++++++++ .../gymnasium_frozen_lake_q_learning.py | 76 +++++++++++++ .../gymnasium_frozen_lake_q_learning_eval.py | 78 +++++++++++++ .../gymnasium/gymnasium_pendulum_ddpg.py | 104 ++++++++++++++++++ .../gymnasium/gymnasium_pendulum_ddpg_eval.py | 75 +++++++++++++ .../gymnasium/gymnasium_taxi_sarsa.py | 76 +++++++++++++ .../gymnasium/gymnasium_taxi_sarsa_eval.py | 78 +++++++++++++ .../gymnasium_vector_cartpole_dqn.py | 82 ++++++++++++++ ...gymnasium_vector_frozen_lake_q_learning.py | 76 +++++++++++++ .../gymnasium_vector_pendulum_ddpg.py | 104 ++++++++++++++++++ .../gymnasium/gymnasium_vector_taxi_sarsa.py | 76 +++++++++++++ 14 files changed, 1128 insertions(+) create mode 100644 docs/source/examples/gymnasium/gymnasium_cartpole_cem.py create mode 100644 docs/source/examples/gymnasium/gymnasium_cartpole_cem_eval.py create mode 100644 docs/source/examples/gymnasium/gymnasium_cartpole_dqn.py create mode 100644 docs/source/examples/gymnasium/gymnasium_cartpole_dqn_eval.py create mode 100644 docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning.py create mode 100644 docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py create mode 100644 docs/source/examples/gymnasium/gymnasium_pendulum_ddpg.py create mode 100644 docs/source/examples/gymnasium/gymnasium_pendulum_ddpg_eval.py create mode 100644 docs/source/examples/gymnasium/gymnasium_taxi_sarsa.py create mode 100644 docs/source/examples/gymnasium/gymnasium_taxi_sarsa_eval.py create mode 100644 docs/source/examples/gymnasium/gymnasium_vector_cartpole_dqn.py create mode 100644 docs/source/examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py create mode 100644 docs/source/examples/gymnasium/gymnasium_vector_pendulum_ddpg.py create mode 100644 
docs/source/examples/gymnasium/gymnasium_vector_taxi_sarsa.py diff --git a/docs/source/examples/gymnasium/gymnasium_cartpole_cem.py b/docs/source/examples/gymnasium/gymnasium_cartpole_cem.py new file mode 100644 index 00000000..4a4bb79e --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_cartpole_cem.py @@ -0,0 +1,82 @@ +import gymnasium as gym + +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, CategoricalMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.cem import CEM, CEM_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (categorical model) for the CEM agent using mixin +# - Policy: takes as input the environment's observation/state and returns an action +class Policy(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.linear_layer_1 = nn.Linear(self.num_observations, 64) + self.linear_layer_2 = nn.Linear(64, 64) + self.output_layer = nn.Linear(64, self.num_actions) + + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(states)) + x = F.relu(self.linear_layer_2(x)) + return self.output_layer(x) + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.make("CartPole-v1") +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("CartPole-v")][0] + print("CartPole-v0 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=1000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's model (function approximator). +# CEM requires 1 model, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.cem.html#spaces-and-models +models_cem = {} +models_cem["policy"] = Policy(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_cem.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. 
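+# Before configuring it, a minimal sketch (illustrative only, with a hypothetical helper
+# name) of what the CategoricalMixin used by the Policy above amounts to: the network
+# outputs are treated as unnormalized log-probabilities and a discrete action is sampled
+# from the resulting categorical distribution. The mixin handles this internally and the
+# helper below is never called in this example.
+import torch
+
+def sample_discrete_action(logits: torch.Tensor) -> torch.Tensor:
+    # logits: (num_envs, num_actions) -> sampled action indices with shape (num_envs, 1)
+    return torch.distributions.Categorical(logits=logits).sample().unsqueeze(-1)
+
+# Continuing with the agent configuration: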
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.cem.html#configuration-and-hyperparameters +cfg_cem = CEM_DEFAULT_CONFIG.copy() +cfg_cem["rollouts"] = 1000 +cfg_cem["learning_starts"] = 100 +# logging to TensorBoard and write checkpoints each 1000 and 5000 timesteps respectively +cfg_cem["experiment"]["write_interval"] = 1000 +cfg_cem["experiment"]["checkpoint_interval"] = 5000 + +agent_cem = CEM(models=models_cem, + memory=memory, + cfg=cfg_cem, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(env=env, agents=[agent_cem], cfg=cfg_trainer) + +# start training +trainer.train() diff --git a/docs/source/examples/gymnasium/gymnasium_cartpole_cem_eval.py b/docs/source/examples/gymnasium/gymnasium_cartpole_cem_eval.py new file mode 100644 index 00000000..41edad28 --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_cartpole_cem_eval.py @@ -0,0 +1,75 @@ +import gymnasium as gym + +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, CategoricalMixin +from skrl.agents.torch.cem import CEM, CEM_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (categorical model) for the CEM agent using mixin +# - Policy: takes as input the environment's observation/state and returns an action +class Policy(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.linear_layer_1 = nn.Linear(self.num_observations, 64) + self.linear_layer_2 = nn.Linear(64, 64) + self.output_layer = nn.Linear(64, self.num_actions) + + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(states)) + x = F.relu(self.linear_layer_2(x)) + return self.output_layer(x) + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.make("CartPole-v1") +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("CartPole-v")][0] + print("CartPole-v0 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's model (function approximators). +# CEM requires 1 model, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.cem.html#spaces-and-models +models_cem = {} +models_cem["policy"] = Policy(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
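+# A minimal sketch of a hypothetical helper (not used by this example) that locates the
+# newest best-agent checkpoint under the default "./runs" folder, as an alternative to
+# hard-coding a specific run name when calling agent_cem.load() further below.
+import glob
+import os
+
+def latest_best_checkpoint(runs_dir="./runs"):
+    # newest */checkpoints/best_agent.pt by modification time, or None if there is none
+    candidates = glob.glob(os.path.join(runs_dir, "*", "checkpoints", "best_agent.pt"))
+    return max(candidates, key=os.path.getmtime) if candidates else None
+
+# Continuing with the agent configuration: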
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.cem.html#configuration-and-hyperparameters +cfg_cem = CEM_DEFAULT_CONFIG.copy() +cfg_cem["rollouts"] = 1000 +cfg_cem["learning_starts"] = 100 +# logging to TensorBoard each 1000 timesteps and ignore checkpoints +cfg_cem["experiment"]["write_interval"] = 1000 +cfg_cem["experiment"]["checkpoint_interval"] = 0 + +agent_cem = CEM(models=models_cem, + memory=None, + cfg=cfg_cem, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoint +agent_cem.load("./runs/22-09-07_21-41-05-854385_CEM/checkpoints/best_agent.pt") + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(env=env, agents=[agent_cem], cfg=cfg_trainer) + +# evaluate the agent +trainer.eval() diff --git a/docs/source/examples/gymnasium/gymnasium_cartpole_dqn.py b/docs/source/examples/gymnasium/gymnasium_cartpole_dqn.py new file mode 100644 index 00000000..d55c1e48 --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_cartpole_dqn.py @@ -0,0 +1,82 @@ +import gymnasium as gym + +# Import the skrl components to build the RL system +from skrl.utils.model_instantiators import deterministic_model, Shape +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.dqn import DQN, DQN_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.make("CartPole-v1") +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("CartPole-v")][0] + print("CartPole-v0 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=50000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators) using the model instantiator utility +# DQN requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#spaces-and-models +models_dqn = {} +models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, + action_space=env.action_space, + device=device, + clip_actions=False, + input_shape=Shape.OBSERVATIONS, + hiddens=[64, 64], + hidden_activation=["relu", "relu"], + output_shape=Shape.ACTIONS, + output_activation=None, + output_scale=1.0) +models_dqn["target_q_network"] = deterministic_model(observation_space=env.observation_space, + action_space=env.action_space, + device=device, + clip_actions=False, + input_shape=Shape.OBSERVATIONS, + hiddens=[64, 64], + hidden_activation=["relu", "relu"], + output_shape=Shape.ACTIONS, + output_activation=None, + output_scale=1.0) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_dqn.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. 
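+# For comparison, a minimal sketch of roughly what the deterministic_model instantiator
+# above builds when written by hand with the mixin API (hidden sizes and activations
+# mirror the arguments above; the class is illustrative only and is not used below).
+import torch.nn as nn
+import torch.nn.functional as F
+from skrl.models.torch import Model, DeterministicMixin
+
+class QNetwork(DeterministicMixin, Model):
+    def __init__(self, observation_space, action_space, device, clip_actions=False):
+        Model.__init__(self, observation_space, action_space, device)
+        DeterministicMixin.__init__(self, clip_actions)
+        self.linear_layer_1 = nn.Linear(self.num_observations, 64)
+        self.linear_layer_2 = nn.Linear(64, 64)
+        self.output_layer = nn.Linear(64, self.num_actions)
+
+    def compute(self, states, taken_actions, role):
+        x = F.relu(self.linear_layer_1(states))
+        x = F.relu(self.linear_layer_2(x))
+        return self.output_layer(x)
+
+# Continuing with the agent configuration: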
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#configuration-and-hyperparameters +cfg_dqn = DQN_DEFAULT_CONFIG.copy() +cfg_dqn["learning_starts"] = 100 +cfg_dqn["exploration"]["final_epsilon"] = 0.04 +cfg_dqn["exploration"]["timesteps"] = 1500 +# logging to TensorBoard and write checkpoints each 1000 and 5000 timesteps respectively +cfg_dqn["experiment"]["write_interval"] = 1000 +cfg_dqn["experiment"]["checkpoint_interval"] = 5000 + +agent_dqn = DQN(models=models_dqn, + memory=memory, + cfg=cfg_dqn, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 50000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_dqn) + +# start training +trainer.train() diff --git a/docs/source/examples/gymnasium/gymnasium_cartpole_dqn_eval.py b/docs/source/examples/gymnasium/gymnasium_cartpole_dqn_eval.py new file mode 100644 index 00000000..49421302 --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_cartpole_dqn_eval.py @@ -0,0 +1,64 @@ +import gymnasium as gym + +# Import the skrl components to build the RL system +from skrl.utils.model_instantiators import deterministic_model, Shape +from skrl.agents.torch.dqn import DQN, DQN_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.make("CartPole-v1") +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("CartPole-v")][0] + print("CartPole-v0 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate only the policy for evaluation. +# DQN requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#spaces-and-models +models_dqn = {} +models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, + action_space=env.action_space, + device=device, + clip_actions=False, + input_shape=Shape.OBSERVATIONS, + hiddens=[64, 64], + hidden_activation=["relu", "relu"], + output_shape=Shape.ACTIONS, + output_activation=None, + output_scale=1.0) + + +# Configure and instantiate the agent. 
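+# A minimal sketch of greedy action selection at evaluation time: the action is simply
+# the argmax of the Q-network outputs (shown only to motivate setting the exploration
+# timesteps to 0 below; the agent performs this internally and the helper is not called).
+import torch
+
+def greedy_action(q_values: torch.Tensor) -> torch.Tensor:
+    # q_values: (num_envs, num_actions) -> action indices with shape (num_envs, 1)
+    return torch.argmax(q_values, dim=-1, keepdim=True)
+
+# Continuing with the agent configuration: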
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#configuration-and-hyperparameters +cfg_dqn = DQN_DEFAULT_CONFIG.copy() +cfg_dqn["exploration"]["timesteps"] = 0 +# # logging to TensorBoard each 1000 timesteps and ignore checkpoints +cfg_dqn["experiment"]["write_interval"] = 1000 +cfg_dqn["experiment"]["checkpoint_interval"] = 0 + +agent_dqn = DQN(models=models_dqn, + memory=None, + cfg=cfg_dqn, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoint +agent_dqn.load("./runs/22-09-10_10-48-10-551426_DQN/checkpoints/best_agent.pt") + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 50000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_dqn) + +# evaluate the agent +trainer.eval() diff --git a/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning.py b/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning.py new file mode 100644 index 00000000..b7a8f22e --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning.py @@ -0,0 +1,76 @@ +import gymnasium as gym + +import torch + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, TabularMixin +from skrl.agents.torch.q_learning import Q_LEARNING, Q_LEARNING_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (tabular model) for the SARSA agent using mixin +class EpilonGreedyPolicy(TabularMixin, Model): + def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) + + self.epsilon = epsilon + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) + + def compute(self, states, taken_actions, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + dim=-1, keepdim=True).view(-1,1) + + # choose random actions for exploration according to epsilon + indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + if indexes.numel(): + actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) + return actions + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.make("FrozenLake-v0") +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("FrozenLake-v")][0] + print("FrozenLake-v0 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's models (table) +# Q-learning requires 1 model, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#spaces-and-models +models_q_learning = {} +models_q_learning["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) + + +# Configure and instantiate the agent. 
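+# A minimal sketch of the tabular Q-learning update applied per transition, written for a
+# single-environment (num_states, num_actions) table; "alpha" and "gamma" mirror the
+# "alpha" and "discount_factor" entries set below (illustrative only, never called here).
+def q_learning_update(q_table, state, action, reward, next_state, alpha=0.4, gamma=0.999):
+    td_target = reward + gamma * q_table[next_state].max()
+    q_table[state, action] += alpha * (td_target - q_table[state, action])
+    return q_table
+
+# Continuing with the agent configuration: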
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#configuration-and-hyperparameters +cfg_q_learning = Q_LEARNING_DEFAULT_CONFIG.copy() +cfg_q_learning["discount_factor"] = 0.999 +cfg_q_learning["alpha"] = 0.4 +# logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively +cfg_q_learning["experiment"]["write_interval"] = 1600 +cfg_q_learning["experiment"]["checkpoint_interval"] = 8000 + +agent_q_learning = Q_LEARNING(models=models_q_learning, + memory=None, + cfg=cfg_q_learning, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 80000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_q_learning) + +# start training +trainer.train() diff --git a/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py b/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py new file mode 100644 index 00000000..466393f5 --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py @@ -0,0 +1,78 @@ +import gymnasium as gym + +import torch + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, TabularMixin +from skrl.agents.torch.q_learning import Q_LEARNING, Q_LEARNING_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (tabular model) for the SARSA agent using mixin +class EpilonGreedyPolicy(TabularMixin, Model): + def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) + + self.epsilon = epsilon + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) + + def compute(self, states, taken_actions, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + dim=-1, keepdim=True).view(-1,1) + + # choose random actions for exploration according to epsilon + indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + if indexes.numel(): + actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) + return actions + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.make("FrozenLake-v0") +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("FrozenLake-v")][0] + print("FrozenLake-v0 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's models (table) +# Q-learning requires 1 model, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#spaces-and-models +models_q_learning = {} +models_q_learning["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) + + +# Configure and instantiate the agent. 
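+# A minimal sketch, for illustration only: with a learned table, a fully greedy policy
+# takes the row-wise argmax (the policy above keeps epsilon=0.1, so it still explores
+# occasionally during evaluation). The helper is not called in this example.
+def greedy_tabular_action(q_table, state):
+    # q_table: (num_states, num_actions) -> index of the highest-valued action for `state`
+    return int(q_table[state].argmax())
+
+# Continuing with the agent configuration: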
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#configuration-and-hyperparameters +cfg_q_learning = Q_LEARNING_DEFAULT_CONFIG.copy() +cfg_q_learning["random_timesteps"] = 0 +# logging to TensorBoard and write checkpoints each 1600 and ignore checkpoints +cfg_q_learning["experiment"]["write_interval"] = 1600 +cfg_q_learning["experiment"]["checkpoint_interval"] = 0 + +agent_q_learning = Q_LEARNING(models=models_q_learning, + memory=None, + cfg=cfg_q_learning, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoint +agent_q_learning.load("./runs/22-09-10_17-54-20-381109_Q_LEARNING/checkpoints/best_agent.pt") + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 80000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_q_learning) + +# evaluate the agent +trainer.eval() diff --git a/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg.py b/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg.py new file mode 100644 index 00000000..716e50db --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg.py @@ -0,0 +1,104 @@ +import gymnasium as gym + +import torch +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG +from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the DDPG agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(states)) + x = F.relu(self.linear_layer_2(x)) + return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 + +class DeterministicCritic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(torch.cat([states, taken_actions], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x) + + +# Load and wrap the Gymnasium environment. 
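+# Note: the DeterministicActor above hard-codes the factor 2 to match Pendulum-v1's
+# action bound of [-2, 2]. A more generic sketch (assuming the wrapped environment
+# created below still exposes the underlying Box bounds) would read the scale from
+# the action space instead:
+#     action_scale = float(env.action_space.high[0])
+#     return action_scale * torch.tanh(self.action_layer(x))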
+# Note: the environment version may change depending on the gymnasium version +try: + env = gym.make("Pendulum-v1") +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("Pendulum-v")][0] + print("Pendulum-v1 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=15000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# DDPG requires 4 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models_ddpg["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models_ddpg["critic"] = DeterministicCritic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = DeterministicCritic(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_ddpg.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters +cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() +cfg_ddpg["exploration"]["noise"] = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.1, base_scale=1.0, device=device) +cfg_ddpg["batch_size"] = 100 +cfg_ddpg["random_timesteps"] = 100 +cfg_ddpg["learning_starts"] = 100 +# logging to TensorBoard and write checkpoints each 300 and 1500 timesteps respectively +cfg_ddpg["experiment"]["write_interval"] = 300 +cfg_ddpg["experiment"]["checkpoint_interval"] = 1500 + +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() diff --git a/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg_eval.py b/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg_eval.py new file mode 100644 index 00000000..38df39ef --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg_eval.py @@ -0,0 +1,75 @@ +import gymnasium as gym + +import torch +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define only the policy for evaluation +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = 
nn.Linear(300, self.num_actions) + + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(states)) + x = F.relu(self.linear_layer_2(x)) + return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.make("Pendulum-v1") +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("Pendulum-v")][0] + print("Pendulum-v1 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's policy. +# DDPG requires 4 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters +cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() +cfg_ddpg["random_timesteps"] = 0 +# logging to TensorBoard each 300 timesteps and ignore checkpoints +cfg_ddpg["experiment"]["write_interval"] = 300 +cfg_ddpg["experiment"]["checkpoint_interval"] = 0 + +agent_ddpg = DDPG(models=models_ddpg, + memory=None, + cfg=cfg_ddpg, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoint +agent_ddpg.load("./runs/22-09-10_11-02-46-773796_DDPG/checkpoints/agent_15000.pt") + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# evaluate the agent +trainer.eval() diff --git a/docs/source/examples/gymnasium/gymnasium_taxi_sarsa.py b/docs/source/examples/gymnasium/gymnasium_taxi_sarsa.py new file mode 100644 index 00000000..0e133dc5 --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_taxi_sarsa.py @@ -0,0 +1,76 @@ +import gymnasium as gym + +import torch + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, TabularMixin +from skrl.agents.torch.sarsa import SARSA, SARSA_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (tabular model) for the SARSA agent using mixin +class EpilonGreedyPolicy(TabularMixin, Model): + def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) + + self.epsilon = epsilon + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) + + def compute(self, states, taken_actions, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + dim=-1, keepdim=True).view(-1,1) + + # choose random actions for exploration according to epsilon + indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + if indexes.numel(): + actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) + return actions + + +# Load and wrap the Gymnasium environment. 
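+# Shape sketch for the epsilon-greedy lookup above (assuming states arrives as an
+# (N, 1) tensor of state indices, with N = num_envs and A = num_actions):
+#     q_rows = self.q_table[torch.arange(N).view(-1, 1), states]   # shape (N, 1, A)
+#     greedy = torch.argmax(q_rows, dim=-1)                        # shape (N, 1)
+# so each environment gets the greedy action for its current state before the
+# epsilon-random replacement is applied.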
+# Note: the environment version may change depending on the gymnasium version +try: + env = gym.make("Taxi-v3") +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("Taxi-v")][0] + print("Taxi-v3 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's models (table) +# SARSA requires 1 model, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#spaces-and-models +models_sarsa = {} +models_sarsa["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#configuration-and-hyperparameters +cfg_sarsa = SARSA_DEFAULT_CONFIG.copy() +cfg_sarsa["discount_factor"] = 0.999 +cfg_sarsa["alpha"] = 0.4 +# logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively +cfg_sarsa["experiment"]["write_interval"] = 1600 +cfg_sarsa["experiment"]["checkpoint_interval"] = 8000 + +agent_sarsa = SARSA(models=models_sarsa, + memory=None, + cfg=cfg_sarsa, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 80000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_sarsa) + +# start training +trainer.train() diff --git a/docs/source/examples/gymnasium/gymnasium_taxi_sarsa_eval.py b/docs/source/examples/gymnasium/gymnasium_taxi_sarsa_eval.py new file mode 100644 index 00000000..bb74e58d --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_taxi_sarsa_eval.py @@ -0,0 +1,78 @@ +import gymnasium as gym + +import torch + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, TabularMixin +from skrl.agents.torch.sarsa import SARSA, SARSA_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (tabular models) for the SARSA agent using a helper class +class EpilonGreedyPolicy(TabularMixin, Model): + def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) + + self.epsilon = epsilon + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) + + def compute(self, states, taken_actions, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + dim=-1, keepdim=True).view(-1,1) + + # choose random actions for exploration according to epsilon + indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + if indexes.numel(): + actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) + return actions + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.make("Taxi-v3") +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("Taxi-v")][0] + print("Taxi-v3 not found. 
Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's models (table) +# SARSA requires 1 model, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#spaces-and-models +models_sarsa = {} +models_sarsa["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#configuration-and-hyperparameters +cfg_sarsa = SARSA_DEFAULT_CONFIG.copy() +cfg_sarsa["random_timesteps"] = 0 +# logging to TensorBoard and write checkpoints each 1600 and ignore checkpoints +cfg_sarsa["experiment"]["write_interval"] = 1600 +cfg_sarsa["experiment"]["checkpoint_interval"] = 0 + +agent_sarsa = SARSA(models=models_sarsa, + memory=None, + cfg=cfg_sarsa, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoint +agent_sarsa.load("./runs/22-09-10_13-13-41-011999_SARSA/checkpoints/agent_80000.pt") + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 80000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_sarsa) + +# evaluate the agent +trainer.eval() diff --git a/docs/source/examples/gymnasium/gymnasium_vector_cartpole_dqn.py b/docs/source/examples/gymnasium/gymnasium_vector_cartpole_dqn.py new file mode 100644 index 00000000..6f89dbc5 --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_vector_cartpole_dqn.py @@ -0,0 +1,82 @@ +import gymnasium as gym + +# Import the skrl components to build the RL system +from skrl.utils.model_instantiators import deterministic_model, Shape +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.dqn import DQN, DQN_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.vector.make("CartPole-v1", num_envs=5, asynchronous=False) +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("CartPole-v")][0] + print("CartPole-v0 not found. 
Trying {}".format(env_id)) + env = gym.vector.make(env_id, num_envs=5, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=200000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators) using the model instantiator utility +# DQN requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#spaces-and-models +models_dqn = {} +models_dqn["q_network"] = deterministic_model(observation_space=env.observation_space, + action_space=env.action_space, + device=device, + clip_actions=False, + input_shape=Shape.OBSERVATIONS, + hiddens=[64, 64], + hidden_activation=["relu", "relu"], + output_shape=Shape.ACTIONS, + output_activation=None, + output_scale=1.0) +models_dqn["target_q_network"] = deterministic_model(observation_space=env.observation_space, + action_space=env.action_space, + device=device, + clip_actions=False, + input_shape=Shape.OBSERVATIONS, + hiddens=[64, 64], + hidden_activation=["relu", "relu"], + output_shape=Shape.ACTIONS, + output_activation=None, + output_scale=1.0) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_dqn.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#configuration-and-hyperparameters +cfg_dqn = DQN_DEFAULT_CONFIG.copy() +cfg_dqn["learning_starts"] = 100 +cfg_dqn["exploration"]["final_epsilon"] = 0.04 +cfg_dqn["exploration"]["timesteps"] = 1500 +# logging to TensorBoard and write checkpoints each 1000 and 5000 timesteps respectively +cfg_dqn["experiment"]["write_interval"] = 1000 +cfg_dqn["experiment"]["checkpoint_interval"] = 5000 + +agent_dqn = DQN(models=models_dqn, + memory=memory, + cfg=cfg_dqn, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 50000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_dqn) + +# start training +trainer.train() diff --git a/docs/source/examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py b/docs/source/examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py new file mode 100644 index 00000000..6312c034 --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py @@ -0,0 +1,76 @@ +import gymnasium as gym + +import torch + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, TabularMixin +from skrl.agents.torch.q_learning import Q_LEARNING, Q_LEARNING_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (tabular model) for the SARSA agent using mixin +class EpilonGreedyPolicy(TabularMixin, Model): + def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) + + self.epsilon = epsilon + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) + + def compute(self, states, 
taken_actions, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + dim=-1, keepdim=True).view(-1,1) + + # choose random actions for exploration according to epsilon + indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + if indexes.numel(): + actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) + return actions + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.vector.make("FrozenLake-v0", num_envs=10, asynchronous=False) +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("FrozenLake-v")][0] + print("FrozenLake-v0 not found. Trying {}".format(env_id)) + env = gym.vector.make(env_id, num_envs=10, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's models (table) +# Q-learning requires 1 model, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#spaces-and-models +models_q_learning = {} +models_q_learning["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.q_learning.html#configuration-and-hyperparameters +cfg_q_learning = Q_LEARNING_DEFAULT_CONFIG.copy() +cfg_q_learning["discount_factor"] = 0.999 +cfg_q_learning["alpha"] = 0.4 +# logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively +cfg_q_learning["experiment"]["write_interval"] = 1600 +cfg_q_learning["experiment"]["checkpoint_interval"] = 8000 + +agent_q_learning = Q_LEARNING(models=models_q_learning, + memory=None, + cfg=cfg_q_learning, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 80000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_q_learning) + +# start training +trainer.train() diff --git a/docs/source/examples/gymnasium/gymnasium_vector_pendulum_ddpg.py b/docs/source/examples/gymnasium/gymnasium_vector_pendulum_ddpg.py new file mode 100644 index 00000000..ee5c9892 --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_vector_pendulum_ddpg.py @@ -0,0 +1,104 @@ +import gymnasium as gym + +import torch +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG +from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the DDPG agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, 
device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(states)) + x = F.relu(self.linear_layer_2(x)) + return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 + +class DeterministicCritic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def compute(self, states, taken_actions, role): + x = F.relu(self.linear_layer_1(torch.cat([states, taken_actions], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x) + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.vector.make("Pendulum-v1", num_envs=10, asynchronous=False) +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("Pendulum-v")][0] + print("Pendulum-v1 not found. Trying {}".format(env_id)) + env = gym.vector.make(env_id, num_envs=10, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=100000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# DDPG requires 4 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models_ddpg["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models_ddpg["critic"] = DeterministicCritic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = DeterministicCritic(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_ddpg.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. 
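+# Background (the classical Ornstein-Uhlenbeck process, which the exploration noise
+# configured below is assumed to follow):
+#     dx_t = theta * (mu - x_t) * dt + sigma * dW_t
+# theta pulls the noise back towards its mean mu, sigma scales the random component,
+# and base_scale scales the sampled noise before it is added to the actions.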
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters +cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() +cfg_ddpg["exploration"]["noise"] = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.1, base_scale=1.0, device=device) +cfg_ddpg["batch_size"] = 100 +cfg_ddpg["random_timesteps"] = 100 +cfg_ddpg["learning_starts"] = 100 +# logging to TensorBoard and write checkpoints each 1000 and 1000 timesteps respectively +cfg_ddpg["experiment"]["write_interval"] = 1000 +cfg_ddpg["experiment"]["checkpoint_interval"] = 1000 + +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() diff --git a/docs/source/examples/gymnasium/gymnasium_vector_taxi_sarsa.py b/docs/source/examples/gymnasium/gymnasium_vector_taxi_sarsa.py new file mode 100644 index 00000000..e2e90c97 --- /dev/null +++ b/docs/source/examples/gymnasium/gymnasium_vector_taxi_sarsa.py @@ -0,0 +1,76 @@ +import gymnasium as gym + +import torch + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, TabularMixin +from skrl.agents.torch.sarsa import SARSA, SARSA_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (tabular model) for the SARSA agent using mixin +class EpilonGreedyPolicy(TabularMixin, Model): + def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): + Model.__init__(self, observation_space, action_space, device) + TabularMixin.__init__(self, num_envs) + + self.epsilon = epsilon + self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), + dtype=torch.float32, device=self.device) + + def compute(self, states, taken_actions, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + dim=-1, keepdim=True).view(-1,1) + + # choose random actions for exploration according to epsilon + indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + if indexes.numel(): + actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) + return actions + + +# Load and wrap the Gymnasium environment. +# Note: the environment version may change depending on the gymnasium version +try: + env = gym.vector.make("Taxi-v3", num_envs=10, asynchronous=False) +except (gym.error.DeprecatedEnv, gym.error.VersionNotFound) as e: + env_id = [spec for spec in gym.envs.registry if spec.startswith("Taxi-v")][0] + print("Taxi-v3 not found. Trying {}".format(env_id)) + env = gym.vector.make(env_id, num_envs=10, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's models (table) +# SARSA requires 1 model, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#spaces-and-models +models_sarsa = {} +models_sarsa["policy"] = EpilonGreedyPolicy(env.observation_space, env.action_space, device, num_envs=env.num_envs, epsilon=0.1) + + +# Configure and instantiate the agent. 
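+# Background sketch (assuming skrl's SARSA follows the standard on-policy update):
+# unlike Q-learning, the target uses the action a' actually taken in s':
+#     Q(s, a) <- Q(s, a) + alpha * (r + discount_factor * Q(s', a') - Q(s, a))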
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sarsa.html#configuration-and-hyperparameters +cfg_sarsa = SARSA_DEFAULT_CONFIG.copy() +cfg_sarsa["discount_factor"] = 0.999 +cfg_sarsa["alpha"] = 0.4 +# logging to TensorBoard and write checkpoints each 1600 and 8000 timesteps respectively +cfg_sarsa["experiment"]["write_interval"] = 1600 +cfg_sarsa["experiment"]["checkpoint_interval"] = 8000 + +agent_sarsa = SARSA(models=models_sarsa, + memory=None, + cfg=cfg_sarsa, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 80000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_sarsa) + +# start training +trainer.train() From 1fa62bc1ad1ee01a9613ce1a0b33bcf8939b225f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 26 Oct 2022 12:23:08 +0200 Subject: [PATCH 030/157] Fix Gymnasium action space conversion --- skrl/envs/torch/wrappers.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index 154e08c4..2c34a4cf 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -503,7 +503,7 @@ def _tensor_to_action(self, actions: torch.Tensor) -> Any: return np.array(actions.cpu().numpy(), dtype=space[0].dtype).reshape(space.shape) elif isinstance(space[0], gymnasium.spaces.Discrete): return np.array(actions.cpu().numpy(), dtype=space[0].dtype).reshape(-1) - elif isinstance(space, gymnasium.spaces.Discrete): + if isinstance(space, gymnasium.spaces.Discrete): return actions.item() elif isinstance(space, gymnasium.spaces.Box): return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape) @@ -518,14 +518,11 @@ def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch :return: The state, the reward, the done flag, and the info :rtype: tuple of torch.Tensor and any other info """ - if self._drepecated_api: - observation, reward, done, info = self._env.step(self._tensor_to_action(actions)) + observation, reward, termination, truncation, info = self._env.step(self._tensor_to_action(actions)) + if type(termination) is bool: + done = termination or truncation else: - observation, reward, termination, truncation, info = self._env.step(self._tensor_to_action(actions)) - if type(termination) is bool: - done = termination or truncation - else: - done = np.logical_or(termination, truncation) + done = np.logical_or(termination, truncation) # convert response to torch return self._observation_to_tensor(observation), \ torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ @@ -538,10 +535,7 @@ def reset(self) -> torch.Tensor: :return: The state of the environment :rtype: torch.Tensor """ - if self._drepecated_api: - observation = self._env.reset() - else: - observation, info = self._env.reset() + observation, info = self._env.reset() return self._observation_to_tensor(observation) def render(self, *args, **kwargs) -> None: From ed339bf7a33126e396da7ff439d80a96a112f6dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 26 Oct 2022 12:25:13 +0200 Subject: [PATCH 031/157] Add Gymnasium examples to docs --- docs/source/intro/examples.rst | 62 +++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 24 
deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 5bed5d54..fc010640 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -12,15 +12,15 @@ Examples
-Learning in an OpenAI Gym environment -------------------------------------- +Learning in a Gym/Gymnasium environment +--------------------------------------- -These examples perform the training of one agent in an OpenAI Gym environment (**one agent, one environment**) +These examples perform the training of one agent in a Gym/Gymnasium environment (**one agent, one environment**) .. image:: ../_static/imgs/example_gym.png :width: 100% :align: center - :alt: OpenAI Gym environments + :alt: Gym/Gymnasium environments .. raw:: html @@ -28,7 +28,7 @@ These examples perform the training of one agent in an OpenAI Gym environment (* The following components or practices are exemplified (highlighted): - - Load and wrap an OpenAI Gym environment: **Pendulum (DDPG)**, **CartPole (CEM)** + - Load and wrap a Gym environment: **Pendulum (DDPG)**, **CartPole (CEM)** - Instantiate models using the model instantiation utility: **CartPole (DQN)** - Create a tabular model (:math:`\epsilon`-greedy policy): **Taxi (SARSA)**, **FrozenLake (Q-Learning)** - Load a checkpoint during evaluation: **Pendulum (DDPG)**, **CartPole (CEM)**, **CartPole (DQN)**, **Taxi (SARSA)**, **FrozenLake (Q-Learning)** @@ -41,7 +41,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - :download:`gym_pendulum_ddpg.py <../examples/gym/gym_pendulum_ddpg.py>` + | :download:`gym_pendulum_ddpg.py <../examples/gym/gym_pendulum_ddpg.py>` + | :download:`gymnasium_pendulum_ddpg.py <../examples/gymnasium/gymnasium_pendulum_ddpg.py>` .. literalinclude:: ../examples/gym/gym_pendulum_ddpg.py :language: python @@ -49,7 +50,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Evaluation - :download:`gym_pendulum_ddpg_eval.py <../examples/gym/gym_pendulum_ddpg_eval.py>` + | :download:`gym_pendulum_ddpg_eval.py <../examples/gym/gym_pendulum_ddpg_eval.py>` + | :download:`gymnasium_pendulum_ddpg_eval.py <../examples/gymnasium/gymnasium_pendulum_ddpg_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments @@ -65,7 +67,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - :download:`gym_cartpole_cem.py <../examples/gym/gym_cartpole_cem.py>` + | :download:`gym_cartpole_cem.py <../examples/gym/gym_cartpole_cem.py>` + | :download:`gymnasium_cartpole_cem.py <../examples/gymnasium/gymnasium_cartpole_cem.py>` .. literalinclude:: ../examples/gym/gym_cartpole_cem.py :language: python @@ -73,7 +76,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Evaluation - :download:`gym_cartpole_cem_eval.py <../examples/gym/gym_cartpole_cem_eval.py>` + | :download:`gym_cartpole_cem_eval.py <../examples/gym/gym_cartpole_cem_eval.py>` + | :download:`gymnasium_cartpole_cem_eval.py <../examples/gymnasium/gymnasium_cartpole_cem_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments @@ -89,7 +93,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - :download:`gym_cartpole_dqn.py <../examples/gym/gym_cartpole_dqn.py>` + | :download:`gym_cartpole_dqn.py <../examples/gym/gym_cartpole_dqn.py>` + | :download:`gymnasium_cartpole_dqn.py <../examples/gymnasium/gymnasium_cartpole_dqn.py>` .. 
literalinclude:: ../examples/gym/gym_cartpole_dqn.py :language: python @@ -97,7 +102,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Evaluation - :download:`gym_cartpole_dqn_eval.py <../examples/gym/gym_cartpole_dqn_eval.py>` + | :download:`gym_cartpole_dqn_eval.py <../examples/gym/gym_cartpole_dqn_eval.py>` + | :download:`gymnasium_cartpole_dqn_eval.py <../examples/gymnasium/gymnasium_cartpole_dqn_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments @@ -113,7 +119,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - :download:`gym_taxi_sarsa.py <../examples/gym/gym_taxi_sarsa.py>` + | :download:`gym_taxi_sarsa.py <../examples/gym/gym_taxi_sarsa.py>` + | :download:`gymnasium_taxi_sarsa.py <../examples/gymnasium/gymnasium_taxi_sarsa.py>` .. literalinclude:: ../examples/gym/gym_taxi_sarsa.py :language: python @@ -121,7 +128,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Evaluation - :download:`gym_taxi_sarsa_eval.py <../examples/gym/gym_taxi_sarsa_eval.py>` + | :download:`gym_taxi_sarsa_eval.py <../examples/gym/gym_taxi_sarsa_eval.py>` + | :download:`gymnasium_taxi_sarsa_eval.py <../examples/gymnasium/gymnasium_taxi_sarsa_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments @@ -137,7 +145,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - :download:`gym_frozen_lake_q_learning.py <../examples/gym/gym_frozen_lake_q_learning.py>` + | :download:`gym_frozen_lake_q_learning.py <../examples/gym/gym_frozen_lake_q_learning.py>` + | :download:`gymnasium_frozen_lake_q_learning.py <../examples/gymnasium/gymnasium_frozen_lake_q_learning.py>` .. literalinclude:: ../examples/gym/gym_frozen_lake_q_learning.py :language: python @@ -145,7 +154,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Evaluation - :download:`gym_frozen_lake_q_learning_eval.py <../examples/gym/gym_frozen_lake_q_learning_eval.py>` + | :download:`gym_frozen_lake_q_learning_eval.py <../examples/gym/gym_frozen_lake_q_learning_eval.py>` + | :download:`gymnasium_frozen_lake_q_learning_eval.py <../examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments @@ -159,14 +169,14 @@ The following components or practices are exemplified (highlighted):
-Learning in an OpenAI Gym vectorized environment ------------------------------------------------- +Learning in a Gym/Gymnasium vectorized environment +-------------------------------------------------- -These examples perform the training of one agent in an OpenAI Gym vectorized environment (**one agent, multiple independent copies of the same environment in parallel**) +These examples perform the training of one agent in a Gym/Gymnasium vectorized environment (**one agent, multiple independent copies of the same environment in parallel**) The following components or practices are exemplified (highlighted): - - Load and wrap an OpenAI Gym vectorized environment: **Pendulum (DDPG)**, **CartPole (DQN)**, **Taxi (SARSA)**, **FrozenLake (Q-Learning)** + - Load and wrap a Gym vectorized environment: **Pendulum (DDPG)**, **CartPole (DQN)**, **Taxi (SARSA)**, **FrozenLake (Q-Learning)** .. tabs:: @@ -176,7 +186,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - :download:`gym_vector_pendulum_ddpg.py <../examples/gym/gym_vector_pendulum_ddpg.py>` + | :download:`gym_vector_pendulum_ddpg.py <../examples/gym/gym_vector_pendulum_ddpg.py>` + | :download:`gymnasium_vector_pendulum_ddpg.py <../examples/gymnasium/gymnasium_vector_pendulum_ddpg.py>` .. literalinclude:: ../examples/gym/gym_vector_pendulum_ddpg.py :language: python @@ -188,7 +199,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - :download:`gym_vector_cartpole_dqn.py <../examples/gym/gym_vector_cartpole_dqn.py>` + | :download:`gym_vector_cartpole_dqn.py <../examples/gym/gym_vector_cartpole_dqn.py>` + | :download:`gymnasium_vector_cartpole_dqn.py <../examples/gymnasium/gymnasium_vector_cartpole_dqn.py>` .. literalinclude:: ../examples/gym/gym_vector_cartpole_dqn.py :language: python @@ -200,7 +212,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - :download:`gym_vector_taxi_sarsa.py <../examples/gym/gym_vector_taxi_sarsa.py>` + | :download:`gym_vector_taxi_sarsa.py <../examples/gym/gym_vector_taxi_sarsa.py>` + | :download:`gymnasium_vector_taxi_sarsa.py <../examples/gymnasium/gymnasium_vector_taxi_sarsa.py>` .. literalinclude:: ../examples/gym/gym_vector_taxi_sarsa.py :language: python @@ -212,7 +225,8 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - :download:`gym_vector_frozen_lake_q_learning.py <../examples/gym/gym_vector_frozen_lake_q_learning.py>` + | :download:`gym_vector_frozen_lake_q_learning.py <../examples/gym/gym_vector_frozen_lake_q_learning.py>` + | :download:`gymnasium_vector_frozen_lake_q_learning.py <../examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py>` .. literalinclude:: ../examples/gym/gym_vector_frozen_lake_q_learning.py :language: python @@ -759,7 +773,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 Learning in an Omniverse Isaac Sim environment ---------------------------------------------- -These examples show how to train an agent in an Omniverse Isaac Sim environment that is implemented using the OpenAI Gym interface (**one agent, one environment**) +These examples show how to train an agent in an Omniverse Isaac Sim environment that is implemented using the Gym interface (**one agent, one environment**) .. 
tabs:: From c28d9b663d60d77f80d971ab7438d95795eb43d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 26 Oct 2022 12:26:45 +0200 Subject: [PATCH 032/157] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 655818c3..71048128 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Added - Set the running mode (training or evaluation) of the agents - Weights & Biases integration (by @juhannc) +- Support for Gymnasium interface ### Fixed - Omniverse Isaac Gym simulation speed for the Franka Emika real-world example From 3708fba1fd1a34a82cb7e9c983d9f01653972531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 27 Oct 2022 22:53:28 +0200 Subject: [PATCH 033/157] Update step and reset method return values to return terminated and truncated --- skrl/envs/torch/wrappers.py | 121 ++++++++++++++++++------------------ 1 file changed, 61 insertions(+), 60 deletions(-) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index 2c34a4cf..a46057ed 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -44,17 +44,17 @@ def __getattr__(self, key: str) -> Any: raise AttributeError("Wrapped environment ({}) does not have attribute '{}'" \ .format(self._env.__class__.__name__, key)) - def reset(self) -> torch.Tensor: + def reset(self) -> Tuple[torch.Tensor, Any]: """Reset the environment :raises NotImplementedError: Not implemented - :return: The state of the environment - :rtype: torch.Tensor + :return: Observation, info + :rtype: torch.Tensor and any other info """ raise NotImplementedError - def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Any]: + def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Any]: """Perform a step in the environment :param actions: The actions to perform @@ -62,7 +62,7 @@ def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch :raises NotImplementedError: Not implemented - :return: The state, the reward, the done flag, and the info + :return: Observation, reward, terminated, truncated, info :rtype: tuple of torch.Tensor and any other info """ raise NotImplementedError @@ -123,28 +123,29 @@ def __init__(self, env: Any) -> None: self._reset_once = True self._obs_buf = None - def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Any]: + def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Any]: """Perform a step in the environment :param actions: The actions to perform :type actions: torch.Tensor - :return: The state, the reward, the done flag, and the info + :return: Observation, reward, terminated, truncated, info :rtype: tuple of torch.Tensor and any other info """ - self._obs_buf, rew_buf, reset_buf, info = self._env.step(actions) - return self._obs_buf, rew_buf.view(-1, 1), reset_buf.view(-1, 1), info + self._obs_buf, reward, terminated, info = self._env.step(actions) + truncated = torch.zeros_like(terminated) + return self._obs_buf, reward.view(-1, 1), terminated.view(-1, 1), truncated.view(-1, 1), info - def reset(self) -> torch.Tensor: + def reset(self) -> Tuple[torch.Tensor, Any]: """Reset the environment - :return: The state of the environment - :rtype: torch.Tensor + :return: Observation, info + :rtype: 
torch.Tensor and any other info """ if self._reset_once: self._obs_buf = self._env.reset() self._reset_once = False - return self._obs_buf + return self._obs_buf, {} def render(self, *args, **kwargs) -> None: """Render the environment @@ -169,28 +170,29 @@ def __init__(self, env: Any) -> None: self._reset_once = True self._obs_dict = None - def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Any]: + def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Any]: """Perform a step in the environment :param actions: The actions to perform :type actions: torch.Tensor - :return: The state, the reward, the done flag, and the info + :return: Observation, reward, terminated, truncated, info :rtype: tuple of torch.Tensor and any other info """ - self._obs_dict, rew_buf, reset_buf, info = self._env.step(actions) - return self._obs_dict["obs"], rew_buf.view(-1, 1), reset_buf.view(-1, 1), info + self._obs_dict, reward, terminated, info = self._env.step(actions) + truncated = torch.zeros_like(terminated) + return self._obs_dict["obs"], reward.view(-1, 1), terminated.view(-1, 1), truncated.view(-1, 1), info - def reset(self) -> torch.Tensor: + def reset(self) -> Tuple[torch.Tensor, Any]: """Reset the environment - :return: The state of the environment - :rtype: torch.Tensor + :return: Observation, info + :rtype: torch.Tensor and any other info """ if self._reset_once: self._obs_dict = self._env.reset() self._reset_once = False - return self._obs_dict["obs"] + return self._obs_dict["obs"], {} def render(self, *args, **kwargs) -> None: """Render the environment @@ -225,28 +227,29 @@ def run(self, trainer: Optional["omni.isaac.gym.vec_env.vec_env_mt.TrainerMT"] = """ self._env.run(trainer) - def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Any]: + def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Any]: """Perform a step in the environment :param actions: The actions to perform :type actions: torch.Tensor - :return: The state, the reward, the done flag, and the info + :return: Observation, reward, terminated, truncated, info :rtype: tuple of torch.Tensor and any other info """ - self._obs_dict, rew_buf, reset_buf, info = self._env.step(actions) - return self._obs_dict["obs"], rew_buf.view(-1, 1), reset_buf.view(-1, 1), info + self._obs_dict, reward, terminated, info = self._env.step(actions) + truncated = torch.zeros_like(terminated) + return self._obs_dict["obs"], reward.view(-1, 1), terminated.view(-1, 1), truncated.view(-1, 1), info - def reset(self) -> torch.Tensor: + def reset(self) -> Tuple[torch.Tensor, Any]: """Reset the environment - :return: The state of the environment - :rtype: torch.Tensor + :return: Observation, info + :rtype: torch.Tensor and any other info """ if self._reset_once: self._obs_dict = self._env.reset() self._reset_once = False - return self._obs_dict["obs"] + return self._obs_dict["obs"], {} def render(self, *args, **kwargs) -> None: """Render the environment @@ -363,40 +366,39 @@ def _tensor_to_action(self, actions: torch.Tensor) -> Any: return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape) raise ValueError("Action space type {} not supported. 
Please report this issue".format(type(space))) - def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Any]: + def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Any]: """Perform a step in the environment :param actions: The actions to perform :type actions: torch.Tensor - :return: The state, the reward, the done flag, and the info + :return: Observation, reward, terminated, truncated, info :rtype: tuple of torch.Tensor and any other info """ if self._drepecated_api: - observation, reward, done, info = self._env.step(self._tensor_to_action(actions)) + observation, reward, terminated, info = self._env.step(self._tensor_to_action(actions)) + truncated = info.get("TimeLimit.truncated", False) # https://gymnasium.farama.org/tutorials/handling_time_limits else: - observation, reward, termination, truncation, info = self._env.step(self._tensor_to_action(actions)) - if type(termination) is bool: - done = termination or truncation - else: - done = np.logical_or(termination, truncation) + observation, reward, terminated, truncated, info = self._env.step(self._tensor_to_action(actions)) # convert response to torch return self._observation_to_tensor(observation), \ torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ - torch.tensor(done, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ + torch.tensor(terminated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ + torch.tensor(truncated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ info - def reset(self) -> torch.Tensor: + def reset(self) -> Tuple[torch.Tensor, Any]: """Reset the environment - :return: The state of the environment - :rtype: torch.Tensor + :return: Observation, info + :rtype: torch.Tensor and any other info """ if self._drepecated_api: observation = self._env.reset() + info = {} else: observation, info = self._env.reset() - return self._observation_to_tensor(observation) + return self._observation_to_tensor(observation), info def render(self, *args, **kwargs) -> None: """Render the environment @@ -509,34 +511,31 @@ def _tensor_to_action(self, actions: torch.Tensor) -> Any: return np.array(actions.cpu().numpy(), dtype=space.dtype).reshape(space.shape) raise ValueError("Action space type {} not supported. 
Please report this issue".format(type(space))) - def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Any]: + def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Any]: """Perform a step in the environment :param actions: The actions to perform :type actions: torch.Tensor - :return: The state, the reward, the done flag, and the info + :return: Observation, reward, terminated, truncated, info :rtype: tuple of torch.Tensor and any other info """ - observation, reward, termination, truncation, info = self._env.step(self._tensor_to_action(actions)) - if type(termination) is bool: - done = termination or truncation - else: - done = np.logical_or(termination, truncation) + observation, reward, terminated, truncated, info = self._env.step(self._tensor_to_action(actions)) # convert response to torch return self._observation_to_tensor(observation), \ torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ - torch.tensor(done, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ + torch.tensor(terminated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ + torch.tensor(truncated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ info - def reset(self) -> torch.Tensor: + def reset(self) -> Tuple[torch.Tensor, Any]: """Reset the environment - :return: The state of the environment - :rtype: torch.Tensor + :return: Observation, info + :rtype: torch.Tensor and any other info """ observation, info = self._env.reset() - return self._observation_to_tensor(observation) + return self._observation_to_tensor(observation), info def render(self, *args, **kwargs) -> None: """Render the environment @@ -656,36 +655,38 @@ def _tensor_to_action(self, actions: torch.Tensor) -> Any: else: raise ValueError("Action spec type {} not supported. 
Please report this issue".format(type(spec))) - def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, Any]: + def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Any]: """Perform a step in the environment :param actions: The actions to perform :type actions: torch.Tensor - :return: The state, the reward, the done flag, and the info + :return: Observation, reward, terminated, truncated, info :rtype: tuple of torch.Tensor and any other info """ timestep = self._env.step(self._tensor_to_action(actions)) observation = timestep.observation reward = timestep.reward if timestep.reward is not None else 0 - done = timestep.last() + terminated = timestep.last() + truncated = False info = {} # convert response to torch return self._observation_to_tensor(observation), \ torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ - torch.tensor(done, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ + torch.tensor(terminated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ + torch.tensor(truncated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ info - def reset(self) -> torch.Tensor: + def reset(self) -> Tuple[torch.Tensor, Any]: """Reset the environment :return: The state of the environment :rtype: torch.Tensor """ timestep = self._env.reset() - return self._observation_to_tensor(timestep.observation) + return self._observation_to_tensor(timestep.observation), {} def render(self, *args, **kwargs) -> None: """Render the environment From 93b948d2ab00aaff3f969c87cd2a8a524120ea65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 27 Oct 2022 23:02:02 +0200 Subject: [PATCH 034/157] Update trainers to handle the terminated and truncated values --- skrl/trainers/torch/base.py | 14 ++++++++------ skrl/trainers/torch/manual.py | 20 +++++++++++-------- skrl/trainers/torch/parallel.py | 32 +++++++++++++++++++------------ skrl/trainers/torch/sequential.py | 14 ++++++++------ 4 files changed, 48 insertions(+), 32 deletions(-) diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index e0e206a8..c00f1fe0 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -163,7 +163,7 @@ def single_agent_train(self) -> None: actions, _, _ = self.agents.act(states, timestep=timestep, timesteps=self.timesteps) # step the environments - next_states, rewards, dones, infos = self.env.step(actions) + next_states, rewards, terminated, truncated, infos = self.env.step(actions) # render scene if not self.headless: @@ -175,7 +175,8 @@ def single_agent_train(self) -> None: actions=actions, rewards=rewards, next_states=next_states, - dones=dones, + terminated=terminated, + truncated=truncated, infos=infos, timestep=timestep, timesteps=self.timesteps) @@ -185,7 +186,7 @@ def single_agent_train(self) -> None: # reset environments with torch.no_grad(): - if dones.any(): + if terminated.any() or truncated.any(): states = self.env.reset() else: states.copy_(next_states) @@ -215,7 +216,7 @@ def single_agent_eval(self) -> None: actions, _, _ = self.agents.act(states, timestep=timestep, timesteps=self.timesteps) # step the environments - next_states, rewards, dones, infos = self.env.step(actions) + next_states, rewards, terminated, truncated, infos = self.env.step(actions) # render scene if not self.headless: @@ -227,14 +228,15 @@ def single_agent_eval(self) -> None: actions=actions, rewards=rewards, 
next_states=next_states, - dones=dones, + terminated=terminated, + truncated=truncated, infos=infos, timestep=timestep, timesteps=self.timesteps) super(type(self.agents), self.agents).post_interaction(timestep=timestep, timesteps=self.timesteps) # reset environments - if dones.any(): + if terminated.any() or truncated.any(): states = self.env.reset() else: states.copy_(next_states) diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index 0a9420e0..5372f15c 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -108,7 +108,7 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: for agent, scope in zip(self.agents, self.agents_scope)]) # step the environments - next_states, rewards, dones, infos = self.env.step(actions) + next_states, rewards, terminated, truncated, infos = self.env.step(actions) # render scene if not self.headless: @@ -121,7 +121,8 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: actions=actions, rewards=rewards, next_states=next_states, - dones=dones, + terminated=terminated, + truncated=truncated, infos=infos, timestep=timestep, timesteps=timesteps) @@ -137,7 +138,8 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: actions=actions[scope[0]:scope[1]], rewards=rewards[scope[0]:scope[1]], next_states=next_states[scope[0]:scope[1]], - dones=dones[scope[0]:scope[1]], + terminated=terminated[scope[0]:scope[1]], + truncated=truncated[scope[0]:scope[1]], infos=infos, timestep=timestep, timesteps=timesteps) @@ -148,7 +150,7 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: # reset environments with torch.no_grad(): - if dones.any(): + if terminated.any() or truncated.any(): self.states = self.env.reset() else: self.states.copy_(next_states) @@ -198,7 +200,7 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: for agent, scope in zip(self.agents, self.agents_scope)]) # step the environments - next_states, rewards, dones, infos = self.env.step(actions) + next_states, rewards, terminated, truncated, infos = self.env.step(actions) # render scene if not self.headless: @@ -211,7 +213,8 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: actions=actions, rewards=rewards, next_states=next_states, - dones=dones, + terminated=terminated, + truncated=truncated, infos=infos, timestep=timestep, timesteps=timesteps) @@ -224,14 +227,15 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: actions=actions[scope[0]:scope[1]], rewards=rewards[scope[0]:scope[1]], next_states=next_states[scope[0]:scope[1]], - dones=dones[scope[0]:scope[1]], + terminated=terminated[scope[0]:scope[1]], + truncated=truncated[scope[0]:scope[1]], infos=infos, timestep=timestep, timesteps=timesteps) super(type(agent), agent).post_interaction(timestep=timestep, timesteps=timesteps) # reset environments - if dones.any(): + if terminated.any() or truncated.any(): self.states = self.env.reset() else: self.states.copy_(next_states) diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index 74987d11..366446fc 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -71,7 +71,8 @@ def fn_processor(process_index, *args): actions=_actions, rewards=queue.get()[scope[0]:scope[1]], next_states=queue.get()[scope[0]:scope[1]], - dones=queue.get()[scope[0]:scope[1]], + terminated=queue.get()[scope[0]:scope[1]], + truncated=queue.get()[scope[0]:scope[1]], 
infos=queue.get(), timestep=msg['timestep'], timesteps=msg['timesteps']) @@ -89,7 +90,8 @@ def fn_processor(process_index, *args): actions=_actions, rewards=queue.get()[scope[0]:scope[1]], next_states=queue.get()[scope[0]:scope[1]], - dones=queue.get()[scope[0]:scope[1]], + terminated=queue.get()[scope[0]:scope[1]], + truncated=queue.get()[scope[0]:scope[1]], infos=queue.get(), timestep=msg['timestep'], timesteps=msg['timesteps']) @@ -210,7 +212,7 @@ def train(self) -> None: actions = torch.vstack([queue.get() for queue in queues]) # step the environments - next_states, rewards, dones, infos = self.env.step(actions) + next_states, rewards, terminated, truncated, infos = self.env.step(actions) # render scene if not self.headless: @@ -222,14 +224,17 @@ def train(self) -> None: rewards.share_memory_() if not next_states.is_cuda: next_states.share_memory_() - if not dones.is_cuda: - dones.share_memory_() + if not terminated.is_cuda: + terminated.share_memory_() + if not truncated.is_cuda: + truncated.share_memory_() for pipe, queue in zip(producer_pipes, queues): pipe.send({"task": "record_transition", "timestep": timestep, "timesteps": self.timesteps}) queue.put(rewards) queue.put(next_states) - queue.put(dones) + queue.put(terminated) + queue.put(truncated) queue.put(infos) barrier.wait() @@ -240,7 +245,7 @@ def train(self) -> None: # reset environments with torch.no_grad(): - if dones.any(): + if terminated.any() or truncated.any(): states = self.env.reset() if not states.is_cuda: states.share_memory_() @@ -337,7 +342,7 @@ def eval(self) -> None: actions = torch.vstack([queue.get() for queue in queues]) # step the environments - next_states, rewards, dones, infos = self.env.step(actions) + next_states, rewards, terminated, truncated, infos = self.env.step(actions) # render scene if not self.headless: @@ -349,8 +354,10 @@ def eval(self) -> None: rewards.share_memory_() if not next_states.is_cuda: next_states.share_memory_() - if not dones.is_cuda: - dones.share_memory_() + if not terminated.is_cuda: + terminated.share_memory_() + if not truncated.is_cuda: + truncated.share_memory_() for pipe, queue in zip(producer_pipes, queues): pipe.send({"task": "eval-record_transition-post_interaction", @@ -358,12 +365,13 @@ def eval(self) -> None: "timesteps": self.timesteps}) queue.put(rewards) queue.put(next_states) - queue.put(dones) + queue.put(terminated) + queue.put(truncated) queue.put(infos) barrier.wait() # reset environments - if dones.any(): + if terminated.any() or truncated.any(): states = self.env.reset() if not states.is_cuda: states.share_memory_() diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index 0f11342e..0348d763 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -89,7 +89,7 @@ def train(self) -> None: for agent, scope in zip(self.agents, self.agents_scope)]) # step the environments - next_states, rewards, dones, infos = self.env.step(actions) + next_states, rewards, terminated, truncated, infos = self.env.step(actions) # render scene if not self.headless: @@ -102,7 +102,8 @@ def train(self) -> None: actions=actions[scope[0]:scope[1]], rewards=rewards[scope[0]:scope[1]], next_states=next_states[scope[0]:scope[1]], - dones=dones[scope[0]:scope[1]], + terminated=terminated[scope[0]:scope[1]], + truncated=truncated[scope[0]:scope[1]], infos=infos, timestep=timestep, timesteps=self.timesteps) @@ -113,7 +114,7 @@ def train(self) -> None: # reset environments with torch.no_grad(): - if dones.any(): + if 
terminated.any() or truncated.any(): states = self.env.reset() else: states.copy_(next_states) @@ -154,7 +155,7 @@ def eval(self) -> None: for agent, scope in zip(self.agents, self.agents_scope)]) # step the environments - next_states, rewards, dones, infos = self.env.step(actions) + next_states, rewards, terminated, truncated, infos = self.env.step(actions) # render scene if not self.headless: @@ -167,14 +168,15 @@ def eval(self) -> None: actions=actions[scope[0]:scope[1]], rewards=rewards[scope[0]:scope[1]], next_states=next_states[scope[0]:scope[1]], - dones=dones[scope[0]:scope[1]], + terminated=terminated[scope[0]:scope[1]], + truncated=truncated[scope[0]:scope[1]], infos=infos, timestep=timestep, timesteps=self.timesteps) super(type(agent), agent).post_interaction(timestep=timestep, timesteps=self.timesteps) # reset environments - if dones.any(): + if terminated.any() or truncated.any(): states = self.env.reset() else: states.copy_(next_states) From 912f0eefd0dbbfb4655c4dcd4ec010143b602b98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 27 Oct 2022 23:06:20 +0200 Subject: [PATCH 035/157] Update agent base class to handle terminated and truncated values --- skrl/agents/torch/base.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index c03068fe..46f470bf 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -252,7 +252,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -269,8 +270,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -287,7 +290,7 @@ def record_transition(self, self._cumulative_timesteps.add_(1) # check ended episodes - finished_episodes = dones.nonzero(as_tuple=False) + finished_episodes = (terminated + truncated).nonzero(as_tuple=False) if finished_episodes.numel(): # storage cumulative rewards and timesteps From b6e268ee594db4ebf083714bd181c2f111fd9608 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 28 Oct 2022 10:29:39 +0200 Subject: [PATCH 036/157] Split return values when reseting the environment --- skrl/trainers/torch/base.py | 8 ++++---- skrl/trainers/torch/manual.py | 9 +++++---- skrl/trainers/torch/parallel.py | 8 ++++---- skrl/trainers/torch/sequential.py | 8 ++++---- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index c00f1fe0..b58682f5 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -151,7 +151,7 @@ def single_agent_train(self) -> None: assert self.num_agents == 1, "This method is only valid for a single agent" # reset env - states = self.env.reset() + states, infos = self.env.reset() for timestep in tqdm.tqdm(range(self.initial_timestep, 
self.timesteps)): @@ -187,7 +187,7 @@ def single_agent_train(self) -> None: # reset environments with torch.no_grad(): if terminated.any() or truncated.any(): - states = self.env.reset() + states, infos = self.env.reset() else: states.copy_(next_states) @@ -207,7 +207,7 @@ def single_agent_eval(self) -> None: assert self.num_agents == 1, "This method is only valid for a single agent" # reset env - states = self.env.reset() + states, infos = self.env.reset() for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): @@ -237,7 +237,7 @@ def single_agent_eval(self) -> None: # reset environments if terminated.any() or truncated.any(): - states = self.env.reset() + states, infos = self.env.reset() else: states.copy_(next_states) diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index 5372f15c..3128b734 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -1,3 +1,4 @@ +from distutils.log import info from typing import Union, List, Optional import copy @@ -87,7 +88,7 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: # reset env if self.states is None: - self.states = self.env.reset() + self.states, infos = self.env.reset() if self.num_agents == 1: # pre-interaction @@ -151,7 +152,7 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: # reset environments with torch.no_grad(): if terminated.any() or truncated.any(): - self.states = self.env.reset() + self.states, infos = self.env.reset() else: self.states.copy_(next_states) @@ -187,7 +188,7 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: # reset env if self.states is None: - self.states = self.env.reset() + self.states, infos = self.env.reset() with torch.no_grad(): if self.num_agents == 1: @@ -236,6 +237,6 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: # reset environments if terminated.any() or truncated.any(): - self.states = self.env.reset() + self.states, infos = self.env.reset() else: self.states.copy_(next_states) diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index 366446fc..d33dcde9 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -191,7 +191,7 @@ def train(self) -> None: barrier.wait() # reset env - states = self.env.reset() + states, infos = self.env.reset() if not states.is_cuda: states.share_memory_() @@ -246,7 +246,7 @@ def train(self) -> None: # reset environments with torch.no_grad(): if terminated.any() or truncated.any(): - states = self.env.reset() + states, infos = self.env.reset() if not states.is_cuda: states.share_memory_() else: @@ -326,7 +326,7 @@ def eval(self) -> None: barrier.wait() # reset env - states = self.env.reset() + states, infos = self.env.reset() if not states.is_cuda: states.share_memory_() @@ -372,7 +372,7 @@ def eval(self) -> None: # reset environments if terminated.any() or truncated.any(): - states = self.env.reset() + states, infos = self.env.reset() if not states.is_cuda: states.share_memory_() else: diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index 0348d763..b61d2f21 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -75,7 +75,7 @@ def train(self) -> None: return # reset env - states = self.env.reset() + states, infos = self.env.reset() for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): @@ -115,7 +115,7 @@ def train(self) -> None: # reset environments with 
torch.no_grad(): if terminated.any() or truncated.any(): - states = self.env.reset() + states, infos = self.env.reset() else: states.copy_(next_states) @@ -145,7 +145,7 @@ def eval(self) -> None: return # reset env - states = self.env.reset() + states, infos = self.env.reset() for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): @@ -177,7 +177,7 @@ def eval(self) -> None: # reset environments if terminated.any() or truncated.any(): - states = self.env.reset() + states, infos = self.env.reset() else: states.copy_(next_states) From f4d69a0b311301fdd03f8e6945147a913a9cc540 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 28 Oct 2022 10:33:40 +0200 Subject: [PATCH 037/157] Get truncated values for vectorized environments --- skrl/envs/torch/wrappers.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index a46057ed..eb60c40f 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -377,7 +377,14 @@ def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch """ if self._drepecated_api: observation, reward, terminated, info = self._env.step(self._tensor_to_action(actions)) - truncated = info.get("TimeLimit.truncated", False) # https://gymnasium.farama.org/tutorials/handling_time_limits + # truncated: https://gymnasium.farama.org/tutorials/handling_time_limits + if type(info) is list: + truncated = np.array([d.get("TimeLimit.truncated", False) for d in info], dtype=terminated.dtype) + terminated *= np.logical_not(truncated) + else: + truncated = info.get("TimeLimit.truncated", False) + if truncated: + terminated = False else: observation, reward, terminated, truncated, info = self._env.step(self._tensor_to_action(actions)) # convert response to torch From 34b558bdd9ca8461954ff00ecc734df5b9345ccd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 28 Oct 2022 10:42:20 +0200 Subject: [PATCH 038/157] Update agents to handle terminated and truncated values --- skrl/agents/torch/a2c/a2c.py | 27 ++++++++++++---------- skrl/agents/torch/amp/amp.py | 21 +++++++++-------- skrl/agents/torch/cem/cem.py | 23 ++++++++++-------- skrl/agents/torch/ddpg/ddpg.py | 21 ++++++++++------- skrl/agents/torch/dqn/ddqn.py | 21 ++++++++++------- skrl/agents/torch/dqn/dqn.py | 21 ++++++++++------- skrl/agents/torch/ppo/ppo.py | 25 +++++++++++--------- skrl/agents/torch/q_learning/q_learning.py | 19 +++++++++------ skrl/agents/torch/sac/sac.py | 21 ++++++++++------- skrl/agents/torch/sarsa/sarsa.py | 19 +++++++++------ skrl/agents/torch/td3/td3.py | 21 ++++++++++------- skrl/agents/torch/trpo/trpo.py | 23 ++++++++++-------- 12 files changed, 157 insertions(+), 105 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 3c414b46..866a3143 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -158,12 +158,12 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.float32) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) - self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) self.memory.create_tensor(name="values", size=1, dtype=torch.float32) 
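Taken together, the wrapper and trainer patches above replace the single `dones` flag with separate `terminated`/`truncated` flags and make `reset()` return two values. The following is a minimal, self-contained sketch of the resulting interaction loop; the DummyEnv class and the random stand-in policy are invented for illustration only and are not skrl code:

import torch

class DummyEnv:
    """Stand-in for a wrapped vectorized environment (illustration only)."""
    num_envs = 4

    def reset(self):
        # new API: reset() returns (observation, info)
        return torch.zeros(self.num_envs, 3), {}

    def step(self, actions):
        # new API: step() returns (observation, reward, terminated, truncated, info)
        observation = torch.rand(self.num_envs, 3)
        rewards = torch.rand(self.num_envs, 1)
        terminated = torch.rand(self.num_envs, 1) < 0.05  # episodes that ended naturally
        truncated = torch.rand(self.num_envs, 1) < 0.05   # episodes cut short (e.g. time limit)
        return observation, rewards, terminated, truncated, {}

env = DummyEnv()
states, infos = env.reset()
for timestep in range(100):
    actions = torch.rand(env.num_envs, 1)  # placeholder for agent.act(...)
    next_states, rewards, terminated, truncated, infos = env.step(actions)
    # an agent would record the transition here, receiving both flags separately
    if terminated.any() or truncated.any():
        states, infos = env.reset()
    else:
        states = next_states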
self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) self.memory.create_tensor(name="advantages", size=1, dtype=torch.float32) - self.tensors_names = ["states", "actions", "rewards", "dones", "values", "returns", "advantages"] + self.tensors_names = ["states", "actions", "returns", "advantages"] # create temporary variables needed for storage and computation self._current_next_states = None @@ -196,7 +196,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -210,8 +211,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -219,7 +222,7 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) if self.memory is not None: self._current_next_states = next_states @@ -232,11 +235,11 @@ def record_transition(self, values, _, _ = self.value.act(self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, - values=values) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated, values=values) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, - values=values) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated, values=values) def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -321,7 +324,7 @@ def compute_gae(rewards: torch.Tensor, values = self.memory.get_tensor_by_name("values") returns, advantages = compute_gae(rewards=self.memory.get_tensor_by_name("rewards"), - dones=self.memory.get_tensor_by_name("dones"), + dones=self.memory.get_tensor_by_name("terminated"), values=values, next_values=last_values, discount_factor=self._discount_factor, @@ -339,7 +342,7 @@ def compute_gae(rewards: torch.Tensor, cumulative_value_loss = 0 # mini-batches loop - for sampled_states, sampled_actions, _, _, _, sampled_returns, sampled_advantages in sampled_batches: + for sampled_states, sampled_actions, sampled_returns, sampled_advantages in sampled_batches: sampled_states = self._state_preprocessor(sampled_states, train=True) diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 5b8915ca..e1e47b86 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ 
-225,7 +225,7 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="next_states", size=self.observation_space, dtype=torch.float32) self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.float32) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) - self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) self.memory.create_tensor(name="log_prob", size=1, dtype=torch.float32) self.memory.create_tensor(name="values", size=1, dtype=torch.float32) self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) @@ -234,7 +234,7 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="amp_states", size=self.amp_observation_space, dtype=torch.float32) self.memory.create_tensor(name="next_values", size=1, dtype=torch.float32) - self.tensors_names = ["states", "actions", "rewards", "next_states", "dones", \ + self.tensors_names = ["states", "actions", "rewards", "next_states", "terminated", \ "log_prob", "values", "returns", "advantages", "amp_states", "next_values"] # create tensors for motion dataset and reply buffer @@ -284,7 +284,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -298,8 +299,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -311,7 +314,7 @@ def record_transition(self, if self._current_states is not None: states = self._current_states - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) if self.memory is not None: amp_states = infos["amp_obs"] @@ -329,10 +332,10 @@ def record_transition(self, next_values = self._value_preprocessor(next_values, inverse=True) next_values *= infos['terminate'].view(-1, 1).logical_not() - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values, amp_states=amp_states, next_values=next_values) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values, amp_states=amp_states, next_values=next_values) def pre_interaction(self, timestep: int, timesteps: int) -> None: @@ -429,7 +432,7 @@ def compute_gae(rewards: torch.Tensor, 
values = self.memory.get_tensor_by_name("values") next_values=self.memory.get_tensor_by_name("next_values") returns, advantages = compute_gae(rewards=combined_rewards, - dones=self.memory.get_tensor_by_name("dones"), + dones=self.memory.get_tensor_by_name("terminated"), values=values, next_values=next_values, discount_factor=self._discount_factor, diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 8a02c7a2..274ade1a 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -133,9 +133,9 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="next_states", size=self.observation_space, dtype=torch.float32) self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.int64) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) - self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) - self.tensors_names = ["states", "actions", "rewards", "next_states", "dones"] + self.tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy @@ -165,7 +165,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -179,8 +180,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -188,20 +191,22 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) if self.memory is not None: - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) # track episodes internally if self._rollout: - indexes = torch.nonzero(dones) + indexes = torch.nonzero(terminated + truncated) if indexes.numel(): for i in indexes[:, 0]: self._episode_tracking[i.item()].append(self._rollout + 1) diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 
1557d00d..6f9bfd64 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -164,9 +164,9 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="next_states", size=self.observation_space, dtype=torch.float32) self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.float32) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) - self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) - self.tensors_names = ["states", "actions", "rewards", "next_states", "dones"] + self.tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] # clip noise bounds self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) @@ -241,7 +241,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -255,8 +256,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -264,16 +267,18 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) if self.memory is not None: # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 02b252eb..b973b2a6 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -158,9 +158,9 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="next_states", size=self.observation_space, dtype=torch.float32) self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.int64) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) - self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) - self.tensors_names = 
["states", "actions", "rewards", "next_states", "dones"] + self.tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy @@ -203,7 +203,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -217,8 +218,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -226,16 +229,18 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) if self.memory is not None: # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index c2171247..4927e2f8 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -158,9 +158,9 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="next_states", size=self.observation_space, dtype=torch.float32) self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.int64) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) - self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) - self.tensors_names = ["states", "actions", "rewards", "next_states", "dones"] + self.tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy @@ -203,7 +203,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, 
timesteps: int) -> None: @@ -217,8 +218,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -226,16 +229,18 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) if self.memory is not None: # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 35885119..6b95f172 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -173,13 +173,13 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.float32) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) - self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) self.memory.create_tensor(name="log_prob", size=1, dtype=torch.float32) self.memory.create_tensor(name="values", size=1, dtype=torch.float32) self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) self.memory.create_tensor(name="advantages", size=1, dtype=torch.float32) - self.tensors_names = ["states", "actions", "rewards", "dones", "log_prob", "values", "returns", "advantages"] + self.tensors_names = ["states", "actions", "rewards", "terminated", "log_prob", "values", "returns", "advantages"] # create temporary variables needed for storage and computation self._current_log_prob = None @@ -216,7 +216,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -230,8 +231,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type 
dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -239,7 +242,7 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) if self.memory is not None: self._current_next_states = next_states @@ -252,11 +255,11 @@ def record_transition(self, values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, - log_prob=self._current_log_prob, values=values) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, - log_prob=self._current_log_prob, values=values) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values) def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -341,7 +344,7 @@ def compute_gae(rewards: torch.Tensor, values = self.memory.get_tensor_by_name("values") returns, advantages = compute_gae(rewards=self.memory.get_tensor_by_name("rewards"), - dones=self.memory.get_tensor_by_name("dones"), + dones=self.memory.get_tensor_by_name("terminated"), values=values, next_values=last_values, discount_factor=self._discount_factor, diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 15beb8c8..3d987bc7 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -126,7 +126,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -140,8 +141,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -149,7 +152,7 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + 
super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) # reward shaping if self._rewards_shaper is not None: @@ -159,12 +162,14 @@ def record_transition(self, self._current_actions = actions self._current_rewards = rewards self._current_next_states = next_states - self._current_dones = dones + self._current_dones = terminated + truncated if self.memory is not None: - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index f48f61c3..80daaf77 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -177,9 +177,9 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="next_states", size=self.observation_space, dtype=torch.float32) self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.float32) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) - self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) - self.tensors_names = ["states", "actions", "rewards", "next_states", "dones"] + self.tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy @@ -209,7 +209,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -223,8 +224,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -232,16 +235,18 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) if self.memory is not None: # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, 
dones=dones) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index c0243f55..ff532154 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -126,7 +126,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -140,8 +141,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -149,7 +152,7 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) # reward shaping if self._rewards_shaper is not None: @@ -159,12 +162,14 @@ def record_transition(self, self._current_actions = actions self._current_rewards = rewards self._current_next_states = next_states - self._current_dones = dones + self._current_dones = terminated + truncated if self.memory is not None: - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index f1de6330..abde68b4 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -182,9 +182,9 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="next_states", size=self.observation_space, dtype=torch.float32) self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.float32) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) - self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + 
self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) - self.tensors_names = ["states", "actions", "rewards", "next_states", "dones"] + self.tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] # clip noise bounds self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) @@ -260,7 +260,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -274,8 +275,10 @@ def record_transition(self, :type rewards: torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -283,16 +286,18 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) if self.memory is not None: # reward shaping if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated) def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index ec76f085..0fd1c85a 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -170,7 +170,7 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="states", size=self.observation_space, dtype=torch.float32) self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.float32) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) - self.memory.create_tensor(name="dones", size=1, dtype=torch.bool) + self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) self.memory.create_tensor(name="log_prob", size=1, dtype=torch.float32) self.memory.create_tensor(name="values", size=1, dtype=torch.float32) self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) @@ -213,7 +213,8 @@ def record_transition(self, actions: torch.Tensor, rewards: torch.Tensor, next_states: torch.Tensor, - dones: torch.Tensor, + terminated: torch.Tensor, + truncated: torch.Tensor, infos: Any, timestep: int, timesteps: int) -> None: @@ -227,8 +228,10 @@ def record_transition(self, :type rewards: 
torch.Tensor :param next_states: Next observations/states of the environment :type next_states: torch.Tensor - :param dones: Signals to indicate that episodes have ended - :type dones: torch.Tensor + :param terminated: Signals to indicate that episodes have terminated + :type terminated: torch.Tensor + :param truncated: Signals to indicate that episodes have been truncated + :type truncated: torch.Tensor :param infos: Additional information about the environment :type infos: Any type supported by the environment :param timestep: Current timestep @@ -236,7 +239,7 @@ def record_transition(self, :param timesteps: Number of timesteps :type timesteps: int """ - super().record_transition(states, actions, rewards, next_states, dones, infos, timestep, timesteps) + super().record_transition(states, actions, rewards, next_states, terminated, truncated, infos, timestep, timesteps) if self.memory is not None: self._current_next_states = next_states @@ -249,11 +252,11 @@ def record_transition(self, values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") values = self._value_preprocessor(values, inverse=True) - self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, - log_prob=self._current_log_prob, values=values) + self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values) for memory in self.secondary_memories: - memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, dones=dones, - log_prob=self._current_log_prob, values=values) + memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, + terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values) def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -452,7 +455,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor values = self.memory.get_tensor_by_name("values") returns, advantages = compute_gae(rewards=self.memory.get_tensor_by_name("rewards"), - dones=self.memory.get_tensor_by_name("dones"), + dones=self.memory.get_tensor_by_name("terminated"), values=values, next_values=last_values, discount_factor=self._discount_factor, From b4b8dc20f611498450ef3474e1ae19f7d6b7d511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 28 Oct 2022 10:54:23 +0200 Subject: [PATCH 039/157] Update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71048128..a884847f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
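The agent patches above keep the two flags separate on purpose: value bootstrapping is masked only by `terminated` (which is why `compute_gae` now receives the "terminated" tensor), while episode bookkeeping and resets react to either flag (`terminated + truncated`). A small schematic illustration of that distinction, using made-up numbers rather than the library's actual update code:

import torch

rewards     = torch.tensor([[1.0], [1.0], [1.0]])
next_values = torch.tensor([[0.5], [0.5], [0.5]])
terminated  = torch.tensor([[True],  [False], [False]])
truncated   = torch.tensor([[False], [True],  [False]])
discount = 0.99

# bootstrapping: a truncated episode still bootstraps from the next state's value
targets = rewards + discount * next_values * terminated.logical_not()
print(targets)  # tensor([[1.0000], [1.4950], [1.4950]])

# episode tracking / environment reset: either flag ends the episode
finished_episodes = (terminated + truncated).nonzero(as_tuple=False)
print(finished_episodes)  # rows 0 and 1 have finished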
- Weights & Biases integration (by @juhannc) - Support for Gymnasium interface +### Changed +- Adopt the implementation of `terminated` and `truncated` over `done` for all environments + ### Fixed - Omniverse Isaac Gym simulation speed for the Franka Emika real-world example - Call agents' method `record_transition` instead of parent method From 425ee0ed1a151d1b5086f594f213e1060cb0fe2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 4 Nov 2022 10:46:39 +0100 Subject: [PATCH 040/157] Group model inputs in a dictionary --- skrl/models/torch/base.py | 53 +++++++++------------- skrl/models/torch/categorical.py | 28 +++++------- skrl/models/torch/deterministic.py | 26 +++++------ skrl/models/torch/gaussian.py | 28 +++++------- skrl/models/torch/multivariate_gaussian.py | 28 +++++------- skrl/models/torch/tabular.py | 24 ++++------ 6 files changed, 79 insertions(+), 108 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index d860b359..c0262e46 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -46,8 +46,8 @@ def __init__(self, observation_space, action_space, device="cuda:0"): self.layer_1 = nn.Linear(self.num_observations, 64) self.layer_2 = nn.Linear(64, self.num_actions) - def act(self, states, taken_actions=None, role=""): - x = F.relu(self.layer_1(states)) + def act(self, inputs, role=""): + x = F.relu(self.layer_1(inputs["states"])) x = F.relu(self.layer_2(x)) return x """ @@ -199,17 +199,14 @@ def tensor_to_space(self, return output raise ValueError("Space type {} not supported".format(type(space))) - def random_act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Sequence[torch.Tensor]: + def random_act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: """Act randomly according to the action space - :param states: Observation/state of the environment used to get the shape of the action space - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``). - The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor, optional + :param inputs: Model inputs. 
The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: Mapping[str, torch.Tensor] :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -220,7 +217,7 @@ def random_act(self, """ # discrete action space (Discrete) if issubclass(type(self.action_space), gym.spaces.Discrete) or issubclass(type(self.action_space), gymnasium.spaces.Discrete): - return torch.randint(self.action_space.n, (states.shape[0], 1), device=self.device), None, None + return torch.randint(self.action_space.n, (inputs["states"].shape[0], 1), device=self.device), None, None # continuous action space (Box) elif issubclass(type(self.action_space), gym.spaces.Box) or issubclass(type(self.action_space), gymnasium.spaces.Box): if self._random_distribution is None: @@ -228,7 +225,7 @@ def random_act(self, low=torch.tensor(self.action_space.low[0], device=self.device, dtype=torch.float32), high=torch.tensor(self.action_space.high[0], device=self.device, dtype=torch.float32)) - return self._random_distribution.sample(sample_shape=(states.shape[0], self.num_actions)), None, None + return self._random_distribution.sample(sample_shape=(inputs["states"].shape[0], self.num_actions)), None, None else: raise NotImplementedError("Action space type ({}) not supported".format(type(self.action_space))) @@ -296,17 +293,14 @@ def forward(self): """ raise NotImplementedError("Implement .act() and .compute() methods instead of this") - def compute(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Union[torch.Tensor, Sequence[torch.Tensor]]: + def compute(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Union[torch.Tensor, Sequence[torch.Tensor]]: """Define the computation performed (to be implemented by the inheriting classes) by the models - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``). - The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor, optional + :param inputs: Model inputs. The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: Mapping[str, torch.Tensor] :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -317,21 +311,18 @@ def compute(self, """ raise NotImplementedError("The computation performed by the models (.compute()) is not implemented") - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Sequence[torch.Tensor]: + def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: """Act according to the specified behavior (to be implemented by the inheriting classes) Agents will call this method to obtain the decision to be taken given the state of the environment. This method is currently implemented by the helper models (**GaussianModel**, etc.). The classes that inherit from the latter must only implement the ``.compute()`` method - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``). 
- The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor, optional + :param inputs: Model inputs. The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: Mapping[str, torch.Tensor] :param role: Role play by the model (default: ``""``) :type role: str, optional diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index ad5fa4da..f768b15f 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -1,4 +1,4 @@ -from typing import Optional, Sequence +from typing import Mapping, Sequence import torch from torch.distributions import Categorical @@ -34,8 +34,8 @@ def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None: ... nn.ELU(), ... nn.Linear(32, self.num_actions)) ... - ... def compute(self, states, taken_actions, role): - ... return self.net(states) + ... def compute(self, inputs, role): + ... return self.net(inputs["states"]) ... >>> # given an observation_space: gym.spaces.Box with shape (4,) >>> # and an action_space: gym.spaces.Discrete with n = 2 @@ -60,17 +60,14 @@ def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None: self._c_distribution = {} self._c_distribution[role] = None - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Sequence[torch.Tensor]: + def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``). - The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor, optional + :param inputs: Model inputs. 
The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: Mapping[str, torch.Tensor] :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -81,13 +78,12 @@ def act(self, Example:: >>> # given a batch of sample states with shape (4096, 4) - >>> action, log_prob, net_output = model.act(states) + >>> action, log_prob, net_output = model.act({"states": states}) >>> print(action.shape, log_prob.shape, net_output.shape) torch.Size([4096, 1]) torch.Size([4096, 1]) torch.Size([4096, 2]) """ # map from states/observations to normalized probabilities or unnormalized log probabilities - output = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) + output = self.compute(inputs, role) # unnormalized log probabilities if self._c_unnormalized_log_prob[role] if role in self._c_unnormalized_log_prob else self._c_unnormalized_log_prob[""]: @@ -98,7 +94,7 @@ def act(self, # actions and log of the probability density function actions = self._c_distribution[role].sample() - log_prob = self._c_distribution[role].log_prob(actions if taken_actions is None else taken_actions.view(-1)) + log_prob = self._c_distribution[role].log_prob(inputs.get("taken_actions", actions).view(-1)) return actions.unsqueeze(-1), log_prob.unsqueeze(-1), output diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index 486fb75d..dfeee90f 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -1,4 +1,4 @@ -from typing import Optional, Sequence +from typing import Mapping, Sequence import gym import gymnasium @@ -33,8 +33,8 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: ... nn.ELU(), ... nn.Linear(32, 1)) ... - ... def compute(self, states, taken_actions, role): - ... return self.net(states) + ... def compute(self, inputs, role): + ... return self.net(inputs["states"]) ... >>> # given an observation_space: gym.spaces.Box with shape (60,) >>> # and an action_space: gym.spaces.Box with shape (8,) @@ -63,17 +63,14 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Sequence[torch.Tensor]: + def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: """Act deterministically in response to the state of the environment - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``). - The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor, optional + :param inputs: Model inputs. 
The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: Mapping[str, torch.Tensor] :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -84,13 +81,12 @@ def act(self, Example:: >>> # given a batch of sample states with shape (4096, 60) - >>> output = model.act(states) + >>> output = model.act({"states": states}) >>> print(output[0].shape, output[1], output[2]) torch.Size([4096, 1]) None None """ # map from observations/states to actions - actions = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) + actions = self.compute(inputs, role) # clip actions if self._d_clip_actions[role] if role in self._d_clip_actions else self._d_clip_actions[""]: diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index c7120292..925047f6 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -1,4 +1,4 @@ -from typing import Optional, Sequence +from typing import Mapping, Sequence import gym import gymnasium @@ -54,8 +54,8 @@ def __init__(self, ... nn.Linear(32, self.num_actions)) ... self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) ... - ... def compute(self, states, taken_actions, role): - ... return self.net(states), self.log_std_parameter + ... def compute(self, inputs, role): + ... return self.net(inputs["states"]), self.log_std_parameter ... >>> # given an observation_space: gym.spaces.Box with shape (60,) >>> # and an action_space: gym.spaces.Box with shape (8,) @@ -111,17 +111,14 @@ def __init__(self, self._g_reduction[role] = torch.mean if reduction == "mean" else torch.sum if reduction == "sum" \ else torch.prod if reduction == "prod" else None - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Sequence[torch.Tensor]: + def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``). - The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor, optional + :param inputs: Model inputs. 
The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: Mapping[str, torch.Tensor] :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -132,13 +129,12 @@ def act(self, Example:: >>> # given a batch of sample states with shape (4096, 60) - >>> action, log_prob, mean_action = model.act(states) + >>> action, log_prob, mean_action = model.act({"states": states}) >>> print(action.shape, log_prob.shape, mean_action.shape) torch.Size([4096, 8]) torch.Size([4096, 1]) torch.Size([4096, 8]) """ # map from states/observations to mean actions and log standard deviations - actions_mean, log_std = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) + actions_mean, log_std = self.compute(inputs, role) # clamp log standard deviations if self._g_clip_log_std[role] if role in self._g_clip_log_std else self._g_clip_log_std[""]: @@ -163,7 +159,7 @@ def act(self, actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max) # log of the probability density function - log_prob = self._g_distribution[role].log_prob(actions if taken_actions is None else taken_actions) + log_prob = self._g_distribution[role].log_prob(inputs.get("taken_actions", actions)) reduction = self._g_reduction[role] if role in self._g_reduction else self._g_reduction[""] if reduction is not None: log_prob = reduction(log_prob, dim=-1) diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index 06212d34..4846e91b 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -1,4 +1,4 @@ -from typing import Optional, Sequence +from typing import Mapping, Sequence import gym import gymnasium @@ -47,8 +47,8 @@ def __init__(self, ... nn.Linear(32, self.num_actions)) ... self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) ... - ... def compute(self, states, taken_actions, role): - ... return self.net(states), self.log_std_parameter + ... def compute(self, inputs, role): + ... return self.net(inputs["states"]), self.log_std_parameter ... >>> # given an observation_space: gym.spaces.Box with shape (60,) >>> # and an action_space: gym.spaces.Box with shape (8,) @@ -97,17 +97,14 @@ def __init__(self, self._mg_distribution = {} self._mg_distribution[role] = None - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Sequence[torch.Tensor]: + def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: """Act stochastically in response to the state of the environment - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``). - The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor, optional + :param inputs: Model inputs. 
The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: Mapping[str, torch.Tensor] :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -118,13 +115,12 @@ def act(self, Example:: >>> # given a batch of sample states with shape (4096, 60) - >>> action, log_prob, mean_action = model.act(states) + >>> action, log_prob, mean_action = model.act({"states": states}) >>> print(action.shape, log_prob.shape, mean_action.shape) torch.Size([4096, 8]) torch.Size([4096, 1]) torch.Size([4096, 8]) """ # map from states/observations to mean actions and log standard deviations - actions_mean, log_std = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) + actions_mean, log_std = self.compute(inputs, role) # clamp log standard deviations if self._mg_clip_log_std[role] if role in self._mg_clip_log_std else self._mg_clip_log_std[""]: @@ -150,7 +146,7 @@ def act(self, actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max) # log of the probability density function - log_prob = self._mg_distribution[role].log_prob(actions if taken_actions is None else taken_actions) + log_prob = self._mg_distribution[role].log_prob(inputs.get("taken_actions", actions)) if log_prob.dim() != actions.dim(): log_prob = log_prob.unsqueeze(-1) diff --git a/skrl/models/torch/tabular.py b/skrl/models/torch/tabular.py index 36eb2d76..dc80c1d8 100644 --- a/skrl/models/torch/tabular.py +++ b/skrl/models/torch/tabular.py @@ -28,8 +28,8 @@ def __init__(self, num_envs: int = 1, role: str = "") -> None: ... self.table = torch.ones((num_envs, self.num_observations, self.num_actions), ... dtype=torch.float32, device=self.device) ... - ... def compute(self, states, taken_actions, role): - ... actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), states], + ... def compute(self, inputs, role): + ... actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], ... dim=-1, keepdim=True).view(-1,1) ... >>> # given an observation_space: gym.spaces.Discrete with n=100 @@ -69,17 +69,14 @@ def _get_tensor_names(self) -> Sequence[str]: tensors.append(attr) return sorted(tensors) - def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Sequence[torch.Tensor]: + def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: """Act in response to the state of the environment - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``). - The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor, optional + :param inputs: Model inputs. 
The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: Mapping[str, torch.Tensor] :param role: Role play by the model (default: ``""``) :type role: str, optional @@ -90,12 +87,11 @@ def act(self, Example:: >>> # given a batch of sample states with shape (1, 100) - >>> output = model.act(states) + >>> output = model.act({"states": states}) >>> print(output[0], output[1], output[2]) tensor([[3]], device='cuda:0') None None """ - actions = self.compute(states.to(self.device), - taken_actions.to(self.device) if taken_actions is not None else taken_actions, role) + actions = self.compute(inputs, role) return actions, None, None def table(self) -> torch.Tensor: From 17ad69edfafc212ab3ce6febac0ee87729aee28e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 4 Nov 2022 11:39:50 +0100 Subject: [PATCH 041/157] Forward model inputs as dict --- skrl/agents/torch/a2c/a2c.py | 12 ++++++------ skrl/agents/torch/amp/amp.py | 20 ++++++++++---------- skrl/agents/torch/cem/cem.py | 6 +++--- skrl/agents/torch/ddpg/ddpg.py | 14 +++++++------- skrl/agents/torch/dqn/ddqn.py | 15 +++++++-------- skrl/agents/torch/dqn/dqn.py | 10 +++++----- skrl/agents/torch/ppo/ppo.py | 12 ++++++------ skrl/agents/torch/q_learning/q_learning.py | 4 ++-- skrl/agents/torch/sac/sac.py | 20 ++++++++++---------- skrl/agents/torch/sarsa/sarsa.py | 6 +++--- skrl/agents/torch/td3/td3.py | 18 +++++++++--------- skrl/agents/torch/trpo/trpo.py | 16 ++++++++-------- 12 files changed, 76 insertions(+), 77 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 866a3143..9bde3218 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -186,10 +186,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states, taken_actions=None, role="policy") + return self.policy.random_act({"states": states}, role="policy") # sample stochastic actions - return self.policy.act(states, taken_actions=None, role="policy") + return self.policy.act({"states": states}, role="policy") def record_transition(self, states: torch.Tensor, @@ -232,7 +232,7 @@ def record_transition(self, rewards = self._rewards_shaper(rewards, timestep, timesteps) with torch.no_grad(): - values, _, _ = self.value.act(self._state_preprocessor(states), taken_actions=None, role="value") + values, _, _ = self.value.act({"states": self._state_preprocessor(states)}, role="value") values = self._value_preprocessor(values, inverse=True) self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, @@ -319,7 +319,7 @@ def compute_gae(rewards: torch.Tensor, # compute returns and advantages with torch.no_grad(): - last_values, _, _ = self.value.act(self._state_preprocessor(self._current_next_states.float()), taken_actions=None, role="value") + last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float())}, role="value") last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") @@ -346,7 +346,7 @@ def compute_gae(rewards: torch.Tensor, sampled_states = self._state_preprocessor(sampled_states, train=True) - _, next_log_prob, _ = self.policy.act(states=sampled_states, 
taken_actions=sampled_actions, role="policy") + _, next_log_prob, _ = self.policy.act({"states": sampled_states, "taken_actions": sampled_actions}, role="policy") # compute entropy loss if self._entropy_loss_scale: @@ -358,7 +358,7 @@ def compute_gae(rewards: torch.Tensor, policy_loss = -(sampled_advantages * next_log_prob).mean() # compute value loss - predicted_values, _, _ = self.value.act(states=sampled_states, taken_actions=None, role="value") + predicted_values, _, _ = self.value.act({"states": sampled_states}, role="value") value_loss = F.mse_loss(sampled_returns, predicted_values) diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index e1e47b86..79fb4018 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -271,10 +271,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states, taken_actions=None, role="policy") + return self.policy.random_act({"states": states}, role="policy") # sample stochastic actions - actions, log_prob, actions_mean = self.policy.act(states, taken_actions=None, role="policy") + actions, log_prob, actions_mean = self.policy.act({"states": states}, role="policy") self._current_log_prob = log_prob return actions, log_prob, actions_mean @@ -324,11 +324,11 @@ def record_transition(self, rewards = self._rewards_shaper(rewards, timestep, timesteps) with torch.no_grad(): - values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") + values, _, _ = self.value.act({"states": self._state_preprocessor(states)}, role="value") values = self._value_preprocessor(values, inverse=True) with torch.no_grad(): - next_values, _, _ = self.value.act(states=self._state_preprocessor(next_states), taken_actions=None, role="value") + next_values, _, _ = self.value.act({"states": self._state_preprocessor(next_states)}, role="value") next_values = self._value_preprocessor(next_values, inverse=True) next_values *= infos['terminate'].view(-1, 1).logical_not() @@ -422,7 +422,7 @@ def compute_gae(rewards: torch.Tensor, amp_states = self.memory.get_tensor_by_name("amp_states") with torch.no_grad(): - amp_logits, _, _ = self.discriminator.act(self._amp_state_preprocessor(amp_states), taken_actions=None, role="discriminator") + amp_logits, _, _ = self.discriminator.act({"states": self._amp_state_preprocessor(amp_states)}, role="discriminator") style_reward = -torch.log(torch.maximum(1 - 1 / (1 + torch.exp(-amp_logits)), torch.tensor(0.0001, device=self.device))) style_reward *= self._discriminator_reward_scale @@ -469,7 +469,7 @@ def compute_gae(rewards: torch.Tensor, sampled_states = self._state_preprocessor(sampled_states, train=True) - _, next_log_prob, _ = self.policy.act(states=sampled_states, taken_actions=sampled_actions, role="policy") + _, next_log_prob, _ = self.policy.act({"states": sampled_states, "taken_actions": sampled_actions}, role="policy") # compute entropy loss if self._entropy_loss_scale: @@ -485,7 +485,7 @@ def compute_gae(rewards: torch.Tensor, policy_loss = -torch.min(surrogate, surrogate_clipped).mean() # compute value loss - predicted_values, _, _ = self.value.act(states=sampled_states, taken_actions=None, role="value") + predicted_values, _, _ = self.value.act({"states": sampled_states}, role="value") if self._clip_predicted_values: predicted_values = sampled_values + torch.clip(predicted_values - sampled_values, @@ 
-506,9 +506,9 @@ def compute_gae(rewards: torch.Tensor, sampled_amp_motion_states = self._amp_state_preprocessor(sampled_motion_batches[batch_index][0], train=True) sampled_amp_motion_states.requires_grad_(True) - amp_logits, _, _ = self.discriminator.act(states=sampled_amp_states, taken_actions=None, role="discriminator") - amp_replay_logits, _, _ = self.discriminator.act(states=sampled_amp_replay_states, taken_actions=None, role="discriminator") - amp_motion_logits, _, _ = self.discriminator.act(states=sampled_amp_motion_states, taken_actions=None, role="discriminator") + amp_logits, _, _ = self.discriminator.act({"states": sampled_amp_states}, role="discriminator") + amp_replay_logits, _, _ = self.discriminator.act({"states": sampled_amp_replay_states}, role="discriminator") + amp_motion_logits, _, _ = self.discriminator.act({"states": sampled_amp_motion_states}, role="discriminator") amp_cat_logits = torch.cat([amp_logits, amp_replay_logits], dim=0) diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 274ade1a..11152e5a 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -155,10 +155,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states, taken_actions=None, role="policy") + return self.policy.random_act({"states": states}, role="policy") # sample stochastic actions - return self.policy.act(states, taken_actions=None, role="policy") + return self.policy.act({"states": states}, role="policy") def record_transition(self, states: torch.Tensor, @@ -276,7 +276,7 @@ def _update(self, timestep: int, timesteps: int) -> None: elite_actions = torch.cat([sampled_actions[limits[i][0]:limits[i][1]] for i in indexes[:, 0]], dim=0) # compute scores for the elite states - scores = self.policy.act(elite_states, taken_actions=None, role="policy")[2] + scores = self.policy.act({"states": elite_states}, role="policy")[2] # compute policy loss policy_loss = F.cross_entropy(scores, elite_actions.view(-1)) diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 6f9bfd64..586b7172 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -192,10 +192,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample random actions if timestep < self._random_timesteps: - return self.policy.random_act(states, taken_actions=None, role="policy") + return self.policy.random_act({"states": states}, role="policy") # sample deterministic actions - actions = self.policy.act(states, taken_actions=None, role="policy") + actions = self.policy.act({"states": states}, role="policy") # add exloration noise if self._exploration_noise is not None: @@ -324,13 +324,13 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): - next_actions, _, _ = self.target_policy.act(states=sampled_next_states, taken_actions=None, role="target_policy") + next_actions, _, _ = self.target_policy.act({"states": sampled_next_states}, role="target_policy") - target_q_values, _, _ = self.target_critic.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic") + target_q_values, _, _ = self.target_critic.act({"states": sampled_next_states, "taken_actions": next_actions}, role="target_critic") target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * 
target_q_values # compute critic loss - critic_values, _, _ = self.critic.act(states=sampled_states, taken_actions=sampled_actions, role="critic") + critic_values, _, _ = self.critic.act({"states": sampled_states, "taken_actions": sampled_actions}, role="critic") critic_loss = F.mse_loss(critic_values, target_values) @@ -340,8 +340,8 @@ def _update(self, timestep: int, timesteps: int) -> None: self.critic_optimizer.step() # compute policy (actor) loss - actions, _, _ = self.policy.act(states=sampled_states, taken_actions=None, role="policy") - critic_values, _, _ = self.critic.act(states=sampled_states, taken_actions=actions, role="critic") + actions, _, _ = self.policy.act({"states": sampled_states}, role="policy") + critic_values, _, _ = self.critic.act({"states": sampled_states, "taken_actions": actions}, role="critic") policy_loss = -critic_values.mean() diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index b973b2a6..5b22384d 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -178,10 +178,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens states = self._state_preprocessor(states) if not self._exploration_timesteps: - return torch.argmax(self.q_network.act(states, taken_actions=None, role="q_network")[0], dim=1, keepdim=True), None, None + return torch.argmax(self.q_network.act({"states": states}, role="q_network")[0], dim=1, keepdim=True), None, None # sample random actions - actions = self.q_network.random_act(states, taken_actions=None, role="q_network")[0] + actions = self.q_network.random_act({"states": states}, role="q_network")[0] if timestep < self._random_timesteps: return actions, None, None @@ -191,7 +191,7 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens indexes = (torch.rand(states.shape[0], device=self.device) >= epsilon).nonzero().view(-1) if indexes.numel(): - actions[indexes] = torch.argmax(self.q_network.act(states[indexes], taken_actions=None, role="q_network")[0], dim=1, keepdim=True) + actions[indexes] = torch.argmax(self.q_network.act({"states": states[indexes]}, role="q_network")[0], dim=1, keepdim=True) # record epsilon self.track_data("Exploration / Exploration epsilon", epsilon) @@ -286,15 +286,14 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): - next_q_values, _, _ = self.target_q_network.act(states=sampled_next_states, taken_actions=None, role="target_q_network") + next_q_values, _, _ = self.target_q_network.act({"states": sampled_next_states}, role="target_q_network") - target_q_values = torch.gather(next_q_values, dim=1, index=torch.argmax(self.q_network.act(states=sampled_next_states, \ - taken_actions=None, role="q_network")[0], dim=1, keepdim=True)) + target_q_values = torch.gather(next_q_values, dim=1, index=torch.argmax(self.q_network.act({"states": sampled_next_states}, \ + role="q_network")[0], dim=1, keepdim=True)) target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute Q-network loss - q_values = torch.gather(self.q_network.act(states=sampled_states, taken_actions=None, role="q_network")[0], - dim=1, index=sampled_actions.long()) + q_values = torch.gather(self.q_network.act({"states": sampled_states}, role="q_network")[0], dim=1, index=sampled_actions.long()) q_network_loss = F.mse_loss(q_values, target_values) diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index 4927e2f8..fce5c61b 
100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -178,10 +178,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens states = self._state_preprocessor(states) if not self._exploration_timesteps: - return torch.argmax(self.q_network.act(states, taken_actions=None, role="q_network")[0], dim=1, keepdim=True), None, None + return torch.argmax(self.q_network.act({"states": states}, role="q_network")[0], dim=1, keepdim=True), None, None # sample random actions - actions = self.q_network.random_act(states, taken_actions=None, role="q_network")[0] + actions = self.q_network.random_act({"states": states}, role="q_network")[0] if timestep < self._random_timesteps: return actions, None, None @@ -191,7 +191,7 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens indexes = (torch.rand(states.shape[0], device=self.device) >= epsilon).nonzero().view(-1) if indexes.numel(): - actions[indexes] = torch.argmax(self.q_network.act(states[indexes], taken_actions=None, role="q_network")[0], dim=1, keepdim=True) + actions[indexes] = torch.argmax(self.q_network.act({"states": states[indexes]}, role="q_network")[0], dim=1, keepdim=True) # record epsilon self.track_data("Exploration / Exploration epsilon", epsilon) @@ -286,13 +286,13 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): - next_q_values, _, _ = self.target_q_network.act(states=sampled_next_states, taken_actions=None, role="target_q_network") + next_q_values, _, _ = self.target_q_network.act({"states": sampled_next_states}, role="target_q_network") target_q_values = torch.max(next_q_values, dim=-1, keepdim=True)[0] target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute Q-network loss - q_values = torch.gather(self.q_network.act(states=sampled_states, taken_actions=None, role="q_network")[0], + q_values = torch.gather(self.q_network.act({"states": sampled_states}, role="q_network")[0], dim=1, index=sampled_actions.long()) q_network_loss = F.mse_loss(q_values, target_values) diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 6b95f172..2654f0e1 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -203,10 +203,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states, taken_actions=None, role="policy") + return self.policy.random_act({"states": states}, role="policy") # sample stochastic actions - actions, log_prob, actions_mean = self.policy.act(states, taken_actions=None, role="policy") + actions, log_prob, actions_mean = self.policy.act({"states": states}, role="policy") self._current_log_prob = log_prob return actions, log_prob, actions_mean @@ -252,7 +252,7 @@ def record_transition(self, rewards = self._rewards_shaper(rewards, timestep, timesteps) with torch.no_grad(): - values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") + values, _, _ = self.value.act({"states": self._state_preprocessor(states)}, role="value") values = self._value_preprocessor(values, inverse=True) self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, @@ -339,7 +339,7 @@ def compute_gae(rewards: torch.Tensor, # compute returns and advantages with torch.no_grad(): - 
last_values, _, _ = self.value.act(self._state_preprocessor(self._current_next_states.float()), taken_actions=None, role="value") + last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float())}, role="value") last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") @@ -371,7 +371,7 @@ def compute_gae(rewards: torch.Tensor, sampled_states = self._state_preprocessor(sampled_states, train=not epoch) - _, next_log_prob, _ = self.policy.act(states=sampled_states, taken_actions=sampled_actions, role="policy") + _, next_log_prob, _ = self.policy.act({"states": sampled_states, "taken_actions": sampled_actions}, role="policy") # compute aproximate KL divergence with torch.no_grad(): @@ -397,7 +397,7 @@ def compute_gae(rewards: torch.Tensor, policy_loss = -torch.min(surrogate, surrogate_clipped).mean() # compute value loss - predicted_values, _, _ = self.value.act(states=sampled_states, taken_actions=None, role="value") + predicted_values, _, _ = self.value.act({"states": sampled_states}, role="value") if self._clip_predicted_values: predicted_values = sampled_values + torch.clip(predicted_values - sampled_values, diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 3d987bc7..d3622310 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -116,10 +116,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens """ # sample random actions if timestep < self._random_timesteps: - return self.policy.random_act(states, taken_actions=None, role="policy") + return self.policy.random_act({"states": states}, role="policy") # sample actions from policy - return self.policy.act(states, taken_actions=None, role="policy") + return self.policy.act({"states": states}, role="policy") def record_transition(self, states: torch.Tensor, diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index 80daaf77..eef551c6 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -199,10 +199,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states, taken_actions=None, role="policy") + return self.policy.random_act({"states": states}, role="policy") # sample stochastic actions - return self.policy.act(states, taken_actions=None, role="policy") + return self.policy.act({"states": states}, role="policy") def record_transition(self, states: torch.Tensor, @@ -292,16 +292,16 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): - next_actions, next_log_prob, _ = self.policy.act(states=sampled_next_states, taken_actions=None, role="policy") + next_actions, next_log_prob, _ = self.policy.act({"states": sampled_next_states}, role="policy") - target_q1_values, _, _ = self.target_critic_1.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic_1") - target_q2_values, _, _ = self.target_critic_2.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic_2") + target_q1_values, _, _ = self.target_critic_1.act({"states": sampled_next_states, "taken_actions": next_actions}, role="target_critic_1") + target_q2_values, _, _ = self.target_critic_2.act({"states": sampled_next_states, "taken_actions": next_actions}, 
role="target_critic_2") target_q_values = torch.min(target_q1_values, target_q2_values) - self._entropy_coefficient * next_log_prob target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute critic loss - critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=sampled_actions, role="critic_1") - critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=sampled_actions, role="critic_2") + critic_1_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": sampled_actions}, role="critic_1") + critic_2_values, _, _ = self.critic_2.act({"states": sampled_states, "taken_actions": sampled_actions}, role="critic_2") critic_loss = (F.mse_loss(critic_1_values, target_values) + F.mse_loss(critic_2_values, target_values)) / 2 @@ -311,9 +311,9 @@ def _update(self, timestep: int, timesteps: int) -> None: self.critic_optimizer.step() # compute policy (actor) loss - actions, log_prob, _ = self.policy.act(states=sampled_states, taken_actions=None, role="policy") - critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=actions, role="critic_1") - critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=actions, role="critic_2") + actions, log_prob, _ = self.policy.act({"states": sampled_states}, role="policy") + critic_1_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": actions}, role="critic_1") + critic_2_values, _, _ = self.critic_2.act({"states": sampled_states, "taken_actions": actions}, role="critic_2") policy_loss = (self._entropy_coefficient * log_prob - torch.min(critic_1_values, critic_2_values)).mean() diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index ff532154..fedbd623 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -116,10 +116,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens """ # sample random actions if timestep < self._random_timesteps: - return self.policy.random_act(states, taken_actions=None, role="policy") + return self.policy.random_act({"states": states}, role="policy") # sample actions from policy - return self.policy.act(states, taken_actions=None, role="policy") + return self.policy.act({"states": states}, role="policy") def record_transition(self, states: torch.Tensor, @@ -207,7 +207,7 @@ def _update(self, timestep: int, timesteps: int) -> None: env_ids = torch.arange(self._current_rewards.shape[0]).view(-1, 1) # compute next actions - next_actions = self.policy.act(self._current_next_states, taken_actions=None, role="policy")[0] + next_actions = self.policy.act({"states": self._current_next_states}, role="policy")[0] # update Q-table q_table[env_ids, self._current_states, self._current_actions] += self._learning_rate \ diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index abde68b4..4616181f 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -210,10 +210,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample random actions if timestep < self._random_timesteps: - return self.policy.random_act(states, taken_actions=None, role="policy") + return self.policy.random_act({"states": states}, role="policy") # sample deterministic actions - actions = self.policy.act(states, taken_actions=None, role="policy") + actions = self.policy.act({"states": states}, role="policy") # add noise if 
self._exploration_noise is not None: @@ -343,7 +343,7 @@ def _update(self, timestep: int, timesteps: int) -> None: with torch.no_grad(): # target policy smoothing - next_actions, _, _ = self.target_policy.act(states=sampled_next_states, taken_actions=None, role="target_policy") + next_actions, _, _ = self.target_policy.act({"states": sampled_next_states}, role="target_policy") noises = torch.clamp(self._smooth_regularization_noise.sample(next_actions.shape), min=-self._smooth_regularization_clip, max=self._smooth_regularization_clip) @@ -355,14 +355,14 @@ def _update(self, timestep: int, timesteps: int) -> None: next_actions.clamp_(min=self.clip_actions_min, max=self.clip_actions_max) # compute target values - target_q1_values, _, _ = self.target_critic_1.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic_1") - target_q2_values, _, _ = self.target_critic_2.act(states=sampled_next_states, taken_actions=next_actions, role="target_critic_2") + target_q1_values, _, _ = self.target_critic_1.act({"states": sampled_next_states, "taken_actions": next_actions}, role="target_critic_1") + target_q2_values, _, _ = self.target_critic_2.act({"states": sampled_next_states, "taken_actions": next_actions}, role="target_critic_2") target_q_values = torch.min(target_q1_values, target_q2_values) target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute critic loss - critic_1_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=sampled_actions, role="critic_1") - critic_2_values, _, _ = self.critic_2.act(states=sampled_states, taken_actions=sampled_actions, role="critic_2") + critic_1_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": sampled_actions}, role="critic_1") + critic_2_values, _, _ = self.critic_2.act({"states": sampled_states, "taken_actions": sampled_actions}, role="critic_2") critic_loss = F.mse_loss(critic_1_values, target_values) + F.mse_loss(critic_2_values, target_values) @@ -376,8 +376,8 @@ def _update(self, timestep: int, timesteps: int) -> None: if not self._critic_update_counter % self._policy_delay: # compute policy (actor) loss - actions, _, _ = self.policy.act(states=sampled_states, taken_actions=None, role="policy") - critic_values, _, _ = self.critic_1.act(states=sampled_states, taken_actions=actions, role="critic_1") + actions, _, _ = self.policy.act({"states": sampled_states}, role="policy") + critic_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": actions}, role="critic_1") policy_loss = -critic_values.mean() diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index 0fd1c85a..e3ea0ee4 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -200,10 +200,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act(states, taken_actions=None, role="policy") + return self.policy.random_act({"states": states}, role="policy") # sample stochastic actions - actions, log_prob, actions_mean = self.policy.act(states, taken_actions=None, role="policy") + actions, log_prob, actions_mean = self.policy.act({"states": states}, role="policy") self._current_log_prob = log_prob return actions, log_prob, actions_mean @@ -249,7 +249,7 @@ def record_transition(self, rewards = self._rewards_shaper(rewards, timestep, timesteps) with torch.no_grad(): 
- values, _, _ = self.value.act(states=self._state_preprocessor(states), taken_actions=None, role="value") + values, _, _ = self.value.act({"states": self._state_preprocessor(states)}, role="value") values = self._value_preprocessor(values, inverse=True) self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, @@ -353,7 +353,7 @@ def surrogate_loss(policy: Model, :return: Surrogate loss :rtype: torch.Tensor """ - _, new_log_prob, _ = policy.act(states, taken_actions=actions, role="policy") + _, new_log_prob, _ = policy.act({"states": states, "taken_actions": actions}, role="policy") return (advantages * torch.exp(new_log_prob - log_prob.detach())).mean() def conjugate_gradient(policy: Model, @@ -437,11 +437,11 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor :return: KL divergence :rtype: torch.Tensor """ - _, _, mu_1 = policy_1.act(states, taken_actions=None, role="policy") + _, _, mu_1 = policy_1.act({"states": states}, role="policy") logstd_1 = policy_1.get_log_std(role="policy") mu_1, logstd_1 = mu_1.detach(), logstd_1.detach() - _, _, mu_2 = policy_2.act(states, taken_actions=None, role="policy") + _, _, mu_2 = policy_2.act({"states": states}, role="policy") logstd_2 = policy_2.get_log_std(role="policy") kl = logstd_1 - logstd_2 + 0.5 * (torch.square(logstd_1.exp()) + torch.square(mu_1 - mu_2)) \ @@ -450,7 +450,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor # compute returns and advantages with torch.no_grad(): - last_values, _, _ = self.value.act(self._state_preprocessor(self._current_next_states.float()), taken_actions=None, role="value") + last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float())}, role="value") last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") @@ -517,7 +517,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor self.policy.update_parameters(self.backup_policy) # compute value loss - predicted_values, _, _ = self.value.act(sampled_states, taken_actions=None, role="value") + predicted_values, _, _ = self.value.act({"states": sampled_states}, role="value") value_loss = self._value_loss_scale * F.mse_loss(sampled_returns, predicted_values) From 75666c9125c3433ffb961c96d561b18c1fe24f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 5 Nov 2022 11:46:29 +0100 Subject: [PATCH 042/157] Allow to create tensors with their original number of dimensions --- skrl/memories/torch/base.py | 43 +++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py index 11f4f35b..19509fb7 100644 --- a/skrl/memories/torch/base.py +++ b/skrl/memories/torch/base.py @@ -55,6 +55,7 @@ def __init__(self, self.tensors = {} self.tensors_view = {} + self.tensors_keep_dimensions = {} # exporting data self.export = export @@ -75,34 +76,42 @@ def __len__(self) -> int: """ return self.memory_size * self.num_envs if self.filled else self.memory_index * self.num_envs + self.env_index - def _get_space_size(self, space: Union[int, Tuple[int], gym.Space, gymnasium.Space]) -> int: + def _get_space_size(self, + space: Union[int, Tuple[int], gym.Space, gymnasium.Space], + keep_dimensions: bool = False) -> Union[Tuple, int]: """Get the size (number of elements) of a space :param space: Space or shape from which to obtain 
the number of elements :type space: int, tuple or list of integers, gym.Space, or gymnasium.Space + :param keep_dimensions: Whether or not to keep the space dimensions (default: False) + :type keep_dimensions: bool :raises ValueError: If the space is not supported - :return: Size of the space data - :rtype: Space size (number of elements) + :return: Size of the space. If keep_dimensions is True, the space size will be a tuple + :rtype: int or tuple of int """ if type(space) in [int, float]: - return int(space) + return (int(space),) if keep_dimensions else int(space) elif type(space) in [tuple, list]: - return np.prod(space) + return tuple(space) if keep_dimensions else np.prod(space) elif issubclass(type(space), gym.Space): if issubclass(type(space), gym.spaces.Discrete): - return 1 + return (1,) if keep_dimensions else 1 elif issubclass(type(space), gym.spaces.Box): - return np.prod(space.shape) + return tuple(space.shape) if keep_dimensions else np.prod(space.shape) elif issubclass(type(space), gym.spaces.Dict): + if keep_dimensions: + raise ValueError("keep_dimensions=True cannot be used with Dict spaces") return sum([self._get_space_size(space.spaces[key]) for key in space.spaces]) elif issubclass(type(space), gymnasium.Space): if issubclass(type(space), gymnasium.spaces.Discrete): - return 1 + return (1,) if keep_dimensions else 1 elif issubclass(type(space), gymnasium.spaces.Box): - return np.prod(space.shape) + return tuple(space.shape) if keep_dimensions else np.prod(space.shape) elif issubclass(type(space), gymnasium.spaces.Dict): + if keep_dimensions: + raise ValueError("keep_dimensions=True cannot be used with Dict spaces") return sum([self._get_space_size(space.spaces[key]) for key in space.spaces]) raise ValueError("Space type {} not supported".format(type(space))) @@ -153,7 +162,8 @@ def set_tensor_by_name(self, name: str, tensor: torch.Tensor) -> None: def create_tensor(self, name: str, size: Union[int, Tuple[int], gym.Space, gymnasium.Space], - dtype: Optional[torch.dtype] = None) -> bool: + dtype: Optional[torch.dtype] = None, + keep_dimensions: bool = False) -> bool: """Create a new internal tensor in memory The tensor will have a 3-components shape (memory size, number of environments, size). @@ -162,11 +172,13 @@ def create_tensor(self, :param name: Tensor name (the name has to follow the python PEP 8 style) :type name: str :param size: Number of elements in the last dimension (effective data size). - The product of the elements will be computed for collections or gym/gymnasium spaces types + The product of the elements will be computed for sequences or gym/gymnasium spaces :type size: int, tuple or list of integers, gym.Space, or gymnasium.Space :param dtype: Data type (torch.dtype). 
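A small, self-contained sketch of the new `keep_dimensions` flag (the memory sizes and the image-like shape below are illustrative assumptions only):

import torch
from skrl.memories.torch import RandomMemory

memory = RandomMemory(memory_size=512, num_envs=16, device="cpu")

# default behavior: the size is flattened -> internal shape (512, 16, 12288)
memory.create_tensor(name="flat_states", size=3 * 64 * 64, dtype=torch.float32)

# keep_dimensions=True: the original shape is preserved -> internal shape (512, 16, 3, 64, 64)
memory.create_tensor(name="image_states", size=(3, 64, 64), dtype=torch.float32, keep_dimensions=True)

print(memory.get_tensor_by_name("image_states").shape)  # torch.Size([512, 16, 3, 64, 64])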
If None, the global default torch data type will be used (default) :type dtype: torch.dtype or None, optional + :param keep_dimensions: Whether or not to keep the dimensions defined through the size parameter (default: False) + :type keep_dimensions: bool :raises ValueError: The tensor name exists already but the size or dtype are different @@ -174,7 +186,7 @@ def create_tensor(self, :rtype: bool """ # compute data size - size = self._get_space_size(size) + size = self._get_space_size(size, keep_dimensions) # check dtype and size if the tensor exists if name in self.tensors: tensor = self.tensors[name] @@ -184,9 +196,12 @@ def create_tensor(self, raise ValueError("The dtype of the tensor {} ({}) doesn't match the existing one ({})".format(name, dtype, tensor.dtype)) return False # create tensor (_tensor_) and add it to the internal storage - setattr(self, "_tensor_{}".format(name), torch.zeros((self.memory_size, self.num_envs, size), device=self.device, dtype=dtype)) + tensor_shape = (self.memory_size, self.num_envs, *size) if keep_dimensions else (self.memory_size, self.num_envs, size) + view_shape = (-1, *size) if keep_dimensions else (-1, size) + setattr(self, "_tensor_{}".format(name), torch.zeros(tensor_shape, device=self.device, dtype=dtype)) self.tensors[name] = getattr(self, "_tensor_{}".format(name)) - self.tensors_view[name] = self.tensors[name].view(-1, self.tensors[name].size(-1)) + self.tensors_view[name] = self.tensors[name].view(*view_shape) + self.tensors_keep_dimensions[name] = keep_dimensions # fill the tensors (float tensors) with NaN for tensor in self.tensors.values(): if torch.is_floating_point(tensor): From 1298569b7ade42ceda1c6df179e30c0968c28094 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 5 Nov 2022 11:47:42 +0100 Subject: [PATCH 043/157] Get agent's actions using tuple indexing --- skrl/trainers/torch/base.py | 4 ++-- skrl/trainers/torch/manual.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index b58682f5..f17dd9de 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -160,7 +160,7 @@ def single_agent_train(self) -> None: # compute actions with torch.no_grad(): - actions, _, _ = self.agents.act(states, timestep=timestep, timesteps=self.timesteps) + actions = self.agents.act(states, timestep=timestep, timesteps=self.timesteps)[0] # step the environments next_states, rewards, terminated, truncated, infos = self.env.step(actions) @@ -213,7 +213,7 @@ def single_agent_eval(self) -> None: # compute actions with torch.no_grad(): - actions, _, _ = self.agents.act(states, timestep=timestep, timesteps=self.timesteps) + actions = self.agents.act(states, timestep=timestep, timesteps=self.timesteps)[0] # step the environments next_states, rewards, terminated, truncated, infos = self.env.step(actions) diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index 3128b734..7dca355e 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -96,7 +96,7 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: # compute actions with torch.no_grad(): - actions, _, _ = self.agents.act(self.states, timestep=timestep, timesteps=timesteps) + actions = self.agents.act(self.states, timestep=timestep, timesteps=timesteps)[0] else: # pre-interaction @@ -193,7 +193,7 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: with torch.no_grad(): if self.num_agents == 
1: # compute actions - actions, _, _ = self.agents.act(self.states, timestep=timestep, timesteps=timesteps) + actions = self.agents.act(self.states, timestep=timestep, timesteps=timesteps)[0] else: # compute actions
From cc180b3a7192b4167f8c9206d5e585c07885291c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 5 Nov 2022 14:47:11 +0100 Subject: [PATCH 044/157] Allow to define common memory tensors for all environments
--- skrl/memories/torch/base.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-)
diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py index 19509fb7..a7337002 100644 --- a/skrl/memories/torch/base.py +++ b/skrl/memories/torch/base.py
@@ -56,6 +56,7 @@ def __init__(self, self.tensors = {} self.tensors_view = {} self.tensors_keep_dimensions = {} + self.tensors_common_for_all_envs = {} # exporting data self.export = export
@@ -163,7 +164,8 @@ def create_tensor(self, name: str, size: Union[int, Tuple[int], gym.Space, gymnasium.Space], dtype: Optional[torch.dtype] = None, - keep_dimensions: bool = False) -> bool: + keep_dimensions: bool = False, + common_for_all_envs: bool = False) -> bool: """Create a new internal tensor in memory
The tensor will have a 3-components shape (memory size, number of environments, size). @@ -179,6 +181,9 @@ def create_tensor(self, :type dtype: torch.dtype or None, optional :param keep_dimensions: Whether or not to keep the dimensions defined through the size parameter (default: False) :type keep_dimensions: bool + :param common_for_all_envs: Whether or not the tensor should be common for all environments (default: False). + If True, the 2nd tensor dimension (number of environments) will be suppressed + :type common_for_all_envs: bool
:raises ValueError: The tensor name exists already but the size or dtype are different
@@ -195,13 +200,19 @@ def create_tensor(self, if dtype is not None and tensor.dtype != dtype: raise ValueError("The dtype of the tensor {} ({}) doesn't match the existing one ({})".format(name, dtype, tensor.dtype)) return False - # create tensor (_tensor_) and add it to the internal storage - tensor_shape = (self.memory_size, self.num_envs, *size) if keep_dimensions else (self.memory_size, self.num_envs, size) + # define tensor shape + if common_for_all_envs: + tensor_shape = (self.memory_size, *size) if keep_dimensions else (self.memory_size, size) + else: + tensor_shape = (self.memory_size, self.num_envs, *size) if keep_dimensions else (self.memory_size, self.num_envs, size) view_shape = (-1, *size) if keep_dimensions else (-1, size) + # create tensor (_tensor_) and add it to the internal storage setattr(self, "_tensor_{}".format(name), torch.zeros(tensor_shape, device=self.device, dtype=dtype)) + # update internal variables self.tensors[name] = getattr(self, "_tensor_{}".format(name)) self.tensors_view[name] = self.tensors[name].view(*view_shape) self.tensors_keep_dimensions[name] = keep_dimensions + self.tensors_common_for_all_envs[name] = common_for_all_envs # fill the tensors (float tensors) with NaN for tensor in self.tensors.values(): if torch.is_floating_point(tensor):
@@ -349,9 +360,12 @@ def sample_all(self, names: Tuple[str], mini_batches: int = 1) -> List[List[torc :rtype: list of torch.Tensor list """ if mini_batches > 1: - indexes = np.arange(self.memory_size * self.num_envs) - batches = BatchSampler(indexes, batch_size=len(indexes) // mini_batches, drop_last=True) - return [[self.tensors_view[name][batch] for name in names] for batch in
batches] + indexes_0 = np.arange(self.memory_size) # common for all environments + indexes_1 = np.arange(self.memory_size * self.num_envs) # per each environment + batches_0 = BatchSampler(indexes_0, batch_size=len(indexes_0) // mini_batches, drop_last=True) + batches_1 = BatchSampler(indexes_1, batch_size=len(indexes_1) // mini_batches, drop_last=True) + return [[self.tensors_view[name][b0 if self.tensors_common_for_all_envs[name] else b1] for name in names] \ + for b0, b1 in zip(batches_0, batches_1)] return [[self.tensors_view[name] for name in names]] def save(self, directory: str = "", format: str = "pt") -> None: From 3818fd0c872d4785d7e618c1dd444286b482cc80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 5 Nov 2022 22:06:32 +0100 Subject: [PATCH 045/157] Return a dictionary as part of the model outputs --- skrl/models/torch/base.py | 40 +++++++++++++--------- skrl/models/torch/categorical.py | 31 ++++++++++------- skrl/models/torch/deterministic.py | 26 +++++++------- skrl/models/torch/gaussian.py | 31 ++++++++++------- skrl/models/torch/multivariate_gaussian.py | 31 ++++++++++------- skrl/models/torch/tabular.py | 25 ++++++++------ 6 files changed, 105 insertions(+), 79 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index c0262e46..73f02a13 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -1,4 +1,4 @@ -from typing import Optional, Union, Mapping, Sequence +from typing import Optional, Union, Mapping, Sequence, Tuple, Any import gym import gymnasium @@ -49,7 +49,7 @@ def __init__(self, observation_space, action_space, device="cuda:0"): def act(self, inputs, role=""): x = F.relu(self.layer_1(inputs["states"])) x = F.relu(self.layer_2(x)) - return x + return x, None, {} """ super(Model, self).__init__() @@ -199,25 +199,27 @@ def tensor_to_space(self, return output raise ValueError("Space type {} not supported".format(type(space))) - def random_act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: + def random_act(self, + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[torch.Tensor, None, Mapping[str, Union[torch.Tensor, Any]]]: """Act randomly according to the action space :param inputs: Model inputs. The most common keys are: - ``"states"``: state of the environment used to make the decision - ``"taken_actions"``: actions taken by the policy for the given states - :type inputs: Mapping[str, torch.Tensor] + :type inputs: dict where the values are typically torch.Tensor :param role: Role play by the model (default: ``""``) :type role: str, optional :raises NotImplementedError: Unsupported action space - :return: Random actions to be taken by the agent - :rtype: sequence of torch.Tensor + :return: Model output. 
The first component is the action to be taken by the agent + :rtype: tuple of torch.Tensor, None, and dictionary """ # discrete action space (Discrete) if issubclass(type(self.action_space), gym.spaces.Discrete) or issubclass(type(self.action_space), gymnasium.spaces.Discrete): - return torch.randint(self.action_space.n, (inputs["states"].shape[0], 1), device=self.device), None, None + return torch.randint(self.action_space.n, (inputs["states"].shape[0], 1), device=self.device), None, {} # continuous action space (Box) elif issubclass(type(self.action_space), gym.spaces.Box) or issubclass(type(self.action_space), gymnasium.spaces.Box): if self._random_distribution is None: @@ -225,7 +227,7 @@ def random_act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequ low=torch.tensor(self.action_space.low[0], device=self.device, dtype=torch.float32), high=torch.tensor(self.action_space.high[0], device=self.device, dtype=torch.float32)) - return self._random_distribution.sample(sample_shape=(inputs["states"].shape[0], self.num_actions)), None, None + return self._random_distribution.sample(sample_shape=(inputs["states"].shape[0], self.num_actions)), None, {} else: raise NotImplementedError("Action space type ({}) not supported".format(type(self.action_space))) @@ -293,25 +295,29 @@ def forward(self): """ raise NotImplementedError("Implement .act() and .compute() methods instead of this") - def compute(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Union[torch.Tensor, Sequence[torch.Tensor]]: + def compute(self, + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[Union[torch.Tensor, Mapping[str, Union[torch.Tensor, Any]]]]: """Define the computation performed (to be implemented by the inheriting classes) by the models :param inputs: Model inputs. The most common keys are: - ``"states"``: state of the environment used to make the decision - ``"taken_actions"``: actions taken by the policy for the given states - :type inputs: Mapping[str, torch.Tensor] + :type inputs: dict where the values are typically torch.Tensor :param role: Role play by the model (default: ``""``) :type role: str, optional :raises NotImplementedError: Child class must implement this method :return: Computation performed by the models - :rtype: torch.Tensor or sequence of torch.Tensor + :rtype: tuple of torch.Tensor and dictionary """ raise NotImplementedError("The computation performed by the models (.compute()) is not implemented") - def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: + def act(self, + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Act according to the specified behavior (to be implemented by the inheriting classes) Agents will call this method to obtain the decision to be taken given the state of the environment. @@ -322,16 +328,16 @@ def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[to - ``"states"``: state of the environment used to make the decision - ``"taken_actions"``: actions taken by the policy for the given states - :type inputs: Mapping[str, torch.Tensor] + :type inputs: dict where the values are typically torch.Tensor :param role: Role play by the model (default: ``""``) :type role: str, optional :raises NotImplementedError: Child class must implement this method - :return: Action to be taken by the agent given the state of the environment. 
- The typical sequence's components are the actions, the log of the probability density function and mean actions. - Deterministic agents must ignore the last two components and return empty tensors or None for them - :rtype: sequence of torch.Tensor + :return: Model output. The first component is the action to be taken by the agent. + The second component is the log of the probability density function for stochastic models + or None for deterministic models. The third component is a dictionary containing extra output values + :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary """ logger.warning("Make sure to place Mixins before Model during model definition") raise NotImplementedError("The action to be taken by the agent (.act()) is not implemented") diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index f768b15f..be18d4bb 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -1,4 +1,4 @@ -from typing import Mapping, Sequence +from typing import Union, Mapping, Tuple, Any import torch from torch.distributions import Categorical @@ -35,7 +35,7 @@ def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None: ... nn.Linear(32, self.num_actions)) ... ... def compute(self, inputs, role): - ... return self.net(inputs["states"]) + ... return self.net(inputs["states"]), {} ... >>> # given an observation_space: gym.spaces.Box with shape (4,) >>> # and an action_space: gym.spaces.Discrete with n = 2 @@ -60,43 +60,48 @@ def __init__(self, unnormalized_log_prob: bool = True, role: str = "") -> None: self._c_distribution = {} self._c_distribution[role] = None - def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: + def act(self, + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Act stochastically in response to the state of the environment :param inputs: Model inputs. The most common keys are: - ``"states"``: state of the environment used to make the decision - ``"taken_actions"``: actions taken by the policy for the given states - :type inputs: Mapping[str, torch.Tensor] + :type inputs: dict where the values are typically torch.Tensor :param role: Role play by the model (default: ``""``) :type role: str, optional - :return: Action to be taken by the agent given the state of the environment. - The sequence's components are the actions, the log of the probability density function and the model's output - :rtype: sequence of torch.Tensor + :return: Model output. The first component is the action to be taken by the agent. + The second component is the log of the probability density function. 
+ The third component is a dictionary containing the network output ``"net_output"`` + and extra output values + :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary Example:: >>> # given a batch of sample states with shape (4096, 4) - >>> action, log_prob, net_output = model.act({"states": states}) - >>> print(action.shape, log_prob.shape, net_output.shape) + >>> actions, log_prob, outputs = model.act({"states": states}) + >>> print(actions.shape, log_prob.shape, outputs["net_output"].shape) torch.Size([4096, 1]) torch.Size([4096, 1]) torch.Size([4096, 2]) """ # map from states/observations to normalized probabilities or unnormalized log probabilities - output = self.compute(inputs, role) + net_output, outputs = self.compute(inputs, role) # unnormalized log probabilities if self._c_unnormalized_log_prob[role] if role in self._c_unnormalized_log_prob else self._c_unnormalized_log_prob[""]: - self._c_distribution[role] = Categorical(logits=output) + self._c_distribution[role] = Categorical(logits=net_output) # normalized probabilities else: - self._c_distribution[role] = Categorical(probs=output) + self._c_distribution[role] = Categorical(probs=net_output) # actions and log of the probability density function actions = self._c_distribution[role].sample() log_prob = self._c_distribution[role].log_prob(inputs.get("taken_actions", actions).view(-1)) - return actions.unsqueeze(-1), log_prob.unsqueeze(-1), output + outputs["net_output"] = net_output + return actions.unsqueeze(-1), log_prob.unsqueeze(-1), outputs def distribution(self, role: str = "") -> torch.distributions.Categorical: """Get the current distribution of the model diff --git a/skrl/models/torch/deterministic.py b/skrl/models/torch/deterministic.py index dfeee90f..7e686d1a 100644 --- a/skrl/models/torch/deterministic.py +++ b/skrl/models/torch/deterministic.py @@ -1,4 +1,4 @@ -from typing import Mapping, Sequence +from typing import Union, Mapping, Tuple, Any import gym import gymnasium @@ -34,7 +34,7 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: ... nn.Linear(32, 1)) ... ... def compute(self, inputs, role): - ... return self.net(inputs["states"]) + ... return self.net(inputs["states"]), {} ... >>> # given an observation_space: gym.spaces.Box with shape (60,) >>> # and an action_space: gym.spaces.Box with shape (8,) @@ -63,30 +63,32 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: # backward compatibility: torch < 1.9 clamp method does not support tensors self._backward_compatibility = tuple(map(int, (torch.__version__.split(".")[:2]))) < (1, 9) - def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: + def act(self, + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Act deterministically in response to the state of the environment :param inputs: Model inputs. The most common keys are: - ``"states"``: state of the environment used to make the decision - ``"taken_actions"``: actions taken by the policy for the given states - :type inputs: Mapping[str, torch.Tensor] + :type inputs: dict where the values are typically torch.Tensor :param role: Role play by the model (default: ``""``) :type role: str, optional - :return: Action to be taken by the agent given the state of the environment. 
- The sequence's components are the computed actions and None for the last two components - :rtype: sequence of torch.Tensor + :return: Model output. The first component is the action to be taken by the agent. + The second component is ``None``. The third component is a dictionary containing extra output values + :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary Example:: >>> # given a batch of sample states with shape (4096, 60) - >>> output = model.act({"states": states}) - >>> print(output[0].shape, output[1], output[2]) - torch.Size([4096, 1]) None None + >>> actions, _, outputs = model.act({"states": states}) + >>> print(actions.shape, outputs) + torch.Size([4096, 1]) {} """ # map from observations/states to actions - actions = self.compute(inputs, role) + actions, outputs = self.compute(inputs, role) # clip actions if self._d_clip_actions[role] if role in self._d_clip_actions else self._d_clip_actions[""]: @@ -95,4 +97,4 @@ def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[to else: actions = torch.clamp(actions, min=self.clip_actions_min, max=self.clip_actions_max) - return actions, None, None + return actions, None, outputs diff --git a/skrl/models/torch/gaussian.py b/skrl/models/torch/gaussian.py index 925047f6..f903cc0d 100644 --- a/skrl/models/torch/gaussian.py +++ b/skrl/models/torch/gaussian.py @@ -1,4 +1,4 @@ -from typing import Mapping, Sequence +from typing import Union, Mapping, Tuple, Any import gym import gymnasium @@ -55,7 +55,7 @@ def __init__(self, ... self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) ... ... def compute(self, inputs, role): - ... return self.net(inputs["states"]), self.log_std_parameter + ... return self.net(inputs["states"]), self.log_std_parameter, {} ... >>> # given an observation_space: gym.spaces.Box with shape (60,) >>> # and an action_space: gym.spaces.Box with shape (8,) @@ -111,30 +111,34 @@ def __init__(self, self._g_reduction[role] = torch.mean if reduction == "mean" else torch.sum if reduction == "sum" \ else torch.prod if reduction == "prod" else None - def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: + def act(self, + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Act stochastically in response to the state of the environment :param inputs: Model inputs. The most common keys are: - ``"states"``: state of the environment used to make the decision - ``"taken_actions"``: actions taken by the policy for the given states - :type inputs: Mapping[str, torch.Tensor] + :type inputs: dict where the values are typically torch.Tensor :param role: Role play by the model (default: ``""``) :type role: str, optional - :return: Action to be taken by the agent given the state of the environment. - The sequence's components are the actions, the log of the probability density function and mean actions - :rtype: sequence of torch.Tensor + :return: Model output. The first component is the action to be taken by the agent. + The second component is the log of the probability density function. 
+ The third component is a dictionary containing the mean actions ``"mean_actions"`` + and extra output values + :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary Example:: >>> # given a batch of sample states with shape (4096, 60) - >>> action, log_prob, mean_action = model.act({"states": states}) - >>> print(action.shape, log_prob.shape, mean_action.shape) + >>> actions, log_prob, outputs = model.act({"states": states}) + >>> print(actions.shape, log_prob.shape, outputs["mean_actions"].shape) torch.Size([4096, 8]) torch.Size([4096, 1]) torch.Size([4096, 8]) """ # map from states/observations to mean actions and log standard deviations - actions_mean, log_std = self.compute(inputs, role) + mean_actions, log_std, outputs = self.compute(inputs, role) # clamp log standard deviations if self._g_clip_log_std[role] if role in self._g_clip_log_std else self._g_clip_log_std[""]: @@ -143,10 +147,10 @@ def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[to self._g_log_std_max[role] if role in self._g_log_std_max else self._g_log_std_max[""]) self._g_log_std[role] = log_std - self._g_num_samples[role] = actions_mean.shape[0] + self._g_num_samples[role] = mean_actions.shape[0] # distribution - self._g_distribution[role] = Normal(actions_mean, log_std.exp()) + self._g_distribution[role] = Normal(mean_actions, log_std.exp()) # sample using the reparameterization trick actions = self._g_distribution[role].rsample() @@ -166,7 +170,8 @@ def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[to if log_prob.dim() != actions.dim(): log_prob = log_prob.unsqueeze(-1) - return actions, log_prob, actions_mean + outputs["mean_actions"] = mean_actions + return actions, log_prob, outputs def get_entropy(self, role: str = "") -> torch.Tensor: """Compute and return the entropy of the model diff --git a/skrl/models/torch/multivariate_gaussian.py b/skrl/models/torch/multivariate_gaussian.py index 4846e91b..5ddb23e7 100644 --- a/skrl/models/torch/multivariate_gaussian.py +++ b/skrl/models/torch/multivariate_gaussian.py @@ -1,4 +1,4 @@ -from typing import Mapping, Sequence +from typing import Union, Mapping, Tuple, Any import gym import gymnasium @@ -48,7 +48,7 @@ def __init__(self, ... self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) ... ... def compute(self, inputs, role): - ... return self.net(inputs["states"]), self.log_std_parameter + ... return self.net(inputs["states"]), self.log_std_parameter, {} ... >>> # given an observation_space: gym.spaces.Box with shape (60,) >>> # and an action_space: gym.spaces.Box with shape (8,) @@ -97,30 +97,34 @@ def __init__(self, self._mg_distribution = {} self._mg_distribution[role] = None - def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: + def act(self, + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Act stochastically in response to the state of the environment :param inputs: Model inputs. The most common keys are: - ``"states"``: state of the environment used to make the decision - ``"taken_actions"``: actions taken by the policy for the given states - :type inputs: Mapping[str, torch.Tensor] + :type inputs: dict where the values are typically torch.Tensor :param role: Role play by the model (default: ``""``) :type role: str, optional - :return: Action to be taken by the agent given the state of the environment. 
- The sequence's components are the actions, the log of the probability density function and mean actions - :rtype: sequence of torch.Tensor + :return: Model output. The first component is the action to be taken by the agent. + The second component is the log of the probability density function. + The third component is a dictionary containing the mean actions ``"mean_actions"`` + and extra output values + :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary Example:: >>> # given a batch of sample states with shape (4096, 60) - >>> action, log_prob, mean_action = model.act({"states": states}) - >>> print(action.shape, log_prob.shape, mean_action.shape) + >>> actions, log_prob, outputs = model.act({"states": states}) + >>> print(actions.shape, log_prob.shape, outputs["mean_actions"].shape) torch.Size([4096, 8]) torch.Size([4096, 1]) torch.Size([4096, 8]) """ # map from states/observations to mean actions and log standard deviations - actions_mean, log_std = self.compute(inputs, role) + mean_actions, log_std, outputs = self.compute(inputs, role) # clamp log standard deviations if self._mg_clip_log_std[role] if role in self._mg_clip_log_std else self._mg_clip_log_std[""]: @@ -129,11 +133,11 @@ def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[to self._mg_log_std_max[role] if role in self._mg_log_std_max else self._mg_log_std_max[""]) self._mg_log_std[role] = log_std - self._mg_num_samples[role] = actions_mean.shape[0] + self._mg_num_samples[role] = mean_actions.shape[0] # distribution covariance = torch.diag(log_std.exp() * log_std.exp()) - self._mg_distribution[role] = MultivariateNormal(actions_mean, scale_tril=covariance) + self._mg_distribution[role] = MultivariateNormal(mean_actions, scale_tril=covariance) # sample using the reparameterization trick actions = self._mg_distribution[role].rsample() @@ -150,7 +154,8 @@ def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[to if log_prob.dim() != actions.dim(): log_prob = log_prob.unsqueeze(-1) - return actions, log_prob, actions_mean + outputs["mean_actions"] = mean_actions + return actions, log_prob, outputs def get_entropy(self, role: str = "") -> torch.Tensor: """Compute and return the entropy of the model diff --git a/skrl/models/torch/tabular.py b/skrl/models/torch/tabular.py index dc80c1d8..487fe466 100644 --- a/skrl/models/torch/tabular.py +++ b/skrl/models/torch/tabular.py @@ -1,4 +1,4 @@ -from typing import Optional, Mapping, Sequence +from typing import Optional, Union, Mapping, Sequence, Tuple, Any import torch @@ -31,6 +31,7 @@ def __init__(self, num_envs: int = 1, role: str = "") -> None: ... def compute(self, inputs, role): ... actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], ... dim=-1, keepdim=True).view(-1,1) + ... return actions, {} ... >>> # given an observation_space: gym.spaces.Discrete with n=100 >>> # and an action_space: gym.spaces.Discrete with n=5 @@ -69,30 +70,32 @@ def _get_tensor_names(self) -> Sequence[str]: tensors.append(attr) return sorted(tensors) - def act(self, inputs: Mapping[str, torch.Tensor], role: str = "") -> Sequence[torch.Tensor]: + def act(self, + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Act in response to the state of the environment :param inputs: Model inputs. 
The most common keys are: - ``"states"``: state of the environment used to make the decision - ``"taken_actions"``: actions taken by the policy for the given states - :type inputs: Mapping[str, torch.Tensor] + :type inputs: dict where the values are typically torch.Tensor :param role: Role play by the model (default: ``""``) :type role: str, optional - :return: Action to be taken by the agent given the state of the environment. - The sequence's components are the computed actions and None for the last two components - :rtype: sequence of torch.Tensor + :return: Model output. The first component is the action to be taken by the agent. + The second component is ``None``. The third component is a dictionary containing extra output values + :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary Example:: >>> # given a batch of sample states with shape (1, 100) - >>> output = model.act({"states": states}) - >>> print(output[0], output[1], output[2]) - tensor([[3]], device='cuda:0') None None + >>> actions, _, outputs = model.act({"states": states}) + >>> print(actions[0], outputs) + tensor([[3]], device='cuda:0') {} """ - actions = self.compute(inputs, role) - return actions, None, None + actions, outputs = self.compute(inputs, role) + return actions, None, outputs def table(self) -> torch.Tensor: """Return the Q-table From a13b8bb18fc52df9bb8199857357a9e49c48bc3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 5 Nov 2022 22:18:20 +0100 Subject: [PATCH 046/157] Update agents' act method --- skrl/agents/torch/amp/amp.py | 4 ++-- skrl/agents/torch/ppo/ppo.py | 4 ++-- skrl/agents/torch/trpo/trpo.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 79fb4018..1e38beb7 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -274,10 +274,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens return self.policy.random_act({"states": states}, role="policy") # sample stochastic actions - actions, log_prob, actions_mean = self.policy.act({"states": states}, role="policy") + actions, log_prob, outputs = self.policy.act({"states": states}, role="policy") self._current_log_prob = log_prob - return actions, log_prob, actions_mean + return actions, log_prob, outputs def record_transition(self, states: torch.Tensor, diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 2654f0e1..620481a1 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -206,10 +206,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens return self.policy.random_act({"states": states}, role="policy") # sample stochastic actions - actions, log_prob, actions_mean = self.policy.act({"states": states}, role="policy") + actions, log_prob, outputs = self.policy.act({"states": states}, role="policy") self._current_log_prob = log_prob - return actions, log_prob, actions_mean + return actions, log_prob, outputs def record_transition(self, states: torch.Tensor, diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index e3ea0ee4..fd68317f 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -203,10 +203,10 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens return self.policy.random_act({"states": states}, role="policy") # sample stochastic actions - actions, log_prob, actions_mean = 
self.policy.act({"states": states}, role="policy") + actions, log_prob, outputs = self.policy.act({"states": states}, role="policy") self._current_log_prob = log_prob - return actions, log_prob, actions_mean + return actions, log_prob, outputs def record_transition(self, states: torch.Tensor, From c4042e330847d3dd571ac485ab83e2c74b149bb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 5 Nov 2022 22:34:50 +0100 Subject: [PATCH 047/157] Update CEM agent for supporting the model's dict output --- skrl/agents/torch/cem/cem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 11152e5a..e646bfa5 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -276,7 +276,8 @@ def _update(self, timestep: int, timesteps: int) -> None: elite_actions = torch.cat([sampled_actions[limits[i][0]:limits[i][1]] for i in indexes[:, 0]], dim=0) # compute scores for the elite states - scores = self.policy.act({"states": elite_states}, role="policy")[2] + _, _, outputs = self.policy.act({"states": elite_states}, role="policy") + scores = outputs["net_output"] # compute policy loss policy_loss = F.cross_entropy(scores, elite_actions.view(-1)) From 4bcecb9e99053cc5defc4953817e857e07001f1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 5 Nov 2022 22:42:38 +0100 Subject: [PATCH 048/157] Update snippet files --- docs/source/snippets/categorical_model.py | 10 ++-- docs/source/snippets/deterministic_model.py | 10 ++-- docs/source/snippets/gaussian_model.py | 10 ++-- docs/source/snippets/model_mixin.py | 55 ++++++++----------- .../snippets/multivariate_gaussian_model.py | 10 ++-- docs/source/snippets/shared_model.py | 12 ++-- docs/source/snippets/tabular_model.py | 5 +- 7 files changed, 53 insertions(+), 59 deletions(-) diff --git a/docs/source/snippets/categorical_model.py b/docs/source/snippets/categorical_model.py index 918a0fdc..b303f89b 100644 --- a/docs/source/snippets/categorical_model.py +++ b/docs/source/snippets/categorical_model.py @@ -15,10 +15,10 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro self.linear_layer_2 = nn.Linear(64, 32) self.output_layer = nn.Linear(32, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return self.output_layer(x) + return self.output_layer(x), {} # instantiate the model (assumes there is a wrapped environment: env) @@ -59,9 +59,9 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro nn.Tanh(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): # permute (samples, width, height, channels) -> (samples, channels, width, height) - return self.net(states.permute(0, 3, 1, 2)) + return self.net(inputs["states"].permute(0, 3, 1, 2)), {} # instantiate the model (assumes there is a wrapped environment: env) diff --git a/docs/source/snippets/deterministic_model.py b/docs/source/snippets/deterministic_model.py index 84714306..4a2179e2 100644 --- a/docs/source/snippets/deterministic_model.py +++ b/docs/source/snippets/deterministic_model.py @@ -17,8 +17,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ReLU(), nn.Linear(32, 1)) - def compute(self, states, 
taken_actions, role): - return self.net(torch.cat([states, taken_actions], dim=1)) + def compute(self, inputs, role): + return self.net(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1)), {} # instantiate the model (assumes there is a wrapped environment: env) @@ -60,10 +60,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.Tanh(), nn.Linear(32, 1)) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): # permute (samples, width, height, channels) -> (samples, channels, width, height) - x = self.features_extractor(states.permute(0, 3, 1, 2)) - return self.net(torch.cat([x, taken_actions], dim=1)) + x = self.features_extractor(inputs["states"].permute(0, 3, 1, 2)) + return self.net(torch.cat([x, inputs["taken_actions"]], dim=1)), {} # instantiate the model (assumes there is a wrapped environment: env) diff --git a/docs/source/snippets/gaussian_model.py b/docs/source/snippets/gaussian_model.py index f66a6071..fab2ce13 100644 --- a/docs/source/snippets/gaussian_model.py +++ b/docs/source/snippets/gaussian_model.py @@ -20,11 +20,11 @@ def __init__(self, observation_space, action_space, device, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) x = F.relu(self.linear_layer_3(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter + return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter, {} # instantiate the model (assumes there is a wrapped environment: env) policy = MLP(observation_space=env.observation_space, @@ -73,9 +73,9 @@ def __init__(self, observation_space, action_space, device, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): # permute (samples, width, height, channels) -> (samples, channels, width, height) - return self.net(states.permute(0, 3, 1, 2)), self.log_std_parameter + return self.net(inputs["states"].permute(0, 3, 1, 2)), self.log_std_parameter, {} # instantiate the model (assumes there is a wrapped environment: env) diff --git a/docs/source/snippets/model_mixin.py b/docs/source/snippets/model_mixin.py index 3cfcb2b3..06962241 100644 --- a/docs/source/snippets/model_mixin.py +++ b/docs/source/snippets/model_mixin.py @@ -1,6 +1,5 @@ # [start-model] -from typing import Optional, Union, Sequence - +from typing import Union, Mapping, Sequence, Tuple, Any import gym import torch @@ -26,25 +25,22 @@ def __init__(self, super().__init__(observation_space, action_space, device) def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Sequence[torch.Tensor]: + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Act according to the specified behavior - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``). - The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor, optional + :param inputs: Model inputs. 
The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: dict where the values are typically torch.Tensor :param role: Role play by the model (default: ``""``) :type role: str, optional - :raises NotImplementedError: Child class must implement this method - - :return: Action to be taken by the agent given the state of the environment. - The typical sequence's components are the actions, the log of the probability density function and mean actions. - Deterministic agents must ignore the last two components and return empty tensors or None for them - :rtype: sequence of torch.Tensor + :return: Model output. The first component is the action to be taken by the agent. + The second component is the log of the probability density function for stochastic models + or None for deterministic models. The third component is a dictionary containing extra output values + :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary """ # ============================== # - act in response to the state @@ -54,7 +50,7 @@ def act(self, # ============================================================================= # [start-mixin] -from typing import Optional, Sequence +from typing import Union, Mapping, Sequence, Tuple, Any import gym @@ -75,25 +71,22 @@ def __init__(self, clip_actions: bool = False, role: str = "") -> None: self._custom_clip_actions[role] def act(self, - states: torch.Tensor, - taken_actions: Optional[torch.Tensor] = None, - role: str = "") -> Sequence[torch.Tensor]: + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Act according to the specified behavior - :param states: Observation/state of the environment used to make the decision - :type states: torch.Tensor - :param taken_actions: Actions taken by a policy to the given states (default: ``None``). - The use of these actions only makes sense in critical models, e.g. - :type taken_actions: torch.Tensor, optional + :param inputs: Model inputs. The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: dict where the values are typically torch.Tensor :param role: Role play by the model (default: ``""``) :type role: str, optional - :raises NotImplementedError: Child class must implement this method - - :return: Action to be taken by the agent given the state of the environment. - The typical sequence's components are the actions, the log of the probability density function and mean actions. - Deterministic agents must ignore the last two components and return empty tensors or None for them - :rtype: sequence of torch.Tensor + :return: Model output. The first component is the action to be taken by the agent. + The second component is the log of the probability density function for stochastic models + or None for deterministic models. 
The third component is a dictionary containing extra output values + :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary """ # ============================== # - act in response to the state diff --git a/docs/source/snippets/multivariate_gaussian_model.py b/docs/source/snippets/multivariate_gaussian_model.py index 610f4ac6..d9ea6fcb 100644 --- a/docs/source/snippets/multivariate_gaussian_model.py +++ b/docs/source/snippets/multivariate_gaussian_model.py @@ -20,11 +20,11 @@ def __init__(self, observation_space, action_space, device, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) x = F.relu(self.linear_layer_3(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter + return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter, {} # instantiate the model (assumes there is a wrapped environment: env) policy = MLP(observation_space=env.observation_space, @@ -72,9 +72,9 @@ def __init__(self, observation_space, action_space, device, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): # permute (samples, width, height, channels) -> (samples, channels, width, height) - return self.net(states.permute(0, 3, 1, 2)), self.log_std_parameter + return self.net(inputs["states"].permute(0, 3, 1, 2)), self.log_std_parameter, {} # instantiate the model (assumes there is a wrapped environment: env) diff --git a/docs/source/snippets/shared_model.py b/docs/source/snippets/shared_model.py index fe1c054d..bb784e5c 100644 --- a/docs/source/snippets/shared_model.py +++ b/docs/source/snippets/shared_model.py @@ -27,18 +27,18 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(32, 1) # override the .act(...) 
method to disambiguate its call - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) # forward the input to compute model output according to the specified role - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # instantiate the shared model and pass the same instance to the other key diff --git a/docs/source/snippets/tabular_model.py b/docs/source/snippets/tabular_model.py index 5afdbb49..7305dd68 100644 --- a/docs/source/snippets/tabular_model.py +++ b/docs/source/snippets/tabular_model.py @@ -13,14 +13,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.epsilon = epsilon self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): + states = inputs["states"] actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], dim=-1, keepdim=True).view(-1,1) indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # instantiate the model (assumes there is a wrapped environment: env) From 6ec3e85dc1a9ceee6701aa12b1e16e1b0bece09f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 5 Nov 2022 22:50:48 +0100 Subject: [PATCH 049/157] Update models' compute method inputs in docs --- docs/source/intro/data.rst | 8 ++++---- docs/source/intro/getting_started.rst | 21 +++++++++++---------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/docs/source/intro/data.rst b/docs/source/intro/data.rst index df94a19e..1c740916 100644 --- a/docs/source/intro/data.rst +++ b/docs/source/intro/data.rst @@ -263,8 +263,8 @@ The following code snippets show how to load the checkpoints through the instant nn.ReLU(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # Instantiate the model policy = Policy(env.observation_space, env.action_space, env.device, clip_actions=True) @@ -322,8 +322,8 @@ The following code snippets show how to migrate checkpoints from other libraries nn.ReLU(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # Instantiate the model policy = Policy(env.observation_space, env.action_space, env.device, clip_actions=True) diff --git a/docs/source/intro/getting_started.rst b/docs/source/intro/getting_started.rst index 7ca7c12e..810c26f4 100644 --- a/docs/source/intro/getting_started.rst +++ b/docs/source/intro/getting_started.rst @@ -281,8 +281,8 @@ The following code snippets show how to define a model, based 
on the concept of nn.ELU(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} .. tab:: Gaussian @@ -315,8 +315,8 @@ The following code snippets show how to define a model, based on the concept of nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} .. tab:: Multivariate Gaussian @@ -349,8 +349,8 @@ The following code snippets show how to define a model, based on the concept of nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} .. tab:: Deterministic @@ -381,8 +381,8 @@ The following code snippets show how to define a model, based on the concept of nn.ELU(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} .. tab:: Tabular @@ -400,9 +400,10 @@ The following code snippets show how to define a model, based on the concept of self.table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) + return actions, {} Models must be collected in a dictionary and passed to the agent constructor during its instantiation under the argument :literal:`models`. The dictionary keys are specific to each agent. Visit their respective documentation for more details (under *Spaces and models* section). 
For example, the PPO agent requires the policy and value models as shown below: From 4beab39c959ed453e420125a4b155e275603ec14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 18:52:02 +0100 Subject: [PATCH 050/157] Update model instantiators to support inputs as dictionary --- skrl/utils/model_instantiators.py | 40 +++++++++++++++---------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/skrl/utils/model_instantiators.py b/skrl/utils/model_instantiators.py index 645f5953..f8fd781a 100644 --- a/skrl/utils/model_instantiators.py +++ b/skrl/utils/model_instantiators.py @@ -197,15 +197,15 @@ def __init__(self, observation_space, action_space, device, clip_actions, output_scale=metadata["output_scale"]) self.log_std_parameter = nn.Parameter(torch.zeros(_get_num_units_by_shape(self, metadata["output_shape"]))) - def compute(self, states, taken_actions=None, role=""): + def compute(self, inputs, role=""): if self.instantiator_input_type == 0: - output = self.net(states) + output = self.net(inputs["states"]) elif self.instantiator_input_type == -1: - output = self.net(taken_actions) + output = self.net(inputs["taken_actions"]) elif self.instantiator_input_type == -2: - output = self.net(torch.cat((states, taken_actions), dim=1)) + output = self.net(torch.cat((inputs["states"], inputs["taken_actions"]), dim=1)) - return output * self.instantiator_output_scale, self.log_std_parameter + return output * self.instantiator_output_scale, self.log_std_parameter, {} metadata = {"input_shape": input_shape, "hiddens": hiddens, @@ -289,15 +289,15 @@ def __init__(self, observation_space, action_space, device, clip_actions, output_scale=metadata["output_scale"]) self.log_std_parameter = nn.Parameter(torch.zeros(_get_num_units_by_shape(self, metadata["output_shape"]))) - def compute(self, states, taken_actions=None, role=""): + def compute(self, inputs, role=""): if self.instantiator_input_type == 0: - output = self.net(states) + output = self.net(inputs["states"]) elif self.instantiator_input_type == -1: - output = self.net(taken_actions) + output = self.net(inputs["taken_actions"]) elif self.instantiator_input_type == -2: - output = self.net(torch.cat((states, taken_actions), dim=1)) + output = self.net(torch.cat((inputs["states"], inputs["taken_actions"]), dim=1)) - return output * self.instantiator_output_scale, self.log_std_parameter + return output * self.instantiator_output_scale, self.log_std_parameter, {} metadata = {"input_shape": input_shape, "hiddens": hiddens, @@ -370,15 +370,15 @@ def __init__(self, observation_space, action_space, device, clip_actions, metada output_activation=metadata["output_activation"], output_scale=metadata["output_scale"]) - def compute(self, states, taken_actions=None, role=""): + def compute(self, inputs, role=""): if self.instantiator_input_type == 0: - output = self.net(states) + output = self.net(inputs["states"]) elif self.instantiator_input_type == -1: - output = self.net(taken_actions) + output = self.net(inputs["taken_actions"]) elif self.instantiator_input_type == -2: - output = self.net(torch.cat((states, taken_actions), dim=1)) + output = self.net(torch.cat((inputs["states"], inputs["taken_actions"]), dim=1)) - return output * self.instantiator_output_scale + return output * self.instantiator_output_scale, {} metadata = {"input_shape": input_shape, "hiddens": hiddens, @@ -445,15 +445,15 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro 
output_shape=metadata["output_shape"], output_activation=metadata["output_activation"]) - def compute(self, states, taken_actions=None, role=""): + def compute(self, inputs, role=""): if self.instantiator_input_type == 0: - output = self.net(states) + output = self.net(inputs["states"]) elif self.instantiator_input_type == -1: - output = self.net(taken_actions) + output = self.net(inputs["taken_actions"]) elif self.instantiator_input_type == -2: - output = self.net(torch.cat((states, taken_actions), dim=1)) + output = self.net(torch.cat((inputs["states"], inputs["taken_actions"]), dim=1)) - return output + return output, {} metadata = {"input_shape": input_shape, "hiddens": hiddens, From 9601778ee357e8ca60d3b5dfcf72f32621c561ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 19:10:54 +0100 Subject: [PATCH 051/157] Update real-world example files --- .../reaching_franka_isaacgym_skrl_eval.py | 4 ++-- .../reaching_franka_isaacgym_skrl_train.py | 8 ++++---- .../reaching_franka_omniverse_isaacgym_skrl_eval.py | 4 ++-- .../reaching_franka_omniverse_isaacgym_skrl_train.py | 8 ++++---- .../franka_emika_panda/reaching_franka_real_skrl_eval.py | 4 ++-- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py index 60bfad69..fb1de758 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_eval.py @@ -27,8 +27,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} # instantiate and configure the task diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py index 74290246..cbbec60b 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_isaacgym_skrl_train.py @@ -36,8 +36,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -52,8 +52,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(64, 1)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # instantiate and configure the task diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py 
b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py index cac6186c..50f5ae9b 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_eval.py @@ -26,8 +26,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} # instance VecEnvBase and setup task diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py index 3a40fcf6..899df201 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_omniverse_isaacgym_skrl_train.py @@ -35,8 +35,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -51,8 +51,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(64, 1)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # instance VecEnvBase and setup task diff --git a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py index 8c872d9b..adcd1158 100644 --- a/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py +++ b/docs/source/examples/real_world/franka_emika_panda/reaching_franka_real_skrl_eval.py @@ -25,8 +25,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} # Load the environment From a09ef188f6867e7a83314018a79e81063842598a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 19:15:41 +0100 Subject: [PATCH 052/157] Update Omniverse Isaac Gym example files --- .../source/examples/omniisaacgym/ppo_allegro_hand.py | 12 ++++++------ docs/source/examples/omniisaacgym/ppo_ant.py | 12 ++++++------ docs/source/examples/omniisaacgym/ppo_ant_mt.py | 12 ++++++------ docs/source/examples/omniisaacgym/ppo_anymal.py | 12 ++++++------ .../examples/omniisaacgym/ppo_anymal_terrain.py | 8 ++++---- .../source/examples/omniisaacgym/ppo_ball_balance.py | 12 ++++++------ 
docs/source/examples/omniisaacgym/ppo_cartpole.py | 12 ++++++------ docs/source/examples/omniisaacgym/ppo_cartpole_mt.py | 12 ++++++------ docs/source/examples/omniisaacgym/ppo_crazy_flie.py | 12 ++++++------ .../examples/omniisaacgym/ppo_franka_cabinet.py | 12 ++++++------ docs/source/examples/omniisaacgym/ppo_humanoid.py | 12 ++++++------ docs/source/examples/omniisaacgym/ppo_ingenuity.py | 12 ++++++------ docs/source/examples/omniisaacgym/ppo_quadcopter.py | 12 ++++++------ docs/source/examples/omniisaacgym/ppo_shadow_hand.py | 12 ++++++------ 14 files changed, 82 insertions(+), 82 deletions(-) diff --git a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py index cab3bf10..6377a604 100644 --- a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py @@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(128, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_ant.py b/docs/source/examples/omniisaacgym/ppo_ant.py index 2b005ab4..0e090a47 100644 --- a/docs/source/examples/omniisaacgym/ppo_ant.py +++ b/docs/source/examples/omniisaacgym/ppo_ant.py @@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(64, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_ant_mt.py b/docs/source/examples/omniisaacgym/ppo_ant_mt.py index 5c50d2db..4aafeae8 100644 --- a/docs/source/examples/omniisaacgym/ppo_ant_mt.py +++ b/docs/source/examples/omniisaacgym/ppo_ant_mt.py @@ -39,17 +39,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(64, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == 
"value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_anymal.py b/docs/source/examples/omniisaacgym/ppo_anymal.py index 0d73c249..904a86c6 100644 --- a/docs/source/examples/omniisaacgym/ppo_anymal.py +++ b/docs/source/examples/omniisaacgym/ppo_anymal.py @@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(64, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py b/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py index 2982a5da..b581cdc1 100644 --- a/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py +++ b/docs/source/examples/omniisaacgym/ppo_anymal_terrain.py @@ -35,8 +35,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(128, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -51,8 +51,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(128, 1)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_ball_balance.py b/docs/source/examples/omniisaacgym/ppo_ball_balance.py index 92d92773..5d19e67b 100644 --- a/docs/source/examples/omniisaacgym/ppo_ball_balance.py +++ b/docs/source/examples/omniisaacgym/ppo_ball_balance.py @@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(32, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, 
taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole.py b/docs/source/examples/omniisaacgym/ppo_cartpole.py index a9387ab2..bb544307 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole.py @@ -35,17 +35,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(32, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py index d154c76a..7b84a708 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole_mt.py @@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(32, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the multi-threaded Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_crazy_flie.py b/docs/source/examples/omniisaacgym/ppo_crazy_flie.py index 78b1b7af..682305ba 100644 --- a/docs/source/examples/omniisaacgym/ppo_crazy_flie.py +++ b/docs/source/examples/omniisaacgym/ppo_crazy_flie.py @@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(128, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, 
taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py b/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py index 882fed2a..83483c7e 100644 --- a/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py +++ b/docs/source/examples/omniisaacgym/ppo_franka_cabinet.py @@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(64, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_humanoid.py b/docs/source/examples/omniisaacgym/ppo_humanoid.py index 1678a659..610324fb 100644 --- a/docs/source/examples/omniisaacgym/ppo_humanoid.py +++ b/docs/source/examples/omniisaacgym/ppo_humanoid.py @@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(100, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_ingenuity.py b/docs/source/examples/omniisaacgym/ppo_ingenuity.py index e167bedc..c92e954a 100644 --- a/docs/source/examples/omniisaacgym/ppo_ingenuity.py +++ b/docs/source/examples/omniisaacgym/ppo_ingenuity.py @@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(128, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, 
taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_quadcopter.py b/docs/source/examples/omniisaacgym/ppo_quadcopter.py index 9eb8c88a..1839a8ee 100644 --- a/docs/source/examples/omniisaacgym/ppo_quadcopter.py +++ b/docs/source/examples/omniisaacgym/ppo_quadcopter.py @@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(128, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment diff --git a/docs/source/examples/omniisaacgym/ppo_shadow_hand.py b/docs/source/examples/omniisaacgym/ppo_shadow_hand.py index 35b20ee4..125c9e7f 100644 --- a/docs/source/examples/omniisaacgym/ppo_shadow_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_shadow_hand.py @@ -39,17 +39,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(128, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Omniverse Isaac Gym environment From aa9e15ec24eba784f1b07428acb48c506068bb43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 19:17:43 +0100 Subject: [PATCH 053/157] Update standalone Isaac Sim example files --- docs/source/examples/isaacsim/cartpole_example_skrl.py | 8 ++++---- docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/source/examples/isaacsim/cartpole_example_skrl.py b/docs/source/examples/isaacsim/cartpole_example_skrl.py index 048fba58..e54a4305 100644 --- a/docs/source/examples/isaacsim/cartpole_example_skrl.py +++ 
b/docs/source/examples/isaacsim/cartpole_example_skrl.py @@ -38,8 +38,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(64, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return torch.tanh(self.net(states)), self.log_std_parameter + def compute(self, inputs, role): + return torch.tanh(self.net(inputs["states"])), self.log_std_parameter, {} class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -52,8 +52,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.Tanh(), nn.Linear(64, 1)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # Load and wrap the environment diff --git a/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py b/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py index 84790767..6a0dadd0 100644 --- a/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py +++ b/docs/source/examples/isaacsim/isaacsim_jetbot_ppo.py @@ -39,11 +39,11 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): # view (samples, width * height * channels) -> (samples, width, height, channels) # permute (samples, width, height, channels) -> (samples, channels, width, height) - x = self.net(states.view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)) - return 10 * torch.tanh(x), self.log_std_parameter # JetBotEnv action_space is -10 to 10 + x = self.net(inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)) + return 10 * torch.tanh(x), self.log_std_parameter, {} # JetBotEnv action_space is -10 to 10 class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -67,10 +67,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.Tanh(), nn.Linear(32, 1)) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): # view (samples, width * height * channels) -> (samples, width, height, channels) # permute (samples, width, height, channels) -> (samples, channels, width, height) - return self.net(states.view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)) + return self.net(inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)), {} # Load and wrap the JetBot environment (a subclass of Gym) From 373aa0e7d89e6f7984c62393722644804bb19371 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 19:27:35 +0100 Subject: [PATCH 054/157] Update Isaac Gym example files --- docs/source/examples/isaacgym/amp_humanoid.py | 12 ++++++------ .../isaacgym/isaacgym_parallel_no_shared_memory.py | 12 ++++++------ .../isaacgym_parallel_no_shared_memory_eval.py | 8 ++++---- .../isaacgym_sequential_no_shared_memory.py | 12 ++++++------ .../isaacgym_sequential_no_shared_memory_eval.py | 8 ++++---- .../isaacgym/isaacgym_sequential_shared_memory.py | 12 ++++++------ .../isaacgym_sequential_shared_memory_eval.py | 8 ++++---- docs/source/examples/isaacgym/ppo_allegro_hand.py | 12 ++++++------ docs/source/examples/isaacgym/ppo_ant.py | 12 ++++++------ 
docs/source/examples/isaacgym/ppo_anymal.py | 12 ++++++------ .../source/examples/isaacgym/ppo_anymal_terrain.py | 8 ++++---- docs/source/examples/isaacgym/ppo_ball_balance.py | 12 ++++++------ docs/source/examples/isaacgym/ppo_cartpole.py | 14 +++++++------- docs/source/examples/isaacgym/ppo_cartpole_eval.py | 12 ++++++------ .../source/examples/isaacgym/ppo_franka_cabinet.py | 12 ++++++------ docs/source/examples/isaacgym/ppo_humanoid.py | 12 ++++++------ docs/source/examples/isaacgym/ppo_ingenuity.py | 12 ++++++------ docs/source/examples/isaacgym/ppo_quadcopter.py | 12 ++++++------ docs/source/examples/isaacgym/ppo_shadow_hand.py | 12 ++++++------ docs/source/examples/isaacgym/ppo_trifinger.py | 12 ++++++------ docs/source/examples/isaacgym/trpo_cartpole.py | 8 ++++---- 21 files changed, 117 insertions(+), 117 deletions(-) diff --git a/docs/source/examples/isaacgym/amp_humanoid.py b/docs/source/examples/isaacgym/amp_humanoid.py index 3cbff0f8..0dd3ca42 100644 --- a/docs/source/examples/isaacgym/amp_humanoid.py +++ b/docs/source/examples/isaacgym/amp_humanoid.py @@ -37,8 +37,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, # set a fixed log standard deviation for the policy self.log_std_parameter = nn.Parameter(torch.full((self.num_actions,), fill_value=-2.9), requires_grad=False) - def compute(self, states, taken_actions, role): - return torch.tanh(self.net(states)), self.log_std_parameter + def compute(self, inputs, role): + return torch.tanh(self.net(inputs["states"])), self.log_std_parameter, {} class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -51,8 +51,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ReLU(), nn.Linear(512, 1)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} class Discriminator(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -65,8 +65,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ReLU(), nn.Linear(512, 1)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py index 9c306802..09283af9 100644 --- a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py +++ b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory.py @@ -32,8 +32,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} class DeterministicActor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -46,8 +46,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return 
self.net(inputs["states"]), {} class Critic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -60,8 +60,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions, role): - return self.net(torch.cat([states, taken_actions], dim=1)) + def compute(self, inputs, role): + return self.net(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1)), {} if __name__ == '__main__': diff --git a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py index 0d311d76..a4614752 100644 --- a/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py +++ b/docs/source/examples/isaacgym/isaacgym_parallel_no_shared_memory_eval.py @@ -27,8 +27,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} class DeterministicActor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -41,8 +41,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} if __name__ == '__main__': diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py index 4cbe05a2..9c1d1b9d 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory.py @@ -32,8 +32,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} class DeterministicActor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -46,8 +46,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} class Critic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -60,8 +60,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions, role): - return self.net(torch.cat([states, taken_actions], dim=1)) + def compute(self, inputs, role): + return self.net(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1)), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py 
b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py index 1776e219..9c599d66 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_no_shared_memory_eval.py @@ -27,8 +27,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} class DeterministicActor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -41,8 +41,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py index 5d0fe627..f8f24172 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory.py @@ -32,8 +32,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} class DeterministicActor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -46,8 +46,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} class Critic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -60,8 +60,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions, role): - return self.net(torch.cat([states, taken_actions], dim=1)) + def compute(self, inputs, role): + return self.net(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1)), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py index a8608b38..841a99e9 100644 --- a/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py +++ b/docs/source/examples/isaacgym/isaacgym_sequential_shared_memory_eval.py @@ -27,8 +27,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), 
self.log_std_parameter, {} class DeterministicActor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -41,8 +41,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_allegro_hand.py b/docs/source/examples/isaacgym/ppo_allegro_hand.py index 2938df7b..d3d3225e 100644 --- a/docs/source/examples/isaacgym/ppo_allegro_hand.py +++ b/docs/source/examples/isaacgym/ppo_allegro_hand.py @@ -39,17 +39,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(128, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment using the easy-to-use API from NVIDIA diff --git a/docs/source/examples/isaacgym/ppo_ant.py b/docs/source/examples/isaacgym/ppo_ant.py index d1c061aa..06e3eaec 100644 --- a/docs/source/examples/isaacgym/ppo_ant.py +++ b/docs/source/examples/isaacgym/ppo_ant.py @@ -39,17 +39,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(64, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_anymal.py b/docs/source/examples/isaacgym/ppo_anymal.py index 3a4dbc2f..53c4913d 100644 --- a/docs/source/examples/isaacgym/ppo_anymal.py +++ b/docs/source/examples/isaacgym/ppo_anymal.py @@ -39,17 +39,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(64, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def 
compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_anymal_terrain.py b/docs/source/examples/isaacgym/ppo_anymal_terrain.py index bf55cf04..550635c7 100644 --- a/docs/source/examples/isaacgym/ppo_anymal_terrain.py +++ b/docs/source/examples/isaacgym/ppo_anymal_terrain.py @@ -37,8 +37,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(128, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -53,8 +53,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(128, 1)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_ball_balance.py b/docs/source/examples/isaacgym/ppo_ball_balance.py index ffd29199..9cc9ccb8 100644 --- a/docs/source/examples/isaacgym/ppo_ball_balance.py +++ b/docs/source/examples/isaacgym/ppo_ball_balance.py @@ -39,17 +39,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(32, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_cartpole.py b/docs/source/examples/isaacgym/ppo_cartpole.py index b716259e..38aa7110 100644 --- a/docs/source/examples/isaacgym/ppo_cartpole.py +++ b/docs/source/examples/isaacgym/ppo_cartpole.py @@ -16,7 +16,7 @@ # set the seed for reproducibility -set_seed(42) +set_seed(32) # Define the shared model (stochastic and deterministic models) for the agent using mixins. 
@@ -37,17 +37,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(32, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_cartpole_eval.py b/docs/source/examples/isaacgym/ppo_cartpole_eval.py index 3123b6ee..c35a93db 100644 --- a/docs/source/examples/isaacgym/ppo_cartpole_eval.py +++ b/docs/source/examples/isaacgym/ppo_cartpole_eval.py @@ -30,17 +30,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(32, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_franka_cabinet.py b/docs/source/examples/isaacgym/ppo_franka_cabinet.py index 383b5fa6..4d951ea6 100644 --- a/docs/source/examples/isaacgym/ppo_franka_cabinet.py +++ b/docs/source/examples/isaacgym/ppo_franka_cabinet.py @@ -39,17 +39,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(64, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_humanoid.py b/docs/source/examples/isaacgym/ppo_humanoid.py index e1405d84..2d5dd996 100644 --- a/docs/source/examples/isaacgym/ppo_humanoid.py +++ b/docs/source/examples/isaacgym/ppo_humanoid.py @@ -39,17 +39,17 @@ def __init__(self, observation_space, 
action_space, device, clip_actions=False, self.value_layer = nn.Linear(100, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_ingenuity.py b/docs/source/examples/isaacgym/ppo_ingenuity.py index f30466a0..e1635033 100644 --- a/docs/source/examples/isaacgym/ppo_ingenuity.py +++ b/docs/source/examples/isaacgym/ppo_ingenuity.py @@ -39,17 +39,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(128, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment using the easy-to-use API from NVIDIA diff --git a/docs/source/examples/isaacgym/ppo_quadcopter.py b/docs/source/examples/isaacgym/ppo_quadcopter.py index 4a168729..b7f34182 100644 --- a/docs/source/examples/isaacgym/ppo_quadcopter.py +++ b/docs/source/examples/isaacgym/ppo_quadcopter.py @@ -39,17 +39,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(128, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_shadow_hand.py b/docs/source/examples/isaacgym/ppo_shadow_hand.py index 9827ae04..3263777b 100644 --- a/docs/source/examples/isaacgym/ppo_shadow_hand.py +++ b/docs/source/examples/isaacgym/ppo_shadow_hand.py @@ -41,17 +41,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, 
self.value_layer = nn.Linear(128, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/ppo_trifinger.py b/docs/source/examples/isaacgym/ppo_trifinger.py index 65a27b17..9be44ef2 100644 --- a/docs/source/examples/isaacgym/ppo_trifinger.py +++ b/docs/source/examples/isaacgym/ppo_trifinger.py @@ -41,17 +41,17 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.value_layer = nn.Linear(128, 1) - def act(self, states, taken_actions, role): + def act(self, inputs, role): if role == "policy": - return GaussianMixin.act(self, states, taken_actions, role) + return GaussianMixin.act(self, inputs, role) elif role == "value": - return DeterministicMixin.act(self, states, taken_actions, role) + return DeterministicMixin.act(self, inputs, role) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): if role == "policy": - return self.mean_layer(self.net(states)), self.log_std_parameter + return self.mean_layer(self.net(inputs["states"])), self.log_std_parameter, {} elif role == "value": - return self.value_layer(self.net(states)) + return self.value_layer(self.net(inputs["states"])), {} # Load and wrap the Isaac Gym environment diff --git a/docs/source/examples/isaacgym/trpo_cartpole.py b/docs/source/examples/isaacgym/trpo_cartpole.py index 8b0a76e7..e2e89370 100644 --- a/docs/source/examples/isaacgym/trpo_cartpole.py +++ b/docs/source/examples/isaacgym/trpo_cartpole.py @@ -34,8 +34,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): - return self.net(states), self.log_std_parameter + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} class Value(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -48,8 +48,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ELU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions, role): - return self.net(states) + def compute(self, inputs, role): + return self.net(inputs["states"]), {} # Load and wrap the Isaac Gym environment From 1d848fa249a7bb98860c19954d7f2d35b48f2696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 19:36:04 +0100 Subject: [PATCH 055/157] Update Gymnasium example files --- .../examples/gymnasium/gymnasium_cartpole_cem.py | 6 +++--- .../gymnasium/gymnasium_cartpole_cem_eval.py | 6 +++--- .../gymnasium/gymnasium_frozen_lake_q_learning.py | 8 ++++---- .../gymnasium_frozen_lake_q_learning_eval.py | 8 ++++---- .../examples/gymnasium/gymnasium_pendulum_ddpg.py | 12 
++++++------ .../gymnasium/gymnasium_pendulum_ddpg_eval.py | 6 +++--- .../examples/gymnasium/gymnasium_taxi_sarsa.py | 8 ++++---- .../examples/gymnasium/gymnasium_taxi_sarsa_eval.py | 10 +++++----- .../gymnasium_vector_frozen_lake_q_learning.py | 8 ++++---- .../gymnasium/gymnasium_vector_pendulum_ddpg.py | 12 ++++++------ .../gymnasium/gymnasium_vector_taxi_sarsa.py | 8 ++++---- 11 files changed, 46 insertions(+), 46 deletions(-) diff --git a/docs/source/examples/gymnasium/gymnasium_cartpole_cem.py b/docs/source/examples/gymnasium/gymnasium_cartpole_cem.py index 4a4bb79e..64d1f872 100644 --- a/docs/source/examples/gymnasium/gymnasium_cartpole_cem.py +++ b/docs/source/examples/gymnasium/gymnasium_cartpole_cem.py @@ -22,10 +22,10 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro self.linear_layer_2 = nn.Linear(64, 64) self.output_layer = nn.Linear(64, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return self.output_layer(x) + return self.output_layer(x), {} # Load and wrap the Gymnasium environment. diff --git a/docs/source/examples/gymnasium/gymnasium_cartpole_cem_eval.py b/docs/source/examples/gymnasium/gymnasium_cartpole_cem_eval.py index 41edad28..39cb9719 100644 --- a/docs/source/examples/gymnasium/gymnasium_cartpole_cem_eval.py +++ b/docs/source/examples/gymnasium/gymnasium_cartpole_cem_eval.py @@ -21,10 +21,10 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro self.linear_layer_2 = nn.Linear(64, 64) self.output_layer = nn.Linear(64, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return self.output_layer(x) + return self.output_layer(x), {} # Load and wrap the Gymnasium environment. diff --git a/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning.py b/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning.py index b7a8f22e..30d339f5 100644 --- a/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning.py +++ b/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning.py @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gymnasium environment. 
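For reference, the tabular hunks above (and the SARSA/Q-learning hunks that follow) all apply the same change, so a consolidated view of the resulting model may be easier to read than the interleaved +/- lines. The sketch below is a reading aid, not part of the patch: the skrl.models.torch import path and the TabularMixin.__init__ call are assumed from skrl's documented usage (they sit outside the hunks shown), and the class name is written out here for clarity.

import torch

from skrl.models.torch import Model, TabularMixin


# epsilon-greedy Q-table policy under the updated compute(inputs, role) API
class EpsilonGreedyPolicy(TabularMixin, Model):
    def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1):
        Model.__init__(self, observation_space, action_space, device)
        TabularMixin.__init__(self, num_envs=num_envs)

        self.epsilon = epsilon
        self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions),
                                  dtype=torch.float32, device=self.device)

    def compute(self, inputs, role):
        states = inputs["states"]  # observations are now read from the inputs dictionary
        # greedy actions from the Q-table
        actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states],
                               dim=-1, keepdim=True).view(-1, 1)
        # choose random actions for exploration according to epsilon
        indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1)
        if indexes.numel():
            actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device)
        return actions, {}  # second element: dictionary of extra outputs (empty here)
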
diff --git a/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py b/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py index 466393f5..a07ec805 100644 --- a/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py +++ b/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gymnasium environment. diff --git a/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg.py b/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg.py index 716e50db..41dd7c64 100644 --- a/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg.py +++ b/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg.py @@ -25,10 +25,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} # Pendulum-v1 action_space is -2 to 2 class DeterministicCritic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -39,10 +39,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.linear_layer_3 = nn.Linear(300, 1) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(torch.cat([states, taken_actions], dim=1))) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) x = F.relu(self.linear_layer_2(x)) - return self.linear_layer_3(x) + return self.linear_layer_3(x), {} # Load and wrap the Gymnasium environment. 
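The DDPG hunks above touch both the actor and the critic, so a minimal consolidated sketch of the two resulting models is given here. It is illustrative rather than authoritative: the imports, the DeterministicMixin.__init__ call, and the first critic layer's input size are assumed from skrl's documented example layout, since the hunks only show the changed compute methods. The key point is that the critic now reads the sampled actions from inputs["taken_actions"] instead of a separate taken_actions argument.

import torch
import torch.nn as nn
import torch.nn.functional as F

from skrl.models.torch import Model, DeterministicMixin


class DeterministicActor(DeterministicMixin, Model):
    def __init__(self, observation_space, action_space, device, clip_actions=False):
        Model.__init__(self, observation_space, action_space, device)
        DeterministicMixin.__init__(self, clip_actions)

        self.linear_layer_1 = nn.Linear(self.num_observations, 400)
        self.linear_layer_2 = nn.Linear(400, 300)
        self.action_layer = nn.Linear(300, self.num_actions)

    def compute(self, inputs, role):
        x = F.relu(self.linear_layer_1(inputs["states"]))
        x = F.relu(self.linear_layer_2(x))
        return 2 * torch.tanh(self.action_layer(x)), {}  # Pendulum-v1 action_space is -2 to 2


class DeterministicCritic(DeterministicMixin, Model):
    def __init__(self, observation_space, action_space, device, clip_actions=False):
        Model.__init__(self, observation_space, action_space, device)
        DeterministicMixin.__init__(self, clip_actions)

        self.linear_layer_1 = nn.Linear(self.num_observations + self.num_actions, 400)
        self.linear_layer_2 = nn.Linear(400, 300)
        self.linear_layer_3 = nn.Linear(300, 1)

    def compute(self, inputs, role):
        # state-action value: concatenate observations and the taken actions from the inputs dictionary
        x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1)))
        x = F.relu(self.linear_layer_2(x))
        return self.linear_layer_3(x), {}
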
diff --git a/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg_eval.py b/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg_eval.py index 38df39ef..12eac470 100644 --- a/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg_eval.py +++ b/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg_eval.py @@ -21,10 +21,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} # Pendulum-v1 action_space is -2 to 2 # Load and wrap the Gymnasium environment. diff --git a/docs/source/examples/gymnasium/gymnasium_taxi_sarsa.py b/docs/source/examples/gymnasium/gymnasium_taxi_sarsa.py index 0e133dc5..000eca7b 100644 --- a/docs/source/examples/gymnasium/gymnasium_taxi_sarsa.py +++ b/docs/source/examples/gymnasium/gymnasium_taxi_sarsa.py @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gymnasium environment. 
diff --git a/docs/source/examples/gymnasium/gymnasium_taxi_sarsa_eval.py b/docs/source/examples/gymnasium/gymnasium_taxi_sarsa_eval.py index bb74e58d..3899c8a6 100644 --- a/docs/source/examples/gymnasium/gymnasium_taxi_sarsa_eval.py +++ b/docs/source/examples/gymnasium/gymnasium_taxi_sarsa_eval.py @@ -9,7 +9,7 @@ from skrl.envs.torch import wrap_env -# Define the model (tabular models) for the SARSA agent using a helper class +# Define the model (tabular model) for the SARSA agent using a helper class class EpilonGreedyPolicy(TabularMixin, Model): def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): Model.__init__(self, observation_space, action_space, device) @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gymnasium environment. diff --git a/docs/source/examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py b/docs/source/examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py index 6312c034..a0fe5860 100644 --- a/docs/source/examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py +++ b/docs/source/examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gymnasium environment. 
diff --git a/docs/source/examples/gymnasium/gymnasium_vector_pendulum_ddpg.py b/docs/source/examples/gymnasium/gymnasium_vector_pendulum_ddpg.py index ee5c9892..72107b94 100644 --- a/docs/source/examples/gymnasium/gymnasium_vector_pendulum_ddpg.py +++ b/docs/source/examples/gymnasium/gymnasium_vector_pendulum_ddpg.py @@ -25,10 +25,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} # Pendulum-v1 action_space is -2 to 2 class DeterministicCritic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -39,10 +39,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.linear_layer_3 = nn.Linear(300, 1) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(torch.cat([states, taken_actions], dim=1))) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) x = F.relu(self.linear_layer_2(x)) - return self.linear_layer_3(x) + return self.linear_layer_3(x), {} # Load and wrap the Gymnasium environment. diff --git a/docs/source/examples/gymnasium/gymnasium_vector_taxi_sarsa.py b/docs/source/examples/gymnasium/gymnasium_vector_taxi_sarsa.py index e2e90c97..8ab8bd0e 100644 --- a/docs/source/examples/gymnasium/gymnasium_vector_taxi_sarsa.py +++ b/docs/source/examples/gymnasium/gymnasium_vector_taxi_sarsa.py @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gymnasium environment. 
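
The change repeated across these example files is the updated model API: `compute()` now receives a single `inputs` dictionary (with keys such as `"states"` and, for critics, `"taken_actions"`) together with a `role` string, and returns an extra (possibly empty) dictionary of outputs in addition to the network output. A minimal sketch of the new pattern, assuming the usual `skrl.models.torch` imports used by these examples and purely illustrative layer sizes:

    import torch.nn as nn

    from skrl.models.torch import Model, DeterministicMixin


    class Actor(DeterministicMixin, Model):
        def __init__(self, observation_space, action_space, device, clip_actions=False):
            Model.__init__(self, observation_space, action_space, device)
            DeterministicMixin.__init__(self, clip_actions)

            self.net = nn.Sequential(nn.Linear(self.num_observations, 64),
                                     nn.ReLU(),
                                     nn.Linear(64, self.num_actions))

        # old: compute(self, states, taken_actions, role) -> output
        # new: compute(self, inputs, role) -> (output, outputs_dict)
        def compute(self, inputs, role):
            return self.net(inputs["states"]), {}

A critic follows the same pattern, reading the actions from `inputs["taken_actions"]` instead of receiving them as a separate argument.
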
From d0a07fdbf64f289c53c73a10701184dce4e0419e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 19:38:44 +0100 Subject: [PATCH 056/157] Update Gym example files --- docs/source/examples/gym/gym_cartpole_cem.py | 6 +++--- docs/source/examples/gym/gym_cartpole_cem_eval.py | 6 +++--- .../examples/gym/gym_frozen_lake_q_learning.py | 8 ++++---- .../examples/gym/gym_frozen_lake_q_learning_eval.py | 8 ++++---- docs/source/examples/gym/gym_pendulum_ddpg.py | 12 ++++++------ docs/source/examples/gym/gym_pendulum_ddpg_eval.py | 6 +++--- docs/source/examples/gym/gym_taxi_sarsa.py | 8 ++++---- docs/source/examples/gym/gym_taxi_sarsa_eval.py | 10 +++++----- .../gym/gym_vector_frozen_lake_q_learning.py | 8 ++++---- docs/source/examples/gym/gym_vector_pendulum_ddpg.py | 12 ++++++------ docs/source/examples/gym/gym_vector_taxi_sarsa.py | 8 ++++---- 11 files changed, 46 insertions(+), 46 deletions(-) diff --git a/docs/source/examples/gym/gym_cartpole_cem.py b/docs/source/examples/gym/gym_cartpole_cem.py index b1bba349..96082e71 100644 --- a/docs/source/examples/gym/gym_cartpole_cem.py +++ b/docs/source/examples/gym/gym_cartpole_cem.py @@ -22,10 +22,10 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro self.linear_layer_2 = nn.Linear(64, 64) self.output_layer = nn.Linear(64, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return self.output_layer(x) + return self.output_layer(x), {} # Load and wrap the Gym environment. diff --git a/docs/source/examples/gym/gym_cartpole_cem_eval.py b/docs/source/examples/gym/gym_cartpole_cem_eval.py index ab0b9d5e..e3c33247 100644 --- a/docs/source/examples/gym/gym_cartpole_cem_eval.py +++ b/docs/source/examples/gym/gym_cartpole_cem_eval.py @@ -21,10 +21,10 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro self.linear_layer_2 = nn.Linear(64, 64) self.output_layer = nn.Linear(64, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return self.output_layer(x) + return self.output_layer(x), {} # Load and wrap the Gym environment. 
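
In the CEM example above the policy is categorical: with `unnormalized_log_prob=True`, the value returned by `compute()` is treated as unnormalized log-probabilities (logits) from which a categorical distribution is built to sample actions. A small, self-contained illustration of that interpretation in plain PyTorch (not the skrl mixin itself):

    import torch

    # batch of one state, three discrete actions; raw network outputs (logits)
    logits = torch.tensor([[1.0, 2.0, 0.5]])

    dist = torch.distributions.Categorical(logits=logits)  # softmax-normalizes internally
    action = dist.sample()                                  # sampled action index, shape (1,)
    log_prob = dist.log_prob(action)                        # log-probability of that action
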
diff --git a/docs/source/examples/gym/gym_frozen_lake_q_learning.py b/docs/source/examples/gym/gym_frozen_lake_q_learning.py index c0289717..881e86bb 100644 --- a/docs/source/examples/gym/gym_frozen_lake_q_learning.py +++ b/docs/source/examples/gym/gym_frozen_lake_q_learning.py @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gym environment. diff --git a/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py b/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py index 65c7680a..74487c8c 100644 --- a/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py +++ b/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gym environment. 
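
The Q-table lookup in these tabular policies relies on PyTorch advanced indexing: row `i` of the environment index selects the Q-values of environment `i` at its own (integer) observation. A small, self-contained illustration with made-up sizes:

    import torch

    num_envs, num_observations, num_actions = 2, 4, 3
    q_table = torch.rand((num_envs, num_observations, num_actions))
    states = torch.tensor([[1], [3]])  # one integer observation per environment, shape (num_envs, 1)

    # result has shape (num_envs, 1, num_actions): per-environment Q-values at the given states
    q_values = q_table[torch.arange(num_envs).view(-1, 1), states]
    greedy_actions = torch.argmax(q_values, dim=-1, keepdim=True).view(-1, 1)  # shape (num_envs, 1)
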
diff --git a/docs/source/examples/gym/gym_pendulum_ddpg.py b/docs/source/examples/gym/gym_pendulum_ddpg.py index c1d089fd..17ced81a 100644 --- a/docs/source/examples/gym/gym_pendulum_ddpg.py +++ b/docs/source/examples/gym/gym_pendulum_ddpg.py @@ -25,10 +25,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} # Pendulum-v1 action_space is -2 to 2 class DeterministicCritic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -39,10 +39,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.linear_layer_3 = nn.Linear(300, 1) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(torch.cat([states, taken_actions], dim=1))) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) x = F.relu(self.linear_layer_2(x)) - return self.linear_layer_3(x) + return self.linear_layer_3(x), {} # Load and wrap the Gym environment. diff --git a/docs/source/examples/gym/gym_pendulum_ddpg_eval.py b/docs/source/examples/gym/gym_pendulum_ddpg_eval.py index e87c3283..226f6523 100644 --- a/docs/source/examples/gym/gym_pendulum_ddpg_eval.py +++ b/docs/source/examples/gym/gym_pendulum_ddpg_eval.py @@ -21,10 +21,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} # Pendulum-v1 action_space is -2 to 2 # Load and wrap the Gym environment. 
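
On the caller side, `act()` takes the same dictionary and always returns a 3-tuple: the actions, the log-probability (`None` for deterministic models such as these) and a dictionary of extra outputs. A sketch of how an agent would query the DDPG actor and critic defined above, assuming `policy` and `critic` are instances of those classes and `states` is a tensor of shape (num_envs, num_observations):

    # deterministic policy: the second element (log-prob) is None
    actions, _, outputs = policy.act({"states": states}, role="policy")

    # the critic receives the taken actions through the same input dictionary
    values, _, _ = critic.act({"states": states, "taken_actions": actions}, role="critic")
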
diff --git a/docs/source/examples/gym/gym_taxi_sarsa.py b/docs/source/examples/gym/gym_taxi_sarsa.py index 28509dd3..f991bc09 100644 --- a/docs/source/examples/gym/gym_taxi_sarsa.py +++ b/docs/source/examples/gym/gym_taxi_sarsa.py @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gym environment. diff --git a/docs/source/examples/gym/gym_taxi_sarsa_eval.py b/docs/source/examples/gym/gym_taxi_sarsa_eval.py index 39ab5c24..430e3585 100644 --- a/docs/source/examples/gym/gym_taxi_sarsa_eval.py +++ b/docs/source/examples/gym/gym_taxi_sarsa_eval.py @@ -9,7 +9,7 @@ from skrl.envs.torch import wrap_env -# Define the model (tabular models) for the SARSA agent using a helper class +# Define the model (tabular model) for the SARSA agent using a helper class class EpilonGreedyPolicy(TabularMixin, Model): def __init__(self, observation_space, action_space, device, num_envs=1, epsilon=0.1): Model.__init__(self, observation_space, action_space, device) @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gym environment. 
diff --git a/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py b/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py index d4fe3c3e..a16908f4 100644 --- a/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py +++ b/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gym environment. diff --git a/docs/source/examples/gym/gym_vector_pendulum_ddpg.py b/docs/source/examples/gym/gym_vector_pendulum_ddpg.py index 7572c217..72990c3e 100644 --- a/docs/source/examples/gym/gym_vector_pendulum_ddpg.py +++ b/docs/source/examples/gym/gym_vector_pendulum_ddpg.py @@ -25,10 +25,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return 2 * torch.tanh(self.action_layer(x)) # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} # Pendulum-v1 action_space is -2 to 2 class DeterministicCritic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -39,10 +39,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.linear_layer_3 = nn.Linear(300, 1) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(torch.cat([states, taken_actions], dim=1))) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) x = F.relu(self.linear_layer_2(x)) - return self.linear_layer_3(x) + return self.linear_layer_3(x), {} # Load and wrap the Gym environment. 
diff --git a/docs/source/examples/gym/gym_vector_taxi_sarsa.py b/docs/source/examples/gym/gym_vector_taxi_sarsa.py index eb79abcb..a4370569 100644 --- a/docs/source/examples/gym/gym_vector_taxi_sarsa.py +++ b/docs/source/examples/gym/gym_vector_taxi_sarsa.py @@ -19,15 +19,15 @@ def __init__(self, observation_space, action_space, device, num_envs=1, epsilon= self.q_table = torch.ones((num_envs, self.num_observations, self.num_actions), dtype=torch.float32, device=self.device) - def compute(self, states, taken_actions, role): - actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), states], + def compute(self, inputs, role): + actions = torch.argmax(self.q_table[torch.arange(self.num_envs).view(-1, 1), inputs["states"]], dim=-1, keepdim=True).view(-1,1) # choose random actions for exploration according to epsilon - indexes = (torch.rand(states.shape[0], device=self.device) < self.epsilon).nonzero().view(-1) + indexes = (torch.rand(inputs["states"].shape[0], device=self.device) < self.epsilon).nonzero().view(-1) if indexes.numel(): actions[indexes] = torch.randint(self.num_actions, (indexes.numel(), 1), device=self.device) - return actions + return actions, {} # Load and wrap the Gym environment. From 2c434b7a17e48e8db7f50238e92e3905009a1416 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 19:40:55 +0100 Subject: [PATCH 057/157] Update DeepMind example files --- .../deepmind/dm_manipulation_stack_sac.py | 32 +++++++++++-------- .../dm_suite_cartpole_swingup_ddpg.py | 10 +++--- 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/docs/source/examples/deepmind/dm_manipulation_stack_sac.py b/docs/source/examples/deepmind/dm_manipulation_stack_sac.py index eb83fde9..2cb943d0 100644 --- a/docs/source/examples/deepmind/dm_manipulation_stack_sac.py +++ b/docs/source/examples/deepmind/dm_manipulation_stack_sac.py @@ -40,7 +40,9 @@ def __init__(self, observation_space, action_space, device, clip_actions=False, self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): + states = inputs["states"] + # The dm_control.manipulation tasks have as observation/state spec a `collections.OrderedDict` object as follows: # OrderedDict([('front_close', BoundedArray(shape=(1, 84, 84, 3), dtype=dtype('uint8'), name='front_close', minimum=0, maximum=255)), # ('jaco_arm/joints_pos', Array(shape=(1, 6, 2), dtype=dtype('float64'), name='jaco_arm/joints_pos')), @@ -64,9 +66,9 @@ def compute(self, states, taken_actions, role): # The `spaces` parameter is a flat tensor of the flattened observation/state space with shape (batch_size, size_of_flat_space). # Using the model's method `tensor_to_space` we can convert the flattened tensor to the original space. 
# https://skrl.readthedocs.io/en/latest/modules/skrl.models.base_class.html#skrl.models.torch.base.Model.tensor_to_space - input = self.tensor_to_space(states, self.observation_space) + space = self.tensor_to_space(states, self.observation_space) - # For this case, the `input` variable is a Python dictionary with the following structure and shapes: + # For this case, the `space` variable is a Python dictionary with the following structure and shapes: # {'front_close': torch.Tensor(shape=[batch_size, 1, 84, 84, 3], dtype=torch.float32), # 'jaco_arm/jaco_hand/joints_pos': torch.Tensor(shape=[batch_size, 1, 3], dtype=torch.float32) # 'jaco_arm/jaco_hand/joints_vel': torch.Tensor(shape=[batch_size, 1, 3], dtype=torch.float32) @@ -77,11 +79,13 @@ def compute(self, states, taken_actions, role): # 'jaco_arm/joints_vel': torch.Tensor(shape=[batch_size, 1, 6], dtype=torch.float32)} # permute and normalize the images (samples, width, height, channels) -> (samples, channels, width, height) - features = self.features_extractor(input['front_close'][:,0].permute(0, 3, 1, 2) / 255.0) + features = self.features_extractor(space['front_close'][:,0].permute(0, 3, 1, 2) / 255.0) + + mean_actions = torch.tanh(self.net(torch.cat([features, + space["jaco_arm/joints_pos"].view(states.shape[0], -1), + space["jaco_arm/joints_vel"].view(states.shape[0], -1)], dim=-1))) - return torch.tanh(self.net(torch.cat([features, - input["jaco_arm/joints_pos"].view(states.shape[0], -1), - input["jaco_arm/joints_vel"].view(states.shape[0], -1)], dim=-1))), self.log_std_parameter + return mean_actions, self.log_std_parameter, {} class Critic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -106,18 +110,20 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ReLU(), nn.Linear(32, 1)) - def compute(self, states, taken_actions, role): + def compute(self, inputs, role): + states = inputs["states"] + # map the observations/states to the original space. 
# See the explanation above (StochasticActor.compute) - input = self.tensor_to_space(states, self.observation_space) + space = self.tensor_to_space(states, self.observation_space) # permute and normalize the images (samples, width, height, channels) -> (samples, channels, width, height) - features = self.features_extractor(input['front_close'][:,0].permute(0, 3, 1, 2) / 255.0) + features = self.features_extractor(space['front_close'][:,0].permute(0, 3, 1, 2) / 255.0) return self.net(torch.cat([features, - input["jaco_arm/joints_pos"].view(states.shape[0], -1), - input["jaco_arm/joints_vel"].view(states.shape[0], -1), - taken_actions], dim=-1)) + space["jaco_arm/joints_pos"].view(states.shape[0], -1), + space["jaco_arm/joints_vel"].view(states.shape[0], -1), + inputs["taken_actions"]], dim=-1)), {} # Load and wrap the DeepMind environment diff --git a/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py b/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py index 95e35900..1e498c20 100644 --- a/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py +++ b/docs/source/examples/deepmind/dm_suite_cartpole_swingup_ddpg.py @@ -26,10 +26,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): self.linear_layer_2 = nn.Linear(400, 300) self.action_layer = nn.Linear(300, self.num_actions) - def compute(self, states, taken_actions, role): - x = F.relu(self.linear_layer_1(states)) + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return torch.tanh(self.action_layer(x)) + return torch.tanh(self.action_layer(x)), {} class DeterministicCritic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): @@ -42,8 +42,8 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): nn.ReLU(), nn.Linear(300, 1)) - def compute(self, states, taken_actions, role): - return self.net(torch.cat([states, taken_actions], dim=1)) + def compute(self, inputs, role): + return self.net(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1)), {} # Load and wrap the DeepMind environment From e8ac14baaf2ab43546138c724c98f0e355d2619b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 20:04:32 +0100 Subject: [PATCH 058/157] Update DeepMind example in docs --- docs/source/intro/examples.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index fc010640..b2723691 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -279,7 +279,7 @@ The following components or practices are exemplified (highlighted): .. literalinclude:: ../examples/deepmind/dm_manipulation_stack_sac.py :language: python - :emphasize-lines: 67, 80, 83-84, 112, 115, 118-119 + :emphasize-lines: 69, 82, 85-86, 118, 121, 124-125 .. raw:: html From de27ed4dd8d91b729c815b6d30cdfb3c7b10615e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 20:10:03 +0100 Subject: [PATCH 059/157] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a884847f..449860f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
- Support for Gymnasium interface ### Changed +- Forward model inputs as a Python dictionary [**breaking change**] +- Returns a Python dictionary with extra output values in model calls [**breaking change**] - Adopt the implementation of `terminated` and `truncated` over `done` for all environments ### Fixed From 7f1b80f23d0d5eaa6a2006e244fb595711df2bf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 6 Nov 2022 20:15:19 +0100 Subject: [PATCH 060/157] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c4ae3499..88da777e 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@

SKRL - Reinforcement Learning library


-**skrl** is an open-source modular library for Reinforcement Learning written in Python (using [PyTorch](https://pytorch.org/)) and designed with a focus on readability, simplicity, and transparency of algorithm implementation. In addition to supporting the [OpenAI Gym](https://www.gymlibrary.dev) and [DeepMind](https://github.com/deepmind/dm_env) environment interfaces, it allows loading and configuring [NVIDIA Isaac Gym](https://developer.nvidia.com/isaac-gym/) and [NVIDIA Omniverse Isaac Gym](https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/tutorial_gym_isaac_gym.html) environments, enabling agents' simultaneous training by scopes (subsets of environments among all available environments), which may or may not share resources, in the same run +**skrl** is an open-source modular library for Reinforcement Learning written in Python (using [PyTorch](https://pytorch.org/)) and designed with a focus on readability, simplicity, and transparency of algorithm implementation. In addition to supporting the OpenAI [Gym](https://www.gymlibrary.dev) / Farama [Gymnasium](https://gymnasium.farama.org) and [DeepMind](https://github.com/deepmind/dm_env) environment interfaces, it allows loading and configuring [NVIDIA Isaac Gym](https://developer.nvidia.com/isaac-gym/) and [NVIDIA Omniverse Isaac Gym](https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/tutorial_gym_isaac_gym.html) environments, enabling agents' simultaneous training by scopes (subsets of environments among all available environments), which may or may not share resources, in the same run
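
As the updated README sentence suggests, the same wrapper covers both the Gym and Gymnasium interfaces. A minimal sketch of loading and wrapping a Gymnasium environment and stepping it with the `terminated`/`truncated` convention adopted in this series (the environment id and the zero action are placeholders, and wrapper auto-detection is assumed):

    import gymnasium as gym
    import torch

    from skrl.envs.torch import wrap_env

    env = wrap_env(gym.make("Pendulum-v1"))  # wrapper type is auto-detected

    states, infos = env.reset()
    actions = torch.zeros((env.num_envs, env.action_space.shape[0]), device=env.device)
    states, rewards, terminated, truncated, infos = env.step(actions)
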
From f3f771660be75835aab1fc66f92630be6a048400 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 10 Nov 2022 22:44:43 +0100 Subject: [PATCH 061/157] Add RNN implementation for PPO agent --- skrl/agents/torch/ppo/ppo.py | 100 +++++++++++++++++++++++++++++------ 1 file changed, 85 insertions(+), 15 deletions(-) diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 620481a1..152fc069 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -179,7 +179,38 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) self.memory.create_tensor(name="advantages", size=1, dtype=torch.float32) - self.tensors_names = ["states", "actions", "rewards", "terminated", "log_prob", "values", "returns", "advantages"] + # tensors sampled during training + self._tensors_names = ["states", "actions", "log_prob", "values", "returns", "advantages"] + + # RNN specifications + self._rnn = False # flag to indicate whether RNN is available + self._rnn_tensors_names = [] # used for sampling during training + self._rnn_final_states = {"policy": [], "value": []} + self._rnn_initial_states = {"policy": [], "value": []} + + # policy + for i, size in enumerate(self.policy.get_specification().get("rnn", {}).get("sizes", [])): + self._rnn = True + # create tensors in memory + if self.memory is not None: + self.memory.create_tensor(name=f"rnn_policy_{i}", size=(size[0], size[2]), dtype=torch.float32, keep_dimensions=True) + self._rnn_tensors_names.append(f"rnn_policy_{i}") + # default RNN states + self._rnn_initial_states["policy"].append(torch.zeros(size, dtype=torch.float32, device=self.device)) + + # value + if self.value is not None: + if self.policy is self.value: + self._rnn_initial_states["value"] = self._rnn_initial_states["policy"] + else: + for i, size in enumerate(self.value.get_specification().get("rnn", {}).get("sizes", [])): + self._rnn = True + # create tensors in memory + if self.memory is not None: + self.memory.create_tensor(name=f"rnn_value_{i}", size=(size[0], size[2]), dtype=torch.float32, keep_dimensions=True) + self._rnn_tensors_names.append(f"rnn_value_{i}") + # default RNN states + self._rnn_initial_states["value"].append(torch.zeros(size, dtype=torch.float32, device=self.device)) # create temporary variables needed for storage and computation self._current_log_prob = None @@ -198,17 +229,20 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens :return: Actions :rtype: torch.Tensor """ - states = self._state_preprocessor(states) + rnn = {"rnn": self._rnn_initial_states["policy"]} if self._rnn else {} # sample random actions - # TODO, check for stochasticity + # TODO: fix for stochasticity, rnn and log_prob if timestep < self._random_timesteps: - return self.policy.random_act({"states": states}, role="policy") + return self.policy.random_act({"states": self._state_preprocessor(states), **rnn}, role="policy") # sample stochastic actions - actions, log_prob, outputs = self.policy.act({"states": states}, role="policy") + actions, log_prob, outputs = self.policy.act({"states": self._state_preprocessor(states), **rnn}, role="policy") self._current_log_prob = log_prob + if self._rnn: + self._rnn_final_states["policy"] = outputs.get("rnn", []) + return actions, log_prob, outputs def record_transition(self, @@ -251,15 +285,39 @@ def record_transition(self, if self._rewards_shaper is not None: rewards = 
self._rewards_shaper(rewards, timestep, timesteps) - with torch.no_grad(): - values, _, _ = self.value.act({"states": self._state_preprocessor(states)}, role="value") + # compute values + rnn = {"rnn": self._rnn_initial_states["value"]} if self._rnn else {} + values, _, outputs = self.value.act({"states": self._state_preprocessor(states), **rnn}, role="value") values = self._value_preprocessor(values, inverse=True) + # package RNN states + rnn_states = {} + if self._rnn: + rnn_states.update({f"rnn_policy_{i}": s.transpose(0, 1) for i, s in enumerate(self._rnn_initial_states["policy"])}) + if self.policy is not self.value: + rnn_states.update({f"rnn_value_{i}": s.transpose(0, 1) for i, s in enumerate(self._rnn_initial_states["value"])}) + + # storage transition in memory self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values) + terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values, **rnn_states) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values) + terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values, **rnn_states) + + # update RNN states + if self._rnn: + self._rnn_final_states["value"] = self._rnn_final_states["policy"] if self.policy is self.value else outputs.get("rnn", []) + + # reset states if the episodes have ended + finished_episodes = terminated.nonzero(as_tuple=False) + if finished_episodes.numel(): + for rnn_state in self._rnn_final_states["policy"]: + rnn_state[:, finished_episodes[:, 0]] = 0 + if self.policy is not self.value: + for rnn_state in self._rnn_final_states["value"]: + rnn_state[:, finished_episodes[:, 0]] = 0 + + self._rnn_initial_states = self._rnn_final_states def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -339,7 +397,8 @@ def compute_gae(rewards: torch.Tensor, # compute returns and advantages with torch.no_grad(): - last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float())}, role="value") + rnn = {"rnn": self._rnn_initial_states["value"]} if self._rnn else {} + last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float()), **rnn}, role="value") last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") @@ -355,7 +414,10 @@ def compute_gae(rewards: torch.Tensor, self.memory.set_tensor_by_name("advantages", advantages) # sample mini-batches from memory - sampled_batches = self.memory.sample_all(names=self.tensors_names, mini_batches=self._mini_batches) + sampled_batches = self.memory.sample_all(names=self._tensors_names, mini_batches=self._mini_batches) + sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches) + + rnn_policy, rnn_value = {}, {} cumulative_policy_loss = 0 cumulative_entropy_loss = 0 @@ -366,12 +428,20 @@ def compute_gae(rewards: torch.Tensor, kl_divergences = [] # mini-batches loop - for sampled_states, sampled_actions, _, _, sampled_log_prob, sampled_values, sampled_returns, sampled_advantages \ - in sampled_batches: + for i, (sampled_states, sampled_actions, sampled_log_prob, 
sampled_values, sampled_returns, sampled_advantages) \ + in enumerate(sampled_batches): + + if self._rnn: + if self.policy is self.value: + rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn_batches[i]]} + rnn_value = rnn_policy + else: + rnn_policy = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "policy" in n]} + rnn_value = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "value" in n]} sampled_states = self._state_preprocessor(sampled_states, train=not epoch) - _, next_log_prob, _ = self.policy.act({"states": sampled_states, "taken_actions": sampled_actions}, role="policy") + _, next_log_prob, _ = self.policy.act({"states": sampled_states, "taken_actions": sampled_actions, **rnn_policy}, role="policy") # compute aproximate KL divergence with torch.no_grad(): @@ -397,7 +467,7 @@ def compute_gae(rewards: torch.Tensor, policy_loss = -torch.min(surrogate, surrogate_clipped).mean() # compute value loss - predicted_values, _, _ = self.value.act({"states": sampled_states}, role="value") + predicted_values, _, _ = self.value.act({"states": sampled_states, **rnn_value}, role="value") if self._clip_predicted_values: predicted_values = sampled_values + torch.clip(predicted_values - sampled_values, From 41e89ee6f79eb94f19250105c75f8f7c25f856d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Thu, 10 Nov 2022 22:55:01 +0100 Subject: [PATCH 062/157] Call Model.act method via .forward method --- skrl/models/torch/base.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 73f02a13..89dbeb43 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -288,12 +288,22 @@ def _update_weights(module, method_name, args, kwargs): _update_weights(self.children(), method_name, args, kwargs) - def forward(self): + def forward(self, *args, **kwargs) -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Forward pass of the model - :raises NotImplementedError: Child class must ``.act()`` and ``.compute()`` methods + This method calls the ``.act()`` method and returns its result + + :param args: Positional arguments passed to the called method + :type args: tuple, optional + :param kwargs: Key-value arguments passed to the called method + :type kwargs: dict, optional + + :return: Model output. The first component is the action to be taken by the agent. + The second component is the log of the probability density function for stochastic models + or None for deterministic models. 
The third component is a dictionary containing extra output values + :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary """ - raise NotImplementedError("Implement .act() and .compute() methods instead of this") + return self.act(*args, **kwargs) def compute(self, inputs: Mapping[str, Union[torch.Tensor, Any]], From ad31ff212d8eff9355a3c6c14aa31f0ac47ec4a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 11 Nov 2022 10:46:36 +0100 Subject: [PATCH 063/157] Add default method for getting model specification --- skrl/models/torch/base.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index 89dbeb43..ad00a2c0 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -288,10 +288,34 @@ def _update_weights(module, method_name, args, kwargs): _update_weights(self.children(), method_name, args, kwargs) + def get_specification(self) -> Mapping[str, Any]: + """Returns the specification of the model + + The following keys are used by the agents for initialization: + + - ``"rnn"``: Recurrent Neural Network (RNN) specification for RNN, LSTM and GRU layers/cells + + - ``"sizes"``: List of RNN shapes (number of layers, number of environments, number of features in the RNN state). + There must be as many tuples as there are states in the recurrent layer/cell. E.g., LSTM has 2 states (hidden and cell). + + :return: Dictionary containing advanced specification of the model + :rtype: dict + + Example:: + + # model with a LSTM layer. + # - number of layers: 1 + # - number of environments: 4 + # - number of features in the RNN state: 64 + >>> model.get_specification() + {'rnn': {'sizes': [(1, 4, 64), (1, 4, 64)]}} + """ + return {} + def forward(self, *args, **kwargs) -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Forward pass of the model - This method calls the ``.act()`` method and returns its result + This method calls the ``.act()`` method and returns its outputs :param args: Positional arguments passed to the called method :type args: tuple, optional From 5f7ebe98fe0bd100fd0b8f58244e414640a5ce2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 11 Nov 2022 10:58:16 +0100 Subject: [PATCH 064/157] Remove the definition of common memory tensors for all environments --- skrl/memories/torch/base.py | 40 +++++++++++++------------------------ 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py index a7337002..bce8b307 100644 --- a/skrl/memories/torch/base.py +++ b/skrl/memories/torch/base.py @@ -56,7 +56,6 @@ def __init__(self, self.tensors = {} self.tensors_view = {} self.tensors_keep_dimensions = {} - self.tensors_common_for_all_envs = {} # exporting data self.export = export @@ -164,8 +163,7 @@ def create_tensor(self, name: str, size: Union[int, Tuple[int], gym.Space, gymnasium.Space], dtype: Optional[torch.dtype] = None, - keep_dimensions: bool = False, - common_for_all_envs: bool = False) -> bool: + keep_dimensions: bool = False) -> bool: """Create a new internal tensor in memory The tensor will have a 3-components shape (memory size, number of environments, size). 
@@ -181,9 +179,6 @@ def create_tensor(self, :type dtype: torch.dtype or None, optional :param keep_dimensions: Whether or not to keep the dimensions defined through the size parameter (default: False) :type keep_dimensions: bool - :param common_for_all_envs: Whether or not the tensor should be defined for all environments (default: False). - If True, the 2nd tensor dimension (number of environment) will be suppressed - :type common_for_all_all: bool :raises ValueError: The tensor name exists already but the size or dtype are different @@ -201,10 +196,7 @@ def create_tensor(self, raise ValueError("The dtype of the tensor {} ({}) doesn't match the existing one ({})".format(name, dtype, tensor.dtype)) return False # define tensor shape - if common_for_all_envs: - tensor_shape = (self.memory_size, *size) if keep_dimensions else (self.memory_size, size) - else: - tensor_shape = (self.memory_size, self.num_envs, *size) if keep_dimensions else (self.memory_size, self.num_envs, size) + tensor_shape = (self.memory_size, self.num_envs, *size) if keep_dimensions else (self.memory_size, self.num_envs, size) view_shape = (-1, *size) if keep_dimensions else (-1, size) # create tensor (_tensor_) and add it to the internal storage setattr(self, "_tensor_{}".format(name), torch.zeros(tensor_shape, device=self.device, dtype=dtype)) @@ -212,7 +204,6 @@ def create_tensor(self, self.tensors[name] = getattr(self, "_tensor_{}".format(name)) self.tensors_view[name] = self.tensors[name].view(*view_shape) self.tensors_keep_dimensions[name] = keep_dimensions - self.tensors_common_for_all_envs[name] = common_for_all_envs # fill the tensors (float tensors) with NaN for tensor in self.tensors.values(): if torch.is_floating_point(tensor): @@ -261,21 +252,21 @@ def add_samples(self, **tensors: torch.Tensor) -> None: raise ValueError("No samples to be recorded in memory. 
Pass samples as key-value arguments (where key is the tensor name)") # dimensions and shapes of the tensors (assume all tensors have the dimensions of the first tensor) - tmp = tensors[next(iter(tensors))] + tmp = tensors.get("states", tensors[next(iter(tensors))]) # ask for states first dim, shape = tmp.ndim, tmp.shape - # multi environment (number of environments less than num_envs) - if dim == 2 and shape[0] < self.num_envs: - for name, tensor in tensors.items(): - if name in self.tensors: - self.tensors[name][self.memory_index, self.env_index:self.env_index + tensor.shape[0]].copy_(tensor) - self.env_index += tensor.shape[0] # multi environment (number of environments equals num_envs) - elif dim == 2 and shape[0] == self.num_envs: + if dim == 2 and shape[0] == self.num_envs: for name, tensor in tensors.items(): if name in self.tensors: self.tensors[name][self.memory_index].copy_(tensor) self.memory_index += 1 + # multi environment (number of environments less than num_envs) + elif dim == 2 and shape[0] < self.num_envs: + for name, tensor in tensors.items(): + if name in self.tensors: + self.tensors[name][self.memory_index, self.env_index:self.env_index + tensor.shape[0]].copy_(tensor) + self.env_index += tensor.shape[0] # single environment - multi sample (number of environments greater than num_envs (num_envs = 1)) elif dim == 2 and self.num_envs == 1: for name, tensor in tensors.items(): @@ -360,12 +351,9 @@ def sample_all(self, names: Tuple[str], mini_batches: int = 1) -> List[List[torc :rtype: list of torch.Tensor list """ if mini_batches > 1: - indexes_0 = np.arange(self.memory_size) # common for all environments - indexes_1 = np.arange(self.memory_size * self.num_envs) # per each environment - batches_0 = BatchSampler(indexes_0, batch_size=len(indexes_0) // mini_batches, drop_last=True) - batches_1 = BatchSampler(indexes_1, batch_size=len(indexes_1) // mini_batches, drop_last=True) - return [[self.tensors_view[name][b0 if self.tensors_common_for_all_envs[name] else b1] for name in names] \ - for b0, b1 in zip(batches_0, batches_1)] + indexes = np.arange(self.memory_size * self.num_envs) + batches = BatchSampler(indexes, batch_size=len(indexes) // mini_batches, drop_last=True) + return [[self.tensors_view[name][batch] for name in names] for batch in batches] return [[self.tensors_view[name] for name in names]] def save(self, directory: str = "", format: str = "pt") -> None: @@ -399,7 +387,7 @@ def save(self, directory: str = "", format: str = "pt") -> None: np.savez(memory_path, **{name: self.tensors[name].cpu().numpy() for name in self.get_tensor_names()}) # comma-separated values elif format == "csv": - # open csv writer + # open csv writer # TODO: support keeping the dimensions with open(memory_path, "a") as file: writer = csv.writer(file) names = self.get_tensor_names() From 6cf1685a8e042a354371a136143d6f729cfd8f33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 12 Nov 2022 14:50:54 +0100 Subject: [PATCH 065/157] Update SVG images --- docs/source/_static/imgs/manual_trainer.svg | 2 +- docs/source/_static/imgs/model_categorical.svg | 2 +- docs/source/_static/imgs/model_deterministic.svg | 2 +- docs/source/_static/imgs/model_gaussian.svg | 2 +- docs/source/_static/imgs/model_multivariate_gaussian.svg | 2 +- docs/source/_static/imgs/parallel_trainer.svg | 2 +- docs/source/_static/imgs/sequential_trainer.svg | 2 +- docs/source/_static/imgs/wrapping.svg | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) mode change 100644 => 100755 
docs/source/_static/imgs/model_categorical.svg mode change 100644 => 100755 docs/source/_static/imgs/model_deterministic.svg mode change 100644 => 100755 docs/source/_static/imgs/model_multivariate_gaussian.svg mode change 100644 => 100755 docs/source/_static/imgs/parallel_trainer.svg mode change 100644 => 100755 docs/source/_static/imgs/sequential_trainer.svg diff --git a/docs/source/_static/imgs/manual_trainer.svg b/docs/source/_static/imgs/manual_trainer.svg index a7218f5d..b2ecb545 100755 --- a/docs/source/_static/imgs/manual_trainer.svg +++ b/docs/source/_static/imgs/manual_trainer.svg @@ -1 +1 @@ -step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrddddrenderenv.render(…)agent.record_transitions(…)agent.act(…)record transitionscompute actions. . .. . ...............................Execute each agent method sequentially (one agent after the other) in the same processscope0scope1scope2scope3scopem𝒂𝒕𝒅𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏A1A2A3. . .Amagentenvagent.post_interaction(…)agent.pre_interaction(…)post-interactionpre-interaction𝒔𝒕resetenv.reset(…)training / evaluationiteration +step environmentsenv.step(…)a0ana0ana0an.........a0an...renderenv.render(…)agent.act(…)record transitionscompute actions.........Execute each agent method sequentially (one agent after the other) in the same processscope0scope1scope2scope3scopem𝒂𝒕A1A2A3. . .Amagentenvagent.post_interaction(…)agent.pre_interaction(…)post-interactionpre-interaction𝒔𝒕resetenv.reset(…)training / evaluationiteration𝑖𝑛𝑓𝑜𝒕𝒕+𝟏𝑻𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrTTTT. . .. . ......................tttt. . .agent.record_transitions(…) diff --git a/docs/source/_static/imgs/model_categorical.svg b/docs/source/_static/imgs/model_categorical.svg old mode 100644 new mode 100755 index d25f4c53..19f8b106 --- a/docs/source/_static/imgs/model_categorical.svg +++ b/docs/source/_static/imgs/model_categorical.svg @@ -1 +1 @@ -inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)unnormalized_log_problog probabilities(logits)probabilities(probs)categoricaldistribution1 2 3 … n𝑃(𝑥)actions (𝒂𝒕+𝟏)log probevaluated at𝑎𝑡+1networkoutput +inputhiddenoutput.compute(…)inputs (𝒔𝒕)unnormalized_log_problog probabilities(logits)probabilities(probs)categoricaldistribution1 2 3 … n𝑃(𝑥)actions (𝒂𝒕+𝟏)log probevaluated at𝑎𝑡+1outputs.act(…) /.forward(…) diff --git a/docs/source/_static/imgs/model_deterministic.svg b/docs/source/_static/imgs/model_deterministic.svg old mode 100644 new mode 100755 index 5b69af0a..182e3023 --- a/docs/source/_static/imgs/model_deterministic.svg +++ b/docs/source/_static/imgs/model_deterministic.svg @@ -1 +1 @@ -inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)actions (𝒂𝒕+𝟏)NoneNoneclip_actions +inputhiddenoutput.compute(…)actions (𝒂𝒕+𝟏)Noneoutputsclip_actionsinputs (𝒔𝒕)actions (𝒂𝒕+𝟏)Noneoutputs.act(…) /.forward(…) diff --git a/docs/source/_static/imgs/model_gaussian.svg b/docs/source/_static/imgs/model_gaussian.svg index 475293ae..d4cfe41d 100755 --- a/docs/source/_static/imgs/model_gaussian.svg +++ b/docs/source/_static/imgs/model_gaussian.svg @@ -1 +1 @@ -inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)log standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)gaussiandistribution𝒩(𝜇,𝜎)actions (𝒂𝒕+𝟏)log prob evaluated at𝑎𝑡+1mean actions (𝒂𝒕+𝟏)paramclip_log_stdclip_actionsreduction +inputhiddenoutput.compute(…)log standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)gaussiandistribution𝒩(𝜇,𝜎)actions (𝒂𝒕+𝟏)log prob evaluated 
at𝑎𝑡+1outputsparamclip_log_stdclip_actionsreductioninputs (𝒔𝒕).act(…) /.forward(…)actions (𝒂𝒕+𝟏)log probevaluated at𝑎𝑡+1outputs diff --git a/docs/source/_static/imgs/model_multivariate_gaussian.svg b/docs/source/_static/imgs/model_multivariate_gaussian.svg old mode 100644 new mode 100755 index 043753f1..3647f3e2 --- a/docs/source/_static/imgs/model_multivariate_gaussian.svg +++ b/docs/source/_static/imgs/model_multivariate_gaussian.svg @@ -1 +1 @@ -inputhiddenoutput.compute(…)states (𝒔𝒕)with or withoutactions (𝒂𝒕)log standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)multivariategaussian distribution𝒩(𝜇,𝛴)actions (𝒂𝒕+𝟏)log prob evaluated at𝑎𝑡+1mean actions (𝒂𝒕+𝟏)paramclip_log_stdclip_actions +inputhiddenoutputlog standarddeviations(𝑙𝑜𝑔(𝜎))mean actions(𝑎𝑡+1)multivariategaussian distribution𝒩(𝜇,𝛴)actions (𝒂𝒕+𝟏)log prob evaluated at𝑎𝑡+1outputsparamclip_log_stdclip_actions.compute(…)inputs (𝒔𝒕).act(…) /.forward(…)actions (𝒂𝒕+𝟏)log probevaluated at𝑎𝑡+1outputs diff --git a/docs/source/_static/imgs/parallel_trainer.svg b/docs/source/_static/imgs/parallel_trainer.svg old mode 100644 new mode 100755 index 26c6a8db..d950d8b0 --- a/docs/source/_static/imgs/parallel_trainer.svg +++ b/docs/source/_static/imgs/parallel_trainer.svg @@ -1 +1 @@ -step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrddddrenderenv.render(…)agent.record_transitions(…)agent.post_interaction(…)agent.pre_interaction(…)agent.act(…)record transitionspost-interactionpre-interactioncompute actionsA0A1A2Ambarrier. . .. . .. . ...............................Execute each agent method in a different process and control synchronization through a multiprocessing.Barrierobjectscope0scope1scope2scope3scopem𝒂𝒕𝒅𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏𝒔𝒕agentenvresetenv.reset(…)init +step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrTTTTrenderenv.render(…)agent.record_transitions(…)agent.post_interaction(…)agent.pre_interaction(…)agent.act(…)record transitionspost-interactionpre-interactioncompute actionsA0A1A2Ambarrier. . .. . .. . ..........Execute each agent method in a different process and control synchronization through a multiprocessing.Barrierobjectscope0scope1scope2scope3scopem𝒂𝒕𝒔𝒕agentenvresetenv.reset(…)init𝑖𝑛𝑓𝑜𝒕𝒕+𝟏𝑻𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏.....................tttt. . . diff --git a/docs/source/_static/imgs/sequential_trainer.svg b/docs/source/_static/imgs/sequential_trainer.svg old mode 100644 new mode 100755 index cce68bb5..7565f44a --- a/docs/source/_static/imgs/sequential_trainer.svg +++ b/docs/source/_static/imgs/sequential_trainer.svg @@ -1 +1 @@ -step environmentsenv.step(…)a0ana0ana0an.........a0an...scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrddddrenderenv.render(…)agent.record_transitions(…)agent.act(…)record transitionscompute actions. . .. . ...............................Execute each agent method sequentially (one agent after the other) in the same processscope0scope1scope2scope3scopem𝒂𝒕𝒅𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏A1A2A3. . .Amagentenvagent.post_interaction(…)agent.pre_interaction(…)post-interactionpre-interaction𝒔𝒕resetenv.reset(…)init +step environmentsenv.step(…)a0ana0ana0an.........a0an...renderenv.render(…)agent.record_transitions(…)agent.act(…)record transitionscompute actions.........Execute each agent method sequentially (one agent after the other) in the same processscope0scope1scope2scope3scopem𝒂𝒕A1A2A3. . 
.Amagentenvagent.post_interaction(…)agent.pre_interaction(…)post-interactionpre-interaction𝒔𝒕resetenv.reset(…)init𝑖𝑛𝑓𝑜𝒕𝒕+𝟏𝑻𝒕+𝟏𝒓𝒕+𝟏𝒔𝒕+𝟏scope0scope1scope2scope3scopemrs0sns0sns0sns0snrrrTTTT. . .. . ......................tttt. . . diff --git a/docs/source/_static/imgs/wrapping.svg b/docs/source/_static/imgs/wrapping.svg index 694f8c2f..cd8605c0 100755 --- a/docs/source/_static/imgs/wrapping.svg +++ b/docs/source/_static/imgs/wrapping.svg @@ -1 +1 @@ -DeepMindIsaac GymGymnum_envs: intdevice: ML framework specific devicestate_space: gym.Spaceobservation_space: gym.Spaceaction_space: gym.Spacereset()-> observationsstep(actions)-> observations, rewards, dones, infosrender()close()propertiesmethodswrap_env()Omniverse +DeepMindOmniverseIsaac GymGymnum_envs: intdevice: ML framework specific devicestate_space: gym/gymnasiumspaceobservation_space: gym/gymnasiumspaceaction_space: gym/gymnasiumspacereset()-> states, infosstep(actions)-> states, rewards, terminated, truncated, infosrender()close()propertiesmethodswrap_env()Isaac GymGymnasium From 889ec28d64f1fa4ff417d5316da554476ce73a9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 13 Nov 2022 22:30:17 +0100 Subject: [PATCH 066/157] Allow to disable progressbar during training/evaluation --- skrl/trainers/torch/base.py | 5 +++-- skrl/trainers/torch/manual.py | 9 +++++---- skrl/trainers/torch/parallel.py | 9 +++++---- skrl/trainers/torch/sequential.py | 9 +++++---- 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index f17dd9de..a16a0153 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -55,6 +55,7 @@ def __init__(self, # get configuration self.timesteps = self.cfg.get("timesteps", 0) self.headless = self.cfg.get("headless", False) + self.disable_progressbar = self.cfg.get("disable_progressbar", False) self.initial_timestep = 0 @@ -153,7 +154,7 @@ def single_agent_train(self) -> None: # reset env states, infos = self.env.reset() - for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar): # pre-interaction self.agents.pre_interaction(timestep=timestep, timesteps=self.timesteps) @@ -209,7 +210,7 @@ def single_agent_eval(self) -> None: # reset env states, infos = self.env.reset() - for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar): # compute actions with torch.no_grad(): diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index 7dca355e..6776964f 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -13,8 +13,9 @@ MANUAL_TRAINER_DEFAULT_CONFIG = { - "timesteps": 100000, # number of timesteps to train for - "headless": False, # whether to use headless mode (no rendering) + "timesteps": 100000, # number of timesteps to train for + "headless": False, # whether to use headless mode (no rendering) + "disable_progressbar": False, # whether to disable the progressbar. 
If None, disable on non-TTY } @@ -76,7 +77,7 @@ def train(self, timestep: int, timesteps: Optional[int] = None) -> None: timesteps = self.timesteps if timesteps is None else timesteps if self._progress is None: - self._progress = tqdm.tqdm(total=timesteps) + self._progress = tqdm.tqdm(total=timesteps, disable=self.disable_progressbar) self._progress.update(n=1) # set running mode @@ -176,7 +177,7 @@ def eval(self, timestep: int, timesteps: Optional[int] = None) -> None: timesteps = self.timesteps if timesteps is None else timesteps if self._progress is None: - self._progress = tqdm.tqdm(total=timesteps) + self._progress = tqdm.tqdm(total=timesteps, disable=self.disable_progressbar) self._progress.update(n=1) # set running mode diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index d33dcde9..07917e05 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -13,8 +13,9 @@ PARALLEL_TRAINER_DEFAULT_CONFIG = { - "timesteps": 100000, # number of timesteps to train for - "headless": False, # whether to use headless mode (no rendering) + "timesteps": 100000, # number of timesteps to train for + "headless": False, # whether to use headless mode (no rendering) + "disable_progressbar": False, # whether to disable the progressbar. If None, disable on non-TTY } @@ -195,7 +196,7 @@ def train(self) -> None: if not states.is_cuda: states.share_memory_() - for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar): # pre-interaction for pipe in producer_pipes: @@ -330,7 +331,7 @@ def eval(self) -> None: if not states.is_cuda: states.share_memory_() - for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar): # compute actions with torch.no_grad(): diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index b61d2f21..009a9778 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -12,8 +12,9 @@ SEQUENTIAL_TRAINER_DEFAULT_CONFIG = { - "timesteps": 100000, # number of timesteps to train for - "headless": False, # whether to use headless mode (no rendering) + "timesteps": 100000, # number of timesteps to train for + "headless": False, # whether to use headless mode (no rendering) + "disable_progressbar": False, # whether to disable the progressbar. 
If None, disable on non-TTY } @@ -77,7 +78,7 @@ def train(self) -> None: # reset env states, infos = self.env.reset() - for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar): # pre-interaction for agent in self.agents: @@ -147,7 +148,7 @@ def eval(self) -> None: # reset env states, infos = self.env.reset() - for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps)): + for timestep in tqdm.tqdm(range(self.initial_timestep, self.timesteps), disable=self.disable_progressbar): # compute actions with torch.no_grad(): From f4a8509dcec4a446dbb3bf95ca64c3972f51ada6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 13 Nov 2022 22:38:31 +0100 Subject: [PATCH 067/157] Update trainers' configuration in docs --- docs/source/modules/skrl.trainers.manual.rst | 2 +- docs/source/modules/skrl.trainers.parallel.rst | 2 +- docs/source/modules/skrl.trainers.sequential.rst | 2 +- docs/source/snippets/trainer.py | 5 +++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/source/modules/skrl.trainers.manual.rst b/docs/source/modules/skrl.trainers.manual.rst index b6a60e21..320827d0 100644 --- a/docs/source/modules/skrl.trainers.manual.rst +++ b/docs/source/modules/skrl.trainers.manual.rst @@ -29,7 +29,7 @@ Configuration .. literalinclude:: ../../../skrl/trainers/torch/manual.py :language: python - :lines: 14-17 + :lines: 14-18 :linenos: API diff --git a/docs/source/modules/skrl.trainers.parallel.rst b/docs/source/modules/skrl.trainers.parallel.rst index 489875d8..843a489e 100644 --- a/docs/source/modules/skrl.trainers.parallel.rst +++ b/docs/source/modules/skrl.trainers.parallel.rst @@ -37,7 +37,7 @@ Configuration .. literalinclude:: ../../../skrl/trainers/torch/parallel.py :language: python - :lines: 15-18 + :lines: 15-19 :linenos: API diff --git a/docs/source/modules/skrl.trainers.sequential.rst b/docs/source/modules/skrl.trainers.sequential.rst index 5f847508..3807ea3e 100644 --- a/docs/source/modules/skrl.trainers.sequential.rst +++ b/docs/source/modules/skrl.trainers.sequential.rst @@ -29,7 +29,7 @@ Configuration .. literalinclude:: ../../../skrl/trainers/torch/sequential.py :language: python - :lines: 14-17 + :lines: 14-18 :linenos: API diff --git a/docs/source/snippets/trainer.py b/docs/source/snippets/trainer.py index b4555acc..d4e28a60 100644 --- a/docs/source/snippets/trainer.py +++ b/docs/source/snippets/trainer.py @@ -10,8 +10,9 @@ CUSTOM_DEFAULT_CONFIG = { - "timesteps": 100000, # number of timesteps to train for - "headless": False, # whether to use headless mode (no rendering) + "timesteps": 100000, # number of timesteps to train for + "headless": False, # whether to use headless mode (no rendering) + "disable_progressbar": False, # whether to disable the progressbar. 
If None, disable on non-TTY } From d8adb7536cdfb2d4452abb5de68a91a7ac1070f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 13 Nov 2022 22:39:22 +0100 Subject: [PATCH 068/157] Remove unused import statements --- skrl/trainers/torch/manual.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index 6776964f..b515f9f1 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -1,4 +1,3 @@ -from distutils.log import info from typing import Union, List, Optional import copy From e2ab9e3fa71703d05084e24150ccb784ec5ad1f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 14 Nov 2022 13:12:38 +0100 Subject: [PATCH 069/157] Add RNN implementation for DDPG agent --- skrl/agents/torch/ddpg/ddpg.py | 82 ++++++++++++++++++++++++++-------- 1 file changed, 63 insertions(+), 19 deletions(-) diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 586b7172..b2d200a6 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -166,7 +166,29 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) - self.tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] + self._tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] + + # RNN specifications + self._rnn = False # flag to indicate whether RNN is available + self._rnn_tensors_names = [] # used for sampling during training + self._rnn_final_states = {"policy": []} + self._rnn_initial_states = {"policy": []} + self._rnn_sequence_length = self.policy.get_specification().get("rnn", {}).get("sequence_length", 1) + + # policy + for i, size in enumerate(self.policy.get_specification().get("rnn", {}).get("sizes", [])): + self._rnn = True + # create tensors in memory + if self.memory is not None: + self.memory.create_tensor(name=f"rnn_policy_{i}", size=(size[0], size[2]), dtype=torch.float32, keep_dimensions=True) + self._rnn_tensors_names.append(f"rnn_policy_{i}") + # default RNN states + self._rnn_initial_states["policy"].append(torch.zeros(size, dtype=torch.float32, device=self.device)) + + # critic + if self.critic is not None: + for i, size in enumerate(self.critic.get_specification().get("rnn", {}).get("sizes", [])): + self._rnn = True # clip noise bounds self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) @@ -188,19 +210,22 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens :return: Actions :rtype: torch.Tensor """ - states = self._state_preprocessor(states) + rnn = {"rnn": self._rnn_initial_states["policy"]} if self._rnn else {} # sample random actions if timestep < self._random_timesteps: - return self.policy.random_act({"states": states}, role="policy") + return self.policy.random_act({"states": self._state_preprocessor(states), **rnn}, role="policy") # sample deterministic actions - actions = self.policy.act({"states": states}, role="policy") + actions, _, outputs = self.policy.act({"states": self._state_preprocessor(states), **rnn}, role="policy") + + if self._rnn: + self._rnn_final_states["policy"] = outputs.get("rnn", []) # add exloration noise if self._exploration_noise is not None: # sample noises - noises = self._exploration_noise.sample(actions[0].shape) + noises = 
self._exploration_noise.sample(actions.shape) # define exploration timesteps scale = self._exploration_final_scale @@ -215,13 +240,11 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens noises.mul_(scale) # modify actions - actions[0].add_(noises) + actions.add_(noises) if self._backward_compatibility: - actions = (torch.max(torch.min(actions[0], self.clip_actions_max), self.clip_actions_min), - actions[1], - actions[2]) + actions = torch.max(torch.min(actions, self.clip_actions_max), self.clip_actions_min) else: - actions[0].clamp_(min=self.clip_actions_min, max=self.clip_actions_max) + actions.clamp_(min=self.clip_actions_min, max=self.clip_actions_max) # record noises self.track_data("Exploration / Exploration noise (max)", torch.max(noises).item()) @@ -234,7 +257,7 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens self.track_data("Exploration / Exploration noise (min)", 0) self.track_data("Exploration / Exploration noise (mean)", 0) - return actions + return actions, None, outputs def record_transition(self, states: torch.Tensor, @@ -274,11 +297,27 @@ def record_transition(self, if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) + # package RNN states + rnn_states = {} + if self._rnn: + rnn_states.update({f"rnn_policy_{i}": s.transpose(0, 1) for i, s in enumerate(self._rnn_initial_states["policy"])}) + + # storage transition in memory self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated) + terminated=terminated, truncated=truncated, **rnn_states) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated) + terminated=terminated, truncated=truncated, **rnn_states) + + # update RNN states + if self._rnn: + # reset states if the episodes have ended + finished_episodes = terminated.nonzero(as_tuple=False) + if finished_episodes.numel(): + for rnn_state in self._rnn_final_states["policy"]: + rnn_state[:, finished_episodes[:, 0]] = 0 + + self._rnn_initial_states = self._rnn_final_states def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -314,7 +353,12 @@ def _update(self, timestep: int, timesteps: int) -> None: """ # sample a batch from memory sampled_states, sampled_actions, sampled_rewards, sampled_next_states, sampled_dones = \ - self.memory.sample(names=self.tensors_names, batch_size=self._batch_size)[0] + self.memory.sample(names=self._tensors_names, batch_size=self._batch_size, sequence_length=self._rnn_sequence_length)[0] + + rnn_policy = {} + if self._rnn: + sampled_rnn = self.memory.sample_by_index(names=self._rnn_tensors_names, indexes=self.memory.get_sampling_indexes())[0] + rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn]} # gradient steps for gradient_step in range(self._gradient_steps): @@ -324,13 +368,13 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): - next_actions, _, _ = self.target_policy.act({"states": sampled_next_states}, role="target_policy") + next_actions, _, _ = self.target_policy.act({"states": sampled_next_states, **rnn_policy}, role="target_policy") - target_q_values, _, _ = self.target_critic.act({"states": sampled_next_states, "taken_actions": next_actions}, role="target_critic") + 
target_q_values, _, _ = self.target_critic.act({"states": sampled_next_states, "taken_actions": next_actions, **rnn_policy}, role="target_critic") target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute critic loss - critic_values, _, _ = self.critic.act({"states": sampled_states, "taken_actions": sampled_actions}, role="critic") + critic_values, _, _ = self.critic.act({"states": sampled_states, "taken_actions": sampled_actions, **rnn_policy}, role="critic") critic_loss = F.mse_loss(critic_values, target_values) @@ -340,8 +384,8 @@ def _update(self, timestep: int, timesteps: int) -> None: self.critic_optimizer.step() # compute policy (actor) loss - actions, _, _ = self.policy.act({"states": sampled_states}, role="policy") - critic_values, _, _ = self.critic.act({"states": sampled_states, "taken_actions": actions}, role="critic") + actions, _, _ = self.policy.act({"states": sampled_states, **rnn_policy}, role="policy") + critic_values, _, _ = self.critic.act({"states": sampled_states, "taken_actions": actions, **rnn_policy}, role="critic") policy_loss = -critic_values.mean() From c1ca59cc99ea8d36ca41c880e53735540b5262b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 14 Nov 2022 13:14:00 +0100 Subject: [PATCH 070/157] Allow getting the last sampling indexes --- skrl/memories/torch/base.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py index bce8b307..07a93ff1 100644 --- a/skrl/memories/torch/base.py +++ b/skrl/memories/torch/base.py @@ -57,6 +57,8 @@ def __init__(self, self.tensors_view = {} self.tensors_keep_dimensions = {} + self.sampling_indexes = None + # exporting data self.export = export self.export_format = export_format @@ -301,7 +303,11 @@ def add_samples(self, **tensors: torch.Tensor) -> None: if self.export: self.save(directory=self.export_directory, format=self.export_format) - def sample(self, names: Tuple[str], batch_size: int, mini_batches: int = 1) -> List[List[torch.Tensor]]: + def sample(self, + names: Tuple[str], + batch_size: int, + mini_batches: int = 1, + sequence_length: int = 1) -> List[List[torch.Tensor]]: """Data sampling method to be implemented by the inheriting classes :param names: Tensors names from which to obtain the samples @@ -310,6 +316,8 @@ def sample(self, names: Tuple[str], batch_size: int, mini_batches: int = 1) -> L :type batch_size: int :param mini_batches: Number of mini-batches to sample (default: 1) :type mini_batches: int, optional + :param sequence_length: Length of each sequence (default: 1) + :type sequence_length: int, optional :raises NotImplementedError: The method has not been implemented @@ -356,6 +364,14 @@ def sample_all(self, names: Tuple[str], mini_batches: int = 1) -> List[List[torc return [[self.tensors_view[name][batch] for name in names] for batch in batches] return [[self.tensors_view[name] for name in names]] + def get_sampling_indexes(self) -> Union[tuple, np.ndarray, torch.Tensor]: + """Get the last indexes used for sampling + + :return: Last sampling indexes + :rtype: tuple or list, numpy.ndarray or torch.Tensor + """ + return self.sampling_indexes + def save(self, directory: str = "", format: str = "pt") -> None: """Save the memory to a file From 2fe8a69586bcd28fc4f89d51b73fd88592051e77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 14 Nov 2022 13:15:48 +0100 Subject: [PATCH 071/157] Allow sampling 
random sequences in random memory --- skrl/memories/torch/random.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/skrl/memories/torch/random.py b/skrl/memories/torch/random.py index e8d3f259..6a8ae4b2 100644 --- a/skrl/memories/torch/random.py +++ b/skrl/memories/torch/random.py @@ -44,7 +44,11 @@ def __init__(self, self._replacement = replacement - def sample(self, names: Tuple[str], batch_size: int, mini_batches: int = 1) -> List[List[torch.Tensor]]: + def sample(self, + names: Tuple[str], + batch_size: int, + mini_batches: int = 1, + sequence_length: int = 1) -> List[List[torch.Tensor]]: """Sample a batch from memory randomly :param names: Tensors names from which to obtain the samples @@ -53,17 +57,30 @@ def sample(self, names: Tuple[str], batch_size: int, mini_batches: int = 1) -> L :type batch_size: int :param mini_batches: Number of mini-batches to sample (default: 1) :type mini_batches: int, optional + :param sequence_length: Length of each sequence (default: 1) + :type sequence_length: int, optional :return: Sampled data from tensors sorted according to their position in the list of names. The sampled tensors will have the following shape: (batch size, data size) :rtype: list of torch.Tensor list """ + # compute valid memory sizes + size = len(self) + if sequence_length > 1: + sequence_indexes = torch.arange(0, self.num_envs * sequence_length, self.num_envs) + size -= sequence_indexes[-1].item() + # generate random indexes if self._replacement: - indexes = torch.randint(0, len(self), (batch_size,), device=self.device) + indexes = torch.randint(0, size, (batch_size,)) else: # details about the random sampling performance can be found here: # https://discuss.pytorch.org/t/torch-equivalent-of-numpy-random-choice/16146/19 - indexes = torch.randperm(len(self), dtype=torch.long, device=self.device)[:batch_size] + indexes = torch.randperm(size, dtype=torch.long)[:batch_size] + + # generate sequence indexes + if sequence_length > 1: + indexes = (sequence_indexes.repeat(indexes.shape[0], 1) + indexes.view(-1, 1)).view(-1) + self.sampling_indexes = indexes return self.sample_by_index(names=names, indexes=indexes, mini_batches=mini_batches) From 038f1f1bd0ffd7cbd8de1d0f5b97eed58bfcb838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 14 Nov 2022 16:47:15 +0100 Subject: [PATCH 072/157] Allow sampling all memory sequentially --- skrl/memories/torch/base.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py index 07a93ff1..92bee823 100644 --- a/skrl/memories/torch/base.py +++ b/skrl/memories/torch/base.py @@ -58,6 +58,7 @@ def __init__(self, self.tensors_keep_dimensions = {} self.sampling_indexes = None + self.all_sequence_indexes = np.concatenate([np.arange(i, memory_size * num_envs + i, num_envs) for i in range(num_envs)]) # exporting data self.export = export @@ -346,18 +347,28 @@ def sample_by_index(self, names: Tuple[str], indexes: Union[tuple, np.ndarray, t return [[self.tensors_view[name][batch] for name in names] for batch in batches] return [[self.tensors_view[name][indexes] for name in names]] - def sample_all(self, names: Tuple[str], mini_batches: int = 1) -> List[List[torch.Tensor]]: + def sample_all(self, names: Tuple[str], mini_batches: int = 1, sequence_length: int = 1) -> List[List[torch.Tensor]]: """Sample all data from memory :param names: Tensors names from which to obtain the samples :type names: tuple or list 
of strings :param mini_batches: Number of mini-batches to sample (default: 1) :type mini_batches: int, optional + :param sequence_length: Length of each sequence (default: 1) + :type sequence_length: int, optional :return: Sampled data from memory. The sampled tensors will have the following shape: (memory size * number of environments, data size) :rtype: list of torch.Tensor list """ + # sequential order + if sequence_length > 1: + if mini_batches > 1: + batches = BatchSampler(self.all_sequence_indexes, batch_size=len(self.all_sequence_indexes) // mini_batches, drop_last=True) + return [[self.tensors_view[name][batch] for name in names] for batch in batches] + return [[self.tensors_view[name][self.all_sequence_indexes] for name in names]] + + # default order if mini_batches > 1: indexes = np.arange(self.memory_size * self.num_envs) batches = BatchSampler(indexes, batch_size=len(indexes) // mini_batches, drop_last=True) From 4cb1c09da19af34e37a776fe2b4fe7eeb63bb180 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 14 Nov 2022 16:49:49 +0100 Subject: [PATCH 073/157] Sample sequences from memory --- skrl/agents/torch/ppo/ppo.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 152fc069..946e7cd5 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -187,6 +187,7 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self._rnn_tensors_names = [] # used for sampling during training self._rnn_final_states = {"policy": [], "value": []} self._rnn_initial_states = {"policy": [], "value": []} + self._rnn_sequence_length = self.policy.get_specification().get("rnn", {}).get("sequence_length", 1) # policy for i, size in enumerate(self.policy.get_specification().get("rnn", {}).get("sizes", [])): @@ -414,8 +415,8 @@ def compute_gae(rewards: torch.Tensor, self.memory.set_tensor_by_name("advantages", advantages) # sample mini-batches from memory - sampled_batches = self.memory.sample_all(names=self._tensors_names, mini_batches=self._mini_batches) - sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches) + sampled_batches = self.memory.sample_all(names=self._tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) + sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) rnn_policy, rnn_value = {}, {} From 4121bed2a65a2be159a8c0968a181da985c7455d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 16 Nov 2022 20:04:18 +0100 Subject: [PATCH 074/157] Initialize entropy for Discrete action spaces --- skrl/agents/torch/sac/sac.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index eef551c6..f1861772 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -140,7 +140,12 @@ def __init__(self, if self._learn_entropy: self._target_entropy = self.cfg["target_entropy"] if self._target_entropy is None: - self._target_entropy = -np.prod(self.action_space.shape).astype(np.float32) + if issubclass(type(self.action_space), gym.spaces.Box) or issubclass(type(self.action_space), gymnasium.spaces.Box): + self._target_entropy = -np.prod(self.action_space.shape).astype(np.float32) + elif issubclass(type(self.action_space), gym.spaces.Discrete) 
or issubclass(type(self.action_space), gymnasium.spaces.Discrete): + self._target_entropy = -self.action_space.n + else: + self._target_entropy = 0 self.log_entropy_coefficient = torch.log(torch.ones(1, device=self.device) * self._entropy_coefficient).requires_grad_(True) self.entropy_optimizer = torch.optim.Adam([self.log_entropy_coefficient], lr=self._entropy_learning_rate) From 058ec088583b08a8424ec3515b6656dde0d5414c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 16 Nov 2022 20:06:49 +0100 Subject: [PATCH 075/157] Add RNN implementation for TD3 agent --- skrl/agents/torch/td3/td3.py | 93 ++++++++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 26 deletions(-) diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index 4616181f..32cdfe42 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -33,9 +33,9 @@ "exploration": { "noise": None, # exploration noise - "initial_scale": 1.0, # initial scale for noise - "final_scale": 1e-3, # final scale for noise - "timesteps": None, # timesteps for noise decay + "initial_scale": 1.0, # initial scale for the noise + "final_scale": 1e-3, # final scale for the noise + "timesteps": None, # timesteps for the noise decay }, "policy_delay": 2, # policy delay update with respect to critic update @@ -175,6 +175,7 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ super().init(trainer_cfg=trainer_cfg) + self.set_mode("eval") # create tensors in memory if self.memory is not None: @@ -184,7 +185,24 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) - self.tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] + self._tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] + + # RNN specifications + self._rnn = False # flag to indicate whether RNN is available + self._rnn_tensors_names = [] # used for sampling during training + self._rnn_final_states = {"policy": []} + self._rnn_initial_states = {"policy": []} + self._rnn_sequence_length = self.policy.get_specification().get("rnn", {}).get("sequence_length", 1) + + # policy + for i, size in enumerate(self.policy.get_specification().get("rnn", {}).get("sizes", [])): + self._rnn = True + # create tensors in memory + if self.memory is not None: + self.memory.create_tensor(name=f"rnn_policy_{i}", size=(size[0], size[2]), dtype=torch.float32, keep_dimensions=True) + self._rnn_tensors_names.append(f"rnn_policy_{i}") + # default RNN states + self._rnn_initial_states["policy"].append(torch.zeros(size, dtype=torch.float32, device=self.device)) # clip noise bounds self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) @@ -206,19 +224,22 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens :return: Actions :rtype: torch.Tensor """ - states = self._state_preprocessor(states) + rnn = {"rnn": self._rnn_initial_states["policy"]} if self._rnn else {} # sample random actions if timestep < self._random_timesteps: - return self.policy.random_act({"states": states}, role="policy") + return self.policy.random_act({"states": self._state_preprocessor(states), **rnn}, role="policy") # sample deterministic actions - actions = self.policy.act({"states": states}, role="policy") + actions, _, outputs = self.policy.act({"states": 
self._state_preprocessor(states), **rnn}, role="policy") - # add noise + if self._rnn: + self._rnn_final_states["policy"] = outputs.get("rnn", []) + + # add exloration noise if self._exploration_noise is not None: # sample noises - noises = self._exploration_noise.sample(actions[0].shape) + noises = self._exploration_noise.sample(actions.shape) # define exploration timesteps scale = self._exploration_final_scale @@ -233,14 +254,11 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens noises.mul_(scale) # modify actions - actions[0].add_(noises) - + actions.add_(noises) if self._backward_compatibility: - actions = (torch.max(torch.min(actions[0], self.clip_actions_max), self.clip_actions_min), - actions[1], - actions[2]) + actions = torch.max(torch.min(actions, self.clip_actions_max), self.clip_actions_min) else: - actions[0].clamp_(min=self.clip_actions_min, max=self.clip_actions_max) + actions.clamp_(min=self.clip_actions_min, max=self.clip_actions_max) # record noises self.track_data("Exploration / Exploration noise (max)", torch.max(noises).item()) @@ -253,7 +271,7 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens self.track_data("Exploration / Exploration noise (min)", 0) self.track_data("Exploration / Exploration noise (mean)", 0) - return actions + return actions, None, outputs def record_transition(self, states: torch.Tensor, @@ -293,11 +311,27 @@ def record_transition(self, if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) + # package RNN states + rnn_states = {} + if self._rnn: + rnn_states.update({f"rnn_policy_{i}": s.transpose(0, 1) for i, s in enumerate(self._rnn_initial_states["policy"])}) + + # storage transition in memory self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated) + terminated=terminated, truncated=truncated, **rnn_states) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated) + terminated=terminated, truncated=truncated, **rnn_states) + + # update RNN states + if self._rnn: + # reset states if the episodes have ended + finished_episodes = terminated.nonzero(as_tuple=False) + if finished_episodes.numel(): + for rnn_state in self._rnn_final_states["policy"]: + rnn_state[:, finished_episodes[:, 0]] = 0 + + self._rnn_initial_states = self._rnn_final_states def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -318,7 +352,9 @@ def post_interaction(self, timestep: int, timesteps: int) -> None: :type timesteps: int """ if timestep >= self._learning_starts: + self.set_mode("train") self._update(timestep, timesteps) + self.set_mode("eval") # write tracking data and checkpoints super().post_interaction(timestep, timesteps) @@ -333,7 +369,12 @@ def _update(self, timestep: int, timesteps: int) -> None: """ # sample a batch from memory sampled_states, sampled_actions, sampled_rewards, sampled_next_states, sampled_dones = \ - self.memory.sample(names=self.tensors_names, batch_size=self._batch_size)[0] + self.memory.sample(names=self._tensors_names, batch_size=self._batch_size, sequence_length=self._rnn_sequence_length)[0] + + rnn_policy = {} + if self._rnn: + sampled_rnn = self.memory.sample_by_index(names=self._rnn_tensors_names, indexes=self.memory.get_sampling_indexes())[0] + 
rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn]} # gradient steps for gradient_step in range(self._gradient_steps): @@ -343,7 +384,7 @@ def _update(self, timestep: int, timesteps: int) -> None: with torch.no_grad(): # target policy smoothing - next_actions, _, _ = self.target_policy.act({"states": sampled_next_states}, role="target_policy") + next_actions, _, _ = self.target_policy.act({"states": sampled_next_states, **rnn_policy}, role="target_policy") noises = torch.clamp(self._smooth_regularization_noise.sample(next_actions.shape), min=-self._smooth_regularization_clip, max=self._smooth_regularization_clip) @@ -355,14 +396,14 @@ def _update(self, timestep: int, timesteps: int) -> None: next_actions.clamp_(min=self.clip_actions_min, max=self.clip_actions_max) # compute target values - target_q1_values, _, _ = self.target_critic_1.act({"states": sampled_next_states, "taken_actions": next_actions}, role="target_critic_1") - target_q2_values, _, _ = self.target_critic_2.act({"states": sampled_next_states, "taken_actions": next_actions}, role="target_critic_2") + target_q1_values, _, _ = self.target_critic_1.act({"states": sampled_next_states, "taken_actions": next_actions, **rnn_policy}, role="target_critic_1") + target_q2_values, _, _ = self.target_critic_2.act({"states": sampled_next_states, "taken_actions": next_actions, **rnn_policy}, role="target_critic_2") target_q_values = torch.min(target_q1_values, target_q2_values) target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute critic loss - critic_1_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": sampled_actions}, role="critic_1") - critic_2_values, _, _ = self.critic_2.act({"states": sampled_states, "taken_actions": sampled_actions}, role="critic_2") + critic_1_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": sampled_actions, **rnn_policy}, role="critic_1") + critic_2_values, _, _ = self.critic_2.act({"states": sampled_states, "taken_actions": sampled_actions, **rnn_policy}, role="critic_2") critic_loss = F.mse_loss(critic_1_values, target_values) + F.mse_loss(critic_2_values, target_values) @@ -376,8 +417,8 @@ def _update(self, timestep: int, timesteps: int) -> None: if not self._critic_update_counter % self._policy_delay: # compute policy (actor) loss - actions, _, _ = self.policy.act({"states": sampled_states}, role="policy") - critic_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": actions}, role="critic_1") + actions, _, _ = self.policy.act({"states": sampled_states, **rnn_policy}, role="policy") + critic_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": actions, **rnn_policy}, role="critic_1") policy_loss = -critic_values.mean() From 2d3ef69415bd6371aef3293a099e259ee05fe821 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 16 Nov 2022 20:18:52 +0100 Subject: [PATCH 076/157] Allow clipping the norm of the gradient --- skrl/agents/torch/ddpg/ddpg.py | 14 +++++++++----- skrl/agents/torch/sac/sac.py | 9 +++++++++ skrl/agents/torch/td3/td3.py | 9 +++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index b2d200a6..e743e8b4 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -4,6 +4,7 @@ import copy import torch +import torch.nn as nn import torch.nn.functional as F from ....memories.torch import Memory @@ 
-30,6 +31,8 @@ "random_timesteps": 0, # random exploration steps "learning_starts": 0, # learning starts after this many steps + "grad_norm_clip": 0, # clipping coefficient for the norm of the gradients + "exploration": { "noise": None, # exploration noise "initial_scale": 1.0, # initial scale for the noise @@ -128,6 +131,8 @@ def __init__(self, self._random_timesteps = self.cfg["random_timesteps"] self._learning_starts = self.cfg["learning_starts"] + self._grad_norm_clip = self.cfg["grad_norm_clip"] + self._exploration_noise = self.cfg["exploration"]["noise"] self._exploration_initial_scale = self.cfg["exploration"]["initial_scale"] self._exploration_final_scale = self.cfg["exploration"]["final_scale"] @@ -185,11 +190,6 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: # default RNN states self._rnn_initial_states["policy"].append(torch.zeros(size, dtype=torch.float32, device=self.device)) - # critic - if self.critic is not None: - for i, size in enumerate(self.critic.get_specification().get("rnn", {}).get("sizes", [])): - self._rnn = True - # clip noise bounds self.clip_actions_min = torch.tensor(self.action_space.low, device=self.device) self.clip_actions_max = torch.tensor(self.action_space.high, device=self.device) @@ -381,6 +381,8 @@ def _update(self, timestep: int, timesteps: int) -> None: # optimization step (critic) self.critic_optimizer.zero_grad() critic_loss.backward() + if self._grad_norm_clip > 0: + nn.utils.clip_grad_norm_(self.critic.parameters(), self._grad_norm_clip) self.critic_optimizer.step() # compute policy (actor) loss @@ -392,6 +394,8 @@ def _update(self, timestep: int, timesteps: int) -> None: # optimization step (policy) self.policy_optimizer.zero_grad() policy_loss.backward() + if self._grad_norm_clip > 0: + nn.utils.clip_grad_norm_(self.policy.parameters(), self._grad_norm_clip) self.policy_optimizer.step() # update target networks diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index f1861772..0355ec1b 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -6,6 +6,7 @@ import numpy as np import torch +import torch.nn as nn import torch.nn.functional as F from ....memories.torch import Memory @@ -32,6 +33,8 @@ "random_timesteps": 0, # random exploration steps "learning_starts": 0, # learning starts after this many steps + "grad_norm_clip": 0, # clipping coefficient for the norm of the gradients + "learn_entropy": True, # learn entropy "entropy_learning_rate": 1e-3, # entropy learning rate "initial_entropy_value": 0.2, # initial entropy value @@ -130,6 +133,8 @@ def __init__(self, self._random_timesteps = self.cfg["random_timesteps"] self._learning_starts = self.cfg["learning_starts"] + self._grad_norm_clip = self.cfg["grad_norm_clip"] + self._entropy_learning_rate = self.cfg["entropy_learning_rate"] self._learn_entropy = self.cfg["learn_entropy"] self._entropy_coefficient = self.cfg["initial_entropy_value"] @@ -313,6 +318,8 @@ def _update(self, timestep: int, timesteps: int) -> None: # optimization step (critic) self.critic_optimizer.zero_grad() critic_loss.backward() + if self._grad_norm_clip > 0: + nn.utils.clip_grad_norm_(itertools.chain(self.critic_1.parameters(), self.critic_2.parameters()), self._grad_norm_clip) self.critic_optimizer.step() # compute policy (actor) loss @@ -325,6 +332,8 @@ def _update(self, timestep: int, timesteps: int) -> None: # optimization step (policy) self.policy_optimizer.zero_grad() policy_loss.backward() + if self._grad_norm_clip > 0: + 
nn.utils.clip_grad_norm_(self.policy.parameters(), self._grad_norm_clip) self.policy_optimizer.step() # entropy learning diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index 32cdfe42..1783a594 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -5,6 +5,7 @@ import itertools import torch +import torch.nn as nn import torch.nn.functional as F from ....memories.torch import Memory @@ -31,6 +32,8 @@ "random_timesteps": 0, # random exploration steps "learning_starts": 0, # learning starts after this many steps + "grad_norm_clip": 0, # clipping coefficient for the norm of the gradients + "exploration": { "noise": None, # exploration noise "initial_scale": 1.0, # initial scale for the noise @@ -139,6 +142,8 @@ def __init__(self, self._random_timesteps = self.cfg["random_timesteps"] self._learning_starts = self.cfg["learning_starts"] + self._grad_norm_clip = self.cfg["grad_norm_clip"] + self._exploration_noise = self.cfg["exploration"]["noise"] self._exploration_initial_scale = self.cfg["exploration"]["initial_scale"] self._exploration_final_scale = self.cfg["exploration"]["final_scale"] @@ -410,6 +415,8 @@ def _update(self, timestep: int, timesteps: int) -> None: # optimization step (critic) self.critic_optimizer.zero_grad() critic_loss.backward() + if self._grad_norm_clip > 0: + nn.utils.clip_grad_norm_(itertools.chain(self.critic_1.parameters(), self.critic_2.parameters()), self._grad_norm_clip) self.critic_optimizer.step() # delayed update @@ -425,6 +432,8 @@ def _update(self, timestep: int, timesteps: int) -> None: # optimization step (policy) self.policy_optimizer.zero_grad() policy_loss.backward() + if self._grad_norm_clip > 0: + nn.utils.clip_grad_norm_(self.policy.parameters(), self._grad_norm_clip) self.policy_optimizer.step() # update target networks From b33dc5e01b38cf928c453de43148a64b5b535f5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 16 Nov 2022 22:45:23 +0100 Subject: [PATCH 077/157] Add RNN implementation for SAC agent --- skrl/agents/torch/sac/sac.py | 76 +++++++++++++++++++++++++++++------- 1 file changed, 61 insertions(+), 15 deletions(-) diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index 0355ec1b..a67f87f9 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -180,6 +180,7 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ super().init(trainer_cfg=trainer_cfg) + self.set_mode("eval") # create tensors in memory if self.memory is not None: @@ -189,7 +190,24 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) - self.tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] + self._tensors_names = ["states", "actions", "rewards", "next_states", "terminated"] + + # RNN specifications + self._rnn = False # flag to indicate whether RNN is available + self._rnn_tensors_names = [] # used for sampling during training + self._rnn_final_states = {"policy": []} + self._rnn_initial_states = {"policy": []} + self._rnn_sequence_length = self.policy.get_specification().get("rnn", {}).get("sequence_length", 1) + + # policy + for i, size in enumerate(self.policy.get_specification().get("rnn", {}).get("sizes", [])): + self._rnn = True + # create tensors in memory + if self.memory is not None: + 
self.memory.create_tensor(name=f"rnn_policy_{i}", size=(size[0], size[2]), dtype=torch.float32, keep_dimensions=True) + self._rnn_tensors_names.append(f"rnn_policy_{i}") + # default RNN states + self._rnn_initial_states["policy"].append(torch.zeros(size, dtype=torch.float32, device=self.device)) def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: """Process the environment's states to make a decision (actions) using the main policy @@ -204,15 +222,20 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens :return: Actions :rtype: torch.Tensor """ - states = self._state_preprocessor(states) + rnn = {"rnn": self._rnn_initial_states["policy"]} if self._rnn else {} # sample random actions # TODO, check for stochasticity if timestep < self._random_timesteps: - return self.policy.random_act({"states": states}, role="policy") + return self.policy.random_act({"states": self._state_preprocessor(states), **rnn}, role="policy") # sample stochastic actions - return self.policy.act({"states": states}, role="policy") + actions, _, outputs = self.policy.act({"states": self._state_preprocessor(states), **rnn}, role="policy") + + if self._rnn: + self._rnn_final_states["policy"] = outputs.get("rnn", []) + + return actions, None, outputs def record_transition(self, states: torch.Tensor, @@ -252,11 +275,27 @@ def record_transition(self, if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) + # package RNN states + rnn_states = {} + if self._rnn: + rnn_states.update({f"rnn_policy_{i}": s.transpose(0, 1) for i, s in enumerate(self._rnn_initial_states["policy"])}) + + # storage transition in memory self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated) + terminated=terminated, truncated=truncated, **rnn_states) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated) + terminated=terminated, truncated=truncated, **rnn_states) + + # update RNN states + if self._rnn: + # reset states if the episodes have ended + finished_episodes = terminated.nonzero(as_tuple=False) + if finished_episodes.numel(): + for rnn_state in self._rnn_final_states["policy"]: + rnn_state[:, finished_episodes[:, 0]] = 0 + + self._rnn_initial_states = self._rnn_final_states def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -277,7 +316,9 @@ def post_interaction(self, timestep: int, timesteps: int) -> None: :type timesteps: int """ if timestep >= self._learning_starts: + self.set_mode("train") self._update(timestep, timesteps) + self.set_mode("eval") # write tracking data and checkpoints super().post_interaction(timestep, timesteps) @@ -292,7 +333,12 @@ def _update(self, timestep: int, timesteps: int) -> None: """ # sample a batch from memory sampled_states, sampled_actions, sampled_rewards, sampled_next_states, sampled_dones = \ - self.memory.sample(names=self.tensors_names, batch_size=self._batch_size)[0] + self.memory.sample(names=self._tensors_names, batch_size=self._batch_size, sequence_length=self._rnn_sequence_length)[0] + + rnn_policy = {} + if self._rnn: + sampled_rnn = self.memory.sample_by_index(names=self._rnn_tensors_names, indexes=self.memory.get_sampling_indexes())[0] + rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn]} # 
gradient steps for gradient_step in range(self._gradient_steps): @@ -302,16 +348,16 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute target values with torch.no_grad(): - next_actions, next_log_prob, _ = self.policy.act({"states": sampled_next_states}, role="policy") + next_actions, next_log_prob, _ = self.policy.act({"states": sampled_next_states, **rnn_policy}, role="policy") - target_q1_values, _, _ = self.target_critic_1.act({"states": sampled_next_states, "taken_actions": next_actions}, role="target_critic_1") - target_q2_values, _, _ = self.target_critic_2.act({"states": sampled_next_states, "taken_actions": next_actions}, role="target_critic_2") + target_q1_values, _, _ = self.target_critic_1.act({"states": sampled_next_states, "taken_actions": next_actions, **rnn_policy}, role="target_critic_1") + target_q2_values, _, _ = self.target_critic_2.act({"states": sampled_next_states, "taken_actions": next_actions, **rnn_policy}, role="target_critic_2") target_q_values = torch.min(target_q1_values, target_q2_values) - self._entropy_coefficient * next_log_prob target_values = sampled_rewards + self._discount_factor * sampled_dones.logical_not() * target_q_values # compute critic loss - critic_1_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": sampled_actions}, role="critic_1") - critic_2_values, _, _ = self.critic_2.act({"states": sampled_states, "taken_actions": sampled_actions}, role="critic_2") + critic_1_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": sampled_actions, **rnn_policy}, role="critic_1") + critic_2_values, _, _ = self.critic_2.act({"states": sampled_states, "taken_actions": sampled_actions, **rnn_policy}, role="critic_2") critic_loss = (F.mse_loss(critic_1_values, target_values) + F.mse_loss(critic_2_values, target_values)) / 2 @@ -323,9 +369,9 @@ def _update(self, timestep: int, timesteps: int) -> None: self.critic_optimizer.step() # compute policy (actor) loss - actions, log_prob, _ = self.policy.act({"states": sampled_states}, role="policy") - critic_1_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": actions}, role="critic_1") - critic_2_values, _, _ = self.critic_2.act({"states": sampled_states, "taken_actions": actions}, role="critic_2") + actions, log_prob, _ = self.policy.act({"states": sampled_states, **rnn_policy}, role="policy") + critic_1_values, _, _ = self.critic_1.act({"states": sampled_states, "taken_actions": actions, **rnn_policy}, role="critic_1") + critic_2_values, _, _ = self.critic_2.act({"states": sampled_states, "taken_actions": actions, **rnn_policy}, role="critic_2") policy_loss = (self._entropy_coefficient * log_prob - torch.min(critic_1_values, critic_2_values)).mean() From e417e39663e4d632206c3723950cdf08bf5c077c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 16 Nov 2022 22:45:57 +0100 Subject: [PATCH 078/157] Set training/evaluation mode --- skrl/agents/torch/ddpg/ddpg.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index e743e8b4..9020c244 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -162,6 +162,7 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ super().init(trainer_cfg=trainer_cfg) + self.set_mode("eval") # create tensors in memory if self.memory is not None: @@ -338,7 +339,9 @@ def post_interaction(self, timestep: int, timesteps: int) -> 
None: :type timesteps: int """ if timestep >= self._learning_starts: + self.set_mode("train") self._update(timestep, timesteps) + self.set_mode("eval") # write tracking data and checkpoints super().post_interaction(timestep, timesteps) From 0a8bd6707bc7233f2dbfa29932ad242332130dcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 16 Nov 2022 23:00:11 +0100 Subject: [PATCH 079/157] Update DDPG, TD3 and SAC docs --- docs/source/modules/skrl.agents.ddpg.rst | 4 +++- docs/source/modules/skrl.agents.sac.rst | 4 +++- docs/source/modules/skrl.agents.td3.rst | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/docs/source/modules/skrl.agents.ddpg.rst b/docs/source/modules/skrl.agents.ddpg.rst index 33bcfc9e..b48b5788 100644 --- a/docs/source/modules/skrl.agents.ddpg.rst +++ b/docs/source/modules/skrl.agents.ddpg.rst @@ -36,6 +36,7 @@ Algorithm implementation | :green:`# optimization step (critic)` | reset :math:`\text{optimizer}_\phi` | :math:`\nabla_{\phi} L_{Q_\phi}` +| :math:`\text{clip}(\lVert \nabla_{\phi} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_\phi` | :green:`# compute policy (actor) loss` | :math:`a \leftarrow \mu_\theta(s)` @@ -44,6 +45,7 @@ Algorithm implementation | :green:`# optimization step (policy)` | reset :math:`\text{optimizer}_\theta` | :math:`\nabla_{\theta} L_{\mu_\theta}` +| :math:`\text{clip}(\lVert \nabla_{\theta} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_\theta` | :green:`# update target networks` | :math:`\theta_{target} \leftarrow` :guilabel:`polyak` :math:`\theta + (1 \;-` :guilabel:`polyak` :math:`) \theta_{target}` @@ -60,7 +62,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/ddpg/ddpg.py :language: python - :lines: 15-53 + :lines: 16-56 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.sac.rst b/docs/source/modules/skrl.agents.sac.rst index 6fee4b41..5cfe1d81 100644 --- a/docs/source/modules/skrl.agents.sac.rst +++ b/docs/source/modules/skrl.agents.sac.rst @@ -32,6 +32,7 @@ Algorithm implementation | :green:`# optimization step (critic)` | reset :math:`\text{optimizer}_\phi` | :math:`\nabla_{\phi} L_{Q_\phi}` +| :math:`\text{clip}(\lVert \nabla_{\phi} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_\phi` | :green:`# compute policy (actor) loss` | :math:`a,\; logp \leftarrow \pi_\theta(s)` @@ -41,6 +42,7 @@ Algorithm implementation | :green:`# optimization step (policy)` | reset :math:`\text{optimizer}_\theta` | :math:`\nabla_{\theta} L_{\pi_\theta}` +| :math:`\text{clip}(\lVert \nabla_{\theta} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_\theta` | :green:`# entropy learning` | **IF** :guilabel:`learn_entropy` is enabled **THEN** @@ -67,7 +69,7 @@ Configuration and hyperparameters .. 
literalinclude:: ../../../skrl/agents/torch/sac/sac.py :language: python - :lines: 17-53 + :lines: 18-56 :linenos: Spaces and models diff --git a/docs/source/modules/skrl.agents.td3.rst b/docs/source/modules/skrl.agents.td3.rst index d532f211..7cd2e01c 100644 --- a/docs/source/modules/skrl.agents.td3.rst +++ b/docs/source/modules/skrl.agents.td3.rst @@ -43,6 +43,7 @@ Algorithm implementation | :green:`# optimization step (critic)` | reset :math:`\text{optimizer}_\phi` | :math:`\nabla_{\phi} L_{Q_\phi}` +| :math:`\text{clip}(\lVert \nabla_{\phi} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_\phi` | :green:`# delayed update` | **IF** it's time for the :guilabel:`policy_delay` update **THEN** @@ -53,6 +54,7 @@ Algorithm implementation | :green:`# optimization step (policy)` | reset :math:`\text{optimizer}_\theta` | :math:`\nabla_{\theta} L_{\mu_\theta}` +| :math:`\text{clip}(\lVert \nabla_{\theta} \rVert)` with :guilabel:`grad_norm_clip` | step :math:`\text{optimizer}_\theta` | :green:`# update target networks` | :math:`\theta_{target} \leftarrow` :guilabel:`polyak` :math:`\theta + (1 \;-` :guilabel:`polyak` :math:`) \theta_{target}` @@ -70,7 +72,7 @@ Configuration and hyperparameters .. literalinclude:: ../../../skrl/agents/torch/td3/td3.py :language: python - :lines: 16-58 + :lines: 17-61 :linenos: Spaces and models From 2af8a3321085fe961527ee168473ccd6329963c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 18 Nov 2022 18:32:39 +0100 Subject: [PATCH 080/157] Move TRPO policy optimization outside value optimization loop --- skrl/agents/torch/trpo/trpo.py | 89 ++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index fd68317f..e2b2f258 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -176,7 +176,8 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) self.memory.create_tensor(name="advantages", size=1, dtype=torch.float32) - self.tensors_names = ["states", "actions", "log_prob", "returns", "advantages"] + self._tensors_names_policy = ["states", "actions", "log_prob", "advantages"] + self._tensors_names_value = ["states", "returns"] # create temporary variables needed for storage and computation self._current_log_prob = None @@ -437,11 +438,11 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor :return: KL divergence :rtype: torch.Tensor """ - _, _, mu_1 = policy_1.act({"states": states}, role="policy") + mu_1 = policy_1.act({"states": states}, role="policy")[2]["mean_actions"] logstd_1 = policy_1.get_log_std(role="policy") mu_1, logstd_1 = mu_1.detach(), logstd_1.detach() - _, _, mu_2 = policy_2.act({"states": states}, role="policy") + mu_2 = policy_2.act({"states": states}, role="policy")[2]["mean_actions"] logstd_2 = policy_2.get_log_std(role="policy") kl = logstd_1 - logstd_2 + 0.5 * (torch.square(logstd_1.exp()) + torch.square(mu_1 - mu_2)) \ @@ -465,56 +466,61 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor self.memory.set_tensor_by_name("returns", self._value_preprocessor(returns, train=True)) self.memory.set_tensor_by_name("advantages", advantages) - # sample mini-batches from memory - sampled_batches = self.memory.sample_all(names=self.tensors_names, mini_batches=self._mini_batches) + # sample all from memory + 
sampled_states, sampled_actions, sampled_log_prob, sampled_advantages \ + = self.memory.sample_all(names=self._tensors_names_policy, mini_batches=1)[0] - cumulative_policy_loss = 0 - cumulative_value_loss = 0 + sampled_states = self._state_preprocessor(sampled_states, train=True) - # learning epochs - for epoch in range(self._learning_epochs): + # compute policy loss gradient + policy_loss = surrogate_loss(self.policy, sampled_states, sampled_actions, sampled_log_prob, sampled_advantages) + policy_loss_gradient = torch.autograd.grad(policy_loss, self.policy.parameters()) + flat_policy_loss_gradient = torch.cat([gradient.view(-1) for gradient in policy_loss_gradient]) - # mini-batches loop - for sampled_states, sampled_actions, sampled_log_prob, sampled_returns, sampled_advantages in sampled_batches: + # compute the search direction using the conjugate gradient algorithm + search_direction = conjugate_gradient(self.policy, sampled_states, flat_policy_loss_gradient.data, + num_iterations=self._conjugate_gradient_steps) - sampled_states = self._state_preprocessor(sampled_states, train=not epoch) + # compute step size and full step + xHx = (search_direction * fisher_vector_product(self.policy, sampled_states, search_direction, self._damping)) \ + .sum(0, keepdim=True) + step_size = torch.sqrt(2 * self._max_kl_divergence / xHx)[0] + full_step = step_size * search_direction - # compute policy loss gradient - policy_loss = surrogate_loss(self.policy, sampled_states, sampled_actions, sampled_log_prob, sampled_advantages) - policy_loss_gradient = torch.autograd.grad(policy_loss, self.policy.parameters()) - flat_policy_loss_gradient = torch.cat([gradient.view(-1) for gradient in policy_loss_gradient]) + # backtracking line search + restore_policy_flag = True + self.backup_policy.update_parameters(self.policy) + params = parameters_to_vector(self.policy.parameters()) - # compute the search direction using the conjugate gradient algorithm - search_direction = conjugate_gradient(self.policy, sampled_states, flat_policy_loss_gradient.data, - num_iterations=self._conjugate_gradient_steps) + expected_improvement = (flat_policy_loss_gradient * full_step).sum(0, keepdim=True) - # compute step size and full step - xHx = (search_direction * fisher_vector_product(self.policy, sampled_states, search_direction, self._damping)) \ - .sum(0, keepdim=True) - step_size = torch.sqrt(2 * self._max_kl_divergence / xHx)[0] - full_step = step_size * search_direction + for alpha in [self._step_fraction * 0.5 ** i for i in range(self._max_backtrack_steps)]: + new_params = params + alpha * full_step + vector_to_parameters(new_params, self.policy.parameters()) - # backtracking line search - restore_policy_flag = True - self.backup_policy.update_parameters(self.policy) - params = parameters_to_vector(self.policy.parameters()) + expected_improvement *= alpha + kl = kl_divergence(self.backup_policy, self.policy, sampled_states) + loss = surrogate_loss(self.policy, sampled_states, sampled_actions, sampled_log_prob, sampled_advantages) - expected_improvement = (flat_policy_loss_gradient * full_step).sum(0, keepdim=True) + if kl < self._max_kl_divergence and (loss - policy_loss) / expected_improvement > self._accept_ratio: + restore_policy_flag = False + break - for alpha in [self._step_fraction * 0.5 ** i for i in range(self._max_backtrack_steps)]: - new_params = params + alpha * full_step - vector_to_parameters(new_params, self.policy.parameters()) + if restore_policy_flag: + self.policy.update_parameters(self.backup_policy) - 
expected_improvement *= alpha - kl = kl_divergence(self.backup_policy, self.policy, sampled_states) - loss = surrogate_loss(self.policy, sampled_states, sampled_actions, sampled_log_prob, sampled_advantages) + # sample mini-batches from memory + sampled_batches = self.memory.sample_all(names=self._tensors_names_value, mini_batches=self._mini_batches) - if kl < self._max_kl_divergence and (loss - policy_loss) / expected_improvement > self._accept_ratio: - restore_policy_flag = False - break + cumulative_value_loss = 0 + + # learning epochs + for epoch in range(self._learning_epochs): - if restore_policy_flag: - self.policy.update_parameters(self.backup_policy) + # mini-batches loop + for sampled_states, sampled_returns in sampled_batches: + + sampled_states = self._state_preprocessor(sampled_states, train=not epoch) # compute value loss predicted_values, _, _ = self.value.act({"states": sampled_states}, role="value") @@ -529,7 +535,6 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor self.value_optimizer.step() # update cumulative losses - cumulative_policy_loss += policy_loss.item() cumulative_value_loss += value_loss.item() # update learning rate @@ -537,7 +542,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor self.value_scheduler.step() # record data - self.track_data("Loss / Policy loss", cumulative_policy_loss / (self._learning_epochs * self._mini_batches)) + self.track_data("Loss / Policy loss", policy_loss.item()) self.track_data("Loss / Value loss", cumulative_value_loss / (self._learning_epochs * self._mini_batches)) self.track_data("Policy / Standard deviation", self.policy.distribution(role="policy").stddev.mean().item()) From ba1e817354275c5f3d08c16053a066867ea1054b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 18 Nov 2022 19:10:46 +0100 Subject: [PATCH 081/157] Initialize model biases --- skrl/models/torch/base.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index ad00a2c0..b7127764 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -288,6 +288,39 @@ def _update_weights(module, method_name, args, kwargs): _update_weights(self.children(), method_name, args, kwargs) + def init_biases(self, method_name: str = "constant_", *args, **kwargs) -> None: + """Initialize the model biases according to the specified method name + + Method names are from the `torch.nn.init `_ module. + Allowed method names are *uniform_*, *normal_*, *constant_*, etc. 
+ + The following layers will be initialized: + - torch.nn.Linear + + :param method_name: `torch.nn.init <https://pytorch.org/docs/stable/nn.init.html>`_ method name (default: ``"constant_"``) + :type method_name: str, optional + :param args: Positional arguments of the method to be called + :type args: tuple, optional + :param kwargs: Key-value arguments of the method to be called + :type kwargs: dict, optional + + Example:: + + # initialize all biases with a constant value (0) + >>> model.init_biases(method_name="constant_", val=0) + + # initialize all biases with normal distribution with mean 0 and standard deviation 0.25 + >>> model.init_biases(method_name="normal_", mean=0.0, std=0.25) + """ + def _update_biases(module, method_name, args, kwargs): + for layer in module: + if isinstance(layer, torch.nn.Sequential): + _update_biases(layer, method_name, args, kwargs) + elif isinstance(layer, torch.nn.Linear): + exec("torch.nn.init.{}(layer.bias, *args, **kwargs)".format(method_name)) + + _update_biases(self.children(), method_name, args, kwargs) + def get_specification(self) -> Mapping[str, Any]: """Returns the specification of the model From 4edc3daed5bf553c46cad0b8a366c87b96101b75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 18 Nov 2022 19:13:41 +0100 Subject: [PATCH 082/157] Update TRPO example --- docs/source/examples/isaacgym/trpo_cartpole.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/source/examples/isaacgym/trpo_cartpole.py b/docs/source/examples/isaacgym/trpo_cartpole.py index e2e89370..2be1c716 100644 --- a/docs/source/examples/isaacgym/trpo_cartpole.py +++ b/docs/source/examples/isaacgym/trpo_cartpole.py @@ -70,28 +70,26 @@ def compute(self, inputs, role): models_trpo["policy"] = Policy(env.observation_space, env.action_space, device) models_trpo["value"] = Value(env.observation_space, env.action_space, device) -# Initialize the models' parameters (weights and biases) using a Gaussian distribution -for model in models_trpo.values(): - model.init_parameters(method_name="normal_", mean=0.0, std=0.1) - # Configure and instantiate the agent.
# Only modify some of the default configuration, visit its documentation to see all the options # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#configuration-and-hyperparameters cfg_trpo = TRPO_DEFAULT_CONFIG.copy() cfg_trpo["rollouts"] = 16 # memory_size -cfg_trpo["learning_epochs"] = 6 -cfg_trpo["mini_batches"] = 2 -cfg_trpo["grad_norm_clip"] = 0.5 -cfg_trpo["value_loss_scale"] = 2.0 +cfg_trpo["learning_epochs"] = 8 +cfg_trpo["mini_batches"] = 1 +cfg_trpo["discount_factor"] = 0.99 cfg_trpo["lambda"] = 0.95 +cfg_trpo["learning_rate"] = 3e-4 +cfg_trpo["grad_norm_clip"] = 1.0 +cfg_trpo["value_loss_scale"] = 2.0 cfg_trpo["state_preprocessor"] = RunningStandardScaler cfg_trpo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} cfg_trpo["value_preprocessor"] = RunningStandardScaler cfg_trpo["value_preprocessor_kwargs"] = {"size": 1, "device": device} -# logging to TensorBoard and write checkpoints each 16 and 125 timesteps respectively +# logging to TensorBoard and write checkpoints each 16 and 80 timesteps respectively cfg_trpo["experiment"]["write_interval"] = 16 -cfg_trpo["experiment"]["checkpoint_interval"] = 125 +cfg_trpo["experiment"]["checkpoint_interval"] = 80 agent = TRPO(models=models_trpo, memory=memory, From 4d6b71bfa4103a3063ca3ed679a5440b4bfa996a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 18 Nov 2022 19:25:35 +0100 Subject: [PATCH 083/157] Update TRPO algorithm in docs --- docs/source/modules/skrl.agents.trpo.rst | 58 ++++++++++++------------ skrl/agents/torch/trpo/trpo.py | 2 +- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/docs/source/modules/skrl.agents.trpo.rst b/docs/source/modules/skrl.agents.trpo.rst index b9df0b3b..2a864169 100644 --- a/docs/source/modules/skrl.agents.trpo.rst +++ b/docs/source/modules/skrl.agents.trpo.rst @@ -79,38 +79,40 @@ Algorithm implementation | :green:`# compute returns and advantages` | :math:`V_{_{last}}' \leftarrow V_\phi(s')` | :math:`R, A \leftarrow f_{GAE}(r, d, V, V_{_{last}}')` +| :green:`# sample all from memory` +| [[:math:`s, a, logp, A`]] :math:`\leftarrow` states, actions, log_prob, advantages +| :green:`# compute policy loss gradient` +| :math:`L_{\pi_\theta} \leftarrow f_{Loss}(\pi_\theta, s, a, logp, A)` +| :math:`g \leftarrow \nabla_{\theta} L_{\pi_\theta}` +| :math:`g_{_{flat}} \leftarrow \text{flatten}(g)` +| :green:`# compute the search direction using the conjugate gradient algorithm` +| :math:`search_{direction} \leftarrow f_{CG}(\pi_\theta, s, g_{_{flat}})` +| :green:`# compute step size and full step` +| :math:`xHx \leftarrow search_{direction} \; f_{Ax}(\pi_\theta, s, search_{direction})` +| :math:`step_{size} \leftarrow \sqrt{\dfrac{2 \, \delta}{xHx}} \qquad` with :math:`\; \delta` as :guilabel:`max_kl_divergence` +| :math:`\beta \leftarrow step_{size} \; search_{direction}` +| :green:`# backtracking line search` +| :math:`flag_{restore} \leftarrow \text{True}` +| :math:`\pi_{\theta_{backup}} \leftarrow \pi_\theta` +| :math:`\theta \leftarrow \text{get_parameters}(\pi_\theta)` +| :math:`I_{expected} \leftarrow g_{_{flat}} \; \beta` +| **FOR** :math:`\alpha \leftarrow (0.5` :guilabel:`step_fraction` :math:`)^i \;` with :math:`i = 0, 1, 2, ...` up to :guilabel:`max_backtrack_steps` **DO** +| :math:`\theta_{new} \leftarrow \theta + \alpha \; \beta` +| :math:`\pi_\theta \leftarrow \text{set_parameters}(\theta_{new})` +| :math:`I_{expected} \leftarrow \alpha \; I_{expected}` +| :math:`kl \leftarrow 
f_{KL}(\pi_{\theta_{backup}}, \pi_\theta, s)` +| :math:`L \leftarrow f_{Loss}(\pi_\theta, s, a, logp, A)` +| **IF** :math:`kl < \delta` **AND** :math:`\dfrac{L - L_{\pi_\theta}}{I_{expected}} >` :guilabel:`accept_ratio` **THEN** +| :math:`flag_{restore} \leftarrow \text{False}` +| **BREAK LOOP** +| **IF** :math:`flag_{restore}` **THEN** +| :math:`\pi_\theta \leftarrow \pi_{\theta_{backup}}` | :green:`# sample mini-batches from memory` -| [[:math:`s, a, logp, R, A`]] :math:`\leftarrow` states, actions, log_prob, returns, advantages +| [[:math:`s, R`]] :math:`\leftarrow` states, returns | :green:`# learning epochs` | **FOR** each learning epoch up to :guilabel:`learning_epochs` **DO** | :green:`# mini-batches loop` -| **FOR** each mini-batch [:math:`s, a, logp, R, A`] up to :guilabel:`mini_batches` **DO** -| :green:`# compute policy loss gradient` -| :math:`L_{\pi_\theta} \leftarrow f_{Loss}(\pi_\theta, s, a, logp, A)` -| :math:`g \leftarrow \nabla_{\theta} L_{\pi_\theta}` -| :math:`g_{_{flat}} \leftarrow \text{flatten}(g)` -| :green:`# compute the search direction using the conjugate gradient algorithm` -| :math:`search_{direction} \leftarrow f_{CG}(\pi_\theta, s, g_{_{flat}})` -| :green:`# compute step size and full step` -| :math:`xHx \leftarrow search_{direction} \; f_{Ax}(\pi_\theta, s, search_{direction})` -| :math:`step_{size} \leftarrow \sqrt{\dfrac{2 \, \delta}{xHx}} \qquad` with :math:`\; \delta` as :guilabel:`max_kl_divergence` -| :math:`\beta \leftarrow step_{size} \; search_{direction}` -| :green:`# backtracking line search` -| :math:`flag_{restore} \leftarrow \text{True}` -| :math:`\pi_{\theta_{backup}} \leftarrow \pi_\theta` -| :math:`\theta \leftarrow \text{get_parameters}(\pi_\theta)` -| :math:`I_{expected} \leftarrow g_{_{flat}} \; \beta` -| **FOR** :math:`\alpha \leftarrow (0.5` :guilabel:`step_fraction` :math:`)^i \;` with :math:`i = 0, 1, 2, ...` up to :guilabel:`max_backtrack_steps` **DO** -| :math:`\theta_{new} \leftarrow \theta + \alpha \; \beta` -| :math:`\pi_\theta \leftarrow \text{set_parameters}(\theta_{new})` -| :math:`I_{expected} \leftarrow \alpha \; I_{expected}` -| :math:`kl \leftarrow f_{KL}(\pi_{\theta_{backup}}, \pi_\theta, s)` -| :math:`L \leftarrow f_{Loss}(\pi_\theta, s, a, logp, A)` -| **IF** :math:`kl < \delta` **AND** :math:`\dfrac{L - L_{\pi_\theta}}{I_{expected}} >` :guilabel:`accept_ratio` **THEN** -| :math:`flag_{restore} \leftarrow \text{False}` -| **BREAK LOOP** -| **IF** :math:`flag_{restore}` **THEN** -| :math:`\pi_\theta \leftarrow \pi_{\theta_{backup}}` +| **FOR** each mini-batch [:math:`s, R`] up to :guilabel:`mini_batches` **DO** | :green:`# compute value loss` | :math:`V' \leftarrow V_\phi(s)` | :math:`L_{V_\phi} \leftarrow` :guilabel:`value_loss_scale` :math:`\frac{1}{N} \sum_{i=1}^N (R - V')^2` diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index e2b2f258..2c13751b 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -513,7 +513,7 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor sampled_batches = self.memory.sample_all(names=self._tensors_names_value, mini_batches=self._mini_batches) cumulative_value_loss = 0 - + # learning epochs for epoch in range(self._learning_epochs): From cdcfc5d0a98a9882f7e5bcf55a65423654b19748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 18 Nov 2022 19:36:41 +0100 Subject: [PATCH 084/157] Add RNN implementation for A2C agent --- skrl/agents/torch/a2c/a2c.py | 118 
++++++++++++++++++++++++++++++----- 1 file changed, 103 insertions(+), 15 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 9bde3218..24bebd92 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -10,6 +10,7 @@ from ....memories.torch import Memory from ....models.torch import Model +from ....resources.schedulers.torch import KLAdaptiveRL from .. import Agent @@ -159,13 +160,46 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="actions", size=self.action_space, dtype=torch.float32) self.memory.create_tensor(name="rewards", size=1, dtype=torch.float32) self.memory.create_tensor(name="terminated", size=1, dtype=torch.bool) + self.memory.create_tensor(name="log_prob", size=1, dtype=torch.float32) self.memory.create_tensor(name="values", size=1, dtype=torch.float32) self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) self.memory.create_tensor(name="advantages", size=1, dtype=torch.float32) - self.tensors_names = ["states", "actions", "returns", "advantages"] + self._tensors_names = ["states", "actions", "log_prob", "returns", "advantages"] + + # RNN specifications + self._rnn = False # flag to indicate whether RNN is available + self._rnn_tensors_names = [] # used for sampling during training + self._rnn_final_states = {"policy": [], "value": []} + self._rnn_initial_states = {"policy": [], "value": []} + self._rnn_sequence_length = self.policy.get_specification().get("rnn", {}).get("sequence_length", 1) + + # policy + for i, size in enumerate(self.policy.get_specification().get("rnn", {}).get("sizes", [])): + self._rnn = True + # create tensors in memory + if self.memory is not None: + self.memory.create_tensor(name=f"rnn_policy_{i}", size=(size[0], size[2]), dtype=torch.float32, keep_dimensions=True) + self._rnn_tensors_names.append(f"rnn_policy_{i}") + # default RNN states + self._rnn_initial_states["policy"].append(torch.zeros(size, dtype=torch.float32, device=self.device)) + + # value + if self.value is not None: + if self.policy is self.value: + self._rnn_initial_states["value"] = self._rnn_initial_states["policy"] + else: + for i, size in enumerate(self.value.get_specification().get("rnn", {}).get("sizes", [])): + self._rnn = True + # create tensors in memory + if self.memory is not None: + self.memory.create_tensor(name=f"rnn_value_{i}", size=(size[0], size[2]), dtype=torch.float32, keep_dimensions=True) + self._rnn_tensors_names.append(f"rnn_value_{i}") + # default RNN states + self._rnn_initial_states["value"].append(torch.zeros(size, dtype=torch.float32, device=self.device)) # create temporary variables needed for storage and computation + self._current_log_prob = None self._current_next_states = None def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tensor: @@ -181,15 +215,21 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens :return: Actions :rtype: torch.Tensor """ - states = self._state_preprocessor(states) + rnn = {"rnn": self._rnn_initial_states["policy"]} if self._rnn else {} # sample random actions - # TODO, check for stochasticity + # TODO: fix for stochasticity, rnn and log_prob if timestep < self._random_timesteps: - return self.policy.random_act({"states": states}, role="policy") + return self.policy.random_act({"states": self._state_preprocessor(states), **rnn}, role="policy") # sample stochastic actions - return self.policy.act({"states": states}, role="policy") + actions, 
log_prob, outputs = self.policy.act({"states": self._state_preprocessor(states), **rnn}, role="policy") + self._current_log_prob = log_prob + + if self._rnn: + self._rnn_final_states["policy"] = outputs.get("rnn", []) + + return actions, log_prob, outputs def record_transition(self, states: torch.Tensor, @@ -231,15 +271,39 @@ def record_transition(self, if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - with torch.no_grad(): - values, _, _ = self.value.act({"states": self._state_preprocessor(states)}, role="value") + # compute values + rnn = {"rnn": self._rnn_initial_states["value"]} if self._rnn else {} + values, _, outputs = self.value.act({"states": self._state_preprocessor(states), **rnn}, role="value") values = self._value_preprocessor(values, inverse=True) + # package RNN states + rnn_states = {} + if self._rnn: + rnn_states.update({f"rnn_policy_{i}": s.transpose(0, 1) for i, s in enumerate(self._rnn_initial_states["policy"])}) + if self.policy is not self.value: + rnn_states.update({f"rnn_value_{i}": s.transpose(0, 1) for i, s in enumerate(self._rnn_initial_states["value"])}) + + # storage transition in memory self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated, values=values) + terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values, **rnn_states) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated, values=values) + terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values, **rnn_states) + + # update RNN states + if self._rnn: + self._rnn_final_states["value"] = self._rnn_final_states["policy"] if self.policy is self.value else outputs.get("rnn", []) + + # reset states if the episodes have ended + finished_episodes = terminated.nonzero(as_tuple=False) + if finished_episodes.numel(): + for rnn_state in self._rnn_final_states["policy"]: + rnn_state[:, finished_episodes[:, 0]] = 0 + if self.policy is not self.value: + for rnn_state in self._rnn_final_states["value"]: + rnn_state[:, finished_episodes[:, 0]] = 0 + + self._rnn_initial_states = self._rnn_final_states def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -319,7 +383,8 @@ def compute_gae(rewards: torch.Tensor, # compute returns and advantages with torch.no_grad(): - last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float())}, role="value") + rnn = {"rnn": self._rnn_initial_states["value"]} if self._rnn else {} + last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float()), **rnn}, role="value") last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") @@ -335,18 +400,38 @@ def compute_gae(rewards: torch.Tensor, self.memory.set_tensor_by_name("advantages", advantages) # sample mini-batches from memory - sampled_batches = self.memory.sample_all(names=self.tensors_names, mini_batches=self._mini_batches) + sampled_batches = self.memory.sample_all(names=self._tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) + sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches, 
sequence_length=self._rnn_sequence_length) + + rnn_policy, rnn_value = {}, {} cumulative_policy_loss = 0 cumulative_entropy_loss = 0 cumulative_value_loss = 0 + kl_divergences = [] + # mini-batches loop - for sampled_states, sampled_actions, sampled_returns, sampled_advantages in sampled_batches: + for i, (sampled_states, sampled_actions, sampled_log_prob, sampled_returns, sampled_advantages) in enumerate(sampled_batches): + + if self._rnn: + if self.policy is self.value: + rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn_batches[i]]} + rnn_value = rnn_policy + else: + rnn_policy = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "policy" in n]} + rnn_value = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "value" in n]} sampled_states = self._state_preprocessor(sampled_states, train=True) - _, next_log_prob, _ = self.policy.act({"states": sampled_states, "taken_actions": sampled_actions}, role="policy") + _, next_log_prob, _ = self.policy.act({"states": sampled_states, "taken_actions": sampled_actions, **rnn_policy}, role="policy") + + # compute approximate KL divergence for KLAdaptive learning rate scheduler + if isinstance(self.scheduler, KLAdaptiveRL): + with torch.no_grad(): + ratio = next_log_prob - sampled_log_prob + kl_divergence = ((torch.exp(ratio) - 1) - ratio).mean() + kl_divergences.append(kl_divergence) # compute entropy loss if self._entropy_loss_scale: @@ -358,7 +443,7 @@ def compute_gae(rewards: torch.Tensor, policy_loss = -(sampled_advantages * next_log_prob).mean() # compute value loss - predicted_values, _, _ = self.value.act({"states": sampled_states}, role="value") + predicted_values, _, _ = self.value.act({"states": sampled_states, **rnn_value}, role="value") value_loss = F.mse_loss(sampled_returns, predicted_values) @@ -380,7 +465,10 @@ def compute_gae(rewards: torch.Tensor, # update learning rate if self._learning_rate_scheduler: - self.scheduler.step() + if isinstance(self.scheduler, KLAdaptiveRL): + self.scheduler.step(torch.tensor(kl_divergences).mean()) + else: + self.scheduler.step() # record data self.track_data("Loss / Policy loss", cumulative_policy_loss / len(sampled_batches)) From 2a76de63dd62d4febffa10c1bd2cd60ff590b5db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 20 Nov 2022 21:13:19 +0100 Subject: [PATCH 085/157] Add terminated samples to RNN model inputs during training --- skrl/agents/torch/a2c/a2c.py | 10 +++++----- skrl/agents/torch/ddpg/ddpg.py | 2 +- skrl/agents/torch/ppo/ppo.py | 11 +++++------ skrl/agents/torch/sac/sac.py | 2 +- skrl/agents/torch/td3/td3.py | 2 +- 5 files changed, 13 insertions(+), 14 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 24bebd92..19ea0a72 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -165,7 +165,7 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) self.memory.create_tensor(name="advantages", size=1, dtype=torch.float32) - self._tensors_names = ["states", "actions", "log_prob", "returns", "advantages"] + self._tensors_names = ["states", "actions", "terminated", "log_prob", "returns", "advantages"] # RNN specifications self._rnn = False # flag to indicate whether RNN is available @@ -412,15 +412,15 @@ def compute_gae(rewards: torch.Tensor, kl_divergences = [] # mini-batches loop - for i, (sampled_states, 
sampled_actions, sampled_log_prob, sampled_returns, sampled_advantages) in enumerate(sampled_batches): + for i, (sampled_states, sampled_actions, sampled_dones, sampled_log_prob, sampled_returns, sampled_advantages) in enumerate(sampled_batches): if self._rnn: if self.policy is self.value: - rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn_batches[i]]} + rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn_batches[i]], "terminated": sampled_dones} rnn_value = rnn_policy else: - rnn_policy = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "policy" in n]} - rnn_value = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "value" in n]} + rnn_policy = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "policy" in n], "terminated": sampled_dones} + rnn_value = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "value" in n], "terminated": sampled_dones} sampled_states = self._state_preprocessor(sampled_states, train=True) diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 9020c244..5b0bbf65 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -361,7 +361,7 @@ def _update(self, timestep: int, timesteps: int) -> None: rnn_policy = {} if self._rnn: sampled_rnn = self.memory.sample_by_index(names=self._rnn_tensors_names, indexes=self.memory.get_sampling_indexes())[0] - rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn]} + rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn], "terminated": sampled_dones} # gradient steps for gradient_step in range(self._gradient_steps): diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 946e7cd5..f2ad25b3 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -180,7 +180,7 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="advantages", size=1, dtype=torch.float32) # tensors sampled during training - self._tensors_names = ["states", "actions", "log_prob", "values", "returns", "advantages"] + self._tensors_names = ["states", "actions", "terminated", "log_prob", "values", "returns", "advantages"] # RNN specifications self._rnn = False # flag to indicate whether RNN is available @@ -429,16 +429,15 @@ def compute_gae(rewards: torch.Tensor, kl_divergences = [] # mini-batches loop - for i, (sampled_states, sampled_actions, sampled_log_prob, sampled_values, sampled_returns, sampled_advantages) \ - in enumerate(sampled_batches): + for i, (sampled_states, sampled_actions, sampled_dones, sampled_log_prob, sampled_values, sampled_returns, sampled_advantages) in enumerate(sampled_batches): if self._rnn: if self.policy is self.value: - rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn_batches[i]]} + rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn_batches[i]], "terminated": sampled_dones} rnn_value = rnn_policy else: - rnn_policy = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "policy" in n]} - rnn_value = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "value" in n]} + rnn_policy = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "policy" in n], "terminated": sampled_dones} + rnn_value = {"rnn": [s.transpose(0, 1) for s, n in 
zip(sampled_rnn_batches[i], self._rnn_tensors_names) if "value" in n], "terminated": sampled_dones} sampled_states = self._state_preprocessor(sampled_states, train=not epoch) diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index a67f87f9..b0e60cd0 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -338,7 +338,7 @@ def _update(self, timestep: int, timesteps: int) -> None: rnn_policy = {} if self._rnn: sampled_rnn = self.memory.sample_by_index(names=self._rnn_tensors_names, indexes=self.memory.get_sampling_indexes())[0] - rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn]} + rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn], "terminated": sampled_dones} # gradient steps for gradient_step in range(self._gradient_steps): diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index 1783a594..7411a809 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -379,7 +379,7 @@ def _update(self, timestep: int, timesteps: int) -> None: rnn_policy = {} if self._rnn: sampled_rnn = self.memory.sample_by_index(names=self._rnn_tensors_names, indexes=self.memory.get_sampling_indexes())[0] - rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn]} + rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn], "terminated": sampled_dones} # gradient steps for gradient_step in range(self._gradient_steps): From 71b79b720a50d7241b5ad0661fb712f0ddc997f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 22 Nov 2022 22:45:26 +0100 Subject: [PATCH 086/157] Handle training mode for computing last value --- skrl/agents/torch/a2c/a2c.py | 2 ++ skrl/agents/torch/ppo/ppo.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 19ea0a72..49f87c2b 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -383,8 +383,10 @@ def compute_gae(rewards: torch.Tensor, # compute returns and advantages with torch.no_grad(): + self.value.train(False) rnn = {"rnn": self._rnn_initial_states["value"]} if self._rnn else {} last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float()), **rnn}, role="value") + self.value.train(True) last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index f2ad25b3..c03190f8 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -398,8 +398,10 @@ def compute_gae(rewards: torch.Tensor, # compute returns and advantages with torch.no_grad(): + self.value.train(False) rnn = {"rnn": self._rnn_initial_states["value"]} if self._rnn else {} last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float()), **rnn}, role="value") + self.value.train(True) last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") From 1a47aa6cc0f9073cec4c130d1ca80b3b90ffafa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 25 Nov 2022 10:24:07 +0100 Subject: [PATCH 087/157] Add RNN implementation for TRPO agent --- skrl/agents/torch/trpo/trpo.py | 125 +++++++++++++++++++++++++++------ 1 file changed, 104 insertions(+), 21 deletions(-) diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index 2c13751b..09686364 100644 --- 
a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -21,7 +21,7 @@ "mini_batches": 2, # number of mini batches during each learning epoch "discount_factor": 0.99, # discount factor (gamma) - "lambda": 0.99, # TD(lambda) coefficient (lam) for computing returns and advantages + "lambda": 0.95, # TD(lambda) coefficient (lam) for computing returns and advantages "value_learning_rate": 1e-3, # value learning rate "learning_rate_scheduler": None, # learning rate scheduler class (see torch.optim.lr_scheduler) @@ -164,6 +164,7 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: """Initialize the agent """ super().init(trainer_cfg=trainer_cfg) + self.set_mode("eval") # create tensors in memory if self.memory is not None: @@ -176,8 +177,39 @@ def init(self, trainer_cfg: Optional[Dict[str, Any]] = None) -> None: self.memory.create_tensor(name="returns", size=1, dtype=torch.float32) self.memory.create_tensor(name="advantages", size=1, dtype=torch.float32) - self._tensors_names_policy = ["states", "actions", "log_prob", "advantages"] - self._tensors_names_value = ["states", "returns"] + self._tensors_names_policy = ["states", "actions", "terminated", "log_prob", "advantages"] + self._tensors_names_value = ["states", "terminated", "returns"] + + # RNN specifications + self._rnn = False # flag to indicate whether RNN is available + self._rnn_tensors_names = [] # used for sampling during training + self._rnn_final_states = {"policy": [], "value": []} + self._rnn_initial_states = {"policy": [], "value": []} + self._rnn_sequence_length = self.policy.get_specification().get("rnn", {}).get("sequence_length", 1) + + # policy + for i, size in enumerate(self.policy.get_specification().get("rnn", {}).get("sizes", [])): + self._rnn = True + # create tensors in memory + if self.memory is not None: + self.memory.create_tensor(name=f"rnn_policy_{i}", size=(size[0], size[2]), dtype=torch.float32, keep_dimensions=True) + self._rnn_tensors_names.append(f"rnn_policy_{i}") + # default RNN states + self._rnn_initial_states["policy"].append(torch.zeros(size, dtype=torch.float32, device=self.device)) + + # value + if self.value is not None: + if self.policy is self.value: + self._rnn_initial_states["value"] = self._rnn_initial_states["policy"] + else: + for i, size in enumerate(self.value.get_specification().get("rnn", {}).get("sizes", [])): + self._rnn = True + # create tensors in memory + if self.memory is not None: + self.memory.create_tensor(name=f"rnn_value_{i}", size=(size[0], size[2]), dtype=torch.float32, keep_dimensions=True) + self._rnn_tensors_names.append(f"rnn_value_{i}") + # default RNN states + self._rnn_initial_states["value"].append(torch.zeros(size, dtype=torch.float32, device=self.device)) # create temporary variables needed for storage and computation self._current_log_prob = None @@ -196,17 +228,20 @@ def act(self, states: torch.Tensor, timestep: int, timesteps: int) -> torch.Tens :return: Actions :rtype: torch.Tensor """ - states = self._state_preprocessor(states) + rnn = {"rnn": self._rnn_initial_states["policy"]} if self._rnn else {} # sample random actions - # TODO, check for stochasticity + # TODO: fix for stochasticity, rnn and log_prob if timestep < self._random_timesteps: - return self.policy.random_act({"states": states}, role="policy") + return self.policy.random_act({"states": self._state_preprocessor(states), **rnn}, role="policy") # sample stochastic actions - actions, log_prob, outputs = self.policy.act({"states": states}, role="policy") + actions, 
log_prob, outputs = self.policy.act({"states": self._state_preprocessor(states), **rnn}, role="policy") self._current_log_prob = log_prob + if self._rnn: + self._rnn_final_states["policy"] = outputs.get("rnn", []) + return actions, log_prob, outputs def record_transition(self, @@ -249,15 +284,39 @@ def record_transition(self, if self._rewards_shaper is not None: rewards = self._rewards_shaper(rewards, timestep, timesteps) - with torch.no_grad(): - values, _, _ = self.value.act({"states": self._state_preprocessor(states)}, role="value") + # compute values + rnn = {"rnn": self._rnn_initial_states["value"]} if self._rnn else {} + values, _, outputs = self.value.act({"states": self._state_preprocessor(states), **rnn}, role="value") values = self._value_preprocessor(values, inverse=True) + # package RNN states + rnn_states = {} + if self._rnn: + rnn_states.update({f"rnn_policy_{i}": s.transpose(0, 1) for i, s in enumerate(self._rnn_initial_states["policy"])}) + if self.policy is not self.value: + rnn_states.update({f"rnn_value_{i}": s.transpose(0, 1) for i, s in enumerate(self._rnn_initial_states["value"])}) + + # storage transition in memory self.memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values) + terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values, **rnn_states) for memory in self.secondary_memories: memory.add_samples(states=states, actions=actions, rewards=rewards, next_states=next_states, - terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values) + terminated=terminated, truncated=truncated, log_prob=self._current_log_prob, values=values, **rnn_states) + + # update RNN states + if self._rnn: + self._rnn_final_states["value"] = self._rnn_final_states["policy"] if self.policy is self.value else outputs.get("rnn", []) + + # reset states if the episodes have ended + finished_episodes = terminated.nonzero(as_tuple=False) + if finished_episodes.numel(): + for rnn_state in self._rnn_final_states["policy"]: + rnn_state[:, finished_episodes[:, 0]] = 0 + if self.policy is not self.value: + for rnn_state in self._rnn_final_states["value"]: + rnn_state[:, finished_episodes[:, 0]] = 0 + + self._rnn_initial_states = self._rnn_final_states def pre_interaction(self, timestep: int, timesteps: int) -> None: """Callback called before the interaction with the environment @@ -279,7 +338,9 @@ def post_interaction(self, timestep: int, timesteps: int) -> None: """ self._rollout += 1 if not self._rollout % self._rollouts and timestep >= self._learning_starts: + self.set_mode("train") self._update(timestep, timesteps) + self.set_mode("eval") # write tracking data and checkpoints super().post_interaction(timestep, timesteps) @@ -354,7 +415,7 @@ def surrogate_loss(policy: Model, :return: Surrogate loss :rtype: torch.Tensor """ - _, new_log_prob, _ = policy.act({"states": states, "taken_actions": actions}, role="policy") + _, new_log_prob, _ = policy.act({"states": states, "taken_actions": actions, **rnn_policy}, role="policy") return (advantages * torch.exp(new_log_prob - log_prob.detach())).mean() def conjugate_gradient(policy: Model, @@ -438,12 +499,13 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor :return: KL divergence :rtype: torch.Tensor """ - mu_1 = policy_1.act({"states": states}, role="policy")[2]["mean_actions"] + mu_1 = policy_1.act({"states": states, 
**rnn_policy}, role="policy")[2]["mean_actions"] logstd_1 = policy_1.get_log_std(role="policy") mu_1, logstd_1 = mu_1.detach(), logstd_1.detach() - mu_2 = policy_2.act({"states": states}, role="policy")[2]["mean_actions"] - logstd_2 = policy_2.get_log_std(role="policy") + with torch.backends.cudnn.flags(enabled=not self._rnn): + mu_2 = policy_2.act({"states": states, **rnn_policy}, role="policy")[2]["mean_actions"] + logstd_2 = policy_2.get_log_std(role="policy") kl = logstd_1 - logstd_2 + 0.5 * (torch.square(logstd_1.exp()) + torch.square(mu_1 - mu_2)) \ / torch.square(logstd_2.exp()) - 0.5 @@ -451,7 +513,10 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor # compute returns and advantages with torch.no_grad(): - last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float())}, role="value") + self.value.train(False) + rnn = {"rnn": self._rnn_initial_states["value"]} if self._rnn else {} + last_values, _, _ = self.value.act({"states": self._state_preprocessor(self._current_next_states.float()), **rnn}, role="value") + self.value.train(True) last_values = self._value_preprocessor(last_values, inverse=True) values = self.memory.get_tensor_by_name("values") @@ -467,8 +532,17 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor self.memory.set_tensor_by_name("advantages", advantages) # sample all from memory - sampled_states, sampled_actions, sampled_log_prob, sampled_advantages \ - = self.memory.sample_all(names=self._tensors_names_policy, mini_batches=1)[0] + sampled_states, sampled_actions, sampled_dones, sampled_log_prob, sampled_advantages \ + = self.memory.sample_all(names=self._tensors_names_policy, mini_batches=1, sequence_length=self._rnn_sequence_length)[0] + sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=1, sequence_length=self._rnn_sequence_length)[0] + + rnn_policy = {} + + if self._rnn: + if self.policy is self.value: + rnn_policy = {"rnn": [s.transpose(0, 1) for s in sampled_rnn_batches], "terminated": sampled_dones} + else: + rnn_policy = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches, self._rnn_tensors_names) if "policy" in n], "terminated": sampled_dones} sampled_states = self._state_preprocessor(sampled_states, train=True) @@ -510,7 +584,10 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor self.policy.update_parameters(self.backup_policy) # sample mini-batches from memory - sampled_batches = self.memory.sample_all(names=self._tensors_names_value, mini_batches=self._mini_batches) + sampled_batches = self.memory.sample_all(names=self._tensors_names_value, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) + sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) + + rnn_value = {} cumulative_value_loss = 0 @@ -518,12 +595,18 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor for epoch in range(self._learning_epochs): # mini-batches loop - for sampled_states, sampled_returns in sampled_batches: + for i, (sampled_states, sampled_dones, sampled_returns) in enumerate(sampled_batches): + + if self._rnn: + if self.policy is self.value: + rnn_value = {"rnn": [s.transpose(0, 1) for s in sampled_rnn_batches[i]], "terminated": sampled_dones} + else: + rnn_value = {"rnn": [s.transpose(0, 1) for s, n in zip(sampled_rnn_batches[i], 
self._rnn_tensors_names) if "value" in n], "terminated": sampled_dones} sampled_states = self._state_preprocessor(sampled_states, train=not epoch) # compute value loss - predicted_values, _, _ = self.value.act({"states": sampled_states}, role="value") + predicted_values, _, _ = self.value.act({"states": sampled_states, **rnn_value}, role="value") value_loss = self._value_loss_scale * F.mse_loss(sampled_returns, predicted_values) From 32db9e25b2f0565d49cdcfdd60f92cef89a63fe1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 25 Nov 2022 10:30:12 +0100 Subject: [PATCH 088/157] Sample RNN states from memory only if RNN support is enabled --- skrl/agents/torch/a2c/a2c.py | 3 ++- skrl/agents/torch/ppo/ppo.py | 3 ++- skrl/agents/torch/trpo/trpo.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 49f87c2b..68b76bc6 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -403,9 +403,10 @@ def compute_gae(rewards: torch.Tensor, # sample mini-batches from memory sampled_batches = self.memory.sample_all(names=self._tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) - sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) rnn_policy, rnn_value = {}, {} + if self._rnn: + sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) cumulative_policy_loss = 0 cumulative_entropy_loss = 0 diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index c03190f8..20096c41 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -418,9 +418,10 @@ def compute_gae(rewards: torch.Tensor, # sample mini-batches from memory sampled_batches = self.memory.sample_all(names=self._tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) - sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) rnn_policy, rnn_value = {}, {} + if self._rnn: + sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) cumulative_policy_loss = 0 cumulative_entropy_loss = 0 diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index 09686364..5f224610 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -585,9 +585,10 @@ def kl_divergence(policy_1: Model, policy_2: Model, states: torch.Tensor) -> tor # sample mini-batches from memory sampled_batches = self.memory.sample_all(names=self._tensors_names_value, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) - sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) rnn_value = {} + if self._rnn: + sampled_rnn_batches = self.memory.sample_all(names=self._rnn_tensors_names, mini_batches=self._mini_batches, sequence_length=self._rnn_sequence_length) cumulative_value_loss = 0 From 0e0c60ce00b5f45fbbeea703393b82fd6ed3010b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 3 Dec 2022 14:06:27 +0100 Subject: [PATCH 089/157] Add information about the support for advanced 
features --- docs/source/modules/skrl.agents.a2c.rst | 12 ++++++++++++ docs/source/modules/skrl.agents.amp.rst | 12 ++++++++++++ docs/source/modules/skrl.agents.cem.rst | 10 ++++++++++ docs/source/modules/skrl.agents.ddpg.rst | 12 ++++++++++++ docs/source/modules/skrl.agents.ddqn.rst | 12 ++++++++++++ docs/source/modules/skrl.agents.dqn.rst | 12 ++++++++++++ docs/source/modules/skrl.agents.ppo.rst | 12 ++++++++++++ docs/source/modules/skrl.agents.sac.rst | 12 ++++++++++++ docs/source/modules/skrl.agents.td3.rst | 12 ++++++++++++ docs/source/modules/skrl.agents.trpo.rst | 12 ++++++++++++ 10 files changed, 118 insertions(+) diff --git a/docs/source/modules/skrl.agents.a2c.rst b/docs/source/modules/skrl.agents.a2c.rst index 38c1c6df..5dba97d5 100644 --- a/docs/source/modules/skrl.agents.a2c.rst +++ b/docs/source/modules/skrl.agents.a2c.rst @@ -124,6 +124,18 @@ The implementation uses 1 stochastic (discrete or continuous) and 1 deterministi - 1 - :ref:`Deterministic ` +Support for advanced features is described in the next table + +.. list-table:: + :header-rows: 1 + + * - Feature + - Support and remarks + * - Shared model + - for Policy and Value + * - RNN support + - RNN, LSTM, GRU and any other variant + API ^^^ diff --git a/docs/source/modules/skrl.agents.amp.rst b/docs/source/modules/skrl.agents.amp.rst index b44170e3..66a11244 100644 --- a/docs/source/modules/skrl.agents.amp.rst +++ b/docs/source/modules/skrl.agents.amp.rst @@ -155,6 +155,18 @@ The implementation uses 1 stochastic (continuous) and 2 deterministic function a - 1 - :ref:`Deterministic ` +Support for advanced features is described in the next table + +.. list-table:: + :header-rows: 1 + + * - Feature + - Support and remarks + * - Shared model + - \- + * - RNN support + - \- + API ^^^ diff --git a/docs/source/modules/skrl.agents.cem.rst b/docs/source/modules/skrl.agents.cem.rst index 1b13f5f8..8d79dc5f 100644 --- a/docs/source/modules/skrl.agents.cem.rst +++ b/docs/source/modules/skrl.agents.cem.rst @@ -74,6 +74,16 @@ The implementation uses 1 discrete function approximator. This function approxim - action - :ref:`Categorical ` +Support for advanced features is described in the next table + +.. list-table:: + :header-rows: 1 + + * - Feature + - Support and remarks + * - RNN support + - \- + API ^^^ diff --git a/docs/source/modules/skrl.agents.ddpg.rst b/docs/source/modules/skrl.agents.ddpg.rst index b48b5788..7c5da694 100644 --- a/docs/source/modules/skrl.agents.ddpg.rst +++ b/docs/source/modules/skrl.agents.ddpg.rst @@ -122,6 +122,18 @@ The implementation uses 4 deterministic function approximators. These function a - 1 - :ref:`Deterministic ` +Support for advanced features is described in the next table + +.. list-table:: + :header-rows: 1 + + * - Feature + - Support and remarks + * - Shared model + - \- + * - RNN support + - RNN, LSTM, GRU and any other variant + API ^^^ diff --git a/docs/source/modules/skrl.agents.ddqn.rst b/docs/source/modules/skrl.agents.ddqn.rst index 3b2e12ca..36017091 100644 --- a/docs/source/modules/skrl.agents.ddqn.rst +++ b/docs/source/modules/skrl.agents.ddqn.rst @@ -87,6 +87,18 @@ The implementation uses 2 deterministic function approximators. These function a - action - :ref:`Deterministic ` +Support for advanced features is described in the next table + +.. 
list-table:: + :header-rows: 1 + + * - Feature + - Support and remarks + * - Shared model + - \- + * - RNN support + - \- + API ^^^ diff --git a/docs/source/modules/skrl.agents.dqn.rst b/docs/source/modules/skrl.agents.dqn.rst index 02213752..43d60647 100644 --- a/docs/source/modules/skrl.agents.dqn.rst +++ b/docs/source/modules/skrl.agents.dqn.rst @@ -87,6 +87,18 @@ The implementation uses 2 deterministic function approximators. These function a - action - :ref:`Deterministic ` +Support for advanced features is described in the next table + +.. list-table:: + :header-rows: 1 + + * - Feature + - Support and remarks + * - Shared model + - \- + * - RNN support + - \- + API ^^^ diff --git a/docs/source/modules/skrl.agents.ppo.rst b/docs/source/modules/skrl.agents.ppo.rst index d6c1e11e..a87ca455 100644 --- a/docs/source/modules/skrl.agents.ppo.rst +++ b/docs/source/modules/skrl.agents.ppo.rst @@ -140,6 +140,18 @@ The implementation uses 1 stochastic (discrete or continuous) and 1 deterministi - 1 - :ref:`Deterministic ` +Support for advanced features is described in the next table + +.. list-table:: + :header-rows: 1 + + * - Feature + - Support and remarks + * - Shared model + - for Policy and Value + * - RNN support + - RNN, LSTM, GRU and any other variant + API ^^^ diff --git a/docs/source/modules/skrl.agents.sac.rst b/docs/source/modules/skrl.agents.sac.rst index 5cfe1d81..b81af374 100644 --- a/docs/source/modules/skrl.agents.sac.rst +++ b/docs/source/modules/skrl.agents.sac.rst @@ -135,6 +135,18 @@ The implementation uses 1 stochastic and 4 deterministic function approximators. - 1 - :ref:`Deterministic ` +Support for advanced features is described in the next table + +.. list-table:: + :header-rows: 1 + + * - Feature + - Support and remarks + * - Shared model + - \- + * - RNN support + - RNN, LSTM, GRU and any other variant + API ^^^ diff --git a/docs/source/modules/skrl.agents.td3.rst b/docs/source/modules/skrl.agents.td3.rst index 7cd2e01c..772a292c 100644 --- a/docs/source/modules/skrl.agents.td3.rst +++ b/docs/source/modules/skrl.agents.td3.rst @@ -144,6 +144,18 @@ The implementation uses 6 deterministic function approximators. These function a - 1 - :ref:`Deterministic ` +Support for advanced features is described in the next table + +.. list-table:: + :header-rows: 1 + + * - Feature + - Support and remarks + * - Shared model + - \- + * - RNN support + - RNN, LSTM, GRU and any other variant + API ^^^ diff --git a/docs/source/modules/skrl.agents.trpo.rst b/docs/source/modules/skrl.agents.trpo.rst index 2a864169..dcc4ed00 100644 --- a/docs/source/modules/skrl.agents.trpo.rst +++ b/docs/source/modules/skrl.agents.trpo.rst @@ -180,6 +180,18 @@ The implementation uses 1 stochastic and 1 deterministic function approximator. - 1 - :ref:`Deterministic ` +Support for advanced features is described in the next table + +.. 
list-table:: + :header-rows: 1 + + * - Feature + - Support and remarks + * - Shared model + - \- + * - RNN support + - RNN, LSTM, GRU and any other variant + API ^^^ From 107883f3588a74761788f3171c91a5e8c992f671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 3 Dec 2022 14:10:55 +0100 Subject: [PATCH 090/157] Update model snippets --- docs/source/snippets/categorical_model.py | 112 +++++++++++-- docs/source/snippets/deterministic_model.py | 108 +++++++++++-- docs/source/snippets/gaussian_model.py | 144 ++++++++++++++--- .../snippets/multivariate_gaussian_model.py | 147 +++++++++++++++--- 4 files changed, 440 insertions(+), 71 deletions(-) diff --git a/docs/source/snippets/categorical_model.py b/docs/source/snippets/categorical_model.py index b303f89b..fbedbab6 100644 --- a/docs/source/snippets/categorical_model.py +++ b/docs/source/snippets/categorical_model.py @@ -1,4 +1,35 @@ -# [start-mlp] +# [start-mlp-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, CategoricalMixin + + +# define the model +class MLP(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), {} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = MLP(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + unnormalized_log_prob=True) +# [end-mlp-sequential] + +# [start-mlp-functional] +import torch import torch.nn as nn import torch.nn.functional as F @@ -11,14 +42,16 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro Model.__init__(self, observation_space, action_space, device) CategoricalMixin.__init__(self, unnormalized_log_prob) - self.linear_layer_1 = nn.Linear(self.num_observations, 64) - self.linear_layer_2 = nn.Linear(64, 32) - self.output_layer = nn.Linear(32, self.num_actions) + self.fc1 = nn.Linear(self.num_observations, 64) + self.fc2 = nn.Linear(64, 32) + self.logits = nn.Linear(32, self.num_actions) def compute(self, inputs, role): - x = F.relu(self.linear_layer_1(inputs["states"])) - x = F.relu(self.linear_layer_2(x)) - return self.output_layer(x), {} + x = self.fc1(inputs["states"]) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + return self.logits(x), {} # instantiate the model (assumes there is a wrapped environment: env) @@ -26,11 +59,12 @@ def compute(self, inputs, role): action_space=env.action_space, device=env.device, unnormalized_log_prob=True) -# [end-mlp] +# [end-mlp-functional] # ============================================================================= -# [start-cnn] +# [start-cnn-sequential] +import torch import torch.nn as nn from skrl.models.torch import Model, CategoricalMixin @@ -49,7 +83,7 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(), nn.Flatten(), - nn.Linear(9216, 512), + nn.Linear(1024, 512), nn.ReLU(), nn.Linear(512, 16), nn.Tanh(), @@ -60,8 +94,60 @@ def __init__(self, observation_space, action_space, device, unnormalized_log_pro nn.Linear(32, self.num_actions)) def compute(self, inputs, role): - # permute 
(samples, width, height, channels) -> (samples, channels, width, height) - return self.net(inputs["states"].permute(0, 3, 1, 2)), {} + # permute (samples, width * height * channels) -> (samples, channels, width, height) + return self.net(inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)), {} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = CNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + unnormalized_log_prob=True) +# [end-cnn-sequential] + +# [start-cnn-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, CategoricalMixin + + +# define the model +class CNN(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.conv1 = nn.Conv2d(3, 32, kernel_size=8, stride=4) + self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2) + self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1) + self.fc1 = nn.Linear(1024, 512) + self.fc2 = nn.Linear(512, 16) + self.fc3 = nn.Linear(16, 64) + self.fc4 = nn.Linear(64, 32) + self.fc5 = nn.Linear(32, self.num_actions) + + def compute(self, inputs, role): + # permute (samples, width * height * channels) -> (samples, channels, width, height) + x = inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2) + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = self.conv3(x) + x = F.relu(x) + x = torch.flatten(x, start_dim=1) + x = self.fc1(x) + x = F.relu(x) + x = self.fc2(x) + x = torch.tanh(x) + x = self.fc3(x) + x = torch.tanh(x) + x = self.fc4(x) + x = torch.tanh(x) + x = self.fc5(x) + return x, {} # instantiate the model (assumes there is a wrapped environment: env) @@ -69,4 +155,4 @@ def compute(self, inputs, role): action_space=env.action_space, device=env.device, unnormalized_log_prob=True) -# [end-cnn] +# [end-cnn-functional] diff --git a/docs/source/snippets/deterministic_model.py b/docs/source/snippets/deterministic_model.py index 4a2179e2..cac70d88 100644 --- a/docs/source/snippets/deterministic_model.py +++ b/docs/source/snippets/deterministic_model.py @@ -1,4 +1,4 @@ -# [start-mlp] +# [start-mlp-sequential] import torch import torch.nn as nn @@ -26,11 +26,44 @@ def compute(self, inputs, role): action_space=env.action_space, device=env.device, clip_actions=False) -# [end-mlp] +# [end-mlp-sequential] + +# [start-mlp-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, DeterministicMixin + + +# define the model +class MLP(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.fc1 = nn.Linear(self.num_observations + self.num_actions, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, 1) + + def compute(self, inputs, role): + x = self.fc1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1)) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + return self.fc3(x), {} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = MLP(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=False) +# 
[end-mlp-functional] # ============================================================================= -# [start-cnn] +# [start-cnn-sequential] import torch import torch.nn as nn @@ -43,26 +76,27 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): Model.__init__(self, observation_space, action_space, device) DeterministicMixin.__init__(self, clip_actions) - self.features_extractor = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=3), + self.features_extractor = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ReLU(), nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(), - nn.Conv2d(64, 64, kernel_size=2, stride=1), + nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(), nn.Flatten(), - nn.Linear(3136, 512), + nn.Linear(1024, 512), nn.ReLU(), nn.Linear(512, 16), nn.Tanh()) - self.net = nn.Sequential(nn.Linear(16 + self.num_actions, 32), + + self.net = nn.Sequential(nn.Linear(16 + self.num_actions, 64), nn.Tanh(), - nn.Linear(32, 32), + nn.Linear(64, 32), nn.Tanh(), nn.Linear(32, 1)) def compute(self, inputs, role): - # permute (samples, width, height, channels) -> (samples, channels, width, height) - x = self.features_extractor(inputs["states"].permute(0, 3, 1, 2)) + # permute (samples, width * height * channels) -> (samples, channels, width, height) + x = self.features_extractor(inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)) return self.net(torch.cat([x, inputs["taken_actions"]], dim=1)), {} @@ -71,4 +105,56 @@ def compute(self, inputs, role): action_space=env.action_space, device=env.device, clip_actions=False) -# [end-cnn] +# [end-cnn-sequential] + +# [start-cnn-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, DeterministicMixin + + +# define the model +class CNN(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.conv1 = nn.Conv2d(3, 32, kernel_size=8, stride=4) + self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2) + self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1) + self.fc1 = nn.Linear(1024, 512) + self.fc2 = nn.Linear(512, 16) + self.fc3 = nn.Linear(16 + self.num_actions, 64) + self.fc4 = nn.Linear(64, 32) + self.fc5 = nn.Linear(32, 1) + + def compute(self, inputs, role): + # permute (samples, width * height * channels) -> (samples, channels, width, height) + x = inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2) + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = self.conv3(x) + x = F.relu(x) + x = torch.flatten(x, start_dim=1) + x = self.fc1(x) + x = F.relu(x) + x = self.fc2(x) + x = torch.tanh(x) + x = self.fc3(torch.cat([x, inputs["taken_actions"]], dim=1)) + x = torch.tanh(x) + x = self.fc4(x) + x = torch.tanh(x) + x = self.fc5(x) + return x, {} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = CNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=False) +# [end-cnn-functional] diff --git a/docs/source/snippets/gaussian_model.py b/docs/source/snippets/gaussian_model.py index fab2ce13..eadf22dd 100644 --- a/docs/source/snippets/gaussian_model.py +++ b/docs/source/snippets/gaussian_model.py @@ -1,4 +1,42 @@ -# [start-mlp] +# [start-mlp-sequential] +import torch +import torch.nn as nn + +from 
skrl.models.torch import Model, GaussianMixin + + +# define the model +class MLP(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions), + nn.Tanh()) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = MLP(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + reduction="sum") +# [end-mlp-sequential] + +# [start-mlp-functional] import torch import torch.nn as nn import torch.nn.functional as F @@ -13,18 +51,20 @@ def __init__(self, observation_space, action_space, device, Model.__init__(self, observation_space, action_space, device) GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) - self.linear_layer_1 = nn.Linear(self.num_observations, 128) - self.linear_layer_2 = nn.Linear(128, 64) - self.linear_layer_3 = nn.Linear(64, 32) - self.mean_action_layer = nn.Linear(32, self.num_actions) + self.fc1 = nn.Linear(self.num_observations, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) def compute(self, inputs, role): - x = F.relu(self.linear_layer_1(inputs["states"])) - x = F.relu(self.linear_layer_2(x)) - x = F.relu(self.linear_layer_3(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter, {} + x = self.fc1(inputs["states"]) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + x = self.fc3(x) + return torch.tanh(x), self.log_std_parameter, {} + # instantiate the model (assumes there is a wrapped environment: env) policy = MLP(observation_space=env.observation_space, @@ -35,14 +75,13 @@ def compute(self, inputs, role): min_log_std=-20, max_log_std=2, reduction="sum") -# [end-mlp] +# [end-mlp-functional] # ============================================================================= -# [start-cnn] +# [start-cnn-sequential] import torch import torch.nn as nn -import torch.nn.functional as F from skrl.models.torch import Model, GaussianMixin @@ -54,28 +93,87 @@ def __init__(self, observation_space, action_space, device, Model.__init__(self, observation_space, action_space, device) GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) - self.net = nn.Sequential(nn.Conv2d(1, 64, kernel_size=4, stride=2), + self.net = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ReLU(), - nn.Conv2d(64, 32, kernel_size=4, stride=2), + nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(), - nn.Conv2d(32, 16, kernel_size=2, stride=2), - nn.ReLU(), - nn.Conv2d(16, 8, kernel_size=2, stride=2), + nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(), nn.Flatten(), - nn.Linear(1800, 256), + nn.Linear(1024, 512), nn.ReLU(), - nn.Linear(256, 16), + nn.Linear(512, 16), + nn.Tanh(), + nn.Linear(16, 64), nn.Tanh(), - nn.Linear(16, 32), + nn.Linear(64, 32), nn.Tanh(), 
nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) def compute(self, inputs, role): - # permute (samples, width, height, channels) -> (samples, channels, width, height) - return self.net(inputs["states"].permute(0, 3, 1, 2)), self.log_std_parameter, {} + # permute (samples, width * height * channels) -> (samples, channels, width, height) + return self.net(inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)), self.log_std_parameter, {} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = CNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + reduction="sum") +# [end-cnn-sequential] + +# [start-cnn-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, GaussianMixin + + +# define the model +class CNN(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.conv1 = nn.Conv2d(3, 32, kernel_size=8, stride=4) + self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2) + self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1) + self.fc1 = nn.Linear(1024, 512) + self.fc2 = nn.Linear(512, 16) + self.fc3 = nn.Linear(16, 64) + self.fc4 = nn.Linear(64, 32) + self.fc5 = nn.Linear(32, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + # permute (samples, width * height * channels) -> (samples, channels, width, height) + x = inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2) + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = self.conv3(x) + x = F.relu(x) + x = torch.flatten(x, start_dim=1) + x = self.fc1(x) + x = F.relu(x) + x = self.fc2(x) + x = torch.tanh(x) + x = self.fc3(x) + x = torch.tanh(x) + x = self.fc4(x) + x = torch.tanh(x) + x = self.fc5(x) + return x, self.log_std_parameter, {} # instantiate the model (assumes there is a wrapped environment: env) @@ -87,4 +185,4 @@ def compute(self, inputs, role): min_log_std=-20, max_log_std=2, reduction="sum") -# [end-cnn] +# [end-cnn-functional] diff --git a/docs/source/snippets/multivariate_gaussian_model.py b/docs/source/snippets/multivariate_gaussian_model.py index d9ea6fcb..3568cc0b 100644 --- a/docs/source/snippets/multivariate_gaussian_model.py +++ b/docs/source/snippets/multivariate_gaussian_model.py @@ -1,4 +1,42 @@ -# [start-mlp] +# [start-mlp-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, MultivariateGaussianMixin + + +# define the model +class MLP(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions), + nn.Tanh()) + + self.log_std_parameter = 
nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = MLP(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + reduction="sum") +# [end-mlp-sequential] + +# [start-mlp-functional] import torch import torch.nn as nn import torch.nn.functional as F @@ -13,18 +51,20 @@ def __init__(self, observation_space, action_space, device, Model.__init__(self, observation_space, action_space, device) MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) - self.linear_layer_1 = nn.Linear(self.num_observations, 128) - self.linear_layer_2 = nn.Linear(128, 64) - self.linear_layer_3 = nn.Linear(64, 32) - self.mean_action_layer = nn.Linear(32, self.num_actions) + self.fc1 = nn.Linear(self.num_observations, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, self.num_actions) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) def compute(self, inputs, role): - x = F.relu(self.linear_layer_1(inputs["states"])) - x = F.relu(self.linear_layer_2(x)) - x = F.relu(self.linear_layer_3(x)) - return torch.tanh(self.mean_action_layer(x)), self.log_std_parameter, {} + x = self.fc1(inputs["states"]) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + x = self.fc3(x) + return torch.tanh(x), self.log_std_parameter, {} + # instantiate the model (assumes there is a wrapped environment: env) policy = MLP(observation_space=env.observation_space, @@ -34,14 +74,13 @@ def compute(self, inputs, role): clip_log_std=True, min_log_std=-20, max_log_std=2) -# [end-mlp] +# [end-mlp-functional] # ============================================================================= -# [start-cnn] +# [start-cnn-sequential] import torch import torch.nn as nn -import torch.nn.functional as F from skrl.models.torch import Model, MultivariateGaussianMixin @@ -53,28 +92,28 @@ def __init__(self, observation_space, action_space, device, Model.__init__(self, observation_space, action_space, device) MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) - self.net = nn.Sequential(nn.Conv2d(1, 64, kernel_size=4, stride=2), + self.net = nn.Sequential(nn.Conv2d(3, 32, kernel_size=8, stride=4), nn.ReLU(), - nn.Conv2d(64, 32, kernel_size=4, stride=2), + nn.Conv2d(32, 64, kernel_size=4, stride=2), nn.ReLU(), - nn.Conv2d(32, 16, kernel_size=2, stride=2), - nn.ReLU(), - nn.Conv2d(16, 8, kernel_size=2, stride=2), + nn.Conv2d(64, 64, kernel_size=3, stride=1), nn.ReLU(), nn.Flatten(), - nn.Linear(1800, 256), + nn.Linear(1024, 512), nn.ReLU(), - nn.Linear(256, 16), + nn.Linear(512, 16), + nn.Tanh(), + nn.Linear(16, 64), nn.Tanh(), - nn.Linear(16, 32), + nn.Linear(64, 32), nn.Tanh(), nn.Linear(32, self.num_actions)) self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) def compute(self, inputs, role): - # permute (samples, width, height, channels) -> (samples, channels, width, height) - return self.net(inputs["states"].permute(0, 3, 1, 2)), self.log_std_parameter, {} + # permute (samples, width * height * channels) -> (samples, channels, width, height) + return self.net(inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2)), self.log_std_parameter, {} # instantiate the model (assumes there is a wrapped environment: env) @@ -84,5 
+123,65 @@ def compute(self, inputs, role): clip_actions=True, clip_log_std=True, min_log_std=-20, - max_log_std=2) -# [end-cnn] + max_log_std=2, + reduction="sum") +# [end-cnn-sequential] + +# [start-cnn-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, MultivariateGaussianMixin + + +# define the model +class CNN(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, + clip_actions=False, clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.conv1 = nn.Conv2d(3, 32, kernel_size=8, stride=4) + self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2) + self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1) + self.fc1 = nn.Linear(1024, 512) + self.fc2 = nn.Linear(512, 16) + self.fc3 = nn.Linear(16, 64) + self.fc4 = nn.Linear(64, 32) + self.fc5 = nn.Linear(32, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + # permute (samples, width * height * channels) -> (samples, channels, width, height) + x = inputs["states"].view(-1, *self.observation_space.shape).permute(0, 3, 1, 2) + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.relu(x) + x = self.conv3(x) + x = F.relu(x) + x = torch.flatten(x, start_dim=1) + x = self.fc1(x) + x = F.relu(x) + x = self.fc2(x) + x = torch.tanh(x) + x = self.fc3(x) + x = torch.tanh(x) + x = self.fc4(x) + x = torch.tanh(x) + x = self.fc5(x) + return x, self.log_std_parameter, {} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = CNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + reduction="sum") +# [end-cnn-functional] From 0545e878f89ff5afd86ae7a192a506af9bdbcfbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 3 Dec 2022 14:12:47 +0100 Subject: [PATCH 091/157] Update snippets in docs --- .../modules/skrl.models.categorical.rst | 67 +++++++++++++++---- .../modules/skrl.models.deterministic.rst | 67 +++++++++++++++---- docs/source/modules/skrl.models.gaussian.rst | 66 ++++++++++++++---- .../skrl.models.multivariate_gaussian.rst | 67 +++++++++++++++---- 4 files changed, 219 insertions(+), 48 deletions(-) diff --git a/docs/source/modules/skrl.models.categorical.rst b/docs/source/modules/skrl.models.categorical.rst index 2c2158f0..e67b73c1 100644 --- a/docs/source/modules/skrl.models.categorical.rst +++ b/docs/source/modules/skrl.models.categorical.rst @@ -38,23 +38,66 @@ Concept Basic usage ----------- +* Multi-Layer Perceptron (**MLP**) +* Convolutional Neural Network (**CNN**) + .. tabs:: - .. tab:: Multi-Layer Perceptron (MLP) + .. tab:: MLP + + .. image:: ../_static/imgs/model_categorical_mlp.svg + :width: 40% + :align: center + + .. raw:: html + +
+ + .. tabs:: + + .. tab:: nn.Sequential + + .. literalinclude:: ../snippets/categorical_model.py + :language: python + :linenos: + :start-after: [start-mlp-sequential] + :end-before: [end-mlp-sequential] + + .. tab:: nn.functional + + .. literalinclude:: ../snippets/categorical_model.py + :language: python + :linenos: + :start-after: [start-mlp-functional] + :end-before: [end-mlp-functional] + + .. tab:: CNN + + .. image:: ../_static/imgs/model_categorical_cnn.svg + :width: 100% + :align: center + + .. raw:: html + +
+ + .. tabs:: + + .. tab:: nn.Sequential - .. literalinclude:: ../snippets/categorical_model.py - :language: python - :linenos: - :start-after: [start-mlp] - :end-before: [end-mlp] + .. literalinclude:: ../snippets/categorical_model.py + :language: python + :linenos: + :start-after: [start-cnn-sequential] + :end-before: [end-cnn-sequential] - .. tab:: Convolutional Neural Network (CNN) + .. tab:: nn.functional - .. literalinclude:: ../snippets/categorical_model.py - :language: python - :linenos: - :start-after: [start-cnn] - :end-before: [end-cnn] + .. literalinclude:: ../snippets/categorical_model.py + :language: python + :linenos: + :start-after: [start-cnn-functional] + :end-before: [end-cnn-functional] API --- diff --git a/docs/source/modules/skrl.models.deterministic.rst b/docs/source/modules/skrl.models.deterministic.rst index 2d92d702..8196b8d3 100644 --- a/docs/source/modules/skrl.models.deterministic.rst +++ b/docs/source/modules/skrl.models.deterministic.rst @@ -38,23 +38,66 @@ Concept Basic usage ----------- +* Multi-Layer Perceptron (**MLP**) +* Convolutional Neural Network (**CNN**) + .. tabs:: - .. tab:: Multi-Layer Perceptron (MLP) + .. tab:: MLP + + .. image:: ../_static/imgs/model_deterministic_mlp.svg + :width: 35% + :align: center + + .. raw:: html + +
+ + .. tabs:: + + .. tab:: nn.Sequential + + .. literalinclude:: ../snippets/deterministic_model.py + :language: python + :linenos: + :start-after: [start-mlp-sequential] + :end-before: [end-mlp-sequential] + + .. tab:: nn.functional + + .. literalinclude:: ../snippets/deterministic_model.py + :language: python + :linenos: + :start-after: [start-mlp-functional] + :end-before: [end-mlp-functional] + + .. tab:: CNN + + .. image:: ../_static/imgs/model_deterministic_cnn.svg + :width: 100% + :align: center + + .. raw:: html + +
+ + .. tabs:: + + .. tab:: nn.Sequential - .. literalinclude:: ../snippets/deterministic_model.py - :language: python - :linenos: - :start-after: [start-mlp] - :end-before: [end-mlp] + .. literalinclude:: ../snippets/deterministic_model.py + :language: python + :linenos: + :start-after: [start-cnn-sequential] + :end-before: [end-cnn-sequential] - .. tab:: Convolutional Neural Network (CNN) + .. tab:: nn.functional - .. literalinclude:: ../snippets/deterministic_model.py - :language: python - :linenos: - :start-after: [start-cnn] - :end-before: [end-cnn] + .. literalinclude:: ../snippets/deterministic_model.py + :language: python + :linenos: + :start-after: [start-cnn-functional] + :end-before: [end-cnn-functional] API --- diff --git a/docs/source/modules/skrl.models.gaussian.rst b/docs/source/modules/skrl.models.gaussian.rst index e3dff3cb..fb77d2dc 100644 --- a/docs/source/modules/skrl.models.gaussian.rst +++ b/docs/source/modules/skrl.models.gaussian.rst @@ -40,24 +40,66 @@ Concept Basic usage ----------- +* Multi-Layer Perceptron (**MLP**) +* Convolutional Neural Network (**CNN**) + .. tabs:: - .. tab:: Multi-Layer Perceptron (MLP) + .. tab:: MLP + + .. image:: ../_static/imgs/model_gaussian_mlp.svg + :width: 42% + :align: center + + .. raw:: html + +
+ + .. tabs:: + + .. tab:: nn.Sequential + + .. literalinclude:: ../snippets/gaussian_model.py + :language: python + :linenos: + :start-after: [start-mlp-sequential] + :end-before: [end-mlp-sequential] + + .. tab:: nn.functional + + .. literalinclude:: ../snippets/gaussian_model.py + :language: python + :linenos: + :start-after: [start-mlp-functional] + :end-before: [end-mlp-functional] + + .. tab:: CNN + + .. image:: ../_static/imgs/model_gaussian_cnn.svg + :width: 100% + :align: center + + .. raw:: html + +
+ + .. tabs:: - .. literalinclude:: ../snippets/gaussian_model.py - :language: python - :linenos: - :start-after: [start-mlp] - :end-before: [end-mlp] + .. tab:: nn.Sequential - .. tab:: Convolutional Neural Network (CNN) + .. literalinclude:: ../snippets/gaussian_model.py + :language: python + :linenos: + :start-after: [start-cnn-sequential] + :end-before: [end-cnn-sequential] - .. literalinclude:: ../snippets/gaussian_model.py - :language: python - :linenos: - :start-after: [start-cnn] - :end-before: [end-cnn] + .. tab:: nn.functional + .. literalinclude:: ../snippets/gaussian_model.py + :language: python + :linenos: + :start-after: [start-cnn-functional] + :end-before: [end-cnn-functional] API --- diff --git a/docs/source/modules/skrl.models.multivariate_gaussian.rst b/docs/source/modules/skrl.models.multivariate_gaussian.rst index df4204bc..238ddf93 100644 --- a/docs/source/modules/skrl.models.multivariate_gaussian.rst +++ b/docs/source/modules/skrl.models.multivariate_gaussian.rst @@ -40,23 +40,66 @@ Concept Basic usage ----------- +* Multi-Layer Perceptron (**MLP**) +* Convolutional Neural Network (**CNN**) + .. tabs:: - .. tab:: Multi-Layer Perceptron (MLP) + .. tab:: MLP + + .. image:: ../_static/imgs/model_gaussian_mlp.svg + :width: 42% + :align: center + + .. raw:: html + +
+ + .. tabs:: + + .. tab:: nn.Sequential + + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-mlp-sequential] + :end-before: [end-mlp-sequential] + + .. tab:: nn.functional + + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-mlp-functional] + :end-before: [end-mlp-functional] + + .. tab:: CNN + + .. image:: ../_static/imgs/model_gaussian_cnn.svg + :width: 100% + :align: center + + .. raw:: html + +
+ + .. tabs:: + + .. tab:: nn.Sequential - .. literalinclude:: ../snippets/multivariate_gaussian_model.py - :language: python - :linenos: - :start-after: [start-mlp] - :end-before: [end-mlp] + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-cnn-sequential] + :end-before: [end-cnn-sequential] - .. tab:: Convolutional Neural Network (CNN) + .. tab:: nn.functional - .. literalinclude:: ../snippets/multivariate_gaussian_model.py - :language: python - :linenos: - :start-after: [start-cnn] - :end-before: [end-cnn] + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-cnn-functional] + :end-before: [end-cnn-functional] API --- From 5a34f3b987c9b6118fb05ac672f6a625abd5e646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 3 Dec 2022 14:23:16 +0100 Subject: [PATCH 092/157] Add example models image files to docs --- docs/source/_static/imgs/model_categorical_cnn.svg | 1 + docs/source/_static/imgs/model_categorical_mlp.svg | 1 + docs/source/_static/imgs/model_deterministic_cnn.svg | 1 + docs/source/_static/imgs/model_deterministic_mlp.svg | 1 + docs/source/_static/imgs/model_gaussian_cnn.svg | 1 + docs/source/_static/imgs/model_gaussian_mlp.svg | 1 + 6 files changed, 6 insertions(+) create mode 100755 docs/source/_static/imgs/model_categorical_cnn.svg create mode 100755 docs/source/_static/imgs/model_categorical_mlp.svg create mode 100755 docs/source/_static/imgs/model_deterministic_cnn.svg create mode 100755 docs/source/_static/imgs/model_deterministic_mlp.svg create mode 100755 docs/source/_static/imgs/model_gaussian_cnn.svg create mode 100755 docs/source/_static/imgs/model_gaussian_mlp.svg diff --git a/docs/source/_static/imgs/model_categorical_cnn.svg b/docs/source/_static/imgs/model_categorical_cnn.svg new file mode 100755 index 00000000..da4893c4 --- /dev/null +++ b/docs/source/_static/imgs/model_categorical_cnn.svg @@ -0,0 +1 @@ +FC3+ TanhFC4+TanhFC5logits6432n165121024FC2+ TanhFC1+ ReLUflatten646432341286156415128Conv3+ ReLUConv2+ ReLUConv1+ ReLUstates12288reshape (view)and permute(channels first) diff --git a/docs/source/_static/imgs/model_categorical_mlp.svg b/docs/source/_static/imgs/model_categorical_mlp.svg new file mode 100755 index 00000000..51c4e25e --- /dev/null +++ b/docs/source/_static/imgs/model_categorical_mlp.svg @@ -0,0 +1 @@ +FC1+ ReLUFC2+ReLUFC3stateslogits6432n diff --git a/docs/source/_static/imgs/model_deterministic_cnn.svg b/docs/source/_static/imgs/model_deterministic_cnn.svg new file mode 100755 index 00000000..7ddb76f7 --- /dev/null +++ b/docs/source/_static/imgs/model_deterministic_cnn.svg @@ -0,0 +1 @@ +165121024FC2+ TanhFC1+ ReLUflatten646432341286156415128Conv3+ ReLUConv2+ ReLUConv1+ ReLUstatesreshape (view)and permute(channels first)12288FC3+ TanhFC4+Tanh6432FC51takenactions diff --git a/docs/source/_static/imgs/model_deterministic_mlp.svg b/docs/source/_static/imgs/model_deterministic_mlp.svg new file mode 100755 index 00000000..e5a26ce9 --- /dev/null +++ b/docs/source/_static/imgs/model_deterministic_mlp.svg @@ -0,0 +1 @@ +FC1+ ReLUFC2+ReLUFC3statestakenactions64321 diff --git a/docs/source/_static/imgs/model_gaussian_cnn.svg b/docs/source/_static/imgs/model_gaussian_cnn.svg new file mode 100755 index 00000000..426753a5 --- /dev/null +++ b/docs/source/_static/imgs/model_gaussian_cnn.svg @@ -0,0 +1 @@ +FC3+ TanhFC4+Tanh6432165121024FC2+ TanhFC1+ ReLUflatten646432341286156415128Conv3+ ReLUConv2+ 
ReLUConv1+ ReLUstatesreshape (view)and permute(channels first)12288meanactionsFC5numactions diff --git a/docs/source/_static/imgs/model_gaussian_mlp.svg b/docs/source/_static/imgs/model_gaussian_mlp.svg new file mode 100755 index 00000000..d1765944 --- /dev/null +++ b/docs/source/_static/imgs/model_gaussian_mlp.svg @@ -0,0 +1 @@ +FC1+ ReLUFC2+ReLUFC3+Tanhstatesmeanactions6432numactions From 4cccd8de024cb69c885afaf06a745941ccd44c16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 3 Dec 2022 19:59:03 +0100 Subject: [PATCH 093/157] Rename gym/gymnasium examples and new files --- docs/source/examples/gym/a2c_gym_pendulum.py | 114 +++++++++ .../examples/gym/a2c_gym_pendulumnovel.py | 116 +++++++++ .../examples/gym/a2c_gym_pendulumnovel_gru.py | 215 +++++++++++++++++ .../gym/a2c_gym_pendulumnovel_lstm.py | 225 +++++++++++++++++ .../examples/gym/a2c_gym_pendulumnovel_rnn.py | 215 +++++++++++++++++ ...ym_cartpole_cem.py => cem_gym_cartpole.py} | 0 ...e_cem_eval.py => cem_gym_cartpole_eval.py} | 0 ..._pendulum_ddpg.py => ddpg_gym_pendulum.py} | 28 ++- ...ddpg_eval.py => ddpg_gym_pendulum_eval.py} | 0 ...um_ddpg.py => ddpg_gym_pendulum_vector.py} | 0 .../examples/gym/ddpg_gym_pendulumnovel.py | 109 +++++++++ .../gym/ddpg_gym_pendulumnovel_gru.py | 211 ++++++++++++++++ .../gym/ddpg_gym_pendulumnovel_lstm.py | 221 +++++++++++++++++ .../gym/ddpg_gym_pendulumnovel_rnn.py | 211 ++++++++++++++++ ...ym_cartpole_dqn.py => dqn_gym_cartpole.py} | 0 ...e_dqn_eval.py => dqn_gym_cartpole_eval.py} | 0 ...pole_dqn.py => dqn_gym_cartpole_vector.py} | 0 docs/source/examples/gym/ppo_gym_pendulum.py | 116 +++++++++ .../examples/gym/ppo_gym_pendulumnovel.py | 119 +++++++++ .../examples/gym/ppo_gym_pendulumnovel_gru.py | 218 +++++++++++++++++ .../gym/ppo_gym_pendulumnovel_lstm.py | 228 ++++++++++++++++++ .../examples/gym/ppo_gym_pendulumnovel_rnn.py | 218 +++++++++++++++++ ...rning.py => q_learning_gym_frozen_lake.py} | 0 ....py => q_learning_gym_frozen_lake_eval.py} | 0 ...y => q_learning_gym_frozen_lake_vector.py} | 0 docs/source/examples/gym/sac_gym_pendulum.py | 109 +++++++++ .../examples/gym/sac_gym_pendulumnovel.py | 112 +++++++++ .../examples/gym/sac_gym_pendulumnovel_gru.py | 213 ++++++++++++++++ .../gym/sac_gym_pendulumnovel_lstm.py | 223 +++++++++++++++++ .../examples/gym/sac_gym_pendulumnovel_rnn.py | 213 ++++++++++++++++ .../{gym_taxi_sarsa.py => sarsa_gym_taxi.py} | 0 ...i_sarsa_eval.py => sarsa_gym_taxi_eval.py} | 0 ...taxi_sarsa.py => sarsa_gym_taxi_vector.py} | 0 docs/source/examples/gym/td3_gym_pendulum.py | 110 +++++++++ .../examples/gym/td3_gym_pendulumnovel.py | 113 +++++++++ .../examples/gym/td3_gym_pendulumnovel_gru.py | 215 +++++++++++++++++ .../gym/td3_gym_pendulumnovel_lstm.py | 225 +++++++++++++++++ .../examples/gym/td3_gym_pendulumnovel_rnn.py | 215 +++++++++++++++++ docs/source/examples/gym/trpo_gym_pendulum.py | 107 ++++++++ .../examples/gym/trpo_gym_pendulumnovel.py | 110 +++++++++ .../gym/trpo_gym_pendulumnovel_gru.py | 209 ++++++++++++++++ .../gym/trpo_gym_pendulumnovel_lstm.py | 219 +++++++++++++++++ .../gym/trpo_gym_pendulumnovel_rnn.py | 209 ++++++++++++++++ ...tpole_cem.py => cem_gymnasium_cartpole.py} | 0 ...eval.py => cem_gymnasium_cartpole_eval.py} | 0 ...lum_ddpg.py => ddpg_gymnasium_pendulum.py} | 0 ...val.py => ddpg_gymnasium_pendulum_eval.py} | 0 ...g.py => ddpg_gymnasium_pendulum_vector.py} | 0 ...tpole_dqn.py => dqn_gymnasium_cartpole.py} | 0 ...eval.py => dqn_gymnasium_cartpole_eval.py} | 0 ...qn.py => dqn_gymnasium_cartpole_vector.py} | 0 
...py => q_learning_gymnasium_frozen_lake.py} | 0 ... q_learning_gymnasium_frozen_lake_eval.py} | 0 ..._learning_gymnasium_frozen_lake_vector.py} | 0 ..._taxi_sarsa.py => sarsa_gymnasium_taxi.py} | 0 ...a_eval.py => sarsa_gymnasium_taxi_eval.py} | 0 ...arsa.py => sarsa_gymnasium_taxi_vector.py} | 0 57 files changed, 5153 insertions(+), 13 deletions(-) create mode 100644 docs/source/examples/gym/a2c_gym_pendulum.py create mode 100644 docs/source/examples/gym/a2c_gym_pendulumnovel.py create mode 100644 docs/source/examples/gym/a2c_gym_pendulumnovel_gru.py create mode 100644 docs/source/examples/gym/a2c_gym_pendulumnovel_lstm.py create mode 100644 docs/source/examples/gym/a2c_gym_pendulumnovel_rnn.py rename docs/source/examples/gym/{gym_cartpole_cem.py => cem_gym_cartpole.py} (100%) rename docs/source/examples/gym/{gym_cartpole_cem_eval.py => cem_gym_cartpole_eval.py} (100%) rename docs/source/examples/gym/{gym_pendulum_ddpg.py => ddpg_gym_pendulum.py} (80%) rename docs/source/examples/gym/{gym_pendulum_ddpg_eval.py => ddpg_gym_pendulum_eval.py} (100%) rename docs/source/examples/gym/{gym_vector_pendulum_ddpg.py => ddpg_gym_pendulum_vector.py} (100%) create mode 100644 docs/source/examples/gym/ddpg_gym_pendulumnovel.py create mode 100644 docs/source/examples/gym/ddpg_gym_pendulumnovel_gru.py create mode 100644 docs/source/examples/gym/ddpg_gym_pendulumnovel_lstm.py create mode 100644 docs/source/examples/gym/ddpg_gym_pendulumnovel_rnn.py rename docs/source/examples/gym/{gym_cartpole_dqn.py => dqn_gym_cartpole.py} (100%) rename docs/source/examples/gym/{gym_cartpole_dqn_eval.py => dqn_gym_cartpole_eval.py} (100%) rename docs/source/examples/gym/{gym_vector_cartpole_dqn.py => dqn_gym_cartpole_vector.py} (100%) create mode 100644 docs/source/examples/gym/ppo_gym_pendulum.py create mode 100644 docs/source/examples/gym/ppo_gym_pendulumnovel.py create mode 100644 docs/source/examples/gym/ppo_gym_pendulumnovel_gru.py create mode 100644 docs/source/examples/gym/ppo_gym_pendulumnovel_lstm.py create mode 100644 docs/source/examples/gym/ppo_gym_pendulumnovel_rnn.py rename docs/source/examples/gym/{gym_frozen_lake_q_learning.py => q_learning_gym_frozen_lake.py} (100%) rename docs/source/examples/gym/{gym_frozen_lake_q_learning_eval.py => q_learning_gym_frozen_lake_eval.py} (100%) rename docs/source/examples/gym/{gym_vector_frozen_lake_q_learning.py => q_learning_gym_frozen_lake_vector.py} (100%) create mode 100644 docs/source/examples/gym/sac_gym_pendulum.py create mode 100644 docs/source/examples/gym/sac_gym_pendulumnovel.py create mode 100644 docs/source/examples/gym/sac_gym_pendulumnovel_gru.py create mode 100644 docs/source/examples/gym/sac_gym_pendulumnovel_lstm.py create mode 100644 docs/source/examples/gym/sac_gym_pendulumnovel_rnn.py rename docs/source/examples/gym/{gym_taxi_sarsa.py => sarsa_gym_taxi.py} (100%) rename docs/source/examples/gym/{gym_taxi_sarsa_eval.py => sarsa_gym_taxi_eval.py} (100%) rename docs/source/examples/gym/{gym_vector_taxi_sarsa.py => sarsa_gym_taxi_vector.py} (100%) create mode 100644 docs/source/examples/gym/td3_gym_pendulum.py create mode 100644 docs/source/examples/gym/td3_gym_pendulumnovel.py create mode 100644 docs/source/examples/gym/td3_gym_pendulumnovel_gru.py create mode 100644 docs/source/examples/gym/td3_gym_pendulumnovel_lstm.py create mode 100644 docs/source/examples/gym/td3_gym_pendulumnovel_rnn.py create mode 100644 docs/source/examples/gym/trpo_gym_pendulum.py create mode 100644 docs/source/examples/gym/trpo_gym_pendulumnovel.py create mode 100644 
docs/source/examples/gym/trpo_gym_pendulumnovel_gru.py create mode 100644 docs/source/examples/gym/trpo_gym_pendulumnovel_lstm.py create mode 100644 docs/source/examples/gym/trpo_gym_pendulumnovel_rnn.py rename docs/source/examples/gymnasium/{gymnasium_cartpole_cem.py => cem_gymnasium_cartpole.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_cartpole_cem_eval.py => cem_gymnasium_cartpole_eval.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_pendulum_ddpg.py => ddpg_gymnasium_pendulum.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_pendulum_ddpg_eval.py => ddpg_gymnasium_pendulum_eval.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_vector_pendulum_ddpg.py => ddpg_gymnasium_pendulum_vector.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_cartpole_dqn.py => dqn_gymnasium_cartpole.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_cartpole_dqn_eval.py => dqn_gymnasium_cartpole_eval.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_vector_cartpole_dqn.py => dqn_gymnasium_cartpole_vector.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_frozen_lake_q_learning.py => q_learning_gymnasium_frozen_lake.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_frozen_lake_q_learning_eval.py => q_learning_gymnasium_frozen_lake_eval.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_vector_frozen_lake_q_learning.py => q_learning_gymnasium_frozen_lake_vector.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_taxi_sarsa.py => sarsa_gymnasium_taxi.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_taxi_sarsa_eval.py => sarsa_gymnasium_taxi_eval.py} (100%) rename docs/source/examples/gymnasium/{gymnasium_vector_taxi_sarsa.py => sarsa_gymnasium_taxi_vector.py} (100%) diff --git a/docs/source/examples/gym/a2c_gym_pendulum.py b/docs/source/examples/gym/a2c_gym_pendulum.py new file mode 100644 index 00000000..8590d020 --- /dev/null +++ b/docs/source/examples/gym/a2c_gym_pendulum.py @@ -0,0 +1,114 @@ +import gym + +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.a2c import A2C, A2C_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.envs.torch import wrap_env +from skrl.utils import set_seed + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
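+# Each model couples a skrl mixin (GaussianMixin / DeterministicMixin, which provides the
+# distribution and output handling) with the base Model class (which holds the observation
+# and action spaces and the device):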
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(inputs["states"])), self.log_std_parameter, {} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), {} + + +# Load and wrap the Gym environment. +# Note: the environment version may change depending on the gym version +try: + env = gym.vector.make("Pendulum-v1", num_envs=10, asynchronous=False) +except gym.error.DeprecatedEnv as e: + env_id = [spec.id for spec in gym.envs.registry.all() if spec.id.startswith("Pendulum-v")][0] + print("Pendulum-v1 not found. Trying {}".format(env_id)) + env = gym.vector.make(env_id, num_envs=10, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# A2C requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.a2c.html#spaces-and-models +models_a2c = {} +models_a2c["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True) +models_a2c["value"] = Value(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
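+# Here "rollouts" equals the memory size, so every update consumes one full buffer gathered
+# from the 10 vectorized environments, and KLAdaptiveRL rescales the learning rate from the
+# measured KL divergence (clamped below by "min_lr").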
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.a2c.html#configuration-and-hyperparameters +cfg_a2c = A2C_DEFAULT_CONFIG.copy() +cfg_a2c["rollouts"] = 1024 # memory_size +cfg_a2c["learning_epochs"] = 10 +cfg_a2c["mini_batches"] = 32 +cfg_a2c["discount_factor"] = 0.9 +cfg_a2c["lambda"] = 0.95 +cfg_a2c["learning_rate"] = 1e-3 +cfg_a2c["learning_rate_scheduler"] = KLAdaptiveRL +cfg_a2c["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008, "min_lr": 5e-4} +cfg_a2c["random_timesteps"] = 0 +cfg_a2c["learning_starts"] = 0 +cfg_a2c["grad_norm_clip"] = 0.5 +cfg_a2c["entropy_loss_scale"] = 0.0 +cfg_a2c["state_preprocessor"] = RunningStandardScaler +cfg_a2c["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_a2c["value_preprocessor"] = RunningStandardScaler +cfg_a2c["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_a2c["experiment"]["write_interval"] = 500 +cfg_a2c["experiment"]["checkpoint_interval"] = 5000 + +agent_ddpg = A2C(models=models_a2c, + memory=memory, + cfg=cfg_a2c, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/a2c_gym_pendulumnovel.py b/docs/source/examples/gym/a2c_gym_pendulumnovel.py new file mode 100644 index 00000000..49401866 --- /dev/null +++ b/docs/source/examples/gym/a2c_gym_pendulumnovel.py @@ -0,0 +1,116 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.a2c import A2C, A2C_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
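+# (same architecture as in the plain Pendulum-v1 example; the environment registered further
+# below zeroes the angular-velocity component, so the task becomes partially observable):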
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(inputs["states"])), self.log_std_parameter, {} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), {} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# A2C requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.a2c.html#spaces-and-models +models_a2c = {} +models_a2c["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True) +models_a2c["value"] = Value(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
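+# The hyperparameters below mirror the fully observable Pendulum-v1 example (only the number
+# of vectorized environments differs), which makes the two runs easy to compare. A quick,
+# illustrative way to confirm the observation masking (not needed for training):
+#   check_env = gym.make("PendulumNoVel-v1")
+#   observation = check_env.reset()        # may be (observation, info) on newer gym releases
+#   assert float(observation[-1]) == 0.0   # the angular-velocity component is always zeroed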
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.a2c.html#configuration-and-hyperparameters +cfg_a2c = A2C_DEFAULT_CONFIG.copy() +cfg_a2c["rollouts"] = 1024 # memory_size +cfg_a2c["learning_epochs"] = 10 +cfg_a2c["mini_batches"] = 32 +cfg_a2c["discount_factor"] = 0.9 +cfg_a2c["lambda"] = 0.95 +cfg_a2c["learning_rate"] = 1e-3 +cfg_a2c["learning_rate_scheduler"] = KLAdaptiveRL +cfg_a2c["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008, "min_lr": 5e-4} +cfg_a2c["random_timesteps"] = 0 +cfg_a2c["learning_starts"] = 0 +cfg_a2c["grad_norm_clip"] = 0.5 +cfg_a2c["entropy_loss_scale"] = 0.0 +cfg_a2c["state_preprocessor"] = RunningStandardScaler +cfg_a2c["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_a2c["value_preprocessor"] = RunningStandardScaler +cfg_a2c["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_a2c["experiment"]["write_interval"] = 500 +cfg_a2c["experiment"]["checkpoint_interval"] = 5000 + +agent_ddpg = A2C(models=models_a2c, + memory=memory, + cfg=cfg_a2c, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/a2c_gym_pendulumnovel_gru.py b/docs/source/examples/gym/a2c_gym_pendulumnovel_gru.py new file mode 100644 index 00000000..a27b5f87 --- /dev/null +++ b/docs/source/examples/gym/a2c_gym_pendulumnovel_gru.py @@ -0,0 +1,215 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.a2c import A2C, A2C_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
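+# Both networks embed a GRU: get_specification() advertises the shape of the hidden state,
+# (D * num_layers, N, Hout) with N taken as the number of environments, so it can be created
+# and carried between calls, and compute() reads/writes it through the "rnn" entry: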
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(rnn_output)), self.log_std_parameter, {"rnn": [hidden_states]} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + 
hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# A2C requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.a2c.html#spaces-and-models +models_a2c = {} +models_a2c["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_a2c["value"] = Value(env.observation_space, env.action_space, device, num_envs=env.num_envs) + + +# Configure and instantiate the agent. 
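+# During each update the sampled transitions are viewed as sequences of length
+# "sequence_length" (128 in the models above) and, as handled in compute(), the hidden state
+# is zeroed at every step where an episode terminated inside a sequence.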
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.a2c.html#configuration-and-hyperparameters +cfg_a2c = A2C_DEFAULT_CONFIG.copy() +cfg_a2c["rollouts"] = 1024 # memory_size +cfg_a2c["learning_epochs"] = 10 +cfg_a2c["mini_batches"] = 32 +cfg_a2c["discount_factor"] = 0.9 +cfg_a2c["lambda"] = 0.95 +cfg_a2c["learning_rate"] = 1e-3 +cfg_a2c["learning_rate_scheduler"] = KLAdaptiveRL +cfg_a2c["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008, "min_lr": 5e-4} +cfg_a2c["random_timesteps"] = 0 +cfg_a2c["learning_starts"] = 0 +cfg_a2c["grad_norm_clip"] = 0.5 +cfg_a2c["entropy_loss_scale"] = 0.0 +cfg_a2c["state_preprocessor"] = RunningStandardScaler +cfg_a2c["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_a2c["value_preprocessor"] = RunningStandardScaler +cfg_a2c["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_a2c["experiment"]["write_interval"] = 500 +cfg_a2c["experiment"]["checkpoint_interval"] = 5000 + +agent_ddpg = A2C(models=models_a2c, + memory=memory, + cfg=cfg_a2c, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/a2c_gym_pendulumnovel_lstm.py b/docs/source/examples/gym/a2c_gym_pendulumnovel_lstm.py new file mode 100644 index 00000000..ab0731ad --- /dev/null +++ b/docs/source/examples/gym/a2c_gym_pendulumnovel_lstm.py @@ -0,0 +1,225 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.a2c import A2C, A2C_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
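+# (LSTM variant: each network keeps two recurrent tensors, the hidden state and the cell
+# state, so get_specification() registers both and compute() returns them as
+# {"rnn": [hidden_states, cell_states]}):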
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(rnn_output)), self.log_std_parameter, 
{"rnn": [rnn_states[0], rnn_states[1]]} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [rnn_states[0], rnn_states[1]]} + + +# Gym environment observation wrapper used to mask velocity. 
Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# A2C requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.a2c.html#spaces-and-models +models_a2c = {} +models_a2c["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_a2c["value"] = Value(env.observation_space, env.action_space, device, num_envs=env.num_envs) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.a2c.html#configuration-and-hyperparameters +cfg_a2c = A2C_DEFAULT_CONFIG.copy() +cfg_a2c["rollouts"] = 1024 # memory_size +cfg_a2c["learning_epochs"] = 10 +cfg_a2c["mini_batches"] = 32 +cfg_a2c["discount_factor"] = 0.9 +cfg_a2c["lambda"] = 0.95 +cfg_a2c["learning_rate"] = 1e-3 +cfg_a2c["learning_rate_scheduler"] = KLAdaptiveRL +cfg_a2c["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008, "min_lr": 5e-4} +cfg_a2c["random_timesteps"] = 0 +cfg_a2c["learning_starts"] = 0 +cfg_a2c["grad_norm_clip"] = 0.5 +cfg_a2c["entropy_loss_scale"] = 0.0 +cfg_a2c["state_preprocessor"] = RunningStandardScaler +cfg_a2c["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_a2c["value_preprocessor"] = RunningStandardScaler +cfg_a2c["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints every 500 and 5000 timesteps respectively +cfg_a2c["experiment"]["write_interval"] = 500 +cfg_a2c["experiment"]["checkpoint_interval"] = 5000 + +agent_a2c = A2C(models=models_a2c, + memory=memory, + cfg=cfg_a2c, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_a2c) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/a2c_gym_pendulumnovel_rnn.py b/docs/source/examples/gym/a2c_gym_pendulumnovel_rnn.py new file mode 100644 index 00000000..6357a894 --- /dev/null +++ b/docs/source/examples/gym/a2c_gym_pendulumnovel_rnn.py @@ -0,0 +1,215 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.a2c import A2C, A2C_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for
the agent using mixins. +# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(rnn_output)), self.log_std_parameter, {"rnn": [hidden_states]} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = 
nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# A2C requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.a2c.html#spaces-and-models +models_a2c = {} +models_a2c["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_a2c["value"] = Value(env.observation_space, env.action_space, device, num_envs=env.num_envs) + + +# Configure and instantiate the agent. 
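+# Note: "rollouts" below matches the memory size (1024), so each A2C update consumes the full rollout buffer.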
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.a2c.html#configuration-and-hyperparameters +cfg_a2c = A2C_DEFAULT_CONFIG.copy() +cfg_a2c["rollouts"] = 1024 # memory_size +cfg_a2c["learning_epochs"] = 10 +cfg_a2c["mini_batches"] = 32 +cfg_a2c["discount_factor"] = 0.9 +cfg_a2c["lambda"] = 0.95 +cfg_a2c["learning_rate"] = 1e-3 +cfg_a2c["learning_rate_scheduler"] = KLAdaptiveRL +cfg_a2c["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008, "min_lr": 5e-4} +cfg_a2c["random_timesteps"] = 0 +cfg_a2c["learning_starts"] = 0 +cfg_a2c["grad_norm_clip"] = 0.5 +cfg_a2c["entropy_loss_scale"] = 0.0 +cfg_a2c["state_preprocessor"] = RunningStandardScaler +cfg_a2c["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_a2c["value_preprocessor"] = RunningStandardScaler +cfg_a2c["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints every 500 and 5000 timesteps respectively +cfg_a2c["experiment"]["write_interval"] = 500 +cfg_a2c["experiment"]["checkpoint_interval"] = 5000 + +agent_a2c = A2C(models=models_a2c, + memory=memory, + cfg=cfg_a2c, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_a2c) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/gym_cartpole_cem.py b/docs/source/examples/gym/cem_gym_cartpole.py similarity index 100% rename from docs/source/examples/gym/gym_cartpole_cem.py rename to docs/source/examples/gym/cem_gym_cartpole.py diff --git a/docs/source/examples/gym/gym_cartpole_cem_eval.py b/docs/source/examples/gym/cem_gym_cartpole_eval.py similarity index 100% rename from docs/source/examples/gym/gym_cartpole_cem_eval.py rename to docs/source/examples/gym/cem_gym_cartpole_eval.py diff --git a/docs/source/examples/gym/gym_pendulum_ddpg.py b/docs/source/examples/gym/ddpg_gym_pendulum.py similarity index 80% rename from docs/source/examples/gym/gym_pendulum_ddpg.py rename to docs/source/examples/gym/ddpg_gym_pendulum.py index 17ced81a..e57eb411 100644 --- a/docs/source/examples/gym/gym_pendulum_ddpg.py +++ b/docs/source/examples/gym/ddpg_gym_pendulum.py @@ -16,7 +16,7 @@ # Define the models (deterministic models) for the DDPG agent using mixin # - Actor (policy): takes as input the environment's observation/state and returns an action # - Critic: takes the state and action as input and provides a value to guide the policy -class DeterministicActor(DeterministicMixin, Model): +class Actor(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False): Model.__init__(self, observation_space, action_space, device) DeterministicMixin.__init__(self, clip_actions) @@ -28,9 +28,10 @@ def __init__(self, observation_space, action_space, device, clip_actions=False): def compute(self, inputs, role): x = F.relu(self.linear_layer_1(inputs["states"])) x = F.relu(self.linear_layer_2(x)) - return 2 * torch.tanh(self.action_layer(x)), {} # Pendulum-v1 action_space is -2 to 2 + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} -class DeterministicCritic(DeterministicMixin, Model): +class Critic(DeterministicMixin, Model): def __init__(self, observation_space, action_space, device, clip_actions=False):
Model.__init__(self, observation_space, action_space, device) DeterministicMixin.__init__(self, clip_actions) @@ -59,17 +60,17 @@ def compute(self, inputs, role): # Instantiate a RandomMemory (without replacement) as experience replay memory -memory = RandomMemory(memory_size=15000, num_envs=env.num_envs, device=device, replacement=False) +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) # Instantiate the agent's models (function approximators). # DDPG requires 4 models, visit its documentation for more details # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models models_ddpg = {} -models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device) -models_ddpg["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device) -models_ddpg["critic"] = DeterministicCritic(env.observation_space, env.action_space, device) -models_ddpg["target_critic"] = DeterministicCritic(env.observation_space, env.action_space, device) +models_ddpg["policy"] = Actor(env.observation_space, env.action_space, device) +models_ddpg["target_policy"] = Actor(env.observation_space, env.action_space, device) +models_ddpg["critic"] = Critic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = Critic(env.observation_space, env.action_space, device) # Initialize the models' parameters (weights and biases) using a Gaussian distribution for model in models_ddpg.values(): @@ -81,12 +82,13 @@ def compute(self, inputs, role): # https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() cfg_ddpg["exploration"]["noise"] = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.1, base_scale=1.0, device=device) +cfg_ddpg["discount_factor"] = 0.98 cfg_ddpg["batch_size"] = 100 -cfg_ddpg["random_timesteps"] = 100 -cfg_ddpg["learning_starts"] = 100 -# logging to TensorBoard and write checkpoints each 300 and 1500 timesteps respectively -cfg_ddpg["experiment"]["write_interval"] = 300 -cfg_ddpg["experiment"]["checkpoint_interval"] = 1500 +cfg_ddpg["random_timesteps"] = 1000 +cfg_ddpg["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_ddpg["experiment"]["write_interval"] = 75 +cfg_ddpg["experiment"]["checkpoint_interval"] = 750 agent_ddpg = DDPG(models=models_ddpg, memory=memory, diff --git a/docs/source/examples/gym/gym_pendulum_ddpg_eval.py b/docs/source/examples/gym/ddpg_gym_pendulum_eval.py similarity index 100% rename from docs/source/examples/gym/gym_pendulum_ddpg_eval.py rename to docs/source/examples/gym/ddpg_gym_pendulum_eval.py diff --git a/docs/source/examples/gym/gym_vector_pendulum_ddpg.py b/docs/source/examples/gym/ddpg_gym_pendulum_vector.py similarity index 100% rename from docs/source/examples/gym/gym_vector_pendulum_ddpg.py rename to docs/source/examples/gym/ddpg_gym_pendulum_vector.py diff --git a/docs/source/examples/gym/ddpg_gym_pendulumnovel.py b/docs/source/examples/gym/ddpg_gym_pendulumnovel.py new file mode 100644 index 00000000..d1adc59d --- /dev/null +++ b/docs/source/examples/gym/ddpg_gym_pendulumnovel.py @@ -0,0 +1,109 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ddpg import DDPG, 
DDPG_DEFAULT_CONFIG +from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the DDPG agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) + x = F.relu(self.linear_layer_2(x)) + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x), {} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# DDPG requires 4 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models +models_ddpg = {} +models_ddpg["policy"] = Actor(env.observation_space, env.action_space, device) +models_ddpg["target_policy"] = Actor(env.observation_space, env.action_space, device) +models_ddpg["critic"] = Critic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = Critic(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_ddpg.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. 
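+# Note: OrnsteinUhlenbeckNoise provides temporally correlated exploration noise, the choice used in the original DDPG paper.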
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters +cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() +cfg_ddpg["exploration"]["noise"] = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.1, base_scale=1.0, device=device) +cfg_ddpg["discount_factor"] = 0.98 +cfg_ddpg["batch_size"] = 100 +cfg_ddpg["random_timesteps"] = 1000 +cfg_ddpg["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_ddpg["experiment"]["write_interval"] = 75 +cfg_ddpg["experiment"]["checkpoint_interval"] = 750 + +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/ddpg_gym_pendulumnovel_gru.py b/docs/source/examples/gym/ddpg_gym_pendulumnovel_gru.py new file mode 100644 index 00000000..90f36533 --- /dev/null +++ b/docs/source/examples/gym/ddpg_gym_pendulumnovel_gru.py @@ -0,0 +1,211 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG +from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the DDPG agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states 
corresponding to the initial sequence + sequence_index = 1 if role == "target_policy" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(rnn_output)) + x = F.relu(self.linear_layer_2(x)) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {"rnn": [hidden_states]} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # critic is only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role == "target_critic" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = 
indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + + return self.linear_layer_3(x), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# DDPG requires 4 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models +models_ddpg = {} +models_ddpg["policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_ddpg["target_policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_ddpg["critic"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_ddpg["target_critic"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_ddpg.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. 
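+# Note: unlike the feed-forward DDPG example above, "random_timesteps" is set to 0 in the recurrent (GRU/LSTM/RNN) examples.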
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters +cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() +cfg_ddpg["exploration"]["noise"] = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.1, base_scale=1.0, device=device) +cfg_ddpg["discount_factor"] = 0.98 +cfg_ddpg["batch_size"] = 100 +cfg_ddpg["random_timesteps"] = 0 +cfg_ddpg["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_ddpg["experiment"]["write_interval"] = 75 +cfg_ddpg["experiment"]["checkpoint_interval"] = 750 + +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/ddpg_gym_pendulumnovel_lstm.py b/docs/source/examples/gym/ddpg_gym_pendulumnovel_lstm.py new file mode 100644 index 00000000..d7310c74 --- /dev/null +++ b/docs/source/examples/gym/ddpg_gym_pendulumnovel_lstm.py @@ -0,0 +1,221 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG +from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the DDPG agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length 
+ hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + sequence_index = 1 if role == "target_policy" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(rnn_output)) + x = F.relu(self.linear_layer_2(x)) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {"rnn": [rnn_states[0], rnn_states[1]]} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # critic is only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = 
hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + sequence_index = 1 if role == "target_critic" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + + return self.linear_layer_3(x), {"rnn": [rnn_states[0], rnn_states[1]]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# DDPG requires 4 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models +models_ddpg = {} +models_ddpg["policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_ddpg["target_policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_ddpg["critic"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_ddpg["target_critic"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_ddpg.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters +cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() +cfg_ddpg["exploration"]["noise"] = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.1, base_scale=1.0, device=device) +cfg_ddpg["discount_factor"] = 0.98 +cfg_ddpg["batch_size"] = 100 +cfg_ddpg["random_timesteps"] = 0 +cfg_ddpg["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_ddpg["experiment"]["write_interval"] = 75 +cfg_ddpg["experiment"]["checkpoint_interval"] = 750 + +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/ddpg_gym_pendulumnovel_rnn.py b/docs/source/examples/gym/ddpg_gym_pendulumnovel_rnn.py new file mode 100644 index 00000000..a6a8df71 --- /dev/null +++ b/docs/source/examples/gym/ddpg_gym_pendulumnovel_rnn.py @@ -0,0 +1,211 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG +from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the DDPG agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states 
corresponding to the initial sequence + sequence_index = 1 if role == "target_policy" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(rnn_output)) + x = F.relu(self.linear_layer_2(x)) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {"rnn": [hidden_states]} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # critic is only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role == "target_critic" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = 
indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + + return self.linear_layer_3(x), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# DDPG requires 4 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models +models_ddpg = {} +models_ddpg["policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_ddpg["target_policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_ddpg["critic"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_ddpg["target_critic"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_ddpg.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters +cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() +cfg_ddpg["exploration"]["noise"] = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.1, base_scale=1.0, device=device) +cfg_ddpg["discount_factor"] = 0.98 +cfg_ddpg["batch_size"] = 100 +cfg_ddpg["random_timesteps"] = 0 +cfg_ddpg["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_ddpg["experiment"]["write_interval"] = 75 +cfg_ddpg["experiment"]["checkpoint_interval"] = 750 + +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/gym_cartpole_dqn.py b/docs/source/examples/gym/dqn_gym_cartpole.py similarity index 100% rename from docs/source/examples/gym/gym_cartpole_dqn.py rename to docs/source/examples/gym/dqn_gym_cartpole.py diff --git a/docs/source/examples/gym/gym_cartpole_dqn_eval.py b/docs/source/examples/gym/dqn_gym_cartpole_eval.py similarity index 100% rename from docs/source/examples/gym/gym_cartpole_dqn_eval.py rename to docs/source/examples/gym/dqn_gym_cartpole_eval.py diff --git a/docs/source/examples/gym/gym_vector_cartpole_dqn.py b/docs/source/examples/gym/dqn_gym_cartpole_vector.py similarity index 100% rename from docs/source/examples/gym/gym_vector_cartpole_dqn.py rename to docs/source/examples/gym/dqn_gym_cartpole_vector.py diff --git a/docs/source/examples/gym/ppo_gym_pendulum.py b/docs/source/examples/gym/ppo_gym_pendulum.py new file mode 100644 index 00000000..70917ec6 --- /dev/null +++ b/docs/source/examples/gym/ppo_gym_pendulum.py @@ -0,0 +1,116 @@ +import gym + +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(inputs["states"])), self.log_std_parameter, {} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), {} + + +# Load and wrap the Gym environment. +# Note: the environment version may change depending on the gym version +try: + env = gym.vector.make("Pendulum-v1", num_envs=4, asynchronous=False) +except gym.error.DeprecatedEnv as e: + env_id = [spec.id for spec in gym.envs.registry.all() if spec.id.startswith("Pendulum-v")][0] + print("Pendulum-v1 not found. Trying {}".format(env_id)) + env = gym.vector.make(env_id, num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
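+# Note: 4 vectorized environments are stepped in parallel, so a 1024-step rollout collects 4 * 1024 transitions.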
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 1024 # memory_size +cfg_ppo["learning_epochs"] = 10 +cfg_ppo["mini_batches"] = 32 +cfg_ppo["discount_factor"] = 0.9 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 1e-3 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["grad_norm_clip"] = 0.5 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = False +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 0.5 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 500 +cfg_ppo["experiment"]["checkpoint_interval"] = 5000 + +agent_ppo = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ppo) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/ppo_gym_pendulumnovel.py b/docs/source/examples/gym/ppo_gym_pendulumnovel.py new file mode 100644 index 00000000..79134c8d --- /dev/null +++ b/docs/source/examples/gym/ppo_gym_pendulumnovel.py @@ -0,0 +1,119 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(inputs["states"])), self.log_std_parameter, {} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), {} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
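+# Note: KLAdaptiveRL adjusts the learning rate according to the KL divergence between policy
+# updates, using the kl_threshold value passed in learning_rate_scheduler_kwargs (0.008 here)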
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 1024 # memory_size +cfg_ppo["learning_epochs"] = 10 +cfg_ppo["mini_batches"] = 32 +cfg_ppo["discount_factor"] = 0.9 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 1e-3 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["grad_norm_clip"] = 0.5 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = False +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 0.5 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 500 +cfg_ppo["experiment"]["checkpoint_interval"] = 5000 + +agent_ppo = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ppo) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/ppo_gym_pendulumnovel_gru.py b/docs/source/examples/gym/ppo_gym_pendulumnovel_gru.py new file mode 100644 index 00000000..ce4c8fa7 --- /dev/null +++ b/docs/source/examples/gym/ppo_gym_pendulumnovel_gru.py @@ -0,0 +1,218 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
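+# Note: recurrent models implement get_specification() so that skrl can allocate and pass the
+# recurrent states expected in inputs["rnn"]; with num_layers=1, hidden_size=64 and the 4
+# vectorized environments used below, each hidden-state tensor has shape (1, 4, 64)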
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(rnn_output)), self.log_std_parameter, {"rnn": [hidden_states]} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + 
hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_ppo["value"] = Value(env.observation_space, env.action_space, device, num_envs=env.num_envs) + + +# Configure and instantiate the agent. 
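+# Note: during training, compute() reshapes each sampled mini-batch into windows of
+# sequence_length=128 steps and resets the hidden states wherever an episode terminated
+# inside a window (truncated backpropagation through time)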
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 1024 # memory_size +cfg_ppo["learning_epochs"] = 10 +cfg_ppo["mini_batches"] = 32 +cfg_ppo["discount_factor"] = 0.9 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 1e-3 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["grad_norm_clip"] = 0.5 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = False +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 0.5 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 500 +cfg_ppo["experiment"]["checkpoint_interval"] = 5000 + +agent_ppo = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ppo) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/ppo_gym_pendulumnovel_lstm.py b/docs/source/examples/gym/ppo_gym_pendulumnovel_lstm.py new file mode 100644 index 00000000..b5901411 --- /dev/null +++ b/docs/source/examples/gym/ppo_gym_pendulumnovel_lstm.py @@ -0,0 +1,228 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
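+# Note: unlike the GRU variant, the LSTM keeps both hidden and cell states, so
+# get_specification() declares two state tensors and compute() returns both under the "rnn" key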
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(rnn_output)), self.log_std_parameter, 
{"rnn": [rnn_states[0], rnn_states[1]]} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [rnn_states[0], rnn_states[1]]} + + +# Gym environment observation wrapper used to mask velocity. 
Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_ppo["value"] = Value(env.observation_space, env.action_space, device, num_envs=env.num_envs) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 1024 # memory_size +cfg_ppo["learning_epochs"] = 10 +cfg_ppo["mini_batches"] = 32 +cfg_ppo["discount_factor"] = 0.9 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 1e-3 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["grad_norm_clip"] = 0.5 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = False +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 0.5 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 500 +cfg_ppo["experiment"]["checkpoint_interval"] = 5000 + +agent_ppo = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ppo) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/ppo_gym_pendulumnovel_rnn.py b/docs/source/examples/gym/ppo_gym_pendulumnovel_rnn.py new file mode 100644 index 00000000..de595dc5 --- /dev/null +++ b/docs/source/examples/gym/ppo_gym_pendulumnovel_rnn.py @@ -0,0 +1,218 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.envs.torch 
import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. +# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(rnn_output)), self.log_std_parameter, {"rnn": [hidden_states]} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # 
Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_ppo["value"] = Value(env.observation_space, env.action_space, device, num_envs=env.num_envs) + + +# Configure and instantiate the agent. 
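+# Note: RunningStandardScaler standardizes its inputs with running mean/std estimates updated
+# during training; it is applied here to the observations and to the value predictions/returns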
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 1024 # memory_size +cfg_ppo["learning_epochs"] = 10 +cfg_ppo["mini_batches"] = 32 +cfg_ppo["discount_factor"] = 0.9 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 1e-3 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["grad_norm_clip"] = 0.5 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = False +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 0.5 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 500 +cfg_ppo["experiment"]["checkpoint_interval"] = 5000 + +agent_ppo = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ppo) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/gym_frozen_lake_q_learning.py b/docs/source/examples/gym/q_learning_gym_frozen_lake.py similarity index 100% rename from docs/source/examples/gym/gym_frozen_lake_q_learning.py rename to docs/source/examples/gym/q_learning_gym_frozen_lake.py diff --git a/docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py b/docs/source/examples/gym/q_learning_gym_frozen_lake_eval.py similarity index 100% rename from docs/source/examples/gym/gym_frozen_lake_q_learning_eval.py rename to docs/source/examples/gym/q_learning_gym_frozen_lake_eval.py diff --git a/docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py b/docs/source/examples/gym/q_learning_gym_frozen_lake_vector.py similarity index 100% rename from docs/source/examples/gym/gym_vector_frozen_lake_q_learning.py rename to docs/source/examples/gym/q_learning_gym_frozen_lake_vector.py diff --git a/docs/source/examples/gym/sac_gym_pendulum.py b/docs/source/examples/gym/sac_gym_pendulum.py new file mode 100644 index 00000000..2c08048c --- /dev/null +++ b/docs/source/examples/gym/sac_gym_pendulum.py @@ -0,0 +1,109 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin, GaussianMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.sac import SAC, SAC_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the SAC agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, 
min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.linear_layer_1 = nn.Linear(self.num_observations, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) + x = F.relu(self.linear_layer_2(x)) + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), self.log_std_parameter, {} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x), {} + + +# Load and wrap the Gym environment. +# Note: the environment version may change depending on the gym version +try: + env = gym.make("Pendulum-v1") +except gym.error.DeprecatedEnv as e: + env_id = [spec.id for spec in gym.envs.registry.all() if spec.id.startswith("Pendulum-v")][0] + print("Pendulum-v1 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# SAC requires 5 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models +models_sac = {} +models_sac["policy"] = Actor(env.observation_space, env.action_space, device, clip_actions=True) +models_sac["critic_1"] = Critic(env.observation_space, env.action_space, device) +models_sac["critic_2"] = Critic(env.observation_space, env.action_space, device) +models_sac["target_critic_1"] = Critic(env.observation_space, env.action_space, device) +models_sac["target_critic_2"] = Critic(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_sac.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. 
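+# Note: learn_entropy=True enables automatic tuning of the entropy coefficient, and
+# learning_starts=1000 delays gradient updates until 1000 transitions have been collected
+# in the replay memory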
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#configuration-and-hyperparameters +cfg_sac = SAC_DEFAULT_CONFIG.copy() +cfg_sac["discount_factor"] = 0.98 +cfg_sac["batch_size"] = 100 +cfg_sac["random_timesteps"] = 0 +cfg_sac["learning_starts"] = 1000 +cfg_sac["learn_entropy"] = True +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_sac["experiment"]["write_interval"] = 75 +cfg_sac["experiment"]["checkpoint_interval"] = 750 + +agent_sac = SAC(models=models_sac, + memory=memory, + cfg=cfg_sac, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_sac) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/sac_gym_pendulumnovel.py b/docs/source/examples/gym/sac_gym_pendulumnovel.py new file mode 100644 index 00000000..50debf6f --- /dev/null +++ b/docs/source/examples/gym/sac_gym_pendulumnovel.py @@ -0,0 +1,112 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin, GaussianMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.sac import SAC, SAC_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the SAC agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.linear_layer_1 = nn.Linear(self.num_observations, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) + x = F.relu(self.linear_layer_2(x)) + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), self.log_std_parameter, {} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x), {} + + +# Gym environment observation wrapper used to mask velocity. 
Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# SAC requires 5 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models +models_sac = {} +models_sac["policy"] = Actor(env.observation_space, env.action_space, device, clip_actions=True) +models_sac["critic_1"] = Critic(env.observation_space, env.action_space, device) +models_sac["critic_2"] = Critic(env.observation_space, env.action_space, device) +models_sac["target_critic_1"] = Critic(env.observation_space, env.action_space, device) +models_sac["target_critic_2"] = Critic(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_sac.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#configuration-and-hyperparameters +cfg_sac = SAC_DEFAULT_CONFIG.copy() +cfg_sac["discount_factor"] = 0.98 +cfg_sac["batch_size"] = 100 +cfg_sac["random_timesteps"] = 0 +cfg_sac["learning_starts"] = 1000 +cfg_sac["learn_entropy"] = True +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_sac["experiment"]["write_interval"] = 75 +cfg_sac["experiment"]["checkpoint_interval"] = 750 + +agent_sac = SAC(models=models_sac, + memory=memory, + cfg=cfg_sac, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_sac) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/sac_gym_pendulumnovel_gru.py b/docs/source/examples/gym/sac_gym_pendulumnovel_gru.py new file mode 100644 index 00000000..66e6d111 --- /dev/null +++ b/docs/source/examples/gym/sac_gym_pendulumnovel_gru.py @@ -0,0 +1,213 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin, GaussianMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.sac import SAC, SAC_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the SAC agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(GaussianMixin, Model): + def __init__(self, observation_space, action_space, 
device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(rnn_output)) + x = F.relu(self.linear_layer_2(x)) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), self.log_std_parameter, {"rnn": [hidden_states]} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, 
features) + + self.linear_layer_1 = nn.Linear(self.hidden_size + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # critic is only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role in ["target_critic_1", "target_critic_2"] else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + + return self.linear_layer_3(x), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). 
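+# Note: the recurrent models receive num_envs so that their rollout-time hidden states match
+# the number of parallel environments (env.num_envs is 1 here, since the environment is not vectorized)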
+# SAC requires 5 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models +models_sac = {} +models_sac["policy"] = Actor(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_sac["critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_sac["critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_sac["target_critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_sac["target_critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_sac.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#configuration-and-hyperparameters +cfg_sac = SAC_DEFAULT_CONFIG.copy() +cfg_sac["discount_factor"] = 0.98 +cfg_sac["batch_size"] = 100 +cfg_sac["random_timesteps"] = 0 +cfg_sac["learning_starts"] = 1000 +cfg_sac["learn_entropy"] = True +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_sac["experiment"]["write_interval"] = 75 +cfg_sac["experiment"]["checkpoint_interval"] = 750 + +agent_sac = SAC(models=models_sac, + memory=memory, + cfg=cfg_sac, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_sac) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/sac_gym_pendulumnovel_lstm.py b/docs/source/examples/gym/sac_gym_pendulumnovel_lstm.py new file mode 100644 index 00000000..8866a4bb --- /dev/null +++ b/docs/source/examples/gym/sac_gym_pendulumnovel_lstm.py @@ -0,0 +1,223 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin, GaussianMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.sac import SAC, SAC_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the SAC agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + 
hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(rnn_output)) + x = F.relu(self.linear_layer_2(x)) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), self.log_std_parameter, {"rnn": [rnn_states[0], rnn_states[1]]} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> 
(batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # critic is only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + sequence_index = 1 if role in ["target_critic_1", "target_critic_2"] else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + + return self.linear_layer_3(x), {"rnn": [rnn_states[0], rnn_states[1]]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). 
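+# Each model below is passed num_envs so that the LSTM hidden/cell state buffers declared in
+# get_specification(), of shape (num_layers, num_envs, hidden_size), match the number of parallel
+# environments (here: [(1, env.num_envs, 400), (1, env.num_envs, 400)] with the default arguments).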
+# SAC requires 5 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models +models_sac = {} +models_sac["policy"] = Actor(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_sac["critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_sac["critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_sac["target_critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_sac["target_critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_sac.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#configuration-and-hyperparameters +cfg_sac = SAC_DEFAULT_CONFIG.copy() +cfg_sac["discount_factor"] = 0.98 +cfg_sac["batch_size"] = 100 +cfg_sac["random_timesteps"] = 0 +cfg_sac["learning_starts"] = 1000 +cfg_sac["learn_entropy"] = True +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_sac["experiment"]["write_interval"] = 75 +cfg_sac["experiment"]["checkpoint_interval"] = 750 + +agent_sac = SAC(models=models_sac, + memory=memory, + cfg=cfg_sac, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_sac) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/sac_gym_pendulumnovel_rnn.py b/docs/source/examples/gym/sac_gym_pendulumnovel_rnn.py new file mode 100644 index 00000000..03ea028d --- /dev/null +++ b/docs/source/examples/gym/sac_gym_pendulumnovel_rnn.py @@ -0,0 +1,213 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin, GaussianMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.sac import SAC, SAC_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the SAC agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + 
batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(rnn_output)) + x = F.relu(self.linear_layer_2(x)) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), self.log_std_parameter, {"rnn": [hidden_states]} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # critic is only used during 
training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role in ["target_critic_1", "target_critic_2"] else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + + return self.linear_layer_3(x), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# SAC requires 5 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#spaces-and-models +models_sac = {} +models_sac["policy"] = Actor(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_sac["critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_sac["critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_sac["target_critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_sac["target_critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_sac.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. 
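+# With random_timesteps=0 the stochastic policy (and its recurrent state) is used from the very
+# first step; updates only begin once learning_starts (1000) transitions have been collected.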
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.sac.html#configuration-and-hyperparameters +cfg_sac = SAC_DEFAULT_CONFIG.copy() +cfg_sac["discount_factor"] = 0.98 +cfg_sac["batch_size"] = 100 +cfg_sac["random_timesteps"] = 0 +cfg_sac["learning_starts"] = 1000 +cfg_sac["learn_entropy"] = True +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_sac["experiment"]["write_interval"] = 75 +cfg_sac["experiment"]["checkpoint_interval"] = 750 + +agent_sac = SAC(models=models_sac, + memory=memory, + cfg=cfg_sac, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_sac) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/gym_taxi_sarsa.py b/docs/source/examples/gym/sarsa_gym_taxi.py similarity index 100% rename from docs/source/examples/gym/gym_taxi_sarsa.py rename to docs/source/examples/gym/sarsa_gym_taxi.py diff --git a/docs/source/examples/gym/gym_taxi_sarsa_eval.py b/docs/source/examples/gym/sarsa_gym_taxi_eval.py similarity index 100% rename from docs/source/examples/gym/gym_taxi_sarsa_eval.py rename to docs/source/examples/gym/sarsa_gym_taxi_eval.py diff --git a/docs/source/examples/gym/gym_vector_taxi_sarsa.py b/docs/source/examples/gym/sarsa_gym_taxi_vector.py similarity index 100% rename from docs/source/examples/gym/gym_vector_taxi_sarsa.py rename to docs/source/examples/gym/sarsa_gym_taxi_vector.py diff --git a/docs/source/examples/gym/td3_gym_pendulum.py b/docs/source/examples/gym/td3_gym_pendulum.py new file mode 100644 index 00000000..8f4be4b5 --- /dev/null +++ b/docs/source/examples/gym/td3_gym_pendulum.py @@ -0,0 +1,110 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG +from skrl.resources.noises.torch import GaussianNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the TD3 agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) + x = F.relu(self.linear_layer_2(x)) + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = 
nn.Linear(self.num_observations + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x), {} + + +# Load and wrap the Gym environment. +# Note: the environment version may change depending on the gym version +try: + env = gym.make("Pendulum-v1") +except gym.error.DeprecatedEnv as e: + env_id = [spec.id for spec in gym.envs.registry.all() if spec.id.startswith("Pendulum-v")][0] + print("Pendulum-v1 not found. Trying {}".format(env_id)) + env = gym.make(env_id) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# TD3 requires 6 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models +models_td3 = {} +models_td3["policy"] = Actor(env.observation_space, env.action_space, device) +models_td3["target_policy"] = Actor(env.observation_space, env.action_space, device) +models_td3["critic_1"] = Critic(env.observation_space, env.action_space, device) +models_td3["critic_2"] = Critic(env.observation_space, env.action_space, device) +models_td3["target_critic_1"] = Critic(env.observation_space, env.action_space, device) +models_td3["target_critic_2"] = Critic(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_td3.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. 
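+# The exploration noise N(0, 0.1) perturbs the actions applied to the environment, while the
+# smooth_regularization_* settings add clipped noise N(0, 0.2), limited to +/-0.5, to the target
+# actions (TD3's target policy smoothing).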
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#configuration-and-hyperparameters +cfg_td3 = TD3_DEFAULT_CONFIG.copy() +cfg_td3["exploration"]["noise"] = GaussianNoise(0, 0.1, device=device) +cfg_td3["smooth_regularization_noise"] = GaussianNoise(0, 0.2, device=device) +cfg_td3["smooth_regularization_clip"] = 0.5 +cfg_td3["discount_factor"] = 0.98 +cfg_td3["batch_size"] = 100 +cfg_td3["random_timesteps"] = 1000 +cfg_td3["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_td3["experiment"]["write_interval"] = 75 +cfg_td3["experiment"]["checkpoint_interval"] = 750 + +agent_td3 = TD3(models=models_td3, + memory=memory, + cfg=cfg_td3, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_td3) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/td3_gym_pendulumnovel.py b/docs/source/examples/gym/td3_gym_pendulumnovel.py new file mode 100644 index 00000000..1e85427a --- /dev/null +++ b/docs/source/examples/gym/td3_gym_pendulumnovel.py @@ -0,0 +1,113 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG +from skrl.resources.noises.torch import GaussianNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the TD3 agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) + x = F.relu(self.linear_layer_2(x)) + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x), {} + + +# Gym environment observation wrapper used to mask velocity. 
Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# TD3 requires 6 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models +models_td3 = {} +models_td3["policy"] = Actor(env.observation_space, env.action_space, device) +models_td3["target_policy"] = Actor(env.observation_space, env.action_space, device) +models_td3["critic_1"] = Critic(env.observation_space, env.action_space, device) +models_td3["critic_2"] = Critic(env.observation_space, env.action_space, device) +models_td3["target_critic_1"] = Critic(env.observation_space, env.action_space, device) +models_td3["target_critic_2"] = Critic(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_td3.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#configuration-and-hyperparameters +cfg_td3 = TD3_DEFAULT_CONFIG.copy() +cfg_td3["exploration"]["noise"] = GaussianNoise(0, 0.1, device=device) +cfg_td3["smooth_regularization_noise"] = GaussianNoise(0, 0.2, device=device) +cfg_td3["smooth_regularization_clip"] = 0.5 +cfg_td3["discount_factor"] = 0.98 +cfg_td3["batch_size"] = 100 +cfg_td3["random_timesteps"] = 1000 +cfg_td3["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_td3["experiment"]["write_interval"] = 75 +cfg_td3["experiment"]["checkpoint_interval"] = 750 + +agent_td3 = TD3(models=models_td3, + memory=memory, + cfg=cfg_td3, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_td3) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/td3_gym_pendulumnovel_gru.py b/docs/source/examples/gym/td3_gym_pendulumnovel_gru.py new file mode 100644 index 00000000..af7c93da --- /dev/null +++ b/docs/source/examples/gym/td3_gym_pendulumnovel_gru.py @@ -0,0 +1,215 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG +from skrl.resources.noises.torch import GaussianNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the TD3 agent using mixin +# - 
Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role == "target_policy" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(rnn_output)) + x = F.relu(self.linear_layer_2(x)) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {"rnn": [hidden_states]} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = 
sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # critic is only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role in ["target_critic_1", "target_critic_2"] else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + + return self.linear_layer_3(x), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). 
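+# As in the recurrent SAC examples above, num_envs is forwarded so that the GRU hidden-state
+# buffers from get_specification() have shape (num_layers, num_envs, hidden_size); the target
+# networks read the hidden states at sequence_index=1, i.e. the slice matching the next states.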
+# TD3 requires 6 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models +models_td3 = {} +models_td3["policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["target_policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["target_critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["target_critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_td3.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#configuration-and-hyperparameters +cfg_td3 = TD3_DEFAULT_CONFIG.copy() +cfg_td3["exploration"]["noise"] = GaussianNoise(0, 0.1, device=device) +cfg_td3["smooth_regularization_noise"] = GaussianNoise(0, 0.2, device=device) +cfg_td3["smooth_regularization_clip"] = 0.5 +cfg_td3["discount_factor"] = 0.98 +cfg_td3["batch_size"] = 100 +cfg_td3["random_timesteps"] = 0 +cfg_td3["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_td3["experiment"]["write_interval"] = 75 +cfg_td3["experiment"]["checkpoint_interval"] = 750 + +agent_td3 = TD3(models=models_td3, + memory=memory, + cfg=cfg_td3, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_td3) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/td3_gym_pendulumnovel_lstm.py b/docs/source/examples/gym/td3_gym_pendulumnovel_lstm.py new file mode 100644 index 00000000..d2f79407 --- /dev/null +++ b/docs/source/examples/gym/td3_gym_pendulumnovel_lstm.py @@ -0,0 +1,225 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG +from skrl.resources.noises.torch import GaussianNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the TD3 agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = 
hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + sequence_index = 1 if role == "target_policy" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(rnn_output)) + x = F.relu(self.linear_layer_2(x)) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {"rnn": [rnn_states[0], rnn_states[1]]} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + 
self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # critic is only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + sequence_index = 1 if role in ["target_critic_1", "target_critic_2"] else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + + return self.linear_layer_3(x), {"rnn": [rnn_states[0], rnn_states[1]]} + + +# Gym environment observation wrapper used to mask velocity. 
Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# TD3 requires 6 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models +models_td3 = {} +models_td3["policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["target_policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["target_critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["target_critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_td3.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#configuration-and-hyperparameters +cfg_td3 = TD3_DEFAULT_CONFIG.copy() +cfg_td3["exploration"]["noise"] = GaussianNoise(0, 0.1, device=device) +cfg_td3["smooth_regularization_noise"] = GaussianNoise(0, 0.2, device=device) +cfg_td3["smooth_regularization_clip"] = 0.5 +cfg_td3["discount_factor"] = 0.98 +cfg_td3["batch_size"] = 100 +cfg_td3["random_timesteps"] = 0 +cfg_td3["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_td3["experiment"]["write_interval"] = 75 +cfg_td3["experiment"]["checkpoint_interval"] = 750 + +agent_td3 = TD3(models=models_td3, + memory=memory, + cfg=cfg_td3, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_td3) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/td3_gym_pendulumnovel_rnn.py b/docs/source/examples/gym/td3_gym_pendulumnovel_rnn.py new file mode 100644 index 00000000..4abb5b9c --- /dev/null +++ b/docs/source/examples/gym/td3_gym_pendulumnovel_rnn.py @@ -0,0 +1,215 @@ +import gym + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG +from skrl.resources.noises.torch import GaussianNoise +from skrl.trainers.torch import 
SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the TD3 agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role == "target_policy" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(rnn_output)) + x = F.relu(self.linear_layer_2(x)) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {"rnn": [hidden_states]} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=400, sequence_length=20): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, 
clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.linear_layer_1 = nn.Linear(self.hidden_size + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # critic is only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role in ["target_critic_1", "target_critic_2"] else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = F.relu(self.linear_layer_1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + + return self.linear_layer_3(x), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.make("PendulumNoVel-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). 
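+# Each of the six models below instantiates its own nn.RNN and declares its own hidden-state
+# specification; num_envs=env.num_envs keeps those recurrent-state buffers sized for the wrapped
+# environment.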
+# TD3 requires 6 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models +models_td3 = {} +models_td3["policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["target_policy"] = Actor(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["target_critic_1"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) +models_td3["target_critic_2"] = Critic(env.observation_space, env.action_space, device, num_envs=env.num_envs) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_td3.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#configuration-and-hyperparameters +cfg_td3 = TD3_DEFAULT_CONFIG.copy() +cfg_td3["exploration"]["noise"] = GaussianNoise(0, 0.1, device=device) +cfg_td3["smooth_regularization_noise"] = GaussianNoise(0, 0.2, device=device) +cfg_td3["smooth_regularization_clip"] = 0.5 +cfg_td3["discount_factor"] = 0.98 +cfg_td3["batch_size"] = 100 +cfg_td3["random_timesteps"] = 0 +cfg_td3["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_td3["experiment"]["write_interval"] = 75 +cfg_td3["experiment"]["checkpoint_interval"] = 750 + +agent_td3 = TD3(models=models_td3, + memory=memory, + cfg=cfg_td3, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_td3) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/trpo_gym_pendulum.py b/docs/source/examples/gym/trpo_gym_pendulum.py new file mode 100644 index 00000000..3595d38a --- /dev/null +++ b/docs/source/examples/gym/trpo_gym_pendulum.py @@ -0,0 +1,107 @@ +import gym + +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.trpo import TRPO, TRPO_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
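+# (Both are small MLPs: the policy outputs a mean scaled by 2 * tanh to cover Pendulum's [-2, 2]
+# action range plus a learnable, state-independent log standard deviation; the value model
+# returns a scalar estimate.)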
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(inputs["states"])), self.log_std_parameter, {} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), {} + + +# Load and wrap the Gym environment. +# Note: the environment version may change depending on the gym version +try: + env = gym.vector.make("Pendulum-v1", num_envs=4, asynchronous=False) +except gym.error.DeprecatedEnv as e: + env_id = [spec.id for spec in gym.envs.registry.all() if spec.id.startswith("Pendulum-v")][0] + print("Pendulum-v1 not found. Trying {}".format(env_id)) + env = gym.vector.make(env_id, num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# TRPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#spaces-and-models +models_trpo = {} +models_trpo["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True) +models_trpo["value"] = Value(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
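+# rollouts matches the memory_size above, so the rollout buffer is filled exactly once
+# (1024 steps per environment, 4096 transitions with the 4 vectorized environments) before each
+# update; RunningStandardScaler standardizes the observations and the value targets using running
+# statistics.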
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#configuration-and-hyperparameters +cfg_trpo = TRPO_DEFAULT_CONFIG.copy() +cfg_trpo["rollouts"] = 1024 # memory_size +cfg_trpo["learning_epochs"] = 10 +cfg_trpo["mini_batches"] = 32 +cfg_trpo["discount_factor"] = 0.99 +cfg_trpo["lambda"] = 0.95 +cfg_trpo["learning_rate"] = 1e-3 +cfg_trpo["grad_norm_clip"] = 0.5 +cfg_trpo["state_preprocessor"] = RunningStandardScaler +cfg_trpo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_trpo["value_preprocessor"] = RunningStandardScaler +cfg_trpo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_trpo["experiment"]["write_interval"] = 500 +cfg_trpo["experiment"]["checkpoint_interval"] = 5000 + +agent_trpo = TRPO(models=models_trpo, + memory=memory, + cfg=cfg_trpo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_trpo) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/trpo_gym_pendulumnovel.py b/docs/source/examples/gym/trpo_gym_pendulumnovel.py new file mode 100644 index 00000000..28ed1005 --- /dev/null +++ b/docs/source/examples/gym/trpo_gym_pendulumnovel.py @@ -0,0 +1,110 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.trpo import TRPO, TRPO_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. +# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum"): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(inputs["states"])), self.log_std_parameter, {} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), {} + + +# Gym environment observation wrapper used to mask velocity. 
Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# TRPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#spaces-and-models +models_trpo = {} +models_trpo["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True) +models_trpo["value"] = Value(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#configuration-and-hyperparameters +cfg_trpo = TRPO_DEFAULT_CONFIG.copy() +cfg_trpo["rollouts"] = 1024 # memory_size +cfg_trpo["learning_epochs"] = 10 +cfg_trpo["mini_batches"] = 32 +cfg_trpo["discount_factor"] = 0.99 +cfg_trpo["lambda"] = 0.95 +cfg_trpo["learning_rate"] = 1e-3 +cfg_trpo["grad_norm_clip"] = 0.5 +cfg_trpo["state_preprocessor"] = RunningStandardScaler +cfg_trpo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_trpo["value_preprocessor"] = RunningStandardScaler +cfg_trpo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_trpo["experiment"]["write_interval"] = 500 +cfg_trpo["experiment"]["checkpoint_interval"] = 5000 + +agent_trpo = TRPO(models=models_trpo, + memory=memory, + cfg=cfg_trpo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_trpo) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/trpo_gym_pendulumnovel_gru.py b/docs/source/examples/gym/trpo_gym_pendulumnovel_gru.py new file mode 100644 index 00000000..9135d440 --- /dev/null +++ b/docs/source/examples/gym/trpo_gym_pendulumnovel_gru.py @@ -0,0 +1,209 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.trpo import TRPO, TRPO_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(rnn_output)), self.log_std_parameter, {"rnn": [hidden_states]} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + 
hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# TRPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#spaces-and-models +models_trpo = {} +models_trpo["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_trpo["value"] = Value(env.observation_space, env.action_space, device, num_envs=env.num_envs) + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#configuration-and-hyperparameters +cfg_trpo = TRPO_DEFAULT_CONFIG.copy() +cfg_trpo["rollouts"] = 1024 # memory_size +cfg_trpo["learning_epochs"] = 10 +cfg_trpo["mini_batches"] = 32 +cfg_trpo["discount_factor"] = 0.9 +cfg_trpo["lambda"] = 0.95 +cfg_trpo["learning_rate"] = 1e-3 +cfg_trpo["grad_norm_clip"] = 0.5 +cfg_trpo["state_preprocessor"] = RunningStandardScaler +cfg_trpo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_trpo["value_preprocessor"] = RunningStandardScaler +cfg_trpo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_trpo["experiment"]["write_interval"] = 500 +cfg_trpo["experiment"]["checkpoint_interval"] = 5000 + +agent_trpo = TRPO(models=models_trpo, + memory=memory, + cfg=cfg_trpo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_trpo) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/trpo_gym_pendulumnovel_lstm.py b/docs/source/examples/gym/trpo_gym_pendulumnovel_lstm.py new file mode 100644 index 00000000..120fec50 --- /dev/null +++ b/docs/source/examples/gym/trpo_gym_pendulumnovel_lstm.py @@ -0,0 +1,219 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.trpo import TRPO, TRPO_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(rnn_output)), self.log_std_parameter, 
{"rnn": [rnn_states[0], rnn_states[1]]} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [rnn_states[0], rnn_states[1]]} + + +# Gym environment observation wrapper used to mask velocity. 
Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# TRPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#spaces-and-models +models_trpo = {} +models_trpo["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_trpo["value"] = Value(env.observation_space, env.action_space, device, num_envs=env.num_envs) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#configuration-and-hyperparameters +cfg_trpo = TRPO_DEFAULT_CONFIG.copy() +cfg_trpo["rollouts"] = 1024 # memory_size +cfg_trpo["learning_epochs"] = 10 +cfg_trpo["mini_batches"] = 32 +cfg_trpo["discount_factor"] = 0.9 +cfg_trpo["lambda"] = 0.95 +cfg_trpo["learning_rate"] = 1e-3 +cfg_trpo["grad_norm_clip"] = 0.5 +cfg_trpo["state_preprocessor"] = RunningStandardScaler +cfg_trpo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_trpo["value_preprocessor"] = RunningStandardScaler +cfg_trpo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_trpo["experiment"]["write_interval"] = 500 +cfg_trpo["experiment"]["checkpoint_interval"] = 5000 + +agent_trpo = TRPO(models=models_trpo, + memory=memory, + cfg=cfg_trpo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_trpo) + +# start training +trainer.train() diff --git a/docs/source/examples/gym/trpo_gym_pendulumnovel_rnn.py b/docs/source/examples/gym/trpo_gym_pendulumnovel_rnn.py new file mode 100644 index 00000000..cb15682f --- /dev/null +++ b/docs/source/examples/gym/trpo_gym_pendulumnovel_rnn.py @@ -0,0 +1,209 @@ +import gym + +import torch +import torch.nn as nn +import numpy as np + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.trpo import TRPO, TRPO_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.envs.torch import wrap_env + + +# Define the models (stochastic and deterministic models) for the agent using mixins. 
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.net(rnn_output)), self.log_std_parameter, {"rnn": [hidden_states]} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=128): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + 
hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 1)) + + def get_specification(self): + # batch size (N) is the number of envs + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [hidden_states]} + + +# Gym environment observation wrapper used to mask velocity. Adapted from rl_zoo3 (rl_zoo3/wrappers.py) +class NoVelocityWrapper(gym.ObservationWrapper): + def observation(self, observation): + # observation: x, y, angular velocity + return observation * np.array([1, 1, 0]) + +gym.envs.registration.register(id="PendulumNoVel-v1", entry_point=lambda: NoVelocityWrapper(gym.make("Pendulum-v1"))) + +# Load and wrap the Gym environment +env = gym.vector.make("PendulumNoVel-v1", num_envs=4, asynchronous=False) +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=1024, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# TRPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#spaces-and-models +models_trpo = {} +models_trpo["policy"] = Policy(env.observation_space, env.action_space, device, clip_actions=True, num_envs=env.num_envs) +models_trpo["value"] = Value(env.observation_space, env.action_space, device, num_envs=env.num_envs) + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.trpo.html#configuration-and-hyperparameters +cfg_trpo = TRPO_DEFAULT_CONFIG.copy() +cfg_trpo["rollouts"] = 1024 # memory_size +cfg_trpo["learning_epochs"] = 10 +cfg_trpo["mini_batches"] = 32 +cfg_trpo["discount_factor"] = 0.9 +cfg_trpo["lambda"] = 0.95 +cfg_trpo["learning_rate"] = 1e-3 +cfg_trpo["grad_norm_clip"] = 0.5 +cfg_trpo["state_preprocessor"] = RunningStandardScaler +cfg_trpo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_trpo["value_preprocessor"] = RunningStandardScaler +cfg_trpo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 500 and 5000 timesteps respectively +cfg_trpo["experiment"]["write_interval"] = 500 +cfg_trpo["experiment"]["checkpoint_interval"] = 5000 + +agent_trpo = TRPO(models=models_trpo, + memory=memory, + cfg=cfg_trpo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 100000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_trpo) + +# start training +trainer.train() diff --git a/docs/source/examples/gymnasium/gymnasium_cartpole_cem.py b/docs/source/examples/gymnasium/cem_gymnasium_cartpole.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_cartpole_cem.py rename to docs/source/examples/gymnasium/cem_gymnasium_cartpole.py diff --git a/docs/source/examples/gymnasium/gymnasium_cartpole_cem_eval.py b/docs/source/examples/gymnasium/cem_gymnasium_cartpole_eval.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_cartpole_cem_eval.py rename to docs/source/examples/gymnasium/cem_gymnasium_cartpole_eval.py diff --git a/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg.py b/docs/source/examples/gymnasium/ddpg_gymnasium_pendulum.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_pendulum_ddpg.py rename to docs/source/examples/gymnasium/ddpg_gymnasium_pendulum.py diff --git a/docs/source/examples/gymnasium/gymnasium_pendulum_ddpg_eval.py b/docs/source/examples/gymnasium/ddpg_gymnasium_pendulum_eval.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_pendulum_ddpg_eval.py rename to docs/source/examples/gymnasium/ddpg_gymnasium_pendulum_eval.py diff --git a/docs/source/examples/gymnasium/gymnasium_vector_pendulum_ddpg.py b/docs/source/examples/gymnasium/ddpg_gymnasium_pendulum_vector.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_vector_pendulum_ddpg.py rename to docs/source/examples/gymnasium/ddpg_gymnasium_pendulum_vector.py diff --git a/docs/source/examples/gymnasium/gymnasium_cartpole_dqn.py b/docs/source/examples/gymnasium/dqn_gymnasium_cartpole.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_cartpole_dqn.py rename to docs/source/examples/gymnasium/dqn_gymnasium_cartpole.py diff --git a/docs/source/examples/gymnasium/gymnasium_cartpole_dqn_eval.py b/docs/source/examples/gymnasium/dqn_gymnasium_cartpole_eval.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_cartpole_dqn_eval.py rename to docs/source/examples/gymnasium/dqn_gymnasium_cartpole_eval.py diff --git a/docs/source/examples/gymnasium/gymnasium_vector_cartpole_dqn.py 
b/docs/source/examples/gymnasium/dqn_gymnasium_cartpole_vector.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_vector_cartpole_dqn.py rename to docs/source/examples/gymnasium/dqn_gymnasium_cartpole_vector.py diff --git a/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning.py b/docs/source/examples/gymnasium/q_learning_gymnasium_frozen_lake.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning.py rename to docs/source/examples/gymnasium/q_learning_gymnasium_frozen_lake.py diff --git a/docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py b/docs/source/examples/gymnasium/q_learning_gymnasium_frozen_lake_eval.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py rename to docs/source/examples/gymnasium/q_learning_gymnasium_frozen_lake_eval.py diff --git a/docs/source/examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py b/docs/source/examples/gymnasium/q_learning_gymnasium_frozen_lake_vector.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py rename to docs/source/examples/gymnasium/q_learning_gymnasium_frozen_lake_vector.py diff --git a/docs/source/examples/gymnasium/gymnasium_taxi_sarsa.py b/docs/source/examples/gymnasium/sarsa_gymnasium_taxi.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_taxi_sarsa.py rename to docs/source/examples/gymnasium/sarsa_gymnasium_taxi.py diff --git a/docs/source/examples/gymnasium/gymnasium_taxi_sarsa_eval.py b/docs/source/examples/gymnasium/sarsa_gymnasium_taxi_eval.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_taxi_sarsa_eval.py rename to docs/source/examples/gymnasium/sarsa_gymnasium_taxi_eval.py diff --git a/docs/source/examples/gymnasium/gymnasium_vector_taxi_sarsa.py b/docs/source/examples/gymnasium/sarsa_gymnasium_taxi_vector.py similarity index 100% rename from docs/source/examples/gymnasium/gymnasium_vector_taxi_sarsa.py rename to docs/source/examples/gymnasium/sarsa_gymnasium_taxi_vector.py From 387841255754c550bd45b92de3ffd501058dc78c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 3 Dec 2022 20:10:28 +0100 Subject: [PATCH 094/157] Rename example files inf docs --- docs/source/intro/examples.rst | 114 ++++++++++++++++----------------- 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index b2723691..48d36685 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -12,8 +12,8 @@ Examples
-Learning in a Gym/Gymnasium environment ---------------------------------------- +Gym/Gymnasium environment +------------------------- These examples perform the training of one agent in a Gym/Gymnasium environment (**one agent, one environment**) @@ -41,23 +41,23 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - | :download:`gym_pendulum_ddpg.py <../examples/gym/gym_pendulum_ddpg.py>` - | :download:`gymnasium_pendulum_ddpg.py <../examples/gymnasium/gymnasium_pendulum_ddpg.py>` + | :download:`ddpg_gym_pendulum.py <../examples/gym/ddpg_gym_pendulum.py>` + | :download:`ddpg_gymnasium_pendulum.py <../examples/gymnasium/ddpg_gymnasium_pendulum.py>` - .. literalinclude:: ../examples/gym/gym_pendulum_ddpg.py + .. literalinclude:: ../examples/gym/ddpg_gym_pendulum.py :language: python - :emphasize-lines: 1, 13, 50-56 + :emphasize-lines: 1, 13, 51-57 .. group-tab:: Evaluation - | :download:`gym_pendulum_ddpg_eval.py <../examples/gym/gym_pendulum_ddpg_eval.py>` - | :download:`gymnasium_pendulum_ddpg_eval.py <../examples/gymnasium/gymnasium_pendulum_ddpg_eval.py>` + | :download:`ddpg_gym_pendulum_eval.py <../examples/gym/ddpg_gym_pendulum_eval.py>` + | :download:`ddpg_gymnasium_pendulum_eval.py <../examples/gymnasium/ddpg_gymnasium_pendulum_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined - .. literalinclude:: ../examples/gym/gym_pendulum_ddpg_eval.py + .. literalinclude:: ../examples/gym/ddpg_gym_pendulum_eval.py :language: python :emphasize-lines: 67 @@ -67,23 +67,23 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - | :download:`gym_cartpole_cem.py <../examples/gym/gym_cartpole_cem.py>` - | :download:`gymnasium_cartpole_cem.py <../examples/gymnasium/gymnasium_cartpole_cem.py>` + | :download:`cem_gym_cartpole.py <../examples/gym/cem_gym_cartpole.py>` + | :download:`cem_gymnasium_cartpole.py <../examples/gymnasium/cem_gymnasium_cartpole.py>` - .. literalinclude:: ../examples/gym/gym_cartpole_cem.py + .. literalinclude:: ../examples/gym/cem_gym_cartpole.py :language: python :emphasize-lines: 1, 11, 33-39 .. group-tab:: Evaluation - | :download:`gym_cartpole_cem_eval.py <../examples/gym/gym_cartpole_cem_eval.py>` - | :download:`gymnasium_cartpole_cem_eval.py <../examples/gymnasium/gymnasium_cartpole_cem_eval.py>` + | :download:`cem_gym_cartpole_eval.py <../examples/gym/cem_gym_cartpole_eval.py>` + | :download:`cem_gymnasium_cartpole_eval.py <../examples/gymnasium/cem_gymnasium_cartpole_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined - .. literalinclude:: ../examples/gym/gym_cartpole_cem_eval.py + .. 
literalinclude:: ../examples/gym/cem_gym_cartpole_eval.py :language: python :emphasize-lines: 68 @@ -93,23 +93,23 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - | :download:`gym_cartpole_dqn.py <../examples/gym/gym_cartpole_dqn.py>` - | :download:`gymnasium_cartpole_dqn.py <../examples/gymnasium/gymnasium_cartpole_dqn.py>` + | :download:`dqn_gym_cartpole.py <../examples/gym/dqn_gym_cartpole.py>` + | :download:`dqn_gymnasium_cartpole.py <../examples/gymnasium/dqn_gymnasium_cartpole.py>` - .. literalinclude:: ../examples/gym/gym_cartpole_dqn.py + .. literalinclude:: ../examples/gym/dqn_gym_cartpole.py :language: python :emphasize-lines: 4, 31-51 .. group-tab:: Evaluation - | :download:`gym_cartpole_dqn_eval.py <../examples/gym/gym_cartpole_dqn_eval.py>` - | :download:`gymnasium_cartpole_dqn_eval.py <../examples/gymnasium/gymnasium_cartpole_dqn_eval.py>` + | :download:`dqn_gym_cartpole_eval.py <../examples/gym/dqn_gym_cartpole_eval.py>` + | :download:`dqn_gymnasium_cartpole_eval.py <../examples/gymnasium/dqn_gymnasium_cartpole_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined - .. literalinclude:: ../examples/gym/gym_cartpole_dqn_eval.py + .. literalinclude:: ../examples/gym/dqn_gym_cartpole_eval.py :language: python :emphasize-lines: 56 @@ -119,23 +119,23 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - | :download:`gym_taxi_sarsa.py <../examples/gym/gym_taxi_sarsa.py>` - | :download:`gymnasium_taxi_sarsa.py <../examples/gymnasium/gymnasium_taxi_sarsa.py>` + | :download:`sarsa_gym_taxi.py <../examples/gym/sarsa_gym_taxi.py>` + | :download:`sarsa_gymnasium_taxi.py <../examples/gymnasium/sarsa_gymnasium_taxi.py>` - .. literalinclude:: ../examples/gym/gym_taxi_sarsa.py + .. literalinclude:: ../examples/gym/sarsa_gym_taxi.py :language: python :emphasize-lines: 6, 13-30 .. group-tab:: Evaluation - | :download:`gym_taxi_sarsa_eval.py <../examples/gym/gym_taxi_sarsa_eval.py>` - | :download:`gymnasium_taxi_sarsa_eval.py <../examples/gymnasium/gymnasium_taxi_sarsa_eval.py>` + | :download:`sarsa_gym_taxi_eval.py <../examples/gym/sarsa_gym_taxi_eval.py>` + | :download:`sarsa_gymnasium_taxi_eval.py <../examples/gymnasium/sarsa_gymnasium_taxi_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined - .. literalinclude:: ../examples/gym/gym_taxi_sarsa_eval.py + .. literalinclude:: ../examples/gym/sarsa_gym_taxi_eval.py :language: python :emphasize-lines: 70 @@ -145,23 +145,23 @@ The following components or practices are exemplified (highlighted): .. 
group-tab:: Training - | :download:`gym_frozen_lake_q_learning.py <../examples/gym/gym_frozen_lake_q_learning.py>` - | :download:`gymnasium_frozen_lake_q_learning.py <../examples/gymnasium/gymnasium_frozen_lake_q_learning.py>` + | :download:`q_learning_gym_frozen_lake.py <../examples/gym/q_learning_gym_frozen_lake.py>` + | :download:`q_learning_gymnasium_frozen_lake.py <../examples/gymnasium/q_learning_gymnasium_frozen_lake.py>` - .. literalinclude:: ../examples/gym/gym_frozen_lake_q_learning.py + .. literalinclude:: ../examples/gym/q_learning_gym_frozen_lake.py :language: python :emphasize-lines: 6, 13-30 .. group-tab:: Evaluation - | :download:`gym_frozen_lake_q_learning_eval.py <../examples/gym/gym_frozen_lake_q_learning_eval.py>` - | :download:`gymnasium_frozen_lake_q_learning_eval.py <../examples/gymnasium/gymnasium_frozen_lake_q_learning_eval.py>` + | :download:`q_learning_gym_frozen_lake_eval.py <../examples/gym/q_learning_gym_frozen_lake_eval.py>` + | :download:`q_learning_gymnasium_frozen_lake_eval.py <../examples/gymnasium/q_learning_gymnasium_frozen_lake_eval.py>` **Note:** It is necessary to adjust the checkpoint path according to the directories generated by the new experiments **Note:** Warnings such as :literal:`[skrl:WARNING] Cannot load the module. The agent doesn't have such an instance` can be ignored without problems. The reason for this is that during the evaluation, not all components such as optimizers or other models apart from the policy are defined - .. literalinclude:: ../examples/gym/gym_frozen_lake_q_learning_eval.py + .. literalinclude:: ../examples/gym/q_learning_gym_frozen_lake_eval.py :language: python :emphasize-lines: 70 @@ -169,8 +169,8 @@ The following components or practices are exemplified (highlighted):
-Learning in a Gym/Gymnasium vectorized environment --------------------------------------------------- +Gym/Gymnasium vectorized environment +------------------------------------ These examples perform the training of one agent in a Gym/Gymnasium vectorized environment (**one agent, multiple independent copies of the same environment in parallel**) @@ -186,10 +186,10 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - | :download:`gym_vector_pendulum_ddpg.py <../examples/gym/gym_vector_pendulum_ddpg.py>` - | :download:`gymnasium_vector_pendulum_ddpg.py <../examples/gymnasium/gymnasium_vector_pendulum_ddpg.py>` + | :download:`ddpg_gym_pendulum_vector.py <../examples/gym/ddpg_gym_pendulum_vector.py>` + | :download:`ddpg_gymnasium_pendulum_vector.py <../examples/gymnasium/ddpg_gymnasium_pendulum_vector.py>` - .. literalinclude:: ../examples/gym/gym_vector_pendulum_ddpg.py + .. literalinclude:: ../examples/gym/ddpg_gym_pendulum_vector.py :language: python :emphasize-lines: 1, 13, 50-56 @@ -199,10 +199,10 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - | :download:`gym_vector_cartpole_dqn.py <../examples/gym/gym_vector_cartpole_dqn.py>` - | :download:`gymnasium_vector_cartpole_dqn.py <../examples/gymnasium/gymnasium_vector_cartpole_dqn.py>` + | :download:`dqn_gym_cartpole_vector.py <../examples/gym/dqn_gym_cartpole_vector.py>` + | :download:`dqn_gymnasium_cartpole_vector.py <../examples/gymnasium/dqn_gymnasium_cartpole_vector.py>` - .. literalinclude:: ../examples/gym/gym_vector_cartpole_dqn.py + .. literalinclude:: ../examples/gym/dqn_gym_cartpole_vector.py :language: python :emphasize-lines: 1, 8, 13-19 @@ -212,10 +212,10 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - | :download:`gym_vector_taxi_sarsa.py <../examples/gym/gym_vector_taxi_sarsa.py>` - | :download:`gymnasium_vector_taxi_sarsa.py <../examples/gymnasium/gymnasium_vector_taxi_sarsa.py>` + | :download:`sarsa_gym_taxi_vector.py <../examples/gym/sarsa_gym_taxi_vector.py>` + | :download:`sarsa_gymnasium_taxi_vector.py <../examples/gymnasium/sarsa_gymnasium_taxi_vector.py>` - .. literalinclude:: ../examples/gym/gym_vector_taxi_sarsa.py + .. literalinclude:: ../examples/gym/sarsa_gym_taxi_vector.py :language: python :emphasize-lines: 1, 9, 35-41 @@ -225,10 +225,10 @@ The following components or practices are exemplified (highlighted): .. group-tab:: Training - | :download:`gym_vector_frozen_lake_q_learning.py <../examples/gym/gym_vector_frozen_lake_q_learning.py>` - | :download:`gymnasium_vector_frozen_lake_q_learning.py <../examples/gymnasium/gymnasium_vector_frozen_lake_q_learning.py>` + | :download:`q_learning_gym_frozen_lake_vector.py <../examples/gym/q_learning_gym_frozen_lake_vector.py>` + | :download:`q_learning_gymnasium_frozen_lake_vector.py <../examples/gymnasium/q_learning_gymnasium_frozen_lake_vector.py>` - .. literalinclude:: ../examples/gym/gym_vector_frozen_lake_q_learning.py + .. literalinclude:: ../examples/gym/q_learning_gym_frozen_lake_vector.py :language: python :emphasize-lines: 1, 9, 35-41 @@ -236,8 +236,8 @@ The following components or practices are exemplified (highlighted):
-Learning in a DeepMind environment
-----------------------------------
+DeepMind environment
+--------------------

These examples perform the training of one agent in a DeepMind environment (**one agent, one environment**)

@@ -285,8 +285,8 @@
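For quick reference, a minimal sketch of how one of the DeepMind control suite tasks described above is typically loaded and wrapped before the agent is built (assuming the dm_control package is installed; the domain and task names are only illustrative):

# minimal sketch: load a DeepMind control suite task and wrap it for skrl
# (illustrative domain/task names; requires dm_control to be installed)
from dm_control import suite

from skrl.envs.torch import wrap_env

env = suite.load(domain_name="cartpole", task_name="swingup")
env = wrap_env(env)  # the DeepMind wrapper is selected automatically
device = env.device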
-Learning in an Isaac Gym environment ------------------------------------- +Isaac Gym environment +--------------------- These examples perform the training of an agent in the `Isaac Gym environments `_ (**one agent, multiple environments**) @@ -484,8 +484,8 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2
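For quick reference, a minimal sketch of how an Isaac Gym (preview 4) task is typically loaded and wrapped before the agent is built (the task name is illustrative and the IsaacGymEnvs tasks are assumed to be installed):

# minimal sketch: load an Isaac Gym preview 4 task and wrap it for skrl
# (illustrative task name; requires Isaac Gym and the IsaacGymEnvs tasks)
from skrl.envs.torch import wrap_env, load_isaacgym_env_preview4

env = load_isaacgym_env_preview4(task_name="Cartpole")
env = wrap_env(env)  # the Isaac Gym preview 4 wrapper is selected automatically
device = env.device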
-Learning by scopes in an Isaac Gym environment ----------------------------------------------- +Isaac Gym environment (learning by scopes) +------------------------------------------ These examples perform the training of 3 agents by scopes in Isaac Gym's Cartpole environment in the same run (**multiple agents and environments**) @@ -589,8 +589,8 @@ The following components or practices are exemplified (highlighted):
-Learning in an Omniverse Isaac Gym environment ----------------------------------------------- +Omniverse Isaac Gym environment +------------------------------- These examples perform the training of an agent in the `Omniverse Isaac Gym environments `_ (**one agent, multiple environments**) @@ -770,8 +770,8 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2
-Learning in an Omniverse Isaac Sim environment ----------------------------------------------- +Omniverse Isaac Sim environment +------------------------------- These examples show how to train an agent in an Omniverse Isaac Sim environment that is implemented using the Gym interface (**one agent, one environment**) From 338e15d5d9e84cb3c9849ccea87d8dcdeef5aa1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 3 Dec 2022 20:12:25 +0100 Subject: [PATCH 095/157] Use group tabs for model examples --- docs/source/modules/skrl.models.categorical.rst | 8 ++++---- docs/source/modules/skrl.models.deterministic.rst | 8 ++++---- docs/source/modules/skrl.models.gaussian.rst | 9 +++++---- .../source/modules/skrl.models.multivariate_gaussian.rst | 8 ++++---- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/docs/source/modules/skrl.models.categorical.rst b/docs/source/modules/skrl.models.categorical.rst index e67b73c1..a54aab6b 100644 --- a/docs/source/modules/skrl.models.categorical.rst +++ b/docs/source/modules/skrl.models.categorical.rst @@ -55,7 +55,7 @@ Basic usage .. tabs:: - .. tab:: nn.Sequential + .. group-tab:: nn.Sequential .. literalinclude:: ../snippets/categorical_model.py :language: python @@ -63,7 +63,7 @@ Basic usage :start-after: [start-mlp-sequential] :end-before: [end-mlp-sequential] - .. tab:: nn.functional + .. group-tab:: nn.functional .. literalinclude:: ../snippets/categorical_model.py :language: python @@ -83,7 +83,7 @@ Basic usage .. tabs:: - .. tab:: nn.Sequential + .. group-tab:: nn.Sequential .. literalinclude:: ../snippets/categorical_model.py :language: python @@ -91,7 +91,7 @@ Basic usage :start-after: [start-cnn-sequential] :end-before: [end-cnn-sequential] - .. tab:: nn.functional + .. group-tab:: nn.functional .. literalinclude:: ../snippets/categorical_model.py :language: python diff --git a/docs/source/modules/skrl.models.deterministic.rst b/docs/source/modules/skrl.models.deterministic.rst index 8196b8d3..de47a75e 100644 --- a/docs/source/modules/skrl.models.deterministic.rst +++ b/docs/source/modules/skrl.models.deterministic.rst @@ -55,7 +55,7 @@ Basic usage .. tabs:: - .. tab:: nn.Sequential + .. group-tab:: nn.Sequential .. literalinclude:: ../snippets/deterministic_model.py :language: python @@ -63,7 +63,7 @@ Basic usage :start-after: [start-mlp-sequential] :end-before: [end-mlp-sequential] - .. tab:: nn.functional + .. group-tab:: nn.functional .. literalinclude:: ../snippets/deterministic_model.py :language: python @@ -83,7 +83,7 @@ Basic usage .. tabs:: - .. tab:: nn.Sequential + .. group-tab:: nn.Sequential .. literalinclude:: ../snippets/deterministic_model.py :language: python @@ -91,7 +91,7 @@ Basic usage :start-after: [start-cnn-sequential] :end-before: [end-cnn-sequential] - .. tab:: nn.functional + .. group-tab:: nn.functional .. literalinclude:: ../snippets/deterministic_model.py :language: python diff --git a/docs/source/modules/skrl.models.gaussian.rst b/docs/source/modules/skrl.models.gaussian.rst index fb77d2dc..a9ac574c 100644 --- a/docs/source/modules/skrl.models.gaussian.rst +++ b/docs/source/modules/skrl.models.gaussian.rst @@ -57,7 +57,7 @@ Basic usage .. tabs:: - .. tab:: nn.Sequential + .. group-tab:: nn.Sequential .. literalinclude:: ../snippets/gaussian_model.py :language: python @@ -65,7 +65,7 @@ Basic usage :start-after: [start-mlp-sequential] :end-before: [end-mlp-sequential] - .. tab:: nn.functional + .. group-tab:: nn.functional .. 
literalinclude:: ../snippets/gaussian_model.py :language: python @@ -85,7 +85,7 @@ Basic usage .. tabs:: - .. tab:: nn.Sequential + .. group-tab:: nn.Sequential .. literalinclude:: ../snippets/gaussian_model.py :language: python @@ -93,13 +93,14 @@ Basic usage :start-after: [start-cnn-sequential] :end-before: [end-cnn-sequential] - .. tab:: nn.functional + .. group-tab:: nn.functional .. literalinclude:: ../snippets/gaussian_model.py :language: python :linenos: :start-after: [start-cnn-functional] :end-before: [end-cnn-functional] + API --- diff --git a/docs/source/modules/skrl.models.multivariate_gaussian.rst b/docs/source/modules/skrl.models.multivariate_gaussian.rst index 238ddf93..72bfc9c4 100644 --- a/docs/source/modules/skrl.models.multivariate_gaussian.rst +++ b/docs/source/modules/skrl.models.multivariate_gaussian.rst @@ -57,7 +57,7 @@ Basic usage .. tabs:: - .. tab:: nn.Sequential + .. group-tab:: nn.Sequential .. literalinclude:: ../snippets/multivariate_gaussian_model.py :language: python @@ -65,7 +65,7 @@ Basic usage :start-after: [start-mlp-sequential] :end-before: [end-mlp-sequential] - .. tab:: nn.functional + .. group-tab:: nn.functional .. literalinclude:: ../snippets/multivariate_gaussian_model.py :language: python @@ -85,7 +85,7 @@ Basic usage .. tabs:: - .. tab:: nn.Sequential + .. group-tab:: nn.Sequential .. literalinclude:: ../snippets/multivariate_gaussian_model.py :language: python @@ -93,7 +93,7 @@ Basic usage :start-after: [start-cnn-sequential] :end-before: [end-cnn-sequential] - .. tab:: nn.functional + .. group-tab:: nn.functional .. literalinclude:: ../snippets/multivariate_gaussian_model.py :language: python From 0f5fb147d00db60ed7887198a6392d1853c10b68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 6 Dec 2022 09:34:37 +0100 Subject: [PATCH 096/157] Update CHANGELOG --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 449860f0..a00662a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Set the running mode (training or evaluation) of the agents - Weights & Biases integration (by @juhannc) - Support for Gymnasium interface +- Allow clipping the gradient norm for DDPG, TD3 and SAC agents +- Initialize model biases +- Add RNN (RNN, LSTM, GRU and any other variant) support for A2C, DDPG, PPO, SAC, TD3 and TRPO agents ### Changed - Forward model inputs as a Python dictionary [**breaking change**] @@ -17,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
- Omniverse Isaac Gym simulation speed for the Franka Emika real-world example - Call agents' method `record_transition` instead of parent method to allow storing samples in memories during evaluation +- Move TRPO policy optimization out of the value optimization loop ### Removed - Deprecated method `start` in trainers From 6abc725b939b606be1cc3bb5861b58465f56969f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 9 Dec 2022 11:47:05 +0100 Subject: [PATCH 097/157] Fix access to the categorical mixin distribution --- skrl/models/torch/categorical.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skrl/models/torch/categorical.py b/skrl/models/torch/categorical.py index be18d4bb..a37fe4a1 100644 --- a/skrl/models/torch/categorical.py +++ b/skrl/models/torch/categorical.py @@ -117,4 +117,4 @@ def distribution(self, role: str = "") -> torch.distributions.Categorical: >>> print(distribution) Categorical(probs: torch.Size([4096, 2]), logits: torch.Size([4096, 2])) """ - return self._c_distribution if role in self._c_distribution else self._c_distribution[""] + return self._c_distribution[role] if role in self._c_distribution else self._c_distribution[""] From dcf432c431891b1f5bfb53f42061113a838bf25a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 9 Dec 2022 11:49:46 +0100 Subject: [PATCH 098/157] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a00662a6..80a472d9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Call agents' method `record_transition` instead of parent method to allow storing samples in memories during evaluation - Move TRPO policy optimization out of the value optimization loop +- Access to the categorical model distribution ### Removed - Deprecated method `start` in trainers From 3369703597151386e675e1e11f1d872cb37ca287 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 13 Dec 2022 12:29:57 +0100 Subject: [PATCH 099/157] Reset only once for gym/gymnasium vectorized environments --- skrl/envs/torch/wrappers.py | 54 ++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index eb60c40f..eef3f7ae 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -275,6 +275,9 @@ def __init__(self, env: Any) -> None: try: if isinstance(env, gym.vector.SyncVectorEnv) or isinstance(env, gym.vector.AsyncVectorEnv): self._vectorized = True + self._reset_once = True + self._obs_tensor = None + self._info_dict = None except Exception as e: print("[WARNING] Failed to check for a vectorized environment: {}".format(e)) @@ -387,12 +390,19 @@ def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch terminated = False else: observation, reward, terminated, truncated, info = self._env.step(self._tensor_to_action(actions)) + # convert response to torch - return self._observation_to_tensor(observation), \ - torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ - torch.tensor(terminated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ - torch.tensor(truncated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ - info + observation = self._observation_to_tensor(observation) + reward = torch.tensor(reward, 
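The one-line change above makes the `role` argument effective: the previous expression tested membership but then returned the whole `_c_distribution` dictionary instead of the entry for the requested role. A minimal usage sketch, assuming a categorical model `policy` whose distribution has already been computed for the `"policy"` role:

# retrieve the distribution stored for a specific role
# (falls back to the default "" role if the requested one is not present)
distribution = policy.distribution(role="policy")
print(distribution)  # e.g. Categorical(probs: torch.Size([4096, 2]), logits: torch.Size([4096, 2]))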
device=self.device, dtype=torch.float32).view(self.num_envs, -1) + terminated = torch.tensor(terminated, device=self.device, dtype=torch.bool).view(self.num_envs, -1) + truncated = torch.tensor(truncated, device=self.device, dtype=torch.bool).view(self.num_envs, -1) + + # save observation and info for vectorized envs + if self._vectorized: + self._obs_tensor = observation + self._info_dict = info + + return observation, reward, terminated, truncated, info def reset(self) -> Tuple[torch.Tensor, Any]: """Reset the environment @@ -400,6 +410,13 @@ def reset(self) -> Tuple[torch.Tensor, Any]: :return: Observation, info :rtype: torch.Tensor and any other info """ + # handle vectorized envs + if self._vectorized: + if not self._reset_once: + return self._obs_tensor, self._info_dict + self._reset_once = False + + # reset the env/envs if self._drepecated_api: observation = self._env.reset() info = {} @@ -431,6 +448,9 @@ def __init__(self, env: Any) -> None: try: if isinstance(env, gymnasium.vector.SyncVectorEnv) or isinstance(env, gymnasium.vector.AsyncVectorEnv): self._vectorized = True + self._reset_once = True + self._obs_tensor = None + self._info_dict = None except Exception as e: print("[WARNING] Failed to check for a vectorized environment: {}".format(e)) @@ -528,12 +548,19 @@ def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch :rtype: tuple of torch.Tensor and any other info """ observation, reward, terminated, truncated, info = self._env.step(self._tensor_to_action(actions)) + # convert response to torch - return self._observation_to_tensor(observation), \ - torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ - torch.tensor(terminated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ - torch.tensor(truncated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ - info + observation = self._observation_to_tensor(observation) + reward = torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1) + terminated = torch.tensor(terminated, device=self.device, dtype=torch.bool).view(self.num_envs, -1) + truncated = torch.tensor(truncated, device=self.device, dtype=torch.bool).view(self.num_envs, -1) + + # save observation and info for vectorized envs + if self._vectorized: + self._obs_tensor = observation + self._info_dict = info + + return observation, reward, terminated, truncated, info def reset(self) -> Tuple[torch.Tensor, Any]: """Reset the environment @@ -541,6 +568,13 @@ def reset(self) -> Tuple[torch.Tensor, Any]: :return: Observation, info :rtype: torch.Tensor and any other info """ + # handle vectorized envs + if self._vectorized: + if not self._reset_once: + return self._obs_tensor, self._info_dict + self._reset_once = False + + # reset the env/envs observation, info = self._env.reset() return self._observation_to_tensor(observation), info From 4354a48a5fe34944c06c90e17df99be5cc1fd593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 13 Dec 2022 12:34:24 +0100 Subject: [PATCH 100/157] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 80a472d9..5b2af965 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
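With the cached `_obs_tensor`/`_info_dict`, calling `reset()` again on an already-reset vectorized environment returns the stored observation and info instead of restarting the sub-environments (vectorized Gym/Gymnasium environments reset themselves automatically when an episode ends). A rough usage sketch, assuming a Gym version that provides `gym.vector.make`:

import gym

from skrl.envs.torch import wrap_env

# vectorized environment wrapped with the common skrl interface
env = wrap_env(gym.vector.make("CartPole-v1", num_envs=4, asynchronous=False), wrapper="gym")

observation, info = env.reset()  # actual reset of all sub-environments
observation, info = env.reset()  # returns the cached observation/info, no second reset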
to allow storing samples in memories during evaluation - Move TRPO policy optimization out of the value optimization loop - Access to the categorical model distribution +- Call reset only once for Gym/Gymnasium vectorized environments ### Removed - Deprecated method `start` in trainers From 29d3a6ac5ac0f9fea8536aa6ed4a63087cb90a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 14 Dec 2022 22:29:53 +0100 Subject: [PATCH 101/157] Add DeepMind robosuite environment wrapper --- skrl/envs/torch/wrappers.py | 148 ++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index eef3f7ae..28d61426 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -748,6 +748,144 @@ def close(self) -> None: self._env.close() +class DeepMindRobosuiteWrapper(Wrapper): + def __init__(self, env: Any) -> None: + """DeepMind robosuite environment wrapper + + :param env: The environment to wrap + :type env: Any supported DeepMind robosuite environment + """ + super().__init__(env) + + # observation and action spaces + self._observation_space = self._spec_to_space(self._env.observation_spec()) + self._action_space = self._spec_to_space(self._env.action_spec) + + @property + def state_space(self) -> gym.Space: + """State space + + An alias for the ``observation_space`` property + """ + return self._observation_space + + @property + def observation_space(self) -> gym.Space: + """Observation space + """ + return self._observation_space + + @property + def action_space(self) -> gym.Space: + """Action space + """ + return self._action_space + + def _spec_to_space(self, spec: Any) -> gym.Space: + """Convert the DeepMind robosuite spec to a Gym space + + :param spec: The DeepMind robosuite spec to convert + :type spec: Any supported DeepMind robosuite spec + + :raises: ValueError if the spec type is not supported + + :return: The Gym space + :rtype: gym.Space + """ + if type(spec) is tuple: + return gym.spaces.Box(shape=spec[0].shape, + dtype=np.float32, + low=spec[0], + high=spec[1]) + elif isinstance(spec, np.ndarray): + return gym.spaces.Box(shape=spec.shape, + dtype=np.float32, + low=np.full(spec.shape, float("-inf")), + high=np.full(spec.shape, float("inf"))) + elif isinstance(spec, collections.OrderedDict): + return gym.spaces.Dict({k: self._spec_to_space(v) for k, v in spec.items()}) + else: + raise ValueError("Spec type {} not supported. Please report this issue".format(type(spec))) + + def _observation_to_tensor(self, observation: Any, spec: Optional[Any] = None) -> torch.Tensor: + """Convert the DeepMind observation to a flat tensor + + :param observation: The DeepMind observation to convert to a tensor + :type observation: Any supported DeepMind observation + + :raises: ValueError if the observation spec type is not supported + + :return: The observation as a flat tensor + :rtype: torch.Tensor + """ + spec = spec if spec is not None else self._env.observation_spec() + + if isinstance(spec, np.ndarray): + return torch.tensor(observation, device=self.device, dtype=torch.float32).reshape(self.num_envs, -1) + elif isinstance(spec, collections.OrderedDict): + return torch.cat([self._observation_to_tensor(observation[k], spec[k]) \ + for k in sorted(spec.keys())], dim=-1).reshape(self.num_envs, -1) + else: + raise ValueError("Observation spec type {} not supported. 
Please report this issue".format(type(spec))) + + def _tensor_to_action(self, actions: torch.Tensor) -> Any: + """Convert the action to the DeepMind robosuite expected format + + :param actions: The actions to perform + :type actions: torch.Tensor + + :raise ValueError: If the action space type is not supported + + :return: The action in the DeepMind robosuite expected format + :rtype: Any supported DeepMind robosuite action + """ + spec = self._env.action_spec + + if type(spec) is tuple: + return np.array(actions.cpu().numpy(), dtype=np.float32).reshape(spec[0].shape) + else: + raise ValueError("Action spec type {} not supported. Please report this issue".format(type(spec))) + + def step(self, actions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Any]: + """Perform a step in the environment + + :param actions: The actions to perform + :type actions: torch.Tensor + + :return: Observation, reward, terminated, truncated, info + :rtype: tuple of torch.Tensor and any other info + """ + observation, reward, terminated, info = self._env.step(self._tensor_to_action(actions)) + truncated = False + info = {} + + # convert response to torch + return self._observation_to_tensor(observation), \ + torch.tensor(reward, device=self.device, dtype=torch.float32).view(self.num_envs, -1), \ + torch.tensor(terminated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ + torch.tensor(truncated, device=self.device, dtype=torch.bool).view(self.num_envs, -1), \ + info + + def reset(self) -> Tuple[torch.Tensor, Any]: + """Reset the environment + + :return: The state of the environment + :rtype: torch.Tensor + """ + observation = self._env.reset() + return self._observation_to_tensor(observation), {} + + def render(self, *args, **kwargs) -> None: + """Render the environment + """ + self._env.render(*args, **kwargs) + + def close(self) -> None: + """Close the environment + """ + self._env.close() + + def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: """Wrap an environment to use a common interface @@ -777,6 +915,8 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: +--------------------+-------------------------+ |DeepMind |``"dm"`` | +--------------------+-------------------------+ + |DeepMind robosuite |``"dm-robosuite"`` | + +--------------------+-------------------------+ |Isaac Gym preview 2 |``"isaacgym-preview2"`` | +--------------------+-------------------------+ |Isaac Gym preview 3 |``"isaacgym-preview3"`` | @@ -816,6 +956,10 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: if verbose: logger.info("Environment wrapper: DeepMind") return DeepMindWrapper(env) + elif "" in base_classes: if verbose: logger.info("Environment wrapper: Isaac Gym (preview 2)") @@ -835,6 +979,10 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: if verbose: logger.info("Environment wrapper: DeepMind") return DeepMindWrapper(env) + elif wrapper == "dm-robosuite": + if verbose: + logger.info("Environment wrapper: DeepMind robosuite") + return DeepMindRobosuiteWrapper(env) elif wrapper == "isaacgym-preview2": if verbose: logger.info("Environment wrapper: Isaac Gym (preview 2)") From f647df6f936d3dc3a85fba7890bb5e5ad2f4e707 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 14 Dec 2022 22:31:12 +0100 Subject: [PATCH 102/157] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 5b2af965..b3cabe39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [0.9.0] - Unreleased ### Added +- DeepMind robosuite environment wrapper - Set the running mode (training or evaluation) of the agents - Weights & Biases integration (by @juhannc) - Support for Gymnasium interface From 3cd2609e5e9b2879d3ceeac9ae843d1405400c75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 25 Dec 2022 19:50:09 +0100 Subject: [PATCH 103/157] Select the device automatically if not specified in noises classes --- skrl/resources/noises/torch/base.py | 9 +++++---- skrl/resources/noises/torch/gaussian.py | 7 ++++--- skrl/resources/noises/torch/ornstein_uhlenbeck.py | 7 ++++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/skrl/resources/noises/torch/base.py b/skrl/resources/noises/torch/base.py index a51356c2..bc665fa2 100644 --- a/skrl/resources/noises/torch/base.py +++ b/skrl/resources/noises/torch/base.py @@ -1,16 +1,17 @@ -from typing import Union, Tuple +from typing import Optional, Union, Tuple import torch class Noise(): - def __init__(self, device: Union[str, torch.device] = "cuda:0") -> None: + def __init__(self, device: Optional[Union[str, torch.device]] = None) -> None: """Base class representing a noise - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu" :type device: str or torch.device, optional """ - self.device = torch.device(device) + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if device is None else torch.device(device) def sample_like(self, tensor: torch.Tensor) -> torch.Tensor: """Sample a noise with the same size (shape) as the input tensor diff --git a/skrl/resources/noises/torch/gaussian.py b/skrl/resources/noises/torch/gaussian.py index 0fb3e074..7dc34a92 100644 --- a/skrl/resources/noises/torch/gaussian.py +++ b/skrl/resources/noises/torch/gaussian.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple +from typing import Optional, Union, Tuple import torch from torch.distributions import Normal @@ -7,14 +7,15 @@ class GaussianNoise(Noise): - def __init__(self, mean: float, std: float, device: Union[str, torch.device] = "cuda:0") -> None: + def __init__(self, mean: float, std: float, device: Optional[Union[str, torch.device]] = None) -> None: """Class representing a Gaussian noise :param mean: Mean of the normal distribution :type mean: float :param std: Standard deviation of the normal distribution :type std: float - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
+ If None, the device will be either ``"cuda:0"`` if available or ``"cpu" :type device: str or torch.device, optional """ super().__init__(device) diff --git a/skrl/resources/noises/torch/ornstein_uhlenbeck.py b/skrl/resources/noises/torch/ornstein_uhlenbeck.py index 53762294..88e648af 100644 --- a/skrl/resources/noises/torch/ornstein_uhlenbeck.py +++ b/skrl/resources/noises/torch/ornstein_uhlenbeck.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple +from typing import Optional, Union, Tuple import torch from torch.distributions import Normal @@ -13,7 +13,7 @@ def __init__(self, base_scale: float, mean: float = 0, std: float = 1, - device: Union[str, torch.device] = "cuda:0") -> None: + device: Optional[Union[str, torch.device]] = None) -> None: """Class representing an Ornstein-Uhlenbeck noise :param theta: Factor to apply to current internal state @@ -26,7 +26,8 @@ def __init__(self, :type mean: float, optional :param std: Standard deviation of the normal distribution (default: 1.0) :type std: float, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu" :type device: str or torch.device, optional """ super().__init__(device) From 6c4e91068a0b0f573999ec3c68eac788fbeca026 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 25 Dec 2022 19:53:13 +0100 Subject: [PATCH 104/157] Refactorize noise tests using pytest --- tests/test_noises_gaussian.py | 78 ------------------------- tests/test_noises_ornstein_uhlenbeck.py | 69 ---------------------- tests/test_resources_noises.py | 38 ++++++++++++ 3 files changed, 38 insertions(+), 147 deletions(-) delete mode 100644 tests/test_noises_gaussian.py delete mode 100644 tests/test_noises_ornstein_uhlenbeck.py create mode 100644 tests/test_resources_noises.py diff --git a/tests/test_noises_gaussian.py b/tests/test_noises_gaussian.py deleted file mode 100644 index b838604e..00000000 --- a/tests/test_noises_gaussian.py +++ /dev/null @@ -1,78 +0,0 @@ -import unittest -import math - -import torch - -from skrl.resources.noises.torch import GaussianNoise - - -class TestCase(unittest.TestCase): - def setUp(self): - self.devices = ['cpu', 'cuda:0'] - - self.sizes = [(1000, 2), [2000, 10, 1], torch.Size([3000])] - self.means = (10 * (torch.rand(len(self.sizes)) + 0.5) * torch.sign(torch.rand(len(self.sizes)) - 0.5)).tolist() - self.stds = (10 * (torch.rand(len(self.sizes)) + 0.1)).tolist() # positive non-zero values - - def tearDown(self): - pass - - def test_devices(self): - for device in self.devices: - noise = GaussianNoise(mean=0, std=1.0, device=device) - self.assertEqual(noise.device, torch.device(device)) - - def test_method_sample(self): - for mean, std in zip(self.means, self.stds): - # create noise - noise = GaussianNoise(mean=mean, std=std, device='cpu') - # iterate over all sizes - for size in self.sizes: - # iterate 10 times - for i in range(10): - # sample noise - output = noise.sample(size) - # check output - _mean = output.mean().item() - _std = output.std().item() - self.assertTrue(math.isclose(_mean, mean, rel_tol=abs(mean) * 0.25)) - self.assertTrue(math.isclose(_std, std, rel_tol=std * 0.25)) - # check shape - self.assertEqual(output.size(), torch.Size(size)) - - def test_method_sample_like(self): - for mean, std in zip(self.means, self.stds): - # create noise - noise = GaussianNoise(mean=mean, 
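With `device=None` as the new default, both noise classes resolve the device at construction time instead of assuming `"cuda:0"`. A short sketch of the resulting behaviour:

from skrl.resources.noises.torch import GaussianNoise, OrnsteinUhlenbeckNoise

# no device given: "cuda:0" is used if available, otherwise "cpu"
noise = GaussianNoise(mean=0, std=1)
print(noise.device)

# an explicit device still takes precedence
ou_noise = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.2, base_scale=1.0, device="cpu")
print(ou_noise.sample((3, 2)).device)  # cpu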
std=std, device='cpu') - # iterate over all sizes - for size in self.sizes: - # create tensor - tensor = torch.rand(size) - # iterate 10 times - for i in range(10): - # sample noise - output = noise.sample_like(tensor) - # check output - _mean = output.mean().item() - _std = output.std().item() - self.assertTrue(math.isclose(_mean, mean, rel_tol=abs(mean) * 0.25)) - self.assertTrue(math.isclose(_std, std, rel_tol=std * 0.25)) - # check shape - self.assertEqual(output.size(), torch.Size(size)) - - -if __name__ == '__main__': - import sys - - if not sys.argv[-1] == '--debug': - raise RuntimeError('Test can only be runned manually with --debug flag') - - test = TestCase() - test.setUp() - for method in dir(test): - if method.startswith('test_'): - print('Running test: {}'.format(method)) - getattr(test, method)() - test.tearDown() - - print('All tests passed.') diff --git a/tests/test_noises_ornstein_uhlenbeck.py b/tests/test_noises_ornstein_uhlenbeck.py deleted file mode 100644 index fa24c168..00000000 --- a/tests/test_noises_ornstein_uhlenbeck.py +++ /dev/null @@ -1,69 +0,0 @@ -import unittest -import math - -import torch - -from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise - - -class TestCase(unittest.TestCase): - def setUp(self): - self.devices = ['cpu', 'cuda:0'] - - self.sizes = [(1000, 2), [2000, 10, 1], torch.Size([3000])] - self.thetas = (10 * (torch.rand(len(self.sizes)) + 0.5)).tolist() # positive non-zero values - self.sigmas = (10 * (torch.rand(len(self.sizes)) + 0.5)).tolist() # positive non-zero values - self.base_scales = (10 * (torch.rand(len(self.sizes)) + 0.5)).tolist() # positive non-zero values - - def tearDown(self): - pass - - def test_devices(self): - for device in self.devices: - noise = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.1, base_scale=1.0, device=device) - self.assertEqual(noise.device, torch.device(device)) - - def test_method_sample(self): - for theta, sigma, base_scale in zip(self.thetas, self.sigmas, self.base_scales): - # create noise - noise = OrnsteinUhlenbeckNoise(theta=theta, sigma=sigma, base_scale=base_scale, device='cpu') - # iterate over all sizes - for size in self.sizes: - # iterate 10 times - for i in range(10): - # sample noise - output = noise.sample(size) - # check shape - self.assertEqual(output.size(), torch.Size(size)) - - def test_method_sample_like(self): - for theta, sigma, base_scale in zip(self.thetas, self.sigmas, self.base_scales): - # create noise - noise = OrnsteinUhlenbeckNoise(theta=theta, sigma=sigma, base_scale=base_scale, device='cpu') - # iterate over all sizes - for size in self.sizes: - # create tensor - tensor = torch.rand(size) - # iterate 10 times - for i in range(10): - # sample noise - output = noise.sample_like(tensor) - # check shape - self.assertEqual(output.size(), torch.Size(size)) - - -if __name__ == '__main__': - import sys - - if not sys.argv[-1] == '--debug': - raise RuntimeError('Test can only be runned manually with --debug flag') - - test = TestCase() - test.setUp() - for method in dir(test): - if method.startswith('test_'): - print('Running test: {}'.format(method)) - getattr(test, method)() - test.tearDown() - - print('All tests passed.') diff --git a/tests/test_resources_noises.py b/tests/test_resources_noises.py new file mode 100644 index 00000000..c023a64d --- /dev/null +++ b/tests/test_resources_noises.py @@ -0,0 +1,38 @@ +import pytest + +import torch + +from skrl.resources.noises.torch import GaussianNoise +from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise + + 
+@pytest.fixture +def noises_and_kwargs(): + return [(GaussianNoise, {"mean": 0, "std": 1}), + (OrnsteinUhlenbeckNoise, {"theta": 0.1, "sigma": 0.2, "base_scale": 0.3})] + + +@pytest.mark.parametrize("device", [None, "cpu", "cuda:0"]) +def test_device(noises_and_kwargs, device): + _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + for klass, kwargs in noises_and_kwargs: + noise = klass(device=device, **kwargs) + + output = noise.sample((1,)) + assert noise.device == _device + assert output.device == _device + +@pytest.mark.parametrize("size", [(10,), [20, 1], torch.Size([30, 1, 2])]) +def test_sampling(noises_and_kwargs, size): + for klass, kwargs in noises_and_kwargs: + noise = klass(**kwargs) + + # sample + output = noise.sample(size) + assert output.size() == torch.Size(size) + + # sample like + tensor = torch.rand(size) + output = noise.sample_like(tensor) + assert output.size() == torch.Size(size) From 43d4207fad0e7cd96e4d94777beb7decbd75456c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 25 Dec 2022 22:24:15 +0100 Subject: [PATCH 105/157] Storage preprocessor device --- .../torch/running_standard_scaler.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/skrl/resources/preprocessors/torch/running_standard_scaler.py b/skrl/resources/preprocessors/torch/running_standard_scaler.py index 413a5013..a03f4ed3 100644 --- a/skrl/resources/preprocessors/torch/running_standard_scaler.py +++ b/skrl/resources/preprocessors/torch/running_standard_scaler.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple +from typing import Optional, Union, Tuple import gym import gymnasium @@ -13,7 +13,7 @@ def __init__(self, size: Union[int, Tuple[int], gym.Space, gymnasium.Space], epsilon: float = 1e-8, clip_threshold: float = 5.0, - device: Union[str, torch.device] = "cuda:0") -> None: + device: Optional[Union[str, torch.device]] = None) -> None: """Standardize the input data by removing the mean and scaling by the standard deviation The implementation is adapted from the rl_games library @@ -22,28 +22,33 @@ def __init__(self, Example:: >>> running_standard_scaler = RunningStandardScaler(size=2) - >>> data = ... # tensor of shape (N, 2) + >>> data = torch.rand(3, 2) # tensor of shape (N, 2) >>> running_standard_scaler(data) + tensor([[0.1954, 0.3356], + [0.9719, 0.4163], + [0.8540, 0.1982]]) :param size: Size of the input space :type size: int, tuple or list of integers, gym.Space, or gymnasium.Space - :param epsilon: Small number to avoid division by zero (default: 1e-8) + :param epsilon: Small number to avoid division by zero (default: ``1e-8``) :type epsilon: float - :param clip_threshold: Threshold to clip the data (default: 5.0) + :param clip_threshold: Threshold to clip the data (default: ``5.0``) :type clip_threshold: float - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
+ If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional """ super().__init__() self.epsilon = epsilon self.clip_threshold = clip_threshold + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if device is None else torch.device(device) size = self._get_space_size(size) - self.register_buffer("running_mean", torch.zeros(size, dtype = torch.float64, device=device)) - self.register_buffer("running_variance", torch.ones(size, dtype = torch.float64, device=device)) - self.register_buffer("current_count", torch.ones((), dtype = torch.float64, device=device)) + self.register_buffer("running_mean", torch.zeros(size, dtype = torch.float64, device=self.device)) + self.register_buffer("running_variance", torch.ones(size, dtype = torch.float64, device=self.device)) + self.register_buffer("current_count", torch.ones((), dtype = torch.float64, device=self.device)) def _get_space_size(self, space: Union[int, Tuple[int], gym.Space, gymnasium.Space]) -> int: """Get the size (number of elements) of a space @@ -103,9 +108,9 @@ def _compute(self, x: torch.Tensor, train: bool = False, inverse: bool = False) :param x: Input tensor :type x: torch.Tensor - :param train: Whether to train the standardizer (default: False) + :param train: Whether to train the standardizer (default: ``False``) :type train: bool, optional - :param inverse: Whether to inverse the standardizer to scale back the data (default: False) + :param inverse: Whether to inverse the standardizer to scale back the data (default: ``False``) :type inverse: bool, optional """ if train: @@ -146,11 +151,11 @@ def forward(self, x: torch.Tensor, train: bool = False, inverse: bool = False, n :param x: Input tensor :type x: torch.Tensor - :param train: Whether to train the standardizer (default: False) + :param train: Whether to train the standardizer (default: ``False``) :type train: bool, optional - :param inverse: Whether to inverse the standardizer to scale back the data (default: False) + :param inverse: Whether to inverse the standardizer to scale back the data (default: ``False``) :type inverse: bool, optional - :param no_grad: Whether to disable the gradient computation (default: True) + :param no_grad: Whether to disable the gradient computation (default: ``True``) :type no_grad: bool, optional """ if no_grad: From 5691671fa8977fca1909a2e3e616cc42002a1eee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 25 Dec 2022 22:46:07 +0100 Subject: [PATCH 106/157] Add noises' docstring examples --- skrl/resources/noises/torch/base.py | 22 +++++++++++++++++- skrl/resources/noises/torch/gaussian.py | 19 ++++++++++++++- .../noises/torch/ornstein_uhlenbeck.py | 23 ++++++++++++++++--- 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/skrl/resources/noises/torch/base.py b/skrl/resources/noises/torch/base.py index bc665fa2..6ff37b3a 100644 --- a/skrl/resources/noises/torch/base.py +++ b/skrl/resources/noises/torch/base.py @@ -8,8 +8,20 @@ def __init__(self, device: Optional[Union[str, torch.device]] = None) -> None: """Base class representing a noise :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
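The scaler follows the same convention: the resolved device is stored in `self.device` and used for the internal running-statistics buffers. A small usage sketch:

import torch

from skrl.resources.preprocessors.torch import RunningStandardScaler

scaler = RunningStandardScaler(size=2)   # device resolved automatically
data = torch.rand(3, 2, device=scaler.device)

scaled = scaler(data, train=True)        # update the running statistics and standardize
restored = scaler(scaled, inverse=True)  # scale the standardized data back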
- If None, the device will be either ``"cuda:0"`` if available or ``"cpu" + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional + + Custom noises should override the ``sample`` method:: + + import torch + from skrl.resources.noises.torch import Noise + + class CustomNoise(Noise): + def __init__(self, device=None): + super().__init__(device) + + def sample(self, size): + return torch.rand(size, device=self.device) """ self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if device is None else torch.device(device) @@ -23,6 +35,14 @@ def sample_like(self, tensor: torch.Tensor) -> torch.Tensor: :return: Sampled noise :rtype: torch.Tensor + + Example:: + + >>> x = torch.rand(3, 2, device="cuda:0") + >>> noise.sample_like(x) + tensor([[-0.0423, -0.1325], + [-0.0639, -0.0957], + [-0.1367, 0.1031]], device='cuda:0') """ return self.sample(tensor.size()) diff --git a/skrl/resources/noises/torch/gaussian.py b/skrl/resources/noises/torch/gaussian.py index 7dc34a92..1cde4bb5 100644 --- a/skrl/resources/noises/torch/gaussian.py +++ b/skrl/resources/noises/torch/gaussian.py @@ -15,8 +15,12 @@ def __init__(self, mean: float, std: float, device: Optional[Union[str, torch.de :param std: Standard deviation of the normal distribution :type std: float :param device: Device on which a torch tensor is or will be allocated (default: ``None``). - If None, the device will be either ``"cuda:0"`` if available or ``"cpu" + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional + + Example:: + + >>> noise = GaussianNoise(mean=0, std=1) """ super().__init__(device) @@ -31,5 +35,18 @@ def sample(self, size: Union[Tuple[int], torch.Size]) -> torch.Tensor: :return: Sampled noise :rtype: torch.Tensor + + Example:: + + >>> noise.sample((3, 2)) + tensor([[-0.4901, 1.3357], + [-1.2141, 0.3323], + [-0.0889, -1.1651]], device='cuda:0') + + >>> x = torch.rand(3, 2, device="cuda:0") + >>> noise.sample(x.shape) + tensor([[0.5398, 1.2009], + [0.0307, 1.3065], + [0.2082, 0.6116]], device='cuda:0') """ return self.distribution.sample(size) diff --git a/skrl/resources/noises/torch/ornstein_uhlenbeck.py b/skrl/resources/noises/torch/ornstein_uhlenbeck.py index 88e648af..37c328df 100644 --- a/skrl/resources/noises/torch/ornstein_uhlenbeck.py +++ b/skrl/resources/noises/torch/ornstein_uhlenbeck.py @@ -22,13 +22,17 @@ def __init__(self, :type sigma: float :param base_scale: Factor to apply to returned noise :type base_scale: float - :param mean: Mean of the normal distribution (default: 0.0) + :param mean: Mean of the normal distribution (default: ``0.0``) :type mean: float, optional - :param std: Standard deviation of the normal distribution (default: 1.0) + :param std: Standard deviation of the normal distribution (default: ``1.0``) :type std: float, optional :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
- If None, the device will be either ``"cuda:0"`` if available or ``"cpu" + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional + + Example:: + + >>> noise = OrnsteinUhlenbeckNoise(theta=0.1, sigma=0.2, base_scale=0.5) """ super().__init__(device) @@ -48,6 +52,19 @@ def sample(self, size: Union[Tuple[int], torch.Size]) -> torch.Tensor: :return: Sampled noise :rtype: torch.Tensor + + Example:: + + >>> noise.sample((3, 2)) + tensor([[-0.0452, 0.0162], + [ 0.0649, -0.0708], + [-0.0211, 0.0066]], device='cuda:0') + + >>> x = torch.rand(3, 2, device="cuda:0") + >>> noise.sample(x.shape) + tensor([[-0.0540, 0.0461], + [ 0.1117, -0.1157], + [-0.0074, 0.0420]], device='cuda:0') """ if isinstance(self.state, torch.Tensor) and self.state.size() != torch.Size(size): self.state = 0 From 66c3dfa19eef32ed71ef05d657f163d3759877b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 25 Dec 2022 22:48:39 +0100 Subject: [PATCH 107/157] Add preprocessors tests --- tests/test_resources_noises.py | 16 ++++++------ tests/test_resources_preprocessors.py | 37 +++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 8 deletions(-) create mode 100644 tests/test_resources_preprocessors.py diff --git a/tests/test_resources_noises.py b/tests/test_resources_noises.py index c023a64d..2c53855d 100644 --- a/tests/test_resources_noises.py +++ b/tests/test_resources_noises.py @@ -7,25 +7,25 @@ @pytest.fixture -def noises_and_kwargs(): +def classes_and_kwargs(): return [(GaussianNoise, {"mean": 0, "std": 1}), (OrnsteinUhlenbeckNoise, {"theta": 0.1, "sigma": 0.2, "base_scale": 0.3})] @pytest.mark.parametrize("device", [None, "cpu", "cuda:0"]) -def test_device(noises_and_kwargs, device): +def test_device(classes_and_kwargs, device): _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - for klass, kwargs in noises_and_kwargs: + for klass, kwargs in classes_and_kwargs: noise = klass(device=device, **kwargs) output = noise.sample((1,)) - assert noise.device == _device - assert output.device == _device + assert noise.device == _device # defined device + assert output.device == _device # runtime device @pytest.mark.parametrize("size", [(10,), [20, 1], torch.Size([30, 1, 2])]) -def test_sampling(noises_and_kwargs, size): - for klass, kwargs in noises_and_kwargs: +def test_sampling(classes_and_kwargs, size): + for klass, kwargs in classes_and_kwargs: noise = klass(**kwargs) # sample @@ -33,6 +33,6 @@ def test_sampling(noises_and_kwargs, size): assert output.size() == torch.Size(size) # sample like - tensor = torch.rand(size) + tensor = torch.rand(size, device="cpu") output = noise.sample_like(tensor) assert output.size() == torch.Size(size) diff --git a/tests/test_resources_preprocessors.py b/tests/test_resources_preprocessors.py new file mode 100644 index 00000000..c1cf1372 --- /dev/null +++ b/tests/test_resources_preprocessors.py @@ -0,0 +1,37 @@ +import pytest + +import gym +import gymnasium +import numpy as np + +import torch + +from skrl.resources.preprocessors.torch import RunningStandardScaler + + +@pytest.fixture +def classes_and_kwargs(): + return [(RunningStandardScaler, {"size": 1})] + + +@pytest.mark.parametrize("device", [None, "cpu", "cuda:0"]) +def test_device(classes_and_kwargs, device): + _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + for klass, kwargs in 
classes_and_kwargs: + preprocessor = klass(device=device, **kwargs) + + assert preprocessor.device == _device # defined device + assert preprocessor(torch.ones(kwargs["size"], device=_device)).device == _device # runtime device + +@pytest.mark.parametrize("space_and_size", [(gym.spaces.Box(low=-1, high=1, shape=(2, 3)), 6), + (gymnasium.spaces.Box(low=-1, high=1, shape=(2, 3)), 6), + (gym.spaces.Discrete(n=3), 1), + (gymnasium.spaces.Discrete(n=3), 1)]) +def test_forward(classes_and_kwargs, space_and_size): + for klass, kwargs in classes_and_kwargs: + space, size = space_and_size + preprocessor = klass(size=space, device="cpu") + + output = preprocessor(torch.rand((10, size), device="cpu")) + assert output.shape == torch.Size((10, size)) From 235421f49c77f264ae4cfdac26a201c3ef15eb4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 25 Dec 2022 23:13:30 +0100 Subject: [PATCH 108/157] Add schedulers tests --- tests/test_resources_schedulers.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/test_resources_schedulers.py diff --git a/tests/test_resources_schedulers.py b/tests/test_resources_schedulers.py new file mode 100644 index 00000000..01c56a24 --- /dev/null +++ b/tests/test_resources_schedulers.py @@ -0,0 +1,18 @@ +import pytest + +import torch + +from skrl.resources.schedulers.torch import KLAdaptiveRL + + +@pytest.fixture +def classes_and_kwargs(): + return [(KLAdaptiveRL, {})] + +@pytest.mark.parametrize("optimizer", [torch.optim.Adam([torch.ones((1,))], lr=0.1), + torch.optim.SGD([torch.ones((1,))], lr=0.1)]) +def test_step(classes_and_kwargs, optimizer): + for klass, kwargs in classes_and_kwargs: + scheduler = klass(optimizer, **kwargs) + + scheduler.step(0.0) From c2a98c2f98a2f0db12189b804e8f91f12f762520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 25 Dec 2022 23:51:09 +0100 Subject: [PATCH 109/157] Select the device automatically if not specified in memories --- skrl/memories/torch/base.py | 9 +++++---- skrl/memories/torch/random.py | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/skrl/memories/torch/base.py b/skrl/memories/torch/base.py index 92bee823..b123fcc5 100644 --- a/skrl/memories/torch/base.py +++ b/skrl/memories/torch/base.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, List, Optional +from typing import Optional, Union, Tuple, List import os import csv @@ -17,7 +17,7 @@ class Memory: def __init__(self, memory_size: int, num_envs: int = 1, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, export: bool = False, export_format: str = "pt", export_directory: str = "") -> None: @@ -30,7 +30,8 @@ def __init__(self, :type memory_size: int :param num_envs: Number of parallel environments (default: 1) :type num_envs: int, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param export: Export the memory to a file (default: False). 
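For context, the scheduler exercised by this test adapts the learning rate from the KL divergence value passed to `step`. A minimal sketch, assuming the `kl_threshold` keyword of `KLAdaptiveRL`:

import torch

from skrl.resources.schedulers.torch import KLAdaptiveRL

optimizer = torch.optim.Adam([torch.ones((1,), requires_grad=True)], lr=1e-3)
scheduler = KLAdaptiveRL(optimizer, kl_threshold=0.008)  # threshold value is illustrative

# after each optimization step, pass the measured KL divergence
scheduler.step(0.02)  # KL well above the threshold -> the learning rate is reduced
print(optimizer.param_groups[0]["lr"])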
If True, the memory will be exported when the memory is filled @@ -46,7 +47,7 @@ def __init__(self, """ self.memory_size = memory_size self.num_envs = num_envs - self.device = torch.device(device) + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if device is None else torch.device(device) # internal variables self.filled = False diff --git a/skrl/memories/torch/random.py b/skrl/memories/torch/random.py index 6a8ae4b2..9d98b8e6 100644 --- a/skrl/memories/torch/random.py +++ b/skrl/memories/torch/random.py @@ -1,4 +1,4 @@ -from typing import Union, Tuple, List +from typing import Optional, Union, Tuple, List import torch @@ -9,7 +9,7 @@ class RandomMemory(Memory): def __init__(self, memory_size: int, num_envs: int = 1, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, export: bool = False, export_format: str = "pt", export_directory: str = "", @@ -22,7 +22,8 @@ def __init__(self, :type memory_size: int :param num_envs: Number of parallel environments (default: 1) :type num_envs: int, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param export: Export the memory to a file (default: False). If True, the memory will be exported when the memory is filled From ad02997b3a29583631117a7ebb0f6716d5ee2a49 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 27 Dec 2022 19:23:32 +0100 Subject: [PATCH 110/157] Add recurrent neural networks to snippets --- docs/source/snippets/categorical_model.py | 538 ++++++++++++++++ docs/source/snippets/deterministic_model.py | 528 +++++++++++++++- docs/source/snippets/gaussian_model.py | 586 +++++++++++++++++ .../snippets/multivariate_gaussian_model.py | 589 +++++++++++++++++- 4 files changed, 2231 insertions(+), 10 deletions(-) diff --git a/docs/source/snippets/categorical_model.py b/docs/source/snippets/categorical_model.py index fbedbab6..d54ff91e 100644 --- a/docs/source/snippets/categorical_model.py +++ b/docs/source/snippets/categorical_model.py @@ -156,3 +156,541 @@ def compute(self, inputs, role): device=env.device, unnormalized_log_prob=True) # [end-cnn-functional] + +# ============================================================================= + +# [start-rnn-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, CategoricalMixin + + +# define the model +class RNN(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": 
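Memories now follow the same device fallback. A short usage sketch:

import torch

from skrl.memories.torch import RandomMemory

# no device given: "cuda:0" if available, otherwise "cpu"
memory = RandomMemory(memory_size=1000, num_envs=4)
print(memory.device)

memory.create_tensor(name="states", size=8, dtype=torch.float32)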
self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = RNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + unnormalized_log_prob=True, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-rnn-sequential] + +# [start-rnn-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, CategoricalMixin + + +# define the model +class RNN(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size, 64) + self.fc2 = nn.Linear(64, 32) + self.logits = nn.Linear(32, self.num_actions) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): 
N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(rnn_output) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + + return self.logits(x), {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = RNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + unnormalized_log_prob=True, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-rnn-functional] + +# ============================================================================= + +# [start-gru-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, CategoricalMixin + + +# define the model +class GRU(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = 
hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = GRU(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + unnormalized_log_prob=True, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-gru-sequential] + +# [start-gru-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, CategoricalMixin + + +# define the model +class GRU(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size, 64) + self.fc2 = nn.Linear(64, 32) + self.logits = nn.Linear(32, self.num_actions) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 
1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(rnn_output) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + + return self.logits(x), {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = GRU(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + unnormalized_log_prob=True, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-gru-functional] + +# ============================================================================= + +# [start-lstm-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, CategoricalMixin + + +# define the model +class LSTM(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + 
(terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), {"rnn": [rnn_states[0], rnn_states[1]]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = LSTM(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + unnormalized_log_prob=True, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-lstm-sequential] + +# [start-lstm-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, CategoricalMixin + + +# define the model +class LSTM(CategoricalMixin, Model): + def __init__(self, observation_space, action_space, device, unnormalized_log_prob=True, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + CategoricalMixin.__init__(self, unnormalized_log_prob) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size, 64) + self.fc2 = nn.Linear(64, 32) + self.logits = nn.Linear(32, self.num_actions) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a 
sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(rnn_output) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + + return self.logits(x), {"rnn": [rnn_states[0], rnn_states[1]]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = LSTM(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + unnormalized_log_prob=True, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-lstm-functional] diff --git a/docs/source/snippets/deterministic_model.py b/docs/source/snippets/deterministic_model.py index cac70d88..5026367d 100644 --- a/docs/source/snippets/deterministic_model.py +++ b/docs/source/snippets/deterministic_model.py @@ -22,7 +22,7 @@ def compute(self, inputs, role): # instantiate the model (assumes there is a wrapped environment: env) -policy = MLP(observation_space=env.observation_space, +critic = MLP(observation_space=env.observation_space, action_space=env.action_space, device=env.device, clip_actions=False) @@ -55,7 +55,7 @@ def compute(self, inputs, role): # instantiate the model (assumes there is a wrapped environment: env) -policy = MLP(observation_space=env.observation_space, +critic = MLP(observation_space=env.observation_space, action_space=env.action_space, device=env.device, clip_actions=False) @@ -101,7 +101,7 @@ def compute(self, inputs, role): # instantiate the model (assumes there is a wrapped environment: env) -policy = CNN(observation_space=env.observation_space, +critic = CNN(observation_space=env.observation_space, action_space=env.action_space, device=env.device, clip_actions=False) @@ -153,8 +153,528 @@ def compute(self, inputs, role): # instantiate the model (assumes there is a wrapped environment: env) -policy = CNN(observation_space=env.observation_space, +critic = CNN(observation_space=env.observation_space, action_space=env.action_space, device=env.device, clip_actions=False) # [end-cnn-functional] + +# ============================================================================= + +# [start-rnn-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, DeterministicMixin + + +# define the model +class RNN(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + 
DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size + self.num_actions, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, 1)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # critic models are only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role == "target_critic" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(torch.cat([rnn_output, inputs["taken_actions"]], dim=1)), {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +critic = RNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=False, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-rnn-sequential] + +# [start-rnn-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, DeterministicMixin + + +# define the model +class RNN(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) 
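+ # fully connected head: the flattened RNN features are concatenated with the taken actions and mapped to a single Q-value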
+ + self.fc1 = nn.Linear(self.hidden_size + self.num_actions, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, 1) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # critic models are only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role == "target_critic" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1)) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + + return self.fc3(x), {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +critic = RNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=False, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-rnn-functional] + +# ============================================================================= + +# [start-gru-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, DeterministicMixin + + +# define the model +class GRU(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size + self.num_actions, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, 1)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": 
{"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # critic models are only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role == "target_critic" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(torch.cat([rnn_output, inputs["taken_actions"]], dim=1)), {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +critic = GRU(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=False, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-gru-sequential] + +# [start-gru-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, DeterministicMixin + + +# define the model +class GRU(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size + self.num_actions, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, 1) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # critic models are only used during training + rnn_input = states.view(-1, 
self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + sequence_index = 1 if role == "target_critic" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1)) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + + return self.fc3(x), {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +critic = GRU(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=False, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-gru-functional] + +# ============================================================================= + +# [start-lstm-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, DeterministicMixin + + +# define the model +class LSTM(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size + self.num_actions, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, 1)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # critic models are only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, 
L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + sequence_index = 1 if role == "target_critic" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(torch.cat([rnn_output, inputs["taken_actions"]], dim=1)), {"rnn": [rnn_states[0], rnn_states[1]]} + + +# instantiate the model (assumes there is a wrapped environment: env) +critic = LSTM(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=False, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-lstm-sequential] + +# [start-lstm-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, DeterministicMixin + + +# define the model +class LSTM(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size + self.num_actions, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, 1) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], 
inputs["rnn"][1] + + # critic models are only used during training + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + sequence_index = 1 if role == "target_critic" else 0 # target networks act on the next state of the environment + hidden_states = hidden_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,sequence_index,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(torch.cat([rnn_output, inputs["taken_actions"]], dim=1)) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + + return self.fc3(x), {"rnn": [rnn_states[0], rnn_states[1]]} + + +# instantiate the model (assumes there is a wrapped environment: env) +critic = LSTM(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=False, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-lstm-functional] diff --git a/docs/source/snippets/gaussian_model.py b/docs/source/snippets/gaussian_model.py index eadf22dd..02d95fc3 100644 --- a/docs/source/snippets/gaussian_model.py +++ b/docs/source/snippets/gaussian_model.py @@ -186,3 +186,589 @@ def compute(self, inputs, role): max_log_std=2, reduction="sum") # [end-cnn-functional] + +# ============================================================================= + +# [start-rnn-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, GaussianMixin + + +# define the model +class RNN(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> 
(batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions), + nn.Tanh()) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), self.log_std_parameter, {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = RNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + reduction="sum", + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-rnn-sequential] + +# [start-rnn-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, GaussianMixin + + +# define the model +class RNN(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size, 64) + self.fc2 = nn.Linear(64, 32) 
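+ # fc3 outputs the action mean; a state-independent log standard deviation is defined as a separate parameter below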
+ self.fc3 = nn.Linear(32, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(rnn_output) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + x = self.fc3(x) + + return torch.tanh(x), self.log_std_parameter, {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = RNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + reduction="sum", + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-rnn-functional] + +# ============================================================================= + +# [start-gru-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, GaussianMixin + + +# define the model +class GRU(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + 
nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions), + nn.Tanh()) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), self.log_std_parameter, {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = GRU(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + reduction="sum", + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-gru-sequential] + +# [start-gru-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, GaussianMixin + + +# define the model +class GRU(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, self.num_actions) + + self.log_std_parameter = 
nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(rnn_output) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + x = self.fc3(x) + + return torch.tanh(x), self.log_std_parameter, {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = GRU(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + reduction="sum", + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-gru-functional] + +# ============================================================================= + +# [start-lstm-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, GaussianMixin + + +# define the model +class LSTM(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), 
+ nn.Linear(32, self.num_actions), + nn.Tanh()) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), self.log_std_parameter, {"rnn": [rnn_states[0], rnn_states[1]]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = LSTM(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + reduction="sum", + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-lstm-sequential] + +# [start-lstm-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, GaussianMixin + + +# define the model +class LSTM(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, reduction="sum", + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std, reduction) + + self.num_envs = num_envs + 
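+ # stored so that get_specification() can report the hidden/cell state shapes for this model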
self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(rnn_output) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + x = self.fc3(x) + + return torch.tanh(x), self.log_std_parameter, {"rnn": [rnn_states[0], rnn_states[1]]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = LSTM(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + reduction="sum", + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-lstm-functional] diff --git a/docs/source/snippets/multivariate_gaussian_model.py b/docs/source/snippets/multivariate_gaussian_model.py 
index 3568cc0b..e39d95c0 100644 --- a/docs/source/snippets/multivariate_gaussian_model.py +++ b/docs/source/snippets/multivariate_gaussian_model.py @@ -32,8 +32,7 @@ def compute(self, inputs, role): clip_actions=True, clip_log_std=True, min_log_std=-20, - max_log_std=2, - reduction="sum") + max_log_std=2) # [end-mlp-sequential] # [start-mlp-functional] @@ -123,8 +122,7 @@ def compute(self, inputs, role): clip_actions=True, clip_log_std=True, min_log_std=-20, - max_log_std=2, - reduction="sum") + max_log_std=2) # [end-cnn-sequential] # [start-cnn-functional] @@ -182,6 +180,585 @@ def compute(self, inputs, role): clip_actions=True, clip_log_std=True, min_log_std=-20, - max_log_std=2, - reduction="sum") + max_log_std=2) # [end-cnn-functional] + +# ============================================================================= + +# [start-rnn-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, MultivariateGaussianMixin + + +# define the model +class RNN(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions), + nn.Tanh()) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, 
states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), self.log_std_parameter, {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = RNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-rnn-sequential] + +# [start-rnn-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, MultivariateGaussianMixin + + +# define the model +class RNN(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.rnn = nn.RNN(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.rnn(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.rnn(rnn_input, hidden_states) + + # flatten the RNN output + 
rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(rnn_output) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + x = self.fc3(x) + + return torch.tanh(x), self.log_std_parameter, {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = RNN(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-rnn-functional] + +# ============================================================================= + +# [start-gru-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, MultivariateGaussianMixin + + +# define the model +class GRU(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions), + nn.Tanh()) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # 
flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), self.log_std_parameter, {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = GRU(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-gru-sequential] + +# [start-gru-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, MultivariateGaussianMixin + + +# define the model +class GRU(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hout + self.sequence_length = sequence_length + + self.gru = nn.GRU(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size)]}} # hidden states (D ∗ num_layers, N, Hout) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states = inputs["rnn"][0] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + # get the hidden states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, hidden_states = self.gru(rnn_input[:,i0:i1,:], hidden_states) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, hidden_states = self.gru(rnn_input, hidden_states) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = 
self.fc1(rnn_output) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + x = self.fc3(x) + + return torch.tanh(x), self.log_std_parameter, {"rnn": [hidden_states]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = GRU(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-gru-functional] + +# ============================================================================= + +# [start-lstm-sequential] +import torch +import torch.nn as nn + +from skrl.models.torch import Model, MultivariateGaussianMixin + + +# define the model +class LSTM(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.net = nn.Sequential(nn.Linear(self.hidden_size, 64), + nn.ReLU(), + nn.Linear(64, 32), + nn.ReLU(), + nn.Linear(32, self.num_actions), + nn.Tanh()) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + 
rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + return self.net(rnn_output), self.log_std_parameter, {"rnn": [rnn_states[0], rnn_states[1]]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = LSTM(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-lstm-sequential] + +# [start-lstm-functional] +import torch +import torch.nn as nn +import torch.nn.functional as F + +from skrl.models.torch import Model, MultivariateGaussianMixin + + +# define the model +class LSTM(MultivariateGaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2, + num_envs=1, num_layers=1, hidden_size=64, sequence_length=10): + Model.__init__(self, observation_space, action_space, device) + MultivariateGaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.num_envs = num_envs + self.num_layers = num_layers + self.hidden_size = hidden_size # Hcell (Hout is Hcell because proj_size = 0) + self.sequence_length = sequence_length + + self.lstm = nn.LSTM(input_size=self.num_observations, + hidden_size=self.hidden_size, + num_layers=self.num_layers, + batch_first=True) # batch_first -> (batch, sequence, features) + + self.fc1 = nn.Linear(self.hidden_size, 64) + self.fc2 = nn.Linear(64, 32) + self.fc3 = nn.Linear(32, self.num_actions) + + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def get_specification(self): + # batch size (N) is the number of envs during rollout + return {"rnn": {"sequence_length": self.sequence_length, + "sizes": [(self.num_layers, self.num_envs, self.hidden_size), # hidden states (D ∗ num_layers, N, Hout) + (self.num_layers, self.num_envs, self.hidden_size)]}} # cell states (D ∗ num_layers, N, Hcell) + + def compute(self, inputs, role): + states = inputs["states"] + terminated = inputs.get("terminated", None) + hidden_states, cell_states = inputs["rnn"][0], inputs["rnn"][1] + + # training + if self.training: + rnn_input = states.view(-1, self.sequence_length, states.shape[-1]) # (N, L, Hin): N=batch_size, L=sequence_length + hidden_states = hidden_states.view(self.num_layers, -1, self.sequence_length, hidden_states.shape[-1]) # (D * num_layers, N, L, Hout) + cell_states = cell_states.view(self.num_layers, -1, self.sequence_length, cell_states.shape[-1]) # (D * num_layers, N, L, Hcell) + # get the hidden/cell states corresponding to the initial sequence + hidden_states = hidden_states[:,:,0,:].contiguous() # (D * num_layers, N, Hout) + cell_states = cell_states[:,:,0,:].contiguous() # (D * num_layers, N, Hcell) + + # reset the RNN state in the middle of a sequence + if terminated is not None and torch.any(terminated): + rnn_outputs = [] + terminated = terminated.view(-1, 
self.sequence_length) + indexes = [0] + (terminated[:,:-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [self.sequence_length] + + for i in range(len(indexes) - 1): + i0, i1 = indexes[i], indexes[i + 1] + rnn_output, (hidden_states, cell_states) = self.lstm(rnn_input[:,i0:i1,:], (hidden_states, cell_states)) + hidden_states[:, (terminated[:,i1-1]), :] = 0 + cell_states[:, (terminated[:,i1-1]), :] = 0 + rnn_outputs.append(rnn_output) + + rnn_states = (hidden_states, cell_states) + rnn_output = torch.cat(rnn_outputs, dim=1) + # no need to reset the RNN state in the sequence + else: + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + # rollout + else: + rnn_input = states.view(-1, 1, states.shape[-1]) # (N, L, Hin): N=num_envs, L=1 + rnn_output, rnn_states = self.lstm(rnn_input, (hidden_states, cell_states)) + + # flatten the RNN output + rnn_output = torch.flatten(rnn_output, start_dim=0, end_dim=1) # (N, L, D ∗ Hout) -> (N * L, D ∗ Hout) + + x = self.fc1(rnn_output) + x = F.relu(x) + x = self.fc2(x) + x = F.relu(x) + x = self.fc3(x) + + return torch.tanh(x), self.log_std_parameter, {"rnn": [rnn_states[0], rnn_states[1]]} + + +# instantiate the model (assumes there is a wrapped environment: env) +policy = LSTM(observation_space=env.observation_space, + action_space=env.action_space, + device=env.device, + clip_actions=True, + clip_log_std=True, + min_log_std=-20, + max_log_std=2, + num_envs=env.num_envs, + num_layers=1, + hidden_size=64, + sequence_length=10) +# [end-lstm-functional] From 297de8e8e612797155e643acac09d5f151a700ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 27 Dec 2022 19:24:53 +0100 Subject: [PATCH 111/157] Add recurrent neural networks to docs --- .../modules/skrl.models.categorical.rst | 172 ++++++++++++++++++ .../modules/skrl.models.deterministic.rst | 172 ++++++++++++++++++ docs/source/modules/skrl.models.gaussian.rst | 172 ++++++++++++++++++ .../skrl.models.multivariate_gaussian.rst | 172 ++++++++++++++++++ 4 files changed, 688 insertions(+) diff --git a/docs/source/modules/skrl.models.categorical.rst b/docs/source/modules/skrl.models.categorical.rst index a54aab6b..f9fbad84 100644 --- a/docs/source/modules/skrl.models.categorical.rst +++ b/docs/source/modules/skrl.models.categorical.rst @@ -40,6 +40,9 @@ Basic usage * Multi-Layer Perceptron (**MLP**) * Convolutional Neural Network (**CNN**) +* Recurrent Neural Network (**RNN**) +* Gated Recurrent Unit RNN (**GRU**) +* Long Short-Term Memory RNN (**LSTM**) .. tabs:: @@ -99,6 +102,175 @@ Basic usage :start-after: [start-cnn-functional] :end-before: [end-cnn-functional] + .. tab:: RNN + + .. image:: ../_static/imgs/model_categorical_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{out} ={} & \text{hidden_size} + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/categorical_model.py + :language: python + :linenos: + :start-after: [start-rnn-sequential] + :end-before: [end-rnn-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/categorical_model.py + :language: python + :linenos: + :start-after: [start-rnn-functional] + :end-before: [end-rnn-functional] + + .. tab:: GRU + + .. image:: ../_static/imgs/model_categorical_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{out} ={} & \text{hidden_size} + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/categorical_model.py + :language: python + :linenos: + :start-after: [start-gru-sequential] + :end-before: [end-gru-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/categorical_model.py + :language: python + :linenos: + :start-after: [start-gru-functional] + :end-before: [end-gru-functional] + + .. tab:: LSTM + + .. image:: ../_static/imgs/model_categorical_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{cell} ={} & \text{hidden_size} \\ + H_{out} ={} & \text{proj_size if } \text{proj_size}>0 \text{ otherwise hidden_size} \\ + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden/cell state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden/cell states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden/cell state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/categorical_model.py + :language: python + :linenos: + :start-after: [start-lstm-sequential] + :end-before: [end-lstm-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/categorical_model.py + :language: python + :linenos: + :start-after: [start-lstm-functional] + :end-before: [end-lstm-functional] + API --- diff --git a/docs/source/modules/skrl.models.deterministic.rst b/docs/source/modules/skrl.models.deterministic.rst index de47a75e..e4abb854 100644 --- a/docs/source/modules/skrl.models.deterministic.rst +++ b/docs/source/modules/skrl.models.deterministic.rst @@ -40,6 +40,9 @@ Basic usage * Multi-Layer Perceptron (**MLP**) * Convolutional Neural Network (**CNN**) +* Recurrent Neural Network (**RNN**) +* Gated Recurrent Unit RNN (**GRU**) +* Long Short-Term Memory RNN (**LSTM**) .. tabs:: @@ -99,6 +102,175 @@ Basic usage :start-after: [start-cnn-functional] :end-before: [end-cnn-functional] + .. tab:: RNN + + .. image:: ../_static/imgs/model_deterministic_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{out} ={} & \text{hidden_size} + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/deterministic_model.py + :language: python + :linenos: + :start-after: [start-rnn-sequential] + :end-before: [end-rnn-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/deterministic_model.py + :language: python + :linenos: + :start-after: [start-rnn-functional] + :end-before: [end-rnn-functional] + + .. tab:: GRU + + .. image:: ../_static/imgs/model_deterministic_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{out} ={} & \text{hidden_size} + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/deterministic_model.py + :language: python + :linenos: + :start-after: [start-gru-sequential] + :end-before: [end-gru-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/deterministic_model.py + :language: python + :linenos: + :start-after: [start-gru-functional] + :end-before: [end-gru-functional] + + .. tab:: LSTM + + .. image:: ../_static/imgs/model_deterministic_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{cell} ={} & \text{hidden_size} \\ + H_{out} ={} & \text{proj_size if } \text{proj_size}>0 \text{ otherwise hidden_size} \\ + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden/cell state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden/cell states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden/cell state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/deterministic_model.py + :language: python + :linenos: + :start-after: [start-lstm-sequential] + :end-before: [end-lstm-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/deterministic_model.py + :language: python + :linenos: + :start-after: [start-lstm-functional] + :end-before: [end-lstm-functional] + API --- diff --git a/docs/source/modules/skrl.models.gaussian.rst b/docs/source/modules/skrl.models.gaussian.rst index a9ac574c..36f78e83 100644 --- a/docs/source/modules/skrl.models.gaussian.rst +++ b/docs/source/modules/skrl.models.gaussian.rst @@ -42,6 +42,9 @@ Basic usage * Multi-Layer Perceptron (**MLP**) * Convolutional Neural Network (**CNN**) +* Recurrent Neural Network (**RNN**) +* Gated Recurrent Unit RNN (**GRU**) +* Long Short-Term Memory RNN (**LSTM**) .. tabs:: @@ -101,6 +104,175 @@ Basic usage :start-after: [start-cnn-functional] :end-before: [end-cnn-functional] + .. tab:: RNN + + .. image:: ../_static/imgs/model_gaussian_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{out} ={} & \text{hidden_size} + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/gaussian_model.py + :language: python + :linenos: + :start-after: [start-rnn-sequential] + :end-before: [end-rnn-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/gaussian_model.py + :language: python + :linenos: + :start-after: [start-rnn-functional] + :end-before: [end-rnn-functional] + + .. tab:: GRU + + .. image:: ../_static/imgs/model_gaussian_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{out} ={} & \text{hidden_size} + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/gaussian_model.py + :language: python + :linenos: + :start-after: [start-gru-sequential] + :end-before: [end-gru-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/gaussian_model.py + :language: python + :linenos: + :start-after: [start-gru-functional] + :end-before: [end-gru-functional] + + .. tab:: LSTM + + .. image:: ../_static/imgs/model_gaussian_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{cell} ={} & \text{hidden_size} \\ + H_{out} ={} & \text{proj_size if } \text{proj_size}>0 \text{ otherwise hidden_size} \\ + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden/cell state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden/cell states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden/cell state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/gaussian_model.py + :language: python + :linenos: + :start-after: [start-lstm-sequential] + :end-before: [end-lstm-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/gaussian_model.py + :language: python + :linenos: + :start-after: [start-lstm-functional] + :end-before: [end-lstm-functional] + API --- diff --git a/docs/source/modules/skrl.models.multivariate_gaussian.rst b/docs/source/modules/skrl.models.multivariate_gaussian.rst index 72bfc9c4..d499af25 100644 --- a/docs/source/modules/skrl.models.multivariate_gaussian.rst +++ b/docs/source/modules/skrl.models.multivariate_gaussian.rst @@ -42,6 +42,9 @@ Basic usage * Multi-Layer Perceptron (**MLP**) * Convolutional Neural Network (**CNN**) +* Recurrent Neural Network (**RNN**) +* Gated Recurrent Unit RNN (**GRU**) +* Long Short-Term Memory RNN (**LSTM**) .. tabs:: @@ -101,6 +104,175 @@ Basic usage :start-after: [start-cnn-functional] :end-before: [end-cnn-functional] + .. tab:: RNN + + .. image:: ../_static/imgs/model_gaussian_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{out} ={} & \text{hidden_size} + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-rnn-sequential] + :end-before: [end-rnn-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-rnn-functional] + :end-before: [end-rnn-functional] + + .. tab:: GRU + + .. image:: ../_static/imgs/model_gaussian_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{out} ={} & \text{hidden_size} + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-gru-sequential] + :end-before: [end-gru-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-gru-functional] + :end-before: [end-gru-functional] + + .. tab:: LSTM + + .. image:: ../_static/imgs/model_gaussian_rnn.svg + :width: 90% + :align: center + + where: + + .. math:: + \begin{aligned} + N ={} & \text{batch size} \\ + L ={} & \text{sequence length} \\ + D ={} & 2 \text{ if bidirectional=True otherwise } 1 \\ + H_{in} ={} & \text{input_size} \\ + H_{cell} ={} & \text{hidden_size} \\ + H_{out} ={} & \text{proj_size if } \text{proj_size}>0 \text{ otherwise hidden_size} \\ + \end{aligned} + + .. raw:: html + +

+
+    The following points are relevant in the definition of recurrent models:
+
+    * The ``.get_specification()`` method must be overwritten to return, under a dictionary key ``"rnn"``, a sub-dictionary that includes the sequence length (under key ``"sequence_length"``) as a number and a list of the dimensions (under key ``"sizes"``) of each initial hidden/cell state
+
+    * The ``.compute()`` method's ``inputs`` parameter will have, at least, the following items in the dictionary:
+
+      * ``"states"``: state of the environment used to make the decision
+      * ``"taken_actions"``: actions taken by the policy for the given states, if applicable
+      * ``"terminated"``: episode termination status for sampled environment transitions. This key is only defined during the training process
+      * ``"rnn"``: list of initial hidden/cell states ordered according to the model specification
+
+    * The ``.compute()`` method must include, under the ``"rnn"`` key of the returned dictionary, a list of each final hidden/cell state
+
+    .. raw:: html
+
+       <br>
+ + .. tabs:: + + .. group-tab:: nn.Sequential + + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-lstm-sequential] + :end-before: [end-lstm-sequential] + + .. group-tab:: nn.functional + + .. literalinclude:: ../snippets/multivariate_gaussian_model.py + :language: python + :linenos: + :start-after: [start-lstm-functional] + :end-before: [end-lstm-functional] + API --- From 255a5d270530236346044860a9fe8b822c2ac79f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 27 Dec 2022 19:28:59 +0100 Subject: [PATCH 112/157] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3cabe39..d93d4867 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). - Allow clipping the gradient norm for DDPG, TD3 and SAC agents - Initialize model biases - Add RNN (RNN, LSTM, GRU and any other variant) support for A2C, DDPG, PPO, SAC, TD3 and TRPO agents +- Allow disabling training/evaluation progressbar ### Changed - Forward model inputs as a Python dictionary [**breaking change**] From b555e948fbcce8c4563c35422cf2341a78509a91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 27 Dec 2022 19:31:24 +0100 Subject: [PATCH 113/157] Update Omniverse Isaac Gym example's hyperparameters --- docs/source/examples/omniisaacgym/ppo_allegro_hand.py | 2 +- docs/source/examples/omniisaacgym/ppo_cartpole.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py index 6377a604..74e50271 100644 --- a/docs/source/examples/omniisaacgym/ppo_allegro_hand.py +++ b/docs/source/examples/omniisaacgym/ppo_allegro_hand.py @@ -78,7 +78,7 @@ def compute(self, inputs, role): cfg_ppo["mini_batches"] = 4 # 16 * 8192 / 32768 cfg_ppo["discount_factor"] = 0.99 cfg_ppo["lambda"] = 0.95 -cfg_ppo["learning_rate"] = 5e-3 +cfg_ppo["learning_rate"] = 5e-4 cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.02} cfg_ppo["random_timesteps"] = 0 diff --git a/docs/source/examples/omniisaacgym/ppo_cartpole.py b/docs/source/examples/omniisaacgym/ppo_cartpole.py index bb544307..9a5202ea 100644 --- a/docs/source/examples/omniisaacgym/ppo_cartpole.py +++ b/docs/source/examples/omniisaacgym/ppo_cartpole.py @@ -14,7 +14,7 @@ # set the seed for reproducibility -set_seed(42) +set_seed(40) # Define the shared model (stochastic and deterministic models) for the agent using mixins. 
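Note on the recurrent snippets and docs added in the patches above: the least obvious step is the index computation that splits a sampled sequence wherever at least one environment terminated, so the hidden (and cell) states can be zeroed before feeding the next sub-sequence to the RNN/GRU/LSTM. The short sketch below (with hypothetical tensor values, not part of any patched file) shows what that expression produces:

    import torch

    # hypothetical case: 2 environments, sequence length 5
    # env 0 terminates at step 1, env 1 terminates at step 3
    terminated = torch.tensor([[False, True, False, False, False],
                               [False, False, False, True, False]])
    sequence_length = terminated.shape[1]

    # same expression used in the snippets' compute() methods
    indexes = [0] + (terminated[:, :-1].any(dim=0).nonzero(as_tuple=True)[0] + 1).tolist() + [sequence_length]
    print(indexes)  # [0, 2, 4, 5]

The forward pass is then run on the sub-sequences [0:2], [2:4] and [4:5], zeroing the states of the environments that terminated at the end of each sub-sequence before continuing.
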
From 6a45d0b8f1d0046a768c443df6158fbe067b0011 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 27 Dec 2022 20:16:29 +0100 Subject: [PATCH 114/157] Update .gitignore --- .gitignore | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.gitignore b/.gitignore index 65099c3b..6b0ca87a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,11 +2,18 @@ # Created by https://www.toptal.com/developers/gitignore/api/python,c,c++,visualstudiocode,cmake # Edit at https://www.toptal.com/developers/gitignore?templates=python,c,c++,visualstudiocode,cmake +### Other files ### +*.xml +*.mp4 +*.zip +*_old.* + ### Library experiments ### *.pt *.npz *.npy *.csv +*.onnx events.out.tfevents.* runs From 360667904269e81a82a191565cf6421d57698115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 27 Dec 2022 21:21:17 +0100 Subject: [PATCH 115/157] Set default device if not specified --- skrl/agents/torch/a2c/a2c.py | 5 +++-- skrl/agents/torch/amp/amp.py | 5 +++-- skrl/agents/torch/base.py | 7 ++++--- skrl/agents/torch/cem/cem.py | 7 ++++--- skrl/agents/torch/ddpg/ddpg.py | 5 +++-- skrl/agents/torch/dqn/ddqn.py | 5 +++-- skrl/agents/torch/dqn/dqn.py | 5 +++-- skrl/agents/torch/ppo/ppo.py | 5 +++-- skrl/agents/torch/q_learning/q_learning.py | 5 +++-- skrl/agents/torch/sac/sac.py | 5 +++-- skrl/agents/torch/sarsa/sarsa.py | 5 +++-- skrl/agents/torch/td3/td3.py | 5 +++-- skrl/agents/torch/trpo/trpo.py | 5 +++-- skrl/models/torch/base.py | 7 ++++--- .../resources/schedulers/torch/kl_adaptive.py | 14 ++++++------- skrl/utils/model_instantiators.py | 20 +++++++++++-------- 16 files changed, 64 insertions(+), 46 deletions(-) diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 68b76bc6..3dd069aa 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -60,7 +60,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Advantage Actor Critic (A2C) @@ -76,7 +76,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
+ If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 1e38beb7..7c3d1fe5 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -77,7 +77,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None, amp_observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, motion_dataset: Optional[Memory] = None, @@ -101,7 +101,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 46f470bf..74c7e4aa 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -21,7 +21,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Base class that represent a RL agent @@ -35,7 +35,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
+ If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict @@ -43,8 +44,8 @@ def __init__(self, self.models = models self.observation_space = observation_space self.action_space = action_space - self.device = torch.device(device) self.cfg = cfg if cfg is not None else {} + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if device is None else torch.device(device) if type(memory) is list: self.memory = memory[0] diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index e646bfa5..5bc906b1 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -50,7 +50,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Cross-Entropy Method (CEM) @@ -66,7 +66,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict @@ -282,7 +283,7 @@ def _update(self, timestep: int, timesteps: int) -> None: # compute policy loss policy_loss = F.cross_entropy(scores, elite_actions.view(-1)) - # optimize policy + # optimization step self.optimizer.zero_grad() policy_loss.backward() self.optimizer.step() diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index 5b0bbf65..a5a73e4c 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -62,7 +62,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Deep Deterministic Policy Gradient (DDPG) @@ -78,7 +78,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
+ If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 5b22384d..54c37666 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -61,7 +61,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Double Deep Q-Network (DDQN) @@ -77,7 +77,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index fce5c61b..8cfaee3c 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -61,7 +61,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Deep Q-Network (DQN) @@ -77,7 +77,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
+ If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 20096c41..5d7137b8 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -67,7 +67,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Proximal Policy Optimization (PPO) @@ -83,7 +83,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index d3622310..23a5d1c7 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -41,7 +41,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Q-learning @@ -57,7 +57,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
+ If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index b0e60cd0..5d316b89 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -62,7 +62,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Soft Actor-Critic (SAC) @@ -78,7 +78,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index fedbd623..4c6e4359 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -41,7 +41,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """State Action Reward State Action (SARSA) @@ -57,7 +57,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
+ If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index 7411a809..44e84f4a 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -67,7 +67,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Twin Delayed DDPG (TD3) @@ -83,7 +83,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index 5f224610..870d72d1 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -67,7 +67,7 @@ def __init__(self, memory: Optional[Union[Memory, Tuple[Memory]]] = None, observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, cfg: Optional[dict] = None) -> None: """Trust Region Policy Optimization (TRPO) @@ -83,7 +83,8 @@ def __init__(self, :type observation_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional :param action_space: Action space or shape (default: None) :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Computing device (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param cfg: Configuration dictionary :type cfg: dict diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index b7127764..d18846bf 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -14,7 +14,7 @@ class Model(torch.nn.Module): def __init__(self, observation_space: Union[int, Sequence[int], gym.Space, gymnasium.Space], action_space: Union[int, Sequence[int], gym.Space, gymnasium.Space], - device: Union[str, torch.device] = "cuda:0") -> None: + device: Optional[Union[str, torch.device]] = None) -> None: """Base class representing a function approximator The following properties are defined: @@ -31,7 +31,8 @@ def __init__(self, :param action_space: Action space or shape. 
The ``num_actions`` property will contain the size of that space :type action_space: int, sequence of int, gym.Space, gymnasium.Space - :param device: Device on which a torch tensor is or will be allocated (default: ``"cuda:0"``) + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional Custom models should override the ``act`` method:: @@ -53,7 +54,7 @@ def act(self, inputs, role=""): """ super(Model, self).__init__() - self.device = torch.device(device) + self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if device is None else torch.device(device) self.observation_space = observation_space self.action_space = action_space diff --git a/skrl/resources/schedulers/torch/kl_adaptive.py b/skrl/resources/schedulers/torch/kl_adaptive.py index efb14a28..4e942b09 100644 --- a/skrl/resources/schedulers/torch/kl_adaptive.py +++ b/skrl/resources/schedulers/torch/kl_adaptive.py @@ -36,19 +36,19 @@ def __init__(self, :param optimizer: Wrapped optimizer :type optimizer: torch.optim.Optimizer - :param kl_threshold: Threshold for KL divergence (default: 0.008) + :param kl_threshold: Threshold for KL divergence (default: ``0.008``) :type kl_threshold: float, optional - :param min_lr: Lower bound for learning rate (default: 1e-6) + :param min_lr: Lower bound for learning rate (default: ``1e-6``) :type min_lr: float, optional - :param max_lr: Upper bound for learning rate (default: 1e-2) + :param max_lr: Upper bound for learning rate (default: ``1e-2``) :type max_lr: float, optional - :param kl_factor: The number used to modify the KL divergence threshold (default: 2) + :param kl_factor: The number used to modify the KL divergence threshold (default: ``2``) :type kl_factor: float, optional - :param lr_factor: The number used to modify the learning rate (default: 1.5) + :param lr_factor: The number used to modify the learning rate (default: ``1.5``) :type lr_factor: float, optional - :param last_epoch: The index of last epoch (default: -1) + :param last_epoch: The index of last epoch (default: ``-1``) :type last_epoch: int, optional - :param verbose: Verbose mode (default: False) + :param verbose: Verbose mode (default: ``False``) :type verbose: bool, optional """ super().__init__(optimizer, last_epoch, verbose) diff --git a/skrl/utils/model_instantiators.py b/skrl/utils/model_instantiators.py index f8fd781a..5734d5c6 100644 --- a/skrl/utils/model_instantiators.py +++ b/skrl/utils/model_instantiators.py @@ -133,7 +133,7 @@ def _generate_sequential(model: Model, def gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, clip_actions: bool = False, clip_log_std: bool = True, min_log_std: float = -20, @@ -152,7 +152,8 @@ def gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, :param action_space: Action space or shape (default: None). If it is not None, the num_actions property will contain the size of that space :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Device on which the model will be trained (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). 
+ If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param clip_actions: Flag to indicate whether the actions should be clipped (default: False) :type clip_actions: bool, optional @@ -225,7 +226,7 @@ def compute(self, inputs, role=""): def multivariate_gaussian_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, clip_actions: bool = False, clip_log_std: bool = True, min_log_std: float = -20, @@ -244,7 +245,8 @@ def multivariate_gaussian_model(observation_space: Optional[Union[int, Tuple[int :param action_space: Action space or shape (default: None). If it is not None, the num_actions property will contain the size of that space :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Device on which the model will be trained (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param clip_actions: Flag to indicate whether the actions should be clipped (default: False) :type clip_actions: bool, optional @@ -317,7 +319,7 @@ def compute(self, inputs, role=""): def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, clip_actions: bool = False, input_shape: Shape = Shape.STATES, hiddens: list = [256, 256], @@ -333,7 +335,8 @@ def deterministic_model(observation_space: Optional[Union[int, Tuple[int], gym.S :param action_space: Action space or shape (default: None). If it is not None, the num_actions property will contain the size of that space :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param clip_actions: Flag to indicate whether the actions should be clipped to the action space (default: False) :type clip_actions: bool, optional @@ -395,7 +398,7 @@ def compute(self, inputs, role=""): def categorical_model(observation_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, action_space: Optional[Union[int, Tuple[int], gym.Space, gymnasium.Space]] = None, - device: Union[str, torch.device] = "cuda:0", + device: Optional[Union[str, torch.device]] = None, unnormalized_log_prob: bool = False, input_shape: Shape = Shape.STATES, hiddens: list = [256, 256], @@ -410,7 +413,8 @@ def categorical_model(observation_space: Optional[Union[int, Tuple[int], gym.Spa :param action_space: Action space or shape (default: None). 
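The device arguments touched above all resolve the same way once these patches are applied: ``None`` falls back to ``"cuda:0"`` when CUDA is available and to ``"cpu"`` otherwise, exactly as done in ``skrl.models.torch.base.Model``. A minimal standalone sketch of that resolution logic (the helper name ``resolve_device`` is illustrative only, not part of skrl):

    import torch

    def resolve_device(device=None):
        # None -> "cuda:0" if available, otherwise "cpu"; anything else is passed to torch.device
        if device is None:
            return torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        return torch.device(device)

    print(resolve_device())       # cuda:0 on a CUDA machine, cpu otherwise
    print(resolve_device("cpu"))  # an explicit device is respected as-is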
If it is not None, the num_actions property will contain the size of that space :type action_space: int, tuple or list of integers, gym.Space, gymnasium.Space or None, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") + :param device: Device on which a torch tensor is or will be allocated (default: ``None``). + If None, the device will be either ``"cuda:0"`` if available or ``"cpu"`` :type device: str or torch.device, optional :param unnormalized_log_prob: Flag to indicate how to be interpreted the model's output (default: True). If True, the model's output is interpreted as unnormalized log probabilities From 8176509b4569b585a1eb8e79e06abd06f68b6bcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Tue, 27 Dec 2022 21:42:30 +0100 Subject: [PATCH 116/157] Improve agent's algorithms in docs --- docs/source/modules/skrl.agents.cem.rst | 20 +++++++++++++++----- docs/source/modules/skrl.agents.ddqn.rst | 11 +++++++---- docs/source/modules/skrl.agents.dqn.rst | 11 +++++++---- 3 files changed, 29 insertions(+), 13 deletions(-) diff --git a/docs/source/modules/skrl.agents.cem.rst b/docs/source/modules/skrl.agents.cem.rst index 8d79dc5f..948ea1c5 100644 --- a/docs/source/modules/skrl.agents.cem.rst +++ b/docs/source/modules/skrl.agents.cem.rst @@ -4,6 +4,11 @@ Cross-Entropy Method (CEM) Algorithm implementation ^^^^^^^^^^^^^^^^^^^^^^^^ +| Main notation/symbols: +| - policy function approximator (:math:`\pi_\theta`) +| - states (:math:`s`), actions (:math:`a`), rewards (:math:`r`), next states (:math:`s'`), dones (:math:`d`) +| - loss (:math:`L`) + **Decision making** (:literal:`act(...)`) | :math:`a \leftarrow \pi_\theta(s)` @@ -13,17 +18,22 @@ Algorithm implementation | :green:`# sample all memory` | :math:`s, a, r, s', d \leftarrow` states, actions, rewards, next_states, dones | :green:`# compute discounted return threshold` -| :math:`[G] \leftarrow \sum_{t=0}^{E-1} \gamma^{t} r_t` for each episode -| :math:`G_{_{bound}} \leftarrow q_{th_{percentile}}([G])` +| :math:`[G] \leftarrow \sum_{t=0}^{E-1}` :guilabel:`discount_factor`:math:`^{t} \, r_t` for each episode +| :math:`G_{_{bound}} \leftarrow q_{th_{quantile}}([G])` at the given :guilabel:`percentile` | :green:`# get elite states and actions` | :math:`s_{_{elite}} \leftarrow s[G \geq G_{_{bound}}]` | :math:`a_{_{elite}} \leftarrow a[G \geq G_{_{bound}}]` | :green:`# compute scores for the elite states` | :math:`scores \leftarrow \theta(s_{_{elite}})` | :green:`# compute policy loss` -| :math:`{Loss}_{policy} \leftarrow -\sum_{i=1}^{N} a_{_{elite}} \log(scores)` -| :green:`# optimize policy` -| :math:`\nabla_{\theta} {Loss}_{policy}` +| :math:`L_{\pi_\theta} \leftarrow -\sum_{i=1}^{N} a_{_{elite}} \log(scores)` +| :green:`# optimization step` +| reset :math:`\text{optimizer}_\theta` +| :math:`\nabla_{\theta} L_{\pi_\theta}` +| step :math:`\text{optimizer}_\theta` +| :green:`# update learning rate` +| **IF** there is a :guilabel:`learning_rate_scheduler` **THEN** +| step :math:`\text{scheduler}_\theta (\text{optimizer}_\theta)` Configuration and hyperparameters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/source/modules/skrl.agents.ddqn.rst b/docs/source/modules/skrl.agents.ddqn.rst index 36017091..77eb36c4 100644 --- a/docs/source/modules/skrl.agents.ddqn.rst +++ b/docs/source/modules/skrl.agents.ddqn.rst @@ -16,13 +16,13 @@ Algorithm implementation **Learning algorithm** (:literal:`_update(...)`) | :green:`# sample a batch from memory` -| :math:`s, a, r, 
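The CEM update described above keeps only the transitions whose episode return clears the chosen percentile and fits the policy to the corresponding actions with a cross-entropy loss. A compact sketch of that elite selection, assuming ``returns`` holds, for every stored transition, the discounted return of the episode it belongs to (tensor names are illustrative, not skrl internals):

    import torch
    import torch.nn.functional as F

    def cem_policy_loss(logits, actions, returns, percentile=70):
        # discounted-return threshold at the given percentile
        bound = torch.quantile(returns, percentile / 100.0)
        elite = returns >= bound
        # cross-entropy between the policy scores and the elite actions
        return F.cross_entropy(logits[elite], actions[elite].long())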
s', d \leftarrow` states, actions, rewards, next_states, dones +| [:math:`s, a, r, s', d`] :math:`\leftarrow` states, actions, rewards, next_states, dones of size :guilabel:`batch_size` | :green:`# gradient steps` -| **FOR** each gradient step **DO** +| **FOR** each gradient step up to :guilabel:`gradient_steps` **DO** | :green:`# compute target values` | :math:`Q' \leftarrow Q_{\phi_{target}}(s')` | :math:`Q_{_{target}} \leftarrow Q'[\underset{a}{\arg\max} \; Q_\phi(s')] \qquad` :gray:`# the only difference with DQN` -| :math:`y \leftarrow r + \gamma \; \neg d \; Q_{_{target}}` +| :math:`y \leftarrow r \;+` :guilabel:`discount_factor` :math:`\neg d \; Q_{_{target}}` | :green:`# compute Q-network loss` | :math:`Q \leftarrow Q_\phi(s)[a]` | :math:`{Loss}_{Q_\phi} \leftarrow \frac{1}{N} \sum_{i=1}^N (Q - y)^2` @@ -30,7 +30,10 @@ Algorithm implementation | :math:`\nabla_{\phi} {Loss}_{Q_\phi}` | :green:`# update target network` | **IF** it's time to update target network **THEN** -| :math:`\phi_{target} \leftarrow \tau \; \phi + (1 - \tau) \phi_{target}` +| :math:`\phi_{target} \leftarrow` :guilabel:`polyak` :math:`\phi + (1 \;-` :guilabel:`polyak` :math:`) \phi_{target}` +| :green:`# update learning rate` +| **IF** there is a :guilabel:`learning_rate_scheduler` **THEN** +| step :math:`\text{scheduler}_\phi (\text{optimizer}_\phi)` Configuration and hyperparameters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/source/modules/skrl.agents.dqn.rst b/docs/source/modules/skrl.agents.dqn.rst index 43d60647..991733c5 100644 --- a/docs/source/modules/skrl.agents.dqn.rst +++ b/docs/source/modules/skrl.agents.dqn.rst @@ -16,13 +16,13 @@ Algorithm implementation **Learning algorithm** (:literal:`_update(...)`) | :green:`# sample a batch from memory` -| :math:`s, a, r, s', d \leftarrow` states, actions, rewards, next_states, dones +| [:math:`s, a, r, s', d`] :math:`\leftarrow` states, actions, rewards, next_states, dones of size :guilabel:`batch_size` | :green:`# gradient steps` -| **FOR** each gradient step **DO** +| **FOR** each gradient step up to :guilabel:`gradient_steps` **DO** | :green:`# compute target values` | :math:`Q' \leftarrow Q_{\phi_{target}}(s')` | :math:`Q_{_{target}} \leftarrow \underset{a}{\max} \; Q' \qquad` :gray:`# the only difference with DDQN` -| :math:`y \leftarrow r + \gamma \; \neg d \; Q_{_{target}}` +| :math:`y \leftarrow r \;+` :guilabel:`discount_factor` :math:`\neg d \; Q_{_{target}}` | :green:`# compute Q-network loss` | :math:`Q \leftarrow Q_\phi(s)[a]` | :math:`{Loss}_{Q_\phi} \leftarrow \frac{1}{N} \sum_{i=1}^N (Q - y)^2` @@ -30,7 +30,10 @@ Algorithm implementation | :math:`\nabla_{\phi} {Loss}_{Q_\phi}` | :green:`# update target network` | **IF** it's time to update target network **THEN** -| :math:`\phi_{target} \leftarrow \tau \; \phi + (1 - \tau) \phi_{target}` +| :math:`\phi_{target} \leftarrow` :guilabel:`polyak` :math:`\phi + (1 \;-` :guilabel:`polyak` :math:`) \phi_{target}` +| :green:`# update learning rate` +| **IF** there is a :guilabel:`learning_rate_scheduler` **THEN** +| step :math:`\text{scheduler}_\phi (\text{optimizer}_\phi)` Configuration and hyperparameters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From d7baa522f2ff46592a78bbbfbd4b805588c851a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Wed, 28 Dec 2022 12:42:06 +0100 Subject: [PATCH 117/157] Replace relative imports with absolute imports --- skrl/agents/torch/__init__.py | 2 +- skrl/agents/torch/a2c/__init__.py | 2 +- skrl/agents/torch/a2c/a2c.py | 8 ++++---- 
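Both listings above build the same bootstrapped target and differ only in how the target Q-value is selected; a small PyTorch sketch of the two targets and of the polyak update, using placeholder names (``q_net``, ``target_q_net``, a boolean ``dones`` tensor and column-shaped ``rewards`` are assumptions for the example, not skrl objects):

    import torch

    def dqn_and_ddqn_targets(q_net, target_q_net, rewards, next_states, dones, discount_factor):
        with torch.no_grad():
            next_q = target_q_net(next_states)                                   # Q'(s')
            # DQN: bootstrap with the maximum target Q-value
            y_dqn = next_q.max(dim=1, keepdim=True).values
            # DDQN: pick the action with the online network, evaluate it with the target network
            best_actions = q_net(next_states).argmax(dim=1, keepdim=True)
            y_ddqn = next_q.gather(1, best_actions)
            not_done = (~dones).float()
            return rewards + discount_factor * not_done * y_dqn, \
                   rewards + discount_factor * not_done * y_ddqn

    def polyak_update(q_net, target_q_net, polyak):
        # target <- polyak * online + (1 - polyak) * target
        for p, p_target in zip(q_net.parameters(), target_q_net.parameters()):
            p_target.data.mul_(1 - polyak).add_(polyak * p.data)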
skrl/agents/torch/amp/__init__.py | 2 +- skrl/agents/torch/amp/amp.py | 6 +++--- skrl/agents/torch/base.py | 4 ++-- skrl/agents/torch/cem/__init__.py | 2 +- skrl/agents/torch/cem/cem.py | 6 +++--- skrl/agents/torch/ddpg/__init__.py | 2 +- skrl/agents/torch/ddpg/ddpg.py | 6 +++--- skrl/agents/torch/dqn/__init__.py | 4 ++-- skrl/agents/torch/dqn/ddqn.py | 6 +++--- skrl/agents/torch/dqn/dqn.py | 6 +++--- skrl/agents/torch/ppo/__init__.py | 2 +- skrl/agents/torch/ppo/ppo.py | 8 ++++---- skrl/agents/torch/q_learning/__init__.py | 2 +- skrl/agents/torch/q_learning/q_learning.py | 6 +++--- skrl/agents/torch/sac/__init__.py | 2 +- skrl/agents/torch/sac/sac.py | 6 +++--- skrl/agents/torch/sarsa/__init__.py | 2 +- skrl/agents/torch/sarsa/sarsa.py | 6 +++--- skrl/agents/torch/td3/__init__.py | 2 +- skrl/agents/torch/td3/td3.py | 6 +++--- skrl/agents/torch/trpo/__init__.py | 2 +- skrl/agents/torch/trpo/trpo.py | 6 +++--- skrl/envs/torch/__init__.py | 12 ++++++------ skrl/memories/torch/__init__.py | 6 +++--- skrl/memories/torch/prioritized.py | 2 +- skrl/memories/torch/random.py | 2 +- skrl/models/torch/__init__.py | 12 ++++++------ skrl/models/torch/tabular.py | 2 +- skrl/resources/noises/torch/__init__.py | 6 +++--- skrl/resources/noises/torch/gaussian.py | 2 +- skrl/resources/noises/torch/ornstein_uhlenbeck.py | 2 +- skrl/resources/preprocessors/torch/__init__.py | 2 +- skrl/resources/schedulers/torch/__init__.py | 2 +- skrl/trainers/torch/__init__.py | 10 +++++----- skrl/trainers/torch/base.py | 4 ++-- skrl/trainers/torch/manual.py | 6 +++--- skrl/trainers/torch/parallel.py | 6 +++--- skrl/trainers/torch/sequential.py | 6 +++--- skrl/utils/model_instantiators.py | 10 +++++----- 42 files changed, 99 insertions(+), 99 deletions(-) diff --git a/skrl/agents/torch/__init__.py b/skrl/agents/torch/__init__.py index 8e83aa5e..16fec55f 100644 --- a/skrl/agents/torch/__init__.py +++ b/skrl/agents/torch/__init__.py @@ -1 +1 @@ -from .base import Agent +from skrl.agents.torch.base import Agent diff --git a/skrl/agents/torch/a2c/__init__.py b/skrl/agents/torch/a2c/__init__.py index 42f75ba4..d28e13b5 100644 --- a/skrl/agents/torch/a2c/__init__.py +++ b/skrl/agents/torch/a2c/__init__.py @@ -1 +1 @@ -from .a2c import A2C, A2C_DEFAULT_CONFIG +from skrl.agents.torch.a2c.a2c import A2C, A2C_DEFAULT_CONFIG diff --git a/skrl/agents/torch/a2c/a2c.py b/skrl/agents/torch/a2c/a2c.py index 3dd069aa..ed9fe529 100644 --- a/skrl/agents/torch/a2c/a2c.py +++ b/skrl/agents/torch/a2c/a2c.py @@ -8,11 +8,11 @@ import torch.nn as nn import torch.nn.functional as F -from ....memories.torch import Memory -from ....models.torch import Model -from ....resources.schedulers.torch import KLAdaptiveRL +from skrl.memories.torch import Memory +from skrl.models.torch import Model +from skrl.resources.schedulers.torch import KLAdaptiveRL -from .. 
import Agent +from skrl.agents.torch import Agent A2C_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/amp/__init__.py b/skrl/agents/torch/amp/__init__.py index 53617227..d985f553 100644 --- a/skrl/agents/torch/amp/__init__.py +++ b/skrl/agents/torch/amp/__init__.py @@ -1 +1 @@ -from .amp import AMP, AMP_DEFAULT_CONFIG +from skrl.agents.torch.amp.amp import AMP, AMP_DEFAULT_CONFIG diff --git a/skrl/agents/torch/amp/amp.py b/skrl/agents/torch/amp/amp.py index 7c3d1fe5..2c413dff 100644 --- a/skrl/agents/torch/amp/amp.py +++ b/skrl/agents/torch/amp/amp.py @@ -9,10 +9,10 @@ import torch.nn as nn import torch.nn.functional as F -from ....memories.torch import Memory -from ....models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model -from .. import Agent +from skrl.agents.torch import Agent AMP_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/base.py b/skrl/agents/torch/base.py index 74c7e4aa..7d0c4c00 100644 --- a/skrl/agents/torch/base.py +++ b/skrl/agents/torch/base.py @@ -11,8 +11,8 @@ from torch.utils.tensorboard import SummaryWriter from skrl import logger -from ...memories.torch import Memory -from ...models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model class Agent: diff --git a/skrl/agents/torch/cem/__init__.py b/skrl/agents/torch/cem/__init__.py index 29427ff9..89bcc376 100644 --- a/skrl/agents/torch/cem/__init__.py +++ b/skrl/agents/torch/cem/__init__.py @@ -1 +1 @@ -from .cem import CEM, CEM_DEFAULT_CONFIG +from skrl.agents.torch.cem.cem import CEM, CEM_DEFAULT_CONFIG diff --git a/skrl/agents/torch/cem/cem.py b/skrl/agents/torch/cem/cem.py index 5bc906b1..99c524b4 100644 --- a/skrl/agents/torch/cem/cem.py +++ b/skrl/agents/torch/cem/cem.py @@ -6,10 +6,10 @@ import torch import torch.nn.functional as F -from ....memories.torch import Memory -from ....models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model -from .. import Agent +from skrl.agents.torch import Agent CEM_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/ddpg/__init__.py b/skrl/agents/torch/ddpg/__init__.py index da2ee7a0..27338168 100644 --- a/skrl/agents/torch/ddpg/__init__.py +++ b/skrl/agents/torch/ddpg/__init__.py @@ -1 +1 @@ -from .ddpg import DDPG, DDPG_DEFAULT_CONFIG +from skrl.agents.torch.ddpg.ddpg import DDPG, DDPG_DEFAULT_CONFIG diff --git a/skrl/agents/torch/ddpg/ddpg.py b/skrl/agents/torch/ddpg/ddpg.py index a5a73e4c..adf3fed0 100644 --- a/skrl/agents/torch/ddpg/ddpg.py +++ b/skrl/agents/torch/ddpg/ddpg.py @@ -7,10 +7,10 @@ import torch.nn as nn import torch.nn.functional as F -from ....memories.torch import Memory -from ....models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model -from .. 
import Agent +from skrl.agents.torch import Agent DDPG_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/dqn/__init__.py b/skrl/agents/torch/dqn/__init__.py index eaecb47b..6532813e 100644 --- a/skrl/agents/torch/dqn/__init__.py +++ b/skrl/agents/torch/dqn/__init__.py @@ -1,2 +1,2 @@ -from .dqn import DQN, DQN_DEFAULT_CONFIG -from .ddqn import DDQN, DDQN_DEFAULT_CONFIG +from skrl.agents.torch.dqn.dqn import DQN, DQN_DEFAULT_CONFIG +from skrl.agents.torch.dqn.ddqn import DDQN, DDQN_DEFAULT_CONFIG diff --git a/skrl/agents/torch/dqn/ddqn.py b/skrl/agents/torch/dqn/ddqn.py index 54c37666..90c144ce 100644 --- a/skrl/agents/torch/dqn/ddqn.py +++ b/skrl/agents/torch/dqn/ddqn.py @@ -7,10 +7,10 @@ import torch import torch.nn.functional as F -from ....memories.torch import Memory -from ....models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model -from .. import Agent +from skrl.agents.torch import Agent DDQN_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/dqn/dqn.py b/skrl/agents/torch/dqn/dqn.py index 8cfaee3c..d8c74333 100644 --- a/skrl/agents/torch/dqn/dqn.py +++ b/skrl/agents/torch/dqn/dqn.py @@ -7,10 +7,10 @@ import torch import torch.nn.functional as F -from ....memories.torch import Memory -from ....models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model -from .. import Agent +from skrl.agents.torch import Agent DQN_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/ppo/__init__.py b/skrl/agents/torch/ppo/__init__.py index 04f40be3..a422f115 100644 --- a/skrl/agents/torch/ppo/__init__.py +++ b/skrl/agents/torch/ppo/__init__.py @@ -1 +1 @@ -from .ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.agents.torch.ppo.ppo import PPO, PPO_DEFAULT_CONFIG diff --git a/skrl/agents/torch/ppo/ppo.py b/skrl/agents/torch/ppo/ppo.py index 5d7137b8..aba0feed 100644 --- a/skrl/agents/torch/ppo/ppo.py +++ b/skrl/agents/torch/ppo/ppo.py @@ -8,11 +8,11 @@ import torch.nn as nn import torch.nn.functional as F -from ....memories.torch import Memory -from ....models.torch import Model -from ....resources.schedulers.torch import KLAdaptiveRL +from skrl.memories.torch import Memory +from skrl.models.torch import Model +from skrl.resources.schedulers.torch import KLAdaptiveRL -from .. import Agent +from skrl.agents.torch import Agent PPO_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/q_learning/__init__.py b/skrl/agents/torch/q_learning/__init__.py index 85ba23ea..a2b43800 100644 --- a/skrl/agents/torch/q_learning/__init__.py +++ b/skrl/agents/torch/q_learning/__init__.py @@ -1 +1 @@ -from .q_learning import Q_LEARNING, Q_LEARNING_DEFAULT_CONFIG +from skrl.agents.torch.q_learning.q_learning import Q_LEARNING, Q_LEARNING_DEFAULT_CONFIG diff --git a/skrl/agents/torch/q_learning/q_learning.py b/skrl/agents/torch/q_learning/q_learning.py index 23a5d1c7..e6868334 100644 --- a/skrl/agents/torch/q_learning/q_learning.py +++ b/skrl/agents/torch/q_learning/q_learning.py @@ -5,10 +5,10 @@ import torch -from ....memories.torch import Memory -from ....models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model -from .. 
import Agent +from skrl.agents.torch import Agent Q_LEARNING_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/sac/__init__.py b/skrl/agents/torch/sac/__init__.py index 1859419e..5cd0d7cb 100644 --- a/skrl/agents/torch/sac/__init__.py +++ b/skrl/agents/torch/sac/__init__.py @@ -1 +1 @@ -from .sac import SAC, SAC_DEFAULT_CONFIG +from skrl.agents.torch.sac.sac import SAC, SAC_DEFAULT_CONFIG diff --git a/skrl/agents/torch/sac/sac.py b/skrl/agents/torch/sac/sac.py index 5d316b89..5fd955df 100644 --- a/skrl/agents/torch/sac/sac.py +++ b/skrl/agents/torch/sac/sac.py @@ -9,10 +9,10 @@ import torch.nn as nn import torch.nn.functional as F -from ....memories.torch import Memory -from ....models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model -from .. import Agent +from skrl.agents.torch import Agent SAC_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/sarsa/__init__.py b/skrl/agents/torch/sarsa/__init__.py index 56a71e30..bfd794b7 100644 --- a/skrl/agents/torch/sarsa/__init__.py +++ b/skrl/agents/torch/sarsa/__init__.py @@ -1 +1 @@ -from .sarsa import SARSA, SARSA_DEFAULT_CONFIG +from skrl.agents.torch.sarsa.sarsa import SARSA, SARSA_DEFAULT_CONFIG diff --git a/skrl/agents/torch/sarsa/sarsa.py b/skrl/agents/torch/sarsa/sarsa.py index 4c6e4359..6f1336e8 100644 --- a/skrl/agents/torch/sarsa/sarsa.py +++ b/skrl/agents/torch/sarsa/sarsa.py @@ -5,10 +5,10 @@ import torch -from ....memories.torch import Memory -from ....models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model -from .. import Agent +from skrl.agents.torch import Agent SARSA_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/td3/__init__.py b/skrl/agents/torch/td3/__init__.py index 7b5409a6..69f07c66 100644 --- a/skrl/agents/torch/td3/__init__.py +++ b/skrl/agents/torch/td3/__init__.py @@ -1 +1 @@ -from .td3 import TD3, TD3_DEFAULT_CONFIG +from skrl.agents.torch.td3.td3 import TD3, TD3_DEFAULT_CONFIG diff --git a/skrl/agents/torch/td3/td3.py b/skrl/agents/torch/td3/td3.py index 44e84f4a..57441eda 100644 --- a/skrl/agents/torch/td3/td3.py +++ b/skrl/agents/torch/td3/td3.py @@ -8,10 +8,10 @@ import torch.nn as nn import torch.nn.functional as F -from ....memories.torch import Memory -from ....models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model -from .. import Agent +from skrl.agents.torch import Agent TD3_DEFAULT_CONFIG = { diff --git a/skrl/agents/torch/trpo/__init__.py b/skrl/agents/torch/trpo/__init__.py index 152ee2a0..c4dfd054 100644 --- a/skrl/agents/torch/trpo/__init__.py +++ b/skrl/agents/torch/trpo/__init__.py @@ -1 +1 @@ -from .trpo import TRPO, TRPO_DEFAULT_CONFIG +from skrl.agents.torch.trpo.trpo import TRPO, TRPO_DEFAULT_CONFIG diff --git a/skrl/agents/torch/trpo/trpo.py b/skrl/agents/torch/trpo/trpo.py index 870d72d1..1f90b864 100644 --- a/skrl/agents/torch/trpo/trpo.py +++ b/skrl/agents/torch/trpo/trpo.py @@ -9,10 +9,10 @@ from torch.nn.utils.convert_parameters import parameters_to_vector from torch.nn.utils.convert_parameters import vector_to_parameters -from ....memories.torch import Memory -from ....models.torch import Model +from skrl.memories.torch import Memory +from skrl.models.torch import Model -from .. 
import Agent +from skrl.agents.torch import Agent TRPO_DEFAULT_CONFIG = { diff --git a/skrl/envs/torch/__init__.py b/skrl/envs/torch/__init__.py index a11c34c9..67e00a5a 100644 --- a/skrl/envs/torch/__init__.py +++ b/skrl/envs/torch/__init__.py @@ -1,7 +1,7 @@ -from .wrappers import wrap_env -from .wrappers import Wrapper +from skrl.envs.torch.wrappers import wrap_env +from skrl.envs.torch.wrappers import Wrapper -from .loaders import load_isaacgym_env_preview2 -from .loaders import load_isaacgym_env_preview3 -from .loaders import load_isaacgym_env_preview4 -from .loaders import load_omniverse_isaacgym_env +from skrl.envs.torch.loaders import load_isaacgym_env_preview2 +from skrl.envs.torch.loaders import load_isaacgym_env_preview3 +from skrl.envs.torch.loaders import load_isaacgym_env_preview4 +from skrl.envs.torch.loaders import load_omniverse_isaacgym_env diff --git a/skrl/memories/torch/__init__.py b/skrl/memories/torch/__init__.py index 184c1a29..ae882b51 100644 --- a/skrl/memories/torch/__init__.py +++ b/skrl/memories/torch/__init__.py @@ -1,4 +1,4 @@ -from .base import Memory +from skrl.memories.torch.base import Memory -from .random import RandomMemory -from .prioritized import PrioritizedMemory +from skrl.memories.torch.random import RandomMemory +from skrl.memories.torch.prioritized import PrioritizedMemory diff --git a/skrl/memories/torch/prioritized.py b/skrl/memories/torch/prioritized.py index 4a78062a..61b518a3 100644 --- a/skrl/memories/torch/prioritized.py +++ b/skrl/memories/torch/prioritized.py @@ -4,7 +4,7 @@ import torch -from .base import Memory +from skrl.memories.torch import Memory class PrioritizedMemory(Memory): diff --git a/skrl/memories/torch/random.py b/skrl/memories/torch/random.py index 9d98b8e6..01817a28 100644 --- a/skrl/memories/torch/random.py +++ b/skrl/memories/torch/random.py @@ -2,7 +2,7 @@ import torch -from .base import Memory +from skrl.memories.torch import Memory class RandomMemory(Memory): diff --git a/skrl/models/torch/__init__.py b/skrl/models/torch/__init__.py index 9a4e879c..158bb6c8 100644 --- a/skrl/models/torch/__init__.py +++ b/skrl/models/torch/__init__.py @@ -1,7 +1,7 @@ -from .base import Model +from skrl.models.torch.base import Model -from .tabular import TabularMixin -from .gaussian import GaussianMixin -from .categorical import CategoricalMixin -from .deterministic import DeterministicMixin -from .multivariate_gaussian import MultivariateGaussianMixin +from skrl.models.torch.tabular import TabularMixin +from skrl.models.torch.gaussian import GaussianMixin +from skrl.models.torch.categorical import CategoricalMixin +from skrl.models.torch.deterministic import DeterministicMixin +from skrl.models.torch.multivariate_gaussian import MultivariateGaussianMixin diff --git a/skrl/models/torch/tabular.py b/skrl/models/torch/tabular.py index 487fe466..4d1e994f 100644 --- a/skrl/models/torch/tabular.py +++ b/skrl/models/torch/tabular.py @@ -2,7 +2,7 @@ import torch -from . 
import Model +from skrl.models.torch import Model class TabularMixin: diff --git a/skrl/resources/noises/torch/__init__.py b/skrl/resources/noises/torch/__init__.py index 6fe4734f..b751123d 100644 --- a/skrl/resources/noises/torch/__init__.py +++ b/skrl/resources/noises/torch/__init__.py @@ -1,4 +1,4 @@ -from .base import Noise +from skrl.resources.noises.torch.base import Noise -from .gaussian import GaussianNoise -from .ornstein_uhlenbeck import OrnsteinUhlenbeckNoise +from skrl.resources.noises.torch.gaussian import GaussianNoise +from skrl.resources.noises.torch.ornstein_uhlenbeck import OrnsteinUhlenbeckNoise diff --git a/skrl/resources/noises/torch/gaussian.py b/skrl/resources/noises/torch/gaussian.py index 1cde4bb5..f3ddfa94 100644 --- a/skrl/resources/noises/torch/gaussian.py +++ b/skrl/resources/noises/torch/gaussian.py @@ -3,7 +3,7 @@ import torch from torch.distributions import Normal -from . import Noise +from skrl.resources.noises.torch import Noise class GaussianNoise(Noise): diff --git a/skrl/resources/noises/torch/ornstein_uhlenbeck.py b/skrl/resources/noises/torch/ornstein_uhlenbeck.py index 37c328df..d133aa76 100644 --- a/skrl/resources/noises/torch/ornstein_uhlenbeck.py +++ b/skrl/resources/noises/torch/ornstein_uhlenbeck.py @@ -3,7 +3,7 @@ import torch from torch.distributions import Normal -from . import Noise +from skrl.resources.noises.torch import Noise class OrnsteinUhlenbeckNoise(Noise): diff --git a/skrl/resources/preprocessors/torch/__init__.py b/skrl/resources/preprocessors/torch/__init__.py index e0714537..793d27cb 100644 --- a/skrl/resources/preprocessors/torch/__init__.py +++ b/skrl/resources/preprocessors/torch/__init__.py @@ -1 +1 @@ -from .running_standard_scaler import RunningStandardScaler +from skrl.resources.preprocessors.torch.running_standard_scaler import RunningStandardScaler diff --git a/skrl/resources/schedulers/torch/__init__.py b/skrl/resources/schedulers/torch/__init__.py index bae19571..bf481d83 100644 --- a/skrl/resources/schedulers/torch/__init__.py +++ b/skrl/resources/schedulers/torch/__init__.py @@ -1 +1 @@ -from .kl_adaptive import KLAdaptiveRL +from skrl.resources.schedulers.torch.kl_adaptive import KLAdaptiveRL diff --git a/skrl/trainers/torch/__init__.py b/skrl/trainers/torch/__init__.py index 1077b277..6723541f 100644 --- a/skrl/trainers/torch/__init__.py +++ b/skrl/trainers/torch/__init__.py @@ -1,6 +1,6 @@ -from .base import Trainer -from .base import generate_equally_spaced_scopes +from skrl.trainers.torch.base import Trainer +from skrl.trainers.torch.base import generate_equally_spaced_scopes -from .sequential import SequentialTrainer -from .parallel import ParallelTrainer -from .manual import ManualTrainer +from skrl.trainers.torch.sequential import SequentialTrainer +from skrl.trainers.torch.parallel import ParallelTrainer +from skrl.trainers.torch.manual import ManualTrainer diff --git a/skrl/trainers/torch/base.py b/skrl/trainers/torch/base.py index a16a0153..32944d3e 100644 --- a/skrl/trainers/torch/base.py +++ b/skrl/trainers/torch/base.py @@ -4,8 +4,8 @@ import torch -from ...envs.torch import Wrapper -from ...agents.torch import Agent +from skrl.envs.torch import Wrapper +from skrl.agents.torch import Agent def generate_equally_spaced_scopes(num_envs: int, num_agents: int) -> List[int]: diff --git a/skrl/trainers/torch/manual.py b/skrl/trainers/torch/manual.py index b515f9f1..8b3af490 100644 --- a/skrl/trainers/torch/manual.py +++ b/skrl/trainers/torch/manual.py @@ -5,10 +5,10 @@ import torch -from ...envs.torch import 
Wrapper -from ...agents.torch import Agent +from skrl.envs.torch import Wrapper +from skrl.agents.torch import Agent -from . import Trainer +from skrl.trainers.torch import Trainer MANUAL_TRAINER_DEFAULT_CONFIG = { diff --git a/skrl/trainers/torch/parallel.py b/skrl/trainers/torch/parallel.py index 07917e05..342e32d6 100644 --- a/skrl/trainers/torch/parallel.py +++ b/skrl/trainers/torch/parallel.py @@ -6,10 +6,10 @@ import torch import torch.multiprocessing as mp -from ...envs.torch import Wrapper -from ...agents.torch import Agent +from skrl.envs.torch import Wrapper +from skrl.agents.torch import Agent -from . import Trainer +from skrl.trainers.torch import Trainer PARALLEL_TRAINER_DEFAULT_CONFIG = { diff --git a/skrl/trainers/torch/sequential.py b/skrl/trainers/torch/sequential.py index 009a9778..aacb6e8a 100644 --- a/skrl/trainers/torch/sequential.py +++ b/skrl/trainers/torch/sequential.py @@ -5,10 +5,10 @@ import torch -from ...envs.torch import Wrapper -from ...agents.torch import Agent +from skrl.envs.torch import Wrapper +from skrl.agents.torch import Agent -from . import Trainer +from skrl.trainers.torch import Trainer SEQUENTIAL_TRAINER_DEFAULT_CONFIG = { diff --git a/skrl/utils/model_instantiators.py b/skrl/utils/model_instantiators.py index 5734d5c6..8de39032 100644 --- a/skrl/utils/model_instantiators.py +++ b/skrl/utils/model_instantiators.py @@ -7,11 +7,11 @@ import torch import torch.nn as nn -from ..models.torch import Model -from ..models.torch import GaussianMixin -from ..models.torch import CategoricalMixin -from ..models.torch import DeterministicMixin -from ..models.torch import MultivariateGaussianMixin +from skrl.models.torch import Model +from skrl.models.torch import GaussianMixin +from skrl.models.torch import CategoricalMixin +from skrl.models.torch import DeterministicMixin +from skrl.models.torch import MultivariateGaussianMixin __all__ = ["categorical_model", "deterministic_model", "gaussian_model", "multivariate_gaussian_model", "Shape"] From dac8cacdc28d346299efccc735838d9f876327cf Mon Sep 17 00:00:00 2001 From: Toni-SM Date: Sun, 1 Jan 2023 23:13:26 +0100 Subject: [PATCH 118/157] Create python-test.yml --- .github/workflows/python-test.yml | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/workflows/python-test.yml diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml new file mode 100644 index 00000000..7d44191d --- /dev/null +++ b/.github/workflows/python-test.yml @@ -0,0 +1,36 @@ +name: Run Tests and Lint + +on: [ push, pull_request ] + +jobs: + build: + + strategy: + fail-fast: false + matrix: + os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + python -m pip install -e . + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest From fecc658c6ddc9dd7d54163a479da45db2c1902ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 1 Jan 2023 23:16:31 +0100 Subject: [PATCH 119/157] Add pytest files for examples --- tests/test_examples_deepmind.py | 28 +++++++++++ tests/test_examples_gym.py | 30 +++++++++++ tests/test_examples_gymnasium.py | 30 +++++++++++ tests/test_examples_isaacgym.py | 28 +++++++++++ tests/test_examples_isaacsim.py | 31 ++++++++++++ tests/test_examples_omniisaacgym.py | 31 ++++++++++++ tests/test_examples_robosuite.py | 27 ++++++++++ tests/test_examples_shimmy.py | 29 +++++++++++ tests/test_memories.py | 71 +++++++++++++++++++++++++++ tests/test_model_instantiators.py | 23 +++++++++ tests/test_resources_noises.py | 20 ++++++-- tests/test_resources_preprocessors.py | 13 +++-- tests/test_resources_schedulers.py | 8 ++- 13 files changed, 360 insertions(+), 9 deletions(-) create mode 100644 tests/test_examples_deepmind.py create mode 100644 tests/test_examples_gym.py create mode 100644 tests/test_examples_gymnasium.py create mode 100644 tests/test_examples_isaacgym.py create mode 100644 tests/test_examples_isaacsim.py create mode 100644 tests/test_examples_omniisaacgym.py create mode 100644 tests/test_examples_robosuite.py create mode 100644 tests/test_examples_shimmy.py create mode 100644 tests/test_memories.py create mode 100644 tests/test_model_instantiators.py diff --git a/tests/test_examples_deepmind.py b/tests/test_examples_deepmind.py new file mode 100644 index 00000000..db685347 --- /dev/null +++ b/tests/test_examples_deepmind.py @@ -0,0 +1,28 @@ +import pytest +import hypothesis +import hypothesis.strategies as st + +import warnings + +import os +import subprocess + +EXAMPLE_DIR = "deepmind" +SCRIPTS = ["dm_suite_cartpole_swingup_ddpg.py", + "dm_manipulation_stack_sac.py", ""] +EXAMPLES_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "docs", "source", "examples")) +COMMANDS = [f"python {os.path.join(EXAMPLES_DIR, EXAMPLE_DIR, script)}" for script in SCRIPTS] + + +@pytest.mark.parametrize("command", COMMANDS) +def test_scripts(capsys, command): + try: + import gym + except ImportError as e: + warnings.warn(f"\n\nUnable to import dm_control environments ({e}).\nThis test will be skipped\n") + return + + with capsys.disabled(): + print(command) + + subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_gym.py b/tests/test_examples_gym.py new file mode 100644 index 00000000..0a7a1987 --- /dev/null +++ b/tests/test_examples_gym.py @@ -0,0 +1,30 @@ +import pytest +import hypothesis +import hypothesis.strategies as st + +import warnings + +import os +import subprocess + +EXAMPLE_DIR = "gym" +SCRIPTS = ["ddpg_gym_pendulum.py", + "cem_gym_cartpole.py", + "dqn_gym_cartpole.py", + "q_learning_gym_frozen_lake.py"] +EXAMPLES_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "docs", "source", "examples")) +COMMANDS = [f"python {os.path.join(EXAMPLES_DIR, EXAMPLE_DIR, script)}" for script in SCRIPTS] + + +@pytest.mark.parametrize("command", COMMANDS) +def test_scripts(capsys, command): + try: + import gym + except ImportError as e: + warnings.warn(f"\n\nUnable to import gym ({e}).\nThis test will be skipped\n") + return + + with capsys.disabled(): + print(command) + + subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_gymnasium.py 
b/tests/test_examples_gymnasium.py new file mode 100644 index 00000000..a8816255 --- /dev/null +++ b/tests/test_examples_gymnasium.py @@ -0,0 +1,30 @@ +import pytest +import hypothesis +import hypothesis.strategies as st + +import warnings + +import os +import subprocess + +EXAMPLE_DIR = "gymnasium" +SCRIPTS = ["ddpg_gymnasium_pendulum.py", + "cem_gymnasium_cartpole.py", + "dqn_gymnasium_cartpole.py", + "q_learning_gymnasium_frozen_lake.py"] +EXAMPLES_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "docs", "source", "examples")) +COMMANDS = [f"python {os.path.join(EXAMPLES_DIR, EXAMPLE_DIR, script)}" for script in SCRIPTS] + + +@pytest.mark.parametrize("command", COMMANDS) +def test_scripts(capsys, command): + try: + import gymnasium + except ImportError as e: + warnings.warn(f"\n\nUnable to import gymnasium ({e}).\nThis test will be skipped\n") + return + + with capsys.disabled(): + print(command) + + subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_isaacgym.py b/tests/test_examples_isaacgym.py new file mode 100644 index 00000000..107f20a9 --- /dev/null +++ b/tests/test_examples_isaacgym.py @@ -0,0 +1,28 @@ +import pytest +import hypothesis +import hypothesis.strategies as st + +import warnings + +import os +import subprocess + +EXAMPLE_DIR = "isaacgym" +SCRIPTS = ["ppo_cartpole.py", + "trpo_cartpole.py"] +EXAMPLES_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "docs", "source", "examples")) +COMMANDS = [f"python {os.path.join(EXAMPLES_DIR, EXAMPLE_DIR, script)} headless=True num_envs=64" for script in SCRIPTS] + + +@pytest.mark.parametrize("command", COMMANDS) +def test_scripts(capsys, command): + try: + import isaacgymenvs + except ImportError as e: + warnings.warn(f"\n\nUnable to import isaacgymenvs ({e}).\nThis test will be skipped\n") + return + + with capsys.disabled(): + print(command) + + subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_isaacsim.py b/tests/test_examples_isaacsim.py new file mode 100644 index 00000000..959ecc4f --- /dev/null +++ b/tests/test_examples_isaacsim.py @@ -0,0 +1,31 @@ +import pytest +import hypothesis +import hypothesis.strategies as st + +import warnings + +import os +import subprocess + +# See the following link for Omniverse Isaac Sim Python environment +# https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/install_python.html +PYTHON_ENVIRONMENT = "./python.sh" + +EXAMPLE_DIR = "isaacsim" +SCRIPTS = ["cartpole_example_skrl.py"] +EXAMPLES_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "docs", "source", "examples")) +COMMANDS = [f"{PYTHON_ENVIRONMENT} {os.path.join(EXAMPLES_DIR, EXAMPLE_DIR, script)}" for script in SCRIPTS] + + +@pytest.mark.parametrize("command", COMMANDS) +def test_scripts(capsys, command): + try: + from omni.isaac.kit import SimulationApp + except ImportError as e: + warnings.warn(f"\n\nUnable to import SimulationApp ({e}).\nThis test will be skipped\n") + return + + with capsys.disabled(): + print(command) + + subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_omniisaacgym.py b/tests/test_examples_omniisaacgym.py new file mode 100644 index 00000000..bd756520 --- /dev/null +++ b/tests/test_examples_omniisaacgym.py @@ -0,0 +1,31 @@ +import pytest +import hypothesis +import hypothesis.strategies as st + +import warnings + +import os +import subprocess + +# See the following link for Omniverse Isaac Sim Python environment +# 
https://docs.omniverse.nvidia.com/app_isaacsim/app_isaacsim/install_python.html +PYTHON_ENVIRONMENT = "./python.sh" + +EXAMPLE_DIR = "omniisaacgym" +SCRIPTS = ["ppo_cartpole.py"] +EXAMPLES_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "docs", "source", "examples")) +COMMANDS = [f"{PYTHON_ENVIRONMENT} {os.path.join(EXAMPLES_DIR, EXAMPLE_DIR, script)} headless=True num_envs=64" for script in SCRIPTS] + + +@pytest.mark.parametrize("command", COMMANDS) +def test_scripts(capsys, command): + try: + import omniisaacgymenvs + except ImportError as e: + warnings.warn(f"\n\nUnable to import omniisaacgymenvs ({e}).\nThis test will be skipped\n") + return + + with capsys.disabled(): + print(command) + + subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_robosuite.py b/tests/test_examples_robosuite.py new file mode 100644 index 00000000..eeb1de42 --- /dev/null +++ b/tests/test_examples_robosuite.py @@ -0,0 +1,27 @@ +import pytest +import hypothesis +import hypothesis.strategies as st + +import warnings + +import os +import subprocess + +EXAMPLE_DIR = "robosuite" +SCRIPTS = [] +EXAMPLES_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "docs", "source", "examples")) +COMMANDS = [f"python {os.path.join(EXAMPLES_DIR, EXAMPLE_DIR, script)}" for script in SCRIPTS] + + +@pytest.mark.parametrize("command", COMMANDS) +def test_scripts(capsys, command): + try: + import gym + except ImportError as e: + warnings.warn(f"\n\nUnable to import gym ({e}).\nThis test will be skipped\n") + return + + with capsys.disabled(): + print(command) + + subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_shimmy.py b/tests/test_examples_shimmy.py new file mode 100644 index 00000000..49bb2280 --- /dev/null +++ b/tests/test_examples_shimmy.py @@ -0,0 +1,29 @@ +import pytest +import hypothesis +import hypothesis.strategies as st + +import warnings + +import os +import subprocess + +EXAMPLE_DIR = "shimmy" +SCRIPTS = ["dqn_shimmy_atari_pong.py", + "sac_shimmy_dm_control_acrobot_swingup_sparse.py", + "ddpg_openai_gym_compatibility_pendulum.py"] +EXAMPLES_DIR = os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "docs", "source", "examples")) +COMMANDS = [f"python {os.path.join(EXAMPLES_DIR, EXAMPLE_DIR, script)}" for script in SCRIPTS] + + +@pytest.mark.parametrize("command", COMMANDS) +def test_scripts(capsys, command): + try: + import shimmy + except ImportError as e: + warnings.warn(f"\n\nUnable to import shimmy ({e}).\nThis test will be skipped\n") + return + + with capsys.disabled(): + print(command) + + subprocess.run(command, shell=True, check=True) diff --git a/tests/test_memories.py b/tests/test_memories.py new file mode 100644 index 00000000..67ce1d48 --- /dev/null +++ b/tests/test_memories.py @@ -0,0 +1,71 @@ +import pytest +import string +import hypothesis +import hypothesis.strategies as st + +import torch + +from skrl.memories.torch import Memory +from skrl.memories.torch import RandomMemory + + +@pytest.fixture +def classes_and_kwargs(): + return [(RandomMemory, {})] + + +@pytest.mark.parametrize("device", [None, "cpu", "cuda:0"]) +def test_device(capsys, classes_and_kwargs, device): + _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + + for klass, kwargs in classes_and_kwargs: + with capsys.disabled(): + print(klass.__name__, device) + + memory: Memory = klass(memory_size=1, device=device, 
**kwargs) + + assert memory.device == _device # defined device + +@hypothesis.given(names=st.sets(st.text(alphabet=string.ascii_letters + string.digits + "_", min_size=1, max_size=10), min_size=1, max_size=10)) +@hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.function_scoped_fixture], deadline=None) +def test_create_tensors(capsys, classes_and_kwargs, names): + for klass, kwargs in classes_and_kwargs: + with capsys.disabled(): + print(klass.__name__, names) + + memory: Memory = klass(memory_size=1, **kwargs) + + for name in names: + memory.create_tensor(name=name, size=1, dtype=torch.float32) + + assert memory.get_tensor_names() == sorted(names) + +@hypothesis.given(memory_size=st.integers(min_value=1, max_value=100), + num_envs=st.integers(min_value=1, max_value=10), + num_samples=st.integers(min_value=1, max_value=500)) +@hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.function_scoped_fixture], deadline=None) +def test_add_samples(capsys, classes_and_kwargs, memory_size, num_envs, num_samples): + for klass, kwargs in classes_and_kwargs: + with capsys.disabled(): + print(klass.__name__, memory_size, num_envs, num_samples) + + memory: Memory = klass(memory_size=memory_size, num_envs=num_envs, **kwargs) + memory.create_tensor(name="tensor_1", size=1, dtype=torch.float32) + memory.create_tensor(name="tensor_2", size=2, dtype=torch.float32) + + # memory_index + for _ in range(num_samples): + memory.add_samples(tensor_1=torch.zeros((num_envs, 1))) + + assert memory.memory_index == num_samples % memory_size + assert memory.filled == (num_samples >= memory_size) + + memory.reset() + + # memory_index, env_index + for _ in range(num_samples): + memory.add_samples(tensor_2=torch.zeros((2,))) + + assert memory.memory_index == (num_samples // num_envs) % memory_size + assert memory.env_index == num_samples % num_envs + assert memory.filled == (num_samples >= memory_size * num_envs) diff --git a/tests/test_model_instantiators.py b/tests/test_model_instantiators.py new file mode 100644 index 00000000..139ddec8 --- /dev/null +++ b/tests/test_model_instantiators.py @@ -0,0 +1,23 @@ +import pytest +import hypothesis +import hypothesis.strategies as st + +import torch + +from skrl.models.torch import Model + +from skrl.utils.model_instantiators import Shape +from skrl.utils.model_instantiators import categorical_model +from skrl.utils.model_instantiators import deterministic_model +from skrl.utils.model_instantiators import gaussian_model +from skrl.utils.model_instantiators import multivariate_gaussian_model + + +@pytest.fixture +def classes_and_kwargs(): + return [] + + +@pytest.mark.parametrize("device", [None, "cpu", "cuda:0"]) +def test_device(capsys, classes_and_kwargs, device): + _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") diff --git a/tests/test_resources_noises.py b/tests/test_resources_noises.py index 2c53855d..d85a0c54 100644 --- a/tests/test_resources_noises.py +++ b/tests/test_resources_noises.py @@ -1,7 +1,10 @@ import pytest +import hypothesis +import hypothesis.strategies as st import torch +from skrl.resources.noises.torch import Noise from skrl.resources.noises.torch import GaussianNoise from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise @@ -13,20 +16,27 @@ def classes_and_kwargs(): @pytest.mark.parametrize("device", [None, "cpu", "cuda:0"]) -def test_device(classes_and_kwargs, device): +def test_device(capsys, classes_and_kwargs, device): _device = 
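The assertions in ``test_memories.py`` above encode the ring-buffer bookkeeping of the memory classes; a short worked example of the expected values, using illustrative numbers (plain arithmetic, independent of the library):

    # memory_size=5, num_envs=2, num_samples=7
    #
    # adding 7 batches of shape (num_envs, 1):
    #   memory_index = 7 % 5 = 2            filled, since 7 >= 5
    #
    # adding 7 single samples of shape (2,) instead (one environment at a time):
    #   memory_index = (7 // 2) % 5 = 3     a row completes every num_envs samples
    #   env_index    = 7 % 2 = 1
    #   not filled, since 7 < 5 * 2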
torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") for klass, kwargs in classes_and_kwargs: - noise = klass(device=device, **kwargs) + with capsys.disabled(): + print(klass.__name__, device) + + noise: Noise = klass(device=device, **kwargs) output = noise.sample((1,)) assert noise.device == _device # defined device assert output.device == _device # runtime device -@pytest.mark.parametrize("size", [(10,), [20, 1], torch.Size([30, 1, 2])]) -def test_sampling(classes_and_kwargs, size): +@hypothesis.given(size=st.lists(st.integers(min_value=1, max_value=10), max_size=5)) +@hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.function_scoped_fixture], deadline=None) +def test_sample(capsys, classes_and_kwargs, size): for klass, kwargs in classes_and_kwargs: - noise = klass(**kwargs) + with capsys.disabled(): + print(klass.__name__, size) + + noise: Noise = klass(**kwargs) # sample output = noise.sample(size) diff --git a/tests/test_resources_preprocessors.py b/tests/test_resources_preprocessors.py index c1cf1372..8d020461 100644 --- a/tests/test_resources_preprocessors.py +++ b/tests/test_resources_preprocessors.py @@ -1,8 +1,9 @@ import pytest +import hypothesis +import hypothesis.strategies as st import gym import gymnasium -import numpy as np import torch @@ -15,10 +16,13 @@ def classes_and_kwargs(): @pytest.mark.parametrize("device", [None, "cpu", "cuda:0"]) -def test_device(classes_and_kwargs, device): +def test_device(capsys, classes_and_kwargs, device): _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") for klass, kwargs in classes_and_kwargs: + with capsys.disabled(): + print(klass.__name__, device) + preprocessor = klass(device=device, **kwargs) assert preprocessor.device == _device # defined device @@ -28,8 +32,11 @@ def test_device(classes_and_kwargs, device): (gymnasium.spaces.Box(low=-1, high=1, shape=(2, 3)), 6), (gym.spaces.Discrete(n=3), 1), (gymnasium.spaces.Discrete(n=3), 1)]) -def test_forward(classes_and_kwargs, space_and_size): +def test_forward(capsys, classes_and_kwargs, space_and_size): for klass, kwargs in classes_and_kwargs: + with capsys.disabled(): + print(klass.__name__, space_and_size) + space, size = space_and_size preprocessor = klass(size=space, device="cpu") diff --git a/tests/test_resources_schedulers.py b/tests/test_resources_schedulers.py index 01c56a24..95e78960 100644 --- a/tests/test_resources_schedulers.py +++ b/tests/test_resources_schedulers.py @@ -1,4 +1,6 @@ import pytest +import hypothesis +import hypothesis.strategies as st import torch @@ -9,10 +11,14 @@ def classes_and_kwargs(): return [(KLAdaptiveRL, {})] + @pytest.mark.parametrize("optimizer", [torch.optim.Adam([torch.ones((1,))], lr=0.1), torch.optim.SGD([torch.ones((1,))], lr=0.1)]) -def test_step(classes_and_kwargs, optimizer): +def test_step(capsys, classes_and_kwargs, optimizer): for klass, kwargs in classes_and_kwargs: + with capsys.disabled(): + print(klass.__name__, optimizer) + scheduler = klass(optimizer, **kwargs) scheduler.step(0.0) From 14f01288a790a803a4d991720c376c027ec88788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 1 Jan 2023 23:31:29 +0100 Subject: [PATCH 120/157] Update GitHub actions flake8 arguments --- .github/workflows/python-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 
7d44191d..b2c96ef9 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -28,9 +28,9 @@ jobs: - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude "docs build" --per-file-ignores="skrl/envs/torch/wrappers.py:F821 skrl/utils/omniverse_isaacgym_utils.py:F821" # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude "docs build" - name: Test with pytest run: | pytest From 892b333435c5babb56d302e3d24bce488a7d01c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 2 Jan 2023 09:21:04 +0100 Subject: [PATCH 121/157] Add hypothesis to python test dependencies --- .github/workflows/python-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index b2c96ef9..06d35732 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -22,7 +22,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install flake8 pytest + python -m pip install flake8 pytest hypothesis if [ -f requirements.txt ]; then pip install -r requirements.txt; fi python -m pip install -e . - name: Lint with flake8 From 580f4cebb8222a665cf16daebad53dfed1f77ae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 2 Jan 2023 09:25:09 +0100 Subject: [PATCH 122/157] Add torch version to setup.py install requirements --- setup.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index d301bf3c..dd08d3d9 100644 --- a/setup.py +++ b/setup.py @@ -11,12 +11,11 @@ INSTALL_REQUIRES = [ "gym", "gymnasium", - "torch", + "torch>=1.8", "tensorboard", - "tqdm", - "packaging", - "pre-commit", "wandb", + "tqdm", + "packaging" ] # installation From bb5eb9923af721cc756bf9bf9f0cf12a5160bfa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 2 Jan 2023 09:47:10 +0100 Subject: [PATCH 123/157] Ignore testing examples --- .github/workflows/python-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 06d35732..9bc952c6 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -33,4 +33,4 @@ jobs: # flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude "docs build" - name: Test with pytest run: | - pytest + pytest --ignore-glob "tests/test_examples*" From 0172c9528e88336266641bbc53762550a6a990bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 2 Jan 2023 21:22:01 +0100 Subject: [PATCH 124/157] Remove printing statements --- tests/test_examples_deepmind.py | 6 +----- tests/test_examples_gym.py | 6 +----- tests/test_examples_gymnasium.py | 6 +----- tests/test_examples_isaacgym.py | 6 +----- tests/test_examples_isaacsim.py | 6 +----- tests/test_examples_omniisaacgym.py | 6 +----- tests/test_examples_robosuite.py | 6 +----- tests/test_examples_shimmy.py | 6 +----- tests/test_memories.py | 14 ++++---------- tests/test_model_instantiators.py | 1 + tests/test_resources_noises.py | 7 +------ tests/test_resources_preprocessors.py | 7 +------ tests/test_resources_schedulers.py | 4 +--- 13 files changed, 16 insertions(+), 65 deletions(-) diff --git a/tests/test_examples_deepmind.py b/tests/test_examples_deepmind.py index db685347..1e946da4 100644 --- a/tests/test_examples_deepmind.py +++ b/tests/test_examples_deepmind.py @@ -1,9 +1,8 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st -import warnings - import os import subprocess @@ -22,7 +21,4 @@ def test_scripts(capsys, command): warnings.warn(f"\n\nUnable to import dm_control environments ({e}).\nThis test will be skipped\n") return - with capsys.disabled(): - print(command) - subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_gym.py b/tests/test_examples_gym.py index 0a7a1987..50dcb759 100644 --- a/tests/test_examples_gym.py +++ b/tests/test_examples_gym.py @@ -1,9 +1,8 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st -import warnings - import os import subprocess @@ -24,7 +23,4 @@ def test_scripts(capsys, command): warnings.warn(f"\n\nUnable to import gym ({e}).\nThis test will be skipped\n") return - with capsys.disabled(): - print(command) - subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_gymnasium.py b/tests/test_examples_gymnasium.py index a8816255..fcacc402 100644 --- a/tests/test_examples_gymnasium.py +++ b/tests/test_examples_gymnasium.py @@ -1,9 +1,8 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st -import warnings - import os import subprocess @@ -24,7 +23,4 @@ def test_scripts(capsys, command): warnings.warn(f"\n\nUnable to import gymnasium ({e}).\nThis test will be skipped\n") return - with capsys.disabled(): - print(command) - subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_isaacgym.py b/tests/test_examples_isaacgym.py index 107f20a9..408cfc24 100644 --- a/tests/test_examples_isaacgym.py +++ b/tests/test_examples_isaacgym.py @@ -1,9 +1,8 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st -import warnings - import os import subprocess @@ -22,7 +21,4 @@ def test_scripts(capsys, command): warnings.warn(f"\n\nUnable to import isaacgymenvs ({e}).\nThis test will be skipped\n") return - with capsys.disabled(): - print(command) - subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_isaacsim.py b/tests/test_examples_isaacsim.py index 959ecc4f..0215597e 100644 --- a/tests/test_examples_isaacsim.py +++ b/tests/test_examples_isaacsim.py @@ -1,9 +1,8 @@ import pytest +import warnings import hypothesis import 
hypothesis.strategies as st -import warnings - import os import subprocess @@ -25,7 +24,4 @@ def test_scripts(capsys, command): warnings.warn(f"\n\nUnable to import SimulationApp ({e}).\nThis test will be skipped\n") return - with capsys.disabled(): - print(command) - subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_omniisaacgym.py b/tests/test_examples_omniisaacgym.py index bd756520..cb2571c7 100644 --- a/tests/test_examples_omniisaacgym.py +++ b/tests/test_examples_omniisaacgym.py @@ -1,9 +1,8 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st -import warnings - import os import subprocess @@ -25,7 +24,4 @@ def test_scripts(capsys, command): warnings.warn(f"\n\nUnable to import omniisaacgymenvs ({e}).\nThis test will be skipped\n") return - with capsys.disabled(): - print(command) - subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_robosuite.py b/tests/test_examples_robosuite.py index eeb1de42..a66ffbd3 100644 --- a/tests/test_examples_robosuite.py +++ b/tests/test_examples_robosuite.py @@ -1,9 +1,8 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st -import warnings - import os import subprocess @@ -21,7 +20,4 @@ def test_scripts(capsys, command): warnings.warn(f"\n\nUnable to import gym ({e}).\nThis test will be skipped\n") return - with capsys.disabled(): - print(command) - subprocess.run(command, shell=True, check=True) diff --git a/tests/test_examples_shimmy.py b/tests/test_examples_shimmy.py index 49bb2280..4ed69cfd 100644 --- a/tests/test_examples_shimmy.py +++ b/tests/test_examples_shimmy.py @@ -1,9 +1,8 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st -import warnings - import os import subprocess @@ -23,7 +22,4 @@ def test_scripts(capsys, command): warnings.warn(f"\n\nUnable to import shimmy ({e}).\nThis test will be skipped\n") return - with capsys.disabled(): - print(command) - subprocess.run(command, shell=True, check=True) diff --git a/tests/test_memories.py b/tests/test_memories.py index 67ce1d48..da07ff9e 100644 --- a/tests/test_memories.py +++ b/tests/test_memories.py @@ -1,8 +1,10 @@ import pytest -import string +import warnings import hypothesis import hypothesis.strategies as st +import string + import torch from skrl.memories.torch import Memory @@ -19,9 +21,6 @@ def test_device(capsys, classes_and_kwargs, device): _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") for klass, kwargs in classes_and_kwargs: - with capsys.disabled(): - print(klass.__name__, device) - memory: Memory = klass(memory_size=1, device=device, **kwargs) assert memory.device == _device # defined device @@ -30,9 +29,6 @@ def test_device(capsys, classes_and_kwargs, device): @hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.function_scoped_fixture], deadline=None) def test_create_tensors(capsys, classes_and_kwargs, names): for klass, kwargs in classes_and_kwargs: - with capsys.disabled(): - print(klass.__name__, names) - memory: Memory = klass(memory_size=1, **kwargs) for name in names: @@ -46,10 +42,8 @@ def test_create_tensors(capsys, classes_and_kwargs, names): @hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.function_scoped_fixture], deadline=None) def test_add_samples(capsys, classes_and_kwargs, memory_size, num_envs, num_samples): for klass, kwargs in classes_and_kwargs: - with capsys.disabled(): - print(klass.__name__, 
memory_size, num_envs, num_samples) - memory: Memory = klass(memory_size=memory_size, num_envs=num_envs, **kwargs) + memory.create_tensor(name="tensor_1", size=1, dtype=torch.float32) memory.create_tensor(name="tensor_2", size=2, dtype=torch.float32) diff --git a/tests/test_model_instantiators.py b/tests/test_model_instantiators.py index 139ddec8..923e09d2 100644 --- a/tests/test_model_instantiators.py +++ b/tests/test_model_instantiators.py @@ -1,4 +1,5 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st diff --git a/tests/test_resources_noises.py b/tests/test_resources_noises.py index d85a0c54..591199b3 100644 --- a/tests/test_resources_noises.py +++ b/tests/test_resources_noises.py @@ -1,4 +1,5 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st @@ -20,9 +21,6 @@ def test_device(capsys, classes_and_kwargs, device): _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") for klass, kwargs in classes_and_kwargs: - with capsys.disabled(): - print(klass.__name__, device) - noise: Noise = klass(device=device, **kwargs) output = noise.sample((1,)) @@ -33,9 +31,6 @@ def test_device(capsys, classes_and_kwargs, device): @hypothesis.settings(suppress_health_check=[hypothesis.HealthCheck.function_scoped_fixture], deadline=None) def test_sample(capsys, classes_and_kwargs, size): for klass, kwargs in classes_and_kwargs: - with capsys.disabled(): - print(klass.__name__, size) - noise: Noise = klass(**kwargs) # sample diff --git a/tests/test_resources_preprocessors.py b/tests/test_resources_preprocessors.py index 8d020461..8b3377ab 100644 --- a/tests/test_resources_preprocessors.py +++ b/tests/test_resources_preprocessors.py @@ -1,4 +1,5 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st @@ -20,9 +21,6 @@ def test_device(capsys, classes_and_kwargs, device): _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") for klass, kwargs in classes_and_kwargs: - with capsys.disabled(): - print(klass.__name__, device) - preprocessor = klass(device=device, **kwargs) assert preprocessor.device == _device # defined device @@ -34,9 +32,6 @@ def test_device(capsys, classes_and_kwargs, device): (gymnasium.spaces.Discrete(n=3), 1)]) def test_forward(capsys, classes_and_kwargs, space_and_size): for klass, kwargs in classes_and_kwargs: - with capsys.disabled(): - print(klass.__name__, space_and_size) - space, size = space_and_size preprocessor = klass(size=space, device="cpu") diff --git a/tests/test_resources_schedulers.py b/tests/test_resources_schedulers.py index 95e78960..9f2840bb 100644 --- a/tests/test_resources_schedulers.py +++ b/tests/test_resources_schedulers.py @@ -1,4 +1,5 @@ import pytest +import warnings import hypothesis import hypothesis.strategies as st @@ -16,9 +17,6 @@ def classes_and_kwargs(): torch.optim.SGD([torch.ones((1,))], lr=0.1)]) def test_step(capsys, classes_and_kwargs, optimizer): for klass, kwargs in classes_and_kwargs: - with capsys.disabled(): - print(klass.__name__, optimizer) - scheduler = klass(optimizer, **kwargs) scheduler.step(0.0) From 52ba9294780386682a3ee5db59865c05dbf9199f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 2 Jan 2023 21:26:43 +0100 Subject: [PATCH 125/157] Catch runtime error if there are no GPUs --- tests/test_memories.py | 8 +++++++- tests/test_resources_noises.py | 8 +++++++- 
tests/test_resources_preprocessors.py | 8 +++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/tests/test_memories.py b/tests/test_memories.py index da07ff9e..3bebad0c 100644 --- a/tests/test_memories.py +++ b/tests/test_memories.py @@ -21,7 +21,13 @@ def test_device(capsys, classes_and_kwargs, device): _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") for klass, kwargs in classes_and_kwargs: - memory: Memory = klass(memory_size=1, device=device, **kwargs) + try: + memory: Memory = klass(memory_size=1, device=device, **kwargs) + except RuntimeError as e: + with capsys.disabled(): + print(e) + warnings.warn(f"Invalid device: {device}. This test will be skipped") + continue assert memory.device == _device # defined device diff --git a/tests/test_resources_noises.py b/tests/test_resources_noises.py index 591199b3..522c3584 100644 --- a/tests/test_resources_noises.py +++ b/tests/test_resources_noises.py @@ -21,7 +21,13 @@ def test_device(capsys, classes_and_kwargs, device): _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") for klass, kwargs in classes_and_kwargs: - noise: Noise = klass(device=device, **kwargs) + try: + noise: Noise = klass(device=device, **kwargs) + except RuntimeError as e: + with capsys.disabled(): + print(e) + warnings.warn(f"Invalid device: {device}. This test will be skipped") + continue output = noise.sample((1,)) assert noise.device == _device # defined device diff --git a/tests/test_resources_preprocessors.py b/tests/test_resources_preprocessors.py index 8b3377ab..b988d2de 100644 --- a/tests/test_resources_preprocessors.py +++ b/tests/test_resources_preprocessors.py @@ -21,7 +21,13 @@ def test_device(capsys, classes_and_kwargs, device): _device = torch.device(device) if device is not None else torch.device("cuda:0" if torch.cuda.is_available() else "cpu") for klass, kwargs in classes_and_kwargs: - preprocessor = klass(device=device, **kwargs) + try: + preprocessor = klass(device=device, **kwargs) + except RuntimeError as e: + with capsys.disabled(): + print(e) + warnings.warn(f"Invalid device: {device}. This test will be skipped") + continue assert preprocessor.device == _device # defined device assert preprocessor(torch.ones(kwargs["size"], device=_device)).device == _device # runtime device From 80d9bc743214cd16af2686dfc7b72ff6f0195da2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Mon, 2 Jan 2023 21:29:47 +0100 Subject: [PATCH 126/157] Remove install from requirements.txt --- .github/workflows/python-test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 9bc952c6..ff1ee677 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -23,7 +23,6 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install flake8 pytest hypothesis - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi python -m pip install -e . 
- name: Lint with flake8 run: | From 0beaee5effd5bd7a7c9adcae18c7446c0d8c139c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 6 Jan 2023 23:41:54 +0100 Subject: [PATCH 127/157] Update GitHub test workflow --- .github/workflows/python-test.yml | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index ff1ee677..3e1b9a93 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -1,19 +1,20 @@ -name: Run Tests and Lint +name: tests on: [ push, pull_request ] jobs: - build: + tests: strategy: fail-fast: false matrix: - os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] + os: ["ubuntu-latest", "macos-latest", "windows-latest"] python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] runs-on: ${{ matrix.os }} steps: + - if: '!(matrix.os == "macos-latest" && matrix.python-version == "3.11")' - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 @@ -24,12 +25,6 @@ jobs: python -m pip install --upgrade pip python -m pip install flake8 pytest hypothesis python -m pip install -e . - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics --exclude "docs build" --per-file-ignores="skrl/envs/torch/wrappers.py:F821 skrl/utils/omniverse_isaacgym_utils.py:F821" - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - # flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude "docs build" - - name: Test with pytest + - name: Tests run: | pytest --ignore-glob "tests/test_examples*" From 4c9313a3456461e8fd4df6d21d8dd91fc6b97f2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 6 Jan 2023 23:42:56 +0100 Subject: [PATCH 128/157] Add pre-commit to GitHub workflows --- .github/workflows/pre-commit.yaml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/pre-commit.yaml diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml new file mode 100644 index 00000000..b0d3dc27 --- /dev/null +++ b/.github/workflows/pre-commit.yaml @@ -0,0 +1,21 @@ +name: pre-commit + +on: [ push, pull_request ] + +jobs: + pre-commit: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.8' + - name: Install dependencies + run: | + python -m pip install pre-commit + - name: Run pre-commit + run: | + pre-commit run --all-files From 100d1fcfce11429a73c3fda852924d07ecf1b989 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 7 Jan 2023 11:04:01 +0100 Subject: [PATCH 129/157] Format GitHub workflows --- .github/workflows/pre-commit.yaml | 5 ++++- .github/workflows/python-publish-manual.yml | 13 ++++++++++++- .github/workflows/python-test.yml | 7 +++++-- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml index b0d3dc27..ffaffd29 100644 --- a/.github/workflows/pre-commit.yaml +++ b/.github/workflows/pre-commit.yaml @@ -3,19 +3,22 @@ name: pre-commit on: [ push, pull_request ] jobs: - pre-commit: + pre-commit: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - name: Set up Python uses: actions/setup-python@v3 with: 
python-version: '3.8' + - name: Install dependencies run: | python -m pip install pre-commit + - name: Run pre-commit run: | pre-commit run --all-files diff --git a/.github/workflows/python-publish-manual.yml b/.github/workflows/python-publish-manual.yml index c3caae87..1f3de732 100644 --- a/.github/workflows/python-publish-manual.yml +++ b/.github/workflows/python-publish-manual.yml @@ -1,4 +1,4 @@ -name: Upload Python Package (manually triggered workflow) +name: pypi (manually triggered workflow) on: workflow_dispatch: @@ -12,22 +12,28 @@ permissions: contents: read jobs: + pypi: name: Publish package to PyPI runs-on: ubuntu-latest if: ${{ github.event.inputs.job == 'pypi'}} + steps: - uses: actions/checkout@v3 + - name: Set up Python uses: actions/setup-python@v3 with: python-version: '3.7' + - name: Install dependencies run: | python -m pip install --upgrade pip pip install build + - name: Build package run: python -m build + - name: Publish package to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: @@ -39,18 +45,23 @@ jobs: name: Publish package to TestPyPI runs-on: ubuntu-latest if: ${{ github.event.inputs.job == 'test-pypi'}} + steps: - uses: actions/checkout@v3 + - name: Set up Python uses: actions/setup-python@v3 with: python-version: '3.7' + - name: Install dependencies run: | python -m pip install --upgrade pip pip install build + - name: Build package run: python -m build + - name: Publish package to TestPyPI uses: pypa/gh-action-pypi-publish@release/v1 with: diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 3e1b9a93..5a707a60 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -3,8 +3,8 @@ name: tests on: [ push, pull_request ] jobs: - tests: + tests: strategy: fail-fast: false matrix: @@ -12,19 +12,22 @@ jobs: python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] runs-on: ${{ matrix.os }} + if: ${{ !(matrix.os == "macos-latest" && matrix.python-version == "3.11") }} steps: - - if: '!(matrix.os == "macos-latest" && matrix.python-version == "3.11")' - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} + - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install flake8 pytest hypothesis python -m pip install -e . 
+ - name: Tests run: | pytest --ignore-glob "tests/test_examples*" From 5f04d41622c0ddd56823814330806bec336e4073 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 7 Jan 2023 11:05:57 +0100 Subject: [PATCH 130/157] Remove Python 3.11 from testing workflow --- .github/workflows/python-test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 5a707a60..b644c064 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -9,10 +9,9 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.7", "3.8", "3.9", "3.10"] runs-on: ${{ matrix.os }} - if: ${{ !(matrix.os == "macos-latest" && matrix.python-version == "3.11") }} steps: - uses: actions/checkout@v3 From dfc813d48f2969e38bfdcb94507e617ea3044208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 7 Jan 2023 11:09:13 +0100 Subject: [PATCH 131/157] Remove macos-latest from testing workflow --- .github/workflows/python-test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index b644c064..f364ff82 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -8,8 +8,8 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-latest", "macos-latest", "windows-latest"] - python-version: ["3.7", "3.8", "3.9", "3.10"] + os: ["ubuntu-latest", "windows-latest"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] runs-on: ${{ matrix.os }} From 9419f63e9f7e9aa45befad41335701d635ea15e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 7 Jan 2023 11:15:56 +0100 Subject: [PATCH 132/157] Handle "Torch not compiled with CUDA enabled" AssertionError in tests --- .github/workflows/python-test.yml | 4 ++-- tests/test_memories.py | 2 +- tests/test_resources_noises.py | 2 +- tests/test_resources_preprocessors.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index f364ff82..b644c064 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -8,8 +8,8 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-latest", "windows-latest"] - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.7", "3.8", "3.9", "3.10"] runs-on: ${{ matrix.os }} diff --git a/tests/test_memories.py b/tests/test_memories.py index 3bebad0c..99204cf8 100644 --- a/tests/test_memories.py +++ b/tests/test_memories.py @@ -23,7 +23,7 @@ def test_device(capsys, classes_and_kwargs, device): for klass, kwargs in classes_and_kwargs: try: memory: Memory = klass(memory_size=1, device=device, **kwargs) - except RuntimeError as e: + except (RuntimeError, AssertionError) as e: with capsys.disabled(): print(e) warnings.warn(f"Invalid device: {device}. 
This test will be skipped") diff --git a/tests/test_resources_noises.py b/tests/test_resources_noises.py index 522c3584..f9dcedba 100644 --- a/tests/test_resources_noises.py +++ b/tests/test_resources_noises.py @@ -23,7 +23,7 @@ def test_device(capsys, classes_and_kwargs, device): for klass, kwargs in classes_and_kwargs: try: noise: Noise = klass(device=device, **kwargs) - except RuntimeError as e: + except (RuntimeError, AssertionError) as e: with capsys.disabled(): print(e) warnings.warn(f"Invalid device: {device}. This test will be skipped") diff --git a/tests/test_resources_preprocessors.py b/tests/test_resources_preprocessors.py index b988d2de..1c94d139 100644 --- a/tests/test_resources_preprocessors.py +++ b/tests/test_resources_preprocessors.py @@ -23,7 +23,7 @@ def test_device(capsys, classes_and_kwargs, device): for klass, kwargs in classes_and_kwargs: try: preprocessor = klass(device=device, **kwargs) - except RuntimeError as e: + except (RuntimeError, AssertionError) as e: with capsys.disabled(): print(e) warnings.warn(f"Invalid device: {device}. This test will be skipped") From 0df30fa6a7fb308871560a2a3ae544f391464c42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 7 Jan 2023 11:25:08 +0100 Subject: [PATCH 133/157] Add badges to README --- README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/README.md b/README.md index 88da777e..7313d5f1 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ +[![license](https://img.shields.io/pypi/l/skrl)](https://github.com/Toni-SM/skrl) +[![docs](https://readthedocs.org/projects/skrl/badge/?version=latest)](https://skrl.readthedocs.io/en/latest/?badge=latest) +[![pypi](https://img.shields.io/pypi/v/skrl)](https://pypi.org/project/skrl) +   +[![pytest](https://github.com/Toni-SM/skrl/actions/workflows/python-test.yml/badge.svg)](https://github.com/Toni-SM/skrl/actions/workflows/python-test.yml) +[![pre-commit](https://github.com/Toni-SM/skrl/actions/workflows/pre-commit.yml/badge.svg)](https://github.com/Toni-SM/skrl/actions/workflows/pre-commit.yml) + +
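The device handling added in the test updates above follows a single pattern: try to construct the object on the requested device and, when CUDA is not available, emit a warning and skip instead of failing. A minimal, self-contained sketch of that pattern is shown below; the test name and the plain tensor allocation are illustrative assumptions rather than code from the test suite.

import warnings

import pytest
import torch


@pytest.mark.parametrize("device", ["cpu", "cuda:0"])
def test_device_fallback(device):
    # allocate on the requested device; fall back to a warning and skip if CUDA is unavailable
    try:
        tensor = torch.ones((1,), device=device)
    except (RuntimeError, AssertionError) as e:
        warnings.warn(f"Invalid device: {device}. This test will be skipped ({e})")
        return
    # the runtime device must match the requested one
    assert tensor.device == torch.device(device)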

From b732932df9fc3996f2fda857f97bb7c25c15ad86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 7 Jan 2023 11:29:31 +0100 Subject: [PATCH 134/157] Fix GitHub workflow file extension --- .github/workflows/{pre-commit.yaml => pre-commit.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{pre-commit.yaml => pre-commit.yml} (100%) diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yml similarity index 100% rename from .github/workflows/pre-commit.yaml rename to .github/workflows/pre-commit.yml From e37aa52d15ea2db4c11b962364a340445b95fd70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 7 Jan 2023 11:47:33 +0100 Subject: [PATCH 135/157] Add project links to setup.py --- setup.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index dd08d3d9..07ddfffc 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ description="Modular and flexible library for Reinforcement Learning", long_description=open(os.path.join(root_dir, "README.md")).read(), long_description_content_type="text/markdown", - keywords=["reinforcement learning", "machine learning", "rl", ""], + keywords=["reinforcement", "machine", "learning", "rl"], python_requires=">=3.6.*", install_requires=INSTALL_REQUIRES, url="https://github.com/Toni-SM/skrl", @@ -43,4 +43,10 @@ ], license="MIT", zip_safe=False, + project_urls={ + "Documentation": "https://skrl.readthedocs.io", + "Repository": "https://github.com/Toni-SM/skrl", + "Bug Tracker": "https://github.com/Toni-SM/skrl/issues", + "Discussions": "https://github.com/Toni-SM/skrl/discussions", + } ) From a4bc09b05a88778be04d726b6b0eace2b10e507f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 7 Jan 2023 22:04:34 +0100 Subject: [PATCH 136/157] Add Farama Shimmy converted environments --- docs/source/intro/examples.rst | 53 ++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 48d36685..9730c6ba 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -236,6 +236,59 @@ The following components or practices are exemplified (highlighted):
+Farama Shimmy (converted environments) +-------------------------------------- + +The following examples show the training in several popular environments (Atari, DeepMind Control and OpenAI Gym) that have been converted to the Gymnasium API using the `Shimmy `_ (API conversion tool) package + +.. image:: ../_static/imgs/example_shimmy.png + :width: 100% + :align: center + :alt: Shimmy (converted environments) + +.. note:: + + From **skrl**, no extra implementation is necessary, since it fully supports Gymnasium API + +.. note:: + + Because the Gymnasium API requires that the rendering mode be specified during the initialization of the environment, it is not enough to set the :literal:`headless` option in the trainer configuration to render the environment. In this case, it is necessary to call the :literal:`gymnasium.make` function using :literal:`render_mode="human"` or any other supported option + +.. tabs:: + + .. tab:: Atari: Pong (DQN) + + .. tabs:: + + .. group-tab:: Training + + | :download:`dqn_shimmy_atari_pong.py <../examples/shimmy/dqn_shimmy_atari_pong.py>` + + .. literalinclude:: ../examples/shimmy/dqn_shimmy_atari_pong.py + :language: python + + .. tab:: DeepMind: Acrobot (SAC) + + .. tabs:: + + .. group-tab:: Training + + | :download:`sac_shimmy_dm_control_acrobot_swingup_sparse.py <../examples/shimmy/sac_shimmy_dm_control_acrobot_swingup_sparse.py>` + + .. literalinclude:: ../examples/shimmy/sac_shimmy_dm_control_acrobot_swingup_sparse.py + :language: python + + .. tab:: Gym compatibility (DDPG) + + .. tabs:: + + .. group-tab:: Training + + | :download:`ddpg_openai_gym_compatibility_pendulum.py <../examples/shimmy/ddpg_openai_gym_compatibility_pendulum.py>` + + .. literalinclude:: ../examples/shimmy/ddpg_openai_gym_compatibility_pendulum.py + :language: python + DeepMind environment -------------------- From 888614a8acef77574efc5d3f12e957055864ba20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 7 Jan 2023 22:08:24 +0100 Subject: [PATCH 137/157] Add RNN model images to docs --- docs/source/_static/imgs/model_categorical_rnn.svg | 1 + docs/source/_static/imgs/model_deterministic_rnn.svg | 1 + docs/source/_static/imgs/model_gaussian_rnn.svg | 1 + 3 files changed, 3 insertions(+) create mode 100755 docs/source/_static/imgs/model_categorical_rnn.svg create mode 100755 docs/source/_static/imgs/model_deterministic_rnn.svg create mode 100755 docs/source/_static/imgs/model_gaussian_rnn.svg diff --git a/docs/source/_static/imgs/model_categorical_rnn.svg b/docs/source/_static/imgs/model_categorical_rnn.svg new file mode 100755 index 00000000..3a6130e6 --- /dev/null +++ b/docs/source/_static/imgs/model_categorical_rnn.svg @@ -0,0 +1 @@ +statesreshape(view)(N*L, Hin)(N, L, Hin)finalstatesinitialstates(N, L, D*Hout)flattenRNNHoutFC1+ ReLUFC2+ReLU6432FC3logitsn diff --git a/docs/source/_static/imgs/model_deterministic_rnn.svg b/docs/source/_static/imgs/model_deterministic_rnn.svg new file mode 100755 index 00000000..8e6dc222 --- /dev/null +++ b/docs/source/_static/imgs/model_deterministic_rnn.svg @@ -0,0 +1 @@ +FC1+ ReLUFC2+ReLUFC364321takenactionsstatesreshape(view)(N*L, Hin)(N, L, Hin)finalstatesinitialstates(N, L, D*Hout)flattenRNNHout diff --git a/docs/source/_static/imgs/model_gaussian_rnn.svg b/docs/source/_static/imgs/model_gaussian_rnn.svg new file mode 100755 index 00000000..dd33f668 --- /dev/null +++ b/docs/source/_static/imgs/model_gaussian_rnn.svg @@ -0,0 +1 @@ +statesreshape(view)(N*L, Hin)(N, L, Hin)finalstatesinitialstates(N, L, 
D*Hout)flattenRNNHoutFC1+ ReLUFC2+ReLUFC3+Tanhmeanactions6432numactions From 9aa3ce23599f21fd0aa9db00a94463575c78b0a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sat, 7 Jan 2023 22:11:51 +0100 Subject: [PATCH 138/157] Add Farama Shimmy example files --- .../ddpg_openai_gym_compatibility_pendulum.py | 98 +++++++++++++++++ .../examples/shimmy/dqn_shimmy_atari_pong.py | 78 ++++++++++++++ ...himmy_dm_control_acrobot_swingup_sparse.py | 100 ++++++++++++++++++ 3 files changed, 276 insertions(+) create mode 100644 docs/source/examples/shimmy/ddpg_openai_gym_compatibility_pendulum.py create mode 100644 docs/source/examples/shimmy/dqn_shimmy_atari_pong.py create mode 100644 docs/source/examples/shimmy/sac_shimmy_dm_control_acrobot_swingup_sparse.py diff --git a/docs/source/examples/shimmy/ddpg_openai_gym_compatibility_pendulum.py b/docs/source/examples/shimmy/ddpg_openai_gym_compatibility_pendulum.py new file mode 100644 index 00000000..dc72a81c --- /dev/null +++ b/docs/source/examples/shimmy/ddpg_openai_gym_compatibility_pendulum.py @@ -0,0 +1,98 @@ +import gymnasium as gym + +import torch +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG +from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the DDPG agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class DeterministicActor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) + x = F.relu(self.linear_layer_2(x)) + return 2 * torch.tanh(self.action_layer(x)), {} # Pendulum-v1 action_space is -2 to 2 + +class DeterministicCritic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations + self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x), {} + + +# Load and wrap the Gymnasium environment. +env = gym.make("GymV26Environment-v0", env_id="Pendulum-v1") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=15000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). 
+# DDPG requires 4 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models +models_ddpg = {} +models_ddpg["policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models_ddpg["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models_ddpg["critic"] = DeterministicCritic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = DeterministicCritic(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_ddpg.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters +cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() +cfg_ddpg["exploration"]["noise"] = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.1, base_scale=1.0, device=device) +cfg_ddpg["batch_size"] = 100 +cfg_ddpg["random_timesteps"] = 100 +cfg_ddpg["learning_starts"] = 100 +# logging to TensorBoard and write checkpoints each 300 and 1500 timesteps respectively +cfg_ddpg["experiment"]["write_interval"] = 300 +cfg_ddpg["experiment"]["checkpoint_interval"] = 1500 + +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() diff --git a/docs/source/examples/shimmy/dqn_shimmy_atari_pong.py b/docs/source/examples/shimmy/dqn_shimmy_atari_pong.py new file mode 100644 index 00000000..553ed5e6 --- /dev/null +++ b/docs/source/examples/shimmy/dqn_shimmy_atari_pong.py @@ -0,0 +1,78 @@ +import gymnasium as gym + +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.dqn import DQN, DQN_DEFAULT_CONFIG +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the model (deterministic models) for the DQN agent using mixin +class QNetwork(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 64), + nn.ReLU(), + nn.Linear(64, 64), + nn.ReLU(), + nn.Linear(64, self.num_actions)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), {} + + +# Load and wrap the environment +env = gym.make("ALE/Pong-v5") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=15000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). 
+# DQN requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#spaces-and-models +models = {} +models["q_network"] = QNetwork(env.observation_space, env.action_space, device) +models["target_q_network"] = QNetwork(env.observation_space, env.action_space, device) + +# # Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.dqn.html#configuration-and-hyperparameters +cfg_agent = DQN_DEFAULT_CONFIG.copy() +cfg_agent["learning_starts"] = 100 +cfg_agent["exploration"]["initial_epsilon"] = 1.0 +cfg_agent["exploration"]["final_epsilon"] = 0.04 +cfg_agent["exploration"]["timesteps"] = 1500 +# logging to TensorBoard and write checkpoints each 1000 and 5000 timesteps respectively +cfg_agent["experiment"]["write_interval"] = 1000 +cfg_agent["experiment"]["checkpoint_interval"] = 5000 + +agent_dqn = DQN(models=models, + memory=memory, + cfg=cfg_agent, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 50000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_dqn) + +# start training +trainer.train() diff --git a/docs/source/examples/shimmy/sac_shimmy_dm_control_acrobot_swingup_sparse.py b/docs/source/examples/shimmy/sac_shimmy_dm_control_acrobot_swingup_sparse.py new file mode 100644 index 00000000..cf1a2f17 --- /dev/null +++ b/docs/source/examples/shimmy/sac_shimmy_dm_control_acrobot_swingup_sparse.py @@ -0,0 +1,100 @@ +import gymnasium as gym + +import torch +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ddpg import DDPG, DDPG_DEFAULT_CONFIG +from skrl.resources.noises.torch import OrnsteinUhlenbeckNoise +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define the models (deterministic models) for the DDPG agent using mixin +# - Actor (policy): takes as input the environment's observation/state and returns an action +# - Critic: takes the state and action as input and provides a value to guide the policy +class Actor(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.action_layer = nn.Linear(300, self.num_actions) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(inputs["states"])) + x = F.relu(self.linear_layer_2(x)) + # Pendulum-v1 action_space is -2 to 2 + return 2 * torch.tanh(self.action_layer(x)), {} + +class Critic(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.linear_layer_1 = nn.Linear(self.num_observations + 
self.num_actions, 400) + self.linear_layer_2 = nn.Linear(400, 300) + self.linear_layer_3 = nn.Linear(300, 1) + + def compute(self, inputs, role): + x = F.relu(self.linear_layer_1(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1))) + x = F.relu(self.linear_layer_2(x)) + return self.linear_layer_3(x), {} + + +# Load and wrap the environment +env = gym.make("dm_control/acrobot-swingup_sparse-v0") +env = wrap_env(env) + +device = env.device + + +# Instantiate a RandomMemory (without replacement) as experience replay memory +memory = RandomMemory(memory_size=20000, num_envs=env.num_envs, device=device, replacement=False) + + +# Instantiate the agent's models (function approximators). +# DDPG requires 4 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#spaces-and-models +models_ddpg = {} +models_ddpg["policy"] = Actor(env.observation_space, env.action_space, device) +models_ddpg["target_policy"] = Actor(env.observation_space, env.action_space, device) +models_ddpg["critic"] = Critic(env.observation_space, env.action_space, device) +models_ddpg["target_critic"] = Critic(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models_ddpg.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ddpg.html#configuration-and-hyperparameters +cfg_ddpg = DDPG_DEFAULT_CONFIG.copy() +cfg_ddpg["exploration"]["noise"] = OrnsteinUhlenbeckNoise(theta=0.15, sigma=0.1, base_scale=1.0, device=device) +cfg_ddpg["discount_factor"] = 0.98 +cfg_ddpg["batch_size"] = 100 +cfg_ddpg["random_timesteps"] = 1000 +cfg_ddpg["learning_starts"] = 1000 +# logging to TensorBoard and write checkpoints each 75 and 750 timesteps respectively +cfg_ddpg["experiment"]["write_interval"] = 75 +cfg_ddpg["experiment"]["checkpoint_interval"] = 750 + +agent_ddpg = DDPG(models=models_ddpg, + memory=memory, + cfg=cfg_ddpg, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 15000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent_ddpg) + +# start training +trainer.train() From e71b14bb612584c4f637988e19225dce4a1a26f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 00:12:17 +0100 Subject: [PATCH 139/157] Restructure the examples section --- docs/source/intro/examples.rst | 110 ++++++++++++++++++++++++++++----- 1 file changed, 95 insertions(+), 15 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 9730c6ba..3fa5b9d9 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -3,8 +3,30 @@ Examples ======== +In this section, you will find a variety of examples that demonstrate how to use this library to solve reinforcement learning tasks. With the knowledge and skills you gain from trying these examples, you will be well on your way to using this library to solve your reinforcement learning problems + .. contents:: Table of Contents - :depth: 1 + :depth: 2 + :local: + :backlinks: none + +.. raw:: html + +
+ +.. note:: + + It is recommended to use the Table of Contents in the sidebar or in this section to improve the browsing experience + +.. raw:: html + +

+ +Gym/Gymnasium +------------- + +.. contents:: + :depth: 2 :local: :backlinks: none @@ -12,8 +34,8 @@ Examples
-Gym/Gymnasium environment -------------------------- +Gym/Gymnasium environments +^^^^^^^^^^^^^^^^^^^^^^^^^^ These examples perform the training of one agent in a Gym/Gymnasium environment (**one agent, one environment**) @@ -169,8 +191,8 @@ The following components or practices are exemplified (highlighted):
-Gym/Gymnasium vectorized environment ------------------------------------- +Gym/Gymnasium vectorized environments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These examples perform the training of one agent in a Gym/Gymnasium vectorized environment (**one agent, multiple independent copies of the same environment in parallel**) @@ -237,7 +259,7 @@ The following components or practices are exemplified (highlighted):
Farama Shimmy (converted environments) --------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The following examples show the training in several popular environments (Atari, DeepMind Control and OpenAI Gym) that have been converted to the Gymnasium API using the `Shimmy `_ (API conversion tool) package @@ -289,9 +311,25 @@ The following examples show the training in several popular environments (Atari, .. literalinclude:: ../examples/shimmy/ddpg_openai_gym_compatibility_pendulum.py :language: python -DeepMind environment +.. raw:: html + +
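As noted in the Shimmy section, the Gymnasium API fixes the rendering mode when the environment is constructed, so the trainer's headless option alone cannot enable rendering. The short sketch below illustrates this with the same "ALE/Pong-v5" id used in the example above; any other Shimmy-converted environment id would work the same way.

import gymnasium as gym

from skrl.envs.torch import wrap_env

# the render mode must be selected here; it cannot be changed later by the trainer
env = gym.make("ALE/Pong-v5", render_mode="human")

# the Gymnasium interface is detected automatically by the wrapper
env = wrap_env(env)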

+ +Other supported APIs -------------------- +.. contents:: + :depth: 2 + :local: + :backlinks: none + +.. raw:: html + +
+ +DeepMind environments +^^^^^^^^^^^^^^^^^^^^^ + These examples perform the training of one agent in an DeepMind environment (**one agent, one environment**) .. image:: ../_static/imgs/example_deepmind.png @@ -334,12 +372,24 @@ The following components or practices are exemplified (highlighted): :language: python :emphasize-lines: 69, 82, 85-86, 118, 121, 124-125 +.. raw:: html + +
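The DeepMind examples referenced in this section (e.g. dm_suite_cartpole_swingup_ddpg.py) all start from the same load-and-wrap step. A minimal sketch of that step, assuming dm_control is installed and taking the cartpole swingup task as an arbitrary choice:

from dm_control import suite

from skrl.envs.torch import wrap_env

# load a DeepMind control suite task and expose it through the common skrl API
env = suite.load(domain_name="cartpole", task_name="swingup")
env = wrap_env(env)  # or wrap_env(env, wrapper="dm")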

+ +Isaac Gym preview +----------------- + +.. contents:: + :depth: 2 + :local: + :backlinks: none + .. raw:: html
-Isaac Gym environment ---------------------- +Isaac Gym environments +^^^^^^^^^^^^^^^^^^^^^^ These examples perform the training of an agent in the `Isaac Gym environments `_ (**one agent, multiple environments**) @@ -537,8 +587,8 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2
-Isaac Gym environment (learning by scopes) ------------------------------------------- +Isaac Gym environments (learning by scopes) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These examples perform the training of 3 agents by scopes in Isaac Gym's Cartpole environment in the same run (**multiple agents and environments**) @@ -638,12 +688,24 @@ The following components or practices are exemplified (highlighted): :language: python :emphasize-lines: 115-117, 128 +.. raw:: html + +
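For the Isaac Gym preview examples above, the environment is typically created through the loader helpers rather than a registry call. A brief sketch, assuming Isaac Gym preview 4 and isaacgymenvs are installed and using Cartpole as an arbitrary task name:

import isaacgym  # noqa: F401  (Isaac Gym must be imported before PyTorch)

from skrl.envs.torch import load_isaacgym_env_preview4, wrap_env

# load the task and wrap it; the Isaac Gym preview 4 interface is detected automatically
env = load_isaacgym_env_preview4(task_name="Cartpole")
env = wrap_env(env)  # or wrap_env(env, wrapper="isaacgym-preview4")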

+ +Omniverse Isaac Gym +------------------- + +.. contents:: + :depth: 2 + :local: + :backlinks: none + .. raw:: html
-Omniverse Isaac Gym environment -------------------------------- +Omniverse Isaac Gym environments +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These examples perform the training of an agent in the `Omniverse Isaac Gym environments `_ (**one agent, multiple environments**) @@ -823,8 +885,8 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2
-Omniverse Isaac Sim environment -------------------------------- +Omniverse Isaac Sim (single environment) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These examples show how to train an agent in an Omniverse Isaac Sim environment that is implemented using the Gym interface (**one agent, one environment**) @@ -913,9 +975,18 @@ These examples show how to train an agent in an Omniverse Isaac Sim environment :language: python :emphasize-lines: 24-39, 45, 53-68, 73 +.. raw:: html + +

+ Real-world examples ------------------- +.. contents:: + :depth: 2 + :local: + :backlinks: none + These examples show basic real-world use cases to guide and support advanced RL implementations .. tabs:: @@ -1120,11 +1191,20 @@ These examples show basic real-world use cases to guide and support advanced RL TASK_CFG["env"]["controlSpace"] = "joint" # "joint" or "cartesian" +.. raw:: html + +

+ .. _library_utilities: Library utilities (skrl.utils module) ------------------------------------- +.. contents:: + :depth: 2 + :local: + :backlinks: none + This example shows how to use the library utilities to carry out the post-processing of files and data generated by the experiments .. tabs:: From a164b263fc23edb836fbc240653652d4e7defb33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 00:13:15 +0100 Subject: [PATCH 140/157] Add introductory text --- docs/source/intro/getting_started.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/source/intro/getting_started.rst b/docs/source/intro/getting_started.rst index 810c26f4..4849f068 100644 --- a/docs/source/intro/getting_started.rst +++ b/docs/source/intro/getting_started.rst @@ -1,6 +1,11 @@ Getting Started =============== +In this section, you will learn how to use the various components of the **skrl** library to create reinforcement learning tasks. Whether you are a beginner or an experienced researcher, we hope this section will provide you with a solid foundation to build upon. We recommend visiting the :ref:`Examples ` to see how the components can be integrated and applied in practice. Let's get started! + +Reinforcement Learning schema +----------------------------- + **Reinforcement Learning (RL)** is a Machine Learning sub-field for decision making that allows an agent to learn from its interaction with the environment as shown in the following schema: .. image:: ../_static/imgs/rl_schema.svg @@ -14,7 +19,7 @@ Getting Started At each step (also called timestep) of interaction with the environment, the agent sees an observation :math:`o_t` of the complete description of the state :math:`s_t \in S` of the environment. Then, it decides which action :math:`a_t \in A` to take from the action space using a policy. The environment, which changes in response to the agent's action (or by itself), returns a reward signal :math:`r_t = R(s_t, a_t, s_{t+1})` as a measure of how good or bad the action was that moved it to its new state :math:`s_{t+1}`. The agent aims to maximize the cumulative reward (discounted or not by a factor :math:`\gamma \in (0,1]`) by adjusting the policy's behaviour via some optimization algorithm. -**From this schema, this section is intended to guide in the creation of a RL system using skrl**. Visit the :ref:`Examples ` section for training and evaluation demonstrations with different environment interfaces and highlighted practices, among others. +**From this schema, this section is intended to guide in the creation of a RL system using skrl** 1. 
Environments --------------- From 24c778c25b8946e1c9a40a46e18f94f9dfc373d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 11:48:17 +0100 Subject: [PATCH 141/157] Remove DeepMind reference for robosuite environment wrappper --- skrl/envs/torch/wrappers.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/skrl/envs/torch/wrappers.py b/skrl/envs/torch/wrappers.py index 28d61426..1bf3428a 100644 --- a/skrl/envs/torch/wrappers.py +++ b/skrl/envs/torch/wrappers.py @@ -748,12 +748,12 @@ def close(self) -> None: self._env.close() -class DeepMindRobosuiteWrapper(Wrapper): +class RobosuiteWrapper(Wrapper): def __init__(self, env: Any) -> None: - """DeepMind robosuite environment wrapper + """Robosuite environment wrapper :param env: The environment to wrap - :type env: Any supported DeepMind robosuite environment + :type env: Any supported robosuite environment """ super().__init__(env) @@ -782,10 +782,10 @@ def action_space(self) -> gym.Space: return self._action_space def _spec_to_space(self, spec: Any) -> gym.Space: - """Convert the DeepMind robosuite spec to a Gym space + """Convert the robosuite spec to a Gym space - :param spec: The DeepMind robosuite spec to convert - :type spec: Any supported DeepMind robosuite spec + :param spec: The robosuite spec to convert + :type spec: Any supported robosuite spec :raises: ValueError if the spec type is not supported @@ -808,10 +808,10 @@ def _spec_to_space(self, spec: Any) -> gym.Space: raise ValueError("Spec type {} not supported. Please report this issue".format(type(spec))) def _observation_to_tensor(self, observation: Any, spec: Optional[Any] = None) -> torch.Tensor: - """Convert the DeepMind observation to a flat tensor + """Convert the observation to a flat tensor - :param observation: The DeepMind observation to convert to a tensor - :type observation: Any supported DeepMind observation + :param observation: The observation to convert to a tensor + :type observation: Any supported observation :raises: ValueError if the observation spec type is not supported @@ -829,15 +829,15 @@ def _observation_to_tensor(self, observation: Any, spec: Optional[Any] = None) - raise ValueError("Observation spec type {} not supported. 
Please report this issue".format(type(spec))) def _tensor_to_action(self, actions: torch.Tensor) -> Any: - """Convert the action to the DeepMind robosuite expected format + """Convert the action to the robosuite expected format :param actions: The actions to perform :type actions: torch.Tensor :raise ValueError: If the action space type is not supported - :return: The action in the DeepMind robosuite expected format - :rtype: Any supported DeepMind robosuite action + :return: The action in the robosuite expected format + :rtype: Any supported robosuite action """ spec = self._env.action_spec @@ -915,7 +915,7 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: +--------------------+-------------------------+ |DeepMind |``"dm"`` | +--------------------+-------------------------+ - |DeepMind robosuite |``"dm-robosuite"`` | + |Robosuite |``"robosuite"`` | +--------------------+-------------------------+ |Isaac Gym preview 2 |``"isaacgym-preview2"`` | +--------------------+-------------------------+ @@ -958,8 +958,8 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: return DeepMindWrapper(env) elif "" in base_classes: if verbose: logger.info("Environment wrapper: Isaac Gym (preview 2)") @@ -979,10 +979,10 @@ def wrap_env(env: Any, wrapper: str = "auto", verbose: bool = True) -> Wrapper: if verbose: logger.info("Environment wrapper: DeepMind") return DeepMindWrapper(env) - elif wrapper == "dm-robosuite": + elif wrapper == "robosuite": if verbose: - logger.info("Environment wrapper: DeepMind robosuite") - return DeepMindRobosuiteWrapper(env) + logger.info("Environment wrapper: Robosuite") + return RobosuiteWrapper(env) elif wrapper == "isaacgym-preview2": if verbose: logger.info("Environment wrapper: Isaac Gym (preview 2)") From 84a09e5faa71d1b92bfeda80815e68fcbd8fd0f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 11:50:37 +0100 Subject: [PATCH 142/157] Add robosuite to wrapping in docs --- docs/source/modules/skrl.envs.wrapping.rst | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/docs/source/modules/skrl.envs.wrapping.rst b/docs/source/modules/skrl.envs.wrapping.rst index 17e1800d..12a28131 100644 --- a/docs/source/modules/skrl.envs.wrapping.rst +++ b/docs/source/modules/skrl.envs.wrapping.rst @@ -5,6 +5,7 @@ This library works with a common API to interact with the following RL environme * OpenAI `Gym `_ / Farama `Gymnasium `_ (single and vectorized environments) * `DeepMind `_ +* `robosuite `_ * `NVIDIA Isaac Gym `_ (preview 2, 3 and 4) * `NVIDIA Omniverse Isaac Gym `_ @@ -220,6 +221,35 @@ Basic usage # wrap the environment env = wrap_env(env) # or 'env = wrap_env(env, wrapper="dm")' + .. tab:: robosuite + + .. 
code-block:: python + :linenos: + + # import the environment wrapper and robosuite + from skrl.envs.torch import wrap_env + import robosuite + from robosuite.controllers import load_controller_config + + # load environment + controller_config = load_controller_config(default_controller="OSC_POSE") + env = robosuite.make("TwoArmLift", + robots=["Sawyer", "Panda"], # load a Sawyer robot and a Panda robot + gripper_types="default", # use default grippers per robot arm + controller_configs=controller_config, # each arm is controlled using OSC + env_configuration="single-arm-opposed", # (two-arm envs only) arms face each other + has_renderer=True, # on-screen rendering + render_camera="frontview", # visualize the "frontview" camera + has_offscreen_renderer=False, # no off-screen rendering + control_freq=20, # 20 hz control for applied actions + horizon=200, # each episode terminates after 200 steps + use_object_obs=True, # provide object observations to agent + use_camera_obs=False, # don't provide image observations to agent + reward_shaping=True) # use a dense reward signal for learning + + # wrap the environment + env = wrap_env(env) # or 'env = wrap_env(env, wrapper="robosuite")' + .. raw:: html
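Once wrapped, a robosuite environment behaves like any other skrl environment. The follow-up sketch below uses the single-arm "Lift" task with a "Panda" robot as a lighter, illustrative stand-in for the TwoArmLift setup shown above, and only inspects what the wrapper exposes.

import robosuite
from robosuite.controllers import load_controller_config

from skrl.envs.torch import wrap_env

# create a simple robosuite task and wrap it explicitly as a robosuite environment
controller_config = load_controller_config(default_controller="OSC_POSE")
env = wrap_env(robosuite.make("Lift",
                              robots="Panda",
                              controller_configs=controller_config,
                              has_renderer=False,
                              has_offscreen_renderer=False,
                              use_camera_obs=False),
               wrapper="robosuite")

# the wrapper exposes Gym-like spaces, the torch device and the number of environments
print(env.observation_space)  # built from the robosuite observation spec
print(env.action_space)       # built from the robosuite action spec
print(env.device, env.num_envs)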
@@ -277,6 +307,13 @@ Internal API .. automethod:: __init__ +.. autoclass:: skrl.envs.torch.wrappers.GymnasiumWrapper + :undoc-members: + :show-inheritance: + :members: + + .. automethod:: __init__ + .. autoclass:: skrl.envs.torch.wrappers.DeepMindWrapper :undoc-members: :show-inheritance: @@ -284,3 +321,11 @@ Internal API :members: .. automethod:: __init__ + +.. autoclass:: skrl.envs.torch.wrappers.RobosuiteWrapper + :undoc-members: + :show-inheritance: + :private-members: _spec_to_space, _observation_to_tensor, _tensor_to_action + :members: + + .. automethod:: __init__ From 373e124dfcf9d2d8ffe17bc947fa13ee8c0def7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 12:29:44 +0100 Subject: [PATCH 143/157] Add robosuite example to docs --- docs/source/intro/examples.rst | 38 +++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 3fa5b9d9..98624d40 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -330,7 +330,7 @@ Other supported APIs DeepMind environments ^^^^^^^^^^^^^^^^^^^^^ -These examples perform the training of one agent in an DeepMind environment (**one agent, one environment**) +These examples perform the training of one agent in a DeepMind environment (**one agent, one environment**) .. image:: ../_static/imgs/example_deepmind.png :width: 100% @@ -372,6 +372,42 @@ The following components or practices are exemplified (highlighted): :language: python :emphasize-lines: 69, 82, 85-86, 118, 121, 124-125 +.. raw:: html + +
+ +Robosuite environments +^^^^^^^^^^^^^^^^^^^^^^ + +These examples perform the training of one agent in a robosuite environment (**one agent, one environment**) + +.. image:: ../_static/imgs/example_robosuite.png + :width: 50% + :align: center + :alt: robosuite environments + +.. raw:: html + +
+ +The following components or practices are exemplified (highlighted): + + - Load and wrap a robosuite environment: **TwoArmLift (TD3)** + +.. tabs:: + + .. tab:: robosuite:TwoArmLift (TD3) + + .. tabs:: + + .. group-tab:: Training + + :download:`td3_robosuite_two_arm_lift.py <../examples/robosuite/td3_robosuite_two_arm_lift.py>` (not tuned) + + .. literalinclude:: ../examples/robosuite/td3_robosuite_two_arm_lift.py + :language: python + :emphasize-lines: 1-2, 51-65 + .. raw:: html

From 231a4cf777fcf9690cde4eea5197456f7c533eb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 12:30:50 +0100 Subject: [PATCH 144/157] Add robosuite and Shimmy example images --- docs/source/_static/imgs/example_robosuite.png | Bin 0 -> 228064 bytes docs/source/_static/imgs/example_shimmy.png | Bin 0 -> 162134 bytes 2 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/source/_static/imgs/example_robosuite.png create mode 100644 docs/source/_static/imgs/example_shimmy.png diff --git a/docs/source/_static/imgs/example_robosuite.png b/docs/source/_static/imgs/example_robosuite.png new file mode 100644 index 0000000000000000000000000000000000000000..5a6a9a83139d8a334ea8f7bb217af4e7ee923023
z_}a@azk&&FYho{tR`KlYEENA|c|3ab2)A`hZA^j}O&QyD+4;crj} z!dm_^=Lm9_Kk0q=D{aD$!CiDBTINVdVr@hWM~?2F<{wF3d zOHOGHA?yU96Bx_afxiT~AW=*O=eg$(7&VI9B(KN03(3$NkK?dRoxD(dF$)n{B|K$<0^EqW|Y6?Jh+`XJw0ugYATEZHI#uBttWo<`fLC2 z^=}5JtxiV6wcabZ4)_qTX)51;;Bg|1Lue2p`pfh>?ROAlQwuqw(S(l;e6sZvEscWu zloxrdn(m`+p`YnGFb7Q>5?)*#x=XsUB$YQuLP&*xk5wH?2zEKl7cwDJTNDGAN;fM< zrT?lv?9))5AoO3tM<$m(I~EXs$G}q)XGS7l8+^Ic`Qfi@E%}Vl$_6{Pb3&Pb$YaaS z(jAWCvlT#rAuLqvRTk+3MNDOJb(KDDR8jaWCAjg&$*t*Fq^?2m^;R&)s3EnfNgx-X=(AFFT z605OsGBbaScS^A;tgxOb+eOGS!!T$%v0uLRu>uvI69UUw zuWMx+=)2Z+;d`xn2SkAJgp78=b}2^FxBxPgiK?nyh!)7r!2r`QIRx!20=o08tFZJS z13OdIHh()f2nxhXj7FgSr_Js9dTF9MIvO(}4-1T;>-4=5ilCEh!<)?>@}vn`AY&4f z7ts>*8R67lv^jmZele_Fm$7Em!=Stmea7Ihn$D?>FPpu8{NDF(-@bkE;>FgL+H_zW zbFCzMVuCWB%h7WuZRwO*LaY7tZ+z>q$F6;B-jOMu^W^U zU_+XXdfI4~FR`YHX^MJWwpLl($oi0oNgoqB)n-Jk0?yj(;1d+erGa(jCK=}}g!mYy zJ8MB56Fx&!=#UJ^r_Tb>NHA1j4 zO{i5Qu2?+>X>^+HPmz=$c97;NK?n!*c~Q|$5m0=R5Haa9qnEmKwahh}HWAtk5kGk_ zqbD!u?9j)^b-05YLC{;oT&KKQnP*vS|Jvw;wIo^MHYZHJ0O!uzN=(Ewa1qs zQn)Tjtp4;1IVh7liB6bu35^>CBG?m8&dT9BChS;d>1;E>5aXL#^(8f(DMRjzJq*C}sHQ@{I#7=~Qgwc7N69?G(rY-h!)M%r~>=^V7 zSzW@i*(4hh*wFNJxH(_1Thk28S-?i17|Ex%@1Tz*0t@vL$Ckw1`wot+BV4m1L+#At znrfh(455hAlaqh?=YL5=*9v>YKuuw5C7BM&-WR_y>z#Ywl91;6SHJr8r=EQB+O?}O z^-61Qdg9=z#>;(k&C1yBrTZ9v#DM&}f|;PAu?wg0$%($2`hu9E+sXvX#7M4UKn4u4 z$q4l8UAW3?v?l~jE=41uY(m6S8VTx1M_eX~g&8pDQpZ@T&p=^V*Cz+8Fg!G=#rdRf zXs*1Y4E0Hk%!lS(!oq}*`UFRZN#~l@f$rd=Cc{a{H8|A<_<%7NS_{t%gk?@h!iVz@ z*BAmlS8iA2z9Tp{5MB`*O>-UL34L;*s7pjI#hE4SD}Ao7{|uZ*lP zCQejh2FL`G_m41f*3DYH*+#h-3?X%1nQnm$sjo)4j<=-xRTBm|c$15dkOO4lBoZoGfAs5-v zdEf7Bj0g@JaZ3|5U2D?FBUuvmxv=A!Ox{=PXf6Ho!5Jjx*sDx^BX?y}2$eAo?~n&u z%32k1?qv*Gfu-q6_zgxA>VzKlwlcSG-}&JWU#Y2Tfq+3n`;F;i^}Dg#>Ed;v2hti z?d&!;d?eczFLjItjJ67&tc7t{uIJJFY}PbUT*wKj1ASZ$WMPZYxjv5KjtX{}hGCz( zLVd7VwV+Ru$KWsEfN<{lh?&cwraNexBozKm3v=+j5~UQLC-bv}2}O&wlao_d1_N#; z8V!`{=9@{cbT0h^aX8>~;XV%%>1R4XC(-EWlCmMqO^Vc!$uqFMO>j^R2wvPK74TQc zQChvJ7IzN4bxkKzxnLtoGey4^In}7NS@*JKgU`$M4%n((3WgsiHTgN;;CCdSels0;H zN@PZGpidE|h}SPmALnQY0Q7O#%dfoh@Zm$;yRFG<-MjUS#^dAT)6-M*zVilVxT7Y! zF*+|P;l97W-{#XdZ{AwGh?Y0ca4UAn)zg>`W6YcGKbb;Ti9SMJGsRCV~A|goJ2^T0a)`Uc5*P zlh12b9|^AX5l}pkfrdH4fl)+HP(anH<`C?TgEyqp32oe5!$r`>I%IheFD(>EXjVAGqQ#MA=40FJjJ^Xb_oDOLtr&;!| zTC*p_>LG&dPAtnhDCv_6k{Dd~$k)cm4>lI4mcLrhyRETBMIIt^WvSwb!TDlEUdn-D zn!i-PlCn@ZhuAPb24lnZX_j0l%R6~F_FGcgMs40NmO{Sptk3*9k*P97Qum$%l2+|4nLI*umz8*mgDlGoMrH`1*T5x zuqEM*xoKx+AyYxm{DfX`lJ4MDpT@Z+4Uv>sl$W#lK^{5K)UM$ikYu-Q>W_mSRd?>? 
z2(i?8kHgt_sI4GNIOQ7s1lp!=xCBf}g?hwXW{H@5omqAfIu1q})p4%a>xPS0f;X=a z1wQnOkZqC{<6i5;(hFw+*-wXttqN17{dEvB2t`)rsHrR2xhZYrO^(ULD#HV3#)26n zK%XvT;{yxe&z?;1uxUY$f+CMu@u%}Us81278|Nl{QsCnHxQdA$ydB^;12P9RM9*rq zC9iuS&F&5JKS5(V>vnzg=n?5ej#&i@-<2Ue1%jFL-++&ncYkrQbn_Si*K#;>q1#`GLr-4#x?KRFO_Rpt*-!JJTyU(kBYo| z_wL!*w#8eU%ZkDMz{+#$aSsmgn7Z{EE0^i#3zci{TR3fKO;PsK2koU@z5%1>I(TnVm%72PLGX1yFNS?rLF&4R16 zIBVr1UxzytU#7xXIp;uub(|pNi~%|X5jN;7&LyGw94hyT2H7Z;g7aAbZRE$7JJWKu zz36fWvy&$WA+hRlOKB9AdAgGMubZ6XC={#$fNv&J_l@0lEsF9$bvq@g2F(@J_T&pI zeLx898By_x;`o)fxNCDtSGhl466K^1rXYEPIoFuIvREF?och3SyLx2HbqDq!;IE!tU2(kYaJWzBWUS@L}FS$ zL4n9_S@_6qlFS9Q)=QJTeZsK~E(6k8RL7JY0u47m*DdFa2qFe@qj^Ijx9^n_K-_?!WQ}_=6#*N&L@skDE&(^r#>KXLpMJ@&OuTR6r>UP z<3w)N=Yxwp=Irx@k1>JesoXr9U5cCqiHTC*g0S`vKIe*nRTT5tt!%(6k+8>9v)w9@ zM^y=BFSkfws@}#y@T2v`FGm`YoA7XT`HZt_BTEOFUG~D40x(s{XNhMN_n6Ca8SG~S z#Y#ykNG;R4rN;g3Oy00oKausEdYc$jQta)Avq}`*L~*jJ>h>#c3baD5*U^Y23euep zSVD$Y>@plns8oWHQgwaSSWG|w1V?7PvNvR7QV1?ARyshfa4(U=+l)DiHpU~HN6<>A z(jX-Srqoos>~#Hx`=fUMgF9?3dX|}N^c+w!RhA1QrueBBCST*d;yebxvLnY!f%#Of z|0vDCh(~8<`_{V?t^}e_WiJ3eBs$wvrw_qB2jduFa2skWi9H?|>P9gLFY+#AjX9h; zg9mHBJKtZgw3CsX`k;o=+yI0(K=U!OxhAQ#Ul6N{N^w~7QuVGmT{&RKlIB(q#7HkJ zkMcM~W7ONE-6VbULEAi}INb9SwU%zpBa$O%!s|meD-7o~_kd&qoM3=iGogI7k`Wv+OG1ZrO!)4J)|hz-Cjqo3oOP zE)27QAT(km7?&}SxmX7MQvZ{?Rlsp$KbV5o`mTaJ&^u6{UTTXNOMVV-?UH8)AERh# z-HyZuO=8vxnZV1Kaf&>cv%>pD=A=e~o*qhlv{jkpN#UkjAGUksh#`Y}<$OHY4P;wm zT%M(qsfMj}#vvuPjAUkziK_;V%d!meQZm9gN7>teBs`2sOn_67d5wg! zsNmY3<@k)NYpBAxbedG;>ERcumA1pN2~O(c>`VstaEdo~QlT~H>e))r=J7g5XZrGz z$c9pC=@Zn4@YG`-MP_Cr1ej0hpQT!X*6Buw9;P8UyM;g=9Z8n`Y>2|@%1jMR2dD#a zQysglK=o9 z07*naRG3-m=L|E;y{??ZB#dP%00i3+ohFb{98yX>Njyl!Fnrb+-0z=N2$(qmhNcUx z?n>Q6(#Dc(1X)|kWJ$Pv4Kzdl{`}gl39UZkEchzGw z@c1g*QCzU-#olJ~+8b}&ym8~{_r7O6+Zu1+!CxxhA!eqsfezmWiQ>gnT9Qasqx~eG zz|3@1P8^{$Ek~e{43sIHgW6>4E4*jYGX16I8Q{8!)yF*xJ-jG(*xviC)-B;gDyuaWvLN00^U{F?f982_a zh!WsM9RRM;OlgtuBuH6fB2Ne%uNn}x!@yCFg}`gVB82(}6c$zCRux5h>eU4=!|ySX zr$3IgW6H`NHTmp^>@;>Uv-65LZVw(j7+cX$5wu$=g(*WMmOX_N_+&aa%8M!w8H+Fu~f^%rYt86+(#&D$0TZC$1n z&bD|XBYzSEuU3t2Z~}})#MB3F>S_ckQu{*DX!7 zW0wuH{F3V+!wI@!GTbJ0i{eyzT!k%@?_jUjY*n@ z*wJ5L0U3+T_|l;klaTi6tFPX<9_as5lTiwm7R+@e)-vJYN(m0TI*siAcsnZp7sZ%AY|9O(8ZMX_c)kq3o2 zvw>hths#+QO_E3bRDn|f)o67@ajLY)2Q*`H7TeB(T-N0H$kq65%VfX+1p=9C!4VQ4+gd z+_;Y6?B{}P$oQFhlhg~8@}V;ZVJO4f1Q_9ZafWQc+bB;4c8qemIFxkc6{~QQ1_n$P zzjp00lYeQXmcLqcbiJ+L#m(J|7cb(g4qC5_z0KI|+zxc85aRmO1WkjNm*wW0cedLV zqY(HTCiyT>g@bmn07EKV15g$;I*V?mAk_n;=bXjSEOm?EXMTix{?uY5PdeUvz4eMk zYR1#apq1aJB<|2wxmfoyu#!sx>IQ*`^&1MsaZVq>seXi4lW}%7Zy7duM$N`ZyizSQJVNL>6!bqwr#-D>7AiOv!mYeUSfL zQuqEzSxFC?2Q(Gy)B+VV zcdj$*abrhmDlfvQ3j4?eD((`%C8+`@ujT<*ET)6P`(@(dPxXX(^ zc-~_*h^M#9-rnhJH`bF!%i%$Hp6)jz)PqFn=LTns8w(bD>kf!|6gey4tWJ;FS@`^G z8vjntTIejBoi*z-ju_tP4RM&4Cgii6SjLfU;Ujw<=|Kzw$VmC+F(YVL9hM=lJEMiyc1Y!Mm>oV-T63&46Nj6qIl4`8wffUtRP%^+C` zPA))fD_H>ot2qW)CN)^>?Y_b~0Xu8UN=xr|d50=ZH30x+F&LGVK$|FqIO)4>k;gN1J#JJ z2hya}qF1SJ)=q5*TApLrP|d_&NA{^{Fr6KCCV;-qb1NzJ1v6r3}?3_ zeXR5ufb1MtRHrn|0$JT%kWnzff@CC>e7)eqFo|mpzlH+m@ByC8Arn0H24UeOv4P1Z zXO%Kh&dT_KI@BkV4z&d^9htRgZG?;Zz$Yf3@vnYKpDcnu%mfT)s_$|Zy$)w(!pN0? zg^ti?5m^#-W^ihwLR4Qt!1Bj^jN{lkYyn+as&E}86C}=IKpvA8tH8-OM8>3#P0n&? 
zTP?uqv;6d0fSDChu!^@UhsxtEMd&1S7w)>_XqaZ1IwO;V+;)ZPGDs6h`-Ixmp(qnh z1Op$-`#M9{qZ!4dSfG>QMi})G8xpaqB&;YW;O73v6>VIzW`DzS&Ta;l*f2QGd%3ho zMBkb1_K3-{NOc#o(?fBlcTNjP)z6>wDX}D zN0H#46Tjks$W&hd=jaKCcf6EcDa1JBTk-kDC_?NWuK$XMLR{XcY1U}T*&#_L)hLxr zX>*m(8qTZ;K9BMz4X0{(?(xU2J@X?!QcuPJbXaHZLi6kY>Mn2VdfRV&g!kLu{`U3j z*KNAdi+>8a$#LB{nyXig9pB;ISOqbfYA(&QM#Ws>=;fZ^@aQxG8#JOw}BJl z%IvHluyw0%Xgm3?`gD+t5Dni+9dUNmjI7*6j>GZ!Z%~wL+WFw!era)S=!J4}R(Whf zLHdmAJlH&hU^I-Gt#A`OK@UXP!w$6wun@m(iBY7Bb1-+0rFHj*2~AKd?-2Q8$XV#022jI1O+%dvsQiM39MYWp-9+ zN+grG9Kyg^v!Y#_Ojn_`aFz{r##pa9N1*7WHN}&`5Z8{qDz94moU1Wz;P;* zt6!SR5}ZEK-s~-V8%-P(n?~7LKcSGbvMM~Dql9!5OBy=VR5Q~dm7Cn7cX`7Co|4e6 zPjVVhGetHr2>}|lC^aU`K54GCK*MBbpDX529!@B&BC0~!J zsHHJ$RHNevD@OV-+lq=qg!A^16;X-_5k}=Fzp|iPNhy@4chsq{PN(j<C&ZVpMAE~LPuLu-ZnXIuh)F6k66KDdGzR~-+c43 z+5JELvpYZjg1`Go!#6RcD&UfNt+TnwOd2k42@a8mvw8U%o*08O(4WzjQb$ja!Wm}4 ziKX1Z(JMYRmvR@YeTWpkW;s%o!z%J-@*yH%Q4dNQ;J-c;3mZ|ApJPyMtg4_+`oPKP zuHQPqVVRWWE_8~739gW^oMi>*r;cgh3XnsZ!5v0|_ZD%lXlHTCnI%((vd~J9Av8!@ ziIS@f>Les9E$z~ClL~NRv{N0ti4m6>_6{ko7P>wwJ6A(kSpfp8#T>2bqg@&$j}Fae za7?FS*l6e~I1X~*##kuYElN8jhkv-IDX_y#&cZ;J+{j}M?5Ls`v87jXA7S2QtRdL+ z0a$ohBLfFw5&b*0PalzhOyIjA?DcCgc&+VJ%Q!6Q4|9oc>as{4;K?6+yGGXW9QHZ#C`pLyn|e(EFG&3NvR zA+gmr-ebz!j+`&;!Q^+G@|GJd2Ud^Wy!n&&?{Ca5zjkB)um2`0cHt>@C;AEp+`9s* zONmPEtt;8+LS={xuJoGoa89K%ClBs z=XjLknq7gRNeGs+2)~$9Ozy@YXAb7z&0vn>0pAL(BoQp3m$j3CntHk4*llY>)!WcS zEQ`(y6(FPOR_jEa{Ih5=f047;rP=U1$%){FP%4!Xw5AwG^ig$nbKai=I__D}*Q8XAusEG7=Fp0=h#D56oScwpW!XFkQAyhnRrnpQKoX{`~hO0WFqrQh#Ub?!{lr?j7kO?8&{_&r#I-VskX^G z1(JT_nw67nB0KZlvW$4^$5u-*#wYQc4&{KQI5&|4ch{Yq-Z&hklJqEWbfP^x<`t#8 z=tStP$Txa2RkXT@!t?mjtO~=a>a|gsI^Z$LXwQV<%(hU_NT$`S4tb=G3Ntev~;W>dh_7!@GC?*B}1yT5h<%V0Pud z{<;0|<>;R^bkf6W#8VM5@X73)fM@|4*0O`&9cScUtQD_2%s`0ASG(XCXr>s{q(^a- zWtUtJ&4(p9BoN@fR4)US zNE#;$UJV8M|H_7#0GrlSjke4|%lf&hk{2-mNbzU~jg^PvB|hG4e*Dh&gM$=O;$WQ`BZ+ z4u+B_tuWstXDWf@e^-ITat1)m2|X23>U*yCi$*Y5$b@kqDDX9+72?_AQs3c5eOfH9 z)59JlhqY$-ifc76@Il;!NYI!KV)D0-#5;-XR|1U&n^t3^Bn#*z|C(F}r;`~j=(!b% z6s#!gR z$EY_pw_^8iUPO&-Y^gj|RP2AowgnadM{=%^Y{s=k-!?5XAqOpoQ4{Zrph3rI%|8d+v8N zUy?H8cZ(K1u^C78+y-Ra9~>Y5^7p>?KOY}U53}uhvaXMQ=BFJF#q;dpS%H(Sh{1qa ztLt(xb(wzmBXnbes0tjE!Qx7K3eMUACx51y2IWd8my+;6jlfiP21&1@^noE!+1U`n zT}Xa$q*fzSt*XChj=~_yfnvaOd7k=+IbGlaQ-tB9Sj0JSTyc~ok~#hk%%Sq>cpre! 
zQW@`LAS|l_lXL17Xgr5iGC2`|>RBK^aip+{&_^HStj@w>j!jCX0YQjjgjJMR%crGV z>dadku?tQfJ-3AW^4|ex5jdmP#0*cbce~a!UT94^jJFv}rHr|Htn%GVk1Z>-E=fI(27QsN(EOzK1}nm*=CBQpyPCu{?$}NQVck+Cn*s%a@s- z)kuyoDZ%Oxq;!9Fr3JQRRShs95LiPIDk)3V1dU{@xMtS9>ca692nskipZ@fxD7{R} zBT6*g&4%#;v1A-;4Ag@&Xu27VP+UN5K?rF@A)-qoNe<_z9Zg6$Z2@P^)W*QeCEM5; zEj8vhi4yI2ihl#sS*PRk6n_gD-MuqAXmhpw{iKTtE!ND{56+CypRq+Yku6hRULY*C zprjigij*}Vl}mzTM(J0s>>@+1B}B9(R(%IwO3s-|41J(v85tU1yIhIo5KzhpfnU3N z_3yv()iu9p$VPc$VY>$|dT$axh1VbQo{F`Q{sM+>&#s%JP_dfrWkwQ~BU0K@G@z^Vk^+cr$xVD=j7d|2RDGOEAQa{jLMPyNRB4h< zvShj$*H&Dsyj8u;cYnFuO#MTQ%MrtlDh9X=aT_c=&ak@rV;_>CnH$ZK22KbuHWVm6 zNaN?4?^oITV);um*MTdR7VE2OF+@-m6F&XQWIL07*l$ByGmMxN11L4-=vILT390NNdGb1f)Iz9d8e|ga~h^{I#9P>%nh^l$*;NSpBFmyEYZr{HBsqcUPckbM2 zlW1)gSI}&?kAC>l>CgR~?e(wF!;Mk|Om>~U?_9JT1QjBbzcECM5D^AAXdkH;Bjl() zcM3C-Anli~DR z-gaD(gU|{(MN@n^v0;l|1DC)qn-zLGNG1M;s%Mkt6RFODV>3AYZhQZ#5R_T9%>;_| zBJ7HyfQ32q7YwVOAY2!SLpBFgr490OafywJIk*kP=}{L}OtA@^!per=Fm34~I2a*S zewF%2f~8K11nw8+xsMESyoaBMi>j9S{7r;x!AerrF5U3`7mhB)0!$=Ver4k=sT~Ss}rX^w0P%DTn~AJ9lgCh;6cN zXtiXGVhrH+(T3OF7U<4011g8a$(IylfjddMl@t}|W@DcVnxbrvX~GmXk&jE_o0{jV z5g$5LyQULv72EFes0!o>%*Aup((qFjNewtn(fJ-rF{HV$qSFD#!WH$f zOGlqfFhk_$6_c(}`Dkpa>Pmz@Mj|_S3}-rUtCK+;Oq#o~=WzSgU-{)Hu3S0WZs$4c zxntGCG-{94Y2OOjAFN?{m$%J*`!k>2uV26ZiSK>y-`%^{I$dlQ@22PPHV!{DyZYiw z`(OR49rjNH!?z%w#$kYBIJeRz1{{SL8DemEO@PMU(gH_(U~HH>*PG3Ta6nI1;A*xB z4=%gT^5o`tBXyWcb_OE2Kpr?Aqcx^PhNYAxJ8}#_p)krx9UOU{-lMsS24oZ2QObmg zs7a^%W*+J@j!}W&aLFA?qJ#q+BRKOmMISGv#xyt5`GEJv_<%QdS3p^@L2%4WZJkq6 zrTA03y{!0M>c~=~ldxN^lMy#^`m5xb-7~lXNrsixq<0PYKW{z{eopVqlP$mQL8%rF#IH7}w(^2PYU55;*x1fDp z-jPQ;M>G{OkN{f(xI^0+8_7>%6+?(c9mdJe)S^5#$=Q(#v9V*;z<{|Pub@0_AYpd- zluT8N5qLgxEskS6E1+0})%uD_)aC%t4gb@7qg{-iG8e=IMEMvB#ki-~Tkfl?>)6Cb zJD|i$*Bzrm+Q;1aZX#z3LLo>mL%eCj6p@ zSZ~R~#_E+zZQuYCPIY4|ZdVga{Ea#TKc(=fK6_g?h&U?Gb{E|IvHbRxQrPR(?_S@0I;U8QE-Ohpy&FO5>*`BXqVQF zzUwe@7t$2G1Shw%rYxEpiMH?3cp0K89@Ww7q8eTR=Gkx;U3<3RXQWoQiN|o&+G*Si zBdoaE;pjVhxzyK^HZ~53r>j(upI9T*4VrCuMIZ|GSZ&PNuG_PIBgyRe`1sbXH{ZH@ zuZoau)OxIZa8os>DPHfVh8eq^0?2bqs4J z#bQanqA}YD4`xZ!%f%TP9^_N#Ysq(b+^+Hw z#Qn^&C~E-HKrO$(43a1lO!86_&16$vN^9~=>P9Q^8gB#)+Qo`wn}q-XAOJ~3K~${2 zH{=@RWwKK#x)aqGK6c-wk0mF?zxOS-}DWAq8AU%@cdkVJKB`@!o`a( zzw*lQx4!jzj~;#K=;&TV&6*ea-Sc&u^0s;ImFurP_|O04_;-HW@??Uu1-KwKXe$aW zNF(xK1y*`W<=S8%GUecz4N8jX1q|!5W+3aC**f4t=^jqGU8mPMJF`$KPpUb>2HOC= z>>I4mx^(6wqS*UA+$$iC7%=Nt16?wLP-DRKRL}|i&vsgCG5Hh3!TsPgVO__F4qRIa zTm4Y<%bzt(=lCgQGW ztfm>hiuCNzIyP7{XBe$#+mNReIz*~<^Ovt)-P=F-=C{7}jc(VifoCMIE}B4(wGZ?Gg_$-$Wy7NkhWu2XUA{Y>Cl4D+ zPjwhrgEjUXzy-(?rO-Ft>lh+(9^m;{YvYIqHXDi{724k3>4OLBcKd;KZ9V8eUbsN} z(alSF^uen>R)p2IR=!PhPaZw`_BX!qOLy=7`wJJYw6_l*ww}2^9HaeTG1dmF`mrQg zkIb(9-QS=5@~5`Xz7J2OSn~sKG)*B)HaR#-)Ghbgkl;iPM9&E^<|q-no&r|cxn>9M z{KoR}@Tr6gz2NxRm}~e|I5@v_h%ggulAh?AQeiGmFcYD^5@el7A!K9!F5bW>Vm#7l zcxrY>#YX`XCS8IV8MNwWS3r@iZ=&5WBY6Dwpli>qO zCT{7einSzr*Kid-rLmP=JD%#b7Q?eXu>y~XP%`RiA-0?+ga*OwI6OLf?6D`VUVH4j z-~H}i{Ka3s_~MKBCXYtfqobn>d0lPq18SDl_xN&$ksUeG(lrJ!!-HsN{#3kPtu ziXl%G8I(~>yg(fF=Vm}&NLM3uOnTbxfy>UaUMJ{d)NpdiOE%@Wi<)R~S9(Pmtw-yQ z^q*FF$r4aYet88WsKN}2C7r2ZX=4C?N)Bp+rQuT+FK()JF9Js7=ZJc_tdlIHB78~e zQ)#v@=$vaxcqh$_v|mb(DsHBsnX`(99KPE~4Pb);K{bQ7I8pJTNAY*GWYphCIw~VC zj=j;99Gy9<^v(yrk&+Z`#3Ywn?BHO(Hmu_!Un@-tKJ;GLz9Bo+SnrB9+3!w~<9Xf+ zR~eM;&u$<)VvRvq0gQT1im*QbnLHbqEQ98$PEJWPT3NnVToz#^h?eE})|=PA@Ha=> z^_h&+*ZCa5lEy=-kMpg~H*ep5@cr-q?~fk+FPAT0+}rCzn3ifQdk(-^7xmlOWxG9l z>4&Eu`?=Qfm-?d>x> zNs#OJdT-K#fp=(7|dZ5ZsjKLWu)3?lfM8Dp%}kF%UY z!%TCTMMf!^!_o~N>k-gUBLFAIMB{UO82GR~fpgwd<_f90fQcI=2DJ&eZ-baJ-LTIa zBhYGiq)g5;dwcs&yzAXhzvr1Y^Ze{*Kl`8l)Bk((=FR5t*2^9qw(hv~r-#^%u*nG} 
z1Vs>qhbJf3ufKNV#?5Qju3ox)i3?Ela6a<{VHi1X$QEl&2!UM6xUv~qU=i@R=uTMl zTcqh1Sf+z-zy`Du0(hneaV~ju2NFhyhegUiaRI2{T2Ei;;KYxYN@~A)=huKD&>vTk1@ZP^ z|6r5bYw1#B9-kF+rDrW9!pCI-H)voOnB;AkYDIHr}EQ5 zali!oMf#h}98^Z`5<&a1>}`%;zj@<}e|xybf!U92+ng(I?Clk#F2optdV8|nK6vAe z=WpEj-r3fNVh4PXHG_+*kgeG{Zi&Glx5;dBkJq}0Z zLnlnLQhL0E*@c>8t0_5Is-r(a*swT?uQpHUAy-9)oYLc z;0HhWgFpC#uYK)n^{rW$-KOD8L2d4Pc6NqGj@NIQoR{-zGxN3A zUduP>*=m_SDRpcXQgYlhhBGUsk#%#U^tM&mEN@a%rH1Tz*w4}n?;?g&@yZUYJGxV( z(8{#C2Y{RqMqQ)(cD$e#f?S3EKFaUudQi8D0yL3Xu)j}x0;i=m@<#}`YDB8!(wu-c zF6R~3QdX#bOydTr6zY9FcVPAR`oJ8Dst#L}H$d;e>bZliy%DAZG^YVD46%}9jul7K z;2rhL>}*7tH&1i|Y8y7FO3JY(2kJj7qXkE|l-+%h8xBiEdljmAG+uOT$cGh~%gJrqWCa-N8vA^G@ zvw{EXc+?9tKr4}}%~${YFK&JKL-F*}xxtDS2UrC7G%sXzI&<_;g^BHWs{3Niy=bC! zhaLGV^xzB6`{im0fY6;uHc%wdb;%!GV;bRAAdl5qW2B%U^z9ps*f$yuXQ>>ThakN*$}=WdF5ji4su-yNg@h^_%mwp)#nGu?5r0 zkR4Brrs%0M${hWuWBkt3)f4ToV|E{UC<{~vYg2FC}N1aaGyoALRqCBcl zU_~$EHRKv14*Y%Ep|%@&USBz(4rX$r_Nb*u&#pay_<3EXLmGfKpmwjy>ZwofO__Hl z+OVa(2cc7ZiNTP96lq$LRU~P@tH?42E7`Y!(Q2Aa_<7nu7J>%Uk#H$iiA9GKCNHsg zWBJ*$#~**<+GCF&UAVB;v6-MeqK-Y(Ydf-PzEU7z1chm;fe&X2y`eQ*Eh z@fUxwb-GVJ`6RBk z<6GbQR+}Gx{_~%2Q^0y?`B%U8jg}mK>LVXgOflYlY-}{=Pyi`2%RtZY8IWFVWiAhl zt{C70bEDJXzhNn86Ga>A1FxbqJXN#_Wm)4I%Ak!1!K-b$FJkXfq>6(hPBqL*>9T|6 zyy7P&mdwOJyHk*5vp$Nu$P(gJgcSt^i{6wZp~Sur&sgM`k{*#76~$9%yo1^Oyf!@0 zGHS+y`6u@?YjfaKz`bHZMg=?q_hEz8xF-cI7-OM?H4WC7h^z`xOJJvN14PV>r)S`C z$u6j1pS7%N0(DZvGQ}rb5p$go4zL|b%h9&_`z0gIkvFc^S5xZC@;uBWf;EuMEj`OE=n1;r!xrs-aL#FIkST1q=Zh@YLc>BGHNCI8#7m3z?T3{M|CkhJnn!2Z~wO)!)o$_gmM z3RF#Kpb=y{wMQ*>8-q6Prs#(Lu+nYl^ETShQVs$}x=q+?uTc3ZrQ>qK(xj}L{e$NdAzLp-B_eC|!0oox?~ zjt&kl-n#W>o3OS%wRM@F{p@G&-o1-Y1l3K-W^8uzkAM8*AOH1V|Md@j=tGw-U8-lJ z;%JvX_OXxs+OPfEAN|oEeeQFgYeI1Tt6%$O`~8z2`Ejp%WCk4y9Ou@rTwaMB{~D-H zK1KpgDo!)R;)-!bLG(F<@?~UTX4l1;$8=0i!64IX9N4Vif;RO{k;*2KpX5Gyc-knw z7&cDVR|%m+MTyyNgZPsz;)N-Zn)zhXWKPu`QwuN+uOmn$*3wV_8QoaMMKzFrl%Eba z&0~ti5v)*@oQ%N{?6dP15EBy6(5CKg4#3T4e&tiL$AG;^0r;@{KY>jmCl!)97)CMb z7@wuM^C#7h{@`$mSMxUXfiA`5d;>*!b0TH3Uqlls445GopXxFCWCzwtNvucjEQ>x2 z*H0}otP@67cfd*{Rd?Up>>V5)K&iUiDYWEsFz^!kIbA-v&V3@Z&kayTG{pQ1B8{CA zPP@Hs9l6=O+Lq8Wq0&cmHjD>4%qd;rGz};%0N@ z;NZ&9(R&XL+H~Oh-MddeeE7`6hwp8j?sj`|m?mZr`3B<#x4HL`?;`4dXpO|BP=VA} z2427U;G_Se?e78GVFXGpg_}*37(#$d-Tf4z28!k1qK)2qDZ)dq1FnF}EE_h_O$})< zgoyKyTEnG?GAQD>(@Oel9FW`hJ1a=`SsAH9wU;u#oC`J5*2M;GKq`lIEV&eS%Ys~t znIboFIUhI$h#<^H@<4Me0zx_|X(JMx^n*B^JGyZ3N8kU!AAR=ytru)PB|5+QMvcwh z-h~Sn-~H56KmHRR{^0X3w7%huH*T(d?pu8xY5nlw>Fu}fKmDF(?%uus>Z`9r|4O^l zHd}q=l~>T+Zl>>qZgbyfpMCbXe(Se>^EZF!c^5x6#e)0+2LvR2riy|g|8IyP+W1^a)j-n+i8&M{(3ksny zZyeB7a?P%seUCD74L+tVWy!aqWLG z4DPI!$V?C|bcv1TO)fxBV`_m%iVs=FBlr(8f?JIiobkLta&UMkFk;lFl}Q)CuG2Ke z>E~2-CQDVso7_{qgPd$hmCsW+4P$ligC69`e!2Q)^L(lf2Y{)}xZ5O|c}a#*8RyOK zn_v8rUw`dlKGEXBvb28p^8Ws{qoc=;j(+Uw)kp6C^5DUdaa8R3ztn4KzR4+X8=4(% z^tDhib-Ix^C)x1pZ@9nE0+fXIx4}i3cTQMT0#avaK6L1@;e_(-O+de@c9mxV?@>UFwlQ82ht zgNh-B>jYk*iy^eV%qS84yTT+VR}GCBg0w;6oFYj`pXYGDF!UY8Ej`}G6aD!i;B$@b zq=rWHJ1$nP)t`4f>NJ|JTrkN;_0o}V)f0g~EGKnwfs=B^o`!RJi+{|7g|q;7#lnOM zVXuaZg5tSn&Xn3m1#3q7@(ZIHa2>61xQ;MRH`&QuF%q zLV7yp+4K0YPZq7@#U@AN`6J*-8n#~WSZDz!0U+EdmrDfN{aHHefac>w$2yBPnjT&_ z?4p9R;>8xR2MQX0hfc`@o)%>S(agh!->R1DAf(k>yT*DdAEsrNu;8N@hLt@<8AV9RPp8-D5&daQYux0G};!(QIo`{r-_#*@GIdryDhgAeZC z#e;24bd}b|c3chRM(IICViB{EFw{Aq9^pKu5o*FT8Dk~q#3QQ>_RMA)$Rm|l!O zL|b-0^HZy1BAu)p57SJxoqb055SxBuJU+k5Gy zmtVd9`cHi5BOm?fM_bqUxzBy>-o1ORSnHU-^rbJgSuFbU`CBQUdg`f9ed<#m``E`? 
zw_0M+dUd^+_|^=#b-m9$_gs_tPyh5!?QJXh@Z{v>S6+ShQ}4cf`O;cnPckB)=MWH4 zi8)`%B5}DuO>j66^x2Pr!U7pii_s%>naJ&;O{Ge)2b@{tYu1|vNwWRrmtQ$KJtaw; z!c(8hV=nE7Ya~&@L3?*{)DCfI4A0ZrcMxnTD27E+xPq`&9mRDBS%@i7sWG+BRL6_h zJyYii+^#1|&rx^egWizVnO(PejM!mT<&U)Apgp;G&pu8@w zv>MlzMMD%GvTb1F6~?`-#BI?w-IbHtOji^Sg>rmo$q!oWe40%~CM#U6U=EOsul^6kvRard#VgkF>U#5U05n9M2q9(iK(|=6v+SIwFz`uXth3$X%4?UMpBK6zgHA#Le9f4pw5B&>XH<|;?YUueQb3}Xax zE*n;B(O!!<270kuttf*Rc=)WmxoD78Sxy^yNS}%?mogLtYpDMivZUH&-C|nkHnS1w zz}Av&h2#bxlf#%l<${5s@mZ6{lY{q1jmyUldb(LQ+i@cL^vo_hM-moKsF zb$wW+P!(rPymA4f@U~!dbB|Tpo*3s#XQ}}Y^NAn>QBkuysNjgcP(>D7SR)zVQ%;RBi9CaX^`u4rmAf_& zCp!wVHApBGU|W=0JrvidK%{Fl+eU1<6fuFv6Qo2$nF|ndHHqp;CsJsx?oN#;LqhdB zb*ZI2N=E*&DTNunoIIaeWulI-*3?r+DE}!j;%p9g5J=}PjDyvYNK5xoctux{cI9>m zBJ7pUCi8);=J!(z*+}NcUYj?kaaP5g?ga*qj*sp5{(&|aALjp~++cN>)8B(-`RHSh z{a-J<(54c{W~XLn`oG2h{KD`yQ#d_4J3c+VcY6A8yKSBHqoxQpSncmG`EjHE=)3-~ z<@MW#hhP8Q-~Gsc{Et8T>%YF&hK>Dk2YZh__V|ShmlGWl#mL{}a|k7E_KtjLXa_vg z$^v7vepHCqgfQF(#1?MsH*|LFF9}cXa9^p(QrtJ`!jEeu`B(5Yuu4IOjZuSRpE>qt zRi+J^l6!5`e|KM7sM9K68HyDrtulgvb)ZWBvt8kI5*SO$9G#{OS-l=7sfj{Bnb-yr zz3!9~OmKQpB#rrN*Qd0~Xik&xdgU|4x-t!c!;6=%UcHJQw^o?j_14clc<|t@JlcyP z+#oYBB|SVkeE#|8+lcPUl`E&Gr?+q4{=pA^@Z~Rm`QgKd_wV1obLY;BFTS|y0gP3# zHSx?d&wS(~AHf%RU?STN&xL68z8D)}k@+N#RC%Tk{Ncm?;HrMKlC9l8 z|NLLQ{>H6(=7!?E_l|s6d!;i@fT<5TU~c%R86J6Ip)oYK&J5dErffnLWmk|$H8_LZ z^U8#Fd--lO1(J4J{AhH#NCzQtHk3CTFJmWl8hhC)8*D7jl|zsr@#r-8#2z5brW)gw z#F{Z)C%$DH>*#tA-CiZoUYII5{|>#}mQi?|7KwyNomUvDcK;qg4z7D+OGO&fPq0!6 zEeE?x8%Iwx<5VZAf8BM};p-i`wcso*(2C>tekP_v>}^qRw>dj| z>+I~GfBxq${{HX(*uVKVZQ|1=8d(`Ls%G=1O?fk8B9KG`n9R^}*PH-$2Np0yiTXts z)g?k>@)kAMAg|ZysdJ5D1q)?N(apw4s@5E=p@R5c^m1Z(d?E;J!eR!Lp_QW;l|eU_ zWvp5^-m9*&HnahdN#Ih(Gau~cK_Qc6Qc+_KoFeLJH#xkH4UZ+pJZY*PWiU8jbJeQV z+CRMb_~VabrrTzs^K=s=$BeUesOV+U&u#Pjudw!u=UTMFt(R>l+l$YC{_|h?%2!ab zb%)olUq}1xQfmd-yiKov=4XE9eeZi;JrHAlRAl?z-bP<&VW(S5%{YU;_8Z^$#=U#@ zm`5X8ul&XvZ$5VI>Xj>(xBOO*PN8nhUf!k7kX^TD39{ltLF-x^$+)n|1+^9x#WAZp zDJBNTxMv4uv#R=!w0oxuPx(=rl=tvtuHdwX#aaLWAOJ~3K~%if&Bnsds5rkU4G@)e z(Tbac8Pv~J43wvxWF9BIN}m%wRaY5PtfVX;P_Z?KPaZKzFdB#j$Z@&EnxaiQR@A;t zu2p%uATVlkMobp&qrV#1X2X=1ag-KyXMmv9WdD!sz*es)oh*OR}kwF*3Dy>sBL zQ=%~v!T3mSOr%+lefRL@&E@QDKPS8RC=tFjyD_7s%)R``&?YhcKc9Q<2lwxP z;nuCEcDr6rzvJ77PPW_Dx!!y9sP()1%krRg;XNPiZCYY|`0yXEUEBWKfBS*o{_RT_ zF0@3um8Gt=!ONE~Ke+b9o3~!G;psqu>~FcBATS9q;RdJYbi;+fpu{dNJFkgM*yN^; z%+3NosX+v(jA~%g;*<0xb&YZphjFM2g?w7Xkca7g^GTkR!5stoZkN2SrE=4F|C}=5 zcGPDCpiCAe^iB?!CT2v&l=|{72Tz#@K(4{WS($v5R$|h*IkM2Vtg$$G4{A+29O_mBT<*!z1U zcK`T?e>y^zBUZ5v@2+gE9ac!PNij@uDFFgsAPEvA@#f3`n8CcKyJxzmyYD@@Rr!6H zr*02quOp~~#+`f5sZ*7e`DJEhR-HP%Z)#D#Z2OF30wX`+g5B~3n?7E; zbP3KFVQ3%ouO&DxuD28}=W(*WeaLqpPp|JIgC6@cW62fbX7r`hP{QQW3r$71BQ%@Z@uu}h4Rxq9@y)MnIt*>6aa{2tb?_PZG0^Z-{oVVmsvezGK zqd=U&eexYeg^+Xb@O5f_vo?>^2QVBQd=;^ylx(J&$Z(Vk%r3MNmoQLP&TPsY%u|6%`;a55hI9l;Gstn7O<*%OgAc!W(1B>Qr@Wdsj?2SMYbL= zNgycdLXyDW463`CM{2-EWIKE4vKxZ2f62kz zX!Jkcd+%E}Z>|?p5`{IrZ)K*9CzCBt;@#ZZ+T*ruPl=dSuauzVr+<3?Z~o@7M;|Sg zbrLIFQoiaou~@fa#WuG#w{ERFd!pkGDtz;^7Voy;P>5nm`5M`ay9qx4-HR&Y66Z`Q z>siCVLyp^g_mngs5K~Z73wDCdN@OZI%JCth!%X7%1s85Ltu6v!k^?+~vqGvPG136L z9zFJMb5^LykS7m?2i>;RWqV(XdYTDm54|>no=esE8;*GE(yrWfQa?$;+xv6%IlCLqL94r9LB_?(3yZEB z=9bm3n>GR_b-L>t*y4&_X^Ox(cCKi8r`eF?I64kELMS9^^V3KYR*3ho8b|;d!jfTL zssR*3$Yv!iCa5?Z(Ch%ll*;b)HoV1{{U-(1QRq6brJm}H+ctUkYb!AQbRbcYMr%Ti z3S!ufs~SWZZ1K*J)F1?at)ql4a3icr{kbC;7uE62TcZeb>3Y2tt@`i7qH=6)RblPj zdwPDpxZeNm%$Zqt)fw;d56fsvWKiPWu3O*QDlyL5czpiw;qAZkcTW8JuP^M`vt4Xu zKns#jk=N~n=H^P!v2yY9<>Kd;mL$p%Gh{`TUZ1-H^)26WKV(SHTpx!ff-?hF>xC{k zBf^~{{XG1ckBF#HQGuT(&n*tHO)Yf0P}oIoA9AdfggAIXp{2_2y>vr2i_e>2ra!#5 
zff)8(Ed9E7m%0yPa3Y)~Yu4y0J{thn6OJA7ft(>R)T#QeMlY?=zD{w*FKN1p@x+$) z?^{?{z;n90B3&KEHxMr$DAGtfBOuc_(66ySGW3HV{GhmAWQo<^i-kH-207}*#l_+s zi$ld-#R02ZjB=&~tmR&u+1S{?K9S{1I(uls|Xw+{-V&eERfh zEO>4qhNF}}a`foufB9Dr9z68TufIT{Lzh*CsbI)~hAKh|h1CwD)v>HZB|abw9EEC> zZzbffU4X(vCnNxzzR}mE@9b=sfl;?B|E!dy*>HxE5=fz5944)z*XeO^hsTz1-tvA& zJ=^!loW?Q)!GL)f+G*2gzsJ&0a%8}M``ul%4%=w-#L*GJeehQ7SaLX9NsB}+@w0E-2LKYaL4E?=I>_}IVnzQ(Hh>Ao)!#Aes+?YhgiZf%@8b?hJh z!-qfiv0}O=ZlRFbBhiVC`U>}voITt9i+{2F{PSzie*Eg+{?)y4iELC3I~Ac+%Cn5xQV!~GeY9PiU0IAe?WdQ6HJ=B7VizG(6I%LTdfD>R6 zN~e;X5onWhhM)zoyDaL&jQaqlxYenJ%c-rc@!Y~vaei>RSgV|VH{lHFb-u*ohzR?J+KO3KedLpC&u_u7^Mr5F}cVbct357~kf z1rY3_A$atE{Ez?fzxr4I>dKWXAXxs3$A_Qz*v7`@OE10Hfp;k-2q1Y;2U;VjZs)77 z1OS&9tfx8G%+}QJ8td>W8XX=YN$X@&Vv?Gd+S(!x1*U}m;(rUv=thLbP+{+2de8%! zDG(`HfBj<7Q;p8%ZR`ab7EL^|28$bT1{^XO-isQXkRiJRQld%{3_`+03ON`*oAbkg zkua1r3uO0MhyPPBhJWfk-fk_>rBfrJ4bkAS;;cwm3&u=B8GA(*%?<=eV|tvwU@2&2 zx#nMJKU`AWYtd=?$mTq6MywXJ*WJ-o2WAdRFcsc(&9Xg+g;3&^H31mVsLBa9c z7ZVlpHokFVF8Vw1M{kSm*&`KYy^rxbj~H~Wl=5g^t$lND?gyslzWV$J z9=P}92Tn*>Ob9(NO4d+Jl`}%SCM@4`ULHUT{WD0rqo$KH+}Xox??4%>l0^f|)-fFH zaynTCavVaA85OMk8vF`ZNEv;ni-&3LN(uBRJ@nm9Ga0fbT&{DlG5Z*PxvRA=k=_@<$mUY6zn25^4l z+g!jm_nu_NDkr!@5faunGq1rA%Mmk*SvW{sDuF2$Nns^(9}Bp!fwRS;Dx|D&W6~x* z+QUk&K9bKE792l*{OHl6Kw{~myx##zQ2*&iAG><>@;h&z#Ueh~P1DfXh(r7lJRzJh zJXLBIOEMVrp4QFkFnEkx+2Qoi%U?)cj|8Tk{Ndi6w~^x zmJ{zFkuX6oX+O}%9-H6Saz2W8h$3@~^tHMps`_Kg%PaRic%m4Z(adaVycmr#<}M1k zckkZviFc4mC~Z0Bx^Bcjo_k*Xzz5zH-FfHTYnLt^J#=VgR8r>Q$&(rV2XQx?RKdm3V3hfvo&BhsNGgnEf?(gj;d!_y$r~=dAJmq9S5A zV~Zne+w~JwmCsT+T;Rlarz$6lH-+&j#SrEqgWXnAKFg)GwY4O``B4{PojG#`HKPl= zI)&QM99k@&_dod1h4b(Bc<>A(tthNP3e!ns7@JifkIyLMp>@6cz35@csEpKHn=}yBuKv{l8#EUn?q4( zw`DbKxUC0d3{uuCA9{&<<6P4SztN4j%TyS~;l_cWXoylky32;f>P<7Sw#~eBFc?A@ zbYDTUaikno)cX~x8$>9=O`-tz8Q-f63P3-u8c-6%D!~Qo7^GA9M%dLNJ;*Z29yw_o zh~>R5$(~E9LdNu--r_D9h6vHe=szp0-w_E_pu>;Wp=AaAh!m14uN;e>V!+gM{;w9m zi)9LeUw!RcKm5_F*yOz^`GOzI%gYB39$Z{p#FL8{IA6PVZFO}ONv+!seXQvyKg-x0 zBk}e1wPQyQ@7ur6M-X8=of>dJ2Yiv3@;QM@8%JyI0W&b76A=>=BhZ*_Kd{KON7o_S{e;K40_ z0%H`f8Aqg!0LW!fUx~8!-~5}=@Be-=Z{M7sfBn>{@7;Uv&9yb#+$?1af4=k5_doOL zXKS9*$K0JVNke690NqC%M$W__93rb+=(@VbjW8diP4>t++hb7M z8OPB%1kcf74doELNcdNjnK+gi`e`UE3&wY2UOkpmh)IZ2s*{trubz+4wN(LXoJguI z3>QfAuB$LD%%UN1a>m%<4#m1%obfffnkEE}stVL@?~M2DU(QE{QtmjUIW*R>&&FHm z)6qgBGEPO{OX7V})*{_yjg1Xj>VXeZsiMaNNc;;XbY95bFlnlwkbT88FFj^6d&`FD#3fNOz3P+aActANeYqNzjM zk!b`2&MQnpKalQjyQ6>V^I2@0Vmr7~Mj<<6f!u)M@F@+72ERf3JS{$J!ppp3;f9?X zgO(AH>}d8sY6g4;vu1;;kBZ1 z73R-si|k#eVLVj_wU_4kC04Iqg}+}lxFV7Tn0(XMuIPe5`#B4>Y-hA^-Ol1{wQxtyRzJ29OIa9tAsQrswkI%f+S?RiF9DPaQb0JoeeLiUWVvUX?wpT=OX>I*_nFl{h|AmjNpp)h zlYxq!;~Sv|^`RkACxLUb!M}!3079NK=10uOdx5JoPeOep?p0_)Ns&cH$9ldfddN|E zkQBNGouJg)R*dO%jrQsvvO}2HFluygCUhCyk>)VO(lHuQrhfWco7;Ql78d5`kzqy} zS#?;GY~e4F{6IhqY>b=DD-i6Cn*;t5u_+#?%|ix=^QZb-b&)+*azGH#q7@KkHuxk} zj#IPO<;#T&7m70WVJG|8+3LCHo}2iSMQu=yyT*}}w#H$ok1+0k;Grv*FPSUi6hai@ z)93;>X7M4np)Mb#a&IYg6=s2bSC<54pio(gDLg8>W_24Z3~7{n!w0Acneu*mkXk9C zFx3FL!~x$KoHQC(3mgyXgnIBLI?Ezt5<@lT4d2@0-p4GIAld={hORY@M6;UBS@RL) zF_H$M7Fq&EWDU4E7y!Y+6kvojgB!b!bafh80*g?`4>;yjX0VK1fgp`{;Z}{|0`LJA zWF1bcuQMY5q;DO$@n*}EinJpdVXOv~b|r?x^pk`9yw91efOQK_DDj4{z@(|r0EQq2 zE&7+P;|7X6VU%lq=~&%O$ycHJ8{ernF^m;<*+t|xVH{sH>b2KiEjWe|iOx!Kw}%fO zhRcB}V#K}eFF%1#E`oK{3NJ!Bx2RIu&Jt~%8XAv`U&(r%upc@ znGB%cDMGjTg{mnyloRS0w|SFVIfCw#kUYwpixD-2+Pjg};DAfN_E1J0kyVRMlJWxT zG8$T%UcfBVhlSznd@U7rv4GZ0d|3eeE3(1c%>kEOJZ||?)WpJnlS@q^8roElv%bE5 z@ZdqLpGpEyCT|2E@>9e6B5Zx{z4uDUI~g8!V59H6^G^9(US7U_{d!wDv>UZ$UMG(O zD~AppIeO_L_Oo<7gC!fp1gjTtRSB1X5tW86lVp!og60kez_C}v#HDaTEntSmk=J#_ 
z^ca)~_*7V$s2hfdBp{wq5M|L9tUMr0VCx461L{1Nx8Ts!Ivkc}3gq!xc>E|CumO{z zaC>7sLC5lJ45Wqf%D`Sr8uJ3$u@p*l(El|kNd)U&`-~?;4SzmTpnC-MKv>kvF?UBy zIB7XrLEW`QHVVpu?jgXXh%Cq#)%+$T)-O&-s$xpRN9949D6wRcjn+&3#8QgVNG_#_ zUgRChb@}xxzNlbjjcE``${VinpAeht4=@}E$26eNpr~N*f$WX@I(LQQ3bru6aP9W> zk^85a*podi3+8um$?8(itxRnp8bC5?b93{=iHDy0*b}e3`bM!Ym<}I^BuO>U$Ap40 z`_~2R>2x)QffPSAin9jE@47tv<}EcQrZpPt!g*K9c6Q3J0^pdqeW`JQ1UmZL4=E;-i z=jJvpU+!b~pH>e&2XN)W#mff|9rT6YdC8J9sJCc!uP&10Xe!b(xga^%LsqkfX$Ckt;yN{!Xp(3T4VPi$?XB^Fm6bisW{@~| zvki6zodhJ1^2Hdms6Z|)#=vwZj*Np#0E%}Zn0rCQ&EuGzEqat*yuL)}@L}w&VR5Ky zB7)uQJHDere3ql!e(~Z(Oa?)4(9=S587q{Zci(+?p(Gb>}KsAF~qo;bTYqzz;^ z0_cfKWg0TO9~KF%I}fvfy7E?W z2D)Kp9mzT>TU%305xN)pz(OE}CoO2o-!@ZCNH;<;<+b|YddNdkr^sUWToKtw@|=u- zAsCvrkN}(`o{|ixdI`+>qLrA1HDknRo`#Gi01}h`;RfpcJ{WKoXtrBU^ms8)Fb@Ty zAk*d=Y{Pxh4{3Iting5z9eO7c4}OwPZ^gS`eK;TIO+JTmHuI#C1di&qu}fF4U1KB0 zj^j%Xhn32pFh+fw8xSc9{NzVJ@+-gcOGT>>9X$B`mw&`|EoN|HMC95gFc4L04oejB zMmgBfFq~$?Qx>XRwgIWtO*7g>RRDZN4jAR zJNwG1Q!Dr1U*hxdG5;Cp)CLi`07GOocWwx8F4{4 zSCkZ9t>4^SSX^q?Ql+GXd`b{jcp%b+(?W2WeixB0=4Hn5dOo<`QlLb!STKQ^nnF&g zq1=UDz-7X3UA=m>IO&C&_MW#m(owav;irYiEnmt=u{hl;SFTv7uz8NU-2Tin&wyHt zvn^I-i_fa0>lNlUq~*hb0|!@D4qv|fURB(QLcA3oloGSbW|n#yaAyoLh_&~;1?5}y znxU2h!Ph;R?Dlzr+Y3K|SVy77V{y0@F_TcU;M*inC-Jm7@J5p%>pe9`wrcPN-+Gd**LNhxRe_A$#S{&n~&RGgUsI>KfjMy_U;(S&Ro;>;ml3hV- zjWgMHN8{z<^mcMYtdvg3GiKgJg9D@ov{%P~r>bcP*zF3mF07jID*gxS!47iSiFG~C zhZmBR&Z01RsG1N>?_U=azzo5JvdU?aJ?!o`Z?H!(zp==utMDHwi4asJz;KU@-r<0xW&W#(henb9A;2$addKbbG_rzm= z1wvs~`LgXXdujX=eScwlyL4e?|Nabd(9O;h#nnFU*)uwOcK;W@xN+gan(E8@_PzDM z18XZQvbk9zozYBv?lwb~JRsadN#*rg3;8NXfe)VT+r?36kNW0uh@?Q}WDzF8F?7xu=f60BKVHINc8 zu4IRYM!T%i^(M!W4l)Tq!2miz#lGON7}}tPQG{9QKo^t2r8V&L-%4oqh}yGjyc$I< z8!~niR$bY%&_~?o&S*fRup{+nb|Km8JoS5Vd#a3!XvSkqq;X~Ftu;jP&>+vE$uL!D z7LlI@x9DHZJKT|%NagmvhnzE!6${1E4YsM)562&^_eqm$b4Qj`7vek(1ma)EuW-$e z4FD(o!7o3T=;T9%=mHrH5)k`%LGKN-f2@nQ2Z3OFy~&5P04+xUS}l8UuLO8|57jK_ zuqra~(MQ?}nJ&-?qmYG*$12}H|BF9AyJxnH@MmXcFI;$UX12Pbz=EUGd5?#AY8)n5 z+9uhg1aS2w@$jT>8AuP!dWd(S=ZJ@CN%!osYF z4}13R?cJiaJlS;=TK4VVf9vM@ZM-N@jKbl+>P)Wtc67e$h)>OG7zuw4jRwxA0Kv5( zUaY9Ju8;MieyGr=BVG+Y=M&`y_zW9#r1oR}J%_mpm}S@eT-U>;_0TexbF=}#U`WBR zDT$#pQGb_;u6~%r!-a!KVf+Lg>@e{hExHysXB-&_b8TRY@jqWBOTGj#tij7|ZSCCN zo*X!^56h!)r29dyEr!2XOSRVz_)B>CBw+dAF!yKu~M4QpxpU`MeYHXGiVq4u^2j4w zzMIGC)2C-*uMgadUbi`InLfaT{re9rF6~=iy>OjWF$w+x4c z#tF&*+|+a2#|<@wNA4xSsX$D9DMB5&I;d0wMghM(sRVm~(*kV*j0sT@Jq1M$#9%DQ z%KyZmI-qE<%qToyDv5Q^pr~|^R~}6{D5YR!gbY-XC#sF721&fhcDLIMKxt~?VwB`P zbrH}6gEh|cqMj(0gz*#`hz6tOW`Tf^^&HC#WQg0a5Y<4RF&&y+v;1O|7`hPWjD2R= zp*S1iFskDW>c0)p~9@!=y!4r4ZGXFPuOwb%D8 zEmd*2DOAo5vX6mPtq-b@&VhXzEOL4Wq-r=q{46vT7vA5zB?CYtH7Thrp@d+?hZlq2 zErf0Px~fUI-Xoiv51v22zP^q~WgJf~*0%&+y}Iznf82lROE*_nzp;P+Hy(WO$H$Hp zeV_RHPdMqc)OYGtIa#VLrfq(1{@|e_d-m=D$dS#7c$YRk*wXnUJb~~Kv@&@U2f|LD z@rsFwREqFOLS+TE$#*5p1n@J)mjH~3wB2w_jvyl{Cgf6mR}$mxCYAVKLQ52A>|{`i zA-TqOm0FBTAkPB5(PW~ROp6Lguf`EI$H=Lkp~VjZgEKxvxN-C5+}s>8%%eM=8Uj^N z1gEFNB0H2fPu{}mBG}EWa(Y~RL6Z6A&6|%u{`ld;ho5}%$*f;IB&d*j9p?4g15HQ+hE<0xP&A#w?(FJHc#693{>_bm)k57zR0dAps>C1HCSY(8cNG6|UGIGle*YjdD3E1Yr8e z4Z8S5ILws*##FuDvI$ZnDp3Fc^9nR9j7ivQ33`Qw$0_uK7M02+5`VbHz8HNLe$!Qj zaQ&3B6_yYDf(oxq8HvciD4v(-Y!tVkob`lyoAN9fAN7D(R$Z_A(o&->?!_Wi0!fo) z6pEi8`c3MG`uv|h{GQN+jUr)XteE#o!&3+*q$`jOdJ`Ire&I_cdeU>HM-od4xNbA( z(rCI*5k-xt7IDnL>cQwVSnqK=_(-FbrjY&h zrV|~RE7BYX$`nITobJ-n(lbv#h4f&tL~p$D)*ElWb>DsWR<{+aYj|<@@ftIvVZVx&>p}$2bo`!|45%zm_v^zx{S8j7{xNE+;?v$xlA?&_m-f9zr!#h)(vtwMFKi%4dO4aiy0oT`Jxe4;M|Qk5ayP z^x|ua(|!H**D z2>#fB3&WT@t{N}n8mAaUh8d<>!V}Cg0EV><_95Hp>;BTWQLnWq@jE!8f;zYg@rzAK 
zQ$UfBx`f3*vhl9wte|L9Mf`>5@&YW87!KugJ#oy#k&(|qNTcloFMX8Q!L+rs7;Cnn z{;VhH1}ik|B3TMY;HQ1RDhicN$0fH*;Y_1ZHvzp4Cw8<~-(mYG%a znj)SY(Bq9bb87jnPC!mj4?1N0Us1fTS^M6noRC2d+4+LqaE=E4xoJuESV+j{P8?gB zAWHPXdnRu?}!b?=OPgbC;GWi*ap{&-+ zbpGUD(cz-HkDPw^@Zm#!eTu2Zc3=FnFD)-Gmv|T6U1Tu-w0dK&q$5w!f)mN?qBm4& zgcTrTVk2!-R&3y;Sh1a*ZhEsSaEyqru^JM#%$<&X2)|=?8%CZ`tPc^D$mNl1L@SUSaua}cIZx-u3ZeH}(E_}uM8Za$rsl*gUUASYI9ursyM-Z008 zMS=u6Zx&)0jKxn4DT8Pf^#rYAeB#LT5XXRY0N=mY))|+?sF5vEUK8#yX@*IA4hI1y zRjvw&zDv+DOw!k4%pyB?aVZU)PSy1~$6Ihk+(IbtAae&vTU`&$C-l7@lG}6ZMX#*(uU1@>7o&7T zK8DZchYnCwEvucmE-zM@f#yJlXv}40EvVDn#KqZgM1H%w%-#*V8~!&a($YNrMav?T>0ON3?)ye?$ib(@#3%6QU6 zyep_Yz9Zq4Nl5o%{wnek9F%wh^Sg|m1b+uMHxHgaf4vyB>(^)Bcwe*Ac`xewfb|EsUQ`t9$| zed^gyD3z4#sia~k3Md!{j2e6)hbsRuOAB?RD8-cu_j$CzHBZULiO-h%ycyjY$AktH zvKiXL>%UG!$$K*qd3S&gEkBFcf&hmECy{=(X_*oZ3pa>Rs;VWo`!J&hpQQ{ zqX4)Qir9?01O6x=okq;k7}b1QVB0mu8C8m|uH7sy1h4sn90&8E+8C+&a}b9yk?z`{4>({UE-@hMCh{IJQ z(#>%s&Ue1BKhJ>Mg%yX7-gWKT)!yG*tuR)BUZY?%MB_1suLV$m9%dDQ8Ne%^?Fj1x z8TpqXlyp(_zIvSAN`%9>s66(&0IjV>Ao0>3jt2|Kd6fwO$kscGu zFUS+KP(yG*IY-mYj1-T+(<+Ik!UVHihJIh2sOsukssvD9Gmqv|7}L^VJ!nrFDBzq= zZTLKs1#4Mj{a)c4tj;PZoSgDhS{#5?v#X@p|LYm4cxwahogHKdEo|P%EFek3-64FH z0bs#KFIIX#zfi4DJK5~DO)Y883b%dX2KL-3TwPmR^Zf1Pi+}c?zw(v8C}!p4$q#lR zO4g;KVPOXu8^PKoXNAU8gKLm0VhwMe6Mm({~<>lA!zWa^&`Hfq*Ch>5^*mv_Oh7a4`B0zyv`s-$S z%b8bRd8L#uvBk-gC(A8A_~9$(&b>XmXV!)5us2H4%m_vzBCB$Cc_*cjaJ)83Gq0g6 zI*81CGDjK!_XFyb+(vb%K5Fk%MlJzH-U3aJ>;;JvpRXTbs&7!_&}9h__I=EmI58IK z0skRs)9M)l#-a%l<4o(453FBdYEU?oT>%QJfNKUdy-4c%>gro>ow3*lw2NuoF7nK6 z-U9N?>`$@KezN}a2#t0nwB>&LtAzFS7XximV-wq~cv0|yQif4{i6SjLNmfD-uLj_qzT zGzXG3{4Q$93iAlq$kM+33yX_H4s8xa1fFZ6ZQy#9Od3om^>j(ec12(hxio=gTu};B zy^mAigT4%vt7;cFB^gn3dxS{@+!WaM3vm0X8>};LX%9HBfV~W~L5n5S*8gCngk#q} z#w(I+0}^q2GDCxcz&Qt~9VXg&eTpVHZzXf4tMX%pB^1P|co?fewP8;?KVvc8k5B=1 zaKP@hYR~K&V$k|;sAFZ0qq3(ph!ls`)vxs_n=UewI?#tC8`|wO8twvb)`S)^{EO~b zt&ei#rz(dM&FWi#wT>Zr#2L=pd>x@h*YF7gp)i@iPJt7m6R{o+;zuY(d^93cfR|*;+-#DzWn1KS1Wet(7_L! 
zs1sLpo@NwI-@90szlY65E3nZ>Kah8BNuBfAcbu=3gB&-r-PV;Woxgdv1-ps_Lxv?< zhCW~z;j_FjsOfqqE|+n|ML&P_>ZM|jmY3H(AW(ng(HbjUI*w2SYg4;;Swa`7=H8=( zEMKm^{r1~m{_>YU``OR_;xGQb*~P05aF;NQ=}8I7s77W|6ptzlh?2H<(eXYzTie6H$i3Iv z`o?|t-(O@=r1Ri|4;F_sitU+Dk2HZ`8|-!-0^&*BaulcAuBe*$s;G>5Ti5%{GtU&) z3qJ~ktgNg&{`lk1KmWXiKPv?^HgFaP+r)Re$XrX(mS1)qt&c1}mzS5He){QB__fzw zD-O5>ySIIN=dmvbL#$s)mUg@5aN!Iua?p5kNrJtF8T*$HY}{JsnUIl!G|0#SM)-LL z>z9s(6V&>ZUM!@BxTWJ`=@!jJVx%60CEj9)n_xx5n$oWy8k;}`ji3z$s2c>O{71gW}2lO<+t3)y`C5z&cg#Tv!99T2y6qWJr^tdM6{1Cqy{QJ3dog%cXFqx>3 zv-QS*N9r2w&*#{YxGUgXip>odghb%q^;3C+7#nm zti3KILy{Tydutu-v>P*KX0bm@E#;NhWgY$SRx*ub7xGkn40X#``|{<>x8qIS>Uq=` zzV>zXL|id8k3MpGadBx<-_q3uY>{mDKKlR0yi#z0VpgMn#C|tj^tSe(p09%#Y=r40 zi`#>s@n=Y)k~uIR00BBs4WjRu(XP?Vzru_i|0}$>cHqG65&+eA8+01ed!S9dwqi+( z!73fc(o*RI5KDoxa0u9M1 z3C*WxRTASjxjMKk{5d?X_;AxwnaZ|}0jM}ISqy_KcxDhoeNndAL8CO(1B#M>cU^{I zGNQ@T7u@EL`qG!a``z#U`mg``v17;n z=#Tzr>{~IH=1R!hc~%)`pnW_3a1jm*Cym3JqmS&A`gU5y=@!(L8h-f09~J^0d+afI z{cT@|g&i(c3A4=Br0Y%R4JNdWYJD_r`O?MnlbF1(+$CI8F${8}MCiIL)~*YfN^1Jh z9sehxtI|TZPw5rT^gb~x^wwckNuH7=_4f4EL3;x+M+9AoO!E{aOwzmCfL8RDisd~H zV0$;~iSQTRNY`+Ujj~1IeWGm2PlF_FKgab1H^t_X9E8K`U{!jpU|CW_CWOoio z&uhpO7`vB=@g|7$duL}xOG}Go1YWfC;K74q zSh?D}WAptMADOMnoTJ#<0jOfKsDh$PBqhXLCf*gpxR~H~8F$>cA4BmnBG|cheIXWA z2|!|G9gK$rpeMe1x%Id?r`!Hb;`5fT@SJrE3oKax2^JaVwOHZ8CAeOML9mIVJDz|n zmx|>rcJ}Z7-M?E{TI}66SIu-Uyzq^m``J$y=GU6tw1jXpbT=weYQGXQu2;32#Mmw* zX6y9|Al`$QLm^8w=mb4TwTiZJa4y+(=TqJ@W6N@i=?z!hm z0D9M5cZo=bdZT#LUuKxGI2jM@;x$~`&9v^0Z%vhc7logE^2t(dsqNy$i)9e@;AT6|$tp>t_zsVH)Z zrwcLfyz@?}@IxQ^koi+j>QhC9_=KzaCharvI4{SnA(OsYte~mIH8fvbSXf$L_osv< z#`Z#>CS+X-{?jd|0&|WQ=prlC!AqhDK$_6y8LjX9Hp%^;0!9D~Ta_3yMlC#>5Rl;r z3+$f&U1A*{Io%LfR6-=%epg)@3zh}??^0{C5oqzt%&%OKZ|NKw> z$@S~kv6bBr?HQ*ZtFY=(*T`5EkT zZoWt)X8!if%+^#veSZT|d83f8&)`z_;^F|NYwRi10h<&nclGK>a!Ixy&diSg4tZt| zih4%9^%?&vyNOGtb-H7GF*`Cldl4_zp56;r!tbi@C4{tOF^-)tZQYe^?sEB}u<}#{17!5OR$Phq)=36h- zvzSHe=T=bP%NH2xwK&^<0 zg~aHUtJi1u)YonKnyNXh4K7})Kze?- z6_C?nxOK+j4t`;AX?^X6!CpD)iWK}^BpifcD|VuVhLKs~y4I#baiQoEsQo>mFhF49Z)$|Pc@|e z3XPDX&UGXmTP-R?&DJ&+y1b-0rpwFf10%y}vfXjtw`Qcpb5m**4kkBC)37QfjB~Ken{^ zsbBlG#pUHc`Q6|B;upUt?|0;Q+O;6d$nTr~9GV5(ndZDZ1eM?-Wu$ zaN?m~{M^qfX6b?$@m^Xdk#Y6Cic(Y#+d}c=_NLt0$R~(YVA6>nL9VQNhpxv4Ans6m zOzs*d{nF`L6YOq^Jbdcp2M(@0|NPgyf=;J1#(hk5A!aRR@y={>hP;`n^Dg(#KmYs# z4?J-E`0)>a_`?@2Tqu6G*x9$=e)p}n&YnJfa%X$T0`vYEt-8fXwp5N+`KEAzw2q<; zN0i2jJb~RqQc+kSq$i0)M{^D|aXZOD@!_OrcKeuKZIBidcoa#trBpgtZi;?9lPfcRt4uXIRGm9ixD37^eBABn!eJlG6pz$^vKa8N3a3P zr7Kt7efNA0HAeoh6Z%y=cj?al{rh{*R3~%6>OvTz+jP2xC%f|4mM{XZ{YXeJE$zEp zpM&TrBaHFUgiuZz5USEg%DEd9DowI7FeG(wYlG>NxKg*i83*QBEg)`gvK8903n8RI zB&cEq+SD|(G4=BfB5-KlrJ--TOS;n3ZUwt=iY2KVSgpd2=dD?n?#dt!rHpziM7u=! 
zC}iGgjVA=anJ}f^zziZ8gMN9b5Ut<9N34Jt&|FlFK^&QiyO2`lUCMAfSj<#yaEKl{ zS2t;N!C9j`)#KlRt9~6TxA4oT*(m_YL%W!cYvspbUE(uajy9c}(TUty8gn72UXvf$ zlwvrjY3npjn@}0ZMZeDj1N6YR)xLW)+Fo8hd-m*0FTEsx&5;?WUEl9Kw&Qi4|Aphi zh4UYM;-`+?eOGmhkyKZWM7(Dm6oTR!1XACPx~=OsbUY66Bxn7*Dkt%fYQ%OK{sd9G zq}jsb!u5_L}Nmo>MO5Zy>@*z_Qgfb_>R7heaV&jgcH6X{SLqTz9H}V z^XH#??zwyJx#z@*6Xo;VxpP~-gYHY;`~Ka>j_zMtstdoQfuaSp8@6C@DE=yuTF%15 zg*e^ziMCt{@wCnW03ZNKL_t&m_gF1KLyI+~)nHhdP69-EkAj-4HVuM0w=wa7hP5q4 zG48%=IFfo}T~y;#tm$Dp;&^F?#K9Ixwpg^=v|Xd+Yu8to_AM8Og!c_#eex(4*5c8c z66rqu^wULH-}~P8ioY!07elLYymYAe-FyW?>v}WN#U-36NO<AO^r& zyl6h2_6M}yWuU1>F<`)Z^vJOy}(+QaPtn5Sh8+4N# zV}$8`J(&iZ78XI@L|K7iC=w_E%@%u(_k@@mKH+od0{-|xQKB!UDkRZe-cWw4MnG?DY`p#6d)H=X zFI~DM|LsS{zUgNEt4FacXV1QU@3CY4UK3|m2~XW5>Co966^(*5#e{U5m#=h_Nt1YyJk^X~rJdZh=5x|~&Mhu1E-n1Vul>sZ{(t@9WMwpSx{6 zZX56B0U}GW?|=XMANj~fiYb5WvB!!nzH#G5xms-a>#x00Mj?XdCg4Jp94K*aZ+EbT z!08BcVN{3l%HWF|4*6k#VSBoM;L+F~4tp3b^o&=iM5VEbcoSV>FPrhk`P+P=p$Bc$ zEGVl)iO^aHr$q%rAC2P+m>w@k=n56htwVcDu*C~y=V6g*vF^s|YUyI}UWjz_xuEpB z5FC8Z!w)}v`t<2DXU>#3^_gd$IdbF(GRy_Hxzp2jY%IDf*v-syacTG7d++7Tm&ec$%HvbKt<;juc<4demf|nBYlV1?bXTL;+1e3SQ6Hy)luD%f z*`NJPal`-l&;MV5a~ww@Aj9*)ChS?*@eY@ z8yh#B#Z}N9Op0(PocbLzxI@&ZDHTF1ModP*qfid3{3lbhb%Xc9TQ_oiX#y&=7_wU< zdo+b6SUH*wk1&*Kho2VI)jGRCATDY2-Lg>$I}QSbECR4YBd8l9u}nFtjjn6sRD@I# z>nxUI@PW$YW!?i7jv^S8ql9-SQumdjuJip+s>kT3>#aQsPiZMRRLB-JFGB>%#_3@@ z;M*uwx;lu#Ele;m~kDSzjR>*SdgdaQ+akkSN9B)`0r8#U8VP8

7ykOA z4DcuZrXV-!We{$vAw)lbw7zauWg+Xz0=vD5By+c`Q+`j^b;IPYpFyOX!EU56dlv86yUZJn<^otb`*O&%F5z`hjg_QfxL@xJ@+ zE8h3zmtX$lKmKD>{=-*Zd+gCiify&9Pu>$ImZ#Zbz(yZ?cm+xBjcB4!4V6fVZ3~2O zwt`ov0jOJq=8JeyAVf4q9>O>AIp}cNXJ{nGe9<)KL{N{ntuR0$geE8fv7vxz0g@gY zx*~uIsF5-ji64&cmMyu;ifk{x#*_Z?m23N#SC*ERkYp~OSiv-k4NtIK86v||Lm&V6 z$BSF~!4G~=223CN(1+l9cP(SyZjPMRO&@&y`t|kIYsZg2go#zfGY-y`P)wCTm&KZJ zD1ITJ7MdBAp9PafORC9bzom}BpMB=zCB*)VzkFWf`5#v3-RV8ZMmPI-II8#Lb4xi! zkP-)qji+Cw09w4ibW8+*k1abQJmgKZ&;|mKM4xEJ#(v6&!iYr4 z!p7nRt_W)h>F>_g4tA8yqa}@t(zu=mlWc9u#<6l>yCD->^l7z{uy(Qy_7-Jw6x z_aJTHQY-pUlB7^06Y5P^TcSB!D&_J_k0=a`Vy|$6hKpz4*aZxw2hI&7I*qZK4Y^7P zFs#lXfR^a$sM)My+~kE}+LD-Bs?yp|-O1Xtso1-nQ(r?@&;-H-J0V>cFmuPRp z7{IA?RMwvH`6IB{ECL-KT=zAw2v2!Y!zp0RQP#}j!sO<9DYde)f*ZQ&WfcE^4zsq! z|H@(h!m<({dUCJx6;u(?I9qzWYb-eEWcZlc`kfoAD$)6rq)s0`oeTqnz4G#12qweB zyKuVccXiL~^1*}Snv?E->2p7S{=$VH|M+Zo$1Jafb?&))j^bI7Ty)xoyyMsy$b4S> zop;{(>Q}$|`Okm;@BO{MS56i;T%z9h-g|FjW78KoJ84)ITwhsfv;%_Sv-=f)3cYhg z!Lb1qV1-e|QyJCh_MkD*mvDmhPORgaQmB}eabl{atB8u}D6bl-;HCgWI1{#Rgk}Dp zT99y3Gzb{-x4_HBI3w8&Rq$Q_As$RyWQ-i@J@#!KP%gn1K}&0^>*bCE2M#Ry0l$lt zL~yqfXb}g0kp=cpTwY#2bm&m=yI=qM*Gu{G^G@fw)4bV!$Cp8kebIZNb>s;b@rK`{^Zqb*S`0?mvi)x4!Hf) zoJlsa;w@Y!%{I}fB%@xGnVTytnl1MVQH69P(kE3gj#5Y@<^n`hLS`-S0N~1pUK_S3 zO4PpBjnl_t*8>X&7O<(+_OzOVQjk?sGg2lAeP?lV1K)l+>Ma)hkw+dmdGh2x_y_;s zvBw^J?z!hac;dm8mE|6o?7Cqwr-W`fTL=tn)5Z~DfH!%djGtV}t}l+Px-m>8zxU}9 zB|9rnfLMgwCUH;(0Dw#eD&J$ffXc~F|)fj-g2MgDtIY) zVyrXDXXGAw=%G^Pg%@6U{q@(6963_1Za2Fz?@T|ui<2cRK7H!sGoN^}wBAP~)FPcy z1LK@8FwK3w>bPF-2Ir*I=ax%8XIpPAG=K8hPrUQa`L(rmr<%v4*>x^YYV7Si_Gk3B z{Y`}h!*g?U;s8dA8SnP-qQK-VDx8^}-8;XqvvZ^Nt?uH7O9YG%N6cEM`T(MXcml@o zm7(qa&}QJ)Jmpz~e(wqx$BzceA^k)LNtK`sPy>*8E$+}E!x%Irr|VtH_9770%r-?G zd9XoF+-NM&4P_7!ZSG(kBw&JOr@~6-I#^|*M&Ur*WHtA zv^+7bfRD-qkE23RDKt>&Zc+y;C&s>e%~oHjD#1utB^kyqT{6^?v`TfYae$7{B042~ zlpmlF;D$6k3zs`Kn?Hy=vaNoycWtfQyl>w=^;8v}Wawl0?oaUOdIeevRcKd~#%41u_}7#4u^yOPYddy045%VaH$m#b4Zg_uVCUdg`gC zo_zAjAN}Zu=gz)$_~1&Ra^$<2!c-Eh!ImJvTlXi;KnPFLCK>Tb2&#px!ejg{(^v^1 zif4YH?@ACmp4tb;*;lVd0k&+JxD7Q55_{?!=n#x$G2r!d4YJ*=#$bw#-&}MJ`0Ic1 zBaTy*Nb~8+%8TxpCHP~-FE%CIEr>XJJkC-g36f>edt3! 
z{NWE@e);83*;Tw}-lz`%uQeU0a1ShyL_#q8$gC8oi>N15}FS@b0If-pG z!>+}x7L_xOiq?q30in^h;WN!Cs|*Y}8bOmtIK~KWEsYIoG@{<3jNtCHP6ofe>#*#1 zJv3yVq=8J1j3sG@l7>_Avmh`<(_HcH`5cgwz+SmtIN^O9(57f9{33|#`=Kj5NkGDl zkv1SyF>nv|!R#PPXgL6FZ%;@t%)c4NBU)GVVCp68x7+X(h>}!*9RxTl>t$vILze2a zxNCtBA;wN)mP~`K@gclD>{wueHyK{(hqo%4g%IdM>aRA0LI>b_(eUEp*2YFTQ*0BI zv*;o2?SIn8#J9fFy8G3Qnlkv*FDK31D<7m%sb zaJ>k52Q4j8!puxD-{1Z2cVQ4$S6Bb^Pyge0zVrQ!joTeot$WoFP#rn?yNZD>rP4u; z!Ib8QQl#}hXGmx!;R(yqJAq*d9DWWV%t5L)QDYCN;5E|4xQ=3FKx$^d5hSS5dhlY` z`Bq|GqS1TAyM4qJ%8s@L#6U{uuwSj@xj7HExIRkU%nxXDFaxZ#>(^J8mg)<6ife(> zMJ$U)Zf0W-$i1;C)l9se2yO{k<=*EM%4q5F#~)u?TPqU0?Qi8nu(%y-tPt#OY-~LG z=qcZRxhFA{5%t%h?oZ!oP)n(0Nz1PDQhhqMiGAaME*I{McgCk5K6&U+-7W)IP0-?V z(yXz<&vC5826_?omNQ^}3-A8@jsn**Qt$9m8KV`iFBEc{S@Ts&Wz+!>Nx0DZ#V-dQ z7H3FaKxZ*X+;TOnHAl5m)u9$YnXVuYO};7N6n1i`BM?WUgYT^sMPZ6dB1h8Gv%4B~ zZG}Q{$}VD}{)Q_$Ks_2yXOZHjZAj3IJLl}hKxb1zL8`!EaTup*KZ681h3seY%TL8g z*IS@5+nDZ|XxKYefm%9OS2*C%{JbUUHu|dAQ6QLv;Xhk506z4JLUK;y_7HPYDcJ@8 zs4qMib*PsmJ7t>Dn;za(08^qd#Mg8>Xvh~YV0&i`ONVnUUYv{pC_!`ioL@PxbMxld z-xQ0r6+7|r?Z1fwsl3wpi9ZgMhxcDwb~<^^HO!ZHy0yC2ZSS;xH$WLeb{B4eS41*sQ!?BJ17<%}&0LJG+()AUrTH5>BYb<5W;+D`OiQ zC<$!a5Go~+tOp0|pG}+d>P@zH%779ps^E0tWpUtSM!?8Co6mjjOuxP!;@(qV{&BMS z)H7$!)JWQoNgUh0i{zn$E03H$H6C+*RNT&(1DJzLS5Nj%VXLrr3ZzNRrcSuA3Zavi z5cjJ|>ZBMh7vH$XRN+1LRqc2=^x$%@mI95nwS4i&?zz zIKkpbbMZ*e_!ffhY0m-Oj#vP+Iz0X4RgZj{AO99n?p<`f_pVsycjb9<2wo5>vqF$CvmIa&QUdkhyohsu+T0VHIdF_9q5hX=k4kgNK(y|#*n zce_9w|G-a713Kl!PwwPSRgKea|17?Y{rlqbe#(}{#&))M9(w43zxDagx!rjm%hgBd z026lW)~&R{8um1wo<#S}7g$}na^?BwpD#ALINgtZ>|@2!z53egH*enTG(gqr3ggprud#0-p@1*&=r6)KxHh<_9!5jT^$VqGK3DK1n)v% zg+YmdNTE)!p=#)8gqrYfDC1!8ewuOfGD%I10Xoc+$-t9fL8av4rOU<17F4&9E<)S6 zI2IQd;f&#TMYv)PyDM+`4j4wA2e0(dp+m=x9a~>tf9IWdkd+D8d1a0S@TOQOviG?c6uNw9Kk~jP1t4ltF6~7egN1He zh-=OHDAu$f!D`-5t!Yy9V4GVICyuT$n!sKY0j_M(Zz^T$TBn}IrqlKswm=i0MdRFV z^6r?N$G`#DccaPYtjrBf@R4Wm9nPh-4;|O%TN9P%KOvpIPAZP(=XEmK8;=t0{6yDM zwFmTe&fhj_dgI_XMr3mh$i28}~}kkKx0M&;(N_r(`q zJay{SBOOWcfx_t&fVhTCX;S6$?IXk7 zp?!Awh=ekSG($%zpfbQL8hr5J%IcZ5$z+&U$zzxIMK3T?#KPv={=yjxB|a4m;N*ap%W^WN~Hu^5X4QiR6>iQ{qg?g9C1@m-4AIT^u6SQI^*5ukmXhJkw zqcnUeMn9hPK0#7x>O-&b(^FX5-iyk;ZJYxeEZGlq43FnJoo21<$avhOWW432xagh9 z;iRAeM%ci2iWa8;G7-t~uiryPk_@jtn2YX&!VCz)>B(qR2E8(_d&%zKzh8XMsmb)a zKYy}EQM_WIEPA9ti3Pj@Y>Z!&il}&qXlim7Um~Mf@AXXVBrHc8?}Bl=f?aG@C8KVB zY0->}TR-FZ}$^9X@n0v&}LHmIqp!c(yr1UWs&E9$`=_!Ntsd=}TX_di84g z`MJ-1?pJ^HS2wqIe)!6(QDs1`$AlRUUI7gWFc*67(Nd37)m+Ac9kUj6l}DT}4$PPl zz1bOym>0E?Cqq>@pK*B@GE_Ki-wUoM6TY+~5LuPMV7ef+fPw^SjqUY5YHAqXV0q9f zKkzM#L~UU0##(V91=4tCH|KYeRYpo#$duvr05mVL@~DsHMSW~K(Z|xHiND}>d3pKb z#fv5OEk3%C_OT~^s`@RnFuj0UaMcJe_%k412It_V(-N5lPXzd-klgNt9Rgb*puTkB z$l*gd-*IQiyX#Aa>A4si+2%<+L^SffCpBif^E_BlRAX=57^{?jHZ8+VCZlb* zmD0`gSx-~AIgd)%JD4ESj?>ODUd_9J2f8(r2+v4l@WBVQL*LP8fX|uMB;vi%cw*X* z-U6C#cU|RZEzyZS^qCb1>#sG#7zfj$UFtU594tF37&?eHG|1DKXw+t2-GT{Tp5~!9 zjsSunmbPGDawoHIt&m#biV;m3C2=0rDyu(Z5PVuBP*4-`^Mr1TrV_A8w~`yRUZfO2||Y^k+jl#y5k2D%U^ z1|h*5v^XfG>sG@*SA1{-)LHzdx&#eW@l;sV6p%t{wN|cy;HBs`1?pdw3Pho`N8%Qp zNJSbHH@VmiaR8wb^IVqy9jnOpRBvu_D5*)d^)F;FHS7%9L7qZ? 
zW$1><9q1&@n&0YYj%WQ=1>$3#iQVwp6_PY6Jc}9c!{LiLxhw*-dQ^Z>Fze(9+B=^& zf;xn-wZ^oMsebtAV5acWgOD17fhm%OKgAJFt1dncmZpS{i|efe(HKSuX2p2S?AS|3-fqO|zW0D|nUnk{)DdX+imoMj{szLlMGlSbO zH{(bnv@xzVkuJXM6u-N;I8zr|g9K8NfGbwz#EBD6Klw3uQ=2{nNDtQw6PB(QsK_-| z(L1u@g%@6U@x>QQv{b&|U$?S);-&BZptpXYgZE5X1%fJ7cQ7|@-8*et$su1|LrZtn zvlgJmu&AO3O@S2HM}J<$Nf~uPzb5+!c(W;K2EOXR1Ks>U`C+LSk)~ObFqre6aMoI( z4F7=l}oOgR^7b4HJTv$(Z^kHrmI1qz`M)lQR za`edI61R`zSs=hQ^k6vhC%UlXo3Dv)Xezz~Ih=78Ygv?E)ORM|yLS%OodWB+5d}09 zZg2D-^=B>}1yqRegbG@yYw;w12(2QB{C%h85~1U#yTJhjqJO$ zPTc!iF3s?P_oe~3{-8>oY_mOn=47ygR99hXCJ0YU}qH(GH8cqik)cu zo;v1HUg0B?K08)7mDHFF^MW$kvu9^*wexqPNi5l)iLHEovWL2^3g4grfjBcEO*4sp zlTkej6icvk_3C1#Hljk7RJTXIt7L9amBH==fy;EToAbNH@2(tL@ijHcTSkQ>skqP4 z=$AhKx$_s^J9GBzoWFC)0^k%>xZZMRV`Bqp%<^-l*+~OsI=+9jPMejIU;5IQ%Fjn1 zeYAv4pZ)A-Kls59+*-debPmgS;Vzp+#o$u4TzETW@V1{l)Je4vWe4G!y)wJ2w?%BE_8j8q#+0ssaAB!g%G z03ZNKL_t)L){@*QvvW$SM+v31;nNcv+GtP$n zOEoYu(NOIHMiItQ<}eKMDSKaURsKdAT<dPHYnTF{Zi6Io0OqK1J95>gL^u|XdKdCh^VyqC}ns<`+D#bS@P#u3?& zWsDlEF=GMJ;k2ZeQewejF|;l)X8$sv#Tc;47K$g&2*Rg1$Tb!b!Ru&*GMH*_gv|qu^z!lQsz2 z{QiE3Q#o+afCFUHH5p6udp-Ty+v4JAyS|AG02M*%zKeQ_9cgv!CwrJbj8VR9ZP(jv#op^i*e>UxL^Jn_q{Ra;g2U*-^C4(`c z0T32rl)1foe);pi@PGf?|EFfi0f6Zj^n#ppy@;F`nZ^E*aJm}item`YNIFkCeAoYBsp*H zxbPhzRPl!EH*X$TIaJ)q{{8#c*4BRS_kQn{S6(Rt%1knsyZ-90{_5ZTyMOoCV~_p) zzyJ3O^Kr6oQp&rWZhBm-{>?A9Zrys|{`($!@ByTqVgGs$+^ablp+7-vEF!Dl6bp+& zz{*J_39AKhgUv8n3A4D3JdR&ATYmJ&k*im)gKUfiXZ+337?+pt^Yim%EK)dwbycgY zs|Bt_EAi^X@}>7JU!*wzjQkD))Wgxa`MOcFk`gDyH7=nd9_6ExeN2xS-?0n^I*C{%ZfFqp*srOutO9XE|17iwKb38QHjPUfRT-~H z5=B`?Wirqw+MHln%ob=E=co}1sW9Oh)edbqr+viC!lBt3>XV#ln?r#avevV*q2R$d zlnY?FL#u|vB(i1c%!Kl0RTe1m5AqaO0gXODlEFxc zos$J*N?=kX^+zlu=O-;DOY)dv9*FjuBsYyB1VSYAZH=|f#lRI#&P;C}X<;mIZ|AGbUU=b!H{N)o#9k*)p6qLeyx+5@zMUGg@g5X8 zqx?ag+R%7{pP;nEdp@A0nCpN}5rWL@5K3k-{X@i&|GnLLrv_B80tVMbnefzja2!#x zBqbD5YI3SXwS#4FqIj)PGZCe!$lxkf%HkX_;fmSQ1`)>26F*(msyK|*)tlQp;}XUd zJiq?>>woeme^Q1uSY4G5>0$s>#PqeVeeJv7{qAr7_HX~r@B9uh5BlMB$FZ4WK5Vn= z)d+Yo#uFcYd}d};yXL^?@gf9wU10&)pXeu4H_QVq48FQ_AWib5{>L%~0R)6Jwa0nY zTM0mq9lQIjGv_i54)~R>9X)y!+f$dH>+9Z>&-BwI5LL(bg{TM(P`3K8ZAG}BuPA5h;@`)@9Gy{QP{z=XbyR-OOp3 znPGlvUrUU@Yt-J3kSqN0AOG>GQ>XsukN!y9;;I|nc0L*BI%UdY`ZIsb*Pnml(S`Z> za*72|(&A3%4$#`9tqRvsgcgBcDbH+I>RgPc{8V-8$jJt&Eu8RXL1lgLz=1JK6yJ8+ zZ3_zvI2|(&KM(WTwQCtCb$S2bNUT*BbG0L;@7obNm}W{%LIQ9)k5~Z?vm-MyIyxBk z6NQ1WgW{5cZIhEa1%8C8)p)3_^W5ZEkC_mCZ3Wq^iILwFLbjRE74r@jKBljwXE^XrzO}Uu{`usvwn`^R=wkip}Xnjur+@*+* ztR7)a*Kp^ugQO!_m&w?MA^_UP#4F)9$vmSlb!|oo)PRA^K~IhWZ?#oT6S;$RNY`Z9 z4&O=a;J0YS63v3x%}2DS)J>v}8P!JjBpOmSWx64G>M5BmCnW$dTQ6VLzG||lFuDR$O~fO0*~W9^!w{4-Z*mjU;-}+6|svlbu^R+ zP^MfMBn?9v@@#h3lrrHq_xDITYX|b4relt}iVg zK5{e%rhoQlf0mg!lK|EZ$f-T68B~Lf`K!PBtJ`nC{afGqmbT7v!md`>SFHHW7qQ#* z#@gD^BZuxdW^ein8(;`US|Yj-`r-+ynCxwItDM&jNF@3BDm^^w6tvecYAYa8G6n_5 zy2?()f&I5V{@A0JuUyUMGaK5Kl@%nwN4<3Y&6UBfwe3GH^V9B>25&il#z5V6fW=?8?=6t8P=@yYTxdxoiSO(aDb9H0F98 zju0EGTl~}%@*1Py(37Ob)#!x~RMahPB1WmBqxD zE?<4~t+&h9mfiK5nu5UufihdD9BJsHT-L3Pzuo~gy*L{LiEHcK@6#;6y0_G>1!Qc8 z%>qBBL4bb#L9l~MuH+B0LPrH82EQNDK|mdD-csgoRho_+^k}8r%7tF6fJ0OxukJkHtZyDsYiKcd(CMTcyT9<_ ziziN;xN_wR&a+Hj+DHh>if~VnTB5#kA2HV9-)eEAwfxn@#3bi~8?lX8nPbF*;iGbV z>>nIa$H2*{1Z-Fi-6Z9uQ5LPM;K5*>rnrkkC|VCuk48kx4n-Sy(Xd~70Rx#8P}k!v zack;xJAk!8Y8tI=yJ8De*EEI>^)9sO(cAR_Hs82aZb6KX-c|?Sgxt%JVijWsK>(*~=1-m(}?_{+xGfl1t0-cB=)u$w|r4M@;VgA+hv6jP!UidMCOgo=~7B?p9VvM=kXIb)XK zZ;9gsxLjIVo}Zt8_uY5jdFP#SaKBHeC&zqh;T*K9eL5!F)ExC@l3~r%hLyFowG}Hj ztc20c6#e|;k4{XCt8!siqaY->rHPePn@ocm3d9kToH~Z(+8YRmCXqVQ!ikNCIyZ)3 z8*)ZQMvopnBrT>=FI2Ez;;XoyY++z+>#Dl`YNkLMD#yn_>G;G%_I08yu`@Ek;N%mh 
zy(1<}VYM&o+(upS1JR2H4P=Ip21nSO?j&MVsitmD+s04+D(n5%UOORxq^8*etKUQ!bzLAddrXix z)lG{zpQ3$R-vL8zk9$^+tl~hRLol?7VX5zVQeWT12ZLul=C)4-3~X5i^uomxa3ie0 zs&Oe=kU)+c5;B9d285Y>Vt|k&n{(~un$JJ~{MgtSg0N)s>s>pJs@l1ix@zU0#X`3n zBfkIs``NN)6N?xZ78k*8%tT+84owz*{Q{8`D6# z;vvG*mci6W5L>bPl{se(NG9)9(;$TwgKF?k)H@9t5vCEc7J)Ji1~S}KDX^M&w`}f= zI^w;GHZTRpmrqT83`3S+(G(#C5J!mkxH`64?1^8AjW{MrFz+__yE66QP_^pQc>#GH zad^JvtmIM_Vhn5Wuia{(PtI@&`vs+BR5x#|;m9g%;l;@h^6`|aB&H^8*JF=#rj1B7 z8G|*;cx>xle{)ke*5k0M+D&g+8v5Eo|l{qTLI&kd^G0 zTo-7Ad%LP@;@pluoSB~awbmh4+A3bXhONCR+Z?@w>5ah#ijuh|Cr^Fw{^<{@Zl`WI z&sfWpI}Nb4XPj6W|F|Y|TdWkALKD1Bi@GdZf_8PSNxh(!Cw2n}s^Gj$B=a?e*b1aJHLc0UI~nn}5r?OeZYjCW`1qu_^1PW(j5_Kg)JD+C0V>gmjT(FcwXQC2+3OzA z(j+2GoI}bdVvwDoz*LC$CGBEZ+;JyK)A|nBjdxqbGC+MIfWatQOzHz;7fsXAl=J@% zSw~YXC(LVGOX0`(OifcQiL0vwS%s(tzzOLqf1ZQ3q=*m%lP*J(s=McvcjFUL$v?Wu zHSBx0F*;NY0`sO-FfSADMJT4c6Iw5TYL%upesrArctzaVRF?)vCz{om19t2NWhII> zpFnN=G?W+m6hcI)!_a!Tz`$V7uA8xxia!06IISxZM}ZosRgON#Mtf`PS-_^Iro>*2 z&}NN~u&4!mSy1hTwMbEr4IwB-)4+&KBfKm>eRG37z`P3BbWWf5qm#n4ky6cXIAO7rM=bOgcB~%`k zs@4CdkT-CS3c2!YC*I5sN=3Fv+OFg*iWW3ruwPBZlo~k@XY~YIh-$WB!?|0WPs;N^ z9OMOa$i#$@|A-ei8NPI=(efHb=w?XtLm*N0!`M*=dN~*$?Nkk)IMD1sbBwyaws!I2 zMeIa}*~kVsBR@y0*RNm4z8bvsFPFCKqc%H`D_ps9Wywmef9={ee8jq{Ovh}A4<5Mf zbB{kpufl*Yzf8j@herPxx-Z!ZrW&gOY9|j-P)dqlYTQXJ;mGq0KP$)L4C?~gtZGpw zm&u9Ag9i_8+lk7zhP-X|&=!|QO{BYkoKEXKl6WNDQY!4?H9HiM(GET#XjFz)qs1rY zM*ciL+3vNDi`QsNf@LU~xZyl3N~NEYOYbd3^+Rr#27?LPb|glVc!9C|UABkO22?GG zAuA=XUWKO9U=XlUv=wl*tN?rTZ`d+XCc^QuwmaWWGRK+yZilb%(?MW0`GpHmN54QT zoAW6dxdo*)m4FMQG<=TAmGg=Onc8D@2`5!7*lswLHI)=cEoP$8lx2{Vx+po>0t*gG zE1@PqCn`^zcS?jc>x}5t*dt8i7)HSWWjNWvt{dH@3I4_3hPl z7TIm2ojtdVpW1)tpE*0yw)R9WQzb4K^{|efz}Cv-Ub*|rU;WC$0$(;>J5Oq>ggT2? zCe%KWmRccia(N2014~!0UVG==lgW0w7D((FfWqWWVlh^OTK9SJkPPIY5F* zP~}660_f391xF;PQ<9Jp{Q{DBywtHXYBWSK)CYkC*>^%Bl+1u6nAYUNnBBq{wgXA6 z$tU@zx7}ag*f??GL>#w-SS?4c_uqg2@BQBIef!(rK6L2NrAwDGArSS#^rCavn}MCM zjTrZ;m3)o#axR%^C&tIK(X~D3Dfpz!X*!=YI?DZW3`c89H+axoWUoo`XrKrxhUi2; z4W|<)N}L3BgiYCDa|aLX7e67gosx-*n~HeIJ8ZoIA)?-P!dY9IA>f&?DPE&aaxt-O z=K-+kAqAfhCziM|Z$zm7f=_I8mZ0tzT@=g~VgepW-WzUde6EErLTJo^IC`6+aoZd5 zyG?6e;!>lUJ&Ul*o=EVFoeW8hRZa$#U;!yoGGA&E|FEIbJYNDgX4Rt3c5P2p!G8mw_LCVwl0iWi)vP! 
zI?1)!RMsupHDeM}Tcab@_I9#E-o;;yfX$}zQJ0FZMi{QGukYDCZ>|+64zeWr{54~U zJmmh$YLaF#vyh+2=*f9QQj?shv5VJ^uNqiBIevP+s0tzP)Xa1XI`!s*{lln`Oa_t7 z%G5eBIsWUv`YZqRd;d!;8Es=sP0isxvdxvfMK0E=pj3Wxr0gvC>g88nKXmZG-0TcB zQE-LMH1pd*6hft?{q>TjA zirxxI9iS#6xnKZEWqbdpl*1Al6~9BRB*=pS3;+!$xrNmmH%^^8ovr3Xc}Y$&xl@hy zJ@ut8?3$Y$W^%j17N-gLL>nGz@^mJ!>eq(NophwP1Yqnvh6N|X8j4R%FD2$Zh!axB ziR2_Mb%ulC!Gj0#00*suns1#D36_fsRBM~t{1uK98Me;TXkUO!ZHjkvxY;*62#AC{ ztn-qYv8qv(g`X1#BVZw)n9!QhPJSAPRaKs+lqd>r1}HlLgLVW1HrR>33Rb2p0HdX% zlGD%(L>^O((XdxRC3a|IATuY9TYS_KPJ`f!;w;)nlG11yg5@_2-sDXfUWhhp!u7sA z$4fVB(kdE2q9yxdeB^Tm_Cy(GWMWc8rT(a=goHPh;}y^-PhK%ZcOgt_~41DX*K>y?9rTbPt>#v!Ae1?tf_~0 z-f{e~UwGu!Y_m8Rq4IF%QB=cl|NfA-WABB1{--Yv2PMSR1|l#E??}#;-i}gySb?PMNvo;Gt0hCruilMgrapEMJEmORUdv@E)D+jHYdECUJEppYUIrP_R z)$T9GI;7@b|LzHHeZX^>IG@6sa72^ZcNM^kOS!%>>!ndcxp;J0SOx;(n#lyl0qUxwm}6(0>G;&LY?IH>p-(gRkh2`bq= z*d(!QW<=4xeLX~CFbr&qtE_=0s7i6$$dmom;p%GsZed{o;n&Byg%@JH)!S0t0f~=~ z{+1!U;N-xA4@@sCz~2TI15ZL!q~yzgq$O%t;!*oeq?d;+)0;Y799D&fAUp*U1Z}2M z2#F{B)$~AeM=|5-%TGUf|9$sv^?K@gJRcC4f@Rp+R@9#8OdDU^J#x4?L<}Gq zz|4&&kJy~wXbeepc*;CCK%Oz{G5Kxq}?&_aHq=}oq@s##&{j+%=q`*yIK zbM~!*$0-=Dqor_L&&0|`s?GfrN+2~T2os>~x-J*^N_I5W)C*74e>e*96q=F)+pYN% zH(w@>iPyqmmXE65kl?SIb{mgnMI0nkL~K%N3y4fI(9QPP+7?$2ZFIRovwIcW!gnIE zc#HSEGZ*m|oK`^ZXdKUCV7#Z^m0*H97^#TrCNP)KJ9kM(?Hu)f+)Z zD3z32FoFAYNkRw%GhELWYSi0rPfp;AR8@|zJz`^i$ZBi-(k zU;O;8xw-bXqA216L?i*F20bVeYU}5Qyp`=hedVC?z(1Btx+@+&uPtYW@4|IW7a%U}L-HoEPsY1Ina_V3@H0SF&xXxqK= z{n^>s9ONWZeTMYL~MlzF4Q*M<3P<= z{>|~VO2yMNZ0b;$%P~YXb$#rm&BTL%NJ^ z0yY%GFriAtMISa$j>VkKW~xgy>KFb`Zm50aMsIa2TZax3SdKH`S_RN_Z3%id#_~5=LpFF&^H9T=bPo7+P=_S2-HJdW+ z_ba@ckF3g3t7>GGcK~2NHQtCYJ2R4<08<=w&1ofYH*33Sk`P7|BU}v$$d6}UOMB3vlElCc0{~cva>|9-xrW6{ zj&ztpP3aCu#HnT=g*$@Xw?zZO8xR+2>w=<`P3px_A-w(p+Z@!^TZ!XeeBJoe)b#ko9{qVzcPd#X@!4=AnOwF>SZco*hFs_U12mYZ@lrwiXHde z_p-WLmKq#9c(CUs-z*VuU4QP0$L_!HUKf^<4hBM40CsYDG>1%t++x@-g728;_>|@$ zds{uHX19l!0qZ@=yCg&=q#fo|{GYJ?!+9Xs^yQHOY` zoxDJb!q_yO37U^c(+6+QNS%oH$~M?3bf5Y7BoCxQofs?{!?jk6O0InIRBdUWw1Y^z zFlohB5cf~)X%{xsP2wDH!e)b^OPa)aCDAgIDb$&dnYju$P@oQ@DAlra-9a}6<5dNA zH`uxz85>Q5!N}++EWyV*zl%Kc`r5jwvm#wkUyUYGq#-m1EIpftgJ9>6Iv6Zpxw3Zs z`jzqVnc3OdFMnAkC+E7|(QM5wTv&bc&Ect2_4)JLCr@r)x>T1{8QS+#u4)f+@jE-x z2lp=KA=_>S7celA`PT|LDHt`V;LkAaVQ7ceD1Lf*%juV&`qKNSPT#n(mb^5iF)AkG zG-R8jPUXRFYFS_0QI96KdcC=`=W|q&YzGU$6vnAi*UilW?~sg9*G(hIzNX;2ESi>& z?g~Sc1}m_oKShRv@Q8ZsZr&gqK2zl8Vml-SH*QwN^{s)xB9?2x4ba{tJ>(OwPjfAh zqM|*8*MwBnnKS23ojRRuE26G^^Nu_2IDGhU1~LxEfNiM#1hJQ1dMPtYWp5(Nw7^jq zM~@zzn3%{H*fO-Su{l0I_KRQs(jCW-4gA0pZTxtW$8n;LE-_21AOz%gXHZ*1&6=X9 zfgKn9s_-E`CJFT#gh=YzPsmg)KwNMv>s-!XOGU~K9z1aV!o}gvExc{hTldWXcmRx})X6NQFUpx;3qDY`r-GUMtPK-}E2T@gS3Wqj0O#Brt zu(44A_HR0@3djaSG)NNXK(K3ad;^itai+Kfxgdi|^VANk+tYlPM-}}8OG0gpwjwmP zr=e7Z4W;BJm4*=OM*P0)09CZ<+{G?6O<70gXA3)Ue*(|b;r1Ki%);+mce=a@$y8@5 zZ;hgrFFO_Y91yr22scs)P#Mkmun?kHFbbI{119b(k5JihjfAtb8f(2*>l7jy@5t}6E-eUQeD9i&{mDnEj8UC6@X}9RLsOS#Kf1$DOmGb$h?(@K5DxZ zl-vrXhw)`?l!Y%xikZi|lM^{yICt(Gwi3z5P}FxOY;3Mmj5!eKsg#I+pfIi5#G45K z$xz`uog4~pZDj+T73X%@KG^N--(MX$GLjwS&CNqwTP5Lm{rbuWAFQ7}yLt6$|Hh5$ zOH0?TU7JxIPpO!%teB1;ugbf12ck}AzK*$Wop(gX$>A>EPD@_baI@TYSZDtc*Dogr#*M&+OTU$?m@WF4dti1gEPdA?YLd6u5Z!1z| zPAGYx>dcT*6_NrrKo=ouU^VkQ)rG1hY3jPlmh9!mD(MK^d9pIt&pY9kO-f(*e2pFj zDpkcTvVxTDhkU&It=k5`sONM>RY^5UtEm*DWqG7V$nX|6z@k9f)}*8s^INs(r$Q2|FN@ zqFFXa#Dkh385y@-h+&NIl2&2wzXaWCyOzMw{w3v%<*p1p( zZmA^`Z6oFg_cW%EKpSZy41iGZ$>qD#$(P$(n-g=hS$M=2R(wo$SA3gCwu$}iZP;KR zEdd0!<3`oWZZ(W(9}x$aTXns?xjDFT11IQi*y;8~RUJGy+`qplwYr|K>*;Ky?bW|o z#lC;~^qzyamDiorc%4WJxfF})iiB}C*J)on=B*T9kes&fGt%z7an<>mK>!`VOotKNP0)_e9?Fa?G%$WpUf 
z&zYk}zS!&pR1B!e9_iHH#3Uv^&Yv!8K4L9qwan3~Ydb_K%)OAGC@H2n;GbGbfDO=X zNyQ25v}74d12{MFM}%6M52-g^Z81saoD*-pef8RP9K{@d%#9m2vg4RT-j1)r#Un5( zKl;&+mY0|FN4)<4i{^1NSoY& zR4@s{Ac}u2u0VV^rR6HQ{wj6pfSNc65kineMMDQE{;8k$iT}C9Y_az2-aRtXjdgrs zTsyWg7ayzE1P6c~_=|@#W^jNV=E#S>7_jmc&Vh3jX}!`?!#tyR%7}SR(l8o+ z!J%cl$;|`!k}N@j8~TN`4o#&Exlx$|6uNAUCv4JXU)TeUhs>wj(rO@NR;t}v`VdkK zo^@IPlt0~;H-h0va?XKh6Umfo5R;%#WR0*fRAov7oWPcay}h)w1jC%8oaO7+zxj=? z6)LEw+7k0lsnY>dx~K!fMD8>i03b0jtYM)It-t(BUw!AD_pUE52h70$+OoT1^NSJP zwkLXVvDWWTEG_*fOH03a?J-@l&Tc&aBw8Y z@0)SVMa-o-WEAuZ0FLp#MH68Pa059}1Mg*}UxE}ScE?tBMxifJ-%s|^zYbPThA z;W+~}Lie_Nk(p1)M^El;3B2-u0$y6*LJ$P}20T+5*%VPtb5l{Hx_q>{LlbNySqIJ* zJG+YZpf&tUSQ${$o)zKZC^g@bKsAHPf>67O2MT0oCI5yDCXh|A0W?FSw%De9OCc2+ z5NpEOmasY)2qn2>z6F~)Dfoa(Y(sJ!%WRln$I&og6nf?SmVpk0EmVtf@o=cVr<-3{ zGd7XKa6H8j1LMXDhA#^;&=ZN3)D;U$r?OY$mFE}%b@0kT<8ONI7@B#(!WYXpSu|Lg z1Hv`L#zh*)^nnu~85yb9ZsacTYJ_a4YF`cUk(bGkLRnf?W@Sc1 z7XI!>V?TRQM@IeiA?0v+Xh6_(CtAe&6kq|kCB;L6!6f+GFbPwc7#ZqP%(zb-*%r2J z>Jm~Un9kg*Ko2MfryAKYauj>gP=kVQ%58V)BuE4G6YT0_ zj5}y~Y;5e%!To)ktgSu6jIZjZ5sj|~cDe|DV+jK6Yi?&^Vj_pLIA0T)=divgICpbl zVfXC(LX87Y91fxhEK>26UmEOc7*XYL@I_^+uPo018&Wg4e$|$!rpED1o-!~klENpz zmsVLMw1=@5D!V}yfxF*U1g`E?o7)b5xgtj60Rv8BJk3q8Ii)CCBWVM|s9(3PMWm73 zhWjX2Mm=K5jb;key3|H6!Hh9?+2yvAmWz1E9gchPF&vl(xDV_y&pE zY;#s1InYTa40zvoiVd=05CI7qf{A-+?&si_JyEhQe{gG_qOtLSs_kN zj89EZ?Ooh6Hac3{5SWi)j#nE7qkx%QkZD_A!BckLbeCNzNo^wGFEEn8OD2-QPTeRM zIt0#2NnDuxjv|SVWK7~?6m^#Zv=6aLw#MPL*Wdi$>^W@MO*UV?Z{NPJfBow}{NWGR z*49p+KK+L!BegFP$_PJo>eNqv`qOM7V)0ZCb`KsrnCtAnZSO;${q*AEoYB^dru6&UW%7@EcsM8oG5DCE+z{`ydlQKYRK{%Es9PoP zac6NGB`^$aTbqQk;bTc4BOda{R7tIb4xKz~5Y?2Q(Nq{U_M-g9c9SrG4(di7ic9iG z-6~7)C%+=z9LR3%d0O_R8UQ;>iTWziMBV7^qySx<5EbLmVeXFbT_S^c2Ph7d@p5gxe)%bW;C7#r5kXg41Fi^LQ?$CEf)L1{#?_*w2 zDFXxp;5qpOSfbH2A+i>15liJKWYsh~sGjVwf~TMQ;+3mcUqA6i^9#jQz{MIDOxJ-JSYzc4p?t;RCnd zel+_NYLRFOEW{#@Dy5JLaV5qKO+`trb%^!$W@)4j586mmVw6NDT$}&kF7@PjBO}J< z1*dMJ$iO5QMAmeUQ3RL)AaG%$!6MWNf>c__%6@Ha{pHtQ3#FeQKYsl9l`B`?dh4w> z-+a?Ns*#9whBj51oxH#N%fHNq7W26IPL6T2*OgD%K^2TdMPEpxgrVZ9@13Lp z0H(%Ux-0t|2lwALJw3Ix)eED0llA40y!Yg<-4&Y$!A;!3iPorG`L3s z8l3h9&!Q}`XTv-7eoJR|mU^2Hn^I%)Tc!PSU;N7VyVFzK=PqU0*tKg{mZ+hfoHsc+ z=~VbA3uHLkzKxB|YF}sA9||3&)TGAyyN7_eLwg?Z{NPnyY5=Q z^UjT9$I`xiBbm?^2g^{SF-+{pWoFryXz4r-cMjtr1sws8XQd{msJN6 z3F<-##8NC(pmXy8pZwzI&YnBJv9aL~GUpi=$#hnRqy!Tp*Or#Pe*OCIbh|fFdVhO6 zQ@gC$Mxc?(TSUV2K6c^4mBq!A`}dDnBUmPLV{Pr#*WY~q^qKqbyXWx1gV}GA;04Mi znJt(uC3oz==f-BHWOFxN|Mb zL(8TYZ}jW+di(buICkvl6OTR8>2!znP+WnqBZDQ=vXeSCOtVHtUt$v$A`Z#KXdsT* za9J0b6>>9|2O2YxGu2!o?+rbgwh*X5aiq8kGAAjUybAN>>KFm3pnAkt^>+OF#0_mURai86&9!MvnmO#HXSa-?ZZWDX%eJ98T}~6&E3I*+G&RqP1)Y zRb*I_t!FN!1r8_!ma(Lade;e!mI~-nQ)|d6F{NX4ag2aU5+%4)-M{w11jGSK@|{-r zm9VjqW3^I8Ebn)PC@^kAY9tY7Mnyh4r^jW-XN?037<$|JaVK%CgE=Bdp%YEH(r!d^oBN zsJ-Qt(ZOKcULy3a$k^uQ%&Ajzr%oMs_Sx>-TyOX8!JT&w@4tWT&>`8gN2aFAR^1!` zWiyO}=v)`5N4@F>I7gYg=4QJiU5HSr*9u))p;>SQkWPO~fJM}m$XuaHxtkGq?AYy} zd*ZSG>tFoKY9}GI6{p&3qteR8#_qMX|9Wfd$k^Ds{r+~QLS`^?kFwEKJB<=>Kq*cQ zhmXGZ-u1=Bjp^xZw`)I5H-ELVvihSR|8(Pl&Aab7KEx@|A`tQvvuK6}+aQ}NU(SjO zHq_Q|CH|DiSli0OISS7^MI2%rlvdVRUm@TU-0V4}Or5oqeDj>}Jwm zzkYpgZtlK&?|$5jE|zX+&G51GSBKR2U?sWGB@*3pPN8qa#hv)uiI74d$S710F=pK4 zN1cE}1A&Ubm#8Boi4j0Rc_3*Lfu$pZwZZv-*v|l!z?CV*@m(bD)M_LO>+71X021x z5IcR)rb6)2&YR9`QM^Y~;Xoaj)HD^?5l{PfQ*qnW001BWNklY zRg!+l*|>z0Ho5U?bR^rNI&1ui>>^Lvd+D29q#tcj-@;pV2`g0q5Rw{EEkJKyzC1D< zj*8@;F>T}O!bv&G!TA7*Y?cLkmyLWATd~dDDhHAFly{hVETb|N}y&Q06YKoJbM$p>*6~Gu0fU6MEI?i@?X)S)XQ$&R;+;jaw zfAIL@k6yTV@uinusXXn8E^3c@x9WQ2#*HuDxbfwz*G0$r#(%*nIx^7Yjlx%*rf!fje> zVWdMG`JBv7$g;4L+Mnek_$`DasfWl>iLEuNm%uVw0)TbdA$a)kp^@&6-9$btI-^0I 
z$_wrcY)8P<)D+fOfdn1jJ5>1;Z{q0ac;#O9Ebd*{z31B1E5$K_(oWK5Lgxhn9UNjK ziE6b?w>P6}U(k9vTMDow+8$21;OwNEtkh`4Kraa?@kNv7B}fTMAM{o4-}6QZEwoK; zDOdnJLBqa(ZpSL&hCQni6tk9arx{<0@6|y}{;Y!jw- zhVA=;!(p*chBg>1Djmn z1)GCwOCx4eh04bNzn`D|+^CN^hgIyTpvedEbXmqtGLV7Rq4n^jEKr{m-6 zqoZpR6FCmv%x-wMn{8d$Q?$LkwZ6VVsi4wb91lE&u>jmj%Qsv7`*b@ z>$mM&oSvR8{*z)(u)yeX4v~y2g{TJGQ!_K?)20mMt4hY7rD|EeXUlQjfuVY-O|p|M zkhD3-VI6A2u$O8&*Gj$N?iZFTs;Y~;*#;BwmtT49%GImcX+xNmN0*Vjy1IJe#0f0< zx%=+BzxK7SiAdPpix)3G`|Pv%V~%t;t>j?$(MKM6{LzOq6T$BAKlF_`RtlnH^!9BH zgt*>RS7e4j#qx4>v@v@eST0;**=d|tT%}Ip7HwePQH(-gAFH`&L+Evr1A$;d=voTJ zlpFr3;X(HA+c!HqyL#gW7H!lv#hd-JR*y~e$wo{zYfB4d(;G);pjz5lIzDd&{>hDJ z1#;-f?aSAflrR)et|DM8xp4%*>^*s?YQxL;><D4FzCbwh!`U6!y6#o z2@}@+$j#LoLny+UKsauv)P#blF7YkL%T%5^vAq&?OLBJs0=Q$ijSZGY6jLJPQNxGL znM!*i#^pgj2bQwqu1(9K@ZbtZKd$-=LpHC*6t3f^)JTs0okl3G)LcwAdPZ#9Cup{( zR&EiCHLEtvTdAeqwDpeh65Pb$+fx{obA^x{$N?aLO4 zdat+9>-ASwhMcb`Xshg5G@XOyA9g#B&Cc#FJXOOcIa@m2%v}>&;-H8BcL@PcO3k+u zK-eTP$jifAT-@^;zy8hd{*V7N41*da`orO_Uhko`wfncW)>FEkorydI^D68NlQd#5 zkC^ElTCU=9NQuzeEHSSJ^t7&ylnn~y`@0>$fU$diK1rlor_W8^R8b=N>K`&x8aMb>@Yx{ zCF#=bqA&0*O~Nx{;*~cNY(|EploVFw%DZy<(%JLpUpw(eWSFx(c19C zSC?tftxf?7`43lFF@BZiz8Na*Z}-VHqFzU(C3;kqt;tl}RcSmP@t)&G!)|yS_k-@j z&f2UZb5Q7CCh=d7?0bYZ%#xY9)?=+ZNhE2NUp$u=>m2m^Qv zvf*Q@*^QtoJx2+PNzs5BN@%k-fwYkuB*aWYzqIH|x*=#*PT@jkXlQLiy)g`;Cm*0n z#Y^g1~#q&_GZ<6RJIO}${hr|9>Z)AFUetuqT3-7;M zsyQT6z1aCtTPrIoH>}`Yr&7Au>UO#V~6M9ozC3E z#N^mmnaZuIY{|0dmks}3r=M?T8*drrEs>2J!HcYk+z`2zFGfb5oSP%eMoW){ZBM@H7Fu4PzroqXVw9U%)Q<7K?gQaI^0uQp?g7w_MORv19 zp6bjF&GF;MmzI_;U%q_q+_@ZJWsdsFSHAM#gAa8w2Nt%3Apu41;!r@_@7!48|{@A|J-clVw?|7@jmw z5h>~(JhUXAPr#O#DNN!>SK@;BZJH!fd@O*{fU&UI9(V2}XhfobEQ!~xc%oFP%f`xr zyqe!8*g#`DZ0NLOU=!C`4$ z(o@wLo0{satc>s5i}x1bi00twzo$UzM9{h7oZDQon#(~3-fje$zv)4H*AN~he3pH% zHM&*RyKrf2Wi^N1Q!Trj1%1Tg-5l)hnVcLQA6K(Ly&OPi-@K|uED_y-^|iz-E-Jsk zx3e>|qvag~Mg*->1BSqD=Nax|SPkZ=i*+RCoQ8PB^bviVGL)KpYw4Jjb^Y4<#@Y8z z?O9s7uiu|kyePI{zh-ZM$kzA2JbU(^Y;FByaq;vAXY9yB5L#UM*n$k)=4k5Jt(?9)FoK^-` zFMEDp`{l15Ieci)A0QPRUN=sXN;gB&46}IFuxW`RqxKf)Qh^*oVII~4+RkXv5mE+? z)sobV9IHZ0x|dC%qZ=(wnnVWcfG(h&gvgMEy_CgbhT$AI9y)mN{(J9v;iXq%mRH@H zzy_-WQ+d2A!i{-AW*m_LzbCHS-PS&H+_hKs9z1m9!nw1Xn;X!}4t;^$9;UgVT=6hIJ7Gm*0HAhI#=d9EAsg&6dJ%^tNz3yP z9!DYZd;kzY#zhL%gYTQBoAwepkArWD-W>5O4IemC5!Xr!OWox4Bw+G~iyg9CSZst! 
[... unreadable base85-encoded binary patch payload omitted ...]
zgn=4jj?6tEYfsp)ndBVQx&|wU*c_7haCFrD_>)h+&>8+P0v>T?Ga#+AUt3!{jng#_ z@mAUZ6X{%aUOXOKot^gV+{~+!m;dbl`U`*7>rhlD=5->dj@>6Tk|Qmpo*W;4?WN1N zZ{O}79`X2Fk{f8rS}Qubp;e{h=O?G8-+23%FqAz@-5;66q*&Ucwwr~88!G8p#Yg4VxJ@oV%UXm7FDqZ6;2g;d-4GM)A+(Uh7fW)SfXRlO#B1*2ggDtrSA71f8vQ3 zUw(Q2`pCU(&0*Yf>le$r77c9t6jA#+HFRz zH?6E9<51K6f9j7Z2;3Ol5CHZ-3BQq3R)TR)3)hgV%rYRdsVO3hjwX;z;M#*4Fy^`p~BCAecf%(cIbDnVFesw_4HNPEAhE%}&ouPsOHUMA#~eX*$dg zC#tkh^5Ad}cp&w;_Vx}A4~}9J8N_PRV%da+j9EIUA?;Fl{OM;p6BBi>Z$)DS4fDta zVF^XGc4AMxQH8Q|!|^NvurX)xLrp*tedoPJ8R8$Np`q*K2tl)=8l^D4CZ{dE3&@9^;8@Q8ggBVj!~(f6E2 zxlR-3z9ACX>2!YOowpu4e=b_2wT+D%H*Vcddn@)1j}Cj?-ofGFNjNgFWsA-3_`AHk z{OGrt9M7xxb#rSg_TZ!Iotv2vl|wZYAaO)>tq&OdTgXC~)k;4&t7{UdZZQ+5$4LhE z)EwLnGOo?^fb<+FW1#MK1=5UnKA#PSVCUSFB#@lg_DLrO1*r_NBD-26jPisi&>vm- z^vkclj=jU7RVYUJVanw(5Y=px;GUdZOf@w%m6j_5KL5;_F+RcPNzKpC zZSU+_i66v>+y}r(ZCf9~o@-j$b^G?6-RJa2_;y!j9p?Dv+I&!2znaXQ=z zMc}na*Pop(O?-~J8r7HJrOVnI4dnGR@9eBq6o`j2!d@t8WWDTwzM zu~PEiK%L=!!ay!LU2haPR=Vzlnn~857HaH)DQMawAH@KQzlb_SZOtaB$e_u6JLR;x zR030n_81BhPYI~MEU-^JeevrrzgWF~jU6gYnx__ax}Bb4(2h!oZg+EIBS4|5$Jm5Z znhP^-{5#6dKGtrxCMGAIoP6x@3+KDN-rnB+-L>^ke)4%ldry0Wm<&Fo z?_1no6h^QMcszXd*4|t?+^Q`zed!CUnoFRXtCC9UB}hPxQc*T!IR^3N6k=7VGL`?L zrG~Vwit9N2t3zE`0=7_f&Iniq&0oq|ABxIP^oNW9s}?Fg^0YE>D}^=1BO6-XU0eJ3 z%BMG0Z~g)XE&zp{U@&y#8$x_K=&p2s?!m8He6 zzqxi2rY4x24L&?PK<$DDvhuMN((84rHji=AqBtGtV2R^ncb>icVpd}4RchwD$tP4Kz3+4w^LCq|G%Ye-%u#T72!eCE0 zQ>J>5p#SXi&rMEFB^{}%9`#&<>a(ryulW2pkj5o^?9ORHiX!Oq*@i*3*C-o-_}7N??^aDb{@gatlqLW6!T$ zRsCKsS{~kl9KF%p+-$T$H1^gSA0OK}X=7vq!0j)Zm?*dLeA~c`UEkO|iY1m9l1t({&wy0P0dz#p&d8HaCHL4a#JS)T~dG!`9g#_}NoNijtJpi`KkZ zfe6Dihm?kim8smHaUxSyd>J>>NCeh=+iq+;r`*_sM*~5 zTRQrYAxh`k+S=aU-boRM0Hk|554X2>Zm0XL0u8~2w4M+`x7Q6cHZ@IaYB)ZQ7_EBP zJ{5*_Y8zkq*L_NdXZ5k)qFb$0m}$2ty-;tVe)5@T7Z#UlzEVS-AJxC=1mhyH&sma4 zqpte^89&KhjI{;I{$+zj1EMc!nn92qt*BT=Ac`lBdq@>hCEbR(0&=cE&pCAkwPU>5TRJa+++k-nZ}EjUJaLhEBrN z1fcyzyvTWKYU-ruXAj4-w@Nc&`(IzX9-9~Q^K-3Mv;QioO)cn;IN`vWFZjh?h(T+Z z6w#G}ftWFdAcl?QB!RxOM&3UVr4P^Bm8DjxI$9{l2= zv8=r)uk4azRmE=ZS69DIho_7lTYmI22L`ci%lm~+ig+AE6^;$AgLF}RJx2S@504I+ z!?CHcxG-P4dSc%;RE*5|`LFyuD#_VG!#(fp?bE6pUQ(MK3Z3nfW*(cD#+$()o*$E|g69P8zk-W+cD>^K>*XwU?Z-dA95P<9Gq*(U`vGy0{=GfTx zVq>DJI-L%;#;SCq_I{M8D1K=X=)D7^KE}a~8#m6MKR-J=ds6g=1vQ;_Js8e9b@TR} zShE){oRcwU%rcBDvB2-FqS-`YUL3-D8JJh5f+Fm!;9`Q*TIgvNoH~AO^N!#^{p`vS zL6PN#)u6-IqDVCle{t)tmO~{`l88Y;ln1pifyc*CMw@$myng-KdmnssIJ%q5aOT2y zdWd3lR|cK#6QtcxRkv>6?j9Y@&dzn~{=vQ+(HsRnYBityVvf@*^H4|o;Nacoy@F4> zzrP=w%h-|Yad@VX`-bVc)q3vIixU$QvV(=wh9DB?ya@z##o0BYdIA-rpl%N(i6qTW z`oJy-HKT+;K5E923FwdW1m>xH{16-&F1~g-HKwD&$Fnz92O%y~g*Z7eefhOlzkL6b zahf4(cvmh)XL>T~d1rExmAXAP)*kC{ctkcnJ2Npi8$ZpgtR&5C!OXdHF|X)W$LD6E zsKtcw|A=aGevWPWWZD4|I+GLai3v=nUj-#;P&SS&mqY>L6_raWbBKqdm_tJw;ZWu> zim-ctQP~nmjYi(j&dtC4`Wqko=x^>%i4&q^gtp-zMFzSWkY z?$=98^PTa|(ZNx#*TuTuo}P^wy0^C%eeK<~wVf9KX5^ z(k`RjqoeN1%F@itbn!v0Mf$6@2BcnqCrV`i4A_&3D-7~a6o~#KWkB!2R!S)NC6!Bz;yxiYA7@wHv*ZsqT18!i_s*QfHSIy7O4j#qDzVkQH(+WvXfF+sr%Dj#7nfEpJdSyZKz4{hWmK)Sp41k+--8g;OQ~DNoS4D9 zm)Om~E!LvA`iWyRDhQ@T@>S4?1;mUw=T<&x6E<^9vnWW?Pnn7WBo7Gx-rxHVXYOv! 
zEH6&XCN~+K==f~(qtSh?@JgtO`PsA_Z*GpxfJ}aLEc)OOCKl#+9gx(WPV5kFdqBrn zd64Nz#M6t%Q1JXlE#}E$G!ljqb9xE~nXkQoVK7K<40-Cg7gn!dyL0O%h@d1CY3n&u zovLMrGc$OVIi=1v;Qmsb1{yUR#U?tNb|>9;#OL&rDBF?H?ZSO%Ju62XcP^ zw7=-)&71LeX=&-iHQo;oj_LRwrn`)8EiKM3FD=qeJQ5Fam?X}u6zmEUkXfL_Vj|c| zuy%<_v2hx$P7u>o>*TKLBg(p}6r<+6RRSaedu6JKloCOFIjsi+kr=T*m3Gsm2#g?R zTA4P+-u?A!t3P`G!=0U-Mu znQ$|B_RPO1k*%+3HbNws6-JaKs4LL1UH(ydG(cT%peJI7I#b76uDErDUv2efD* z6FXRs{oc`QzxKCZ{q?^sdQWPmHbYMdkfy8BKuu^8q}0~QjNr!snu5|jQ1(&ac`NTh z>I3vihVbPRz(Ah&0nL2x5r(8Seh-%23I*CQ0osFb>E%~8*6!BYG~qbCUcahZk>Rn$ z32q?jM8!NY!Xp5rb$L^hlQpf7Z?)UR==$JbZ+d#BRkg3(SUu(D-QxlD{-WsJS5{WO zElbW$cc}MeCA+(}esp-Wyt3qvbVltsfVw)adjPd8b}3-subdLaA%-B90|H-U1pJ_y z&p*lU)~vwX#jQYEzjE45V42jkZWfQQo3W%(WC*h5L9%cZC9fQ%_Wch&{^HB4qwW;? zMQ|;#p0>8O_)@=<;+_^DU7B&QwY4)hH{0)7_oBs`yEwlTn;=~}OR`b?(=+7sG0zOr z-rRa?d;9#kWxD(o>3pAb$8f(_U%2?}(z)||L7|cb?w^`EmL9P04#2u8^bbX(EX0hS zxEatt5L9R>50Ta;bC4rZ?YO=mxXLnF5w+_d^btK%5R#0|hG3`Ip# zU0itV$#aiAv3mWRmfV62)DVK$w%p%4JiM>Dr$?=!4M?j=T1yKHQ9a}TB=*s4$6i2v zaJU!E(c!`2SvB!F`TF{Lv;gPNpFe&k@u9*o9qvfv_U`WfVLDr5VPQcWD{Hjs8*3hG zKC(rj0MiZgjPfDyZGYgK=*d;f=H+GKu^iJ%;v-3J`4ke~q?ohQ`2? zfO6J=lpAkw*xK5D|HF^Ixpw22JB1h?u*x3yy<(d-x{OauU3}p( zx5|M042g4|q)-thX*RoB*m;_|5Rtc0LK9{Z-aHr#0Zd;IEQs?!^Ta`gHe}xlreKZi zGg&OmqF6z(22+kQWdTBV`KF6CLdA@@;~IjLSTeTeK)g-jl zu=nWiFKWOsW6sg$uim_Uc+_24T5PpjL7b!59Fm2^X}6OHgX{Bh0HF#dO&CIcv6^B{ zr|`CUOu3+*KWeYIunKHvoVBA5b)|+CsLM+wWVecBa}(8He{=1}?|pc1F!D|zF>7a6 z-}eEr#~B-FCtm7zufTpc-NM`LM&Xl239*Hxp=z~eW~O=1+)$xEQ4=h}@iB)w)S=N% ziXsqmJL+{SnOqE=cB@^rV)Oaw=Pymq%=Y_TzU(4o(`jy2_oo%oiz=u}0gu+0v}Qb% zEH^qe#J$(S9ZrSLA(+W3aAg1&D7ph7NJCXs*XYX`HZXu{A6j(r&v}?Cm|})t?P^C5 zgwt2WKuDRAkvn>GdqV=0xF!)nI4=Sw0lqRU{n^5B=AHy_PYGQ^MoJD+)v?boJ3s&I z^Dq45%15MDRocOuMwh~dP=R_p5#)?FAqOLQy+Oen1VBMKn~& zi|Z*?@g=?NeSLG}H}cEx?qTS25ro+YXn3=gP$nEv0?fl5#rGi@dmwCByxH^djLLv6nUMj7 zXy9`%zO-`wLR$aWPd7$KA&-sAjg3v&^BTg5ZP`;gu!fwR6*Q`8Y;0_9c9w^xTD1M3 z<}(;?*8+g~X*5w(t?_a> zs0B`kNs6@XFJ_P!8gvspu6%mu z&fTHo`;4$Y-C4&E6%Gy#*4EY@WWUfcz@>}LT zbr3HaXjVRrH(l=6-piwkCW&swt zWI+E4`L7>X%9#$yRtW>p7c_gXNsI|vw2XEDx5b5^%^o+1W^qS?M<`E)V7dZ>8}b;* z&H<3l1j~dc+0n?k;Apg-Bq^wcFcT3cC#Ro(>E(^Pce}lG^J1sdj{ooN@Ak{pf)4^i zJ?_I$W zwZntMm6hd*@iB3NNi3XX6st6b=^_k>q6(Y#8-Olqpd+t9yC5XNP-X}m-Ss^}SsBQx z1vC#F3()QMe*W24pZ@f-gM-7N-CXtUCyW3*0@&Nzi%!=|+u4E@bA4m0U(+JCv_*=K zAxg`(CnhGBmKN>kjE4Js^xkx+74X%~ugL4FwzlcYI&6hBZ1u#G7njdJR_DW#xZNge zyvI$o?QF#^Ay@7elHKzF-4@<~m#r>XX-2|7ip+0I4FE9zwP` zVC5fz0R0mTi-MSG9Lcf~P?Q9QJPJ`I<->satZcv>*;dNL2IYmv9>4I!6W6bOL%rVA zWvu(feQf^s%;5~tA{`P*Lv2ZOLppXVoi^IX)a%{Z-5nw#p{e`#4WRprqNQD4UjEk2 z{wM(2;JbftaBgLJjyCd<=0i}kB3hBhRfe($_1CJ8#EK*(#&7G1aVl^q3r2%gIf|3! 
zOZozLKg1@Pri(eG4tlk-v-{DNPrmw!PN5obx{S8gYr4a?2T1#cc+uTSaZd$9Wt8?tr8FswR_gH~qWDGdW94XmROeA_!iGfc7; z{ib>>+Y8E%A!!EzOJ-!CY|27sz>$R)023@1Vu0>fJ}DXKo9hm0Pk zd%>$T>2-H)Jq#RDdxm>DLJ-ptJ2^4y*zx_g_7|N9kOtf98>_eY65m$4#VTe>j4~aj zE|&x=H@|g^&+3&$9l=xw+`R~N6iX0Eb(sXUIUw4=epr`uVYW7Ox#P9#H~#u>etanvdLJ5%ZrPhcH1;i znmW##K2+r#^th+-!I+juwl+4mdcAJDRmFlo{lbeg^YdxUS=}8=A1%9LODo0=z(#Pv z{jmb7B?NCa?GyO7>}@4K%~}VagY$ZZN*V{bj2Zpjmv zdCEc39{^6H-6!hfAT*N1*O}?5vCddG-5nn2ipdH|kox`J+S}oCPkWm&V<>CnB%(a zi*#BOX!&^5I3g<#CI8E4Ry$y3sEnY|EJ#x1ddmqw3#grWbJjLgL$6EA-ie8clj1&f7{zT!E5eShU0+>ZT3lSn)6(3i*KPy=x5mMlEyc_1B==dH+l-dK z&8MylMr037sOA1KwHoTUSxS zJvLe!UgY5c*X#bl!Oq^^{QR7>RH}-%B*I(ENKqsQ!dU5^9?O?(bvIdkng{@kF>E%8%tMJdVuWn9@rXY9@z15Zwpp12GU zEPl>j9I{$-{xsC#TCy^D7=JB>>(81eQ9r6qVQ5{m8zgN9(QP6OTnrbx$Rl$?9dxc2 zaBNieHjw1RgW$$%1A4Ta$3T*XKnyzO0RfOd1%>W8G;^_%b7n*M2`nq_131DkSR1RJ zoSM4${H2}EjXGo_c}h4doi(^u@7`vaiyKfXj^ zdfJ_n{0j!mliPRh9vmD-?>jM}yO2o+pfnN4qGWm&REWiHlewasPO9LoX_eYPV&y?o zp-Q-55ceHeMloH9_06^G|LofMr(0!uzvM;Xwfg)OoV30J(zRG(OW_tsMlQj@YdBAU(Uk&hq z$nQvgE*qE;u?GWGR0`0%sp>H3f1yMU!3nCS6cTdGbh5$J};FU78Hw|jJWuz#2?HR;s^x9{FvTi@c6#PSk{-4< zWjK8UDAI#!h`~D~l`GF9{StD6 z4#oAL7g#WRD=9cJ4Emw`8S;zzaIQH0fVwhAu9iigkUp?N{i138dc_MUK6M9?RNXp%dhepeiJuu?v7WSQ>v9UE{}NGMg?x% zymj;T9ogaxr9!ZM5z9kDYEeM#8`~U|`Q5-XO~H#JMdCROsAM0p7(Np87_z#0^MjAB z#6BEPj}MZ|t_9pSw{CGD z3^WVyq!DIA_{h`-smHaowR7RAXXX}{dfl#=LgyeP1jO)xd}h-k-$7&2BbS<<)Sl93 zPAO6<@%SHU0FAhEJ{Z*RBcH!16=SFaJ3|`eFVi!0_PXU?4U&{tk)4{Flly9tcB06MH@yiu~HBBdN^n%P|o6~ZQgKU)0ma-PdK%CYs`U?8Itz}kDRN?B@X66k5Fbtbywghq!^`=HKBcL4~S&ky?b|hdU|1DVbJoNXp2s1uTBXORrG7N;A;?b zY@@8M-tMjR=VoUsy^W3Pu%dhYQXDgG7}$jy_Vm-{NT`c8gC_7-p(4o) zi;|``tF87Ef?kg$77?_LB@sDp-&1yypwaAWPKRnQHE+q8s0*N$p7$W<#(=t6g@Tjx z;;0RjDJMZC5BNNgEWyh!PrkqeZCsp*iq9 z0)%R*bT52OK|~&ghkh( zI+?@`cN;y&5OzE6NQ*R(*9jK zL&;j8*~W<#lxur|)?Gyl`@w-bps_h z=A0c^mU^hN6YvcBR^TijQD0n7-bJG#_%9rkTN#b` zL6<}EACOQjr#SzCq{9UR^iQel0!l)oa4s0iG6VR026$2eWLg`<2%KA zs+=u0gMMAFtSrqh%*Rehw1@q)4MjlFGBhbHc&Gyr!t-%m_%+V`jn!7x>*}eVb6OX6kq3ZM>E3~gfd(6V5H)3}>ZB$tR zwp=wvzuRh64K+sBi}fr8oq=d%F*NXDKwV>0k3aZ>|Dx4u_w{-f2rM}0ydxVcr3rW- z8u>=(Va6BLq?Tc&RDeaLn5Y>64GL!@fsP7CMH_OCP4p%h5|Kv8wJbr&DJ5Qmo1O=` z&n85;te}ZTl_f~=u=(lU#C0$+=0K4(A0KVd+G@ys2ZsT*G&zcqHqdg)iM41svc*Jg z{|_ei3@bQ*CEkGq4Q?6c0fZ_xnHv2Xh$}(9m#KlNuIsV!sXzXs|N1Zg^4;Ct-P5Ss zs5qmv_)K8XsZ32yMavVPqp6P`E)}HH;k8s-+xNM@Bc|fJ!%vEMDu|Tax^-)Ib~bi- z?}L~V;TW8aPlMbTT1}CAu?=(k_MHnC&LuOg4Z%F;L^$XM3Th?l729nZcdO_GB{Kiy zbXoGUcv6-zkFBlkpZ)xc#&UD-Ydx*^SCQdZ?hL|-QX8@H0_NkRJyc-rihR3u=T5KN zooEadP?YoMmZKV2kN2LKa8_Vd#?Jm28hX9%KlvyB^qqI!)+uwbN@~nRg2)4ByQ@=s zt}dK$Q&COtW?#+Nt7#OWgwm`^euO)dAh_Qz(}zi2rVO-dn}U~kAi0J{9|yz2E>0&3OI5nk0&Ugs@gRD^S^fovK{=qC<<13X2n2Y*1N3)l@ck~;-@x9;wy?^%K z{rCT3a&j`oeE{?F@LX?@k|xTd!C6{b4A81E`N)dSimWK#sKU4J+~rN4_iA=x{Ihbp zf!G-PNSm9R(e;`k;!pQVn1@kFj%Pbr?BVAR28SxLZF6foHfYYRtkj|qoLnM?pi4bN z^0d69r(Lnf6;x;<#{t0+7awljx_xxiwPkPqQz)TPg=ZyL_kkxm-7{G%P(8ra9H3_! 
zry-r~?fLmRIkGCrdPJKJj!A36&H58y*}*-r>^^=Yd_;=-0ibdFS6+Gf5C8C=*EIl(P%LBZW{nU9=zh zl*=`ek@+)b@9yX-mn+aGbHY+RAS$YeI6Y_*a%EJNJW@@0s6)?5JTD5yXh;?@l-U5Z zFA8Y?EKG@YZ$+qG5XD7uh(Z7_fLKuNvUl-AA+e1yd&uY;MbEC*lWiPra>F$TA(z>p zIrSb~gw&S&FaO}5{~!P6|NP>MFQYvS_Y0@d>oWj4r*?XJdTe4M*_v*;cr7ikYPDj| z@~GS0+uOSjr+elAv{$F6yNXYWe$Q~P1?0ixW6Y_2-QL+)^KwPy&nMi1U?Es3x35}_yva!xh>h?Ynh)|YH!5TfFkHRDs0v} z(}v}6wIFen467;5OZ*dg(I5#>CWX=Y1LcSP&!FWEt|X2{P`ow5j7M4#@~WIt(z6BO z|6OfykX3lj3oU@ZbAyT#f}x@!L(XZ)G_B7}7-E&=x$2o>1%QHIK1uog&w0*{GcuT1c;6!!)X zh@?(;|D{ojDC4wJE^W?=)yT{GWA#KmNV6|#`PY4pJ8`P}PJT*={QR|F`#b;mAO9zZ zM~6U}lH7!B)KQH(iZRc!!SV;v!6gY4E6G+KoO#wVaG|mt2FZ%mNaEAU87j75=r|2} zbVQ>kDR9KBCRz?J)EeY_g$|IvaQnjODnB~_UdRm=(V^A3@0_U?LgQugp)@rB3xYLF zi9lniP#ok<%kMH^UN^}PR^8bAz}yX1EWxb&lo$|{LPu0^ftdmWCd?yhyx4ewFTN)b z8|f^F*kD9^obuQphbhLo|4;w3|MXw~>;LuN{@Z_d%pt=MqKprc)RSAU*zGt=u&oec4-alw{U32AViXOcXwxIX2OW#;{&t|Mx`AUzP+dvMQ$ zFJ$=+f=Hh<%zec{KO!_bbDv4z2m6423|j*wdQR`8PbkiqrSObOE6Y)2{J-g|$4fB*Pf?M|G}tq*&l;D3`^QSms{>shD+Ui`oSJ6{hGI0{Dz2hT!t@pd}PGT40ez zSpuBiK@QPM8PTuSmQnfDMaw(~t0t$?xk0Z9g0*5ir#uVz#yAc z6wg*BHe)~f;fmHFkBznV4-UAedt%G`(E!r16%R@0OyFE2IF*h?aJ2O2#)}EQ3+TGlUl}95L zvL|O$*%(73WN6IO!+m48j|w<*U=By_U0+>2qBX!wQ=co9#t&jOTN#;w-xlRV^Nm{i z(o6q8b#EGD+j5vH;|8{IB4LYNS16yF&u}s1yM64TZ*Gtw5E`f3|r0* z!$$HeHuA$j4F47a7>FDgHY^7k5XF+r$ks%H7R5o2q$oZ<^1XNO?e5dvr~8b1@5)-= zs;XUQKHaycEI#(#XYU$URjsL3t@=7rb~VeSnU*Na#xVkJbHM_%@^iy-5~Bi6tONRL z?{Kz^){%ED35{~*XjgvuRVsBg+g%i-m4|U+Jwl9w67k49)_85n_9$R#b7!4vvn#0t z$gJUSS$g)H&8I>TG#I1Cna$X<33}l(0udr#jOI4wgLW3~3D6@Cx_o3X?iV<(YyD_n zeLCN?@|{JF{iDM=Y#~+#RpSuW;W4JCjfksr3@!daJIqaik*3T7J@V)iQ)7PO!$1Ci z{L;TXIy{Xv|pq0{YU?A>5J+Jsiq&{itx&em43Xr)bJe(cQ+4UJ6S|L$-8=!-ABAXkRT zOQAtdcZX|+a#0~2GjnteG}u5hwq#uK>|+lC-(u}}i2H6u#Yll~NR6&cF~>N@i&sU- zBs|+Qt|&Eb`ZU46#+530%T`$D2Ia&E$$^GyV=J*O&_Q*Rpe+)#|2gez@ZNFIS#u(& z2|W-46@H?1t~o0BHz-RUgX&y~#&+B#w#XhUDFEZs@ZPJQy7zFFTM4x|L7n7gJ1Z2|KpNN&##@SyLC4Kq$zE0ZzX9; zweh|8!+|md9IR5}MJK1HN?jX|e?|~zSZNIXJvlkq+S+>f@S*q&h2YgtpIA9B9RDW# zB&A&{%2>TxC00G3?`jw?imDcOx8LlCCl2BD|)E_;1sNEjBUuOVMW?5fRGbd^3Dy8ERXiu3VxS5p=GY9WrUd-cqC|f%C#Au1ORhIvI7FDJ}E;Z5++d@m>4LDk;mfB7QZ-Tt4 zd|S3mg*E8$f20NzW}d=0boo9=>F361c4NX`mGj39wi$gLcE$RZjCU=7IiKdh2yUhz z&NsDeRtQ1&-Q%=&jPzu*EEH`bcCh-J|LR};>yLfx{~e2)I~9zxGcU7RuLgdBdzQ1l zzFuoI`rR%uTUu~z$SrcB^E9CC^9$dexuxDX7##ZWI3d;pwwH&YIXpMj&Vg4zIIicr zzT+V(^2;1-p_GGcSS*SmXh7AO@5V`=H-j@sQ_=Ru+Ro;>QZG@i&E~=W-fNGaq)W`0o10aa0t2~n zQL0-4LdfaanQS)O@r13lR;xD}O_F!g2~*WnG~NPk!*xird3AO57k=UIY;3OgdVTOo z`+ra#+Zf|W6irsg3dMd{(kt-nQWJ&PVdH|Z(YCLqo?3yceN`<@+#wW*?=E=q$oY|JY!(>lqUh( zaodb>LNS|gI5eUco==A#N*FKSwPAu*hZ2+<^m&b#4#qW0xhf_`RC(0T0E_UsRqOJ> zK8|*8@|79;sQGH!YW<@h`I)H>R+n0*#ZEj&V4xaUr$oBo;|Gn>d&wB;~A8ym=lID7HA&nM(ToO)W92-Wd z_+*t=%PfPA+G0y;=_4{k&Y%pMt2wE0ilTNUm+ZiKTp1}uTlTIklktrkp}iAi)uzS2 z;_c)*GG%J}3Y}nKTVX^Z(XN6~C>f0Zsr>Sp{9r*(mNd>0Mn5U#a5wg7AI7qpv4`@r zSLf|eh2M?)J`UrGvcpV}*MZ=fBH}VH;G`iB-{d#G@vTAG`(n_*6OwM_K=8roDf$bMLio1`sabZ zd((|s2%)|!>S=)!nEssf*xxaaFn-Adu;$k62YSrrQ?r!e( z2nn|m1y{$mg;}1(hJ!+GZ*0(iFTLMKZE1sKXWFx=G5HCCBl@2M^*U z0v(6AxLYvLcO!%$U6l4_L0Uor6sf#^qSKqrt*vga+itf>nC7jOZqG& zo6-6_Fq)9kYtLpPq4ov(Hb_qYDr9l4Ui)vrkH*R^f4O2z&<9hNp`V?&kbc!_wS*zv zD7jA4RJPUTC)2^6pmAnl;yxtGgoqj|C*;KPF!oG&JDrXk!%<6v=2(Y=g~LN^WovVz zUTa_**gzLBF>AY+wwl#G;l3mh0|U*lNlKw$il zAN}jkzwmso+Xa>Jx~&4uQKIb1FC__3fKvXaWgMPQI@;y_1r^rto$_5M`{GL~|FsIQ z)|O2xP(ESrmR~zci<))7B}&5SOw)l;({o!(U!6*2?b(s*gnwNU^s?jHH8#XjQ2Epm z3bKWy9lrxMGMd+>4_>+&gNC`Uy5zLw^7R-t%Cnv@a{!|1Pqb31kWrZf_TxlT%RWGB;O}`=!cb5$Cuvn*1!Mv|K30N2mf%%Ww=b5 zX9t6z863D@sqV!&QSU~h**S0XHE5ND-A!Ohz0}_4Wg(vt<#-Z8)+jb!YA{I#@+M@c 
zVXn#dV-Li1iFq&C28CQnns^7&IY3J4^N$|v@9v(Sog!F0%{^wMG$mxCH^O`*QcvV9 zhCZ(0P-XimYAzB=#9-=hivKu z*Esqcas{8sVpUlQ)F{`#!Z?0eRyX!zto1s0Z?l#K1Pzi3+)p!Fk1Z8%fGOk5@L_2| zQe)9<%9r+Wa()QTw0riFkWFs9WZ)Y*-pBB#4xc=M8~qxz%J#qnKPQv4al5V6u9!47!W}O8lD0@2Su>@NFFPJ5=`6GYhZ~WVT`^*3FKmM+b!Z-tFE#b>9 z?O716cZ03UwY4?At4^6-=YmvTVnW?+H%gM@lT+bI*Am2j7A$FN)b7?>p?eZ~q1kN8 zH3=h~n!$I&Xa7J29v&X{dc9@5-UKLTqgLY!6Bq5NlJc~%u|}xf+T3in&%2#&VWhLv z0RyrQ&r#y4(06$_bi}dvv6%~}Wj95s^J!Wko*TnLn^xB_n;K)6RoTo|BVPno^DCO0`yh^7x5i7jxoYG|BYpN^_wH-K_yj z1}n{<|2sdsxwYBv_8=SZ|4eCWe61b)yi>aC5a@75m7)EttXFGO;wg++DA`_jr-sj6V`d`V~wg!3(f6KXIi zyKqAuuCK4B)#^#>gu@$%$1IGRIwF>eTNX&~f=xN$sA0yDG#Zvz)ZBzOhY1}~Qf)MQ zgRI@|kOEw-CQ(i8?(V+)g+DcmblLkb=LEF2mk_M4uN~TJ2})Q7xz_?RA)+`sBz-rE zNr*dqa+D2+iQ6wH#5bD_Kkcr(ZWj*Ck|5EkzWvYtxxt_hS3NaiGuCd1E#cAHsk27t zv21J@XMHsqg=n`Miocdg%NxnOF8JRC}uVuVjNIz}Y}_Vu{NqBY}2YVCU9e|j`3 zkwASo!y7l8yG&%Orj@MQ-k$e^YH(uZkB5QEqDy@aFopN;=V8lZ!h9_@H% z8;CIq3b5JT94iwBA5}=3#(EUI)o$+iN@t}mp~(9m{NQ(e=tKX{FaF~H?zO9hi*aFs zX8{`fMzc|=)_A8dfU;74(B)61T)0i6)X02y(=cqD zaeuWd%9O&-C^|>LHhr?MzG^$Udb*XDliVf^oCLx^pSQ8t{mNsEIGM4|Uj^yAvs&o+Yxq;4 z*Fry)!}jt_m%%JGE>wJ_lCG>YiMi0&$5Gtt7KXGWfv~u{arN}W}28W z`zJ>Uv1P^VJrQ#&QU4h1x5BpN;`P^F8}tW}1jFO7GC*x^7mZ|L}o5XGD!YxLuj*cE{r{a&iRi45dmrD7jgkvY^T_0^V!uqvO zq0vI$S7nR0D6=)FavU5s-HWn4#`Vbfrj>0ETjPd9OCD|O^T^UcKHsLWI9Gr?qjf%m zW4-dj1c{BS!}By-nJJ6crL_XR2&5-liv6t}i+1>Ty>O^5YvqD

51Nkve!AfJj~w z=56{_WW%?1_@Zb8!|M-9P5fRFKfgQ^~jO2gjr9DRF=qCyEM8Mrt(i_2v04Wvo^!K2pcr zl*RVeX0=jbue3XlGDNIW4pw{d&MNCxYG=C*l9FwA3 zKfmaj&yr`KL)QXk!s34?APu0u7oE=W(ec`96J(b6 z-U-1p1T;T3*4Ier8$;|=Jj=_4L$ExA_@S1(>s{~sp6~hKU@*}3;H;-FAX?heE1TJp zGMgO=k?{o3rcj;-c;!rks%asYo>uyn;IDA>NGPPMvQ#H+i;4$8AdO<9^t>e~{44j z{#!WL{WF-mKeEN|CkyutG%ih9h>XkllneS7ytJb&IB;c3y;Hz*Lm5q**iOk2!DFPY z$|vRrqjjl0d;FWQOp8yA_mhL>hHC`BV_?enI%t=?VU!m6fB$1Y_M89yxBlP{KR)M* zz}o?8&)V7w5x~62*k;KlT)XC;9v;5_`r~-+B{#PQdfFi(YWC^rsqFhoBO@18LtFYW z!fq#jGQD-~(8~i6VL8@3KN$%Jj~*V7%1Z>3kJ=_2j{)mL_d4f|dcAtEe{^)z>-7t` zH%HKKN%&!DLawT(t`Tt~$s$gPOP^evUvRAEG?fk)@Nj18r}e{wgRQO2Cx=JVqkspn zu-&%?_kr&UP)o95c65BSzSf`?QM*{&(GmNgD_X|HtwYqe`hl*rZ|?v%>OrN8yby3R|12bhH+) zcbbxz%3f`tB?k-A6{o^-v~8#s(LmZ(xDDE|#3pEptC29-Lb*jHsIe8w`Jlsh>EWjsufMJwe*p^@Rxc1{@OtiXP_ zeO^1A>{nyTD*d=fW>-Z-58DFzT=;@UHtYh+>$Rh3d@`}7C6}3CB3xQ z9~6Ur%9aIEVtJ#QDkeH#tyTB-cTZ2+7Z>M-15y=ot|*ZhRj`LdwR%0v)sx3hsiEvz zgW za&em9+!zU+vEOc5((VaN$~c>! z&`9B2XpD92b5$#0!qP@r8A2+Rn<;vf&nQO^J@s_0usKXX_`C{ZBnSRDaypHAa*86T zLYikR*H=P{+p;oIk z8l|i0SQZmp^d6dpMJM(Qn27DHtD>fYsG87R=Kgk^^$PSa|gH0zVr2^E;6tPjh0P-kI(cB_oatt2~Jn{pI- z{uH^c3H0nBa?qkwHx33KUNlA8d~RJh0wLMZ>@R4TwOUQ+@=rdZ zOAE2}ylsTSdVI-Bb&Uec(d8FkzmWPsE7h(TSF+0_8l$f)C|;1_yHIA6B5X#-*nD-y zrIk$X5SbtIkTZ>xr~G&2*hbh?ZYw5|aaZve*HLM#jG{c5u>3J7ze?#|FSm?ME7t(a zwJl?Xq~RMHVoduETP0tx;*jJL`jKEVZ1raC`>QRUT?6EW@gkFv;!B~$cQGK}G9{IA zT-~^&6YfxI2566T+XdTG8pyO#`Jex%fAsN>e zjy}>H_M#XJhEy-HUd?@h?@p2RyXPc5RIB?(hsV^C*hCz!itlAis>eqy(uLt{#nZpw zh>^Tvp~=%5^qlMB3ld&<{!z4qIdoyWZxb$MXSg{aqz#6{!=vMrBpZ;3tl%JFIpSdR z#ZdKnr_)_n$W7o@ru&co_>aBoUGMDky+c}F+5A8kAAiC!fpWo>Zz0BKS`pKOr4iw0xKaBV5G``Vsa(>=1xIl_f$R_53jm(10&5iXZPY*q3H30uCB`oaA)fh81ks|V58CYwkbG^apW%Z#+6c>3j;Kmw_*|JVMWe)=ixnvTeVE7HIz`0M0u`x4jzX z@;R34d9EH~r}ir?y=e5EaVM+fyDdj>If^5fPkI-;vpi4`XY-bo2)_vWX>$UJB~rX< zD}2Z@HqY@axR1unRHAJ$s+2;M9dQS~yt0+PqdNHRHE*Ttgb`eU5zByTHy(a?!qGG% zk0ak40r%+E#+9c0tFY%mO4l(We?RlnKlKN{|A&A0@lRALcMxcZ`mjQrq1hx2E|HBt zrv(zh=@3i1AHFXlA9zk7^k>YOc`{4*(|2L)eSzQ`%PaJhy;iLbIO5~1`Rh^^W@~eU zT0MYk4j)dzYkDM2YS8C91ZmLYD8@Np*uw|?&5hN1trqor+2PZ}EE}@ZGXIxGI)NPm zmc^)WwQJe&rDlKcU zoDfS(pjN9r;}W>q5;Ab7`pKXC@$IdxEN5}gk|E)}Ovei4**W_N2uA=xE_ZXaBe*5w zH;A5raxBqz#8Yy+29&QwIMLD%KLZ`ZF5I*Gh-`kIq5GyR7auJb4vBV1khSE{C1Evs zlnTpB6%<|ae8+@r8p%ACNOD?4m4%GgVuHWQ;XmYH#xY6v*}e_O+{z>_jaDWoIBby5 z>`)XAi1yiGtaU?YMj9-lR#=dfP>O~j1wgyBwJv)w7WHbhdhW%KeB`Hzb}n3% zP6X8<#-0(V$VROO&rN$L~ss|j+I-pi%Pn2@Y}JdnuF>8JvIDy+Bdq&pkY-)vBa*paq%S0LbUH_}C(cZpJBOzS=0yO@>Gs z-A=nwiKFA=Bg#cxW~R#bAH?7b^dHac7TaSOG&va+_mwoF9BOLh3>z__DC zixS>A$oaX8PA41Y;%KKaw>XqG+whaU-I4u_CENV$D~H8=;htGlHl%?E$h*@!im&n48B z^tx8B9q^5Vi*K1&nBdmnp733fH;-E}IR~*&Aga-QWmUiI|YRjnjL!pfG>Iltq);?>j zCXGsM#j@3*95-QrlBZDl&(NPxdKf7ICm%)=_Wg8jf2JR(0eiXe(tE-z{NG5YY{g~iwj}&xxrgNz9u=0JkCwk&NKhON`f zB3y5%0qWpE{xhy9V|+EP3>AK{nH&o74=VLY10qye3jJGXL9#`Z_{p4b5>LqK6OmiY1hBeLsvC z3N5*;;BylIZ0P=A&>swy-`~x0J6Vj{Twn8E?^i?JWf!w??o{9WzV{CM14|E`q1cq% zOz5>1B~m*@MgC~vNtQYqZ5(6MmXvAZxSpl3j;gv>mcLqXG#hj+!dIWc|7#p2H2z8h zU49b^XlbLb(zOX~wYj~{WrVET*T}7H<2+M9A(4M}E%ibJpPUG5Q8I87O+P(;lH>xxPOK;tyd+FQe>K$*bq%uzKaiinspD)S(EHN$r_yR|V z*Q3oBq1}JR7jOLYC<-kO-Bb%^z*q--Gs8mcOC9_dcETdhmSaB(YV8~Cl9hPmug3Cu zKN`kW=LaUlm(H`{+uru}ANYaq6R%o7;$!RdvdeZEAbd0$wTMsc3JsTg>3GExOStHC z?hk$TdXWkA-N+r-odsiBZ!!w3#?ztF+rBpbowHO)nA`E=Jv^jNkvee<0(u zDAjEM&4e z8`551(a5h+Ft!-&e@kU0>-B~D0>XpH{y*DqQ+8GmX$6LWH;h40ly+UQbqNN_ibs+o zm9aQ+NnBVlhe8u(nM8rdv;Aep5=<@tRVJSTf@sgKm6~k3_MOw-oM2zcG&M3U9iP{3 ze9JzDt@0D3ET_aq0H(;e{%QZB|>euZ8Qgdb2X+nXEWErHcwr-$Z{$b-|f zQ++@9ZWJ6MQfNFeT{N;EHgA&pjeQ#H3;jGkK3123!g2liU<$LowpyvA9Ph}6(2Dq4 zxtzBa8t~TUM(SLv 
zZh8(~7lpDCmm7_+%W4v2N_f6IiX+$3lG~L7G@eF& zSgsE)Xk2<>hSPb4c2%fw5N%Q`1;|*tnjJ-x9HZ?W#f%ER`DTx2P>!)rbS!)H+zUVb z(?9W%kNn(ilAtt=H#XL>Fid1))GKNB>Dk$E$X3h(PE3nRx>*pV$dAU_+M18uBm%@_ zmN6bYOL`zF=Te7v(LHgiiYq^#KATgXs}C4Jw#Z@h_v4< zfjyI}h`^b}P_Fou*Hu8TVC%~{Vo_u_eT#g1dy`mq7a@)Q{{mX;g$TWANzxCA001BW zNklzUlGd7% z+`7t*@q8(uIV03b2%S|~@!OPSE&%_ktVVKM0vlu)PZ2(ie47f>QnUNfA`>)YESY_p$`Zz4dK(iY;*k?7`sAak~1>6{fWvWHx z%u2HuH$hI9UF*HwU0Q4Hc3TH#svBL}Ns_Lwtsy{wy+)o%4E`LqS{Y(7-igCk2xjyU zkwUm!;+MyOS-CthpHCt6Wlpk4O9s8gOpQ{K=1Fpautm{7W4>HYNz)LK)4-7p#IP{z z@EjEpvLsrG#b4)>nk!F^!iVj^Tq$s{ir-Yy^k8rIi+}dY9h^$L5?pEkZW?H=w^}X5 zzVeAk=b)wRWLT|Mo6Sad(bz9!#`6nnpv=P!fSzLxLOEHv~GTq<*QPQin?x) z^-X-`jmK}~xwT>59J}>kP-(SNVLw}cFP>%#5()ZG503`JVWVEZbH1ylIlbN$tu7XM zR0)tOzPGy*r*ScWi#RF1?97a74tP~+c*T%}6BmXH6oXAxaQOMi@KIe*T4O`hTC*oa z3Fjon3bYsjJf$Y6*J^t^+b@6dOJnqXAwRf7i0;7&aRI{V$!S|GsySTU7{e_6P$QGg z&5b9AM;?=cd5=|jeio)X2E*a7b=-RJ;DMGo=ZN{T`7Mkx*UK@#k>5PodsjW zEluLpm8N+AGBe1FIJTZT7oF~w%4Xo4 zG+N3Hn{zC_RWJ~tgTJ_*ye|eYYJ0DP>fGeZv5^V}CQNTmMg_*gkr>TV%|3-O{9W+S}5-6i^{kj(Af6x;6%Zq zv;t-kb)N2BBrr4A616W*F#>w(;~ZEnl`0cnx`sut1QIR-x57lnE4WI9SIKkDFxrG` zZ4jLB`GDu?=~>1XwNBwNo{lZO3>Sl8*Nv6LIg=WD$bcCG(ZJ)@i739eMr!83EaFeZ zoLIv(+MN6S{+zEM#fz~)Y<6>Vy;`X%#JkS1eID_d6wRTkh?28{2QEVeW`&qDGguu2 z+lja_)9-{`docFO@O`k6eQ%7tI9&AL0+BShM6I?q_@t>Y@GDOjsLnO87`T8IxI*17gePfCR$X&7+ghzUmo<>zn6k5Wdcme zdG*4U0zLKdvhWO9=fJ{J#T}aiyquq(6Yae@)ozS~7J0F|y-j?JD8A5e(UFMK470q| zI`Q_uOOcCf2R*~{hTivjy^D*BM~@!O*UBc_FK;IQ;zIlLi2^L}r2~tUU4=d1M+XOc z`+JDrff!&o$H-@p@{H9dsI9hMMAD12Xu3*b=5lDR!YPZQFN3z@+Ffi3VBgWII63y` zC2=;)_I7tRH`Y1G=O|1L(v&)Hx9hbkOIcBrn;}LhDssKCzLum5R_FZHa;ddMBi%*BIPFftECX6Cra9BU zU!HEy{=As6&G^1JV+f1WE`yG%gC6Tez&zwoq!7n6t zW|%Le3lG6Bze2ADL53;OCu#zyE@GFxxeBoW6}Yvvi9l2?5Q>g-jsh0f(rPYSxJq{N zZDim$in)J;ZSJc|ta;jBQelHSX>oE(Vzz?DKuQ5M*>-~^I8&`u_V;!lKRsN~%-J(c zL)lf%&M=m2POhmhiJ_7#1ApM=WCDTvpz_L8qhWjF_1AW{))eA)vYlBYL|HzlS5pb3 zKDjV%Vx3TdXJ>7`3qcAV4_`0?W(>~Mx0gwhj zIlbs~wZ6*M-nD}`RZYI9){+J#cN^^N?5wV?%KKUTb?om{pfX9#BQ`OdcH>Ye70m)F zfvEG^YO~wv3~{_CXZ=(saSHCqHyv;=OOAoT>7nTs5+MV+F%y4lC05c_7mruecq6 zwI?RYrau@|s}+&32o;JKCx;ayStR1yqm~GpGL1K17AWt<#kman)ZLBC6m7!fQ*6AWg9R}C@Tvok>20mji;Fi#yEi#JBD11o1P}8;HB7Y z2sAd}Skp}wN-PQY!(pBJF&GS=9HVOBJ18BtFs7Y{i+!_wEv{K2J5bkdjs4=42 zO{K{~u}PI80_8b~S2o6&>zs%1r*@!50mpWVh0e@WLZe=lu9j0rg( z%`ME3NP3(U^pb=doX)LO(g-&X#gxEg3G-SG6=_@JouW1Nx+QLgg8vnBTnX7{*f*M{ zQDbH0ZEt%^!jT`Y$i@t0K<{82V*j%ISn|$cx&qNXI7%cyW87}H2mL+`8X-9^aP!6> zqh40hbQu@E`D}GbrmKJxwlniF;)~eJvDB*mfZ6H9%m8p3U#ZLwO9qahyC;98qJM~D5F(c*70%D~v zz4WH7?VWb}jHooLvl_>LfL{)~VK_1g@k&0y#j}jJZ*gurg4U;8oJjaz@lhl=L`&(+ zgAp~5ZPt`pnr`We9u9_U>+8h!*P6|0rPA*Y7gG7DUoRCoa}34`^sIM2Th;gAD3LUh znsMAZkvRKL zSei*?mM?e_Q)KX^%_hjRK0G7bM7gxSy4@aYInx&JwXEn63O>Bs6wNB)`;c;P z>LVg~a(bplnbuu_UKGAp!(movyK$gK>A#x6UuVIj-o->U(xm=OO&&_Nq-VKI- zl|kz9PSnkVDqh&b>K#e5V z{@+sVK#re4q>sqb$;`5LG;1i!*IDWGx zoEA^6>j1G=EJ}4RjuL5x=<$=M-EOzhXd*yllp7nRq>{$##CIp0+RbG)fi5Lm$mWwO z?aTof|EGL4wPa(Nf$G%xc54)VoZ-ZW$0X&@fOVfuN0^=NA=O>d30hc-7AuyjAO^p{uOd-$mg#%;|nR#n-L+<%IIX+rlTR(WP z|Mbb>px>iz46>|3te05~k@FlhF*cT<={QLV?FQ;$>MSov<0ZaJ zm?2q5D+okd#kI=nN;3|e_huEcaLO(TBKb=6u|;Va*mS}$QhR~j3Kf2s-aHM*X@Th# zayI1E?tgM$7QZYrj1{g&-f=JzEwlzdN2+mn)7EcrE*3}hcUdj+)=SgVaw&fWaM@m@ zmzMbFU&>z4%i+2kIWjotj;?jJnMRl6Ou@aW*+ zAWb4}O~#j&q)A+QkTcOLR zH~&}f|29xR_Jki$_p+f8!+uf7hJT`1eT2heO^UfFT3z4ZErN>+Tn%3& za8neVAjCGG+2HCFE^T*Ig*_y&!VNa7Xr)s}2upI$wgj!gQsR!lNkb7aumS=q;EKnMxDjLVZU}*Rp1kdwculMV}@gFMHYNOt$ z)~Y-Qk~C&5K}+6luY0h!^U|B1W2)sO6P|2}Wqq8pU}GuIpK7(!Ialx<{nA&y`rE(r zd-Zy~R;|)|DmNwiN2$?Z!9y%Aq9~5$65;C`-tjg*$ygYu-a<)3it*SvK6?+o5UR>2@de zNORpB;@QF6n_Gz@a68`L<>vN} 
zrzUeE#L%|HL1WShKRSiWuOj3_V9I>G#|1^V8GT`@Z3Au_}7F?HCS? z1+|>HkQ}bVNdTeL?3seyvMTe6^sq|=zs=Wdnmol>c3x3@&sv;RE0lOB0v`1%iE)On z7!C&&J~7FsrD<8!8+EFO*niL7wai4QQmGn?T4LuB@eknv5rl?bzej7E)@H0tC0Qfv z4Ep_Qm5^3wv29^(@;Z+calM?;Zqk%*V6}+CShH}SCLC2K&l6|p17m|?rz7H&akmo8NYk%=<;x^HN|`GwD^x5oW@5k8{W>csMkbhaHfK{)JB{?4AMFyU;~U`d zl~60R?(b#HC}q%rO+WdqQ=+gq9=4BBv4Lq5W*I~V6!o^5Py)AT<& zBx#tYWFE_{i#*wex*$%RnoO)~I)S*MWtUe9f`G^ila>=L+nv4rBu-C`Pe}IUTYN#< zSS4o%th@rhss{)Apa0^Y-l!em8bRNKqeKkEoQA`*_E{y3yBw#A1)ww`>cclz?CUW#FRYrkIup4SFSaQVrDh)YEx5sx$kO%X+s3eR&e8zXWuXc&7Y61SrY z`K~Ue*SycIh4;cR5P_QadyE;)@^CW1sl-U`K_kw_%#ukZvn`GvCMpyZO^|<YT}$ZIJRuGm%EAR;#9Q(o>6rbdSLe zje=fuz>fZszzD-jApGRC> zrAaTIjdF61NF-)%^vbJWd-ZFtHyfne)TFetvva>=?f;g@!OdKJFBooJlq^Dg<$L1p z+A0v&(;6S~zToo!PzROqL=~+dX391sNfWXDTG6;fggq9aS8+VKBoXBs=P6<7W&Kp? z(s;o9lHR85>(17KAr!KDtxf_O9|$ZEKMojDVt=;wcE-45m(E636Xhrh#8KpdJBwz^ zp~F1OC9mweI)| z7$-*peOx>B06PaHQE?@1C@zyeI>onQDda+gez%02?RrqSx;y&|cc`rH?=mWp|U9*>C;M@BPMa zehge+G;AZ(e|LA+PfL$I?SS5Nx;;5ZC%Kf<`+Z`A(-&T$B-T=?RQW(*##^wtL|2t$ zB>I{N0myjcHtVxxDZ&L9V*Vw5%-q^oF_%Ox#0;v)-i?7E3o1X1Wy9DZ0c)Gj*kf;) z<=+S>5T?tv;v{iQ4y_SiG`mf}s~RQ%W`%_d78rpUgB5CdQrN-ODwBG}vQ-#+gbXGz za9vOgwj>)hq(LEVk`2F^pg3Ze5vchw?^P24DPSHWoRE+cL)0Mo%==9Ffe3L}5FaHY7sKS|5n$5J}z|_D#xdbGvf^gdSYwYM(tcT3qwZ2mxAROkn$AQGEzF4`ju++?EFIE zZZ8etiB?aR4}|p^wjQOzs_6q;l1Wu1&cy%nlHu(bMC%p)DpQd~STquJU#`A3P#u}g zYLPQYD^1ycILwAY6FZ`VG1x{TB(w#=V zPGpTpnZ^#fvO^%8*Se%p?!oYKk@e=RyBk`PW7C^PkCqq#dw9t<0`E1l5rQV+QR`$f z%k6_PSMUrLwuXy}Gt>an|PTcLO^fYq=to@D)fAE-W%2uQJ#7 zcX!~XAzm#vGO?}^^t~}|XoSuXeT9wo*xxE}PY7YLfoq99O64J>EDK@z;n2_E+8}ug z&}*6(G$q6i%!-cVd?F7D5iTdU25K4(S!VK=ZM8`<><`59Bg{HS z^gVd7YLQC0`&w+sd?E$&1;XnlM-hO zr7VQoO08;uWa3P)bsLc(c~$3Zd@}Hh1lP!wh~*IEfo|*}}_PBM-|B zjJ6TQG#q*+I#4}LAYZb(!(i~6ANySz^cP=v^o{R)`!~Jo?Sy#_6yIEFpPgixz1>|3 z3?Igx9T+VKvfS1z9t4@NioBj!n5j1MHFRq^*i z3>y*x(jqHNWURjt&M5mH8L{xQS9~H)Z?<=a$%d@?98SxkU~45&WH}+&2FnqoDjOt@ zyjcjLbzqLR@nU5Hwa~CgI>LvAaEJ)a(n45a80RNKRJ(od_kG6(PCILdnI>Px z(6(tN{Lvr($)`X2`9{6=?yrAGzt>&e*rK+do}GtTCLcUV`Fu-xwThkM$mu#bL7)pr zU)Dog!BpdX&Zm^-K~LL9o0EPHOqR1G7%MCm8|!N<`arh8OyHLZA7Uv^&)ULfu3XEt zfW9C4kHP&yeXPX!DG62kySvn8N^H$|ewS9kVGzbPG4*k>UN6s&Ha50svA0?+js$FS zTD7EL;lM!7cCeL|mHmT9X|;BId^F7Z1)nm?A{;>^5|d(8v4ou$L>)TaE{W|*5T|v$ zR@>X%U6M(7@^@EY?u}SVNF5Hdlhbyb%FeFlRd2K$;@w!^xaoFYYSk^GIJ~4Wy~GRD zF!745(~8z3th$<7PH7xdbcK|2Bcz)V*b6;%J3XR=89b%WV23Pu!{xydX;WCp#kJ6K zAqH_(W>sEJbEbUGN70HZU>}Nb#+@Zx6qaMtQLI6t)=TH8wONjxPOw|1^zbyMl*F4A zcmQ$09Q)G+oULdgP+gRV*ey&_{2xsW#V$Wdtx}If&t{dHg~&*@BkisvhmVNign&4FgZ;e>e12++Ctuzz=zAb)iL6A!*S@%@R^`HErd_mPm?CG{_SRO4UGq5zTn4wx zq?rSLVVDKdF;i`rZN*p{iBCtf(U9Z08oXC1Z-tMXs=_f{-@>p8c`4+Xa`pOyZnw|- z$M_0g`qBg)5?l|F?3X*HKGs~D0JUq;-k_y{O2xiF$7`7T6ddgYmx#b@F5OX}{RUo# zDIa@JL~GP=0472=81`vE=@U$Se6FBctzlCbp+cDMGgBW92c+pD+(WAE4r?Jk;llDr zZ?08q2t{JxrV5S-6C#i)Oq1QCn7?rt4TW7xvGfAB;Hcs$t=uUh4ZU4EW~151WayYI z0~@rdR(sIjRtNn)@AbOeBWCk9Dutk10$hmK33iGz9oZof&TQ;XcI3y-1HJ6BV|=Ek zLF~>NX;}F*RD2qbYOPAS!UDs;V{=Sl^%=z4)o?iyz9d;S=dy>6&|d>@xRFKVYgr!V zq#v82X0)!$f@EJ^U12&8cRatPLw#(w&x!9w?zGsvf^Hb}Js`pYvp81>hEJ?j_V;$M z5_y@!wq)H$obauLbZCUx8G=gC!g>1SF-@)q4-XJg2m4C&w^na#?;P}p+3DG-VU3nn zT@Is>qT)q0f%(PnyZ9i1yqb8^ZZO>3SdZ?3<#tJko0~|;Md#<|E~Q#uU*Fl@7KvUtCMv@k@?)aKlO8d?SPq`hJ{MILIv$u17zdP2ctUCB zF`gI3Q8MiJHBS9TI4UD%r2?9SR=5NkQMdTyD_9_Lnc+Y#%7ht!l-5eBm|+;=7Gi~l z!C6%RTqe6cvH=vVC6diKk#V)M98-&SS-=yDks`O7S!07Gwiu$qB34&YKEIT}1T0S1 zZV&~1i0u$lPa}>Zh8R)?+XtvELUBe;@zQ%ftUI7yvb&tdrno&p#ARx+aK^BcC2(kt zST3OkB&VlsNhuHxO~lcZE?#~0RpSmCje$=_d}U*OozxT(^?Kb7l<(ZH>oBo@6aJLd z$L=sbbOHWv#8UysDSZRZ&f0mg)GZ}py;HxAJxu-i>_BLL^zb1K1-$xXHwTf|qUi9r 
zg)?b)ukY&n!FQ8HOt}jqGZ*XYYqeTcyd@HZxRB$u+8yzSlOqTevdEN)VeLOYdYTOf z&pr33R$v_w91)s{WONHkpvsFF?Au{Zs-|r6xSV7{=rDY3*m;G=+ zf`C`f=FhzO32qIdyFN;!fDNKu$Hy)4rqKvcg@ZSGV{>!2*NdE;`BDJnKpVeJy;S0B z1giI<(@8N!3>3#a6M0bi=5PR}IBwLd5p>PM?M-;r>&by9VH#Q+F-Rh?vH0j}y|%LY z=;4FUeC}mp5a98A#kbF5AZ7|jq-o2s18cRgsX|p{50Yq5ks;2578?<-K+&t@V3JS= zXtXi$tAW*Es8EnW&unU&24G94ig_g+WC0fqff+NbzLBk5`u5IziB5^DSY!rARzG-*X zNPZ&ZiR{txQDR=*IUJ5Km>@BTheguHkONnl#72>X1Soxv-jkpzSC}(JQaK@`W#ap^L~t5gP{B4&>)E2OfnUOgP-y+}y&64iZ`V z{a&Tk91Mmbs}2hv12@M=-%X75co$QwXff<$)g%X+tqa#OH8QK{FkE%)~ zhG;3#B&>^Vpyp0(zee08qQ#^ICe;KMP!3x`Y?ufxu`>hL`c9{_NradcubiYv7!R0V z$|rx2cccod<}q8sduA<#fzcI>!DSWFHtrQnkLfTwmv_ptE=d5{7S<<)WA4;uXrlZ9 zuD`5y68DkSNYZjjtd}JTo`=+*xKicgxrTRxrO1MhaYu+W$E}0nQsB_eu!`%;nR1bC z4d*EFVOG3niH@*KHeZBd;6|X04H`AX#uSZDf~`#v-WB-0gq1E}7dALCWzYO5){~4q z=wx!l;Q?39b;4%ew94wcvfPeQNxc(Gcmzh{hf+!qp+`Fi2jz`7-Z(rwlyl4(&R?yq zt;yw{G>8Km$eMR&lAZ09Ch=OPOO*J$ecB%kqQD=z4TIeuT6!D{$3Nz+EPN^;5R$!V|Gl|E{mzI$?XBqw2p!^L;a z-W-^FbBM?wkjSD`yz@%Ea3nlqs${7%hgHX!N|x5;bq&R)48CW z+uNJT(c#l98xl2;y?5=Ruo8zxfOy3lRD(f=Xge`B%&?GPkYTTBv5cFY?X5=-4!-cG zUrwf7E)>qZo1TTsfW8MN4p}PIO67QnXSJ?B^ zDttNlh6y(UEMU_CVeO4OKw|e(b5f|Pl~f1x!WS}?qA8F?Qd`$I)|)F$nQycVHa9m6 zh@+4d;$f$!r$n-aI)_Vo(ugF4l6gj@vj5;%sfERa57M{(L3VklbQu{-tpC7)Es3J zC)HSob#evzp5s9%G{S(0AYX7f@Q=a{buQNd2v+-peq&{wP$_4bvCo_rxHi$e zVl@}hHaaZjTRXFIL#!Qmfta!o8i_ruudf%_j){X%(rc?L6;}P3SyF|W&SZ*uP{DNa z4}SR=;OB+s9&y|fInjjXf`;<2M%V^7h#;sD8O=4$~xF zTU#x@`ZY;BhFDJprWYL~TbO1HCegAaqG$@8p0;uMbE+eW;ll58%oB}U4n@O+-M3n} z{oS#0#u_Zsx7Y7Iefqf9U%|m&b^zYqTqlhuf*X#c%n>AOEbrt%OM&u+6oSL2PsL7? z=W_g1wq%shTq>MTh}P>g$t$&5TC0SD!2}7gp78MLKY{unkZ5hDj$=mS8E&{9g7vjw^>~h zre>K_&K0WgK=@f*X|ArWoSmL-@9u7GZasbaG^cUPRi-&GZMWBp?po_m_r{qySppS= zQc@L~waU}SZ?My=^VW=H(b?GSk@z7h@LiEgA#5~+n|eheEY2M36>T!+cG6cIMq z!3WDq{55cMGHcV=U9xV|N)90)oFPh%NNn8S+woH2JqL3SjuHv^pvm|6@e>-4m4t7~ ztT*^-voC+;D?<*rSRoociBEG|LbP6v-3Fi$qB#TA7h#qUp(?pZ1Ctfod|W1Jd;Q_a z4%1j&dK4mrKULx+4O;wxzw{m7^1gQ;w@!ZXU;N9iyGz6bx(L;{y1Kfvv$M9g=2^vP z73?3BWqcUjFC-s~iK^*h3s#;v?lJF?ag=EgWE6ZisgVYI+W@{sGZ&d7 zmsp9szF^&DbxJypcC%d-o8s)AYp?uD#P5_=8i_K>shEWvGy*$g{T!$=IEEywQNx{* z>J*7uoEd{1JqlP zz;|+TB0Je?HiRT)Y?^tP_5ON+`d7m>Ry3>P02=B;(HUHU{AWrl#SJ8n#c$CI!EgvBp)hGCc|e zR3g&Q9oVhkSyMuLfKb^!zaWJ0891gbd_TwD?)Jgn&hw8RzVY}8N5zXQ*_8*85#^dW z6(Bq;+2EXUl5p(C0$fMDiqf@EavM3`;JDgqqQ!tU~Oo=@+=D;iD|?!sF{yK z3+JV1J*Rb|P_BjKxdY4RWJ@4xbuSNIyNJUc!SD@IHr>Cxb461ix(B-p1x)mW z&a?A#S_>PSTQs7r;}(3>d6a6k>aESo`Pf)UaipCcw#sI>9-adhdXs4+GMdC67yePP zE-kZWOZXmqs61w8*hmwBpfgkakstcrZ+`DL#m+(6Pq5Y16;g}}fL{PA=u5<|suxf$ zxlrxy_K*XqL%7ev7^}paH?33#y*{WgLN~ip1R4=DH1741Zi=`X%ON5}pODok!8Ggw zw)Y1YN_q-QKn9CiHW70R5hR(d+*xj~+LcaG z8%X;RA)* z3-(RGv9R7P>*DtI2ZX|b4e2251IUAnDepqms-jG!IKib6;P6c^KL33m{A-lvqSJ+O zFS48Qk=@R>+Ww5W8jd~{Qirp&g2#A)KohDAmTBi6?$I(QC#O1^Ugmu8jhVBG7bdtA z$ik`DYj)Zf#o6iEO0&7My`woea89~!uAS(!@Nz^lCU+|D{joQZfVZb*wk;Z&c3m>8x(rY|#Rq3m!kYqNJTJcR{MyP&rCK3k^UABQ z794WVF4so~`&9&;xGb2P(Tw#B`BYm~GP;1%>FUag>^Mlk1P(8XqeMA&P&FHBIKYIE z2IYuO_%|$Bpn$$BGP3YdD}$yG&>9(MZ{)mV~Z4iU`o%C~o_ z8sYKKZfyjA&2o2E2yvSBqkYT_`S`$O2LpV%Mjo zZ(PI-ZB55)A&B6>)$esznk%>~snBxHP$@TzY}4SQgOI29y!#t} z;DaCd*nj%{Pki!^DbeoE_TJ9+n_qnH+rRa{*xcSMV9FliDk_eR1$T7Z7i1jU)nT#z zuLn#XjzdD2m%A@$ROGT}XuXV~0(yyfR@1CjOTe5(H|zZvTnqQ5B8U#t(8$#_ByXLx zPWEfp>q1NU?LkGB-_WnuN5h`U4KMP9izc0-Dbp zrW}+36iJD0!ROtTcpqDjLWzo+;IM<1lOCFFgMD_vkDb;pSA2V5pHH~;r9BsBB+H*!LYG03?d!)xt-3j7YV7%TQe5IhNo&l0i-2 zKDg@9U2V%wE*!vFiv=6HAe3et)gx^Z;&Y7n2W$w<;d3h)V`k_j@5erR4lOy7*c1gg zI*Z6aIP?BoByWQ=N|C~$UkEV;dJScW-$n8sySb4_6d~sOABj;Y^pA7dz<`B3r|K*l|~ zYs05m&Y9V;drG{uxuJ0{nQZWo_?yCQ1%hc2*q>acV_Z3E 
zz9`iV0wHyOZ}%_$mw(|Kzy2LGPN(fN;=4@dmDR%t>CI*%cDKRYe}Im~ifNwN873|~ z2uhsr$ydbm&fw= z0#{d8UVnN#QJU9x_4&mG@l7@hi;Y31F7|pA6iSVw>S8`6!af)b6P>U;YoI6_duv`~ z*$Z5oc(JjgBCNkAGH^|Tu1^&ua&sRMJ|^<)sAa^^O=PmjE0L(Uus0>T)48C=rjcxy zin_`1IUv(0u<`;%U?m&L5^G(fwe0J}8L0gUPWNQQX6m&{uV)4(6N>?(4ksN9?9}K; zVWk_E6qK=cdu@HeU?ecsIgWd35DkM$Ba)C0`b2wp(;DG-h(!+APl@;TrB`wmsN3sn zCrkOU&?Uu?LlexPYqeUX!eO)MO_!J-HmYNEb_g2<#CKXUQKws80vHrka=UPLq3e8g zhm-y_Ua+$shwzLe3DG5YuW62Y)>E#Wu<$V$tNPYptZNaU7pP)n_`r9Jz4q48;d3v( zm{zOreeb(z-hJk?pZ&8hf8`DdV0;gpnGN=Lm(jS1-R_8?g^;POy+%xWven9ou-yTk z3pOh%3SaE4Q!NMoz(d&?B;t#rQjQ!4Y6#cF-025KDdDwj0gtUeDmETtk@P_XnJHsG zGO1S67oK~l^vFC2VIe=hWr*+2uaA&MqSoL3u^)N++upLWy88IA_3Br@N(f^61Dt6@ zjM`XVC3H*R4C8dodxb*}(khxh$E_BL+i6-&D@txh7Y#=^MA2inu1e2sy!7G1s;KqKTHybO8i#W+rY4NctWM;&)8%)rLW6;SUy=8R0|c_xU~+kqMqR%B+R(DXUD*;sjynNw$y^p1R8%EHq(}%1^a>kM6X@Qv0c z$m+!;8&<2{TF_Aov#SB=^WdliH)z;#@LN~L1%ca&ALMZj%?2|#K_cZlOHd+d+~E$6 zX2`*!;y|-rcPz7!A^MnJowUbe+_h@8*Vn||S=3;qzvU0*vmvdm$^;O|Y z-}&@YA}77}uK)6X`RD&a&d7rzy$=K3mBo-5m?_oB%C}mrwbd2jFIlP}QeK=BAPQBL z;R5~gvbfoN7V`co4uERSL!+!t_=2wFPSuI_r~m*U07*naRP~L*cE;_F;qS5^P}Kyy z6M#A*r$(JFc9PXR?g;9YCEnWHsLe{{A7ADHP`fht2O3HwGyTyIzWA_<6mfDhVZJ>)K$ zrKuRuL1A1lfmd^jTo&}M&|&H3Y&IGiREj(sBs_Si;~BL!6#2;#eur)f-cxF)#J>Xw z$NHWH@=x0Els+&HcnG|8VuKvteSkngl3_S%(znPJ;Vl?#T*MWWRUq=fl$t{8oGJfO zfj}^bxuc0p%sA>rkRNN+N~hapy{QX3+Zv^Mi;_j{erOoj?NTtgdrK=8NudbAS1dza3i88w9P^o zJu<#RF@kY;G9{Lg2>MWR*@{_A5VL4**L*ji`r3nGwjGn0m<*x1Fa&6~v=>mtRiM_r zLy7>wL3D8I_IlvPUFYPaT1NpDP-W?}*H=D1kL80ri+)>8g?;<*D7(Y)3}XWa=|Nsk z$nEmd;?eQ3g)4X<)Y{r&4@AO-TMlmBE@G_(V@hpHOP{{-LXP-pytR&mI4y(AJvlPy zEfhYNAI zn9XKZ!}A3UW43RX?1BqBTXcJNNbQn#NDd<)H(EbtcPT=VB1a^_n0O*Ol(5%+vYs6D zZVztH6*($xt?tp4^G^+-Md1CZ%I_X=wpe;ST9dABG+gFlZJ$aSWMC5E~}I%JVzZ z+wTup`bH?dqsxrB)9nz*BzJ_)IS)GtiL45g+|9DvHIH;=5e$>dZtr*djJV?t&$xkb z8~-VAG}M(M6iQ;|CId;hhykM}3vn+2?+2Q%8JLCccBO|_v!&-5&QORr$Vm2}$W@F* zG7oi^<@SJzDx^L(f$$IDW2Fz(I!CBTK^8%pl8*DUpc3+S4pon`3N+_>vp!LN)g1Z^ zbjJHSlo{rF$<`IlyN5(PmIiI8%P3ppl^xj&(l*xA8{JX%4pG#x$hKKqTT#b&=av>1 zuqX(`(!W)*S;T!%ziAocvNQ8gw+Z4-(>cjvR2#P?Z)f-i7S}?U&Trld#dq!GZ9fm# z2_~&coAay?U(Gb?wNPWjQA$!q%4pJN^=G`kw?_OGYBn$KY=6sr zch*B53LqjD)=gC=MW|_5gfe8F?@o-FhhiP1LxJd)(IqdYDB`(1WK~>QS?(xw`;aw722yr^M5SR%B2f<7rkZ507Fwu(kkcH7T^*Hfd7xv|=F$(a zp{7?>w;>3vMT*aZI}-c%s0^sXE}-;_xg|7Ts~*B2_hs;G2c1*(tc~m}TT6faZkJ&N zIH4-MnuxAFwIsrGXvTI!1ObLv?YH_Cx(b850?q23d`$8I3AQw3D7b1H%VZH*q1<0V z+T8C4!wcO7snnz;C@%^=D1E(k`}WO) zTdOOYXiU~IyRB1ke0;LDwkq?`s0m9{9cVgfM<=x7<923;7rNZ78KHa{8)@S3ljE^PiCUTh0UJ6V9H84m?4a2BCtTc1Jct z9Re2jNgiuQs>3a+!8bG_JeWKQW`y@Ie--MbDHl5GwkpKwd(Ztq{(Ir2mqlPDE#Ybc z*Ayt0W&h`Y{u_dW8qHRjqffXGkfG4lz|{*ARlycvsWB7?rv;@7q}x8EI;Efp<{6;E zopC0sW`_D)m}p`2Wr07uU`0cE)Z2)OA>#l%ND(}e^sB~M8Gn4z?#H}Tu;kL`o3ml? 
z?&`>$Yn5CJs#zd7I7fW2;<6xidsl{>BF(v=p|x7@j%Hk=&;qO zM7Oed_7w(wxNtlAa~iCm3fD%1kE?XEl6wTX`zZWc3j*I0!Cix_JQQi=g(xqLV8gHd z*`NRJ@BP6^`=s4bEAO|z_Z-=y0&d3jTK&TI=GNB6Q$O~^Gw*nNtC@LZDTyZaJH6RE zQyn%~`Ya!02M&~xs#5TEp3zp8RuY*6WPk}8k+c=P>SC)q9sH*Q>dIZIr0qm`h!>saG21Rv6U@DuI1RI$&X%n?aN>J`g1>g zflAkMd%-e`t=3x}d*u3!o3JnX>o*Q|c4`ULnh{_{gbe;c)Y<}DTgp!KpO9m-u%O;; zgP!uHkOlQky=IxN28+DX`vHpLYPt-1tmy8_Y_Ax!`(2ea(I0|-6u-+G^^1WmlsxCv zYOmYtblcJyGNmZVmG_kq3(4eJg&T8t5f_rB>oJ5{+mcJjGE&eC$)e$+?B(8yypG)6 zFN#{VYBg5agST*28S_xgfbUK@&mq$VsGUt3T@qTQe-MR1Sm2BK~Gb2Q5Po zT>1SM4u$FZ>ev7Lzxe5AKK_vph&a-Y6&1(!In+N#l4~`W8Z2x%2TU%os%8<5?8;$O z&_tW3Gc%VvpYove>!iJR?=X5E}#K(_Ge%1TRv9--5PPeYC*tn4JinfE@* z4am*z$-{XQ$R+GO+zm>SMKB;KhS-Pm1HF^nU4s%vh%UKr97|0g48Is(^flLK26Y_u z(m=LOgu9Kk)s%K)?Bw#g9mh7l`H;W4p+v1#i!N<1z5L4m`Y(Rt(;xrXY!7qvyh3-}+@G)ru@B2(nnUm?$CFoVKq%w2Rg*tM3P`_mGDG*jeH=EOv zDOpJnA&l+W1~I7e%4@IvtAGDHue^3;Z~r=tucq|w^0x|h@`aaQf%u{nuHRP?IrTi$ zzlGS=LL+(Q#O3i1WK+1z!_!Wz=D@UW2RjJr5qUzPc#AuL+z*-b;@HDl`5mouP z-e}Sb*U)<^3SI&lnV=EC7??F(H5wJrX{(${KW!CQW2#6>^As1Tz5=eKqd2rsnr)J< zMn$(<&;X2_H!3q%N(y`%|Dg#f%jhr++7rZV_2<%`Vgz}P49_G{FcqhSvie9hlgofT z<0Uw6GVF}63d6V4!i2$9b391Pqobq!{r!bj>-#@=(ZMr_bK*;x`Sh*9i~jjverIq0 z#*;t(gdC_!EJifc;pkJKkWfu5l2KRGksLW#WE$1fjqKK%?HQFJj!^V|?CC0Z?k9%{ z&3|#`Pm>^RY#Bnc)s)raZmHKB6vb40PxZly;^_A6#iixTmoLBg!V8x#UHsq&-utiq z&40M>Xk-#R40M-FMp+X!7No(2?af*xgb6vVEv+HH!JP`stJ;~AyF?Zpn0~2xtH8?= zvEW%2%avL%bQLO@EDlSzt;Quezty_tqJnLX`TDDj(9NJUUc&VUGrw$*AzQRCcC#XM8t$0?cZ9fWR)puaIlrpmU6JhCya!!Zad6IRfN3BnXP8*m5jDiFx1xhy&zCAXPAx7Y1z z4!dCeg4UXXuZLSZ@)O8V27N^xsdJkcfcRl=Z}0H%P%T{eN_Wt5gYqKEY`pfym6u+5 zwbO2|uC5MsIMg_SMb7!}eD}Gn&GkmJAvAt*p_Rdb8EFt?7gcEemDhmx_3U$DrwS62 zuHRqq2qF@W_UQNsB{wX~KRb;w=pgFKQp%T0`@2^!Kl0f6#>W2s-h1Eui_bm(!*6`^ zTUF*-IZv<;M-Y*P&^QyR%1++d+Nf0Xt|k@F@>rx`5Tg~kPOEwbR#^4E4$7(m8$Se; z(*#039;eS#SPD?ij2bhb<5C+B)LaH9N7XR3Drd?A6Zn`Xou(a&GmM&CgVnVSSzX!9 zw_iRw_XWHkFy(OGM-cpY@xu1sf8Trm^B;bt9|I_5i{H3;@SFecx8L`kpMUo+yh{#0 zuq4PprhUptUe+>gLYC zmkKA^92^|7+JS)$Cnl=Y<13`DRu$o?(#LC(T0^|h!#7Yfq0l$JQ$}A?hx?f(K`Xt4q;cwMP3AtM$3z{FVI=wK{xuqUgCTns866{7*|PP^VXTwmV= zvsgsgfGIf&Tc}fr(M_1ko>w)swL?cQtqAKkI0qk39D(UTU4>vUel2%9^auhsK)c=f z?f>+9X&BPU(n3pKP$Fh7Lw}^DCaq7 zX|tGN3QoKQIaKh%4jvJzdal*0J(0Y6U8H@`w`*vHI}K5&)~d`mA|xP#s5&qO$tN6m zWg8$#1_~C@!mDI5kSfyTP$*Qd>rgy0)8#NlBRqlb(s)@p(3&bU51wv8e{2l3XYkWD zu;OZSCBs5%p-6CGZVc0XR4Pbo<>hSYF^HhjeKv%WEp1WwBn>(je|G zj%4b#{MU?Q?D&ou9LEe4Br z^Y`EPp8w-t{-$&8?#44G*oQ#g^>aHj%Va+F)KjY)oBO-h2Ewr#J$P~`DkziXK?|!$ zgQ^Ea5rpi2gbWph>ZY>1$OiF*xeScEby?;3QaM2)N?C8hn zlE{)XECHtE#pM+eUFr71&Q{pj6Abl}Fzf???EFKC78Vws`@sv({@_J3V%vZb-v9oW zzuKtR)#Qm>yAVEr;b3uWcVZ@rso*13KEHRdb8xA$WvVdK}EKu#}0PP?uqQYmTO z%pi=a>l1u^L;V8bR|mex;H<Zmq&hHwF$C8Heu? 
z9f*_~NOhSr;cL!Qs~JTQC~TlXDOh~o@J^2|EbY*iYOP%D$0oc&h>L)*&lQK&gOnKm zJ1xr6XI%sb7QBa?*wRmD(63h-Y69;U(0{`eW$2P@j6X%F%8M%>o`3%N-Q8XC8T0RC zKf;L)E%eZ512Nrm9;QAbuq<(l&N-e0Gq>(pJDd&{GTDFny)SXM$c|Q8+o+Q_l5O_g zXTOiouUNPV=?Lrf#^KSCLteSf&CRP)wzRExxSfAcNZ60 zrcn3cjjipyYgg3;hnh%4rU)H{>8#HJ-ulK0sa=s-*M?()ep!jAtk*2{WjdA!Ym4!h)Rn-79Yx$l8xyzKBX>e*Tm5punCp z^W|Z2Sy@@RuH~l9(sFR)Ok96L5JyKxHx3wXG=_~G+cWQdGbh=PGwvp^(NAhpgHdoL zk=K+caeAXPKh#W2>uUrk>S?Zk>(|FB>e+dx)1zGLB2Q^Es)JM<0^h1GhyyKHRDpUK z^q6(s!S*U8GnxV_z70KD^xUak%?APo5!50;1!1fz%L-*3oU}Vau9~eDWY(fquT>BS zQq_M&C^;m$)6pRRPE8KAd#1)`f`raS@!I?_ zM0CqAgx&c0taNt(kb6%~PF$2?)35B)0R>aW1vT}2hI66Pk%dVX^$(E9$!VH-y|r+3 zLa~_SV!%vK!BU&&2y+M9>a>6zV|8FAg3GaxZzdE*C1UlBDEd7)cQtrPVK7xh;%aq5 zP(H^po^o8PC`WJMeC0!Uv`WU6%ntfR4F3UtZ7dbsYI&+^u%TwUQU7_3P0(^qmW-Z~ zQ;UcojZ^R;L9uWLszXNMWUb#&Q8(ra+^}`%Az8HM>NJRi?)5u)HB+@Rs@S@)K`fMK z^5A|}sdb_AB(5m0G7%Y_hcl=j=HFK?dZpSDX&l=5<*%f>^PG-J`*WzxS$F1n0LLlo zTp3r$-NAccZEcmd^E6{79txGp=s*qt7c!Po^Q4hV%h`RYe_L+HB*RqOP6_|3Rj zlR>#yt!ALKO?`(z7+s(FF?2@_A_2DWXAWWpd1Ik_xn=?p`WD1S3u6Q2-6jyk=J(Fk&hT{Jy3H8E7ke@v{)A{I<2ZA+vST*2%9geBF)?hGs9l2eb?EvKnjZV~@@8Et8!nkcg| zQ?$ASYbs`>Yp%dptC`7rovw*wDm~kmm1Z%uuQfKUtWm8pJc*2DqjT;kv;tAjO!ONFe^Dt&*$$<+bhh+dG6s_<3-4n}%Q!^_ zvyrk-+<7%kj-fkHeMJH)2&+~_Srn?@Ekcd!n$a4pKC@#Qbeo8Q_eaOad~17J^_r=V zaXXPiIHpWPAZ17cE`Loh+TJl`@nlGMQC~8%*_2=Be1-c3wq)xW0(tBG{e6w=1C}PL z;qcPzaYZzNv!)yK_kHm8ob7t^!}OIkmr++-7@Ww`V( z%9Ng*oRH{NSS(05W7IpMLAHp|dS1(&66+aML2 zTU)h8LnPe-rZ0y=4Fv)UWzDv?HtxG^;G7^kAG#~BzP>IKb^G@1GS-@wTu_a*N2`}o{h0>Mt`NB6n}D1Czuhi@Mt|gY18T7 zFl9OPw}y!!`+d#!QLT2mouG}?lDg{PY*5vQ{bi7C2f|_GQRod{7LYgWK#On9d2Hb4rC>D#{76qQFxo&b~y@2)%IUh7g zS`%urA|T_AJk(++OQ55S3M@VOM1__C{jm-YHyzuM+kh!VF+e2`yR?Wkd@QvWA^|`R zuH@4v%T&nw;X!V@2(?Q4+mv<=g?oOcACQ|Wl@ijx#%_`!NZV}Ao^7O^>BHETY%u-N z=Le&s8hfk+!&uZyo-|ImF079rE1?MVLp%>SQz+nhfLDTAok^Z#=7fp7bYaItR+?Fp z-+1EDkibkf~3G0+Q4q zOpzS7WdrDr7^=taqMo_vss6_c!54B(BeS(Z4#*&=pX-s>kcl98bhFctPYqg4vo~`ZXEvB0rW_lhvw4u3DPac_ zhgbefNY9$IOhfpNUk9k76ZdnNMk5e+!FR?!MdUP;1JHL+rqS zx)*B8v8h99eQgvdbI7v%FGr1DWr0zD-k?g4gOa0<;sc<6Hq9YbvuVH}v3g1P0NS3J zGmBh;hm6>VNxQHA6eR67&^#Tl^6dQqiK2R7a9BQgO?uGU3et+PMzW#sIcN-b*jN zw7$Of*kg|lW)Y{ef_*sT-Db0qnC3|!+hk>Bsnu-4-WqbD8_iVFd;Qf{Wm<*wT9j%P zV699pG_*nmimhlJQvDU4%`PTa*MkN@7aI`~UDi^=gF1j{LNgV|NQ2p?76GpBUA<}% zO`oi2HtD8}+1cJo*&?UB+c|B@GXaM^eVk_knLc4Ve*RrQd*j9p!KW)LE3)0k_AieX zF(#f)e(!H+=sCsXP+ZLLXN@jO#oFhDG3IsU<& z)-#u%WpR+W!{dADj}*}a78VvAg5=fJ5Q!(0^s>YzY5;o_?D8m@tMXrvBQ>=lg-gn~ zRg~#1pz=+-9@T0c{j~a;y9Nb?j0W;)+09xNM@&asuhnx9=`>U`OuP)d9euJZ%cEg@ zg3t@5J7bs=RMT9he!qtz3Le&C2Y+T4$+KsIBMX(bWnxMjG}W9sPB`TN`5;2JLuEjZ z+Pxm#R#g0DaZ3~&EHKG;#(#%p8JzZ|>UO(FM@Jp4lU9r(it<-CVm2OaY$FrE_EX4t z8QcVSydkjj$r$GWzH}I^je3O%kU<$a$;(7;Y_(detED*Pp`O_{gKpa{$f{>^;l7+)1ZUMz8zFfATfKqYJIpGGd3=KsIy* z>VGz-iADDM^o_{*DNob`nY4M33tw|ix@#f^9Gbn+r`QqNqj7%&*+az)*dN$KAux{)yx9p8Y<^#j*=!leQIF4tHJa5RwN;ic@*{UKG zgb==pK#oGA`-}yIkWHV9ljGxBvyqvQX1&+bTve7Svq0oD2NqlsxroM3oi- zt0v?ChRD?Q$-)h~$g(hOTCxiiKH*W-Ay-jHiGjZ+mh#OtVPtND*$;N&0aahsSOZ*G z9r-bbg91>1zGe(W_uc%~)*Y0mc79Ulzvl^Z+AXvYT4KQu4WSeZPugk{MGpK>iAt|zfsYN!>?$s;X7cXw^ z>>S*$ zD{D$k_pV(d((yJ3H;xBBriqs!uCK3O+Sz&WN3TqIF@k>(BxDGZis0^`d^q`e|HjR| z{p%v;$~+`0i$yv0vVvC1sfH&3F?IBmyJKpaH0C*{bSyTRj%B7aD9^y?uVcZV_N(&I zNLKErj8ciLvn*suP4nPER!MYW9vN1SS?Co&e(t#5#J`r#LF`K9Le@=F;v~YKkS?-l5Xc)2Dd}7H+=ao&UHlW2)T3e{6u}7Ec@njHqFS5DD zhiJ_^btJKfSg3+~kQ3c$x8<3wg;uZAIXpaET3V7#+tXA4C?@3aEI3=qFoZAZbh?X+ z3$qtlQ%(f4YAHBsW7*Bb>qzYPjD2!l?MT1DZ7#T)}m%a@jeYm_!a`c)ou ze0(hH5`41J$*rBn~9L=@|vwFI|{#J zy+dfC1LG#sEfiPi?#A}^_1#??1A$CrWvq=xSe!aovi#xEk%xanCQ>Y_vuScj4RD^bzFK 
z%JR|&-uK?JEGtiuuX(IQ3k5|skF{gcaVq1Tp%0^N&I)%iAvO{-DmJG)I-$k1L#B^2 zwXYzxEGd~xpfKhqCnthL1U1X&WVA~oNj)Sdo%A5~nPrSF)vqpu&=47^0mWkHBhXGb z(F$2vHYgCLXP~_06-aP!$bvXkutHbll(n23RCZxx(X$Bef&^vsPQuL?rFu&-?w(G< z4i3>4J>Wz`4@@^yRX+2KoAB!V4|(t{YxZ%sRe%8CgoVr-6}eZ0TBY~a^ap)4&4xH~ z3ckUq$|0$8B_`+cFcRZERg{-;s?j&kx}8qFj-(lQrySnc7K2pvwnjKtBH6X&1(L~> zr7T}b4IC8IvVz9|Y1V~Kp6951G|GLB-BG?odPm1FbQd|fGKD@X40V3qFz4flb7T(L zUcNTfiW%;e_g!9I`t@J?Co9X#_ww#z;T|QN`vqOdFjK(8FtoF?=txz-vxAP-m_fs-!zk2LoTg55L!4fPnoFxXFgEKC z3gM@(?_N_S!RA)*QpADM(`-Y^XA{@%0C zjTL7tRBEB{`+QK|uW#~nO&a8+>pcHt!ho*(GY34>Anvh1lX#JRQYs=v_fd8k?QjTe z)H4rV1Z2HlA8rgLiy@4G1|xD;XwXUf1QMIJ2}Ml3=^+H(>>?NMA!n-&_=hx4EV+M`@wH;ARmwky^>i$ z4(pqnaIuWiu>N5H9J?h_YF}xoNi@h<-5;5 zcl-A3Y~1em&PrBW$Dr9**3D0jpybgP0)} zykU{1uKc&$N&1km-gFqcg`Y>osnA^}@{Rf``#2;#KD=6{WieUiQhg@KdJ54^?=FL_ zRVx~X4_T0WM1f+0+E{(2w3rZ24OCSX$Ev_O2}PM^Ffp79Vq(ajW$D(WXFFur5!zP3 z?Thwc{Vxl4A0Nr8PjhO63{-Yy;$4CEX7LC|dnXj?`gYm_yrE32>%e|;^Y#!MbHyO@Ih;E4ob#4xwsOofD&F7N+i%nx zTRRuBD4pVT>hJUyA%;<2+QSN^=e{E6qzH!q!tTo2nmpv{l{b7`MOu-#oEU-j>0mL2 z4zTFc*9$tgYK7S%XSh~P-`sBw*n6Y9z9h7XFe7g>rF?WBMODxGxS=Cdp0V`#0ZwCi zcs@Fi4~|MfNKjg; zc^)an!Ue=)t5D&5O{QyOdz+7l&BJbY&IOZ}{3MiIExlSHQ;Uf4zm+x3akzKoN(Kuq zEl+9v==wptLjnV7M$r?Uy5yFnrN!;7&9SEYhY;>XpiEO-uhri2md6(s7RHhX8||Ge zPc+(K3Y>L#XYK#2FfgsSQZ5Cy}=p6;k39G#x9;o+FhA94DQ6UbVX&`JjP}>dm3L zWi70v1rvb3z$p5Bn< z4K^4_RwcAYSG+`VQx7wbl6`QJgO4PC`dkr4r^O=$u`iWJ8@)xitDpJlpPH0ofWZZ$ z*T=#Td-Udmyg{v|oZ?JOs?N+?n+9YOl&x=&eojd*{V3JMTW5e>Rb7;|1cIC#!4TK? z_8N_*DxAYBhm3d;-6^lFpHzpY$igCiUK!P;)phCR+8b{GYXn~m7Zq6Uu0MExq-L$IVns`tZPAZ-_`WbnpIyU;fBbPd-_e+lGJvyJtG3>^zU95zch) z`GFrHve&Vk5;_~C zk)rt7kNR*PmO-zR`65$CBxH}_TElDC!=s9hRN5`_PVfO_UUOpbP^{XMTs+>^R#h#nKGGZ#m;=iw0tYEo=&lfa^^4YHX;1_>a)C(x2;!`f5Sj`{ z4bk4l790wQsr;2k6#YVub_IH6+(GGC)x{dwuYrYqP0Xba*PYyx|7x|myo-!ZMkCjy zFP*?%A6TV|S2sw{!wM708S~Bp*(e8WZ*PC%6QB6-hd;a=!&mHlfCukT&2)fyXH+YAjL+A!Rlj9086XRO+ENsWzPSYDRHzI)}0XLcK~)IP3d*q15t zGE^yX@hmHMOaqA^iyfGUx!0$^=b1LjeFuLJf$bc*%)w9n;rN*sQapKWP|ix0^U%{bPb12gh&d11+^C zBydI|3m#wkTxMV$luOSmeWbL+OXcv+_8b)wUgk^}1qW#@`CuS2cG_)`bcHogW{Hihn$$`;_)j1T}8pRUTif(2U#MEO6grJ8ouY%D;_cARv_V#P_ z#^&~RV98C`lZ1?Ax-(P~l?SY?uT`tr?$tM>t1JpJ-iK0vww(J4#$JSTD6OTjN=CW3 zxIj-iK^%`A*(1Yk}pre z`p#>E@>P@_r=%USBkTXd36L}ndPv|L%)OjRH7ChMq4YAaMysLcI-|6%s&?U5l$8W8 zMa{YTMI+OeRqItwYq|cxP@yEqh=T~_<=}?`%K5a_=fn)Q1nEk;UfxlhkTSQIl2c~U z{DFI|NIXxy?Z>!YL)}jQJpJN%0N*8!{M5xcLFlfCyrb%O6A$T^zWgVj{p=@~?=6$Y z1DAIz>Tf?jYPk$@Iv;!VvS0wMAXYf=+g76uRf#kZHdHf^gOL?V>VREI|Ayhu;L*Ja~ z5w~BUlL&qO#rU>WV3mIwHX=7LT(L(lnO)EPqY?XJ_|^EI-1OdcHHH zFtJQ>Tnt#y2M9>kt2Y=ZULFLe>mnZ7jI|$BSdB<}FaFzRB+R4->R2?q?zvuxr-X)7OU48{H6gA8quKRSa! 
z*t4iZu0F=<0+vN9HGDQz8IvJo0=c1%q`~pg(ca$P$?yhWwe+Fmp(wa2L0n3Cfdj7a%t-EG(o9Rzr}fZ}ZXDzW&Y6eR{mM zc^hyAF8v$%m?{d2775BXu6!0C>>YcSYGa=K1pbC6mOO>A*7cz0p&e}o1`P= z(czJd5q=(uUOSz(w83J-b&FX%lpQbH8XcR~lNctf8uvg2`9|yrNls8kCS|OW8mFFz zuBTf_O?|9!#BWgrrQN- zd^;81jZp85xqz$56K-N93CADg7%@JZ9s)t#JFXFgJ9y=(g@3Y&qOs^J}at{K!hmUa%E*j z{<@NJcjzfHD6PcK+I%2>N^Aytt_1tjRG9}aLK6pI!j^^x$OO%)5-0c>vHEXUGej1B zp`)7a%4R}?ISlw>gxU!_*ch#`QP2#XGtL&i$IOhQN#L4>qFSBp^gywfG%{)dK2Edg0$GeaO~~v%Q0VESQ4REwlqR1{L<2r zjIFO>%{ns~vS+Av2USaT#xT`#UHF+3csWctEYjQh`g%EIu$hG)(jF_f9QsP&W^({C zF^TAMXU2A>QLlHqJsRuG1Q_ak`XKKYlsA9*bNN`^1xQyi=R@un33%wf^yNSK{O3M> zcX^wk7(JUp@xFt|f}PHI#v*}NAMfgg3p=uV+v>qE5K5Zk^#}c)MG&4kD)f8Y>T(tl zxlkZN$c4kGTZs_?nt1j0t;2NQaIpPy+ zH227> zRM(P9=IEfuQvCn`8BR$=K~(T?d1+~7bwxM~a0!8XAf7EKSgXXK1ItIEbjl!Cj*gCG zFtTtLE?iLi`cy~@YD>zhvHR1O7*;6ki*$P$Pr0$cu5cKGQ)!;&KDDd6wHarJ(IjG-lr#VldnqomF1 z$}-D)2m>0_VDuEx>L5D3Dtsc@x;lAeT3p0yR6-?Vmb#pU?G5Z#>QQ*>;Fgl!os0Co z$QG6sO=hlMxdP`&h!kpo2~mKKk}q%*6K{}b&#=In7~L8sz2oMEvO>p>SSC##DesWS zbZb-I_AbcM{RMj#H3&DiO#R>axu0EKTm8##e|J)$N-{Wi5wM$p9B3sb;7m~NaBSyy z{lo-g&q*2SOhQ0%@#Lg^^Wb0>+^F2MZ11;-|V3i!T zHC5t-cq%8Uf(*bOfvv72l)KWjpo)(UfkE?nwn4eUc^l*JP+1#@F@@x4)kX*|iLUol zJ9`>>Jv_V&jIX4`f&>Jebkz$?tM2KkMA)xG84nSNH#RopL5GL8<;XVytPvz+SOb!f zGFslW-|eXlt6CLi-&Qp?-SWAFE6uR(9ojqBH23k%y9FIvr4ljqm2T$6ic;rQR6FvY-X(4H!^grL;( zOby7!!@Q&=Su&yzu% zwzm4wkA38;U;pN${p2C4-n9?2VT=~S`N@jTJjQqZpgCdY00=VLS6+Fw-R_Le-VloP zATSA8@UI{HIgNxQ95oo?q`bw-1t*=!hH zzP^WFl3b&4m$b@~07n!j=gL4!Y{05oDy22kYAwg!aB6Gl!B)z5*KVQ#*V!HL@b;MQH?~`Y1ve;ZstjR0XHd}-oD}5E(F862%MSmbS7{}}% zxwezjuDLaQ6D1wR-E;t!}T|T3l$hnp&Uu;K0RF z5S2_Pu&1hf<;Ybxj_@ReGw0WD9u&nLnm2TC3WW5q5Riyg!Z3L#KWh-I<*xqe;*x2% zq#afMJOpg)VgN$94R0y;EMGg_?vGx3Jv*%$O;0hW!A{s<%S?IKZEmd7EC`RtrqwOoA828*o-YRK z+uUszy>2F_rQb1fsQR65=h(Y}af71M^@G0r1Z1r~{PMrhaCvgK+upr)wb5+uTzn+E z`udfDpaX;u$U$`>6P9wu&~H_M5Y&~UDClIN%Zv=M7Dgu$1G~7eP(krB;SpGH6vfU4 zE6a-)cD9RB8s4#Ru3K{+@bhUx(p$(Ief~3_e)hTVCvEBw*yyBicN>lmVoU{1P-C68 z=7hUDgwtUZbEU5`;M+&Xy^$i9W6_o`fho=K=)FUe3Gz`i>h-rj`Qu;w+^7E8|M2TS z|E`~<-L%|c4rf4jTBIhhHzAOWg3FO~saCJc*=RIdy>4%CrvR6Wk7gh&ZigI)>pHMp zWn4XlIBcpp4tUTg!LTSFmLtrDS`b-KwIyquE*^mEd?F{PfPXb(_CbS@!?`HJa(WZ1 z4TY2+kS3MR6~SZtj;l7+RTzH2o^y{n{x2U>u7FBCDS0%crn)ViOZdFX^b7C+D)*vwM7BfLm^CMkI>zx-~QyU z{a0W3@cZAhv%Qs}*hYCB$L_09@R9Vo7&v4`v@hkqBJ$28E%uBWS2M>r=ZEs6sShik zk+TkrAh!$YofOWSQQQ2d-}u~TKXs-TnMZ6mR{$QE?aVCAE?|#Yb<|Kl4|U(KRz1pq z!f@0V4iW+OGDR9nBtM93$&@zLT1AaI2&L0vCe%z}FJ~I*>#px!tJSM=@)bGqv^d9P zH3&?EF9!&)2J$)+5EIV>8Q4rMLji;MD?#YOW?1MK62^j{?^Yt`QvYW?9oRS7Fx2Yj}DLi z>brkkz^NyN?vPn@6C?l|Cl}qiAZTWKM3w7TzZ>9X%blQFXE4kb^g{?X6wZY9csNTcnm-VpXW{3s!SYKB2#bBcM7G&W8Bvk4W9iw;+eWq}ByrGDz zLOCB~;1LZ2wo}b>Cg(diX2Uv6+>w?*i8|76$t|^d4dv?PP@7CqiOQ-+$H%I?sWmF7 zt$X|EaPfFiwzmxF=FOY(lPm$=3tk|F6SP!CysxJ-M69vI2}xVJ3?69<8%pdeNxkee z>b>^yk9_bap8T=D-TUbe{=Ikq&5JL8=h^4K_v{b$uiwbB;oi#KmghNLOmSj1N{7Wb z8OTHF%D~pKLmlO(-<-oZ1nJcR6B#kX*b&YN^45okhgIl2Mt_xyKm3~)e&X#{+aM=LFsd=Z5KRWYM2^{4pP+*va|f;8oOUq6sq`-3(nUcA|b&y?(vf zkW(ytGJP)sZzpf9{IE+aA_sZz>J{?)RD~5Jx}KR>)fS~kCIfz<#fZu-S&w=H`D=_G zRC2X0#x?2~)DOrbR;seN(7JeGd*%z9ai|t*w)~7_>+Ye9)5n5~N-FhX{^*DQ5Y730y6m4nG%^Cx}|(2;lwg>EMGM7iK(kbqb{53 z(uJM>@t^+Mhd%ILp}w*MMJ{~pjjR9Hzx@xl50B*O3jkr;S&G0%Nd1>PlvO=CImxiS zyYNF6(om~6R9**p^*$7`TgRUC2x3E zjwID0U}|iHW~U+}p~s=q?$oNam6a9M19v+zGYhSjeBRyN70N9#v9y%AYBgK^eyCJN zrn3SMk5IALsKk77($)~B(gsK$5|H%D@~y6}E-fu-itKLh-~IOgEi3Dh0{thS{)vD5 z#n1lJU;9T@gjB*nV9R|D!O*^<7;PfmHKh%iHc8qclX9(3f=OdIZ?GeJ@+pmHg4|Ba zeYwx_uX6nlzVt`G@`ZV|&1tG1APDo=Y0q|M7G82=V=a~`sKO(%s^^q3yA{pB$28;B zTCG{D2a@WvLYMq2H+e00?|9zhsSEg=%Bq5a_%_rL!=Ce`c0A`I8XHFZIaC_)AiD6P 
z3j4LyY?A0^FhCA8dz>a+gK0;py*zVkbE6g&ebe80=&Ky>pMK`)d)q?ep$8bfK7}bu zXHsVES3dul=f3}gH{Q51w!MX80(tha8+RVybd`BnQ#xHks3d(J`fz?w-oE8g$!Ecp z<&fDln1mcDv}MuoSwtH|g7SlgzRK+@%ggOfcWJS;w6q9KK0<6V``YVQ_V#ZGnub+I zA&>g`BJyNk2jFM6; z2WygJR9(UB$_nt2a15G{15-~5_HTnvvi@tMnwn0)q}E$JalnA<7y(SnYLpkkkM`P_&?9|k% zg)fwCRSCFS$*a{qbML^X(z%cuhq{)G0^B4$`cZdhY-g&Sl+o`RUw`Jd_(7>txuczw;17B)1EP``u@spSiAg0puQrF4_rh zI|Pm&KFoi6YkT_}-}vSvPs;*kfU^?aGRklOZdO}DDECEIGlW6z8pckP?9p;b8A5to z7H!q(UT(ypA-U+e#;5-%Vf7t;1JzQ`CPSESn%RnO4*gO@d=QcyZEpom7yEDhT^7M&%7OKY;8 zZoNh~9}JAN*~8q!m|X*{wdSZoPllni6%8MtfXw@qcX2Kx#n8cq_vOUtgEMxZe?5Yx zIK5sysR|kedYVw$Av!@B=o5h4vbMS^c<12Y;M%ooLSN-&1raT;EH5rC9;yG6Dj2l< zU#H6mV@1U)7Lu<(?Q^?N0_IEIYy70_oGg4B`9MMGK$M$&*kAxp|JJ#y*7>dJDG zIVQjJ7=gIk?Ty|wv#XgfbV?I&b7Uhb5%LZ4Tw#7c_OTED{+GV&#)Tpf1UJzgf9$Vg zp?pZ}IF*rHY1HdG+gtbe;`a-N&OZk!v$V3ZlAYGw5p0BVCyr&(M=BJV31ESOrTtr< zc>KNZ{slZYY&PL5ceb{u{*}oWlubP|7cRlrK5djcJ!D6DM5EP`y(s4=>0>QUh!RxP zMIeQSGF59|S}>gyE_q^P4MC|%`5>wWMU&$*mGXK`iA5TTMAjy>fl@oqLs$+!(07XV zW=)I*6&ACm8}1PP4O%N@j%k`Q;L{DP$kNQ1k@BW2%J$Ct$mUSCm&I9=AJPdm7kmXrAYUEY?dHdpl=UFyR+J|2d)1 zqkK$iX3rupum{0)k6gZZRxCONGO)2@NlY2!X_qeUXhIl(fom0`BxwGSvB|k0N4~tY zB)e38J!!W!QB1X_5*G;8Kn=cqo9h&Tm9%7SskYL+r%%(F@;I%A=U4ebW~2iy0k>Lc zOdfog2Z%k$b0%t5bTQCC$zF2N_Cz<#fRFsojEFL`uEJ2wn<;$tcR{=~oXZK0(} zeEey&G6m*oa~Dt^u#E^yHiJk8>BWcIrz_>36xRa-CQLW9Nk+#nS7a~!>K8t9VP~fg z|2+pvDV;7XwBGub#{@MakE8DX98OHf$0wttoju!Vb~-#d3IVQ~Ur804Apl8TBtqsz znq%lu%onmChv74@xCX(8Bcobe8!Omn5Cri!`W2aE^~s{XowT1W7@$>#@qge3AR>eP zb;0o-z8gi*Fawn5<;8*tbX=HHcvug@17Ly1EStQdXIbdDF|;`e`%;&_g@OvtAq;i1 z(c~04Q0r#1B^b7FgV$bv{rLEJadBy(wQzWNbaH&Mvb3UT0W%>KKhv@(eF_SO2nHb1 zgCD2aN}m}3)<-CW!8xjE_L9!}>hkjPl7%teAR%tI)4P1(!lys}QHDpSJy!l(hP0Ph zrh>bU^25+sPTM2>6{NYfwKYU~M>n~7P=FEMK0Gop1F$)L=*(RW&%?>cV8+JYF0&`A z|JAR3fZ%>`^tdW*IZF%Ne73rq}5@#G%OP1Rd4W zC;u}GO%@Dof}7w`;;l!;5+jE<%Pp&^^ik8>Kmf0R3w6*$DaQ@J=>ZbYqGE}b-9=iP zD!Ir48E(nCZmg|>)1L&>O4`8KXR!7&ZWE$QUfZ!%0KxS zN+dWXt&sG)xVZ4zm20wrHa9n}T)p<^fAQ@}8fFjsU-Bv%+h!)DV?T6kj*d^h|HBui z+%@T^Hvwc7gy`~=PCD?3jYvSc8%RQQOiIYkm;9wm6CzyEg2?^+%B9CXRPp7%`G=o> z|9gJ1TFI|oz4H2NuN@y9H5>JJ|NPHA`L?$XVrcB>z%X9b^pLp8D1~)xADzIeYpG~9 zq?U2`yjO#5WWUp>27y$MK&9&G7>o+LUSoe%ksgiOh$?UeJ&@VE_NF6KFJ&JRR4JMW zVj-TWvw^0liV)@!7AQxnuUdQ&-KRY}Efs80FqkT<0S(M}{Tyc#k5;|}X{B|Zfn=%! z2)^<`s4VqqjYd7wGNeUQfPL7wBmnaeeA5~--gbHTJ@1X{%Zl_kM zF;|w46J7Rawx{=l4Du(QdfPkR{-p5nySuxuzyA8c!GTcX%NKY4(dR$4&}xwd>1!$y z^)!}oKJ$*ZKmO<=E{1oPSI0(WyNT=0J*JkqZg&t9B4(PS;8Z&3@;)?i@`apG>h-f! z&7)V&1Ja{CY_VVe^S}J-=YLo^?RS|Pl)s({JSe&>!6b^6X(o*qS5{XpU%r$fXpMnh zY4TVNb6&bDj8~_#w|~9e?*5;@`CH$8_W5i?^>~*6-PrUdq^yrzzGO8(4hCK4;bfq* z3YEL7R#oaH2PV@Z{GsrJYG(l*)Ew+cpDZ$J`}o+P45+lE@~ME(%zG<^(HVuSgPhg@ zv}Za38i76K4<))NqB$t&uMrHw$u9=E=)l$pE!2QN^4ri~sr5*NcQav|mSJWCZR6a* zt=l$QeYh|M%AG>jhf#U|vp@aLkALI?H*Ow$@3|lR@mK$h4yy+Hpo7%&(s+G+ZFObk z_@rGvu{qK&q%A9tc;o8st8ZK-p`fhE4m$HrSRN1|G5V+`sl7QEQ^edW0hD| zRq}<;lk}#_4C6APlbOCFbOyo&R&pUTfs2*X1kp^`Sj`(Ogj2_(+r=Q^lppl9h_($; zMoM=%d+MMi!m-O37KkeaSakzRUu8{09ca;PR)l=_`UTTQR^UOYZ(pyeV<(DgcQV^* zHspxixOrn^eM7Dv9Uq|xMDwKGZnhe5=Ta9?^>%J8S!Ra@lc8xN6a6hMD?iI-xP0lt zCqMdu-QE3bd;4;DMao^@SbOZzOO0AxcD~>qhXk}vjal-a|MFM9_R=e_Zf>UQjqeU@Jce3sve!$mW8(H6 zdUR?(KS-Z~12rYt($eU??C}5n-QSblC47m2geIHe@bJ)p!J?{Z9zC<{sD)XW;h%zC3P`_aszKC*}NtO7J%ZM8Dg6LZySlIwI! 
[remainder of previous GIT binary patch literal omitted]
literal 0
HcmV?d00001

diff --git a/docs/source/_static/imgs/example_shimmy.png b/docs/source/_static/imgs/example_shimmy.png
new file mode 100644
index 0000000000000000000000000000000000000000..9500d48be8ae93f25a02c55d45c7e5c2eb9edff2
GIT binary patch
literal 162134

[base85-encoded binary data for docs/source/_static/imgs/example_shimmy.png (162134 bytes) omitted]
zuw$ieP_C>vSUD99KA&obWFrF))ktsQfW|`aq~}>pw#1`xqMtT?y)ufP>i0 z9eV&Cm%%RS7TZh{|T)VcIGt*V0GaA zyqJ|DMDzH_%-4FtZR|r}wEvbtXy_qJPjOT-(E+iwT)or-k0?9&?^s872sxyg3K3%= z{`=b>U0QY=qAqF>>$bCKK{4{9UgoL^`=*wSKDS!>mnn zhkD5;+BS9Jla1Jyz9kjHi8<&KObEwfDnS~7aCp^sYK<%Z0~zfG-(=@|HS@>~;@Z`e zI0wb5IsKl7q<#cgOW|^rha&;k7HOs+gYG!`^YnTx-)fU9yHe#!5MiEiC|T1&g&(Q! z*39Q=$6SvuU;D>Vyr_zy%U z3l3)|&-q3fM!ysP5yeU9klvnpBRg{N-xV@fL;%rFPR{ZrEYbdrrr%Bf&8P|Qs3~;0 z>1r&^cEdxE=Pix#1E)~W*6h5aPwez)QfW}C%EYP_&v@o!7UQ6bzi;M~nV;8zE=^bCq+HXb!Px~nrEebu zJElh(GIKTYM(JQYp>|r{VlL5>2|;#x9>rD)9NI0k$`LFxjgz?P82d%?Q=Z;238W0A zzl+R&hoJYy(C`pU(A62TIV)ADVicCkGxwcAg)<1NY&X|2BM`FtT@c~Hr$^5P#K3-K zs6y&J2cj=)y!zROUpfy_0jBO#Q~Uak27e0a#NRqAr>Pj32T|Xn?Q^KfNGDHvW%&HP zRoxIdTSP-CM;bfUzXCK+F6s?RIIjWR2uEaU{Q=H zjgXla%dw9Ulf?xO?PE8f?nPIYMY3=|qpJaFAtKY$xxRh^MD&+%*0`5&0zWrpK^o3$?3GHh!!Zo%UOWfbrY+~ zz>jeJIF#I~H{fu*cz5W`luWTsMhM>T;5)lL`+cHv_#+|dcH_HOgmn5>Xf>@^bE>(R z>%flz68GyJ=Y&rXp2;xu;`OdiW$FpFnyy;j7nPfb>xa(wOFFb8e(|4z54_)7;sq}d zjVT?R|5ps7s0B;Cv2k878&8%i2KZ(ANVzmK|VP4jwq>AptUOt&1K zLp=oq1`WOSV8B310dP3D5?Iu2>!1sj3i_Ar5u8Y)1Bo}^2v&F4q58h05AKI)n&aF)QK6(Y7uMva3BT6kYR02X>XCcn?mLO|@&7I-CB|F7%h(3LH zuC?jCJg|UzYFQL-Ak2!>D-2EYfHi|#IrYdH!8rfECm`>hgjf9ch01w;RTYUmrK=$v z>m|KJDDSQxj34RRRYKM*Ul_X3qae|qw%xkq(v0hrLvqx8DWUf8v~$zMkg4&fZj;w}ZW_WmC`Rw7aw$X#zb>_&pwYAtRHD2|2~t)%)(6DT^G}vZT#9X}XS1R_W*7 z57rJ7`9_a?L>Ir_mY?16jP@lRN;M$Vy6?zc0z|y`U7rTiL=+4@cPP*Zq2%>AZ z`~9B*hyR454IbpRu=(sD$?@-WHxd$nR|x3&bk(Y5w;L|zS|Hwmqv@E4t=&rou=~cU zOBth2ku6p~OS<&KY01~T zPVi`$?zdH+Bd|$r>$6Db>HPladw984^7X`rG3Rd&e|?XsmLrP zAFe+-J8Q6*dp>lPq49RfHaAk#qCT~tkVEi*sxtH=_dN`=JzVQVI2t(sP>Xnv2GXkE zaGQQh-eAiW@uBBJ8=sM!Ou>_+Z~Tc(#4_5KkCC^tci?L=iz9}*7BzCmK0mr}cP(LK zK6JVLMnvFrDN=|pc48`>v3ZbUrctdZ|4q@)?jIOy&zr#69?O_s4b=YPf3!Nh{ty=K z>?OmNp!9En2VqD>Cn&pf#eD{+rw861Eiu#!sCb?%!z>w?ht=x z8**aP?T-TOb$3BES66&CcEEj%xPEey3`L*>N5}MbB)mQ`(O22s_h+P8ljfP$b6s;QHqRD_9LWOQp zpsmApT63`T5jNge`Po4kURtfhkF<87^0^zwwWni=p3244u@G3&qU#f=4*n4AQBe)mH)ov5& z+s@KmBdm+Dsxqig=!aggvE-Yu8bDva$(14k_(ppTZFSHsIs;GNUyYlJFlBL9K-{p0 zIq&6XdOKT1_jVBE31$Taw35JnPl8BK4KBGc*#go6@ zUmZYN`m_(JG%nC3*1c`aZI;&@DCo1ij&DBsLBYs50-IA)ne|0&S@fhF9oEE3!0tFy zYt&{xF4OHD95ex$5 zxjMM-zM0;Mzkv_499eT+n9K~ABm>XAv}pLP9$>7GsdPR>ZAW8F{&-Ln^9PDAB}(0R zs4V-#3I+?hg;2sF?hX?r$Ic=SB7-dlPKIiTkIfrw4we z5GfOMEuU%}U7PFh7#EAu1AVDK(DY#QP+cO*JgY1nB~tXU3@k1dA&C zh#fOi)cx2ooh+0t$92!%hcAjk)(@l~umpXI7=lf*;ALUTMVUFykm$IdoI2mqOWyTC z%hU#y>C29okbRZyAWP~$G%j|QN-NFaWPu5!cn>4U34wGKS-xL2@wdOtms%^n(#0Nd z>+y@IAJR9-vW9?L5VrJumh?bhlmRQ2jsn$NIX{H0^Jd)-q8nn&*>*v$2b+S__`XBk zdjrYsPscErG06!?J)hu?5AZOtBB@UF>;4~Y$ugI$3Ub}uKvs9R8}K+1+{=5CijzAg06NR%AV zJn+*msJ&!oCFT)GT-0xOBB|S~2H&8@<${2bp2?CcSRs@wNV|2%M{?8`(eG-a$&wjz zu;q@FaUkGiaedCgF!(!D%4%Nc%RKfm9f#1{}ag|W@rFPR*ERpYmbY=dA1%l za`+*{6->&kHj2=eA>UG2`qK_gw$op+^Hd?m9JJt^?V6^aKkGH;LAuFnmFZUy$=Y>h^u! 
zg~f0D7LBr6^6{coeNf>{1El(k8o&s`>|Fk7U!qcc&1&I8&2IXet|700aWl}VKN^a| zvSB9X-dZrPqk2hYUJ_03>Ce#Mf07to5Tu&ZFsJy4|Mx=uVSqG91NAwuD0a!QQ2K=L zom*ZhNsG$o0pc6aBSOdTb49;q{$6#m91im<#h!#4!v}LQaMwkH1cR%enWIOmq~X%{ zlTNMgpO&O;tRtpkeE^wxs1tV(ebJB+xSw$QYftLAjE~Ih0Uh!u{2?kJ_Cc0)B-wyP z!_@L2pO?1@+KzT^&vz_XLRmz)P)w7|1}8WPYc}>iRX2H~pv9aa^WTtKzlb zVqH?xxkp;((qM9PPI2byKcx_=n6PD!EGtp8wBr9grT-olWRlOI(5%w*4LD4|;gM`2 zOCXxW!%istiU_o@zN|c?oCC`3f{G)Nouib;iiCws*2}k@Zfo66+h#q+e4Rwj$=9&u zC^&+lA=>Xh%;*zy11#U+4W=vZV8q#VAcW&@st_mHvx)=JU@ql%F25k{OA@u12C3L4m3c_SOXiGbL9X%PM_+g^hwmQ77=*zej%> zi$mP<&Tgwkp6KqbJq_RA&2pP=$o9X3>nu@3B_>Hom|}Yx2#({($lJCYqX*}^B$-S^ zbLMF}AO4tjXWwXhv=4zvrY#My$ty3Lsjm%Xb^wmc9bnBK8oTr+vPRTZwh zAk`jaOLM`<`@`Cw-u2lGGw&3wRH_tgl+LtX*Vd`-Os+EYW)pgSVQ_uGH$S9) zuMqAjp2OaOGAbu+zjs1an6_cisbn83*#H^S`0stvmlpkPpv!bZ+xE9B{hzd+ga*YT z-6_YztBv2kG`O%{kOtT&7{>R2p`BqyTPw4;wh}?R_7o75=4#{{pJIYF0d`Z}01tY1sYwEn4~mOnDk$?Z=CcdnIIvu$c2Vq! zawX*IejiXq)bBsZJpNY;prBp-HLI#UGuP|<1Hbci!{pmt2Hq4G{CRVFxiukQPFCKE z39%0?amc3dbvK$aG_haoL9HB?s%;egilNC26oV5tYDjgJ5rkFIS1Dh17hjs(KtZymL zdkTjhvmP1oml!0ix)q%C4HzJT>OI9>l<9fm_K`gKAAysIOjgPqJ^PkG!Ms=q-{W?v zI2Pj3--sUOg?1FdF(Xb#$`uA_vOLdfYba*ChLmTz`qi%&Y6Y^mu6V_J-b`e#E9W~Q zhVyF;dk-$F*>3dhS|Ba z2hod-Lg`R8@p}Tt>yO_ZxXi9E|6)E9@(7t^BY&&jq3S!=mFzn^Q&-`Ipp9>{56G(* zN5;v?6OS$aho*%Z6*3Q~lv#=;_3tDH8z@hv8l^V-@w*oGnR;-PW_gj=x?Fw%3B5Av z+ugpym05fcQFVI|KVQh2K>N>6-v;pQ&+JRWeju)mnv%SpW`KqTB;7;u^Y%(FzxbfW zeb}GowWCp^7_Lk7i{gQ^)-DK4m5fw&`!Xd#I zpQS-^s3>`~2T~9N<771aWrZ1cA)^XYb(mARUuLa=OGPH*pj~!7JWHzo_veLE)D-mt zm!59YGL@-v;#h37!NdJMHe}W-IB@*@3>A@r4E)W@D?PH;z7})@#3inVf zvYK**Sh(dkXCz0f!YFXD#AAmCDc0f5oA-?JZ;)Yr3d0u+ntiI*cKU=~VD4d_4Dfj5 zA|#Gg4HBC6AW+~(#}JiOsUWVP-0=+e_*IkkutanlrJyqdq5Kc$c$%kL&jA`jl#(7{ zK0XenT1k&rZVM?iDIKnxay0@*L#?>`x{3d-t|8t)_K?}QtmPYq!oS4(pA4Qpi>TSd zs{@yi5`w?3Gy##1L=P=x(N{^PDMY0dkfLngRlF6fXt)`aPhkILzkNlO!|NuE$tx_Z zAxG0B-o@GJAt;NdLb~nr>FzY4UlO7*dszJNv1LeTr9RL!I|VZRHaT_57H+vL#UJV=_bEkpqe*{L#iByl1v3U=^FK_li<>qt;S-Q?xx zmd={X&~`gkmON5EfjHv}oY`DQH92}+zDhj?L!n-*t+Fzs>jfdN;2kmX?XU)mIqLL>&-z8=Ulsb_mF2zh zjtzJSupGID=vG%8x&qf%>w?{#Bl@YlNB9ycxI#n(2VNAztVb$z<2xs~Nq$bzWDU|H zf-_JuK;a%0JMLr)n`%3j68^*Be3IaPAUbPCDu}>dsezF&zX~!nMeJAYMvWx_{+Sjj3VDO;O))!!?TVp^L92#vY;k;JfsDCJ%Zu6;H^p6zzj_K%3WQEJ&nq5{9nHn7^@t0D6)& zO}Q=eI(iafY9JMw(UwZ|&({eeTYMq%t8G*NM^*KoD3ijxG&>!`OGSlCyEj90T_|bS zf8=ikvt{|2nn<&U@iWjyBcQ7^{OS>tuc2A=2pNMus&$``w}IGUG=J`rnW80yhldf~ z+35G&a?&NZN%_=f^M)Vkz78Ilq8xc@EVm0DnIvY$a$Yi-Z3Aqcajm#h<#e0j=a-om z`y25J5lj%`p%~VP5f@!TLr@5{Je^d~W3I98X+6;Rt&@<9%l$$?c1XqT$L3~@C7jqj zWbc7eWssVE7g#KV6VN7ZKzXWX&uDkq^ir8s@d+i(bdJ+Baa0QEejag4R0hBv@^##KZu<% z@_Q7M&%U2j=yS^ynt9pPC{m>#CTG?VwsAmI&lZ}&osflak^o$ds<-# zZZs|Qxxd6YY}biTQ>31I?{JM+(}X>TWDd`W+mBLsnyw$+-Ul~OlsbKftmYx$k2r+1 zC&&CHo|&eoOCOq9us3}qPD7QQPt8~Mwtm7M_bSw>Ubh7+(_U%&9AOid6;~AWGS=;B z{|$K%;h$Hrks#&j-> zou4hbv2!e4nL{hbpgUTUqrR^Ng@rWUrmzq=T95QGG)ddk$wN{LrpHvN^?ld&Z&SOl zK>gpdb8MK~oFK!UibYr#X0XF&=vkP{ODRV^p|jnuYdZptY!CnM_FB5$naWrX0byu3;@ zE3A}2cei4|^+AT(*;nH$V1+FUm=x?>i zFEF&sxZkfftYsj76AyREF}OlvY?_mC$3L^Cf-ck;2p_&EQIjh~p>*=rt?`l7)hvJq^4&2di_6{S!_v24@bva%Ki_S_>Xc7NxZvKX|jMswxPC` z&Fzaae|QhjCM}C z{2N;`Fy}Igr8l`Gxwk%n64>+XhTUXLXPxTsO)ilh3&y#mr}H=J$1_FbrFGq9?5;-H z8qKEUGJ_%dFK{3W2{}?B@48swnc;)t+UB^85>F76h)8uLf0xdVr2}nf{d5c0`%Wiq zLI^m2#^mZ|9v`MMH`Qttk0cX!LMuQW-Icmx*NF7}2lmVY38I0A9Gu65=`M{xTQW^v zTql7q*{ouzrk$d=;o9Q8d6qS{PD4;_-O+OEhcsS1dIH2(LKR8KzRXfh^@KC8LbY}D zc5iP_E}Mh<41Ok$oz>FzG741fr3B@0dJMS4pmBy2$#C}%(9wcyF(5w6Q;Rm#(R5+(Uvu) z$v1O4wzXyL`iXIF2Hi5`My{IgEQ)cbDpLo{BtQyOe48RTlySE^{RkNkT>1;n+*Hi~ zFvK0rKWM>D5EIfcWgTYLl6XO&y)boh976W;kQ}#VZ8n~poFx8FIs`(Gr-HHg+{dUf 
zFK~#!l|Y@3v`zgUf#;2Cy7c(r7UdtSf{Kgtxgnp-S;*JdS`uFHTLlYdKt;a=qWXya z=-jx`-tzL^o_xIy$Oe>UhvSHc#{Rx;=PCrt(FaPb9C=)|zkWQu+aUSngFV#{nM+~dA%QZjGLcgJ2C;bH1uJYf+NiFavao!VPzy7Ts%C%5B`W;*D}a6Q8DB=K zs2Kla6Uz?2_c^h5Q^-R_16BBEMeZVq!ZHFE?2zBoU zoGj=oS7G$7iu`Uyc`>>g{VzK?rzMV z_N1;V1aNp;xiIEqn`%85$ULaq_XE>-Kh4~m4R>o>wSMg45w(@-@E%u@83ZAc?KmuMqqbrynY)_FKNWgW?o@ibQIR1)u zEu>3+oGB+kqcmZY=3?aGMz&tt(W{d2(DwVHC98$V_JQWLB~c?mETlE6>B}`yCV#_s zwzs3G^F<-P8qceOfsZx>gQm%WcCUPWjtGE3)eWE6jby{R`}~x2=Nw08vnW6<#?G#U z1vSCIiRW^2ZNCg4=jM1slBSHe_l`AfaP`N3SPrpN_$jnd#jnQ zUa#%^4}pdT9MqL;7Ci^u&Oh*rKiwEM;^x_B{BtMjO!lCliU2pX-$Zs%>j;Hh_f)-2 zb@m_`Hxzc1wDk}x)YyrX&eS!wO}PNoFT6yETZlM47BqrjR!5zYbj!nuU(>pLF81H7 zG|DR4Js75uSk{l+T9Kz#0p}tJrPx6U``XD_i(w0ps1S4efi$n-mL@X&! zZa4*Ry}YSjpzJhCRw~i+g?Mt?N zr=QKpS7ef1we?)W>TA=;$2IjK4KH#_n(OV7(fEWpa2=%-d$xBYZBh2IZ7LJrHbAt` zC0o{Pf{iXDmNo0gqDM8}BH~=M>P8hWCG~gNF}9y(U1L#Afb@`$4vI=jJoEFZ-bt*? zJ5Ahr`jpIlad5x$%pCWr+P}=t)vAz^(nrQO^Rg&?(D-@ZtJvz3WNX$obvspr`Ji~E*2Kxo;>*?@&% z!X?;l?AW-*p~)ru<@o0pl4B2tRq9F$%mKv#DRawcrC7UJwrQ(zqX3Mb#hMR4sPgWl ze7H#`H>Sf9acn4u=*Fu2Q+MJk71;1>Kku(keD-SZa7_9-uGD0GPf@Ms7bb^+=VBm% z>!(mdv+ZflErUasC*bF<_00g0b5`eCX>A2B)A^Blq2IOes4xWgIe6t`Xk9~0x1Bb~ zvx?LlU%`m0rXetf0!zurDmS4tM#h+pooR%n*UrD1GkI_HeQvtHm|wn*v3-v`MC@& zqHm6gX_ISDg;cVe@#@JrjB zO@4px{Nca2oTAbncq;RTzVNR$;SW58ZwVWfWF;q6@F5Y2+h-reds*vT*w}tz)RObp z-onT(ueLLz6LyNRNxW&xc(r9(A(zt}V|Ajkrnz7-%DUC$pmK2hcrp2qbPsfKlhk9z zBj0N5XaiI2R8RUx>!jAkT|%P69KCC&Xzh%<; zqBX@mjbYL#Bb4yG4w@YgL=7U6plnyD8Z#&C$hZ<*U`qgXJ?eM5cAs$2Fs)ciW2E8N zJL7Qd4T_svDM| zz$6-1kE)p%=X5QN19I*8W*J8@YtgFt zb)977S1Oq1%o$9|!))2w+p2LJWOvrYq&wCt)!{w5X?}@C$zlxd&UUwOr(@VyEqr8N zz@M|{XV>CcyNrJT+>NZ~zX-rEZn1@)@ZT>_blH8q1eX;)vB|N!+0suDF2yh`P-%#y zC9sGvuzQe*@{N!HC_PsV2b|uvCQJ21TO!>9@C5?ql`8 z_dcc<;dRwEG_K-0M5|Yd^;k58Hf1!e9dU^jd=xo_g9{c|g}c2r+ zd=%7#2nX$lvpzKhkx_G9S|k39IN|i~i)QMEe{w=5pMvxSFiF-|@cO(QQfV=$Q|E!& z$n!+rtghPA#h#1~Qpxk9v975VA(rz6aJJ7#Q+xW2>=!6rOe!0CnA%H^>M>pNOViL3 z*Rs)e#!K@VZ^NM(xHFbBONgqOAi#DWRdpIwQimJ7qa45vrC7^BS3{(c;A|9bjy`bSeg$g3O9hL$cviML&SL_nC+c>JO#T0yop9|L2yS~eh zBb@OzSG?Ye*VOo`rlD#)I>tJyvMooR*Z(WIh6o}4blRXn!BN`((zgG@-r7Jx1#_t+ zSot_;Qk5lG9j}sdcUXgXBnHg94wjCu*`wrGlZloInNe|~3~gegxVf}g*RT`v7_?=3 z$WT;hcUrA34FgSeKfDSoFnfQbXJ1>?8QH&<((YoLIa($YaIqd{iU^@YZ;4)48Qp9hcVbP*V>$L{7!$7 z_a4ifQb=47^lI0Q{`8G*uSA#98&cZ1y7@$)f_+`B>q3kpB+>J111T26Wk zRXh1ZZe%Kx6lHvj0BMh!3F}!Wj!Dqk;~eUBOQ2Dy_Tibye9u&>(fh*a$*~{E?BGnV z!KD>!OCP!dyhpF3jyoWULKnUMmvuoJ*@b~idz z_BL3B*QtkX{mb11mWKY(y!`)6J%#Wv^x#_C$#_`XdM&@m5jp^7LgbQqq9Bb*WBWq4XMx2C}ndbHEI1`Dt z>JnACp-*9Uh(ikhA7gJBRcDh#3*+wYuEE{i-Ccq^1b6q~4oPr#cMEWE_uz2w;2tdG zp1gDC4l`?gYkhy8=U;bM?OnTebrZs)L-?)tH89#D?; z+)`{odT00oMXQM*TJVux9DsKi(`Bk@0Qs{O=e}in^VDPp|3|zveCC|f_4Iv$!&=6B z2Sf)BCghs7FWjr8`11*u=qJcWg-?8G8QY|)zgCx5pM8~$rKg9#)}(xtVQKu+x;V3; z@5-Tc?&g>_dOBWEqCI1?KW+ar%{0lTUGkjDuYxX-lx|phFK$y%1#KWq9`oN`0D*$J zRUkkAs*r5Fn0GE4c`3+&Jp9C;WLy1>bom)O9uA% z+!)mHn`Z3X_plqk(p+7175wD7paCA>Ht>Nx+-%S=W25nOijxSl%S2~gR5Xjg>J*21 zrJY8$*_USW6fHMNj?L7ndwX@(d595d(?RD=Em-tjy~Q{)buUzar?U_ z$)U?zz|60kmp`k1nKWaqj6$OTN(zoon|A3fT8Lj*?Jc57-~mvhgW>ohVcUI@9BntXchF=AqZKqNO2yNg)<6vZ190OLt8qzpNr{%+MN#K_ml@WNmWYr4^F3eSitrLlLgy~uDxk7zZIdS9Z=>EuY8 zS9eavB-A|F9XqO;ia%w4*4v`~?5tlt;v@iD<)0wO2O!= z+xH*jDmqFuuRR>dp8PhdpsRKWQCf)Op-eNT^`%J~Amb}c@Vj=wk^K{^;KEH~0`JLF@|glO$#|{by%tJ0OK*yAG>kd@7XqWwzMNHZUr69qIM|58;jMSM&?Fob6J^6qB4+^!S@GSwo$rMDw5cz$lfwZvCofxah!s@u#Z5jxE&5$oU?&FyCfDl7Zh3ZaYjW_=_2v6oG z(lN+y4aC(3d_LDW(0b&;KXG0H=TGZ7*bX3goM-)=EW#4-;#{)3j_WQ67MD>0=1p#pH#urBFfkcngtK=_I>pU}LA4k>?jmHuf(pW$QE zpi!h;iMq3&85Gh1dh;xlt{{m3xM}%xb@GId`9Ld0h@(G!D(19@08bRZ>gPd)?=)?X 
z7e(U2InG@MTR(YB;Q6zL?es7I6FWP;0<&Swr#+rVKL2m?{t34}!jrx-*p8e%4o~nj z>%ysZ64m>o+SZass@zXe!7K>$`GwgkfN0QGotr?6vF=7gD-h(*B{U20-#9gs!ON|# zu!Jr z@piGW2fPHm(k=Ar_h(pO!z?w@>aw0U(R58Hu)bbtrfGLwaCbRzW@hEfgTJ3Q)Kg;K z1)E_Y|LcUv?}sa$K)JThgO@Nyd)fJ;C#x`6cc_*5(!#0k8xFe+2Mr%aMepDQ7I<)Dc9>sKDdK=S&(@APB_0PUOpFl`JZt65@#uwEn5x zah*cIi}OiMO?!aAg$f)HVzOi3)X^mQ~S0ie3XebHLs;+Gpbr4`dV->n0Fk7Xw zv10oayuI18XTWpowUcE_gNI)8E{2>-h)4fmdz6&TTE6@Gub>NjL;=0(jXzUbU|@Is zwXo)U`h3Q4=!&+w14i5RA^O#75;f?Wi`{riGh-rYN_hPGS=C6V>){z`fSS`!y6}jM zRye~ubweSfWC6oed-ln)i6nd;stA4)P2y^Lhjl>y7;&UEW2uO#7Xnk|LhKJez*K7m zNlV~h4Z)wNKbKx8LFaivUXM643Pnb?cMhDZ@sU8-NNe{TC$Sj(H6Y0+QZf-U@;~6H zN-yrubHMG_pLs-q4-)`jp8uazmSzT}*q3U+Vld*$zS~L94Ke$UFZ`wy;sp!2VWzQX zD%0_Juzgdh(nz-Z-mjZNaKe+Kkm}9(bSKQ`=p2!o2wySegICRKR@}p{o9b}RLbzHJmHC1rxn*LWt*0QejCDNr+gyn+m+NmQsJ62@`5cH6^MJnw7=>AC9&it#{ zJ}I|L@&^*)L{^?yls~Uok~HFD6aW^k8S!EPum@}ldYu9Sx?TXcUe5Yz`a_mcI?#Og zM^XaFP?aB7olW37br*Ai1~)^hox&{MN>{%ncbD>!ZSWfuUprcf%kCKg9GZ^#I7>5Q&^aT9XkE|w2>=G+D7IU~NPFW91mg@uN}qWlU55_t zXQ8s^Yg7S}>+j!Wd~(SH@@|wrtewcPsBk#THJcz{#f-F*@dm+`&VomYtZGui&gAo- zhxQksg*MT_p{h7r%IQ7k>k5@yn(v22Jn;QLcJVKKdfg$uA}7!hkF{;a`1Mt)1(e@zY3X)i#c5_Oc=M*G&!q|=j3fI- z5szigqyaUXvZd?YMi26FA82CLCvlFLYYSKO_!lGM`-5MMWc@*3i#$absPpOQ17pRm zS_$JLZSKbI^z=Uk#FwoNQ- z-U5lYcdxy9O?lQ|UPb_2UhYb5cH4~#LN8kdhqxh8E4WqkUr-l}6&ndvBXzrEJxWss z3o%281iqB!#|>YKBCLe?*1-4bg<0YSCfeXg=GE+74~FSKgI->h5pKaVNkgs0;wYK8 z*U$CeuXm?V-Vv8Zb&WAcHDhK)r(2OA7{$y?VRwi92&YcYubJwMG{=_aFHTCDi1%#L z9?;$I#n7)I`@#9AGaXdLDD=)RJE-Zf5CEx8%Ud*eqP`-vVQ{`h6mL_BlPUuV#AP?u zDYP&=N;eFRu0wq$vap=_zO!6-8M$9{{u_wr+scl9ez?F zdk|YCSgnLsuz5P*_}uuOp=0S+a)OxqE?wlENz-jGi#c>{Ub?c|#D(}Oosi*nRYN!?QiFu#qF%|e4LC;NyZ`)FN~y?FkKM}+o`&vb#1~$ zX=O@bozoVXnxp_9 z)eWZM#DRl_d`_2EZSKY71gP`VH`Ca}Yz>d8DwimK?*DlBJ=T08K?!>K_~ibW>vSER zV|C{spf5Rm2FZ@#lO~v!8A!qgG9xo{;kO@%2g&*fkEl=Of`;At$?h>k%oX<5$3c(rW;7a=@c@Q=cA6{%U9LK5JlLP zmSeT~L9N)eoGtC=`3->Xcdb8_e4yGrTr4%(ju*Gv{XP~{3>;xnD+)6|aD zV`du>c&W7qa~GtVv+obe!6<*)vy$NCUzQq$&irQRJiQ8`L+yUXXyYzKwG!Hs6*B+| zn4iY_m=PfkQ*Ev4aF^11>m8<}QVg@4LVae}>}Ix9IKC07PS=mH-@64~i}HS-T={+f z&DaJ6kRk8!8)uhwT?=KCpl%2F3clXFL;>>t?CL}XJs*t#@`N`?dpo8ix(5mGvduqGXg(v+fw1uc-bBZ}0Wh8PgQq(5_7%Q1mad~GW0{G(X+nzMeHOu^%6K=GaGwU6L{v zT8AwHLtUPWxt&%gTB5jeNuMi`=}=YNwHOX}Nn$nx{KVu7TMGG>P%zZS^7T`=bYs%_FK<^Y0k@mLPk*>JxG!W#Q7%dAtI!~mWEclTOla3w zY@#1-Ka#v{U5b#t0rTFDufV%jnMv)OsEog|)cN5D4!2uHX%Vw$PzA(i{|u-I^mNpl zj?A8!+N(y+O62#vJ>xIAi6^OsamZb;uY7I8lb<~&R`FimLXK2iw0^>}PX!e{$0QnB zz0O}b2M@$>#H)CaA(;fFnVnqv!;D5T38J*fO+T+|3L$GyTUA>h849UFbwU?xm|+JD zofBWWOv$p;RkK9d;pf>=r&M48VgQ`{B2l@-`rv0F(et7ld@8|(Xpl-RHN;jm)E()% zp2nPD!>mjQYyGWN?~<4O2}tUx{b}b>^N;lNFLL&mt9@#R)U|19X^pK)qkg3J-~Ea- zQNNnCo5H&{Ns&+8j2COxk4uzJirC_%>s)L-F?&9A{6*HCurIk34h4#mD6JAEU9-bJVJr0T1jeak1jP?|grO=4{D{`Y z7`A|p#~!>W`{}wgL6Z^I^JuX(%uVJ?>Nc-^^@TBSJ?6Lw36)1dO-ibP6dm&(dkEFj z!+v-&XZ*Ul2v=TW+U{-~Un_DUmUxl2WBT9B3%}}-7I3%@>@y?nLaNgp&a11i6`E!A z6^;%4L$UE6v3?_)phCd}Db}D{S*m~L)_@+cPUyplr8hR%3FrN8o+aHjd8YF&Za!+k zK7aaEcjEdm_P_Mmgt$JUwFw%|8B>^#2C$0MOZQl)3eF-C(?C~Odk~$2*6ghid+-XG z^D&~A&axsECuHV3mIkWC3QXcRqpS8Y*VYVjh!YdFiR!!MNzJUbFaR6zX0uhvn%J0a?mFU}RO27u~cg6D+^mG2i&C5_~(n^TOe z__${hA5&)f0ts`*ZD^!u+@i+i$Pk4`jA*M?rhx)um(*&*AQ*^bXwK zjZB67sG%VDp@TDiY4?Xo8I14N1}4mz%JJ#m6#SoPa6leqM$|NN%o#^6j9pr3ZiQAM zi3Z|m9Qrv99FmX@VuU&+9~%1O9??~rdyzFEUAE*Diz!=s=Acy@L3`r681NtZTUzS) zySyyEV`P8x*Oof0r)QqpmsXk)YTSd+R9TQzgNk*@W)Bnb(VHUOkTn&&4E?9Y(OsF;`9~{!b+_k3y9^WMj9LttKj7W zTMH8Jg!l*90v(A*=qpqwC}hgq`wJ+LXq&Uv;-a7A9i0905s=pVmJuW8gKr=8HY-D% zlI7a`OOA>+RaJg=*y8DKov>7;v7#L%tgvvcJ9|XAN^m7~Oy_gN>D?_rG~GTz#j-lJ zfDwUv5g?a?W`7e>dnih` 
zMX+>PD(=B_4oN*rh`^?@QbZjcNe>QlRRsGBT6}IuxEKNQ^37*Oy@|r}ETMu-YN{X< z&|_(E^w4L^qP+2yQHn|XVvO*yyM1H6r6dGjubbxT3FBL@2GNieF~5{`sAj>B4lK`c zooGZh1=H_F@z_l9&RRw*Tz4GpD%{qINHLpsCHVZ%QEtJL#;%kKNUeb4?a&2hcG~~8 zTNUD2d*k>Kl-)_4w8jPIvs!jXIZ@n348B&~Q1qNd>{AcN*>L#{?g| z!ty%OlE?{pr&BRe;kg(o%T>*bwnTt9i7Jn8B{{Y#Jb2Z>jPTblG#7>t^f-8-x%B?I zP-L$z*l_TKjA50M{Uhp~zI`&!nEBt}Oz(rCP&FMqw` zKeau7mEs46mHT)4$zO8Y-H$SA_LZqZ30gT}j5R_tZ1*D;&5q@$MK%&wk9|xRRziu4 zAI(=0T;woL{f-;OVTI~N&tqNI` zIVllgLGD_Z{rK#Zpi_wvB5oRr0fUF(pAmev1G;I<%Cs+-_FE6qk=_t^liL&41QSlR zOmKW;;nm0@fvGlv#>@cLTnGcJMmIB45$gLR8~}S0L~itZ><<2a61F>RU^~hv!q=K2 zrqitKR8xE!>7ib#!Azvn8`4`P!Bo?tjLb#!Y^u#P1OD;bxdb0mk%@`3WPvPXUV|0D zVaq4m%^Gqk5PG^tYAXuixT$ed1Xt9LAFY9no4z6yk3b3`el`~LyJ}}dB63#B7)#od z6Gp^?WS;GRI?eAPyPY7@rXp4f#1AW==4ZMLg2_o_!N@td78Aph-1R4v>*q+wA9kJ> zs^deE8-jp~;OxBZN0Pdip}!VEK^eKgTDtRwooDySsaTl;~OwaHSphD zGm?ePjE>(^)6ZCJcztg~3!h^q2{aNnCB2?9JLWFC<443=8SYAhNRUzzMp;#CaAute zoxHJ^eahj_mrW!GnJcgX^WZ*E%efJ%>RFOJ5ylBPVZB98Z5ErduJ6B0 z`@re|c}nf0x+s)Ysn1o$7P1N7WFRr%BG@-3*`<|N@0Q-nk$_h?$FWd;^w#m95leQ7 zp}6ZQ#&AbAckUJ5JVma3#c`VdA(`lLljHWudzgL5)jlTd_`WYuWI~h3l;i+^k*o$~ zVPISR-9rD^`b+3vu+N^~iGBZIqa^IbA>e2`^=#E%2ix4SaG9b>6B_~H zNBJU#^{Ad8O0&r( z9_Sk=PpX}!nU<=|eLg6d@9HOEaFl&BKLY9xWQ}eU|9V6c$v=3U&PQbn^uOp`;!5C2 zVM04IACp%+WRtPfaEu{!1wRaY{WLPkZy8Y#SBhc#3XhaDMMwqNwW^;w> zwkubV47U@?gS``;XrHgNp9Q6QtDE}eW#wJjd+!A*dw&EWzaafDVG)Jx{XN@Xs> z&)<}xeNDDDZ`f#cmC#KJG+wmi^Bmnca1p@qO6~*aipiUc7}~TJY&s1$Qb#3KaV>4c(5rm zFWL*p*|5C=x_)`9uL*qbp)A2OFMcRlVi+Hg%FBV0-_6cm(7y;xOS5V;XWmB~Wy|p9 z!%r_V-TJo|Ab!Ss1jRRS04y|D%B#Eo=y`sdS>T-hzSLa27J2>p=bm7=bePK3;N&t!`-iG`9PEvb2pW=?j{LWlipp!hA} z(*5FYcu zD5G#1A&_Uj5P>sZ7pMvf|L6uuw#{3hXV-3#e}xIMLp2b&!~tblI|f++oh=CR^@$jl z!Z~cCxyt)Ps{a>TQrrU zq}V76oKJBHwt|rGUCK4!^;w*p5BF zj}EQB(V-LY)cN>7kioM6j>LYl_SLqvF;YINg}XussMwr(qw1PvHd2r0??#6WmMn_S zc$m#}=F)_?bOu~vvATm2AbQjeLq%O_3Cj_nCE(uix0xLx-;-k?p9I1?eMvl!GGho% z2wBj2I8$T?h6?c+(_`q23)Rb(vTY-Xn28RSq6@GND8cDp^@5<+X2;o2VTN}g@dt)F z_tf(0t~Vw^Fy}Xf`b0cJI|tG?h}J0>w-Dk2{gIaB2I)SNsu^sgHs5M$S6e z(uasWFQ)0mHb*DmykaT3y5f9^aA#yL$4}mY(xCU!t}XyT=b%3q&WT4Qb5+tbOrMk4 z;+Q*Fo20U-F`VIDlJrmed(2DJye-YAl$fbVuyG~6#%R|NV5)d%@`D=qzxSeG{jKvz zj!DH#^0mcwAY@A9UKw1#l}phi^cyt4ncipiYrWtyYgkjU13D#On7e-8IOHd zOPObdC`-LKqC8EFmbA>>NQ zbh5ZpS`fEP(H6hfezK2j@j&+?$qMsy!^ywtjp;=@r?>Z9#fKy9xb9N*Dkj+GU+%^w z4Jg=(h?QWtG8EcTNuolSN(x*-)Y5o@b!PoGzW+7E2iA3Ht`B3+`-!>V4g2znk)IU9 z&#)3}_p2OSz=8%sa%uuySxHj3LZNn4*aa8|k!k2>6CAA?Q`$IB^@3`?4QL38$w#u& zUedVM;9lszNO)m`SIa+^5s%P2@WydV*?Dom|IfwWvCT zkk8GE-N-gf4(LlR*QpUBgBGZX&lh38ulT zdPTRJ{;;$eGkAkW>SizTACz=Z|1hW*(tj+p&D^QWv`!m3X)cg?|*0(V@C&}5Q0vQ50 zM^JhVS#~ap zBKVQn@NV#FMGUiCxQs&a851zx12o{v8;za-o^-IHkaG;Ep9P*_N(pqk|2DWJ;{)G zxX2a(SVT($ifNJDZFp4x#J(i?^^Ch*(-Q_rt)oYfBrLFoibpV|I|YX9Y=cn-1B0S| z6B78Iel*fp@jmxNcd8=4MD3jt+~N>3uuWkY@#o3pe&g=%*P$WpUtZX-kgeUo7b)H) z`+od5J|?}$S#H6tq1jR{l&T*5>lYGtLzk=}y~DH!nCuh_sx!Ht*M2yU3+!g?yOUm9 z^|!38JzZMz{O=A~_Y%tSz-o6Ow=N7<2L8dI)h`P@cQR8l8l{G?AuU`SHn^6ERVisU zTYsUe&{K|ck^<{flPqb8mbk>~AtcKiIuk)KFM3XJ1zEM|Aa*1?;$Mz!o&+-kw8*C3i+vr9<${yu+Z@&8#P|YkJ2! z#GQ>YIJ{(?pE=0rjK6qe$=#)Nu%24`kO^x&b7%m7#jnM;xB*iMWRCIk%5H+f86wxxrKv{y5z0cv8!Nsuwu3~`7lg8l4vu8YASK2N z0A29|8(2Jp@Z?eqQY>lDVP=vD%v19SZAQ5dZyU|;*VD=_`p70E5yky?;8S)EuD ztS*G&-d^m2n|GW_(`-%Uw}{#1tTeM&Oi|nqoxjj?d-L8Yc4Jza4#ARSZ5B0qoJkN} zm|Xuv2XG?&{3&p-Ec`-`T{PjWw1+93;%oJ=5jUxH%hmR(I? 
zGPZF~siRBCGC6axAp;vwkxExy?C@CHwfcB;7+l&DJTSKG{9QINbo?unwPe;J{*O@R z{f)SOh`Xk?TOa#?aC+@{iHZWSwfEw#tm|?k#Ps7r!h1lK+Ie1*A(;sR9>El(K zJ4NEPcq?O+6>W^X&tSOSt0U&n*fn%mJowV=qRf3rf+Rrk(tjf>F5|gu*HF!cQ-fc) zXQ^Y<$MGJM6=b8G1^fQxL~x}QjNSh0A&k1LJTe7`>dmxZMj6=({p7Q(@lfr2V3^E?Wd13 zs!Emg_lKAGPGU6^)05@s6PD ztp6=-&r41R{aEd18JLIqsTC}{#{)NM*Ca~V4x;2QuJQyh#0gwR#Z~`IU63@kr>Qd=*UFs z_&L$sr$8eTFTIA3p>@hg2t}Rys)7}Q(9{#9$5_lzX|@mT!+0i3oIb3B1((z&naGV@=`H7gA6evL$3*x6 z`su0mgj+kdOyBD^0UXTWOm%#y1OAwZS+`L5U8cTD_XN@Mk3S>M7v8y2{_#G9@SB|4 zS`wYt90jq$NmY~wb1YOwhL)|%w?L9B2TuL7QX{0^ z9lTH}`7}f~hi(>`M{fC+ z$wdd^Zf*ymI1}h$%?6LQ-f~^w2ROZnYLINJa`NiH+^s02uoF(S!msh2@!xo-bBauV zBlX*MU0n zQFZdbP5XP^Azvcy*`#(0;8zb7oEu>gF7fi`SmS%mFIaa9bdZeld6{@=^}SrPWoX=x zIwN;ph0-IHbHDL%mWI=~R&rJ#-I6C|f5yRqMXnFg&rh=%CxkyyMfR+lh@eL$oPUA` zYx5q=e|tjdDSO*Hf}6y2YU1nR?O7(HelLHAYT`9cg~%0nCS(-NGX z-kNLauADA_8SN~__9|TOQY#Jm-xQ?_I4&)?>YR;*Z z=9;9u;~8Jc^^PsrqM+Rd@VRsiJwo=B{02imoQoD2@aKB-d}ZP%pqO{0QVfoUhre*7 z9!e;LNnb&i@CDOF;0nncJ`sVHJEo_7;@-nW@vlvg!7rR+1>#6F^UF3_#IB5SnOns| z&!KMxh{7NS)+hNs=jyIAw_*+8-?^ECM&D9O@d%}vx$`biQ&V^9#WjP zU(}72%8DaxH6!%cB+pA;4(-DD9e-^#GW)8Ggf9kgti;Hcs^I^DJ#-slMhl-a&tzN* z52367;=91BN~G;d`mi^CB|43U(-A*VqRf37^4)e4^-&e)s zEa68A16iczHm$(h|Dxwre@7c?ALc!P?*R3m!l(waVX-tFIX1avD?Bobv#(NncjI6n zPB>FXA}oYmGv2LpECT*svJ2xlBY{&Aa67l(`mveN(LaRJMw<4{IZ|`7sBwpEp+gV{ zN6*mJbU4e>XXCt0O4Ah-07S%@!$^H@vBCW_*M|?zgH3bC%l6l6(Oyl6Bo3dwdN{aB}$t?r=`YD z#k%4GJ7yfm;-PfqYqZFzS>ZKuZnAWk-&tfQz=eQ}qvR$u#mf@eU&ATVK2UhSGmVl? z&Hurk=-WW9SCZQEN=L@6t|>Y#juc?hW~QuxTG8BPGme@H%QK<`P*rS{9IA!>ha=g~ykwr=T(j*KqZ2VB3M6ZHfT@GP& zPOs1?o}fkylm3;`cPz2?`(MibX1`+n&Rg|Q>Ve@$i?yd4)~3nyp#F=bzO9<7_g$^u zQp>SFhmAN7S88cIikpRJQ!bNB-E>e{NCfF9)-Ogr2mCv+h5G!+OK08cc`q7KFpM!rJLJ@zwWv@ zH`bD!=wGV;ctcR@In7KgA-AN&hj_bXA`+p0>ObD!*Yx(5h*=SHg@ZSEa*cyoVNEfl zpZwWfzdy;z^seLg|J^zP>0kz4vJCDNInX|V5&yLe-nU0Q59laXW2G+hGDn=W|cL(=}4~`OAGhd2))?$|E32ahN z7rI_sNxq%)&_WJ|yWf^)4S91JeZ=5Qva5*Lmgm`@bEKnBP>T|iv5D<~(*;?c6cVy@ zkDmrSa>M?0O<$KgEq)aF7G})nc8~{u+6`-Pn8LqM$?WrnrYL@O)$x^+Ym)hCsM})% zQMO4ncmpdogW`a0buIE*lZuK z;$F@$@uXwT=MaJh{w$6Cw}TwJ+*nmbfkRi=aMP6*pM;9u*eE-CnggI+q+l!L)!Rib z>{s{7vJqG$EOM%^{C@0FxuiK6J(E)XDYJrrxsduFhI&E*eU#dzjs6z8i&9oWdKYOt z5m}khL)qEWbK>uq=8uiPO!Ic>bnP8F{L@5HhIQcbsFclIIUz4FgHMQ;Vv;v5N_%wU zgNKe8FO)V-@`7#e^ZbG1<4sQ3g${Bm1+VkZ*DQN`jY53W(Bqm`nszLS{OZXqL+#;< zn>`wyLm67yjx8w4++p(gFDNF#I#T%~K*A;|uyxJK#IlIFN>rP%TF}8m#9Ak0f9gz8 z2r^g0doDL2XSMfsnG|s5*wQ!-0CSIKI|28?|!BF?Sq+`im_l3n~XzfTxM!e!(Tl ztEog3ulAzG;}+1(){t)Lr&c;Ovba(p-zJP^H2zNBn#Z_lS=+b zt9?GuIAGbI^+Ux*iH>ZS-;+@1bK>YfYuyhVKmWI#1+qa^LUxX8qgD)bBa~V3bo3<@ z9z@Iy(iHwfgA^b%Etb&QlP}S`=0K=`nn%Ja34o|%vQp+sCfAi1KQuF9k4A4dvhnCV zglNFBq{L;-?8Ar#PGpt1j3Q1_v)1MYH&UKwcouFFd*pMd(*c=3c{mx`P4i%NYa<$| z5qq@M1o+(8A|vtTcvQ?n_O$LbarzIXO98J4K+uH*YiE$7;o$u`eoqtN!H zqH8u0+EhiU$={*o{tbkmE?i9|#c7=>Xu}7XV)H1UCdXWT{aln|bKDG!CGP%9BfIQKJqva&hwQd_@`9Ms+*@i@)7yOG_la%O^ar2a6+CwC+LSBRS zl)zlQvwY*3q^xC${0FaLxNVyX{z62t`xqRsOhYnBHEk$)H~3mJ-Kx@woAU>6MmB0u zqRsharvvX;@--&U;wMMqb3x2*_Nj!9ID8F0O}8vW=+;&YNul-N0m|{%K8;uZ1k8PL zfGR9zs-t=GpO()vDBqYFz2FI_L)h3nvbvp@n!cf}AAKJedDuVeIP@=v`hY6cPS=rh zM!Bh-@##{wjc3WO=a@(Znw3*Nr^Lw9Yg8SWX$Id~&5xj>i{!y_Y+@c`0B&AP?(n=wC0C&E_FvNd&8uIz+?#h{Wj)~ZZXQ16q7P0!L z7FMH5B$W0aKG|KmSx3uJ7h94c+rK=aIklhYJ_~oEiq@6`*WgMTxajCBO@KP*WXrP{ zhugT?8#&F(GHk@$9vj}WM0TbSHFF?qa6ZFg-+2u@lWNn6$)`#3+w!}03nrI8 zOgnWq_(1|z22&+nBM4IFRAfNKdFBj-5E;gos`oxJ!+^ibf1g>e^4_NaqC-1M?qK#d zt#R6?_|>2UAKwI^Mo{q-Tm1G77@7FG0PIodGvTR^G(hm|8lu>oNheK_v zSzzE53O*cJ=pOMSA_glk;~iMA!X{juci`)R*LEZwPp6V^v$Lkk8jbCuGpnu^?`VT8 z0K|#EULBe#QBl>&V$qwGiQh1O{&nk^QA=H)*2AAardwy4|L8Dz5iix!N1|OhL(Xu~ 
zH*%_5!>zQ+8$MkP{yA#__%=C_iXyUs2~kF9E$D+zxJ9P^8Axx{raVB8lcutQ?l*wC zxjBbNg~`dGd!ZgG&=SoVd;`SnkVoU?N|+Ls)<`>8%)+pHQYR_pdHI05x&xbtyaUi+k+1X*A&r5hvtgi7^xt%V}b98HK6X;_*^Y{{yYY-)$`D5nBzCn&Fd0tj#0zR3~ z7-^VL!qYkMI-~&#fmUmm-a=J3$cZ>ks5pW94A^xdf+ND73Xfa&e_v zF2HC!;Ye&ho89@WP;lWIvZ*zJ4w{~{VBP0(u4Rj=F=yX3^U1XP;X?z>1TsZ|hM_d? ziV*KHUiII^DKtNx1US69?lh!3cnDW$x6?vxgk*KgVr`{x{pcu9R)i|BcAMjvKlb|^ z$-{HE!Q7o8LF=+(CVQrC9>29esWvI{A}98fca}=6m%Grh_IGbj;x`${>3apAIEw=5 zhcp|iJ>G6M3)sxAbk{Gk&DOX0D|Iu*!e-52DKGCP$NPTFf|yPKvQEQH!Mje$-x!E@ zH)-%bq`~c89!tSZB$f)La4Bb#Q`kk3r(D{5__r6J9f6xx9wk~10PzzXae1b_WUmYW z=~*-f(TX&OF%4-rx*sM;h1XpOqau6dz@UI*?DEl1n!SMe6^j>t+2PxdPvNkn3I#Mf zpOXm5ETWh^fic`Rp8nolY2~+;6*{#5)(R6 z%KnB)inK|*3+takpFvK`94;=vT%`_s&ep#5_4RTNFR}h*m!uAU;D6*_?{H86QFNwC z5sWs6dbJo*kfSr*4^AZg8K3RoDikZzEm}pQFzCq*1rGIY?pXjd-F}S@Dl80}IpRH; zSRVN_EL+^lsDSYFF@#j1S01b5=8uEvk~fhADN^Z0T}E^l+V4*N<3B^lb-&ggzPxi& z{RQ4vX#o>N`>%^yxGQR0Sq|u)dR2ukynJ2TzW-uV+6(6)vqB>N6n_Dc+9I&~QZ?~UenJN+pq>%k=Hrxvw?g8X9Da=nk7C@U{Bican}=ukPdJ7^_~LN zH9-P`ECX|BSFPVg%D>EPkMSOT%S*hbAPJ7TRkVXqagKMfy;3TsKFXb0MI2O|LHk;A zau!LQsGy?0`Zvq0TY|P>w}FBYa}uwGHx&^k2kB6D#H}MVtVDI>?6Vuz%mJ%Ebx?rt z)!b3U+br5SU7JVRZ{7?B;vb(|>)7@*Ua6*o&l4}>a*jt;K{Ed1+Q~&kJ$i$wF1`~p$7|@&5J@2wl zT`@?7c#&te$di6olYw|sAL>SkX~Sy9QhQ~8TdgLAm5pf(w6u|JwXw}ol6u6R14@iK z?_VYfd<-UE+xtI^y<>EpZMOy*r?G7}wr$(C)7Va9+i7guw%MpL8e5HXR(tk--+td1 z`;0Tj`gN~=_x(&<*PQd3dx3nvLeRMa>B$t4x_Q|PKzfx*`xsFW(7|LKKt+JzodWDd zu|S6a^KMtx?q6@6JlmRd)t~V=NLJ%=CxKf<~52Q@M*hI*tTN+G+19L z6H>gKIBGI?fCtor1LreF!DJQAd|0TSmT^Goj2pLum`@}}#OC;rGi(t%k-Rf%nGX^{ z&zc-3adA5h+NtE+Z9MMSv?0?`&+lBd>FE`Ud%jnCfORvIJUorj0usI|^(|rWT9KH0 z-rQ&#k$qRiz6ECdSYS>QPG%ZoDK4KOaoF4J@;`=!{?Fb*703`%&dNR6lsvurJ%#u` z&I#nF8NTQuC^)Q3>4}+^#LzqidMN>cJd;GAO!JgRF;IY0rBc`e5glUl=L&EGa(kv)Z*<$1+eqZ6e^}laGY)nYKL4tLpDW7 zFW6p7?GQA|-#GK4(-@q}F@8p_P?;hpe9<>(PV|_i9upe4Tr~3W1%8EH&`>7TNu~}= zVhouVUl$#+Nwf=!<%~xlZ;;YOr5eEZq{D-Id3uOpQ2>1{<@As-4G2q{v2>tQqwnzk zz+67Tb^_^5O9BePw(U*|m9-KV49Nxissuwo16eYXnTJ+p#Y;am)C~k1FO~cR85Hbj zcwjsf0W1#e;L`G#oiElia650&`{bQ(-EdvW0zZP2TImAWA(evjPMW~{J+Al)wlHi% z2TSKxG$byijcJHH7w>PZp#3{p{i0rZ`rlX~5wEt0m3|r>b0aP|NR5$($RVhL?r}Ho zaUh%d5t(G2*{Bv(6^$lnem2T>;{JM%f^ROd3!wwNwa|Oa?9N@xJu7Dd#IZnRS zH4J_F-T=pti$4R3<$%w2Wp>~2H8||^St!`MuyJOUp}=%cLa_1A)uu5pQ;hoaLKIAW z#gE5R^fEPvL6ka@(?YvM`Pt`4U;wH2X4P=moU?>XXc-mi&KrAyc`)EV%+^?ErCD(H%FxZyVU!zsP}`f?k@)QcSzrIhjiDS zLSI%}O++x7N6n(4Ywu0VU){BkOh!aZgiu)&s&bvTFcAxE2wga~??<2w8Yi}pB+p{? 
z1_}Z^2pT)q!J|_gM|_-=j3R`u(#}?}3WfC|DK5;xYEUnws~A=o9pkw~0i^w0L(b7s zOe1ylTL;e~r=Lo>UwQ8a9qMSc%Q?-pt8ZYuDtu2^;5G)RdYPW>f-`$3qCB*3B^cL5 zDh8jsnH6jzW2JYJ^7jO3oKWoy?SitFs= zW_M!iXM*;j1ZhaAF;dGZ1rSyhA0X`U^cDX!WV6!6TxsItZ~CyRJWe9;c|oxo83xn{ zqb6d7I!%Q3BJi74;_loyds{o?pF23ARP}mIwsG0-c!PD^juv^(Wd`Ek@rL(E-lpmm zaUi8T$r&f?ad#y}u@<$O0tEVcN5^C?tTf0ao@g*cX`dCxv`+uU%iB_a=H$EVbzT1} zTHu`e(<;zSNWpwTOe{V!ahL-iM*WgbO+-izcdZK3^h34)506EJ+feX%fdWu08{TDc zMf;91i>`oI9=t$?@kuEu0Qh&@p!RRF;z|r$SMJpb)mnPp=NZWZ7zJo*&}bECtUKoN z`35g~kqi5e1p25`Wtg69HQe=|LXU(~YE6?d$Qa|;0AcL>CB}od$~_TYtm)Z_uWcf8 z9Ybr^GVTr^A?RBpW_!P0U#(sEkXq#12s+Za6K$6;*I7R8cKJWXaPiJnHy#eqO6}`r z!Y+6g*c8UNmyy7AV6wk-=~cu!Fohsn4bt3#V<6Pzj$5X8p%&wwfY#=#>;(>cv_8^6 zF20jO>iaem;&D3>2p@^=6*cu@+B8!kh%198wsz^YK!ei&p(ar;?(xBjD zbI+r=8b)FxXzYbgy@qr)-}iTb&?~tihy-YY%Uj13_Sj!6YQy}{_!kJ7ETY^UvILbq z7_JDTZO_BjxW?gG3(Buqo1ba=W6!b~8-|E~yCuRH=F+8oHvYndnXhB^N^hH=rmcIHo4wqp=!t$1hy754~DT+rPJUqL3n#9L!N-?U&r3SV2T%Tx{WM zhDnukLDg~sr814(PFV7>X*>`gqKFPiyeVK8EFftgiguij{Q8bT;pNI6?8ON*kV8wz z)~pN$b=-|x&1WlX8*&2C_8^Gww)COc^{FCct(Ki~6l-gd9PqI}r(89E6Rz8C`CO*K z%BRyWt2iuh81k+0@uGdiZ5dGRncJeh`vvvDgAxZLWl=W-RT={tctuWu_&^Mj4iKpa~|ZPZ!T43?MsTaVCpW3}!4GgCd+?zFiKt-LxR! znI(AYMD!SiH;QaR&~f=q10w`31n%}Lp-dzEjP-RZF~`#inLFibXPiE9B(kOWn5GUU zQgLjMIVh=pXD!z#g1Ii=CWPZD`-F5ITmA-h6=%PB?VEa+xxf6@pW<>2Xmf7JIH36u*0>MeK5uyC z>oKNzsS{_`61bV;m%fjn4G^2JFP^;OVn){h(n&Av<_9B9HGZvk2`8;Vkb;W03t&` z*mo?J*^)G@%q(IilW1*i3J6TLIGh8tfx{Jx92BuEzGVYCtMLj**~4;8`sbJ9_Sz|B zbA$?imze(6_A88Uw^Rg-($g|H&YWf?Y;fdY-OZ03k+!nZTc&s11N6m8e8mwCa8FIp zZyJ_#yq|!(w0Xl=cVrQogE3=&8d)IsVV~a!<+zBTR4!UZjLc))|ukq2~L z@-if~_W{0$me23?5vs-6d;6OOm)PFpGx|DRTQ3xq_jmLsP-GFhHhW-pVJQKH1{I%5 zRTPE9Jjb&o`B_e1Jf=M^oV)7V@L3XF znt2JXfqCnQkLC_qI78vKxn&peol8>>%UtKx3~YD^5_n3Mj){Y9Dz7hx5ppx$UUru9 zjcI#shJ&0TNbuISHVi1sGhG3}b631N7Y5+!*+wLSYpWSW_LSV;4CW7k7|+z@(k~>m z*vjxQJEjU}`=;s0mW2;mbnP==lZy7QP7ZJFeG$Iq-X~hJBAS#g`a!iAQ35uGqGSZs z-R``@y`juF2!MXd+tTb#8ly; zI|HW*{>KE8s!Q*C#Z7s{h0%XiGy!$ADo~GWb*lz&)+zwWts&5%M>&!j+`gAY%x1Z8U}9{tNxO3Xrr;b6x;NFAE!(e2C4D=cCiU~Iv75?S1z z!rNV(LYU&!WE3oyV~+q@2;pNUT}kOB}Oirbb2sVgD=yd;x!{H6h3LfSHyGkEozott?23A(#t1~7?ZJgWsenp zS~@bv6G-TAoqI1!9z?91OM$qWuy_tO+WLr7C)QmK`?5SZ+O`X;quOfHxlYi0fKJd# zRu3B}9vRVblp*5kjw=&}V#hg%Jvut4PkyY-P$#Fss;w4>f9Dldd8eh9`&XAT&iv@B-%p=lGc!u8`BJzF$@?)Ev<_M~jLO zKajS-R!4DguAn-EXb>Q=2WLl40S7eASbk^SJHpb5ooQ{_gPPgxk0~K>ExNuCL~UUY z+xw9YZ@(J0NHJ1x;`ed-*@AD+6#_wNc0b@A2)m=y4dKW(`syAE2P zY6!Y4=owJXDHHNdmuG8Gh>U z71^D)m5z@PjYw$gcJ8VX-MtMcpdA$(u*gU#WjDhd>P?137w3l|3D&h72pUad#*AsI zJJE*>9n1**%YpvoTDT36DUGWMRS#OO=B)#Bx_>j>MZKS~4`XG06tWoi%&41_dw)zL z%KJ;G-KBe>V21rD(Xtrhu#q?>rrzy1!$@yCMI=#eI;T&Hb#<0_pKy$>FIDtFJ~`B{ zLHmJSs-W<#Z(qsehI^;!)JDf^yw53Yg)HPX_ucnqx`^nnaoFEBw|h9RV?j523Jibz zI%~J29wAi%8%|Y}Mx8p&F^72t5Gid=KB;x%fOI z9+?-tSkd%7VN%oTo4hr%#O}tq&@Ntw0o_owFkrNt)uMW#f64vU_j)hMmg#6Kv34Ry zr61&Uli=oZN#!W=J!a+*<-UQnZ3V?P%fa#H38Tc#1o>4M=+!LD0kU@>DXx12ziuxa z!g9t<{&9MuMYPEsWJDom^)dRjuPVt?>WXq!!+mrR8dq7vUNgFG!5WAea#QV9B?!(- z3KPo9vbnQ+LrtsWPQ9Krz!&vd$IcGtU}`Z^)>H1FviC2^`5v60YJ#n^QDI$ zg(UnAgr|}yoVkk`-!4l(Qo1I_51xGdk^A6lw1f;mKEv`EPu-sEiEv`+Ny6b5=d(YrYL!xWLL}7Q{)pUy@(QnD!GEHY z<6I&LI8$WT9w}JuC9o(3{g^R8u@yV65jbv#HMb`nf z=~NN^g|TgMBVwkUy_*j#y6Coy?eV21?-Uz6wMu9FvEC2HxI5iCFs_8Eu_4tIr9>w$ zt6lH+)vc;24og(wE#uZM5`g{&ceX{Y>)>Vl`lccb<-J0ASc^60aX*Hr<|l6;H82)7 zLdh|d(hQO_j9V0Yr(E={Jx6#LU$z~%`?)EY0-2#=JQJmbUO_6zHbMrk5)_l3@5Ha; zg6oK}GtQACF9RJ2n8k0)qcJ{RCw?jU;O+XP74AD?vYSP z*i{E<@gLE{E656{=H`aX>>k$ZlibE>%sslJAC9&n-)|h%lBo@JrG{ikPjkUC_sD|I zoKKlO`F?EdT)bb{NBUhc^jN)SRX_7)Pc_&zD+FmR%ax!S!eyLFb%G8QqL1gx%<>ML 
zl0>@<8BkwS=M2*F!W1xSvSlK*c2m-frmRp7AP)^Y_941CSX385)QNALQC{GRqoJBt zEm%cDsoyuCd*8*HSjLHUe7nfLbcrkWiezA^D4xN;`E{)qL&)a zd)E_3E&21;)d82mgq2QOl|c#>T7Yn$*%Th# zC*1>0M-)OMv#>{tOJKNpIRwl?pI7`2)>;e6ib^CccSo&wV44*qBsoBixNTAj=Crd| zGp0Z3lae|w@`e83kO~P6Rd_?QDpx0CYY7QvvSlUQwNN{kQ6MFB1dLOSNyT<%{NSV_ zgJ=;@n78v&f%@NGfKMZ=2Gt`%fOvE>M?{DEt5hchyP&>%pmxtF)itp1ll~nGan?d`lJZz0_@5N< ztRH6~X9D;`K(t&0E+(Nc`7gqzJLUH~Shpqk;Q!^^ezOSt7LYcS(UG;mV@mPF_McY- z6XOJeM6xM0m`T5bN%Dy>ug-v9Hl%2l!h$U$MMzkbT`;C6Vk*o+el6p9|W!sb%HBei9Kzzop zaE)i4pKx2yh2gv4rxIZuNpn^O#3Aw8LNZ}!1OCmGMn(ndd+az^uzldikeqnfgfx*Xkw;{)xVOJ%VS4{R#6C}fHuz-pnj4F;(Sa+1&&GCl zLBwIgaOlP4FZu+^Gd|g!wloR{W_R7czrAQw-ti!B%QQBJaSfA+)vd&h1d47Oz~kZ$ zJkxnR;Trd)!{8@kO(=+~{9Jr~(h9>(+`=`ZYMZpkM|x82=e)ua&oq-pEBEB~ z;r3Ty5sn!KLI_o4O3Q12cw&-EI*Z8smt%u`?S(n!vx}IOC$J|hQ2*5KPPjFUJS8!K zrr00I7{hUn+x;ix7>@oEYny${rE&^EaB-QsD`(icjL$K-x-C4pZ9V+!^P+nS;Vl)| zHqMH9!9=R6b0VTO=3BvOUspoe*@k}MRN#K1?0a(q5kqj{*VNg`dfhQjT;Uv6oGsq4 zL?!2m8p;fcl$Age;`mvuXMsXbbd-F8j&{kKG1+Dt+nKkO2ikqRxucdv0n2}k9(y%! zW=Sbhs-!!qx|tNzPidRS~IrDU<0(THcTD5-ILs)DX z%q6%L$Oa*&jvtJ}OqS_lN`3MQ4SHSGkj4E2Q5$HiQw4*r1F%L6U}Pkbg?)>RorO8w2xneHi9w_1$MJg+4GEI& zBY5Qige5FNu0k-gV8j3=Xvk6GkJ}ybt=FTgyZ89>?yJkv?-!QCY zkCn7g1}YtG=bym}$)u47^brls&QN>bZP2hT&1XnN%Z};>GuCN3z_}56MQ8?(y@bq^pRU(1dD4P{}oBhp=?db!cgAq&@A{B8IfXOI^R(=`N z1+`s;?9&j7R)Q0$uVz0SXxgF` z^^c+f%n7$5@#n``+Udtbk&e0A(Tbe1c?7N=lduW|4ji_l}(( z;X8QbF^jK{f|@;3w4A2Hrqog*D?Wo$;gcE=*~c^4;A6^3HrXZ(ybUrZJGkpHTWIX= z@Aw!+5+M$mD1pYy`jDHcM}!{{^%%YGm0q*tb`NdIUvB#JUNpMV;JDxhCvWivz<}B? z5(-~9KL^OWG%%8tp3RmMaU{7<;Pw8kVbt0TKUrb9>AiIWdqI}Gm4Fr57%SXx@#<$hf6*mJE^aUiw6Z1F}8BNQ)|7AFM?o<2fH z#Njgc&fF|qj(i<{Qk(v|HBmdFD?zIJTc}uNNE9IkC+mfzI~JdDyRfF{*8>9oS*mgG zT6n{%58b}WPcD>ixu_kmLx7o`*MabYNY>}`0s6l z$E`ECJ5Kd;c&eu0|6}@46*M86-6~ z4I*SKDJiY99)LjjiBW4uFgg@P?7QUx?V$C~jc3Ue=m3S8NMxn96N7t?NpRoUU3aoP zbMj@?E+mFnen;2~8|4k3OFAVpOx-QpPw2E75*?rRTZ3{9%Xy?2W;k=C&_hcMhK9*j z{IP?E;RBfsPryIFfn3Xdet<8`x=}K=%t4V9Ih|vL6p!__#0@xA3s?mKxk%!LAzrxr zXW-89=RjV^`iZxQ)XyFm?#+#VOOC7m>2Lmx4#dm#qN)!@0In1e^r~T z<)GsF+BhPdK_C3ZzH=H(5bwU1Kz>$tY zRSFfXvcS>(6pJ&)5I;%ekzOziTL#ktv+{WpSr%JKmj9b@=3o>uDkk3!Hu^`RtZk52ssDTr;`p9 zi}-tiP3|~Q@Ai+AUVK&1$!}dFF?1+^NNQEl0^-e(LC0stXpyKb3o^0Ai3(ezF7Sf1 zx*+kzI3hq>-(_1K>Db$WYLVr|9jh^kr$IXk|BeodMt!ro%PWPFVcQcZrh1d6*jL)3 z(Tit8gyn()z>1U{tE!nWBe7!(vp<{TSa1e<=3y&v`b9~JKz zT~>-e;sS9&Rit2ylknK+b7Tz>pu#>mKh~sj*DrrJ{3W!4`Va>TJYf^mfIV{{ILK^k zY1T34PQj>XFvON7RDU*MEZZEUvIkL@E?Ns3UR+uhvthXgbI7diF|$ z5nn7Mz@O3GxZ#l)Hh~go-SD$h=-S#Q{^jFC%znbBPVIV&Ao3YVirZ1nNrwKc0?lIo zDO_Qi?`h!h-SKG~?rZzS{sDn1^iEf%z5w2?4nGbt{1fp2Fp9}d(I<4Qu$DZt=ZgYA z34xCJBTqbsNb%R;5I@X*;BX9@IYK3t0dUYJ=N;mRepTwgb`$wkkC1T+O{~T+zQ1zp zD>Rqi>JD>5XUc!oL;lPjN#n%@vuf)#G;x;ARz-f@S#xp<0($i6H)14!2@=z}?}6yscUncMN3q))!$r$2R-OCe~_Yc76KuePBUx}M{H}R7C&7!e!p#qMG8B}+02s?aYl9RSHF1yduXP%neVr#ft6e!RDTc! zAZUSrH3c9T%a&`t-OvA=0e=PprxOcsrlQLlhcQQ9u+NZ=x8e`7P^ub?BW_G(E^u?? 
z9)Wz8_U%TNtxn!i`HCYglyGFSAi1S85z<$FybD5b%@kUpJFdN5Wa!D6q7A8RJ-CrJ zgvLdIC@Hab5J~mz9+&rsx{TD=ww|GWy{II(_dszXbKYfCRS}jmMl&`S+F?q zqOyqX<$jV+3Mk5&j2l?o>vl>>EITMoqn04vqA+yGi1ko-SM+*4d+Y^cX-Y`9$g|)g zQNJ~%x8B3J&HlKnC^fYN88V5%&Qk7UST~7CwG`{rk@djK-ErGJ)XJR zTzq_diXrH8qlc}w!((P_usF)ro#iD-GuZMn_lrj8H{OIwBL>NIy}MR2RroE$uR9hz z@Q8==ty;VnDp+yZ{7a-^8_NRi$h3R}(GQ)a!)yD;09E-fC^mkmbOg#@M7U2*q{XG+ zORfC~rCcu&e!wba7aY@87PCvDm#0@~W1|ePH_Evl@2IoJBs?unZtuUcQzM~8=79m^a~X|q3E z5cW7diEO)HI6gVliG=P-j={Z;SAtA--WFqbJ$kuDvQey2Ewu;E+PCtH&heyNxA423 z(Oe|oG97EZ0 zSu)p5gZs!iK}Tc{2ilwCPV?@9(PzI9-hCFlA&Q?HTOeyHj47=j)*0AHA}4s(4`mI3 zoO&@tJ}qCPw=Qr%-#__B*RZY>6-1ZlO4Xj1&*S?hkLCJv5@Dd={H55IRNc0{1P!nLUk3R7YibZT%l{p zKhTpn^Xw1!LfyNyz@2}rGko_zro_$qXGM z!-VBcVHo;GY|!{1AzT<)1x!+!9o$Vb?NGnmB`9EB($YD%PB=nFjuDxDvr+U!C1MI= z;=`gC^*=%>)iiT}BhEAf2>{EIR>Y_(jv}SZ7ZfUlh9>{C3^lyJI|$_~UV;0%K1!>b z-0ZtH&I~m$l5Ttbc}V9Jn+#&3WTvOu$Wo4Gt0zSMt`r9L18H4Y&(^SY3^le;ft4d) z4`0gvj_Ete5|Yezx;kKOaungvHE=h!3yy9Ymy_ z(l9I&`kwa4&^nLZXz((=TNDbLrlCNm&_9Ooe;G#z$l_`PmBB%w4trI&jVX!ZBTx=% zr?y|^MGHXp(=OJz2waz7prt2a%J_?Mj$y)}-7liF>Aq%8N~+qhrk`1#3^8Q;$U0?m zRe1Gc=Zm${jgjYD8@`X(uj#g%$gsh(WhBgZYV{Km`qE)S#oJSWgz^bmB9ZiKljdP5g``aPX3|ws?1Y zc#X{3`O>vp%n249OLY6n)Oo9>+jUrc_Q-(G^fmbiMxn*}%k$V?^CG?ta|kz;{Fg$M zHllvL$>d~VJ%0McXP0W?oK{L1YfDGfA8V&;C#G-pTKv3KOP%g1&)0fl^P)80lEcbp z;C@A{=!`RNMi|2=jJb>4y$T{uqe`IZ{qyYyXe$uDrQXxx!o&bW|A&|I{swXkn<>*9 z1M}GSSg8O1KE@m>@a`T;#M-^<{i8^vg|lN?jSNgn26bI{0d9ajI18b0ocPidCj@#O zG-;#}oIAG$&}wqr@VBCv8W0Bxy^iaeC;P~uxt+^{y$NHuAWBSjj8Hkv zZ*dj0n{Jo&%L}b@^ZA48t2l(pxAG`sbFRfuhq-`EiXteQ3x&ySj?Z-52lz#XJ$n9_ zGoDQq%JLVvPncy{<+rwszN;lO8KzHbA3$V?=P2f|E)&pMhKS514e>~PLimsw+5yfS zhgVGfri%-~i_;29{^D*WS$*Kr-CWa=bVXaitw zbo2`xdUSRUw=JY^TO+>rH_z>!K3)~e^hGk(ixFA4_JS^h<8(NyJ4TLU>?8XPb54SX zVDNu^@9I@R)e;>iEeHd`|JcCqV2}+`fMSt^Q#93QqQS;-oE3Ed&fNVRl@sauVK3j} zv>)7*1uymx7_U36wygr7*0K@|bDUOZy%W_PGL|&xQsRaeiG$G1+!;_8gBk1xrzDFQ zU$O7jv1bgRo?3s+!{&Xh5!hQx8T*2F9Ko{5w@<{~7C$c{jWLThfdxAMK2 zaP?j$A$&(Jh44+r=)KVgpA1LTTK5JPs$bNingP)moo8%Hl@jsSLb7Opo3vJ*wBXNb z5*cAdNkqB4Z2ivsc5St7sp70@F)@46D1nqhFJ>&#bkDvtRB&{rmQ1g9s`D>R9t9Qi z7p`FYc1pYZ*caFtb)q=S?fxPgFdx+x%&E{Zj)nTv!A>zPa_1S_$67 z?EUrek3WXooMA>-`7xri!yhDIEKfV^Su?E3i=;|rOp2J>%c9TV*aBHo@)8zW8`?Yb z+SBC*)7g!bbsiF&OknLhrb(10TQXD(A>iq_svz=T&eyPPuNNlFD;%u zQ5Ih@t@f${g_D4=HQ(zq$4{cAiw#p~1x&^e@rP618nBuCM2*dNXYg4K!Yy-^RM&UE0+2G*Bt zyZ_{0Jb765jv1PNZ${hVx=%g-$epCFbF`+y0jB8Slnb4aomK3;F>B!M19$JC%+WU? zQ5}8X$j$4PRg7H+EnjyxH5;N40{wsR0q|%0d+6rv1m!`0fcdkuSvxHU6f1i~!pWg|TiyT)gEQoCkNgX=?9f^%3JqW*iK%aSbmv>eCII9& zrip$0x_M~-(K&w}b&MdyAW~99`_QV%<8)Y@00! 
ztCC26GE+j<{+qC}=l=Fl0qjVje}M?VyOlu~E?8GRabVo6I@%wqfO+G`+U>L!1MtI~ z)7(C~y0e)^`6C%lo{A3RqLNbd>TRH9chp1{@Jvl{0`9#~G%-h!W;Bc(GGC^bV{);P z252|g2&)@r%dw2g_|~>rX;4#%7N=%3Di;{3t_~55MQ7SlS35E4pxJ78m-d}6w6bF6 z%`i=`d59cb=rP4bgd2${^O%E}76oYc|MmjpC-Qdf8%u`o9f>bqv(pN+;@MdeboNCMOFcMiT;H zYkr|P0*`9aWr}7S34h+|1#g_x{Pq%J%I{%Xq6nABm8-K!>fjmFkYpCTVkLS%Tpj28 z_D5OtvR0!Er#yi&@0(=x&PiK53NI%Qmk$?h#an(RMPt;Uq69J)UXRZU(98j8b2Se$ zv*SQBY;bF+e}eoc_O_CP*8(J3F44_`|H)Ya(Yh5G{D>4gN9QzKdO@VQzAyAB>p3kv z^8+O3<8o)MWpQAj1#K%^M(*Np-r&j_zOG1aKN?uBEz4mARo&Q%5zd1z4w_^2Fu9Tt zTP;ex6N_t#x+%b4KbguCTVX!z#L6%Z7+Zq4U`(kHe%QOzkODA-QzN!E7Yyf2%%KBf z=$=rP~{FxKQi8ti`o)*o8QpV~V!n(4ge#)zE7r{Pi7g zaUNg3o;!%~%TEck&YVr;_Q1*G4B8SMN>Ov4?1&SMVZ|P_)}%?ps?ofl9r@g1r|;lc zLj2y;>ru^4dSv|g?T;wBTy3@$LP9#BYxzmFzL^dMUv6GwmxsgU> z3)2g-Jj7p`Hyqo6jDQu&X6Oo{`8-ugmRaK-MCae+KyDH2Tn)uQZj44pllGQP8exma z)BRCr1`n8qOLB0X^TgOdUa$uCr&JQmWQ`W>GHPCAV$BPy-h6}CE%h2OMa<73IwZA^ zW!x3MOE`o@I_WpNNOxirNn>|Nj8K-3eMv=c9UR^9KU6kYR6Ax9d`W5z%~PPy;u9>nca&aX1TX?*6`ZRpi9 z5|DU%3Ux<@C|@2f5AV#8*u}Ne)01T1{#!ETm|tUTg^Cn8;Ti-{qC}r-ICJ2HC(k+X z9r1!j1_J;2wyULnZx)yRj;f>1&3%heau~+zj9xM#vW8$q7p$0IG)%#G*A73@K|16oM`k}*GulU_Wo%kXqb8l zJBcwG2EMZFP9m;AaVoTa;b`AhVBT}_mN(J3JjNN6Vn^Kg9u4K6l*s*vHo zUVga(re+_C0tF@#wA) z9Hu86d|R7PE`*&1X&Gf|U38K7VAn#`*ruByqipr(B&XijejxcJ{4L{%;=bg5EDm*@ zUo*#PRwO^>bk>*(Nf)o*soA+hn_ig5UY2B0#OL`hp}bQD8gI2an02tLq)L8J8S`MY8!@tmK?Kdaa^3=Us`p?<#92CHZ z+Kb7_^%kYCOLC=3e%iRz>kcr zIY+*`H^^g*-19s!HZRWkw};N)<*#8I#Hb*u3LH0)o$@GYuySGzni9h*ICC-NO9y(t z$+`qQWSOq>cf{->);fJpKNEReT)qjjSo+msq~Q5af%4J4P`Y}J5rpkss1M=O}8v^pmmK{w??RKg!p2oyvDAF4m7mR2W1mEo!yx|tt z)MlJbe?YzkfV?Or zOE=_IP1QZx-08&O!9B*aW{<(S#q|B_Hmd)yJNIj$JOAZP0_?xi=9n3iaPzb|od`Qn z>{&oXzryiO99~~rx_tDuIIFKWP)(s&wanoT^=QymhdhBrROdY)nSnGyfvYcS4{~m;BB9L(On`X^R<((q zd@X}=5u~->sav4jWq2FZ6`YGf2dpX|19r(a4M_D&ej%aM;iQcm-s7s2u9PBhUR4H# z&oOOK9)>`b`r>iRB$8Jwreti7N4E-QW@ai{HsrixU|4_y54|S*feBZ#qV=j}q2R)jH*81= zAO7(ez0s8Z!Hqax!VVn&oc(s-srdpNbt&>SrX*A9;7VT~k`|cbkW08Z=4nTlDj>B4 zyj~w<NWTm6%Y^*5Tv`iyGy#eLqfW{5s{V>q`MoD?(SGL(#@i~ z^ZwZ9{B`eh?)}`Cz?;Rh=9&4%%siP{gQjn{@o?whs2te0>0DTH%C0vsK04fzcTXD? 
zzhlj1;E3W>^`bbd{|%eqAmc=?7=B+VAt+Gj5Hz+;ErQr~v`Yi;n~%(xd%M8nsC08W zwM;7Nb&j}zg&8rT`wfrv!%~Baieuc<`keF^YQd3D*DCB3c0;8$MAnX$N2V5~{Cb4r zrCLu}G)NiFGAd#dQ=^wuObil{GF3S#G8Tz+gZ3z3xehQYDqR%{B3NwUrwt7=CG+_?dB ztUm|aZbQWc_QPK8jrl*kmCW`6YCwKaF|PSn*h>eqwoLxeL3tj#-Cdy3pD@jW+X;#S8-cgnmnEG~u|WINc{w=4wF zQrF>a;xTctH>-fwaICSQqkTZ@EpMfJ$?PB}(hDJVJhj)B_AW17F3-2j>S^fxD#aHI zYj*by8$#{y_yZ~-2=nJ$|&Ao}QqZSIfLYoxE z&he27N4|b{t&MGM2|NO)`?V9uRGx_Srpe5y7$IjxJ>1|q$fEOINLqmMli=9yLB*sv zeOiE|0)G5H7}(sZot)CKM<=#Nq<|+qXB5)N(3i>2-z5RyNP>#w5K6DSNC;aeQHEH2 z3l1ns=xR#gngmR4!b^m0Z~TQmv(96h619 zSE&~2*&Kzw`yt_4Rc(aL6n>6P_@ckeg0Hu-G0y3=x&)W(_*L+V5^OMbcF(7Qn3ij{ zT!-2zb(aw){nHZ0 zT69Mq!qOq0kMK>U<_>ZJWRN!0*_*4KKG^q8iz@S5#&V-9J!y-s2Wc4Nt`FX9cWpnD zWGKnT3M2ZPH%wa%<)?c;M_Ckp_S-AVD5^w5x#`4_h_jqrChwD{sk|2tl&j*mJ6F41 z8WeBlnd-!=lp|Y!prOnf^ntd9F;ctx)}Q_vFTEzO{51vZk9*~S)6g4k$qpP139JR1 zxRCW_)jm|{L|u4CDsLv0+-TzH6OnmWF9neF!q){}c-clMR43OcyU6=aNfxnD8E1WQzrd!6+e1G^?m;EpIh)55;OC2 z!$@|zMovSj3TTcWPRg6vO@Y!aW5<$BV0?)A6PZj}KR%S&;Hw^01NwX-5UdDYWmBpX zq{#mUhDLEM30V+viXr!O9s;ZGb4o`POfy#Hzu4Y2Ov=BX|G-1>c?E$lb#u$?w)uFs zHnKW0e%9!9S!yenXRFft><%Q2w{=yAg^n2U?U*wrVM#6pfm=su$dLmIDQC;4Ej&7e zodXSs?S5~UcBzZF*w8p`Wurgqg$wq(ta zQJk926Mi4!KBbDpD>LZS*YA?>Z7M->Gpw3kF9xt@F79vTHKH2nIoGpE68sym1ITD% zu#i5%O|D7$B=dXQAZ|S{H}g5BE>yEfd_VsdM~~PsIyKx)T`kJBZSsptD0Xr)iTbCkL)#8RTasM!d(ko zt3N7gtRh%BUW)2T7x*^prFhAM?n>@JL?bb;JBU=;RmjU*m?>2leN7 zKvp_&fv-O|8>KeQUR>=aGH{A7=iSiZ@%3_#e(#@DS-d6N??|)p?Fa0bNsLP%HiI=o z&+DMCG2CeD0y62yqDSgKQDR@r5BW{muGbo$EMJiWDL1tsa*g4pfnwv=+9{s;*d8fh zonuaHdI)?un&Qdl#$t#LDrX7ANeW3BvROm0%B^Msfh<=NcUR#C|B z4A!AG#X*U29Ofacyz z3I9Umwh}Mpb<``A^sfgCl-C<+=*3VrFq(>zZQM3lTJ^@{Ir24f$+3chvxwUVVg`6_ zg+h~UU4wSdj|aFX+C7Tf^c@EYzhNXr9+Zu^MJwN#vQ$|pn$2j!NK0;vDpPjZux_#M znz(vk3k@*TQG7L6>OQ#suJMjBwdEY$;~#<211-H z8X>1BG4-DmrHanx$j3b#HKqB=$ zwgYzFLMIM5_D7XCAiqfmXeM-8EcM5~<#M*#D=r5UU<}^r6F^y>ki|i#>oLZ7R)U{I*9UGKn)}=l_-M z2!P^?1kGoTjjzJ@jYpm2vu9UPZOg)(ZiL9LJBa-V3JSBh_#1=e_gaTS^=|EuJLVT6 z^49Ay;Xx`ybYlfZT1}1km#+gU$zi8{9Mex?um$R{)gF20mhk{fy7JJiT~GaX*HuXqlzhu$5@dcy*IrC)#*_IQ890hr5>q*%vNC?4ujjCrUTA7rTz9xX`!#<; zn4h~b;v9xa+ceiNNlRzufsmG60Q>H{`(I7gRvd`yp1QNkLz~Mwzh~qUe!--x_K0WL zDUjJ6p_%gp6z+dg_OX(`vt^mf0pRpe5A42b(BU74YlCepGY%QIl0@q!K2lkpY$O>C zWg$DjnL3Za8G@3O+_2mV=pXw?CE~%IDZUfqM15%=_W5E)!!F; zO{|f9xZ`Tvgf2*HQ_t>h>21W3gk@>+W-2myZ$9oj)Sh+=d~#=*R~;8lp#t2NVU0l5GrpmHCN_{SzS0?z^)hXM?Uk(n+f#FwqjNaet-r!RmE_O6i6-y2MpWVBx2(%W^Xl|ChyCkOpIoE%eZx$ zN-6mMaQ9TITv`Tvus5N(f!gy{tkQHqxsS=*p6gfp+Zm?nP(*dPVn=yr_IwOZ0O{zq zWYj?t^w2NS4$?3k37dVo_27S~@>`jd()KOD#XB^`qp!`Lsqg*9?vhJXq6c)Mm3zs~ zXOGNBT251HO=GDE!G_uy*2s5+XvUWy_9qUiX*EokY^xPb!_lPb#w0hRt$w2iJ$^Q?_bFAo{6MKG#pj~UHJK7C&6J6CA5p)_!iJhqBe4`5-L+}AwV zHI0-i;gTa@%s2e2>Ds<%x;=HV3fhb3Ww`(Hm;O0?6`FR+g&>`Ngm1D>E4eFell1sN z_qpzDX;i8M=1$ssdDP@qhW*sn6Pb2J4#HtT)N=vV^lKtoe6aGXp6NtwiL+iu-ruOU z)NatDp%UV}a8~kVuhu4ue?^OH)E#W)GLqs@>a^Ts)qB$a&e0JqDvnfD{UihkTez2D z&Nefy)p+}KzSE8HspVx^qo$`XXAi zppuJhS+5%5&NZ;{21q9j#Yj-7NaVXG1p)Cd#HSCq>%6uW<69AcAvy`kg}Fyux1Y<^ zGCV_lfgwH{5X!Y@CUddTLv7HPvvUxdaE*AR+KIA9fc0qE`NOKDF7yLiz#U25E zmEddL3Bx+pe}u+zRe$@v2!qK8{5IgpMts87vKj9$3~BDp5||3J4m^F+-xXb}Vq$DQ zIJD;V{s3&Ee~1z@H63!^tCwRZvD@VGF8F3CW=mM8?ap6Z4M!EDO)Xc4vEa7}&N&p^ z$@R8MnJzUoE9TLb(5Bau1&-K>ERZ!9mjWUCed8iWoxD5}-Xc}A55f&y+`%RTE|J>l zQ^?2<-U2~P!k@t$FhD7@jAyrxr!PP#4{MPkP}0_UF1CnVD8CJ>z9?n8%L!`szzQr8 z3Np>HjR*V(yjXi3`eXl!S++j**&s{({cLu;?%5Z5x(Kw7oeztyY5hKr(WQC;Ddx-( z>h(8K*f$tab`J+l)j_{=)fj-`z?L~c2Em5-SJr@daonKnNRa;10kf)%j+Lg&%FP3J z1H7N;Ez?&Z!5wO)FuXf%Avc7oD{s=3x=z}?LzW+JK4c;V&`f3kX9F~IUq`(?1m~a_ z6;alawi+u2m11`9r~!MNdgm(?Wk&m!ST*oEoj}aJaJc0($7bOe4p+52_7`%#;O{Pb 
zvcD56o$#q8cp5Z_aL%BK3ns{o8i*Gi?*B5C&3zG?ULxDNzzD*slp|35l=gX1G?= zBa=qkY<5i!uM3VHc`L65w8>Kh49tF(XZ5m|0@v4y&)71pQv6nlL4`_^ULGJI4I;l7 zz^R{O6+<|GMarZRw0030juhp6^H%n)ivmg9|5o{;v}L4SqRjn84H!)o9m-C6%A!ka zly~1|DhNEKvrVV5*A>i0Xp1cmT}z#-_>5k@mcoKuQR*_GKRmyDm&73;j!cIz-TK?> zTOsNB9%PsS=S6<`uGejPxhz7sMe@0NMS_GJV>KGzsEfR??9qiFyvxX9fAj)K?Ss#9 zU1}1kW<)7zC;ggy9fHSz!c@{KeN|4%Of<1n1o$BX8e z93*9ub%5yV;u+#p&>_BV#(*B$O3QHv3I=_UXq20CN0)6~0yyW?24BNlr;n_$-8+v1 zOEYLW2Wg8ZlL0@6kcf~c*lLDHIiB#a31gHyxsfc|O6NxGTSJR8}vo=)m8X``?V;bt+sgcvOi^m;`!eZ4_$!>`}AbVo|bh$-75N3emu z{l*qjLE4CGH@h`%9bE;CrO4Tddv1}anG#ydSA58zUIanI^9D2*FN0O(Q^di_3wE>{ z%6js{paH&)k3x8HvZrT@&{K`RZ&HX;wA{~|4}v6PkXKK5HuH4&l%F1b+U|f|o(tA> zCHMFDn)uu$$a#25i6OGPjD)Wk!hw~~_9qs#Xx#*V!ILYr_H&-^Vc*x=Kcdk8YMGj) zV&Zelm#Uo^-=f98iI4QY|9R%ED-ctdcj*w}_>Ew5vY}UK{*^RUw-2DSg*f$mWW^+o zZGyAaSh1Q4S8-s?6RxR`xpuUIb9014oAjowyms>xNY;%Ir_@c`d5;GGTM0ca2}ci5 zG=Nouyoo(TX7uZL(o=rLq~W-AN;<$lzS@Sa!YO3NywX)1P@MY}14*ekYyaJ%c`;{} z7m7!W;UHBP8!?tGKP~9F-XBx@#w*$G+?61?ik&V8a0;r*eH3& zpV2ebVYP!E{o}OGFj+k#iQB+eiW%p*2N7pEx+9yUW!iV|VeeWVGZQkL)o=tE%a#kv zX{JAG>+ZHBBcS{N*{k#D?IQCvZ2^gde*z2;7FRpbsT6*WvyPh^wbq~szK^I7k9}4; z8s)N#(DqJTMgWv@%a7b-IzneUof0Fq$Gbc3N`9Sk89B z$iELepeu7Y^38G%yU;DFqcLo=db#vHn2C@^<}B|tYocWUY3j^7qLDLl7>qhoPFVv$u<|`Eo1`E7BO%X>@NrOG@}JB_dCUpxe}I z$dP!A@!A*0Sq==7CZH}LsVSlV+%aeDnp%-_+QYSRlH3Okj2u?0*e-LJLO~S;dx6-a zRcS%A7H-aUdY=08Ai|Pajr(S{F2x^~?N$X2Dv0m@oJl>(3-%zjBSZR&M_$h941IOd z+HpNX{{|jbOEjkh|3MAUlf`yGil@v7zyI8yO8$-f8GNajP^J3s*FTyd;(#vDunl&% zFrF|S3t3FW;vS3RElA+L74_zNtXbFPEbzo@wPC);PVM{0>d3iu}j#y|SRna z2m)s~StCT2ea1K%UaK#zYKU{^Hs zatgCGqzD@b-0EfGS$c$(*L`jpVcu~^yZ)R2i|q(Bv|5UmZJ)Av+)~FI0=z*$@Yy=9 zks`vI`>Y#J4(9cr%2e7n0)HO6(&+M=p{mCsdO(P~k!sW11yyPfa+swrnKZ9ruHz zV@oEm)QAu?N+?UAU7ni4JTTEX`^N5)o3t`J2EL%wZO7S{t4;CDafU&HdTN&) z@A!o^kr5(6cRpW7l@&9ODKSjoI`^r5kKYP^D;>);X)RI-I#a<=L$5O_6lxEBP)fsECnNjs9mUV)y`T;7z zK?e(3g_V(uXK(aV5SZ{{DXq0?5~3%$SQW~sCu)lnHr;Hc3Av?Fb&$ZeaZft`fKH~m2;ALUIQTbCGIn6CytPGMbE3pQ&|0}(QZ1E zbp`ZhfoqlI8q=X$u&FcUjXHdM0JcG>rph$PA#&lbcBpd}N>rJnns<|0(%ETq&D8Jy3eV;0z?tYB>I8TExoH1%dlHixy*Mu)o{>y{u`%5rXoSS5rDmBzXLYD;-E2C9M-FW;P3DP;Q92z`xg z5(O>?FLTsPA^PC$t+`HPc49^XM*5H5<7SCa@CUJDf8Nq_sbfSgl|jFxeDg(+A@0YZ z#l_C4?-qO+;gg$7O3lOTyr1-$wHr>FtD~Zj8)y!1w;CSpTuP=cEc1Pw-2cFtc61t` zvxC-ql8NO{Vtxgk*`7y}Sui#QPYFIcKDHn}^zv?vOf!h_Tvjy3rY4Tyl}USBM~U(y zVG>$XH=+o=QsCo_jwu8NAS+zfqi3m?Aosk^q^cotE1(N(p`g}22qzGWU;I7InGne8 z%Yg$d`SlyrvPwsz4oSX%rudQWEx1$=VJ=}7(U&I?2ikK$Rpd6;rpj|q;*n~5A0ui5 zj=<_Dn_<{W#o+0E!-+8eid_b`-r<^rH>Y*ai*jW}$5T9+kCMLS5ZY-WZ9EDZr_=k_ z6VmzZ6<$kWMOlNF3v+93dmYIan#>Uj4>|gzUX{pr@^%cK2d~H0g$mz?n@UeFFRSP) z9BcvO+IiByR*`PTi&fNm{r3HT`$TggBUsJw?&@PpFmEiYVYOw?Q)YHA`5CFY!C;LK>mbkfHlpa$o8nBEDd`&?)Z8&!5nYlr@hkf-f?ZV>?Zi50i6Tv&nJD`m zD44{7H8Gi?J@GqmyQqHq8Cvn_wAKdQdo#$0>qWCvU)Qitf#ryQAUQ&^d9XxiTwXSh zmZZ+k&Q3>V09=psD!fAWoKd!FiA&*m(5it~4i9}H!QuRm4I_}6^_AVnG~Zx?7Sr?AP@8udi%Hyyud($L!Ub?ZAqkghqLR0d5lO6-T3ObpT{NbXS_TlS~8tRXCwmI z6!byDsMt*&xdc8rXUg19pr9+M_5aOQe4tqmg&YQnwTinQn#|%15GkWS5C&M4EDyh7RT?5pkOk z(yjT;4^ej!pI~&<&1jRt=%|^+ww;#4E{=;dLltKKQ+kJF-nLEW@TNABJ%oB>hA6B# zec3B0lLI)h4D9-R6|>IXZ$APWhxM=;$gtV1lun_KDyx)y?#9TAs|dv`9Yli0%IaEV_|0#{kR$gJheGK8Nggbb$a*LS+SRS0gUm>a(D>;$yEu|is>912{M!C$KQ@B;x#}|Kfe>WQvA547Y zo49!F21zW~c11ly!R250bf0Iz8^fHMTJGe?RUE0XU%DXFH`o(K$VVAjo&XHD1jrvt}& z0401z57c&YRb+etiSH8=!5D=Kp2@KFZ+x3<0bnk#pp=w)8bl`gI|FC-d)sVRUjQaB zZ@s{~JaCBrSS<&dWFsEQd|sy^N}uwAuHIr;jdz@F=cd03O{^e{RAxc!NUwVW|LWJV zmHOe)IE_*Mva(p**1e}ZT5Yk_mSnv2kxVG$wdjBFNATiaygItlMP|&GeJUM#zL3{-0`o$bJAFgkyX_ZuAa&-;cwz8jb>bAk zzlhnFbAUuXMIlV_9P<^Jhz3lztc=U~P2Ay696xJwiZ&ivz^5yd0<)VJ(9qqpUQt;t 
z-6(Gx_B}z{{C%`}Io>g(vl7kV`jjH%hRz${J)YA`msfTeynJ|nvy>2Y?tF4(RF`5g0r<*?Mr21wf_Etrw|#!t<8y-*B4g8ykUX~?x< zYe$85$hg!Gw{x_m#yMeA86jf6TwfG*vXpX4kU?33z35AquRkNf0h(^=AatsGf`;yr zk(4zxaJ$=6%S%vPOvZ?Ek*Jb3`EbSv_Z~07dkTIxj3vB{V*BP^w+0F&e(t|C(r3)| zO&&3yw^u3%T74K>-xmt%4N#@%A1J?1rTZS~HujBp#F-N%e`wdbz9?nfbnHhBezEaf zvhm=tL(G8Bukls<@1}fy2B&Th@#T^mW_gEHiasDrR>|377xaF9c{99)uG#a-gy&pD zGd*RY#mDRF=^oHFikK~&o3SZUOKWSH?-+nr7rg4+_ju_oHYtN(|7e=JwJ&Jl;2L)F zZ{r()v~-goy=yw%c#58GMi{bBi#X9Tw=}RvaeWFZeDKvC>Qt2p~cDvrnr<) zY!7}4Tpd^2Gx;)J+wD)^n?69SyjSppCYiTm>2G{)xR8cUDGy)z-z&ug%l*j)79EOAX$;lfl}u@|a&ZF`5}_2KIG2*Ujq{ z`;a2>HSO{%9%+Mid)P1DKcV>cTs4~Cauznc}uFG*Dwn?F6k%PCH}645LB{05=o zI49|s?XFPpvQLiV%q*txoIlKpUVjKzb($%Xr?9z8vt`>-CBq1z{yzrl($qp|z+dhHWT_h-=uUL_hlD+#oH^@HWN5}YZF14wLG`%o_O)< zKd1N}j2Em(8S&Gfc7J>jQ{=Rkpe&@Auo-o&&bM5p+QgJpR>NK38diOG6~O{+{vBZT z8kDDHPs_{8JZ;xuW=O~6Gp=l(x_pWAzi9Y>Nk8jp(}MoIcz2$a2wTRFGyHL!lZiYbHgp z=bsc8INQK9%SChYs&RRF@J7M3?QI#SP8Sx0 z1sA*m1;oa@TnUcy@s-$VEkqNgql@QnLCj~K-fnIeslY@jplSrwxm_IC+EBz|CgO4S zTKja?aXtEcIK@zd_#n_XbqAgEdOZ+&o_a)0LfO>QU<%{tr~c2@&(Ah~9v)fXyhqK) zyZwp_SIcFc52j@QzZYXj^>c-Y&SdEHY~8H=*>?rJ2@L(o;qfZr%G*G{ojIekSgr#}IMd3u}Jg{hV6AfKL*GmWUY zB@?X4q7;M~bSOui9*d#|3`EHyeG16IDaVsaNl@%N-U>>Osm&^}=d8}G%~@z1ZNO10 z_Sx@WdYjy8A}@lL8J}k^0gci&MAukb(;8$|UY;YVr`RsbpL7Wx-3%WTTjYZW7Df(x zk-I>Q?q+}lw=BCQJSp}DOSR>HZh8KzpWfwVcwyhOjfwtGti4d&K$`YCmWPdur$S2Q z+uSVcD>N^@o?f$PWBF@mmUPg8RHQ?eK;>(9QVa(R%$xdoHJs&;OnpyfgUUJSbFfh9 z`wabct^PW}=ZtCAN0+CieI3g*agApx8Cb=k(Fqddet6KB>CYbKFfrc=^ADh}3uE-l znlcSMfdW7)r`q79s!i-jZiJNLvD`GrIl`BDxv~Mrpoyr86a9fKts3d!g4;9{@84c6 zb2n~>hfCg18^OcAQ&;zH4Pl@BB!Jdy`itU~*{R|0A*-iV31?+3C5G*ya)BBXsU*~? zINTkYqt2ecNp8QW9H|dqg|?fl|tQE!ng4vdmZ0{%8yE`EKfk)d@c zuz#m9fm?nu4bL}eP|);#b2k?^t!_%iX}p-BOHs65l=RlwsMhUcs%EfC4!jau#?%OI z;AEp_NRlp$R>0W@NlD+a_4bVCZNINNJTtTLnw{ngKtn-(SocqIBpmWvrfVvF;fafBh(;Bi&j|Vgt)yX1#A~b%+?>r8yk~* zvj(jdt&``>ep-H<`&3Evt36wR*c4$6^~owkA6k*N-K7J4E(U)P!%^m9`;U+mWavaG zX#5Jatg`R>`Gs^6#{QKiPbCzrRpu64+?D@s*AfjggE_V&ld+I%GTkDRj4$HDc7*gX z4CPLJp>~l-f3x(jVHHF{)(F31qoSUFoh!miDZ^M6!-M)y%2A^w0-G;|YJ>6otSR${ zpvJ!}bQwIT@Czb!%R(`v1KIr4+KOr131N2dYo7_Nl#AS^e7gA>5CIk}%_W5Mp}K4mP<;sQ-j64+k()O7LNjcBeGoRrQ+3|d5$6}IJmh_N`n2@w6&9J4 zsIV%1<|)bikqxCr!JZq0p8^NW?$;>3oyLb@pLKSle7mtFe22#AY=mMJ21)E+AK|A~ z@jtI8hhZaz(Doc zbHV)i-3yY%RnF8eEJm_@b1HSLChPLC{*`7IcPALF?XgEte@XBWr$PYB`P0cy3B4vi zKdr}1v&s>JT-s5`vuM`3xgXU(C1!z1!n>U~mZe(cg)Lg9=W{2xK9>7fcet+6^tGAD zu#A|Rg_d!ndE3jA)04pPy@!?tQN7NBz;dl);ANBd=O= z>htFQ+UfTTU3Jsf1qIU?e)p{8@y(d&umpeN(mv@WR#{dsWugCkb1$%K1pX~%*n|J6 z6nD$z4^Rz%k$HjkIJ`czwpx|(t>S>DAYX5zaFp(tmD1GrrrVp^JBjB9gOo3j#w@}^ z!UD#)Sn#)n*UG#2{252P3zK`i{J6WP3&y|(l>Ry^GD_-^k{?M!5Wvi!oHOlWHx3EA z)u4F7YtE0x`K2qfuR^B`RXAjE77e!-hN7mQ9~hrQmrJbbA9u*pz7_!6)KV4f+)QkjufC~^pub7b~ zFd&M15`(bx5S?{`?wA{!9(X8io~`3-?AdUtLxcPCjjr;%XeutPc8UL0?<~<*V{Ij@ z5e6}ZqOkho2DN7GmB&RDjFxSVLMhW=2=J2VrV!6Ol3{{G1om1~8(UV&j{pIE?KELJ z-gdVK!KzV9C&x%i`rMLS_uCX4Z_iap70w8C1acXm-a;EG_|?o-A^ld~4NX%a zhG(u_rH{^HG=h|eJ|MtiF-sirtcns~J7S?<Jd|5HIj2P} zlj%>88n;D#eA1wNIReH7SUzeyk(fMAD1YyVnG)iv?O(E&HCNFM$lXnMYD7C+-7f6x1^s^eL6EK?W*FU zPo|AjF@)bfQA+rc=il;odZl{=QEm-p&3w}is)uwT*eUKlR#-9n{_0=Ce>h~qW%rSl z5~9=JEoT38v<9H9#@h5*Bka?fZ#GBQ z(B;~g6GzyA-EPkbvc>(Vb27MdLG|0LUOiH|gZq$Qm?EdArh;hY#2ty?jn@3Yb)Gvkyhwm3%M~6rz_W zo$PO}Y?`C5JY--F}&hSB0wpa*}d3a_y^c!Ku~>NqibQSG9FlN@QH6M=LipuXydcPD#O-}q1-*VI1< zj!fQQz#9Fv_qJ&5Wu7nHZY_;P{^vUX6U7T^AKjmZDSOU|C-UIw#s+6S?I1=V3k5yu zGfzk|gQ>g2&2&5ka14I9>L6HLrc4;b?wDD|3!5;HtbY+cho-jRJ>q|MGaRn^Sj_q% zD(Sq$5=1xf_KtsP;@IYzqR#E%<~OJi*#GgHGwG0bgz5GcBg}$e!Nrxf=V3hYUlyQS_5v5?>yQHe>$L?#KD|u=hOUE^pdYg2(g!Y1l8-c>LOm@6{ge2D 
z8hz!v2|e`I&L!MMTeQxr##_%UtFK4`MN3rCcL=><=iz8U5U&^pJ`D^&47O}r7`lKQo8v0 z<-hvBwk(}^skdiTpD!ml-+rUW7&rFk^M;;J#+U%?R^&FELsN`*@pzKBLrMunkScbg z>tColcfDZc+^YL^a`CdBcsTV1Qp zDQI?irV^{xek;z$Ww{j|O>M@;ctJ^p6OQS3d(Ip6Po`lU@#jXPL3 zE+<1~vo(U52QsqYn%3yz>nOK|7Gq6WueRtSiryWcQ~Y1QO|)Gs5Vtd-=`}J^la}&7 zis|dsDEH0iD_Pdqhx`?KXA53>1&{f=y8j&VOYzhw{G6&@GFqnQzuupJ2K_L!7|KmK zQC5jeWig&g<|>gOZOx(6P)537(4@Cv)YF^<&T3VEEapWXA%wft)a^n!`DQxWSr&P$ z0%@AAu!lCkyeW9@kLL7J+8K2HNkN75^c5#+nPXNo2JHj<*7z31Ak}nmVn7j?U(Cd(PJd}q zL>qVkA>xU0irEUgZwkXMT4&Zl9>TgY93h!bCcxl&K$pkDUvTYSohe*rT*F^|dIBpZ zJ)rS_g0S7s z;XfNKlVWI{2#RSw0BHJugO7T9Xxoc6NJd?kZ+~U>7rCHGSxCM#ZR;TUdvT4>yp~v% z5JGY~oM$=n?l1#mrp&r3Ep&#Rl zrO;F?&_h7#k}anwnpav}1}QeZ)oAPJs{7IMs0Ui*YrsQ~_eWkzU!}d z!U;JEJ3UG6(YU!2lCgfkD<}LUVb{t<9S4t}3zJxcjYn-^iT-3A`|+3hpS?ox_CMdS zeHuKP**Lu0oveY{ShO&+CHu#gF$gi(IK}I(Vozf~(^SlHj_S{~Eq>9DW>-(&^?z+o z=(}s7@?RdP*&(TRD9!djlm4P{OUgJ|PeiEW3CV#BU;dWEb8EEcbu|;rz6#Dgv;fe~ zv3oi=*Mpp)7x#(f+xMKsqR68Yg4PMIvfX@?GMesxWg+#KEc9^n@h=ku;?#eo08$3j zN#6rfzqIb|fL((I0JC=e{n+GuR{qHztPBF4~$EYB<}VY-NY;__8qrW3+p zsn=XRz-VQ>z#CZ*`bqz4Rn{nMM}M4MMK2DI`Oh`?4TeXfoPc**!)}fAl`f7Ctj!5a z9He!$P75(RKqjZ|jFE(T_ZW*d9kGS5-96l}t8nYL73x*`DSO^>?`~?l<7< z0@n@<@+J}UQB;hgu*4gv^*8@~H2?qir0!3v+MXeFPCzJ+Uxefc(>+bs1H#Kmy^N+s zXorj%PqqW+Dq7buQkzHsr|X ziyI^?dQF3=Z|;0gpZKK1NqTK3Vh6}8LdCp`CbQ_n%5%F=_Xpbru+;&wC&xVR((^&US;(b`ceF=fNW*APFz8g@G^gzAl3$6AuaT*Yad4 z!P~5!w_@d^L|rH5gZky~ISq|1uT2#vfBp^J1;y3Ir|=UFwmzl*Nvr>SuEku)C+lQ) zhRxIx`p)3NV$ilX*#Q#&(wZ}8PhViIb zjp``MrFB6_3ZJ>nDY;+z3`tU{&brxzWfmD52`OfBL_*;#8;x$(wl2oOtfFj_M!C#& zE#x|DN$31@KF8|)^Zo1jy`T4azTe;TJm2Td3QRkkbt&a3sJ6;5wjBU9}g@y<3mv)|Q)*6&EB0bUirYXUW=fBeebCR{s|(V3Tt|4Y}Y+Dj!a3ka;% z9D^~U{JSiuY-Jj)6)UH7`7m_X*9RNrBnAmh9m=b=N0Fuc4E^PZMVrLbh1`WtZH>MC z;@(26YTk!@D3BdT#K(6Fcm*kn zs0hb_=}*0>A-oX22!B6-ze4i<>NhuswnGh~*w%ielHIc?0A<2RY8tbDxtPGn{HVQS z*QU?r*lZL7B~ynel_&%$5)Yehch^7S^o%BV|p12q@9Q%T+91)dy+r&*dTA{LPQsLM5rj6jJvn4bG1;eNM- z5m447lYIV+9&;;X!{d4Zx{<)c9&88uA2i3jn+uG$$uhp5jsMma}TpMsUoOJus)q^S4cpN8qyK*Y5g_vJMWgTJ8oKCuRb z(B$1S+X24d$hLgrEW3KLffaoRMTuaY59ZwrzPmDVJW$L1X{2G7TjpWxkkiUPy`a?e z5K9=G;@6MtvHI3DlV7%REz~#fE;Jc8+)~X(t_lU`7?Y*-V_L+}j*5XI3+Twng2d51 zKyu*TUP`e-J(OXO^B;k!r)?9b#*5?IPD!~>UYoLzq)XJurT#E>3Akj@$~x{sgJ}ja z%u?s)0|dHoqCvj+CgM)f63-i3!_g|4#hJ_}ANT`;IFvhh!&Dx$5VdCb%C3O(w{Gtb zK`+aDSnV~SW5e_=)Yo}#h5L#$jP{uua;wPF*~8Y9 z)Oxjim*~6 z@XhmcJt#|ETl{p{mYBC;2EInkdI#DAH&2phD@?~cJFbTz^-EJYBw~DB48vxUaGLJW zNbW<6o|fa-r?l;3tvQgfs#uX+25+{j?rJP=BVTDJPYPZk~>lVtDxvP9PJ`|^8 zLWif6^QA*WXJkQ%vLJUYrAluC=r`zFop_CT$ZHNF6DXA{Kk$N(t4fH$cE&X%BiE?3MlEf2nawi8XeMAtL!57}UxL>SDHM2_ mC~;m?Oke#}`G4jL`o8D1y{CWn_)OLkV0pXyy4`a@Uiue82ccg8 literal 0 HcmV?d00001 From 69d07292583842feae4e40afddaf20ec05d39e25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 12:31:45 +0100 Subject: [PATCH 145/157] Add robosuite example files --- .../robosuite/td3_robosuite_two_arm_lift.py | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 docs/source/examples/robosuite/td3_robosuite_two_arm_lift.py diff --git a/docs/source/examples/robosuite/td3_robosuite_two_arm_lift.py b/docs/source/examples/robosuite/td3_robosuite_two_arm_lift.py new file mode 100644 index 00000000..69a5f344 --- /dev/null +++ b/docs/source/examples/robosuite/td3_robosuite_two_arm_lift.py @@ -0,0 +1,117 @@ +import robosuite +from robosuite.controllers import load_controller_config + +import torch +import torch.nn as nn +import torch.nn.functional as F + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, 
+from skrl.memories.torch import RandomMemory
+from skrl.agents.torch.td3 import TD3, TD3_DEFAULT_CONFIG
+from skrl.resources.noises.torch import GaussianNoise
+from skrl.trainers.torch import SequentialTrainer
+from skrl.envs.torch import wrap_env
+
+
+# Define the models (deterministic models) for the TD3 agent using mixins
+# and programming with two approaches (torch functional and torch.nn.Sequential class).
+# - Actor (policy): takes as input the environment's observation/state and returns an action
+# - Critic: takes the state and action as input and provides a value to guide the policy
+class DeterministicActor(DeterministicMixin, Model):
+    def __init__(self, observation_space, action_space, device, clip_actions=False):
+        Model.__init__(self, observation_space, action_space, device)
+        DeterministicMixin.__init__(self, clip_actions)
+
+        self.linear_layer_1 = nn.Linear(self.num_observations, 400)
+        self.linear_layer_2 = nn.Linear(400, 300)
+        self.action_layer = nn.Linear(300, self.num_actions)
+
+    def compute(self, inputs, role):
+        x = F.relu(self.linear_layer_1(inputs["states"]))
+        x = F.relu(self.linear_layer_2(x))
+        return torch.tanh(self.action_layer(x)), {}
+
+class DeterministicCritic(DeterministicMixin, Model):
+    def __init__(self, observation_space, action_space, device, clip_actions=False):
+        Model.__init__(self, observation_space, action_space, device)
+        DeterministicMixin.__init__(self, clip_actions)
+
+        self.net = nn.Sequential(nn.Linear(self.num_observations + self.num_actions, 400),
+                                 nn.ReLU(),
+                                 nn.Linear(400, 300),
+                                 nn.ReLU(),
+                                 nn.Linear(300, 1))
+
+    def compute(self, inputs, role):
+        return self.net(torch.cat([inputs["states"], inputs["taken_actions"]], dim=1)), {}
+
+
+# Load and wrap the robosuite environment
+controller_config = load_controller_config(default_controller="OSC_POSE")
+env = robosuite.make("TwoArmLift",
+                     robots=["Sawyer", "Panda"],             # load a Sawyer robot and a Panda robot
+                     gripper_types="default",                # use default grippers per robot arm
+                     controller_configs=controller_config,   # each arm is controlled using OSC
+                     env_configuration="single-arm-opposed", # (two-arm envs only) arms face each other
+                     has_renderer=True,                      # on-screen rendering
+                     render_camera="frontview",              # visualize the "frontview" camera
+                     has_offscreen_renderer=False,           # no off-screen rendering
+                     control_freq=20,                        # 20 hz control for applied actions
+                     horizon=200,                            # each episode terminates after 200 steps
+                     use_object_obs=True,                    # provide object observations to agent
+                     use_camera_obs=False,                   # don't provide image observations to agent
+                     reward_shaping=True)                    # use a dense reward signal for learning
+env = wrap_env(env)
+
+device = env.device
+
+
+# Instantiate a RandomMemory (without replacement) as experience replay memory
+memory = RandomMemory(memory_size=25000, num_envs=env.num_envs, device=device, replacement=False)
+
+
+# Instantiate the agent's models (function approximators).
+# TD3 requires 6 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#spaces-and-models +models = {} +models["policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models["target_policy"] = DeterministicActor(env.observation_space, env.action_space, device) +models["critic_1"] = DeterministicCritic(env.observation_space, env.action_space, device) +models["critic_2"] = DeterministicCritic(env.observation_space, env.action_space, device) +models["target_critic_1"] = DeterministicCritic(env.observation_space, env.action_space, device) +models["target_critic_2"] = DeterministicCritic(env.observation_space, env.action_space, device) + +# Initialize the models' parameters (weights and biases) using a Gaussian distribution +for model in models.values(): + model.init_parameters(method_name="normal_", mean=0.0, std=0.1) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.td3.html#configuration-and-hyperparameters +cfg_agent = TD3_DEFAULT_CONFIG.copy() +cfg_agent["exploration"]["noise"] = GaussianNoise(0, 0.1, device=device) +cfg_agent["smooth_regularization_noise"] = GaussianNoise(0, 0.2, device=device) +cfg_agent["smooth_regularization_clip"] = 0.5 +cfg_agent["batch_size"] = 100 +cfg_agent["random_timesteps"] = 100 +cfg_agent["learning_starts"] = 100 +# logging to TensorBoard and write checkpoints each 1000 and 5000 timesteps respectively +cfg_agent["experiment"]["write_interval"] = 1000 +cfg_agent["experiment"]["checkpoint_interval"] = 5000 + +agent = TD3(models=models, + memory=memory, + cfg=cfg_agent, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 50000, "headless": False} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() From a1c72a55d1f0ab7af12952f317aada3db2fa91b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 12:55:06 +0100 Subject: [PATCH 146/157] Update benchmark result links --- docs/source/intro/examples.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 98624d40..ec33b407 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -55,6 +55,8 @@ The following components or practices are exemplified (highlighted): - Create a tabular model (:math:`\epsilon`-greedy policy): **Taxi (SARSA)**, **FrozenLake (Q-Learning)** - Load a checkpoint during evaluation: **Pendulum (DDPG)**, **CartPole (CEM)**, **CartPole (DQN)**, **Taxi (SARSA)**, **FrozenLake (Q-Learning)** +**Benchmark results** are listed in `Benchmark results #32 (Gym/Gymnasium) `_ + .. tabs:: .. tab:: Pendulum (DDPG) @@ -479,7 +481,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 # trainer timesteps = horizon_length * max_epochs -**Benchmark results** for Isaac Gym are listed in `Benchmark results #32 `_. +**Benchmark results** are listed in `Benchmark results #32 (NVIDIA Isaac Gym) `_ .. 
note:: @@ -793,7 +795,7 @@ The PPO agent configuration is mapped, as far as possible, from the rl_games' A2 # trainer timesteps = horizon_length * max_epochs -**Benchmark results** for Omniverse Isaac Gym are listed in `Benchmark results #32 `_. +**Benchmark results** are listed in `Benchmark results #32 (NVIDIA Omniverse Isaac Gym) `_ .. note:: From 718dc7385f77c1a17645edd76080f1a80a277807 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 13:07:51 +0100 Subject: [PATCH 147/157] Removed not implemented prioritized memory file --- skrl/memories/torch/__init__.py | 1 - skrl/memories/torch/prioritized.py | 56 ------------------------------ 2 files changed, 57 deletions(-) delete mode 100644 skrl/memories/torch/prioritized.py diff --git a/skrl/memories/torch/__init__.py b/skrl/memories/torch/__init__.py index ae882b51..31491065 100644 --- a/skrl/memories/torch/__init__.py +++ b/skrl/memories/torch/__init__.py @@ -1,4 +1,3 @@ from skrl.memories.torch.base import Memory from skrl.memories.torch.random import RandomMemory -from skrl.memories.torch.prioritized import PrioritizedMemory diff --git a/skrl/memories/torch/prioritized.py b/skrl/memories/torch/prioritized.py deleted file mode 100644 index 61b518a3..00000000 --- a/skrl/memories/torch/prioritized.py +++ /dev/null @@ -1,56 +0,0 @@ -from typing import Union, Tuple - -import numpy as np - -import torch - -from skrl.memories.torch import Memory - - -class PrioritizedMemory(Memory): - def __init__(self, memory_size: int, num_envs: int = 1, device: Union[str, torch.device] = "cuda:0", preallocate: bool = True, alpha: float = 0.5, beta: float = 0.4, eps: float = 1e-6) -> None: - """Prioritized sampling memory - - Sample a batch from memory randomly - - :param memory_size: Maximum number of elements in the first dimension of each internal storage - :type memory_size: int - :param num_envs: Number of parallel environments (default: 1) - :type num_envs: int, optional - :param device: Device on which a torch tensor is or will be allocated (default: "cuda:0") - :type device: str or torch.device, optional - :param preallocate: If true, preallocate memory for efficient use (default: True) - :type preallocate: bool, optional - :param replacement: Flag to indicate whether the sample is with or without replacement (default: True). - Replacement implies that a value can be selected multiple times (the batch size is always guaranteed). - Sampling without replacement will return a batch of maximum memory size if the memory size is less than the requested batch size - :type replacement: bool, optional - :param alpha: Hyperparameter for prioritized sampling (default: 0.5) - :type alpha: float, optional - :param beta: Hyperparameter for prioritized sampling (default: 0.4) - :type beta: float, optional - :param eps: Hyperparameter for prioritized sampling (default: 1e-6) - :type eps: float, optional - """ - super().__init__(memory_size, num_envs, device, preallocate) - - self.alpha = alpha - self.beta = beta - self.eps = eps - - def sample(self, batch_size: int, names: Tuple[str]) -> Tuple[torch.Tensor]: - """Sample a batch from memory randomly - - :param batch_size: Number of element to sample - :type batch_size: int - :param names: Tensors names from which to obtain the samples - :type names: tuple or list of strings - - :return: Sampled data from tensors sorted according to their position in the list of names. 
- The sampled tensors will have the following shape: (batch size, data size) - :rtype: tuple of torch.Tensor - """ - # generate random indexes - indexes = np.random.choice(len(self), size=batch_size, replace=True) - - return self.sample_by_index(indexes=indexes, names=names) From 1bf5fe5e39a4de6498984d81079399b26141ec69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 13:09:10 +0100 Subject: [PATCH 148/157] Add a generic description for supporting other environments --- docs/source/index.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/index.rst b/docs/source/index.rst index f19f357c..b948cdca 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,13 +1,13 @@ SKRL - Reinforcement Learning library (|version|) ================================================= -**skrl** is an open-source modular library for Reinforcement Learning written in Python (using `PyTorch `_) and designed with a focus on readability, simplicity, and transparency of algorithm implementation. In addition to supporting the OpenAI `Gym `_ / Farama `Gymnasium `_ and `DeepMind `_ environment interfaces, it allows loading and configuring `NVIDIA Isaac Gym `_ and `NVIDIA Omniverse Isaac Gym `_ environments, enabling agents' simultaneous training by scopes (subsets of environments among all available environments), which may or may not share resources, in the same run +**skrl** is an open-source modular library for Reinforcement Learning written in Python (using `PyTorch `_) and designed with a focus on readability, simplicity, and transparency of algorithm implementation. In addition to supporting the OpenAI `Gym `_ / Farama `Gymnasium `_, `DeepMind `_ and other environment interfaces, it allows loading and configuring `NVIDIA Isaac Gym `_ and `NVIDIA Omniverse Isaac Gym `_ environments, enabling agents' simultaneous training by scopes (subsets of environments among all available environments), which may or may not share resources, in the same run **Main features:** * Clean code * Modularity and reusability * Documented library, code and implementations - * Support for Gym/Gymnasium (single and vectorized), DeepMind, NVIDIA Isaac Gym (preview 2, 3 and 4) and NVIDIA Omniverse Isaac Gym environments + * Support for Gym/Gymnasium (single and vectorized), DeepMind, NVIDIA Isaac Gym (preview 2, 3 and 4), NVIDIA Omniverse Isaac Gym environments, among others * Simultaneous learning by scopes in Gym/Gymnasium (vectorized), NVIDIA Isaac Gym and NVIDIA Omniverse Isaac Gym .. 
warning:: @@ -90,9 +90,9 @@ Agents Environments ^^^^^^^^^^^^ - Definition of the Isaac Gym (preview 2, 3 and 4) and Omniverse Isaac Gym environment loaders, and wrappers for the Gym/Gymnasium, DeepMind, Isaac Gym and Omniverse Isaac Gym environments + Definition of the Isaac Gym (preview 2, 3 and 4) and Omniverse Isaac Gym environment loaders, and wrappers for the Gym/Gymnasium, DeepMind, Isaac Gym, Omniverse Isaac Gym environments, among others - * :doc:`Wrapping ` **Gym/Gymnasium**, **DeepMind**, **Isaac Gym** and **Omniverse Isaac Gym** environments + * :doc:`Wrapping ` **Gym/Gymnasium**, **DeepMind**, **Isaac Gym**, **Omniverse Isaac Gym** environments, among others * Loading :doc:`Isaac Gym environments ` * Loading :doc:`Omniverse Isaac Gym environments ` From c87c4dcbae5f796f3f9ee13d08f58d8c93cbcb29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Sun, 8 Jan 2023 19:20:40 +0100 Subject: [PATCH 149/157] Add RNN examples to docs --- docs/source/intro/examples.rst | 47 ++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index ec33b407..ac1c76bd 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -51,6 +51,7 @@ These examples perform the training of one agent in a Gym/Gymnasium environment The following components or practices are exemplified (highlighted): - Load and wrap a Gym environment: **Pendulum (DDPG)**, **CartPole (CEM)** + - Recurrent neural network models (RNN, GRU, LSTM): **PendulumNoVel (DDPG)** - Instantiate models using the model instantiation utility: **CartPole (DQN)** - Create a tabular model (:math:`\epsilon`-greedy policy): **Taxi (SARSA)**, **FrozenLake (Q-Learning)** - Load a checkpoint during evaluation: **Pendulum (DDPG)**, **CartPole (CEM)**, **CartPole (DQN)**, **Taxi (SARSA)**, **FrozenLake (Q-Learning)** @@ -85,6 +86,52 @@ The following components or practices are exemplified (highlighted): :language: python :emphasize-lines: 67 + .. tab:: PendulumNoVel (DDPG) + + .. note:: + + The examples use a wrapper around the original environment to mask the velocity in the observation. The intention is to make the MDP partially observable and to show the capabilities of recurrent neural networks + + More examples with other algorithms can be found in the repository documentation `example folder `_ and in the benchmark results indicated above + + .. tabs:: + + .. tab:: RNN + + .. tabs:: + + .. group-tab:: Training + + | :download:`ddpg_gym_pendulumnovel_rnn.py <../examples/gym/ddpg_gym_pendulumnovel_rnn.py>` + + .. literalinclude:: ../examples/gym/ddpg_gym_pendulumnovel_rnn.py + :language: python + :emphasize-lines: 31-34, 40-43, 50-77, 86, 99-102, 108-111, 118-141, 149 + + .. tab:: GRU + + .. tabs:: + + .. group-tab:: Training + + | :download:`ddpg_gym_pendulumnovel_gru.py <../examples/gym/ddpg_gym_pendulumnovel_gru.py>` + + .. literalinclude:: ../examples/gym/ddpg_gym_pendulumnovel_gru.py + :language: python + :emphasize-lines: 31-34, 40-43, 50-77, 86, 99-102, 108-111, 118-141, 149 + + .. tab:: LSTM + + .. tabs:: + + .. group-tab:: Training + + | :download:`ddpg_gym_pendulumnovel_lstm.py <../examples/gym/ddpg_gym_pendulumnovel_lstm.py>` + + .. literalinclude:: ../examples/gym/ddpg_gym_pendulumnovel_lstm.py + :language: python + :emphasize-lines: 31-34, 40-44, 51-82, 91, 104-107, 113-117, 127-151, 159 + .. tab:: CartPole (CEM) .. 
tabs:: From 88430f09c1523e95b1b1a7643df97e9e020f794f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 13 Jan 2023 10:09:13 +0100 Subject: [PATCH 150/157] Add KUKA LBR iiwa real-world example --- docs/source/intro/examples.rst | 207 ++++++++++++++++++++++++++++++++- 1 file changed, 203 insertions(+), 4 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index ac1c76bd..8848a780 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -1078,7 +1078,7 @@ These examples show basic real-world use cases to guide and support advanced RL .. tab:: Franka Emika Panda - **3D reaching task (Franka's gripper must reach a certain target point in space)**. The training was done in Omniverse Isaac Gym. The real robot control is performed through the Python API of a modified version of frankx (see `frankx's pull request #44 `_), a high-level motion library around libfranka. Training and evaluation is performed for both Cartesian and joint control space + **3D reaching task (Franka's gripper must reach a certain target point in space)**. The training was done in Omniverse Isaac Gym. The real robot control is performed through the Python API of a modified version of *frankx* (see `frankx's pull request #44 `_), a high-level motion library around *libfranka*. Training and evaluation is performed for both Cartesian and joint control space .. raw:: html @@ -1092,7 +1092,7 @@ These examples show basic real-world use cases to guide and support advanced RL * The instantaneous reward is the negative value of the Euclidean distance (:math:`\text{d}`) between the robot end-effector and the target point position. The episode terminates when this distance is less than 0.035 meters in simulation (0.075 meters in real-world) or when the defined maximum timestep is reached - * The target position lies within a rectangular cuboid of dimensions 0.5 x 0.5 x 0.2 meters centered at 0.5, 0.0, 0.2 meters with respect to the robot's base. The robot joints' positions are drawn from an initial configuration [0º, -45º, 0º, -135º, 0º, 90º, 45º] modified with uniform random values between -7º and 7º approximately + * The target position lies within a rectangular cuboid of dimensions 0.5 x 0.5 x 0.2 meters centered at (0.5, 0.0, 0.2) meters with respect to the robot's base. The robot joints' positions are drawn from an initial configuration [0º, -45º, 0º, -135º, 0º, 90º, 45º] modified with uniform random values between -7º and 7º approximately .. list-table:: :header-rows: 1 @@ -1149,7 +1149,7 @@ These examples show basic real-world use cases to guide and support advanced RL **Prerequisites:** - A physical Franka robot with `Franka Control Interface (FCI) `_ is required. Additionally, the frankx library must be available in the python environment (see `frankx's pull request #44 `_ for the RL-compatible version installation) + A physical Franka Emika Panda robot with `Franka Control Interface (FCI) `_ is required. Additionally, the *frankx* library must be available in the python environment (see `frankx's pull request #44 `_ for the RL-compatible version installation) **Files** @@ -1182,7 +1182,7 @@ These examples show basic real-world use cases to guide and support advanced RL .. raw:: html .. raw:: html @@ -1276,6 +1276,205 @@ These examples show basic real-world use cases to guide and support advanced RL TASK_CFG["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + .. 
tab:: Kuka LBR iiwa + + **3D reaching task (iiwa's end-effector must reach a certain target point in space)**. The training was done in Omniverse Isaac Gym. The real robot control is performed through the Python, ROS and ROS2 APIs of `libiiwa `_, a scalable multi-control framework for the KUKA LBR Iiwa robots. Training and evaluation is performed for both Cartesian and joint control space + + .. raw:: html + +


+ + **Implementation** (see details in the table below): + + * The observation space is composed of the episode's normalized progress, the robot joints' normalized positions (:math:`q`) in the interval -1 to 1, the robot joints' velocities (:math:`\dot{q}`) affected by a random uniform scale for generalization, and the target's position in space (:math:`target_{_{XYZ}}`) with respect to the robot's base + + * The action space, bounded in the range -1 to 1, consists of the following. For the joint control it's robot joints' position scaled change. For the Cartesian control it's the end-effector's position (:math:`ee_{_{XYZ}}`) scaled change + + * The instantaneous reward is the negative value of the Euclidean distance (:math:`\text{d}`) between the robot end-effector and the target point position. The episode terminates when this distance is less than 0.035 meters in simulation (0.075 meters in real-world) or when the defined maximum timestep is reached + + * The target position lies within a rectangular cuboid of dimensions 0.2 x 0.4 x 0.4 meters centered at (0.6, 0.0, 0.4) meters with respect to the robot's base. The robot joints' positions are drawn from an initial configuration [0º, 0º, 0º, -90º, 0º, 90º, 0º] modified with uniform random values between -7º and 7º approximately + + .. list-table:: + :header-rows: 1 + + * - Variable + - Formula / value + - Size + * - Observation space + - :math:`\dfrac{t}{t_{max}},\; 2 \dfrac{q - q_{min}}{q_{max} - q_{min}} - 1,\; 0.1\,\dot{q}\,U(0.5,1.5),\; target_{_{XYZ}}` + - 18 + * - Action space (joint) + - :math:`\dfrac{2.5}{120} \, \Delta q` + - 7 + * - Action space (Cartesian) + - :math:`\dfrac{1}{100} \, \Delta ee_{_{XYZ}}` + - 3 + * - Reward + - :math:`-\text{d}(ee_{_{XYZ}},\; target_{_{XYZ}})` + - + * - Episode termination + - :math:`\text{d}(ee_{_{XYZ}},\; target_{_{XYZ}}) \le 0.035 \quad` or :math:`\quad t \ge t_{max} - 1` + - + * - Maximum timesteps (:math:`t_{max}`) + - 100 + - + + .. raw:: html + +
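+
+   As a minimal illustrative sketch (not part of the shipped example files), the observation vector described in the table above can be assembled as follows; the names :literal:`q`, :literal:`dq`, :literal:`q_min`, :literal:`q_max`, :literal:`target_xyz`, :literal:`t` and :literal:`t_max` are assumed to be already available from the robot state and the episode bookkeeping:
+
+   .. code-block:: python
+
+      import numpy as np
+
+      # normalized episode progress (1 value)
+      progress = t / t_max
+      # joint positions scaled to the interval [-1, 1] (7 values)
+      q_scaled = 2.0 * (q - q_min) / (q_max - q_min) - 1.0
+      # scaled joint velocities with a random uniform factor for generalization (7 values)
+      dq_scaled = 0.1 * dq * np.random.uniform(0.5, 1.5, size=7)
+      # target position with respect to the robot's base (3 values)
+      observation = np.concatenate([[progress], q_scaled, dq_scaled, target_xyz])  # shape: (18,)
+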
+ + **Workflows** + + .. tabs:: + + .. tab:: Real-world + + .. warning:: + + Make sure you have the smartHMI on hand in case something goes wrong in the run. **Control via RL can be dangerous and unsafe for both the operator and the robot** + + .. raw:: html + + + + **Prerequisites:** + + A physical Kuka LBR iiwa robot is required. Additionally, the *libiiwa* library must be installed (visit the `libiiwa `_ documentation for installation details) + + **Files** + + * Environment: :download:`reaching_iiwa_real_env.py <../examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_env.py>` + * Evaluation script: :download:`reaching_iiwa_real_skrl_eval.py <../examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_skrl_eval.py>` + * Checkpoints (:literal:`agent_joint.pt`, :literal:`agent_cartesian.pt`): :download:`trained_checkpoints.zip ` + + **Evaluation:** + + .. code-block:: bash + + python3 reaching_iiwa_real_skrl_eval.py + + **Main environment configuration:** + + The control space (Cartesian or joint) can be specified in the environment class constructor (from :literal:`reaching_iiwa_real_skrl_eval.py`) as follow: + + .. code-block:: python + + control_space = "joint" # joint or cartesian + + .. tab:: Real-world (ROS/ROS2) + + .. warning:: + + Make sure you have the smartHMI on hand in case something goes wrong in the run. **Control via RL can be dangerous and unsafe for both the operator and the robot** + + .. raw:: html + + + + **Prerequisites:** + + A physical Kuka LBR iiwa robot is required. Additionally, the *libiiwa* library must be installed (visit the `libiiwa `_ documentation for installation details) and a Robot Operating System (ROS or ROS2) distribution must be available + + **Files** + + * Environment (ROS): :download:`reaching_iiwa_real_ros_env.py <../examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros_env.py>` + * Environment (ROS2): :download:`reaching_iiwa_real_ros2_env.py <../examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros2_env.py>` + * Evaluation script: :download:`reaching_iiwa_real_ros_ros2_skrl_eval.py <../examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros_ros2_skrl_eval.py>` + * Checkpoints (:literal:`agent_joint.pt`, :literal:`agent_cartesian.pt`): :download:`trained_checkpoints.zip ` + + .. note:: + + Source the ROS/ROS2 distribution and the ROS/ROS workspace containing the libiiwa packages before executing the scripts + + **Evaluation:** + + .. code-block:: bash + + python3 reaching_iiwa_real_ros_ros2_skrl_eval.py + + **Main environment configuration:** + + The control space (Cartesian or joint) and the ROS/ROS version to use can be specified in the environment class constructor (from :literal:`reaching_iiwa_real_ros_ros2_skrl_eval.py`) as follow: + + .. code-block:: python + + ros_version = 1 # 1 or 2 + control_space = "joint" # joint or cartesian + + .. tab:: Simulation (Omniverse Isaac Gym) + + .. raw:: html + + + + .. raw:: html + + + + | + + **Prerequisites:** + + All installation steps described in Omniverse Isaac Gym's `Overview & Getting Started `_ section must be fulfilled (especially the subsection 1.3. 
Installing Examples Repository) + + **Files** (the implementation is self-contained so no specific location is required): + + * Environment: :download:`reaching_iiwa_omniverse_isaacgym_env.py <../examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_env.py>` + * Training script: :download:`reaching_iiwa_omniverse_isaacgym_skrl_train.py <../examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_train.py>` + * Evaluation script: :download:`reaching_iiwa_omniverse_isaacgym_skrl_eval.py <../examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_eval.py>` + * Checkpoints (:literal:`agent_joint.pt`, :literal:`agent_cartesian.pt`): :download:`trained_checkpoints.zip ` + * Simulation files: (.usd assets and robot class): :download:`simulation_files.zip ` + + + Simulation files must be structured as follows: + + .. code-block:: + + + ├── agent_cartesian.pt + ├── agent_joint.pt + ├── assets + │ ├── iiwa14_instanceable_meshes.usd + │ └── iiwa14.usd + ├── reaching_iiwa_omniverse_isaacgym_env.py + ├── reaching_iiwa_omniverse_isaacgym_skrl_eval.py + ├── reaching_iiwa_omniverse_isaacgym_skrl_train.py + ├── robots + │ ├── iiwa14.py + │ └── __init__.py + + **Training and evaluation:** + + .. code-block:: bash + + # training (local workstation) + ~/.local/share/ov/pkg/isaac_sim-*/python.sh reaching_iiwa_omniverse_isaacgym_skrl_train.py + + # training (docker container) + /isaac-sim/python.sh reaching_iiwa_omniverse_isaacgym_skrl_train.py + + .. code-block:: bash + + # evaluation (local workstation) + ~/.local/share/ov/pkg/isaac_sim-*/python.sh reaching_iiwa_omniverse_isaacgym_skrl_eval.py + + # evaluation (docker container) + /isaac-sim/python.sh reaching_iiwa_omniverse_isaacgym_skrl_eval.py + + **Main environment configuration:** + + The control space (Cartesian or joint) can be specified in the task configuration dictionary (from :literal:`reaching_iiwa_omniverse_isaacgym_skrl_train.py`) as follow: + + .. code-block:: python + + TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + .. raw:: html

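+
+   As a rough, illustrative sketch (not part of the example files; :literal:`action`, :literal:`current_joint_positions` and :literal:`current_ee_position` are hypothetical names), the two control spaces scale the network output before commanding the robot as follows:
+
+   .. code-block:: python
+
+      # joint control space: scaled change of the joint positions (dt = 1/120, action scale = 2.5)
+      target_joint_positions = current_joint_positions + (2.5 / 120.0) * action
+
+      # Cartesian control space: scaled change of the end-effector position (in meters)
+      target_ee_position = current_ee_position + action / 100.0
+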
From 0c2ef52a23b6aba61ba56696463f33d79425dacd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 13 Jan 2023 10:11:22 +0100 Subject: [PATCH 151/157] Update CHANGELOG --- CHANGELOG.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d93d4867..8c7a3166 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,14 +4,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ## [0.9.0] - Unreleased ### Added -- DeepMind robosuite environment wrapper -- Set the running mode (training or evaluation) of the agents +- Support for Farama Gymnasium interface +- Wrapper for robosuite environments - Weights & Biases integration (by @juhannc) -- Support for Gymnasium interface +- Set the running mode (training or evaluation) of the agents - Allow clipping the gradient norm for DDPG, TD3 and SAC agents - Initialize model biases - Add RNN (RNN, LSTM, GRU and any other variant) support for A2C, DDPG, PPO, SAC, TD3 and TRPO agents - Allow disabling training/evaluation progressbar +- Farama Shimmy and robosuite examples +- KUKA LBR iiwa real-world example ### Changed - Forward model inputs as a Python dictionary [**breaking change**] From ba81de28fa405bcb75486b40abf5e7021c915d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 13 Jan 2023 11:32:46 +0100 Subject: [PATCH 152/157] Update KUKA LBR iiwa real-world example in docs --- docs/source/intro/examples.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst index 8848a780..7e82272a 100644 --- a/docs/source/intro/examples.rst +++ b/docs/source/intro/examples.rst @@ -1392,17 +1392,20 @@ These examples show basic real-world use cases to guide and support advanced RL **Evaluation:** + .. note:: + + The environment (:literal:`reaching_iiwa_real_ros_env.py` or :literal:`reaching_iiwa_real_ros2_env.py`) to be loaded will be automatically selected based on the sourced ROS distribution (ROS or ROS2) at script execution + .. code-block:: bash python3 reaching_iiwa_real_ros_ros2_skrl_eval.py **Main environment configuration:** - The control space (Cartesian or joint) and the ROS/ROS version to use can be specified in the environment class constructor (from :literal:`reaching_iiwa_real_ros_ros2_skrl_eval.py`) as follow: + The control space (Cartesian or joint) can be specified in the environment class constructor (from :literal:`reaching_iiwa_real_ros_ros2_skrl_eval.py`) as follow: .. code-block:: python - ros_version = 1 # 1 or 2 control_space = "joint" # joint or cartesian .. 
tab:: Simulation (Omniverse Isaac Gym) From 973e93982571345a50345717d474e8293503b903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 13 Jan 2023 11:36:27 +0100 Subject: [PATCH 153/157] Add KUKA LBR iiwa rel-world example files --- .../reaching_iiwa_omniverse_isaacgym_env.py | 271 ++++++++++++++++++ ...ching_iiwa_omniverse_isaacgym_skrl_eval.py | 92 ++++++ ...hing_iiwa_omniverse_isaacgym_skrl_train.py | 133 +++++++++ .../reaching_iiwa_real_ros_ros2_skrl_eval.py | 96 +++++++ .../reaching_iiwa_real_skrl_eval.py | 82 ++++++ 5 files changed, 674 insertions(+) create mode 100644 docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_env.py create mode 100644 docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_eval.py create mode 100644 docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_train.py create mode 100644 docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros_ros2_skrl_eval.py create mode 100644 docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_skrl_eval.py diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_env.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_env.py new file mode 100644 index 00000000..f29d7eba --- /dev/null +++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_env.py @@ -0,0 +1,271 @@ +import torch +import numpy as np + +from omniisaacgymenvs.tasks.base.rl_task import RLTask + +from omni.isaac.core.prims import RigidPrimView +from omni.isaac.core.articulations import ArticulationView +from omni.isaac.core.objects import DynamicSphere +from omni.isaac.core.utils.prims import get_prim_at_path + +from robots.iiwa14 import Iiwa14 as Robot + +from skrl.utils import omniverse_isaacgym_utils + +# post_physics_step calls +# - get_observations() +# - get_states() +# - calculate_metrics() +# - is_done() +# - get_extras() + + +TASK_CFG = {"test": False, + "device_id": 0, + "headless": True, + "sim_device": "gpu", + "task": {"name": "ReachingIiwa", + "physics_engine": "physx", + "env": {"numEnvs": 1024, + "envSpacing": 1.5, + "episodeLength": 100, + "enableDebugVis": False, + "clipObservations": 1000.0, + "clipActions": 1.0, + "controlFrequencyInv": 4, + "actionScale": 2.5, + "dofVelocityScale": 0.1, + "controlSpace": "cartesian"}, + "sim": {"dt": 0.0083, # 1 / 120 + "use_gpu_pipeline": True, + "gravity": [0.0, 0.0, -9.81], + "add_ground_plane": True, + "use_flatcache": True, + "enable_scene_query_support": False, + "enable_cameras": False, + "default_physics_material": {"static_friction": 1.0, + "dynamic_friction": 1.0, + "restitution": 0.0}, + "physx": {"worker_thread_count": 4, + "solver_type": 1, + "use_gpu": True, + "solver_position_iteration_count": 4, + "solver_velocity_iteration_count": 1, + "contact_offset": 0.005, + "rest_offset": 0.0, + "bounce_threshold_velocity": 0.2, + "friction_offset_threshold": 0.04, + "friction_correlation_distance": 0.025, + "enable_sleeping": True, + "enable_stabilization": True, + "max_depenetration_velocity": 1000.0, + "gpu_max_rigid_contact_count": 524288, + "gpu_max_rigid_patch_count": 33554432, + "gpu_found_lost_pairs_capacity": 524288, + "gpu_found_lost_aggregate_pairs_capacity": 262144, + "gpu_total_aggregate_pairs_capacity": 1048576, + "gpu_max_soft_body_contacts": 1048576, + "gpu_max_particle_contacts": 1048576, + "gpu_heap_capacity": 33554432, + "gpu_temp_buffer_capacity": 
16777216, + "gpu_max_num_partitions": 8}, + "robot": {"override_usd_defaults": False, + "fixed_base": False, + "enable_self_collisions": False, + "enable_gyroscopic_forces": True, + "solver_position_iteration_count": 4, + "solver_velocity_iteration_count": 1, + "sleep_threshold": 0.005, + "stabilization_threshold": 0.001, + "density": -1, + "max_depenetration_velocity": 1000.0, + "contact_offset": 0.005, + "rest_offset": 0.0}, + "target": {"override_usd_defaults": False, + "fixed_base": True, + "enable_self_collisions": False, + "enable_gyroscopic_forces": True, + "solver_position_iteration_count": 4, + "solver_velocity_iteration_count": 1, + "sleep_threshold": 0.005, + "stabilization_threshold": 0.001, + "density": -1, + "max_depenetration_velocity": 1000.0, + "contact_offset": 0.005, + "rest_offset": 0.0}}}} + + +class RobotView(ArticulationView): + def __init__(self, prim_paths_expr: str, name: str = "robot_view") -> None: + super().__init__(prim_paths_expr=prim_paths_expr, name=name, reset_xform_properties=False) + + +class ReachingIiwaTask(RLTask): + def __init__(self, name, sim_config, env, offset=None) -> None: + self._sim_config = sim_config + self._cfg = sim_config.config + self._task_cfg = sim_config.task_config + + self.dt = 1 / 120.0 + + self._num_envs = self._task_cfg["env"]["numEnvs"] + self._env_spacing = self._task_cfg["env"]["envSpacing"] + self._action_scale = self._task_cfg["env"]["actionScale"] + self._dof_vel_scale = self._task_cfg["env"]["dofVelocityScale"] + self._max_episode_length = self._task_cfg["env"]["episodeLength"] + self._control_space = self._task_cfg["env"]["controlSpace"] + + # observation and action space + self._num_observations = 18 + if self._control_space == "joint": + self._num_actions = 7 + elif self._control_space == "cartesian": + self._num_actions = 3 + else: + raise ValueError("Invalid control space: {}".format(self._control_space)) + + self._end_effector_link = "iiwa_link_7" + + RLTask.__init__(self, name, env) + + def set_up_scene(self, scene) -> None: + self.get_robot() + self.get_target() + + super().set_up_scene(scene) + + # robot view + self._robots = RobotView(prim_paths_expr="/World/envs/.*/robot", name="robot_view") + scene.add(self._robots) + # end-effectors view + self._end_effectors = RigidPrimView(prim_paths_expr="/World/envs/.*/robot/{}".format(self._end_effector_link), name="end_effector_view") + scene.add(self._end_effectors) + # target view + self._targets = RigidPrimView(prim_paths_expr="/World/envs/.*/target", name="target_view", reset_xform_properties=False) + scene.add(self._targets) + + self.init_data() + + def get_robot(self): + robot = Robot(prim_path=self.default_zero_env_path + "/robot", + translation=torch.tensor([0.0, 0.0, 0.0]), + orientation=torch.tensor([1.0, 0.0, 0.0, 0.0]), + name="robot") + self._sim_config.apply_articulation_settings("robot", get_prim_at_path(robot.prim_path), self._sim_config.parse_actor_config("robot")) + + def get_target(self): + target = DynamicSphere(prim_path=self.default_zero_env_path + "/target", + name="target", + radius=0.025, + color=torch.tensor([1, 0, 0])) + self._sim_config.apply_articulation_settings("target", get_prim_at_path(target.prim_path), self._sim_config.parse_actor_config("target")) + target.set_collision_enabled(False) + + def init_data(self) -> None: + self.robot_default_dof_pos = torch.tensor(np.radians([0, 0, 0, -90, 0, 90, 0]), device=self._device, dtype=torch.float32) + self.actions = torch.zeros((self._num_envs, self.num_actions), device=self._device) + + if 
self._control_space == "cartesian": + self.jacobians = torch.zeros((self._num_envs, 7, 6, 7), device=self._device) + self.end_effector_pos, self.end_effector_rot = torch.zeros((self._num_envs, 3), device=self._device), torch.zeros((self._num_envs, 4), device=self._device) + + def get_observations(self) -> dict: + robot_dof_pos = self._robots.get_joint_positions(clone=False) + robot_dof_vel = self._robots.get_joint_velocities(clone=False) + end_effector_pos, end_effector_rot = self._end_effectors.get_world_poses(clone=False) + target_pos, target_rot = self._targets.get_world_poses(clone=False) + + dof_pos_scaled = 2.0 * (robot_dof_pos - self.robot_dof_lower_limits) \ + / (self.robot_dof_upper_limits - self.robot_dof_lower_limits) - 1.0 + dof_vel_scaled = robot_dof_vel * self._dof_vel_scale + + generalization_noise = torch.rand((dof_vel_scaled.shape[0], 7), device=self._device) + 0.5 + + self.obs_buf[:, 0] = self.progress_buf / self._max_episode_length + self.obs_buf[:, 1:8] = dof_pos_scaled + self.obs_buf[:, 8:15] = dof_vel_scaled * generalization_noise + self.obs_buf[:, 15:18] = target_pos - self._env_pos + + # compute distance for calculate_metrics() and is_done() + self._computed_distance = torch.norm(end_effector_pos - target_pos, dim=-1) + + if self._control_space == "cartesian": + self.jacobians = self._robots.get_jacobians(clone=False) + self.end_effector_pos, self.end_effector_rot = end_effector_pos, end_effector_rot + self.end_effector_pos -= self._env_pos + + return {self._robots.name: {"obs_buf": self.obs_buf}} + + def pre_physics_step(self, actions) -> None: + reset_env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1) + if len(reset_env_ids) > 0: + self.reset_idx(reset_env_ids) + + self.actions = actions.clone().to(self._device) + env_ids_int32 = torch.arange(self._robots.count, dtype=torch.int32, device=self._device) + + if self._control_space == "joint": + targets = self.robot_dof_targets + self.robot_dof_speed_scales * self.dt * self.actions * self._action_scale + + elif self._control_space == "cartesian": + goal_position = self.end_effector_pos + actions / 100.0 + delta_dof_pos = omniverse_isaacgym_utils.ik(jacobian_end_effector=self.jacobians[:, 7 - 1, :, :7], # iiwa_link_7 index: 7 + current_position=self.end_effector_pos, + current_orientation=self.end_effector_rot, + goal_position=goal_position, + goal_orientation=None) + targets = self.robot_dof_targets[:, :7] + delta_dof_pos + + self.robot_dof_targets = torch.clamp(targets, self.robot_dof_lower_limits, self.robot_dof_upper_limits) + self._robots.set_joint_position_targets(self.robot_dof_targets, indices=env_ids_int32) + + def reset_idx(self, env_ids) -> None: + indices = env_ids.to(dtype=torch.int32) + + # reset robot + pos = torch.clamp(self.robot_default_dof_pos.unsqueeze(0) + 0.25 * (torch.rand((len(env_ids), self.num_robot_dofs), device=self._device) - 0.5), + self.robot_dof_lower_limits, self.robot_dof_upper_limits) + dof_pos = torch.zeros((len(indices), self._robots.num_dof), device=self._device) + dof_pos[:] = pos + dof_vel = torch.zeros((len(indices), self._robots.num_dof), device=self._device) + self.robot_dof_targets[env_ids, :] = pos + self.robot_dof_pos[env_ids, :] = pos + + self._robots.set_joint_position_targets(self.robot_dof_targets[env_ids], indices=indices) + self._robots.set_joint_positions(dof_pos, indices=indices) + self._robots.set_joint_velocities(dof_vel, indices=indices) + + # reset target + pos = (torch.rand((len(env_ids), 3), device=self._device) - 0.5) * 2 \ + * torch.tensor([0.10, 
0.20, 0.20], device=self._device) \ + + torch.tensor([0.60, 0.00, 0.40], device=self._device) + + self._targets.set_world_poses(pos + self._env_pos[env_ids], indices=indices) + + # bookkeeping + self.reset_buf[env_ids] = 0 + self.progress_buf[env_ids] = 0 + + def post_reset(self): + self.num_robot_dofs = self._robots.num_dof + self.robot_dof_pos = torch.zeros((self.num_envs, self.num_robot_dofs), device=self._device) + dof_limits = self._robots.get_dof_limits() + self.robot_dof_lower_limits = dof_limits[0, :, 0].to(device=self._device) + self.robot_dof_upper_limits = dof_limits[0, :, 1].to(device=self._device) + + self.robot_dof_speed_scales = torch.ones_like(self.robot_dof_lower_limits) + self.robot_dof_targets = torch.zeros((self._num_envs, self.num_robot_dofs), dtype=torch.float, device=self._device) + + # randomize all envs + indices = torch.arange(self._num_envs, dtype=torch.int64, device=self._device) + self.reset_idx(indices) + + def calculate_metrics(self) -> None: + self.rew_buf[:] = -self._computed_distance + + def is_done(self) -> None: + self.reset_buf.fill_(0) + # target reached + self.reset_buf = torch.where(self._computed_distance <= 0.035, torch.ones_like(self.reset_buf), self.reset_buf) + # max episode length + self.reset_buf = torch.where(self.progress_buf >= self._max_episode_length - 1, torch.ones_like(self.reset_buf), self.reset_buf) diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_eval.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_eval.py new file mode 100644 index 00000000..3611f86a --- /dev/null +++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_eval.py @@ -0,0 +1,92 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.utils.omniverse_isaacgym_utils import get_env_instance +from skrl.envs.torch import wrap_env + + +# Define only the policy for evaluation +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} + + +# instance VecEnvBase and setup task +headless = not True # set headless to False for rendering +env = get_env_instance(headless=headless) + +from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig +from reaching_iiwa_omniverse_isaacgym_env import ReachingIiwaTask, TASK_CFG + +TASK_CFG["headless"] = headless +TASK_CFG["task"]["env"]["numEnvs"] = 64 +TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + +sim_config = SimConfig(TASK_CFG) +task = ReachingIiwaTask(name="ReachingIiwa", sim_config=sim_config, env=env) +env.set_task(task=task, sim_params=sim_config.get_physics_params(), backend="torch", 
init_sim=True) + +# wrap the environment +env = wrap_env(env, "omniverse-isaacgym") + +device = env.device + + +# Instantiate the agent's policy. +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +# logging to TensorBoard each 32 timesteps an ignore checkpoints +cfg_ppo["experiment"]["write_interval"] = 32 +cfg_ppo["experiment"]["checkpoint_interval"] = 0 + +agent = PPO(models=models_ppo, + memory=None, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoints +if TASK_CFG["task"]["env"]["controlSpace"] == "joint": + agent.load("./agent_joint.pt") +elif TASK_CFG["task"]["env"]["controlSpace"] == "cartesian": + agent.load("./agent_cartesian.pt") + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 5000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start evaluation +trainer.eval() diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_train.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_train.py new file mode 100644 index 00000000..d109085a --- /dev/null +++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_omniverse_isaacgym_skrl_train.py @@ -0,0 +1,133 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin, DeterministicMixin +from skrl.memories.torch import RandomMemory +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.schedulers.torch import KLAdaptiveRL +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.utils.omniverse_isaacgym_utils import get_env_instance +from skrl.envs.torch import wrap_env +from skrl.utils import set_seed + + +# set the seed for reproducibility +set_seed(42) + + +# Define the models (stochastic and deterministic models) for the agent using helper mixin. 
+# - Policy: takes as input the environment's observation/state and returns an action +# - Value: takes the state as input and provides a value to guide the policy +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} + +class Value(DeterministicMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False): + Model.__init__(self, observation_space, action_space, device) + DeterministicMixin.__init__(self, clip_actions) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, 1)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), {} + + +# instance VecEnvBase and setup task +headless = True # set headless to False for rendering +env = get_env_instance(headless=headless) + +from omniisaacgymenvs.utils.config_utils.sim_config import SimConfig +from reaching_iiwa_omniverse_isaacgym_env import ReachingIiwaTask, TASK_CFG + +TASK_CFG["headless"] = headless +TASK_CFG["task"]["env"]["numEnvs"] = 1024 +TASK_CFG["task"]["env"]["controlSpace"] = "joint" # "joint" or "cartesian" + +sim_config = SimConfig(TASK_CFG) +task = ReachingIiwaTask(name="ReachingIiwa", sim_config=sim_config, env=env) +env.set_task(task=task, sim_params=sim_config.get_physics_params(), backend="torch", init_sim=True) + +# wrap the environment +env = wrap_env(env, "omniverse-isaacgym") + +device = env.device + + +# Instantiate a RandomMemory as rollout buffer (any memory can be used for this) +memory = RandomMemory(memory_size=16, num_envs=env.num_envs, device=device) + + +# Instantiate the agent's models (function approximators). +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) +models_ppo["value"] = Value(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. 
+# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["rollouts"] = 16 +cfg_ppo["learning_epochs"] = 8 +cfg_ppo["mini_batches"] = 8 +cfg_ppo["discount_factor"] = 0.99 +cfg_ppo["lambda"] = 0.95 +cfg_ppo["learning_rate"] = 5e-4 +cfg_ppo["learning_rate_scheduler"] = KLAdaptiveRL +cfg_ppo["learning_rate_scheduler_kwargs"] = {"kl_threshold": 0.008} +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["grad_norm_clip"] = 1.0 +cfg_ppo["ratio_clip"] = 0.2 +cfg_ppo["value_clip"] = 0.2 +cfg_ppo["clip_predicted_values"] = True +cfg_ppo["entropy_loss_scale"] = 0.0 +cfg_ppo["value_loss_scale"] = 2.0 +cfg_ppo["kl_threshold"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +cfg_ppo["value_preprocessor"] = RunningStandardScaler +cfg_ppo["value_preprocessor_kwargs"] = {"size": 1, "device": device} +# logging to TensorBoard and write checkpoints each 32 and 250 timesteps respectively +cfg_ppo["experiment"]["write_interval"] = 32 +cfg_ppo["experiment"]["checkpoint_interval"] = 250 + +agent = PPO(models=models_ppo, + memory=memory, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 5000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start training +trainer.train() diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros_ros2_skrl_eval.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros_ros2_skrl_eval.py new file mode 100644 index 00000000..b3c424f6 --- /dev/null +++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros_ros2_skrl_eval.py @@ -0,0 +1,96 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define only the policy for evaluation +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} + + +# Load the environment according to the ROS version +def get_active_ros_version(): + import os + if os.environ.get("ROS_DISTRO"): + return "ROS2" if os.environ.get("AMENT_PREFIX_PATH") else "ROS" + return "" + +active_ros_version = get_active_ros_version() + +if active_ros_version == "ROS": + from reaching_iiwa_real_ros_env import ReachingIiwa +elif active_ros_version == "ROS2": + from reaching_iiwa_real_ros2_env import ReachingIiwa +else: + 
print("No active ROS version found") + exit() + +control_space = "joint" # joint or cartesian +env = ReachingIiwa(control_space=control_space) + +# wrap the environment +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's policy. +# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +# logging to TensorBoard each 32 timesteps an ignore checkpoints +cfg_ppo["experiment"]["write_interval"] = 32 +cfg_ppo["experiment"]["checkpoint_interval"] = 0 + +agent = PPO(models=models_ppo, + memory=None, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoints +if control_space == "joint": + agent.load("./agent_joint.pt") +elif control_space == "cartesian": + agent.load("./agent_cartesian.pt") + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 1000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start evaluation +trainer.eval() diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_skrl_eval.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_skrl_eval.py new file mode 100644 index 00000000..87b43765 --- /dev/null +++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_skrl_eval.py @@ -0,0 +1,82 @@ +import torch +import torch.nn as nn + +# Import the skrl components to build the RL system +from skrl.models.torch import Model, GaussianMixin +from skrl.agents.torch.ppo import PPO, PPO_DEFAULT_CONFIG +from skrl.resources.preprocessors.torch import RunningStandardScaler +from skrl.trainers.torch import SequentialTrainer +from skrl.envs.torch import wrap_env + + +# Define only the policy for evaluation +class Policy(GaussianMixin, Model): + def __init__(self, observation_space, action_space, device, clip_actions=False, + clip_log_std=True, min_log_std=-20, max_log_std=2): + Model.__init__(self, observation_space, action_space, device) + GaussianMixin.__init__(self, clip_actions, clip_log_std, min_log_std, max_log_std) + + self.net = nn.Sequential(nn.Linear(self.num_observations, 256), + nn.ELU(), + nn.Linear(256, 128), + nn.ELU(), + nn.Linear(128, 64), + nn.ELU(), + nn.Linear(64, self.num_actions)) + self.log_std_parameter = nn.Parameter(torch.zeros(self.num_actions)) + + def compute(self, inputs, role): + return self.net(inputs["states"]), self.log_std_parameter, {} + + +# Load the environment +from reaching_iiwa_real_env import ReachingIiwa + +control_space = "joint" # joint or cartesian +env = ReachingIiwa(control_space=control_space) + +# wrap the environment +env = wrap_env(env) + +device = env.device + + +# Instantiate the agent's policy. 
+# PPO requires 2 models, visit its documentation for more details +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#spaces-and-models +models_ppo = {} +models_ppo["policy"] = Policy(env.observation_space, env.action_space, device) + + +# Configure and instantiate the agent. +# Only modify some of the default configuration, visit its documentation to see all the options +# https://skrl.readthedocs.io/en/latest/modules/skrl.agents.ppo.html#configuration-and-hyperparameters +cfg_ppo = PPO_DEFAULT_CONFIG.copy() +cfg_ppo["random_timesteps"] = 0 +cfg_ppo["learning_starts"] = 0 +cfg_ppo["state_preprocessor"] = RunningStandardScaler +cfg_ppo["state_preprocessor_kwargs"] = {"size": env.observation_space, "device": device} +# logging to TensorBoard each 32 timesteps an ignore checkpoints +cfg_ppo["experiment"]["write_interval"] = 32 +cfg_ppo["experiment"]["checkpoint_interval"] = 0 + +agent = PPO(models=models_ppo, + memory=None, + cfg=cfg_ppo, + observation_space=env.observation_space, + action_space=env.action_space, + device=device) + +# load checkpoints +if control_space == "joint": + agent.load("./agent_joint.pt") +elif control_space == "cartesian": + agent.load("./agent_cartesian.pt") + + +# Configure and instantiate the RL trainer +cfg_trainer = {"timesteps": 1000, "headless": True} +trainer = SequentialTrainer(cfg=cfg_trainer, env=env, agents=agent) + +# start evaluation +trainer.eval() From a775e4296d7c72d1aa96c66b855eb1a40cbc197e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 13 Jan 2023 12:00:09 +0100 Subject: [PATCH 154/157] Add KUKA LBR iiwa real-world environment files --- .../kuka_lbr_iiwa/reaching_iiwa_real_env.py | 145 ++++++++++++ .../reaching_iiwa_real_ros2_env.py | 218 ++++++++++++++++++ .../reaching_iiwa_real_ros_env.py | 193 ++++++++++++++++ 3 files changed, 556 insertions(+) create mode 100644 docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_env.py create mode 100644 docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros2_env.py create mode 100644 docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros_env.py diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_env.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_env.py new file mode 100644 index 00000000..4e5a8e9e --- /dev/null +++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_env.py @@ -0,0 +1,145 @@ +import time +import numpy as np +import gymnasium as gym + +import libiiwa + + +class ReachingIiwa(gym.Env): + def __init__(self, control_space="joint"): + + self.control_space = control_space # joint or cartesian + + # spaces + self.observation_space = gym.spaces.Box(low=-1000, high=1000, shape=(18,), dtype=np.float32) + if self.control_space == "joint": + self.action_space = gym.spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32) + elif self.control_space == "cartesian": + self.action_space = gym.spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32) + else: + raise ValueError("Invalid control space:", self.control_space) + + # init iiwa + print("Connecting to robot...") + + self.robot = libiiwa.LibIiwa() + self.robot.set_control_interface(libiiwa.ControlInterface.CONTROL_INTERFACE_SERVO) + + self.robot.set_desired_joint_velocity_rel(0.5) + self.robot.set_desired_joint_acceleration_rel(0.5) + self.robot.set_desired_joint_jerk_rel(0.5) + + self.robot.set_desired_cartesian_velocity(10) + self.robot.set_desired_cartesian_acceleration(10) + 
self.robot.set_desired_cartesian_jerk(10) + + print("Robot connected") + + self.motion = None + self.motion_thread = None + + self.dt = 1 / 120.0 + self.action_scale = 2.5 + self.dof_vel_scale = 0.1 + self.max_episode_length = 100 + self.robot_dof_speed_scales = 1 + self.target_pos = np.array([0.65, 0.2, 0.2]) + self.robot_default_dof_pos = np.radians([0, 0, 0, -90, 0, 90, 0]) + self.robot_dof_lower_limits = np.array([-2.9671, -2.0944, -2.9671, -2.0944, -2.9671, -2.0944, -3.0543]) + self.robot_dof_upper_limits = np.array([ 2.9671, 2.0944, 2.9671, 2.0944, 2.9671, 2.0944, 3.0543]) + + self.progress_buf = 1 + self.obs_buf = np.zeros((18,), dtype=np.float32) + + def _get_observation_reward_done(self): + # get robot state + robot_state = self.robot.get_state(refresh=True) + + # observation + robot_dof_pos = robot_state["joint_position"] + robot_dof_vel = robot_state["joint_velocity"] + end_effector_pos = robot_state["cartesian_position"] + + dof_pos_scaled = 2.0 * (robot_dof_pos - self.robot_dof_lower_limits) / (self.robot_dof_upper_limits - self.robot_dof_lower_limits) - 1.0 + dof_vel_scaled = robot_dof_vel * self.dof_vel_scale + + self.obs_buf[0] = self.progress_buf / float(self.max_episode_length) + self.obs_buf[1:8] = dof_pos_scaled + self.obs_buf[8:15] = dof_vel_scaled + self.obs_buf[15:18] = self.target_pos + + # reward + distance = np.linalg.norm(end_effector_pos - self.target_pos) + reward = -distance + + # done + done = self.progress_buf >= self.max_episode_length - 1 + done = done or distance <= 0.075 + + print("Distance:", distance) + if done: + print("Target or Maximum episode length reached") + time.sleep(1) + + return self.obs_buf, reward, done + + def reset(self): + print("Reseting...") + + # go to 1) safe position, 2) random position + self.robot.command_joint_position(self.robot_default_dof_pos) + time.sleep(3) + dof_pos = self.robot_default_dof_pos + 0.25 * (np.random.rand(7) - 0.5) + self.robot.command_joint_position(dof_pos) + time.sleep(1) + + # get target position from prompt + while True: + try: + print("Enter target position (X, Y, Z) in meters") + raw = input("or press [Enter] key for a random target position: ") + if raw: + self.target_pos = np.array([float(p) for p in raw.replace(' ', '').split(',')]) + else: + noise = (2 * np.random.rand(3) - 1) * np.array([0.1, 0.2, 0.2]) + self.target_pos = np.array([0.6, 0.0, 0.4]) + noise + print("Target position:", self.target_pos) + break + except ValueError: + print("Invalid input. Try something like: 0.65, 0.0, 0.4") + + input("Press [Enter] to continue") + + self.progress_buf = 0 + observation, reward, done = self._get_observation_reward_done() + + return observation, {} + + def step(self, action): + self.progress_buf += 1 + + # get robot state + robot_state = self.robot.get_state(refresh=True) + + # control space + # joint + if self.control_space == "joint": + dof_pos = robot_state["joint_position"] + (self.robot_dof_speed_scales * self.dt * action * self.action_scale) + self.robot.command_joint_position(dof_pos) + # cartesian + elif self.control_space == "cartesian": + end_effector_pos = robot_state["cartesian_position"] + action / 100.0 + self.robot.command_cartesian_pose(end_effector_pos) + + # the use of time.sleep is for simplicity. 
It does not guarantee control at a specific frequency + time.sleep(1 / 30.0) + + observation, reward, terminated = self._get_observation_reward_done() + + return observation, reward, terminated, False, {} + + def render(self, *args, **kwargs): + pass + + def close(self): + pass diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros2_env.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros2_env.py new file mode 100644 index 00000000..e3598e30 --- /dev/null +++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros2_env.py @@ -0,0 +1,218 @@ +import time +import numpy as np +import gymnasium as gym + +import rclpy +from rclpy.node import Node +from rclpy.qos import QoSPresetProfiles +import sensor_msgs.msg +import geometry_msgs.msg + +import libiiwa_msgs.srv + + +class ReachingIiwa(gym.Env): + def __init__(self, control_space="joint"): + + self.control_space = control_space # joint or cartesian + + # spaces + self.observation_space = gym.spaces.Box(low=-1000, high=1000, shape=(18,), dtype=np.float32) + if self.control_space == "joint": + self.action_space = gym.spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32) + elif self.control_space == "cartesian": + self.action_space = gym.spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32) + else: + raise ValueError("Invalid control space:", self.control_space) + + # initialize the ROS node + rclpy.init() + self.node = Node(self.__class__.__name__) + + import threading + threading.Thread(target=self._spin).start() + + # create publishers + + self.pub_command_joint = self.node.create_publisher(sensor_msgs.msg.JointState, '/iiwa/command/joint', QoSPresetProfiles.SYSTEM_DEFAULT.value) + self.pub_command_cartesian = self.node.create_publisher(geometry_msgs.msg.Pose, '/iiwa/command/cartesian', QoSPresetProfiles.SYSTEM_DEFAULT.value) + + # keep compatibility with libiiwa Python API + self.robot_state = {"joint_position": np.zeros((7,)), + "joint_velocity": np.zeros((7,)), + "cartesian_position": np.zeros((3,))} + + # create subscribers + self.node.create_subscription(msg_type=sensor_msgs.msg.JointState, + topic='/iiwa/state/joint_states', + callback=self._callback_joint_states, + qos_profile=QoSPresetProfiles.SYSTEM_DEFAULT.value) + self.node.create_subscription(msg_type=geometry_msgs.msg.Pose, + topic='/iiwa/state/end_effector_pose', + callback=self._callback_end_effector_pose, + qos_profile=QoSPresetProfiles.SYSTEM_DEFAULT.value) + + # service clients + client_control_interface = self.node.create_client(libiiwa_msgs.srv.SetString, '/iiwa/set_control_interface') + client_control_interface.wait_for_service() + request = libiiwa_msgs.srv.SetString.Request() + request.data = "SERVO" # or "servo" + client_control_interface.call(request) + + client_joint_velocity_rel = self.node.create_client(libiiwa_msgs.srv.SetNumber, '/iiwa/set_desired_joint_velocity_rel') + client_joint_acceleration_rel = self.node.create_client(libiiwa_msgs.srv.SetNumber, '/iiwa/set_desired_joint_acceleration_rel') + client_joint_jerk_rel = self.node.create_client(libiiwa_msgs.srv.SetNumber, '/iiwa/set_desired_joint_jerk_rel') + + client_cartesian_velocity = self.node.create_client(libiiwa_msgs.srv.SetNumber, '/iiwa/set_desired_cartesian_velocity') + client_cartesian_acceleration = self.node.create_client(libiiwa_msgs.srv.SetNumber, '/iiwa/set_desired_cartesian_acceleration') + client_cartesian_jerk = self.node.create_client(libiiwa_msgs.srv.SetNumber, '/iiwa/set_desired_cartesian_jerk') + + 
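+        # wait until the motion-limit services are available, then apply the same limits
+        # used by the direct libiiwa API example above (0.5 relative joint
+        # velocity/acceleration/jerk, 10.0 Cartesian velocity/acceleration/jerk)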
client_joint_velocity_rel.wait_for_service() + client_joint_acceleration_rel.wait_for_service() + client_joint_jerk_rel.wait_for_service() + + client_cartesian_velocity.wait_for_service() + client_cartesian_acceleration.wait_for_service() + client_cartesian_jerk.wait_for_service() + + request = libiiwa_msgs.srv.SetNumber.Request() + + request.data = 0.5 + client_joint_velocity_rel.call(request) + client_joint_acceleration_rel.call(request) + client_joint_jerk_rel.call(request) + + request.data = 10.0 + client_cartesian_velocity.call(request) + client_cartesian_acceleration.call(request) + client_cartesian_jerk.call(request) + + print("Robot connected") + + self.motion = None + self.motion_thread = None + + self.dt = 1 / 120.0 + self.action_scale = 2.5 + self.dof_vel_scale = 0.1 + self.max_episode_length = 100 + self.robot_dof_speed_scales = 1 + self.target_pos = np.array([0.65, 0.2, 0.2]) + self.robot_default_dof_pos = np.radians([0, 0, 0, -90, 0, 90, 0]) + self.robot_dof_lower_limits = np.array([-2.9671, -2.0944, -2.9671, -2.0944, -2.9671, -2.0944, -3.0543]) + self.robot_dof_upper_limits = np.array([ 2.9671, 2.0944, 2.9671, 2.0944, 2.9671, 2.0944, 3.0543]) + + self.progress_buf = 1 + self.obs_buf = np.zeros((18,), dtype=np.float32) + + def _spin(self): + rclpy.spin(self.node) + + def _callback_joint_states(self, msg): + self.robot_state["joint_position"] = np.array(msg.position) + self.robot_state["joint_velocity"] = np.array(msg.velocity) + + def _callback_end_effector_pose(self, msg): + positon = msg.position + self.robot_state["cartesian_position"] = np.array([positon.x, positon.y, positon.z]) + + def _get_observation_reward_done(self): + # observation + robot_dof_pos = self.robot_state["joint_position"] + robot_dof_vel = self.robot_state["joint_velocity"] + end_effector_pos = self.robot_state["cartesian_position"] + + dof_pos_scaled = 2.0 * (robot_dof_pos - self.robot_dof_lower_limits) / (self.robot_dof_upper_limits - self.robot_dof_lower_limits) - 1.0 + dof_vel_scaled = robot_dof_vel * self.dof_vel_scale + + self.obs_buf[0] = self.progress_buf / float(self.max_episode_length) + self.obs_buf[1:8] = dof_pos_scaled + self.obs_buf[8:15] = dof_vel_scaled + self.obs_buf[15:18] = self.target_pos + + # reward + distance = np.linalg.norm(end_effector_pos - self.target_pos) + reward = -distance + + # done + done = self.progress_buf >= self.max_episode_length - 1 + done = done or distance <= 0.075 + + print("Distance:", distance) + if done: + print("Target or Maximum episode length reached") + time.sleep(1) + + return self.obs_buf, reward, done + + def reset(self): + print("Reseting...") + + # go to 1) safe position, 2) random position + msg = sensor_msgs.msg.JointState() + msg.position = self.robot_default_dof_pos.tolist() + self.pub_command_joint.publish(msg) + time.sleep(3) + msg.position = (self.robot_default_dof_pos + 0.25 * (np.random.rand(7) - 0.5)).tolist() + self.pub_command_joint.publish(msg) + time.sleep(1) + + # get target position from prompt + while True: + try: + print("Enter target position (X, Y, Z) in meters") + raw = input("or press [Enter] key for a random target position: ") + if raw: + self.target_pos = np.array([float(p) for p in raw.replace(' ', '').split(',')]) + else: + noise = (2 * np.random.rand(3) - 1) * np.array([0.1, 0.2, 0.2]) + self.target_pos = np.array([0.6, 0.0, 0.4]) + noise + print("Target position:", self.target_pos) + break + except ValueError: + print("Invalid input. 
Try something like: 0.65, 0.0, 0.4") + + input("Press [Enter] to continue") + + self.progress_buf = 0 + observation, reward, done = self._get_observation_reward_done() + + return observation, {} + + def step(self, action): + self.progress_buf += 1 + + # control space + # joint + if self.control_space == "joint": + joint_positions = self.robot_state["joint_position"] + (self.robot_dof_speed_scales * self.dt * action * self.action_scale) + msg = sensor_msgs.msg.JointState() + msg.position = joint_positions.tolist() + self.pub_command_joint.publish(msg) + # cartesian + elif self.control_space == "cartesian": + end_effector_pos = self.robot_state["cartesian_position"] + action / 100.0 + msg = geometry_msgs.msg.Pose() + msg.position.x = end_effector_pos[0] + msg.position.y = end_effector_pos[1] + msg.position.z = end_effector_pos[2] + msg.orientation.x = np.nan + msg.orientation.y = np.nan + msg.orientation.z = np.nan + msg.orientation.w = np.nan + self.pub_command_cartesian.publish(msg) + + # the use of time.sleep is for simplicity. It does not guarantee control at a specific frequency + time.sleep(1 / 30.0) + + observation, reward, terminated = self._get_observation_reward_done() + + return observation, reward, terminated, False, {} + + def render(self, *args, **kwargs): + pass + + def close(self): + # shutdown the node + self.node.destroy_node() + rclpy.shutdown() diff --git a/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros_env.py b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros_env.py new file mode 100644 index 00000000..a6df08e6 --- /dev/null +++ b/docs/source/examples/real_world/kuka_lbr_iiwa/reaching_iiwa_real_ros_env.py @@ -0,0 +1,193 @@ +import time +import numpy as np +import gymnasium as gym + +import rospy +import sensor_msgs.msg +import geometry_msgs.msg + +import libiiwa_msgs.srv + + +class ReachingIiwa(gym.Env): + def __init__(self, control_space="joint"): + + self.control_space = control_space # joint or cartesian + + # spaces + self.observation_space = gym.spaces.Box(low=-1000, high=1000, shape=(18,), dtype=np.float32) + if self.control_space == "joint": + self.action_space = gym.spaces.Box(low=-1, high=1, shape=(7,), dtype=np.float32) + elif self.control_space == "cartesian": + self.action_space = gym.spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32) + else: + raise ValueError("Invalid control space:", self.control_space) + + # create publishers + self.pub_command_joint = rospy.Publisher('/iiwa/command/joint', sensor_msgs.msg.JointState, queue_size=1) + self.pub_command_cartesian = rospy.Publisher('/iiwa/command/cartesian', geometry_msgs.msg.Pose, queue_size=1) + + # keep compatibility with libiiwa Python API + self.robot_state = {"joint_position": np.zeros((7,)), + "joint_velocity": np.zeros((7,)), + "cartesian_position": np.zeros((3,))} + + # create subscribers + rospy.Subscriber('/iiwa/state/joint_states', sensor_msgs.msg.JointState, self._callback_joint_states) + rospy.Subscriber('/iiwa/state/end_effector_pose', geometry_msgs.msg.Pose, self._callback_end_effector_pose) + + # create service clients + rospy.wait_for_service('/iiwa/set_control_interface') + + proxy = rospy.ServiceProxy('/iiwa/set_control_interface', libiiwa_msgs.srv.SetString) + proxy("SERVO") # or "servo" + + rospy.wait_for_service('/iiwa/set_desired_joint_velocity_rel') + rospy.wait_for_service('/iiwa/set_desired_joint_acceleration_rel') + rospy.wait_for_service('/iiwa/set_desired_joint_jerk_rel') + + proxy = 
rospy.ServiceProxy('/iiwa/set_desired_joint_velocity_rel', libiiwa_msgs.srv.SetNumber) + proxy(0.5) + proxy = rospy.ServiceProxy('/iiwa/set_desired_joint_acceleration_rel', libiiwa_msgs.srv.SetNumber) + proxy(0.5) + proxy = rospy.ServiceProxy('/iiwa/set_desired_joint_jerk_rel', libiiwa_msgs.srv.SetNumber) + proxy(0.5) + + rospy.wait_for_service('/iiwa/set_desired_cartesian_velocity') + rospy.wait_for_service('/iiwa/set_desired_cartesian_acceleration') + rospy.wait_for_service('/iiwa/set_desired_cartesian_jerk') + + proxy = rospy.ServiceProxy('/iiwa/set_desired_cartesian_velocity', libiiwa_msgs.srv.SetNumber) + proxy(10.0) + proxy = rospy.ServiceProxy('/iiwa/set_desired_cartesian_acceleration', libiiwa_msgs.srv.SetNumber) + proxy(10.0) + proxy = rospy.ServiceProxy('/iiwa/set_desired_cartesian_jerk', libiiwa_msgs.srv.SetNumber) + proxy(10.0) + + # initialize the ROS node + rospy.init_node(self.__class__.__name__) + + print("Robot connected") + + self.motion = None + self.motion_thread = None + + self.dt = 1 / 120.0 + self.action_scale = 2.5 + self.dof_vel_scale = 0.1 + self.max_episode_length = 100 + self.robot_dof_speed_scales = 1 + self.target_pos = np.array([0.65, 0.2, 0.2]) + self.robot_default_dof_pos = np.radians([0, 0, 0, -90, 0, 90, 0]) + self.robot_dof_lower_limits = np.array([-2.9671, -2.0944, -2.9671, -2.0944, -2.9671, -2.0944, -3.0543]) + self.robot_dof_upper_limits = np.array([ 2.9671, 2.0944, 2.9671, 2.0944, 2.9671, 2.0944, 3.0543]) + + self.progress_buf = 1 + self.obs_buf = np.zeros((18,), dtype=np.float32) + + def _callback_joint_states(self, msg): + self.robot_state["joint_position"] = np.array(msg.position) + self.robot_state["joint_velocity"] = np.array(msg.velocity) + + def _callback_end_effector_pose(self, msg): + positon = msg.position + self.robot_state["cartesian_position"] = np.array([positon.x, positon.y, positon.z]) + + def _get_observation_reward_done(self): + # observation + robot_dof_pos = self.robot_state["joint_position"] + robot_dof_vel = self.robot_state["joint_velocity"] + end_effector_pos = self.robot_state["cartesian_position"] + + dof_pos_scaled = 2.0 * (robot_dof_pos - self.robot_dof_lower_limits) / (self.robot_dof_upper_limits - self.robot_dof_lower_limits) - 1.0 + dof_vel_scaled = robot_dof_vel * self.dof_vel_scale + + self.obs_buf[0] = self.progress_buf / float(self.max_episode_length) + self.obs_buf[1:8] = dof_pos_scaled + self.obs_buf[8:15] = dof_vel_scaled + self.obs_buf[15:18] = self.target_pos + + # reward + distance = np.linalg.norm(end_effector_pos - self.target_pos) + reward = -distance + + # done + done = self.progress_buf >= self.max_episode_length - 1 + done = done or distance <= 0.075 + + print("Distance:", distance) + if done: + print("Target or Maximum episode length reached") + time.sleep(1) + + return self.obs_buf, reward, done + + def reset(self): + print("Reseting...") + + # go to 1) safe position, 2) random position + msg = sensor_msgs.msg.JointState() + msg.position = self.robot_default_dof_pos.tolist() + self.pub_command_joint.publish(msg) + time.sleep(3) + msg.position = (self.robot_default_dof_pos + 0.25 * (np.random.rand(7) - 0.5)).tolist() + self.pub_command_joint.publish(msg) + time.sleep(1) + + # get target position from prompt + while True: + try: + print("Enter target position (X, Y, Z) in meters") + raw = input("or press [Enter] key for a random target position: ") + if raw: + self.target_pos = np.array([float(p) for p in raw.replace(' ', '').split(',')]) + else: + noise = (2 * np.random.rand(3) - 1) * np.array([0.1, 
0.2, 0.2]) + self.target_pos = np.array([0.6, 0.0, 0.4]) + noise + print("Target position:", self.target_pos) + break + except ValueError: + print("Invalid input. Try something like: 0.65, 0.0, 0.4") + + input("Press [Enter] to continue") + + self.progress_buf = 0 + observation, reward, done = self._get_observation_reward_done() + + return observation, {} + + def step(self, action): + self.progress_buf += 1 + + # control space + # joint + if self.control_space == "joint": + joint_positions = self.robot_state["joint_position"] + (self.robot_dof_speed_scales * self.dt * action * self.action_scale) + msg = sensor_msgs.msg.JointState() + msg.position = joint_positions.tolist() + self.pub_command_joint.publish(msg) + # cartesian + elif self.control_space == "cartesian": + end_effector_pos = self.robot_state["cartesian_position"] + action / 100.0 + msg = geometry_msgs.msg.Pose() + msg.position.x = end_effector_pos[0] + msg.position.y = end_effector_pos[1] + msg.position.z = end_effector_pos[2] + msg.orientation.x = np.nan + msg.orientation.y = np.nan + msg.orientation.z = np.nan + msg.orientation.w = np.nan + self.pub_command_cartesian.publish(msg) + + # the use of time.sleep is for simplicity. It does not guarantee control at a specific frequency + time.sleep(1 / 30.0) + + observation, reward, terminated = self._get_observation_reward_done() + + return observation, reward, terminated, False, {} + + def render(self, *args, **kwargs): + pass + + def close(self): + pass From f14ebbb34595c70c036066d1cb28e57307fdd26b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?= Date: Fri, 13 Jan 2023 12:02:27 +0100 Subject: [PATCH 155/157] Make the .forward method have the same parameters as .act --- skrl/models/torch/base.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/skrl/models/torch/base.py b/skrl/models/torch/base.py index d18846bf..8caa2480 100644 --- a/skrl/models/torch/base.py +++ b/skrl/models/torch/base.py @@ -346,22 +346,27 @@ def get_specification(self) -> Mapping[str, Any]: """ return {} - def forward(self, *args, **kwargs) -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: + def forward(self, + inputs: Mapping[str, Union[torch.Tensor, Any]], + role: str = "") -> Tuple[torch.Tensor, Union[torch.Tensor, None], Mapping[str, Union[torch.Tensor, Any]]]: """Forward pass of the model This method calls the ``.act()`` method and returns its outputs - :param args: Positional arguments passed to the called method - :type args: tuple, optional - :param kwargs: Key-value arguments passed to the called method - :type kwargs: dict, optional + :param inputs: Model inputs. The most common keys are: + + - ``"states"``: state of the environment used to make the decision + - ``"taken_actions"``: actions taken by the policy for the given states + :type inputs: dict where the values are typically torch.Tensor + :param role: Role play by the model (default: ``""``) + :type role: str, optional :return: Model output. The first component is the action to be taken by the agent. The second component is the log of the probability density function for stochastic models or None for deterministic models. 
                 The third component is a dictionary containing extra output values
+        :rtype: tuple of torch.Tensor, torch.Tensor or None, and dictionary
+        """
-        return self.act(*args, **kwargs)
+        return self.act(inputs, role)
 
     def compute(self,
                 inputs: Mapping[str, Union[torch.Tensor, Any]],

From 8f34de3a336baec71394cb7596ea56df4c70f779 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?=
Date: Fri, 13 Jan 2023 17:51:05 +0100
Subject: [PATCH 156/157] Update Isaac Sim version in standalone example

---
 docs/source/intro/examples.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/intro/examples.rst b/docs/source/intro/examples.rst
index 7e82272a..48d31708 100644
--- a/docs/source/intro/examples.rst
+++ b/docs/source/intro/examples.rst
@@ -977,7 +977,7 @@ These examples show how to train an agent in an Omniverse Isaac Sim environment
 
 .. tabs::
 
-    .. tab:: Isaac Sim 2022.1.X (Cartpole)
+    .. tab:: Isaac Sim 2022.X.X (Cartpole)
 
         This example performs the training of an agent in the Isaac Sim's Cartpole environment described in the `Creating New RL Environment `_ tutorial

From ed2a0c3cee4743e86b8aba13ca88e25c2f61be09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Antonio=20Serrano=20Mu=C3=B1oz?=
Date: Fri, 13 Jan 2023 17:51:36 +0100
Subject: [PATCH 157/157] Update CHANGELOG

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8c7a3166..00b260c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,7 +2,7 @@
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 
-## [0.9.0] - Unreleased
+## [0.9.0] - 2023-01-13
 ### Added
 - Support for Farama Gymnasium interface
 - Wrapper for robosuite environments
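The three ReachingIiwa environments added in patch 154 follow the Gymnasium API, so they can be exercised independently of skrl. The sketch below is not part of the patches; the import path and the random-action placeholder are assumptions. It drives the direct libiiwa variant through a single episode; in practice the actions would come from a trained agent, as in the Franka real-world evaluation script earlier in this series.

# Minimal usage sketch for the environment added in reaching_iiwa_real_env.py.
# Assumes it is run from docs/source/examples/real_world/kuka_lbr_iiwa/ with the
# libiiwa Python API installed and the robot reachable.
from reaching_iiwa_real_env import ReachingIiwa

env = ReachingIiwa(control_space="joint")   # or "cartesian"

# reset() moves the robot to its default pose and prompts for a target position
observation, info = env.reset()

terminated = truncated = False
while not (terminated or truncated):
    # placeholder only: replace with the actions of a trained skrl agent before
    # sending commands to real hardware
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)

env.close()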
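With the change to Model.forward in patch 155, calling a model instance directly now mirrors .act(): same (inputs, role) parameters and the same (actions, log-prob, outputs) return. Below is a minimal sketch assuming the skrl 0.9 model mixins; the DummyPolicy class, the integer space sizes, and the random batch of states are illustrative assumptions, not part of the patch.

import torch
import torch.nn as nn

from skrl.models.torch import Model, DeterministicMixin


class DummyPolicy(DeterministicMixin, Model):
    def __init__(self, observation_space, action_space, device="cpu", clip_actions=False):
        Model.__init__(self, observation_space, action_space, device)
        DeterministicMixin.__init__(self, clip_actions)
        # single linear layer, just enough to exercise the API
        self.net = nn.Linear(self.num_observations, self.num_actions)

    def compute(self, inputs, role):
        return self.net(inputs["states"]), {}


policy = DummyPolicy(observation_space=18, action_space=7, device="cpu")
states = torch.rand(4, 18)   # batch of 4 observations

# both calls take the same arguments and return (actions, log_prob, outputs);
# log_prob is None for this deterministic model
actions, log_prob, outputs = policy.act({"states": states}, role="policy")
actions, log_prob, outputs = policy({"states": states}, role="policy")   # -> .forward(inputs, role)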