Readme (kakaoenterprise#10)

UPDATE README
SC-PIONEER · Oct 21, 2021 · 616b9ef · 616b9ef
1 parent 4a1dae4
commit 616b9ef
Show file tree

Hide file tree

Showing 165 changed files with 738 additions and 446 deletions.
diff --git a/README.md b/README.md
@@ -15,7 +15,6 @@
 - Benchmark of the algorithms is conducted in many RL environments
 
 
-
 ## :arrow_down: Installation
 
 ```
@@ -30,7 +29,7 @@
 
 ## :rocket: QuickStart
 
-<img src="./img/quickstart.png" alt="quickstart" width=60%/> 
+<img src="./resrc/quickstart.png" alt="quickstart" width=60%/> 
 
 
 
@@ -42,24 +41,33 @@
 
 
 
+## :mag: How to
+
+- [How to use](./docs/How_to_use.md)
+- [How to customize config](./config/README.md)
+- [How to customize agent](./core/agent/README.md)
+- [How to customize environment](./core/env/README.md)
+- [How to customize network](./core/network/README.md)
+- [How to customize buffer](./core/buffer/README.md)
+
+
+
 ## :page_facing_up: Documentation
 
-- [Implementation List](https://github.kakaocorp.com/leonard-q/RL_Algorithms/blob/master/docs/Implementation_list.md)
-- [Benchmark](https://github.kakaocorp.com/leonard-q/RL_Algorithms/blob/master/docs/Benchmark.md)
-- [Distributed Architecture](https://github.kakaocorp.com/leonard-q/RL_Algorithms/blob/master/docs/Distributed_Architecture.md)
-- [Reference](https://github.kakaocorp.com/leonard-q/RL_Algorithms/blob/master/docs/Reference.md)
+- [Distributed Architecture](./docs/Distributed_Architecture.md)
+- [Role of Managers](./manager/README.md)
+- [Implementation List](./docs/Implementation_list.md)
+- [Naming Convention](./docs/Naming_convention.md)
+- [Benchmark](https://www.notion.so/rlnote/Benchmark-c7642d152cad4980bc03fe804fe9e88a)
+- [Reference](./docs/Reference.md)
 
 
-- [How to use](https://github.kakaocorp.com/leonard-q/RL_Algorithms/blob/master/docs/How_to_use.md)
-- [How to add RL algorithm](https://github.kakaocorp.com/leonard-q/RL_Algorithms/blob/master/docs/How_to_add_rl_algorithm.md)
-- [How to add environment](https://github.kakaocorp.com/leonard-q/RL_Algorithms/blob/master/docs/How_to_add_environment.md)
-- [How to add network](https://github.kakaocorp.com/leonard-q/RL_Algorithms/blob/master/docs/How_to_add_network.md)
 
 ## :busts_in_silhouette: Contributors
 
-:mailbox: Contact: [Leonard.Q](leonard.q@kakaoenterprise.com), [Ramanuzan.Lee]([email protected]), [Royce.Choi]([email protected])
+:mailbox: Contact: atech.rl@kakaocorp.com
 
-<img src="./img/contributors.png" alt="contributors" width=80%/> 
+<img src="./resrc/contributors.png" alt="contributors" width=80%/> 
 
 
 ## :copyright: License

diff --git a/async_distributed_train.py b/async_distributed_train.py
@@ -37,15 +37,15 @@
     path_queue = mp.Queue(1)
 
     record_period = config.train.record_period if config.train.record_period else config.train.run_step//10
-    test_manager_config = (Env(**config.env), config.train.test_iteration, config.train.record, record_period)
+    eval_manager_config = (Env(**config.env), config.train.eval_iteration, config.train.record, record_period)
     log_id = config.train.id if config.train.id else config.agent.name
     log_manager_config = (config.env.name, log_id, config.train.experiment)
     agent_config['device'] = "cpu"
     manage = mp.Process(target=manage_process,
                         args=(Agent, agent_config,
                               result_queue, manage_sync_queue, path_queue,
                               config.train.run_step, config.train.print_period,
-                              MetricManager, TestManager, test_manager_config,
+                              MetricManager, EvalManager, eval_manager_config,
                               LogManager, log_manager_config, config_manager))
     distributed_manager_config = (Env, config.env, Agent, agent_config, config.train.num_workers, 'async')
     interact = mp.Process(target=interact_process,

diff --git a/config/README.md b/config/README.md
@@ -0,0 +1,43 @@
+# How to customize config
+
+## Config file management rules
+- The config file provided by default is mainly managed in the form of config/\[agent\]/\[env\].py. 
+- For a specific environment group that shares parameters, manage it in the form of config/\[agent\]/\[env_group\].py, and specify the environment name with --env.name in the run command.
+
+reference: [dqn/cartpole.py](./dqn/cartpole.py), [dqn/atari.py](./dqn/atari.py)
+
+## Config setting
+- The config file is managed with a total of four dictionary variables: agent, env, optim, and train. 
+
+  ### agent
+    - The agent dictionary manages input parameters used by the agent class. 
+      - name: The key of the agent class you want to use.
+      - others: You can check it in the agent class.
+
+  ### env
+    - The env dictionary manages input parameters used by the env class. 
+      - name: The key of the env class you want to use.
+      - others: You can check it in the env class.
+
+  ### optim
+    - The optim dictionary manages input parameters used by the optimizer class. Since the optimizer of pytorch is used as it is, any optimizer supported by pytorch can be used.
+      - name: The key of the optimizer class you want to use.
+      - others: You can check it in the optimizer class supported by pytorch.
+
+  ### train
+    - The train dictionary manages parameters used in the main script.
+      - training: It means whether to learn. Set to False in the eval.py script and True otherwise.
+      - load_path: It means the path to load the model from. Set it when you want to resume from a saved model or when running the eval.py script; otherwise, set it to None.
+      - run_step: It determines the total number of interactions to proceed.
+      - print_period: It means the period (unit: step) at which the progress is printed.
+      - save_period: It means the period (unit: step) at which the model is saved.
+      - eval_iteration: It means how many episodes will be run in total to get the evaluation score.
+      - record: It means whether to record the simulation as the evaluation proceeds. If you set it True and env is recordable, simulation is saved as a gif file in save_path. (Note that this does not work for non-recordable environments.)
+      - record_period: It means the cycle(unit=step) to record.  
+      - distributed_batch_size: In distributed scripts, distributed_batch_size is used instead of agent.batch_size.
+      - update_period: It means the cycle(unit=step) in which actors pass transition data to learner.
+      - num_workers: Total number of distributed actors which interact with env.
+
+      __distributed_batch_size, update_period and num_workers are only used in distributed scripts.__
+
+reference: [ppo/atari.py](./ppo/atari.py)
diff --git a/config/ape_x/atari.py b/config/ape_x/atari.py
@@ -43,7 +43,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/ape_x/cartpole.py b/config/ape_x/cartpole.py
@@ -36,7 +36,7 @@
     "run_step" : 100000,
     "print_period" : 1000,
     "save_period" : 10000,
-    "test_iteration": 10,
+    "eval_iteration": 10,
     # distributed setting
     "distributed_batch_size" : 512,
     "update_period" : 16,

diff --git a/config/ape_x/pong_mlagent.py b/config/ape_x/pong_mlagent.py
@@ -35,7 +35,7 @@
     "run_step" : 200000,
     "print_period" : 5000,
     "save_period" : 50000,
-    "test_iteration": 10,
+    "eval_iteration": 10,
     # distributed setting
     "distributed_batch_size" : 512,
     "update_period" : 16,

diff --git a/config/ape_x/procgen.py b/config/ape_x/procgen.py
@@ -40,7 +40,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/c51/atari.py b/config/c51/atari.py
@@ -41,7 +41,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/c51/cartpole.py b/config/c51/cartpole.py
@@ -34,7 +34,7 @@
     "run_step" : 100000,
     "print_period" : 1000,
     "save_period" : 10000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     # distributed setting
     "update_period" : 32,
     "num_workers" : 8,

diff --git a/config/c51/pong_mlagent.py b/config/c51/pong_mlagent.py
@@ -33,7 +33,7 @@
     "run_step" : 200000,
     "print_period" : 5000,
     "save_period" : 50000,
-    "test_iteration": 10,
+    "eval_iteration": 10,
     # distributed setting
     "update_period" : 8,
     "num_workers" : 16,

diff --git a/config/c51/procgen.py b/config/c51/procgen.py
@@ -38,7 +38,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/ddpg/cartpole.py b/config/ddpg/cartpole.py
@@ -34,7 +34,7 @@
     "run_step" : 100000,
     "print_period" : 1000,
     "save_period" : 10000,
-    "test_iteration": 10,
+    "eval_iteration": 10,
     # distributed setting
     "update_period": 1,
     "num_workers": 8,

diff --git a/config/ddpg/hopper_mlagent.py b/config/ddpg/hopper_mlagent.py
@@ -33,7 +33,7 @@
     "run_step" : 300000,
     "print_period" : 5000,
     "save_period" : 10000,
-    "test_iteration" : 10,
+    "eval_iteration" : 10,
     # distributed setting
     "distributed_batch_size" : 256,
     "update_period" : 1,

diff --git a/config/ddpg/pendulum.py b/config/ddpg/pendulum.py
@@ -33,7 +33,7 @@
     "run_step" : 100000,
     "print_period" : 1000,
     "save_period" : 10000,
-    "test_iteration": 10,
+    "eval_iteration": 10,
     # distributed setting
     "distributed_batch_size" : 128,
     "update_period" : 1,

diff --git a/config/double/atari.py b/config/double/atari.py
@@ -37,7 +37,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/double/cartpole.py b/config/double/cartpole.py
@@ -30,7 +30,7 @@
     "run_step" : 100000,
     "print_period" : 1000,
     "save_period" : 10000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     # distributed setting
     "update_period" : 32,
     "num_workers" : 8,

diff --git a/config/double/pong_mlagent.py b/config/double/pong_mlagent.py
@@ -29,7 +29,7 @@
     "run_step" : 200000,
     "print_period" : 2000,
     "save_period" : 50000,
-    "test_iteration": 10,
+    "eval_iteration": 10,
     # distributed setting
     "update_period" : 8,
     "num_workers" : 16,

diff --git a/config/double/procgen.py b/config/double/procgen.py
@@ -34,7 +34,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/dqn/atari.py b/config/dqn/atari.py
@@ -37,7 +37,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/dqn/cartpole.py b/config/dqn/cartpole.py
@@ -29,7 +29,7 @@
     "run_step" : 100000,
     "print_period" : 1000,
     "save_period" : 10000,
-    "test_iteration": 10,
+    "eval_iteration": 10,
     # distributed setting
     "update_period" : 32,
     "num_workers" : 8,

diff --git a/config/dqn/mario.py b/config/dqn/mario.py
@@ -34,7 +34,7 @@
     "run_step" : 100000000,
     "print_period" : 5000,
     "save_period" : 50000,
-    "test_iteration": 1,
+    "eval_iteration": 1,
     "record" : True,
     "record_period" : 200000,
     # distributed setting

diff --git a/config/dqn/pong_mlagent.py b/config/dqn/pong_mlagent.py
@@ -29,7 +29,7 @@
     "run_step" : 200000,
     "print_period" : 5000,
     "save_period" : 50000,
-    "test_iteration": 10,
+    "eval_iteration": 10,
     # distributed setting
     "update_period" : 8,
     "num_workers" : 16,

diff --git a/config/dqn/procgen.py b/config/dqn/procgen.py
@@ -34,7 +34,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/dueling/atari.py b/config/dueling/atari.py
@@ -37,7 +37,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/dueling/cartpole.py b/config/dueling/cartpole.py
@@ -30,7 +30,7 @@
     "run_step" : 100000,
     "print_period" : 1000,
     "save_period" : 10000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     # distributed setting
     "update_period" : 32,
     "num_workers" : 8,

diff --git a/config/dueling/pong_mlagent.py b/config/dueling/pong_mlagent.py
@@ -29,7 +29,7 @@
     "run_step" : 200000,
     "print_period" : 5000,
     "save_period" : 50000,
-    "test_iteration": 10,
+    "eval_iteration": 10,
     # distributed setting
     "update_period" : 8,
     "num_workers" : 16,

diff --git a/config/dueling/procgen.py b/config/dueling/procgen.py
@@ -34,7 +34,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/icm_ppo/atari.py b/config/icm_ppo/atari.py
@@ -45,7 +45,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/icm_ppo/cartpole.py b/config/icm_ppo/cartpole.py
@@ -38,7 +38,7 @@
     "run_step" : 100000,
     "print_period" : 1000,
     "save_period" : 10000,
-    "test_iteration": 10,
+    "eval_iteration": 10,
     # distributed setting
     "distributed_batch_size" : 256,
     "update_period" : agent["n_step"],

diff --git a/config/icm_ppo/mario.py b/config/icm_ppo/mario.py
@@ -49,7 +49,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 500000,
-    "test_iteration": 1,
+    "eval_iteration": 1,
     "record": True,
     "record_period": 500000,
     # distributed setting

diff --git a/config/icm_ppo/procgen.py b/config/icm_ppo/procgen.py
@@ -41,7 +41,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/iqn/atari.py b/config/iqn/atari.py
@@ -43,7 +43,7 @@
     "run_step" : 30000000,
     "print_period" : 10000,
     "save_period" : 100000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     "record" : True,
     "record_period" : 300000,
     # distributed setting

diff --git a/config/iqn/cartpole.py b/config/iqn/cartpole.py
@@ -36,7 +36,7 @@
     "run_step" : 100000,
     "print_period" : 1000,
     "save_period" : 10000,
-    "test_iteration": 5,
+    "eval_iteration": 5,
     # distributed setting
     "update_period" : 32,
     "num_workers" : 8,