atari_config.py
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
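"""PPO hyperparameter configuration for Atari environments (PongNoFrameskip-v4 by default)."""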
atari_config = {
    ## Commented-out parameters are set to their default values in PPO
    #========== env config ==========
    'env': 'PongNoFrameskip-v4',  # environment name
    'continuous_action': False,  # whether the environment has a continuous action space
    'env_num': 8,  # number of parallel environments
    'seed': None,  # seed of the experiment
    'xparl_addr': None,  # xparl address for distributed training
    #========== training config ==========
    'train_total_steps': int(1e7),  # max training steps
    'step_nums': 128,  # data-collecting time steps (i.e. T in the paper)
    'num_minibatches': 4,  # number of training minibatches per update
    'update_epochs': 4,  # number of epochs per update (i.e. K in the paper)
    'eval_episode': 3,  # number of episodes per evaluation
    'test_every_steps': int(5e3),  # interval (in steps) between evaluations
    #========== coefficient of ppo ==========
    'initial_lr': 2.5e-4,  # initial learning rate
    'lr_decay': True,  # whether or not to use linear learning rate decay
    # 'eps': 1e-5,  # Adam optimizer epsilon (default: 1e-5)
    'clip_param': 0.1,  # clipping range epsilon in the surrogate loss
    'entropy_coef': 0.01,  # entropy coefficient (i.e. c_2 in the paper)
    # 'value_loss_coef': 0.5,  # value loss coefficient (i.e. c_1 in the paper)
    # 'max_grad_norm': 0.5,  # max gradient norm for gradient clipping
    # 'norm_adv': True,  # advantages normalization
    # 'clip_vloss': True,  # whether or not to use a clipped loss for the value function
    # 'gamma': 0.99,  # discount factor
    # 'gae': True,  # whether or not to use GAE
    # 'gae_lambda': 0.95,  # lambda parameter for calculating the N-step advantage
}
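
# A minimal usage sketch (not part of the original file): the training script is
# expected to import `atari_config` and hand it to the PPO setup; the override
# below is illustrative only. Running this module directly simply prints the
# resolved config as a sanity check.
if __name__ == '__main__':
    from pprint import pprint

    config = dict(atari_config)  # copy so the module-level dict stays untouched
    config['seed'] = 0  # e.g. pin the seed for a reproducible run
    pprint(config)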