atari_config.py
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
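"""PPO hyperparameter configuration for Atari environments (PongNoFrameskip-v4 by default)."""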
atari_config = {
    ## Commented-out parameters are set to their default values in PPO
    #========== env config ==========
    'env': 'PongNoFrameskip-v4',  # environment name
    'continuous_action': False,  # whether the environment has a continuous action space
    'env_num': 8,  # number of parallel environments
    'seed': None,  # seed of the experiment
    'xparl_addr': None,  # xparl address for distributed training
    #========== training config ==========
    'train_total_steps': int(1e7),  # max training steps
    'step_nums': 128,  # data-collecting time steps (i.e. T in the paper)
    'num_minibatches': 4,  # number of training minibatches per update
    'update_epochs': 4,  # number of epochs per update (i.e. K in the paper)
    'eval_episode': 3,  # number of episodes per evaluation
    'test_every_steps': int(5e3),  # interval (in steps) between evaluations
    #========== coefficient of ppo ==========
    'initial_lr': 2.5e-4,  # initial learning rate
    'lr_decay': True,  # whether or not to use linear learning rate decay
    # 'eps': 1e-5,  # Adam optimizer epsilon (default: 1e-5)
    'clip_param': 0.1,  # clipping range epsilon in the surrogate loss
    'entropy_coef': 0.01,  # entropy coefficient (i.e. c_2 in the paper)
    # 'value_loss_coef': 0.5,  # value loss coefficient (i.e. c_1 in the paper)
    # 'max_grad_norm': 0.5,  # max gradient norm for gradient clipping
    # 'norm_adv': True,  # advantages normalization
    # 'clip_vloss': True,  # whether or not to use a clipped loss for the value function
    # 'gamma': 0.99,  # discount factor
    # 'gae': True,  # whether or not to use GAE
    # 'gae_lambda': 0.95,  # lambda parameter for calculating the N-step advantage
}
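
# A minimal usage sketch (not part of the original file): the training script is
# expected to import `atari_config` and hand it to the PPO setup; the override
# below is illustrative only. Running this module directly simply prints the
# resolved config as a sanity check.
if __name__ == '__main__':
    from pprint import pprint

    config = dict(atari_config)  # copy so the module-level dict stays untouched
    config['seed'] = 0  # e.g. pin the seed for a reproducible run
    pprint(config)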