# Experiment configuration: a plain dict describing a 'ppo' run ('type') on the
# 'QbertNoFrameskip-v4' environment ('env_id').
# NOTE(review): the code that consumes this dict is not visible here; the
# grouping comments below are inferred from key names only -- confirm against
# the consuming framework's documentation before relying on them.
exp_config = {
    # Algorithm / runtime selection.
    'type': 'ppo',
    'on_policy': True,
    'cuda': True,
    'action_space': 'discrete',
    # Return / advantage estimation coefficients.
    'discount_factor': 0.99,
    'gae_lambda': 0.95,
    # Optimisation settings.
    'epoch_per_collect': 10,
    'batch_size': 320,
    'learning_rate': 0.0005,
    # NOTE(review): meaning of the two entries is not shown here
    # (presumably schedule length and decay factor) -- TODO confirm.
    'lr_scheduler': [1000, 0.1],
    'weight_decay': 0,
    # Loss weighting and clipping.
    'value_weight': 0.5,
    'entropy_weight': 0.01,
    'clip_ratio': 0.2,
    'adv_norm': True,
    'value_norm': 'baseline',
    'ppo_param_init': True,
    'grad_norm': 0.5,
    # Data collection and evaluation.
    'n_sample': 3200,
    'unroll_len': 1,
    'deterministic_eval': True,
    # Network sizes.
    'model': {
        'encoder_hidden_size_list': [64, 64, 128],
        'actor_head_hidden_size': 128,
        'critic_head_hidden_size': 128,
    },
    # Identification of the config schema, environment and experiment.
    'cfg_type': 'PPOFPolicyDict',
    'env_id': 'QbertNoFrameskip-v4',
    'exp_name': 'QbertNoFrameskip-v4-PPO',
}