# PPO playing BreakoutNoFrameskip-v4 from https://github.com/sgoodfriend/rl-algo-impls/tree/5598ebc4b03054f16eebe76792486ba7bcacfc5c
# 0e56dad
# Hyperparameters for CartPole-v1. The `cartpole-defaults` anchor lets other
# entries reuse this whole mapping through a merge key (CartPole-v0 does so).
CartPole-v1: &cartpole-defaults
  # Explicit !!float tag: the exponent literal is loaded as a float instead of
  # depending on the loader's implicit typing of `1e5`.
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 8
  algo_hyperparams:
    n_steps: 32
    batch_size: 256
    n_epochs: 20
    gae_lambda: 0.8
    gamma: 0.98
    ent_coef: 0.0
    learning_rate: 0.001
    learning_rate_decay: linear
    clip_range: 0.2
    clip_range_decay: linear
  eval_params:
    step_freq: !!float 2.5e4
    n_episodes: 10
    save_best: true
# CartPole-v0 merges every setting anchored as `cartpole-defaults` and
# overrides only n_timesteps (explicit keys take precedence over merged ones).
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 5e4
# Hyperparameters for MountainCar-v0. No eval_params section is given here.
MountainCar-v0:
  # Explicit !!float tag so `1e6` is loaded as a float, not left to implicit typing.
  n_timesteps: !!float 1e6
  env_hyperparams:
    normalize: true
    n_envs: 16
  algo_hyperparams:
    n_steps: 16
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.99
    ent_coef: 0.0
# Hyperparameters for MountainCarContinuous-v0.
# Commented-out keys and the trailing `# 0.00429` note are disabled/alternative
# values kept from the original file.
MountainCarContinuous-v0:
  n_timesteps: !!float 1e5
  env_hyperparams:
    normalize: true
    n_envs: 4
  policy_hyperparams:
    init_layers_orthogonal: false
    # log_std_init: -3.29
  algo_hyperparams:
    n_steps: 512
    batch_size: 256
    n_epochs: 10
    learning_rate: !!float 7.77e-5
    ent_coef: 0.01 # 0.00429
    ent_coef_decay: linear
    clip_range: 0.1
    gae_lambda: 0.9
    max_grad_norm: 5
    vf_coef: 0.19
    # use_sde: true
  eval_params:
    step_freq: 5000
    n_episodes: 10
    save_best: true
# Hyperparameters for Acrobot-v1. No eval_params section is given here.
Acrobot-v1:
  # Explicit !!float tag so `1e6` is loaded as a float, not left to implicit typing.
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  algo_hyperparams:
    n_steps: 256
    n_epochs: 4
    gae_lambda: 0.94
    gamma: 0.99
    ent_coef: 0.0
# Hyperparameters for LunarLander-v2.
LunarLander-v2:
  # Explicit !!float tag so `1e6` is loaded as a float, not left to implicit typing.
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
  algo_hyperparams:
    n_steps: 1024
    batch_size: 64
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.999
    ent_coef: 0.01
    ent_coef_decay: linear
    normalize_advantage: false
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# Hyperparameters for CarRacing-v0. No eval_params section is given here.
CarRacing-v0:
  # Explicit !!float tag so `4e6` is loaded as a float, not left to implicit typing.
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 8
    frame_stack: 4
  policy_hyperparams:
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
    activation_fn: relu
    share_features_extractor: false
    cnn_feature_dim: 256
  algo_hyperparams:
    n_steps: 512
    batch_size: 128
    n_epochs: 10
    learning_rate: !!float 1e-4
    learning_rate_decay: linear
    gamma: 0.99
    gae_lambda: 0.95
    ent_coef: 0.0
    sde_sample_freq: 4
    max_grad_norm: 0.5
    vf_coef: 0.5
    clip_range: 0.2
# NOTE(review): the four ids below are presumably the environments that are
# meant to use this shared `atari` entry; confirm against the config loader.
# BreakoutNoFrameskip-v4
# PongNoFrameskip-v4
# SpaceInvadersNoFrameskip-v4
# QbertNoFrameskip-v4
# The `atari-defaults` and `atari-env-defaults` anchors are defined here but
# are not referenced anywhere in the visible part of this file.
atari: &atari-defaults
  # Explicit !!float tag so `1e7` is loaded as a float, not left to implicit typing.
  n_timesteps: !!float 1e7
  policy_hyperparams:
    activation_fn: relu
  env_hyperparams: &atari-env-defaults
    n_envs: 8
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: subproc
  algo_hyperparams:
    n_steps: 128
    batch_size: 256
    n_epochs: 4
    learning_rate: !!float 2.5e-4
    learning_rate_decay: linear
    clip_range: 0.1
    clip_range_decay: linear
    vf_coef: 0.5
    ent_coef: 0.01
  eval_params:
    deterministic: false
# Hyperparameters for HalfCheetahBulletEnv-v0, which also serve as the shared
# PyBullet baseline: the whole entry and each of its three sections carry
# anchors that the other *BulletEnv-v0 entries merge from.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  # Explicit !!float tag so `2e6` is loaded as a float, not left to implicit typing.
  n_timesteps: !!float 2e6
  env_hyperparams: &pybullet-env-defaults
    n_envs: 16
    normalize: true
  policy_hyperparams: &pybullet-policy-defaults
    pi_hidden_sizes: [256, 256]
    v_hidden_sizes: [256, 256]
    activation_fn: relu
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 512
    batch_size: 128
    n_epochs: 20
    gamma: 0.99
    gae_lambda: 0.9
    ent_coef: 0.0
    sde_sample_freq: 4
    max_grad_norm: 0.5
    vf_coef: 0.5
    learning_rate: !!float 3e-5
    clip_range: 0.4
# AntBulletEnv-v0 merges the `pybullet-defaults` entry. The two sections
# restated below only re-merge their anchored defaults and add no overrides,
# so the resolved values equal the merged baseline.
AntBulletEnv-v0:
  <<: *pybullet-defaults
  policy_hyperparams:
    <<: *pybullet-policy-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
# Walker2DBulletEnv-v0 merges the `pybullet-defaults` entry and adds
# clip_range_decay to the algorithm settings.
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    # Merge keys are shallow: redefining algo_hyperparams replaces the merged
    # mapping outright, so the algo defaults are merged again here.
    <<: *pybullet-algo-defaults
    clip_range_decay: linear
# HopperBulletEnv-v0 merges the `pybullet-defaults` entry and adds
# clip_range_decay to the algorithm settings.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    # Merge keys are shallow: redefining algo_hyperparams replaces the merged
    # mapping outright, so the algo defaults are merged again here.
    <<: *pybullet-algo-defaults
    clip_range_decay: linear
# HumanoidBulletEnv-v0 merges the `pybullet-defaults` entry, then overrides
# n_timesteps and selected keys in each section. Every redefined section
# re-merges its anchored defaults first, because merge keys are shallow and a
# redefined section would otherwise lose the inherited keys.
HumanoidBulletEnv-v0:
  <<: *pybullet-defaults
  # Explicit !!float tag so `1e7` is loaded as a float, not left to implicit typing.
  n_timesteps: !!float 1e7
  env_hyperparams:
    <<: *pybullet-env-defaults
    n_envs: 8
  policy_hyperparams:
    <<: *pybullet-policy-defaults
    # log_std_init: -1
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    n_steps: 2048
    batch_size: 64
    n_epochs: 10
    gae_lambda: 0.95
    learning_rate: !!float 2.5e-4
    clip_range: 0.2