sgoodfriend's picture
PPO playing BreakoutNoFrameskip-v4 from https://github.com/sgoodfriend/rl-algo-impls/tree/5598ebc4b03054f16eebe76792486ba7bcacfc5c
0e56dad
raw
history blame
3.17 kB
# CartPole-v1 settings. The &cartpole-defaults anchor lets other entries in
# this file reuse this whole mapping through a `<<:` merge key.
CartPole-v1: &cartpole-defaults
  # The explicit !!float tag makes the exponent literal load as a float
  # regardless of the parser's implicit typing rules.
  n_timesteps: !!float 4e5
  policy_hyperparams:
    hidden_sizes: [32]
  algo_hyperparams:
    steps_per_epoch: 4096
    pi_lr: 0.01
    gamma: 0.99
    lam: 1
    val_lr: 0.01
    train_v_iters: 80
  eval_params:
    step_freq: !!float 2.5e4
    n_episodes: 10
    save_best: true
# CartPole-v0 starts from the cartpole-defaults mapping and overrides
# n_timesteps and algo_hyperparams.
# The merge is shallow: the algo_hyperparams mapping below replaces the
# merged-in one as a whole, which is why every algo key is spelled out again
# even though only steps_per_epoch differs.
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 1e5
  algo_hyperparams:
    steps_per_epoch: 1024
    pi_lr: 0.01
    gamma: 0.99
    lam: 1
    val_lr: 0.01
    train_v_iters: 80
# Acrobot-v1 settings; a standalone entry (no anchor, no merge).
Acrobot-v1:
  # Explicit !!float tag: load the exponent literal as a float.
  n_timesteps: !!float 2e5
  policy_hyperparams:
    hidden_sizes: [32, 32]
  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.005
    gamma: 0.99
    lam: 0.97
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 4e4
    n_episodes: 10
    save_best: true
# LunarLander-v2 settings; a standalone entry (no anchor, no merge).
LunarLander-v2:
  # Explicit !!float tag: load the exponent literal as a float.
  n_timesteps: !!float 4e6
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.0001
    gamma: 0.999
    lam: 0.97
    val_lr: 0.0001
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# CarRacing-v0 settings; a standalone entry that also carries an
# env_hyperparams section.
CarRacing-v0:
  # Explicit !!float tags: load the exponent literals as floats.
  n_timesteps: !!float 4e6
  env_hyperparams:
    frame_stack: 4
    n_envs: 4
    vec_env_class: "dummy"
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    steps_per_epoch: 4000
    pi_lr: !!float 7e-5
    gamma: 0.99
    lam: 0.95
    val_lr: !!float 1e-4
    train_v_iters: 40
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# HalfCheetahBulletEnv-v0 settings. The &pybullet-defaults anchor exposes this
# mapping so the other Bullet entries in this file can merge it with `<<:`.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  # Explicit !!float tags: load the exponent literals as floats.
  n_timesteps: !!float 2e6
  policy_hyperparams:
    hidden_sizes: [64, 64]
    init_layers_orthogonal: true
  algo_hyperparams:
    steps_per_epoch: 4000
    pi_lr: !!float 3e-4
    gamma: 0.99
    lam: 0.97
    val_lr: !!float 1e-3
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 1e5
    n_episodes: 10
    save_best: true
# HopperBulletEnv-v0 reuses the pybullet-defaults mapping with no overrides.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
# AntBulletEnv-v0 starts from pybullet-defaults, with a larger network and
# different learning rates.
# `<<:` merges are shallow: the policy_hyperparams and algo_hyperparams
# mappings below replace the merged-in ones entirely rather than being combined
# with them. The pybullet-defaults keys that are not overridden here
# (init_layers_orthogonal, steps_per_epoch) are therefore restated explicitly
# so they are not silently dropped for this environment.
AntBulletEnv-v0:
  <<: *pybullet-defaults
  policy_hyperparams:
    hidden_sizes: [400, 300]
    # Restated from pybullet-defaults (not inherited through the merge).
    init_layers_orthogonal: true
  algo_hyperparams:
    # Restated from pybullet-defaults (not inherited through the merge).
    steps_per_epoch: 4000
    pi_lr: !!float 7e-4
    gamma: 0.99
    lam: 0.97
    val_lr: !!float 7e-3
    train_v_iters: 80
    max_grad_norm: 0.5
# FrozenLake-v1 settings; a standalone entry (no anchor, no merge).
FrozenLake-v1:
  # Explicit !!float tag: load the exponent literal as a float.
  n_timesteps: !!float 8e5
  # NOTE(review): the other entries in this file name their environment
  # section `env_hyperparams`; confirm that `env_params` is the key the
  # consumer actually reads for this entry.
  env_params:
    make_kwargs:
      map_name: 8x8
      is_slippery: true
  policy_hyperparams:
    hidden_sizes: [64]
  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.01
    gamma: 0.99
    lam: 0.98
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# SpaceInvadersNoFrameskip-v4 settings. The &atari-defaults anchor exposes this
# mapping for reuse through a `<<:` merge key.
SpaceInvadersNoFrameskip-v4: &atari-defaults
  # Explicit !!float tags: load the exponent literals as floats.
  n_timesteps: !!float 1e7
  env_hyperparams:
    frame_stack: 4
    # Written without a digit-group underscore: `1_000` resolves to an integer
    # only under YAML 1.1 rules, while YAML 1.2 core-schema parsers load it as
    # the string "1_000". Plain `1000` is an integer under both.
    no_reward_timeout_steps: 1000
    n_envs: 8
    vec_env_class: "subproc"
  policy_hyperparams:
    hidden_sizes: [256, 256]
  algo_hyperparams:
    steps_per_epoch: 4096
    pi_lr: !!float 1e-4
    gamma: 0.99
    lam: 0.95
    val_lr: !!float 2e-4
    train_v_iters: 80
    max_grad_norm: 0.5
  eval_params:
    step_freq: !!float 1e5
    n_episodes: 10
    save_best: true