File size: 3,170 Bytes
ff8c6a7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# CartPole-v1 settings. Anchored as `cartpole-defaults` so other entries can
# pull these keys in through a `<<` merge key.
CartPole-v1: &cartpole-defaults
  # Explicit !!float tag: YAML 1.1 resolvers (e.g. PyYAML) load a bare `4e5`
  # as a string, not a number.
  n_timesteps: !!float 4e5

  policy_hyperparams:
    hidden_sizes: [32]

  algo_hyperparams:
    steps_per_epoch: 4096
    pi_lr: 0.01
    gamma: 0.99
    lam: 1
    val_lr: 0.01
    train_v_iters: 80

  eval_params:
    step_freq: !!float 2.5e4
    n_episodes: 10
    save_best: true
# CartPole-v0: starts from the `cartpole-defaults` anchor, then overrides the
# training budget and the algorithm settings.
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 1e5

  # `<<` does not merge nested mappings: defining algo_hyperparams here
  # replaces the inherited mapping as a whole, so every key is restated and
  # only steps_per_epoch actually differs from the anchored values.
  # policy_hyperparams and eval_params are not redefined and are inherited.
  algo_hyperparams:
    steps_per_epoch: 1024
    pi_lr: 0.01
    gamma: 0.99
    lam: 1
    val_lr: 0.01
    train_v_iters: 80
# Acrobot-v1: standalone entry (no merge key), two hidden layers of 32 units.
Acrobot-v1:
  # Explicit !!float tag keeps the exponent literal numeric under YAML 1.1
  # resolvers such as PyYAML.
  n_timesteps: !!float 2e5

  policy_hyperparams:
    hidden_sizes: [32, 32]

  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.005
    gamma: 0.99
    lam: 0.97
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5

  eval_params:
    step_freq: !!float 4e4
    n_episodes: 10
    save_best: true
# LunarLander-v2: standalone entry with a 256x256 network and the same small
# learning rate (1e-4) for both pi_lr and val_lr.
LunarLander-v2:
  n_timesteps: !!float 4e6

  policy_hyperparams:
    hidden_sizes: [256, 256]

  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.0001
    gamma: 0.999
    lam: 0.97
    val_lr: 0.0001
    train_v_iters: 80
    max_grad_norm: 0.5

  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# CarRacing-v0: standalone entry; unlike the classic-control entries it also
# carries an env_hyperparams section.
CarRacing-v0:
  n_timesteps: !!float 4e6

  env_hyperparams:
    frame_stack: 4
    n_envs: 4
    vec_env_class: "dummy"

  policy_hyperparams:
    hidden_sizes: [256, 256]

  algo_hyperparams:
    steps_per_epoch: 4000
    # Explicit !!float tags: YAML 1.1 resolvers (e.g. PyYAML) would otherwise
    # load `7e-5` / `1e-4` as strings because they have no decimal point.
    pi_lr: !!float 7e-5
    gamma: 0.99
    lam: 0.95
    val_lr: !!float 1e-4
    train_v_iters: 40
    max_grad_norm: 0.5

  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# HalfCheetahBulletEnv-v0 settings. Anchored as `pybullet-defaults` so other
# entries can reuse them through a `<<` merge key.
# Reminder for entries that merge this anchor: `<<` is shallow, so redefining
# policy_hyperparams / algo_hyperparams / eval_params replaces the whole
# nested mapping rather than updating individual keys.
HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6

  policy_hyperparams:
    hidden_sizes: [64, 64]
    init_layers_orthogonal: true

  algo_hyperparams:
    steps_per_epoch: 4000
    pi_lr: !!float 3e-4
    gamma: 0.99
    lam: 0.97
    val_lr: !!float 1e-3
    train_v_iters: 80
    max_grad_norm: 0.5

  eval_params:
    step_freq: !!float 1e5
    n_episodes: 10
    save_best: true
# HopperBulletEnv-v0: no overrides; every key comes from the
# `pybullet-defaults` anchor via the merge key.
HopperBulletEnv-v0:
  <<: *pybullet-defaults
# AntBulletEnv-v0: starts from `pybullet-defaults`; n_timesteps and
# eval_params are inherited, policy and algorithm settings are redefined.
AntBulletEnv-v0:
  <<: *pybullet-defaults

  # NOTE(review): `<<` is a shallow merge, so this mapping replaces the
  # anchored policy_hyperparams entirely. `init_layers_orthogonal: true` from
  # the anchor is therefore NOT in effect here - confirm that is intended.
  policy_hyperparams:
    hidden_sizes: [400, 300]

  # NOTE(review): for the same reason `steps_per_epoch` (4000 in the anchor)
  # is not inherited; this entry relies on whatever default the consumer of
  # this file applies - confirm, or restate it here.
  algo_hyperparams:
    pi_lr: !!float 7e-4
    gamma: 0.99
    lam: 0.97
    val_lr: !!float 7e-3
    train_v_iters: 80
    max_grad_norm: 0.5
# FrozenLake-v1: standalone entry using the 8x8 slippery map.
FrozenLake-v1:
  n_timesteps: !!float 8e5

  # NOTE(review): this section is keyed `env_params`, whereas the other
  # entries in this file that configure the environment use
  # `env_hyperparams` - verify the loader actually reads `env_params`.
  env_params:
    make_kwargs:
      # `8x8` is a plain scalar that resolves to a string, not a number.
      map_name: 8x8
      is_slippery: true

  policy_hyperparams:
    hidden_sizes: [64]

  algo_hyperparams:
    steps_per_epoch: 2048
    pi_lr: 0.01
    gamma: 0.99
    lam: 0.98
    val_lr: 0.01
    train_v_iters: 80
    max_grad_norm: 0.5

  eval_params:
    step_freq: !!float 5e4
    n_episodes: 10
    save_best: true
# SpaceInvadersNoFrameskip-v4 settings. Anchored as `atari-defaults` so other
# Atari entries can reuse them through a `<<` merge key (shallow: redefining a
# nested section replaces it as a whole).
SpaceInvadersNoFrameskip-v4: &atari-defaults
  # Explicit !!float tag: YAML 1.1 resolvers (e.g. PyYAML) load a bare `1e7`
  # as a string, not a number.
  n_timesteps: !!float 1e7

  env_hyperparams:
    frame_stack: 4
    # Written without a digit separator. `1_000` is an integer only under
    # YAML 1.1 resolution; YAML 1.2 core-schema parsers load it as the string
    # "1_000". `1000` is the same integer under both.
    no_reward_timeout_steps: 1000
    n_envs: 8
    vec_env_class: "subproc"

  policy_hyperparams:
    hidden_sizes: [256, 256]

  algo_hyperparams:
    steps_per_epoch: 4096
    pi_lr: !!float 1e-4
    gamma: 0.99
    lam: 0.95
    val_lr: !!float 2e-4
    train_v_iters: 80
    max_grad_norm: 0.5

  eval_params:
    step_freq: !!float 1e5
    n_episodes: 10
    save_best: true
|