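# Per-environment PPO-style training hyperparameters, keyed by environment id.
# YAML anchors (&name) and merge keys (<<: *name) let variants inherit a base
# entry and override only what differs. Note that YAML merges are shallow,
# which is why nested maps such as env_hyperparams carry their own anchors
# whenever a variant needs to override a single nested value.
# The !!float tags are needed because PyYAML parses bare scientific notation
# such as 1e5 as a string rather than a float.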
CartPole-v1: &cartpole-defaults
  n_timesteps: !!float 1e5
  env_hyperparams:
    n_envs: 8
  algo_hyperparams:
    n_steps: 32
    batch_size: 256
    n_epochs: 20
    gae_lambda: 0.8
    gamma: 0.98
    ent_coef: 0.0
    learning_rate: 0.001
    learning_rate_decay: linear
    clip_range: 0.2
    clip_range_decay: linear
  eval_params:
    step_freq: !!float 2.5e4
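# CartPole-v0 inherits every cartpole-defaults setting above via the merge key
# and overrides only the training budget.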
CartPole-v0:
  <<: *cartpole-defaults
  n_timesteps: !!float 5e4
MountainCar-v0:
  n_timesteps: !!float 1e6
  env_hyperparams:
    normalize: true
    n_envs: 16
  algo_hyperparams:
    n_steps: 16
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.99
    ent_coef: 0.0
MountainCarContinuous-v0:
  n_timesteps: !!float 1e5
  env_hyperparams:
    normalize: true
    n_envs: 4
  # policy_hyperparams:
  #   init_layers_orthogonal: false
  #   log_std_init: -3.29
  #   use_sde: true
  algo_hyperparams:
    n_steps: 512
    batch_size: 256
    n_epochs: 10
    learning_rate: !!float 7.77e-5
    ent_coef: 0.01 # 0.00429
    ent_coef_decay: linear
    clip_range: 0.1
    gae_lambda: 0.9
    max_grad_norm: 5
    vf_coef: 0.19
  eval_params:
    step_freq: 5000
Acrobot-v1:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
    normalize: true
  algo_hyperparams:
    n_steps: 256
    n_epochs: 4
    gae_lambda: 0.94
    gamma: 0.99
    ent_coef: 0.0
LunarLander-v2:
  n_timesteps: !!float 1e6
  env_hyperparams:
    n_envs: 16
  algo_hyperparams:
    n_steps: 1024
    batch_size: 64
    n_epochs: 4
    gae_lambda: 0.98
    gamma: 0.999
    ent_coef: 0.01
    ent_coef_decay: linear
    normalize_advantage: false
CarRacing-v0: &carracing-defaults
  n_timesteps: !!float 4e6
  env_hyperparams:
    n_envs: 8
    frame_stack: 4
  policy_hyperparams: &carracing-policy-defaults
    use_sde: true
    log_std_init: -2
    init_layers_orthogonal: false
    activation_fn: relu
    share_features_extractor: false
    cnn_feature_dim: 256
    hidden_sizes: [256]
  algo_hyperparams:
    n_steps: 512
    batch_size: 128
    n_epochs: 10
    learning_rate: !!float 1e-4
    learning_rate_decay: linear
    gamma: 0.99
    gae_lambda: 0.95
    ent_coef: 0.0
    sde_sample_freq: 4
    max_grad_norm: 0.5
    vf_coef: 0.5
    clip_range: 0.2
impala-CarRacing-v0:
  <<: *carracing-defaults
  env_id: CarRacing-v0
  policy_hyperparams:
    <<: *carracing-policy-defaults
    cnn_style: impala
    init_layers_orthogonal: true
    cnn_layers_init_orthogonal: false
    hidden_sizes: []
# BreakoutNoFrameskip-v4
# PongNoFrameskip-v4
# SpaceInvadersNoFrameskip-v4
# QbertNoFrameskip-v4
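# Leading-underscore keys (_atari, _impala-atari, _procgen, ...) are base
# templates to merge from; presumably the runner does not treat them as
# runnable experiment ids.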
_atari: &atari-defaults
  n_timesteps: !!float 1e7
  env_hyperparams: &atari-env-defaults
    n_envs: 8
    frame_stack: 4
    no_reward_timeout_steps: 1000
    no_reward_fire_steps: 500
    vec_env_class: subproc
  policy_hyperparams: &atari-policy-defaults
    activation_fn: relu
  algo_hyperparams:
    n_steps: 128
    batch_size: 256
    n_epochs: 4
    learning_rate: !!float 2.5e-4
    learning_rate_decay: linear
    clip_range: 0.1
    clip_range_decay: linear
    vf_coef: 0.5
    ent_coef: 0.01
  eval_params:
    deterministic: false
debug-PongNoFrameskip-v4:
  <<: *atari-defaults
  device: cpu
  env_id: PongNoFrameskip-v4
  env_hyperparams:
    <<: *atari-env-defaults
    vec_env_class: dummy
_impala-atari: &impala-atari-defaults
  <<: *atari-defaults
  policy_hyperparams:
    <<: *atari-policy-defaults
    cnn_style: impala
    cnn_feature_dim: 256
    init_layers_orthogonal: true
    cnn_layers_init_orthogonal: false
impala-PongNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: PongNoFrameskip-v4
impala-BreakoutNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: BreakoutNoFrameskip-v4
impala-SpaceInvadersNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: SpaceInvadersNoFrameskip-v4
impala-QbertNoFrameskip-v4:
  <<: *impala-atari-defaults
  env_id: QbertNoFrameskip-v4
HalfCheetahBulletEnv-v0: &pybullet-defaults
  n_timesteps: !!float 2e6
  env_hyperparams: &pybullet-env-defaults
    n_envs: 16
    normalize: true
  policy_hyperparams: &pybullet-policy-defaults
    pi_hidden_sizes: [256, 256]
    v_hidden_sizes: [256, 256]
    activation_fn: relu
  algo_hyperparams: &pybullet-algo-defaults
    n_steps: 512
    batch_size: 128
    n_epochs: 20
    gamma: 0.99
    gae_lambda: 0.9
    ent_coef: 0.0
    max_grad_norm: 0.5
    vf_coef: 0.5
    learning_rate: !!float 3e-5
    clip_range: 0.4
AntBulletEnv-v0:
  <<: *pybullet-defaults
  policy_hyperparams:
    <<: *pybullet-policy-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
Walker2DBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    clip_range_decay: linear
HopperBulletEnv-v0:
  <<: *pybullet-defaults
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    clip_range_decay: linear
HumanoidBulletEnv-v0:
  <<: *pybullet-defaults
  n_timesteps: !!float 1e7
  env_hyperparams:
    <<: *pybullet-env-defaults
    n_envs: 8
  policy_hyperparams:
    <<: *pybullet-policy-defaults
    # log_std_init: -1
  algo_hyperparams:
    <<: *pybullet-algo-defaults
    n_steps: 2048
    batch_size: 64
    n_epochs: 10
    gae_lambda: 0.95
    learning_rate: !!float 2.5e-4
    clip_range: 0.2
_procgen: &procgen-defaults
  env_hyperparams: &procgen-env-defaults
    is_procgen: true
    n_envs: 64
    # grayscale: false
    # frame_stack: 4
    normalize: true # procgen only normalizes reward
  policy_hyperparams: &procgen-policy-defaults
    activation_fn: relu
    cnn_style: impala
    cnn_feature_dim: 256
    init_layers_orthogonal: true
    cnn_layers_init_orthogonal: false
  algo_hyperparams: &procgen-algo-defaults
    gamma: 0.999
    gae_lambda: 0.95
    n_steps: 256
    batch_size: 2048
    n_epochs: 3
    ent_coef: 0.01
    clip_range: 0.2
    # clip_range_decay: linear
    clip_range_vf: 0.2
    learning_rate: !!float 5e-4
    # learning_rate_decay: linear
    vf_coef: 0.5
  eval_params: &procgen-eval-defaults
    ignore_first_episode: true
    # deterministic: false
    step_freq: !!float 1e5
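# The 25e6 (easy) / 200e6 (hard) budgets below follow the recommendations of
# the Procgen benchmark paper (Cobbe et al., 2019).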
_procgen-easy: &procgen-easy-defaults
  <<: *procgen-defaults
  n_timesteps: !!float 25e6
  env_hyperparams: &procgen-easy-env-defaults
    <<: *procgen-env-defaults
    make_kwargs:
      distribution_mode: easy
procgen-coinrun-easy: &coinrun-easy-defaults
  <<: *procgen-easy-defaults
  env_id: coinrun
debug-procgen-coinrun:
  <<: *coinrun-easy-defaults
  device: cpu
procgen-starpilot-easy:
  <<: *procgen-easy-defaults
  env_id: starpilot
procgen-bossfight-easy:
  <<: *procgen-easy-defaults
  env_id: bossfight
procgen-bigfish-easy:
  <<: *procgen-easy-defaults
  env_id: bigfish
_procgen-hard: &procgen-hard-defaults
  <<: *procgen-defaults
  n_timesteps: !!float 200e6
  env_hyperparams: &procgen-hard-env-defaults
    <<: *procgen-env-defaults
    n_envs: 256
    make_kwargs:
      distribution_mode: hard
  algo_hyperparams:
    <<: *procgen-algo-defaults
    batch_size: 8192
  eval_params:
    <<: *procgen-eval-defaults
    step_freq: !!float 5e5
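# The 2x/4x names below scale the IMPALA CNN channel widths, presumably
# relative to the stock [16, 32, 32] of the IMPALA paper:
# 2x = [32, 64, 64], 4x = [64, 128, 128].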
procgen-starpilot-hard: &procgen-starpilot-hard-defaults
  <<: *procgen-hard-defaults
  env_id: starpilot
procgen-starpilot-hard-2xIMPALA:
  <<: *procgen-starpilot-hard-defaults
  policy_hyperparams:
    <<: *procgen-policy-defaults
    impala_channels: [32, 64, 64]
procgen-starpilot-hard-2xIMPALA-fat:
  <<: *procgen-starpilot-hard-defaults
  policy_hyperparams:
    <<: *procgen-policy-defaults
    impala_channels: [32, 64, 64]
    cnn_feature_dim: 512
procgen-starpilot-hard-4xIMPALA:
  <<: *procgen-starpilot-hard-defaults
  policy_hyperparams:
    <<: *procgen-policy-defaults
    impala_channels: [64, 128, 128]
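# A minimal sketch of consuming this file (assumes PyYAML; the filename
# "ppo.yml" is hypothetical):
#
#   import yaml
#   with open("ppo.yml") as f:
#       hyperparams = yaml.safe_load(f)
#   # PyYAML resolves anchors and merge keys at load time, so the inherited
#   # values are already flattened into each entry, e.g.:
#   assert hyperparams["CartPole-v0"]["algo_hyperparams"]["n_steps"] == 32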