johnjim0816
add all benchmarks
62e03a2
general_cfg:
  algo_name: PPO
  device: cpu
  env_name: LunarLander-v2
  eval_eps: 10
  eval_per_episode: 5
  load_checkpoint: false
  load_path: tasks
  max_steps: 1000
  mode: train
  mp_backend: mp
  new_step_api: true
  render: false
  save_fig: true
  seed: 1
  show_fig: false
  test_eps: 20
  train_eps: 600
  wrapper: null
algo_cfg:
  actor_hidden_dim: 256
  actor_lr: 0.0003
  continuous: false
  critic_hidden_dim: 256
  critic_lr: 0.001
  entropy_coef: 0.01
  eps_clip: 0.2
  gamma: 0.99
  k_epochs: 4
  ppo_type: clip
  sgd_batch_size: 32
  train_batch_size: 256