# Experiment configuration for the run named 'PongNoFrameskip-v4-IMPALA'.
# A single nested dict with five top-level entries: 'env', 'policy',
# 'exp_name', 'seed' and 'wandb_logger'.
exp_config = {
    # ---- Environment settings -------------------------------------------
    'env': {
        # Settings for the environment manager.
        'manager': {
            # Infinite value; presumably means "no episode limit" -- confirm
            # against the consumer of this key.
            'episode_num': float("inf"),
            'max_retry': 1,
            'retry_type': 'reset',
            'auto_reset': True,
            # None for both timeouts; presumably disables them -- confirm.
            'step_timeout': None,
            'reset_timeout': None,
            'retry_waiting_time': 0.1,
            'cfg_type': 'BaseEnvManagerDict',
        },
        # NOTE(review): standard Atari Pong is, as far as I know, scored to
        # 21, so a threshold of 30 would presumably never be reached --
        # confirm this is intended.  The same value is repeated under
        # policy.eval.evaluator.stop_value; keep the two in sync.
        'stop_value': 30,
        'n_evaluator_episode': 8,
        'env_id': 'PongNoFrameskip-v4',
        'collector_env_num': 8,
        'evaluator_env_num': 8,
        # Correctly spelled key.  The original file only had the misspelled
        # 'fram_stack' below, which a consumer looking up 'frame_stack'
        # would silently miss.
        'frame_stack': 4,
        # Legacy misspelling kept so any existing reader of this exact key
        # keeps working; remove once no caller depends on it.
        'fram_stack': 4,
        'env_wrapper': 'atari_default',
    },
    # ---- Policy settings ------------------------------------------------
    'policy': {
        'model': {
            # Leading 4 presumably corresponds to env.frame_stack -- confirm.
            'obs_shape': [4, 84, 84],
            'action_shape': 6,
            'encoder_hidden_size_list': [64, 128, 256],
            'critic_head_hidden_size': 256,
            'critic_head_layer_num': 2,
            'actor_head_hidden_size': 256,
            'actor_head_layer_num': 2,
        },
        'learn': {
            'learner': {
                'train_iterations': 1000000000,
                'dataloader': {
                    'num_workers': 0,
                },
                'log_policy': True,
                'hook': {
                    # Empty string; presumably "no checkpoint to load" --
                    # confirm.
                    'load_ckpt_before_run': '',
                    'log_show_after_iter': 100,
                    'save_ckpt_after_iter': 10000,
                    'save_ckpt_after_run': True,
                },
                'cfg_type': 'BaseLearnerDict',
            },
            'update_per_collect': 2,
            'batch_size': 128,
            'learning_rate': 0.0006,
            'value_weight': 0.5,
            'entropy_weight': 0.01,
            'discount_factor': 0.99,
            'lambda_': 0.95,
            'rho_clip_ratio': 1.0,
            'c_clip_ratio': 1.0,
            'rho_pg_clip_ratio': 1.0,
            'grad_clip_type': 'clip_norm',
            'clip_value': 0.5,
        },
        'collect': {
            'collector': {
                'collect_print_freq': 1000,
            },
            'n_sample': 16,
            # Same value as policy.unroll_len further down; keep in sync.
            'unroll_len': 64,
        },
        'eval': {
            'evaluator': {
                'eval_freq': 2000,
                'render': {
                    # -1 presumably disables rendering -- confirm.
                    'render_freq': -1,
                    'mode': 'train_iter',
                },
                'figure_path': None,
                'cfg_type': 'InteractionSerialEvaluatorDict',
                # Duplicates env.stop_value / env.n_evaluator_episode.
                'stop_value': 30,
                'n_episode': 8,
            },
        },
        'other': {
            'replay_buffer': {
                'replay_buffer_size': 10000,
                'max_use': 16,
                'sliced': False,
            },
        },
        'on_policy': False,
        'cuda': True,
        'multi_gpu': False,
        'bp_update_sync': True,
        'traj_len_inf': False,
        'type': 'impala',
        'priority': False,
        'priority_IS_weight': False,
        'action_space': 'discrete',
        'unroll_len': 64,
        'transition_with_policy_data': True,
        'cfg_type': 'IMPALAPolicyDict',
    },
    # ---- Experiment-level settings --------------------------------------
    'exp_name': 'PongNoFrameskip-v4-IMPALA',
    'seed': 0,
    # Boolean toggles for the individual loggers under 'wandb_logger'.
    'wandb_logger': {
        'gradient_logger': True,
        'video_logger': True,
        'plot_logger': True,
        'action_logger': True,
        'return_logger': False,
    },
}
|
|