# No shared defaults are declared; each behavior below carries its own full settings.
default_settings: null
# Per-behavior trainer configuration.
# NOTE(review): nesting was reconstructed from a flattened copy of this file
# (indentation lost, stray "| |" table residue on every line). The layout below
# follows the usual ML-Agents trainer-config structure — confirm against the
# original file if it is available.
behaviors:
  Worm:
    trainer_type: ppo
    # Optimiser / PPO update settings.
    hyperparameters:
      batch_size: 2024
      buffer_size: 20240
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      # All three schedules are set to "linear".
      learning_rate_schedule: linear
      beta_schedule: linear
      epsilon_schedule: linear
    # Network settings for the behavior itself (input normalisation enabled).
    network_settings:
      normalize: true
      hidden_units: 512
      num_layers: 3
      vis_encode_type: simple
      memory: null
      goal_conditioning_type: hyper
      deterministic: false
    reward_signals:
      # Only the extrinsic reward signal is configured.
      extrinsic:
        gamma: 0.995
        strength: 1.0
        # Separate, smaller network settings scoped to this reward signal
        # (normalisation disabled here, unlike the behavior-level block above).
        network_settings:
          normalize: false
          hidden_units: 128
          num_layers: 2
          vis_encode_type: simple
          memory: null
          goal_conditioning_type: hyper
          deterministic: false
    init_path: null
    keep_checkpoints: 5
    checkpoint_interval: 500000
    max_steps: 3000000
    time_horizon: 1000
    summary_freq: 30000
    threaded: false
    # Self-play and behavioral cloning are not configured for this behavior.
    self_play: null
    behavioral_cloning: null
# Settings for locating and launching the environment executable.
env_settings:
  # Relative path to the Linux Worm build.
  env_path: ./training-envs-executables/linux/Worm/Worm
  env_args: null
  base_port: 5005
  num_envs: 1
  num_areas: 1
  # NOTE(review): -1 presumably means "pick a random seed" — confirm in the ML-Agents docs.
  seed: -1
  max_lifetime_restarts: 10
  restarts_rate_limit_n: 1
  restarts_rate_limit_period_s: 60
# Engine/rendering settings passed to the environment.
engine_settings:
  width: 84
  height: 84
  quality_level: 5
  time_scale: 20
  # NOTE(review): -1 presumably leaves the frame rate uncapped — confirm in the ML-Agents docs.
  target_frame_rate: -1
  capture_frame_rate: 60
  # Graphics are disabled for this run.
  no_graphics: true
# No environment parameters are set for this run.
environment_parameters: null
# Run identity and checkpoint/resume behaviour.
checkpoint_settings:
  run_id: WormPPO1
  initialize_from: null
  # All resume/overwrite/inference switches are off in this file.
  load_model: false
  resume: false
  force: false
  train_model: false
  inference: false
  # Output directory name, given as a relative path.
  results_dir: results
# PyTorch settings; no device is pinned here.
torch_settings:
  device: null
# NOTE(review): placed at the top level (not under torch_settings), matching the
# usual ML-Agents run-options layout — confirm against the original file.
debug: false