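# Page header captured from the web file viewer (not part of the config):
#   uploader: shivakanthsujit | commit: "Init Commit" (8cc1b26)
#   viewer links: raw, history, blame | scan: No virus | size: 2.16 kB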
# Action-optimizer settings; `_target_` names the class these keys configure.
# The numeric values match the `cem_*` entries found later in this file
# (alpha, clipped_normal, elite_ratio, num_iterations, population_size).
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped; left flat, `action_optimizer` parses as null and `_target_`,
# `alpha` and `device` become duplicate top-level keys. Confirm against the
# original file.
action_optimizer:
  _target_: mbrl.planning.CEMOptimizer
  alpha: 0.1
  clipped_normal: false
  device: cpu:0
  elite_ratio: 0.1
  # `???` kept verbatim; presumably the OmegaConf "value must be supplied"
  # marker, so the bounds are expected to be filled in elsewhere - TODO confirm.
  lower_bound: ???
  num_iterations: 5
  population_size: 350
  return_mean_elites: true
  upper_bound: ???
# Algorithm section: the `agent` sub-mapping plus algorithm-level switches.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped. The grouping below follows the alphabetical key order at each
# level (`_target_`, `action_space`, `args`, `num_inputs` under `agent`);
# confirm against the original file.
algorithm:
  agent:
    _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
    action_space:
      # NOTE(review): `gym.env.Box` is kept byte-for-byte. gym's Box class
      # normally lives in `gym.spaces`; verify whether this target is ever
      # instantiated before changing it.
      _target_: gym.env.Box
      high:
        - 1.0
      low:
        - -1.0
      shape:
        - 1
    # The values here match the `sac_*` entries found later in this file.
    args:
      alpha: 0.2
      automatic_entropy_tuning: true
      device: cpu:0
      gamma: 0.99
      hidden_size: 256
      lr: 0.0003
      policy: Gaussian
      target_entropy: -0.05
      target_update_interval: 4
      tau: 0.005
    # presumably the observation dimension fed to the agent - TODO confirm.
    num_inputs: 4
  freq_train_model: 200
  initial_exploration_steps: 5000
  learned_rewards: true
  name: mbpo
  normalize: true
  normalize_double_precision: true
  num_eval_episodes: 1
  random_initial_explore: false
  real_data_ratio: 0.0
  sac_samples_action: true
  target_is_delta: true
# Top-level run switches (not part of any section mapping).
debug_mode: false
# The same device string, cpu:0, is also set on the action optimizer, the
# SAC args and the dynamics model entries in this file.
device: cpu:0
# Dynamics-model settings; `_target_` names the class these keys configure.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped; left flat, `_target_` and `device` duplicate other top-level keys
# and `activation_fn_cfg` loses its child. Confirm against the original file.
dynamics_model:
  _target_: mbrl.models.GaussianMLP
  # Only the inner `_target_` is placed under `activation_fn_cfg`; the keys
  # that follow are assumed to belong to the model itself - TODO confirm.
  activation_fn_cfg:
    _target_: torch.nn.SiLU
  deterministic: false
  device: cpu:0
  ensemble_size: 7
  hid_size: 200
  in_size: 5
  learn_logvar_bounds: false
  num_layers: 4
  out_size: 5
  propagation_method: random_model
# presumably a label for this run - verify how the consumer uses it.
experiment: default
# Logging interval for the agent; unit (steps vs. updates) is not visible
# here - TODO confirm.
log_frequency_agent: 1000
# Environment-specific override values. The `cem_*` and `sac_*` entries carry
# the same numbers as the optimizer and SAC agent arguments elsewhere in this
# file.
# NOTE(review): nesting was restored from a copy whose indentation had been
# stripped; left flat, `overrides` parses as null and `freq_train_model`
# duplicates the algorithm-level key. Confirm against the original file.
overrides:
  cem_alpha: 0.1
  cem_clipped_normal: false
  cem_elite_ratio: 0.1
  cem_num_iters: 5
  cem_population_size: 350
  effective_model_rollouts_per_step: 400
  env: cartpole_continuous
  epoch_length: 200
  freq_train_model: 200
  model_batch_size: 256
  model_lr: 0.001
  model_wd: 5.0e-05
  num_elites: 5
  num_epochs_to_retain_sac_buffer: 1
  num_sac_updates_per_step: 20
  num_steps: 5000
  patience: 5
  planning_horizon: 15
  # Four-element list; the meaning of each position is not visible here -
  # TODO confirm with the code that reads it.
  rollout_schedule:
    - 1
    - 15
    - 1
    - 1
  sac_alpha: 0.2
  sac_automatic_entropy_tuning: true
  sac_batch_size: 256
  sac_gamma: 0.99
  sac_hidden_size: 256
  sac_lr: 0.0003
  sac_policy: Gaussian
  sac_target_entropy: -0.05
  sac_target_update_interval: 4
  sac_tau: 0.005
  sac_updates_every_steps: 1
  trial_length: 200
  validation_ratio: 0.2
# Relative path; presumably resolved against the process working directory -
# TODO confirm.
root_dir: ./logs
save_video: false
# Fixed seed value for this run.
seed: 0