action_optimizer:
  _target_: mbrl.planning.CEMOptimizer
  alpha: 0.1
  clipped_normal: false
  device: cpu:0
  elite_ratio: 0.1
  lower_bound: ???
  num_iterations: 5
  population_size: 350
  return_mean_elites: true
  upper_bound: ???
algorithm:
  agent:
    _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
    action_space:
      _target_: gym.spaces.Box
      high:
      - 1.0
      low:
      - -1.0
      shape:
      - 1
    args:
      alpha: 0.2
      automatic_entropy_tuning: true
      device: cpu:0
      gamma: 0.99
      hidden_size: 256
      lr: 0.0003
      policy: Gaussian
      target_entropy: -0.05
      target_update_interval: 4
      tau: 0.005
    num_inputs: 4
  freq_train_model: 200
  initial_exploration_steps: 5000
  learned_rewards: true
  name: mbpo
  normalize: true
  normalize_double_precision: true
  num_eval_episodes: 1
  random_initial_explore: false
  real_data_ratio: 0.0
  sac_samples_action: true
  target_is_delta: true
debug_mode: false
device: cpu:0
dynamics_model:
  _target_: mbrl.models.GaussianMLP
  activation_fn_cfg:
    _target_: torch.nn.SiLU
  deterministic: false
  device: cpu:0
  ensemble_size: 7
  hid_size: 200
  in_size: 5
  learn_logvar_bounds: false
  num_layers: 4
  out_size: 5
  propagation_method: random_model
experiment: default
log_frequency_agent: 1000
overrides:
  cem_alpha: 0.1
  cem_clipped_normal: false
  cem_elite_ratio: 0.1
  cem_num_iters: 5
  cem_population_size: 350
  effective_model_rollouts_per_step: 400
  env: cartpole_continuous
  epoch_length: 200
  freq_train_model: 200
  model_batch_size: 256
  model_lr: 0.001
  model_wd: 5.0e-05
  num_elites: 5
  num_epochs_to_retain_sac_buffer: 1
  num_sac_updates_per_step: 20
  num_steps: 5000
  patience: 5
  planning_horizon: 15
  rollout_schedule:
  - 1
  - 15
  - 1
  - 1
  sac_alpha: 0.2
  sac_automatic_entropy_tuning: true
  sac_batch_size: 256
  sac_gamma: 0.99
  sac_hidden_size: 256
  sac_lr: 0.0003
  sac_policy: Gaussian
  sac_target_entropy: -0.05
  sac_target_update_interval: 4
  sac_tau: 0.005
  sac_updates_every_steps: 1
  trial_length: 200
  validation_ratio: 0.2
root_dir: ./logs
save_video: false
seed: 0
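
# A minimal sketch of consuming this config from Python, kept commented out so
# this file remains valid YAML. It assumes mbrl-lib and omegaconf are
# installed and that this file is saved as mbpo_cartpole.yaml (a hypothetical
# name); it is a sketch, not the library's official entry point.
#
#   import omegaconf
#   import mbrl.util.common
#
#   cfg = omegaconf.OmegaConf.load("mbpo_cartpole.yaml")
#
#   # Continuous cartpole: 4-dim observation, 1-dim action, consistent with
#   # in_size=5 (obs + action) and out_size=5 (next-obs delta + reward,
#   # since target_is_delta and learned_rewards are both true above).
#   obs_shape, act_shape = (4,), (1,)
#
#   # Builds the GaussianMLP ensemble and wraps it in a one-step
#   # transition/reward model, applying the normalize / target_is_delta /
#   # learned_rewards settings from the `algorithm` section.
#   dynamics_model = mbrl.util.common.create_one_dim_tr_model(
#       cfg, obs_shape, act_shape
#   )
#
# Note that action_optimizer.lower_bound and .upper_bound are left as Hydra's
# missing-value marker (???) and must be filled in, e.g. from the
# environment's action space tiled over overrides.planning_horizon, before
# the CEMOptimizer can be instantiated.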