File size: 2,163 Bytes
8cc1b26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
987de76
 
8cc1b26
 
 
 
 
987de76
 
8cc1b26
 
 
 
 
 
987de76
8cc1b26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Settings for the optimizer class named by `_target_`.
# alpha, clipped_normal, elite_ratio, num_iterations and population_size hold
# the same values as the cem_* entries under the top-level `overrides` key.
action_optimizer:
  _target_: mbrl.planning.CEMOptimizer
  alpha: 0.1
  clipped_normal: false
  # Same value as the top-level `device` key.
  device: cpu:0
  elite_ratio: 0.1
  # If this file is loaded through OmegaConf/Hydra (suggested by the `_target_`
  # keys), `???` marks a mandatory value that is still missing. Nothing in this
  # file supplies it; presumably the consuming code fills it in at runtime
  # (TODO confirm).
  lower_bound: ???
  # Matches overrides.cem_num_iters.
  num_iterations: 5
  population_size: 350
  return_mean_elites: true
  # Unset mandatory value, same as lower_bound above.
  upper_bound: ???
# Algorithm section; `name` below identifies it as "mbpo".
algorithm:
  # Agent definition: `_target_` names the SAC class, the remaining keys are
  # its settings.
  agent:
    _target_: mbrl.third_party.pytorch_sac_pranz24.sac.SAC
    # One-dimensional action space bounded to [-1.0, 1.0] (shape, low, high
    # below).
    action_space:
      # NOTE(review): the `gym` package exposes Box as `gym.spaces.Box`;
      # `gym.env.Box` does not look like an importable path. Confirm whether
      # anything actually resolves this nested `_target_` before relying on it.
      _target_: gym.env.Box
      high:
      - 1.0
      low:
      - -1.0
      shape:
      - 1
    # Every key here except `device` carries the same value as the matching
    # sac_* entry under the top-level `overrides` key; `device` matches the
    # top-level `device` key.
    args:
      alpha: 0.2
      automatic_entropy_tuning: true
      device: cpu:0
      gamma: 0.99
      hidden_size: 256
      lr: 0.0003
      policy: Gaussian
      target_entropy: -0.05
      target_update_interval: 4
      tau: 0.005
    # presumably the observation dimensionality of the environment -- TODO
    # confirm against the consumer.
    num_inputs: 4
  # Same value as overrides.freq_train_model.
  freq_train_model: 200
  # Same value as overrides.num_steps (5000).
  initial_exploration_steps: 5000
  learned_rewards: true
  name: mbpo
  normalize: true
  normalize_double_precision: true
  num_eval_episodes: 1
  random_initial_explore: false
  real_data_ratio: 0.0
  sac_samples_action: true
  target_is_delta: true
# Top-level switches. `device` holds the same value (cpu:0) as the `device`
# entries under action_optimizer, algorithm.agent.args and dynamics_model.
debug_mode: false
device: cpu:0
# Settings for the model class named by `_target_`.
dynamics_model:
  _target_: mbrl.models.GaussianMLP
  # Nested target naming the activation class.
  activation_fn_cfg:
    _target_: torch.nn.SiLU
  deterministic: false
  # Same value as the top-level `device` key.
  device: cpu:0
  # Note: overrides.num_elites is 5, i.e. smaller than this ensemble size.
  ensemble_size: 7
  hid_size: 200
  # presumably algorithm.agent.num_inputs (4) + action dimension (1) -- TODO
  # confirm.
  in_size: 5
  learn_logvar_bounds: false
  num_layers: 4
  # presumably 4 state outputs + 1 reward output, given
  # algorithm.learned_rewards is true -- TODO confirm.
  out_size: 5
  propagation_method: random_model
# Experiment label and agent logging interval. The unit of
# log_frequency_agent is not stated in this file (presumably steps or
# updates -- confirm against the consumer).
experiment: default
log_frequency_agent: 1000
# Flat table of per-environment values. Many entries are repeated elsewhere in
# this file: cem_* under action_optimizer, sac_* under algorithm.agent.args,
# and freq_train_model under algorithm.
overrides:
  # cem_* values match action_optimizer (alpha, clipped_normal, elite_ratio,
  # num_iterations, population_size).
  cem_alpha: 0.1
  cem_clipped_normal: false
  cem_elite_ratio: 0.1
  cem_num_iters: 5
  cem_population_size: 350
  effective_model_rollouts_per_step: 400
  env: cartpole_continuous
  # epoch_length, freq_train_model and trial_length are all 200 in this file.
  epoch_length: 200
  freq_train_model: 200
  model_batch_size: 256
  model_lr: 0.001
  model_wd: 5.0e-05
  # dynamics_model.ensemble_size is 7; presumably 5 of those members are kept
  # as elites -- TODO confirm.
  num_elites: 5
  num_epochs_to_retain_sac_buffer: 1
  num_sac_updates_per_step: 20
  # NOTE(review): equals algorithm.initial_exploration_steps (5000). Whether
  # exploration steps count against this budget is not visible here -- confirm.
  num_steps: 5000
  patience: 5
  planning_horizon: 15
  # Four integers; their meaning is not defined in this file. Presumably
  # [min_epoch, max_epoch, min_length, max_length] -- TODO confirm against the
  # consumer.
  rollout_schedule:
  - 1
  - 15
  - 1
  - 1
  # sac_* values below match algorithm.agent.args where a same-named key
  # exists; sac_batch_size and sac_updates_every_steps have no counterpart
  # there.
  sac_alpha: 0.2
  sac_automatic_entropy_tuning: true
  sac_batch_size: 256
  sac_gamma: 0.99
  sac_hidden_size: 256
  sac_lr: 0.0003
  sac_policy: Gaussian
  sac_target_entropy: -0.05
  sac_target_update_interval: 4
  sac_tau: 0.005
  sac_updates_every_steps: 1
  trial_length: 200
  validation_ratio: 0.2
# Output location, video flag and random seed. root_dir is a relative path, so
# it presumably resolves against the working directory of the consuming
# process -- confirm.
root_dir: ./logs
save_video: false
seed: 0