ahalev committed on
Commit
d9556aa
1 Parent(s): 278e1df

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +143 -0
config.yaml ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
algo:
  ddpg:
    params:
      target_update_tau: 0.01
    policy:
      exploration:
        sigma: 0.3
        theta: 0.15
  deterministic_params:
    buffer_batch_size: 32
    min_buffer_size: 10000
    n_train_steps: 500
    qf_lr: 0.0001
    steps_per_epoch: 1
  dqn:
    params:
      clip_gradient: 10
      deterministic_eval: true
      double_q: false
      target_update_freq: 2
    policy:
      exploration:
        decay_ratio: 0.5
        max_epsilon: 1.0
        min_epsilon: 0.05
  general_params:
    discount: 0.99
    package: garage
  policy:
    hidden_sizes:
    - 128
    - 128
    pretrained_policy: null
  ppo:
    params:
      center_adv: false
      tanhnormal: false
  pretrain:
    additional_config: null
    algo_to_pretrain: null
    params:
      episodes_per_batch: 10
      loss: log_prob
      policy_lr: 0.01
    pretrain_algo: rbc
  replay_buffer:
    buffer_size: 200000
  rnd:
    batch_size: 64
    bound_reward_weight: cosine
    bound_reward_weight_initial_ratio: 0.999999
    bound_reward_weight_transient_epochs: 10
    hidden_sizes:
    - 64
    - 64
    intrinsic_reward_weight: 0.0001
    n_train_steps: 32
    output_dim: 128
    predictor_lr: 0.001
    standardize_extrinsic_reward: true
    standardize_intrinsic_reward: true
  sampler:
    n_workers: 16
    type: ray
  train:
    batch_size: 50000
    n_epochs: 100
    steps_per_epoch: 32
  type: ppo
context:
  disable_logging: false
  experiment_name: null
  log_dir:
    from_keys:
    - microgrid.config.scenario
    - microgrid.methods.set_forecaster.forecaster
    - microgrid.methods.set_module_attrs.battery_transition_model
    - context.seed
    - env.domain_randomization.noise_std
    - algo.ppo.tanhnormal
    - algo.rnd.intrinsic_reward_weight
    parent: /home/ahalev/data/GridRL/paper_experiments
    use_existing_dir: false
  seed: 42
  snapshot_gap: 10
  verbose: 0
  wandb:
    api_key_file: ../../local/wandb_api_key.txt
    group: null
    log_density: 1
    plot_baseline:
    - mpc
    - rbc
    username: ahalev
env:
  cls: DiscreteMicrogridEnv
  domain_randomization:
    noise_std: 0.01
    relative_noise: true
  forced_genset: null
  net_load:
    slack_module: genset
    use: true
  observation_keys:
  - soc
  - net_load
  - import_price_current
  - import_price_forecast_0
  - import_price_forecast_1
  - import_price_forecast_2
  - import_price_forecast_3
  - import_price_forecast_4
microgrid:
  attributes:
    reward_shaping_func: !BaselineShaper
      baseline_module: false
      module:
      - genset
      - 0
  config:
    scenario: 1
  methods:
    set_forecaster:
      forecast_horizon: 23
      forecaster: 0.0
      forecaster_increase_uncertainty: true
      forecaster_relative_noise: true
    set_module_attrs:
      battery_transition_model: null
  normalized_action_bounds:
  - 0.0
  - 1.0
  trajectory:
    evaluate:
      final_step: -1
      initial_step: 5840
      trajectory_func: null
    train:
      final_step: 5840
      initial_step: 0
      trajectory_func: !FixedLengthStochasticTrajectory
        trajectory_length: 720
verbose: 1