sharkMeow committed on
Commit
83a945b
1 Parent(s): dfc4a06

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +5 -5
  3. train_results.json +6 -6
  4. trainer_state.json +87 -12
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 100.0,
3
- "eval_loss": 5.574370384216309,
4
- "eval_runtime": 31.2053,
5
- "eval_samples_per_second": 311.326,
6
- "eval_steps_per_second": 7.082,
7
- "total_flos": 2.3359629906138624e+18,
8
- "train_loss": 0.0,
9
- "train_runtime": 3.5615,
10
- "train_samples_per_second": 1660250.627,
11
- "train_steps_per_second": 41528.026
12
  }
 
1
  {
2
+ "epoch": 150.0,
3
+ "eval_loss": 5.543883323669434,
4
+ "eval_runtime": 30.8465,
5
+ "eval_samples_per_second": 314.947,
6
+ "eval_steps_per_second": 7.165,
7
+ "total_flos": 3.5039444859207936e+18,
8
+ "train_loss": 0.007636452434781384,
9
+ "train_runtime": 24973.9811,
10
+ "train_samples_per_second": 355.144,
11
+ "train_steps_per_second": 8.883
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 100.0,
3
- "eval_loss": 5.574370384216309,
4
- "eval_runtime": 31.2053,
5
- "eval_samples_per_second": 311.326,
6
- "eval_steps_per_second": 7.082
7
  }
 
1
  {
2
+ "epoch": 150.0,
3
+ "eval_loss": 5.543883323669434,
4
+ "eval_runtime": 30.8465,
5
+ "eval_samples_per_second": 314.947,
6
+ "eval_steps_per_second": 7.165
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 100.0,
3
- "total_flos": 2.3359629906138624e+18,
4
- "train_loss": 0.0,
5
- "train_runtime": 3.5615,
6
- "train_samples_per_second": 1660250.627,
7
- "train_steps_per_second": 41528.026
8
  }
 
1
  {
2
+ "epoch": 150.0,
3
+ "total_flos": 3.5039444859207936e+18,
4
+ "train_loss": 0.007636452434781384,
5
+ "train_runtime": 24973.9811,
6
+ "train_samples_per_second": 355.144,
7
+ "train_steps_per_second": 8.883
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 100.0,
5
  "eval_steps": 14790,
6
- "global_step": 147900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -159,19 +159,94 @@
159
  "step": 147900
160
  },
161
  {
162
- "epoch": 100.0,
163
- "step": 147900,
164
- "total_flos": 2.3359629906138624e+18,
165
- "train_loss": 0.0,
166
- "train_runtime": 3.5615,
167
- "train_samples_per_second": 1660250.627,
168
- "train_steps_per_second": 41528.026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  }
170
  ],
171
  "logging_steps": 14790,
172
- "max_steps": 147900,
173
  "num_input_tokens_seen": 0,
174
- "num_train_epochs": 100,
175
  "save_steps": 500,
176
  "stateful_callbacks": {
177
  "TrainerControl": {
@@ -185,7 +260,7 @@
185
  "attributes": {}
186
  }
187
  },
188
- "total_flos": 2.3359629906138624e+18,
189
  "train_batch_size": 40,
190
  "trial_name": null,
191
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 150.0,
5
  "eval_steps": 14790,
6
+ "global_step": 221850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
159
  "step": 147900
160
  },
161
  {
162
+ "epoch": 110.0,
163
+ "grad_norm": 0.292227566242218,
164
+ "learning_rate": 2.6701825557809337e-06,
165
+ "loss": 0.0243,
166
+ "step": 162690
167
+ },
168
+ {
169
+ "epoch": 110.0,
170
+ "eval_loss": 5.654893398284912,
171
+ "eval_runtime": 30.718,
172
+ "eval_samples_per_second": 316.265,
173
+ "eval_steps_per_second": 7.194,
174
+ "step": 162690
175
+ },
176
+ {
177
+ "epoch": 120.0,
178
+ "grad_norm": 0.000675542454700917,
179
+ "learning_rate": 2.0038314176245213e-06,
180
+ "loss": 0.024,
181
+ "step": 177480
182
+ },
183
+ {
184
+ "epoch": 120.0,
185
+ "eval_loss": 5.59971284866333,
186
+ "eval_runtime": 30.8062,
187
+ "eval_samples_per_second": 315.359,
188
+ "eval_steps_per_second": 7.174,
189
+ "step": 177480
190
+ },
191
+ {
192
+ "epoch": 130.0,
193
+ "grad_norm": 0.00018167876987718046,
194
+ "learning_rate": 1.3374802794681092e-06,
195
+ "loss": 0.0227,
196
+ "step": 192270
197
+ },
198
+ {
199
+ "epoch": 130.0,
200
+ "eval_loss": 5.560417175292969,
201
+ "eval_runtime": 30.9576,
202
+ "eval_samples_per_second": 313.816,
203
+ "eval_steps_per_second": 7.139,
204
+ "step": 192270
205
+ },
206
+ {
207
+ "epoch": 140.0,
208
+ "grad_norm": 0.0007145697018131614,
209
+ "learning_rate": 6.711291413116972e-07,
210
+ "loss": 0.0219,
211
+ "step": 207060
212
+ },
213
+ {
214
+ "epoch": 140.0,
215
+ "eval_loss": 5.54286527633667,
216
+ "eval_runtime": 31.0413,
217
+ "eval_samples_per_second": 312.97,
218
+ "eval_steps_per_second": 7.12,
219
+ "step": 207060
220
+ },
221
+ {
222
+ "epoch": 150.0,
223
+ "grad_norm": 0.0009066470083780587,
224
+ "learning_rate": 4.778003155285103e-09,
225
+ "loss": 0.0217,
226
+ "step": 221850
227
+ },
228
+ {
229
+ "epoch": 150.0,
230
+ "eval_loss": 5.543883323669434,
231
+ "eval_runtime": 30.764,
232
+ "eval_samples_per_second": 315.791,
233
+ "eval_steps_per_second": 7.184,
234
+ "step": 221850
235
+ },
236
+ {
237
+ "epoch": 150.0,
238
+ "step": 221850,
239
+ "total_flos": 3.5039444859207936e+18,
240
+ "train_loss": 0.007636452434781384,
241
+ "train_runtime": 24973.9811,
242
+ "train_samples_per_second": 355.144,
243
+ "train_steps_per_second": 8.883
244
  }
245
  ],
246
  "logging_steps": 14790,
247
+ "max_steps": 221850,
248
  "num_input_tokens_seen": 0,
249
+ "num_train_epochs": 150,
250
  "save_steps": 500,
251
  "stateful_callbacks": {
252
  "TrainerControl": {
 
260
  "attributes": {}
261
  }
262
  },
263
+ "total_flos": 3.5039444859207936e+18,
264
  "train_batch_size": 40,
265
  "trial_name": null,
266
  "trial_params": null