GlycerinLOL committed on
Commit
9190ae2
1 Parent(s): 6cc241b

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +107 -25
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 7.99,
3
- "train_loss": 0.637062493348733,
4
- "train_runtime": 6322.7563,
5
- "train_samples_per_second": 63.264,
6
- "train_steps_per_second": 0.493
7
  }
 
1
  {
2
+ "epoch": 11.99,
3
+ "train_loss": 0.6240079896062867,
4
+ "train_runtime": 9692.0216,
5
+ "train_samples_per_second": 61.907,
6
+ "train_steps_per_second": 0.483
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 7.99,
3
- "train_loss": 0.637062493348733,
4
- "train_runtime": 6322.7563,
5
- "train_samples_per_second": 63.264,
6
- "train_steps_per_second": 0.493
7
  }
 
1
  {
2
+ "epoch": 11.99,
3
+ "train_loss": 0.6240079896062867,
4
+ "train_runtime": 9692.0216,
5
+ "train_samples_per_second": 61.907,
6
+ "train_steps_per_second": 0.483
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.994241842610364,
5
  "eval_steps": 500,
6
- "global_step": 3120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -157,37 +157,119 @@
157
  "step": 3000
158
  },
159
  {
160
- "epoch": 7.99,
161
- "eval_f1": 0.9081,
162
- "eval_gen_len": 26.465454545454545,
163
- "eval_loss": 1.6796071529388428,
164
- "eval_precision": 0.9092,
165
- "eval_recall": 0.9073,
166
- "eval_rouge1": 0.4613,
167
- "eval_rouge2": 0.2127,
168
- "eval_rougeL": 0.3775,
169
- "eval_rougeLsum": 0.3772,
170
- "eval_runtime": 504.4764,
171
- "eval_samples_per_second": 5.451,
172
- "eval_steps_per_second": 0.341,
173
  "step": 3120
174
  },
175
  {
176
- "epoch": 7.99,
177
- "step": 3120,
178
- "total_flos": 5.768064442218578e+17,
179
- "train_loss": 0.637062493348733,
180
- "train_runtime": 6322.7563,
181
- "train_samples_per_second": 63.264,
182
- "train_steps_per_second": 0.493
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  }
184
  ],
185
  "logging_steps": 500,
186
- "max_steps": 3120,
187
  "num_input_tokens_seen": 0,
188
- "num_train_epochs": 8,
189
  "save_steps": 500,
190
- "total_flos": 5.768064442218578e+17,
191
  "train_batch_size": 32,
192
  "trial_name": null,
193
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.990403071017274,
5
  "eval_steps": 500,
6
+ "global_step": 4680,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
157
  "step": 3000
158
  },
159
  {
160
+ "epoch": 8.0,
161
+ "eval_f1": 0.9083,
162
+ "eval_gen_len": 26.620363636363635,
163
+ "eval_loss": 1.676251769065857,
164
+ "eval_precision": 0.9094,
165
+ "eval_recall": 0.9076,
166
+ "eval_rouge1": 0.4621,
167
+ "eval_rouge2": 0.2133,
168
+ "eval_rougeL": 0.3791,
169
+ "eval_rougeLsum": 0.3789,
170
+ "eval_runtime": 506.5847,
171
+ "eval_samples_per_second": 5.429,
172
+ "eval_steps_per_second": 0.34,
173
  "step": 3120
174
  },
175
  {
176
+ "epoch": 8.97,
177
+ "learning_rate": 5.042735042735043e-06,
178
+ "loss": 1.7559,
179
+ "step": 3500
180
+ },
181
+ {
182
+ "epoch": 9.0,
183
+ "eval_f1": 0.9086,
184
+ "eval_gen_len": 26.424,
185
+ "eval_loss": 1.666217565536499,
186
+ "eval_precision": 0.9098,
187
+ "eval_recall": 0.9078,
188
+ "eval_rouge1": 0.4632,
189
+ "eval_rouge2": 0.215,
190
+ "eval_rougeL": 0.38,
191
+ "eval_rougeLsum": 0.3799,
192
+ "eval_runtime": 507.5176,
193
+ "eval_samples_per_second": 5.419,
194
+ "eval_steps_per_second": 0.339,
195
+ "step": 3511
196
+ },
197
+ {
198
+ "epoch": 10.0,
199
+ "eval_f1": 0.9089,
200
+ "eval_gen_len": 26.542545454545454,
201
+ "eval_loss": 1.6593670845031738,
202
+ "eval_precision": 0.9099,
203
+ "eval_recall": 0.9082,
204
+ "eval_rouge1": 0.4651,
205
+ "eval_rouge2": 0.2168,
206
+ "eval_rougeL": 0.3812,
207
+ "eval_rougeLsum": 0.3812,
208
+ "eval_runtime": 501.1134,
209
+ "eval_samples_per_second": 5.488,
210
+ "eval_steps_per_second": 0.343,
211
+ "step": 3902
212
+ },
213
+ {
214
+ "epoch": 10.25,
215
+ "learning_rate": 2.9059829059829063e-06,
216
+ "loss": 1.7357,
217
+ "step": 4000
218
+ },
219
+ {
220
+ "epoch": 11.0,
221
+ "eval_f1": 0.9091,
222
+ "eval_gen_len": 26.605090909090908,
223
+ "eval_loss": 1.6554700136184692,
224
+ "eval_precision": 0.91,
225
+ "eval_recall": 0.9086,
226
+ "eval_rouge1": 0.4663,
227
+ "eval_rouge2": 0.2178,
228
+ "eval_rougeL": 0.3824,
229
+ "eval_rougeLsum": 0.3823,
230
+ "eval_runtime": 504.1076,
231
+ "eval_samples_per_second": 5.455,
232
+ "eval_steps_per_second": 0.341,
233
+ "step": 4293
234
+ },
235
+ {
236
+ "epoch": 11.53,
237
+ "learning_rate": 7.692307692307694e-07,
238
+ "loss": 1.7297,
239
+ "step": 4500
240
+ },
241
+ {
242
+ "epoch": 11.99,
243
+ "eval_f1": 0.9092,
244
+ "eval_gen_len": 26.54581818181818,
245
+ "eval_loss": 1.654082179069519,
246
+ "eval_precision": 0.9101,
247
+ "eval_recall": 0.9085,
248
+ "eval_rouge1": 0.4665,
249
+ "eval_rouge2": 0.2182,
250
+ "eval_rougeL": 0.3824,
251
+ "eval_rougeLsum": 0.3824,
252
+ "eval_runtime": 506.4111,
253
+ "eval_samples_per_second": 5.43,
254
+ "eval_steps_per_second": 0.34,
255
+ "step": 4680
256
+ },
257
+ {
258
+ "epoch": 11.99,
259
+ "step": 4680,
260
+ "total_flos": 8.651981084751299e+17,
261
+ "train_loss": 0.6240079896062867,
262
+ "train_runtime": 9692.0216,
263
+ "train_samples_per_second": 61.907,
264
+ "train_steps_per_second": 0.483
265
  }
266
  ],
267
  "logging_steps": 500,
268
+ "max_steps": 4680,
269
  "num_input_tokens_seen": 0,
270
+ "num_train_epochs": 12,
271
  "save_steps": 500,
272
+ "total_flos": 8.651981084751299e+17,
273
  "train_batch_size": 32,
274
  "trial_name": null,
275
  "trial_params": null