GlycerinLOL committed on
Commit
f6d1d9d
1 Parent(s): f831bae

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +106 -24
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 15.99,
3
- "train_loss": 0.4757408973498222,
4
- "train_runtime": 9951.3836,
5
- "train_samples_per_second": 80.391,
6
- "train_steps_per_second": 0.627
7
  }
 
1
  {
2
+ "epoch": 19.99,
3
+ "train_loss": 0.3883497776129307,
4
+ "train_runtime": 10238.8851,
5
+ "train_samples_per_second": 97.667,
6
+ "train_steps_per_second": 0.762
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 15.99,
3
- "train_loss": 0.4757408973498222,
4
- "train_runtime": 9951.3836,
5
- "train_samples_per_second": 80.391,
6
- "train_steps_per_second": 0.627
7
  }
 
1
  {
2
+ "epoch": 19.99,
3
+ "train_loss": 0.3883497776129307,
4
+ "train_runtime": 10238.8851,
5
+ "train_samples_per_second": 97.667,
6
+ "train_steps_per_second": 0.762
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.990403071017274,
5
  "eval_steps": 500,
6
- "global_step": 6240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -321,37 +321,119 @@
321
  "step": 6000
322
  },
323
  {
324
- "epoch": 15.99,
325
- "eval_f1": 0.9096,
326
- "eval_gen_len": 26.52509090909091,
327
- "eval_loss": 1.6378074884414673,
328
- "eval_precision": 0.9107,
329
- "eval_recall": 0.909,
330
- "eval_rouge1": 0.4698,
331
- "eval_rouge2": 0.2197,
332
  "eval_rougeL": 0.385,
333
- "eval_rougeLsum": 0.3849,
334
- "eval_runtime": 501.725,
335
- "eval_samples_per_second": 5.481,
336
- "eval_steps_per_second": 0.343,
337
  "step": 6240
338
  },
339
  {
340
- "epoch": 15.99,
341
- "step": 6240,
342
- "total_flos": 1.153589772728402e+18,
343
- "train_loss": 0.4757408973498222,
344
- "train_runtime": 9951.3836,
345
- "train_samples_per_second": 80.391,
346
- "train_steps_per_second": 0.627
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  }
348
  ],
349
  "logging_steps": 500,
350
- "max_steps": 6240,
351
  "num_input_tokens_seen": 0,
352
- "num_train_epochs": 16,
353
  "save_steps": 500,
354
- "total_flos": 1.153589772728402e+18,
355
  "train_batch_size": 32,
356
  "trial_name": null,
357
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.990403071017276,
5
  "eval_steps": 500,
6
+ "global_step": 7800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
321
  "step": 6000
322
  },
323
  {
324
+ "epoch": 16.0,
325
+ "eval_f1": 0.9097,
326
+ "eval_gen_len": 26.424727272727274,
327
+ "eval_loss": 1.6346381902694702,
328
+ "eval_precision": 0.9108,
329
+ "eval_recall": 0.9089,
330
+ "eval_rouge1": 0.4703,
331
+ "eval_rouge2": 0.2204,
332
  "eval_rougeL": 0.385,
333
+ "eval_rougeLsum": 0.385,
334
+ "eval_runtime": 506.5023,
335
+ "eval_samples_per_second": 5.429,
336
+ "eval_steps_per_second": 0.34,
337
  "step": 6240
338
  },
339
  {
340
+ "epoch": 16.66,
341
+ "learning_rate": 3.3333333333333333e-06,
342
+ "loss": 1.6923,
343
+ "step": 6500
344
+ },
345
+ {
346
+ "epoch": 17.0,
347
+ "eval_f1": 0.9099,
348
+ "eval_gen_len": 26.443636363636365,
349
+ "eval_loss": 1.6305155754089355,
350
+ "eval_precision": 0.911,
351
+ "eval_recall": 0.9091,
352
+ "eval_rouge1": 0.4706,
353
+ "eval_rouge2": 0.221,
354
+ "eval_rougeL": 0.3855,
355
+ "eval_rougeLsum": 0.3856,
356
+ "eval_runtime": 506.0984,
357
+ "eval_samples_per_second": 5.434,
358
+ "eval_steps_per_second": 0.34,
359
+ "step": 6631
360
+ },
361
+ {
362
+ "epoch": 17.94,
363
+ "learning_rate": 2.0512820512820513e-06,
364
+ "loss": 1.6839,
365
+ "step": 7000
366
+ },
367
+ {
368
+ "epoch": 18.0,
369
+ "eval_f1": 0.9098,
370
+ "eval_gen_len": 26.612,
371
+ "eval_loss": 1.6285477876663208,
372
+ "eval_precision": 0.9106,
373
+ "eval_recall": 0.9094,
374
+ "eval_rouge1": 0.4712,
375
+ "eval_rouge2": 0.2215,
376
+ "eval_rougeL": 0.3862,
377
+ "eval_rougeLsum": 0.3864,
378
+ "eval_runtime": 509.5093,
379
+ "eval_samples_per_second": 5.397,
380
+ "eval_steps_per_second": 0.338,
381
+ "step": 7022
382
+ },
383
+ {
384
+ "epoch": 19.0,
385
+ "eval_f1": 0.9099,
386
+ "eval_gen_len": 26.529090909090908,
387
+ "eval_loss": 1.6263303756713867,
388
+ "eval_precision": 0.9108,
389
+ "eval_recall": 0.9093,
390
+ "eval_rouge1": 0.4709,
391
+ "eval_rouge2": 0.2217,
392
+ "eval_rougeL": 0.3862,
393
+ "eval_rougeLsum": 0.3864,
394
+ "eval_runtime": 510.6771,
395
+ "eval_samples_per_second": 5.385,
396
+ "eval_steps_per_second": 0.337,
397
+ "step": 7413
398
+ },
399
+ {
400
+ "epoch": 19.22,
401
+ "learning_rate": 7.692307692307694e-07,
402
+ "loss": 1.6743,
403
+ "step": 7500
404
+ },
405
+ {
406
+ "epoch": 19.99,
407
+ "eval_f1": 0.9099,
408
+ "eval_gen_len": 26.541090909090908,
409
+ "eval_loss": 1.6258454322814941,
410
+ "eval_precision": 0.9108,
411
+ "eval_recall": 0.9093,
412
+ "eval_rouge1": 0.4708,
413
+ "eval_rouge2": 0.2214,
414
+ "eval_rougeL": 0.3861,
415
+ "eval_rougeLsum": 0.3863,
416
+ "eval_runtime": 510.6936,
417
+ "eval_samples_per_second": 5.385,
418
+ "eval_steps_per_second": 0.337,
419
+ "step": 7800
420
+ },
421
+ {
422
+ "epoch": 19.99,
423
+ "step": 7800,
424
+ "total_flos": 1.441981436981674e+18,
425
+ "train_loss": 0.3883497776129307,
426
+ "train_runtime": 10238.8851,
427
+ "train_samples_per_second": 97.667,
428
+ "train_steps_per_second": 0.762
429
  }
430
  ],
431
  "logging_steps": 500,
432
+ "max_steps": 7800,
433
  "num_input_tokens_seen": 0,
434
+ "num_train_epochs": 20,
435
  "save_steps": 500,
436
+ "total_flos": 1.441981436981674e+18,
437
  "train_batch_size": 32,
438
  "trial_name": null,
439
  "trial_params": null