GlycerinLOL committed on
Commit
6b7df18
1 Parent(s): 5cfa814

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +156 -24
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 24.0,
3
- "train_loss": 0.22391605226564926,
4
- "train_runtime": 17118.5241,
5
- "train_samples_per_second": 70.1,
6
- "train_steps_per_second": 0.73
7
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "train_loss": 0.1497309269236969,
4
+ "train_runtime": 15515.0349,
5
+ "train_samples_per_second": 96.68,
6
+ "train_steps_per_second": 1.007
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 24.0,
3
- "train_loss": 0.22391605226564926,
4
- "train_runtime": 17118.5241,
5
- "train_samples_per_second": 70.1,
6
- "train_steps_per_second": 0.73
7
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "train_loss": 0.1497309269236969,
4
+ "train_runtime": 15515.0349,
5
+ "train_samples_per_second": 96.68,
6
+ "train_steps_per_second": 1.007
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 24.0,
5
  "eval_steps": 500,
6
- "global_step": 12504,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -528,36 +528,168 @@
528
  },
529
  {
530
  "epoch": 24.0,
531
- "eval_f1": 0.9034,
532
- "eval_gen_len": 19.90290909090909,
533
- "eval_loss": 1.6053136587142944,
534
- "eval_precision": 0.9159,
535
- "eval_recall": 0.8916,
536
- "eval_rouge1": 0.4481,
537
- "eval_rouge2": 0.2283,
538
- "eval_rougeL": 0.3861,
539
- "eval_rougeLsum": 0.3863,
540
- "eval_runtime": 314.5795,
541
- "eval_samples_per_second": 8.742,
542
- "eval_steps_per_second": 0.547,
543
  "step": 12504
544
  },
545
  {
546
- "epoch": 24.0,
547
- "step": 12504,
548
- "total_flos": 2.569106349028344e+18,
549
- "train_loss": 0.22391605226564926,
550
- "train_runtime": 17118.5241,
551
- "train_samples_per_second": 70.1,
552
- "train_steps_per_second": 0.73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553
  }
554
  ],
555
  "logging_steps": 500,
556
- "max_steps": 12504,
557
  "num_input_tokens_seen": 0,
558
- "num_train_epochs": 24,
559
  "save_steps": 500,
560
- "total_flos": 2.569106349028344e+18,
561
  "train_batch_size": 24,
562
  "trial_name": null,
563
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 30.0,
5
  "eval_steps": 500,
6
+ "global_step": 15630,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
528
  },
529
  {
530
  "epoch": 24.0,
531
+ "eval_f1": 0.9037,
532
+ "eval_gen_len": 19.900727272727273,
533
+ "eval_loss": 1.6096539497375488,
534
+ "eval_precision": 0.9162,
535
+ "eval_recall": 0.892,
536
+ "eval_rouge1": 0.4491,
537
+ "eval_rouge2": 0.2284,
538
+ "eval_rougeL": 0.3872,
539
+ "eval_rougeLsum": 0.387,
540
+ "eval_runtime": 312.9411,
541
+ "eval_samples_per_second": 8.788,
542
+ "eval_steps_per_second": 0.55,
543
  "step": 12504
544
  },
545
  {
546
+ "epoch": 24.95,
547
+ "learning_rate": 3.3653230966090854e-06,
548
+ "loss": 0.7733,
549
+ "step": 13000
550
+ },
551
+ {
552
+ "epoch": 25.0,
553
+ "eval_f1": 0.9027,
554
+ "eval_gen_len": 19.91781818181818,
555
+ "eval_loss": 1.6059536933898926,
556
+ "eval_precision": 0.9154,
557
+ "eval_recall": 0.8906,
558
+ "eval_rouge1": 0.4442,
559
+ "eval_rouge2": 0.2257,
560
+ "eval_rougeL": 0.3827,
561
+ "eval_rougeLsum": 0.3828,
562
+ "eval_runtime": 312.8395,
563
+ "eval_samples_per_second": 8.79,
564
+ "eval_steps_per_second": 0.55,
565
+ "step": 13025
566
+ },
567
+ {
568
+ "epoch": 25.91,
569
+ "learning_rate": 2.72552783109405e-06,
570
+ "loss": 0.7631,
571
+ "step": 13500
572
+ },
573
+ {
574
+ "epoch": 26.0,
575
+ "eval_f1": 0.9031,
576
+ "eval_gen_len": 19.917454545454547,
577
+ "eval_loss": 1.618681788444519,
578
+ "eval_precision": 0.9154,
579
+ "eval_recall": 0.8915,
580
+ "eval_rouge1": 0.4472,
581
+ "eval_rouge2": 0.2276,
582
+ "eval_rougeL": 0.3861,
583
+ "eval_rougeLsum": 0.3861,
584
+ "eval_runtime": 312.5385,
585
+ "eval_samples_per_second": 8.799,
586
+ "eval_steps_per_second": 0.55,
587
+ "step": 13546
588
+ },
589
+ {
590
+ "epoch": 26.87,
591
+ "learning_rate": 2.085732565579015e-06,
592
+ "loss": 0.7505,
593
+ "step": 14000
594
+ },
595
+ {
596
+ "epoch": 27.0,
597
+ "eval_f1": 0.9031,
598
+ "eval_gen_len": 19.896727272727272,
599
+ "eval_loss": 1.620802402496338,
600
+ "eval_precision": 0.9155,
601
+ "eval_recall": 0.8914,
602
+ "eval_rouge1": 0.4463,
603
+ "eval_rouge2": 0.227,
604
+ "eval_rougeL": 0.3852,
605
+ "eval_rougeLsum": 0.3851,
606
+ "eval_runtime": 310.9302,
607
+ "eval_samples_per_second": 8.844,
608
+ "eval_steps_per_second": 0.553,
609
+ "step": 14067
610
+ },
611
+ {
612
+ "epoch": 27.83,
613
+ "learning_rate": 1.4459373000639796e-06,
614
+ "loss": 0.7413,
615
+ "step": 14500
616
+ },
617
+ {
618
+ "epoch": 28.0,
619
+ "eval_f1": 0.9032,
620
+ "eval_gen_len": 19.91527272727273,
621
+ "eval_loss": 1.623663306236267,
622
+ "eval_precision": 0.9159,
623
+ "eval_recall": 0.8912,
624
+ "eval_rouge1": 0.4468,
625
+ "eval_rouge2": 0.2273,
626
+ "eval_rougeL": 0.3854,
627
+ "eval_rougeLsum": 0.3853,
628
+ "eval_runtime": 311.7716,
629
+ "eval_samples_per_second": 8.821,
630
+ "eval_steps_per_second": 0.552,
631
+ "step": 14588
632
+ },
633
+ {
634
+ "epoch": 28.79,
635
+ "learning_rate": 8.061420345489445e-07,
636
+ "loss": 0.7348,
637
+ "step": 15000
638
+ },
639
+ {
640
+ "epoch": 29.0,
641
+ "eval_f1": 0.9035,
642
+ "eval_gen_len": 19.893818181818183,
643
+ "eval_loss": 1.6312142610549927,
644
+ "eval_precision": 0.9158,
645
+ "eval_recall": 0.8918,
646
+ "eval_rouge1": 0.4482,
647
+ "eval_rouge2": 0.2268,
648
+ "eval_rougeL": 0.3858,
649
+ "eval_rougeLsum": 0.3858,
650
+ "eval_runtime": 310.3349,
651
+ "eval_samples_per_second": 8.861,
652
+ "eval_steps_per_second": 0.554,
653
+ "step": 15109
654
+ },
655
+ {
656
+ "epoch": 29.75,
657
+ "learning_rate": 1.6634676903390917e-07,
658
+ "loss": 0.7286,
659
+ "step": 15500
660
+ },
661
+ {
662
+ "epoch": 30.0,
663
+ "eval_f1": 0.9033,
664
+ "eval_gen_len": 19.908727272727273,
665
+ "eval_loss": 1.6350260972976685,
666
+ "eval_precision": 0.9156,
667
+ "eval_recall": 0.8915,
668
+ "eval_rouge1": 0.4471,
669
+ "eval_rouge2": 0.2259,
670
+ "eval_rougeL": 0.3846,
671
+ "eval_rougeLsum": 0.3845,
672
+ "eval_runtime": 314.8086,
673
+ "eval_samples_per_second": 8.735,
674
+ "eval_steps_per_second": 0.546,
675
+ "step": 15630
676
+ },
677
+ {
678
+ "epoch": 30.0,
679
+ "step": 15630,
680
+ "total_flos": 3.2113882736270377e+18,
681
+ "train_loss": 0.1497309269236969,
682
+ "train_runtime": 15515.0349,
683
+ "train_samples_per_second": 96.68,
684
+ "train_steps_per_second": 1.007
685
  }
686
  ],
687
  "logging_steps": 500,
688
+ "max_steps": 15630,
689
  "num_input_tokens_seen": 0,
690
+ "num_train_epochs": 30,
691
  "save_steps": 500,
692
+ "total_flos": 3.2113882736270377e+18,
693
  "train_batch_size": 24,
694
  "trial_name": null,
695
  "trial_params": null