MarkelFe commited on
Commit
61ce466
1 Parent(s): de7cac6

Training in progress, step 60000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d898d6ea104f17cbbf27e7461b1d6144bf9fe1ae55eea4068e53eb3839da02a
3
- size 995605189
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b1476fc7f63764e7c906982b1b12a6daf7ebe1a4c9eabc5fef44fc57fcc9502
3
+ size 995605445
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98a99a09048543fe282b50b60bfea4bb009b9decd582e348ad2d38001d140d18
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06dbc8237a227289c2c378d392e11aa4349c38612caeca6588a0593a85dc6327
3
  size 510398013
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6359c6c031dbde530d264752810896a8f9d4957f508f18193732bd65efe52d
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:238fcc56614b79437ea498441742d1361699701d7545b2165136d90ba11cb014
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b9bb18d023d349a9a0f7837b4603b5fafaa2d673ffb5da28ae0fb78b43c6d7e
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07b2728bb8093f11b5a52109cc3872bbd75c6945624640e5b69917df439310bd
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1838242257789564,
5
- "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -652,11 +652,139 @@
652
  "eval_samples_per_second": 165.853,
653
  "eval_steps_per_second": 20.736,
654
  "step": 50000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
  }
656
  ],
657
  "max_steps": 633540,
658
  "num_train_epochs": 15,
659
- "total_flos": 1.0692746654976e+16,
660
  "trial_name": null,
661
  "trial_params": null
662
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.4205890709347475,
5
+ "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
652
  "eval_samples_per_second": 165.853,
653
  "eval_steps_per_second": 20.736,
654
  "step": 50000
655
+ },
656
+ {
657
+ "epoch": 1.2,
658
+ "learning_rate": 3.275726868074628e-05,
659
+ "loss": 3.0453,
660
+ "step": 50500
661
+ },
662
+ {
663
+ "epoch": 1.21,
664
+ "learning_rate": 3.269413138870474e-05,
665
+ "loss": 3.014,
666
+ "step": 51000
667
+ },
668
+ {
669
+ "epoch": 1.22,
670
+ "learning_rate": 3.26309940966632e-05,
671
+ "loss": 3.0549,
672
+ "step": 51500
673
+ },
674
+ {
675
+ "epoch": 1.23,
676
+ "learning_rate": 3.256785680462165e-05,
677
+ "loss": 3.0268,
678
+ "step": 52000
679
+ },
680
+ {
681
+ "epoch": 1.24,
682
+ "learning_rate": 3.250471951258011e-05,
683
+ "loss": 3.0389,
684
+ "step": 52500
685
+ },
686
+ {
687
+ "epoch": 1.25,
688
+ "learning_rate": 3.244158222053857e-05,
689
+ "loss": 3.0315,
690
+ "step": 53000
691
+ },
692
+ {
693
+ "epoch": 1.27,
694
+ "learning_rate": 3.237844492849702e-05,
695
+ "loss": 3.0363,
696
+ "step": 53500
697
+ },
698
+ {
699
+ "epoch": 1.28,
700
+ "learning_rate": 3.231530763645547e-05,
701
+ "loss": 3.0291,
702
+ "step": 54000
703
+ },
704
+ {
705
+ "epoch": 1.29,
706
+ "learning_rate": 3.225217034441393e-05,
707
+ "loss": 3.0257,
708
+ "step": 54500
709
+ },
710
+ {
711
+ "epoch": 1.3,
712
+ "learning_rate": 3.218903305237238e-05,
713
+ "loss": 3.0457,
714
+ "step": 55000
715
+ },
716
+ {
717
+ "epoch": 1.31,
718
+ "learning_rate": 3.212589576033084e-05,
719
+ "loss": 3.0431,
720
+ "step": 55500
721
+ },
722
+ {
723
+ "epoch": 1.33,
724
+ "learning_rate": 3.20627584682893e-05,
725
+ "loss": 3.0591,
726
+ "step": 56000
727
+ },
728
+ {
729
+ "epoch": 1.34,
730
+ "learning_rate": 3.199962117624775e-05,
731
+ "loss": 3.0525,
732
+ "step": 56500
733
+ },
734
+ {
735
+ "epoch": 1.35,
736
+ "learning_rate": 3.193648388420621e-05,
737
+ "loss": 3.0479,
738
+ "step": 57000
739
+ },
740
+ {
741
+ "epoch": 1.36,
742
+ "learning_rate": 3.187334659216466e-05,
743
+ "loss": 3.019,
744
+ "step": 57500
745
+ },
746
+ {
747
+ "epoch": 1.37,
748
+ "learning_rate": 3.181020930012312e-05,
749
+ "loss": 3.0356,
750
+ "step": 58000
751
+ },
752
+ {
753
+ "epoch": 1.39,
754
+ "learning_rate": 3.174707200808158e-05,
755
+ "loss": 3.0549,
756
+ "step": 58500
757
+ },
758
+ {
759
+ "epoch": 1.4,
760
+ "learning_rate": 3.168393471604003e-05,
761
+ "loss": 3.0439,
762
+ "step": 59000
763
+ },
764
+ {
765
+ "epoch": 1.41,
766
+ "learning_rate": 3.162079742399849e-05,
767
+ "loss": 3.0491,
768
+ "step": 59500
769
+ },
770
+ {
771
+ "epoch": 1.42,
772
+ "learning_rate": 3.155766013195695e-05,
773
+ "loss": 3.0409,
774
+ "step": 60000
775
+ },
776
+ {
777
+ "epoch": 1.42,
778
+ "eval_loss": 3.1459972858428955,
779
+ "eval_runtime": 113.0693,
780
+ "eval_samples_per_second": 166.022,
781
+ "eval_steps_per_second": 20.757,
782
+ "step": 60000
783
  }
784
  ],
785
  "max_steps": 633540,
786
  "num_train_epochs": 15,
787
+ "total_flos": 1.2788110741248e+16,
788
  "trial_name": null,
789
  "trial_params": null
790
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98a99a09048543fe282b50b60bfea4bb009b9decd582e348ad2d38001d140d18
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06dbc8237a227289c2c378d392e11aa4349c38612caeca6588a0593a85dc6327
3
  size 510398013