jflotz commited on
Commit
bc40f1b
1 Parent(s): 5d96bbd

Training in progress, step 200000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:564dabf1934e46cc7ba6a8d36285b89de4adb73c031fca3ba9a7a2cd768c72d1
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fdc18a3a07dc1829b889359b7e8a5532af1f5ff31ef1547a2371fed9f04234c
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93645087c6aeead636db9d9e4b7cbf206d1b0f761762ede97a32712b0dcff1ba
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f1a20ef47ec44509fdf200456c5d0dab5065c5747882695a1fa6a6507e69b1
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6434323d38390968de269207dfc4f56d03e65926668b055e0435e4eab6a847eb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d06e0985627e05f775a3ba05c980d5f591b2564a72af07a844dfcb127f862fb
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6434323d38390968de269207dfc4f56d03e65926668b055e0435e4eab6a847eb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d06e0985627e05f775a3ba05c980d5f591b2564a72af07a844dfcb127f862fb
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6434323d38390968de269207dfc4f56d03e65926668b055e0435e4eab6a847eb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d06e0985627e05f775a3ba05c980d5f591b2564a72af07a844dfcb127f862fb
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6434323d38390968de269207dfc4f56d03e65926668b055e0435e4eab6a847eb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d06e0985627e05f775a3ba05c980d5f591b2564a72af07a844dfcb127f862fb
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6434323d38390968de269207dfc4f56d03e65926668b055e0435e4eab6a847eb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d06e0985627e05f775a3ba05c980d5f591b2564a72af07a844dfcb127f862fb
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6434323d38390968de269207dfc4f56d03e65926668b055e0435e4eab6a847eb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d06e0985627e05f775a3ba05c980d5f591b2564a72af07a844dfcb127f862fb
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6434323d38390968de269207dfc4f56d03e65926668b055e0435e4eab6a847eb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d06e0985627e05f775a3ba05c980d5f591b2564a72af07a844dfcb127f862fb
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6434323d38390968de269207dfc4f56d03e65926668b055e0435e4eab6a847eb
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d06e0985627e05f775a3ba05c980d5f591b2564a72af07a844dfcb127f862fb
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:196e9b55e2db27c384076a5416088da2b3e045d13b4c3f579694349cd8bb530e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c550723a62ae0c4012004470c1ebd8187b99ec43cbd02d108f19a1f1901da5d8
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.476088508208422,
5
- "global_step": 190000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3806,11 +3806,211 @@
3806
  "eval_samples_per_second": 1031.502,
3807
  "eval_steps_per_second": 16.166,
3808
  "step": 190000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3809
  }
3810
  ],
3811
  "max_steps": 250000,
3812
  "num_train_epochs": 12,
3813
- "total_flos": 3.043134998592889e+21,
3814
  "trial_name": null,
3815
  "trial_params": null
3816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.922198429693076,
5
+ "global_step": 200000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3806
  "eval_samples_per_second": 1031.502,
3807
  "eval_steps_per_second": 16.166,
3808
  "step": 190000
3809
+ },
3810
+ {
3811
+ "epoch": 8.5,
3812
+ "learning_rate": 9.67486925578087e-05,
3813
+ "loss": 0.3686,
3814
+ "step": 190500
3815
+ },
3816
+ {
3817
+ "epoch": 8.52,
3818
+ "learning_rate": 9.537134606282964e-05,
3819
+ "loss": 0.369,
3820
+ "step": 191000
3821
+ },
3822
+ {
3823
+ "epoch": 8.52,
3824
+ "eval_loss": 0.3414945900440216,
3825
+ "eval_runtime": 2.186,
3826
+ "eval_samples_per_second": 1050.801,
3827
+ "eval_steps_per_second": 16.469,
3828
+ "step": 191000
3829
+ },
3830
+ {
3831
+ "epoch": 8.54,
3832
+ "learning_rate": 9.400316940427652e-05,
3833
+ "loss": 0.3688,
3834
+ "step": 191500
3835
+ },
3836
+ {
3837
+ "epoch": 8.57,
3838
+ "learning_rate": 9.264422243062844e-05,
3839
+ "loss": 0.3685,
3840
+ "step": 192000
3841
+ },
3842
+ {
3843
+ "epoch": 8.57,
3844
+ "eval_loss": 0.34313786029815674,
3845
+ "eval_runtime": 2.2429,
3846
+ "eval_samples_per_second": 1024.133,
3847
+ "eval_steps_per_second": 16.051,
3848
+ "step": 192000
3849
+ },
3850
+ {
3851
+ "epoch": 8.59,
3852
+ "learning_rate": 9.129456458662876e-05,
3853
+ "loss": 0.3684,
3854
+ "step": 192500
3855
+ },
3856
+ {
3857
+ "epoch": 8.61,
3858
+ "learning_rate": 8.995425491068365e-05,
3859
+ "loss": 0.3684,
3860
+ "step": 193000
3861
+ },
3862
+ {
3863
+ "epoch": 8.61,
3864
+ "eval_loss": 0.341512531042099,
3865
+ "eval_runtime": 2.1692,
3866
+ "eval_samples_per_second": 1058.928,
3867
+ "eval_steps_per_second": 16.596,
3868
+ "step": 193000
3869
+ },
3870
+ {
3871
+ "epoch": 8.63,
3872
+ "learning_rate": 8.862335203228025e-05,
3873
+ "loss": 0.3683,
3874
+ "step": 193500
3875
+ },
3876
+ {
3877
+ "epoch": 8.65,
3878
+ "learning_rate": 8.73019141694222e-05,
3879
+ "loss": 0.3683,
3880
+ "step": 194000
3881
+ },
3882
+ {
3883
+ "epoch": 8.65,
3884
+ "eval_loss": 0.3420671820640564,
3885
+ "eval_runtime": 2.2415,
3886
+ "eval_samples_per_second": 1024.765,
3887
+ "eval_steps_per_second": 16.061,
3888
+ "step": 194000
3889
+ },
3890
+ {
3891
+ "epoch": 8.68,
3892
+ "learning_rate": 8.598999912608229e-05,
3893
+ "loss": 0.3681,
3894
+ "step": 194500
3895
+ },
3896
+ {
3897
+ "epoch": 8.7,
3898
+ "learning_rate": 8.468766428967468e-05,
3899
+ "loss": 0.368,
3900
+ "step": 195000
3901
+ },
3902
+ {
3903
+ "epoch": 8.7,
3904
+ "eval_loss": 0.3422170877456665,
3905
+ "eval_runtime": 2.2216,
3906
+ "eval_samples_per_second": 1033.959,
3907
+ "eval_steps_per_second": 16.205,
3908
+ "step": 195000
3909
+ },
3910
+ {
3911
+ "epoch": 8.72,
3912
+ "learning_rate": 8.339496662854397e-05,
3913
+ "loss": 0.3682,
3914
+ "step": 195500
3915
+ },
3916
+ {
3917
+ "epoch": 8.74,
3918
+ "learning_rate": 8.211196268947367e-05,
3919
+ "loss": 0.3719,
3920
+ "step": 196000
3921
+ },
3922
+ {
3923
+ "epoch": 8.74,
3924
+ "eval_loss": 0.3433374762535095,
3925
+ "eval_runtime": 2.2305,
3926
+ "eval_samples_per_second": 1029.81,
3927
+ "eval_steps_per_second": 16.14,
3928
+ "step": 196000
3929
+ },
3930
+ {
3931
+ "epoch": 8.77,
3932
+ "learning_rate": 8.083870859521251e-05,
3933
+ "loss": 0.368,
3934
+ "step": 196500
3935
+ },
3936
+ {
3937
+ "epoch": 8.79,
3938
+ "learning_rate": 7.95752600420192e-05,
3939
+ "loss": 0.3678,
3940
+ "step": 197000
3941
+ },
3942
+ {
3943
+ "epoch": 8.79,
3944
+ "eval_loss": 0.33997857570648193,
3945
+ "eval_runtime": 2.2361,
3946
+ "eval_samples_per_second": 1027.215,
3947
+ "eval_steps_per_second": 16.099,
3948
+ "step": 197000
3949
+ },
3950
+ {
3951
+ "epoch": 8.81,
3952
+ "learning_rate": 7.832167229722666e-05,
3953
+ "loss": 0.3677,
3954
+ "step": 197500
3955
+ },
3956
+ {
3957
+ "epoch": 8.83,
3958
+ "learning_rate": 7.707800019682362e-05,
3959
+ "loss": 0.3675,
3960
+ "step": 198000
3961
+ },
3962
+ {
3963
+ "epoch": 8.83,
3964
+ "eval_loss": 0.3419722616672516,
3965
+ "eval_runtime": 2.2466,
3966
+ "eval_samples_per_second": 1022.412,
3967
+ "eval_steps_per_second": 16.024,
3968
+ "step": 198000
3969
+ },
3970
+ {
3971
+ "epoch": 8.86,
3972
+ "learning_rate": 7.5844298143057e-05,
3973
+ "loss": 0.3676,
3974
+ "step": 198500
3975
+ },
3976
+ {
3977
+ "epoch": 8.88,
3978
+ "learning_rate": 7.462062010205106e-05,
3979
+ "loss": 0.3676,
3980
+ "step": 199000
3981
+ },
3982
+ {
3983
+ "epoch": 8.88,
3984
+ "eval_loss": 0.3425526022911072,
3985
+ "eval_runtime": 2.2092,
3986
+ "eval_samples_per_second": 1039.756,
3987
+ "eval_steps_per_second": 16.296,
3988
+ "step": 199000
3989
+ },
3990
+ {
3991
+ "epoch": 8.9,
3992
+ "learning_rate": 7.340701960144751e-05,
3993
+ "loss": 0.3676,
3994
+ "step": 199500
3995
+ },
3996
+ {
3997
+ "epoch": 8.92,
3998
+ "learning_rate": 7.220354972806392e-05,
3999
+ "loss": 0.3674,
4000
+ "step": 200000
4001
+ },
4002
+ {
4003
+ "epoch": 8.92,
4004
+ "eval_loss": 0.33957439661026,
4005
+ "eval_runtime": 2.28,
4006
+ "eval_samples_per_second": 1007.437,
4007
+ "eval_steps_per_second": 15.789,
4008
+ "step": 200000
4009
  }
4010
  ],
4011
  "max_steps": 250000,
4012
  "num_train_epochs": 12,
4013
+ "total_flos": 3.203303001458693e+21,
4014
  "trial_name": null,
4015
  "trial_params": null
4016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93645087c6aeead636db9d9e4b7cbf206d1b0f761762ede97a32712b0dcff1ba
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f1a20ef47ec44509fdf200456c5d0dab5065c5747882695a1fa6a6507e69b1
3
  size 25761253