plip commited on
Commit
36efee0
1 Parent(s): 24c139c

Training in progress, step 450000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38e96dc800b8071588a25548ce196afd6691c407f883f1ee47078f41d437f6cc
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48da217ea188770413b01ab5847a04b2b361555bd731533e7a2791bedfd928dd
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5997fc3c43be6a9667fa07314f7e899550168c519b5f1cc4e4addad00b4f02db
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2291b66065799571b4264696030000d7ec1a0e86601a7f2aefc345f1b8fac97d
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7888d98a0e63278076119528e69fae749a8a6dd4d788341bc14abb08d306def4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7888d98a0e63278076119528e69fae749a8a6dd4d788341bc14abb08d306def4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7888d98a0e63278076119528e69fae749a8a6dd4d788341bc14abb08d306def4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7888d98a0e63278076119528e69fae749a8a6dd4d788341bc14abb08d306def4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7888d98a0e63278076119528e69fae749a8a6dd4d788341bc14abb08d306def4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7888d98a0e63278076119528e69fae749a8a6dd4d788341bc14abb08d306def4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7888d98a0e63278076119528e69fae749a8a6dd4d788341bc14abb08d306def4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7888d98a0e63278076119528e69fae749a8a6dd4d788341bc14abb08d306def4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b5ef0740ac3df960078bba8a97e2da86f6b2673b0404aac1c245b336272b522
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1894f32b2441ea8820978bbb44f8f2d9ce0a579e669301efbd1655378591798a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc29c434fb0390a8f4f90d65ac745a0b4f381dbd06e857762d450d4a464c7045
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.213619450532647,
5
- "global_step": 440000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8806,11 +8806,211 @@
8806
  "eval_samples_per_second": 822.887,
8807
  "eval_steps_per_second": 13.166,
8808
  "step": 440000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8809
  }
8810
  ],
8811
  "max_steps": 500000,
8812
  "num_train_epochs": 13,
8813
- "total_flos": 1.4057285838034505e+22,
8814
  "trial_name": null,
8815
  "trial_params": null
8816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.468474438044753,
5
+ "global_step": 450000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8806
  "eval_samples_per_second": 822.887,
8807
  "eval_steps_per_second": 13.166,
8808
  "step": 440000
8809
+ },
8810
+ {
8811
+ "epoch": 11.23,
8812
+ "learning_rate": 2.1083388335824145e-05,
8813
+ "loss": 0.2689,
8814
+ "step": 440500
8815
+ },
8816
+ {
8817
+ "epoch": 11.24,
8818
+ "learning_rate": 2.0900255633978873e-05,
8819
+ "loss": 0.2689,
8820
+ "step": 441000
8821
+ },
8822
+ {
8823
+ "epoch": 11.24,
8824
+ "eval_loss": 0.8037525415420532,
8825
+ "eval_runtime": 1.2548,
8826
+ "eval_samples_per_second": 796.966,
8827
+ "eval_steps_per_second": 12.751,
8828
+ "step": 441000
8829
+ },
8830
+ {
8831
+ "epoch": 11.25,
8832
+ "learning_rate": 2.0718589425453314e-05,
8833
+ "loss": 0.2686,
8834
+ "step": 441500
8835
+ },
8836
+ {
8837
+ "epoch": 11.26,
8838
+ "learning_rate": 2.0538391696920015e-05,
8839
+ "loss": 0.2687,
8840
+ "step": 442000
8841
+ },
8842
+ {
8843
+ "epoch": 11.26,
8844
+ "eval_loss": 0.8066098690032959,
8845
+ "eval_runtime": 1.3012,
8846
+ "eval_samples_per_second": 768.545,
8847
+ "eval_steps_per_second": 12.297,
8848
+ "step": 442000
8849
+ },
8850
+ {
8851
+ "epoch": 11.28,
8852
+ "learning_rate": 2.035966441899249e-05,
8853
+ "loss": 0.2687,
8854
+ "step": 442500
8855
+ },
8856
+ {
8857
+ "epoch": 11.29,
8858
+ "learning_rate": 2.0182409546203555e-05,
8859
+ "loss": 0.2688,
8860
+ "step": 443000
8861
+ },
8862
+ {
8863
+ "epoch": 11.29,
8864
+ "eval_loss": 0.812473714351654,
8865
+ "eval_runtime": 1.3079,
8866
+ "eval_samples_per_second": 764.56,
8867
+ "eval_steps_per_second": 12.233,
8868
+ "step": 443000
8869
+ },
8870
+ {
8871
+ "epoch": 11.3,
8872
+ "learning_rate": 2.000662901698415e-05,
8873
+ "loss": 0.2689,
8874
+ "step": 443500
8875
+ },
8876
+ {
8877
+ "epoch": 11.32,
8878
+ "learning_rate": 1.983232475364195e-05,
8879
+ "loss": 0.2686,
8880
+ "step": 444000
8881
+ },
8882
+ {
8883
+ "epoch": 11.32,
8884
+ "eval_loss": 0.8055201768875122,
8885
+ "eval_runtime": 1.3423,
8886
+ "eval_samples_per_second": 744.998,
8887
+ "eval_steps_per_second": 11.92,
8888
+ "step": 444000
8889
+ },
8890
+ {
8891
+ "epoch": 11.33,
8892
+ "learning_rate": 1.9659498662340474e-05,
8893
+ "loss": 0.2686,
8894
+ "step": 444500
8895
+ },
8896
+ {
8897
+ "epoch": 11.34,
8898
+ "learning_rate": 1.948815263307819e-05,
8899
+ "loss": 0.2686,
8900
+ "step": 445000
8901
+ },
8902
+ {
8903
+ "epoch": 11.34,
8904
+ "eval_loss": 0.8064922094345093,
8905
+ "eval_runtime": 1.2899,
8906
+ "eval_samples_per_second": 775.27,
8907
+ "eval_steps_per_second": 12.404,
8908
+ "step": 445000
8909
+ },
8910
+ {
8911
+ "epoch": 11.35,
8912
+ "learning_rate": 1.9318288539667765e-05,
8913
+ "loss": 0.2686,
8914
+ "step": 445500
8915
+ },
8916
+ {
8917
+ "epoch": 11.37,
8918
+ "learning_rate": 1.914990823971574e-05,
8919
+ "loss": 0.2685,
8920
+ "step": 446000
8921
+ },
8922
+ {
8923
+ "epoch": 11.37,
8924
+ "eval_loss": 0.813357412815094,
8925
+ "eval_runtime": 1.286,
8926
+ "eval_samples_per_second": 777.596,
8927
+ "eval_steps_per_second": 12.442,
8928
+ "step": 446000
8929
+ },
8930
+ {
8931
+ "epoch": 11.38,
8932
+ "learning_rate": 1.8983013574602096e-05,
8933
+ "loss": 0.2687,
8934
+ "step": 446500
8935
+ },
8936
+ {
8937
+ "epoch": 11.39,
8938
+ "learning_rate": 1.8817606369460156e-05,
8939
+ "loss": 0.2684,
8940
+ "step": 447000
8941
+ },
8942
+ {
8943
+ "epoch": 11.39,
8944
+ "eval_loss": 0.806776225566864,
8945
+ "eval_runtime": 1.2826,
8946
+ "eval_samples_per_second": 779.686,
8947
+ "eval_steps_per_second": 12.475,
8948
+ "step": 447000
8949
+ },
8950
+ {
8951
+ "epoch": 11.4,
8952
+ "learning_rate": 1.865368843315663e-05,
8953
+ "loss": 0.2684,
8954
+ "step": 447500
8955
+ },
8956
+ {
8957
+ "epoch": 11.42,
8958
+ "learning_rate": 1.8491261558271762e-05,
8959
+ "loss": 0.2683,
8960
+ "step": 448000
8961
+ },
8962
+ {
8963
+ "epoch": 11.42,
8964
+ "eval_loss": 0.808623194694519,
8965
+ "eval_runtime": 1.2653,
8966
+ "eval_samples_per_second": 790.357,
8967
+ "eval_steps_per_second": 12.646,
8968
+ "step": 448000
8969
+ },
8970
+ {
8971
+ "epoch": 11.43,
8972
+ "learning_rate": 1.833032752107986e-05,
8973
+ "loss": 0.2682,
8974
+ "step": 448500
8975
+ },
8976
+ {
8977
+ "epoch": 11.44,
8978
+ "learning_rate": 1.817088808152978e-05,
8979
+ "loss": 0.2684,
8980
+ "step": 449000
8981
+ },
8982
+ {
8983
+ "epoch": 11.44,
8984
+ "eval_loss": 0.8024921417236328,
8985
+ "eval_runtime": 1.2578,
8986
+ "eval_samples_per_second": 795.067,
8987
+ "eval_steps_per_second": 12.721,
8988
+ "step": 449000
8989
+ },
8990
+ {
8991
+ "epoch": 11.46,
8992
+ "learning_rate": 1.801294498322569e-05,
8993
+ "loss": 0.2685,
8994
+ "step": 449500
8995
+ },
8996
+ {
8997
+ "epoch": 11.47,
8998
+ "learning_rate": 1.7856499953407978e-05,
8999
+ "loss": 0.2682,
9000
+ "step": 450000
9001
+ },
9002
+ {
9003
+ "epoch": 11.47,
9004
+ "eval_loss": 0.8072661757469177,
9005
+ "eval_runtime": 1.2702,
9006
+ "eval_samples_per_second": 787.258,
9007
+ "eval_steps_per_second": 12.596,
9008
+ "step": 450000
9009
  }
9010
  ],
9011
  "max_steps": 500000,
9012
  "num_train_epochs": 13,
9013
+ "total_flos": 1.4376772976809005e+22,
9014
  "trial_name": null,
9015
  "trial_params": null
9016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5997fc3c43be6a9667fa07314f7e899550168c519b5f1cc4e4addad00b4f02db
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2291b66065799571b4264696030000d7ec1a0e86601a7f2aefc345f1b8fac97d
3
  size 102501541