MarkelFe commited on
Commit
44550eb
1 Parent(s): 09c1301

Training in progress, step 470000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cab0feaeec9b0df081531b11134d4a07974f3e8871469ef1e5978a3fdf4513c
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f95151b405ff6e9b28a316b89062144bf559aa926a03528da9726a9f12203ab4
3
  size 995605445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b06627b0adf71aa49d708b0fe02640f869165ee333039e8785ac4790ed4b7e03
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80220a02bd1bb4effc31eeb67a63bc1bc002600ac88d646d6d240817c138de29
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40f85b1d724a0d5eeb70a8d3d8c6161d3e8829e289f2cc9ca11e46d533504766
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f20101388fa6eb1fe545871d1e9e63a9edfa1ca0004720ab71ea7f1eab3f9aa
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.891182877166399,
5
- "global_step": 460000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5900,11 +5900,139 @@
5900
  "eval_samples_per_second": 166.297,
5901
  "eval_steps_per_second": 20.792,
5902
  "step": 460000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5903
  }
5904
  ],
5905
  "max_steps": 633540,
5906
  "num_train_epochs": 15,
5907
- "total_flos": 9.7069628570112e+16,
5908
  "trial_name": null,
5909
  "trial_params": null
5910
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.12794772232219,
5
+ "global_step": 470000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5900
  "eval_samples_per_second": 166.297,
5901
  "eval_steps_per_second": 20.792,
5902
  "step": 460000
5903
+ },
5904
+ {
5905
+ "epoch": 10.9,
5906
+ "learning_rate": 0.0,
5907
+ "loss": 2.4369,
5908
+ "step": 460500
5909
+ },
5910
+ {
5911
+ "epoch": 10.91,
5912
+ "learning_rate": 0.0,
5913
+ "loss": 2.431,
5914
+ "step": 461000
5915
+ },
5916
+ {
5917
+ "epoch": 10.93,
5918
+ "learning_rate": 0.0,
5919
+ "loss": 2.4349,
5920
+ "step": 461500
5921
+ },
5922
+ {
5923
+ "epoch": 10.94,
5924
+ "learning_rate": 0.0,
5925
+ "loss": 2.4178,
5926
+ "step": 462000
5927
+ },
5928
+ {
5929
+ "epoch": 10.95,
5930
+ "learning_rate": 0.0,
5931
+ "loss": 2.4334,
5932
+ "step": 462500
5933
+ },
5934
+ {
5935
+ "epoch": 10.96,
5936
+ "learning_rate": 0.0,
5937
+ "loss": 2.4253,
5938
+ "step": 463000
5939
+ },
5940
+ {
5941
+ "epoch": 10.97,
5942
+ "learning_rate": 0.0,
5943
+ "loss": 2.437,
5944
+ "step": 463500
5945
+ },
5946
+ {
5947
+ "epoch": 10.99,
5948
+ "learning_rate": 0.0,
5949
+ "loss": 2.4155,
5950
+ "step": 464000
5951
+ },
5952
+ {
5953
+ "epoch": 11.0,
5954
+ "learning_rate": 0.0,
5955
+ "loss": 2.4418,
5956
+ "step": 464500
5957
+ },
5958
+ {
5959
+ "epoch": 11.01,
5960
+ "learning_rate": 0.0,
5961
+ "loss": 2.4177,
5962
+ "step": 465000
5963
+ },
5964
+ {
5965
+ "epoch": 11.02,
5966
+ "learning_rate": 0.0,
5967
+ "loss": 2.4494,
5968
+ "step": 465500
5969
+ },
5970
+ {
5971
+ "epoch": 11.03,
5972
+ "learning_rate": 0.0,
5973
+ "loss": 2.4259,
5974
+ "step": 466000
5975
+ },
5976
+ {
5977
+ "epoch": 11.05,
5978
+ "learning_rate": 0.0,
5979
+ "loss": 2.4245,
5980
+ "step": 466500
5981
+ },
5982
+ {
5983
+ "epoch": 11.06,
5984
+ "learning_rate": 0.0,
5985
+ "loss": 2.4343,
5986
+ "step": 467000
5987
+ },
5988
+ {
5989
+ "epoch": 11.07,
5990
+ "learning_rate": 0.0,
5991
+ "loss": 2.4279,
5992
+ "step": 467500
5993
+ },
5994
+ {
5995
+ "epoch": 11.08,
5996
+ "learning_rate": 0.0,
5997
+ "loss": 2.4222,
5998
+ "step": 468000
5999
+ },
6000
+ {
6001
+ "epoch": 11.09,
6002
+ "learning_rate": 0.0,
6003
+ "loss": 2.4216,
6004
+ "step": 468500
6005
+ },
6006
+ {
6007
+ "epoch": 11.1,
6008
+ "learning_rate": 0.0,
6009
+ "loss": 2.432,
6010
+ "step": 469000
6011
+ },
6012
+ {
6013
+ "epoch": 11.12,
6014
+ "learning_rate": 0.0,
6015
+ "loss": 2.4287,
6016
+ "step": 469500
6017
+ },
6018
+ {
6019
+ "epoch": 11.13,
6020
+ "learning_rate": 0.0,
6021
+ "loss": 2.4275,
6022
+ "step": 470000
6023
+ },
6024
+ {
6025
+ "epoch": 11.13,
6026
+ "eval_loss": 3.1522228717803955,
6027
+ "eval_runtime": 112.9503,
6028
+ "eval_samples_per_second": 166.197,
6029
+ "eval_steps_per_second": 20.779,
6030
+ "step": 470000
6031
  }
6032
  ],
6033
  "max_steps": 633540,
6034
  "num_train_epochs": 15,
6035
+ "total_flos": 9.9180941677056e+16,
6036
  "trial_name": null,
6037
  "trial_params": null
6038
  }