MarkelFe commited on
Commit
3ca9f0b
1 Parent(s): 1b239d3

Training in progress, step 390000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2310ccd274183453ec41297ed5b4fbe0708a6ec9bca366b103dfcf65aeab3e97
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdba09a21a6fe137d8ba359aac9297fdf4dbf33d9b301f28af9c14375f6817c1
3
  size 995605445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:070b24fba265a1068a427c0c24eff65c5db228aea33e6a3f577c8ae63bc979f8
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c2d59896d59a19db4a7827a3260af1e2736fb9670920d9776b8ffe7675ce78
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daf8d2b73d4b0feab91274f36ef211767933c8e416500bcbb275242b4c476138
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b771a1fe99a5de1f25e62006cef44ab6aa7ae84aa666df6010f762208efae301
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.997064115920068,
5
- "global_step": 380000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4876,11 +4876,139 @@
4876
  "eval_samples_per_second": 166.019,
4877
  "eval_steps_per_second": 20.757,
4878
  "step": 380000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4879
  }
4880
  ],
4881
  "max_steps": 633540,
4882
  "num_train_epochs": 15,
4883
- "total_flos": 8.0235708749568e+16,
4884
  "trial_name": null,
4885
  "trial_params": null
4886
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.23382896107586,
5
+ "global_step": 390000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4876
  "eval_samples_per_second": 166.019,
4877
  "eval_steps_per_second": 20.757,
4878
  "step": 380000
4879
+ },
4880
+ {
4881
+ "epoch": 9.01,
4882
+ "learning_rate": 0.0,
4883
+ "loss": 2.4446,
4884
+ "step": 380500
4885
+ },
4886
+ {
4887
+ "epoch": 9.02,
4888
+ "learning_rate": 0.0,
4889
+ "loss": 2.4163,
4890
+ "step": 381000
4891
+ },
4892
+ {
4893
+ "epoch": 9.03,
4894
+ "learning_rate": 0.0,
4895
+ "loss": 2.4384,
4896
+ "step": 381500
4897
+ },
4898
+ {
4899
+ "epoch": 9.04,
4900
+ "learning_rate": 0.0,
4901
+ "loss": 2.4467,
4902
+ "step": 382000
4903
+ },
4904
+ {
4905
+ "epoch": 9.06,
4906
+ "learning_rate": 0.0,
4907
+ "loss": 2.4226,
4908
+ "step": 382500
4909
+ },
4910
+ {
4911
+ "epoch": 9.07,
4912
+ "learning_rate": 0.0,
4913
+ "loss": 2.4246,
4914
+ "step": 383000
4915
+ },
4916
+ {
4917
+ "epoch": 9.08,
4918
+ "learning_rate": 0.0,
4919
+ "loss": 2.4276,
4920
+ "step": 383500
4921
+ },
4922
+ {
4923
+ "epoch": 9.09,
4924
+ "learning_rate": 0.0,
4925
+ "loss": 2.4267,
4926
+ "step": 384000
4927
+ },
4928
+ {
4929
+ "epoch": 9.1,
4930
+ "learning_rate": 0.0,
4931
+ "loss": 2.4246,
4932
+ "step": 384500
4933
+ },
4934
+ {
4935
+ "epoch": 9.12,
4936
+ "learning_rate": 0.0,
4937
+ "loss": 2.4199,
4938
+ "step": 385000
4939
+ },
4940
+ {
4941
+ "epoch": 9.13,
4942
+ "learning_rate": 0.0,
4943
+ "loss": 2.4252,
4944
+ "step": 385500
4945
+ },
4946
+ {
4947
+ "epoch": 9.14,
4948
+ "learning_rate": 0.0,
4949
+ "loss": 2.4306,
4950
+ "step": 386000
4951
+ },
4952
+ {
4953
+ "epoch": 9.15,
4954
+ "learning_rate": 0.0,
4955
+ "loss": 2.4269,
4956
+ "step": 386500
4957
+ },
4958
+ {
4959
+ "epoch": 9.16,
4960
+ "learning_rate": 0.0,
4961
+ "loss": 2.4259,
4962
+ "step": 387000
4963
+ },
4964
+ {
4965
+ "epoch": 9.17,
4966
+ "learning_rate": 0.0,
4967
+ "loss": 2.423,
4968
+ "step": 387500
4969
+ },
4970
+ {
4971
+ "epoch": 9.19,
4972
+ "learning_rate": 0.0,
4973
+ "loss": 2.4288,
4974
+ "step": 388000
4975
+ },
4976
+ {
4977
+ "epoch": 9.2,
4978
+ "learning_rate": 0.0,
4979
+ "loss": 2.446,
4980
+ "step": 388500
4981
+ },
4982
+ {
4983
+ "epoch": 9.21,
4984
+ "learning_rate": 0.0,
4985
+ "loss": 2.4166,
4986
+ "step": 389000
4987
+ },
4988
+ {
4989
+ "epoch": 9.22,
4990
+ "learning_rate": 0.0,
4991
+ "loss": 2.4268,
4992
+ "step": 389500
4993
+ },
4994
+ {
4995
+ "epoch": 9.23,
4996
+ "learning_rate": 0.0,
4997
+ "loss": 2.4334,
4998
+ "step": 390000
4999
+ },
5000
+ {
5001
+ "epoch": 9.23,
5002
+ "eval_loss": 3.1522228717803955,
5003
+ "eval_runtime": 113.0688,
5004
+ "eval_samples_per_second": 166.023,
5005
+ "eval_steps_per_second": 20.757,
5006
+ "step": 390000
5007
  }
5008
  ],
5009
  "max_steps": 633540,
5010
  "num_train_epochs": 15,
5011
+ "total_flos": 8.2348841714688e+16,
5012
  "trial_name": null,
5013
  "trial_params": null
5014
  }