Training in progress, step 390000
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995605445
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdba09a21a6fe137d8ba359aac9297fdf4dbf33d9b301f28af9c14375f6817c1
|
3 |
size 995605445
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45c2d59896d59a19db4a7827a3260af1e2736fb9670920d9776b8ffe7675ce78
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b771a1fe99a5de1f25e62006cef44ab6aa7ae84aa666df6010f762208efae301
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4876,11 +4876,139 @@
|
|
4876 |
"eval_samples_per_second": 166.019,
|
4877 |
"eval_steps_per_second": 20.757,
|
4878 |
"step": 380000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4879 |
}
|
4880 |
],
|
4881 |
"max_steps": 633540,
|
4882 |
"num_train_epochs": 15,
|
4883 |
-
"total_flos": 8.
|
4884 |
"trial_name": null,
|
4885 |
"trial_params": null
|
4886 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.23382896107586,
|
5 |
+
"global_step": 390000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4876 |
"eval_samples_per_second": 166.019,
|
4877 |
"eval_steps_per_second": 20.757,
|
4878 |
"step": 380000
|
4879 |
+
},
|
4880 |
+
{
|
4881 |
+
"epoch": 9.01,
|
4882 |
+
"learning_rate": 0.0,
|
4883 |
+
"loss": 2.4446,
|
4884 |
+
"step": 380500
|
4885 |
+
},
|
4886 |
+
{
|
4887 |
+
"epoch": 9.02,
|
4888 |
+
"learning_rate": 0.0,
|
4889 |
+
"loss": 2.4163,
|
4890 |
+
"step": 381000
|
4891 |
+
},
|
4892 |
+
{
|
4893 |
+
"epoch": 9.03,
|
4894 |
+
"learning_rate": 0.0,
|
4895 |
+
"loss": 2.4384,
|
4896 |
+
"step": 381500
|
4897 |
+
},
|
4898 |
+
{
|
4899 |
+
"epoch": 9.04,
|
4900 |
+
"learning_rate": 0.0,
|
4901 |
+
"loss": 2.4467,
|
4902 |
+
"step": 382000
|
4903 |
+
},
|
4904 |
+
{
|
4905 |
+
"epoch": 9.06,
|
4906 |
+
"learning_rate": 0.0,
|
4907 |
+
"loss": 2.4226,
|
4908 |
+
"step": 382500
|
4909 |
+
},
|
4910 |
+
{
|
4911 |
+
"epoch": 9.07,
|
4912 |
+
"learning_rate": 0.0,
|
4913 |
+
"loss": 2.4246,
|
4914 |
+
"step": 383000
|
4915 |
+
},
|
4916 |
+
{
|
4917 |
+
"epoch": 9.08,
|
4918 |
+
"learning_rate": 0.0,
|
4919 |
+
"loss": 2.4276,
|
4920 |
+
"step": 383500
|
4921 |
+
},
|
4922 |
+
{
|
4923 |
+
"epoch": 9.09,
|
4924 |
+
"learning_rate": 0.0,
|
4925 |
+
"loss": 2.4267,
|
4926 |
+
"step": 384000
|
4927 |
+
},
|
4928 |
+
{
|
4929 |
+
"epoch": 9.1,
|
4930 |
+
"learning_rate": 0.0,
|
4931 |
+
"loss": 2.4246,
|
4932 |
+
"step": 384500
|
4933 |
+
},
|
4934 |
+
{
|
4935 |
+
"epoch": 9.12,
|
4936 |
+
"learning_rate": 0.0,
|
4937 |
+
"loss": 2.4199,
|
4938 |
+
"step": 385000
|
4939 |
+
},
|
4940 |
+
{
|
4941 |
+
"epoch": 9.13,
|
4942 |
+
"learning_rate": 0.0,
|
4943 |
+
"loss": 2.4252,
|
4944 |
+
"step": 385500
|
4945 |
+
},
|
4946 |
+
{
|
4947 |
+
"epoch": 9.14,
|
4948 |
+
"learning_rate": 0.0,
|
4949 |
+
"loss": 2.4306,
|
4950 |
+
"step": 386000
|
4951 |
+
},
|
4952 |
+
{
|
4953 |
+
"epoch": 9.15,
|
4954 |
+
"learning_rate": 0.0,
|
4955 |
+
"loss": 2.4269,
|
4956 |
+
"step": 386500
|
4957 |
+
},
|
4958 |
+
{
|
4959 |
+
"epoch": 9.16,
|
4960 |
+
"learning_rate": 0.0,
|
4961 |
+
"loss": 2.4259,
|
4962 |
+
"step": 387000
|
4963 |
+
},
|
4964 |
+
{
|
4965 |
+
"epoch": 9.17,
|
4966 |
+
"learning_rate": 0.0,
|
4967 |
+
"loss": 2.423,
|
4968 |
+
"step": 387500
|
4969 |
+
},
|
4970 |
+
{
|
4971 |
+
"epoch": 9.19,
|
4972 |
+
"learning_rate": 0.0,
|
4973 |
+
"loss": 2.4288,
|
4974 |
+
"step": 388000
|
4975 |
+
},
|
4976 |
+
{
|
4977 |
+
"epoch": 9.2,
|
4978 |
+
"learning_rate": 0.0,
|
4979 |
+
"loss": 2.446,
|
4980 |
+
"step": 388500
|
4981 |
+
},
|
4982 |
+
{
|
4983 |
+
"epoch": 9.21,
|
4984 |
+
"learning_rate": 0.0,
|
4985 |
+
"loss": 2.4166,
|
4986 |
+
"step": 389000
|
4987 |
+
},
|
4988 |
+
{
|
4989 |
+
"epoch": 9.22,
|
4990 |
+
"learning_rate": 0.0,
|
4991 |
+
"loss": 2.4268,
|
4992 |
+
"step": 389500
|
4993 |
+
},
|
4994 |
+
{
|
4995 |
+
"epoch": 9.23,
|
4996 |
+
"learning_rate": 0.0,
|
4997 |
+
"loss": 2.4334,
|
4998 |
+
"step": 390000
|
4999 |
+
},
|
5000 |
+
{
|
5001 |
+
"epoch": 9.23,
|
5002 |
+
"eval_loss": 3.1522228717803955,
|
5003 |
+
"eval_runtime": 113.0688,
|
5004 |
+
"eval_samples_per_second": 166.023,
|
5005 |
+
"eval_steps_per_second": 20.757,
|
5006 |
+
"step": 390000
|
5007 |
}
|
5008 |
],
|
5009 |
"max_steps": 633540,
|
5010 |
"num_train_epochs": 15,
|
5011 |
+
"total_flos": 8.2348841714688e+16,
|
5012 |
"trial_name": null,
|
5013 |
"trial_params": null
|
5014 |
}
|