Training in progress, step 330000
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995605445
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3af69497cd7909d18e142fdb28af685d5fd4b6d1152a10069e6f9547b752a3d5
|
3 |
size 995605445
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:926d62aa8c0e8eb933d167c471771b77e84d87c1ca180d71a4153e846dcaa5bc
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dac0a7c6aeb1853f8f712f2c9f2553cbe98523717c3c148ee76debf888894532
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -4108,11 +4108,139 @@
|
|
4108 |
"eval_samples_per_second": 166.315,
|
4109 |
"eval_steps_per_second": 20.794,
|
4110 |
"step": 320000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4111 |
}
|
4112 |
],
|
4113 |
"max_steps": 633540,
|
4114 |
"num_train_epochs": 15,
|
4115 |
-
"total_flos": 6.
|
4116 |
"trial_name": null,
|
4117 |
"trial_params": null
|
4118 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.813239890141112,
|
5 |
+
"global_step": 330000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
4108 |
"eval_samples_per_second": 166.315,
|
4109 |
"eval_steps_per_second": 20.794,
|
4110 |
"step": 320000
|
4111 |
+
},
|
4112 |
+
{
|
4113 |
+
"epoch": 7.59,
|
4114 |
+
"learning_rate": 0.0,
|
4115 |
+
"loss": 2.4285,
|
4116 |
+
"step": 320500
|
4117 |
+
},
|
4118 |
+
{
|
4119 |
+
"epoch": 7.6,
|
4120 |
+
"learning_rate": 0.0,
|
4121 |
+
"loss": 2.4267,
|
4122 |
+
"step": 321000
|
4123 |
+
},
|
4124 |
+
{
|
4125 |
+
"epoch": 7.61,
|
4126 |
+
"learning_rate": 0.0,
|
4127 |
+
"loss": 2.4389,
|
4128 |
+
"step": 321500
|
4129 |
+
},
|
4130 |
+
{
|
4131 |
+
"epoch": 7.62,
|
4132 |
+
"learning_rate": 0.0,
|
4133 |
+
"loss": 2.4277,
|
4134 |
+
"step": 322000
|
4135 |
+
},
|
4136 |
+
{
|
4137 |
+
"epoch": 7.64,
|
4138 |
+
"learning_rate": 0.0,
|
4139 |
+
"loss": 2.4349,
|
4140 |
+
"step": 322500
|
4141 |
+
},
|
4142 |
+
{
|
4143 |
+
"epoch": 7.65,
|
4144 |
+
"learning_rate": 0.0,
|
4145 |
+
"loss": 2.4376,
|
4146 |
+
"step": 323000
|
4147 |
+
},
|
4148 |
+
{
|
4149 |
+
"epoch": 7.66,
|
4150 |
+
"learning_rate": 0.0,
|
4151 |
+
"loss": 2.4453,
|
4152 |
+
"step": 323500
|
4153 |
+
},
|
4154 |
+
{
|
4155 |
+
"epoch": 7.67,
|
4156 |
+
"learning_rate": 0.0,
|
4157 |
+
"loss": 2.4331,
|
4158 |
+
"step": 324000
|
4159 |
+
},
|
4160 |
+
{
|
4161 |
+
"epoch": 7.68,
|
4162 |
+
"learning_rate": 0.0,
|
4163 |
+
"loss": 2.4418,
|
4164 |
+
"step": 324500
|
4165 |
+
},
|
4166 |
+
{
|
4167 |
+
"epoch": 7.69,
|
4168 |
+
"learning_rate": 0.0,
|
4169 |
+
"loss": 2.4342,
|
4170 |
+
"step": 325000
|
4171 |
+
},
|
4172 |
+
{
|
4173 |
+
"epoch": 7.71,
|
4174 |
+
"learning_rate": 0.0,
|
4175 |
+
"loss": 2.4216,
|
4176 |
+
"step": 325500
|
4177 |
+
},
|
4178 |
+
{
|
4179 |
+
"epoch": 7.72,
|
4180 |
+
"learning_rate": 0.0,
|
4181 |
+
"loss": 2.4335,
|
4182 |
+
"step": 326000
|
4183 |
+
},
|
4184 |
+
{
|
4185 |
+
"epoch": 7.73,
|
4186 |
+
"learning_rate": 0.0,
|
4187 |
+
"loss": 2.4472,
|
4188 |
+
"step": 326500
|
4189 |
+
},
|
4190 |
+
{
|
4191 |
+
"epoch": 7.74,
|
4192 |
+
"learning_rate": 0.0,
|
4193 |
+
"loss": 2.4415,
|
4194 |
+
"step": 327000
|
4195 |
+
},
|
4196 |
+
{
|
4197 |
+
"epoch": 7.75,
|
4198 |
+
"learning_rate": 0.0,
|
4199 |
+
"loss": 2.4462,
|
4200 |
+
"step": 327500
|
4201 |
+
},
|
4202 |
+
{
|
4203 |
+
"epoch": 7.77,
|
4204 |
+
"learning_rate": 0.0,
|
4205 |
+
"loss": 2.4519,
|
4206 |
+
"step": 328000
|
4207 |
+
},
|
4208 |
+
{
|
4209 |
+
"epoch": 7.78,
|
4210 |
+
"learning_rate": 0.0,
|
4211 |
+
"loss": 2.3932,
|
4212 |
+
"step": 328500
|
4213 |
+
},
|
4214 |
+
{
|
4215 |
+
"epoch": 7.79,
|
4216 |
+
"learning_rate": 0.0,
|
4217 |
+
"loss": 2.425,
|
4218 |
+
"step": 329000
|
4219 |
+
},
|
4220 |
+
{
|
4221 |
+
"epoch": 7.8,
|
4222 |
+
"learning_rate": 0.0,
|
4223 |
+
"loss": 2.4297,
|
4224 |
+
"step": 329500
|
4225 |
+
},
|
4226 |
+
{
|
4227 |
+
"epoch": 7.81,
|
4228 |
+
"learning_rate": 0.0,
|
4229 |
+
"loss": 2.4363,
|
4230 |
+
"step": 330000
|
4231 |
+
},
|
4232 |
+
{
|
4233 |
+
"epoch": 7.81,
|
4234 |
+
"eval_loss": 3.1522228717803955,
|
4235 |
+
"eval_runtime": 112.8083,
|
4236 |
+
"eval_samples_per_second": 166.406,
|
4237 |
+
"eval_steps_per_second": 20.805,
|
4238 |
+
"step": 330000
|
4239 |
}
|
4240 |
],
|
4241 |
"max_steps": 633540,
|
4242 |
"num_train_epochs": 15,
|
4243 |
+
"total_flos": 6.971360467392e+16,
|
4244 |
"trial_name": null,
|
4245 |
"trial_params": null
|
4246 |
}
|