Training in progress, step 500000
Browse files
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995605445
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da5c66e223c9afd97a3b2031f28400ecc3ff5cf48322a645ecb5c3d4ba9e2cc8
|
3 |
size 995605445
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:724fe75b2891e3eb33370e185b7549e8b85ea750d0af5d509737ae3f8927f173
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2934ce8ee53a3f126996335cb7a1d2a9354eb222cdf717375f161332a72ce0b8
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 11.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -6284,11 +6284,139 @@
|
|
6284 |
"eval_samples_per_second": 166.429,
|
6285 |
"eval_steps_per_second": 20.808,
|
6286 |
"step": 490000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6287 |
}
|
6288 |
],
|
6289 |
"max_steps": 633540,
|
6290 |
"num_train_epochs": 15,
|
6291 |
-
"total_flos": 1.
|
6292 |
"trial_name": null,
|
6293 |
"trial_params": null
|
6294 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 11.838242257789563,
|
5 |
+
"global_step": 500000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
6284 |
"eval_samples_per_second": 166.429,
|
6285 |
"eval_steps_per_second": 20.808,
|
6286 |
"step": 490000
|
6287 |
+
},
|
6288 |
+
{
|
6289 |
+
"epoch": 11.61,
|
6290 |
+
"learning_rate": 0.0,
|
6291 |
+
"loss": 2.4271,
|
6292 |
+
"step": 490500
|
6293 |
+
},
|
6294 |
+
{
|
6295 |
+
"epoch": 11.63,
|
6296 |
+
"learning_rate": 0.0,
|
6297 |
+
"loss": 2.4343,
|
6298 |
+
"step": 491000
|
6299 |
+
},
|
6300 |
+
{
|
6301 |
+
"epoch": 11.64,
|
6302 |
+
"learning_rate": 0.0,
|
6303 |
+
"loss": 2.4414,
|
6304 |
+
"step": 491500
|
6305 |
+
},
|
6306 |
+
{
|
6307 |
+
"epoch": 11.65,
|
6308 |
+
"learning_rate": 0.0,
|
6309 |
+
"loss": 2.4335,
|
6310 |
+
"step": 492000
|
6311 |
+
},
|
6312 |
+
{
|
6313 |
+
"epoch": 11.66,
|
6314 |
+
"learning_rate": 0.0,
|
6315 |
+
"loss": 2.4353,
|
6316 |
+
"step": 492500
|
6317 |
+
},
|
6318 |
+
{
|
6319 |
+
"epoch": 11.67,
|
6320 |
+
"learning_rate": 0.0,
|
6321 |
+
"loss": 2.4308,
|
6322 |
+
"step": 493000
|
6323 |
+
},
|
6324 |
+
{
|
6325 |
+
"epoch": 11.68,
|
6326 |
+
"learning_rate": 0.0,
|
6327 |
+
"loss": 2.4395,
|
6328 |
+
"step": 493500
|
6329 |
+
},
|
6330 |
+
{
|
6331 |
+
"epoch": 11.7,
|
6332 |
+
"learning_rate": 0.0,
|
6333 |
+
"loss": 2.4239,
|
6334 |
+
"step": 494000
|
6335 |
+
},
|
6336 |
+
{
|
6337 |
+
"epoch": 11.71,
|
6338 |
+
"learning_rate": 0.0,
|
6339 |
+
"loss": 2.4247,
|
6340 |
+
"step": 494500
|
6341 |
+
},
|
6342 |
+
{
|
6343 |
+
"epoch": 11.72,
|
6344 |
+
"learning_rate": 0.0,
|
6345 |
+
"loss": 2.4229,
|
6346 |
+
"step": 495000
|
6347 |
+
},
|
6348 |
+
{
|
6349 |
+
"epoch": 11.73,
|
6350 |
+
"learning_rate": 0.0,
|
6351 |
+
"loss": 2.4253,
|
6352 |
+
"step": 495500
|
6353 |
+
},
|
6354 |
+
{
|
6355 |
+
"epoch": 11.74,
|
6356 |
+
"learning_rate": 0.0,
|
6357 |
+
"loss": 2.4108,
|
6358 |
+
"step": 496000
|
6359 |
+
},
|
6360 |
+
{
|
6361 |
+
"epoch": 11.76,
|
6362 |
+
"learning_rate": 0.0,
|
6363 |
+
"loss": 2.4133,
|
6364 |
+
"step": 496500
|
6365 |
+
},
|
6366 |
+
{
|
6367 |
+
"epoch": 11.77,
|
6368 |
+
"learning_rate": 0.0,
|
6369 |
+
"loss": 2.4318,
|
6370 |
+
"step": 497000
|
6371 |
+
},
|
6372 |
+
{
|
6373 |
+
"epoch": 11.78,
|
6374 |
+
"learning_rate": 0.0,
|
6375 |
+
"loss": 2.4255,
|
6376 |
+
"step": 497500
|
6377 |
+
},
|
6378 |
+
{
|
6379 |
+
"epoch": 11.79,
|
6380 |
+
"learning_rate": 0.0,
|
6381 |
+
"loss": 2.4123,
|
6382 |
+
"step": 498000
|
6383 |
+
},
|
6384 |
+
{
|
6385 |
+
"epoch": 11.8,
|
6386 |
+
"learning_rate": 0.0,
|
6387 |
+
"loss": 2.4323,
|
6388 |
+
"step": 498500
|
6389 |
+
},
|
6390 |
+
{
|
6391 |
+
"epoch": 11.81,
|
6392 |
+
"learning_rate": 0.0,
|
6393 |
+
"loss": 2.4014,
|
6394 |
+
"step": 499000
|
6395 |
+
},
|
6396 |
+
{
|
6397 |
+
"epoch": 11.83,
|
6398 |
+
"learning_rate": 0.0,
|
6399 |
+
"loss": 2.4377,
|
6400 |
+
"step": 499500
|
6401 |
+
},
|
6402 |
+
{
|
6403 |
+
"epoch": 11.84,
|
6404 |
+
"learning_rate": 0.0,
|
6405 |
+
"loss": 2.4497,
|
6406 |
+
"step": 500000
|
6407 |
+
},
|
6408 |
+
{
|
6409 |
+
"epoch": 11.84,
|
6410 |
+
"eval_loss": 3.1522228717803955,
|
6411 |
+
"eval_runtime": 113.0091,
|
6412 |
+
"eval_samples_per_second": 166.11,
|
6413 |
+
"eval_steps_per_second": 20.768,
|
6414 |
+
"step": 500000
|
6415 |
}
|
6416 |
],
|
6417 |
"max_steps": 633540,
|
6418 |
"num_train_epochs": 15,
|
6419 |
+
"total_flos": 1.0549596386304e+17,
|
6420 |
"trial_name": null,
|
6421 |
"trial_params": null
|
6422 |
}
|