Training in progress, step 330000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:54534335b10706006d453ec56a979a5d5c461f57d0f68d1a6ad0a77c57a0aecf
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ea75c11d2eaacb0e2bbd235a00be3c1d10c576c2381b03ccce251e6969bea54
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:636cb28fce30ad56f68aface20193360fd815697da4c2ec39f5ca647b5e6b45b
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 8.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -6406,11 +6406,211 @@
|
|
6406 |
"eval_samples_per_second": 802.848,
|
6407 |
"eval_steps_per_second": 12.846,
|
6408 |
"step": 320000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6409 |
}
|
6410 |
],
|
6411 |
"max_steps": 500000,
|
6412 |
"num_train_epochs": 13,
|
6413 |
-
"total_flos": 1.
|
6414 |
"trial_name": null,
|
6415 |
"trial_params": null
|
6416 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.410214587899485,
|
5 |
+
"global_step": 330000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
6406 |
"eval_samples_per_second": 802.848,
|
6407 |
"eval_steps_per_second": 12.846,
|
6408 |
"step": 320000
|
6409 |
+
},
|
6410 |
+
{
|
6411 |
+
"epoch": 8.17,
|
6412 |
+
"learning_rate": 0.00010073144655927253,
|
6413 |
+
"loss": 0.2809,
|
6414 |
+
"step": 320500
|
6415 |
+
},
|
6416 |
+
{
|
6417 |
+
"epoch": 8.18,
|
6418 |
+
"learning_rate": 0.0001002870876838929,
|
6419 |
+
"loss": 0.28,
|
6420 |
+
"step": 321000
|
6421 |
+
},
|
6422 |
+
{
|
6423 |
+
"epoch": 8.18,
|
6424 |
+
"eval_loss": 0.8134902119636536,
|
6425 |
+
"eval_runtime": 1.3191,
|
6426 |
+
"eval_samples_per_second": 758.118,
|
6427 |
+
"eval_steps_per_second": 12.13,
|
6428 |
+
"step": 321000
|
6429 |
+
},
|
6430 |
+
{
|
6431 |
+
"epoch": 8.19,
|
6432 |
+
"learning_rate": 9.984332714015662e-05,
|
6433 |
+
"loss": 0.2799,
|
6434 |
+
"step": 321500
|
6435 |
+
},
|
6436 |
+
{
|
6437 |
+
"epoch": 8.21,
|
6438 |
+
"learning_rate": 9.94001697809578e-05,
|
6439 |
+
"loss": 0.2796,
|
6440 |
+
"step": 322000
|
6441 |
+
},
|
6442 |
+
{
|
6443 |
+
"epoch": 8.21,
|
6444 |
+
"eval_loss": 0.8164393305778503,
|
6445 |
+
"eval_runtime": 1.2535,
|
6446 |
+
"eval_samples_per_second": 797.791,
|
6447 |
+
"eval_steps_per_second": 12.765,
|
6448 |
+
"step": 322000
|
6449 |
+
},
|
6450 |
+
{
|
6451 |
+
"epoch": 8.22,
|
6452 |
+
"learning_rate": 9.895762045259445e-05,
|
6453 |
+
"loss": 0.2797,
|
6454 |
+
"step": 322500
|
6455 |
+
},
|
6456 |
+
{
|
6457 |
+
"epoch": 8.23,
|
6458 |
+
"learning_rate": 9.851568399471498e-05,
|
6459 |
+
"loss": 0.2793,
|
6460 |
+
"step": 323000
|
6461 |
+
},
|
6462 |
+
{
|
6463 |
+
"epoch": 8.23,
|
6464 |
+
"eval_loss": 0.8119146823883057,
|
6465 |
+
"eval_runtime": 1.2514,
|
6466 |
+
"eval_samples_per_second": 799.097,
|
6467 |
+
"eval_steps_per_second": 12.786,
|
6468 |
+
"step": 323000
|
6469 |
+
},
|
6470 |
+
{
|
6471 |
+
"epoch": 8.24,
|
6472 |
+
"learning_rate": 9.807436524026574e-05,
|
6473 |
+
"loss": 0.2797,
|
6474 |
+
"step": 323500
|
6475 |
+
},
|
6476 |
+
{
|
6477 |
+
"epoch": 8.26,
|
6478 |
+
"learning_rate": 9.763366901543801e-05,
|
6479 |
+
"loss": 0.2791,
|
6480 |
+
"step": 324000
|
6481 |
+
},
|
6482 |
+
{
|
6483 |
+
"epoch": 8.26,
|
6484 |
+
"eval_loss": 0.8064904808998108,
|
6485 |
+
"eval_runtime": 1.2268,
|
6486 |
+
"eval_samples_per_second": 815.12,
|
6487 |
+
"eval_steps_per_second": 13.042,
|
6488 |
+
"step": 324000
|
6489 |
+
},
|
6490 |
+
{
|
6491 |
+
"epoch": 8.27,
|
6492 |
+
"learning_rate": 9.719360013961495e-05,
|
6493 |
+
"loss": 0.2793,
|
6494 |
+
"step": 324500
|
6495 |
+
},
|
6496 |
+
{
|
6497 |
+
"epoch": 8.28,
|
6498 |
+
"learning_rate": 9.675416342531944e-05,
|
6499 |
+
"loss": 0.2793,
|
6500 |
+
"step": 325000
|
6501 |
+
},
|
6502 |
+
{
|
6503 |
+
"epoch": 8.28,
|
6504 |
+
"eval_loss": 0.8141771554946899,
|
6505 |
+
"eval_runtime": 1.2692,
|
6506 |
+
"eval_samples_per_second": 787.921,
|
6507 |
+
"eval_steps_per_second": 12.607,
|
6508 |
+
"step": 325000
|
6509 |
+
},
|
6510 |
+
{
|
6511 |
+
"epoch": 8.3,
|
6512 |
+
"learning_rate": 9.631536367816086e-05,
|
6513 |
+
"loss": 0.2798,
|
6514 |
+
"step": 325500
|
6515 |
+
},
|
6516 |
+
{
|
6517 |
+
"epoch": 8.31,
|
6518 |
+
"learning_rate": 9.587720569678299e-05,
|
6519 |
+
"loss": 0.2794,
|
6520 |
+
"step": 326000
|
6521 |
+
},
|
6522 |
+
{
|
6523 |
+
"epoch": 8.31,
|
6524 |
+
"eval_loss": 0.803835391998291,
|
6525 |
+
"eval_runtime": 1.24,
|
6526 |
+
"eval_samples_per_second": 806.471,
|
6527 |
+
"eval_steps_per_second": 12.904,
|
6528 |
+
"step": 326000
|
6529 |
+
},
|
6530 |
+
{
|
6531 |
+
"epoch": 8.32,
|
6532 |
+
"learning_rate": 9.543969427281131e-05,
|
6533 |
+
"loss": 0.2791,
|
6534 |
+
"step": 326500
|
6535 |
+
},
|
6536 |
+
{
|
6537 |
+
"epoch": 8.33,
|
6538 |
+
"learning_rate": 9.500283419080062e-05,
|
6539 |
+
"loss": 0.2792,
|
6540 |
+
"step": 327000
|
6541 |
+
},
|
6542 |
+
{
|
6543 |
+
"epoch": 8.33,
|
6544 |
+
"eval_loss": 0.81174635887146,
|
6545 |
+
"eval_runtime": 1.301,
|
6546 |
+
"eval_samples_per_second": 768.659,
|
6547 |
+
"eval_steps_per_second": 12.299,
|
6548 |
+
"step": 327000
|
6549 |
+
},
|
6550 |
+
{
|
6551 |
+
"epoch": 8.35,
|
6552 |
+
"learning_rate": 9.45666302281829e-05,
|
6553 |
+
"loss": 0.2787,
|
6554 |
+
"step": 327500
|
6555 |
+
},
|
6556 |
+
{
|
6557 |
+
"epoch": 8.36,
|
6558 |
+
"learning_rate": 9.413108715521467e-05,
|
6559 |
+
"loss": 0.2789,
|
6560 |
+
"step": 328000
|
6561 |
+
},
|
6562 |
+
{
|
6563 |
+
"epoch": 8.36,
|
6564 |
+
"eval_loss": 0.8118357062339783,
|
6565 |
+
"eval_runtime": 1.2706,
|
6566 |
+
"eval_samples_per_second": 787.037,
|
6567 |
+
"eval_steps_per_second": 12.593,
|
6568 |
+
"step": 328000
|
6569 |
+
},
|
6570 |
+
{
|
6571 |
+
"epoch": 8.37,
|
6572 |
+
"learning_rate": 9.369620973492525e-05,
|
6573 |
+
"loss": 0.2794,
|
6574 |
+
"step": 328500
|
6575 |
+
},
|
6576 |
+
{
|
6577 |
+
"epoch": 8.38,
|
6578 |
+
"learning_rate": 9.326200272306445e-05,
|
6579 |
+
"loss": 0.2793,
|
6580 |
+
"step": 329000
|
6581 |
+
},
|
6582 |
+
{
|
6583 |
+
"epoch": 8.38,
|
6584 |
+
"eval_loss": 0.809190034866333,
|
6585 |
+
"eval_runtime": 1.2623,
|
6586 |
+
"eval_samples_per_second": 792.184,
|
6587 |
+
"eval_steps_per_second": 12.675,
|
6588 |
+
"step": 329000
|
6589 |
+
},
|
6590 |
+
{
|
6591 |
+
"epoch": 8.4,
|
6592 |
+
"learning_rate": 9.282847086805059e-05,
|
6593 |
+
"loss": 0.2788,
|
6594 |
+
"step": 329500
|
6595 |
+
},
|
6596 |
+
{
|
6597 |
+
"epoch": 8.41,
|
6598 |
+
"learning_rate": 9.239561891091853e-05,
|
6599 |
+
"loss": 0.279,
|
6600 |
+
"step": 330000
|
6601 |
+
},
|
6602 |
+
{
|
6603 |
+
"epoch": 8.41,
|
6604 |
+
"eval_loss": 0.8081182837486267,
|
6605 |
+
"eval_runtime": 1.2445,
|
6606 |
+
"eval_samples_per_second": 803.539,
|
6607 |
+
"eval_steps_per_second": 12.857,
|
6608 |
+
"step": 330000
|
6609 |
}
|
6610 |
],
|
6611 |
"max_steps": 500000,
|
6612 |
"num_train_epochs": 13,
|
6613 |
+
"total_flos": 1.054296774825414e+22,
|
6614 |
"trial_name": null,
|
6615 |
"trial_params": null
|
6616 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ea75c11d2eaacb0e2bbd235a00be3c1d10c576c2381b03ccce251e6969bea54
|
3 |
size 102501541
|