Training in progress, step 5376, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 903834408
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff920e5ccc2d2a405fd728410318b5d77a58a46344ae8542bb3adc83fa0b5aa5
|
3 |
size 903834408
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1807824186
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc711544bbc463382370e087fb56a499145af6b3d8d135d3d2ad1e1612e329ff
|
3 |
size 1807824186
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efc2f5216a7387c189c9349463d3b40a111802d247e9267a1c2d9b8f7b01f222
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a2cc71334cc40bd202de7bb17936da773f888ad3b66367935ef5b0e0f47c791
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 16,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -40542,6 +40542,127 @@
|
|
40542 |
"eval_samples_per_second": 11.555,
|
40543 |
"eval_steps_per_second": 1.444,
|
40544 |
"step": 5360
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40545 |
}
|
40546 |
],
|
40547 |
"logging_steps": 1,
|
@@ -40561,7 +40682,7 @@
|
|
40561 |
"attributes": {}
|
40562 |
}
|
40563 |
},
|
40564 |
-
"total_flos": 1.
|
40565 |
"train_batch_size": 8,
|
40566 |
"trial_name": null,
|
40567 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.3475393939393939,
|
5 |
"eval_steps": 16,
|
6 |
+
"global_step": 5376,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
40542 |
"eval_samples_per_second": 11.555,
|
40543 |
"eval_steps_per_second": 1.444,
|
40544 |
"step": 5360
|
40545 |
+
},
|
40546 |
+
{
|
40547 |
+
"epoch": 0.34656969696969697,
|
40548 |
+
"grad_norm": 0.0666361004114151,
|
40549 |
+
"learning_rate": 0.00019445821927260975,
|
40550 |
+
"loss": 0.0985,
|
40551 |
+
"step": 5361
|
40552 |
+
},
|
40553 |
+
{
|
40554 |
+
"epoch": 0.34663434343434346,
|
40555 |
+
"grad_norm": 0.0619664341211319,
|
40556 |
+
"learning_rate": 0.0001944559741044043,
|
40557 |
+
"loss": 0.0795,
|
40558 |
+
"step": 5362
|
40559 |
+
},
|
40560 |
+
{
|
40561 |
+
"epoch": 0.3466989898989899,
|
40562 |
+
"grad_norm": 0.07481850683689117,
|
40563 |
+
"learning_rate": 0.0001944537284944595,
|
40564 |
+
"loss": 0.0889,
|
40565 |
+
"step": 5363
|
40566 |
+
},
|
40567 |
+
{
|
40568 |
+
"epoch": 0.3467636363636364,
|
40569 |
+
"grad_norm": 0.10630080848932266,
|
40570 |
+
"learning_rate": 0.00019445148244278586,
|
40571 |
+
"loss": 0.1363,
|
40572 |
+
"step": 5364
|
40573 |
+
},
|
40574 |
+
{
|
40575 |
+
"epoch": 0.3468282828282828,
|
40576 |
+
"grad_norm": 0.06847912818193436,
|
40577 |
+
"learning_rate": 0.00019444923594939386,
|
40578 |
+
"loss": 0.0961,
|
40579 |
+
"step": 5365
|
40580 |
+
},
|
40581 |
+
{
|
40582 |
+
"epoch": 0.3468929292929293,
|
40583 |
+
"grad_norm": 0.06266526877880096,
|
40584 |
+
"learning_rate": 0.00019444698901429397,
|
40585 |
+
"loss": 0.0847,
|
40586 |
+
"step": 5366
|
40587 |
+
},
|
40588 |
+
{
|
40589 |
+
"epoch": 0.34695757575757574,
|
40590 |
+
"grad_norm": 0.058274924755096436,
|
40591 |
+
"learning_rate": 0.00019444474163749677,
|
40592 |
+
"loss": 0.0782,
|
40593 |
+
"step": 5367
|
40594 |
+
},
|
40595 |
+
{
|
40596 |
+
"epoch": 0.34702222222222223,
|
40597 |
+
"grad_norm": 0.06570993363857269,
|
40598 |
+
"learning_rate": 0.00019444249381901272,
|
40599 |
+
"loss": 0.093,
|
40600 |
+
"step": 5368
|
40601 |
+
},
|
40602 |
+
{
|
40603 |
+
"epoch": 0.34708686868686867,
|
40604 |
+
"grad_norm": 0.06814686208963394,
|
40605 |
+
"learning_rate": 0.00019444024555885237,
|
40606 |
+
"loss": 0.0896,
|
40607 |
+
"step": 5369
|
40608 |
+
},
|
40609 |
+
{
|
40610 |
+
"epoch": 0.34715151515151516,
|
40611 |
+
"grad_norm": 0.06851497292518616,
|
40612 |
+
"learning_rate": 0.0001944379968570262,
|
40613 |
+
"loss": 0.1058,
|
40614 |
+
"step": 5370
|
40615 |
+
},
|
40616 |
+
{
|
40617 |
+
"epoch": 0.3472161616161616,
|
40618 |
+
"grad_norm": 0.058960504829883575,
|
40619 |
+
"learning_rate": 0.00019443574771354474,
|
40620 |
+
"loss": 0.0793,
|
40621 |
+
"step": 5371
|
40622 |
+
},
|
40623 |
+
{
|
40624 |
+
"epoch": 0.3472808080808081,
|
40625 |
+
"grad_norm": 0.06058274954557419,
|
40626 |
+
"learning_rate": 0.0001944334981284185,
|
40627 |
+
"loss": 0.0858,
|
40628 |
+
"step": 5372
|
40629 |
+
},
|
40630 |
+
{
|
40631 |
+
"epoch": 0.34734545454545457,
|
40632 |
+
"grad_norm": 0.06411627680063248,
|
40633 |
+
"learning_rate": 0.00019443124810165802,
|
40634 |
+
"loss": 0.0831,
|
40635 |
+
"step": 5373
|
40636 |
+
},
|
40637 |
+
{
|
40638 |
+
"epoch": 0.347410101010101,
|
40639 |
+
"grad_norm": 0.06605079025030136,
|
40640 |
+
"learning_rate": 0.00019442899763327378,
|
40641 |
+
"loss": 0.0885,
|
40642 |
+
"step": 5374
|
40643 |
+
},
|
40644 |
+
{
|
40645 |
+
"epoch": 0.3474747474747475,
|
40646 |
+
"grad_norm": 0.058634303510189056,
|
40647 |
+
"learning_rate": 0.00019442674672327638,
|
40648 |
+
"loss": 0.0788,
|
40649 |
+
"step": 5375
|
40650 |
+
},
|
40651 |
+
{
|
40652 |
+
"epoch": 0.3475393939393939,
|
40653 |
+
"grad_norm": 0.06674228608608246,
|
40654 |
+
"learning_rate": 0.00019442449537167628,
|
40655 |
+
"loss": 0.1054,
|
40656 |
+
"step": 5376
|
40657 |
+
},
|
40658 |
+
{
|
40659 |
+
"epoch": 0.3475393939393939,
|
40660 |
+
"eval_bleu": 15.692248886076214,
|
40661 |
+
"eval_loss": 0.09231525659561157,
|
40662 |
+
"eval_runtime": 2.8228,
|
40663 |
+
"eval_samples_per_second": 11.336,
|
40664 |
+
"eval_steps_per_second": 1.417,
|
40665 |
+
"step": 5376
|
40666 |
}
|
40667 |
],
|
40668 |
"logging_steps": 1,
|
|
|
40682 |
"attributes": {}
|
40683 |
}
|
40684 |
},
|
40685 |
+
"total_flos": 1.0476024382881792e+17,
|
40686 |
"train_batch_size": 8,
|
40687 |
"trial_name": null,
|
40688 |
"trial_params": null
|