Training in progress, step 60000
Browse files- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +131 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b1476fc7f63764e7c906982b1b12a6daf7ebe1a4c9eabc5fef44fc57fcc9502
|
3 |
+
size 995605445
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510398013
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06dbc8237a227289c2c378d392e11aa4349c38612caeca6588a0593a85dc6327
|
3 |
size 510398013
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:238fcc56614b79437ea498441742d1361699701d7545b2165136d90ba11cb014
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07b2728bb8093f11b5a52109cc3872bbd75c6945624640e5b69917df439310bd
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -652,11 +652,139 @@
|
|
652 |
"eval_samples_per_second": 165.853,
|
653 |
"eval_steps_per_second": 20.736,
|
654 |
"step": 50000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
655 |
}
|
656 |
],
|
657 |
"max_steps": 633540,
|
658 |
"num_train_epochs": 15,
|
659 |
-
"total_flos": 1.
|
660 |
"trial_name": null,
|
661 |
"trial_params": null
|
662 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.4205890709347475,
|
5 |
+
"global_step": 60000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
652 |
"eval_samples_per_second": 165.853,
|
653 |
"eval_steps_per_second": 20.736,
|
654 |
"step": 50000
|
655 |
+
},
|
656 |
+
{
|
657 |
+
"epoch": 1.2,
|
658 |
+
"learning_rate": 3.275726868074628e-05,
|
659 |
+
"loss": 3.0453,
|
660 |
+
"step": 50500
|
661 |
+
},
|
662 |
+
{
|
663 |
+
"epoch": 1.21,
|
664 |
+
"learning_rate": 3.269413138870474e-05,
|
665 |
+
"loss": 3.014,
|
666 |
+
"step": 51000
|
667 |
+
},
|
668 |
+
{
|
669 |
+
"epoch": 1.22,
|
670 |
+
"learning_rate": 3.26309940966632e-05,
|
671 |
+
"loss": 3.0549,
|
672 |
+
"step": 51500
|
673 |
+
},
|
674 |
+
{
|
675 |
+
"epoch": 1.23,
|
676 |
+
"learning_rate": 3.256785680462165e-05,
|
677 |
+
"loss": 3.0268,
|
678 |
+
"step": 52000
|
679 |
+
},
|
680 |
+
{
|
681 |
+
"epoch": 1.24,
|
682 |
+
"learning_rate": 3.250471951258011e-05,
|
683 |
+
"loss": 3.0389,
|
684 |
+
"step": 52500
|
685 |
+
},
|
686 |
+
{
|
687 |
+
"epoch": 1.25,
|
688 |
+
"learning_rate": 3.244158222053857e-05,
|
689 |
+
"loss": 3.0315,
|
690 |
+
"step": 53000
|
691 |
+
},
|
692 |
+
{
|
693 |
+
"epoch": 1.27,
|
694 |
+
"learning_rate": 3.237844492849702e-05,
|
695 |
+
"loss": 3.0363,
|
696 |
+
"step": 53500
|
697 |
+
},
|
698 |
+
{
|
699 |
+
"epoch": 1.28,
|
700 |
+
"learning_rate": 3.231530763645547e-05,
|
701 |
+
"loss": 3.0291,
|
702 |
+
"step": 54000
|
703 |
+
},
|
704 |
+
{
|
705 |
+
"epoch": 1.29,
|
706 |
+
"learning_rate": 3.225217034441393e-05,
|
707 |
+
"loss": 3.0257,
|
708 |
+
"step": 54500
|
709 |
+
},
|
710 |
+
{
|
711 |
+
"epoch": 1.3,
|
712 |
+
"learning_rate": 3.218903305237238e-05,
|
713 |
+
"loss": 3.0457,
|
714 |
+
"step": 55000
|
715 |
+
},
|
716 |
+
{
|
717 |
+
"epoch": 1.31,
|
718 |
+
"learning_rate": 3.212589576033084e-05,
|
719 |
+
"loss": 3.0431,
|
720 |
+
"step": 55500
|
721 |
+
},
|
722 |
+
{
|
723 |
+
"epoch": 1.33,
|
724 |
+
"learning_rate": 3.20627584682893e-05,
|
725 |
+
"loss": 3.0591,
|
726 |
+
"step": 56000
|
727 |
+
},
|
728 |
+
{
|
729 |
+
"epoch": 1.34,
|
730 |
+
"learning_rate": 3.199962117624775e-05,
|
731 |
+
"loss": 3.0525,
|
732 |
+
"step": 56500
|
733 |
+
},
|
734 |
+
{
|
735 |
+
"epoch": 1.35,
|
736 |
+
"learning_rate": 3.193648388420621e-05,
|
737 |
+
"loss": 3.0479,
|
738 |
+
"step": 57000
|
739 |
+
},
|
740 |
+
{
|
741 |
+
"epoch": 1.36,
|
742 |
+
"learning_rate": 3.187334659216466e-05,
|
743 |
+
"loss": 3.019,
|
744 |
+
"step": 57500
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"epoch": 1.37,
|
748 |
+
"learning_rate": 3.181020930012312e-05,
|
749 |
+
"loss": 3.0356,
|
750 |
+
"step": 58000
|
751 |
+
},
|
752 |
+
{
|
753 |
+
"epoch": 1.39,
|
754 |
+
"learning_rate": 3.174707200808158e-05,
|
755 |
+
"loss": 3.0549,
|
756 |
+
"step": 58500
|
757 |
+
},
|
758 |
+
{
|
759 |
+
"epoch": 1.4,
|
760 |
+
"learning_rate": 3.168393471604003e-05,
|
761 |
+
"loss": 3.0439,
|
762 |
+
"step": 59000
|
763 |
+
},
|
764 |
+
{
|
765 |
+
"epoch": 1.41,
|
766 |
+
"learning_rate": 3.162079742399849e-05,
|
767 |
+
"loss": 3.0491,
|
768 |
+
"step": 59500
|
769 |
+
},
|
770 |
+
{
|
771 |
+
"epoch": 1.42,
|
772 |
+
"learning_rate": 3.155766013195695e-05,
|
773 |
+
"loss": 3.0409,
|
774 |
+
"step": 60000
|
775 |
+
},
|
776 |
+
{
|
777 |
+
"epoch": 1.42,
|
778 |
+
"eval_loss": 3.1459972858428955,
|
779 |
+
"eval_runtime": 113.0693,
|
780 |
+
"eval_samples_per_second": 166.022,
|
781 |
+
"eval_steps_per_second": 20.757,
|
782 |
+
"step": 60000
|
783 |
}
|
784 |
],
|
785 |
"max_steps": 633540,
|
786 |
"num_train_epochs": 15,
|
787 |
+
"total_flos": 1.2788110741248e+16,
|
788 |
"trial_name": null,
|
789 |
"trial_params": null
|
790 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510398013
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06dbc8237a227289c2c378d392e11aa4349c38612caeca6588a0593a85dc6327
|
3 |
size 510398013
|