HealthTeam
committed on
Commit
•
1fcdde7
1
Parent(s):
5441393
Training in progress, step 66288
Browse files
- last-checkpoint/optimizer.pt +2 -2
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +135 -3
- pytorch_model.bin +1 -1
- runs/Feb07_05-04-07_5214b674e698/events.out.tfevents.1675746342.5214b674e698.342.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2992361efcf4770d4c6602bb67d93320a187287e7fc6dc44519cbf03c5dd8d1
|
3 |
+
size 2401461637
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1200739717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4071ac5bc82e1943cce385333b946af39ce630c568c0ddebfc504245d7448e1
|
3 |
size 1200739717
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e52dd293318f93d0d24ab2680c6a46204bbcb9dee0ba0954189329cd5f7d5e2e
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c0312d2c26f50db24e7fa24aa7f3be59f0d2b84dcf88829a4f490d4d99de93a
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -666,11 +666,143 @@
|
|
666 |
"learning_rate": 1.4545436513839717e-05,
|
667 |
"loss": 3.0787,
|
668 |
"step": 55000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
669 |
}
|
670 |
],
|
671 |
"max_steps": 201666,
|
672 |
"num_train_epochs": 3,
|
673 |
-
"total_flos":
|
674 |
"trial_name": null,
|
675 |
"trial_params": null
|
676 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9861057391925263,
|
5 |
+
"global_step": 66288,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
666 |
"learning_rate": 1.4545436513839717e-05,
|
667 |
"loss": 3.0787,
|
668 |
"step": 55000
|
669 |
+
},
|
670 |
+
{
|
671 |
+
"epoch": 0.83,
|
672 |
+
"learning_rate": 1.4495849573056442e-05,
|
673 |
+
"loss": 3.0755,
|
674 |
+
"step": 55500
|
675 |
+
},
|
676 |
+
{
|
677 |
+
"epoch": 0.83,
|
678 |
+
"learning_rate": 1.4446262632273165e-05,
|
679 |
+
"loss": 3.066,
|
680 |
+
"step": 56000
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 0.84,
|
684 |
+
"learning_rate": 1.439667569148989e-05,
|
685 |
+
"loss": 3.0695,
|
686 |
+
"step": 56500
|
687 |
+
},
|
688 |
+
{
|
689 |
+
"epoch": 0.85,
|
690 |
+
"learning_rate": 1.4347088750706615e-05,
|
691 |
+
"loss": 3.059,
|
692 |
+
"step": 57000
|
693 |
+
},
|
694 |
+
{
|
695 |
+
"epoch": 0.86,
|
696 |
+
"learning_rate": 1.429750180992334e-05,
|
697 |
+
"loss": 3.0628,
|
698 |
+
"step": 57500
|
699 |
+
},
|
700 |
+
{
|
701 |
+
"epoch": 0.86,
|
702 |
+
"learning_rate": 1.4247914869140065e-05,
|
703 |
+
"loss": 3.0733,
|
704 |
+
"step": 58000
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 0.87,
|
708 |
+
"learning_rate": 1.419832792835679e-05,
|
709 |
+
"loss": 3.0591,
|
710 |
+
"step": 58500
|
711 |
+
},
|
712 |
+
{
|
713 |
+
"epoch": 0.88,
|
714 |
+
"learning_rate": 1.4148740987573514e-05,
|
715 |
+
"loss": 3.0468,
|
716 |
+
"step": 59000
|
717 |
+
},
|
718 |
+
{
|
719 |
+
"epoch": 0.89,
|
720 |
+
"learning_rate": 1.4099154046790237e-05,
|
721 |
+
"loss": 3.0265,
|
722 |
+
"step": 59500
|
723 |
+
},
|
724 |
+
{
|
725 |
+
"epoch": 0.89,
|
726 |
+
"learning_rate": 1.4049567106006963e-05,
|
727 |
+
"loss": 3.0282,
|
728 |
+
"step": 60000
|
729 |
+
},
|
730 |
+
{
|
731 |
+
"epoch": 0.9,
|
732 |
+
"learning_rate": 1.3999980165223688e-05,
|
733 |
+
"loss": 3.0222,
|
734 |
+
"step": 60500
|
735 |
+
},
|
736 |
+
{
|
737 |
+
"epoch": 0.91,
|
738 |
+
"learning_rate": 1.3950393224440413e-05,
|
739 |
+
"loss": 3.0275,
|
740 |
+
"step": 61000
|
741 |
+
},
|
742 |
+
{
|
743 |
+
"epoch": 0.91,
|
744 |
+
"learning_rate": 1.3900806283657138e-05,
|
745 |
+
"loss": 3.0277,
|
746 |
+
"step": 61500
|
747 |
+
},
|
748 |
+
{
|
749 |
+
"epoch": 0.92,
|
750 |
+
"learning_rate": 1.3851219342873862e-05,
|
751 |
+
"loss": 3.0551,
|
752 |
+
"step": 62000
|
753 |
+
},
|
754 |
+
{
|
755 |
+
"epoch": 0.93,
|
756 |
+
"learning_rate": 1.3801632402090585e-05,
|
757 |
+
"loss": 3.0205,
|
758 |
+
"step": 62500
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 0.94,
|
762 |
+
"learning_rate": 1.375204546130731e-05,
|
763 |
+
"loss": 3.023,
|
764 |
+
"step": 63000
|
765 |
+
},
|
766 |
+
{
|
767 |
+
"epoch": 0.94,
|
768 |
+
"learning_rate": 1.3702458520524036e-05,
|
769 |
+
"loss": 3.0244,
|
770 |
+
"step": 63500
|
771 |
+
},
|
772 |
+
{
|
773 |
+
"epoch": 0.95,
|
774 |
+
"learning_rate": 1.365287157974076e-05,
|
775 |
+
"loss": 3.0116,
|
776 |
+
"step": 64000
|
777 |
+
},
|
778 |
+
{
|
779 |
+
"epoch": 0.96,
|
780 |
+
"learning_rate": 1.3603284638957486e-05,
|
781 |
+
"loss": 3.0141,
|
782 |
+
"step": 64500
|
783 |
+
},
|
784 |
+
{
|
785 |
+
"epoch": 0.97,
|
786 |
+
"learning_rate": 1.355369769817421e-05,
|
787 |
+
"loss": 3.0284,
|
788 |
+
"step": 65000
|
789 |
+
},
|
790 |
+
{
|
791 |
+
"epoch": 0.97,
|
792 |
+
"learning_rate": 1.3504110757390933e-05,
|
793 |
+
"loss": 3.0236,
|
794 |
+
"step": 65500
|
795 |
+
},
|
796 |
+
{
|
797 |
+
"epoch": 0.98,
|
798 |
+
"learning_rate": 1.3454523816607659e-05,
|
799 |
+
"loss": 3.013,
|
800 |
+
"step": 66000
|
801 |
}
|
802 |
],
|
803 |
"max_steps": 201666,
|
804 |
"num_train_epochs": 3,
|
805 |
+
"total_flos": 7.793968305851597e+16,
|
806 |
"trial_name": null,
|
807 |
"trial_params": null
|
808 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1200739717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4071ac5bc82e1943cce385333b946af39ce630c568c0ddebfc504245d7448e1
|
3 |
size 1200739717
|
runs/Feb07_05-04-07_5214b674e698/events.out.tfevents.1675746342.5214b674e698.342.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16d94d99a4243783f2c4751209b7f4a0753cff0f06dbbcb869acc91381ecb224
|
3 |
+
size 25141
|