HealthTeam
commited on
Commit
•
5441393
1
Parent(s):
86191f8
Training in progress, step 55240
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +135 -3
- pytorch_model.bin +1 -1
- runs/Feb07_05-04-07_5214b674e698/events.out.tfevents.1675746342.5214b674e698.342.0 +2 -2
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2401461253
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4df6baefe0df3845a28abe9fc6e812fd755b1eb716adddecc1deedd7aca8278d
|
3 |
size 2401461253
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1200739717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:612baa21561bf04530d6eb5804833e6f0b56599c78fd0a39bacd3ac335e650d3
|
3 |
size 1200739717
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:274f4675622b5854821912e67212d43584ebb6ec4a78ba0fa35ff9cd87e972c2
|
3 |
size 14575
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b09dba34704dde96f55673b0cc5c41d0a0ce45d83ad2015625ba94df43f96a45
|
3 |
size 627
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -534,11 +534,143 @@
|
|
534 |
"learning_rate": 1.5636349211071773e-05,
|
535 |
"loss": 3.1734,
|
536 |
"step": 44000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
537 |
}
|
538 |
],
|
539 |
"max_steps": 201666,
|
540 |
"num_train_epochs": 3,
|
541 |
-
"total_flos":
|
542 |
"trial_name": null,
|
543 |
"trial_params": null
|
544 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8217547826604386,
|
5 |
+
"global_step": 55240,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
534 |
"learning_rate": 1.5636349211071773e-05,
|
535 |
"loss": 3.1734,
|
536 |
"step": 44000
|
537 |
+
},
|
538 |
+
{
|
539 |
+
"epoch": 0.66,
|
540 |
+
"learning_rate": 1.55867622702885e-05,
|
541 |
+
"loss": 3.1401,
|
542 |
+
"step": 44500
|
543 |
+
},
|
544 |
+
{
|
545 |
+
"epoch": 0.67,
|
546 |
+
"learning_rate": 1.5537175329505225e-05,
|
547 |
+
"loss": 3.1463,
|
548 |
+
"step": 45000
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"epoch": 0.68,
|
552 |
+
"learning_rate": 1.5487588388721947e-05,
|
553 |
+
"loss": 3.1431,
|
554 |
+
"step": 45500
|
555 |
+
},
|
556 |
+
{
|
557 |
+
"epoch": 0.68,
|
558 |
+
"learning_rate": 1.5438001447938673e-05,
|
559 |
+
"loss": 3.1316,
|
560 |
+
"step": 46000
|
561 |
+
},
|
562 |
+
{
|
563 |
+
"epoch": 0.69,
|
564 |
+
"learning_rate": 1.5388414507155395e-05,
|
565 |
+
"loss": 3.1606,
|
566 |
+
"step": 46500
|
567 |
+
},
|
568 |
+
{
|
569 |
+
"epoch": 0.7,
|
570 |
+
"learning_rate": 1.533882756637212e-05,
|
571 |
+
"loss": 3.1362,
|
572 |
+
"step": 47000
|
573 |
+
},
|
574 |
+
{
|
575 |
+
"epoch": 0.71,
|
576 |
+
"learning_rate": 1.5289240625588847e-05,
|
577 |
+
"loss": 3.1335,
|
578 |
+
"step": 47500
|
579 |
+
},
|
580 |
+
{
|
581 |
+
"epoch": 0.71,
|
582 |
+
"learning_rate": 1.523965368480557e-05,
|
583 |
+
"loss": 3.149,
|
584 |
+
"step": 48000
|
585 |
+
},
|
586 |
+
{
|
587 |
+
"epoch": 0.72,
|
588 |
+
"learning_rate": 1.5190066744022297e-05,
|
589 |
+
"loss": 3.1293,
|
590 |
+
"step": 48500
|
591 |
+
},
|
592 |
+
{
|
593 |
+
"epoch": 0.73,
|
594 |
+
"learning_rate": 1.514047980323902e-05,
|
595 |
+
"loss": 3.1286,
|
596 |
+
"step": 49000
|
597 |
+
},
|
598 |
+
{
|
599 |
+
"epoch": 0.74,
|
600 |
+
"learning_rate": 1.5090892862455743e-05,
|
601 |
+
"loss": 3.1196,
|
602 |
+
"step": 49500
|
603 |
+
},
|
604 |
+
{
|
605 |
+
"epoch": 0.74,
|
606 |
+
"learning_rate": 1.5041305921672469e-05,
|
607 |
+
"loss": 3.1238,
|
608 |
+
"step": 50000
|
609 |
+
},
|
610 |
+
{
|
611 |
+
"epoch": 0.75,
|
612 |
+
"learning_rate": 1.4991718980889195e-05,
|
613 |
+
"loss": 3.1033,
|
614 |
+
"step": 50500
|
615 |
+
},
|
616 |
+
{
|
617 |
+
"epoch": 0.76,
|
618 |
+
"learning_rate": 1.4942132040105919e-05,
|
619 |
+
"loss": 3.1112,
|
620 |
+
"step": 51000
|
621 |
+
},
|
622 |
+
{
|
623 |
+
"epoch": 0.77,
|
624 |
+
"learning_rate": 1.4892545099322645e-05,
|
625 |
+
"loss": 3.0936,
|
626 |
+
"step": 51500
|
627 |
+
},
|
628 |
+
{
|
629 |
+
"epoch": 0.77,
|
630 |
+
"learning_rate": 1.4842958158539369e-05,
|
631 |
+
"loss": 3.107,
|
632 |
+
"step": 52000
|
633 |
+
},
|
634 |
+
{
|
635 |
+
"epoch": 0.78,
|
636 |
+
"learning_rate": 1.4793371217756094e-05,
|
637 |
+
"loss": 3.1063,
|
638 |
+
"step": 52500
|
639 |
+
},
|
640 |
+
{
|
641 |
+
"epoch": 0.79,
|
642 |
+
"learning_rate": 1.4743784276972817e-05,
|
643 |
+
"loss": 3.0639,
|
644 |
+
"step": 53000
|
645 |
+
},
|
646 |
+
{
|
647 |
+
"epoch": 0.8,
|
648 |
+
"learning_rate": 1.4694197336189543e-05,
|
649 |
+
"loss": 3.1028,
|
650 |
+
"step": 53500
|
651 |
+
},
|
652 |
+
{
|
653 |
+
"epoch": 0.8,
|
654 |
+
"learning_rate": 1.4644610395406267e-05,
|
655 |
+
"loss": 3.0821,
|
656 |
+
"step": 54000
|
657 |
+
},
|
658 |
+
{
|
659 |
+
"epoch": 0.81,
|
660 |
+
"learning_rate": 1.4595023454622992e-05,
|
661 |
+
"loss": 3.0596,
|
662 |
+
"step": 54500
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"epoch": 0.82,
|
666 |
+
"learning_rate": 1.4545436513839717e-05,
|
667 |
+
"loss": 3.0787,
|
668 |
+
"step": 55000
|
669 |
}
|
670 |
],
|
671 |
"max_steps": 201666,
|
672 |
"num_train_epochs": 3,
|
673 |
+
"total_flos": 6.497219068615066e+16,
|
674 |
"trial_name": null,
|
675 |
"trial_params": null
|
676 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1200739717
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:612baa21561bf04530d6eb5804833e6f0b56599c78fd0a39bacd3ac335e650d3
|
3 |
size 1200739717
|
runs/Feb07_05-04-07_5214b674e698/events.out.tfevents.1675746342.5214b674e698.342.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b5afd134e7190bfca227464f177ede39b4d5a435161a2a45582286c96ae5885
|
3 |
+
size 21621
|