Training in progress, epoch 13
Browse files- logs/events.out.tfevents.1716900085.sphinx2 +2 -2
- model.safetensors +1 -1
- train_job_output.txt +33 -1
logs/events.out.tfevents.1716900085.sphinx2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ed61d8d13c22cdf54016caea8fb70cf58322d96e59c1f41b389cf4222c13568
|
3 |
+
size 95465
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 281715176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8eff0797834c76e9deea9ee3171dbcaace1b622744f99390802a8c97f9b6a893
|
3 |
size 281715176
|
train_job_output.txt
CHANGED
@@ -618,4 +618,36 @@ command outputs:
|
|
618 |
|
619 |
92%|ββββββββββ| 9850/10682 [1:24:10<06:50, 2.03it/s]
|
620 |
92%|ββββββββββ| 9851/10682 [1:24:10<06:50, 2.03it/s]
|
621 |
92%|ββββββββββ| 9852/10682 [1:24:11<06:50, 2.02it/s]
|
622 |
92%|ββββββββββ| 9853/10682 [1:24:11<06:48, 2.03it/s]
|
623 |
92%|ββββββββββ| 9854/10682 [1:24:12<06:48, 2.03it/s]
|
624 |
92%|ββββββββββ| 9855/10682 [1:24:12<06:47, 2.03it/s]
|
625 |
92%|ββββββββββ| 9856/10682 [1:24:13<06:47, 2.03it/s]
|
626 |
92%|ββββββββββ| 9857/10682 [1:24:13<06:47, 2.03it/s]
|
627 |
92%|ββββββββββ| 9858/10682 [1:24:14<06:46, 2.03it/s]
|
628 |
92%|ββββββββββ| 9859/10682 [1:24:14<06:46, 2.03it/s]
|
629 |
92%|ββββββββββ| 9860/10682 [1:24:15<06:45, 2.03it/s]
|
630 |
92%|ββββββββββ| 9861/10682 [1:24:15<06:45, 2.03it/s]
|
631 |
92%|ββββββββββ| 9862/10682 [1:24:16<06:44, 2.03it/s]
|
632 |
92%|ββββββββββ| 9863/10682 [1:24:16<06:43, 2.03it/s]
|
633 |
92%|ββββββββββ| 9864/10682 [1:24:17<06:43, 2.03it/s]
|
634 |
92%|ββββββββββ| 9865/10682 [1:24:17<06:42, 2.03it/s]
|
635 |
92%|ββββββββββ| 9866/10682 [1:24:18<06:42, 2.03it/s]
|
636 |
92%|ββββββββββ| 9867/10682 [1:24:18<06:41, 2.03it/s]
|
637 |
92%|ββββββββββ| 9868/10682 [1:24:19<06:41, 2.03it/s]
|
638 |
92%|ββββββββββ| 9869/10682 [1:24:19<06:40, 2.03it/s]
|
639 |
92%|ββββββββββ| 9870/10682 [1:24:20<06:40, 2.03it/s]
|
640 |
92%|ββββββββββ| 9871/10682 [1:24:20<06:40, 2.03it/s]
|
641 |
92%|ββββββββββ| 9872/10682 [1:24:21<06:39, 2.03it/s]
|
642 |
92%|ββββββββββ| 9873/10682 [1:24:21<06:39, 2.03it/s]
|
643 |
92%|ββββββββββ| 9874/10682 [1:24:22<06:38, 2.03it/s]
|
644 |
92%|ββββββββββ| 9875/10682 [1:24:22<06:38, 2.03it/s]{'loss': 2.8269, 'grad_norm': 0.24751795828342438, 'learning_rate': 1.7288216615031272e-05, 'epoch': 12.93}
|
645 |
|
646 |
|
647 |
92%|ββββββββββ| 9875/10682 [1:24:22<06:38, 2.03it/s]
|
648 |
92%|ββββββββββ| 9876/10682 [1:24:23<06:37, 2.03it/s]
|
649 |
92%|ββββββββββ| 9877/10682 [1:24:23<06:37, 2.03it/s]
|
650 |
92%|ββββββββββ| 9878/10682 [1:24:24<06:36, 2.03it/s]
|
651 |
92%|ββββββββββ| 9879/10682 [1:24:24<06:35, 2.03it/s]
|
652 |
92%|ββββββββββ| 9880/10682 [1:24:25<06:35, 2.03it/s]
|
653 |
93%|ββββββββββ| 9881/10682 [1:24:25<06:35, 2.03it/s]
|
654 |
93%|ββββββββββ| 9882/10682 [1:24:26<06:34, 2.03it/s]
|
655 |
93%|ββββββββββ| 9883/10682 [1:24:26<06:34, 2.03it/s]
|
656 |
93%|ββββββββββ| 9884/10682 [1:24:27<06:33, 2.03it/s]
|
657 |
93%|ββββββββββ| 9885/10682 [1:24:27<06:33, 2.03it/s]
|
658 |
93%|ββββββββββ| 9886/10682 [1:24:28<06:32, 2.03it/s]
|
659 |
93%|ββββββββββ| 9887/10682 [1:24:28<06:31, 2.03it/s]
|
660 |
93%|ββββββββββ| 9888/10682 [1:24:29<06:31, 2.03it/s]
|
661 |
93%|ββββββββββ| 9889/10682 [1:24:29<06:31, 2.03it/s]
|
662 |
93%|ββββββββββ| 9890/10682 [1:24:30<06:30, 2.03it/s]
|
663 |
93%|ββββββββββ| 9891/10682 [1:24:30<06:29, 2.03it/s]
|
664 |
93%|ββββββββββ| 9892/10682 [1:24:31<06:29, 2.03it/s]
|
665 |
93%|ββββββββββ| 9893/10682 [1:24:31<06:28, 2.03it/s]
|
666 |
93%|ββββββββββ| 9894/10682 [1:24:32<06:28, 2.03it/s]
|
667 |
93%|ββββββββββ| 9895/10682 [1:24:32<06:27, 2.03it/s]
|
668 |
93%|ββββββββββ| 9896/10682 [1:24:33<06:27, 2.03it/s]
|
669 |
93%|ββββββββββ| 9897/10682 [1:24:33<06:27, 2.03it/s]
|
670 |
93%|ββββββββββ| 9898/10682 [1:24:34<06:26, 2.03it/s]
|
671 |
93%|ββββββββββ| 9899/10682 [1:24:34<06:26, 2.03it/s]
|
672 |
93%|ββββββββββ| 9900/10682 [1:24:35<06:25, 2.03it/s]{'loss': 2.8229, 'grad_norm': 0.2464432567358017, 'learning_rate': 1.6239414036870183e-05, 'epoch': 12.97}
|
673 |
|
674 |
|
675 |
93%|ββββββββββ| 9900/10682 [1:24:35<06:25, 2.03it/s]
|
676 |
93%|ββββββββββ| 9901/10682 [1:24:35<06:25, 2.03it/s]
|
677 |
93%|ββββββββββ| 9902/10682 [1:24:36<06:24, 2.03it/s]
|
678 |
93%|ββββββββββ| 9903/10682 [1:24:36<06:24, 2.03it/s]
|
679 |
93%|ββββββββββ| 9904/10682 [1:24:37<06:23, 2.03it/s]
|
680 |
93%|ββββββββββ| 9905/10682 [1:24:37<06:23, 2.02it/s]
|
681 |
93%|ββββββββββ| 9906/10682 [1:24:38<06:23, 2.02it/s]
|
682 |
93%|ββββββββββ| 9907/10682 [1:24:38<06:23, 2.02it/s]
|
683 |
93%|ββββββββββ| 9908/10682 [1:24:39<06:22, 2.02it/s]
|
684 |
93%|ββββββββββ| 9909/10682 [1:24:39<06:22, 2.02it/s]
|
685 |
93%|ββββββββββ| 9910/10682 [1:24:40<06:21, 2.03it/s]
|
686 |
93%|ββββββββββ| 9911/10682 [1:24:40<06:20, 2.03it/s]
|
687 |
93%|ββββββββββ| 9912/10682 [1:24:41<06:19, 2.03it/s]
|
688 |
93%|ββββββββββ| 9913/10682 [1:24:41<06:19, 2.03it/s]
|
689 |
93%|ββββββββββ| 9914/10682 [1:24:42<06:18, 2.03it/s]
|
690 |
93%|ββββββββββ| 9915/10682 [1:24:42<06:17, 2.03it/s]
|
691 |
93%|ββββββββββ| 9916/10682 [1:24:43<06:17, 2.03it/s]
|
692 |
93%|ββββββββββ| 9917/10682 [1:24:43<06:17, 2.03it/s]
|
693 |
93%|ββββββββββ| 9918/10682 [1:24:44<06:16, 2.03it/s]
|
694 |
93%|ββββββββββ| 9919/10682 [1:24:44<06:16, 2.03it/s]
|
695 |
93%|ββββββββββ| 9920/10682 [1:24:45<06:16, 2.02it/s]
|
696 |
93%|ββββββββββ| 9921/10682 [1:24:45<06:15, 2.03it/s]
|
697 |
93%|ββββββββββ| 9922/10682 [1:24:45<06:14, 2.03it/s]
|
698 |
93%|ββββββββββ| 9923/10682 [1:24:46<06:14, 2.03it/s]
|
699 |
93%|ββββββββββ| 9924/10682 [1:24:46<06:13, 2.03it/s]
|
700 |
93%|ββββββββββ| 9925/10682 [1:24:47<06:22, 1.98it/s]{'loss': 2.8269, 'grad_norm': 0.24894338846206665, 'learning_rate': 1.5222903086944684e-05, 'epoch': 13.0}
|
701 |
-
|
702 |
|
703 |
93%|ββββββββββ| 9925/10682 [1:24:47<06:22, 1.98it/s]
|
704 |
93%|ββββββββββ| 9926/10682 [1:24:59<50:04, 3.97s/it]
|
705 |
93%|ββββββββββ| 9927/10682 [1:25:00<36:52, 2.93s/it]
|
706 |
93%|ββββββββββ| 9928/10682 [1:25:00<27:37, 2.20s/it]
|
707 |
93%|ββββββββββ| 9929/10682 [1:25:01<21:12, 1.69s/it]
|
708 |
93%|ββββββββββ| 9930/10682 [1:25:01<16:40, 1.33s/it]
|
709 |
93%|ββββββββββ| 9931/10682 [1:25:02<13:30, 1.08s/it]
|
710 |
93%|ββββββββββ| 9932/10682 [1:25:02<11:16, 1.11it/s]
|
711 |
93%|ββββββββββ| 9933/10682 [1:25:03<09:45, 1.28it/s]
|
712 |
93%|ββββββββββ| 9934/10682 [1:25:03<08:41, 1.44it/s]
|
713 |
93%|ββββββββββ| 9935/10682 [1:25:04<07:54, 1.57it/s]
|
714 |
93%|ββββββββββ| 9936/10682 [1:25:04<07:22, 1.69it/s]
|
715 |
93%|ββββββββββ| 9937/10682 [1:25:05<06:59, 1.77it/s]
|
716 |
93%|ββββββββββ| 9938/10682 [1:25:05<06:43, 1.84it/s]
|
717 |
93%|ββββββββββ| 9939/10682 [1:25:06<06:32, 1.89it/s]
|
718 |
93%|ββββββββββ| 9940/10682 [1:25:06<06:24, 1.93it/s]
|
719 |
93%|ββββββββββ| 9941/10682 [1:25:07<06:18, 1.96it/s]
|
720 |
93%|ββββββββββ| 9942/10682 [1:25:07<06:14, 1.98it/s]
|
721 |
93%|ββββββββββ| 9943/10682 [1:25:07<06:11, 1.99it/s]
|
722 |
93%|ββββββββββ| 9944/10682 [1:25:08<06:09, 2.00it/s]
|
723 |
93%|ββββββββββ| 9945/10682 [1:25:08<06:07, 2.01it/s]
|
724 |
93%|ββββββββββ| 9946/10682 [1:25:09<06:06, 2.01it/s]
|
725 |
93%|ββββββββββ| 9947/10682 [1:25:09<06:05, 2.01it/s]
|
726 |
93%|ββββββββββ| 9948/10682 [1:25:10<06:04, 2.01it/s]
|
|
|
727 |
|
728 |
93%|ββββββββββ| 9925/10682 [1:24:47<06:22, 1.98it/s]
|
729 |
93%|ββββββββββ| 9926/10682 [1:24:59<50:04, 3.97s/it]
|
730 |
93%|ββββββββββ| 9927/10682 [1:25:00<36:52, 2.93s/it]
|
731 |
93%|ββββββββββ| 9928/10682 [1:25:00<27:37, 2.20s/it]
|
732 |
93%|ββββββββββ| 9929/10682 [1:25:01<21:12, 1.69s/it]
|
733 |
93%|ββββββββββ| 9930/10682 [1:25:01<16:40, 1.33s/it]
|
734 |
93%|ββββββββββ| 9931/10682 [1:25:02<13:30, 1.08s/it]
|
735 |
93%|ββββββββββ| 9932/10682 [1:25:02<11:16, 1.11it/s]
|
736 |
93%|ββββββββββ| 9933/10682 [1:25:03<09:45, 1.28it/s]
|
737 |
93%|ββββββββββ| 9934/10682 [1:25:03<08:41, 1.44it/s]
|
738 |
93%|ββββββββββ| 9935/10682 [1:25:04<07:54, 1.57it/s]
|
739 |
93%|ββββββββββ| 9936/10682 [1:25:04<07:22, 1.69it/s]
|
740 |
93%|ββββββββββ| 9937/10682 [1:25:05<06:59, 1.77it/s]
|
741 |
93%|ββββββββββ| 9938/10682 [1:25:05<06:43, 1.84it/s]
|
742 |
93%|ββββββββββ| 9939/10682 [1:25:06<06:32, 1.89it/s]
|
743 |
93%|ββββββββββ| 9940/10682 [1:25:06<06:24, 1.93it/s]
|
744 |
93%|ββββββββββ| 9941/10682 [1:25:07<06:18, 1.96it/s]
|
745 |
93%|ββββββββββ| 9942/10682 [1:25:07<06:14, 1.98it/s]
|
746 |
93%|ββββββββββ| 9943/10682 [1:25:07<06:11, 1.99it/s]
|
747 |
93%|ββββββββββ| 9944/10682 [1:25:08<06:09, 2.00it/s]
|
748 |
93%|ββββββββββ| 9945/10682 [1:25:08<06:07, 2.01it/s]
|
749 |
93%|ββββββββββ| 9946/10682 [1:25:09<06:06, 2.01it/s]
|
750 |
93%|ββββββββββ| 9947/10682 [1:25:09<06:05, 2.01it/s]
|
751 |
93%|ββββββββββ| 9948/10682 [1:25:10<06:04, 2.01it/s]
|
752 |
93%|ββββββββββ| 9949/10682 [1:25:10<06:03, 2.02it/s]
|
753 |
93%|ββββββββββ| 9950/10682 [1:25:11<06:02, 2.02it/s]{'loss': 2.7842, 'grad_norm': 0.2500942349433899, 'learning_rate': 1.4238751618640577e-05, 'epoch': 13.03}
|
|
|
754 |
|
755 |
93%|ββββββββββ| 9950/10682 [1:25:11<06:02, 2.02it/s]
|
756 |
93%|ββββββββββ| 9951/10682 [1:25:11<06:02, 2.02it/s]
|
757 |
93%|ββββββββββ| 9952/10682 [1:25:12<06:00, 2.02it/s]
|
758 |
93%|ββββββββββ| 9953/10682 [1:25:12<06:00, 2.02it/s]
|
759 |
93%|ββββββββββ| 9954/10682 [1:25:13<05:59, 2.03it/s]
|
760 |
93%|ββββββββββ| 9955/10682 [1:25:13<05:59, 2.02it/s]
|
761 |
93%|ββββββββββ| 9956/10682 [1:25:14<05:58, 2.03it/s]
|
762 |
93%|ββββββββββ| 9957/10682 [1:25:14<05:57, 2.03it/s]
|
763 |
93%|ββββββββββ| 9958/10682 [1:25:15<05:57, 2.03it/s]
|
764 |
93%|ββββββββββ| 9959/10682 [1:25:15<05:56, 2.03it/s]
|
765 |
93%|ββββββββββ| 9960/10682 [1:25:16<05:56, 2.03it/s]
|
766 |
93%|ββββββββββ| 9961/10682 [1:25:16<05:55, 2.03it/s]
|
767 |
93%|ββββββββββ| 9962/10682 [1:25:17<05:55, 2.03it/s]
|
768 |
93%|ββββββββββ| 9963/10682 [1:25:17<05:54, 2.03it/s]
|
769 |
93%|ββββββββββ| 9964/10682 [1:25:18<05:53, 2.03it/s]
|
770 |
93%|ββββββββββ| 9965/10682 [1:25:18<05:53, 2.03it/s]
|
771 |
93%|ββββββββββ| 9966/10682 [1:25:19<05:53, 2.03it/s]
|
772 |
93%|ββββββββββ| 9967/10682 [1:25:19<05:52, 2.03it/s]
|
773 |
93%|ββββββββββ| 9968/10682 [1:25:20<05:51, 2.03it/s]
|
774 |
93%|ββββββββββ| 9969/10682 [1:25:20<05:51, 2.03it/s]
|
775 |
93%|ββββββββββ| 9970/10682 [1:25:21<05:50, 2.03it/s]
|
776 |
93%|ββββββββββ| 9971/10682 [1:25:21<05:50, 2.03it/s]
|
777 |
93%|ββββββββββ| 9972/10682 [1:25:22<05:50, 2.03it/s]
|
778 |
93%|ββββββββββ| 9973/10682 [1:25:22<05:49, 2.03it/s]
|
779 |
93%|ββββββββββ| 9974/10682 [1:25:23<05:49, 2.03it/s]
|
780 |
93%|ββββββββββ| 9975/10682 [1:25:23<05:48, 2.03it/s]{'loss': 2.799, 'grad_norm': 0.25120672583580017, 'learning_rate': 1.3287025325307511e-05, 'epoch': 13.06}
|
|
|
781 |
|
782 |
93%|ββββββββββ| 9975/10682 [1:25:23<05:48, 2.03it/s]
|
783 |
93%|ββββββββββ| 9976/10682 [1:25:24<05:48, 2.03it/s]
|
784 |
93%|ββββββββββ| 9977/10682 [1:25:24<05:47, 2.03it/s]
|
785 |
93%|ββββββββββ| 9978/10682 [1:25:25<05:47, 2.03it/s]
|
786 |
93%|ββββββββββ| 9979/10682 [1:25:25<05:46, 2.03it/s]
|
787 |
93%|ββββββββββ| 9980/10682 [1:25:26<05:46, 2.03it/s]
|
788 |
93%|ββββββββββ| 9981/10682 [1:25:26<05:45, 2.03it/s]
|
789 |
93%|ββββββββββ| 9982/10682 [1:25:27<05:45, 2.03it/s]
|
790 |
93%|ββββββββββ| 9983/10682 [1:25:27<05:44, 2.03it/s]
|
791 |
93%|ββββββββββ| 9984/10682 [1:25:28<05:44, 2.03it/s]
|
792 |
93%|ββββββββββ| 9985/10682 [1:25:28<05:44, 2.03it/s]
|
793 |
93%|ββββββββββ| 9986/10682 [1:25:29<05:43, 2.03it/s]
|
794 |
93%|ββββββββββ| 9987/10682 [1:25:29<05:42, 2.03it/s]
|
795 |
94%|ββββββββββ| 9988/10682 [1:25:30<05:42, 2.03it/s]
|
796 |
94%|ββββββββββ| 9989/10682 [1:25:30<05:42, 2.03it/s]
|
797 |
94%|ββββββββββ| 9990/10682 [1:25:31<05:41, 2.03it/s]
|
798 |
94%|ββββββββββ| 9991/10682 [1:25:31<05:41, 2.03it/s]
|
799 |
94%|ββββββββββ| 9992/10682 [1:25:32<05:40, 2.03it/s]
|
800 |
94%|ββββββββββ| 9993/10682 [1:25:32<05:40, 2.02it/s]
|
801 |
94%|ββββββββββ| 9994/10682 [1:25:33<05:39, 2.03it/s]
|
802 |
94%|ββββββββββ| 9995/10682 [1:25:33<05:38, 2.03it/s]
|
803 |
94%|ββββββββββ| 9996/10682 [1:25:34<05:38, 2.02it/s]
|
804 |
94%|ββββββββββ| 9997/10682 [1:25:34<05:37, 2.03it/s]
|
805 |
94%|ββββββββββ| 9998/10682 [1:25:35<05:37, 2.03it/s]
|
806 |
94%|ββββββββββ| 9999/10682 [1:25:35<05:37, 2.03it/s]
|
807 |
94%|ββββββββββ| 10000/10682 [1:25:36<05:36, 2.03it/s]{'loss': 2.8013, 'grad_norm': 0.24948380887508392, 'learning_rate': 1.2367787735873993e-05, 'epoch': 13.1}
|
|
|
808 |
|
809 |
94%|ββββββββββ| 10000/10682 [1:25:36<05:36, 2.03it/s]
|
810 |
94%|ββββββββββ| 10001/10682 [1:25:36<05:36, 2.03it/s]
|
811 |
94%|ββββββββββ| 10002/10682 [1:25:37<05:35, 2.03it/s]
|
812 |
94%|ββββββββββ| 10003/10682 [1:25:37<05:34, 2.03it/s]
|
813 |
94%|ββββββββββ| 10004/10682 [1:25:38<05:34, 2.03it/s]
|
814 |
94%|ββββββββββ| 10005/10682 [1:25:38<05:33, 2.03it/s]
|
815 |
94%|ββββββββββ| 10006/10682 [1:25:39<05:33, 2.03it/s]
|
816 |
94%|ββββββββββ| 10007/10682 [1:25:39<05:33, 2.03it/s]
|
817 |
94%|ββββββββββ| 10008/10682 [1:25:40<05:32, 2.03it/s]
|
818 |
94%|ββββββββββ| 10009/10682 [1:25:40<05:32, 2.03it/s]
|
819 |
94%|ββββββββββ| 10010/10682 [1:25:41<05:31, 2.03it/s]
|
820 |
94%|ββββββββββ| 10011/10682 [1:25:41<05:31, 2.03it/s]
|
821 |
94%|ββββββββββ| 10012/10682 [1:25:42<05:30, 2.03it/s]
|
822 |
94%|ββββββββββ| 10013/10682 [1:25:42<05:30, 2.03it/s]
|
823 |
94%|ββββββββββ| 10014/10682 [1:25:43<05:29, 2.03it/s]
|
824 |
94%|ββββββββββ| 10015/10682 [1:25:43<05:29, 2.03it/s]
|
825 |
94%|ββββββββββ| 10016/10682 [1:25:44<05:28, 2.03it/s]
|
826 |
94%|ββββββββββ| 10017/10682 [1:25:44<05:28, 2.03it/s]
|
827 |
94%|ββββββββββ| 10018/10682 [1:25:45<05:27, 2.02it/s]
|
828 |
94%|ββββββββββ| 10019/10682 [1:25:45<05:27, 2.03it/s]
|
829 |
94%|ββββββββββ| 10020/10682 [1:25:45<05:27, 2.02it/s]
|
830 |
94%|ββββββββββ| 10021/10682 [1:25:46<05:26, 2.02it/s]
|
831 |
94%|ββββββββββ| 10022/10682 [1:25:46<05:25, 2.02it/s]
|
832 |
94%|ββββββββββ| 10023/10682 [1:25:47<05:25, 2.03it/s]
|
833 |
94%|ββββββββββ| 10024/10682 [1:25:47<05:25, 2.02it/s]
|
834 |
94%|ββββββββββ| 10025/10682 [1:25:48<05:24, 2.03it/s]{'loss': 2.8, 'grad_norm': 0.2516091465950012, 'learning_rate': 1.1481100210606388e-05, 'epoch': 13.13}
|
|
|
835 |
|
836 |
94%|ββββββββββ| 10025/10682 [1:25:48<05:24, 2.03it/s]
|
837 |
94%|ββββββββββ| 10026/10682 [1:25:48<05:24, 2.02it/s]
|
838 |
94%|ββββββββββ| 10027/10682 [1:25:49<05:23, 2.03it/s]
|
839 |
94%|ββββββββββ| 10028/10682 [1:25:49<05:22, 2.03it/s]
|
840 |
94%|ββββββββββ| 10029/10682 [1:25:50<05:22, 2.03it/s]
|
841 |
94%|ββββββββββ| 10030/10682 [1:25:50<05:21, 2.03it/s]
|
842 |
94%|ββββββββββ| 10031/10682 [1:25:51<05:21, 2.03it/s]
|
843 |
94%|ββββββββββ| 10032/10682 [1:25:51<05:20, 2.03it/s]
|
844 |
94%|ββββββββββ| 10033/10682 [1:25:52<05:19, 2.03it/s]
|
845 |
94%|ββββββββββ| 10034/10682 [1:25:52<05:19, 2.03it/s]
|
846 |
94%|ββββββββββ| 10035/10682 [1:25:53<05:18, 2.03it/s]
|
847 |
94%|ββββββββββ| 10036/10682 [1:25:53<05:18, 2.03it/s]
|
848 |
94%|ββββββββββ| 10037/10682 [1:25:54<05:17, 2.03it/s]
|
849 |
94%|ββββββββββ| 10038/10682 [1:25:54<05:17, 2.03it/s]
|
850 |
94%|ββββββββββ| 10039/10682 [1:25:55<05:16, 2.03it/s]
|
851 |
94%|ββββββββββ| 10040/10682 [1:25:55<05:16, 2.03it/s]
|
852 |
94%|ββββββββββ| 10041/10682 [1:25:56<05:15, 2.03it/s]
|
853 |
94%|ββββββββββ| 10042/10682 [1:25:56<05:15, 2.03it/s]
|
854 |
94%|ββββββββββ| 10043/10682 [1:25:57<05:15, 2.03it/s]
|
855 |
94%|ββββββββββ| 10044/10682 [1:25:57<05:14, 2.03it/s]
|
856 |
94%|ββββββββββ| 10045/10682 [1:25:58<05:13, 2.03it/s]
|
857 |
94%|ββββββββββ| 10046/10682 [1:25:58<05:13, 2.03it/s]
|
858 |
94%|ββββββββββ| 10047/10682 [1:25:59<05:12, 2.03it/s]
|
859 |
94%|ββββββββββ| 10048/10682 [1:25:59<05:12, 2.03it/s]
|
860 |
94%|ββββββββββ| 10049/10682 [1:26:00<05:11, 2.03it/s]
|
861 |
94%|ββββββββββ| 10050/10682 [1:26:00<05:11, 2.03it/s]{'loss': 2.8031, 'grad_norm': 0.2510456144809723, 'learning_rate': 1.0627021937013704e-05, 'epoch': 13.16}
|
|
|
862 |
|
863 |
94%|ββββββββββ| 10050/10682 [1:26:00<05:11, 2.03it/s]
|
864 |
94%|ββββββββββ| 10051/10682 [1:26:01<05:11, 2.03it/s]
|
865 |
94%|ββββββββββ| 10052/10682 [1:26:01<05:10, 2.03it/s]
|
866 |
94%|ββββββββββ| 10053/10682 [1:26:02<05:10, 2.03it/s]
|
867 |
94%|ββββββββββ| 10054/10682 [1:26:02<05:09, 2.03it/s]
|
868 |
94%|ββββββββββ| 10055/10682 [1:26:03<05:09, 2.03it/s]
|
869 |
94%|ββββββββββ| 10056/10682 [1:26:03<05:08, 2.03it/s]
|
870 |
94%|ββββββββββ| 10057/10682 [1:26:04<05:08, 2.03it/s]
|
871 |
94%|ββββββββββ| 10058/10682 [1:26:04<05:07, 2.03it/s]
|
872 |
94%|ββββββββββ| 10059/10682 [1:26:05<05:07, 2.03it/s]
|
873 |
94%|ββββββββββ| 10060/10682 [1:26:05<05:07, 2.02it/s]
|
874 |
94%|ββββββββββ| 10061/10682 [1:26:06<05:06, 2.03it/s]
|
875 |
94%|ββββββββββ| 10062/10682 [1:26:06<05:05, 2.03it/s]
|
876 |
94%|ββββββββββ| 10063/10682 [1:26:07<05:05, 2.03it/s]
|
877 |
94%|ββββββββββ| 10064/10682 [1:26:07<05:04, 2.03it/s]
|
878 |
94%|ββββββββββ| 10065/10682 [1:26:08<05:04, 2.03it/s]
|
879 |
94%|ββββββββββ| 10066/10682 [1:26:08<05:04, 2.02it/s]
|
880 |
94%|ββββββββββ| 10067/10682 [1:26:09<05:03, 2.03it/s]
|
881 |
94%|ββββββββββ| 10068/10682 [1:26:09<05:03, 2.03it/s]
|
882 |
94%|ββββββββββ| 10069/10682 [1:26:10<05:02, 2.03it/s]
|
883 |
94%|ββββββββββ| 10070/10682 [1:26:10<05:02, 2.03it/s]
|
884 |
94%|ββββββββββ| 10071/10682 [1:26:11<05:01, 2.03it/s]
|
885 |
94%|ββββββββββ| 10072/10682 [1:26:11<05:01, 2.02it/s]
|
886 |
94%|ββββββββββ| 10073/10682 [1:26:12<05:00, 2.03it/s]
|
887 |
94%|ββββββββββ| 10074/10682 [1:26:12<04:59, 2.03it/s]
|
888 |
94%|ββββββββββ| 10075/10682 [1:26:13<04:59, 2.03it/s]{'loss': 2.8029, 'grad_norm': 0.2531677186489105, 'learning_rate': 9.805609925895964e-06, 'epoch': 13.2}
|
|
|
889 |
|
890 |
94%|ββββββββββ| 10075/10682 [1:26:13<04:59, 2.03it/s]
|
891 |
94%|ββββββββββ| 10076/10682 [1:26:13<04:59, 2.03it/s]
|
892 |
94%|ββββββββββ| 10077/10682 [1:26:14<04:58, 2.02it/s]
|
893 |
94%|ββββββββββ| 10078/10682 [1:26:14<04:58, 2.02it/s]
|
894 |
94%|ββββββββββ| 10079/10682 [1:26:15<04:58, 2.02it/s]
|
895 |
94%|ββββββββββ| 10080/10682 [1:26:15<04:57, 2.03it/s]
|
896 |
94%|ββββββββββ| 10081/10682 [1:26:16<04:56, 2.03it/s]
|
897 |
94%|ββββββββββ| 10082/10682 [1:26:16<04:56, 2.03it/s]
|
898 |
94%|ββββββββββ| 10083/10682 [1:26:17<04:55, 2.03it/s]
|
899 |
94%|ββββββββββ| 10084/10682 [1:26:17<04:55, 2.02it/s]
|
900 |
94%|ββββββββββ| 10085/10682 [1:26:18<04:54, 2.02it/s]
|
901 |
94%|ββββββββββ| 10086/10682 [1:26:18<04:54, 2.02it/s]
|
902 |
94%|ββββββββββ| 10087/10682 [1:26:19<04:53, 2.03it/s]
|
903 |
94%|ββββββββββ| 10088/10682 [1:26:19<04:53, 2.02it/s]
|
904 |
94%|ββββββββββ| 10089/10682 [1:26:20<04:53, 2.02it/s]
|
905 |
94%|ββββββββββ| 10090/10682 [1:26:20<04:52, 2.03it/s]
|
906 |
94%|ββββββββββ| 10091/10682 [1:26:21<04:51, 2.03it/s]
|
907 |
94%|ββββββββββ| 10092/10682 [1:26:21<04:51, 2.03it/s]
|
908 |
94%|ββββββββββ| 10093/10682 [1:26:22<04:50, 2.03it/s]
|
909 |
94%|ββββββββββ| 10094/10682 [1:26:22<04:49, 2.03it/s]
|
910 |
95%|ββββββββββ| 10095/10682 [1:26:22<04:49, 2.03it/s]
|
911 |
95%|ββββββββββ| 10096/10682 [1:26:23<04:49, 2.03it/s]
|
912 |
95%|ββββββββββ| 10097/10682 [1:26:23<04:48, 2.03it/s]
|
913 |
95%|ββββββββββ| 10098/10682 [1:26:24<04:48, 2.03it/s]
|
914 |
95%|ββββββββββ| 10099/10682 [1:26:24<04:47, 2.03it/s]
|
915 |
95%|ββββββββββ| 10100/10682 [1:26:25<04:47, 2.03it/s]{'loss': 2.7963, 'grad_norm': 0.24528002738952637, 'learning_rate': 9.01691900753926e-06, 'epoch': 13.23}
|
|
|
916 |
|
917 |
95%|ββββββββββ| 10100/10682 [1:26:25<04:47, 2.03it/s]
|
918 |
95%|ββββββββββ| 10101/10682 [1:26:25<04:47, 2.02it/s]
|
919 |
95%|ββββββββββ| 10102/10682 [1:26:26<04:46, 2.02it/s]
|
920 |
95%|ββββββββββ| 10103/10682 [1:26:26<04:45, 2.02it/s]
|
921 |
95%|ββββββββββ| 10104/10682 [1:26:27<04:45, 2.02it/s]
|
922 |
95%|ββββββββββ| 10105/10682 [1:26:27<04:44, 2.03it/s]
|
923 |
95%|ββββββββββ| 10106/10682 [1:26:28<04:44, 2.03it/s]
|
924 |
95%|ββββββββββ| 10107/10682 [1:26:28<04:43, 2.03it/s]
|
925 |
95%|ββββββββββ| 10108/10682 [1:26:29<04:42, 2.03it/s]
|
926 |
95%|ββββββββββ| 10109/10682 [1:26:29<04:42, 2.03it/s]
|
927 |
95%|ββββββββββ| 10110/10682 [1:26:30<04:42, 2.03it/s]
|
928 |
95%|ββββββββββ| 10111/10682 [1:26:30<04:41, 2.03it/s]
|
929 |
95%|ββββββββββ| 10112/10682 [1:26:31<04:41, 2.03it/s]
|
930 |
95%|ββββββββββ| 10113/10682 [1:26:31<04:40, 2.03it/s]
|
931 |
95%|ββββββββββ| 10114/10682 [1:26:32<04:40, 2.03it/s]
|
932 |
95%|ββββββββββ| 10115/10682 [1:26:32<04:39, 2.03it/s]
|
933 |
95%|ββββββββββ| 10116/10682 [1:26:33<04:39, 2.03it/s]
|
934 |
95%|ββββββββββ| 10117/10682 [1:26:33<04:38, 2.03it/s]
|
935 |
95%|ββββββββββ| 10118/10682 [1:26:34<04:38, 2.03it/s]
|
936 |
95%|ββββββββββ| 10119/10682 [1:26:34<04:37, 2.03it/s]
|
937 |
95%|ββββββββββ| 10120/10682 [1:26:35<04:37, 2.03it/s]
|
938 |
95%|ββββββββββ| 10121/10682 [1:26:35<04:36, 2.03it/s]
|
939 |
95%|ββββββββββ| 10122/10682 [1:26:36<04:35, 2.03it/s]
|
940 |
95%|ββββββββββ| 10123/10682 [1:26:36<04:35, 2.03it/s]
|
941 |
95%|ββββββββββ| 10124/10682 [1:26:37<04:34, 2.03it/s]
|
942 |
95%|ββββββββββ| 10125/10682 [1:26:37<04:34, 2.03it/s]{'loss': 2.7944, 'grad_norm': 0.25175368785858154, 'learning_rate': 8.261001828055447e-06, 'epoch': 13.26}
|
|
|
943 |
|
944 |
95%|ββββββββββ| 10125/10682 [1:26:37<04:34, 2.03it/s]
|
945 |
95%|ββββββββββ| 10126/10682 [1:26:38<04:34, 2.03it/s]
|
946 |
95%|ββββββββββ| 10127/10682 [1:26:38<04:34, 2.03it/s]
|
947 |
95%|ββββββββββ| 10128/10682 [1:26:39<04:33, 2.03it/s]
|
948 |
95%|ββββββββββ| 10129/10682 [1:26:39<04:32, 2.03it/s]
|
949 |
95%|ββββββββββ| 10130/10682 [1:26:40<04:32, 2.03it/s]
|
950 |
95%|ββββββββββ| 10131/10682 [1:26:40<04:31, 2.03it/s]
|
951 |
95%|ββββββββββ| 10132/10682 [1:26:41<04:31, 2.03it/s]
|
952 |
95%|ββββββββββ| 10133/10682 [1:26:41<04:31, 2.02it/s]
|
953 |
95%|ββββββββββ| 10134/10682 [1:26:42<04:30, 2.02it/s]
|
954 |
95%|ββββββββββ| 10135/10682 [1:26:42<04:30, 2.02it/s]
|
955 |
95%|ββββββββββ| 10136/10682 [1:26:43<04:30, 2.02it/s]
|
956 |
95%|ββββββββββ| 10137/10682 [1:26:43<04:29, 2.02it/s]
|
957 |
95%|ββββββββββ| 10138/10682 [1:26:44<04:28, 2.02it/s]
|
958 |
95%|ββββββββββ| 10139/10682 [1:26:44<04:28, 2.02it/s]
|
959 |
95%|ββββββββββ| 10140/10682 [1:26:45<04:27, 2.02it/s]
|
960 |
95%|ββββββββββ| 10141/10682 [1:26:45<04:27, 2.03it/s]
|
961 |
95%|ββββββββββ| 10142/10682 [1:26:46<04:26, 2.02it/s]
|
962 |
95%|ββββββββββ| 10143/10682 [1:26:46<04:26, 2.02it/s]
|
963 |
95%|ββββββββββ| 10144/10682 [1:26:47<04:25, 2.03it/s]
|
964 |
95%|ββββββββββ| 10145/10682 [1:26:47<04:25, 2.03it/s]
|
965 |
95%|ββββββββββ| 10146/10682 [1:26:48<04:24, 2.03it/s]
|
966 |
95%|ββββββββββ| 10147/10682 [1:26:48<04:23, 2.03it/s]
|
967 |
95%|ββββββββββ| 10148/10682 [1:26:49<04:23, 2.03it/s]
|
968 |
95%|ββββββββββ| 10149/10682 [1:26:49<04:22, 2.03it/s]
|
969 |
95%|ββββββββββ| 10150/10682 [1:26:50<04:22, 2.03it/s]{'loss': 2.8061, 'grad_norm': 0.2488679438829422, 'learning_rate': 7.537908845868024e-06, 'epoch': 13.29}
|
|
|
970 |
|
971 |
95%|ββββββββββ| 10150/10682 [1:26:50<04:22, 2.03it/s]
|
972 |
95%|ββββββββββ| 10151/10682 [1:26:50<04:22, 2.02it/s]
|
973 |
95%|ββββββββββ| 10152/10682 [1:26:51<04:22, 2.02it/s]
|
974 |
95%|ββββββββββ| 10153/10682 [1:26:51<04:21, 2.03it/s]
|
975 |
95%|ββββββββββ| 10154/10682 [1:26:52<04:43, 1.86it/s]
|
976 |
95%|ββββββββββ| 10155/10682 [1:26:52<04:35, 1.91it/s]
|
977 |
95%|ββββββββββ| 10156/10682 [1:26:53<04:30, 1.94it/s]
|
978 |
95%|ββββββββββ| 10157/10682 [1:26:53<04:26, 1.97it/s]
|
979 |
95%|ββββββββββ| 10158/10682 [1:26:54<04:23, 1.99it/s]
|
980 |
95%|ββββββββββ| 10159/10682 [1:26:54<04:21, 2.00it/s]
|
981 |
95%|ββββββββββ| 10160/10682 [1:26:55<04:20, 2.00it/s]
|
982 |
95%|ββββββββββ| 10161/10682 [1:26:55<04:19, 2.01it/s]
|
983 |
95%|ββββββββββ| 10162/10682 [1:26:56<04:18, 2.01it/s]
|
984 |
95%|ββββββββββ| 10163/10682 [1:26:56<04:17, 2.02it/s]
|
985 |
95%|ββββββββββ| 10164/10682 [1:26:57<04:16, 2.02it/s]
|
986 |
95%|ββββββββββ| 10165/10682 [1:26:57<04:15, 2.02it/s]
|
987 |
95%|ββββββββββ| 10166/10682 [1:26:58<04:15, 2.02it/s]
|
988 |
95%|ββββββββββ| 10167/10682 [1:26:58<04:14, 2.02it/s]
|
989 |
95%|ββββββββββ| 10168/10682 [1:26:59<04:14, 2.02it/s]
|
990 |
95%|ββββββββββ| 10169/10682 [1:26:59<04:13, 2.02it/s]
|
991 |
95%|ββββββββββ| 10170/10682 [1:27:00<04:12, 2.03it/s]
|
992 |
95%|ββββββββββ| 10171/10682 [1:27:00<04:12, 2.03it/s]
|
993 |
95%|ββββββββββ| 10172/10682 [1:27:01<04:11, 2.03it/s]
|
994 |
95%|ββββββββββ| 10173/10682 [1:27:01<04:11, 2.03it/s]
|
995 |
95%|ββββββββββ| 10174/10682 [1:27:02<04:10, 2.03it/s]
|
996 |
95%|ββββββββββ| 10175/10682 [1:27:02<04:10, 2.03it/s]{'loss': 2.8033, 'grad_norm': 0.2522995173931122, 'learning_rate': 6.847688328344037e-06, 'epoch': 13.33}
|
|
|
997 |
|
998 |
95%|ββββββββββ| 10175/10682 [1:27:02<04:10, 2.03it/s]
|
999 |
95%|ββββββββββ| 10176/10682 [1:27:03<04:10, 2.02it/s]
|
1000 |
95%|ββββββββββ| 10177/10682 [1:27:03<04:09, 2.02it/s]
|
1001 |
95%|ββββββββββ| 10178/10682 [1:27:04<04:09, 2.02it/s]
|
1002 |
95%|ββββββββββ| 10179/10682 [1:27:04<04:08, 2.02it/s]
|
1003 |
95%|ββββββββββ| 10180/10682 [1:27:05<04:07, 2.03it/s]
|
1004 |
95%|ββββββββββ| 10181/10682 [1:27:05<04:07, 2.03it/s]
|
1005 |
95%|ββββββββββ| 10182/10682 [1:27:06<04:06, 2.03it/s]
|
1006 |
95%|ββββββββββ| 10183/10682 [1:27:06<04:06, 2.02it/s]
|
1007 |
95%|ββββββββββ| 10184/10682 [1:27:07<04:05, 2.03it/s]
|
1008 |
95%|ββββββββββ| 10185/10682 [1:27:07<04:05, 2.03it/s]
|
1009 |
95%|ββββββββββ| 10186/10682 [1:27:08<04:04, 2.03it/s]
|
1010 |
95%|ββββββββββ| 10187/10682 [1:27:08<04:04, 2.03it/s]
|
1011 |
95%|ββββββββββ| 10188/10682 [1:27:09<04:03, 2.03it/s]
|
1012 |
95%|ββββββββββ| 10189/10682 [1:27:09<04:03, 2.03it/s]
|
1013 |
95%|ββββββββββ| 10190/10682 [1:27:10<04:02, 2.03it/s]
|
1014 |
95%|ββββββββββ| 10191/10682 [1:27:10<04:02, 2.02it/s]
|
1015 |
95%|ββββββββββ| 10192/10682 [1:27:11<04:01, 2.03it/s]
|
1016 |
95%|ββββββββββ| 10193/10682 [1:27:11<04:01, 2.02it/s]
|
1017 |
95%|ββββββββββ| 10194/10682 [1:27:12<04:00, 2.03it/s]
|
1018 |
95%|ββββββββββ| 10195/10682 [1:27:12<04:01, 2.02it/s]
|
1019 |
95%|ββββββββββ| 10196/10682 [1:27:13<04:00, 2.02it/s]
|
1020 |
95%|ββββββββββ| 10197/10682 [1:27:13<04:00, 2.02it/s]
|
1021 |
95%|ββββββββββ| 10198/10682 [1:27:13<03:59, 2.02it/s]
|
1022 |
95%|ββββββββββ| 10199/10682 [1:27:14<03:58, 2.02it/s]
|
1023 |
95%|ββββββββββ| 10200/10682 [1:27:14<03:58, 2.02it/s]{'loss': 2.799, 'grad_norm': 0.2519816756248474, 'learning_rate': 6.190386348572108e-06, 'epoch': 13.36}
|
|
|
1024 |
|
1025 |
95%|ββββββββββ| 10200/10682 [1:27:14<03:58, 2.02it/s]
|
1026 |
95%|ββββββββββ| 10201/10682 [1:27:15<03:57, 2.02it/s]
|
1027 |
96%|ββββββββββ| 10202/10682 [1:27:15<03:56, 2.03it/s]
|
1028 |
96%|ββββββββββ| 10203/10682 [1:27:16<03:56, 2.03it/s]
|
1029 |
96%|ββββββββββ| 10204/10682 [1:27:16<03:55, 2.03it/s]
|
1030 |
96%|ββββββββββ| 10205/10682 [1:27:17<03:55, 2.03it/s]
|
1031 |
96%|ββββββββββ| 10206/10682 [1:27:17<03:54, 2.03it/s]
|
1032 |
96%|ββββββββββ| 10207/10682 [1:27:18<03:54, 2.03it/s]
|
1033 |
96%|ββββββββββ| 10208/10682 [1:27:18<03:54, 2.03it/s]
|
1034 |
96%|ββββββββββ| 10209/10682 [1:27:19<03:53, 2.02it/s]
|
1035 |
96%|ββββββββββ| 10210/10682 [1:27:19<03:52, 2.03it/s]
|
1036 |
96%|ββββββββββ| 10211/10682 [1:27:20<03:52, 2.03it/s]
|
1037 |
96%|ββββββββββ| 10212/10682 [1:27:20<03:51, 2.03it/s]
|
1038 |
96%|ββββββββββ| 10213/10682 [1:27:21<03:51, 2.03it/s]
|
1039 |
96%|ββββββββββ| 10214/10682 [1:27:21<03:50, 2.03it/s]
|
1040 |
96%|ββββββββββ| 10215/10682 [1:27:22<03:50, 2.03it/s]
|
1041 |
96%|ββββββββββ| 10216/10682 [1:27:22<03:49, 2.03it/s]
|
1042 |
96%|ββββββββββ| 10217/10682 [1:27:23<03:49, 2.03it/s]
|
1043 |
96%|ββββββββββ| 10218/10682 [1:27:23<03:48, 2.03it/s]
|
1044 |
96%|ββββββββββ| 10219/10682 [1:27:24<03:48, 2.03it/s]
|
1045 |
96%|ββββββββββ| 10220/10682 [1:27:24<03:47, 2.03it/s]
|
1046 |
96%|ββββββββββ| 10221/10682 [1:27:25<03:47, 2.03it/s]
|
1047 |
96%|ββββββββββ| 10222/10682 [1:27:25<03:46, 2.03it/s]
|
1048 |
96%|ββββββββββ| 10223/10682 [1:27:26<03:46, 2.03it/s]
|
1049 |
96%|ββββββββββ| 10224/10682 [1:27:26<03:45, 2.03it/s]
|
1050 |
96%|ββββββββββ| 10225/10682 [1:27:27<03:45, 2.03it/s]{'loss': 2.7962, 'grad_norm': 0.2496296763420105, 'learning_rate': 5.56604678228706e-06, 'epoch': 13.39}
|
|
|
1051 |
|
1052 |
96%|ββββββββββ| 10225/10682 [1:27:27<03:45, 2.03it/s]
|
1053 |
96%|ββββββββββ| 10226/10682 [1:27:27<03:44, 2.03it/s]
|
1054 |
96%|ββββββββββ| 10227/10682 [1:27:28<03:44, 2.03it/s]
|
1055 |
96%|ββββββββββ| 10228/10682 [1:27:28<03:43, 2.03it/s]
|
1056 |
96%|ββββββββββ| 10229/10682 [1:27:29<03:43, 2.03it/s]
|
1057 |
96%|ββββββββββ| 10230/10682 [1:27:29<03:42, 2.03it/s]
|
1058 |
96%|ββββββββββ| 10231/10682 [1:27:30<03:42, 2.03it/s]
|
1059 |
96%|ββββββββββ| 10232/10682 [1:27:30<03:41, 2.03it/s]
|
1060 |
96%|ββββββββββ| 10233/10682 [1:27:31<03:41, 2.03it/s]
|
1061 |
96%|ββββββββββ| 10234/10682 [1:27:31<03:41, 2.03it/s]
|
1062 |
96%|ββββββββββ| 10235/10682 [1:27:32<03:40, 2.03it/s]
|
1063 |
96%|ββββββββββ| 10236/10682 [1:27:32<03:39, 2.03it/s]
|
1064 |
96%|ββββββββββ| 10237/10682 [1:27:33<03:39, 2.03it/s]
|
1065 |
96%|ββββββββββ| 10238/10682 [1:27:33<03:39, 2.03it/s]
|
1066 |
96%|ββββββββββ| 10239/10682 [1:27:34<03:38, 2.03it/s]
|
1067 |
96%|ββββββββββ| 10240/10682 [1:27:34<03:38, 2.03it/s]
|
1068 |
96%|ββββββββββ| 10241/10682 [1:27:35<03:37, 2.03it/s]
|
1069 |
96%|ββββββββββ| 10242/10682 [1:27:35<03:37, 2.03it/s]
|
1070 |
96%|ββββββββββ| 10243/10682 [1:27:36<03:36, 2.03it/s]
|
1071 |
96%|ββββββββββ| 10244/10682 [1:27:36<03:36, 2.03it/s]
|
1072 |
96%|ββββββββββ| 10245/10682 [1:27:37<03:35, 2.03it/s]
|
1073 |
96%|ββββββββββ| 10246/10682 [1:27:37<03:35, 2.03it/s]
|
1074 |
96%|ββββββββββ| 10247/10682 [1:27:38<03:34, 2.03it/s]
|
1075 |
96%|ββββββββββ| 10248/10682 [1:27:38<03:34, 2.02it/s]
|
1076 |
96%|ββββββββββ| 10249/10682 [1:27:39<03:33, 2.03it/s]
|
1077 |
96%|ββββββββββ| 10250/10682 [1:27:39<03:33, 2.03it/s]{'loss': 2.7955, 'grad_norm': 0.24875780940055847, 'learning_rate': 4.974711304941093e-06, 'epoch': 13.43}
|
1078 |
|
|
|
1079 |
96%|ββββββββββ| 10250/10682 [1:27:39<03:33, 2.03it/s]
|
1080 |
96%|ββββββββββ| 10251/10682 [1:27:40<03:32, 2.02it/s]
|
1081 |
96%|ββββββββββ| 10252/10682 [1:27:40<03:32, 2.03it/s]
|
1082 |
96%|ββββββββββ| 10253/10682 [1:27:41<03:49, 1.87it/s]
|
1083 |
96%|ββββββββββ| 10254/10682 [1:27:41<03:43, 1.91it/s]
|
1084 |
96%|ββββββββββ| 10255/10682 [1:27:42<03:39, 1.94it/s]
|
1085 |
96%|ββββββββββ| 10256/10682 [1:27:42<03:36, 1.97it/s]
|
1086 |
96%|ββββββββββ| 10257/10682 [1:27:43<03:33, 1.99it/s]
|
1087 |
96%|ββββββββββ| 10258/10682 [1:27:43<03:32, 2.00it/s]
|
1088 |
96%|ββββββββββ| 10259/10682 [1:27:44<03:30, 2.01it/s]
|
1089 |
96%|ββββββββββ| 10260/10682 [1:27:44<03:29, 2.01it/s]
|
1090 |
96%|ββββββββββ| 10261/10682 [1:27:45<03:28, 2.02it/s]
|
1091 |
96%|ββββββββββ| 10262/10682 [1:27:45<03:28, 2.02it/s]
|
1092 |
96%|ββββββββββ| 10263/10682 [1:27:46<03:27, 2.02it/s]
|
1093 |
96%|ββββββββββ| 10264/10682 [1:27:46<03:26, 2.02it/s]
|
1094 |
96%|ββββββββββ| 10265/10682 [1:27:47<03:26, 2.02it/s]
|
1095 |
96%|ββββββββββ| 10266/10682 [1:27:47<03:25, 2.03it/s]
|
1096 |
96%|ββββββββββ| 10267/10682 [1:27:48<03:24, 2.03it/s]
|
1097 |
96%|ββββββββββ| 10268/10682 [1:27:48<03:24, 2.03it/s]
|
1098 |
96%|ββββββββββ| 10269/10682 [1:27:49<03:23, 2.03it/s]
|
1099 |
96%|ββββββββββ| 10270/10682 [1:27:49<03:23, 2.03it/s]
|
1100 |
96%|ββββββββββ| 10271/10682 [1:27:50<03:22, 2.03it/s]
|
1101 |
96%|ββββββββββ| 10272/10682 [1:27:50<03:22, 2.03it/s]
|
1102 |
96%|ββββββββββ| 10273/10682 [1:27:51<03:21, 2.03it/s]
|
1103 |
96%|ββββββββββ| 10274/10682 [1:27:51<03:21, 2.03it/s]
|
1104 |
96%|ββββββββββ| 10275/10682 [1:27:52<03:20, 2.03it/s]{'loss': 2.8098, 'grad_norm': 0.25310125946998596, 'learning_rate': 4.416419388921844e-06, 'epoch': 13.46}
|
|
|
1105 |
|
1106 |
96%|ββββββββββ| 10275/10682 [1:27:52<03:20, 2.03it/s]
|
1107 |
96%|ββββββββββ| 10276/10682 [1:27:52<03:20, 2.02it/s]
|
1108 |
96%|ββββββββββ| 10277/10682 [1:27:53<03:20, 2.02it/s]
|
1109 |
96%|ββββββββββ| 10278/10682 [1:27:53<03:19, 2.02it/s]
|
1110 |
96%|ββββββββββ| 10279/10682 [1:27:54<03:18, 2.03it/s]
|
1111 |
96%|ββββββββββ| 10280/10682 [1:27:54<03:18, 2.02it/s]
|
1112 |
96%|ββββββββββ| 10281/10682 [1:27:55<03:18, 2.02it/s]
|
1113 |
96%|ββββββββββ| 10282/10682 [1:27:55<03:17, 2.03it/s]
|
1114 |
96%|ββββββββββ| 10283/10682 [1:27:56<03:16, 2.03it/s]
|
1115 |
96%|ββββββββββ| 10284/10682 [1:27:56<03:16, 2.03it/s]
|
1116 |
96%|ββββββββββ| 10285/10682 [1:27:57<03:15, 2.03it/s]
|
1117 |
96%|ββββββββββ| 10286/10682 [1:27:57<03:15, 2.03it/s]
|
1118 |
96%|ββββββββββ| 10287/10682 [1:27:58<03:14, 2.03it/s]
|
1119 |
96%|ββββββββββ| 10288/10682 [1:27:58<03:14, 2.03it/s]
|
1120 |
96%|ββββββββββ| 10289/10682 [1:27:59<03:13, 2.03it/s]
|
1121 |
96%|ββββββββββ| 10290/10682 [1:27:59<03:13, 2.03it/s]
|
1122 |
96%|ββββββββββ| 10291/10682 [1:28:00<03:13, 2.03it/s]
|
1123 |
96%|ββββββββββ| 10292/10682 [1:28:00<03:12, 2.03it/s]
|
1124 |
96%|ββββββββββ| 10293/10682 [1:28:01<03:11, 2.03it/s]
|
1125 |
96%|ββββββββββ| 10294/10682 [1:28:01<03:11, 2.02it/s]
|
1126 |
96%|ββββββββββ| 10295/10682 [1:28:01<03:10, 2.03it/s]
|
1127 |
96%|ββββββββββ| 10296/10682 [1:28:02<03:10, 2.03it/s]
|
1128 |
96%|ββββββββββ| 10297/10682 [1:28:02<03:09, 2.03it/s]
|
1129 |
96%|ββββββββββ| 10298/10682 [1:28:03<03:09, 2.03it/s]
|
1130 |
96%|ββββββββββ| 10299/10682 [1:28:03<03:09, 2.03it/s]
|
1131 |
96%|ββββββββββ| 10300/10682 [1:28:04<03:08, 2.03it/s]
|
1132 |
|
|
|
1133 |
96%|ββββββββββ| 10300/10682 [1:28:04<03:08, 2.03it/s]
|
1134 |
96%|ββββββββββ| 10301/10682 [1:28:04<03:08, 2.02it/s]
|
1135 |
96%|ββββββββββ| 10302/10682 [1:28:05<03:08, 2.02it/s]
|
1136 |
96%|ββββββββββ| 10303/10682 [1:28:05<03:07, 2.02it/s]
|
1137 |
96%|ββββββββββ| 10304/10682 [1:28:06<03:06, 2.02it/s]
|
1138 |
96%|ββββββββββ| 10305/10682 [1:28:06<03:06, 2.03it/s]
|
1139 |
96%|ββββββββββ| 10306/10682 [1:28:07<03:05, 2.03it/s]
|
1140 |
96%|ββββββββββ| 10307/10682 [1:28:07<03:05, 2.03it/s]
|
1141 |
96%|ββββββββββ| 10308/10682 [1:28:08<03:04, 2.03it/s]
|
1142 |
97%|ββββββββββ| 10309/10682 [1:28:08<03:04, 2.03it/s]
|
1143 |
97%|ββββββββββ| 10310/10682 [1:28:09<03:03, 2.03it/s]
|
1144 |
97%|ββββββββββ| 10311/10682 [1:28:09<03:03, 2.03it/s]
|
1145 |
97%|ββββββββββ| 10312/10682 [1:28:10<03:02, 2.03it/s]
|
1146 |
97%|ββββββββββ| 10313/10682 [1:28:10<03:02, 2.03it/s]
|
1147 |
97%|ββββββββββ| 10314/10682 [1:28:11<03:01, 2.03it/s]
|
1148 |
97%|ββββββββββ| 10315/10682 [1:28:11<03:01, 2.03it/s]
|
1149 |
97%|ββββββββββ| 10316/10682 [1:28:12<03:00, 2.02it/s]
|
1150 |
97%|ββββββββββ| 10317/10682 [1:28:12<03:00, 2.03it/s]
|
1151 |
97%|ββββββββββ| 10318/10682 [1:28:13<02:59, 2.03it/s]
|
1152 |
97%|ββββββββββ| 10319/10682 [1:28:13<02:59, 2.03it/s]
|
1153 |
97%|ββββββββββ| 10320/10682 [1:28:14<02:58, 2.03it/s]
|
1154 |
97%|ββββββββββ| 10321/10682 [1:28:14<02:59, 2.01it/s]
|
1155 |
97%|ββββββββββ| 10322/10682 [1:28:15<02:58, 2.02it/s]
|
1156 |
97%|ββββββββββ| 10323/10682 [1:28:15<02:57, 2.02it/s]
|
1157 |
97%|ββββββββββ| 10324/10682 [1:28:16<02:57, 2.02it/s]
|
1158 |
97%|ββββββββββ| 10325/10682 [1:28:16<02:56, 2.03it/s]
|
1159 |
{'loss': 2.8013, 'grad_norm': 0.2537311613559723, 'learning_rate': 3.3991130994299734e-06, 'epoch': 13.52}
|
|
|
1160 |
97%|ββββββββββ| 10325/10682 [1:28:16<02:56, 2.03it/s]
|
1161 |
97%|ββββββββββ| 10326/10682 [1:28:17<02:56, 2.02it/s]
|
1162 |
97%|ββββββββββ| 10327/10682 [1:28:17<02:55, 2.02it/s]
|
1163 |
97%|ββββββββββ| 10328/10682 [1:28:18<02:54, 2.03it/s]
|
1164 |
97%|ββββββββββ| 10329/10682 [1:28:18<02:54, 2.02it/s]
|
1165 |
97%|ββββββββββ| 10330/10682 [1:28:19<02:53, 2.02it/s]
|
1166 |
97%|ββββββββββ| 10331/10682 [1:28:19<02:53, 2.02it/s]
|
1167 |
97%|ββββββββββ| 10332/10682 [1:28:20<02:52, 2.03it/s]
|
1168 |
97%|ββββββββββ| 10333/10682 [1:28:20<02:52, 2.02it/s]
|
1169 |
97%|ββββββββββ| 10334/10682 [1:28:21<02:51, 2.03it/s]
|
1170 |
97%|ββββββββββ| 10335/10682 [1:28:21<02:51, 2.02it/s]
|
1171 |
97%|ββββββββββ| 10336/10682 [1:28:22<02:50, 2.02it/s]
|
1172 |
97%|ββββββββββ| 10337/10682 [1:28:22<02:50, 2.02it/s]
|
1173 |
97%|ββββββββββ| 10338/10682 [1:28:23<02:49, 2.02it/s]
|
1174 |
97%|ββββββββββ| 10339/10682 [1:28:23<02:49, 2.02it/s]
|
1175 |
97%|ββββββββββ| 10340/10682 [1:28:24<02:48, 2.03it/s]
|
1176 |
97%|ββββββββββ| 10341/10682 [1:28:24<02:48, 2.02it/s]
|
1177 |
97%|ββββββββββ| 10342/10682 [1:28:25<02:47, 2.03it/s]
|
1178 |
97%|ββββββββββ| 10343/10682 [1:28:25<02:47, 2.03it/s]
|
1179 |
97%|ββββββββββ| 10344/10682 [1:28:26<02:46, 2.03it/s]
|
1180 |
97%|ββββββββββ| 10345/10682 [1:28:26<02:46, 2.03it/s]
|
1181 |
97%|ββββββββββ| 10346/10682 [1:28:27<02:45, 2.03it/s]
|
1182 |
97%|ββββββββββ| 10347/10682 [1:28:27<02:45, 2.03it/s]
|
1183 |
97%|ββββββββββ| 10348/10682 [1:28:28<02:44, 2.03it/s]
|
1184 |
97%|ββββββββββ| 10349/10682 [1:28:28<02:44, 2.03it/s]
|
1185 |
97%|ββββββββββ| 10350/10682 [1:28:29<02:43, 2.03it/s]{'loss': 2.8039, 'grad_norm': 0.24740473926067352, 'learning_rate': 2.940166632433183e-06, 'epoch': 13.56}
|
|
|
1186 |
|
1187 |
97%|ββββββββββ| 10350/10682 [1:28:29<02:43, 2.03it/s]
|
1188 |
97%|ββββββββββ| 10351/10682 [1:28:29<02:43, 2.03it/s]
|
1189 |
97%|ββββββββββ| 10352/10682 [1:28:30<02:42, 2.03it/s]
|
1190 |
97%|ββββββββββ| 10353/10682 [1:28:30<02:42, 2.03it/s]
|
1191 |
97%|ββββββββββ| 10354/10682 [1:28:31<02:41, 2.03it/s]
|
1192 |
97%|ββββββββββ| 10355/10682 [1:28:31<02:41, 2.03it/s]
|
1193 |
97%|ββββββββββ| 10356/10682 [1:28:32<02:40, 2.03it/s]
|
1194 |
97%|ββββββββββ| 10357/10682 [1:28:32<02:40, 2.03it/s]
|
1195 |
97%|ββββββββββ| 10358/10682 [1:28:33<02:39, 2.03it/s]
|
1196 |
97%|ββββββββββ| 10359/10682 [1:28:33<02:39, 2.03it/s]
|
1197 |
97%|ββββββββββ| 10360/10682 [1:28:34<02:38, 2.03it/s]
|
1198 |
97%|ββββββββββ| 10361/10682 [1:28:34<02:38, 2.03it/s]
|
1199 |
97%|ββββββββββ| 10362/10682 [1:28:35<02:37, 2.03it/s]
|
1200 |
97%|ββββββββββ| 10363/10682 [1:28:35<02:37, 2.03it/s]
|
1201 |
97%|ββββββββββ| 10364/10682 [1:28:36<02:36, 2.03it/s]
|
1202 |
97%|ββββββββββ| 10365/10682 [1:28:36<02:36, 2.02it/s]
|
1203 |
97%|ββββββββββ| 10366/10682 [1:28:37<02:35, 2.03it/s]
|
1204 |
97%|ββββββββββ| 10367/10682 [1:28:37<02:35, 2.02it/s]
|
1205 |
97%|ββββββββββ| 10368/10682 [1:28:38<02:34, 2.03it/s]
|
1206 |
97%|ββββββββββ| 10369/10682 [1:28:38<02:34, 2.03it/s]
|
1207 |
97%|ββββββββββ| 10370/10682 [1:28:39<02:34, 2.02it/s]
|
1208 |
97%|ββββββββββ| 10371/10682 [1:28:39<02:33, 2.03it/s]
|
1209 |
97%|ββββββββββ| 10372/10682 [1:28:40<02:33, 2.03it/s]
|
1210 |
97%|ββββββββββ| 10373/10682 [1:28:40<02:32, 2.03it/s]
|
1211 |
97%|ββββββββββ| 10374/10682 [1:28:40<02:31, 2.03it/s]
|
1212 |
97%|ββββββββββ| 10375/10682 [1:28:41<02:31, 2.03it/s]{'loss': 2.7982, 'grad_norm': 0.25136321783065796, 'learning_rate': 2.5143995351817882e-06, 'epoch': 13.59}
|
|
|
1213 |
|
1214 |
97%|ββββββββββ| 10375/10682 [1:28:41<02:31, 2.03it/s]
|
1215 |
97%|ββββββββββ| 10376/10682 [1:28:41<02:31, 2.02it/s]
|
1216 |
97%|ββββββββββ| 10377/10682 [1:28:42<02:30, 2.02it/s]
|
1217 |
97%|ββββββββββ| 10378/10682 [1:28:42<02:30, 2.02it/s]
|
1218 |
97%|ββββββββββ| 10379/10682 [1:28:43<02:29, 2.02it/s]
|
1219 |
97%|ββββββββββ| 10380/10682 [1:28:43<02:29, 2.02it/s]
|
1220 |
97%|ββββββββββ| 10381/10682 [1:28:44<02:28, 2.03it/s]
|
1221 |
97%|ββββββββββ| 10382/10682 [1:28:44<02:28, 2.03it/s]
|
1222 |
97%|ββββββββββ| 10383/10682 [1:28:45<02:27, 2.03it/s]
|
1223 |
97%|ββββββββββ| 10384/10682 [1:28:45<02:26, 2.03it/s]
|
1224 |
97%|ββββββββββ| 10385/10682 [1:28:46<02:26, 2.03it/s]
|
1225 |
97%|ββββββββββ| 10386/10682 [1:28:46<02:25, 2.03it/s]
|
1226 |
97%|ββββββββββ| 10387/10682 [1:28:47<02:25, 2.03it/s]
|
1227 |
97%|ββββββββββ| 10388/10682 [1:28:47<02:24, 2.03it/s]
|
1228 |
97%|ββββββββββ| 10389/10682 [1:28:48<02:24, 2.03it/s]
|
1229 |
97%|ββββββββββ| 10390/10682 [1:28:48<02:24, 2.03it/s]
|
1230 |
97%|ββββββββββ| 10391/10682 [1:28:49<02:23, 2.03it/s]
|
1231 |
97%|ββββββββββ| 10392/10682 [1:28:49<02:23, 2.03it/s]
|
1232 |
97%|ββββββββββ| 10393/10682 [1:28:50<02:22, 2.03it/s]
|
1233 |
97%|ββββββββββ| 10394/10682 [1:28:50<02:22, 2.03it/s]
|
1234 |
97%|ββββββββββ| 10395/10682 [1:28:51<02:21, 2.03it/s]
|
1235 |
97%|ββββββββββ| 10396/10682 [1:28:51<02:20, 2.03it/s]
|
1236 |
97%|ββββββββββ| 10397/10682 [1:28:52<02:20, 2.03it/s]
|
1237 |
97%|ββββββββββ| 10398/10682 [1:28:52<02:19, 2.03it/s]
|
1238 |
97%|ββββββββββ| 10399/10682 [1:28:53<02:19, 2.03it/s]
|
1239 |
97%|ββββββββββ| 10400/10682 [1:28:53<02:19, 2.03it/s]{'loss': 2.7831, 'grad_norm': 0.24997153878211975, 'learning_rate': 2.1218402281655835e-06, 'epoch': 13.62}
|
1240 |
|
|
|
1241 |
97%|ββββββββββ| 10400/10682 [1:28:53<02:19, 2.03it/s]
|
1242 |
97%|ββββββββββ| 10401/10682 [1:28:54<02:18, 2.02it/s]
|
1243 |
97%|ββββββββββ| 10402/10682 [1:28:54<02:18, 2.03it/s]
|
1244 |
97%|ββββββββββ| 10403/10682 [1:28:55<02:17, 2.03it/s]
|
1245 |
97%|ββββββββββ| 10404/10682 [1:28:55<02:17, 2.03it/s]
|
1246 |
97%|ββββββββββ| 10405/10682 [1:28:56<02:16, 2.03it/s]
|
1247 |
97%|ββββββββββ| 10406/10682 [1:28:56<02:16, 2.03it/s]
|
1248 |
97%|ββββββββββ| 10407/10682 [1:28:57<02:15, 2.03it/s]
|
1249 |
97%|ββββββββββ| 10408/10682 [1:28:57<02:15, 2.03it/s]
|
1250 |
97%|ββββββββββ| 10409/10682 [1:28:58<02:14, 2.03it/s]
|
1251 |
97%|ββββββββββ| 10410/10682 [1:28:58<02:13, 2.03it/s]
|
1252 |
97%|ββββββββββ| 10411/10682 [1:28:59<02:13, 2.03it/s]
|
1253 |
97%|ββββββββββ| 10412/10682 [1:28:59<02:12, 2.03it/s]
|
1254 |
97%|ββββββββββ| 10413/10682 [1:29:00<02:12, 2.03it/s]
|
1255 |
97%|ββββββββββ| 10414/10682 [1:29:00<02:11, 2.03it/s]
|
1256 |
98%|ββββββββββ| 10415/10682 [1:29:01<02:11, 2.03it/s]
|
1257 |
98%|ββββββββββ| 10416/10682 [1:29:01<02:10, 2.03it/s]
|
1258 |
98%|ββββββββββ| 10417/10682 [1:29:02<02:10, 2.03it/s]
|
1259 |
98%|ββββββββββ| 10418/10682 [1:29:02<02:09, 2.03it/s]
|
1260 |
98%|ββββββββββ| 10419/10682 [1:29:03<02:09, 2.03it/s]
|
1261 |
98%|ββββββββββ| 10420/10682 [1:29:03<02:08, 2.03it/s]
|
1262 |
98%|ββββββββββ| 10421/10682 [1:29:04<02:08, 2.03it/s]
|
1263 |
98%|ββββββββββ| 10422/10682 [1:29:04<02:07, 2.03it/s]
|
1264 |
98%|ββββββββββ| 10423/10682 [1:29:05<02:07, 2.03it/s]
|
1265 |
98%|ββββββββββ| 10424/10682 [1:29:05<02:06, 2.04it/s]
|
1266 |
98%|ββββββββββ| 10425/10682 [1:29:06<02:06, 2.03it/s]
|
1267 |
|
|
|
1268 |
98%|ββββββββββ| 10425/10682 [1:29:06<02:06, 2.03it/s]
|
1269 |
98%|ββββββββββ| 10426/10682 [1:29:06<02:06, 2.03it/s]
|
1270 |
98%|ββββββββββ| 10427/10682 [1:29:07<02:05, 2.03it/s]
|
1271 |
98%|ββββββββββ| 10428/10682 [1:29:07<02:05, 2.03it/s]
|
1272 |
98%|ββββββββββ| 10429/10682 [1:29:08<02:04, 2.03it/s]
|
1273 |
98%|ββββββββββ| 10430/10682 [1:29:08<02:04, 2.03it/s]
|
1274 |
98%|ββββββββββ| 10431/10682 [1:29:09<02:03, 2.03it/s]
|
1275 |
98%|ββββββββββ| 10432/10682 [1:29:09<02:03, 2.03it/s]
|
1276 |
98%|ββββββββββ| 10433/10682 [1:29:10<02:02, 2.03it/s]
|
1277 |
98%|ββββββββββ| 10434/10682 [1:29:10<02:02, 2.03it/s]
|
1278 |
98%|ββββββββββ| 10435/10682 [1:29:11<02:01, 2.03it/s]
|
1279 |
98%|ββββββββββ| 10436/10682 [1:29:11<02:01, 2.03it/s]
|
1280 |
98%|ββββββββββ| 10437/10682 [1:29:12<02:00, 2.03it/s]
|
1281 |
98%|ββββββββββ| 10438/10682 [1:29:12<02:00, 2.03it/s]
|
1282 |
98%|ββββββββββ| 10439/10682 [1:29:13<01:59, 2.03it/s]
|
1283 |
98%|ββββββββββ| 10440/10682 [1:29:13<01:59, 2.03it/s]
|
1284 |
98%|ββββββββββ| 10441/10682 [1:29:14<01:58, 2.03it/s]
|
1285 |
98%|ββββββββββ| 10442/10682 [1:29:14<01:58, 2.03it/s]
|
1286 |
98%|ββββββββββ| 10443/10682 [1:29:14<01:57, 2.03it/s]
|
1287 |
98%|ββββββββββ| 10444/10682 [1:29:15<01:57, 2.03it/s]
|
1288 |
98%|ββββββββββ| 10445/10682 [1:29:15<01:56, 2.03it/s]
|
1289 |
98%|ββββββββββ| 10446/10682 [1:29:16<01:56, 2.03it/s]
|
1290 |
98%|ββββββββββ| 10447/10682 [1:29:16<01:55, 2.03it/s]
|
1291 |
98%|ββββββββββ| 10448/10682 [1:29:17<01:55, 2.03it/s]
|
1292 |
98%|ββββββββββ| 10449/10682 [1:29:17<01:54, 2.03it/s]
|
1293 |
98%|ββββββββββ| 10450/10682 [1:29:18<01:54, 2.03it/s]
|
1294 |
|
|
|
1295 |
98%|ββββββββββ| 10450/10682 [1:29:18<01:54, 2.03it/s]
|
1296 |
98%|ββββββββββ| 10451/10682 [1:29:18<01:54, 2.02it/s]
|
1297 |
98%|ββββββββββ| 10452/10682 [1:29:19<01:53, 2.02it/s]
|
1298 |
98%|ββββββββββ| 10453/10682 [1:29:19<01:53, 2.03it/s]
|
1299 |
98%|ββββββββββ| 10454/10682 [1:29:20<01:52, 2.03it/s]
|
1300 |
98%|ββββββββββ| 10455/10682 [1:29:20<01:51, 2.03it/s]
|
1301 |
98%|ββββββββββ| 10456/10682 [1:29:21<01:51, 2.03it/s]
|
1302 |
98%|ββββββββββ| 10457/10682 [1:29:21<01:50, 2.03it/s]
|
1303 |
98%|ββββββββββ| 10458/10682 [1:29:22<01:50, 2.03it/s]
|
1304 |
98%|ββββββββββ| 10459/10682 [1:29:22<01:49, 2.03it/s]
|
1305 |
98%|ββββββββββ| 10460/10682 [1:29:23<01:49, 2.03it/s]
|
1306 |
98%|ββββββββββ| 10461/10682 [1:29:23<01:48, 2.03it/s]
|
1307 |
98%|ββββββββββ| 10462/10682 [1:29:24<01:48, 2.03it/s]
|
1308 |
98%|ββββββββββ| 10463/10682 [1:29:24<01:47, 2.03it/s]
|
1309 |
98%|ββββββββββ| 10464/10682 [1:29:25<01:47, 2.03it/s]
|
1310 |
98%|ββββββββββ| 10465/10682 [1:29:25<01:46, 2.03it/s]
|
1311 |
98%|ββββββββββ| 10466/10682 [1:29:26<01:46, 2.03it/s]
|
1312 |
98%|ββββββββββ| 10467/10682 [1:29:26<01:45, 2.03it/s]
|
1313 |
98%|ββββββββββ| 10468/10682 [1:29:27<01:45, 2.03it/s]
|
1314 |
98%|ββββββββββ| 10469/10682 [1:29:27<01:44, 2.03it/s]
|
1315 |
98%|ββββββββββ| 10470/10682 [1:29:28<01:44, 2.03it/s]
|
1316 |
98%|ββββββββββ| 10471/10682 [1:29:28<01:43, 2.03it/s]
|
1317 |
98%|ββββββββββ| 10472/10682 [1:29:29<01:43, 2.03it/s]
|
1318 |
98%|ββββββββββ| 10473/10682 [1:29:29<01:42, 2.03it/s]
|
1319 |
98%|ββββββββββ| 10474/10682 [1:29:30<01:42, 2.03it/s]
|
1320 |
98%|ββββββββββ| 10475/10682 [1:29:30<01:42, 2.03it/s]
|
1321 |
|
|
|
1322 |
98%|ββββββββββ| 10475/10682 [1:29:30<01:42, 2.03it/s]
|
1323 |
98%|ββββββββββ| 10476/10682 [1:29:31<01:41, 2.03it/s]
|
1324 |
98%|ββββββββββ| 10477/10682 [1:29:31<01:41, 2.03it/s]
|
1325 |
98%|ββββββββββ| 10478/10682 [1:29:32<01:40, 2.03it/s]
|
1326 |
98%|ββββββββββ| 10479/10682 [1:29:32<01:40, 2.03it/s]
|
1327 |
98%|ββββββββββ| 10480/10682 [1:29:33<01:39, 2.03it/s]
|
1328 |
98%|ββββββββββ| 10481/10682 [1:29:33<01:38, 2.03it/s]
|
1329 |
98%|ββββββββββ| 10482/10682 [1:29:34<01:38, 2.03it/s]
|
1330 |
98%|ββββββββββ| 10483/10682 [1:29:34<01:38, 2.03it/s]
|
1331 |
98%|ββββββββββ| 10484/10682 [1:29:35<01:37, 2.03it/s]
|
1332 |
98%|ββββββββββ| 10485/10682 [1:29:35<01:37, 2.03it/s]
|
1333 |
98%|ββββββββββ| 10486/10682 [1:29:36<01:36, 2.03it/s]
|
1334 |
98%|ββββββββββ| 10487/10682 [1:29:36<01:36, 2.03it/s]
|
1335 |
98%|ββββββββββ| 10488/10682 [1:29:37<01:35, 2.03it/s]
|
1336 |
98%|ββββββββββ| 10489/10682 [1:29:37<01:35, 2.03it/s]
|
1337 |
98%|ββββββββββ| 10490/10682 [1:29:38<01:34, 2.03it/s]
|
1338 |
98%|ββββββββββ| 10491/10682 [1:29:38<01:34, 2.03it/s]
|
1339 |
98%|ββββββββββ| 10492/10682 [1:29:39<01:33, 2.03it/s]
|
1340 |
98%|ββββββββββ| 10493/10682 [1:29:39<01:33, 2.03it/s]
|
1341 |
98%|ββββββββββ| 10494/10682 [1:29:40<01:32, 2.03it/s]
|
1342 |
98%|ββββββββββ| 10495/10682 [1:29:40<01:31, 2.03it/s]
|
1343 |
98%|ββββββββββ| 10496/10682 [1:29:41<01:31, 2.03it/s]
|
1344 |
98%|ββββββββββ| 10497/10682 [1:29:41<01:31, 2.03it/s]
|
1345 |
98%|ββββββββββ| 10498/10682 [1:29:42<01:30, 2.03it/s]
|
1346 |
98%|ββββββββββ| 10499/10682 [1:29:42<01:30, 2.03it/s]
|
1347 |
98%|ββββββββββ| 10500/10682 [1:29:43<01:29, 2.03it/s]{'loss': 2.8054, 'grad_norm': 0.24958878755569458, 'learning_rate': 8.841716933915555e-07, 'epoch': 13.75}
|
1348 |
|
|
|
1349 |
98%|ββββββββββ| 10500/10682 [1:29:43<01:29, 2.03it/s]
|
1350 |
98%|ββββββββββ| 10501/10682 [1:29:43<01:29, 2.03it/s]
|
1351 |
98%|ββββββββββ| 10502/10682 [1:29:44<01:28, 2.03it/s]
|
1352 |
98%|ββββββββββ| 10503/10682 [1:29:44<01:28, 2.03it/s]
|
1353 |
98%|ββββββββββ| 10504/10682 [1:29:45<01:27, 2.03it/s]
|
1354 |
98%|ββββββββββ| 10505/10682 [1:29:45<01:27, 2.03it/s]
|
1355 |
98%|ββββββββββ| 10506/10682 [1:29:46<01:26, 2.03it/s]
|
1356 |
98%|ββββββββββ| 10507/10682 [1:29:46<01:26, 2.03it/s]
|
1357 |
98%|ββββββββββ| 10508/10682 [1:29:47<01:25, 2.03it/s]
|
1358 |
98%|ββββββββββ| 10509/10682 [1:29:47<01:25, 2.03it/s]
|
1359 |
98%|ββββββββββ| 10510/10682 [1:29:48<01:24, 2.03it/s]
|
1360 |
98%|ββββββββββ| 10511/10682 [1:29:48<01:24, 2.03it/s]
|
1361 |
98%|ββββββββββ| 10512/10682 [1:29:48<01:23, 2.03it/s]
|
1362 |
98%|ββββββββββ| 10513/10682 [1:29:49<01:23, 2.03it/s]
|
1363 |
98%|ββββββββββ| 10514/10682 [1:29:49<01:22, 2.03it/s]
|
1364 |
98%|ββββββββββ| 10515/10682 [1:29:50<01:22, 2.03it/s]
|
1365 |
98%|ββββββββββ| 10516/10682 [1:29:50<01:21, 2.03it/s]
|
1366 |
98%|ββββββββββ| 10517/10682 [1:29:51<01:21, 2.03it/s]
|
1367 |
98%|ββββββββββ| 10518/10682 [1:29:51<01:20, 2.03it/s]
|
1368 |
98%|ββββββββββ| 10519/10682 [1:29:52<01:20, 2.03it/s]
|
1369 |
98%|ββββββββββ| 10520/10682 [1:29:52<01:19, 2.03it/s]
|
1370 |
98%|ββββββββββ| 10521/10682 [1:29:53<01:19, 2.03it/s]
|
1371 |
99%|ββββββββββ| 10522/10682 [1:29:53<01:18, 2.03it/s]
|
1372 |
99%|ββββββββββ| 10523/10682 [1:29:54<01:18, 2.03it/s]
|
1373 |
99%|ββββββββββ| 10524/10682 [1:29:54<01:17, 2.03it/s]
|
1374 |
99%|ββββββββββ| 10525/10682 [1:29:55<01:17, 2.03it/s]
|
1375 |
|
|
|
1376 |
99%|ββββββββββ| 10525/10682 [1:29:55<01:17, 2.03it/s]
|
1377 |
99%|ββββββββββ| 10526/10682 [1:29:55<01:17, 2.03it/s]
|
1378 |
99%|ββββββββββ| 10527/10682 [1:29:56<01:16, 2.03it/s]
|
1379 |
99%|ββββββββββ| 10528/10682 [1:29:56<01:15, 2.03it/s]
|
1380 |
99%|ββββββββββ| 10529/10682 [1:29:57<01:15, 2.03it/s]
|
1381 |
99%|ββββββββββ| 10530/10682 [1:29:57<01:14, 2.03it/s]
|
1382 |
99%|ββββββββββ| 10531/10682 [1:29:58<01:14, 2.03it/s]
|
1383 |
99%|ββββββββββ| 10532/10682 [1:29:58<01:13, 2.03it/s]
|
1384 |
99%|ββββββββββ| 10533/10682 [1:29:59<01:13, 2.03it/s]
|
1385 |
99%|ββββββββββ| 10534/10682 [1:29:59<01:12, 2.03it/s]
|
1386 |
99%|ββββββββββ| 10535/10682 [1:30:00<01:12, 2.03it/s]
|
1387 |
99%|ββββββββββ| 10536/10682 [1:30:00<01:11, 2.03it/s]
|
1388 |
99%|ββββββββββ| 10537/10682 [1:30:01<01:11, 2.03it/s]
|
1389 |
99%|ββββββββββ| 10538/10682 [1:30:01<01:10, 2.03it/s]
|
1390 |
99%|ββββββββββ| 10539/10682 [1:30:02<01:10, 2.03it/s]
|
1391 |
99%|ββββββββββ| 10540/10682 [1:30:02<01:10, 2.03it/s]
|
1392 |
99%|ββββββββββ| 10541/10682 [1:30:03<01:09, 2.02it/s]
|
1393 |
99%|ββββββββββ| 10542/10682 [1:30:03<01:09, 2.03it/s]
|
1394 |
99%|ββββββββββ| 10543/10682 [1:30:04<01:08, 2.03it/s]
|
1395 |
99%|ββββββββββ| 10544/10682 [1:30:04<01:08, 2.03it/s]
|
1396 |
99%|ββββββββββ| 10545/10682 [1:30:05<01:07, 2.03it/s]
|
1397 |
99%|ββββββββββ| 10546/10682 [1:30:05<01:07, 2.03it/s]
|
1398 |
99%|ββββββββββ| 10547/10682 [1:30:06<01:06, 2.03it/s]
|
1399 |
99%|ββββββββββ| 10548/10682 [1:30:06<01:06, 2.03it/s]
|
1400 |
99%|ββββββββββ| 10549/10682 [1:30:07<01:05, 2.02it/s]
|
1401 |
99%|ββββββββββ| 10550/10682 [1:30:07<01:05, 2.03it/s]{'loss': 2.7968, 'grad_norm': 0.25245076417922974, 'learning_rate': 4.651600211027507e-07, 'epoch': 13.82}
|
|
|
1402 |
|
1403 |
99%|ββββββββββ| 10550/10682 [1:30:07<01:05, 2.03it/s]
|
1404 |
99%|ββββββββββ| 10551/10682 [1:30:08<01:04, 2.02it/s]
|
1405 |
99%|ββββββββββ| 10552/10682 [1:30:08<01:04, 2.03it/s]
|
1406 |
99%|ββββββββββ| 10553/10682 [1:30:09<01:03, 2.03it/s]
|
1407 |
99%|ββββββββββ| 10554/10682 [1:30:09<01:03, 2.03it/s]
|
1408 |
99%|ββββββββββ| 10555/10682 [1:30:10<01:02, 2.03it/s]
|
1409 |
99%|ββββββββββ| 10556/10682 [1:30:10<01:02, 2.03it/s]
|
1410 |
99%|ββββββββββ| 10557/10682 [1:30:11<01:01, 2.03it/s]
|
1411 |
99%|ββββββββββ| 10558/10682 [1:30:11<01:01, 2.03it/s]
|
1412 |
99%|ββββββββββ| 10559/10682 [1:30:12<01:00, 2.03it/s]
|
1413 |
99%|ββββββββββ| 10560/10682 [1:30:12<01:00, 2.03it/s]
|
1414 |
99%|ββββββββββ| 10561/10682 [1:30:13<00:59, 2.03it/s]
|
1415 |
99%|ββββββββββ| 10562/10682 [1:30:13<00:59, 2.03it/s]
|
1416 |
99%|ββββββββββ| 10563/10682 [1:30:14<00:58, 2.03it/s]
|
1417 |
99%|ββββββββββ| 10564/10682 [1:30:14<00:58, 2.03it/s]
|
1418 |
99%|ββββββββββ| 10565/10682 [1:30:15<00:57, 2.03it/s]
|
1419 |
99%|ββββββββββ| 10566/10682 [1:30:15<00:57, 2.03it/s]
|
1420 |
99%|ββββββββββ| 10567/10682 [1:30:16<00:56, 2.03it/s]
|
1421 |
99%|ββββββββββ| 10568/10682 [1:30:16<00:56, 2.03it/s]
|
1422 |
99%|ββββββββββ| 10569/10682 [1:30:17<00:55, 2.03it/s]
|
1423 |
99%|ββββββββββ| 10570/10682 [1:30:17<00:55, 2.03it/s]
|
1424 |
99%|ββββββββββ| 10571/10682 [1:30:18<00:54, 2.03it/s]
|
1425 |
99%|ββββββββββ| 10572/10682 [1:30:18<00:54, 2.03it/s]
|
1426 |
99%|ββββββββββ| 10573/10682 [1:30:19<00:53, 2.03it/s]
|
1427 |
99%|ββββββββββ| 10574/10682 [1:30:19<00:53, 2.03it/s]
|
1428 |
99%|ββββββββββ| 10575/10682 [1:30:20<00:52, 2.03it/s]
|
1429 |
|
|
|
1430 |
99%|ββββββββββ| 10575/10682 [1:30:20<00:52, 2.03it/s]
|
1431 |
99%|ββββββββββ| 10576/10682 [1:30:20<00:52, 2.02it/s]
|
1432 |
99%|ββββββββββ| 10577/10682 [1:30:21<00:51, 2.02it/s]
|
1433 |
99%|ββββββββββ| 10578/10682 [1:30:21<00:51, 2.02it/s]
|
1434 |
99%|ββββββββββ| 10579/10682 [1:30:22<00:50, 2.02it/s]
|
1435 |
99%|ββββββββββ| 10580/10682 [1:30:22<00:50, 2.02it/s]
|
1436 |
99%|ββββββββββ| 10581/10682 [1:30:23<00:49, 2.02it/s]
|
1437 |
99%|ββββββββββ| 10582/10682 [1:30:23<00:49, 2.03it/s]
|
1438 |
99%|ββββββββββ| 10583/10682 [1:30:24<00:48, 2.03it/s]
|
1439 |
99%|ββββββββββ| 10584/10682 [1:30:24<00:48, 2.03it/s]
|
1440 |
99%|ββββββββββ| 10585/10682 [1:30:25<00:47, 2.03it/s]
|
1441 |
99%|ββββββββββ| 10586/10682 [1:30:25<00:47, 2.03it/s]
|
1442 |
99%|ββββββββββ| 10587/10682 [1:30:25<00:46, 2.03it/s]
|
1443 |
99%|ββββββββββ| 10588/10682 [1:30:26<00:46, 2.03it/s]
|
1444 |
99%|ββββββββββ| 10589/10682 [1:30:26<00:45, 2.03it/s]
|
1445 |
99%|ββββββββββ| 10590/10682 [1:30:27<00:45, 2.03it/s]
|
1446 |
99%|ββββββββββ| 10591/10682 [1:30:27<00:44, 2.03it/s]
|
1447 |
99%|ββββββββββ| 10592/10682 [1:30:28<00:44, 2.03it/s]
|
1448 |
99%|ββββββββββ| 10593/10682 [1:30:28<00:43, 2.03it/s]
|
1449 |
99%|ββββββββββ| 10594/10682 [1:30:29<00:43, 2.03it/s]
|
1450 |
99%|ββββββββββ| 10595/10682 [1:30:29<00:42, 2.03it/s]
|
1451 |
99%|ββββββββββ| 10596/10682 [1:30:30<00:42, 2.03it/s]
|
1452 |
99%|ββββββββββ| 10597/10682 [1:30:30<00:41, 2.03it/s]
|
1453 |
99%|ββββββββββ| 10598/10682 [1:30:31<00:41, 2.03it/s]
|
1454 |
99%|ββββββββββ| 10599/10682 [1:30:31<00:40, 2.03it/s]
|
1455 |
99%|ββββββββββ| 10600/10682 [1:30:32<00:40, 2.03it/s]{'loss': 2.8119, 'grad_norm': 0.2522198557853699, 'learning_rate': 1.7952444123359167e-07, 'epoch': 13.88}
|
|
|
1456 |
|
1457 |
99%|ββββββββββ| 10600/10682 [1:30:32<00:40, 2.03it/s]
|
1458 |
99%|ββββββββββ| 10601/10682 [1:30:32<00:39, 2.03it/s]
|
1459 |
99%|ββββββββββ| 10602/10682 [1:30:33<00:39, 2.03it/s]
|
1460 |
99%|ββββββββββ| 10603/10682 [1:30:33<00:39, 2.03it/s]
|
1461 |
99%|ββββββββββ| 10604/10682 [1:30:34<00:38, 2.03it/s]
|
1462 |
99%|ββββββββββ| 10605/10682 [1:30:34<00:38, 2.02it/s]
|
1463 |
99%|ββββββββββ| 10606/10682 [1:30:35<00:37, 2.03it/s]
|
1464 |
99%|ββββββββββ| 10607/10682 [1:30:35<00:37, 2.03it/s]
|
1465 |
99%|ββββββββββ| 10608/10682 [1:30:36<00:36, 2.03it/s]
|
1466 |
99%|ββββββββββ| 10609/10682 [1:30:36<00:36, 2.03it/s]
|
1467 |
99%|ββββββββββ| 10610/10682 [1:30:37<00:35, 2.03it/s]
|
1468 |
99%|ββββββββββ| 10611/10682 [1:30:37<00:35, 2.03it/s]
|
1469 |
99%|ββββββββββ| 10612/10682 [1:30:38<00:34, 2.03it/s]
|
1470 |
99%|ββββββββββ| 10613/10682 [1:30:38<00:33, 2.03it/s]
|
1471 |
99%|ββββββββββ| 10614/10682 [1:30:39<00:33, 2.03it/s]
|
1472 |
99%|ββββββββββ| 10615/10682 [1:30:39<00:33, 2.03it/s]
|
1473 |
99%|ββββββββββ| 10616/10682 [1:30:40<00:32, 2.03it/s]
|
1474 |
99%|ββββββββββ| 10617/10682 [1:30:40<00:32, 2.03it/s]
|
1475 |
99%|ββββββββββ| 10618/10682 [1:30:41<00:31, 2.03it/s]
|
1476 |
99%|ββββββββββ| 10619/10682 [1:30:41<00:31, 2.03it/s]
|
1477 |
99%|ββββββββββ| 10620/10682 [1:30:42<00:30, 2.03it/s]
|
1478 |
99%|ββββββββββ| 10621/10682 [1:30:42<00:30, 2.03it/s]
|
1479 |
99%|ββββββββββ| 10622/10682 [1:30:43<00:29, 2.03it/s]
|
1480 |
99%|ββββββββββ| 10623/10682 [1:30:43<00:29, 2.03it/s]
|
1481 |
99%|ββββββββββ| 10624/10682 [1:30:44<00:28, 2.03it/s]
|
1482 |
99%|ββββββββββ| 10625/10682 [1:30:44<00:28, 2.03it/s]{'loss': 2.8038, 'grad_norm': 0.2499060034751892, 'learning_rate': 8.674791042273533e-08, 'epoch': 13.92}
|
|
|
1483 |
|
1484 |
99%|ββββββββββ| 10625/10682 [1:30:44<00:28, 2.03it/s]
|
1485 |
99%|ββββββββββ| 10626/10682 [1:30:45<00:27, 2.02it/s]
|
1486 |
99%|ββββββββββ| 10627/10682 [1:30:45<00:27, 2.02it/s]
|
1487 |
99%|ββββββββββ| 10628/10682 [1:30:46<00:26, 2.02it/s]
|
|
|
1488 |
|
1489 |
|
|
|
|
|
1490 |
|
|
|
|
618 |
|
619 |
92%|ββββββββββ| 9850/10682 [1:24:10<06:50, 2.03it/s]
|
620 |
92%|ββββββββββ| 9851/10682 [1:24:10<06:50, 2.03it/s]
|
621 |
92%|ββββββββββ| 9852/10682 [1:24:11<06:50, 2.02it/s]
|
622 |
92%|ββββββββββ| 9853/10682 [1:24:11<06:48, 2.03it/s]
|
623 |
92%|ββββββββββ| 9854/10682 [1:24:12<06:48, 2.03it/s]
|
624 |
92%|ββββββββββ| 9855/10682 [1:24:12<06:47, 2.03it/s]
|
625 |
92%|ββββββββββ| 9856/10682 [1:24:13<06:47, 2.03it/s]
|
626 |
92%|ββββββββββ| 9857/10682 [1:24:13<06:47, 2.03it/s]
|
627 |
92%|ββββββββββ| 9858/10682 [1:24:14<06:46, 2.03it/s]
|
628 |
92%|ββββββββββ| 9859/10682 [1:24:14<06:46, 2.03it/s]
|
629 |
92%|ββββββββββ| 9860/10682 [1:24:15<06:45, 2.03it/s]
|
630 |
92%|ββββββββββ| 9861/10682 [1:24:15<06:45, 2.03it/s]
|
631 |
92%|ββββββββββ| 9862/10682 [1:24:16<06:44, 2.03it/s]
|
632 |
92%|ββββββββββ| 9863/10682 [1:24:16<06:43, 2.03it/s]
|
633 |
92%|ββββββββββ| 9864/10682 [1:24:17<06:43, 2.03it/s]
|
634 |
92%|ββββββββββ| 9865/10682 [1:24:17<06:42, 2.03it/s]
|
635 |
92%|ββββββββββ| 9866/10682 [1:24:18<06:42, 2.03it/s]
|
636 |
92%|ββββββββββ| 9867/10682 [1:24:18<06:41, 2.03it/s]
|
637 |
92%|ββββββββββ| 9868/10682 [1:24:19<06:41, 2.03it/s]
|
638 |
92%|ββββββββββ| 9869/10682 [1:24:19<06:40, 2.03it/s]
|
639 |
92%|ββββββββββ| 9870/10682 [1:24:20<06:40, 2.03it/s]
|
640 |
92%|ββββββββββ| 9871/10682 [1:24:20<06:40, 2.03it/s]
|
641 |
92%|ββββββββββ| 9872/10682 [1:24:21<06:39, 2.03it/s]
|
642 |
92%|ββββββββββ| 9873/10682 [1:24:21<06:39, 2.03it/s]
|
643 |
92%|ββββββββββ| 9874/10682 [1:24:22<06:38, 2.03it/s]
|
644 |
92%|ββββββββββ| 9875/10682 [1:24:22<06:38, 2.03it/s]{'loss': 2.8269, 'grad_norm': 0.24751795828342438, 'learning_rate': 1.7288216615031272e-05, 'epoch': 12.93}
|
645 |
|
646 |
|
647 |
92%|ββββββββββ| 9875/10682 [1:24:22<06:38, 2.03it/s]
|
648 |
92%|ββββββββββ| 9876/10682 [1:24:23<06:37, 2.03it/s]
|
649 |
92%|ββββββββββ| 9877/10682 [1:24:23<06:37, 2.03it/s]
|
650 |
92%|ββββββββββ| 9878/10682 [1:24:24<06:36, 2.03it/s]
|
651 |
92%|ββββββββββ| 9879/10682 [1:24:24<06:35, 2.03it/s]
|
652 |
92%|ββββββββββ| 9880/10682 [1:24:25<06:35, 2.03it/s]
|
653 |
93%|ββββββββββ| 9881/10682 [1:24:25<06:35, 2.03it/s]
|
654 |
93%|ββββββββββ| 9882/10682 [1:24:26<06:34, 2.03it/s]
|
655 |
93%|ββββββββββ| 9883/10682 [1:24:26<06:34, 2.03it/s]
|
656 |
93%|ββββββββββ| 9884/10682 [1:24:27<06:33, 2.03it/s]
|
657 |
93%|ββββββββββ| 9885/10682 [1:24:27<06:33, 2.03it/s]
|
658 |
93%|ββββββββββ| 9886/10682 [1:24:28<06:32, 2.03it/s]
|
659 |
93%|ββββββββββ| 9887/10682 [1:24:28<06:31, 2.03it/s]
|
660 |
93%|ββββββββββ| 9888/10682 [1:24:29<06:31, 2.03it/s]
|
661 |
93%|ββββββββββ| 9889/10682 [1:24:29<06:31, 2.03it/s]
|
662 |
93%|ββββββββββ| 9890/10682 [1:24:30<06:30, 2.03it/s]
|
663 |
93%|ββββββββββ| 9891/10682 [1:24:30<06:29, 2.03it/s]
|
664 |
93%|ββββββββββ| 9892/10682 [1:24:31<06:29, 2.03it/s]
|
665 |
93%|ββββββββββ| 9893/10682 [1:24:31<06:28, 2.03it/s]
|
666 |
93%|ββββββββββ| 9894/10682 [1:24:32<06:28, 2.03it/s]
|
667 |
93%|ββββββββββ| 9895/10682 [1:24:32<06:27, 2.03it/s]
|
668 |
93%|ββββββββββ| 9896/10682 [1:24:33<06:27, 2.03it/s]
|
669 |
93%|ββββββββββ| 9897/10682 [1:24:33<06:27, 2.03it/s]
|
670 |
93%|ββββββββββ| 9898/10682 [1:24:34<06:26, 2.03it/s]
|
671 |
93%|ββββββββββ| 9899/10682 [1:24:34<06:26, 2.03it/s]
|
672 |
93%|ββββββββββ| 9900/10682 [1:24:35<06:25, 2.03it/s]{'loss': 2.8229, 'grad_norm': 0.2464432567358017, 'learning_rate': 1.6239414036870183e-05, 'epoch': 12.97}
|
673 |
|
674 |
|
675 |
93%|ββββββββββ| 9900/10682 [1:24:35<06:25, 2.03it/s]
|
676 |
93%|ββββββββββ| 9901/10682 [1:24:35<06:25, 2.03it/s]
|
677 |
93%|ββββββββββ| 9902/10682 [1:24:36<06:24, 2.03it/s]
|
678 |
93%|ββββββββββ| 9903/10682 [1:24:36<06:24, 2.03it/s]
|
679 |
93%|ββββββββββ| 9904/10682 [1:24:37<06:23, 2.03it/s]
|
680 |
93%|ββββββββββ| 9905/10682 [1:24:37<06:23, 2.02it/s]
|
681 |
93%|ββββββββββ| 9906/10682 [1:24:38<06:23, 2.02it/s]
|
682 |
93%|ββββββββββ| 9907/10682 [1:24:38<06:23, 2.02it/s]
|
683 |
93%|ββββββββββ| 9908/10682 [1:24:39<06:22, 2.02it/s]
|
684 |
93%|ββββββββββ| 9909/10682 [1:24:39<06:22, 2.02it/s]
|
685 |
93%|ββββββββββ| 9910/10682 [1:24:40<06:21, 2.03it/s]
|
686 |
93%|ββββββββββ| 9911/10682 [1:24:40<06:20, 2.03it/s]
|
687 |
93%|ββββββββββ| 9912/10682 [1:24:41<06:19, 2.03it/s]
|
688 |
93%|ββββββββββ| 9913/10682 [1:24:41<06:19, 2.03it/s]
|
689 |
93%|ββββββββββ| 9914/10682 [1:24:42<06:18, 2.03it/s]
|
690 |
93%|ββββββββββ| 9915/10682 [1:24:42<06:17, 2.03it/s]
|
691 |
93%|ββββββββββ| 9916/10682 [1:24:43<06:17, 2.03it/s]
|
692 |
93%|ββββββββββ| 9917/10682 [1:24:43<06:17, 2.03it/s]
|
693 |
93%|ββββββββββ| 9918/10682 [1:24:44<06:16, 2.03it/s]
|
694 |
93%|ββββββββββ| 9919/10682 [1:24:44<06:16, 2.03it/s]
|
695 |
93%|ββββββββββ| 9920/10682 [1:24:45<06:16, 2.02it/s]
|
696 |
93%|ββββββββββ| 9921/10682 [1:24:45<06:15, 2.03it/s]
|
697 |
93%|ββββββββββ| 9922/10682 [1:24:45<06:14, 2.03it/s]
|
698 |
93%|ββββββββββ| 9923/10682 [1:24:46<06:14, 2.03it/s]
|
699 |
93%|ββββββββββ| 9924/10682 [1:24:46<06:13, 2.03it/s]
|
700 |
93%|ββββββββββ| 9925/10682 [1:24:47<06:22, 1.98it/s]{'loss': 2.8269, 'grad_norm': 0.24894338846206665, 'learning_rate': 1.5222903086944684e-05, 'epoch': 13.0}
|
|
|
701 |
|
702 |
93%|ββββββββββ| 9925/10682 [1:24:47<06:22, 1.98it/s]
|
703 |
93%|ββββββββββ| 9926/10682 [1:24:59<50:04, 3.97s/it]
|
704 |
93%|ββββββββββ| 9927/10682 [1:25:00<36:52, 2.93s/it]
|
705 |
93%|ββββββββββ| 9928/10682 [1:25:00<27:37, 2.20s/it]
|
706 |
93%|ββββββββββ| 9929/10682 [1:25:01<21:12, 1.69s/it]
|
707 |
93%|ββββββββββ| 9930/10682 [1:25:01<16:40, 1.33s/it]
|
708 |
93%|ββββββββββ| 9931/10682 [1:25:02<13:30, 1.08s/it]
|
709 |
93%|ββββββββββ| 9932/10682 [1:25:02<11:16, 1.11it/s]
|
710 |
93%|ββββββββββ| 9933/10682 [1:25:03<09:45, 1.28it/s]
|
711 |
93%|ββββββββββ| 9934/10682 [1:25:03<08:41, 1.44it/s]
|
712 |
93%|ββββββββββ| 9935/10682 [1:25:04<07:54, 1.57it/s]
|
713 |
93%|ββββββββββ| 9936/10682 [1:25:04<07:22, 1.69it/s]
|
714 |
93%|ββββββββββ| 9937/10682 [1:25:05<06:59, 1.77it/s]
|
715 |
93%|ββββββββββ| 9938/10682 [1:25:05<06:43, 1.84it/s]
|
716 |
93%|ββββββββββ| 9939/10682 [1:25:06<06:32, 1.89it/s]
|
717 |
93%|ββββββββββ| 9940/10682 [1:25:06<06:24, 1.93it/s]
|
718 |
93%|ββββββββββ| 9941/10682 [1:25:07<06:18, 1.96it/s]
|
719 |
93%|ββββββββββ| 9942/10682 [1:25:07<06:14, 1.98it/s]
|
720 |
93%|ββββββββββ| 9943/10682 [1:25:07<06:11, 1.99it/s]
|
721 |
93%|ββββββββββ| 9944/10682 [1:25:08<06:09, 2.00it/s]
|
722 |
93%|ββββββββββ| 9945/10682 [1:25:08<06:07, 2.01it/s]
|
723 |
93%|ββββββββββ| 9946/10682 [1:25:09<06:06, 2.01it/s]
|
724 |
93%|ββββββββββ| 9947/10682 [1:25:09<06:05, 2.01it/s]
|
725 |
93%|ββββββββββ| 9948/10682 [1:25:10<06:04, 2.01it/s]
|
726 |
+
|
727 |
|
728 |
93%|ββββββββββ| 9925/10682 [1:24:47<06:22, 1.98it/s]
|
729 |
93%|ββββββββββ| 9926/10682 [1:24:59<50:04, 3.97s/it]
|
730 |
93%|ββββββββββ| 9927/10682 [1:25:00<36:52, 2.93s/it]
|
731 |
93%|ββββββββββ| 9928/10682 [1:25:00<27:37, 2.20s/it]
|
732 |
93%|ββββββββββ| 9929/10682 [1:25:01<21:12, 1.69s/it]
|
733 |
93%|ββββββββββ| 9930/10682 [1:25:01<16:40, 1.33s/it]
|
734 |
93%|ββββββββββ| 9931/10682 [1:25:02<13:30, 1.08s/it]
|
735 |
93%|ββββββββββ| 9932/10682 [1:25:02<11:16, 1.11it/s]
|
736 |
93%|ββββββββββ| 9933/10682 [1:25:03<09:45, 1.28it/s]
|
737 |
93%|ββββββββββ| 9934/10682 [1:25:03<08:41, 1.44it/s]
|
738 |
93%|ββββββββββ| 9935/10682 [1:25:04<07:54, 1.57it/s]
|
739 |
93%|ββββββββββ| 9936/10682 [1:25:04<07:22, 1.69it/s]
|
740 |
93%|ββββββββββ| 9937/10682 [1:25:05<06:59, 1.77it/s]
|
741 |
93%|ββββββββββ| 9938/10682 [1:25:05<06:43, 1.84it/s]
|
742 |
93%|ββββββββββ| 9939/10682 [1:25:06<06:32, 1.89it/s]
|
743 |
93%|ββββββββββ| 9940/10682 [1:25:06<06:24, 1.93it/s]
|
744 |
93%|ββββββββββ| 9941/10682 [1:25:07<06:18, 1.96it/s]
|
745 |
93%|ββββββββββ| 9942/10682 [1:25:07<06:14, 1.98it/s]
|
746 |
93%|ββββββββββ| 9943/10682 [1:25:07<06:11, 1.99it/s]
|
747 |
93%|ββββββββββ| 9944/10682 [1:25:08<06:09, 2.00it/s]
|
748 |
93%|ββββββββββ| 9945/10682 [1:25:08<06:07, 2.01it/s]
|
749 |
93%|ββββββββββ| 9946/10682 [1:25:09<06:06, 2.01it/s]
|
750 |
93%|ββββββββββ| 9947/10682 [1:25:09<06:05, 2.01it/s]
|
751 |
93%|ββββββββββ| 9948/10682 [1:25:10<06:04, 2.01it/s]
|
752 |
93%|ββββββββββ| 9949/10682 [1:25:10<06:03, 2.02it/s]
|
753 |
93%|ββββββββββ| 9950/10682 [1:25:11<06:02, 2.02it/s]{'loss': 2.7842, 'grad_norm': 0.2500942349433899, 'learning_rate': 1.4238751618640577e-05, 'epoch': 13.03}
|
754 |
+
|
755 |
|
756 |
93%|ββββββββββ| 9950/10682 [1:25:11<06:02, 2.02it/s]
|
757 |
93%|ββββββββββ| 9951/10682 [1:25:11<06:02, 2.02it/s]
|
758 |
93%|ββββββββββ| 9952/10682 [1:25:12<06:00, 2.02it/s]
|
759 |
93%|ββββββββββ| 9953/10682 [1:25:12<06:00, 2.02it/s]
|
760 |
93%|ββββββββββ| 9954/10682 [1:25:13<05:59, 2.03it/s]
|
761 |
93%|ββββββββββ| 9955/10682 [1:25:13<05:59, 2.02it/s]
|
762 |
93%|ββββββββββ| 9956/10682 [1:25:14<05:58, 2.03it/s]
|
763 |
93%|ββββββββββ| 9957/10682 [1:25:14<05:57, 2.03it/s]
|
764 |
93%|ββββββββββ| 9958/10682 [1:25:15<05:57, 2.03it/s]
|
765 |
93%|ββββββββββ| 9959/10682 [1:25:15<05:56, 2.03it/s]
|
766 |
93%|ββββββββββ| 9960/10682 [1:25:16<05:56, 2.03it/s]
|
767 |
93%|ββββββββββ| 9961/10682 [1:25:16<05:55, 2.03it/s]
|
768 |
93%|ββββββββββ| 9962/10682 [1:25:17<05:55, 2.03it/s]
|
769 |
93%|ββββββββββ| 9963/10682 [1:25:17<05:54, 2.03it/s]
|
770 |
93%|ββββββββββ| 9964/10682 [1:25:18<05:53, 2.03it/s]
|
771 |
93%|ββββββββββ| 9965/10682 [1:25:18<05:53, 2.03it/s]
|
772 |
93%|ββββββββββ| 9966/10682 [1:25:19<05:53, 2.03it/s]
|
773 |
93%|ββββββββββ| 9967/10682 [1:25:19<05:52, 2.03it/s]
|
774 |
93%|ββββββββββ| 9968/10682 [1:25:20<05:51, 2.03it/s]
|
775 |
93%|ββββββββββ| 9969/10682 [1:25:20<05:51, 2.03it/s]
|
776 |
93%|ββββββββββ| 9970/10682 [1:25:21<05:50, 2.03it/s]
|
777 |
93%|ββββββββββ| 9971/10682 [1:25:21<05:50, 2.03it/s]
|
778 |
93%|ββββββββββ| 9972/10682 [1:25:22<05:50, 2.03it/s]
|
779 |
93%|ββββββββββ| 9973/10682 [1:25:22<05:49, 2.03it/s]
|
780 |
93%|ββββββββββ| 9974/10682 [1:25:23<05:49, 2.03it/s]
|
781 |
93%|ββββββββββ| 9975/10682 [1:25:23<05:48, 2.03it/s]{'loss': 2.799, 'grad_norm': 0.25120672583580017, 'learning_rate': 1.3287025325307511e-05, 'epoch': 13.06}
|
782 |
+
|
783 |
|
784 |
93%|ββββββββββ| 9975/10682 [1:25:23<05:48, 2.03it/s]
|
785 |
93%|ββββββββββ| 9976/10682 [1:25:24<05:48, 2.03it/s]
|
786 |
93%|ββββββββββ| 9977/10682 [1:25:24<05:47, 2.03it/s]
|
787 |
93%|ββββββββββ| 9978/10682 [1:25:25<05:47, 2.03it/s]
|
788 |
93%|ββββββββββ| 9979/10682 [1:25:25<05:46, 2.03it/s]
|
789 |
93%|ββββββββββ| 9980/10682 [1:25:26<05:46, 2.03it/s]
|
790 |
93%|ββββββββββ| 9981/10682 [1:25:26<05:45, 2.03it/s]
|
791 |
93%|ββββββββββ| 9982/10682 [1:25:27<05:45, 2.03it/s]
|
792 |
93%|ββββββββββ| 9983/10682 [1:25:27<05:44, 2.03it/s]
|
793 |
93%|ββββββββββ| 9984/10682 [1:25:28<05:44, 2.03it/s]
|
794 |
93%|ββββββββββ| 9985/10682 [1:25:28<05:44, 2.03it/s]
|
795 |
93%|ββββββββββ| 9986/10682 [1:25:29<05:43, 2.03it/s]
|
796 |
93%|ββββββββββ| 9987/10682 [1:25:29<05:42, 2.03it/s]
|
797 |
94%|ββββββββββ| 9988/10682 [1:25:30<05:42, 2.03it/s]
|
798 |
94%|ββββββββββ| 9989/10682 [1:25:30<05:42, 2.03it/s]
|
799 |
94%|ββββββββββ| 9990/10682 [1:25:31<05:41, 2.03it/s]
|
800 |
94%|ββββββββββ| 9991/10682 [1:25:31<05:41, 2.03it/s]
|
801 |
94%|ββββββββββ| 9992/10682 [1:25:32<05:40, 2.03it/s]
|
802 |
94%|ββββββββββ| 9993/10682 [1:25:32<05:40, 2.02it/s]
|
803 |
94%|ββββββββββ| 9994/10682 [1:25:33<05:39, 2.03it/s]
|
804 |
94%|ββββββββββ| 9995/10682 [1:25:33<05:38, 2.03it/s]
|
805 |
94%|ββββββββββ| 9996/10682 [1:25:34<05:38, 2.02it/s]
|
806 |
94%|ββββββββββ| 9997/10682 [1:25:34<05:37, 2.03it/s]
|
807 |
94%|ββββββββββ| 9998/10682 [1:25:35<05:37, 2.03it/s]
|
808 |
94%|ββββββββββ| 9999/10682 [1:25:35<05:37, 2.03it/s]
|
809 |
94%|ββββββββββ| 10000/10682 [1:25:36<05:36, 2.03it/s]{'loss': 2.8013, 'grad_norm': 0.24948380887508392, 'learning_rate': 1.2367787735873993e-05, 'epoch': 13.1}
|
810 |
+
|
811 |
|
812 |
94%|ββββββββββ| 10000/10682 [1:25:36<05:36, 2.03it/s]
|
813 |
94%|ββββββββββ| 10001/10682 [1:25:36<05:36, 2.03it/s]
|
814 |
94%|ββββββββββ| 10002/10682 [1:25:37<05:35, 2.03it/s]
|
815 |
94%|ββββββββββ| 10003/10682 [1:25:37<05:34, 2.03it/s]
|
816 |
94%|ββββββββββ| 10004/10682 [1:25:38<05:34, 2.03it/s]
|
817 |
94%|ββββββββββ| 10005/10682 [1:25:38<05:33, 2.03it/s]
|
818 |
94%|ββββββββββ| 10006/10682 [1:25:39<05:33, 2.03it/s]
|
819 |
94%|ββββββββββ| 10007/10682 [1:25:39<05:33, 2.03it/s]
|
820 |
94%|ββββββββββ| 10008/10682 [1:25:40<05:32, 2.03it/s]
|
821 |
94%|ββββββββββ| 10009/10682 [1:25:40<05:32, 2.03it/s]
|
822 |
94%|ββββββββββ| 10010/10682 [1:25:41<05:31, 2.03it/s]
|
823 |
94%|ββββββββββ| 10011/10682 [1:25:41<05:31, 2.03it/s]
|
824 |
94%|ββββββββββ| 10012/10682 [1:25:42<05:30, 2.03it/s]
|
825 |
94%|ββββββββββ| 10013/10682 [1:25:42<05:30, 2.03it/s]
|
826 |
94%|ββββββββββ| 10014/10682 [1:25:43<05:29, 2.03it/s]
|
827 |
94%|ββββββββββ| 10015/10682 [1:25:43<05:29, 2.03it/s]
|
828 |
94%|ββββββββββ| 10016/10682 [1:25:44<05:28, 2.03it/s]
|
829 |
94%|ββββββββββ| 10017/10682 [1:25:44<05:28, 2.03it/s]
|
830 |
94%|ββββββββββ| 10018/10682 [1:25:45<05:27, 2.02it/s]
|
831 |
94%|ββββββββββ| 10019/10682 [1:25:45<05:27, 2.03it/s]
|
832 |
94%|ββββββββββ| 10020/10682 [1:25:45<05:27, 2.02it/s]
|
833 |
94%|ββββββββββ| 10021/10682 [1:25:46<05:26, 2.02it/s]
|
834 |
94%|ββββββββββ| 10022/10682 [1:25:46<05:25, 2.02it/s]
|
835 |
94%|ββββββββββ| 10023/10682 [1:25:47<05:25, 2.03it/s]
|
836 |
94%|ββββββββββ| 10024/10682 [1:25:47<05:25, 2.02it/s]
|
837 |
94%|ββββββββββ| 10025/10682 [1:25:48<05:24, 2.03it/s]{'loss': 2.8, 'grad_norm': 0.2516091465950012, 'learning_rate': 1.1481100210606388e-05, 'epoch': 13.13}
|
838 |
+
|
839 |
|
840 |
94%|ββββββββββ| 10025/10682 [1:25:48<05:24, 2.03it/s]
|
841 |
94%|ββββββββββ| 10026/10682 [1:25:48<05:24, 2.02it/s]
|
842 |
94%|ββββββββββ| 10027/10682 [1:25:49<05:23, 2.03it/s]
|
843 |
94%|ββββββββββ| 10028/10682 [1:25:49<05:22, 2.03it/s]
|
844 |
94%|ββββββββββ| 10029/10682 [1:25:50<05:22, 2.03it/s]
|
845 |
94%|ββββββββββ| 10030/10682 [1:25:50<05:21, 2.03it/s]
|
846 |
94%|ββββββββββ| 10031/10682 [1:25:51<05:21, 2.03it/s]
|
847 |
94%|ββββββββββ| 10032/10682 [1:25:51<05:20, 2.03it/s]
|
848 |
94%|ββββββββββ| 10033/10682 [1:25:52<05:19, 2.03it/s]
|
849 |
94%|ββββββββββ| 10034/10682 [1:25:52<05:19, 2.03it/s]
|
850 |
94%|ββββββββββ| 10035/10682 [1:25:53<05:18, 2.03it/s]
|
851 |
94%|ββββββββββ| 10036/10682 [1:25:53<05:18, 2.03it/s]
|
852 |
94%|ββββββββββ| 10037/10682 [1:25:54<05:17, 2.03it/s]
|
853 |
94%|ββββββββββ| 10038/10682 [1:25:54<05:17, 2.03it/s]
|
854 |
94%|ββββββββββ| 10039/10682 [1:25:55<05:16, 2.03it/s]
|
855 |
94%|ββββββββββ| 10040/10682 [1:25:55<05:16, 2.03it/s]
|
856 |
94%|ββββββββββ| 10041/10682 [1:25:56<05:15, 2.03it/s]
|
857 |
94%|ββββββββββ| 10042/10682 [1:25:56<05:15, 2.03it/s]
|
858 |
94%|ββββββββββ| 10043/10682 [1:25:57<05:15, 2.03it/s]
|
859 |
94%|ββββββββββ| 10044/10682 [1:25:57<05:14, 2.03it/s]
|
860 |
94%|ββββββββββ| 10045/10682 [1:25:58<05:13, 2.03it/s]
|
861 |
94%|ββββββββββ| 10046/10682 [1:25:58<05:13, 2.03it/s]
|
862 |
94%|ββββββββββ| 10047/10682 [1:25:59<05:12, 2.03it/s]
|
863 |
94%|ββββββββββ| 10048/10682 [1:25:59<05:12, 2.03it/s]
|
864 |
94%|ββββββββββ| 10049/10682 [1:26:00<05:11, 2.03it/s]
|
865 |
94%|ββββββββββ| 10050/10682 [1:26:00<05:11, 2.03it/s]{'loss': 2.8031, 'grad_norm': 0.2510456144809723, 'learning_rate': 1.0627021937013704e-05, 'epoch': 13.16}
|
866 |
+
|
867 |
|
868 |
94%|ββββββββββ| 10050/10682 [1:26:00<05:11, 2.03it/s]
|
869 |
94%|ββββββββββ| 10051/10682 [1:26:01<05:11, 2.03it/s]
|
870 |
94%|ββββββββββ| 10052/10682 [1:26:01<05:10, 2.03it/s]
|
871 |
94%|ββββββββββ| 10053/10682 [1:26:02<05:10, 2.03it/s]
|
872 |
94%|ββββββββββ| 10054/10682 [1:26:02<05:09, 2.03it/s]
|
873 |
94%|ββββββββββ| 10055/10682 [1:26:03<05:09, 2.03it/s]
|
874 |
94%|ββββββββββ| 10056/10682 [1:26:03<05:08, 2.03it/s]
|
875 |
94%|ββββββββββ| 10057/10682 [1:26:04<05:08, 2.03it/s]
|
876 |
94%|ββββββββββ| 10058/10682 [1:26:04<05:07, 2.03it/s]
|
877 |
94%|ββββββββββ| 10059/10682 [1:26:05<05:07, 2.03it/s]
|
878 |
94%|ββββββββββ| 10060/10682 [1:26:05<05:07, 2.02it/s]
|
879 |
94%|ββββββββββ| 10061/10682 [1:26:06<05:06, 2.03it/s]
|
880 |
94%|ββββββββββ| 10062/10682 [1:26:06<05:05, 2.03it/s]
|
881 |
94%|ββββββββββ| 10063/10682 [1:26:07<05:05, 2.03it/s]
|
882 |
94%|ββββββββββ| 10064/10682 [1:26:07<05:04, 2.03it/s]
|
883 |
94%|ββββββββββ| 10065/10682 [1:26:08<05:04, 2.03it/s]
|
884 |
94%|ββββββββββ| 10066/10682 [1:26:08<05:04, 2.02it/s]
|
885 |
94%|ββββββββββ| 10067/10682 [1:26:09<05:03, 2.03it/s]
|
886 |
94%|ββββββββββ| 10068/10682 [1:26:09<05:03, 2.03it/s]
|
887 |
94%|ββββββββββ| 10069/10682 [1:26:10<05:02, 2.03it/s]
|
888 |
94%|ββββββββββ| 10070/10682 [1:26:10<05:02, 2.03it/s]
|
889 |
94%|ββββββββββ| 10071/10682 [1:26:11<05:01, 2.03it/s]
|
890 |
94%|ββββββββββ| 10072/10682 [1:26:11<05:01, 2.02it/s]
|
891 |
94%|ββββββββββ| 10073/10682 [1:26:12<05:00, 2.03it/s]
|
892 |
94%|ββββββββββ| 10074/10682 [1:26:12<04:59, 2.03it/s]
|
893 |
94%|ββββββββββ| 10075/10682 [1:26:13<04:59, 2.03it/s]{'loss': 2.8029, 'grad_norm': 0.2531677186489105, 'learning_rate': 9.805609925895964e-06, 'epoch': 13.2}
|
894 |
+
|
895 |
|
896 |
94%|ββββββββββ| 10075/10682 [1:26:13<04:59, 2.03it/s]
|
897 |
94%|ββββββββββ| 10076/10682 [1:26:13<04:59, 2.03it/s]
|
898 |
94%|ββββββββββ| 10077/10682 [1:26:14<04:58, 2.02it/s]
|
899 |
94%|ββββββββββ| 10078/10682 [1:26:14<04:58, 2.02it/s]
|
900 |
94%|ββββββββββ| 10079/10682 [1:26:15<04:58, 2.02it/s]
|
901 |
94%|ββββββββββ| 10080/10682 [1:26:15<04:57, 2.03it/s]
|
902 |
94%|ββββββββββ| 10081/10682 [1:26:16<04:56, 2.03it/s]
|
903 |
94%|ββββββββββ| 10082/10682 [1:26:16<04:56, 2.03it/s]
|
904 |
94%|ββββββββββ| 10083/10682 [1:26:17<04:55, 2.03it/s]
|
905 |
94%|ββββββββββ| 10084/10682 [1:26:17<04:55, 2.02it/s]
|
906 |
94%|ββββββββββ| 10085/10682 [1:26:18<04:54, 2.02it/s]
|
907 |
94%|ββββββββββ| 10086/10682 [1:26:18<04:54, 2.02it/s]
|
908 |
94%|ββββββββββ| 10087/10682 [1:26:19<04:53, 2.03it/s]
|
909 |
94%|ββββββββββ| 10088/10682 [1:26:19<04:53, 2.02it/s]
|
910 |
94%|ββββββββββ| 10089/10682 [1:26:20<04:53, 2.02it/s]
|
911 |
94%|ββββββββββ| 10090/10682 [1:26:20<04:52, 2.03it/s]
|
912 |
94%|ββββββββββ| 10091/10682 [1:26:21<04:51, 2.03it/s]
|
913 |
94%|ββββββββββ| 10092/10682 [1:26:21<04:51, 2.03it/s]
|
914 |
94%|ββββββββββ| 10093/10682 [1:26:22<04:50, 2.03it/s]
|
915 |
94%|ββββββββββ| 10094/10682 [1:26:22<04:49, 2.03it/s]
|
916 |
95%|ββββββββββ| 10095/10682 [1:26:22<04:49, 2.03it/s]
|
917 |
95%|ββββββββββ| 10096/10682 [1:26:23<04:49, 2.03it/s]
|
918 |
95%|ββββββββββ| 10097/10682 [1:26:23<04:48, 2.03it/s]
|
919 |
95%|ββββββββββ| 10098/10682 [1:26:24<04:48, 2.03it/s]
|
920 |
95%|ββββββββββ| 10099/10682 [1:26:24<04:47, 2.03it/s]
|
921 |
95%|ββββββββββ| 10100/10682 [1:26:25<04:47, 2.03it/s]{'loss': 2.7963, 'grad_norm': 0.24528002738952637, 'learning_rate': 9.01691900753926e-06, 'epoch': 13.23}
|
922 |
+
|
923 |
|
924 |
95%|ββββββββββ| 10100/10682 [1:26:25<04:47, 2.03it/s]
|
925 |
95%|ββββββββββ| 10101/10682 [1:26:25<04:47, 2.02it/s]
|
926 |
95%|ββββββββββ| 10102/10682 [1:26:26<04:46, 2.02it/s]
|
927 |
95%|ββββββββββ| 10103/10682 [1:26:26<04:45, 2.02it/s]
|
928 |
95%|ββββββββββ| 10104/10682 [1:26:27<04:45, 2.02it/s]
|
929 |
95%|ββββββββββ| 10105/10682 [1:26:27<04:44, 2.03it/s]
|
930 |
95%|ββββββββββ| 10106/10682 [1:26:28<04:44, 2.03it/s]
|
931 |
95%|ββββββββββ| 10107/10682 [1:26:28<04:43, 2.03it/s]
|
932 |
95%|ββββββββββ| 10108/10682 [1:26:29<04:42, 2.03it/s]
|
933 |
95%|ββββββββββ| 10109/10682 [1:26:29<04:42, 2.03it/s]
|
934 |
95%|ββββββββββ| 10110/10682 [1:26:30<04:42, 2.03it/s]
|
935 |
95%|ββββββββββ| 10111/10682 [1:26:30<04:41, 2.03it/s]
|
936 |
95%|ββββββββββ| 10112/10682 [1:26:31<04:41, 2.03it/s]
|
937 |
95%|ββββββββββ| 10113/10682 [1:26:31<04:40, 2.03it/s]
|
938 |
95%|ββββββββββ| 10114/10682 [1:26:32<04:40, 2.03it/s]
|
939 |
95%|ββββββββββ| 10115/10682 [1:26:32<04:39, 2.03it/s]
|
940 |
95%|ββββββββββ| 10116/10682 [1:26:33<04:39, 2.03it/s]
|
941 |
95%|ββββββββββ| 10117/10682 [1:26:33<04:38, 2.03it/s]
|
942 |
95%|ββββββββββ| 10118/10682 [1:26:34<04:38, 2.03it/s]
|
943 |
95%|ββββββββββ| 10119/10682 [1:26:34<04:37, 2.03it/s]
|
944 |
95%|ββββββββββ| 10120/10682 [1:26:35<04:37, 2.03it/s]
|
945 |
95%|ββββββββββ| 10121/10682 [1:26:35<04:36, 2.03it/s]
|
946 |
95%|ββββββββββ| 10122/10682 [1:26:36<04:35, 2.03it/s]
|
947 |
95%|ββββββββββ| 10123/10682 [1:26:36<04:35, 2.03it/s]
|
948 |
95%|ββββββββββ| 10124/10682 [1:26:37<04:34, 2.03it/s]
|
949 |
95%|ββββββββββ| 10125/10682 [1:26:37<04:34, 2.03it/s]{'loss': 2.7944, 'grad_norm': 0.25175368785858154, 'learning_rate': 8.261001828055447e-06, 'epoch': 13.26}
|
950 |
+
|
951 |
|
952 |
95%|ββββββββββ| 10125/10682 [1:26:37<04:34, 2.03it/s]
|
953 |
95%|ββββββββββ| 10126/10682 [1:26:38<04:34, 2.03it/s]
|
954 |
95%|ββββββββββ| 10127/10682 [1:26:38<04:34, 2.03it/s]
|
955 |
95%|ββββββββββ| 10128/10682 [1:26:39<04:33, 2.03it/s]
|
956 |
95%|ββββββββββ| 10129/10682 [1:26:39<04:32, 2.03it/s]
|
957 |
95%|ββββββββββ| 10130/10682 [1:26:40<04:32, 2.03it/s]
|
958 |
95%|ββββββββββ| 10131/10682 [1:26:40<04:31, 2.03it/s]
|
959 |
95%|ββββββββββ| 10132/10682 [1:26:41<04:31, 2.03it/s]
|
960 |
95%|ββββββββββ| 10133/10682 [1:26:41<04:31, 2.02it/s]
|
961 |
95%|ββββββββββ| 10134/10682 [1:26:42<04:30, 2.02it/s]
|
962 |
95%|ββββββββββ| 10135/10682 [1:26:42<04:30, 2.02it/s]
|
963 |
95%|ββββββββββ| 10136/10682 [1:26:43<04:30, 2.02it/s]
|
964 |
95%|ββββββββββ| 10137/10682 [1:26:43<04:29, 2.02it/s]
|
965 |
95%|ββββββββββ| 10138/10682 [1:26:44<04:28, 2.02it/s]
|
966 |
95%|ββββββββββ| 10139/10682 [1:26:44<04:28, 2.02it/s]
|
967 |
95%|ββββββββββ| 10140/10682 [1:26:45<04:27, 2.02it/s]
|
968 |
95%|ββββββββββ| 10141/10682 [1:26:45<04:27, 2.03it/s]
|
969 |
95%|ββββββββββ| 10142/10682 [1:26:46<04:26, 2.02it/s]
|
970 |
95%|ββββββββββ| 10143/10682 [1:26:46<04:26, 2.02it/s]
|
971 |
95%|ββββββββββ| 10144/10682 [1:26:47<04:25, 2.03it/s]
|
972 |
95%|ββββββββββ| 10145/10682 [1:26:47<04:25, 2.03it/s]
|
973 |
95%|ββββββββββ| 10146/10682 [1:26:48<04:24, 2.03it/s]
|
974 |
95%|ββββββββββ| 10147/10682 [1:26:48<04:23, 2.03it/s]
|
975 |
95%|ββββββββββ| 10148/10682 [1:26:49<04:23, 2.03it/s]
|
976 |
95%|ββββββββββ| 10149/10682 [1:26:49<04:22, 2.03it/s]
|
977 |
95%|ββββββββββ| 10150/10682 [1:26:50<04:22, 2.03it/s]{'loss': 2.8061, 'grad_norm': 0.2488679438829422, 'learning_rate': 7.537908845868024e-06, 'epoch': 13.29}
|
978 |
+
|
979 |
|
980 |
95%|ββββββββββ| 10150/10682 [1:26:50<04:22, 2.03it/s]
|
981 |
95%|ββββββββββ| 10151/10682 [1:26:50<04:22, 2.02it/s]
|
982 |
95%|ββββββββββ| 10152/10682 [1:26:51<04:22, 2.02it/s]
|
983 |
95%|ββββββββββ| 10153/10682 [1:26:51<04:21, 2.03it/s]
|
984 |
95%|ββββββββββ| 10154/10682 [1:26:52<04:43, 1.86it/s]
|
985 |
95%|ββββββββββ| 10155/10682 [1:26:52<04:35, 1.91it/s]
|
986 |
95%|ββββββββββ| 10156/10682 [1:26:53<04:30, 1.94it/s]
|
987 |
95%|ββββββββββ| 10157/10682 [1:26:53<04:26, 1.97it/s]
|
988 |
95%|ββββββββββ| 10158/10682 [1:26:54<04:23, 1.99it/s]
|
989 |
95%|ββββββββββ| 10159/10682 [1:26:54<04:21, 2.00it/s]
|
990 |
95%|ββββββββββ| 10160/10682 [1:26:55<04:20, 2.00it/s]
|
991 |
95%|ββββββββββ| 10161/10682 [1:26:55<04:19, 2.01it/s]
|
992 |
95%|ββββββββββ| 10162/10682 [1:26:56<04:18, 2.01it/s]
|
993 |
95%|ββββββββββ| 10163/10682 [1:26:56<04:17, 2.02it/s]
|
994 |
95%|ββββββββββ| 10164/10682 [1:26:57<04:16, 2.02it/s]
|
995 |
95%|ββββββββββ| 10165/10682 [1:26:57<04:15, 2.02it/s]
|
996 |
95%|ββββββββββ| 10166/10682 [1:26:58<04:15, 2.02it/s]
|
997 |
95%|ββββββββββ| 10167/10682 [1:26:58<04:14, 2.02it/s]
|
998 |
95%|ββββββββββ| 10168/10682 [1:26:59<04:14, 2.02it/s]
|
999 |
95%|ββββββββββ| 10169/10682 [1:26:59<04:13, 2.02it/s]
|
1000 |
95%|ββββββββββ| 10170/10682 [1:27:00<04:12, 2.03it/s]
|
1001 |
95%|ββββββββββ| 10171/10682 [1:27:00<04:12, 2.03it/s]
|
1002 |
95%|ββββββββββ| 10172/10682 [1:27:01<04:11, 2.03it/s]
|
1003 |
95%|ββββββββββ| 10173/10682 [1:27:01<04:11, 2.03it/s]
|
1004 |
95%|ββββββββββ| 10174/10682 [1:27:02<04:10, 2.03it/s]
|
1005 |
95%|ββββββββββ| 10175/10682 [1:27:02<04:10, 2.03it/s]{'loss': 2.8033, 'grad_norm': 0.2522995173931122, 'learning_rate': 6.847688328344037e-06, 'epoch': 13.33}
|
1006 |
+
|
1007 |
|
1008 |
95%|ββββββββββ| 10175/10682 [1:27:02<04:10, 2.03it/s]
|
1009 |
95%|ββββββββββ| 10176/10682 [1:27:03<04:10, 2.02it/s]
|
1010 |
95%|ββββββββββ| 10177/10682 [1:27:03<04:09, 2.02it/s]
|
1011 |
95%|ββββββββββ| 10178/10682 [1:27:04<04:09, 2.02it/s]
|
1012 |
95%|ββββββββββ| 10179/10682 [1:27:04<04:08, 2.02it/s]
|
1013 |
95%|ββββββββββ| 10180/10682 [1:27:05<04:07, 2.03it/s]
|
1014 |
95%|ββββββββββ| 10181/10682 [1:27:05<04:07, 2.03it/s]
|
1015 |
95%|ββββββββββ| 10182/10682 [1:27:06<04:06, 2.03it/s]
|
1016 |
95%|ββββββββββ| 10183/10682 [1:27:06<04:06, 2.02it/s]
|
1017 |
95%|ββββββββββ| 10184/10682 [1:27:07<04:05, 2.03it/s]
|
1018 |
95%|ββββββββββ| 10185/10682 [1:27:07<04:05, 2.03it/s]
|
1019 |
95%|ββββββββββ| 10186/10682 [1:27:08<04:04, 2.03it/s]
|
1020 |
95%|ββββββββββ| 10187/10682 [1:27:08<04:04, 2.03it/s]
|
1021 |
95%|ββββββββββ| 10188/10682 [1:27:09<04:03, 2.03it/s]
|
1022 |
95%|ββββββββββ| 10189/10682 [1:27:09<04:03, 2.03it/s]
|
1023 |
95%|ββββββββββ| 10190/10682 [1:27:10<04:02, 2.03it/s]
|
1024 |
95%|ββββββββββ| 10191/10682 [1:27:10<04:02, 2.02it/s]
|
1025 |
95%|ββββββββββ| 10192/10682 [1:27:11<04:01, 2.03it/s]
|
1026 |
95%|ββββββββββ| 10193/10682 [1:27:11<04:01, 2.02it/s]
|
1027 |
95%|ββββββββββ| 10194/10682 [1:27:12<04:00, 2.03it/s]
|
1028 |
95%|ββββββββββ| 10195/10682 [1:27:12<04:01, 2.02it/s]
|
1029 |
95%|ββββββββββ| 10196/10682 [1:27:13<04:00, 2.02it/s]
|
1030 |
95%|ββββββββββ| 10197/10682 [1:27:13<04:00, 2.02it/s]
|
1031 |
95%|ββββββββββ| 10198/10682 [1:27:13<03:59, 2.02it/s]
|
1032 |
95%|ββββββββββ| 10199/10682 [1:27:14<03:58, 2.02it/s]
|
1033 |
95%|ββββββββββ| 10200/10682 [1:27:14<03:58, 2.02it/s]{'loss': 2.799, 'grad_norm': 0.2519816756248474, 'learning_rate': 6.190386348572108e-06, 'epoch': 13.36}
|
1034 |
+
|
1035 |
|
1036 |
95%|ββββββββββ| 10200/10682 [1:27:14<03:58, 2.02it/s]
|
1037 |
95%|ββββββββββ| 10201/10682 [1:27:15<03:57, 2.02it/s]
|
1038 |
96%|ββββββββββ| 10202/10682 [1:27:15<03:56, 2.03it/s]
|
1039 |
96%|ββββββββββ| 10203/10682 [1:27:16<03:56, 2.03it/s]
|
1040 |
96%|ββββββββββ| 10204/10682 [1:27:16<03:55, 2.03it/s]
|
1041 |
96%|ββββββββββ| 10205/10682 [1:27:17<03:55, 2.03it/s]
|
1042 |
96%|ββββββββββ| 10206/10682 [1:27:17<03:54, 2.03it/s]
|
1043 |
96%|ββββββββββ| 10207/10682 [1:27:18<03:54, 2.03it/s]
|
1044 |
96%|ββββββββββ| 10208/10682 [1:27:18<03:54, 2.03it/s]
|
1045 |
96%|ββββββββββ| 10209/10682 [1:27:19<03:53, 2.02it/s]
|
1046 |
96%|ββββββββββ| 10210/10682 [1:27:19<03:52, 2.03it/s]
|
1047 |
96%|ββββββββββ| 10211/10682 [1:27:20<03:52, 2.03it/s]
|
1048 |
96%|ββββββββββ| 10212/10682 [1:27:20<03:51, 2.03it/s]
|
1049 |
96%|ββββββββββ| 10213/10682 [1:27:21<03:51, 2.03it/s]
|
1050 |
96%|ββββββββββ| 10214/10682 [1:27:21<03:50, 2.03it/s]
|
1051 |
96%|ββββββββββ| 10215/10682 [1:27:22<03:50, 2.03it/s]
|
1052 |
96%|ββββββββββ| 10216/10682 [1:27:22<03:49, 2.03it/s]
|
1053 |
96%|ββββββββββ| 10217/10682 [1:27:23<03:49, 2.03it/s]
|
1054 |
96%|ββββββββββ| 10218/10682 [1:27:23<03:48, 2.03it/s]
|
1055 |
96%|ββββββββββ| 10219/10682 [1:27:24<03:48, 2.03it/s]
|
1056 |
96%|ββββββββββ| 10220/10682 [1:27:24<03:47, 2.03it/s]
|
1057 |
96%|ββββββββββ| 10221/10682 [1:27:25<03:47, 2.03it/s]
|
1058 |
96%|ββββββββββ| 10222/10682 [1:27:25<03:46, 2.03it/s]
|
1059 |
96%|ββββββββββ| 10223/10682 [1:27:26<03:46, 2.03it/s]
|
1060 |
96%|ββββββββββ| 10224/10682 [1:27:26<03:45, 2.03it/s]
|
1061 |
96%|ββββββββββ| 10225/10682 [1:27:27<03:45, 2.03it/s]{'loss': 2.7962, 'grad_norm': 0.2496296763420105, 'learning_rate': 5.56604678228706e-06, 'epoch': 13.39}
|
1062 |
+
|
1063 |
|
1064 |
96%|ββββββββββ| 10225/10682 [1:27:27<03:45, 2.03it/s]
|
1065 |
96%|ββββββββββ| 10226/10682 [1:27:27<03:44, 2.03it/s]
|
1066 |
96%|ββββββββββ| 10227/10682 [1:27:28<03:44, 2.03it/s]
|
1067 |
96%|ββββββββββ| 10228/10682 [1:27:28<03:43, 2.03it/s]
|
1068 |
96%|ββββββββββ| 10229/10682 [1:27:29<03:43, 2.03it/s]
|
1069 |
96%|ββββββββββ| 10230/10682 [1:27:29<03:42, 2.03it/s]
|
1070 |
96%|ββββββββββ| 10231/10682 [1:27:30<03:42, 2.03it/s]
|
1071 |
96%|ββββββββββ| 10232/10682 [1:27:30<03:41, 2.03it/s]
|
1072 |
96%|ββββββββββ| 10233/10682 [1:27:31<03:41, 2.03it/s]
|
1073 |
96%|ββββββββββ| 10234/10682 [1:27:31<03:41, 2.03it/s]
|
1074 |
96%|ββββββββββ| 10235/10682 [1:27:32<03:40, 2.03it/s]
|
1075 |
96%|ββββββββββ| 10236/10682 [1:27:32<03:39, 2.03it/s]
|
1076 |
96%|ββββββββββ| 10237/10682 [1:27:33<03:39, 2.03it/s]
|
1077 |
96%|ββββββββββ| 10238/10682 [1:27:33<03:39, 2.03it/s]
|
1078 |
96%|ββββββββββ| 10239/10682 [1:27:34<03:38, 2.03it/s]
|
1079 |
96%|ββββββββββ| 10240/10682 [1:27:34<03:38, 2.03it/s]
|
1080 |
96%|ββββββββββ| 10241/10682 [1:27:35<03:37, 2.03it/s]
|
1081 |
96%|ββββββββββ| 10242/10682 [1:27:35<03:37, 2.03it/s]
|
1082 |
96%|ββββββββββ| 10243/10682 [1:27:36<03:36, 2.03it/s]
|
1083 |
96%|ββββββββββ| 10244/10682 [1:27:36<03:36, 2.03it/s]
|
1084 |
96%|ββββββββββ| 10245/10682 [1:27:37<03:35, 2.03it/s]
|
1085 |
96%|ββββββββββ| 10246/10682 [1:27:37<03:35, 2.03it/s]
|
1086 |
96%|ββββββββββ| 10247/10682 [1:27:38<03:34, 2.03it/s]
|
1087 |
96%|ββββββββββ| 10248/10682 [1:27:38<03:34, 2.02it/s]
|
1088 |
96%|ββββββββββ| 10249/10682 [1:27:39<03:33, 2.03it/s]
|
1089 |
96%|ββββββββββ| 10250/10682 [1:27:39<03:33, 2.03it/s]{'loss': 2.7955, 'grad_norm': 0.24875780940055847, 'learning_rate': 4.974711304941093e-06, 'epoch': 13.43}
|
1090 |
|
1091 |
+
|
1092 |
96%|ββββββββββ| 10250/10682 [1:27:39<03:33, 2.03it/s]
|
1093 |
96%|ββββββββββ| 10251/10682 [1:27:40<03:32, 2.02it/s]
|
1094 |
96%|ββββββββββ| 10252/10682 [1:27:40<03:32, 2.03it/s]
|
1095 |
96%|ββββββββββ| 10253/10682 [1:27:41<03:49, 1.87it/s]
|
1096 |
96%|ββββββββββ| 10254/10682 [1:27:41<03:43, 1.91it/s]
|
1097 |
96%|ββββββββββ| 10255/10682 [1:27:42<03:39, 1.94it/s]
|
1098 |
96%|ββββββββββ| 10256/10682 [1:27:42<03:36, 1.97it/s]
|
1099 |
96%|ββββββββββ| 10257/10682 [1:27:43<03:33, 1.99it/s]
|
1100 |
96%|ββββββββββ| 10258/10682 [1:27:43<03:32, 2.00it/s]
|
1101 |
96%|ββββββββββ| 10259/10682 [1:27:44<03:30, 2.01it/s]
|
1102 |
96%|ββββββββββ| 10260/10682 [1:27:44<03:29, 2.01it/s]
|
1103 |
96%|ββββββββββ| 10261/10682 [1:27:45<03:28, 2.02it/s]
|
1104 |
96%|ββββββββββ| 10262/10682 [1:27:45<03:28, 2.02it/s]
|
1105 |
96%|ββββββββββ| 10263/10682 [1:27:46<03:27, 2.02it/s]
|
1106 |
96%|ββββββββββ| 10264/10682 [1:27:46<03:26, 2.02it/s]
|
1107 |
96%|ββββββββββ| 10265/10682 [1:27:47<03:26, 2.02it/s]
|
1108 |
96%|ββββββββββ| 10266/10682 [1:27:47<03:25, 2.03it/s]
|
1109 |
96%|ββββββββββ| 10267/10682 [1:27:48<03:24, 2.03it/s]
|
1110 |
96%|ββββββββββ| 10268/10682 [1:27:48<03:24, 2.03it/s]
|
1111 |
96%|ββββββββββ| 10269/10682 [1:27:49<03:23, 2.03it/s]
|
1112 |
96%|ββββββββββ| 10270/10682 [1:27:49<03:23, 2.03it/s]
|
1113 |
96%|ββββββββββ| 10271/10682 [1:27:50<03:22, 2.03it/s]
|
1114 |
96%|ββββββββββ| 10272/10682 [1:27:50<03:22, 2.03it/s]
|
1115 |
96%|ββββββββββ| 10273/10682 [1:27:51<03:21, 2.03it/s]
|
1116 |
96%|ββββββββββ| 10274/10682 [1:27:51<03:21, 2.03it/s]
|
1117 |
96%|ββββββββββ| 10275/10682 [1:27:52<03:20, 2.03it/s]{'loss': 2.8098, 'grad_norm': 0.25310125946998596, 'learning_rate': 4.416419388921844e-06, 'epoch': 13.46}
|
1118 |
+
|
1119 |
|
1120 |
96%|ββββββββββ| 10275/10682 [1:27:52<03:20, 2.03it/s]
|
1121 |
96%|ββββββββββ| 10276/10682 [1:27:52<03:20, 2.02it/s]
|
1122 |
96%|ββββββββββ| 10277/10682 [1:27:53<03:20, 2.02it/s]
|
1123 |
96%|ββββββββββ| 10278/10682 [1:27:53<03:19, 2.02it/s]
|
1124 |
96%|ββββββββββ| 10279/10682 [1:27:54<03:18, 2.03it/s]
|
1125 |
96%|ββββββββββ| 10280/10682 [1:27:54<03:18, 2.02it/s]
|
1126 |
96%|ββββββββββ| 10281/10682 [1:27:55<03:18, 2.02it/s]
|
1127 |
96%|ββββββββββ| 10282/10682 [1:27:55<03:17, 2.03it/s]
|
1128 |
96%|ββββββββββ| 10283/10682 [1:27:56<03:16, 2.03it/s]
|
1129 |
96%|ββββββββββ| 10284/10682 [1:27:56<03:16, 2.03it/s]
|
1130 |
96%|ββββββββββ| 10285/10682 [1:27:57<03:15, 2.03it/s]
|
1131 |
96%|ββββββββββ| 10286/10682 [1:27:57<03:15, 2.03it/s]
|
1132 |
96%|ββββββββββ| 10287/10682 [1:27:58<03:14, 2.03it/s]
|
1133 |
96%|ββββββββββ| 10288/10682 [1:27:58<03:14, 2.03it/s]
|
1134 |
96%|ββββββββββ| 10289/10682 [1:27:59<03:13, 2.03it/s]
|
1135 |
96%|ββββββββββ| 10290/10682 [1:27:59<03:13, 2.03it/s]
|
1136 |
96%|ββββββββββ| 10291/10682 [1:28:00<03:13, 2.03it/s]
|
1137 |
96%|ββββββββββ| 10292/10682 [1:28:00<03:12, 2.03it/s]
|
1138 |
96%|ββββββββββ| 10293/10682 [1:28:01<03:11, 2.03it/s]
|
1139 |
96%|ββββββββββ| 10294/10682 [1:28:01<03:11, 2.02it/s]
|
1140 |
96%|ββββββββββ| 10295/10682 [1:28:01<03:10, 2.03it/s]
|
1141 |
96%|ββββββββββ| 10296/10682 [1:28:02<03:10, 2.03it/s]
|
1142 |
96%|ββββββββββ| 10297/10682 [1:28:02<03:09, 2.03it/s]
|
1143 |
96%|ββββββββββ| 10298/10682 [1:28:03<03:09, 2.03it/s]
|
1144 |
96%|ββββββββββ| 10299/10682 [1:28:03<03:09, 2.03it/s]
|
1145 |
96%|ββββββββββ| 10300/10682 [1:28:04<03:08, 2.03it/s]
|
1146 |
|
1147 |
+
|
1148 |
96%|ββββββββββ| 10300/10682 [1:28:04<03:08, 2.03it/s]
|
1149 |
96%|ββββββββββ| 10301/10682 [1:28:04<03:08, 2.02it/s]
|
1150 |
96%|ββββββββββ| 10302/10682 [1:28:05<03:08, 2.02it/s]
|
1151 |
96%|ββββββββββ| 10303/10682 [1:28:05<03:07, 2.02it/s]
|
1152 |
96%|ββββββββββ| 10304/10682 [1:28:06<03:06, 2.02it/s]
|
1153 |
96%|ββββββββββ| 10305/10682 [1:28:06<03:06, 2.03it/s]
|
1154 |
96%|ββββββββββ| 10306/10682 [1:28:07<03:05, 2.03it/s]
|
1155 |
96%|ββββββββββ| 10307/10682 [1:28:07<03:05, 2.03it/s]
|
1156 |
96%|ββββββββββ| 10308/10682 [1:28:08<03:04, 2.03it/s]
|
1157 |
97%|ββββββββββ| 10309/10682 [1:28:08<03:04, 2.03it/s]
|
1158 |
97%|ββββββββββ| 10310/10682 [1:28:09<03:03, 2.03it/s]
|
1159 |
97%|ββββββββββ| 10311/10682 [1:28:09<03:03, 2.03it/s]
|
1160 |
97%|ββββββββββ| 10312/10682 [1:28:10<03:02, 2.03it/s]
|
1161 |
97%|ββββββββββ| 10313/10682 [1:28:10<03:02, 2.03it/s]
|
1162 |
97%|ββββββββββ| 10314/10682 [1:28:11<03:01, 2.03it/s]
|
1163 |
97%|ββββββββββ| 10315/10682 [1:28:11<03:01, 2.03it/s]
|
1164 |
97%|ββββββββββ| 10316/10682 [1:28:12<03:00, 2.02it/s]
|
1165 |
97%|ββββββββββ| 10317/10682 [1:28:12<03:00, 2.03it/s]
|
1166 |
97%|ββββββββββ| 10318/10682 [1:28:13<02:59, 2.03it/s]
|
1167 |
97%|ββββββββββ| 10319/10682 [1:28:13<02:59, 2.03it/s]
|
1168 |
97%|ββββββββββ| 10320/10682 [1:28:14<02:58, 2.03it/s]
|
1169 |
97%|ββββββββββ| 10321/10682 [1:28:14<02:59, 2.01it/s]
|
1170 |
97%|ββββββββββ| 10322/10682 [1:28:15<02:58, 2.02it/s]
|
1171 |
97%|ββββββββββ| 10323/10682 [1:28:15<02:57, 2.02it/s]
|
1172 |
97%|ββββββββββ| 10324/10682 [1:28:16<02:57, 2.02it/s]
|
1173 |
97%|ββββββββββ| 10325/10682 [1:28:16<02:56, 2.03it/s]
|
1174 |
{'loss': 2.8013, 'grad_norm': 0.2537311613559723, 'learning_rate': 3.3991130994299734e-06, 'epoch': 13.52}
|
1175 |
+
|
1176 |
97%|ββββββββββ| 10325/10682 [1:28:16<02:56, 2.03it/s]
|
1177 |
97%|ββββββββββ| 10326/10682 [1:28:17<02:56, 2.02it/s]
|
1178 |
97%|ββββββββββ| 10327/10682 [1:28:17<02:55, 2.02it/s]
|
1179 |
97%|ββββββββββ| 10328/10682 [1:28:18<02:54, 2.03it/s]
|
1180 |
97%|ββββββββββ| 10329/10682 [1:28:18<02:54, 2.02it/s]
|
1181 |
97%|ββββββββββ| 10330/10682 [1:28:19<02:53, 2.02it/s]
|
1182 |
97%|ββββββββββ| 10331/10682 [1:28:19<02:53, 2.02it/s]
|
1183 |
97%|ββββββββββ| 10332/10682 [1:28:20<02:52, 2.03it/s]
|
1184 |
97%|ββββββββββ| 10333/10682 [1:28:20<02:52, 2.02it/s]
|
1185 |
97%|ββββββββββ| 10334/10682 [1:28:21<02:51, 2.03it/s]
|
1186 |
97%|ββββββββββ| 10335/10682 [1:28:21<02:51, 2.02it/s]
|
1187 |
97%|ββββββββββ| 10336/10682 [1:28:22<02:50, 2.02it/s]
|
1188 |
97%|ββββββββββ| 10337/10682 [1:28:22<02:50, 2.02it/s]
|
1189 |
97%|ββββββββββ| 10338/10682 [1:28:23<02:49, 2.02it/s]
|
1190 |
97%|ββββββββββ| 10339/10682 [1:28:23<02:49, 2.02it/s]
|
1191 |
97%|ββββββββββ| 10340/10682 [1:28:24<02:48, 2.03it/s]
|
1192 |
97%|ββββββββββ| 10341/10682 [1:28:24<02:48, 2.02it/s]
|
1193 |
97%|ββββββββββ| 10342/10682 [1:28:25<02:47, 2.03it/s]
|
1194 |
97%|ββββββββββ| 10343/10682 [1:28:25<02:47, 2.03it/s]
|
1195 |
97%|ββββββββββ| 10344/10682 [1:28:26<02:46, 2.03it/s]
|
1196 |
97%|ββββββββββ| 10345/10682 [1:28:26<02:46, 2.03it/s]
|
1197 |
97%|ββββββββββ| 10346/10682 [1:28:27<02:45, 2.03it/s]
|
1198 |
97%|ββββββββββ| 10347/10682 [1:28:27<02:45, 2.03it/s]
|
1199 |
97%|ββββββββββ| 10348/10682 [1:28:28<02:44, 2.03it/s]
|
1200 |
97%|ββββββββββ| 10349/10682 [1:28:28<02:44, 2.03it/s]
|
1201 |
97%|ββββββββββ| 10350/10682 [1:28:29<02:43, 2.03it/s]{'loss': 2.8039, 'grad_norm': 0.24740473926067352, 'learning_rate': 2.940166632433183e-06, 'epoch': 13.56}
|
1202 |
+
|
1203 |
|
1204 |
97%|ββββββββββ| 10350/10682 [1:28:29<02:43, 2.03it/s]
|
1205 |
97%|ββββββββββ| 10351/10682 [1:28:29<02:43, 2.03it/s]
|
1206 |
97%|ββββββββββ| 10352/10682 [1:28:30<02:42, 2.03it/s]
|
1207 |
97%|ββββββββββ| 10353/10682 [1:28:30<02:42, 2.03it/s]
|
1208 |
97%|ββββββββββ| 10354/10682 [1:28:31<02:41, 2.03it/s]
|
1209 |
97%|ββββββββββ| 10355/10682 [1:28:31<02:41, 2.03it/s]
|
1210 |
97%|ββββββββββ| 10356/10682 [1:28:32<02:40, 2.03it/s]
|
1211 |
97%|ββββββββββ| 10357/10682 [1:28:32<02:40, 2.03it/s]
|
1212 |
97%|ββββββββββ| 10358/10682 [1:28:33<02:39, 2.03it/s]
|
1213 |
97%|ββββββββββ| 10359/10682 [1:28:33<02:39, 2.03it/s]
|
1214 |
97%|ββββββββββ| 10360/10682 [1:28:34<02:38, 2.03it/s]
|
1215 |
97%|ββββββββββ| 10361/10682 [1:28:34<02:38, 2.03it/s]
|
1216 |
97%|ββββββββββ| 10362/10682 [1:28:35<02:37, 2.03it/s]
|
1217 |
97%|ββββββββββ| 10363/10682 [1:28:35<02:37, 2.03it/s]
|
1218 |
97%|ββββββββββ| 10364/10682 [1:28:36<02:36, 2.03it/s]
|
1219 |
97%|ββββββββββ| 10365/10682 [1:28:36<02:36, 2.02it/s]
|
1220 |
97%|ββββββββββ| 10366/10682 [1:28:37<02:35, 2.03it/s]
|
1221 |
97%|ββββββββββ| 10367/10682 [1:28:37<02:35, 2.02it/s]
|
1222 |
97%|ββββββββββ| 10368/10682 [1:28:38<02:34, 2.03it/s]
|
1223 |
97%|ββββββββββ| 10369/10682 [1:28:38<02:34, 2.03it/s]
|
1224 |
97%|ββββββββββ| 10370/10682 [1:28:39<02:34, 2.02it/s]
|
1225 |
97%|ββββββββββ| 10371/10682 [1:28:39<02:33, 2.03it/s]
|
1226 |
97%|ββββββββββ| 10372/10682 [1:28:40<02:33, 2.03it/s]
|
1227 |
97%|ββββββββββ| 10373/10682 [1:28:40<02:32, 2.03it/s]
|
1228 |
97%|ββββββββββ| 10374/10682 [1:28:40<02:31, 2.03it/s]
|
1229 |
97%|ββββββββββ| 10375/10682 [1:28:41<02:31, 2.03it/s]{'loss': 2.7982, 'grad_norm': 0.25136321783065796, 'learning_rate': 2.5143995351817882e-06, 'epoch': 13.59}
|
1230 |
+
|
1231 |
|
1232 |
97%|ββββββββββ| 10375/10682 [1:28:41<02:31, 2.03it/s]
|
1233 |
97%|ββββββββββ| 10376/10682 [1:28:41<02:31, 2.02it/s]
|
1234 |
97%|ββββββββββ| 10377/10682 [1:28:42<02:30, 2.02it/s]
|
1235 |
97%|ββββββββββ| 10378/10682 [1:28:42<02:30, 2.02it/s]
|
1236 |
97%|ββββββββββ| 10379/10682 [1:28:43<02:29, 2.02it/s]
|
1237 |
97%|ββββββββββ| 10380/10682 [1:28:43<02:29, 2.02it/s]
|
1238 |
97%|ββββββββββ| 10381/10682 [1:28:44<02:28, 2.03it/s]
|
1239 |
97%|ββββββββββ| 10382/10682 [1:28:44<02:28, 2.03it/s]
|
1240 |
97%|ββββββββββ| 10383/10682 [1:28:45<02:27, 2.03it/s]
|
1241 |
97%|ββββββββββ| 10384/10682 [1:28:45<02:26, 2.03it/s]
|
1242 |
97%|ββββββββββ| 10385/10682 [1:28:46<02:26, 2.03it/s]
|
1243 |
97%|ββββββββββ| 10386/10682 [1:28:46<02:25, 2.03it/s]
|
1244 |
97%|ββββββββββ| 10387/10682 [1:28:47<02:25, 2.03it/s]
|
1245 |
97%|ββββββββββ| 10388/10682 [1:28:47<02:24, 2.03it/s]
|
1246 |
97%|ββββββββββ| 10389/10682 [1:28:48<02:24, 2.03it/s]
|
1247 |
97%|ββββββββββ| 10390/10682 [1:28:48<02:24, 2.03it/s]
|
1248 |
97%|ββββββββββ| 10391/10682 [1:28:49<02:23, 2.03it/s]
|
1249 |
97%|ββββββββββ| 10392/10682 [1:28:49<02:23, 2.03it/s]
|
1250 |
97%|ββββββββββ| 10393/10682 [1:28:50<02:22, 2.03it/s]
|
1251 |
97%|ββββββββββ| 10394/10682 [1:28:50<02:22, 2.03it/s]
|
1252 |
97%|ββββββββββ| 10395/10682 [1:28:51<02:21, 2.03it/s]
|
1253 |
97%|ββββββββββ| 10396/10682 [1:28:51<02:20, 2.03it/s]
|
1254 |
97%|ββββββββββ| 10397/10682 [1:28:52<02:20, 2.03it/s]
|
1255 |
97%|ββββββββββ| 10398/10682 [1:28:52<02:19, 2.03it/s]
|
1256 |
97%|ββββββββββ| 10399/10682 [1:28:53<02:19, 2.03it/s]
|
1257 |
97%|ββββββββββ| 10400/10682 [1:28:53<02:19, 2.03it/s]{'loss': 2.7831, 'grad_norm': 0.24997153878211975, 'learning_rate': 2.1218402281655835e-06, 'epoch': 13.62}
|
1258 |
|
1259 |
+
|
1260 |
97%|ββββββββββ| 10400/10682 [1:28:53<02:19, 2.03it/s]
|
1261 |
97%|ββββββββββ| 10401/10682 [1:28:54<02:18, 2.02it/s]
|
1262 |
97%|ββββββββββ| 10402/10682 [1:28:54<02:18, 2.03it/s]
|
1263 |
97%|ββββββββββ| 10403/10682 [1:28:55<02:17, 2.03it/s]
|
1264 |
97%|ββββββββββ| 10404/10682 [1:28:55<02:17, 2.03it/s]
|
1265 |
97%|ββββββββββ| 10405/10682 [1:28:56<02:16, 2.03it/s]
|
1266 |
97%|ββββββββββ| 10406/10682 [1:28:56<02:16, 2.03it/s]
|
1267 |
97%|ββββββββββ| 10407/10682 [1:28:57<02:15, 2.03it/s]
|
1268 |
97%|ββββββββββ| 10408/10682 [1:28:57<02:15, 2.03it/s]
|
1269 |
97%|ββββββββββ| 10409/10682 [1:28:58<02:14, 2.03it/s]
|
1270 |
97%|ββββββββββ| 10410/10682 [1:28:58<02:13, 2.03it/s]
|
1271 |
97%|ββββββββββ| 10411/10682 [1:28:59<02:13, 2.03it/s]
|
1272 |
97%|ββββββββββ| 10412/10682 [1:28:59<02:12, 2.03it/s]
|
1273 |
97%|ββββββββββ| 10413/10682 [1:29:00<02:12, 2.03it/s]
|
1274 |
97%|ββββββββββ| 10414/10682 [1:29:00<02:11, 2.03it/s]
|
1275 |
98%|ββββββββββ| 10415/10682 [1:29:01<02:11, 2.03it/s]
|
1276 |
98%|ββββββββββ| 10416/10682 [1:29:01<02:10, 2.03it/s]
|
1277 |
98%|ββββββββββ| 10417/10682 [1:29:02<02:10, 2.03it/s]
|
1278 |
98%|ββββββββββ| 10418/10682 [1:29:02<02:09, 2.03it/s]
|
1279 |
98%|ββββββββββ| 10419/10682 [1:29:03<02:09, 2.03it/s]
|
1280 |
98%|ββββββββββ| 10420/10682 [1:29:03<02:08, 2.03it/s]
|
1281 |
98%|ββββββββββ| 10421/10682 [1:29:04<02:08, 2.03it/s]
|
1282 |
98%|ββββββββββ| 10422/10682 [1:29:04<02:07, 2.03it/s]
|
1283 |
98%|ββββββββββ| 10423/10682 [1:29:05<02:07, 2.03it/s]
|
1284 |
98%|ββββββββββ| 10424/10682 [1:29:05<02:06, 2.04it/s]
|
1285 |
98%|ββββββββββ| 10425/10682 [1:29:06<02:06, 2.03it/s]
|
1286 |
|
1287 |
+
|
1288 |
98%|ββββββββββ| 10425/10682 [1:29:06<02:06, 2.03it/s]
|
1289 |
98%|ββββββββββ| 10426/10682 [1:29:06<02:06, 2.03it/s]
|
1290 |
98%|ββββββββββ| 10427/10682 [1:29:07<02:05, 2.03it/s]
|
1291 |
98%|ββββββββββ| 10428/10682 [1:29:07<02:05, 2.03it/s]
|
1292 |
98%|ββββββββββ| 10429/10682 [1:29:08<02:04, 2.03it/s]
|
1293 |
98%|ββββββββββ| 10430/10682 [1:29:08<02:04, 2.03it/s]
|
1294 |
98%|ββββββββββ| 10431/10682 [1:29:09<02:03, 2.03it/s]
|
1295 |
98%|ββββββββββ| 10432/10682 [1:29:09<02:03, 2.03it/s]
|
1296 |
98%|ββββββββββ| 10433/10682 [1:29:10<02:02, 2.03it/s]
|
1297 |
98%|ββββββββββ| 10434/10682 [1:29:10<02:02, 2.03it/s]
|
1298 |
98%|ββββββββββ| 10435/10682 [1:29:11<02:01, 2.03it/s]
|
1299 |
98%|ββββββββββ| 10436/10682 [1:29:11<02:01, 2.03it/s]
|
1300 |
98%|ββββββββββ| 10437/10682 [1:29:12<02:00, 2.03it/s]
|
1301 |
98%|ββββββββββ| 10438/10682 [1:29:12<02:00, 2.03it/s]
|
1302 |
98%|ββββββββββ| 10439/10682 [1:29:13<01:59, 2.03it/s]
|
1303 |
98%|ββββββββββ| 10440/10682 [1:29:13<01:59, 2.03it/s]
|
1304 |
98%|ββββββββββ| 10441/10682 [1:29:14<01:58, 2.03it/s]
|
1305 |
98%|ββββββββββ| 10442/10682 [1:29:14<01:58, 2.03it/s]
|
1306 |
98%|ββββββββββ| 10443/10682 [1:29:14<01:57, 2.03it/s]
|
1307 |
98%|ββββββββββ| 10444/10682 [1:29:15<01:57, 2.03it/s]
|
1308 |
98%|ββββββββββ| 10445/10682 [1:29:15<01:56, 2.03it/s]
|
1309 |
98%|ββββββββββ| 10446/10682 [1:29:16<01:56, 2.03it/s]
|
1310 |
98%|ββββββββββ| 10447/10682 [1:29:16<01:55, 2.03it/s]
|
1311 |
98%|ββββββββββ| 10448/10682 [1:29:17<01:55, 2.03it/s]
|
1312 |
98%|ββββββββββ| 10449/10682 [1:29:17<01:54, 2.03it/s]
|
1313 |
98%|ββββββββββ| 10450/10682 [1:29:18<01:54, 2.03it/s]
|
1314 |
|
1315 |
+
|
1316 |
98%|ββββββββββ| 10450/10682 [1:29:18<01:54, 2.03it/s]
|
1317 |
98%|ββββββββββ| 10451/10682 [1:29:18<01:54, 2.02it/s]
|
1318 |
98%|ββββββββββ| 10452/10682 [1:29:19<01:53, 2.02it/s]
|
1319 |
98%|ββββββββββ| 10453/10682 [1:29:19<01:53, 2.03it/s]
|
1320 |
98%|ββββββββββ| 10454/10682 [1:29:20<01:52, 2.03it/s]
|
1321 |
98%|ββββββββββ| 10455/10682 [1:29:20<01:51, 2.03it/s]
|
1322 |
98%|ββββββββββ| 10456/10682 [1:29:21<01:51, 2.03it/s]
|
1323 |
98%|ββββββββββ| 10457/10682 [1:29:21<01:50, 2.03it/s]
|
1324 |
98%|ββββββββββ| 10458/10682 [1:29:22<01:50, 2.03it/s]
|
1325 |
98%|ββββββββββ| 10459/10682 [1:29:22<01:49, 2.03it/s]
|
1326 |
98%|ββββββββββ| 10460/10682 [1:29:23<01:49, 2.03it/s]
|
1327 |
98%|ββββββββββ| 10461/10682 [1:29:23<01:48, 2.03it/s]
|
1328 |
98%|ββββββββββ| 10462/10682 [1:29:24<01:48, 2.03it/s]
|
1329 |
98%|ββββββββββ| 10463/10682 [1:29:24<01:47, 2.03it/s]
|
1330 |
98%|ββββββββββ| 10464/10682 [1:29:25<01:47, 2.03it/s]
|
1331 |
98%|ββββββββββ| 10465/10682 [1:29:25<01:46, 2.03it/s]
|
1332 |
98%|ββββββββββ| 10466/10682 [1:29:26<01:46, 2.03it/s]
|
1333 |
98%|ββββββββββ| 10467/10682 [1:29:26<01:45, 2.03it/s]
|
1334 |
98%|ββββββββββ| 10468/10682 [1:29:27<01:45, 2.03it/s]
|
1335 |
98%|ββββββββββ| 10469/10682 [1:29:27<01:44, 2.03it/s]
|
1336 |
98%|ββββββββββ| 10470/10682 [1:29:28<01:44, 2.03it/s]
|
1337 |
98%|ββββββββββ| 10471/10682 [1:29:28<01:43, 2.03it/s]
|
1338 |
98%|ββββββββββ| 10472/10682 [1:29:29<01:43, 2.03it/s]
|
1339 |
98%|ββββββββββ| 10473/10682 [1:29:29<01:42, 2.03it/s]
|
1340 |
98%|ββββββββββ| 10474/10682 [1:29:30<01:42, 2.03it/s]
|
1341 |
98%|ββββββββββ| 10475/10682 [1:29:30<01:42, 2.03it/s]
|
1342 |
|
1343 |
+
|
1344 |
98%|ββββββββββ| 10475/10682 [1:29:30<01:42, 2.03it/s]
|
1345 |
98%|ββββββββββ| 10476/10682 [1:29:31<01:41, 2.03it/s]
|
1346 |
98%|ββββββββββ| 10477/10682 [1:29:31<01:41, 2.03it/s]
|
1347 |
98%|ββββββββββ| 10478/10682 [1:29:32<01:40, 2.03it/s]
|
1348 |
98%|ββββββββββ| 10479/10682 [1:29:32<01:40, 2.03it/s]
|
1349 |
98%|ββββββββββ| 10480/10682 [1:29:33<01:39, 2.03it/s]
|
1350 |
98%|ββββββββββ| 10481/10682 [1:29:33<01:38, 2.03it/s]
|
1351 |
98%|ββββββββββ| 10482/10682 [1:29:34<01:38, 2.03it/s]
|
1352 |
98%|ββββββββββ| 10483/10682 [1:29:34<01:38, 2.03it/s]
|
1353 |
98%|ββββββββββ| 10484/10682 [1:29:35<01:37, 2.03it/s]
|
1354 |
98%|ββββββββββ| 10485/10682 [1:29:35<01:37, 2.03it/s]
|
1355 |
98%|ββββββββββ| 10486/10682 [1:29:36<01:36, 2.03it/s]
|
1356 |
98%|ββββββββββ| 10487/10682 [1:29:36<01:36, 2.03it/s]
|
1357 |
98%|ββββββββββ| 10488/10682 [1:29:37<01:35, 2.03it/s]
|
1358 |
98%|ββββββββββ| 10489/10682 [1:29:37<01:35, 2.03it/s]
|
1359 |
98%|ββββββββββ| 10490/10682 [1:29:38<01:34, 2.03it/s]
|
1360 |
98%|ββββββββββ| 10491/10682 [1:29:38<01:34, 2.03it/s]
|
1361 |
98%|ββββββββββ| 10492/10682 [1:29:39<01:33, 2.03it/s]
|
1362 |
98%|ββββββββββ| 10493/10682 [1:29:39<01:33, 2.03it/s]
|
1363 |
98%|ββββββββββ| 10494/10682 [1:29:40<01:32, 2.03it/s]
|
1364 |
98%|ββββββββββ| 10495/10682 [1:29:40<01:31, 2.03it/s]
|
1365 |
98%|ββββββββββ| 10496/10682 [1:29:41<01:31, 2.03it/s]
|
1366 |
98%|ββββββββββ| 10497/10682 [1:29:41<01:31, 2.03it/s]
|
1367 |
98%|ββββββββββ| 10498/10682 [1:29:42<01:30, 2.03it/s]
|
1368 |
98%|ββββββββββ| 10499/10682 [1:29:42<01:30, 2.03it/s]
|
1369 |
98%|ββββββββββ| 10500/10682 [1:29:43<01:29, 2.03it/s]{'loss': 2.8054, 'grad_norm': 0.24958878755569458, 'learning_rate': 8.841716933915555e-07, 'epoch': 13.75}
|
1370 |
|
1371 |
+
|
1372 |
98%|ββββββββββ| 10500/10682 [1:29:43<01:29, 2.03it/s]
|
1373 |
98%|ββββββββββ| 10501/10682 [1:29:43<01:29, 2.03it/s]
|
1374 |
98%|ββββββββββ| 10502/10682 [1:29:44<01:28, 2.03it/s]
|
1375 |
98%|ββββββββββ| 10503/10682 [1:29:44<01:28, 2.03it/s]
|
1376 |
98%|ββββββββββ| 10504/10682 [1:29:45<01:27, 2.03it/s]
|
1377 |
98%|ββββββββββ| 10505/10682 [1:29:45<01:27, 2.03it/s]
|
1378 |
98%|ββββββββββ| 10506/10682 [1:29:46<01:26, 2.03it/s]
|
1379 |
98%|ββββββββββ| 10507/10682 [1:29:46<01:26, 2.03it/s]
|
1380 |
98%|ββββββββββ| 10508/10682 [1:29:47<01:25, 2.03it/s]
|
1381 |
98%|ββββββββββ| 10509/10682 [1:29:47<01:25, 2.03it/s]
|
1382 |
98%|ββββββββββ| 10510/10682 [1:29:48<01:24, 2.03it/s]
|
1383 |
98%|ββββββββββ| 10511/10682 [1:29:48<01:24, 2.03it/s]
|
1384 |
98%|ββββββββββ| 10512/10682 [1:29:48<01:23, 2.03it/s]
|
1385 |
98%|ββββββββββ| 10513/10682 [1:29:49<01:23, 2.03it/s]
|
1386 |
98%|ββββββββββ| 10514/10682 [1:29:49<01:22, 2.03it/s]
|
1387 |
98%|ββββββββββ| 10515/10682 [1:29:50<01:22, 2.03it/s]
|
1388 |
98%|ββββββββββ| 10516/10682 [1:29:50<01:21, 2.03it/s]
|
1389 |
98%|ββββββββββ| 10517/10682 [1:29:51<01:21, 2.03it/s]
|
1390 |
98%|ββββββββββ| 10518/10682 [1:29:51<01:20, 2.03it/s]
|
1391 |
98%|ββββββββββ| 10519/10682 [1:29:52<01:20, 2.03it/s]
|
1392 |
98%|ββββββββββ| 10520/10682 [1:29:52<01:19, 2.03it/s]
|
1393 |
98%|ββββββββββ| 10521/10682 [1:29:53<01:19, 2.03it/s]
|
1394 |
99%|ββββββββββ| 10522/10682 [1:29:53<01:18, 2.03it/s]
|
1395 |
99%|ββββββββββ| 10523/10682 [1:29:54<01:18, 2.03it/s]
|
1396 |
99%|ββββββββββ| 10524/10682 [1:29:54<01:17, 2.03it/s]
|
1397 |
99%|ββββββββββ| 10525/10682 [1:29:55<01:17, 2.03it/s]
|
1398 |
|
1399 |
+
|
1400 |
99%|ββββββββββ| 10525/10682 [1:29:55<01:17, 2.03it/s]
|
1401 |
99%|ββββββββββ| 10526/10682 [1:29:55<01:17, 2.03it/s]
|
1402 |
99%|ββββββββββ| 10527/10682 [1:29:56<01:16, 2.03it/s]
|
1403 |
99%|ββββββββββ| 10528/10682 [1:29:56<01:15, 2.03it/s]
|
1404 |
99%|ββββββββββ| 10529/10682 [1:29:57<01:15, 2.03it/s]
|
1405 |
99%|ββββββββββ| 10530/10682 [1:29:57<01:14, 2.03it/s]
|
1406 |
99%|ββββββββββ| 10531/10682 [1:29:58<01:14, 2.03it/s]
|
1407 |
99%|ββββββββββ| 10532/10682 [1:29:58<01:13, 2.03it/s]
|
1408 |
99%|ββββββββββ| 10533/10682 [1:29:59<01:13, 2.03it/s]
|
1409 |
99%|ββββββββββ| 10534/10682 [1:29:59<01:12, 2.03it/s]
|
1410 |
99%|ββββββββββ| 10535/10682 [1:30:00<01:12, 2.03it/s]
|
1411 |
99%|ββββββββββ| 10536/10682 [1:30:00<01:11, 2.03it/s]
|
1412 |
99%|ββββββββββ| 10537/10682 [1:30:01<01:11, 2.03it/s]
|
1413 |
99%|ββββββββββ| 10538/10682 [1:30:01<01:10, 2.03it/s]
|
1414 |
99%|ββββββββββ| 10539/10682 [1:30:02<01:10, 2.03it/s]
|
1415 |
99%|ββββββββββ| 10540/10682 [1:30:02<01:10, 2.03it/s]
|
1416 |
99%|ββββββββββ| 10541/10682 [1:30:03<01:09, 2.02it/s]
|
1417 |
99%|ββββββββββ| 10542/10682 [1:30:03<01:09, 2.03it/s]
|
1418 |
99%|ββββββββββ| 10543/10682 [1:30:04<01:08, 2.03it/s]
|
1419 |
99%|ββββββββββ| 10544/10682 [1:30:04<01:08, 2.03it/s]
|
1420 |
99%|ββββββββββ| 10545/10682 [1:30:05<01:07, 2.03it/s]
|
1421 |
99%|ββββββββββ| 10546/10682 [1:30:05<01:07, 2.03it/s]
|
1422 |
99%|ββββββββββ| 10547/10682 [1:30:06<01:06, 2.03it/s]
|
1423 |
99%|ββββββββββ| 10548/10682 [1:30:06<01:06, 2.03it/s]
|
1424 |
99%|ββββββββββ| 10549/10682 [1:30:07<01:05, 2.02it/s]
|
1425 |
99%|ββββββββββ| 10550/10682 [1:30:07<01:05, 2.03it/s]{'loss': 2.7968, 'grad_norm': 0.25245076417922974, 'learning_rate': 4.651600211027507e-07, 'epoch': 13.82}
|
1426 |
+
|
1427 |
|
1428 |
99%|ββββββββββ| 10550/10682 [1:30:07<01:05, 2.03it/s]
|
1429 |
99%|ββββββββββ| 10551/10682 [1:30:08<01:04, 2.02it/s]
|
1430 |
99%|ββββββββββ| 10552/10682 [1:30:08<01:04, 2.03it/s]
|
1431 |
99%|ββββββββββ| 10553/10682 [1:30:09<01:03, 2.03it/s]
|
1432 |
99%|ββββββββββ| 10554/10682 [1:30:09<01:03, 2.03it/s]
|
1433 |
99%|ββββββββββ| 10555/10682 [1:30:10<01:02, 2.03it/s]
|
1434 |
99%|ββββββββββ| 10556/10682 [1:30:10<01:02, 2.03it/s]
|
1435 |
99%|ββββββββββ| 10557/10682 [1:30:11<01:01, 2.03it/s]
|
1436 |
99%|ββββββββββ| 10558/10682 [1:30:11<01:01, 2.03it/s]
|
1437 |
99%|ββββββββββ| 10559/10682 [1:30:12<01:00, 2.03it/s]
|
1438 |
99%|ββββββββββ| 10560/10682 [1:30:12<01:00, 2.03it/s]
|
1439 |
99%|ββββββββββ| 10561/10682 [1:30:13<00:59, 2.03it/s]
|
1440 |
99%|ββββββββββ| 10562/10682 [1:30:13<00:59, 2.03it/s]
|
1441 |
99%|ββββββββββ| 10563/10682 [1:30:14<00:58, 2.03it/s]
|
1442 |
99%|ββββββββββ| 10564/10682 [1:30:14<00:58, 2.03it/s]
|
1443 |
99%|ββββββββββ| 10565/10682 [1:30:15<00:57, 2.03it/s]
|
1444 |
99%|ββββββββββ| 10566/10682 [1:30:15<00:57, 2.03it/s]
|
1445 |
99%|ββββββββββ| 10567/10682 [1:30:16<00:56, 2.03it/s]
|
1446 |
99%|ββββββββββ| 10568/10682 [1:30:16<00:56, 2.03it/s]
|
1447 |
99%|ββββββββββ| 10569/10682 [1:30:17<00:55, 2.03it/s]
|
1448 |
99%|ββββββββββ| 10570/10682 [1:30:17<00:55, 2.03it/s]
|
1449 |
99%|ββββββββββ| 10571/10682 [1:30:18<00:54, 2.03it/s]
|
1450 |
99%|ββββββββββ| 10572/10682 [1:30:18<00:54, 2.03it/s]
|
1451 |
99%|ββββββββββ| 10573/10682 [1:30:19<00:53, 2.03it/s]
|
1452 |
99%|ββββββββββ| 10574/10682 [1:30:19<00:53, 2.03it/s]
|
1453 |
99%|ββββββββββ| 10575/10682 [1:30:20<00:52, 2.03it/s]
|
1454 |
|
1455 |
+
|
1456 |
99%|ββββββββββ| 10575/10682 [1:30:20<00:52, 2.03it/s]
|
1457 |
99%|ββββββββββ| 10576/10682 [1:30:20<00:52, 2.02it/s]
|
1458 |
99%|ββββββββββ| 10577/10682 [1:30:21<00:51, 2.02it/s]
|
1459 |
99%|ββββββββββ| 10578/10682 [1:30:21<00:51, 2.02it/s]
|
1460 |
99%|ββββββββββ| 10579/10682 [1:30:22<00:50, 2.02it/s]
|
1461 |
99%|ββββββββββ| 10580/10682 [1:30:22<00:50, 2.02it/s]
|
1462 |
99%|ββββββββββ| 10581/10682 [1:30:23<00:49, 2.02it/s]
|
1463 |
99%|ββββββββββ| 10582/10682 [1:30:23<00:49, 2.03it/s]
|
1464 |
99%|ββββββββββ| 10583/10682 [1:30:24<00:48, 2.03it/s]
|
1465 |
99%|ββββββββββ| 10584/10682 [1:30:24<00:48, 2.03it/s]
|
1466 |
99%|ββββββββββ| 10585/10682 [1:30:25<00:47, 2.03it/s]
|
1467 |
99%|ββββββββββ| 10586/10682 [1:30:25<00:47, 2.03it/s]
|
1468 |
99%|ββββββββββ| 10587/10682 [1:30:25<00:46, 2.03it/s]
|
1469 |
99%|ββββββββββ| 10588/10682 [1:30:26<00:46, 2.03it/s]
|
1470 |
99%|ββββββββββ| 10589/10682 [1:30:26<00:45, 2.03it/s]
|
1471 |
99%|ββββββββββ| 10590/10682 [1:30:27<00:45, 2.03it/s]
|
1472 |
99%|ββββββββββ| 10591/10682 [1:30:27<00:44, 2.03it/s]
|
1473 |
99%|ββββββββββ| 10592/10682 [1:30:28<00:44, 2.03it/s]
|
1474 |
99%|ββββββββββ| 10593/10682 [1:30:28<00:43, 2.03it/s]
|
1475 |
99%|ββββββββββ| 10594/10682 [1:30:29<00:43, 2.03it/s]
|
1476 |
99%|ββββββββββ| 10595/10682 [1:30:29<00:42, 2.03it/s]
|
1477 |
99%|ββββββββββ| 10596/10682 [1:30:30<00:42, 2.03it/s]
|
1478 |
99%|ββββββββββ| 10597/10682 [1:30:30<00:41, 2.03it/s]
|
1479 |
99%|ββββββββββ| 10598/10682 [1:30:31<00:41, 2.03it/s]
|
1480 |
99%|ββββββββββ| 10599/10682 [1:30:31<00:40, 2.03it/s]
|
1481 |
99%|ββββββββββ| 10600/10682 [1:30:32<00:40, 2.03it/s]{'loss': 2.8119, 'grad_norm': 0.2522198557853699, 'learning_rate': 1.7952444123359167e-07, 'epoch': 13.88}
|
1482 |
+
|
1483 |
|
1484 |
99%|ββββββββββ| 10600/10682 [1:30:32<00:40, 2.03it/s]
|
1485 |
99%|ββββββββββ| 10601/10682 [1:30:32<00:39, 2.03it/s]
|
1486 |
99%|ββββββββββ| 10602/10682 [1:30:33<00:39, 2.03it/s]
|
1487 |
99%|ββββββββββ| 10603/10682 [1:30:33<00:39, 2.03it/s]
|
1488 |
99%|ββββββββββ| 10604/10682 [1:30:34<00:38, 2.03it/s]
|
1489 |
99%|ββββββββββ| 10605/10682 [1:30:34<00:38, 2.02it/s]
|
1490 |
99%|ββββββββββ| 10606/10682 [1:30:35<00:37, 2.03it/s]
|
1491 |
99%|ββββββββββ| 10607/10682 [1:30:35<00:37, 2.03it/s]
|
1492 |
99%|ββββββββββ| 10608/10682 [1:30:36<00:36, 2.03it/s]
|
1493 |
99%|ββββββββββ| 10609/10682 [1:30:36<00:36, 2.03it/s]
|
1494 |
99%|ββββββββββ| 10610/10682 [1:30:37<00:35, 2.03it/s]
|
1495 |
99%|ββββββββββ| 10611/10682 [1:30:37<00:35, 2.03it/s]
|
1496 |
99%|ββββββββββ| 10612/10682 [1:30:38<00:34, 2.03it/s]
|
1497 |
99%|ββββββββββ| 10613/10682 [1:30:38<00:33, 2.03it/s]
|
1498 |
99%|ββββββββββ| 10614/10682 [1:30:39<00:33, 2.03it/s]
|
1499 |
99%|ββββββββββ| 10615/10682 [1:30:39<00:33, 2.03it/s]
|
1500 |
99%|ββββββββββ| 10616/10682 [1:30:40<00:32, 2.03it/s]
|
1501 |
99%|ββββββββββ| 10617/10682 [1:30:40<00:32, 2.03it/s]
|
1502 |
99%|ββββββββββ| 10618/10682 [1:30:41<00:31, 2.03it/s]
|
1503 |
99%|ββββββββββ| 10619/10682 [1:30:41<00:31, 2.03it/s]
|
1504 |
99%|ββββββββββ| 10620/10682 [1:30:42<00:30, 2.03it/s]
|
1505 |
99%|ββββββββββ| 10621/10682 [1:30:42<00:30, 2.03it/s]
|
1506 |
99%|ββββββββββ| 10622/10682 [1:30:43<00:29, 2.03it/s]
|
1507 |
99%|ββββββββββ| 10623/10682 [1:30:43<00:29, 2.03it/s]
|
1508 |
99%|ββββββββββ| 10624/10682 [1:30:44<00:28, 2.03it/s]
|
1509 |
99%|ββββββββββ| 10625/10682 [1:30:44<00:28, 2.03it/s]{'loss': 2.8038, 'grad_norm': 0.2499060034751892, 'learning_rate': 8.674791042273533e-08, 'epoch': 13.92}
|
1510 |
+
|
1511 |
|
1512 |
99%|ββββββββββ| 10625/10682 [1:30:44<00:28, 2.03it/s]
|
1513 |
99%|ββββββββββ| 10626/10682 [1:30:45<00:27, 2.02it/s]
|
1514 |
99%|ββββββββββ| 10627/10682 [1:30:45<00:27, 2.02it/s]
|
1515 |
99%|ββββββββββ| 10628/10682 [1:30:46<00:26, 2.02it/s]
|
1516 |
+
|
1517 |
|
1518 |
|
1519 |
+
|
1520 |
+
|
1521 |
|
1522 |
+
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|