Training in progress, epoch 13
Browse files- logs/events.out.tfevents.1716820160.sphinx2 +2 -2
- model.safetensors +1 -1
- train_job_output.txt +32 -1
logs/events.out.tfevents.1716820160.sphinx2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7ce92215f31b86015b6e5759af7eda0d377944e6da230ad7fcc68ad91aa6d85
|
3 |
+
size 95442
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 281715176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbd0bc7620c13c0e632fea3c413d166820b79f16652d670d9e6ac1043a091563
|
3 |
size 281715176
|
train_job_output.txt
CHANGED
@@ -586,4 +586,35 @@ command outputs:
|
|
586 |
|
587 |
|
588 |
92%|ββββββββββ| 9875/10682 [1:24:14<06:38, 2.03it/s]
|
589 |
92%|ββββββββββ| 9876/10682 [1:24:14<06:37, 2.03it/s]
|
590 |
92%|ββββββββββ| 9877/10682 [1:24:15<06:37, 2.03it/s]
|
591 |
92%|ββββββββββ| 9878/10682 [1:24:15<06:36, 2.03it/s]
|
592 |
92%|ββββββββββ| 9879/10682 [1:24:16<06:36, 2.03it/s]
|
593 |
92%|ββββββββββ| 9880/10682 [1:24:16<06:35, 2.03it/s]
|
594 |
93%|ββββββββββ| 9881/10682 [1:24:17<06:34, 2.03it/s]
|
595 |
93%|ββββββββββ| 9882/10682 [1:24:17<06:34, 2.03it/s]
|
596 |
93%|ββββββββββ| 9883/10682 [1:24:17<06:33, 2.03it/s]
|
597 |
93%|ββββββββββ| 9884/10682 [1:24:18<06:32, 2.03it/s]
|
598 |
93%|ββββββββββ| 9885/10682 [1:24:18<06:32, 2.03it/s]
|
599 |
93%|ββββββββββ| 9886/10682 [1:24:19<06:31, 2.03it/s]
|
600 |
93%|ββββββββββ| 9887/10682 [1:24:19<06:31, 2.03it/s]
|
601 |
93%|ββββββββββ| 9888/10682 [1:24:20<06:31, 2.03it/s]
|
602 |
93%|ββββββββββ| 9889/10682 [1:24:20<06:30, 2.03it/s]
|
603 |
93%|ββββββββββ| 9890/10682 [1:24:21<06:30, 2.03it/s]
|
604 |
93%|ββββββββββ| 9891/10682 [1:24:21<06:30, 2.03it/s]
|
605 |
93%|ββββββββββ| 9892/10682 [1:24:22<06:29, 2.03it/s]
|
606 |
93%|ββββββββββ| 9893/10682 [1:24:22<06:29, 2.03it/s]
|
607 |
93%|ββββββββββ| 9894/10682 [1:24:23<06:28, 2.03it/s]
|
608 |
93%|ββββββββββ| 9895/10682 [1:24:23<06:28, 2.03it/s]
|
609 |
93%|ββββββββββ| 9896/10682 [1:24:24<06:27, 2.03it/s]
|
610 |
93%|ββββββββββ| 9897/10682 [1:24:24<06:26, 2.03it/s]
|
611 |
93%|ββββββββββ| 9898/10682 [1:24:25<06:26, 2.03it/s]
|
612 |
93%|ββββββββββ| 9899/10682 [1:24:25<06:25, 2.03it/s]
|
613 |
93%|ββββββββββ| 9900/10682 [1:24:26<06:25, 2.03it/s]{'loss': 2.8968, 'grad_norm': 0.26422566175460815, 'learning_rate': 1.6239414036870183e-05, 'epoch': 12.97}
|
614 |
|
615 |
|
616 |
93%|ββββββββββ| 9900/10682 [1:24:26<06:25, 2.03it/s]
|
617 |
93%|ββββββββββ| 9901/10682 [1:24:26<06:24, 2.03it/s]
|
618 |
93%|ββββββββββ| 9902/10682 [1:24:27<06:24, 2.03it/s]
|
619 |
93%|ββββββββββ| 9903/10682 [1:24:27<06:23, 2.03it/s]
|
620 |
93%|ββββββββββ| 9904/10682 [1:24:28<06:23, 2.03it/s]
|
621 |
93%|ββββββββββ| 9905/10682 [1:24:28<06:23, 2.03it/s]
|
622 |
93%|ββββββββββ| 9906/10682 [1:24:29<06:22, 2.03it/s]
|
623 |
93%|ββββββββββ| 9907/10682 [1:24:29<06:22, 2.03it/s]
|
624 |
93%|ββββββββββ| 9908/10682 [1:24:30<06:21, 2.03it/s]
|
625 |
93%|ββββββββββ| 9909/10682 [1:24:30<06:21, 2.03it/s]
|
626 |
93%|ββββββββββ| 9910/10682 [1:24:31<06:20, 2.03it/s]
|
627 |
93%|ββββββββββ| 9911/10682 [1:24:31<06:20, 2.03it/s]
|
628 |
93%|ββββββββββ| 9912/10682 [1:24:32<06:19, 2.03it/s]
|
629 |
93%|ββββββββββ| 9913/10682 [1:24:32<06:19, 2.03it/s]
|
630 |
93%|ββββββββββ| 9914/10682 [1:24:33<06:19, 2.03it/s]
|
631 |
93%|ββββββββββ| 9915/10682 [1:24:33<06:18, 2.03it/s]
|
632 |
93%|ββββββββββ| 9916/10682 [1:24:34<06:17, 2.03it/s]
|
633 |
93%|ββββββββββ| 9917/10682 [1:24:34<06:17, 2.03it/s]
|
634 |
93%|ββββββββββ| 9918/10682 [1:24:35<06:16, 2.03it/s]
|
635 |
93%|ββββββββββ| 9919/10682 [1:24:35<06:15, 2.03it/s]
|
636 |
93%|ββββββββββ| 9920/10682 [1:24:36<06:15, 2.03it/s]
|
637 |
93%|ββββββββββ| 9921/10682 [1:24:36<06:14, 2.03it/s]
|
638 |
93%|ββββββββββ| 9922/10682 [1:24:37<06:14, 2.03it/s]
|
639 |
93%|ββββββββββ| 9923/10682 [1:24:37<06:13, 2.03it/s]
|
640 |
93%|ββββββββββ| 9924/10682 [1:24:38<06:12, 2.03it/s]
|
641 |
93%|ββββββββββ| 9925/10682 [1:24:38<06:22, 1.98it/s]{'loss': 2.89, 'grad_norm': 0.2640738785266876, 'learning_rate': 1.5222903086944684e-05, 'epoch': 13.0}
|
642 |
|
643 |
|
644 |
93%|ββββββββββ| 9925/10682 [1:24:38<06:22, 1.98it/s]
|
645 |
93%|ββββββββββ| 9926/10682 [1:24:50<50:36, 4.02s/it]
|
646 |
93%|ββββββββββ| 9927/10682 [1:24:51<37:14, 2.96s/it]
|
647 |
93%|ββββββββββ| 9928/10682 [1:24:51<27:53, 2.22s/it]
|
648 |
93%|ββββββββββ| 9929/10682 [1:24:52<21:23, 1.70s/it]
|
649 |
93%|ββββββββββ| 9930/10682 [1:24:52<16:48, 1.34s/it]
|
650 |
93%|ββββββββββ| 9931/10682 [1:24:53<13:35, 1.09s/it]
|
651 |
93%|ββββββββββ| 9932/10682 [1:24:53<11:20, 1.10it/s]
|
652 |
93%|ββββββββββ| 9933/10682 [1:24:54<09:49, 1.27it/s]
|
653 |
93%|ββββββββββ| 9934/10682 [1:24:54<08:43, 1.43it/s]
|
654 |
93%|ββββββββββ| 9935/10682 [1:24:55<07:56, 1.57it/s]
|
655 |
93%|ββββββββββ| 9936/10682 [1:24:55<07:23, 1.68it/s]
|
656 |
93%|ββββββββββ| 9937/10682 [1:24:56<07:00, 1.77it/s]
|
657 |
93%|ββββββββββ| 9938/10682 [1:24:56<06:44, 1.84it/s]
|
658 |
93%|ββββββββββ| 9939/10682 [1:24:57<06:32, 1.89it/s]
|
659 |
93%|ββββββββββ| 9940/10682 [1:24:57<06:24, 1.93it/s]
|
660 |
93%|ββββββββββ| 9941/10682 [1:24:58<06:18, 1.96it/s]
|
661 |
93%|ββββββββββ| 9942/10682 [1:24:58<06:13, 1.98it/s]
|
662 |
93%|ββββββββββ| 9943/10682 [1:24:59<06:10, 1.99it/s]
|
663 |
93%|ββββββββββ| 9944/10682 [1:24:59<06:08, 2.00it/s]
|
664 |
93%|ββββββββββ| 9945/10682 [1:25:00<06:06, 2.01it/s]
|
665 |
93%|ββββββββββ| 9946/10682 [1:25:00<06:04, 2.02it/s]
|
666 |
93%|ββββββββββ| 9947/10682 [1:25:01<06:03, 2.02it/s]
|
667 |
93%|ββββββββββ| 9948/10682 [1:25:01<06:02, 2.03it/s]
|
668 |
93%|ββββββββββ| 9949/10682 [1:25:02<06:02, 2.02it/s]
|
669 |
93%|ββββββββββ| 9950/10682 [1:25:02<06:01, 2.03it/s]{'loss': 2.8505, 'grad_norm': 0.2604374289512634, 'learning_rate': 1.4238751618640577e-05, 'epoch': 13.03}
|
670 |
-
|
671 |
|
672 |
93%|ββββββββββ| 9950/10682 [1:25:02<06:01, 2.03it/s]
|
673 |
93%|ββββββββββ| 9951/10682 [1:25:03<06:01, 2.02it/s]
|
|
|
674 |
|
675 |
93%|ββββββββββ| 9950/10682 [1:25:02<06:01, 2.03it/s]
|
676 |
93%|ββββββββββ| 9951/10682 [1:25:03<06:01, 2.02it/s]
|
677 |
93%|ββββββββββ| 9952/10682 [1:25:03<06:01, 2.02it/s]
|
678 |
93%|ββββββββββ| 9953/10682 [1:25:04<06:00, 2.02it/s]
|
679 |
93%|ββββββββββ| 9954/10682 [1:25:04<05:59, 2.02it/s]
|
680 |
93%|ββββββββββ| 9955/10682 [1:25:05<05:58, 2.03it/s]
|
681 |
93%|ββββββββββ| 9956/10682 [1:25:05<05:57, 2.03it/s]
|
682 |
93%|ββββββββββ| 9957/10682 [1:25:06<05:57, 2.03it/s]
|
683 |
93%|ββββββββββ| 9958/10682 [1:25:06<05:56, 2.03it/s]
|
684 |
93%|ββββββββββ| 9959/10682 [1:25:07<05:55, 2.03it/s]
|
685 |
93%|ββββββββββ| 9960/10682 [1:25:07<05:55, 2.03it/s]
|
686 |
93%|ββββββββββ| 9961/10682 [1:25:08<05:54, 2.03it/s]
|
687 |
93%|ββββββββββ| 9962/10682 [1:25:08<05:54, 2.03it/s]
|
688 |
93%|ββββββββββ| 9963/10682 [1:25:09<05:54, 2.03it/s]
|
689 |
93%|ββββββββββ| 9964/10682 [1:25:09<05:53, 2.03it/s]
|
690 |
93%|ββββββββββ| 9965/10682 [1:25:10<05:52, 2.03it/s]
|
691 |
93%|ββββββββββ| 9966/10682 [1:25:10<05:52, 2.03it/s]
|
692 |
93%|ββββββββββ| 9967/10682 [1:25:11<05:51, 2.03it/s]
|
693 |
93%|ββββββββββ| 9968/10682 [1:25:11<05:51, 2.03it/s]
|
694 |
93%|ββββββββββ| 9969/10682 [1:25:12<05:50, 2.03it/s]
|
695 |
93%|ββββββββββ| 9970/10682 [1:25:12<05:50, 2.03it/s]
|
696 |
93%|ββββββββββ| 9971/10682 [1:25:13<05:50, 2.03it/s]
|
697 |
93%|ββββββββββ| 9972/10682 [1:25:13<05:49, 2.03it/s]
|
698 |
93%|ββββββββββ| 9973/10682 [1:25:14<05:49, 2.03it/s]
|
699 |
93%|ββββββββββ| 9974/10682 [1:25:14<05:48, 2.03it/s]
|
700 |
93%|ββββββββββ| 9975/10682 [1:25:15<05:47, 2.03it/s]{'loss': 2.8519, 'grad_norm': 0.25846487283706665, 'learning_rate': 1.3287025325307511e-05, 'epoch': 13.06}
|
|
|
701 |
|
702 |
93%|ββββββββββ| 9975/10682 [1:25:15<05:47, 2.03it/s]
|
703 |
93%|ββββββββββ| 9976/10682 [1:25:15<05:47, 2.03it/s]
|
704 |
93%|ββββββββββ| 9977/10682 [1:25:16<05:47, 2.03it/s]
|
705 |
93%|ββββββββββ| 9978/10682 [1:25:16<05:46, 2.03it/s]
|
706 |
93%|ββββββββββ| 9979/10682 [1:25:17<05:46, 2.03it/s]
|
707 |
93%|ββββββββββ| 9980/10682 [1:25:17<05:45, 2.03it/s]
|
708 |
93%|ββββββββββ| 9981/10682 [1:25:18<05:44, 2.03it/s]
|
709 |
93%|ββββββββββ| 9982/10682 [1:25:18<05:44, 2.03it/s]
|
710 |
93%|ββββββββββ| 9983/10682 [1:25:19<05:44, 2.03it/s]
|
711 |
93%|ββββββββββ| 9984/10682 [1:25:19<05:44, 2.03it/s]
|
712 |
93%|ββββββββββ| 9985/10682 [1:25:20<05:43, 2.03it/s]
|
713 |
93%|ββββββββββ| 9986/10682 [1:25:20<05:43, 2.03it/s]
|
714 |
93%|ββββββββββ| 9987/10682 [1:25:21<05:42, 2.03it/s]
|
715 |
94%|ββββββββββ| 9988/10682 [1:25:21<05:41, 2.03it/s]
|
716 |
94%|ββββββββββ| 9989/10682 [1:25:22<05:41, 2.03it/s]
|
717 |
94%|ββββββββββ| 9990/10682 [1:25:22<05:40, 2.03it/s]
|
718 |
94%|ββββββββββ| 9991/10682 [1:25:22<05:40, 2.03it/s]
|
719 |
94%|ββββββββββ| 9992/10682 [1:25:23<05:39, 2.03it/s]
|
720 |
94%|ββββββββββ| 9993/10682 [1:25:23<05:38, 2.03it/s]
|
721 |
94%|βββββββοΏ½οΏ½ββ| 9994/10682 [1:25:24<05:38, 2.03it/s]
|
722 |
94%|ββββββββββ| 9995/10682 [1:25:24<05:37, 2.03it/s]
|
723 |
94%|ββββββββββ| 9996/10682 [1:25:25<05:37, 2.03it/s]
|
724 |
94%|ββββββββββ| 9997/10682 [1:25:25<05:37, 2.03it/s]
|
725 |
94%|ββββββββββ| 9998/10682 [1:25:26<05:36, 2.03it/s]
|
726 |
94%|ββββββββββ| 9999/10682 [1:25:26<05:35, 2.03it/s]
|
727 |
94%|ββββββββββ| 10000/10682 [1:25:27<05:35, 2.03it/s]{'loss': 2.8577, 'grad_norm': 0.26245424151420593, 'learning_rate': 1.2367787735873993e-05, 'epoch': 13.1}
|
|
|
728 |
|
729 |
94%|ββββββββββ| 10000/10682 [1:25:27<05:35, 2.03it/s]
|
730 |
94%|ββββββββββ| 10001/10682 [1:25:27<05:35, 2.03it/s]
|
731 |
94%|ββββββββββ| 10002/10682 [1:25:28<05:36, 2.02it/s]
|
732 |
94%|ββββββββββ| 10003/10682 [1:25:28<05:35, 2.02it/s]
|
733 |
94%|ββββββββββ| 10004/10682 [1:25:29<05:35, 2.02it/s]
|
734 |
94%|ββββββββββ| 10005/10682 [1:25:29<05:33, 2.03it/s]
|
735 |
94%|ββββββββββ| 10006/10682 [1:25:30<05:33, 2.03it/s]
|
736 |
94%|ββββββββββ| 10007/10682 [1:25:30<05:32, 2.03it/s]
|
737 |
94%|ββββββββββ| 10008/10682 [1:25:31<05:32, 2.03it/s]
|
738 |
94%|ββββββββββ| 10009/10682 [1:25:31<05:31, 2.03it/s]
|
739 |
94%|ββββββββββ| 10010/10682 [1:25:32<05:30, 2.03it/s]
|
740 |
94%|ββββββββββ| 10011/10682 [1:25:32<05:30, 2.03it/s]
|
741 |
94%|ββββββββββ| 10012/10682 [1:25:33<05:29, 2.03it/s]
|
742 |
94%|ββββββββββ| 10013/10682 [1:25:33<05:29, 2.03it/s]
|
743 |
94%|ββββββββββ| 10014/10682 [1:25:34<05:28, 2.03it/s]
|
744 |
94%|ββββββββββ| 10015/10682 [1:25:34<05:28, 2.03it/s]
|
745 |
94%|ββββββββββ| 10016/10682 [1:25:35<05:27, 2.03it/s]
|
746 |
94%|ββββββββββ| 10017/10682 [1:25:35<05:27, 2.03it/s]
|
747 |
94%|ββββββββββ| 10018/10682 [1:25:36<05:26, 2.03it/s]
|
748 |
94%|ββββββββββ| 10019/10682 [1:25:36<05:26, 2.03it/s]
|
749 |
94%|ββββββββββ| 10020/10682 [1:25:37<05:25, 2.03it/s]
|
750 |
94%|ββββββββββ| 10021/10682 [1:25:37<05:25, 2.03it/s]
|
751 |
94%|ββββββββββ| 10022/10682 [1:25:38<05:25, 2.03it/s]
|
752 |
94%|ββββββββββ| 10023/10682 [1:25:38<05:24, 2.03it/s]
|
753 |
94%|ββββββββββ| 10024/10682 [1:25:39<05:24, 2.03it/s]
|
754 |
94%|ββββββββββ| 10025/10682 [1:25:39<05:24, 2.03it/s]{'loss': 2.8525, 'grad_norm': 0.2591426372528076, 'learning_rate': 1.1481100210606388e-05, 'epoch': 13.13}
|
|
|
755 |
|
756 |
94%|ββββββββββ| 10025/10682 [1:25:39<05:24, 2.03it/s]
|
757 |
94%|ββββββββββ| 10026/10682 [1:25:40<05:23, 2.03it/s]
|
758 |
94%|ββββββββββ| 10027/10682 [1:25:40<05:22, 2.03it/s]
|
759 |
94%|ββββββββββ| 10028/10682 [1:25:41<05:22, 2.03it/s]
|
760 |
94%|ββββββββββ| 10029/10682 [1:25:41<05:21, 2.03it/s]
|
761 |
94%|ββββββββββ| 10030/10682 [1:25:42<05:21, 2.03it/s]
|
762 |
94%|ββββββββββ| 10031/10682 [1:25:42<05:20, 2.03it/s]
|
763 |
94%|ββββββββββ| 10032/10682 [1:25:43<05:20, 2.03it/s]
|
764 |
94%|ββββββββββ| 10033/10682 [1:25:43<05:19, 2.03it/s]
|
765 |
94%|ββββββββββ| 10034/10682 [1:25:44<05:18, 2.03it/s]
|
766 |
94%|ββββββββββ| 10035/10682 [1:25:44<05:18, 2.03it/s]
|
767 |
94%|ββββββββββ| 10036/10682 [1:25:45<05:18, 2.03it/s]
|
768 |
94%|ββββββββββ| 10037/10682 [1:25:45<05:17, 2.03it/s]
|
769 |
94%|ββββββββββ| 10038/10682 [1:25:46<05:17, 2.03it/s]
|
770 |
94%|ββββββββββ| 10039/10682 [1:25:46<05:16, 2.03it/s]
|
771 |
94%|ββββββββββ| 10040/10682 [1:25:47<05:16, 2.03it/s]
|
772 |
94%|ββββββββββ| 10041/10682 [1:25:47<05:15, 2.03it/s]
|
773 |
94%|ββββββββββ| 10042/10682 [1:25:48<05:15, 2.03it/s]
|
774 |
94%|ββββββββββ| 10043/10682 [1:25:48<05:14, 2.03it/s]
|
775 |
94%|ββββββββββ| 10044/10682 [1:25:49<05:14, 2.03it/s]
|
776 |
94%|ββββββββββ| 10045/10682 [1:25:49<05:13, 2.03it/s]
|
777 |
94%|ββββββββββ| 10046/10682 [1:25:50<05:13, 2.03it/s]
|
778 |
94%|ββββββββββ| 10047/10682 [1:25:50<05:12, 2.03it/s]
|
779 |
94%|ββββββββββ| 10048/10682 [1:25:51<05:12, 2.03it/s]
|
780 |
94%|ββββββββββ| 10049/10682 [1:25:51<05:11, 2.03it/s]
|
781 |
94%|ββββββββββ| 10050/10682 [1:25:52<05:11, 2.03it/s]{'loss': 2.8568, 'grad_norm': 0.25984886288642883, 'learning_rate': 1.0627021937013704e-05, 'epoch': 13.16}
|
|
|
782 |
|
783 |
94%|ββββββββββ| 10050/10682 [1:25:52<05:11, 2.03it/s]
|
784 |
94%|ββββββββββ| 10051/10682 [1:25:52<05:11, 2.03it/s]
|
785 |
94%|βββββοΏ½οΏ½ββββ| 10052/10682 [1:25:53<05:10, 2.03it/s]
|
786 |
94%|ββββββββββ| 10053/10682 [1:25:53<05:10, 2.02it/s]
|
787 |
94%|ββββββββββ| 10054/10682 [1:25:54<05:10, 2.02it/s]
|
788 |
94%|ββββββββββ| 10055/10682 [1:25:54<05:09, 2.02it/s]
|
789 |
94%|ββββββββββ| 10056/10682 [1:25:55<05:09, 2.03it/s]
|
790 |
94%|ββββββββββ| 10057/10682 [1:25:55<05:08, 2.03it/s]
|
791 |
94%|ββββββββββ| 10058/10682 [1:25:56<05:07, 2.03it/s]
|
792 |
94%|ββββββββββ| 10059/10682 [1:25:56<05:07, 2.03it/s]
|
793 |
94%|ββββββββββ| 10060/10682 [1:25:56<05:06, 2.03it/s]
|
794 |
94%|ββββββββββ| 10061/10682 [1:25:57<05:06, 2.02it/s]
|
795 |
94%|ββββββββββ| 10062/10682 [1:25:57<05:06, 2.03it/s]
|
796 |
94%|ββββββββββ| 10063/10682 [1:25:58<05:05, 2.03it/s]
|
797 |
94%|ββββββββββ| 10064/10682 [1:25:58<05:04, 2.03it/s]
|
798 |
94%|ββββββββββ| 10065/10682 [1:25:59<05:04, 2.03it/s]
|
799 |
94%|ββββββββββ| 10066/10682 [1:25:59<05:03, 2.03it/s]
|
800 |
94%|ββββββββββ| 10067/10682 [1:26:00<05:03, 2.03it/s]
|
801 |
94%|ββββββββββ| 10068/10682 [1:26:00<05:02, 2.03it/s]
|
802 |
94%|ββββββββββ| 10069/10682 [1:26:01<05:02, 2.03it/s]
|
803 |
94%|ββββββββββ| 10070/10682 [1:26:01<05:01, 2.03it/s]
|
804 |
94%|ββββββββββ| 10071/10682 [1:26:02<05:00, 2.03it/s]
|
805 |
94%|ββββββββββ| 10072/10682 [1:26:02<05:00, 2.03it/s]
|
806 |
94%|ββββββββββ| 10073/10682 [1:26:03<05:00, 2.03it/s]
|
807 |
94%|ββββββββββ| 10074/10682 [1:26:03<04:59, 2.03it/s]
|
808 |
94%|ββββββββββ| 10075/10682 [1:26:04<04:59, 2.03it/s]{'loss': 2.8561, 'grad_norm': 0.2601851522922516, 'learning_rate': 9.805609925895964e-06, 'epoch': 13.2}
|
|
|
809 |
|
810 |
94%|ββββββββββ| 10075/10682 [1:26:04<04:59, 2.03it/s]
|
811 |
94%|ββββββββββ| 10076/10682 [1:26:04<04:58, 2.03it/s]
|
812 |
94%|ββββββββββ| 10077/10682 [1:26:05<04:58, 2.03it/s]
|
813 |
94%|ββββββββββ| 10078/10682 [1:26:05<04:57, 2.03it/s]
|
814 |
94%|ββββββββββ| 10079/10682 [1:26:06<04:57, 2.03it/s]
|
815 |
94%|ββββββββββ| 10080/10682 [1:26:06<04:56, 2.03it/s]
|
816 |
94%|ββββββββββ| 10081/10682 [1:26:07<04:56, 2.03it/s]
|
817 |
94%|ββββββββββ| 10082/10682 [1:26:07<04:55, 2.03it/s]
|
818 |
94%|ββββββββββ| 10083/10682 [1:26:08<04:55, 2.03it/s]
|
819 |
94%|ββββββββββ| 10084/10682 [1:26:08<04:54, 2.03it/s]
|
820 |
94%|ββββββββββ| 10085/10682 [1:26:09<04:54, 2.03it/s]
|
821 |
94%|ββββββββββ| 10086/10682 [1:26:09<04:53, 2.03it/s]
|
822 |
94%|ββββββββββ| 10087/10682 [1:26:10<04:53, 2.03it/s]
|
823 |
94%|ββββββββββ| 10088/10682 [1:26:10<04:52, 2.03it/s]
|
824 |
94%|ββββββββββ| 10089/10682 [1:26:11<04:52, 2.03it/s]
|
825 |
94%|ββββββββββ| 10090/10682 [1:26:11<04:51, 2.03it/s]
|
826 |
94%|ββββββββββ| 10091/10682 [1:26:12<04:50, 2.03it/s]
|
827 |
94%|ββββββββββ| 10092/10682 [1:26:12<04:50, 2.03it/s]
|
828 |
94%|ββββββββββ| 10093/10682 [1:26:13<04:49, 2.03it/s]
|
829 |
94%|ββββββββββ| 10094/10682 [1:26:13<04:49, 2.03it/s]
|
830 |
95%|ββββββββββ| 10095/10682 [1:26:14<04:49, 2.03it/s]
|
831 |
95%|ββββββββββ| 10096/10682 [1:26:14<04:48, 2.03it/s]
|
832 |
95%|ββββββββββ| 10097/10682 [1:26:15<04:48, 2.03it/s]
|
833 |
95%|ββββββββββ| 10098/10682 [1:26:15<04:47, 2.03it/s]
|
834 |
95%|ββββββββββ| 10099/10682 [1:26:16<04:47, 2.03it/s]
|
835 |
95%|ββββββββββ| 10100/10682 [1:26:16<04:46, 2.03it/s]{'loss': 2.8578, 'grad_norm': 0.2591904401779175, 'learning_rate': 9.01691900753926e-06, 'epoch': 13.23}
|
836 |
|
|
|
837 |
95%|ββββββββββ| 10100/10682 [1:26:16<04:46, 2.03it/s]
|
838 |
95%|ββββββββββ| 10101/10682 [1:26:17<04:46, 2.03it/s]
|
839 |
95%|ββββββββββ| 10102/10682 [1:26:17<04:45, 2.03it/s]
|
840 |
95%|ββββββββββ| 10103/10682 [1:26:18<04:45, 2.03it/s]
|
841 |
95%|ββββββββββ| 10104/10682 [1:26:18<04:44, 2.03it/s]
|
842 |
95%|ββββββββββ| 10105/10682 [1:26:19<04:44, 2.03it/s]
|
843 |
95%|ββββββββββ| 10106/10682 [1:26:19<04:43, 2.03it/s]
|
844 |
95%|ββββββββββ| 10107/10682 [1:26:20<04:43, 2.03it/s]
|
845 |
95%|ββββββββββ| 10108/10682 [1:26:20<04:42, 2.03it/s]
|
846 |
95%|ββββββββββ| 10109/10682 [1:26:21<04:42, 2.03it/s]
|
847 |
95%|ββββββββββ| 10110/10682 [1:26:21<04:41, 2.03it/s]
|
848 |
95%|ββββββββββ| 10111/10682 [1:26:22<04:41, 2.03it/s]
|
849 |
95%|ββββββββββ| 10112/10682 [1:26:22<04:40, 2.03it/s]
|
850 |
95%|βββββββοΏ½οΏ½ββ| 10113/10682 [1:26:23<04:40, 2.03it/s]
|
851 |
95%|ββββββββββ| 10114/10682 [1:26:23<04:39, 2.03it/s]
|
852 |
95%|ββββββββββ| 10115/10682 [1:26:24<04:39, 2.03it/s]
|
853 |
95%|ββββββββββ| 10116/10682 [1:26:24<04:38, 2.03it/s]
|
854 |
95%|ββββββββββ| 10117/10682 [1:26:25<04:38, 2.03it/s]
|
855 |
95%|ββββββββββ| 10118/10682 [1:26:25<04:37, 2.03it/s]
|
856 |
95%|ββββββββββ| 10119/10682 [1:26:26<04:37, 2.03it/s]
|
857 |
95%|ββββββββββ| 10120/10682 [1:26:26<04:36, 2.03it/s]
|
858 |
95%|ββββββββββ| 10121/10682 [1:26:27<04:36, 2.03it/s]
|
859 |
95%|ββββββββββ| 10122/10682 [1:26:27<04:36, 2.03it/s]
|
860 |
95%|ββββββββββ| 10123/10682 [1:26:28<04:35, 2.03it/s]
|
861 |
95%|ββββββββββ| 10124/10682 [1:26:28<04:35, 2.03it/s]
|
862 |
95%|ββββββββββ| 10125/10682 [1:26:29<04:34, 2.03it/s]{'loss': 2.8677, 'grad_norm': 0.2595525085926056, 'learning_rate': 8.261001828055447e-06, 'epoch': 13.26}
|
863 |
|
|
|
864 |
95%|ββββββββββ| 10125/10682 [1:26:29<04:34, 2.03it/s]
|
865 |
95%|ββββββββββ| 10126/10682 [1:26:29<04:34, 2.03it/s]
|
866 |
95%|ββββββββββ| 10127/10682 [1:26:30<04:33, 2.03it/s]
|
867 |
95%|ββββββββββ| 10128/10682 [1:26:30<04:33, 2.03it/s]
|
868 |
95%|ββββββββββ| 10129/10682 [1:26:30<04:32, 2.03it/s]
|
869 |
95%|ββββββββββ| 10130/10682 [1:26:31<04:32, 2.03it/s]
|
870 |
95%|ββββββββββ| 10131/10682 [1:26:31<04:32, 2.03it/s]
|
871 |
95%|ββββββββββ| 10132/10682 [1:26:32<04:31, 2.03it/s]
|
872 |
95%|ββββββββββ| 10133/10682 [1:26:32<04:30, 2.03it/s]
|
873 |
95%|ββββββββββ| 10134/10682 [1:26:33<04:30, 2.03it/s]
|
874 |
95%|ββββββββββ| 10135/10682 [1:26:33<04:29, 2.03it/s]
|
875 |
95%|ββββββββββ| 10136/10682 [1:26:34<04:28, 2.03it/s]
|
876 |
95%|ββββββββββ| 10137/10682 [1:26:34<04:28, 2.03it/s]
|
877 |
95%|ββββββββββ| 10138/10682 [1:26:35<04:27, 2.03it/s]
|
878 |
95%|ββββββββββ| 10139/10682 [1:26:35<04:27, 2.03it/s]
|
879 |
95%|ββββββββββ| 10140/10682 [1:26:36<04:26, 2.03it/s]
|
880 |
95%|ββββββββββ| 10141/10682 [1:26:36<04:26, 2.03it/s]
|
881 |
95%|ββββββββββ| 10142/10682 [1:26:37<04:25, 2.03it/s]
|
882 |
95%|ββββββββββ| 10143/10682 [1:26:37<04:25, 2.03it/s]
|
883 |
95%|ββββββββββ| 10144/10682 [1:26:38<04:25, 2.03it/s]
|
884 |
95%|ββββββββββ| 10145/10682 [1:26:38<04:24, 2.03it/s]
|
885 |
95%|ββββββββββ| 10146/10682 [1:26:39<04:23, 2.03it/s]
|
886 |
95%|ββββββββββ| 10147/10682 [1:26:39<04:23, 2.03it/s]
|
887 |
95%|ββββββββββ| 10148/10682 [1:26:40<04:45, 1.87it/s]
|
888 |
95%|ββββββββββ| 10149/10682 [1:26:40<04:38, 1.91it/s]
|
889 |
95%|ββββββββββ| 10150/10682 [1:26:41<04:32, 1.95it/s]{'loss': 2.8606, 'grad_norm': 0.26168325543403625, 'learning_rate': 7.537908845868024e-06, 'epoch': 13.29}
|
|
|
890 |
|
891 |
95%|ββββββββββ| 10150/10682 [1:26:41<04:32, 1.95it/s]
|
892 |
95%|ββββββββββ| 10151/10682 [1:26:41<04:29, 1.97it/s]
|
893 |
95%|ββββββββββ| 10152/10682 [1:26:42<04:26, 1.99it/s]
|
894 |
95%|ββββββββββ| 10153/10682 [1:26:42<04:24, 2.00it/s]
|
895 |
95%|ββββββββββ| 10154/10682 [1:26:43<04:23, 2.01it/s]
|
896 |
95%|ββββββββββ| 10155/10682 [1:26:43<04:21, 2.02it/s]
|
897 |
95%|ββββββββββ| 10156/10682 [1:26:44<04:20, 2.02it/s]
|
898 |
95%|ββββββββββ| 10157/10682 [1:26:44<04:19, 2.03it/s]
|
899 |
95%|ββββββββββ| 10158/10682 [1:26:45<04:18, 2.03it/s]
|
900 |
95%|ββββββββββ| 10159/10682 [1:26:45<04:17, 2.03it/s]
|
901 |
95%|ββββββββββ| 10160/10682 [1:26:46<04:16, 2.03it/s]
|
902 |
95%|ββββββββββ| 10161/10682 [1:26:46<04:16, 2.03it/s]
|
903 |
95%|ββββββββββ| 10162/10682 [1:26:47<04:15, 2.03it/s]
|
904 |
95%|ββββββββββ| 10163/10682 [1:26:47<04:15, 2.03it/s]
|
905 |
95%|ββββββββββ| 10164/10682 [1:26:48<04:14, 2.03it/s]
|
906 |
95%|ββββββββββ| 10165/10682 [1:26:48<04:14, 2.03it/s]
|
907 |
95%|ββββββββββ| 10166/10682 [1:26:49<04:13, 2.03it/s]
|
908 |
95%|ββββββββββ| 10167/10682 [1:26:49<04:13, 2.03it/s]
|
909 |
95%|ββββββββββ| 10168/10682 [1:26:50<04:12, 2.03it/s]
|
910 |
95%|ββββββββββ| 10169/10682 [1:26:50<04:12, 2.03it/s]
|
911 |
95%|ββββββββββ| 10170/10682 [1:26:51<04:11, 2.03it/s]
|
912 |
95%|ββββββββββ| 10171/10682 [1:26:51<04:11, 2.03it/s]
|
913 |
95%|ββββββββββ| 10172/10682 [1:26:52<04:11, 2.03it/s]
|
914 |
95%|ββββββββββ| 10173/10682 [1:26:52<04:10, 2.03it/s]
|
915 |
95%|ββββββββοΏ½οΏ½β| 10174/10682 [1:26:53<04:10, 2.03it/s]
|
916 |
95%|ββββββββββ| 10175/10682 [1:26:53<04:09, 2.03it/s]{'loss': 2.8653, 'grad_norm': 0.26603227853775024, 'learning_rate': 6.847688328344037e-06, 'epoch': 13.33}
|
|
|
917 |
|
918 |
95%|ββββββββββ| 10175/10682 [1:26:53<04:09, 2.03it/s]
|
919 |
95%|ββββββββββ| 10176/10682 [1:26:54<04:10, 2.02it/s]
|
920 |
95%|ββββββββββ| 10177/10682 [1:26:54<04:09, 2.03it/s]
|
921 |
95%|ββββββββββ| 10178/10682 [1:26:55<04:09, 2.02it/s]
|
922 |
95%|ββββββββββ| 10179/10682 [1:26:55<04:08, 2.03it/s]
|
923 |
95%|ββββββββββ| 10180/10682 [1:26:56<04:07, 2.03it/s]
|
924 |
95%|ββββββββββ| 10181/10682 [1:26:56<04:06, 2.03it/s]
|
925 |
95%|ββββββββββ| 10182/10682 [1:26:57<04:06, 2.03it/s]
|
926 |
95%|ββββββββββ| 10183/10682 [1:26:57<04:05, 2.03it/s]
|
927 |
95%|ββββββββββ| 10184/10682 [1:26:58<04:05, 2.03it/s]
|
928 |
95%|ββββββββββ| 10185/10682 [1:26:58<04:04, 2.03it/s]
|
929 |
95%|ββββββββββ| 10186/10682 [1:26:59<04:03, 2.03it/s]
|
930 |
95%|ββββββββββ| 10187/10682 [1:26:59<04:03, 2.03it/s]
|
931 |
95%|ββββββββββ| 10188/10682 [1:27:00<04:03, 2.03it/s]
|
932 |
95%|ββββββββββ| 10189/10682 [1:27:00<04:02, 2.03it/s]
|
933 |
95%|ββββββββββ| 10190/10682 [1:27:01<04:02, 2.03it/s]
|
934 |
95%|ββββββββββ| 10191/10682 [1:27:01<04:01, 2.03it/s]
|
935 |
95%|ββββββββββ| 10192/10682 [1:27:02<04:01, 2.03it/s]
|
936 |
95%|ββββββββββ| 10193/10682 [1:27:02<04:00, 2.03it/s]
|
937 |
95%|ββββββββββ| 10194/10682 [1:27:03<04:00, 2.03it/s]
|
938 |
95%|ββββββββββ| 10195/10682 [1:27:03<04:00, 2.03it/s]
|
939 |
95%|ββββββββββ| 10196/10682 [1:27:04<03:59, 2.03it/s]
|
940 |
95%|ββββββββββ| 10197/10682 [1:27:04<03:59, 2.03it/s]
|
941 |
95%|ββββββββββ| 10198/10682 [1:27:05<03:58, 2.03it/s]
|
942 |
95%|ββββββββββ| 10199/10682 [1:27:05<03:57, 2.03it/s]
|
943 |
95%|ββββββββββ| 10200/10682 [1:27:06<03:57, 2.03it/s]{'loss': 2.8659, 'grad_norm': 0.2612241208553314, 'learning_rate': 6.190386348572108e-06, 'epoch': 13.36}
|
|
|
944 |
|
945 |
95%|ββββββββββ| 10200/10682 [1:27:06<03:57, 2.03it/s]
|
946 |
95%|ββββββββββ| 10201/10682 [1:27:06<03:57, 2.03it/s]
|
947 |
96%|ββββββββββ| 10202/10682 [1:27:07<03:56, 2.03it/s]
|
948 |
96%|ββββββββββ| 10203/10682 [1:27:07<03:56, 2.03it/s]
|
949 |
96%|ββββββββββ| 10204/10682 [1:27:08<03:55, 2.03it/s]
|
950 |
96%|ββββββββββ| 10205/10682 [1:27:08<03:55, 2.03it/s]
|
951 |
96%|ββββββββββ| 10206/10682 [1:27:09<03:54, 2.03it/s]
|
952 |
96%|ββββββββββ| 10207/10682 [1:27:09<03:54, 2.03it/s]
|
953 |
96%|ββββββββββ| 10208/10682 [1:27:10<03:53, 2.03it/s]
|
954 |
96%|ββββββββββ| 10209/10682 [1:27:10<03:52, 2.03it/s]
|
955 |
96%|ββββββββββ| 10210/10682 [1:27:11<03:52, 2.03it/s]
|
956 |
96%|ββββββββββ| 10211/10682 [1:27:11<03:51, 2.03it/s]
|
957 |
96%|ββββββββββ| 10212/10682 [1:27:12<03:51, 2.03it/s]
|
958 |
96%|ββββββββββ| 10213/10682 [1:27:12<03:51, 2.03it/s]
|
959 |
96%|ββββββββββ| 10214/10682 [1:27:12<03:50, 2.03it/s]
|
960 |
96%|ββββββββββ| 10215/10682 [1:27:13<03:50, 2.03it/s]
|
961 |
96%|ββββββββββ| 10216/10682 [1:27:13<03:49, 2.03it/s]
|
962 |
96%|ββββββββββ| 10217/10682 [1:27:14<03:49, 2.03it/s]
|
963 |
96%|ββββββββββ| 10218/10682 [1:27:14<03:48, 2.03it/s]
|
964 |
96%|ββββββββββ| 10219/10682 [1:27:15<03:48, 2.03it/s]
|
965 |
96%|ββββββββββ| 10220/10682 [1:27:15<03:47, 2.03it/s]
|
966 |
96%|ββββββββββ| 10221/10682 [1:27:16<03:46, 2.03it/s]
|
967 |
96%|ββββββββββ| 10222/10682 [1:27:16<03:46, 2.03it/s]
|
968 |
96%|ββββββββββ| 10223/10682 [1:27:17<03:45, 2.03it/s]
|
969 |
96%|ββββββββββ| 10224/10682 [1:27:17<03:45, 2.03it/s]
|
970 |
96%|ββββββββββ| 10225/10682 [1:27:18<03:45, 2.03it/s]{'loss': 2.8655, 'grad_norm': 0.2632509469985962, 'learning_rate': 5.56604678228706e-06, 'epoch': 13.39}
|
|
|
971 |
|
972 |
96%|ββββββββββ| 10225/10682 [1:27:18<03:45, 2.03it/s]
|
973 |
96%|ββββββββββ| 10226/10682 [1:27:18<03:44, 2.03it/s]
|
974 |
96%|ββββββββββ| 10227/10682 [1:27:19<03:44, 2.03it/s]
|
975 |
96%|ββββββββββ| 10228/10682 [1:27:19<03:43, 2.03it/s]
|
976 |
96%|ββββββββββ| 10229/10682 [1:27:20<03:43, 2.03it/s]
|
977 |
96%|ββββββββββ| 10230/10682 [1:27:20<03:42, 2.03it/s]
|
978 |
96%|ββββββββββ| 10231/10682 [1:27:21<03:42, 2.03it/s]
|
979 |
96%|ββββββββββ| 10232/10682 [1:27:21<03:41, 2.03it/s]
|
980 |
96%|ββββββββββ| 10233/10682 [1:27:22<03:41, 2.03it/s]
|
981 |
96%|ββββββββββ| 10234/10682 [1:27:22<03:40, 2.03it/s]
|
982 |
96%|ββββββββββ| 10235/10682 [1:27:23<03:40, 2.03it/s]
|
983 |
96%|ββββββββββ| 10236/10682 [1:27:23<03:39, 2.03it/s]
|
984 |
96%|ββββββββββ| 10237/10682 [1:27:24<03:39, 2.03it/s]
|
985 |
96%|ββββββββββ| 10238/10682 [1:27:24<03:38, 2.03it/s]
|
986 |
96%|ββββββββββ| 10239/10682 [1:27:25<03:38, 2.03it/s]
|
987 |
96%|ββββββββββ| 10240/10682 [1:27:25<03:37, 2.03it/s]
|
988 |
96%|ββββββββββ| 10241/10682 [1:27:26<03:37, 2.03it/s]
|
989 |
96%|ββββββββββ| 10242/10682 [1:27:26<03:36, 2.03it/s]
|
990 |
96%|ββββββββββ| 10243/10682 [1:27:27<03:36, 2.03it/s]
|
991 |
96%|ββββββββββ| 10244/10682 [1:27:27<03:35, 2.03it/s]
|
992 |
96%|ββββββββββ| 10245/10682 [1:27:28<03:35, 2.03it/s]
|
993 |
96%|ββββββββββ| 10246/10682 [1:27:28<03:34, 2.03it/s]
|
994 |
96%|ββββββββββ| 10247/10682 [1:27:29<03:34, 2.03it/s]
|
995 |
96%|ββββββββββ| 10248/10682 [1:27:29<03:33, 2.03it/s]
|
996 |
96%|ββββββββββ| 10249/10682 [1:27:30<03:33, 2.03it/s]
|
997 |
96%|ββββββββββ| 10250/10682 [1:27:30<03:32, 2.03it/s]{'loss': 2.8679, 'grad_norm': 0.262270987033844, 'learning_rate': 4.974711304941093e-06, 'epoch': 13.43}
|
|
|
998 |
|
999 |
96%|ββββββββββ| 10250/10682 [1:27:30<03:32, 2.03it/s]
|
1000 |
96%|ββββββββββ| 10251/10682 [1:27:31<03:32, 2.03it/s]
|
1001 |
96%|ββββββββββ| 10252/10682 [1:27:31<03:31, 2.03it/s]
|
1002 |
96%|ββββββββββ| 10253/10682 [1:27:32<03:50, 1.86it/s]
|
1003 |
96%|ββββββββββ| 10254/10682 [1:27:32<03:44, 1.91it/s]
|
1004 |
96%|ββββββββββ| 10255/10682 [1:27:33<03:39, 1.94it/s]
|
1005 |
96%|ββββββββββ| 10256/10682 [1:27:33<03:35, 1.97it/s]
|
1006 |
96%|ββββββββββ| 10257/10682 [1:27:34<03:33, 1.99it/s]
|
1007 |
96%|ββββββββββ| 10258/10682 [1:27:34<03:31, 2.00it/s]
|
1008 |
96%|ββββββββββ| 10259/10682 [1:27:35<03:30, 2.01it/s]
|
1009 |
96%|ββββββββββ| 10260/10682 [1:27:35<03:29, 2.01it/s]
|
1010 |
96%|ββββββββββ| 10261/10682 [1:27:36<03:28, 2.02it/s]
|
1011 |
96%|ββββββββββ| 10262/10682 [1:27:36<03:27, 2.02it/s]
|
1012 |
96%|ββββββββββ| 10263/10682 [1:27:37<03:26, 2.03it/s]
|
1013 |
96%|ββββββββββ| 10264/10682 [1:27:37<03:26, 2.03it/s]
|
1014 |
96%|ββββββββββ| 10265/10682 [1:27:38<03:25, 2.03it/s]
|
1015 |
96%|ββββββββββ| 10266/10682 [1:27:38<03:24, 2.03it/s]
|
1016 |
96%|ββββββββββ| 10267/10682 [1:27:39<03:24, 2.03it/s]
|
1017 |
96%|ββββββββββ| 10268/10682 [1:27:39<03:23, 2.03it/s]
|
1018 |
96%|ββββββββββ| 10269/10682 [1:27:40<03:23, 2.03it/s]
|
1019 |
96%|ββββββββββ| 10270/10682 [1:27:40<03:22, 2.03it/s]
|
1020 |
96%|ββββββββββ| 10271/10682 [1:27:41<03:22, 2.03it/s]
|
1021 |
96%|ββββββββββ| 10272/10682 [1:27:41<03:21, 2.03it/s]
|
1022 |
96%|ββββββββββ| 10273/10682 [1:27:42<03:21, 2.03it/s]
|
1023 |
96%|ββββββββββ| 10274/10682 [1:27:42<03:21, 2.03it/s]
|
1024 |
96%|ββββββββββ| 10275/10682 [1:27:43<03:20, 2.03it/s]{'loss': 2.8735, 'grad_norm': 0.2618483603000641, 'learning_rate': 4.416419388921844e-06, 'epoch': 13.46}
|
|
|
1025 |
|
1026 |
96%|ββββββββββ| 10275/10682 [1:27:43<03:20, 2.03it/s]
|
1027 |
96%|ββββββββββ| 10276/10682 [1:27:43<03:20, 2.03it/s]
|
1028 |
96%|ββββββββββ| 10277/10682 [1:27:44<03:19, 2.03it/s]
|
1029 |
96%|ββββββββββ| 10278/10682 [1:27:44<03:18, 2.03it/s]
|
1030 |
96%|ββββββββββ| 10279/10682 [1:27:45<03:18, 2.03it/s]
|
1031 |
96%|ββββββββββ| 10280/10682 [1:27:45<03:17, 2.03it/s]
|
1032 |
96%|ββββββββββ| 10281/10682 [1:27:46<03:17, 2.03it/s]
|
1033 |
96%|ββββββββββ| 10282/10682 [1:27:46<03:16, 2.03it/s]
|
1034 |
96%|ββββββββββ| 10283/10682 [1:27:47<03:16, 2.03it/s]
|
1035 |
96%|ββββββββββ| 10284/10682 [1:27:47<03:16, 2.03it/s]
|
1036 |
96%|ββββββββββ| 10285/10682 [1:27:48<03:15, 2.03it/s]
|
1037 |
96%|ββββββββββ| 10286/10682 [1:27:48<03:15, 2.03it/s]
|
1038 |
96%|ββββββββββ| 10287/10682 [1:27:49<03:14, 2.03it/s]
|
1039 |
96%|ββββββββββ| 10288/10682 [1:27:49<03:14, 2.03it/s]
|
1040 |
96%|ββββββββββ| 10289/10682 [1:27:50<03:13, 2.03it/s]
|
1041 |
96%|ββββββββββ| 10290/10682 [1:27:50<03:13, 2.03it/s]
|
1042 |
96%|ββββββββββ| 10291/10682 [1:27:51<03:12, 2.03it/s]
|
1043 |
96%|ββββββββββ| 10292/10682 [1:27:51<03:12, 2.03it/s]
|
1044 |
96%|βββββββοΏ½οΏ½ββ| 10293/10682 [1:27:52<03:11, 2.03it/s]
|
1045 |
96%|ββββββββββ| 10294/10682 [1:27:52<03:11, 2.03it/s]
|
1046 |
96%|ββββββββββ| 10295/10682 [1:27:53<03:10, 2.03it/s]
|
1047 |
96%|ββββββββββ| 10296/10682 [1:27:53<03:10, 2.03it/s]
|
1048 |
96%|ββββββββββ| 10297/10682 [1:27:54<03:09, 2.03it/s]
|
1049 |
96%|ββββββββββ| 10298/10682 [1:27:54<03:08, 2.03it/s]
|
1050 |
96%|ββββββββββ| 10299/10682 [1:27:55<03:08, 2.03it/s]
|
1051 |
96%|ββββββββββ| 10300/10682 [1:27:55<03:08, 2.03it/s]
|
1052 |
|
|
|
1053 |
96%|ββββββββββ| 10300/10682 [1:27:55<03:08, 2.03it/s]
|
1054 |
96%|ββββββββββ| 10301/10682 [1:27:55<03:08, 2.03it/s]
|
1055 |
96%|ββββββββββ| 10302/10682 [1:27:56<03:07, 2.03it/s]
|
1056 |
96%|ββββββββββ| 10303/10682 [1:27:56<03:06, 2.03it/s]
|
1057 |
96%|ββββββββββ| 10304/10682 [1:27:57<03:06, 2.03it/s]
|
1058 |
96%|ββββββββββ| 10305/10682 [1:27:57<03:05, 2.03it/s]
|
1059 |
96%|ββββββββββ| 10306/10682 [1:27:58<03:05, 2.03it/s]
|
1060 |
96%|ββββββββββ| 10307/10682 [1:27:58<03:04, 2.03it/s]
|
1061 |
96%|ββββββββββ| 10308/10682 [1:27:59<03:04, 2.03it/s]
|
1062 |
97%|ββββββββββ| 10309/10682 [1:27:59<03:03, 2.03it/s]
|
1063 |
97%|ββββββββββ| 10310/10682 [1:28:00<03:03, 2.03it/s]
|
1064 |
97%|ββββββββββ| 10311/10682 [1:28:00<03:03, 2.03it/s]
|
1065 |
97%|ββββββββββ| 10312/10682 [1:28:01<03:02, 2.03it/s]
|
1066 |
97%|ββββββββββ| 10313/10682 [1:28:01<03:01, 2.03it/s]
|
1067 |
97%|ββββββββββ| 10314/10682 [1:28:02<03:01, 2.03it/s]
|
1068 |
97%|ββββββββββ| 10315/10682 [1:28:02<03:01, 2.03it/s]
|
1069 |
97%|ββββββββββ| 10316/10682 [1:28:03<03:00, 2.03it/s]
|
1070 |
97%|ββββββββββ| 10317/10682 [1:28:03<03:00, 2.03it/s]
|
1071 |
97%|ββββββββββ| 10318/10682 [1:28:04<02:59, 2.03it/s]
|
1072 |
97%|ββββββββββ| 10319/10682 [1:28:04<02:59, 2.03it/s]
|
1073 |
97%|ββββββββββ| 10320/10682 [1:28:05<02:58, 2.03it/s]
|
1074 |
97%|ββββββββββ| 10321/10682 [1:28:05<02:58, 2.03it/s]
|
1075 |
97%|ββββββββββ| 10322/10682 [1:28:06<02:57, 2.02it/s]
|
1076 |
97%|ββββββββββ| 10323/10682 [1:28:06<02:56, 2.03it/s]
|
1077 |
97%|ββββββββββ| 10324/10682 [1:28:07<02:56, 2.03it/s]
|
1078 |
97%|ββββββββββ| 10325/10682 [1:28:07<02:55, 2.03it/s]{'loss': 2.8633, 'grad_norm': 0.26117584109306335, 'learning_rate': 3.3991130994299734e-06, 'epoch': 13.52}
|
|
|
1079 |
|
1080 |
97%|ββββββββββ| 10325/10682 [1:28:07<02:55, 2.03it/s]
|
1081 |
97%|ββββββββββ| 10326/10682 [1:28:08<02:55, 2.03it/s]
|
1082 |
97%|ββββββββββ| 10327/10682 [1:28:08<02:55, 2.03it/s]
|
1083 |
97%|ββββββββββ| 10328/10682 [1:28:09<02:54, 2.03it/s]
|
1084 |
97%|ββββββββββ| 10329/10682 [1:28:09<02:53, 2.03it/s]
|
1085 |
97%|ββββββββββ| 10330/10682 [1:28:10<02:53, 2.03it/s]
|
1086 |
97%|ββββββββββ| 10331/10682 [1:28:10<02:53, 2.03it/s]
|
1087 |
97%|ββββββββββ| 10332/10682 [1:28:11<02:52, 2.03it/s]
|
1088 |
97%|ββββββββββ| 10333/10682 [1:28:11<02:51, 2.03it/s]
|
1089 |
97%|ββββββββββ| 10334/10682 [1:28:12<02:51, 2.03it/s]
|
1090 |
97%|ββββββββββ| 10335/10682 [1:28:12<02:50, 2.03it/s]
|
1091 |
97%|ββββββββββ| 10336/10682 [1:28:13<02:50, 2.03it/s]
|
1092 |
97%|ββββββββββ| 10337/10682 [1:28:13<02:49, 2.03it/s]
|
1093 |
97%|ββββββββββ| 10338/10682 [1:28:14<02:49, 2.03it/s]
|
1094 |
97%|ββββββββββ| 10339/10682 [1:28:14<02:49, 2.03it/s]
|
1095 |
97%|ββββββββββ| 10340/10682 [1:28:15<02:48, 2.03it/s]
|
1096 |
97%|ββββββββββ| 10341/10682 [1:28:15<02:48, 2.03it/s]
|
1097 |
97%|ββββββββββ| 10342/10682 [1:28:16<02:47, 2.03it/s]
|
1098 |
97%|ββββββββββ| 10343/10682 [1:28:16<02:46, 2.03it/s]
|
1099 |
97%|ββββββββββ| 10344/10682 [1:28:17<02:46, 2.03it/s]
|
1100 |
97%|ββββββββββ| 10345/10682 [1:28:17<02:46, 2.03it/s]
|
1101 |
97%|ββββββββββ| 10346/10682 [1:28:18<02:45, 2.03it/s]
|
1102 |
97%|ββββββββββ| 10347/10682 [1:28:18<02:45, 2.03it/s]
|
1103 |
97%|ββββββββββ| 10348/10682 [1:28:19<02:44, 2.03it/s]
|
1104 |
97%|ββββββββββ| 10349/10682 [1:28:19<02:44, 2.03it/s]
|
1105 |
97%|ββββββββββ| 10350/10682 [1:28:20<02:43, 2.03it/s]{'loss': 2.8637, 'grad_norm': 0.2615685760974884, 'learning_rate': 2.940166632433183e-06, 'epoch': 13.56}
|
|
|
1106 |
|
1107 |
97%|ββββββββββ| 10350/10682 [1:28:20<02:43, 2.03it/s]
|
1108 |
97%|ββββββββββ| 10351/10682 [1:28:20<02:43, 2.03it/s]
|
1109 |
97%|ββββββββββ| 10352/10682 [1:28:21<02:42, 2.03it/s]
|
1110 |
97%|ββββββββββ| 10353/10682 [1:28:21<02:42, 2.03it/s]
|
1111 |
97%|ββββββββββ| 10354/10682 [1:28:22<02:41, 2.03it/s]
|
1112 |
97%|ββββββββββ| 10355/10682 [1:28:22<02:41, 2.02it/s]
|
1113 |
97%|ββββββββββ| 10356/10682 [1:28:23<02:40, 2.03it/s]
|
1114 |
97%|ββββββββββ| 10357/10682 [1:28:23<02:39, 2.03it/s]
|
1115 |
97%|ββββββββββ| 10358/10682 [1:28:24<02:39, 2.03it/s]
|
1116 |
97%|ββββββββββ| 10359/10682 [1:28:24<02:39, 2.03it/s]
|
1117 |
97%|ββββββββββ| 10360/10682 [1:28:25<02:38, 2.03it/s]
|
1118 |
97%|ββββββββββ| 10361/10682 [1:28:25<02:38, 2.03it/s]
|
1119 |
97%|ββββββββββ| 10362/10682 [1:28:26<02:37, 2.03it/s]
|
1120 |
97%|ββββββββββ| 10363/10682 [1:28:26<02:37, 2.03it/s]
|
1121 |
97%|ββββββββββ| 10364/10682 [1:28:27<02:36, 2.03it/s]
|
1122 |
97%|ββββββββββ| 10365/10682 [1:28:27<02:36, 2.03it/s]
|
1123 |
97%|ββββββββββ| 10366/10682 [1:28:28<02:35, 2.03it/s]
|
1124 |
97%|ββββββββββ| 10367/10682 [1:28:28<02:34, 2.04it/s]
|
1125 |
97%|ββββββββββ| 10368/10682 [1:28:29<02:34, 2.04it/s]
|
1126 |
97%|ββββββββββ| 10369/10682 [1:28:29<02:33, 2.03it/s]
|
1127 |
97%|ββββββββββ| 10370/10682 [1:28:29<02:33, 2.03it/s]
|
1128 |
97%|ββββββββββ| 10371/10682 [1:28:30<02:33, 2.03it/s]
|
1129 |
97%|ββββββββββ| 10372/10682 [1:28:30<02:32, 2.03it/s]
|
1130 |
97%|ββββββββββ| 10373/10682 [1:28:31<02:32, 2.03it/s]
|
1131 |
97%|ββββββββββ| 10374/10682 [1:28:31<02:31, 2.03it/s]
|
1132 |
97%|ββββββββββ| 10375/10682 [1:28:32<02:31, 2.03it/s]
|
1133 |
|
|
|
1134 |
97%|ββββββββββ| 10375/10682 [1:28:32<02:31, 2.03it/s]
|
1135 |
97%|ββββββββββ| 10376/10682 [1:28:32<02:30, 2.03it/s]
|
1136 |
97%|ββββββββββ| 10377/10682 [1:28:33<02:30, 2.03it/s]
|
1137 |
97%|ββββββββββ| 10378/10682 [1:28:33<02:29, 2.03it/s]
|
1138 |
97%|ββββββββββ| 10379/10682 [1:28:34<02:29, 2.03it/s]
|
1139 |
97%|ββββββββββ| 10380/10682 [1:28:34<02:28, 2.03it/s]
|
1140 |
97%|ββββββββββ| 10381/10682 [1:28:35<02:28, 2.03it/s]
|
1141 |
97%|ββββββββββ| 10382/10682 [1:28:35<02:27, 2.03it/s]
|
1142 |
97%|ββββββββββ| 10383/10682 [1:28:36<02:26, 2.03it/s]
|
1143 |
97%|ββββββββββ| 10384/10682 [1:28:36<02:26, 2.03it/s]
|
1144 |
97%|ββββββββββ| 10385/10682 [1:28:37<02:26, 2.03it/s]
|
1145 |
97%|ββββββββββ| 10386/10682 [1:28:37<02:25, 2.03it/s]
|
1146 |
97%|ββββββββββ| 10387/10682 [1:28:38<02:25, 2.03it/s]
|
1147 |
97%|ββββββββββ| 10388/10682 [1:28:38<02:24, 2.04it/s]
|
1148 |
97%|ββββββββββ| 10389/10682 [1:28:39<02:23, 2.04it/s]
|
1149 |
97%|ββββββββββ| 10390/10682 [1:28:39<02:23, 2.03it/s]
|
1150 |
97%|ββββββββββ| 10391/10682 [1:28:40<02:23, 2.03it/s]
|
1151 |
97%|ββββββββββ| 10392/10682 [1:28:40<02:22, 2.03it/s]
|
1152 |
97%|ββββββββββ| 10393/10682 [1:28:41<02:22, 2.03it/s]
|
1153 |
97%|ββββββββββ| 10394/10682 [1:28:41<02:21, 2.03it/s]
|
1154 |
97%|ββββββββββ| 10395/10682 [1:28:42<02:21, 2.03it/s]
|
1155 |
97%|ββββββββββ| 10396/10682 [1:28:42<02:20, 2.03it/s]
|
1156 |
97%|ββββββββββ| 10397/10682 [1:28:43<02:20, 2.03it/s]
|
1157 |
97%|ββββββββββ| 10398/10682 [1:28:43<02:19, 2.03it/s]
|
1158 |
97%|ββββββββββ| 10399/10682 [1:28:44<02:19, 2.03it/s]
|
1159 |
97%|ββββββββββ| 10400/10682 [1:28:44<02:18, 2.03it/s]{'loss': 2.8597, 'grad_norm': 0.25953540205955505, 'learning_rate': 2.1218402281655835e-06, 'epoch': 13.62}
|
1160 |
|
|
|
1161 |
97%|ββββββββββ| 10400/10682 [1:28:44<02:18, 2.03it/s]
|
1162 |
97%|ββββββββββ| 10401/10682 [1:28:45<02:19, 2.02it/s]
|
1163 |
97%|ββββββββββ| 10402/10682 [1:28:45<02:18, 2.02it/s]
|
1164 |
97%|ββββββββββ| 10403/10682 [1:28:46<02:17, 2.03it/s]
|
1165 |
97%|ββββββββββ| 10404/10682 [1:28:46<02:17, 2.03it/s]
|
1166 |
97%|ββββββββββ| 10405/10682 [1:28:47<02:16, 2.03it/s]
|
1167 |
97%|ββββββββββ| 10406/10682 [1:28:47<02:15, 2.03it/s]
|
1168 |
97%|ββββββββββ| 10407/10682 [1:28:48<02:15, 2.03it/s]
|
1169 |
97%|ββββββββββ| 10408/10682 [1:28:48<02:14, 2.03it/s]
|
1170 |
97%|ββββββββββ| 10409/10682 [1:28:49<02:14, 2.03it/s]
|
1171 |
97%|ββββββββββ| 10410/10682 [1:28:49<02:13, 2.03it/s]
|
1172 |
97%|ββββββββββ| 10411/10682 [1:28:50<02:13, 2.03it/s]
|
1173 |
97%|ββββοΏ½οΏ½βββββ| 10412/10682 [1:28:50<02:12, 2.03it/s]
|
1174 |
97%|ββββββββββ| 10413/10682 [1:28:51<02:12, 2.03it/s]
|
1175 |
97%|ββββββββββ| 10414/10682 [1:28:51<02:12, 2.03it/s]
|
1176 |
98%|ββββββββββ| 10415/10682 [1:28:52<02:11, 2.03it/s]
|
1177 |
98%|ββββββββββ| 10416/10682 [1:28:52<02:11, 2.03it/s]
|
1178 |
98%|ββββββββββ| 10417/10682 [1:28:53<02:10, 2.03it/s]
|
1179 |
98%|ββββββββββ| 10418/10682 [1:28:53<02:09, 2.03it/s]
|
1180 |
98%|ββββββββββ| 10419/10682 [1:28:54<02:09, 2.03it/s]
|
1181 |
98%|ββββββββββ| 10420/10682 [1:28:54<02:08, 2.03it/s]
|
1182 |
98%|ββββββββββ| 10421/10682 [1:28:55<02:08, 2.03it/s]
|
1183 |
98%|ββββββββββ| 10422/10682 [1:28:55<02:07, 2.03it/s]
|
1184 |
98%|ββββββββββ| 10423/10682 [1:28:56<02:07, 2.03it/s]
|
1185 |
98%|ββββββββββ| 10424/10682 [1:28:56<02:07, 2.03it/s]
|
1186 |
98%|ββββββββββ| 10425/10682 [1:28:57<02:06, 2.03it/s]{'loss': 2.8679, 'grad_norm': 0.26001906394958496, 'learning_rate': 1.7625149152127318e-06, 'epoch': 13.65}
|
|
|
1187 |
|
1188 |
98%|ββββββββββ| 10425/10682 [1:28:57<02:06, 2.03it/s]
|
1189 |
98%|ββββββββββ| 10426/10682 [1:28:57<02:06, 2.03it/s]
|
1190 |
98%|ββββββββββ| 10427/10682 [1:28:58<02:05, 2.03it/s]
|
1191 |
98%|ββββββββββ| 10428/10682 [1:28:58<02:05, 2.03it/s]
|
1192 |
98%|ββββββββββ| 10429/10682 [1:28:59<02:04, 2.03it/s]
|
1193 |
98%|ββββββββββ| 10430/10682 [1:28:59<02:04, 2.03it/s]
|
1194 |
98%|ββββββββββ| 10431/10682 [1:29:00<02:03, 2.03it/s]
|
1195 |
98%|ββββββββββ| 10432/10682 [1:29:00<02:03, 2.03it/s]
|
1196 |
98%|ββββββββββ| 10433/10682 [1:29:01<02:02, 2.03it/s]
|
1197 |
98%|ββββββββββ| 10434/10682 [1:29:01<02:02, 2.03it/s]
|
1198 |
98%|ββββββββββ| 10435/10682 [1:29:02<02:01, 2.03it/s]
|
1199 |
98%|ββββββββββ| 10436/10682 [1:29:02<02:01, 2.03it/s]
|
1200 |
98%|ββββββββββ| 10437/10682 [1:29:02<02:00, 2.03it/s]
|
1201 |
98%|ββββββββββ| 10438/10682 [1:29:03<02:00, 2.03it/s]
|
1202 |
98%|ββββββββββ| 10439/10682 [1:29:03<01:59, 2.03it/s]
|
1203 |
98%|ββββββββββ| 10440/10682 [1:29:04<01:59, 2.03it/s]
|
1204 |
98%|ββββββββββ| 10441/10682 [1:29:04<01:58, 2.03it/s]
|
1205 |
98%|ββββββββββ| 10442/10682 [1:29:05<01:58, 2.03it/s]
|
1206 |
98%|ββββββββββ| 10443/10682 [1:29:05<01:57, 2.03it/s]
|
1207 |
98%|ββββββββββ| 10444/10682 [1:29:06<01:57, 2.03it/s]
|
1208 |
98%|ββββββββββ| 10445/10682 [1:29:06<01:56, 2.03it/s]
|
1209 |
98%|ββββββββββ| 10446/10682 [1:29:07<01:56, 2.03it/s]
|
1210 |
98%|ββββββββββ| 10447/10682 [1:29:07<01:55, 2.03it/s]
|
1211 |
98%|ββββββββββ| 10448/10682 [1:29:08<01:55, 2.03it/s]
|
1212 |
98%|ββββββββββ| 10449/10682 [1:29:08<01:54, 2.03it/s]
|
1213 |
98%|ββββββββββ| 10450/10682 [1:29:09<01:54, 2.03it/s]{'loss': 2.8763, 'grad_norm': 0.26250502467155457, 'learning_rate': 1.4364475817401635e-06, 'epoch': 13.69}
|
|
|
1214 |
|
1215 |
98%|ββββββββββ| 10450/10682 [1:29:09<01:54, 2.03it/s]
|
1216 |
98%|ββββββββββ| 10451/10682 [1:29:09<01:54, 2.02it/s]
|
1217 |
98%|ββββββββββ| 10452/10682 [1:29:10<01:53, 2.03it/s]
|
1218 |
98%|ββββββββββ| 10453/10682 [1:29:10<01:53, 2.03it/s]
|
1219 |
98%|ββββββββββ| 10454/10682 [1:29:11<01:52, 2.03it/s]
|
1220 |
98%|ββββββββββ| 10455/10682 [1:29:11<01:51, 2.03it/s]
|
1221 |
98%|ββββββββββ| 10456/10682 [1:29:12<01:51, 2.03it/s]
|
1222 |
98%|ββββββββββ| 10457/10682 [1:29:12<01:50, 2.03it/s]
|
1223 |
98%|ββββββββββ| 10458/10682 [1:29:13<01:50, 2.03it/s]
|
1224 |
98%|ββββββββββ| 10459/10682 [1:29:13<01:49, 2.03it/s]
|
1225 |
98%|ββββββββββ| 10460/10682 [1:29:14<01:49, 2.03it/s]
|
1226 |
98%|ββββββββββ| 10461/10682 [1:29:14<01:48, 2.03it/s]
|
1227 |
98%|ββββββββββ| 10462/10682 [1:29:15<01:48, 2.03it/s]
|
1228 |
98%|ββββββββββ| 10463/10682 [1:29:15<01:47, 2.03it/s]
|
1229 |
98%|ββββββββββ| 10464/10682 [1:29:16<01:47, 2.03it/s]
|
1230 |
98%|ββββββββββ| 10465/10682 [1:29:16<01:46, 2.03it/s]
|
1231 |
98%|ββββββββββ| 10466/10682 [1:29:17<01:46, 2.03it/s]
|
1232 |
98%|ββββββββββ| 10467/10682 [1:29:17<01:45, 2.03it/s]
|
1233 |
98%|ββββββββββ| 10468/10682 [1:29:18<01:45, 2.03it/s]
|
1234 |
98%|ββββββββββ| 10469/10682 [1:29:18<01:44, 2.03it/s]
|
1235 |
98%|ββββββββββ| 10470/10682 [1:29:19<01:44, 2.03it/s]
|
1236 |
98%|ββββββββββ| 10471/10682 [1:29:19<01:44, 2.03it/s]
|
1237 |
98%|ββββββββββ| 10472/10682 [1:29:20<01:43, 2.03it/s]
|
1238 |
98%|ββββοΏ½οΏ½βββββ| 10473/10682 [1:29:20<01:43, 2.03it/s]
|
1239 |
98%|ββββββββββ| 10474/10682 [1:29:21<01:42, 2.03it/s]
|
1240 |
98%|ββββββββββ| 10475/10682 [1:29:21<01:41, 2.03it/s]{'loss': 2.8583, 'grad_norm': 0.2589586079120636, 'learning_rate': 1.143659993153079e-06, 'epoch': 13.72}
|
1241 |
|
|
|
1242 |
98%|ββββββββββ| 10475/10682 [1:29:21<01:41, 2.03it/s]
|
1243 |
98%|ββββββββββ| 10476/10682 [1:29:22<01:41, 2.03it/s]
|
1244 |
98%|ββββββββββ| 10477/10682 [1:29:22<01:41, 2.03it/s]
|
1245 |
98%|ββββββββββ| 10478/10682 [1:29:23<01:40, 2.03it/s]
|
1246 |
98%|ββββββββββ| 10479/10682 [1:29:23<01:40, 2.03it/s]
|
1247 |
98%|ββββββββββ| 10480/10682 [1:29:24<01:39, 2.03it/s]
|
1248 |
98%|ββββββββββ| 10481/10682 [1:29:24<01:39, 2.03it/s]
|
1249 |
98%|ββββββββββ| 10482/10682 [1:29:25<01:38, 2.03it/s]
|
1250 |
98%|ββββββββββ| 10483/10682 [1:29:25<01:38, 2.03it/s]
|
1251 |
98%|ββββββββββ| 10484/10682 [1:29:26<01:37, 2.03it/s]
|
1252 |
98%|ββββββββββ| 10485/10682 [1:29:26<01:37, 2.03it/s]
|
1253 |
98%|ββββββββββ| 10486/10682 [1:29:27<01:36, 2.03it/s]
|
1254 |
98%|ββββββββββ| 10487/10682 [1:29:27<01:36, 2.03it/s]
|
1255 |
98%|ββββββββββ| 10488/10682 [1:29:28<01:35, 2.03it/s]
|
1256 |
98%|ββββββββββ| 10489/10682 [1:29:28<01:35, 2.03it/s]
|
1257 |
98%|ββββββββββ| 10490/10682 [1:29:29<01:34, 2.03it/s]
|
1258 |
98%|ββββββββββ| 10491/10682 [1:29:29<01:34, 2.03it/s]
|
1259 |
98%|ββββββββββ| 10492/10682 [1:29:30<01:33, 2.03it/s]
|
1260 |
98%|ββββββββββ| 10493/10682 [1:29:30<01:33, 2.03it/s]
|
1261 |
98%|ββββββββββ| 10494/10682 [1:29:31<01:32, 2.03it/s]
|
1262 |
98%|ββββββββββ| 10495/10682 [1:29:31<01:31, 2.03it/s]
|
1263 |
98%|ββββββββββ| 10496/10682 [1:29:32<01:31, 2.03it/s]
|
1264 |
98%|ββββββββββ| 10497/10682 [1:29:32<01:31, 2.03it/s]
|
1265 |
98%|ββββββββββ| 10498/10682 [1:29:33<01:30, 2.03it/s]
|
1266 |
98%|ββββββββββ| 10499/10682 [1:29:33<01:30, 2.03it/s]
|
1267 |
98%|ββββββββββ| 10500/10682 [1:29:34<01:29, 2.03it/s]
|
1268 |
|
|
|
1269 |
98%|ββββββββββ| 10500/10682 [1:29:34<01:29, 2.03it/s]
|
1270 |
98%|ββββββββββ| 10501/10682 [1:29:34<01:29, 2.02it/s]
|
1271 |
98%|ββββββββββ| 10502/10682 [1:29:35<01:28, 2.03it/s]
|
1272 |
98%|ββββββββββ| 10503/10682 [1:29:35<01:28, 2.02it/s]
|
1273 |
98%|ββββββββββ| 10504/10682 [1:29:36<01:27, 2.03it/s]
|
1274 |
98%|ββββββββββ| 10505/10682 [1:29:36<01:27, 2.03it/s]
|
1275 |
98%|ββββββββββ| 10506/10682 [1:29:37<01:26, 2.03it/s]
|
1276 |
98%|ββββββββββ| 10507/10682 [1:29:37<01:26, 2.03it/s]
|
1277 |
98%|ββββββββββ| 10508/10682 [1:29:37<01:25, 2.03it/s]
|
1278 |
98%|ββββββββββ| 10509/10682 [1:29:38<01:25, 2.03it/s]
|
1279 |
98%|ββββββββββ| 10510/10682 [1:29:38<01:24, 2.03it/s]
|
1280 |
98%|ββββββββββ| 10511/10682 [1:29:39<01:24, 2.03it/s]
|
1281 |
98%|ββββββββββ| 10512/10682 [1:29:39<01:23, 2.03it/s]
|
1282 |
98%|ββββββββββ| 10513/10682 [1:29:40<01:23, 2.03it/s]
|
1283 |
98%|ββββββββββ| 10514/10682 [1:29:40<01:22, 2.03it/s]
|
1284 |
98%|ββββββββββ| 10515/10682 [1:29:41<01:22, 2.03it/s]
|
1285 |
98%|ββββββββββ| 10516/10682 [1:29:41<01:21, 2.03it/s]
|
1286 |
98%|ββββββββββ| 10517/10682 [1:29:42<01:21, 2.03it/s]
|
1287 |
98%|ββββββββββ| 10518/10682 [1:29:42<01:20, 2.03it/s]
|
1288 |
98%|ββββββββββ| 10519/10682 [1:29:43<01:20, 2.03it/s]
|
1289 |
98%|ββββββββββ| 10520/10682 [1:29:43<01:19, 2.03it/s]
|
1290 |
98%|ββββββββββ| 10521/10682 [1:29:44<01:19, 2.03it/s]
|
1291 |
99%|ββββββββββ| 10522/10682 [1:29:44<01:18, 2.03it/s]
|
1292 |
99%|ββββββββββ| 10523/10682 [1:29:45<01:18, 2.03it/s]
|
1293 |
99%|ββββββββββ| 10524/10682 [1:29:45<01:17, 2.03it/s]
|
1294 |
99%|ββββββββββ| 10525/10682 [1:29:46<01:17, 2.03it/s]{'loss': 2.8753, 'grad_norm': 0.2604506015777588, 'learning_rate': 6.580000036264244e-07, 'epoch': 13.79}
|
1295 |
|
|
|
1296 |
99%|ββββββββββ| 10525/10682 [1:29:46<01:17, 2.03it/s]
|
1297 |
99%|ββββββββββ| 10526/10682 [1:29:46<01:16, 2.03it/s]
|
1298 |
99%|ββββββββββ| 10527/10682 [1:29:47<01:16, 2.03it/s]
|
1299 |
99%|ββββββββββ| 10528/10682 [1:29:47<01:15, 2.03it/s]
|
1300 |
99%|ββββββββββ| 10529/10682 [1:29:48<01:15, 2.03it/s]
|
1301 |
99%|ββββββββββ| 10530/10682 [1:29:48<01:14, 2.03it/s]
|
1302 |
99%|ββββββββββ| 10531/10682 [1:29:49<01:14, 2.03it/s]
|
1303 |
99%|ββββββββββ| 10532/10682 [1:29:49<01:13, 2.03it/s]
|
1304 |
99%|ββββββββββ| 10533/10682 [1:29:50<01:13, 2.03it/s]
|
1305 |
99%|ββββββββββ| 10534/10682 [1:29:50<01:12, 2.03it/s]
|
1306 |
99%|ββββββββββ| 10535/10682 [1:29:51<01:12, 2.03it/s]
|
1307 |
99%|ββββββββββ| 10536/10682 [1:29:51<01:11, 2.03it/s]
|
1308 |
99%|ββββββββββ| 10537/10682 [1:29:52<01:11, 2.03it/s]
|
1309 |
99%|ββββββββββ| 10538/10682 [1:29:52<01:10, 2.03it/s]
|
1310 |
99%|ββββββββββ| 10539/10682 [1:29:53<01:10, 2.03it/s]
|
1311 |
99%|ββββββββββ| 10540/10682 [1:29:53<01:09, 2.03it/s]
|
1312 |
99%|ββββββββββ| 10541/10682 [1:29:54<01:09, 2.03it/s]
|
1313 |
99%|ββββββββββ| 10542/10682 [1:29:54<01:08, 2.03it/s]
|
1314 |
99%|ββββββββββ| 10543/10682 [1:29:55<01:08, 2.03it/s]
|
1315 |
99%|ββββββββββ| 10544/10682 [1:29:55<01:07, 2.03it/s]
|
1316 |
99%|ββββββββββ| 10545/10682 [1:29:56<01:07, 2.03it/s]
|
1317 |
99%|ββββββββββ| 10546/10682 [1:29:56<01:06, 2.03it/s]
|
1318 |
99%|ββββββββββ| 10547/10682 [1:29:57<01:06, 2.03it/s]
|
1319 |
99%|ββββββββββ| 10548/10682 [1:29:57<01:05, 2.03it/s]
|
1320 |
99%|ββββββββββ| 10549/10682 [1:29:58<01:05, 2.03it/s]
|
1321 |
99%|ββββββββββ| 10550/10682 [1:29:58<01:04, 2.03it/s]
|
1322 |
|
|
|
1323 |
99%|ββββββββββ| 10550/10682 [1:29:58<01:04, 2.03it/s]
|
1324 |
99%|ββββββββββ| 10551/10682 [1:29:59<01:04, 2.03it/s]
|
1325 |
99%|ββββββββββ| 10552/10682 [1:29:59<01:04, 2.03it/s]
|
1326 |
99%|ββββββββββ| 10553/10682 [1:30:00<01:03, 2.03it/s]
|
1327 |
99%|ββββββββββ| 10554/10682 [1:30:00<01:03, 2.03it/s]
|
1328 |
99%|ββββββββββ| 10555/10682 [1:30:01<01:02, 2.03it/s]
|
1329 |
99%|ββββββββββ| 10556/10682 [1:30:01<01:02, 2.03it/s]
|
1330 |
99%|ββββββββββ| 10557/10682 [1:30:02<01:01, 2.03it/s]
|
1331 |
99%|ββββββββββ| 10558/10682 [1:30:02<01:01, 2.03it/s]
|
1332 |
99%|ββββββββββ| 10559/10682 [1:30:03<01:00, 2.03it/s]
|
1333 |
99%|ββββββββββ| 10560/10682 [1:30:03<01:00, 2.03it/s]
|
1334 |
99%|ββββββββββ| 10561/10682 [1:30:04<00:59, 2.03it/s]
|
1335 |
99%|ββββββββββ| 10562/10682 [1:30:04<00:59, 2.03it/s]
|
1336 |
99%|ββββββββββ| 10563/10682 [1:30:05<00:58, 2.03it/s]
|
1337 |
99%|ββββββββββ| 10564/10682 [1:30:05<00:58, 2.03it/s]
|
1338 |
99%|ββββββββββ| 10565/10682 [1:30:06<00:57, 2.03it/s]
|
1339 |
99%|ββββββββββ| 10566/10682 [1:30:06<00:57, 2.03it/s]
|
1340 |
99%|ββββββββββ| 10567/10682 [1:30:07<00:56, 2.03it/s]
|
1341 |
99%|ββββββββββ| 10568/10682 [1:30:07<00:56, 2.03it/s]
|
1342 |
99%|ββββββββββ| 10569/10682 [1:30:08<00:55, 2.03it/s]
|
1343 |
99%|ββββββββββ| 10570/10682 [1:30:08<00:55, 2.03it/s]
|
1344 |
99%|ββββββββββ| 10571/10682 [1:30:09<00:54, 2.03it/s]
|
1345 |
99%|ββββββββββ| 10572/10682 [1:30:09<00:54, 2.03it/s]
|
1346 |
99%|ββββββββββ| 10573/10682 [1:30:10<00:53, 2.03it/s]
|
1347 |
99%|ββββββββββ| 10574/10682 [1:30:10<00:53, 2.03it/s]
|
1348 |
99%|ββββββββββ| 10575/10682 [1:30:11<00:52, 2.03it/s]
|
1349 |
|
|
|
1350 |
99%|ββββββββββ| 10575/10682 [1:30:11<00:52, 2.03it/s]
|
1351 |
99%|ββββββββββ| 10576/10682 [1:30:11<00:52, 2.03it/s]
|
1352 |
99%|ββββββββββ| 10577/10682 [1:30:11<00:51, 2.03it/s]
|
1353 |
99%|ββββββββββ| 10578/10682 [1:30:12<00:51, 2.03it/s]
|
1354 |
99%|ββββββββββ| 10579/10682 [1:30:12<00:50, 2.03it/s]
|
1355 |
99%|ββββββββββ| 10580/10682 [1:30:13<00:50, 2.03it/s]
|
1356 |
99%|ββββββββββ| 10581/10682 [1:30:13<00:49, 2.03it/s]
|
1357 |
99%|ββββββββββ| 10582/10682 [1:30:14<00:49, 2.03it/s]
|
1358 |
99%|ββββββββββ| 10583/10682 [1:30:14<00:48, 2.03it/s]
|
1359 |
99%|ββββββββββ| 10584/10682 [1:30:15<00:48, 2.03it/s]
|
1360 |
99%|ββββββββββ| 10585/10682 [1:30:15<00:47, 2.03it/s]
|
1361 |
99%|ββββββββββ| 10586/10682 [1:30:16<00:47, 2.03it/s]
|
1362 |
99%|ββββββββββ| 10587/10682 [1:30:16<00:46, 2.03it/s]
|
1363 |
99%|ββββββββββ| 10588/10682 [1:30:17<00:46, 2.03it/s]
|
1364 |
99%|ββββββββββ| 10589/10682 [1:30:17<00:45, 2.03it/s]
|
1365 |
99%|ββββββββββ| 10590/10682 [1:30:18<00:45, 2.03it/s]
|
1366 |
99%|ββββββββββ| 10591/10682 [1:30:18<00:44, 2.03it/s]
|
1367 |
99%|ββββββββββ| 10592/10682 [1:30:19<00:44, 2.03it/s]
|
1368 |
99%|ββββββββββ| 10593/10682 [1:30:19<00:43, 2.03it/s]
|
1369 |
99%|ββββββββββ| 10594/10682 [1:30:20<00:43, 2.03it/s]
|
1370 |
99%|ββββββββββ| 10595/10682 [1:30:20<00:42, 2.03it/s]
|
1371 |
99%|ββββββββββ| 10596/10682 [1:30:21<00:42, 2.03it/s]
|
1372 |
99%|ββββββββββ| 10597/10682 [1:30:21<00:41, 2.03it/s]
|
1373 |
99%|ββββββββββ| 10598/10682 [1:30:22<00:41, 2.03it/s]
|
1374 |
99%|ββββββββββ| 10599/10682 [1:30:22<00:40, 2.03it/s]
|
1375 |
99%|ββββββββββ| 10600/10682 [1:30:23<00:40, 2.03it/s]
|
1376 |
|
|
|
1377 |
99%|ββββββββββ| 10600/10682 [1:30:23<00:40, 2.03it/s]
|
1378 |
99%|ββββββββββ| 10601/10682 [1:30:23<00:40, 2.02it/s]
|
1379 |
99%|ββββββββββ| 10602/10682 [1:30:24<00:39, 2.03it/s]
|
1380 |
99%|ββββββββββ| 10603/10682 [1:30:24<00:38, 2.03it/s]
|
1381 |
99%|ββββββββββ| 10604/10682 [1:30:25<00:38, 2.03it/s]
|
1382 |
99%|ββββββββββ| 10605/10682 [1:30:25<00:37, 2.03it/s]
|
1383 |
99%|ββββββββββ| 10606/10682 [1:30:26<00:37, 2.03it/s]
|
1384 |
99%|ββββββββββ| 10607/10682 [1:30:26<00:36, 2.03it/s]
|
1385 |
99%|ββββββββββ| 10608/10682 [1:30:27<00:36, 2.03it/s]
|
1386 |
99%|ββββββββββ| 10609/10682 [1:30:27<00:35, 2.03it/s]
|
1387 |
99%|ββββββββββ| 10610/10682 [1:30:28<00:35, 2.03it/s]
|
1388 |
99%|ββββββββββ| 10611/10682 [1:30:28<00:34, 2.03it/s]
|
1389 |
99%|ββββββββββ| 10612/10682 [1:30:29<00:34, 2.03it/s]
|
1390 |
99%|ββββββββββ| 10613/10682 [1:30:29<00:34, 2.03it/s]
|
1391 |
99%|ββββββββββ| 10614/10682 [1:30:30<00:33, 2.03it/s]
|
1392 |
99%|ββββββββββ| 10615/10682 [1:30:30<00:33, 2.03it/s]
|
1393 |
99%|ββββββββββ| 10616/10682 [1:30:31<00:32, 2.03it/s]
|
1394 |
99%|ββββββββββ| 10617/10682 [1:30:31<00:32, 2.03it/s]
|
1395 |
99%|ββββββββββ| 10618/10682 [1:30:32<00:31, 2.03it/s]
|
1396 |
99%|ββββββββββ| 10619/10682 [1:30:32<00:31, 2.03it/s]
|
1397 |
99%|ββββββββββ| 10620/10682 [1:30:33<00:30, 2.03it/s]
|
1398 |
99%|ββββββββββ| 10621/10682 [1:30:33<00:30, 2.03it/s]
|
1399 |
99%|ββββββββββ| 10622/10682 [1:30:34<00:29, 2.03it/s]
|
1400 |
99%|ββββββββββ| 10623/10682 [1:30:34<00:29, 2.03it/s]
|
1401 |
99%|ββββββββββ| 10624/10682 [1:30:35<00:28, 2.03it/s]
|
1402 |
99%|ββββββββββ| 10625/10682 [1:30:35<00:28, 2.03it/s]
|
1403 |
|
|
|
1404 |
99%|ββββββββββ| 10625/10682 [1:30:35<00:28, 2.03it/s]
|
1405 |
99%|ββββββββββ| 10626/10682 [1:30:36<00:27, 2.02it/s]
|
1406 |
99%|ββββββββββ| 10627/10682 [1:30:36<00:27, 2.03it/s]
|
1407 |
99%|ββββββββββ| 10628/10682 [1:30:37<00:26, 2.03it/s]
|
1408 |
|
|
|
|
|
1409 |
|
1410 |
|
|
|
|
|
|
586 |
|
587 |
|
588 |
92%|ββββββββββ| 9875/10682 [1:24:14<06:38, 2.03it/s]
|
589 |
92%|ββββββββββ| 9876/10682 [1:24:14<06:37, 2.03it/s]
|
590 |
92%|ββββββββββ| 9877/10682 [1:24:15<06:37, 2.03it/s]
|
591 |
92%|ββββββββββ| 9878/10682 [1:24:15<06:36, 2.03it/s]
|
592 |
92%|ββββββββββ| 9879/10682 [1:24:16<06:36, 2.03it/s]
|
593 |
92%|ββββββββββ| 9880/10682 [1:24:16<06:35, 2.03it/s]
|
594 |
93%|ββββββββββ| 9881/10682 [1:24:17<06:34, 2.03it/s]
|
595 |
93%|ββββββββββ| 9882/10682 [1:24:17<06:34, 2.03it/s]
|
596 |
93%|ββββββββββ| 9883/10682 [1:24:17<06:33, 2.03it/s]
|
597 |
93%|ββββββββββ| 9884/10682 [1:24:18<06:32, 2.03it/s]
|
598 |
93%|ββββββββββ| 9885/10682 [1:24:18<06:32, 2.03it/s]
|
599 |
93%|ββββββββββ| 9886/10682 [1:24:19<06:31, 2.03it/s]
|
600 |
93%|ββββββββββ| 9887/10682 [1:24:19<06:31, 2.03it/s]
|
601 |
93%|ββββββββββ| 9888/10682 [1:24:20<06:31, 2.03it/s]
|
602 |
93%|ββββββββββ| 9889/10682 [1:24:20<06:30, 2.03it/s]
|
603 |
93%|ββββββββββ| 9890/10682 [1:24:21<06:30, 2.03it/s]
|
604 |
93%|ββββββββββ| 9891/10682 [1:24:21<06:30, 2.03it/s]
|
605 |
93%|ββββββββββ| 9892/10682 [1:24:22<06:29, 2.03it/s]
|
606 |
93%|ββββββββββ| 9893/10682 [1:24:22<06:29, 2.03it/s]
|
607 |
93%|ββββββββββ| 9894/10682 [1:24:23<06:28, 2.03it/s]
|
608 |
93%|ββββββββββ| 9895/10682 [1:24:23<06:28, 2.03it/s]
|
609 |
93%|ββββββββββ| 9896/10682 [1:24:24<06:27, 2.03it/s]
|
610 |
93%|ββββββββββ| 9897/10682 [1:24:24<06:26, 2.03it/s]
|
611 |
93%|ββββββββββ| 9898/10682 [1:24:25<06:26, 2.03it/s]
|
612 |
93%|ββββββββββ| 9899/10682 [1:24:25<06:25, 2.03it/s]
|
613 |
93%|ββββββββββ| 9900/10682 [1:24:26<06:25, 2.03it/s]{'loss': 2.8968, 'grad_norm': 0.26422566175460815, 'learning_rate': 1.6239414036870183e-05, 'epoch': 12.97}
|
614 |
|
615 |
|
616 |
93%|ββββββββββ| 9900/10682 [1:24:26<06:25, 2.03it/s]
|
617 |
93%|ββββββββββ| 9901/10682 [1:24:26<06:24, 2.03it/s]
|
618 |
93%|ββββββββββ| 9902/10682 [1:24:27<06:24, 2.03it/s]
|
619 |
93%|ββββββββββ| 9903/10682 [1:24:27<06:23, 2.03it/s]
|
620 |
93%|ββββββββββ| 9904/10682 [1:24:28<06:23, 2.03it/s]
|
621 |
93%|ββββββββββ| 9905/10682 [1:24:28<06:23, 2.03it/s]
|
622 |
93%|ββββββββββ| 9906/10682 [1:24:29<06:22, 2.03it/s]
|
623 |
93%|ββββββββββ| 9907/10682 [1:24:29<06:22, 2.03it/s]
|
624 |
93%|ββββββββββ| 9908/10682 [1:24:30<06:21, 2.03it/s]
|
625 |
93%|ββββββββββ| 9909/10682 [1:24:30<06:21, 2.03it/s]
|
626 |
93%|ββββββββββ| 9910/10682 [1:24:31<06:20, 2.03it/s]
|
627 |
93%|ββββββββββ| 9911/10682 [1:24:31<06:20, 2.03it/s]
|
628 |
93%|ββββββββββ| 9912/10682 [1:24:32<06:19, 2.03it/s]
|
629 |
93%|ββββββββββ| 9913/10682 [1:24:32<06:19, 2.03it/s]
|
630 |
93%|ββββββββββ| 9914/10682 [1:24:33<06:19, 2.03it/s]
|
631 |
93%|ββββββββββ| 9915/10682 [1:24:33<06:18, 2.03it/s]
|
632 |
93%|ββββββββββ| 9916/10682 [1:24:34<06:17, 2.03it/s]
|
633 |
93%|ββββββββββ| 9917/10682 [1:24:34<06:17, 2.03it/s]
|
634 |
93%|ββββββββββ| 9918/10682 [1:24:35<06:16, 2.03it/s]
|
635 |
93%|ββββββββββ| 9919/10682 [1:24:35<06:15, 2.03it/s]
|
636 |
93%|ββββββββββ| 9920/10682 [1:24:36<06:15, 2.03it/s]
|
637 |
93%|ββββββββββ| 9921/10682 [1:24:36<06:14, 2.03it/s]
|
638 |
93%|ββββββββββ| 9922/10682 [1:24:37<06:14, 2.03it/s]
|
639 |
93%|ββββββββββ| 9923/10682 [1:24:37<06:13, 2.03it/s]
|
640 |
93%|ββββββββββ| 9924/10682 [1:24:38<06:12, 2.03it/s]
|
641 |
93%|ββββββββββ| 9925/10682 [1:24:38<06:22, 1.98it/s]{'loss': 2.89, 'grad_norm': 0.2640738785266876, 'learning_rate': 1.5222903086944684e-05, 'epoch': 13.0}
|
642 |
|
643 |
|
644 |
93%|ββββββββββ| 9925/10682 [1:24:38<06:22, 1.98it/s]
|
645 |
93%|ββββββββββ| 9926/10682 [1:24:50<50:36, 4.02s/it]
|
646 |
93%|ββββββββββ| 9927/10682 [1:24:51<37:14, 2.96s/it]
|
647 |
93%|ββββββββββ| 9928/10682 [1:24:51<27:53, 2.22s/it]
|
648 |
93%|ββββββββββ| 9929/10682 [1:24:52<21:23, 1.70s/it]
|
649 |
93%|ββββββββββ| 9930/10682 [1:24:52<16:48, 1.34s/it]
|
650 |
93%|ββββββββββ| 9931/10682 [1:24:53<13:35, 1.09s/it]
|
651 |
93%|ββββββββββ| 9932/10682 [1:24:53<11:20, 1.10it/s]
|
652 |
93%|ββββββββββ| 9933/10682 [1:24:54<09:49, 1.27it/s]
|
653 |
93%|ββββββββββ| 9934/10682 [1:24:54<08:43, 1.43it/s]
|
654 |
93%|ββββββββββ| 9935/10682 [1:24:55<07:56, 1.57it/s]
|
655 |
93%|ββββββββββ| 9936/10682 [1:24:55<07:23, 1.68it/s]
|
656 |
93%|ββββββββββ| 9937/10682 [1:24:56<07:00, 1.77it/s]
|
657 |
93%|ββββββββββ| 9938/10682 [1:24:56<06:44, 1.84it/s]
|
658 |
93%|ββββββββββ| 9939/10682 [1:24:57<06:32, 1.89it/s]
|
659 |
93%|ββββββββββ| 9940/10682 [1:24:57<06:24, 1.93it/s]
|
660 |
93%|ββββββββββ| 9941/10682 [1:24:58<06:18, 1.96it/s]
|
661 |
93%|ββββββββββ| 9942/10682 [1:24:58<06:13, 1.98it/s]
|
662 |
93%|ββββββββββ| 9943/10682 [1:24:59<06:10, 1.99it/s]
|
663 |
93%|ββββββββββ| 9944/10682 [1:24:59<06:08, 2.00it/s]
|
664 |
93%|ββββββββββ| 9945/10682 [1:25:00<06:06, 2.01it/s]
|
665 |
93%|ββββββββββ| 9946/10682 [1:25:00<06:04, 2.02it/s]
|
666 |
93%|ββββββββββ| 9947/10682 [1:25:01<06:03, 2.02it/s]
|
667 |
93%|ββββββββββ| 9948/10682 [1:25:01<06:02, 2.03it/s]
|
668 |
93%|ββββββββββ| 9949/10682 [1:25:02<06:02, 2.02it/s]
|
669 |
93%|ββββββββββ| 9950/10682 [1:25:02<06:01, 2.03it/s]{'loss': 2.8505, 'grad_norm': 0.2604374289512634, 'learning_rate': 1.4238751618640577e-05, 'epoch': 13.03}
|
|
|
670 |
|
671 |
93%|ββββββββββ| 9950/10682 [1:25:02<06:01, 2.03it/s]
|
672 |
93%|ββββββββββ| 9951/10682 [1:25:03<06:01, 2.02it/s]
|
673 |
+
|
674 |
|
675 |
93%|ββββββββββ| 9950/10682 [1:25:02<06:01, 2.03it/s]
|
676 |
93%|ββββββββββ| 9951/10682 [1:25:03<06:01, 2.02it/s]
|
677 |
93%|ββββββββββ| 9952/10682 [1:25:03<06:01, 2.02it/s]
|
678 |
93%|ββββββββββ| 9953/10682 [1:25:04<06:00, 2.02it/s]
|
679 |
93%|ββββββββββ| 9954/10682 [1:25:04<05:59, 2.02it/s]
|
680 |
93%|ββββββββββ| 9955/10682 [1:25:05<05:58, 2.03it/s]
|
681 |
93%|ββββββββββ| 9956/10682 [1:25:05<05:57, 2.03it/s]
|
682 |
93%|ββββββββββ| 9957/10682 [1:25:06<05:57, 2.03it/s]
|
683 |
93%|ββββββββββ| 9958/10682 [1:25:06<05:56, 2.03it/s]
|
684 |
93%|ββββββββββ| 9959/10682 [1:25:07<05:55, 2.03it/s]
|
685 |
93%|ββββββββββ| 9960/10682 [1:25:07<05:55, 2.03it/s]
|
686 |
93%|ββββββββββ| 9961/10682 [1:25:08<05:54, 2.03it/s]
|
687 |
93%|ββββββββββ| 9962/10682 [1:25:08<05:54, 2.03it/s]
|
688 |
93%|ββββββββββ| 9963/10682 [1:25:09<05:54, 2.03it/s]
|
689 |
93%|ββββββββββ| 9964/10682 [1:25:09<05:53, 2.03it/s]
|
690 |
93%|ββββββββββ| 9965/10682 [1:25:10<05:52, 2.03it/s]
|
691 |
93%|ββββββββββ| 9966/10682 [1:25:10<05:52, 2.03it/s]
|
692 |
93%|ββββββββββ| 9967/10682 [1:25:11<05:51, 2.03it/s]
|
693 |
93%|ββββββββββ| 9968/10682 [1:25:11<05:51, 2.03it/s]
|
694 |
93%|ββββββββββ| 9969/10682 [1:25:12<05:50, 2.03it/s]
|
695 |
93%|ββββββββββ| 9970/10682 [1:25:12<05:50, 2.03it/s]
|
696 |
93%|ββββββββββ| 9971/10682 [1:25:13<05:50, 2.03it/s]
|
697 |
93%|ββββββββββ| 9972/10682 [1:25:13<05:49, 2.03it/s]
|
698 |
93%|ββββββββββ| 9973/10682 [1:25:14<05:49, 2.03it/s]
|
699 |
93%|ββββββββββ| 9974/10682 [1:25:14<05:48, 2.03it/s]
|
700 |
93%|ββββββββββ| 9975/10682 [1:25:15<05:47, 2.03it/s]{'loss': 2.8519, 'grad_norm': 0.25846487283706665, 'learning_rate': 1.3287025325307511e-05, 'epoch': 13.06}
|
701 |
+
|
702 |
|
703 |
93%|ββββββββββ| 9975/10682 [1:25:15<05:47, 2.03it/s]
|
704 |
93%|ββββββββββ| 9976/10682 [1:25:15<05:47, 2.03it/s]
|
705 |
93%|ββββββββββ| 9977/10682 [1:25:16<05:47, 2.03it/s]
|
706 |
93%|ββββββββββ| 9978/10682 [1:25:16<05:46, 2.03it/s]
|
707 |
93%|ββββββββββ| 9979/10682 [1:25:17<05:46, 2.03it/s]
|
708 |
93%|ββββββββββ| 9980/10682 [1:25:17<05:45, 2.03it/s]
|
709 |
93%|ββββββββββ| 9981/10682 [1:25:18<05:44, 2.03it/s]
|
710 |
93%|ββββββββββ| 9982/10682 [1:25:18<05:44, 2.03it/s]
|
711 |
93%|ββββββββββ| 9983/10682 [1:25:19<05:44, 2.03it/s]
|
712 |
93%|ββββββββββ| 9984/10682 [1:25:19<05:44, 2.03it/s]
|
713 |
93%|ββββββββββ| 9985/10682 [1:25:20<05:43, 2.03it/s]
|
714 |
93%|ββββββββββ| 9986/10682 [1:25:20<05:43, 2.03it/s]
|
715 |
93%|ββββββββββ| 9987/10682 [1:25:21<05:42, 2.03it/s]
|
716 |
94%|ββββββββββ| 9988/10682 [1:25:21<05:41, 2.03it/s]
|
717 |
94%|ββββββββββ| 9989/10682 [1:25:22<05:41, 2.03it/s]
|
718 |
94%|ββββββββββ| 9990/10682 [1:25:22<05:40, 2.03it/s]
|
719 |
94%|ββββββββββ| 9991/10682 [1:25:22<05:40, 2.03it/s]
|
720 |
94%|ββββββββββ| 9992/10682 [1:25:23<05:39, 2.03it/s]
|
721 |
94%|ββββββββββ| 9993/10682 [1:25:23<05:38, 2.03it/s]
|
722 |
94%|βββββββοΏ½οΏ½ββ| 9994/10682 [1:25:24<05:38, 2.03it/s]
|
723 |
94%|ββββββββββ| 9995/10682 [1:25:24<05:37, 2.03it/s]
|
724 |
94%|ββββββββββ| 9996/10682 [1:25:25<05:37, 2.03it/s]
|
725 |
94%|ββββββββββ| 9997/10682 [1:25:25<05:37, 2.03it/s]
|
726 |
94%|ββββββββββ| 9998/10682 [1:25:26<05:36, 2.03it/s]
|
727 |
94%|ββββββββββ| 9999/10682 [1:25:26<05:35, 2.03it/s]
|
728 |
94%|ββββββββββ| 10000/10682 [1:25:27<05:35, 2.03it/s]{'loss': 2.8577, 'grad_norm': 0.26245424151420593, 'learning_rate': 1.2367787735873993e-05, 'epoch': 13.1}
|
729 |
+
|
730 |
|
731 |
94%|ββββββββββ| 10000/10682 [1:25:27<05:35, 2.03it/s]
|
732 |
94%|ββββββββββ| 10001/10682 [1:25:27<05:35, 2.03it/s]
|
733 |
94%|ββββββββββ| 10002/10682 [1:25:28<05:36, 2.02it/s]
|
734 |
94%|ββββββββββ| 10003/10682 [1:25:28<05:35, 2.02it/s]
|
735 |
94%|ββββββββββ| 10004/10682 [1:25:29<05:35, 2.02it/s]
|
736 |
94%|ββββββββββ| 10005/10682 [1:25:29<05:33, 2.03it/s]
|
737 |
94%|ββββββββββ| 10006/10682 [1:25:30<05:33, 2.03it/s]
|
738 |
94%|ββββββββββ| 10007/10682 [1:25:30<05:32, 2.03it/s]
|
739 |
94%|ββββββββββ| 10008/10682 [1:25:31<05:32, 2.03it/s]
|
740 |
94%|ββββββββββ| 10009/10682 [1:25:31<05:31, 2.03it/s]
|
741 |
94%|ββββββββββ| 10010/10682 [1:25:32<05:30, 2.03it/s]
|
742 |
94%|ββββββββββ| 10011/10682 [1:25:32<05:30, 2.03it/s]
|
743 |
94%|ββββββββββ| 10012/10682 [1:25:33<05:29, 2.03it/s]
|
744 |
94%|ββββββββββ| 10013/10682 [1:25:33<05:29, 2.03it/s]
|
745 |
94%|ββββββββββ| 10014/10682 [1:25:34<05:28, 2.03it/s]
|
746 |
94%|ββββββββββ| 10015/10682 [1:25:34<05:28, 2.03it/s]
|
747 |
94%|ββββββββββ| 10016/10682 [1:25:35<05:27, 2.03it/s]
|
748 |
94%|ββββββββββ| 10017/10682 [1:25:35<05:27, 2.03it/s]
|
749 |
94%|ββββββββββ| 10018/10682 [1:25:36<05:26, 2.03it/s]
|
750 |
94%|ββββββββββ| 10019/10682 [1:25:36<05:26, 2.03it/s]
|
751 |
94%|ββββββββββ| 10020/10682 [1:25:37<05:25, 2.03it/s]
|
752 |
94%|ββββββββββ| 10021/10682 [1:25:37<05:25, 2.03it/s]
|
753 |
94%|ββββββββββ| 10022/10682 [1:25:38<05:25, 2.03it/s]
|
754 |
94%|ββββββββββ| 10023/10682 [1:25:38<05:24, 2.03it/s]
|
755 |
94%|ββββββββββ| 10024/10682 [1:25:39<05:24, 2.03it/s]
|
756 |
94%|ββββββββββ| 10025/10682 [1:25:39<05:24, 2.03it/s]{'loss': 2.8525, 'grad_norm': 0.2591426372528076, 'learning_rate': 1.1481100210606388e-05, 'epoch': 13.13}
|
757 |
+
|
758 |
|
759 |
94%|ββββββββββ| 10025/10682 [1:25:39<05:24, 2.03it/s]
|
760 |
94%|ββββββββββ| 10026/10682 [1:25:40<05:23, 2.03it/s]
|
761 |
94%|ββββββββββ| 10027/10682 [1:25:40<05:22, 2.03it/s]
|
762 |
94%|ββββββββββ| 10028/10682 [1:25:41<05:22, 2.03it/s]
|
763 |
94%|ββββββββββ| 10029/10682 [1:25:41<05:21, 2.03it/s]
|
764 |
94%|ββββββββββ| 10030/10682 [1:25:42<05:21, 2.03it/s]
|
765 |
94%|ββββββββββ| 10031/10682 [1:25:42<05:20, 2.03it/s]
|
766 |
94%|ββββββββββ| 10032/10682 [1:25:43<05:20, 2.03it/s]
|
767 |
94%|ββββββββββ| 10033/10682 [1:25:43<05:19, 2.03it/s]
|
768 |
94%|ββββββββββ| 10034/10682 [1:25:44<05:18, 2.03it/s]
|
769 |
94%|ββββββββββ| 10035/10682 [1:25:44<05:18, 2.03it/s]
|
770 |
94%|ββββββββββ| 10036/10682 [1:25:45<05:18, 2.03it/s]
|
771 |
94%|ββββββββββ| 10037/10682 [1:25:45<05:17, 2.03it/s]
|
772 |
94%|ββββββββββ| 10038/10682 [1:25:46<05:17, 2.03it/s]
|
773 |
94%|ββββββββββ| 10039/10682 [1:25:46<05:16, 2.03it/s]
|
774 |
94%|ββββββββββ| 10040/10682 [1:25:47<05:16, 2.03it/s]
|
775 |
94%|ββββββββββ| 10041/10682 [1:25:47<05:15, 2.03it/s]
|
776 |
94%|ββββββββββ| 10042/10682 [1:25:48<05:15, 2.03it/s]
|
777 |
94%|ββββββββββ| 10043/10682 [1:25:48<05:14, 2.03it/s]
|
778 |
94%|ββββββββββ| 10044/10682 [1:25:49<05:14, 2.03it/s]
|
779 |
94%|ββββββββββ| 10045/10682 [1:25:49<05:13, 2.03it/s]
|
780 |
94%|ββββββββββ| 10046/10682 [1:25:50<05:13, 2.03it/s]
|
781 |
94%|ββββββββββ| 10047/10682 [1:25:50<05:12, 2.03it/s]
|
782 |
94%|ββββββββββ| 10048/10682 [1:25:51<05:12, 2.03it/s]
|
783 |
94%|ββββββββββ| 10049/10682 [1:25:51<05:11, 2.03it/s]
|
784 |
94%|ββββββββββ| 10050/10682 [1:25:52<05:11, 2.03it/s]{'loss': 2.8568, 'grad_norm': 0.25984886288642883, 'learning_rate': 1.0627021937013704e-05, 'epoch': 13.16}
|
785 |
+
|
786 |
|
787 |
94%|ββββββββββ| 10050/10682 [1:25:52<05:11, 2.03it/s]
|
788 |
94%|ββββββββββ| 10051/10682 [1:25:52<05:11, 2.03it/s]
|
789 |
94%|βββββοΏ½οΏ½ββββ| 10052/10682 [1:25:53<05:10, 2.03it/s]
|
790 |
94%|ββββββββββ| 10053/10682 [1:25:53<05:10, 2.02it/s]
|
791 |
94%|ββββββββββ| 10054/10682 [1:25:54<05:10, 2.02it/s]
|
792 |
94%|ββββββββββ| 10055/10682 [1:25:54<05:09, 2.02it/s]
|
793 |
94%|ββββββββββ| 10056/10682 [1:25:55<05:09, 2.03it/s]
|
794 |
94%|ββββββββββ| 10057/10682 [1:25:55<05:08, 2.03it/s]
|
795 |
94%|ββββββββββ| 10058/10682 [1:25:56<05:07, 2.03it/s]
|
796 |
94%|ββββββββββ| 10059/10682 [1:25:56<05:07, 2.03it/s]
|
797 |
94%|ββββββββββ| 10060/10682 [1:25:56<05:06, 2.03it/s]
|
798 |
94%|ββββββββββ| 10061/10682 [1:25:57<05:06, 2.02it/s]
|
799 |
94%|ββββββββββ| 10062/10682 [1:25:57<05:06, 2.03it/s]
|
800 |
94%|ββββββββββ| 10063/10682 [1:25:58<05:05, 2.03it/s]
|
801 |
94%|ββββββββββ| 10064/10682 [1:25:58<05:04, 2.03it/s]
|
802 |
94%|ββββββββββ| 10065/10682 [1:25:59<05:04, 2.03it/s]
|
803 |
94%|ββββββββββ| 10066/10682 [1:25:59<05:03, 2.03it/s]
|
804 |
94%|ββββββββββ| 10067/10682 [1:26:00<05:03, 2.03it/s]
|
805 |
94%|ββββββββββ| 10068/10682 [1:26:00<05:02, 2.03it/s]
|
806 |
94%|ββββββββββ| 10069/10682 [1:26:01<05:02, 2.03it/s]
|
807 |
94%|ββββββββββ| 10070/10682 [1:26:01<05:01, 2.03it/s]
|
808 |
94%|ββββββββββ| 10071/10682 [1:26:02<05:00, 2.03it/s]
|
809 |
94%|ββββββββββ| 10072/10682 [1:26:02<05:00, 2.03it/s]
|
810 |
94%|ββββββββββ| 10073/10682 [1:26:03<05:00, 2.03it/s]
|
811 |
94%|ββββββββββ| 10074/10682 [1:26:03<04:59, 2.03it/s]
|
812 |
94%|ββββββββββ| 10075/10682 [1:26:04<04:59, 2.03it/s]{'loss': 2.8561, 'grad_norm': 0.2601851522922516, 'learning_rate': 9.805609925895964e-06, 'epoch': 13.2}
|
813 |
+
|
814 |
|
815 |
94%|ββββββββββ| 10075/10682 [1:26:04<04:59, 2.03it/s]
|
816 |
94%|ββββββββββ| 10076/10682 [1:26:04<04:58, 2.03it/s]
|
817 |
94%|ββββββββββ| 10077/10682 [1:26:05<04:58, 2.03it/s]
|
818 |
94%|ββββββββββ| 10078/10682 [1:26:05<04:57, 2.03it/s]
|
819 |
94%|ββββββββββ| 10079/10682 [1:26:06<04:57, 2.03it/s]
|
820 |
94%|ββββββββββ| 10080/10682 [1:26:06<04:56, 2.03it/s]
|
821 |
94%|ββββββββββ| 10081/10682 [1:26:07<04:56, 2.03it/s]
|
822 |
94%|ββββββββββ| 10082/10682 [1:26:07<04:55, 2.03it/s]
|
823 |
94%|ββββββββββ| 10083/10682 [1:26:08<04:55, 2.03it/s]
|
824 |
94%|ββββββββββ| 10084/10682 [1:26:08<04:54, 2.03it/s]
|
825 |
94%|ββββββββββ| 10085/10682 [1:26:09<04:54, 2.03it/s]
|
826 |
94%|ββββββββββ| 10086/10682 [1:26:09<04:53, 2.03it/s]
|
827 |
94%|ββββββββββ| 10087/10682 [1:26:10<04:53, 2.03it/s]
|
828 |
94%|ββββββββββ| 10088/10682 [1:26:10<04:52, 2.03it/s]
|
829 |
94%|ββββββββββ| 10089/10682 [1:26:11<04:52, 2.03it/s]
|
830 |
94%|ββββββββββ| 10090/10682 [1:26:11<04:51, 2.03it/s]
|
831 |
94%|ββββββββββ| 10091/10682 [1:26:12<04:50, 2.03it/s]
|
832 |
94%|ββββββββββ| 10092/10682 [1:26:12<04:50, 2.03it/s]
|
833 |
94%|ββββββββββ| 10093/10682 [1:26:13<04:49, 2.03it/s]
|
834 |
94%|ββββββββββ| 10094/10682 [1:26:13<04:49, 2.03it/s]
|
835 |
95%|ββββββββββ| 10095/10682 [1:26:14<04:49, 2.03it/s]
|
836 |
95%|ββββββββββ| 10096/10682 [1:26:14<04:48, 2.03it/s]
|
837 |
95%|ββββββββββ| 10097/10682 [1:26:15<04:48, 2.03it/s]
|
838 |
95%|ββββββββββ| 10098/10682 [1:26:15<04:47, 2.03it/s]
|
839 |
95%|ββββββββββ| 10099/10682 [1:26:16<04:47, 2.03it/s]
|
840 |
95%|ββββββββββ| 10100/10682 [1:26:16<04:46, 2.03it/s]{'loss': 2.8578, 'grad_norm': 0.2591904401779175, 'learning_rate': 9.01691900753926e-06, 'epoch': 13.23}
|
841 |
|
842 |
+
|
843 |
95%|ββββββββββ| 10100/10682 [1:26:16<04:46, 2.03it/s]
|
844 |
95%|ββββββββββ| 10101/10682 [1:26:17<04:46, 2.03it/s]
|
845 |
95%|ββββββββββ| 10102/10682 [1:26:17<04:45, 2.03it/s]
|
846 |
95%|ββββββββββ| 10103/10682 [1:26:18<04:45, 2.03it/s]
|
847 |
95%|ββββββββββ| 10104/10682 [1:26:18<04:44, 2.03it/s]
|
848 |
95%|ββββββββββ| 10105/10682 [1:26:19<04:44, 2.03it/s]
|
849 |
95%|ββββββββββ| 10106/10682 [1:26:19<04:43, 2.03it/s]
|
850 |
95%|ββββββββββ| 10107/10682 [1:26:20<04:43, 2.03it/s]
|
851 |
95%|ββββββββββ| 10108/10682 [1:26:20<04:42, 2.03it/s]
|
852 |
95%|ββββββββββ| 10109/10682 [1:26:21<04:42, 2.03it/s]
|
853 |
95%|ββββββββββ| 10110/10682 [1:26:21<04:41, 2.03it/s]
|
854 |
95%|ββββββββββ| 10111/10682 [1:26:22<04:41, 2.03it/s]
|
855 |
95%|ββββββββββ| 10112/10682 [1:26:22<04:40, 2.03it/s]
|
856 |
95%|βββββββοΏ½οΏ½ββ| 10113/10682 [1:26:23<04:40, 2.03it/s]
|
857 |
95%|ββββββββββ| 10114/10682 [1:26:23<04:39, 2.03it/s]
|
858 |
95%|ββββββββββ| 10115/10682 [1:26:24<04:39, 2.03it/s]
|
859 |
95%|ββββββββββ| 10116/10682 [1:26:24<04:38, 2.03it/s]
|
860 |
95%|ββββββββββ| 10117/10682 [1:26:25<04:38, 2.03it/s]
|
861 |
95%|ββββββββββ| 10118/10682 [1:26:25<04:37, 2.03it/s]
|
862 |
95%|ββββββββββ| 10119/10682 [1:26:26<04:37, 2.03it/s]
|
863 |
95%|ββββββββββ| 10120/10682 [1:26:26<04:36, 2.03it/s]
|
864 |
95%|ββββββββββ| 10121/10682 [1:26:27<04:36, 2.03it/s]
|
865 |
95%|ββββββββββ| 10122/10682 [1:26:27<04:36, 2.03it/s]
|
866 |
95%|ββββββββββ| 10123/10682 [1:26:28<04:35, 2.03it/s]
|
867 |
95%|ββββββββββ| 10124/10682 [1:26:28<04:35, 2.03it/s]
|
868 |
95%|ββββββββββ| 10125/10682 [1:26:29<04:34, 2.03it/s]{'loss': 2.8677, 'grad_norm': 0.2595525085926056, 'learning_rate': 8.261001828055447e-06, 'epoch': 13.26}
|
869 |
|
870 |
+
|
871 |
95%|ββββββββββ| 10125/10682 [1:26:29<04:34, 2.03it/s]
|
872 |
95%|ββββββββββ| 10126/10682 [1:26:29<04:34, 2.03it/s]
|
873 |
95%|ββββββββββ| 10127/10682 [1:26:30<04:33, 2.03it/s]
|
874 |
95%|ββββββββββ| 10128/10682 [1:26:30<04:33, 2.03it/s]
|
875 |
95%|ββββββββββ| 10129/10682 [1:26:30<04:32, 2.03it/s]
|
876 |
95%|ββββββββββ| 10130/10682 [1:26:31<04:32, 2.03it/s]
|
877 |
95%|ββββββββββ| 10131/10682 [1:26:31<04:32, 2.03it/s]
|
878 |
95%|ββββββββββ| 10132/10682 [1:26:32<04:31, 2.03it/s]
|
879 |
95%|ββββββββββ| 10133/10682 [1:26:32<04:30, 2.03it/s]
|
880 |
95%|ββββββββββ| 10134/10682 [1:26:33<04:30, 2.03it/s]
|
881 |
95%|ββββββββββ| 10135/10682 [1:26:33<04:29, 2.03it/s]
|
882 |
95%|ββββββββββ| 10136/10682 [1:26:34<04:28, 2.03it/s]
|
883 |
95%|ββββββββββ| 10137/10682 [1:26:34<04:28, 2.03it/s]
|
884 |
95%|ββββββββββ| 10138/10682 [1:26:35<04:27, 2.03it/s]
|
885 |
95%|ββββββββββ| 10139/10682 [1:26:35<04:27, 2.03it/s]
|
886 |
95%|ββββββββββ| 10140/10682 [1:26:36<04:26, 2.03it/s]
|
887 |
95%|ββββββββββ| 10141/10682 [1:26:36<04:26, 2.03it/s]
|
888 |
95%|ββββββββββ| 10142/10682 [1:26:37<04:25, 2.03it/s]
|
889 |
95%|ββββββββββ| 10143/10682 [1:26:37<04:25, 2.03it/s]
|
890 |
95%|ββββββββββ| 10144/10682 [1:26:38<04:25, 2.03it/s]
|
891 |
95%|ββββββββββ| 10145/10682 [1:26:38<04:24, 2.03it/s]
|
892 |
95%|ββββββββββ| 10146/10682 [1:26:39<04:23, 2.03it/s]
|
893 |
95%|ββββββββββ| 10147/10682 [1:26:39<04:23, 2.03it/s]
|
894 |
95%|ββββββββββ| 10148/10682 [1:26:40<04:45, 1.87it/s]
|
895 |
95%|ββββββββββ| 10149/10682 [1:26:40<04:38, 1.91it/s]
|
896 |
95%|ββββββββββ| 10150/10682 [1:26:41<04:32, 1.95it/s]{'loss': 2.8606, 'grad_norm': 0.26168325543403625, 'learning_rate': 7.537908845868024e-06, 'epoch': 13.29}
|
897 |
+
|
898 |
|
899 |
95%|ββββββββββ| 10150/10682 [1:26:41<04:32, 1.95it/s]
|
900 |
95%|ββββββββββ| 10151/10682 [1:26:41<04:29, 1.97it/s]
|
901 |
95%|ββββββββββ| 10152/10682 [1:26:42<04:26, 1.99it/s]
|
902 |
95%|ββββββββββ| 10153/10682 [1:26:42<04:24, 2.00it/s]
|
903 |
95%|ββββββββββ| 10154/10682 [1:26:43<04:23, 2.01it/s]
|
904 |
95%|ββββββββββ| 10155/10682 [1:26:43<04:21, 2.02it/s]
|
905 |
95%|ββββββββββ| 10156/10682 [1:26:44<04:20, 2.02it/s]
|
906 |
95%|ββββββββββ| 10157/10682 [1:26:44<04:19, 2.03it/s]
|
907 |
95%|ββββββββββ| 10158/10682 [1:26:45<04:18, 2.03it/s]
|
908 |
95%|ββββββββββ| 10159/10682 [1:26:45<04:17, 2.03it/s]
|
909 |
95%|ββββββββββ| 10160/10682 [1:26:46<04:16, 2.03it/s]
|
910 |
95%|ββββββββββ| 10161/10682 [1:26:46<04:16, 2.03it/s]
|
911 |
95%|ββββββββββ| 10162/10682 [1:26:47<04:15, 2.03it/s]
|
912 |
95%|ββββββββββ| 10163/10682 [1:26:47<04:15, 2.03it/s]
|
913 |
95%|ββββββββββ| 10164/10682 [1:26:48<04:14, 2.03it/s]
|
914 |
95%|ββββββββββ| 10165/10682 [1:26:48<04:14, 2.03it/s]
|
915 |
95%|ββββββββββ| 10166/10682 [1:26:49<04:13, 2.03it/s]
|
916 |
95%|ββββββββββ| 10167/10682 [1:26:49<04:13, 2.03it/s]
|
917 |
95%|ββββββββββ| 10168/10682 [1:26:50<04:12, 2.03it/s]
|
918 |
95%|ββββββββββ| 10169/10682 [1:26:50<04:12, 2.03it/s]
|
919 |
95%|ββββββββββ| 10170/10682 [1:26:51<04:11, 2.03it/s]
|
920 |
95%|ββββββββββ| 10171/10682 [1:26:51<04:11, 2.03it/s]
|
921 |
95%|ββββββββββ| 10172/10682 [1:26:52<04:11, 2.03it/s]
|
922 |
95%|ββββββββββ| 10173/10682 [1:26:52<04:10, 2.03it/s]
|
923 |
95%|ββββββββοΏ½οΏ½β| 10174/10682 [1:26:53<04:10, 2.03it/s]
|
924 |
95%|ββββββββββ| 10175/10682 [1:26:53<04:09, 2.03it/s]{'loss': 2.8653, 'grad_norm': 0.26603227853775024, 'learning_rate': 6.847688328344037e-06, 'epoch': 13.33}
|
925 |
+
|
926 |
|
927 |
95%|ββββββββββ| 10175/10682 [1:26:53<04:09, 2.03it/s]
|
928 |
95%|ββββββββββ| 10176/10682 [1:26:54<04:10, 2.02it/s]
|
929 |
95%|ββββββββββ| 10177/10682 [1:26:54<04:09, 2.03it/s]
|
930 |
95%|ββββββββββ| 10178/10682 [1:26:55<04:09, 2.02it/s]
|
931 |
95%|ββββββββββ| 10179/10682 [1:26:55<04:08, 2.03it/s]
|
932 |
95%|ββββββββββ| 10180/10682 [1:26:56<04:07, 2.03it/s]
|
933 |
95%|ββββββββββ| 10181/10682 [1:26:56<04:06, 2.03it/s]
|
934 |
95%|ββββββββββ| 10182/10682 [1:26:57<04:06, 2.03it/s]
|
935 |
95%|ββββββββββ| 10183/10682 [1:26:57<04:05, 2.03it/s]
|
936 |
95%|ββββββββββ| 10184/10682 [1:26:58<04:05, 2.03it/s]
|
937 |
95%|ββββββββββ| 10185/10682 [1:26:58<04:04, 2.03it/s]
|
938 |
95%|ββββββββββ| 10186/10682 [1:26:59<04:03, 2.03it/s]
|
939 |
95%|ββββββββββ| 10187/10682 [1:26:59<04:03, 2.03it/s]
|
940 |
95%|ββββββββββ| 10188/10682 [1:27:00<04:03, 2.03it/s]
|
941 |
95%|ββββββββββ| 10189/10682 [1:27:00<04:02, 2.03it/s]
|
942 |
95%|ββββββββββ| 10190/10682 [1:27:01<04:02, 2.03it/s]
|
943 |
95%|ββββββββββ| 10191/10682 [1:27:01<04:01, 2.03it/s]
|
944 |
95%|ββββββββββ| 10192/10682 [1:27:02<04:01, 2.03it/s]
|
945 |
95%|ββββββββββ| 10193/10682 [1:27:02<04:00, 2.03it/s]
|
946 |
95%|ββββββββββ| 10194/10682 [1:27:03<04:00, 2.03it/s]
|
947 |
95%|ββββββββββ| 10195/10682 [1:27:03<04:00, 2.03it/s]
|
948 |
95%|ββββββββββ| 10196/10682 [1:27:04<03:59, 2.03it/s]
|
949 |
95%|ββββββββββ| 10197/10682 [1:27:04<03:59, 2.03it/s]
|
950 |
95%|ββββββββββ| 10198/10682 [1:27:05<03:58, 2.03it/s]
|
951 |
95%|ββββββββββ| 10199/10682 [1:27:05<03:57, 2.03it/s]
|
952 |
95%|ββββββββββ| 10200/10682 [1:27:06<03:57, 2.03it/s]{'loss': 2.8659, 'grad_norm': 0.2612241208553314, 'learning_rate': 6.190386348572108e-06, 'epoch': 13.36}
|
953 |
+
|
954 |
|
955 |
95%|ββββββββββ| 10200/10682 [1:27:06<03:57, 2.03it/s]
|
956 |
95%|ββββββββββ| 10201/10682 [1:27:06<03:57, 2.03it/s]
|
957 |
96%|ββββββββββ| 10202/10682 [1:27:07<03:56, 2.03it/s]
|
958 |
96%|ββββββββββ| 10203/10682 [1:27:07<03:56, 2.03it/s]
|
959 |
96%|ββββββββββ| 10204/10682 [1:27:08<03:55, 2.03it/s]
|
960 |
96%|ββββββββββ| 10205/10682 [1:27:08<03:55, 2.03it/s]
|
961 |
96%|ββββββββββ| 10206/10682 [1:27:09<03:54, 2.03it/s]
|
962 |
96%|ββββββββββ| 10207/10682 [1:27:09<03:54, 2.03it/s]
|
963 |
96%|ββββββββββ| 10208/10682 [1:27:10<03:53, 2.03it/s]
|
964 |
96%|ββββββββββ| 10209/10682 [1:27:10<03:52, 2.03it/s]
|
965 |
96%|ββββββββββ| 10210/10682 [1:27:11<03:52, 2.03it/s]
|
966 |
96%|ββββββββββ| 10211/10682 [1:27:11<03:51, 2.03it/s]
|
967 |
96%|ββββββββββ| 10212/10682 [1:27:12<03:51, 2.03it/s]
|
968 |
96%|ββββββββββ| 10213/10682 [1:27:12<03:51, 2.03it/s]
|
969 |
96%|ββββββββββ| 10214/10682 [1:27:12<03:50, 2.03it/s]
|
970 |
96%|ββββββββββ| 10215/10682 [1:27:13<03:50, 2.03it/s]
|
971 |
96%|ββββββββββ| 10216/10682 [1:27:13<03:49, 2.03it/s]
|
972 |
96%|ββββββββββ| 10217/10682 [1:27:14<03:49, 2.03it/s]
|
973 |
96%|ββββββββββ| 10218/10682 [1:27:14<03:48, 2.03it/s]
|
974 |
96%|ββββββββββ| 10219/10682 [1:27:15<03:48, 2.03it/s]
|
975 |
96%|ββββββββββ| 10220/10682 [1:27:15<03:47, 2.03it/s]
|
976 |
96%|ββββββββββ| 10221/10682 [1:27:16<03:46, 2.03it/s]
|
977 |
96%|ββββββββββ| 10222/10682 [1:27:16<03:46, 2.03it/s]
|
978 |
96%|ββββββββββ| 10223/10682 [1:27:17<03:45, 2.03it/s]
|
979 |
96%|ββββββββββ| 10224/10682 [1:27:17<03:45, 2.03it/s]
|
980 |
96%|ββββββββββ| 10225/10682 [1:27:18<03:45, 2.03it/s]{'loss': 2.8655, 'grad_norm': 0.2632509469985962, 'learning_rate': 5.56604678228706e-06, 'epoch': 13.39}
|
981 |
+
|
982 |
|
983 |
96%|ββββββββββ| 10225/10682 [1:27:18<03:45, 2.03it/s]
|
984 |
96%|ββββββββββ| 10226/10682 [1:27:18<03:44, 2.03it/s]
|
985 |
96%|ββββββββββ| 10227/10682 [1:27:19<03:44, 2.03it/s]
|
986 |
96%|ββββββββββ| 10228/10682 [1:27:19<03:43, 2.03it/s]
|
987 |
96%|ββββββββββ| 10229/10682 [1:27:20<03:43, 2.03it/s]
|
988 |
96%|ββββββββββ| 10230/10682 [1:27:20<03:42, 2.03it/s]
|
989 |
96%|ββββββββββ| 10231/10682 [1:27:21<03:42, 2.03it/s]
|
990 |
96%|ββββββββββ| 10232/10682 [1:27:21<03:41, 2.03it/s]
|
991 |
96%|ββββββββββ| 10233/10682 [1:27:22<03:41, 2.03it/s]
|
992 |
96%|ββββββββββ| 10234/10682 [1:27:22<03:40, 2.03it/s]
|
993 |
96%|ββββββββββ| 10235/10682 [1:27:23<03:40, 2.03it/s]
|
994 |
96%|ββββββββββ| 10236/10682 [1:27:23<03:39, 2.03it/s]
|
995 |
96%|ββββββββββ| 10237/10682 [1:27:24<03:39, 2.03it/s]
|
996 |
96%|ββββββββββ| 10238/10682 [1:27:24<03:38, 2.03it/s]
|
997 |
96%|ββββββββββ| 10239/10682 [1:27:25<03:38, 2.03it/s]
|
998 |
96%|ββββββββββ| 10240/10682 [1:27:25<03:37, 2.03it/s]
|
999 |
96%|ββββββββββ| 10241/10682 [1:27:26<03:37, 2.03it/s]
|
1000 |
96%|ββββββββββ| 10242/10682 [1:27:26<03:36, 2.03it/s]
|
1001 |
96%|ββββββββββ| 10243/10682 [1:27:27<03:36, 2.03it/s]
|
1002 |
96%|ββββββββββ| 10244/10682 [1:27:27<03:35, 2.03it/s]
|
1003 |
96%|ββββββββββ| 10245/10682 [1:27:28<03:35, 2.03it/s]
|
1004 |
96%|ββββββββββ| 10246/10682 [1:27:28<03:34, 2.03it/s]
|
1005 |
96%|ββββββββββ| 10247/10682 [1:27:29<03:34, 2.03it/s]
|
1006 |
96%|ββββββββββ| 10248/10682 [1:27:29<03:33, 2.03it/s]
|
1007 |
96%|ββββββββββ| 10249/10682 [1:27:30<03:33, 2.03it/s]
|
1008 |
96%|ββββββββββ| 10250/10682 [1:27:30<03:32, 2.03it/s]{'loss': 2.8679, 'grad_norm': 0.262270987033844, 'learning_rate': 4.974711304941093e-06, 'epoch': 13.43}
|
1009 |
+
|
1010 |
|
1011 |
96%|ββββββββββ| 10250/10682 [1:27:30<03:32, 2.03it/s]
|
1012 |
96%|ββββββββββ| 10251/10682 [1:27:31<03:32, 2.03it/s]
|
1013 |
96%|ββββββββββ| 10252/10682 [1:27:31<03:31, 2.03it/s]
|
1014 |
96%|ββββββββββ| 10253/10682 [1:27:32<03:50, 1.86it/s]
|
1015 |
96%|ββββββββββ| 10254/10682 [1:27:32<03:44, 1.91it/s]
|
1016 |
96%|ββββββββββ| 10255/10682 [1:27:33<03:39, 1.94it/s]
|
1017 |
96%|ββββββββββ| 10256/10682 [1:27:33<03:35, 1.97it/s]
|
1018 |
96%|ββββββββββ| 10257/10682 [1:27:34<03:33, 1.99it/s]
|
1019 |
96%|ββββββββββ| 10258/10682 [1:27:34<03:31, 2.00it/s]
|
1020 |
96%|ββββββββββ| 10259/10682 [1:27:35<03:30, 2.01it/s]
|
1021 |
96%|ββββββββββ| 10260/10682 [1:27:35<03:29, 2.01it/s]
|
1022 |
96%|ββββββββββ| 10261/10682 [1:27:36<03:28, 2.02it/s]
|
1023 |
96%|ββββββββββ| 10262/10682 [1:27:36<03:27, 2.02it/s]
|
1024 |
96%|ββββββββββ| 10263/10682 [1:27:37<03:26, 2.03it/s]
|
1025 |
96%|ββββββββββ| 10264/10682 [1:27:37<03:26, 2.03it/s]
|
1026 |
96%|ββββββββββ| 10265/10682 [1:27:38<03:25, 2.03it/s]
|
1027 |
96%|ββββββββββ| 10266/10682 [1:27:38<03:24, 2.03it/s]
|
1028 |
96%|ββββββββββ| 10267/10682 [1:27:39<03:24, 2.03it/s]
|
1029 |
96%|ββββββββββ| 10268/10682 [1:27:39<03:23, 2.03it/s]
|
1030 |
96%|ββββββββββ| 10269/10682 [1:27:40<03:23, 2.03it/s]
|
1031 |
96%|ββββββββββ| 10270/10682 [1:27:40<03:22, 2.03it/s]
|
1032 |
96%|ββββββββββ| 10271/10682 [1:27:41<03:22, 2.03it/s]
|
1033 |
96%|ββββββββββ| 10272/10682 [1:27:41<03:21, 2.03it/s]
|
1034 |
96%|ββββββββββ| 10273/10682 [1:27:42<03:21, 2.03it/s]
|
1035 |
96%|ββββββββββ| 10274/10682 [1:27:42<03:21, 2.03it/s]
|
1036 |
96%|ββββββββββ| 10275/10682 [1:27:43<03:20, 2.03it/s]{'loss': 2.8735, 'grad_norm': 0.2618483603000641, 'learning_rate': 4.416419388921844e-06, 'epoch': 13.46}
|
1037 |
+
|
1038 |
|
1039 |
96%|ββββββββββ| 10275/10682 [1:27:43<03:20, 2.03it/s]
|
1040 |
96%|ββββββββββ| 10276/10682 [1:27:43<03:20, 2.03it/s]
|
1041 |
96%|ββββββββββ| 10277/10682 [1:27:44<03:19, 2.03it/s]
|
1042 |
96%|ββββββββββ| 10278/10682 [1:27:44<03:18, 2.03it/s]
|
1043 |
96%|ββββββββββ| 10279/10682 [1:27:45<03:18, 2.03it/s]
|
1044 |
96%|ββββββββββ| 10280/10682 [1:27:45<03:17, 2.03it/s]
|
1045 |
96%|ββββββββββ| 10281/10682 [1:27:46<03:17, 2.03it/s]
|
1046 |
96%|ββββββββββ| 10282/10682 [1:27:46<03:16, 2.03it/s]
|
1047 |
96%|ββββββββββ| 10283/10682 [1:27:47<03:16, 2.03it/s]
|
1048 |
96%|ββββββββββ| 10284/10682 [1:27:47<03:16, 2.03it/s]
|
1049 |
96%|ββββββββββ| 10285/10682 [1:27:48<03:15, 2.03it/s]
|
1050 |
96%|ββββββββββ| 10286/10682 [1:27:48<03:15, 2.03it/s]
|
1051 |
96%|ββββββββββ| 10287/10682 [1:27:49<03:14, 2.03it/s]
|
1052 |
96%|ββββββββββ| 10288/10682 [1:27:49<03:14, 2.03it/s]
|
1053 |
96%|ββββββββββ| 10289/10682 [1:27:50<03:13, 2.03it/s]
|
1054 |
96%|ββββββββββ| 10290/10682 [1:27:50<03:13, 2.03it/s]
|
1055 |
96%|ββββββββββ| 10291/10682 [1:27:51<03:12, 2.03it/s]
|
1056 |
96%|ββββββββββ| 10292/10682 [1:27:51<03:12, 2.03it/s]
|
1057 |
96%|βββββββοΏ½οΏ½ββ| 10293/10682 [1:27:52<03:11, 2.03it/s]
|
1058 |
96%|ββββββββββ| 10294/10682 [1:27:52<03:11, 2.03it/s]
|
1059 |
96%|ββββββββββ| 10295/10682 [1:27:53<03:10, 2.03it/s]
|
1060 |
96%|ββββββββββ| 10296/10682 [1:27:53<03:10, 2.03it/s]
|
1061 |
96%|ββββββββββ| 10297/10682 [1:27:54<03:09, 2.03it/s]
|
1062 |
96%|ββββββββββ| 10298/10682 [1:27:54<03:08, 2.03it/s]
|
1063 |
96%|ββββββββββ| 10299/10682 [1:27:55<03:08, 2.03it/s]
|
1064 |
96%|ββββββββββ| 10300/10682 [1:27:55<03:08, 2.03it/s]
|
1065 |
|
1066 |
+
|
1067 |
96%|ββββββββββ| 10300/10682 [1:27:55<03:08, 2.03it/s]
|
1068 |
96%|ββββββββββ| 10301/10682 [1:27:55<03:08, 2.03it/s]
|
1069 |
96%|ββββββββββ| 10302/10682 [1:27:56<03:07, 2.03it/s]
|
1070 |
96%|ββββββββββ| 10303/10682 [1:27:56<03:06, 2.03it/s]
|
1071 |
96%|ββββββββββ| 10304/10682 [1:27:57<03:06, 2.03it/s]
|
1072 |
96%|ββββββββββ| 10305/10682 [1:27:57<03:05, 2.03it/s]
|
1073 |
96%|ββββββββββ| 10306/10682 [1:27:58<03:05, 2.03it/s]
|
1074 |
96%|ββββββββββ| 10307/10682 [1:27:58<03:04, 2.03it/s]
|
1075 |
96%|ββββββββββ| 10308/10682 [1:27:59<03:04, 2.03it/s]
|
1076 |
97%|ββββββββββ| 10309/10682 [1:27:59<03:03, 2.03it/s]
|
1077 |
97%|ββββββββββ| 10310/10682 [1:28:00<03:03, 2.03it/s]
|
1078 |
97%|ββββββββββ| 10311/10682 [1:28:00<03:03, 2.03it/s]
|
1079 |
97%|ββββββββββ| 10312/10682 [1:28:01<03:02, 2.03it/s]
|
1080 |
97%|ββββββββββ| 10313/10682 [1:28:01<03:01, 2.03it/s]
|
1081 |
97%|ββββββββββ| 10314/10682 [1:28:02<03:01, 2.03it/s]
|
1082 |
97%|ββββββββββ| 10315/10682 [1:28:02<03:01, 2.03it/s]
|
1083 |
97%|ββββββββββ| 10316/10682 [1:28:03<03:00, 2.03it/s]
|
1084 |
97%|ββββββββββ| 10317/10682 [1:28:03<03:00, 2.03it/s]
|
1085 |
97%|ββββββββββ| 10318/10682 [1:28:04<02:59, 2.03it/s]
|
1086 |
97%|ββββββββββ| 10319/10682 [1:28:04<02:59, 2.03it/s]
|
1087 |
97%|ββββββββββ| 10320/10682 [1:28:05<02:58, 2.03it/s]
|
1088 |
97%|ββββββββββ| 10321/10682 [1:28:05<02:58, 2.03it/s]
|
1089 |
97%|ββββββββββ| 10322/10682 [1:28:06<02:57, 2.02it/s]
|
1090 |
97%|ββββββββββ| 10323/10682 [1:28:06<02:56, 2.03it/s]
|
1091 |
97%|ββββββββββ| 10324/10682 [1:28:07<02:56, 2.03it/s]
|
1092 |
97%|ββββββββββ| 10325/10682 [1:28:07<02:55, 2.03it/s]{'loss': 2.8633, 'grad_norm': 0.26117584109306335, 'learning_rate': 3.3991130994299734e-06, 'epoch': 13.52}
|
1093 |
+
|
1094 |
|
1095 |
97%|ββββββββββ| 10325/10682 [1:28:07<02:55, 2.03it/s]
|
1096 |
97%|ββββββββββ| 10326/10682 [1:28:08<02:55, 2.03it/s]
|
1097 |
97%|ββββββββββ| 10327/10682 [1:28:08<02:55, 2.03it/s]
|
1098 |
97%|ββββββββββ| 10328/10682 [1:28:09<02:54, 2.03it/s]
|
1099 |
97%|ββββββββββ| 10329/10682 [1:28:09<02:53, 2.03it/s]
|
1100 |
97%|ββββββββββ| 10330/10682 [1:28:10<02:53, 2.03it/s]
|
1101 |
97%|ββββββββββ| 10331/10682 [1:28:10<02:53, 2.03it/s]
|
1102 |
97%|ββββββββββ| 10332/10682 [1:28:11<02:52, 2.03it/s]
|
1103 |
97%|ββββββββββ| 10333/10682 [1:28:11<02:51, 2.03it/s]
|
1104 |
97%|ββββββββββ| 10334/10682 [1:28:12<02:51, 2.03it/s]
|
1105 |
97%|ββββββββββ| 10335/10682 [1:28:12<02:50, 2.03it/s]
|
1106 |
97%|ββββββββββ| 10336/10682 [1:28:13<02:50, 2.03it/s]
|
1107 |
97%|ββββββββββ| 10337/10682 [1:28:13<02:49, 2.03it/s]
|
1108 |
97%|ββββββββββ| 10338/10682 [1:28:14<02:49, 2.03it/s]
|
1109 |
97%|ββββββββββ| 10339/10682 [1:28:14<02:49, 2.03it/s]
|
1110 |
97%|ββββββββββ| 10340/10682 [1:28:15<02:48, 2.03it/s]
|
1111 |
97%|ββββββββββ| 10341/10682 [1:28:15<02:48, 2.03it/s]
|
1112 |
97%|ββββββββββ| 10342/10682 [1:28:16<02:47, 2.03it/s]
|
1113 |
97%|ββββββββββ| 10343/10682 [1:28:16<02:46, 2.03it/s]
|
1114 |
97%|ββββββββββ| 10344/10682 [1:28:17<02:46, 2.03it/s]
|
1115 |
97%|ββββββββββ| 10345/10682 [1:28:17<02:46, 2.03it/s]
|
1116 |
97%|ββββββββββ| 10346/10682 [1:28:18<02:45, 2.03it/s]
|
1117 |
97%|ββββββββββ| 10347/10682 [1:28:18<02:45, 2.03it/s]
|
1118 |
97%|ββββββββββ| 10348/10682 [1:28:19<02:44, 2.03it/s]
|
1119 |
97%|ββββββββββ| 10349/10682 [1:28:19<02:44, 2.03it/s]
|
1120 |
97%|ββββββββββ| 10350/10682 [1:28:20<02:43, 2.03it/s]{'loss': 2.8637, 'grad_norm': 0.2615685760974884, 'learning_rate': 2.940166632433183e-06, 'epoch': 13.56}
|
1121 |
+
|
1122 |
|
1123 |
97%|ββββββββββ| 10350/10682 [1:28:20<02:43, 2.03it/s]
|
1124 |
97%|ββββββββββ| 10351/10682 [1:28:20<02:43, 2.03it/s]
|
1125 |
97%|ββββββββββ| 10352/10682 [1:28:21<02:42, 2.03it/s]
|
1126 |
97%|ββββββββββ| 10353/10682 [1:28:21<02:42, 2.03it/s]
|
1127 |
97%|ββββββββββ| 10354/10682 [1:28:22<02:41, 2.03it/s]
|
1128 |
97%|ββββββββββ| 10355/10682 [1:28:22<02:41, 2.02it/s]
|
1129 |
97%|ββββββββββ| 10356/10682 [1:28:23<02:40, 2.03it/s]
|
1130 |
97%|ββββββββββ| 10357/10682 [1:28:23<02:39, 2.03it/s]
|
1131 |
97%|ββββββββββ| 10358/10682 [1:28:24<02:39, 2.03it/s]
|
1132 |
97%|ββββββββββ| 10359/10682 [1:28:24<02:39, 2.03it/s]
|
1133 |
97%|ββββββββββ| 10360/10682 [1:28:25<02:38, 2.03it/s]
|
1134 |
97%|ββββββββββ| 10361/10682 [1:28:25<02:38, 2.03it/s]
|
1135 |
97%|ββββββββββ| 10362/10682 [1:28:26<02:37, 2.03it/s]
|
1136 |
97%|ββββββββββ| 10363/10682 [1:28:26<02:37, 2.03it/s]
|
1137 |
97%|ββββββββββ| 10364/10682 [1:28:27<02:36, 2.03it/s]
|
1138 |
97%|ββββββββββ| 10365/10682 [1:28:27<02:36, 2.03it/s]
|
1139 |
97%|ββββββββββ| 10366/10682 [1:28:28<02:35, 2.03it/s]
|
1140 |
97%|ββββββββββ| 10367/10682 [1:28:28<02:34, 2.04it/s]
|
1141 |
97%|ββββββββββ| 10368/10682 [1:28:29<02:34, 2.04it/s]
|
1142 |
97%|ββββββββββ| 10369/10682 [1:28:29<02:33, 2.03it/s]
|
1143 |
97%|ββββββββββ| 10370/10682 [1:28:29<02:33, 2.03it/s]
|
1144 |
97%|ββββββββββ| 10371/10682 [1:28:30<02:33, 2.03it/s]
|
1145 |
97%|ββββββββββ| 10372/10682 [1:28:30<02:32, 2.03it/s]
|
1146 |
97%|ββββββββββ| 10373/10682 [1:28:31<02:32, 2.03it/s]
|
1147 |
97%|ββββββββββ| 10374/10682 [1:28:31<02:31, 2.03it/s]
|
1148 |
97%|ββββββββββ| 10375/10682 [1:28:32<02:31, 2.03it/s]
|
1149 |
|
1150 |
+
|
1151 |
97%|ββββββββββ| 10375/10682 [1:28:32<02:31, 2.03it/s]
|
1152 |
97%|ββββββββββ| 10376/10682 [1:28:32<02:30, 2.03it/s]
|
1153 |
97%|ββββββββββ| 10377/10682 [1:28:33<02:30, 2.03it/s]
|
1154 |
97%|ββββββββββ| 10378/10682 [1:28:33<02:29, 2.03it/s]
|
1155 |
97%|ββββββββββ| 10379/10682 [1:28:34<02:29, 2.03it/s]
|
1156 |
97%|ββββββββββ| 10380/10682 [1:28:34<02:28, 2.03it/s]
|
1157 |
97%|ββββββββββ| 10381/10682 [1:28:35<02:28, 2.03it/s]
|
1158 |
97%|ββββββββββ| 10382/10682 [1:28:35<02:27, 2.03it/s]
|
1159 |
97%|ββββββββββ| 10383/10682 [1:28:36<02:26, 2.03it/s]
|
1160 |
97%|ββββββββββ| 10384/10682 [1:28:36<02:26, 2.03it/s]
|
1161 |
97%|ββββββββββ| 10385/10682 [1:28:37<02:26, 2.03it/s]
|
1162 |
97%|ββββββββββ| 10386/10682 [1:28:37<02:25, 2.03it/s]
|
1163 |
97%|ββββββββββ| 10387/10682 [1:28:38<02:25, 2.03it/s]
|
1164 |
97%|ββββββββββ| 10388/10682 [1:28:38<02:24, 2.04it/s]
|
1165 |
97%|ββββββββββ| 10389/10682 [1:28:39<02:23, 2.04it/s]
|
1166 |
97%|ββββββββββ| 10390/10682 [1:28:39<02:23, 2.03it/s]
|
1167 |
97%|ββββββββββ| 10391/10682 [1:28:40<02:23, 2.03it/s]
|
1168 |
97%|ββββββββββ| 10392/10682 [1:28:40<02:22, 2.03it/s]
|
1169 |
97%|ββββββββββ| 10393/10682 [1:28:41<02:22, 2.03it/s]
|
1170 |
97%|ββββββββββ| 10394/10682 [1:28:41<02:21, 2.03it/s]
|
1171 |
97%|ββββββββββ| 10395/10682 [1:28:42<02:21, 2.03it/s]
|
1172 |
97%|ββββββββββ| 10396/10682 [1:28:42<02:20, 2.03it/s]
|
1173 |
97%|ββββββββββ| 10397/10682 [1:28:43<02:20, 2.03it/s]
|
1174 |
97%|ββββββββββ| 10398/10682 [1:28:43<02:19, 2.03it/s]
|
1175 |
97%|ββββββββββ| 10399/10682 [1:28:44<02:19, 2.03it/s]
|
1176 |
97%|ββββββββββ| 10400/10682 [1:28:44<02:18, 2.03it/s]{'loss': 2.8597, 'grad_norm': 0.25953540205955505, 'learning_rate': 2.1218402281655835e-06, 'epoch': 13.62}
|
1177 |
|
1178 |
+
|
1179 |
97%|ββββββββββ| 10400/10682 [1:28:44<02:18, 2.03it/s]
|
1180 |
97%|ββββββββββ| 10401/10682 [1:28:45<02:19, 2.02it/s]
|
1181 |
97%|ββββββββββ| 10402/10682 [1:28:45<02:18, 2.02it/s]
|
1182 |
97%|ββββββββββ| 10403/10682 [1:28:46<02:17, 2.03it/s]
|
1183 |
97%|ββββββββββ| 10404/10682 [1:28:46<02:17, 2.03it/s]
|
1184 |
97%|ββββββββββ| 10405/10682 [1:28:47<02:16, 2.03it/s]
|
1185 |
97%|ββββββββββ| 10406/10682 [1:28:47<02:15, 2.03it/s]
|
1186 |
97%|ββββββββββ| 10407/10682 [1:28:48<02:15, 2.03it/s]
|
1187 |
97%|ββββββββββ| 10408/10682 [1:28:48<02:14, 2.03it/s]
|
1188 |
97%|ββββββββββ| 10409/10682 [1:28:49<02:14, 2.03it/s]
|
1189 |
97%|ββββββββββ| 10410/10682 [1:28:49<02:13, 2.03it/s]
|
1190 |
97%|ββββββββββ| 10411/10682 [1:28:50<02:13, 2.03it/s]
|
1191 |
97%|ββββοΏ½οΏ½βββββ| 10412/10682 [1:28:50<02:12, 2.03it/s]
|
1192 |
97%|ββββββββββ| 10413/10682 [1:28:51<02:12, 2.03it/s]
|
1193 |
97%|ββββββββββ| 10414/10682 [1:28:51<02:12, 2.03it/s]
|
1194 |
98%|ββββββββββ| 10415/10682 [1:28:52<02:11, 2.03it/s]
|
1195 |
98%|ββββββββββ| 10416/10682 [1:28:52<02:11, 2.03it/s]
|
1196 |
98%|ββββββββββ| 10417/10682 [1:28:53<02:10, 2.03it/s]
|
1197 |
98%|ββββββββββ| 10418/10682 [1:28:53<02:09, 2.03it/s]
|
1198 |
98%|ββββββββββ| 10419/10682 [1:28:54<02:09, 2.03it/s]
|
1199 |
98%|ββββββββββ| 10420/10682 [1:28:54<02:08, 2.03it/s]
|
1200 |
98%|ββββββββββ| 10421/10682 [1:28:55<02:08, 2.03it/s]
|
1201 |
98%|ββββββββββ| 10422/10682 [1:28:55<02:07, 2.03it/s]
|
1202 |
98%|ββββββββββ| 10423/10682 [1:28:56<02:07, 2.03it/s]
|
1203 |
98%|ββββββββββ| 10424/10682 [1:28:56<02:07, 2.03it/s]
|
1204 |
98%|ββββββββββ| 10425/10682 [1:28:57<02:06, 2.03it/s]{'loss': 2.8679, 'grad_norm': 0.26001906394958496, 'learning_rate': 1.7625149152127318e-06, 'epoch': 13.65}
|
1205 |
+
|
1206 |
|
1207 |
98%|ββββββββββ| 10425/10682 [1:28:57<02:06, 2.03it/s]
|
1208 |
98%|ββββββββββ| 10426/10682 [1:28:57<02:06, 2.03it/s]
|
1209 |
98%|ββββββββββ| 10427/10682 [1:28:58<02:05, 2.03it/s]
|
1210 |
98%|ββββββββββ| 10428/10682 [1:28:58<02:05, 2.03it/s]
|
1211 |
98%|ββββββββββ| 10429/10682 [1:28:59<02:04, 2.03it/s]
|
1212 |
98%|ββββββββββ| 10430/10682 [1:28:59<02:04, 2.03it/s]
|
1213 |
98%|ββββββββββ| 10431/10682 [1:29:00<02:03, 2.03it/s]
|
1214 |
98%|ββββββββββ| 10432/10682 [1:29:00<02:03, 2.03it/s]
|
1215 |
98%|ββββββββββ| 10433/10682 [1:29:01<02:02, 2.03it/s]
|
1216 |
98%|ββββββββββ| 10434/10682 [1:29:01<02:02, 2.03it/s]
|
1217 |
98%|ββββββββββ| 10435/10682 [1:29:02<02:01, 2.03it/s]
|
1218 |
98%|ββββββββββ| 10436/10682 [1:29:02<02:01, 2.03it/s]
|
1219 |
98%|ββββββββββ| 10437/10682 [1:29:02<02:00, 2.03it/s]
|
1220 |
98%|ββββββββββ| 10438/10682 [1:29:03<02:00, 2.03it/s]
|
1221 |
98%|ββββββββββ| 10439/10682 [1:29:03<01:59, 2.03it/s]
|
1222 |
98%|ββββββββββ| 10440/10682 [1:29:04<01:59, 2.03it/s]
|
1223 |
98%|ββββββββββ| 10441/10682 [1:29:04<01:58, 2.03it/s]
|
1224 |
98%|ββββββββββ| 10442/10682 [1:29:05<01:58, 2.03it/s]
|
1225 |
98%|ββββββββββ| 10443/10682 [1:29:05<01:57, 2.03it/s]
|
1226 |
98%|ββββββββββ| 10444/10682 [1:29:06<01:57, 2.03it/s]
|
1227 |
98%|ββββββββββ| 10445/10682 [1:29:06<01:56, 2.03it/s]
|
1228 |
98%|ββββββββββ| 10446/10682 [1:29:07<01:56, 2.03it/s]
|
1229 |
98%|ββββββββββ| 10447/10682 [1:29:07<01:55, 2.03it/s]
|
1230 |
98%|ββββββββββ| 10448/10682 [1:29:08<01:55, 2.03it/s]
|
1231 |
98%|ββββββββββ| 10449/10682 [1:29:08<01:54, 2.03it/s]
|
1232 |
98%|ββββββββββ| 10450/10682 [1:29:09<01:54, 2.03it/s]{'loss': 2.8763, 'grad_norm': 0.26250502467155457, 'learning_rate': 1.4364475817401635e-06, 'epoch': 13.69}
|
1233 |
+
|
1234 |
|
1235 |
98%|ββββββββββ| 10450/10682 [1:29:09<01:54, 2.03it/s]
|
1236 |
98%|ββββββββββ| 10451/10682 [1:29:09<01:54, 2.02it/s]
|
1237 |
98%|ββββββββββ| 10452/10682 [1:29:10<01:53, 2.03it/s]
|
1238 |
98%|ββββββββββ| 10453/10682 [1:29:10<01:53, 2.03it/s]
|
1239 |
98%|ββββββββββ| 10454/10682 [1:29:11<01:52, 2.03it/s]
|
1240 |
98%|ββββββββββ| 10455/10682 [1:29:11<01:51, 2.03it/s]
|
1241 |
98%|ββββββββββ| 10456/10682 [1:29:12<01:51, 2.03it/s]
|
1242 |
98%|ββββββββββ| 10457/10682 [1:29:12<01:50, 2.03it/s]
|
1243 |
98%|ββββββββββ| 10458/10682 [1:29:13<01:50, 2.03it/s]
|
1244 |
98%|ββββββββββ| 10459/10682 [1:29:13<01:49, 2.03it/s]
|
1245 |
98%|ββββββββββ| 10460/10682 [1:29:14<01:49, 2.03it/s]
|
1246 |
98%|ββββββββββ| 10461/10682 [1:29:14<01:48, 2.03it/s]
|
1247 |
98%|ββββββββββ| 10462/10682 [1:29:15<01:48, 2.03it/s]
|
1248 |
98%|ββββββββββ| 10463/10682 [1:29:15<01:47, 2.03it/s]
|
1249 |
98%|ββββββββββ| 10464/10682 [1:29:16<01:47, 2.03it/s]
|
1250 |
98%|ββββββββββ| 10465/10682 [1:29:16<01:46, 2.03it/s]
|
1251 |
98%|ββββββββββ| 10466/10682 [1:29:17<01:46, 2.03it/s]
|
1252 |
98%|ββββββββββ| 10467/10682 [1:29:17<01:45, 2.03it/s]
|
1253 |
98%|ββββββββββ| 10468/10682 [1:29:18<01:45, 2.03it/s]
|
1254 |
98%|ββββββββββ| 10469/10682 [1:29:18<01:44, 2.03it/s]
|
1255 |
98%|ββββββββββ| 10470/10682 [1:29:19<01:44, 2.03it/s]
|
1256 |
98%|ββββββββββ| 10471/10682 [1:29:19<01:44, 2.03it/s]
|
1257 |
98%|ββββββββββ| 10472/10682 [1:29:20<01:43, 2.03it/s]
|
1258 |
98%|ββββοΏ½οΏ½βββββ| 10473/10682 [1:29:20<01:43, 2.03it/s]
|
1259 |
98%|ββββββββββ| 10474/10682 [1:29:21<01:42, 2.03it/s]
|
1260 |
98%|ββββββββββ| 10475/10682 [1:29:21<01:41, 2.03it/s]{'loss': 2.8583, 'grad_norm': 0.2589586079120636, 'learning_rate': 1.143659993153079e-06, 'epoch': 13.72}
|
1261 |
|
1262 |
+
|
1263 |
98%|ββββββββββ| 10475/10682 [1:29:21<01:41, 2.03it/s]
|
1264 |
98%|ββββββββββ| 10476/10682 [1:29:22<01:41, 2.03it/s]
|
1265 |
98%|ββββββββββ| 10477/10682 [1:29:22<01:41, 2.03it/s]
|
1266 |
98%|ββββββββββ| 10478/10682 [1:29:23<01:40, 2.03it/s]
|
1267 |
98%|ββββββββββ| 10479/10682 [1:29:23<01:40, 2.03it/s]
|
1268 |
98%|ββββββββββ| 10480/10682 [1:29:24<01:39, 2.03it/s]
|
1269 |
98%|ββββββββββ| 10481/10682 [1:29:24<01:39, 2.03it/s]
|
1270 |
98%|ββββββββββ| 10482/10682 [1:29:25<01:38, 2.03it/s]
|
1271 |
98%|ββββββββββ| 10483/10682 [1:29:25<01:38, 2.03it/s]
|
1272 |
98%|ββββββββββ| 10484/10682 [1:29:26<01:37, 2.03it/s]
|
1273 |
98%|ββββββββββ| 10485/10682 [1:29:26<01:37, 2.03it/s]
|
1274 |
98%|ββββββββββ| 10486/10682 [1:29:27<01:36, 2.03it/s]
|
1275 |
98%|ββββββββββ| 10487/10682 [1:29:27<01:36, 2.03it/s]
|
1276 |
98%|ββββββββββ| 10488/10682 [1:29:28<01:35, 2.03it/s]
|
1277 |
98%|ββββββββββ| 10489/10682 [1:29:28<01:35, 2.03it/s]
|
1278 |
98%|ββββββββββ| 10490/10682 [1:29:29<01:34, 2.03it/s]
|
1279 |
98%|ββββββββββ| 10491/10682 [1:29:29<01:34, 2.03it/s]
|
1280 |
98%|ββββββββββ| 10492/10682 [1:29:30<01:33, 2.03it/s]
|
1281 |
98%|ββββββββββ| 10493/10682 [1:29:30<01:33, 2.03it/s]
|
1282 |
98%|ββββββββββ| 10494/10682 [1:29:31<01:32, 2.03it/s]
|
1283 |
98%|ββββββββββ| 10495/10682 [1:29:31<01:31, 2.03it/s]
|
1284 |
98%|ββββββββββ| 10496/10682 [1:29:32<01:31, 2.03it/s]
|
1285 |
98%|ββββββββββ| 10497/10682 [1:29:32<01:31, 2.03it/s]
|
1286 |
98%|ββββββββββ| 10498/10682 [1:29:33<01:30, 2.03it/s]
|
1287 |
98%|ββββββββββ| 10499/10682 [1:29:33<01:30, 2.03it/s]
|
1288 |
98%|ββββββββββ| 10500/10682 [1:29:34<01:29, 2.03it/s]
|
1289 |
|
1290 |
+
|
1291 |
98%|ββββββββββ| 10500/10682 [1:29:34<01:29, 2.03it/s]
|
1292 |
98%|ββββββββββ| 10501/10682 [1:29:34<01:29, 2.02it/s]
|
1293 |
98%|ββββββββββ| 10502/10682 [1:29:35<01:28, 2.03it/s]
|
1294 |
98%|ββββββββββ| 10503/10682 [1:29:35<01:28, 2.02it/s]
|
1295 |
98%|ββββββββββ| 10504/10682 [1:29:36<01:27, 2.03it/s]
|
1296 |
98%|ββββββββββ| 10505/10682 [1:29:36<01:27, 2.03it/s]
|
1297 |
98%|ββββββββββ| 10506/10682 [1:29:37<01:26, 2.03it/s]
|
1298 |
98%|ββββββββββ| 10507/10682 [1:29:37<01:26, 2.03it/s]
|
1299 |
98%|ββββββββββ| 10508/10682 [1:29:37<01:25, 2.03it/s]
|
1300 |
98%|ββββββββββ| 10509/10682 [1:29:38<01:25, 2.03it/s]
|
1301 |
98%|ββββββββββ| 10510/10682 [1:29:38<01:24, 2.03it/s]
|
1302 |
98%|ββββββββββ| 10511/10682 [1:29:39<01:24, 2.03it/s]
|
1303 |
98%|ββββββββββ| 10512/10682 [1:29:39<01:23, 2.03it/s]
|
1304 |
98%|ββββββββββ| 10513/10682 [1:29:40<01:23, 2.03it/s]
|
1305 |
98%|ββββββββββ| 10514/10682 [1:29:40<01:22, 2.03it/s]
|
1306 |
98%|ββββββββββ| 10515/10682 [1:29:41<01:22, 2.03it/s]
|
1307 |
98%|ββββββββββ| 10516/10682 [1:29:41<01:21, 2.03it/s]
|
1308 |
98%|ββββββββββ| 10517/10682 [1:29:42<01:21, 2.03it/s]
|
1309 |
98%|ββββββββββ| 10518/10682 [1:29:42<01:20, 2.03it/s]
|
1310 |
98%|ββββββββββ| 10519/10682 [1:29:43<01:20, 2.03it/s]
|
1311 |
98%|ββββββββββ| 10520/10682 [1:29:43<01:19, 2.03it/s]
|
1312 |
98%|ββββββββββ| 10521/10682 [1:29:44<01:19, 2.03it/s]
|
1313 |
99%|ββββββββββ| 10522/10682 [1:29:44<01:18, 2.03it/s]
|
1314 |
99%|ββββββββββ| 10523/10682 [1:29:45<01:18, 2.03it/s]
|
1315 |
99%|ββββββββββ| 10524/10682 [1:29:45<01:17, 2.03it/s]
|
1316 |
99%|ββββββββββ| 10525/10682 [1:29:46<01:17, 2.03it/s]{'loss': 2.8753, 'grad_norm': 0.2604506015777588, 'learning_rate': 6.580000036264244e-07, 'epoch': 13.79}
|
1317 |
|
1318 |
+
|
1319 |
99%|ββββββββββ| 10525/10682 [1:29:46<01:17, 2.03it/s]
|
1320 |
99%|ββββββββββ| 10526/10682 [1:29:46<01:16, 2.03it/s]
|
1321 |
99%|ββββββββββ| 10527/10682 [1:29:47<01:16, 2.03it/s]
|
1322 |
99%|ββββββββββ| 10528/10682 [1:29:47<01:15, 2.03it/s]
|
1323 |
99%|ββββββββββ| 10529/10682 [1:29:48<01:15, 2.03it/s]
|
1324 |
99%|ββββββββββ| 10530/10682 [1:29:48<01:14, 2.03it/s]
|
1325 |
99%|ββββββββββ| 10531/10682 [1:29:49<01:14, 2.03it/s]
|
1326 |
99%|ββββββββββ| 10532/10682 [1:29:49<01:13, 2.03it/s]
|
1327 |
99%|ββββββββββ| 10533/10682 [1:29:50<01:13, 2.03it/s]
|
1328 |
99%|ββββββββββ| 10534/10682 [1:29:50<01:12, 2.03it/s]
|
1329 |
99%|ββββββββββ| 10535/10682 [1:29:51<01:12, 2.03it/s]
|
1330 |
99%|ββββββββββ| 10536/10682 [1:29:51<01:11, 2.03it/s]
|
1331 |
99%|ββββββββββ| 10537/10682 [1:29:52<01:11, 2.03it/s]
|
1332 |
99%|ββββββββββ| 10538/10682 [1:29:52<01:10, 2.03it/s]
|
1333 |
99%|ββββββββββ| 10539/10682 [1:29:53<01:10, 2.03it/s]
|
1334 |
99%|ββββββββββ| 10540/10682 [1:29:53<01:09, 2.03it/s]
|
1335 |
99%|ββββββββββ| 10541/10682 [1:29:54<01:09, 2.03it/s]
|
1336 |
99%|ββββββββββ| 10542/10682 [1:29:54<01:08, 2.03it/s]
|
1337 |
99%|ββββββββββ| 10543/10682 [1:29:55<01:08, 2.03it/s]
|
1338 |
99%|ββββββββββ| 10544/10682 [1:29:55<01:07, 2.03it/s]
|
1339 |
99%|ββββββββββ| 10545/10682 [1:29:56<01:07, 2.03it/s]
|
1340 |
99%|ββββββββββ| 10546/10682 [1:29:56<01:06, 2.03it/s]
|
1341 |
99%|ββββββββββ| 10547/10682 [1:29:57<01:06, 2.03it/s]
|
1342 |
99%|ββββββββββ| 10548/10682 [1:29:57<01:05, 2.03it/s]
|
1343 |
99%|ββββββββββ| 10549/10682 [1:29:58<01:05, 2.03it/s]
|
1344 |
99%|ββββββββββ| 10550/10682 [1:29:58<01:04, 2.03it/s]
|
1345 |
|
1346 |
+
|
1347 |
99%|ββββββββββ| 10550/10682 [1:29:58<01:04, 2.03it/s]
|
1348 |
99%|ββββββββββ| 10551/10682 [1:29:59<01:04, 2.03it/s]
|
1349 |
99%|ββββββββββ| 10552/10682 [1:29:59<01:04, 2.03it/s]
|
1350 |
99%|ββββββββββ| 10553/10682 [1:30:00<01:03, 2.03it/s]
|
1351 |
99%|ββββββββββ| 10554/10682 [1:30:00<01:03, 2.03it/s]
|
1352 |
99%|ββββββββββ| 10555/10682 [1:30:01<01:02, 2.03it/s]
|
1353 |
99%|ββββββββββ| 10556/10682 [1:30:01<01:02, 2.03it/s]
|
1354 |
99%|ββββββββββ| 10557/10682 [1:30:02<01:01, 2.03it/s]
|
1355 |
99%|ββββββββββ| 10558/10682 [1:30:02<01:01, 2.03it/s]
|
1356 |
99%|ββββββββββ| 10559/10682 [1:30:03<01:00, 2.03it/s]
|
1357 |
99%|ββββββββββ| 10560/10682 [1:30:03<01:00, 2.03it/s]
|
1358 |
99%|ββββββββββ| 10561/10682 [1:30:04<00:59, 2.03it/s]
|
1359 |
99%|ββββββββββ| 10562/10682 [1:30:04<00:59, 2.03it/s]
|
1360 |
99%|ββββββββββ| 10563/10682 [1:30:05<00:58, 2.03it/s]
|
1361 |
99%|ββββββββββ| 10564/10682 [1:30:05<00:58, 2.03it/s]
|
1362 |
99%|ββββββββββ| 10565/10682 [1:30:06<00:57, 2.03it/s]
|
1363 |
99%|ββββββββββ| 10566/10682 [1:30:06<00:57, 2.03it/s]
|
1364 |
99%|ββββββββββ| 10567/10682 [1:30:07<00:56, 2.03it/s]
|
1365 |
99%|ββββββββββ| 10568/10682 [1:30:07<00:56, 2.03it/s]
|
1366 |
99%|ββββββββββ| 10569/10682 [1:30:08<00:55, 2.03it/s]
|
1367 |
99%|ββββββββββ| 10570/10682 [1:30:08<00:55, 2.03it/s]
|
1368 |
99%|ββββββββββ| 10571/10682 [1:30:09<00:54, 2.03it/s]
|
1369 |
99%|ββββββββββ| 10572/10682 [1:30:09<00:54, 2.03it/s]
|
1370 |
99%|ββββββββββ| 10573/10682 [1:30:10<00:53, 2.03it/s]
|
1371 |
99%|ββββββββββ| 10574/10682 [1:30:10<00:53, 2.03it/s]
|
1372 |
99%|ββββββββββ| 10575/10682 [1:30:11<00:52, 2.03it/s]
|
1373 |
|
1374 |
+
|
1375 |
99%|ββββββββββ| 10575/10682 [1:30:11<00:52, 2.03it/s]
|
1376 |
99%|ββββββββββ| 10576/10682 [1:30:11<00:52, 2.03it/s]
|
1377 |
99%|ββββββββββ| 10577/10682 [1:30:11<00:51, 2.03it/s]
|
1378 |
99%|ββββββββββ| 10578/10682 [1:30:12<00:51, 2.03it/s]
|
1379 |
99%|ββββββββββ| 10579/10682 [1:30:12<00:50, 2.03it/s]
|
1380 |
99%|ββββββββββ| 10580/10682 [1:30:13<00:50, 2.03it/s]
|
1381 |
99%|ββββββββββ| 10581/10682 [1:30:13<00:49, 2.03it/s]
|
1382 |
99%|ββββββββββ| 10582/10682 [1:30:14<00:49, 2.03it/s]
|
1383 |
99%|ββββββββββ| 10583/10682 [1:30:14<00:48, 2.03it/s]
|
1384 |
99%|ββββββββββ| 10584/10682 [1:30:15<00:48, 2.03it/s]
|
1385 |
99%|ββββββββββ| 10585/10682 [1:30:15<00:47, 2.03it/s]
|
1386 |
99%|ββββββββββ| 10586/10682 [1:30:16<00:47, 2.03it/s]
|
1387 |
99%|ββββββββββ| 10587/10682 [1:30:16<00:46, 2.03it/s]
|
1388 |
99%|ββββββββββ| 10588/10682 [1:30:17<00:46, 2.03it/s]
|
1389 |
99%|ββββββββββ| 10589/10682 [1:30:17<00:45, 2.03it/s]
|
1390 |
99%|ββββββββββ| 10590/10682 [1:30:18<00:45, 2.03it/s]
|
1391 |
99%|ββββββββββ| 10591/10682 [1:30:18<00:44, 2.03it/s]
|
1392 |
99%|ββββββββββ| 10592/10682 [1:30:19<00:44, 2.03it/s]
|
1393 |
99%|ββββββββββ| 10593/10682 [1:30:19<00:43, 2.03it/s]
|
1394 |
99%|ββββββββββ| 10594/10682 [1:30:20<00:43, 2.03it/s]
|
1395 |
99%|ββββββββββ| 10595/10682 [1:30:20<00:42, 2.03it/s]
|
1396 |
99%|ββββββββββ| 10596/10682 [1:30:21<00:42, 2.03it/s]
|
1397 |
99%|ββββββββββ| 10597/10682 [1:30:21<00:41, 2.03it/s]
|
1398 |
99%|ββββββββββ| 10598/10682 [1:30:22<00:41, 2.03it/s]
|
1399 |
99%|ββββββββββ| 10599/10682 [1:30:22<00:40, 2.03it/s]
|
1400 |
99%|ββββββββββ| 10600/10682 [1:30:23<00:40, 2.03it/s]
|
1401 |
|
1402 |
+
|
1403 |
99%|ββββββββββ| 10600/10682 [1:30:23<00:40, 2.03it/s]
|
1404 |
99%|ββββββββββ| 10601/10682 [1:30:23<00:40, 2.02it/s]
|
1405 |
99%|ββββββββββ| 10602/10682 [1:30:24<00:39, 2.03it/s]
|
1406 |
99%|ββββββββββ| 10603/10682 [1:30:24<00:38, 2.03it/s]
|
1407 |
99%|ββββββββββ| 10604/10682 [1:30:25<00:38, 2.03it/s]
|
1408 |
99%|ββββββββββ| 10605/10682 [1:30:25<00:37, 2.03it/s]
|
1409 |
99%|ββββββββββ| 10606/10682 [1:30:26<00:37, 2.03it/s]
|
1410 |
99%|ββββββββββ| 10607/10682 [1:30:26<00:36, 2.03it/s]
|
1411 |
99%|ββββββββββ| 10608/10682 [1:30:27<00:36, 2.03it/s]
|
1412 |
99%|ββββββββββ| 10609/10682 [1:30:27<00:35, 2.03it/s]
|
1413 |
99%|ββββββββββ| 10610/10682 [1:30:28<00:35, 2.03it/s]
|
1414 |
99%|ββββββββββ| 10611/10682 [1:30:28<00:34, 2.03it/s]
|
1415 |
99%|ββββββββββ| 10612/10682 [1:30:29<00:34, 2.03it/s]
|
1416 |
99%|ββββββββββ| 10613/10682 [1:30:29<00:34, 2.03it/s]
|
1417 |
99%|ββββββββββ| 10614/10682 [1:30:30<00:33, 2.03it/s]
|
1418 |
99%|ββββββββββ| 10615/10682 [1:30:30<00:33, 2.03it/s]
|
1419 |
99%|ββββββββββ| 10616/10682 [1:30:31<00:32, 2.03it/s]
|
1420 |
99%|ββββββββββ| 10617/10682 [1:30:31<00:32, 2.03it/s]
|
1421 |
99%|ββββββββββ| 10618/10682 [1:30:32<00:31, 2.03it/s]
|
1422 |
99%|ββββββββββ| 10619/10682 [1:30:32<00:31, 2.03it/s]
|
1423 |
99%|ββββββββββ| 10620/10682 [1:30:33<00:30, 2.03it/s]
|
1424 |
99%|ββββββββββ| 10621/10682 [1:30:33<00:30, 2.03it/s]
|
1425 |
99%|ββββββββββ| 10622/10682 [1:30:34<00:29, 2.03it/s]
|
1426 |
99%|ββββββββββ| 10623/10682 [1:30:34<00:29, 2.03it/s]
|
1427 |
99%|ββββββββββ| 10624/10682 [1:30:35<00:28, 2.03it/s]
|
1428 |
99%|ββββββββββ| 10625/10682 [1:30:35<00:28, 2.03it/s]
|
1429 |
|
1430 |
+
|
1431 |
99%|ββββββββββ| 10625/10682 [1:30:35<00:28, 2.03it/s]
|
1432 |
99%|ββββββββββ| 10626/10682 [1:30:36<00:27, 2.02it/s]
|
1433 |
99%|ββββββββββ| 10627/10682 [1:30:36<00:27, 2.03it/s]
|
1434 |
99%|ββββββββββ| 10628/10682 [1:30:37<00:26, 2.03it/s]
|
1435 |
|
1436 |
+
|
1437 |
+
|
1438 |
|
1439 |
|
1440 |
+
|
1441 |
+
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|