diff --git "a/run-2024-07-15T17:00:53+00:00.log" "b/run-2024-07-15T17:00:53+00:00.log" --- "a/run-2024-07-15T17:00:53+00:00.log" +++ "b/run-2024-07-15T17:00:53+00:00.log" @@ -4704,4 +4704,1168 @@ Non-default generation parameters: {'max_length': 200, 'early_stopping': True, ' 63%|██████▎ | 232240/371472 [7:51:52<11:43:56, 3.30it/s] 63%|██████▎ | 232241/371472 [7:51:53<11:25:47, 3.38it/s] 63%|██████▎ | 232242/371472 [7:51:53<11:13:17, 3.45it/s] 63%|██████▎ | 232243/371472 [7:51:53<11:07:55, 3.47it/s] 63%|██████▎ | 232244/371472 [7:51:53<11:42:15, 3.30it/s] 63%|██████▎ | 232245/371472 [7:51:54<11:38:56, 3.32it/s] 63%|██████▎ | 232246/371472 [7:51:54<11:41:56, 3.31it/s] 63%|██████▎ | 232247/371472 [7:51:54<11:40:21, 3.31it/s] 63%|██████▎ | 232248/371472 [7:51:55<11:33:09, 3.35it/s] 63%|██████▎ | 232249/371472 [7:51:55<11:24:32, 3.39it/s] 63%|██████▎ | 232250/371472 [7:51:55<11:45:24, 3.29it/s] 63%|██████▎ | 232251/371472 [7:51:56<11:50:49, 3.26it/s] 63%|██████▎ | 232252/371472 [7:51:56<11:44:24, 3.29it/s] 63%|██████▎ | 232253/371472 [7:51:56<11:53:55, 3.25it/s] 63%|██████▎ | 232254/371472 [7:51:57<12:09:11, 3.18it/s] 63%|██████▎ | 232255/371472 [7:51:57<12:26:17, 3.11it/s] 63%|██████▎ | 232256/371472 [7:51:57<12:16:03, 3.15it/s] 63%|██████▎ | 232257/371472 [7:51:58<12:18:06, 3.14it/s] 63%|██████▎ | 232258/371472 [7:51:58<12:05:43, 3.20it/s] 63%|██████▎ | 232259/371472 [7:51:58<11:58:33, 3.23it/s] 63%|██████▎ | 232260/371472 [7:51:58<12:21:36, 3.13it/s] {'loss': 2.6214, 'learning_rate': 4.374636385183908e-07, 'epoch': 10.0} 63%|██████▎ | 232260/371472 [7:51:58<12:21:36, 3.13it/s] 63%|██████▎ | 232261/371472 [7:51:59<11:54:56, 3.25it/s] 63%|██████▎ | 232262/371472 [7:51:59<11:43:39, 3.30it/s] 63%|██████▎ | 232263/371472 [7:51:59<12:01:22, 3.22it/s] 63%|██████▎ | 232264/371472 [7:52:00<12:07:15, 3.19it/s] 63%|██████▎ | 232265/371472 [7:52:00<12:07:16, 3.19it/s] 63%|██████▎ | 232266/371472 [7:52:00<12:06:36, 3.19it/s] 63%|██████▎ | 232267/371472 [7:52:01<11:56:35, 3.24it/s] 63%|██████▎ | 232268/371472 [7:52:01<12:24:41, 3.12it/s] 63%|██████▎ | 232269/371472 [7:52:01<12:17:57, 3.14it/s] 63%|██████▎ | 232270/371472 [7:52:02<11:49:20, 3.27it/s] 63%|██████▎ | 232271/371472 [7:52:02<11:48:09, 3.28it/s] 63%|██████▎ | 232272/371472 [7:52:02<12:25:54, 3.11it/s] 63%|██████▎ | 232273/371472 [7:52:03<12:11:21, 3.17it/s] 63%|██████▎ | 232274/371472 [7:52:03<12:00:13, 3.22it/s] 63%|██████▎ | 232275/371472 [7:52:03<11:41:20, 3.31it/s] 63%|██████▎ | 232276/371472 [7:52:03<11:23:42, 3.39it/s] 63%|██████▎ | 232277/371472 [7:52:04<12:27:35, 3.10it/s] 63%|██████▎ | 232278/371472 [7:52:04<12:21:53, 3.13it/s] 63%|██████▎ | 232279/371472 [7:52:04<12:18:03, 3.14it/s] 63%|██████▎ | 232280/371472 [7:52:05<13:00:48, 2.97it/s] {'loss': 2.706, 'learning_rate': 4.37415156542912e-07, 'epoch': 10.0} 63%|██████▎ | 232280/371472 [7:52:05<13:00:48, 2.97it/s] 63%|██████▎ | 232281/371472 [7:52:05<13:24:12, 2.88it/s] 63%|██████▎ | 232282/371472 [7:52:05<12:20:28, 3.13it/s] 63%|██████▎ | 232283/371472 [7:52:06<13:23:19, 2.89it/s] 63%|██████▎ | 232284/371472 [7:52:06<12:41:50, 3.04it/s] 63%|██████▎ | 232285/371472 [7:52:06<12:35:55, 3.07it/s] 63%|██████▎ | 232286/371472 [7:52:07<12:23:15, 3.12it/s] 63%|██████▎ | 232287/371472 [7:52:07<12:22:50, 3.12it/s] 63%|██████▎ | 232288/371472 [7:52:07<12:27:33, 3.10it/s] 63%|██████▎ | 232289/371472 [7:52:08<13:01:08, 2.97it/s] 63%|██████▎ | 232290/371472 [7:52:08<12:29:08, 3.10it/s] 63%|██████▎ | 232291/371472 [7:52:08<12:22:48, 3.12it/s] 63%|██████▎ | 232292/371472 [7:52:09<12:29:36, 3.09it/s] 63%|██████▎ | 232293/371472 [7:52:09<12:33:58, 3.08it/s] 63%|██████▎ | 232294/371472 [7:52:09<12:04:59, 3.20it/s] 63%|██████▎ | 232295/371472 [7:52:10<12:14:07, 3.16it/s] 63%|██████▎ | 232296/371472 [7:52:10<13:34:07, 2.85it/s] 63%|██████▎ | 232297/371472 [7:52:10<13:06:54, 2.95it/s] 63%|██████▎ | 232298/371472 [7:52:11<12:54:01, 3.00it/s] 63%|██████▎ | 232299/371472 [7:52:11<12:59:38, 2.98it/s] 63%|██████▎ | 232300/371472 [7:52:11<13:06:07, 2.95it/s] {'loss': 2.7396, 'learning_rate': 4.37366674567433e-07, 'epoch': 10.01} - 63%|██████▎ | 232300/371472 [7:52:11<13:06:07, 2.95it/s] 63%|██████▎ | 232301/371472 [7:52:12<12:35:24, 3.07it/s] 63%|██████▎ | 232302/371472 [7:52:12<11:58:09, 3.23it/s] 63%|██████▎ | 232303/371472 [7:52:12<11:30:09, 3.36it/s] \ No newline at end of file + 63%|██████▎ | 232300/371472 [7:52:11<13:06:07, 2.95it/s] 63%|██████▎ | 232301/371472 [7:52:12<12:35:24, 3.07it/s] 63%|██████▎ | 232302/371472 [7:52:12<11:58:09, 3.23it/s] 63%|██████▎ | 232303/371472 [7:52:12<11:30:09, 3.36it/s] 63%|██████▎ | 232304/371472 [7:52:13<13:54:17, 2.78it/s] 63%|██████▎ | 232305/371472 [7:52:13<13:14:46, 2.92it/s] 63%|██████▎ | 232306/371472 [7:52:13<12:23:42, 3.12it/s] 63%|██████▎ | 232307/371472 [7:52:14<12:15:46, 3.15it/s] 63%|██████▎ | 232308/371472 [7:52:14<11:44:48, 3.29it/s] 63%|██████▎ | 232309/371472 [7:52:14<11:24:42, 3.39it/s] 63%|██████▎ | 232310/371472 [7:52:14<11:11:39, 3.45it/s] 63%|██████▎ | 232311/371472 [7:52:15<11:17:29, 3.42it/s] 63%|██████▎ | 232312/371472 [7:52:15<11:17:37, 3.42it/s] 63%|██████▎ | 232313/371472 [7:52:15<11:18:33, 3.42it/s] 63%|██████▎ | 232314/371472 [7:52:16<11:30:29, 3.36it/s] 63%|██████▎ | 232315/371472 [7:52:16<11:26:19, 3.38it/s] 63%|██████▎ | 232316/371472 [7:52:16<11:32:03, 3.35it/s] 63%|██████▎ | 232317/371472 [7:52:17<14:52:45, 2.60it/s] 63%|██████▎ | 232318/371472 [7:52:17<13:44:51, 2.81it/s] 63%|██████▎ | 232319/371472 [7:52:17<13:14:40, 2.92it/s] 63%|██████▎ | 232320/371472 [7:52:18<12:57:13, 2.98it/s] {'loss': 2.7347, 'learning_rate': 4.3731819259195417e-07, 'epoch': 10.01} + 63%|██████▎ | 232320/371472 [7:52:18<12:57:13, 2.98it/s] 63%|██████▎ | 232321/371472 [7:52:18<12:19:42, 3.14it/s] 63%|██████▎ | 232322/371472 [7:52:18<11:56:43, 3.24it/s] 63%|██████▎ | 232323/371472 [7:52:19<11:58:15, 3.23it/s] 63%|██████▎ | 232324/371472 [7:52:19<11:43:48, 3.30it/s] 63%|██████▎ | 232325/371472 [7:52:19<12:19:19, 3.14it/s] 63%|██████▎ | 232326/371472 [7:52:20<11:57:24, 3.23it/s] 63%|██████▎ | 232327/371472 [7:52:20<11:36:17, 3.33it/s] 63%|██████▎ | 232328/371472 [7:52:20<11:58:53, 3.23it/s] 63%|██████▎ | 232329/371472 [7:52:20<11:46:24, 3.28it/s] 63%|██████▎ | 232330/371472 [7:52:21<12:23:12, 3.12it/s] 63%|██████▎ | 232331/371472 [7:52:21<11:56:34, 3.24it/s] 63%|██████▎ | 232332/371472 [7:52:21<11:48:01, 3.28it/s] 63%|██████▎ | 232333/371472 [7:52:22<12:10:21, 3.18it/s] 63%|██████▎ | 232334/371472 [7:52:22<12:13:49, 3.16it/s] 63%|██████▎ | 232335/371472 [7:52:22<11:53:01, 3.25it/s] 63%|██████▎ | 232336/371472 [7:52:23<11:54:15, 3.25it/s] 63%|██████▎ | 232337/371472 [7:52:23<12:14:29, 3.16it/s] 63%|██████▎ | 232338/371472 [7:52:23<12:01:15, 3.22it/s] 63%|██████▎ | 232339/371472 [7:52:24<11:51:23, 3.26it/s] 63%|██████▎ | 232340/371472 [7:52:24<11:40:46, 3.31it/s] {'loss': 2.9149, 'learning_rate': 4.3726971061647524e-07, 'epoch': 10.01} + 63%|██████▎ | 232340/371472 [7:52:24<11:40:46, 3.31it/s] 63%|██████▎ | 232341/371472 [7:52:24<11:53:11, 3.25it/s] 63%|██████▎ | 232342/371472 [7:52:24<11:39:36, 3.31it/s] 63%|██████▎ | 232343/371472 [7:52:25<11:25:16, 3.38it/s] 63%|██████▎ | 232344/371472 [7:52:25<11:58:06, 3.23it/s] 63%|██████▎ | 232345/371472 [7:52:25<11:41:08, 3.31it/s] 63%|██████▎ | 232346/371472 [7:52:26<11:56:34, 3.24it/s] 63%|██████▎ | 232347/371472 [7:52:26<11:53:07, 3.25it/s] 63%|██████▎ | 232348/371472 [7:52:26<11:27:15, 3.37it/s] 63%|██████▎ | 232349/371472 [7:52:27<11:43:34, 3.30it/s] 63%|██████▎ | 232350/371472 [7:52:27<11:32:03, 3.35it/s] 63%|██████▎ | 232351/371472 [7:52:27<11:14:57, 3.44it/s] 63%|██████▎ | 232352/371472 [7:52:27<11:27:30, 3.37it/s] 63%|██████▎ | 232353/371472 [7:52:28<11:21:36, 3.40it/s] 63%|██████▎ | 232354/371472 [7:52:28<11:05:34, 3.48it/s] 63%|██████▎ | 232355/371472 [7:52:28<11:05:37, 3.48it/s] 63%|██████▎ | 232356/371472 [7:52:29<11:00:44, 3.51it/s] 63%|██████▎ | 232357/371472 [7:52:29<11:57:11, 3.23it/s] 63%|██████▎ | 232358/371472 [7:52:29<11:26:54, 3.38it/s] 63%|██████▎ | 232359/371472 [7:52:29<11:23:12, 3.39it/s] 63%|██████▎ | 232360/371472 [7:52:30<11:15:28, 3.43it/s] {'loss': 2.7851, 'learning_rate': 4.3722122864099637e-07, 'epoch': 10.01} + 63%|██████▎ | 232360/371472 [7:52:30<11:15:28, 3.43it/s] 63%|██████▎ | 232361/371472 [7:52:30<11:16:49, 3.43it/s] 63%|██████▎ | 232362/371472 [7:52:30<11:05:21, 3.48it/s] 63%|██████▎ | 232363/371472 [7:52:31<11:15:19, 3.43it/s] 63%|██████▎ | 232364/371472 [7:52:31<11:17:18, 3.42it/s] 63%|██████▎ | 232365/371472 [7:52:31<11:25:45, 3.38it/s] 63%|██████▎ | 232366/371472 [7:52:32<11:15:21, 3.43it/s] 63%|██████▎ | 232367/371472 [7:52:32<11:10:29, 3.46it/s] 63%|██████▎ | 232368/371472 [7:52:32<11:22:40, 3.40it/s] 63%|██████▎ | 232369/371472 [7:52:32<11:08:50, 3.47it/s] 63%|██████▎ | 232370/371472 [7:52:33<11:38:53, 3.32it/s] 63%|██████▎ | 232371/371472 [7:52:33<11:29:06, 3.36it/s] 63%|██████▎ | 232372/371472 [7:52:33<11:34:44, 3.34it/s] 63%|██████▎ | 232373/371472 [7:52:34<11:53:57, 3.25it/s] 63%|██████▎ | 232374/371472 [7:52:34<11:53:58, 3.25it/s] 63%|██████▎ | 232375/371472 [7:52:34<11:43:26, 3.30it/s] 63%|██████▎ | 232376/371472 [7:52:35<11:59:34, 3.22it/s] 63%|██████▎ | 232377/371472 [7:52:35<11:41:36, 3.30it/s] 63%|██████▎ | 232378/371472 [7:52:35<11:40:50, 3.31it/s] 63%|██████▎ | 232379/371472 [7:52:35<11:35:28, 3.33it/s] 63%|██████▎ | 232380/371472 [7:52:36<11:20:31, 3.41it/s] {'loss': 2.7284, 'learning_rate': 4.3717274666551744e-07, 'epoch': 10.01} + 63%|██████▎ | 232380/371472 [7:52:36<11:20:31, 3.41it/s] 63%|██████▎ | 232381/371472 [7:52:36<11:24:39, 3.39it/s] 63%|██████▎ | 232382/371472 [7:52:36<12:03:04, 3.21it/s] 63%|██████▎ | 232383/371472 [7:52:37<11:48:09, 3.27it/s] 63%|██████▎ | 232384/371472 [7:52:37<12:26:22, 3.11it/s] 63%|██████▎ | 232385/371472 [7:52:37<12:44:24, 3.03it/s] 63%|██████▎ | 232386/371472 [7:52:38<12:26:26, 3.11it/s] 63%|██████▎ | 232387/371472 [7:52:38<12:07:31, 3.19it/s] 63%|██████▎ | 232388/371472 [7:52:38<13:06:20, 2.95it/s] 63%|██████▎ | 232389/371472 [7:52:39<12:29:37, 3.09it/s] 63%|██████▎ | 232390/371472 [7:52:39<11:56:27, 3.24it/s] 63%|██████▎ | 232391/371472 [7:52:39<11:52:58, 3.25it/s] 63%|██████▎ | 232392/371472 [7:52:40<11:50:24, 3.26it/s] 63%|██████▎ | 232393/371472 [7:52:40<11:21:14, 3.40it/s] 63%|██████▎ | 232394/371472 [7:52:40<12:16:16, 3.15it/s] 63%|██████▎ | 232395/371472 [7:52:40<12:04:55, 3.20it/s] 63%|██████▎ | 232396/371472 [7:52:41<11:48:29, 3.27it/s] 63%|██████▎ | 232397/371472 [7:52:41<11:27:36, 3.37it/s] 63%|██████▎ | 232398/371472 [7:52:41<11:45:31, 3.29it/s] 63%|██████▎ | 232399/371472 [7:52:42<12:11:10, 3.17it/s] 63%|██████▎ | 232400/371472 [7:52:42<12:47:14, 3.02it/s] {'loss': 2.6129, 'learning_rate': 4.371242646900385e-07, 'epoch': 10.01} + 63%|██████▎ | 232400/371472 [7:52:42<12:47:14, 3.02it/s] 63%|██████▎ | 232401/371472 [7:52:42<12:38:30, 3.06it/s] 63%|██████▎ | 232402/371472 [7:52:43<12:48:57, 3.01it/s] 63%|██████▎ | 232403/371472 [7:52:43<13:35:15, 2.84it/s] 63%|██████▎ | 232404/371472 [7:52:43<13:28:54, 2.87it/s] 63%|██████▎ | 232405/371472 [7:52:44<13:21:52, 2.89it/s] 63%|██████▎ | 232406/371472 [7:52:44<12:57:09, 2.98it/s] 63%|██████▎ | 232407/371472 [7:52:44<12:46:53, 3.02it/s] 63%|██████▎ | 232408/371472 [7:52:45<12:52:02, 3.00it/s] 63%|██████▎ | 232409/371472 [7:52:45<12:44:11, 3.03it/s] 63%|██████▎ | 232410/371472 [7:52:45<12:31:37, 3.08it/s] 63%|██████▎ | 232411/371472 [7:52:46<12:12:05, 3.17it/s] 63%|██████▎ | 232412/371472 [7:52:46<12:25:07, 3.11it/s] 63%|██████▎ | 232413/371472 [7:52:46<12:28:17, 3.10it/s] 63%|██████▎ | 232414/371472 [7:52:47<12:03:03, 3.21it/s] 63%|██████▎ | 232415/371472 [7:52:47<11:43:24, 3.29it/s] 63%|██████▎ | 232416/371472 [7:52:47<11:22:14, 3.40it/s] 63%|██████▎ | 232417/371472 [7:52:48<12:03:47, 3.20it/s] 63%|██████▎ | 232418/371472 [7:52:48<12:07:01, 3.19it/s] 63%|██████▎ | 232419/371472 [7:52:48<12:37:05, 3.06it/s] 63%|██████▎ | 232420/371472 [7:52:49<12:16:47, 3.15it/s] {'loss': 2.8324, 'learning_rate': 4.3707578271455963e-07, 'epoch': 10.01} + 63%|██████▎ | 232420/371472 [7:52:49<12:16:47, 3.15it/s] 63%|██████▎ | 232421/371472 [7:52:49<12:27:41, 3.10it/s] 63%|██████▎ | 232422/371472 [7:52:49<12:09:07, 3.18it/s] 63%|██████▎ | 232423/371472 [7:52:49<11:58:51, 3.22it/s] 63%|██████▎ | 232424/371472 [7:52:50<11:35:30, 3.33it/s] 63%|██████▎ | 232425/371472 [7:52:50<12:16:01, 3.15it/s] 63%|██████▎ | 232426/371472 [7:52:50<11:56:47, 3.23it/s] 63%|██████▎ | 232427/371472 [7:52:51<12:02:50, 3.21it/s] 63%|██████▎ | 232428/371472 [7:52:51<11:50:59, 3.26it/s] 63%|██████▎ | 232429/371472 [7:52:51<11:36:28, 3.33it/s] 63%|██████▎ | 232430/371472 [7:52:52<12:05:03, 3.20it/s] 63%|██████▎ | 232431/371472 [7:52:52<11:47:43, 3.27it/s] 63%|██████▎ | 232432/371472 [7:52:52<11:33:50, 3.34it/s] 63%|██████▎ | 232433/371472 [7:52:52<11:23:29, 3.39it/s] 63%|██████▎ | 232434/371472 [7:52:53<11:19:52, 3.41it/s] 63%|██████▎ | 232435/371472 [7:52:53<11:16:08, 3.43it/s] 63%|██████▎ | 232436/371472 [7:52:53<11:18:10, 3.42it/s] 63%|██████▎ | 232437/371472 [7:52:54<11:14:27, 3.44it/s] 63%|██████▎ | 232438/371472 [7:52:54<11:28:06, 3.37it/s] 63%|██████▎ | 232439/371472 [7:52:54<11:13:12, 3.44it/s] 63%|██████▎ | 232440/371472 [7:52:55<11:03:32, 3.49it/s] {'loss': 2.7013, 'learning_rate': 4.370273007390807e-07, 'epoch': 10.01} + 63%|██████▎ | 232440/371472 [7:52:55<11:03:32, 3.49it/s] 63%|██████▎ | 232441/371472 [7:52:55<11:54:11, 3.24it/s] 63%|██████▎ | 232442/371472 [7:52:55<12:04:15, 3.20it/s] 63%|██████▎ | 232443/371472 [7:52:56<12:33:25, 3.08it/s] 63%|██████▎ | 232444/371472 [7:52:56<12:26:53, 3.10it/s] 63%|██████▎ | 232445/371472 [7:52:56<12:57:23, 2.98it/s] 63%|██████▎ | 232446/371472 [7:52:57<12:23:26, 3.12it/s] 63%|██████▎ | 232447/371472 [7:52:57<12:16:44, 3.15it/s] 63%|██████▎ | 232448/371472 [7:52:57<12:19:01, 3.14it/s] 63%|██████▎ | 232449/371472 [7:52:58<13:06:22, 2.95it/s] 63%|██████▎ | 232450/371472 [7:52:58<12:38:54, 3.05it/s] 63%|██████▎ | 232451/371472 [7:52:58<12:19:18, 3.13it/s] 63%|██████▎ | 232452/371472 [7:52:58<12:13:57, 3.16it/s] 63%|██████▎ | 232453/371472 [7:52:59<11:59:05, 3.22it/s] 63%|██████▎ | 232454/371472 [7:52:59<11:42:13, 3.30it/s] 63%|██████▎ | 232455/371472 [7:52:59<11:27:38, 3.37it/s] 63%|██████▎ | 232456/371472 [7:53:00<11:30:52, 3.35it/s] 63%|██████▎ | 232457/371472 [7:53:00<11:43:07, 3.30it/s] 63%|██████▎ | 232458/371472 [7:53:00<11:30:41, 3.35it/s] 63%|██████▎ | 232459/371472 [7:53:01<11:34:12, 3.34it/s] 63%|██████▎ | 232460/371472 [7:53:01<11:27:59, 3.37it/s] {'loss': 2.7624, 'learning_rate': 4.369788187636019e-07, 'epoch': 10.01} + 63%|██████▎ | 232460/371472 [7:53:01<11:27:59, 3.37it/s] 63%|██████▎ | 232461/371472 [7:53:01<12:22:35, 3.12it/s] 63%|██████▎ | 232462/371472 [7:53:02<12:22:13, 3.12it/s] 63%|██████▎ | 232463/371472 [7:53:02<12:01:43, 3.21it/s] 63%|██████▎ | 232464/371472 [7:53:02<11:41:17, 3.30it/s] 63%|██████▎ | 232465/371472 [7:53:02<11:26:12, 3.38it/s] 63%|██████▎ | 232466/371472 [7:53:03<11:57:43, 3.23it/s] 63%|██████▎ | 232467/371472 [7:53:03<11:45:29, 3.28it/s] 63%|██████▎ | 232468/371472 [7:53:03<11:47:13, 3.28it/s] 63%|██████▎ | 232469/371472 [7:53:04<12:21:14, 3.13it/s] 63%|██████▎ | 232470/371472 [7:53:04<12:02:00, 3.21it/s] 63%|██████▎ | 232471/371472 [7:53:04<11:24:53, 3.38it/s] 63%|██████▎ | 232472/371472 [7:53:05<12:00:04, 3.22it/s] 63%|██████▎ | 232473/371472 [7:53:05<11:45:48, 3.28it/s] 63%|██████▎ | 232474/371472 [7:53:05<11:16:23, 3.42it/s] 63%|██████▎ | 232475/371472 [7:53:05<11:50:17, 3.26it/s] 63%|█████���▎ | 232476/371472 [7:53:06<11:47:44, 3.27it/s] 63%|██████▎ | 232477/371472 [7:53:06<12:06:36, 3.19it/s] 63%|██████▎ | 232478/371472 [7:53:06<11:49:22, 3.27it/s] 63%|██████▎ | 232479/371472 [7:53:07<11:41:58, 3.30it/s] 63%|██████▎ | 232480/371472 [7:53:07<11:57:00, 3.23it/s] {'loss': 2.7084, 'learning_rate': 4.3693033678812295e-07, 'epoch': 10.01} + 63%|██████▎ | 232480/371472 [7:53:07<11:57:00, 3.23it/s] 63%|██████▎ | 232481/371472 [7:53:07<11:35:24, 3.33it/s] 63%|██████▎ | 232482/371472 [7:53:08<11:34:48, 3.33it/s] 63%|██████▎ | 232483/371472 [7:53:08<11:22:53, 3.39it/s] 63%|██████▎ | 232484/371472 [7:53:08<11:26:17, 3.38it/s] 63%|██████▎ | 232485/371472 [7:53:08<10:57:05, 3.53it/s] 63%|██████▎ | 232486/371472 [7:53:09<10:53:11, 3.55it/s] 63%|██████▎ | 232487/371472 [7:53:09<10:55:43, 3.53it/s] 63%|██████▎ | 232488/371472 [7:53:09<11:04:53, 3.48it/s] 63%|██████▎ | 232489/371472 [7:53:10<11:36:12, 3.33it/s] 63%|██████▎ | 232490/371472 [7:53:10<11:36:19, 3.33it/s] 63%|██████▎ | 232491/371472 [7:53:10<11:36:10, 3.33it/s] 63%|██████▎ | 232492/371472 [7:53:10<11:29:28, 3.36it/s] 63%|██████▎ | 232493/371472 [7:53:11<11:49:38, 3.26it/s] 63%|██████▎ | 232494/371472 [7:53:11<11:28:11, 3.37it/s] 63%|██████▎ | 232495/371472 [7:53:11<11:20:34, 3.40it/s] 63%|██████▎ | 232496/371472 [7:53:12<11:18:53, 3.41it/s] 63%|██████▎ | 232497/371472 [7:53:12<11:57:38, 3.23it/s] 63%|██████▎ | 232498/371472 [7:53:12<11:54:49, 3.24it/s] 63%|██████▎ | 232499/371472 [7:53:13<11:58:52, 3.22it/s] 63%|██████▎ | 232500/371472 [7:53:13<12:55:00, 2.99it/s] {'loss': 2.6958, 'learning_rate': 4.368818548126441e-07, 'epoch': 10.01} + 63%|██████▎ | 232500/371472 [7:53:13<12:55:00, 2.99it/s] 63%|██████▎ | 232501/371472 [7:53:13<13:02:06, 2.96it/s] 63%|██████▎ | 232502/371472 [7:53:14<13:25:24, 2.88it/s] 63%|██████▎ | 232503/371472 [7:53:14<13:08:59, 2.94it/s] 63%|██████▎ | 232504/371472 [7:53:14<13:31:17, 2.85it/s] 63%|██████▎ | 232505/371472 [7:53:15<12:47:43, 3.02it/s] 63%|██████▎ | 232506/371472 [7:53:15<16:15:38, 2.37it/s] 63%|██████▎ | 232507/371472 [7:53:16<15:39:51, 2.46it/s] 63%|██████▎ | 232508/371472 [7:53:16<14:33:54, 2.65it/s] 63%|██████▎ | 232509/371472 [7:53:16<14:15:13, 2.71it/s] 63%|██████▎ | 232510/371472 [7:53:17<13:23:51, 2.88it/s] 63%|██████▎ | 232511/371472 [7:53:17<12:46:34, 3.02it/s] 63%|██████▎ | 232512/371472 [7:53:17<12:21:23, 3.12it/s] 63%|██████▎ | 232513/371472 [7:53:18<12:20:13, 3.13it/s] 63%|██████▎ | 232514/371472 [7:53:18<11:58:32, 3.22it/s] 63%|██████▎ | 232515/371472 [7:53:18<11:47:19, 3.27it/s] 63%|██████▎ | 232516/371472 [7:53:18<11:20:34, 3.40it/s] 63%|██████▎ | 232517/371472 [7:53:19<11:33:25, 3.34it/s] 63%|██████▎ | 232518/371472 [7:53:19<12:04:13, 3.20it/s] 63%|██████▎ | 232519/371472 [7:53:19<11:47:26, 3.27it/s] 63%|██████▎ | 232520/371472 [7:53:20<11:37:19, 3.32it/s] {'loss': 2.7841, 'learning_rate': 4.3683337283716515e-07, 'epoch': 10.02} + 63%|██████▎ | 232520/371472 [7:53:20<11:37:19, 3.32it/s] 63%|██████▎ | 232521/371472 [7:53:20<11:36:31, 3.32it/s] 63%|██████▎ | 232522/371472 [7:53:20<12:53:43, 2.99it/s] 63%|██████▎ | 232523/371472 [7:53:21<13:05:59, 2.95it/s] 63%|██████▎ | 232524/371472 [7:53:21<12:32:33, 3.08it/s] 63%|██████▎ | 232525/371472 [7:53:21<12:28:29, 3.09it/s] 63%|██████▎ | 232526/371472 [7:53:22<11:54:46, 3.24it/s] 63%|██████▎ | 232527/371472 [7:53:22<11:45:01, 3.28it/s] 63%|██████▎ | 232528/371472 [7:53:22<12:04:09, 3.20it/s] 63%|██████▎ | 232529/371472 [7:53:23<12:03:06, 3.20it/s] 63%|██████▎ | 232530/371472 [7:53:23<12:04:15, 3.20it/s] 63%|██████▎ | 232531/371472 [7:53:23<12:10:09, 3.17it/s] 63%|██████▎ | 232532/371472 [7:53:24<14:11:13, 2.72it/s] 63%|██████▎ | 232533/371472 [7:53:24<13:29:42, 2.86it/s] 63%|██████▎ | 232534/371472 [7:53:24<13:00:17, 2.97it/s] 63%|██████▎ | 232535/371472 [7:53:25<12:38:15, 3.05it/s] 63%|���█████▎ | 232536/371472 [7:53:25<12:06:54, 3.19it/s] 63%|██████▎ | 232537/371472 [7:53:25<12:54:48, 2.99it/s] 63%|██████▎ | 232538/371472 [7:53:26<12:36:45, 3.06it/s] 63%|██████▎ | 232539/371472 [7:53:26<12:21:57, 3.12it/s] 63%|██████▎ | 232540/371472 [7:53:26<12:01:58, 3.21it/s] {'loss': 2.7964, 'learning_rate': 4.3678489086168633e-07, 'epoch': 10.02} + 63%|██████▎ | 232540/371472 [7:53:26<12:01:58, 3.21it/s] 63%|██████▎ | 232541/371472 [7:53:26<11:39:00, 3.31it/s] 63%|██████▎ | 232542/371472 [7:53:27<12:04:26, 3.20it/s] 63%|██████▎ | 232543/371472 [7:53:27<12:16:12, 3.15it/s] 63%|██████▎ | 232544/371472 [7:53:27<12:51:47, 3.00it/s] 63%|██████▎ | 232545/371472 [7:53:28<12:54:23, 2.99it/s] 63%|██████▎ | 232546/371472 [7:53:28<12:39:56, 3.05it/s] 63%|██████▎ | 232547/371472 [7:53:28<12:32:04, 3.08it/s] 63%|██████▎ | 232548/371472 [7:53:29<12:34:54, 3.07it/s] 63%|██████▎ | 232549/371472 [7:53:29<12:43:55, 3.03it/s] 63%|██████▎ | 232550/371472 [7:53:29<12:14:59, 3.15it/s] 63%|██████▎ | 232551/371472 [7:53:30<12:24:00, 3.11it/s] 63%|██████▎ | 232552/371472 [7:53:30<12:12:02, 3.16it/s] 63%|██████▎ | 232553/371472 [7:53:30<11:50:01, 3.26it/s] 63%|██████▎ | 232554/371472 [7:53:31<12:42:59, 3.03it/s] 63%|██████▎ | 232555/371472 [7:53:31<12:24:16, 3.11it/s] 63%|██████▎ | 232556/371472 [7:53:31<12:16:03, 3.15it/s] 63%|██████▎ | 232557/371472 [7:53:32<12:12:00, 3.16it/s] 63%|██████▎ | 232558/371472 [7:53:32<11:40:11, 3.31it/s] 63%|██████▎ | 232559/371472 [7:53:32<11:41:15, 3.30it/s] 63%|██████▎ | 232560/371472 [7:53:32<11:23:42, 3.39it/s] {'loss': 2.7828, 'learning_rate': 4.3673640888620735e-07, 'epoch': 10.02} + 63%|██████▎ | 232560/371472 [7:53:32<11:23:42, 3.39it/s] 63%|██████▎ | 232561/371472 [7:53:33<11:25:57, 3.38it/s] 63%|██████▎ | 232562/371472 [7:53:33<11:20:21, 3.40it/s] 63%|██████▎ | 232563/371472 [7:53:33<11:06:23, 3.47it/s] 63%|██████▎ | 232564/371472 [7:53:34<11:09:33, 3.46it/s] 63%|██████▎ | 232565/371472 [7:53:34<11:26:54, 3.37it/s] 63%|██████▎ | 232566/371472 [7:53:34<11:48:47, 3.27it/s] 63%|██████▎ | 232567/371472 [7:53:35<11:44:27, 3.29it/s] 63%|██████▎ | 232568/371472 [7:53:35<11:27:54, 3.37it/s] 63%|██████▎ | 232569/371472 [7:53:35<11:13:34, 3.44it/s] 63%|██████▎ | 232570/371472 [7:53:35<11:42:13, 3.30it/s] 63%|██████▎ | 232571/371472 [7:53:36<11:34:48, 3.33it/s] 63%|██████▎ | 232572/371472 [7:53:36<11:50:55, 3.26it/s] 63%|██████▎ | 232573/371472 [7:53:36<12:06:28, 3.19it/s] 63%|██████▎ | 232574/371472 [7:53:37<11:55:35, 3.24it/s] 63%|██████▎ | 232575/371472 [7:53:37<12:05:06, 3.19it/s] 63%|██████▎ | 232576/371472 [7:53:37<12:23:29, 3.11it/s] 63%|██████▎ | 232577/371472 [7:53:38<12:54:40, 2.99it/s] 63%|██████▎ | 232578/371472 [7:53:38<13:14:14, 2.91it/s] 63%|██████▎ | 232579/371472 [7:53:38<12:50:33, 3.00it/s] 63%|██████▎ | 232580/371472 [7:53:39<12:07:23, 3.18it/s] {'loss': 2.8251, 'learning_rate': 4.366879269107285e-07, 'epoch': 10.02} + 63%|██████▎ | 232580/371472 [7:53:39<12:07:23, 3.18it/s] 63%|██████▎ | 232581/371472 [7:53:39<11:43:21, 3.29it/s] 63%|██████▎ | 232582/371472 [7:53:39<11:35:47, 3.33it/s] 63%|██████▎ | 232583/371472 [7:53:40<11:39:11, 3.31it/s] 63%|██████▎ | 232584/371472 [7:53:40<11:53:24, 3.24it/s] 63%|██████▎ | 232585/371472 [7:53:40<12:34:06, 3.07it/s] 63%|██████▎ | 232586/371472 [7:53:41<12:12:10, 3.16it/s] 63%|██████▎ | 232587/371472 [7:53:41<11:59:46, 3.22it/s] 63%|██████▎ | 232588/371472 [7:53:41<11:46:12, 3.28it/s] 63%|██████▎ | 232589/371472 [7:53:41<12:01:05, 3.21it/s] 63%|██████▎ | 232590/371472 [7:53:42<11:28:30, 3.36it/s] 63%|██████▎ | 232591/371472 [7:53:42<11:47:30, 3.27it/s] 63%|██████▎ | 232592/371472 [7:53:42<11:35:14, 3.33it/s] 63%|██████▎ | 232593/371472 [7:53:43<11:28:33, 3.36it/s] 63%|██████▎ | 232594/371472 [7:53:43<11:38:02, 3.32it/s] 63%|██████▎ | 232595/371472 [7:53:43<11:37:01, 3.32it/s] 63%|██████▎ | 232596/371472 [7:53:44<12:07:25, 3.18it/s] 63%|██████▎ | 232597/371472 [7:53:44<12:06:14, 3.19it/s] 63%|██████▎ | 232598/371472 [7:53:44<12:20:47, 3.12it/s] 63%|██████▎ | 232599/371472 [7:53:45<12:53:37, 2.99it/s] 63%|██████▎ | 232600/371472 [7:53:45<13:05:48, 2.95it/s] {'loss': 2.6227, 'learning_rate': 4.3663944493524954e-07, 'epoch': 10.02} + 63%|██████▎ | 232600/371472 [7:53:45<13:05:48, 2.95it/s] 63%|██████▎ | 232601/371472 [7:53:45<13:00:44, 2.96it/s] 63%|██████▎ | 232602/371472 [7:53:46<12:24:40, 3.11it/s] 63%|██████▎ | 232603/371472 [7:53:46<12:03:00, 3.20it/s] 63%|██████▎ | 232604/371472 [7:53:46<11:48:36, 3.27it/s] 63%|██████▎ | 232605/371472 [7:53:46<11:27:37, 3.37it/s] 63%|██████▎ | 232606/371472 [7:53:47<11:14:27, 3.43it/s] 63%|██████▎ | 232607/371472 [7:53:47<11:24:03, 3.38it/s] 63%|██████▎ | 232608/371472 [7:53:47<11:20:25, 3.40it/s] 63%|██████▎ | 232609/371472 [7:53:48<12:06:34, 3.19it/s] 63%|██████▎ | 232610/371472 [7:53:48<11:52:36, 3.25it/s] 63%|██████▎ | 232611/371472 [7:53:48<13:33:43, 2.84it/s] 63%|██████▎ | 232612/371472 [7:53:49<12:47:56, 3.01it/s] 63%|██████▎ | 232613/371472 [7:53:49<12:23:20, 3.11it/s] 63%|██████▎ | 232614/371472 [7:53:49<12:29:20, 3.09it/s] 63%|██████▎ | 232615/371472 [7:53:50<12:22:14, 3.12it/s] 63%|██████▎ | 232616/371472 [7:53:50<12:21:21, 3.12it/s] 63%|██████▎ | 232617/371472 [7:53:50<12:03:49, 3.20it/s] 63%|██████▎ | 232618/371472 [7:53:51<11:31:29, 3.35it/s] 63%|██████▎ | 232619/371472 [7:53:51<11:40:23, 3.30it/s] 63%|██████▎ | 232620/371472 [7:53:51<12:03:28, 3.20it/s] {'loss': 2.6491, 'learning_rate': 4.365909629597707e-07, 'epoch': 10.02} + 63%|██████▎ | 232620/371472 [7:53:51<12:03:28, 3.20it/s] 63%|██████▎ | 232621/371472 [7:53:52<12:14:46, 3.15it/s] 63%|██████▎ | 232622/371472 [7:53:52<12:45:04, 3.02it/s] 63%|██████▎ | 232623/371472 [7:53:52<12:41:32, 3.04it/s] 63%|██████▎ | 232624/371472 [7:53:53<12:39:02, 3.05it/s] 63%|██████▎ | 232625/371472 [7:53:53<12:29:13, 3.09it/s] 63%|██████▎ | 232626/371472 [7:53:53<12:20:19, 3.13it/s] 63%|██████▎ | 232627/371472 [7:53:53<12:01:11, 3.21it/s] 63%|██████▎ | 232628/371472 [7:53:54<11:47:29, 3.27it/s] 63%|██████▎ | 232629/371472 [7:53:54<11:33:27, 3.34it/s] 63%|██████▎ | 232630/371472 [7:53:54<11:35:59, 3.32it/s] 63%|██████▎ | 232631/371472 [7:53:55<11:45:42, 3.28it/s] 63%|██████▎ | 232632/371472 [7:53:55<12:01:50, 3.21it/s] 63%|██████▎ | 232633/371472 [7:53:55<11:59:45, 3.21it/s] 63%|██████▎ | 232634/371472 [7:53:56<11:32:28, 3.34it/s] 63%|██████▎ | 232635/371472 [7:53:56<11:18:19, 3.41it/s] 63%|██████▎ | 232636/371472 [7:53:56<11:20:27, 3.40it/s] 63%|██████▎ | 232637/371472 [7:53:56<11:26:57, 3.37it/s] 63%|██████▎ | 232638/371472 [7:53:57<11:38:13, 3.31it/s] 63%|██████▎ | 232639/371472 [7:53:57<11:26:15, 3.37it/s] 63%|██████▎ | 232640/371472 [7:53:57<11:39:35, 3.31it/s] {'loss': 2.7686, 'learning_rate': 4.365424809842918e-07, 'epoch': 10.02} + 63%|██████▎ | 232640/371472 [7:53:57<11:39:35, 3.31it/s] 63%|██████▎ | 232641/371472 [7:53:58<12:09:11, 3.17it/s] 63%|██████▎ | 232642/371472 [7:53:58<12:03:34, 3.20it/s] 63%|██████▎ | 232643/371472 [7:53:58<11:33:57, 3.33it/s] 63%|██████▎ | 232644/371472 [7:53:59<14:45:36, 2.61it/s] 63%|██████▎ | 232645/371472 [7:53:59<13:34:25, 2.84it/s] 63%|██████▎ | 232646/371472 [7:53:59<12:54:25, 2.99it/s] 63%|██████▎ | 232647/371472 [7:54:00<12:17:18, 3.14it/s] 63%|██████▎ | 232648/371472 [7:54:00<11:44:40, 3.28it/s] 63%|██████▎ | 232649/371472 [7:54:00<11:40:30, 3.30it/s] 63%|██████▎ | 232650/371472 [7:54:01<12:01:02, 3.21it/s] 63%|██████▎ | 232651/371472 [7:54:01<11:41:27, 3.30it/s] 63%|██████▎ | 232652/371472 [7:54:01<11:23:14, 3.39it/s] 63%|██████▎ | 232653/371472 [7:54:01<11:16:18, 3.42it/s] 63%|██████▎ | 232654/371472 [7:54:02<11:19:58, 3.40it/s] 63%|██████▎ | 232655/371472 [7:54:02<11:26:59, 3.37it/s] 63%|██████▎ | 232656/371472 [7:54:02<12:45:43, 3.02it/s] 63%|██████▎ | 232657/371472 [7:54:03<12:07:02, 3.18it/s] 63%|██████▎ | 232658/371472 [7:54:03<13:26:55, 2.87it/s] 63%|██████▎ | 232659/371472 [7:54:03<13:13:08, 2.92it/s] 63%|██████▎ | 232660/371472 [7:54:04<12:30:32, 3.08it/s] {'loss': 2.7209, 'learning_rate': 4.3649399900881297e-07, 'epoch': 10.02} + 63%|██████▎ | 232660/371472 [7:54:04<12:30:32, 3.08it/s] 63%|██████▎ | 232661/371472 [7:54:04<12:15:35, 3.15it/s] 63%|██████▎ | 232662/371472 [7:54:04<12:14:59, 3.15it/s] 63%|██████▎ | 232663/371472 [7:54:05<11:44:02, 3.29it/s] 63%|██████▎ | 232664/371472 [7:54:05<11:23:00, 3.39it/s] 63%|██████▎ | 232665/371472 [7:54:05<11:13:26, 3.44it/s] 63%|██████▎ | 232666/371472 [7:54:06<11:17:15, 3.42it/s] 63%|██████▎ | 232667/371472 [7:54:06<11:05:57, 3.47it/s] 63%|██████▎ | 232668/371472 [7:54:06<11:07:24, 3.47it/s] 63%|██████▎ | 232669/371472 [7:54:06<11:20:34, 3.40it/s] 63%|██████▎ | 232670/371472 [7:54:07<11:04:31, 3.48it/s] 63%|██████▎ | 232671/371472 [7:54:07<11:02:36, 3.49it/s] 63%|██████▎ | 232672/371472 [7:54:07<12:04:26, 3.19it/s] 63%|██████▎ | 232673/371472 [7:54:08<12:33:02, 3.07it/s] 63%|██████▎ | 232674/371472 [7:54:08<12:06:45, 3.18it/s] 63%|██████▎ | 232675/371472 [7:54:08<12:11:46, 3.16it/s] 63%|██████▎ | 232676/371472 [7:54:09<11:37:27, 3.32it/s] 63%|██████▎ | 232677/371472 [7:54:09<11:30:30, 3.35it/s] 63%|██████▎ | 232678/371472 [7:54:09<11:03:10, 3.49it/s] 63%|██████▎ | 232679/371472 [7:54:09<10:53:37, 3.54it/s] 63%|██████▎ | 232680/371472 [7:54:10<10:39:48, 3.62it/s] {'loss': 2.9373, 'learning_rate': 4.36445517033334e-07, 'epoch': 10.02} + 63%|██████▎ | 232680/371472 [7:54:10<10:39:48, 3.62it/s] 63%|██████▎ | 232681/371472 [7:54:10<11:28:33, 3.36it/s] 63%|██████▎ | 232682/371472 [7:54:10<11:32:26, 3.34it/s] 63%|██████▎ | 232683/371472 [7:54:11<11:38:08, 3.31it/s] 63%|██████▎ | 232684/371472 [7:54:11<12:19:16, 3.13it/s] 63%|██████▎ | 232685/371472 [7:54:11<11:49:59, 3.26it/s] 63%|██████▎ | 232686/371472 [7:54:11<11:26:45, 3.37it/s] 63%|██████▎ | 232687/371472 [7:54:12<11:17:53, 3.41it/s] 63%|██████▎ | 232688/371472 [7:54:12<11:11:38, 3.44it/s] 63%|██████▎ | 232689/371472 [7:54:12<11:13:47, 3.43it/s] 63%|██████▎ | 232690/371472 [7:54:13<11:22:18, 3.39it/s] 63%|██████▎ | 232691/371472 [7:54:13<11:12:31, 3.44it/s] 63%|██████▎ | 232692/371472 [7:54:13<11:10:04, 3.45it/s] 63%|██████▎ | 232693/371472 [7:54:14<10:58:31, 3.51it/s] 63%|██████▎ | 232694/371472 [7:54:14<11:07:59, 3.46it/s] 63%|██████▎ | 232695/371472 [7:54:14<11:01:11, 3.50it/s] 63%|██████▎ | 232696/371472 [7:54:14<11:16:41, 3.42it/s] 63%|██████▎ | 232697/371472 [7:54:15<11:24:14, 3.38it/s] 63%|██████▎ | 232698/371472 [7:54:15<11:30:08, 3.35it/s] 63%|██████▎ | 232699/371472 [7:54:15<12:29:58, 3.08it/s] 63%|██████▎ | 232700/371472 [7:54:16<11:58:19, 3.22it/s] {'loss': 2.7911, 'learning_rate': 4.3639703505785516e-07, 'epoch': 10.02} + 63%|██████▎ | 232700/371472 [7:54:16<11:58:19, 3.22it/s] 63%|██████▎ | 232701/371472 [7:54:16<12:20:06, 3.13it/s] 63%|██████▎ | 232702/371472 [7:54:16<12:29:45, 3.08it/s] 63%|██████▎ | 232703/371472 [7:54:17<12:07:27, 3.18it/s] 63%|██████▎ | 232704/371472 [7:54:17<11:43:23, 3.29it/s] 63%|██████▎ | 232705/371472 [7:54:17<11:36:23, 3.32it/s] 63%|██████▎ | 232706/371472 [7:54:18<11:50:12, 3.26it/s] 63%|██████▎ | 232707/371472 [7:54:18<11:55:37, 3.23it/s] 63%|██████▎ | 232708/371472 [7:54:18<11:35:09, 3.33it/s] 63%|██████▎ | 232709/371472 [7:54:18<11:28:15, 3.36it/s] 63%|██████▎ | 232710/371472 [7:54:19<11:11:01, 3.45it/s] 63%|██████▎ | 232711/371472 [7:54:19<11:32:04, 3.34it/s] 63%|██████▎ | 232712/371472 [7:54:19<11:36:31, 3.32it/s] 63%|██████▎ | 232713/371472 [7:54:20<12:00:13, 3.21it/s] 63%|██████▎ | 232714/371472 [7:54:20<11:38:06, 3.31it/s] 63%|██████▎ | 232715/371472 [7:54:20<11:32:14, 3.34it/s] 63%|██████▎ | 232716/371472 [7:54:21<11:35:24, 3.33it/s] 63%|██████▎ | 232717/371472 [7:54:21<11:42:39, 3.29it/s] 63%|██████▎ | 232718/371472 [7:54:21<11:31:27, 3.34it/s] 63%|██████▎ | 232719/371472 [7:54:21<11:29:39, 3.35it/s] 63%|██████▎ | 232720/371472 [7:54:22<11:07:40, 3.46it/s] {'loss': 2.8482, 'learning_rate': 4.3634855308237624e-07, 'epoch': 10.02} + 63%|██████▎ | 232720/371472 [7:54:22<11:07:40, 3.46it/s] 63%|██████▎ | 232721/371472 [7:54:22<11:36:38, 3.32it/s] 63%|██████▎ | 232722/371472 [7:54:22<11:25:10, 3.38it/s] 63%|██████▎ | 232723/371472 [7:54:23<11:15:22, 3.42it/s] 63%|██████▎ | 232724/371472 [7:54:23<11:43:29, 3.29it/s] 63%|██████▎ | 232725/371472 [7:54:23<12:17:45, 3.13it/s] 63%|██████▎ | 232726/371472 [7:54:24<12:27:09, 3.09it/s] 63%|██████▎ | 232727/371472 [7:54:24<12:22:10, 3.12it/s] 63%|██████▎ | 232728/371472 [7:54:24<12:15:41, 3.14it/s] 63%|██████▎ | 232729/371472 [7:54:25<12:12:59, 3.15it/s] 63%|██████▎ | 232730/371472 [7:54:25<12:39:32, 3.04it/s] 63%|██████▎ | 232731/371472 [7:54:25<12:28:26, 3.09it/s] 63%|██████▎ | 232732/371472 [7:54:25<12:07:32, 3.18it/s] 63%|██████▎ | 232733/371472 [7:54:26<11:53:38, 3.24it/s] 63%|██████▎ | 232734/371472 [7:54:26<13:00:57, 2.96it/s] 63%|██████▎ | 232735/371472 [7:54:26<12:19:10, 3.13it/s] 63%|██████▎ | 232736/371472 [7:54:27<11:56:22, 3.23it/s] 63%|██████▎ | 232737/371472 [7:54:27<12:17:08, 3.14it/s] 63%|██████▎ | 232738/371472 [7:54:27<12:02:46, 3.20it/s] 63%|██████▎ | 232739/371472 [7:54:28<12:15:59, 3.14it/s] 63%|██████▎ | 232740/371472 [7:54:28<11:55:02, 3.23it/s] {'loss': 2.675, 'learning_rate': 4.3630007110689736e-07, 'epoch': 10.02} + 63%|██████▎ | 232740/371472 [7:54:28<11:55:02, 3.23it/s] 63%|██████▎ | 232741/371472 [7:54:28<11:36:57, 3.32it/s] 63%|██████▎ | 232742/371472 [7:54:29<12:01:47, 3.20it/s] 63%|██████▎ | 232743/371472 [7:54:29<13:14:02, 2.91it/s] 63%|██████▎ | 232744/371472 [7:54:29<12:39:14, 3.05it/s] 63%|██████▎ | 232745/371472 [7:54:30<11:52:35, 3.24it/s] 63%|██████▎ | 232746/371472 [7:54:30<11:44:52, 3.28it/s] 63%|██████▎ | 232747/371472 [7:54:30<11:24:47, 3.38it/s] 63%|██████▎ | 232748/371472 [7:54:31<12:20:27, 3.12it/s] 63%|██████▎ | 232749/371472 [7:54:31<12:37:37, 3.05it/s] 63%|██████▎ | 232750/371472 [7:54:31<12:01:01, 3.21it/s] 63%|██████▎ | 232751/371472 [7:54:31<11:43:22, 3.29it/s] 63%|██████▎ | 232752/371472 [7:54:32<12:01:01, 3.21it/s] 63%|██████▎ | 232753/371472 [7:54:32<11:47:44, 3.27it/s] 63%|██████▎ | 232754/371472 [7:54:32<12:17:19, 3.14it/s] 63%|██████▎ | 232755/371472 [7:54:33<12:19:25, 3.13it/s] 63%|██████▎ | 232756/371472 [7:54:33<11:49:22, 3.26it/s] 63%|██████▎ | 232757/371472 [7:54:33<11:31:11, 3.34it/s] 63%|██████▎ | 232758/371472 [7:54:34<11:16:22, 3.42it/s] 63%|██████▎ | 232759/371472 [7:54:34<10:57:37, 3.52it/s] 63%|██████▎ | 232760/371472 [7:54:34<11:16:09, 3.42it/s] {'loss': 2.4819, 'learning_rate': 4.3625158913141843e-07, 'epoch': 10.03} + 63%|██████▎ | 232760/371472 [7:54:34<11:16:09, 3.42it/s] 63%|██████▎ | 232761/371472 [7:54:34<11:38:55, 3.31it/s] 63%|██████▎ | 232762/371472 [7:54:35<11:24:11, 3.38it/s] 63%|██████▎ | 232763/371472 [7:54:35<11:03:28, 3.48it/s] 63%|██████▎ | 232764/371472 [7:54:35<11:22:19, 3.39it/s] 63%|██████▎ | 232765/371472 [7:54:36<11:10:54, 3.45it/s] 63%|██████▎ | 232766/371472 [7:54:36<11:03:29, 3.48it/s] 63%|██████▎ | 232767/371472 [7:54:36<10:59:18, 3.51it/s] 63%|██████▎ | 232768/371472 [7:54:36<10:42:15, 3.60it/s] 63%|██████▎ | 232769/371472 [7:54:37<10:36:25, 3.63it/s] 63%|██████▎ | 232770/371472 [7:54:37<10:41:50, 3.60it/s] 63%|██████▎ | 232771/371472 [7:54:37<11:21:43, 3.39it/s] 63%|██████▎ | 232772/371472 [7:54:38<11:32:03, 3.34it/s] 63%|██████▎ | 232773/371472 [7:54:38<11:27:07, 3.36it/s] 63%|██████▎ | 232774/371472 [7:54:38<11:14:19, 3.43it/s] 63%|██████▎ | 232775/371472 [7:54:39<11:29:51, 3.35it/s] 63%|██████▎ | 232776/371472 [7:54:39<11:31:28, 3.34it/s] 63%|██████▎ | 232777/371472 [7:54:39<11:26:09, 3.37it/s] 63%|██████▎ | 232778/371472 [7:54:39<11:23:02, 3.38it/s] 63%|██████▎ | 232779/371472 [7:54:40<11:21:54, 3.39it/s] 63%|██████▎ | 232780/371472 [7:54:40<11:29:30, 3.35it/s] {'loss': 2.8888, 'learning_rate': 4.362031071559396e-07, 'epoch': 10.03} + 63%|██████▎ | 232780/371472 [7:54:40<11:29:30, 3.35it/s] 63%|██████▎ | 232781/371472 [7:54:40<11:13:51, 3.43it/s] 63%|██████▎ | 232782/371472 [7:54:41<11:08:29, 3.46it/s] 63%|██████▎ | 232783/371472 [7:54:41<11:02:07, 3.49it/s] 63%|██████▎ | 232784/371472 [7:54:41<11:11:08, 3.44it/s] 63%|██████▎ | 232785/371472 [7:54:42<12:27:59, 3.09it/s] 63%|██████▎ | 232786/371472 [7:54:42<12:08:43, 3.17it/s] 63%|██████▎ | 232787/371472 [7:54:42<11:50:41, 3.25it/s] 63%|██████▎ | 232788/371472 [7:54:42<11:26:07, 3.37it/s] 63%|██████▎ | 232789/371472 [7:54:43<11:29:09, 3.35it/s] 63%|██████▎ | 232790/371472 [7:54:43<11:19:00, 3.40it/s] 63%|██████▎ | 232791/371472 [7:54:43<11:23:41, 3.38it/s] 63%|██████▎ | 232792/371472 [7:54:44<11:28:26, 3.36it/s] 63%|██████▎ | 232793/371472 [7:54:44<11:15:34, 3.42it/s] 63%|██████▎ | 232794/371472 [7:54:44<11:12:42, 3.44it/s] 63%|██████▎ | 232795/371472 [7:54:44<11:09:25, 3.45it/s] 63%|██████▎ | 232796/371472 [7:54:45<11:21:23, 3.39it/s] 63%|██████▎ | 232797/371472 [7:54:45<11:25:34, 3.37it/s] 63%|██████▎ | 232798/371472 [7:54:45<11:43:22, 3.29it/s] 63%|██████▎ | 232799/371472 [7:54:46<12:33:12, 3.07it/s] 63%|██████▎ | 232800/371472 [7:54:46<12:16:38, 3.14it/s] {'loss': 2.7406, 'learning_rate': 4.361546251804607e-07, 'epoch': 10.03} + 63%|██████▎ | 232800/371472 [7:54:46<12:16:38, 3.14it/s] 63%|██████▎ | 232801/371472 [7:54:46<12:19:17, 3.13it/s] 63%|██████▎ | 232802/371472 [7:54:47<11:44:35, 3.28it/s] 63%|██████▎ | 232803/371472 [7:54:47<11:49:07, 3.26it/s] 63%|██████▎ | 232804/371472 [7:54:47<11:57:41, 3.22it/s] 63%|██████▎ | 232805/371472 [7:54:48<12:18:24, 3.13it/s] 63%|██████▎ | 232806/371472 [7:54:48<11:52:48, 3.24it/s] 63%|██████▎ | 232807/371472 [7:54:48<12:28:31, 3.09it/s] 63%|██████▎ | 232808/371472 [7:54:49<12:13:49, 3.15it/s] 63%|██████▎ | 232809/371472 [7:54:49<11:37:26, 3.31it/s] 63%|██████▎ | 232810/371472 [7:54:49<11:55:11, 3.23it/s] 63%|██████▎ | 232811/371472 [7:54:49<11:30:47, 3.35it/s] 63%|██████▎ | 232812/371472 [7:54:50<11:59:47, 3.21it/s] 63%|██████▎ | 232813/371472 [7:54:50<11:30:52, 3.35it/s] 63%|██████▎ | 232814/371472 [7:54:50<11:21:16, 3.39it/s] 63%|██████▎ | 232815/371472 [7:54:51<11:17:15, 3.41it/s] 63%|██████▎ | 232816/371472 [7:54:51<11:02:33, 3.49it/s] 63%|██████▎ | 232817/371472 [7:54:51<11:15:43, 3.42it/s] 63%|██████▎ | 232818/371472 [7:54:51<11:09:32, 3.45it/s] 63%|██████▎ | 232819/371472 [7:54:52<11:34:33, 3.33it/s] 63%|██████▎ | 232820/371472 [7:54:52<11:39:14, 3.30it/s] {'loss': 2.7427, 'learning_rate': 4.361061432049818e-07, 'epoch': 10.03} + 63%|██████▎ | 232820/371472 [7:54:52<11:39:14, 3.30it/s] 63%|██████▎ | 232821/371472 [7:54:52<11:17:25, 3.41it/s] 63%|██████▎ | 232822/371472 [7:54:53<11:19:19, 3.40it/s] 63%|██████▎ | 232823/371472 [7:54:53<11:25:39, 3.37it/s] 63%|██████▎ | 232824/371472 [7:54:53<11:49:23, 3.26it/s] 63%|██████▎ | 232825/371472 [7:54:54<11:29:14, 3.35it/s] 63%|██████▎ | 232826/371472 [7:54:54<11:28:01, 3.36it/s] 63%|██████▎ | 232827/371472 [7:54:54<11:23:42, 3.38it/s] 63%|██████▎ | 232828/371472 [7:54:54<11:17:28, 3.41it/s] 63%|██████▎ | 232829/371472 [7:54:55<11:22:27, 3.39it/s] 63%|██████▎ | 232830/371472 [7:54:55<11:15:11, 3.42it/s] 63%|██████▎ | 232831/371472 [7:54:55<10:58:30, 3.51it/s] 63%|██████▎ | 232832/371472 [7:54:56<10:52:49, 3.54it/s] 63%|██████▎ | 232833/371472 [7:54:56<10:57:11, 3.52it/s] 63%|██████▎ | 232834/371472 [7:54:56<10:52:49, 3.54it/s] 63%|█��████▎ | 232835/371472 [7:54:56<10:56:04, 3.52it/s] 63%|██████▎ | 232836/371472 [7:54:57<10:42:09, 3.60it/s] 63%|██████▎ | 232837/371472 [7:54:57<10:57:12, 3.52it/s] 63%|██████▎ | 232838/371472 [7:54:57<10:55:05, 3.53it/s] 63%|██████▎ | 232839/371472 [7:54:58<11:29:56, 3.35it/s] 63%|██████▎ | 232840/371472 [7:54:58<11:42:02, 3.29it/s] {'loss': 2.7009, 'learning_rate': 4.360576612295029e-07, 'epoch': 10.03} + 63%|██████▎ | 232840/371472 [7:54:58<11:42:02, 3.29it/s] 63%|██████▎ | 232841/371472 [7:54:58<11:18:16, 3.41it/s] 63%|██████▎ | 232842/371472 [7:54:59<11:43:24, 3.28it/s] 63%|██████▎ | 232843/371472 [7:54:59<11:22:06, 3.39it/s] 63%|██████▎ | 232844/371472 [7:54:59<11:20:06, 3.40it/s] 63%|██████▎ | 232845/371472 [7:54:59<12:08:23, 3.17it/s] 63%|██████▎ | 232846/371472 [7:55:00<11:56:19, 3.23it/s] 63%|██████▎ | 232847/371472 [7:55:00<11:20:18, 3.40it/s] 63%|██████▎ | 232848/371472 [7:55:00<11:33:25, 3.33it/s] 63%|██████▎ | 232849/371472 [7:55:01<11:28:45, 3.35it/s] 63%|██████▎ | 232850/371472 [7:55:01<11:29:37, 3.35it/s] 63%|██████▎ | 232851/371472 [7:55:01<11:36:33, 3.32it/s] 63%|██████▎ | 232852/371472 [7:55:02<11:22:00, 3.39it/s] 63%|██████▎ | 232853/371472 [7:55:02<11:17:04, 3.41it/s] 63%|██████▎ | 232854/371472 [7:55:02<11:43:53, 3.28it/s] 63%|██████▎ | 232855/371472 [7:55:02<11:15:51, 3.42it/s] 63%|██████▎ | 232856/371472 [7:55:03<11:22:09, 3.39it/s] 63%|██████▎ | 232857/371472 [7:55:03<12:21:59, 3.11it/s] 63%|██████▎ | 232858/371472 [7:55:03<11:54:48, 3.23it/s] 63%|██████▎ | 232859/371472 [7:55:04<11:21:06, 3.39it/s] 63%|██████▎ | 232860/371472 [7:55:04<12:07:10, 3.18it/s] {'loss': 2.7962, 'learning_rate': 4.3600917925402405e-07, 'epoch': 10.03} + 63%|██████▎ | 232860/371472 [7:55:04<12:07:10, 3.18it/s] 63%|██████▎ | 232861/371472 [7:55:04<11:48:42, 3.26it/s] 63%|██████▎ | 232862/371472 [7:55:05<11:47:44, 3.26it/s] 63%|██████▎ | 232863/371472 [7:55:05<12:08:55, 3.17it/s] 63%|██████▎ | 232864/371472 [7:55:05<11:52:14, 3.24it/s] 63%|██████▎ | 232865/371472 [7:55:06<11:28:30, 3.36it/s] 63%|██████▎ | 232866/371472 [7:55:06<11:39:11, 3.30it/s] 63%|██████▎ | 232867/371472 [7:55:06<12:23:26, 3.11it/s] 63%|██████▎ | 232868/371472 [7:55:07<12:29:03, 3.08it/s] 63%|██████▎ | 232869/371472 [7:55:07<12:16:39, 3.14it/s] 63%|██████▎ | 232870/371472 [7:55:07<11:57:55, 3.22it/s] 63%|██████▎ | 232871/371472 [7:55:07<11:53:55, 3.24it/s] 63%|██████▎ | 232872/371472 [7:55:08<11:41:26, 3.29it/s] 63%|██████▎ | 232873/371472 [7:55:08<11:12:29, 3.43it/s] 63%|██████▎ | 232874/371472 [7:55:08<11:07:07, 3.46it/s] 63%|██████▎ | 232875/371472 [7:55:09<11:00:03, 3.50it/s] 63%|██████▎ | 232876/371472 [7:55:09<11:08:35, 3.45it/s] 63%|██████▎ | 232877/371472 [7:55:09<11:09:22, 3.45it/s] 63%|██████▎ | 232878/371472 [7:55:09<10:55:24, 3.52it/s] 63%|██████▎ | 232879/371472 [7:55:10<11:11:43, 3.44it/s] 63%|██████▎ | 232880/371472 [7:55:10<11:49:36, 3.26it/s] {'loss': 2.7476, 'learning_rate': 4.3596069727854507e-07, 'epoch': 10.03} + 63%|██████▎ | 232880/371472 [7:55:10<11:49:36, 3.26it/s] 63%|██████▎ | 232881/371472 [7:55:10<11:39:47, 3.30it/s] 63%|██████▎ | 232882/371472 [7:55:11<11:53:27, 3.24it/s] 63%|██████▎ | 232883/371472 [7:55:11<11:48:21, 3.26it/s] 63%|██████▎ | 232884/371472 [7:55:11<11:33:16, 3.33it/s] 63%|██████▎ | 232885/371472 [7:55:12<11:45:17, 3.27it/s] 63%|██████▎ | 232886/371472 [7:55:12<11:45:30, 3.27it/s] 63%|██████▎ | 232887/371472 [7:55:12<11:23:22, 3.38it/s] 63%|██████▎ | 232888/371472 [7:55:12<11:29:55, 3.35it/s] 63%|██████▎ | 232889/371472 [7:55:13<11:16:12, 3.42it/s] 63%|██████▎ | 232890/371472 [7:55:13<11:12:03, 3.44it/s] 63%|██████▎ | 232891/371472 [7:55:13<12:03:06, 3.19it/s] 63%|██████▎ | 232892/371472 [7:55:14<11:26:54, 3.36it/s] 63%|██████▎ | 232893/371472 [7:55:14<11:04:10, 3.48it/s] 63%|██████▎ | 232894/371472 [7:55:14<10:51:45, 3.54it/s] 63%|██████▎ | 232895/371472 [7:55:14<11:03:16, 3.48it/s] 63%|██████▎ | 232896/371472 [7:55:15<10:52:41, 3.54it/s] 63%|██████▎ | 232897/371472 [7:55:15<10:48:45, 3.56it/s] 63%|██████▎ | 232898/371472 [7:55:15<11:01:57, 3.49it/s] 63%|██████▎ | 232899/371472 [7:55:16<11:38:00, 3.31it/s] 63%|██████▎ | 232900/371472 [7:55:16<11:24:03, 3.38it/s] {'loss': 2.6929, 'learning_rate': 4.3591221530306625e-07, 'epoch': 10.03} + 63%|██████▎ | 232900/371472 [7:55:16<11:24:03, 3.38it/s] 63%|██████▎ | 232901/371472 [7:55:16<12:03:48, 3.19it/s] 63%|██████▎ | 232902/371472 [7:55:17<12:03:31, 3.19it/s] 63%|██████▎ | 232903/371472 [7:55:17<11:34:04, 3.33it/s] 63%|██████▎ | 232904/371472 [7:55:17<11:26:26, 3.36it/s] 63%|██████▎ | 232905/371472 [7:55:17<11:30:46, 3.34it/s] 63%|██████▎ | 232906/371472 [7:55:18<11:44:58, 3.28it/s] 63%|██████▎ | 232907/371472 [7:55:18<11:22:03, 3.39it/s] 63%|██████▎ | 232908/371472 [7:55:18<11:11:02, 3.44it/s] 63%|██████▎ | 232909/371472 [7:55:19<11:13:02, 3.43it/s] 63%|██████▎ | 232910/371472 [7:55:19<11:42:37, 3.29it/s] 63%|██████▎ | 232911/371472 [7:55:19<11:41:11, 3.29it/s] 63%|██████▎ | 232912/371472 [7:55:20<11:16:12, 3.42it/s] 63%|██████▎ | 232913/371472 [7:55:20<11:29:37, 3.35it/s] 63%|██████▎ | 232914/371472 [7:55:20<11:55:46, 3.23it/s] 63%|██████▎ | 232915/371472 [7:55:20<11:43:49, 3.28it/s] 63%|██████▎ | 232916/371472 [7:55:21<11:25:49, 3.37it/s] 63%|██████▎ | 232917/371472 [7:55:21<11:23:36, 3.38it/s] 63%|██████▎ | 232918/371472 [7:55:21<11:14:33, 3.42it/s] 63%|██████▎ | 232919/371472 [7:55:22<12:56:24, 2.97it/s] 63%|██████▎ | 232920/371472 [7:55:22<12:29:41, 3.08it/s] {'loss': 2.781, 'learning_rate': 4.358637333275873e-07, 'epoch': 10.03} + 63%|██████▎ | 232920/371472 [7:55:22<12:29:41, 3.08it/s] 63%|██████▎ | 232921/371472 [7:55:22<12:38:40, 3.04it/s] 63%|██████▎ | 232922/371472 [7:55:23<12:19:18, 3.12it/s] 63%|██████▎ | 232923/371472 [7:55:23<11:56:59, 3.22it/s] 63%|██████▎ | 232924/371472 [7:55:23<11:36:07, 3.32it/s] 63%|██████▎ | 232925/371472 [7:55:24<11:21:19, 3.39it/s] 63%|██████▎ | 232926/371472 [7:55:24<12:16:34, 3.13it/s] 63%|██████▎ | 232927/371472 [7:55:24<11:38:40, 3.30it/s] 63%|██████▎ | 232928/371472 [7:55:24<11:12:34, 3.43it/s] 63%|██████▎ | 232929/371472 [7:55:25<11:25:08, 3.37it/s] 63%|██████▎ | 232930/371472 [7:55:25<11:12:11, 3.44it/s] 63%|██████▎ | 232931/371472 [7:55:25<12:01:01, 3.20it/s] 63%|██████▎ | 232932/371472 [7:55:26<12:49:24, 3.00it/s] 63%|██████▎ | 232933/371472 [7:55:26<11:56:27, 3.22it/s] 63%|██████▎ | 232934/371472 [7:55:26<11:44:06, 3.28it/s] 63%|██████▎ | 232935/371472 [7:55:27<11:26:37, 3.36it/s] 63%|██████▎ | 232936/371472 [7:55:27<11:21:26, 3.39it/s] 63%|██████▎ | 232937/371472 [7:55:27<11:23:51, 3.38it/s] 63%|██████▎ | 232938/371472 [7:55:28<11:31:44, 3.34it/s] 63%|██████▎ | 232939/371472 [7:55:28<11:15:31, 3.42it/s] 63%|██████▎ | 232940/371472 [7:55:28<11:29:52, 3.35it/s] {'loss': 2.7936, 'learning_rate': 4.3581525135210834e-07, 'epoch': 10.03} + 63%|██████▎ | 232940/371472 [7:55:28<11:29:52, 3.35it/s] 63%|██████▎ | 232941/371472 [7:55:28<11:26:16, 3.36it/s] 63%|██████▎ | 232942/371472 [7:55:29<11:47:57, 3.26it/s] 63%|██████▎ | 232943/371472 [7:55:29<11:31:15, 3.34it/s] 63%|██████▎ | 232944/371472 [7:55:29<11:53:39, 3.24it/s] 63%|██████▎ | 232945/371472 [7:55:30<11:42:07, 3.29it/s] 63%|██████▎ | 232946/371472 [7:55:30<11:10:09, 3.45it/s] 63%|██████▎ | 232947/371472 [7:55:30<11:09:02, 3.45it/s] 63%|██████▎ | 232948/371472 [7:55:30<11:01:22, 3.49it/s] 63%|██████▎ | 232949/371472 [7:55:31<11:05:32, 3.47it/s] 63%|██████▎ | 232950/371472 [7:55:31<11:38:01, 3.31it/s] 63%|██████▎ | 232951/371472 [7:55:31<11:09:17, 3.45it/s] 63%|██████▎ | 232952/371472 [7:55:32<11:06:19, 3.46it/s] 63%|██████▎ | 232953/371472 [7:55:32<11:24:22, 3.37it/s] 63%|██████▎ | 232954/371472 [7:55:32<11:12:03, 3.44it/s] 63%|██████▎ | 232955/371472 [7:55:33<11:18:33, 3.40it/s] 63%|██████▎ | 232956/371472 [7:55:33<11:34:58, 3.32it/s] 63%|██████▎ | 232957/371472 [7:55:33<11:06:36, 3.46it/s] 63%|██████▎ | 232958/371472 [7:55:33<11:04:05, 3.48it/s] 63%|██████▎ | 232959/371472 [7:55:34<11:31:34, 3.34it/s] 63%|██████▎ | 232960/371472 [7:55:34<11:59:18, 3.21it/s] {'loss': 2.755, 'learning_rate': 4.357667693766295e-07, 'epoch': 10.03} + 63%|██████▎ | 232960/371472 [7:55:34<11:59:18, 3.21it/s] 63%|██████▎ | 232961/371472 [7:55:34<11:36:19, 3.32it/s] 63%|██████▎ | 232962/371472 [7:55:35<11:30:28, 3.34it/s] 63%|██████▎ | 232963/371472 [7:55:35<11:55:53, 3.22it/s] 63%|██████▎ | 232964/371472 [7:55:35<11:36:57, 3.31it/s] 63%|██████▎ | 232965/371472 [7:55:36<12:17:49, 3.13it/s] 63%|██████▎ | 232966/371472 [7:55:36<12:20:16, 3.12it/s] 63%|██████▎ | 232967/371472 [7:55:36<11:46:08, 3.27it/s] 63%|██████▎ | 232968/371472 [7:55:37<12:02:37, 3.19it/s] 63%|██████▎ | 232969/371472 [7:55:37<12:38:46, 3.04it/s] 63%|██████▎ | 232970/371472 [7:55:37<12:13:42, 3.15it/s] 63%|██████▎ | 232971/371472 [7:55:38<12:09:18, 3.17it/s] 63%|██████▎ | 232972/371472 [7:55:38<13:05:13, 2.94it/s] 63%|██████▎ | 232973/371472 [7:55:38<12:23:49, 3.10it/s] 63%|██████▎ | 232974/371472 [7:55:38<11:54:29, 3.23it/s] 63%|██████▎ | 232975/371472 [7:55:39<13:10:46, 2.92it/s] 63%|██████▎ | 232976/371472 [7:55:39<13:25:05, 2.87it/s] 63%|██████▎ | 232977/371472 [7:55:40<12:32:01, 3.07it/s] 63%|██████▎ | 232978/371472 [7:55:40<13:06:40, 2.93it/s] 63%|██████▎ | 232979/371472 [7:55:40<12:55:22, 2.98it/s] 63%|██████▎ | 232980/371472 [7:55:41<13:38:10, 2.82it/s] {'loss': 2.5696, 'learning_rate': 4.357182874011506e-07, 'epoch': 10.03} + 63%|██████▎ | 232980/371472 [7:55:41<13:38:10, 2.82it/s] 63%|██████▎ | 232981/371472 [7:55:41<12:50:23, 3.00it/s] 63%|██████▎ | 232982/371472 [7:55:41<12:45:47, 3.01it/s] 63%|██████▎ | 232983/371472 [7:55:42<12:58:20, 2.97it/s] 63%|██████▎ | 232984/371472 [7:55:42<12:22:33, 3.11it/s] 63%|██████▎ | 232985/371472 [7:55:42<11:53:38, 3.23it/s] 63%|██████▎ | 232986/371472 [7:55:42<11:45:00, 3.27it/s] 63%|██████▎ | 232987/371472 [7:55:43<11:22:51, 3.38it/s] 63%|██████▎ | 232988/371472 [7:55:43<10:47:53, 3.56it/s] 63%|██████▎ | 232989/371472 [7:55:43<11:06:08, 3.46it/s] 63%|██████▎ | 232990/371472 [7:55:44<11:23:33, 3.38it/s] 63%|██████▎ | 232991/371472 [7:55:44<11:30:55, 3.34it/s] 63%|██████▎ | 232992/371472 [7:55:44<11:06:55, 3.46it/s] 63%|██████▎ | 232993/371472 [7:55:44<11:10:39, 3.44it/s] 63%|██████▎ | 232994/371472 [7:55:45<10:50:07, 3.55it/s] 63%|██████▎ | 232995/371472 [7:55:45<10:44:57, 3.58it/s] 63%|██████▎ | 232996/371472 [7:55:45<11:11:18, 3.44it/s] 63%|██████▎ | 232997/371472 [7:55:46<11:02:40, 3.48it/s] 63%|██████▎ | 232998/371472 [7:55:46<10:57:55, 3.51it/s] 63%|██████▎ | 232999/371472 [7:55:46<11:00:36, 3.49it/s] 63%|██████▎ | 233000/371472 [7:55:46<10:43:25, 3.59it/s] {'loss': 2.9032, 'learning_rate': 4.356698054256717e-07, 'epoch': 10.04} + 63%|██████▎ | 233000/371472 [7:55:46<10:43:25, 3.59it/s] 63%|██████▎ | 233001/371472 [7:55:47<10:57:45, 3.51it/s] 63%|██████▎ | 233002/371472 [7:55:47<11:19:37, 3.40it/s] 63%|██████▎ | 233003/371472 [7:55:47<11:21:22, 3.39it/s] 63%|██████▎ | 233004/371472 [7:55:48<11:55:13, 3.23it/s] 63%|██████▎ | 233005/371472 [7:55:48<12:16:19, 3.13it/s] 63%|██████▎ | 233006/371472 [7:55:48<12:12:32, 3.15it/s] 63%|██████▎ | 233007/371472 [7:55:49<12:02:52, 3.19it/s] 63%|██████▎ | 233008/371472 [7:55:49<12:22:23, 3.11it/s] 63%|██████▎ | 233009/371472 [7:55:49<11:37:06, 3.31it/s] 63%|██████▎ | 233010/371472 [7:55:50<11:32:55, 3.33it/s] 63%|██████▎ | 233011/371472 [7:55:50<12:11:18, 3.16it/s] 63%|██████▎ | 233012/371472 [7:55:50<12:12:16, 3.15it/s] 63%|██████▎ | 233013/371472 [7:55:50<11:59:01, 3.21it/s] 63%|██████▎ | 233014/371472 [7:55:51<11:51:52, 3.24it/s] 63%|██████▎ | 233015/371472 [7:55:51<11:32:51, 3.33it/s] 63%|██████▎ | 233016/371472 [7:55:51<11:24:02, 3.37it/s] 63%|██████▎ | 233017/371472 [7:55:52<11:33:24, 3.33it/s] 63%|██████▎ | 233018/371472 [7:55:52<11:14:06, 3.42it/s] 63%|██████▎ | 233019/371472 [7:55:52<11:22:33, 3.38it/s] 63%|██████▎ | 233020/371472 [7:55:53<11:14:58, 3.42it/s] {'loss': 2.7736, 'learning_rate': 4.3562132345019273e-07, 'epoch': 10.04} + 63%|██████▎ | 233020/371472 [7:55:53<11:14:58, 3.42it/s] 63%|██████▎ | 233021/371472 [7:55:53<11:21:05, 3.39it/s] 63%|██████▎ | 233022/371472 [7:55:53<11:17:07, 3.41it/s] 63%|██████▎ | 233023/371472 [7:55:53<11:34:26, 3.32it/s] 63%|██████▎ | 233024/371472 [7:55:54<11:15:05, 3.42it/s] 63%|██████▎ | 233025/371472 [7:55:54<11:55:02, 3.23it/s] 63%|██████▎ | 233026/371472 [7:55:54<11:39:11, 3.30it/s] 63%|██████▎ | 233027/371472 [7:55:55<11:22:34, 3.38it/s] 63%|██████▎ | 233028/371472 [7:55:55<11:13:58, 3.42it/s] 63%|██████▎ | 233029/371472 [7:55:55<11:22:56, 3.38it/s] 63%|██████▎ | 233030/371472 [7:55:56<11:09:57, 3.44it/s] 63%|██████▎ | 233031/371472 [7:55:56<11:18:27, 3.40it/s] 63%|██████▎ | 233032/371472 [7:55:56<12:14:00, 3.14it/s] 63%|██████▎ | 233033/371472 [7:55:56<11:49:07, 3.25it/s] 63%|██████▎ | 233034/371472 [7:55:57<11:20:55, 3.39it/s] 63%|██████▎ | 233035/371472 [7:55:57<11:28:51, 3.35it/s] 63%|██████▎ | 233036/371472 [7:55:57<11:15:08, 3.42it/s] 63%|██████▎ | 233037/371472 [7:55:58<11:37:40, 3.31it/s] 63%|██████▎ | 233038/371472 [7:55:58<11:18:51, 3.40it/s] 63%|██████▎ | 233039/371472 [7:55:58<11:23:25, 3.38it/s] 63%|██████▎ | 233040/371472 [7:55:59<11:29:47, 3.34it/s] {'loss': 2.6972, 'learning_rate': 4.3557284147471396e-07, 'epoch': 10.04} + 63%|██████▎ | 233040/371472 [7:55:59<11:29:47, 3.34it/s] 63%|██████▎ | 233041/371472 [7:55:59<11:15:19, 3.42it/s] 63%|██████▎ | 233042/371472 [7:55:59<10:49:39, 3.55it/s] 63%|██████▎ | 233043/371472 [7:55:59<12:11:40, 3.15it/s] 63%|██████▎ | 233044/371472 [7:56:00<11:30:04, 3.34it/s] 63%|██████▎ | 233045/371472 [7:56:00<11:44:28, 3.27it/s] 63%|██████▎ | 233046/371472 [7:56:00<11:46:23, 3.27it/s] 63%|██████▎ | 233047/371472 [7:56:01<11:34:04, 3.32it/s] 63%|██████▎ | 233048/371472 [7:56:01<11:24:49, 3.37it/s] 63%|██████▎ | 233049/371472 [7:56:01<11:31:00, 3.34it/s] 63%|██████▎ | 233050/371472 [7:56:02<11:32:33, 3.33it/s] 63%|██████▎ | 233051/371472 [7:56:02<11:27:27, 3.36it/s] 63%|██████▎ | 233052/371472 [7:56:02<11:16:21, 3.41it/s] 63%|██████▎ | 233053/371472 [7:56:02<11:24:23, 3.37it/s] 63%|██████▎ | 233054/371472 [7:56:03<11:18:45, 3.40it/s] 63%|██████▎ | 233055/371472 [7:56:03<11:17:55, 3.40it/s] 63%|██████▎ | 233056/371472 [7:56:03<11:55:52, 3.22it/s] 63%|██████▎ | 233057/371472 [7:56:04<12:11:00, 3.16it/s] 63%|██████▎ | 233058/371472 [7:56:04<11:54:12, 3.23it/s] 63%|██████▎ | 233059/371472 [7:56:04<12:01:07, 3.20it/s] 63%|██████▎ | 233060/371472 [7:56:05<11:46:10, 3.27it/s] {'loss': 2.738, 'learning_rate': 4.35524359499235e-07, 'epoch': 10.04} + 63%|██████▎ | 233060/371472 [7:56:05<11:46:10, 3.27it/s] 63%|██████▎ | 233061/371472 [7:56:05<11:50:19, 3.25it/s] 63%|██████▎ | 233062/371472 [7:56:05<11:27:10, 3.36it/s] 63%|██████▎ | 233063/371472 [7:56:05<11:13:48, 3.42it/s] 63%|██████▎ | 233064/371472 [7:56:06<11:00:35, 3.49it/s] 63%|██████▎ | 233065/371472 [7:56:06<11:11:00, 3.44it/s] 63%|██████▎ | 233066/371472 [7:56:06<11:09:31, 3.45it/s] 63%|██████▎ | 233067/371472 [7:56:07<11:09:55, 3.44it/s] 63%|██████▎ | 233068/371472 [7:56:07<11:37:45, 3.31it/s] 63%|██████▎ | 233069/371472 [7:56:07<11:11:23, 3.44it/s] 63%|██████▎ | 233070/371472 [7:56:08<11:51:17, 3.24it/s] 63%|██████▎ | 233071/371472 [7:56:08<12:21:56, 3.11it/s] 63%|██████▎ | 233072/371472 [7:56:08<11:47:36, 3.26it/s] 63%|██████▎ | 233073/371472 [7:56:08<11:47:36, 3.26it/s] 63%|██████▎ | 233074/371472 [7:56:09<11:42:34, 3.28it/s] 63%|██████▎ | 233075/371472 [7:56:09<11:20:49, 3.39it/s] 63%|██████▎ | 233076/371472 [7:56:09<11:35:05, 3.32it/s] 63%|██████▎ | 233077/371472 [7:56:10<11:20:51, 3.39it/s] 63%|██████▎ | 233078/371472 [7:56:10<11:37:33, 3.31it/s] 63%|██████▎ | 233079/371472 [7:56:10<11:34:53, 3.32it/s] 63%|██████▎ | 233080/371472 [7:56:11<11:25:45, 3.36it/s] {'loss': 2.6802, 'learning_rate': 4.3547587752375616e-07, 'epoch': 10.04} + 63%|██████▎ | 233080/371472 [7:56:11<11:25:45, 3.36it/s] 63%|██████▎ | 233081/371472 [7:56:11<11:24:31, 3.37it/s] 63%|██████▎ | 233082/371472 [7:56:11<11:12:52, 3.43it/s] 63%|██████▎ | 233083/371472 [7:56:11<11:27:23, 3.36it/s] 63%|██████▎ | 233084/371472 [7:56:12<11:23:56, 3.37it/s] 63%|██████▎ | 233085/371472 [7:56:12<11:13:56, 3.42it/s] 63%|██████▎ | 233086/371472 [7:56:12<10:59:35, 3.50it/s] 63%|██████▎ | 233087/371472 [7:56:13<11:07:26, 3.46it/s] 63%|██████▎ | 233088/371472 [7:56:13<11:16:50, 3.41it/s] 63%|██████▎ | 233089/371472 [7:56:13<11:38:10, 3.30it/s] 63%|██████▎ | 233090/371472 [7:56:13<11:19:47, 3.39it/s] 63%|██████▎ | 233091/371472 [7:56:14<10:56:37, 3.51it/s] 63%|██████▎ | 233092/371472 [7:56:14<11:33:24, 3.33it/s] 63%|██████▎ | 233093/371472 [7:56:14<11:10:03, 3.44it/s] 63%|██████▎ | 233094/371472 [7:56:15<11:20:51, 3.39it/s] 63%|██████▎ | 233095/371472 [7:56:15<11:28:38, 3.35it/s] 63%|██████▎ | 233096/371472 [7:56:15<11:17:39, 3.40it/s] 63%|██████▎ | 233097/371472 [7:56:16<11:21:21, 3.38it/s] 63%|██████▎ | 233098/371472 [7:56:16<11:16:12, 3.41it/s] 63%|██████▎ | 233099/371472 [7:56:16<11:11:07, 3.44it/s] 63%|██████▎ | 233100/371472 [7:56:16<11:25:07, 3.37it/s] {'loss': 2.7077, 'learning_rate': 4.3542739554827723e-07, 'epoch': 10.04} + 63%|██████▎ | 233100/371472 [7:56:16<11:25:07, 3.37it/s] 63%|██████▎ | 233101/371472 [7:56:17<11:17:22, 3.40it/s] 63%|██████▎ | 233102/371472 [7:56:17<11:11:52, 3.43it/s] 63%|██████▎ | 233103/371472 [7:56:17<11:48:03, 3.26it/s] 63%|██████▎ | 233104/371472 [7:56:18<12:13:01, 3.15it/s] 63%|██████▎ | 233105/371472 [7:56:18<12:10:19, 3.16it/s] 63%|██████▎ | 233106/371472 [7:56:18<12:29:56, 3.08it/s] 63%|██████▎ | 233107/371472 [7:56:19<12:19:44, 3.12it/s] 63%|██████▎ | 233108/371472 [7:56:19<12:16:53, 3.13it/s] 63%|██████▎ | 233109/371472 [7:56:19<12:07:16, 3.17it/s] 63%|██████▎ | 233110/371472 [7:56:20<11:51:17, 3.24it/s] 63%|██████▎ | 233111/371472 [7:56:20<11:26:29, 3.36it/s] 63%|██████▎ | 233112/371472 [7:56:20<11:35:49, 3.31it/s] 63%|██████▎ | 233113/371472 [7:56:20<11:40:09, 3.29it/s] 63%|██████▎ | 233114/371472 [7:56:21<11:27:48, 3.35it/s] 63%|██████▎ | 233115/371472 [7:56:21<11:29:34, 3.34it/s] 63%|██████▎ | 233116/371472 [7:56:21<11:58:33, 3.21it/s] 63%|██████▎ | 233117/371472 [7:56:22<11:43:15, 3.28it/s] 63%|██████▎ | 233118/371472 [7:56:22<11:31:57, 3.33it/s] 63%|██████▎ | 233119/371472 [7:56:22<11:25:58, 3.36it/s] 63%|██████▎ | 233120/371472 [7:56:23<11:15:57, 3.41it/s] {'loss': 2.8842, 'learning_rate': 4.3537891357279835e-07, 'epoch': 10.04} + 63%|██████▎ | 233120/371472 [7:56:23<11:15:57, 3.41it/s] 63%|██████▎ | 233121/371472 [7:56:23<11:33:14, 3.33it/s] 63%|██████▎ | 233122/371472 [7:56:23<11:29:27, 3.34it/s] 63%|██████▎ | 233123/371472 [7:56:23<11:15:08, 3.42it/s] 63%|██████▎ | 233124/371472 [7:56:24<11:23:00, 3.38it/s] 63%|██████▎ | 233125/371472 [7:56:24<11:06:52, 3.46it/s] 63%|██████▎ | 233126/371472 [7:56:24<10:51:52, 3.54it/s] 63%|██████▎ | 233127/371472 [7:56:25<11:10:25, 3.44it/s] 63%|██████▎ | 233128/371472 [7:56:25<10:54:11, 3.52it/s] 63%|██████▎ | 233129/371472 [7:56:25<11:06:43, 3.46it/s] 63%|██████▎ | 233130/371472 [7:56:25<10:58:17, 3.50it/s] 63%|██████▎ | 233131/371472 [7:56:26<12:21:33, 3.11it/s] 63%|██████▎ | 233132/371472 [7:56:26<13:22:13, 2.87it/s] 63%|██████▎ | 233133/371472 [7:56:27<12:41:53, 3.03it/s] 63%|███���██▎ | 233134/371472 [7:56:27<12:09:17, 3.16it/s] 63%|██████▎ | 233135/371472 [7:56:27<12:23:14, 3.10it/s] 63%|██████▎ | 233136/371472 [7:56:27<12:03:42, 3.19it/s] 63%|██████▎ | 233137/371472 [7:56:28<11:48:18, 3.26it/s] 63%|██████▎ | 233138/371472 [7:56:28<11:35:42, 3.31it/s] 63%|██████▎ | 233139/371472 [7:56:28<12:15:18, 3.14it/s] 63%|██████▎ | 233140/371472 [7:56:29<11:41:30, 3.29it/s] {'loss': 2.5946, 'learning_rate': 4.353304315973194e-07, 'epoch': 10.04} + 63%|██████▎ | 233140/371472 [7:56:29<11:41:30, 3.29it/s] 63%|██████▎ | 233141/371472 [7:56:29<11:42:15, 3.28it/s] 63%|██████▎ | 233142/371472 [7:56:29<11:24:00, 3.37it/s] 63%|██████▎ | 233143/371472 [7:56:30<11:33:54, 3.32it/s] 63%|██████▎ | 233144/371472 [7:56:30<12:22:37, 3.10it/s] 63%|██████▎ | 233145/371472 [7:56:30<11:58:53, 3.21it/s] 63%|██████▎ | 233146/371472 [7:56:31<11:57:30, 3.21it/s] 63%|██████▎ | 233147/371472 [7:56:31<11:26:43, 3.36it/s] 63%|██████▎ | 233148/371472 [7:56:31<12:05:51, 3.18it/s] 63%|██████▎ | 233149/371472 [7:56:31<12:32:28, 3.06it/s] 63%|██████▎ | 233150/371472 [7:56:32<12:00:10, 3.20it/s] 63%|██████▎ | 233151/371472 [7:56:32<12:04:36, 3.18it/s] 63%|██████▎ | 233152/371472 [7:56:32<11:50:04, 3.25it/s] 63%|██████▎ | 233153/371472 [7:56:33<11:44:07, 3.27it/s] 63%|██████▎ | 233154/371472 [7:56:33<11:32:27, 3.33it/s] 63%|██████▎ | 233155/371472 [7:56:33<11:26:19, 3.36it/s] 63%|██████▎ | 233156/371472 [7:56:34<11:15:41, 3.41it/s] 63%|██████▎ | 233157/371472 [7:56:34<11:34:30, 3.32it/s] 63%|██████▎ | 233158/371472 [7:56:34<11:38:51, 3.30it/s] 63%|██████▎ | 233159/371472 [7:56:34<11:16:55, 3.41it/s] 63%|██████▎ | 233160/371472 [7:56:35<11:33:55, 3.32it/s] {'loss': 2.7576, 'learning_rate': 4.352819496218406e-07, 'epoch': 10.04} + 63%|██████▎ | 233160/371472 [7:56:35<11:33:55, 3.32it/s] 63%|██████▎ | 233161/371472 [7:56:35<11:21:36, 3.38it/s] 63%|██████▎ | 233162/371472 [7:56:35<11:55:34, 3.22it/s] 63%|██████▎ | 233163/371472 [7:56:36<11:46:17, 3.26it/s] 63%|██████▎ | 233164/371472 [7:56:36<11:36:32, 3.31it/s] 63%|██████▎ | 233165/371472 [7:56:36<11:16:03, 3.41it/s] 63%|██████▎ | 233166/371472 [7:56:37<11:16:47, 3.41it/s] 63%|██████▎ | 233167/371472 [7:56:37<11:42:18, 3.28it/s] 63%|██████▎ | 233168/371472 [7:56:37<11:37:28, 3.30it/s] 63%|██████▎ | 233169/371472 [7:56:37<11:32:03, 3.33it/s] 63%|██████▎ | 233170/371472 [7:56:38<12:05:01, 3.18it/s] 63%|██████▎ | 233171/371472 [7:56:38<11:46:13, 3.26it/s] 63%|██████▎ | 233172/371472 [7:56:38<11:18:12, 3.40it/s] 63%|██████▎ | 233173/371472 [7:56:39<10:56:29, 3.51it/s] 63%|██████▎ | 233174/371472 [7:56:39<12:11:37, 3.15it/s] 63%|██████▎ | 233175/371472 [7:56:39<12:11:53, 3.15it/s] 63%|██████▎ | 233176/371472 [7:56:40<12:18:40, 3.12it/s] 63%|██████▎ | 233177/371472 [7:56:40<11:52:48, 3.23it/s] 63%|██████▎ | 233178/371472 [7:56:40<12:10:23, 3.16it/s] 63%|██████▎ | 233179/371472 [7:56:41<11:40:07, 3.29it/s] 63%|██████▎ | 233180/371472 [7:56:41<11:32:05, 3.33it/s] {'loss': 2.7143, 'learning_rate': 4.352334676463617e-07, 'epoch': 10.04} + 63%|██████▎ | 233180/371472 [7:56:41<11:32:05, 3.33it/s] 63%|██████▎ | 233181/371472 [7:56:41<11:52:15, 3.24it/s] 63%|██████▎ | 233182/371472 [7:56:41<11:38:41, 3.30it/s] 63%|██████▎ | 233183/371472 [7:56:42<11:14:36, 3.42it/s] 63%|██████▎ | 233184/371472 [7:56:42<10:59:23, 3.50it/s] 63%|██████▎ | 233185/371472 [7:56:42<11:18:03, 3.40it/s] 63%|██████▎ | 233186/371472 [7:56:43<11:00:03, 3.49it/s] 63%|██████▎ | 233187/371472 [7:56:43<11:52:29, 3.23it/s] 63%|██████▎ | 233188/371472 [7:56:43<12:08:52, 3.16it/s] 63%|██████▎ | 233189/371472 [7:56:44<11:48:01, 3.26it/s] 63%|██████▎ | 233190/371472 [7:56:44<11:29:51, 3.34it/s] 63%|██████▎ | 233191/371472 [7:56:44<11:18:55, 3.39it/s] 63%|██████▎ | 233192/371472 [7:56:44<11:18:10, 3.40it/s] 63%|██████▎ | 233193/371472 [7:56:45<12:39:46, 3.03it/s] 63%|██████▎ | 233194/371472 [7:56:45<12:03:51, 3.18it/s] 63%|██████▎ | 233195/371472 [7:56:45<11:43:27, 3.28it/s] 63%|██████▎ | 233196/371472 [7:56:46<11:23:06, 3.37it/s] 63%|██████▎ | 233197/371472 [7:56:46<11:32:35, 3.33it/s] 63%|██████▎ | 233198/371472 [7:56:46<11:29:16, 3.34it/s] 63%|██████▎ | 233199/371472 [7:56:47<11:45:08, 3.27it/s] 63%|██████▎ | 233200/371472 [7:56:47<11:23:48, 3.37it/s] {'loss': 2.7301, 'learning_rate': 4.351849856708828e-07, 'epoch': 10.04} + 63%|██████▎ | 233200/371472 [7:56:47<11:23:48, 3.37it/s] 63%|██████▎ | 233201/371472 [7:56:47<11:27:44, 3.35it/s] 63%|██████▎ | 233202/371472 [7:56:48<11:38:27, 3.30it/s] 63%|██████▎ | 233203/371472 [7:56:48<12:13:07, 3.14it/s] 63%|██████▎ | 233204/371472 [7:56:48<11:58:31, 3.21it/s] 63%|██████▎ | 233205/371472 [7:56:48<11:43:24, 3.28it/s] 63%|██████▎ | 233206/371472 [7:56:49<12:01:38, 3.19it/s] 63%|██████▎ | 233207/371472 [7:56:49<11:40:38, 3.29it/s] 63%|██████▎ | 233208/371472 [7:56:49<11:29:43, 3.34it/s] 63%|██████▎ | 233209/371472 [7:56:50<10:59:48, 3.49it/s] 63%|██████▎ | 233210/371472 [7:56:50<11:03:54, 3.47it/s] 63%|██████▎ | 233211/371472 [7:56:50<11:26:12, 3.36it/s] 63%|██████▎ | 233212/371472 [7:56:51<11:26:28, 3.36it/s] 63%|██████▎ | 233213/371472 [7:56:51<11:50:33, 3.24it/s] 63%|██████▎ | 233214/371472 [7:56:51<11:39:20, 3.29it/s] 63%|██████▎ | 233215/371472 [7:56:51<11:21:33, 3.38it/s] 63%|██████▎ | 233216/371472 [7:56:52<11:14:18, 3.42it/s] 63%|██████▎ | 233217/371472 [7:56:52<11:17:20, 3.40it/s] 63%|██████▎ | 233218/371472 [7:56:52<11:13:42, 3.42it/s] 63%|██████▎ | 233219/371472 [7:56:53<11:56:49, 3.21it/s] 63%|██████▎ | 233220/371472 [7:56:53<11:54:05, 3.23it/s] {'loss': 2.7959, 'learning_rate': 4.3513650369540387e-07, 'epoch': 10.05} + 63%|██████▎ | 233220/371472 [7:56:53<11:54:05, 3.23it/s] 63%|██████▎ | 233221/371472 [7:56:53<11:47:35, 3.26it/s] 63%|██████▎ | 233222/371472 [7:56:54<11:36:47, 3.31it/s] 63%|██████▎ | 233223/371472 [7:56:54<12:04:56, 3.18it/s] 63%|██████▎ | 233224/371472 [7:56:54<11:32:00, 3.33it/s] 63%|██████▎ | 233225/371472 [7:56:54<11:14:03, 3.42it/s] 63%|██████▎ | 233226/371472 [7:56:55<10:59:11, 3.50it/s] 63%|██████▎ | 233227/371472 [7:56:55<10:39:26, 3.60it/s] 63%|██████▎ | 233228/371472 [7:56:55<10:40:06, 3.60it/s] 63%|██████▎ | 233229/371472 [7:56:55<10:24:28, 3.69it/s] 63%|██████▎ | 233230/371472 [7:56:56<10:28:07, 3.67it/s] 63%|██████▎ | 233231/371472 [7:56:56<10:54:52, 3.52it/s] 63%|██████▎ | 233232/371472 [7:56:56<10:59:55, 3.49it/s] 63%|██████▎ | 233233/371472 [7:56:57<10:57:12, 3.51it/s] 63%|██████▎ | 233234/371472 [7:56:57<11:34:17, 3.32it/s] 63%|██████▎ | 233235/371472 [7:56:57<12:15:58, 3.13it/s] 63%|██████▎ | 233236/371472 [7:56:58<11:50:40, 3.24it/s] 63%|██████▎ | 233237/371472 [7:56:58<12:46:56, 3.00it/s] 63%|██████▎ | 233238/371472 [7:56:58<12:09:58, 3.16it/s] 63%|██████▎ | 233239/371472 [7:56:59<12:21:43, 3.11it/s] 63%|██████▎ | 233240/371472 [7:56:59<12:41:25, 3.03it/s] {'loss': 2.9086, 'learning_rate': 4.3508802171992505e-07, 'epoch': 10.05} + 63%|██████▎ | 233240/371472 [7:56:59<12:41:25, 3.03it/s] 63%|██████▎ | 233241/371472 [7:56:59<12:06:21, 3.17it/s] 63%|██████▎ | 233242/371472 [7:57:00<11:36:50, 3.31it/s] 63%|██████▎ | 233243/371472 [7:57:00<11:30:17, 3.34it/s] 63%|██████▎ | 233244/371472 [7:57:00<11:09:59, 3.44it/s] 63%|██████▎ | 233245/371472 [7:57:00<11:14:52, 3.41it/s] 63%|██████▎ | 233246/371472 [7:57:01<11:45:08, 3.27it/s] 63%|██████▎ | 233247/371472 [7:57:01<11:24:43, 3.36it/s] 63%|██████▎ | 233248/371472 [7:57:01<11:03:10, 3.47it/s] 63%|██████▎ | 233249/371472 [7:57:02<10:44:50, 3.57it/s] 63%|██████▎ | 233250/371472 [7:57:02<10:59:31, 3.49it/s] 63%|██████▎ | 233251/371472 [7:57:02<11:22:08, 3.38it/s] 63%|██████▎ | 233252/371472 [7:57:02<11:27:08, 3.35it/s] 63%|██████▎ | 233253/371472 [7:57:03<11:29:56, 3.34it/s] 63%|██████▎ | 233254/371472 [7:57:03<11:44:31, 3.27it/s] 63%|██████▎ | 233255/371472 [7:57:03<11:32:54, 3.32it/s] 63%|██████▎ | 233256/371472 [7:57:04<11:13:20, 3.42it/s] 63%|██████▎ | 233257/371472 [7:57:04<11:08:38, 3.45it/s] 63%|██████▎ | 233258/371472 [7:57:04<11:11:20, 3.43it/s] 63%|██████▎ | 233259/371472 [7:57:05<10:58:46, 3.50it/s] 63%|██████▎ | 233260/371472 [7:57:05<11:37:49, 3.30it/s] {'loss': 2.7724, 'learning_rate': 4.3503953974444607e-07, 'epoch': 10.05} + 63%|██████▎ | 233260/371472 [7:57:05<11:37:49, 3.30it/s] 63%|██████▎ | 233261/371472 [7:57:05<11:34:27, 3.32it/s] 63%|██████▎ | 233262/371472 [7:57:05<11:41:28, 3.28it/s] 63%|██████▎ | 233263/371472 [7:57:06<11:28:30, 3.35it/s] 63%|██████▎ | 233264/371472 [7:57:06<11:18:29, 3.39it/s] 63%|██████▎ | 233265/371472 [7:57:06<11:35:17, 3.31it/s] 63%|██████▎ | 233266/371472 [7:57:07<11:34:16, 3.32it/s] 63%|██████▎ | 233267/371472 [7:57:07<11:28:05, 3.35it/s] 63%|██████▎ | 233268/371472 [7:57:07<11:20:59, 3.38it/s] 63%|██████▎ | 233269/371472 [7:57:08<11:32:53, 3.32it/s] 63%|██████▎ | 233270/371472 [7:57:08<12:11:02, 3.15it/s] 63%|██████▎ | 233271/371472 [7:57:08<11:53:07, 3.23it/s] 63%|██████▎ | 233272/371472 [7:57:09<11:57:28, 3.21it/s] 63%|██████▎ | 233273/371472 [7:57:09<12:47:27, 3.00it/s] 63%|██████▎ | 233274/371472 [7:57:09<12:09:58, 3.16it/s] 63%|██████▎ | 233275/371472 [7:57:09<11:43:29, 3.27it/s] 63%|██████▎ | 233276/371472 [7:57:10<11:23:03, 3.37it/s] 63%|██████▎ | 233277/371472 [7:57:10<11:04:56, 3.46it/s] 63%|██████▎ | 233278/371472 [7:57:10<11:17:08, 3.40it/s] 63%|██████▎ | 233279/371472 [7:57:11<11:01:30, 3.48it/s] 63%|██████▎ | 233280/371472 [7:57:11<10:46:37, 3.56it/s] {'loss': 2.6747, 'learning_rate': 4.3499105776896724e-07, 'epoch': 10.05} + 63%|██████▎ | 233280/371472 [7:57:11<10:46:37, 3.56it/s] 63%|██████▎ | 233281/371472 [7:57:11<11:30:25, 3.34it/s] 63%|██████▎ | 233282/371472 [7:57:11<11:40:47, 3.29it/s] 63%|██████▎ | 233283/371472 [7:57:12<11:23:53, 3.37it/s] 63%|██████▎ | 233284/371472 [7:57:12<11:00:46, 3.49it/s] 63%|██████▎ | 233285/371472 [7:57:12<11:19:03, 3.39it/s] 63%|██████▎ | 233286/371472 [7:57:13<11:23:01, 3.37it/s] 63%|██████▎ | 233287/371472 [7:57:13<11:31:39, 3.33it/s] 63%|██████▎ | 233288/371472 [7:57:13<11:12:57, 3.42it/s] 63%|██████▎ | 233289/371472 [7:57:14<11:23:48, 3.37it/s] 63%|██████▎ | 233290/371472 [7:57:14<11:02:03, 3.48it/s] 63%|██████▎ | 233291/371472 [7:57:14<13:32:45, 2.83it/s] 63%|██████▎ | 233292/371472 [7:57:15<12:52:54, 2.98it/s] 63%|██████▎ | 233293/371472 [7:57:15<12:25:59, 3.09it/s] 63%|██████▎ | 233294/371472 [7:57:15<12:08:04, 3.16it/s] 63%|██████▎ | 233295/371472 [7:57:15<11:55:54, 3.22it/s] 63%|██████▎ | 233296/371472 [7:57:16<11:35:01, 3.31it/s] 63%|██████▎ | 233297/371472 [7:57:16<11:47:34, 3.25it/s] 63%|██████▎ | 233298/371472 [7:57:16<12:11:22, 3.15it/s] 63%|██████▎ | 233299/371472 [7:57:17<13:14:05, 2.90it/s] 63%|██████▎ | 233300/371472 [7:57:17<12:56:32, 2.97it/s] {'loss': 2.7101, 'learning_rate': 4.349425757934883e-07, 'epoch': 10.05} + 63%|██████▎ | 233300/371472 [7:57:17<12:56:32, 2.97it/s] 63%|██████▎ | 233301/371472 [7:57:18<14:27:35, 2.65it/s] 63%|██████▎ | 233302/371472 [7:57:18<13:11:27, 2.91it/s] 63%|██████▎ | 233303/371472 [7:57:18<12:28:24, 3.08it/s] 63%|██████▎ | 233304/371472 [7:57:18<11:54:36, 3.22it/s] 63%|██████▎ | 233305/371472 [7:57:19<11:39:13, 3.29it/s] 63%|██████▎ | 233306/371472 [7:57:19<11:45:44, 3.26it/s] 63%|██████▎ | 233307/371472 [7:57:19<11:32:50, 3.32it/s] 63%|██████▎ | 233308/371472 [7:57:20<11:18:19, 3.39it/s] 63%|██████▎ | 233309/371472 [7:57:20<11:20:17, 3.38it/s] 63%|██████▎ | 233310/371472 [7:57:20<11:18:27, 3.39it/s] 63%|██████▎ | 233311/371472 [7:57:21<11:12:00, 3.43it/s] 63%|██████▎ | 233312/371472 [7:57:21<11:17:35, 3.40it/s] 63%|██████▎ | 233313/371472 [7:57:21<11:18:22, 3.39it/s] 63%|██████▎ | 233314/371472 [7:57:21<11:41:15, 3.28it/s] 63%|██████▎ | 233315/371472 [7:57:22<11:15:09, 3.41it/s] 63%|██████▎ | 233316/371472 [7:57:22<11:15:58, 3.41it/s] 63%|██████▎ | 233317/371472 [7:57:22<11:10:10, 3.44it/s] 63%|██████▎ | 233318/371472 [7:57:23<11:20:26, 3.38it/s] 63%|██████▎ | 233319/371472 [7:57:23<10:59:33, 3.49it/s] 63%|██████▎ | 233320/371472 [7:57:23<12:46:52, 3.00it/s] {'loss': 2.7335, 'learning_rate': 4.3489409381800944e-07, 'epoch': 10.05} + 63%|██████▎ | 233320/371472 [7:57:23<12:46:52, 3.00it/s] 63%|██████▎ | 233321/371472 [7:57:24<12:05:26, 3.17it/s] 63%|██████▎ | 233322/371472 [7:57:24<12:44:13, 3.01it/s] 63%|██████▎ | 233323/371472 [7:57:24<13:17:29, 2.89it/s] 63%|██████▎ | 233324/371472 [7:57:25<12:33:09, 3.06it/s] 63%|██████▎ | 233325/371472 [7:57:25<11:59:45, 3.20it/s] 63%|██████▎ | 233326/371472 [7:57:25<11:40:45, 3.29it/s] 63%|██████▎ | 233327/371472 [7:57:25<11:43:31, 3.27it/s] 63%|██████▎ | 233328/371472 [7:57:26<11:54:01, 3.22it/s] 63%|██████▎ | 233329/371472 [7:57:26<11:33:48, 3.32it/s] 63%|██████▎ | 233330/371472 [7:57:26<11:24:28, 3.36it/s] 63%|██████▎ | 233331/371472 [7:57:27<11:51:43, 3.23it/s] 63%|██████▎ | 233332/371472 [7:57:27<11:38:33, 3.30it/s] 63%|██████▎ | 233333/371472 [7:57:27<12:06:22, 3.17it/s] 63%|██████▎ | 233334/371472 [7:57:28<12:11:24, 3.15it/s] 63%|██████▎ | 233335/371472 [7:57:28<11:45:32, 3.26it/s] 63%|██████▎ | 233336/371472 [7:57:28<11:39:14, 3.29it/s] 63%|██████▎ | 233337/371472 [7:57:29<11:19:20, 3.39it/s] 63%|██████▎ | 233338/371472 [7:57:29<11:23:40, 3.37it/s] 63%|██████▎ | 233339/371472 [7:57:29<11:19:22, 3.39it/s] 63%|██████▎ | 233340/371472 [7:57:29<11:14:55, 3.41it/s] {'loss': 2.7687, 'learning_rate': 4.348456118425305e-07, 'epoch': 10.05} + 63%|██████▎ | 233340/371472 [7:57:29<11:14:55, 3.41it/s] 63%|██████▎ | 233341/371472 [7:57:30<11:10:38, 3.43it/s] 63%|██████▎ | 233342/371472 [7:57:30<10:59:27, 3.49it/s] 63%|██████▎ | 233343/371472 [7:57:30<11:06:38, 3.45it/s] 63%|██████▎ | 233344/371472 [7:57:31<10:53:00, 3.53it/s] 63%|██████▎ | 233345/371472 [7:57:31<11:18:48, 3.39it/s] 63%|██████▎ | 233346/371472 [7:57:31<11:05:22, 3.46it/s] 63%|██████▎ | 233347/371472 [7:57:31<10:55:30, 3.51it/s] 63%|██████▎ | 233348/371472 [7:57:32<11:19:23, 3.39it/s] 63%|██████▎ | 233349/371472 [7:57:32<11:34:54, 3.31it/s] 63%|██████▎ | 233350/371472 [7:57:32<11:20:38, 3.38it/s] 63%|██████▎ | 233351/371472 [7:57:33<11:02:45, 3.47it/s] 63%|██████▎ | 233352/371472 [7:57:33<11:00:59, 3.48it/s] 63%|██████▎ | 233353/371472 [7:57:33<10:40:39, 3.59it/s] 63%|██████▎ | 233354/371472 [7:57:33<10:31:06, 3.65it/s] 63%|██████▎ | 233355/371472 [7:57:34<10:33:49, 3.63it/s] 63%|██████▎ | 233356/371472 [7:57:34<10:27:29, 3.67it/s] 63%|██████▎ | 233357/371472 [7:57:34<10:37:15, 3.61it/s] 63%|██████▎ | 233358/371472 [7:57:35<10:50:39, 3.54it/s] 63%|██████▎ | 233359/371472 [7:57:35<10:35:01, 3.62it/s] 63%|██████▎ | 233360/371472 [7:57:35<10:46:46, 3.56it/s] {'loss': 2.8072, 'learning_rate': 4.347971298670517e-07, 'epoch': 10.05} + 63%|██████▎ | 233360/371472 [7:57:35<10:46:46, 3.56it/s] 63%|██████▎ | 233361/371472 [7:57:35<10:43:17, 3.58it/s] 63%|██████▎ | 233362/371472 [7:57:36<11:12:20, 3.42it/s] 63%|██████▎ | 233363/371472 [7:57:36<11:04:58, 3.46it/s] 63%|██████▎ | 233364/371472 [7:57:36<11:10:42, 3.43it/s] 63%|██████▎ | 233365/371472 [7:57:37<11:24:22, 3.36it/s] 63%|██████▎ | 233366/371472 [7:57:37<11:56:47, 3.21it/s] 63%|██████▎ | 233367/371472 [7:57:37<11:54:15, 3.22it/s] 63%|██████▎ | 233368/371472 [7:57:37<11:46:13, 3.26it/s] 63%|██████▎ | 233369/371472 [7:57:38<11:25:01, 3.36it/s] 63%|██████▎ | 233370/371472 [7:57:38<11:22:36, 3.37it/s] 63%|██████▎ | 233371/371472 [7:57:38<11:39:01, 3.29it/s] 63%|██████▎ | 233372/371472 [7:57:39<11:45:21, 3.26it/s] 63%|██████▎ | 233373/371472 [7:57:39<11:29:37, 3.34it/s] 63%|██████▎ | 233374/371472 [7:57:39<11:14:32, 3.41it/s] 63%|██████▎ | 233375/371472 [7:57:40<10:59:55, 3.49it/s] 63%|██████▎ | 233376/371472 [7:57:40<11:06:47, 3.45it/s] 63%|██████▎ | 233377/371472 [7:57:40<12:23:26, 3.10it/s] 63%|██████▎ | 233378/371472 [7:57:41<11:57:17, 3.21it/s] 63%|██████▎ | 233379/371472 [7:57:41<11:33:04, 3.32it/s] 63%|██████▎ | 233380/371472 [7:57:41<11:08:20, 3.44it/s] {'loss': 2.7751, 'learning_rate': 4.347486478915727e-07, 'epoch': 10.05} + 63%|██████▎ | 233380/371472 [7:57:41<11:08:20, 3.44it/s] 63%|██████▎ | 233381/371472 [7:57:41<11:07:29, 3.45it/s] 63%|██████▎ | 233382/371472 [7:57:42<10:55:53, 3.51it/s] 63%|██████▎ | 233383/371472 [7:57:42<11:11:33, 3.43it/s] 63%|██████▎ | 233384/371472 [7:57:42<11:01:36, 3.48it/s] 63%|██████▎ | 233385/371472 [7:57:42<10:50:32, 3.54it/s] 63%|██████▎ | 233386/371472 [7:57:43<10:42:38, 3.58it/s] 63%|██████▎ | 233387/371472 [7:57:43<10:30:50, 3.65it/s] 63%|██████▎ | 233388/371472 [7:57:43<10:38:55, 3.60it/s] 63%|██████▎ | 233389/371472 [7:57:44<10:45:41, 3.56it/s] 63%|██████▎ | 233390/371472 [7:57:44<10:51:22, 3.53it/s] 63%|██████▎ | 233391/371472 [7:57:44<10:54:27, 3.52it/s] 63%|██████▎ | 233392/371472 [7:57:44<10:33:18, 3.63it/s] 63%|██████▎ | 233393/371472 [7:57:45<10:57:19, 3.50it/s] 63%|██████▎ | 233394/371472 [7:57:45<12:02:40, 3.18it/s] 63%|██████▎ | 233395/371472 [7:57:45<11:29:12, 3.34it/s] 63%|██████▎ | 233396/371472 [7:57:46<12:07:46, 3.16it/s] 63%|██████▎ | 233397/371472 [7:57:46<12:03:50, 3.18it/s] 63%|██████▎ | 233398/371472 [7:57:46<11:49:07, 3.25it/s] 63%|██████▎ | 233399/371472 [7:57:47<11:34:31, 3.31it/s] 63%|██████▎ | 233400/371472 [7:57:47<12:04:21, 3.18it/s] {'loss': 2.6336, 'learning_rate': 4.347001659160939e-07, 'epoch': 10.05} + 63%|██████▎ | 233400/371472 [7:57:47<12:04:21, 3.18it/s] 63%|██████▎ | 233401/371472 [7:57:47<11:50:29, 3.24it/s] 63%|██████▎ | 233402/371472 [7:57:48<11:35:12, 3.31it/s] 63%|██████▎ | 233403/371472 [7:57:48<11:29:57, 3.34it/s] 63%|██████▎ | 233404/371472 [7:57:48<11:39:24, 3.29it/s] 63%|██████▎ | 233405/371472 [7:57:48<12:00:43, 3.19it/s] 63%|██████▎ | 233406/371472 [7:57:49<11:33:28, 3.32it/s] 63%|██████▎ | 233407/371472 [7:57:49<10:58:08, 3.50it/s] 63%|██████▎ | 233408/371472 [7:57:49<11:49:38, 3.24it/s] 63%|██████▎ | 233409/371472 [7:57:50<12:34:08, 3.05it/s] 63%|██████▎ | 233410/371472 [7:57:50<12:03:31, 3.18it/s] 63%|██████▎ | 233411/371472 [7:57:50<11:34:06, 3.32it/s] 63%|██████▎ | 233412/371472 [7:57:51<11:25:37, 3.36it/s] 63%|██████▎ | 233413/371472 [7:57:51<11:57:53, 3.21it/s] 63%|██████▎ | 233414/371472 [7:57:51<12:27:25, 3.08it/s] 63%|██████▎ | 233415/371472 [7:57:52<11:50:45, 3.24it/s] 63%|██████▎ | 233416/371472 [7:57:52<11:13:24, 3.42it/s] 63%|██████▎ | 233417/371472 [7:57:52<11:38:27, 3.29it/s] 63%|██████▎ | 233418/371472 [7:57:52<11:45:57, 3.26it/s] 63%|██████▎ | 233419/371472 [7:57:53<11:11:41, 3.43it/s] 63%|██████▎ | 233420/371472 [7:57:53<10:51:26, 3.53it/s] {'loss': 2.8188, 'learning_rate': 4.3465168394061496e-07, 'epoch': 10.05} + 63%|██████▎ | 233420/371472 [7:57:53<10:51:26, 3.53it/s] 63%|██████▎ | 233421/371472 [7:57:53<10:43:32, 3.58it/s] 63%|██████▎ | 233422/371472 [7:57:54<10:41:02, 3.59it/s] 63%|██████▎ | 233423/371472 [7:57:54<10:41:48, 3.58it/s] 63%|██████▎ | 233424/371472 [7:57:54<11:01:10, 3.48it/s] 63%|██████▎ | 233425/371472 [7:57:54<11:38:56, 3.29it/s] 63%|██████▎ | 233426/371472 [7:57:55<11:19:17, 3.39it/s] 63%|██████▎ | 233427/371472 [7:57:55<11:40:12, 3.29it/s] 63%|██████▎ | 233428/371472 [7:57:55<11:12:10, 3.42it/s] 63%|██████▎ | 233429/371472 [7:57:56<11:31:30, 3.33it/s] 63%|██████▎ | 233430/371472 [7:57:56<11:32:33, 3.32it/s] 63%|██████▎ | 233431/371472 [7:57:56<11:08:38, 3.44it/s] 63%|██████▎ | 233432/371472 [7:57:57<11:47:49, 3.25it/s] 63%|████��█▎ | 233433/371472 [7:57:57<11:27:01, 3.35it/s] 63%|██████▎ | 233434/371472 [7:57:57<11:08:10, 3.44it/s] 63%|██████▎ | 233435/371472 [7:57:57<11:43:43, 3.27it/s] 63%|██████▎ | 233436/371472 [7:57:58<11:37:33, 3.30it/s] 63%|██████▎ | 233437/371472 [7:57:58<11:46:29, 3.26it/s] 63%|██████▎ | 233438/371472 [7:57:58<12:01:48, 3.19it/s] 63%|██████▎ | 233439/371472 [7:57:59<11:37:51, 3.30it/s] 63%|██████▎ | 233440/371472 [7:57:59<11:03:11, 3.47it/s] {'loss': 2.7701, 'learning_rate': 4.346032019651361e-07, 'epoch': 10.05} + 63%|██████▎ | 233440/371472 [7:57:59<11:03:11, 3.47it/s] 63%|██████▎ | 233441/371472 [7:57:59<11:07:36, 3.45it/s] 63%|██████▎ | 233442/371472 [7:57:59<11:02:32, 3.47it/s] 63%|██████▎ | 233443/371472 [7:58:00<10:48:59, 3.54it/s] 63%|██████▎ | 233444/371472 [7:58:00<10:43:34, 3.57it/s] 63%|██████▎ | 233445/371472 [7:58:00<10:35:43, 3.62it/s] 63%|██████▎ | 233446/371472 [7:58:01<10:35:28, 3.62it/s] 63%|██████▎ | 233447/371472 [7:58:01<11:54:51, 3.22it/s] 63%|██████▎ | 233448/371472 [7:58:01<11:38:03, 3.30it/s] 63%|██████▎ | 233449/371472 [7:58:02<11:23:17, 3.37it/s] 63%|██████▎ | 233450/371472 [7:58:02<11:06:21, 3.45it/s] 63%|██████▎ | 233451/371472 [7:58:02<10:53:18, 3.52it/s] 63%|██████▎ | 233452/371472 [7:58:02<10:51:13, 3.53it/s] 63%|██████▎ | 233453/371472 [7:58:03<11:07:02, 3.45it/s] 63%|██████▎ | 233454/371472 [7:58:03<10:50:39, 3.54it/s] 63%|██████▎ | 233455/371472 [7:58:03<10:39:30, 3.60it/s] 63%|██████▎ | 233456/371472 [7:58:03<10:47:34, 3.55it/s] 63%|██████▎ | 233457/371472 [7:58:04<10:52:03, 3.53it/s] 63%|██████▎ | 233458/371472 [7:58:04<10:39:06, 3.60it/s] 63%|██████▎ | 233459/371472 [7:58:04<11:08:34, 3.44it/s] 63%|██████▎ | 233460/371472 [7:58:05<10:59:05, 3.49it/s] {'loss': 2.7414, 'learning_rate': 4.3455471998965715e-07, 'epoch': 10.06} + 63%|██████▎ | 233460/371472 [7:58:05<10:59:05, 3.49it/s] 63%|██████▎ | 233461/371472 [7:58:05<10:58:20, 3.49it/s] 63%|██████▎ | 233462/371472 [7:58:05<11:22:02, 3.37it/s] 63%|██████▎ | 233463/371472 [7:58:06<11:08:25, 3.44it/s] 63%|██████▎ | 233464/371472 [7:58:06<10:52:20, 3.53it/s] 63%|██████▎ | 233465/371472 [7:58:06<10:39:48, 3.60it/s] 63%|██████▎ | 233466/371472 [7:58:06<10:35:16, 3.62it/s] 63%|██████▎ | 233467/371472 [7:58:07<10:48:02, 3.55it/s] 63%|██████▎ | 233468/371472 [7:58:07<10:31:51, 3.64it/s] 63%|██████▎ | 233469/371472 [7:58:07<11:06:23, 3.45it/s] 63%|██████▎ | 233470/371472 [7:58:08<11:17:08, 3.40it/s] 63%|██████▎ | 233471/371472 [7:58:08<11:06:30, 3.45it/s] 63%|██████▎ | 233472/371472 [7:58:08<11:00:02, 3.48it/s] 63%|██████▎ | 233473/371472 [7:58:08<10:45:36, 3.56it/s] 63%|██████▎ | 233474/371472 [7:58:09<10:41:28, 3.59it/s] 63%|██████▎ | 233475/371472 [7:58:09<11:42:08, 3.28it/s] 63%|██████▎ | 233476/371472 [7:58:09<11:25:25, 3.36it/s] 63%|██████▎ | 233477/371472 [7:58:10<10:58:46, 3.49it/s] 63%|██████▎ | 233478/371472 [7:58:10<10:55:40, 3.51it/s] 63%|██████▎ | 233479/371472 [7:58:10<10:38:33, 3.60it/s] 63%|██████▎ | 233480/371472 [7:58:10<11:22:17, 3.37it/s] {'loss': 2.8707, 'learning_rate': 4.345062380141782e-07, 'epoch': 10.06} + 63%|██████▎ | 233480/371472 [7:58:10<11:22:17, 3.37it/s] 63%|██████▎ | 233481/371472 [7:58:11<11:04:51, 3.46it/s] 63%|██████▎ | 233482/371472 [7:58:11<10:51:58, 3.53it/s] 63%|██████▎ | 233483/371472 [7:58:11<11:15:44, 3.40it/s] 63%|██████▎ | 233484/371472 [7:58:12<10:52:40, 3.52it/s] 63%|██████▎ | 233485/371472 [7:58:12<10:38:13, 3.60it/s] 63%|██████▎ | 233486/371472 [7:58:12<10:31:49, 3.64it/s] 63%|██████▎ | 233487/371472 [7:58:12<10:31:29, 3.64it/s] 63%|██████▎ | 233488/371472 [7:58:13<10:58:44, 3.49it/s] 63%|██████▎ | 233489/371472 [7:58:13<11:04:00, 3.46it/s] 63%|██████▎ | 233490/371472 [7:58:13<10:49:06, 3.54it/s] 63%|██████▎ | 233491/371472 [7:58:13<10:43:38, 3.57it/s] 63%|██████▎ | 233492/371472 [7:58:14<10:33:12, 3.63it/s] 63%|██████▎ | 233493/371472 [7:58:14<10:42:25, 3.58it/s] 63%|██████▎ | 233494/371472 [7:58:14<11:02:53, 3.47it/s] 63%|██████▎ | 233495/371472 [7:58:15<10:57:37, 3.50it/s] 63%|██████▎ | 233496/371472 [7:58:15<11:15:54, 3.40it/s] 63%|██████▎ | 233497/371472 [7:58:15<11:53:20, 3.22it/s] 63%|██████▎ | 233498/371472 [7:58:16<11:19:11, 3.39it/s] 63%|██████▎ | 233499/371472 [7:58:16<12:10:55, 3.15it/s] 63%|██████▎ | 233500/371472 [7:58:16<11:50:33, 3.24it/s] {'loss': 2.7362, 'learning_rate': 4.344577560386994e-07, 'epoch': 10.06} + 63%|██████▎ | 233500/371472 [7:58:16<11:50:33, 3.24it/s] 63%|██████▎ | 233501/371472 [7:58:17<11:54:42, 3.22it/s] 63%|██████▎ | 233502/371472 [7:58:17<11:35:36, 3.31it/s] 63%|██████▎ | 233503/371472 [7:58:17<11:11:05, 3.43it/s] 63%|██████▎ | 233504/371472 [7:58:17<11:10:38, 3.43it/s] 63%|██████▎ | 233505/371472 [7:58:18<11:00:56, 3.48it/s] 63%|██████▎ | 233506/371472 [7:58:18<11:01:55, 3.47it/s] 63%|██████▎ | 233507/371472 [7:58:18<10:59:24, 3.49it/s] 63%|██████▎ | 233508/371472 [7:58:19<11:37:05, 3.30it/s] 63%|██████▎ | 233509/371472 [7:58:19<11:48:02, 3.25it/s] 63%|██████▎ | 233510/371472 [7:58:19<11:07:03, 3.45it/s] 63%|██████▎ | 233511/371472 [7:58:19<11:25:14, 3.36it/s] 63%|██████▎ | 233512/371472 [7:58:20<11:33:44, 3.31it/s] 63%|██████▎ | 233513/371472 [7:58:20<11:28:17, 3.34it/s] 63%|██████▎ | 233514/371472 [7:58:20<11:00:29, 3.48it/s] 63%|██████▎ | 233515/371472 [7:58:21<11:10:45, 3.43it/s] 63%|██████▎ | 233516/371472 [7:58:21<11:19:18, 3.38it/s] 63%|██████▎ | 233517/371472 [7:58:21<11:06:22, 3.45it/s] 63%|██████▎ | 233518/371472 [7:58:21<10:58:47, 3.49it/s] 63%|██████▎ | 233519/371472 [7:58:22<10:41:11, 3.59it/s] 63%|██████▎ | 233520/371472 [7:58:22<10:54:28, 3.51it/s] {'loss': 2.6798, 'learning_rate': 4.344092740632204e-07, 'epoch': 10.06} + 63%|██████▎ | 233520/371472 [7:58:22<10:54:28, 3.51it/s] 63%|██████▎ | 233521/371472 [7:58:22<10:50:31, 3.53it/s] 63%|██████▎ | 233522/371472 [7:58:23<11:10:23, 3.43it/s] 63%|██████▎ | 233523/371472 [7:58:23<11:11:20, 3.42it/s] 63%|██████▎ | 233524/371472 [7:58:23<11:25:32, 3.35it/s] 63%|██████▎ | 233525/371472 [7:58:23<11:13:43, 3.41it/s] 63%|██████▎ | 233526/371472 [7:58:24<11:21:12, 3.37it/s] 63%|██████▎ | 233527/371472 [7:58:24<11:51:47, 3.23it/s] 63%|██████▎ | 233528/371472 [7:58:25<12:52:23, 2.98it/s] 63%|██████▎ | 233529/371472 [7:58:25<12:54:18, 2.97it/s] 63%|██████▎ | 233530/371472 [7:58:25<12:02:01, 3.18it/s] 63%|██████▎ | 233531/371472 [7:58:25<12:09:14, 3.15it/s] 63%|██████▎ | 233532/371472 [7:58:26<12:27:51, 3.07it/s] 63%|██████▎ | 233533/371472 [7:58:26<12:33:11, 3.05it/s] 63%|██████▎ | 233534/371472 [7:58:26<11:58:35, 3.20it/s] 63%|██████▎ | 233535/371472 [7:58:27<11:36:02, 3.30it/s] 63%|██████▎ | 233536/371472 [7:58:27<11:26:50, 3.35it/s] 63%|██████▎ | 233537/371472 [7:58:27<11:29:44, 3.33it/s] 63%|██████▎ | 233538/371472 [7:58:28<11:44:11, 3.26it/s] 63%|██████▎ | 233539/371472 [7:58:28<11:24:52, 3.36it/s] 63%|██████▎ | 233540/371472 [7:58:28<10:55:09, 3.51it/s] {'loss': 2.8949, 'learning_rate': 4.3436079208774165e-07, 'epoch': 10.06} + 63%|██████▎ | 233540/371472 [7:58:28<10:55:09, 3.51it/s] 63%|██████▎ | 233541/371472 [7:58:28<11:09:59, 3.43it/s] 63%|██████▎ | 233542/371472 [7:58:29<10:49:06, 3.54it/s] 63%|██████▎ | 233543/371472 [7:58:29<10:46:21, 3.56it/s] 63%|██████▎ | 233544/371472 [7:58:29<11:14:18, 3.41it/s] 63%|██████▎ | 233545/371472 [7:58:30<11:11:53, 3.42it/s] 63%|██████▎ | 233546/371472 [7:58:30<11:32:06, 3.32it/s] 63%|██████▎ | 233547/371472 [7:58:30<11:29:14, 3.34it/s] 63%|██████▎ | 233548/371472 [7:58:31<11:39:20, 3.29it/s] 63%|██████▎ | 233549/371472 [7:58:31<12:27:15, 3.08it/s] 63%|██████▎ | 233550/371472 [7:58:31<12:07:07, 3.16it/s] 63%|██████▎ | 233551/371472 [7:58:32<12:00:56, 3.19it/s] 63%|██████▎ | 233552/371472 [7:58:32<11:36:24, 3.30it/s] 63%|██████▎ | 233553/371472 [7:58:32<11:37:02, 3.30it/s] 63%|██████▎ | 233554/371472 [7:58:32<12:18:31, 3.11it/s] 63%|██████▎ | 233555/371472 [7:58:33<12:05:55, 3.17it/s] 63%|██████▎ | 233556/371472 [7:58:33<11:51:41, 3.23it/s] 63%|██████▎ | 233557/371472 [7:58:33<11:39:06, 3.29it/s] 63%|██████▎ | 233558/371472 [7:58:34<11:14:25, 3.41it/s] 63%|██████▎ | 233559/371472 [7:58:34<10:52:09, 3.52it/s] 63%|██████▎ | 233560/371472 [7:58:34<10:30:03, 3.65it/s] {'loss': 2.878, 'learning_rate': 4.3431231011226267e-07, 'epoch': 10.06} + 63%|██████▎ | 233560/371472 [7:58:34<10:30:03, 3.65it/s] 63%|██████▎ | 233561/371472 [7:58:34<10:31:42, 3.64it/s] 63%|██████▎ | 233562/371472 [7:58:35<10:40:21, 3.59it/s] 63%|██████▎ | 233563/371472 [7:58:35<10:25:19, 3.68it/s] 63%|██████▎ | 233564/371472 [7:58:35<11:01:07, 3.48it/s] 63%|██████▎ | 233565/371472 [7:58:36<10:56:00, 3.50it/s] 63%|██████▎ | 233566/371472 [7:58:36<10:59:32, 3.48it/s] 63%|██████▎ | 233567/371472 [7:58:36<11:13:06, 3.41it/s] 63%|██████▎ | 233568/371472 [7:58:36<11:05:28, 3.45it/s] 63%|██████▎ | 233569/371472 [7:58:37<10:58:52, 3.49it/s] 63%|██████▎ | 233570/371472 [7:58:37<10:59:32, 3.48it/s] 63%|██████▎ | 233571/371472 [7:58:37<11:30:53, 3.33it/s] 63%|██████▎ | 233572/371472 [7:58:38<11:51:35, 3.23it/s] 63%|██████▎ | 233573/371472 [7:58:38<11:43:19, 3.27it/s] 63%|██████▎ | 233574/371472 [7:58:38<11:32:45, 3.32it/s] 63%|██████▎ | 233575/371472 [7:58:39<11:27:58, 3.34it/s] 63%|██████▎ | 233576/371472 [7:58:39<11:53:50, 3.22it/s] 63%|██████▎ | 233577/371472 [7:58:39<11:52:21, 3.23it/s] 63%|██████▎ | 233578/371472 [7:58:39<11:19:21, 3.38it/s] 63%|██████▎ | 233579/371472 [7:58:40<10:41:34, 3.58it/s] 63%|██████▎ | 233580/371472 [7:58:40<11:16:39, 3.40it/s] {'loss': 2.5081, 'learning_rate': 4.342638281367838e-07, 'epoch': 10.06} + 63%|██████▎ | 233580/371472 [7:58:40<11:16:39, 3.40it/s] 63%|██████▎ | 233581/371472 [7:58:40<11:06:21, 3.45it/s] 63%|██████▎ | 233582/371472 [7:58:41<10:49:41, 3.54it/s] 63%|██████▎ | 233583/371472 [7:58:41<10:36:32, 3.61it/s] 63%|██████▎ | 233584/371472 [7:58:41<10:23:14, 3.69it/s] 63%|██████▎ | 233585/371472 [7:58:41<10:26:55, 3.67it/s] 63%|██████▎ | 233586/371472 [7:58:42<10:24:57, 3.68it/s] 63%|██████▎ | 233587/371472 [7:58:42<11:57:29, 3.20it/s] 63%|██████▎ | 233588/371472 [7:58:42<11:27:46, 3.34it/s] 63%|██████▎ | 233589/371472 [7:58:43<11:13:04, 3.41it/s] 63%|██████▎ | 233590/371472 [7:58:43<10:57:01, 3.50it/s] 63%|██████▎ | 233591/371472 [7:58:43<10:44:37, 3.56it/s] 63%|██████▎ | 233592/371472 [7:58:43<10:27:29, 3.66it/s] 63%|██████▎ | 233593/371472 [7:58:44<10:22:36, 3.69it/s] 63%|██████▎ | 233594/371472 [7:58:44<10:12:18, 3.75it/s] 63%|██████▎ | 233595/371472 [7:58:44<10:29:51, 3.65it/s] 63%|██████▎ | 233596/371472 [7:58:44<10:30:27, 3.64it/s] 63%|██████▎ | 233597/371472 [7:58:45<10:41:59, 3.58it/s] 63%|██████▎ | 233598/371472 [7:58:45<10:38:50, 3.60it/s] 63%|██████▎ | 233599/371472 [7:58:45<10:27:31, 3.66it/s] 63%|██████▎ | 233600/371472 [7:58:46<10:34:15, 3.62it/s] {'loss': 2.7506, 'learning_rate': 4.3421534616130486e-07, 'epoch': 10.06} + 63%|██████▎ | 233600/371472 [7:58:46<10:34:15, 3.62it/s] 63%|██████▎ | 233601/371472 [7:58:46<10:18:48, 3.71it/s] 63%|██████▎ | 233602/371472 [7:58:46<11:08:31, 3.44it/s] 63%|██████▎ | 233603/371472 [7:58:46<10:48:18, 3.54it/s] 63%|██████▎ | 233604/371472 [7:58:47<10:59:51, 3.48it/s] 63%|██████▎ | 233605/371472 [7:58:47<10:58:36, 3.49it/s] 63%|██████▎ | 233606/371472 [7:58:47<10:46:28, 3.55it/s] 63%|██████▎ | 233607/371472 [7:58:48<10:57:39, 3.49it/s] 63%|██████▎ | 233608/371472 [7:58:48<11:04:09, 3.46it/s] 63%|██████▎ | 233609/371472 [7:58:48<10:48:25, 3.54it/s] 63%|██████▎ | 233610/371472 [7:58:48<10:55:39, 3.50it/s] 63%|██████▎ | 233611/371472 [7:58:49<11:24:05, 3.36it/s] 63%|██████▎ | 233612/371472 [7:58:49<11:39:18, 3.29it/s] 63%|██████▎ | 233613/371472 [7:58:49<11:19:55, 3.38it/s] 63%|██████▎ | 233614/371472 [7:58:50<11:20:12, 3.38it/s] 63%|██████▎ | 233615/371472 [7:58:50<11:51:17, 3.23it/s] 63%|██████▎ | 233616/371472 [7:58:50<11:32:19, 3.32it/s] 63%|██████▎ | 233617/371472 [7:58:51<11:34:52, 3.31it/s] 63%|██████▎ | 233618/371472 [7:58:51<11:10:42, 3.43it/s] 63%|██████▎ | 233619/371472 [7:58:51<11:50:00, 3.24it/s] 63%|██████▎ | 233620/371472 [7:58:52<12:23:20, 3.09it/s] {'loss': 2.6746, 'learning_rate': 4.3416686418582604e-07, 'epoch': 10.06} + 63%|██████▎ | 233620/371472 [7:58:52<12:23:20, 3.09it/s] 63%|██████▎ | 233621/371472 [7:58:52<11:34:12, 3.31it/s] 63%|██████▎ | 233622/371472 [7:58:52<11:25:35, 3.35it/s] 63%|██████▎ | 233623/371472 [7:58:52<11:08:01, 3.44it/s] 63%|██████▎ | 233624/371472 [7:58:53<11:03:32, 3.46it/s] 63%|██████▎ | 233625/371472 [7:58:53<10:48:58, 3.54it/s] 63%|██████▎ | 233626/371472 [7:58:53<11:04:12, 3.46it/s] 63%|██████▎ | 233627/371472 [7:58:54<11:49:36, 3.24it/s] 63%|██████▎ | 233628/371472 [7:58:54<12:21:46, 3.10it/s] 63%|██████▎ | 233629/371472 [7:58:54<11:49:08, 3.24it/s] 63%|██████▎ | 233630/371472 [7:58:54<11:17:50, 3.39it/s] 63%|██████▎ | 233631/371472 [7:58:55<11:10:32, 3.43it/s] 63%|██████▎ | 233632/371472 [7:58:55<11:02:10, 3.47it/s] 63%|██████▎ | 233633/371472 [7:58:55<11:08:21, 3.44it/s] 63%|██████▎ | 233634/371472 [7:58:56<11:05:58, 3.45it/s] 63%|██████▎ | 233635/371472 [7:58:56<11:08:52, 3.43it/s] 63%|██████▎ | 233636/371472 [7:58:56<11:08:07, 3.44it/s] 63%|██████▎ | 233637/371472 [7:58:56<10:54:15, 3.51it/s] 63%|██████▎ | 233638/371472 [7:58:57<10:49:33, 3.54it/s] 63%|██████▎ | 233639/371472 [7:58:57<10:57:00, 3.50it/s] 63%|██████▎ | 233640/371472 [7:58:57<11:43:49, 3.26it/s] {'loss': 2.7771, 'learning_rate': 4.3411838221034706e-07, 'epoch': 10.06} + 63%|██████▎ | 233640/371472 [7:58:57<11:43:49, 3.26it/s] 63%|██████▎ | 233641/371472 [7:58:58<11:14:55, 3.40it/s] 63%|██████▎ | 233642/371472 [7:58:58<11:30:07, 3.33it/s] 63%|██████▎ | 233643/371472 [7:58:58<11:51:14, 3.23it/s] 63%|██████▎ | 233644/371472 [7:58:59<11:43:12, 3.27it/s] 63%|██████▎ | 233645/371472 [7:58:59<11:34:57, 3.31it/s] 63%|██████▎ | 233646/371472 [7:58:59<11:25:57, 3.35it/s] 63%|██████▎ | 233647/371472 [7:58:59<11:08:09, 3.44it/s] 63%|██████▎ | 233648/371472 [7:59:00<10:46:48, 3.55it/s] 63%|██████▎ | 233649/371472 [7:59:00<10:56:31, 3.50it/s] 63%|██████▎ | 233650/371472 [7:59:00<11:00:08, 3.48it/s] 63%|██████▎ | 233651/371472 [7:59:01<10:59:04, 3.49it/s] 63%|██████▎ | 233652/371472 [7:59:01<11:02:08, 3.47it/s] 63%|██████▎ | 233653/371472 [7:59:01<11:21:24, 3.37it/s] 63%|██████▎ | 233654/371472 [7:59:02<11:24:00, 3.36it/s] 63%|██████▎ | 233655/371472 [7:59:02<12:02:45, 3.18it/s] 63%|██████▎ | 233656/371472 [7:59:02<12:22:40, 3.09it/s] 63%|██████▎ | 233657/371472 [7:59:03<12:13:44, 3.13it/s] 63%|██████▎ | 233658/371472 [7:59:03<11:52:54, 3.22it/s] 63%|██████▎ | 233659/371472 [7:59:03<11:40:58, 3.28it/s] 63%|██████▎ | 233660/371472 [7:59:03<11:14:52, 3.40it/s] {'loss': 2.6613, 'learning_rate': 4.3406990023486824e-07, 'epoch': 10.06} + 63%|██████▎ | 233660/371472 [7:59:03<11:14:52, 3.40it/s] 63%|██████▎ | 233661/371472 [7:59:04<11:02:57, 3.46it/s] 63%|██████▎ | 233662/371472 [7:59:04<11:00:19, 3.48it/s] 63%|██████▎ | 233663/371472 [7:59:04<10:53:41, 3.51it/s] 63%|██████▎ | 233664/371472 [7:59:05<11:03:35, 3.46it/s] 63%|██████▎ | 233665/371472 [7:59:05<10:53:34, 3.51it/s] 63%|██████▎ | 233666/371472 [7:59:05<11:19:39, 3.38it/s] 63%|██████▎ | 233667/371472 [7:59:05<11:13:22, 3.41it/s] 63%|██████▎ | 233668/371472 [7:59:06<11:46:41, 3.25it/s] 63%|██████▎ | 233669/371472 [7:59:06<11:25:08, 3.35it/s] 63%|██████▎ | 233670/371472 [7:59:06<11:09:28, 3.43it/s] 63%|██████▎ | 233671/371472 [7:59:07<11:11:05, 3.42it/s] 63%|██████▎ | 233672/371472 [7:59:07<11:07:16, 3.44it/s] 63%|██████▎ | 233673/371472 [7:59:07<11:02:00, 3.47it/s] 63%|██████▎ | 233674/371472 [7:59:07<10:56:16, 3.50it/s] 63%|██████▎ | 233675/371472 [7:59:08<10:44:58, 3.56it/s] 63%|██████▎ | 233676/371472 [7:59:08<11:19:01, 3.38it/s] 63%|██████▎ | 233677/371472 [7:59:08<11:06:41, 3.44it/s] 63%|██████▎ | 233678/371472 [7:59:09<10:54:32, 3.51it/s] 63%|██████▎ | 233679/371472 [7:59:09<10:47:37, 3.55it/s] 63%|██████▎ | 233680/371472 [7:59:09<10:53:02, 3.52it/s] {'loss': 2.8941, 'learning_rate': 4.340214182593893e-07, 'epoch': 10.07} + 63%|██████▎ | 233680/371472 [7:59:09<10:53:02, 3.52it/s] 63%|██████▎ | 233681/371472 [7:59:09<11:02:47, 3.46it/s] 63%|██████▎ | 233682/371472 [7:59:10<10:47:43, 3.55it/s] 63%|██████▎ | 233683/371472 [7:59:10<10:46:10, 3.55it/s] 63%|██████▎ | 233684/371472 [7:59:10<10:29:45, 3.65it/s] 63%|██████▎ | 233685/371472 [7:59:11<10:37:47, 3.60it/s] 63%|██████▎ | 233686/371472 [7:59:11<10:42:24, 3.57it/s] 63%|██████▎ | 233687/371472 [7:59:11<10:37:36, 3.60it/s] 63%|██████▎ | 233688/371472 [7:59:11<10:27:41, 3.66it/s] 63%|██████▎ | 233689/371472 [7:59:12<10:27:37, 3.66it/s] 63%|██████▎ | 233690/371472 [7:59:12<10:24:38, 3.68it/s] 63%|██████▎ | 233691/371472 [7:59:12<10:28:14, 3.66it/s] 63%|██████▎ | 233692/371472 [7:59:12<10:35:37, 3.61it/s] 63%|██████▎ | 233693/371472 [7:59:13<10:41:09, 3.58it/s] 63%|██████▎ | 233694/371472 [7:59:13<11:08:09, 3.44it/s] 63%|██████▎ | 233695/371472 [7:59:13<10:49:11, 3.54it/s] 63%|██████▎ | 233696/371472 [7:59:14<10:58:45, 3.49it/s] 63%|██████▎ | 233697/371472 [7:59:14<11:00:52, 3.47it/s] 63%|██████▎ | 233698/371472 [7:59:14<10:49:42, 3.53it/s] 63%|██████▎ | 233699/371472 [7:59:15<11:09:53, 3.43it/s] 63%|██████▎ | 233700/371472 [7:59:15<11:52:52, 3.22it/s] {'loss': 2.6194, 'learning_rate': 4.3397293628391043e-07, 'epoch': 10.07} + 63%|██████▎ | 233700/371472 [7:59:15<11:52:52, 3.22it/s] 63%|██████▎ | 233701/371472 [7:59:15<11:30:28, 3.33it/s] 63%|██████▎ | 233702/371472 [7:59:16<12:12:54, 3.13it/s] 63%|██████▎ | 233703/371472 [7:59:16<12:06:30, 3.16it/s] 63%|██████▎ | 233704/371472 [7:59:16<11:47:02, 3.25it/s] 63%|██████▎ | 233705/371472 [7:59:16<11:25:22, 3.35it/s] 63%|██████▎ | 233706/371472 [7:59:17<11:11:58, 3.42it/s] 63%|██████▎ | 233707/371472 [7:59:17<11:14:51, 3.40it/s] 63%|██████▎ | 233708/371472 [7:59:17<11:01:06, 3.47it/s] 63%|██████▎ | 233709/371472 [7:59:18<11:19:17, 3.38it/s] 63%|██████▎ | 233710/371472 [7:59:18<11:11:54, 3.42it/s] 63%|██████▎ | 233711/371472 [7:59:18<11:05:04, 3.45it/s] 63%|██████▎ | 233712/371472 [7:59:18<10:41:40, 3.58it/s] 63%|██████▎ | 233713/371472 [7:59:19<11:29:13, 3.33it/s] 63%|██████▎ | 233714/371472 [7:59:19<11:16:07, 3.40it/s] 63%|██████▎ | 233715/371472 [7:59:19<12:30:07, 3.06it/s] 63%|██████▎ | 233716/371472 [7:59:20<12:48:11, 2.99it/s] 63%|██████▎ | 233717/371472 [7:59:20<12:01:38, 3.18it/s] 63%|██████▎ | 233718/371472 [7:59:20<11:35:42, 3.30it/s] 63%|██████▎ | 233719/371472 [7:59:21<11:09:46, 3.43it/s] 63%|██████▎ | 233720/371472 [7:59:21<11:18:22, 3.38it/s] {'loss': 2.7221, 'learning_rate': 4.339244543084315e-07, 'epoch': 10.07} + 63%|██████▎ | 233720/371472 [7:59:21<11:18:22, 3.38it/s] 63%|██████▎ | 233721/371472 [7:59:21<11:11:06, 3.42it/s] 63%|██████▎ | 233722/371472 [7:59:21<11:02:51, 3.46it/s] 63%|██████▎ | 233723/371472 [7:59:22<11:09:55, 3.43it/s] 63%|██████▎ | 233724/371472 [7:59:22<11:05:51, 3.45it/s] 63%|██████▎ | 233725/371472 [7:59:22<10:38:41, 3.59it/s] 63%|██████▎ | 233726/371472 [7:59:23<10:40:57, 3.58it/s] 63%|██████▎ | 233727/371472 [7:59:23<10:42:01, 3.58it/s] 63%|██████▎ | 233728/371472 [7:59:23<10:44:15, 3.56it/s] 63%|██████▎ | 233729/371472 [7:59:23<10:34:00, 3.62it/s] 63%|██████▎ | 233730/371472 [7:59:24<11:00:50, 3.47it/s] 63%|██████▎ | 233731/371472 [7:59:24<10:56:46, 3.50it/s] 63%|█████��▎ | 233732/371472 [7:59:24<11:08:23, 3.43it/s] 63%|██████▎ | 233733/371472 [7:59:25<11:15:20, 3.40it/s] 63%|██████▎ | 233734/371472 [7:59:25<11:12:15, 3.41it/s] 63%|██████▎ | 233735/371472 [7:59:25<10:58:40, 3.49it/s] 63%|██████▎ | 233736/371472 [7:59:25<10:45:51, 3.55it/s] 63%|██████▎ | 233737/371472 [7:59:26<10:42:15, 3.57it/s] 63%|██████▎ | 233738/371472 [7:59:26<11:04:35, 3.45it/s] 63%|██████▎ | 233739/371472 [7:59:26<11:24:30, 3.35it/s] 63%|██████▎ | 233740/371472 [7:59:27<11:06:54, 3.44it/s] {'loss': 2.8645, 'learning_rate': 4.338759723329527e-07, 'epoch': 10.07} + 63%|██████▎ | 233740/371472 [7:59:27<11:06:54, 3.44it/s] 63%|██████▎ | 233741/371472 [7:59:27<11:17:32, 3.39it/s] 63%|██████▎ | 233742/371472 [7:59:27<12:13:50, 3.13it/s] 63%|██████▎ | 233743/371472 [7:59:28<12:13:31, 3.13it/s] 63%|██████▎ | 233744/371472 [7:59:28<12:34:59, 3.04it/s] 63%|██████▎ | 233745/371472 [7:59:28<12:01:48, 3.18it/s] 63%|██████▎ | 233746/371472 [7:59:29<12:32:16, 3.05it/s] 63%|██████▎ | 233747/371472 [7:59:29<11:54:50, 3.21it/s] 63%|██████▎ | 233748/371472 [7:59:29<11:36:28, 3.30it/s] 63%|██████▎ | 233749/371472 [7:59:30<12:27:51, 3.07it/s] 63%|██████▎ | 233750/371472 [7:59:30<11:59:44, 3.19it/s] 63%|██████▎ | 233751/371472 [7:59:30<11:54:18, 3.21it/s] 63%|██████▎ | 233752/371472 [7:59:30<11:32:01, 3.32it/s] 63%|██████▎ | 233753/371472 [7:59:31<11:25:07, 3.35it/s] 63%|██████▎ | 233754/371472 [7:59:31<11:54:51, 3.21it/s] 63%|██████▎ | 233755/371472 [7:59:31<11:33:38, 3.31it/s] 63%|██████▎ | 233756/371472 [7:59:32<11:59:20, 3.19it/s] 63%|██████▎ | 233757/371472 [7:59:32<11:37:40, 3.29it/s] 63%|██████▎ | 233758/371472 [7:59:32<10:58:21, 3.49it/s] 63%|██████▎ | 233759/371472 [7:59:32<10:53:10, 3.51it/s] 63%|██████▎ | 233760/371472 [7:59:33<10:52:28, 3.52it/s] {'loss': 2.6529, 'learning_rate': 4.338274903574737e-07, 'epoch': 10.07} + 63%|██████▎ | 233760/371472 [7:59:33<10:52:28, 3.52it/s] 63%|██████▎ | 233761/371472 [7:59:33<10:58:55, 3.48it/s] 63%|██████▎ | 233762/371472 [7:59:33<10:43:27, 3.57it/s] 63%|██████▎ | 233763/371472 [7:59:34<10:37:26, 3.60it/s] 63%|██████▎ | 233764/371472 [7:59:34<10:26:01, 3.67it/s] 63%|██████▎ | 233765/371472 [7:59:34<10:28:23, 3.65it/s] 63%|██████▎ | 233766/371472 [7:59:34<11:38:19, 3.29it/s] 63%|██████▎ | 233767/371472 [7:59:35<11:06:06, 3.45it/s] 63%|██████▎ | 233768/371472 [7:59:35<10:53:26, 3.51it/s] 63%|██████▎ | 233769/371472 [7:59:35<11:02:41, 3.46it/s] 63%|██████▎ | 233770/371472 [7:59:36<10:47:00, 3.55it/s] 63%|██████▎ | 233771/371472 [7:59:36<10:46:04, 3.55it/s] 63%|██████▎ | 233772/371472 [7:59:36<10:25:10, 3.67it/s] 63%|██████▎ | 233773/371472 [7:59:36<10:21:08, 3.69it/s] 63%|██████▎ | 233774/371472 [7:59:37<10:29:34, 3.65it/s] 63%|██████▎ | 233775/371472 [7:59:37<10:34:03, 3.62it/s] 63%|██████▎ | 233776/371472 [7:59:37<10:21:21, 3.69it/s] 63%|██████▎ | 233777/371472 [7:59:38<11:02:32, 3.46it/s] 63%|██████▎ | 233778/371472 [7:59:38<11:02:55, 3.46it/s] 63%|██████▎ | 233779/371472 [7:59:38<11:02:21, 3.46it/s] 63%|██████▎ | 233780/371472 [7:59:38<10:48:59, 3.54it/s] {'loss': 2.7663, 'learning_rate': 4.337790083819949e-07, 'epoch': 10.07} + 63%|██████▎ | 233780/371472 [7:59:38<10:48:59, 3.54it/s] 63%|██████▎ | 233781/371472 [7:59:39<10:55:30, 3.50it/s] 63%|██████▎ | 233782/371472 [7:59:39<11:34:35, 3.30it/s] 63%|██████▎ | 233783/371472 [7:59:39<11:54:50, 3.21it/s] 63%|██████▎ | 233784/371472 [7:59:40<11:32:57, 3.31it/s] 63%|██████▎ | 233785/371472 [7:59:40<11:21:32, 3.37it/s] 63%|██████▎ | 233786/371472 [7:59:40<11:26:56, 3.34it/s] 63%|██████▎ | 233787/371472 [7:59:40<11:06:28, 3.44it/s] 63%|██████▎ | 233788/371472 [7:59:41<11:08:35, 3.43it/s] 63%|██████▎ | 233789/371472 [7:59:41<11:00:24, 3.47it/s] 63%|██████▎ | 233790/371472 [7:59:41<11:10:45, 3.42it/s] 63%|██████▎ | 233791/371472 [7:59:42<11:07:43, 3.44it/s] 63%|█���████▎ | 233792/371472 [7:59:42<11:49:57, 3.23it/s] 63%|██████▎ | 233793/371472 [7:59:42<11:43:15, 3.26it/s] 63%|██████▎ | 233794/371472 [7:59:43<11:17:38, 3.39it/s] 63%|██████▎ | 233795/371472 [7:59:43<11:10:52, 3.42it/s] 63%|██████▎ | 233796/371472 [7:59:43<11:16:49, 3.39it/s] 63%|██████▎ | 233797/371472 [7:59:43<11:12:04, 3.41it/s] 63%|██████▎ | 233798/371472 [7:59:44<10:58:54, 3.48it/s] 63%|██████▎ | 233799/371472 [7:59:44<10:57:14, 3.49it/s] 63%|██████▎ | 233800/371472 [7:59:44<12:11:45, 3.14it/s] {'loss': 2.7477, 'learning_rate': 4.3373052640651595e-07, 'epoch': 10.07} + 63%|██████▎ | 233800/371472 [7:59:44<12:11:45, 3.14it/s] 63%|██████▎ | 233801/371472 [7:59:45<11:45:28, 3.25it/s] 63%|██████▎ | 233802/371472 [7:59:45<11:42:55, 3.26it/s] 63%|██████▎ | 233803/371472 [7:59:45<11:41:20, 3.27it/s] 63%|██████▎ | 233804/371472 [7:59:46<11:52:33, 3.22it/s] 63%|██████▎ | 233805/371472 [7:59:46<12:37:40, 3.03it/s] 63%|██████▎ | 233806/371472 [7:59:46<12:06:06, 3.16it/s] 63%|██████▎ | 233807/371472 [7:59:47<12:03:48, 3.17it/s] 63%|██████▎ | 233808/371472 [7:59:47<11:32:49, 3.31it/s] 63%|██████▎ | 233809/371472 [7:59:47<11:18:01, 3.38it/s] 63%|██████▎ | 233810/371472 [7:59:47<11:42:13, 3.27it/s] 63%|██████▎ | 233811/371472 [7:59:48<11:26:05, 3.34it/s] 63%|██████▎ | 233812/371472 [7:59:48<11:06:49, 3.44it/s] 63%|██████▎ | 233813/371472 [7:59:48<11:22:03, 3.36it/s] 63%|██████▎ | 233814/371472 [7:59:49<11:30:30, 3.32it/s] 63%|██████▎ | 233815/371472 [7:59:49<11:11:15, 3.42it/s] 63%|██████▎ | 233816/371472 [7:59:49<11:18:59, 3.38it/s] 63%|██████▎ | 233817/371472 [7:59:49<11:01:12, 3.47it/s] 63%|██████▎ | 233818/371472 [7:59:50<11:51:15, 3.23it/s] 63%|██████▎ | 233819/371472 [7:59:50<11:27:34, 3.34it/s] 63%|██████▎ | 233820/371472 [7:59:50<11:03:10, 3.46it/s] {'loss': 2.7058, 'learning_rate': 4.3368204443103707e-07, 'epoch': 10.07} + 63%|██████▎ | 233820/371472 [7:59:50<11:03:10, 3.46it/s] 63%|██████▎ | 233821/371472 [7:59:51<11:12:42, 3.41it/s] 63%|██████▎ | 233822/371472 [7:59:51<11:07:17, 3.44it/s] 63%|██████▎ | 233823/371472 [7:59:51<11:10:46, 3.42it/s] 63%|██████▎ | 233824/371472 [7:59:52<11:19:30, 3.38it/s] 63%|██████▎ | 233825/371472 [7:59:52<11:12:49, 3.41it/s] 63%|██████▎ | 233826/371472 [7:59:52<11:29:40, 3.33it/s] 63%|██████▎ | 233827/371472 [7:59:52<11:33:41, 3.31it/s] 63%|██████▎ | 233828/371472 [7:59:53<12:11:13, 3.14it/s] 63%|██████▎ | 233829/371472 [7:59:53<11:49:56, 3.23it/s] 63%|██████▎ | 233830/371472 [7:59:53<11:31:10, 3.32it/s] 63%|██████▎ | 233831/371472 [7:59:54<12:07:58, 3.15it/s] 63%|██████▎ | 233832/371472 [7:59:54<11:38:10, 3.29it/s] 63%|██████▎ | 233833/371472 [7:59:54<12:07:38, 3.15it/s] 63%|██████▎ | 233834/371472 [7:59:55<11:55:41, 3.21it/s] 63%|██████▎ | 233835/371472 [7:59:55<11:25:22, 3.35it/s] 63%|██████▎ | 233836/371472 [7:59:55<11:30:56, 3.32it/s] 63%|██████▎ | 233837/371472 [7:59:56<11:02:14, 3.46it/s] 63%|██████▎ | 233838/371472 [7:59:56<12:11:00, 3.14it/s] 63%|██████▎ | 233839/371472 [7:59:56<11:33:44, 3.31it/s] 63%|██████▎ | 233840/371472 [7:59:56<11:27:05, 3.34it/s] {'loss': 2.7488, 'learning_rate': 4.3363356245555814e-07, 'epoch': 10.07} + 63%|██████▎ | 233840/371472 [7:59:56<11:27:05, 3.34it/s] 63%|██████▎ | 233841/371472 [7:59:57<11:23:09, 3.36it/s] 63%|██████▎ | 233842/371472 [7:59:57<11:01:15, 3.47it/s] 63%|██████▎ | 233843/371472 [7:59:57<10:54:36, 3.50it/s] 63%|██████▎ | 233844/371472 [7:59:58<10:57:40, 3.49it/s] 63%|██████▎ | 233845/371472 [7:59:58<11:26:35, 3.34it/s] 63%|██████▎ | 233846/371472 [7:59:58<11:33:21, 3.31it/s] 63%|██████▎ | 233847/371472 [7:59:59<11:38:49, 3.28it/s] 63%|██████▎ | 233848/371472 [7:59:59<11:31:06, 3.32it/s] 63%|██████▎ | 233849/371472 [7:59:59<11:33:14, 3.31it/s] 63%|██████▎ | 233850/371472 [7:59:59<11:02:55, 3.46it/s] 63%|██████▎ | 233851/371472 [8:00:00<11:04:20, 3.45it/s] 63%|██████▎ | 233852/371472 [8:00:00<10:51:25, 3.52it/s] 63%|██████▎ | 233853/371472 [8:00:00<11:09:08, 3.43it/s] 63%|██████▎ | 233854/371472 [8:00:01<11:42:39, 3.26it/s] 63%|██████▎ | 233855/371472 [8:00:01<12:15:16, 3.12it/s] 63%|██████▎ | 233856/371472 [8:00:01<11:50:09, 3.23it/s] 63%|██████▎ | 233857/371472 [8:00:02<11:41:07, 3.27it/s] 63%|██████▎ | 233858/371472 [8:00:02<12:26:58, 3.07it/s] 63%|██████▎ | 233859/371472 [8:00:02<12:12:42, 3.13it/s] 63%|██████▎ | 233860/371472 [8:00:02<11:52:00, 3.22it/s] {'loss': 2.6378, 'learning_rate': 4.335850804800793e-07, 'epoch': 10.07} + 63%|██████▎ | 233860/371472 [8:00:03<11:52:00, 3.22it/s] 63%|██████▎ | 233861/371472 [8:00:03<11:45:04, 3.25it/s] 63%|██████▎ | 233862/371472 [8:00:03<12:24:34, 3.08it/s] 63%|██████▎ | 233863/371472 [8:00:03<11:51:55, 3.22it/s] 63%|██████▎ | 233864/371472 [8:00:04<11:37:22, 3.29it/s] 63%|██████▎ | 233865/371472 [8:00:04<12:00:51, 3.18it/s] 63%|██████▎ | 233866/371472 [8:00:04<11:41:24, 3.27it/s] 63%|██████▎ | 233867/371472 [8:00:05<11:26:09, 3.34it/s] 63%|██████▎ | 233868/371472 [8:00:05<12:06:12, 3.16it/s] 63%|██████▎ | 233869/371472 [8:00:05<12:01:00, 3.18it/s] 63%|██████▎ | 233870/371472 [8:00:06<12:03:30, 3.17it/s] 63%|██████▎ | 233871/371472 [8:00:06<11:47:31, 3.24it/s] 63%|██████▎ | 233872/371472 [8:00:06<11:51:25, 3.22it/s] 63%|██████▎ | 233873/371472 [8:00:07<12:14:54, 3.12it/s] 63%|██████▎ | 233874/371472 [8:00:07<11:48:22, 3.24it/s] 63%|██████▎ | 233875/371472 [8:00:07<11:36:46, 3.29it/s] 63%|██████▎ | 233876/371472 [8:00:07<11:34:37, 3.30it/s] 63%|██████▎ | 233877/371472 [8:00:08<11:31:30, 3.32it/s] 63%|██████▎ | 233878/371472 [8:00:08<11:20:38, 3.37it/s] 63%|██████▎ | 233879/371472 [8:00:08<11:20:10, 3.37it/s] 63%|██████▎ | 233880/371472 [8:00:09<12:01:12, 3.18it/s] {'loss': 2.7101, 'learning_rate': 4.335365985046004e-07, 'epoch': 10.07} + 63%|██████▎ | 233880/371472 [8:00:09<12:01:12, 3.18it/s] 63%|██████▎ | 233881/371472 [8:00:09<11:54:29, 3.21it/s] 63%|██████▎ | 233882/371472 [8:00:09<13:37:36, 2.80it/s] 63%|██████▎ | 233883/371472 [8:00:10<12:34:56, 3.04it/s] 63%|██████▎ | 233884/371472 [8:00:10<12:07:44, 3.15it/s] 63%|██████▎ | 233885/371472 [8:00:10<11:42:31, 3.26it/s] 63%|██████▎ | 233886/371472 [8:00:11<11:41:07, 3.27it/s] 63%|██████▎ | 233887/371472 [8:00:11<11:35:31, 3.30it/s] 63%|██████▎ | 233888/371472 [8:00:11<12:10:54, 3.14it/s] 63%|██████▎ | 233889/371472 [8:00:12<11:39:53, 3.28it/s] 63%|██████▎ | 233890/371472 [8:00:12<11:20:47, 3.37it/s] 63%|██████▎ | 233891/371472 [8:00:12<11:33:48, 3.30it/s] 63%|██████▎ | 233892/371472 [8:00:12<11:08:43, 3.43it/s] 63%|██████▎ | 233893/371472 [8:00:13<11:01:44, 3.47it/s] 63%|██████▎ | 233894/371472 [8:00:13<10:56:58, 3.49it/s] 63%|██████▎ | 233895/371472 [8:00:13<10:56:06, 3.49it/s] 63%|██████▎ | 233896/371472 [8:00:14<11:10:12, 3.42it/s] 63%|██████▎ | 233897/371472 [8:00:14<11:08:20, 3.43it/s] 63%|██████▎ | 233898/371472 [8:00:14<10:53:58, 3.51it/s] 63%|██████▎ | 233899/371472 [8:00:14<10:41:54, 3.57it/s] 63%|██████▎ | 233900/371472 [8:00:15<10:38:37, 3.59it/s] {'loss': 2.7669, 'learning_rate': 4.334881165291215e-07, 'epoch': 10.07} + 63%|██████▎ | 233900/371472 [8:00:15<10:38:37, 3.59it/s] 63%|██████▎ | 233901/371472 [8:00:15<10:43:33, 3.56it/s] 63%|██████▎ | 233902/371472 [8:00:15<10:58:15, 3.48it/s] 63%|██████▎ | 233903/371472 [8:00:16<11:02:25, 3.46it/s] 63%|██████▎ | 233904/371472 [8:00:16<10:53:39, 3.51it/s] 63%|██████▎ | 233905/371472 [8:00:16<11:16:49, 3.39it/s] 63%|██████▎ | 233906/371472 [8:00:16<11:00:37, 3.47it/s] 63%|██████▎ | 233907/371472 [8:00:17<10:53:11, 3.51it/s] 63%|██████▎ | 233908/371472 [8:00:17<10:41:23, 3.57it/s] 63%|██████▎ | 233909/371472 [8:00:17<10:50:13, 3.53it/s] 63%|██████▎ | 233910/371472 [8:00:18<11:35:22, 3.30it/s] 63%|██████▎ | 233911/371472 [8:00:18<11:27:42, 3.33it/s] 63%|██████▎ | 233912/371472 [8:00:18<11:26:01, 3.34it/s] 63%|██████▎ | 233913/371472 [8:00:18<11:06:38, 3.44it/s] 63%|██████▎ | 233914/371472 [8:00:19<10:48:58, 3.53it/s] 63%|██████▎ | 233915/371472 [8:00:19<10:57:05, 3.49it/s] 63%|██████▎ | 233916/371472 [8:00:19<10:53:57, 3.51it/s] 63%|██████▎ | 233917/371472 [8:00:20<11:12:55, 3.41it/s] 63%|██████▎ | 233918/371472 [8:00:20<10:59:24, 3.48it/s] 63%|██████▎ | 233919/371472 [8:00:20<10:33:00, 3.62it/s] 63%|██████▎ | 233920/371472 [8:00:20<10:45:33, 3.55it/s] {'loss': 2.7634, 'learning_rate': 4.334396345536426e-07, 'epoch': 10.08} + 63%|██████▎ | 233920/371472 [8:00:20<10:45:33, 3.55it/s] 63%|██████▎ | 233921/371472 [8:00:21<11:14:31, 3.40it/s] 63%|██████▎ | 233922/371472 [8:00:21<11:03:24, 3.46it/s] 63%|██████▎ | 233923/371472 [8:00:21<11:03:44, 3.45it/s] 63%|██████▎ | 233924/371472 [8:00:22<11:11:54, 3.41it/s] 63%|██████▎ | 233925/371472 [8:00:22<11:03:30, 3.46it/s] 63%|██████▎ | 233926/371472 [8:00:22<11:01:00, 3.47it/s] 63%|██████▎ | 233927/371472 [8:00:22<11:00:03, 3.47it/s] 63%|██████▎ | 233928/371472 [8:00:23<11:07:10, 3.44it/s] 63%|██████▎ | 233929/371472 [8:00:23<11:52:58, 3.22it/s] 63%|██████▎ | 233930/371472 [8:00:23<11:46:41, 3.24it/s] 63%|██████▎ | 233931/371472 [8:00:24<11:28:49, 3.33it/s] 63%|██████▎ | 233932/371472 [8:00:24<11:13:59, 3.40it/s] 63%|██████▎ | 233933/371472 [8:00:24<11:05:43, 3.44it/s] 63%|██████▎ | 233934/371472 [8:00:25<11:13:02, 3.41it/s] 63%|██████▎ | 233935/371472 [8:00:25<11:12:48, 3.41it/s] 63%|██████▎ | 233936/371472 [8:00:25<11:15:02, 3.40it/s] 63%|██████▎ | 233937/371472 [8:00:25<11:03:50, 3.45it/s] 63%|██████▎ | 233938/371472 [8:00:26<10:52:35, 3.51it/s] 63%|██████▎ | 233939/371472 [8:00:26<10:48:26, 3.53it/s] 63%|██████▎ | 233940/371472 [8:00:26<11:09:59, 3.42it/s] {'loss': 2.6902, 'learning_rate': 4.3339115257816377e-07, 'epoch': 10.08} + 63%|██████▎ | 233940/371472 [8:00:26<11:09:59, 3.42it/s] 63%|██████▎ | 233941/371472 [8:00:27<11:07:54, 3.43it/s] 63%|██████▎ | 233942/371472 [8:00:27<11:08:57, 3.43it/s] 63%|██████▎ | 233943/371472 [8:00:27<11:01:50, 3.46it/s] 63%|██████▎ | 233944/371472 [8:00:27<11:21:30, 3.36it/s] 63%|██████▎ | 233945/371472 [8:00:28<11:04:12, 3.45it/s] 63%|██████▎ | 233946/371472 [8:00:28<11:02:25, 3.46it/s] 63%|██████▎ | 233947/371472 [8:00:28<10:55:02, 3.50it/s] 63%|██████▎ | 233948/371472 [8:00:29<10:44:24, 3.56it/s] 63%|██████▎ | 233949/371472 [8:00:29<11:38:40, 3.28it/s] 63%|██████▎ | 233950/371472 [8:00:29<12:06:28, 3.16it/s] 63%|██████▎ | 233951/371472 [8:00:30<11:45:21, 3.25it/s] 63%|██████▎ | 233952/371472 [8:00:30<11:33:47, 3.30it/s] 63%|██████▎ | 233953/371472 [8:00:30<11:08:57, 3.43it/s] 63%|██████▎ | 233954/371472 [8:00:30<10:53:31, 3.51it/s] 63%|██████▎ | 233955/371472 [8:00:31<11:04:39, 3.45it/s] 63%|██████▎ | 233956/371472 [8:00:31<10:58:54, 3.48it/s] 63%|██████▎ | 233957/371472 [8:00:31<10:54:18, 3.50it/s] 63%|██████▎ | 233958/371472 [8:00:32<11:01:53, 3.46it/s] 63%|██████▎ | 233959/371472 [8:00:32<11:00:30, 3.47it/s] 63%|██████▎ | 233960/371472 [8:00:32<11:20:02, 3.37it/s] {'loss': 2.6588, 'learning_rate': 4.3334267060268484e-07, 'epoch': 10.08} + 63%|██████▎ | 233960/371472 [8:00:32<11:20:02, 3.37it/s] 63%|██████▎ | 233961/371472 [8:00:32<11:44:03, 3.26it/s] 63%|██████▎ | 233962/371472 [8:00:33<11:23:49, 3.35it/s] 63%|██████▎ | 233963/371472 [8:00:33<11:22:12, 3.36it/s] 63%|██████▎ | 233964/371472 [8:00:33<11:16:51, 3.39it/s] 63%|██████▎ | 233965/371472 [8:00:34<11:02:09, 3.46it/s] 63%|██████▎ | 233966/371472 [8:00:34<11:08:58, 3.43it/s] 63%|██████▎ | 233967/371472 [8:00:34<11:01:28, 3.46it/s] 63%|██████▎ | 233968/371472 [8:00:34<10:52:50, 3.51it/s] 63%|██████▎ | 233969/371472 [8:00:35<10:44:23, 3.56it/s] 63%|██████▎ | 233970/371472 [8:00:35<10:37:45, 3.59it/s] 63%|██████▎ | 233971/371472 [8:00:35<10:50:12, 3.52it/s] 63%|██████▎ | 233972/371472 [8:00:36<11:20:17, 3.37it/s] 63%|██████▎ | 233973/371472 [8:00:36<11:13:31, 3.40it/s] 63%|██████▎ | 233974/371472 [8:00:36<11:15:51, 3.39it/s] 63%|██████▎ | 233975/371472 [8:00:37<11:07:50, 3.43it/s] 63%|██████▎ | 233976/371472 [8:00:37<11:42:56, 3.26it/s] 63%|██████▎ | 233977/371472 [8:00:37<11:13:05, 3.40it/s] 63%|██████▎ | 233978/371472 [8:00:37<11:28:04, 3.33it/s] 63%|██████▎ | 233979/371472 [8:00:38<11:23:10, 3.35it/s] 63%|██████▎ | 233980/371472 [8:00:38<11:06:08, 3.44it/s] {'loss': 2.6609, 'learning_rate': 4.3329418862720596e-07, 'epoch': 10.08} + 63%|██████▎ | 233980/371472 [8:00:38<11:06:08, 3.44it/s] 63%|██████▎ | 233981/371472 [8:00:38<11:23:10, 3.35it/s] 63%|██████▎ | 233982/371472 [8:00:39<11:57:13, 3.19it/s] 63%|██████▎ | 233983/371472 [8:00:39<11:32:38, 3.31it/s] 63%|██████▎ | 233984/371472 [8:00:39<11:38:26, 3.28it/s] 63%|██████▎ | 233985/371472 [8:00:40<11:20:26, 3.37it/s] 63%|██████▎ | 233986/371472 [8:00:40<11:14:30, 3.40it/s] 63%|██████▎ | 233987/371472 [8:00:40<11:03:05, 3.46it/s] 63%|██████▎ | 233988/371472 [8:00:40<11:29:59, 3.32it/s] 63%|██████▎ | 233989/371472 [8:00:41<11:07:52, 3.43it/s] 63%|██████▎ | 233990/371472 [8:00:41<11:04:01, 3.45it/s] 63%|██████▎ | 233991/371472 [8:00:41<11:10:38, 3.42it/s] 63%|██████▎ | 233992/371472 [8:00:42<10:59:31, 3.47it/s] 63%|██████▎ | 233993/371472 [8:00:42<11:00:27, 3.47it/s] 63%|██████▎ | 233994/371472 [8:00:42<11:25:10, 3.34it/s] 63%|██████▎ | 233995/371472 [8:00:42<11:28:59, 3.33it/s] 63%|██████▎ | 233996/371472 [8:00:43<11:28:26, 3.33it/s] 63%|██████▎ | 233997/371472 [8:00:43<12:20:53, 3.09it/s] 63%|██████▎ | 233998/371472 [8:00:43<11:57:50, 3.19it/s] 63%|██████▎ | 233999/371472 [8:00:44<11:21:30, 3.36it/s] 63%|██████▎ | 234000/371472 [8:00:44<11:08:17, 3.43it/s] {'loss': 2.7384, 'learning_rate': 4.3324570665172703e-07, 'epoch': 10.08} + 63%|██████▎ | 234000/371472 [8:00:44<11:08:17, 3.43it/s] 63%|██████▎ | 234001/371472 [8:00:44<10:48:47, 3.53it/s] 63%|██████▎ | 234002/371472 [8:00:45<10:38:06, 3.59it/s] 63%|██████▎ | 234003/371472 [8:00:45<11:11:24, 3.41it/s] 63%|██████▎ | 234004/371472 [8:00:45<10:56:17, 3.49it/s] 63%|██████▎ | 234005/371472 [8:00:45<11:11:02, 3.41it/s] 63%|██████▎ | 234006/371472 [8:00:46<10:57:48, 3.48it/s] 63%|██████▎ | 234007/371472 [8:00:46<11:18:40, 3.38it/s] 63%|██████▎ | 234008/371472 [8:00:46<12:31:54, 3.05it/s] 63%|██████▎ | 234009/371472 [8:00:47<12:12:44, 3.13it/s] 63%|██████▎ | 234010/371472 [8:00:47<12:52:06, 2.97it/s] 63%|██████▎ | 234011/371472 [8:00:47<12:46:25, 2.99it/s] 63%|██████▎ | 234012/371472 [8:00:48<12:01:37, 3.17it/s] 63%|██████▎ | 234013/371472 [8:00:48<11:28:09, 3.33it/s] 63%|██████▎ | 234014/371472 [8:00:48<11:30:28, 3.32it/s] 63%|██████▎ | 234015/371472 [8:00:49<11:28:01, 3.33it/s] 63%|██████▎ | 234016/371472 [8:00:49<11:26:27, 3.34it/s] 63%|██████▎ | 234017/371472 [8:00:49<12:00:34, 3.18it/s] 63%|██████▎ | 234018/371472 [8:00:50<12:06:07, 3.15it/s] 63%|██████▎ | 234019/371472 [8:00:50<11:44:05, 3.25it/s] 63%|██████▎ | 234020/371472 [8:00:50<12:15:55, 3.11it/s] {'loss': 2.5661, 'learning_rate': 4.3319722467624805e-07, 'epoch': 10.08} + 63%|██████▎ | 234020/371472 [8:00:50<12:15:55, 3.11it/s] 63%|██████▎ | 234021/371472 [8:00:50<12:00:26, 3.18it/s] 63%|██████▎ | 234022/371472 [8:00:51<11:59:05, 3.19it/s] 63%|██████▎ | 234023/371472 [8:00:51<11:44:52, 3.25it/s] 63%|██████▎ | 234024/371472 [8:00:51<11:40:06, 3.27it/s] 63%|██████▎ | 234025/371472 [8:00:52<11:29:38, 3.32it/s] 63%|██████▎ | 234026/371472 [8:00:52<11:25:27, 3.34it/s] 63%|██████▎ | 234027/371472 [8:00:52<11:24:49, 3.35it/s] 63%|██████▎ | 234028/371472 [8:00:53<11:48:21, 3.23it/s] 63%|██████▎ | 234029/371472 [8:00:53<11:21:21, 3.36it/s] 63%|██████▎ | 234030/371472 [8:00:53<11:12:09, 3.41it/s] 63%|██████��� | 234031/371472 [8:00:53<10:55:15, 3.50it/s] 63%|██████▎ | 234032/371472 [8:00:54<10:50:40, 3.52it/s] 63%|██████▎ | 234033/371472 [8:00:54<11:00:34, 3.47it/s] 63%|██████▎ | 234034/371472 [8:00:54<10:51:50, 3.51it/s] 63%|██████▎ | 234035/371472 [8:00:55<11:32:21, 3.31it/s] 63%|██████▎ | 234036/371472 [8:00:55<11:18:42, 3.37it/s] 63%|██████▎ | 234037/371472 [8:00:55<11:05:32, 3.44it/s] 63%|██████▎ | 234038/371472 [8:00:55<11:05:40, 3.44it/s] 63%|██████▎ | 234039/371472 [8:00:56<11:03:56, 3.45it/s] 63%|██████▎ | 234040/371472 [8:00:56<10:46:56, 3.54it/s] {'loss': 2.7888, 'learning_rate': 4.3314874270076923e-07, 'epoch': 10.08} + 63%|██████▎ | 234040/371472 [8:00:56<10:46:56, 3.54it/s] 63%|██████▎ | 234041/371472 [8:00:56<10:50:10, 3.52it/s] 63%|██████▎ | 234042/371472 [8:00:57<10:43:32, 3.56it/s] 63%|██████▎ | 234043/371472 [8:00:57<11:38:40, 3.28it/s] 63%|██████▎ | 234044/371472 [8:00:57<11:22:51, 3.35it/s] 63%|██████▎ | 234045/371472 [8:00:58<11:19:03, 3.37it/s] 63%|██████▎ | 234046/371472 [8:00:58<11:24:43, 3.35it/s] 63%|██████▎ | 234047/371472 [8:00:58<11:01:26, 3.46it/s] 63%|██████▎ | 234048/371472 [8:00:58<11:08:02, 3.43it/s] 63%|██████▎ | 234049/371472 [8:00:59<10:58:45, 3.48it/s] 63%|██████▎ | 234050/371472 [8:00:59<10:49:08, 3.53it/s] 63%|██████▎ | 234051/371472 [8:00:59<11:10:44, 3.41it/s] 63%|██████▎ | 234052/371472 [8:01:00<10:58:49, 3.48it/s] 63%|██████▎ | 234053/371472 [8:01:00<10:41:54, 3.57it/s] 63%|██████▎ | 234054/371472 [8:01:00<10:38:55, 3.58it/s] 63%|██████▎ | 234055/371472 [8:01:00<11:42:53, 3.26it/s] 63%|██████▎ | 234056/371472 [8:01:01<11:36:57, 3.29it/s] 63%|██████▎ | 234057/371472 [8:01:01<11:25:18, 3.34it/s] 63%|██████▎ | 234058/371472 [8:01:01<11:45:57, 3.24it/s] 63%|██████▎ | 234059/371472 [8:01:02<11:33:19, 3.30it/s] 63%|██████▎ | 234060/371472 [8:01:02<11:19:45, 3.37it/s] {'loss': 2.6906, 'learning_rate': 4.331002607252903e-07, 'epoch': 10.08} + 63%|██████▎ | 234060/371472 [8:01:02<11:19:45, 3.37it/s] 63%|██████▎ | 234061/371472 [8:01:02<11:35:29, 3.29it/s] 63%|██████▎ | 234062/371472 [8:01:03<12:37:20, 3.02it/s] 63%|██████▎ | 234063/371472 [8:01:03<11:58:54, 3.19it/s] 63%|██████▎ | 234064/371472 [8:01:03<12:40:29, 3.01it/s] 63%|██████▎ | 234065/371472 [8:01:04<12:54:43, 2.96it/s] 63%|██████▎ | 234066/371472 [8:01:04<12:32:40, 3.04it/s] 63%|██████▎ | 234067/371472 [8:01:04<12:06:00, 3.15it/s] 63%|██████▎ | 234068/371472 [8:01:05<11:46:10, 3.24it/s] 63%|██████▎ | 234069/371472 [8:01:05<11:31:12, 3.31it/s] 63%|██████▎ | 234070/371472 [8:01:05<11:45:39, 3.25it/s] 63%|██████▎ | 234071/371472 [8:01:05<11:14:51, 3.39it/s] 63%|██████▎ | 234072/371472 [8:01:06<10:55:55, 3.49it/s] 63%|██████▎ | 234073/371472 [8:01:06<11:06:09, 3.44it/s] 63%|██████▎ | 234074/371472 [8:01:06<11:00:39, 3.47it/s] 63%|██████▎ | 234075/371472 [8:01:07<10:45:34, 3.55it/s] 63%|██████▎ | 234076/371472 [8:01:07<10:32:49, 3.62it/s] 63%|██████▎ | 234077/371472 [8:01:07<11:33:59, 3.30it/s] 63%|██████▎ | 234078/371472 [8:01:07<11:12:38, 3.40it/s] 63%|██████▎ | 234079/371472 [8:01:08<10:56:16, 3.49it/s] 63%|██████▎ | 234080/371472 [8:01:08<11:03:26, 3.45it/s] {'loss': 2.8107, 'learning_rate': 4.330517787498114e-07, 'epoch': 10.08} + 63%|██████▎ | 234080/371472 [8:01:08<11:03:26, 3.45it/s] 63%|██████▎ | 234081/371472 [8:01:08<11:24:47, 3.34it/s] 63%|██████▎ | 234082/371472 [8:01:09<11:15:58, 3.39it/s] 63%|██████▎ | 234083/371472 [8:01:09<11:15:29, 3.39it/s] 63%|██████▎ | 234084/371472 [8:01:09<11:03:10, 3.45it/s] 63%|██████▎ | 234085/371472 [8:01:09<11:04:25, 3.45it/s] 63%|██████▎ | 234086/371472 [8:01:10<10:54:08, 3.50it/s] 63%|██████▎ | 234087/371472 [8:01:10<11:11:16, 3.41it/s] 63%|██████▎ | 234088/371472 [8:01:10<11:04:12, 3.45it/s] 63%|██████▎ | 234089/371472 [8:01:11<10:57:52, 3.48it/s] 63%|██████▎ | 234090/371472 [8:01:11<10:43:46, 3.56it/s] 63%|█��████▎ | 234091/371472 [8:01:11<10:31:23, 3.63it/s] 63%|██████▎ | 234092/371472 [8:01:11<10:25:21, 3.66it/s] 63%|██████▎ | 234093/371472 [8:01:12<10:22:52, 3.68it/s] 63%|██████▎ | 234094/371472 [8:01:12<10:25:24, 3.66it/s] 63%|██████▎ | 234095/371472 [8:01:12<10:42:28, 3.56it/s] 63%|██████▎ | 234096/371472 [8:01:13<10:46:32, 3.54it/s] 63%|██████▎ | 234097/371472 [8:01:13<10:55:09, 3.49it/s] 63%|██████▎ | 234098/371472 [8:01:13<11:37:00, 3.28it/s] 63%|██████▎ | 234099/371472 [8:01:13<11:24:05, 3.35it/s] 63%|██████▎ | 234100/371472 [8:01:14<11:18:17, 3.38it/s] {'loss': 2.6208, 'learning_rate': 4.330032967743325e-07, 'epoch': 10.08} + 63%|██████▎ | 234100/371472 [8:01:14<11:18:17, 3.38it/s] 63%|██████▎ | 234101/371472 [8:01:14<11:35:58, 3.29it/s] 63%|██████▎ | 234102/371472 [8:01:14<11:18:24, 3.37it/s] 63%|██████▎ | 234103/371472 [8:01:15<11:20:31, 3.36it/s] 63%|██████▎ | 234104/371472 [8:01:15<11:11:03, 3.41it/s] 63%|██████▎ | 234105/371472 [8:01:15<11:01:22, 3.46it/s] 63%|██████▎ | 234106/371472 [8:01:16<12:17:56, 3.10it/s] 63%|██████▎ | 234107/371472 [8:01:16<12:40:48, 3.01it/s] 63%|██████▎ | 234108/371472 [8:01:16<12:06:08, 3.15it/s] 63%|██████▎ | 234109/371472 [8:01:17<12:10:24, 3.13it/s] 63%|██████▎ | 234110/371472 [8:01:17<11:45:42, 3.24it/s] 63%|██████▎ | 234111/371472 [8:01:17<11:22:54, 3.35it/s] 63%|██████▎ | 234112/371472 [8:01:17<11:38:55, 3.28it/s] 63%|██████▎ | 234113/371472 [8:01:18<11:37:40, 3.28it/s] 63%|██████▎ | 234114/371472 [8:01:18<11:26:43, 3.33it/s] 63%|██████▎ | 234115/371472 [8:01:18<11:14:55, 3.39it/s] 63%|██████▎ | 234116/371472 [8:01:19<11:10:39, 3.41it/s] 63%|██████▎ | 234117/371472 [8:01:19<11:21:44, 3.36it/s] 63%|██████▎ | 234118/371472 [8:01:19<11:17:53, 3.38it/s] 63%|██████▎ | 234119/371472 [8:01:20<11:40:27, 3.27it/s] 63%|██████▎ | 234120/371472 [8:01:20<11:29:07, 3.32it/s] {'loss': 2.8787, 'learning_rate': 4.329548147988537e-07, 'epoch': 10.08} + 63%|██████▎ | 234120/371472 [8:01:20<11:29:07, 3.32it/s] 63%|██████▎ | 234121/371472 [8:01:20<12:20:42, 3.09it/s] 63%|██████▎ | 234122/371472 [8:01:21<12:13:40, 3.12it/s] 63%|██████▎ | 234123/371472 [8:01:21<12:11:25, 3.13it/s] 63%|██████▎ | 234124/371472 [8:01:21<12:10:02, 3.14it/s] 63%|██████▎ | 234125/371472 [8:01:21<11:54:42, 3.20it/s] 63%|██████▎ | 234126/371472 [8:01:22<11:27:23, 3.33it/s] 63%|██████▎ | 234127/371472 [8:01:22<11:16:58, 3.38it/s] 63%|██████▎ | 234128/371472 [8:01:22<11:35:05, 3.29it/s] 63%|██████▎ | 234129/371472 [8:01:23<11:13:34, 3.40it/s] 63%|██████▎ | 234130/371472 [8:01:23<11:13:57, 3.40it/s] 63%|██████▎ | 234131/371472 [8:01:23<11:07:12, 3.43it/s] 63%|██████▎ | 234132/371472 [8:01:23<10:51:04, 3.52it/s] 63%|██████▎ | 234133/371472 [8:01:24<10:48:01, 3.53it/s] 63%|██████▎ | 234134/371472 [8:01:24<11:04:54, 3.44it/s] 63%|██████▎ | 234135/371472 [8:01:24<10:51:16, 3.51it/s] 63%|██████▎ | 234136/371472 [8:01:25<11:28:30, 3.32it/s] 63%|██████▎ | 234137/371472 [8:01:25<12:33:27, 3.04it/s] 63%|██████▎ | 234138/371472 [8:01:25<12:27:30, 3.06it/s] 63%|██████▎ | 234139/371472 [8:01:26<11:58:04, 3.19it/s] 63%|██████▎ | 234140/371472 [8:01:26<11:47:47, 3.23it/s] {'loss': 2.7092, 'learning_rate': 4.3290633282337475e-07, 'epoch': 10.08} + 63%|██████▎ | 234140/371472 [8:01:26<11:47:47, 3.23it/s] 63%|██████▎ | 234141/371472 [8:01:26<11:26:52, 3.33it/s] 63%|██████▎ | 234142/371472 [8:01:26<11:09:42, 3.42it/s] 63%|██████▎ | 234143/371472 [8:01:27<10:56:06, 3.49it/s] 63%|██████▎ | 234144/371472 [8:01:27<10:43:40, 3.56it/s] 63%|██████▎ | 234145/371472 [8:01:27<11:13:57, 3.40it/s] 63%|██████▎ | 234146/371472 [8:01:28<11:08:58, 3.42it/s] 63%|██████▎ | 234147/371472 [8:01:28<11:21:09, 3.36it/s] 63%|██████▎ | 234148/371472 [8:01:28<11:10:59, 3.41it/s] 63%|██████▎ | 234149/371472 [8:01:29<11:11:32, 3.41it/s] 63%|██████▎ | 234150/371472 [8:01:29<11:08:06, 3.43it/s] 63%|██████▎ | 234151/371472 [8:01:29<11:09:30, 3.42it/s] 63%|██████▎ | 234152/371472 [8:01:29<10:53:22, 3.50it/s] 63%|██████▎ | 234153/371472 [8:01:30<10:42:40, 3.56it/s] 63%|██████▎ | 234154/371472 [8:01:30<10:43:15, 3.56it/s] 63%|██████▎ | 234155/371472 [8:01:30<11:05:42, 3.44it/s] 63%|██████▎ | 234156/371472 [8:01:31<11:25:04, 3.34it/s] 63%|██████▎ | 234157/371472 [8:01:31<11:27:02, 3.33it/s] 63%|██████▎ | 234158/371472 [8:01:31<11:31:14, 3.31it/s] 63%|██████▎ | 234159/371472 [8:01:32<12:09:45, 3.14it/s] 63%|██████▎ | 234160/371472 [8:01:32<11:42:38, 3.26it/s] {'loss': 2.8086, 'learning_rate': 4.3285785084789587e-07, 'epoch': 10.09} + 63%|██████▎ | 234160/371472 [8:01:32<11:42:38, 3.26it/s] 63%|██████▎ | 234161/371472 [8:01:32<12:06:43, 3.15it/s] 63%|██████▎ | 234162/371472 [8:01:32<11:30:40, 3.31it/s] 63%|██████▎ | 234163/371472 [8:01:33<11:15:56, 3.39it/s] 63%|██████▎ | 234164/371472 [8:01:33<10:54:23, 3.50it/s] 63%|██████▎ | 234165/371472 [8:01:33<11:36:54, 3.28it/s] 63%|██████▎ | 234166/371472 [8:01:34<11:14:19, 3.39it/s] 63%|██████▎ | 234167/371472 [8:01:34<11:22:43, 3.35it/s] 63%|██████▎ | 234168/371472 [8:01:34<11:44:11, 3.25it/s] 63%|██████▎ | 234169/371472 [8:01:35<11:26:58, 3.33it/s] 63%|██████▎ | 234170/371472 [8:01:35<11:57:47, 3.19it/s] 63%|██████▎ | 234171/371472 [8:01:35<11:52:48, 3.21it/s] 63%|██████▎ | 234172/371472 [8:01:35<11:42:34, 3.26it/s] 63%|██████▎ | 234173/371472 [8:01:36<12:35:13, 3.03it/s] 63%|██████▎ | 234174/371472 [8:01:36<11:55:32, 3.20it/s] 63%|██████▎ | 234175/371472 [8:01:36<11:49:49, 3.22it/s] 63%|██████▎ | 234176/371472 [8:01:37<11:32:13, 3.31it/s] 63%|██████▎ | 234177/371472 [8:01:37<12:12:11, 3.13it/s] 63%|██████▎ | 234178/371472 [8:01:37<12:13:20, 3.12it/s] 63%|██████▎ | 234179/371472 [8:01:38<11:44:20, 3.25it/s] 63%|██████▎ | 234180/371472 [8:01:38<11:38:22, 3.28it/s] {'loss': 2.6078, 'learning_rate': 4.3280936887241694e-07, 'epoch': 10.09} + 63%|██████▎ | 234180/371472 [8:01:38<11:38:22, 3.28it/s] 63%|██████▎ | 234181/371472 [8:01:38<11:46:47, 3.24it/s] 63%|██████▎ | 234182/371472 [8:01:39<11:28:34, 3.32it/s] 63%|██████▎ | 234183/371472 [8:01:39<12:05:19, 3.15it/s] 63%|██████▎ | 234184/371472 [8:01:39<11:46:14, 3.24it/s] 63%|██████▎ | 234185/371472 [8:01:40<11:57:54, 3.19it/s] 63%|██████▎ | 234186/371472 [8:01:40<11:40:17, 3.27it/s] 63%|██████▎ | 234187/371472 [8:01:40<11:47:34, 3.23it/s] 63%|██████▎ | 234188/371472 [8:01:40<11:20:59, 3.36it/s] 63%|██████▎ | 234189/371472 [8:01:41<11:17:47, 3.38it/s] 63%|██████▎ | 234190/371472 [8:01:41<11:17:45, 3.38it/s] 63%|██████▎ | 234191/371472 [8:01:41<11:14:16, 3.39it/s] 63%|██████▎ | 234192/371472 [8:01:42<11:05:32, 3.44it/s] 63%|██████▎ | 234193/371472 [8:01:42<11:10:40, 3.41it/s] 63%|██████▎ | 234194/371472 [8:01:42<10:54:13, 3.50it/s] 63%|██████▎ | 234195/371472 [8:01:42<10:56:24, 3.49it/s] 63%|██████▎ | 234196/371472 [8:01:43<10:55:09, 3.49it/s] 63%|██████▎ | 234197/371472 [8:01:43<10:55:10, 3.49it/s] 63%|██████▎ | 234198/371472 [8:01:43<10:57:27, 3.48it/s] 63%|██████▎ | 234199/371472 [8:01:44<11:21:30, 3.36it/s] 63%|██████▎ | 234200/371472 [8:01:44<11:43:40, 3.25it/s] {'loss': 2.8745, 'learning_rate': 4.3276088689693807e-07, 'epoch': 10.09} + 63%|██████▎ | 234200/371472 [8:01:44<11:43:40, 3.25it/s] 63%|██████▎ | 234201/371472 [8:01:44<11:29:24, 3.32it/s] 63%|██████▎ | 234202/371472 [8:01:45<11:16:56, 3.38it/s] 63%|██████▎ | 234203/371472 [8:01:45<10:57:31, 3.48it/s] 63%|██████▎ | 234204/371472 [8:01:45<12:05:50, 3.15it/s] 63%|██████▎ | 234205/371472 [8:01:45<11:37:05, 3.28it/s] 63%|██████▎ | 234206/371472 [8:01:46<11:32:54, 3.30it/s] 63%|██████▎ | 234207/371472 [8:01:46<11:17:26, 3.38it/s] 63%|██████▎ | 234208/371472 [8:01:46<10:57:51, 3.48it/s] 63%|██████▎ | 234209/371472 [8:01:47<11:06:41, 3.43it/s] 63%|██████▎ | 234210/371472 [8:01:47<11:08:37, 3.42it/s] 63%|██████▎ | 234211/371472 [8:01:47<11:03:32, 3.45it/s] 63%|██████▎ | 234212/371472 [8:01:48<11:48:41, 3.23it/s] 63%|██████▎ | 234213/371472 [8:01:48<11:17:57, 3.37it/s] 63%|██████▎ | 234214/371472 [8:01:48<11:22:02, 3.35it/s] 63%|██████▎ | 234215/371472 [8:01:48<11:05:06, 3.44it/s] 63%|██████▎ | 234216/371472 [8:01:49<11:11:58, 3.40it/s] 63%|██████▎ | 234217/371472 [8:01:49<11:12:07, 3.40it/s] 63%|██████▎ | 234218/371472 [8:01:49<10:56:55, 3.48it/s] 63%|██████▎ | 234219/371472 [8:01:50<11:01:09, 3.46it/s] 63%|██████▎ | 234220/371472 [8:01:50<11:06:15, 3.43it/s] {'loss': 2.7078, 'learning_rate': 4.3271240492145914e-07, 'epoch': 10.09} + 63%|██████▎ | 234220/371472 [8:01:50<11:06:15, 3.43it/s] 63%|██████▎ | 234221/371472 [8:01:50<11:19:00, 3.37it/s] 63%|██████▎ | 234222/371472 [8:01:50<11:02:18, 3.45it/s] 63%|██████▎ | 234223/371472 [8:01:51<11:35:13, 3.29it/s] 63%|██████▎ | 234224/371472 [8:01:51<11:28:51, 3.32it/s] 63%|██████▎ | 234225/371472 [8:01:51<11:29:03, 3.32it/s] 63%|██████▎ | 234226/371472 [8:01:52<11:23:46, 3.35it/s] 63%|██████▎ | 234227/371472 [8:01:52<11:50:02, 3.22it/s] 63%|██████▎ | 234228/371472 [8:01:52<11:21:06, 3.36it/s] 63%|██████▎ | 234229/371472 [8:01:53<11:58:45, 3.18it/s] 63%|██████▎ | 234230/371472 [8:01:53<11:51:26, 3.22it/s] 63%|██████▎ | 234231/371472 [8:01:53<11:33:35, 3.30it/s] 63%|██████▎ | 234232/371472 [8:01:53<11:09:51, 3.41it/s] 63%|██████▎ | 234233/371472 [8:01:54<11:05:09, 3.44it/s] 63%|██████▎ | 234234/371472 [8:01:54<10:49:54, 3.52it/s] 63%|██████▎ | 234235/371472 [8:01:54<11:12:05, 3.40it/s] 63%|██████▎ | 234236/371472 [8:01:55<11:13:58, 3.39it/s] 63%|██████▎ | 234237/371472 [8:01:55<10:59:41, 3.47it/s] 63%|██████▎ | 234238/371472 [8:01:55<10:51:11, 3.51it/s] 63%|██████▎ | 234239/371472 [8:01:56<12:05:13, 3.15it/s] 63%|██████▎ | 234240/371472 [8:01:56<11:42:17, 3.26it/s] {'loss': 2.7043, 'learning_rate': 4.326639229459803e-07, 'epoch': 10.09} + 63%|██████▎ | 234240/371472 [8:01:56<11:42:17, 3.26it/s] 63%|██████▎ | 234241/371472 [8:01:56<11:24:09, 3.34it/s] 63%|██████▎ | 234242/371472 [8:01:56<11:00:54, 3.46it/s] 63%|██████▎ | 234243/371472 [8:01:57<11:40:16, 3.27it/s] 63%|██████▎ | 234244/371472 [8:01:57<11:43:32, 3.25it/s] 63%|██████▎ | 234245/371472 [8:01:57<11:34:20, 3.29it/s] 63%|██████▎ | 234246/371472 [8:01:58<11:53:41, 3.20it/s] 63%|██████▎ | 234247/371472 [8:01:58<11:15:45, 3.38it/s] 63%|██████▎ | 234248/371472 [8:01:58<11:30:26, 3.31it/s] 63%|██████▎ | 234249/371472 [8:01:59<11:01:41, 3.46it/s] 63%|██████▎ | 234250/371472 [8:01:59<11:14:25, 3.39it/s] 63%|██████▎ | 234251/371472 [8:01:59<10:53:51, 3.50it/s] 63%|██████▎ | 234252/371472 [8:01:59<11:25:44, 3.34it/s] 63%|██████▎ | 234253/371472 [8:02:00<11:26:51, 3.33it/s] 63%|██████▎ | 234254/371472 [8:02:00<11:29:49, 3.32it/s] 63%|██████▎ | 234255/371472 [8:02:00<11:24:51, 3.34it/s] 63%|██████▎ | 234256/371472 [8:02:01<11:12:33, 3.40it/s] 63%|██████▎ | 234257/371472 [8:02:01<11:08:52, 3.42it/s] 63%|██████▎ | 234258/371472 [8:02:01<10:59:57, 3.47it/s] 63%|██████▎ | 234259/371472 [8:02:01<10:59:42, 3.47it/s] 63%|██████▎ | 234260/371472 [8:02:02<10:57:20, 3.48it/s] {'loss': 2.6147, 'learning_rate': 4.326154409705014e-07, 'epoch': 10.09} + 63%|██████▎ | 234260/371472 [8:02:02<10:57:20, 3.48it/s] 63%|██████▎ | 234261/371472 [8:02:02<11:00:22, 3.46it/s] 63%|██████▎ | 234262/371472 [8:02:02<11:10:56, 3.41it/s] 63%|██████▎ | 234263/371472 [8:02:03<12:40:15, 3.01it/s] 63%|██████▎ | 234264/371472 [8:02:03<12:02:52, 3.16it/s] 63%|██████▎ | 234265/371472 [8:02:03<12:02:30, 3.17it/s] 63%|██████▎ | 234266/371472 [8:02:04<11:35:22, 3.29it/s] 63%|██████▎ | 234267/371472 [8:02:04<11:12:01, 3.40it/s] 63%|██████▎ | 234268/371472 [8:02:04<11:02:02, 3.45it/s] 63%|██████▎ | 234269/371472 [8:02:04<11:01:53, 3.45it/s] 63%|██████▎ | 234270/371472 [8:02:05<10:51:33, 3.51it/s] 63%|██████▎ | 234271/371472 [8:02:05<11:13:20, 3.40it/s] 63%|██████▎ | 234272/371472 [8:02:05<12:45:39, 2.99it/s] 63%|██████▎ | 234273/371472 [8:02:06<12:20:58, 3.09it/s] 63%|██████▎ | 234274/371472 [8:02:06<12:13:20, 3.12it/s] 63%|██████▎ | 234275/371472 [8:02:06<12:03:23, 3.16it/s] 63%|██████▎ | 234276/371472 [8:02:07<11:40:33, 3.26it/s] 63%|██████▎ | 234277/371472 [8:02:07<11:48:27, 3.23it/s] 63%|██████▎ | 234278/371472 [8:02:07<11:27:24, 3.33it/s] 63%|██████▎ | 234279/371472 [8:02:08<11:21:52, 3.35it/s] 63%|██████▎ | 234280/371472 [8:02:08<11:12:03, 3.40it/s] {'loss': 2.6612, 'learning_rate': 4.325669589950225e-07, 'epoch': 10.09} + 63%|██████▎ | 234280/371472 [8:02:08<11:12:03, 3.40it/s] 63%|██████▎ | 234281/371472 [8:02:08<11:12:53, 3.40it/s] 63%|██████▎ | 234282/371472 [8:02:08<10:57:57, 3.48it/s] 63%|██████▎ | 234283/371472 [8:02:09<11:10:03, 3.41it/s] 63%|██████▎ | 234284/371472 [8:02:09<12:10:56, 3.13it/s] 63%|██████▎ | 234285/371472 [8:02:09<11:46:04, 3.24it/s] 63%|██████▎ | 234286/371472 [8:02:10<11:37:25, 3.28it/s] 63%|██████▎ | 234287/371472 [8:02:10<11:38:06, 3.28it/s] 63%|██████▎ | 234288/371472 [8:02:10<12:18:14, 3.10it/s] 63%|██████▎ | 234289/371472 [8:02:11<11:36:43, 3.28it/s] 63%|██████▎ | 234290/371472 [8:02:11<11:52:58, 3.21it/s] 63%|██████▎ | 234291/371472 [8:02:11<11:35:38, 3.29it/s] 63%|██████▎ | 234292/371472 [8:02:11<11:00:48, 3.46it/s] 63%|██████▎ | 234293/371472 [8:02:12<11:49:16, 3.22it/s] 63%|██████▎ | 234294/371472 [8:02:12<11:35:32, 3.29it/s] 63%|██████▎ | 234295/371472 [8:02:12<11:13:52, 3.39it/s] 63%|██████▎ | 234296/371472 [8:02:13<13:18:22, 2.86it/s] 63%|██████▎ | 234297/371472 [8:02:13<13:12:14, 2.89it/s] 63%|██████▎ | 234298/371472 [8:02:14<12:28:50, 3.05it/s] 63%|██████▎ | 234299/371472 [8:02:14<12:16:13, 3.11it/s] 63%|██████▎ | 234300/371472 [8:02:14<11:39:28, 3.27it/s] {'loss': 2.5956, 'learning_rate': 4.325184770195436e-07, 'epoch': 10.09} + 63%|██████▎ | 234300/371472 [8:02:14<11:39:28, 3.27it/s] 63%|██████▎ | 234301/371472 [8:02:14<11:16:44, 3.38it/s] 63%|██████▎ | 234302/371472 [8:02:15<11:47:43, 3.23it/s] 63%|██████▎ | 234303/371472 [8:02:15<12:15:20, 3.11it/s] 63%|██████▎ | 234304/371472 [8:02:15<12:35:12, 3.03it/s] 63%|██████▎ | 234305/371472 [8:02:16<12:15:15, 3.11it/s] 63%|██████▎ | 234306/371472 [8:02:16<12:35:31, 3.03it/s] 63%|██████▎ | 234307/371472 [8:02:16<11:57:04, 3.19it/s] 63%|██████▎ | 234308/371472 [8:02:17<11:45:14, 3.24it/s] 63%|██████▎ | 234309/371472 [8:02:17<11:37:32, 3.28it/s] 63%|██████▎ | 234310/371472 [8:02:17<11:26:58, 3.33it/s] 63%|██████▎ | 234311/371472 [8:02:18<12:28:45, 3.05it/s] 63%|██████▎ | 234312/371472 [8:02:18<11:56:10, 3.19it/s] 63%|██████▎ | 234313/371472 [8:02:18<11:15:52, 3.38it/s] 63%|██████▎ | 234314/371472 [8:02:18<11:04:16, 3.44it/s] 63%|██████▎ | 234315/371472 [8:02:19<11:48:49, 3.22it/s] 63%|██████▎ | 234316/371472 [8:02:19<11:58:40, 3.18it/s] 63%|██████▎ | 234317/371472 [8:02:19<11:20:52, 3.36it/s] 63%|██████▎ | 234318/371472 [8:02:20<11:13:01, 3.40it/s] 63%|██████▎ | 234319/371472 [8:02:20<10:48:42, 3.52it/s] 63%|██████▎ | 234320/371472 [8:02:20<10:53:17, 3.50it/s] {'loss': 2.7266, 'learning_rate': 4.3246999504406476e-07, 'epoch': 10.09} + 63%|██████▎ | 234320/371472 [8:02:20<10:53:17, 3.50it/s] 63%|██████▎ | 234321/371472 [8:02:20<11:01:04, 3.46it/s] 63%|██████▎ | 234322/371472 [8:02:21<10:47:47, 3.53it/s] 63%|██████▎ | 234323/371472 [8:02:21<10:52:16, 3.50it/s] 63%|██████▎ | 234324/371472 [8:02:21<11:03:08, 3.45it/s] 63%|██████▎ | 234325/371472 [8:02:22<10:55:05, 3.49it/s] 63%|██████▎ | 234326/371472 [8:02:22<10:47:15, 3.53it/s] 63%|██████▎ | 234327/371472 [8:02:22<11:06:00, 3.43it/s] 63%|██████▎ | 234328/371472 [8:02:23<11:57:13, 3.19it/s] 63%|██████▎ | 234329/371472 [8:02:23<11:26:55, 3.33it/s] 63%|██████▎ | 234330/371472 [8:02:23<11:05:42, 3.43it/s] 63%|██████▎ | 234331/371472 [8:02:23<10:38:39, 3.58it/s] 63%|██████▎ | 234332/371472 [8:02:24<10:44:56, 3.54it/s] 63%|██████▎ | 234333/371472 [8:02:24<10:55:54, 3.48it/s] 63%|██████▎ | 234334/371472 [8:02:24<11:33:10, 3.30it/s] 63%|██████▎ | 234335/371472 [8:02:25<12:00:43, 3.17it/s] 63%|██████▎ | 234336/371472 [8:02:25<11:32:33, 3.30it/s] 63%|██████▎ | 234337/371472 [8:02:25<11:24:58, 3.34it/s] 63%|██████▎ | 234338/371472 [8:02:25<11:14:15, 3.39it/s] 63%|██████▎ | 234339/371472 [8:02:26<10:57:49, 3.47it/s] 63%|██████▎ | 234340/371472 [8:02:26<11:00:07, 3.46it/s] {'loss': 2.7149, 'learning_rate': 4.324215130685858e-07, 'epoch': 10.09} + 63%|██████▎ | 234340/371472 [8:02:26<11:00:07, 3.46it/s] 63%|██████▎ | 234341/371472 [8:02:26<11:08:04, 3.42it/s] 63%|██████▎ | 234342/371472 [8:02:27<11:00:33, 3.46it/s] 63%|██████▎ | 234343/371472 [8:02:27<11:31:59, 3.30it/s] 63%|██████▎ | 234344/371472 [8:02:27<11:53:03, 3.21it/s] 63%|██████▎ | 234345/371472 [8:02:28<11:48:02, 3.23it/s] 63%|██████▎ | 234346/371472 [8:02:28<12:08:32, 3.14it/s] 63%|██████▎ | 234347/371472 [8:02:28<11:41:37, 3.26it/s] 63%|██████▎ | 234348/371472 [8:02:28<11:14:25, 3.39it/s] 63%|██████▎ | 234349/371472 [8:02:29<10:45:33, 3.54it/s] 63%|██████▎ | 234350/371472 [8:02:29<10:40:31, 3.57it/s] 63%|██████▎ | 234351/371472 [8:02:29<10:37:41, 3.58it/s] 63%|██████▎ | 234352/371472 [8:02:30<10:29:48, 3.63it/s] 63%|██████▎ | 234353/371472 [8:02:30<10:35:27, 3.60it/s] 63%|██████▎ | 234354/371472 [8:02:30<10:51:56, 3.51it/s] 63%|██████▎ | 234355/371472 [8:02:30<10:37:18, 3.59it/s] 63%|██████▎ | 234356/371472 [8:02:31<10:42:52, 3.55it/s] 63%|██████▎ | 234357/371472 [8:02:31<11:21:02, 3.36it/s] 63%|██████▎ | 234358/371472 [8:02:31<12:42:25, 3.00it/s] 63%|██████▎ | 234359/371472 [8:02:32<12:07:30, 3.14it/s] 63%|██████▎ | 234360/371472 [8:02:32<12:28:32, 3.05it/s] {'loss': 2.7574, 'learning_rate': 4.3237303109310696e-07, 'epoch': 10.09} + 63%|██████▎ | 234360/371472 [8:02:32<12:28:32, 3.05it/s] 63%|██████▎ | 234361/371472 [8:02:32<11:55:24, 3.19it/s] 63%|██████▎ | 234362/371472 [8:02:33<11:36:51, 3.28it/s] 63%|██████▎ | 234363/371472 [8:02:33<11:17:58, 3.37it/s] 63%|██████▎ | 234364/371472 [8:02:33<10:52:00, 3.50it/s] 63%|██████▎ | 234365/371472 [8:02:34<11:08:29, 3.42it/s] 63%|██████▎ | 234366/371472 [8:02:34<11:25:41, 3.33it/s] 63%|██████▎ | 234367/371472 [8:02:34<11:02:27, 3.45it/s] 63%|██████▎ | 234368/371472 [8:02:34<10:40:52, 3.57it/s] 63%|██████▎ | 234369/371472 [8:02:35<10:57:05, 3.48it/s] 63%|██████▎ | 234370/371472 [8:02:35<10:49:54, 3.52it/s] 63%|██████▎ | 234371/371472 [8:02:35<10:29:46, 3.63it/s] 63%|██████▎ | 234372/371472 [8:02:35<10:27:32, 3.64it/s] 63%|██████▎ | 234373/371472 [8:02:36<11:30:18, 3.31it/s] 63%|██████▎ | 234374/371472 [8:02:36<11:09:34, 3.41it/s] 63%|██████▎ | 234375/371472 [8:02:36<11:13:49, 3.39it/s] 63%|██████▎ | 234376/371472 [8:02:37<10:50:02, 3.52it/s] 63%|██████▎ | 234377/371472 [8:02:37<11:06:24, 3.43it/s] 63%|██████▎ | 234378/371472 [8:02:37<11:37:33, 3.28it/s] 63%|██████▎ | 234379/371472 [8:02:38<13:00:08, 2.93it/s] 63%|██████▎ | 234380/371472 [8:02:38<12:17:40, 3.10it/s] {'loss': 2.8443, 'learning_rate': 4.32324549117628e-07, 'epoch': 10.1} + 63%|██████▎ | 234380/371472 [8:02:38<12:17:40, 3.10it/s] 63%|██████▎ | 234381/371472 [8:02:38<12:09:17, 3.13it/s] 63%|██████▎ | 234382/371472 [8:02:39<11:49:08, 3.22it/s] 63%|██████▎ | 234383/371472 [8:02:39<11:35:35, 3.28it/s] 63%|██████▎ | 234384/371472 [8:02:39<11:04:10, 3.44it/s] 63%|██████▎ | 234385/371472 [8:02:39<10:47:38, 3.53it/s] 63%|██████▎ | 234386/371472 [8:02:40<10:37:37, 3.58it/s] 63%|██████▎ | 234387/371472 [8:02:40<10:31:38, 3.62it/s] 63%|██████▎ | 234388/371472 [8:02:40<10:39:20, 3.57it/s] 63%|██████▎ | 234389/371472 [8:02:41<10:56:41, 3.48it/s] 63%|███���██▎ | 234390/371472 [8:02:41<10:58:04, 3.47it/s] 63%|██████▎ | 234391/371472 [8:02:41<10:56:27, 3.48it/s] 63%|██████▎ | 234392/371472 [8:02:41<10:41:55, 3.56it/s] 63%|██████▎ | 234393/371472 [8:02:42<11:17:18, 3.37it/s] 63%|██████▎ | 234394/371472 [8:02:42<11:29:37, 3.31it/s] 63%|██████▎ | 234395/371472 [8:02:42<11:16:18, 3.38it/s] 63%|██████▎ | 234396/371472 [8:02:43<11:04:20, 3.44it/s] 63%|██████▎ | 234397/371472 [8:02:43<11:07:43, 3.42it/s] 63%|██████▎ | 234398/371472 [8:02:43<11:14:24, 3.39it/s] 63%|██████▎ | 234399/371472 [8:02:43<10:46:50, 3.53it/s] 63%|██████▎ | 234400/371472 [8:02:44<10:42:44, 3.55it/s] {'loss': 2.8475, 'learning_rate': 4.3227606714214915e-07, 'epoch': 10.1} + 63%|██████▎ | 234400/371472 [8:02:44<10:42:44, 3.55it/s] 63%|██████▎ | 234401/371472 [8:02:44<10:50:59, 3.51it/s] 63%|██████▎ | 234402/371472 [8:02:44<11:13:10, 3.39it/s] 63%|██████▎ | 234403/371472 [8:02:45<11:27:12, 3.32it/s] 63%|██████▎ | 234404/371472 [8:02:45<11:12:29, 3.40it/s] 63%|██████▎ | 234405/371472 [8:02:45<10:55:38, 3.48it/s] 63%|██████▎ | 234406/371472 [8:02:46<11:39:38, 3.27it/s] 63%|██████▎ | 234407/371472 [8:02:46<11:19:42, 3.36it/s] 63%|██████▎ | 234408/371472 [8:02:46<11:04:53, 3.44it/s] 63%|██████▎ | 234409/371472 [8:02:46<10:47:33, 3.53it/s] 63%|██████▎ | 234410/371472 [8:02:47<11:54:05, 3.20it/s] 63%|██████▎ | 234411/371472 [8:02:47<11:51:43, 3.21it/s] 63%|██████▎ | 234412/371472 [8:02:47<11:35:07, 3.29it/s] 63%|██████▎ | 234413/371472 [8:02:48<12:05:36, 3.15it/s] 63%|██████▎ | 234414/371472 [8:02:48<12:34:47, 3.03it/s] 63%|██████▎ | 234415/371472 [8:02:48<12:23:15, 3.07it/s] 63%|██████▎ | 234416/371472 [8:02:49<11:50:58, 3.21it/s] 63%|██████▎ | 234417/371472 [8:02:49<11:34:51, 3.29it/s] 63%|██████▎ | 234418/371472 [8:02:49<11:20:20, 3.36it/s] 63%|██████▎ | 234419/371472 [8:02:50<11:08:10, 3.42it/s] 63%|██████▎ | 234420/371472 [8:02:50<10:56:14, 3.48it/s] {'loss': 2.5984, 'learning_rate': 4.322275851666702e-07, 'epoch': 10.1} + 63%|██████▎ | 234420/371472 [8:02:50<10:56:14, 3.48it/s] 63%|██████▎ | 234421/371472 [8:02:50<10:40:53, 3.56it/s] 63%|██████▎ | 234422/371472 [8:02:50<10:29:56, 3.63it/s] 63%|██████▎ | 234423/371472 [8:02:51<10:18:17, 3.69it/s] 63%|██████▎ | 234424/371472 [8:02:51<10:40:32, 3.57it/s] 63%|██████▎ | 234425/371472 [8:02:51<12:18:19, 3.09it/s] 63%|██████▎ | 234426/371472 [8:02:52<11:24:16, 3.34it/s] 63%|██████▎ | 234427/371472 [8:02:52<11:17:44, 3.37it/s] 63%|██████▎ | 234428/371472 [8:02:52<11:07:09, 3.42it/s] 63%|██████▎ | 234429/371472 [8:02:52<10:44:50, 3.54it/s] 63%|██████▎ | 234430/371472 [8:02:53<11:30:46, 3.31it/s] 63%|██████▎ | 234431/371472 [8:02:53<11:00:52, 3.46it/s] 63%|██████▎ | 234432/371472 [8:02:53<10:57:16, 3.47it/s] 63%|██████▎ | 234433/371472 [8:02:54<10:57:58, 3.47it/s] 63%|██████▎ | 234434/371472 [8:02:54<10:44:22, 3.54it/s] 63%|██████▎ | 234435/371472 [8:02:54<11:33:46, 3.29it/s] 63%|██████▎ | 234436/371472 [8:02:54<11:30:08, 3.31it/s] 63%|██████▎ | 234437/371472 [8:02:55<11:12:00, 3.40it/s] 63%|██████▎ | 234438/371472 [8:02:55<11:17:54, 3.37it/s] 63%|██████▎ | 234439/371472 [8:02:55<11:23:30, 3.34it/s] 63%|██████▎ | 234440/371472 [8:02:56<11:01:23, 3.45it/s] {'loss': 2.6424, 'learning_rate': 4.321791031911914e-07, 'epoch': 10.1} + 63%|██████▎ | 234440/371472 [8:02:56<11:01:23, 3.45it/s] 63%|██████▎ | 234441/371472 [8:02:56<11:21:03, 3.35it/s] 63%|██████▎ | 234442/371472 [8:02:56<11:32:05, 3.30it/s] 63%|██████▎ | 234443/371472 [8:02:57<11:23:07, 3.34it/s] 63%|██████▎ | 234444/371472 [8:02:57<11:08:08, 3.42it/s] 63%|██████▎ | 234445/371472 [8:02:57<11:00:26, 3.46it/s] 63%|██████▎ | 234446/371472 [8:02:57<11:45:08, 3.24it/s] 63%|██████▎ | 234447/371472 [8:02:58<12:15:12, 3.11it/s] 63%|██████▎ | 234448/371472 [8:02:58<11:43:56, 3.24it/s] 63%|██████▎ | 234449/371472 [8:02:58<11:36:21, 3.28it/s] 63%|██████▎ | 234450/371472 [8:02:59<11:24:23, 3.34it/s] 63%|██████▎ | 234451/371472 [8:02:59<11:33:07, 3.29it/s] 63%|██████▎ | 234452/371472 [8:02:59<11:09:33, 3.41it/s] 63%|██████▎ | 234453/371472 [8:03:00<10:46:24, 3.53it/s] 63%|██████▎ | 234454/371472 [8:03:00<10:49:03, 3.52it/s] 63%|██████▎ | 234455/371472 [8:03:00<10:43:33, 3.55it/s] 63%|██████▎ | 234456/371472 [8:03:01<13:34:53, 2.80it/s] 63%|██████▎ | 234457/371472 [8:03:01<13:03:20, 2.92it/s] 63%|██████▎ | 234458/371472 [8:03:01<12:44:36, 2.99it/s] 63%|██████▎ | 234459/371472 [8:03:01<11:44:11, 3.24it/s] 63%|██████▎ | 234460/371472 [8:03:02<11:43:01, 3.25it/s] {'loss': 2.6935, 'learning_rate': 4.321306212157124e-07, 'epoch': 10.1} + 63%|██████▎ | 234460/371472 [8:03:02<11:43:01, 3.25it/s] 63%|██████▎ | 234461/371472 [8:03:02<11:32:05, 3.30it/s] 63%|██████▎ | 234462/371472 [8:03:02<11:25:19, 3.33it/s] 63%|██████▎ | 234463/371472 [8:03:03<11:47:21, 3.23it/s] 63%|██████▎ | 234464/371472 [8:03:03<11:23:23, 3.34it/s] 63%|██████▎ | 234465/371472 [8:03:03<11:04:11, 3.44it/s] 63%|██████▎ | 234466/371472 [8:03:04<10:48:15, 3.52it/s] 63%|██████▎ | 234467/371472 [8:03:04<11:17:28, 3.37it/s] 63%|██████▎ | 234468/371472 [8:03:04<11:26:29, 3.33it/s] 63%|██████▎ | 234469/371472 [8:03:04<11:00:44, 3.46it/s] 63%|██████▎ | 234470/371472 [8:03:05<10:56:39, 3.48it/s] 63%|██████▎ | 234471/371472 [8:03:05<11:07:40, 3.42it/s] 63%|██████▎ | 234472/371472 [8:03:05<10:59:42, 3.46it/s] 63%|██████▎ | 234473/371472 [8:03:06<11:29:11, 3.31it/s] 63%|██████▎ | 234474/371472 [8:03:06<11:45:36, 3.24it/s] 63%|██████▎ | 234475/371472 [8:03:06<11:25:33, 3.33it/s] 63%|██████▎ | 234476/371472 [8:03:07<11:14:15, 3.39it/s] 63%|██████▎ | 234477/371472 [8:03:07<12:17:22, 3.10it/s] 63%|██████▎ | 234478/371472 [8:03:07<11:47:55, 3.23it/s] 63%|██████▎ | 234479/371472 [8:03:08<12:19:33, 3.09it/s] 63%|██████▎ | 234480/371472 [8:03:08<11:46:29, 3.23it/s] {'loss': 2.7103, 'learning_rate': 4.320821392402336e-07, 'epoch': 10.1} + 63%|██████▎ | 234480/371472 [8:03:08<11:46:29, 3.23it/s] 63%|██████▎ | 234481/371472 [8:03:08<11:45:21, 3.24it/s] 63%|██████▎ | 234482/371472 [8:03:08<11:38:31, 3.27it/s] 63%|██████▎ | 234483/371472 [8:03:09<12:12:39, 3.12it/s] 63%|██████▎ | 234484/371472 [8:03:09<12:59:10, 2.93it/s] 63%|██████▎ | 234485/371472 [8:03:10<13:15:01, 2.87it/s] 63%|██████▎ | 234486/371472 [8:03:10<13:14:15, 2.87it/s] 63%|██████▎ | 234487/371472 [8:03:10<13:04:58, 2.91it/s] 63%|██████▎ | 234488/371472 [8:03:11<13:06:57, 2.90it/s] 63%|██████▎ | 234489/371472 [8:03:11<12:45:19, 2.98it/s] 63%|██████▎ | 234490/371472 [8:03:11<12:23:44, 3.07it/s] 63%|██████▎ | 234491/371472 [8:03:11<12:04:49, 3.15it/s] 63%|██████▎ | 234492/371472 [8:03:12<12:12:38, 3.12it/s] 63%|██████▎ | 234493/371472 [8:03:12<12:23:17, 3.07it/s] 63%|██████▎ | 234494/371472 [8:03:12<12:14:07, 3.11it/s] 63%|██████▎ | 234495/371472 [8:03:13<11:41:43, 3.25it/s] 63%|██████▎ | 234496/371472 [8:03:13<12:44:20, 2.99it/s] 63%|██████▎ | 234497/371472 [8:03:13<12:27:16, 3.06it/s] 63%|██████▎ | 234498/371472 [8:03:14<13:46:25, 2.76it/s] 63%|██████▎ | 234499/371472 [8:03:14<13:10:48, 2.89it/s] 63%|██████▎ | 234500/371472 [8:03:14<12:25:47, 3.06it/s] {'loss': 2.7521, 'learning_rate': 4.3203365726475467e-07, 'epoch': 10.1} + 63%|██████▎ | 234500/371472 [8:03:14<12:25:47, 3.06it/s] 63%|██████▎ | 234501/371472 [8:03:15<11:49:07, 3.22it/s] 63%|██████▎ | 234502/371472 [8:03:15<11:40:12, 3.26it/s] 63%|██████▎ | 234503/371472 [8:03:15<11:20:19, 3.36it/s] 63%|██████▎ | 234504/371472 [8:03:16<12:00:50, 3.17it/s] 63%|██████▎ | 234505/371472 [8:03:16<11:55:12, 3.19it/s] 63%|██████▎ | 234506/371472 [8:03:16<11:40:24, 3.26it/s] 63%|██████▎ | 234507/371472 [8:03:17<11:11:26, 3.40it/s] 63%|██████▎ | 234508/371472 [8:03:17<11:12:21, 3.40it/s] 63%|██████▎ | 234509/371472 [8:03:17<11:00:08, 3.46it/s] 63%|██████▎ | 234510/371472 [8:03:17<10:47:40, 3.52it/s] 63%|██████▎ | 234511/371472 [8:03:18<10:51:35, 3.50it/s] 63%|██████▎ | 234512/371472 [8:03:18<11:12:13, 3.40it/s] 63%|██████▎ | 234513/371472 [8:03:18<11:04:36, 3.43it/s] 63%|██████▎ | 234514/371472 [8:03:19<11:01:33, 3.45it/s] 63%|██████▎ | 234515/371472 [8:03:19<11:19:42, 3.36it/s] 63%|██████▎ | 234516/371472 [8:03:19<11:23:33, 3.34it/s] 63%|██████▎ | 234517/371472 [8:03:19<11:18:30, 3.36it/s] 63%|██████▎ | 234518/371472 [8:03:20<12:24:24, 3.07it/s] 63%|██████▎ | 234519/371472 [8:03:20<12:02:24, 3.16it/s] 63%|██████▎ | 234520/371472 [8:03:20<11:43:36, 3.24it/s] {'loss': 2.912, 'learning_rate': 4.319851752892758e-07, 'epoch': 10.1} + 63%|██████▎ | 234520/371472 [8:03:20<11:43:36, 3.24it/s] 63%|██████▎ | 234521/371472 [8:03:21<11:15:41, 3.38it/s] 63%|██████▎ | 234522/371472 [8:03:21<11:05:32, 3.43it/s] 63%|██████▎ | 234523/371472 [8:03:21<10:47:02, 3.53it/s] 63%|██████▎ | 234524/371472 [8:03:22<10:49:03, 3.52it/s] 63%|██████▎ | 234525/371472 [8:03:22<10:43:08, 3.55it/s] 63%|██████▎ | 234526/371472 [8:03:22<10:46:11, 3.53it/s] 63%|██████▎ | 234527/371472 [8:03:22<10:32:45, 3.61it/s] 63%|██████▎ | 234528/371472 [8:03:23<11:11:47, 3.40it/s] 63%|██████▎ | 234529/371472 [8:03:23<11:03:42, 3.44it/s] 63%|██████▎ | 234530/371472 [8:03:23<12:39:02, 3.01it/s] 63%|██████▎ | 234531/371472 [8:03:24<12:59:38, 2.93it/s] 63%|██████▎ | 234532/371472 [8:03:24<12:15:22, 3.10it/s] 63%|██████▎ | 234533/371472 [8:03:24<12:06:26, 3.14it/s] 63%|██████▎ | 234534/371472 [8:03:25<11:34:03, 3.29it/s] 63%|██████▎ | 234535/371472 [8:03:25<11:15:37, 3.38it/s] 63%|██████▎ | 234536/371472 [8:03:25<11:36:15, 3.28it/s] 63%|██████▎ | 234537/371472 [8:03:26<11:24:48, 3.33it/s] 63%|██████▎ | 234538/371472 [8:03:26<10:55:16, 3.48it/s] 63%|██████▎ | 234539/371472 [8:03:26<10:36:11, 3.59it/s] 63%|██████▎ | 234540/371472 [8:03:26<10:49:03, 3.52it/s] {'loss': 2.7193, 'learning_rate': 4.3193669331379686e-07, 'epoch': 10.1} + 63%|██████▎ | 234540/371472 [8:03:26<10:49:03, 3.52it/s] 63%|██████▎ | 234541/371472 [8:03:27<10:54:57, 3.48it/s] 63%|██████▎ | 234542/371472 [8:03:27<10:41:55, 3.56it/s] 63%|██████▎ | 234543/371472 [8:03:27<11:33:20, 3.29it/s] 63%|██████▎ | 234544/371472 [8:03:28<11:14:48, 3.38it/s] 63%|██████▎ | 234545/371472 [8:03:28<11:53:50, 3.20it/s] 63%|██████▎ | 234546/371472 [8:03:28<11:16:41, 3.37it/s] 63%|██████▎ | 234547/371472 [8:03:28<11:25:38, 3.33it/s] 63%|██████▎ | 234548/371472 [8:03:29<11:09:16, 3.41it/s] 63%|██████▎ | 234549/371472 [8:03:29<11:56:41, 3.18it/s] 63%|██████▎ | 234550/371472 [8:03:29<12:03:40, 3.15it/s] 63%|██████▎ | 234551/371472 [8:03:30<11:35:46, 3.28it/s] 63%|██████▎ | 234552/371472 [8:03:30<11:32:00, 3.30it/s] 63%|██████▎ | 234553/371472 [8:03:30<12:51:12, 2.96it/s] 63%|██████▎ | 234554/371472 [8:03:31<11:46:19, 3.23it/s] 63%|██████▎ | 234555/371472 [8:03:31<11:05:35, 3.43it/s] 63%|██████▎ | 234556/371472 [8:03:31<11:12:55, 3.39it/s] 63%|██████▎ | 234557/371472 [8:03:32<11:26:11, 3.33it/s] 63%|██████▎ | 234558/371472 [8:03:32<11:06:11, 3.43it/s] 63%|██████▎ | 234559/371472 [8:03:32<10:53:38, 3.49it/s] 63%|██████▎ | 234560/371472 [8:03:32<11:08:11, 3.42it/s] {'loss': 2.9009, 'learning_rate': 4.3188821133831794e-07, 'epoch': 10.1} + 63%|██████▎ | 234560/371472 [8:03:32<11:08:11, 3.42it/s] 63%|██████▎ | 234561/371472 [8:03:33<10:52:14, 3.50it/s] 63%|██████▎ | 234562/371472 [8:03:33<10:49:41, 3.51it/s] 63%|██████▎ | 234563/371472 [8:03:33<11:06:51, 3.42it/s] 63%|██████▎ | 234564/371472 [8:03:34<11:24:35, 3.33it/s] 63%|██████▎ | 234565/371472 [8:03:34<11:26:05, 3.33it/s] 63%|██████▎ | 234566/371472 [8:03:34<11:14:51, 3.38it/s] 63%|██████▎ | 234567/371472 [8:03:34<11:03:21, 3.44it/s] 63%|██████▎ | 234568/371472 [8:03:35<12:00:53, 3.17it/s] 63%|██████▎ | 234569/371472 [8:03:35<11:36:55, 3.27it/s] 63%|██████▎ | 234570/371472 [8:03:35<11:14:15, 3.38it/s] 63%|██████▎ | 234571/371472 [8:03:36<10:55:14, 3.48it/s] 63%|██████▎ | 234572/371472 [8:03:36<10:56:33, 3.48it/s] 63%|██████▎ | 234573/371472 [8:03:36<10:49:15, 3.51it/s] 63%|██████▎ | 234574/371472 [8:03:36<10:48:59, 3.52it/s] 63%|██████▎ | 234575/371472 [8:03:37<11:21:30, 3.35it/s] 63%|██████▎ | 234576/371472 [8:03:37<11:08:06, 3.42it/s] 63%|██████▎ | 234577/371472 [8:03:37<11:38:13, 3.27it/s] 63%|██████▎ | 234578/371472 [8:03:38<11:11:36, 3.40it/s] 63%|██████▎ | 234579/371472 [8:03:38<10:40:52, 3.56it/s] 63%|██████▎ | 234580/371472 [8:03:38<10:38:20, 3.57it/s] {'loss': 2.6406, 'learning_rate': 4.318397293628391e-07, 'epoch': 10.1} + 63%|██████▎ | 234580/371472 [8:03:38<10:38:20, 3.57it/s] 63%|██████▎ | 234581/371472 [8:03:39<11:05:05, 3.43it/s] 63%|██████▎ | 234582/371472 [8:03:39<10:59:53, 3.46it/s] 63%|██████▎ | 234583/371472 [8:03:39<10:59:30, 3.46it/s] 63%|██████▎ | 234584/371472 [8:03:39<10:47:26, 3.52it/s] 63%|██████▎ | 234585/371472 [8:03:40<10:47:30, 3.52it/s] 63%|██████▎ | 234586/371472 [8:03:40<10:44:17, 3.54it/s] 63%|██████▎ | 234587/371472 [8:03:40<12:00:38, 3.17it/s] 63%|██████▎ | 234588/371472 [8:03:41<11:57:34, 3.18it/s] 63%|██████▎ | 234589/371472 [8:03:41<11:39:06, 3.26it/s] 63%|██████▎ | 234590/371472 [8:03:41<11:24:11, 3.33it/s] 63%|██████▎ | 234591/371472 [8:03:41<10:54:26, 3.49it/s] 63%|██████▎ | 234592/371472 [8:03:42<10:38:03, 3.58it/s] 63%|██████▎ | 234593/371472 [8:03:42<11:13:47, 3.39it/s] 63%|██████▎ | 234594/371472 [8:03:42<10:58:23, 3.46it/s] 63%|██████▎ | 234595/371472 [8:03:43<11:47:03, 3.23it/s] 63%|██████▎ | 234596/371472 [8:03:43<11:19:55, 3.36it/s] 63%|██████▎ | 234597/371472 [8:03:43<11:23:20, 3.34it/s] 63%|██████▎ | 234598/371472 [8:03:44<11:54:16, 3.19it/s] 63%|██████▎ | 234599/371472 [8:03:44<12:25:38, 3.06it/s] 63%|██████▎ | 234600/371472 [8:03:44<11:54:45, 3.19it/s] {'loss': 2.7141, 'learning_rate': 4.3179124738736013e-07, 'epoch': 10.1} + 63%|██████▎ | 234600/371472 [8:03:44<11:54:45, 3.19it/s] 63%|██████▎ | 234601/371472 [8:03:45<11:34:04, 3.29it/s] 63%|██████▎ | 234602/371472 [8:03:45<11:29:38, 3.31it/s] 63%|██████▎ | 234603/371472 [8:03:45<11:15:30, 3.38it/s] 63%|██████▎ | 234604/371472 [8:03:45<11:05:41, 3.43it/s] 63%|██████▎ | 234605/371472 [8:03:46<11:01:12, 3.45it/s] 63%|██████▎ | 234606/371472 [8:03:46<10:40:20, 3.56it/s] 63%|██████▎ | 234607/371472 [8:03:46<11:09:01, 3.41it/s] 63%|██████▎ | 234608/371472 [8:03:47<10:50:23, 3.51it/s] 63%|██████▎ | 234609/371472 [8:03:47<10:57:22, 3.47it/s] 63%|██████▎ | 234610/371472 [8:03:47<10:48:52, 3.52it/s] 63%|██████▎ | 234611/371472 [8:03:47<11:33:23, 3.29it/s] 63%|██████▎ | 234612/371472 [8:03:48<11:05:30, 3.43it/s] 63%|██████▎ | 234613/371472 [8:03:48<10:53:09, 3.49it/s] 63%|██████▎ | 234614/371472 [8:03:48<11:21:57, 3.34it/s] 63%|██████▎ | 234615/371472 [8:03:49<11:14:25, 3.38it/s] 63%|██████▎ | 234616/371472 [8:03:49<10:52:25, 3.50it/s] 63%|██████▎ | 234617/371472 [8:03:49<10:52:23, 3.50it/s] 63%|██████▎ | 234618/371472 [8:03:49<10:46:30, 3.53it/s] 63%|██████▎ | 234619/371472 [8:03:50<10:24:50, 3.65it/s] 63%|██████▎ | 234620/371472 [8:03:50<10:26:54, 3.64it/s] {'loss': 2.9049, 'learning_rate': 4.317427654118813e-07, 'epoch': 10.11} + 63%|██████▎ | 234620/371472 [8:03:50<10:26:54, 3.64it/s] 63%|██████▎ | 234621/371472 [8:03:50<10:49:07, 3.51it/s] 63%|██████▎ | 234622/371472 [8:03:51<11:08:59, 3.41it/s] 63%|██████▎ | 234623/371472 [8:03:51<10:45:13, 3.53it/s] 63%|██████▎ | 234624/371472 [8:03:51<11:02:40, 3.44it/s] 63%|██████▎ | 234625/371472 [8:03:51<11:07:48, 3.42it/s] 63%|██████▎ | 234626/371472 [8:03:52<10:55:24, 3.48it/s] 63%|██████▎ | 234627/371472 [8:03:52<12:07:05, 3.14it/s] 63%|██████▎ | 234628/371472 [8:03:52<12:14:53, 3.10it/s] 63%|██████▎ | 234629/371472 [8:03:53<11:59:31, 3.17it/s] 63%|██████▎ | 234630/371472 [8:03:53<11:50:29, 3.21it/s] 63%|██████▎ | 234631/371472 [8:03:53<11:52:01, 3.20it/s] 63%|██████▎ | 234632/371472 [8:03:54<11:55:43, 3.19it/s] 63%|██████▎ | 234633/371472 [8:03:54<11:22:25, 3.34it/s] 63%|██████▎ | 234634/371472 [8:03:54<11:01:55, 3.45it/s] 63%|██████▎ | 234635/371472 [8:03:55<11:32:10, 3.29it/s] 63%|██████▎ | 234636/371472 [8:03:55<11:56:41, 3.18it/s] 63%|██████▎ | 234637/371472 [8:03:55<11:39:26, 3.26it/s] 63%|██████▎ | 234638/371472 [8:03:55<11:41:14, 3.25it/s] 63%|██████▎ | 234639/371472 [8:03:56<11:21:26, 3.35it/s] 63%|██████▎ | 234640/371472 [8:03:56<11:20:34, 3.35it/s] {'loss': 2.7165, 'learning_rate': 4.316942834364024e-07, 'epoch': 10.11} + 63%|██████▎ | 234640/371472 [8:03:56<11:20:34, 3.35it/s] 63%|██████▎ | 234641/371472 [8:03:56<11:22:13, 3.34it/s] 63%|██████▎ | 234642/371472 [8:03:57<11:57:50, 3.18it/s] 63%|██████▎ | 234643/371472 [8:03:57<12:14:24, 3.11it/s] 63%|██████▎ | 234644/371472 [8:03:57<11:54:32, 3.19it/s] 63%|██████▎ | 234645/371472 [8:03:58<11:48:36, 3.22it/s] 63%|██████▎ | 234646/371472 [8:03:58<11:11:00, 3.40it/s] 63%|██████▎ | 234647/371472 [8:03:58<10:44:23, 3.54it/s] 63%|██████▎ | 234648/371472 [8:03:58<10:51:06, 3.50it/s] 63%|██████▎ | 234649/371472 [8:03:59<10:36:52, 3.58it/s] 63%|██████▎ | 234650/371472 [8:03:59<10:11:53, 3.73it/s] 63%|██████▎ | 234651/371472 [8:03:59<11:09:08, 3.41it/s] 63%|██████▎ | 234652/371472 [8:04:00<11:28:42, 3.31it/s] 63%|██████▎ | 234653/371472 [8:04:00<11:31:51, 3.30it/s] 63%|██████▎ | 234654/371472 [8:04:00<11:22:42, 3.34it/s] 63%|██████▎ | 234655/371472 [8:04:00<11:01:59, 3.44it/s] 63%|██████▎ | 234656/371472 [8:04:01<11:13:12, 3.39it/s] 63%|██████▎ | 234657/371472 [8:04:01<11:08:28, 3.41it/s] 63%|██████▎ | 234658/371472 [8:04:01<10:45:47, 3.53it/s] 63%|██████▎ | 234659/371472 [8:04:02<10:37:45, 3.58it/s] 63%|██████▎ | 234660/371472 [8:04:02<10:31:33, 3.61it/s] {'loss': 2.7212, 'learning_rate': 4.316458014609235e-07, 'epoch': 10.11} + 63%|██████▎ | 234660/371472 [8:04:02<10:31:33, 3.61it/s] 63%|██████▎ | 234661/371472 [8:04:02<10:37:44, 3.58it/s] 63%|██████▎ | 234662/371472 [8:04:03<11:19:52, 3.35it/s] 63%|██████▎ | 234663/371472 [8:04:03<11:41:32, 3.25it/s] 63%|██████▎ | 234664/371472 [8:04:03<11:13:44, 3.38it/s] 63%|██████▎ | 234665/371472 [8:04:03<10:56:29, 3.47it/s] 63%|██████▎ | 234666/371472 [8:04:04<10:50:41, 3.50it/s] 63%|██████▎ | 234667/371472 [8:04:04<10:44:08, 3.54it/s] 63%|██████▎ | 234668/371472 [8:04:04<10:29:56, 3.62it/s] 63%|██████▎ | 234669/371472 [8:04:04<10:32:15, 3.61it/s] 63%|██████▎ | 234670/371472 [8:04:05<10:35:23, 3.59it/s] 63%|██████▎ | 234671/371472 [8:04:05<10:20:54, 3.67it/s] 63%|██████▎ | 234672/371472 [8:04:05<10:32:11, 3.61it/s] 63%|██████▎ | 234673/371472 [8:04:06<10:35:52, 3.59it/s] 63%|██████▎ | 234674/371472 [8:04:06<11:19:34, 3.35it/s] 63%|██████▎ | 234675/371472 [8:04:06<11:14:36, 3.38it/s] 63%|██████▎ | 234676/371472 [8:04:07<11:24:42, 3.33it/s] 63%|██████▎ | 234677/371472 [8:04:07<10:57:40, 3.47it/s] 63%|██████▎ | 234678/371472 [8:04:07<10:45:38, 3.53it/s] 63%|██████▎ | 234679/371472 [8:04:07<11:43:26, 3.24it/s] 63%|██████▎ | 234680/371472 [8:04:08<11:37:44, 3.27it/s] {'loss': 3.0083, 'learning_rate': 4.315973194854446e-07, 'epoch': 10.11} + 63%|██████▎ | 234680/371472 [8:04:08<11:37:44, 3.27it/s] 63%|██████▎ | 234681/371472 [8:04:08<11:23:55, 3.33it/s] 63%|██████▎ | 234682/371472 [8:04:08<12:04:37, 3.15it/s] 63%|██████▎ | 234683/371472 [8:04:09<11:39:23, 3.26it/s] 63%|██████▎ | 234684/371472 [8:04:09<11:30:52, 3.30it/s] 63%|██████▎ | 234685/371472 [8:04:09<11:15:37, 3.37it/s] 63%|██████▎ | 234686/371472 [8:04:10<11:31:32, 3.30it/s] 63%|██████▎ | 234687/371472 [8:04:10<11:07:59, 3.41it/s] 63%|██████▎ | 234688/371472 [8:04:10<11:08:14, 3.41it/s] 63%|██████▎ | 234689/371472 [8:04:10<11:43:28, 3.24it/s] 63%|██████▎ | 234690/371472 [8:04:11<11:17:57, 3.36it/s] 63%|██████▎ | 234691/371472 [8:04:11<12:06:59, 3.14it/s] 63%|██████▎ | 234692/371472 [8:04:11<12:09:12, 3.13it/s] 63%|██████▎ | 234693/371472 [8:04:12<11:58:29, 3.17it/s] 63%|██████▎ | 234694/371472 [8:04:12<11:32:45, 3.29it/s] 63%|██████▎ | 234695/371472 [8:04:12<11:14:37, 3.38it/s] 63%|██████▎ | 234696/371472 [8:04:13<11:11:50, 3.39it/s] 63%|██████▎ | 234697/371472 [8:04:13<10:54:16, 3.48it/s] 63%|██████▎ | 234698/371472 [8:04:13<10:42:48, 3.55it/s] 63%|██████▎ | 234699/371472 [8:04:13<10:47:51, 3.52it/s] 63%|██████▎ | 234700/371472 [8:04:14<10:43:50, 3.54it/s] {'loss': 2.8408, 'learning_rate': 4.3154883750996575e-07, 'epoch': 10.11} + 63%|██████▎ | 234700/371472 [8:04:14<10:43:50, 3.54it/s] 63%|██████▎ | 234701/371472 [8:04:14<11:02:27, 3.44it/s] 63%|██████▎ | 234702/371472 [8:04:14<11:02:40, 3.44it/s] 63%|██████▎ | 234703/371472 [8:04:15<11:53:41, 3.19it/s] 63%|██████▎ | 234704/371472 [8:04:15<12:22:11, 3.07it/s] 63%|██████▎ | 234705/371472 [8:04:15<12:13:40, 3.11it/s] 63%|██████▎ | 234706/371472 [8:04:16<11:37:05, 3.27it/s] 63%|██████▎ | 234707/371472 [8:04:16<11:09:15, 3.41it/s] 63%|██████▎ | 234708/371472 [8:04:16<11:19:21, 3.36it/s] 63%|██████▎ | 234709/371472 [8:04:16<11:04:44, 3.43it/s] 63%|██████▎ | 234710/371472 [8:04:17<10:56:38, 3.47it/s] 63%|██████▎ | 234711/371472 [8:04:17<10:32:57, 3.60it/s] 63%|██████▎ | 234712/371472 [8:04:17<10:38:49, 3.57it/s] 63%|██████▎ | 234713/371472 [8:04:18<10:55:59, 3.47it/s] 63%|██████▎ | 234714/371472 [8:04:18<10:43:28, 3.54it/s] 63%|██████▎ | 234715/371472 [8:04:18<10:53:16, 3.49it/s] 63%|██████▎ | 234716/371472 [8:04:18<10:49:04, 3.51it/s] 63%|██████▎ | 234717/371472 [8:04:19<10:38:43, 3.57it/s] 63%|██████▎ | 234718/371472 [8:04:19<10:39:43, 3.56it/s] 63%|██████▎ | 234719/371472 [8:04:19<10:33:54, 3.60it/s] 63%|██████▎ | 234720/371472 [8:04:20<11:14:54, 3.38it/s] {'loss': 2.7484, 'learning_rate': 4.3150035553448677e-07, 'epoch': 10.11} + 63%|██████▎ | 234720/371472 [8:04:20<11:14:54, 3.38it/s] 63%|██████▎ | 234721/371472 [8:04:20<11:14:14, 3.38it/s] 63%|██████▎ | 234722/371472 [8:04:20<11:12:38, 3.39it/s] 63%|██████▎ | 234723/371472 [8:04:20<11:19:17, 3.36it/s] 63%|██████▎ | 234724/371472 [8:04:21<11:58:20, 3.17it/s] 63%|██████▎ | 234725/371472 [8:04:21<11:33:51, 3.28it/s] 63%|██████▎ | 234726/371472 [8:04:21<11:25:35, 3.32it/s] 63%|██████▎ | 234727/371472 [8:04:22<11:13:03, 3.39it/s] 63%|██████▎ | 234728/371472 [8:04:22<10:58:14, 3.46it/s] 63%|██████▎ | 234729/371472 [8:04:22<11:05:45, 3.42it/s] 63%|██████▎ | 234730/371472 [8:04:23<11:01:13, 3.45it/s] 63%|██████▎ | 234731/371472 [8:04:23<10:55:37, 3.48it/s] 63%|██████▎ | 234732/371472 [8:04:23<11:17:56, 3.36it/s] 63%|██████▎ | 234733/371472 [8:04:23<11:03:13, 3.44it/s] 63%|██████▎ | 234734/371472 [8:04:24<11:22:29, 3.34it/s] 63%|██████▎ | 234735/371472 [8:04:24<11:58:41, 3.17it/s] 63%|██████▎ | 234736/371472 [8:04:24<11:41:31, 3.25it/s] 63%|██████▎ | 234737/371472 [8:04:25<11:24:10, 3.33it/s] 63%|██████▎ | 234738/371472 [8:04:25<11:13:55, 3.38it/s] 63%|██████▎ | 234739/371472 [8:04:25<11:21:40, 3.34it/s] 63%|██████▎ | 234740/371472 [8:04:26<11:01:06, 3.45it/s] {'loss': 2.6511, 'learning_rate': 4.3145187355900795e-07, 'epoch': 10.11} + 63%|██████▎ | 234740/371472 [8:04:26<11:01:06, 3.45it/s] 63%|██████▎ | 234741/371472 [8:04:26<10:44:36, 3.54it/s] 63%|██████▎ | 234742/371472 [8:04:26<12:21:23, 3.07it/s] 63%|██████▎ | 234743/371472 [8:04:26<11:35:38, 3.28it/s] 63%|██████▎ | 234744/371472 [8:04:27<12:12:42, 3.11it/s] 63%|██████▎ | 234745/371472 [8:04:27<12:16:53, 3.09it/s] 63%|██████▎ | 234746/371472 [8:04:27<12:17:45, 3.09it/s] 63%|██████▎ | 234747/371472 [8:04:28<12:00:01, 3.16it/s] 63%|██████▎ | 234748/371472 [8:04:28<11:40:54, 3.25it/s] 63%|███��██▎ | 234749/371472 [8:04:28<12:20:51, 3.08it/s] 63%|██████▎ | 234750/371472 [8:04:29<11:42:57, 3.24it/s] 63%|██████▎ | 234751/371472 [8:04:29<12:51:45, 2.95it/s] 63%|██████▎ | 234752/371472 [8:04:29<12:25:40, 3.06it/s] 63%|██████▎ | 234753/371472 [8:04:30<11:48:57, 3.21it/s] 63%|██████▎ | 234754/371472 [8:04:30<11:53:37, 3.19it/s] 63%|██████▎ | 234755/371472 [8:04:30<11:21:03, 3.35it/s] 63%|██████▎ | 234756/371472 [8:04:31<11:02:20, 3.44it/s] 63%|██████▎ | 234757/371472 [8:04:31<11:06:02, 3.42it/s] 63%|██████▎ | 234758/371472 [8:04:31<10:56:29, 3.47it/s] 63%|██████▎ | 234759/371472 [8:04:31<11:23:07, 3.34it/s] 63%|██████▎ | 234760/371472 [8:04:32<11:11:47, 3.39it/s] {'loss': 2.7963, 'learning_rate': 4.31403391583529e-07, 'epoch': 10.11} + 63%|██████▎ | 234760/371472 [8:04:32<11:11:47, 3.39it/s] 63%|██████▎ | 234761/371472 [8:04:32<11:35:35, 3.28it/s] 63%|██████▎ | 234762/371472 [8:04:32<11:24:39, 3.33it/s] 63%|██████▎ | 234763/371472 [8:04:33<11:27:30, 3.31it/s] 63%|██████▎ | 234764/371472 [8:04:33<11:03:34, 3.43it/s] 63%|██████▎ | 234765/371472 [8:04:33<11:05:10, 3.43it/s] 63%|██████▎ | 234766/371472 [8:04:33<10:47:38, 3.52it/s] 63%|██████▎ | 234767/371472 [8:04:34<11:04:48, 3.43it/s] 63%|██████▎ | 234768/371472 [8:04:34<11:23:01, 3.34it/s] 63%|██████▎ | 234769/371472 [8:04:34<11:18:08, 3.36it/s] 63%|██████▎ | 234770/371472 [8:04:35<11:04:03, 3.43it/s] 63%|██████▎ | 234771/371472 [8:04:35<10:58:38, 3.46it/s] 63%|██████▎ | 234772/371472 [8:04:35<11:17:56, 3.36it/s] 63%|██████▎ | 234773/371472 [8:04:36<11:02:50, 3.44it/s] 63%|██████▎ | 234774/371472 [8:04:36<11:09:33, 3.40it/s] 63%|██████▎ | 234775/371472 [8:04:36<10:53:14, 3.49it/s] 63%|██████▎ | 234776/371472 [8:04:36<11:36:48, 3.27it/s] 63%|██████▎ | 234777/371472 [8:04:37<11:15:17, 3.37it/s] 63%|██████▎ | 234778/371472 [8:04:37<11:06:29, 3.42it/s] 63%|██████▎ | 234779/371472 [8:04:37<11:23:40, 3.33it/s] 63%|██████▎ | 234780/371472 [8:04:38<11:29:31, 3.30it/s] {'loss': 2.7539, 'learning_rate': 4.3135490960805015e-07, 'epoch': 10.11} + 63%|██████▎ | 234780/371472 [8:04:38<11:29:31, 3.30it/s] 63%|██████▎ | 234781/371472 [8:04:38<11:12:56, 3.39it/s] 63%|██████▎ | 234782/371472 [8:04:38<11:15:53, 3.37it/s] 63%|██████▎ | 234783/371472 [8:04:39<11:23:19, 3.33it/s] 63%|██████▎ | 234784/371472 [8:04:39<11:31:00, 3.30it/s] 63%|██████▎ | 234785/371472 [8:04:39<11:17:19, 3.36it/s] 63%|██████▎ | 234786/371472 [8:04:39<11:07:21, 3.41it/s] 63%|██████▎ | 234787/371472 [8:04:40<10:43:56, 3.54it/s] 63%|██████▎ | 234788/371472 [8:04:40<10:55:02, 3.48it/s] 63%|██████▎ | 234789/371472 [8:04:40<10:47:52, 3.52it/s] 63%|██████▎ | 234790/371472 [8:04:41<12:29:50, 3.04it/s] 63%|██████▎ | 234791/371472 [8:04:41<11:53:54, 3.19it/s] 63%|██████▎ | 234792/371472 [8:04:41<11:21:54, 3.34it/s] 63%|██████▎ | 234793/371472 [8:04:42<11:18:10, 3.36it/s] 63%|██████▎ | 234794/371472 [8:04:42<11:21:49, 3.34it/s] 63%|██████▎ | 234795/371472 [8:04:42<12:26:31, 3.05it/s] 63%|██████▎ | 234796/371472 [8:04:43<12:03:28, 3.15it/s] 63%|██████▎ | 234797/371472 [8:04:43<12:12:02, 3.11it/s] 63%|██████▎ | 234798/371472 [8:04:43<11:38:05, 3.26it/s] 63%|██████▎ | 234799/371472 [8:04:43<11:17:20, 3.36it/s] 63%|██████▎ | 234800/371472 [8:04:44<10:54:30, 3.48it/s] {'loss': 2.7425, 'learning_rate': 4.3130642763257116e-07, 'epoch': 10.11} + 63%|██████▎ | 234800/371472 [8:04:44<10:54:30, 3.48it/s] 63%|██████▎ | 234801/371472 [8:04:44<10:52:25, 3.49it/s] 63%|██████▎ | 234802/371472 [8:04:44<10:43:14, 3.54it/s] 63%|██████▎ | 234803/371472 [8:04:45<11:14:02, 3.38it/s] 63%|██████▎ | 234804/371472 [8:04:45<10:57:42, 3.46it/s] 63%|██████▎ | 234805/371472 [8:04:45<11:24:53, 3.33it/s] 63%|██████▎ | 234806/371472 [8:04:45<11:15:28, 3.37it/s] 63%|██████▎ | 234807/371472 [8:04:46<11:08:51, 3.41it/s] 63%|██████▎ | 234808/371472 [8:04:46<11:09:23, 3.40it/s] 63%|██████▎ | 234809/371472 [8:04:46<11:21:04, 3.34it/s] 63%|██████▎ | 234810/371472 [8:04:47<12:04:29, 3.14it/s] 63%|██████▎ | 234811/371472 [8:04:47<12:01:54, 3.16it/s] 63%|██████▎ | 234812/371472 [8:04:47<11:34:38, 3.28it/s] 63%|██████▎ | 234813/371472 [8:04:48<11:30:11, 3.30it/s] 63%|██████▎ | 234814/371472 [8:04:48<11:41:38, 3.25it/s] 63%|██████▎ | 234815/371472 [8:04:48<11:49:42, 3.21it/s] 63%|██████▎ | 234816/371472 [8:04:48<11:19:02, 3.35it/s] 63%|██████▎ | 234817/371472 [8:04:49<11:22:28, 3.34it/s] 63%|██████▎ | 234818/371472 [8:04:49<11:04:52, 3.43it/s] 63%|██████▎ | 234819/371472 [8:04:49<11:11:04, 3.39it/s] 63%|██████▎ | 234820/371472 [8:04:50<10:40:41, 3.55it/s] {'loss': 2.6201, 'learning_rate': 4.312579456570924e-07, 'epoch': 10.11} + 63%|██████▎ | 234820/371472 [8:04:50<10:40:41, 3.55it/s] 63%|██████▎ | 234821/371472 [8:04:50<10:32:47, 3.60it/s] 63%|██████▎ | 234822/371472 [8:04:50<10:24:11, 3.65it/s] 63%|██████▎ | 234823/371472 [8:04:50<10:43:19, 3.54it/s] 63%|██████▎ | 234824/371472 [8:04:51<11:04:00, 3.43it/s] 63%|██████▎ | 234825/371472 [8:04:51<11:04:59, 3.42it/s] 63%|██████▎ | 234826/371472 [8:04:51<11:07:24, 3.41it/s] 63%|██████▎ | 234827/371472 [8:04:52<11:10:22, 3.40it/s] 63%|██████▎ | 234828/371472 [8:04:52<11:07:20, 3.41it/s] 63%|██████▎ | 234829/371472 [8:04:52<11:19:55, 3.35it/s] 63%|██████▎ | 234830/371472 [8:04:53<11:32:52, 3.29it/s] 63%|██████▎ | 234831/371472 [8:04:53<11:08:43, 3.41it/s] 63%|██████▎ | 234832/371472 [8:04:53<10:52:27, 3.49it/s] 63%|██████▎ | 234833/371472 [8:04:53<10:47:09, 3.52it/s] 63%|██████▎ | 234834/371472 [8:04:54<11:30:27, 3.30it/s] 63%|██████▎ | 234835/371472 [8:04:54<11:07:50, 3.41it/s] 63%|██████▎ | 234836/371472 [8:04:54<10:53:40, 3.48it/s] 63%|██████▎ | 234837/371472 [8:04:55<11:26:20, 3.32it/s] 63%|██████▎ | 234838/371472 [8:04:55<11:58:06, 3.17it/s] 63%|██████▎ | 234839/371472 [8:04:55<12:03:17, 3.15it/s] 63%|██████▎ | 234840/371472 [8:04:56<11:35:23, 3.27it/s] {'loss': 2.9524, 'learning_rate': 4.312094636816134e-07, 'epoch': 10.12} + 63%|██████▎ | 234840/371472 [8:04:56<11:35:23, 3.27it/s] 63%|██████▎ | 234841/371472 [8:04:56<11:22:30, 3.34it/s] 63%|██████▎ | 234842/371472 [8:04:56<12:07:40, 3.13it/s] 63%|██████▎ | 234843/371472 [8:04:56<11:49:13, 3.21it/s] 63%|██████▎ | 234844/371472 [8:04:57<13:40:06, 2.78it/s] 63%|██████▎ | 234845/371472 [8:04:57<12:38:37, 3.00it/s] 63%|██████▎ | 234846/371472 [8:04:58<12:35:48, 3.01it/s] 63%|██████▎ | 234847/371472 [8:04:58<12:07:48, 3.13it/s] 63%|██████▎ | 234848/371472 [8:04:58<12:00:26, 3.16it/s] 63%|██████▎ | 234849/371472 [8:04:58<11:36:28, 3.27it/s] 63%|██████▎ | 234850/371472 [8:04:59<11:54:59, 3.18it/s] 63%|██████▎ | 234851/371472 [8:04:59<11:49:39, 3.21it/s] 63%|██████▎ | 234852/371472 [8:04:59<12:13:10, 3.11it/s] 63%|██████▎ | 234853/371472 [8:05:00<12:24:26, 3.06it/s] 63%|██████▎ | 234854/371472 [8:05:00<11:45:14, 3.23it/s] 63%|██████▎ | 234855/371472 [8:05:00<11:50:34, 3.20it/s] 63%|██████▎ | 234856/371472 [8:05:01<11:49:55, 3.21it/s] 63%|██████▎ | 234857/371472 [8:05:01<11:10:55, 3.39it/s] 63%|██████▎ | 234858/371472 [8:05:01<10:59:51, 3.45it/s] 63%|██████▎ | 234859/371472 [8:05:02<11:56:44, 3.18it/s] 63%|██████▎ | 234860/371472 [8:05:02<11:45:17, 3.23it/s] {'loss': 2.7409, 'learning_rate': 4.311609817061346e-07, 'epoch': 10.12} + 63%|██████▎ | 234860/371472 [8:05:02<11:45:17, 3.23it/s] 63%|██████▎ | 234861/371472 [8:05:02<11:33:24, 3.28it/s] 63%|██████▎ | 234862/371472 [8:05:02<11:10:38, 3.39it/s] 63%|██████▎ | 234863/371472 [8:05:03<10:58:08, 3.46it/s] 63%|██████▎ | 234864/371472 [8:05:03<11:00:23, 3.45it/s] 63%|██████▎ | 234865/371472 [8:05:03<10:56:38, 3.47it/s] 63%|██████▎ | 234866/371472 [8:05:04<11:45:22, 3.23it/s] 63%|██████▎ | 234867/371472 [8:05:04<11:25:54, 3.32it/s] 63%|██████▎ | 234868/371472 [8:05:04<11:10:13, 3.40it/s] 63%|██████▎ | 234869/371472 [8:05:04<10:48:45, 3.51it/s] 63%|██████▎ | 234870/371472 [8:05:05<10:43:20, 3.54it/s] 63%|██████▎ | 234871/371472 [8:05:05<10:58:05, 3.46it/s] 63%|██████▎ | 234872/371472 [8:05:05<10:46:01, 3.52it/s] 63%|██████▎ | 234873/371472 [8:05:06<10:39:10, 3.56it/s] 63%|██████▎ | 234874/371472 [8:05:06<10:45:27, 3.53it/s] 63%|██████▎ | 234875/371472 [8:05:06<10:41:43, 3.55it/s] 63%|██████▎ | 234876/371472 [8:05:06<10:28:24, 3.62it/s] 63%|██████▎ | 234877/371472 [8:05:07<10:13:24, 3.71it/s] 63%|██████▎ | 234878/371472 [8:05:07<10:10:55, 3.73it/s] 63%|██████▎ | 234879/371472 [8:05:07<10:32:41, 3.60it/s] 63%|██████▎ | 234880/371472 [8:05:08<10:45:35, 3.53it/s] {'loss': 2.7645, 'learning_rate': 4.3111249973065566e-07, 'epoch': 10.12} + 63%|██████▎ | 234880/371472 [8:05:08<10:45:35, 3.53it/s] 63%|██████▎ | 234881/371472 [8:05:08<11:04:42, 3.42it/s] 63%|██████▎ | 234882/371472 [8:05:08<11:01:37, 3.44it/s] 63%|██████▎ | 234883/371472 [8:05:08<11:04:18, 3.43it/s] 63%|██████▎ | 234884/371472 [8:05:09<10:49:06, 3.51it/s] 63%|██████▎ | 234885/371472 [8:05:09<10:38:16, 3.57it/s] 63%|██████▎ | 234886/371472 [8:05:09<10:31:53, 3.60it/s] 63%|██████▎ | 234887/371472 [8:05:10<11:14:53, 3.37it/s] 63%|██████▎ | 234888/371472 [8:05:10<10:59:09, 3.45it/s] 63%|██████▎ | 234889/371472 [8:05:10<10:49:27, 3.51it/s] 63%|██████▎ | 234890/371472 [8:05:10<11:15:37, 3.37it/s] 63%|██████▎ | 234891/371472 [8:05:11<10:55:00, 3.48it/s] 63%|██████▎ | 234892/371472 [8:05:11<11:04:38, 3.42it/s] 63%|██████▎ | 234893/371472 [8:05:11<10:47:06, 3.52it/s] 63%|██████▎ | 234894/371472 [8:05:12<10:55:19, 3.47it/s] 63%|██████▎ | 234895/371472 [8:05:12<11:42:18, 3.24it/s] 63%|██████▎ | 234896/371472 [8:05:12<11:28:22, 3.31it/s] 63%|██████▎ | 234897/371472 [8:05:13<11:20:18, 3.35it/s] 63%|██████▎ | 234898/371472 [8:05:13<12:12:40, 3.11it/s] 63%|██████▎ | 234899/371472 [8:05:13<12:21:40, 3.07it/s] 63%|██████▎ | 234900/371472 [8:05:14<11:51:00, 3.20it/s] {'loss': 2.8219, 'learning_rate': 4.310640177551768e-07, 'epoch': 10.12} + 63%|██████▎ | 234900/371472 [8:05:14<11:51:00, 3.20it/s] 63%|██████▎ | 234901/371472 [8:05:14<11:31:05, 3.29it/s] 63%|██████▎ | 234902/371472 [8:05:14<12:25:51, 3.05it/s] 63%|██████▎ | 234903/371472 [8:05:14<12:04:12, 3.14it/s] 63%|██████▎ | 234904/371472 [8:05:15<11:42:08, 3.24it/s] 63%|██████▎ | 234905/371472 [8:05:15<11:17:36, 3.36it/s] 63%|██████▎ | 234906/371472 [8:05:15<11:03:18, 3.43it/s] 63%|██████▎ | 234907/371472 [8:05:16<10:56:04, 3.47it/s] 63%|██████▎ | 234908/371472 [8:05:16<11:23:10, 3.33it/s] 63%|██████▎ | 234909/371472 [8:05:16<11:09:15, 3.40it/s] 63%|██████▎ | 234910/371472 [8:05:16<10:53:50, 3.48it/s] 63%|██████▎ | 234911/371472 [8:05:17<10:40:56, 3.55it/s] 63%|██████▎ | 234912/371472 [8:05:17<11:25:47, 3.32it/s] 63%|██████▎ | 234913/371472 [8:05:17<11:18:11, 3.36it/s] 63%|██████▎ | 234914/371472 [8:05:18<11:14:16, 3.38it/s] 63%|██████▎ | 234915/371472 [8:05:18<10:47:04, 3.52it/s] 63%|██████▎ | 234916/371472 [8:05:18<10:45:08, 3.53it/s] 63%|██████▎ | 234917/371472 [8:05:19<11:03:58, 3.43it/s] 63%|██████▎ | 234918/371472 [8:05:19<11:32:35, 3.29it/s] 63%|██████▎ | 234919/371472 [8:05:19<11:24:06, 3.33it/s] 63%|██████▎ | 234920/371472 [8:05:20<11:59:43, 3.16it/s] {'loss': 2.6798, 'learning_rate': 4.3101553577969786e-07, 'epoch': 10.12} + 63%|██████▎ | 234920/371472 [8:05:20<11:59:43, 3.16it/s] 63%|██████▎ | 234921/371472 [8:05:20<12:25:53, 3.05it/s] 63%|██████▎ | 234922/371472 [8:05:20<11:55:41, 3.18it/s] 63%|██████▎ | 234923/371472 [8:05:20<11:35:12, 3.27it/s] 63%|██████▎ | 234924/371472 [8:05:21<11:47:18, 3.22it/s] 63%|██████▎ | 234925/371472 [8:05:21<11:25:00, 3.32it/s] 63%|██████▎ | 234926/371472 [8:05:21<11:25:28, 3.32it/s] 63%|██████▎ | 234927/371472 [8:05:22<11:23:30, 3.33it/s] 63%|██████▎ | 234928/371472 [8:05:22<11:10:01, 3.40it/s] 63%|██████▎ | 234929/371472 [8:05:22<10:55:11, 3.47it/s] 63%|██████▎ | 234930/371472 [8:05:22<11:04:05, 3.43it/s] 63%|██████▎ | 234931/371472 [8:05:23<10:52:14, 3.49it/s] 63%|██████▎ | 234932/371472 [8:05:23<11:53:58, 3.19it/s] 63%|██████▎ | 234933/371472 [8:05:23<11:40:54, 3.25it/s] 63%|██████▎ | 234934/371472 [8:05:24<11:27:38, 3.31it/s] 63%|██████▎ | 234935/371472 [8:05:24<11:33:38, 3.28it/s] 63%|██████▎ | 234936/371472 [8:05:24<11:22:40, 3.33it/s] 63%|██████▎ | 234937/371472 [8:05:25<11:13:08, 3.38it/s] 63%|██████▎ | 234938/371472 [8:05:25<11:04:07, 3.43it/s] 63%|██████▎ | 234939/371472 [8:05:25<11:59:06, 3.16it/s] 63%|██████▎ | 234940/371472 [8:05:26<11:53:32, 3.19it/s] {'loss': 2.617, 'learning_rate': 4.3096705380421904e-07, 'epoch': 10.12} + 63%|██████▎ | 234940/371472 [8:05:26<11:53:32, 3.19it/s] 63%|██████▎ | 234941/371472 [8:05:26<11:52:48, 3.19it/s] 63%|██████▎ | 234942/371472 [8:05:26<11:49:30, 3.21it/s] 63%|██████▎ | 234943/371472 [8:05:26<11:27:46, 3.31it/s] 63%|██████▎ | 234944/371472 [8:05:27<12:27:52, 3.04it/s] 63%|██████▎ | 234945/371472 [8:05:27<11:52:18, 3.19it/s] 63%|██████▎ | 234946/371472 [8:05:27<11:46:30, 3.22it/s] 63%|██████▎ | 234947/371472 [8:05:28<11:33:26, 3.28it/s] 63%|██████▎ | 234948/371472 [8:05:28<11:22:48, 3.33it/s] 63%|██████▎ | 234949/371472 [8:05:28<11:05:00, 3.42it/s] 63%|██████▎ | 234950/371472 [8:05:29<11:10:09, 3.40it/s] 63%|██████▎ | 234951/371472 [8:05:29<11:10:44, 3.39it/s] 63%|██████▎ | 234952/371472 [8:05:29<11:10:32, 3.39it/s] 63%|██████▎ | 234953/371472 [8:05:30<11:28:03, 3.31it/s] 63%|██████▎ | 234954/371472 [8:05:30<11:33:57, 3.28it/s] 63%|██████▎ | 234955/371472 [8:05:30<11:10:20, 3.39it/s] 63%|██████▎ | 234956/371472 [8:05:30<10:58:24, 3.46it/s] 63%|██████▎ | 234957/371472 [8:05:31<10:48:02, 3.51it/s] 63%|██████▎ | 234958/371472 [8:05:31<10:57:00, 3.46it/s] 63%|██████▎ | 234959/371472 [8:05:31<11:02:58, 3.43it/s] 63%|██████▎ | 234960/371472 [8:05:32<11:11:37, 3.39it/s] {'loss': 2.8987, 'learning_rate': 4.309185718287401e-07, 'epoch': 10.12} + 63%|██████▎ | 234960/371472 [8:05:32<11:11:37, 3.39it/s] 63%|██████▎ | 234961/371472 [8:05:32<11:46:25, 3.22it/s] 63%|██████▎ | 234962/371472 [8:05:32<11:35:42, 3.27it/s] 63%|██████▎ | 234963/371472 [8:05:32<11:43:05, 3.24it/s] 63%|██████▎ | 234964/371472 [8:05:33<11:16:56, 3.36it/s] 63%|██████▎ | 234965/371472 [8:05:33<11:10:36, 3.39it/s] 63%|██████▎ | 234966/371472 [8:05:33<11:06:37, 3.41it/s] 63%|██████▎ | 234967/371472 [8:05:34<11:08:07, 3.41it/s] 63%|██████▎ | 234968/371472 [8:05:34<11:09:28, 3.40it/s] 63%|██████▎ | 234969/371472 [8:05:34<11:09:39, 3.40it/s] 63%|██████▎ | 234970/371472 [8:05:35<11:13:44, 3.38it/s] 63%|██████▎ | 234971/371472 [8:05:35<11:06:31, 3.41it/s] 63%|██████▎ | 234972/371472 [8:05:35<11:00:35, 3.44it/s] 63%|██████▎ | 234973/371472 [8:05:35<10:50:17, 3.50it/s] 63%|██████▎ | 234974/371472 [8:05:36<10:53:02, 3.48it/s] 63%|██████▎ | 234975/371472 [8:05:36<10:48:06, 3.51it/s] 63%|██████▎ | 234976/371472 [8:05:36<11:38:39, 3.26it/s] 63%|██████▎ | 234977/371472 [8:05:37<11:48:20, 3.21it/s] 63%|██████▎ | 234978/371472 [8:05:37<11:14:20, 3.37it/s] 63%|██████▎ | 234979/371472 [8:05:37<11:04:26, 3.42it/s] 63%|██████▎ | 234980/371472 [8:05:37<11:02:27, 3.43it/s] {'loss': 2.6836, 'learning_rate': 4.3087008985326123e-07, 'epoch': 10.12} + 63%|██████▎ | 234980/371472 [8:05:37<11:02:27, 3.43it/s] 63%|██████▎ | 234981/371472 [8:05:38<10:39:16, 3.56it/s] 63%|██████▎ | 234982/371472 [8:05:38<10:48:28, 3.51it/s] 63%|██████▎ | 234983/371472 [8:05:38<11:04:46, 3.42it/s] 63%|██████▎ | 234984/371472 [8:05:39<11:15:09, 3.37it/s] 63%|██████▎ | 234985/371472 [8:05:39<11:29:06, 3.30it/s] 63%|██████▎ | 234986/371472 [8:05:39<11:07:15, 3.41it/s] 63%|██████▎ | 234987/371472 [8:05:39<10:56:22, 3.47it/s] 63%|██████▎ | 234988/371472 [8:05:40<10:52:23, 3.49it/s] 63%|██████▎ | 234989/371472 [8:05:40<10:38:33, 3.56it/s] 63%|██████▎ | 234990/371472 [8:05:40<10:21:49, 3.66it/s] 63%|██████▎ | 234991/371472 [8:05:41<10:40:27, 3.55it/s] 63%|██████▎ | 234992/371472 [8:05:41<11:43:18, 3.23it/s] 63%|██████▎ | 234993/371472 [8:05:41<11:25:43, 3.32it/s] 63%|██████▎ | 234994/371472 [8:05:42<11:26:54, 3.31it/s] 63%|██████▎ | 234995/371472 [8:05:42<11:21:49, 3.34it/s] 63%|██████▎ | 234996/371472 [8:05:42<11:20:45, 3.34it/s] 63%|██████▎ | 234997/371472 [8:05:42<11:15:08, 3.37it/s] 63%|██████▎ | 234998/371472 [8:05:43<11:01:12, 3.44it/s] 63%|██████▎ | 234999/371472 [8:05:43<10:48:14, 3.51it/s] 63%|██████▎ | 235000/371472 [8:05:43<11:33:57, 3.28it/s] {'loss': 2.8904, 'learning_rate': 4.308216078777823e-07, 'epoch': 10.12} + 63%|██████▎ | 235000/371472 [8:05:43<11:33:57, 3.28it/s] 63%|██████▎ | 235001/371472 [8:05:44<11:17:35, 3.36it/s] 63%|██████▎ | 235002/371472 [8:05:44<11:24:07, 3.32it/s] 63%|██████▎ | 235003/371472 [8:05:44<11:16:37, 3.36it/s] 63%|██████▎ | 235004/371472 [8:05:45<11:10:44, 3.39it/s] 63%|██████▎ | 235005/371472 [8:05:45<11:24:53, 3.32it/s] 63%|██████▎ | 235006/371472 [8:05:45<11:13:05, 3.38it/s] 63%|██████▎ | 235007/371472 [8:05:45<11:08:05, 3.40it/s] 63%|██████▎ | 235008/371472 [8:05:46<10:55:03, 3.47it/s] 63%|██████▎ | 235009/371472 [8:05:46<10:54:49, 3.47it/s] 63%|██████▎ | 235010/371472 [8:05:46<10:46:57, 3.52it/s] 63%|██████▎ | 235011/371472 [8:05:47<11:02:25, 3.43it/s] 63%|██████▎ | 235012/371472 [8:05:47<10:50:26, 3.50it/s] 63%|██████▎ | 235013/371472 [8:05:47<10:57:55, 3.46it/s] 63%|██████▎ | 235014/371472 [8:05:47<11:15:47, 3.37it/s] 63%|██████▎ | 235015/371472 [8:05:48<11:10:54, 3.39it/s] 63%|██████▎ | 235016/371472 [8:05:48<10:56:48, 3.46it/s] 63%|██████▎ | 235017/371472 [8:05:48<10:39:51, 3.55it/s] 63%|██████▎ | 235018/371472 [8:05:49<11:00:46, 3.44it/s] 63%|██████▎ | 235019/371472 [8:05:49<11:15:39, 3.37it/s] 63%|██████▎ | 235020/371472 [8:05:49<11:14:07, 3.37it/s] {'loss': 2.7397, 'learning_rate': 4.307731259023035e-07, 'epoch': 10.12} + 63%|██████▎ | 235020/371472 [8:05:49<11:14:07, 3.37it/s] 63%|██████▎ | 235021/371472 [8:05:49<11:01:38, 3.44it/s] 63%|██████▎ | 235022/371472 [8:05:50<11:54:46, 3.18it/s] 63%|██████▎ | 235023/371472 [8:05:50<11:39:38, 3.25it/s] 63%|██████▎ | 235024/371472 [8:05:50<11:24:32, 3.32it/s] 63%|██████▎ | 235025/371472 [8:05:51<11:26:07, 3.31it/s] 63%|██████▎ | 235026/371472 [8:05:51<11:07:48, 3.41it/s] 63%|██████▎ | 235027/371472 [8:05:51<11:48:38, 3.21it/s] 63%|██████▎ | 235028/371472 [8:05:52<11:35:00, 3.27it/s] 63%|██████▎ | 235029/371472 [8:05:52<11:11:10, 3.39it/s] 63%|██████▎ | 235030/371472 [8:05:52<11:06:29, 3.41it/s] 63%|██████▎ | 235031/371472 [8:05:52<10:58:35, 3.45it/s] 63%|██████▎ | 235032/371472 [8:05:53<10:52:41, 3.48it/s] 63%|██████▎ | 235033/371472 [8:05:53<10:44:23, 3.53it/s] 63%|██████▎ | 235034/371472 [8:05:53<10:49:18, 3.50it/s] 63%|██████▎ | 235035/371472 [8:05:54<10:40:44, 3.55it/s] 63%|██████▎ | 235036/371472 [8:05:54<12:08:06, 3.12it/s] 63%|██████▎ | 235037/371472 [8:05:54<11:58:07, 3.17it/s] 63%|██████▎ | 235038/371472 [8:05:55<11:57:05, 3.17it/s] 63%|██████▎ | 235039/371472 [8:05:55<11:50:31, 3.20it/s] 63%|██████▎ | 235040/371472 [8:05:55<11:43:24, 3.23it/s] {'loss': 2.7348, 'learning_rate': 4.307246439268245e-07, 'epoch': 10.12} + 63%|██████▎ | 235040/371472 [8:05:55<11:43:24, 3.23it/s] 63%|██████▎ | 235041/371472 [8:05:56<12:03:33, 3.14it/s] 63%|██████▎ | 235042/371472 [8:05:56<11:36:39, 3.26it/s] 63%|██████▎ | 235043/371472 [8:05:56<11:03:12, 3.43it/s] 63%|██████▎ | 235044/371472 [8:05:56<11:25:33, 3.32it/s] 63%|██████▎ | 235045/371472 [8:05:57<11:58:39, 3.16it/s] 63%|██████▎ | 235046/371472 [8:05:57<11:38:26, 3.26it/s] 63%|██████▎ | 235047/371472 [8:05:57<12:59:19, 2.92it/s] 63%|█████��▎ | 235048/371472 [8:05:58<11:56:30, 3.17it/s] 63%|██████▎ | 235049/371472 [8:05:58<11:42:45, 3.24it/s] 63%|██████▎ | 235050/371472 [8:05:58<11:33:46, 3.28it/s] 63%|██████▎ | 235051/371472 [8:05:59<11:19:29, 3.35it/s] 63%|██████▎ | 235052/371472 [8:05:59<11:14:56, 3.37it/s] 63%|██████▎ | 235053/371472 [8:05:59<12:10:53, 3.11it/s] 63%|██████▎ | 235054/371472 [8:06:00<11:36:57, 3.26it/s] 63%|██████▎ | 235055/371472 [8:06:00<11:24:54, 3.32it/s] 63%|██████▎ | 235056/371472 [8:06:00<11:20:54, 3.34it/s] 63%|██████▎ | 235057/371472 [8:06:00<11:27:41, 3.31it/s] 63%|██████▎ | 235058/371472 [8:06:01<12:06:24, 3.13it/s] 63%|██████▎ | 235059/371472 [8:06:01<11:35:06, 3.27it/s] 63%|██████▎ | 235060/371472 [8:06:01<11:02:10, 3.43it/s] {'loss': 2.6797, 'learning_rate': 4.306761619513457e-07, 'epoch': 10.12} + 63%|██████▎ | 235060/371472 [8:06:01<11:02:10, 3.43it/s] 63%|██████▎ | 235061/371472 [8:06:02<11:05:46, 3.41it/s] 63%|██████▎ | 235062/371472 [8:06:02<11:00:33, 3.44it/s] 63%|██████▎ | 235063/371472 [8:06:02<10:50:51, 3.49it/s] 63%|██████▎ | 235064/371472 [8:06:03<11:12:05, 3.38it/s] 63%|██████▎ | 235065/371472 [8:06:03<11:09:56, 3.39it/s] 63%|██████▎ | 235066/371472 [8:06:03<11:08:19, 3.40it/s] 63%|██████▎ | 235067/371472 [8:06:03<11:13:44, 3.37it/s] 63%|██████▎ | 235068/371472 [8:06:04<11:54:07, 3.18it/s] 63%|██████▎ | 235069/371472 [8:06:04<11:20:22, 3.34it/s] 63%|██████▎ | 235070/371472 [8:06:04<11:50:04, 3.20it/s] 63%|██████▎ | 235071/371472 [8:06:05<11:35:40, 3.27it/s] 63%|██████▎ | 235072/371472 [8:06:05<11:01:29, 3.44it/s] 63%|██████▎ | 235073/371472 [8:06:05<10:55:05, 3.47it/s] 63%|██████▎ | 235074/371472 [8:06:05<10:45:05, 3.52it/s] 63%|██████▎ | 235075/371472 [8:06:06<10:27:06, 3.63it/s] 63%|██████▎ | 235076/371472 [8:06:06<10:35:53, 3.57it/s] 63%|██████▎ | 235077/371472 [8:06:06<10:43:53, 3.53it/s] 63%|██████▎ | 235078/371472 [8:06:07<10:47:41, 3.51it/s] 63%|██████▎ | 235079/371472 [8:06:07<10:39:04, 3.56it/s] 63%|██████▎ | 235080/371472 [8:06:07<10:42:53, 3.54it/s] {'loss': 2.8619, 'learning_rate': 4.3062767997586675e-07, 'epoch': 10.13} + 63%|██████▎ | 235080/371472 [8:06:07<10:42:53, 3.54it/s] 63%|██████▎ | 235081/371472 [8:06:07<10:52:32, 3.48it/s] 63%|██████▎ | 235082/371472 [8:06:08<11:08:52, 3.40it/s] 63%|██████▎ | 235083/371472 [8:06:08<12:01:48, 3.15it/s] 63%|██████▎ | 235084/371472 [8:06:08<11:48:30, 3.21it/s] 63%|██████▎ | 235085/371472 [8:06:09<11:30:42, 3.29it/s] 63%|██████▎ | 235086/371472 [8:06:09<11:59:34, 3.16it/s] 63%|██████▎ | 235087/371472 [8:06:09<11:33:07, 3.28it/s] 63%|██████▎ | 235088/371472 [8:06:10<11:39:48, 3.25it/s] 63%|██████▎ | 235089/371472 [8:06:10<11:41:05, 3.24it/s] 63%|██████▎ | 235090/371472 [8:06:10<11:34:13, 3.27it/s] 63%|██████▎ | 235091/371472 [8:06:11<11:13:16, 3.38it/s] 63%|██████▎ | 235092/371472 [8:06:11<11:39:43, 3.25it/s] 63%|██████▎ | 235093/371472 [8:06:11<11:24:18, 3.32it/s] 63%|██████▎ | 235094/371472 [8:06:11<11:31:03, 3.29it/s] 63%|██████▎ | 235095/371472 [8:06:12<11:22:04, 3.33it/s] 63%|██████▎ | 235096/371472 [8:06:12<11:29:42, 3.30it/s] 63%|██████▎ | 235097/371472 [8:06:12<11:04:10, 3.42it/s] 63%|██████▎ | 235098/371472 [8:06:13<11:07:48, 3.40it/s] 63%|██████▎ | 235099/371472 [8:06:13<11:13:26, 3.38it/s] 63%|██████▎ | 235100/371472 [8:06:13<14:16:47, 2.65it/s] {'loss': 2.6747, 'learning_rate': 4.3057919800038777e-07, 'epoch': 10.13} + 63%|██████▎ | 235100/371472 [8:06:13<14:16:47, 2.65it/s] 63%|██████▎ | 235101/371472 [8:06:14<13:18:10, 2.85it/s] 63%|██████▎ | 235102/371472 [8:06:14<13:42:08, 2.76it/s] 63%|██████▎ | 235103/371472 [8:06:14<12:55:41, 2.93it/s] 63%|██████▎ | 235104/371472 [8:06:15<12:26:02, 3.05it/s] 63%|██████▎ | 235105/371472 [8:06:15<12:13:43, 3.10it/s] 63%|██████▎ | 235106/371472 [8:06:15<11:55:25, 3.18it/s] 63%|██████▎ | 235107/371472 [8:06:16<11:51:52, 3.19it/s] 63%|��█████▎ | 235108/371472 [8:06:16<11:25:32, 3.32it/s] 63%|██████▎ | 235109/371472 [8:06:16<11:43:14, 3.23it/s] 63%|██████▎ | 235110/371472 [8:06:17<11:50:04, 3.20it/s] 63%|██████▎ | 235111/371472 [8:06:17<11:29:26, 3.30it/s] 63%|██████▎ | 235112/371472 [8:06:17<11:17:20, 3.36it/s] 63%|██████▎ | 235113/371472 [8:06:18<11:41:34, 3.24it/s] 63%|██████▎ | 235114/371472 [8:06:18<11:20:41, 3.34it/s] 63%|██████▎ | 235115/371472 [8:06:18<11:13:09, 3.38it/s] 63%|██████▎ | 235116/371472 [8:06:18<11:17:12, 3.36it/s] 63%|██████▎ | 235117/371472 [8:06:19<10:57:19, 3.46it/s] 63%|██████▎ | 235118/371472 [8:06:19<10:48:00, 3.51it/s] 63%|██████▎ | 235119/371472 [8:06:19<10:41:43, 3.54it/s] 63%|██████▎ | 235120/371472 [8:06:19<10:58:28, 3.45it/s] {'loss': 2.85, 'learning_rate': 4.3053071602490894e-07, 'epoch': 10.13} + 63%|██████▎ | 235120/371472 [8:06:20<10:58:28, 3.45it/s] 63%|██████▎ | 235121/371472 [8:06:20<11:16:54, 3.36it/s] 63%|██████▎ | 235122/371472 [8:06:20<11:32:18, 3.28it/s] 63%|██████▎ | 235123/371472 [8:06:20<11:18:30, 3.35it/s] 63%|██████▎ | 235124/371472 [8:06:21<11:11:25, 3.38it/s] 63%|██████▎ | 235125/371472 [8:06:21<10:58:32, 3.45it/s] 63%|██████▎ | 235126/371472 [8:06:21<11:00:37, 3.44it/s] 63%|██████▎ | 235127/371472 [8:06:22<10:56:31, 3.46it/s] 63%|██████▎ | 235128/371472 [8:06:22<11:32:22, 3.28it/s] 63%|██████▎ | 235129/371472 [8:06:22<11:35:45, 3.27it/s] 63%|██████▎ | 235130/371472 [8:06:23<12:00:14, 3.16it/s] 63%|██████▎ | 235131/371472 [8:06:23<11:28:12, 3.30it/s] 63%|██████▎ | 235132/371472 [8:06:23<11:18:57, 3.35it/s] 63%|██████▎ | 235133/371472 [8:06:23<11:01:40, 3.43it/s] 63%|██████▎ | 235134/371472 [8:06:24<11:08:49, 3.40it/s] 63%|██████▎ | 235135/371472 [8:06:24<10:41:05, 3.54it/s] 63%|██████▎ | 235136/371472 [8:06:24<11:25:22, 3.32it/s] 63%|██████▎ | 235137/371472 [8:06:25<11:04:32, 3.42it/s] 63%|██████▎ | 235138/371472 [8:06:25<10:56:59, 3.46it/s] 63%|██████▎ | 235139/371472 [8:06:25<11:09:27, 3.39it/s] 63%|██████▎ | 235140/371472 [8:06:25<11:19:38, 3.34it/s] {'loss': 2.6745, 'learning_rate': 4.3048223404943e-07, 'epoch': 10.13} + 63%|██████▎ | 235140/371472 [8:06:25<11:19:38, 3.34it/s] 63%|██████▎ | 235141/371472 [8:06:26<11:39:06, 3.25it/s] 63%|██████▎ | 235142/371472 [8:06:26<12:34:28, 3.01it/s] 63%|██████▎ | 235143/371472 [8:06:27<12:49:37, 2.95it/s] 63%|██████▎ | 235144/371472 [8:06:27<13:18:01, 2.85it/s] 63%|██████▎ | 235145/371472 [8:06:27<12:26:55, 3.04it/s] 63%|██████▎ | 235146/371472 [8:06:28<13:13:37, 2.86it/s] 63%|██████▎ | 235147/371472 [8:06:28<12:45:10, 2.97it/s] 63%|██████▎ | 235148/371472 [8:06:28<12:07:57, 3.12it/s] 63%|██████▎ | 235149/371472 [8:06:28<11:34:34, 3.27it/s] 63%|██████▎ | 235150/371472 [8:06:29<11:07:14, 3.41it/s] 63%|██████▎ | 235151/371472 [8:06:29<11:05:33, 3.41it/s] 63%|██████▎ | 235152/371472 [8:06:29<11:03:02, 3.43it/s] 63%|██████▎ | 235153/371472 [8:06:30<12:06:00, 3.13it/s] 63%|██████▎ | 235154/371472 [8:06:30<11:22:10, 3.33it/s] 63%|██████▎ | 235155/371472 [8:06:30<10:52:25, 3.48it/s] 63%|██████▎ | 235156/371472 [8:06:30<10:51:05, 3.49it/s] 63%|██████▎ | 235157/371472 [8:06:31<10:35:56, 3.57it/s] 63%|██████▎ | 235158/371472 [8:06:31<11:34:41, 3.27it/s] 63%|██████▎ | 235159/371472 [8:06:31<11:34:31, 3.27it/s] 63%|██████▎ | 235160/371472 [8:06:32<11:12:04, 3.38it/s] {'loss': 2.7043, 'learning_rate': 4.3043375207395114e-07, 'epoch': 10.13} + 63%|██████▎ | 235160/371472 [8:06:32<11:12:04, 3.38it/s] 63%|██████▎ | 235161/371472 [8:06:32<10:51:36, 3.49it/s] 63%|██████▎ | 235162/371472 [8:06:32<10:59:17, 3.45it/s] 63%|██████▎ | 235163/371472 [8:06:33<10:51:01, 3.49it/s] 63%|██████▎ | 235164/371472 [8:06:33<10:51:48, 3.49it/s] 63%|██████▎ | 235165/371472 [8:06:33<10:59:17, 3.45it/s] 63%|██████▎ | 235166/371472 [8:06:33<10:52:27, 3.48it/s] 63%|██████▎ | 235167/371472 [8:06:34<10:50:20, 3.49it/s] 63%|██████▎ | 235168/371472 [8:06:34<10:37:26, 3.56it/s] 63%|██████▎ | 235169/371472 [8:06:34<10:58:30, 3.45it/s] 63%|██████▎ | 235170/371472 [8:06:35<11:13:38, 3.37it/s] 63%|██████▎ | 235171/371472 [8:06:35<11:12:11, 3.38it/s] 63%|██████▎ | 235172/371472 [8:06:35<10:59:32, 3.44it/s] 63%|██████▎ | 235173/371472 [8:06:35<10:41:42, 3.54it/s] 63%|██████▎ | 235174/371472 [8:06:36<10:58:12, 3.45it/s] 63%|██████▎ | 235175/371472 [8:06:36<11:12:57, 3.38it/s] 63%|██████▎ | 235176/371472 [8:06:36<11:11:06, 3.38it/s] 63%|██████▎ | 235177/371472 [8:06:37<10:52:43, 3.48it/s] 63%|██████▎ | 235178/371472 [8:06:37<11:10:55, 3.39it/s] 63%|██████▎ | 235179/371472 [8:06:37<11:04:15, 3.42it/s] 63%|██████▎ | 235180/371472 [8:06:38<12:09:46, 3.11it/s] {'loss': 2.7826, 'learning_rate': 4.303852700984722e-07, 'epoch': 10.13} + 63%|██████▎ | 235180/371472 [8:06:38<12:09:46, 3.11it/s] 63%|██████▎ | 235181/371472 [8:06:38<12:48:50, 2.95it/s] 63%|██████▎ | 235182/371472 [8:06:38<12:50:13, 2.95it/s] 63%|██████▎ | 235183/371472 [8:06:39<11:58:19, 3.16it/s] 63%|██████▎ | 235184/371472 [8:06:39<11:21:22, 3.33it/s] 63%|██████▎ | 235185/371472 [8:06:39<11:09:02, 3.40it/s] 63%|██████▎ | 235186/371472 [8:06:39<11:03:33, 3.42it/s] 63%|██████▎ | 235187/371472 [8:06:40<10:54:48, 3.47it/s] 63%|██████▎ | 235188/371472 [8:06:40<10:48:33, 3.50it/s] 63%|██████▎ | 235189/371472 [8:06:40<10:47:17, 3.51it/s] 63%|██████▎ | 235190/371472 [8:06:40<10:33:29, 3.59it/s] 63%|██████▎ | 235191/371472 [8:06:41<10:43:06, 3.53it/s] 63%|██████▎ | 235192/371472 [8:06:41<11:17:40, 3.35it/s] 63%|██████▎ | 235193/371472 [8:06:41<11:04:38, 3.42it/s] 63%|██████▎ | 235194/371472 [8:06:42<10:43:00, 3.53it/s] 63%|██████▎ | 235195/371472 [8:06:42<11:22:57, 3.33it/s] 63%|██████▎ | 235196/371472 [8:06:42<11:23:58, 3.32it/s] 63%|██████▎ | 235197/371472 [8:06:43<12:02:40, 3.14it/s] 63%|██████▎ | 235198/371472 [8:06:43<11:41:36, 3.24it/s] 63%|██████▎ | 235199/371472 [8:06:43<11:45:28, 3.22it/s] 63%|██████▎ | 235200/371472 [8:06:44<11:50:28, 3.20it/s] {'loss': 2.736, 'learning_rate': 4.303367881229934e-07, 'epoch': 10.13} + 63%|██████▎ | 235200/371472 [8:06:44<11:50:28, 3.20it/s] 63%|██████▎ | 235201/371472 [8:06:44<11:10:16, 3.39it/s] 63%|██████▎ | 235202/371472 [8:06:44<11:00:17, 3.44it/s] 63%|██████▎ | 235203/371472 [8:06:44<11:19:09, 3.34it/s] 63%|██████▎ | 235204/371472 [8:06:45<11:41:05, 3.24it/s] 63%|██████▎ | 235205/371472 [8:06:45<11:50:09, 3.20it/s] 63%|██████▎ | 235206/371472 [8:06:45<11:41:07, 3.24it/s] 63%|██████▎ | 235207/371472 [8:06:46<11:17:19, 3.35it/s] 63%|██████▎ | 235208/371472 [8:06:46<10:58:12, 3.45it/s] 63%|██████▎ | 235209/371472 [8:06:46<10:57:18, 3.46it/s] 63%|██████▎ | 235210/371472 [8:06:46<10:48:11, 3.50it/s] 63%|██████▎ | 235211/371472 [8:06:47<11:06:54, 3.41it/s] 63%|██████▎ | 235212/371472 [8:06:47<11:44:55, 3.22it/s] 63%|██████▎ | 235213/371472 [8:06:47<11:19:10, 3.34it/s] 63%|██████▎ | 235214/371472 [8:06:48<11:31:25, 3.28it/s] 63%|██████▎ | 235215/371472 [8:06:48<11:31:14, 3.29it/s] 63%|██████▎ | 235216/371472 [8:06:48<12:22:25, 3.06it/s] 63%|██████▎ | 235217/371472 [8:06:49<12:03:54, 3.14it/s] 63%|██████▎ | 235218/371472 [8:06:49<11:32:14, 3.28it/s] 63%|██████▎ | 235219/371472 [8:06:49<13:31:09, 2.80it/s] 63%|██████▎ | 235220/371472 [8:06:50<12:38:55, 2.99it/s] {'loss': 2.6693, 'learning_rate': 4.3028830614751446e-07, 'epoch': 10.13} + 63%|██████▎ | 235220/371472 [8:06:50<12:38:55, 2.99it/s] 63%|██████▎ | 235221/371472 [8:06:50<12:08:07, 3.12it/s] 63%|██████▎ | 235222/371472 [8:06:50<12:13:07, 3.10it/s] 63%|██████▎ | 235223/371472 [8:06:51<11:41:21, 3.24it/s] 63%|██████▎ | 235224/371472 [8:06:51<11:18:19, 3.35it/s] 63%|██████▎ | 235225/371472 [8:06:51<11:00:33, 3.44it/s] 63%|██████▎ | 235226/371472 [8:06:52<11:12:57, 3.37it/s] 63%|██████▎ | 235227/371472 [8:06:52<11:21:20, 3.33it/s] 63%|██████▎ | 235228/371472 [8:06:52<11:07:52, 3.40it/s] 63%|██████▎ | 235229/371472 [8:06:52<11:24:48, 3.32it/s] 63%|██████▎ | 235230/371472 [8:06:53<11:41:05, 3.24it/s] 63%|██████▎ | 235231/371472 [8:06:53<11:29:46, 3.29it/s] 63%|██████▎ | 235232/371472 [8:06:53<11:52:45, 3.19it/s] 63%|██████▎ | 235233/371472 [8:06:54<11:55:08, 3.18it/s] 63%|██████▎ | 235234/371472 [8:06:54<12:00:12, 3.15it/s] 63%|██████▎ | 235235/371472 [8:06:54<11:48:59, 3.20it/s] 63%|██████▎ | 235236/371472 [8:06:55<11:29:18, 3.29it/s] 63%|██████▎ | 235237/371472 [8:06:55<11:26:48, 3.31it/s] 63%|██████▎ | 235238/371472 [8:06:55<11:39:22, 3.25it/s] 63%|██████▎ | 235239/371472 [8:06:55<11:20:41, 3.34it/s] 63%|██████▎ | 235240/371472 [8:06:56<10:52:56, 3.48it/s] {'loss': 2.8071, 'learning_rate': 4.302398241720356e-07, 'epoch': 10.13} + 63%|██████▎ | 235240/371472 [8:06:56<10:52:56, 3.48it/s] 63%|██████▎ | 235241/371472 [8:06:56<10:42:24, 3.53it/s] 63%|██████▎ | 235242/371472 [8:06:56<10:35:27, 3.57it/s] 63%|██████▎ | 235243/371472 [8:06:57<10:40:36, 3.54it/s] 63%|██████▎ | 235244/371472 [8:06:57<10:29:30, 3.61it/s] 63%|██████▎ | 235245/371472 [8:06:57<10:49:43, 3.49it/s] 63%|██████▎ | 235246/371472 [8:06:57<10:46:54, 3.51it/s] 63%|██████▎ | 235247/371472 [8:06:58<10:53:54, 3.47it/s] 63%|██████▎ | 235248/371472 [8:06:58<10:45:12, 3.52it/s] 63%|██████▎ | 235249/371472 [8:06:58<11:02:29, 3.43it/s] 63%|██████▎ | 235250/371472 [8:06:59<10:58:19, 3.45it/s] 63%|██████▎ | 235251/371472 [8:06:59<11:37:59, 3.25it/s] 63%|██████▎ | 235252/371472 [8:06:59<11:22:49, 3.32it/s] 63%|██████▎ | 235253/371472 [8:07:00<13:14:42, 2.86it/s] 63%|██████▎ | 235254/371472 [8:07:00<13:09:24, 2.88it/s] 63%|██████▎ | 235255/371472 [8:07:00<12:27:14, 3.04it/s] 63%|██████▎ | 235256/371472 [8:07:01<12:04:12, 3.13it/s] 63%|██████▎ | 235257/371472 [8:07:01<11:25:09, 3.31it/s] 63%|██████▎ | 235258/371472 [8:07:01<11:23:32, 3.32it/s] 63%|██████▎ | 235259/371472 [8:07:01<11:07:41, 3.40it/s] 63%|██████▎ | 235260/371472 [8:07:02<11:01:19, 3.43it/s] {'loss': 2.7331, 'learning_rate': 4.3019134219655666e-07, 'epoch': 10.13} + 63%|██████▎ | 235260/371472 [8:07:02<11:01:19, 3.43it/s] 63%|██████▎ | 235261/371472 [8:07:02<11:34:16, 3.27it/s] 63%|██████▎ | 235262/371472 [8:07:03<12:47:15, 2.96it/s] 63%|██████▎ | 235263/371472 [8:07:03<12:01:26, 3.15it/s] 63%|██████▎ | 235264/371472 [8:07:03<11:34:29, 3.27it/s] 63%|██████▎ | 235265/371472 [8:07:03<11:18:40, 3.34it/s] 63%|██████▎ | 235266/371472 [8:07:04<10:54:03, 3.47it/s] 63%|██████▎ | 235267/371472 [8:07:04<10:35:47, 3.57it/s] 63%|██████▎ | 235268/371472 [8:07:04<10:45:58, 3.51it/s] 63%|██████▎ | 235269/371472 [8:07:04<10:44:37, 3.52it/s] 63%|██████▎ | 235270/371472 [8:07:05<10:41:57, 3.54it/s] 63%|██████▎ | 235271/371472 [8:07:05<10:47:16, 3.51it/s] 63%|██████▎ | 235272/371472 [8:07:05<11:20:52, 3.33it/s] 63%|██████▎ | 235273/371472 [8:07:06<11:09:59, 3.39it/s] 63%|██████▎ | 235274/371472 [8:07:06<10:51:50, 3.48it/s] 63%|██████▎ | 235275/371472 [8:07:06<11:14:09, 3.37it/s] 63%|██████▎ | 235276/371472 [8:07:07<11:14:02, 3.37it/s] 63%|██████▎ | 235277/371472 [8:07:07<10:59:24, 3.44it/s] 63%|██████▎ | 235278/371472 [8:07:07<10:45:09, 3.52it/s] 63%|██████▎ | 235279/371472 [8:07:07<11:18:31, 3.35it/s] 63%|██████▎ | 235280/371472 [8:07:08<11:15:30, 3.36it/s] {'loss': 2.6406, 'learning_rate': 4.3014286022107783e-07, 'epoch': 10.13} + 63%|██████▎ | 235280/371472 [8:07:08<11:15:30, 3.36it/s] 63%|██████▎ | 235281/371472 [8:07:08<11:49:39, 3.20it/s] 63%|██████▎ | 235282/371472 [8:07:08<11:40:11, 3.24it/s] 63%|██████▎ | 235283/371472 [8:07:09<11:32:57, 3.28it/s] 63%|██████▎ | 235284/371472 [8:07:09<11:11:01, 3.38it/s] 63%|██████▎ | 235285/371472 [8:07:09<11:07:41, 3.40it/s] 63%|██████▎ | 235286/371472 [8:07:09<11:07:32, 3.40it/s] 63%|██████▎ | 235287/371472 [8:07:10<11:22:53, 3.32it/s] 63%|██████▎ | 235288/371472 [8:07:10<10:58:01, 3.45it/s] 63%|██████▎ | 235289/371472 [8:07:10<10:57:41, 3.45it/s] 63%|██████▎ | 235290/371472 [8:07:11<10:45:15, 3.52it/s] 63%|██████▎ | 235291/371472 [8:07:11<10:43:29, 3.53it/s] 63%|██████▎ | 235292/371472 [8:07:11<10:35:42, 3.57it/s] 63%|██████▎ | 235293/371472 [8:07:11<10:36:00, 3.57it/s] 63%|██████▎ | 235294/371472 [8:07:12<11:10:11, 3.39it/s] 63%|██████▎ | 235295/371472 [8:07:12<12:05:47, 3.13it/s] 63%|██████▎ | 235296/371472 [8:07:12<11:29:00, 3.29it/s] 63%|██████▎ | 235297/371472 [8:07:13<11:09:01, 3.39it/s] 63%|██████▎ | 235298/371472 [8:07:13<10:58:43, 3.45it/s] 63%|██████▎ | 235299/371472 [8:07:13<10:57:00, 3.45it/s] 63%|██████▎ | 235300/371472 [8:07:14<10:52:39, 3.48it/s] {'loss': 2.6701, 'learning_rate': 4.3009437824559885e-07, 'epoch': 10.13} + 63%|██████▎ | 235300/371472 [8:07:14<10:52:39, 3.48it/s] 63%|██████▎ | 235301/371472 [8:07:14<10:39:51, 3.55it/s] 63%|██████▎ | 235302/371472 [8:07:14<10:26:19, 3.62it/s] 63%|██████▎ | 235303/371472 [8:07:14<10:33:41, 3.58it/s] 63%|██████▎ | 235304/371472 [8:07:15<10:45:30, 3.52it/s] 63%|██████▎ | 235305/371472 [8:07:15<11:34:21, 3.27it/s] 63%|██████▎ | 235306/371472 [8:07:15<11:38:12, 3.25it/s] 63%|██████▎ | 235307/371472 [8:07:16<11:09:18, 3.39it/s] 63%|██████▎ | 235308/371472 [8:07:16<10:55:17, 3.46it/s] 63%|██████▎ | 235309/371472 [8:07:16<11:29:16, 3.29it/s] 63%|██████▎ | 235310/371472 [8:07:17<12:02:12, 3.14it/s] 63%|██████▎ | 235311/371472 [8:07:17<11:58:58, 3.16it/s] 63%|██████▎ | 235312/371472 [8:07:17<11:30:23, 3.29it/s] 63%|██████▎ | 235313/371472 [8:07:18<12:33:57, 3.01it/s] 63%|██████▎ | 235314/371472 [8:07:18<12:13:14, 3.09it/s] 63%|██████▎ | 235315/371472 [8:07:18<11:42:05, 3.23it/s] 63%|██████▎ | 235316/371472 [8:07:18<11:23:06, 3.32it/s] 63%|██████▎ | 235317/371472 [8:07:19<10:57:57, 3.45it/s] 63%|██████▎ | 235318/371472 [8:07:19<11:02:10, 3.43it/s] 63%|██████▎ | 235319/371472 [8:07:19<11:07:37, 3.40it/s] 63%|██████▎ | 235320/371472 [8:07:20<11:11:08, 3.38it/s] {'loss': 2.6395, 'learning_rate': 4.300458962701201e-07, 'epoch': 10.14} + 63%|██████▎ | 235320/371472 [8:07:20<11:11:08, 3.38it/s] 63%|██████▎ | 235321/371472 [8:07:20<12:08:30, 3.11it/s] 63%|██████▎ | 235322/371472 [8:07:20<11:32:18, 3.28it/s] 63%|██████▎ | 235323/371472 [8:07:20<11:05:03, 3.41it/s] 63%|██████▎ | 235324/371472 [8:07:21<11:16:57, 3.35it/s] 63%|██████▎ | 235325/371472 [8:07:21<10:57:22, 3.45it/s] 63%|██████▎ | 235326/371472 [8:07:21<10:54:18, 3.47it/s] 63%|██████▎ | 235327/371472 [8:07:22<11:13:52, 3.37it/s] 63%|██████▎ | 235328/371472 [8:07:22<10:43:35, 3.53it/s] 63%|██████▎ | 235329/371472 [8:07:22<10:40:00, 3.55it/s] 63%|██████▎ | 235330/371472 [8:07:22<10:44:25, 3.52it/s] 63%|██████▎ | 235331/371472 [8:07:23<10:44:33, 3.52it/s] 63%|██████▎ | 235332/371472 [8:07:23<10:58:47, 3.44it/s] 63%|██████▎ | 235333/371472 [8:07:23<10:55:05, 3.46it/s] 63%|██████▎ | 235334/371472 [8:07:24<10:51:43, 3.48it/s] 63%|██████▎ | 235335/371472 [8:07:24<10:42:00, 3.53it/s] 63%|██████▎ | 235336/371472 [8:07:24<10:33:15, 3.58it/s] 63%|██████▎ | 235337/371472 [8:07:25<11:12:26, 3.37it/s] 63%|██████▎ | 235338/371472 [8:07:25<11:23:09, 3.32it/s] 63%|██████▎ | 235339/371472 [8:07:25<12:06:47, 3.12it/s] 63%|██████▎ | 235340/371472 [8:07:25<11:22:43, 3.32it/s] {'loss': 2.6618, 'learning_rate': 4.299974142946411e-07, 'epoch': 10.14} + 63%|██████▎ | 235340/371472 [8:07:25<11:22:43, 3.32it/s] 63%|██████▎ | 235341/371472 [8:07:26<11:23:39, 3.32it/s] 63%|██████▎ | 235342/371472 [8:07:26<11:08:37, 3.39it/s] 63%|██████▎ | 235343/371472 [8:07:26<11:14:47, 3.36it/s] 63%|██████▎ | 235344/371472 [8:07:27<11:21:48, 3.33it/s] 63%|██████▎ | 235345/371472 [8:07:27<11:25:30, 3.31it/s] 63%|██████▎ | 235346/371472 [8:07:28<14:20:08, 2.64it/s] 63%|██████▎ | 235347/371472 [8:07:28<13:15:01, 2.85it/s] 63%|██████▎ | 235348/371472 [8:07:28<12:44:14, 2.97it/s] 63%|██████▎ | 235349/371472 [8:07:28<12:02:54, 3.14it/s] 63%|██████▎ | 235350/371472 [8:07:29<12:19:57, 3.07it/s] 63%|██████▎ | 235351/371472 [8:07:29<12:14:33, 3.09it/s] 63%|██████▎ | 235352/371472 [8:07:29<12:16:34, 3.08it/s] 63%|██████▎ | 235353/371472 [8:07:30<11:58:14, 3.16it/s] 63%|██████▎ | 235354/371472 [8:07:30<11:50:28, 3.19it/s] 63%|██████▎ | 235355/371472 [8:07:30<12:22:21, 3.06it/s] 63%|██████▎ | 235356/371472 [8:07:31<11:51:56, 3.19it/s] 63%|██████▎ | 235357/371472 [8:07:31<11:24:28, 3.31it/s] 63%|██████▎ | 235358/371472 [8:07:31<11:25:07, 3.31it/s] 63%|██████▎ | 235359/371472 [8:07:31<11:06:42, 3.40it/s] 63%|██████▎ | 235360/371472 [8:07:32<10:56:28, 3.46it/s] {'loss': 2.7274, 'learning_rate': 4.299489323191622e-07, 'epoch': 10.14} + 63%|██████▎ | 235360/371472 [8:07:32<10:56:28, 3.46it/s] 63%|██████▎ | 235361/371472 [8:07:32<11:10:32, 3.38it/s] 63%|██████▎ | 235362/371472 [8:07:32<11:08:56, 3.39it/s] 63%|██████▎ | 235363/371472 [8:07:33<11:12:32, 3.37it/s] 63%|██████▎ | 235364/371472 [8:07:33<11:11:03, 3.38it/s] 63%|██████▎ | 235365/371472 [8:07:33<11:29:43, 3.29it/s] 63%|██████▎ | 235366/371472 [8:07:34<12:20:00, 3.07it/s] 63%|██████▎ | 235367/371472 [8:07:34<12:03:17, 3.14it/s] 63%|██████▎ | 235368/371472 [8:07:34<11:30:18, 3.29it/s] 63%|██████▎ | 235369/371472 [8:07:35<11:30:10, 3.29it/s] 63%|██████▎ | 235370/371472 [8:07:35<11:28:49, 3.29it/s] 63%|██████▎ | 235371/371472 [8:07:35<11:09:04, 3.39it/s] 63%|██████▎ | 235372/371472 [8:07:35<11:06:38, 3.40it/s] 63%|██████▎ | 235373/371472 [8:07:36<11:26:57, 3.30it/s] 63%|██████▎ | 235374/371472 [8:07:36<12:06:10, 3.12it/s] 63%|██████▎ | 235375/371472 [8:07:36<11:54:56, 3.17it/s] 63%|██████▎ | 235376/371472 [8:07:37<11:48:38, 3.20it/s] 63%|██████▎ | 235377/371472 [8:07:37<11:34:26, 3.27it/s] 63%|██████▎ | 235378/371472 [8:07:37<11:29:28, 3.29it/s] 63%|██████▎ | 235379/371472 [8:07:38<12:10:50, 3.10it/s] 63%|██████▎ | 235380/371472 [8:07:38<11:48:24, 3.20it/s] {'loss': 2.7476, 'learning_rate': 4.299004503436833e-07, 'epoch': 10.14} + 63%|██████▎ | 235380/371472 [8:07:38<11:48:24, 3.20it/s] 63%|██████▎ | 235381/371472 [8:07:38<11:35:51, 3.26it/s] 63%|██████▎ | 235382/371472 [8:07:39<11:24:00, 3.32it/s] 63%|██████▎ | 235383/371472 [8:07:39<11:10:40, 3.38it/s] 63%|██████▎ | 235384/371472 [8:07:39<11:33:18, 3.27it/s] 63%|██████▎ | 235385/371472 [8:07:39<11:36:24, 3.26it/s] 63%|██████▎ | 235386/371472 [8:07:40<11:08:26, 3.39it/s] 63%|██████▎ | 235387/371472 [8:07:40<10:54:45, 3.46it/s] 63%|██████▎ | 235388/371472 [8:07:40<11:24:52, 3.31it/s] 63%|██████▎ | 235389/371472 [8:07:41<12:02:17, 3.14it/s] 63%|██████▎ | 235390/371472 [8:07:41<11:39:51, 3.24it/s] 63%|██████▎ | 235391/371472 [8:07:41<11:14:43, 3.36it/s] 63%|██████▎ | 235392/371472 [8:07:42<11:17:38, 3.35it/s] 63%|██████▎ | 235393/371472 [8:07:42<10:53:23, 3.47it/s] 63%|██████▎ | 235394/371472 [8:07:42<10:51:37, 3.48it/s] 63%|██████▎ | 235395/371472 [8:07:42<11:10:38, 3.38it/s] 63%|██████▎ | 235396/371472 [8:07:43<11:05:33, 3.41it/s] 63%|██████▎ | 235397/371472 [8:07:43<10:56:44, 3.45it/s] 63%|██████▎ | 235398/371472 [8:07:43<10:52:39, 3.47it/s] 63%|██████▎ | 235399/371472 [8:07:44<11:30:33, 3.28it/s] 63%|██████▎ | 235400/371472 [8:07:44<11:24:12, 3.31it/s] {'loss': 2.6348, 'learning_rate': 4.298519683682045e-07, 'epoch': 10.14} + 63%|██████▎ | 235400/371472 [8:07:44<11:24:12, 3.31it/s] 63%|██████▎ | 235401/371472 [8:07:44<11:17:20, 3.35it/s] 63%|██████▎ | 235402/371472 [8:07:44<11:00:43, 3.43it/s] 63%|██████▎ | 235403/371472 [8:07:45<10:46:58, 3.51it/s] 63%|██████▎ | 235404/371472 [8:07:45<10:40:42, 3.54it/s] 63%|██████▎ | 235405/371472 [8:07:45<10:42:08, 3.53it/s] 63%|██████▎ | 235406/371472 [8:07:46<10:28:43, 3.61it/s] 63%|███��██▎ | 235407/371472 [8:07:46<10:29:27, 3.60it/s] 63%|██████▎ | 235408/371472 [8:07:46<10:29:24, 3.60it/s] 63%|██████▎ | 235409/371472 [8:07:46<11:34:42, 3.26it/s] 63%|██████▎ | 235410/371472 [8:07:47<11:12:04, 3.37it/s] 63%|██████▎ | 235411/371472 [8:07:47<11:01:50, 3.43it/s] 63%|██████▎ | 235412/371472 [8:07:47<11:01:16, 3.43it/s] 63%|██████▎ | 235413/371472 [8:07:48<11:23:30, 3.32it/s] 63%|██████▎ | 235414/371472 [8:07:48<11:01:34, 3.43it/s] 63%|██████▎ | 235415/371472 [8:07:48<10:42:26, 3.53it/s] 63%|██████▎ | 235416/371472 [8:07:48<10:34:28, 3.57it/s] 63%|██████▎ | 235417/371472 [8:07:49<10:43:09, 3.53it/s] 63%|██████▎ | 235418/371472 [8:07:49<10:53:05, 3.47it/s] 63%|██████▎ | 235419/371472 [8:07:49<10:55:35, 3.46it/s] 63%|██████▎ | 235420/371472 [8:07:50<11:18:03, 3.34it/s] {'loss': 2.6986, 'learning_rate': 4.298034863927255e-07, 'epoch': 10.14} + 63%|██████▎ | 235420/371472 [8:07:50<11:18:03, 3.34it/s] 63%|██████▎ | 235421/371472 [8:07:50<11:18:22, 3.34it/s] 63%|██████▎ | 235422/371472 [8:07:50<12:03:03, 3.14it/s] 63%|██████▎ | 235423/371472 [8:07:51<12:47:45, 2.95it/s] 63%|██████▎ | 235424/371472 [8:07:51<12:07:19, 3.12it/s] 63%|██████▎ | 235425/371472 [8:07:51<11:38:23, 3.25it/s] 63%|██████▎ | 235426/371472 [8:07:52<11:20:30, 3.33it/s] 63%|██████▎ | 235427/371472 [8:07:52<11:05:28, 3.41it/s] 63%|██████▎ | 235428/371472 [8:07:52<11:00:52, 3.43it/s] 63%|██████▎ | 235429/371472 [8:07:52<11:34:30, 3.26it/s] 63%|██████▎ | 235430/371472 [8:07:53<12:06:16, 3.12it/s] 63%|██████▎ | 235431/371472 [8:07:53<11:55:22, 3.17it/s] 63%|██████▎ | 235432/371472 [8:07:53<11:34:00, 3.27it/s] 63%|██████▎ | 235433/371472 [8:07:54<11:36:13, 3.26it/s] 63%|██████▎ | 235434/371472 [8:07:54<11:17:46, 3.35it/s] 63%|██████▎ | 235435/371472 [8:07:54<11:04:47, 3.41it/s] 63%|██████▎ | 235436/371472 [8:07:55<11:04:02, 3.41it/s] 63%|██████▎ | 235437/371472 [8:07:55<11:11:05, 3.38it/s] 63%|██████▎ | 235438/371472 [8:07:55<10:46:21, 3.51it/s] 63%|██████▎ | 235439/371472 [8:07:55<10:47:41, 3.50it/s] 63%|██████▎ | 235440/371472 [8:07:56<10:33:55, 3.58it/s] {'loss': 2.7733, 'learning_rate': 4.2975500441724667e-07, 'epoch': 10.14} + 63%|██████▎ | 235440/371472 [8:07:56<10:33:55, 3.58it/s] 63%|██████▎ | 235441/371472 [8:07:56<10:42:23, 3.53it/s] 63%|██████▎ | 235442/371472 [8:07:56<10:44:09, 3.52it/s] 63%|██████▎ | 235443/371472 [8:07:57<11:15:36, 3.36it/s] 63%|██████▎ | 235444/371472 [8:07:57<11:53:00, 3.18it/s] 63%|██████▎ | 235445/371472 [8:07:57<11:21:15, 3.33it/s] 63%|██████▎ | 235446/371472 [8:07:58<11:27:00, 3.30it/s] 63%|██████▎ | 235447/371472 [8:07:58<11:14:36, 3.36it/s] 63%|██████▎ | 235448/371472 [8:07:58<10:59:57, 3.44it/s] 63%|██████▎ | 235449/371472 [8:07:58<10:44:12, 3.52it/s] 63%|██████▎ | 235450/371472 [8:07:59<10:43:47, 3.52it/s] 63%|██████▎ | 235451/371472 [8:07:59<10:37:01, 3.56it/s] 63%|██████▎ | 235452/371472 [8:07:59<10:23:51, 3.63it/s] 63%|██████▎ | 235453/371472 [8:07:59<10:19:10, 3.66it/s] 63%|██████▎ | 235454/371472 [8:08:00<10:04:21, 3.75it/s] 63%|██████▎ | 235455/371472 [8:08:00<10:04:23, 3.75it/s] 63%|██████▎ | 235456/371472 [8:08:00<10:19:31, 3.66it/s] 63%|██████▎ | 235457/371472 [8:08:01<10:47:38, 3.50it/s] 63%|██████▎ | 235458/371472 [8:08:01<10:41:18, 3.53it/s] 63%|██████▎ | 235459/371472 [8:08:01<10:46:30, 3.51it/s] 63%|██████▎ | 235460/371472 [8:08:01<10:41:15, 3.54it/s] {'loss': 2.7101, 'learning_rate': 4.2970652244176774e-07, 'epoch': 10.14} + 63%|██████▎ | 235460/371472 [8:08:01<10:41:15, 3.54it/s] 63%|██████▎ | 235461/371472 [8:08:02<11:51:53, 3.18it/s] 63%|██████▎ | 235462/371472 [8:08:02<11:14:13, 3.36it/s] 63%|██████▎ | 235463/371472 [8:08:02<11:12:43, 3.37it/s] 63%|██████▎ | 235464/371472 [8:08:03<11:24:17, 3.31it/s] 63%|██████▎ | 235465/371472 [8:08:03<11:17:10, 3.35it/s] 63%|██████▎ | 235466/371472 [8:08:03<11:25:48, 3.31it/s] 63%|██████▎ | 235467/371472 [8:08:04<11:18:17, 3.34it/s] 63%|██████▎ | 235468/371472 [8:08:04<10:56:08, 3.45it/s] 63%|██████▎ | 235469/371472 [8:08:04<10:51:22, 3.48it/s] 63%|██████▎ | 235470/371472 [8:08:04<10:54:01, 3.47it/s] 63%|██████▎ | 235471/371472 [8:08:05<10:32:26, 3.58it/s] 63%|██████▎ | 235472/371472 [8:08:05<10:35:02, 3.57it/s] 63%|██████▎ | 235473/371472 [8:08:05<11:42:31, 3.23it/s] 63%|██████▎ | 235474/371472 [8:08:06<11:30:37, 3.28it/s] 63%|██████▎ | 235475/371472 [8:08:06<11:16:55, 3.35it/s] 63%|██████▎ | 235476/371472 [8:08:06<11:14:54, 3.36it/s] 63%|██████▎ | 235477/371472 [8:08:07<11:54:01, 3.17it/s] 63%|██████▎ | 235478/371472 [8:08:07<12:28:00, 3.03it/s] 63%|██████▎ | 235479/371472 [8:08:07<12:30:32, 3.02it/s] 63%|██████▎ | 235480/371472 [8:08:08<12:19:55, 3.06it/s] {'loss': 2.7993, 'learning_rate': 4.2965804046628887e-07, 'epoch': 10.14} + 63%|██████▎ | 235480/371472 [8:08:08<12:19:55, 3.06it/s] 63%|██████▎ | 235481/371472 [8:08:08<12:16:47, 3.08it/s] 63%|██████▎ | 235482/371472 [8:08:08<11:49:47, 3.19it/s] 63%|██████▎ | 235483/371472 [8:08:08<11:43:30, 3.22it/s] 63%|██████▎ | 235484/371472 [8:08:09<11:11:17, 3.38it/s] 63%|██████▎ | 235485/371472 [8:08:09<10:53:06, 3.47it/s] 63%|██████▎ | 235486/371472 [8:08:09<10:38:49, 3.55it/s] 63%|██████▎ | 235487/371472 [8:08:10<10:46:10, 3.51it/s] 63%|██████▎ | 235488/371472 [8:08:10<10:27:37, 3.61it/s] 63%|██████▎ | 235489/371472 [8:08:10<10:41:14, 3.53it/s] 63%|██████▎ | 235490/371472 [8:08:10<11:08:54, 3.39it/s] 63%|██████▎ | 235491/371472 [8:08:11<11:04:15, 3.41it/s] 63%|██████▎ | 235492/371472 [8:08:11<11:05:38, 3.40it/s] 63%|██████▎ | 235493/371472 [8:08:11<10:55:03, 3.46it/s] 63%|██████▎ | 235494/371472 [8:08:12<10:52:04, 3.48it/s] 63%|██████▎ | 235495/371472 [8:08:12<10:42:14, 3.53it/s] 63%|██████▎ | 235496/371472 [8:08:12<11:00:58, 3.43it/s] 63%|██████▎ | 235497/371472 [8:08:13<12:09:04, 3.11it/s] 63%|██████▎ | 235498/371472 [8:08:13<11:23:03, 3.32it/s] 63%|██████▎ | 235499/371472 [8:08:13<11:05:30, 3.41it/s] 63%|██████▎ | 235500/371472 [8:08:13<10:35:38, 3.57it/s] {'loss': 2.6913, 'learning_rate': 4.2960955849080994e-07, 'epoch': 10.14} + 63%|██████▎ | 235500/371472 [8:08:13<10:35:38, 3.57it/s] 63%|██████▎ | 235501/371472 [8:08:14<10:33:21, 3.58it/s] 63%|██████▎ | 235502/371472 [8:08:14<10:54:01, 3.46it/s] 63%|██████▎ | 235503/371472 [8:08:14<10:35:50, 3.56it/s] 63%|██████▎ | 235504/371472 [8:08:14<10:41:17, 3.53it/s] 63%|██████▎ | 235505/371472 [8:08:15<10:40:56, 3.54it/s] 63%|██████▎ | 235506/371472 [8:08:15<10:22:36, 3.64it/s] 63%|██████▎ | 235507/371472 [8:08:15<10:47:54, 3.50it/s] 63%|██████▎ | 235508/371472 [8:08:16<11:16:38, 3.35it/s] 63%|██████▎ | 235509/371472 [8:08:16<11:38:09, 3.25it/s] 63%|██████▎ | 235510/371472 [8:08:16<11:12:18, 3.37it/s] 63%|██████▎ | 235511/371472 [8:08:17<11:43:34, 3.22it/s] 63%|██████▎ | 235512/371472 [8:08:17<11:23:04, 3.32it/s] 63%|██████▎ | 235513/371472 [8:08:17<11:06:23, 3.40it/s] 63%|██████▎ | 235514/371472 [8:08:17<11:09:00, 3.39it/s] 63%|██████▎ | 235515/371472 [8:08:18<11:08:39, 3.39it/s] 63%|██████▎ | 235516/371472 [8:08:18<11:16:58, 3.35it/s] 63%|██████▎ | 235517/371472 [8:08:18<10:52:17, 3.47it/s] 63%|██████▎ | 235518/371472 [8:08:19<11:26:36, 3.30it/s] 63%|██████▎ | 235519/371472 [8:08:19<10:54:59, 3.46it/s] 63%|██████▎ | 235520/371472 [8:08:19<10:47:59, 3.50it/s] {'loss': 2.7281, 'learning_rate': 4.295610765153311e-07, 'epoch': 10.14} + 63%|██████▎ | 235520/371472 [8:08:19<10:47:59, 3.50it/s] 63%|██████▎ | 235521/371472 [8:08:19<10:32:56, 3.58it/s] 63%|██████▎ | 235522/371472 [8:08:20<10:21:16, 3.65it/s] 63%|██████▎ | 235523/371472 [8:08:20<10:17:25, 3.67it/s] 63%|██████▎ | 235524/371472 [8:08:20<10:46:35, 3.50it/s] 63%|██████▎ | 235525/371472 [8:08:21<10:37:44, 3.55it/s] 63%|██████▎ | 235526/371472 [8:08:21<10:22:12, 3.64it/s] 63%|██████▎ | 235527/371472 [8:08:21<10:28:48, 3.60it/s] 63%|██████▎ | 235528/371472 [8:08:21<11:06:35, 3.40it/s] 63%|██████▎ | 235529/371472 [8:08:22<10:56:25, 3.45it/s] 63%|██████▎ | 235530/371472 [8:08:22<10:51:24, 3.48it/s] 63%|██████▎ | 235531/371472 [8:08:22<10:44:49, 3.51it/s] 63%|██████▎ | 235532/371472 [8:08:23<10:57:41, 3.44it/s] 63%|██████▎ | 235533/371472 [8:08:23<10:55:49, 3.45it/s] 63%|██████▎ | 235534/371472 [8:08:23<11:44:01, 3.22it/s] 63%|██████▎ | 235535/371472 [8:08:23<11:16:10, 3.35it/s] 63%|██████▎ | 235536/371472 [8:08:24<10:52:21, 3.47it/s] 63%|██████▎ | 235537/371472 [8:08:24<10:42:17, 3.53it/s] 63%|██████▎ | 235538/371472 [8:08:24<10:30:19, 3.59it/s] 63%|██████▎ | 235539/371472 [8:08:25<10:28:05, 3.61it/s] 63%|██████▎ | 235540/371472 [8:08:25<10:25:21, 3.62it/s] {'loss': 2.717, 'learning_rate': 4.2951259453985213e-07, 'epoch': 10.15} + 63%|██████▎ | 235540/371472 [8:08:25<10:25:21, 3.62it/s] 63%|██████▎ | 235541/371472 [8:08:25<10:42:12, 3.53it/s] 63%|██████▎ | 235542/371472 [8:08:25<11:04:22, 3.41it/s] 63%|██████▎ | 235543/371472 [8:08:26<10:50:03, 3.49it/s] 63%|██████▎ | 235544/371472 [8:08:26<10:41:11, 3.53it/s] 63%|██████▎ | 235545/371472 [8:08:26<10:57:37, 3.44it/s] 63%|██████▎ | 235546/371472 [8:08:27<11:05:47, 3.40it/s] 63%|██████▎ | 235547/371472 [8:08:27<11:03:03, 3.42it/s] 63%|██████▎ | 235548/371472 [8:08:27<11:25:05, 3.31it/s] 63%|██████▎ | 235549/371472 [8:08:28<11:38:55, 3.24it/s] 63%|██████▎ | 235550/371472 [8:08:28<11:17:30, 3.34it/s] 63%|██████▎ | 235551/371472 [8:08:28<11:14:36, 3.36it/s] 63%|██████▎ | 235552/371472 [8:08:28<10:56:43, 3.45it/s] 63%|██████▎ | 235553/371472 [8:08:29<10:47:36, 3.50it/s] 63%|██████▎ | 235554/371472 [8:08:29<10:41:39, 3.53it/s] 63%|██████▎ | 235555/371472 [8:08:29<10:49:02, 3.49it/s] 63%|██████▎ | 235556/371472 [8:08:30<11:39:53, 3.24it/s] 63%|██████▎ | 235557/371472 [8:08:30<11:37:22, 3.25it/s] 63%|██████▎ | 235558/371472 [8:08:30<11:32:20, 3.27it/s] 63%|██████▎ | 235559/371472 [8:08:30<11:02:47, 3.42it/s] 63%|██████▎ | 235560/371472 [8:08:31<10:50:46, 3.48it/s] {'loss': 2.6913, 'learning_rate': 4.294641125643733e-07, 'epoch': 10.15} + 63%|██████▎ | 235560/371472 [8:08:31<10:50:46, 3.48it/s] 63%|██████▎ | 235561/371472 [8:08:31<10:32:59, 3.58it/s] 63%|██████▎ | 235562/371472 [8:08:31<10:22:10, 3.64it/s] 63%|██████▎ | 235563/371472 [8:08:32<10:12:10, 3.70it/s] 63%|██████▎ | 235564/371472 [8:08:32<10:00:15, 3.77it/s] 63%|██████▎ | 235565/371472 [8:08:32<10:10:01, 3.71it/s] 63%|██████▎ | 235566/371472 [8:08:32<10:13:00, 3.70it/s] 63%|██████▎ | 235567/371472 [8:08:33<10:40:47, 3.53it/s] 63%|██████▎ | 235568/371472 [8:08:33<11:27:09, 3.30it/s] 63%|██████▎ | 235569/371472 [8:08:33<12:07:16, 3.11it/s] 63%|██████▎ | 235570/371472 [8:08:34<12:09:19, 3.11it/s] 63%|██████▎ | 235571/371472 [8:08:34<11:48:43, 3.20it/s] 63%|██████▎ | 235572/371472 [8:08:34<11:30:18, 3.28it/s] 63%|██████▎ | 235573/371472 [8:08:35<11:22:08, 3.32it/s] 63%|██████▎ | 235574/371472 [8:08:35<11:15:30, 3.35it/s] 63%|██████▎ | 235575/371472 [8:08:35<11:14:17, 3.36it/s] 63%|██████▎ | 235576/371472 [8:08:35<11:19:14, 3.33it/s] 63%|██████▎ | 235577/371472 [8:08:36<11:05:51, 3.40it/s] 63%|██████▎ | 235578/371472 [8:08:36<11:02:35, 3.42it/s] 63%|██████▎ | 235579/371472 [8:08:36<11:42:57, 3.22it/s] 63%|██████▎ | 235580/371472 [8:08:37<11:28:52, 3.29it/s] {'loss': 2.7638, 'learning_rate': 4.294156305888944e-07, 'epoch': 10.15} + 63%|██████▎ | 235580/371472 [8:08:37<11:28:52, 3.29it/s] 63%|██████▎ | 235581/371472 [8:08:37<11:17:48, 3.34it/s] 63%|██████▎ | 235582/371472 [8:08:37<11:10:53, 3.38it/s] 63%|██████▎ | 235583/371472 [8:08:38<11:25:54, 3.30it/s] 63%|██████▎ | 235584/371472 [8:08:38<11:05:10, 3.40it/s] 63%|██████▎ | 235585/371472 [8:08:38<10:44:13, 3.52it/s] 63%|██████▎ | 235586/371472 [8:08:38<10:39:06, 3.54it/s] 63%|██████▎ | 235587/371472 [8:08:39<10:47:10, 3.50it/s] 63%|██████▎ | 235588/371472 [8:08:39<11:12:24, 3.37it/s] 63%|██████▎ | 235589/371472 [8:08:39<11:22:16, 3.32it/s] 63%|██████▎ | 235590/371472 [8:08:40<10:53:42, 3.46it/s] 63%|██████▎ | 235591/371472 [8:08:40<10:41:54, 3.53it/s] 63%|██████▎ | 235592/371472 [8:08:40<10:38:54, 3.54it/s] 63%|██████▎ | 235593/371472 [8:08:40<10:33:20, 3.58it/s] 63%|██████▎ | 235594/371472 [8:08:41<10:57:14, 3.45it/s] 63%|██████▎ | 235595/371472 [8:08:41<11:27:37, 3.29it/s] 63%|██████▎ | 235596/371472 [8:08:41<11:52:45, 3.18it/s] 63%|██████▎ | 235597/371472 [8:08:42<11:32:28, 3.27it/s] 63%|██████▎ | 235598/371472 [8:08:42<11:29:14, 3.29it/s] 63%|██████▎ | 235599/371472 [8:08:42<11:22:30, 3.32it/s] 63%|██████▎ | 235600/371472 [8:08:43<11:16:41, 3.35it/s] {'loss': 2.7439, 'learning_rate': 4.293671486134155e-07, 'epoch': 10.15} + 63%|██████▎ | 235600/371472 [8:08:43<11:16:41, 3.35it/s] 63%|██████▎ | 235601/371472 [8:08:43<11:26:48, 3.30it/s] 63%|██████▎ | 235602/371472 [8:08:43<11:17:38, 3.34it/s] 63%|██████▎ | 235603/371472 [8:08:43<10:53:44, 3.46it/s] 63%|██████▎ | 235604/371472 [8:08:44<10:58:21, 3.44it/s] 63%|██████▎ | 235605/371472 [8:08:44<11:08:39, 3.39it/s] 63%|██████▎ | 235606/371472 [8:08:44<10:45:52, 3.51it/s] 63%|██████▎ | 235607/371472 [8:08:45<10:27:24, 3.61it/s] 63%|██████▎ | 235608/371472 [8:08:45<10:37:36, 3.55it/s] 63%|██████▎ | 235609/371472 [8:08:45<10:48:15, 3.49it/s] 63%|██████▎ | 235610/371472 [8:08:45<10:46:49, 3.50it/s] 63%|██████▎ | 235611/371472 [8:08:46<10:29:27, 3.60it/s] 63%|██████▎ | 235612/371472 [8:08:46<10:51:49, 3.47it/s] 63%|██████▎ | 235613/371472 [8:08:46<10:22:58, 3.63it/s] 63%|██████▎ | 235614/371472 [8:08:46<10:08:12, 3.72it/s] 63%|██████▎ | 235615/371472 [8:08:47<10:33:12, 3.58it/s] 63%|██████▎ | 235616/371472 [8:08:47<11:07:17, 3.39it/s] 63%|██████▎ | 235617/371472 [8:08:47<11:03:19, 3.41it/s] 63%|██████▎ | 235618/371472 [8:08:48<10:56:52, 3.45it/s] 63%|██████▎ | 235619/371472 [8:08:48<11:11:26, 3.37it/s] 63%|██████▎ | 235620/371472 [8:08:48<10:53:01, 3.47it/s] {'loss': 2.7637, 'learning_rate': 4.293186666379366e-07, 'epoch': 10.15} + 63%|██████▎ | 235620/371472 [8:08:48<10:53:01, 3.47it/s] 63%|██████▎ | 235621/371472 [8:08:49<11:32:55, 3.27it/s] 63%|██████▎ | 235622/371472 [8:08:49<11:28:04, 3.29it/s] 63%|██████▎ | 235623/371472 [8:08:49<10:52:31, 3.47it/s] 63%|██████▎ | 235624/371472 [8:08:50<11:55:54, 3.16it/s] 63%|██████▎ | 235625/371472 [8:08:50<11:18:58, 3.33it/s] 63%|██████▎ | 235626/371472 [8:08:50<11:38:34, 3.24it/s] 63%|██████▎ | 235627/371472 [8:08:50<11:48:06, 3.20it/s] 63%|██████▎ | 235628/371472 [8:08:51<11:27:48, 3.29it/s] 63%|██████▎ | 235629/371472 [8:08:51<10:50:00, 3.48it/s] 63%|██████▎ | 235630/371472 [8:08:51<10:32:56, 3.58it/s] 63%|██████▎ | 235631/371472 [8:08:52<10:26:50, 3.61it/s] 63%|██████▎ | 235632/371472 [8:08:52<10:41:17, 3.53it/s] 63%|██████▎ | 235633/371472 [8:08:52<10:40:05, 3.54it/s] 63%|██████▎ | 235634/371472 [8:08:52<10:29:29, 3.60it/s] 63%|██████▎ | 235635/371472 [8:08:53<10:42:23, 3.52it/s] 63%|██████▎ | 235636/371472 [8:08:53<10:39:09, 3.54it/s] 63%|██████▎ | 235637/371472 [8:08:53<11:05:06, 3.40it/s] 63%|██████▎ | 235638/371472 [8:08:54<12:39:56, 2.98it/s] 63%|██████▎ | 235639/371472 [8:08:54<12:57:10, 2.91it/s] 63%|██████▎ | 235640/371472 [8:08:54<13:17:21, 2.84it/s] {'loss': 2.73, 'learning_rate': 4.2927018466245765e-07, 'epoch': 10.15} + 63%|██████▎ | 235640/371472 [8:08:54<13:17:21, 2.84it/s] 63%|██████▎ | 235641/371472 [8:08:55<13:02:14, 2.89it/s] 63%|██████▎ | 235642/371472 [8:08:55<12:20:40, 3.06it/s] 63%|██████▎ | 235643/371472 [8:08:55<11:45:07, 3.21it/s] 63%|██████▎ | 235644/371472 [8:08:56<11:38:41, 3.24it/s] 63%|██████▎ | 235645/371472 [8:08:56<11:36:22, 3.25it/s] 63%|██████▎ | 235646/371472 [8:08:56<11:41:08, 3.23it/s] 63%|██████▎ | 235647/371472 [8:08:57<11:28:10, 3.29it/s] 63%|██████▎ | 235648/371472 [8:08:57<11:12:19, 3.37it/s] 63%|██████▎ | 235649/371472 [8:08:57<11:14:08, 3.36it/s] 63%|██████▎ | 235650/371472 [8:08:57<11:17:16, 3.34it/s] 63%|██████▎ | 235651/371472 [8:08:58<10:58:20, 3.44it/s] 63%|██████▎ | 235652/371472 [8:08:58<10:57:54, 3.44it/s] 63%|██████▎ | 235653/371472 [8:08:58<10:43:15, 3.52it/s] 63%|██████▎ | 235654/371472 [8:08:59<11:00:52, 3.43it/s] 63%|██████▎ | 235655/371472 [8:08:59<11:39:33, 3.24it/s] 63%|██████▎ | 235656/371472 [8:08:59<11:23:43, 3.31it/s] 63%|██████▎ | 235657/371472 [8:09:00<12:03:52, 3.13it/s] 63%|██████▎ | 235658/371472 [8:09:00<11:50:41, 3.19it/s] 63%|██████▎ | 235659/371472 [8:09:00<11:32:28, 3.27it/s] 63%|██████▎ | 235660/371472 [8:09:00<11:22:34, 3.32it/s] {'loss': 2.6166, 'learning_rate': 4.2922170268697883e-07, 'epoch': 10.15} + 63%|██████▎ | 235660/371472 [8:09:00<11:22:34, 3.32it/s] 63%|██████▎ | 235661/371472 [8:09:01<11:20:10, 3.33it/s] 63%|██████▎ | 235662/371472 [8:09:01<11:20:45, 3.32it/s] 63%|██████▎ | 235663/371472 [8:09:01<11:01:03, 3.42it/s] 63%|██████▎ | 235664/371472 [8:09:02<11:48:41, 3.19it/s] 63%|██████▎ | 235665/371472 [8:09:02<11:26:02, 3.30it/s] 63%|██████▎ | 235666/371472 [8:09:02<11:14:59, 3.35it/s] 63%|██████▎ | 235667/371472 [8:09:03<11:00:32, 3.43it/s] 63%|██████▎ | 235668/371472 [8:09:03<11:03:15, 3.41it/s] 63%|██████▎ | 235669/371472 [8:09:03<10:46:58, 3.50it/s] 63%|██████▎ | 235670/371472 [8:09:03<10:32:43, 3.58it/s] 63%|██████▎ | 235671/371472 [8:09:04<12:29:10, 3.02it/s] 63%|██████▎ | 235672/371472 [8:09:04<12:04:40, 3.12it/s] 63%|██████▎ | 235673/371472 [8:09:04<12:21:59, 3.05it/s] 63%|██████▎ | 235674/371472 [8:09:05<12:14:09, 3.08it/s] 63%|██████▎ | 235675/371472 [8:09:05<11:41:56, 3.22it/s] 63%|██████▎ | 235676/371472 [8:09:05<12:14:01, 3.08it/s] 63%|██████▎ | 235677/371472 [8:09:06<11:55:22, 3.16it/s] 63%|██████▎ | 235678/371472 [8:09:06<11:54:54, 3.17it/s] 63%|██████▎ | 235679/371472 [8:09:06<12:15:02, 3.08it/s] 63%|██████▎ | 235680/371472 [8:09:07<11:34:09, 3.26it/s] {'loss': 2.8379, 'learning_rate': 4.2917322071149985e-07, 'epoch': 10.15} + 63%|██████▎ | 235680/371472 [8:09:07<11:34:09, 3.26it/s] 63%|██████▎ | 235681/371472 [8:09:07<11:43:23, 3.22it/s] 63%|██████▎ | 235682/371472 [8:09:07<12:08:44, 3.11it/s] 63%|██████▎ | 235683/371472 [8:09:08<12:29:25, 3.02it/s] 63%|██████▎ | 235684/371472 [8:09:08<11:53:45, 3.17it/s] 63%|██████▎ | 235685/371472 [8:09:08<11:43:09, 3.22it/s] 63%|██████▎ | 235686/371472 [8:09:09<11:48:05, 3.20it/s] 63%|██████▎ | 235687/371472 [8:09:09<11:35:08, 3.26it/s] 63%|██████▎ | 235688/371472 [8:09:09<11:17:47, 3.34it/s] 63%|██████▎ | 235689/371472 [8:09:09<11:04:44, 3.40it/s] 63%|██████▎ | 235690/371472 [8:09:10<10:52:04, 3.47it/s] 63%|██████▎ | 235691/371472 [8:09:10<12:03:24, 3.13it/s] 63%|██████▎ | 235692/371472 [8:09:10<12:51:53, 2.93it/s] 63%|██████▎ | 235693/371472 [8:09:11<13:31:02, 2.79it/s] 63%|██████▎ | 235694/371472 [8:09:11<12:37:46, 2.99it/s] 63%|██████▎ | 235695/371472 [8:09:11<11:55:25, 3.16it/s] 63%|██████▎ | 235696/371472 [8:09:12<11:32:45, 3.27it/s] 63%|██████▎ | 235697/371472 [8:09:12<11:18:51, 3.33it/s] 63%|██████▎ | 235698/371472 [8:09:12<11:24:22, 3.31it/s] 63%|██████▎ | 235699/371472 [8:09:13<11:23:28, 3.31it/s] 63%|██████▎ | 235700/371472 [8:09:13<10:53:16, 3.46it/s] {'loss': 2.7107, 'learning_rate': 4.29124738736021e-07, 'epoch': 10.15} + 63%|██████▎ | 235700/371472 [8:09:13<10:53:16, 3.46it/s] 63%|██████▎ | 235701/371472 [8:09:13<10:39:00, 3.54it/s] 63%|██████▎ | 235702/371472 [8:09:13<10:43:12, 3.52it/s] 63%|██████▎ | 235703/371472 [8:09:14<11:55:28, 3.16it/s] 63%|██████▎ | 235704/371472 [8:09:14<11:35:26, 3.25it/s] 63%|██████▎ | 235705/371472 [8:09:14<12:26:55, 3.03it/s] 63%|█████��▎ | 235706/371472 [8:09:15<12:04:57, 3.12it/s] 63%|██████▎ | 235707/371472 [8:09:15<11:43:09, 3.22it/s] 63%|██████▎ | 235708/371472 [8:09:15<11:26:38, 3.30it/s] 63%|██████▎ | 235709/371472 [8:09:16<11:39:22, 3.24it/s] 63%|██████▎ | 235710/371472 [8:09:16<11:12:27, 3.36it/s] 63%|██████▎ | 235711/371472 [8:09:16<11:57:03, 3.16it/s] 63%|██████▎ | 235712/371472 [8:09:17<11:52:28, 3.18it/s] 63%|██████▎ | 235713/371472 [8:09:17<11:17:38, 3.34it/s] 63%|██████▎ | 235714/371472 [8:09:17<11:30:58, 3.27it/s] 63%|██████▎ | 235715/371472 [8:09:17<11:30:16, 3.28it/s] 63%|██████▎ | 235716/371472 [8:09:18<11:22:05, 3.32it/s] 63%|██████▎ | 235717/371472 [8:09:18<11:03:22, 3.41it/s] 63%|██████▎ | 235718/371472 [8:09:18<11:41:55, 3.22it/s] 63%|██████▎ | 235719/371472 [8:09:19<11:29:45, 3.28it/s] 63%|██████▎ | 235720/371472 [8:09:19<11:06:30, 3.39it/s] {'loss': 2.5572, 'learning_rate': 4.290762567605421e-07, 'epoch': 10.15} + 63%|██████▎ | 235720/371472 [8:09:19<11:06:30, 3.39it/s] 63%|██████▎ | 235721/371472 [8:09:19<11:19:17, 3.33it/s] 63%|██████▎ | 235722/371472 [8:09:20<10:58:02, 3.44it/s] 63%|██████▎ | 235723/371472 [8:09:20<11:04:23, 3.41it/s] 63%|██████▎ | 235724/371472 [8:09:20<10:46:52, 3.50it/s] 63%|██████▎ | 235725/371472 [8:09:20<11:03:31, 3.41it/s] 63%|██████▎ | 235726/371472 [8:09:21<11:23:42, 3.31it/s] 63%|██████▎ | 235727/371472 [8:09:21<11:09:10, 3.38it/s] 63%|██████▎ | 235728/371472 [8:09:21<11:13:55, 3.36it/s] 63%|██████▎ | 235729/371472 [8:09:22<11:00:49, 3.42it/s] 63%|██████▎ | 235730/371472 [8:09:22<11:06:32, 3.39it/s] 63%|██████▎ | 235731/371472 [8:09:22<10:52:13, 3.47it/s] 63%|██████▎ | 235732/371472 [8:09:22<11:10:38, 3.37it/s] 63%|██████▎ | 235733/371472 [8:09:23<11:14:39, 3.35it/s] 63%|██████▎ | 235734/371472 [8:09:23<11:22:28, 3.31it/s] 63%|██████▎ | 235735/371472 [8:09:23<11:00:45, 3.42it/s] 63%|██████▎ | 235736/371472 [8:09:24<10:44:28, 3.51it/s] 63%|██████▎ | 235737/371472 [8:09:24<10:46:04, 3.50it/s] 63%|██████▎ | 235738/371472 [8:09:24<10:42:09, 3.52it/s] 63%|██████▎ | 235739/371472 [8:09:25<11:22:51, 3.31it/s] 63%|██████▎ | 235740/371472 [8:09:25<11:34:37, 3.26it/s] {'loss': 2.694, 'learning_rate': 4.2902777478506327e-07, 'epoch': 10.15} + 63%|██████▎ | 235740/371472 [8:09:25<11:34:37, 3.26it/s] 63%|██████▎ | 235741/371472 [8:09:25<12:25:22, 3.03it/s] 63%|██████▎ | 235742/371472 [8:09:26<12:16:44, 3.07it/s] 63%|██████▎ | 235743/371472 [8:09:26<12:36:24, 2.99it/s] 63%|██████▎ | 235744/371472 [8:09:26<12:02:44, 3.13it/s] 63%|██████▎ | 235745/371472 [8:09:27<12:10:54, 3.09it/s] 63%|██████▎ | 235746/371472 [8:09:27<11:31:35, 3.27it/s] 63%|██████▎ | 235747/371472 [8:09:27<11:13:06, 3.36it/s] 63%|██████▎ | 235748/371472 [8:09:27<11:01:43, 3.42it/s] 63%|██████▎ | 235749/371472 [8:09:28<11:03:38, 3.41it/s] 63%|██████▎ | 235750/371472 [8:09:28<10:51:14, 3.47it/s] 63%|██████▎ | 235751/371472 [8:09:28<10:47:30, 3.49it/s] 63%|██████▎ | 235752/371472 [8:09:28<10:50:44, 3.48it/s] 63%|██████▎ | 235753/371472 [8:09:29<10:55:42, 3.45it/s] 63%|██████▎ | 235754/371472 [8:09:29<10:58:32, 3.43it/s] 63%|██████▎ | 235755/371472 [8:09:29<11:03:21, 3.41it/s] 63%|██████▎ | 235756/371472 [8:09:30<11:43:08, 3.22it/s] 63%|██████▎ | 235757/371472 [8:09:30<11:44:26, 3.21it/s] 63%|██████▎ | 235758/371472 [8:09:30<11:34:13, 3.26it/s] 63%|██████▎ | 235759/371472 [8:09:31<11:20:06, 3.33it/s] 63%|██████▎ | 235760/371472 [8:09:31<10:55:04, 3.45it/s] {'loss': 2.7585, 'learning_rate': 4.289792928095843e-07, 'epoch': 10.15} + 63%|██████▎ | 235760/371472 [8:09:31<10:55:04, 3.45it/s] 63%|██████▎ | 235761/371472 [8:09:31<10:57:27, 3.44it/s] 63%|██████▎ | 235762/371472 [8:09:32<11:12:30, 3.36it/s] 63%|██████▎ | 235763/371472 [8:09:32<11:01:48, 3.42it/s] 63%|██████▎ | 235764/371472 [8:09:32<11:07:25, 3.39it/s] 63%|██████▎ | 235765/371472 [8:09:32<10:57:09, 3.44it/s] 63%|█���████▎ | 235766/371472 [8:09:33<11:10:52, 3.37it/s] 63%|██████▎ | 235767/371472 [8:09:33<12:06:48, 3.11it/s] 63%|██████▎ | 235768/371472 [8:09:33<11:36:05, 3.25it/s] 63%|██████▎ | 235769/371472 [8:09:34<11:43:34, 3.21it/s] 63%|██████▎ | 235770/371472 [8:09:34<11:34:45, 3.26it/s] 63%|██████▎ | 235771/371472 [8:09:34<11:14:16, 3.35it/s] 63%|██████▎ | 235772/371472 [8:09:35<11:07:20, 3.39it/s] 63%|██████▎ | 235773/371472 [8:09:35<11:22:42, 3.31it/s] 63%|██████▎ | 235774/371472 [8:09:35<12:11:07, 3.09it/s] 63%|██████▎ | 235775/371472 [8:09:35<11:25:13, 3.30it/s] 63%|██████▎ | 235776/371472 [8:09:36<11:26:54, 3.29it/s] 63%|██████▎ | 235777/371472 [8:09:36<11:03:56, 3.41it/s] 63%|██████▎ | 235778/371472 [8:09:36<11:07:33, 3.39it/s] 63%|██████▎ | 235779/371472 [8:09:37<10:58:59, 3.43it/s] 63%|██████▎ | 235780/371472 [8:09:37<10:58:57, 3.43it/s] {'loss': 2.8107, 'learning_rate': 4.2893081083410547e-07, 'epoch': 10.16} + 63%|██████▎ | 235780/371472 [8:09:37<10:58:57, 3.43it/s] 63%|██████▎ | 235781/371472 [8:09:37<11:31:33, 3.27it/s] 63%|██████▎ | 235782/371472 [8:09:38<11:17:03, 3.34it/s] 63%|██████▎ | 235783/371472 [8:09:38<11:15:28, 3.35it/s] 63%|██████▎ | 235784/371472 [8:09:38<11:10:41, 3.37it/s] 63%|██████▎ | 235785/371472 [8:09:38<10:59:00, 3.43it/s] 63%|██████▎ | 235786/371472 [8:09:39<10:58:44, 3.43it/s] 63%|██████▎ | 235787/371472 [8:09:39<11:33:07, 3.26it/s] 63%|██████▎ | 235788/371472 [8:09:39<12:13:36, 3.08it/s] 63%|██████▎ | 235789/371472 [8:09:40<12:02:50, 3.13it/s] 63%|██████▎ | 235790/371472 [8:09:40<11:34:46, 3.25it/s] 63%|██████▎ | 235791/371472 [8:09:40<11:29:30, 3.28it/s] 63%|██████▎ | 235792/371472 [8:09:41<11:47:14, 3.20it/s] 63%|██████▎ | 235793/371472 [8:09:41<11:43:32, 3.21it/s] 63%|██████▎ | 235794/371472 [8:09:41<11:31:29, 3.27it/s] 63%|██████▎ | 235795/371472 [8:09:41<11:14:12, 3.35it/s] 63%|██████▎ | 235796/371472 [8:09:42<10:58:15, 3.44it/s] 63%|██████▎ | 235797/371472 [8:09:42<10:55:04, 3.45it/s] 63%|██████▎ | 235798/371472 [8:09:42<11:02:42, 3.41it/s] 63%|██████▎ | 235799/371472 [8:09:43<11:13:37, 3.36it/s] 63%|██████▎ | 235800/371472 [8:09:43<11:18:05, 3.33it/s] {'loss': 2.6571, 'learning_rate': 4.288823288586265e-07, 'epoch': 10.16} + 63%|██████▎ | 235800/371472 [8:09:43<11:18:05, 3.33it/s] 63%|██████▎ | 235801/371472 [8:09:43<10:58:34, 3.43it/s] 63%|██████▎ | 235802/371472 [8:09:44<10:47:12, 3.49it/s] 63%|██████▎ | 235803/371472 [8:09:44<10:55:05, 3.45it/s] 63%|██████▎ | 235804/371472 [8:09:44<11:21:38, 3.32it/s] 63%|██████▎ | 235805/371472 [8:09:45<12:19:17, 3.06it/s] 63%|██████▎ | 235806/371472 [8:09:45<12:02:56, 3.13it/s] 63%|██████▎ | 235807/371472 [8:09:45<12:03:49, 3.12it/s] 63%|██████▎ | 235808/371472 [8:09:45<11:30:33, 3.27it/s] 63%|██████▎ | 235809/371472 [8:09:46<11:22:22, 3.31it/s] 63%|██████▎ | 235810/371472 [8:09:46<11:08:21, 3.38it/s] 63%|██████▎ | 235811/371472 [8:09:46<11:06:42, 3.39it/s] 63%|██████▎ | 235812/371472 [8:09:47<11:12:28, 3.36it/s] 63%|██████▎ | 235813/371472 [8:09:47<11:00:51, 3.42it/s] 63%|██████▎ | 235814/371472 [8:09:47<11:39:42, 3.23it/s] 63%|██████▎ | 235815/371472 [8:09:48<12:11:14, 3.09it/s] 63%|██████▎ | 235816/371472 [8:09:48<11:49:27, 3.19it/s] 63%|██████▎ | 235817/371472 [8:09:48<11:12:29, 3.36it/s] 63%|██████▎ | 235818/371472 [8:09:48<11:17:18, 3.34it/s] 63%|██████▎ | 235819/371472 [8:09:49<11:10:23, 3.37it/s] 63%|██████▎ | 235820/371472 [8:09:49<11:02:01, 3.42it/s] {'loss': 2.6744, 'learning_rate': 4.2883384688314766e-07, 'epoch': 10.16} + 63%|██████▎ | 235820/371472 [8:09:49<11:02:01, 3.42it/s] 63%|██████▎ | 235821/371472 [8:09:49<10:55:37, 3.45it/s] 63%|██████▎ | 235822/371472 [8:09:50<10:47:34, 3.49it/s] 63%|██████▎ | 235823/371472 [8:09:50<10:42:41, 3.52it/s] 63%|██████▎ | 235824/371472 [8:09:50<10:35:28, 3.56it/s] 63%|██████▎ | 235825/371472 [8:09:50<11:16:00, 3.34it/s] 63%|██████▎ | 235826/371472 [8:09:51<10:54:28, 3.45it/s] 63%|██████▎ | 235827/371472 [8:09:51<10:51:25, 3.47it/s] 63%|██████▎ | 235828/371472 [8:09:51<10:41:24, 3.52it/s] 63%|██████▎ | 235829/371472 [8:09:52<11:26:00, 3.30it/s] 63%|██████▎ | 235830/371472 [8:09:52<11:08:07, 3.38it/s] 63%|██████▎ | 235831/371472 [8:09:52<11:50:17, 3.18it/s] 63%|██████▎ | 235832/371472 [8:09:53<12:05:45, 3.11it/s] 63%|██████▎ | 235833/371472 [8:09:53<11:53:40, 3.17it/s] 63%|██████▎ | 235834/371472 [8:09:53<11:32:48, 3.26it/s] 63%|██████▎ | 235835/371472 [8:09:54<11:29:53, 3.28it/s] 63%|██████▎ | 235836/371472 [8:09:54<11:06:36, 3.39it/s] 63%|██████▎ | 235837/371472 [8:09:54<11:10:11, 3.37it/s] 63%|██████▎ | 235838/371472 [8:09:54<11:16:36, 3.34it/s] 63%|██████▎ | 235839/371472 [8:09:55<11:06:59, 3.39it/s] 63%|██████▎ | 235840/371472 [8:09:55<10:55:03, 3.45it/s] {'loss': 2.7439, 'learning_rate': 4.2878536490766874e-07, 'epoch': 10.16} + 63%|██████▎ | 235840/371472 [8:09:55<10:55:03, 3.45it/s] 63%|██████▎ | 235841/371472 [8:09:55<11:00:35, 3.42it/s] 63%|██████▎ | 235842/371472 [8:09:56<10:57:52, 3.44it/s] 63%|██████▎ | 235843/371472 [8:09:56<10:52:38, 3.46it/s] 63%|██████▎ | 235844/371472 [8:09:56<11:15:26, 3.35it/s] 63%|██████▎ | 235845/371472 [8:09:56<11:29:24, 3.28it/s] 63%|██████▎ | 235846/371472 [8:09:57<11:30:17, 3.27it/s] 63%|██████▎ | 235847/371472 [8:09:57<11:27:08, 3.29it/s] 63%|██████▎ | 235848/371472 [8:09:57<11:23:36, 3.31it/s] 63%|██████▎ | 235849/371472 [8:09:58<11:15:04, 3.35it/s] 63%|██████▎ | 235850/371472 [8:09:58<10:58:40, 3.43it/s] 63%|██████▎ | 235851/371472 [8:09:58<10:51:05, 3.47it/s] 63%|██████▎ | 235852/371472 [8:09:59<11:01:36, 3.42it/s] 63%|██████▎ | 235853/371472 [8:09:59<10:59:18, 3.43it/s] 63%|██████▎ | 235854/371472 [8:09:59<11:07:54, 3.38it/s] 63%|██████▎ | 235855/371472 [8:09:59<10:55:08, 3.45it/s] 63%|██████▎ | 235856/371472 [8:10:00<11:52:05, 3.17it/s] 63%|██████▎ | 235857/371472 [8:10:00<11:25:04, 3.30it/s] 63%|██████▎ | 235858/371472 [8:10:00<12:30:47, 3.01it/s] 63%|██████▎ | 235859/371472 [8:10:01<12:13:07, 3.08it/s] 63%|██████▎ | 235860/371472 [8:10:01<11:53:28, 3.17it/s] {'loss': 2.6749, 'learning_rate': 4.2873688293218986e-07, 'epoch': 10.16} + 63%|██████▎ | 235860/371472 [8:10:01<11:53:28, 3.17it/s] 63%|██████▎ | 235861/371472 [8:10:01<11:35:49, 3.25it/s] 63%|██████▎ | 235862/371472 [8:10:02<11:07:07, 3.39it/s] 63%|██████▎ | 235863/371472 [8:10:02<11:50:14, 3.18it/s] 63%|██████▎ | 235864/371472 [8:10:02<12:34:19, 3.00it/s] 63%|██████▎ | 235865/371472 [8:10:03<11:58:51, 3.14it/s] 63%|██████▎ | 235866/371472 [8:10:03<11:55:13, 3.16it/s] 63%|██████▎ | 235867/371472 [8:10:03<11:38:37, 3.24it/s] 63%|██████▎ | 235868/371472 [8:10:04<11:26:37, 3.29it/s] 63%|██████▎ | 235869/371472 [8:10:04<11:06:41, 3.39it/s] 63%|██████▎ | 235870/371472 [8:10:04<11:07:23, 3.39it/s] 63%|██████▎ | 235871/371472 [8:10:04<11:17:39, 3.34it/s] 63%|██████▎ | 235872/371472 [8:10:05<11:13:17, 3.36it/s] 63%|██████▎ | 235873/371472 [8:10:05<12:00:18, 3.14it/s] 63%|██████▎ | 235874/371472 [8:10:05<11:52:21, 3.17it/s] 63%|██████▎ | 235875/371472 [8:10:06<11:23:21, 3.31it/s] 63%|██████▎ | 235876/371472 [8:10:06<11:18:22, 3.33it/s] 63%|██████▎ | 235877/371472 [8:10:06<10:56:59, 3.44it/s] 63%|██████▎ | 235878/371472 [8:10:07<11:41:44, 3.22it/s] 63%|██████▎ | 235879/371472 [8:10:07<11:28:59, 3.28it/s] 63%|██████▎ | 235880/371472 [8:10:07<11:41:33, 3.22it/s] {'loss': 2.7292, 'learning_rate': 4.2868840095671093e-07, 'epoch': 10.16} + 63%|██████▎ | 235880/371472 [8:10:07<11:41:33, 3.22it/s] 63%|██████▎ | 235881/371472 [8:10:08<12:04:44, 3.12it/s] 63%|██████▎ | 235882/371472 [8:10:08<11:33:13, 3.26it/s] 63%|██████▎ | 235883/371472 [8:10:08<11:24:34, 3.30it/s] 63%|██████▎ | 235884/371472 [8:10:08<11:03:45, 3.40it/s] 64%|██████▎ | 235885/371472 [8:10:09<11:12:10, 3.36it/s] 64%|██████▎ | 235886/371472 [8:10:09<11:03:22, 3.41it/s] 64%|██████▎ | 235887/371472 [8:10:09<11:08:10, 3.38it/s] 64%|██████▎ | 235888/371472 [8:10:10<10:56:58, 3.44it/s] 64%|██████▎ | 235889/371472 [8:10:10<11:41:12, 3.22it/s] 64%|██████▎ | 235890/371472 [8:10:10<11:23:28, 3.31it/s] 64%|██████▎ | 235891/371472 [8:10:10<11:13:56, 3.35it/s] 64%|██████▎ | 235892/371472 [8:10:11<11:11:11, 3.37it/s] 64%|██████▎ | 235893/371472 [8:10:11<11:04:48, 3.40it/s] 64%|██████▎ | 235894/371472 [8:10:11<10:48:09, 3.49it/s] 64%|██████▎ | 235895/371472 [8:10:12<10:43:25, 3.51it/s] 64%|██████▎ | 235896/371472 [8:10:12<11:24:39, 3.30it/s] 64%|██████▎ | 235897/371472 [8:10:12<11:33:58, 3.26it/s] 64%|██████▎ | 235898/371472 [8:10:13<11:23:22, 3.31it/s] 64%|██████▎ | 235899/371472 [8:10:13<11:14:39, 3.35it/s] 64%|██████▎ | 235900/371472 [8:10:13<11:21:25, 3.32it/s] {'loss': 2.7218, 'learning_rate': 4.286399189812321e-07, 'epoch': 10.16} + 64%|██████▎ | 235900/371472 [8:10:13<11:21:25, 3.32it/s] 64%|██████▎ | 235901/371472 [8:10:13<11:19:29, 3.33it/s] 64%|██████▎ | 235902/371472 [8:10:14<11:45:11, 3.20it/s] 64%|██████▎ | 235903/371472 [8:10:14<11:46:54, 3.20it/s] 64%|██████▎ | 235904/371472 [8:10:14<12:08:39, 3.10it/s] 64%|██████▎ | 235905/371472 [8:10:15<11:35:30, 3.25it/s] 64%|██████▎ | 235906/371472 [8:10:15<11:27:12, 3.29it/s] 64%|██████▎ | 235907/371472 [8:10:15<11:15:46, 3.34it/s] 64%|██████▎ | 235908/371472 [8:10:16<11:21:10, 3.32it/s] 64%|██████▎ | 235909/371472 [8:10:16<11:14:23, 3.35it/s] 64%|██████▎ | 235910/371472 [8:10:16<10:49:43, 3.48it/s] 64%|██████▎ | 235911/371472 [8:10:17<11:57:53, 3.15it/s] 64%|██████▎ | 235912/371472 [8:10:17<11:38:11, 3.24it/s] 64%|██████▎ | 235913/371472 [8:10:17<11:09:06, 3.38it/s] 64%|██████▎ | 235914/371472 [8:10:17<11:13:11, 3.36it/s] 64%|██████▎ | 235915/371472 [8:10:18<11:16:05, 3.34it/s] 64%|██████▎ | 235916/371472 [8:10:18<11:25:48, 3.29it/s] 64%|██████▎ | 235917/371472 [8:10:18<11:11:04, 3.37it/s] 64%|██████▎ | 235918/371472 [8:10:19<11:17:11, 3.34it/s] 64%|██████▎ | 235919/371472 [8:10:19<11:09:54, 3.37it/s] 64%|██████▎ | 235920/371472 [8:10:19<11:38:08, 3.24it/s] {'loss': 2.7372, 'learning_rate': 4.285914370057532e-07, 'epoch': 10.16} + 64%|██████▎ | 235920/371472 [8:10:19<11:38:08, 3.24it/s] 64%|██████▎ | 235921/371472 [8:10:20<11:22:44, 3.31it/s] 64%|██████▎ | 235922/371472 [8:10:20<11:13:35, 3.35it/s] 64%|██████▎ | 235923/371472 [8:10:20<10:58:41, 3.43it/s] 64%|██████▎ | 235924/371472 [8:10:20<11:11:33, 3.36it/s] 64%|██████▎ | 235925/371472 [8:10:21<11:05:45, 3.39it/s] 64%|██████▎ | 235926/371472 [8:10:21<10:45:17, 3.50it/s] 64%|██████▎ | 235927/371472 [8:10:21<10:43:24, 3.51it/s] 64%|██████▎ | 235928/371472 [8:10:22<10:44:39, 3.50it/s] 64%|██████▎ | 235929/371472 [8:10:22<10:39:46, 3.53it/s] 64%|██████▎ | 235930/371472 [8:10:22<10:59:05, 3.43it/s] 64%|██████▎ | 235931/371472 [8:10:22<10:56:10, 3.44it/s] 64%|██████▎ | 235932/371472 [8:10:23<10:47:12, 3.49it/s] 64%|██████▎ | 235933/371472 [8:10:23<10:48:45, 3.48it/s] 64%|██████▎ | 235934/371472 [8:10:23<10:55:52, 3.44it/s] 64%|██████▎ | 235935/371472 [8:10:24<10:41:28, 3.52it/s] 64%|██████▎ | 235936/371472 [8:10:24<10:59:38, 3.42it/s] 64%|██████▎ | 235937/371472 [8:10:24<11:48:54, 3.19it/s] 64%|██████▎ | 235938/371472 [8:10:24<11:32:03, 3.26it/s] 64%|██████▎ | 235939/371472 [8:10:25<11:23:04, 3.31it/s] 64%|██████▎ | 235940/371472 [8:10:25<11:31:58, 3.26it/s] {'loss': 2.7821, 'learning_rate': 4.285429550302743e-07, 'epoch': 10.16} + 64%|██████▎ | 235940/371472 [8:10:25<11:31:58, 3.26it/s] 64%|██████▎ | 235941/371472 [8:10:25<11:07:37, 3.38it/s] 64%|██████▎ | 235942/371472 [8:10:26<11:10:14, 3.37it/s] 64%|██████▎ | 235943/371472 [8:10:26<10:53:09, 3.46it/s] 64%|██████▎ | 235944/371472 [8:10:26<11:09:39, 3.37it/s] 64%|██████▎ | 235945/371472 [8:10:27<11:09:48, 3.37it/s] 64%|██████▎ | 235946/371472 [8:10:27<11:00:15, 3.42it/s] 64%|██████▎ | 235947/371472 [8:10:27<11:01:28, 3.41it/s] 64%|██████▎ | 235948/371472 [8:10:27<11:47:54, 3.19it/s] 64%|██████▎ | 235949/371472 [8:10:28<11:52:16, 3.17it/s] 64%|██████▎ | 235950/371472 [8:10:28<11:57:46, 3.15it/s] 64%|██████▎ | 235951/371472 [8:10:28<12:06:43, 3.11it/s] 64%|██████▎ | 235952/371472 [8:10:29<12:44:08, 2.96it/s] 64%|██████▎ | 235953/371472 [8:10:29<13:19:10, 2.83it/s] 64%|██████▎ | 235954/371472 [8:10:29<12:25:10, 3.03it/s] 64%|██████▎ | 235955/371472 [8:10:30<11:40:56, 3.22it/s] 64%|██████▎ | 235956/371472 [8:10:30<11:18:01, 3.33it/s] 64%|██████▎ | 235957/371472 [8:10:30<11:16:54, 3.34it/s] 64%|██████▎ | 235958/371472 [8:10:31<11:15:24, 3.34it/s] 64%|██████▎ | 235959/371472 [8:10:31<11:26:02, 3.29it/s] 64%|██████▎ | 235960/371472 [8:10:31<11:16:38, 3.34it/s] {'loss': 2.7652, 'learning_rate': 4.284944730547954e-07, 'epoch': 10.16} + 64%|██████▎ | 235960/371472 [8:10:31<11:16:38, 3.34it/s] 64%|██████▎ | 235961/371472 [8:10:31<10:54:14, 3.45it/s] 64%|██████▎ | 235962/371472 [8:10:32<10:39:12, 3.53it/s] 64%|██████▎ | 235963/371472 [8:10:32<10:39:49, 3.53it/s] 64%|██████▎ | 235964/371472 [8:10:32<10:49:44, 3.48it/s] 64%|██████▎ | 235965/371472 [8:10:33<11:20:22, 3.32it/s] 64%|██████▎ | 235966/371472 [8:10:33<11:33:12, 3.26it/s] 64%|██████▎ | 235967/371472 [8:10:33<11:17:00, 3.34it/s] 64%|██████▎ | 235968/371472 [8:10:34<11:13:52, 3.35it/s] 64%|██████▎ | 235969/371472 [8:10:34<11:43:19, 3.21it/s] 64%|██████▎ | 235970/371472 [8:10:34<11:53:15, 3.17it/s] 64%|██████▎ | 235971/371472 [8:10:35<11:28:13, 3.28it/s] 64%|██████▎ | 235972/371472 [8:10:35<11:22:02, 3.31it/s] 64%|██████▎ | 235973/371472 [8:10:35<10:56:19, 3.44it/s] 64%|██████▎ | 235974/371472 [8:10:35<10:50:25, 3.47it/s] 64%|██████▎ | 235975/371472 [8:10:36<11:04:05, 3.40it/s] 64%|██████▎ | 235976/371472 [8:10:36<11:38:21, 3.23it/s] 64%|██████▎ | 235977/371472 [8:10:36<11:23:09, 3.31it/s] 64%|██████▎ | 235978/371472 [8:10:37<12:00:50, 3.13it/s] 64%|██████▎ | 235979/371472 [8:10:37<11:34:48, 3.25it/s] 64%|██████▎ | 235980/371472 [8:10:37<11:45:31, 3.20it/s] {'loss': 2.8439, 'learning_rate': 4.284459910793165e-07, 'epoch': 10.16} + 64%|██████▎ | 235980/371472 [8:10:37<11:45:31, 3.20it/s] 64%|██████▎ | 235981/371472 [8:10:38<11:37:11, 3.24it/s] 64%|██████▎ | 235982/371472 [8:10:38<11:56:42, 3.15it/s] 64%|██████▎ | 235983/371472 [8:10:38<11:33:18, 3.26it/s] 64%|██████▎ | 235984/371472 [8:10:38<11:25:48, 3.29it/s] 64%|██████▎ | 235985/371472 [8:10:39<11:12:13, 3.36it/s] 64%|██████▎ | 235986/371472 [8:10:39<11:23:31, 3.30it/s] 64%|██████▎ | 235987/371472 [8:10:39<11:27:12, 3.29it/s] 64%|██████▎ | 235988/371472 [8:10:40<11:30:49, 3.27it/s] 64%|██████▎ | 235989/371472 [8:10:40<11:16:36, 3.34it/s] 64%|██████▎ | 235990/371472 [8:10:40<11:28:46, 3.28it/s] 64%|██████▎ | 235991/371472 [8:10:41<11:15:33, 3.34it/s] 64%|██████▎ | 235992/371472 [8:10:41<11:00:02, 3.42it/s] 64%|██████▎ | 235993/371472 [8:10:41<11:02:59, 3.41it/s] 64%|██████▎ | 235994/371472 [8:10:41<11:24:58, 3.30it/s] 64%|██████▎ | 235995/371472 [8:10:42<11:07:21, 3.38it/s] 64%|██████▎ | 235996/371472 [8:10:42<10:55:03, 3.45it/s] 64%|██████▎ | 235997/371472 [8:10:42<11:27:13, 3.29it/s] 64%|██████▎ | 235998/371472 [8:10:43<10:59:44, 3.42it/s] 64%|██████▎ | 235999/371472 [8:10:43<12:05:01, 3.11it/s] 64%|██████▎ | 236000/371472 [8:10:43<12:18:22, 3.06it/s] {'loss': 2.6719, 'learning_rate': 4.2839750910383757e-07, 'epoch': 10.16} + 64%|██████▎ | 236000/371472 [8:10:43<12:18:22, 3.06it/s] 64%|██████▎ | 236001/371472 [8:10:44<11:36:58, 3.24it/s] 64%|██████▎ | 236002/371472 [8:10:44<11:20:19, 3.32it/s] 64%|██████▎ | 236003/371472 [8:10:44<11:45:40, 3.20it/s] 64%|██████▎ | 236004/371472 [8:10:45<11:51:55, 3.17it/s] 64%|██████▎ | 236005/371472 [8:10:45<11:36:17, 3.24it/s] 64%|██████▎ | 236006/371472 [8:10:45<11:21:36, 3.31it/s] 64%|██████▎ | 236007/371472 [8:10:45<11:09:12, 3.37it/s] 64%|██████▎ | 236008/371472 [8:10:46<11:03:42, 3.40it/s] 64%|██████▎ | 236009/371472 [8:10:46<11:23:27, 3.30it/s] 64%|██████▎ | 236010/371472 [8:10:46<11:05:59, 3.39it/s] 64%|██████▎ | 236011/371472 [8:10:47<11:03:59, 3.40it/s] 64%|██████▎ | 236012/371472 [8:10:47<11:35:44, 3.24it/s] 64%|██████▎ | 236013/371472 [8:10:47<11:34:23, 3.25it/s] 64%|██████▎ | 236014/371472 [8:10:48<11:29:29, 3.27it/s] 64%|██████▎ | 236015/371472 [8:10:48<11:29:49, 3.27it/s] 64%|██████▎ | 236016/371472 [8:10:48<11:30:17, 3.27it/s] 64%|██████▎ | 236017/371472 [8:10:48<11:02:41, 3.41it/s] 64%|██████▎ | 236018/371472 [8:10:49<11:50:50, 3.18it/s] 64%|██████▎ | 236019/371472 [8:10:49<11:45:13, 3.20it/s] 64%|██████▎ | 236020/371472 [8:10:49<11:26:32, 3.29it/s] {'loss': 2.6264, 'learning_rate': 4.2834902712835875e-07, 'epoch': 10.17} + 64%|██████▎ | 236020/371472 [8:10:49<11:26:32, 3.29it/s] 64%|██████▎ | 236021/371472 [8:10:50<11:38:26, 3.23it/s] 64%|██████▎ | 236022/371472 [8:10:50<11:51:10, 3.17it/s] 64%|██████▎ | 236023/371472 [8:10:50<11:21:04, 3.31it/s] 64%|██████▎ | 236024/371472 [8:10:51<11:14:49, 3.35it/s] 64%|██████▎ | 236025/371472 [8:10:51<11:05:15, 3.39it/s] 64%|██████▎ | 236026/371472 [8:10:51<10:56:05, 3.44it/s] 64%|██████▎ | 236027/371472 [8:10:51<10:37:08, 3.54it/s] 64%|██████▎ | 236028/371472 [8:10:52<11:23:23, 3.30it/s] 64%|██████▎ | 236029/371472 [8:10:52<11:10:03, 3.37it/s] 64%|██████▎ | 236030/371472 [8:10:52<11:22:11, 3.31it/s] 64%|██████▎ | 236031/371472 [8:10:53<11:14:51, 3.34it/s] 64%|██████▎ | 236032/371472 [8:10:53<10:58:46, 3.43it/s] 64%|██████▎ | 236033/371472 [8:10:53<10:50:54, 3.47it/s] 64%|██████▎ | 236034/371472 [8:10:54<10:45:13, 3.50it/s] 64%|██████▎ | 236035/371472 [8:10:54<11:48:29, 3.19it/s] 64%|██████▎ | 236036/371472 [8:10:54<11:34:43, 3.25it/s] 64%|██████▎ | 236037/371472 [8:10:54<11:08:12, 3.38it/s] 64%|██████▎ | 236038/371472 [8:10:55<10:57:06, 3.44it/s] 64%|██████▎ | 236039/371472 [8:10:55<10:42:47, 3.51it/s] 64%|██████▎ | 236040/371472 [8:10:55<10:42:22, 3.51it/s] {'loss': 2.7847, 'learning_rate': 4.283005451528798e-07, 'epoch': 10.17} + 64%|██████▎ | 236040/371472 [8:10:55<10:42:22, 3.51it/s] 64%|██████▎ | 236041/371472 [8:10:56<11:14:33, 3.35it/s] 64%|██████▎ | 236042/371472 [8:10:56<12:04:14, 3.12it/s] 64%|██████▎ | 236043/371472 [8:10:56<11:44:19, 3.20it/s] 64%|██████▎ | 236044/371472 [8:10:57<11:44:57, 3.20it/s] 64%|██████▎ | 236045/371472 [8:10:57<11:54:06, 3.16it/s] 64%|██████▎ | 236046/371472 [8:10:57<12:04:50, 3.11it/s] 64%|██████▎ | 236047/371472 [8:10:58<12:08:34, 3.10it/s] 64%|██████▎ | 236048/371472 [8:10:58<11:49:14, 3.18it/s] 64%|██████▎ | 236049/371472 [8:10:58<11:25:35, 3.29it/s] 64%|██████▎ | 236050/371472 [8:10:59<12:15:01, 3.07it/s] 64%|██████▎ | 236051/371472 [8:10:59<11:46:19, 3.20it/s] 64%|██████▎ | 236052/371472 [8:10:59<11:21:44, 3.31it/s] 64%|██████▎ | 236053/371472 [8:10:59<11:30:39, 3.27it/s] 64%|██████▎ | 236054/371472 [8:11:00<11:52:38, 3.17it/s] 64%|██████▎ | 236055/371472 [8:11:00<11:53:54, 3.16it/s] 64%|██████▎ | 236056/371472 [8:11:00<11:27:57, 3.28it/s] 64%|██████▎ | 236057/371472 [8:11:01<11:09:55, 3.37it/s] 64%|██████▎ | 236058/371472 [8:11:01<11:29:56, 3.27it/s] 64%|██████▎ | 236059/371472 [8:11:01<11:19:33, 3.32it/s] 64%|██████▎ | 236060/371472 [8:11:02<11:47:17, 3.19it/s] {'loss': 2.8013, 'learning_rate': 4.2825206317740095e-07, 'epoch': 10.17} + 64%|██████▎ | 236060/371472 [8:11:02<11:47:17, 3.19it/s] 64%|██████▎ | 236061/371472 [8:11:02<11:59:53, 3.13it/s] 64%|██████▎ | 236062/371472 [8:11:02<11:39:59, 3.22it/s] 64%|██████▎ | 236063/371472 [8:11:02<11:08:55, 3.37it/s] 64%|██████▎ | 236064/371472 [8:11:03<10:48:48, 3.48it/s] 64%|██████▎ | 236065/371472 [8:11:03<10:58:28, 3.43it/s] 64%|██████▎ | 236066/371472 [8:11:03<10:53:58, 3.45it/s] 64%|██████▎ | 236067/371472 [8:11:04<11:12:22, 3.36it/s] 64%|██████▎ | 236068/371472 [8:11:04<10:51:30, 3.46it/s] 64%|██████▎ | 236069/371472 [8:11:04<11:15:29, 3.34it/s] 64%|██████▎ | 236070/371472 [8:11:05<11:39:19, 3.23it/s] 64%|██████▎ | 236071/371472 [8:11:05<11:08:26, 3.38it/s] 64%|██████▎ | 236072/371472 [8:11:05<11:13:40, 3.35it/s] 64%|██████▎ | 236073/371472 [8:11:05<11:17:18, 3.33it/s] 64%|██████▎ | 236074/371472 [8:11:06<12:28:35, 3.01it/s] 64%|██████▎ | 236075/371472 [8:11:06<11:57:25, 3.15it/s] 64%|██████▎ | 236076/371472 [8:11:06<12:00:46, 3.13it/s] 64%|██████▎ | 236077/371472 [8:11:07<11:53:54, 3.16it/s] 64%|██████▎ | 236078/371472 [8:11:07<11:19:52, 3.32it/s] 64%|██████▎ | 236079/371472 [8:11:07<11:09:44, 3.37it/s] 64%|██████▎ | 236080/371472 [8:11:08<11:24:10, 3.30it/s] {'loss': 2.6943, 'learning_rate': 4.28203581201922e-07, 'epoch': 10.17} + 64%|██████▎ | 236080/371472 [8:11:08<11:24:10, 3.30it/s] 64%|██████▎ | 236081/371472 [8:11:08<11:04:42, 3.39it/s] 64%|██████▎ | 236082/371472 [8:11:08<11:01:30, 3.41it/s] 64%|██████▎ | 236083/371472 [8:11:09<11:04:38, 3.40it/s] 64%|██████▎ | 236084/371472 [8:11:09<11:14:22, 3.35it/s] 64%|██████▎ | 236085/371472 [8:11:09<10:56:21, 3.44it/s] 64%|██████▎ | 236086/371472 [8:11:09<10:54:34, 3.45it/s] 64%|██████▎ | 236087/371472 [8:11:10<10:47:28, 3.48it/s] 64%|██████▎ | 236088/371472 [8:11:10<10:41:09, 3.52it/s] 64%|██████▎ | 236089/371472 [8:11:10<11:14:17, 3.35it/s] 64%|██████▎ | 236090/371472 [8:11:11<11:06:35, 3.38it/s] 64%|██████▎ | 236091/371472 [8:11:11<11:08:37, 3.37it/s] 64%|██████▎ | 236092/371472 [8:11:11<11:24:13, 3.30it/s] 64%|██████▎ | 236093/371472 [8:11:11<11:20:36, 3.32it/s] 64%|██████▎ | 236094/371472 [8:11:12<11:23:03, 3.30it/s] 64%|██████▎ | 236095/371472 [8:11:12<11:55:58, 3.15it/s] 64%|██████▎ | 236096/371472 [8:11:12<11:42:45, 3.21it/s] 64%|██████▎ | 236097/371472 [8:11:13<11:46:58, 3.19it/s] 64%|██████▎ | 236098/371472 [8:11:13<11:57:19, 3.15it/s] 64%|██████▎ | 236099/371472 [8:11:13<11:48:41, 3.18it/s] 64%|██████▎ | 236100/371472 [8:11:14<11:30:30, 3.27it/s] {'loss': 2.7968, 'learning_rate': 4.281550992264432e-07, 'epoch': 10.17} + 64%|██████▎ | 236100/371472 [8:11:14<11:30:30, 3.27it/s] 64%|██████▎ | 236101/371472 [8:11:14<11:26:29, 3.29it/s] 64%|██████▎ | 236102/371472 [8:11:14<11:18:55, 3.32it/s] 64%|██████▎ | 236103/371472 [8:11:15<11:28:15, 3.28it/s] 64%|██████▎ | 236104/371472 [8:11:15<11:13:23, 3.35it/s] 64%|██████▎ | 236105/371472 [8:11:15<11:37:20, 3.24it/s] 64%|██████▎ | 236106/371472 [8:11:15<11:25:30, 3.29it/s] 64%|██████▎ | 236107/371472 [8:11:16<11:57:39, 3.14it/s] 64%|██████▎ | 236108/371472 [8:11:16<12:10:33, 3.09it/s] 64%|██████▎ | 236109/371472 [8:11:16<11:49:27, 3.18it/s] 64%|██████▎ | 236110/371472 [8:11:17<11:47:46, 3.19it/s] 64%|██████▎ | 236111/371472 [8:11:17<11:13:35, 3.35it/s] 64%|██████▎ | 236112/371472 [8:11:17<11:32:43, 3.26it/s] 64%|██████▎ | 236113/371472 [8:11:18<11:42:11, 3.21it/s] 64%|██████▎ | 236114/371472 [8:11:18<12:03:02, 3.12it/s] 64%|██████▎ | 236115/371472 [8:11:18<11:50:19, 3.18it/s] 64%|██████▎ | 236116/371472 [8:11:19<11:21:35, 3.31it/s] 64%|██████▎ | 236117/371472 [8:11:19<11:58:38, 3.14it/s] 64%|██████▎ | 236118/371472 [8:11:19<12:51:18, 2.92it/s] 64%|██████▎ | 236119/371472 [8:11:20<12:28:27, 3.01it/s] 64%|██████▎ | 236120/371472 [8:11:20<12:05:54, 3.11it/s] {'loss': 2.6838, 'learning_rate': 4.281066172509642e-07, 'epoch': 10.17} + 64%|██████▎ | 236120/371472 [8:11:20<12:05:54, 3.11it/s] 64%|██████▎ | 236121/371472 [8:11:20<12:04:05, 3.12it/s] 64%|██████▎ | 236122/371472 [8:11:21<11:38:45, 3.23it/s] 64%|██████▎ | 236123/371472 [8:11:21<11:17:43, 3.33it/s] 64%|██████▎ | 236124/371472 [8:11:21<11:18:13, 3.33it/s] 64%|██████▎ | 236125/371472 [8:11:21<11:07:55, 3.38it/s] 64%|██████▎ | 236126/371472 [8:11:22<12:02:45, 3.12it/s] 64%|██████▎ | 236127/371472 [8:11:22<11:53:38, 3.16it/s] 64%|██████▎ | 236128/371472 [8:11:22<11:31:03, 3.26it/s] 64%|██████▎ | 236129/371472 [8:11:23<11:15:15, 3.34it/s] 64%|██████▎ | 236130/371472 [8:11:23<11:23:28, 3.30it/s] 64%|██████▎ | 236131/371472 [8:11:23<10:57:02, 3.43it/s] 64%|██████▎ | 236132/371472 [8:11:24<10:33:25, 3.56it/s] 64%|██████▎ | 236133/371472 [8:11:24<10:47:55, 3.48it/s] 64%|██████▎ | 236134/371472 [8:11:24<10:36:06, 3.55it/s] 64%|██████▎ | 236135/371472 [8:11:24<10:30:19, 3.58it/s] 64%|██████▎ | 236136/371472 [8:11:25<10:34:05, 3.56it/s] 64%|██████▎ | 236137/371472 [8:11:25<10:34:40, 3.55it/s] 64%|██████▎ | 236138/371472 [8:11:25<10:27:25, 3.59it/s] 64%|██████▎ | 236139/371472 [8:11:25<10:14:59, 3.67it/s] 64%|██████▎ | 236140/371472 [8:11:26<10:05:16, 3.73it/s] {'loss': 2.6997, 'learning_rate': 4.280581352754854e-07, 'epoch': 10.17} + 64%|██████▎ | 236140/371472 [8:11:26<10:05:16, 3.73it/s] 64%|██████▎ | 236141/371472 [8:11:26<10:11:13, 3.69it/s] 64%|██████▎ | 236142/371472 [8:11:26<10:28:03, 3.59it/s] 64%|██████▎ | 236143/371472 [8:11:27<10:25:55, 3.60it/s] 64%|██████▎ | 236144/371472 [8:11:27<10:44:11, 3.50it/s] 64%|██████▎ | 236145/371472 [8:11:27<11:06:38, 3.38it/s] 64%|██████▎ | 236146/371472 [8:11:28<11:30:08, 3.27it/s] 64%|██████▎ | 236147/371472 [8:11:28<12:28:27, 3.01it/s] 64%|██████▎ | 236148/371472 [8:11:28<12:43:24, 2.95it/s] 64%|██████▎ | 236149/371472 [8:11:29<12:45:48, 2.95it/s] 64%|██████▎ | 236150/371472 [8:11:29<11:48:22, 3.18it/s] 64%|██████▎ | 236151/371472 [8:11:29<11:27:20, 3.28it/s] 64%|██████▎ | 236152/371472 [8:11:29<11:01:58, 3.41it/s] 64%|██████▎ | 236153/371472 [8:11:30<10:55:15, 3.44it/s] 64%|██████▎ | 236154/371472 [8:11:30<10:44:37, 3.50it/s] 64%|██████▎ | 236155/371472 [8:11:30<11:02:53, 3.40it/s] 64%|██████▎ | 236156/371472 [8:11:31<10:50:00, 3.47it/s] 64%|██████▎ | 236157/371472 [8:11:31<10:50:06, 3.47it/s] 64%|██████▎ | 236158/371472 [8:11:31<10:35:42, 3.55it/s] 64%|██████▎ | 236159/371472 [8:11:31<10:47:41, 3.48it/s] 64%|██████▎ | 236160/371472 [8:11:32<10:41:44, 3.51it/s] {'loss': 2.5769, 'learning_rate': 4.280096533000064e-07, 'epoch': 10.17} + 64%|██████▎ | 236160/371472 [8:11:32<10:41:44, 3.51it/s] 64%|██████▎ | 236161/371472 [8:11:32<11:42:12, 3.21it/s] 64%|██████▎ | 236162/371472 [8:11:32<11:17:48, 3.33it/s] 64%|██████▎ | 236163/371472 [8:11:33<11:17:00, 3.33it/s] 64%|██████▎ | 236164/371472 [8:11:33<10:54:55, 3.44it/s] 64%|██████▎ | 236165/371472 [8:11:33<10:53:18, 3.45it/s] 64%|██████▎ | 236166/371472 [8:11:34<11:29:55, 3.27it/s] 64%|██████▎ | 236167/371472 [8:11:34<11:02:13, 3.41it/s] 64%|██████▎ | 236168/371472 [8:11:34<10:56:25, 3.44it/s] 64%|██████▎ | 236169/371472 [8:11:34<10:57:06, 3.43it/s] 64%|██████▎ | 236170/371472 [8:11:35<11:06:10, 3.39it/s] 64%|██████▎ | 236171/371472 [8:11:35<11:14:57, 3.34it/s] 64%|██████▎ | 236172/371472 [8:11:35<11:08:41, 3.37it/s] 64%|██████▎ | 236173/371472 [8:11:36<11:16:22, 3.33it/s] 64%|██████▎ | 236174/371472 [8:11:36<11:05:27, 3.39it/s] 64%|██████▎ | 236175/371472 [8:11:36<11:24:08, 3.30it/s] 64%|██████▎ | 236176/371472 [8:11:37<11:39:41, 3.22it/s] 64%|██████▎ | 236177/371472 [8:11:37<11:40:52, 3.22it/s] 64%|██████▎ | 236178/371472 [8:11:37<11:51:16, 3.17it/s] 64%|██████▎ | 236179/371472 [8:11:37<11:58:11, 3.14it/s] 64%|██████▎ | 236180/371472 [8:11:38<11:44:37, 3.20it/s] {'loss': 2.7788, 'learning_rate': 4.279611713245275e-07, 'epoch': 10.17} + 64%|██████▎ | 236180/371472 [8:11:38<11:44:37, 3.20it/s] 64%|██████▎ | 236181/371472 [8:11:38<11:24:39, 3.29it/s] 64%|██████▎ | 236182/371472 [8:11:38<11:16:54, 3.33it/s] 64%|██████▎ | 236183/371472 [8:11:39<12:11:52, 3.08it/s] 64%|██████▎ | 236184/371472 [8:11:39<11:58:23, 3.14it/s] 64%|██████▎ | 236185/371472 [8:11:39<11:33:02, 3.25it/s] 64%|██████▎ | 236186/371472 [8:11:40<12:23:45, 3.03it/s] 64%|██████▎ | 236187/371472 [8:11:40<12:59:17, 2.89it/s] 64%|██████▎ | 236188/371472 [8:11:40<12:24:53, 3.03it/s] 64%|██████▎ | 236189/371472 [8:11:41<12:24:58, 3.03it/s] 64%|██████▎ | 236190/371472 [8:11:41<12:10:34, 3.09it/s] 64%|██████▎ | 236191/371472 [8:11:41<11:56:13, 3.15it/s] 64%|██████▎ | 236192/371472 [8:11:42<11:23:43, 3.30it/s] 64%|██████▎ | 236193/371472 [8:11:42<11:09:03, 3.37it/s] 64%|██████▎ | 236194/371472 [8:11:42<11:06:33, 3.38it/s] 64%|██████▎ | 236195/371472 [8:11:42<11:04:20, 3.39it/s] 64%|██████▎ | 236196/371472 [8:11:43<11:07:06, 3.38it/s] 64%|██████▎ | 236197/371472 [8:11:43<11:02:20, 3.40it/s] 64%|██████▎ | 236198/371472 [8:11:43<10:50:03, 3.47it/s] 64%|██████▎ | 236199/371472 [8:11:44<10:47:31, 3.48it/s] 64%|██████▎ | 236200/371472 [8:11:44<10:51:36, 3.46it/s] {'loss': 2.6494, 'learning_rate': 4.2791268934904866e-07, 'epoch': 10.17} + 64%|██████▎ | 236200/371472 [8:11:44<10:51:36, 3.46it/s] 64%|██████▎ | 236201/371472 [8:11:44<10:57:11, 3.43it/s] 64%|██████▎ | 236202/371472 [8:11:44<10:50:01, 3.47it/s] 64%|██████▎ | 236203/371472 [8:11:45<11:45:03, 3.20it/s] 64%|██████▎ | 236204/371472 [8:11:45<11:26:32, 3.28it/s] 64%|██████▎ | 236205/371472 [8:11:46<12:21:17, 3.04it/s] 64%|██████▎ | 236206/371472 [8:11:46<11:56:48, 3.15it/s] 64%|██████▎ | 236207/371472 [8:11:46<11:38:04, 3.23it/s] 64%|██████▎ | 236208/371472 [8:11:46<11:49:59, 3.18it/s] 64%|██████▎ | 236209/371472 [8:11:47<11:31:20, 3.26it/s] 64%|██████▎ | 236210/371472 [8:11:47<11:24:30, 3.29it/s] 64%|██████▎ | 236211/371472 [8:11:47<11:22:51, 3.30it/s] 64%|██████▎ | 236212/371472 [8:11:48<11:23:05, 3.30it/s] 64%|██████▎ | 236213/371472 [8:11:48<11:29:59, 3.27it/s] 64%|██████▎ | 236214/371472 [8:11:48<11:17:57, 3.33it/s] 64%|██████▎ | 236215/371472 [8:11:49<11:24:16, 3.29it/s] 64%|██████▎ | 236216/371472 [8:11:49<11:09:53, 3.37it/s] 64%|██████▎ | 236217/371472 [8:11:49<11:07:05, 3.38it/s] 64%|██████▎ | 236218/371472 [8:11:49<11:13:31, 3.35it/s] 64%|██████▎ | 236219/371472 [8:11:50<11:09:22, 3.37it/s] 64%|██████▎ | 236220/371472 [8:11:50<11:29:25, 3.27it/s] {'loss': 2.8399, 'learning_rate': 4.2786420737356973e-07, 'epoch': 10.17} + 64%|██████▎ | 236220/371472 [8:11:50<11:29:25, 3.27it/s] 64%|██████▎ | 236221/371472 [8:11:50<12:10:07, 3.09it/s] 64%|██████▎ | 236222/371472 [8:11:51<12:17:04, 3.06it/s] 64%|██████▎ | 236223/371472 [8:11:51<11:38:46, 3.23it/s] 64%|██████▎ | 236224/371472 [8:11:51<11:12:35, 3.35it/s] 64%|██████▎ | 236225/371472 [8:11:52<11:34:17, 3.25it/s] 64%|██████▎ | 236226/371472 [8:11:52<11:10:27, 3.36it/s] 64%|██████▎ | 236227/371472 [8:11:52<11:09:56, 3.36it/s] 64%|██████▎ | 236228/371472 [8:11:52<11:03:41, 3.40it/s] 64%|██████▎ | 236229/371472 [8:11:53<11:07:43, 3.38it/s] 64%|██████▎ | 236230/371472 [8:11:53<11:08:12, 3.37it/s] 64%|██████▎ | 236231/371472 [8:11:53<11:16:35, 3.33it/s] 64%|██████▎ | 236232/371472 [8:11:54<11:13:14, 3.35it/s] 64%|██████▎ | 236233/371472 [8:11:54<11:12:41, 3.35it/s] 64%|██████▎ | 236234/371472 [8:11:54<11:17:39, 3.33it/s] 64%|██████▎ | 236235/371472 [8:11:55<11:32:15, 3.26it/s] 64%|██████▎ | 236236/371472 [8:11:55<11:06:14, 3.38it/s] 64%|██████▎ | 236237/371472 [8:11:55<11:46:45, 3.19it/s] 64%|██████▎ | 236238/371472 [8:11:56<11:39:27, 3.22it/s] 64%|██████▎ | 236239/371472 [8:11:56<11:32:54, 3.25it/s] 64%|██████▎ | 236240/371472 [8:11:56<11:45:14, 3.20it/s] {'loss': 2.8589, 'learning_rate': 4.2781572539809085e-07, 'epoch': 10.18} + 64%|██████▎ | 236240/371472 [8:11:56<11:45:14, 3.20it/s] 64%|██████▎ | 236241/371472 [8:11:56<11:53:56, 3.16it/s] 64%|██████▎ | 236242/371472 [8:11:57<11:13:35, 3.35it/s] 64%|██████▎ | 236243/371472 [8:11:57<11:03:37, 3.40it/s] 64%|██████▎ | 236244/371472 [8:11:57<11:24:57, 3.29it/s] 64%|██████▎ | 236245/371472 [8:11:58<11:42:02, 3.21it/s] 64%|██████▎ | 236246/371472 [8:11:58<11:21:14, 3.31it/s] 64%|██████▎ | 236247/371472 [8:11:58<11:10:56, 3.36it/s] 64%|██████▎ | 236248/371472 [8:11:59<11:17:16, 3.33it/s] 64%|██████▎ | 236249/371472 [8:11:59<10:50:59, 3.46it/s] 64%|██████▎ | 236250/371472 [8:11:59<11:37:28, 3.23it/s] 64%|██████▎ | 236251/371472 [8:11:59<11:26:40, 3.28it/s] 64%|██████▎ | 236252/371472 [8:12:00<11:23:51, 3.30it/s] 64%|██████▎ | 236253/371472 [8:12:00<11:02:22, 3.40it/s] 64%|██████▎ | 236254/371472 [8:12:00<11:02:25, 3.40it/s] 64%|██████▎ | 236255/371472 [8:12:01<11:47:56, 3.18it/s] 64%|██████▎ | 236256/371472 [8:12:01<11:51:03, 3.17it/s] 64%|██████▎ | 236257/371472 [8:12:01<12:02:53, 3.12it/s] 64%|██████▎ | 236258/371472 [8:12:02<12:00:37, 3.13it/s] 64%|██████▎ | 236259/371472 [8:12:02<11:39:47, 3.22it/s] 64%|██████▎ | 236260/371472 [8:12:02<11:13:55, 3.34it/s] {'loss': 2.8967, 'learning_rate': 4.277672434226119e-07, 'epoch': 10.18} + 64%|██████▎ | 236260/371472 [8:12:02<11:13:55, 3.34it/s] 64%|██████▎ | 236261/371472 [8:12:02<11:08:50, 3.37it/s] 64%|██████▎ | 236262/371472 [8:12:03<11:01:29, 3.41it/s] 64%|██████▎ | 236263/371472 [8:12:03<10:51:55, 3.46it/s] 64%|██████▎ | 236264/371472 [8:12:03<10:46:21, 3.49it/s] 64%|██████▎ | 236265/371472 [8:12:04<10:41:44, 3.51it/s] 64%|██████▎ | 236266/371472 [8:12:04<10:42:31, 3.51it/s] 64%|██████▎ | 236267/371472 [8:12:04<10:47:36, 3.48it/s] 64%|██████▎ | 236268/371472 [8:12:04<10:33:00, 3.56it/s] 64%|██████▎ | 236269/371472 [8:12:05<10:38:10, 3.53it/s] 64%|██████▎ | 236270/371472 [8:12:05<11:22:27, 3.30it/s] 64%|██████▎ | 236271/371472 [8:12:05<11:18:18, 3.32it/s] 64%|██████▎ | 236272/371472 [8:12:06<11:02:58, 3.40it/s] 64%|██████▎ | 236273/371472 [8:12:06<10:57:48, 3.43it/s] 64%|██████▎ | 236274/371472 [8:12:06<10:57:55, 3.42it/s] 64%|██████▎ | 236275/371472 [8:12:07<10:46:22, 3.49it/s] 64%|██████▎ | 236276/371472 [8:12:07<10:42:25, 3.51it/s] 64%|██████▎ | 236277/371472 [8:12:07<10:39:28, 3.52it/s] 64%|██████▎ | 236278/371472 [8:12:07<10:55:32, 3.44it/s] 64%|██████▎ | 236279/371472 [8:12:08<11:00:53, 3.41it/s] 64%|██████▎ | 236280/371472 [8:12:08<10:54:20, 3.44it/s] {'loss': 2.7168, 'learning_rate': 4.277187614471331e-07, 'epoch': 10.18} + 64%|██████▎ | 236280/371472 [8:12:08<10:54:20, 3.44it/s] 64%|██████▎ | 236281/371472 [8:12:08<11:23:45, 3.30it/s] 64%|██████▎ | 236282/371472 [8:12:09<12:20:45, 3.04it/s] 64%|██████▎ | 236283/371472 [8:12:09<11:54:49, 3.15it/s] 64%|██████▎ | 236284/371472 [8:12:09<11:32:40, 3.25it/s] 64%|██████▎ | 236285/371472 [8:12:10<13:04:11, 2.87it/s] 64%|██████▎ | 236286/371472 [8:12:10<12:31:49, 3.00it/s] 64%|██████▎ | 236287/371472 [8:12:10<12:29:17, 3.01it/s] 64%|██████▎ | 236288/371472 [8:12:11<12:18:34, 3.05it/s] 64%|██████▎ | 236289/371472 [8:12:11<12:08:35, 3.09it/s] 64%|██████▎ | 236290/371472 [8:12:11<11:46:00, 3.19it/s] 64%|██████▎ | 236291/371472 [8:12:12<11:40:44, 3.22it/s] 64%|██████▎ | 236292/371472 [8:12:12<11:28:21, 3.27it/s] 64%|██████▎ | 236293/371472 [8:12:12<11:33:20, 3.25it/s] 64%|██████▎ | 236294/371472 [8:12:12<11:22:36, 3.30it/s] 64%|██████▎ | 236295/371472 [8:12:13<11:20:10, 3.31it/s] 64%|██████▎ | 236296/371472 [8:12:13<11:34:37, 3.24it/s] 64%|██████▎ | 236297/371472 [8:12:13<11:36:49, 3.23it/s] 64%|██████▎ | 236298/371472 [8:12:14<11:56:30, 3.14it/s] 64%|██████▎ | 236299/371472 [8:12:14<11:53:26, 3.16it/s] 64%|██████▎ | 236300/371472 [8:12:14<11:32:15, 3.25it/s] {'loss': 2.7743, 'learning_rate': 4.276702794716542e-07, 'epoch': 10.18} + 64%|██████▎ | 236300/371472 [8:12:14<11:32:15, 3.25it/s] 64%|██████▎ | 236301/371472 [8:12:15<11:31:32, 3.26it/s] 64%|██████▎ | 236302/371472 [8:12:15<11:26:38, 3.28it/s] 64%|██████▎ | 236303/371472 [8:12:15<12:04:50, 3.11it/s] 64%|██████▎ | 236304/371472 [8:12:16<12:21:26, 3.04it/s] 64%|██████▎ | 236305/371472 [8:12:16<11:53:54, 3.16it/s] 64%|██████▎ | 236306/371472 [8:12:16<11:29:03, 3.27it/s] 64%|██████▎ | 236307/371472 [8:12:17<11:47:17, 3.19it/s] 64%|██████▎ | 236308/371472 [8:12:17<12:33:09, 2.99it/s] 64%|██████▎ | 236309/371472 [8:12:17<12:02:43, 3.12it/s] 64%|██████▎ | 236310/371472 [8:12:18<11:55:54, 3.15it/s] 64%|██████▎ | 236311/371472 [8:12:18<12:44:24, 2.95it/s] 64%|██████▎ | 236312/371472 [8:12:18<12:04:36, 3.11it/s] 64%|██████▎ | 236313/371472 [8:12:19<12:14:30, 3.07it/s] 64%|██████▎ | 236314/371472 [8:12:19<12:03:24, 3.11it/s] 64%|██████▎ | 236315/371472 [8:12:19<11:40:11, 3.22it/s] 64%|██████▎ | 236316/371472 [8:12:19<11:28:11, 3.27it/s] 64%|██████▎ | 236317/371472 [8:12:20<11:10:51, 3.36it/s] 64%|██████▎ | 236318/371472 [8:12:20<10:53:53, 3.44it/s] 64%|██████▎ | 236319/371472 [8:12:20<10:48:04, 3.48it/s] 64%|██████▎ | 236320/371472 [8:12:21<10:39:54, 3.52it/s] {'loss': 2.6319, 'learning_rate': 4.276217974961753e-07, 'epoch': 10.18} + 64%|██████▎ | 236320/371472 [8:12:21<10:39:54, 3.52it/s] 64%|██████▎ | 236321/371472 [8:12:21<10:45:35, 3.49it/s] 64%|██████▎ | 236322/371472 [8:12:21<11:40:31, 3.22it/s] 64%|██████▎ | 236323/371472 [8:12:22<11:25:27, 3.29it/s] 64%|██████▎ | 236324/371472 [8:12:22<11:27:13, 3.28it/s] 64%|██████▎ | 236325/371472 [8:12:22<11:16:21, 3.33it/s] 64%|██████▎ | 236326/371472 [8:12:22<11:17:10, 3.33it/s] 64%|██████▎ | 236327/371472 [8:12:23<11:32:42, 3.25it/s] 64%|██████▎ | 236328/371472 [8:12:23<11:48:17, 3.18it/s] 64%|██████▎ | 236329/371472 [8:12:23<11:26:25, 3.28it/s] 64%|██████▎ | 236330/371472 [8:12:24<11:12:40, 3.35it/s] 64%|██████▎ | 236331/371472 [8:12:24<10:53:57, 3.44it/s] 64%|██████▎ | 236332/371472 [8:12:24<11:31:51, 3.26it/s] 64%|██████▎ | 236333/371472 [8:12:25<11:09:32, 3.36it/s] 64%|██████▎ | 236334/371472 [8:12:25<11:02:39, 3.40it/s] 64%|██████▎ | 236335/371472 [8:12:25<11:05:18, 3.39it/s] 64%|██████▎ | 236336/371472 [8:12:25<10:56:05, 3.43it/s] 64%|██████▎ | 236337/371472 [8:12:26<10:59:23, 3.42it/s] 64%|██████▎ | 236338/371472 [8:12:26<10:53:31, 3.45it/s] 64%|██████▎ | 236339/371472 [8:12:26<11:03:20, 3.40it/s] 64%|██████▎ | 236340/371472 [8:12:27<11:11:06, 3.36it/s] {'loss': 2.8287, 'learning_rate': 4.2757331552069637e-07, 'epoch': 10.18} + 64%|██████▎ | 236340/371472 [8:12:27<11:11:06, 3.36it/s] 64%|██████▎ | 236341/371472 [8:12:27<10:58:48, 3.42it/s] 64%|██████▎ | 236342/371472 [8:12:27<10:58:16, 3.42it/s] 64%|██████▎ | 236343/371472 [8:12:27<10:46:44, 3.48it/s] 64%|██████▎ | 236344/371472 [8:12:28<10:52:02, 3.45it/s] 64%|██████▎ | 236345/371472 [8:12:28<10:59:22, 3.42it/s] 64%|██████▎ | 236346/371472 [8:12:28<11:03:08, 3.40it/s] 64%|██████▎ | 236347/371472 [8:12:29<11:25:12, 3.29it/s] 64%|██████▎ | 236348/371472 [8:12:29<11:37:12, 3.23it/s] 64%|██████▎ | 236349/371472 [8:12:29<11:16:24, 3.33it/s] 64%|██████▎ | 236350/371472 [8:12:30<10:55:15, 3.44it/s] 64%|██████▎ | 236351/371472 [8:12:30<10:55:25, 3.44it/s] 64%|██████▎ | 236352/371472 [8:12:30<10:31:38, 3.57it/s] 64%|██████▎ | 236353/371472 [8:12:30<11:03:00, 3.40it/s] 64%|██████▎ | 236354/371472 [8:12:31<11:13:10, 3.35it/s] 64%|██████▎ | 236355/371472 [8:12:31<10:49:15, 3.47it/s] 64%|██████▎ | 236356/371472 [8:12:31<11:05:43, 3.38it/s] 64%|██████▎ | 236357/371472 [8:12:32<11:45:19, 3.19it/s] 64%|██████▎ | 236358/371472 [8:12:32<11:56:30, 3.14it/s] 64%|██████▎ | 236359/371472 [8:12:32<11:45:07, 3.19it/s] 64%|██████▎ | 236360/371472 [8:12:33<11:35:21, 3.24it/s] {'loss': 2.8522, 'learning_rate': 4.2752483354521755e-07, 'epoch': 10.18} + 64%|██████▎ | 236360/371472 [8:12:33<11:35:21, 3.24it/s] 64%|██████▎ | 236361/371472 [8:12:33<11:33:31, 3.25it/s] 64%|██████▎ | 236362/371472 [8:12:33<11:22:04, 3.30it/s] 64%|██████▎ | 236363/371472 [8:12:33<11:07:47, 3.37it/s] 64%|██████▎ | 236364/371472 [8:12:34<11:24:59, 3.29it/s] 64%|██████▎ | 236365/371472 [8:12:34<11:05:32, 3.38it/s] 64%|██████▎ | 236366/371472 [8:12:34<11:18:17, 3.32it/s] 64%|██████▎ | 236367/371472 [8:12:35<10:47:06, 3.48it/s] 64%|██████▎ | 236368/371472 [8:12:35<12:26:16, 3.02it/s] 64%|██████▎ | 236369/371472 [8:12:35<12:14:32, 3.07it/s] 64%|██████▎ | 236370/371472 [8:12:36<12:13:31, 3.07it/s] 64%|██████▎ | 236371/371472 [8:12:36<11:51:36, 3.16it/s] 64%|██████▎ | 236372/371472 [8:12:36<12:30:44, 3.00it/s] 64%|██████▎ | 236373/371472 [8:12:37<11:58:32, 3.13it/s] 64%|██████▎ | 236374/371472 [8:12:37<11:28:16, 3.27it/s] 64%|██████▎ | 236375/371472 [8:12:37<11:05:13, 3.38it/s] 64%|██████▎ | 236376/371472 [8:12:37<11:07:05, 3.38it/s] 64%|██████▎ | 236377/371472 [8:12:38<12:15:17, 3.06it/s] 64%|██████▎ | 236378/371472 [8:12:38<11:36:34, 3.23it/s] 64%|██████▎ | 236379/371472 [8:12:38<11:10:09, 3.36it/s] 64%|██████▎ | 236380/371472 [8:12:39<11:24:24, 3.29it/s] {'loss': 2.6662, 'learning_rate': 4.2747635156973857e-07, 'epoch': 10.18} + 64%|██████▎ | 236380/371472 [8:12:39<11:24:24, 3.29it/s] 64%|██████▎ | 236381/371472 [8:12:39<10:55:33, 3.43it/s] 64%|██████▎ | 236382/371472 [8:12:39<11:44:13, 3.20it/s] 64%|██████▎ | 236383/371472 [8:12:40<11:22:54, 3.30it/s] 64%|██████▎ | 236384/371472 [8:12:40<11:35:19, 3.24it/s] 64%|██████▎ | 236385/371472 [8:12:40<11:25:14, 3.29it/s] 64%|██████▎ | 236386/371472 [8:12:41<10:59:58, 3.41it/s] 64%|██████▎ | 236387/371472 [8:12:41<10:50:48, 3.46it/s] 64%|██████▎ | 236388/371472 [8:12:41<11:54:51, 3.15it/s] 64%|██████▎ | 236389/371472 [8:12:41<11:28:48, 3.27it/s] 64%|██████▎ | 236390/371472 [8:12:42<11:02:51, 3.40it/s] 64%|██████▎ | 236391/371472 [8:12:42<10:49:54, 3.46it/s] 64%|██████▎ | 236392/371472 [8:12:42<10:51:00, 3.46it/s] 64%|██████▎ | 236393/371472 [8:12:43<10:37:59, 3.53it/s] 64%|██████▎ | 236394/371472 [8:12:43<10:43:43, 3.50it/s] 64%|██████▎ | 236395/371472 [8:12:43<11:36:45, 3.23it/s] 64%|██████▎ | 236396/371472 [8:12:44<11:42:05, 3.21it/s] 64%|██████▎ | 236397/371472 [8:12:44<11:07:40, 3.37it/s] 64%|██████▎ | 236398/371472 [8:12:44<10:37:35, 3.53it/s] 64%|██████▎ | 236399/371472 [8:12:44<10:34:31, 3.55it/s] 64%|██████▎ | 236400/371472 [8:12:45<10:36:28, 3.54it/s] {'loss': 2.6742, 'learning_rate': 4.2742786959425974e-07, 'epoch': 10.18} + 64%|██████▎ | 236400/371472 [8:12:45<10:36:28, 3.54it/s] 64%|██████▎ | 236401/371472 [8:12:45<10:39:32, 3.52it/s] 64%|██████▎ | 236402/371472 [8:12:45<10:38:45, 3.52it/s] 64%|██████▎ | 236403/371472 [8:12:45<10:38:09, 3.53it/s] 64%|██████▎ | 236404/371472 [8:12:46<10:37:42, 3.53it/s] 64%|██████▎ | 236405/371472 [8:12:46<10:24:08, 3.61it/s] 64%|██████▎ | 236406/371472 [8:12:46<10:34:37, 3.55it/s] 64%|██████▎ | 236407/371472 [8:12:47<11:06:22, 3.38it/s] 64%|██████▎ | 236408/371472 [8:12:47<10:56:11, 3.43it/s] 64%|██████▎ | 236409/371472 [8:12:47<11:12:40, 3.35it/s] 64%|██████▎ | 236410/371472 [8:12:48<11:02:45, 3.40it/s] 64%|██████▎ | 236411/371472 [8:12:48<10:53:08, 3.45it/s] 64%|██████▎ | 236412/371472 [8:12:48<10:46:52, 3.48it/s] 64%|██████▎ | 236413/371472 [8:12:48<10:37:33, 3.53it/s] 64%|██████▎ | 236414/371472 [8:12:49<10:33:05, 3.56it/s] 64%|██████▎ | 236415/371472 [8:12:49<10:21:08, 3.62it/s] 64%|██████▎ | 236416/371472 [8:12:49<11:00:57, 3.41it/s] 64%|██████▎ | 236417/371472 [8:12:49<10:49:37, 3.46it/s] 64%|██████▎ | 236418/371472 [8:12:50<11:33:54, 3.24it/s] 64%|██████▎ | 236419/371472 [8:12:50<11:26:25, 3.28it/s] 64%|██████▎ | 236420/371472 [8:12:50<11:02:26, 3.40it/s] {'loss': 2.8923, 'learning_rate': 4.273793876187808e-07, 'epoch': 10.18} + 64%|██████▎ | 236420/371472 [8:12:50<11:02:26, 3.40it/s] 64%|██████▎ | 236421/371472 [8:12:51<11:38:16, 3.22it/s] 64%|██████▎ | 236422/371472 [8:12:51<11:41:37, 3.21it/s] 64%|██████▎ | 236423/371472 [8:12:51<11:38:51, 3.22it/s] 64%|██████▎ | 236424/371472 [8:12:52<11:30:45, 3.26it/s] 64%|██████▎ | 236425/371472 [8:12:52<11:21:36, 3.30it/s] 64%|██████▎ | 236426/371472 [8:12:52<11:04:26, 3.39it/s] 64%|██████▎ | 236427/371472 [8:12:53<11:11:37, 3.35it/s] 64%|██████▎ | 236428/371472 [8:12:53<10:52:34, 3.45it/s] 64%|██████▎ | 236429/371472 [8:12:53<10:44:48, 3.49it/s] 64%|██████▎ | 236430/371472 [8:12:53<10:18:57, 3.64it/s] 64%|██████▎ | 236431/371472 [8:12:54<10:20:20, 3.63it/s] 64%|██████▎ | 236432/371472 [8:12:54<10:54:35, 3.44it/s] 64%|██████▎ | 236433/371472 [8:12:54<11:13:16, 3.34it/s] 64%|██████▎ | 236434/371472 [8:12:55<11:09:57, 3.36it/s] 64%|██████▎ | 236435/371472 [8:12:55<11:53:31, 3.15it/s] 64%|██████▎ | 236436/371472 [8:12:55<11:37:22, 3.23it/s] 64%|██████▎ | 236437/371472 [8:12:56<12:39:26, 2.96it/s] 64%|██████▎ | 236438/371472 [8:12:56<13:08:39, 2.85it/s] 64%|██████▎ | 236439/371472 [8:12:56<12:25:18, 3.02it/s] 64%|██████▎ | 236440/371472 [8:12:57<11:38:15, 3.22it/s] {'loss': 2.8241, 'learning_rate': 4.2733090564330194e-07, 'epoch': 10.18} + 64%|██████▎ | 236440/371472 [8:12:57<11:38:15, 3.22it/s] 64%|██████▎ | 236441/371472 [8:12:57<11:19:50, 3.31it/s] 64%|██████▎ | 236442/371472 [8:12:57<11:11:18, 3.35it/s] 64%|██████▎ | 236443/371472 [8:12:57<10:55:24, 3.43it/s] 64%|██████▎ | 236444/371472 [8:12:58<10:45:47, 3.48it/s] 64%|██████▎ | 236445/371472 [8:12:58<10:27:54, 3.58it/s] 64%|██████▎ | 236446/371472 [8:12:58<10:22:59, 3.61it/s] 64%|██████▎ | 236447/371472 [8:12:59<10:42:20, 3.50it/s] 64%|██████▎ | 236448/371472 [8:12:59<10:28:25, 3.58it/s] 64%|██████▎ | 236449/371472 [8:12:59<10:35:50, 3.54it/s] 64%|██████▎ | 236450/371472 [8:12:59<11:05:49, 3.38it/s] 64%|██████▎ | 236451/371472 [8:13:00<11:13:19, 3.34it/s] 64%|██████▎ | 236452/371472 [8:13:00<11:12:43, 3.35it/s] 64%|██████▎ | 236453/371472 [8:13:00<11:40:15, 3.21it/s] 64%|██████▎ | 236454/371472 [8:13:01<11:51:50, 3.16it/s] 64%|██████▎ | 236455/371472 [8:13:01<11:24:46, 3.29it/s] 64%|██████▎ | 236456/371472 [8:13:01<11:04:52, 3.38it/s] 64%|██████▎ | 236457/371472 [8:13:02<11:14:02, 3.34it/s] 64%|██████▎ | 236458/371472 [8:13:02<11:03:40, 3.39it/s] 64%|██████▎ | 236459/371472 [8:13:02<12:29:42, 3.00it/s] 64%|██████▎ | 236460/371472 [8:13:03<11:58:50, 3.13it/s] {'loss': 2.6736, 'learning_rate': 4.27282423667823e-07, 'epoch': 10.18} + 64%|██████▎ | 236460/371472 [8:13:03<11:58:50, 3.13it/s] 64%|██████▎ | 236461/371472 [8:13:03<11:43:20, 3.20it/s] 64%|██████▎ | 236462/371472 [8:13:03<11:28:31, 3.27it/s] 64%|██████▎ | 236463/371472 [8:13:03<11:12:18, 3.35it/s] 64%|██████▎ | 236464/371472 [8:13:04<11:46:38, 3.18it/s] 64%|██████▎ | 236465/371472 [8:13:04<12:01:06, 3.12it/s] 64%|██████▎ | 236466/371472 [8:13:04<12:03:05, 3.11it/s] 64%|██████▎ | 236467/371472 [8:13:05<11:33:45, 3.24it/s] 64%|██████▎ | 236468/371472 [8:13:05<12:19:36, 3.04it/s] 64%|██████▎ | 236469/371472 [8:13:05<11:45:59, 3.19it/s] 64%|██████▎ | 236470/371472 [8:13:06<11:38:23, 3.22it/s] 64%|██████▎ | 236471/371472 [8:13:06<11:26:47, 3.28it/s] 64%|██████▎ | 236472/371472 [8:13:06<10:59:38, 3.41it/s] 64%|██████▎ | 236473/371472 [8:13:06<10:33:34, 3.55it/s] 64%|██████▎ | 236474/371472 [8:13:07<11:20:53, 3.30it/s] 64%|██████▎ | 236475/371472 [8:13:07<12:04:47, 3.10it/s] 64%|██████▎ | 236476/371472 [8:13:07<11:57:32, 3.14it/s] 64%|██████▎ | 236477/371472 [8:13:08<11:45:02, 3.19it/s] 64%|██████▎ | 236478/371472 [8:13:08<11:40:04, 3.21it/s] 64%|██████▎ | 236479/371472 [8:13:08<11:28:29, 3.27it/s] 64%|██████▎ | 236480/371472 [8:13:09<11:51:17, 3.16it/s] {'loss': 2.6149, 'learning_rate': 4.272339416923442e-07, 'epoch': 10.19} + 64%|██████▎ | 236480/371472 [8:13:09<11:51:17, 3.16it/s] 64%|██████▎ | 236481/371472 [8:13:09<11:49:34, 3.17it/s] 64%|██████▎ | 236482/371472 [8:13:09<11:17:29, 3.32it/s] 64%|██████▎ | 236483/371472 [8:13:10<11:02:36, 3.40it/s] 64%|██████▎ | 236484/371472 [8:13:10<11:32:17, 3.25it/s] 64%|██████▎ | 236485/371472 [8:13:10<11:15:10, 3.33it/s] 64%|██████▎ | 236486/371472 [8:13:10<10:47:32, 3.47it/s] 64%|██████▎ | 236487/371472 [8:13:11<10:39:47, 3.52it/s] 64%|██████▎ | 236488/371472 [8:13:11<10:27:56, 3.58it/s] 64%|██████▎ | 236489/371472 [8:13:11<11:02:43, 3.39it/s] 64%|██████▎ | 236490/371472 [8:13:12<11:01:50, 3.40it/s] 64%|██████▎ | 236491/371472 [8:13:12<10:54:33, 3.44it/s] 64%|██████▎ | 236492/371472 [8:13:12<10:50:43, 3.46it/s] 64%|██████▎ | 236493/371472 [8:13:13<10:57:21, 3.42it/s] 64%|██████▎ | 236494/371472 [8:13:13<10:49:15, 3.46it/s] 64%|██████▎ | 236495/371472 [8:13:13<11:38:55, 3.22it/s] 64%|██████▎ | 236496/371472 [8:13:13<11:37:46, 3.22it/s] 64%|██████▎ | 236497/371472 [8:13:14<11:13:39, 3.34it/s] 64%|██████▎ | 236498/371472 [8:13:14<10:49:37, 3.46it/s] 64%|██████▎ | 236499/371472 [8:13:14<10:45:27, 3.49it/s] 64%|██████▎ | 236500/371472 [8:13:15<10:25:43, 3.60it/s] {'loss': 2.8253, 'learning_rate': 4.271854597168652e-07, 'epoch': 10.19} + 64%|██████▎ | 236500/371472 [8:13:15<10:25:43, 3.60it/s] 64%|██████▎ | 236501/371472 [8:13:15<10:46:43, 3.48it/s] 64%|██████▎ | 236502/371472 [8:13:15<10:54:19, 3.44it/s] 64%|██████▎ | 236503/371472 [8:13:15<10:49:59, 3.46it/s] 64%|██████▎ | 236504/371472 [8:13:16<11:27:20, 3.27it/s] 64%|██████▎ | 236505/371472 [8:13:16<11:06:53, 3.37it/s] 64%|██████▎ | 236506/371472 [8:13:16<11:02:15, 3.40it/s] 64%|██████▎ | 236507/371472 [8:13:17<10:50:52, 3.46it/s] 64%|██████▎ | 236508/371472 [8:13:17<11:26:50, 3.27it/s] 64%|██████▎ | 236509/371472 [8:13:17<11:10:18, 3.36it/s] 64%|██████▎ | 236510/371472 [8:13:18<11:33:23, 3.24it/s] 64%|██████▎ | 236511/371472 [8:13:18<12:20:43, 3.04it/s] 64%|██████▎ | 236512/371472 [8:13:18<11:43:09, 3.20it/s] 64%|██████▎ | 236513/371472 [8:13:18<11:04:52, 3.38it/s] 64%|██████▎ | 236514/371472 [8:13:19<11:22:36, 3.30it/s] 64%|██████▎ | 236515/371472 [8:13:19<11:18:59, 3.31it/s] 64%|██████▎ | 236516/371472 [8:13:19<10:57:47, 3.42it/s] 64%|██████▎ | 236517/371472 [8:13:20<10:46:14, 3.48it/s] 64%|██████▎ | 236518/371472 [8:13:20<10:42:11, 3.50it/s] 64%|██████▎ | 236519/371472 [8:13:20<10:41:10, 3.51it/s] 64%|██████▎ | 236520/371472 [8:13:20<10:29:02, 3.58it/s] {'loss': 2.7735, 'learning_rate': 4.271369777413864e-07, 'epoch': 10.19} + 64%|██████▎ | 236520/371472 [8:13:20<10:29:02, 3.58it/s] 64%|██████▎ | 236521/371472 [8:13:21<10:33:52, 3.55it/s] 64%|██████▎ | 236522/371472 [8:13:21<10:44:20, 3.49it/s] 64%|██████▎ | 236523/371472 [8:13:21<10:49:11, 3.46it/s] 64%|██████▎ | 236524/371472 [8:13:22<10:41:16, 3.51it/s] 64%|██████▎ | 236525/371472 [8:13:22<11:21:11, 3.30it/s] 64%|██████▎ | 236526/371472 [8:13:22<11:30:42, 3.26it/s] 64%|██████▎ | 236527/371472 [8:13:23<11:09:14, 3.36it/s] 64%|██████▎ | 236528/371472 [8:13:23<10:57:40, 3.42it/s] 64%|██████▎ | 236529/371472 [8:13:23<10:55:42, 3.43it/s] 64%|██████▎ | 236530/371472 [8:13:23<10:26:37, 3.59it/s] 64%|██████▎ | 236531/371472 [8:13:24<10:31:14, 3.56it/s] 64%|██████▎ | 236532/371472 [8:13:24<12:04:01, 3.11it/s] 64%|██████▎ | 236533/371472 [8:13:24<11:52:41, 3.16it/s] 64%|██████▎ | 236534/371472 [8:13:25<11:29:41, 3.26it/s] 64%|██████▎ | 236535/371472 [8:13:25<11:08:13, 3.37it/s] 64%|██████▎ | 236536/371472 [8:13:25<10:55:50, 3.43it/s] 64%|██████▎ | 236537/371472 [8:13:26<11:11:33, 3.35it/s] 64%|██████▎ | 236538/371472 [8:13:26<11:30:38, 3.26it/s] 64%|██████▎ | 236539/371472 [8:13:26<13:11:17, 2.84it/s] 64%|██████▎ | 236540/371472 [8:13:27<12:20:28, 3.04it/s] {'loss': 2.7163, 'learning_rate': 4.2708849576590746e-07, 'epoch': 10.19} + 64%|██████▎ | 236540/371472 [8:13:27<12:20:28, 3.04it/s] 64%|██████▎ | 236541/371472 [8:13:27<12:01:04, 3.12it/s] 64%|██████▎ | 236542/371472 [8:13:27<11:56:35, 3.14it/s] 64%|██████▎ | 236543/371472 [8:13:28<11:54:13, 3.15it/s] 64%|██████▎ | 236544/371472 [8:13:28<11:32:59, 3.25it/s] 64%|██████▎ | 236545/371472 [8:13:28<11:13:27, 3.34it/s] 64%|██████▎ | 236546/371472 [8:13:28<10:49:12, 3.46it/s] 64%|██████▎ | 236547/371472 [8:13:29<10:56:56, 3.42it/s] 64%|██████▎ | 236548/371472 [8:13:29<10:39:44, 3.52it/s] 64%|██████▎ | 236549/371472 [8:13:29<11:08:01, 3.37it/s] 64%|██████▎ | 236550/371472 [8:13:30<11:40:53, 3.21it/s] 64%|██████▎ | 236551/371472 [8:13:30<11:31:01, 3.25it/s] 64%|██████▎ | 236552/371472 [8:13:30<11:18:20, 3.31it/s] 64%|██████▎ | 236553/371472 [8:13:30<11:05:36, 3.38it/s] 64%|██████▎ | 236554/371472 [8:13:31<11:21:06, 3.30it/s] 64%|██████▎ | 236555/371472 [8:13:31<10:58:27, 3.41it/s] 64%|██████▎ | 236556/371472 [8:13:31<10:57:10, 3.42it/s] 64%|██████▎ | 236557/371472 [8:13:32<10:36:24, 3.53it/s] 64%|██████▎ | 236558/371472 [8:13:32<10:46:33, 3.48it/s] 64%|██████▎ | 236559/371472 [8:13:32<10:58:10, 3.42it/s] 64%|██████▎ | 236560/371472 [8:13:33<11:10:01, 3.36it/s] {'loss': 2.6441, 'learning_rate': 4.270400137904286e-07, 'epoch': 10.19} + 64%|██████▎ | 236560/371472 [8:13:33<11:10:01, 3.36it/s] 64%|██████▎ | 236561/371472 [8:13:33<11:23:42, 3.29it/s] 64%|██████▎ | 236562/371472 [8:13:33<11:20:38, 3.30it/s] 64%|██████▎ | 236563/371472 [8:13:33<11:07:56, 3.37it/s] 64%|██████▎ | 236564/371472 [8:13:34<11:48:54, 3.17it/s] 64%|██████▎ | 236565/371472 [8:13:34<11:29:36, 3.26it/s] 64%|██████▎ | 236566/371472 [8:13:34<11:18:18, 3.31it/s] 64%|██████▎ | 236567/371472 [8:13:35<11:08:12, 3.36it/s] 64%|██████▎ | 236568/371472 [8:13:35<11:05:41, 3.38it/s] 64%|██████▎ | 236569/371472 [8:13:35<11:24:36, 3.28it/s] 64%|██████▎ | 236570/371472 [8:13:36<12:06:35, 3.09it/s] 64%|██████▎ | 236571/371472 [8:13:36<12:40:50, 2.96it/s] 64%|██████▎ | 236572/371472 [8:13:36<12:50:37, 2.92it/s] 64%|██████▎ | 236573/371472 [8:13:37<13:06:42, 2.86it/s] 64%|██████▎ | 236574/371472 [8:13:37<13:02:28, 2.87it/s] 64%|██████▎ | 236575/371472 [8:13:37<12:52:21, 2.91it/s] 64%|██████▎ | 236576/371472 [8:13:38<12:10:26, 3.08it/s] 64%|██████▎ | 236577/371472 [8:13:38<12:17:09, 3.05it/s] 64%|██████▎ | 236578/371472 [8:13:38<11:53:11, 3.15it/s] 64%|██████▎ | 236579/371472 [8:13:39<11:25:48, 3.28it/s] 64%|██████▎ | 236580/371472 [8:13:39<11:05:29, 3.38it/s] {'loss': 2.6054, 'learning_rate': 4.269915318149496e-07, 'epoch': 10.19} + 64%|██████▎ | 236580/371472 [8:13:39<11:05:29, 3.38it/s] 64%|██████▎ | 236581/371472 [8:13:39<10:56:16, 3.43it/s] 64%|██████▎ | 236582/371472 [8:13:39<10:55:49, 3.43it/s] 64%|██████▎ | 236583/371472 [8:13:40<11:11:39, 3.35it/s] 64%|██████▎ | 236584/371472 [8:13:40<11:18:34, 3.31it/s] 64%|██████▎ | 236585/371472 [8:13:40<10:59:55, 3.41it/s] 64%|██████▎ | 236586/371472 [8:13:41<10:47:09, 3.47it/s] 64%|██████▎ | 236587/371472 [8:13:41<10:43:08, 3.50it/s] 64%|██████▎ | 236588/371472 [8:13:41<11:39:01, 3.22it/s] 64%|██████▎ | 236589/371472 [8:13:42<11:12:05, 3.34it/s] 64%|██████▎ | 236590/371472 [8:13:42<11:03:27, 3.39it/s] 64%|██████▎ | 236591/371472 [8:13:42<11:10:24, 3.35it/s] 64%|██████▎ | 236592/371472 [8:13:42<11:28:37, 3.26it/s] 64%|██████▎ | 236593/371472 [8:13:43<11:06:41, 3.37it/s] 64%|██████▎ | 236594/371472 [8:13:43<11:02:51, 3.39it/s] 64%|██████▎ | 236595/371472 [8:13:43<10:58:08, 3.42it/s] 64%|██████▎ | 236596/371472 [8:13:44<11:12:20, 3.34it/s] 64%|██████▎ | 236597/371472 [8:13:44<10:59:53, 3.41it/s] 64%|██████▎ | 236598/371472 [8:13:44<10:51:12, 3.45it/s] 64%|██████▎ | 236599/371472 [8:13:44<10:46:43, 3.48it/s] 64%|██████▎ | 236600/371472 [8:13:45<10:51:35, 3.45it/s] {'loss': 2.9061, 'learning_rate': 4.2694304983947083e-07, 'epoch': 10.19} + 64%|██████▎ | 236600/371472 [8:13:45<10:51:35, 3.45it/s] 64%|██████▎ | 236601/371472 [8:13:45<10:53:27, 3.44it/s] 64%|██████▎ | 236602/371472 [8:13:45<10:52:44, 3.44it/s] 64%|██████▎ | 236603/371472 [8:13:46<10:42:36, 3.50it/s] 64%|██████▎ | 236604/371472 [8:13:46<10:44:49, 3.49it/s] 64%|██████▎ | 236605/371472 [8:13:46<11:13:50, 3.34it/s] 64%|██████▎ | 236606/371472 [8:13:46<10:53:12, 3.44it/s] 64%|██████▎ | 236607/371472 [8:13:47<10:42:38, 3.50it/s] 64%|██████▎ | 236608/371472 [8:13:47<10:40:34, 3.51it/s] 64%|██████▎ | 236609/371472 [8:13:47<10:51:22, 3.45it/s] 64%|██████▎ | 236610/371472 [8:13:48<11:01:47, 3.40it/s] 64%|██████▎ | 236611/371472 [8:13:48<11:03:22, 3.39it/s] 64%|██████▎ | 236612/371472 [8:13:48<11:04:05, 3.38it/s] 64%|██████▎ | 236613/371472 [8:13:49<10:57:58, 3.42it/s] 64%|██████▎ | 236614/371472 [8:13:49<11:18:50, 3.31it/s] 64%|██████▎ | 236615/371472 [8:13:49<11:26:27, 3.27it/s] 64%|██████▎ | 236616/371472 [8:13:49<11:19:14, 3.31it/s] 64%|██████▎ | 236617/371472 [8:13:50<11:14:31, 3.33it/s] 64%|██████▎ | 236618/371472 [8:13:50<11:12:04, 3.34it/s] 64%|██████▎ | 236619/371472 [8:13:50<11:05:07, 3.38it/s] 64%|██████▎ | 236620/371472 [8:13:51<10:50:37, 3.45it/s] {'loss': 2.7978, 'learning_rate': 4.2689456786399185e-07, 'epoch': 10.19} + 64%|██████▎ | 236620/371472 [8:13:51<10:50:37, 3.45it/s] 64%|██████▎ | 236621/371472 [8:13:51<10:43:54, 3.49it/s] 64%|██████▎ | 236622/371472 [8:13:51<10:57:24, 3.42it/s] 64%|██████▎ | 236623/371472 [8:13:52<11:00:59, 3.40it/s] 64%|██████▎ | 236624/371472 [8:13:52<11:12:33, 3.34it/s] 64%|██████▎ | 236625/371472 [8:13:52<10:56:37, 3.42it/s] 64%|██████▎ | 236626/371472 [8:13:52<10:46:57, 3.47it/s] 64%|██████▎ | 236627/371472 [8:13:53<10:36:30, 3.53it/s] 64%|██████▎ | 236628/371472 [8:13:53<10:29:15, 3.57it/s] 64%|██████▎ | 236629/371472 [8:13:53<11:13:40, 3.34it/s] 64%|██████▎ | 236630/371472 [8:13:54<11:01:00, 3.40it/s] 64%|██████▎ | 236631/371472 [8:13:54<10:45:54, 3.48it/s] 64%|██████▎ | 236632/371472 [8:13:54<10:46:18, 3.48it/s] 64%|██████▎ | 236633/371472 [8:13:54<10:56:15, 3.42it/s] 64%|██████▎ | 236634/371472 [8:13:55<11:16:17, 3.32it/s] 64%|██████▎ | 236635/371472 [8:13:55<11:07:50, 3.36it/s] 64%|██████▎ | 236636/371472 [8:13:55<11:00:56, 3.40it/s] 64%|██████▎ | 236637/371472 [8:13:56<10:44:44, 3.49it/s] 64%|██████▎ | 236638/371472 [8:13:56<10:40:31, 3.51it/s] 64%|██████▎ | 236639/371472 [8:13:56<11:28:43, 3.26it/s] 64%|██████▎ | 236640/371472 [8:13:56<10:59:57, 3.41it/s] {'loss': 2.6875, 'learning_rate': 4.26846085888513e-07, 'epoch': 10.19} + 64%|██████▎ | 236640/371472 [8:13:56<10:59:57, 3.41it/s] 64%|██████▎ | 236641/371472 [8:13:57<10:39:11, 3.52it/s] 64%|██████▎ | 236642/371472 [8:13:57<10:26:31, 3.59it/s] 64%|██████▎ | 236643/371472 [8:13:57<11:05:47, 3.38it/s] 64%|██████▎ | 236644/371472 [8:13:58<11:13:39, 3.34it/s] 64%|██████▎ | 236645/371472 [8:13:58<12:13:58, 3.06it/s] 64%|██████▎ | 236646/371472 [8:13:58<11:51:59, 3.16it/s] 64%|██████▎ | 236647/371472 [8:13:59<11:45:43, 3.18it/s] 64%|██████▎ | 236648/371472 [8:13:59<11:20:02, 3.30it/s] 64%|██████▎ | 236649/371472 [8:13:59<11:03:43, 3.39it/s] 64%|██████▎ | 236650/371472 [8:13:59<10:40:55, 3.51it/s] 64%|██████▎ | 236651/371472 [8:14:00<10:51:29, 3.45it/s] 64%|██████▎ | 236652/371472 [8:14:00<11:02:04, 3.39it/s] 64%|██████▎ | 236653/371472 [8:14:00<10:53:07, 3.44it/s] 64%|██████▎ | 236654/371472 [8:14:01<10:36:39, 3.53it/s] 64%|██████▎ | 236655/371472 [8:14:01<10:34:36, 3.54it/s] 64%|██████▎ | 236656/371472 [8:14:01<10:14:57, 3.65it/s] 64%|██████▎ | 236657/371472 [8:14:01<10:53:29, 3.44it/s] 64%|██████▎ | 236658/371472 [8:14:02<10:45:50, 3.48it/s] 64%|██████▎ | 236659/371472 [8:14:02<10:53:37, 3.44it/s] 64%|██████▎ | 236660/371472 [8:14:02<10:33:12, 3.55it/s] {'loss': 2.739, 'learning_rate': 4.267976039130341e-07, 'epoch': 10.19} + 64%|██████▎ | 236660/371472 [8:14:02<10:33:12, 3.55it/s] 64%|██████▎ | 236661/371472 [8:14:03<10:38:18, 3.52it/s] 64%|██████▎ | 236662/371472 [8:14:03<10:31:01, 3.56it/s] 64%|██████��� | 236663/371472 [8:14:03<11:42:40, 3.20it/s] 64%|██████▎ | 236664/371472 [8:14:04<11:08:39, 3.36it/s] 64%|██████▎ | 236665/371472 [8:14:04<10:57:47, 3.42it/s] 64%|██████▎ | 236666/371472 [8:14:04<10:34:36, 3.54it/s] 64%|██████▎ | 236667/371472 [8:14:04<10:28:16, 3.58it/s] 64%|██████▎ | 236668/371472 [8:14:05<10:39:42, 3.51it/s] 64%|██████▎ | 236669/371472 [8:14:05<10:54:33, 3.43it/s] 64%|██████▎ | 236670/371472 [8:14:05<10:57:54, 3.41it/s] 64%|██████▎ | 236671/371472 [8:14:06<11:02:22, 3.39it/s] 64%|██████▎ | 236672/371472 [8:14:06<10:45:25, 3.48it/s] 64%|██████▎ | 236673/371472 [8:14:06<10:37:48, 3.52it/s] 64%|██████▎ | 236674/371472 [8:14:06<11:21:04, 3.30it/s] 64%|██████▎ | 236675/371472 [8:14:07<11:11:39, 3.34it/s] 64%|██████▎ | 236676/371472 [8:14:07<11:47:57, 3.17it/s] 64%|██████▎ | 236677/371472 [8:14:07<12:18:53, 3.04it/s] 64%|██████▎ | 236678/371472 [8:14:08<11:53:26, 3.15it/s] 64%|██████▎ | 236679/371472 [8:14:08<11:38:22, 3.22it/s] 64%|██████▎ | 236680/371472 [8:14:08<11:16:45, 3.32it/s] {'loss': 2.7167, 'learning_rate': 4.267491219375552e-07, 'epoch': 10.19} + 64%|██████▎ | 236680/371472 [8:14:08<11:16:45, 3.32it/s] 64%|██████▎ | 236681/371472 [8:14:09<11:00:32, 3.40it/s] 64%|██████▎ | 236682/371472 [8:14:09<10:33:16, 3.55it/s] 64%|██████▎ | 236683/371472 [8:14:09<11:09:16, 3.36it/s] 64%|██████▎ | 236684/371472 [8:14:09<10:48:14, 3.47it/s] 64%|██████▎ | 236685/371472 [8:14:10<10:50:02, 3.46it/s] 64%|██████▎ | 236686/371472 [8:14:10<11:19:16, 3.31it/s] 64%|██████▎ | 236687/371472 [8:14:10<10:52:30, 3.44it/s] 64%|██████▎ | 236688/371472 [8:14:11<10:31:19, 3.56it/s] 64%|██████▎ | 236689/371472 [8:14:11<10:15:03, 3.65it/s] 64%|██████▎ | 236690/371472 [8:14:11<11:06:16, 3.37it/s] 64%|██████▎ | 236691/371472 [8:14:11<11:01:05, 3.40it/s] 64%|██████▎ | 236692/371472 [8:14:12<11:05:20, 3.38it/s] 64%|██████▎ | 236693/371472 [8:14:12<10:57:35, 3.42it/s] 64%|██████▎ | 236694/371472 [8:14:12<11:01:57, 3.39it/s] 64%|██████▎ | 236695/371472 [8:14:13<11:42:36, 3.20it/s] 64%|██████▎ | 236696/371472 [8:14:13<11:07:33, 3.36it/s] 64%|██████▎ | 236697/371472 [8:14:13<11:30:35, 3.25it/s] 64%|██████▎ | 236698/371472 [8:14:14<10:55:01, 3.43it/s] 64%|██████▎ | 236699/371472 [8:14:14<10:39:07, 3.51it/s] 64%|██████▎ | 236700/371472 [8:14:14<10:37:19, 3.52it/s] {'loss': 2.7461, 'learning_rate': 4.267006399620763e-07, 'epoch': 10.2} + 64%|██████▎ | 236700/371472 [8:14:14<10:37:19, 3.52it/s] 64%|██████▎ | 236701/371472 [8:14:14<10:43:38, 3.49it/s] 64%|██████▎ | 236702/371472 [8:14:15<10:21:31, 3.61it/s] 64%|██████▎ | 236703/371472 [8:14:15<10:18:42, 3.63it/s] 64%|██████▎ | 236704/371472 [8:14:15<10:20:01, 3.62it/s] 64%|██████▎ | 236705/371472 [8:14:16<10:36:39, 3.53it/s] 64%|██████▎ | 236706/371472 [8:14:16<10:31:54, 3.55it/s] 64%|██████▎ | 236707/371472 [8:14:16<10:35:55, 3.53it/s] 64%|██████▎ | 236708/371472 [8:14:16<10:30:43, 3.56it/s] 64%|██████▎ | 236709/371472 [8:14:17<10:56:49, 3.42it/s] 64%|██████▎ | 236710/371472 [8:14:17<10:59:51, 3.40it/s] 64%|██████▎ | 236711/371472 [8:14:17<10:41:05, 3.50it/s] 64%|██████▎ | 236712/371472 [8:14:18<10:44:42, 3.48it/s] 64%|██████▎ | 236713/371472 [8:14:18<10:37:57, 3.52it/s] 64%|██████▎ | 236714/371472 [8:14:18<10:23:16, 3.60it/s] 64%|██████▎ | 236715/371472 [8:14:18<10:29:27, 3.57it/s] 64%|██████▎ | 236716/371472 [8:14:19<11:10:31, 3.35it/s] 64%|██████▎ | 236717/371472 [8:14:19<10:53:18, 3.44it/s] 64%|██████▎ | 236718/371472 [8:14:19<11:18:04, 3.31it/s] 64%|██████▎ | 236719/371472 [8:14:20<11:04:04, 3.38it/s] 64%|██████▎ | 236720/371472 [8:14:20<10:49:14, 3.46it/s] {'loss': 2.8194, 'learning_rate': 4.2665215798659736e-07, 'epoch': 10.2} + 64%|██████▎ | 236720/371472 [8:14:20<10:49:14, 3.46it/s] 64%|██████▎ | 236721/371472 [8:14:20<12:32:12, 2.99it/s] 64%|██████▎ | 236722/371472 [8:14:21<11:59:00, 3.12it/s] 64%|██���███▎ | 236723/371472 [8:14:21<11:41:22, 3.20it/s] 64%|██████▎ | 236724/371472 [8:14:21<11:37:49, 3.22it/s] 64%|██████▎ | 236725/371472 [8:14:22<11:58:08, 3.13it/s] 64%|██████▎ | 236726/371472 [8:14:22<11:28:15, 3.26it/s] 64%|██████▎ | 236727/371472 [8:14:22<11:28:19, 3.26it/s] 64%|██████▎ | 236728/371472 [8:14:22<11:18:34, 3.31it/s] 64%|██████▎ | 236729/371472 [8:14:23<11:00:21, 3.40it/s] 64%|██████▎ | 236730/371472 [8:14:23<11:15:43, 3.32it/s] 64%|██████▎ | 236731/371472 [8:14:23<11:15:59, 3.32it/s] 64%|██████▎ | 236732/371472 [8:14:24<11:28:11, 3.26it/s] 64%|██████▎ | 236733/371472 [8:14:24<12:01:01, 3.11it/s] 64%|██████▎ | 236734/371472 [8:14:24<11:42:02, 3.20it/s] 64%|██████▎ | 236735/371472 [8:14:25<11:45:52, 3.18it/s] 64%|██████▎ | 236736/371472 [8:14:25<11:16:39, 3.32it/s] 64%|██████▎ | 236737/371472 [8:14:25<10:59:21, 3.41it/s] 64%|██████▎ | 236738/371472 [8:14:25<10:57:25, 3.42it/s] 64%|██████▎ | 236739/371472 [8:14:26<10:41:11, 3.50it/s] 64%|██████▎ | 236740/371472 [8:14:26<11:32:09, 3.24it/s] {'loss': 2.5267, 'learning_rate': 4.2660367601111854e-07, 'epoch': 10.2} + 64%|██████▎ | 236740/371472 [8:14:26<11:32:09, 3.24it/s] 64%|██████▎ | 236741/371472 [8:14:26<11:08:26, 3.36it/s] 64%|██████▎ | 236742/371472 [8:14:27<10:45:52, 3.48it/s] 64%|██████▎ | 236743/371472 [8:14:27<10:49:44, 3.46it/s] 64%|██████▎ | 236744/371472 [8:14:27<11:04:29, 3.38it/s] 64%|██████▎ | 236745/371472 [8:14:27<11:18:19, 3.31it/s] 64%|██████▎ | 236746/371472 [8:14:28<11:12:23, 3.34it/s] 64%|██████▎ | 236747/371472 [8:14:28<10:57:51, 3.41it/s] 64%|██████▎ | 236748/371472 [8:14:28<10:51:31, 3.45it/s] 64%|██████▎ | 236749/371472 [8:14:29<10:36:02, 3.53it/s] 64%|██████▎ | 236750/371472 [8:14:29<10:36:38, 3.53it/s] 64%|██████▎ | 236751/371472 [8:14:29<10:31:52, 3.55it/s] 64%|██████▎ | 236752/371472 [8:14:30<11:18:27, 3.31it/s] 64%|██████▎ | 236753/371472 [8:14:30<11:05:10, 3.38it/s] 64%|██████▎ | 236754/371472 [8:14:30<11:45:30, 3.18it/s] 64%|██████▎ | 236755/371472 [8:14:30<11:55:43, 3.14it/s] 64%|██████▎ | 236756/371472 [8:14:31<12:31:51, 2.99it/s] 64%|██████▎ | 236757/371472 [8:14:31<11:46:25, 3.18it/s] 64%|██████▎ | 236758/371472 [8:14:31<11:17:24, 3.31it/s] 64%|██████▎ | 236759/371472 [8:14:32<10:50:19, 3.45it/s] 64%|██████▎ | 236760/371472 [8:14:32<11:31:42, 3.25it/s] {'loss': 2.7588, 'learning_rate': 4.2655519403563956e-07, 'epoch': 10.2} + 64%|██████▎ | 236760/371472 [8:14:32<11:31:42, 3.25it/s] 64%|██████▎ | 236761/371472 [8:14:32<11:15:56, 3.32it/s] 64%|██████▎ | 236762/371472 [8:14:33<10:59:05, 3.41it/s] 64%|██████▎ | 236763/371472 [8:14:33<11:26:02, 3.27it/s] 64%|██████▎ | 236764/371472 [8:14:33<11:25:41, 3.27it/s] 64%|██████▎ | 236765/371472 [8:14:33<11:00:26, 3.40it/s] 64%|██████▎ | 236766/371472 [8:14:34<10:54:48, 3.43it/s] 64%|██████▎ | 236767/371472 [8:14:34<10:53:04, 3.44it/s] 64%|██████▎ | 236768/371472 [8:14:34<11:15:48, 3.32it/s] 64%|██████▎ | 236769/371472 [8:14:35<10:50:49, 3.45it/s] 64%|██████▎ | 236770/371472 [8:14:35<10:39:58, 3.51it/s] 64%|██████▎ | 236771/371472 [8:14:35<10:41:32, 3.50it/s] 64%|██████▎ | 236772/371472 [8:14:36<11:10:11, 3.35it/s] 64%|██████▎ | 236773/371472 [8:14:36<10:49:39, 3.46it/s] 64%|██████▎ | 236774/371472 [8:14:36<11:10:29, 3.35it/s] 64%|██████▎ | 236775/371472 [8:14:36<11:00:43, 3.40it/s] 64%|██████▎ | 236776/371472 [8:14:37<10:56:54, 3.42it/s] 64%|██████▎ | 236777/371472 [8:14:37<10:53:12, 3.44it/s] 64%|██████▎ | 236778/371472 [8:14:37<11:52:32, 3.15it/s] 64%|██████▎ | 236779/371472 [8:14:38<12:11:03, 3.07it/s] 64%|██████▎ | 236780/371472 [8:14:38<11:48:56, 3.17it/s] {'loss': 2.7631, 'learning_rate': 4.2650671206016074e-07, 'epoch': 10.2} + 64%|██████▎ | 236780/371472 [8:14:38<11:48:56, 3.17it/s] 64%|██████▎ | 236781/371472 [8:14:38<11:39:46, 3.21it/s] 64%|██████▎ | 236782/371472 [8:14:39<11:26:19, 3.27it/s] 64%|██████▎ | 236783/371472 [8:14:39<11:01:13, 3.39it/s] 64%|██████▎ | 236784/371472 [8:14:39<11:28:43, 3.26it/s] 64%|██████▎ | 236785/371472 [8:14:40<11:32:54, 3.24it/s] 64%|██████▎ | 236786/371472 [8:14:40<11:30:02, 3.25it/s] 64%|██████▎ | 236787/371472 [8:14:40<12:35:16, 2.97it/s] 64%|██████▎ | 236788/371472 [8:14:41<11:53:56, 3.14it/s] 64%|██████▎ | 236789/371472 [8:14:41<11:29:06, 3.26it/s] 64%|██████▎ | 236790/371472 [8:14:41<11:11:28, 3.34it/s] 64%|██████▎ | 236791/371472 [8:14:41<11:06:53, 3.37it/s] 64%|██████▎ | 236792/371472 [8:14:42<11:38:43, 3.21it/s] 64%|██████▎ | 236793/371472 [8:14:42<11:27:56, 3.26it/s] 64%|██████▎ | 236794/371472 [8:14:42<11:57:20, 3.13it/s] 64%|██████▎ | 236795/371472 [8:14:43<11:48:23, 3.17it/s] 64%|██████▎ | 236796/371472 [8:14:43<11:20:15, 3.30it/s] 64%|██████▎ | 236797/371472 [8:14:43<11:06:25, 3.37it/s] 64%|██████▎ | 236798/371472 [8:14:44<11:20:33, 3.30it/s] 64%|██████▎ | 236799/371472 [8:14:44<11:18:05, 3.31it/s] 64%|██████▎ | 236800/371472 [8:14:44<11:07:29, 3.36it/s] {'loss': 2.6696, 'learning_rate': 4.264582300846818e-07, 'epoch': 10.2} + 64%|██████▎ | 236800/371472 [8:14:44<11:07:29, 3.36it/s] 64%|██████▎ | 236801/371472 [8:14:44<11:02:15, 3.39it/s] 64%|██████▎ | 236802/371472 [8:14:45<12:48:31, 2.92it/s] 64%|██████▎ | 236803/371472 [8:14:45<12:01:55, 3.11it/s] 64%|██████▎ | 236804/371472 [8:14:45<11:37:25, 3.22it/s] 64%|██████▎ | 236805/371472 [8:14:46<11:19:13, 3.30it/s] 64%|██████▎ | 236806/371472 [8:14:46<11:23:34, 3.28it/s] 64%|██████▎ | 236807/371472 [8:14:46<11:05:10, 3.37it/s] 64%|██████▎ | 236808/371472 [8:14:47<11:02:04, 3.39it/s] 64%|██████▎ | 236809/371472 [8:14:47<11:18:17, 3.31it/s] 64%|██████▎ | 236810/371472 [8:14:47<12:52:57, 2.90it/s] 64%|██████▎ | 236811/371472 [8:14:48<12:48:58, 2.92it/s] 64%|██████▎ | 236812/371472 [8:14:48<12:04:15, 3.10it/s] 64%|██████▎ | 236813/371472 [8:14:48<11:40:17, 3.20it/s] 64%|██████▍ | 236814/371472 [8:14:49<11:08:24, 3.36it/s] 64%|██████▍ | 236815/371472 [8:14:49<11:00:34, 3.40it/s] 64%|██████▍ | 236816/371472 [8:14:49<10:34:31, 3.54it/s] 64%|██████▍ | 236817/371472 [8:14:49<10:35:38, 3.53it/s] 64%|██████▍ | 236818/371472 [8:14:50<10:33:57, 3.54it/s] 64%|██████▍ | 236819/371472 [8:14:50<10:46:57, 3.47it/s] 64%|██████▍ | 236820/371472 [8:14:50<11:14:40, 3.33it/s] {'loss': 2.7617, 'learning_rate': 4.2640974810920293e-07, 'epoch': 10.2} + 64%|██████▍ | 236820/371472 [8:14:50<11:14:40, 3.33it/s] 64%|██████▍ | 236821/371472 [8:14:51<11:10:08, 3.35it/s] 64%|██████▍ | 236822/371472 [8:14:51<11:15:50, 3.32it/s] 64%|██████▍ | 236823/371472 [8:14:51<11:28:31, 3.26it/s] 64%|██████▍ | 236824/371472 [8:14:51<11:25:12, 3.28it/s] 64%|██████▍ | 236825/371472 [8:14:52<11:17:55, 3.31it/s] 64%|██████▍ | 236826/371472 [8:14:52<10:56:04, 3.42it/s] 64%|██████▍ | 236827/371472 [8:14:52<10:54:10, 3.43it/s] 64%|██████▍ | 236828/371472 [8:14:53<10:55:58, 3.42it/s] 64%|██████▍ | 236829/371472 [8:14:53<10:54:24, 3.43it/s] 64%|██████▍ | 236830/371472 [8:14:53<11:09:04, 3.35it/s] 64%|██████▍ | 236831/371472 [8:14:53<10:51:51, 3.44it/s] 64%|██████▍ | 236832/371472 [8:14:54<11:05:54, 3.37it/s] 64%|██████▍ | 236833/371472 [8:14:54<11:25:55, 3.27it/s] 64%|██████▍ | 236834/371472 [8:14:54<11:14:43, 3.33it/s] 64%|██████▍ | 236835/371472 [8:14:55<11:31:20, 3.25it/s] 64%|██████▍ | 236836/371472 [8:14:55<11:05:54, 3.37it/s] 64%|██████▍ | 236837/371472 [8:14:55<11:01:58, 3.39it/s] 64%|██████▍ | 236838/371472 [8:14:56<10:47:23, 3.47it/s] 64%|██████▍ | 236839/371472 [8:14:56<10:54:38, 3.43it/s] 64%|██████▍ | 236840/371472 [8:14:56<11:01:35, 3.39it/s] {'loss': 2.726, 'learning_rate': 4.26361266133724e-07, 'epoch': 10.2} + 64%|██████▍ | 236840/371472 [8:14:56<11:01:35, 3.39it/s] 64%|██████▍ | 236841/371472 [8:14:56<11:17:54, 3.31it/s] 64%|██████▍ | 236842/371472 [8:14:57<11:04:45, 3.38it/s] 64%|██████▍ | 236843/371472 [8:14:57<11:28:31, 3.26it/s] 64%|██████▍ | 236844/371472 [8:14:57<11:34:36, 3.23it/s] 64%|██████▍ | 236845/371472 [8:14:58<11:20:29, 3.30it/s] 64%|██████▍ | 236846/371472 [8:14:58<10:56:06, 3.42it/s] 64%|██████▍ | 236847/371472 [8:14:58<10:53:03, 3.44it/s] 64%|██████▍ | 236848/371472 [8:14:59<11:00:48, 3.40it/s] 64%|██████▍ | 236849/371472 [8:14:59<10:49:32, 3.45it/s] 64%|██████▍ | 236850/371472 [8:14:59<10:51:12, 3.45it/s] 64%|██████▍ | 236851/371472 [8:14:59<10:51:49, 3.44it/s] 64%|██████▍ | 236852/371472 [8:15:00<11:09:51, 3.35it/s] 64%|██████▍ | 236853/371472 [8:15:00<11:17:08, 3.31it/s] 64%|██████▍ | 236854/371472 [8:15:00<11:42:11, 3.20it/s] 64%|██████▍ | 236855/371472 [8:15:01<11:08:35, 3.36it/s] 64%|██████▍ | 236856/371472 [8:15:01<11:01:07, 3.39it/s] 64%|██████▍ | 236857/371472 [8:15:01<11:19:12, 3.30it/s] 64%|██████▍ | 236858/371472 [8:15:02<11:04:08, 3.38it/s] 64%|██████▍ | 236859/371472 [8:15:02<10:46:14, 3.47it/s] 64%|██████▍ | 236860/371472 [8:15:02<11:10:41, 3.35it/s] {'loss': 2.8623, 'learning_rate': 4.263127841582452e-07, 'epoch': 10.2} + 64%|██████▍ | 236860/371472 [8:15:02<11:10:41, 3.35it/s] 64%|██████▍ | 236861/371472 [8:15:02<11:00:03, 3.40it/s] 64%|██████▍ | 236862/371472 [8:15:03<10:46:18, 3.47it/s] 64%|██████▍ | 236863/371472 [8:15:03<10:36:59, 3.52it/s] 64%|██████▍ | 236864/371472 [8:15:03<10:35:06, 3.53it/s] 64%|██████▍ | 236865/371472 [8:15:04<10:47:54, 3.46it/s] 64%|██████▍ | 236866/371472 [8:15:04<10:53:08, 3.43it/s] 64%|██████▍ | 236867/371472 [8:15:04<10:43:39, 3.49it/s] 64%|██████▍ | 236868/371472 [8:15:04<10:48:29, 3.46it/s] 64%|██████▍ | 236869/371472 [8:15:05<10:39:53, 3.51it/s] 64%|██████▍ | 236870/371472 [8:15:05<11:03:23, 3.38it/s] 64%|██████▍ | 236871/371472 [8:15:05<12:37:50, 2.96it/s] 64%|██████▍ | 236872/371472 [8:15:06<12:09:12, 3.08it/s] 64%|██████▍ | 236873/371472 [8:15:06<11:40:09, 3.20it/s] 64%|██████▍ | 236874/371472 [8:15:06<11:42:38, 3.19it/s] 64%|██████▍ | 236875/371472 [8:15:07<11:17:25, 3.31it/s] 64%|██████▍ | 236876/371472 [8:15:07<10:51:23, 3.44it/s] 64%|██████▍ | 236877/371472 [8:15:07<10:50:31, 3.45it/s] 64%|██████▍ | 236878/371472 [8:15:08<11:51:57, 3.15it/s] 64%|██████▍ | 236879/371472 [8:15:08<11:30:07, 3.25it/s] 64%|██████▍ | 236880/371472 [8:15:08<11:38:59, 3.21it/s] {'loss': 2.7234, 'learning_rate': 4.262643021827662e-07, 'epoch': 10.2} + 64%|██████▍ | 236880/371472 [8:15:08<11:38:59, 3.21it/s] 64%|██████▍ | 236881/371472 [8:15:08<11:11:02, 3.34it/s] 64%|██████▍ | 236882/371472 [8:15:09<11:02:31, 3.39it/s] 64%|██████▍ | 236883/371472 [8:15:09<11:09:35, 3.35it/s] 64%|██████▍ | 236884/371472 [8:15:09<11:25:23, 3.27it/s] 64%|██████▍ | 236885/371472 [8:15:10<11:33:50, 3.23it/s] 64%|██████▍ | 236886/371472 [8:15:10<11:24:13, 3.28it/s] 64%|██████▍ | 236887/371472 [8:15:10<11:33:56, 3.23it/s] 64%|██████▍ | 236888/371472 [8:15:11<11:12:18, 3.34it/s] 64%|██████▍ | 236889/371472 [8:15:11<11:27:35, 3.26it/s] 64%|██████▍ | 236890/371472 [8:15:11<11:18:52, 3.30it/s] 64%|██████▍ | 236891/371472 [8:15:11<11:24:38, 3.28it/s] 64%|██████▍ | 236892/371472 [8:15:12<11:06:34, 3.36it/s] 64%|██████▍ | 236893/371472 [8:15:12<10:42:09, 3.49it/s] 64%|██████▍ | 236894/371472 [8:15:12<10:38:03, 3.52it/s] 64%|██████▍ | 236895/371472 [8:15:13<10:49:17, 3.45it/s] 64%|██████▍ | 236896/371472 [8:15:13<10:46:53, 3.47it/s] 64%|██████▍ | 236897/371472 [8:15:13<10:50:42, 3.45it/s] 64%|██████▍ | 236898/371472 [8:15:13<10:45:40, 3.47it/s] 64%|██████▍ | 236899/371472 [8:15:14<10:56:37, 3.42it/s] 64%|██████▍ | 236900/371472 [8:15:14<10:41:19, 3.50it/s] {'loss': 2.7692, 'learning_rate': 4.262158202072874e-07, 'epoch': 10.2} + 64%|██████▍ | 236900/371472 [8:15:14<10:41:19, 3.50it/s] 64%|██████▍ | 236901/371472 [8:15:14<11:00:30, 3.40it/s] 64%|██████▍ | 236902/371472 [8:15:15<11:06:16, 3.37it/s] 64%|██████▍ | 236903/371472 [8:15:15<10:45:35, 3.47it/s] 64%|██████▍ | 236904/371472 [8:15:15<11:20:54, 3.29it/s] 64%|██████▍ | 236905/371472 [8:15:16<11:05:30, 3.37it/s] 64%|██████▍ | 236906/371472 [8:15:16<11:11:18, 3.34it/s] 64%|██████▍ | 236907/371472 [8:15:16<11:11:33, 3.34it/s] 64%|██████▍ | 236908/371472 [8:15:16<11:15:03, 3.32it/s] 64%|██████▍ | 236909/371472 [8:15:17<10:59:19, 3.40it/s] 64%|██████▍ | 236910/371472 [8:15:17<10:43:32, 3.48it/s] 64%|██████▍ | 236911/371472 [8:15:17<10:53:32, 3.43it/s] 64%|██████▍ | 236912/371472 [8:15:18<11:24:32, 3.28it/s] 64%|██████▍ | 236913/371472 [8:15:18<11:09:26, 3.35it/s] 64%|██████▍ | 236914/371472 [8:15:18<11:21:32, 3.29it/s] 64%|██████▍ | 236915/371472 [8:15:19<11:35:44, 3.22it/s] 64%|██████▍ | 236916/371472 [8:15:19<11:04:41, 3.37it/s] 64%|██████▍ | 236917/371472 [8:15:19<10:38:06, 3.51it/s] 64%|██████▍ | 236918/371472 [8:15:19<10:38:01, 3.51it/s] 64%|██████▍ | 236919/371472 [8:15:20<10:46:15, 3.47it/s] 64%|██████▍ | 236920/371472 [8:15:20<10:47:09, 3.47it/s] {'loss': 2.6683, 'learning_rate': 4.2616733823180845e-07, 'epoch': 10.2} + 64%|██████▍ | 236920/371472 [8:15:20<10:47:09, 3.47it/s] 64%|██████▍ | 236921/371472 [8:15:20<10:48:36, 3.46it/s] 64%|██████▍ | 236922/371472 [8:15:21<10:47:32, 3.46it/s] 64%|██████▍ | 236923/371472 [8:15:21<10:52:28, 3.44it/s] 64%|██████▍ | 236924/371472 [8:15:21<10:42:10, 3.49it/s] 64%|██████▍ | 236925/371472 [8:15:21<10:34:09, 3.54it/s] 64%|██████▍ | 236926/371472 [8:15:22<11:05:05, 3.37it/s] 64%|██████▍ | 236927/371472 [8:15:22<11:07:55, 3.36it/s] 64%|██████▍ | 236928/371472 [8:15:22<11:16:23, 3.32it/s] 64%|██████▍ | 236929/371472 [8:15:23<11:02:03, 3.39it/s] 64%|██████▍ | 236930/371472 [8:15:23<10:48:26, 3.46it/s] 64%|██████▍ | 236931/371472 [8:15:23<11:07:53, 3.36it/s] 64%|██████▍ | 236932/371472 [8:15:23<11:00:02, 3.40it/s] 64%|██████▍ | 236933/371472 [8:15:24<11:32:16, 3.24it/s] 64%|██████▍ | 236934/371472 [8:15:24<11:04:50, 3.37it/s] 64%|██████▍ | 236935/371472 [8:15:24<11:03:37, 3.38it/s] 64%|██████▍ | 236936/371472 [8:15:25<11:01:01, 3.39it/s] 64%|██████▍ | 236937/371472 [8:15:25<11:00:58, 3.39it/s] 64%|██████▍ | 236938/371472 [8:15:25<11:00:54, 3.39it/s] 64%|██████▍ | 236939/371472 [8:15:26<10:58:55, 3.40it/s] 64%|██████▍ | 236940/371472 [8:15:26<11:00:55, 3.39it/s] {'loss': 2.7187, 'learning_rate': 4.2611885625632957e-07, 'epoch': 10.21} + 64%|██████▍ | 236940/371472 [8:15:26<11:00:55, 3.39it/s] 64%|██████▍ | 236941/371472 [8:15:26<10:59:27, 3.40it/s] 64%|██████▍ | 236942/371472 [8:15:26<10:49:14, 3.45it/s] 64%|██████▍ | 236943/371472 [8:15:27<10:39:14, 3.51it/s] 64%|██████▍ | 236944/371472 [8:15:27<10:38:13, 3.51it/s] 64%|██████▍ | 236945/371472 [8:15:27<11:31:44, 3.24it/s] 64%|██████▍ | 236946/371472 [8:15:28<11:40:09, 3.20it/s] 64%|██████▍ | 236947/371472 [8:15:28<11:11:54, 3.34it/s] 64%|██████▍ | 236948/371472 [8:15:28<11:16:48, 3.31it/s] 64%|██████▍ | 236949/371472 [8:15:29<11:01:09, 3.39it/s] 64%|██████▍ | 236950/371472 [8:15:29<10:48:54, 3.46it/s] 64%|██████▍ | 236951/371472 [8:15:29<10:46:06, 3.47it/s] 64%|██████▍ | 236952/371472 [8:15:29<10:45:53, 3.47it/s] 64%|██████▍ | 236953/371472 [8:15:30<11:09:45, 3.35it/s] 64%|██████▍ | 236954/371472 [8:15:30<10:53:31, 3.43it/s] 64%|██████▍ | 236955/371472 [8:15:30<11:16:19, 3.31it/s] 64%|██████▍ | 236956/371472 [8:15:31<11:05:31, 3.37it/s] 64%|██████▍ | 236957/371472 [8:15:31<11:45:27, 3.18it/s] 64%|██████▍ | 236958/371472 [8:15:31<11:35:07, 3.23it/s] 64%|██████▍ | 236959/371472 [8:15:32<11:32:19, 3.24it/s] 64%|██████▍ | 236960/371472 [8:15:32<12:17:52, 3.04it/s] {'loss': 2.7769, 'learning_rate': 4.2607037428085064e-07, 'epoch': 10.21} + 64%|██████▍ | 236960/371472 [8:15:32<12:17:52, 3.04it/s] 64%|██████▍ | 236961/371472 [8:15:32<11:57:54, 3.12it/s] 64%|██████▍ | 236962/371472 [8:15:33<11:25:38, 3.27it/s] 64%|██████▍ | 236963/371472 [8:15:33<11:12:54, 3.33it/s] 64%|██████▍ | 236964/371472 [8:15:33<11:32:51, 3.24it/s] 64%|██████▍ | 236965/371472 [8:15:33<11:18:46, 3.30it/s] 64%|██████▍ | 236966/371472 [8:15:34<10:50:28, 3.45it/s] 64%|██████▍ | 236967/371472 [8:15:34<10:29:16, 3.56it/s] 64%|██████▍ | 236968/371472 [8:15:34<10:32:22, 3.54it/s] 64%|██████▍ | 236969/371472 [8:15:34<10:27:12, 3.57it/s] 64%|██████▍ | 236970/371472 [8:15:35<10:24:22, 3.59it/s] 64%|██████▍ | 236971/371472 [8:15:35<10:39:33, 3.51it/s] 64%|██████▍ | 236972/371472 [8:15:35<10:34:09, 3.53it/s] 64%|██████▍ | 236973/371472 [8:15:36<10:28:04, 3.57it/s] 64%|██████▍ | 236974/371472 [8:15:36<10:22:32, 3.60it/s] 64%|██████▍ | 236975/371472 [8:15:36<10:31:36, 3.55it/s] 64%|██████▍ | 236976/371472 [8:15:36<10:36:27, 3.52it/s] 64%|██████▍ | 236977/371472 [8:15:37<10:37:17, 3.52it/s] 64%|██████▍ | 236978/371472 [8:15:37<10:40:31, 3.50it/s] 64%|██████▍ | 236979/371472 [8:15:37<11:05:55, 3.37it/s] 64%|██████▍ | 236980/371472 [8:15:38<11:14:56, 3.32it/s] {'loss': 2.6797, 'learning_rate': 4.260218923053718e-07, 'epoch': 10.21} + 64%|██████▍ | 236980/371472 [8:15:38<11:14:56, 3.32it/s] 64%|██████▍ | 236981/371472 [8:15:38<11:12:54, 3.33it/s] 64%|██████▍ | 236982/371472 [8:15:38<10:58:15, 3.41it/s] 64%|██████▍ | 236983/371472 [8:15:39<10:56:00, 3.42it/s] 64%|██████▍ | 236984/371472 [8:15:39<10:56:28, 3.41it/s] 64%|██████▍ | 236985/371472 [8:15:39<11:38:42, 3.21it/s] 64%|██████▍ | 236986/371472 [8:15:39<11:02:59, 3.38it/s] 64%|██████▍ | 236987/371472 [8:15:40<10:30:47, 3.55it/s] 64%|██████▍ | 236988/371472 [8:15:40<10:25:44, 3.58it/s] 64%|██████▍ | 236989/371472 [8:15:40<11:48:19, 3.16it/s] 64%|██████▍ | 236990/371472 [8:15:41<11:31:10, 3.24it/s] 64%|██████▍ | 236991/371472 [8:15:41<11:18:58, 3.30it/s] 64%|██████▍ | 236992/371472 [8:15:41<11:36:32, 3.22it/s] 64%|██████▍ | 236993/371472 [8:15:42<11:23:14, 3.28it/s] 64%|██████▍ | 236994/371472 [8:15:42<11:35:17, 3.22it/s] 64%|██████▍ | 236995/371472 [8:15:42<11:18:59, 3.30it/s] 64%|██████▍ | 236996/371472 [8:15:42<10:58:37, 3.40it/s] 64%|██████▍ | 236997/371472 [8:15:43<10:42:10, 3.49it/s] 64%|██████▍ | 236998/371472 [8:15:43<10:48:27, 3.46it/s] 64%|██████▍ | 236999/371472 [8:15:43<11:03:37, 3.38it/s] 64%|██████▍ | 237000/371472 [8:15:44<10:41:17, 3.49it/s] {'loss': 2.6933, 'learning_rate': 4.259734103298929e-07, 'epoch': 10.21} + 64%|██████▍ | 237000/371472 [8:15:44<10:41:17, 3.49it/s] 64%|██████▍ | 237001/371472 [8:15:44<10:36:33, 3.52it/s] 64%|██████▍ | 237002/371472 [8:15:44<10:31:50, 3.55it/s] 64%|██████▍ | 237003/371472 [8:15:45<11:38:11, 3.21it/s] 64%|██████▍ | 237004/371472 [8:15:45<11:39:09, 3.21it/s] 64%|██████▍ | 237005/371472 [8:15:45<12:05:05, 3.09it/s] 64%|██████▍ | 237006/371472 [8:15:45<11:35:06, 3.22it/s] 64%|██████▍ | 237007/371472 [8:15:46<11:13:24, 3.33it/s] 64%|██████▍ | 237008/371472 [8:15:46<10:49:39, 3.45it/s] 64%|██████▍ | 237009/371472 [8:15:46<11:04:22, 3.37it/s] 64%|██████▍ | 237010/371472 [8:15:47<10:54:15, 3.43it/s] 64%|██████▍ | 237011/371472 [8:15:47<10:58:44, 3.40it/s] 64%|██████▍ | 237012/371472 [8:15:47<10:45:59, 3.47it/s] 64%|██████▍ | 237013/371472 [8:15:47<10:59:38, 3.40it/s] 64%|██████▍ | 237014/371472 [8:15:48<10:35:34, 3.53it/s] 64%|██████▍ | 237015/371472 [8:15:48<10:25:36, 3.58it/s] 64%|██████▍ | 237016/371472 [8:15:48<11:09:22, 3.35it/s] 64%|██████▍ | 237017/371472 [8:15:49<10:57:53, 3.41it/s] 64%|██████▍ | 237018/371472 [8:15:49<10:45:34, 3.47it/s] 64%|██████▍ | 237019/371472 [8:15:49<10:40:10, 3.50it/s] 64%|██████▍ | 237020/371472 [8:15:50<10:50:49, 3.44it/s] {'loss': 2.6775, 'learning_rate': 4.25924928354414e-07, 'epoch': 10.21} + 64%|██████▍ | 237020/371472 [8:15:50<10:50:49, 3.44it/s] 64%|██████▍ | 237021/371472 [8:15:50<10:38:04, 3.51it/s] 64%|██████▍ | 237022/371472 [8:15:50<10:45:11, 3.47it/s] 64%|██████▍ | 237023/371472 [8:15:50<10:36:26, 3.52it/s] 64%|██████▍ | 237024/371472 [8:15:51<11:47:03, 3.17it/s] 64%|██████▍ | 237025/371472 [8:15:51<11:16:43, 3.31it/s] 64%|██████▍ | 237026/371472 [8:15:51<11:44:26, 3.18it/s] 64%|██████▍ | 237027/371472 [8:15:52<11:30:44, 3.24it/s] 64%|██████▍ | 237028/371472 [8:15:52<12:23:12, 3.01it/s] 64%|██████▍ | 237029/371472 [8:15:52<11:58:40, 3.12it/s] 64%|██████▍ | 237030/371472 [8:15:53<11:34:26, 3.23it/s] 64%|██████▍ | 237031/371472 [8:15:53<12:04:47, 3.09it/s] 64%|██████▍ | 237032/371472 [8:15:53<11:45:56, 3.17it/s] 64%|██████▍ | 237033/371472 [8:15:54<12:14:20, 3.05it/s] 64%|██████▍ | 237034/371472 [8:15:54<11:37:05, 3.21it/s] 64%|██████▍ | 237035/371472 [8:15:54<11:10:33, 3.34it/s] 64%|██████▍ | 237036/371472 [8:15:54<11:03:43, 3.38it/s] 64%|██████▍ | 237037/371472 [8:15:55<10:49:23, 3.45it/s] 64%|██████▍ | 237038/371472 [8:15:55<10:43:11, 3.48it/s] 64%|██████▍ | 237039/371472 [8:15:55<10:36:26, 3.52it/s] 64%|██████▍ | 237040/371472 [8:15:56<10:34:33, 3.53it/s] {'loss': 2.6683, 'learning_rate': 4.258764463789351e-07, 'epoch': 10.21} + 64%|██████▍ | 237040/371472 [8:15:56<10:34:33, 3.53it/s] 64%|██████▍ | 237041/371472 [8:15:56<10:26:42, 3.58it/s] 64%|██████▍ | 237042/371472 [8:15:56<10:33:02, 3.54it/s] 64%|██████▍ | 237043/371472 [8:15:56<10:32:18, 3.54it/s] 64%|██████▍ | 237044/371472 [8:15:57<10:36:55, 3.52it/s] 64%|██████▍ | 237045/371472 [8:15:57<11:09:13, 3.35it/s] 64%|██████▍ | 237046/371472 [8:15:57<10:51:51, 3.44it/s] 64%|██████▍ | 237047/371472 [8:15:58<11:11:26, 3.34it/s] 64%|██████▍ | 237048/371472 [8:15:58<11:11:32, 3.34it/s] 64%|██████▍ | 237049/371472 [8:15:58<10:58:46, 3.40it/s] 64%|██████▍ | 237050/371472 [8:15:59<11:03:38, 3.38it/s] 64%|██████▍ | 237051/371472 [8:15:59<11:09:02, 3.35it/s] 64%|██████▍ | 237052/371472 [8:15:59<11:06:20, 3.36it/s] 64%|██████▍ | 237053/371472 [8:15:59<11:34:34, 3.23it/s] 64%|██████▍ | 237054/371472 [8:16:00<11:09:29, 3.35it/s] 64%|██████▍ | 237055/371472 [8:16:00<11:23:38, 3.28it/s] 64%|██████▍ | 237056/371472 [8:16:00<11:12:38, 3.33it/s] 64%|██████▍ | 237057/371472 [8:16:01<11:26:14, 3.26it/s] 64%|██████▍ | 237058/371472 [8:16:01<11:12:30, 3.33it/s] 64%|██████▍ | 237059/371472 [8:16:01<10:50:45, 3.44it/s] 64%|██████▍ | 237060/371472 [8:16:01<10:49:48, 3.45it/s] {'loss': 2.8644, 'learning_rate': 4.2582796440345627e-07, 'epoch': 10.21} + 64%|██████▍ | 237060/371472 [8:16:01<10:49:48, 3.45it/s] 64%|██████▍ | 237061/371472 [8:16:02<10:38:01, 3.51it/s] 64%|██████▍ | 237062/371472 [8:16:02<10:36:25, 3.52it/s] 64%|██████▍ | 237063/371472 [8:16:02<10:27:21, 3.57it/s] 64%|██████▍ | 237064/371472 [8:16:03<10:49:32, 3.45it/s] 64%|██████▍ | 237065/371472 [8:16:03<10:52:35, 3.43it/s] 64%|██████▍ | 237066/371472 [8:16:03<11:22:00, 3.28it/s] 64%|██████▍ | 237067/371472 [8:16:04<11:18:16, 3.30it/s] 64%|██████▍ | 237068/371472 [8:16:04<10:58:26, 3.40it/s] 64%|██████▍ | 237069/371472 [8:16:04<11:33:50, 3.23it/s] 64%|██████▍ | 237070/371472 [8:16:04<11:23:18, 3.28it/s] 64%|██████▍ | 237071/371472 [8:16:05<11:07:07, 3.36it/s] 64%|██████▍ | 237072/371472 [8:16:05<11:10:50, 3.34it/s] 64%|██████▍ | 237073/371472 [8:16:05<11:00:39, 3.39it/s] 64%|██████▍ | 237074/371472 [8:16:06<10:52:37, 3.43it/s] 64%|██████▍ | 237075/371472 [8:16:06<11:01:26, 3.39it/s] 64%|██████▍ | 237076/371472 [8:16:06<11:02:18, 3.38it/s] 64%|██████▍ | 237077/371472 [8:16:06<10:41:38, 3.49it/s] 64%|██████▍ | 237078/371472 [8:16:07<10:37:31, 3.51it/s] 64%|██████▍ | 237079/371472 [8:16:07<11:02:05, 3.38it/s] 64%|██████▍ | 237080/371472 [8:16:07<10:56:39, 3.41it/s] {'loss': 2.6605, 'learning_rate': 4.257794824279773e-07, 'epoch': 10.21} + 64%|██████▍ | 237080/371472 [8:16:07<10:56:39, 3.41it/s] 64%|██████▍ | 237081/371472 [8:16:08<10:47:45, 3.46it/s] 64%|██████▍ | 237082/371472 [8:16:08<10:45:38, 3.47it/s] 64%|██████▍ | 237083/371472 [8:16:08<10:41:57, 3.49it/s] 64%|██████▍ | 237084/371472 [8:16:09<10:44:57, 3.47it/s] 64%|██████▍ | 237085/371472 [8:16:09<11:16:17, 3.31it/s] 64%|██████▍ | 237086/371472 [8:16:09<11:15:59, 3.31it/s] 64%|██████▍ | 237087/371472 [8:16:09<11:12:35, 3.33it/s] 64%|██████▍ | 237088/371472 [8:16:10<10:55:58, 3.41it/s] 64%|██████▍ | 237089/371472 [8:16:10<10:52:11, 3.43it/s] 64%|██████▍ | 237090/371472 [8:16:10<10:45:23, 3.47it/s] 64%|██████▍ | 237091/371472 [8:16:11<11:12:20, 3.33it/s] 64%|██████▍ | 237092/371472 [8:16:11<11:45:52, 3.17it/s] 64%|██████▍ | 237093/371472 [8:16:11<11:34:44, 3.22it/s] 64%|██████▍ | 237094/371472 [8:16:12<11:11:55, 3.33it/s] 64%|██████▍ | 237095/371472 [8:16:12<11:02:40, 3.38it/s] 64%|██████▍ | 237096/371472 [8:16:12<11:46:37, 3.17it/s] 64%|██████▍ | 237097/371472 [8:16:13<11:50:51, 3.15it/s] 64%|██████▍ | 237098/371472 [8:16:13<11:39:46, 3.20it/s] 64%|██████▍ | 237099/371472 [8:16:13<11:55:11, 3.13it/s] 64%|██████▍ | 237100/371472 [8:16:13<11:30:48, 3.24it/s] {'loss': 2.6446, 'learning_rate': 4.257310004524985e-07, 'epoch': 10.21} + 64%|██████▍ | 237100/371472 [8:16:13<11:30:48, 3.24it/s] 64%|██████▍ | 237101/371472 [8:16:14<12:09:02, 3.07it/s] 64%|██████▍ | 237102/371472 [8:16:14<12:00:04, 3.11it/s] 64%|██████▍ | 237103/371472 [8:16:14<11:44:25, 3.18it/s] 64%|██████▍ | 237104/371472 [8:16:15<12:07:32, 3.08it/s] 64%|██████▍ | 237105/371472 [8:16:15<11:43:56, 3.18it/s] 64%|██████▍ | 237106/371472 [8:16:15<11:32:51, 3.23it/s] 64%|██████▍ | 237107/371472 [8:16:16<11:27:58, 3.26it/s] 64%|██████▍ | 237108/371472 [8:16:16<11:33:30, 3.23it/s] 64%|██████▍ | 237109/371472 [8:16:16<11:33:41, 3.23it/s] 64%|██████▍ | 237110/371472 [8:16:17<11:40:46, 3.20it/s] 64%|██████▍ | 237111/371472 [8:16:17<11:41:28, 3.19it/s] 64%|██████▍ | 237112/371472 [8:16:17<11:38:01, 3.21it/s] 64%|██████▍ | 237113/371472 [8:16:18<11:48:20, 3.16it/s] 64%|██████▍ | 237114/371472 [8:16:18<11:29:33, 3.25it/s] 64%|██████▍ | 237115/371472 [8:16:18<11:29:37, 3.25it/s] 64%|██████▍ | 237116/371472 [8:16:18<11:41:37, 3.19it/s] 64%|██████▍ | 237117/371472 [8:16:19<11:52:01, 3.14it/s] 64%|██████▍ | 237118/371472 [8:16:19<11:14:12, 3.32it/s] 64%|██████▍ | 237119/371472 [8:16:19<10:58:55, 3.40it/s] 64%|██████▍ | 237120/371472 [8:16:20<11:09:20, 3.35it/s] {'loss': 2.5867, 'learning_rate': 4.2568251847701953e-07, 'epoch': 10.21} + 64%|██████▍ | 237120/371472 [8:16:20<11:09:20, 3.35it/s] 64%|██████▍ | 237121/371472 [8:16:20<11:02:17, 3.38it/s] 64%|██████▍ | 237122/371472 [8:16:20<11:24:32, 3.27it/s] 64%|██████▍ | 237123/371472 [8:16:21<11:00:52, 3.39it/s] 64%|██████▍ | 237124/371472 [8:16:21<10:52:43, 3.43it/s] 64%|██████▍ | 237125/371472 [8:16:21<10:40:34, 3.50it/s] 64%|██████▍ | 237126/371472 [8:16:21<10:31:33, 3.55it/s] 64%|██████▍ | 237127/371472 [8:16:22<10:36:24, 3.52it/s] 64%|██████▍ | 237128/371472 [8:16:22<11:13:11, 3.33it/s] 64%|██████▍ | 237129/371472 [8:16:22<10:59:37, 3.39it/s] 64%|██████▍ | 237130/371472 [8:16:23<11:39:51, 3.20it/s] 64%|██████▍ | 237131/371472 [8:16:23<12:13:53, 3.05it/s] 64%|██████▍ | 237132/371472 [8:16:23<12:13:20, 3.05it/s] 64%|██████▍ | 237133/371472 [8:16:24<12:26:07, 3.00it/s] 64%|██████▍ | 237134/371472 [8:16:24<11:40:21, 3.20it/s] 64%|██████▍ | 237135/371472 [8:16:24<12:04:49, 3.09it/s] 64%|██████▍ | 237136/371472 [8:16:25<11:38:24, 3.21it/s] 64%|██████▍ | 237137/371472 [8:16:25<11:20:41, 3.29it/s] 64%|██████▍ | 237138/371472 [8:16:25<11:41:41, 3.19it/s] 64%|██████▍ | 237139/371472 [8:16:25<11:11:14, 3.34it/s] 64%|██████▍ | 237140/371472 [8:16:26<10:53:00, 3.43it/s] {'loss': 2.7821, 'learning_rate': 4.2563403650154066e-07, 'epoch': 10.21} + 64%|██████▍ | 237140/371472 [8:16:26<10:53:00, 3.43it/s] 64%|██████▍ | 237141/371472 [8:16:26<10:41:04, 3.49it/s] 64%|██████▍ | 237142/371472 [8:16:26<10:36:25, 3.52it/s] 64%|██████▍ | 237143/371472 [8:16:27<10:25:35, 3.58it/s] 64%|██████▍ | 237144/371472 [8:16:27<10:13:27, 3.65it/s] 64%|██████▍ | 237145/371472 [8:16:27<10:25:34, 3.58it/s] 64%|██████▍ | 237146/371472 [8:16:27<10:39:30, 3.50it/s] 64%|██████▍ | 237147/371472 [8:16:28<12:01:19, 3.10it/s] 64%|██████▍ | 237148/371472 [8:16:28<11:37:04, 3.21it/s] 64%|██████▍ | 237149/371472 [8:16:28<11:36:13, 3.22it/s] 64%|██████▍ | 237150/371472 [8:16:29<11:12:13, 3.33it/s] 64%|██████▍ | 237151/371472 [8:16:29<11:50:14, 3.15it/s] 64%|██████▍ | 237152/371472 [8:16:29<11:38:16, 3.21it/s] 64%|██████▍ | 237153/371472 [8:16:30<11:17:53, 3.30it/s] 64%|██████▍ | 237154/371472 [8:16:30<11:07:54, 3.35it/s] 64%|██████▍ | 237155/371472 [8:16:30<10:49:50, 3.44it/s] 64%|██████▍ | 237156/371472 [8:16:30<11:04:42, 3.37it/s] 64%|██████▍ | 237157/371472 [8:16:31<11:03:33, 3.37it/s] 64%|██████▍ | 237158/371472 [8:16:31<10:39:21, 3.50it/s] 64%|██████▍ | 237159/371472 [8:16:31<10:44:41, 3.47it/s] 64%|██████▍ | 237160/371472 [8:16:32<10:56:30, 3.41it/s] {'loss': 2.7165, 'learning_rate': 4.2558555452606173e-07, 'epoch': 10.21} + 64%|██████▍ | 237160/371472 [8:16:32<10:56:30, 3.41it/s] 64%|██████▍ | 237161/371472 [8:16:32<10:43:11, 3.48it/s] 64%|██████▍ | 237162/371472 [8:16:32<12:00:57, 3.10it/s] 64%|██████▍ | 237163/371472 [8:16:33<11:36:17, 3.21it/s] 64%|██████▍ | 237164/371472 [8:16:33<11:49:11, 3.16it/s] 64%|██████▍ | 237165/371472 [8:16:33<11:25:43, 3.26it/s] 64%|██████▍ | 237166/371472 [8:16:34<11:22:24, 3.28it/s] 64%|██████▍ | 237167/371472 [8:16:34<12:11:19, 3.06it/s] 64%|██████▍ | 237168/371472 [8:16:34<11:57:50, 3.12it/s] 64%|██████▍ | 237169/371472 [8:16:34<11:36:57, 3.21it/s] 64%|██████▍ | 237170/371472 [8:16:35<11:25:36, 3.26it/s] 64%|██████▍ | 237171/371472 [8:16:35<10:49:33, 3.45it/s] 64%|██████▍ | 237172/371472 [8:16:35<10:40:03, 3.50it/s] 64%|██████▍ | 237173/371472 [8:16:36<10:55:23, 3.42it/s] 64%|██████▍ | 237174/371472 [8:16:36<10:49:25, 3.45it/s] 64%|██████▍ | 237175/371472 [8:16:36<10:46:56, 3.46it/s] 64%|██████▍ | 237176/371472 [8:16:36<10:33:07, 3.54it/s] 64%|██████▍ | 237177/371472 [8:16:37<10:35:13, 3.52it/s] 64%|██████▍ | 237178/371472 [8:16:37<10:47:16, 3.46it/s] 64%|██████▍ | 237179/371472 [8:16:37<11:55:09, 3.13it/s] 64%|██████▍ | 237180/371472 [8:16:38<11:30:32, 3.24it/s] {'loss': 2.7909, 'learning_rate': 4.255370725505829e-07, 'epoch': 10.22} + 64%|██████▍ | 237180/371472 [8:16:38<11:30:32, 3.24it/s] 64%|██████▍ | 237181/371472 [8:16:38<11:23:23, 3.28it/s] 64%|██████▍ | 237182/371472 [8:16:38<11:07:13, 3.35it/s] 64%|██████▍ | 237183/371472 [8:16:39<10:58:49, 3.40it/s] 64%|██████▍ | 237184/371472 [8:16:39<10:50:46, 3.44it/s] 64%|██████▍ | 237185/371472 [8:16:39<11:03:15, 3.37it/s] 64%|██████▍ | 237186/371472 [8:16:39<10:51:13, 3.44it/s] 64%|██████▍ | 237187/371472 [8:16:40<10:55:47, 3.41it/s] 64%|██████▍ | 237188/371472 [8:16:40<10:56:22, 3.41it/s] 64%|██████▍ | 237189/371472 [8:16:40<11:09:45, 3.34it/s] 64%|██████▍ | 237190/371472 [8:16:41<11:29:53, 3.24it/s] 64%|██████▍ | 237191/371472 [8:16:41<11:00:26, 3.39it/s] 64%|██████▍ | 237192/371472 [8:16:41<11:09:07, 3.34it/s] 64%|██████▍ | 237193/371472 [8:16:42<11:07:44, 3.35it/s] 64%|██████▍ | 237194/371472 [8:16:42<10:56:07, 3.41it/s] 64%|██████▍ | 237195/371472 [8:16:42<11:33:26, 3.23it/s] 64%|██████▍ | 237196/371472 [8:16:42<11:03:17, 3.37it/s] 64%|██████▍ | 237197/371472 [8:16:43<10:58:54, 3.40it/s] 64%|██████▍ | 237198/371472 [8:16:43<10:49:22, 3.45it/s] 64%|██████▍ | 237199/371472 [8:16:43<10:49:37, 3.44it/s] 64%|██████▍ | 237200/371472 [8:16:44<11:23:00, 3.28it/s] {'loss': 2.6505, 'learning_rate': 4.254885905751039e-07, 'epoch': 10.22} + 64%|██████▍ | 237200/371472 [8:16:44<11:23:00, 3.28it/s] 64%|██████▍ | 237201/371472 [8:16:44<11:18:07, 3.30it/s] 64%|██████▍ | 237202/371472 [8:16:44<11:16:00, 3.31it/s] 64%|██████▍ | 237203/371472 [8:16:45<11:01:08, 3.38it/s] 64%|██████▍ | 237204/371472 [8:16:45<10:56:52, 3.41it/s] 64%|██████▍ | 237205/371472 [8:16:45<11:09:20, 3.34it/s] 64%|██████▍ | 237206/371472 [8:16:45<11:00:27, 3.39it/s] 64%|██████▍ | 237207/371472 [8:16:46<10:55:35, 3.41it/s] 64%|██████▍ | 237208/371472 [8:16:46<11:44:51, 3.17it/s] 64%|██████▍ | 237209/371472 [8:16:46<11:26:21, 3.26it/s] 64%|██████▍ | 237210/371472 [8:16:47<12:13:42, 3.05it/s] 64%|██████▍ | 237211/371472 [8:16:47<11:34:39, 3.22it/s] 64%|██████▍ | 237212/371472 [8:16:47<11:36:11, 3.21it/s] 64%|██████▍ | 237213/371472 [8:16:48<11:36:53, 3.21it/s] 64%|██████▍ | 237214/371472 [8:16:48<11:10:18, 3.34it/s] 64%|██████▍ | 237215/371472 [8:16:48<11:12:46, 3.33it/s] 64%|██████▍ | 237216/371472 [8:16:49<11:19:30, 3.29it/s] 64%|██████▍ | 237217/371472 [8:16:49<11:26:02, 3.26it/s] 64%|██████▍ | 237218/371472 [8:16:49<11:14:29, 3.32it/s] 64%|██████▍ | 237219/371472 [8:16:49<10:54:21, 3.42it/s] 64%|██████▍ | 237220/371472 [8:16:50<10:36:57, 3.51it/s] {'loss': 2.7775, 'learning_rate': 4.254401085996251e-07, 'epoch': 10.22} + 64%|██████▍ | 237220/371472 [8:16:50<10:36:57, 3.51it/s] 64%|██████▍ | 237221/371472 [8:16:50<11:18:25, 3.30it/s] 64%|██████▍ | 237222/371472 [8:16:50<11:05:15, 3.36it/s] 64%|██████▍ | 237223/371472 [8:16:51<10:49:17, 3.45it/s] 64%|██████▍ | 237224/371472 [8:16:51<10:52:31, 3.43it/s] 64%|██████▍ | 237225/371472 [8:16:51<10:57:00, 3.41it/s] 64%|██████▍ | 237226/371472 [8:16:51<11:14:58, 3.31it/s] 64%|██████▍ | 237227/371472 [8:16:52<11:27:50, 3.25it/s] 64%|██████▍ | 237228/371472 [8:16:52<11:14:50, 3.32it/s] 64%|██████▍ | 237229/371472 [8:16:52<10:57:01, 3.41it/s] 64%|██████▍ | 237230/371472 [8:16:53<10:46:02, 3.46it/s] 64%|██████▍ | 237231/371472 [8:16:53<10:48:09, 3.45it/s] 64%|██████▍ | 237232/371472 [8:16:53<10:36:50, 3.51it/s] 64%|██████▍ | 237233/371472 [8:16:53<10:41:48, 3.49it/s] 64%|██████▍ | 237234/371472 [8:16:54<10:54:26, 3.42it/s] 64%|██████▍ | 237235/371472 [8:16:54<11:08:30, 3.35it/s] 64%|██████▍ | 237236/371472 [8:16:54<10:51:14, 3.44it/s] 64%|██████▍ | 237237/371472 [8:16:55<11:03:47, 3.37it/s] 64%|██████▍ | 237238/371472 [8:16:55<11:04:52, 3.36it/s] 64%|██████▍ | 237239/371472 [8:16:55<11:18:48, 3.30it/s] 64%|██████▍ | 237240/371472 [8:16:56<11:20:24, 3.29it/s] {'loss': 2.8393, 'learning_rate': 4.253916266241462e-07, 'epoch': 10.22} + 64%|██████▍ | 237240/371472 [8:16:56<11:20:24, 3.29it/s] 64%|██████▍ | 237241/371472 [8:16:56<11:04:02, 3.37it/s] 64%|██████▍ | 237242/371472 [8:16:56<11:42:08, 3.19it/s] 64%|██████▍ | 237243/371472 [8:16:57<11:34:56, 3.22it/s] 64%|██████▍ | 237244/371472 [8:16:57<11:13:54, 3.32it/s] 64%|██████▍ | 237245/371472 [8:16:57<11:01:02, 3.38it/s] 64%|██████▍ | 237246/371472 [8:16:57<10:54:14, 3.42it/s] 64%|██████▍ | 237247/371472 [8:16:58<11:01:13, 3.38it/s] 64%|██████▍ | 237248/371472 [8:16:58<10:44:46, 3.47it/s] 64%|██████▍ | 237249/371472 [8:16:58<11:00:56, 3.38it/s] 64%|██████▍ | 237250/371472 [8:16:59<11:05:55, 3.36it/s] 64%|██████▍ | 237251/371472 [8:16:59<10:52:02, 3.43it/s] 64%|██████▍ | 237252/371472 [8:16:59<10:39:37, 3.50it/s] 64%|██████▍ | 237253/371472 [8:16:59<10:30:02, 3.55it/s] 64%|██████▍ | 237254/371472 [8:17:00<10:18:09, 3.62it/s] 64%|██████▍ | 237255/371472 [8:17:00<10:21:50, 3.60it/s] 64%|██████▍ | 237256/371472 [8:17:00<10:32:41, 3.54it/s] 64%|██████▍ | 237257/371472 [8:17:01<10:36:08, 3.52it/s] 64%|██████▍ | 237258/371472 [8:17:01<10:29:14, 3.55it/s] 64%|██████▍ | 237259/371472 [8:17:01<10:31:22, 3.54it/s] 64%|██████▍ | 237260/371472 [8:17:01<11:12:17, 3.33it/s] {'loss': 2.6272, 'learning_rate': 4.253431446486672e-07, 'epoch': 10.22} + 64%|██████▍ | 237260/371472 [8:17:01<11:12:17, 3.33it/s] 64%|██████▍ | 237261/371472 [8:17:02<10:54:41, 3.42it/s] 64%|██████▍ | 237262/371472 [8:17:02<11:21:08, 3.28it/s] 64%|██████▍ | 237263/371472 [8:17:02<11:14:25, 3.32it/s] 64%|██████▍ | 237264/371472 [8:17:03<12:29:41, 2.98it/s] 64%|██████▍ | 237265/371472 [8:17:03<11:54:19, 3.13it/s] 64%|██████▍ | 237266/371472 [8:17:03<12:04:53, 3.09it/s] 64%|██████▍ | 237267/371472 [8:17:04<12:39:49, 2.94it/s] 64%|██████▍ | 237268/371472 [8:17:04<12:13:12, 3.05it/s] 64%|██████▍ | 237269/371472 [8:17:04<11:54:03, 3.13it/s] 64%|██████▍ | 237270/371472 [8:17:05<11:24:43, 3.27it/s] 64%|██████▍ | 237271/371472 [8:17:05<12:01:55, 3.10it/s] 64%|██████▍ | 237272/371472 [8:17:05<11:44:58, 3.17it/s] 64%|██████▍ | 237273/371472 [8:17:06<11:44:39, 3.17it/s] 64%|██████▍ | 237274/371472 [8:17:06<11:47:05, 3.16it/s] 64%|██████▍ | 237275/371472 [8:17:06<11:43:05, 3.18it/s] 64%|██████▍ | 237276/371472 [8:17:07<11:21:13, 3.28it/s] 64%|██████▍ | 237277/371472 [8:17:07<10:58:04, 3.40it/s] 64%|██████▍ | 237278/371472 [8:17:07<11:04:09, 3.37it/s] 64%|██████▍ | 237279/371472 [8:17:07<10:56:04, 3.41it/s] 64%|██████▍ | 237280/371472 [8:17:08<11:12:30, 3.33it/s] {'loss': 2.6569, 'learning_rate': 4.2529466267318837e-07, 'epoch': 10.22} + 64%|██████▍ | 237280/371472 [8:17:08<11:12:30, 3.33it/s] 64%|██████▍ | 237281/371472 [8:17:08<11:16:31, 3.31it/s] 64%|██████▍ | 237282/371472 [8:17:08<11:10:42, 3.33it/s] 64%|██████▍ | 237283/371472 [8:17:09<10:55:57, 3.41it/s] 64%|██████▍ | 237284/371472 [8:17:09<10:43:26, 3.48it/s] 64%|██████▍ | 237285/371472 [8:17:09<10:52:28, 3.43it/s] 64%|██████▍ | 237286/371472 [8:17:09<10:28:44, 3.56it/s] 64%|██████▍ | 237287/371472 [8:17:10<10:23:34, 3.59it/s] 64%|██████▍ | 237288/371472 [8:17:10<11:35:36, 3.22it/s] 64%|██████▍ | 237289/371472 [8:17:10<11:48:46, 3.16it/s] 64%|██████▍ | 237290/371472 [8:17:11<11:32:29, 3.23it/s] 64%|██████▍ | 237291/371472 [8:17:11<11:24:34, 3.27it/s] 64%|██████▍ | 237292/371472 [8:17:11<11:25:51, 3.26it/s] 64%|██████▍ | 237293/371472 [8:17:12<11:25:19, 3.26it/s] 64%|██████▍ | 237294/371472 [8:17:12<11:45:44, 3.17it/s] 64%|██████▍ | 237295/371472 [8:17:12<11:52:55, 3.14it/s] 64%|██████▍ | 237296/371472 [8:17:13<11:21:25, 3.28it/s] 64%|██████▍ | 237297/371472 [8:17:13<11:26:35, 3.26it/s] 64%|██████▍ | 237298/371472 [8:17:13<11:07:02, 3.35it/s] 64%|██████▍ | 237299/371472 [8:17:13<10:49:36, 3.44it/s] 64%|██████▍ | 237300/371472 [8:17:14<11:30:02, 3.24it/s] {'loss': 2.6837, 'learning_rate': 4.2524618069770944e-07, 'epoch': 10.22} + 64%|██████▍ | 237300/371472 [8:17:14<11:30:02, 3.24it/s] 64%|██████▍ | 237301/371472 [8:17:14<11:28:04, 3.25it/s] 64%|██████▍ | 237302/371472 [8:17:14<11:22:08, 3.28it/s] 64%|██████▍ | 237303/371472 [8:17:15<11:18:33, 3.30it/s] 64%|██████▍ | 237304/371472 [8:17:15<11:12:40, 3.32it/s] 64%|██████▍ | 237305/371472 [8:17:15<11:47:26, 3.16it/s] 64%|██████▍ | 237306/371472 [8:17:16<11:32:16, 3.23it/s] 64%|██████▍ | 237307/371472 [8:17:16<11:07:33, 3.35it/s] 64%|██████▍ | 237308/371472 [8:17:16<10:45:02, 3.47it/s] 64%|██████▍ | 237309/371472 [8:17:16<10:53:01, 3.42it/s] 64%|██████▍ | 237310/371472 [8:17:17<10:41:42, 3.48it/s] 64%|██████▍ | 237311/371472 [8:17:17<10:44:25, 3.47it/s] 64%|██████▍ | 237312/371472 [8:17:17<10:37:39, 3.51it/s] 64%|██████▍ | 237313/371472 [8:17:18<10:50:15, 3.44it/s] 64%|██████▍ | 237314/371472 [8:17:18<10:29:39, 3.55it/s] 64%|██████▍ | 237315/371472 [8:17:18<10:20:41, 3.60it/s] 64%|██████▍ | 237316/371472 [8:17:18<10:09:31, 3.67it/s] 64%|██████▍ | 237317/371472 [8:17:19<10:10:23, 3.66it/s] 64%|██████▍ | 237318/371472 [8:17:19<10:24:48, 3.58it/s] 64%|██████▍ | 237319/371472 [8:17:19<10:14:05, 3.64it/s] 64%|██████▍ | 237320/371472 [8:17:20<11:01:38, 3.38it/s] {'loss': 2.7375, 'learning_rate': 4.2519769872223057e-07, 'epoch': 10.22} + 64%|██████▍ | 237320/371472 [8:17:20<11:01:38, 3.38it/s] 64%|██████▍ | 237321/371472 [8:17:20<10:53:19, 3.42it/s] 64%|██████▍ | 237322/371472 [8:17:20<10:46:41, 3.46it/s] 64%|██████▍ | 237323/371472 [8:17:20<10:38:14, 3.50it/s] 64%|██████▍ | 237324/371472 [8:17:21<10:35:19, 3.52it/s] 64%|██████▍ | 237325/371472 [8:17:21<10:54:42, 3.41it/s] 64%|██████▍ | 237326/371472 [8:17:21<10:40:01, 3.49it/s] 64%|██████▍ | 237327/371472 [8:17:22<10:54:55, 3.41it/s] 64%|██████▍ | 237328/371472 [8:17:22<11:28:13, 3.25it/s] 64%|██████▍ | 237329/371472 [8:17:22<10:58:17, 3.40it/s] 64%|██████▍ | 237330/371472 [8:17:22<10:36:17, 3.51it/s] 64%|██████▍ | 237331/371472 [8:17:23<11:09:12, 3.34it/s] 64%|██████▍ | 237332/371472 [8:17:23<11:31:32, 3.23it/s] 64%|██████▍ | 237333/371472 [8:17:23<11:39:12, 3.20it/s] 64%|██████▍ | 237334/371472 [8:17:24<11:40:19, 3.19it/s] 64%|██████▍ | 237335/371472 [8:17:24<11:05:37, 3.36it/s] 64%|██████▍ | 237336/371472 [8:17:24<11:36:11, 3.21it/s] 64%|██████▍ | 237337/371472 [8:17:25<11:23:12, 3.27it/s] 64%|██████▍ | 237338/371472 [8:17:25<11:12:07, 3.33it/s] 64%|██████▍ | 237339/371472 [8:17:25<11:53:52, 3.13it/s] 64%|██████▍ | 237340/371472 [8:17:26<11:24:17, 3.27it/s] {'loss': 2.653, 'learning_rate': 4.2514921674675164e-07, 'epoch': 10.22} + 64%|██████▍ | 237340/371472 [8:17:26<11:24:17, 3.27it/s] 64%|██████▍ | 237341/371472 [8:17:26<11:09:30, 3.34it/s] 64%|██████▍ | 237342/371472 [8:17:26<11:05:04, 3.36it/s] 64%|██████▍ | 237343/371472 [8:17:26<11:24:21, 3.27it/s] 64%|██████▍ | 237344/371472 [8:17:27<11:15:02, 3.31it/s] 64%|██████▍ | 237345/371472 [8:17:27<11:41:18, 3.19it/s] 64%|██████▍ | 237346/371472 [8:17:27<11:16:28, 3.30it/s] 64%|██████▍ | 237347/371472 [8:17:28<11:08:46, 3.34it/s] 64%|██████▍ | 237348/371472 [8:17:28<11:17:18, 3.30it/s] 64%|██████▍ | 237349/371472 [8:17:28<10:59:16, 3.39it/s] 64%|██████▍ | 237350/371472 [8:17:29<10:57:26, 3.40it/s] 64%|██████▍ | 237351/371472 [8:17:29<10:45:08, 3.46it/s] 64%|██████▍ | 237352/371472 [8:17:29<10:50:16, 3.44it/s] 64%|██████▍ | 237353/371472 [8:17:29<10:30:34, 3.54it/s] 64%|██████▍ | 237354/371472 [8:17:30<10:39:29, 3.50it/s] 64%|██████▍ | 237355/371472 [8:17:30<10:27:12, 3.56it/s] 64%|██████▍ | 237356/371472 [8:17:30<10:44:24, 3.47it/s] 64%|██████▍ | 237357/371472 [8:17:31<10:35:29, 3.52it/s] 64%|██████▍ | 237358/371472 [8:17:31<10:59:00, 3.39it/s] 64%|██████▍ | 237359/371472 [8:17:31<10:45:38, 3.46it/s] 64%|██████▍ | 237360/371472 [8:17:31<10:48:52, 3.44it/s] {'loss': 2.5177, 'learning_rate': 4.251007347712728e-07, 'epoch': 10.22} + 64%|██████▍ | 237360/371472 [8:17:31<10:48:52, 3.44it/s] 64%|██████▍ | 237361/371472 [8:17:32<11:00:37, 3.38it/s] 64%|██████▍ | 237362/371472 [8:17:32<10:43:39, 3.47it/s] 64%|██████▍ | 237363/371472 [8:17:32<10:41:24, 3.48it/s] 64%|██████▍ | 237364/371472 [8:17:33<10:39:56, 3.49it/s] 64%|██████▍ | 237365/371472 [8:17:33<11:42:08, 3.18it/s] 64%|██████▍ | 237366/371472 [8:17:33<12:03:12, 3.09it/s] 64%|██████▍ | 237367/371472 [8:17:34<12:46:42, 2.92it/s] 64%|██████▍ | 237368/371472 [8:17:34<12:31:33, 2.97it/s] 64%|██████▍ | 237369/371472 [8:17:34<12:01:51, 3.10it/s] 64%|██████▍ | 237370/371472 [8:17:35<11:42:17, 3.18it/s] 64%|██████▍ | 237371/371472 [8:17:35<11:41:13, 3.19it/s] 64%|██████▍ | 237372/371472 [8:17:35<11:44:07, 3.17it/s] 64%|██████▍ | 237373/371472 [8:17:36<11:44:12, 3.17it/s] 64%|██████▍ | 237374/371472 [8:17:36<11:49:45, 3.15it/s] 64%|██████▍ | 237375/371472 [8:17:36<12:30:09, 2.98it/s] 64%|██████▍ | 237376/371472 [8:17:37<12:00:00, 3.10it/s] 64%|██████▍ | 237377/371472 [8:17:37<11:34:19, 3.22it/s] 64%|██████▍ | 237378/371472 [8:17:37<11:07:53, 3.35it/s] 64%|██████▍ | 237379/371472 [8:17:37<11:10:11, 3.33it/s] 64%|██████▍ | 237380/371472 [8:17:38<11:11:48, 3.33it/s] {'loss': 2.7613, 'learning_rate': 4.250522527957939e-07, 'epoch': 10.22} + 64%|██████▍ | 237380/371472 [8:17:38<11:11:48, 3.33it/s] 64%|████���█▍ | 237381/371472 [8:17:38<11:08:35, 3.34it/s] 64%|██████▍ | 237382/371472 [8:17:38<11:17:49, 3.30it/s] 64%|██████▍ | 237383/371472 [8:17:39<11:14:08, 3.32it/s] 64%|██████▍ | 237384/371472 [8:17:39<10:57:54, 3.40it/s] 64%|██████▍ | 237385/371472 [8:17:39<11:15:57, 3.31it/s] 64%|██████▍ | 237386/371472 [8:17:39<11:03:08, 3.37it/s] 64%|██████▍ | 237387/371472 [8:17:40<10:53:22, 3.42it/s] 64%|██████▍ | 237388/371472 [8:17:40<11:04:22, 3.36it/s] 64%|██████▍ | 237389/371472 [8:17:40<10:47:52, 3.45it/s] 64%|██████▍ | 237390/371472 [8:17:41<11:20:52, 3.28it/s] 64%|██████▍ | 237391/371472 [8:17:41<11:16:35, 3.30it/s] 64%|██████▍ | 237392/371472 [8:17:41<10:57:35, 3.40it/s] 64%|██████▍ | 237393/371472 [8:17:42<10:57:51, 3.40it/s] 64%|██████▍ | 237394/371472 [8:17:42<10:50:35, 3.43it/s] 64%|██████▍ | 237395/371472 [8:17:42<10:27:32, 3.56it/s] 64%|██████▍ | 237396/371472 [8:17:42<10:44:48, 3.47it/s] 64%|██████▍ | 237397/371472 [8:17:43<11:00:26, 3.38it/s] 64%|██████▍ | 237398/371472 [8:17:43<11:12:55, 3.32it/s] 64%|██████▍ | 237399/371472 [8:17:43<10:51:21, 3.43it/s] 64%|██████▍ | 237400/371472 [8:17:44<10:51:08, 3.43it/s] {'loss': 2.7773, 'learning_rate': 4.25003770820315e-07, 'epoch': 10.23} + 64%|██████▍ | 237400/371472 [8:17:44<10:51:08, 3.43it/s] 64%|██████▍ | 237401/371472 [8:17:44<10:53:31, 3.42it/s] 64%|██████▍ | 237402/371472 [8:17:44<10:54:14, 3.42it/s] 64%|██████▍ | 237403/371472 [8:17:44<11:12:26, 3.32it/s] 64%|██████▍ | 237404/371472 [8:17:45<10:51:22, 3.43it/s] 64%|██████▍ | 237405/371472 [8:17:45<11:15:09, 3.31it/s] 64%|██████▍ | 237406/371472 [8:17:45<10:58:02, 3.40it/s] 64%|██████▍ | 237407/371472 [8:17:46<11:38:53, 3.20it/s] 64%|██████▍ | 237408/371472 [8:17:46<11:46:26, 3.16it/s] 64%|██████▍ | 237409/371472 [8:17:46<11:25:04, 3.26it/s] 64%|██████▍ | 237410/371472 [8:17:47<14:52:52, 2.50it/s] 64%|██████▍ | 237411/371472 [8:17:47<13:55:35, 2.67it/s] 64%|██████▍ | 237412/371472 [8:17:48<13:46:30, 2.70it/s] 64%|██████▍ | 237413/371472 [8:17:48<12:47:29, 2.91it/s] 64%|██████▍ | 237414/371472 [8:17:48<12:07:43, 3.07it/s] 64%|██████▍ | 237415/371472 [8:17:48<11:34:36, 3.22it/s] 64%|██████▍ | 237416/371472 [8:17:49<10:56:45, 3.40it/s] 64%|██████▍ | 237417/371472 [8:17:49<10:59:11, 3.39it/s] 64%|██████▍ | 237418/371472 [8:17:49<11:04:46, 3.36it/s] 64%|██████▍ | 237419/371472 [8:17:50<11:02:49, 3.37it/s] 64%|██████▍ | 237420/371472 [8:17:50<10:55:06, 3.41it/s] {'loss': 2.6988, 'learning_rate': 4.249552888448361e-07, 'epoch': 10.23} + 64%|██████▍ | 237420/371472 [8:17:50<10:55:06, 3.41it/s] 64%|██████▍ | 237421/371472 [8:17:50<10:55:47, 3.41it/s] 64%|██████▍ | 237422/371472 [8:17:50<10:46:45, 3.45it/s] 64%|██████▍ | 237423/371472 [8:17:51<13:52:36, 2.68it/s] 64%|██████▍ | 237424/371472 [8:17:51<12:42:46, 2.93it/s] 64%|██████▍ | 237425/371472 [8:17:52<12:56:06, 2.88it/s] 64%|██████▍ | 237426/371472 [8:17:52<12:14:44, 3.04it/s] 64%|██████▍ | 237427/371472 [8:17:52<12:59:21, 2.87it/s] 64%|██████▍ | 237428/371472 [8:17:53<12:25:28, 3.00it/s] 64%|██████▍ | 237429/371472 [8:17:53<11:49:26, 3.15it/s] 64%|██████▍ | 237430/371472 [8:17:53<12:09:02, 3.06it/s] 64%|██████▍ | 237431/371472 [8:17:54<11:42:16, 3.18it/s] 64%|██████▍ | 237432/371472 [8:17:54<11:40:03, 3.19it/s] 64%|██████▍ | 237433/371472 [8:17:54<11:30:41, 3.23it/s] 64%|██████▍ | 237434/371472 [8:17:54<11:19:05, 3.29it/s] 64%|██████▍ | 237435/371472 [8:17:55<10:56:20, 3.40it/s] 64%|██████▍ | 237436/371472 [8:17:55<11:18:00, 3.29it/s] 64%|██████▍ | 237437/371472 [8:17:55<11:06:39, 3.35it/s] 64%|██████▍ | 237438/371472 [8:17:56<11:12:43, 3.32it/s] 64%|██████▍ | 237439/371472 [8:17:56<11:25:30, 3.26it/s] 64%|██████▍ | 237440/371472 [8:17:56<11:06:26, 3.35it/s] {'loss': 2.6198, 'learning_rate': 4.2490680686935726e-07, 'epoch': 10.23} + 64%|██████▍ | 237440/371472 [8:17:56<11:06:26, 3.35it/s] 64%|██████▍ | 237441/371472 [8:17:57<11:17:38, 3.30it/s] 64%|██████▍ | 237442/371472 [8:17:57<11:19:34, 3.29it/s] 64%|██████▍ | 237443/371472 [8:17:57<11:16:41, 3.30it/s] 64%|██████▍ | 237444/371472 [8:17:58<12:37:46, 2.95it/s] 64%|██████▍ | 237445/371472 [8:17:58<12:15:41, 3.04it/s] 64%|██████▍ | 237446/371472 [8:17:58<11:45:38, 3.17it/s] 64%|██████▍ | 237447/371472 [8:17:59<12:19:42, 3.02it/s] 64%|██████▍ | 237448/371472 [8:17:59<11:51:50, 3.14it/s] 64%|██████▍ | 237449/371472 [8:17:59<11:46:53, 3.16it/s] 64%|██████▍ | 237450/371472 [8:17:59<12:15:42, 3.04it/s] 64%|██████▍ | 237451/371472 [8:18:00<12:02:15, 3.09it/s] 64%|██████▍ | 237452/371472 [8:18:00<11:36:11, 3.21it/s] 64%|██████▍ | 237453/371472 [8:18:00<12:05:56, 3.08it/s] 64%|██████▍ | 237454/371472 [8:18:01<11:33:56, 3.22it/s] 64%|██████▍ | 237455/371472 [8:18:01<11:11:53, 3.32it/s] 64%|██████▍ | 237456/371472 [8:18:01<11:04:42, 3.36it/s] 64%|██████▍ | 237457/371472 [8:18:02<10:56:07, 3.40it/s] 64%|██████▍ | 237458/371472 [8:18:02<10:46:32, 3.45it/s] 64%|██████▍ | 237459/371472 [8:18:02<10:35:03, 3.52it/s] 64%|██████▍ | 237460/371472 [8:18:02<10:19:14, 3.61it/s] {'loss': 2.5947, 'learning_rate': 4.248583248938783e-07, 'epoch': 10.23} + 64%|██████▍ | 237460/371472 [8:18:02<10:19:14, 3.61it/s] 64%|██████▍ | 237461/371472 [8:18:03<10:23:28, 3.58it/s] 64%|██████▍ | 237462/371472 [8:18:03<10:39:35, 3.49it/s] 64%|██████▍ | 237463/371472 [8:18:03<11:18:02, 3.29it/s] 64%|██████▍ | 237464/371472 [8:18:04<11:02:20, 3.37it/s] 64%|██████▍ | 237465/371472 [8:18:04<10:51:15, 3.43it/s] 64%|██████▍ | 237466/371472 [8:18:04<10:44:03, 3.47it/s] 64%|██████▍ | 237467/371472 [8:18:04<10:49:37, 3.44it/s] 64%|██████▍ | 237468/371472 [8:18:05<10:47:06, 3.45it/s] 64%|██████▍ | 237469/371472 [8:18:05<11:24:47, 3.26it/s] 64%|██████▍ | 237470/371472 [8:18:05<11:12:40, 3.32it/s] 64%|██████▍ | 237471/371472 [8:18:06<10:39:03, 3.49it/s] 64%|██████▍ | 237472/371472 [8:18:06<10:57:57, 3.39it/s] 64%|██████▍ | 237473/371472 [8:18:06<10:44:37, 3.46it/s] 64%|██████▍ | 237474/371472 [8:18:06<10:33:53, 3.52it/s] 64%|██████▍ | 237475/371472 [8:18:07<10:32:19, 3.53it/s] 64%|██████▍ | 237476/371472 [8:18:07<10:37:12, 3.50it/s] 64%|██████▍ | 237477/371472 [8:18:07<10:54:06, 3.41it/s] 64%|██████▍ | 237478/371472 [8:18:08<10:51:02, 3.43it/s] 64%|██████▍ | 237479/371472 [8:18:08<10:56:16, 3.40it/s] 64%|██████▍ | 237480/371472 [8:18:08<11:31:10, 3.23it/s] {'loss': 2.796, 'learning_rate': 4.2480984291839946e-07, 'epoch': 10.23} + 64%|██████▍ | 237480/371472 [8:18:08<11:31:10, 3.23it/s] 64%|██████▍ | 237481/371472 [8:18:09<11:33:04, 3.22it/s] 64%|██████▍ | 237482/371472 [8:18:09<11:13:35, 3.32it/s] 64%|██████▍ | 237483/371472 [8:18:09<10:41:17, 3.48it/s] 64%|██████▍ | 237484/371472 [8:18:10<11:28:54, 3.24it/s] 64%|██████▍ | 237485/371472 [8:18:10<10:57:20, 3.40it/s] 64%|██████▍ | 237486/371472 [8:18:10<12:28:24, 2.98it/s] 64%|██████▍ | 237487/371472 [8:18:10<11:58:19, 3.11it/s] 64%|██████▍ | 237488/371472 [8:18:11<11:39:37, 3.19it/s] 64%|██████▍ | 237489/371472 [8:18:11<11:23:50, 3.27it/s] 64%|██████▍ | 237490/371472 [8:18:11<11:42:23, 3.18it/s] 64%|██████▍ | 237491/371472 [8:18:12<11:22:33, 3.27it/s] 64%|██████▍ | 237492/371472 [8:18:12<11:19:12, 3.29it/s] 64%|██████▍ | 237493/371472 [8:18:12<11:08:35, 3.34it/s] 64%|██████▍ | 237494/371472 [8:18:13<10:48:28, 3.44it/s] 64%|██████▍ | 237495/371472 [8:18:13<11:16:41, 3.30it/s] 64%|██████▍ | 237496/371472 [8:18:13<11:28:55, 3.24it/s] 64%|██████▍ | 237497/371472 [8:18:13<11:19:55, 3.28it/s] 64%|██████▍ | 237498/371472 [8:18:14<11:01:19, 3.38it/s] 64%|██████▍ | 237499/371472 [8:18:14<12:44:00, 2.92it/s] 64%|██████▍ | 237500/371472 [8:18:15<12:11:02, 3.05it/s] {'loss': 2.6244, 'learning_rate': 4.2476136094292053e-07, 'epoch': 10.23} + 64%|██████▍ | 237500/371472 [8:18:15<12:11:02, 3.05it/s] 64%|██████▍ | 237501/371472 [8:18:15<11:45:47, 3.16it/s] 64%|██████▍ | 237502/371472 [8:18:15<11:33:07, 3.22it/s] 64%|██████▍ | 237503/371472 [8:18:15<11:24:35, 3.26it/s] 64%|██████▍ | 237504/371472 [8:18:16<11:23:01, 3.27it/s] 64%|██████▍ | 237505/371472 [8:18:16<11:16:16, 3.30it/s] 64%|██████▍ | 237506/371472 [8:18:16<11:14:14, 3.31it/s] 64%|██████▍ | 237507/371472 [8:18:17<11:15:38, 3.30it/s] 64%|██████▍ | 237508/371472 [8:18:17<11:08:08, 3.34it/s] 64%|██████▍ | 237509/371472 [8:18:17<11:51:36, 3.14it/s] 64%|██████▍ | 237510/371472 [8:18:18<12:26:48, 2.99it/s] 64%|██████▍ | 237511/371472 [8:18:18<13:37:16, 2.73it/s] 64%|██████▍ | 237512/371472 [8:18:18<13:08:31, 2.83it/s] 64%|██████▍ | 237513/371472 [8:18:19<12:41:19, 2.93it/s] 64%|██████▍ | 237514/371472 [8:18:19<11:57:53, 3.11it/s] 64%|██████▍ | 237515/371472 [8:18:19<11:47:00, 3.16it/s] 64%|██████▍ | 237516/371472 [8:18:20<11:15:19, 3.31it/s] 64%|██████▍ | 237517/371472 [8:18:20<11:08:22, 3.34it/s] 64%|██████▍ | 237518/371472 [8:18:20<12:17:19, 3.03it/s] 64%|██████▍ | 237519/371472 [8:18:21<12:02:21, 3.09it/s] 64%|██████▍ | 237520/371472 [8:18:21<11:49:41, 3.15it/s] {'loss': 2.665, 'learning_rate': 4.247128789674417e-07, 'epoch': 10.23} + 64%|██████▍ | 237520/371472 [8:18:21<11:49:41, 3.15it/s] 64%|██████▍ | 237521/371472 [8:18:21<11:17:16, 3.30it/s] 64%|██████▍ | 237522/371472 [8:18:21<11:09:28, 3.33it/s] 64%|██████▍ | 237523/371472 [8:18:22<11:08:14, 3.34it/s] 64%|██████▍ | 237524/371472 [8:18:22<11:20:51, 3.28it/s] 64%|██████▍ | 237525/371472 [8:18:22<10:53:01, 3.42it/s] 64%|██████▍ | 237526/371472 [8:18:23<12:10:02, 3.06it/s] 64%|██████▍ | 237527/371472 [8:18:23<11:51:06, 3.14it/s] 64%|██████▍ | 237528/371472 [8:18:23<11:51:33, 3.14it/s] 64%|██████▍ | 237529/371472 [8:18:24<11:22:08, 3.27it/s] 64%|██████▍ | 237530/371472 [8:18:24<11:09:32, 3.33it/s] 64%|██████▍ | 237531/371472 [8:18:24<10:50:44, 3.43it/s] 64%|██████▍ | 237532/371472 [8:18:25<11:31:44, 3.23it/s] 64%|██████▍ | 237533/371472 [8:18:25<12:13:27, 3.04it/s] 64%|██████▍ | 237534/371472 [8:18:25<11:32:15, 3.22it/s] 64%|██████▍ | 237535/371472 [8:18:25<11:21:30, 3.28it/s] 64%|██████▍ | 237536/371472 [8:18:26<11:28:45, 3.24it/s] 64%|██████▍ | 237537/371472 [8:18:26<11:28:36, 3.24it/s] 64%|██████▍ | 237538/371472 [8:18:26<11:51:18, 3.14it/s] 64%|██████▍ | 237539/371472 [8:18:27<11:19:33, 3.28it/s] 64%|██████▍ | 237540/371472 [8:18:27<11:10:07, 3.33it/s] {'loss': 2.648, 'learning_rate': 4.246643969919627e-07, 'epoch': 10.23} + 64%|██████▍ | 237540/371472 [8:18:27<11:10:07, 3.33it/s] 64%|██████▍ | 237541/371472 [8:18:27<11:29:45, 3.24it/s] 64%|██████▍ | 237542/371472 [8:18:28<11:41:07, 3.18it/s] 64%|██████▍ | 237543/371472 [8:18:28<11:18:56, 3.29it/s] 64%|██████▍ | 237544/371472 [8:18:28<11:28:10, 3.24it/s] 64%|██████▍ | 237545/371472 [8:18:29<11:13:40, 3.31it/s] 64%|██████▍ | 237546/371472 [8:18:29<11:05:01, 3.36it/s] 64%|██████▍ | 237547/371472 [8:18:29<10:37:37, 3.50it/s] 64%|██████▍ | 237548/371472 [8:18:29<10:46:59, 3.45it/s] 64%|██████▍ | 237549/371472 [8:18:30<10:30:23, 3.54it/s] 64%|██████▍ | 237550/371472 [8:18:30<10:12:35, 3.64it/s] 64%|██████▍ | 237551/371472 [8:18:30<10:22:14, 3.59it/s] 64%|██████▍ | 237552/371472 [8:18:30<10:53:21, 3.42it/s] 64%|██████▍ | 237553/371472 [8:18:31<10:51:38, 3.43it/s] 64%|██████▍ | 237554/371472 [8:18:31<12:08:38, 3.06it/s] 64%|██████▍ | 237555/371472 [8:18:31<11:20:52, 3.28it/s] 64%|██████▍ | 237556/371472 [8:18:32<11:28:03, 3.24it/s] 64%|██████▍ | 237557/371472 [8:18:32<11:17:58, 3.29it/s] 64%|██████▍ | 237558/371472 [8:18:32<10:36:21, 3.51it/s] 64%|██████▍ | 237559/371472 [8:18:33<10:34:39, 3.52it/s] 64%|██████▍ | 237560/371472 [8:18:33<10:41:49, 3.48it/s] {'loss': 2.8089, 'learning_rate': 4.246159150164839e-07, 'epoch': 10.23} + 64%|██████▍ | 237560/371472 [8:18:33<10:41:49, 3.48it/s] 64%|██████▍ | 237561/371472 [8:18:33<10:50:54, 3.43it/s] 64%|██████▍ | 237562/371472 [8:18:34<11:13:43, 3.31it/s] 64%|██████▍ | 237563/371472 [8:18:34<11:18:05, 3.29it/s] 64%|██████▍ | 237564/371472 [8:18:34<11:19:26, 3.28it/s] 64%|██████▍ | 237565/371472 [8:18:34<10:56:40, 3.40it/s] 64%|██████▍ | 237566/371472 [8:18:35<10:41:37, 3.48it/s] 64%|██████▍ | 237567/371472 [8:18:35<10:31:15, 3.54it/s] 64%|██████▍ | 237568/371472 [8:18:35<11:13:38, 3.31it/s] 64%|██████▍ | 237569/371472 [8:18:36<11:03:30, 3.36it/s] 64%|██████▍ | 237570/371472 [8:18:36<10:50:49, 3.43it/s] 64%|██████▍ | 237571/371472 [8:18:36<10:31:21, 3.53it/s] 64%|██████▍ | 237572/371472 [8:18:36<10:21:41, 3.59it/s] 64%|██████▍ | 237573/371472 [8:18:37<10:12:12, 3.65it/s] 64%|██████▍ | 237574/371472 [8:18:37<10:46:11, 3.45it/s] 64%|██████▍ | 237575/371472 [8:18:37<10:27:52, 3.55it/s] 64%|██████▍ | 237576/371472 [8:18:38<10:52:16, 3.42it/s] 64%|██████▍ | 237577/371472 [8:18:38<10:50:10, 3.43it/s] 64%|██████▍ | 237578/371472 [8:18:38<11:52:27, 3.13it/s] 64%|██████▍ | 237579/371472 [8:18:39<11:49:39, 3.14it/s] 64%|██████▍ | 237580/371472 [8:18:39<11:50:56, 3.14it/s] {'loss': 2.6438, 'learning_rate': 4.245674330410049e-07, 'epoch': 10.23} + 64%|██████▍ | 237580/371472 [8:18:39<11:50:56, 3.14it/s] 64%|██████▍ | 237581/371472 [8:18:39<11:39:13, 3.19it/s] 64%|██████▍ | 237582/371472 [8:18:39<11:00:31, 3.38it/s] 64%|██████▍ | 237583/371472 [8:18:40<11:04:48, 3.36it/s] 64%|██████▍ | 237584/371472 [8:18:40<10:35:06, 3.51it/s] 64%|██████▍ | 237585/371472 [8:18:40<12:45:07, 2.92it/s] 64%|██████▍ | 237586/371472 [8:18:41<12:05:12, 3.08it/s] 64%|██████▍ | 237587/371472 [8:18:41<11:32:59, 3.22it/s] 64%|██████▍ | 237588/371472 [8:18:41<11:19:07, 3.29it/s] 64%|██████▍ | 237589/371472 [8:18:42<10:53:27, 3.41it/s] 64%|██████▍ | 237590/371472 [8:18:42<10:49:35, 3.44it/s] 64%|██████▍ | 237591/371472 [8:18:42<11:16:18, 3.30it/s] 64%|██████▍ | 237592/371472 [8:18:42<11:01:19, 3.37it/s] 64%|██████▍ | 237593/371472 [8:18:43<11:35:28, 3.21it/s] 64%|██████▍ | 237594/371472 [8:18:43<11:21:11, 3.28it/s] 64%|██████▍ | 237595/371472 [8:18:43<11:41:45, 3.18it/s] 64%|██████▍ | 237596/371472 [8:18:44<13:02:28, 2.85it/s] 64%|██████▍ | 237597/371472 [8:18:44<12:18:44, 3.02it/s] 64%|██████▍ | 237598/371472 [8:18:44<12:03:32, 3.08it/s] 64%|██████▍ | 237599/371472 [8:18:45<12:02:49, 3.09it/s] 64%|██████▍ | 237600/371472 [8:18:45<12:17:26, 3.03it/s] {'loss': 2.6655, 'learning_rate': 4.245189510655261e-07, 'epoch': 10.23} + 64%|██████▍ | 237600/371472 [8:18:45<12:17:26, 3.03it/s] 64%|██████▍ | 237601/371472 [8:18:45<11:35:10, 3.21it/s] 64%|██████▍ | 237602/371472 [8:18:46<11:40:21, 3.19it/s] 64%|██████▍ | 237603/371472 [8:18:46<11:29:07, 3.24it/s] 64%|██████▍ | 237604/371472 [8:18:46<12:25:04, 2.99it/s] 64%|██████▍ | 237605/371472 [8:18:47<12:33:31, 2.96it/s] 64%|██████▍ | 237606/371472 [8:18:47<12:04:53, 3.08it/s] 64%|██████▍ | 237607/371472 [8:18:47<11:41:51, 3.18it/s] 64%|██████▍ | 237608/371472 [8:18:48<11:20:11, 3.28it/s] 64%|██████▍ | 237609/371472 [8:18:48<11:02:20, 3.37it/s] 64%|██████▍ | 237610/371472 [8:18:48<10:47:29, 3.45it/s] 64%|██████▍ | 237611/371472 [8:18:48<10:34:51, 3.51it/s] 64%|██████▍ | 237612/371472 [8:18:49<11:00:39, 3.38it/s] 64%|██████▍ | 237613/371472 [8:18:49<11:06:59, 3.34it/s] 64%|██████▍ | 237614/371472 [8:18:49<10:40:19, 3.48it/s] 64%|██████▍ | 237615/371472 [8:18:50<10:27:45, 3.55it/s] 64%|██████▍ | 237616/371472 [8:18:50<10:45:00, 3.46it/s] 64%|██████▍ | 237617/371472 [8:18:50<10:36:05, 3.51it/s] 64%|██████▍ | 237618/371472 [8:18:50<10:36:00, 3.51it/s] 64%|██████▍ | 237619/371472 [8:18:51<11:04:12, 3.36it/s] 64%|██████▍ | 237620/371472 [8:18:51<10:49:00, 3.44it/s] {'loss': 2.7351, 'learning_rate': 4.2447046909004717e-07, 'epoch': 10.23} + 64%|██████▍ | 237620/371472 [8:18:51<10:49:00, 3.44it/s] 64%|██████▍ | 237621/371472 [8:18:51<10:44:40, 3.46it/s] 64%|██████▍ | 237622/371472 [8:18:52<10:40:31, 3.48it/s] 64%|██████▍ | 237623/371472 [8:18:52<10:23:38, 3.58it/s] 64%|██████▍ | 237624/371472 [8:18:52<11:04:57, 3.35it/s] 64%|██████▍ | 237625/371472 [8:18:53<10:59:09, 3.38it/s] 64%|██████▍ | 237626/371472 [8:18:53<10:58:45, 3.39it/s] 64%|██████▍ | 237627/371472 [8:18:53<11:03:24, 3.36it/s] 64%|██████▍ | 237628/371472 [8:18:53<10:55:11, 3.40it/s] 64%|██████▍ | 237629/371472 [8:18:54<11:03:29, 3.36it/s] 64%|██████▍ | 237630/371472 [8:18:54<11:07:36, 3.34it/s] 64%|██████▍ | 237631/371472 [8:18:54<11:06:19, 3.35it/s] 64%|██████▍ | 237632/371472 [8:18:55<11:28:40, 3.24it/s] 64%|██████▍ | 237633/371472 [8:18:55<11:07:49, 3.34it/s] 64%|██████▍ | 237634/371472 [8:18:55<10:51:00, 3.43it/s] 64%|██████▍ | 237635/371472 [8:18:55<10:31:59, 3.53it/s] 64%|██████▍ | 237636/371472 [8:18:56<10:23:57, 3.57it/s] 64%|██████▍ | 237637/371472 [8:18:56<10:40:17, 3.48it/s] 64%|██████▍ | 237638/371472 [8:18:56<10:25:27, 3.57it/s] 64%|██████▍ | 237639/371472 [8:18:57<10:58:11, 3.39it/s] 64%|██████▍ | 237640/371472 [8:18:57<11:09:23, 3.33it/s] {'loss': 2.685, 'learning_rate': 4.244219871145683e-07, 'epoch': 10.24} + 64%|██████▍ | 237640/371472 [8:18:57<11:09:23, 3.33it/s] 64%|██████▍ | 237641/371472 [8:18:57<10:55:19, 3.40it/s] 64%|██████▍ | 237642/371472 [8:18:58<11:14:52, 3.31it/s] 64%|██████▍ | 237643/371472 [8:18:58<10:57:23, 3.39it/s] 64%|██████▍ | 237644/371472 [8:18:58<10:41:27, 3.48it/s] 64%|██████▍ | 237645/371472 [8:18:58<10:38:39, 3.49it/s] 64%|██████▍ | 237646/371472 [8:18:59<10:46:12, 3.45it/s] 64%|██████▍ | 237647/371472 [8:18:59<11:05:02, 3.35it/s] 64%|██████▍ | 237648/371472 [8:18:59<10:54:40, 3.41it/s] 64%|██████▍ | 237649/371472 [8:19:00<10:42:09, 3.47it/s] 64%|██████▍ | 237650/371472 [8:19:00<10:29:25, 3.54it/s] 64%|██████▍ | 237651/371472 [8:19:00<11:24:41, 3.26it/s] 64%|██████▍ | 237652/371472 [8:19:00<11:13:43, 3.31it/s] 64%|██████▍ | 237653/371472 [8:19:01<10:52:33, 3.42it/s] 64%|██████▍ | 237654/371472 [8:19:01<11:19:05, 3.28it/s] 64%|██████▍ | 237655/371472 [8:19:01<11:23:18, 3.26it/s] 64%|██████▍ | 237656/371472 [8:19:02<11:17:53, 3.29it/s] 64%|██████▍ | 237657/371472 [8:19:02<11:11:03, 3.32it/s] 64%|██████▍ | 237658/371472 [8:19:02<11:02:12, 3.37it/s] 64%|██████▍ | 237659/371472 [8:19:03<10:45:04, 3.46it/s] 64%|██████▍ | 237660/371472 [8:19:03<10:55:36, 3.40it/s] {'loss': 2.7698, 'learning_rate': 4.2437350513908936e-07, 'epoch': 10.24} + 64%|██████▍ | 237660/371472 [8:19:03<10:55:36, 3.40it/s] 64%|██████▍ | 237661/371472 [8:19:03<10:46:27, 3.45it/s] 64%|██████▍ | 237662/371472 [8:19:03<11:24:29, 3.26it/s] 64%|██████▍ | 237663/371472 [8:19:04<11:06:57, 3.34it/s] 64%|██████▍ | 237664/371472 [8:19:04<10:59:49, 3.38it/s] 64%|██████▍ | 237665/371472 [8:19:04<11:32:35, 3.22it/s] 64%|██████▍ | 237666/371472 [8:19:05<11:31:04, 3.23it/s] 64%|██████▍ | 237667/371472 [8:19:05<10:57:26, 3.39it/s] 64%|██████▍ | 237668/371472 [8:19:05<10:32:14, 3.53it/s] 64%|██████▍ | 237669/371472 [8:19:06<10:38:47, 3.49it/s] 64%|██████▍ | 237670/371472 [8:19:06<10:56:51, 3.40it/s] 64%|██████▍ | 237671/371472 [8:19:06<11:09:17, 3.33it/s] 64%|██████▍ | 237672/371472 [8:19:06<11:28:06, 3.24it/s] 64%|██████▍ | 237673/371472 [8:19:07<12:30:10, 2.97it/s] 64%|██████▍ | 237674/371472 [8:19:07<11:54:05, 3.12it/s] 64%|██████▍ | 237675/371472 [8:19:07<11:35:37, 3.21it/s] 64%|██████▍ | 237676/371472 [8:19:08<11:53:23, 3.13it/s] 64%|██████▍ | 237677/371472 [8:19:08<11:37:56, 3.19it/s] 64%|██████▍ | 237678/371472 [8:19:08<11:39:41, 3.19it/s] 64%|██████▍ | 237679/371472 [8:19:09<11:19:51, 3.28it/s] 64%|██████▍ | 237680/371472 [8:19:09<11:03:39, 3.36it/s] {'loss': 2.6844, 'learning_rate': 4.2432502316361054e-07, 'epoch': 10.24} + 64%|██████▍ | 237680/371472 [8:19:09<11:03:39, 3.36it/s] 64%|██████▍ | 237681/371472 [8:19:09<10:56:40, 3.40it/s] 64%|██████▍ | 237682/371472 [8:19:10<11:04:56, 3.35it/s] 64%|██████▍ | 237683/371472 [8:19:10<10:42:09, 3.47it/s] 64%|██████▍ | 237684/371472 [8:19:10<10:45:08, 3.46it/s] 64%|██████▍ | 237685/371472 [8:19:10<10:48:31, 3.44it/s] 64%|██████▍ | 237686/371472 [8:19:11<10:42:04, 3.47it/s] 64%|██████▍ | 237687/371472 [8:19:11<10:56:51, 3.39it/s] 64%|██████▍ | 237688/371472 [8:19:11<10:50:21, 3.43it/s] 64%|██████▍ | 237689/371472 [8:19:12<11:16:18, 3.30it/s] 64%|██████▍ | 237690/371472 [8:19:12<11:04:27, 3.36it/s] 64%|██████▍ | 237691/371472 [8:19:12<10:44:14, 3.46it/s] 64%|██████▍ | 237692/371472 [8:19:12<10:34:46, 3.51it/s] 64%|██████▍ | 237693/371472 [8:19:13<10:27:04, 3.56it/s] 64%|██████▍ | 237694/371472 [8:19:13<10:24:21, 3.57it/s] 64%|██████▍ | 237695/371472 [8:19:13<10:33:15, 3.52it/s] 64%|██████▍ | 237696/371472 [8:19:14<10:28:10, 3.55it/s] 64%|██████▍ | 237697/371472 [8:19:14<10:53:15, 3.41it/s] 64%|██████▍ | 237698/371472 [8:19:14<11:09:01, 3.33it/s] 64%|██████▍ | 237699/371472 [8:19:14<10:51:53, 3.42it/s] 64%|██████▍ | 237700/371472 [8:19:15<10:28:58, 3.54it/s] {'loss': 2.7511, 'learning_rate': 4.242765411881316e-07, 'epoch': 10.24} + 64%|██████▍ | 237700/371472 [8:19:15<10:28:58, 3.54it/s] 64%|██████▍ | 237701/371472 [8:19:15<10:17:02, 3.61it/s] 64%|██████▍ | 237702/371472 [8:19:15<10:10:21, 3.65it/s] 64%|██████▍ | 237703/371472 [8:19:16<10:14:16, 3.63it/s] 64%|██████▍ | 237704/371472 [8:19:16<10:00:26, 3.71it/s] 64%|██████▍ | 237705/371472 [8:19:16<10:20:45, 3.59it/s] 64%|██████▍ | 237706/371472 [8:19:16<11:06:53, 3.34it/s] 64%|██████▍ | 237707/371472 [8:19:17<11:04:16, 3.36it/s] 64%|██████▍ | 237708/371472 [8:19:17<10:55:58, 3.40it/s] 64%|██████▍ | 237709/371472 [8:19:17<10:50:59, 3.42it/s] 64%|██████▍ | 237710/371472 [8:19:18<11:23:05, 3.26it/s] 64%|██████▍ | 237711/371472 [8:19:18<11:09:27, 3.33it/s] 64%|██████▍ | 237712/371472 [8:19:18<10:58:38, 3.38it/s] 64%|██████▍ | 237713/371472 [8:19:19<11:32:20, 3.22it/s] 64%|██████▍ | 237714/371472 [8:19:19<11:24:36, 3.26it/s] 64%|██████▍ | 237715/371472 [8:19:19<11:07:55, 3.34it/s] 64%|██████▍ | 237716/371472 [8:19:20<11:57:43, 3.11it/s] 64%|██████▍ | 237717/371472 [8:19:20<11:33:59, 3.21it/s] 64%|██████▍ | 237718/371472 [8:19:20<11:42:28, 3.17it/s] 64%|██████▍ | 237719/371472 [8:19:20<11:52:42, 3.13it/s] 64%|██████▍ | 237720/371472 [8:19:21<12:37:51, 2.94it/s] {'loss': 2.7576, 'learning_rate': 4.2422805921265274e-07, 'epoch': 10.24} + 64%|██████▍ | 237720/371472 [8:19:21<12:37:51, 2.94it/s] 64%|██████▍ | 237721/371472 [8:19:21<12:49:02, 2.90it/s] 64%|██████▍ | 237722/371472 [8:19:21<12:10:57, 3.05it/s] 64%|██████▍ | 237723/371472 [8:19:22<11:56:02, 3.11it/s] 64%|██████▍ | 237724/371472 [8:19:22<12:37:47, 2.94it/s] 64%|██████▍ | 237725/371472 [8:19:22<12:01:13, 3.09it/s] 64%|██████▍ | 237726/371472 [8:19:23<12:10:12, 3.05it/s] 64%|██████▍ | 237727/371472 [8:19:23<11:45:42, 3.16it/s] 64%|██████▍ | 237728/371472 [8:19:23<11:15:25, 3.30it/s] 64%|██████▍ | 237729/371472 [8:19:24<11:08:08, 3.34it/s] 64%|██████▍ | 237730/371472 [8:19:24<11:07:03, 3.34it/s] 64%|██████▍ | 237731/371472 [8:19:24<11:01:42, 3.37it/s] 64%|██████▍ | 237732/371472 [8:19:25<11:10:31, 3.32it/s] 64%|██████▍ | 237733/371472 [8:19:25<11:16:45, 3.29it/s] 64%|██████▍ | 237734/371472 [8:19:25<11:02:01, 3.37it/s] 64%|██████▍ | 237735/371472 [8:19:25<10:59:44, 3.38it/s] 64%|██████▍ | 237736/371472 [8:19:26<10:43:20, 3.46it/s] 64%|██████▍ | 237737/371472 [8:19:26<10:33:35, 3.52it/s] 64%|██████▍ | 237738/371472 [8:19:26<10:23:38, 3.57it/s] 64%|██████▍ | 237739/371472 [8:19:27<10:26:57, 3.56it/s] 64%|██████▍ | 237740/371472 [8:19:27<11:01:49, 3.37it/s] {'loss': 2.6258, 'learning_rate': 4.241795772371738e-07, 'epoch': 10.24} + 64%|██��███▍ | 237740/371472 [8:19:27<11:01:49, 3.37it/s] 64%|██████▍ | 237741/371472 [8:19:27<10:55:19, 3.40it/s] 64%|██████▍ | 237742/371472 [8:19:27<10:54:49, 3.40it/s] 64%|██████▍ | 237743/371472 [8:19:28<11:00:31, 3.37it/s] 64%|██████▍ | 237744/371472 [8:19:28<11:00:49, 3.37it/s] 64%|██████▍ | 237745/371472 [8:19:28<10:55:29, 3.40it/s] 64%|██████▍ | 237746/371472 [8:19:29<11:54:08, 3.12it/s] 64%|██████▍ | 237747/371472 [8:19:29<11:50:11, 3.14it/s] 64%|██████▍ | 237748/371472 [8:19:29<12:08:11, 3.06it/s] 64%|██████▍ | 237749/371472 [8:19:30<11:56:19, 3.11it/s] 64%|██████▍ | 237750/371472 [8:19:30<11:27:00, 3.24it/s] 64%|██████▍ | 237751/371472 [8:19:30<11:00:31, 3.37it/s] 64%|██████▍ | 237752/371472 [8:19:31<11:49:42, 3.14it/s] 64%|██████▍ | 237753/371472 [8:19:31<11:10:54, 3.32it/s] 64%|██████▍ | 237754/371472 [8:19:31<10:36:57, 3.50it/s] 64%|██████▍ | 237755/371472 [8:19:31<10:18:57, 3.60it/s] 64%|██████▍ | 237756/371472 [8:19:32<10:16:58, 3.61it/s] 64%|██████▍ | 237757/371472 [8:19:32<10:20:03, 3.59it/s] 64%|██████▍ | 237758/371472 [8:19:32<10:52:51, 3.41it/s] 64%|██████▍ | 237759/371472 [8:19:33<11:19:03, 3.28it/s] 64%|██████▍ | 237760/371472 [8:19:33<11:28:19, 3.24it/s] {'loss': 2.784, 'learning_rate': 4.2413109526169493e-07, 'epoch': 10.24} + 64%|██████▍ | 237760/371472 [8:19:33<11:28:19, 3.24it/s] 64%|██████▍ | 237761/371472 [8:19:33<11:16:36, 3.29it/s] 64%|██████▍ | 237762/371472 [8:19:34<11:14:15, 3.31it/s] 64%|██████▍ | 237763/371472 [8:19:34<11:10:00, 3.33it/s] 64%|██████▍ | 237764/371472 [8:19:34<11:06:16, 3.34it/s] 64%|██████▍ | 237765/371472 [8:19:34<11:02:48, 3.36it/s] 64%|██████▍ | 237766/371472 [8:19:35<10:40:28, 3.48it/s] 64%|██████▍ | 237767/371472 [8:19:35<10:29:12, 3.54it/s] 64%|██████▍ | 237768/371472 [8:19:35<10:37:40, 3.49it/s] 64%|██████▍ | 237769/371472 [8:19:36<10:58:00, 3.39it/s] 64%|██████▍ | 237770/371472 [8:19:36<10:51:46, 3.42it/s] 64%|██████▍ | 237771/371472 [8:19:36<10:35:24, 3.51it/s] 64%|██████▍ | 237772/371472 [8:19:36<10:30:05, 3.54it/s] 64%|██████▍ | 237773/371472 [8:19:37<10:30:28, 3.53it/s] 64%|██████▍ | 237774/371472 [8:19:37<10:17:36, 3.61it/s] 64%|██████▍ | 237775/371472 [8:19:37<10:23:44, 3.57it/s] 64%|██████▍ | 237776/371472 [8:19:38<12:16:53, 3.02it/s] 64%|██████▍ | 237777/371472 [8:19:38<11:50:23, 3.14it/s] 64%|██████▍ | 237778/371472 [8:19:38<11:52:50, 3.13it/s] 64%|██████▍ | 237779/371472 [8:19:39<11:51:14, 3.13it/s] 64%|██████▍ | 237780/371472 [8:19:39<11:26:07, 3.25it/s] {'loss': 2.6417, 'learning_rate': 4.24082613286216e-07, 'epoch': 10.24} + 64%|██████▍ | 237780/371472 [8:19:39<11:26:07, 3.25it/s] 64%|██████▍ | 237781/371472 [8:19:39<11:00:36, 3.37it/s] 64%|██████▍ | 237782/371472 [8:19:39<11:17:38, 3.29it/s] 64%|██████▍ | 237783/371472 [8:19:40<10:49:21, 3.43it/s] 64%|██████▍ | 237784/371472 [8:19:40<10:43:29, 3.46it/s] 64%|██████▍ | 237785/371472 [8:19:40<11:24:42, 3.25it/s] 64%|██████▍ | 237786/371472 [8:19:41<11:00:39, 3.37it/s] 64%|██████▍ | 237787/371472 [8:19:41<10:49:32, 3.43it/s] 64%|██████▍ | 237788/371472 [8:19:41<10:30:28, 3.53it/s] 64%|██████▍ | 237789/371472 [8:19:41<10:38:15, 3.49it/s] 64%|██████▍ | 237790/371472 [8:19:42<10:30:26, 3.53it/s] 64%|██████▍ | 237791/371472 [8:19:42<11:07:15, 3.34it/s] 64%|██████▍ | 237792/371472 [8:19:42<10:50:13, 3.43it/s] 64%|██████▍ | 237793/371472 [8:19:43<10:42:14, 3.47it/s] 64%|██████▍ | 237794/371472 [8:19:43<10:42:43, 3.47it/s] 64%|██████▍ | 237795/371472 [8:19:43<11:44:40, 3.16it/s] 64%|██████▍ | 237796/371472 [8:19:44<11:19:06, 3.28it/s] 64%|██████▍ | 237797/371472 [8:19:44<10:49:17, 3.43it/s] 64%|██████▍ | 237798/371472 [8:19:44<10:44:20, 3.46it/s] 64%|██████▍ | 237799/371472 [8:19:44<10:38:10, 3.49it/s] 64%|██████▍ | 237800/371472 [8:19:45<10:39:37, 3.48it/s] {'loss': 2.661, 'learning_rate': 4.240341313107371e-07, 'epoch': 10.24} + 64%|██████▍ | 237800/371472 [8:19:45<10:39:37, 3.48it/s] 64%|██████▍ | 237801/371472 [8:19:45<11:28:32, 3.24it/s] 64%|██████▍ | 237802/371472 [8:19:45<11:22:53, 3.26it/s] 64%|██████▍ | 237803/371472 [8:19:46<11:27:28, 3.24it/s] 64%|██████▍ | 237804/371472 [8:19:46<11:11:01, 3.32it/s] 64%|██████▍ | 237805/371472 [8:19:46<11:03:17, 3.36it/s] 64%|██████▍ | 237806/371472 [8:19:47<10:51:47, 3.42it/s] 64%|██████▍ | 237807/371472 [8:19:47<10:49:43, 3.43it/s] 64%|██████▍ | 237808/371472 [8:19:47<10:39:22, 3.48it/s] 64%|██████▍ | 237809/371472 [8:19:47<12:03:09, 3.08it/s] 64%|██████▍ | 237810/371472 [8:19:48<11:25:46, 3.25it/s] 64%|██████▍ | 237811/371472 [8:19:48<11:08:10, 3.33it/s] 64%|██████▍ | 237812/371472 [8:19:48<10:55:03, 3.40it/s] 64%|██████▍ | 237813/371472 [8:19:49<11:07:01, 3.34it/s] 64%|██████▍ | 237814/371472 [8:19:49<11:05:33, 3.35it/s] 64%|██████▍ | 237815/371472 [8:19:49<10:54:16, 3.40it/s] 64%|██████▍ | 237816/371472 [8:19:50<11:16:53, 3.29it/s] 64%|██████▍ | 237817/371472 [8:19:50<11:28:55, 3.23it/s] 64%|██████▍ | 237818/371472 [8:19:50<11:16:29, 3.29it/s] 64%|██████▍ | 237819/371472 [8:19:50<11:25:27, 3.25it/s] 64%|██████▍ | 237820/371472 [8:19:51<12:01:28, 3.09it/s] {'loss': 2.8966, 'learning_rate': 4.2398564933525825e-07, 'epoch': 10.24} + 64%|██████▍ | 237820/371472 [8:19:51<12:01:28, 3.09it/s] 64%|██████▍ | 237821/371472 [8:19:51<11:41:18, 3.18it/s] 64%|██████▍ | 237822/371472 [8:19:51<12:09:15, 3.05it/s] 64%|██████▍ | 237823/371472 [8:19:52<11:55:04, 3.12it/s] 64%|██████▍ | 237824/371472 [8:19:52<11:25:06, 3.25it/s] 64%|██████▍ | 237825/371472 [8:19:52<11:49:35, 3.14it/s] 64%|██████▍ | 237826/371472 [8:19:53<11:33:00, 3.21it/s] 64%|██████▍ | 237827/371472 [8:19:53<11:35:56, 3.20it/s] 64%|██████▍ | 237828/371472 [8:19:53<11:09:17, 3.33it/s] 64%|██████▍ | 237829/371472 [8:19:54<10:52:07, 3.42it/s] 64%|██████▍ | 237830/371472 [8:19:54<10:52:28, 3.41it/s] 64%|██████▍ | 237831/371472 [8:19:54<10:39:25, 3.48it/s] 64%|██████▍ | 237832/371472 [8:19:54<10:37:34, 3.49it/s] 64%|██████▍ | 237833/371472 [8:19:55<10:34:07, 3.51it/s] 64%|██████▍ | 237834/371472 [8:19:55<10:33:35, 3.52it/s] 64%|██████▍ | 237835/371472 [8:19:55<11:06:25, 3.34it/s] 64%|██████▍ | 237836/371472 [8:19:56<12:18:42, 3.02it/s] 64%|██████▍ | 237837/371472 [8:19:56<11:50:31, 3.13it/s] 64%|██████▍ | 237838/371472 [8:19:56<11:38:24, 3.19it/s] 64%|██████▍ | 237839/371472 [8:19:57<11:31:23, 3.22it/s] 64%|██████▍ | 237840/371472 [8:19:57<11:45:58, 3.15it/s] {'loss': 2.8227, 'learning_rate': 4.2393716735977927e-07, 'epoch': 10.24} + 64%|██████▍ | 237840/371472 [8:19:57<11:45:58, 3.15it/s] 64%|██████▍ | 237841/371472 [8:19:57<11:51:48, 3.13it/s] 64%|██████▍ | 237842/371472 [8:19:58<12:02:33, 3.08it/s] 64%|██████▍ | 237843/371472 [8:19:58<12:10:54, 3.05it/s] 64%|██████▍ | 237844/371472 [8:19:58<12:31:54, 2.96it/s] 64%|██████▍ | 237845/371472 [8:19:59<12:04:40, 3.07it/s] 64%|██████▍ | 237846/371472 [8:19:59<11:17:49, 3.29it/s] 64%|██████▍ | 237847/371472 [8:19:59<11:32:06, 3.22it/s] 64%|██████▍ | 237848/371472 [8:19:59<11:02:51, 3.36it/s] 64%|██████▍ | 237849/371472 [8:20:00<12:17:24, 3.02it/s] 64%|██████▍ | 237850/371472 [8:20:00<11:42:45, 3.17it/s] 64%|██████▍ | 237851/371472 [8:20:00<11:37:26, 3.19it/s] 64%|██████▍ | 237852/371472 [8:20:01<11:19:15, 3.28it/s] 64%|██████▍ | 237853/371472 [8:20:01<11:20:48, 3.27it/s] 64%|██████▍ | 237854/371472 [8:20:01<11:25:24, 3.25it/s] 64%|██████▍ | 237855/371472 [8:20:02<11:11:06, 3.32it/s] 64%|██████▍ | 237856/371472 [8:20:02<11:13:50, 3.30it/s] 64%|██████▍ | 237857/371472 [8:20:02<10:51:54, 3.42it/s] 64%|██████▍ | 237858/371472 [8:20:02<10:42:59, 3.46it/s] 64%|██████▍ | 237859/371472 [8:20:03<11:42:39, 3.17it/s] 64%|██████▍ | 237860/371472 [8:20:03<11:33:57, 3.21it/s] {'loss': 2.6246, 'learning_rate': 4.2388868538430045e-07, 'epoch': 10.25} + 64%|██████▍ | 237860/371472 [8:20:03<11:33:57, 3.21it/s] 64%|██████▍ | 237861/371472 [8:20:04<11:51:03, 3.13it/s] 64%|██████▍ | 237862/371472 [8:20:04<11:37:20, 3.19it/s] 64%|██████▍ | 237863/371472 [8:20:04<11:13:02, 3.31it/s] 64%|██████▍ | 237864/371472 [8:20:04<11:17:43, 3.29it/s] 64%|██████▍ | 237865/371472 [8:20:05<11:27:21, 3.24it/s] 64%|██████▍ | 237866/371472 [8:20:05<11:14:51, 3.30it/s] 64%|██████▍ | 237867/371472 [8:20:05<12:26:38, 2.98it/s] 64%|██████▍ | 237868/371472 [8:20:06<12:25:05, 2.99it/s] 64%|██████▍ | 237869/371472 [8:20:06<11:45:36, 3.16it/s] 64%|██████▍ | 237870/371472 [8:20:06<11:21:50, 3.27it/s] 64%|██████▍ | 237871/371472 [8:20:07<10:56:34, 3.39it/s] 64%|██████▍ | 237872/371472 [8:20:07<10:47:35, 3.44it/s] 64%|██████▍ | 237873/371472 [8:20:07<10:43:01, 3.46it/s] 64%|██████▍ | 237874/371472 [8:20:07<10:57:12, 3.39it/s] 64%|██████▍ | 237875/371472 [8:20:08<10:42:38, 3.46it/s] 64%|██████▍ | 237876/371472 [8:20:08<10:55:15, 3.40it/s] 64%|██████▍ | 237877/371472 [8:20:08<11:38:26, 3.19it/s] 64%|██████▍ | 237878/371472 [8:20:09<11:15:30, 3.30it/s] 64%|██████▍ | 237879/371472 [8:20:09<11:01:05, 3.37it/s] 64%|██████▍ | 237880/371472 [8:20:09<11:32:56, 3.21it/s] {'loss': 2.8172, 'learning_rate': 4.238402034088215e-07, 'epoch': 10.25} + 64%|██████▍ | 237880/371472 [8:20:09<11:32:56, 3.21it/s] 64%|██████▍ | 237881/371472 [8:20:10<12:01:55, 3.08it/s] 64%|██████▍ | 237882/371472 [8:20:10<11:44:58, 3.16it/s] 64%|██████▍ | 237883/371472 [8:20:10<11:23:58, 3.26it/s] 64%|██████▍ | 237884/371472 [8:20:11<11:02:22, 3.36it/s] 64%|██████▍ | 237885/371472 [8:20:11<10:42:06, 3.47it/s] 64%|██████▍ | 237886/371472 [8:20:11<10:42:42, 3.46it/s] 64%|██████▍ | 237887/371472 [8:20:11<11:08:08, 3.33it/s] 64%|██████▍ | 237888/371472 [8:20:12<10:41:09, 3.47it/s] 64%|██████▍ | 237889/371472 [8:20:12<10:50:17, 3.42it/s] 64%|██████▍ | 237890/371472 [8:20:12<11:25:45, 3.25it/s] 64%|██████▍ | 237891/371472 [8:20:13<11:32:26, 3.22it/s] 64%|██████▍ | 237892/371472 [8:20:13<10:55:38, 3.40it/s] 64%|██████▍ | 237893/371472 [8:20:13<10:46:30, 3.44it/s] 64%|██████▍ | 237894/371472 [8:20:13<10:57:19, 3.39it/s] 64%|██████▍ | 237895/371472 [8:20:14<10:48:54, 3.43it/s] 64%|██████▍ | 237896/371472 [8:20:14<11:34:20, 3.21it/s] 64%|██████▍ | 237897/371472 [8:20:14<11:15:10, 3.30it/s] 64%|██████▍ | 237898/371472 [8:20:15<10:55:30, 3.40it/s] 64%|██████▍ | 237899/371472 [8:20:15<10:54:16, 3.40it/s] 64%|██████▍ | 237900/371472 [8:20:15<10:50:53, 3.42it/s] {'loss': 2.8408, 'learning_rate': 4.2379172143334265e-07, 'epoch': 10.25} + 64%|██████▍ | 237900/371472 [8:20:15<10:50:53, 3.42it/s] 64%|██████▍ | 237901/371472 [8:20:16<11:20:48, 3.27it/s] 64%|██████▍ | 237902/371472 [8:20:16<10:47:19, 3.44it/s] 64%|██████▍ | 237903/371472 [8:20:16<11:13:35, 3.30it/s] 64%|██████▍ | 237904/371472 [8:20:16<11:05:28, 3.35it/s] 64%|██████▍ | 237905/371472 [8:20:17<11:16:33, 3.29it/s] 64%|██████▍ | 237906/371472 [8:20:17<12:00:44, 3.09it/s] 64%|██████▍ | 237907/371472 [8:20:17<11:50:22, 3.13it/s] 64%|██████▍ | 237908/371472 [8:20:18<12:06:07, 3.07it/s] 64%|██████▍ | 237909/371472 [8:20:18<11:44:16, 3.16it/s] 64%|██████▍ | 237910/371472 [8:20:18<11:34:33, 3.20it/s] 64%|██████▍ | 237911/371472 [8:20:19<11:26:21, 3.24it/s] 64%|██████▍ | 237912/371472 [8:20:19<10:58:50, 3.38it/s] 64%|██████▍ | 237913/371472 [8:20:19<11:31:05, 3.22it/s] 64%|██████▍ | 237914/371472 [8:20:20<11:22:53, 3.26it/s] 64%|██████▍ | 237915/371472 [8:20:20<11:03:39, 3.35it/s] 64%|██████▍ | 237916/371472 [8:20:20<10:42:26, 3.46it/s] 64%|██████▍ | 237917/371472 [8:20:20<10:48:46, 3.43it/s] 64%|██████▍ | 237918/371472 [8:20:21<10:43:56, 3.46it/s] 64%|██████▍ | 237919/371472 [8:20:21<11:17:10, 3.29it/s] 64%|██████▍ | 237920/371472 [8:20:21<11:07:43, 3.33it/s] {'loss': 2.7499, 'learning_rate': 4.237432394578637e-07, 'epoch': 10.25} + 64%|██████▍ | 237920/371472 [8:20:21<11:07:43, 3.33it/s] 64%|██████▍ | 237921/371472 [8:20:22<10:47:41, 3.44it/s] 64%|██████▍ | 237922/371472 [8:20:22<10:51:17, 3.42it/s] 64%|██████▍ | 237923/371472 [8:20:22<10:50:11, 3.42it/s] 64%|██████▍ | 237924/371472 [8:20:23<11:02:25, 3.36it/s] 64%|██████▍ | 237925/371472 [8:20:23<11:21:40, 3.27it/s] 64%|██████▍ | 237926/371472 [8:20:23<11:26:22, 3.24it/s] 64%|██████▍ | 237927/371472 [8:20:23<11:12:41, 3.31it/s] 64%|██████▍ | 237928/371472 [8:20:24<11:48:09, 3.14it/s] 64%|██████▍ | 237929/371472 [8:20:24<12:13:32, 3.03it/s] 64%|██████▍ | 237930/371472 [8:20:24<11:40:50, 3.18it/s] 64%|██████▍ | 237931/371472 [8:20:25<11:24:26, 3.25it/s] 64%|██████▍ | 237932/371472 [8:20:25<10:58:16, 3.38it/s] 64%|██████▍ | 237933/371472 [8:20:25<10:55:51, 3.39it/s] 64%|██████▍ | 237934/371472 [8:20:26<10:39:39, 3.48it/s] 64%|██████▍ | 237935/371472 [8:20:26<10:55:48, 3.39it/s] 64%|██████▍ | 237936/371472 [8:20:26<11:10:22, 3.32it/s] 64%|██████▍ | 237937/371472 [8:20:26<10:59:10, 3.38it/s] 64%|██████▍ | 237938/371472 [8:20:27<10:43:57, 3.46it/s] 64%|██████▍ | 237939/371472 [8:20:27<10:27:25, 3.55it/s] 64%|██████▍ | 237940/371472 [8:20:27<10:34:16, 3.51it/s] {'loss': 2.7672, 'learning_rate': 4.2369475748238484e-07, 'epoch': 10.25} + 64%|██████▍ | 237940/371472 [8:20:27<10:34:16, 3.51it/s] 64%|██████▍ | 237941/371472 [8:20:28<10:45:34, 3.45it/s] 64%|██████▍ | 237942/371472 [8:20:28<11:46:12, 3.15it/s] 64%|██████▍ | 237943/371472 [8:20:28<11:19:41, 3.27it/s] 64%|██████▍ | 237944/371472 [8:20:29<10:51:50, 3.41it/s] 64%|██████▍ | 237945/371472 [8:20:29<11:19:45, 3.27it/s] 64%|██████▍ | 237946/371472 [8:20:29<12:16:36, 3.02it/s] 64%|██████▍ | 237947/371472 [8:20:30<12:14:00, 3.03it/s] 64%|██████▍ | 237948/371472 [8:20:30<11:40:39, 3.18it/s] 64%|██████▍ | 237949/371472 [8:20:30<11:08:00, 3.33it/s] 64%|██████▍ | 237950/371472 [8:20:30<10:54:22, 3.40it/s] 64%|██████▍ | 237951/371472 [8:20:31<10:59:41, 3.37it/s] 64%|██████▍ | 237952/371472 [8:20:31<11:04:14, 3.35it/s] 64%|██████▍ | 237953/371472 [8:20:31<11:04:57, 3.35it/s] 64%|██████▍ | 237954/371472 [8:20:32<10:54:25, 3.40it/s] 64%|██████▍ | 237955/371472 [8:20:32<11:00:39, 3.37it/s] 64%|██████▍ | 237956/371472 [8:20:32<11:01:48, 3.36it/s] 64%|██████▍ | 237957/371472 [8:20:32<10:56:04, 3.39it/s] 64%|██████▍ | 237958/371472 [8:20:33<11:08:47, 3.33it/s] 64%|██████▍ | 237959/371472 [8:20:33<10:54:39, 3.40it/s] 64%|██████▍ | 237960/371472 [8:20:33<11:03:19, 3.35it/s] {'loss': 2.6401, 'learning_rate': 4.236462755069059e-07, 'epoch': 10.25} + 64%|██████▍ | 237960/371472 [8:20:33<11:03:19, 3.35it/s] 64%|██████▍ | 237961/371472 [8:20:34<12:17:17, 3.02it/s] 64%|██████▍ | 237962/371472 [8:20:34<11:34:12, 3.21it/s] 64%|██████▍ | 237963/371472 [8:20:34<11:19:26, 3.27it/s] 64%|██████▍ | 237964/371472 [8:20:35<11:03:59, 3.35it/s] 64%|██████▍ | 237965/371472 [8:20:35<11:15:40, 3.29it/s] 64%|██████▍ | 237966/371472 [8:20:35<11:17:54, 3.28it/s] 64%|██████▍ | 237967/371472 [8:20:36<11:00:38, 3.37it/s] 64%|██████▍ | 237968/371472 [8:20:36<10:57:09, 3.39it/s] 64%|██████▍ | 237969/371472 [8:20:36<10:45:32, 3.45it/s] 64%|██████▍ | 237970/371472 [8:20:36<11:13:52, 3.30it/s] 64%|██████▍ | 237971/371472 [8:20:37<11:34:49, 3.20it/s] 64%|██████▍ | 237972/371472 [8:20:37<11:40:06, 3.18it/s] 64%|██████▍ | 237973/371472 [8:20:37<11:43:28, 3.16it/s] 64%|██████▍ | 237974/371472 [8:20:38<11:39:34, 3.18it/s] 64%|██████▍ | 237975/371472 [8:20:38<11:34:40, 3.20it/s] 64%|██████▍ | 237976/371472 [8:20:38<11:26:56, 3.24it/s] 64%|██████▍ | 237977/371472 [8:20:39<11:17:46, 3.28it/s] 64%|██████▍ | 237978/371472 [8:20:39<11:12:20, 3.31it/s] 64%|██████▍ | 237979/371472 [8:20:39<11:04:17, 3.35it/s] 64%|██████▍ | 237980/371472 [8:20:40<11:15:43, 3.29it/s] {'loss': 2.9037, 'learning_rate': 4.235977935314271e-07, 'epoch': 10.25} + 64%|██████▍ | 237980/371472 [8:20:40<11:15:43, 3.29it/s] 64%|██████▍ | 237981/371472 [8:20:40<11:08:12, 3.33it/s] 64%|██████▍ | 237982/371472 [8:20:40<11:14:23, 3.30it/s] 64%|██████▍ | 237983/371472 [8:20:40<10:53:44, 3.40it/s] 64%|██████▍ | 237984/371472 [8:20:41<11:26:39, 3.24it/s] 64%|██████▍ | 237985/371472 [8:20:41<11:42:03, 3.17it/s] 64%|██████▍ | 237986/371472 [8:20:41<11:22:22, 3.26it/s] 64%|██████▍ | 237987/371472 [8:20:42<10:50:06, 3.42it/s] 64%|██████▍ | 237988/371472 [8:20:42<10:34:17, 3.51it/s] 64%|██████▍ | 237989/371472 [8:20:42<10:29:49, 3.53it/s] 64%|██████▍ | 237990/371472 [8:20:42<10:52:27, 3.41it/s] 64%|██████▍ | 237991/371472 [8:20:43<10:56:23, 3.39it/s] 64%|██████▍ | 237992/371472 [8:20:43<10:40:44, 3.47it/s] 64%|██████▍ | 237993/371472 [8:20:43<10:43:43, 3.46it/s] 64%|██████▍ | 237994/371472 [8:20:44<11:03:30, 3.35it/s] 64%|██████▍ | 237995/371472 [8:20:44<11:25:17, 3.25it/s] 64%|██████▍ | 237996/371472 [8:20:44<11:15:00, 3.30it/s] 64%|██████▍ | 237997/371472 [8:20:45<10:57:11, 3.38it/s] 64%|██████▍ | 237998/371472 [8:20:45<11:02:38, 3.36it/s] 64%|██████▍ | 237999/371472 [8:20:45<10:51:13, 3.42it/s] 64%|██████▍ | 238000/371472 [8:20:45<10:43:11, 3.46it/s] {'loss': 2.5941, 'learning_rate': 4.2354931155594816e-07, 'epoch': 10.25} + 64%|██████▍ | 238000/371472 [8:20:45<10:43:11, 3.46it/s] 64%|██████▍ | 238001/371472 [8:20:46<10:36:25, 3.50it/s] 64%|██████▍ | 238002/371472 [8:20:46<10:28:19, 3.54it/s] 64%|██████▍ | 238003/371472 [8:20:46<10:04:48, 3.68it/s] 64%|██████▍ | 238004/371472 [8:20:46<9:51:02, 3.76it/s] 64%|██████▍ | 238005/371472 [8:20:47<10:02:56, 3.69it/s] 64%|██████▍ | 238006/371472 [8:20:47<10:15:51, 3.61it/s] 64%|██████▍ | 238007/371472 [8:20:47<10:47:28, 3.44it/s] 64%|██████▍ | 238008/371472 [8:20:48<10:34:40, 3.50it/s] 64%|██████▍ | 238009/371472 [8:20:48<10:16:13, 3.61it/s] 64%|██████▍ | 238010/371472 [8:20:48<10:13:24, 3.63it/s] 64%|██████▍ | 238011/371472 [8:20:48<10:21:30, 3.58it/s] 64%|██████▍ | 238012/371472 [8:20:49<11:06:23, 3.34it/s] 64%|██████▍ | 238013/371472 [8:20:49<11:03:12, 3.35it/s] 64%|██████▍ | 238014/371472 [8:20:49<10:40:50, 3.47it/s] 64%|██████▍ | 238015/371472 [8:20:50<10:36:01, 3.50it/s] 64%|██████▍ | 238016/371472 [8:20:50<11:18:17, 3.28it/s] 64%|██████▍ | 238017/371472 [8:20:50<10:58:43, 3.38it/s] 64%|██████▍ | 238018/371472 [8:20:51<10:30:21, 3.53it/s] 64%|██████▍ | 238019/371472 [8:20:51<10:20:16, 3.59it/s] 64%|██████▍ | 238020/371472 [8:20:51<10:26:47, 3.55it/s] {'loss': 2.7517, 'learning_rate': 4.235008295804693e-07, 'epoch': 10.25} + 64%|██████▍ | 238020/371472 [8:20:51<10:26:47, 3.55it/s] 64%|██████▍ | 238021/371472 [8:20:51<10:47:20, 3.44it/s] 64%|██████▍ | 238022/371472 [8:20:52<10:31:21, 3.52it/s] 64%|██████▍ | 238023/371472 [8:20:52<11:05:04, 3.34it/s] 64%|██████▍ | 238024/371472 [8:20:52<10:49:29, 3.42it/s] 64%|██████▍ | 238025/371472 [8:20:53<10:21:49, 3.58it/s] 64%|██████▍ | 238026/371472 [8:20:53<10:21:20, 3.58it/s] 64%|██████▍ | 238027/371472 [8:20:53<10:24:15, 3.56it/s] 64%|██████▍ | 238028/371472 [8:20:53<10:15:57, 3.61it/s] 64%|██████▍ | 238029/371472 [8:20:54<11:53:10, 3.12it/s] 64%|██████▍ | 238030/371472 [8:20:54<11:47:11, 3.14it/s] 64%|██████▍ | 238031/371472 [8:20:54<11:34:39, 3.20it/s] 64%|██████▍ | 238032/371472 [8:20:55<11:18:16, 3.28it/s] 64%|██████▍ | 238033/371472 [8:20:55<10:58:58, 3.37it/s] 64%|██████▍ | 238034/371472 [8:20:55<10:49:08, 3.43it/s] 64%|██████▍ | 238035/371472 [8:20:56<10:49:38, 3.42it/s] 64%|██████▍ | 238036/371472 [8:20:56<10:37:06, 3.49it/s] 64%|██████▍ | 238037/371472 [8:20:56<10:20:09, 3.59it/s] 64%|██████▍ | 238038/371472 [8:20:56<10:22:59, 3.57it/s] 64%|██████▍ | 238039/371472 [8:20:57<10:30:04, 3.53it/s] 64%|██████▍ | 238040/371472 [8:20:57<10:36:35, 3.49it/s] {'loss': 2.7605, 'learning_rate': 4.2345234760499036e-07, 'epoch': 10.25} + 64%|██████▍ | 238040/371472 [8:20:57<10:36:35, 3.49it/s] 64%|██████▍ | 238041/371472 [8:20:57<10:43:22, 3.46it/s] 64%|██████▍ | 238042/371472 [8:20:58<10:49:12, 3.43it/s] 64%|██████▍ | 238043/371472 [8:20:58<10:37:30, 3.49it/s] 64%|██████▍ | 238044/371472 [8:20:58<10:26:43, 3.55it/s] 64%|██████▍ | 238045/371472 [8:20:58<10:21:03, 3.58it/s] 64%|██████▍ | 238046/371472 [8:20:59<10:15:42, 3.61it/s] 64%|██████▍ | 238047/371472 [8:20:59<10:25:31, 3.56it/s] 64%|██████▍ | 238048/371472 [8:20:59<10:15:43, 3.61it/s] 64%|██████▍ | 238049/371472 [8:20:59<10:42:11, 3.46it/s] 64%|██████▍ | 238050/371472 [8:21:00<12:06:06, 3.06it/s] 64%|██████▍ | 238051/371472 [8:21:00<12:08:53, 3.05it/s] 64%|██████▍ | 238052/371472 [8:21:01<12:03:58, 3.07it/s] 64%|██████▍ | 238053/371472 [8:21:01<11:41:09, 3.17it/s] 64%|██████▍ | 238054/371472 [8:21:01<11:26:28, 3.24it/s] 64%|██████▍ | 238055/371472 [8:21:01<11:39:25, 3.18it/s] 64%|██████▍ | 238056/371472 [8:21:02<11:03:34, 3.35it/s] 64%|██████▍ | 238057/371472 [8:21:02<11:38:50, 3.18it/s] 64%|██████▍ | 238058/371472 [8:21:02<11:02:29, 3.36it/s] 64%|██████▍ | 238059/371472 [8:21:03<11:10:19, 3.32it/s] 64%|██████▍ | 238060/371472 [8:21:03<10:52:57, 3.41it/s] {'loss': 2.6672, 'learning_rate': 4.2340386562951154e-07, 'epoch': 10.25} + 64%|██████▍ | 238060/371472 [8:21:03<10:52:57, 3.41it/s] 64%|██████▍ | 238061/371472 [8:21:03<10:38:52, 3.48it/s] 64%|██████▍ | 238062/371472 [8:21:03<10:16:43, 3.61it/s] 64%|██████▍ | 238063/371472 [8:21:04<10:26:03, 3.55it/s] 64%|██████▍ | 238064/371472 [8:21:04<11:31:20, 3.22it/s] 64%|██████▍ | 238065/371472 [8:21:05<12:09:38, 3.05it/s] 64%|██████▍ | 238066/371472 [8:21:05<11:49:26, 3.13it/s] 64%|██████▍ | 238067/371472 [8:21:05<11:31:27, 3.22it/s] 64%|██████▍ | 238068/371472 [8:21:05<11:08:32, 3.33it/s] 64%|██████▍ | 238069/371472 [8:21:06<11:31:13, 3.22it/s] 64%|██████▍ | 238070/371472 [8:21:06<11:24:14, 3.25it/s] 64%|██████▍ | 238071/371472 [8:21:06<11:24:13, 3.25it/s] 64%|██████▍ | 238072/371472 [8:21:07<11:39:09, 3.18it/s] 64%|██████▍ | 238073/371472 [8:21:07<11:50:33, 3.13it/s] 64%|██████▍ | 238074/371472 [8:21:07<11:23:40, 3.25it/s] 64%|██████▍ | 238075/371472 [8:21:08<11:06:59, 3.33it/s] 64%|██████▍ | 238076/371472 [8:21:08<11:10:22, 3.32it/s] 64%|██████▍ | 238077/371472 [8:21:08<11:01:59, 3.36it/s] 64%|██████▍ | 238078/371472 [8:21:08<10:47:37, 3.43it/s] 64%|██████▍ | 238079/371472 [8:21:09<10:32:55, 3.51it/s] 64%|██████▍ | 238080/371472 [8:21:09<10:50:29, 3.42it/s] {'loss': 2.7447, 'learning_rate': 4.233553836540326e-07, 'epoch': 10.25} + 64%|██████▍ | 238080/371472 [8:21:09<10:50:29, 3.42it/s] 64%|██████▍ | 238081/371472 [8:21:09<11:11:07, 3.31it/s] 64%|██████▍ | 238082/371472 [8:21:10<10:51:11, 3.41it/s] 64%|██████▍ | 238083/371472 [8:21:10<10:51:52, 3.41it/s] 64%|██████▍ | 238084/371472 [8:21:10<10:44:55, 3.45it/s] 64%|██████▍ | 238085/371472 [8:21:10<10:34:15, 3.51it/s] 64%|██████▍ | 238086/371472 [8:21:11<10:26:41, 3.55it/s] 64%|██████▍ | 238087/371472 [8:21:11<10:34:36, 3.50it/s] 64%|██████▍ | 238088/371472 [8:21:11<11:26:34, 3.24it/s] 64%|██████▍ | 238089/371472 [8:21:12<11:02:57, 3.35it/s] 64%|██████▍ | 238090/371472 [8:21:12<10:58:35, 3.38it/s] 64%|██████▍ | 238091/371472 [8:21:12<11:04:01, 3.35it/s] 64%|██████▍ | 238092/371472 [8:21:13<11:17:02, 3.28it/s] 64%|██████▍ | 238093/371472 [8:21:13<11:17:02, 3.28it/s] 64%|██████▍ | 238094/371472 [8:21:13<11:31:34, 3.21it/s] 64%|██████▍ | 238095/371472 [8:21:13<10:55:16, 3.39it/s] 64%|██████▍ | 238096/371472 [8:21:14<10:58:10, 3.38it/s] 64%|██████▍ | 238097/371472 [8:21:14<10:35:44, 3.50it/s] 64%|██████▍ | 238098/371472 [8:21:14<10:13:58, 3.62it/s] 64%|██████▍ | 238099/371472 [8:21:15<10:14:30, 3.62it/s] 64%|██████▍ | 238100/371472 [8:21:15<10:23:10, 3.57it/s] {'loss': 2.8843, 'learning_rate': 4.2330690167855373e-07, 'epoch': 10.26} + 64%|██████▍ | 238100/371472 [8:21:15<10:23:10, 3.57it/s] 64%|██████▍ | 238101/371472 [8:21:15<10:24:33, 3.56it/s] 64%|██████▍ | 238102/371472 [8:21:15<10:16:53, 3.60it/s] 64%|██████▍ | 238103/371472 [8:21:16<10:17:11, 3.60it/s] 64%|██████▍ | 238104/371472 [8:21:16<10:27:09, 3.54it/s] 64%|██████▍ | 238105/371472 [8:21:16<10:20:43, 3.58it/s] 64%|██████▍ | 238106/371472 [8:21:17<10:25:42, 3.55it/s] 64%|██████▍ | 238107/371472 [8:21:17<10:28:48, 3.53it/s] 64%|██████▍ | 238108/371472 [8:21:17<10:27:16, 3.54it/s] 64%|██████▍ | 238109/371472 [8:21:17<10:52:37, 3.41it/s] 64%|██████▍ | 238110/371472 [8:21:18<10:38:31, 3.48it/s] 64%|██████▍ | 238111/371472 [8:21:18<10:40:52, 3.47it/s] 64%|██████▍ | 238112/371472 [8:21:18<10:23:35, 3.56it/s] 64%|██████▍ | 238113/371472 [8:21:18<10:08:38, 3.65it/s] 64%|██████▍ | 238114/371472 [8:21:19<10:34:12, 3.50it/s] 64%|██████▍ | 238115/371472 [8:21:19<10:42:10, 3.46it/s] 64%|██████▍ | 238116/371472 [8:21:19<10:27:02, 3.54it/s] 64%|██████▍ | 238117/371472 [8:21:20<10:26:29, 3.55it/s] 64%|██████▍ | 238118/371472 [8:21:20<10:30:32, 3.52it/s] 64%|██████▍ | 238119/371472 [8:21:20<10:23:28, 3.56it/s] 64%|██████▍ | 238120/371472 [8:21:20<10:34:24, 3.50it/s] {'loss': 2.6501, 'learning_rate': 4.232584197030748e-07, 'epoch': 10.26} + 64%|██████▍ | 238120/371472 [8:21:20<10:34:24, 3.50it/s] 64%|██████▍ | 238121/371472 [8:21:21<10:37:23, 3.49it/s] 64%|██████▍ | 238122/371472 [8:21:21<10:40:21, 3.47it/s] 64%|██████▍ | 238123/371472 [8:21:21<10:30:02, 3.53it/s] 64%|██████▍ | 238124/371472 [8:21:22<10:25:49, 3.55it/s] 64%|██████▍ | 238125/371472 [8:21:22<10:39:07, 3.48it/s] 64%|██████▍ | 238126/371472 [8:21:22<10:32:45, 3.51it/s] 64%|██████▍ | 238127/371472 [8:21:22<10:22:07, 3.57it/s] 64%|██████▍ | 238128/371472 [8:21:23<10:44:14, 3.45it/s] 64%|██████▍ | 238129/371472 [8:21:23<10:30:59, 3.52it/s] 64%|██████▍ | 238130/371472 [8:21:23<11:35:54, 3.19it/s] 64%|██████▍ | 238131/371472 [8:21:24<11:13:58, 3.30it/s] 64%|██████▍ | 238132/371472 [8:21:24<11:00:53, 3.36it/s] 64%|██████▍ | 238133/371472 [8:21:24<10:32:37, 3.51it/s] 64%|██████▍ | 238134/371472 [8:21:25<10:35:20, 3.50it/s] 64%|██████▍ | 238135/371472 [8:21:25<10:21:47, 3.57it/s] 64%|██████▍ | 238136/371472 [8:21:25<10:10:22, 3.64it/s] 64%|██████▍ | 238137/371472 [8:21:25<10:49:05, 3.42it/s] 64%|██████▍ | 238138/371472 [8:21:26<11:15:59, 3.29it/s] 64%|██████▍ | 238139/371472 [8:21:26<10:57:11, 3.38it/s] 64%|██████▍ | 238140/371472 [8:21:26<10:48:30, 3.43it/s] {'loss': 2.7445, 'learning_rate': 4.23209937727596e-07, 'epoch': 10.26} + 64%|██████▍ | 238140/371472 [8:21:26<10:48:30, 3.43it/s] 64%|██████▍ | 238141/371472 [8:21:27<10:54:11, 3.40it/s] 64%|██████▍ | 238142/371472 [8:21:27<11:32:42, 3.21it/s] 64%|██████▍ | 238143/371472 [8:21:27<11:47:26, 3.14it/s] 64%|██████▍ | 238144/371472 [8:21:28<11:51:32, 3.12it/s] 64%|██████▍ | 238145/371472 [8:21:28<11:24:55, 3.24it/s] 64%|██████▍ | 238146/371472 [8:21:28<11:00:40, 3.36it/s] 64%|██████▍ | 238147/371472 [8:21:28<10:37:14, 3.49it/s] 64%|██████▍ | 238148/371472 [8:21:29<10:25:28, 3.55it/s] 64%|██████▍ | 238149/371472 [8:21:29<10:30:59, 3.52it/s] 64%|██████▍ | 238150/371472 [8:21:29<10:35:56, 3.49it/s] 64%|██████▍ | 238151/371472 [8:21:30<11:07:53, 3.33it/s] 64%|██████▍ | 238152/371472 [8:21:30<11:00:12, 3.37it/s] 64%|██████▍ | 238153/371472 [8:21:30<10:44:15, 3.45it/s] 64%|██████▍ | 238154/371472 [8:21:30<11:10:31, 3.31it/s] 64%|██████▍ | 238155/371472 [8:21:31<11:26:05, 3.24it/s] 64%|██████▍ | 238156/371472 [8:21:31<11:20:18, 3.27it/s] 64%|██████▍ | 238157/371472 [8:21:31<11:32:14, 3.21it/s] 64%|██████▍ | 238158/371472 [8:21:32<11:15:40, 3.29it/s] 64%|██████▍ | 238159/371472 [8:21:32<11:02:55, 3.35it/s] 64%|██████▍ | 238160/371472 [8:21:32<10:46:55, 3.43it/s] {'loss': 2.5849, 'learning_rate': 4.23161455752117e-07, 'epoch': 10.26} + 64%|██████▍ | 238160/371472 [8:21:32<10:46:55, 3.43it/s] 64%|██████▍ | 238161/371472 [8:21:33<12:21:35, 3.00it/s] 64%|██████▍ | 238162/371472 [8:21:33<11:34:15, 3.20it/s] 64%|██████▍ | 238163/371472 [8:21:33<11:12:41, 3.30it/s] 64%|██████▍ | 238164/371472 [8:21:34<10:50:22, 3.42it/s] 64%|██████▍ | 238165/371472 [8:21:34<11:01:07, 3.36it/s] 64%|██████▍ | 238166/371472 [8:21:34<10:53:00, 3.40it/s] 64%|██████▍ | 238167/371472 [8:21:34<10:40:34, 3.47it/s] 64%|██████▍ | 238168/371472 [8:21:35<10:30:04, 3.53it/s] 64%|██████▍ | 238169/371472 [8:21:35<10:27:14, 3.54it/s] 64%|██████▍ | 238170/371472 [8:21:35<10:47:24, 3.43it/s] 64%|██████▍ | 238171/371472 [8:21:36<11:30:18, 3.22it/s] 64%|██████▍ | 238172/371472 [8:21:36<11:22:51, 3.25it/s] 64%|██████▍ | 238173/371472 [8:21:36<11:11:50, 3.31it/s] 64%|██████▍ | 238174/371472 [8:21:37<11:03:04, 3.35it/s] 64%|██████▍ | 238175/371472 [8:21:37<10:57:36, 3.38it/s] 64%|██████▍ | 238176/371472 [8:21:37<10:33:31, 3.51it/s] 64%|██████▍ | 238177/371472 [8:21:37<10:47:35, 3.43it/s] 64%|██████▍ | 238178/371472 [8:21:38<10:55:29, 3.39it/s] 64%|██████▍ | 238179/371472 [8:21:38<10:46:25, 3.44it/s] 64%|██████▍ | 238180/371472 [8:21:38<10:51:38, 3.41it/s] {'loss': 2.7438, 'learning_rate': 4.231129737766382e-07, 'epoch': 10.26} + 64%|██████▍ | 238180/371472 [8:21:38<10:51:38, 3.41it/s] 64%|██████▍ | 238181/371472 [8:21:39<10:39:40, 3.47it/s] 64%|██████▍ | 238182/371472 [8:21:39<10:30:57, 3.52it/s] 64%|██████▍ | 238183/371472 [8:21:39<10:29:51, 3.53it/s] 64%|██████▍ | 238184/371472 [8:21:39<10:46:58, 3.43it/s] 64%|██████▍ | 238185/371472 [8:21:40<10:32:04, 3.51it/s] 64%|██████▍ | 238186/371472 [8:21:40<10:49:34, 3.42it/s] 64%|██████▍ | 238187/371472 [8:21:40<10:28:28, 3.53it/s] 64%|██████▍ | 238188/371472 [8:21:41<10:32:21, 3.51it/s] 64%|██████▍ | 238189/371472 [8:21:41<11:09:39, 3.32it/s] 64%|██████▍ | 238190/371472 [8:21:41<11:20:03, 3.27it/s] 64%|██████▍ | 238191/371472 [8:21:41<10:57:57, 3.38it/s] 64%|██████▍ | 238192/371472 [8:21:42<11:35:31, 3.19it/s] 64%|██████▍ | 238193/371472 [8:21:42<11:18:31, 3.27it/s] 64%|██████▍ | 238194/371472 [8:21:42<11:13:04, 3.30it/s] 64%|██████▍ | 238195/371472 [8:21:43<11:05:20, 3.34it/s] 64%|██████▍ | 238196/371472 [8:21:43<11:28:10, 3.23it/s] 64%|██████▍ | 238197/371472 [8:21:43<11:05:17, 3.34it/s] 64%|██████▍ | 238198/371472 [8:21:44<10:43:29, 3.45it/s] 64%|██████▍ | 238199/371472 [8:21:44<10:40:40, 3.47it/s] 64%|██████▍ | 238200/371472 [8:21:44<10:30:12, 3.52it/s] {'loss': 2.647, 'learning_rate': 4.2306449180115925e-07, 'epoch': 10.26} + 64%|██████▍ | 238200/371472 [8:21:44<10:30:12, 3.52it/s] 64%|██████▍ | 238201/371472 [8:21:44<10:28:33, 3.53it/s] 64%|██████▍ | 238202/371472 [8:21:45<10:58:03, 3.38it/s] 64%|██████▍ | 238203/371472 [8:21:45<11:12:58, 3.30it/s] 64%|██████▍ | 238204/371472 [8:21:45<11:05:01, 3.34it/s] 64%|██████▍ | 238205/371472 [8:21:46<11:10:23, 3.31it/s] 64%|██████▍ | 238206/371472 [8:21:46<11:07:16, 3.33it/s] 64%|██████▍ | 238207/371472 [8:21:46<11:12:08, 3.30it/s] 64%|██████▍ | 238208/371472 [8:21:47<10:58:09, 3.37it/s] 64%|██████▍ | 238209/371472 [8:21:47<10:53:53, 3.40it/s] 64%|██████▍ | 238210/371472 [8:21:47<11:13:37, 3.30it/s] 64%|██████▍ | 238211/371472 [8:21:47<11:04:48, 3.34it/s] 64%|██████▍ | 238212/371472 [8:21:48<10:42:47, 3.46it/s] 64%|██████▍ | 238213/371472 [8:21:48<10:22:00, 3.57it/s] 64%|██████▍ | 238214/371472 [8:21:48<10:14:04, 3.62it/s] 64%|██████▍ | 238215/371472 [8:21:49<10:31:22, 3.52it/s] 64%|██████▍ | 238216/371472 [8:21:49<10:22:01, 3.57it/s] 64%|██████▍ | 238217/371472 [8:21:49<10:34:59, 3.50it/s] 64%|██████▍ | 238218/371472 [8:21:49<11:27:38, 3.23it/s] 64%|██████▍ | 238219/371472 [8:21:50<11:03:00, 3.35it/s] 64%|██████▍ | 238220/371472 [8:21:50<10:58:27, 3.37it/s] {'loss': 2.672, 'learning_rate': 4.2301600982568037e-07, 'epoch': 10.26} + 64%|██████▍ | 238220/371472 [8:21:50<10:58:27, 3.37it/s] 64%|██████▍ | 238221/371472 [8:21:50<10:49:12, 3.42it/s] 64%|██████▍ | 238222/371472 [8:21:51<10:35:50, 3.49it/s] 64%|██████▍ | 238223/371472 [8:21:51<10:30:36, 3.52it/s] 64%|██████▍ | 238224/371472 [8:21:51<10:38:54, 3.48it/s] 64%|██████▍ | 238225/371472 [8:21:51<10:54:58, 3.39it/s] 64%|██████▍ | 238226/371472 [8:21:52<11:42:22, 3.16it/s] 64%|██████▍ | 238227/371472 [8:21:52<11:11:39, 3.31it/s] 64%|██████▍ | 238228/371472 [8:21:52<11:19:07, 3.27it/s] 64%|██████▍ | 238229/371472 [8:21:53<11:57:25, 3.10it/s] 64%|██████▍ | 238230/371472 [8:21:53<12:07:25, 3.05it/s] 64%|██████▍ | 238231/371472 [8:21:53<11:46:01, 3.15it/s] 64%|██████▍ | 238232/371472 [8:21:54<11:31:12, 3.21it/s] 64%|██████▍ | 238233/371472 [8:21:54<11:26:24, 3.24it/s] 64%|██████▍ | 238234/371472 [8:21:54<11:03:56, 3.34it/s] 64%|██████▍ | 238235/371472 [8:21:55<11:12:59, 3.30it/s] 64%|██████▍ | 238236/371472 [8:21:55<10:46:15, 3.44it/s] 64%|██████▍ | 238237/371472 [8:21:55<10:50:39, 3.41it/s] 64%|██████▍ | 238238/371472 [8:21:55<10:59:17, 3.37it/s] 64%|██████▍ | 238239/371472 [8:21:56<11:44:03, 3.15it/s] 64%|██████▍ | 238240/371472 [8:21:56<11:24:14, 3.25it/s] {'loss': 2.6635, 'learning_rate': 4.2296752785020144e-07, 'epoch': 10.26} + 64%|██████▍ | 238240/371472 [8:21:56<11:24:14, 3.25it/s] 64%|██████▍ | 238241/371472 [8:21:56<11:55:02, 3.11it/s] 64%|██████▍ | 238242/371472 [8:21:57<11:59:38, 3.09it/s] 64%|██████▍ | 238243/371472 [8:21:57<11:46:26, 3.14it/s] 64%|██████▍ | 238244/371472 [8:21:57<11:35:25, 3.19it/s] 64%|██████▍ | 238245/371472 [8:21:58<11:29:49, 3.22it/s] 64%|██████▍ | 238246/371472 [8:21:58<11:13:20, 3.30it/s] 64%|██████▍ | 238247/371472 [8:21:58<11:08:36, 3.32it/s] 64%|██████▍ | 238248/371472 [8:21:59<10:56:17, 3.38it/s] 64%|██████▍ | 238249/371472 [8:21:59<10:40:50, 3.46it/s] 64%|██████▍ | 238250/371472 [8:21:59<10:30:21, 3.52it/s] 64%|██████▍ | 238251/371472 [8:21:59<10:26:45, 3.54it/s] 64%|██████▍ | 238252/371472 [8:22:00<10:29:36, 3.53it/s] 64%|██████▍ | 238253/371472 [8:22:00<10:32:50, 3.51it/s] 64%|██████▍ | 238254/371472 [8:22:00<10:29:56, 3.52it/s] 64%|██████▍ | 238255/371472 [8:22:01<10:17:55, 3.59it/s] 64%|██████▍ | 238256/371472 [8:22:01<10:27:00, 3.54it/s] 64%|██████▍ | 238257/371472 [8:22:01<10:37:50, 3.48it/s] 64%|██████▍ | 238258/371472 [8:22:01<10:25:41, 3.55it/s] 64%|██████▍ | 238259/371472 [8:22:02<10:48:53, 3.42it/s] 64%|██████▍ | 238260/371472 [8:22:02<10:44:22, 3.45it/s] {'loss': 2.6759, 'learning_rate': 4.229190458747226e-07, 'epoch': 10.26} + 64%|██████▍ | 238260/371472 [8:22:02<10:44:22, 3.45it/s] 64%|██████▍ | 238261/371472 [8:22:02<10:36:07, 3.49it/s] 64%|██████▍ | 238262/371472 [8:22:03<10:32:36, 3.51it/s] 64%|██████▍ | 238263/371472 [8:22:03<10:42:57, 3.45it/s] 64%|██████▍ | 238264/371472 [8:22:03<10:35:32, 3.49it/s] 64%|██████▍ | 238265/371472 [8:22:03<10:33:39, 3.50it/s] 64%|██████▍ | 238266/371472 [8:22:04<10:33:26, 3.50it/s] 64%|██████▍ | 238267/371472 [8:22:04<10:56:12, 3.38it/s] 64%|██████▍ | 238268/371472 [8:22:04<10:37:49, 3.48it/s] 64%|██████▍ | 238269/371472 [8:22:05<10:42:26, 3.46it/s] 64%|██████▍ | 238270/371472 [8:22:05<10:26:35, 3.54it/s] 64%|██████▍ | 238271/371472 [8:22:05<11:10:24, 3.31it/s] 64%|██████▍ | 238272/371472 [8:22:05<10:38:40, 3.48it/s] 64%|██████▍ | 238273/371472 [8:22:06<10:20:58, 3.58it/s] 64%|██████▍ | 238274/371472 [8:22:06<10:30:46, 3.52it/s] 64%|██████▍ | 238275/371472 [8:22:06<10:26:22, 3.54it/s] 64%|██████▍ | 238276/371472 [8:22:07<10:15:56, 3.60it/s] 64%|██████▍ | 238277/371472 [8:22:07<10:16:30, 3.60it/s] 64%|██████▍ | 238278/371472 [8:22:07<10:55:41, 3.39it/s] 64%|██████▍ | 238279/371472 [8:22:07<10:43:59, 3.45it/s] 64%|██████▍ | 238280/371472 [8:22:08<11:08:31, 3.32it/s] {'loss': 2.7626, 'learning_rate': 4.2287056389924364e-07, 'epoch': 10.26} + 64%|██████▍ | 238280/371472 [8:22:08<11:08:31, 3.32it/s] 64%|██████▍ | 238281/371472 [8:22:08<11:30:47, 3.21it/s] 64%|██████▍ | 238282/371472 [8:22:08<11:06:30, 3.33it/s] 64%|██████▍ | 238283/371472 [8:22:09<11:38:38, 3.18it/s] 64%|██████▍ | 238284/371472 [8:22:09<11:19:44, 3.27it/s] 64%|██████▍ | 238285/371472 [8:22:09<11:19:47, 3.27it/s] 64%|██████▍ | 238286/371472 [8:22:10<10:57:07, 3.38it/s] 64%|██████▍ | 238287/371472 [8:22:10<11:28:38, 3.22it/s] 64%|██████▍ | 238288/371472 [8:22:10<10:57:18, 3.38it/s] 64%|██████▍ | 238289/371472 [8:22:11<11:43:40, 3.15it/s] 64%|██████▍ | 238290/371472 [8:22:11<12:32:32, 2.95it/s] 64%|██████▍ | 238291/371472 [8:22:11<11:52:01, 3.12it/s] 64%|██████▍ | 238292/371472 [8:22:11<11:22:47, 3.25it/s] 64%|██████▍ | 238293/371472 [8:22:12<11:39:50, 3.17it/s] 64%|██████▍ | 238294/371472 [8:22:12<11:20:06, 3.26it/s] 64%|██████▍ | 238295/371472 [8:22:12<10:59:48, 3.36it/s] 64%|██████▍ | 238296/371472 [8:22:13<10:51:44, 3.41it/s] 64%|██████▍ | 238297/371472 [8:22:13<10:39:51, 3.47it/s] 64%|██████▍ | 238298/371472 [8:22:13<10:32:37, 3.51it/s] 64%|██████▍ | 238299/371472 [8:22:14<10:42:35, 3.45it/s] 64%|██████▍ | 238300/371472 [8:22:14<10:35:26, 3.49it/s] {'loss': 2.5946, 'learning_rate': 4.228220819237648e-07, 'epoch': 10.26} + 64%|██████▍ | 238300/371472 [8:22:14<10:35:26, 3.49it/s] 64%|██████▍ | 238301/371472 [8:22:14<10:56:15, 3.38it/s] 64%|██████▍ | 238302/371472 [8:22:14<10:42:35, 3.45it/s] 64%|██████▍ | 238303/371472 [8:22:15<10:40:11, 3.47it/s] 64%|██████▍ | 238304/371472 [8:22:15<10:32:19, 3.51it/s] 64%|██████▍ | 238305/371472 [8:22:15<10:25:03, 3.55it/s] 64%|██████▍ | 238306/371472 [8:22:16<10:17:44, 3.59it/s] 64%|██████▍ | 238307/371472 [8:22:16<10:20:12, 3.58it/s] 64%|██████▍ | 238308/371472 [8:22:16<10:17:55, 3.59it/s] 64%|██████▍ | 238309/371472 [8:22:16<10:19:07, 3.58it/s] 64%|██████▍ | 238310/371472 [8:22:17<10:54:05, 3.39it/s] 64%|██████▍ | 238311/371472 [8:22:17<10:57:04, 3.38it/s] 64%|██████▍ | 238312/371472 [8:22:17<10:51:27, 3.41it/s] 64%|██████▍ | 238313/371472 [8:22:18<10:49:58, 3.41it/s] 64%|██████▍ | 238314/371472 [8:22:18<10:38:30, 3.48it/s] 64%|██████▍ | 238315/371472 [8:22:18<10:59:02, 3.37it/s] 64%|██████▍ | 238316/371472 [8:22:18<10:53:31, 3.40it/s] 64%|██████▍ | 238317/371472 [8:22:19<11:13:59, 3.29it/s] 64%|██████▍ | 238318/371472 [8:22:19<10:52:52, 3.40it/s] 64%|██████▍ | 238319/371472 [8:22:19<10:47:32, 3.43it/s] 64%|██████▍ | 238320/371472 [8:22:20<11:41:59, 3.16it/s] {'loss': 2.6959, 'learning_rate': 4.227735999482859e-07, 'epoch': 10.26} + 64%|██████▍ | 238320/371472 [8:22:20<11:41:59, 3.16it/s] 64%|██████▍ | 238321/371472 [8:22:20<11:25:47, 3.24it/s] 64%|██████▍ | 238322/371472 [8:22:20<11:04:16, 3.34it/s] 64%|██████▍ | 238323/371472 [8:22:21<10:52:49, 3.40it/s] 64%|██████▍ | 238324/371472 [8:22:21<12:25:27, 2.98it/s] 64%|██████▍ | 238325/371472 [8:22:21<11:43:49, 3.15it/s] 64%|██████▍ | 238326/371472 [8:22:22<11:18:38, 3.27it/s] 64%|██████▍ | 238327/371472 [8:22:22<11:02:46, 3.35it/s] 64%|██████▍ | 238328/371472 [8:22:22<10:43:50, 3.45it/s] 64%|██████▍ | 238329/371472 [8:22:22<10:33:49, 3.50it/s] 64%|██████▍ | 238330/371472 [8:22:23<10:37:52, 3.48it/s] 64%|██████▍ | 238331/371472 [8:22:23<10:57:37, 3.37it/s] 64%|██████▍ | 238332/371472 [8:22:23<11:18:05, 3.27it/s] 64%|██████▍ | 238333/371472 [8:22:24<10:49:02, 3.42it/s] 64%|██████▍ | 238334/371472 [8:22:24<10:47:12, 3.43it/s] 64%|██████▍ | 238335/371472 [8:22:24<10:37:21, 3.48it/s] 64%|██████▍ | 238336/371472 [8:22:24<10:37:49, 3.48it/s] 64%|██████▍ | 238337/371472 [8:22:25<10:57:52, 3.37it/s] 64%|██████▍ | 238338/371472 [8:22:25<10:53:11, 3.40it/s] 64%|██████▍ | 238339/371472 [8:22:25<10:57:11, 3.38it/s] 64%|██████▍ | 238340/371472 [8:22:26<10:45:53, 3.44it/s] {'loss': 2.8096, 'learning_rate': 4.227251179728069e-07, 'epoch': 10.27} + 64%|██████▍ | 238340/371472 [8:22:26<10:45:53, 3.44it/s] 64%|██████▍ | 238341/371472 [8:22:26<10:43:45, 3.45it/s] 64%|██████▍ | 238342/371472 [8:22:26<11:03:34, 3.34it/s] 64%|██████▍ | 238343/371472 [8:22:27<11:02:04, 3.35it/s] 64%|██████▍ | 238344/371472 [8:22:27<10:44:14, 3.44it/s] 64%|██████▍ | 238345/371472 [8:22:27<11:01:32, 3.35it/s] 64%|██████▍ | 238346/371472 [8:22:27<11:02:56, 3.35it/s] 64%|██████▍ | 238347/371472 [8:22:28<10:59:12, 3.37it/s] 64%|██████▍ | 238348/371472 [8:22:28<11:34:29, 3.19it/s] 64%|██████▍ | 238349/371472 [8:22:28<11:28:44, 3.22it/s] 64%|██████▍ | 238350/371472 [8:22:29<11:02:21, 3.35it/s] 64%|██████▍ | 238351/371472 [8:22:29<11:01:36, 3.35it/s] 64%|██████▍ | 238352/371472 [8:22:29<13:33:50, 2.73it/s] 64%|██████▍ | 238353/371472 [8:22:30<12:45:31, 2.90it/s] 64%|██████▍ | 238354/371472 [8:22:30<13:06:09, 2.82it/s] 64%|██████▍ | 238355/371472 [8:22:30<12:34:31, 2.94it/s] 64%|██████▍ | 238356/371472 [8:22:31<12:06:21, 3.05it/s] 64%|██████▍ | 238357/371472 [8:22:31<11:40:35, 3.17it/s] 64%|██████▍ | 238358/371472 [8:22:31<11:21:18, 3.26it/s] 64%|██████▍ | 238359/371472 [8:22:32<10:43:30, 3.45it/s] 64%|██████▍ | 238360/371472 [8:22:32<11:21:29, 3.26it/s] {'loss': 2.8333, 'learning_rate': 4.2267663599732803e-07, 'epoch': 10.27} + 64%|██████▍ | 238360/371472 [8:22:32<11:21:29, 3.26it/s] 64%|██████▍ | 238361/371472 [8:22:32<11:09:11, 3.32it/s] 64%|██████▍ | 238362/371472 [8:22:32<10:49:51, 3.41it/s] 64%|██████▍ | 238363/371472 [8:22:33<10:28:21, 3.53it/s] 64%|██████▍ | 238364/371472 [8:22:33<10:15:39, 3.60it/s] 64%|██████▍ | 238365/371472 [8:22:33<10:22:15, 3.57it/s] 64%|██████▍ | 238366/371472 [8:22:34<10:16:17, 3.60it/s] 64%|██████▍ | 238367/371472 [8:22:34<10:17:04, 3.60it/s] 64%|██████▍ | 238368/371472 [8:22:34<9:57:28, 3.71it/s] 64%|██████▍ | 238369/371472 [8:22:34<9:52:20, 3.75it/s] 64%|██████▍ | 238370/371472 [8:22:35<9:48:20, 3.77it/s] 64%|██████▍ | 238371/371472 [8:22:35<9:58:42, 3.71it/s] 64%|██████▍ | 238372/371472 [8:22:35<10:06:54, 3.66it/s] 64%|██████▍ | 238373/371472 [8:22:35<10:10:42, 3.63it/s] 64%|██████▍ | 238374/371472 [8:22:36<10:06:24, 3.66it/s] 64%|██████▍ | 238375/371472 [8:22:36<10:20:22, 3.58it/s] 64%|██████▍ | 238376/371472 [8:22:36<10:20:44, 3.57it/s] 64%|██████▍ | 238377/371472 [8:22:37<10:20:37, 3.57it/s] 64%|██████▍ | 238378/371472 [8:22:37<10:21:53, 3.57it/s] 64%|██████▍ | 238379/371472 [8:22:37<10:08:11, 3.65it/s] 64%|██████▍ | 238380/371472 [8:22:37<10:33:12, 3.50it/s] {'loss': 2.7044, 'learning_rate': 4.2262815402184916e-07, 'epoch': 10.27} + 64%|██████▍ | 238380/371472 [8:22:37<10:33:12, 3.50it/s] 64%|██████▍ | 238381/371472 [8:22:38<11:33:23, 3.20it/s] 64%|██████▍ | 238382/371472 [8:22:38<11:33:28, 3.20it/s] 64%|██████▍ | 238383/371472 [8:22:38<11:03:27, 3.34it/s] 64%|██████▍ | 238384/371472 [8:22:39<10:48:16, 3.42it/s] 64%|██████▍ | 238385/371472 [8:22:39<10:46:36, 3.43it/s] 64%|██████▍ | 238386/371472 [8:22:39<10:24:10, 3.55it/s] 64%|██████▍ | 238387/371472 [8:22:39<10:11:55, 3.62it/s] 64%|██████▍ | 238388/371472 [8:22:40<10:12:45, 3.62it/s] 64%|██████▍ | 238389/371472 [8:22:40<10:57:04, 3.38it/s] 64%|██████▍ | 238390/371472 [8:22:40<10:54:24, 3.39it/s] 64%|██████▍ | 238391/371472 [8:22:41<11:44:49, 3.15it/s] 64%|██████▍ | 238392/371472 [8:22:41<11:47:59, 3.13it/s] 64%|██████▍ | 238393/371472 [8:22:41<11:47:23, 3.14it/s] 64%|██████▍ | 238394/371472 [8:22:42<12:14:06, 3.02it/s] 64%|██████▍ | 238395/371472 [8:22:42<11:52:45, 3.11it/s] 64%|██████▍ | 238396/371472 [8:22:42<11:45:23, 3.14it/s] 64%|██████▍ | 238397/371472 [8:22:43<11:29:28, 3.22it/s] 64%|██████▍ | 238398/371472 [8:22:43<11:26:16, 3.23it/s] 64%|██████▍ | 238399/371472 [8:22:43<11:01:16, 3.35it/s] 64%|██████▍ | 238400/371472 [8:22:44<11:24:22, 3.24it/s] {'loss': 2.6821, 'learning_rate': 4.225796720463703e-07, 'epoch': 10.27} + 64%|██████▍ | 238400/371472 [8:22:44<11:24:22, 3.24it/s] 64%|██████▍ | 238401/371472 [8:22:44<12:01:06, 3.08it/s] 64%|██████▍ | 238402/371472 [8:22:44<11:38:24, 3.18it/s] 64%|██████▍ | 238403/371472 [8:22:44<11:09:15, 3.31it/s] 64%|██████▍ | 238404/371472 [8:22:45<11:01:12, 3.35it/s] 64%|██████▍ | 238405/371472 [8:22:45<10:44:21, 3.44it/s] 64%|██████▍ | 238406/371472 [8:22:45<10:31:19, 3.51it/s] 64%|██████▍ | 238407/371472 [8:22:46<10:20:46, 3.57it/s] 64%|██████▍ | 238408/371472 [8:22:46<10:33:09, 3.50it/s] 64%|██████▍ | 238409/371472 [8:22:46<10:22:28, 3.56it/s] 64%|██████▍ | 238410/371472 [8:22:46<10:24:02, 3.55it/s] 64%|██████▍ | 238411/371472 [8:22:47<10:22:27, 3.56it/s] 64%|██████▍ | 238412/371472 [8:22:47<10:26:42, 3.54it/s] 64%|██████▍ | 238413/371472 [8:22:47<10:16:32, 3.60it/s] 64%|██████▍ | 238414/371472 [8:22:48<11:15:37, 3.28it/s] 64%|██████▍ | 238415/371472 [8:22:48<11:05:06, 3.33it/s] 64%|██████▍ | 238416/371472 [8:22:48<10:40:41, 3.46it/s] 64%|██████▍ | 238417/371472 [8:22:48<11:04:18, 3.34it/s] 64%|██████▍ | 238418/371472 [8:22:49<10:39:32, 3.47it/s] 64%|██████▍ | 238419/371472 [8:22:49<10:29:00, 3.53it/s] 64%|██████▍ | 238420/371472 [8:22:49<10:31:47, 3.51it/s] {'loss': 2.8375, 'learning_rate': 4.2253119007089135e-07, 'epoch': 10.27} + 64%|██████▍ | 238420/371472 [8:22:49<10:31:47, 3.51it/s] 64%|██████▍ | 238421/371472 [8:22:50<10:25:37, 3.54it/s] 64%|██████▍ | 238422/371472 [8:22:50<10:56:21, 3.38it/s] 64%|██████▍ | 238423/371472 [8:22:50<10:38:25, 3.47it/s] 64%|██████▍ | 238424/371472 [8:22:51<11:25:16, 3.24it/s] 64%|██████▍ | 238425/371472 [8:22:51<11:13:09, 3.29it/s] 64%|██████▍ | 238426/371472 [8:22:51<10:51:02, 3.41it/s] 64%|██████▍ | 238427/371472 [8:22:51<11:15:32, 3.28it/s] 64%|██████▍ | 238428/371472 [8:22:52<11:12:56, 3.30it/s] 64%|██████▍ | 238429/371472 [8:22:52<11:44:08, 3.15it/s] 64%|██████▍ | 238430/371472 [8:22:52<11:38:30, 3.17it/s] 64%|██████▍ | 238431/371472 [8:22:53<11:06:58, 3.32it/s] 64%|██████▍ | 238432/371472 [8:22:53<11:31:08, 3.21it/s] 64%|██████▍ | 238433/371472 [8:22:53<11:12:06, 3.30it/s] 64%|██████▍ | 238434/371472 [8:22:54<12:01:23, 3.07it/s] 64%|██████▍ | 238435/371472 [8:22:54<11:27:07, 3.23it/s] 64%|██████▍ | 238436/371472 [8:22:54<11:33:27, 3.20it/s] 64%|██████▍ | 238437/371472 [8:22:55<11:15:32, 3.28it/s] 64%|██████▍ | 238438/371472 [8:22:55<11:04:15, 3.34it/s] 64%|██████▍ | 238439/371472 [8:22:55<11:00:32, 3.36it/s] 64%|██████▍ | 238440/371472 [8:22:55<11:05:59, 3.33it/s] {'loss': 2.7435, 'learning_rate': 4.2248270809541253e-07, 'epoch': 10.27} + 64%|██████▍ | 238440/371472 [8:22:55<11:05:59, 3.33it/s] 64%|██████▍ | 238441/371472 [8:22:56<10:49:03, 3.42it/s] 64%|██████▍ | 238442/371472 [8:22:56<10:38:45, 3.47it/s] 64%|██████▍ | 238443/371472 [8:22:56<10:39:51, 3.47it/s] 64%|██████▍ | 238444/371472 [8:22:57<11:00:43, 3.36it/s] 64%|██████▍ | 238445/371472 [8:22:57<11:00:27, 3.36it/s] 64%|██████▍ | 238446/371472 [8:22:57<11:02:36, 3.35it/s] 64%|██████▍ | 238447/371472 [8:22:58<11:12:19, 3.30it/s] 64%|██████▍ | 238448/371472 [8:22:58<11:29:34, 3.22it/s] 64%|██████▍ | 238449/371472 [8:22:58<11:13:51, 3.29it/s] 64%|██████▍ | 238450/371472 [8:22:58<10:42:06, 3.45it/s] 64%|██████▍ | 238451/371472 [8:22:59<10:36:42, 3.48it/s] 64%|██████▍ | 238452/371472 [8:22:59<10:35:31, 3.49it/s] 64%|██████▍ | 238453/371472 [8:22:59<11:42:17, 3.16it/s] 64%|██████▍ | 238454/371472 [8:23:00<12:19:57, 3.00it/s] 64%|██████▍ | 238455/371472 [8:23:00<12:14:35, 3.02it/s] 64%|██████▍ | 238456/371472 [8:23:00<12:09:16, 3.04it/s] 64%|██████▍ | 238457/371472 [8:23:01<11:55:54, 3.10it/s] 64%|██████▍ | 238458/371472 [8:23:01<11:16:40, 3.28it/s] 64%|██████▍ | 238459/371472 [8:23:01<10:49:58, 3.41it/s] 64%|██████▍ | 238460/371472 [8:23:01<10:57:27, 3.37it/s] {'loss': 2.7913, 'learning_rate': 4.224342261199336e-07, 'epoch': 10.27} + 64%|██████▍ | 238460/371472 [8:23:01<10:57:27, 3.37it/s] 64%|██████▍ | 238461/371472 [8:23:02<10:32:03, 3.51it/s] 64%|██████▍ | 238462/371472 [8:23:02<10:40:07, 3.46it/s] 64%|██████▍ | 238463/371472 [8:23:02<10:27:42, 3.53it/s] 64%|██████▍ | 238464/371472 [8:23:03<10:31:51, 3.51it/s] 64%|██████▍ | 238465/371472 [8:23:03<10:54:44, 3.39it/s] 64%|██████▍ | 238466/371472 [8:23:03<11:03:24, 3.34it/s] 64%|██████▍ | 238467/371472 [8:23:04<11:10:00, 3.31it/s] 64%|██████▍ | 238468/371472 [8:23:04<11:24:20, 3.24it/s] 64%|██████▍ | 238469/371472 [8:23:04<11:36:17, 3.18it/s] 64%|██████▍ | 238470/371472 [8:23:05<11:31:21, 3.21it/s] 64%|██████▍ | 238471/371472 [8:23:05<11:09:24, 3.31it/s] 64%|██████▍ | 238472/371472 [8:23:05<10:55:59, 3.38it/s] 64%|██████▍ | 238473/371472 [8:23:05<10:50:43, 3.41it/s] 64%|██████▍ | 238474/371472 [8:23:06<10:26:00, 3.54it/s] 64%|██████▍ | 238475/371472 [8:23:06<10:40:46, 3.46it/s] 64%|██████▍ | 238476/371472 [8:23:06<10:29:53, 3.52it/s] 64%|██████▍ | 238477/371472 [8:23:06<10:34:38, 3.49it/s] 64%|██████▍ | 238478/371472 [8:23:07<10:24:51, 3.55it/s] 64%|██████▍ | 238479/371472 [8:23:07<11:08:01, 3.32it/s] 64%|██████▍ | 238480/371472 [8:23:07<11:04:29, 3.34it/s] {'loss': 2.7772, 'learning_rate': 4.223857441444547e-07, 'epoch': 10.27} + 64%|██████▍ | 238480/371472 [8:23:07<11:04:29, 3.34it/s] 64%|██████▍ | 238481/371472 [8:23:08<10:47:20, 3.42it/s] 64%|██████▍ | 238482/371472 [8:23:08<10:40:22, 3.46it/s] 64%|██████▍ | 238483/371472 [8:23:08<10:35:37, 3.49it/s] 64%|██████▍ | 238484/371472 [8:23:09<10:46:59, 3.43it/s] 64%|██████▍ | 238485/371472 [8:23:09<11:15:45, 3.28it/s] 64%|██████▍ | 238486/371472 [8:23:09<10:57:34, 3.37it/s] 64%|██████▍ | 238487/371472 [8:23:09<10:43:51, 3.44it/s] 64%|██████▍ | 238488/371472 [8:23:10<11:01:02, 3.35it/s] 64%|██████▍ | 238489/371472 [8:23:10<10:51:37, 3.40it/s] 64%|██████▍ | 238490/371472 [8:23:10<10:46:46, 3.43it/s] 64%|██████▍ | 238491/371472 [8:23:11<10:16:26, 3.60it/s] 64%|██████▍ | 238492/371472 [8:23:11<10:03:57, 3.67it/s] 64%|██████▍ | 238493/371472 [8:23:11<9:58:41, 3.70it/s] 64%|██████▍ | 238494/371472 [8:23:11<10:13:06, 3.61it/s] 64%|██████▍ | 238495/371472 [8:23:12<10:14:02, 3.61it/s] 64%|██████▍ | 238496/371472 [8:23:12<10:22:31, 3.56it/s] 64%|██████▍ | 238497/371472 [8:23:12<10:16:25, 3.60it/s] 64%|██████▍ | 238498/371472 [8:23:13<10:59:36, 3.36it/s] 64%|██████▍ | 238499/371472 [8:23:13<11:03:45, 3.34it/s] 64%|██████▍ | 238500/371472 [8:23:13<10:45:03, 3.44it/s] {'loss': 2.6101, 'learning_rate': 4.223372621689758e-07, 'epoch': 10.27} + 64%|██████▍ | 238500/371472 [8:23:13<10:45:03, 3.44it/s] 64%|██████▍ | 238501/371472 [8:23:13<10:38:02, 3.47it/s] 64%|██████▍ | 238502/371472 [8:23:14<10:22:17, 3.56it/s] 64%|██████▍ | 238503/371472 [8:23:14<10:13:34, 3.61it/s] 64%|██████▍ | 238504/371472 [8:23:14<10:21:44, 3.56it/s] 64%|██████▍ | 238505/371472 [8:23:15<10:19:39, 3.58it/s] 64%|██████▍ | 238506/371472 [8:23:15<10:08:24, 3.64it/s] 64%|██████▍ | 238507/371472 [8:23:15<10:17:46, 3.59it/s] 64%|██████▍ | 238508/371472 [8:23:15<10:17:28, 3.59it/s] 64%|██████▍ | 238509/371472 [8:23:16<10:18:13, 3.58it/s] 64%|██████▍ | 238510/371472 [8:23:16<10:22:50, 3.56it/s] 64%|██████▍ | 238511/371472 [8:23:16<10:44:41, 3.44it/s] 64%|██████▍ | 238512/371472 [8:23:17<10:56:31, 3.38it/s] 64%|██████▍ | 238513/371472 [8:23:17<10:43:37, 3.44it/s] 64%|██████▍ | 238514/371472 [8:23:17<10:52:08, 3.40it/s] 64%|██████▍ | 238515/371472 [8:23:17<10:44:49, 3.44it/s] 64%|██████▍ | 238516/371472 [8:23:18<10:51:35, 3.40it/s] 64%|██████▍ | 238517/371472 [8:23:18<10:23:17, 3.56it/s] 64%|██████▍ | 238518/371472 [8:23:18<10:15:39, 3.60it/s] 64%|██████▍ | 238519/371472 [8:23:18<10:06:00, 3.66it/s] 64%|██████▍ | 238520/371472 [8:23:19<10:25:14, 3.54it/s] {'loss': 2.6701, 'learning_rate': 4.22288780193497e-07, 'epoch': 10.27} + 64%|██████▍ | 238520/371472 [8:23:19<10:25:14, 3.54it/s] 64%|██████▍ | 238521/371472 [8:23:19<10:19:47, 3.58it/s] 64%|██████▍ | 238522/371472 [8:23:19<10:42:25, 3.45it/s] 64%|██████▍ | 238523/371472 [8:23:20<10:25:52, 3.54it/s] 64%|██████▍ | 238524/371472 [8:23:20<10:26:51, 3.53it/s] 64%|██████▍ | 238525/371472 [8:23:20<10:20:06, 3.57it/s] 64%|██████▍ | 238526/371472 [8:23:20<10:24:32, 3.55it/s] 64%|██████▍ | 238527/371472 [8:23:21<10:22:29, 3.56it/s] 64%|██████▍ | 238528/371472 [8:23:21<10:16:25, 3.59it/s] 64%|██████▍ | 238529/371472 [8:23:21<10:42:11, 3.45it/s] 64%|██████▍ | 238530/371472 [8:23:22<10:12:26, 3.62it/s] 64%|██████▍ | 238531/371472 [8:23:22<10:49:21, 3.41it/s] 64%|██████▍ | 238532/371472 [8:23:22<10:40:12, 3.46it/s] 64%|██████▍ | 238533/371472 [8:23:22<10:29:39, 3.52it/s] 64%|██████▍ | 238534/371472 [8:23:23<10:28:54, 3.52it/s] 64%|██████▍ | 238535/371472 [8:23:23<10:27:32, 3.53it/s] 64%|██████▍ | 238536/371472 [8:23:23<10:27:58, 3.53it/s] 64%|██████▍ | 238537/371472 [8:23:24<10:18:35, 3.58it/s] 64%|██████▍ | 238538/371472 [8:23:24<10:39:12, 3.47it/s] 64%|██████▍ | 238539/371472 [8:23:24<10:29:52, 3.52it/s] 64%|██████▍ | 238540/371472 [8:23:25<11:06:36, 3.32it/s] {'loss': 2.9081, 'learning_rate': 4.22240298218018e-07, 'epoch': 10.27} + 64%|██████▍ | 238540/371472 [8:23:25<11:06:36, 3.32it/s] 64%|██████▍ | 238541/371472 [8:23:25<10:48:07, 3.42it/s] 64%|██████▍ | 238542/371472 [8:23:25<10:37:16, 3.48it/s] 64%|██████▍ | 238543/371472 [8:23:25<10:30:34, 3.51it/s] 64%|██████▍ | 238544/371472 [8:23:26<10:31:40, 3.51it/s] 64%|██████▍ | 238545/371472 [8:23:26<10:28:32, 3.52it/s] 64%|██████▍ | 238546/371472 [8:23:26<10:19:41, 3.58it/s] 64%|██████▍ | 238547/371472 [8:23:26<10:10:26, 3.63it/s] 64%|██████▍ | 238548/371472 [8:23:27<10:19:01, 3.58it/s] 64%|██████▍ | 238549/371472 [8:23:27<10:17:54, 3.59it/s] 64%|██████▍ | 238550/371472 [8:23:27<10:02:26, 3.68it/s] 64%|██████▍ | 238551/371472 [8:23:28<10:22:14, 3.56it/s] 64%|██████▍ | 238552/371472 [8:23:28<10:16:45, 3.59it/s] 64%|██████▍ | 238553/371472 [8:23:28<10:40:00, 3.46it/s] 64%|██████▍ | 238554/371472 [8:23:28<10:54:58, 3.38it/s] 64%|██████▍ | 238555/371472 [8:23:29<10:36:13, 3.48it/s] 64%|██████▍ | 238556/371472 [8:23:29<10:55:48, 3.38it/s] 64%|██████▍ | 238557/371472 [8:23:29<10:46:03, 3.43it/s] 64%|██████▍ | 238558/371472 [8:23:30<11:33:48, 3.19it/s] 64%|██████▍ | 238559/371472 [8:23:30<11:13:11, 3.29it/s] 64%|██████▍ | 238560/371472 [8:23:30<11:07:03, 3.32it/s] {'loss': 2.7445, 'learning_rate': 4.2219181624253917e-07, 'epoch': 10.28} + 64%|██████▍ | 238560/371472 [8:23:30<11:07:03, 3.32it/s] 64%|██████▍ | 238561/371472 [8:23:31<11:15:45, 3.28it/s] 64%|██████▍ | 238562/371472 [8:23:31<11:15:25, 3.28it/s] 64%|██████▍ | 238563/371472 [8:23:31<10:57:19, 3.37it/s] 64%|██████▍ | 238564/371472 [8:23:31<10:39:29, 3.46it/s] 64%|██████▍ | 238565/371472 [8:23:32<10:40:48, 3.46it/s] 64%|██████▍ | 238566/371472 [8:23:32<10:29:57, 3.52it/s] 64%|██████▍ | 238567/371472 [8:23:32<11:10:42, 3.30it/s] 64%|██████▍ | 238568/371472 [8:23:33<11:10:48, 3.30it/s] 64%|██████▍ | 238569/371472 [8:23:33<10:58:48, 3.36it/s] 64%|██████▍ | 238570/371472 [8:23:33<10:49:47, 3.41it/s] 64%|██████▍ | 238571/371472 [8:23:34<11:02:58, 3.34it/s] 64%|██████▍ | 238572/371472 [8:23:34<10:48:14, 3.42it/s] 64%|██████▍ | 238573/371472 [8:23:34<10:37:47, 3.47it/s] 64%|██████▍ | 238574/371472 [8:23:34<10:16:01, 3.60it/s] 64%|██████▍ | 238575/371472 [8:23:35<10:31:54, 3.51it/s] 64%|██████▍ | 238576/371472 [8:23:35<11:04:45, 3.33it/s] 64%|██████▍ | 238577/371472 [8:23:35<11:08:16, 3.31it/s] 64%|██████▍ | 238578/371472 [8:23:36<10:59:12, 3.36it/s] 64%|██████▍ | 238579/371472 [8:23:36<10:48:50, 3.41it/s] 64%|██████▍ | 238580/371472 [8:23:36<11:08:42, 3.31it/s] {'loss': 2.774, 'learning_rate': 4.2214333426706024e-07, 'epoch': 10.28} + 64%|██████▍ | 238580/371472 [8:23:36<11:08:42, 3.31it/s] 64%|██████▍ | 238581/371472 [8:23:36<10:51:26, 3.40it/s] 64%|██████▍ | 238582/371472 [8:23:37<10:35:08, 3.49it/s] 64%|██████▍ | 238583/371472 [8:23:37<10:35:11, 3.49it/s] 64%|██████▍ | 238584/371472 [8:23:37<11:01:51, 3.35it/s] 64%|██████▍ | 238585/371472 [8:23:38<11:02:29, 3.34it/s] 64%|██████▍ | 238586/371472 [8:23:38<10:49:11, 3.41it/s] 64%|██████▍ | 238587/371472 [8:23:38<10:38:08, 3.47it/s] 64%|██████▍ | 238588/371472 [8:23:38<10:52:23, 3.39it/s] 64%|██████▍ | 238589/371472 [8:23:39<10:29:42, 3.52it/s] 64%|██████▍ | 238590/371472 [8:23:39<10:22:00, 3.56it/s] 64%|██████▍ | 238591/371472 [8:23:39<10:52:36, 3.39it/s] 64%|██████▍ | 238592/371472 [8:23:40<10:36:41, 3.48it/s] 64%|██████▍ | 238593/371472 [8:23:40<11:16:24, 3.27it/s] 64%|██████▍ | 238594/371472 [8:23:40<11:31:22, 3.20it/s] 64%|██████▍ | 238595/371472 [8:23:41<11:29:29, 3.21it/s] 64%|██████▍ | 238596/371472 [8:23:41<11:04:12, 3.33it/s] 64%|██████▍ | 238597/371472 [8:23:41<11:10:02, 3.31it/s] 64%|██████▍ | 238598/371472 [8:23:42<11:24:06, 3.24it/s] 64%|██████▍ | 238599/371472 [8:23:42<11:31:03, 3.20it/s] 64%|██████▍ | 238600/371472 [8:23:42<11:46:58, 3.13it/s] {'loss': 2.6653, 'learning_rate': 4.2209485229158137e-07, 'epoch': 10.28} + 64%|██████▍ | 238600/371472 [8:23:42<11:46:58, 3.13it/s] 64%|██████▍ | 238601/371472 [8:23:42<11:35:42, 3.18it/s] 64%|██████▍ | 238602/371472 [8:23:43<11:08:51, 3.31it/s] 64%|██████▍ | 238603/371472 [8:23:43<10:41:43, 3.45it/s] 64%|██████▍ | 238604/371472 [8:23:43<11:02:16, 3.34it/s] 64%|██████▍ | 238605/371472 [8:23:44<10:39:39, 3.46it/s] 64%|██████▍ | 238606/371472 [8:23:44<10:34:03, 3.49it/s] 64%|██████▍ | 238607/371472 [8:23:44<10:55:50, 3.38it/s] 64%|██████▍ | 238608/371472 [8:23:45<11:02:19, 3.34it/s] 64%|██████▍ | 238609/371472 [8:23:45<10:44:25, 3.44it/s] 64%|██████▍ | 238610/371472 [8:23:45<10:50:34, 3.40it/s] 64%|██████▍ | 238611/371472 [8:23:45<10:24:14, 3.55it/s] 64%|██████▍ | 238612/371472 [8:23:46<11:43:24, 3.15it/s] 64%|██████▍ | 238613/371472 [8:23:46<11:11:22, 3.30it/s] 64%|██████▍ | 238614/371472 [8:23:46<10:52:45, 3.39it/s] 64%|██████▍ | 238615/371472 [8:23:47<10:47:17, 3.42it/s] 64%|██████▍ | 238616/371472 [8:23:47<11:16:04, 3.28it/s] 64%|██████▍ | 238617/371472 [8:23:47<11:11:06, 3.30it/s] 64%|██████▍ | 238618/371472 [8:23:47<11:08:59, 3.31it/s] 64%|██████▍ | 238619/371472 [8:23:48<10:58:50, 3.36it/s] 64%|██████▍ | 238620/371472 [8:23:48<10:48:40, 3.41it/s] {'loss': 2.5808, 'learning_rate': 4.2204637031610244e-07, 'epoch': 10.28} + 64%|██████▍ | 238620/371472 [8:23:48<10:48:40, 3.41it/s] 64%|██████▍ | 238621/371472 [8:23:48<10:28:06, 3.53it/s] 64%|██████▍ | 238622/371472 [8:23:49<10:34:12, 3.49it/s] 64%|██████▍ | 238623/371472 [8:23:49<10:59:18, 3.36it/s] 64%|██████▍ | 238624/371472 [8:23:49<10:43:13, 3.44it/s] 64%|██████▍ | 238625/371472 [8:23:50<10:48:06, 3.42it/s] 64%|██████▍ | 238626/371472 [8:23:50<11:51:26, 3.11it/s] 64%|██████▍ | 238627/371472 [8:23:50<11:34:51, 3.19it/s] 64%|██████▍ | 238628/371472 [8:23:51<12:01:13, 3.07it/s] 64%|██████▍ | 238629/371472 [8:23:51<11:33:53, 3.19it/s] 64%|██████▍ | 238630/371472 [8:23:51<11:41:02, 3.16it/s] 64%|██████▍ | 238631/371472 [8:23:51<11:12:39, 3.29it/s] 64%|██████▍ | 238632/371472 [8:23:52<11:03:43, 3.34it/s] 64%|██████▍ | 238633/371472 [8:23:52<10:52:35, 3.39it/s] 64%|██████▍ | 238634/371472 [8:23:52<10:57:41, 3.37it/s] 64%|██████▍ | 238635/371472 [8:23:53<11:20:12, 3.25it/s] 64%|██████▍ | 238636/371472 [8:23:53<11:01:57, 3.34it/s] 64%|██████▍ | 238637/371472 [8:23:53<11:14:08, 3.28it/s] 64%|██████▍ | 238638/371472 [8:23:54<11:23:00, 3.24it/s] 64%|██████▍ | 238639/371472 [8:23:54<11:40:56, 3.16it/s] 64%|██████▍ | 238640/371472 [8:23:54<11:00:50, 3.35it/s] {'loss': 2.7587, 'learning_rate': 4.219978883406236e-07, 'epoch': 10.28} + 64%|██████▍ | 238640/371472 [8:23:54<11:00:50, 3.35it/s] 64%|██████▍ | 238641/371472 [8:23:54<11:00:12, 3.35it/s] 64%|██████▍ | 238642/371472 [8:23:55<11:05:30, 3.33it/s] 64%|██████▍ | 238643/371472 [8:23:55<10:41:09, 3.45it/s] 64%|██████▍ | 238644/371472 [8:23:55<10:34:55, 3.49it/s] 64%|██████▍ | 238645/371472 [8:23:56<11:41:32, 3.16it/s] 64%|██████▍ | 238646/371472 [8:23:56<11:11:21, 3.30it/s] 64%|██████▍ | 238647/371472 [8:23:56<10:54:10, 3.38it/s] 64%|██████▍ | 238648/371472 [8:23:57<11:07:16, 3.32it/s] 64%|██████▍ | 238649/371472 [8:23:57<10:50:17, 3.40it/s] 64%|██████▍ | 238650/371472 [8:23:57<10:30:24, 3.51it/s] 64%|██████▍ | 238651/371472 [8:23:57<10:49:18, 3.41it/s] 64%|██████▍ | 238652/371472 [8:23:58<10:24:04, 3.55it/s] 64%|██████▍ | 238653/371472 [8:23:58<10:22:22, 3.56it/s] 64%|██████▍ | 238654/371472 [8:23:58<11:06:24, 3.32it/s] 64%|██████▍ | 238655/371472 [8:23:59<10:41:49, 3.45it/s] 64%|██████▍ | 238656/371472 [8:23:59<10:34:46, 3.49it/s] 64%|██████▍ | 238657/371472 [8:23:59<10:24:04, 3.55it/s] 64%|██████▍ | 238658/371472 [8:23:59<11:29:24, 3.21it/s] 64%|██████▍ | 238659/371472 [8:24:00<11:45:21, 3.14it/s] 64%|██████▍ | 238660/371472 [8:24:00<11:31:21, 3.20it/s] {'loss': 2.6612, 'learning_rate': 4.2194940636514463e-07, 'epoch': 10.28} + 64%|██████▍ | 238660/371472 [8:24:00<11:31:21, 3.20it/s] 64%|██████▍ | 238661/371472 [8:24:00<11:11:54, 3.29it/s] 64%|██████▍ | 238662/371472 [8:24:01<11:05:12, 3.33it/s] 64%|██████▍ | 238663/371472 [8:24:01<11:01:50, 3.34it/s] 64%|██████▍ | 238664/371472 [8:24:01<10:49:00, 3.41it/s] 64%|██████▍ | 238665/371472 [8:24:02<10:45:16, 3.43it/s] 64%|██████▍ | 238666/371472 [8:24:02<11:26:38, 3.22it/s] 64%|██████▍ | 238667/371472 [8:24:02<12:13:48, 3.02it/s] 64%|██████▍ | 238668/371472 [8:24:03<12:28:32, 2.96it/s] 64%|██████▍ | 238669/371472 [8:24:03<12:33:25, 2.94it/s] 64%|██████▍ | 238670/371472 [8:24:03<12:04:27, 3.06it/s] 64%|██████▍ | 238671/371472 [8:24:04<12:51:04, 2.87it/s] 64%|██████▍ | 238672/371472 [8:24:04<12:06:31, 3.05it/s] 64%|██████▍ | 238673/371472 [8:24:04<11:55:03, 3.10it/s] 64%|██████▍ | 238674/371472 [8:24:05<11:50:42, 3.11it/s] 64%|██████▍ | 238675/371472 [8:24:05<11:48:15, 3.12it/s] 64%|██████▍ | 238676/371472 [8:24:05<12:36:09, 2.93it/s] 64%|██████▍ | 238677/371472 [8:24:06<12:15:25, 3.01it/s] 64%|██████▍ | 238678/371472 [8:24:06<12:07:01, 3.04it/s] 64%|██████▍ | 238679/371472 [8:24:06<11:24:32, 3.23it/s] 64%|██████▍ | 238680/371472 [8:24:06<11:17:12, 3.27it/s] {'loss': 2.7094, 'learning_rate': 4.219009243896658e-07, 'epoch': 10.28} + 64%|██████▍ | 238680/371472 [8:24:06<11:17:12, 3.27it/s] 64%|██████▍ | 238681/371472 [8:24:07<11:58:45, 3.08it/s] 64%|██████▍ | 238682/371472 [8:24:07<12:29:44, 2.95it/s] 64%|██████▍ | 238683/371472 [8:24:08<12:43:01, 2.90it/s] 64%|██████▍ | 238684/371472 [8:24:08<12:01:40, 3.07it/s] 64%|██████▍ | 238685/371472 [8:24:08<11:39:02, 3.17it/s] 64%|██████▍ | 238686/371472 [8:24:08<11:03:47, 3.33it/s] 64%|██████▍ | 238687/371472 [8:24:09<11:04:14, 3.33it/s] 64%|██████▍ | 238688/371472 [8:24:09<11:07:43, 3.31it/s] 64%|██████▍ | 238689/371472 [8:24:09<11:06:54, 3.32it/s] 64%|██████▍ | 238690/371472 [8:24:10<10:45:16, 3.43it/s] 64%|██████▍ | 238691/371472 [8:24:10<10:38:44, 3.46it/s] 64%|██████▍ | 238692/371472 [8:24:10<10:37:06, 3.47it/s] 64%|██████▍ | 238693/371472 [8:24:10<10:57:30, 3.37it/s] 64%|██████▍ | 238694/371472 [8:24:11<10:51:55, 3.39it/s] 64%|██████▍ | 238695/371472 [8:24:11<11:34:35, 3.19it/s] 64%|██████▍ | 238696/371472 [8:24:11<11:08:20, 3.31it/s] 64%|██████▍ | 238697/371472 [8:24:12<10:58:29, 3.36it/s] 64%|██████▍ | 238698/371472 [8:24:12<11:44:35, 3.14it/s] 64%|██████▍ | 238699/371472 [8:24:12<11:32:53, 3.19it/s] 64%|���█████▍ | 238700/371472 [8:24:13<11:01:02, 3.35it/s] {'loss': 2.835, 'learning_rate': 4.218524424141869e-07, 'epoch': 10.28} + 64%|██████▍ | 238700/371472 [8:24:13<11:01:02, 3.35it/s] 64%|██████▍ | 238701/371472 [8:24:13<10:42:29, 3.44it/s] 64%|██████▍ | 238702/371472 [8:24:13<10:45:49, 3.43it/s] 64%|██████▍ | 238703/371472 [8:24:13<10:51:21, 3.40it/s] 64%|██████▍ | 238704/371472 [8:24:14<10:49:19, 3.41it/s] 64%|██████▍ | 238705/371472 [8:24:14<10:48:44, 3.41it/s] 64%|██████▍ | 238706/371472 [8:24:14<10:55:00, 3.38it/s] 64%|██████▍ | 238707/371472 [8:24:15<10:43:00, 3.44it/s] 64%|██████▍ | 238708/371472 [8:24:15<11:26:24, 3.22it/s] 64%|██████▍ | 238709/371472 [8:24:15<11:33:04, 3.19it/s] 64%|██████▍ | 238710/371472 [8:24:16<11:16:48, 3.27it/s] 64%|██████▍ | 238711/371472 [8:24:16<11:10:26, 3.30it/s] 64%|██████▍ | 238712/371472 [8:24:16<11:02:44, 3.34it/s] 64%|██████▍ | 238713/371472 [8:24:16<10:48:00, 3.41it/s] 64%|██████▍ | 238714/371472 [8:24:17<11:02:21, 3.34it/s] 64%|██████▍ | 238715/371472 [8:24:17<11:19:46, 3.25it/s] 64%|██████▍ | 238716/371472 [8:24:17<11:11:44, 3.29it/s] 64%|██████▍ | 238717/371472 [8:24:18<11:06:14, 3.32it/s] 64%|██████▍ | 238718/371472 [8:24:18<11:25:26, 3.23it/s] 64%|██████▍ | 238719/371472 [8:24:18<11:06:59, 3.32it/s] 64%|██████▍ | 238720/371472 [8:24:19<11:17:30, 3.27it/s] {'loss': 2.8281, 'learning_rate': 4.21803960438708e-07, 'epoch': 10.28} + 64%|██████▍ | 238720/371472 [8:24:19<11:17:30, 3.27it/s] 64%|██████▍ | 238721/371472 [8:24:19<11:43:35, 3.14it/s] 64%|██████▍ | 238722/371472 [8:24:19<11:07:15, 3.32it/s] 64%|██████▍ | 238723/371472 [8:24:20<11:27:58, 3.22it/s] 64%|██████▍ | 238724/371472 [8:24:20<11:26:05, 3.22it/s] 64%|██████▍ | 238725/371472 [8:24:20<11:03:51, 3.33it/s] 64%|██████▍ | 238726/371472 [8:24:21<11:21:36, 3.25it/s] 64%|██████▍ | 238727/371472 [8:24:21<11:06:21, 3.32it/s] 64%|██████▍ | 238728/371472 [8:24:21<10:56:49, 3.37it/s] 64%|██████▍ | 238729/371472 [8:24:21<10:45:09, 3.43it/s] 64%|██████▍ | 238730/371472 [8:24:22<11:13:36, 3.28it/s] 64%|██████▍ | 238731/371472 [8:24:22<11:02:19, 3.34it/s] 64%|██████▍ | 238732/371472 [8:24:22<10:49:21, 3.41it/s] 64%|██████▍ | 238733/371472 [8:24:23<10:57:34, 3.36it/s] 64%|██████▍ | 238734/371472 [8:24:23<11:49:53, 3.12it/s] 64%|██████▍ | 238735/371472 [8:24:23<11:44:32, 3.14it/s] 64%|██████▍ | 238736/371472 [8:24:24<12:00:13, 3.07it/s] 64%|██████▍ | 238737/371472 [8:24:24<11:26:13, 3.22it/s] 64%|██████▍ | 238738/371472 [8:24:24<11:00:33, 3.35it/s] 64%|██████▍ | 238739/371472 [8:24:24<10:39:26, 3.46it/s] 64%|██████▍ | 238740/371472 [8:24:25<10:28:57, 3.52it/s] {'loss': 2.7648, 'learning_rate': 4.217554784632291e-07, 'epoch': 10.28} + 64%|██████▍ | 238740/371472 [8:24:25<10:28:57, 3.52it/s] 64%|██████▍ | 238741/371472 [8:24:25<10:17:05, 3.58it/s] 64%|██████▍ | 238742/371472 [8:24:25<10:12:40, 3.61it/s] 64%|██████▍ | 238743/371472 [8:24:26<10:27:23, 3.53it/s] 64%|██████▍ | 238744/371472 [8:24:26<10:22:13, 3.56it/s] 64%|██████▍ | 238745/371472 [8:24:26<10:50:13, 3.40it/s] 64%|██████▍ | 238746/371472 [8:24:26<11:15:23, 3.28it/s] 64%|██████▍ | 238747/371472 [8:24:27<10:51:27, 3.40it/s] 64%|██████▍ | 238748/371472 [8:24:27<11:09:40, 3.30it/s] 64%|██████▍ | 238749/371472 [8:24:27<11:01:17, 3.35it/s] 64%|██████▍ | 238750/371472 [8:24:28<11:13:05, 3.29it/s] 64%|██████▍ | 238751/371472 [8:24:28<10:47:41, 3.42it/s] 64%|██████▍ | 238752/371472 [8:24:28<11:05:37, 3.32it/s] 64%|██████▍ | 238753/371472 [8:24:29<10:45:17, 3.43it/s] 64%|██████▍ | 238754/371472 [8:24:29<10:34:56, 3.48it/s] 64%|██████▍ | 238755/371472 [8:24:29<11:13:46, 3.28it/s] 64%|██████▍ | 238756/371472 [8:24:29<11:06:20, 3.32it/s] 64%|██████▍ | 238757/371472 [8:24:30<10:59:32, 3.35it/s] 64%|██████▍ | 238758/371472 [8:24:30<11:04:42, 3.33it/s] 64%|██████▍ | 238759/371472 [8:24:30<10:57:31, 3.36it/s] 64%|██████▍ | 238760/371472 [8:24:31<10:45:28, 3.43it/s] {'loss': 2.7684, 'learning_rate': 4.2170699648775026e-07, 'epoch': 10.28} + 64%|██████▍ | 238760/371472 [8:24:31<10:45:28, 3.43it/s] 64%|██████▍ | 238761/371472 [8:24:31<11:36:01, 3.18it/s] 64%|██████▍ | 238762/371472 [8:24:31<11:20:17, 3.25it/s] 64%|██████▍ | 238763/371472 [8:24:32<11:27:30, 3.22it/s] 64%|██████▍ | 238764/371472 [8:24:32<11:21:35, 3.25it/s] 64%|██████▍ | 238765/371472 [8:24:32<11:06:08, 3.32it/s] 64%|██████▍ | 238766/371472 [8:24:32<10:56:55, 3.37it/s] 64%|██████▍ | 238767/371472 [8:24:33<11:01:26, 3.34it/s] 64%|██████▍ | 238768/371472 [8:24:33<10:52:25, 3.39it/s] 64%|██████▍ | 238769/371472 [8:24:33<11:26:42, 3.22it/s] 64%|██████▍ | 238770/371472 [8:24:34<11:15:50, 3.27it/s] 64%|██████▍ | 238771/371472 [8:24:34<10:57:36, 3.36it/s] 64%|██████▍ | 238772/371472 [8:24:34<11:14:39, 3.28it/s] 64%|██████▍ | 238773/371472 [8:24:35<11:24:09, 3.23it/s] 64%|██████▍ | 238774/371472 [8:24:35<11:37:43, 3.17it/s] 64%|██████▍ | 238775/371472 [8:24:35<11:28:48, 3.21it/s] 64%|██████▍ | 238776/371472 [8:24:36<11:51:14, 3.11it/s] 64%|██████▍ | 238777/371472 [8:24:36<11:14:25, 3.28it/s] 64%|██████▍ | 238778/371472 [8:24:36<10:50:58, 3.40it/s] 64%|██████▍ | 238779/371472 [8:24:36<10:56:54, 3.37it/s] 64%|██████▍ | 238780/371472 [8:24:37<10:34:54, 3.48it/s] {'loss': 2.9308, 'learning_rate': 4.2165851451227133e-07, 'epoch': 10.28} + 64%|██████▍ | 238780/371472 [8:24:37<10:34:54, 3.48it/s] 64%|██████▍ | 238781/371472 [8:24:37<10:14:42, 3.60it/s] 64%|██████▍ | 238782/371472 [8:24:37<10:15:36, 3.59it/s] 64%|██████▍ | 238783/371472 [8:24:37<10:25:55, 3.53it/s] 64%|██████▍ | 238784/371472 [8:24:38<10:24:27, 3.54it/s] 64%|██████▍ | 238785/371472 [8:24:38<10:27:09, 3.53it/s] 64%|██████▍ | 238786/371472 [8:24:38<10:49:23, 3.41it/s] 64%|██████▍ | 238787/371472 [8:24:39<10:48:26, 3.41it/s] 64%|██████▍ | 238788/371472 [8:24:39<10:45:50, 3.42it/s] 64%|██████▍ | 238789/371472 [8:24:39<11:09:36, 3.30it/s] 64%|██████▍ | 238790/371472 [8:24:40<11:11:40, 3.29it/s] 64%|██████▍ | 238791/371472 [8:24:40<10:42:31, 3.44it/s] 64%|██████▍ | 238792/371472 [8:24:40<10:37:31, 3.47it/s] 64%|██████▍ | 238793/371472 [8:24:40<10:32:37, 3.50it/s] 64%|██████▍ | 238794/371472 [8:24:41<10:29:39, 3.51it/s] 64%|██████▍ | 238795/371472 [8:24:41<11:13:36, 3.28it/s] 64%|██████▍ | 238796/371472 [8:24:41<10:57:29, 3.36it/s] 64%|██████▍ | 238797/371472 [8:24:42<10:52:47, 3.39it/s] 64%|██████▍ | 238798/371472 [8:24:42<10:50:18, 3.40it/s] 64%|██████▍ | 238799/371472 [8:24:42<10:56:22, 3.37it/s] 64%|██████▍ | 238800/371472 [8:24:43<10:50:31, 3.40it/s] {'loss': 2.6397, 'learning_rate': 4.2161003253679245e-07, 'epoch': 10.29} + 64%|██████▍ | 238800/371472 [8:24:43<10:50:31, 3.40it/s] 64%|██████▍ | 238801/371472 [8:24:43<10:55:45, 3.37it/s] 64%|██████▍ | 238802/371472 [8:24:43<10:36:38, 3.47it/s] 64%|██████▍ | 238803/371472 [8:24:43<10:48:48, 3.41it/s] 64%|██████▍ | 238804/371472 [8:24:44<10:46:17, 3.42it/s] 64%|██████▍ | 238805/371472 [8:24:44<10:37:50, 3.47it/s] 64%|██████▍ | 238806/371472 [8:24:44<10:42:28, 3.44it/s] 64%|██████▍ | 238807/371472 [8:24:45<10:40:28, 3.45it/s] 64%|██████▍ | 238808/371472 [8:24:45<10:40:42, 3.45it/s] 64%|██████▍ | 238809/371472 [8:24:45<10:40:08, 3.45it/s] 64%|██████▍ | 238810/371472 [8:24:45<10:39:49, 3.46it/s] 64%|██████▍ | 238811/371472 [8:24:46<10:34:51, 3.48it/s] 64%|██████▍ | 238812/371472 [8:24:46<10:31:46, 3.50it/s] 64%|██████▍ | 238813/371472 [8:24:46<10:40:30, 3.45it/s] 64%|██████▍ | 238814/371472 [8:24:47<10:57:58, 3.36it/s] 64%|██████▍ | 238815/371472 [8:24:47<11:29:30, 3.21it/s] 64%|██████▍ | 238816/371472 [8:24:47<11:15:27, 3.27it/s] 64%|██████▍ | 238817/371472 [8:24:48<11:36:44, 3.17it/s] 64%|██████▍ | 238818/371472 [8:24:48<11:12:56, 3.29it/s] 64%|██████▍ | 238819/371472 [8:24:48<11:03:55, 3.33it/s] 64%|██████▍ | 238820/371472 [8:24:48<10:50:24, 3.40it/s] {'loss': 2.6449, 'learning_rate': 4.215615505613135e-07, 'epoch': 10.29} + 64%|██████▍ | 238820/371472 [8:24:48<10:50:24, 3.40it/s] 64%|██████▍ | 238821/371472 [8:24:49<10:42:33, 3.44it/s] 64%|██████▍ | 238822/371472 [8:24:49<10:37:38, 3.47it/s] 64%|██████▍ | 238823/371472 [8:24:49<11:18:14, 3.26it/s] 64%|██████▍ | 238824/371472 [8:24:50<11:24:46, 3.23it/s] 64%|██████▍ | 238825/371472 [8:24:50<11:17:30, 3.26it/s] 64%|██████▍ | 238826/371472 [8:24:50<11:18:45, 3.26it/s] 64%|██████▍ | 238827/371472 [8:24:51<11:20:48, 3.25it/s] 64%|██████▍ | 238828/371472 [8:24:51<10:58:58, 3.35it/s] 64%|██████▍ | 238829/371472 [8:24:51<11:15:31, 3.27it/s] 64%|██████▍ | 238830/371472 [8:24:51<11:00:05, 3.35it/s] 64%|██████▍ | 238831/371472 [8:24:52<10:34:56, 3.48it/s] 64%|██████▍ | 238832/371472 [8:24:52<10:24:06, 3.54it/s] 64%|██████▍ | 238833/371472 [8:24:52<10:18:41, 3.57it/s] 64%|██████▍ | 238834/371472 [8:24:53<10:28:25, 3.52it/s] 64%|██████▍ | 238835/371472 [8:24:53<10:28:37, 3.52it/s] 64%|██████▍ | 238836/371472 [8:24:53<10:31:34, 3.50it/s] 64%|██████▍ | 238837/371472 [8:24:54<11:43:05, 3.14it/s] 64%|██████▍ | 238838/371472 [8:24:54<12:09:05, 3.03it/s] 64%|██████▍ | 238839/371472 [8:24:54<11:42:43, 3.15it/s] 64%|██████▍ | 238840/371472 [8:24:54<11:45:45, 3.13it/s] {'loss': 2.5978, 'learning_rate': 4.215130685858347e-07, 'epoch': 10.29} + 64%|██████▍ | 238840/371472 [8:24:54<11:45:45, 3.13it/s] 64%|██████▍ | 238841/371472 [8:24:55<11:33:08, 3.19it/s] 64%|██████▍ | 238842/371472 [8:24:55<11:45:56, 3.13it/s] 64%|██████▍ | 238843/371472 [8:24:55<11:38:15, 3.17it/s] 64%|██████▍ | 238844/371472 [8:24:56<11:15:00, 3.27it/s] 64%|██████▍ | 238845/371472 [8:24:56<10:52:43, 3.39it/s] 64%|██████▍ | 238846/371472 [8:24:56<10:46:59, 3.42it/s] 64%|██████▍ | 238847/371472 [8:24:57<10:52:38, 3.39it/s] 64%|██████▍ | 238848/371472 [8:24:57<10:30:09, 3.51it/s] 64%|██████▍ | 238849/371472 [8:24:57<11:01:09, 3.34it/s] 64%|██████▍ | 238850/371472 [8:24:57<11:15:48, 3.27it/s] 64%|██████▍ | 238851/371472 [8:24:58<11:02:53, 3.33it/s] 64%|██████▍ | 238852/371472 [8:24:58<11:00:00, 3.35it/s] 64%|██████▍ | 238853/371472 [8:24:58<11:02:14, 3.34it/s] 64%|██████▍ | 238854/371472 [8:24:59<10:44:12, 3.43it/s] 64%|██████▍ | 238855/371472 [8:24:59<10:40:46, 3.45it/s] 64%|██████▍ | 238856/371472 [8:24:59<11:10:07, 3.30it/s] 64%|██████▍ | 238857/371472 [8:25:00<11:14:09, 3.28it/s] 64%|██████▍ | 238858/371472 [8:25:00<11:06:17, 3.32it/s] 64%|██████▍ | 238859/371472 [8:25:00<11:16:15, 3.27it/s] 64%|██████▍ | 238860/371472 [8:25:00<10:48:17, 3.41it/s] {'loss': 2.7557, 'learning_rate': 4.214645866103557e-07, 'epoch': 10.29} + 64%|██████▍ | 238860/371472 [8:25:00<10:48:17, 3.41it/s] 64%|██████▍ | 238861/371472 [8:25:01<10:44:32, 3.43it/s] 64%|██████▍ | 238862/371472 [8:25:01<10:43:37, 3.43it/s] 64%|██████▍ | 238863/371472 [8:25:01<11:31:26, 3.20it/s] 64%|██████▍ | 238864/371472 [8:25:02<11:07:17, 3.31it/s] 64%|██████▍ | 238865/371472 [8:25:02<11:09:45, 3.30it/s] 64%|██████▍ | 238866/371472 [8:25:02<11:14:09, 3.28it/s] 64%|██████▍ | 238867/371472 [8:25:03<11:03:31, 3.33it/s] 64%|██████▍ | 238868/371472 [8:25:03<11:21:11, 3.24it/s] 64%|██████▍ | 238869/371472 [8:25:03<12:00:51, 3.07it/s] 64%|██████▍ | 238870/371472 [8:25:04<12:08:48, 3.03it/s] 64%|██████▍ | 238871/371472 [8:25:04<11:44:51, 3.14it/s] 64%|██████▍ | 238872/371472 [8:25:04<11:30:48, 3.20it/s] 64%|██████▍ | 238873/371472 [8:25:04<11:08:18, 3.31it/s] 64%|██████▍ | 238874/371472 [8:25:05<11:14:28, 3.28it/s] 64%|██████▍ | 238875/371472 [8:25:05<11:11:59, 3.29it/s] 64%|██████▍ | 238876/371472 [8:25:05<11:23:17, 3.23it/s] 64%|██████▍ | 238877/371472 [8:25:06<11:56:03, 3.09it/s] 64%|██████▍ | 238878/371472 [8:25:06<11:37:34, 3.17it/s] 64%|██████▍ | 238879/371472 [8:25:06<11:52:47, 3.10it/s] 64%|██████▍ | 238880/371472 [8:25:07<11:57:08, 3.08it/s] {'loss': 2.5845, 'learning_rate': 4.214161046348768e-07, 'epoch': 10.29} + 64%|██████▍ | 238880/371472 [8:25:07<11:57:08, 3.08it/s] 64%|██████▍ | 238881/371472 [8:25:07<11:38:51, 3.16it/s] 64%|██████▍ | 238882/371472 [8:25:07<11:48:57, 3.12it/s] 64%|██████▍ | 238883/371472 [8:25:08<11:42:03, 3.15it/s] 64%|██████▍ | 238884/371472 [8:25:08<11:22:27, 3.24it/s] 64%|██████▍ | 238885/371472 [8:25:08<11:06:10, 3.32it/s] 64%|██████▍ | 238886/371472 [8:25:09<11:18:16, 3.26it/s] 64%|██████▍ | 238887/371472 [8:25:09<11:07:52, 3.31it/s] 64%|██████▍ | 238888/371472 [8:25:09<11:08:32, 3.31it/s] 64%|██████▍ | 238889/371472 [8:25:09<11:02:54, 3.33it/s] 64%|██████▍ | 238890/371472 [8:25:10<10:58:03, 3.36it/s] 64%|██████▍ | 238891/371472 [8:25:10<11:07:42, 3.31it/s] 64%|██████▍ | 238892/371472 [8:25:10<10:53:50, 3.38it/s] 64%|██████▍ | 238893/371472 [8:25:11<10:45:43, 3.42it/s] 64%|██████▍ | 238894/371472 [8:25:11<10:42:55, 3.44it/s] 64%|██████▍ | 238895/371472 [8:25:11<10:57:16, 3.36it/s] 64%|██████▍ | 238896/371472 [8:25:12<11:25:44, 3.22it/s] 64%|██████▍ | 238897/371472 [8:25:12<11:04:18, 3.33it/s] 64%|██████▍ | 238898/371472 [8:25:12<11:16:55, 3.26it/s] 64%|██████▍ | 238899/371472 [8:25:12<11:13:08, 3.28it/s] 64%|██████▍ | 238900/371472 [8:25:13<11:12:08, 3.29it/s] {'loss': 2.7624, 'learning_rate': 4.2136762265939797e-07, 'epoch': 10.29} + 64%|██████▍ | 238900/371472 [8:25:13<11:12:08, 3.29it/s] 64%|██████▍ | 238901/371472 [8:25:13<11:26:14, 3.22it/s] 64%|██████▍ | 238902/371472 [8:25:13<11:25:36, 3.22it/s] 64%|██████▍ | 238903/371472 [8:25:14<11:18:07, 3.26it/s] 64%|██████▍ | 238904/371472 [8:25:14<10:59:01, 3.35it/s] 64%|██████▍ | 238905/371472 [8:25:14<10:49:45, 3.40it/s] 64%|██████▍ | 238906/371472 [8:25:15<10:47:23, 3.41it/s] 64%|██████▍ | 238907/371472 [8:25:15<11:18:58, 3.25it/s] 64%|██████▍ | 238908/371472 [8:25:15<11:18:27, 3.26it/s] 64%|██████▍ | 238909/371472 [8:25:15<11:25:21, 3.22it/s] 64%|██████▍ | 238910/371472 [8:25:16<11:25:23, 3.22it/s] 64%|██████▍ | 238911/371472 [8:25:16<11:24:27, 3.23it/s] 64%|██████▍ | 238912/371472 [8:25:16<11:22:44, 3.24it/s] 64%|██████▍ | 238913/371472 [8:25:17<11:07:09, 3.31it/s] 64%|██████▍ | 238914/371472 [8:25:17<11:14:31, 3.28it/s] 64%|██████▍ | 238915/371472 [8:25:17<11:00:15, 3.35it/s] 64%|██████▍ | 238916/371472 [8:25:18<11:40:40, 3.15it/s] 64%|██████▍ | 238917/371472 [8:25:18<11:04:43, 3.32it/s] 64%|██████▍ | 238918/371472 [8:25:18<10:57:50, 3.36it/s] 64%|██████▍ | 238919/371472 [8:25:18<10:46:26, 3.42it/s] 64%|██████▍ | 238920/371472 [8:25:19<10:58:14, 3.36it/s] {'loss': 2.7894, 'learning_rate': 4.21319140683919e-07, 'epoch': 10.29} + 64%|██████▍ | 238920/371472 [8:25:19<10:58:14, 3.36it/s] 64%|██████▍ | 238921/371472 [8:25:19<10:48:08, 3.41it/s] 64%|██████▍ | 238922/371472 [8:25:19<10:41:27, 3.44it/s] 64%|██████▍ | 238923/371472 [8:25:20<10:48:32, 3.41it/s] 64%|██████▍ | 238924/371472 [8:25:20<10:31:50, 3.50it/s] 64%|██████▍ | 238925/371472 [8:25:20<10:21:17, 3.56it/s] 64%|██████▍ | 238926/371472 [8:25:20<10:21:56, 3.55it/s] 64%|██████▍ | 238927/371472 [8:25:21<11:06:05, 3.32it/s] 64%|██████▍ | 238928/371472 [8:25:21<11:39:56, 3.16it/s] 64%|██████▍ | 238929/371472 [8:25:21<11:27:24, 3.21it/s] 64%|██████▍ | 238930/371472 [8:25:22<11:02:19, 3.34it/s] 64%|██████▍ | 238931/371472 [8:25:22<11:16:20, 3.27it/s] 64%|██████▍ | 238932/371472 [8:25:22<11:03:50, 3.33it/s] 64%|██████▍ | 238933/371472 [8:25:23<10:49:31, 3.40it/s] 64%|██████▍ | 238934/371472 [8:25:23<10:41:52, 3.44it/s] 64%|██████▍ | 238935/371472 [8:25:23<10:32:41, 3.49it/s] 64%|██████▍ | 238936/371472 [8:25:24<10:35:54, 3.47it/s] 64%|██████▍ | 238937/371472 [8:25:24<10:26:03, 3.53it/s] 64%|██████▍ | 238938/371472 [8:25:24<10:15:18, 3.59it/s] 64%|██████▍ | 238939/371472 [8:25:24<10:27:52, 3.52it/s] 64%|██████▍ | 238940/371472 [8:25:25<10:42:18, 3.44it/s] {'loss': 2.7407, 'learning_rate': 4.2127065870844016e-07, 'epoch': 10.29} + 64%|██████▍ | 238940/371472 [8:25:25<10:42:18, 3.44it/s] 64%|██████▍ | 238941/371472 [8:25:25<10:44:10, 3.43it/s] 64%|██████▍ | 238942/371472 [8:25:25<11:08:48, 3.30it/s] 64%|██████▍ | 238943/371472 [8:25:26<10:56:40, 3.36it/s] 64%|██████▍ | 238944/371472 [8:25:26<10:45:49, 3.42it/s] 64%|██████▍ | 238945/371472 [8:25:26<10:50:50, 3.39it/s] 64%|██████▍ | 238946/371472 [8:25:26<11:13:38, 3.28it/s] 64%|██████▍ | 238947/371472 [8:25:27<10:50:52, 3.39it/s] 64%|██████▍ | 238948/371472 [8:25:27<11:02:24, 3.33it/s] 64%|██████▍ | 238949/371472 [8:25:27<11:11:02, 3.29it/s] 64%|██████▍ | 238950/371472 [8:25:28<11:06:35, 3.31it/s] 64%|██████▍ | 238951/371472 [8:25:28<11:33:41, 3.18it/s] 64%|██████▍ | 238952/371472 [8:25:28<11:19:48, 3.25it/s] 64%|██████▍ | 238953/371472 [8:25:29<11:04:44, 3.32it/s] 64%|██████▍ | 238954/371472 [8:25:29<11:01:21, 3.34it/s] 64%|██████▍ | 238955/371472 [8:25:29<11:36:24, 3.17it/s] 64%|██████▍ | 238956/371472 [8:25:30<11:35:52, 3.17it/s] 64%|██████▍ | 238957/371472 [8:25:30<11:24:25, 3.23it/s] 64%|██████▍ | 238958/371472 [8:25:30<11:19:35, 3.25it/s] 64%|██████▍ | 238959/371472 [8:25:30<11:13:09, 3.28it/s] 64%|██████▍ | 238960/371472 [8:25:31<11:39:24, 3.16it/s] {'loss': 2.6433, 'learning_rate': 4.2122217673296123e-07, 'epoch': 10.29} + 64%|██████▍ | 238960/371472 [8:25:31<11:39:24, 3.16it/s] 64%|██████▍ | 238961/371472 [8:25:31<11:25:52, 3.22it/s] 64%|██████▍ | 238962/371472 [8:25:31<11:18:55, 3.25it/s] 64%|██████▍ | 238963/371472 [8:25:32<11:04:21, 3.32it/s] 64%|██████▍ | 238964/371472 [8:25:32<11:12:35, 3.28it/s] 64%|██████▍ | 238965/371472 [8:25:32<11:16:55, 3.26it/s] 64%|██████▍ | 238966/371472 [8:25:33<11:36:52, 3.17it/s] 64%|██████▍ | 238967/371472 [8:25:33<11:08:45, 3.30it/s] 64%|██████▍ | 238968/371472 [8:25:33<11:09:11, 3.30it/s] 64%|██████▍ | 238969/371472 [8:25:33<10:48:50, 3.40it/s] 64%|██████▍ | 238970/371472 [8:25:34<10:38:54, 3.46it/s] 64%|██████▍ | 238971/371472 [8:25:34<10:26:44, 3.52it/s] 64%|██████▍ | 238972/371472 [8:25:34<10:17:16, 3.58it/s] 64%|██████▍ | 238973/371472 [8:25:35<10:26:15, 3.53it/s] 64%|██████▍ | 238974/371472 [8:25:35<11:04:22, 3.32it/s] 64%|██████▍ | 238975/371472 [8:25:35<10:53:54, 3.38it/s] 64%|██████▍ | 238976/371472 [8:25:36<12:03:18, 3.05it/s] 64%|██████▍ | 238977/371472 [8:25:36<11:59:48, 3.07it/s] 64%|██████▍ | 238978/371472 [8:25:36<11:36:03, 3.17it/s] 64%|██████▍ | 238979/371472 [8:25:37<11:39:15, 3.16it/s] 64%|██████▍ | 238980/371472 [8:25:37<11:36:47, 3.17it/s] {'loss': 2.6204, 'learning_rate': 4.2117369475748236e-07, 'epoch': 10.29} + 64%|██████▍ | 238980/371472 [8:25:37<11:36:47, 3.17it/s] 64%|██████▍ | 238981/371472 [8:25:37<11:46:51, 3.12it/s] 64%|██████▍ | 238982/371472 [8:25:37<11:26:01, 3.22it/s] 64%|██████▍ | 238983/371472 [8:25:38<11:33:05, 3.19it/s] 64%|██████▍ | 238984/371472 [8:25:38<11:17:25, 3.26it/s] 64%|██████▍ | 238985/371472 [8:25:38<11:10:20, 3.29it/s] 64%|██████▍ | 238986/371472 [8:25:39<10:56:52, 3.36it/s] 64%|██████▍ | 238987/371472 [8:25:39<11:34:18, 3.18it/s] 64%|██████▍ | 238988/371472 [8:25:39<11:11:18, 3.29it/s] 64%|██████▍ | 238989/371472 [8:25:40<10:56:30, 3.36it/s] 64%|██████▍ | 238990/371472 [8:25:40<10:43:09, 3.43it/s] 64%|██████▍ | 238991/371472 [8:25:40<10:33:37, 3.48it/s] 64%|██████▍ | 238992/371472 [8:25:40<11:13:08, 3.28it/s] 64%|██████▍ | 238993/371472 [8:25:41<10:41:19, 3.44it/s] 64%|██████▍ | 238994/371472 [8:25:41<10:42:27, 3.44it/s] 64%|██████▍ | 238995/371472 [8:25:41<10:38:22, 3.46it/s] 64%|██████▍ | 238996/371472 [8:25:42<10:42:41, 3.44it/s] 64%|██████▍ | 238997/371472 [8:25:42<10:21:16, 3.55it/s] 64%|██████▍ | 238998/371472 [8:25:42<10:32:05, 3.49it/s] 64%|██���███▍ | 238999/371472 [8:25:42<10:52:38, 3.38it/s] 64%|██████▍ | 239000/371472 [8:25:43<11:03:09, 3.33it/s] {'loss': 2.8115, 'learning_rate': 4.2112521278200343e-07, 'epoch': 10.29} + 64%|██████▍ | 239000/371472 [8:25:43<11:03:09, 3.33it/s] 64%|██████▍ | 239001/371472 [8:25:43<11:21:52, 3.24it/s] 64%|██████▍ | 239002/371472 [8:25:43<11:03:53, 3.33it/s] 64%|██████▍ | 239003/371472 [8:25:44<10:45:48, 3.42it/s] 64%|██████▍ | 239004/371472 [8:25:44<11:23:00, 3.23it/s] 64%|██████▍ | 239005/371472 [8:25:44<11:06:26, 3.31it/s] 64%|██████▍ | 239006/371472 [8:25:45<10:44:50, 3.42it/s] 64%|██████▍ | 239007/371472 [8:25:45<10:52:14, 3.38it/s] 64%|██████▍ | 239008/371472 [8:25:45<10:50:14, 3.40it/s] 64%|██████▍ | 239009/371472 [8:25:45<10:34:51, 3.48it/s] 64%|██████▍ | 239010/371472 [8:25:46<10:45:32, 3.42it/s] 64%|██████▍ | 239011/371472 [8:25:46<11:29:17, 3.20it/s] 64%|██████▍ | 239012/371472 [8:25:46<11:09:12, 3.30it/s] 64%|██████▍ | 239013/371472 [8:25:47<12:06:37, 3.04it/s] 64%|██████▍ | 239014/371472 [8:25:47<11:41:33, 3.15it/s] 64%|██████▍ | 239015/371472 [8:25:47<11:28:20, 3.21it/s] 64%|██████▍ | 239016/371472 [8:25:48<11:50:07, 3.11it/s] 64%|██████▍ | 239017/371472 [8:25:48<11:56:20, 3.08it/s] 64%|██████▍ | 239018/371472 [8:25:48<11:52:47, 3.10it/s] 64%|██████▍ | 239019/371472 [8:25:49<12:03:33, 3.05it/s] 64%|██████▍ | 239020/371472 [8:25:49<11:24:39, 3.22it/s] {'loss': 2.7236, 'learning_rate': 4.210767308065246e-07, 'epoch': 10.3} + 64%|██████▍ | 239020/371472 [8:25:49<11:24:39, 3.22it/s] 64%|██████▍ | 239021/371472 [8:25:49<11:14:41, 3.27it/s] 64%|██████▍ | 239022/371472 [8:25:50<11:36:47, 3.17it/s] 64%|██████▍ | 239023/371472 [8:25:50<11:03:00, 3.33it/s] 64%|██████▍ | 239024/371472 [8:25:50<11:00:14, 3.34it/s] 64%|██████▍ | 239025/371472 [8:25:50<11:05:56, 3.31it/s] 64%|██████▍ | 239026/371472 [8:25:51<11:02:33, 3.33it/s] 64%|██████▍ | 239027/371472 [8:25:51<10:53:34, 3.38it/s] 64%|██████▍ | 239028/371472 [8:25:51<10:43:20, 3.43it/s] 64%|██████▍ | 239029/371472 [8:25:52<10:37:50, 3.46it/s] 64%|██████▍ | 239030/371472 [8:25:52<10:59:46, 3.35it/s] 64%|██████▍ | 239031/371472 [8:25:52<11:14:59, 3.27it/s] 64%|██████▍ | 239032/371472 [8:25:53<11:19:24, 3.25it/s] 64%|██████▍ | 239033/371472 [8:25:53<10:59:40, 3.35it/s] 64%|██████▍ | 239034/371472 [8:25:53<10:59:10, 3.35it/s] 64%|██████▍ | 239035/371472 [8:25:53<11:24:01, 3.23it/s] 64%|██████▍ | 239036/371472 [8:25:54<11:16:03, 3.26it/s] 64%|██████▍ | 239037/371472 [8:25:54<10:52:35, 3.38it/s] 64%|██████▍ | 239038/371472 [8:25:54<11:21:29, 3.24it/s] 64%|██████▍ | 239039/371472 [8:25:55<11:00:48, 3.34it/s] 64%|██████▍ | 239040/371472 [8:25:55<11:11:07, 3.29it/s] {'loss': 2.7157, 'learning_rate': 4.2102824883104563e-07, 'epoch': 10.3} + 64%|██████▍ | 239040/371472 [8:25:55<11:11:07, 3.29it/s] 64%|██████▍ | 239041/371472 [8:25:55<11:28:17, 3.21it/s] 64%|██████▍ | 239042/371472 [8:25:56<11:37:09, 3.17it/s] 64%|██████▍ | 239043/371472 [8:25:56<11:23:42, 3.23it/s] 64%|██████▍ | 239044/371472 [8:25:56<11:07:17, 3.31it/s] 64%|██████▍ | 239045/371472 [8:25:57<11:13:40, 3.28it/s] 64%|██████▍ | 239046/371472 [8:25:57<10:47:57, 3.41it/s] 64%|██████▍ | 239047/371472 [8:25:57<11:43:48, 3.14it/s] 64%|██████▍ | 239048/371472 [8:25:57<11:33:53, 3.18it/s] 64%|██████▍ | 239049/371472 [8:25:58<11:24:49, 3.22it/s] 64%|██████▍ | 239050/371472 [8:25:58<11:01:12, 3.34it/s] 64%|██████▍ | 239051/371472 [8:25:58<10:49:27, 3.40it/s] 64%|██████▍ | 239052/371472 [8:25:59<10:33:14, 3.49it/s] 64%|██████▍ | 239053/371472 [8:25:59<10:17:38, 3.57it/s] 64%|██████▍ | 239054/371472 [8:25:59<10:24:04, 3.54it/s] 64%|██████▍ | 239055/371472 [8:25:59<10:28:48, 3.51it/s] 64%|██████▍ | 239056/371472 [8:26:00<10:21:51, 3.55it/s] 64%|██████▍ | 239057/371472 [8:26:00<10:09:28, 3.62it/s] 64%|██████▍ | 239058/371472 [8:26:00<10:04:16, 3.65it/s] 64%|██████▍ | 239059/371472 [8:26:01<10:00:55, 3.67it/s] 64%|██████▍ | 239060/371472 [8:26:01<10:14:15, 3.59it/s] {'loss': 2.645, 'learning_rate': 4.209797668555668e-07, 'epoch': 10.3} + 64%|██████▍ | 239060/371472 [8:26:01<10:14:15, 3.59it/s] 64%|██████▍ | 239061/371472 [8:26:01<10:28:26, 3.51it/s] 64%|██████▍ | 239062/371472 [8:26:01<10:14:47, 3.59it/s] 64%|██████▍ | 239063/371472 [8:26:02<10:02:15, 3.66it/s] 64%|██████▍ | 239064/371472 [8:26:02<10:12:09, 3.60it/s] 64%|██████▍ | 239065/371472 [8:26:02<10:27:07, 3.52it/s] 64%|██████▍ | 239066/371472 [8:26:03<10:15:52, 3.58it/s] 64%|██████▍ | 239067/371472 [8:26:03<10:41:46, 3.44it/s] 64%|██████▍ | 239068/371472 [8:26:03<11:00:42, 3.34it/s] 64%|██████▍ | 239069/371472 [8:26:03<10:54:34, 3.37it/s] 64%|██████▍ | 239070/371472 [8:26:04<10:45:12, 3.42it/s] 64%|██████▍ | 239071/371472 [8:26:04<10:32:42, 3.49it/s] 64%|██████▍ | 239072/371472 [8:26:04<10:26:59, 3.52it/s] 64%|██████▍ | 239073/371472 [8:26:05<10:38:53, 3.45it/s] 64%|██████▍ | 239074/371472 [8:26:05<10:58:07, 3.35it/s] 64%|██████▍ | 239075/371472 [8:26:05<10:49:00, 3.40it/s] 64%|██████▍ | 239076/371472 [8:26:06<11:13:37, 3.28it/s] 64%|██████▍ | 239077/371472 [8:26:06<11:05:20, 3.32it/s] 64%|██████▍ | 239078/371472 [8:26:06<10:46:11, 3.41it/s] 64%|██████▍ | 239079/371472 [8:26:06<10:43:26, 3.43it/s] 64%|██████▍ | 239080/371472 [8:26:07<11:13:51, 3.27it/s] {'loss': 2.7174, 'learning_rate': 4.209312848800879e-07, 'epoch': 10.3} + 64%|██████▍ | 239080/371472 [8:26:07<11:13:51, 3.27it/s] 64%|██████▍ | 239081/371472 [8:26:07<11:40:21, 3.15it/s] 64%|██████▍ | 239082/371472 [8:26:07<11:37:10, 3.16it/s] 64%|██████▍ | 239083/371472 [8:26:08<11:39:50, 3.15it/s] 64%|██████▍ | 239084/371472 [8:26:08<10:57:02, 3.36it/s] 64%|██████▍ | 239085/371472 [8:26:08<10:59:18, 3.35it/s] 64%|██████▍ | 239086/371472 [8:26:09<10:44:12, 3.43it/s] 64%|██████▍ | 239087/371472 [8:26:09<10:47:32, 3.41it/s] 64%|██████▍ | 239088/371472 [8:26:09<10:42:24, 3.43it/s] 64%|██████▍ | 239089/371472 [8:26:09<10:33:38, 3.48it/s] 64%|██████▍ | 239090/371472 [8:26:10<10:58:54, 3.35it/s] 64%|██████▍ | 239091/371472 [8:26:10<10:47:34, 3.41it/s] 64%|██████▍ | 239092/371472 [8:26:10<10:20:50, 3.55it/s] 64%|██████▍ | 239093/371472 [8:26:11<10:33:17, 3.48it/s] 64%|██████▍ | 239094/371472 [8:26:11<10:32:46, 3.49it/s] 64%|██████▍ | 239095/371472 [8:26:11<10:28:31, 3.51it/s] 64%|██████▍ | 239096/371472 [8:26:11<10:11:23, 3.61it/s] 64%|██████▍ | 239097/371472 [8:26:12<10:58:50, 3.35it/s] 64%|██████▍ | 239098/371472 [8:26:12<10:53:31, 3.38it/s] 64%|██████▍ | 239099/371472 [8:26:12<11:04:21, 3.32it/s] 64%|██████▍ | 239100/371472 [8:26:13<10:28:42, 3.51it/s] {'loss': 2.4616, 'learning_rate': 4.20882802904609e-07, 'epoch': 10.3} + 64%|██████▍ | 239100/371472 [8:26:13<10:28:42, 3.51it/s] 64%|██████▍ | 239101/371472 [8:26:13<10:33:14, 3.48it/s] 64%|██████▍ | 239102/371472 [8:26:13<10:25:01, 3.53it/s] 64%|██████▍ | 239103/371472 [8:26:13<10:13:08, 3.60it/s] 64%|██████▍ | 239104/371472 [8:26:14<10:17:45, 3.57it/s] 64%|██████▍ | 239105/371472 [8:26:14<12:10:48, 3.02it/s] 64%|██████▍ | 239106/371472 [8:26:14<12:10:36, 3.02it/s] 64%|██████▍ | 239107/371472 [8:26:15<11:24:22, 3.22it/s] 64%|██████▍ | 239108/371472 [8:26:15<11:02:39, 3.33it/s] 64%|██████▍ | 239109/371472 [8:26:15<10:33:03, 3.48it/s] 64%|██████▍ | 239110/371472 [8:26:16<10:26:47, 3.52it/s] 64%|██████▍ | 239111/371472 [8:26:16<10:06:15, 3.64it/s] 64%|██████▍ | 239112/371472 [8:26:16<10:00:36, 3.67it/s] 64%|██████▍ | 239113/371472 [8:26:16<10:13:04, 3.60it/s] 64%|██████▍ | 239114/371472 [8:26:17<10:24:40, 3.53it/s] 64%|██████▍ | 239115/371472 [8:26:17<10:13:38, 3.59it/s] 64%|██████▍ | 239116/371472 [8:26:17<10:17:48, 3.57it/s] 64%|██████▍ | 239117/371472 [8:26:18<10:45:04, 3.42it/s] 64%|██████▍ | 239118/371472 [8:26:18<10:43:31, 3.43it/s] 64%|██████▍ | 239119/371472 [8:26:18<10:33:32, 3.48it/s] 64%|██████▍ | 239120/371472 [8:26:18<10:43:00, 3.43it/s] {'loss': 2.7281, 'learning_rate': 4.2083432092913007e-07, 'epoch': 10.3} + 64%|██████▍ | 239120/371472 [8:26:18<10:43:00, 3.43it/s] 64%|██████▍ | 239121/371472 [8:26:19<10:39:24, 3.45it/s] 64%|██████▍ | 239122/371472 [8:26:19<10:37:00, 3.46it/s] 64%|██████▍ | 239123/371472 [8:26:19<10:58:58, 3.35it/s] 64%|██████▍ | 239124/371472 [8:26:20<10:38:41, 3.45it/s] 64%|██████▍ | 239125/371472 [8:26:20<10:53:14, 3.38it/s] 64%|██████▍ | 239126/371472 [8:26:20<11:00:18, 3.34it/s] 64%|██████▍ | 239127/371472 [8:26:20<10:55:41, 3.36it/s] 64%|██████▍ | 239128/371472 [8:26:21<10:46:03, 3.41it/s] 64%|██████▍ | 239129/371472 [8:26:21<10:33:59, 3.48it/s] 64%|██████▍ | 239130/371472 [8:26:21<11:28:11, 3.21it/s] 64%|██████▍ | 239131/371472 [8:26:22<11:20:26, 3.24it/s] 64%|██████▍ | 239132/371472 [8:26:22<11:14:54, 3.27it/s] 64%|██████▍ | 239133/371472 [8:26:22<11:33:33, 3.18it/s] 64%|██████▍ | 239134/371472 [8:26:23<11:10:28, 3.29it/s] 64%|██████▍ | 239135/371472 [8:26:23<11:19:09, 3.25it/s] 64%|██████▍ | 239136/371472 [8:26:23<11:18:31, 3.25it/s] 64%|██████▍ | 239137/371472 [8:26:24<11:24:35, 3.22it/s] 64%|██████▍ | 239138/371472 [8:26:24<11:17:53, 3.25it/s] 64%|██████▍ | 239139/371472 [8:26:24<11:29:25, 3.20it/s] 64%|██████▍ | 239140/371472 [8:26:24<11:39:13, 3.15it/s] {'loss': 2.8163, 'learning_rate': 4.2078583895365125e-07, 'epoch': 10.3} + 64%|██████▍ | 239140/371472 [8:26:24<11:39:13, 3.15it/s] 64%|██████▍ | 239141/371472 [8:26:25<11:29:44, 3.20it/s] 64%|██████▍ | 239142/371472 [8:26:25<11:05:34, 3.31it/s] 64%|██████▍ | 239143/371472 [8:26:25<11:03:37, 3.32it/s] 64%|██████▍ | 239144/371472 [8:26:26<11:25:12, 3.22it/s] 64%|██████▍ | 239145/371472 [8:26:26<10:58:36, 3.35it/s] 64%|██████▍ | 239146/371472 [8:26:26<11:15:21, 3.27it/s] 64%|██████▍ | 239147/371472 [8:26:27<10:56:42, 3.36it/s] 64%|██████▍ | 239148/371472 [8:26:27<10:51:09, 3.39it/s] 64%|██████▍ | 239149/371472 [8:26:27<10:54:24, 3.37it/s] 64%|██████▍ | 239150/371472 [8:26:27<11:27:33, 3.21it/s] 64%|██████▍ | 239151/371472 [8:26:28<12:21:37, 2.97it/s] 64%|██████▍ | 239152/371472 [8:26:28<11:34:42, 3.17it/s] 64%|██████▍ | 239153/371472 [8:26:28<11:12:22, 3.28it/s] 64%|██████▍ | 239154/371472 [8:26:29<10:42:04, 3.43it/s] 64%|██████▍ | 239155/371472 [8:26:29<11:15:04, 3.27it/s] 64%|██████▍ | 239156/371472 [8:26:29<11:11:47, 3.28it/s] 64%|██████▍ | 239157/371472 [8:26:30<10:50:01, 3.39it/s] 64%|██████▍ | 239158/371472 [8:26:30<10:21:06, 3.55it/s] 64%|██████▍ | 239159/371472 [8:26:30<10:09:27, 3.62it/s] 64%|██████▍ | 239160/371472 [8:26:30<9:56:02, 3.70it/s] {'loss': 2.6816, 'learning_rate': 4.207373569781723e-07, 'epoch': 10.3} + 64%|██████▍ | 239160/371472 [8:26:30<9:56:02, 3.70it/s] 64%|██████▍ | 239161/371472 [8:26:31<10:11:18, 3.61it/s] 64%|██████▍ | 239162/371472 [8:26:31<10:33:51, 3.48it/s] 64%|██████▍ | 239163/371472 [8:26:31<10:19:56, 3.56it/s] 64%|██████▍ | 239164/371472 [8:26:32<10:17:16, 3.57it/s] 64%|██████▍ | 239165/371472 [8:26:32<10:04:15, 3.65it/s] 64%|██████▍ | 239166/371472 [8:26:32<11:22:59, 3.23it/s] 64%|██████▍ | 239167/371472 [8:26:32<10:59:36, 3.34it/s] 64%|██████▍ | 239168/371472 [8:26:33<10:57:05, 3.36it/s] 64%|██████▍ | 239169/371472 [8:26:33<10:33:23, 3.48it/s] 64%|██████▍ | 239170/371472 [8:26:33<10:38:53, 3.45it/s] 64%|██████▍ | 239171/371472 [8:26:34<10:55:03, 3.37it/s] 64%|██████▍ | 239172/371472 [8:26:34<10:44:06, 3.42it/s] 64%|██████▍ | 239173/371472 [8:26:34<10:34:23, 3.48it/s] 64%|██████▍ | 239174/371472 [8:26:35<11:19:07, 3.25it/s] 64%|██████▍ | 239175/371472 [8:26:35<11:11:15, 3.28it/s] 64%|██████▍ | 239176/371472 [8:26:35<11:28:07, 3.20it/s] 64%|██████▍ | 239177/371472 [8:26:35<11:06:38, 3.31it/s] 64%|██████▍ | 239178/371472 [8:26:36<11:09:44, 3.29it/s] 64%|██████▍ | 239179/371472 [8:26:36<11:22:52, 3.23it/s] 64%|██████▍ | 239180/371472 [8:26:36<11:00:43, 3.34it/s] {'loss': 2.7182, 'learning_rate': 4.2068887500269344e-07, 'epoch': 10.3} + 64%|██████▍ | 239180/371472 [8:26:36<11:00:43, 3.34it/s] 64%|██████▍ | 239181/371472 [8:26:37<10:47:06, 3.41it/s] 64%|██████▍ | 239182/371472 [8:26:37<10:30:53, 3.49it/s] 64%|██████▍ | 239183/371472 [8:26:37<10:39:49, 3.45it/s] 64%|██████▍ | 239184/371472 [8:26:38<11:21:56, 3.23it/s] 64%|██████▍ | 239185/371472 [8:26:38<11:06:43, 3.31it/s] 64%|██████▍ | 239186/371472 [8:26:38<11:24:47, 3.22it/s] 64%|██████▍ | 239187/371472 [8:26:38<11:25:07, 3.22it/s] 64%|██████▍ | 239188/371472 [8:26:39<11:14:01, 3.27it/s] 64%|██████▍ | 239189/371472 [8:26:39<11:11:00, 3.29it/s] 64%|██████▍ | 239190/371472 [8:26:39<11:06:31, 3.31it/s] 64%|██████▍ | 239191/371472 [8:26:40<11:35:08, 3.17it/s] 64%|██████▍ | 239192/371472 [8:26:40<12:31:31, 2.93it/s] 64%|██████▍ | 239193/371472 [8:26:40<11:53:57, 3.09it/s] 64%|██████▍ | 239194/371472 [8:26:41<12:05:41, 3.04it/s] 64%|██████▍ | 239195/371472 [8:26:41<11:45:46, 3.12it/s] 64%|██████▍ | 239196/371472 [8:26:41<11:59:35, 3.06it/s] 64%|██████▍ | 239197/371472 [8:26:42<11:28:43, 3.20it/s] 64%|██████▍ | 239198/371472 [8:26:42<11:34:18, 3.18it/s] 64%|██████▍ | 239199/371472 [8:26:42<11:00:36, 3.34it/s] 64%|██████▍ | 239200/371472 [8:26:43<10:41:06, 3.44it/s] {'loss': 2.668, 'learning_rate': 4.206403930272145e-07, 'epoch': 10.3} + 64%|██████▍ | 239200/371472 [8:26:43<10:41:06, 3.44it/s] 64%|██████▍ | 239201/371472 [8:26:43<10:39:23, 3.45it/s] 64%|██████▍ | 239202/371472 [8:26:43<10:49:54, 3.39it/s] 64%|██████▍ | 239203/371472 [8:26:43<10:56:21, 3.36it/s] 64%|██████▍ | 239204/371472 [8:26:44<10:50:21, 3.39it/s] 64%|██████▍ | 239205/371472 [8:26:44<10:30:36, 3.50it/s] 64%|██████▍ | 239206/371472 [8:26:44<10:23:20, 3.54it/s] 64%|██████▍ | 239207/371472 [8:26:45<10:45:22, 3.42it/s] 64%|██████▍ | 239208/371472 [8:26:45<10:50:57, 3.39it/s] 64%|██████▍ | 239209/371472 [8:26:45<10:36:54, 3.46it/s] 64%|██████▍ | 239210/371472 [8:26:45<10:35:07, 3.47it/s] 64%|██████▍ | 239211/371472 [8:26:46<10:52:28, 3.38it/s] 64%|██████▍ | 239212/371472 [8:26:46<11:04:00, 3.32it/s] 64%|██████▍ | 239213/371472 [8:26:46<11:01:44, 3.33it/s] 64%|██████▍ | 239214/371472 [8:26:47<10:58:28, 3.35it/s] 64%|██████▍ | 239215/371472 [8:26:47<10:44:50, 3.42it/s] 64%|██████▍ | 239216/371472 [8:26:47<10:40:45, 3.44it/s] 64%|██████▍ | 239217/371472 [8:26:48<10:58:58, 3.34it/s] 64%|██████▍ | 239218/371472 [8:26:48<10:40:32, 3.44it/s] 64%|██████▍ | 239219/371472 [8:26:48<10:39:09, 3.45it/s] 64%|██████▍ | 239220/371472 [8:26:48<10:32:22, 3.49it/s] {'loss': 2.7394, 'learning_rate': 4.205919110517357e-07, 'epoch': 10.3} + 64%|██████▍ | 239220/371472 [8:26:48<10:32:22, 3.49it/s] 64%|██████▍ | 239221/371472 [8:26:49<10:44:15, 3.42it/s] 64%|██████▍ | 239222/371472 [8:26:49<10:54:52, 3.37it/s] 64%|██████▍ | 239223/371472 [8:26:49<11:19:17, 3.24it/s] 64%|██████▍ | 239224/371472 [8:26:50<10:48:56, 3.40it/s] 64%|██████▍ | 239225/371472 [8:26:50<10:50:42, 3.39it/s] 64%|██████▍ | 239226/371472 [8:26:50<10:52:27, 3.38it/s] 64%|██████▍ | 239227/371472 [8:26:50<10:47:45, 3.40it/s] 64%|██████▍ | 239228/371472 [8:26:51<11:25:29, 3.22it/s] 64%|██████▍ | 239229/371472 [8:26:51<10:57:03, 3.35it/s] 64%|██████▍ | 239230/371472 [8:26:51<10:45:23, 3.42it/s] 64%|██████▍ | 239231/371472 [8:26:52<10:59:38, 3.34it/s] 64%|██████▍ | 239232/371472 [8:26:52<10:52:39, 3.38it/s] 64%|██████▍ | 239233/371472 [8:26:52<11:01:36, 3.33it/s] 64%|██████▍ | 239234/371472 [8:26:53<10:53:37, 3.37it/s] 64%|██████▍ | 239235/371472 [8:26:53<11:10:19, 3.29it/s] 64%|██████▍ | 239236/371472 [8:26:53<10:58:39, 3.35it/s] 64%|██████▍ | 239237/371472 [8:26:53<10:53:56, 3.37it/s] 64%|██████▍ | 239238/371472 [8:26:54<10:58:12, 3.35it/s] 64%|██████▍ | 239239/371472 [8:26:54<10:31:47, 3.49it/s] 64%|██████▍ | 239240/371472 [8:26:54<11:21:55, 3.23it/s] {'loss': 2.7768, 'learning_rate': 4.205434290762567e-07, 'epoch': 10.3} + 64%|██████▍ | 239240/371472 [8:26:54<11:21:55, 3.23it/s] 64%|██████▍ | 239241/371472 [8:26:55<11:44:18, 3.13it/s] 64%|██████▍ | 239242/371472 [8:26:55<11:27:22, 3.21it/s] 64%|██████▍ | 239243/371472 [8:26:55<11:26:52, 3.21it/s] 64%|██████▍ | 239244/371472 [8:26:56<11:12:27, 3.28it/s] 64%|██████▍ | 239245/371472 [8:26:56<10:51:14, 3.38it/s] 64%|██████▍ | 239246/371472 [8:26:56<10:46:40, 3.41it/s] 64%|██████▍ | 239247/371472 [8:26:56<10:50:12, 3.39it/s] 64%|██████▍ | 239248/371472 [8:26:57<10:33:36, 3.48it/s] 64%|██████▍ | 239249/371472 [8:26:57<11:17:41, 3.25it/s] 64%|██████▍ | 239250/371472 [8:26:57<10:50:54, 3.39it/s] 64%|██████▍ | 239251/371472 [8:26:58<10:57:12, 3.35it/s] 64%|██████▍ | 239252/371472 [8:26:58<10:41:22, 3.44it/s] 64%|██████▍ | 239253/371472 [8:26:58<10:40:57, 3.44it/s] 64%|██████▍ | 239254/371472 [8:26:59<10:36:50, 3.46it/s] 64%|██████▍ | 239255/371472 [8:26:59<10:49:04, 3.40it/s] 64%|██████▍ | 239256/371472 [8:26:59<11:07:46, 3.30it/s] 64%|██████▍ | 239257/371472 [8:26:59<11:12:18, 3.28it/s] 64%|██████▍ | 239258/371472 [8:27:00<11:09:00, 3.29it/s] 64%|██████▍ | 239259/371472 [8:27:00<11:03:45, 3.32it/s] 64%|██████▍ | 239260/371472 [8:27:00<10:56:24, 3.36it/s] {'loss': 2.5834, 'learning_rate': 4.204949471007779e-07, 'epoch': 10.31} + 64%|██████▍ | 239260/371472 [8:27:00<10:56:24, 3.36it/s] 64%|██████▍ | 239261/371472 [8:27:01<10:56:47, 3.35it/s] 64%|██████▍ | 239262/371472 [8:27:01<11:09:51, 3.29it/s] 64%|██████▍ | 239263/371472 [8:27:01<10:48:26, 3.40it/s] 64%|██████▍ | 239264/371472 [8:27:02<10:54:14, 3.37it/s] 64%|██████▍ | 239265/371472 [8:27:02<10:54:34, 3.37it/s] 64%|██████▍ | 239266/371472 [8:27:02<10:43:45, 3.42it/s] 64%|██████▍ | 239267/371472 [8:27:02<11:10:47, 3.28it/s] 64%|██████▍ | 239268/371472 [8:27:03<11:30:02, 3.19it/s] 64%|██████▍ | 239269/371472 [8:27:03<11:31:23, 3.19it/s] 64%|██████▍ | 239270/371472 [8:27:03<11:40:01, 3.15it/s] 64%|██████▍ | 239271/371472 [8:27:04<11:27:06, 3.21it/s] 64%|██████▍ | 239272/371472 [8:27:04<11:19:02, 3.24it/s] 64%|██████▍ | 239273/371472 [8:27:04<11:51:47, 3.10it/s] 64%|██████▍ | 239274/371472 [8:27:05<11:34:02, 3.17it/s] 64%|██████▍ | 239275/371472 [8:27:05<11:28:40, 3.20it/s] 64%|██████▍ | 239276/371472 [8:27:05<11:13:32, 3.27it/s] 64%|██████▍ | 239277/371472 [8:27:06<10:57:06, 3.35it/s] 64%|██████▍ | 239278/371472 [8:27:06<11:02:05, 3.33it/s] 64%|██████▍ | 239279/371472 [8:27:06<10:34:27, 3.47it/s] 64%|██████▍ | 239280/371472 [8:27:06<10:49:32, 3.39it/s] {'loss': 2.7463, 'learning_rate': 4.2044646512529896e-07, 'epoch': 10.31} + 64%|██████▍ | 239280/371472 [8:27:06<10:49:32, 3.39it/s] 64%|██████▍ | 239281/371472 [8:27:07<11:31:02, 3.19it/s] 64%|██████▍ | 239282/371472 [8:27:07<11:49:22, 3.11it/s] 64%|██████▍ | 239283/371472 [8:27:07<11:10:18, 3.29it/s] 64%|██████▍ | 239284/371472 [8:27:08<11:13:12, 3.27it/s] 64%|██████▍ | 239285/371472 [8:27:08<10:56:29, 3.36it/s] 64%|██████▍ | 239286/371472 [8:27:08<11:00:13, 3.34it/s] 64%|██████▍ | 239287/371472 [8:27:09<10:52:49, 3.37it/s] 64%|██████▍ | 239288/371472 [8:27:09<10:52:38, 3.38it/s] 64%|██████▍ | 239289/371472 [8:27:09<10:50:47, 3.39it/s] 64%|██████▍ | 239290/371472 [8:27:09<10:54:47, 3.36it/s] 64%|██████▍ | 239291/371472 [8:27:10<11:23:38, 3.22it/s] 64%|██████▍ | 239292/371472 [8:27:10<11:15:20, 3.26it/s] 64%|██████▍ | 239293/371472 [8:27:10<11:11:07, 3.28it/s] 64%|██████▍ | 239294/371472 [8:27:11<11:52:01, 3.09it/s] 64%|██████▍ | 239295/371472 [8:27:11<11:44:16, 3.13it/s] 64%|██████▍ | 239296/371472 [8:27:11<11:24:58, 3.22it/s] 64%|██████▍ | 239297/371472 [8:27:12<11:07:24, 3.30it/s] 64%|██████▍ | 239298/371472 [8:27:12<10:43:10, 3.43it/s] 64%|██████▍ | 239299/371472 [8:27:12<10:34:57, 3.47it/s] 64%|██████▍ | 239300/371472 [8:27:13<10:48:23, 3.40it/s] {'loss': 2.8267, 'learning_rate': 4.2039798314982014e-07, 'epoch': 10.31} + 64%|██████▍ | 239300/371472 [8:27:13<10:48:23, 3.40it/s] 64%|██████▍ | 239301/371472 [8:27:13<10:41:39, 3.43it/s] 64%|██████▍ | 239302/371472 [8:27:13<10:37:22, 3.46it/s] 64%|██████▍ | 239303/371472 [8:27:13<10:51:16, 3.38it/s] 64%|██████▍ | 239304/371472 [8:27:14<11:06:31, 3.30it/s] 64%|██████▍ | 239305/371472 [8:27:14<10:48:40, 3.40it/s] 64%|██████▍ | 239306/371472 [8:27:14<10:33:03, 3.48it/s] 64%|██████▍ | 239307/371472 [8:27:15<11:18:36, 3.25it/s] 64%|██████▍ | 239308/371472 [8:27:15<11:02:48, 3.32it/s] 64%|██████▍ | 239309/371472 [8:27:15<11:16:13, 3.26it/s] 64%|██████▍ | 239310/371472 [8:27:16<11:10:15, 3.29it/s] 64%|██████▍ | 239311/371472 [8:27:16<11:04:31, 3.31it/s] 64%|██████▍ | 239312/371472 [8:27:16<11:05:45, 3.31it/s] 64%|██████▍ | 239313/371472 [8:27:16<11:00:16, 3.34it/s] 64%|██████▍ | 239314/371472 [8:27:17<10:50:01, 3.39it/s] 64%|██████▍ | 239315/371472 [8:27:17<10:44:32, 3.42it/s] 64%|██████▍ | 239316/371472 [8:27:17<11:20:07, 3.24it/s] 64%|██████▍ | 239317/371472 [8:27:18<11:56:03, 3.08it/s] 64%|██████▍ | 239318/371472 [8:27:18<11:29:51, 3.19it/s] 64%|██████▍ | 239319/371472 [8:27:18<11:59:39, 3.06it/s] 64%|██████▍ | 239320/371472 [8:27:19<11:28:25, 3.20it/s] {'loss': 2.7263, 'learning_rate': 4.2034950117434116e-07, 'epoch': 10.31} + 64%|██████▍ | 239320/371472 [8:27:19<11:28:25, 3.20it/s] 64%|██████▍ | 239321/371472 [8:27:19<11:16:56, 3.25it/s] 64%|██████▍ | 239322/371472 [8:27:19<10:59:03, 3.34it/s] 64%|██████▍ | 239323/371472 [8:27:19<10:49:27, 3.39it/s] 64%|██████▍ | 239324/371472 [8:27:20<10:52:49, 3.37it/s] 64%|██████▍ | 239325/371472 [8:27:20<10:33:00, 3.48it/s] 64%|██████▍ | 239326/371472 [8:27:20<10:34:30, 3.47it/s] 64%|██████▍ | 239327/371472 [8:27:21<10:37:05, 3.46it/s] 64%|██████▍ | 239328/371472 [8:27:21<10:55:50, 3.36it/s] 64%|██████▍ | 239329/371472 [8:27:21<10:41:43, 3.43it/s] 64%|██████▍ | 239330/371472 [8:27:22<10:38:20, 3.45it/s] 64%|██████▍ | 239331/371472 [8:27:22<11:17:52, 3.25it/s] 64%|██████▍ | 239332/371472 [8:27:22<11:22:44, 3.23it/s] 64%|██████▍ | 239333/371472 [8:27:22<10:49:42, 3.39it/s] 64%|██████▍ | 239334/371472 [8:27:23<10:48:03, 3.40it/s] 64%|██████▍ | 239335/371472 [8:27:23<10:46:57, 3.40it/s] 64%|██████▍ | 239336/371472 [8:27:23<10:31:57, 3.48it/s] 64%|██████▍ | 239337/371472 [8:27:24<10:55:59, 3.36it/s] 64%|██████▍ | 239338/371472 [8:27:24<10:41:27, 3.43it/s] 64%|██████▍ | 239339/371472 [8:27:24<10:24:22, 3.53it/s] 64%|██████▍ | 239340/371472 [8:27:24<10:13:54, 3.59it/s] {'loss': 2.7779, 'learning_rate': 4.2030101919886233e-07, 'epoch': 10.31} + 64%|██████▍ | 239340/371472 [8:27:24<10:13:54, 3.59it/s] 64%|██████▍ | 239341/371472 [8:27:25<10:36:06, 3.46it/s] 64%|██████▍ | 239342/371472 [8:27:25<10:51:56, 3.38it/s] 64%|██████▍ | 239343/371472 [8:27:25<10:44:46, 3.42it/s] 64%|██████▍ | 239344/371472 [8:27:26<10:31:34, 3.49it/s] 64%|██████▍ | 239345/371472 [8:27:26<10:26:32, 3.51it/s] 64%|██████▍ | 239346/371472 [8:27:26<10:29:08, 3.50it/s] 64%|██████▍ | 239347/371472 [8:27:26<10:11:09, 3.60it/s] 64%|██████▍ | 239348/371472 [8:27:27<11:01:43, 3.33it/s] 64%|██████▍ | 239349/371472 [8:27:27<10:50:30, 3.39it/s] 64%|██████▍ | 239350/371472 [8:27:27<11:27:38, 3.20it/s] 64%|██████▍ | 239351/371472 [8:27:28<11:04:37, 3.31it/s] 64%|██████▍ | 239352/371472 [8:27:28<11:04:55, 3.31it/s] 64%|██████▍ | 239353/371472 [8:27:28<11:01:46, 3.33it/s] 64%|██████▍ | 239354/371472 [8:27:29<11:02:31, 3.32it/s] 64%|██████▍ | 239355/371472 [8:27:29<10:39:46, 3.44it/s] 64%|██████▍ | 239356/371472 [8:27:29<10:24:45, 3.52it/s] 64%|██████▍ | 239357/371472 [8:27:29<10:31:57, 3.48it/s] 64%|██████▍ | 239358/371472 [8:27:30<10:37:17, 3.46it/s] 64%|██████▍ | 239359/371472 [8:27:30<10:41:07, 3.43it/s] 64%|██████▍ | 239360/371472 [8:27:30<10:44:38, 3.42it/s] {'loss': 2.6972, 'learning_rate': 4.2025253722338335e-07, 'epoch': 10.31} + 64%|██████▍ | 239360/371472 [8:27:30<10:44:38, 3.42it/s] 64%|██████▍ | 239361/371472 [8:27:31<10:33:14, 3.48it/s] 64%|██████▍ | 239362/371472 [8:27:31<10:44:16, 3.42it/s] 64%|██████▍ | 239363/371472 [8:27:31<10:40:45, 3.44it/s] 64%|██████▍ | 239364/371472 [8:27:32<12:02:01, 3.05it/s] 64%|██████▍ | 239365/371472 [8:27:32<11:44:21, 3.13it/s] 64%|██████▍ | 239366/371472 [8:27:32<11:19:18, 3.24it/s] 64%|██████▍ | 239367/371472 [8:27:32<10:50:53, 3.38it/s] 64%|██████▍ | 239368/371472 [8:27:33<10:45:23, 3.41it/s] 64%|██████▍ | 239369/371472 [8:27:33<11:35:38, 3.17it/s] 64%|██████▍ | 239370/371472 [8:27:33<11:17:12, 3.25it/s] 64%|██████▍ | 239371/371472 [8:27:34<11:04:51, 3.31it/s] 64%|██████▍ | 239372/371472 [8:27:34<11:07:16, 3.30it/s] 64%|██████▍ | 239373/371472 [8:27:34<11:00:00, 3.34it/s] 64%|██████▍ | 239374/371472 [8:27:35<10:39:49, 3.44it/s] 64%|██████▍ | 239375/371472 [8:27:35<10:53:02, 3.37it/s] 64%|██████▍ | 239376/371472 [8:27:35<10:32:20, 3.48it/s] 64%|██████▍ | 239377/371472 [8:27:35<10:19:48, 3.55it/s] 64%|██████▍ | 239378/371472 [8:27:36<10:27:06, 3.51it/s] 64%|██████▍ | 239379/371472 [8:27:36<10:17:46, 3.56it/s] 64%|██████▍ | 239380/371472 [8:27:36<10:38:01, 3.45it/s] {'loss': 2.8002, 'learning_rate': 4.2020405524790453e-07, 'epoch': 10.31} + 64%|██████▍ | 239380/371472 [8:27:36<10:38:01, 3.45it/s] 64%|██████▍ | 239381/371472 [8:27:37<10:36:06, 3.46it/s] 64%|██████▍ | 239382/371472 [8:27:37<10:37:45, 3.45it/s] 64%|██████▍ | 239383/371472 [8:27:37<10:35:25, 3.46it/s] 64%|██████▍ | 239384/371472 [8:27:37<11:00:01, 3.34it/s] 64%|██████▍ | 239385/371472 [8:27:38<11:29:40, 3.19it/s] 64%|██████▍ | 239386/371472 [8:27:38<11:30:13, 3.19it/s] 64%|██████▍ | 239387/371472 [8:27:38<12:04:15, 3.04it/s] 64%|██████▍ | 239388/371472 [8:27:39<11:42:37, 3.13it/s] 64%|██████▍ | 239389/371472 [8:27:39<11:23:24, 3.22it/s] 64%|██████▍ | 239390/371472 [8:27:39<11:05:22, 3.31it/s] 64%|██████▍ | 239391/371472 [8:27:40<11:02:17, 3.32it/s] 64%|██████▍ | 239392/371472 [8:27:40<10:51:13, 3.38it/s] 64%|██████▍ | 239393/371472 [8:27:40<10:43:34, 3.42it/s] 64%|██████▍ | 239394/371472 [8:27:41<11:14:08, 3.27it/s] 64%|██████▍ | 239395/371472 [8:27:41<11:06:52, 3.30it/s] 64%|██████▍ | 239396/371472 [8:27:41<11:09:33, 3.29it/s] 64%|██████▍ | 239397/371472 [8:27:42<11:45:02, 3.12it/s] 64%|██████▍ | 239398/371472 [8:27:42<12:21:23, 2.97it/s] 64%|██████▍ | 239399/371472 [8:27:42<12:01:54, 3.05it/s] 64%|██████▍ | 239400/371472 [8:27:42<11:33:28, 3.17it/s] {'loss': 2.7197, 'learning_rate': 4.201555732724256e-07, 'epoch': 10.31} + 64%|██████▍ | 239400/371472 [8:27:42<11:33:28, 3.17it/s] 64%|██████▍ | 239401/371472 [8:27:43<11:10:11, 3.28it/s] 64%|██████▍ | 239402/371472 [8:27:43<11:13:01, 3.27it/s] 64%|██████▍ | 239403/371472 [8:27:43<10:54:08, 3.36it/s] 64%|██████▍ | 239404/371472 [8:27:44<10:41:05, 3.43it/s] 64%|██████▍ | 239405/371472 [8:27:44<10:30:25, 3.49it/s] 64%|██████▍ | 239406/371472 [8:27:44<11:10:46, 3.28it/s] 64%|██████▍ | 239407/371472 [8:27:45<10:52:55, 3.37it/s] 64%|██████▍ | 239408/371472 [8:27:45<10:45:11, 3.41it/s] 64%|██████▍ | 239409/371472 [8:27:45<10:45:10, 3.41it/s] 64%|██████▍ | 239410/371472 [8:27:45<10:51:27, 3.38it/s] 64%|██████▍ | 239411/371472 [8:27:46<10:47:33, 3.40it/s] 64%|██████▍ | 239412/371472 [8:27:46<11:57:37, 3.07it/s] 64%|██████▍ | 239413/371472 [8:27:46<11:24:06, 3.22it/s] 64%|██████▍ | 239414/371472 [8:27:47<11:15:54, 3.26it/s] 64%|██████▍ | 239415/371472 [8:27:47<11:13:32, 3.27it/s] 64%|██████▍ | 239416/371472 [8:27:47<11:06:38, 3.30it/s] 64%|██████▍ | 239417/371472 [8:27:48<11:23:59, 3.22it/s] 64%|██████▍ | 239418/371472 [8:27:48<10:58:34, 3.34it/s] 64%|██████▍ | 239419/371472 [8:27:48<10:43:36, 3.42it/s] 64%|██████▍ | 239420/371472 [8:27:48<10:34:45, 3.47it/s] {'loss': 2.7259, 'learning_rate': 4.2010709129694667e-07, 'epoch': 10.31} + 64%|██████▍ | 239420/371472 [8:27:48<10:34:45, 3.47it/s] 64%|██████▍ | 239421/371472 [8:27:49<10:38:03, 3.45it/s] 64%|██████▍ | 239422/371472 [8:27:49<10:46:18, 3.41it/s] 64%|██████▍ | 239423/371472 [8:27:49<10:46:02, 3.41it/s] 64%|██████▍ | 239424/371472 [8:27:50<11:11:20, 3.28it/s] 64%|██████▍ | 239425/371472 [8:27:50<11:18:34, 3.24it/s] 64%|██████▍ | 239426/371472 [8:27:50<11:40:35, 3.14it/s] 64%|██████▍ | 239427/371472 [8:27:51<11:28:57, 3.19it/s] 64%|██████▍ | 239428/371472 [8:27:51<11:11:44, 3.28it/s] 64%|██████▍ | 239429/371472 [8:27:51<11:30:23, 3.19it/s] 64%|██████▍ | 239430/371472 [8:27:52<11:19:42, 3.24it/s] 64%|██████▍ | 239431/371472 [8:27:52<11:09:22, 3.29it/s] 64%|██████▍ | 239432/371472 [8:27:52<10:45:48, 3.41it/s] 64%|██████▍ | 239433/371472 [8:27:52<10:41:07, 3.43it/s] 64%|██████▍ | 239434/371472 [8:27:53<12:10:44, 3.01it/s] 64%|██████▍ | 239435/371472 [8:27:53<11:36:04, 3.16it/s] 64%|██████▍ | 239436/371472 [8:27:53<11:22:29, 3.22it/s] 64%|██████▍ | 239437/371472 [8:27:54<11:05:50, 3.30it/s] 64%|██████▍ | 239438/371472 [8:27:54<11:50:41, 3.10it/s] 64%|██████▍ | 239439/371472 [8:27:54<11:30:51, 3.19it/s] 64%|██████▍ | 239440/371472 [8:27:55<11:47:16, 3.11it/s] {'loss': 2.6876, 'learning_rate': 4.200586093214678e-07, 'epoch': 10.31} + 64%|██████▍ | 239440/371472 [8:27:55<11:47:16, 3.11it/s] 64%|██████▍ | 239441/371472 [8:27:55<11:27:47, 3.20it/s] 64%|██████▍ | 239442/371472 [8:27:55<11:15:04, 3.26it/s] 64%|██████▍ | 239443/371472 [8:27:56<11:22:39, 3.22it/s] 64%|██████▍ | 239444/371472 [8:27:56<11:11:37, 3.28it/s] 64%|██████▍ | 239445/371472 [8:27:56<11:03:11, 3.32it/s] 64%|██████▍ | 239446/371472 [8:27:56<10:56:05, 3.35it/s] 64%|██████▍ | 239447/371472 [8:27:57<11:26:12, 3.21it/s] 64%|██████▍ | 239448/371472 [8:27:57<11:29:26, 3.19it/s] 64%|██████▍ | 239449/371472 [8:27:57<11:14:24, 3.26it/s] 64%|██████▍ | 239450/371472 [8:27:58<11:06:33, 3.30it/s] 64%|██████▍ | 239451/371472 [8:27:58<11:13:33, 3.27it/s] 64%|██████▍ | 239452/371472 [8:27:58<10:56:29, 3.35it/s] 64%|██████▍ | 239453/371472 [8:27:59<11:04:21, 3.31it/s] 64%|██████▍ | 239454/371472 [8:27:59<12:00:51, 3.05it/s] 64%|██████▍ | 239455/371472 [8:27:59<11:56:40, 3.07it/s] 64%|██████▍ | 239456/371472 [8:28:00<11:28:00, 3.20it/s] 64%|██████▍ | 239457/371472 [8:28:00<11:17:45, 3.25it/s] 64%|██████▍ | 239458/371472 [8:28:00<11:21:12, 3.23it/s] 64%|██████▍ | 239459/371472 [8:28:00<10:51:28, 3.38it/s] 64%|██████▍ | 239460/371472 [8:28:01<10:52:59, 3.37it/s] {'loss': 2.5693, 'learning_rate': 4.200101273459889e-07, 'epoch': 10.31} + 64%|██████▍ | 239460/371472 [8:28:01<10:52:59, 3.37it/s] 64%|██████▍ | 239461/371472 [8:28:01<10:45:00, 3.41it/s] 64%|██████▍ | 239462/371472 [8:28:01<10:55:38, 3.36it/s] 64%|██████▍ | 239463/371472 [8:28:02<10:49:46, 3.39it/s] 64%|██████▍ | 239464/371472 [8:28:02<11:14:38, 3.26it/s] 64%|██████▍ | 239465/371472 [8:28:02<11:01:03, 3.33it/s] 64%|██████▍ | 239466/371472 [8:28:03<11:15:05, 3.26it/s] 64%|██████▍ | 239467/371472 [8:28:03<11:54:04, 3.08it/s] 64%|██████▍ | 239468/371472 [8:28:03<11:16:24, 3.25it/s] 64%|██████▍ | 239469/371472 [8:28:03<10:51:13, 3.38it/s] 64%|██████▍ | 239470/371472 [8:28:04<10:38:58, 3.44it/s] 64%|██████▍ | 239471/371472 [8:28:04<10:17:39, 3.56it/s] 64%|██████▍ | 239472/371472 [8:28:04<10:38:13, 3.45it/s] 64%|██████▍ | 239473/371472 [8:28:05<10:32:02, 3.48it/s] 64%|██████▍ | 239474/371472 [8:28:05<11:10:15, 3.28it/s] 64%|██████▍ | 239475/371472 [8:28:05<11:27:17, 3.20it/s] 64%|██████▍ | 239476/371472 [8:28:06<11:01:33, 3.33it/s] 64%|██████▍ | 239477/371472 [8:28:06<10:53:06, 3.37it/s] 64%|██████▍ | 239478/371472 [8:28:06<10:40:59, 3.43it/s] 64%|██████▍ | 239479/371472 [8:28:06<10:33:02, 3.48it/s] 64%|██████▍ | 239480/371472 [8:28:07<10:47:29, 3.40it/s] {'loss': 2.6567, 'learning_rate': 4.1996164537051005e-07, 'epoch': 10.31} + 64%|██████▍ | 239480/371472 [8:28:07<10:47:29, 3.40it/s] 64%|██████▍ | 239481/371472 [8:28:07<10:32:46, 3.48it/s] 64%|██████▍ | 239482/371472 [8:28:07<10:40:44, 3.43it/s] 64%|██████▍ | 239483/371472 [8:28:08<10:58:13, 3.34it/s] 64%|██████▍ | 239484/371472 [8:28:08<10:35:40, 3.46it/s] 64%|██████▍ | 239485/371472 [8:28:08<10:40:40, 3.43it/s] 64%|██████▍ | 239486/371472 [8:28:08<10:47:29, 3.40it/s] 64%|██████▍ | 239487/371472 [8:28:09<10:46:11, 3.40it/s] 64%|██████▍ | 239488/371472 [8:28:09<11:05:35, 3.30it/s] 64%|██████▍ | 239489/371472 [8:28:09<11:03:37, 3.31it/s] 64%|██████▍ | 239490/371472 [8:28:10<10:53:18, 3.37it/s] 64%|██████▍ | 239491/371472 [8:28:10<10:49:46, 3.39it/s] 64%|██████▍ | 239492/371472 [8:28:10<11:00:45, 3.33it/s] 64%|██████▍ | 239493/371472 [8:28:11<10:49:20, 3.39it/s] 64%|██████▍ | 239494/371472 [8:28:11<10:48:09, 3.39it/s] 64%|██████▍ | 239495/371472 [8:28:11<10:39:37, 3.44it/s] 64%|██████▍ | 239496/371472 [8:28:11<10:33:21, 3.47it/s] 64%|██████▍ | 239497/371472 [8:28:12<10:35:20, 3.46it/s] 64%|██████▍ | 239498/371472 [8:28:12<10:41:45, 3.43it/s] 64%|██████▍ | 239499/371472 [8:28:12<11:09:22, 3.29it/s] 64%|██████▍ | 239500/371472 [8:28:13<10:59:15, 3.34it/s] {'loss': 2.8098, 'learning_rate': 4.1991316339503107e-07, 'epoch': 10.32} + 64%|██████▍ | 239500/371472 [8:28:13<10:59:15, 3.34it/s] 64%|██████▍ | 239501/371472 [8:28:13<10:52:27, 3.37it/s] 64%|██████▍ | 239502/371472 [8:28:13<10:51:13, 3.38it/s] 64%|██████▍ | 239503/371472 [8:28:14<11:36:48, 3.16it/s] 64%|██████▍ | 239504/371472 [8:28:14<11:14:10, 3.26it/s] 64%|██████▍ | 239505/371472 [8:28:14<11:17:04, 3.25it/s] 64%|██████▍ | 239506/371472 [8:28:14<10:57:46, 3.34it/s] 64%|██████▍ | 239507/371472 [8:28:15<10:46:59, 3.40it/s] 64%|██████▍ | 239508/371472 [8:28:15<10:52:10, 3.37it/s] 64%|██████▍ | 239509/371472 [8:28:15<10:33:48, 3.47it/s] 64%|██████▍ | 239510/371472 [8:28:16<10:39:29, 3.44it/s] 64%|██████▍ | 239511/371472 [8:28:16<10:32:21, 3.48it/s] 64%|██████▍ | 239512/371472 [8:28:16<10:41:57, 3.43it/s] 64%|██████▍ | 239513/371472 [8:28:16<10:43:46, 3.42it/s] 64%|██████▍ | 239514/371472 [8:28:17<10:22:19, 3.53it/s] 64%|██████▍ | 239515/371472 [8:28:17<10:47:22, 3.40it/s] 64%|██████▍ | 239516/371472 [8:28:17<11:01:32, 3.32it/s] 64%|██████▍ | 239517/371472 [8:28:18<10:59:35, 3.33it/s] 64%|██████▍ | 239518/371472 [8:28:18<11:06:33, 3.30it/s] 64%|██████▍ | 239519/371472 [8:28:18<10:58:33, 3.34it/s] 64%|██████▍ | 239520/371472 [8:28:19<10:43:55, 3.42it/s] {'loss': 2.6968, 'learning_rate': 4.1986468141955224e-07, 'epoch': 10.32} + 64%|██████▍ | 239520/371472 [8:28:19<10:43:55, 3.42it/s] 64%|██████▍ | 239521/371472 [8:28:19<10:36:31, 3.45it/s] 64%|██████▍ | 239522/371472 [8:28:19<10:28:56, 3.50it/s] 64%|██████▍ | 239523/371472 [8:28:19<10:30:37, 3.49it/s] 64%|██████▍ | 239524/371472 [8:28:20<10:23:39, 3.53it/s] 64%|██████▍ | 239525/371472 [8:28:20<10:39:24, 3.44it/s] 64%|██████▍ | 239526/371472 [8:28:20<10:56:32, 3.35it/s] 64%|██████▍ | 239527/371472 [8:28:21<10:41:34, 3.43it/s] 64%|██████▍ | 239528/371472 [8:28:21<10:30:47, 3.49it/s] 64%|██████▍ | 239529/371472 [8:28:21<10:21:36, 3.54it/s] 64%|██████▍ | 239530/371472 [8:28:21<10:17:59, 3.56it/s] 64%|██████▍ | 239531/371472 [8:28:22<10:05:05, 3.63it/s] 64%|██████▍ | 239532/371472 [8:28:22<10:22:53, 3.53it/s] 64%|██████▍ | 239533/371472 [8:28:22<10:39:07, 3.44it/s] 64%|██████▍ | 239534/371472 [8:28:23<10:44:05, 3.41it/s] 64%|██████▍ | 239535/371472 [8:28:23<10:30:06, 3.49it/s] 64%|██████▍ | 239536/371472 [8:28:23<10:19:54, 3.55it/s] 64%|██████▍ | 239537/371472 [8:28:23<10:49:17, 3.39it/s] 64%|██████▍ | 239538/371472 [8:28:24<10:53:34, 3.36it/s] 64%|██████▍ | 239539/371472 [8:28:24<10:35:01, 3.46it/s] 64%|██████▍ | 239540/371472 [8:28:24<11:22:49, 3.22it/s] {'loss': 2.804, 'learning_rate': 4.198161994440733e-07, 'epoch': 10.32} + 64%|██████▍ | 239540/371472 [8:28:24<11:22:49, 3.22it/s] 64%|██████▍ | 239541/371472 [8:28:25<11:01:55, 3.32it/s] 64%|██████▍ | 239542/371472 [8:28:25<10:32:29, 3.48it/s] 64%|██████▍ | 239543/371472 [8:28:25<10:35:43, 3.46it/s] 64%|██████▍ | 239544/371472 [8:28:25<10:29:51, 3.49it/s] 64%|██████▍ | 239545/371472 [8:28:26<11:01:58, 3.32it/s] 64%|██████▍ | 239546/371472 [8:28:26<10:45:52, 3.40it/s] 64%|██████▍ | 239547/371472 [8:28:26<10:39:15, 3.44it/s] 64%|██████▍ | 239548/371472 [8:28:27<10:50:35, 3.38it/s] 64%|██████▍ | 239549/371472 [8:28:27<10:37:42, 3.45it/s] 64%|██████▍ | 239550/371472 [8:28:27<10:42:44, 3.42it/s] 64%|██████▍ | 239551/371472 [8:28:28<10:40:52, 3.43it/s] 64%|██████▍ | 239552/371472 [8:28:28<10:33:05, 3.47it/s] 64%|██████▍ | 239553/371472 [8:28:28<10:39:02, 3.44it/s] 64%|██████▍ | 239554/371472 [8:28:28<11:03:56, 3.31it/s] 64%|██████▍ | 239555/371472 [8:28:29<10:57:26, 3.34it/s] 64%|██████▍ | 239556/371472 [8:28:29<10:52:51, 3.37it/s] 64%|██████▍ | 239557/371472 [8:28:29<10:38:34, 3.44it/s] 64%|██████▍ | 239558/371472 [8:28:30<10:18:13, 3.56it/s] 64%|██████▍ | 239559/371472 [8:28:30<10:25:19, 3.52it/s] 64%|██████▍ | 239560/371472 [8:28:30<10:33:58, 3.47it/s] {'loss': 2.7836, 'learning_rate': 4.1976771746859444e-07, 'epoch': 10.32} + 64%|██████▍ | 239560/371472 [8:28:30<10:33:58, 3.47it/s] 64%|██████▍ | 239561/371472 [8:28:30<10:43:19, 3.42it/s] 64%|██████▍ | 239562/371472 [8:28:31<10:48:00, 3.39it/s] 64%|██████▍ | 239563/371472 [8:28:31<10:38:59, 3.44it/s] 64%|██████▍ | 239564/371472 [8:28:31<10:48:34, 3.39it/s] 64%|██████▍ | 239565/371472 [8:28:32<10:18:24, 3.55it/s] 64%|██████▍ | 239566/371472 [8:28:32<10:57:53, 3.34it/s] 64%|██████▍ | 239567/371472 [8:28:32<10:42:48, 3.42it/s] 64%|██████▍ | 239568/371472 [8:28:33<10:57:56, 3.34it/s] 64%|██████▍ | 239569/371472 [8:28:33<10:38:10, 3.44it/s] 64%|██████▍ | 239570/371472 [8:28:33<10:55:02, 3.36it/s] 64%|██████▍ | 239571/371472 [8:28:34<11:57:00, 3.07it/s] 64%|██████▍ | 239572/371472 [8:28:34<11:44:45, 3.12it/s] 64%|██████▍ | 239573/371472 [8:28:34<11:51:07, 3.09it/s] 64%|██████▍ | 239574/371472 [8:28:34<11:13:01, 3.27it/s] 64%|██████▍ | 239575/371472 [8:28:35<11:08:38, 3.29it/s] 64%|██████▍ | 239576/371472 [8:28:35<10:54:34, 3.36it/s] 64%|██████▍ | 239577/371472 [8:28:35<10:37:19, 3.45it/s] 64%|██████▍ | 239578/371472 [8:28:36<10:12:33, 3.59it/s] 64%|██████▍ | 239579/371472 [8:28:36<10:30:01, 3.49it/s] 64%|██████▍ | 239580/371472 [8:28:36<10:28:17, 3.50it/s] {'loss': 2.7852, 'learning_rate': 4.197192354931155e-07, 'epoch': 10.32} + 64%|██████▍ | 239580/371472 [8:28:36<10:28:17, 3.50it/s] 64%|██████▍ | 239581/371472 [8:28:36<10:38:18, 3.44it/s] 64%|██████▍ | 239582/371472 [8:28:37<11:02:31, 3.32it/s] 64%|██████▍ | 239583/371472 [8:28:37<11:09:06, 3.29it/s] 64%|██████▍ | 239584/371472 [8:28:37<11:12:25, 3.27it/s] 64%|██████▍ | 239585/371472 [8:28:38<12:06:29, 3.03it/s] 64%|██████▍ | 239586/371472 [8:28:38<11:34:00, 3.17it/s] 64%|██████▍ | 239587/371472 [8:28:38<11:05:17, 3.30it/s] 64%|██████▍ | 239588/371472 [8:28:39<10:42:53, 3.42it/s] 64%|██████▍ | 239589/371472 [8:28:39<10:30:15, 3.49it/s] 64%|██████▍ | 239590/371472 [8:28:39<11:16:48, 3.25it/s] 64%|██████▍ | 239591/371472 [8:28:39<11:08:18, 3.29it/s] 64%|██████▍ | 239592/371472 [8:28:40<10:58:09, 3.34it/s] 64%|██████▍ | 239593/371472 [8:28:40<11:07:15, 3.29it/s] 64%|██████▍ | 239594/371472 [8:28:40<12:00:49, 3.05it/s] 64%|██████▍ | 239595/371472 [8:28:41<11:30:25, 3.18it/s] 64%|██████▍ | 239596/371472 [8:28:41<11:09:23, 3.28it/s] 64%|██████▍ | 239597/371472 [8:28:41<11:04:14, 3.31it/s] 64%|██████▍ | 239598/371472 [8:28:42<10:53:27, 3.36it/s] 64%|██████▍ | 239599/371472 [8:28:42<11:03:23, 3.31it/s] 65%|██████▍ | 239600/371472 [8:28:42<11:05:54, 3.30it/s] {'loss': 2.7026, 'learning_rate': 4.196707535176367e-07, 'epoch': 10.32} + 65%|██████▍ | 239600/371472 [8:28:42<11:05:54, 3.30it/s] 65%|██████▍ | 239601/371472 [8:28:43<11:14:13, 3.26it/s] 65%|██████▍ | 239602/371472 [8:28:43<11:12:59, 3.27it/s] 65%|██████▍ | 239603/371472 [8:28:43<12:02:55, 3.04it/s] 65%|██████▍ | 239604/371472 [8:28:44<12:34:55, 2.91it/s] 65%|██████▍ | 239605/371472 [8:28:44<12:36:54, 2.90it/s] 65%|██████▍ | 239606/371472 [8:28:44<12:47:30, 2.86it/s] 65%|██████▍ | 239607/371472 [8:28:45<12:47:19, 2.86it/s] 65%|██████▍ | 239608/371472 [8:28:45<12:19:44, 2.97it/s] 65%|██████▍ | 239609/371472 [8:28:45<11:58:54, 3.06it/s] 65%|██████▍ | 239610/371472 [8:28:46<12:09:25, 3.01it/s] 65%|██████▍ | 239611/371472 [8:28:46<11:29:53, 3.19it/s] 65%|██████▍ | 239612/371472 [8:28:46<11:35:51, 3.16it/s] 65%|██████▍ | 239613/371472 [8:28:47<11:10:38, 3.28it/s] 65%|██████▍ | 239614/371472 [8:28:47<11:37:26, 3.15it/s] 65%|██████▍ | 239615/371472 [8:28:47<11:10:29, 3.28it/s] 65%|██████▍ | 239616/371472 [8:28:47<11:11:20, 3.27it/s] 65%|██████▍ | 239617/371472 [8:28:48<10:57:46, 3.34it/s] 65%|██████▍ | 239618/371472 [8:28:48<10:51:26, 3.37it/s] 65%|██████▍ | 239619/371472 [8:28:48<10:47:57, 3.39it/s] 65%|██████▍ | 239620/371472 [8:28:49<11:35:55, 3.16it/s] {'loss': 2.6644, 'learning_rate': 4.196222715421577e-07, 'epoch': 10.32} + 65%|██████▍ | 239620/371472 [8:28:49<11:35:55, 3.16it/s] 65%|██████▍ | 239621/371472 [8:28:49<11:12:20, 3.27it/s] 65%|██████▍ | 239622/371472 [8:28:49<11:04:00, 3.31it/s] 65%|██████▍ | 239623/371472 [8:28:50<11:03:46, 3.31it/s] 65%|██████▍ | 239624/371472 [8:28:50<10:53:42, 3.36it/s] 65%|██████▍ | 239625/371472 [8:28:50<11:38:15, 3.15it/s] 65%|██████▍ | 239626/371472 [8:28:50<11:26:59, 3.20it/s] 65%|██████▍ | 239627/371472 [8:28:51<12:55:30, 2.83it/s] 65%|██████▍ | 239628/371472 [8:28:51<12:49:55, 2.85it/s] 65%|██████▍ | 239629/371472 [8:28:52<12:16:01, 2.99it/s] 65%|██████▍ | 239630/371472 [8:28:52<12:02:42, 3.04it/s] 65%|██████▍ | 239631/371472 [8:28:52<12:06:12, 3.03it/s] 65%|██████▍ | 239632/371472 [8:28:53<11:31:16, 3.18it/s] 65%|██████▍ | 239633/371472 [8:28:53<11:20:45, 3.23it/s] 65%|██████▍ | 239634/371472 [8:28:53<11:12:08, 3.27it/s] 65%|██████▍ | 239635/371472 [8:28:53<10:58:32, 3.34it/s] 65%|██████▍ | 239636/371472 [8:28:54<11:34:12, 3.17it/s] 65%|██████▍ | 239637/371472 [8:28:54<11:27:25, 3.20it/s] 65%|██████▍ | 239638/371472 [8:28:54<11:56:47, 3.07it/s] 65%|██████▍ | 239639/371472 [8:28:55<11:23:37, 3.21it/s] 65%|██████▍ | 239640/371472 [8:28:55<11:03:13, 3.31it/s] {'loss': 2.5679, 'learning_rate': 4.195737895666789e-07, 'epoch': 10.32} + 65%|██████▍ | 239640/371472 [8:28:55<11:03:13, 3.31it/s] 65%|██████▍ | 239641/371472 [8:28:55<11:19:41, 3.23it/s] 65%|██████▍ | 239642/371472 [8:28:56<11:13:10, 3.26it/s] 65%|██████▍ | 239643/371472 [8:28:56<11:01:31, 3.32it/s] 65%|██████▍ | 239644/371472 [8:28:56<11:44:24, 3.12it/s] 65%|██████▍ | 239645/371472 [8:28:57<11:37:28, 3.15it/s] 65%|██████▍ | 239646/371472 [8:28:57<11:08:05, 3.29it/s] 65%|██████▍ | 239647/371472 [8:28:57<10:53:29, 3.36it/s] 65%|██████▍ | 239648/371472 [8:28:57<10:49:10, 3.38it/s] 65%|██████▍ | 239649/371472 [8:28:58<10:45:54, 3.40it/s] 65%|██████▍ | 239650/371472 [8:28:58<10:48:44, 3.39it/s] 65%|██████▍ | 239651/371472 [8:28:58<10:37:30, 3.45it/s] 65%|██████▍ | 239652/371472 [8:28:59<10:40:30, 3.43it/s] 65%|██████▍ | 239653/371472 [8:28:59<10:38:55, 3.44it/s] 65%|██████▍ | 239654/371472 [8:28:59<10:34:27, 3.46it/s] 65%|██████▍ | 239655/371472 [8:28:59<10:51:26, 3.37it/s] 65%|██████▍ | 239656/371472 [8:29:00<10:46:50, 3.40it/s] 65%|█████���▍ | 239657/371472 [8:29:00<10:46:25, 3.40it/s] 65%|██████▍ | 239658/371472 [8:29:00<11:17:03, 3.24it/s] 65%|██████▍ | 239659/371472 [8:29:01<11:02:53, 3.31it/s] 65%|██████▍ | 239660/371472 [8:29:01<11:20:25, 3.23it/s] {'loss': 2.712, 'learning_rate': 4.1952530759119995e-07, 'epoch': 10.32} + 65%|██████▍ | 239660/371472 [8:29:01<11:20:25, 3.23it/s] 65%|██████▍ | 239661/371472 [8:29:01<11:06:02, 3.30it/s] 65%|██████▍ | 239662/371472 [8:29:02<11:04:08, 3.31it/s] 65%|██████▍ | 239663/371472 [8:29:02<10:58:39, 3.34it/s] 65%|██████▍ | 239664/371472 [8:29:02<10:49:41, 3.38it/s] 65%|██████▍ | 239665/371472 [8:29:02<11:01:41, 3.32it/s] 65%|██████▍ | 239666/371472 [8:29:03<10:57:38, 3.34it/s] 65%|██████▍ | 239667/371472 [8:29:03<10:49:04, 3.38it/s] 65%|██████▍ | 239668/371472 [8:29:03<10:59:15, 3.33it/s] 65%|██████▍ | 239669/371472 [8:29:04<11:01:05, 3.32it/s] 65%|██████▍ | 239670/371472 [8:29:04<10:45:02, 3.41it/s] 65%|██████▍ | 239671/371472 [8:29:04<10:24:18, 3.52it/s] 65%|██████▍ | 239672/371472 [8:29:05<11:34:46, 3.16it/s] 65%|██████▍ | 239673/371472 [8:29:05<11:15:23, 3.25it/s] 65%|██████▍ | 239674/371472 [8:29:05<11:04:49, 3.30it/s] 65%|██████▍ | 239675/371472 [8:29:05<10:44:39, 3.41it/s] 65%|██████▍ | 239676/371472 [8:29:06<10:26:07, 3.51it/s] 65%|██████▍ | 239677/371472 [8:29:06<10:17:36, 3.56it/s] 65%|██████▍ | 239678/371472 [8:29:06<10:26:08, 3.51it/s] 65%|██████▍ | 239679/371472 [8:29:07<10:42:18, 3.42it/s] 65%|██████▍ | 239680/371472 [8:29:07<10:43:36, 3.41it/s] {'loss': 2.7454, 'learning_rate': 4.194768256157211e-07, 'epoch': 10.32} + 65%|██████▍ | 239680/371472 [8:29:07<10:43:36, 3.41it/s] 65%|██████▍ | 239681/371472 [8:29:07<10:44:41, 3.41it/s] 65%|██████▍ | 239682/371472 [8:29:07<11:00:37, 3.32it/s] 65%|██████▍ | 239683/371472 [8:29:08<10:40:46, 3.43it/s] 65%|██████▍ | 239684/371472 [8:29:08<11:02:45, 3.31it/s] 65%|██████▍ | 239685/371472 [8:29:08<11:04:14, 3.31it/s] 65%|██████▍ | 239686/371472 [8:29:09<10:59:42, 3.33it/s] 65%|██████▍ | 239687/371472 [8:29:09<10:53:37, 3.36it/s] 65%|██████▍ | 239688/371472 [8:29:09<10:41:24, 3.42it/s] 65%|██████▍ | 239689/371472 [8:29:10<10:36:28, 3.45it/s] 65%|██████▍ | 239690/371472 [8:29:10<10:21:20, 3.53it/s] 65%|██████▍ | 239691/371472 [8:29:10<10:23:36, 3.52it/s] 65%|██████▍ | 239692/371472 [8:29:10<10:32:24, 3.47it/s] 65%|██████▍ | 239693/371472 [8:29:11<10:31:50, 3.48it/s] 65%|██████▍ | 239694/371472 [8:29:11<10:39:57, 3.43it/s] 65%|██████▍ | 239695/371472 [8:29:11<10:40:54, 3.43it/s] 65%|██████▍ | 239696/371472 [8:29:12<10:44:38, 3.41it/s] 65%|██████▍ | 239697/371472 [8:29:12<10:31:29, 3.48it/s] 65%|██████▍ | 239698/371472 [8:29:12<10:22:35, 3.53it/s] 65%|██████▍ | 239699/371472 [8:29:12<11:20:04, 3.23it/s] 65%|██████▍ | 239700/371472 [8:29:13<10:59:38, 3.33it/s] {'loss': 2.8028, 'learning_rate': 4.1942834364024215e-07, 'epoch': 10.32} + 65%|██████▍ | 239700/371472 [8:29:13<10:59:38, 3.33it/s] 65%|██████▍ | 239701/371472 [8:29:13<10:57:26, 3.34it/s] 65%|██████▍ | 239702/371472 [8:29:13<11:01:50, 3.32it/s] 65%|██████▍ | 239703/371472 [8:29:14<11:22:14, 3.22it/s] 65%|██████▍ | 239704/371472 [8:29:14<11:12:48, 3.26it/s] 65%|██████▍ | 239705/371472 [8:29:14<10:57:52, 3.34it/s] 65%|██████▍ | 239706/371472 [8:29:15<10:45:05, 3.40it/s] 65%|██████▍ | 239707/371472 [8:29:15<10:50:26, 3.38it/s] 65%|██████▍ | 239708/371472 [8:29:15<11:24:53, 3.21it/s] 65%|██████▍ | 239709/371472 [8:29:16<11:18:13, 3.24it/s] 65%|██████▍ | 239710/371472 [8:29:16<11:11:42, 3.27it/s] 65%|██████▍ | 239711/371472 [8:29:16<10:59:03, 3.33it/s] 65%|██████▍ | 239712/371472 [8:29:16<10:56:24, 3.35it/s] 65%|██████▍ | 239713/371472 [8:29:17<10:47:39, 3.39it/s] 65%|██████▍ | 239714/371472 [8:29:17<10:46:28, 3.40it/s] 65%|██████▍ | 239715/371472 [8:29:17<11:03:27, 3.31it/s] 65%|██████▍ | 239716/371472 [8:29:18<11:53:28, 3.08it/s] 65%|��█████▍ | 239717/371472 [8:29:18<11:53:09, 3.08it/s] 65%|██████▍ | 239718/371472 [8:29:18<11:26:52, 3.20it/s] 65%|██████▍ | 239719/371472 [8:29:19<11:09:02, 3.28it/s] 65%|██████▍ | 239720/371472 [8:29:19<10:59:10, 3.33it/s] {'loss': 2.7481, 'learning_rate': 4.193798616647633e-07, 'epoch': 10.33} + 65%|██████▍ | 239720/371472 [8:29:19<10:59:10, 3.33it/s] 65%|██████▍ | 239721/371472 [8:29:19<11:40:45, 3.13it/s] 65%|██████▍ | 239722/371472 [8:29:20<11:46:06, 3.11it/s] 65%|██████▍ | 239723/371472 [8:29:20<11:19:47, 3.23it/s] 65%|██████▍ | 239724/371472 [8:29:20<11:07:02, 3.29it/s] 65%|██████▍ | 239725/371472 [8:29:20<11:05:31, 3.30it/s] 65%|██████▍ | 239726/371472 [8:29:21<11:11:20, 3.27it/s] 65%|██████▍ | 239727/371472 [8:29:21<11:00:20, 3.33it/s] 65%|██████▍ | 239728/371472 [8:29:21<10:43:00, 3.41it/s] 65%|██████▍ | 239729/371472 [8:29:22<10:41:23, 3.42it/s] 65%|██████▍ | 239730/371472 [8:29:22<10:49:23, 3.38it/s] 65%|██████▍ | 239731/371472 [8:29:22<11:36:57, 3.15it/s] 65%|██████▍ | 239732/371472 [8:29:23<11:02:33, 3.31it/s] 65%|██████▍ | 239733/371472 [8:29:23<11:49:04, 3.10it/s] 65%|██████▍ | 239734/371472 [8:29:23<11:30:47, 3.18it/s] 65%|██████▍ | 239735/371472 [8:29:24<11:22:56, 3.21it/s] 65%|██████▍ | 239736/371472 [8:29:24<12:21:34, 2.96it/s] 65%|██████▍ | 239737/371472 [8:29:24<11:53:30, 3.08it/s] 65%|██████▍ | 239738/371472 [8:29:24<11:38:39, 3.14it/s] 65%|██████▍ | 239739/371472 [8:29:25<11:28:31, 3.19it/s] 65%|██████▍ | 239740/371472 [8:29:25<11:02:25, 3.31it/s] {'loss': 2.8263, 'learning_rate': 4.1933137968928435e-07, 'epoch': 10.33} + 65%|██████▍ | 239740/371472 [8:29:25<11:02:25, 3.31it/s] 65%|██████▍ | 239741/371472 [8:29:25<11:26:11, 3.20it/s] 65%|██████▍ | 239742/371472 [8:29:26<11:33:41, 3.16it/s] 65%|██████▍ | 239743/371472 [8:29:26<11:23:26, 3.21it/s] 65%|██████▍ | 239744/371472 [8:29:26<11:13:37, 3.26it/s] 65%|██████▍ | 239745/371472 [8:29:27<11:11:37, 3.27it/s] 65%|██████▍ | 239746/371472 [8:29:27<11:07:20, 3.29it/s] 65%|██████▍ | 239747/371472 [8:29:27<10:50:48, 3.37it/s] 65%|██████▍ | 239748/371472 [8:29:28<11:29:39, 3.18it/s] 65%|██████▍ | 239749/371472 [8:29:28<11:17:44, 3.24it/s] 65%|██████▍ | 239750/371472 [8:29:28<11:09:32, 3.28it/s] 65%|██████▍ | 239751/371472 [8:29:28<11:09:55, 3.28it/s] 65%|██████▍ | 239752/371472 [8:29:29<10:55:19, 3.35it/s] 65%|██████▍ | 239753/371472 [8:29:29<10:54:49, 3.35it/s] 65%|██████▍ | 239754/371472 [8:29:29<11:16:55, 3.24it/s] 65%|██████▍ | 239755/371472 [8:29:30<11:02:33, 3.31it/s] 65%|██████▍ | 239756/371472 [8:29:30<11:02:41, 3.31it/s] 65%|██████▍ | 239757/371472 [8:29:30<10:52:01, 3.37it/s] 65%|██████▍ | 239758/371472 [8:29:31<11:00:07, 3.33it/s] 65%|██████▍ | 239759/371472 [8:29:31<11:56:46, 3.06it/s] 65%|██████▍ | 239760/371472 [8:29:31<11:23:48, 3.21it/s] {'loss': 2.8311, 'learning_rate': 4.192828977138055e-07, 'epoch': 10.33} + 65%|██████▍ | 239760/371472 [8:29:31<11:23:48, 3.21it/s] 65%|██████▍ | 239761/371472 [8:29:32<11:08:22, 3.28it/s] 65%|██████▍ | 239762/371472 [8:29:32<11:05:22, 3.30it/s] 65%|██████▍ | 239763/371472 [8:29:32<11:04:22, 3.30it/s] 65%|██████▍ | 239764/371472 [8:29:32<10:57:10, 3.34it/s] 65%|██████▍ | 239765/371472 [8:29:33<10:56:12, 3.35it/s] 65%|██████▍ | 239766/371472 [8:29:33<11:10:49, 3.27it/s] 65%|██████▍ | 239767/371472 [8:29:33<11:26:38, 3.20it/s] 65%|██████▍ | 239768/371472 [8:29:34<11:26:10, 3.20it/s] 65%|██████▍ | 239769/371472 [8:29:34<11:10:35, 3.27it/s] 65%|██████▍ | 239770/371472 [8:29:34<10:58:35, 3.33it/s] 65%|██████▍ | 239771/371472 [8:29:35<11:04:35, 3.30it/s] 65%|██████▍ | 239772/371472 [8:29:35<11:43:01, 3.12it/s] 65%|██████▍ | 239773/371472 [8:29:35<12:14:14, 2.99it/s] 65%|██████▍ | 239774/371472 [8:29:36<11:51:34, 3.08it/s] 65%|██████▍ | 239775/371472 [8:29:36<11:29:39, 3.18it/s] 65%|██████▍ | 239776/371472 [8:29:36<11:12:36, 3.26it/s] 65%|██████▍ | 239777/371472 [8:29:36<11:11:44, 3.27it/s] 65%|██████▍ | 239778/371472 [8:29:37<10:41:33, 3.42it/s] 65%|██████▍ | 239779/371472 [8:29:37<10:56:20, 3.34it/s] 65%|██████▍ | 239780/371472 [8:29:37<11:57:21, 3.06it/s] {'loss': 2.7306, 'learning_rate': 4.192344157383266e-07, 'epoch': 10.33} + 65%|██████▍ | 239780/371472 [8:29:37<11:57:21, 3.06it/s] 65%|██████▍ | 239781/371472 [8:29:38<12:08:11, 3.01it/s] 65%|██████▍ | 239782/371472 [8:29:38<11:51:50, 3.08it/s] 65%|██████▍ | 239783/371472 [8:29:38<11:43:39, 3.12it/s] 65%|██████▍ | 239784/371472 [8:29:39<12:05:25, 3.03it/s] 65%|██████▍ | 239785/371472 [8:29:39<11:39:36, 3.14it/s] 65%|██████▍ | 239786/371472 [8:29:39<11:28:43, 3.19it/s] 65%|██████▍ | 239787/371472 [8:29:40<11:10:13, 3.27it/s] 65%|██████▍ | 239788/371472 [8:29:40<11:02:54, 3.31it/s] 65%|██████▍ | 239789/371472 [8:29:40<10:58:33, 3.33it/s] 65%|██████▍ | 239790/371472 [8:29:41<11:14:03, 3.26it/s] 65%|██████▍ | 239791/371472 [8:29:41<10:59:30, 3.33it/s] 65%|██████▍ | 239792/371472 [8:29:41<10:57:52, 3.34it/s] 65%|██████▍ | 239793/371472 [8:29:41<11:08:06, 3.28it/s] 65%|██████▍ | 239794/371472 [8:29:42<10:59:27, 3.33it/s] 65%|██████▍ | 239795/371472 [8:29:42<11:25:27, 3.20it/s] 65%|██████▍ | 239796/371472 [8:29:42<11:10:12, 3.27it/s] 65%|██████▍ | 239797/371472 [8:29:43<11:14:24, 3.25it/s] 65%|██████▍ | 239798/371472 [8:29:43<10:56:56, 3.34it/s] 65%|██████▍ | 239799/371472 [8:29:43<10:39:19, 3.43it/s] 65%|██████▍ | 239800/371472 [8:29:44<10:41:14, 3.42it/s] {'loss': 2.7676, 'learning_rate': 4.191859337628477e-07, 'epoch': 10.33} + 65%|██████▍ | 239800/371472 [8:29:44<10:41:14, 3.42it/s] 65%|██████▍ | 239801/371472 [8:29:44<11:00:59, 3.32it/s] 65%|██████▍ | 239802/371472 [8:29:44<10:39:47, 3.43it/s] 65%|██████▍ | 239803/371472 [8:29:45<11:44:38, 3.11it/s] 65%|██████▍ | 239804/371472 [8:29:45<11:24:57, 3.20it/s] 65%|██████▍ | 239805/371472 [8:29:45<11:01:37, 3.32it/s] 65%|██████▍ | 239806/371472 [8:29:45<11:10:42, 3.27it/s] 65%|██████▍ | 239807/371472 [8:29:46<11:23:53, 3.21it/s] 65%|██████▍ | 239808/371472 [8:29:46<11:14:45, 3.25it/s] 65%|██████▍ | 239809/371472 [8:29:46<11:10:02, 3.27it/s] 65%|██████▍ | 239810/371472 [8:29:47<11:27:17, 3.19it/s] 65%|██████▍ | 239811/371472 [8:29:47<11:35:24, 3.16it/s] 65%|██████▍ | 239812/371472 [8:29:47<11:19:17, 3.23it/s] 65%|██████▍ | 239813/371472 [8:29:48<11:31:46, 3.17it/s] 65%|██████▍ | 239814/371472 [8:29:48<11:08:52, 3.28it/s] 65%|██████▍ | 239815/371472 [8:29:48<10:52:02, 3.37it/s] 65%|██████▍ | 239816/371472 [8:29:48<10:36:12, 3.45it/s] 65%|██████▍ | 239817/371472 [8:29:49<10:40:26, 3.43it/s] 65%|██████▍ | 239818/371472 [8:29:49<10:56:37, 3.34it/s] 65%|██████▍ | 239819/371472 [8:29:49<11:14:49, 3.25it/s] 65%|██████▍ | 239820/371472 [8:29:50<11:45:13, 3.11it/s] {'loss': 2.7976, 'learning_rate': 4.191374517873688e-07, 'epoch': 10.33} + 65%|██████▍ | 239820/371472 [8:29:50<11:45:13, 3.11it/s] 65%|██████▍ | 239821/371472 [8:29:50<11:03:45, 3.31it/s] 65%|██████▍ | 239822/371472 [8:29:50<10:41:48, 3.42it/s] 65%|██████▍ | 239823/371472 [8:29:51<10:28:18, 3.49it/s] 65%|██████▍ | 239824/371472 [8:29:51<10:40:01, 3.43it/s] 65%|██████▍ | 239825/371472 [8:29:51<10:40:24, 3.43it/s] 65%|██████▍ | 239826/371472 [8:29:51<10:51:13, 3.37it/s] 65%|██████▍ | 239827/371472 [8:29:52<10:57:22, 3.34it/s] 65%|██████▍ | 239828/371472 [8:29:52<11:00:46, 3.32it/s] 65%|██████▍ | 239829/371472 [8:29:52<10:49:31, 3.38it/s] 65%|██████▍ | 239830/371472 [8:29:53<10:49:41, 3.38it/s] 65%|██████▍ | 239831/371472 [8:29:53<10:43:23, 3.41it/s] 65%|██████▍ | 239832/371472 [8:29:53<10:30:15, 3.48it/s] 65%|██████▍ | 239833/371472 [8:29:54<10:59:58, 3.32it/s] 65%|██████▍ | 239834/371472 [8:29:54<11:09:11, 3.28it/s] 65%|██████▍ | 239835/371472 [8:29:54<10:52:43, 3.36it/s] 65%|██████▍ | 239836/371472 [8:29:54<10:57:01, 3.34it/s] 65%|██████▍ | 239837/371472 [8:29:55<10:42:17, 3.42it/s] 65%|██████▍ | 239838/371472 [8:29:55<10:55:56, 3.34it/s] 65%|██████▍ | 239839/371472 [8:29:55<11:17:26, 3.24it/s] 65%|██████▍ | 239840/371472 [8:29:56<11:07:21, 3.29it/s] {'loss': 2.7643, 'learning_rate': 4.1908896981188997e-07, 'epoch': 10.33} + 65%|██████▍ | 239840/371472 [8:29:56<11:07:21, 3.29it/s] 65%|██████▍ | 239841/371472 [8:29:56<11:15:22, 3.25it/s] 65%|██████▍ | 239842/371472 [8:29:56<11:14:46, 3.25it/s] 65%|██████▍ | 239843/371472 [8:29:57<10:53:33, 3.36it/s] 65%|██████▍ | 239844/371472 [8:29:57<10:44:31, 3.40it/s] 65%|██████▍ | 239845/371472 [8:29:57<10:24:45, 3.51it/s] 65%|██████▍ | 239846/371472 [8:29:57<10:38:56, 3.43it/s] 65%|██████▍ | 239847/371472 [8:29:58<11:02:09, 3.31it/s] 65%|██████▍ | 239848/371472 [8:29:58<11:05:39, 3.30it/s] 65%|██████▍ | 239849/371472 [8:29:58<11:28:24, 3.19it/s] 65%|██████▍ | 239850/371472 [8:29:59<11:17:35, 3.24it/s] 65%|██████▍ | 239851/371472 [8:29:59<10:52:01, 3.36it/s] 65%|██████▍ | 239852/371472 [8:29:59<10:42:16, 3.42it/s] 65%|██████▍ | 239853/371472 [8:29:59<10:26:24, 3.50it/s] 65%|██████▍ | 239854/371472 [8:30:00<11:04:20, 3.30it/s] 65%|██████▍ | 239855/371472 [8:30:00<10:52:12, 3.36it/s] 65%|██████▍ | 239856/371472 [8:30:00<10:34:12, 3.46it/s] 65%|██████▍ | 239857/371472 [8:30:01<10:44:28, 3.40it/s] 65%|██████▍ | 239858/371472 [8:30:01<12:22:38, 2.95it/s] 65%|██████▍ | 239859/371472 [8:30:01<11:51:52, 3.08it/s] 65%|██████▍ | 239860/371472 [8:30:02<11:57:27, 3.06it/s] {'loss': 2.8516, 'learning_rate': 4.1904048783641104e-07, 'epoch': 10.33} + 65%|██████▍ | 239860/371472 [8:30:02<11:57:27, 3.06it/s] 65%|██████▍ | 239861/371472 [8:30:02<11:33:26, 3.16it/s] 65%|██████▍ | 239862/371472 [8:30:02<11:52:26, 3.08it/s] 65%|██████▍ | 239863/371472 [8:30:03<11:17:36, 3.24it/s] 65%|██████▍ | 239864/371472 [8:30:03<11:40:29, 3.13it/s] 65%|██████▍ | 239865/371472 [8:30:03<11:27:06, 3.19it/s] 65%|██████▍ | 239866/371472 [8:30:04<11:08:24, 3.28it/s] 65%|██████▍ | 239867/371472 [8:30:04<11:29:01, 3.18it/s] 65%|██████▍ | 239868/371472 [8:30:04<11:09:17, 3.28it/s] 65%|██████▍ | 239869/371472 [8:30:04<10:46:22, 3.39it/s] 65%|██████▍ | 239870/371472 [8:30:05<11:21:01, 3.22it/s] 65%|██████▍ | 239871/371472 [8:30:05<11:48:14, 3.10it/s] 65%|██████▍ | 239872/371472 [8:30:05<11:33:56, 3.16it/s] 65%|██████▍ | 239873/371472 [8:30:06<11:12:29, 3.26it/s] 65%|██████▍ | 239874/371472 [8:30:06<11:13:18, 3.26it/s] 65%|██████▍ | 239875/371472 [8:30:06<10:54:02, 3.35it/s] 65%|██████▍ | 239876/371472 [8:30:07<10:46:20, 3.39it/s] 65%|██████▍ | 239877/371472 [8:30:07<10:48:33, 3.38it/s] 65%|██████▍ | 239878/371472 [8:30:07<11:00:36, 3.32it/s] 65%|██████▍ | 239879/371472 [8:30:08<10:59:28, 3.33it/s] 65%|██████▍ | 239880/371472 [8:30:08<11:28:13, 3.19it/s] {'loss': 2.8845, 'learning_rate': 4.1899200586093216e-07, 'epoch': 10.33} + 65%|██████▍ | 239880/371472 [8:30:08<11:28:13, 3.19it/s] 65%|██████▍ | 239881/371472 [8:30:08<11:14:44, 3.25it/s] 65%|██████▍ | 239882/371472 [8:30:08<10:55:32, 3.35it/s] 65%|██████▍ | 239883/371472 [8:30:09<10:57:12, 3.34it/s] 65%|██████▍ | 239884/371472 [8:30:09<11:22:01, 3.22it/s] 65%|██████▍ | 239885/371472 [8:30:09<10:52:29, 3.36it/s] 65%|██████▍ | 239886/371472 [8:30:10<10:48:29, 3.38it/s] 65%|██████▍ | 239887/371472 [8:30:10<12:03:12, 3.03it/s] 65%|██████▍ | 239888/371472 [8:30:10<11:37:03, 3.15it/s] 65%|██████▍ | 239889/371472 [8:30:11<12:50:13, 2.85it/s] 65%|██████▍ | 239890/371472 [8:30:11<12:10:07, 3.00it/s] 65%|██████▍ | 239891/371472 [8:30:11<11:35:51, 3.15it/s] 65%|██████▍ | 239892/371472 [8:30:12<11:15:14, 3.25it/s] 65%|██████▍ | 239893/371472 [8:30:12<11:04:30, 3.30it/s] 65%|██████▍ | 239894/371472 [8:30:12<10:58:23, 3.33it/s] 65%|██████▍ | 239895/371472 [8:30:13<11:37:37, 3.14it/s] 65%|██████▍ | 239896/371472 [8:30:13<11:16:35, 3.24it/s] 65%|██████▍ | 239897/371472 [8:30:13<11:40:34, 3.13it/s] 65%|██████▍ | 239898/371472 [8:30:14<11:46:20, 3.10it/s] 65%|██████▍ | 239899/371472 [8:30:14<11:40:35, 3.13it/s] 65%|██████▍ | 239900/371472 [8:30:14<11:29:09, 3.18it/s] {'loss': 2.7571, 'learning_rate': 4.1894352388545324e-07, 'epoch': 10.33} + 65%|██████▍ | 239900/371472 [8:30:14<11:29:09, 3.18it/s] 65%|██████▍ | 239901/371472 [8:30:15<12:05:27, 3.02it/s] 65%|██████▍ | 239902/371472 [8:30:15<11:44:41, 3.11it/s] 65%|██████▍ | 239903/371472 [8:30:15<11:40:30, 3.13it/s] 65%|██████▍ | 239904/371472 [8:30:15<12:04:30, 3.03it/s] 65%|██████▍ | 239905/371472 [8:30:16<11:55:38, 3.06it/s] 65%|██████▍ | 239906/371472 [8:30:16<12:24:33, 2.95it/s] 65%|██████▍ | 239907/371472 [8:30:16<11:58:36, 3.05it/s] 65%|██████▍ | 239908/371472 [8:30:17<11:37:01, 3.15it/s] 65%|██████▍ | 239909/371472 [8:30:17<11:24:15, 3.20it/s] 65%|██████▍ | 239910/371472 [8:30:17<11:25:24, 3.20it/s] 65%|██████▍ | 239911/371472 [8:30:18<11:10:44, 3.27it/s] 65%|██████▍ | 239912/371472 [8:30:18<11:07:17, 3.29it/s] 65%|██████▍ | 239913/371472 [8:30:18<11:41:52, 3.12it/s] 65%|██████▍ | 239914/371472 [8:30:19<11:13:16, 3.26it/s] 65%|██████▍ | 239915/371472 [8:30:19<11:07:34, 3.28it/s] 65%|██████▍ | 239916/371472 [8:30:19<10:48:36, 3.38it/s] 65%|██████▍ | 239917/371472 [8:30:19<10:37:34, 3.44it/s] 65%|██████▍ | 239918/371472 [8:30:20<10:54:11, 3.35it/s] 65%|██████▍ | 239919/371472 [8:30:20<10:41:34, 3.42it/s] 65%|██████▍ | 239920/371472 [8:30:20<10:45:55, 3.39it/s] {'loss': 2.8223, 'learning_rate': 4.188950419099744e-07, 'epoch': 10.33} + 65%|██████▍ | 239920/371472 [8:30:20<10:45:55, 3.39it/s] 65%|██████▍ | 239921/371472 [8:30:21<10:46:01, 3.39it/s] 65%|██████▍ | 239922/371472 [8:30:21<10:47:26, 3.39it/s] 65%|██████▍ | 239923/371472 [8:30:21<10:29:40, 3.48it/s] 65%|██████▍ | 239924/371472 [8:30:21<10:25:59, 3.50it/s] 65%|██████▍ | 239925/371472 [8:30:22<11:33:16, 3.16it/s] 65%|██████▍ | 239926/371472 [8:30:22<11:33:16, 3.16it/s] 65%|██████▍ | 239927/371472 [8:30:23<11:33:57, 3.16it/s] 65%|██████▍ | 239928/371472 [8:30:23<11:13:16, 3.26it/s] 65%|██████▍ | 239929/371472 [8:30:23<11:45:18, 3.11it/s] 65%|██████▍ | 239930/371472 [8:30:23<11:30:07, 3.18it/s] 65%|██████▍ | 239931/371472 [8:30:24<11:02:59, 3.31it/s] 65%|██████▍ | 239932/371472 [8:30:24<11:03:53, 3.30it/s] 65%|██████▍ | 239933/371472 [8:30:24<10:52:03, 3.36it/s] 65%|██████▍ | 239934/371472 [8:30:25<10:56:57, 3.34it/s] 65%|██████▍ | 239935/371472 [8:30:25<10:55:29, 3.34it/s] 65%|██████▍ | 239936/371472 [8:30:25<10:42:49, 3.41it/s] 65%|██████▍ | 239937/371472 [8:30:25<10:27:45, 3.49it/s] 65%|██████▍ | 239938/371472 [8:30:26<10:53:38, 3.35it/s] 65%|██████▍ | 239939/371472 [8:30:26<10:31:50, 3.47it/s] 65%|██████▍ | 239940/371472 [8:30:26<10:16:38, 3.56it/s] {'loss': 2.8767, 'learning_rate': 4.1884655993449543e-07, 'epoch': 10.33} + 65%|██████▍ | 239940/371472 [8:30:26<10:16:38, 3.56it/s] 65%|██████▍ | 239941/371472 [8:30:27<10:43:03, 3.41it/s] 65%|██████▍ | 239942/371472 [8:30:27<10:40:08, 3.42it/s] 65%|██████▍ | 239943/371472 [8:30:27<10:39:46, 3.43it/s] 65%|██████▍ | 239944/371472 [8:30:28<10:41:23, 3.42it/s] 65%|██████▍ | 239945/371472 [8:30:28<10:27:49, 3.49it/s] 65%|██████▍ | 239946/371472 [8:30:28<10:44:16, 3.40it/s] 65%|██████▍ | 239947/371472 [8:30:28<11:21:47, 3.22it/s] 65%|██████▍ | 239948/371472 [8:30:29<11:11:01, 3.27it/s] 65%|██████▍ | 239949/371472 [8:30:29<11:15:02, 3.25it/s] 65%|██████▍ | 239950/371472 [8:30:29<10:48:52, 3.38it/s] 65%|██████▍ | 239951/371472 [8:30:30<10:43:50, 3.40it/s] 65%|██████▍ | 239952/371472 [8:30:30<10:16:28, 3.56it/s] 65%|██████▍ | 239953/371472 [8:30:30<10:39:44, 3.43it/s] 65%|██████▍ | 239954/371472 [8:30:30<10:44:30, 3.40it/s] 65%|██████▍ | 239955/371472 [8:30:31<11:47:18, 3.10it/s] 65%|██████��� | 239956/371472 [8:30:31<11:19:21, 3.23it/s] 65%|██████▍ | 239957/371472 [8:30:31<10:52:56, 3.36it/s] 65%|██████▍ | 239958/371472 [8:30:32<11:11:03, 3.27it/s] 65%|██████▍ | 239959/371472 [8:30:32<11:06:28, 3.29it/s] 65%|██████▍ | 239960/371472 [8:30:32<10:47:42, 3.38it/s] {'loss': 2.7964, 'learning_rate': 4.187980779590165e-07, 'epoch': 10.34} + 65%|██████▍ | 239960/371472 [8:30:32<10:47:42, 3.38it/s] 65%|██████▍ | 239961/371472 [8:30:33<12:05:14, 3.02it/s] 65%|██████▍ | 239962/371472 [8:30:33<11:28:52, 3.18it/s] 65%|██████▍ | 239963/371472 [8:30:33<11:33:39, 3.16it/s] 65%|██████▍ | 239964/371472 [8:30:34<11:03:40, 3.30it/s] 65%|██████▍ | 239965/371472 [8:30:34<11:02:06, 3.31it/s] 65%|██████▍ | 239966/371472 [8:30:34<11:45:17, 3.11it/s] 65%|██████▍ | 239967/371472 [8:30:35<11:19:38, 3.22it/s] 65%|██████▍ | 239968/371472 [8:30:35<11:10:41, 3.27it/s] 65%|██████▍ | 239969/371472 [8:30:35<10:49:29, 3.37it/s] 65%|██████▍ | 239970/371472 [8:30:35<10:30:57, 3.47it/s] 65%|██████▍ | 239971/371472 [8:30:36<10:46:12, 3.39it/s] 65%|██████▍ | 239972/371472 [8:30:36<10:47:59, 3.38it/s] 65%|██████▍ | 239973/371472 [8:30:36<10:51:05, 3.37it/s] 65%|██████▍ | 239974/371472 [8:30:37<10:29:42, 3.48it/s] 65%|██████▍ | 239975/371472 [8:30:37<10:37:10, 3.44it/s] 65%|██████▍ | 239976/371472 [8:30:37<10:36:12, 3.44it/s] 65%|██████▍ | 239977/371472 [8:30:37<10:30:44, 3.47it/s] 65%|██████▍ | 239978/371472 [8:30:38<10:25:41, 3.50it/s] 65%|██████▍ | 239979/371472 [8:30:38<10:10:31, 3.59it/s] 65%|██████▍ | 239980/371472 [8:30:38<10:04:28, 3.63it/s] {'loss': 2.5461, 'learning_rate': 4.187495959835377e-07, 'epoch': 10.34} + 65%|██████▍ | 239980/371472 [8:30:38<10:04:28, 3.63it/s] 65%|██████▍ | 239981/371472 [8:30:39<10:52:34, 3.36it/s] 65%|██████▍ | 239982/371472 [8:30:39<10:40:02, 3.42it/s] 65%|██████▍ | 239983/371472 [8:30:39<10:28:48, 3.49it/s] 65%|██████▍ | 239984/371472 [8:30:39<10:36:33, 3.44it/s] 65%|██████▍ | 239985/371472 [8:30:40<10:36:14, 3.44it/s] 65%|██████▍ | 239986/371472 [8:30:40<10:22:32, 3.52it/s] 65%|██████▍ | 239987/371472 [8:30:40<10:17:55, 3.55it/s] 65%|██████▍ | 239988/371472 [8:30:41<10:19:50, 3.54it/s] 65%|██████▍ | 239989/371472 [8:30:41<10:22:31, 3.52it/s] 65%|██████▍ | 239990/371472 [8:30:41<10:27:24, 3.49it/s] 65%|██████▍ | 239991/371472 [8:30:41<10:24:17, 3.51it/s] 65%|██████▍ | 239992/371472 [8:30:42<10:22:34, 3.52it/s] 65%|██████▍ | 239993/371472 [8:30:42<10:31:46, 3.47it/s] 65%|██████▍ | 239994/371472 [8:30:42<10:40:23, 3.42it/s] 65%|██████▍ | 239995/371472 [8:30:43<10:29:39, 3.48it/s] 65%|██████▍ | 239996/371472 [8:30:43<10:30:33, 3.48it/s] 65%|██████▍ | 239997/371472 [8:30:43<10:25:39, 3.50it/s] 65%|██████▍ | 239998/371472 [8:30:44<11:07:15, 3.28it/s] 65%|██████▍ | 239999/371472 [8:30:44<11:03:44, 3.30it/s] 65%|██████▍ | 240000/371472 [8:30:44<10:59:49, 3.32it/s] {'loss': 2.6187, 'learning_rate': 4.187011140080587e-07, 'epoch': 10.34} + 65%|██████▍ | 240000/371472 [8:30:44<10:59:49, 3.32it/s] 65%|██████▍ | 240001/371472 [8:30:44<10:55:09, 3.34it/s] 65%|██████▍ | 240002/371472 [8:30:45<11:05:19, 3.29it/s] 65%|██████▍ | 240003/371472 [8:30:45<11:00:58, 3.32it/s] 65%|██████▍ | 240004/371472 [8:30:45<11:13:30, 3.25it/s] 65%|██████▍ | 240005/371472 [8:30:46<11:46:07, 3.10it/s] 65%|██████▍ | 240006/371472 [8:30:46<11:39:59, 3.13it/s] 65%|██████▍ | 240007/371472 [8:30:46<11:17:46, 3.23it/s] 65%|██████▍ | 240008/371472 [8:30:47<11:03:38, 3.30it/s] 65%|██████▍ | 240009/371472 [8:30:47<10:59:39, 3.32it/s] 65%|██████▍ | 240010/371472 [8:30:47<11:15:43, 3.24it/s] 65%|██████▍ | 240011/371472 [8:30:48<11:20:13, 3.22it/s] 65%|██████▍ | 240012/371472 [8:30:48<12:44:38, 2.87it/s] 65%|██████▍ | 240013/371472 [8:30:48<12:34:54, 2.90it/s] 65%|██████▍ | 240014/371472 [8:30:49<11:51:04, 3.08it/s] 65%|██████▍ | 240015/371472 [8:30:49<11:36:24, 3.15it/s] 65%|██████▍ | 240016/371472 [8:30:49<11:14:23, 3.25it/s] 65%|██████▍ | 240017/371472 [8:30:49<11:15:28, 3.24it/s] 65%|██████▍ | 240018/371472 [8:30:50<11:06:01, 3.29it/s] 65%|██████▍ | 240019/371472 [8:30:50<10:51:33, 3.36it/s] 65%|██████▍ | 240020/371472 [8:30:50<10:41:20, 3.42it/s] {'loss': 2.6227, 'learning_rate': 4.186526320325799e-07, 'epoch': 10.34} + 65%|██████▍ | 240020/371472 [8:30:50<10:41:20, 3.42it/s] 65%|██████▍ | 240021/371472 [8:30:51<10:53:03, 3.35it/s] 65%|██████▍ | 240022/371472 [8:30:51<10:43:39, 3.40it/s] 65%|██████▍ | 240023/371472 [8:30:51<10:48:21, 3.38it/s] 65%|██████▍ | 240024/371472 [8:30:51<10:27:00, 3.49it/s] 65%|██████▍ | 240025/371472 [8:30:52<10:39:04, 3.43it/s] 65%|██████▍ | 240026/371472 [8:30:52<10:33:32, 3.46it/s] 65%|██████▍ | 240027/371472 [8:30:52<10:26:23, 3.50it/s] 65%|██████▍ | 240028/371472 [8:30:53<10:23:46, 3.51it/s] 65%|██████▍ | 240029/371472 [8:30:53<10:19:55, 3.53it/s] 65%|██████▍ | 240030/371472 [8:30:53<10:06:13, 3.61it/s] 65%|██████▍ | 240031/371472 [8:30:53<10:16:33, 3.55it/s] 65%|██████▍ | 240032/371472 [8:30:54<10:19:58, 3.53it/s] 65%|██████▍ | 240033/371472 [8:30:54<10:27:37, 3.49it/s] 65%|██████▍ | 240034/371472 [8:30:54<10:53:34, 3.35it/s] 65%|██████▍ | 240035/371472 [8:30:55<10:55:21, 3.34it/s] 65%|██████▍ | 240036/371472 [8:30:55<10:42:50, 3.41it/s] 65%|██████▍ | 240037/371472 [8:30:55<10:30:46, 3.47it/s] 65%|██████▍ | 240038/371472 [8:30:56<10:25:41, 3.50it/s] 65%|██████▍ | 240039/371472 [8:30:56<10:17:05, 3.55it/s] 65%|██████▍ | 240040/371472 [8:30:56<10:10:45, 3.59it/s] {'loss': 2.59, 'learning_rate': 4.1860415005710095e-07, 'epoch': 10.34} + 65%|██████▍ | 240040/371472 [8:30:56<10:10:45, 3.59it/s] 65%|██████▍ | 240041/371472 [8:30:56<10:43:11, 3.41it/s] 65%|██████▍ | 240042/371472 [8:30:57<10:19:30, 3.54it/s] 65%|██████▍ | 240043/371472 [8:30:57<10:25:47, 3.50it/s] 65%|██████▍ | 240044/371472 [8:30:57<10:26:23, 3.50it/s] 65%|██████▍ | 240045/371472 [8:30:58<10:41:13, 3.42it/s] 65%|██████▍ | 240046/371472 [8:30:58<10:39:22, 3.43it/s] 65%|██████▍ | 240047/371472 [8:30:58<10:30:42, 3.47it/s] 65%|██████▍ | 240048/371472 [8:30:58<10:23:34, 3.51it/s] 65%|██████▍ | 240049/371472 [8:30:59<10:35:24, 3.45it/s] 65%|██████▍ | 240050/371472 [8:30:59<10:38:19, 3.43it/s] 65%|██████▍ | 240051/371472 [8:30:59<10:32:06, 3.47it/s] 65%|██████▍ | 240052/371472 [8:31:00<10:24:04, 3.51it/s] 65%|██████▍ | 240053/371472 [8:31:00<10:21:51, 3.52it/s] 65%|██████▍ | 240054/371472 [8:31:00<10:28:38, 3.48it/s] 65%|██████▍ | 240055/371472 [8:31:00<10:26:58, 3.49it/s] 65%|██████▍ | 240056/371472 [8:31:01<10:32:31, 3.46it/s] 65%|██████▍ | 240057/371472 [8:31:01<10:28:12, 3.49it/s] 65%|██████▍ | 240058/371472 [8:31:01<10:20:18, 3.53it/s] 65%|██████▍ | 240059/371472 [8:31:02<10:45:52, 3.39it/s] 65%|██████▍ | 240060/371472 [8:31:02<10:43:14, 3.40it/s] {'loss': 2.7605, 'learning_rate': 4.1855566808162207e-07, 'epoch': 10.34} + 65%|██████▍ | 240060/371472 [8:31:02<10:43:14, 3.40it/s] 65%|██████▍ | 240061/371472 [8:31:02<11:05:28, 3.29it/s] 65%|██████▍ | 240062/371472 [8:31:02<10:59:48, 3.32it/s] 65%|██████▍ | 240063/371472 [8:31:03<10:47:39, 3.38it/s] 65%|██████▍ | 240064/371472 [8:31:03<11:04:16, 3.30it/s] 65%|██████▍ | 240065/371472 [8:31:03<10:44:16, 3.40it/s] 65%|██████▍ | 240066/371472 [8:31:04<10:39:20, 3.43it/s] 65%|██████▍ | 240067/371472 [8:31:04<10:28:15, 3.49it/s] 65%|██████▍ | 240068/371472 [8:31:04<10:27:41, 3.49it/s] 65%|██████▍ | 240069/371472 [8:31:04<10:08:47, 3.60it/s] 65%|██████▍ | 240070/371472 [8:31:05<10:21:51, 3.52it/s] 65%|██████▍ | 240071/371472 [8:31:05<10:25:14, 3.50it/s] 65%|██████▍ | 240072/371472 [8:31:05<10:39:11, 3.43it/s] 65%|██████▍ | 240073/371472 [8:31:06<10:33:40, 3.46it/s] 65%|██████▍ | 240074/371472 [8:31:06<11:01:57, 3.31it/s] 65%|██████▍ | 240075/371472 [8:31:06<11:06:22, 3.29it/s] 65%|██████▍ | 240076/371472 [8:31:07<11:01:33, 3.31it/s] 65%|██████▍ | 240077/371472 [8:31:07<10:56:08, 3.34it/s] 65%|██████▍ | 240078/371472 [8:31:07<10:45:09, 3.39it/s] 65%|██████▍ | 240079/371472 [8:31:07<10:45:37, 3.39it/s] 65%|██████▍ | 240080/371472 [8:31:08<10:41:38, 3.41it/s] {'loss': 2.8958, 'learning_rate': 4.1850718610614314e-07, 'epoch': 10.34} + 65%|██████▍ | 240080/371472 [8:31:08<10:41:38, 3.41it/s] 65%|██████▍ | 240081/371472 [8:31:08<11:02:51, 3.30it/s] 65%|██████▍ | 240082/371472 [8:31:08<10:58:07, 3.33it/s] 65%|██████▍ | 240083/371472 [8:31:09<10:49:32, 3.37it/s] 65%|██████▍ | 240084/371472 [8:31:09<10:41:01, 3.42it/s] 65%|██████▍ | 240085/371472 [8:31:09<10:27:28, 3.49it/s] 65%|██████▍ | 240086/371472 [8:31:09<10:16:08, 3.55it/s] 65%|██████▍ | 240087/371472 [8:31:10<10:39:21, 3.42it/s] 65%|██████▍ | 240088/371472 [8:31:10<11:29:26, 3.18it/s] 65%|██████▍ | 240089/371472 [8:31:10<10:56:37, 3.33it/s] 65%|██████▍ | 240090/371472 [8:31:11<10:40:01, 3.42it/s] 65%|██████▍ | 240091/371472 [8:31:11<10:36:02, 3.44it/s] 65%|██████▍ | 240092/371472 [8:31:11<10:37:08, 3.44it/s] 65%|██████▍ | 240093/371472 [8:31:12<10:37:58, 3.43it/s] 65%|██████▍ | 240094/371472 [8:31:12<11:03:40, 3.30it/s] 65%|██████▍ | 240095/371472 [8:31:12<10:31:08, 3.47it/s] 65%|██████▍ | 240096/371472 [8:31:12<10:43:25, 3.40it/s] 65%|██████▍ | 240097/371472 [8:31:13<10:29:58, 3.48it/s] 65%|██████▍ | 240098/371472 [8:31:13<10:48:59, 3.37it/s] 65%|██████▍ | 240099/371472 [8:31:13<10:35:08, 3.45it/s] 65%|██████▍ | 240100/371472 [8:31:14<11:11:37, 3.26it/s] {'loss': 2.8473, 'learning_rate': 4.184587041306643e-07, 'epoch': 10.34} + 65%|██████▍ | 240100/371472 [8:31:14<11:11:37, 3.26it/s] 65%|██████▍ | 240101/371472 [8:31:14<10:43:16, 3.40it/s] 65%|██████▍ | 240102/371472 [8:31:14<10:37:41, 3.43it/s] 65%|██████▍ | 240103/371472 [8:31:15<11:10:48, 3.26it/s] 65%|██████▍ | 240104/371472 [8:31:15<10:58:11, 3.33it/s] 65%|██████▍ | 240105/371472 [8:31:15<10:44:08, 3.40it/s] 65%|██████▍ | 240106/371472 [8:31:15<10:34:53, 3.45it/s] 65%|██████▍ | 240107/371472 [8:31:16<10:17:59, 3.54it/s] 65%|██████▍ | 240108/371472 [8:31:16<10:39:35, 3.42it/s] 65%|██████▍ | 240109/371472 [8:31:16<10:27:22, 3.49it/s] 65%|██████▍ | 240110/371472 [8:31:17<10:36:37, 3.44it/s] 65%|██████▍ | 240111/371472 [8:31:17<10:40:13, 3.42it/s] 65%|██████▍ | 240112/371472 [8:31:17<10:48:58, 3.37it/s] 65%|██████▍ | 240113/371472 [8:31:17<11:20:28, 3.22it/s] 65%|██████▍ | 240114/371472 [8:31:18<11:03:07, 3.30it/s] 65%|██████▍ | 240115/371472 [8:31:18<10:48:09, 3.38it/s] 65%|██████▍ | 240116/371472 [8:31:18<11:12:48, 3.25it/s] 65%|██████▍ | 240117/371472 [8:31:19<10:51:09, 3.36it/s] 65%|██████▍ | 240118/371472 [8:31:19<10:49:28, 3.37it/s] 65%|██████▍ | 240119/371472 [8:31:19<10:36:46, 3.44it/s] 65%|██████▍ | 240120/371472 [8:31:20<10:34:59, 3.45it/s] {'loss': 2.6949, 'learning_rate': 4.1841022215518534e-07, 'epoch': 10.34} + 65%|██████▍ | 240120/371472 [8:31:20<10:34:59, 3.45it/s] 65%|██████▍ | 240121/371472 [8:31:20<11:29:08, 3.18it/s] 65%|██████▍ | 240122/371472 [8:31:20<11:11:31, 3.26it/s] 65%|██████▍ | 240123/371472 [8:31:20<11:12:55, 3.25it/s] 65%|██████▍ | 240124/371472 [8:31:21<11:06:36, 3.28it/s] 65%|██████▍ | 240125/371472 [8:31:21<10:59:47, 3.32it/s] 65%|██████▍ | 240126/371472 [8:31:21<10:55:03, 3.34it/s] 65%|██████▍ | 240127/371472 [8:31:22<11:13:20, 3.25it/s] 65%|██████▍ | 240128/371472 [8:31:22<10:59:58, 3.32it/s] 65%|██████▍ | 240129/371472 [8:31:22<10:45:11, 3.39it/s] 65%|██████▍ | 240130/371472 [8:31:23<10:47:41, 3.38it/s] 65%|██████▍ | 240131/371472 [8:31:23<10:33:50, 3.45it/s] 65%|██████▍ | 240132/371472 [8:31:23<11:17:01, 3.23it/s] 65%|██████▍ | 240133/371472 [8:31:23<10:43:19, 3.40it/s] 65%|██████▍ | 240134/371472 [8:31:24<11:05:53, 3.29it/s] 65%|██████▍ | 240135/371472 [8:31:24<11:26:10, 3.19it/s] 65%|██████▍ | 240136/371472 [8:31:24<11:01:50, 3.31it/s] 65%|██████▍ | 240137/371472 [8:31:25<10:55:15, 3.34it/s] 65%|██████▍ | 240138/371472 [8:31:25<11:19:22, 3.22it/s] 65%|██████▍ | 240139/371472 [8:31:25<11:09:49, 3.27it/s] 65%|██████▍ | 240140/371472 [8:31:26<11:03:54, 3.30it/s] {'loss': 2.8028, 'learning_rate': 4.1836174017970646e-07, 'epoch': 10.34} + 65%|██████▍ | 240140/371472 [8:31:26<11:03:54, 3.30it/s] 65%|██████▍ | 240141/371472 [8:31:26<11:36:12, 3.14it/s] 65%|██████▍ | 240142/371472 [8:31:26<11:29:34, 3.17it/s] 65%|██████▍ | 240143/371472 [8:31:27<11:15:42, 3.24it/s] 65%|██████▍ | 240144/371472 [8:31:27<10:47:26, 3.38it/s] 65%|██████▍ | 240145/371472 [8:31:27<10:33:38, 3.45it/s] 65%|██████▍ | 240146/371472 [8:31:27<10:28:45, 3.48it/s] 65%|██████▍ | 240147/371472 [8:31:28<10:27:25, 3.49it/s] 65%|██████▍ | 240148/371472 [8:31:28<10:23:51, 3.51it/s] 65%|██████▍ | 240149/371472 [8:31:28<10:31:04, 3.47it/s] 65%|██████▍ | 240150/371472 [8:31:29<10:19:14, 3.53it/s] 65%|██████▍ | 240151/371472 [8:31:29<10:27:28, 3.49it/s] 65%|██████▍ | 240152/371472 [8:31:29<10:49:14, 3.37it/s] 65%|██████▍ | 240153/371472 [8:31:29<10:35:06, 3.45it/s] 65%|██████▍ | 240154/371472 [8:31:30<10:52:33, 3.35it/s] 65%|██████▍ | 240155/371472 [8:31:30<11:03:14, 3.30it/s] 65%|██████▍ | 240156/371472 [8:31:30<10:57:18, 3.33it/s] 65%|██████▍ | 240157/371472 [8:31:31<10:31:52, 3.46it/s] 65%|██████▍ | 240158/371472 [8:31:31<10:12:28, 3.57it/s] 65%|██████▍ | 240159/371472 [8:31:31<10:12:26, 3.57it/s] 65%|██████▍ | 240160/371472 [8:31:31<10:02:57, 3.63it/s] {'loss': 2.6857, 'learning_rate': 4.183132582042276e-07, 'epoch': 10.34} + 65%|██████▍ | 240160/371472 [8:31:31<10:02:57, 3.63it/s] 65%|██████▍ | 240161/371472 [8:31:32<10:08:39, 3.60it/s] 65%|██████▍ | 240162/371472 [8:31:32<10:30:17, 3.47it/s] 65%|██████▍ | 240163/371472 [8:31:32<11:12:21, 3.25it/s] 65%|██████▍ | 240164/371472 [8:31:33<10:56:53, 3.33it/s] 65%|██████▍ | 240165/371472 [8:31:33<10:55:37, 3.34it/s] 65%|██████▍ | 240166/371472 [8:31:33<10:47:39, 3.38it/s] 65%|██████▍ | 240167/371472 [8:31:34<10:32:25, 3.46it/s] 65%|██████▍ | 240168/371472 [8:31:34<10:13:54, 3.56it/s] 65%|██████▍ | 240169/371472 [8:31:34<10:34:36, 3.45it/s] 65%|██████▍ | 240170/371472 [8:31:34<10:42:45, 3.40it/s] 65%|██████▍ | 240171/371472 [8:31:35<10:57:51, 3.33it/s] 65%|██████▍ | 240172/371472 [8:31:35<10:49:29, 3.37it/s] 65%|██████▍ | 240173/371472 [8:31:35<10:33:05, 3.46it/s] 65%|██████▍ | 240174/371472 [8:31:36<10:15:03, 3.56it/s] 65%|██████▍ | 240175/371472 [8:31:36<10:28:38, 3.48it/s] 65%|██████▍ | 240176/371472 [8:31:36<10:21:20, 3.52it/s] 65%|██████▍ | 240177/371472 [8:31:36<10:58:23, 3.32it/s] 65%|██████▍ | 240178/371472 [8:31:37<10:53:01, 3.35it/s] 65%|██████▍ | 240179/371472 [8:31:37<11:03:36, 3.30it/s] 65%|██████▍ | 240180/371472 [8:31:37<10:55:49, 3.34it/s] {'loss': 2.8328, 'learning_rate': 4.182647762287487e-07, 'epoch': 10.35} + 65%|██████▍ | 240180/371472 [8:31:37<10:55:49, 3.34it/s] 65%|██████▍ | 240181/371472 [8:31:38<11:30:31, 3.17it/s] 65%|██████▍ | 240182/371472 [8:31:38<11:22:47, 3.20it/s] 65%|██████▍ | 240183/371472 [8:31:38<11:30:11, 3.17it/s] 65%|██████▍ | 240184/371472 [8:31:39<11:18:12, 3.23it/s] 65%|██████▍ | 240185/371472 [8:31:39<11:04:08, 3.29it/s] 65%|██████▍ | 240186/371472 [8:31:39<10:51:16, 3.36it/s] 65%|██████▍ | 240187/371472 [8:31:39<10:53:45, 3.35it/s] 65%|██████▍ | 240188/371472 [8:31:40<10:32:36, 3.46it/s] 65%|██████▍ | 240189/371472 [8:31:40<10:40:41, 3.42it/s] 65%|██████▍ | 240190/371472 [8:31:40<11:45:24, 3.10it/s] 65%|██████▍ | 240191/371472 [8:31:41<12:17:50, 2.97it/s] 65%|██████▍ | 240192/371472 [8:31:41<11:39:42, 3.13it/s] 65%|██████▍ | 240193/371472 [8:31:41<11:00:20, 3.31it/s] 65%|██████▍ | 240194/371472 [8:31:42<10:47:34, 3.38it/s] 65%|██████▍ | 240195/371472 [8:31:42<10:37:23, 3.43it/s] 65%|██████▍ | 240196/371472 [8:31:42<11:04:46, 3.29it/s] 65%|██████▍ | 240197/371472 [8:31:43<11:05:05, 3.29it/s] 65%|██████▍ | 240198/371472 [8:31:43<11:01:16, 3.31it/s] 65%|██████▍ | 240199/371472 [8:31:43<11:43:31, 3.11it/s] 65%|██████▍ | 240200/371472 [8:31:44<11:31:17, 3.16it/s] {'loss': 2.6733, 'learning_rate': 4.182162942532698e-07, 'epoch': 10.35} + 65%|██████▍ | 240200/371472 [8:31:44<11:31:17, 3.16it/s] 65%|██████▍ | 240201/371472 [8:31:44<11:05:25, 3.29it/s] 65%|██████▍ | 240202/371472 [8:31:44<11:31:04, 3.17it/s] 65%|██████▍ | 240203/371472 [8:31:44<11:07:30, 3.28it/s] 65%|██████▍ | 240204/371472 [8:31:45<11:00:13, 3.31it/s] 65%|██████▍ | 240205/371472 [8:31:45<10:43:14, 3.40it/s] 65%|██████▍ | 240206/371472 [8:31:45<10:32:35, 3.46it/s] 65%|██████▍ | 240207/371472 [8:31:46<10:35:19, 3.44it/s] 65%|██████▍ | 240208/371472 [8:31:46<10:50:19, 3.36it/s] 65%|██████▍ | 240209/371472 [8:31:46<10:32:28, 3.46it/s] 65%|██████▍ | 240210/371472 [8:31:46<10:17:56, 3.54it/s] 65%|██████▍ | 240211/371472 [8:31:47<10:25:53, 3.50it/s] 65%|██████▍ | 240212/371472 [8:31:47<10:11:20, 3.58it/s] 65%|██████▍ | 240213/371472 [8:31:47<10:15:44, 3.55it/s] 65%|██████▍ | 240214/371472 [8:31:48<10:25:23, 3.50it/s] 65%|██████▍ | 240215/371472 [8:31:48<10:27:20, 3.49it/s] 65%|██████▍ | 240216/371472 [8:31:48<11:04:03, 3.29it/s] 65%|██████▍ | 240217/371472 [8:31:48<10:55:07, 3.34it/s] 65%|██████▍ | 240218/371472 [8:31:49<10:45:08, 3.39it/s] 65%|██████▍ | 240219/371472 [8:31:49<10:33:45, 3.45it/s] 65%|██████▍ | 240220/371472 [8:31:49<10:45:58, 3.39it/s] {'loss': 2.6818, 'learning_rate': 4.1816781227779096e-07, 'epoch': 10.35} + 65%|██████▍ | 240220/371472 [8:31:49<10:45:58, 3.39it/s] 65%|██████▍ | 240221/371472 [8:31:50<10:16:38, 3.55it/s] 65%|██████▍ | 240222/371472 [8:31:50<10:21:22, 3.52it/s] 65%|██████▍ | 240223/371472 [8:31:50<10:29:08, 3.48it/s] 65%|██████▍ | 240224/371472 [8:31:51<10:51:48, 3.36it/s] 65%|██████▍ | 240225/371472 [8:31:51<11:22:46, 3.20it/s] 65%|██████▍ | 240226/371472 [8:31:51<11:12:23, 3.25it/s] 65%|██████▍ | 240227/371472 [8:31:52<15:29:41, 2.35it/s] 65%|██████▍ | 240228/371472 [8:31:52<14:11:31, 2.57it/s] 65%|██████▍ | 240229/371472 [8:31:52<13:10:24, 2.77it/s] 65%|██████▍ | 240230/371472 [8:31:53<12:13:50, 2.98it/s] 65%|██████▍ | 240231/371472 [8:31:53<11:42:58, 3.11it/s] 65%|██████▍ | 240232/371472 [8:31:53<11:32:03, 3.16it/s] 65%|██████▍ | 240233/371472 [8:31:54<11:22:17, 3.21it/s] 65%|██████▍ | 240234/371472 [8:31:54<11:22:30, 3.20it/s] 65%|██████▍ | 240235/371472 [8:31:54<11:01:16, 3.31it/s] 65%|██████▍ | 240236/371472 [8:31:54<10:44:55, 3.39it/s] 65%|██████▍ | 240237/371472 [8:31:55<10:31:19, 3.46it/s] 65%|██████▍ | 240238/371472 [8:31:55<10:18:38, 3.54it/s] 65%|██████▍ | 240239/371472 [8:31:55<10:28:22, 3.48it/s] 65%|██████▍ | 240240/371472 [8:31:56<10:06:31, 3.61it/s] {'loss': 2.7516, 'learning_rate': 4.1811933030231203e-07, 'epoch': 10.35} + 65%|██████▍ | 240240/371472 [8:31:56<10:06:31, 3.61it/s] 65%|██████▍ | 240241/371472 [8:31:56<10:13:11, 3.57it/s] 65%|██████▍ | 240242/371472 [8:31:56<10:18:57, 3.53it/s] 65%|██████▍ | 240243/371472 [8:31:56<10:26:24, 3.49it/s] 65%|██████▍ | 240244/371472 [8:31:57<10:40:24, 3.42it/s] 65%|██████▍ | 240245/371472 [8:31:57<10:40:20, 3.42it/s] 65%|██████▍ | 240246/371472 [8:31:57<10:31:47, 3.46it/s] 65%|██████▍ | 240247/371472 [8:31:58<10:55:09, 3.34it/s] 65%|██████▍ | 240248/371472 [8:31:58<11:32:12, 3.16it/s] 65%|██████▍ | 240249/371472 [8:31:58<11:49:48, 3.08it/s] 65%|██████▍ | 240250/371472 [8:31:59<12:27:57, 2.92it/s] 65%|██████▍ | 240251/371472 [8:31:59<12:21:06, 2.95it/s] 65%|██████▍ | 240252/371472 [8:31:59<11:34:55, 3.15it/s] 65%|██████▍ | 240253/371472 [8:32:00<11:45:09, 3.10it/s] 65%|██████▍ | 240254/371472 [8:32:00<11:34:01, 3.15it/s] 65%|██████▍ | 240255/371472 [8:32:00<11:23:44, 3.20it/s] 65%|██████▍ | 240256/371472 [8:32:01<11:13:49, 3.25it/s] 65%|██████▍ | 240257/371472 [8:32:01<11:17:00, 3.23it/s] 65%|██████▍ | 240258/371472 [8:32:01<10:55:33, 3.34it/s] 65%|██████▍ | 240259/371472 [8:32:01<10:52:14, 3.35it/s] 65%|██████▍ | 240260/371472 [8:32:02<10:50:00, 3.36it/s] {'loss': 2.7548, 'learning_rate': 4.1807084832683316e-07, 'epoch': 10.35} + 65%|██████▍ | 240260/371472 [8:32:02<10:50:00, 3.36it/s] 65%|██████▍ | 240261/371472 [8:32:02<11:36:29, 3.14it/s] 65%|██████▍ | 240262/371472 [8:32:02<11:20:44, 3.21it/s] 65%|██████▍ | 240263/371472 [8:32:03<11:08:39, 3.27it/s] 65%|██████▍ | 240264/371472 [8:32:03<10:57:39, 3.33it/s] 65%|██████▍ | 240265/371472 [8:32:03<10:36:36, 3.44it/s] 65%|██████▍ | 240266/371472 [8:32:04<10:42:37, 3.40it/s] 65%|██████▍ | 240267/371472 [8:32:04<10:49:32, 3.37it/s] 65%|██████▍ | 240268/371472 [8:32:04<10:28:55, 3.48it/s] 65%|██████▍ | 240269/371472 [8:32:04<10:43:57, 3.40it/s] 65%|██████▍ | 240270/371472 [8:32:05<10:20:10, 3.53it/s] 65%|██████▍ | 240271/371472 [8:32:05<10:03:50, 3.62it/s] 65%|██████▍ | 240272/371472 [8:32:05<10:20:52, 3.52it/s] 65%|██████▍ | 240273/371472 [8:32:06<10:35:46, 3.44it/s] 65%|██████▍ | 240274/371472 [8:32:06<10:18:51, 3.53it/s] 65%|██████▍ | 240275/371472 [8:32:06<10:10:48, 3.58it/s] 65%|██████▍ | 240276/371472 [8:32:06<10:15:58, 3.55it/s] 65%|██████▍ | 240277/371472 [8:32:07<10:10:54, 3.58it/s] 65%|██████▍ | 240278/371472 [8:32:07<10:10:45, 3.58it/s] 65%|██████▍ | 240279/371472 [8:32:07<10:10:56, 3.58it/s] 65%|██████▍ | 240280/371472 [8:32:08<10:14:06, 3.56it/s] {'loss': 2.8873, 'learning_rate': 4.1802236635135423e-07, 'epoch': 10.35} + 65%|██████▍ | 240280/371472 [8:32:08<10:14:06, 3.56it/s] 65%|██████▍ | 240281/371472 [8:32:08<10:36:53, 3.43it/s] 65%|██████▍ | 240282/371472 [8:32:08<11:40:00, 3.12it/s] 65%|██████▍ | 240283/371472 [8:32:09<11:54:55, 3.06it/s] 65%|██████▍ | 240284/371472 [8:32:09<11:16:35, 3.23it/s] 65%|██████▍ | 240285/371472 [8:32:09<11:06:08, 3.28it/s] 65%|██████▍ | 240286/371472 [8:32:09<10:44:00, 3.40it/s] 65%|██████▍ | 240287/371472 [8:32:10<10:50:38, 3.36it/s] 65%|██████▍ | 240288/371472 [8:32:10<11:11:52, 3.25it/s] 65%|██████▍ | 240289/371472 [8:32:10<10:56:08, 3.33it/s] 65%|██████▍ | 240290/371472 [8:32:11<10:45:10, 3.39it/s] 65%|██████▍ | 240291/371472 [8:32:11<11:06:49, 3.28it/s] 65%|██████▍ | 240292/371472 [8:32:11<12:02:59, 3.02it/s] 65%|██████▍ | 240293/371472 [8:32:12<12:03:11, 3.02it/s] 65%|██████▍ | 240294/371472 [8:32:12<12:13:27, 2.98it/s] 65%|██████▍ | 240295/371472 [8:32:12<12:15:26, 2.97it/s] 65%|██████▍ | 240296/371472 [8:32:13<11:37:04, 3.14it/s] 65%|██████▍ | 240297/371472 [8:32:13<11:36:38, 3.14it/s] 65%|██████▍ | 240298/371472 [8:32:13<11:17:10, 3.23it/s] 65%|██████▍ | 240299/371472 [8:32:14<11:27:05, 3.18it/s] 65%|██████▍ | 240300/371472 [8:32:14<11:08:50, 3.27it/s] {'loss': 2.7693, 'learning_rate': 4.179738843758754e-07, 'epoch': 10.35} + 65%|██████▍ | 240300/371472 [8:32:14<11:08:50, 3.27it/s] 65%|██████▍ | 240301/371472 [8:32:14<10:58:04, 3.32it/s] 65%|██████▍ | 240302/371472 [8:32:15<12:29:48, 2.92it/s] 65%|██████▍ | 240303/371472 [8:32:15<11:46:48, 3.09it/s] 65%|██████▍ | 240304/371472 [8:32:15<11:18:23, 3.22it/s] 65%|██████▍ | 240305/371472 [8:32:15<10:51:08, 3.36it/s] 65%|██████▍ | 240306/371472 [8:32:16<10:34:08, 3.45it/s] 65%|██████▍ | 240307/371472 [8:32:16<11:15:55, 3.23it/s] 65%|██████▍ | 240308/371472 [8:32:16<11:27:44, 3.18it/s] 65%|██████▍ | 240309/371472 [8:32:17<11:16:38, 3.23it/s] 65%|██████▍ | 240310/371472 [8:32:17<11:04:42, 3.29it/s] 65%|██████▍ | 240311/371472 [8:32:17<10:47:20, 3.38it/s] 65%|██████▍ | 240312/371472 [8:32:18<11:00:53, 3.31it/s] 65%|██████▍ | 240313/371472 [8:32:18<11:11:47, 3.25it/s] 65%|██████▍ | 240314/371472 [8:32:18<11:11:08, 3.26it/s] 65%|██████▍ | 240315/371472 [8:32:18<10:53:25, 3.35it/s] 65%|██████▍ | 240316/371472 [8:32:19<10:50:01, 3.36it/s] 65%|██████▍ | 240317/371472 [8:32:19<10:48:51, 3.37it/s] 65%|██████▍ | 240318/371472 [8:32:19<10:56:49, 3.33it/s] 65%|██████▍ | 240319/371472 [8:32:20<10:49:23, 3.37it/s] 65%|██████▍ | 240320/371472 [8:32:20<10:30:49, 3.47it/s] {'loss': 2.6307, 'learning_rate': 4.179254024003964e-07, 'epoch': 10.35} + 65%|██████▍ | 240320/371472 [8:32:20<10:30:49, 3.47it/s] 65%|██████▍ | 240321/371472 [8:32:20<10:35:04, 3.44it/s] 65%|██████▍ | 240322/371472 [8:32:21<11:11:16, 3.26it/s] 65%|██████▍ | 240323/371472 [8:32:21<11:02:33, 3.30it/s] 65%|██████▍ | 240324/371472 [8:32:21<10:58:51, 3.32it/s] 65%|██████▍ | 240325/371472 [8:32:21<10:37:16, 3.43it/s] 65%|██████▍ | 240326/371472 [8:32:22<10:29:42, 3.47it/s] 65%|██████▍ | 240327/371472 [8:32:22<10:25:05, 3.50it/s] 65%|██████▍ | 240328/371472 [8:32:22<10:05:35, 3.61it/s] 65%|██████▍ | 240329/371472 [8:32:23<10:21:35, 3.52it/s] 65%|██████▍ | 240330/371472 [8:32:23<10:31:35, 3.46it/s] 65%|██████▍ | 240331/371472 [8:32:23<10:54:21, 3.34it/s] 65%|██████▍ | 240332/371472 [8:32:23<10:57:08, 3.33it/s] 65%|██████▍ | 240333/371472 [8:32:24<11:05:03, 3.29it/s] 65%|██████▍ | 240334/371472 [8:32:24<10:53:07, 3.35it/s] 65%|██████▍ | 240335/371472 [8:32:24<10:38:59, 3.42it/s] 65%|██████▍ | 240336/371472 [8:32:25<10:49:15, 3.37it/s] 65%|██████▍ | 240337/371472 [8:32:25<10:28:21, 3.48it/s] 65%|██████▍ | 240338/371472 [8:32:25<10:17:17, 3.54it/s] 65%|██████▍ | 240339/371472 [8:32:25<10:05:52, 3.61it/s] 65%|██████▍ | 240340/371472 [8:32:26<11:12:49, 3.25it/s] {'loss': 2.7293, 'learning_rate': 4.178769204249176e-07, 'epoch': 10.35} + 65%|██████▍ | 240340/371472 [8:32:26<11:12:49, 3.25it/s] 65%|██████▍ | 240341/371472 [8:32:26<11:01:13, 3.31it/s] 65%|██████▍ | 240342/371472 [8:32:26<11:19:56, 3.21it/s] 65%|██████▍ | 240343/371472 [8:32:27<11:09:40, 3.26it/s] 65%|██████▍ | 240344/371472 [8:32:27<10:50:59, 3.36it/s] 65%|██████▍ | 240345/371472 [8:32:27<10:43:17, 3.40it/s] 65%|██████▍ | 240346/371472 [8:32:28<10:54:27, 3.34it/s] 65%|██████▍ | 240347/371472 [8:32:28<10:31:34, 3.46it/s] 65%|██████▍ | 240348/371472 [8:32:28<10:18:31, 3.53it/s] 65%|██████▍ | 240349/371472 [8:32:28<10:22:17, 3.51it/s] 65%|██████▍ | 240350/371472 [8:32:29<10:50:09, 3.36it/s] 65%|██████▍ | 240351/371472 [8:32:29<11:17:23, 3.23it/s] 65%|██████▍ | 240352/371472 [8:32:29<10:48:04, 3.37it/s] 65%|██████▍ | 240353/371472 [8:32:30<10:36:40, 3.43it/s] 65%|██████▍ | 240354/371472 [8:32:30<11:35:16, 3.14it/s] 65%|██████▍ | 240355/371472 [8:32:30<11:16:30, 3.23it/s] 65%|██████▍ | 240356/371472 [8:32:31<10:46:20, 3.38it/s] 65%|██████▍ | 240357/371472 [8:32:31<11:23:00, 3.20it/s] 65%|██████▍ | 240358/371472 [8:32:31<11:11:03, 3.26it/s] 65%|██████▍ | 240359/371472 [8:32:32<11:08:12, 3.27it/s] 65%|██████▍ | 240360/371472 [8:32:32<10:44:22, 3.39it/s] {'loss': 2.8753, 'learning_rate': 4.178284384494387e-07, 'epoch': 10.35} + 65%|██████▍ | 240360/371472 [8:32:32<10:44:22, 3.39it/s] 65%|██████▍ | 240361/371472 [8:32:32<10:44:51, 3.39it/s] 65%|██████▍ | 240362/371472 [8:32:32<10:40:36, 3.41it/s] 65%|██████▍ | 240363/371472 [8:32:33<10:35:09, 3.44it/s] 65%|██████▍ | 240364/371472 [8:32:33<10:08:48, 3.59it/s] 65%|██████▍ | 240365/371472 [8:32:33<10:01:33, 3.63it/s] 65%|██████▍ | 240366/371472 [8:32:33<10:02:05, 3.63it/s] 65%|██████▍ | 240367/371472 [8:32:34<10:23:40, 3.50it/s] 65%|██████▍ | 240368/371472 [8:32:34<10:24:07, 3.50it/s] 65%|██████▍ | 240369/371472 [8:32:34<10:37:50, 3.43it/s] 65%|██████▍ | 240370/371472 [8:32:35<10:31:14, 3.46it/s] 65%|██████▍ | 240371/371472 [8:32:35<10:30:00, 3.47it/s] 65%|██████▍ | 240372/371472 [8:32:35<10:29:13, 3.47it/s] 65%|██████▍ | 240373/371472 [8:32:35<10:20:51, 3.52it/s] 65%|██████▍ | 240374/371472 [8:32:36<10:26:32, 3.49it/s] 65%|██████▍ | 240375/371472 [8:32:36<10:34:11, 3.45it/s] 65%|██████▍ | 240376/371472 [8:32:36<11:16:59, 3.23it/s] 65%|██████▍ | 240377/371472 [8:32:37<10:44:33, 3.39it/s] 65%|██████▍ | 240378/371472 [8:32:37<10:38:17, 3.42it/s] 65%|██████▍ | 240379/371472 [8:32:37<10:38:39, 3.42it/s] 65%|██████▍ | 240380/371472 [8:32:38<10:42:27, 3.40it/s] {'loss': 2.6822, 'learning_rate': 4.177799564739598e-07, 'epoch': 10.35} + 65%|██████▍ | 240380/371472 [8:32:38<10:42:27, 3.40it/s] 65%|██████▍ | 240381/371472 [8:32:38<10:29:20, 3.47it/s] 65%|██████▍ | 240382/371472 [8:32:38<11:59:49, 3.04it/s] 65%|██████▍ | 240383/371472 [8:32:39<11:20:03, 3.21it/s] 65%|██████▍ | 240384/371472 [8:32:39<11:36:05, 3.14it/s] 65%|██████▍ | 240385/371472 [8:32:39<11:25:41, 3.19it/s] 65%|██████▍ | 240386/371472 [8:32:39<10:58:54, 3.32it/s] 65%|██████▍ | 240387/371472 [8:32:40<10:39:57, 3.41it/s] 65%|██████▍ | 240388/371472 [8:32:40<10:36:05, 3.43it/s] 65%|██████▍ | 240389/371472 [8:32:40<10:35:00, 3.44it/s] 65%|██████▍ | 240390/371472 [8:32:41<10:31:43, 3.46it/s] 65%|██████▍ | 240391/371472 [8:32:41<11:32:07, 3.16it/s] 65%|██████▍ | 240392/371472 [8:32:41<11:22:21, 3.20it/s] 65%|██████▍ | 240393/371472 [8:32:42<10:52:21, 3.35it/s] 65%|██████▍ | 240394/371472 [8:32:42<10:44:02, 3.39it/s] 65%|██████▍ | 240395/371472 [8:32:42<10:37:52, 3.42it/s] 65%|██████▍ | 240396/371472 [8:32:42<10:29:31, 3.47it/s] 65%|██████▍ | 240397/371472 [8:32:43<10:16:24, 3.54it/s] 65%|██████▍ | 240398/371472 [8:32:43<10:19:10, 3.53it/s] 65%|██████▍ | 240399/371472 [8:32:43<10:32:42, 3.45it/s] 65%|██████▍ | 240400/371472 [8:32:44<10:33:47, 3.45it/s] {'loss': 2.8325, 'learning_rate': 4.1773147449848087e-07, 'epoch': 10.35} + 65%|██████▍ | 240400/371472 [8:32:44<10:33:47, 3.45it/s] 65%|██████▍ | 240401/371472 [8:32:44<10:43:05, 3.40it/s] 65%|██████▍ | 240402/371472 [8:32:44<10:43:14, 3.40it/s] 65%|██████▍ | 240403/371472 [8:32:44<10:47:08, 3.38it/s] 65%|██████▍ | 240404/371472 [8:32:45<10:37:09, 3.43it/s] 65%|██████▍ | 240405/371472 [8:32:45<10:47:34, 3.37it/s] 65%|██████▍ | 240406/371472 [8:32:45<10:36:05, 3.43it/s] 65%|██████▍ | 240407/371472 [8:32:46<11:17:09, 3.23it/s] 65%|██████▍ | 240408/371472 [8:32:46<11:07:15, 3.27it/s] 65%|██████▍ | 240409/371472 [8:32:46<11:05:38, 3.28it/s] 65%|██████▍ | 240410/371472 [8:32:47<10:59:41, 3.31it/s] 65%|██████▍ | 240411/371472 [8:32:47<11:34:10, 3.15it/s] 65%|██████▍ | 240412/371472 [8:32:47<11:32:52, 3.15it/s] 65%|██████▍ | 240413/371472 [8:32:48<11:35:26, 3.14it/s] 65%|██████▍ | 240414/371472 [8:32:48<11:34:13, 3.15it/s] 65%|██████▍ | 240415/371472 [8:32:48<11:23:19, 3.20it/s] 65%|██████▍ | 240416/371472 [8:32:48<11:04:24, 3.29it/s] 65%|██████▍ | 240417/371472 [8:32:49<10:39:16, 3.42it/s] 65%|██████▍ | 240418/371472 [8:32:49<10:25:53, 3.49it/s] 65%|██████▍ | 240419/371472 [8:32:49<11:05:31, 3.28it/s] 65%|██████▍ | 240420/371472 [8:32:50<10:43:52, 3.39it/s] {'loss': 2.6278, 'learning_rate': 4.1768299252300205e-07, 'epoch': 10.36} + 65%|██████▍ | 240420/371472 [8:32:50<10:43:52, 3.39it/s] 65%|██████▍ | 240421/371472 [8:32:50<10:45:38, 3.38it/s] 65%|██████▍ | 240422/371472 [8:32:50<10:40:49, 3.41it/s] 65%|██████▍ | 240423/371472 [8:32:50<10:30:02, 3.47it/s] 65%|██████▍ | 240424/371472 [8:32:51<10:45:56, 3.38it/s] 65%|██████▍ | 240425/371472 [8:32:51<10:39:55, 3.41it/s] 65%|██████▍ | 240426/371472 [8:32:51<10:29:10, 3.47it/s] 65%|██████▍ | 240427/371472 [8:32:52<10:31:44, 3.46it/s] 65%|██████▍ | 240428/371472 [8:32:52<10:32:50, 3.45it/s] 65%|██████▍ | 240429/371472 [8:32:52<10:27:09, 3.48it/s] 65%|██████▍ | 240430/371472 [8:32:52<10:29:17, 3.47it/s] 65%|██████▍ | 240431/371472 [8:32:53<10:36:33, 3.43it/s] 65%|██████▍ | 240432/371472 [8:32:53<10:51:29, 3.35it/s] 65%|██████▍ | 240433/371472 [8:32:53<10:41:02, 3.41it/s] 65%|██████▍ | 240434/371472 [8:32:54<10:25:37, 3.49it/s] 65%|██████▍ | 240435/371472 [8:32:54<10:11:49, 3.57it/s] 65%|██████▍ | 240436/371472 [8:32:54<10:31:09, 3.46it/s] 65%|██████▍ | 240437/371472 [8:32:55<10:34:11, 3.44it/s] 65%|██████▍ | 240438/371472 [8:32:55<11:35:04, 3.14it/s] 65%|██████▍ | 240439/371472 [8:32:55<11:17:18, 3.22it/s] 65%|██████▍ | 240440/371472 [8:32:56<11:47:11, 3.09it/s] {'loss': 2.7377, 'learning_rate': 4.1763451054752307e-07, 'epoch': 10.36} + 65%|██████▍ | 240440/371472 [8:32:56<11:47:11, 3.09it/s] 65%|██████▍ | 240441/371472 [8:32:56<11:41:33, 3.11it/s] 65%|██████▍ | 240442/371472 [8:32:56<11:17:08, 3.23it/s] 65%|██████▍ | 240443/371472 [8:32:56<11:07:34, 3.27it/s] 65%|██████▍ | 240444/371472 [8:32:57<11:04:52, 3.28it/s] 65%|██████▍ | 240445/371472 [8:32:57<10:41:02, 3.41it/s] 65%|██████▍ | 240446/371472 [8:32:57<10:59:39, 3.31it/s] 65%|██████▍ | 240447/371472 [8:32:58<11:03:59, 3.29it/s] 65%|██████▍ | 240448/371472 [8:32:58<10:46:01, 3.38it/s] 65%|██████▍ | 240449/371472 [8:32:58<10:59:48, 3.31it/s] 65%|██████▍ | 240450/371472 [8:32:59<10:53:50, 3.34it/s] 65%|██████▍ | 240451/371472 [8:32:59<10:24:28, 3.50it/s] 65%|██████▍ | 240452/371472 [8:32:59<10:20:29, 3.52it/s] 65%|██████▍ | 240453/371472 [8:32:59<10:50:52, 3.35it/s] 65%|██████▍ | 240454/371472 [8:33:00<10:35:38, 3.44it/s] 65%|██████▍ | 240455/371472 [8:33:00<10:38:21, 3.42it/s] 65%|██████▍ | 240456/371472 [8:33:00<10:42:41, 3.40it/s] 65%|██████▍ | 240457/371472 [8:33:01<10:52:45, 3.35it/s] 65%|██████▍ | 240458/371472 [8:33:01<11:33:01, 3.15it/s] 65%|██████▍ | 240459/371472 [8:33:01<10:59:42, 3.31it/s] 65%|██████▍ | 240460/371472 [8:33:01<10:43:04, 3.40it/s] {'loss': 3.0039, 'learning_rate': 4.1758602857204424e-07, 'epoch': 10.36} + 65%|██████▍ | 240460/371472 [8:33:01<10:43:04, 3.40it/s] 65%|██████▍ | 240461/371472 [8:33:02<10:50:08, 3.36it/s] 65%|██████▍ | 240462/371472 [8:33:02<10:30:45, 3.46it/s] 65%|██████▍ | 240463/371472 [8:33:02<10:34:53, 3.44it/s] 65%|██████▍ | 240464/371472 [8:33:03<10:55:00, 3.33it/s] 65%|██████▍ | 240465/371472 [8:33:03<10:38:26, 3.42it/s] 65%|██████▍ | 240466/371472 [8:33:03<10:38:59, 3.42it/s] 65%|██████▍ | 240467/371472 [8:33:04<10:35:41, 3.43it/s] 65%|██████▍ | 240468/371472 [8:33:04<12:24:11, 2.93it/s] 65%|██████▍ | 240469/371472 [8:33:04<11:58:30, 3.04it/s] 65%|██████▍ | 240470/371472 [8:33:05<11:17:59, 3.22it/s] 65%|██████▍ | 240471/371472 [8:33:05<11:03:21, 3.29it/s] 65%|██████▍ | 240472/371472 [8:33:05<10:56:24, 3.33it/s] 65%|██████▍ | 240473/371472 [8:33:05<10:48:12, 3.37it/s] 65%|██████▍ | 240474/371472 [8:33:06<10:35:05, 3.44it/s] 65%|██████▍ | 240475/371472 [8:33:06<10:52:36, 3.35it/s] 65%|██████▍ | 240476/371472 [8:33:06<10:46:16, 3.38it/s] 65%|██████▍ | 240477/371472 [8:33:07<10:31:07, 3.46it/s] 65%|██████▍ | 240478/371472 [8:33:07<10:32:43, 3.45it/s] 65%|██████▍ | 240479/371472 [8:33:07<10:39:53, 3.41it/s] 65%|██████▍ | 240480/371472 [8:33:07<10:43:39, 3.39it/s] {'loss': 2.7882, 'learning_rate': 4.175375465965653e-07, 'epoch': 10.36} + 65%|██████▍ | 240480/371472 [8:33:07<10:43:39, 3.39it/s] 65%|██████▍ | 240481/371472 [8:33:08<10:44:36, 3.39it/s] 65%|██████▍ | 240482/371472 [8:33:08<10:46:34, 3.38it/s] 65%|██████▍ | 240483/371472 [8:33:08<10:36:34, 3.43it/s] 65%|██████▍ | 240484/371472 [8:33:09<11:13:39, 3.24it/s] 65%|██████▍ | 240485/371472 [8:33:09<10:49:22, 3.36it/s] 65%|██████▍ | 240486/371472 [8:33:09<11:08:25, 3.27it/s] 65%|██████▍ | 240487/371472 [8:33:10<11:03:25, 3.29it/s] 65%|██████▍ | 240488/371472 [8:33:10<10:53:06, 3.34it/s] 65%|██████▍ | 240489/371472 [8:33:10<10:40:46, 3.41it/s] 65%|██████▍ | 240490/371472 [8:33:10<10:52:05, 3.35it/s] 65%|██████▍ | 240491/371472 [8:33:11<10:51:28, 3.35it/s] 65%|██████▍ | 240492/371472 [8:33:11<10:40:32, 3.41it/s] 65%|██████▍ | 240493/371472 [8:33:11<10:25:50, 3.49it/s] 65%|██████▍ | 240494/371472 [8:33:12<10:30:15, 3.46it/s] 65%|██████▍ | 240495/371472 [8:33:12<10:55:57, 3.33it/s] 65%|██████▍ | 240496/371472 [8:33:12<10:45:42, 3.38it/s] 65%|██████▍ | 240497/371472 [8:33:12<10:25:02, 3.49it/s] 65%|██████▍ | 240498/371472 [8:33:13<10:22:35, 3.51it/s] 65%|██████▍ | 240499/371472 [8:33:13<10:51:57, 3.35it/s] 65%|██████▍ | 240500/371472 [8:33:13<11:40:17, 3.12it/s] {'loss': 2.7133, 'learning_rate': 4.174890646210864e-07, 'epoch': 10.36} + 65%|██████▍ | 240500/371472 [8:33:13<11:40:17, 3.12it/s] 65%|██████▍ | 240501/371472 [8:33:14<11:12:53, 3.24it/s] 65%|██████▍ | 240502/371472 [8:33:14<11:29:15, 3.17it/s] 65%|██████▍ | 240503/371472 [8:33:14<11:07:28, 3.27it/s] 65%|██████▍ | 240504/371472 [8:33:15<10:35:25, 3.44it/s] 65%|██████▍ | 240505/371472 [8:33:15<10:31:27, 3.46it/s] 65%|██████▍ | 240506/371472 [8:33:15<10:28:50, 3.47it/s] 65%|██████▍ | 240507/371472 [8:33:15<10:25:59, 3.49it/s] 65%|██████▍ | 240508/371472 [8:33:16<11:33:58, 3.15it/s] 65%|██████▍ | 240509/371472 [8:33:16<11:06:33, 3.27it/s] 65%|██████▍ | 240510/371472 [8:33:16<10:41:08, 3.40it/s] 65%|██████▍ | 240511/371472 [8:33:17<10:28:34, 3.47it/s] 65%|██████▍ | 240512/371472 [8:33:17<10:28:55, 3.47it/s] 65%|██████▍ | 240513/371472 [8:33:17<10:19:15, 3.52it/s] 65%|██████▍ | 240514/371472 [8:33:18<10:17:50, 3.53it/s] 65%|██████▍ | 240515/371472 [8:33:18<10:09:38, 3.58it/s] 65%|██████▍ | 240516/371472 [8:33:18<10:20:19, 3.52it/s] 65%|██████▍ | 240517/371472 [8:33:18<11:09:25, 3.26it/s] 65%|██████▍ | 240518/371472 [8:33:19<10:46:14, 3.38it/s] 65%|██████▍ | 240519/371472 [8:33:19<10:55:19, 3.33it/s] 65%|██████▍ | 240520/371472 [8:33:19<10:59:47, 3.31it/s] {'loss': 2.6893, 'learning_rate': 4.174405826456075e-07, 'epoch': 10.36} + 65%|██████▍ | 240520/371472 [8:33:19<10:59:47, 3.31it/s] 65%|██████▍ | 240521/371472 [8:33:20<11:12:06, 3.25it/s] 65%|██████▍ | 240522/371472 [8:33:20<11:16:36, 3.23it/s] 65%|██████▍ | 240523/371472 [8:33:20<10:59:37, 3.31it/s] 65%|██████▍ | 240524/371472 [8:33:21<10:47:09, 3.37it/s] 65%|██████▍ | 240525/371472 [8:33:21<10:49:53, 3.36it/s] 65%|██████▍ | 240526/371472 [8:33:21<11:20:52, 3.21it/s] 65%|██████▍ | 240527/371472 [8:33:21<10:56:37, 3.32it/s] 65%|██████▍ | 240528/371472 [8:33:22<10:39:03, 3.41it/s] 65%|██████▍ | 240529/371472 [8:33:22<11:00:51, 3.30it/s] 65%|██████▍ | 240530/371472 [8:33:22<10:37:19, 3.42it/s] 65%|██████▍ | 240531/371472 [8:33:23<10:31:47, 3.45it/s] 65%|██████▍ | 240532/371472 [8:33:23<10:26:07, 3.49it/s] 65%|██████▍ | 240533/371472 [8:33:23<11:26:15, 3.18it/s] 65%|██████▍ | 240534/371472 [8:33:24<11:08:51, 3.26it/s] 65%|██████▍ | 240535/371472 [8:33:24<10:57:24, 3.32it/s] 65%|██████▍ | 240536/371472 [8:33:24<10:45:26, 3.38it/s] 65%|██████▍ | 240537/371472 [8:33:24<10:35:58, 3.43it/s] 65%|██████▍ | 240538/371472 [8:33:25<11:22:31, 3.20it/s] 65%|██████▍ | 240539/371472 [8:33:25<10:52:39, 3.34it/s] 65%|██████▍ | 240540/371472 [8:33:25<10:47:29, 3.37it/s] {'loss': 2.7598, 'learning_rate': 4.173921006701286e-07, 'epoch': 10.36} + 65%|██████▍ | 240540/371472 [8:33:25<10:47:29, 3.37it/s] 65%|██████▍ | 240541/371472 [8:33:26<10:50:36, 3.35it/s] 65%|██████▍ | 240542/371472 [8:33:26<10:42:33, 3.40it/s] 65%|██████▍ | 240543/371472 [8:33:26<11:19:03, 3.21it/s] 65%|██████▍ | 240544/371472 [8:33:27<11:50:36, 3.07it/s] 65%|██████▍ | 240545/371472 [8:33:27<11:58:45, 3.04it/s] 65%|██████▍ | 240546/371472 [8:33:27<11:47:05, 3.09it/s] 65%|██████▍ | 240547/371472 [8:33:28<11:33:09, 3.15it/s] 65%|██████▍ | 240548/371472 [8:33:28<11:02:49, 3.29it/s] 65%|██████▍ | 240549/371472 [8:33:28<11:08:10, 3.27it/s] 65%|██████▍ | 240550/371472 [8:33:28<11:00:32, 3.30it/s] 65%|██████▍ | 240551/371472 [8:33:29<10:57:21, 3.32it/s] 65%|██████▍ | 240552/371472 [8:33:29<10:32:19, 3.45it/s] 65%|██████▍ | 240553/371472 [8:33:29<11:25:46, 3.18it/s] 65%|██████▍ | 240554/371472 [8:33:30<10:53:09, 3.34it/s] 65%|██████▍ | 240555/371472 [8:33:30<12:57:59, 2.80it/s] 65%|██████▍ | 240556/371472 [8:33:30<12:03:17, 3.02it/s] 65%|██████▍ | 240557/371472 [8:33:31<11:20:26, 3.21it/s] 65%|██████▍ | 240558/371472 [8:33:31<11:51:57, 3.06it/s] 65%|██████▍ | 240559/371472 [8:33:31<11:27:14, 3.17it/s] 65%|██████▍ | 240560/371472 [8:33:32<11:28:01, 3.17it/s] {'loss': 2.8027, 'learning_rate': 4.1734361869464976e-07, 'epoch': 10.36} + 65%|██████▍ | 240560/371472 [8:33:32<11:28:01, 3.17it/s] 65%|██████▍ | 240561/371472 [8:33:32<11:02:58, 3.29it/s] 65%|██████▍ | 240562/371472 [8:33:32<10:38:13, 3.42it/s] 65%|██████▍ | 240563/371472 [8:33:32<10:43:18, 3.39it/s] 65%|██████▍ | 240564/371472 [8:33:33<10:48:27, 3.36it/s] 65%|██████▍ | 240565/371472 [8:33:33<10:26:46, 3.48it/s] 65%|██████▍ | 240566/371472 [8:33:33<11:31:55, 3.15it/s] 65%|██████▍ | 240567/371472 [8:33:34<11:06:10, 3.28it/s] 65%|██████▍ | 240568/371472 [8:33:34<11:07:49, 3.27it/s] 65%|██████▍ | 240569/371472 [8:33:34<10:54:12, 3.33it/s] 65%|██████▍ | 240570/371472 [8:33:35<10:43:14, 3.39it/s] 65%|██████▍ | 240571/371472 [8:33:35<10:25:09, 3.49it/s] 65%|██████▍ | 240572/371472 [8:33:35<10:27:26, 3.48it/s] 65%|██████▍ | 240573/371472 [8:33:35<10:13:00, 3.56it/s] 65%|██████▍ | 240574/371472 [8:33:36<10:11:00, 3.57it/s] 65%|██████▍ | 240575/371472 [8:33:36<10:18:02, 3.53it/s] 65%|██████▍ | 240576/371472 [8:33:36<11:19:08, 3.21it/s] 65%|██████▍ | 240577/371472 [8:33:37<10:57:30, 3.32it/s] 65%|██████▍ | 240578/371472 [8:33:37<11:02:15, 3.29it/s] 65%|██████▍ | 240579/371472 [8:33:37<11:11:31, 3.25it/s] 65%|██████▍ | 240580/371472 [8:33:38<11:16:53, 3.22it/s] {'loss': 2.834, 'learning_rate': 4.172951367191708e-07, 'epoch': 10.36} + 65%|██████▍ | 240580/371472 [8:33:38<11:16:53, 3.22it/s] 65%|██████▍ | 240581/371472 [8:33:38<11:02:24, 3.29it/s] 65%|██████▍ | 240582/371472 [8:33:38<11:08:08, 3.27it/s] 65%|██████▍ | 240583/371472 [8:33:38<11:01:36, 3.30it/s] 65%|██████▍ | 240584/371472 [8:33:39<10:47:55, 3.37it/s] 65%|██████▍ | 240585/371472 [8:33:39<11:01:19, 3.30it/s] 65%|██████▍ | 240586/371472 [8:33:39<11:20:39, 3.20it/s] 65%|██████▍ | 240587/371472 [8:33:40<10:55:31, 3.33it/s] 65%|██████▍ | 240588/371472 [8:33:40<11:00:57, 3.30it/s] 65%|██████▍ | 240589/371472 [8:33:40<10:52:38, 3.34it/s] 65%|██████▍ | 240590/371472 [8:33:41<10:59:45, 3.31it/s] 65%|██████▍ | 240591/371472 [8:33:41<10:37:25, 3.42it/s] 65%|██████▍ | 240592/371472 [8:33:41<10:35:45, 3.43it/s] 65%|██████▍ | 240593/371472 [8:33:41<10:35:31, 3.43it/s] 65%|██████▍ | 240594/371472 [8:33:42<10:25:51, 3.49it/s] 65%|██████▍ | 240595/371472 [8:33:42<10:20:01, 3.52it/s] 65%|██████▍ | 240596/371472 [8:33:42<10:27:42, 3.48it/s] 65%|██████▍ | 240597/371472 [8:33:43<10:43:49, 3.39it/s] 65%|██████▍ | 240598/371472 [8:33:43<10:29:24, 3.47it/s] 65%|██████▍ | 240599/371472 [8:33:43<10:18:59, 3.52it/s] 65%|██████▍ | 240600/371472 [8:33:43<10:25:56, 3.48it/s] {'loss': 2.6183, 'learning_rate': 4.1724665474369196e-07, 'epoch': 10.36} + 65%|██████▍ | 240600/371472 [8:33:43<10:25:56, 3.48it/s] 65%|██████▍ | 240601/371472 [8:33:44<10:16:50, 3.54it/s] 65%|██████▍ | 240602/371472 [8:33:44<10:02:42, 3.62it/s] 65%|██████▍ | 240603/371472 [8:33:44<11:07:44, 3.27it/s] 65%|██████▍ | 240604/371472 [8:33:45<11:04:13, 3.28it/s] 65%|██████▍ | 240605/371472 [8:33:45<10:52:24, 3.34it/s] 65%|██████▍ | 240606/371472 [8:33:45<10:35:33, 3.43it/s] 65%|██████▍ | 240607/371472 [8:33:46<10:32:25, 3.45it/s] 65%|██████▍ | 240608/371472 [8:33:46<10:29:11, 3.47it/s] 65%|██████▍ | 240609/371472 [8:33:46<10:26:56, 3.48it/s] 65%|██████▍ | 240610/371472 [8:33:46<10:56:26, 3.32it/s] 65%|██████▍ | 240611/371472 [8:33:47<11:05:54, 3.28it/s] 65%|██████▍ | 240612/371472 [8:33:47<11:18:49, 3.21it/s] 65%|██████▍ | 240613/371472 [8:33:47<11:12:45, 3.24it/s] 65%|████��█▍ | 240614/371472 [8:33:48<10:58:04, 3.31it/s] 65%|██████▍ | 240615/371472 [8:33:48<10:58:32, 3.31it/s] 65%|██████▍ | 240616/371472 [8:33:48<11:10:29, 3.25it/s] 65%|██████▍ | 240617/371472 [8:33:49<10:57:25, 3.32it/s] 65%|██████▍ | 240618/371472 [8:33:49<11:32:35, 3.15it/s] 65%|██████▍ | 240619/371472 [8:33:49<11:15:33, 3.23it/s] 65%|██████▍ | 240620/371472 [8:33:49<10:44:11, 3.39it/s] {'loss': 2.695, 'learning_rate': 4.1719817276821303e-07, 'epoch': 10.36} + 65%|██████▍ | 240620/371472 [8:33:49<10:44:11, 3.39it/s] 65%|██████▍ | 240621/371472 [8:33:50<10:32:00, 3.45it/s] 65%|██████▍ | 240622/371472 [8:33:50<10:29:12, 3.47it/s] 65%|██████▍ | 240623/371472 [8:33:50<10:09:34, 3.58it/s] 65%|██████▍ | 240624/371472 [8:33:51<10:11:46, 3.56it/s] 65%|██████▍ | 240625/371472 [8:33:51<10:05:24, 3.60it/s] 65%|██████▍ | 240626/371472 [8:33:51<10:17:18, 3.53it/s] 65%|██████▍ | 240627/371472 [8:33:51<10:08:22, 3.58it/s] 65%|██████▍ | 240628/371472 [8:33:52<10:07:26, 3.59it/s] 65%|██████▍ | 240629/371472 [8:33:52<10:43:16, 3.39it/s] 65%|██████▍ | 240630/371472 [8:33:52<10:59:02, 3.31it/s] 65%|██████▍ | 240631/371472 [8:33:53<10:46:49, 3.37it/s] 65%|██████▍ | 240632/371472 [8:33:53<10:50:50, 3.35it/s] 65%|██████▍ | 240633/371472 [8:33:53<10:44:10, 3.39it/s] 65%|██████▍ | 240634/371472 [8:33:54<10:48:21, 3.36it/s] 65%|██████▍ | 240635/371472 [8:33:54<10:47:26, 3.37it/s] 65%|██████▍ | 240636/371472 [8:33:54<10:39:21, 3.41it/s] 65%|██████▍ | 240637/371472 [8:33:54<11:12:44, 3.24it/s] 65%|██████▍ | 240638/371472 [8:33:55<10:48:22, 3.36it/s] 65%|██████▍ | 240639/371472 [8:33:55<10:32:32, 3.45it/s] 65%|██████▍ | 240640/371472 [8:33:55<10:33:53, 3.44it/s] {'loss': 2.791, 'learning_rate': 4.1714969079273415e-07, 'epoch': 10.36} + 65%|██████▍ | 240640/371472 [8:33:55<10:33:53, 3.44it/s] 65%|██████▍ | 240641/371472 [8:33:56<10:22:36, 3.50it/s] 65%|██████▍ | 240642/371472 [8:33:56<10:21:45, 3.51it/s] 65%|██████▍ | 240643/371472 [8:33:56<10:28:16, 3.47it/s] 65%|██████▍ | 240644/371472 [8:33:56<10:08:55, 3.58it/s] 65%|██████▍ | 240645/371472 [8:33:57<10:05:10, 3.60it/s] 65%|██████▍ | 240646/371472 [8:33:57<10:28:28, 3.47it/s] 65%|██████▍ | 240647/371472 [8:33:57<10:46:28, 3.37it/s] 65%|██████▍ | 240648/371472 [8:33:58<11:00:24, 3.30it/s] 65%|██████▍ | 240649/371472 [8:33:58<11:03:39, 3.29it/s] 65%|██████▍ | 240650/371472 [8:33:58<11:10:45, 3.25it/s] 65%|██████▍ | 240651/371472 [8:33:59<11:03:59, 3.28it/s] 65%|██████▍ | 240652/371472 [8:33:59<10:49:20, 3.36it/s] 65%|██████▍ | 240653/371472 [8:33:59<11:06:01, 3.27it/s] 65%|██████▍ | 240654/371472 [8:33:59<11:01:07, 3.30it/s] 65%|██████▍ | 240655/371472 [8:34:00<11:19:41, 3.21it/s] 65%|██████▍ | 240656/371472 [8:34:00<11:19:53, 3.21it/s] 65%|██████▍ | 240657/371472 [8:34:00<11:02:17, 3.29it/s] 65%|██████▍ | 240658/371472 [8:34:01<10:46:29, 3.37it/s] 65%|██████▍ | 240659/371472 [8:34:01<10:34:44, 3.43it/s] 65%|██████▍ | 240660/371472 [8:34:01<10:35:02, 3.43it/s] {'loss': 2.6879, 'learning_rate': 4.171012088172552e-07, 'epoch': 10.37} + 65%|██████▍ | 240660/371472 [8:34:01<10:35:02, 3.43it/s] 65%|██████▍ | 240661/371472 [8:34:01<10:38:04, 3.42it/s] 65%|██████▍ | 240662/371472 [8:34:02<10:53:22, 3.34it/s] 65%|██████▍ | 240663/371472 [8:34:02<10:36:12, 3.43it/s] 65%|██████▍ | 240664/371472 [8:34:02<10:24:59, 3.49it/s] 65%|██████▍ | 240665/371472 [8:34:03<10:13:07, 3.56it/s] 65%|██████▍ | 240666/371472 [8:34:03<10:08:14, 3.58it/s] 65%|██████▍ | 240667/371472 [8:34:03<10:31:15, 3.45it/s] 65%|██████▍ | 240668/371472 [8:34:04<10:27:08, 3.48it/s] 65%|██████▍ | 240669/371472 [8:34:04<10:34:44, 3.43it/s] 65%|██████▍ | 240670/371472 [8:34:04<10:24:17, 3.49it/s] 65%|██████▍ | 240671/371472 [8:34:04<10:14:01, 3.55it/s] 65%|██████▍ | 240672/371472 [8:34:05<10:16:49, 3.53it/s] 65%|██████▍ | 240673/371472 [8:34:05<10:15:21, 3.54it/s] 65%|���█████▍ | 240674/371472 [8:34:05<10:06:42, 3.59it/s] 65%|██████▍ | 240675/371472 [8:34:05<10:08:32, 3.58it/s] 65%|██████▍ | 240676/371472 [8:34:06<10:15:00, 3.54it/s] 65%|██████▍ | 240677/371472 [8:34:06<10:50:34, 3.35it/s] 65%|██████▍ | 240678/371472 [8:34:06<11:36:56, 3.13it/s] 65%|██████▍ | 240679/371472 [8:34:07<11:13:41, 3.24it/s] 65%|██████▍ | 240680/371472 [8:34:07<11:00:42, 3.30it/s] {'loss': 2.9086, 'learning_rate': 4.170527268417764e-07, 'epoch': 10.37} + 65%|██████▍ | 240680/371472 [8:34:07<11:00:42, 3.30it/s] 65%|██████▍ | 240681/371472 [8:34:07<11:01:42, 3.29it/s] 65%|██████▍ | 240682/371472 [8:34:08<11:13:37, 3.24it/s] 65%|██████▍ | 240683/371472 [8:34:08<11:01:49, 3.29it/s] 65%|██████▍ | 240684/371472 [8:34:08<11:16:35, 3.22it/s] 65%|██████▍ | 240685/371472 [8:34:09<11:15:52, 3.23it/s] 65%|██████▍ | 240686/371472 [8:34:09<11:48:18, 3.08it/s] 65%|██████▍ | 240687/371472 [8:34:09<11:22:59, 3.19it/s] 65%|██████▍ | 240688/371472 [8:34:10<11:09:45, 3.25it/s] 65%|██████▍ | 240689/371472 [8:34:10<10:54:47, 3.33it/s] 65%|██████▍ | 240690/371472 [8:34:10<10:46:11, 3.37it/s] 65%|██████▍ | 240691/371472 [8:34:10<10:31:12, 3.45it/s] 65%|██████▍ | 240692/371472 [8:34:11<10:42:25, 3.39it/s] 65%|██████▍ | 240693/371472 [8:34:11<10:51:35, 3.35it/s] 65%|██████▍ | 240694/371472 [8:34:11<11:09:12, 3.26it/s] 65%|██████▍ | 240695/371472 [8:34:12<11:06:22, 3.27it/s] 65%|██████▍ | 240696/371472 [8:34:12<11:06:18, 3.27it/s] 65%|██████▍ | 240697/371472 [8:34:12<11:04:11, 3.28it/s] 65%|██████▍ | 240698/371472 [8:34:12<10:33:56, 3.44it/s] 65%|██████▍ | 240699/371472 [8:34:13<10:24:25, 3.49it/s] 65%|██████▍ | 240700/371472 [8:34:13<10:17:01, 3.53it/s] {'loss': 2.6636, 'learning_rate': 4.170042448662974e-07, 'epoch': 10.37} + 65%|██████▍ | 240700/371472 [8:34:13<10:17:01, 3.53it/s] 65%|██████▍ | 240701/371472 [8:34:13<10:14:35, 3.55it/s] 65%|██████▍ | 240702/371472 [8:34:14<10:08:09, 3.58it/s] 65%|██████▍ | 240703/371472 [8:34:14<10:08:55, 3.58it/s] 65%|██████▍ | 240704/371472 [8:34:14<11:18:25, 3.21it/s] 65%|██████▍ | 240705/371472 [8:34:15<11:01:11, 3.30it/s] 65%|██████▍ | 240706/371472 [8:34:15<11:23:20, 3.19it/s] 65%|██████▍ | 240707/371472 [8:34:15<12:48:15, 2.84it/s] 65%|██████▍ | 240708/371472 [8:34:16<12:34:56, 2.89it/s] 65%|██████▍ | 240709/371472 [8:34:16<12:22:04, 2.94it/s] 65%|██████▍ | 240710/371472 [8:34:16<12:01:03, 3.02it/s] 65%|██████▍ | 240711/371472 [8:34:17<11:16:30, 3.22it/s] 65%|██████▍ | 240712/371472 [8:34:17<10:49:37, 3.35it/s] 65%|██████▍ | 240713/371472 [8:34:17<10:43:14, 3.39it/s] 65%|██████▍ | 240714/371472 [8:34:17<10:21:59, 3.50it/s] 65%|██████▍ | 240715/371472 [8:34:18<10:39:24, 3.41it/s] 65%|██████▍ | 240716/371472 [8:34:18<10:53:11, 3.34it/s] 65%|██████▍ | 240717/371472 [8:34:18<11:00:38, 3.30it/s] 65%|██████▍ | 240718/371472 [8:34:19<11:04:02, 3.28it/s] 65%|██████▍ | 240719/371472 [8:34:19<11:44:17, 3.09it/s] 65%|██████▍ | 240720/371472 [8:34:19<11:22:48, 3.19it/s] {'loss': 2.6825, 'learning_rate': 4.169557628908186e-07, 'epoch': 10.37} + 65%|██████▍ | 240720/371472 [8:34:19<11:22:48, 3.19it/s] 65%|██████▍ | 240721/371472 [8:34:20<11:26:15, 3.18it/s] 65%|██████▍ | 240722/371472 [8:34:20<11:03:18, 3.29it/s] 65%|██████▍ | 240723/371472 [8:34:20<11:13:49, 3.23it/s] 65%|██████▍ | 240724/371472 [8:34:20<10:53:49, 3.33it/s] 65%|██████▍ | 240725/371472 [8:34:21<10:53:35, 3.33it/s] 65%|██████▍ | 240726/371472 [8:34:21<10:55:32, 3.32it/s] 65%|██████▍ | 240727/371472 [8:34:21<11:00:07, 3.30it/s] 65%|██████▍ | 240728/371472 [8:34:22<10:58:41, 3.31it/s] 65%|██████▍ | 240729/371472 [8:34:22<10:59:47, 3.30it/s] 65%|██████▍ | 240730/371472 [8:34:22<10:37:50, 3.42it/s] 65%|██████▍ | 240731/371472 [8:34:23<11:04:18, 3.28it/s] 65%|██████▍ | 240732/371472 [8:34:23<10:55:58, 3.32it/s] 65%|██████▍ | 240733/371472 [8:34:23<10:42:18, 3.39it/s] 65%|██████▍ | 240734/371472 [8:34:24<11:17:30, 3.22it/s] 65%|██████▍ | 240735/371472 [8:34:24<10:48:07, 3.36it/s] 65%|██████▍ | 240736/371472 [8:34:24<10:25:37, 3.48it/s] 65%|██████▍ | 240737/371472 [8:34:24<10:33:32, 3.44it/s] 65%|██████▍ | 240738/371472 [8:34:25<10:22:43, 3.50it/s] 65%|██████▍ | 240739/371472 [8:34:25<10:18:04, 3.53it/s] 65%|██████▍ | 240740/371472 [8:34:25<10:28:48, 3.47it/s] {'loss': 2.8097, 'learning_rate': 4.1690728091533967e-07, 'epoch': 10.37} + 65%|██████▍ | 240740/371472 [8:34:25<10:28:48, 3.47it/s] 65%|██████▍ | 240741/371472 [8:34:26<10:52:45, 3.34it/s] 65%|██████▍ | 240742/371472 [8:34:26<10:46:13, 3.37it/s] 65%|██████▍ | 240743/371472 [8:34:26<11:21:45, 3.20it/s] 65%|██████▍ | 240744/371472 [8:34:26<10:54:54, 3.33it/s] 65%|██████▍ | 240745/371472 [8:34:27<10:44:17, 3.38it/s] 65%|██████▍ | 240746/371472 [8:34:27<10:59:53, 3.30it/s] 65%|██████▍ | 240747/371472 [8:34:27<11:42:52, 3.10it/s] 65%|██████▍ | 240748/371472 [8:34:28<11:53:54, 3.05it/s] 65%|██████▍ | 240749/371472 [8:34:28<11:26:18, 3.17it/s] 65%|██████▍ | 240750/371472 [8:34:28<11:09:05, 3.26it/s] 65%|██████▍ | 240751/371472 [8:34:29<11:00:27, 3.30it/s] 65%|██████▍ | 240752/371472 [8:34:29<10:59:13, 3.30it/s] 65%|██████▍ | 240753/371472 [8:34:29<11:18:28, 3.21it/s] 65%|██████▍ | 240754/371472 [8:34:30<11:24:04, 3.18it/s] 65%|██████▍ | 240755/371472 [8:34:30<10:52:35, 3.34it/s] 65%|██████▍ | 240756/371472 [8:34:30<10:36:51, 3.42it/s] 65%|██████▍ | 240757/371472 [8:34:30<11:34:30, 3.14it/s] 65%|██████▍ | 240758/371472 [8:34:31<11:35:15, 3.13it/s] 65%|██████▍ | 240759/371472 [8:34:31<11:48:45, 3.07it/s] 65%|██████▍ | 240760/371472 [8:34:31<11:28:48, 3.16it/s] {'loss': 2.5656, 'learning_rate': 4.168587989398608e-07, 'epoch': 10.37} + 65%|██████▍ | 240760/371472 [8:34:31<11:28:48, 3.16it/s] 65%|██████▍ | 240761/371472 [8:34:32<11:36:06, 3.13it/s] 65%|██████▍ | 240762/371472 [8:34:32<11:28:44, 3.16it/s] 65%|██████▍ | 240763/371472 [8:34:32<11:45:40, 3.09it/s] 65%|██████▍ | 240764/371472 [8:34:33<11:34:41, 3.14it/s] 65%|██████▍ | 240765/371472 [8:34:33<11:19:30, 3.21it/s] 65%|██████▍ | 240766/371472 [8:34:33<11:19:59, 3.20it/s] 65%|██████▍ | 240767/371472 [8:34:34<11:24:58, 3.18it/s] 65%|██████▍ | 240768/371472 [8:34:34<10:54:01, 3.33it/s] 65%|██████▍ | 240769/371472 [8:34:34<10:51:02, 3.35it/s] 65%|██████▍ | 240770/371472 [8:34:34<10:40:52, 3.40it/s] 65%|██████▍ | 240771/371472 [8:34:35<10:21:26, 3.51it/s] 65%|██████▍ | 240772/371472 [8:34:35<10:39:10, 3.41it/s] 65%|██████▍ | 240773/371472 [8:34:35<10:40:00, 3.40it/s] 65%|██████▍ | 240774/371472 [8:34:36<10:34:44, 3.43it/s] 65%|██████▍ | 240775/371472 [8:34:36<10:46:31, 3.37it/s] 65%|██████▍ | 240776/371472 [8:34:36<10:37:08, 3.42it/s] 65%|██████▍ | 240777/371472 [8:34:37<10:27:41, 3.47it/s] 65%|██████▍ | 240778/371472 [8:34:37<10:51:09, 3.35it/s] 65%|██████▍ | 240779/371472 [8:34:37<11:16:23, 3.22it/s] 65%|██████▍ | 240780/371472 [8:34:37<11:18:45, 3.21it/s] {'loss': 2.8078, 'learning_rate': 4.1681031696438186e-07, 'epoch': 10.37} + 65%|██████▍ | 240780/371472 [8:34:37<11:18:45, 3.21it/s] 65%|██████▍ | 240781/371472 [8:34:38<11:06:58, 3.27it/s] 65%|██████▍ | 240782/371472 [8:34:38<10:52:05, 3.34it/s] 65%|██████▍ | 240783/371472 [8:34:38<11:00:06, 3.30it/s] 65%|██████▍ | 240784/371472 [8:34:39<10:56:36, 3.32it/s] 65%|██████▍ | 240785/371472 [8:34:39<10:50:11, 3.35it/s] 65%|██████▍ | 240786/371472 [8:34:39<10:51:29, 3.34it/s] 65%|██████▍ | 240787/371472 [8:34:40<10:33:11, 3.44it/s] 65%|██████▍ | 240788/371472 [8:34:40<10:14:08, 3.55it/s] 65%|██████▍ | 240789/371472 [8:34:40<10:21:11, 3.51it/s] 65%|██████▍ | 240790/371472 [8:34:40<10:17:15, 3.53it/s] 65%|██████▍ | 240791/371472 [8:34:41<9:57:35, 3.64it/s] 65%|██████▍ | 240792/371472 [8:34:41<10:16:50, 3.53it/s] 65%|██████▍ | 240793/371472 [8:34:41<10:12:26, 3.56it/s] 65%|██████▍ | 240794/371472 [8:34:42<10:34:56, 3.43it/s] 65%|██████▍ | 240795/371472 [8:34:42<11:04:37, 3.28it/s] 65%|██████▍ | 240796/371472 [8:34:42<10:45:38, 3.37it/s] 65%|██████▍ | 240797/371472 [8:34:42<10:33:28, 3.44it/s] 65%|██████▍ | 240798/371472 [8:34:43<10:49:31, 3.35it/s] 65%|██████▍ | 240799/371472 [8:34:43<10:44:29, 3.38it/s] 65%|██████▍ | 240800/371472 [8:34:43<11:02:51, 3.29it/s] {'loss': 2.8529, 'learning_rate': 4.1676183498890304e-07, 'epoch': 10.37} + 65%|██████▍ | 240800/371472 [8:34:43<11:02:51, 3.29it/s] 65%|██████▍ | 240801/371472 [8:34:44<10:57:07, 3.31it/s] 65%|██████▍ | 240802/371472 [8:34:44<10:45:26, 3.37it/s] 65%|██████▍ | 240803/371472 [8:34:44<11:06:36, 3.27it/s] 65%|██████▍ | 240804/371472 [8:34:45<12:12:21, 2.97it/s] 65%|██████▍ | 240805/371472 [8:34:45<11:28:59, 3.16it/s] 65%|██████▍ | 240806/371472 [8:34:45<11:24:56, 3.18it/s] 65%|██████▍ | 240807/371472 [8:34:46<11:43:50, 3.09it/s] 65%|██████▍ | 240808/371472 [8:34:46<11:09:54, 3.25it/s] 65%|██████▍ | 240809/371472 [8:34:46<11:13:21, 3.23it/s] 65%|██████▍ | 240810/371472 [8:34:46<11:02:46, 3.29it/s] 65%|██████▍ | 240811/371472 [8:34:47<10:27:18, 3.47it/s] 65%|██████▍ | 240812/371472 [8:34:47<10:19:28, 3.52it/s] 65%|██████▍ | 240813/371472 [8:34:47<10:57:06, 3.31it/s] 65%|██████▍ | 240814/371472 [8:34:48<10:57:08, 3.31it/s] 65%|██████▍ | 240815/371472 [8:34:48<10:45:00, 3.38it/s] 65%|██████▍ | 240816/371472 [8:34:48<10:30:19, 3.45it/s] 65%|██████▍ | 240817/371472 [8:34:48<10:37:26, 3.42it/s] 65%|██████▍ | 240818/371472 [8:34:49<10:34:23, 3.43it/s] 65%|██████▍ | 240819/371472 [8:34:49<10:29:24, 3.46it/s] 65%|██████▍ | 240820/371472 [8:34:49<10:35:15, 3.43it/s] {'loss': 2.7072, 'learning_rate': 4.1671335301342406e-07, 'epoch': 10.37} + 65%|██████▍ | 240820/371472 [8:34:49<10:35:15, 3.43it/s] 65%|██████▍ | 240821/371472 [8:34:50<10:13:01, 3.55it/s] 65%|██████▍ | 240822/371472 [8:34:50<10:10:30, 3.57it/s] 65%|██████▍ | 240823/371472 [8:34:50<9:59:29, 3.63it/s] 65%|██████▍ | 240824/371472 [8:34:50<9:46:15, 3.71it/s] 65%|██████▍ | 240825/371472 [8:34:51<10:04:51, 3.60it/s] 65%|██████▍ | 240826/371472 [8:34:51<10:00:44, 3.62it/s] 65%|██████▍ | 240827/371472 [8:34:51<10:11:34, 3.56it/s] 65%|██████▍ | 240828/371472 [8:34:52<10:00:48, 3.62it/s] 65%|██████▍ | 240829/371472 [8:34:52<10:00:11, 3.63it/s] 65%|██████▍ | 240830/371472 [8:34:52<10:03:54, 3.61it/s] 65%|██████▍ | 240831/371472 [8:34:52<9:57:57, 3.64it/s] 65%|██████▍ | 240832/371472 [8:34:53<9:55:36, 3.66it/s] 65%|██████▍ | 240833/371472 [8:34:53<10:04:05, 3.60it/s] 65%|██████▍ | 240834/371472 [8:34:53<10:15:48, 3.54it/s] 65%|██████▍ | 240835/371472 [8:34:54<10:55:24, 3.32it/s] 65%|██████▍ | 240836/371472 [8:34:54<10:53:18, 3.33it/s] 65%|██████▍ | 240837/371472 [8:34:54<10:37:29, 3.42it/s] 65%|██████▍ | 240838/371472 [8:34:54<10:30:09, 3.46it/s] 65%|██████▍ | 240839/371472 [8:34:55<10:30:59, 3.45it/s] 65%|██████▍ | 240840/371472 [8:34:55<11:07:43, 3.26it/s] {'loss': 2.7549, 'learning_rate': 4.1666487103794524e-07, 'epoch': 10.37} + 65%|██████▍ | 240840/371472 [8:34:55<11:07:43, 3.26it/s] 65%|██████▍ | 240841/371472 [8:34:55<10:51:29, 3.34it/s] 65%|██████▍ | 240842/371472 [8:34:56<10:31:09, 3.45it/s] 65%|██████▍ | 240843/371472 [8:34:56<10:56:37, 3.32it/s] 65%|██████▍ | 240844/371472 [8:34:56<10:45:14, 3.37it/s] 65%|██████▍ | 240845/371472 [8:34:57<10:43:17, 3.38it/s] 65%|██████▍ | 240846/371472 [8:34:57<10:53:51, 3.33it/s] 65%|██████▍ | 240847/371472 [8:34:57<10:43:22, 3.38it/s] 65%|██████▍ | 240848/371472 [8:34:57<10:41:27, 3.39it/s] 65%|██████▍ | 240849/371472 [8:34:58<10:33:39, 3.44it/s] 65%|██████▍ | 240850/371472 [8:34:58<10:36:31, 3.42it/s] 65%|██████▍ | 240851/371472 [8:34:58<10:29:33, 3.46it/s] 65%|██████▍ | 240852/371472 [8:34:59<10:56:16, 3.32it/s] 65%|██████▍ | 240853/371472 [8:34:59<10:31:36, 3.45it/s] 65%|██████▍ | 240854/371472 [8:34:59<10:24:21, 3.49it/s] 65%|██████▍ | 240855/371472 [8:34:59<10:23:05, 3.49it/s] 65%|██████▍ | 240856/371472 [8:35:00<10:37:20, 3.42it/s] 65%|██████▍ | 240857/371472 [8:35:00<10:55:00, 3.32it/s] 65%|██████▍ | 240858/371472 [8:35:00<10:52:49, 3.33it/s] 65%|██████▍ | 240859/371472 [8:35:01<12:55:33, 2.81it/s] 65%|██████▍ | 240860/371472 [8:35:01<12:39:45, 2.87it/s] {'loss': 2.6892, 'learning_rate': 4.166163890624663e-07, 'epoch': 10.37} + 65%|██████▍ | 240860/371472 [8:35:01<12:39:45, 2.87it/s] 65%|██████▍ | 240861/371472 [8:35:01<11:51:15, 3.06it/s] 65%|██████▍ | 240862/371472 [8:35:02<11:13:28, 3.23it/s] 65%|██████▍ | 240863/371472 [8:35:02<11:03:09, 3.28it/s] 65%|██████▍ | 240864/371472 [8:35:02<11:05:40, 3.27it/s] 65%|██████▍ | 240865/371472 [8:35:03<11:16:48, 3.22it/s] 65%|██████▍ | 240866/371472 [8:35:03<10:55:20, 3.32it/s] 65%|██████▍ | 240867/371472 [8:35:03<11:16:09, 3.22it/s] 65%|██████▍ | 240868/371472 [8:35:04<10:42:18, 3.39it/s] 65%|██████▍ | 240869/371472 [8:35:04<11:12:54, 3.23it/s] 65%|██████▍ | 240870/371472 [8:35:04<10:49:49, 3.35it/s] 65%|██████▍ | 240871/371472 [8:35:04<11:03:08, 3.28it/s] 65%|██████▍ | 240872/371472 [8:35:05<10:57:24, 3.31it/s] 65%|██████▍ | 240873/371472 [8:35:05<10:48:48, 3.35it/s] 65%|██████▍ | 240874/371472 [8:35:05<10:49:58, 3.35it/s] 65%|██████▍ | 240875/371472 [8:35:06<10:28:31, 3.46it/s] 65%|██████▍ | 240876/371472 [8:35:06<10:16:42, 3.53it/s] 65%|██████▍ | 240877/371472 [8:35:06<10:26:35, 3.47it/s] 65%|██████▍ | 240878/371472 [8:35:06<10:06:01, 3.59it/s] 65%|██████▍ | 240879/371472 [8:35:07<10:25:27, 3.48it/s] 65%|██████▍ | 240880/371472 [8:35:07<10:24:35, 3.48it/s] {'loss': 2.824, 'learning_rate': 4.1656790708698743e-07, 'epoch': 10.38} + 65%|██████▍ | 240880/371472 [8:35:07<10:24:35, 3.48it/s] 65%|██████▍ | 240881/371472 [8:35:07<10:25:19, 3.48it/s] 65%|██████▍ | 240882/371472 [8:35:08<10:56:04, 3.32it/s] 65%|██████▍ | 240883/371472 [8:35:08<10:48:49, 3.35it/s] 65%|██████▍ | 240884/371472 [8:35:08<10:44:47, 3.38it/s] 65%|██████▍ | 240885/371472 [8:35:08<10:33:00, 3.44it/s] 65%|██████▍ | 240886/371472 [8:35:09<10:30:21, 3.45it/s] 65%|██████▍ | 240887/371472 [8:35:09<10:32:15, 3.44it/s] 65%|██████▍ | 240888/371472 [8:35:09<10:21:05, 3.50it/s] 65%|██████▍ | 240889/371472 [8:35:10<10:22:47, 3.49it/s] 65%|██████▍ | 240890/371472 [8:35:10<10:28:12, 3.46it/s] 65%|██████▍ | 240891/371472 [8:35:10<10:25:26, 3.48it/s] 65%|██████▍ | 240892/371472 [8:35:11<10:38:59, 3.41it/s] 65%|██████▍ | 240893/371472 [8:35:11<10:47:31, 3.36it/s] 65%|██████▍ | 240894/371472 [8:35:11<10:29:31, 3.46it/s] 65%|██████▍ | 240895/371472 [8:35:11<10:57:03, 3.31it/s] 65%|██████▍ | 240896/371472 [8:35:12<10:55:28, 3.32it/s] 65%|██████▍ | 240897/371472 [8:35:12<11:11:54, 3.24it/s] 65%|██████▍ | 240898/371472 [8:35:12<11:33:26, 3.14it/s] 65%|██████▍ | 240899/371472 [8:35:13<11:06:52, 3.26it/s] 65%|██████▍ | 240900/371472 [8:35:13<10:51:20, 3.34it/s] {'loss': 2.7393, 'learning_rate': 4.165194251115085e-07, 'epoch': 10.38} + 65%|██████▍ | 240900/371472 [8:35:13<10:51:20, 3.34it/s] 65%|██████▍ | 240901/371472 [8:35:13<10:46:34, 3.37it/s] 65%|██████▍ | 240902/371472 [8:35:14<10:25:04, 3.48it/s] 65%|██████▍ | 240903/371472 [8:35:14<10:26:37, 3.47it/s] 65%|██████▍ | 240904/371472 [8:35:14<10:20:49, 3.51it/s] 65%|██████▍ | 240905/371472 [8:35:14<10:24:21, 3.49it/s] 65%|██████▍ | 240906/371472 [8:35:15<10:21:38, 3.50it/s] 65%|██████▍ | 240907/371472 [8:35:15<10:25:40, 3.48it/s] 65%|██████▍ | 240908/371472 [8:35:15<10:35:24, 3.42it/s] 65%|██████▍ | 240909/371472 [8:35:16<10:37:06, 3.42it/s] 65%|██████▍ | 240910/371472 [8:35:16<10:38:21, 3.41it/s] 65%|██████▍ | 240911/371472 [8:35:16<11:38:00, 3.12it/s] 65%|██████▍ | 240912/371472 [8:35:16<11:11:14, 3.24it/s] 65%|██████▍ | 240913/371472 [8:35:17<10:45:50, 3.37it/s] 65%|██████▍ | 240914/371472 [8:35:17<10:33:39, 3.43it/s] 65%|██████▍ | 240915/371472 [8:35:17<10:35:54, 3.42it/s] 65%|██████▍ | 240916/371472 [8:35:18<10:35:03, 3.43it/s] 65%|██████▍ | 240917/371472 [8:35:18<10:23:22, 3.49it/s] 65%|██████▍ | 240918/371472 [8:35:18<11:03:49, 3.28it/s] 65%|██████▍ | 240919/371472 [8:35:19<10:34:38, 3.43it/s] 65%|██████▍ | 240920/371472 [8:35:19<10:31:20, 3.45it/s] {'loss': 2.8184, 'learning_rate': 4.164709431360297e-07, 'epoch': 10.38} + 65%|██████▍ | 240920/371472 [8:35:19<10:31:20, 3.45it/s] 65%|██████▍ | 240921/371472 [8:35:19<11:06:58, 3.26it/s] 65%|██████▍ | 240922/371472 [8:35:19<11:23:10, 3.18it/s] 65%|██████▍ | 240923/371472 [8:35:20<11:06:57, 3.26it/s] 65%|██████▍ | 240924/371472 [8:35:20<10:39:56, 3.40it/s] 65%|██████▍ | 240925/371472 [8:35:20<10:35:41, 3.42it/s] 65%|██████▍ | 240926/371472 [8:35:21<10:58:39, 3.30it/s] 65%|██████▍ | 240927/371472 [8:35:21<10:47:47, 3.36it/s] 65%|██████▍ | 240928/371472 [8:35:21<10:39:04, 3.40it/s] 65%|██████▍ | 240929/371472 [8:35:22<10:33:50, 3.43it/s] 65%|██████▍ | 240930/371472 [8:35:22<10:13:28, 3.55it/s] 65%|██████▍ | 240931/371472 [8:35:22<10:07:37, 3.58it/s] 65%|██████▍ | 240932/371472 [8:35:22<10:22:02, 3.50it/s] 65%|██████▍ | 240933/371472 [8:35:23<10:51:43, 3.34it/s] 65%|██████▍ | 240934/371472 [8:35:23<11:13:28, 3.23it/s] 65%|██████▍ | 240935/371472 [8:35:23<10:55:53, 3.32it/s] 65%|██████▍ | 240936/371472 [8:35:24<10:37:03, 3.42it/s] 65%|██████▍ | 240937/371472 [8:35:24<10:56:44, 3.31it/s] 65%|██████▍ | 240938/371472 [8:35:24<10:35:18, 3.42it/s] 65%|██████▍ | 240939/371472 [8:35:24<10:34:53, 3.43it/s] 65%|██████▍ | 240940/371472 [8:35:25<10:45:30, 3.37it/s] {'loss': 2.7304, 'learning_rate': 4.1642246116055075e-07, 'epoch': 10.38} + 65%|██████▍ | 240940/371472 [8:35:25<10:45:30, 3.37it/s] 65%|██████▍ | 240941/371472 [8:35:25<10:52:53, 3.33it/s] 65%|██████▍ | 240942/371472 [8:35:25<10:42:17, 3.39it/s] 65%|██████▍ | 240943/371472 [8:35:26<10:55:48, 3.32it/s] 65%|██████▍ | 240944/371472 [8:35:26<13:17:54, 2.73it/s] 65%|██████▍ | 240945/371472 [8:35:26<12:38:11, 2.87it/s] 65%|██████▍ | 240946/371472 [8:35:27<12:02:52, 3.01it/s] 65%|██████▍ | 240947/371472 [8:35:27<12:12:56, 2.97it/s] 65%|██████▍ | 240948/371472 [8:35:27<11:40:31, 3.11it/s] 65%|██████▍ | 240949/371472 [8:35:28<11:11:07, 3.24it/s] 65%|██████▍ | 240950/371472 [8:35:28<10:46:10, 3.37it/s] 65%|██████▍ | 240951/371472 [8:35:28<10:46:36, 3.36it/s] 65%|██████▍ | 240952/371472 [8:35:29<10:32:52, 3.44it/s] 65%|██████▍ | 240953/371472 [8:35:29<10:29:57, 3.45it/s] 65%|██████▍ | 240954/371472 [8:35:29<10:06:57, 3.58it/s] 65%|██████▍ | 240955/371472 [8:35:29<10:03:31, 3.60it/s] 65%|██████▍ | 240956/371472 [8:35:30<10:02:36, 3.61it/s] 65%|██████▍ | 240957/371472 [8:35:30<10:31:15, 3.45it/s] 65%|██████▍ | 240958/371472 [8:35:30<10:17:14, 3.52it/s] 65%|██████▍ | 240959/371472 [8:35:30<10:16:03, 3.53it/s] 65%|██████▍ | 240960/371472 [8:35:31<11:17:16, 3.21it/s] {'loss': 2.7009, 'learning_rate': 4.163739791850719e-07, 'epoch': 10.38} + 65%|██████▍ | 240960/371472 [8:35:31<11:17:16, 3.21it/s] 65%|██████▍ | 240961/371472 [8:35:31<11:12:58, 3.23it/s] 65%|██████▍ | 240962/371472 [8:35:31<11:01:54, 3.29it/s] 65%|██████▍ | 240963/371472 [8:35:32<10:40:33, 3.40it/s] 65%|██████▍ | 240964/371472 [8:35:32<11:14:00, 3.23it/s] 65%|██████▍ | 240965/371472 [8:35:32<10:51:11, 3.34it/s] 65%|██████▍ | 240966/371472 [8:35:33<10:43:34, 3.38it/s] 65%|██████▍ | 240967/371472 [8:35:33<10:21:18, 3.50it/s] 65%|██████▍ | 240968/371472 [8:35:33<10:09:36, 3.57it/s] 65%|██████▍ | 240969/371472 [8:35:34<10:36:44, 3.42it/s] 65%|██████▍ | 240970/371472 [8:35:34<10:23:14, 3.49it/s] 65%|██████▍ | 240971/371472 [8:35:34<10:18:49, 3.51it/s] 65%|██████▍ | 240972/371472 [8:35:34<10:18:32, 3.52it/s] 65%|██���███▍ | 240973/371472 [8:35:35<10:09:53, 3.57it/s] 65%|██████▍ | 240974/371472 [8:35:35<10:18:43, 3.52it/s] 65%|██████▍ | 240975/371472 [8:35:35<10:07:54, 3.58it/s] 65%|██████▍ | 240976/371472 [8:35:35<10:02:34, 3.61it/s] 65%|██████▍ | 240977/371472 [8:35:36<10:25:06, 3.48it/s] 65%|██████▍ | 240978/371472 [8:35:36<10:26:12, 3.47it/s] 65%|██████▍ | 240979/371472 [8:35:36<10:25:17, 3.48it/s] 65%|██████▍ | 240980/371472 [8:35:37<10:10:47, 3.56it/s] {'loss': 2.7764, 'learning_rate': 4.1632549720959295e-07, 'epoch': 10.38} + 65%|██████▍ | 240980/371472 [8:35:37<10:10:47, 3.56it/s] 65%|██████▍ | 240981/371472 [8:35:37<10:43:45, 3.38it/s] 65%|██████▍ | 240982/371472 [8:35:37<10:38:08, 3.41it/s] 65%|██████▍ | 240983/371472 [8:35:38<11:08:07, 3.26it/s] 65%|██████▍ | 240984/371472 [8:35:38<10:59:05, 3.30it/s] 65%|██████▍ | 240985/371472 [8:35:38<10:44:28, 3.37it/s] 65%|██████▍ | 240986/371472 [8:35:38<10:27:51, 3.46it/s] 65%|██████▍ | 240987/371472 [8:35:39<10:20:41, 3.50it/s] 65%|██████▍ | 240988/371472 [8:35:39<10:26:47, 3.47it/s] 65%|██████▍ | 240989/371472 [8:35:39<10:20:34, 3.50it/s] 65%|██████▍ | 240990/371472 [8:35:40<10:33:28, 3.43it/s] 65%|██████▍ | 240991/371472 [8:35:40<10:31:50, 3.44it/s] 65%|██████▍ | 240992/371472 [8:35:40<10:22:52, 3.49it/s] 65%|██████▍ | 240993/371472 [8:35:40<10:24:14, 3.48it/s] 65%|██████▍ | 240994/371472 [8:35:41<10:23:22, 3.49it/s] 65%|██████▍ | 240995/371472 [8:35:41<10:33:44, 3.43it/s] 65%|██████▍ | 240996/371472 [8:35:41<10:41:32, 3.39it/s] 65%|██████▍ | 240997/371472 [8:35:42<10:39:38, 3.40it/s] 65%|██████▍ | 240998/371472 [8:35:42<10:36:32, 3.42it/s] 65%|██████▍ | 240999/371472 [8:35:42<10:37:21, 3.41it/s] 65%|██████▍ | 241000/371472 [8:35:42<10:23:59, 3.48it/s] {'loss': 2.7895, 'learning_rate': 4.16277015234114e-07, 'epoch': 10.38} + 65%|██████▍ | 241000/371472 [8:35:42<10:23:59, 3.48it/s] 65%|██████▍ | 241001/371472 [8:35:43<10:19:01, 3.51it/s] 65%|██████▍ | 241002/371472 [8:35:43<10:12:26, 3.55it/s] 65%|██████▍ | 241003/371472 [8:35:43<11:13:21, 3.23it/s] 65%|██████▍ | 241004/371472 [8:35:44<10:56:37, 3.31it/s] 65%|██████▍ | 241005/371472 [8:35:44<10:38:37, 3.40it/s] 65%|██████▍ | 241006/371472 [8:35:44<10:24:37, 3.48it/s] 65%|██████▍ | 241007/371472 [8:35:45<11:08:27, 3.25it/s] 65%|██████▍ | 241008/371472 [8:35:45<11:05:17, 3.27it/s] 65%|██████▍ | 241009/371472 [8:35:45<11:23:41, 3.18it/s] 65%|██████▍ | 241010/371472 [8:35:45<11:14:48, 3.22it/s] 65%|██████▍ | 241011/371472 [8:35:46<11:58:07, 3.03it/s] 65%|██████▍ | 241012/371472 [8:35:46<11:10:55, 3.24it/s] 65%|██████▍ | 241013/371472 [8:35:46<11:26:03, 3.17it/s] 65%|██████▍ | 241014/371472 [8:35:47<11:00:23, 3.29it/s] 65%|██████▍ | 241015/371472 [8:35:47<11:08:33, 3.25it/s] 65%|██████▍ | 241016/371472 [8:35:47<11:31:25, 3.14it/s] 65%|██████▍ | 241017/371472 [8:35:48<11:14:05, 3.23it/s] 65%|██████▍ | 241018/371472 [8:35:48<10:59:12, 3.30it/s] 65%|██████▍ | 241019/371472 [8:35:48<10:50:35, 3.34it/s] 65%|██████▍ | 241020/371472 [8:35:49<10:25:41, 3.47it/s] {'loss': 2.6754, 'learning_rate': 4.1622853325863515e-07, 'epoch': 10.38} + 65%|██████▍ | 241020/371472 [8:35:49<10:25:41, 3.47it/s] 65%|██████▍ | 241021/371472 [8:35:49<10:18:08, 3.52it/s] 65%|██████▍ | 241022/371472 [8:35:49<10:22:27, 3.49it/s] 65%|██████▍ | 241023/371472 [8:35:49<10:13:38, 3.54it/s] 65%|██████▍ | 241024/371472 [8:35:50<10:15:14, 3.53it/s] 65%|██████▍ | 241025/371472 [8:35:50<11:25:41, 3.17it/s] 65%|██████▍ | 241026/371472 [8:35:50<11:16:50, 3.21it/s] 65%|██████▍ | 241027/371472 [8:35:51<11:06:23, 3.26it/s] 65%|██████▍ | 241028/371472 [8:35:51<10:48:25, 3.35it/s] 65%|██████▍ | 241029/371472 [8:35:51<10:38:39, 3.40it/s] 65%|██████▍ | 241030/371472 [8:35:52<10:57:31, 3.31it/s] 65%|██████▍ | 241031/371472 [8:35:52<10:42:29, 3.38it/s] 65%|██████▍ | 241032/371472 [8:35:52<10:46:33, 3.36it/s] 65%|██████▍ | 241033/371472 [8:35:52<10:38:57, 3.40it/s] 65%|██████▍ | 241034/371472 [8:35:53<10:29:02, 3.46it/s] 65%|██████▍ | 241035/371472 [8:35:53<10:15:28, 3.53it/s] 65%|██████▍ | 241036/371472 [8:35:53<10:14:39, 3.54it/s] 65%|██████▍ | 241037/371472 [8:35:54<10:19:30, 3.51it/s] 65%|██████▍ | 241038/371472 [8:35:54<10:14:33, 3.54it/s] 65%|██████▍ | 241039/371472 [8:35:54<10:15:59, 3.53it/s] 65%|██████▍ | 241040/371472 [8:35:54<10:11:07, 3.56it/s] {'loss': 2.7218, 'learning_rate': 4.161800512831562e-07, 'epoch': 10.38} + 65%|██████▍ | 241040/371472 [8:35:54<10:11:07, 3.56it/s] 65%|██████▍ | 241041/371472 [8:35:55<10:45:49, 3.37it/s] 65%|██████▍ | 241042/371472 [8:35:55<10:35:46, 3.42it/s] 65%|██████▍ | 241043/371472 [8:35:55<10:23:38, 3.49it/s] 65%|██████▍ | 241044/371472 [8:35:56<11:15:05, 3.22it/s] 65%|██████▍ | 241045/371472 [8:35:56<11:08:31, 3.25it/s] 65%|██████▍ | 241046/371472 [8:35:56<11:01:41, 3.29it/s] 65%|██████▍ | 241047/371472 [8:35:56<10:41:40, 3.39it/s] 65%|██████▍ | 241048/371472 [8:35:57<11:13:45, 3.23it/s] 65%|██████▍ | 241049/371472 [8:35:57<11:22:07, 3.19it/s] 65%|██████▍ | 241050/371472 [8:35:58<11:59:27, 3.02it/s] 65%|██████▍ | 241051/371472 [8:35:58<11:14:23, 3.22it/s] 65%|██████▍ | 241052/371472 [8:35:58<11:12:14, 3.23it/s] 65%|██████▍ | 241053/371472 [8:35:58<10:44:36, 3.37it/s] 65%|██████▍ | 241054/371472 [8:35:59<10:36:42, 3.41it/s] 65%|██████▍ | 241055/371472 [8:35:59<11:02:22, 3.28it/s] 65%|██████▍ | 241056/371472 [8:35:59<10:58:17, 3.30it/s] 65%|██████▍ | 241057/371472 [8:36:00<10:41:31, 3.39it/s] 65%|██████▍ | 241058/371472 [8:36:00<10:24:17, 3.48it/s] 65%|██████▍ | 241059/371472 [8:36:00<10:19:51, 3.51it/s] 65%|██████▍ | 241060/371472 [8:36:00<10:13:38, 3.54it/s] {'loss': 2.7934, 'learning_rate': 4.161315693076774e-07, 'epoch': 10.38} + 65%|██████▍ | 241060/371472 [8:36:00<10:13:38, 3.54it/s] 65%|██████▍ | 241061/371472 [8:36:01<10:15:05, 3.53it/s] 65%|██████▍ | 241062/371472 [8:36:01<10:11:26, 3.55it/s] 65%|██████▍ | 241063/371472 [8:36:01<10:20:28, 3.50it/s] 65%|██████▍ | 241064/371472 [8:36:02<10:44:02, 3.37it/s] 65%|██████▍ | 241065/371472 [8:36:02<11:00:46, 3.29it/s] 65%|██████▍ | 241066/371472 [8:36:02<11:10:23, 3.24it/s] 65%|██████▍ | 241067/371472 [8:36:02<11:03:15, 3.28it/s] 65%|██████▍ | 241068/371472 [8:36:03<10:54:29, 3.32it/s] 65%|██████▍ | 241069/371472 [8:36:03<10:59:30, 3.30it/s] 65%|██████▍ | 241070/371472 [8:36:03<10:53:19, 3.33it/s] 65%|██████▍ | 241071/371472 [8:36:04<10:42:23, 3.38it/s] 65%|██████▍ | 241072/371472 [8:36:04<10:47:11, 3.36it/s] 65%|██████▍ | 241073/371472 [8:36:04<11:08:20, 3.25it/s] 65%|██████▍ | 241074/371472 [8:36:05<11:06:32, 3.26it/s] 65%|██████▍ | 241075/371472 [8:36:05<10:57:50, 3.30it/s] 65%|██████▍ | 241076/371472 [8:36:05<10:43:25, 3.38it/s] 65%|██████▍ | 241077/371472 [8:36:05<10:35:19, 3.42it/s] 65%|██████▍ | 241078/371472 [8:36:06<10:41:17, 3.39it/s] 65%|██████▍ | 241079/371472 [8:36:06<10:51:34, 3.34it/s] 65%|██████▍ | 241080/371472 [8:36:06<10:39:26, 3.40it/s] {'loss': 2.559, 'learning_rate': 4.160830873321984e-07, 'epoch': 10.38} + 65%|██████▍ | 241080/371472 [8:36:06<10:39:26, 3.40it/s] 65%|██████▍ | 241081/371472 [8:36:07<10:32:44, 3.43it/s] 65%|██████▍ | 241082/371472 [8:36:07<10:53:27, 3.33it/s] 65%|██████▍ | 241083/371472 [8:36:07<11:07:59, 3.25it/s] 65%|██████▍ | 241084/371472 [8:36:08<11:21:37, 3.19it/s] 65%|██████▍ | 241085/371472 [8:36:08<10:46:03, 3.36it/s] 65%|██████▍ | 241086/371472 [8:36:08<10:20:47, 3.50it/s] 65%|██████▍ | 241087/371472 [8:36:08<10:49:54, 3.34it/s] 65%|██████▍ | 241088/371472 [8:36:09<10:38:17, 3.40it/s] 65%|██████▍ | 241089/371472 [8:36:09<10:29:28, 3.45it/s] 65%|██████▍ | 241090/371472 [8:36:09<10:30:50, 3.44it/s] 65%|██████▍ | 241091/371472 [8:36:10<10:39:12, 3.40it/s] 65%|██████▍ | 241092/371472 [8:36:10<10:24:51, 3.48it/s] 65%|██████▍ | 241093/371472 [8:36:10<10:37:34, 3.41it/s] 65%|██████▍ | 241094/371472 [8:36:10<10:40:44, 3.39it/s] 65%|██████▍ | 241095/371472 [8:36:11<11:15:11, 3.22it/s] 65%|██████▍ | 241096/371472 [8:36:11<10:51:36, 3.33it/s] 65%|██████▍ | 241097/371472 [8:36:11<10:50:14, 3.34it/s] 65%|██████▍ | 241098/371472 [8:36:12<10:24:21, 3.48it/s] 65%|██████▍ | 241099/371472 [8:36:12<10:46:03, 3.36it/s] 65%|██████▍ | 241100/371472 [8:36:12<10:53:11, 3.33it/s] {'loss': 2.6398, 'learning_rate': 4.160346053567196e-07, 'epoch': 10.38} + 65%|██████▍ | 241100/371472 [8:36:12<10:53:11, 3.33it/s] 65%|██████▍ | 241101/371472 [8:36:13<11:01:29, 3.28it/s] 65%|██████▍ | 241102/371472 [8:36:13<10:52:18, 3.33it/s] 65%|██████▍ | 241103/371472 [8:36:13<10:51:16, 3.34it/s] 65%|██████▍ | 241104/371472 [8:36:14<11:04:24, 3.27it/s] 65%|██████▍ | 241105/371472 [8:36:14<11:37:23, 3.12it/s] 65%|██████▍ | 241106/371472 [8:36:14<11:23:02, 3.18it/s] 65%|██████▍ | 241107/371472 [8:36:14<11:00:10, 3.29it/s] 65%|██████▍ | 241108/371472 [8:36:15<10:51:06, 3.34it/s] 65%|██████▍ | 241109/371472 [8:36:15<10:30:22, 3.45it/s] 65%|██████▍ | 241110/371472 [8:36:15<10:52:32, 3.33it/s] 65%|██████▍ | 241111/371472 [8:36:16<11:17:31, 3.21it/s] 65%|██████▍ | 241112/371472 [8:36:16<12:20:23, 2.93it/s] 65%|██████▍ | 241113/371472 [8:36:16<11:48:08, 3.07it/s] 65%|██████▍ | 241114/371472 [8:36:17<11:14:21, 3.22it/s] 65%|██████▍ | 241115/371472 [8:36:17<10:55:46, 3.31it/s] 65%|██████▍ | 241116/371472 [8:36:17<10:58:19, 3.30it/s] 65%|██████▍ | 241117/371472 [8:36:18<11:20:13, 3.19it/s] 65%|██████▍ | 241118/371472 [8:36:18<10:45:46, 3.36it/s] 65%|██████▍ | 241119/371472 [8:36:18<11:06:23, 3.26it/s] 65%|██████▍ | 241120/371472 [8:36:19<11:25:47, 3.17it/s] {'loss': 2.7251, 'learning_rate': 4.1598612338124066e-07, 'epoch': 10.39} + 65%|██████▍ | 241120/371472 [8:36:19<11:25:47, 3.17it/s] 65%|██████▍ | 241121/371472 [8:36:19<11:17:13, 3.21it/s] 65%|██████▍ | 241122/371472 [8:36:19<10:50:23, 3.34it/s] 65%|██████▍ | 241123/371472 [8:36:19<10:56:10, 3.31it/s] 65%|██████▍ | 241124/371472 [8:36:20<11:03:24, 3.27it/s] 65%|██████▍ | 241125/371472 [8:36:20<10:55:31, 3.31it/s] 65%|██████▍ | 241126/371472 [8:36:20<10:33:54, 3.43it/s] 65%|██████▍ | 241127/371472 [8:36:21<11:16:59, 3.21it/s] 65%|██████▍ | 241128/371472 [8:36:21<10:52:15, 3.33it/s] 65%|██████▍ | 241129/371472 [8:36:21<10:52:42, 3.33it/s] 65%|██████▍ | 241130/371472 [8:36:22<11:07:40, 3.25it/s] 65%|██████▍ | 241131/371472 [8:36:22<11:21:36, 3.19it/s] 65%|██████▍ | 241132/371472 [8:36:22<11:26:02, 3.17it/s] 65%|██████▍ | 241133/371472 [8:36:22<11:14:05, 3.22it/s] 65%|██████▍ | 241134/371472 [8:36:23<10:58:45, 3.30it/s] 65%|██████▍ | 241135/371472 [8:36:23<10:49:35, 3.34it/s] 65%|██████▍ | 241136/371472 [8:36:23<10:43:06, 3.38it/s] 65%|██████▍ | 241137/371472 [8:36:24<10:32:07, 3.44it/s] 65%|██████▍ | 241138/371472 [8:36:24<10:48:41, 3.35it/s] 65%|██████▍ | 241139/371472 [8:36:24<11:19:57, 3.19it/s] 65%|██████▍ | 241140/371472 [8:36:25<11:22:09, 3.18it/s] {'loss': 2.8396, 'learning_rate': 4.159376414057618e-07, 'epoch': 10.39} + 65%|██████▍ | 241140/371472 [8:36:25<11:22:09, 3.18it/s] 65%|██████▍ | 241141/371472 [8:36:25<11:16:44, 3.21it/s] 65%|██████▍ | 241142/371472 [8:36:25<10:57:55, 3.30it/s] 65%|██████▍ | 241143/371472 [8:36:26<11:59:41, 3.02it/s] 65%|██████▍ | 241144/371472 [8:36:26<11:51:14, 3.05it/s] 65%|██████▍ | 241145/371472 [8:36:26<11:36:35, 3.12it/s] 65%|██████▍ | 241146/371472 [8:36:26<11:13:39, 3.22it/s] 65%|██████▍ | 241147/371472 [8:36:27<10:53:05, 3.33it/s] 65%|██████▍ | 241148/371472 [8:36:27<11:04:49, 3.27it/s] 65%|██████▍ | 241149/371472 [8:36:27<11:06:42, 3.26it/s] 65%|██████▍ | 241150/371472 [8:36:28<10:57:13, 3.30it/s] 65%|██████▍ | 241151/371472 [8:36:28<10:59:40, 3.29it/s] 65%|██████▍ | 241152/371472 [8:36:28<10:47:49, 3.35it/s] 65%|██████▍ | 241153/371472 [8:36:29<10:37:44, 3.41it/s] 65%|██████▍ | 241154/371472 [8:36:29<10:35:56, 3.42it/s] 65%|██████▍ | 241155/371472 [8:36:29<10:23:41, 3.48it/s] 65%|██████▍ | 241156/371472 [8:36:29<10:49:41, 3.34it/s] 65%|██████▍ | 241157/371472 [8:36:30<10:30:46, 3.44it/s] 65%|██████▍ | 241158/371472 [8:36:30<10:33:27, 3.43it/s] 65%|██████▍ | 241159/371472 [8:36:30<11:00:04, 3.29it/s] 65%|██████▍ | 241160/371472 [8:36:31<10:32:38, 3.43it/s] {'loss': 2.7702, 'learning_rate': 4.1588915943028286e-07, 'epoch': 10.39} + 65%|██████▍ | 241160/371472 [8:36:31<10:32:38, 3.43it/s] 65%|██████▍ | 241161/371472 [8:36:31<10:39:16, 3.40it/s] 65%|██████▍ | 241162/371472 [8:36:31<10:19:07, 3.51it/s] 65%|██████▍ | 241163/371472 [8:36:31<10:16:52, 3.52it/s] 65%|██████▍ | 241164/371472 [8:36:32<10:45:01, 3.37it/s] 65%|██████▍ | 241165/371472 [8:36:32<10:41:26, 3.39it/s] 65%|██████▍ | 241166/371472 [8:36:32<10:43:22, 3.38it/s] 65%|██████▍ | 241167/371472 [8:36:33<10:22:43, 3.49it/s] 65%|██████▍ | 241168/371472 [8:36:33<10:12:05, 3.55it/s] 65%|██████▍ | 241169/371472 [8:36:33<10:09:56, 3.56it/s] 65%|██████▍ | 241170/371472 [8:36:34<11:02:56, 3.28it/s] 65%|██████▍ | 241171/371472 [8:36:34<11:05:31, 3.26it/s] 65%|██████▍ | 241172/371472 [8:36:34<10:54:35, 3.32it/s] 65%|██████▍ | 241173/371472 [8:36:34<10:47:16, 3.36it/s] 65%|██████▍ | 241174/371472 [8:36:35<11:02:58, 3.28it/s] 65%|██████▍ | 241175/371472 [8:36:35<10:41:43, 3.38it/s] 65%|██████▍ | 241176/371472 [8:36:35<10:32:26, 3.43it/s] 65%|██████▍ | 241177/371472 [8:36:36<10:20:38, 3.50it/s] 65%|██████▍ | 241178/371472 [8:36:36<10:20:25, 3.50it/s] 65%|██████▍ | 241179/371472 [8:36:36<10:21:04, 3.50it/s] 65%|██████▍ | 241180/371472 [8:36:36<10:01:07, 3.61it/s] {'loss': 2.8014, 'learning_rate': 4.1584067745480404e-07, 'epoch': 10.39} + 65%|██████▍ | 241180/371472 [8:36:36<10:01:07, 3.61it/s] 65%|██████▍ | 241181/371472 [8:36:37<10:37:29, 3.41it/s] 65%|██████▍ | 241182/371472 [8:36:37<10:59:01, 3.30it/s] 65%|██████▍ | 241183/371472 [8:36:37<11:29:13, 3.15it/s] 65%|██████▍ | 241184/371472 [8:36:38<11:30:17, 3.15it/s] 65%|██████▍ | 241185/371472 [8:36:38<11:30:41, 3.14it/s] 65%|██████▍ | 241186/371472 [8:36:38<10:57:43, 3.30it/s] 65%|██████▍ | 241187/371472 [8:36:39<10:50:09, 3.34it/s] 65%|██████▍ | 241188/371472 [8:36:39<11:18:00, 3.20it/s] 65%|██████▍ | 241189/371472 [8:36:39<11:30:33, 3.14it/s] 65%|██████▍ | 241190/371472 [8:36:40<10:58:16, 3.30it/s] 65%|██████▍ | 241191/371472 [8:36:40<11:02:20, 3.28it/s] 65%|██████▍ | 241192/371472 [8:36:40<10:53:39, 3.32it/s] 65%|██████▍ | 241193/371472 [8:36:40<10:41:25, 3.39it/s] 65%|██████▍ | 241194/371472 [8:36:41<11:06:11, 3.26it/s] 65%|██████▍ | 241195/371472 [8:36:41<10:38:51, 3.40it/s] 65%|██████▍ | 241196/371472 [8:36:41<11:02:32, 3.28it/s] 65%|██████▍ | 241197/371472 [8:36:42<10:44:51, 3.37it/s] 65%|██████▍ | 241198/371472 [8:36:42<10:46:30, 3.36it/s] 65%|██████▍ | 241199/371472 [8:36:42<10:48:40, 3.35it/s] 65%|██████▍ | 241200/371472 [8:36:43<10:30:54, 3.44it/s] {'loss': 2.6955, 'learning_rate': 4.157921954793251e-07, 'epoch': 10.39} + 65%|██████▍ | 241200/371472 [8:36:43<10:30:54, 3.44it/s] 65%|██████▍ | 241201/371472 [8:36:43<11:15:14, 3.22it/s] 65%|██████▍ | 241202/371472 [8:36:43<11:07:46, 3.25it/s] 65%|██████▍ | 241203/371472 [8:36:44<11:19:39, 3.19it/s] 65%|██████▍ | 241204/371472 [8:36:44<11:05:18, 3.26it/s] 65%|██████▍ | 241205/371472 [8:36:44<11:11:22, 3.23it/s] 65%|██████▍ | 241206/371472 [8:36:44<11:20:45, 3.19it/s] 65%|██████▍ | 241207/371472 [8:36:45<11:10:02, 3.24it/s] 65%|██████▍ | 241208/371472 [8:36:45<11:12:18, 3.23it/s] 65%|██████▍ | 241209/371472 [8:36:45<11:28:29, 3.15it/s] 65%|██████▍ | 241210/371472 [8:36:46<11:01:19, 3.28it/s] 65%|██████▍ | 241211/371472 [8:36:46<10:54:53, 3.32it/s] 65%|██████▍ | 241212/371472 [8:36:46<10:40:29, 3.39it/s] 65%|██████▍ | 241213/371472 [8:36:47<11:02:49, 3.28it/s] 65%|██████▍ | 241214/371472 [8:36:47<10:52:14, 3.33it/s] 65%|██████▍ | 241215/371472 [8:36:47<10:47:04, 3.35it/s] 65%|██████▍ | 241216/371472 [8:36:47<10:43:09, 3.38it/s] 65%|██████▍ | 241217/371472 [8:36:48<10:33:15, 3.43it/s] 65%|██████▍ | 241218/371472 [8:36:48<11:36:45, 3.12it/s] 65%|██████▍ | 241219/371472 [8:36:48<11:10:29, 3.24it/s] 65%|██████▍ | 241220/371472 [8:36:49<10:59:07, 3.29it/s] {'loss': 2.6607, 'learning_rate': 4.1574371350384623e-07, 'epoch': 10.39} + 65%|██████▍ | 241220/371472 [8:36:49<10:59:07, 3.29it/s] 65%|██████▍ | 241221/371472 [8:36:49<10:55:12, 3.31it/s] 65%|██████▍ | 241222/371472 [8:36:49<11:18:49, 3.20it/s] 65%|██████▍ | 241223/371472 [8:36:50<11:04:08, 3.27it/s] 65%|██████▍ | 241224/371472 [8:36:50<10:48:49, 3.35it/s] 65%|██████▍ | 241225/371472 [8:36:50<10:47:34, 3.35it/s] 65%|██████▍ | 241226/371472 [8:36:50<10:47:50, 3.35it/s] 65%|██████▍ | 241227/371472 [8:36:51<10:45:11, 3.36it/s] 65%|██████▍ | 241228/371472 [8:36:51<10:32:36, 3.43it/s] 65%|██████▍ | 241229/371472 [8:36:51<11:16:52, 3.21it/s] 65%|██████▍ | 241230/371472 [8:36:52<11:09:13, 3.24it/s] 65%|██████▍ | 241231/371472 [8:36:52<10:46:08, 3.36it/s] 65%|██████▍ | 241232/371472 [8:36:52<10:44:44, 3.37it/s] 65%|██████▍ | 241233/371472 [8:36:53<10:26:08, 3.47it/s] 65%|██████▍ | 241234/371472 [8:36:53<11:36:35, 3.12it/s] 65%|██████▍ | 241235/371472 [8:36:53<12:03:44, 3.00it/s] 65%|██████▍ | 241236/371472 [8:36:54<11:25:25, 3.17it/s] 65%|██████▍ | 241237/371472 [8:36:54<10:55:36, 3.31it/s] 65%|██████▍ | 241238/371472 [8:36:54<10:49:26, 3.34it/s] 65%|██████▍ | 241239/371472 [8:36:54<10:31:37, 3.44it/s] 65%|██████▍ | 241240/371472 [8:36:55<10:47:47, 3.35it/s] {'loss': 2.6697, 'learning_rate': 4.1569523152836736e-07, 'epoch': 10.39} + 65%|██████▍ | 241240/371472 [8:36:55<10:47:47, 3.35it/s] 65%|██████▍ | 241241/371472 [8:36:55<11:13:58, 3.22it/s] 65%|██████▍ | 241242/371472 [8:36:55<11:03:13, 3.27it/s] 65%|██████▍ | 241243/371472 [8:36:56<10:59:32, 3.29it/s] 65%|██████▍ | 241244/371472 [8:36:56<10:39:08, 3.40it/s] 65%|██████▍ | 241245/371472 [8:36:56<10:28:45, 3.45it/s] 65%|██████▍ | 241246/371472 [8:36:57<10:35:28, 3.42it/s] 65%|██████▍ | 241247/371472 [8:36:57<10:37:08, 3.41it/s] 65%|██████▍ | 241248/371472 [8:36:57<10:51:07, 3.33it/s] 65%|██████▍ | 241249/371472 [8:36:57<10:49:41, 3.34it/s] 65%|██████▍ | 241250/371472 [8:36:58<10:40:34, 3.39it/s] 65%|██████▍ | 241251/371472 [8:36:58<10:34:08, 3.42it/s] 65%|██████▍ | 241252/371472 [8:36:58<10:33:22, 3.43it/s] 65%|██████▍ | 241253/371472 [8:36:59<11:07:14, 3.25it/s] 65%|██████▍ | 241254/371472 [8:36:59<11:12:31, 3.23it/s] 65%|██████▍ | 241255/371472 [8:36:59<10:57:21, 3.30it/s] 65%|██████▍ | 241256/371472 [8:37:00<10:59:14, 3.29it/s] 65%|██████▍ | 241257/371472 [8:37:00<10:45:41, 3.36it/s] 65%|██████▍ | 241258/371472 [8:37:00<10:20:26, 3.50it/s] 65%|██████▍ | 241259/371472 [8:37:00<10:27:08, 3.46it/s] 65%|██████▍ | 241260/371472 [8:37:01<10:21:51, 3.49it/s] {'loss': 2.8915, 'learning_rate': 4.156467495528885e-07, 'epoch': 10.39} + 65%|██████▍ | 241260/371472 [8:37:01<10:21:51, 3.49it/s] 65%|██████▍ | 241261/371472 [8:37:01<10:11:14, 3.55it/s] 65%|██████▍ | 241262/371472 [8:37:01<10:57:51, 3.30it/s] 65%|██████▍ | 241263/371472 [8:37:02<10:52:23, 3.33it/s] 65%|██████▍ | 241264/371472 [8:37:02<11:05:08, 3.26it/s] 65%|██████▍ | 241265/371472 [8:37:02<10:41:16, 3.38it/s] 65%|██████▍ | 241266/371472 [8:37:02<10:54:43, 3.31it/s] 65%|██████▍ | 241267/371472 [8:37:03<10:55:17, 3.31it/s] 65%|██████▍ | 241268/371472 [8:37:03<10:41:36, 3.38it/s] 65%|██████▍ | 241269/371472 [8:37:03<10:54:29, 3.32it/s] 65%|██████▍ | 241270/371472 [8:37:04<12:29:47, 2.89it/s] 65%|██████▍ | 241271/371472 [8:37:04<11:41:33, 3.09it/s] 65%|██████▍ | 241272/371472 [8:37:04<11:34:31, 3.12it/s] 65%|██████▍ | 241273/371472 [8:37:05<11:21:07, 3.19it/s] 65%|██████▍ | 241274/371472 [8:37:05<11:06:01, 3.26it/s] 65%|██████▍ | 241275/371472 [8:37:05<10:50:12, 3.34it/s] 65%|██████▍ | 241276/371472 [8:37:06<10:58:18, 3.30it/s] 65%|██████▍ | 241277/371472 [8:37:06<11:04:35, 3.27it/s] 65%|██████▍ | 241278/371472 [8:37:06<11:14:34, 3.22it/s] 65%|██████▍ | 241279/371472 [8:37:07<10:52:03, 3.33it/s] 65%|██████▍ | 241280/371472 [8:37:07<10:49:44, 3.34it/s] {'loss': 2.7716, 'learning_rate': 4.155982675774095e-07, 'epoch': 10.39} + 65%|██████▍ | 241280/371472 [8:37:07<10:49:44, 3.34it/s] 65%|██████▍ | 241281/371472 [8:37:07<10:40:53, 3.39it/s] 65%|██████▍ | 241282/371472 [8:37:07<10:47:22, 3.35it/s] 65%|██████▍ | 241283/371472 [8:37:08<11:22:34, 3.18it/s] 65%|██████▍ | 241284/371472 [8:37:08<10:58:52, 3.29it/s] 65%|██████▍ | 241285/371472 [8:37:08<10:53:01, 3.32it/s] 65%|██████▍ | 241286/371472 [8:37:09<10:59:34, 3.29it/s] 65%|██████▍ | 241287/371472 [8:37:09<10:37:57, 3.40it/s] 65%|██████▍ | 241288/371472 [8:37:09<10:31:23, 3.44it/s] 65%|██████▍ | 241289/371472 [8:37:09<10:40:19, 3.39it/s] 65%|██████▍ | 241290/371472 [8:37:10<10:40:15, 3.39it/s] 65%|██████▍ | 241291/371472 [8:37:10<10:26:36, 3.46it/s] 65%|██████▍ | 241292/371472 [8:37:10<10:11:08, 3.55it/s] 65%|██████▍ | 241293/371472 [8:37:11<10:11:00, 3.55it/s] 65%|██████▍ | 241294/371472 [8:37:11<10:20:04, 3.50it/s] 65%|██████▍ | 241295/371472 [8:37:11<10:27:48, 3.46it/s] 65%|██████▍ | 241296/371472 [8:37:11<10:32:50, 3.43it/s] 65%|██████▍ | 241297/371472 [8:37:12<10:33:20, 3.43it/s] 65%|██████▍ | 241298/371472 [8:37:12<11:27:53, 3.15it/s] 65%|██████▍ | 241299/371472 [8:37:12<10:57:52, 3.30it/s] 65%|██████▍ | 241300/371472 [8:37:13<10:56:20, 3.31it/s] {'loss': 2.6913, 'learning_rate': 4.155497856019307e-07, 'epoch': 10.39} + 65%|██████▍ | 241300/371472 [8:37:13<10:56:20, 3.31it/s] 65%|██████▍ | 241301/371472 [8:37:13<11:06:06, 3.26it/s] 65%|██████▍ | 241302/371472 [8:37:13<10:57:58, 3.30it/s] 65%|██████▍ | 241303/371472 [8:37:14<10:27:41, 3.46it/s] 65%|██████▍ | 241304/371472 [8:37:14<10:41:59, 3.38it/s] 65%|██████▍ | 241305/371472 [8:37:14<10:44:32, 3.37it/s] 65%|██████▍ | 241306/371472 [8:37:15<10:51:32, 3.33it/s] 65%|██████▍ | 241307/371472 [8:37:15<10:57:55, 3.30it/s] 65%|██████▍ | 241308/371472 [8:37:15<13:35:15, 2.66it/s] 65%|██████▍ | 241309/371472 [8:37:16<12:42:15, 2.85it/s] 65%|██████▍ | 241310/371472 [8:37:16<12:41:42, 2.85it/s] 65%|██████▍ | 241311/371472 [8:37:16<12:00:27, 3.01it/s] 65%|██████▍ | 241312/371472 [8:37:17<11:21:54, 3.18it/s] 65%|██████▍ | 241313/371472 [8:37:17<10:59:22, 3.29it/s] 65%|██████▍ | 241314/371472 [8:37:17<10:49:54, 3.34it/s] 65%|██████▍ | 241315/371472 [8:37:17<10:59:45, 3.29it/s] 65%|██████▍ | 241316/371472 [8:37:18<11:12:32, 3.23it/s] 65%|██████▍ | 241317/371472 [8:37:18<11:02:13, 3.28it/s] 65%|██████▍ | 241318/371472 [8:37:18<10:38:50, 3.40it/s] 65%|██████▍ | 241319/371472 [8:37:19<10:25:34, 3.47it/s] 65%|██████▍ | 241320/371472 [8:37:19<10:14:47, 3.53it/s] {'loss': 2.84, 'learning_rate': 4.1550130362645175e-07, 'epoch': 10.39} + 65%|██████▍ | 241320/371472 [8:37:19<10:14:47, 3.53it/s] 65%|██████▍ | 241321/371472 [8:37:19<10:12:38, 3.54it/s] 65%|██████▍ | 241322/371472 [8:37:20<11:18:28, 3.20it/s] 65%|██████▍ | 241323/371472 [8:37:20<10:40:46, 3.39it/s] 65%|██████▍ | 241324/371472 [8:37:20<10:21:13, 3.49it/s] 65%|██████▍ | 241325/371472 [8:37:20<10:37:42, 3.40it/s] 65%|██████▍ | 241326/371472 [8:37:21<10:27:18, 3.46it/s] 65%|██████▍ | 241327/371472 [8:37:21<10:37:57, 3.40it/s] 65%|██████▍ | 241328/371472 [8:37:21<11:21:38, 3.18it/s] 65%|██████▍ | 241329/371472 [8:37:22<10:58:59, 3.29it/s] 65%|██████▍ | 241330/371472 [8:37:22<10:48:26, 3.35it/s] 65%|██████▍ | 241331/371472 [8:37:22<10:38:34, 3.40it/s] 65%|██████▍ | 241332/371472 [8:37:23<10:45:52, 3.36it/s] 65%|██████▍ | 241333/371472 [8:37:23<10:46:20, 3.36it/s] 65%|██████▍ | 241334/371472 [8:37:23<10:23:04, 3.48it/s] 65%|██████▍ | 241335/371472 [8:37:23<10:10:50, 3.55it/s] 65%|██████▍ | 241336/371472 [8:37:24<10:13:45, 3.53it/s] 65%|██████▍ | 241337/371472 [8:37:24<10:06:00, 3.58it/s] 65%|██████▍ | 241338/371472 [8:37:24<10:41:36, 3.38it/s] 65%|██████▍ | 241339/371472 [8:37:25<10:31:31, 3.43it/s] 65%|██████▍ | 241340/371472 [8:37:25<10:19:18, 3.50it/s] {'loss': 2.7007, 'learning_rate': 4.1545282165097287e-07, 'epoch': 10.39} + 65%|██████▍ | 241340/371472 [8:37:25<10:19:18, 3.50it/s] 65%|██████▍ | 241341/371472 [8:37:25<10:10:46, 3.55it/s] 65%|██████▍ | 241342/371472 [8:37:25<9:59:52, 3.62it/s] 65%|██████▍ | 241343/371472 [8:37:26<10:05:28, 3.58it/s] 65%|██████▍ | 241344/371472 [8:37:26<10:15:47, 3.52it/s] 65%|██████▍ | 241345/371472 [8:37:26<10:02:33, 3.60it/s] 65%|██████▍ | 241346/371472 [8:37:26<9:55:12, 3.64it/s] 65%|██████▍ | 241347/371472 [8:37:27<10:02:40, 3.60it/s] 65%|██████▍ | 241348/371472 [8:37:27<10:21:28, 3.49it/s] 65%|██████▍ | 241349/371472 [8:37:27<10:18:32, 3.51it/s] 65%|██████▍ | 241350/371472 [8:37:28<10:43:03, 3.37it/s] 65%|██████▍ | 241351/371472 [8:37:28<10:45:27, 3.36it/s] 65%|██████▍ | 241352/371472 [8:37:28<10:44:19, 3.37it/s] 65%|██████▍ | 241353/371472 [8:37:29<11:11:48, 3.23it/s] 65%|██████▍ | 241354/371472 [8:37:29<12:01:06, 3.01it/s] 65%|██████▍ | 241355/371472 [8:37:29<11:18:29, 3.20it/s] 65%|██████▍ | 241356/371472 [8:37:29<11:02:07, 3.28it/s] 65%|██████▍ | 241357/371472 [8:37:30<11:15:58, 3.21it/s] 65%|██████▍ | 241358/371472 [8:37:30<13:23:25, 2.70it/s] 65%|██████▍ | 241359/371472 [8:37:31<12:25:10, 2.91it/s] 65%|██████▍ | 241360/371472 [8:37:31<11:37:14, 3.11it/s] {'loss': 2.8545, 'learning_rate': 4.1540433967549394e-07, 'epoch': 10.4} + 65%|██████▍ | 241360/371472 [8:37:31<11:37:14, 3.11it/s] 65%|██████▍ | 241361/371472 [8:37:31<11:15:42, 3.21it/s] 65%|██████▍ | 241362/371472 [8:37:31<10:55:41, 3.31it/s] 65%|██████▍ | 241363/371472 [8:37:32<10:29:51, 3.44it/s] 65%|██████▍ | 241364/371472 [8:37:32<11:18:37, 3.20it/s] 65%|██████▍ | 241365/371472 [8:37:32<11:22:01, 3.18it/s] 65%|██████▍ | 241366/371472 [8:37:33<11:39:37, 3.10it/s] 65%|██████▍ | 241367/371472 [8:37:33<11:15:30, 3.21it/s] 65%|██████▍ | 241368/371472 [8:37:33<11:03:13, 3.27it/s] 65%|██████▍ | 241369/371472 [8:37:34<10:51:59, 3.33it/s] 65%|██████▍ | 241370/371472 [8:37:34<10:29:49, 3.44it/s] 65%|██████▍ | 241371/371472 [8:37:34<10:23:12, 3.48it/s] 65%|██████▍ | 241372/371472 [8:37:34<10:49:25, 3.34it/s] 65%|██████▍ | 241373/371472 [8:37:35<11:11:03, 3.23it/s] 65%|██████▍ | 241374/371472 [8:37:35<11:06:11, 3.25it/s] 65%|██████▍ | 241375/371472 [8:37:35<11:07:40, 3.25it/s] 65%|██████▍ | 241376/371472 [8:37:36<10:40:54, 3.38it/s] 65%|██████▍ | 241377/371472 [8:37:36<11:16:03, 3.21it/s] 65%|██████▍ | 241378/371472 [8:37:36<11:35:25, 3.12it/s] 65%|██████▍ | 241379/371472 [8:37:37<11:19:01, 3.19it/s] 65%|██████▍ | 241380/371472 [8:37:37<11:42:06, 3.09it/s] {'loss': 2.62, 'learning_rate': 4.153558577000151e-07, 'epoch': 10.4} + 65%|██████▍ | 241380/371472 [8:37:37<11:42:06, 3.09it/s] 65%|██████▍ | 241381/371472 [8:37:37<11:47:30, 3.06it/s] 65%|██████▍ | 241382/371472 [8:37:38<11:31:31, 3.14it/s] 65%|██████▍ | 241383/371472 [8:37:38<11:00:41, 3.28it/s] 65%|██████▍ | 241384/371472 [8:37:38<10:44:26, 3.36it/s] 65%|██████▍ | 241385/371472 [8:37:39<11:13:08, 3.22it/s] 65%|██████▍ | 241386/371472 [8:37:39<10:42:13, 3.38it/s] 65%|██████▍ | 241387/371472 [8:37:39<10:42:11, 3.38it/s] 65%|██████▍ | 241388/371472 [8:37:39<10:43:23, 3.37it/s] 65%|██████▍ | 241389/371472 [8:37:40<11:10:00, 3.24it/s] 65%|██████▍ | 241390/371472 [8:37:40<10:58:11, 3.29it/s] 65%|██████▍ | 241391/371472 [8:37:40<10:56:26, 3.30it/s] 65%|██████▍ | 241392/371472 [8:37:41<10:50:14, 3.33it/s] 65%|██████▍ | 241393/371472 [8:37:41<11:20:28, 3.19it/s] 65%|██████▍ | 241394/371472 [8:37:41<11:13:39, 3.22it/s] 65%|██████▍ | 241395/371472 [8:37:42<10:58:23, 3.29it/s] 65%|██████▍ | 241396/371472 [8:37:42<10:57:00, 3.30it/s] 65%|██████▍ | 241397/371472 [8:37:42<10:40:16, 3.39it/s] 65%|██████▍ | 241398/371472 [8:37:42<11:02:33, 3.27it/s] 65%|██████▍ | 241399/371472 [8:37:43<10:49:58, 3.34it/s] 65%|██████▍ | 241400/371472 [8:37:43<10:36:04, 3.41it/s] {'loss': 2.6837, 'learning_rate': 4.1530737572453614e-07, 'epoch': 10.4} + 65%|██████▍ | 241400/371472 [8:37:43<10:36:04, 3.41it/s] 65%|██████▍ | 241401/371472 [8:37:43<10:20:30, 3.49it/s] 65%|██████▍ | 241402/371472 [8:37:44<10:48:34, 3.34it/s] 65%|██████▍ | 241403/371472 [8:37:44<11:10:28, 3.23it/s] 65%|██████▍ | 241404/371472 [8:37:44<10:56:17, 3.30it/s] 65%|██████▍ | 241405/371472 [8:37:45<10:40:06, 3.39it/s] 65%|██████▍ | 241406/371472 [8:37:45<10:34:02, 3.42it/s] 65%|██████▍ | 241407/371472 [8:37:45<11:07:41, 3.25it/s] 65%|██████▍ | 241408/371472 [8:37:45<10:51:50, 3.33it/s] 65%|██████▍ | 241409/371472 [8:37:46<10:48:24, 3.34it/s] 65%|██████▍ | 241410/371472 [8:37:46<11:02:29, 3.27it/s] 65%|██████▍ | 241411/371472 [8:37:46<10:47:20, 3.35it/s] 65%|██████▍ | 241412/371472 [8:37:47<10:45:55, 3.36it/s] 65%|██████▍ | 241413/371472 [8:37:47<10:58:53, 3.29it/s] 65%|██████▍ | 241414/371472 [8:37:47<11:00:38, 3.28it/s] 65%|██████▍ | 241415/371472 [8:37:48<11:12:22, 3.22it/s] 65%|██████▍ | 241416/371472 [8:37:48<10:58:07, 3.29it/s] 65%|██████▍ | 241417/371472 [8:37:48<11:03:38, 3.27it/s] 65%|██████▍ | 241418/371472 [8:37:49<11:37:53, 3.11it/s] 65%|██████▍ | 241419/371472 [8:37:49<11:13:24, 3.22it/s] 65%|██████▍ | 241420/371472 [8:37:49<11:16:57, 3.20it/s] {'loss': 2.839, 'learning_rate': 4.152588937490573e-07, 'epoch': 10.4} + 65%|██████▍ | 241420/371472 [8:37:49<11:16:57, 3.20it/s] 65%|██████▍ | 241421/371472 [8:37:49<11:17:30, 3.20it/s] 65%|██████▍ | 241422/371472 [8:37:50<11:06:17, 3.25it/s] 65%|██████▍ | 241423/371472 [8:37:50<10:50:35, 3.33it/s] 65%|██████▍ | 241424/371472 [8:37:50<10:36:56, 3.40it/s] 65%|██████▍ | 241425/371472 [8:37:51<11:30:08, 3.14it/s] 65%|██████▍ | 241426/371472 [8:37:51<11:57:09, 3.02it/s] 65%|██████▍ | 241427/371472 [8:37:51<11:36:25, 3.11it/s] 65%|██████▍ | 241428/371472 [8:37:52<11:06:51, 3.25it/s] 65%|██████▍ | 241429/371472 [8:37:52<10:57:48, 3.29it/s] 65%|██████▍ | 241430/371472 [8:37:52<10:40:42, 3.38it/s] 65%|██████▍ | 241431/371472 [8:37:53<11:21:42, 3.18it/s] 65%|██████▍ | 241432/371472 [8:37:53<10:59:41, 3.29it/s] 65%|██████▍ | 241433/371472 [8:37:53<10:41:32, 3.38it/s] 65%|██████▍ | 241434/371472 [8:37:53<10:25:59, 3.46it/s] 65%|██████▍ | 241435/371472 [8:37:54<10:11:54, 3.54it/s] 65%|██████▍ | 241436/371472 [8:37:54<10:05:15, 3.58it/s] 65%|██████▍ | 241437/371472 [8:37:54<10:43:45, 3.37it/s] 65%|██████▍ | 241438/371472 [8:37:55<10:31:01, 3.43it/s] 65%|██████▍ | 241439/371472 [8:37:55<10:22:30, 3.48it/s] 65%|██████▍ | 241440/371472 [8:37:55<10:01:22, 3.60it/s] {'loss': 2.8489, 'learning_rate': 4.152104117735784e-07, 'epoch': 10.4} + 65%|██████▍ | 241440/371472 [8:37:55<10:01:22, 3.60it/s] 65%|██████▍ | 241441/371472 [8:37:55<10:00:11, 3.61it/s] 65%|██████▍ | 241442/371472 [8:37:56<10:01:20, 3.60it/s] 65%|██████▍ | 241443/371472 [8:37:56<9:56:27, 3.63it/s] 65%|██████▍ | 241444/371472 [8:37:56<10:09:09, 3.56it/s] 65%|██████▍ | 241445/371472 [8:37:56<10:19:07, 3.50it/s] 65%|██████▍ | 241446/371472 [8:37:57<10:19:46, 3.50it/s] 65%|██████▍ | 241447/371472 [8:37:57<10:26:31, 3.46it/s] 65%|██████▍ | 241448/371472 [8:37:57<10:31:02, 3.43it/s] 65%|██████▍ | 241449/371472 [8:37:58<11:17:55, 3.20it/s] 65%|██████▍ | 241450/371472 [8:37:58<11:13:39, 3.22it/s] 65%|██████▍ | 241451/371472 [8:37:58<10:53:59, 3.31it/s] 65%|██████▍ | 241452/371472 [8:37:59<10:35:46, 3.41it/s] 65%|██████▍ | 241453/371472 [8:37:59<10:46:13, 3.35it/s] 65%|██████▍ | 241454/371472 [8:37:59<10:50:00, 3.33it/s] 65%|██████▍ | 241455/371472 [8:38:00<10:46:58, 3.35it/s] 65%|██████▍ | 241456/371472 [8:38:00<10:46:00, 3.35it/s] 65%|██████▌ | 241457/371472 [8:38:00<10:54:42, 3.31it/s] 65%|██████▌ | 241458/371472 [8:38:00<10:48:53, 3.34it/s] 65%|██████▌ | 241459/371472 [8:38:01<10:42:59, 3.37it/s] 65%|██████▌ | 241460/371472 [8:38:01<10:44:00, 3.36it/s] {'loss': 2.7458, 'learning_rate': 4.151619297980995e-07, 'epoch': 10.4} + 65%|██████▌ | 241460/371472 [8:38:01<10:44:00, 3.36it/s] 65%|██████▌ | 241461/371472 [8:38:01<10:47:33, 3.35it/s] 65%|██████▌ | 241462/371472 [8:38:02<10:50:09, 3.33it/s] 65%|██████▌ | 241463/371472 [8:38:02<10:57:13, 3.30it/s] 65%|██████▌ | 241464/371472 [8:38:02<10:47:25, 3.35it/s] 65%|██████▌ | 241465/371472 [8:38:02<10:30:59, 3.43it/s] 65%|██████▌ | 241466/371472 [8:38:03<10:31:14, 3.43it/s] 65%|██████▌ | 241467/371472 [8:38:03<11:16:03, 3.20it/s] 65%|██████▌ | 241468/371472 [8:38:03<11:20:48, 3.18it/s] 65%|██████▌ | 241469/371472 [8:38:04<10:52:38, 3.32it/s] 65%|██████▌ | 241470/371472 [8:38:04<10:51:15, 3.33it/s] 65%|██████▌ | 241471/371472 [8:38:04<11:30:14, 3.14it/s] 65%|██████▌ | 241472/371472 [8:38:05<11:04:04, 3.26it/s] 65%|██████▌ | 241473/371472 [8:38:05<10:47:29, 3.35it/s] 65%|██████▌ | 241474/371472 [8:38:05<10:42:49, 3.37it/s] 65%|██████▌ | 241475/371472 [8:38:06<10:36:56, 3.40it/s] 65%|██████▌ | 241476/371472 [8:38:06<11:02:06, 3.27it/s] 65%|██████▌ | 241477/371472 [8:38:06<10:51:34, 3.33it/s] 65%|██████▌ | 241478/371472 [8:38:06<10:41:06, 3.38it/s] 65%|██████▌ | 241479/371472 [8:38:07<10:46:37, 3.35it/s] 65%|██████▌ | 241480/371472 [8:38:07<10:47:14, 3.35it/s] {'loss': 2.7536, 'learning_rate': 4.151134478226206e-07, 'epoch': 10.4} + 65%|██████▌ | 241480/371472 [8:38:07<10:47:14, 3.35it/s] 65%|██████▌ | 241481/371472 [8:38:07<11:11:33, 3.23it/s] 65%|██████▌ | 241482/371472 [8:38:08<11:59:18, 3.01it/s] 65%|██████▌ | 241483/371472 [8:38:08<12:21:11, 2.92it/s] 65%|██████▌ | 241484/371472 [8:38:08<12:34:49, 2.87it/s] 65%|██████▌ | 241485/371472 [8:38:09<12:11:40, 2.96it/s] 65%|██████▌ | 241486/371472 [8:38:09<11:40:18, 3.09it/s] 65%|██████▌ | 241487/371472 [8:38:09<12:01:36, 3.00it/s] 65%|██████▌ | 241488/371472 [8:38:10<11:22:09, 3.18it/s] 65%|██████▌ | 241489/371472 [8:38:10<11:07:11, 3.25it/s] 65%|██████▌ | 241490/371472 [8:38:10<10:56:07, 3.30it/s] 65%|██████▌ | 241491/371472 [8:38:11<10:53:02, 3.32it/s] 65%|██████▌ | 241492/371472 [8:38:11<10:39:09, 3.39it/s] 65%|██████▌ | 241493/371472 [8:38:11<10:17:48, 3.51it/s] 65%|██████▌ | 241494/371472 [8:38:11<10:11:29, 3.54it/s] 65%|██████▌ | 241495/371472 [8:38:12<10:31:39, 3.43it/s] 65%|██████▌ | 241496/371472 [8:38:12<11:37:44, 3.10it/s] 65%|██████▌ | 241497/371472 [8:38:12<11:28:25, 3.15it/s] 65%|██████▌ | 241498/371472 [8:38:13<11:10:22, 3.23it/s] 65%|██████▌ | 241499/371472 [8:38:13<10:51:16, 3.33it/s] 65%|██████▌ | 241500/371472 [8:38:13<11:22:33, 3.17it/s] {'loss': 2.6656, 'learning_rate': 4.150649658471417e-07, 'epoch': 10.4} + 65%|██████▌ | 241500/371472 [8:38:13<11:22:33, 3.17it/s] 65%|██████▌ | 241501/371472 [8:38:14<11:10:18, 3.23it/s] 65%|██████▌ | 241502/371472 [8:38:14<10:58:55, 3.29it/s] 65%|██████▌ | 241503/371472 [8:38:14<10:31:26, 3.43it/s] 65%|██████▌ | 241504/371472 [8:38:14<10:29:30, 3.44it/s] 65%|██████▌ | 241505/371472 [8:38:15<10:37:10, 3.40it/s] 65%|██████▌ | 241506/371472 [8:38:15<11:02:53, 3.27it/s] 65%|██████▌ | 241507/371472 [8:38:15<11:02:56, 3.27it/s] 65%|██████▌ | 241508/371472 [8:38:16<10:53:13, 3.32it/s] 65%|██████▌ | 241509/371472 [8:38:16<10:47:05, 3.35it/s] 65%|██████▌ | 241510/371472 [8:38:16<10:47:11, 3.35it/s] 65%|██████▌ | 241511/371472 [8:38:17<11:23:45, 3.17it/s] 65%|██████▌ | 241512/371472 [8:38:17<11:18:16, 3.19it/s] 65%|██████▌ | 241513/371472 [8:38:17<11:19:04, 3.19it/s] 65%|██████▌ | 241514/371472 [8:38:18<11:05:42, 3.25it/s] 65%|██████▌ | 241515/371472 [8:38:18<10:49:25, 3.34it/s] 65%|██████▌ | 241516/371472 [8:38:18<10:27:40, 3.45it/s] 65%|██████▌ | 241517/371472 [8:38:19<12:19:07, 2.93it/s] 65%|██████▌ | 241518/371472 [8:38:19<11:40:54, 3.09it/s] 65%|██████▌ | 241519/371472 [8:38:19<11:07:05, 3.25it/s] 65%|██████▌ | 241520/371472 [8:38:19<10:45:57, 3.35it/s] {'loss': 2.6403, 'learning_rate': 4.150164838716628e-07, 'epoch': 10.4} + 65%|██████▌ | 241520/371472 [8:38:19<10:45:57, 3.35it/s] 65%|██████▌ | 241521/371472 [8:38:20<10:43:25, 3.37it/s] 65%|██████▌ | 241522/371472 [8:38:20<10:36:09, 3.40it/s] 65%|██████▌ | 241523/371472 [8:38:20<10:23:03, 3.48it/s] 65%|██████▌ | 241524/371472 [8:38:21<10:28:58, 3.44it/s] 65%|██████▌ | 241525/371472 [8:38:21<10:24:20, 3.47it/s] 65%|██████▌ | 241526/371472 [8:38:21<10:27:15, 3.45it/s] 65%|██████▌ | 241527/371472 [8:38:21<10:22:09, 3.48it/s] 65%|██████▌ | 241528/371472 [8:38:22<11:16:47, 3.20it/s] 65%|██████▌ | 241529/371472 [8:38:22<11:10:28, 3.23it/s] 65%|██████▌ | 241530/371472 [8:38:22<10:36:12, 3.40it/s] 65%|██████▌ | 241531/371472 [8:38:23<10:16:12, 3.51it/s] 65%|██████▌ | 241532/371472 [8:38:23<10:05:13, 3.58it/s] 65%|██████▌ | 241533/371472 [8:38:23<10:11:18, 3.54it/s] 65%|██████▌ | 241534/371472 [8:38:24<11:26:55, 3.15it/s] 65%|██████▌ | 241535/371472 [8:38:24<11:08:54, 3.24it/s] 65%|██████▌ | 241536/371472 [8:38:24<11:02:05, 3.27it/s] 65%|██████▌ | 241537/371472 [8:38:24<10:48:42, 3.34it/s] 65%|██████▌ | 241538/371472 [8:38:25<10:58:36, 3.29it/s] 65%|██████▌ | 241539/371472 [8:38:25<11:17:50, 3.19it/s] 65%|██████▌ | 241540/371472 [8:38:25<11:13:42, 3.21it/s] {'loss': 2.7839, 'learning_rate': 4.1496800189618385e-07, 'epoch': 10.4} + 65%|██████▌ | 241540/371472 [8:38:25<11:13:42, 3.21it/s] 65%|██████▌ | 241541/371472 [8:38:26<11:18:05, 3.19it/s] 65%|██████▌ | 241542/371472 [8:38:26<10:57:50, 3.29it/s] 65%|██████▌ | 241543/371472 [8:38:26<10:32:38, 3.42it/s] 65%|██████▌ | 241544/371472 [8:38:27<11:16:58, 3.20it/s] 65%|██████▌ | 241545/371472 [8:38:27<10:51:36, 3.32it/s] 65%|██████▌ | 241546/371472 [8:38:27<10:32:09, 3.43it/s] 65%|██████▌ | 241547/371472 [8:38:27<10:55:43, 3.30it/s] 65%|██████▌ | 241548/371472 [8:38:28<10:58:27, 3.29it/s] 65%|██████▌ | 241549/371472 [8:38:28<10:53:26, 3.31it/s] 65%|██████▌ | 241550/371472 [8:38:28<10:56:22, 3.30it/s] 65%|██████▌ | 241551/371472 [8:38:29<10:45:39, 3.35it/s] 65%|██████▌ | 241552/371472 [8:38:29<10:55:50, 3.30it/s] 65%|██████▌ | 241553/371472 [8:38:29<11:06:39, 3.25it/s] 65%|██████▌ | 241554/371472 [8:38:30<11:30:49, 3.13it/s] 65%|██████▌ | 241555/371472 [8:38:30<11:01:24, 3.27it/s] 65%|██████▌ | 241556/371472 [8:38:30<11:01:39, 3.27it/s] 65%|██████▌ | 241557/371472 [8:38:31<11:33:54, 3.12it/s] 65%|██████▌ | 241558/371472 [8:38:31<10:57:24, 3.29it/s] 65%|██████▌ | 241559/371472 [8:38:31<10:32:09, 3.43it/s] 65%|██████▌ | 241560/371472 [8:38:32<11:58:58, 3.01it/s] {'loss': 2.6942, 'learning_rate': 4.1491951992070503e-07, 'epoch': 10.4} + 65%|██████▌ | 241560/371472 [8:38:32<11:58:58, 3.01it/s] 65%|██████▌ | 241561/371472 [8:38:32<11:26:12, 3.16it/s] 65%|██████▌ | 241562/371472 [8:38:32<11:50:14, 3.05it/s] 65%|██████▌ | 241563/371472 [8:38:32<11:07:48, 3.24it/s] 65%|██████▌ | 241564/371472 [8:38:33<11:38:50, 3.10it/s] 65%|██████▌ | 241565/371472 [8:38:33<11:28:12, 3.15it/s] 65%|██████▌ | 241566/371472 [8:38:33<11:10:11, 3.23it/s] 65%|██████▌ | 241567/371472 [8:38:34<11:05:16, 3.25it/s] 65%|██████▌ | 241568/371472 [8:38:34<11:06:41, 3.25it/s] 65%|██████▌ | 241569/371472 [8:38:34<10:47:41, 3.34it/s] 65%|██████▌ | 241570/371472 [8:38:35<10:17:39, 3.51it/s] 65%|██████▌ | 241571/371472 [8:38:35<10:15:47, 3.52it/s] 65%|██████▌ | 241572/371472 [8:38:35<10:23:02, 3.47it/s] 65%|██████▌ | 241573/371472 [8:38:35<10:29:26, 3.44it/s] 65%|██████▌ | 241574/371472 [8:38:36<10:24:09, 3.47it/s] 65%|██████▌ | 241575/371472 [8:38:36<10:17:00, 3.51it/s] 65%|██████▌ | 241576/371472 [8:38:36<10:17:25, 3.51it/s] 65%|██████▌ | 241577/371472 [8:38:37<10:13:50, 3.53it/s] 65%|██████▌ | 241578/371472 [8:38:37<10:43:00, 3.37it/s] 65%|██████▌ | 241579/371472 [8:38:37<10:40:38, 3.38it/s] 65%|██████▌ | 241580/371472 [8:38:37<10:42:26, 3.37it/s] {'loss': 2.7122, 'learning_rate': 4.148710379452261e-07, 'epoch': 10.41} + 65%|██████▌ | 241580/371472 [8:38:37<10:42:26, 3.37it/s] 65%|██████▌ | 241581/371472 [8:38:38<10:51:00, 3.33it/s] 65%|██████▌ | 241582/371472 [8:38:38<10:55:39, 3.30it/s] 65%|██████▌ | 241583/371472 [8:38:38<10:52:07, 3.32it/s] 65%|██████▌ | 241584/371472 [8:38:39<11:21:22, 3.18it/s] 65%|██████▌ | 241585/371472 [8:38:39<11:37:34, 3.10it/s] 65%|██████▌ | 241586/371472 [8:38:39<11:23:09, 3.17it/s] 65%|██████▌ | 241587/371472 [8:38:40<11:45:47, 3.07it/s] 65%|██████▌ | 241588/371472 [8:38:40<12:05:26, 2.98it/s] 65%|██████▌ | 241589/371472 [8:38:40<11:45:57, 3.07it/s] 65%|██████▌ | 241590/371472 [8:38:41<11:13:58, 3.21it/s] 65%|██████▌ | 241591/371472 [8:38:41<11:06:52, 3.25it/s] 65%|██████▌ | 241592/371472 [8:38:41<10:47:03, 3.35it/s] 65%|██████▌ | 241593/371472 [8:38:42<11:01:01, 3.27it/s] 65%|██████▌ | 241594/371472 [8:38:42<11:03:13, 3.26it/s] 65%|██████▌ | 241595/371472 [8:38:42<10:55:36, 3.30it/s] 65%|██████▌ | 241596/371472 [8:38:42<10:45:53, 3.35it/s] 65%|██████▌ | 241597/371472 [8:38:43<10:42:40, 3.37it/s] 65%|██████▌ | 241598/371472 [8:38:43<10:46:42, 3.35it/s] 65%|██████▌ | 241599/371472 [8:38:43<10:40:02, 3.38it/s] 65%|██████▌ | 241600/371472 [8:38:44<10:48:26, 3.34it/s] {'loss': 2.6757, 'learning_rate': 4.148225559697472e-07, 'epoch': 10.41} + 65%|██████▌ | 241600/371472 [8:38:44<10:48:26, 3.34it/s] 65%|██████▌ | 241601/371472 [8:38:44<10:44:24, 3.36it/s] 65%|██████▌ | 241602/371472 [8:38:44<11:24:41, 3.16it/s] 65%|██████▌ | 241603/371472 [8:38:45<11:07:36, 3.24it/s] 65%|██████▌ | 241604/371472 [8:38:45<10:52:03, 3.32it/s] 65%|██████▌ | 241605/371472 [8:38:45<10:53:52, 3.31it/s] 65%|██████▌ | 241606/371472 [8:38:45<10:49:30, 3.33it/s] 65%|██████▌ | 241607/371472 [8:38:46<10:34:38, 3.41it/s] 65%|██████▌ | 241608/371472 [8:38:46<10:46:14, 3.35it/s] 65%|██████▌ | 241609/371472 [8:38:46<10:39:32, 3.38it/s] 65%|██████▌ | 241610/371472 [8:38:47<10:36:42, 3.40it/s] 65%|██████▌ | 241611/371472 [8:38:47<10:28:14, 3.45it/s] 65%|██████▌ | 241612/371472 [8:38:47<10:30:32, 3.43it/s] 65%|██████▌ | 241613/371472 [8:38:48<10:28:23, 3.44it/s] 65%|██████▌ | 241614/371472 [8:38:48<10:58:16, 3.29it/s] 65%|██████▌ | 241615/371472 [8:38:48<11:00:50, 3.28it/s] 65%|██████▌ | 241616/371472 [8:38:48<11:01:04, 3.27it/s] 65%|██████▌ | 241617/371472 [8:38:49<10:52:04, 3.32it/s] 65%|██████▌ | 241618/371472 [8:38:49<11:03:23, 3.26it/s] 65%|██████▌ | 241619/371472 [8:38:49<10:42:07, 3.37it/s] 65%|██████▌ | 241620/371472 [8:38:50<10:32:52, 3.42it/s] {'loss': 2.738, 'learning_rate': 4.147740739942683e-07, 'epoch': 10.41} + 65%|██████▌ | 241620/371472 [8:38:50<10:32:52, 3.42it/s] 65%|██████▌ | 241621/371472 [8:38:50<10:19:20, 3.49it/s] 65%|██████▌ | 241622/371472 [8:38:50<10:35:43, 3.40it/s] 65%|██████▌ | 241623/371472 [8:38:51<10:54:32, 3.31it/s] 65%|██████▌ | 241624/371472 [8:38:51<11:44:15, 3.07it/s] 65%|██████▌ | 241625/371472 [8:38:51<11:08:34, 3.24it/s] 65%|██████▌ | 241626/371472 [8:38:52<11:23:40, 3.17it/s] 65%|██████▌ | 241627/371472 [8:38:52<11:20:33, 3.18it/s] 65%|██████▌ | 241628/371472 [8:38:52<10:52:36, 3.32it/s] 65%|██████▌ | 241629/371472 [8:38:52<11:05:14, 3.25it/s] 65%|██████▌ | 241630/371472 [8:38:53<10:58:38, 3.29it/s] 65%|██████�� | 241631/371472 [8:38:53<11:13:13, 3.21it/s] 65%|██████▌ | 241632/371472 [8:38:53<10:47:31, 3.34it/s] 65%|██████▌ | 241633/371472 [8:38:54<10:40:50, 3.38it/s] 65%|██████▌ | 241634/371472 [8:38:54<10:30:34, 3.43it/s] 65%|██████▌ | 241635/371472 [8:38:54<10:19:32, 3.49it/s] 65%|██████▌ | 241636/371472 [8:38:54<10:12:16, 3.53it/s] 65%|██████▌ | 241637/371472 [8:38:55<10:06:48, 3.57it/s] 65%|██████▌ | 241638/371472 [8:38:55<10:01:05, 3.60it/s] 65%|██████▌ | 241639/371472 [8:38:55<11:00:44, 3.27it/s] 65%|██████▌ | 241640/371472 [8:38:56<10:49:41, 3.33it/s] {'loss': 2.7018, 'learning_rate': 4.1472559201878947e-07, 'epoch': 10.41} + 65%|██████▌ | 241640/371472 [8:38:56<10:49:41, 3.33it/s] 65%|██████▌ | 241641/371472 [8:38:56<11:25:01, 3.16it/s] 65%|██████▌ | 241642/371472 [8:38:56<11:03:12, 3.26it/s] 65%|██████▌ | 241643/371472 [8:38:57<10:51:45, 3.32it/s] 65%|██████▌ | 241644/371472 [8:38:57<11:13:17, 3.21it/s] 65%|██████▌ | 241645/371472 [8:38:57<11:17:34, 3.19it/s] 65%|██████▌ | 241646/371472 [8:38:58<11:22:02, 3.17it/s] 65%|██████▌ | 241647/371472 [8:38:58<11:25:45, 3.16it/s] 65%|██████▌ | 241648/371472 [8:38:58<11:08:40, 3.24it/s] 65%|██████▌ | 241649/371472 [8:38:58<10:50:52, 3.32it/s] 65%|██████▌ | 241650/371472 [8:38:59<10:43:53, 3.36it/s] 65%|██████▌ | 241651/371472 [8:38:59<10:36:54, 3.40it/s] 65%|██████▌ | 241652/371472 [8:38:59<10:42:12, 3.37it/s] 65%|██████▌ | 241653/371472 [8:39:00<10:30:40, 3.43it/s] 65%|██████▌ | 241654/371472 [8:39:00<11:15:19, 3.20it/s] 65%|██████▌ | 241655/371472 [8:39:00<11:47:10, 3.06it/s] 65%|██████▌ | 241656/371472 [8:39:01<11:33:57, 3.12it/s] 65%|██████▌ | 241657/371472 [8:39:01<11:17:08, 3.20it/s] 65%|██████▌ | 241658/371472 [8:39:01<10:50:12, 3.33it/s] 65%|██████▌ | 241659/371472 [8:39:01<10:35:46, 3.40it/s] 65%|██████▌ | 241660/371472 [8:39:02<11:01:21, 3.27it/s] {'loss': 2.7495, 'learning_rate': 4.146771100433105e-07, 'epoch': 10.41} + 65%|██████▌ | 241660/371472 [8:39:02<11:01:21, 3.27it/s] 65%|██████▌ | 241661/371472 [8:39:02<11:09:25, 3.23it/s] 65%|██████▌ | 241662/371472 [8:39:02<10:51:47, 3.32it/s] 65%|██████▌ | 241663/371472 [8:39:03<10:53:13, 3.31it/s] 65%|██████▌ | 241664/371472 [8:39:03<10:39:30, 3.38it/s] 65%|██████▌ | 241665/371472 [8:39:03<10:30:21, 3.43it/s] 65%|██████▌ | 241666/371472 [8:39:04<10:13:22, 3.53it/s] 65%|██████▌ | 241667/371472 [8:39:04<10:13:18, 3.53it/s] 65%|██████▌ | 241668/371472 [8:39:04<10:27:39, 3.45it/s] 65%|██████▌ | 241669/371472 [8:39:04<10:34:01, 3.41it/s] 65%|██████▌ | 241670/371472 [8:39:05<10:33:29, 3.41it/s] 65%|██████▌ | 241671/371472 [8:39:05<10:24:44, 3.46it/s] 65%|██████▌ | 241672/371472 [8:39:05<10:43:27, 3.36it/s] 65%|██████▌ | 241673/371472 [8:39:06<11:36:32, 3.11it/s] 65%|██████▌ | 241674/371472 [8:39:06<10:57:18, 3.29it/s] 65%|██████▌ | 241675/371472 [8:39:06<13:12:54, 2.73it/s] 65%|██████▌ | 241676/371472 [8:39:07<12:23:22, 2.91it/s] 65%|██████▌ | 241677/371472 [8:39:07<12:01:32, 3.00it/s] 65%|██████▌ | 241678/371472 [8:39:07<11:39:51, 3.09it/s] 65%|██████▌ | 241679/371472 [8:39:08<11:28:28, 3.14it/s] 65%|██████▌ | 241680/371472 [8:39:08<11:00:09, 3.28it/s] {'loss': 2.4667, 'learning_rate': 4.1462862806783167e-07, 'epoch': 10.41} + 65%|██████▌ | 241680/371472 [8:39:08<11:00:09, 3.28it/s] 65%|██████▌ | 241681/371472 [8:39:08<10:43:21, 3.36it/s] 65%|██████▌ | 241682/371472 [8:39:09<10:45:07, 3.35it/s] 65%|██████▌ | 241683/371472 [8:39:09<11:59:15, 3.01it/s] 65%|██████▌ | 241684/371472 [8:39:09<11:26:06, 3.15it/s] 65%|██████▌ | 241685/371472 [8:39:10<12:30:16, 2.88it/s] 65%|██████▌ | 241686/371472 [8:39:10<11:49:22, 3.05it/s] 65%|██████▌ | 241687/371472 [8:39:10<12:45:58, 2.82it/s] 65%|██████▌ | 241688/371472 [8:39:11<12:12:18, 2.95it/s] 65%|██████▌ | 241689/371472 [8:39:11<11:57:38, 3.01it/s] 65%|██████▌ | 241690/371472 [8:39:11<11:28:37, 3.14it/s] 65%|█��████▌ | 241691/371472 [8:39:12<10:58:46, 3.28it/s] 65%|██████▌ | 241692/371472 [8:39:12<10:52:55, 3.31it/s] 65%|██████▌ | 241693/371472 [8:39:12<11:12:24, 3.22it/s] 65%|██████▌ | 241694/371472 [8:39:12<10:54:46, 3.30it/s] 65%|██████▌ | 241695/371472 [8:39:13<11:10:52, 3.22it/s] 65%|██████▌ | 241696/371472 [8:39:13<11:18:24, 3.19it/s] 65%|██████▌ | 241697/371472 [8:39:13<11:04:31, 3.25it/s] 65%|██████▌ | 241698/371472 [8:39:14<11:06:18, 3.25it/s] 65%|██████▌ | 241699/371472 [8:39:14<12:24:46, 2.90it/s] 65%|██████▌ | 241700/371472 [8:39:14<11:40:06, 3.09it/s] {'loss': 2.7203, 'learning_rate': 4.1458014609235274e-07, 'epoch': 10.41} + 65%|██████▌ | 241700/371472 [8:39:14<11:40:06, 3.09it/s] 65%|██████▌ | 241701/371472 [8:39:15<11:08:56, 3.23it/s] 65%|██████▌ | 241702/371472 [8:39:15<10:46:11, 3.35it/s] 65%|██████▌ | 241703/371472 [8:39:15<11:16:38, 3.20it/s] 65%|██████▌ | 241704/371472 [8:39:16<11:16:53, 3.20it/s] 65%|██████▌ | 241705/371472 [8:39:16<10:54:23, 3.31it/s] 65%|██████▌ | 241706/371472 [8:39:16<11:39:56, 3.09it/s] 65%|██████▌ | 241707/371472 [8:39:17<11:23:29, 3.16it/s] 65%|██████▌ | 241708/371472 [8:39:17<11:37:57, 3.10it/s] 65%|██████▌ | 241709/371472 [8:39:17<11:12:44, 3.21it/s] 65%|██████▌ | 241710/371472 [8:39:17<11:07:44, 3.24it/s] 65%|██████▌ | 241711/371472 [8:39:18<11:24:27, 3.16it/s] 65%|██████▌ | 241712/371472 [8:39:18<11:15:50, 3.20it/s] 65%|██████▌ | 241713/371472 [8:39:18<11:10:15, 3.23it/s] 65%|██████▌ | 241714/371472 [8:39:19<11:10:55, 3.22it/s] 65%|██████▌ | 241715/371472 [8:39:19<11:02:10, 3.27it/s] 65%|██████▌ | 241716/371472 [8:39:19<10:42:07, 3.37it/s] 65%|██████▌ | 241717/371472 [8:39:20<10:59:12, 3.28it/s] 65%|██████▌ | 241718/371472 [8:39:20<11:00:20, 3.27it/s] 65%|██████▌ | 241719/371472 [8:39:20<10:45:41, 3.35it/s] 65%|██████▌ | 241720/371472 [8:39:20<10:49:44, 3.33it/s] {'loss': 2.839, 'learning_rate': 4.1453166411687387e-07, 'epoch': 10.41} + 65%|██████▌ | 241720/371472 [8:39:20<10:49:44, 3.33it/s] 65%|██████▌ | 241721/371472 [8:39:21<10:51:57, 3.32it/s] 65%|██████▌ | 241722/371472 [8:39:21<10:35:44, 3.40it/s] 65%|██████▌ | 241723/371472 [8:39:21<11:09:00, 3.23it/s] 65%|██████▌ | 241724/371472 [8:39:22<10:39:55, 3.38it/s] 65%|██████▌ | 241725/371472 [8:39:22<10:31:23, 3.42it/s] 65%|██████▌ | 241726/371472 [8:39:22<10:24:07, 3.46it/s] 65%|██████▌ | 241727/371472 [8:39:23<11:09:21, 3.23it/s] 65%|██████▌ | 241728/371472 [8:39:23<10:49:51, 3.33it/s] 65%|██████▌ | 241729/371472 [8:39:23<10:49:45, 3.33it/s] 65%|██████▌ | 241730/371472 [8:39:23<10:28:34, 3.44it/s] 65%|██████▌ | 241731/371472 [8:39:24<10:20:05, 3.49it/s] 65%|██████▌ | 241732/371472 [8:39:24<10:25:50, 3.46it/s] 65%|██████▌ | 241733/371472 [8:39:24<10:14:25, 3.52it/s] 65%|██████▌ | 241734/371472 [8:39:25<10:10:22, 3.54it/s] 65%|██████▌ | 241735/371472 [8:39:25<10:32:13, 3.42it/s] 65%|██████▌ | 241736/371472 [8:39:25<13:14:19, 2.72it/s] 65%|██████▌ | 241737/371472 [8:39:26<12:18:06, 2.93it/s] 65%|██████▌ | 241738/371472 [8:39:26<11:42:30, 3.08it/s] 65%|██████▌ | 241739/371472 [8:39:26<11:11:19, 3.22it/s] 65%|██████▌ | 241740/371472 [8:39:27<10:56:48, 3.29it/s] {'loss': 2.7146, 'learning_rate': 4.1448318214139494e-07, 'epoch': 10.41} + 65%|██████▌ | 241740/371472 [8:39:27<10:56:48, 3.29it/s] 65%|██████▌ | 241741/371472 [8:39:27<10:33:47, 3.41it/s] 65%|██████▌ | 241742/371472 [8:39:27<10:42:26, 3.37it/s] 65%|██████▌ | 241743/371472 [8:39:27<10:48:03, 3.34it/s] 65%|██████▌ | 241744/371472 [8:39:28<10:41:17, 3.37it/s] 65%|██████▌ | 241745/371472 [8:39:28<11:05:54, 3.25it/s] 65%|██████▌ | 241746/371472 [8:39:28<11:11:18, 3.22it/s] 65%|██████▌ | 241747/371472 [8:39:29<10:55:16, 3.30it/s] 65%|██████▌ | 241748/371472 [8:39:29<10:45:59, 3.35it/s] 65%|██████▌ | 241749/371472 [8:39:29<10:35:46, 3.40it/s] 65%|██████▌ | 241750/371472 [8:39:30<11:08:17, 3.24it/s] 65%|██████▌ | 241751/371472 [8:39:30<10:55:15, 3.30it/s] 65%|██████▌ | 241752/371472 [8:39:30<11:08:10, 3.24it/s] 65%|██████▌ | 241753/371472 [8:39:30<10:43:41, 3.36it/s] 65%|██████▌ | 241754/371472 [8:39:31<10:26:12, 3.45it/s] 65%|██████▌ | 241755/371472 [8:39:31<10:30:51, 3.43it/s] 65%|██████▌ | 241756/371472 [8:39:31<10:27:54, 3.44it/s] 65%|██████▌ | 241757/371472 [8:39:32<10:18:36, 3.49it/s] 65%|██████▌ | 241758/371472 [8:39:32<10:28:00, 3.44it/s] 65%|██████▌ | 241759/371472 [8:39:32<10:58:39, 3.28it/s] 65%|██████▌ | 241760/371472 [8:39:33<10:31:40, 3.42it/s] {'loss': 2.7673, 'learning_rate': 4.144347001659161e-07, 'epoch': 10.41} + 65%|██████▌ | 241760/371472 [8:39:33<10:31:40, 3.42it/s] 65%|██████▌ | 241761/371472 [8:39:33<10:48:14, 3.33it/s] 65%|██████▌ | 241762/371472 [8:39:33<10:35:37, 3.40it/s] 65%|██████▌ | 241763/371472 [8:39:33<10:27:26, 3.45it/s] 65%|██████▌ | 241764/371472 [8:39:34<10:27:40, 3.44it/s] 65%|██████▌ | 241765/371472 [8:39:34<10:14:52, 3.52it/s] 65%|██████▌ | 241766/371472 [8:39:34<10:10:28, 3.54it/s] 65%|██████▌ | 241767/371472 [8:39:35<10:20:55, 3.48it/s] 65%|██████▌ | 241768/371472 [8:39:35<10:22:02, 3.48it/s] 65%|██████▌ | 241769/371472 [8:39:35<10:06:55, 3.56it/s] 65%|██████▌ | 241770/371472 [8:39:35<10:50:46, 3.32it/s] 65%|██████▌ | 241771/371472 [8:39:36<10:44:01, 3.36it/s] 65%|██████▌ | 241772/371472 [8:39:36<10:27:12, 3.45it/s] 65%|██████▌ | 241773/371472 [8:39:36<10:24:19, 3.46it/s] 65%|██████▌ | 241774/371472 [8:39:37<10:13:08, 3.53it/s] 65%|██████▌ | 241775/371472 [8:39:37<10:09:50, 3.54it/s] 65%|██████▌ | 241776/371472 [8:39:37<9:55:29, 3.63it/s] 65%|██████▌ | 241777/371472 [8:39:37<10:18:57, 3.49it/s] 65%|██████▌ | 241778/371472 [8:39:38<10:14:37, 3.52it/s] 65%|██████▌ | 241779/371472 [8:39:38<10:11:47, 3.53it/s] 65%|██████▌ | 241780/371472 [8:39:38<10:07:03, 3.56it/s] {'loss': 2.8578, 'learning_rate': 4.1438621819043713e-07, 'epoch': 10.41} + 65%|██████▌ | 241780/371472 [8:39:38<10:07:03, 3.56it/s] 65%|██████▌ | 241781/371472 [8:39:39<11:06:23, 3.24it/s] 65%|██████▌ | 241782/371472 [8:39:39<10:40:03, 3.38it/s] 65%|██████▌ | 241783/371472 [8:39:39<10:22:23, 3.47it/s] 65%|██████▌ | 241784/371472 [8:39:39<10:07:22, 3.56it/s] 65%|██████▌ | 241785/371472 [8:39:40<10:06:16, 3.57it/s] 65%|██████▌ | 241786/371472 [8:39:40<9:57:09, 3.62it/s] 65%|██████▌ | 241787/371472 [8:39:40<10:16:00, 3.51it/s] 65%|██████▌ | 241788/371472 [8:39:41<10:09:29, 3.55it/s] 65%|██████▌ | 241789/371472 [8:39:41<10:15:44, 3.51it/s] 65%|██████▌ | 241790/371472 [8:39:41<10:10:12, 3.54it/s] 65%|██████▌ | 241791/371472 [8:39:41<9:59:03, 3.61it/s] 65%|██████▌ | 241792/371472 [8:39:42<10:26:55, 3.45it/s] 65%|██████▌ | 241793/371472 [8:39:42<10:44:05, 3.36it/s] 65%|██████▌ | 241794/371472 [8:39:42<11:33:57, 3.11it/s] 65%|██████▌ | 241795/371472 [8:39:43<11:14:30, 3.20it/s] 65%|██████▌ | 241796/371472 [8:39:43<10:44:47, 3.35it/s] 65%|██████▌ | 241797/371472 [8:39:43<11:28:44, 3.14it/s] 65%|██████▌ | 241798/371472 [8:39:44<11:01:20, 3.27it/s] 65%|██████▌ | 241799/371472 [8:39:44<10:38:03, 3.39it/s] 65%|██████▌ | 241800/371472 [8:39:44<10:36:57, 3.39it/s] {'loss': 2.743, 'learning_rate': 4.143377362149583e-07, 'epoch': 10.41} + 65%|██████▌ | 241800/371472 [8:39:44<10:36:57, 3.39it/s] 65%|██████▌ | 241801/371472 [8:39:44<10:23:56, 3.46it/s] 65%|██████▌ | 241802/371472 [8:39:45<10:35:31, 3.40it/s] 65%|██████▌ | 241803/371472 [8:39:45<10:17:39, 3.50it/s] 65%|██████▌ | 241804/371472 [8:39:45<10:28:11, 3.44it/s] 65%|██████▌ | 241805/371472 [8:39:46<10:50:16, 3.32it/s] 65%|██████▌ | 241806/371472 [8:39:46<10:43:42, 3.36it/s] 65%|██████▌ | 241807/371472 [8:39:46<11:02:09, 3.26it/s] 65%|██████▌ | 241808/371472 [8:39:47<11:19:30, 3.18it/s] 65%|██████▌ | 241809/371472 [8:39:47<11:11:55, 3.22it/s] 65%|██████▌ | 241810/371472 [8:39:47<10:53:35, 3.31it/s] 65%|██████▌ | 241811/371472 [8:39:47<10:55:47, 3.30it/s] 65%|██████▌ | 241812/371472 [8:39:48<10:47:13, 3.34it/s] 65%|██████▌ | 241813/371472 [8:39:48<10:44:02, 3.36it/s] 65%|██████▌ | 241814/371472 [8:39:48<10:14:43, 3.52it/s] 65%|██████▌ | 241815/371472 [8:39:49<10:29:40, 3.43it/s] 65%|██████▌ | 241816/371472 [8:39:49<10:20:35, 3.48it/s] 65%|██████▌ | 241817/371472 [8:39:49<10:09:30, 3.55it/s] 65%|██████▌ | 241818/371472 [8:39:49<10:13:12, 3.52it/s] 65%|██████▌ | 241819/371472 [8:39:50<10:08:35, 3.55it/s] 65%|██████▌ | 241820/371472 [8:39:50<10:08:42, 3.55it/s] {'loss': 2.6676, 'learning_rate': 4.142892542394794e-07, 'epoch': 10.42} + 65%|██████▌ | 241820/371472 [8:39:50<10:08:42, 3.55it/s] 65%|██████▌ | 241821/371472 [8:39:50<11:23:49, 3.16it/s] 65%|██████▌ | 241822/371472 [8:39:51<12:05:29, 2.98it/s] 65%|██████▌ | 241823/371472 [8:39:51<11:32:25, 3.12it/s] 65%|██████▌ | 241824/371472 [8:39:51<11:05:59, 3.24it/s] 65%|██████▌ | 241825/371472 [8:39:52<10:30:24, 3.43it/s] 65%|██████▌ | 241826/371472 [8:39:52<10:40:21, 3.37it/s] 65%|██████▌ | 241827/371472 [8:39:52<10:27:39, 3.44it/s] 65%|██████▌ | 241828/371472 [8:39:52<10:10:35, 3.54it/s] 65%|██████▌ | 241829/371472 [8:39:53<10:14:43, 3.51it/s] 65%|██████▌ | 241830/371472 [8:39:53<10:03:14, 3.58it/s] 65%|██████▌ | 241831/371472 [8:39:53<9:59:43, 3.60it/s] 65%|██████▌ | 241832/371472 [8:39:54<10:15:42, 3.51it/s] 65%|██████▌ | 241833/371472 [8:39:54<10:30:32, 3.43it/s] 65%|██████▌ | 241834/371472 [8:39:54<10:33:02, 3.41it/s] 65%|██████▌ | 241835/371472 [8:39:54<10:26:17, 3.45it/s] 65%|██████▌ | 241836/371472 [8:39:55<10:26:08, 3.45it/s] 65%|██████▌ | 241837/371472 [8:39:55<10:41:03, 3.37it/s] 65%|██████▌ | 241838/371472 [8:39:55<10:32:41, 3.41it/s] 65%|██████▌ | 241839/371472 [8:39:56<11:02:12, 3.26it/s] 65%|██████▌ | 241840/371472 [8:39:56<10:48:36, 3.33it/s] {'loss': 2.8984, 'learning_rate': 4.142407722640005e-07, 'epoch': 10.42} + 65%|██████▌ | 241840/371472 [8:39:56<10:48:36, 3.33it/s] 65%|██████▌ | 241841/371472 [8:39:56<11:21:53, 3.17it/s] 65%|██████▌ | 241842/371472 [8:39:57<11:02:56, 3.26it/s] 65%|██████▌ | 241843/371472 [8:39:57<11:52:14, 3.03it/s] 65%|██████▌ | 241844/371472 [8:39:57<11:43:11, 3.07it/s] 65%|██████▌ | 241845/371472 [8:39:58<11:36:25, 3.10it/s] 65%|██████▌ | 241846/371472 [8:39:58<11:34:03, 3.11it/s] 65%|██████▌ | 241847/371472 [8:39:58<11:30:29, 3.13it/s] 65%|██████▌ | 241848/371472 [8:39:59<11:00:17, 3.27it/s] 65%|██████▌ | 241849/371472 [8:39:59<10:54:07, 3.30it/s] 65%|██████▌ | 241850/371472 [8:39:59<10:36:12, 3.40it/s] 65%|██████▌ | 241851/371472 [8:39:59<10:24:40, 3.46it/s] 65%|██████▌ | 241852/371472 [8:40:00<10:21:47, 3.47it/s] 65%|██████▌ | 241853/371472 [8:40:00<10:23:32, 3.46it/s] 65%|██████▌ | 241854/371472 [8:40:00<10:03:26, 3.58it/s] 65%|██████▌ | 241855/371472 [8:40:01<10:17:16, 3.50it/s] 65%|██████▌ | 241856/371472 [8:40:01<10:03:35, 3.58it/s] 65%|██████▌ | 241857/371472 [8:40:01<10:05:02, 3.57it/s] 65%|██████▌ | 241858/371472 [8:40:01<10:09:07, 3.55it/s] 65%|██████▌ | 241859/371472 [8:40:02<10:16:11, 3.51it/s] 65%|██████▌ | 241860/371472 [8:40:02<10:57:54, 3.28it/s] {'loss': 2.7202, 'learning_rate': 4.141922902885216e-07, 'epoch': 10.42} + 65%|██████▌ | 241860/371472 [8:40:02<10:57:54, 3.28it/s] 65%|██████▌ | 241861/371472 [8:40:02<12:13:56, 2.94it/s] 65%|██████▌ | 241862/371472 [8:40:03<11:27:09, 3.14it/s] 65%|██████▌ | 241863/371472 [8:40:03<10:57:53, 3.28it/s] 65%|██████▌ | 241864/371472 [8:40:03<11:07:32, 3.24it/s] 65%|██████▌ | 241865/371472 [8:40:04<10:40:29, 3.37it/s] 65%|██████▌ | 241866/371472 [8:40:04<10:46:23, 3.34it/s] 65%|██████▌ | 241867/371472 [8:40:04<10:36:22, 3.39it/s] 65%|██████▌ | 241868/371472 [8:40:04<10:44:54, 3.35it/s] 65%|██████▌ | 241869/371472 [8:40:05<10:59:15, 3.28it/s] 65%|██████▌ | 241870/371472 [8:40:05<10:53:38, 3.30it/s] 65%|██████▌ | 241871/371472 [8:40:05<10:31:08, 3.42it/s] 65%|██████▌ | 241872/371472 [8:40:06<10:16:06, 3.51it/s] 65%|██████▌ | 241873/371472 [8:40:06<10:09:53, 3.54it/s] 65%|██████▌ | 241874/371472 [8:40:06<10:31:28, 3.42it/s] 65%|██████▌ | 241875/371472 [8:40:06<10:13:19, 3.52it/s] 65%|██████▌ | 241876/371472 [8:40:07<10:28:50, 3.43it/s] 65%|██████▌ | 241877/371472 [8:40:07<10:20:34, 3.48it/s] 65%|██████▌ | 241878/371472 [8:40:07<10:14:41, 3.51it/s] 65%|██████▌ | 241879/371472 [8:40:08<10:31:04, 3.42it/s] 65%|██████▌ | 241880/371472 [8:40:08<10:19:26, 3.49it/s] {'loss': 2.8233, 'learning_rate': 4.1414380831304275e-07, 'epoch': 10.42} + 65%|██████▌ | 241880/371472 [8:40:08<10:19:26, 3.49it/s] 65%|██████▌ | 241881/371472 [8:40:08<10:50:19, 3.32it/s] 65%|██████▌ | 241882/371472 [8:40:08<10:27:31, 3.44it/s] 65%|██████▌ | 241883/371472 [8:40:09<10:20:36, 3.48it/s] 65%|██████▌ | 241884/371472 [8:40:09<10:17:49, 3.50it/s] 65%|██████▌ | 241885/371472 [8:40:09<10:34:25, 3.40it/s] 65%|██████▌ | 241886/371472 [8:40:10<10:20:01, 3.48it/s] 65%|██████▌ | 241887/371472 [8:40:10<10:18:53, 3.49it/s] 65%|██████▌ | 241888/371472 [8:40:10<10:33:43, 3.41it/s] 65%|██████▌ | 241889/371472 [8:40:11<10:40:04, 3.37it/s] 65%|██████▌ | 241890/371472 [8:40:11<10:30:10, 3.43it/s] 65%|██████▌ | 241891/371472 [8:40:11<10:34:31, 3.40it/s] 65%|██████▌ | 241892/371472 [8:40:11<10:53:15, 3.31it/s] 65%|██████▌ | 241893/371472 [8:40:12<10:44:39, 3.35it/s] 65%|██████▌ | 241894/371472 [8:40:12<10:19:30, 3.49it/s] 65%|██████▌ | 241895/371472 [8:40:12<10:15:40, 3.51it/s] 65%|██████▌ | 241896/371472 [8:40:13<10:07:21, 3.56it/s] 65%|██████▌ | 241897/371472 [8:40:13<9:56:39, 3.62it/s] 65%|██████▌ | 241898/371472 [8:40:13<10:14:22, 3.52it/s] 65%|██████▌ | 241899/371472 [8:40:13<10:56:33, 3.29it/s] 65%|██████▌ | 241900/371472 [8:40:14<11:15:03, 3.20it/s] {'loss': 2.7018, 'learning_rate': 4.1409532633756377e-07, 'epoch': 10.42} + 65%|██████▌ | 241900/371472 [8:40:14<11:15:03, 3.20it/s] 65%|██████▌ | 241901/371472 [8:40:14<11:12:43, 3.21it/s] 65%|██████▌ | 241902/371472 [8:40:14<11:10:02, 3.22it/s] 65%|██████▌ | 241903/371472 [8:40:15<11:56:13, 3.02it/s] 65%|██████▌ | 241904/371472 [8:40:15<11:15:24, 3.20it/s] 65%|██████▌ | 241905/371472 [8:40:15<11:00:39, 3.27it/s] 65%|██████▌ | 241906/371472 [8:40:16<11:01:38, 3.26it/s] 65%|██████▌ | 241907/371472 [8:40:16<10:51:06, 3.32it/s] 65%|██████▌ | 241908/371472 [8:40:16<10:47:31, 3.33it/s] 65%|██████▌ | 241909/371472 [8:40:17<10:23:07, 3.47it/s] 65%|██████▌ | 241910/371472 [8:40:17<10:18:42, 3.49it/s] 65%|██████▌ | 241911/371472 [8:40:17<10:00:56, 3.59it/s] 65%|██████▌ | 241912/371472 [8:40:17<9:46:49, 3.68it/s] 65%|██████▌ | 241913/371472 [8:40:18<10:21:05, 3.48it/s] 65%|██████▌ | 241914/371472 [8:40:18<10:17:22, 3.50it/s] 65%|██████▌ | 241915/371472 [8:40:18<10:07:41, 3.55it/s] 65%|██████▌ | 241916/371472 [8:40:18<10:05:48, 3.56it/s] 65%|██████▌ | 241917/371472 [8:40:19<9:57:09, 3.62it/s] 65%|██████▌ | 241918/371472 [8:40:19<9:56:51, 3.62it/s] 65%|██████▌ | 241919/371472 [8:40:19<10:23:47, 3.46it/s] 65%|██████▌ | 241920/371472 [8:40:20<10:20:09, 3.48it/s] {'loss': 2.8156, 'learning_rate': 4.140468443620849e-07, 'epoch': 10.42} + 65%|██████▌ | 241920/371472 [8:40:20<10:20:09, 3.48it/s] 65%|██████▌ | 241921/371472 [8:40:20<10:22:00, 3.47it/s] 65%|██████▌ | 241922/371472 [8:40:20<10:06:12, 3.56it/s] 65%|██████▌ | 241923/371472 [8:40:20<10:06:01, 3.56it/s] 65%|██████▌ | 241924/371472 [8:40:21<10:19:16, 3.49it/s] 65%|██████▌ | 241925/371472 [8:40:21<10:48:13, 3.33it/s] 65%|██████▌ | 241926/371472 [8:40:21<10:43:22, 3.36it/s] 65%|██████▌ | 241927/371472 [8:40:22<10:48:24, 3.33it/s] 65%|██████▌ | 241928/371472 [8:40:22<10:55:44, 3.29it/s] 65%|██████▌ | 241929/371472 [8:40:22<10:49:57, 3.32it/s] 65%|██████▌ | 241930/371472 [8:40:23<11:41:58, 3.08it/s] 65%|██████▌ | 241931/371472 [8:40:23<12:24:52, 2.90it/s] 65%|██████▌ | 241932/371472 [8:40:23<11:49:06, 3.04it/s] 65%|██████▌ | 241933/371472 [8:40:24<11:56:05, 3.01it/s] 65%|██████▌ | 241934/371472 [8:40:24<11:40:41, 3.08it/s] 65%|██████▌ | 241935/371472 [8:40:24<11:16:59, 3.19it/s] 65%|██████▌ | 241936/371472 [8:40:25<11:31:41, 3.12it/s] 65%|██████▌ | 241937/371472 [8:40:25<11:14:35, 3.20it/s] 65%|██████▌ | 241938/371472 [8:40:25<11:08:26, 3.23it/s] 65%|██████▌ | 241939/371472 [8:40:26<11:51:40, 3.03it/s] 65%|██████▌ | 241940/371472 [8:40:26<11:21:28, 3.17it/s] {'loss': 2.6408, 'learning_rate': 4.13998362386606e-07, 'epoch': 10.42} + 65%|██████▌ | 241940/371472 [8:40:26<11:21:28, 3.17it/s] 65%|██████▌ | 241941/371472 [8:40:26<10:57:42, 3.28it/s] 65%|██████▌ | 241942/371472 [8:40:26<11:08:55, 3.23it/s] 65%|██████▌ | 241943/371472 [8:40:27<11:18:17, 3.18it/s] 65%|██████▌ | 241944/371472 [8:40:27<11:12:25, 3.21it/s] 65%|██████▌ | 241945/371472 [8:40:27<11:05:37, 3.24it/s] 65%|██████▌ | 241946/371472 [8:40:28<12:26:32, 2.89it/s] 65%|██████▌ | 241947/371472 [8:40:28<11:41:20, 3.08it/s] 65%|██████▌ | 241948/371472 [8:40:28<11:11:23, 3.22it/s] 65%|██████▌ | 241949/371472 [8:40:29<10:36:52, 3.39it/s] 65%|██████▌ | 241950/371472 [8:40:29<10:32:20, 3.41it/s] 65%|██████▌ | 241951/371472 [8:40:29<10:44:32, 3.35it/s] 65%|██████▌ | 241952/371472 [8:40:30<10:44:17, 3.35it/s] 65%|██████▌ | 241953/371472 [8:40:30<10:31:50, 3.42it/s] 65%|██████▌ | 241954/371472 [8:40:30<10:32:55, 3.41it/s] 65%|██████▌ | 241955/371472 [8:40:30<10:47:18, 3.33it/s] 65%|██████▌ | 241956/371472 [8:40:31<10:23:55, 3.46it/s] 65%|██████▌ | 241957/371472 [8:40:31<10:20:27, 3.48it/s] 65%|██████▌ | 241958/371472 [8:40:31<10:47:14, 3.34it/s] 65%|██████▌ | 241959/371472 [8:40:32<10:26:12, 3.45it/s] 65%|██████▌ | 241960/371472 [8:40:32<10:23:15, 3.46it/s] {'loss': 2.9254, 'learning_rate': 4.1394988041112715e-07, 'epoch': 10.42} + 65%|██████▌ | 241960/371472 [8:40:32<10:23:15, 3.46it/s] 65%|██████▌ | 241961/371472 [8:40:32<10:11:47, 3.53it/s] 65%|██████▌ | 241962/371472 [8:40:32<10:04:20, 3.57it/s] 65%|██████▌ | 241963/371472 [8:40:33<10:04:32, 3.57it/s] 65%|██████▌ | 241964/371472 [8:40:33<10:53:26, 3.30it/s] 65%|██████▌ | 241965/371472 [8:40:33<10:45:28, 3.34it/s] 65%|██████▌ | 241966/371472 [8:40:34<10:54:12, 3.30it/s] 65%|██████▌ | 241967/371472 [8:40:34<10:52:52, 3.31it/s] 65%|██████▌ | 241968/371472 [8:40:34<12:14:31, 2.94it/s] 65%|██████▌ | 241969/371472 [8:40:35<11:58:20, 3.00it/s] 65%|██████▌ | 241970/371472 [8:40:35<11:25:02, 3.15it/s] 65%|██████▌ | 241971/371472 [8:40:35<10:50:07, 3.32it/s] 65%|██████▌ | 241972/371472 [8:40:36<10:34:36, 3.40it/s] 65%|██████▌ | 241973/371472 [8:40:36<10:34:01, 3.40it/s] 65%|██████▌ | 241974/371472 [8:40:36<10:33:44, 3.41it/s] 65%|██████▌ | 241975/371472 [8:40:36<10:42:01, 3.36it/s] 65%|██████▌ | 241976/371472 [8:40:37<10:35:37, 3.40it/s] 65%|██████▌ | 241977/371472 [8:40:37<10:48:23, 3.33it/s] 65%|██████▌ | 241978/371472 [8:40:37<11:20:04, 3.17it/s] 65%|██████▌ | 241979/371472 [8:40:38<11:12:55, 3.21it/s] 65%|██████▌ | 241980/371472 [8:40:38<11:08:22, 3.23it/s] {'loss': 2.5724, 'learning_rate': 4.139013984356482e-07, 'epoch': 10.42} + 65%|██████▌ | 241980/371472 [8:40:38<11:08:22, 3.23it/s] 65%|██████▌ | 241981/371472 [8:40:38<11:03:26, 3.25it/s] 65%|██████▌ | 241982/371472 [8:40:39<11:00:44, 3.27it/s] 65%|██████▌ | 241983/371472 [8:40:39<10:39:52, 3.37it/s] 65%|██████▌ | 241984/371472 [8:40:39<10:23:31, 3.46it/s] 65%|██████▌ | 241985/371472 [8:40:39<10:54:48, 3.30it/s] 65%|██████▌ | 241986/371472 [8:40:40<11:12:44, 3.21it/s] 65%|██████▌ | 241987/371472 [8:40:40<11:14:28, 3.20it/s] 65%|██████▌ | 241988/371472 [8:40:40<10:58:56, 3.28it/s] 65%|██████▌ | 241989/371472 [8:40:41<10:50:16, 3.32it/s] 65%|██████▌ | 241990/371472 [8:40:41<11:01:26, 3.26it/s] 65%|██████▌ | 241991/371472 [8:40:41<10:32:41, 3.41it/s] 65%|██████▌ | 241992/371472 [8:40:42<10:44:27, 3.35it/s] 65%|██████▌ | 241993/371472 [8:40:42<10:35:43, 3.39it/s] 65%|██████▌ | 241994/371472 [8:40:42<11:14:08, 3.20it/s] 65%|██████▌ | 241995/371472 [8:40:43<11:24:53, 3.15it/s] 65%|██████▌ | 241996/371472 [8:40:43<11:51:26, 3.03it/s] 65%|██████▌ | 241997/371472 [8:40:43<11:25:08, 3.15it/s] 65%|██████▌ | 241998/371472 [8:40:43<11:06:17, 3.24it/s] 65%|██████▌ | 241999/371472 [8:40:44<10:51:53, 3.31it/s] 65%|██████▌ | 242000/371472 [8:40:44<10:23:51, 3.46it/s] {'loss': 2.7481, 'learning_rate': 4.138529164601694e-07, 'epoch': 10.42} + 65%|██████▌ | 242000/371472 [8:40:44<10:23:51, 3.46it/s] 65%|██████▌ | 242001/371472 [8:40:44<10:45:33, 3.34it/s] 65%|██████▌ | 242002/371472 [8:40:45<10:56:34, 3.29it/s] 65%|██████▌ | 242003/371472 [8:40:45<11:24:28, 3.15it/s] 65%|██████▌ | 242004/371472 [8:40:45<12:10:33, 2.95it/s] 65%|██████▌ | 242005/371472 [8:40:46<12:04:48, 2.98it/s] 65%|██████▌ | 242006/371472 [8:40:46<11:32:01, 3.12it/s] 65%|██████▌ | 242007/371472 [8:40:46<11:07:08, 3.23it/s] 65%|██████▌ | 242008/371472 [8:40:47<11:31:13, 3.12it/s] 65%|██████▌ | 242009/371472 [8:40:47<11:09:08, 3.22it/s] 65%|██████▌ | 242010/371472 [8:40:47<11:01:05, 3.26it/s] 65%|██████▌ | 242011/371472 [8:40:48<11:00:29, 3.27it/s] 65%|██████▌ | 242012/371472 [8:40:48<10:49:11, 3.32it/s] 65%|██████▌ | 242013/371472 [8:40:48<11:38:31, 3.09it/s] 65%|██████▌ | 242014/371472 [8:40:48<10:57:51, 3.28it/s] 65%|██████▌ | 242015/371472 [8:40:49<10:40:10, 3.37it/s] 65%|██████▌ | 242016/371472 [8:40:49<10:59:55, 3.27it/s] 65%|██████▌ | 242017/371472 [8:40:49<11:07:12, 3.23it/s] 65%|██████▌ | 242018/371472 [8:40:50<10:56:22, 3.29it/s] 65%|██████▌ | 242019/371472 [8:40:50<10:59:26, 3.27it/s] 65%|██████▌ | 242020/371472 [8:40:50<10:39:56, 3.37it/s] {'loss': 2.641, 'learning_rate': 4.1380443448469047e-07, 'epoch': 10.42} + 65%|██████▌ | 242020/371472 [8:40:50<10:39:56, 3.37it/s] 65%|██████▌ | 242021/371472 [8:40:50<10:12:09, 3.52it/s] 65%|██████▌ | 242022/371472 [8:40:51<10:04:55, 3.57it/s] 65%|██████▌ | 242023/371472 [8:40:51<10:27:30, 3.44it/s] 65%|██████▌ | 242024/371472 [8:40:51<10:15:32, 3.50it/s] 65%|██████▌ | 242025/371472 [8:40:52<10:11:53, 3.53it/s] 65%|██████▌ | 242026/371472 [8:40:52<10:09:09, 3.54it/s] 65%|██████▌ | 242027/371472 [8:40:52<10:56:02, 3.29it/s] 65%|██████▌ | 242028/371472 [8:40:53<10:53:19, 3.30it/s] 65%|██████▌ | 242029/371472 [8:40:53<10:31:59, 3.41it/s] 65%|██████▌ | 242030/371472 [8:40:53<10:38:11, 3.38it/s] 65%|██████▌ | 242031/371472 [8:40:53<10:30:06, 3.42it/s] 65%|██████▌ | 242032/371472 [8:40:54<10:20:24, 3.48it/s] 65%|██████▌ | 242033/371472 [8:40:54<10:15:42, 3.50it/s] 65%|██████▌ | 242034/371472 [8:40:54<11:57:39, 3.01it/s] 65%|██████▌ | 242035/371472 [8:40:55<12:14:54, 2.94it/s] 65%|██████▌ | 242036/371472 [8:40:55<11:21:36, 3.16it/s] 65%|██████▌ | 242037/371472 [8:40:55<11:03:15, 3.25it/s] 65%|██████▌ | 242038/371472 [8:40:56<11:03:41, 3.25it/s] 65%|██████▌ | 242039/371472 [8:40:56<11:14:08, 3.20it/s] 65%|██████▌ | 242040/371472 [8:40:56<11:12:13, 3.21it/s] {'loss': 2.8069, 'learning_rate': 4.137559525092116e-07, 'epoch': 10.43} + 65%|██████▌ | 242040/371472 [8:40:56<11:12:13, 3.21it/s] 65%|██████▌ | 242041/371472 [8:40:57<11:06:14, 3.24it/s] 65%|██████▌ | 242042/371472 [8:40:57<10:32:27, 3.41it/s] 65%|██████▌ | 242043/371472 [8:40:57<10:28:15, 3.43it/s] 65%|██████▌ | 242044/371472 [8:40:57<10:23:01, 3.46it/s] 65%|██████▌ | 242045/371472 [8:40:58<10:06:42, 3.56it/s] 65%|██████▌ | 242046/371472 [8:40:58<10:01:37, 3.59it/s] 65%|██████▌ | 242047/371472 [8:40:58<10:22:45, 3.46it/s] 65%|██████▌ | 242048/371472 [8:40:59<10:59:19, 3.27it/s] 65%|██████▌ | 242049/371472 [8:40:59<11:02:19, 3.26it/s] 65%|██████▌ | 242050/371472 [8:40:59<10:53:03, 3.30it/s] 65%|██████▌ | 242051/371472 [8:40:59<10:46:13, 3.34it/s] 65%|██████▌ | 242052/371472 [8:41:00<10:23:34, 3.46it/s] 65%|██████▌ | 242053/371472 [8:41:00<10:13:13, 3.52it/s] 65%|██████▌ | 242054/371472 [8:41:00<10:24:57, 3.45it/s] 65%|██████▌ | 242055/371472 [8:41:01<10:13:19, 3.52it/s] 65%|██████▌ | 242056/371472 [8:41:01<10:14:35, 3.51it/s] 65%|██████▌ | 242057/371472 [8:41:01<10:08:29, 3.54it/s] 65%|██████▌ | 242058/371472 [8:41:01<10:21:24, 3.47it/s] 65%|██████▌ | 242059/371472 [8:41:02<10:16:12, 3.50it/s] 65%|██████▌ | 242060/371472 [8:41:02<10:13:59, 3.51it/s] {'loss': 2.8911, 'learning_rate': 4.1370747053373266e-07, 'epoch': 10.43} + 65%|██████▌ | 242060/371472 [8:41:02<10:13:59, 3.51it/s] 65%|██████▌ | 242061/371472 [8:41:02<10:16:14, 3.50it/s] 65%|██████▌ | 242062/371472 [8:41:03<10:10:11, 3.53it/s] 65%|██████▌ | 242063/371472 [8:41:03<10:16:18, 3.50it/s] 65%|██████▌ | 242064/371472 [8:41:03<10:36:46, 3.39it/s] 65%|██████▌ | 242065/371472 [8:41:03<10:34:21, 3.40it/s] 65%|██████▌ | 242066/371472 [8:41:04<10:28:38, 3.43it/s] 65%|██████▌ | 242067/371472 [8:41:04<10:32:45, 3.41it/s] 65%|██████▌ | 242068/371472 [8:41:04<10:25:21, 3.45it/s] 65%|██████▌ | 242069/371472 [8:41:05<10:13:00, 3.52it/s] 65%|██████▌ | 242070/371472 [8:41:05<9:59:37, 3.60it/s] 65%|██████▌ | 242071/371472 [8:41:05<9:58:53, 3.60it/s] 65%|██████▌ | 242072/371472 [8:41:05<10:04:55, 3.57it/s] 65%|██████▌ | 242073/371472 [8:41:06<11:02:40, 3.25it/s] 65%|██████▌ | 242074/371472 [8:41:06<10:56:40, 3.28it/s] 65%|██████▌ | 242075/371472 [8:41:06<11:03:47, 3.25it/s] 65%|██████▌ | 242076/371472 [8:41:07<10:45:54, 3.34it/s] 65%|██████▌ | 242077/371472 [8:41:07<10:36:21, 3.39it/s] 65%|██████▌ | 242078/371472 [8:41:07<10:49:02, 3.32it/s] 65%|██████▌ | 242079/371472 [8:41:08<10:59:12, 3.27it/s] 65%|██████▌ | 242080/371472 [8:41:08<10:42:43, 3.36it/s] {'loss': 2.6756, 'learning_rate': 4.136589885582537e-07, 'epoch': 10.43} + 65%|██████▌ | 242080/371472 [8:41:08<10:42:43, 3.36it/s] 65%|██████▌ | 242081/371472 [8:41:08<10:36:44, 3.39it/s] 65%|██████▌ | 242082/371472 [8:41:09<10:43:31, 3.35it/s] 65%|██████▌ | 242083/371472 [8:41:09<10:41:14, 3.36it/s] 65%|██████▌ | 242084/371472 [8:41:09<10:34:18, 3.40it/s] 65%|██████▌ | 242085/371472 [8:41:09<10:37:25, 3.38it/s] 65%|██████▌ | 242086/371472 [8:41:10<10:20:56, 3.47it/s] 65%|██████▌ | 242087/371472 [8:41:10<10:22:16, 3.47it/s] 65%|██████▌ | 242088/371472 [8:41:10<10:27:31, 3.44it/s] 65%|██████▌ | 242089/371472 [8:41:11<10:20:56, 3.47it/s] 65%|██████▌ | 242090/371472 [8:41:11<10:24:54, 3.45it/s] 65%|██████▌ | 242091/371472 [8:41:11<10:26:12, 3.44it/s] 65%|██████▌ | 242092/371472 [8:41:11<10:25:52, 3.45it/s] 65%|██████▌ | 242093/371472 [8:41:12<10:21:36, 3.47it/s] 65%|██████▌ | 242094/371472 [8:41:12<10:14:16, 3.51it/s] 65%|██████▌ | 242095/371472 [8:41:12<10:11:30, 3.53it/s] 65%|██████▌ | 242096/371472 [8:41:13<10:24:59, 3.45it/s] 65%|██████▌ | 242097/371472 [8:41:13<10:26:09, 3.44it/s] 65%|██████▌ | 242098/371472 [8:41:13<10:36:15, 3.39it/s] 65%|██████▌ | 242099/371472 [8:41:13<10:23:38, 3.46it/s] 65%|██████▌ | 242100/371472 [8:41:14<10:20:56, 3.47it/s] {'loss': 2.7046, 'learning_rate': 4.1361050658277486e-07, 'epoch': 10.43} + 65%|██████▌ | 242100/371472 [8:41:14<10:20:56, 3.47it/s] 65%|██████▌ | 242101/371472 [8:41:14<10:23:00, 3.46it/s] 65%|██████▌ | 242102/371472 [8:41:14<11:01:16, 3.26it/s] 65%|██████▌ | 242103/371472 [8:41:15<10:33:44, 3.40it/s] 65%|██████▌ | 242104/371472 [8:41:15<10:18:01, 3.49it/s] 65%|██████▌ | 242105/371472 [8:41:15<10:09:23, 3.54it/s] 65%|██████▌ | 242106/371472 [8:41:15<10:17:15, 3.49it/s] 65%|██████▌ | 242107/371472 [8:41:16<10:25:38, 3.45it/s] 65%|██████▌ | 242108/371472 [8:41:16<11:08:18, 3.23it/s] 65%|██████▌ | 242109/371472 [8:41:16<11:05:05, 3.24it/s] 65%|██████▌ | 242110/371472 [8:41:17<11:08:26, 3.23it/s] 65%|██████▌ | 242111/371472 [8:41:17<11:11:47, 3.21it/s] 65%|██████▌ | 242112/371472 [8:41:17<10:55:16, 3.29it/s] 65%|██████▌ | 242113/371472 [8:41:18<10:49:21, 3.32it/s] 65%|██████▌ | 242114/371472 [8:41:18<12:49:48, 2.80it/s] 65%|██████▌ | 242115/371472 [8:41:18<11:58:14, 3.00it/s] 65%|██████▌ | 242116/371472 [8:41:19<11:21:19, 3.16it/s] 65%|██████▌ | 242117/371472 [8:41:19<11:06:56, 3.23it/s] 65%|██████▌ | 242118/371472 [8:41:19<11:29:20, 3.13it/s] 65%|██████▌ | 242119/371472 [8:41:20<11:18:11, 3.18it/s] 65%|██████▌ | 242120/371472 [8:41:20<10:55:38, 3.29it/s] {'loss': 2.976, 'learning_rate': 4.1356202460729593e-07, 'epoch': 10.43} + 65%|██████▌ | 242120/371472 [8:41:20<10:55:38, 3.29it/s] 65%|██████▌ | 242121/371472 [8:41:20<11:08:26, 3.23it/s] 65%|██████▌ | 242122/371472 [8:41:21<11:00:55, 3.26it/s] 65%|██████▌ | 242123/371472 [8:41:21<10:54:51, 3.29it/s] 65%|██████▌ | 242124/371472 [8:41:21<10:26:41, 3.44it/s] 65%|██████▌ | 242125/371472 [8:41:21<10:40:44, 3.36it/s] 65%|██████▌ | 242126/371472 [8:41:22<11:10:05, 3.22it/s] 65%|██████▌ | 242127/371472 [8:41:22<10:44:24, 3.35it/s] 65%|██████▌ | 242128/371472 [8:41:22<10:38:56, 3.37it/s] 65%|██████▌ | 242129/371472 [8:41:23<11:36:41, 3.09it/s] 65%|██████▌ | 242130/371472 [8:41:23<10:59:27, 3.27it/s] 65%|██████▌ | 242131/371472 [8:41:23<11:37:32, 3.09it/s] 65%|██████▌ | 242132/371472 [8:41:24<11:30:53, 3.12it/s] 65%|██████▌ | 242133/371472 [8:41:24<12:10:38, 2.95it/s] 65%|██████▌ | 242134/371472 [8:41:24<11:36:05, 3.10it/s] 65%|██████▌ | 242135/371472 [8:41:25<11:04:45, 3.24it/s] 65%|██████▌ | 242136/371472 [8:41:25<12:35:57, 2.85it/s] 65%|██████▌ | 242137/371472 [8:41:25<11:49:52, 3.04it/s] 65%|██████▌ | 242138/371472 [8:41:26<11:45:40, 3.05it/s] 65%|██████▌ | 242139/371472 [8:41:26<11:37:14, 3.09it/s] 65%|██████▌ | 242140/371472 [8:41:26<11:15:31, 3.19it/s] {'loss': 2.7859, 'learning_rate': 4.135135426318171e-07, 'epoch': 10.43} + 65%|██████▌ | 242140/371472 [8:41:26<11:15:31, 3.19it/s] 65%|██████▌ | 242141/371472 [8:41:27<11:07:38, 3.23it/s] 65%|██████▌ | 242142/371472 [8:41:27<10:51:11, 3.31it/s] 65%|██████▌ | 242143/371472 [8:41:27<10:31:25, 3.41it/s] 65%|██████▌ | 242144/371472 [8:41:27<10:39:50, 3.37it/s] 65%|██████▌ | 242145/371472 [8:41:28<10:36:19, 3.39it/s] 65%|██████▌ | 242146/371472 [8:41:28<10:31:13, 3.41it/s] 65%|██████▌ | 242147/371472 [8:41:28<11:01:31, 3.26it/s] 65%|██████▌ | 242148/371472 [8:41:29<11:10:46, 3.21it/s] 65%|██████▌ | 242149/371472 [8:41:29<10:48:04, 3.33it/s] 65%|██████▌ | 242150/371472 [8:41:29<10:52:44, 3.30it/s] 65%|██████▌ | 242151/371472 [8:41:30<11:12:43, 3.20it/s] 65%|██████▌ | 242152/371472 [8:41:30<12:54:33, 2.78it/s] 65%|██████▌ | 242153/371472 [8:41:30<12:32:52, 2.86it/s] 65%|██████▌ | 242154/371472 [8:41:31<12:02:43, 2.98it/s] 65%|██████▌ | 242155/371472 [8:41:31<11:36:20, 3.10it/s] 65%|██████▌ | 242156/371472 [8:41:31<11:20:47, 3.17it/s] 65%|██████▌ | 242157/371472 [8:41:32<11:13:27, 3.20it/s] 65%|██████▌ | 242158/371472 [8:41:32<11:11:04, 3.21it/s] 65%|██████▌ | 242159/371472 [8:41:32<10:46:33, 3.33it/s] 65%|██████▌ | 242160/371472 [8:41:32<10:52:22, 3.30it/s] {'loss': 2.5922, 'learning_rate': 4.134650606563381e-07, 'epoch': 10.43} + 65%|██████▌ | 242160/371472 [8:41:32<10:52:22, 3.30it/s] 65%|██████▌ | 242161/371472 [8:41:33<10:24:31, 3.45it/s] 65%|██████▌ | 242162/371472 [8:41:33<10:36:30, 3.39it/s] 65%|██████▌ | 242163/371472 [8:41:33<10:31:26, 3.41it/s] 65%|██████▌ | 242164/371472 [8:41:34<10:35:34, 3.39it/s] 65%|██████▌ | 242165/371472 [8:41:34<10:16:23, 3.50it/s] 65%|██████▌ | 242166/371472 [8:41:34<10:08:52, 3.54it/s] 65%|██████▌ | 242167/371472 [8:41:34<10:40:25, 3.37it/s] 65%|██████▌ | 242168/371472 [8:41:35<10:38:09, 3.38it/s] 65%|██████▌ | 242169/371472 [8:41:35<10:25:35, 3.44it/s] 65%|██████▌ | 242170/371472 [8:41:35<10:22:26, 3.46it/s] 65%|██████▌ | 242171/371472 [8:41:36<10:25:39, 3.44it/s] 65%|██████▌ | 242172/371472 [8:41:36<10:30:40, 3.42it/s] 65%|██████▌ | 242173/371472 [8:41:36<10:27:40, 3.43it/s] 65%|██████▌ | 242174/371472 [8:41:36<10:46:05, 3.34it/s] 65%|██████▌ | 242175/371472 [8:41:37<11:15:48, 3.19it/s] 65%|██████▌ | 242176/371472 [8:41:37<11:20:53, 3.16it/s] 65%|██████▌ | 242177/371472 [8:41:37<11:17:17, 3.18it/s] 65%|██████▌ | 242178/371472 [8:41:38<11:11:24, 3.21it/s] 65%|██████▌ | 242179/371472 [8:41:38<11:15:56, 3.19it/s] 65%|██████▌ | 242180/371472 [8:41:38<11:00:33, 3.26it/s] {'loss': 2.6969, 'learning_rate': 4.134165786808593e-07, 'epoch': 10.43} + 65%|██████▌ | 242180/371472 [8:41:38<11:00:33, 3.26it/s] 65%|██████▌ | 242181/371472 [8:41:39<11:05:13, 3.24it/s] 65%|██████▌ | 242182/371472 [8:41:39<10:59:31, 3.27it/s] 65%|██████▌ | 242183/371472 [8:41:39<10:58:33, 3.27it/s] 65%|██████▌ | 242184/371472 [8:41:40<11:05:51, 3.24it/s] 65%|██████▌ | 242185/371472 [8:41:40<11:09:57, 3.22it/s] 65%|██████▌ | 242186/371472 [8:41:40<11:52:00, 3.03it/s] 65%|██████▌ | 242187/371472 [8:41:41<11:21:35, 3.16it/s] 65%|██████▌ | 242188/371472 [8:41:41<11:27:50, 3.13it/s] 65%|██████▌ | 242189/371472 [8:41:41<11:07:12, 3.23it/s] 65%|██████▌ | 242190/371472 [8:41:42<11:25:07, 3.15it/s] 65%|██████▌ | 242191/371472 [8:41:42<11:30:11, 3.12it/s] 65%|██████▌ | 242192/371472 [8:41:42<11:24:37, 3.15it/s] 65%|██████▌ | 242193/371472 [8:41:42<11:02:08, 3.25it/s] 65%|██████▌ | 242194/371472 [8:41:43<10:44:55, 3.34it/s] 65%|██████▌ | 242195/371472 [8:41:43<12:22:50, 2.90it/s] 65%|██████▌ | 242196/371472 [8:41:43<11:51:09, 3.03it/s] 65%|██████▌ | 242197/371472 [8:41:44<11:22:14, 3.16it/s] 65%|██████▌ | 242198/371472 [8:41:44<11:04:41, 3.24it/s] 65%|██████▌ | 242199/371472 [8:41:44<10:45:07, 3.34it/s] 65%|██████▌ | 242200/371472 [8:41:45<10:45:53, 3.34it/s] {'loss': 2.5188, 'learning_rate': 4.133680967053804e-07, 'epoch': 10.43} + 65%|██████▌ | 242200/371472 [8:41:45<10:45:53, 3.34it/s] 65%|██████▌ | 242201/371472 [8:41:45<10:54:25, 3.29it/s] 65%|██████▌ | 242202/371472 [8:41:45<10:41:54, 3.36it/s] 65%|██████▌ | 242203/371472 [8:41:46<10:48:57, 3.32it/s] 65%|██████▌ | 242204/371472 [8:41:46<10:49:36, 3.32it/s] 65%|██████▌ | 242205/371472 [8:41:46<10:39:25, 3.37it/s] 65%|██████▌ | 242206/371472 [8:41:46<10:46:27, 3.33it/s] 65%|██████▌ | 242207/371472 [8:41:47<10:52:12, 3.30it/s] 65%|██████▌ | 242208/371472 [8:41:47<11:15:05, 3.19it/s] 65%|██████▌ | 242209/371472 [8:41:47<11:22:01, 3.16it/s] 65%|██████▌ | 242210/371472 [8:41:48<10:59:45, 3.27it/s] 65%|██████▌ | 242211/371472 [8:41:48<11:18:30, 3.18it/s] 65%|██████▌ | 242212/371472 [8:41:48<10:45:25, 3.34it/s] 65%|██████▌ | 242213/371472 [8:41:49<10:25:09, 3.45it/s] 65%|██████▌ | 242214/371472 [8:41:49<10:21:23, 3.47it/s] 65%|██████▌ | 242215/371472 [8:41:49<10:13:45, 3.51it/s] 65%|██████▌ | 242216/371472 [8:41:49<10:50:48, 3.31it/s] 65%|██████▌ | 242217/371472 [8:41:50<10:29:53, 3.42it/s] 65%|██████▌ | 242218/371472 [8:41:50<10:16:07, 3.50it/s] 65%|██████▌ | 242219/371472 [8:41:50<10:34:46, 3.39it/s] 65%|██████▌ | 242220/371472 [8:41:51<10:19:26, 3.48it/s] {'loss': 2.8282, 'learning_rate': 4.133196147299015e-07, 'epoch': 10.43} + 65%|██████▌ | 242220/371472 [8:41:51<10:19:26, 3.48it/s] 65%|██████▌ | 242221/371472 [8:41:51<10:15:11, 3.50it/s] 65%|██████▌ | 242222/371472 [8:41:51<10:32:10, 3.41it/s] 65%|██████▌ | 242223/371472 [8:41:51<10:21:06, 3.47it/s] 65%|██████▌ | 242224/371472 [8:41:52<10:10:46, 3.53it/s] 65%|██████▌ | 242225/371472 [8:41:52<10:24:11, 3.45it/s] 65%|██████▌ | 242226/371472 [8:41:52<10:11:06, 3.52it/s] 65%|██████▌ | 242227/371472 [8:41:53<9:59:42, 3.59it/s] 65%|██████▌ | 242228/371472 [8:41:53<10:26:50, 3.44it/s] 65%|██████▌ | 242229/371472 [8:41:53<10:44:34, 3.34it/s] 65%|██████▌ | 242230/371472 [8:41:54<10:46:23, 3.33it/s] 65%|██████▌ | 242231/371472 [8:41:54<10:29:14, 3.42it/s] 65%|██████▌ | 242232/371472 [8:41:54<10:17:17, 3.49it/s] 65%|██████▌ | 242233/371472 [8:41:54<10:11:18, 3.52it/s] 65%|██████▌ | 242234/371472 [8:41:55<10:31:15, 3.41it/s] 65%|██████▌ | 242235/371472 [8:41:55<10:09:30, 3.53it/s] 65%|██████▌ | 242236/371472 [8:41:55<10:30:47, 3.41it/s] 65%|██████▌ | 242237/371472 [8:41:56<10:18:59, 3.48it/s] 65%|██████▌ | 242238/371472 [8:41:56<10:18:52, 3.48it/s] 65%|██████▌ | 242239/371472 [8:41:56<10:18:27, 3.48it/s] 65%|██████▌ | 242240/371472 [8:41:56<10:06:53, 3.55it/s] {'loss': 2.6584, 'learning_rate': 4.1327113275442257e-07, 'epoch': 10.43} + 65%|██████▌ | 242240/371472 [8:41:56<10:06:53, 3.55it/s] 65%|██████▌ | 242241/371472 [8:41:57<10:01:20, 3.58it/s] 65%|██████▌ | 242242/371472 [8:41:57<10:08:58, 3.54it/s] 65%|██████▌ | 242243/371472 [8:41:57<10:40:12, 3.36it/s] 65%|██████▌ | 242244/371472 [8:41:58<11:00:42, 3.26it/s] 65%|██████▌ | 242245/371472 [8:41:58<10:51:12, 3.31it/s] 65%|██████▌ | 242246/371472 [8:41:58<11:03:10, 3.25it/s] 65%|██████▌ | 242247/371472 [8:41:59<11:24:57, 3.14it/s] 65%|██████▌ | 242248/371472 [8:41:59<11:04:51, 3.24it/s] 65%|██████▌ | 242249/371472 [8:41:59<10:44:36, 3.34it/s] 65%|██████▌ | 242250/371472 [8:41:59<10:40:51, 3.36it/s] 65%|██████▌ | 242251/371472 [8:42:00<10:26:20, 3.44it/s] 65%|██████▌ | 242252/371472 [8:42:00<12:24:23, 2.89it/s] 65%|██████▌ | 242253/371472 [8:42:00<11:50:22, 3.03it/s] 65%|██████▌ | 242254/371472 [8:42:01<11:22:24, 3.16it/s] 65%|██████▌ | 242255/371472 [8:42:01<11:33:20, 3.11it/s] 65%|██████▌ | 242256/371472 [8:42:01<11:24:12, 3.15it/s] 65%|██████▌ | 242257/371472 [8:42:02<11:04:19, 3.24it/s] 65%|██████▌ | 242258/371472 [8:42:02<10:53:17, 3.30it/s] 65%|██████▌ | 242259/371472 [8:42:02<11:37:50, 3.09it/s] 65%|██████▌ | 242260/371472 [8:42:03<11:11:34, 3.21it/s] {'loss': 2.7135, 'learning_rate': 4.1322265077894375e-07, 'epoch': 10.43} + 65%|██████▌ | 242260/371472 [8:42:03<11:11:34, 3.21it/s] 65%|██████▌ | 242261/371472 [8:42:03<10:52:31, 3.30it/s] 65%|██████▌ | 242262/371472 [8:42:03<10:40:47, 3.36it/s] 65%|██████▌ | 242263/371472 [8:42:03<10:23:00, 3.46it/s] 65%|██████▌ | 242264/371472 [8:42:04<10:23:52, 3.45it/s] 65%|██████▌ | 242265/371472 [8:42:04<10:26:53, 3.44it/s] 65%|██████▌ | 242266/371472 [8:42:04<10:41:13, 3.36it/s] 65%|██████▌ | 242267/371472 [8:42:05<11:21:52, 3.16it/s] 65%|██████▌ | 242268/371472 [8:42:05<11:24:51, 3.14it/s] 65%|██████▌ | 242269/371472 [8:42:05<11:02:06, 3.25it/s] 65%|██████▌ | 242270/371472 [8:42:06<10:39:18, 3.37it/s] 65%|██████▌ | 242271/371472 [8:42:06<10:31:34, 3.41it/s] 65%|██████▌ | 242272/371472 [8:42:06<10:22:06, 3.46it/s] 65%|██████▌ | 242273/371472 [8:42:06<10:16:05, 3.50it/s] 65%|██████▌ | 242274/371472 [8:42:07<10:17:02, 3.49it/s] 65%|██████▌ | 242275/371472 [8:42:07<10:05:09, 3.56it/s] 65%|██████▌ | 242276/371472 [8:42:07<10:22:41, 3.46it/s] 65%|██████▌ | 242277/371472 [8:42:08<10:58:20, 3.27it/s] 65%|██████▌ | 242278/371472 [8:42:08<10:43:30, 3.35it/s] 65%|██████▌ | 242279/371472 [8:42:08<10:44:30, 3.34it/s] 65%|██████▌ | 242280/371472 [8:42:08<10:28:51, 3.42it/s] {'loss': 2.747, 'learning_rate': 4.131741688034648e-07, 'epoch': 10.44} + 65%|██████▌ | 242280/371472 [8:42:08<10:28:51, 3.42it/s] 65%|██████▌ | 242281/371472 [8:42:09<10:34:27, 3.39it/s] 65%|██████▌ | 242282/371472 [8:42:09<12:02:28, 2.98it/s] 65%|██████▌ | 242283/371472 [8:42:10<12:07:43, 2.96it/s] 65%|██████▌ | 242284/371472 [8:42:10<11:30:21, 3.12it/s] 65%|██████▌ | 242285/371472 [8:42:10<11:04:23, 3.24it/s] 65%|██████▌ | 242286/371472 [8:42:10<11:10:45, 3.21it/s] 65%|██████▌ | 242287/371472 [8:42:11<10:57:08, 3.28it/s] 65%|██████▌ | 242288/371472 [8:42:11<11:21:00, 3.16it/s] 65%|██████�� | 242289/371472 [8:42:11<11:07:57, 3.22it/s] 65%|██████▌ | 242290/371472 [8:42:12<10:56:10, 3.28it/s] 65%|██████▌ | 242291/371472 [8:42:12<10:53:52, 3.29it/s] 65%|██████▌ | 242292/371472 [8:42:12<10:46:26, 3.33it/s] 65%|██████▌ | 242293/371472 [8:42:13<10:45:40, 3.33it/s] 65%|██████▌ | 242294/371472 [8:42:13<10:28:30, 3.43it/s] 65%|██████▌ | 242295/371472 [8:42:13<10:21:00, 3.47it/s] 65%|██████▌ | 242296/371472 [8:42:13<10:59:16, 3.27it/s] 65%|██████▌ | 242297/371472 [8:42:14<11:01:07, 3.26it/s] 65%|██████▌ | 242298/371472 [8:42:14<10:38:33, 3.37it/s] 65%|██████▌ | 242299/371472 [8:42:14<10:38:21, 3.37it/s] 65%|██████▌ | 242300/371472 [8:42:15<10:26:37, 3.44it/s] {'loss': 2.6868, 'learning_rate': 4.1312568682798594e-07, 'epoch': 10.44} + 65%|██████▌ | 242300/371472 [8:42:15<10:26:37, 3.44it/s] 65%|██████▌ | 242301/371472 [8:42:15<10:48:40, 3.32it/s] 65%|██████▌ | 242302/371472 [8:42:15<10:35:11, 3.39it/s] 65%|██████▌ | 242303/371472 [8:42:16<11:15:11, 3.19it/s] 65%|██████▌ | 242304/371472 [8:42:16<11:01:50, 3.25it/s] 65%|██████▌ | 242305/371472 [8:42:16<10:49:09, 3.32it/s] 65%|██████▌ | 242306/371472 [8:42:17<11:22:49, 3.15it/s] 65%|██████▌ | 242307/371472 [8:42:17<11:01:34, 3.25it/s] 65%|██████▌ | 242308/371472 [8:42:17<10:43:04, 3.35it/s] 65%|██████▌ | 242309/371472 [8:42:17<10:44:50, 3.34it/s] 65%|██████▌ | 242310/371472 [8:42:18<10:28:08, 3.43it/s] 65%|██████▌ | 242311/371472 [8:42:18<10:27:29, 3.43it/s] 65%|██████▌ | 242312/371472 [8:42:18<10:39:44, 3.36it/s] 65%|██████▌ | 242313/371472 [8:42:19<10:44:26, 3.34it/s] 65%|██████▌ | 242314/371472 [8:42:19<10:59:51, 3.26it/s] 65%|██████▌ | 242315/371472 [8:42:19<10:56:33, 3.28it/s] 65%|██████▌ | 242316/371472 [8:42:19<10:56:56, 3.28it/s] 65%|██████▌ | 242317/371472 [8:42:20<10:35:16, 3.39it/s] 65%|██████▌ | 242318/371472 [8:42:20<10:43:17, 3.35it/s] 65%|██████▌ | 242319/371472 [8:42:20<10:34:09, 3.39it/s] 65%|██████▌ | 242320/371472 [8:42:21<11:14:09, 3.19it/s] {'loss': 2.8851, 'learning_rate': 4.13077204852507e-07, 'epoch': 10.44} + 65%|██████▌ | 242320/371472 [8:42:21<11:14:09, 3.19it/s] 65%|██████▌ | 242321/371472 [8:42:21<10:59:53, 3.26it/s] 65%|██████▌ | 242322/371472 [8:42:21<11:13:33, 3.20it/s] 65%|██████▌ | 242323/371472 [8:42:22<10:55:46, 3.28it/s] 65%|██████▌ | 242324/371472 [8:42:22<10:52:24, 3.30it/s] 65%|██████▌ | 242325/371472 [8:42:22<10:43:03, 3.35it/s] 65%|██████▌ | 242326/371472 [8:42:22<10:27:07, 3.43it/s] 65%|██████▌ | 242327/371472 [8:42:23<10:19:54, 3.47it/s] 65%|██████▌ | 242328/371472 [8:42:23<10:31:46, 3.41it/s] 65%|██████▌ | 242329/371472 [8:42:23<10:18:26, 3.48it/s] 65%|██████▌ | 242330/371472 [8:42:24<10:20:06, 3.47it/s] 65%|██████▌ | 242331/371472 [8:42:24<10:09:45, 3.53it/s] 65%|██████▌ | 242332/371472 [8:42:24<10:30:17, 3.41it/s] 65%|██████▌ | 242333/371472 [8:42:25<11:42:32, 3.06it/s] 65%|██████▌ | 242334/371472 [8:42:25<11:02:25, 3.25it/s] 65%|██████▌ | 242335/371472 [8:42:25<10:36:56, 3.38it/s] 65%|██████▌ | 242336/371472 [8:42:25<10:38:05, 3.37it/s] 65%|██████▌ | 242337/371472 [8:42:26<10:58:13, 3.27it/s] 65%|██████▌ | 242338/371472 [8:42:26<11:30:58, 3.11it/s] 65%|██████▌ | 242339/371472 [8:42:26<11:01:49, 3.25it/s] 65%|██████▌ | 242340/371472 [8:42:27<11:03:28, 3.24it/s] {'loss': 2.8996, 'learning_rate': 4.130287228770282e-07, 'epoch': 10.44} + 65%|██████▌ | 242340/371472 [8:42:27<11:03:28, 3.24it/s] 65%|██████▌ | 242341/371472 [8:42:27<10:50:05, 3.31it/s] 65%|██████▌ | 242342/371472 [8:42:27<10:39:37, 3.36it/s] 65%|██████▌ | 242343/371472 [8:42:28<10:42:47, 3.35it/s] 65%|██████▌ | 242344/371472 [8:42:28<10:48:33, 3.32it/s] 65%|██████▌ | 242345/371472 [8:42:28<11:20:10, 3.16it/s] 65%|██████▌ | 242346/371472 [8:42:29<11:48:50, 3.04it/s] 65%|██████▌ | 242347/371472 [8:42:29<12:09:29, 2.95it/s] 65%|██████▌ | 242348/371472 [8:42:29<11:37:10, 3.09it/s] 65%|██���███▌ | 242349/371472 [8:42:30<11:29:18, 3.12it/s] 65%|██████▌ | 242350/371472 [8:42:30<11:09:33, 3.21it/s] 65%|██████▌ | 242351/371472 [8:42:30<10:59:04, 3.27it/s] 65%|██████▌ | 242352/371472 [8:42:30<10:41:58, 3.35it/s] 65%|██████▌ | 242353/371472 [8:42:31<10:28:03, 3.43it/s] 65%|██████▌ | 242354/371472 [8:42:31<10:48:40, 3.32it/s] 65%|██████▌ | 242355/371472 [8:42:31<10:38:04, 3.37it/s] 65%|██████▌ | 242356/371472 [8:42:32<10:35:42, 3.39it/s] 65%|██████▌ | 242357/371472 [8:42:32<10:44:19, 3.34it/s] 65%|██████▌ | 242358/371472 [8:42:32<10:30:06, 3.42it/s] 65%|██████▌ | 242359/371472 [8:42:33<11:22:44, 3.15it/s] 65%|██████▌ | 242360/371472 [8:42:33<11:00:43, 3.26it/s] {'loss': 2.6106, 'learning_rate': 4.129802409015492e-07, 'epoch': 10.44} + 65%|██████▌ | 242360/371472 [8:42:33<11:00:43, 3.26it/s] 65%|██████▌ | 242361/371472 [8:42:33<10:32:32, 3.40it/s] 65%|██████▌ | 242362/371472 [8:42:33<10:24:49, 3.44it/s] 65%|██████▌ | 242363/371472 [8:42:34<10:36:53, 3.38it/s] 65%|██████▌ | 242364/371472 [8:42:34<10:37:48, 3.37it/s] 65%|██████▌ | 242365/371472 [8:42:34<10:41:00, 3.36it/s] 65%|██████▌ | 242366/371472 [8:42:35<10:49:28, 3.31it/s] 65%|██████▌ | 242367/371472 [8:42:35<10:23:23, 3.45it/s] 65%|██████▌ | 242368/371472 [8:42:35<10:13:58, 3.50it/s] 65%|██████▌ | 242369/371472 [8:42:36<11:01:58, 3.25it/s] 65%|██████▌ | 242370/371472 [8:42:36<11:25:05, 3.14it/s] 65%|██████▌ | 242371/371472 [8:42:36<11:02:48, 3.25it/s] 65%|██████▌ | 242372/371472 [8:42:36<10:58:32, 3.27it/s] 65%|██████▌ | 242373/371472 [8:42:37<10:57:50, 3.27it/s] 65%|██████▌ | 242374/371472 [8:42:37<11:01:32, 3.25it/s] 65%|██████▌ | 242375/371472 [8:42:37<11:03:52, 3.24it/s] 65%|██████▌ | 242376/371472 [8:42:38<10:51:13, 3.30it/s] 65%|██████▌ | 242377/371472 [8:42:38<10:34:46, 3.39it/s] 65%|██████▌ | 242378/371472 [8:42:38<10:33:10, 3.40it/s] 65%|██████▌ | 242379/371472 [8:42:39<10:23:19, 3.45it/s] 65%|██████▌ | 242380/371472 [8:42:39<10:49:58, 3.31it/s] {'loss': 2.7284, 'learning_rate': 4.129317589260704e-07, 'epoch': 10.44} + 65%|██████▌ | 242380/371472 [8:42:39<10:49:58, 3.31it/s] 65%|██████▌ | 242381/371472 [8:42:39<10:52:34, 3.30it/s] 65%|██████▌ | 242382/371472 [8:42:39<11:09:29, 3.21it/s] 65%|██████▌ | 242383/371472 [8:42:40<10:56:27, 3.28it/s] 65%|██████▌ | 242384/371472 [8:42:40<10:57:46, 3.27it/s] 65%|██████▌ | 242385/371472 [8:42:40<10:56:26, 3.28it/s] 65%|██████▌ | 242386/371472 [8:42:41<11:13:44, 3.19it/s] 65%|██████▌ | 242387/371472 [8:42:41<11:27:03, 3.13it/s] 65%|██████▌ | 242388/371472 [8:42:41<11:17:10, 3.18it/s] 65%|██████▌ | 242389/371472 [8:42:42<11:22:42, 3.15it/s] 65%|██████▌ | 242390/371472 [8:42:42<11:19:35, 3.17it/s] 65%|██████▌ | 242391/371472 [8:42:42<11:00:48, 3.26it/s] 65%|██████▌ | 242392/371472 [8:42:43<10:59:32, 3.26it/s] 65%|██████▌ | 242393/371472 [8:42:43<10:53:46, 3.29it/s] 65%|██████▌ | 242394/371472 [8:42:43<10:42:11, 3.35it/s] 65%|██████▌ | 242395/371472 [8:42:43<10:30:41, 3.41it/s] 65%|██████▌ | 242396/371472 [8:42:44<10:31:17, 3.41it/s] 65%|██████▌ | 242397/371472 [8:42:44<10:19:19, 3.47it/s] 65%|██████▌ | 242398/371472 [8:42:44<10:27:52, 3.43it/s] 65%|██████▌ | 242399/371472 [8:42:45<10:28:02, 3.43it/s] 65%|██████▌ | 242400/371472 [8:42:45<10:47:19, 3.32it/s] {'loss': 2.6931, 'learning_rate': 4.1288327695059146e-07, 'epoch': 10.44} + 65%|██████▌ | 242400/371472 [8:42:45<10:47:19, 3.32it/s] 65%|██████▌ | 242401/371472 [8:42:45<10:49:03, 3.31it/s] 65%|██████▌ | 242402/371472 [8:42:46<10:43:27, 3.34it/s] 65%|██████▌ | 242403/371472 [8:42:46<11:11:34, 3.20it/s] 65%|██████▌ | 242404/371472 [8:42:46<11:10:45, 3.21it/s] 65%|██████▌ | 242405/371472 [8:42:46<10:43:12, 3.34it/s] 65%|██████▌ | 242406/371472 [8:42:47<10:41:21, 3.35it/s] 65%|██████▌ | 242407/371472 [8:42:47<10:29:43, 3.42it/s] 65%|██████▌ | 242408/371472 [8:42:47<10:31:19, 3.41it/s] 65%|██████▌ | 242409/371472 [8:42:48<10:51:08, 3.30it/s] 65%|██████▌ | 242410/371472 [8:42:48<10:53:10, 3.29it/s] 65%|██████▌ | 242411/371472 [8:42:48<10:53:32, 3.29it/s] 65%|██████▌ | 242412/371472 [8:42:49<10:37:59, 3.37it/s] 65%|██████▌ | 242413/371472 [8:42:49<10:45:20, 3.33it/s] 65%|██████▌ | 242414/371472 [8:42:49<10:53:03, 3.29it/s] 65%|██████▌ | 242415/371472 [8:42:49<11:20:27, 3.16it/s] 65%|██████▌ | 242416/371472 [8:42:50<12:19:34, 2.91it/s] 65%|██████▌ | 242417/371472 [8:42:50<12:41:26, 2.82it/s] 65%|██████▌ | 242418/371472 [8:42:51<12:31:15, 2.86it/s] 65%|██████▌ | 242419/371472 [8:42:51<11:59:46, 2.99it/s] 65%|██████▌ | 242420/371472 [8:42:51<11:19:27, 3.17it/s] {'loss': 2.6258, 'learning_rate': 4.128347949751126e-07, 'epoch': 10.44} + 65%|██████▌ | 242420/371472 [8:42:51<11:19:27, 3.17it/s] 65%|██████▌ | 242421/371472 [8:42:51<10:59:34, 3.26it/s] 65%|██████▌ | 242422/371472 [8:42:52<11:27:20, 3.13it/s] 65%|██████▌ | 242423/371472 [8:42:52<11:18:49, 3.17it/s] 65%|██████▌ | 242424/371472 [8:42:52<11:09:48, 3.21it/s] 65%|██████▌ | 242425/371472 [8:42:53<10:54:15, 3.29it/s] 65%|██████▌ | 242426/371472 [8:42:53<10:49:03, 3.31it/s] 65%|██████▌ | 242427/371472 [8:42:53<10:34:22, 3.39it/s] 65%|██████▌ | 242428/371472 [8:42:54<10:42:40, 3.35it/s] 65%|██████▌ | 242429/371472 [8:42:54<10:45:17, 3.33it/s] 65%|██████▌ | 242430/371472 [8:42:54<10:51:58, 3.30it/s] 65%|██████▌ | 242431/371472 [8:42:54<10:35:55, 3.38it/s] 65%|██████▌ | 242432/371472 [8:42:55<10:36:59, 3.38it/s] 65%|██████▌ | 242433/371472 [8:42:55<10:59:47, 3.26it/s] 65%|██████▌ | 242434/371472 [8:42:55<10:59:48, 3.26it/s] 65%|██████▌ | 242435/371472 [8:42:56<10:43:52, 3.34it/s] 65%|██████▌ | 242436/371472 [8:42:56<11:01:41, 3.25it/s] 65%|██████▌ | 242437/371472 [8:42:56<11:00:07, 3.26it/s] 65%|██████▌ | 242438/371472 [8:42:57<10:48:27, 3.32it/s] 65%|██████▌ | 242439/371472 [8:42:57<10:54:24, 3.29it/s] 65%|██████▌ | 242440/371472 [8:42:57<10:57:49, 3.27it/s] {'loss': 2.5783, 'learning_rate': 4.1278631299963366e-07, 'epoch': 10.44} + 65%|██████▌ | 242440/371472 [8:42:57<10:57:49, 3.27it/s] 65%|██████▌ | 242441/371472 [8:42:58<10:52:50, 3.29it/s] 65%|██████▌ | 242442/371472 [8:42:58<10:35:12, 3.39it/s] 65%|██████▌ | 242443/371472 [8:42:58<10:32:28, 3.40it/s] 65%|██████▌ | 242444/371472 [8:42:58<10:40:15, 3.36it/s] 65%|██████▌ | 242445/371472 [8:42:59<10:31:25, 3.41it/s] 65%|██████▌ | 242446/371472 [8:42:59<10:25:06, 3.44it/s] 65%|██████▌ | 242447/371472 [8:42:59<10:52:50, 3.29it/s] 65%|██████▌ | 242448/371472 [8:43:00<11:17:24, 3.17it/s] 65%|██████▌ | 242449/371472 [8:43:00<11:04:47, 3.23it/s] 65%|██████▌ | 242450/371472 [8:43:00<10:41:05, 3.35it/s] 65%|██████▌ | 242451/371472 [8:43:01<10:36:01, 3.38it/s] 65%|██████▌ | 242452/371472 [8:43:01<10:19:37, 3.47it/s] 65%|██████▌ | 242453/371472 [8:43:01<10:16:58, 3.49it/s] 65%|██████▌ | 242454/371472 [8:43:01<11:07:30, 3.22it/s] 65%|██████▌ | 242455/371472 [8:43:02<11:26:55, 3.13it/s] 65%|██████▌ | 242456/371472 [8:43:02<11:17:09, 3.18it/s] 65%|██████▌ | 242457/371472 [8:43:02<11:20:22, 3.16it/s] 65%|██████▌ | 242458/371472 [8:43:03<10:53:29, 3.29it/s] 65%|██████▌ | 242459/371472 [8:43:03<10:48:05, 3.32it/s] 65%|██████▌ | 242460/371472 [8:43:03<11:19:19, 3.17it/s] {'loss': 2.7085, 'learning_rate': 4.1273783102415483e-07, 'epoch': 10.44} + 65%|██████▌ | 242460/371472 [8:43:03<11:19:19, 3.17it/s] 65%|██████▌ | 242461/371472 [8:43:04<11:27:18, 3.13it/s] 65%|██████▌ | 242462/371472 [8:43:04<11:08:37, 3.22it/s] 65%|██████▌ | 242463/371472 [8:43:04<10:56:45, 3.27it/s] 65%|██████▌ | 242464/371472 [8:43:05<11:19:24, 3.16it/s] 65%|██████▌ | 242465/371472 [8:43:05<10:48:55, 3.31it/s] 65%|██████▌ | 242466/371472 [8:43:05<10:39:17, 3.36it/s] 65%|██████▌ | 242467/371472 [8:43:05<10:37:44, 3.37it/s] 65%|██████▌ | 242468/371472 [8:43:06<10:31:39, 3.40it/s] 65%|██████▌ | 242469/371472 [8:43:06<10:21:17, 3.46it/s] 65%|██████▌ | 242470/371472 [8:43:06<10:36:39, 3.38it/s] 65%|██████▌ | 242471/371472 [8:43:07<10:24:54, 3.44it/s] 65%|██████▌ | 242472/371472 [8:43:07<10:13:19, 3.51it/s] 65%|██████▌ | 242473/371472 [8:43:07<10:15:22, 3.49it/s] 65%|██████▌ | 242474/371472 [8:43:07<10:14:17, 3.50it/s] 65%|██████▌ | 242475/371472 [8:43:08<10:07:14, 3.54it/s] 65%|██████▌ | 242476/371472 [8:43:08<10:06:35, 3.54it/s] 65%|██████▌ | 242477/371472 [8:43:08<10:42:29, 3.35it/s] 65%|██████▌ | 242478/371472 [8:43:09<10:42:05, 3.35it/s] 65%|██████▌ | 242479/371472 [8:43:09<12:26:39, 2.88it/s] 65%|██████▌ | 242480/371472 [8:43:09<12:13:48, 2.93it/s] {'loss': 2.7755, 'learning_rate': 4.1268934904867585e-07, 'epoch': 10.44} + 65%|██████▌ | 242480/371472 [8:43:09<12:13:48, 2.93it/s] 65%|██████▌ | 242481/371472 [8:43:10<11:45:11, 3.05it/s] 65%|██████▌ | 242482/371472 [8:43:10<11:30:37, 3.11it/s] 65%|██████▌ | 242483/371472 [8:43:10<11:30:26, 3.11it/s] 65%|██████▌ | 242484/371472 [8:43:11<11:06:42, 3.22it/s] 65%|██████▌ | 242485/371472 [8:43:11<10:41:20, 3.35it/s] 65%|██████▌ | 242486/371472 [8:43:11<10:35:48, 3.38it/s] 65%|██████▌ | 242487/371472 [8:43:11<10:38:14, 3.37it/s] 65%|██████▌ | 242488/371472 [8:43:12<10:29:31, 3.41it/s] 65%|██████▌ | 242489/371472 [8:43:12<10:15:33, 3.49it/s] 65%|██████▌ | 242490/371472 [8:43:12<11:02:15, 3.25it/s] 65%|██████▌ | 242491/371472 [8:43:13<10:54:08, 3.29it/s] 65%|██████▌ | 242492/371472 [8:43:13<11:01:58, 3.25it/s] 65%|██████▌ | 242493/371472 [8:43:13<10:53:53, 3.29it/s] 65%|██████▌ | 242494/371472 [8:43:14<10:52:32, 3.29it/s] 65%|██████▌ | 242495/371472 [8:43:14<11:05:29, 3.23it/s] 65%|██████▌ | 242496/371472 [8:43:14<10:57:59, 3.27it/s] 65%|██████▌ | 242497/371472 [8:43:14<10:34:37, 3.39it/s] 65%|██████▌ | 242498/371472 [8:43:15<10:19:25, 3.47it/s] 65%|██████▌ | 242499/371472 [8:43:15<10:18:35, 3.47it/s] 65%|██████▌ | 242500/371472 [8:43:15<10:22:21, 3.45it/s] {'loss': 2.6098, 'learning_rate': 4.1264086707319703e-07, 'epoch': 10.44} + 65%|██████▌ | 242500/371472 [8:43:15<10:22:21, 3.45it/s] 65%|██████▌ | 242501/371472 [8:43:16<10:14:29, 3.50it/s] 65%|██████▌ | 242502/371472 [8:43:16<10:00:29, 3.58it/s] 65%|██████▌ | 242503/371472 [8:43:16<10:11:51, 3.51it/s] 65%|██████▌ | 242504/371472 [8:43:17<10:35:25, 3.38it/s] 65%|██████▌ | 242505/371472 [8:43:17<10:37:01, 3.37it/s] 65%|██████▌ | 242506/371472 [8:43:17<10:33:00, 3.40it/s] 65%|██████▌ | 242507/371472 [8:43:17<10:48:52, 3.31it/s] 65%|██████▌ | 242508/371472 [8:43:18<10:34:23, 3.39it/s] 65%|██████▌ | 242509/371472 [8:43:18<10:39:01, 3.36it/s] 65%|██████▌ | 242510/371472 [8:43:18<10:52:53, 3.29it/s] 65%|██████▌ | 242511/371472 [8:43:19<10:53:49, 3.29it/s] 65%|██████▌ | 242512/371472 [8:43:19<10:50:46, 3.30it/s] 65%|██████▌ | 242513/371472 [8:43:19<11:15:56, 3.18it/s] 65%|██████▌ | 242514/371472 [8:43:20<11:01:25, 3.25it/s] 65%|██████▌ | 242515/371472 [8:43:20<10:44:14, 3.34it/s] 65%|██████▌ | 242516/371472 [8:43:20<10:22:04, 3.45it/s] 65%|██████▌ | 242517/371472 [8:43:20<10:25:59, 3.43it/s] 65%|██████▌ | 242518/371472 [8:43:21<10:47:42, 3.32it/s] 65%|██████▌ | 242519/371472 [8:43:21<10:50:50, 3.30it/s] 65%|██████▌ | 242520/371472 [8:43:21<11:26:11, 3.13it/s] {'loss': 2.5546, 'learning_rate': 4.125923850977181e-07, 'epoch': 10.45} + 65%|██████▌ | 242520/371472 [8:43:21<11:26:11, 3.13it/s] 65%|██████▌ | 242521/371472 [8:43:22<11:20:34, 3.16it/s] 65%|██████▌ | 242522/371472 [8:43:22<11:06:51, 3.22it/s] 65%|██████▌ | 242523/371472 [8:43:22<11:18:13, 3.17it/s] 65%|██████▌ | 242524/371472 [8:43:23<11:03:41, 3.24it/s] 65%|██████▌ | 242525/371472 [8:43:23<11:09:35, 3.21it/s] 65%|██████▌ | 242526/371472 [8:43:23<11:03:33, 3.24it/s] 65%|██████▌ | 242527/371472 [8:43:24<12:27:01, 2.88it/s] 65%|██████▌ | 242528/371472 [8:43:24<11:40:53, 3.07it/s] 65%|██████▌ | 242529/371472 [8:43:24<11:52:16, 3.02it/s] 65%|██████▌ | 242530/371472 [8:43:25<11:14:06, 3.19it/s] 65%|██████▌ | 242531/371472 [8:43:25<11:54:54, 3.01it/s] 65%|██████▌ | 242532/371472 [8:43:25<11:24:28, 3.14it/s] 65%|██████▌ | 242533/371472 [8:43:26<11:00:47, 3.25it/s] 65%|██████▌ | 242534/371472 [8:43:26<10:45:21, 3.33it/s] 65%|██████▌ | 242535/371472 [8:43:26<10:41:32, 3.35it/s] 65%|██████▌ | 242536/371472 [8:43:26<10:25:21, 3.44it/s] 65%|██████▌ | 242537/371472 [8:43:27<10:31:21, 3.40it/s] 65%|██████▌ | 242538/371472 [8:43:27<10:23:16, 3.45it/s] 65%|██████▌ | 242539/371472 [8:43:27<10:18:15, 3.48it/s] 65%|██████▌ | 242540/371472 [8:43:28<10:44:39, 3.33it/s] {'loss': 2.7509, 'learning_rate': 4.125439031222392e-07, 'epoch': 10.45} + 65%|██████▌ | 242540/371472 [8:43:28<10:44:39, 3.33it/s] 65%|██████▌ | 242541/371472 [8:43:28<11:08:23, 3.21it/s] 65%|██████▌ | 242542/371472 [8:43:28<11:44:56, 3.05it/s] 65%|██████▌ | 242543/371472 [8:43:29<11:09:35, 3.21it/s] 65%|██████▌ | 242544/371472 [8:43:29<10:53:02, 3.29it/s] 65%|██████▌ | 242545/371472 [8:43:29<10:41:47, 3.35it/s] 65%|██████▌ | 242546/371472 [8:43:29<10:21:30, 3.46it/s] 65%|██████▌ | 242547/371472 [8:43:30<10:52:27, 3.29it/s] 65%|██████▌ | 242548/371472 [8:43:30<11:36:16, 3.09it/s] 65%|██████▌ | 242549/371472 [8:43:30<11:06:51, 3.22it/s] 65%|██████▌ | 242550/371472 [8:43:31<10:47:36, 3.32it/s] 65%|██████▌ | 242551/371472 [8:43:31<10:37:51, 3.37it/s] 65%|██████▌ | 242552/371472 [8:43:31<10:28:36, 3.42it/s] 65%|██████▌ | 242553/371472 [8:43:31<10:20:22, 3.46it/s] 65%|██████▌ | 242554/371472 [8:43:32<10:41:16, 3.35it/s] 65%|██████▌ | 242555/371472 [8:43:32<10:20:48, 3.46it/s] 65%|██████▌ | 242556/371472 [8:43:32<10:05:11, 3.55it/s] 65%|██████▌ | 242557/371472 [8:43:33<10:09:13, 3.53it/s] 65%|██████▌ | 242558/371472 [8:43:33<10:09:58, 3.52it/s] 65%|██████▌ | 242559/371472 [8:43:33<10:21:41, 3.46it/s] 65%|██████▌ | 242560/371472 [8:43:34<11:28:21, 3.12it/s] {'loss': 2.7256, 'learning_rate': 4.124954211467603e-07, 'epoch': 10.45} + 65%|██████▌ | 242560/371472 [8:43:34<11:28:21, 3.12it/s] 65%|██████▌ | 242561/371472 [8:43:34<11:09:12, 3.21it/s] 65%|██████▌ | 242562/371472 [8:43:34<11:12:54, 3.19it/s] 65%|██████▌ | 242563/371472 [8:43:34<10:48:09, 3.31it/s] 65%|██████▌ | 242564/371472 [8:43:35<11:13:03, 3.19it/s] 65%|██████▌ | 242565/371472 [8:43:35<11:09:53, 3.21it/s] 65%|██████▌ | 242566/371472 [8:43:35<10:56:03, 3.27it/s] 65%|██████▌ | 242567/371472 [8:43:36<10:36:03, 3.38it/s] 65%|██████▌ | 242568/371472 [8:43:36<10:27:59, 3.42it/s] 65%|██████▌ | 242569/371472 [8:43:36<10:05:57, 3.55it/s] 65%|██████▌ | 242570/371472 [8:43:37<10:16:30, 3.48it/s] 65%|██████▌ | 242571/371472 [8:43:37<10:18:00, 3.48it/s] 65%|██████▌ | 242572/371472 [8:43:37<10:03:31, 3.56it/s] 65%|██████▌ | 242573/371472 [8:43:37<10:17:19, 3.48it/s] 65%|██████▌ | 242574/371472 [8:43:38<10:16:17, 3.49it/s] 65%|██████▌ | 242575/371472 [8:43:38<10:11:37, 3.51it/s] 65%|██████▌ | 242576/371472 [8:43:38<10:17:46, 3.48it/s] 65%|██████▌ | 242577/371472 [8:43:39<10:14:26, 3.50it/s] 65%|██████▌ | 242578/371472 [8:43:39<10:50:45, 3.30it/s] 65%|██████▌ | 242579/371472 [8:43:39<10:35:37, 3.38it/s] 65%|██████▌ | 242580/371472 [8:43:39<10:23:51, 3.44it/s] {'loss': 2.6558, 'learning_rate': 4.124469391712815e-07, 'epoch': 10.45} + 65%|██████▌ | 242580/371472 [8:43:39<10:23:51, 3.44it/s] 65%|██████▌ | 242581/371472 [8:43:40<10:27:10, 3.43it/s] 65%|██████▌ | 242582/371472 [8:43:40<10:13:53, 3.50it/s] 65%|██████▌ | 242583/371472 [8:43:40<10:12:16, 3.51it/s] 65%|██████▌ | 242584/371472 [8:43:41<11:07:18, 3.22it/s] 65%|██████▌ | 242585/371472 [8:43:41<11:28:16, 3.12it/s] 65%|██████▌ | 242586/371472 [8:43:41<11:23:53, 3.14it/s] 65%|██████▌ | 242587/371472 [8:43:42<11:41:56, 3.06it/s] 65%|██████▌ | 242588/371472 [8:43:42<11:49:23, 3.03it/s] 65%|██████▌ | 242589/371472 [8:43:42<11:33:51, 3.10it/s] 65%|██████▌ | 242590/371472 [8:43:43<11:06:09, 3.22it/s] 65%|██████▌ | 242591/371472 [8:43:43<10:42:43, 3.34it/s] 65%|██████▌ | 242592/371472 [8:43:43<10:35:21, 3.38it/s] 65%|██████▌ | 242593/371472 [8:43:43<11:09:36, 3.21it/s] 65%|██████▌ | 242594/371472 [8:43:44<10:47:45, 3.32it/s] 65%|██████▌ | 242595/371472 [8:43:44<11:11:47, 3.20it/s] 65%|██████▌ | 242596/371472 [8:43:44<10:53:06, 3.29it/s] 65%|██████▌ | 242597/371472 [8:43:45<10:43:14, 3.34it/s] 65%|██████▌ | 242598/371472 [8:43:45<10:18:47, 3.47it/s] 65%|██████▌ | 242599/371472 [8:43:45<10:31:35, 3.40it/s] 65%|██████▌ | 242600/371472 [8:43:46<10:20:22, 3.46it/s] {'loss': 2.6617, 'learning_rate': 4.123984571958025e-07, 'epoch': 10.45} + 65%|██████▌ | 242600/371472 [8:43:46<10:20:22, 3.46it/s] 65%|██████▌ | 242601/371472 [8:43:46<10:29:05, 3.41it/s] 65%|██████▌ | 242602/371472 [8:43:46<10:21:17, 3.46it/s] 65%|██████▌ | 242603/371472 [8:43:46<10:00:09, 3.58it/s] 65%|██████▌ | 242604/371472 [8:43:47<10:05:30, 3.55it/s] 65%|██████▌ | 242605/371472 [8:43:47<10:07:38, 3.53it/s] 65%|██████▌ | 242606/371472 [8:43:47<10:49:58, 3.30it/s] 65%|██████▌ | 242607/371472 [8:43:48<10:34:00, 3.39it/s] 65%|██████▌ | 242608/371472 [8:43:48<10:40:44, 3.35it/s] 65%|██████▌ | 242609/371472 [8:43:48<10:32:29, 3.40it/s] 65%|██████▌ | 242610/371472 [8:43:48<10:21:11, 3.46it/s] 65%|██████▌ | 242611/371472 [8:43:49<11:07:15, 3.22it/s] 65%|██████▌ | 242612/371472 [8:43:49<11:24:22, 3.14it/s] 65%|██████▌ | 242613/371472 [8:43:49<10:56:15, 3.27it/s] 65%|██████▌ | 242614/371472 [8:43:50<10:56:58, 3.27it/s] 65%|██████▌ | 242615/371472 [8:43:50<10:35:43, 3.38it/s] 65%|██████▌ | 242616/371472 [8:43:50<10:35:07, 3.38it/s] 65%|██████▌ | 242617/371472 [8:43:51<10:27:24, 3.42it/s] 65%|██████▌ | 242618/371472 [8:43:51<10:48:19, 3.31it/s] 65%|██████▌ | 242619/371472 [8:43:51<11:05:43, 3.23it/s] 65%|██████▌ | 242620/371472 [8:43:52<10:53:25, 3.29it/s] {'loss': 2.6414, 'learning_rate': 4.1234997522032356e-07, 'epoch': 10.45} + 65%|██████▌ | 242620/371472 [8:43:52<10:53:25, 3.29it/s] 65%|██████▌ | 242621/371472 [8:43:52<11:16:52, 3.17it/s] 65%|██████▌ | 242622/371472 [8:43:52<11:14:48, 3.18it/s] 65%|██████▌ | 242623/371472 [8:43:52<11:13:18, 3.19it/s] 65%|██████▌ | 242624/371472 [8:43:53<11:26:43, 3.13it/s] 65%|██████▌ | 242625/371472 [8:43:53<11:23:11, 3.14it/s] 65%|██████▌ | 242626/371472 [8:43:53<11:04:47, 3.23it/s] 65%|██████▌ | 242627/371472 [8:43:54<10:56:26, 3.27it/s] 65%|██████▌ | 242628/371472 [8:43:54<10:40:28, 3.35it/s] 65%|██████▌ | 242629/371472 [8:43:54<10:35:16, 3.38it/s] 65%|██████▌ | 242630/371472 [8:43:55<11:39:57, 3.07it/s] 65%|██████▌ | 242631/371472 [8:43:55<11:17:45, 3.17it/s] 65%|██████▌ | 242632/371472 [8:43:55<11:46:27, 3.04it/s] 65%|██████▌ | 242633/371472 [8:43:56<11:31:39, 3.10it/s] 65%|██████▌ | 242634/371472 [8:43:56<11:18:59, 3.16it/s] 65%|██████▌ | 242635/371472 [8:43:56<11:07:45, 3.22it/s] 65%|██████▌ | 242636/371472 [8:43:57<11:04:03, 3.23it/s] 65%|██████▌ | 242637/371472 [8:43:57<11:25:13, 3.13it/s] 65%|██████▌ | 242638/371472 [8:43:57<11:53:36, 3.01it/s] 65%|██████▌ | 242639/371472 [8:43:58<11:50:45, 3.02it/s] 65%|██████▌ | 242640/371472 [8:43:58<11:19:20, 3.16it/s] {'loss': 2.6696, 'learning_rate': 4.1230149324484474e-07, 'epoch': 10.45} + 65%|██████▌ | 242640/371472 [8:43:58<11:19:20, 3.16it/s] 65%|██████▌ | 242641/371472 [8:43:58<11:02:34, 3.24it/s] 65%|██████▌ | 242642/371472 [8:43:58<11:17:59, 3.17it/s] 65%|██████▌ | 242643/371472 [8:43:59<10:59:36, 3.26it/s] 65%|██████▌ | 242644/371472 [8:43:59<10:43:12, 3.34it/s] 65%|██████▌ | 242645/371472 [8:43:59<10:29:44, 3.41it/s] 65%|██████▌ | 242646/371472 [8:44:00<10:19:06, 3.47it/s] 65%|██████▌ | 242647/371472 [8:44:00<10:06:43, 3.54it/s] 65%|███���██▌ | 242648/371472 [8:44:00<10:10:47, 3.52it/s] 65%|██████▌ | 242649/371472 [8:44:00<10:25:04, 3.43it/s] 65%|██████▌ | 242650/371472 [8:44:01<10:07:08, 3.54it/s] 65%|██████▌ | 242651/371472 [8:44:01<10:43:48, 3.33it/s] 65%|██████▌ | 242652/371472 [8:44:01<10:39:31, 3.36it/s] 65%|██████▌ | 242653/371472 [8:44:02<10:36:26, 3.37it/s] 65%|██████▌ | 242654/371472 [8:44:02<10:35:52, 3.38it/s] 65%|██████▌ | 242655/371472 [8:44:02<10:29:53, 3.41it/s] 65%|██████▌ | 242656/371472 [8:44:03<10:27:18, 3.42it/s] 65%|██████▌ | 242657/371472 [8:44:03<10:25:07, 3.43it/s] 65%|██████▌ | 242658/371472 [8:44:03<10:09:18, 3.52it/s] 65%|██████▌ | 242659/371472 [8:44:03<10:22:01, 3.45it/s] 65%|██████▌ | 242660/371472 [8:44:04<10:07:23, 3.53it/s] {'loss': 2.6282, 'learning_rate': 4.122530112693658e-07, 'epoch': 10.45} + 65%|██████▌ | 242660/371472 [8:44:04<10:07:23, 3.53it/s] 65%|██████▌ | 242661/371472 [8:44:04<10:18:30, 3.47it/s] 65%|██████▌ | 242662/371472 [8:44:04<10:14:05, 3.50it/s] 65%|██████▌ | 242663/371472 [8:44:05<10:21:59, 3.45it/s] 65%|██████▌ | 242664/371472 [8:44:05<10:28:00, 3.42it/s] 65%|██████▌ | 242665/371472 [8:44:05<10:24:21, 3.44it/s] 65%|██████▌ | 242666/371472 [8:44:05<10:07:35, 3.53it/s] 65%|██████▌ | 242667/371472 [8:44:06<10:05:03, 3.55it/s] 65%|██████▌ | 242668/371472 [8:44:06<10:03:44, 3.56it/s] 65%|██████▌ | 242669/371472 [8:44:06<10:09:12, 3.52it/s] 65%|██████▌ | 242670/371472 [8:44:07<10:09:32, 3.52it/s] 65%|██████▌ | 242671/371472 [8:44:07<10:10:00, 3.52it/s] 65%|██████▌ | 242672/371472 [8:44:07<10:12:55, 3.50it/s] 65%|██████▌ | 242673/371472 [8:44:07<10:29:30, 3.41it/s] 65%|██████▌ | 242674/371472 [8:44:08<10:27:08, 3.42it/s] 65%|██████▌ | 242675/371472 [8:44:08<10:24:37, 3.44it/s] 65%|██████▌ | 242676/371472 [8:44:08<10:25:03, 3.43it/s] 65%|██████▌ | 242677/371472 [8:44:09<11:40:20, 3.07it/s] 65%|██████▌ | 242678/371472 [8:44:09<11:32:39, 3.10it/s] 65%|██████▌ | 242679/371472 [8:44:09<11:16:02, 3.18it/s] 65%|██████▌ | 242680/371472 [8:44:10<11:02:53, 3.24it/s] {'loss': 2.7701, 'learning_rate': 4.1220452929388694e-07, 'epoch': 10.45} + 65%|██████▌ | 242680/371472 [8:44:10<11:02:53, 3.24it/s] 65%|██████▌ | 242681/371472 [8:44:10<11:28:42, 3.12it/s] 65%|██████▌ | 242682/371472 [8:44:10<10:55:21, 3.28it/s] 65%|██████▌ | 242683/371472 [8:44:11<10:49:12, 3.31it/s] 65%|██████▌ | 242684/371472 [8:44:11<10:46:51, 3.32it/s] 65%|██████▌ | 242685/371472 [8:44:11<11:22:54, 3.14it/s] 65%|██████▌ | 242686/371472 [8:44:11<11:07:18, 3.22it/s] 65%|██████▌ | 242687/371472 [8:44:12<11:12:58, 3.19it/s] 65%|██████▌ | 242688/371472 [8:44:12<10:59:29, 3.25it/s] 65%|██████▌ | 242689/371472 [8:44:12<10:40:33, 3.35it/s] 65%|██████▌ | 242690/371472 [8:44:13<10:37:05, 3.37it/s] 65%|██████▌ | 242691/371472 [8:44:13<10:26:22, 3.43it/s] 65%|██████▌ | 242692/371472 [8:44:13<10:27:12, 3.42it/s] 65%|██████▌ | 242693/371472 [8:44:13<10:10:10, 3.52it/s] 65%|██████▌ | 242694/371472 [8:44:14<10:05:20, 3.55it/s] 65%|██████▌ | 242695/371472 [8:44:14<10:27:26, 3.42it/s] 65%|██████▌ | 242696/371472 [8:44:14<10:11:13, 3.51it/s] 65%|██████▌ | 242697/371472 [8:44:15<10:01:48, 3.57it/s] 65%|██████▌ | 242698/371472 [8:44:15<10:34:55, 3.38it/s] 65%|██████▌ | 242699/371472 [8:44:15<10:25:03, 3.43it/s] 65%|██████▌ | 242700/371472 [8:44:15<10:04:11, 3.55it/s] {'loss': 2.6444, 'learning_rate': 4.12156047318408e-07, 'epoch': 10.45} + 65%|██████▌ | 242700/371472 [8:44:15<10:04:11, 3.55it/s] 65%|██████▌ | 242701/371472 [8:44:16<10:05:09, 3.55it/s] 65%|██████▌ | 242702/371472 [8:44:16<10:04:43, 3.55it/s] 65%|██████▌ | 242703/371472 [8:44:16<9:59:18, 3.58it/s] 65%|██████▌ | 242704/371472 [8:44:17<10:16:42, 3.48it/s] 65%|██████▌ | 242705/371472 [8:44:17<10:00:00, 3.58it/s] 65%|██████▌ | 242706/371472 [8:44:17<10:00:43, 3.57it/s] 65%|██████▌ | 242707/371472 [8:44:17<10:21:56, 3.45it/s] 65%|██████▌ | 242708/371472 [8:44:18<10:13:55, 3.50it/s] 65%|██████▌ | 242709/371472 [8:44:18<10:17:31, 3.48it/s] 65%|██████▌ | 242710/371472 [8:44:18<10:09:52, 3.52it/s] 65%|██████▌ | 242711/371472 [8:44:19<10:06:50, 3.54it/s] 65%|██████▌ | 242712/371472 [8:44:19<11:15:59, 3.17it/s] 65%|██████▌ | 242713/371472 [8:44:19<11:23:15, 3.14it/s] 65%|██████▌ | 242714/371472 [8:44:20<11:12:35, 3.19it/s] 65%|██████▌ | 242715/371472 [8:44:20<11:22:46, 3.14it/s] 65%|██████▌ | 242716/371472 [8:44:20<11:19:43, 3.16it/s] 65%|██████▌ | 242717/371472 [8:44:21<11:05:07, 3.23it/s] 65%|██████▌ | 242718/371472 [8:44:21<10:48:33, 3.31it/s] 65%|██████▌ | 242719/371472 [8:44:21<10:30:36, 3.40it/s] 65%|██████▌ | 242720/371472 [8:44:21<10:44:44, 3.33it/s] {'loss': 2.717, 'learning_rate': 4.121075653429292e-07, 'epoch': 10.45} + 65%|██████▌ | 242720/371472 [8:44:21<10:44:44, 3.33it/s] 65%|██████▌ | 242721/371472 [8:44:22<10:31:20, 3.40it/s] 65%|██████▌ | 242722/371472 [8:44:22<10:31:11, 3.40it/s] 65%|██████▌ | 242723/371472 [8:44:22<10:38:08, 3.36it/s] 65%|██████▌ | 242724/371472 [8:44:23<11:00:03, 3.25it/s] 65%|██████▌ | 242725/371472 [8:44:23<10:41:38, 3.34it/s] 65%|██████▌ | 242726/371472 [8:44:23<10:29:43, 3.41it/s] 65%|██████▌ | 242727/371472 [8:44:23<10:15:02, 3.49it/s] 65%|██████▌ | 242728/371472 [8:44:24<10:01:41, 3.57it/s] 65%|██████▌ | 242729/371472 [8:44:24<9:56:33, 3.60it/s] 65%|██████▌ | 242730/371472 [8:44:24<10:56:23, 3.27it/s] 65%|██████▌ | 242731/371472 [8:44:25<10:49:41, 3.30it/s] 65%|██████▌ | 242732/371472 [8:44:25<10:44:16, 3.33it/s] 65%|██████▌ | 242733/371472 [8:44:25<10:33:39, 3.39it/s] 65%|██████▌ | 242734/371472 [8:44:26<11:58:49, 2.98it/s] 65%|██████▌ | 242735/371472 [8:44:26<11:27:08, 3.12it/s] 65%|██████▌ | 242736/371472 [8:44:26<11:32:26, 3.10it/s] 65%|██████▌ | 242737/371472 [8:44:27<10:53:45, 3.28it/s] 65%|██████▌ | 242738/371472 [8:44:27<11:02:10, 3.24it/s] 65%|██████▌ | 242739/371472 [8:44:27<11:25:02, 3.13it/s] 65%|██████▌ | 242740/371472 [8:44:28<11:29:14, 3.11it/s] {'loss': 2.8215, 'learning_rate': 4.120590833674502e-07, 'epoch': 10.46} + 65%|██████▌ | 242740/371472 [8:44:28<11:29:14, 3.11it/s] 65%|██████▌ | 242741/371472 [8:44:28<11:17:09, 3.17it/s] 65%|██████▌ | 242742/371472 [8:44:28<10:59:22, 3.25it/s] 65%|██████▌ | 242743/371472 [8:44:28<10:59:51, 3.25it/s] 65%|██████▌ | 242744/371472 [8:44:29<10:51:57, 3.29it/s] 65%|██████▌ | 242745/371472 [8:44:29<10:59:22, 3.25it/s] 65%|██████▌ | 242746/371472 [8:44:29<10:47:26, 3.31it/s] 65%|██████▌ | 242747/371472 [8:44:30<10:40:23, 3.35it/s] 65%|██████▌ | 242748/371472 [8:44:30<10:58:55, 3.26it/s] 65%|██████▌ | 242749/371472 [8:44:30<10:46:55, 3.32it/s] 65%|██████▌ | 242750/371472 [8:44:31<10:30:08, 3.40it/s] 65%|██████▌ | 242751/371472 [8:44:31<11:11:35, 3.19it/s] 65%|██████▌ | 242752/371472 [8:44:31<11:06:17, 3.22it/s] 65%|██████▌ | 242753/371472 [8:44:31<10:59:18, 3.25it/s] 65%|██████▌ | 242754/371472 [8:44:32<10:47:40, 3.31it/s] 65%|██████▌ | 242755/371472 [8:44:32<10:28:10, 3.42it/s] 65%|██████▌ | 242756/371472 [8:44:32<10:25:54, 3.43it/s] 65%|██████▌ | 242757/371472 [8:44:33<10:21:09, 3.45it/s] 65%|██████▌ | 242758/371472 [8:44:33<10:21:33, 3.45it/s] 65%|██████▌ | 242759/371472 [8:44:33<10:18:32, 3.47it/s] 65%|██████▌ | 242760/371472 [8:44:34<10:43:33, 3.33it/s] {'loss': 2.8347, 'learning_rate': 4.120106013919714e-07, 'epoch': 10.46} + 65%|██████▌ | 242760/371472 [8:44:34<10:43:33, 3.33it/s] 65%|██████▌ | 242761/371472 [8:44:34<10:37:52, 3.36it/s] 65%|██████▌ | 242762/371472 [8:44:34<10:19:29, 3.46it/s] 65%|██████▌ | 242763/371472 [8:44:34<10:26:37, 3.42it/s] 65%|██████▌ | 242764/371472 [8:44:35<10:52:07, 3.29it/s] 65%|██████▌ | 242765/371472 [8:44:35<11:05:01, 3.23it/s] 65%|██████▌ | 242766/371472 [8:44:35<10:47:24, 3.31it/s] 65%|██████▌ | 242767/371472 [8:44:36<10:39:15, 3.36it/s] 65%|██████▌ | 242768/371472 [8:44:36<10:26:30, 3.42it/s] 65%|██████▌ | 242769/371472 [8:44:36<10:35:59, 3.37it/s] 65%|██████▌ | 242770/371472 [8:44:36<10:20:53, 3.45it/s] 65%|██████▌ | 242771/371472 [8:44:37<10:50:40, 3.30it/s] 65%|██████▌ | 242772/371472 [8:44:37<10:31:36, 3.40it/s] 65%|██████▌ | 242773/371472 [8:44:37<10:41:49, 3.34it/s] 65%|██████▌ | 242774/371472 [8:44:38<10:35:41, 3.37it/s] 65%|██████▌ | 242775/371472 [8:44:38<10:43:06, 3.34it/s] 65%|██████▌ | 242776/371472 [8:44:38<10:42:12, 3.34it/s] 65%|██████▌ | 242777/371472 [8:44:39<10:26:15, 3.42it/s] 65%|██████▌ | 242778/371472 [8:44:39<10:11:57, 3.51it/s] 65%|██████▌ | 242779/371472 [8:44:39<10:33:42, 3.38it/s] 65%|██████▌ | 242780/371472 [8:44:40<11:08:00, 3.21it/s] {'loss': 2.8176, 'learning_rate': 4.1196211941649245e-07, 'epoch': 10.46} + 65%|██████▌ | 242780/371472 [8:44:40<11:08:00, 3.21it/s] 65%|██████▌ | 242781/371472 [8:44:40<10:44:53, 3.33it/s] 65%|██████▌ | 242782/371472 [8:44:40<10:45:29, 3.32it/s] 65%|██████▌ | 242783/371472 [8:44:40<11:13:07, 3.19it/s] 65%|██████▌ | 242784/371472 [8:44:41<10:47:53, 3.31it/s] 65%|██████▌ | 242785/371472 [8:44:41<10:43:04, 3.34it/s] 65%|██████▌ | 242786/371472 [8:44:41<10:24:15, 3.44it/s] 65%|██████▌ | 242787/371472 [8:44:42<10:25:09, 3.43it/s] 65%|██████▌ | 242788/371472 [8:44:42<10:20:55, 3.45it/s] 65%|██████▌ | 242789/371472 [8:44:42<10:14:41, 3.49it/s] 65%|██████▌ | 242790/371472 [8:44:42<10:11:49, 3.51it/s] 65%|██████▌ | 242791/371472 [8:44:43<10:25:54, 3.43it/s] 65%|██████▌ | 242792/371472 [8:44:43<11:16:37, 3.17it/s] 65%|██████▌ | 242793/371472 [8:44:43<10:45:54, 3.32it/s] 65%|██████▌ | 242794/371472 [8:44:44<10:31:13, 3.40it/s] 65%|██████▌ | 242795/371472 [8:44:44<10:13:05, 3.50it/s] 65%|██████▌ | 242796/371472 [8:44:44<9:58:56, 3.58it/s] 65%|██████▌ | 242797/371472 [8:44:44<10:10:51, 3.51it/s] 65%|██████▌ | 242798/371472 [8:44:45<9:59:21, 3.58it/s] 65%|██████▌ | 242799/371472 [8:44:45<9:52:04, 3.62it/s] 65%|██████▌ | 242800/371472 [8:44:45<9:54:45, 3.61it/s] {'loss': 2.8437, 'learning_rate': 4.119136374410136e-07, 'epoch': 10.46} + 65%|██████▌ | 242800/371472 [8:44:45<9:54:45, 3.61it/s] 65%|██████▌ | 242801/371472 [8:44:46<9:49:12, 3.64it/s] 65%|██████▌ | 242802/371472 [8:44:46<10:11:12, 3.51it/s] 65%|██████▌ | 242803/371472 [8:44:46<10:00:17, 3.57it/s] 65%|██████▌ | 242804/371472 [8:44:46<10:01:01, 3.57it/s] 65%|██████▌ | 242805/371472 [8:44:47<10:57:48, 3.26it/s] 65%|██████▌ | 242806/371472 [8:44:47<11:05:05, 3.22it/s] 65%|██████▌ | 242807/371472 [8:44:47<11:40:55, 3.06it/s] 65%|██████▌ | 242808/371472 [8:44:48<11:01:42, 3.24it/s] 65%|██████▌ | 242809/371472 [8:44:48<10:35:57, 3.37it/s] 65%|██████▌ | 242810/371472 [8:44:48<10:12:59, 3.50it/s] 65%|██████▌ | 242811/371472 [8:44:49<10:12:26, 3.50it/s] 65%|██████▌ | 242812/371472 [8:44:49<10:21:04, 3.45it/s] 65%|██████▌ | 242813/371472 [8:44:49<10:05:20, 3.54it/s] 65%|██████▌ | 242814/371472 [8:44:49<9:51:44, 3.62it/s] 65%|██████▌ | 242815/371472 [8:44:50<9:55:32, 3.60it/s] 65%|██████▌ | 242816/371472 [8:44:50<11:11:55, 3.19it/s] 65%|██████▌ | 242817/371472 [8:44:50<10:58:39, 3.26it/s] 65%|██████▌ | 242818/371472 [8:44:51<11:02:50, 3.23it/s] 65%|██████▌ | 242819/371472 [8:44:51<10:46:49, 3.31it/s] 65%|██████▌ | 242820/371472 [8:44:51<10:25:17, 3.43it/s] {'loss': 2.7224, 'learning_rate': 4.1186515546553465e-07, 'epoch': 10.46} + 65%|██████▌ | 242820/371472 [8:44:51<10:25:17, 3.43it/s] 65%|██████▌ | 242821/371472 [8:44:52<10:41:12, 3.34it/s] 65%|██████▌ | 242822/371472 [8:44:52<10:31:54, 3.39it/s] 65%|██████▌ | 242823/371472 [8:44:52<10:30:23, 3.40it/s] 65%|██████▌ | 242824/371472 [8:44:52<10:16:38, 3.48it/s] 65%|██████▌ | 242825/371472 [8:44:53<10:20:17, 3.46it/s] 65%|██████▌ | 242826/371472 [8:44:53<10:23:46, 3.44it/s] 65%|██████▌ | 242827/371472 [8:44:53<10:51:54, 3.29it/s] 65%|██████▌ | 242828/371472 [8:44:54<10:45:50, 3.32it/s] 65%|██████▌ | 242829/371472 [8:44:54<10:27:08, 3.42it/s] 65%|██████▌ | 242830/371472 [8:44:54<10:25:37, 3.43it/s] 65%|██████▌ | 242831/371472 [8:44:55<11:48:08, 3.03it/s] 65%|██████▌ | 242832/371472 [8:44:55<11:24:07, 3.13it/s] 65%|██████▌ | 242833/371472 [8:44:55<10:57:49, 3.26it/s] 65%|██████▌ | 242834/371472 [8:44:55<10:45:12, 3.32it/s] 65%|██████▌ | 242835/371472 [8:44:56<11:06:08, 3.22it/s] 65%|██████▌ | 242836/371472 [8:44:56<11:07:46, 3.21it/s] 65%|██████▌ | 242837/371472 [8:44:56<10:59:40, 3.25it/s] 65%|██████▌ | 242838/371472 [8:44:57<10:50:10, 3.30it/s] 65%|██████▌ | 242839/371472 [8:44:57<10:43:46, 3.33it/s] 65%|██████▌ | 242840/371472 [8:44:57<11:21:12, 3.15it/s] {'loss': 2.513, 'learning_rate': 4.1181667349005583e-07, 'epoch': 10.46} + 65%|██████▌ | 242840/371472 [8:44:57<11:21:12, 3.15it/s] 65%|██████▌ | 242841/371472 [8:44:58<11:26:08, 3.12it/s] 65%|██████▌ | 242842/371472 [8:44:58<10:46:13, 3.32it/s] 65%|██████▌ | 242843/371472 [8:44:58<11:06:29, 3.22it/s] 65%|██████▌ | 242844/371472 [8:44:59<11:03:05, 3.23it/s] 65%|██████▌ | 242845/371472 [8:44:59<10:46:15, 3.32it/s] 65%|██████▌ | 242846/371472 [8:44:59<10:49:50, 3.30it/s] 65%|██████▌ | 242847/371472 [8:44:59<11:02:49, 3.23it/s] 65%|██████▌ | 242848/371472 [8:45:00<11:12:25, 3.19it/s] 65%|██████▌ | 242849/371472 [8:45:00<11:08:55, 3.20it/s] 65%|██████▌ | 242850/371472 [8:45:00<11:04:34, 3.23it/s] 65%|██████▌ | 242851/371472 [8:45:01<11:17:44, 3.16it/s] 65%|██████▌ | 242852/371472 [8:45:01<11:08:00, 3.21it/s] 65%|██████▌ | 242853/371472 [8:45:01<11:14:34, 3.18it/s] 65%|██████▌ | 242854/371472 [8:45:02<10:59:01, 3.25it/s] 65%|██████▌ | 242855/371472 [8:45:02<11:09:25, 3.20it/s] 65%|██████▌ | 242856/371472 [8:45:02<10:51:50, 3.29it/s] 65%|██████▌ | 242857/371472 [8:45:03<11:42:47, 3.05it/s] 65%|██████▌ | 242858/371472 [8:45:03<11:49:25, 3.02it/s] 65%|██████▌ | 242859/371472 [8:45:03<11:12:46, 3.19it/s] 65%|██████▌ | 242860/371472 [8:45:04<11:11:11, 3.19it/s] {'loss': 2.6324, 'learning_rate': 4.1176819151457685e-07, 'epoch': 10.46} + 65%|██████▌ | 242860/371472 [8:45:04<11:11:11, 3.19it/s] 65%|██████▌ | 242861/371472 [8:45:04<11:30:55, 3.10it/s] 65%|██████▌ | 242862/371472 [8:45:04<11:33:26, 3.09it/s] 65%|██████▌ | 242863/371472 [8:45:05<11:45:26, 3.04it/s] 65%|██████▌ | 242864/371472 [8:45:05<11:24:59, 3.13it/s] 65%|██████▌ | 242865/371472 [8:45:05<11:57:25, 2.99it/s] 65%|██████▌ | 242866/371472 [8:45:06<11:33:14, 3.09it/s] 65%|██████▌ | 242867/371472 [8:45:06<11:17:37, 3.16it/s] 65%|██████▌ | 242868/371472 [8:45:06<11:18:34, 3.16it/s] 65%|██████▌ | 242869/371472 [8:45:06<10:57:49, 3.26it/s] 65%|██████▌ | 242870/371472 [8:45:07<10:38:52, 3.35it/s] 65%|██████▌ | 242871/371472 [8:45:07<10:19:36, 3.46it/s] 65%|██████▌ | 242872/371472 [8:45:07<11:18:38, 3.16it/s] 65%|██████▌ | 242873/371472 [8:45:08<11:18:48, 3.16it/s] 65%|██████▌ | 242874/371472 [8:45:08<10:52:27, 3.28it/s] 65%|██████▌ | 242875/371472 [8:45:08<10:36:46, 3.37it/s] 65%|██████▌ | 242876/371472 [8:45:09<11:28:19, 3.11it/s] 65%|██████▌ | 242877/371472 [8:45:09<12:58:49, 2.75it/s] 65%|██████▌ | 242878/371472 [8:45:09<12:46:27, 2.80it/s] 65%|██████▌ | 242879/371472 [8:45:10<11:52:31, 3.01it/s] 65%|██████▌ | 242880/371472 [8:45:10<11:09:03, 3.20it/s] {'loss': 2.7459, 'learning_rate': 4.11719709539098e-07, 'epoch': 10.46} + 65%|██████▌ | 242880/371472 [8:45:10<11:09:03, 3.20it/s] 65%|██████▌ | 242881/371472 [8:45:10<10:48:24, 3.31it/s] 65%|██████▌ | 242882/371472 [8:45:11<10:35:47, 3.37it/s] 65%|██████▌ | 242883/371472 [8:45:11<10:28:46, 3.41it/s] 65%|██████▌ | 242884/371472 [8:45:11<10:16:34, 3.48it/s] 65%|██████▌ | 242885/371472 [8:45:11<10:30:07, 3.40it/s] 65%|██████▌ | 242886/371472 [8:45:12<10:24:09, 3.43it/s] 65%|██████▌ | 242887/371472 [8:45:12<10:47:55, 3.31it/s] 65%|██████▌ | 242888/371472 [8:45:12<10:46:39, 3.31it/s] 65%|██████▌ | 242889/371472 [8:45:13<10:30:06, 3.40it/s] 65%|██████▌ | 242890/371472 [8:45:13<10:48:55, 3.30it/s] 65%|██████▌ | 242891/371472 [8:45:13<10:44:43, 3.32it/s] 65%|██████▌ | 242892/371472 [8:45:14<11:21:17, 3.15it/s] 65%|██████▌ | 242893/371472 [8:45:14<11:29:09, 3.11it/s] 65%|██████▌ | 242894/371472 [8:45:14<11:32:39, 3.09it/s] 65%|██████▌ | 242895/371472 [8:45:14<11:20:25, 3.15it/s] 65%|██████▌ | 242896/371472 [8:45:15<11:42:05, 3.05it/s] 65%|██████▌ | 242897/371472 [8:45:15<11:23:07, 3.14it/s] 65%|██████▌ | 242898/371472 [8:45:15<11:15:15, 3.17it/s] 65%|██████▌ | 242899/371472 [8:45:16<10:55:04, 3.27it/s] 65%|██████▌ | 242900/371472 [8:45:16<10:59:42, 3.25it/s] {'loss': 2.7043, 'learning_rate': 4.116712275636191e-07, 'epoch': 10.46} + 65%|██████▌ | 242900/371472 [8:45:16<10:59:42, 3.25it/s] 65%|██████▌ | 242901/371472 [8:45:16<10:37:47, 3.36it/s] 65%|██████▌ | 242902/371472 [8:45:17<10:25:42, 3.42it/s] 65%|██████▌ | 242903/371472 [8:45:17<10:07:28, 3.53it/s] 65%|██████▌ | 242904/371472 [8:45:17<9:57:11, 3.59it/s] 65%|██████▌ | 242905/371472 [8:45:17<10:18:28, 3.46it/s] 65%|██████▌ | 242906/371472 [8:45:18<10:00:49, 3.57it/s] 65%|██████▌ | 242907/371472 [8:45:18<10:18:49, 3.46it/s] 65%|██████▌ | 242908/371472 [8:45:18<10:57:15, 3.26it/s] 65%|██████▌ | 242909/371472 [8:45:19<10:30:23, 3.40it/s] 65%|██████▌ | 242910/371472 [8:45:19<10:49:38, 3.30it/s] 65%|██████▌ | 242911/371472 [8:45:19<10:42:46, 3.33it/s] 65%|██████▌ | 242912/371472 [8:45:20<11:09:25, 3.20it/s] 65%|██████▌ | 242913/371472 [8:45:20<11:04:33, 3.22it/s] 65%|██████▌ | 242914/371472 [8:45:20<10:51:11, 3.29it/s] 65%|██████▌ | 242915/371472 [8:45:20<10:28:24, 3.41it/s] 65%|██████▌ | 242916/371472 [8:45:21<11:02:22, 3.23it/s] 65%|██████▌ | 242917/371472 [8:45:21<10:50:19, 3.29it/s] 65%|██████▌ | 242918/371472 [8:45:21<10:30:57, 3.40it/s] 65%|██████▌ | 242919/371472 [8:45:22<10:18:39, 3.46it/s] 65%|██████▌ | 242920/371472 [8:45:22<10:01:22, 3.56it/s] {'loss': 2.7917, 'learning_rate': 4.116227455881402e-07, 'epoch': 10.46} + 65%|██████▌ | 242920/371472 [8:45:22<10:01:22, 3.56it/s] 65%|██████▌ | 242921/371472 [8:45:22<10:59:01, 3.25it/s] 65%|██████▌ | 242922/371472 [8:45:23<10:58:25, 3.25it/s] 65%|██████▌ | 242923/371472 [8:45:23<10:41:41, 3.34it/s] 65%|██████▌ | 242924/371472 [8:45:23<10:33:20, 3.38it/s] 65%|██████▌ | 242925/371472 [8:45:23<10:14:12, 3.49it/s] 65%|██████▌ | 242926/371472 [8:45:24<10:28:51, 3.41it/s] 65%|██████▌ | 242927/371472 [8:45:24<10:15:24, 3.48it/s] 65%|██████▌ | 242928/371472 [8:45:24<10:23:32, 3.44it/s] 65%|██████▌ | 242929/371472 [8:45:25<10:21:59, 3.44it/s] 65%|██████▌ | 242930/371472 [8:45:25<10:30:04, 3.40it/s] 65%|██████▌ | 242931/371472 [8:45:25<10:29:11, 3.40it/s] 65%|██████▌ | 242932/371472 [8:45:25<10:17:13, 3.47it/s] 65%|██████▌ | 242933/371472 [8:45:26<9:57:02, 3.59it/s] 65%|██████▌ | 242934/371472 [8:45:26<9:52:41, 3.61it/s] 65%|██████▌ | 242935/371472 [8:45:26<10:19:45, 3.46it/s] 65%|██████▌ | 242936/371472 [8:45:27<10:21:26, 3.45it/s] 65%|██████▌ | 242937/371472 [8:45:27<10:32:30, 3.39it/s] 65%|██████▌ | 242938/371472 [8:45:27<10:40:33, 3.34it/s] 65%|██████▌ | 242939/371472 [8:45:28<10:55:39, 3.27it/s] 65%|██████▌ | 242940/371472 [8:45:28<10:44:31, 3.32it/s] {'loss': 2.611, 'learning_rate': 4.115742636126613e-07, 'epoch': 10.46} + 65%|██████▌ | 242940/371472 [8:45:28<10:44:31, 3.32it/s] 65%|██████▌ | 242941/371472 [8:45:28<10:35:35, 3.37it/s] 65%|██████▌ | 242942/371472 [8:45:28<11:24:59, 3.13it/s] 65%|██████▌ | 242943/371472 [8:45:29<10:48:28, 3.30it/s] 65%|██████▌ | 242944/371472 [8:45:29<10:25:08, 3.43it/s] 65%|██████▌ | 242945/371472 [8:45:29<10:21:44, 3.45it/s] 65%|██████▌ | 242946/371472 [8:45:30<10:14:50, 3.48it/s] 65%|██████▌ | 242947/371472 [8:45:30<10:13:11, 3.49it/s] 65%|██████▌ | 242948/371472 [8:45:30<10:37:41, 3.36it/s] 65%|██████▌ | 242949/371472 [8:45:30<10:33:59, 3.38it/s] 65%|██████▌ | 242950/371472 [8:45:31<10:34:42, 3.37it/s] 65%|██████▌ | 242951/371472 [8:45:31<10:19:43, 3.46it/s] 65%|██████▌ | 242952/371472 [8:45:31<10:22:18, 3.44it/s] 65%|██████▌ | 242953/371472 [8:45:32<10:51:03, 3.29it/s] 65%|██████▌ | 242954/371472 [8:45:32<10:56:26, 3.26it/s] 65%|██████▌ | 242955/371472 [8:45:32<11:07:25, 3.21it/s] 65%|██████▌ | 242956/371472 [8:45:33<10:52:40, 3.28it/s] 65%|██████▌ | 242957/371472 [8:45:33<12:08:23, 2.94it/s] 65%|██████▌ | 242958/371472 [8:45:33<12:24:16, 2.88it/s] 65%|██████▌ | 242959/371472 [8:45:34<12:37:03, 2.83it/s] 65%|██████▌ | 242960/371472 [8:45:34<11:28:15, 3.11it/s] {'loss': 2.8265, 'learning_rate': 4.1152578163718247e-07, 'epoch': 10.46} + 65%|██████▌ | 242960/371472 [8:45:34<11:28:15, 3.11it/s] 65%|██████▌ | 242961/371472 [8:45:34<11:34:30, 3.08it/s] 65%|██████▌ | 242962/371472 [8:45:35<11:10:26, 3.19it/s] 65%|██████▌ | 242963/371472 [8:45:35<10:48:36, 3.30it/s] 65%|██████▌ | 242964/371472 [8:45:35<11:11:52, 3.19it/s] 65%|██████▌ | 242965/371472 [8:45:35<10:36:26, 3.37it/s] 65%|██████▌ | 242966/371472 [8:45:36<10:24:49, 3.43it/s] 65%|██████▌ | 242967/371472 [8:45:36<10:31:30, 3.39it/s] 65%|██████▌ | 242968/371472 [8:45:36<10:33:32, 3.38it/s] 65%|██████▌ | 242969/371472 [8:45:37<10:18:07, 3.46it/s] 65%|██████▌ | 242970/371472 [8:45:37<10:01:04, 3.56it/s] 65%|██████▌ | 242971/371472 [8:45:37<10:11:46, 3.50it/s] 65%|██████▌ | 242972/371472 [8:45:38<10:32:57, 3.38it/s] 65%|██████▌ | 242973/371472 [8:45:38<10:22:21, 3.44it/s] 65%|██████▌ | 242974/371472 [8:45:38<10:38:24, 3.35it/s] 65%|██████▌ | 242975/371472 [8:45:38<11:22:46, 3.14it/s] 65%|██████▌ | 242976/371472 [8:45:39<10:52:34, 3.28it/s] 65%|██████▌ | 242977/371472 [8:45:39<11:48:50, 3.02it/s] 65%|██████▌ | 242978/371472 [8:45:39<11:13:54, 3.18it/s] 65%|██████▌ | 242979/371472 [8:45:40<10:48:45, 3.30it/s] 65%|██████▌ | 242980/371472 [8:45:40<11:02:36, 3.23it/s] {'loss': 2.6467, 'learning_rate': 4.1147729966170354e-07, 'epoch': 10.47} + 65%|██████▌ | 242980/371472 [8:45:40<11:02:36, 3.23it/s] 65%|██████▌ | 242981/371472 [8:45:40<10:52:15, 3.28it/s] 65%|██████▌ | 242982/371472 [8:45:41<10:28:37, 3.41it/s] 65%|██████▌ | 242983/371472 [8:45:41<10:23:41, 3.43it/s] 65%|██████▌ | 242984/371472 [8:45:41<10:17:46, 3.47it/s] 65%|██████▌ | 242985/371472 [8:45:41<10:49:51, 3.30it/s] 65%|██████▌ | 242986/371472 [8:45:42<10:50:46, 3.29it/s] 65%|██████▌ | 242987/371472 [8:45:42<10:40:21, 3.34it/s] 65%|██████▌ | 242988/371472 [8:45:42<10:27:37, 3.41it/s] 65%|██████▌ | 242989/371472 [8:45:43<10:13:17, 3.49it/s] 65%|██████▌ | 242990/371472 [8:45:43<10:32:37, 3.38it/s] 65%|██████▌ | 242991/371472 [8:45:43<10:19:09, 3.46it/s] 65%|██████▌ | 242992/371472 [8:45:44<10:19:11, 3.46it/s] 65%|██████▌ | 242993/371472 [8:45:44<10:28:56, 3.40it/s] 65%|██████▌ | 242994/371472 [8:45:44<10:19:59, 3.45it/s] 65%|██████▌ | 242995/371472 [8:45:44<10:14:18, 3.49it/s] 65%|██████▌ | 242996/371472 [8:45:45<10:36:12, 3.37it/s] 65%|██████▌ | 242997/371472 [8:45:45<10:47:51, 3.31it/s] 65%|██████▌ | 242998/371472 [8:45:45<10:36:08, 3.37it/s] 65%|██████▌ | 242999/371472 [8:45:46<10:13:49, 3.49it/s] 65%|██████▌ | 243000/371472 [8:45:46<11:35:03, 3.08it/s] {'loss': 2.5529, 'learning_rate': 4.1142881768622466e-07, 'epoch': 10.47} + 65%|██████▌ | 243000/371472 [8:45:46<11:35:03, 3.08it/s] 65%|██████▌ | 243001/371472 [8:45:46<11:54:25, 3.00it/s] 65%|██████▌ | 243002/371472 [8:45:47<11:18:27, 3.16it/s] 65%|██████▌ | 243003/371472 [8:45:47<10:54:07, 3.27it/s] 65%|██████▌ | 243004/371472 [8:45:47<11:19:45, 3.15it/s] 65%|██████▌ | 243005/371472 [8:45:48<11:32:10, 3.09it/s] 65%|██████▌ | 243006/371472 [8:45:48<10:58:18, 3.25it/s] 65%|███��██▌ | 243007/371472 [8:45:48<10:50:13, 3.29it/s] 65%|██████▌ | 243008/371472 [8:45:48<10:36:21, 3.36it/s] 65%|██████▌ | 243009/371472 [8:45:49<10:22:34, 3.44it/s] 65%|██████▌ | 243010/371472 [8:45:49<10:08:06, 3.52it/s] 65%|██████▌ | 243011/371472 [8:45:49<9:56:30, 3.59it/s] 65%|██████▌ | 243012/371472 [8:45:49<9:47:11, 3.65it/s] 65%|██████▌ | 243013/371472 [8:45:50<10:36:27, 3.36it/s] 65%|██████▌ | 243014/371472 [8:45:50<10:21:43, 3.44it/s] 65%|██████▌ | 243015/371472 [8:45:50<10:19:44, 3.45it/s] 65%|██████▌ | 243016/371472 [8:45:51<10:10:34, 3.51it/s] 65%|██████▌ | 243017/371472 [8:45:51<10:41:00, 3.34it/s] 65%|██████▌ | 243018/371472 [8:45:51<10:43:52, 3.32it/s] 65%|██████▌ | 243019/371472 [8:45:52<10:19:48, 3.45it/s] 65%|██████▌ | 243020/371472 [8:45:52<10:17:04, 3.47it/s] {'loss': 2.711, 'learning_rate': 4.113803357107458e-07, 'epoch': 10.47} + 65%|██████▌ | 243020/371472 [8:45:52<10:17:04, 3.47it/s] 65%|██████▌ | 243021/371472 [8:45:52<10:02:17, 3.55it/s] 65%|██████▌ | 243022/371472 [8:45:52<10:03:45, 3.55it/s] 65%|██████▌ | 243023/371472 [8:45:53<9:56:16, 3.59it/s] 65%|██████▌ | 243024/371472 [8:45:53<10:12:16, 3.50it/s] 65%|██████▌ | 243025/371472 [8:45:53<10:17:08, 3.47it/s] 65%|██████▌ | 243026/371472 [8:45:54<10:04:40, 3.54it/s] 65%|██████▌ | 243027/371472 [8:45:54<9:49:00, 3.63it/s] 65%|██████▌ | 243028/371472 [8:45:54<9:56:39, 3.59it/s] 65%|██████▌ | 243029/371472 [8:45:54<9:52:47, 3.61it/s] 65%|██████▌ | 243030/371472 [8:45:55<9:48:32, 3.64it/s] 65%|██████▌ | 243031/371472 [8:45:55<11:24:46, 3.13it/s] 65%|██████▌ | 243032/371472 [8:45:55<10:57:55, 3.25it/s] 65%|██████▌ | 243033/371472 [8:45:56<10:43:17, 3.33it/s] 65%|██████▌ | 243034/371472 [8:45:56<10:37:58, 3.36it/s] 65%|██████▌ | 243035/371472 [8:45:56<10:40:52, 3.34it/s] 65%|██████▌ | 243036/371472 [8:45:57<10:41:30, 3.34it/s] 65%|██████▌ | 243037/371472 [8:45:57<11:00:50, 3.24it/s] 65%|██████▌ | 243038/371472 [8:45:57<10:50:33, 3.29it/s] 65%|██████▌ | 243039/371472 [8:45:57<10:44:25, 3.32it/s] 65%|██████▌ | 243040/371472 [8:45:58<10:50:40, 3.29it/s] {'loss': 2.7606, 'learning_rate': 4.113318537352669e-07, 'epoch': 10.47} + 65%|██████▌ | 243040/371472 [8:45:58<10:50:40, 3.29it/s] 65%|██████▌ | 243041/371472 [8:45:58<10:56:08, 3.26it/s] 65%|██████▌ | 243042/371472 [8:45:58<10:42:29, 3.33it/s] 65%|██████▌ | 243043/371472 [8:45:59<11:05:17, 3.22it/s] 65%|██████▌ | 243044/371472 [8:45:59<10:39:07, 3.35it/s] 65%|██████▌ | 243045/371472 [8:45:59<10:47:44, 3.30it/s] 65%|██████▌ | 243046/371472 [8:46:00<10:35:24, 3.37it/s] 65%|██████▌ | 243047/371472 [8:46:00<10:23:41, 3.43it/s] 65%|██████▌ | 243048/371472 [8:46:00<10:30:00, 3.40it/s] 65%|██████▌ | 243049/371472 [8:46:00<10:29:59, 3.40it/s] 65%|██████▌ | 243050/371472 [8:46:01<10:33:39, 3.38it/s] 65%|██████▌ | 243051/371472 [8:46:01<10:50:19, 3.29it/s] 65%|██████▌ | 243052/371472 [8:46:01<10:27:49, 3.41it/s] 65%|██████▌ | 243053/371472 [8:46:02<10:07:43, 3.52it/s] 65%|██████▌ | 243054/371472 [8:46:02<9:53:56, 3.60it/s] 65%|██████▌ | 243055/371472 [8:46:02<9:59:13, 3.57it/s] 65%|██████▌ | 243056/371472 [8:46:02<9:49:53, 3.63it/s] 65%|██████▌ | 243057/371472 [8:46:03<9:50:23, 3.63it/s] 65%|██████▌ | 243058/371472 [8:46:03<9:54:16, 3.60it/s] 65%|██████▌ | 243059/371472 [8:46:03<9:45:17, 3.66it/s] 65%|██████▌ | 243060/371472 [8:46:03<9:55:39, 3.59it/s] {'loss': 2.8217, 'learning_rate': 4.1128337175978793e-07, 'epoch': 10.47} + 65%|██████▌ | 243060/371472 [8:46:04<9:55:39, 3.59it/s] 65%|██████▌ | 243061/371472 [8:46:04<10:21:18, 3.44it/s] 65%|██████▌ | 243062/371472 [8:46:04<10:46:26, 3.31it/s] 65%|██████▌ | 243063/371472 [8:46:04<11:13:19, 3.18it/s] 65%|██████▌ | 243064/371472 [8:46:05<11:25:17, 3.12it/s] 65%|██████▌ | 243065/371472 [8:46:05<11:35:01, 3.08it/s] 65%|██████▌ | 243066/371472 [8:46:05<10:51:38, 3.28it/s] 65%|███���██▌ | 243067/371472 [8:46:06<10:33:59, 3.38it/s] 65%|██████▌ | 243068/371472 [8:46:06<10:12:36, 3.49it/s] 65%|██████▌ | 243069/371472 [8:46:06<10:11:22, 3.50it/s] 65%|██████▌ | 243070/371472 [8:46:07<10:05:35, 3.53it/s] 65%|██████▌ | 243071/371472 [8:46:07<10:26:01, 3.42it/s] 65%|██████▌ | 243072/371472 [8:46:07<10:08:34, 3.52it/s] 65%|██████▌ | 243073/371472 [8:46:07<10:21:03, 3.45it/s] 65%|██████▌ | 243074/371472 [8:46:08<10:13:56, 3.49it/s] 65%|██████▌ | 243075/371472 [8:46:08<10:17:48, 3.46it/s] 65%|██████▌ | 243076/371472 [8:46:08<10:37:03, 3.36it/s] 65%|██████▌ | 243077/371472 [8:46:09<10:11:57, 3.50it/s] 65%|██████▌ | 243078/371472 [8:46:09<10:07:00, 3.53it/s] 65%|██████▌ | 243079/371472 [8:46:09<10:14:55, 3.48it/s] 65%|██████▌ | 243080/371472 [8:46:09<10:34:25, 3.37it/s] {'loss': 2.6495, 'learning_rate': 4.112348897843091e-07, 'epoch': 10.47} + 65%|██████▌ | 243080/371472 [8:46:09<10:34:25, 3.37it/s] 65%|██████▌ | 243081/371472 [8:46:10<11:06:12, 3.21it/s] 65%|██████▌ | 243082/371472 [8:46:10<10:35:34, 3.37it/s] 65%|██████▌ | 243083/371472 [8:46:10<10:34:15, 3.37it/s] 65%|██████▌ | 243084/371472 [8:46:11<10:12:31, 3.49it/s] 65%|██████▌ | 243085/371472 [8:46:11<10:36:14, 3.36it/s] 65%|██████▌ | 243086/371472 [8:46:11<10:35:57, 3.36it/s] 65%|██████▌ | 243087/371472 [8:46:12<10:31:39, 3.39it/s] 65%|██████▌ | 243088/371472 [8:46:12<10:31:55, 3.39it/s] 65%|██████▌ | 243089/371472 [8:46:12<11:21:27, 3.14it/s] 65%|██████▌ | 243090/371472 [8:46:12<10:55:49, 3.26it/s] 65%|██████▌ | 243091/371472 [8:46:13<10:42:46, 3.33it/s] 65%|██████▌ | 243092/371472 [8:46:13<10:45:40, 3.31it/s] 65%|██████▌ | 243093/371472 [8:46:13<10:16:22, 3.47it/s] 65%|██████▌ | 243094/371472 [8:46:14<10:17:49, 3.46it/s] 65%|██████▌ | 243095/371472 [8:46:14<10:49:29, 3.29it/s] 65%|██████▌ | 243096/371472 [8:46:14<10:54:52, 3.27it/s] 65%|██████▌ | 243097/371472 [8:46:15<10:46:46, 3.31it/s] 65%|██████▌ | 243098/371472 [8:46:15<11:05:48, 3.21it/s] 65%|██████▌ | 243099/371472 [8:46:15<10:48:46, 3.30it/s] 65%|██████▌ | 243100/371472 [8:46:15<10:44:39, 3.32it/s] {'loss': 2.6198, 'learning_rate': 4.111864078088302e-07, 'epoch': 10.47} + 65%|██████▌ | 243100/371472 [8:46:15<10:44:39, 3.32it/s] 65%|██████▌ | 243101/371472 [8:46:16<10:36:38, 3.36it/s] 65%|██████▌ | 243102/371472 [8:46:16<10:26:30, 3.42it/s] 65%|██████▌ | 243103/371472 [8:46:16<10:37:10, 3.36it/s] 65%|██████▌ | 243104/371472 [8:46:17<10:58:03, 3.25it/s] 65%|██████▌ | 243105/371472 [8:46:17<10:37:37, 3.36it/s] 65%|██████▌ | 243106/371472 [8:46:17<11:01:50, 3.23it/s] 65%|██████▌ | 243107/371472 [8:46:18<11:52:10, 3.00it/s] 65%|██████▌ | 243108/371472 [8:46:18<12:36:02, 2.83it/s] 65%|██████▌ | 243109/371472 [8:46:18<12:20:51, 2.89it/s] 65%|██████▌ | 243110/371472 [8:46:19<11:50:38, 3.01it/s] 65%|██████▌ | 243111/371472 [8:46:19<11:43:50, 3.04it/s] 65%|██████▌ | 243112/371472 [8:46:19<11:21:01, 3.14it/s] 65%|██████▌ | 243113/371472 [8:46:20<10:57:15, 3.25it/s] 65%|██████▌ | 243114/371472 [8:46:20<10:38:28, 3.35it/s] 65%|██████▌ | 243115/371472 [8:46:20<10:33:51, 3.38it/s] 65%|██████▌ | 243116/371472 [8:46:20<10:26:37, 3.41it/s] 65%|██████▌ | 243117/371472 [8:46:21<10:20:20, 3.45it/s] 65%|██████▌ | 243118/371472 [8:46:21<10:30:22, 3.39it/s] 65%|██████▌ | 243119/371472 [8:46:21<10:32:44, 3.38it/s] 65%|██████▌ | 243120/371472 [8:46:22<10:23:47, 3.43it/s] {'loss': 2.6805, 'learning_rate': 4.111379258333513e-07, 'epoch': 10.47} + 65%|██████▌ | 243120/371472 [8:46:22<10:23:47, 3.43it/s] 65%|██████▌ | 243121/371472 [8:46:22<10:20:05, 3.45it/s] 65%|██████▌ | 243122/371472 [8:46:22<10:11:26, 3.50it/s] 65%|██████▌ | 243123/371472 [8:46:23<10:59:44, 3.24it/s] 65%|██████▌ | 243124/371472 [8:46:23<10:33:41, 3.38it/s] 65%|██████▌ | 243125/371472 [8:46:23<10:31:01, 3.39it/s] 65%|██████▌ | 243126/371472 [8:46:23<10:55:49, 3.26it/s] 65%|██████▌ | 243127/371472 [8:46:24<10:49:48, 3.29it/s] 65%|██████▌ | 243128/371472 [8:46:24<10:42:30, 3.33it/s] 65%|██████▌ | 243129/371472 [8:46:24<11:31:16, 3.09it/s] 65%|██████▌ | 243130/371472 [8:46:25<11:59:13, 2.97it/s] 65%|██████▌ | 243131/371472 [8:46:25<11:30:16, 3.10it/s] 65%|██████▌ | 243132/371472 [8:46:25<11:09:02, 3.20it/s] 65%|██████▌ | 243133/371472 [8:46:26<10:56:29, 3.26it/s] 65%|██████▌ | 243134/371472 [8:46:26<11:16:15, 3.16it/s] 65%|██████▌ | 243135/371472 [8:46:26<11:12:34, 3.18it/s] 65%|██████▌ | 243136/371472 [8:46:27<11:15:08, 3.17it/s] 65%|██████▌ | 243137/371472 [8:46:27<11:21:55, 3.14it/s] 65%|██████▌ | 243138/371472 [8:46:27<11:21:19, 3.14it/s] 65%|██████▌ | 243139/371472 [8:46:28<11:05:37, 3.21it/s] 65%|██████▌ | 243140/371472 [8:46:28<10:40:39, 3.34it/s] {'loss': 2.5894, 'learning_rate': 4.110894438578724e-07, 'epoch': 10.47} + 65%|██████▌ | 243140/371472 [8:46:28<10:40:39, 3.34it/s] 65%|██████▌ | 243141/371472 [8:46:28<10:25:17, 3.42it/s] 65%|██████▌ | 243142/371472 [8:46:28<10:49:29, 3.29it/s] 65%|██████▌ | 243143/371472 [8:46:29<10:48:45, 3.30it/s] 65%|██████▌ | 243144/371472 [8:46:29<10:28:19, 3.40it/s] 65%|██████▌ | 243145/371472 [8:46:29<10:35:43, 3.36it/s] 65%|██████▌ | 243146/371472 [8:46:30<10:58:46, 3.25it/s] 65%|██████▌ | 243147/371472 [8:46:30<11:30:33, 3.10it/s] 65%|██████▌ | 243148/371472 [8:46:30<11:09:32, 3.19it/s] 65%|██████▌ | 243149/371472 [8:46:31<10:45:48, 3.31it/s] 65%|██████▌ | 243150/371472 [8:46:31<11:06:58, 3.21it/s] 65%|██████▌ | 243151/371472 [8:46:31<10:56:58, 3.26it/s] 65%|██████▌ | 243152/371472 [8:46:32<11:12:56, 3.18it/s] 65%|██████▌ | 243153/371472 [8:46:32<11:00:17, 3.24it/s] 65%|██████▌ | 243154/371472 [8:46:32<10:33:57, 3.37it/s] 65%|██████▌ | 243155/371472 [8:46:32<10:43:10, 3.33it/s] 65%|██████▌ | 243156/371472 [8:46:33<10:31:24, 3.39it/s] 65%|██████▌ | 243157/371472 [8:46:33<10:12:46, 3.49it/s] 65%|██████▌ | 243158/371472 [8:46:33<10:20:10, 3.45it/s] 65%|██████▌ | 243159/371472 [8:46:34<10:59:53, 3.24it/s] 65%|██████▌ | 243160/371472 [8:46:34<11:04:00, 3.22it/s] {'loss': 2.6904, 'learning_rate': 4.1104096188239345e-07, 'epoch': 10.47} + 65%|██████▌ | 243160/371472 [8:46:34<11:04:00, 3.22it/s] 65%|██████▌ | 243161/371472 [8:46:34<10:48:07, 3.30it/s] 65%|██████▌ | 243162/371472 [8:46:34<10:32:00, 3.38it/s] 65%|██████▌ | 243163/371472 [8:46:35<10:28:26, 3.40it/s] 65%|██████▌ | 243164/371472 [8:46:35<10:00:07, 3.56it/s] 65%|██████▌ | 243165/371472 [8:46:35<10:26:06, 3.42it/s] 65%|██████▌ | 243166/371472 [8:46:36<10:45:51, 3.31it/s] 65%|██████▌ | 243167/371472 [8:46:36<11:14:42, 3.17it/s] 65%|██████▌ | 243168/371472 [8:46:36<10:55:24, 3.26it/s] 65%|██████▌ | 243169/371472 [8:46:37<11:03:51, 3.22it/s] 65%|██████▌ | 243170/371472 [8:46:37<10:46:13, 3.31it/s] 65%|██████▌ | 243171/371472 [8:46:37<10:56:29, 3.26it/s] 65%|██████▌ | 243172/371472 [8:46:38<11:26:29, 3.11it/s] 65%|██████▌ | 243173/371472 [8:46:38<10:55:30, 3.26it/s] 65%|██████▌ | 243174/371472 [8:46:38<11:01:25, 3.23it/s] 65%|██████▌ | 243175/371472 [8:46:38<10:55:59, 3.26it/s] 65%|██████▌ | 243176/371472 [8:46:39<10:26:11, 3.41it/s] 65%|██████▌ | 243177/371472 [8:46:39<10:29:37, 3.40it/s] 65%|██████▌ | 243178/371472 [8:46:39<10:37:34, 3.35it/s] 65%|██████▌ | 243179/371472 [8:46:40<10:31:39, 3.39it/s] 65%|██████▌ | 243180/371472 [8:46:40<10:20:37, 3.45it/s] {'loss': 2.7166, 'learning_rate': 4.1099247990691457e-07, 'epoch': 10.47} + 65%|██████▌ | 243180/371472 [8:46:40<10:20:37, 3.45it/s] 65%|██████▌ | 243181/371472 [8:46:40<10:06:54, 3.52it/s] 65%|██████▌ | 243182/371472 [8:46:40<10:05:01, 3.53it/s] 65%|██████▌ | 243183/371472 [8:46:41<10:01:14, 3.56it/s] 65%|██████▌ | 243184/371472 [8:46:41<9:58:44, 3.57it/s] 65%|██████▌ | 243185/371472 [8:46:41<10:15:12, 3.48it/s] 65%|██████▌ | 243186/371472 [8:46:42<10:07:02, 3.52it/s] 65%|██████▌ | 243187/371472 [8:46:42<9:49:09, 3.63it/s] 65%|██████▌ | 243188/371472 [8:46:42<9:46:47, 3.64it/s] 65%|██████▌ | 243189/371472 [8:46:42<10:03:58, 3.54it/s] 65%|██████▌ | 243190/371472 [8:46:43<11:02:53, 3.23it/s] 65%|██████▌ | 243191/371472 [8:46:43<10:30:35, 3.39it/s] 65%|██████▌ | 243192/371472 [8:46:43<10:33:56, 3.37it/s] 65%|██████▌ | 243193/371472 [8:46:44<10:37:08, 3.36it/s] 65%|██████▌ | 243194/371472 [8:46:44<10:49:04, 3.29it/s] 65%|██████▌ | 243195/371472 [8:46:44<11:19:58, 3.14it/s] 65%|██████▌ | 243196/371472 [8:46:45<10:48:10, 3.30it/s] 65%|██████▌ | 243197/371472 [8:46:45<10:49:27, 3.29it/s] 65%|██████▌ | 243198/371472 [8:46:45<10:44:54, 3.32it/s] 65%|██████▌ | 243199/371472 [8:46:45<10:27:13, 3.41it/s] 65%|██████▌ | 243200/371472 [8:46:46<10:22:23, 3.43it/s] {'loss': 2.7099, 'learning_rate': 4.1094399793143564e-07, 'epoch': 10.48} + 65%|██████▌ | 243200/371472 [8:46:46<10:22:23, 3.43it/s] 65%|██████▌ | 243201/371472 [8:46:46<10:12:29, 3.49it/s] 65%|██████▌ | 243202/371472 [8:46:46<10:52:49, 3.27it/s] 65%|██████▌ | 243203/371472 [8:46:47<10:35:34, 3.36it/s] 65%|██████▌ | 243204/371472 [8:46:47<10:23:28, 3.43it/s] 65%|██████▌ | 243205/371472 [8:46:47<10:35:22, 3.36it/s] 65%|██████▌ | 243206/371472 [8:46:48<10:29:38, 3.40it/s] 65%|██████▌ | 243207/371472 [8:46:48<10:22:53, 3.43it/s] 65%|██████▌ | 243208/371472 [8:46:48<10:14:04, 3.48it/s] 65%|██████▌ | 243209/371472 [8:46:48<10:34:07, 3.37it/s] 65%|██████▌ | 243210/371472 [8:46:49<10:40:21, 3.34it/s] 65%|██████▌ | 243211/371472 [8:46:49<10:28:59, 3.40it/s] 65%|██████▌ | 243212/371472 [8:46:49<10:22:28, 3.43it/s] 65%|██████▌ | 243213/371472 [8:46:50<10:24:43, 3.42it/s] 65%|██████▌ | 243214/371472 [8:46:50<10:14:16, 3.48it/s] 65%|██████▌ | 243215/371472 [8:46:50<10:09:16, 3.51it/s] 65%|██████▌ | 243216/371472 [8:46:50<10:40:10, 3.34it/s] 65%|██████▌ | 243217/371472 [8:46:51<10:11:53, 3.49it/s] 65%|██████▌ | 243218/371472 [8:46:51<10:03:18, 3.54it/s] 65%|██████▌ | 243219/371472 [8:46:51<10:02:03, 3.55it/s] 65%|██████▌ | 243220/371472 [8:46:52<11:05:00, 3.21it/s] {'loss': 2.7125, 'learning_rate': 4.108955159559568e-07, 'epoch': 10.48} + 65%|██████▌ | 243220/371472 [8:46:52<11:05:00, 3.21it/s] 65%|██████▌ | 243221/371472 [8:46:52<11:04:38, 3.22it/s] 65%|██████▌ | 243222/371472 [8:46:52<10:43:37, 3.32it/s] 65%|██████▌ | 243223/371472 [8:46:53<10:52:30, 3.28it/s] 65%|██████▌ | 243224/371472 [8:46:53<10:25:10, 3.42it/s] 65%|██████▌ | 243225/371472 [8:46:53<10:09:24, 3.51it/s] 65%|██████▌ | 243226/371472 [8:46:53<10:26:07, 3.41it/s] 65%|██████▌ | 243227/371472 [8:46:54<10:15:26, 3.47it/s] 65%|██████▌ | 243228/371472 [8:46:54<10:06:50, 3.52it/s] 65%|██████▌ | 243229/371472 [8:46:54<9:54:50, 3.59it/s] 65%|██████▌ | 243230/371472 [8:46:54<9:58:01, 3.57it/s] 65%|██████▌ | 243231/371472 [8:46:55<10:18:06, 3.46it/s] 65%|██████▌ | 243232/371472 [8:46:55<10:20:46, 3.44it/s] 65%|██████▌ | 243233/371472 [8:46:55<11:20:30, 3.14it/s] 65%|██████▌ | 243234/371472 [8:46:56<11:12:23, 3.18it/s] 65%|██████▌ | 243235/371472 [8:46:56<10:40:04, 3.34it/s] 65%|██████▌ | 243236/371472 [8:46:56<10:34:57, 3.37it/s] 65%|██████▌ | 243237/371472 [8:46:57<10:35:45, 3.36it/s] 65%|██████▌ | 243238/371472 [8:46:57<10:27:25, 3.41it/s] 65%|██████▌ | 243239/371472 [8:46:57<10:24:02, 3.42it/s] 65%|██████▌ | 243240/371472 [8:46:58<10:40:24, 3.34it/s] {'loss': 2.7862, 'learning_rate': 4.1084703398047784e-07, 'epoch': 10.48} + 65%|██████▌ | 243240/371472 [8:46:58<10:40:24, 3.34it/s] 65%|██████▌ | 243241/371472 [8:46:58<10:44:59, 3.31it/s] 65%|██████▌ | 243242/371472 [8:46:58<10:21:01, 3.44it/s] 65%|██████▌ | 243243/371472 [8:46:58<10:30:37, 3.39it/s] 65%|██████▌ | 243244/371472 [8:46:59<10:03:27, 3.54it/s] 65%|██████▌ | 243245/371472 [8:46:59<10:31:32, 3.38it/s] 65%|██████▌ | 243246/371472 [8:46:59<10:53:19, 3.27it/s] 65%|██████▌ | 243247/371472 [8:47:00<10:34:17, 3.37it/s] 65%|██████▌ | 243248/371472 [8:47:00<10:30:45, 3.39it/s] 65%|██████▌ | 243249/371472 [8:47:00<11:05:18, 3.21it/s] 65%|██████▌ | 243250/371472 [8:47:01<11:01:25, 3.23it/s] 65%|██████▌ | 243251/371472 [8:47:01<11:03:24, 3.22it/s] 65%|██████▌ | 243252/371472 [8:47:01<10:58:13, 3.25it/s] 65%|██████▌ | 243253/371472 [8:47:01<11:02:33, 3.23it/s] 65%|██████▌ | 243254/371472 [8:47:02<10:37:29, 3.35it/s] 65%|██████▌ | 243255/371472 [8:47:02<10:48:03, 3.30it/s] 65%|██████▌ | 243256/371472 [8:47:02<10:30:33, 3.39it/s] 65%|██████▌ | 243257/371472 [8:47:03<10:38:49, 3.35it/s] 65%|██████▌ | 243258/371472 [8:47:03<11:39:51, 3.05it/s] 65%|██████▌ | 243259/371472 [8:47:03<11:05:21, 3.21it/s] 65%|██████▌ | 243260/371472 [8:47:04<11:09:23, 3.19it/s] {'loss': 2.7367, 'learning_rate': 4.10798552004999e-07, 'epoch': 10.48} + 65%|██████▌ | 243260/371472 [8:47:04<11:09:23, 3.19it/s] 65%|██████▌ | 243261/371472 [8:47:04<11:33:29, 3.08it/s] 65%|██████▌ | 243262/371472 [8:47:04<11:09:53, 3.19it/s] 65%|██████▌ | 243263/371472 [8:47:05<10:50:14, 3.29it/s] 65%|██████▌ | 243264/371472 [8:47:05<10:56:25, 3.26it/s] 65%|██████▌ | 243265/371472 [8:47:05<10:26:30, 3.41it/s] 65%|██████▌ | 243266/371472 [8:47:05<10:57:33, 3.25it/s] 65%|██████▌ | 243267/371472 [8:47:06<10:38:14, 3.35it/s] 65%|██████▌ | 243268/371472 [8:47:06<10:21:22, 3.44it/s] 65%|██████▌ | 243269/371472 [8:47:06<11:06:49, 3.20it/s] 65%|██████▌ | 243270/371472 [8:47:07<10:42:06, 3.33it/s] 65%|██████▌ | 243271/371472 [8:47:07<10:51:39, 3.28it/s] 65%|██████▌ | 243272/371472 [8:47:07<10:51:58, 3.28it/s] 65%|██████▌ | 243273/371472 [8:47:08<10:44:57, 3.31it/s] 65%|██████▌ | 243274/371472 [8:47:08<10:58:52, 3.24it/s] 65%|██████▌ | 243275/371472 [8:47:08<10:51:03, 3.28it/s] 65%|██████▌ | 243276/371472 [8:47:08<10:31:56, 3.38it/s] 65%|██████▌ | 243277/371472 [8:47:09<10:01:54, 3.55it/s] 65%|██████▌ | 243278/371472 [8:47:09<9:52:52, 3.60it/s] 65%|██████▌ | 243279/371472 [8:47:09<10:21:40, 3.44it/s] 65%|██████▌ | 243280/371472 [8:47:10<10:07:50, 3.51it/s] {'loss': 2.9008, 'learning_rate': 4.107500700295201e-07, 'epoch': 10.48} + 65%|██████▌ | 243280/371472 [8:47:10<10:07:50, 3.51it/s] 65%|██████▌ | 243281/371472 [8:47:10<10:13:07, 3.48it/s] 65%|██████▌ | 243282/371472 [8:47:10<10:35:27, 3.36it/s] 65%|██████▌ | 243283/371472 [8:47:11<11:16:45, 3.16it/s] 65%|██████▌ | 243284/371472 [8:47:11<11:32:47, 3.08it/s] 65%|██████▌ | 243285/371472 [8:47:11<11:29:42, 3.10it/s] 65%|██████▌ | 243286/371472 [8:47:11<10:49:35, 3.29it/s] 65%|██████▌ | 243287/371472 [8:47:12<10:21:58, 3.43it/s] 65%|██████▌ | 243288/371472 [8:47:12<10:39:55, 3.34it/s] 65%|██████▌ | 243289/371472 [8:47:12<11:06:38, 3.20it/s] 65%|██████▌ | 243290/371472 [8:47:13<10:40:32, 3.34it/s] 65%|██████▌ | 243291/371472 [8:47:13<11:26:08, 3.11it/s] 65%|██████▌ | 243292/371472 [8:47:13<11:17:49, 3.15it/s] 65%|██████▌ | 243293/371472 [8:47:14<10:56:54, 3.25it/s] 65%|██████▌ | 243294/371472 [8:47:14<10:46:01, 3.31it/s] 65%|██████▌ | 243295/371472 [8:47:14<10:30:19, 3.39it/s] 65%|██████▌ | 243296/371472 [8:47:14<10:16:46, 3.46it/s] 65%|██████▌ | 243297/371472 [8:47:15<10:03:55, 3.54it/s] 65%|██████▌ | 243298/371472 [8:47:15<10:05:55, 3.53it/s] 65%|██████▌ | 243299/371472 [8:47:15<10:10:33, 3.50it/s] 65%|██████▌ | 243300/371472 [8:47:16<10:14:40, 3.48it/s] {'loss': 2.5499, 'learning_rate': 4.107015880540412e-07, 'epoch': 10.48} + 65%|██████▌ | 243300/371472 [8:47:16<10:14:40, 3.48it/s] 65%|██████▌ | 243301/371472 [8:47:16<10:14:11, 3.48it/s] 65%|██████▌ | 243302/371472 [8:47:16<10:20:51, 3.44it/s] 65%|██████▌ | 243303/371472 [8:47:16<10:29:15, 3.39it/s] 65%|██████▌ | 243304/371472 [8:47:17<11:01:35, 3.23it/s] 65%|██████▌ | 243305/371472 [8:47:17<11:14:31, 3.17it/s] 65%|██████▌ | 243306/371472 [8:47:17<11:10:30, 3.19it/s] 65%|██████▌ | 243307/371472 [8:47:18<11:18:24, 3.15it/s] 65%|██████▌ | 243308/371472 [8:47:18<10:40:58, 3.33it/s] 65%|██████▌ | 243309/371472 [8:47:18<10:29:29, 3.39it/s] 65%|██████▌ | 243310/371472 [8:47:19<10:26:24, 3.41it/s] 65%|██████▌ | 243311/371472 [8:47:19<10:09:01, 3.51it/s] 65%|██████▌ | 243312/371472 [8:47:19<10:35:43, 3.36it/s] 65%|██████▌ | 243313/371472 [8:47:19<10:18:57, 3.45it/s] 65%|██████▌ | 243314/371472 [8:47:20<10:12:55, 3.48it/s] 66%|██████▌ | 243315/371472 [8:47:20<10:14:41, 3.47it/s] 66%|██████▌ | 243316/371472 [8:47:20<10:49:12, 3.29it/s] 66%|██████▌ | 243317/371472 [8:47:21<10:47:09, 3.30it/s] 66%|██████▌ | 243318/371472 [8:47:21<10:33:27, 3.37it/s] 66%|██████▌ | 243319/371472 [8:47:21<10:40:22, 3.34it/s] 66%|██████▌ | 243320/371472 [8:47:22<10:43:19, 3.32it/s] {'loss': 2.7328, 'learning_rate': 4.106531060785623e-07, 'epoch': 10.48} + 66%|██████▌ | 243320/371472 [8:47:22<10:43:19, 3.32it/s] 66%|██████▌ | 243321/371472 [8:47:22<11:05:10, 3.21it/s] 66%|██████▌ | 243322/371472 [8:47:22<11:25:55, 3.11it/s] 66%|██████▌ | 243323/371472 [8:47:23<11:17:36, 3.15it/s] 66%|██████▌ | 243324/371472 [8:47:23<10:50:05, 3.29it/s] 66%|██████▌ | 243325/371472 [8:47:23<10:23:20, 3.43it/s] 66%|██████▌ | 243326/371472 [8:47:24<11:21:15, 3.14it/s] 66%|██████▌ | 243327/371472 [8:47:24<11:16:49, 3.16it/s] 66%|██████▌ | 243328/371472 [8:47:24<10:42:20, 3.32it/s] 66%|██████▌ | 243329/371472 [8:47:24<10:15:36, 3.47it/s] 66%|██████▌ | 243330/371472 [8:47:25<10:12:46, 3.49it/s] 66%|██████▌ | 243331/371472 [8:47:25<10:28:19, 3.40it/s] 66%|██████▌ | 243332/371472 [8:47:25<10:20:52, 3.44it/s] 66%|██████▌ | 243333/371472 [8:47:26<10:35:31, 3.36it/s] 66%|██████▌ | 243334/371472 [8:47:26<11:11:10, 3.18it/s] 66%|██████▌ | 243335/371472 [8:47:26<10:47:40, 3.30it/s] 66%|██████▌ | 243336/371472 [8:47:27<12:19:48, 2.89it/s] 66%|██████▌ | 243337/371472 [8:47:27<12:04:45, 2.95it/s] 66%|██████▌ | 243338/371472 [8:47:27<12:22:24, 2.88it/s] 66%|██████▌ | 243339/371472 [8:47:28<12:05:27, 2.94it/s] 66%|██████▌ | 243340/371472 [8:47:28<11:28:01, 3.10it/s] {'loss': 2.5978, 'learning_rate': 4.1060462410308346e-07, 'epoch': 10.48} + 66%|██████▌ | 243340/371472 [8:47:28<11:28:01, 3.10it/s] 66%|██████▌ | 243341/371472 [8:47:28<11:01:00, 3.23it/s] 66%|██████▌ | 243342/371472 [8:47:28<11:04:46, 3.21it/s] 66%|██████▌ | 243343/371472 [8:47:29<10:36:46, 3.35it/s] 66%|██████▌ | 243344/371472 [8:47:29<10:17:23, 3.46it/s] 66%|██████▌ | 243345/371472 [8:47:29<10:20:44, 3.44it/s] 66%|██████▌ | 243346/371472 [8:47:30<10:16:02, 3.47it/s] 66%|██████▌ | 243347/371472 [8:47:30<11:07:15, 3.20it/s] 66%|██████▌ | 243348/371472 [8:47:30<11:14:50, 3.16it/s] 66%|██████▌ | 243349/371472 [8:47:31<10:53:14, 3.27it/s] 66%|██████▌ | 243350/371472 [8:47:31<11:15:30, 3.16it/s] 66%|██████▌ | 243351/371472 [8:47:31<11:33:31, 3.08it/s] 66%|██████▌ | 243352/371472 [8:47:32<10:53:49, 3.27it/s] 66%|██████▌ | 243353/371472 [8:47:32<11:00:35, 3.23it/s] 66%|██████▌ | 243354/371472 [8:47:32<10:57:01, 3.25it/s] 66%|██████▌ | 243355/371472 [8:47:32<10:55:54, 3.26it/s] 66%|██████▌ | 243356/371472 [8:47:33<10:59:58, 3.24it/s] 66%|██████▌ | 243357/371472 [8:47:33<10:37:50, 3.35it/s] 66%|██████▌ | 243358/371472 [8:47:33<10:46:06, 3.30it/s] 66%|██████▌ | 243359/371472 [8:47:34<10:52:43, 3.27it/s] 66%|██████▌ | 243360/371472 [8:47:34<10:29:12, 3.39it/s] {'loss': 2.6652, 'learning_rate': 4.1055614212760453e-07, 'epoch': 10.48} + 66%|██████▌ | 243360/371472 [8:47:34<10:29:12, 3.39it/s] 66%|██████▌ | 243361/371472 [8:47:34<10:29:42, 3.39it/s] 66%|██████▌ | 243362/371472 [8:47:35<10:20:39, 3.44it/s] 66%|██████▌ | 243363/371472 [8:47:35<10:09:08, 3.51it/s] 66%|██████▌ | 243364/371472 [8:47:35<11:02:39, 3.22it/s] 66%|██████▌ | 243365/371472 [8:47:36<11:42:00, 3.04it/s] 66%|█████��▌ | 243366/371472 [8:47:36<11:11:42, 3.18it/s] 66%|██████▌ | 243367/371472 [8:47:36<10:49:19, 3.29it/s] 66%|██████▌ | 243368/371472 [8:47:36<10:38:16, 3.35it/s] 66%|██████▌ | 243369/371472 [8:47:37<10:14:55, 3.47it/s] 66%|██████▌ | 243370/371472 [8:47:37<9:55:45, 3.58it/s] 66%|██████▌ | 243371/371472 [8:47:37<9:58:31, 3.57it/s] 66%|██████▌ | 243372/371472 [8:47:38<10:52:23, 3.27it/s] 66%|██████▌ | 243373/371472 [8:47:38<10:34:40, 3.36it/s] 66%|██████▌ | 243374/371472 [8:47:38<10:31:03, 3.38it/s] 66%|██████▌ | 243375/371472 [8:47:38<10:20:29, 3.44it/s] 66%|██████▌ | 243376/371472 [8:47:39<10:09:39, 3.50it/s] 66%|██████▌ | 243377/371472 [8:47:39<9:57:57, 3.57it/s] 66%|██████▌ | 243378/371472 [8:47:39<9:53:26, 3.60it/s] 66%|██████▌ | 243379/371472 [8:47:40<10:19:41, 3.45it/s] 66%|██████▌ | 243380/371472 [8:47:40<10:26:12, 3.41it/s] {'loss': 2.9084, 'learning_rate': 4.1050766015212566e-07, 'epoch': 10.48} + 66%|██████▌ | 243380/371472 [8:47:40<10:26:12, 3.41it/s] 66%|██████▌ | 243381/371472 [8:47:40<10:29:28, 3.39it/s] 66%|██████▌ | 243382/371472 [8:47:40<10:31:26, 3.38it/s] 66%|██████▌ | 243383/371472 [8:47:41<10:14:12, 3.48it/s] 66%|██████▌ | 243384/371472 [8:47:41<10:22:39, 3.43it/s] 66%|██████▌ | 243385/371472 [8:47:41<9:59:20, 3.56it/s] 66%|██████▌ | 243386/371472 [8:47:42<9:44:48, 3.65it/s] 66%|██████▌ | 243387/371472 [8:47:42<9:52:35, 3.60it/s] 66%|██████▌ | 243388/371472 [8:47:42<10:00:12, 3.56it/s] 66%|██████▌ | 243389/371472 [8:47:42<10:10:54, 3.49it/s] 66%|██████▌ | 243390/371472 [8:47:43<10:05:25, 3.53it/s] 66%|██████▌ | 243391/371472 [8:47:43<10:07:09, 3.52it/s] 66%|██████▌ | 243392/371472 [8:47:43<10:08:41, 3.51it/s] 66%|██████▌ | 243393/371472 [8:47:44<10:21:06, 3.44it/s] 66%|██████▌ | 243394/371472 [8:47:44<12:06:20, 2.94it/s] 66%|██████▌ | 243395/371472 [8:47:44<11:37:34, 3.06it/s] 66%|██████▌ | 243396/371472 [8:47:45<11:07:28, 3.20it/s] 66%|██████▌ | 243397/371472 [8:47:45<11:19:02, 3.14it/s] 66%|██████▌ | 243398/371472 [8:47:45<11:04:54, 3.21it/s] 66%|██████▌ | 243399/371472 [8:47:45<10:47:28, 3.30it/s] 66%|██████▌ | 243400/371472 [8:47:46<10:41:48, 3.33it/s] {'loss': 2.9277, 'learning_rate': 4.1045917817664673e-07, 'epoch': 10.48} + 66%|██████▌ | 243400/371472 [8:47:46<10:41:48, 3.33it/s] 66%|██████▌ | 243401/371472 [8:47:46<11:23:19, 3.12it/s] 66%|██████▌ | 243402/371472 [8:47:46<10:54:03, 3.26it/s] 66%|██████▌ | 243403/371472 [8:47:47<10:35:25, 3.36it/s] 66%|██████▌ | 243404/371472 [8:47:47<10:27:54, 3.40it/s] 66%|██████▌ | 243405/371472 [8:47:47<10:50:52, 3.28it/s] 66%|██████▌ | 243406/371472 [8:47:48<11:29:06, 3.10it/s] 66%|██████▌ | 243407/371472 [8:47:48<10:54:04, 3.26it/s] 66%|██████▌ | 243408/371472 [8:47:48<11:15:09, 3.16it/s] 66%|██████▌ | 243409/371472 [8:47:49<10:40:56, 3.33it/s] 66%|██████▌ | 243410/371472 [8:47:49<10:16:19, 3.46it/s] 66%|██████▌ | 243411/371472 [8:47:49<10:45:05, 3.31it/s] 66%|██████▌ | 243412/371472 [8:47:49<11:20:49, 3.13it/s] 66%|██████▌ | 243413/371472 [8:47:50<10:51:38, 3.28it/s] 66%|██████▌ | 243414/371472 [8:47:50<10:29:45, 3.39it/s] 66%|██████▌ | 243415/371472 [8:47:50<10:21:40, 3.43it/s] 66%|██████▌ | 243416/371472 [8:47:51<10:16:01, 3.46it/s] 66%|██████▌ | 243417/371472 [8:47:51<10:02:55, 3.54it/s] 66%|██████▌ | 243418/371472 [8:47:51<10:24:55, 3.42it/s] 66%|██████▌ | 243419/371472 [8:47:51<10:28:02, 3.40it/s] 66%|██████▌ | 243420/371472 [8:47:52<10:12:02, 3.49it/s] {'loss': 2.7471, 'learning_rate': 4.104106962011679e-07, 'epoch': 10.48} + 66%|██████▌ | 243420/371472 [8:47:52<10:12:02, 3.49it/s] 66%|██████▌ | 243421/371472 [8:47:52<10:03:31, 3.54it/s] 66%|██████▌ | 243422/371472 [8:47:52<10:28:02, 3.40it/s] 66%|██████▌ | 243423/371472 [8:47:53<10:22:07, 3.43it/s] 66%|██████▌ | 243424/371472 [8:47:53<10:18:09, 3.45it/s] 66%|██████▌ | 243425/371472 [8:47:53<10:04:16, 3.53it/s] 66%|██████▌ | 243426/371472 [8:47:53<10:07:11, 3.51it/s] 66%|██████▌ | 243427/371472 [8:47:54<10:25:30, 3.41it/s] 66%|██████▌ | 243428/371472 [8:47:54<10:05:51, 3.52it/s] 66%|██████▌ | 243429/371472 [8:47:54<10:04:58, 3.53it/s] 66%|██████▌ | 243430/371472 [8:47:55<10:06:57, 3.52it/s] 66%|██████▌ | 243431/371472 [8:47:55<9:59:14, 3.56it/s] 66%|██████▌ | 243432/371472 [8:47:55<10:05:00, 3.53it/s] 66%|██████▌ | 243433/371472 [8:47:55<10:05:43, 3.52it/s] 66%|██████▌ | 243434/371472 [8:47:56<10:13:07, 3.48it/s] 66%|██████▌ | 243435/371472 [8:47:56<9:57:01, 3.57it/s] 66%|██████▌ | 243436/371472 [8:47:56<10:18:34, 3.45it/s] 66%|██████▌ | 243437/371472 [8:47:57<10:15:33, 3.47it/s] 66%|██████▌ | 243438/371472 [8:47:57<9:51:46, 3.61it/s] 66%|██████▌ | 243439/371472 [8:47:57<9:49:00, 3.62it/s] 66%|██████▌ | 243440/371472 [8:47:58<10:39:31, 3.34it/s] {'loss': 2.6108, 'learning_rate': 4.10362214225689e-07, 'epoch': 10.49} + 66%|██████▌ | 243440/371472 [8:47:58<10:39:31, 3.34it/s] 66%|██████▌ | 243441/371472 [8:47:58<10:27:51, 3.40it/s] 66%|██████▌ | 243442/371472 [8:47:58<10:36:15, 3.35it/s] 66%|██████▌ | 243443/371472 [8:47:58<10:34:57, 3.36it/s] 66%|██████▌ | 243444/371472 [8:47:59<10:32:08, 3.38it/s] 66%|██████▌ | 243445/371472 [8:47:59<10:36:47, 3.35it/s] 66%|██████▌ | 243446/371472 [8:47:59<10:39:11, 3.34it/s] 66%|██████▌ | 243447/371472 [8:48:00<10:42:50, 3.32it/s] 66%|██████▌ | 243448/371472 [8:48:00<10:24:12, 3.42it/s] 66%|██████▌ | 243449/371472 [8:48:00<10:04:51, 3.53it/s] 66%|██████▌ | 243450/371472 [8:48:00<10:50:12, 3.28it/s] 66%|██████▌ | 243451/371472 [8:48:01<11:12:38, 3.17it/s] 66%|██████▌ | 243452/371472 [8:48:01<10:38:09, 3.34it/s] 66%|██████▌ | 243453/371472 [8:48:01<11:00:05, 3.23it/s] 66%|██████▌ | 243454/371472 [8:48:02<10:45:10, 3.31it/s] 66%|██████▌ | 243455/371472 [8:48:02<10:19:48, 3.44it/s] 66%|██████▌ | 243456/371472 [8:48:02<10:15:05, 3.47it/s] 66%|██████▌ | 243457/371472 [8:48:03<10:18:24, 3.45it/s] 66%|██████▌ | 243458/371472 [8:48:03<10:28:38, 3.39it/s] 66%|██████▌ | 243459/371472 [8:48:03<10:30:19, 3.38it/s] 66%|██████▌ | 243460/371472 [8:48:04<11:13:54, 3.17it/s] {'loss': 2.6274, 'learning_rate': 4.103137322502101e-07, 'epoch': 10.49} + 66%|██████▌ | 243460/371472 [8:48:04<11:13:54, 3.17it/s] 66%|██████▌ | 243461/371472 [8:48:04<10:50:49, 3.28it/s] 66%|██████▌ | 243462/371472 [8:48:04<10:52:47, 3.27it/s] 66%|██████▌ | 243463/371472 [8:48:04<11:02:22, 3.22it/s] 66%|██████▌ | 243464/371472 [8:48:05<10:56:50, 3.25it/s] 66%|██████▌ | 243465/371472 [8:48:05<10:52:00, 3.27it/s] 66%|██████▌ | 243466/371472 [8:48:05<12:12:05, 2.91it/s] 66%|██████▌ | 243467/371472 [8:48:06<11:49:49, 3.01it/s] 66%|██████▌ | 243468/371472 [8:48:06<12:14:18, 2.91it/s] 66%|██████▌ | 243469/371472 [8:48:06<12:18:12, 2.89it/s] 66%|██████▌ | 243470/371472 [8:48:07<11:34:35, 3.07it/s] 66%|██████▌ | 243471/371472 [8:48:07<12:08:46, 2.93it/s] 66%|██████▌ | 243472/371472 [8:48:07<11:31:52, 3.08it/s] 66%|██████▌ | 243473/371472 [8:48:08<11:31:23, 3.09it/s] 66%|██████▌ | 243474/371472 [8:48:08<11:27:45, 3.10it/s] 66%|██████▌ | 243475/371472 [8:48:08<10:45:02, 3.31it/s] 66%|██████▌ | 243476/371472 [8:48:09<11:04:32, 3.21it/s] 66%|██████▌ | 243477/371472 [8:48:09<11:11:15, 3.18it/s] 66%|██████▌ | 243478/371472 [8:48:09<10:46:51, 3.30it/s] 66%|██████▌ | 243479/371472 [8:48:10<11:21:01, 3.13it/s] 66%|██████▌ | 243480/371472 [8:48:10<11:15:52, 3.16it/s] {'loss': 2.7421, 'learning_rate': 4.102652502747312e-07, 'epoch': 10.49} + 66%|██████▌ | 243480/371472 [8:48:10<11:15:52, 3.16it/s] 66%|██████▌ | 243481/371472 [8:48:10<11:00:29, 3.23it/s] 66%|██████▌ | 243482/371472 [8:48:11<10:49:50, 3.28it/s] 66%|██████▌ | 243483/371472 [8:48:11<11:10:07, 3.18it/s] 66%|██████▌ | 243484/371472 [8:48:11<11:13:23, 3.17it/s] 66%|██████▌ | 243485/371472 [8:48:11<10:52:04, 3.27it/s] 66%|██████▌ | 243486/371472 [8:48:12<10:41:34, 3.32it/s] 66%|██████▌ | 243487/371472 [8:48:12<10:38:36, 3.34it/s] 66%|██████▌ | 243488/371472 [8:48:12<10:28:55, 3.39it/s] 66%|██████▌ | 243489/371472 [8:48:13<10:25:34, 3.41it/s] 66%|██████▌ | 243490/371472 [8:48:13<11:29:41, 3.09it/s] 66%|██████▌ | 243491/371472 [8:48:13<11:00:06, 3.23it/s] 66%|██████▌ | 243492/371472 [8:48:14<11:19:26, 3.14it/s] 66%|██████▌ | 243493/371472 [8:48:14<11:08:07, 3.19it/s] 66%|██████▌ | 243494/371472 [8:48:14<10:49:07, 3.29it/s] 66%|██████▌ | 243495/371472 [8:48:14<10:39:10, 3.34it/s] 66%|██████▌ | 243496/371472 [8:48:15<11:11:51, 3.17it/s] 66%|██████▌ | 243497/371472 [8:48:15<11:03:33, 3.21it/s] 66%|██████▌ | 243498/371472 [8:48:15<10:54:14, 3.26it/s] 66%|██████▌ | 243499/371472 [8:48:16<10:33:56, 3.36it/s] 66%|██████▌ | 243500/371472 [8:48:16<10:18:21, 3.45it/s] {'loss': 2.7093, 'learning_rate': 4.102167682992523e-07, 'epoch': 10.49} + 66%|██████▌ | 243500/371472 [8:48:16<10:18:21, 3.45it/s] 66%|██████▌ | 243501/371472 [8:48:16<11:01:19, 3.23it/s] 66%|██████▌ | 243502/371472 [8:48:17<10:26:29, 3.40it/s] 66%|██████▌ | 243503/371472 [8:48:17<10:21:12, 3.43it/s] 66%|██████▌ | 243504/371472 [8:48:17<10:07:44, 3.51it/s] 66%|██████▌ | 243505/371472 [8:48:17<10:03:57, 3.53it/s] 66%|██████▌ | 243506/371472 [8:48:18<11:37:53, 3.06it/s] 66%|██████▌ | 243507/371472 [8:48:18<11:19:03, 3.14it/s] 66%|██████▌ | 243508/371472 [8:48:18<10:56:15, 3.25it/s] 66%|██████▌ | 243509/371472 [8:48:19<10:42:13, 3.32it/s] 66%|██████▌ | 243510/371472 [8:48:19<11:10:15, 3.18it/s] 66%|██████▌ | 243511/371472 [8:48:19<10:36:41, 3.35it/s] 66%|██████▌ | 243512/371472 [8:48:20<10:35:24, 3.36it/s] 66%|██████▌ | 243513/371472 [8:48:20<10:25:52, 3.41it/s] 66%|██████▌ | 243514/371472 [8:48:20<10:54:58, 3.26it/s] 66%|██████▌ | 243515/371472 [8:48:21<10:54:55, 3.26it/s] 66%|██████▌ | 243516/371472 [8:48:21<11:05:51, 3.20it/s] 66%|██████▌ | 243517/371472 [8:48:21<11:06:27, 3.20it/s] 66%|██████▌ | 243518/371472 [8:48:21<10:46:49, 3.30it/s] 66%|██████▌ | 243519/371472 [8:48:22<10:31:24, 3.38it/s] 66%|██████▌ | 243520/371472 [8:48:22<10:31:39, 3.38it/s] {'loss': 2.65, 'learning_rate': 4.1016828632377337e-07, 'epoch': 10.49} + 66%|██████▌ | 243520/371472 [8:48:22<10:31:39, 3.38it/s] 66%|██████▌ | 243521/371472 [8:48:22<10:36:36, 3.35it/s] 66%|██████▌ | 243522/371472 [8:48:23<10:46:23, 3.30it/s] 66%|██████▌ | 243523/371472 [8:48:23<11:57:46, 2.97it/s] 66%|██████▌ | 243524/371472 [8:48:23<11:15:47, 3.16it/s] 66%|██████▌ | 243525/371472 [8:48:24<10:50:54, 3.28it/s] 66%|██████▌ | 243526/371472 [8:48:24<10:39:41, 3.33it/s] 66%|██████▌ | 243527/371472 [8:48:24<11:15:18, 3.16it/s] 66%|██████▌ | 243528/371472 [8:48:25<10:53:09, 3.26it/s] 66%|██████▌ | 243529/371472 [8:48:25<10:36:57, 3.35it/s] 66%|██████▌ | 243530/371472 [8:48:25<10:30:16, 3.38it/s] 66%|██████▌ | 243531/371472 [8:48:25<10:29:13, 3.39it/s] 66%|██████▌ | 243532/371472 [8:48:26<10:34:16, 3.36it/s] 66%|██████▌ | 243533/371472 [8:48:26<10:31:12, 3.38it/s] 66%|██████▌ | 243534/371472 [8:48:26<10:33:27, 3.37it/s] 66%|██████▌ | 243535/371472 [8:48:27<10:24:11, 3.42it/s] 66%|██████▌ | 243536/371472 [8:48:27<10:23:50, 3.42it/s] 66%|██████▌ | 243537/371472 [8:48:27<10:10:58, 3.49it/s] 66%|██████▌ | 243538/371472 [8:48:27<10:40:17, 3.33it/s] 66%|██████▌ | 243539/371472 [8:48:28<10:21:31, 3.43it/s] 66%|██████▌ | 243540/371472 [8:48:28<10:15:01, 3.47it/s] {'loss': 2.7218, 'learning_rate': 4.1011980434829455e-07, 'epoch': 10.49} + 66%|██████▌ | 243540/371472 [8:48:28<10:15:01, 3.47it/s] 66%|██████▌ | 243541/371472 [8:48:28<10:11:26, 3.49it/s] 66%|██████▌ | 243542/371472 [8:48:29<10:20:26, 3.44it/s] 66%|██████▌ | 243543/371472 [8:48:29<10:17:13, 3.45it/s] 66%|██████▌ | 243544/371472 [8:48:29<10:05:18, 3.52it/s] 66%|██████▌ | 243545/371472 [8:48:29<10:08:48, 3.50it/s] 66%|██████▌ | 243546/371472 [8:48:30<10:35:21, 3.36it/s] 66%|██████▌ | 243547/371472 [8:48:30<10:19:52, 3.44it/s] 66%|██████▌ | 243548/371472 [8:48:30<11:00:10, 3.23it/s] 66%|██████▌ | 243549/371472 [8:48:31<11:34:21, 3.07it/s] 66%|██████▌ | 243550/371472 [8:48:31<11:09:03, 3.19it/s] 66%|██████▌ | 243551/371472 [8:48:31<11:17:38, 3.15it/s] 66%|██████▌ | 243552/371472 [8:48:32<11:19:16, 3.14it/s] 66%|██████▌ | 243553/371472 [8:48:32<11:27:18, 3.10it/s] 66%|██████▌ | 243554/371472 [8:48:32<11:02:42, 3.22it/s] 66%|██████▌ | 243555/371472 [8:48:33<10:41:15, 3.32it/s] 66%|██████▌ | 243556/371472 [8:48:33<10:49:59, 3.28it/s] 66%|██████▌ | 243557/371472 [8:48:33<11:08:03, 3.19it/s] 66%|██████▌ | 243558/371472 [8:48:34<10:59:31, 3.23it/s] 66%|██████▌ | 243559/371472 [8:48:34<10:29:58, 3.38it/s] 66%|██████▌ | 243560/371472 [8:48:34<10:20:29, 3.44it/s] {'loss': 2.8047, 'learning_rate': 4.1007132237281557e-07, 'epoch': 10.49} + 66%|██████▌ | 243560/371472 [8:48:34<10:20:29, 3.44it/s] 66%|██████▌ | 243561/371472 [8:48:34<10:59:37, 3.23it/s] 66%|██████▌ | 243562/371472 [8:48:35<11:39:37, 3.05it/s] 66%|██████▌ | 243563/371472 [8:48:35<12:06:03, 2.94it/s] 66%|██████▌ | 243564/371472 [8:48:36<12:25:10, 2.86it/s] 66%|██████▌ | 243565/371472 [8:48:36<12:22:09, 2.87it/s] 66%|██████▌ | 243566/371472 [8:48:36<11:48:59, 3.01it/s] 66%|██████▌ | 243567/371472 [8:48:37<11:36:23, 3.06it/s] 66%|██████▌ | 243568/371472 [8:48:37<11:04:37, 3.21it/s] 66%|██████▌ | 243569/371472 [8:48:37<10:40:08, 3.33it/s] 66%|██████▌ | 243570/371472 [8:48:37<10:59:38, 3.23it/s] 66%|██████▌ | 243571/371472 [8:48:38<11:00:43, 3.23it/s] 66%|██████▌ | 243572/371472 [8:48:38<11:15:29, 3.16it/s] 66%|██████▌ | 243573/371472 [8:48:38<10:56:02, 3.25it/s] 66%|██████▌ | 243574/371472 [8:48:39<10:28:10, 3.39it/s] 66%|██████▌ | 243575/371472 [8:48:39<10:42:31, 3.32it/s] 66%|██████▌ | 243576/371472 [8:48:39<10:32:07, 3.37it/s] 66%|██████▌ | 243577/371472 [8:48:39<10:17:43, 3.45it/s] 66%|██████▌ | 243578/371472 [8:48:40<10:13:40, 3.47it/s] 66%|██████▌ | 243579/371472 [8:48:40<10:07:00, 3.51it/s] 66%|██████▌ | 243580/371472 [8:48:40<10:19:33, 3.44it/s] {'loss': 2.5298, 'learning_rate': 4.1002284039733674e-07, 'epoch': 10.49} + 66%|██████▌ | 243580/371472 [8:48:40<10:19:33, 3.44it/s] 66%|██████▌ | 243581/371472 [8:48:41<10:20:18, 3.44it/s] 66%|██████▌ | 243582/371472 [8:48:41<10:43:23, 3.31it/s] 66%|██████▌ | 243583/371472 [8:48:41<10:29:49, 3.38it/s] 66%|██████▌ | 243584/371472 [8:48:42<10:19:32, 3.44it/s] 66%|██████▌ | 243585/371472 [8:48:42<11:16:17, 3.15it/s] 66%|██████▌ | 243586/371472 [8:48:42<12:26:24, 2.86it/s] 66%|██████▌ | 243587/371472 [8:48:43<11:41:28, 3.04it/s] 66%|██████▌ | 243588/371472 [8:48:43<11:17:58, 3.14it/s] 66%|██████▌ | 243589/371472 [8:48:43<11:08:37, 3.19it/s] 66%|██████▌ | 243590/371472 [8:48:44<11:24:13, 3.12it/s] 66%|██████▌ | 243591/371472 [8:48:44<11:21:52, 3.13it/s] 66%|██████▌ | 243592/371472 [8:48:44<10:54:34, 3.26it/s] 66%|██████▌ | 243593/371472 [8:48:44<10:32:54, 3.37it/s] 66%|██████▌ | 243594/371472 [8:48:45<10:34:31, 3.36it/s] 66%|██████▌ | 243595/371472 [8:48:45<10:10:28, 3.49it/s] 66%|██████▌ | 243596/371472 [8:48:45<10:31:52, 3.37it/s] 66%|██████▌ | 243597/371472 [8:48:46<10:19:09, 3.44it/s] 66%|██████▌ | 243598/371472 [8:48:46<10:19:54, 3.44it/s] 66%|██████▌ | 243599/371472 [8:48:46<10:22:27, 3.42it/s] 66%|██████▌ | 243600/371472 [8:48:46<10:12:29, 3.48it/s] {'loss': 2.6055, 'learning_rate': 4.099743584218578e-07, 'epoch': 10.49} + 66%|██████▌ | 243600/371472 [8:48:46<10:12:29, 3.48it/s] 66%|██████▌ | 243601/371472 [8:48:47<10:12:55, 3.48it/s] 66%|██████▌ | 243602/371472 [8:48:47<10:05:05, 3.52it/s] 66%|██████▌ | 243603/371472 [8:48:47<10:31:45, 3.37it/s] 66%|██████▌ | 243604/371472 [8:48:48<10:32:44, 3.37it/s] 66%|██████▌ | 243605/371472 [8:48:48<10:26:10, 3.40it/s] 66%|██████▌ | 243606/371472 [8:48:48<10:25:08, 3.41it/s] 66%|██████▌ | 243607/371472 [8:48:49<11:05:28, 3.20it/s] 66%|██████▌ | 243608/371472 [8:48:49<10:54:20, 3.26it/s] 66%|██████▌ | 243609/371472 [8:48:49<10:42:00, 3.32it/s] 66%|██████▌ | 243610/371472 [8:48:49<10:31:18, 3.38it/s] 66%|██████▌ | 243611/371472 [8:48:50<10:29:33, 3.38it/s] 66%|██████▌ | 243612/371472 [8:48:50<10:40:59, 3.32it/s] 66%|██████▌ | 243613/371472 [8:48:50<10:48:42, 3.28it/s] 66%|██████▌ | 243614/371472 [8:48:51<10:26:30, 3.40it/s] 66%|██████▌ | 243615/371472 [8:48:51<10:08:39, 3.50it/s] 66%|██████▌ | 243616/371472 [8:48:51<10:30:53, 3.38it/s] 66%|██████▌ | 243617/371472 [8:48:51<10:15:38, 3.46it/s] 66%|██████▌ | 243618/371472 [8:48:52<10:04:53, 3.52it/s] 66%|██████▌ | 243619/371472 [8:48:52<9:51:44, 3.60it/s] 66%|██████▌ | 243620/371472 [8:48:52<10:59:01, 3.23it/s] {'loss': 2.6417, 'learning_rate': 4.0992587644637894e-07, 'epoch': 10.49} + 66%|██████▌ | 243620/371472 [8:48:52<10:59:01, 3.23it/s] 66%|██████▌ | 243621/371472 [8:48:53<10:45:11, 3.30it/s] 66%|██████▌ | 243622/371472 [8:48:53<10:36:40, 3.35it/s] 66%|██████▌ | 243623/371472 [8:48:53<10:29:25, 3.39it/s] 66%|██████▌ | 243624/371472 [8:48:54<10:25:02, 3.41it/s] 66%|██████▌ | 243625/371472 [8:48:54<10:24:25, 3.41it/s] 66%|██████▌ | 243626/371472 [8:48:54<10:54:10, 3.26it/s] 66%|██████▌ | 243627/371472 [8:48:54<11:02:49, 3.21it/s] 66%|██████▌ | 243628/371472 [8:48:55<10:47:27, 3.29it/s] 66%|██████▌ | 243629/371472 [8:48:55<10:29:34, 3.38it/s] 66%|██████▌ | 243630/371472 [8:48:55<10:51:36, 3.27it/s] 66%|██████▌ | 243631/371472 [8:48:56<10:49:13, 3.28it/s] 66%|██████▌ | 243632/371472 [8:48:56<10:34:52, 3.36it/s] 66%|██████▌ | 243633/371472 [8:48:56<11:10:12, 3.18it/s] 66%|██████▌ | 243634/371472 [8:48:57<10:51:27, 3.27it/s] 66%|██████▌ | 243635/371472 [8:48:57<11:07:34, 3.19it/s] 66%|██████▌ | 243636/371472 [8:48:57<10:45:21, 3.30it/s] 66%|██████▌ | 243637/371472 [8:48:58<11:02:51, 3.21it/s] 66%|██████▌ | 243638/371472 [8:48:58<11:02:49, 3.21it/s] 66%|██████▌ | 243639/371472 [8:48:58<11:05:42, 3.20it/s] 66%|██████▌ | 243640/371472 [8:48:58<10:41:00, 3.32it/s] {'loss': 2.6597, 'learning_rate': 4.098773944709e-07, 'epoch': 10.49} + 66%|██████▌ | 243640/371472 [8:48:58<10:41:00, 3.32it/s] 66%|██████▌ | 243641/371472 [8:48:59<10:18:18, 3.45it/s] 66%|██████▌ | 243642/371472 [8:48:59<11:27:27, 3.10it/s] 66%|██████▌ | 243643/371472 [8:48:59<11:02:05, 3.22it/s] 66%|██████▌ | 243644/371472 [8:49:00<11:20:39, 3.13it/s] 66%|██████▌ | 243645/371472 [8:49:00<11:24:28, 3.11it/s] 66%|██████▌ | 243646/371472 [8:49:00<11:17:10, 3.15it/s] 66%|██████▌ | 243647/371472 [8:49:01<12:06:36, 2.93it/s] 66%|██████▌ | 243648/371472 [8:49:01<11:33:45, 3.07it/s] 66%|██████▌ | 243649/371472 [8:49:01<11:53:24, 2.99it/s] 66%|██████▌ | 243650/371472 [8:49:02<11:32:57, 3.07it/s] 66%|██████▌ | 243651/371472 [8:49:02<11:03:19, 3.21it/s] 66%|██████▌ | 243652/371472 [8:49:02<10:45:11, 3.30it/s] 66%|██████▌ | 243653/371472 [8:49:03<11:49:47, 3.00it/s] 66%|██████▌ | 243654/371472 [8:49:03<11:18:09, 3.14it/s] 66%|██████▌ | 243655/371472 [8:49:03<10:51:03, 3.27it/s] 66%|██████▌ | 243656/371472 [8:49:04<10:35:39, 3.35it/s] 66%|██████▌ | 243657/371472 [8:49:04<11:02:05, 3.22it/s] 66%|██████▌ | 243658/371472 [8:49:04<10:32:36, 3.37it/s] 66%|██████▌ | 243659/371472 [8:49:04<10:25:04, 3.41it/s] 66%|██████▌ | 243660/371472 [8:49:05<10:38:18, 3.34it/s] {'loss': 2.6881, 'learning_rate': 4.098289124954212e-07, 'epoch': 10.49} + 66%|██████▌ | 243660/371472 [8:49:05<10:38:18, 3.34it/s] 66%|██████▌ | 243661/371472 [8:49:05<10:54:49, 3.25it/s] 66%|██████▌ | 243662/371472 [8:49:05<10:38:51, 3.33it/s] 66%|██████▌ | 243663/371472 [8:49:06<11:04:09, 3.21it/s] 66%|██████▌ | 243664/371472 [8:49:06<11:00:38, 3.22it/s] 66%|██████▌ | 243665/371472 [8:49:06<10:56:43, 3.24it/s] 66%|██████▌ | 243666/371472 [8:49:07<11:08:30, 3.19it/s] 66%|██████▌ | 243667/371472 [8:49:07<10:46:32, 3.29it/s] 66%|██████▌ | 243668/371472 [8:49:07<10:30:57, 3.38it/s] 66%|██████▌ | 243669/371472 [8:49:07<10:31:11, 3.37it/s] 66%|██████▌ | 243670/371472 [8:49:08<11:49:41, 3.00it/s] 66%|██████▌ | 243671/371472 [8:49:08<11:39:17, 3.05it/s] 66%|██████▌ | 243672/371472 [8:49:08<10:59:37, 3.23it/s] 66%|██████▌ | 243673/371472 [8:49:09<11:07:21, 3.19it/s] 66%|██████▌ | 243674/371472 [8:49:09<11:19:26, 3.13it/s] 66%|██████▌ | 243675/371472 [8:49:09<11:05:32, 3.20it/s] 66%|██████▌ | 243676/371472 [8:49:10<10:38:48, 3.33it/s] 66%|██████▌ | 243677/371472 [8:49:10<10:30:04, 3.38it/s] 66%|██████▌ | 243678/371472 [8:49:10<10:14:03, 3.47it/s] 66%|██████▌ | 243679/371472 [8:49:11<10:08:10, 3.50it/s] 66%|██████▌ | 243680/371472 [8:49:11<10:02:14, 3.54it/s] {'loss': 2.7074, 'learning_rate': 4.097804305199422e-07, 'epoch': 10.5} + 66%|██████▌ | 243680/371472 [8:49:11<10:02:14, 3.54it/s] 66%|██████▌ | 243681/371472 [8:49:11<9:59:57, 3.55it/s] 66%|██████▌ | 243682/371472 [8:49:11<10:11:12, 3.48it/s] 66%|██████▌ | 243683/371472 [8:49:12<10:04:24, 3.52it/s] 66%|██████▌ | 243684/371472 [8:49:12<9:51:04, 3.60it/s] 66%|██████▌ | 243685/371472 [8:49:12<10:15:34, 3.46it/s] 66%|██████▌ | 243686/371472 [8:49:13<10:32:37, 3.37it/s] 66%|██████▌ | 243687/371472 [8:49:13<10:15:56, 3.46it/s] 66%|██████▌ | 243688/371472 [8:49:13<10:20:41, 3.43it/s] 66%|██████▌ | 243689/371472 [8:49:13<10:49:13, 3.28it/s] 66%|██████▌ | 243690/371472 [8:49:14<10:30:35, 3.38it/s] 66%|██████▌ | 243691/371472 [8:49:14<10:45:50, 3.30it/s] 66%|██████▌ | 243692/371472 [8:49:14<11:01:36, 3.22it/s] 66%|██████▌ | 243693/371472 [8:49:15<10:42:58, 3.31it/s] 66%|██████▌ | 243694/371472 [8:49:15<11:17:59, 3.14it/s] 66%|██████▌ | 243695/371472 [8:49:15<10:50:59, 3.27it/s] 66%|██████▌ | 243696/371472 [8:49:16<10:51:32, 3.27it/s] 66%|██████▌ | 243697/371472 [8:49:16<10:28:38, 3.39it/s] 66%|██████▌ | 243698/371472 [8:49:16<11:39:58, 3.04it/s] 66%|██████▌ | 243699/371472 [8:49:17<12:03:19, 2.94it/s] 66%|██████▌ | 243700/371472 [8:49:17<11:27:21, 3.10it/s] {'loss': 2.6582, 'learning_rate': 4.097319485444633e-07, 'epoch': 10.5} + 66%|██████▌ | 243700/371472 [8:49:17<11:27:21, 3.10it/s] 66%|██████▌ | 243701/371472 [8:49:17<10:51:16, 3.27it/s] 66%|██████▌ | 243702/371472 [8:49:18<10:53:17, 3.26it/s] 66%|██████▌ | 243703/371472 [8:49:18<10:29:26, 3.38it/s] 66%|██████▌ | 243704/371472 [8:49:18<10:35:05, 3.35it/s] 66%|██████▌ | 243705/371472 [8:49:18<10:30:52, 3.38it/s] 66%|██████▌ | 243706/371472 [8:49:19<10:32:01, 3.37it/s] 66%|██████▌ | 243707/371472 [8:49:19<11:00:41, 3.22it/s] 66%|██████▌ | 243708/371472 [8:49:19<10:47:04, 3.29it/s] 66%|██████▌ | 243709/371472 [8:49:20<10:36:49, 3.34it/s] 66%|██████▌ | 243710/371472 [8:49:20<10:35:35, 3.35it/s] 66%|██████▌ | 243711/371472 [8:49:20<10:44:18, 3.30it/s] 66%|██████▌ | 243712/371472 [8:49:20<10:44:10, 3.31it/s] 66%|██████▌ | 243713/371472 [8:49:21<10:30:46, 3.38it/s] 66%|██████▌ | 243714/371472 [8:49:21<10:16:42, 3.45it/s] 66%|██████▌ | 243715/371472 [8:49:21<11:13:47, 3.16it/s] 66%|██████▌ | 243716/371472 [8:49:22<10:47:49, 3.29it/s] 66%|██████▌ | 243717/371472 [8:49:22<10:44:34, 3.30it/s] 66%|██████▌ | 243718/371472 [8:49:22<10:27:29, 3.39it/s] 66%|██████▌ | 243719/371472 [8:49:23<11:16:48, 3.15it/s] 66%|██████▌ | 243720/371472 [8:49:23<11:02:22, 3.21it/s] {'loss': 2.6949, 'learning_rate': 4.0968346656898446e-07, 'epoch': 10.5} + 66%|██████▌ | 243720/371472 [8:49:23<11:02:22, 3.21it/s] 66%|██████▌ | 243721/371472 [8:49:23<10:43:07, 3.31it/s] 66%|██████▌ | 243722/371472 [8:49:24<10:28:09, 3.39it/s] 66%|██████▌ | 243723/371472 [8:49:24<10:27:50, 3.39it/s] 66%|██████▌ | 243724/371472 [8:49:24<10:18:20, 3.44it/s] 66%|██████�� | 243725/371472 [8:49:24<10:05:03, 3.52it/s] 66%|██████▌ | 243726/371472 [8:49:25<10:10:06, 3.49it/s] 66%|██████▌ | 243727/371472 [8:49:25<10:14:44, 3.46it/s] 66%|██████▌ | 243728/371472 [8:49:25<10:11:32, 3.48it/s] 66%|██████▌ | 243729/371472 [8:49:26<10:25:00, 3.41it/s] 66%|██████▌ | 243730/371472 [8:49:26<10:52:44, 3.26it/s] 66%|██████▌ | 243731/371472 [8:49:26<10:19:19, 3.44it/s] 66%|██████▌ | 243732/371472 [8:49:26<10:34:59, 3.35it/s] 66%|██████▌ | 243733/371472 [8:49:27<10:34:04, 3.36it/s] 66%|██████▌ | 243734/371472 [8:49:27<10:40:00, 3.33it/s] 66%|██████▌ | 243735/371472 [8:49:27<11:40:32, 3.04it/s] 66%|██████▌ | 243736/371472 [8:49:28<11:20:48, 3.13it/s] 66%|██████▌ | 243737/371472 [8:49:28<11:14:25, 3.16it/s] 66%|██████▌ | 243738/371472 [8:49:28<11:15:45, 3.15it/s] 66%|██████▌ | 243739/371472 [8:49:29<10:43:24, 3.31it/s] 66%|██████▌ | 243740/371472 [8:49:29<10:35:11, 3.35it/s] {'loss': 2.6477, 'learning_rate': 4.0963498459350553e-07, 'epoch': 10.5} + 66%|██████▌ | 243740/371472 [8:49:29<10:35:11, 3.35it/s] 66%|██████▌ | 243741/371472 [8:49:29<10:37:18, 3.34it/s] 66%|██████▌ | 243742/371472 [8:49:30<10:27:10, 3.39it/s] 66%|██████▌ | 243743/371472 [8:49:30<10:43:08, 3.31it/s] 66%|██████▌ | 243744/371472 [8:49:30<11:20:39, 3.13it/s] 66%|██████▌ | 243745/371472 [8:49:31<11:44:53, 3.02it/s] 66%|██████▌ | 243746/371472 [8:49:31<11:07:36, 3.19it/s] 66%|██████▌ | 243747/371472 [8:49:31<11:03:23, 3.21it/s] 66%|██████▌ | 243748/371472 [8:49:31<10:43:10, 3.31it/s] 66%|██████▌ | 243749/371472 [8:49:32<10:46:27, 3.29it/s] 66%|██████▌ | 243750/371472 [8:49:32<11:07:54, 3.19it/s] 66%|██████▌ | 243751/371472 [8:49:32<10:48:08, 3.28it/s] 66%|██████▌ | 243752/371472 [8:49:33<10:35:09, 3.35it/s] 66%|██████▌ | 243753/371472 [8:49:33<10:58:47, 3.23it/s] 66%|██████▌ | 243754/371472 [8:49:33<10:48:59, 3.28it/s] 66%|██████▌ | 243755/371472 [8:49:34<10:30:07, 3.38it/s] 66%|██████▌ | 243756/371472 [8:49:34<10:22:40, 3.42it/s] 66%|██████▌ | 243757/371472 [8:49:34<10:14:18, 3.46it/s] 66%|██████▌ | 243758/371472 [8:49:34<10:20:00, 3.43it/s] 66%|██████▌ | 243759/371472 [8:49:35<10:07:33, 3.50it/s] 66%|██████▌ | 243760/371472 [8:49:35<9:59:45, 3.55it/s] {'loss': 2.6339, 'learning_rate': 4.0958650261802665e-07, 'epoch': 10.5} + 66%|██████▌ | 243760/371472 [8:49:35<9:59:45, 3.55it/s] 66%|██████▌ | 243761/371472 [8:49:35<9:59:18, 3.55it/s] 66%|██████▌ | 243762/371472 [8:49:36<10:31:50, 3.37it/s] 66%|██████▌ | 243763/371472 [8:49:36<10:20:29, 3.43it/s] 66%|██████▌ | 243764/371472 [8:49:36<10:32:45, 3.36it/s] 66%|██████▌ | 243765/371472 [8:49:36<10:37:12, 3.34it/s] 66%|██████▌ | 243766/371472 [8:49:37<10:26:55, 3.40it/s] 66%|██████▌ | 243767/371472 [8:49:37<10:28:35, 3.39it/s] 66%|██████▌ | 243768/371472 [8:49:37<10:58:41, 3.23it/s] 66%|██████▌ | 243769/371472 [8:49:38<11:05:15, 3.20it/s] 66%|██████▌ | 243770/371472 [8:49:38<10:53:44, 3.26it/s] 66%|██████▌ | 243771/371472 [8:49:38<11:20:42, 3.13it/s] 66%|██████▌ | 243772/371472 [8:49:39<10:47:41, 3.29it/s] 66%|██████▌ | 243773/371472 [8:49:39<10:41:45, 3.32it/s] 66%|██████▌ | 243774/371472 [8:49:39<10:38:29, 3.33it/s] 66%|██████▌ | 243775/371472 [8:49:39<10:25:34, 3.40it/s] 66%|██████▌ | 243776/371472 [8:49:40<11:00:02, 3.22it/s] 66%|██████▌ | 243777/371472 [8:49:40<10:43:19, 3.31it/s] 66%|██████▌ | 243778/371472 [8:49:40<10:52:06, 3.26it/s] 66%|██████▌ | 243779/371472 [8:49:41<10:37:21, 3.34it/s] 66%|██████▌ | 243780/371472 [8:49:41<10:41:05, 3.32it/s] {'loss': 2.9636, 'learning_rate': 4.095380206425477e-07, 'epoch': 10.5} + 66%|██████▌ | 243780/371472 [8:49:41<10:41:05, 3.32it/s] 66%|██████▌ | 243781/371472 [8:49:41<10:37:12, 3.34it/s] 66%|██████▌ | 243782/371472 [8:49:42<10:56:45, 3.24it/s] 66%|██████▌ | 243783/371472 [8:49:42<10:55:58, 3.24it/s] 66%|██████▌ | 243784/371472 [8:49:42<11:01:16, 3.22it/s] 66%|███��██▌ | 243785/371472 [8:49:43<10:38:28, 3.33it/s] 66%|██████▌ | 243786/371472 [8:49:43<10:31:02, 3.37it/s] 66%|██████▌ | 243787/371472 [8:49:43<10:24:06, 3.41it/s] 66%|██████▌ | 243788/371472 [8:49:43<10:14:08, 3.47it/s] 66%|██████▌ | 243789/371472 [8:49:44<10:07:44, 3.50it/s] 66%|██████▌ | 243790/371472 [8:49:44<10:08:37, 3.50it/s] 66%|██████▌ | 243791/371472 [8:49:44<10:14:44, 3.46it/s] 66%|██████▌ | 243792/371472 [8:49:45<10:38:36, 3.33it/s] 66%|██████▌ | 243793/371472 [8:49:45<10:46:19, 3.29it/s] 66%|██████▌ | 243794/371472 [8:49:45<10:56:21, 3.24it/s] 66%|██████▌ | 243795/371472 [8:49:45<10:34:34, 3.35it/s] 66%|██████▌ | 243796/371472 [8:49:46<10:41:23, 3.32it/s] 66%|██████▌ | 243797/371472 [8:49:46<10:32:01, 3.37it/s] 66%|██████▌ | 243798/371472 [8:49:46<10:12:54, 3.47it/s] 66%|██████▌ | 243799/371472 [8:49:47<10:11:38, 3.48it/s] 66%|██████▌ | 243800/371472 [8:49:47<10:16:28, 3.45it/s] {'loss': 2.6991, 'learning_rate': 4.094895386670689e-07, 'epoch': 10.5} + 66%|██████▌ | 243800/371472 [8:49:47<10:16:28, 3.45it/s] 66%|██████▌ | 243801/371472 [8:49:47<11:01:32, 3.22it/s] 66%|██████▌ | 243802/371472 [8:49:48<10:54:50, 3.25it/s] 66%|██████▌ | 243803/371472 [8:49:48<10:34:31, 3.35it/s] 66%|██████▌ | 243804/371472 [8:49:48<10:17:55, 3.44it/s] 66%|██████▌ | 243805/371472 [8:49:48<10:23:10, 3.41it/s] 66%|██████▌ | 243806/371472 [8:49:49<10:19:20, 3.44it/s] 66%|██████▌ | 243807/371472 [8:49:49<10:08:40, 3.50it/s] 66%|██████▌ | 243808/371472 [8:49:49<10:16:11, 3.45it/s] 66%|██████▌ | 243809/371472 [8:49:50<10:39:09, 3.33it/s] 66%|██████▌ | 243810/371472 [8:49:50<10:17:24, 3.45it/s] 66%|██████▌ | 243811/371472 [8:49:50<10:39:36, 3.33it/s] 66%|██████▌ | 243812/371472 [8:49:50<10:29:31, 3.38it/s] 66%|██████▌ | 243813/371472 [8:49:51<10:29:38, 3.38it/s] 66%|██████▌ | 243814/371472 [8:49:51<10:19:17, 3.44it/s] 66%|██████▌ | 243815/371472 [8:49:51<10:21:12, 3.42it/s] 66%|██████▌ | 243816/371472 [8:49:52<10:37:41, 3.34it/s] 66%|██████▌ | 243817/371472 [8:49:52<11:00:29, 3.22it/s] 66%|██████▌ | 243818/371472 [8:49:52<11:24:01, 3.11it/s] 66%|██████▌ | 243819/371472 [8:49:53<11:32:52, 3.07it/s] 66%|██████▌ | 243820/371472 [8:49:53<11:06:00, 3.19it/s] {'loss': 2.7597, 'learning_rate': 4.094410566915899e-07, 'epoch': 10.5} + 66%|██████▌ | 243820/371472 [8:49:53<11:06:00, 3.19it/s] 66%|██████▌ | 243821/371472 [8:49:53<10:38:06, 3.33it/s] 66%|██████▌ | 243822/371472 [8:49:54<10:21:50, 3.42it/s] 66%|██████▌ | 243823/371472 [8:49:54<10:20:52, 3.43it/s] 66%|██████▌ | 243824/371472 [8:49:54<10:05:04, 3.52it/s] 66%|██████▌ | 243825/371472 [8:49:54<10:05:47, 3.51it/s] 66%|██████▌ | 243826/371472 [8:49:55<10:05:25, 3.51it/s] 66%|██████▌ | 243827/371472 [8:49:55<10:12:36, 3.47it/s] 66%|██████▌ | 243828/371472 [8:49:55<10:04:43, 3.52it/s] 66%|██████▌ | 243829/371472 [8:49:55<10:01:17, 3.54it/s] 66%|██████▌ | 243830/371472 [8:49:56<11:44:04, 3.02it/s] 66%|██████▌ | 243831/371472 [8:49:56<11:14:50, 3.15it/s] 66%|██████▌ | 243832/371472 [8:49:57<10:53:46, 3.25it/s] 66%|██████▌ | 243833/371472 [8:49:57<10:55:24, 3.25it/s] 66%|██████▌ | 243834/371472 [8:49:57<11:06:43, 3.19it/s] 66%|██████▌ | 243835/371472 [8:49:57<11:02:02, 3.21it/s] 66%|██████▌ | 243836/371472 [8:49:58<10:45:09, 3.30it/s] 66%|██████▌ | 243837/371472 [8:49:58<10:38:57, 3.33it/s] 66%|██████▌ | 243838/371472 [8:49:58<10:29:06, 3.38it/s] 66%|██████▌ | 243839/371472 [8:49:59<10:10:34, 3.48it/s] 66%|██████▌ | 243840/371472 [8:49:59<10:38:08, 3.33it/s] {'loss': 2.7381, 'learning_rate': 4.093925747161111e-07, 'epoch': 10.5} + 66%|██████▌ | 243840/371472 [8:49:59<10:38:08, 3.33it/s] 66%|██████▌ | 243841/371472 [8:49:59<10:49:10, 3.28it/s] 66%|██████▌ | 243842/371472 [8:50:00<10:40:33, 3.32it/s] 66%|██████▌ | 243843/371472 [8:50:00<10:33:52, 3.36it/s] 66%|██████▌ | 243844/371472 [8:50:00<10:39:24, 3.33it/s] 66%|���█████▌ | 243845/371472 [8:50:00<10:27:01, 3.39it/s] 66%|██████▌ | 243846/371472 [8:50:01<10:51:21, 3.27it/s] 66%|██████▌ | 243847/371472 [8:50:01<10:32:34, 3.36it/s] 66%|██████▌ | 243848/371472 [8:50:01<10:37:50, 3.33it/s] 66%|██████▌ | 243849/371472 [8:50:02<11:10:01, 3.17it/s] 66%|██████▌ | 243850/371472 [8:50:02<11:20:28, 3.13it/s] 66%|██████▌ | 243851/371472 [8:50:02<10:49:29, 3.27it/s] 66%|██████▌ | 243852/371472 [8:50:03<11:06:22, 3.19it/s] 66%|██████▌ | 243853/371472 [8:50:03<11:03:11, 3.21it/s] 66%|██████▌ | 243854/371472 [8:50:03<10:50:58, 3.27it/s] 66%|██████▌ | 243855/371472 [8:50:03<10:31:50, 3.37it/s] 66%|██████▌ | 243856/371472 [8:50:04<10:15:53, 3.45it/s] 66%|██████▌ | 243857/371472 [8:50:04<10:17:53, 3.44it/s] 66%|██████▌ | 243858/371472 [8:50:04<10:10:25, 3.48it/s] 66%|██████▌ | 243859/371472 [8:50:05<10:12:23, 3.47it/s] 66%|██████▌ | 243860/371472 [8:50:05<10:44:39, 3.30it/s] {'loss': 2.7609, 'learning_rate': 4.093440927406321e-07, 'epoch': 10.5} + 66%|██████▌ | 243860/371472 [8:50:05<10:44:39, 3.30it/s] 66%|██████▌ | 243861/371472 [8:50:05<11:02:36, 3.21it/s] 66%|██████▌ | 243862/371472 [8:50:06<10:46:31, 3.29it/s] 66%|██████▌ | 243863/371472 [8:50:06<10:43:29, 3.31it/s] 66%|██████▌ | 243864/371472 [8:50:06<10:46:53, 3.29it/s] 66%|██████▌ | 243865/371472 [8:50:06<10:45:52, 3.29it/s] 66%|██████▌ | 243866/371472 [8:50:07<10:43:20, 3.31it/s] 66%|██████▌ | 243867/371472 [8:50:07<10:59:01, 3.23it/s] 66%|██████▌ | 243868/371472 [8:50:07<11:12:11, 3.16it/s] 66%|██████▌ | 243869/371472 [8:50:08<10:42:46, 3.31it/s] 66%|██████▌ | 243870/371472 [8:50:08<10:26:18, 3.40it/s] 66%|██████▌ | 243871/371472 [8:50:08<10:46:58, 3.29it/s] 66%|██████▌ | 243872/371472 [8:50:09<11:06:32, 3.19it/s] 66%|██████▌ | 243873/371472 [8:50:09<10:48:09, 3.28it/s] 66%|██████▌ | 243874/371472 [8:50:09<10:49:39, 3.27it/s] 66%|██████▌ | 243875/371472 [8:50:10<10:34:08, 3.35it/s] 66%|██████▌ | 243876/371472 [8:50:10<10:52:05, 3.26it/s] 66%|██████▌ | 243877/371472 [8:50:10<12:01:04, 2.95it/s] 66%|██████▌ | 243878/371472 [8:50:11<12:52:30, 2.75it/s] 66%|██████▌ | 243879/371472 [8:50:11<12:13:51, 2.90it/s] 66%|██████▌ | 243880/371472 [8:50:11<11:33:20, 3.07it/s] {'loss': 2.6154, 'learning_rate': 4.092956107651533e-07, 'epoch': 10.5} + 66%|██████▌ | 243880/371472 [8:50:11<11:33:20, 3.07it/s] 66%|██████▌ | 243881/371472 [8:50:12<10:59:19, 3.23it/s] 66%|██████▌ | 243882/371472 [8:50:12<11:01:53, 3.21it/s] 66%|██████▌ | 243883/371472 [8:50:12<11:23:46, 3.11it/s] 66%|██████▌ | 243884/371472 [8:50:13<12:12:04, 2.90it/s] 66%|██████▌ | 243885/371472 [8:50:13<11:18:19, 3.13it/s] 66%|██████▌ | 243886/371472 [8:50:13<11:14:40, 3.15it/s] 66%|██████▌ | 243887/371472 [8:50:14<11:37:28, 3.05it/s] 66%|██████▌ | 243888/371472 [8:50:14<11:47:17, 3.01it/s] 66%|██████▌ | 243889/371472 [8:50:14<11:24:27, 3.11it/s] 66%|██████▌ | 243890/371472 [8:50:14<11:03:35, 3.20it/s] 66%|██████▌ | 243891/371472 [8:50:15<10:40:07, 3.32it/s] 66%|██████▌ | 243892/371472 [8:50:15<10:27:53, 3.39it/s] 66%|██████▌ | 243893/371472 [8:50:15<10:04:41, 3.52it/s] 66%|██████▌ | 243894/371472 [8:50:16<10:04:54, 3.52it/s] 66%|██████▌ | 243895/371472 [8:50:16<10:23:25, 3.41it/s] 66%|██████▌ | 243896/371472 [8:50:16<11:21:47, 3.12it/s] 66%|██████▌ | 243897/371472 [8:50:17<10:51:27, 3.26it/s] 66%|██████▌ | 243898/371472 [8:50:17<10:36:48, 3.34it/s] 66%|██████▌ | 243899/371472 [8:50:17<11:16:50, 3.14it/s] 66%|██████▌ | 243900/371472 [8:50:17<10:53:50, 3.25it/s] {'loss': 2.6193, 'learning_rate': 4.0924712878967436e-07, 'epoch': 10.51} + 66%|██████▌ | 243900/371472 [8:50:17<10:53:50, 3.25it/s] 66%|██████▌ | 243901/371472 [8:50:18<10:39:55, 3.32it/s] 66%|██████▌ | 243902/371472 [8:50:18<10:13:34, 3.47it/s] 66%|██████▌ | 243903/371472 [8:50:18<10:56:58, 3.24it/s] 66%|██████▌ | 243904/371472 [8:50:19<10:31:40, 3.37it/s] 66%|██████▌ | 243905/371472 [8:50:19<10:37:01, 3.34it/s] 66%|██████▌ | 243906/371472 [8:50:19<10:26:52, 3.39it/s] 66%|██████▌ | 243907/371472 [8:50:19<10:21:54, 3.42it/s] 66%|██████▌ | 243908/371472 [8:50:20<10:28:10, 3.38it/s] 66%|██████▌ | 243909/371472 [8:50:20<10:50:05, 3.27it/s] 66%|██████▌ | 243910/371472 [8:50:20<10:35:29, 3.35it/s] 66%|██████▌ | 243911/371472 [8:50:21<10:48:55, 3.28it/s] 66%|██████▌ | 243912/371472 [8:50:21<10:48:25, 3.28it/s] 66%|██████▌ | 243913/371472 [8:50:21<10:47:28, 3.28it/s] 66%|██████▌ | 243914/371472 [8:50:22<10:20:43, 3.43it/s] 66%|██████▌ | 243915/371472 [8:50:22<10:13:20, 3.47it/s] 66%|██████▌ | 243916/371472 [8:50:22<10:34:15, 3.35it/s] 66%|██████▌ | 243917/371472 [8:50:22<10:18:25, 3.44it/s] 66%|██████▌ | 243918/371472 [8:50:23<10:23:22, 3.41it/s] 66%|██████▌ | 243919/371472 [8:50:23<10:09:16, 3.49it/s] 66%|██████▌ | 243920/371472 [8:50:23<10:03:41, 3.52it/s] {'loss': 2.6014, 'learning_rate': 4.0919864681419554e-07, 'epoch': 10.51} + 66%|██████▌ | 243920/371472 [8:50:23<10:03:41, 3.52it/s] 66%|██████▌ | 243921/371472 [8:50:24<10:04:16, 3.52it/s] 66%|██████▌ | 243922/371472 [8:50:24<10:27:38, 3.39it/s] 66%|██████▌ | 243923/371472 [8:50:24<10:20:12, 3.43it/s] 66%|██████▌ | 243924/371472 [8:50:24<10:01:38, 3.53it/s] 66%|██████▌ | 243925/371472 [8:50:25<10:18:26, 3.44it/s] 66%|██████▌ | 243926/371472 [8:50:25<11:03:27, 3.20it/s] 66%|██████▌ | 243927/371472 [8:50:25<10:50:59, 3.27it/s] 66%|██████▌ | 243928/371472 [8:50:26<10:33:34, 3.36it/s] 66%|██████▌ | 243929/371472 [8:50:26<10:55:01, 3.25it/s] 66%|██████▌ | 243930/371472 [8:50:26<10:38:41, 3.33it/s] 66%|██████▌ | 243931/371472 [8:50:27<10:26:18, 3.39it/s] 66%|██████▌ | 243932/371472 [8:50:27<10:19:24, 3.43it/s] 66%|██████▌ | 243933/371472 [8:50:27<10:20:43, 3.42it/s] 66%|██████▌ | 243934/371472 [8:50:27<10:25:06, 3.40it/s] 66%|██████▌ | 243935/371472 [8:50:28<10:15:37, 3.45it/s] 66%|██████▌ | 243936/371472 [8:50:28<10:16:05, 3.45it/s] 66%|██████▌ | 243937/371472 [8:50:28<11:55:18, 2.97it/s] 66%|██████▌ | 243938/371472 [8:50:29<11:30:21, 3.08it/s] 66%|██████▌ | 243939/371472 [8:50:29<11:06:07, 3.19it/s] 66%|██████▌ | 243940/371472 [8:50:29<11:17:36, 3.14it/s] {'loss': 2.7556, 'learning_rate': 4.0915016483871656e-07, 'epoch': 10.51} + 66%|██████▌ | 243940/371472 [8:50:29<11:17:36, 3.14it/s] 66%|██████▌ | 243941/371472 [8:50:30<10:57:18, 3.23it/s] 66%|██████▌ | 243942/371472 [8:50:30<10:56:08, 3.24it/s] 66%|██████▌ | 243943/371472 [8:50:30<10:46:16, 3.29it/s] 66%|██████▌ | 243944/371472 [8:50:31<10:44:07, 3.30it/s] 66%|██████▌ | 243945/371472 [8:50:31<10:24:08, 3.41it/s] 66%|██████▌ | 243946/371472 [8:50:31<11:06:05, 3.19it/s] 66%|██████▌ | 243947/371472 [8:50:32<10:45:35, 3.29it/s] 66%|██████▌ | 243948/371472 [8:50:32<10:28:47, 3.38it/s] 66%|██████▌ | 243949/371472 [8:50:32<10:20:24, 3.43it/s] 66%|██████▌ | 243950/371472 [8:50:32<9:58:21, 3.55it/s] 66%|██████▌ | 243951/371472 [8:50:33<10:01:49, 3.53it/s] 66%|██████▌ | 243952/371472 [8:50:33<10:02:49, 3.53it/s] 66%|██████▌ | 243953/371472 [8:50:33<10:23:16, 3.41it/s] 66%|██████▌ | 243954/371472 [8:50:33<10:03:46, 3.52it/s] 66%|██████▌ | 243955/371472 [8:50:34<10:30:55, 3.37it/s] 66%|██████▌ | 243956/371472 [8:50:34<10:44:50, 3.30it/s] 66%|██████▌ | 243957/371472 [8:50:34<10:42:36, 3.31it/s] 66%|██████▌ | 243958/371472 [8:50:35<10:37:53, 3.33it/s] 66%|██████▌ | 243959/371472 [8:50:35<10:33:14, 3.36it/s] 66%|██████▌ | 243960/371472 [8:50:35<10:13:40, 3.46it/s] {'loss': 2.7253, 'learning_rate': 4.0910168286323774e-07, 'epoch': 10.51} + 66%|██████▌ | 243960/371472 [8:50:35<10:13:40, 3.46it/s] 66%|██████▌ | 243961/371472 [8:50:36<9:56:21, 3.56it/s] 66%|██████▌ | 243962/371472 [8:50:36<9:59:03, 3.55it/s] 66%|██████▌ | 243963/371472 [8:50:36<9:50:54, 3.60it/s] 66%|██████▌ | 243964/371472 [8:50:36<10:02:28, 3.53it/s] 66%|██████▌ | 243965/371472 [8:50:37<10:00:38, 3.54it/s] 66%|██████▌ | 243966/371472 [8:50:37<9:57:52, 3.55it/s] 66%|██████▌ | 243967/371472 [8:50:37<9:58:55, 3.55it/s] 66%|██████▌ | 243968/371472 [8:50:38<10:20:36, 3.42it/s] 66%|██████▌ | 243969/371472 [8:50:38<10:57:44, 3.23it/s] 66%|██████▌ | 243970/371472 [8:50:38<10:45:36, 3.29it/s] 66%|██████▌ | 243971/371472 [8:50:38<10:41:20, 3.31it/s] 66%|██████▌ | 243972/371472 [8:50:39<10:13:20, 3.46it/s] 66%|██████▌ | 243973/371472 [8:50:39<10:09:33, 3.49it/s] 66%|██████▌ | 243974/371472 [8:50:39<9:51:58, 3.59it/s] 66%|██████▌ | 243975/371472 [8:50:40<9:50:47, 3.60it/s] 66%|██████▌ | 243976/371472 [8:50:40<10:17:38, 3.44it/s] 66%|██████▌ | 243977/371472 [8:50:40<10:15:10, 3.45it/s] 66%|██████▌ | 243978/371472 [8:50:40<10:01:43, 3.53it/s] 66%|██████▌ | 243979/371472 [8:50:41<9:55:30, 3.57it/s] 66%|██████▌ | 243980/371472 [8:50:41<10:26:52, 3.39it/s] {'loss': 2.783, 'learning_rate': 4.090532008877588e-07, 'epoch': 10.51} + 66%|██████▌ | 243980/371472 [8:50:41<10:26:52, 3.39it/s] 66%|██████▌ | 243981/371472 [8:50:41<10:21:39, 3.42it/s] 66%|██████▌ | 243982/371472 [8:50:42<10:13:39, 3.46it/s] 66%|██████▌ | 243983/371472 [8:50:42<10:05:45, 3.51it/s] 66%|██████▌ | 243984/371472 [8:50:42<10:38:48, 3.33it/s] 66%|██████▌ | 243985/371472 [8:50:42<10:18:17, 3.44it/s] 66%|██████▌ | 243986/371472 [8:50:43<10:10:06, 3.48it/s] 66%|██████▌ | 243987/371472 [8:50:43<10:10:13, 3.48it/s] 66%|██████▌ | 243988/371472 [8:50:43<10:18:52, 3.43it/s] 66%|██████▌ | 243989/371472 [8:50:44<10:04:45, 3.51it/s] 66%|██████▌ | 243990/371472 [8:50:44<10:41:37, 3.31it/s] 66%|██████▌ | 243991/371472 [8:50:44<10:18:17, 3.44it/s] 66%|██████▌ | 243992/371472 [8:50:45<10:45:11, 3.29it/s] 66%|██████▌ | 243993/371472 [8:50:45<10:39:18, 3.32it/s] 66%|██████▌ | 243994/371472 [8:50:45<10:21:24, 3.42it/s] 66%|██████▌ | 243995/371472 [8:50:45<10:09:54, 3.48it/s] 66%|██████▌ | 243996/371472 [8:50:46<10:04:27, 3.51it/s] 66%|██████▌ | 243997/371472 [8:50:46<10:03:54, 3.52it/s] 66%|██████▌ | 243998/371472 [8:50:46<9:59:15, 3.55it/s] 66%|██████▌ | 243999/371472 [8:50:47<10:26:48, 3.39it/s] 66%|██████▌ | 244000/371472 [8:50:47<10:26:05, 3.39it/s] {'loss': 2.7339, 'learning_rate': 4.0900471891227993e-07, 'epoch': 10.51} + 66%|██████▌ | 244000/371472 [8:50:47<10:26:05, 3.39it/s] 66%|██████▌ | 244001/371472 [8:50:47<10:26:30, 3.39it/s] 66%|██████▌ | 244002/371472 [8:50:47<10:24:31, 3.40it/s] 66%|██████▌ | 244003/371472 [8:50:48<10:19:33, 3.43it/s] 66%|██████▌ | 244004/371472 [8:50:48<10:07:49, 3.50it/s] 66%|██████▌ | 244005/371472 [8:50:48<10:01:16, 3.53it/s] 66%|██████▌ | 244006/371472 [8:50:49<9:53:42, 3.58it/s] 66%|██████▌ | 244007/371472 [8:50:49<10:02:20, 3.53it/s] 66%|██████▌ | 244008/371472 [8:50:49<10:00:00, 3.54it/s] 66%|██████▌ | 244009/371472 [8:50:49<10:40:46, 3.32it/s] 66%|██████▌ | 244010/371472 [8:50:50<11:06:45, 3.19it/s] 66%|██████▌ | 244011/371472 [8:50:50<10:51:58, 3.26it/s] 66%|██████▌ | 244012/371472 [8:50:50<10:36:06, 3.34it/s] 66%|██████▌ | 244013/371472 [8:50:51<10:15:13, 3.45it/s] 66%|██████▌ | 244014/371472 [8:50:51<10:58:51, 3.22it/s] 66%|██████▌ | 244015/371472 [8:50:51<10:48:50, 3.27it/s] 66%|██████▌ | 244016/371472 [8:50:52<10:35:58, 3.34it/s] 66%|██████▌ | 244017/371472 [8:50:52<10:51:49, 3.26it/s] 66%|██████▌ | 244018/371472 [8:50:52<10:59:37, 3.22it/s] 66%|██████▌ | 244019/371472 [8:50:53<10:51:08, 3.26it/s] 66%|██████▌ | 244020/371472 [8:50:53<10:41:43, 3.31it/s] {'loss': 2.7184, 'learning_rate': 4.08956236936801e-07, 'epoch': 10.51} + 66%|██████▌ | 244020/371472 [8:50:53<10:41:43, 3.31it/s] 66%|██████▌ | 244021/371472 [8:50:53<10:49:37, 3.27it/s] 66%|██████▌ | 244022/371472 [8:50:53<10:54:26, 3.25it/s] 66%|██████▌ | 244023/371472 [8:50:54<10:37:17, 3.33it/s] 66%|██████▌ | 244024/371472 [8:50:54<10:21:01, 3.42it/s] 66%|██████▌ | 244025/371472 [8:50:54<10:21:11, 3.42it/s] 66%|██████▌ | 244026/371472 [8:50:55<10:27:57, 3.38it/s] 66%|██████▌ | 244027/371472 [8:50:55<10:51:13, 3.26it/s] 66%|██████▌ | 244028/371472 [8:50:55<10:37:17, 3.33it/s] 66%|██████▌ | 244029/371472 [8:50:56<11:02:37, 3.21it/s] 66%|██████▌ | 244030/371472 [8:50:56<11:01:54, 3.21it/s] 66%|██████▌ | 244031/371472 [8:50:56<11:10:51, 3.17it/s] 66%|██████▌ | 244032/371472 [8:50:57<11:15:28, 3.14it/s] 66%|██████▌ | 244033/371472 [8:50:57<11:07:51, 3.18it/s] 66%|██████▌ | 244034/371472 [8:50:57<11:52:49, 2.98it/s] 66%|██████▌ | 244035/371472 [8:50:58<11:41:46, 3.03it/s] 66%|██████▌ | 244036/371472 [8:50:58<11:20:11, 3.12it/s] 66%|██████▌ | 244037/371472 [8:50:58<11:54:51, 2.97it/s] 66%|██████▌ | 244038/371472 [8:50:58<11:24:41, 3.10it/s] 66%|██████▌ | 244039/371472 [8:50:59<11:01:54, 3.21it/s] 66%|██████▌ | 244040/371472 [8:50:59<10:43:43, 3.30it/s] {'loss': 2.5618, 'learning_rate': 4.089077549613222e-07, 'epoch': 10.51} + 66%|██████▌ | 244040/371472 [8:50:59<10:43:43, 3.30it/s] 66%|██████▌ | 244041/371472 [8:50:59<10:30:04, 3.37it/s] 66%|██████▌ | 244042/371472 [8:51:00<10:32:30, 3.36it/s] 66%|██████▌ | 244043/371472 [8:51:00<11:01:26, 3.21it/s] 66%|██████▌ | 244044/371472 [8:51:00<10:44:26, 3.30it/s] 66%|██████▌ | 244045/371472 [8:51:01<10:30:51, 3.37it/s] 66%|██████▌ | 244046/371472 [8:51:01<10:26:46, 3.39it/s] 66%|██████▌ | 244047/371472 [8:51:01<10:17:02, 3.44it/s] 66%|██████▌ | 244048/371472 [8:51:01<10:19:54, 3.43it/s] 66%|██████▌ | 244049/371472 [8:51:02<10:13:06, 3.46it/s] 66%|██████▌ | 244050/371472 [8:51:02<10:30:45, 3.37it/s] 66%|██████▌ | 244051/371472 [8:51:02<10:32:09, 3.36it/s] 66%|██████▌ | 244052/371472 [8:51:03<10:24:00, 3.40it/s] 66%|██████▌ | 244053/371472 [8:51:03<10:27:59, 3.38it/s] 66%|██████▌ | 244054/371472 [8:51:03<10:26:35, 3.39it/s] 66%|██████▌ | 244055/371472 [8:51:03<10:20:19, 3.42it/s] 66%|██████▌ | 244056/371472 [8:51:04<10:14:02, 3.46it/s] 66%|██████▌ | 244057/371472 [8:51:04<10:14:19, 3.46it/s] 66%|██████▌ | 244058/371472 [8:51:04<10:09:40, 3.48it/s] 66%|██████▌ | 244059/371472 [8:51:05<9:54:55, 3.57it/s] 66%|██████▌ | 244060/371472 [8:51:05<10:45:10, 3.29it/s] {'loss': 2.855, 'learning_rate': 4.0885927298584325e-07, 'epoch': 10.51} + 66%|██████▌ | 244060/371472 [8:51:05<10:45:10, 3.29it/s] 66%|██████▌ | 244061/371472 [8:51:05<11:34:04, 3.06it/s] 66%|██████▌ | 244062/371472 [8:51:06<11:24:11, 3.10it/s] 66%|██████▌ | 244063/371472 [8:51:06<11:08:46, 3.18it/s] 66%|██████▌ | 244064/371472 [8:51:06<10:47:19, 3.28it/s] 66%|██████▌ | 244065/371472 [8:51:07<10:35:59, 3.34it/s] 66%|██████▌ | 244066/371472 [8:51:07<10:21:05, 3.42it/s] 66%|██████▌ | 244067/371472 [8:51:07<10:29:32, 3.37it/s] 66%|██████▌ | 244068/371472 [8:51:08<11:49:19, 2.99it/s] 66%|██████▌ | 244069/371472 [8:51:08<11:18:46, 3.13it/s] 66%|██████▌ | 244070/371472 [8:51:08<11:06:54, 3.18it/s] 66%|██████▌ | 244071/371472 [8:51:08<10:38:35, 3.33it/s] 66%|██████▌ | 244072/371472 [8:51:09<10:43:29, 3.30it/s] 66%|██████▌ | 244073/371472 [8:51:09<10:36:45, 3.33it/s] 66%|██████▌ | 244074/371472 [8:51:09<10:40:08, 3.32it/s] 66%|██████▌ | 244075/371472 [8:51:10<10:40:49, 3.31it/s] 66%|██████▌ | 244076/371472 [8:51:10<10:53:30, 3.25it/s] 66%|██████▌ | 244077/371472 [8:51:10<10:35:46, 3.34it/s] 66%|██████▌ | 244078/371472 [8:51:11<11:15:37, 3.14it/s] 66%|██████▌ | 244079/371472 [8:51:11<11:25:59, 3.10it/s] 66%|██████▌ | 244080/371472 [8:51:11<11:24:50, 3.10it/s] {'loss': 2.7471, 'learning_rate': 4.088107910103644e-07, 'epoch': 10.51} + 66%|██████▌ | 244080/371472 [8:51:11<11:24:50, 3.10it/s] 66%|██████▌ | 244081/371472 [8:51:12<11:21:42, 3.11it/s] 66%|██████▌ | 244082/371472 [8:51:12<11:21:51, 3.11it/s] 66%|██████▌ | 244083/371472 [8:51:12<10:59:53, 3.22it/s] 66%|██████▌ | 244084/371472 [8:51:12<10:59:52, 3.22it/s] 66%|██████▌ | 244085/371472 [8:51:13<11:58:57, 2.95it/s] 66%|██████▌ | 244086/371472 [8:51:13<11:19:41, 3.12it/s] 66%|██████▌ | 244087/371472 [8:51:14<12:00:55, 2.94it/s] 66%|██████▌ | 244088/371472 [8:51:14<11:50:37, 2.99it/s] 66%|██████▌ | 244089/371472 [8:51:14<11:45:19, 3.01it/s] 66%|██████▌ | 244090/371472 [8:51:14<11:13:47, 3.15it/s] 66%|██████▌ | 244091/371472 [8:51:15<13:01:09, 2.72it/s] 66%|██████▌ | 244092/371472 [8:51:15<12:09:10, 2.91it/s] 66%|██████▌ | 244093/371472 [8:51:16<11:31:06, 3.07it/s] 66%|██████▌ | 244094/371472 [8:51:16<12:04:58, 2.93it/s] 66%|██████▌ | 244095/371472 [8:51:16<11:18:12, 3.13it/s] 66%|██████▌ | 244096/371472 [8:51:16<10:48:57, 3.27it/s] 66%|██████▌ | 244097/371472 [8:51:17<10:51:02, 3.26it/s] 66%|██████▌ | 244098/371472 [8:51:17<10:41:05, 3.31it/s] 66%|██████▌ | 244099/371472 [8:51:17<11:19:26, 3.12it/s] 66%|██████▌ | 244100/371472 [8:51:18<11:04:12, 3.20it/s] {'loss': 2.5773, 'learning_rate': 4.0876230903488545e-07, 'epoch': 10.51} + 66%|██████▌ | 244100/371472 [8:51:18<11:04:12, 3.20it/s] 66%|██████▌ | 244101/371472 [8:51:18<11:01:04, 3.21it/s] 66%|██████▌ | 244102/371472 [8:51:18<10:43:11, 3.30it/s] 66%|██████▌ | 244103/371472 [8:51:19<10:50:29, 3.26it/s] 66%|██████▌ | 244104/371472 [8:51:19<10:24:36, 3.40it/s] 66%|██████▌ | 244105/371472 [8:51:19<11:55:49, 2.97it/s] 66%|██████▌ | 244106/371472 [8:51:20<11:17:09, 3.13it/s] 66%|██████▌ | 244107/371472 [8:51:20<11:55:34, 2.97it/s] 66%|██████▌ | 244108/371472 [8:51:20<12:03:28, 2.93it/s] 66%|██████▌ | 244109/371472 [8:51:21<11:26:40, 3.09it/s] 66%|██████▌ | 244110/371472 [8:51:21<10:50:41, 3.26it/s] 66%|██████▌ | 244111/371472 [8:51:21<10:58:52, 3.22it/s] 66%|██████▌ | 244112/371472 [8:51:22<11:16:36, 3.14it/s] 66%|██████▌ | 244113/371472 [8:51:22<10:51:21, 3.26it/s] 66%|██████▌ | 244114/371472 [8:51:22<11:12:47, 3.15it/s] 66%|██████▌ | 244115/371472 [8:51:22<11:23:58, 3.10it/s] 66%|██████▌ | 244116/371472 [8:51:23<11:01:46, 3.21it/s] 66%|██████▌ | 244117/371472 [8:51:23<10:55:12, 3.24it/s] 66%|██████▌ | 244118/371472 [8:51:23<10:37:12, 3.33it/s] 66%|██████▌ | 244119/371472 [8:51:24<10:42:58, 3.30it/s] 66%|██████▌ | 244120/371472 [8:51:24<10:42:36, 3.30it/s] {'loss': 2.8157, 'learning_rate': 4.0871382705940663e-07, 'epoch': 10.51} + 66%|██████▌ | 244120/371472 [8:51:24<10:42:36, 3.30it/s] 66%|██████▌ | 244121/371472 [8:51:24<10:57:59, 3.23it/s] 66%|██████▌ | 244122/371472 [8:51:25<10:46:07, 3.28it/s] 66%|██████▌ | 244123/371472 [8:51:25<10:34:46, 3.34it/s] 66%|██████▌ | 244124/371472 [8:51:25<10:35:15, 3.34it/s] 66%|██████▌ | 244125/371472 [8:51:25<10:12:24, 3.47it/s] 66%|██████▌ | 244126/371472 [8:51:26<9:58:00, 3.55it/s] 66%|██████▌ | 244127/371472 [8:51:26<10:22:53, 3.41it/s] 66%|██████▌ | 244128/371472 [8:51:26<10:13:52, 3.46it/s] 66%|██████▌ | 244129/371472 [8:51:27<10:59:38, 3.22it/s] 66%|██████▌ | 244130/371472 [8:51:27<10:34:47, 3.34it/s] 66%|██████▌ | 244131/371472 [8:51:27<10:26:13, 3.39it/s] 66%|██████▌ | 244132/371472 [8:51:28<11:01:41, 3.21it/s] 66%|██████▌ | 244133/371472 [8:51:28<10:42:38, 3.30it/s] 66%|██████▌ | 244134/371472 [8:51:28<10:27:26, 3.38it/s] 66%|██████▌ | 244135/371472 [8:51:28<10:29:00, 3.37it/s] 66%|██████▌ | 244136/371472 [8:51:29<10:24:11, 3.40it/s] 66%|██████▌ | 244137/371472 [8:51:29<10:11:07, 3.47it/s] 66%|██████▌ | 244138/371472 [8:51:29<11:49:14, 2.99it/s] 66%|██████▌ | 244139/371472 [8:51:30<11:08:49, 3.17it/s] 66%|██████▌ | 244140/371472 [8:51:30<10:51:51, 3.26it/s] {'loss': 2.6612, 'learning_rate': 4.0866534508392765e-07, 'epoch': 10.52} + 66%|██████▌ | 244140/371472 [8:51:30<10:51:51, 3.26it/s] 66%|██████▌ | 244141/371472 [8:51:30<10:41:59, 3.31it/s] 66%|██████▌ | 244142/371472 [8:51:31<10:25:17, 3.39it/s] 66%|██████▌ | 244143/371472 [8:51:31<10:31:12, 3.36it/s] 66%|███��██▌ | 244144/371472 [8:51:31<10:34:14, 3.35it/s] 66%|██████▌ | 244145/371472 [8:51:31<10:41:35, 3.31it/s] 66%|██████▌ | 244146/371472 [8:51:32<11:11:29, 3.16it/s] 66%|██████▌ | 244147/371472 [8:51:32<10:55:34, 3.24it/s] 66%|██████▌ | 244148/371472 [8:51:32<11:19:03, 3.13it/s] 66%|██████▌ | 244149/371472 [8:51:33<11:19:58, 3.12it/s] 66%|██████▌ | 244150/371472 [8:51:33<10:59:44, 3.22it/s] 66%|██████▌ | 244151/371472 [8:51:33<10:38:40, 3.32it/s] 66%|██████▌ | 244152/371472 [8:51:34<10:47:47, 3.28it/s] 66%|██████▌ | 244153/371472 [8:51:34<10:40:38, 3.31it/s] 66%|██████▌ | 244154/371472 [8:51:34<10:41:15, 3.31it/s] 66%|██████▌ | 244155/371472 [8:51:35<10:36:59, 3.33it/s] 66%|██████▌ | 244156/371472 [8:51:35<10:23:57, 3.40it/s] 66%|██████▌ | 244157/371472 [8:51:35<10:21:10, 3.42it/s] 66%|██████▌ | 244158/371472 [8:51:35<10:25:38, 3.39it/s] 66%|██████▌ | 244159/371472 [8:51:36<10:20:20, 3.42it/s] 66%|██████▌ | 244160/371472 [8:51:36<10:05:41, 3.50it/s] {'loss': 2.6533, 'learning_rate': 4.086168631084488e-07, 'epoch': 10.52} + 66%|██████▌ | 244160/371472 [8:51:36<10:05:41, 3.50it/s] 66%|██████▌ | 244161/371472 [8:51:36<10:03:29, 3.52it/s] 66%|██████▌ | 244162/371472 [8:51:37<10:10:40, 3.47it/s] 66%|██████▌ | 244163/371472 [8:51:37<10:25:54, 3.39it/s] 66%|██████▌ | 244164/371472 [8:51:37<10:33:56, 3.35it/s] 66%|██████▌ | 244165/371472 [8:51:37<10:26:30, 3.39it/s] 66%|██████▌ | 244166/371472 [8:51:38<10:30:41, 3.36it/s] 66%|██████▌ | 244167/371472 [8:51:38<10:28:20, 3.38it/s] 66%|██████▌ | 244168/371472 [8:51:38<10:39:05, 3.32it/s] 66%|██████▌ | 244169/371472 [8:51:39<10:47:35, 3.28it/s] 66%|██████▌ | 244170/371472 [8:51:39<10:33:55, 3.35it/s] 66%|██████▌ | 244171/371472 [8:51:39<10:24:59, 3.39it/s] 66%|██████▌ | 244172/371472 [8:51:40<10:56:07, 3.23it/s] 66%|██████▌ | 244173/371472 [8:51:40<11:21:24, 3.11it/s] 66%|██████▌ | 244174/371472 [8:51:40<10:54:44, 3.24it/s] 66%|██████▌ | 244175/371472 [8:51:41<10:57:40, 3.23it/s] 66%|██████▌ | 244176/371472 [8:51:41<10:39:19, 3.32it/s] 66%|██████▌ | 244177/371472 [8:51:41<10:18:56, 3.43it/s] 66%|██████▌ | 244178/371472 [8:51:41<9:57:15, 3.55it/s] 66%|██████▌ | 244179/371472 [8:51:42<10:06:09, 3.50it/s] 66%|██████▌ | 244180/371472 [8:51:42<10:09:40, 3.48it/s] {'loss': 2.5078, 'learning_rate': 4.085683811329699e-07, 'epoch': 10.52} + 66%|██████▌ | 244180/371472 [8:51:42<10:09:40, 3.48it/s] 66%|██████▌ | 244181/371472 [8:51:42<10:06:16, 3.50it/s] 66%|██████▌ | 244182/371472 [8:51:43<10:39:02, 3.32it/s] 66%|██████▌ | 244183/371472 [8:51:43<10:45:39, 3.29it/s] 66%|██████▌ | 244184/371472 [8:51:43<11:12:06, 3.16it/s] 66%|██████▌ | 244185/371472 [8:51:43<10:57:52, 3.22it/s] 66%|██████▌ | 244186/371472 [8:51:44<10:47:02, 3.28it/s] 66%|██████▌ | 244187/371472 [8:51:44<10:45:05, 3.29it/s] 66%|██████▌ | 244188/371472 [8:51:44<10:25:45, 3.39it/s] 66%|██████▌ | 244189/371472 [8:51:45<10:14:06, 3.45it/s] 66%|██████▌ | 244190/371472 [8:51:45<10:16:27, 3.44it/s] 66%|██████▌ | 244191/371472 [8:51:45<10:17:09, 3.44it/s] 66%|██████▌ | 244192/371472 [8:51:46<10:21:24, 3.41it/s] 66%|██████▌ | 244193/371472 [8:51:46<10:15:35, 3.45it/s] 66%|██████▌ | 244194/371472 [8:51:46<10:15:55, 3.44it/s] 66%|██████▌ | 244195/371472 [8:51:46<10:44:43, 3.29it/s] 66%|██████▌ | 244196/371472 [8:51:47<10:35:23, 3.34it/s] 66%|██████▌ | 244197/371472 [8:51:47<10:59:28, 3.22it/s] 66%|██████▌ | 244198/371472 [8:51:47<10:45:36, 3.29it/s] 66%|██████▌ | 244199/371472 [8:51:48<10:59:14, 3.22it/s] 66%|██████▌ | 244200/371472 [8:51:48<10:44:09, 3.29it/s] {'loss': 2.6689, 'learning_rate': 4.08519899157491e-07, 'epoch': 10.52} + 66%|██████▌ | 244200/371472 [8:51:48<10:44:09, 3.29it/s] 66%|██████▌ | 244201/371472 [8:51:48<10:28:27, 3.38it/s] 66%|██████▌ | 244202/371472 [8:51:49<10:20:52, 3.42it/s] 66%|██████▌ | 244203/371472 [8:51:49<10:10:34, 3.47it/s] 66%|██████▌ | 244204/371472 [8:51:49<10:03:43, 3.51it/s] 66%|██████▌ | 244205/371472 [8:51:49<10:11:11, 3.47it/s] 66%|██████▌ | 244206/371472 [8:51:50<10:01:08, 3.53it/s] 66%|██████▌ | 244207/371472 [8:51:50<10:15:39, 3.45it/s] 66%|██████▌ | 244208/371472 [8:51:50<10:38:30, 3.32it/s] 66%|██████▌ | 244209/371472 [8:51:51<10:33:10, 3.35it/s] 66%|██████▌ | 244210/371472 [8:51:51<10:46:52, 3.28it/s] 66%|██████▌ | 244211/371472 [8:51:51<10:23:10, 3.40it/s] 66%|██████▌ | 244212/371472 [8:51:51<10:14:06, 3.45it/s] 66%|██████▌ | 244213/371472 [8:51:52<10:19:43, 3.42it/s] 66%|██████▌ | 244214/371472 [8:51:52<10:14:19, 3.45it/s] 66%|██████▌ | 244215/371472 [8:51:52<10:53:25, 3.25it/s] 66%|██████▌ | 244216/371472 [8:51:53<11:54:48, 2.97it/s] 66%|██████▌ | 244217/371472 [8:51:53<12:43:39, 2.78it/s] 66%|██████▌ | 244218/371472 [8:51:53<11:52:33, 2.98it/s] 66%|██████▌ | 244219/371472 [8:51:54<11:32:21, 3.06it/s] 66%|██████▌ | 244220/371472 [8:51:54<10:54:43, 3.24it/s] {'loss': 2.6541, 'learning_rate': 4.084714171820121e-07, 'epoch': 10.52} + 66%|██████▌ | 244220/371472 [8:51:54<10:54:43, 3.24it/s] 66%|██████▌ | 244221/371472 [8:51:54<11:27:10, 3.09it/s] 66%|██████▌ | 244222/371472 [8:51:55<11:30:32, 3.07it/s] 66%|██████▌ | 244223/371472 [8:51:55<11:05:28, 3.19it/s] 66%|██████▌ | 244224/371472 [8:51:55<10:39:57, 3.31it/s] 66%|██████▌ | 244225/371472 [8:51:56<10:52:58, 3.25it/s] 66%|██████▌ | 244226/371472 [8:51:56<11:04:21, 3.19it/s] 66%|██████▌ | 244227/371472 [8:51:56<11:42:28, 3.02it/s] 66%|██████▌ | 244228/371472 [8:51:57<11:13:48, 3.15it/s] 66%|██████▌ | 244229/371472 [8:51:57<11:00:26, 3.21it/s] 66%|██████▌ | 244230/371472 [8:51:57<10:33:11, 3.35it/s] 66%|██████▌ | 244231/371472 [8:51:57<10:29:59, 3.37it/s] 66%|██████▌ | 244232/371472 [8:51:58<10:09:54, 3.48it/s] 66%|██████▌ | 244233/371472 [8:51:58<10:20:29, 3.42it/s] 66%|██████▌ | 244234/371472 [8:51:58<11:07:15, 3.18it/s] 66%|██████▌ | 244235/371472 [8:51:59<10:38:06, 3.32it/s] 66%|██████▌ | 244236/371472 [8:51:59<10:28:54, 3.37it/s] 66%|██████▌ | 244237/371472 [8:51:59<10:31:50, 3.36it/s] 66%|██████▌ | 244238/371472 [8:52:00<10:58:29, 3.22it/s] 66%|██████▌ | 244239/371472 [8:52:00<10:46:12, 3.28it/s] 66%|██████▌ | 244240/371472 [8:52:00<10:36:15, 3.33it/s] {'loss': 2.5645, 'learning_rate': 4.0842293520653316e-07, 'epoch': 10.52} + 66%|██████▌ | 244240/371472 [8:52:00<10:36:15, 3.33it/s] 66%|██████▌ | 244241/371472 [8:52:00<10:30:50, 3.36it/s] 66%|██████▌ | 244242/371472 [8:52:01<10:12:33, 3.46it/s] 66%|██████▌ | 244243/371472 [8:52:01<10:02:38, 3.52it/s] 66%|██████▌ | 244244/371472 [8:52:01<9:58:14, 3.54it/s] 66%|██████▌ | 244245/371472 [8:52:02<10:06:38, 3.50it/s] 66%|██████▌ | 244246/371472 [8:52:02<10:25:38, 3.39it/s] 66%|██████▌ | 244247/371472 [8:52:02<10:11:07, 3.47it/s] 66%|██████▌ | 244248/371472 [8:52:02<10:29:38, 3.37it/s] 66%|██████▌ | 244249/371472 [8:52:03<10:39:05, 3.32it/s] 66%|██████▌ | 244250/371472 [8:52:03<10:47:52, 3.27it/s] 66%|██████▌ | 244251/371472 [8:52:03<10:37:05, 3.33it/s] 66%|██████▌ | 244252/371472 [8:52:04<10:27:37, 3.38it/s] 66%|██████▌ | 244253/371472 [8:52:04<11:18:58, 3.12it/s] 66%|██████▌ | 244254/371472 [8:52:04<11:05:26, 3.19it/s] 66%|██████▌ | 244255/371472 [8:52:05<10:54:34, 3.24it/s] 66%|██████▌ | 244256/371472 [8:52:05<10:46:47, 3.28it/s] 66%|██████▌ | 244257/371472 [8:52:05<10:32:38, 3.35it/s] 66%|██████▌ | 244258/371472 [8:52:06<10:29:27, 3.37it/s] 66%|██████▌ | 244259/371472 [8:52:06<10:34:38, 3.34it/s] 66%|██████▌ | 244260/371472 [8:52:06<10:59:39, 3.21it/s] {'loss': 2.7696, 'learning_rate': 4.083744532310543e-07, 'epoch': 10.52} + 66%|██████▌ | 244260/371472 [8:52:06<10:59:39, 3.21it/s] 66%|██████▌ | 244261/371472 [8:52:06<10:39:25, 3.32it/s] 66%|██████▌ | 244262/371472 [8:52:07<11:00:52, 3.21it/s] 66%|██████▌ | 244263/371472 [8:52:07<10:44:54, 3.29it/s] 66%|██████▌ | 244264/371472 [8:52:07<11:21:42, 3.11it/s] 66%|██████▌ | 244265/371472 [8:52:08<11:43:22, 3.01it/s] 66%|██████▌ | 244266/371472 [8:52:08<11:15:04, 3.14it/s] 66%|██████▌ | 244267/371472 [8:52:08<10:46:18, 3.28it/s] 66%|██████▌ | 244268/371472 [8:52:09<10:38:23, 3.32it/s] 66%|██████▌ | 244269/371472 [8:52:09<11:45:08, 3.01it/s] 66%|██████▌ | 244270/371472 [8:52:09<11:19:29, 3.12it/s] 66%|██████▌ | 244271/371472 [8:52:10<11:00:59, 3.21it/s] 66%|██████▌ | 244272/371472 [8:52:10<11:57:04, 2.96it/s] 66%|██████▌ | 244273/371472 [8:52:10<12:04:45, 2.93it/s] 66%|██████▌ | 244274/371472 [8:52:11<11:38:35, 3.03it/s] 66%|██████▌ | 244275/371472 [8:52:11<11:08:19, 3.17it/s] 66%|██████▌ | 244276/371472 [8:52:11<11:07:04, 3.18it/s] 66%|██████▌ | 244277/371472 [8:52:12<10:52:03, 3.25it/s] 66%|██████▌ | 244278/371472 [8:52:12<11:25:58, 3.09it/s] 66%|██████▌ | 244279/371472 [8:52:12<10:57:28, 3.22it/s] 66%|██████▌ | 244280/371472 [8:52:12<10:49:41, 3.26it/s] {'loss': 2.7563, 'learning_rate': 4.083259712555753e-07, 'epoch': 10.52} + 66%|██████▌ | 244280/371472 [8:52:12<10:49:41, 3.26it/s] 66%|██████▌ | 244281/371472 [8:52:13<10:46:37, 3.28it/s] 66%|██████▌ | 244282/371472 [8:52:13<10:34:57, 3.34it/s] 66%|██████▌ | 244283/371472 [8:52:13<10:35:01, 3.34it/s] 66%|██████▌ | 244284/371472 [8:52:14<10:50:07, 3.26it/s] 66%|██████▌ | 244285/371472 [8:52:14<11:28:29, 3.08it/s] 66%|██████▌ | 244286/371472 [8:52:14<11:09:02, 3.17it/s] 66%|██████▌ | 244287/371472 [8:52:15<11:28:46, 3.08it/s] 66%|██████▌ | 244288/371472 [8:52:15<11:26:03, 3.09it/s] 66%|██████▌ | 244289/371472 [8:52:15<10:56:59, 3.23it/s] 66%|██████▌ | 244290/371472 [8:52:16<10:52:39, 3.25it/s] 66%|██████▌ | 244291/371472 [8:52:16<11:38:26, 3.03it/s] 66%|██████▌ | 244292/371472 [8:52:16<11:00:32, 3.21it/s] 66%|██████▌ | 244293/371472 [8:52:17<10:32:32, 3.35it/s] 66%|██████▌ | 244294/371472 [8:52:17<10:42:42, 3.30it/s] 66%|██████▌ | 244295/371472 [8:52:17<10:23:44, 3.40it/s] 66%|██████▌ | 244296/371472 [8:52:17<10:23:48, 3.40it/s] 66%|██████▌ | 244297/371472 [8:52:18<10:11:56, 3.46it/s] 66%|██████▌ | 244298/371472 [8:52:18<10:11:54, 3.46it/s] 66%|██████▌ | 244299/371472 [8:52:18<10:06:17, 3.50it/s] 66%|██████▌ | 244300/371472 [8:52:19<10:22:42, 3.40it/s] {'loss': 2.6911, 'learning_rate': 4.0827748928009653e-07, 'epoch': 10.52} + 66%|██████▌ | 244300/371472 [8:52:19<10:22:42, 3.40it/s] 66%|██████▌ | 244301/371472 [8:52:19<10:35:33, 3.33it/s] 66%|██████▌ | 244302/371472 [8:52:19<10:20:41, 3.41it/s] 66%|██████▌ | 244303/371472 [8:52:19<10:33:05, 3.35it/s] 66%|██████▌ | 244304/371472 [8:52:20<10:39:44, 3.31it/s] 66%|██████▌ | 244305/371472 [8:52:20<10:35:23, 3.34it/s] 66%|██████▌ | 244306/371472 [8:52:20<10:24:51, 3.39it/s] 66%|██████▌ | 244307/371472 [8:52:21<10:12:32, 3.46it/s] 66%|██████▌ | 244308/371472 [8:52:21<10:16:30, 3.44it/s] 66%|██████▌ | 244309/371472 [8:52:21<10:12:14, 3.46it/s] 66%|██████▌ | 244310/371472 [8:52:21<10:04:54, 3.50it/s] 66%|██████▌ | 244311/371472 [8:52:22<9:57:20, 3.55it/s] 66%|██████▌ | 244312/371472 [8:52:22<9:55:22, 3.56it/s] 66%|██████▌ | 244313/371472 [8:52:22<9:54:55, 3.56it/s] 66%|██████▌ | 244314/371472 [8:52:23<9:54:49, 3.56it/s] 66%|██████▌ | 244315/371472 [8:52:23<10:02:08, 3.52it/s] 66%|██████▌ | 244316/371472 [8:52:23<10:08:14, 3.48it/s] 66%|██████▌ | 244317/371472 [8:52:23<10:18:34, 3.43it/s] 66%|██████▌ | 244318/371472 [8:52:24<10:50:12, 3.26it/s] 66%|██████▌ | 244319/371472 [8:52:24<10:52:22, 3.25it/s] 66%|██████▌ | 244320/371472 [8:52:25<11:45:07, 3.01it/s] {'loss': 2.7087, 'learning_rate': 4.0822900730461755e-07, 'epoch': 10.52} + 66%|██████▌ | 244320/371472 [8:52:25<11:45:07, 3.01it/s] 66%|██████▌ | 244321/371472 [8:52:25<11:25:54, 3.09it/s] 66%|██████▌ | 244322/371472 [8:52:25<11:18:20, 3.12it/s] 66%|██████▌ | 244323/371472 [8:52:25<10:46:17, 3.28it/s] 66%|██████▌ | 244324/371472 [8:52:26<11:37:10, 3.04it/s] 66%|██████▌ | 244325/371472 [8:52:26<11:15:47, 3.14it/s] 66%|██████▌ | 244326/371472 [8:52:26<11:02:34, 3.20it/s] 66%|██████▌ | 244327/371472 [8:52:27<10:52:22, 3.25it/s] 66%|██████▌ | 244328/371472 [8:52:27<10:38:12, 3.32it/s] 66%|██████▌ | 244329/371472 [8:52:27<10:29:36, 3.37it/s] 66%|██████▌ | 244330/371472 [8:52:28<11:16:25, 3.13it/s] 66%|██████▌ | 244331/371472 [8:52:28<10:58:44, 3.22it/s] 66%|██████▌ | 244332/371472 [8:52:28<10:44:40, 3.29it/s] 66%|██████▌ | 244333/371472 [8:52:28<10:23:33, 3.40it/s] 66%|██████▌ | 244334/371472 [8:52:29<10:08:45, 3.48it/s] 66%|██████▌ | 244335/371472 [8:52:29<10:01:00, 3.53it/s] 66%|██████▌ | 244336/371472 [8:52:29<9:58:47, 3.54it/s] 66%|██████▌ | 244337/371472 [8:52:30<10:22:04, 3.41it/s] 66%|██████▌ | 244338/371472 [8:52:30<11:02:22, 3.20it/s] 66%|██████▌ | 244339/371472 [8:52:30<10:41:58, 3.30it/s] 66%|██████▌ | 244340/371472 [8:52:31<10:42:19, 3.30it/s] {'loss': 2.7004, 'learning_rate': 4.0818052532913873e-07, 'epoch': 10.52} + 66%|██████▌ | 244340/371472 [8:52:31<10:42:19, 3.30it/s] 66%|██████▌ | 244341/371472 [8:52:31<10:31:09, 3.36it/s] 66%|██████▌ | 244342/371472 [8:52:31<10:26:05, 3.38it/s] 66%|██████▌ | 244343/371472 [8:52:31<10:21:17, 3.41it/s] 66%|██████▌ | 244344/371472 [8:52:32<10:09:31, 3.48it/s] 66%|██████▌ | 244345/371472 [8:52:32<10:23:10, 3.40it/s] 66%|██████▌ | 244346/371472 [8:52:32<10:22:46, 3.40it/s] 66%|██████▌ | 244347/371472 [8:52:33<10:23:18, 3.40it/s] 66%|██████▌ | 244348/371472 [8:52:33<11:00:41, 3.21it/s] 66%|██████▌ | 244349/371472 [8:52:33<10:53:11, 3.24it/s] 66%|██████▌ | 244350/371472 [8:52:34<10:42:48, 3.30it/s] 66%|██████▌ | 244351/371472 [8:52:34<11:00:06, 3.21it/s] 66%|██████▌ | 244352/371472 [8:52:34<11:03:34, 3.19it/s] 66%|██████▌ | 244353/371472 [8:52:35<10:52:43, 3.25it/s] 66%|██████▌ | 244354/371472 [8:52:35<10:28:36, 3.37it/s] 66%|██████▌ | 244355/371472 [8:52:35<11:15:53, 3.13it/s] 66%|██████▌ | 244356/371472 [8:52:35<10:41:27, 3.30it/s] 66%|██████▌ | 244357/371472 [8:52:36<11:29:55, 3.07it/s] 66%|██████▌ | 244358/371472 [8:52:36<10:53:27, 3.24it/s] 66%|██████▌ | 244359/371472 [8:52:36<10:38:43, 3.32it/s] 66%|██████▌ | 244360/371472 [8:52:37<10:16:25, 3.44it/s] {'loss': 2.616, 'learning_rate': 4.081320433536598e-07, 'epoch': 10.53} + 66%|██████▌ | 244360/371472 [8:52:37<10:16:25, 3.44it/s] 66%|██████▌ | 244361/371472 [8:52:37<10:26:45, 3.38it/s] 66%|██████▌ | 244362/371472 [8:52:37<10:26:10, 3.38it/s] 66%|██████▌ | 244363/371472 [8:52:38<10:33:08, 3.35it/s] 66%|██████▌ | 244364/371472 [8:52:38<10:23:52, 3.40it/s] 66%|██████▌ | 244365/371472 [8:52:38<10:08:51, 3.48it/s] 66%|██████▌ | 244366/371472 [8:52:38<10:15:10, 3.44it/s] 66%|██████▌ | 244367/371472 [8:52:39<10:26:18, 3.38it/s] 66%|██████▌ | 244368/371472 [8:52:39<10:45:21, 3.28it/s] 66%|██████▌ | 244369/371472 [8:52:39<10:39:19, 3.31it/s] 66%|██████▌ | 244370/371472 [8:52:40<10:28:58, 3.37it/s] 66%|██████▌ | 244371/371472 [8:52:40<10:19:02, 3.42it/s] 66%|██████▌ | 244372/371472 [8:52:40<10:17:19, 3.43it/s] 66%|██████▌ | 244373/371472 [8:52:40<10:32:02, 3.35it/s] 66%|██████▌ | 244374/371472 [8:52:41<10:11:00, 3.47it/s] 66%|██████▌ | 244375/371472 [8:52:41<10:08:32, 3.48it/s] 66%|██████▌ | 244376/371472 [8:52:41<9:58:01, 3.54it/s] 66%|██████▌ | 244377/371472 [8:52:42<9:48:52, 3.60it/s] 66%|██████▌ | 244378/371472 [8:52:42<10:16:54, 3.43it/s] 66%|██████▌ | 244379/371472 [8:52:42<11:07:31, 3.17it/s] 66%|██████▌ | 244380/371472 [8:52:43<10:59:42, 3.21it/s] {'loss': 2.6794, 'learning_rate': 4.0808356137818093e-07, 'epoch': 10.53} + 66%|██████▌ | 244380/371472 [8:52:43<10:59:42, 3.21it/s] 66%|██████▌ | 244381/371472 [8:52:43<10:51:33, 3.25it/s] 66%|██████▌ | 244382/371472 [8:52:43<10:21:15, 3.41it/s] 66%|██████▌ | 244383/371472 [8:52:43<10:14:45, 3.45it/s] 66%|██████▌ | 244384/371472 [8:52:44<10:13:00, 3.46it/s] 66%|██████▌ | 244385/371472 [8:52:44<11:06:43, 3.18it/s] 66%|██████▌ | 244386/371472 [8:52:44<11:01:46, 3.20it/s] 66%|██████▌ | 244387/371472 [8:52:45<10:55:56, 3.23it/s] 66%|██████▌ | 244388/371472 [8:52:45<11:25:25, 3.09it/s] 66%|██████▌ | 244389/371472 [8:52:45<11:04:30, 3.19it/s] 66%|██████▌ | 244390/371472 [8:52:46<11:08:06, 3.17it/s] 66%|██████▌ | 244391/371472 [8:52:46<11:08:27, 3.17it/s] 66%|██████▌ | 244392/371472 [8:52:46<10:36:15, 3.33it/s] 66%|██████▌ | 244393/371472 [8:52:47<10:41:27, 3.30it/s] 66%|██████▌ | 244394/371472 [8:52:47<10:43:03, 3.29it/s] 66%|██████▌ | 244395/371472 [8:52:47<10:34:25, 3.34it/s] 66%|██████▌ | 244396/371472 [8:52:47<11:08:20, 3.17it/s] 66%|██████▌ | 244397/371472 [8:52:48<10:43:59, 3.29it/s] 66%|██████▌ | 244398/371472 [8:52:48<11:00:38, 3.21it/s] 66%|██████▌ | 244399/371472 [8:52:48<10:40:30, 3.31it/s] 66%|██████▌ | 244400/371472 [8:52:49<10:35:20, 3.33it/s] {'loss': 2.7325, 'learning_rate': 4.08035079402702e-07, 'epoch': 10.53} + 66%|██████▌ | 244400/371472 [8:52:49<10:35:20, 3.33it/s] 66%|██████▌ | 244401/371472 [8:52:49<10:17:38, 3.43it/s] 66%|██████▌ | 244402/371472 [8:52:49<10:09:17, 3.48it/s] 66%|██████▌ | 244403/371472 [8:52:50<10:30:46, 3.36it/s] 66%|██████▌ | 244404/371472 [8:52:50<10:26:09, 3.38it/s] 66%|██████▌ | 244405/371472 [8:52:50<10:46:05, 3.28it/s] 66%|██████▌ | 244406/371472 [8:52:50<10:52:52, 3.24it/s] 66%|██████▌ | 244407/371472 [8:52:51<11:13:48, 3.14it/s] 66%|██████▌ | 244408/371472 [8:52:51<11:40:55, 3.02it/s] 66%|██████▌ | 244409/371472 [8:52:51<11:30:53, 3.07it/s] 66%|██████▌ | 244410/371472 [8:52:52<11:14:12, 3.14it/s] 66%|██████▌ | 244411/371472 [8:52:52<11:03:35, 3.19it/s] 66%|██████▌ | 244412/371472 [8:52:52<10:57:12, 3.22it/s] 66%|██████▌ | 244413/371472 [8:52:53<11:56:43, 2.95it/s] 66%|██████▌ | 244414/371472 [8:52:53<11:30:39, 3.07it/s] 66%|██████▌ | 244415/371472 [8:52:53<11:06:56, 3.18it/s] 66%|██████▌ | 244416/371472 [8:52:54<10:47:13, 3.27it/s] 66%|██████▌ | 244417/371472 [8:52:54<10:44:24, 3.29it/s] 66%|██████▌ | 244418/371472 [8:52:54<10:14:16, 3.45it/s] 66%|██████▌ | 244419/371472 [8:52:54<10:03:11, 3.51it/s] 66%|██████▌ | 244420/371472 [8:52:55<10:47:28, 3.27it/s] {'loss': 2.5289, 'learning_rate': 4.079865974272232e-07, 'epoch': 10.53} + 66%|██████▌ | 244420/371472 [8:52:55<10:47:28, 3.27it/s] 66%|██████▌ | 244421/371472 [8:52:55<10:53:12, 3.24it/s] 66%|██████▌ | 244422/371472 [8:52:55<10:39:12, 3.31it/s] 66%|██████▌ | 244423/371472 [8:52:56<10:45:11, 3.28it/s] 66%|██████▌ | 244424/371472 [8:52:56<10:35:16, 3.33it/s] 66%|██████▌ | 244425/371472 [8:52:56<10:23:52, 3.39it/s] 66%|██████▌ | 244426/371472 [8:52:57<10:14:25, 3.45it/s] 66%|██████▌ | 244427/371472 [8:52:57<10:27:28, 3.37it/s] 66%|██████▌ | 244428/371472 [8:52:57<10:53:11, 3.24it/s] 66%|██████▌ | 244429/371472 [8:52:58<10:54:12, 3.24it/s] 66%|██████▌ | 244430/371472 [8:52:58<10:40:11, 3.31it/s] 66%|██████▌ | 244431/371472 [8:52:58<10:41:24, 3.30it/s] 66%|██████▌ | 244432/371472 [8:52:59<11:10:57, 3.16it/s] 66%|██████▌ | 244433/371472 [8:52:59<10:36:09, 3.33it/s] 66%|██████▌ | 244434/371472 [8:52:59<10:53:00, 3.24it/s] 66%|██████▌ | 244435/371472 [8:52:59<10:53:59, 3.24it/s] 66%|██████▌ | 244436/371472 [8:53:00<10:41:27, 3.30it/s] 66%|██████▌ | 244437/371472 [8:53:00<11:21:55, 3.10it/s] 66%|██████▌ | 244438/371472 [8:53:00<10:49:19, 3.26it/s] 66%|██████▌ | 244439/371472 [8:53:01<11:31:56, 3.06it/s] 66%|██████▌ | 244440/371472 [8:53:01<10:58:24, 3.22it/s] {'loss': 2.8028, 'learning_rate': 4.0793811545174425e-07, 'epoch': 10.53} + 66%|██████▌ | 244440/371472 [8:53:01<10:58:24, 3.22it/s] 66%|██████▌ | 244441/371472 [8:53:01<10:44:32, 3.28it/s] 66%|██████▌ | 244442/371472 [8:53:02<10:37:09, 3.32it/s] 66%|██████▌ | 244443/371472 [8:53:02<10:52:06, 3.25it/s] 66%|██████▌ | 244444/371472 [8:53:02<10:49:20, 3.26it/s] 66%|██████▌ | 244445/371472 [8:53:02<10:27:55, 3.37it/s] 66%|██████▌ | 244446/371472 [8:53:03<10:56:56, 3.22it/s] 66%|██████▌ | 244447/371472 [8:53:03<10:40:15, 3.31it/s] 66%|██████▌ | 244448/371472 [8:53:03<10:27:02, 3.38it/s] 66%|██████▌ | 244449/371472 [8:53:04<10:25:48, 3.38it/s] 66%|██████▌ | 244450/371472 [8:53:04<10:15:39, 3.44it/s] 66%|██████▌ | 244451/371472 [8:53:04<10:13:54, 3.45it/s] 66%|██████▌ | 244452/371472 [8:53:05<10:16:51, 3.43it/s] 66%|██████▌ | 244453/371472 [8:53:05<10:10:04, 3.47it/s] 66%|██████▌ | 244454/371472 [8:53:05<10:08:47, 3.48it/s] 66%|██████▌ | 244455/371472 [8:53:05<10:30:02, 3.36it/s] 66%|██████▌ | 244456/371472 [8:53:06<11:32:41, 3.06it/s] 66%|██████▌ | 244457/371472 [8:53:06<11:10:10, 3.16it/s] 66%|██████▌ | 244458/371472 [8:53:06<11:21:41, 3.11it/s] 66%|██████▌ | 244459/371472 [8:53:07<11:00:20, 3.21it/s] 66%|██████▌ | 244460/371472 [8:53:07<11:36:04, 3.04it/s] {'loss': 2.7712, 'learning_rate': 4.0788963347626537e-07, 'epoch': 10.53} + 66%|██████▌ | 244460/371472 [8:53:07<11:36:04, 3.04it/s] 66%|██████▌ | 244461/371472 [8:53:07<11:25:45, 3.09it/s] 66%|██████▌ | 244462/371472 [8:53:08<11:06:56, 3.17it/s] 66%|██████▌ | 244463/371472 [8:53:08<10:56:43, 3.22it/s] 66%|██████▌ | 244464/371472 [8:53:08<10:40:14, 3.31it/s] 66%|██████▌ | 244465/371472 [8:53:09<10:43:28, 3.29it/s] 66%|██████▌ | 244466/371472 [8:53:09<10:28:38, 3.37it/s] 66%|██████▌ | 244467/371472 [8:53:09<10:20:11, 3.41it/s] 66%|██████▌ | 244468/371472 [8:53:09<10:14:50, 3.44it/s] 66%|██████▌ | 244469/371472 [8:53:10<10:15:22, 3.44it/s] 66%|██████▌ | 244470/371472 [8:53:10<10:29:33, 3.36it/s] 66%|██████▌ | 244471/371472 [8:53:10<10:14:20, 3.45it/s] 66%|██████▌ | 244472/371472 [8:53:11<10:34:40, 3.34it/s] 66%|██████▌ | 244473/371472 [8:53:11<10:54:56, 3.23it/s] 66%|██████▌ | 244474/371472 [8:53:11<11:21:42, 3.10it/s] 66%|██████▌ | 244475/371472 [8:53:12<11:11:09, 3.15it/s] 66%|██████▌ | 244476/371472 [8:53:12<11:30:12, 3.07it/s] 66%|██████▌ | 244477/371472 [8:53:12<10:56:56, 3.22it/s] 66%|██████▌ | 244478/371472 [8:53:13<10:51:39, 3.25it/s] 66%|██████▌ | 244479/371472 [8:53:13<11:08:13, 3.17it/s] 66%|██████▌ | 244480/371472 [8:53:13<11:00:40, 3.20it/s] {'loss': 2.7467, 'learning_rate': 4.0784115150078644e-07, 'epoch': 10.53} + 66%|██████▌ | 244480/371472 [8:53:13<11:00:40, 3.20it/s] 66%|██████▌ | 244481/371472 [8:53:13<10:54:39, 3.23it/s] 66%|██████▌ | 244482/371472 [8:53:14<10:44:58, 3.28it/s] 66%|██████▌ | 244483/371472 [8:53:14<10:46:37, 3.27it/s] 66%|██████▌ | 244484/371472 [8:53:14<10:43:51, 3.29it/s] 66%|██████▌ | 244485/371472 [8:53:15<10:36:39, 3.32it/s] 66%|██████▌ | 244486/371472 [8:53:15<10:27:12, 3.37it/s] 66%|██████▌ | 244487/371472 [8:53:15<10:39:15, 3.31it/s] 66%|██████▌ | 244488/371472 [8:53:16<10:32:31, 3.35it/s] 66%|██████▌ | 244489/371472 [8:53:16<10:25:10, 3.39it/s] 66%|██████▌ | 244490/371472 [8:53:16<11:00:13, 3.21it/s] 66%|██████▌ | 244491/371472 [8:53:17<11:22:13, 3.10it/s] 66%|██████▌ | 244492/371472 [8:53:17<11:56:10, 2.96it/s] 66%|██████▌ | 244493/371472 [8:53:17<12:13:52, 2.88it/s] 66%|██████▌ | 244494/371472 [8:53:18<13:14:40, 2.66it/s] 66%|██████▌ | 244495/371472 [8:53:18<12:24:42, 2.84it/s] 66%|██████▌ | 244496/371472 [8:53:18<11:51:28, 2.97it/s] 66%|██████▌ | 244497/371472 [8:53:19<11:32:38, 3.06it/s] 66%|██████▌ | 244498/371472 [8:53:19<11:23:55, 3.09it/s] 66%|██████▌ | 244499/371472 [8:53:19<11:01:18, 3.20it/s] 66%|██████▌ | 244500/371472 [8:53:20<10:42:31, 3.29it/s] {'loss': 2.7799, 'learning_rate': 4.077926695253076e-07, 'epoch': 10.53} + 66%|██████▌ | 244500/371472 [8:53:20<10:42:31, 3.29it/s] 66%|██████▌ | 244501/371472 [8:53:20<10:28:28, 3.37it/s] 66%|██████▌ | 244502/371472 [8:53:20<10:19:07, 3.42it/s] 66%|██���███▌ | 244503/371472 [8:53:20<10:40:09, 3.31it/s] 66%|██████▌ | 244504/371472 [8:53:21<10:50:15, 3.25it/s] 66%|██████▌ | 244505/371472 [8:53:21<10:56:15, 3.22it/s] 66%|██████▌ | 244506/371472 [8:53:21<10:52:43, 3.24it/s] 66%|██████▌ | 244507/371472 [8:53:22<10:58:29, 3.21it/s] 66%|██████▌ | 244508/371472 [8:53:22<10:50:33, 3.25it/s] 66%|██████▌ | 244509/371472 [8:53:22<10:48:38, 3.26it/s] 66%|██████▌ | 244510/371472 [8:53:23<10:39:07, 3.31it/s] 66%|██████▌ | 244511/371472 [8:53:23<11:11:03, 3.15it/s] 66%|██████▌ | 244512/371472 [8:53:23<11:01:47, 3.20it/s] 66%|██████▌ | 244513/371472 [8:53:24<10:35:47, 3.33it/s] 66%|██████▌ | 244514/371472 [8:53:24<10:26:32, 3.38it/s] 66%|██████▌ | 244515/371472 [8:53:24<10:35:56, 3.33it/s] 66%|██████▌ | 244516/371472 [8:53:24<10:40:18, 3.30it/s] 66%|██████▌ | 244517/371472 [8:53:25<10:45:25, 3.28it/s] 66%|██████▌ | 244518/371472 [8:53:25<10:49:42, 3.26it/s] 66%|██████▌ | 244519/371472 [8:53:25<11:02:58, 3.19it/s] 66%|██████▌ | 244520/371472 [8:53:26<11:17:31, 3.12it/s] {'loss': 2.727, 'learning_rate': 4.0774418754982864e-07, 'epoch': 10.53} + 66%|██████▌ | 244520/371472 [8:53:26<11:17:31, 3.12it/s] 66%|██████▌ | 244521/371472 [8:53:26<10:53:32, 3.24it/s] 66%|██████▌ | 244522/371472 [8:53:26<11:05:20, 3.18it/s] 66%|██████▌ | 244523/371472 [8:53:27<10:47:20, 3.27it/s] 66%|██████▌ | 244524/371472 [8:53:27<10:40:04, 3.31it/s] 66%|██████▌ | 244525/371472 [8:53:27<10:25:05, 3.38it/s] 66%|██████▌ | 244526/371472 [8:53:28<11:21:59, 3.10it/s] 66%|██████▌ | 244527/371472 [8:53:28<10:56:58, 3.22it/s] 66%|██████▌ | 244528/371472 [8:53:28<11:00:22, 3.20it/s] 66%|██████▌ | 244529/371472 [8:53:28<10:43:15, 3.29it/s] 66%|██████▌ | 244530/371472 [8:53:29<10:37:55, 3.32it/s] 66%|██████▌ | 244531/371472 [8:53:29<10:40:37, 3.30it/s] 66%|██████▌ | 244532/371472 [8:53:29<10:30:56, 3.35it/s] 66%|██████▌ | 244533/371472 [8:53:30<10:35:15, 3.33it/s] 66%|██████▌ | 244534/371472 [8:53:30<10:19:00, 3.42it/s] 66%|██████▌ | 244535/371472 [8:53:30<10:08:24, 3.48it/s] 66%|██████▌ | 244536/371472 [8:53:30<10:19:16, 3.42it/s] 66%|██████▌ | 244537/371472 [8:53:31<10:07:46, 3.48it/s] 66%|██████▌ | 244538/371472 [8:53:31<10:01:47, 3.52it/s] 66%|██████▌ | 244539/371472 [8:53:31<10:03:16, 3.51it/s] 66%|██████▌ | 244540/371472 [8:53:32<10:01:56, 3.51it/s] {'loss': 2.8432, 'learning_rate': 4.076957055743498e-07, 'epoch': 10.53} + 66%|██████▌ | 244540/371472 [8:53:32<10:01:56, 3.51it/s] 66%|██████▌ | 244541/371472 [8:53:32<9:59:40, 3.53it/s] 66%|██████▌ | 244542/371472 [8:53:32<10:15:46, 3.44it/s] 66%|██████▌ | 244543/371472 [8:53:32<10:18:09, 3.42it/s] 66%|██████▌ | 244544/371472 [8:53:33<10:26:14, 3.38it/s] 66%|██████▌ | 244545/371472 [8:53:33<10:11:53, 3.46it/s] 66%|██████▌ | 244546/371472 [8:53:33<10:44:51, 3.28it/s] 66%|██████▌ | 244547/371472 [8:53:34<10:56:29, 3.22it/s] 66%|██████▌ | 244548/371472 [8:53:34<10:28:48, 3.36it/s] 66%|██████▌ | 244549/371472 [8:53:34<10:20:52, 3.41it/s] 66%|██████▌ | 244550/371472 [8:53:35<10:58:26, 3.21it/s] 66%|██████▌ | 244551/371472 [8:53:35<11:02:33, 3.19it/s] 66%|██████▌ | 244552/371472 [8:53:35<10:58:21, 3.21it/s] 66%|██████▌ | 244553/371472 [8:53:36<11:19:35, 3.11it/s] 66%|██████▌ | 244554/371472 [8:53:36<11:01:14, 3.20it/s] 66%|██████▌ | 244555/371472 [8:53:36<11:27:03, 3.08it/s] 66%|██████▌ | 244556/371472 [8:53:37<11:03:49, 3.19it/s] 66%|██████▌ | 244557/371472 [8:53:37<10:54:18, 3.23it/s] 66%|██████▌ | 244558/371472 [8:53:37<10:59:05, 3.21it/s] 66%|██████▌ | 244559/371472 [8:53:37<10:45:24, 3.28it/s] 66%|██████▌ | 244560/371472 [8:53:38<10:56:14, 3.22it/s] {'loss': 2.6869, 'learning_rate': 4.076472235988709e-07, 'epoch': 10.53} + 66%|██████▌ | 244560/371472 [8:53:38<10:56:14, 3.22it/s] 66%|██████▌ | 244561/371472 [8:53:38<10:30:19, 3.36it/s] 66%|██████▌ | 244562/371472 [8:53:38<10:39:38, 3.31it/s] 66%|██████▌ | 244563/371472 [8:53:39<10:31:09, 3.35it/s] 66%|██████▌ | 244564/371472 [8:53:39<10:33:20, 3.34it/s] 66%|██████▌ | 244565/371472 [8:53:39<10:27:07, 3.37it/s] 66%|██████▌ | 244566/371472 [8:53:40<10:14:05, 3.44it/s] 66%|██████▌ | 244567/371472 [8:53:40<10:17:09, 3.43it/s] 66%|██████▌ | 244568/371472 [8:53:40<10:15:48, 3.43it/s] 66%|██████▌ | 244569/371472 [8:53:40<10:58:45, 3.21it/s] 66%|██████▌ | 244570/371472 [8:53:41<10:32:24, 3.34it/s] 66%|██████▌ | 244571/371472 [8:53:41<10:32:16, 3.35it/s] 66%|██████▌ | 244572/371472 [8:53:41<10:13:35, 3.45it/s] 66%|██████▌ | 244573/371472 [8:53:42<10:12:05, 3.46it/s] 66%|██████▌ | 244574/371472 [8:53:42<10:10:58, 3.46it/s] 66%|██████▌ | 244575/371472 [8:53:42<10:19:39, 3.41it/s] 66%|██████▌ | 244576/371472 [8:53:42<10:32:21, 3.34it/s] 66%|██████▌ | 244577/371472 [8:53:43<11:25:25, 3.09it/s] 66%|██████▌ | 244578/371472 [8:53:43<11:34:04, 3.05it/s] 66%|██████▌ | 244579/371472 [8:53:43<11:06:33, 3.17it/s] 66%|██████▌ | 244580/371472 [8:53:44<12:02:00, 2.93it/s] {'loss': 2.7177, 'learning_rate': 4.07598741623392e-07, 'epoch': 10.53} + 66%|██████▌ | 244580/371472 [8:53:44<12:02:00, 2.93it/s] 66%|██████▌ | 244581/371472 [8:53:44<11:38:54, 3.03it/s] 66%|██████▌ | 244582/371472 [8:53:44<11:12:46, 3.14it/s] 66%|██████▌ | 244583/371472 [8:53:45<10:47:03, 3.27it/s] 66%|██████▌ | 244584/371472 [8:53:45<10:34:15, 3.33it/s] 66%|██████▌ | 244585/371472 [8:53:45<11:00:07, 3.20it/s] 66%|██████▌ | 244586/371472 [8:53:46<10:41:37, 3.30it/s] 66%|██████▌ | 244587/371472 [8:53:46<10:40:16, 3.30it/s] 66%|██████▌ | 244588/371472 [8:53:46<10:51:57, 3.24it/s] 66%|██████▌ | 244589/371472 [8:53:47<10:41:07, 3.30it/s] 66%|██████▌ | 244590/371472 [8:53:47<10:57:07, 3.22it/s] 66%|██████▌ | 244591/371472 [8:53:47<10:40:16, 3.30it/s] 66%|██████▌ | 244592/371472 [8:53:48<11:04:31, 3.18it/s] 66%|██████▌ | 244593/371472 [8:53:48<10:57:30, 3.22it/s] 66%|██████▌ | 244594/371472 [8:53:48<10:42:31, 3.29it/s] 66%|██████▌ | 244595/371472 [8:53:48<10:38:44, 3.31it/s] 66%|██████▌ | 244596/371472 [8:53:49<10:40:39, 3.30it/s] 66%|██████▌ | 244597/371472 [8:53:49<10:35:34, 3.33it/s] 66%|██████▌ | 244598/371472 [8:53:49<10:43:03, 3.29it/s] 66%|██████▌ | 244599/371472 [8:53:50<10:37:43, 3.32it/s] 66%|██████▌ | 244600/371472 [8:53:50<11:14:12, 3.14it/s] {'loss': 2.7105, 'learning_rate': 4.075502596479131e-07, 'epoch': 10.54} + 66%|██████▌ | 244600/371472 [8:53:50<11:14:12, 3.14it/s] 66%|██████▌ | 244601/371472 [8:53:50<11:08:02, 3.17it/s] 66%|██████▌ | 244602/371472 [8:53:51<10:42:00, 3.29it/s] 66%|██████▌ | 244603/371472 [8:53:51<10:47:03, 3.27it/s] 66%|██████▌ | 244604/371472 [8:53:51<10:46:27, 3.27it/s] 66%|██████▌ | 244605/371472 [8:53:51<10:24:49, 3.38it/s] 66%|██████▌ | 244606/371472 [8:53:52<10:47:01, 3.27it/s] 66%|██████▌ | 244607/371472 [8:53:52<10:37:57, 3.31it/s] 66%|██████▌ | 244608/371472 [8:53:52<10:20:42, 3.41it/s] 66%|██████▌ | 244609/371472 [8:53:53<10:10:46, 3.46it/s] 66%|██████▌ | 244610/371472 [8:53:53<10:19:25, 3.41it/s] 66%|██████▌ | 244611/371472 [8:53:53<10:04:15, 3.50it/s] 66%|██████▌ | 244612/371472 [8:53:53<9:53:49, 3.56it/s] 66%|██████▌ | 244613/371472 [8:53:54<9:53:43, 3.56it/s] 66%|██████▌ | 244614/371472 [8:53:54<10:08:29, 3.47it/s] 66%|██████▌ | 244615/371472 [8:53:54<10:06:22, 3.49it/s] 66%|██████▌ | 244616/371472 [8:53:55<10:06:57, 3.48it/s] 66%|██████▌ | 244617/371472 [8:53:55<10:09:24, 3.47it/s] 66%|██████▌ | 244618/371472 [8:53:55<10:08:26, 3.47it/s] 66%|██████▌ | 244619/371472 [8:53:55<10:04:43, 3.50it/s] 66%|██████▌ | 244620/371472 [8:53:56<10:11:02, 3.46it/s] {'loss': 2.8797, 'learning_rate': 4.0750177767243426e-07, 'epoch': 10.54} + 66%|██████▌ | 244620/371472 [8:53:56<10:11:02, 3.46it/s] 66%|██████▌ | 244621/371472 [8:53:56<11:32:11, 3.05it/s] 66%|██████▌ | 244622/371472 [8:53:56<11:00:40, 3.20it/s] 66%|██████▌ | 244623/371472 [8:53:57<10:33:48, 3.34it/s] 66%|██████▌ | 244624/371472 [8:53:57<10:44:54, 3.28it/s] 66%|██████▌ | 244625/371472 [8:53:57<10:38:25, 3.31it/s] 66%|██████▌ | 244626/371472 [8:53:58<11:16:37, 3.12it/s] 66%|██████▌ | 244627/371472 [8:53:58<11:43:10, 3.01it/s] 66%|██████▌ | 244628/371472 [8:53:58<11:52:29, 2.97it/s] 66%|██████▌ | 244629/371472 [8:53:59<11:15:01, 3.13it/s] 66%|██████▌ | 244630/371472 [8:53:59<10:48:03, 3.26it/s] 66%|██████▌ | 244631/371472 [8:53:59<10:44:28, 3.28it/s] 66%|██████▌ | 244632/371472 [8:54:00<10:31:31, 3.35it/s] 66%|██████▌ | 244633/371472 [8:54:00<10:25:53, 3.38it/s] 66%|██████▌ | 244634/371472 [8:54:00<10:43:25, 3.29it/s] 66%|██████▌ | 244635/371472 [8:54:00<10:26:56, 3.37it/s] 66%|██████▌ | 244636/371472 [8:54:01<10:36:23, 3.32it/s] 66%|██████▌ | 244637/371472 [8:54:01<10:28:37, 3.36it/s] 66%|██████▌ | 244638/371472 [8:54:01<10:14:08, 3.44it/s] 66%|██████▌ | 244639/371472 [8:54:02<10:10:40, 3.46it/s] 66%|██████▌ | 244640/371472 [8:54:02<10:16:42, 3.43it/s] {'loss': 2.6954, 'learning_rate': 4.074532956969553e-07, 'epoch': 10.54} + 66%|██████▌ | 244640/371472 [8:54:02<10:16:42, 3.43it/s] 66%|██████▌ | 244641/371472 [8:54:02<10:47:26, 3.26it/s] 66%|██████▌ | 244642/371472 [8:54:03<10:31:21, 3.35it/s] 66%|██████▌ | 244643/371472 [8:54:03<10:16:31, 3.43it/s] 66%|██████▌ | 244644/371472 [8:54:03<10:12:24, 3.45it/s] 66%|██████▌ | 244645/371472 [8:54:03<10:05:36, 3.49it/s] 66%|██████▌ | 244646/371472 [8:54:04<9:58:40, 3.53it/s] 66%|██████▌ | 244647/371472 [8:54:04<10:06:43, 3.48it/s] 66%|██████▌ | 244648/371472 [8:54:04<10:17:25, 3.42it/s] 66%|██████▌ | 244649/371472 [8:54:05<10:29:36, 3.36it/s] 66%|██████▌ | 244650/371472 [8:54:05<10:41:29, 3.29it/s] 66%|██████▌ | 244651/371472 [8:54:05<11:17:01, 3.12it/s] 66%|██████▌ | 244652/371472 [8:54:06<11:26:13, 3.08it/s] 66%|██████▌ | 244653/371472 [8:54:06<10:58:19, 3.21it/s] 66%|██████▌ | 244654/371472 [8:54:06<10:50:19, 3.25it/s] 66%|██████▌ | 244655/371472 [8:54:06<10:27:26, 3.37it/s] 66%|██████▌ | 244656/371472 [8:54:07<10:28:28, 3.36it/s] 66%|██████▌ | 244657/371472 [8:54:07<10:16:40, 3.43it/s] 66%|██████▌ | 244658/371472 [8:54:07<10:02:21, 3.51it/s] 66%|██████▌ | 244659/371472 [8:54:08<10:12:17, 3.45it/s] 66%|██████▌ | 244660/371472 [8:54:08<10:05:40, 3.49it/s] {'loss': 2.7067, 'learning_rate': 4.0740481372147646e-07, 'epoch': 10.54} + 66%|██████▌ | 244660/371472 [8:54:08<10:05:40, 3.49it/s] 66%|██████▌ | 244661/371472 [8:54:08<10:13:27, 3.45it/s] 66%|██████▌ | 244662/371472 [8:54:09<11:18:31, 3.11it/s] 66%|██████▌ | 244663/371472 [8:54:09<11:02:11, 3.19it/s] 66%|██████▌ | 244664/371472 [8:54:09<10:38:13, 3.31it/s] 66%|██████▌ | 244665/371472 [8:54:09<10:37:00, 3.32it/s] 66%|██████▌ | 244666/371472 [8:54:10<10:09:37, 3.47it/s] 66%|██████▌ | 244667/371472 [8:54:10<10:03:50, 3.50it/s] 66%|██████▌ | 244668/371472 [8:54:10<10:22:00, 3.40it/s] 66%|██████▌ | 244669/371472 [8:54:11<10:10:17, 3.46it/s] 66%|██████▌ | 244670/371472 [8:54:11<10:00:46, 3.52it/s] 66%|██████▌ | 244671/371472 [8:54:11<10:06:12, 3.49it/s] 66%|██████▌ | 244672/371472 [8:54:11<9:56:37, 3.54it/s] 66%|██████▌ | 244673/371472 [8:54:12<10:10:53, 3.46it/s] 66%|██████▌ | 244674/371472 [8:54:12<10:42:52, 3.29it/s] 66%|██████▌ | 244675/371472 [8:54:12<11:04:58, 3.18it/s] 66%|██████▌ | 244676/371472 [8:54:13<10:41:22, 3.29it/s] 66%|██████▌ | 244677/371472 [8:54:13<10:31:26, 3.35it/s] 66%|██████▌ | 244678/371472 [8:54:13<10:17:20, 3.42it/s] 66%|██████▌ | 244679/371472 [8:54:13<10:07:43, 3.48it/s] 66%|██████▌ | 244680/371472 [8:54:14<10:41:39, 3.29it/s] {'loss': 2.767, 'learning_rate': 4.0735633174599753e-07, 'epoch': 10.54} + 66%|██████▌ | 244680/371472 [8:54:14<10:41:39, 3.29it/s] 66%|██████▌ | 244681/371472 [8:54:14<10:51:57, 3.24it/s] 66%|██████▌ | 244682/371472 [8:54:14<10:43:29, 3.28it/s] 66%|██████▌ | 244683/371472 [8:54:15<10:58:46, 3.21it/s] 66%|██████▌ | 244684/371472 [8:54:15<10:43:53, 3.28it/s] 66%|██████▌ | 244685/371472 [8:54:15<10:25:08, 3.38it/s] 66%|██████▌ | 244686/371472 [8:54:16<10:14:15, 3.44it/s] 66%|██████▌ | 244687/371472 [8:54:16<10:36:41, 3.32it/s] 66%|██████▌ | 244688/371472 [8:54:16<10:13:00, 3.45it/s] 66%|██████▌ | 244689/371472 [8:54:17<10:24:01, 3.39it/s] 66%|██████▌ | 244690/371472 [8:54:17<10:52:17, 3.24it/s] 66%|██████▌ | 244691/371472 [8:54:17<10:36:36, 3.32it/s] 66%|██████▌ | 244692/371472 [8:54:17<10:30:22, 3.35it/s] 66%|██████▌ | 244693/371472 [8:54:18<10:26:06, 3.37it/s] 66%|██████▌ | 244694/371472 [8:54:18<10:29:25, 3.36it/s] 66%|██████▌ | 244695/371472 [8:54:18<10:15:26, 3.43it/s] 66%|██████▌ | 244696/371472 [8:54:19<10:04:25, 3.50it/s] 66%|██████▌ | 244697/371472 [8:54:19<10:48:10, 3.26it/s] 66%|██████▌ | 244698/371472 [8:54:19<10:56:06, 3.22it/s] 66%|██████▌ | 244699/371472 [8:54:20<10:55:03, 3.23it/s] 66%|██████▌ | 244700/371472 [8:54:20<11:06:26, 3.17it/s] {'loss': 2.8059, 'learning_rate': 4.0730784977051865e-07, 'epoch': 10.54} + 66%|██████▌ | 244700/371472 [8:54:20<11:06:26, 3.17it/s] 66%|██████▌ | 244701/371472 [8:54:20<11:10:10, 3.15it/s] 66%|██████▌ | 244702/371472 [8:54:21<11:30:54, 3.06it/s] 66%|██████▌ | 244703/371472 [8:54:21<11:09:02, 3.16it/s] 66%|██████▌ | 244704/371472 [8:54:21<10:51:13, 3.24it/s] 66%|██████▌ | 244705/371472 [8:54:21<10:26:45, 3.37it/s] 66%|██████▌ | 244706/371472 [8:54:22<10:07:49, 3.48it/s] 66%|██████▌ | 244707/371472 [8:54:22<10:03:34, 3.50it/s] 66%|██████▌ | 244708/371472 [8:54:22<10:05:22, 3.49it/s] 66%|██████▌ | 244709/371472 [8:54:23<10:13:46, 3.44it/s] 66%|██████▌ | 244710/371472 [8:54:23<10:29:29, 3.36it/s] 66%|██████▌ | 244711/371472 [8:54:23<11:16:54, 3.12it/s] 66%|██████▌ | 244712/371472 [8:54:24<11:08:47, 3.16it/s] 66%|██████▌ | 244713/371472 [8:54:24<10:49:59, 3.25it/s] 66%|██████▌ | 244714/371472 [8:54:24<10:38:25, 3.31it/s] 66%|██████▌ | 244715/371472 [8:54:24<10:38:22, 3.31it/s] 66%|██████▌ | 244716/371472 [8:54:25<10:49:48, 3.25it/s] 66%|██████▌ | 244717/371472 [8:54:25<10:47:39, 3.26it/s] 66%|██████▌ | 244718/371472 [8:54:25<10:37:25, 3.31it/s] 66%|██████▌ | 244719/371472 [8:54:26<10:28:53, 3.36it/s] 66%|██████▌ | 244720/371472 [8:54:26<10:16:48, 3.42it/s] {'loss': 2.6957, 'learning_rate': 4.072593677950397e-07, 'epoch': 10.54} + 66%|██████▌ | 244720/371472 [8:54:26<10:16:48, 3.42it/s] 66%|██████▌ | 244721/371472 [8:54:26<10:17:04, 3.42it/s] 66%|██████▌ | 244722/371472 [8:54:27<10:32:41, 3.34it/s] 66%|██████▌ | 244723/371472 [8:54:27<10:38:48, 3.31it/s] 66%|██████▌ | 244724/371472 [8:54:27<10:22:55, 3.39it/s] 66%|██████▌ | 244725/371472 [8:54:27<10:33:24, 3.34it/s] 66%|██████▌ | 244726/371472 [8:54:28<10:39:38, 3.30it/s] 66%|██████▌ | 244727/371472 [8:54:28<10:48:04, 3.26it/s] 66%|██████▌ | 244728/371472 [8:54:28<11:27:22, 3.07it/s] 66%|██████▌ | 244729/371472 [8:54:29<11:05:02, 3.18it/s] 66%|██████▌ | 244730/371472 [8:54:29<10:55:37, 3.22it/s] 66%|██████▌ | 244731/371472 [8:54:29<10:49:12, 3.25it/s] 66%|██████▌ | 244732/371472 [8:54:30<10:26:21, 3.37it/s] 66%|██████▌ | 244733/371472 [8:54:30<10:22:48, 3.39it/s] 66%|██████▌ | 244734/371472 [8:54:30<10:36:31, 3.32it/s] 66%|██████▌ | 244735/371472 [8:54:30<10:48:58, 3.25it/s] 66%|██████▌ | 244736/371472 [8:54:31<10:27:37, 3.37it/s] 66%|██████▌ | 244737/371472 [8:54:31<10:25:31, 3.38it/s] 66%|██████▌ | 244738/371472 [8:54:31<10:30:29, 3.35it/s] 66%|██████▌ | 244739/371472 [8:54:32<10:39:36, 3.30it/s] 66%|██████▌ | 244740/371472 [8:54:32<12:23:35, 2.84it/s] {'loss': 2.6242, 'learning_rate': 4.072108858195609e-07, 'epoch': 10.54} + 66%|██████▌ | 244740/371472 [8:54:32<12:23:35, 2.84it/s] 66%|██████▌ | 244741/371472 [8:54:32<11:32:07, 3.05it/s] 66%|██████▌ | 244742/371472 [8:54:33<11:13:36, 3.14it/s] 66%|██████▌ | 244743/371472 [8:54:33<11:02:06, 3.19it/s] 66%|██████▌ | 244744/371472 [8:54:33<10:47:02, 3.26it/s] 66%|██████▌ | 244745/371472 [8:54:34<10:28:28, 3.36it/s] 66%|██████▌ | 244746/371472 [8:54:34<10:58:55, 3.21it/s] 66%|██████▌ | 244747/371472 [8:54:34<11:13:01, 3.14it/s] 66%|██████▌ | 244748/371472 [8:54:35<11:16:16, 3.12it/s] 66%|██████▌ | 244749/371472 [8:54:35<10:53:52, 3.23it/s] 66%|██████▌ | 244750/371472 [8:54:35<12:43:15, 2.77it/s] 66%|██████▌ | 244751/371472 [8:54:36<11:58:18, 2.94it/s] 66%|██████▌ | 244752/371472 [8:54:36<11:28:56, 3.07it/s] 66%|██████▌ | 244753/371472 [8:54:36<11:22:34, 3.09it/s] 66%|██████▌ | 244754/371472 [8:54:37<11:05:59, 3.17it/s] 66%|██████▌ | 244755/371472 [8:54:37<10:49:36, 3.25it/s] 66%|██████▌ | 244756/371472 [8:54:37<11:10:50, 3.15it/s] 66%|██████▌ | 244757/371472 [8:54:38<11:30:45, 3.06it/s] 66%|██████▌ | 244758/371472 [8:54:38<10:54:21, 3.23it/s] 66%|██████▌ | 244759/371472 [8:54:38<10:31:48, 3.34it/s] 66%|██████▌ | 244760/371472 [8:54:38<10:17:36, 3.42it/s] {'loss': 2.7862, 'learning_rate': 4.0716240384408197e-07, 'epoch': 10.54} + 66%|██████▌ | 244760/371472 [8:54:38<10:17:36, 3.42it/s] 66%|██████▌ | 244761/371472 [8:54:39<10:08:45, 3.47it/s] 66%|██████▌ | 244762/371472 [8:54:39<10:23:31, 3.39it/s] 66%|██████▌ | 244763/371472 [8:54:39<10:17:00, 3.42it/s] 66%|██████▌ | 244764/371472 [8:54:40<10:57:09, 3.21it/s] 66%|██████▌ | 244765/371472 [8:54:40<10:33:02, 3.34it/s] 66%|██████▌ | 244766/371472 [8:54:40<10:21:23, 3.40it/s] 66%|██████▌ | 244767/371472 [8:54:40<10:11:34, 3.45it/s] 66%|██████▌ | 244768/371472 [8:54:41<9:55:38, 3.55it/s] 66%|██████▌ | 244769/371472 [8:54:41<9:47:10, 3.60it/s] 66%|██████▌ | 244770/371472 [8:54:41<9:49:54, 3.58it/s] 66%|██████▌ | 244771/371472 [8:54:42<10:39:07, 3.30it/s] 66%|██████▌ | 244772/371472 [8:54:42<10:26:56, 3.37it/s] 66%|██████▌ | 244773/371472 [8:54:42<10:27:28, 3.37it/s] 66%|██████▌ | 244774/371472 [8:54:43<10:56:55, 3.21it/s] 66%|██████▌ | 244775/371472 [8:54:43<10:40:13, 3.30it/s] 66%|██████▌ | 244776/371472 [8:54:43<10:41:37, 3.29it/s] 66%|██████▌ | 244777/371472 [8:54:43<10:31:46, 3.34it/s] 66%|██████▌ | 244778/371472 [8:54:44<10:16:56, 3.42it/s] 66%|██████▌ | 244779/371472 [8:54:44<10:04:53, 3.49it/s] 66%|██████▌ | 244780/371472 [8:54:44<10:26:31, 3.37it/s] {'loss': 2.6113, 'learning_rate': 4.07113921868603e-07, 'epoch': 10.54} + 66%|██████▌ | 244780/371472 [8:54:44<10:26:31, 3.37it/s] 66%|██████▌ | 244781/371472 [8:54:45<10:32:35, 3.34it/s] 66%|██████▌ | 244782/371472 [8:54:45<10:07:41, 3.47it/s] 66%|██████▌ | 244783/371472 [8:54:45<9:58:37, 3.53it/s] 66%|██████▌ | 244784/371472 [8:54:45<9:56:52, 3.54it/s] 66%|██████▌ | 244785/371472 [8:54:46<9:49:21, 3.58it/s] 66%|██████▌ | 244786/371472 [8:54:46<10:19:39, 3.41it/s] 66%|██████▌ | 244787/371472 [8:54:46<10:31:51, 3.34it/s] 66%|██████▌ | 244788/371472 [8:54:47<10:18:56, 3.41it/s] 66%|██████▌ | 244789/371472 [8:54:47<10:02:13, 3.51it/s] 66%|██████▌ | 244790/371472 [8:54:47<10:12:16, 3.45it/s] 66%|██████▌ | 244791/371472 [8:54:47<10:03:53, 3.50it/s] 66%|██████▌ | 244792/371472 [8:54:48<10:24:19, 3.38it/s] 66%|██████▌ | 244793/371472 [8:54:48<10:10:22, 3.46it/s] 66%|██████▌ | 244794/371472 [8:54:48<9:58:52, 3.53it/s] 66%|██████▌ | 244795/371472 [8:54:49<9:54:45, 3.55it/s] 66%|██████▌ | 244796/371472 [8:54:49<10:26:48, 3.37it/s] 66%|██████▌ | 244797/371472 [8:54:49<10:27:15, 3.37it/s] 66%|██████▌ | 244798/371472 [8:54:50<11:18:37, 3.11it/s] 66%|██████▌ | 244799/371472 [8:54:50<11:03:14, 3.18it/s] 66%|██████▌ | 244800/371472 [8:54:50<10:55:28, 3.22it/s] {'loss': 2.6656, 'learning_rate': 4.070654398931242e-07, 'epoch': 10.54} + 66%|██████▌ | 244800/371472 [8:54:50<10:55:28, 3.22it/s] 66%|██████▌ | 244801/371472 [8:54:50<10:45:30, 3.27it/s] 66%|██████▌ | 244802/371472 [8:54:51<11:04:48, 3.18it/s] 66%|██████▌ | 244803/371472 [8:54:51<10:47:43, 3.26it/s] 66%|██████▌ | 244804/371472 [8:54:51<10:45:45, 3.27it/s] 66%|██████▌ | 244805/371472 [8:54:52<11:09:16, 3.15it/s] 66%|██████▌ | 244806/371472 [8:54:52<10:55:03, 3.22it/s] 66%|██████▌ | 244807/371472 [8:54:52<10:46:51, 3.26it/s] 66%|██████▌ | 244808/371472 [8:54:53<10:33:42, 3.33it/s] 66%|██████▌ | 244809/371472 [8:54:53<10:21:49, 3.39it/s] 66%|██████▌ | 244810/371472 [8:54:53<10:11:57, 3.45it/s] 66%|██████▌ | 244811/371472 [8:54:53<10:09:00, 3.47it/s] 66%|██████▌ | 244812/371472 [8:54:54<10:11:09, 3.45it/s] 66%|██████▌ | 244813/371472 [8:54:54<10:16:24, 3.42it/s] 66%|██████▌ | 244814/371472 [8:54:54<10:22:18, 3.39it/s] 66%|██████▌ | 244815/371472 [8:54:55<10:19:27, 3.41it/s] 66%|██████▌ | 244816/371472 [8:54:55<10:55:42, 3.22it/s] 66%|██████▌ | 244817/371472 [8:54:55<10:41:01, 3.29it/s] 66%|██████▌ | 244818/371472 [8:54:56<10:40:36, 3.30it/s] 66%|██████▌ | 244819/371472 [8:54:56<10:40:26, 3.30it/s] 66%|██████▌ | 244820/371472 [8:54:56<10:31:54, 3.34it/s] {'loss': 2.7469, 'learning_rate': 4.0701695791764524e-07, 'epoch': 10.54} + 66%|██████▌ | 244820/371472 [8:54:56<10:31:54, 3.34it/s] 66%|██████▌ | 244821/371472 [8:54:56<10:37:57, 3.31it/s] 66%|██████▌ | 244822/371472 [8:54:57<10:28:02, 3.36it/s] 66%|██████▌ | 244823/371472 [8:54:57<10:23:16, 3.39it/s] 66%|██████▌ | 244824/371472 [8:54:57<10:18:30, 3.41it/s] 66%|██████▌ | 244825/371472 [8:54:58<11:00:42, 3.19it/s] 66%|██████▌ | 244826/371472 [8:54:58<10:47:00, 3.26it/s] 66%|██████▌ | 244827/371472 [8:54:58<10:56:48, 3.21it/s] 66%|██████▌ | 244828/371472 [8:54:59<10:30:29, 3.35it/s] 66%|██████▌ | 244829/371472 [8:54:59<10:41:03, 3.29it/s] 66%|██████▌ | 244830/371472 [8:54:59<10:25:49, 3.37it/s] 66%|██████▌ | 244831/371472 [8:55:00<10:47:46, 3.26it/s] 66%|██████▌ | 244832/371472 [8:55:00<11:39:35, 3.02it/s] 66%|██████▌ | 244833/371472 [8:55:00<11:28:35, 3.07it/s] 66%|██████▌ | 244834/371472 [8:55:01<11:32:55, 3.05it/s] 66%|██████▌ | 244835/371472 [8:55:01<10:54:54, 3.22it/s] 66%|██████▌ | 244836/371472 [8:55:01<10:38:21, 3.31it/s] 66%|██████▌ | 244837/371472 [8:55:01<10:24:32, 3.38it/s] 66%|██████▌ | 244838/371472 [8:55:02<10:48:37, 3.25it/s] 66%|██████▌ | 244839/371472 [8:55:02<10:49:45, 3.25it/s] 66%|██████▌ | 244840/371472 [8:55:02<10:37:55, 3.31it/s] {'loss': 2.6042, 'learning_rate': 4.0696847594216637e-07, 'epoch': 10.55} + 66%|██████▌ | 244840/371472 [8:55:02<10:37:55, 3.31it/s] 66%|██████▌ | 244841/371472 [8:55:03<10:31:13, 3.34it/s] 66%|██████▌ | 244842/371472 [8:55:03<10:14:13, 3.44it/s] 66%|██████▌ | 244843/371472 [8:55:03<10:16:25, 3.42it/s] 66%|██████▌ | 244844/371472 [8:55:03<10:21:55, 3.39it/s] 66%|██████▌ | 244845/371472 [8:55:04<10:17:31, 3.42it/s] 66%|██████▌ | 244846/371472 [8:55:04<10:03:28, 3.50it/s] 66%|██████▌ | 244847/371472 [8:55:04<10:17:14, 3.42it/s] 66%|██████▌ | 244848/371472 [8:55:05<10:45:23, 3.27it/s] 66%|██████▌ | 244849/371472 [8:55:05<10:47:15, 3.26it/s] 66%|██████▌ | 244850/371472 [8:55:05<10:30:25, 3.35it/s] 66%|██████▌ | 244851/371472 [8:55:06<10:23:18, 3.39it/s] 66%|██████▌ | 244852/371472 [8:55:06<10:19:05, 3.41it/s] 66%|██████▌ | 244853/371472 [8:55:06<10:05:34, 3.48it/s] 66%|██████▌ | 244854/371472 [8:55:06<9:53:13, 3.56it/s] 66%|██████▌ | 244855/371472 [8:55:07<9:42:26, 3.62it/s] 66%|██████▌ | 244856/371472 [8:55:07<10:15:49, 3.43it/s] 66%|██████▌ | 244857/371472 [8:55:07<10:14:31, 3.43it/s] 66%|██████▌ | 244858/371472 [8:55:08<10:25:32, 3.37it/s] 66%|██████▌ | 244859/371472 [8:55:08<10:19:47, 3.40it/s] 66%|██████▌ | 244860/371472 [8:55:08<11:02:02, 3.19it/s] {'loss': 2.7045, 'learning_rate': 4.0691999396668744e-07, 'epoch': 10.55} + 66%|██████▌ | 244860/371472 [8:55:08<11:02:02, 3.19it/s] 66%|██████▌ | 244861/371472 [8:55:09<10:54:45, 3.22it/s] 66%|██████▌ | 244862/371472 [8:55:09<10:34:38, 3.32it/s] 66%|██████▌ | 244863/371472 [8:55:09<10:11:43, 3.45it/s] 66%|██████▌ | 244864/371472 [8:55:09<10:13:47, 3.44it/s] 66%|██████▌ | 244865/371472 [8:55:10<10:39:43, 3.30it/s] 66%|██████▌ | 244866/371472 [8:55:10<10:28:42, 3.36it/s] 66%|██████▌ | 244867/371472 [8:55:10<10:20:05, 3.40it/s] 66%|██████▌ | 244868/371472 [8:55:11<10:25:44, 3.37it/s] 66%|██████▌ | 244869/371472 [8:55:11<10:29:39, 3.35it/s] 66%|██████▌ | 244870/371472 [8:55:11<10:30:21, 3.35it/s] 66%|██████▌ | 244871/371472 [8:55:11<10:19:14, 3.41it/s] 66%|██████▌ | 244872/371472 [8:55:12<10:21:23, 3.40it/s] 66%|██████▌ | 244873/371472 [8:55:12<10:29:19, 3.35it/s] 66%|██████▌ | 244874/371472 [8:55:12<10:39:48, 3.30it/s] 66%|██████▌ | 244875/371472 [8:55:13<11:22:38, 3.09it/s] 66%|██████▌ | 244876/371472 [8:55:13<10:49:38, 3.25it/s] 66%|██████▌ | 244877/371472 [8:55:13<10:41:00, 3.29it/s] 66%|██████▌ | 244878/371472 [8:55:14<10:53:16, 3.23it/s] 66%|██████▌ | 244879/371472 [8:55:14<10:31:33, 3.34it/s] 66%|██████▌ | 244880/371472 [8:55:14<10:38:49, 3.30it/s] {'loss': 2.6508, 'learning_rate': 4.068715119912086e-07, 'epoch': 10.55} + 66%|██████▌ | 244880/371472 [8:55:14<10:38:49, 3.30it/s] 66%|██████▌ | 244881/371472 [8:55:15<10:29:55, 3.35it/s] 66%|██████▌ | 244882/371472 [8:55:15<10:48:16, 3.25it/s] 66%|██████▌ | 244883/371472 [8:55:15<10:36:23, 3.32it/s] 66%|██████▌ | 244884/371472 [8:55:15<10:27:29, 3.36it/s] 66%|██████▌ | 244885/371472 [8:55:16<10:12:18, 3.45it/s] 66%|██████▌ | 244886/371472 [8:55:16<10:21:34, 3.39it/s] 66%|██████▌ | 244887/371472 [8:55:16<10:27:42, 3.36it/s] 66%|██████▌ | 244888/371472 [8:55:17<10:44:22, 3.27it/s] 66%|██████▌ | 244889/371472 [8:55:17<10:48:07, 3.26it/s] 66%|██████▌ | 244890/371472 [8:55:17<10:27:37, 3.36it/s] 66%|██████▌ | 244891/371472 [8:55:18<10:58:27, 3.20it/s] 66%|██████▌ | 244892/371472 [8:55:18<11:04:31, 3.17it/s] 66%|██████▌ | 244893/371472 [8:55:18<10:56:21, 3.21it/s] 66%|██████▌ | 244894/371472 [8:55:18<10:49:57, 3.25it/s] 66%|██████▌ | 244895/371472 [8:55:19<11:12:27, 3.14it/s] 66%|██████▌ | 244896/371472 [8:55:19<10:53:05, 3.23it/s] 66%|██████▌ | 244897/371472 [8:55:19<10:43:05, 3.28it/s] 66%|██████▌ | 244898/371472 [8:55:20<10:24:20, 3.38it/s] 66%|██████▌ | 244899/371472 [8:55:20<10:27:26, 3.36it/s] 66%|██████▌ | 244900/371472 [8:55:20<10:09:33, 3.46it/s] {'loss': 2.6896, 'learning_rate': 4.0682303001572963e-07, 'epoch': 10.55} + 66%|██████▌ | 244900/371472 [8:55:20<10:09:33, 3.46it/s] 66%|██████▌ | 244901/371472 [8:55:21<10:19:52, 3.40it/s] 66%|██████▌ | 244902/371472 [8:55:21<10:06:48, 3.48it/s] 66%|██████▌ | 244903/371472 [8:55:21<9:56:05, 3.54it/s] 66%|██████▌ | 244904/371472 [8:55:21<10:02:21, 3.50it/s] 66%|██████▌ | 244905/371472 [8:55:22<9:52:37, 3.56it/s] 66%|██████▌ | 244906/371472 [8:55:22<10:06:05, 3.48it/s] 66%|██████▌ | 244907/371472 [8:55:22<10:52:37, 3.23it/s] 66%|██████▌ | 244908/371472 [8:55:23<11:42:02, 3.00it/s] 66%|██████▌ | 244909/371472 [8:55:23<11:40:58, 3.01it/s] 66%|██████▌ | 244910/371472 [8:55:23<11:07:37, 3.16it/s] 66%|██████▌ | 244911/371472 [8:55:24<11:06:18, 3.17it/s] 66%|██████▌ | 244912/371472 [8:55:24<10:40:09, 3.29it/s] 66%|██████▌ | 244913/371472 [8:55:24<10:34:37, 3.32it/s] 66%|██████▌ | 244914/371472 [8:55:25<10:37:03, 3.31it/s] 66%|██████▌ | 244915/371472 [8:55:25<11:02:48, 3.18it/s] 66%|██████▌ | 244916/371472 [8:55:25<11:30:01, 3.06it/s] 66%|██████▌ | 244917/371472 [8:55:25<11:10:04, 3.15it/s] 66%|██████▌ | 244918/371472 [8:55:26<11:17:14, 3.11it/s] 66%|██████▌ | 244919/371472 [8:55:26<10:50:57, 3.24it/s] 66%|██████▌ | 244920/371472 [8:55:26<10:50:38, 3.24it/s] {'loss': 2.7048, 'learning_rate': 4.067745480402508e-07, 'epoch': 10.55} + 66%|██████▌ | 244920/371472 [8:55:26<10:50:38, 3.24it/s] 66%|██████▌ | 244921/371472 [8:55:27<10:38:59, 3.30it/s] 66%|██████▌ | 244922/371472 [8:55:27<10:22:20, 3.39it/s] 66%|██████▌ | 244923/371472 [8:55:27<10:28:08, 3.36it/s] 66%|██████▌ | 244924/371472 [8:55:28<10:40:00, 3.30it/s] 66%|██████▌ | 244925/371472 [8:55:28<11:04:25, 3.17it/s] 66%|██████▌ | 244926/371472 [8:55:28<11:14:22, 3.13it/s] 66%|██████▌ | 244927/371472 [8:55:29<10:37:32, 3.31it/s] 66%|██████▌ | 244928/371472 [8:55:29<10:16:21, 3.42it/s] 66%|██████▌ | 244929/371472 [8:55:29<9:54:11, 3.55it/s] 66%|██████▌ | 244930/371472 [8:55:29<10:38:20, 3.30it/s] 66%|██████▌ | 244931/371472 [8:55:30<10:38:50, 3.30it/s] 66%|██████▌ | 244932/371472 [8:55:30<10:42:45, 3.28it/s] 66%|██████▌ | 244933/371472 [8:55:30<10:27:20, 3.36it/s] 66%|██████▌ | 244934/371472 [8:55:31<10:47:58, 3.25it/s] 66%|██████▌ | 244935/371472 [8:55:31<10:39:32, 3.30it/s] 66%|██████▌ | 244936/371472 [8:55:31<10:50:40, 3.24it/s] 66%|██████▌ | 244937/371472 [8:55:32<10:33:00, 3.33it/s] 66%|██████▌ | 244938/371472 [8:55:32<10:21:29, 3.39it/s] 66%|██████▌ | 244939/371472 [8:55:32<10:17:39, 3.41it/s] 66%|██████▌ | 244940/371472 [8:55:32<10:27:30, 3.36it/s] {'loss': 2.7177, 'learning_rate': 4.067260660647719e-07, 'epoch': 10.55} + 66%|██████▌ | 244940/371472 [8:55:32<10:27:30, 3.36it/s] 66%|██████▌ | 244941/371472 [8:55:33<10:24:34, 3.38it/s] 66%|██████▌ | 244942/371472 [8:55:33<10:17:46, 3.41it/s] 66%|██████▌ | 244943/371472 [8:55:33<11:19:37, 3.10it/s] 66%|██████▌ | 244944/371472 [8:55:34<11:13:22, 3.13it/s] 66%|██████▌ | 244945/371472 [8:55:34<11:28:17, 3.06it/s] 66%|██████▌ | 244946/371472 [8:55:34<11:23:59, 3.08it/s] 66%|██████▌ | 244947/371472 [8:55:35<10:59:01, 3.20it/s] 66%|██████▌ | 244948/371472 [8:55:35<10:41:46, 3.29it/s] 66%|██████▌ | 244949/371472 [8:55:35<10:59:27, 3.20it/s] 66%|██████▌ | 244950/371472 [8:55:36<10:46:36, 3.26it/s] 66%|██████▌ | 244951/371472 [8:55:36<10:54:06, 3.22it/s] 66%|██████▌ | 244952/371472 [8:55:36<10:39:28, 3.30it/s] 66%|██████▌ | 244953/371472 [8:55:36<10:43:06, 3.28it/s] 66%|██████▌ | 244954/371472 [8:55:37<10:44:14, 3.27it/s] 66%|██████▌ | 244955/371472 [8:55:37<10:30:11, 3.35it/s] 66%|██████▌ | 244956/371472 [8:55:37<10:38:43, 3.30it/s] 66%|██████▌ | 244957/371472 [8:55:38<11:16:03, 3.12it/s] 66%|██████▌ | 244958/371472 [8:55:38<10:49:37, 3.25it/s] 66%|██████▌ | 244959/371472 [8:55:38<10:46:36, 3.26it/s] 66%|██████▌ | 244960/371472 [8:55:39<10:58:54, 3.20it/s] {'loss': 2.7456, 'learning_rate': 4.06677584089293e-07, 'epoch': 10.55} + 66%|██████▌ | 244960/371472 [8:55:39<10:58:54, 3.20it/s] 66%|██████▌ | 244961/371472 [8:55:39<10:29:55, 3.35it/s] 66%|██████▌ | 244962/371472 [8:55:39<10:38:47, 3.30it/s] 66%|██████▌ | 244963/371472 [8:55:40<10:26:56, 3.36it/s] 66%|██████▌ | 244964/371472 [8:55:40<10:54:49, 3.22it/s] 66%|██████▌ | 244965/371472 [8:55:40<10:43:29, 3.28it/s] 66%|██████▌ | 244966/371472 [8:55:40<10:23:15, 3.38it/s] 66%|██████▌ | 244967/371472 [8:55:41<10:55:43, 3.22it/s] 66%|██████▌ | 244968/371472 [8:55:41<10:54:30, 3.22it/s] 66%|██████▌ | 244969/371472 [8:55:41<11:20:09, 3.10it/s] 66%|██████▌ | 244970/371472 [8:55:42<11:21:33, 3.09it/s] 66%|██████▌ | 244971/371472 [8:55:42<11:13:09, 3.13it/s] 66%|██████▌ | 244972/371472 [8:55:42<11:01:11, 3.19it/s] 66%|██████▌ | 244973/371472 [8:55:43<10:50:55, 3.24it/s] 66%|██████▌ | 244974/371472 [8:55:43<10:38:58, 3.30it/s] 66%|██████▌ | 244975/371472 [8:55:43<10:51:36, 3.24it/s] 66%|██████▌ | 244976/371472 [8:55:44<10:41:19, 3.29it/s] 66%|██████▌ | 244977/371472 [8:55:44<10:34:15, 3.32it/s] 66%|██████▌ | 244978/371472 [8:55:44<11:53:18, 2.96it/s] 66%|██████▌ | 244979/371472 [8:55:45<11:19:48, 3.10it/s] 66%|██████▌ | 244980/371472 [8:55:45<10:50:23, 3.24it/s] {'loss': 2.6942, 'learning_rate': 4.066291021138141e-07, 'epoch': 10.55} + 66%|██████▌ | 244980/371472 [8:55:45<10:50:23, 3.24it/s] 66%|██████▌ | 244981/371472 [8:55:45<10:36:07, 3.31it/s] 66%|██████▌ | 244982/371472 [8:55:45<10:45:09, 3.27it/s] 66%|██████▌ | 244983/371472 [8:55:46<10:49:51, 3.24it/s] 66%|██████▌ | 244984/371472 [8:55:46<11:00:03, 3.19it/s] 66%|██████▌ | 244985/371472 [8:55:46<11:22:01, 3.09it/s] 66%|██████▌ | 244986/371472 [8:55:47<10:57:37, 3.21it/s] 66%|██████▌ | 244987/371472 [8:55:47<10:33:10, 3.33it/s] 66%|██████▌ | 244988/371472 [8:55:47<10:31:36, 3.34it/s] 66%|██████▌ | 244989/371472 [8:55:48<10:27:00, 3.36it/s] 66%|██████▌ | 244990/371472 [8:55:48<10:34:31, 3.32it/s] 66%|██████▌ | 244991/371472 [8:55:48<10:17:10, 3.42it/s] 66%|██████▌ | 244992/371472 [8:55:48<9:53:24, 3.55it/s] 66%|██████▌ | 244993/371472 [8:55:49<10:24:35, 3.37it/s] 66%|██████▌ | 244994/371472 [8:55:49<11:07:23, 3.16it/s] 66%|██████▌ | 244995/371472 [8:55:49<11:26:38, 3.07it/s] 66%|██████▌ | 244996/371472 [8:55:50<11:07:50, 3.16it/s] 66%|██████▌ | 244997/371472 [8:55:50<10:41:52, 3.28it/s] 66%|██████▌ | 244998/371472 [8:55:50<11:01:28, 3.19it/s] 66%|██████▌ | 244999/371472 [8:55:51<10:46:21, 3.26it/s] 66%|██████▌ | 245000/371472 [8:55:51<10:30:51, 3.34it/s] {'loss': 2.6131, 'learning_rate': 4.0658062013833525e-07, 'epoch': 10.55} + 66%|██████▌ | 245000/371472 [8:55:51<10:30:51, 3.34it/s] 66%|██████▌ | 245001/371472 [8:55:51<11:12:26, 3.13it/s] 66%|██████▌ | 245002/371472 [8:55:52<11:26:13, 3.07it/s] 66%|██████▌ | 245003/371472 [8:55:52<11:06:26, 3.16it/s] 66%|██████▌ | 245004/371472 [8:55:52<10:47:09, 3.26it/s] 66%|██████▌ | 245005/371472 [8:55:53<10:46:07, 3.26it/s] 66%|██████▌ | 245006/371472 [8:55:53<10:40:26, 3.29it/s] 66%|██████▌ | 245007/371472 [8:55:53<10:50:51, 3.24it/s] 66%|██████▌ | 245008/371472 [8:55:53<10:59:19, 3.20it/s] 66%|██████▌ | 245009/371472 [8:55:54<11:25:17, 3.08it/s] 66%|██████▌ | 245010/371472 [8:55:54<11:14:11, 3.13it/s] 66%|██████▌ | 245011/371472 [8:55:54<10:57:17, 3.21it/s] 66%|██████▌ | 245012/371472 [8:55:55<10:56:10, 3.21it/s] 66%|██████▌ | 245013/371472 [8:55:55<10:43:01, 3.28it/s] 66%|██████▌ | 245014/371472 [8:55:55<10:38:20, 3.30it/s] 66%|██████▌ | 245015/371472 [8:55:56<10:34:47, 3.32it/s] 66%|██████▌ | 245016/371472 [8:55:56<10:23:23, 3.38it/s] 66%|██████▌ | 245017/371472 [8:55:56<10:26:27, 3.36it/s] 66%|██████▌ | 245018/371472 [8:55:56<10:27:19, 3.36it/s] 66%|██████▌ | 245019/371472 [8:55:57<10:25:32, 3.37it/s] 66%|██████▌ | 245020/371472 [8:55:57<10:14:11, 3.43it/s] {'loss': 2.7857, 'learning_rate': 4.0653213816285627e-07, 'epoch': 10.55} + 66%|██████▌ | 245020/371472 [8:55:57<10:14:11, 3.43it/s] 66%|██████▌ | 245021/371472 [8:55:57<10:22:18, 3.39it/s] 66%|██████▌ | 245022/371472 [8:55:58<10:24:41, 3.37it/s] 66%|██████▌ | 245023/371472 [8:55:58<10:08:17, 3.46it/s] 66%|██████▌ | 245024/371472 [8:55:58<10:05:31, 3.48it/s] 66%|██████▌ | 245025/371472 [8:55:59<10:21:54, 3.39it/s] 66%|██████▌ | 245026/371472 [8:55:59<10:08:56, 3.46it/s] 66%|██████▌ | 245027/371472 [8:55:59<10:07:35, 3.47it/s] 66%|██████▌ | 245028/371472 [8:55:59<10:37:59, 3.30it/s] 66%|██████▌ | 245029/371472 [8:56:00<10:29:19, 3.35it/s] 66%|██████▌ | 245030/371472 [8:56:00<10:27:46, 3.36it/s] 66%|██████▌ | 245031/371472 [8:56:00<10:25:31, 3.37it/s] 66%|██████▌ | 245032/371472 [8:56:01<10:09:08, 3.46it/s] 66%|██████▌ | 245033/371472 [8:56:01<10:16:16, 3.42it/s] 66%|██████▌ | 245034/371472 [8:56:01<9:56:01, 3.54it/s] 66%|██████▌ | 245035/371472 [8:56:01<9:44:54, 3.60it/s] 66%|██████▌ | 245036/371472 [8:56:02<9:56:40, 3.53it/s] 66%|██████▌ | 245037/371472 [8:56:02<10:34:25, 3.32it/s] 66%|██████▌ | 245038/371472 [8:56:02<11:19:09, 3.10it/s] 66%|██████▌ | 245039/371472 [8:56:03<10:45:41, 3.26it/s] 66%|██████▌ | 245040/371472 [8:56:03<11:02:10, 3.18it/s] {'loss': 2.716, 'learning_rate': 4.0648365618737745e-07, 'epoch': 10.55} + 66%|██████▌ | 245040/371472 [8:56:03<11:02:10, 3.18it/s] 66%|██████▌ | 245041/371472 [8:56:03<10:56:13, 3.21it/s] 66%|██████▌ | 245042/371472 [8:56:04<10:47:22, 3.25it/s] 66%|██████▌ | 245043/371472 [8:56:04<10:36:29, 3.31it/s] 66%|██████▌ | 245044/371472 [8:56:04<10:20:45, 3.39it/s] 66%|██████▌ | 245045/371472 [8:56:04<10:03:18, 3.49it/s] 66%|██████▌ | 245046/371472 [8:56:05<10:29:12, 3.35it/s] 66%|██████▌ | 245047/371472 [8:56:05<10:13:25, 3.43it/s] 66%|██████▌ | 245048/371472 [8:56:05<10:17:02, 3.41it/s] 66%|██████▌ | 245049/371472 [8:56:06<10:30:19, 3.34it/s] 66%|██████▌ | 245050/371472 [8:56:06<10:32:21, 3.33it/s] 66%|██████▌ | 245051/371472 [8:56:06<10:35:03, 3.32it/s] 66%|██████▌ | 245052/371472 [8:56:07<10:17:09, 3.41it/s] 66%|██████▌ | 245053/371472 [8:56:07<10:21:59, 3.39it/s] 66%|██████▌ | 245054/371472 [8:56:07<9:57:03, 3.53it/s] 66%|██████▌ | 245055/371472 [8:56:07<10:06:49, 3.47it/s] 66%|██████▌ | 245056/371472 [8:56:08<10:05:42, 3.48it/s] 66%|██████▌ | 245057/371472 [8:56:08<9:51:14, 3.56it/s] 66%|██████▌ | 245058/371472 [8:56:08<9:49:12, 3.58it/s] 66%|██████▌ | 245059/371472 [8:56:09<10:05:21, 3.48it/s] 66%|██████▌ | 245060/371472 [8:56:09<10:31:35, 3.34it/s] {'loss': 2.7965, 'learning_rate': 4.064351742118985e-07, 'epoch': 10.56} + 66%|██████▌ | 245060/371472 [8:56:09<10:31:35, 3.34it/s] 66%|██████▌ | 245061/371472 [8:56:09<10:23:26, 3.38it/s] 66%|██████▌ | 245062/371472 [8:56:09<10:37:55, 3.30it/s] 66%|██████▌ | 245063/371472 [8:56:10<11:29:54, 3.05it/s] 66%|██████▌ | 245064/371472 [8:56:10<11:11:05, 3.14it/s] 66%|██████▌ | 245065/371472 [8:56:10<10:58:58, 3.20it/s] 66%|██████▌ | 245066/371472 [8:56:11<10:42:55, 3.28it/s] 66%|██████▌ | 245067/371472 [8:56:11<10:21:51, 3.39it/s] 66%|██████▌ | 245068/371472 [8:56:11<10:33:46, 3.32it/s] 66%|██████▌ | 245069/371472 [8:56:12<10:29:16, 3.35it/s] 66%|██████▌ | 245070/371472 [8:56:12<10:58:39, 3.20it/s] 66%|██████▌ | 245071/371472 [8:56:12<11:11:51, 3.14it/s] 66%|██████▌ | 245072/371472 [8:56:13<10:47:02, 3.26it/s] 66%|██████▌ | 245073/371472 [8:56:13<10:34:43, 3.32it/s] 66%|██████▌ | 245074/371472 [8:56:13<10:29:51, 3.34it/s] 66%|██████▌ | 245075/371472 [8:56:13<10:35:16, 3.32it/s] 66%|██████▌ | 245076/371472 [8:56:14<10:21:35, 3.39it/s] 66%|██████▌ | 245077/371472 [8:56:14<10:11:44, 3.44it/s] 66%|██████▌ | 245078/371472 [8:56:14<10:03:18, 3.49it/s] 66%|██████▌ | 245079/371472 [8:56:15<10:10:00, 3.45it/s] 66%|██████▌ | 245080/371472 [8:56:15<10:43:18, 3.27it/s] {'loss': 2.7223, 'learning_rate': 4.0638669223641965e-07, 'epoch': 10.56} + 66%|██████▌ | 245080/371472 [8:56:15<10:43:18, 3.27it/s] 66%|██████▌ | 245081/371472 [8:56:15<11:10:28, 3.14it/s] 66%|██████▌ | 245082/371472 [8:56:16<10:48:59, 3.25it/s] 66%|██████▌ | 245083/371472 [8:56:16<11:05:41, 3.16it/s] 66%|██████▌ | 245084/371472 [8:56:16<10:45:40, 3.26it/s] 66%|██████▌ | 245085/371472 [8:56:16<10:31:15, 3.34it/s] 66%|██████▌ | 245086/371472 [8:56:17<10:36:57, 3.31it/s] 66%|██████▌ | 245087/371472 [8:56:17<10:44:11, 3.27it/s] 66%|██████▌ | 245088/371472 [8:56:17<10:51:00, 3.24it/s] 66%|██████▌ | 245089/371472 [8:56:18<10:50:26, 3.24it/s] 66%|██████▌ | 245090/371472 [8:56:18<10:42:47, 3.28it/s] 66%|██████▌ | 245091/371472 [8:56:18<11:32:12, 3.04it/s] 66%|██████▌ | 245092/371472 [8:56:19<11:56:08, 2.94it/s] 66%|██████▌ | 245093/371472 [8:56:19<11:31:01, 3.05it/s] 66%|██████▌ | 245094/371472 [8:56:19<11:14:20, 3.12it/s] 66%|██████▌ | 245095/371472 [8:56:20<11:11:19, 3.14it/s] 66%|██████▌ | 245096/371472 [8:56:20<10:51:06, 3.23it/s] 66%|██████▌ | 245097/371472 [8:56:20<10:41:24, 3.28it/s] 66%|██████▌ | 245098/371472 [8:56:21<10:32:52, 3.33it/s] 66%|██████▌ | 245099/371472 [8:56:21<10:59:17, 3.19it/s] 66%|██████▌ | 245100/371472 [8:56:21<11:31:02, 3.05it/s] {'loss': 2.716, 'learning_rate': 4.063382102609407e-07, 'epoch': 10.56} + 66%|██████▌ | 245100/371472 [8:56:21<11:31:02, 3.05it/s] 66%|██████▌ | 245101/371472 [8:56:22<11:08:59, 3.15it/s] 66%|██████▌ | 245102/371472 [8:56:22<10:58:43, 3.20it/s] 66%|██████▌ | 245103/371472 [8:56:22<10:39:57, 3.29it/s] 66%|██████▌ | 245104/371472 [8:56:22<10:36:01, 3.31it/s] 66%|██████▌ | 245105/371472 [8:56:23<10:17:10, 3.41it/s] 66%|██████▌ | 245106/371472 [8:56:23<10:17:21, 3.41it/s] 66%|██████▌ | 245107/371472 [8:56:23<10:18:33, 3.40it/s] 66%|██████▌ | 245108/371472 [8:56:24<10:00:38, 3.51it/s] 66%|██████▌ | 245109/371472 [8:56:24<10:14:10, 3.43it/s] 66%|██████▌ | 245110/371472 [8:56:24<9:56:04, 3.53it/s] 66%|██████▌ | 245111/371472 [8:56:24<10:11:12, 3.45it/s] 66%|██████▌ | 245112/371472 [8:56:25<10:02:59, 3.49it/s] 66%|██████▌ | 245113/371472 [8:56:25<10:09:49, 3.45it/s] 66%|██████▌ | 245114/371472 [8:56:25<10:10:56, 3.45it/s] 66%|██████▌ | 245115/371472 [8:56:26<10:01:50, 3.50it/s] 66%|██████▌ | 245116/371472 [8:56:26<10:14:47, 3.43it/s] 66%|██████▌ | 245117/371472 [8:56:26<11:24:44, 3.08it/s] 66%|██████▌ | 245118/371472 [8:56:27<11:10:51, 3.14it/s] 66%|██████▌ | 245119/371472 [8:56:27<10:43:53, 3.27it/s] 66%|██████▌ | 245120/371472 [8:56:27<10:27:32, 3.36it/s] {'loss': 2.6, 'learning_rate': 4.062897282854619e-07, 'epoch': 10.56} + 66%|██████▌ | 245120/371472 [8:56:27<10:27:32, 3.36it/s] 66%|██████▌ | 245121/371472 [8:56:27<10:48:33, 3.25it/s] 66%|██████▌ | 245122/371472 [8:56:28<11:26:15, 3.07it/s] 66%|██████▌ | 245123/371472 [8:56:28<10:54:28, 3.22it/s] 66%|██████▌ | 245124/371472 [8:56:28<10:51:20, 3.23it/s] 66%|██████▌ | 245125/371472 [8:56:29<10:43:51, 3.27it/s] 66%|██████▌ | 245126/371472 [8:56:29<10:59:23, 3.19it/s] 66%|██████▌ | 245127/371472 [8:56:29<10:30:34, 3.34it/s] 66%|██████▌ | 245128/371472 [8:56:30<10:03:05, 3.49it/s] 66%|██████▌ | 245129/371472 [8:56:30<10:16:09, 3.42it/s] 66%|██████▌ | 245130/371472 [8:56:30<9:58:16, 3.52it/s] 66%|██████▌ | 245131/371472 [8:56:30<10:13:35, 3.43it/s] 66%|██████▌ | 245132/371472 [8:56:31<10:45:19, 3.26it/s] 66%|██████▌ | 245133/371472 [8:56:31<11:00:07, 3.19it/s] 66%|██████▌ | 245134/371472 [8:56:31<10:53:23, 3.22it/s] 66%|██████▌ | 245135/371472 [8:56:32<10:44:11, 3.27it/s] 66%|██████▌ | 245136/371472 [8:56:32<10:26:28, 3.36it/s] 66%|██████▌ | 245137/371472 [8:56:32<10:14:57, 3.42it/s] 66%|██████▌ | 245138/371472 [8:56:33<10:15:24, 3.42it/s] 66%|██████▌ | 245139/371472 [8:56:33<10:04:34, 3.48it/s] 66%|██████▌ | 245140/371472 [8:56:33<10:19:49, 3.40it/s] {'loss': 2.5992, 'learning_rate': 4.0624124630998297e-07, 'epoch': 10.56} + 66%|██████▌ | 245140/371472 [8:56:33<10:19:49, 3.40it/s] 66%|██████▌ | 245141/371472 [8:56:33<10:31:25, 3.33it/s] 66%|██████▌ | 245142/371472 [8:56:34<10:17:24, 3.41it/s] 66%|██████▌ | 245143/371472 [8:56:34<10:25:09, 3.37it/s] 66%|██████▌ | 245144/371472 [8:56:34<10:16:05, 3.42it/s] 66%|██████▌ | 245145/371472 [8:56:35<10:15:59, 3.42it/s] 66%|██████▌ | 245146/371472 [8:56:35<10:01:32, 3.50it/s] 66%|██████▌ | 245147/371472 [8:56:35<9:53:21, 3.55it/s] 66%|██████▌ | 245148/371472 [8:56:35<10:07:59, 3.46it/s] 66%|██████▌ | 245149/371472 [8:56:36<10:43:37, 3.27it/s] 66%|██████▌ | 245150/371472 [8:56:36<10:22:55, 3.38it/s] 66%|██████▌ | 245151/371472 [8:56:36<10:37:24, 3.30it/s] 66%|██████▌ | 245152/371472 [8:56:37<10:58:14, 3.20it/s] 66%|██████▌ | 245153/371472 [8:56:37<10:56:41, 3.21it/s] 66%|██████▌ | 245154/371472 [8:56:37<10:55:47, 3.21it/s] 66%|██████▌ | 245155/371472 [8:56:38<10:53:07, 3.22it/s] 66%|██████▌ | 245156/371472 [8:56:38<10:23:37, 3.38it/s] 66%|██████▌ | 245157/371472 [8:56:38<10:03:17, 3.49it/s] 66%|██████▌ | 245158/371472 [8:56:39<10:02:26, 3.49it/s] 66%|██████▌ | 245159/371472 [8:56:39<10:07:22, 3.47it/s] 66%|██████▌ | 245160/371472 [8:56:39<9:54:50, 3.54it/s] {'loss': 2.8501, 'learning_rate': 4.061927643345041e-07, 'epoch': 10.56} + 66%|██████▌ | 245160/371472 [8:56:39<9:54:50, 3.54it/s] 66%|██████▌ | 245161/371472 [8:56:39<10:04:10, 3.48it/s] 66%|██████▌ | 245162/371472 [8:56:40<10:21:23, 3.39it/s] 66%|██████▌ | 245163/371472 [8:56:40<10:18:31, 3.40it/s] 66%|██████▌ | 245164/371472 [8:56:40<10:28:19, 3.35it/s] 66%|██████▌ | 245165/371472 [8:56:41<13:22:51, 2.62it/s] 66%|██████▌ | 245166/371472 [8:56:41<12:19:09, 2.85it/s] 66%|██████▌ | 245167/371472 [8:56:41<12:01:30, 2.92it/s] 66%|██████▌ | 245168/371472 [8:56:42<11:28:00, 3.06it/s] 66%|██████▌ | 245169/371472 [8:56:42<11:38:08, 3.02it/s] 66%|██████▌ | 245170/371472 [8:56:42<11:24:10, 3.08it/s] 66%|██████▌ | 245171/371472 [8:56:43<10:54:04, 3.22it/s] 66%|██████▌ | 245172/371472 [8:56:43<11:48:30, 2.97it/s] 66%|██████▌ | 245173/371472 [8:56:43<11:12:04, 3.13it/s] 66%|██████▌ | 245174/371472 [8:56:44<11:35:43, 3.03it/s] 66%|██████▌ | 245175/371472 [8:56:44<11:02:30, 3.18it/s] 66%|██████▌ | 245176/371472 [8:56:44<10:26:31, 3.36it/s] 66%|██████▌ | 245177/371472 [8:56:45<10:03:51, 3.49it/s] 66%|██████▌ | 245178/371472 [8:56:45<9:53:06, 3.55it/s] 66%|██████▌ | 245179/371472 [8:56:45<9:58:28, 3.52it/s] 66%|██████▌ | 245180/371472 [8:56:45<10:04:02, 3.48it/s] {'loss': 2.7108, 'learning_rate': 4.0614428235902516e-07, 'epoch': 10.56} + 66%|██████▌ | 245180/371472 [8:56:45<10:04:02, 3.48it/s] 66%|██████▌ | 245181/371472 [8:56:46<10:15:40, 3.42it/s] 66%|██████▌ | 245182/371472 [8:56:46<10:07:20, 3.47it/s] 66%|██████▌ | 245183/371472 [8:56:46<9:51:57, 3.56it/s] 66%|██████▌ | 245184/371472 [8:56:47<10:09:30, 3.45it/s] 66%|██████▌ | 245185/371472 [8:56:47<10:08:48, 3.46it/s] 66%|██████▌ | 245186/371472 [8:56:47<10:08:03, 3.46it/s] 66%|██████▌ | 245187/371472 [8:56:47<10:33:26, 3.32it/s] 66%|██████▌ | 245188/371472 [8:56:48<10:40:10, 3.29it/s] 66%|██████▌ | 245189/371472 [8:56:48<10:32:10, 3.33it/s] 66%|██████▌ | 245190/371472 [8:56:48<10:41:43, 3.28it/s] 66%|██████▌ | 245191/371472 [8:56:49<10:38:12, 3.30it/s] 66%|██████▌ | 245192/371472 [8:56:49<10:30:20, 3.34it/s] 66%|██████▌ | 245193/371472 [8:56:49<10:30:54, 3.34it/s] 66%|██████▌ | 245194/371472 [8:56:50<10:31:35, 3.33it/s] 66%|██████▌ | 245195/371472 [8:56:50<10:25:08, 3.37it/s] 66%|██████▌ | 245196/371472 [8:56:50<10:29:33, 3.34it/s] 66%|██████▌ | 245197/371472 [8:56:50<10:33:14, 3.32it/s] 66%|██████▌ | 245198/371472 [8:56:51<10:50:53, 3.23it/s] 66%|██████▌ | 245199/371472 [8:56:51<10:46:32, 3.26it/s] 66%|██████▌ | 245200/371472 [8:56:51<10:28:30, 3.35it/s] {'loss': 2.8172, 'learning_rate': 4.0609580038354634e-07, 'epoch': 10.56} + 66%|██████▌ | 245200/371472 [8:56:51<10:28:30, 3.35it/s] 66%|██████▌ | 245201/371472 [8:56:52<10:30:57, 3.34it/s] 66%|██████▌ | 245202/371472 [8:56:52<10:35:37, 3.31it/s] 66%|██████▌ | 245203/371472 [8:56:52<10:15:32, 3.42it/s] 66%|██████▌ | 245204/371472 [8:56:53<10:08:43, 3.46it/s] 66%|██████▌ | 245205/371472 [8:56:53<10:09:08, 3.45it/s] 66%|██████▌ | 245206/371472 [8:56:53<9:58:12, 3.52it/s] 66%|██████▌ | 245207/371472 [8:56:53<9:58:17, 3.52it/s] 66%|██████▌ | 245208/371472 [8:56:54<10:31:39, 3.33it/s] 66%|██████▌ | 245209/371472 [8:56:54<10:22:41, 3.38it/s] 66%|██████▌ | 245210/371472 [8:56:54<10:35:43, 3.31it/s] 66%|██████▌ | 245211/371472 [8:56:55<10:41:15, 3.28it/s] 66%|██████▌ | 245212/371472 [8:56:55<10:14:49, 3.42it/s] 66%|██████▌ | 245213/371472 [8:56:55<10:01:12, 3.50it/s] 66%|██████▌ | 245214/371472 [8:56:55<10:35:47, 3.31it/s] 66%|██████▌ | 245215/371472 [8:56:56<11:03:22, 3.17it/s] 66%|██████▌ | 245216/371472 [8:56:56<10:57:48, 3.20it/s] 66%|██████▌ | 245217/371472 [8:56:56<10:53:37, 3.22it/s] 66%|██████▌ | 245218/371472 [8:56:57<17:09:20, 2.04it/s] 66%|██████▌ | 245219/371472 [8:56:58<14:58:11, 2.34it/s] 66%|██████▌ | 245220/371472 [8:56:58<13:29:17, 2.60it/s] {'loss': 2.6279, 'learning_rate': 4.060473184080674e-07, 'epoch': 10.56} + 66%|██████▌ | 245220/371472 [8:56:58<13:29:17, 2.60it/s] 66%|██��███▌ | 245221/371472 [8:56:58<13:27:10, 2.61it/s] 66%|██████▌ | 245222/371472 [8:56:59<12:17:33, 2.85it/s] 66%|██████▌ | 245223/371472 [8:56:59<12:00:42, 2.92it/s] 66%|██████▌ | 245224/371472 [8:56:59<11:18:36, 3.10it/s] 66%|██████▌ | 245225/371472 [8:56:59<11:17:14, 3.11it/s] 66%|██████▌ | 245226/371472 [8:57:00<10:50:19, 3.24it/s] 66%|██████▌ | 245227/371472 [8:57:00<10:51:11, 3.23it/s] 66%|██████▌ | 245228/371472 [8:57:00<10:32:49, 3.32it/s] 66%|██████▌ | 245229/371472 [8:57:01<10:19:39, 3.40it/s] 66%|██████▌ | 245230/371472 [8:57:01<10:28:25, 3.35it/s] 66%|██████▌ | 245231/371472 [8:57:01<10:27:49, 3.35it/s] 66%|██████▌ | 245232/371472 [8:57:02<10:13:37, 3.43it/s] 66%|██████▌ | 245233/371472 [8:57:02<10:10:22, 3.45it/s] 66%|██████▌ | 245234/371472 [8:57:02<10:13:14, 3.43it/s] 66%|██████▌ | 245235/371472 [8:57:02<10:26:53, 3.36it/s] 66%|██████▌ | 245236/371472 [8:57:03<10:16:58, 3.41it/s] 66%|██████▌ | 245237/371472 [8:57:03<10:13:59, 3.43it/s] 66%|██████▌ | 245238/371472 [8:57:03<10:12:45, 3.43it/s] 66%|██████▌ | 245239/371472 [8:57:04<10:13:00, 3.43it/s] 66%|██████▌ | 245240/371472 [8:57:04<10:06:05, 3.47it/s] {'loss': 2.6777, 'learning_rate': 4.0599883643258854e-07, 'epoch': 10.56} + 66%|██████▌ | 245240/371472 [8:57:04<10:06:05, 3.47it/s] 66%|██████▌ | 245241/371472 [8:57:04<9:55:55, 3.53it/s] 66%|██████▌ | 245242/371472 [8:57:04<10:29:23, 3.34it/s] 66%|██████▌ | 245243/371472 [8:57:05<10:11:40, 3.44it/s] 66%|██████▌ | 245244/371472 [8:57:05<10:07:28, 3.46it/s] 66%|██████▌ | 245245/371472 [8:57:05<10:11:41, 3.44it/s] 66%|██████▌ | 245246/371472 [8:57:06<10:14:01, 3.43it/s] 66%|██████▌ | 245247/371472 [8:57:06<10:04:20, 3.48it/s] 66%|██████▌ | 245248/371472 [8:57:06<10:17:49, 3.41it/s] 66%|██████▌ | 245249/371472 [8:57:06<10:13:45, 3.43it/s] 66%|██████▌ | 245250/371472 [8:57:07<10:01:29, 3.50it/s] 66%|██████▌ | 245251/371472 [8:57:07<10:13:54, 3.43it/s] 66%|██████▌ | 245252/371472 [8:57:07<10:10:17, 3.45it/s] 66%|██████▌ | 245253/371472 [8:57:08<10:40:22, 3.29it/s] 66%|██████▌ | 245254/371472 [8:57:08<10:45:12, 3.26it/s] 66%|██████▌ | 245255/371472 [8:57:08<10:43:54, 3.27it/s] 66%|██████▌ | 245256/371472 [8:57:09<10:49:05, 3.24it/s] 66%|██████▌ | 245257/371472 [8:57:09<11:00:37, 3.18it/s] 66%|██████▌ | 245258/371472 [8:57:09<11:18:49, 3.10it/s] 66%|██████▌ | 245259/371472 [8:57:10<10:55:18, 3.21it/s] 66%|██████▌ | 245260/371472 [8:57:10<10:35:53, 3.31it/s] {'loss': 2.8771, 'learning_rate': 4.059503544571096e-07, 'epoch': 10.56} + 66%|██████▌ | 245260/371472 [8:57:10<10:35:53, 3.31it/s] 66%|██████▌ | 245261/371472 [8:57:10<10:27:19, 3.35it/s] 66%|██████▌ | 245262/371472 [8:57:10<10:19:14, 3.40it/s] 66%|██████▌ | 245263/371472 [8:57:11<10:14:54, 3.42it/s] 66%|██████▌ | 245264/371472 [8:57:11<10:19:59, 3.39it/s] 66%|██████▌ | 245265/371472 [8:57:11<10:06:13, 3.47it/s] 66%|██████▌ | 245266/371472 [8:57:12<10:46:27, 3.25it/s] 66%|██████▌ | 245267/371472 [8:57:12<10:32:11, 3.33it/s] 66%|██████▌ | 245268/371472 [8:57:12<10:16:51, 3.41it/s] 66%|██████▌ | 245269/371472 [8:57:12<10:15:52, 3.42it/s] 66%|██████▌ | 245270/371472 [8:57:13<10:25:48, 3.36it/s] 66%|██████▌ | 245271/371472 [8:57:13<10:51:07, 3.23it/s] 66%|██████▌ | 245272/371472 [8:57:13<10:25:59, 3.36it/s] 66%|██████▌ | 245273/371472 [8:57:14<10:51:44, 3.23it/s] 66%|██████▌ | 245274/371472 [8:57:14<10:32:08, 3.33it/s] 66%|██████▌ | 245275/371472 [8:57:14<10:24:00, 3.37it/s] 66%|██████▌ | 245276/371472 [8:57:15<10:03:04, 3.49it/s] 66%|██████▌ | 245277/371472 [8:57:15<10:11:13, 3.44it/s] 66%|██████▌ | 245278/371472 [8:57:15<9:59:28, 3.51it/s] 66%|██████▌ | 245279/371472 [8:57:15<9:46:45, 3.58it/s] 66%|██████▌ | 245280/371472 [8:57:16<9:54:46, 3.54it/s] {'loss': 2.6919, 'learning_rate': 4.0590187248163073e-07, 'epoch': 10.56} + 66%|██████▌ | 245280/371472 [8:57:16<9:54:46, 3.54it/s] 66%|██████▌ | 245281/371472 [8:57:16<9:57:15, 3.52it/s] 66%|██████▌ | 245282/371472 [8:57:16<9:58:48, 3.51it/s] 66%|██████▌ | 245283/371472 [8:57:17<9:56:13, 3.53it/s] 66%|██████▌ | 245284/371472 [8:57:17<10:33:34, 3.32it/s] 66%|██████▌ | 245285/371472 [8:57:17<10:47:25, 3.25it/s] 66%|██████▌ | 245286/371472 [8:57:18<11:03:08, 3.17it/s] 66%|██████▌ | 245287/371472 [8:57:18<11:14:17, 3.12it/s] 66%|██████▌ | 245288/371472 [8:57:18<10:42:39, 3.27it/s] 66%|██████▌ | 245289/371472 [8:57:19<11:59:38, 2.92it/s] 66%|██████▌ | 245290/371472 [8:57:19<11:23:03, 3.08it/s] 66%|██████▌ | 245291/371472 [8:57:19<10:56:59, 3.20it/s] 66%|██████▌ | 245292/371472 [8:57:19<10:22:42, 3.38it/s] 66%|██████▌ | 245293/371472 [8:57:20<10:17:58, 3.40it/s] 66%|██████▌ | 245294/371472 [8:57:20<10:54:07, 3.21it/s] 66%|██████▌ | 245295/371472 [8:57:20<11:50:23, 2.96it/s] 66%|██████▌ | 245296/371472 [8:57:21<11:30:52, 3.04it/s] 66%|██████▌ | 245297/371472 [8:57:21<11:00:47, 3.18it/s] 66%|██████▌ | 245298/371472 [8:57:21<10:51:50, 3.23it/s] 66%|██████▌ | 245299/371472 [8:57:22<10:29:26, 3.34it/s] 66%|██████▌ | 245300/371472 [8:57:22<10:12:38, 3.43it/s] {'loss': 2.78, 'learning_rate': 4.058533905061518e-07, 'epoch': 10.57} + 66%|██████▌ | 245300/371472 [8:57:22<10:12:38, 3.43it/s] 66%|██████▌ | 245301/371472 [8:57:22<10:19:34, 3.39it/s] 66%|██████▌ | 245302/371472 [8:57:22<10:13:58, 3.42it/s] 66%|██████▌ | 245303/371472 [8:57:23<9:56:39, 3.52it/s] 66%|██████▌ | 245304/371472 [8:57:23<9:54:04, 3.54it/s] 66%|██████▌ | 245305/371472 [8:57:23<9:37:58, 3.64it/s] 66%|██████▌ | 245306/371472 [8:57:24<9:51:09, 3.56it/s] 66%|██████▌ | 245307/371472 [8:57:24<9:48:11, 3.57it/s] 66%|██████▌ | 245308/371472 [8:57:24<9:55:55, 3.53it/s] 66%|██████▌ | 245309/371472 [8:57:24<9:46:03, 3.59it/s] 66%|██████▌ | 245310/371472 [8:57:25<10:11:34, 3.44it/s] 66%|██████▌ | 245311/371472 [8:57:25<10:07:10, 3.46it/s] 66%|██████▌ | 245312/371472 [8:57:25<11:21:10, 3.09it/s] 66%|██████▌ | 245313/371472 [8:57:26<11:15:13, 3.11it/s] 66%|██████▌ | 245314/371472 [8:57:26<10:53:18, 3.22it/s] 66%|██████▌ | 245315/371472 [8:57:26<10:50:22, 3.23it/s] 66%|██████▌ | 245316/371472 [8:57:27<10:35:16, 3.31it/s] 66%|██████▌ | 245317/371472 [8:57:27<10:36:48, 3.30it/s] 66%|██████▌ | 245318/371472 [8:57:27<10:49:52, 3.24it/s] 66%|██████▌ | 245319/371472 [8:57:28<11:08:58, 3.14it/s] 66%|██████▌ | 245320/371472 [8:57:28<10:55:30, 3.21it/s] {'loss': 2.6916, 'learning_rate': 4.058049085306729e-07, 'epoch': 10.57} + 66%|██████▌ | 245320/371472 [8:57:28<10:55:30, 3.21it/s] 66%|██████▌ | 245321/371472 [8:57:28<10:42:28, 3.27it/s] 66%|██████▌ | 245322/371472 [8:57:28<10:34:02, 3.32it/s] 66%|██████▌ | 245323/371472 [8:57:29<10:41:22, 3.28it/s] 66%|██████▌ | 245324/371472 [8:57:29<10:28:25, 3.35it/s] 66%|██████▌ | 245325/371472 [8:57:29<10:48:01, 3.24it/s] 66%|██████▌ | 245326/371472 [8:57:30<13:01:24, 2.69it/s] 66%|██████▌ | 245327/371472 [8:57:30<12:05:04, 2.90it/s] 66%|██████▌ | 245328/371472 [8:57:30<11:28:46, 3.05it/s] 66%|██████▌ | 245329/371472 [8:57:31<10:58:01, 3.20it/s] 66%|██████▌ | 245330/371472 [8:57:31<10:41:40, 3.28it/s] 66%|██████▌ | 245331/371472 [8:57:31<10:24:39, 3.37it/s] 66%|██████▌ | 245332/371472 [8:57:32<10:33:57, 3.32it/s] 66%|██████▌ | 245333/371472 [8:57:32<10:35:50, 3.31it/s] 66%|██████▌ | 245334/371472 [8:57:32<10:31:07, 3.33it/s] 66%|██████▌ | 245335/371472 [8:57:32<10:14:23, 3.42it/s] 66%|██████▌ | 245336/371472 [8:57:33<10:07:53, 3.46it/s] 66%|██████▌ | 245337/371472 [8:57:33<10:11:20, 3.44it/s] 66%|██████▌ | 245338/371472 [8:57:33<10:27:15, 3.35it/s] 66%|██████▌ | 245339/371472 [8:57:34<10:21:51, 3.38it/s] 66%|██████▌ | 245340/371472 [8:57:34<10:23:26, 3.37it/s] {'loss': 2.6583, 'learning_rate': 4.05756426555194e-07, 'epoch': 10.57} + 66%|██████▌ | 245340/371472 [8:57:34<10:23:26, 3.37it/s] 66%|██████▌ | 245341/371472 [8:57:34<10:38:15, 3.29it/s] 66%|██████▌ | 245342/371472 [8:57:35<11:11:40, 3.13it/s] 66%|██████▌ | 245343/371472 [8:57:35<10:46:27, 3.25it/s] 66%|██████▌ | 245344/371472 [8:57:35<10:50:22, 3.23it/s] 66%|██████▌ | 245345/371472 [8:57:36<10:32:32, 3.32it/s] 66%|██████▌ | 245346/371472 [8:57:36<10:44:18, 3.26it/s] 66%|██████▌ | 245347/371472 [8:57:36<10:22:14, 3.38it/s] 66%|██████▌ | 245348/371472 [8:57:36<10:11:07, 3.44it/s] 66%|██████▌ | 245349/371472 [8:57:37<10:14:16, 3.42it/s] 66%|██████▌ | 245350/371472 [8:57:37<10:22:22, 3.38it/s] 66%|██████▌ | 245351/371472 [8:57:37<11:34:56, 3.02it/s] 66%|██████▌ | 245352/371472 [8:57:38<11:12:03, 3.13it/s] 66%|██████▌ | 245353/371472 [8:57:38<10:36:37, 3.30it/s] 66%|██████▌ | 245354/371472 [8:57:38<11:13:11, 3.12it/s] 66%|██████▌ | 245355/371472 [8:57:39<10:43:43, 3.27it/s] 66%|██████▌ | 245356/371472 [8:57:39<10:54:59, 3.21it/s] 66%|██████▌ | 245357/371472 [8:57:39<10:59:27, 3.19it/s] 66%|██████▌ | 245358/371472 [8:57:40<10:34:37, 3.31it/s] 66%|██████▌ | 245359/371472 [8:57:40<10:44:54, 3.26it/s] 66%|██████▌ | 245360/371472 [8:57:40<10:36:45, 3.30it/s] {'loss': 2.6407, 'learning_rate': 4.0570794457971507e-07, 'epoch': 10.57} + 66%|██████▌ | 245360/371472 [8:57:40<10:36:45, 3.30it/s] 66%|██████▌ | 245361/371472 [8:57:41<11:20:22, 3.09it/s] 66%|██████▌ | 245362/371472 [8:57:41<11:53:25, 2.95it/s] 66%|██████▌ | 245363/371472 [8:57:41<11:23:03, 3.08it/s] 66%|██████▌ | 245364/371472 [8:57:41<11:04:22, 3.16it/s] 66%|██████▌ | 245365/371472 [8:57:42<10:43:34, 3.27it/s] 66%|██████▌ | 245366/371472 [8:57:42<10:26:16, 3.36it/s] 66%|██████▌ | 245367/371472 [8:57:42<10:17:30, 3.40it/s] 66%|██████▌ | 245368/371472 [8:57:43<10:07:06, 3.46it/s] 66%|██████▌ | 245369/371472 [8:57:43<10:03:21, 3.48it/s] 66%|██████▌ | 245370/371472 [8:57:43<10:11:21, 3.44it/s] 66%|██████▌ | 245371/371472 [8:57:43<10:19:38, 3.39it/s] 66%|██████▌ | 245372/371472 [8:57:44<9:57:42, 3.52it/s] 66%|██████▌ | 245373/371472 [8:57:44<9:57:30, 3.52it/s] 66%|██████▌ | 245374/371472 [8:57:44<9:54:35, 3.53it/s] 66%|██████▌ | 245375/371472 [8:57:45<9:57:44, 3.52it/s] 66%|██████▌ | 245376/371472 [8:57:45<9:59:17, 3.51it/s] 66%|██████▌ | 245377/371472 [8:57:45<9:45:17, 3.59it/s] 66%|██████▌ | 245378/371472 [8:57:45<9:44:37, 3.59it/s] 66%|██████▌ | 245379/371472 [8:57:46<9:37:50, 3.64it/s] 66%|██████▌ | 245380/371472 [8:57:46<10:10:20, 3.44it/s] {'loss': 2.5549, 'learning_rate': 4.0565946260423625e-07, 'epoch': 10.57} + 66%|██████▌ | 245380/371472 [8:57:46<10:10:20, 3.44it/s] 66%|██████▌ | 245381/371472 [8:57:46<10:07:29, 3.46it/s] 66%|██████▌ | 245382/371472 [8:57:47<10:16:12, 3.41it/s] 66%|██████▌ | 245383/371472 [8:57:47<10:18:33, 3.40it/s] 66%|██████▌ | 245384/371472 [8:57:47<10:14:18, 3.42it/s] 66%|██████▌ | 245385/371472 [8:57:47<10:22:55, 3.37it/s] 66%|██████▌ | 245386/371472 [8:57:48<10:08:17, 3.45it/s] 66%|██████▌ | 245387/371472 [8:57:48<9:59:49, 3.50it/s] 66%|██████▌ | 245388/371472 [8:57:48<9:45:32, 3.59it/s] 66%|██████▌ | 245389/371472 [8:57:49<9:44:22, 3.60it/s] 66%|██████▌ | 245390/371472 [8:57:49<9:48:04, 3.57it/s] 66%|██████▌ | 245391/371472 [8:57:49<9:40:25, 3.62it/s] 66%|██████▌ | 245392/371472 [8:57:49<9:34:27, 3.66it/s] 66%|██████▌ | 245393/371472 [8:57:50<9:52:47, 3.54it/s] 66%|██████▌ | 245394/371472 [8:57:50<10:01:02, 3.50it/s] 66%|██████▌ | 245395/371472 [8:57:50<9:41:36, 3.61it/s] 66%|██████▌ | 245396/371472 [8:57:51<10:02:29, 3.49it/s] 66%|██████▌ | 245397/371472 [8:57:51<9:55:22, 3.53it/s] 66%|██████▌ | 245398/371472 [8:57:51<9:52:19, 3.55it/s] 66%|██████▌ | 245399/371472 [8:57:51<9:41:20, 3.61it/s] 66%|██████▌ | 245400/371472 [8:57:52<9:41:31, 3.61it/s] {'loss': 2.8551, 'learning_rate': 4.0561098062875727e-07, 'epoch': 10.57} + 66%|██████▌ | 245400/371472 [8:57:52<9:41:31, 3.61it/s] 66%|██████▌ | 245401/371472 [8:57:52<10:06:09, 3.47it/s] 66%|██████▌ | 245402/371472 [8:57:52<11:14:57, 3.11it/s] 66%|██████▌ | 245403/371472 [8:57:53<10:34:30, 3.31it/s] 66%|██████▌ | 245404/371472 [8:57:53<10:14:33, 3.42it/s] 66%|██████▌ | 245405/371472 [8:57:53<9:58:29, 3.51it/s] 66%|██████▌ | 245406/371472 [8:57:53<10:06:42, 3.46it/s] 66%|██████▌ | 245407/371472 [8:57:54<10:23:10, 3.37it/s] 66%|██████▌ | 245408/371472 [8:57:54<10:14:28, 3.42it/s] 66%|██████▌ | 245409/371472 [8:57:54<10:06:16, 3.47it/s] 66%|██████▌ | 245410/371472 [8:57:55<10:03:45, 3.48it/s] 66%|██████▌ | 245411/371472 [8:57:55<10:37:17, 3.30it/s] 66%|██████▌ | 245412/371472 [8:57:55<10:25:32, 3.36it/s] 66%|██████▌ | 245413/371472 [8:57:56<10:03:23, 3.48it/s] 66%|██████▌ | 245414/371472 [8:57:56<10:04:44, 3.47it/s] 66%|██████▌ | 245415/371472 [8:57:56<10:08:51, 3.45it/s] 66%|██████▌ | 245416/371472 [8:57:56<9:54:09, 3.54it/s] 66%|██████▌ | 245417/371472 [8:57:57<10:17:18, 3.40it/s] 66%|██████▌ | 245418/371472 [8:57:57<10:14:02, 3.42it/s] 66%|██████▌ | 245419/371472 [8:57:57<10:16:16, 3.41it/s] 66%|██████▌ | 245420/371472 [8:57:58<10:27:24, 3.35it/s] {'loss': 2.709, 'learning_rate': 4.0556249865327844e-07, 'epoch': 10.57} + 66%|██████▌ | 245420/371472 [8:57:58<10:27:24, 3.35it/s] 66%|██████▌ | 245421/371472 [8:57:58<10:58:31, 3.19it/s] 66%|██████▌ | 245422/371472 [8:57:58<10:48:07, 3.24it/s] 66%|██████▌ | 245423/371472 [8:57:59<11:20:44, 3.09it/s] 66%|██████▌ | 245424/371472 [8:57:59<11:41:37, 2.99it/s] 66%|██████▌ | 245425/371472 [8:57:59<11:46:15, 2.97it/s] 66%|██████▌ | 245426/371472 [8:58:00<11:59:58, 2.92it/s] 66%|██████▌ | 245427/371472 [8:58:00<11:26:07, 3.06it/s] 66%|██████▌ | 245428/371472 [8:58:00<11:44:16, 2.98it/s] 66%|██████▌ | 245429/371472 [8:58:01<11:21:31, 3.08it/s] 66%|██████▌ | 245430/371472 [8:58:01<11:28:28, 3.05it/s] 66%|██████▌ | 245431/371472 [8:58:01<11:00:44, 3.18it/s] 66%|██████▌ | 245432/371472 [8:58:01<10:33:08, 3.32it/s] 66%|██████▌ | 245433/371472 [8:58:02<10:19:00, 3.39it/s] 66%|██████▌ | 245434/371472 [8:58:02<10:13:30, 3.42it/s] 66%|██████▌ | 245435/371472 [8:58:02<10:10:30, 3.44it/s] 66%|██████▌ | 245436/371472 [8:58:03<10:03:17, 3.48it/s] 66%|██████▌ | 245437/371472 [8:58:03<10:06:02, 3.47it/s] 66%|██████▌ | 245438/371472 [8:58:03<10:10:05, 3.44it/s] 66%|██████▌ | 245439/371472 [8:58:03<10:12:48, 3.43it/s] 66%|██████▌ | 245440/371472 [8:58:04<10:36:51, 3.30it/s] {'loss': 2.6679, 'learning_rate': 4.055140166777995e-07, 'epoch': 10.57} + 66%|██████▌ | 245440/371472 [8:58:04<10:36:51, 3.30it/s] 66%|██████▌ | 245441/371472 [8:58:04<11:46:50, 2.97it/s] 66%|██████▌ | 245442/371472 [8:58:05<12:32:37, 2.79it/s] 66%|██████▌ | 245443/371472 [8:58:05<11:53:11, 2.95it/s] 66%|██████▌ | 245444/371472 [8:58:05<11:48:54, 2.96it/s] 66%|██████▌ | 245445/371472 [8:58:06<11:08:29, 3.14it/s] 66%|██████▌ | 245446/371472 [8:58:06<10:45:14, 3.26it/s] 66%|██████▌ | 245447/371472 [8:58:06<11:17:39, 3.10it/s] 66%|██████▌ | 245448/371472 [8:58:06<11:00:45, 3.18it/s] 66%|██████▌ | 245449/371472 [8:58:07<10:36:54, 3.30it/s] 66%|██████▌ | 245450/371472 [8:58:07<10:24:04, 3.37it/s] 66%|██████▌ | 245451/371472 [8:58:07<10:42:20, 3.27it/s] 66%|██████▌ | 245452/371472 [8:58:08<10:55:47, 3.20it/s] 66%|██████▌ | 245453/371472 [8:58:08<10:39:33, 3.28it/s] 66%|██████▌ | 245454/371472 [8:58:08<10:36:08, 3.30it/s] 66%|██████▌ | 245455/371472 [8:58:09<10:18:48, 3.39it/s] 66%|██████▌ | 245456/371472 [8:58:09<10:22:12, 3.38it/s] 66%|██████▌ | 245457/371472 [8:58:09<10:28:35, 3.34it/s] 66%|██████▌ | 245458/371472 [8:58:09<10:05:03, 3.47it/s] 66%|██████▌ | 245459/371472 [8:58:10<10:09:03, 3.45it/s] 66%|██████▌ | 245460/371472 [8:58:10<10:15:26, 3.41it/s] {'loss': 2.7534, 'learning_rate': 4.0546553470232064e-07, 'epoch': 10.57} + 66%|██████▌ | 245460/371472 [8:58:10<10:15:26, 3.41it/s] 66%|██████▌ | 245461/371472 [8:58:10<10:09:01, 3.45it/s] 66%|██████▌ | 245462/371472 [8:58:11<9:58:04, 3.51it/s] 66%|██████▌ | 245463/371472 [8:58:11<10:34:58, 3.31it/s] 66%|██████▌ | 245464/371472 [8:58:11<10:34:31, 3.31it/s] 66%|██████▌ | 245465/371472 [8:58:11<10:11:37, 3.43it/s] 66%|██████▌ | 245466/371472 [8:58:12<10:00:43, 3.50it/s] 66%|██████▌ | 245467/371472 [8:58:12<10:26:26, 3.35it/s] 66%|██████▌ | 245468/371472 [8:58:12<10:22:40, 3.37it/s] 66%|██████▌ | 245469/371472 [8:58:13<10:38:36, 3.29it/s] 66%|██████▌ | 245470/371472 [8:58:13<10:29:45, 3.33it/s] 66%|██████▌ | 245471/371472 [8:58:13<10:26:40, 3.35it/s] 66%|██████▌ | 245472/371472 [8:58:14<10:21:19, 3.38it/s] 66%|██████▌ | 245473/371472 [8:58:14<10:35:31, 3.30it/s] 66%|██████▌ | 245474/371472 [8:58:14<10:26:38, 3.35it/s] 66%|██████▌ | 245475/371472 [8:58:14<10:16:49, 3.40it/s] 66%|██████▌ | 245476/371472 [8:58:15<10:21:55, 3.38it/s] 66%|██████▌ | 245477/371472 [8:58:15<11:04:30, 3.16it/s] 66%|██████▌ | 245478/371472 [8:58:15<10:58:42, 3.19it/s] 66%|██████▌ | 245479/371472 [8:58:16<11:03:23, 3.17it/s] 66%|██████▌ | 245480/371472 [8:58:16<10:55:06, 3.21it/s] {'loss': 2.6787, 'learning_rate': 4.054170527268417e-07, 'epoch': 10.57} + 66%|██████▌ | 245480/371472 [8:58:16<10:55:06, 3.21it/s] 66%|██████▌ | 245481/371472 [8:58:16<11:11:57, 3.12it/s] 66%|██████▌ | 245482/371472 [8:58:17<11:23:59, 3.07it/s] 66%|██████▌ | 245483/371472 [8:58:17<11:07:03, 3.15it/s] 66%|██████▌ | 245484/371472 [8:58:17<10:59:44, 3.18it/s] 66%|██████▌ | 245485/371472 [8:58:18<10:48:45, 3.24it/s] 66%|██████▌ | 245486/371472 [8:58:18<11:39:45, 3.00it/s] 66%|██████▌ | 245487/371472 [8:58:18<11:00:18, 3.18it/s] 66%|██████▌ | 245488/371472 [8:58:19<10:34:44, 3.31it/s] 66%|██████▌ | 245489/371472 [8:58:19<10:29:58, 3.33it/s] 66%|██████▌ | 245490/371472 [8:58:19<10:04:40, 3.47it/s] 66%|██████▌ | 245491/371472 [8:58:19<9:42:54, 3.60it/s] 66%|██████▌ | 245492/371472 [8:58:20<9:41:53, 3.61it/s] 66%|██████▌ | 245493/371472 [8:58:20<9:42:32, 3.60it/s] 66%|██████▌ | 245494/371472 [8:58:20<9:58:09, 3.51it/s] 66%|██████▌ | 245495/371472 [8:58:21<10:00:11, 3.50it/s] 66%|██████▌ | 245496/371472 [8:58:21<9:55:14, 3.53it/s] 66%|██████▌ | 245497/371472 [8:58:21<10:03:38, 3.48it/s] 66%|██████▌ | 245498/371472 [8:58:21<10:02:30, 3.48it/s] 66%|██████▌ | 245499/371472 [8:58:22<10:49:17, 3.23it/s] 66%|██████▌ | 245500/371472 [8:58:22<10:36:14, 3.30it/s] {'loss': 2.9504, 'learning_rate': 4.053685707513629e-07, 'epoch': 10.57} + 66%|██████▌ | 245500/371472 [8:58:22<10:36:14, 3.30it/s] 66%|██████▌ | 245501/371472 [8:58:22<10:42:38, 3.27it/s] 66%|██████▌ | 245502/371472 [8:58:23<11:21:42, 3.08it/s] 66%|██████▌ | 245503/371472 [8:58:23<10:58:57, 3.19it/s] 66%|██████▌ | 245504/371472 [8:58:23<11:13:14, 3.12it/s] 66%|██████▌ | 245505/371472 [8:58:24<11:32:04, 3.03it/s] 66%|██████▌ | 245506/371472 [8:58:24<11:07:08, 3.15it/s] 66%|██████▌ | 245507/371472 [8:58:24<11:04:14, 3.16it/s] 66%|██████▌ | 245508/371472 [8:58:25<10:33:40, 3.31it/s] 66%|██████▌ | 245509/371472 [8:58:25<10:52:46, 3.22it/s] 66%|██████▌ | 245510/371472 [8:58:25<11:05:15, 3.16it/s] 66%|██████▌ | 245511/371472 [8:58:26<11:09:54, 3.13it/s] 66%|██████▌ | 245512/371472 [8:58:26<10:50:14, 3.23it/s] 66%|██████▌ | 245513/371472 [8:58:26<10:44:08, 3.26it/s] 66%|██████▌ | 245514/371472 [8:58:26<10:34:56, 3.31it/s] 66%|██████▌ | 245515/371472 [8:58:27<11:09:59, 3.13it/s] 66%|██████▌ | 245516/371472 [8:58:27<11:08:37, 3.14it/s] 66%|██████▌ | 245517/371472 [8:58:28<12:43:27, 2.75it/s] 66%|██████▌ | 245518/371472 [8:58:28<12:08:04, 2.88it/s] 66%|██████▌ | 245519/371472 [8:58:28<11:48:59, 2.96it/s] 66%|██████▌ | 245520/371472 [8:58:28<11:20:45, 3.08it/s] {'loss': 2.7671, 'learning_rate': 4.0532008877588396e-07, 'epoch': 10.58} + 66%|██████▌ | 245520/371472 [8:58:28<11:20:45, 3.08it/s] 66%|██████▌ | 245521/371472 [8:58:29<11:00:32, 3.18it/s] 66%|██████▌ | 245522/371472 [8:58:29<10:58:57, 3.19it/s] 66%|██████▌ | 245523/371472 [8:58:29<10:40:40, 3.28it/s] 66%|██████▌ | 245524/371472 [8:58:30<10:20:27, 3.38it/s] 66%|██████▌ | 245525/371472 [8:58:30<10:41:35, 3.27it/s] 66%|██████▌ | 245526/371472 [8:58:30<10:41:23, 3.27it/s] 66%|██████▌ | 245527/371472 [8:58:31<10:52:06, 3.22it/s] 66%|██████▌ | 245528/371472 [8:58:31<10:36:13, 3.30it/s] 66%|██████▌ | 245529/371472 [8:58:31<10:35:51, 3.30it/s] 66%|██████▌ | 245530/371472 [8:58:31<10:16:51, 3.40it/s] 66%|██████▌ | 245531/371472 [8:58:32<11:03:55, 3.16it/s] 66%|██████▌ | 245532/371472 [8:58:32<11:01:02, 3.18it/s] 66%|██████▌ | 245533/371472 [8:58:32<10:43:37, 3.26it/s] 66%|██████▌ | 245534/371472 [8:58:33<11:00:23, 3.18it/s] 66%|██████▌ | 245535/371472 [8:58:33<10:38:49, 3.29it/s] 66%|██████▌ | 245536/371472 [8:58:33<10:23:29, 3.37it/s] 66%|██████▌ | 245537/371472 [8:58:34<10:43:04, 3.26it/s] 66%|██████▌ | 245538/371472 [8:58:34<10:54:16, 3.21it/s] 66%|██████▌ | 245539/371472 [8:58:34<10:30:13, 3.33it/s] 66%|██████▌ | 245540/371472 [8:58:35<10:05:38, 3.47it/s] {'loss': 2.7004, 'learning_rate': 4.052716068004051e-07, 'epoch': 10.58} + 66%|██████▌ | 245540/371472 [8:58:35<10:05:38, 3.47it/s] 66%|██████▌ | 245541/371472 [8:58:35<11:18:00, 3.10it/s] 66%|██████▌ | 245542/371472 [8:58:35<10:54:15, 3.21it/s] 66%|██████▌ | 245543/371472 [8:58:35<10:36:32, 3.30it/s] 66%|██████▌ | 245544/371472 [8:58:36<10:30:37, 3.33it/s] 66%|██████▌ | 245545/371472 [8:58:36<10:42:48, 3.27it/s] 66%|██████▌ | 245546/371472 [8:58:36<10:15:49, 3.41it/s] 66%|██████▌ | 245547/371472 [8:58:37<10:17:00, 3.40it/s] 66%|██████▌ | 245548/371472 [8:58:37<10:19:12, 3.39it/s] 66%|██████▌ | 245549/371472 [8:58:37<10:09:32, 3.44it/s] 66%|██████▌ | 245550/371472 [8:58:38<10:19:42, 3.39it/s] 66%|██████▌ | 245551/371472 [8:58:38<10:05:37, 3.47it/s] 66%|██████▌ | 245552/371472 [8:58:38<10:40:37, 3.28it/s] 66%|██████▌ | 245553/371472 [8:58:38<10:44:24, 3.26it/s] 66%|██████▌ | 245554/371472 [8:58:39<10:29:52, 3.33it/s] 66%|██████▌ | 245555/371472 [8:58:39<10:22:44, 3.37it/s] 66%|██████▌ | 245556/371472 [8:58:39<10:44:59, 3.25it/s] 66%|██████▌ | 245557/371472 [8:58:40<10:30:19, 3.33it/s] 66%|██████▌ | 245558/371472 [8:58:40<10:27:08, 3.35it/s] 66%|██████▌ | 245559/371472 [8:58:40<9:59:02, 3.50it/s] 66%|██████▌ | 245560/371472 [8:58:41<10:11:56, 3.43it/s] {'loss': 2.7547, 'learning_rate': 4.0522312482492616e-07, 'epoch': 10.58} + 66%|██████▌ | 245560/371472 [8:58:41<10:11:56, 3.43it/s] 66%|██████▌ | 245561/371472 [8:58:41<10:09:17, 3.44it/s] 66%|██████▌ | 245562/371472 [8:58:41<10:03:36, 3.48it/s] 66%|██████▌ | 245563/371472 [8:58:41<10:38:17, 3.29it/s] 66%|██████▌ | 245564/371472 [8:58:42<10:45:51, 3.25it/s] 66%|██████▌ | 245565/371472 [8:58:42<10:57:19, 3.19it/s] 66%|██████▌ | 245566/371472 [8:58:42<10:29:07, 3.34it/s] 66%|██████▌ | 245567/371472 [8:58:43<11:20:55, 3.08it/s] 66%|██████▌ | 245568/371472 [8:58:43<11:04:35, 3.16it/s] 66%|██████▌ | 245569/371472 [8:58:43<10:44:43, 3.25it/s] 66%|██████▌ | 245570/371472 [8:58:44<10:16:09, 3.41it/s] 66%|██████▌ | 245571/371472 [8:58:44<10:11:00, 3.43it/s] 66%|██████▌ | 245572/371472 [8:58:44<10:08:16, 3.45it/s] 66%|██████▌ | 245573/371472 [8:58:44<10:42:20, 3.27it/s] 66%|██████▌ | 245574/371472 [8:58:45<11:28:07, 3.05it/s] 66%|██████▌ | 245575/371472 [8:58:45<10:58:24, 3.19it/s] 66%|██████▌ | 245576/371472 [8:58:45<10:33:19, 3.31it/s] 66%|██████▌ | 245577/371472 [8:58:46<10:22:50, 3.37it/s] 66%|██████▌ | 245578/371472 [8:58:46<10:18:14, 3.39it/s] 66%|██████▌ | 245579/371472 [8:58:46<10:18:36, 3.39it/s] 66%|██████▌ | 245580/371472 [8:58:47<10:20:21, 3.38it/s] {'loss': 2.7269, 'learning_rate': 4.0517464284944733e-07, 'epoch': 10.58} + 66%|██████▌ | 245580/371472 [8:58:47<10:20:21, 3.38it/s] 66%|██████▌ | 245581/371472 [8:58:47<10:47:39, 3.24it/s] 66%|██████▌ | 245582/371472 [8:58:47<10:28:31, 3.34it/s] 66%|██████▌ | 245583/371472 [8:58:48<10:39:14, 3.28it/s] 66%|██████▌ | 245584/371472 [8:58:48<10:22:30, 3.37it/s] 66%|██████▌ | 245585/371472 [8:58:48<10:29:28, 3.33it/s] 66%|██████▌ | 245586/371472 [8:58:48<10:19:23, 3.39it/s] 66%|██████▌ | 245587/371472 [8:58:49<10:25:31, 3.35it/s] 66%|██████▌ | 245588/371472 [8:58:49<10:23:41, 3.36it/s] 66%|██████▌ | 245589/371472 [8:58:49<10:03:45, 3.47it/s] 66%|██████▌ | 245590/371472 [8:58:50<12:10:16, 2.87it/s] 66%|██████▌ | 245591/371472 [8:58:50<11:48:41, 2.96it/s] 66%|██████▌ | 245592/371472 [8:58:50<11:12:54, 3.12it/s] 66%|██████▌ | 245593/371472 [8:58:51<11:18:04, 3.09it/s] 66%|██████▌ | 245594/371472 [8:58:51<10:48:00, 3.24it/s] 66%|██████▌ | 245595/371472 [8:58:51<10:40:51, 3.27it/s] 66%|██████▌ | 245596/371472 [8:58:52<10:39:59, 3.28it/s] 66%|██████▌ | 245597/371472 [8:58:52<10:26:28, 3.35it/s] 66%|██████▌ | 245598/371472 [8:58:52<10:47:28, 3.24it/s] 66%|██████▌ | 245599/371472 [8:58:52<10:31:27, 3.32it/s] 66%|██████▌ | 245600/371472 [8:58:53<10:20:56, 3.38it/s] {'loss': 2.7758, 'learning_rate': 4.0512616087396835e-07, 'epoch': 10.58} + 66%|██████▌ | 245600/371472 [8:58:53<10:20:56, 3.38it/s] 66%|██████▌ | 245601/371472 [8:58:53<10:29:02, 3.33it/s] 66%|██████▌ | 245602/371472 [8:58:53<10:29:59, 3.33it/s] 66%|██████▌ | 245603/371472 [8:58:54<10:28:49, 3.34it/s] 66%|██████▌ | 245604/371472 [8:58:54<10:13:37, 3.42it/s] 66%|██████▌ | 245605/371472 [8:58:54<10:30:47, 3.33it/s] 66%|██████▌ | 245606/371472 [8:58:55<10:44:12, 3.26it/s] 66%|██████▌ | 245607/371472 [8:58:55<10:31:59, 3.32it/s] 66%|██████▌ | 245608/371472 [8:58:55<10:23:18, 3.37it/s] 66%|██████▌ | 245609/371472 [8:58:55<10:02:52, 3.48it/s] 66%|██████▌ | 245610/371472 [8:58:56<9:51:33, 3.55it/s] 66%|██████▌ | 245611/371472 [8:58:56<9:49:35, 3.56it/s] 66%|██████▌ | 245612/371472 [8:58:56<9:52:35, 3.54it/s] 66%|██████▌ | 245613/371472 [8:58:57<9:52:50, 3.54it/s] 66%|██████▌ | 245614/371472 [8:58:57<9:50:34, 3.55it/s] 66%|██████▌ | 245615/371472 [8:58:57<10:00:58, 3.49it/s] 66%|██████▌ | 245616/371472 [8:58:57<10:07:33, 3.45it/s] 66%|██████▌ | 245617/371472 [8:58:58<9:54:42, 3.53it/s] 66%|██████▌ | 245618/371472 [8:58:58<9:48:17, 3.57it/s] 66%|██████▌ | 245619/371472 [8:58:58<9:34:42, 3.65it/s] 66%|██████▌ | 245620/371472 [8:58:58<9:36:09, 3.64it/s] {'loss': 2.8585, 'learning_rate': 4.0507767889848953e-07, 'epoch': 10.58} + 66%|██████▌ | 245620/371472 [8:58:58<9:36:09, 3.64it/s] 66%|██████▌ | 245621/371472 [8:58:59<9:38:05, 3.63it/s] 66%|██████▌ | 245622/371472 [8:58:59<9:45:44, 3.58it/s] 66%|██████▌ | 245623/371472 [8:58:59<10:04:25, 3.47it/s] 66%|██████▌ | 245624/371472 [8:59:00<9:55:17, 3.52it/s] 66%|██████▌ | 245625/371472 [8:59:00<10:27:02, 3.34it/s] 66%|██████▌ | 245626/371472 [8:59:00<10:37:05, 3.29it/s] 66%|██████▌ | 245627/371472 [8:59:01<10:18:38, 3.39it/s] 66%|██████▌ | 245628/371472 [8:59:01<10:16:41, 3.40it/s] 66%|██████▌ | 245629/371472 [8:59:01<10:11:54, 3.43it/s] 66%|██████▌ | 245630/371472 [8:59:01<10:36:39, 3.29it/s] 66%|██████▌ | 245631/371472 [8:59:02<10:40:42, 3.27it/s] 66%|██████▌ | 245632/371472 [8:59:02<11:07:39, 3.14it/s] 66%|██████▌ | 245633/371472 [8:59:02<11:22:45, 3.07it/s] 66%|██████▌ | 245634/371472 [8:59:03<10:59:32, 3.18it/s] 66%|██████▌ | 245635/371472 [8:59:03<10:51:47, 3.22it/s] 66%|██████▌ | 245636/371472 [8:59:03<10:36:03, 3.30it/s] 66%|██████▌ | 245637/371472 [8:59:04<10:27:31, 3.34it/s] 66%|██████▌ | 245638/371472 [8:59:04<10:15:25, 3.41it/s] 66%|██████▌ | 245639/371472 [8:59:04<10:01:55, 3.48it/s] 66%|██████▌ | 245640/371472 [8:59:05<10:38:31, 3.28it/s] {'loss': 2.7244, 'learning_rate': 4.0502919692301055e-07, 'epoch': 10.58} + 66%|██████▌ | 245640/371472 [8:59:05<10:38:31, 3.28it/s] 66%|██████▌ | 245641/371472 [8:59:05<11:05:26, 3.15it/s] 66%|██████▌ | 245642/371472 [8:59:05<10:43:56, 3.26it/s] 66%|██████▌ | 245643/371472 [8:59:06<11:23:55, 3.07it/s] 66%|██████▌ | 245644/371472 [8:59:06<11:50:27, 2.95it/s] 66%|██████▌ | 245645/371472 [8:59:06<11:57:23, 2.92it/s] 66%|██████▌ | 245646/371472 [8:59:07<11:19:48, 3.08it/s] 66%|██████▌ | 245647/371472 [8:59:07<10:56:23, 3.19it/s] 66%|██████▌ | 245648/371472 [8:59:07<10:47:54, 3.24it/s] 66%|██████▌ | 245649/371472 [8:59:07<10:34:04, 3.31it/s] 66%|██████▌ | 245650/371472 [8:59:08<10:41:20, 3.27it/s] 66%|██████▌ | 245651/371472 [8:59:08<10:36:32, 3.29it/s] 66%|██████▌ | 245652/371472 [8:59:08<10:43:33, 3.26it/s] 66%|██████▌ | 245653/371472 [8:59:09<10:26:34, 3.35it/s] 66%|██████▌ | 245654/371472 [8:59:09<10:20:26, 3.38it/s] 66%|██████▌ | 245655/371472 [8:59:09<10:20:15, 3.38it/s] 66%|██████▌ | 245656/371472 [8:59:10<10:48:04, 3.24it/s] 66%|██████▌ | 245657/371472 [8:59:10<10:40:27, 3.27it/s] 66%|██████▌ | 245658/371472 [8:59:10<10:38:04, 3.29it/s] 66%|██████▌ | 245659/371472 [8:59:10<10:22:45, 3.37it/s] 66%|██████▌ | 245660/371472 [8:59:11<10:06:05, 3.46it/s] {'loss': 2.7507, 'learning_rate': 4.049807149475317e-07, 'epoch': 10.58} + 66%|██████▌ | 245660/371472 [8:59:11<10:06:05, 3.46it/s] 66%|██████▌ | 245661/371472 [8:59:11<10:23:39, 3.36it/s] 66%|██████▌ | 245662/371472 [8:59:11<10:12:05, 3.43it/s] 66%|██████▌ | 245663/371472 [8:59:12<10:08:40, 3.44it/s] 66%|██████▌ | 245664/371472 [8:59:12<10:28:14, 3.34it/s] 66%|██████▌ | 245665/371472 [8:59:12<10:21:30, 3.37it/s] 66%|██████▌ | 245666/371472 [8:59:12<10:15:09, 3.41it/s] 66%|██████▌ | 245667/371472 [8:59:13<12:02:00, 2.90it/s] 66%|██████▌ | 245668/371472 [8:59:13<11:48:19, 2.96it/s] 66%|██████▌ | 245669/371472 [8:59:14<11:19:39, 3.08it/s] 66%|██████▌ | 245670/371472 [8:59:14<11:56:24, 2.93it/s] 66%|██████▌ | 245671/371472 [8:59:14<11:55:22, 2.93it/s] 66%|██████▌ | 245672/371472 [8:59:15<11:17:29, 3.09it/s] 66%|██████▌ | 245673/371472 [8:59:15<11:13:34, 3.11it/s] 66%|██████▌ | 245674/371472 [8:59:15<10:54:44, 3.20it/s] 66%|██████▌ | 245675/371472 [8:59:15<10:34:40, 3.30it/s] 66%|██████▌ | 245676/371472 [8:59:16<10:14:30, 3.41it/s] 66%|██████▌ | 245677/371472 [8:59:16<10:10:00, 3.44it/s] 66%|██████▌ | 245678/371472 [8:59:16<10:29:32, 3.33it/s] 66%|██████▌ | 245679/371472 [8:59:17<10:46:35, 3.24it/s] 66%|██████▌ | 245680/371472 [8:59:17<11:05:37, 3.15it/s] {'loss': 2.8038, 'learning_rate': 4.049322329720528e-07, 'epoch': 10.58} + 66%|██████▌ | 245680/371472 [8:59:17<11:05:37, 3.15it/s] 66%|██████▌ | 245681/371472 [8:59:17<10:46:26, 3.24it/s] 66%|██████▌ | 245682/371472 [8:59:18<10:44:02, 3.26it/s] 66%|██████▌ | 245683/371472 [8:59:18<10:40:14, 3.27it/s] 66%|██████▌ | 245684/371472 [8:59:18<13:12:46, 2.64it/s] 66%|██████▌ | 245685/371472 [8:59:19<12:06:40, 2.89it/s] 66%|██████▌ | 245686/371472 [8:59:19<11:14:36, 3.11it/s] 66%|██████▌ | 245687/371472 [8:59:19<10:40:13, 3.27it/s] 66%|██████▌ | 245688/371472 [8:59:20<10:51:44, 3.22it/s] 66%|██████▌ | 245689/371472 [8:59:20<11:01:49, 3.17it/s] 66%|██████▌ | 245690/371472 [8:59:20<10:39:24, 3.28it/s] 66%|██████▌ | 245691/371472 [8:59:20<10:44:22, 3.25it/s] 66%|██████▌ | 245692/371472 [8:59:21<10:29:16, 3.33it/s] 66%|██████▌ | 245693/371472 [8:59:21<10:24:51, 3.35it/s] 66%|██████▌ | 245694/371472 [8:59:21<11:47:46, 2.96it/s] 66%|██████▌ | 245695/371472 [8:59:22<11:39:27, 3.00it/s] 66%|██████▌ | 245696/371472 [8:59:22<11:12:06, 3.12it/s] 66%|██████▌ | 245697/371472 [8:59:22<10:59:51, 3.18it/s] 66%|██████▌ | 245698/371472 [8:59:23<10:33:09, 3.31it/s] 66%|██████▌ | 245699/371472 [8:59:23<10:46:02, 3.24it/s] 66%|██████▌ | 245700/371472 [8:59:23<10:30:06, 3.33it/s] {'loss': 2.574, 'learning_rate': 4.04883750996574e-07, 'epoch': 10.58} + 66%|█████��▌ | 245700/371472 [8:59:23<10:30:06, 3.33it/s] 66%|██████▌ | 245701/371472 [8:59:24<10:21:45, 3.37it/s] 66%|██████▌ | 245702/371472 [8:59:24<10:18:09, 3.39it/s] 66%|██████▌ | 245703/371472 [8:59:24<10:01:47, 3.48it/s] 66%|██████▌ | 245704/371472 [8:59:24<10:16:49, 3.40it/s] 66%|██████▌ | 245705/371472 [8:59:25<10:22:34, 3.37it/s] 66%|██████▌ | 245706/371472 [8:59:25<10:17:52, 3.39it/s] 66%|██████▌ | 245707/371472 [8:59:25<10:06:27, 3.46it/s] 66%|██████▌ | 245708/371472 [8:59:26<10:06:37, 3.46it/s] 66%|██████▌ | 245709/371472 [8:59:26<10:54:41, 3.20it/s] 66%|██████▌ | 245710/371472 [8:59:26<10:26:33, 3.35it/s] 66%|██████▌ | 245711/371472 [8:59:27<11:06:03, 3.15it/s] 66%|██████▌ | 245712/371472 [8:59:27<11:10:05, 3.13it/s] 66%|██████▌ | 245713/371472 [8:59:27<10:59:05, 3.18it/s] 66%|██████▌ | 245714/371472 [8:59:27<10:54:31, 3.20it/s] 66%|██████▌ | 245715/371472 [8:59:28<10:30:58, 3.32it/s] 66%|██████▌ | 245716/371472 [8:59:28<10:22:56, 3.36it/s] 66%|██████▌ | 245717/371472 [8:59:28<10:01:41, 3.48it/s] 66%|██████▌ | 245718/371472 [8:59:29<10:33:45, 3.31it/s] 66%|██████▌ | 245719/371472 [8:59:29<10:18:16, 3.39it/s] 66%|██████▌ | 245720/371472 [8:59:29<10:28:45, 3.33it/s] {'loss': 2.64, 'learning_rate': 4.04835269021095e-07, 'epoch': 10.58} + 66%|██████▌ | 245720/371472 [8:59:29<10:28:45, 3.33it/s] 66%|██████▌ | 245721/371472 [8:59:30<10:26:17, 3.35it/s] 66%|██████▌ | 245722/371472 [8:59:30<10:50:01, 3.22it/s] 66%|██████▌ | 245723/371472 [8:59:30<10:34:01, 3.31it/s] 66%|██████▌ | 245724/371472 [8:59:30<10:31:30, 3.32it/s] 66%|██████▌ | 245725/371472 [8:59:31<10:55:06, 3.20it/s] 66%|██████▌ | 245726/371472 [8:59:31<10:45:59, 3.24it/s] 66%|██████▌ | 245727/371472 [8:59:31<10:34:39, 3.30it/s] 66%|██████▌ | 245728/371472 [8:59:32<10:35:04, 3.30it/s] 66%|██████▌ | 245729/371472 [8:59:32<10:17:37, 3.39it/s] 66%|██████▌ | 245730/371472 [8:59:32<10:16:36, 3.40it/s] 66%|██████▌ | 245731/371472 [8:59:33<10:28:03, 3.34it/s] 66%|██████▌ | 245732/371472 [8:59:33<11:00:01, 3.18it/s] 66%|██████▌ | 245733/371472 [8:59:33<10:44:46, 3.25it/s] 66%|██████▌ | 245734/371472 [8:59:34<10:47:14, 3.24it/s] 66%|██████▌ | 245735/371472 [8:59:34<10:41:46, 3.27it/s] 66%|██████▌ | 245736/371472 [8:59:34<10:32:13, 3.31it/s] 66%|██████▌ | 245737/371472 [8:59:34<10:39:16, 3.28it/s] 66%|██████▌ | 245738/371472 [8:59:35<10:22:21, 3.37it/s] 66%|██████▌ | 245739/371472 [8:59:35<11:04:50, 3.15it/s] 66%|██████▌ | 245740/371472 [8:59:35<10:46:20, 3.24it/s] {'loss': 2.7477, 'learning_rate': 4.0478678704561617e-07, 'epoch': 10.58} + 66%|██████▌ | 245740/371472 [8:59:35<10:46:20, 3.24it/s] 66%|██████▌ | 245741/371472 [8:59:36<10:17:25, 3.39it/s] 66%|██████▌ | 245742/371472 [8:59:36<10:43:54, 3.25it/s] 66%|██████▌ | 245743/371472 [8:59:36<10:31:31, 3.32it/s] 66%|██████▌ | 245744/371472 [8:59:37<10:25:03, 3.35it/s] 66%|██████▌ | 245745/371472 [8:59:37<10:02:38, 3.48it/s] 66%|██████▌ | 245746/371472 [8:59:37<9:55:40, 3.52it/s] 66%|██████▌ | 245747/371472 [8:59:37<10:07:32, 3.45it/s] 66%|██████▌ | 245748/371472 [8:59:38<10:15:55, 3.40it/s] 66%|██████▌ | 245749/371472 [8:59:38<10:22:15, 3.37it/s] 66%|██████▌ | 245750/371472 [8:59:38<10:45:24, 3.25it/s] 66%|██████▌ | 245751/371472 [8:59:39<10:22:13, 3.37it/s] 66%|██████▌ | 245752/371472 [8:59:39<10:19:55, 3.38it/s] 66%|██████▌ | 245753/371472 [8:59:39<10:12:47, 3.42it/s] 66%|██████▌ | 245754/371472 [8:59:39<9:56:41, 3.51it/s] 66%|██████▌ | 245755/371472 [8:59:40<10:33:50, 3.31it/s] 66%|██████▌ | 245756/371472 [8:59:40<12:09:58, 2.87it/s] 66%|██████▌ | 245757/371472 [8:59:41<11:32:35, 3.03it/s] 66%|██████▌ | 245758/371472 [8:59:41<11:03:40, 3.16it/s] 66%|██████▌ | 245759/371472 [8:59:41<10:49:00, 3.23it/s] 66%|██████▌ | 245760/371472 [8:59:41<11:06:43, 3.14it/s] {'loss': 2.6317, 'learning_rate': 4.0473830507013724e-07, 'epoch': 10.59} + 66%|█��████▌ | 245760/371472 [8:59:41<11:06:43, 3.14it/s] 66%|██████▌ | 245761/371472 [8:59:42<11:16:03, 3.10it/s] 66%|██████▌ | 245762/371472 [8:59:42<12:00:48, 2.91it/s] 66%|██████▌ | 245763/371472 [8:59:42<11:13:39, 3.11it/s] 66%|██████▌ | 245764/371472 [8:59:43<11:47:47, 2.96it/s] 66%|██████▌ | 245765/371472 [8:59:43<11:53:25, 2.94it/s] 66%|██████▌ | 245766/371472 [8:59:43<11:43:26, 2.98it/s] 66%|██████▌ | 245767/371472 [8:59:44<11:05:07, 3.15it/s] 66%|██████▌ | 245768/371472 [8:59:44<10:43:08, 3.26it/s] 66%|██████▌ | 245769/371472 [8:59:44<10:28:25, 3.33it/s] 66%|██████▌ | 245770/371472 [8:59:45<10:08:02, 3.45it/s] 66%|██████▌ | 245771/371472 [8:59:45<9:59:57, 3.49it/s] 66%|██████▌ | 245772/371472 [8:59:45<10:04:21, 3.47it/s] 66%|██████▌ | 245773/371472 [8:59:45<10:29:59, 3.33it/s] 66%|██████▌ | 245774/371472 [8:59:46<10:23:07, 3.36it/s] 66%|██████▌ | 245775/371472 [8:59:46<10:17:08, 3.39it/s] 66%|██████▌ | 245776/371472 [8:59:46<10:55:10, 3.20it/s] 66%|██████▌ | 245777/371472 [8:59:47<10:30:55, 3.32it/s] 66%|██████▌ | 245778/371472 [8:59:47<10:21:28, 3.37it/s] 66%|██████▌ | 245779/371472 [8:59:47<10:16:13, 3.40it/s] 66%|██████▌ | 245780/371472 [8:59:48<10:34:58, 3.30it/s] {'loss': 2.6242, 'learning_rate': 4.0468982309465837e-07, 'epoch': 10.59} + 66%|██████▌ | 245780/371472 [8:59:48<10:34:58, 3.30it/s] 66%|██████▌ | 245781/371472 [8:59:48<10:32:50, 3.31it/s] 66%|██████▌ | 245782/371472 [8:59:48<11:04:46, 3.15it/s] 66%|██████▌ | 245783/371472 [8:59:49<11:41:24, 2.99it/s] 66%|██████▌ | 245784/371472 [8:59:49<11:12:04, 3.12it/s] 66%|██████▌ | 245785/371472 [8:59:49<11:26:18, 3.05it/s] 66%|██████▌ | 245786/371472 [8:59:50<10:54:44, 3.20it/s] 66%|██████▌ | 245787/371472 [8:59:50<10:57:07, 3.19it/s] 66%|██████▌ | 245788/371472 [8:59:50<10:57:43, 3.18it/s] 66%|██████▌ | 245789/371472 [8:59:50<10:25:02, 3.35it/s] 66%|██████▌ | 245790/371472 [8:59:51<10:27:42, 3.34it/s] 66%|██████▌ | 245791/371472 [8:59:51<11:06:14, 3.14it/s] 66%|██████▌ | 245792/371472 [8:59:51<10:38:57, 3.28it/s] 66%|██████▌ | 245793/371472 [8:59:52<10:39:34, 3.28it/s] 66%|██████▌ | 245794/371472 [8:59:52<10:23:42, 3.36it/s] 66%|██████▌ | 245795/371472 [8:59:52<10:44:07, 3.25it/s] 66%|██████▌ | 245796/371472 [8:59:53<11:13:51, 3.11it/s] 66%|██████▌ | 245797/371472 [8:59:53<10:57:35, 3.19it/s] 66%|██████▌ | 245798/371472 [8:59:53<10:52:21, 3.21it/s] 66%|██████▌ | 245799/371472 [8:59:54<10:32:15, 3.31it/s] 66%|██████▌ | 245800/371472 [8:59:54<10:38:19, 3.28it/s] {'loss': 2.778, 'learning_rate': 4.0464134111917944e-07, 'epoch': 10.59} + 66%|██████▌ | 245800/371472 [8:59:54<10:38:19, 3.28it/s] 66%|██████▌ | 245801/371472 [8:59:54<10:24:43, 3.35it/s] 66%|██████▌ | 245802/371472 [8:59:54<10:13:17, 3.42it/s] 66%|██████▌ | 245803/371472 [8:59:55<11:18:40, 3.09it/s] 66%|██████▌ | 245804/371472 [8:59:55<10:47:54, 3.23it/s] 66%|██████▌ | 245805/371472 [8:59:55<10:28:16, 3.33it/s] 66%|██████▌ | 245806/371472 [8:59:56<10:27:19, 3.34it/s] 66%|██████▌ | 245807/371472 [8:59:56<10:24:47, 3.35it/s] 66%|██████▌ | 245808/371472 [8:59:56<10:11:27, 3.43it/s] 66%|██████▌ | 245809/371472 [8:59:57<10:10:17, 3.43it/s] 66%|██████▌ | 245810/371472 [8:59:57<10:38:23, 3.28it/s] 66%|██████▌ | 245811/371472 [8:59:57<11:36:40, 3.01it/s] 66%|██████▌ | 245812/371472 [8:59:58<11:28:48, 3.04it/s] 66%|██████▌ | 245813/371472 [8:59:58<10:53:28, 3.20it/s] 66%|██████▌ | 245814/371472 [8:59:58<10:31:05, 3.32it/s] 66%|██████▌ | 245815/371472 [8:59:58<10:48:14, 3.23it/s] 66%|██████▌ | 245816/371472 [8:59:59<10:26:58, 3.34it/s] 66%|██████▌ | 245817/371472 [8:59:59<10:24:02, 3.36it/s] 66%|██████▌ | 245818/371472 [8:59:59<10:17:16, 3.39it/s] 66%|██████▌ | 245819/371472 [9:00:00<10:06:48, 3.45it/s] 66%|██████▌ | 245820/371472 [9:00:00<10:00:58, 3.48it/s] {'loss': 2.5842, 'learning_rate': 4.045928591437006e-07, 'epoch': 10.59} + 66%|██████▌ | 245820/371472 [9:00:00<10:00:58, 3.48it/s] 66%|██████▌ | 245821/371472 [9:00:00<10:17:25, 3.39it/s] 66%|██████▌ | 245822/371472 [9:00:00<10:02:08, 3.48it/s] 66%|██████▌ | 245823/371472 [9:00:01<9:58:26, 3.50it/s] 66%|██████▌ | 245824/371472 [9:00:01<10:09:20, 3.44it/s] 66%|██████▌ | 245825/371472 [9:00:01<10:06:16, 3.45it/s] 66%|██████▌ | 245826/371472 [9:00:02<9:51:06, 3.54it/s] 66%|██████▌ | 245827/371472 [9:00:02<10:03:12, 3.47it/s] 66%|██████▌ | 245828/371472 [9:00:02<10:07:40, 3.45it/s] 66%|██████▌ | 245829/371472 [9:00:02<9:50:57, 3.54it/s] 66%|██████▌ | 245830/371472 [9:00:03<10:11:32, 3.42it/s] 66%|██████▌ | 245831/371472 [9:00:03<10:04:51, 3.46it/s] 66%|██████▌ | 245832/371472 [9:00:03<10:06:55, 3.45it/s] 66%|██████▌ | 245833/371472 [9:00:04<9:57:53, 3.50it/s] 66%|██████▌ | 245834/371472 [9:00:04<10:17:02, 3.39it/s] 66%|██████▌ | 245835/371472 [9:00:04<10:26:42, 3.34it/s] 66%|██████▌ | 245836/371472 [9:00:04<10:11:49, 3.42it/s] 66%|██████▌ | 245837/371472 [9:00:05<10:57:26, 3.18it/s] 66%|██████▌ | 245838/371472 [9:00:05<10:40:13, 3.27it/s] 66%|██████▌ | 245839/371472 [9:00:05<10:44:29, 3.25it/s] 66%|██████▌ | 245840/371472 [9:00:06<10:33:11, 3.31it/s] {'loss': 2.6206, 'learning_rate': 4.045443771682217e-07, 'epoch': 10.59} + 66%|██████▌ | 245840/371472 [9:00:06<10:33:11, 3.31it/s] 66%|██████▌ | 245841/371472 [9:00:06<10:20:14, 3.38it/s] 66%|██████▌ | 245842/371472 [9:00:06<10:34:24, 3.30it/s] 66%|██████▌ | 245843/371472 [9:00:07<10:20:12, 3.38it/s] 66%|██████▌ | 245844/371472 [9:00:07<10:04:25, 3.46it/s] 66%|██████▌ | 245845/371472 [9:00:07<10:14:27, 3.41it/s] 66%|██████▌ | 245846/371472 [9:00:08<10:43:44, 3.25it/s] 66%|██████▌ | 245847/371472 [9:00:08<10:37:19, 3.29it/s] 66%|██████▌ | 245848/371472 [9:00:08<10:25:26, 3.35it/s] 66%|██████▌ | 245849/371472 [9:00:08<10:42:14, 3.26it/s] 66%|██████▌ | 245850/371472 [9:00:09<10:21:32, 3.37it/s] 66%|██████▌ | 245851/371472 [9:00:09<10:39:17, 3.27it/s] 66%|██████▌ | 245852/371472 [9:00:09<10:17:03, 3.39it/s] 66%|██████▌ | 245853/371472 [9:00:10<10:22:45, 3.36it/s] 66%|██████▌ | 245854/371472 [9:00:10<10:23:40, 3.36it/s] 66%|██████▌ | 245855/371472 [9:00:10<10:41:32, 3.26it/s] 66%|██████▌ | 245856/371472 [9:00:11<10:30:37, 3.32it/s] 66%|██████▌ | 245857/371472 [9:00:11<10:15:21, 3.40it/s] 66%|██████▌ | 245858/371472 [9:00:11<10:33:14, 3.31it/s] 66%|██████▌ | 245859/371472 [9:00:11<10:30:02, 3.32it/s] 66%|██████▌ | 245860/371472 [9:00:12<11:12:35, 3.11it/s] {'loss': 2.6498, 'learning_rate': 4.044958951927427e-07, 'epoch': 10.59} + 66%|██████▌ | 245860/371472 [9:00:12<11:12:35, 3.11it/s] 66%|██████▌ | 245861/371472 [9:00:12<10:42:30, 3.26it/s] 66%|██████▌ | 245862/371472 [9:00:12<11:02:49, 3.16it/s] 66%|██████▌ | 245863/371472 [9:00:13<10:42:13, 3.26it/s] 66%|██████▌ | 245864/371472 [9:00:13<10:23:18, 3.36it/s] 66%|██████▌ | 245865/371472 [9:00:13<10:25:43, 3.35it/s] 66%|██████▌ | 245866/371472 [9:00:14<10:48:32, 3.23it/s] 66%|██████▌ | 245867/371472 [9:00:14<10:44:05, 3.25it/s] 66%|██████▌ | 245868/371472 [9:00:14<10:34:46, 3.30it/s] 66%|██████▌ | 245869/371472 [9:00:14<10:13:06, 3.41it/s] 66%|██████▌ | 245870/371472 [9:00:15<10:22:52, 3.36it/s] 66%|██████▌ | 245871/371472 [9:00:15<10:25:37, 3.35it/s] 66%|██████▌ | 245872/371472 [9:00:15<10:44:22, 3.25it/s] 66%|██████▌ | 245873/371472 [9:00:16<10:41:24, 3.26it/s] 66%|██████▌ | 245874/371472 [9:00:16<11:08:07, 3.13it/s] 66%|██████▌ | 245875/371472 [9:00:16<10:54:56, 3.20it/s] 66%|██████▌ | 245876/371472 [9:00:17<10:56:13, 3.19it/s] 66%|██████▌ | 245877/371472 [9:00:17<10:36:46, 3.29it/s] 66%|██████▌ | 245878/371472 [9:00:17<10:29:53, 3.32it/s] 66%|██████▌ | 245879/371472 [9:00:18<10:37:48, 3.28it/s] 66%|██████▌ | 245880/371472 [9:00:18<10:31:02, 3.32it/s] {'loss': 2.6468, 'learning_rate': 4.044474132172639e-07, 'epoch': 10.59} + 66%|██████▌ | 245880/371472 [9:00:18<10:31:02, 3.32it/s] 66%|██████▌ | 245881/371472 [9:00:18<10:54:25, 3.20it/s] 66%|██████▌ | 245882/371472 [9:00:19<11:32:57, 3.02it/s] 66%|██████▌ | 245883/371472 [9:00:19<11:09:02, 3.13it/s] 66%|██████▌ | 245884/371472 [9:00:19<11:01:27, 3.16it/s] 66%|██████▌ | 245885/371472 [9:00:20<11:05:59, 3.14it/s] 66%|██████▌ | 245886/371472 [9:00:20<11:14:09, 3.10it/s] 66%|██████▌ | 245887/371472 [9:00:20<11:06:18, 3.14it/s] 66%|██████▌ | 245888/371472 [9:00:20<10:47:22, 3.23it/s] 66%|██████▌ | 245889/371472 [9:00:21<10:29:09, 3.33it/s] 66%|██████▌ | 245890/371472 [9:00:21<10:54:57, 3.20it/s] 66%|██████▌ | 245891/371472 [9:00:21<10:47:57, 3.23it/s] 66%|██████▌ | 245892/371472 [9:00:22<10:37:50, 3.28it/s] 66%|██████▌ | 245893/371472 [9:00:22<10:32:13, 3.31it/s] 66%|██████▌ | 245894/371472 [9:00:22<10:22:06, 3.36it/s] 66%|██████▌ | 245895/371472 [9:00:23<10:53:17, 3.20it/s] 66%|██████▌ | 245896/371472 [9:00:23<10:26:17, 3.34it/s] 66%|██████▌ | 245897/371472 [9:00:23<11:19:46, 3.08it/s] 66%|██████▌ | 245898/371472 [9:00:24<10:49:39, 3.22it/s] 66%|██████▌ | 245899/371472 [9:00:24<10:28:11, 3.33it/s] 66%|██████▌ | 245900/371472 [9:00:24<10:20:41, 3.37it/s] {'loss': 2.8539, 'learning_rate': 4.0439893124178495e-07, 'epoch': 10.59} + 66%|██████▌ | 245900/371472 [9:00:24<10:20:41, 3.37it/s] 66%|██████▌ | 245901/371472 [9:00:24<10:09:59, 3.43it/s] 66%|██████▌ | 245902/371472 [9:00:25<10:21:58, 3.36it/s] 66%|██████▌ | 245903/371472 [9:00:25<10:27:27, 3.34it/s] 66%|██████▌ | 245904/371472 [9:00:25<10:44:49, 3.25it/s] 66%|██████▌ | 245905/371472 [9:00:26<10:45:46, 3.24it/s] 66%|██████▌ | 245906/371472 [9:00:26<10:50:37, 3.22it/s] 66%|██████▌ | 245907/371472 [9:00:26<10:57:11, 3.18it/s] 66%|██████▌ | 245908/371472 [9:00:27<10:28:42, 3.33it/s] 66%|██████▌ | 245909/371472 [9:00:27<10:58:53, 3.18it/s] 66%|██████▌ | 245910/371472 [9:00:27<10:52:33, 3.21it/s] 66%|██████▌ | 245911/371472 [9:00:27<11:02:41, 3.16it/s] 66%|██████▌ | 245912/371472 [9:00:28<10:47:44, 3.23it/s] 66%|██████▌ | 245913/371472 [9:00:28<10:56:46, 3.19it/s] 66%|██████▌ | 245914/371472 [9:00:28<10:38:40, 3.28it/s] 66%|██████▌ | 245915/371472 [9:00:29<10:11:24, 3.42it/s] 66%|██████▌ | 245916/371472 [9:00:29<10:11:01, 3.42it/s] 66%|██████▌ | 245917/371472 [9:00:29<10:10:12, 3.43it/s] 66%|██████▌ | 245918/371472 [9:00:30<10:30:08, 3.32it/s] 66%|██████▌ | 245919/371472 [9:00:30<10:23:55, 3.35it/s] 66%|██████▌ | 245920/371472 [9:00:30<10:11:50, 3.42it/s] {'loss': 2.6748, 'learning_rate': 4.043504492663061e-07, 'epoch': 10.59} + 66%|██████▌ | 245920/371472 [9:00:30<10:11:50, 3.42it/s] 66%|██████▌ | 245921/371472 [9:00:30<10:14:34, 3.40it/s] 66%|██████▌ | 245922/371472 [9:00:31<10:16:25, 3.39it/s] 66%|██████▌ | 245923/371472 [9:00:31<10:06:38, 3.45it/s] 66%|██████▌ | 245924/371472 [9:00:31<10:13:59, 3.41it/s] 66%|██████▌ | 245925/371472 [9:00:32<10:30:38, 3.32it/s] 66%|██████▌ | 245926/371472 [9:00:32<11:13:23, 3.11it/s] 66%|██████▌ | 245927/371472 [9:00:32<10:59:50, 3.17it/s] 66%|██████▌ | 245928/371472 [9:00:33<10:29:00, 3.33it/s] 66%|██████▌ | 245929/371472 [9:00:33<10:19:30, 3.38it/s] 66%|██████▌ | 245930/371472 [9:00:33<10:40:37, 3.27it/s] 66%|██████▌ | 245931/371472 [9:00:34<10:58:49, 3.18it/s] 66%|██████▌ | 245932/371472 [9:00:34<10:37:55, 3.28it/s] 66%|██████▌ | 245933/371472 [9:00:34<10:25:30, 3.34it/s] 66%|██████▌ | 245934/371472 [9:00:34<10:12:58, 3.41it/s] 66%|██████▌ | 245935/371472 [9:00:35<10:09:40, 3.43it/s] 66%|██████▌ | 245936/371472 [9:00:35<10:07:21, 3.44it/s] 66%|██████▌ | 245937/371472 [9:00:35<10:19:06, 3.38it/s] 66%|██████▌ | 245938/371472 [9:00:36<10:19:32, 3.38it/s] 66%|██████▌ | 245939/371472 [9:00:36<10:21:01, 3.37it/s] 66%|██████▌ | 245940/371472 [9:00:36<10:20:09, 3.37it/s] {'loss': 2.8282, 'learning_rate': 4.0430196729082715e-07, 'epoch': 10.59} + 66%|██████▌ | 245940/371472 [9:00:36<10:20:09, 3.37it/s] 66%|██████▌ | 245941/371472 [9:00:36<10:20:49, 3.37it/s] 66%|██████▌ | 245942/371472 [9:00:37<10:25:34, 3.34it/s] 66%|██████▌ | 245943/371472 [9:00:37<10:19:06, 3.38it/s] 66%|██████▌ | 245944/371472 [9:00:37<10:31:48, 3.31it/s] 66%|██████▌ | 245945/371472 [9:00:38<10:33:00, 3.31it/s] 66%|██████▌ | 245946/371472 [9:00:38<10:21:31, 3.37it/s] 66%|██████▌ | 245947/371472 [9:00:38<10:42:40, 3.26it/s] 66%|██████▌ | 245948/371472 [9:00:39<10:25:27, 3.34it/s] 66%|██████▌ | 245949/371472 [9:00:39<10:19:48, 3.38it/s] 66%|██████▌ | 245950/371472 [9:00:39<10:19:05, 3.38it/s] 66%|██████▌ | 245951/371472 [9:00:39<10:20:17, 3.37it/s] 66%|██████▌ | 245952/371472 [9:00:40<10:36:18, 3.29it/s] 66%|██████▌ | 245953/371472 [9:00:40<10:34:07, 3.30it/s] 66%|██████▌ | 245954/371472 [9:00:40<10:33:07, 3.30it/s] 66%|██████▌ | 245955/371472 [9:00:41<10:40:49, 3.26it/s] 66%|██████▌ | 245956/371472 [9:00:41<10:40:05, 3.27it/s] 66%|██████▌ | 245957/371472 [9:00:41<10:20:36, 3.37it/s] 66%|██████▌ | 245958/371472 [9:00:42<10:20:04, 3.37it/s] 66%|██████▌ | 245959/371472 [9:00:42<10:16:14, 3.39it/s] 66%|██████▌ | 245960/371472 [9:00:42<10:55:18, 3.19it/s] {'loss': 2.6743, 'learning_rate': 4.0425348531534833e-07, 'epoch': 10.59} + 66%|██████▌ | 245960/371472 [9:00:42<10:55:18, 3.19it/s] 66%|██████▌ | 245961/371472 [9:00:42<10:34:50, 3.30it/s] 66%|██████▌ | 245962/371472 [9:00:43<11:27:49, 3.04it/s] 66%|██████▌ | 245963/371472 [9:00:43<11:06:20, 3.14it/s] 66%|██████▌ | 245964/371472 [9:00:43<11:16:55, 3.09it/s] 66%|██████▌ | 245965/371472 [9:00:44<11:02:18, 3.16it/s] 66%|██████▌ | 245966/371472 [9:00:44<11:26:13, 3.05it/s] 66%|██████▌ | 245967/371472 [9:00:44<11:10:06, 3.12it/s] 66%|██████▌ | 245968/371472 [9:00:45<10:59:55, 3.17it/s] 66%|██████▌ | 245969/371472 [9:00:45<10:37:18, 3.28it/s] 66%|██████▌ | 245970/371472 [9:00:45<10:23:26, 3.36it/s] 66%|██████▌ | 245971/371472 [9:00:46<10:19:31, 3.38it/s] 66%|██████▌ | 245972/371472 [9:00:46<10:18:13, 3.38it/s] 66%|██████▌ | 245973/371472 [9:00:46<10:30:44, 3.32it/s] 66%|██████▌ | 245974/371472 [9:00:47<10:51:16, 3.21it/s] 66%|██████▌ | 245975/371472 [9:00:47<10:28:42, 3.33it/s] 66%|██████▌ | 245976/371472 [9:00:47<10:30:59, 3.31it/s] 66%|██████▌ | 245977/371472 [9:00:47<10:52:39, 3.20it/s] 66%|██████▌ | 245978/371472 [9:00:48<10:53:10, 3.20it/s] 66%|██████▌ | 245979/371472 [9:00:48<10:39:56, 3.27it/s] 66%|██████▌ | 245980/371472 [9:00:48<11:03:40, 3.15it/s] {'loss': 2.675, 'learning_rate': 4.0420500333986935e-07, 'epoch': 10.59} + 66%|██████▌ | 245980/371472 [9:00:48<11:03:40, 3.15it/s] 66%|██████▌ | 245981/371472 [9:00:49<10:57:46, 3.18it/s] 66%|██████▌ | 245982/371472 [9:00:49<10:38:26, 3.28it/s] 66%|██████▌ | 245983/371472 [9:00:49<10:58:46, 3.17it/s] 66%|██████▌ | 245984/371472 [9:00:50<11:18:46, 3.08it/s] 66%|██████▌ | 245985/371472 [9:00:50<10:55:57, 3.19it/s] 66%|██████▌ | 245986/371472 [9:00:50<11:12:47, 3.11it/s] 66%|██████▌ | 245987/371472 [9:00:51<11:10:22, 3.12it/s] 66%|██████▌ | 245988/371472 [9:00:51<11:01:39, 3.16it/s] 66%|██████▌ | 245989/371472 [9:00:51<10:41:17, 3.26it/s] 66%|██████▌ | 245990/371472 [9:00:52<10:54:50, 3.19it/s] 66%|██████▌ | 245991/371472 [9:00:52<10:53:15, 3.20it/s] 66%|██████▌ | 245992/371472 [9:00:52<10:32:51, 3.30it/s] 66%|██████▌ | 245993/371472 [9:00:52<10:26:48, 3.34it/s] 66%|██████▌ | 245994/371472 [9:00:53<10:45:06, 3.24it/s] 66%|██████▌ | 245995/371472 [9:00:53<10:31:13, 3.31it/s] 66%|██████▌ | 245996/371472 [9:00:53<10:23:30, 3.35it/s] 66%|██████▌ | 245997/371472 [9:00:54<10:17:32, 3.39it/s] 66%|██████▌ | 245998/371472 [9:00:54<10:21:02, 3.37it/s] 66%|██████▌ | 245999/371472 [9:00:54<10:16:46, 3.39it/s] 66%|██████▌ | 246000/371472 [9:00:55<10:28:59, 3.32it/s] {'loss': 2.6944, 'learning_rate': 4.041565213643905e-07, 'epoch': 10.6} + 66%|██████▌ | 246000/371472 [9:00:55<10:28:59, 3.32it/s] 66%|██████▌ | 246001/371472 [9:00:55<10:12:10, 3.42it/s] 66%|██████▌ | 246002/371472 [9:00:55<11:13:22, 3.11it/s] 66%|██████▌ | 246003/371472 [9:00:55<10:58:05, 3.18it/s] 66%|██████▌ | 246004/371472 [9:00:56<10:40:54, 3.26it/s] 66%|██████▌ | 246005/371472 [9:00:56<11:25:21, 3.05it/s] 66%|██████▌ | 246006/371472 [9:00:56<11:21:42, 3.07it/s] 66%|██████▌ | 246007/371472 [9:00:57<11:12:29, 3.11it/s] 66%|██████▌ | 246008/371472 [9:00:57<11:37:12, 3.00it/s] 66%|██████▌ | 246009/371472 [9:00:57<11:23:28, 3.06it/s] 66%|██████▌ | 246010/371472 [9:00:58<10:57:31, 3.18it/s] 66%|██████▌ | 246011/371472 [9:00:58<11:25:48, 3.05it/s] 66%|██████▌ | 246012/371472 [9:00:58<11:18:09, 3.08it/s] 66%|██████▌ | 246013/371472 [9:00:59<10:54:55, 3.19it/s] 66%|██████▌ | 246014/371472 [9:00:59<11:14:31, 3.10it/s] 66%|██████▌ | 246015/371472 [9:00:59<11:07:36, 3.13it/s] 66%|██████▌ | 246016/371472 [9:01:00<10:41:05, 3.26it/s] 66%|██████▌ | 246017/371472 [9:01:00<10:21:29, 3.36it/s] 66%|██████▌ | 246018/371472 [9:01:00<10:07:21, 3.44it/s] 66%|██████▌ | 246019/371472 [9:01:01<11:34:14, 3.01it/s] 66%|██████▌ | 246020/371472 [9:01:01<11:08:14, 3.13it/s] {'loss': 2.675, 'learning_rate': 4.041080393889116e-07, 'epoch': 10.6} + 66%|██████▌ | 246020/371472 [9:01:01<11:08:14, 3.13it/s] 66%|██████▌ | 246021/371472 [9:01:01<10:48:20, 3.22it/s] 66%|██████▌ | 246022/371472 [9:01:01<10:32:46, 3.30it/s] 66%|██████▌ | 246023/371472 [9:01:02<10:30:21, 3.32it/s] 66%|██████▌ | 246024/371472 [9:01:02<10:32:49, 3.30it/s] 66%|██████▌ | 246025/371472 [9:01:02<10:29:17, 3.32it/s] 66%|██████▌ | 246026/371472 [9:01:03<10:32:36, 3.30it/s] 66%|██████▌ | 246027/371472 [9:01:03<10:36:18, 3.29it/s] 66%|██████▌ | 246028/371472 [9:01:03<10:51:50, 3.21it/s] 66%|██████▌ | 246029/371472 [9:01:04<10:49:34, 3.22it/s] 66%|██████▌ | 246030/371472 [9:01:04<10:37:14, 3.28it/s] 66%|██████▌ | 246031/371472 [9:01:04<11:05:49, 3.14it/s] 66%|██████▌ | 246032/371472 [9:01:05<11:04:28, 3.15it/s] 66%|██████▌ | 246033/371472 [9:01:05<10:37:55, 3.28it/s] 66%|██████▌ | 246034/371472 [9:01:05<10:24:16, 3.35it/s] 66%|██████▌ | 246035/371472 [9:01:05<10:21:50, 3.36it/s] 66%|██████▌ | 246036/371472 [9:01:06<10:21:38, 3.36it/s] 66%|██████▌ | 246037/371472 [9:01:06<10:07:12, 3.44it/s] 66%|██████▌ | 246038/371472 [9:01:06<10:36:17, 3.29it/s] 66%|██████▌ | 246039/371472 [9:01:07<11:00:29, 3.17it/s] 66%|██████▌ | 246040/371472 [9:01:07<10:55:45, 3.19it/s] {'loss': 2.669, 'learning_rate': 4.040595574134327e-07, 'epoch': 10.6} + 66%|██████▌ | 246040/371472 [9:01:07<10:55:45, 3.19it/s] 66%|██████▌ | 246041/371472 [9:01:07<10:56:14, 3.19it/s] 66%|██████▌ | 246042/371472 [9:01:08<10:43:56, 3.25it/s] 66%|██████▌ | 246043/371472 [9:01:08<10:34:51, 3.29it/s] 66%|██████▌ | 246044/371472 [9:01:08<10:31:18, 3.31it/s] 66%|██████▌ | 246045/371472 [9:01:09<10:33:21, 3.30it/s] 66%|██████▌ | 246046/371472 [9:01:09<10:23:27, 3.35it/s] 66%|██████▌ | 246047/371472 [9:01:09<10:20:27, 3.37it/s] 66%|██████▌ | 246048/371472 [9:01:09<10:59:22, 3.17it/s] 66%|██████▌ | 246049/371472 [9:01:10<10:48:35, 3.22it/s] 66%|██████▌ | 246050/371472 [9:01:10<10:33:50, 3.30it/s] 66%|██████▌ | 246051/371472 [9:01:10<10:37:11, 3.28it/s] 66%|██████▌ | 246052/371472 [9:01:11<10:27:25, 3.33it/s] 66%|██████▌ | 246053/371472 [9:01:11<10:21:36, 3.36it/s] 66%|██████▌ | 246054/371472 [9:01:11<10:03:35, 3.46it/s] 66%|██████▌ | 246055/371472 [9:01:11<10:03:23, 3.46it/s] 66%|██████▌ | 246056/371472 [9:01:12<10:12:30, 3.41it/s] 66%|██████▌ | 246057/371472 [9:01:12<10:01:49, 3.47it/s] 66%|██████▌ | 246058/371472 [9:01:12<9:49:15, 3.55it/s] 66%|██████▌ | 246059/371472 [9:01:13<10:11:00, 3.42it/s] 66%|██████▌ | 246060/371472 [9:01:13<10:10:52, 3.42it/s] {'loss': 2.586, 'learning_rate': 4.0401107543795374e-07, 'epoch': 10.6} + 66%|██████▌ | 246060/371472 [9:01:13<10:10:52, 3.42it/s] 66%|██████▌ | 246061/371472 [9:01:13<10:08:44, 3.43it/s] 66%|██████▌ | 246062/371472 [9:01:14<10:44:50, 3.24it/s] 66%|██████▌ | 246063/371472 [9:01:14<10:31:29, 3.31it/s] 66%|██████▌ | 246064/371472 [9:01:14<10:25:34, 3.34it/s] 66%|██████▌ | 246065/371472 [9:01:14<10:19:53, 3.37it/s] 66%|██████▌ | 246066/371472 [9:01:15<10:44:52, 3.24it/s] 66%|██████▌ | 246067/371472 [9:01:15<10:34:35, 3.29it/s] 66%|██████▌ | 246068/371472 [9:01:15<10:08:27, 3.44it/s] 66%|██████▌ | 246069/371472 [9:01:16<10:05:41, 3.45it/s] 66%|██████▌ | 246070/371472 [9:01:16<10:30:35, 3.31it/s] 66%|██████▌ | 246071/371472 [9:01:16<10:45:08, 3.24it/s] 66%|██████▌ | 246072/371472 [9:01:17<11:09:31, 3.12it/s] 66%|██████▌ | 246073/371472 [9:01:17<11:11:52, 3.11it/s] 66%|██████▌ | 246074/371472 [9:01:17<11:31:02, 3.02it/s] 66%|██████▌ | 246075/371472 [9:01:18<11:21:29, 3.07it/s] 66%|██████▌ | 246076/371472 [9:01:18<11:17:05, 3.09it/s] 66%|██████▌ | 246077/371472 [9:01:18<10:48:19, 3.22it/s] 66%|██████▌ | 246078/371472 [9:01:19<10:37:48, 3.28it/s] 66%|██████▌ | 246079/371472 [9:01:19<10:36:48, 3.28it/s] 66%|██████▌ | 246080/371472 [9:01:19<10:38:48, 3.27it/s] {'loss': 2.7529, 'learning_rate': 4.0396259346247497e-07, 'epoch': 10.6} + 66%|██████▌ | 246080/371472 [9:01:19<10:38:48, 3.27it/s] 66%|██████▌ | 246081/371472 [9:01:19<10:49:06, 3.22it/s] 66%|██████▌ | 246082/371472 [9:01:20<10:30:04, 3.32it/s] 66%|██████▌ | 246083/371472 [9:01:20<10:17:11, 3.39it/s] 66%|██████▌ | 246084/371472 [9:01:20<10:16:40, 3.39it/s] 66%|██████▌ | 246085/371472 [9:01:21<10:09:54, 3.43it/s] 66%|██████▌ | 246086/371472 [9:01:21<10:25:35, 3.34it/s] 66%|██████▌ | 246087/371472 [9:01:21<10:49:30, 3.22it/s] 66%|██████▌ | 246088/371472 [9:01:22<10:34:36, 3.29it/s] 66%|██████▌ | 246089/371472 [9:01:22<10:50:53, 3.21it/s] 66%|██████▌ | 246090/371472 [9:01:22<10:42:54, 3.25it/s] 66%|██████▌ | 246091/371472 [9:01:23<11:41:06, 2.98it/s] 66%|██████▌ | 246092/371472 [9:01:23<10:58:21, 3.17it/s] 66%|██████▌ | 246093/371472 [9:01:23<10:44:12, 3.24it/s] 66%|██████▌ | 246094/371472 [9:01:23<10:27:21, 3.33it/s] 66%|██████▌ | 246095/371472 [9:01:24<10:37:49, 3.28it/s] 66%|██████▌ | 246096/371472 [9:01:24<10:42:34, 3.25it/s] 66%|██████▌ | 246097/371472 [9:01:24<10:32:53, 3.30it/s] 66%|██████▌ | 246098/371472 [9:01:25<10:14:55, 3.40it/s] 66%|██████▌ | 246099/371472 [9:01:25<10:30:21, 3.31it/s] 66%|██████▌ | 246100/371472 [9:01:25<10:34:28, 3.29it/s] {'loss': 2.7618, 'learning_rate': 4.03914111486996e-07, 'epoch': 10.6} + 66%|██████▌ | 246100/371472 [9:01:25<10:34:28, 3.29it/s] 66%|██████▋ | 246101/371472 [9:01:26<10:30:34, 3.31it/s] 66%|██████▋ | 246102/371472 [9:01:26<10:15:20, 3.40it/s] 66%|██████▋ | 246103/371472 [9:01:26<10:25:32, 3.34it/s] 66%|██████▋ | 246104/371472 [9:01:26<10:44:03, 3.24it/s] 66%|██████▋ | 246105/371472 [9:01:27<10:39:18, 3.27it/s] 66%|██████▋ | 246106/371472 [9:01:27<10:47:39, 3.23it/s] 66%|██████▋ | 246107/371472 [9:01:27<10:52:45, 3.20it/s] 66%|██████▋ | 246108/371472 [9:01:28<11:31:36, 3.02it/s] 66%|██████▋ | 246109/371472 [9:01:28<12:04:13, 2.89it/s] 66%|██████▋ | 246110/371472 [9:01:28<11:32:24, 3.02it/s] 66%|██████▋ | 246111/371472 [9:01:29<12:08:32, 2.87it/s] 66%|██████▋ | 246112/371472 [9:01:29<11:38:39, 2.99it/s] 66%|██████▋ | 246113/371472 [9:01:29<11:01:08, 3.16it/s] 66%|██████▋ | 246114/371472 [9:01:30<11:06:27, 3.13it/s] 66%|██████▋ | 246115/371472 [9:01:30<10:51:04, 3.21it/s] 66%|██████▋ | 246116/371472 [9:01:30<10:34:39, 3.29it/s] 66%|██████▋ | 246117/371472 [9:01:31<10:51:37, 3.21it/s] 66%|██████▋ | 246118/371472 [9:01:31<10:41:21, 3.26it/s] 66%|██████▋ | 246119/371472 [9:01:31<10:53:21, 3.20it/s] 66%|██████▋ | 246120/371472 [9:01:32<10:40:24, 3.26it/s] {'loss': 2.6064, 'learning_rate': 4.0386562951151716e-07, 'epoch': 10.6} + 66%|██████▋ | 246120/371472 [9:01:32<10:40:24, 3.26it/s] 66%|██████▋ | 246121/371472 [9:01:32<10:35:56, 3.29it/s] 66%|██████▋ | 246122/371472 [9:01:32<10:18:13, 3.38it/s] 66%|██████▋ | 246123/371472 [9:01:32<10:10:31, 3.42it/s] 66%|██████▋ | 246124/371472 [9:01:33<10:01:30, 3.47it/s] 66%|██████▋ | 246125/371472 [9:01:33<10:20:25, 3.37it/s] 66%|██████▋ | 246126/371472 [9:01:33<10:42:13, 3.25it/s] 66%|██████▋ | 246127/371472 [9:01:34<10:48:58, 3.22it/s] 66%|██████▋ | 246128/371472 [9:01:34<10:26:16, 3.34it/s] 66%|██████▋ | 246129/371472 [9:01:34<10:17:37, 3.38it/s] 66%|██████▋ | 246130/371472 [9:01:35<10:17:13, 3.38it/s] 66%|██████▋ | 246131/371472 [9:01:35<10:24:05, 3.35it/s] 66%|██████▋ | 246132/371472 [9:01:35<10:03:44, 3.46it/s] 66%|██████▋ | 246133/371472 [9:01:35<10:05:22, 3.45it/s] 66%|██████▋ | 246134/371472 [9:01:36<9:47:03, 3.56it/s] 66%|██████▋ | 246135/371472 [9:01:36<9:50:57, 3.53it/s] 66%|██████▋ | 246136/371472 [9:01:36<9:53:41, 3.52it/s] 66%|██████▋ | 246137/371472 [9:01:36<9:57:22, 3.50it/s] 66%|██████▋ | 246138/371472 [9:01:37<10:29:43, 3.32it/s] 66%|██████▋ | 246139/371472 [9:01:37<10:18:41, 3.38it/s] 66%|██████▋ | 246140/371472 [9:01:37<10:37:59, 3.27it/s] {'loss': 2.8967, 'learning_rate': 4.0381714753603824e-07, 'epoch': 10.6} + 66%|██████▋ | 246140/371472 [9:01:37<10:37:59, 3.27it/s] 66%|██████▋ | 246141/371472 [9:01:38<10:53:46, 3.20it/s] 66%|██████▋ | 246142/371472 [9:01:38<10:43:17, 3.25it/s] 66%|██████▋ | 246143/371472 [9:01:38<10:51:47, 3.20it/s] 66%|██████▋ | 246144/371472 [9:01:39<11:07:07, 3.13it/s] 66%|██████▋ | 246145/371472 [9:01:39<10:54:11, 3.19it/s] 66%|██████▋ | 246146/371472 [9:01:39<11:02:52, 3.15it/s] 66%|██████▋ | 246147/371472 [9:01:40<11:29:46, 3.03it/s] 66%|██████▋ | 246148/371472 [9:01:40<10:55:13, 3.19it/s] 66%|██████▋ | 246149/371472 [9:01:40<10:47:00, 3.23it/s] 66%|██████▋ | 246150/371472 [9:01:41<10:32:06, 3.30it/s] 66%|██████▋ | 246151/371472 [9:01:41<10:26:10, 3.34it/s] 66%|██████▋ | 246152/371472 [9:01:41<10:23:45, 3.35it/s] 66%|██████▋ | 246153/371472 [9:01:41<10:13:12, 3.41it/s] 66%|██████▋ | 246154/371472 [9:01:42<10:02:58, 3.46it/s] 66%|██████▋ | 246155/371472 [9:01:42<9:51:31, 3.53it/s] 66%|██████▋ | 246156/371472 [9:01:42<10:33:36, 3.30it/s] 66%|██████▋ | 246157/371472 [9:01:43<10:27:06, 3.33it/s] 66%|██████▋ | 246158/371472 [9:01:43<10:26:27, 3.33it/s] 66%|██████▋ | 246159/371472 [9:01:43<11:02:02, 3.15it/s] 66%|██████▋ | 246160/371472 [9:01:44<11:02:24, 3.15it/s] {'loss': 2.663, 'learning_rate': 4.0376866556055936e-07, 'epoch': 10.6} + 66%|██████▋ | 246160/371472 [9:01:44<11:02:24, 3.15it/s] 66%|██████▋ | 246161/371472 [9:01:44<10:45:13, 3.24it/s] 66%|██████▋ | 246162/371472 [9:01:44<10:29:35, 3.32it/s] 66%|██████▋ | 246163/371472 [9:01:44<10:34:45, 3.29it/s] 66%|██████▋ | 246164/371472 [9:01:45<10:16:12, 3.39it/s] 66%|██████▋ | 246165/371472 [9:01:45<10:09:35, 3.43it/s] 66%|██████▋ | 246166/371472 [9:01:45<10:04:28, 3.45it/s] 66%|██████▋ | 246167/371472 [9:01:46<10:05:27, 3.45it/s] 66%|██████▋ | 246168/371472 [9:01:46<10:41:05, 3.26it/s] 66%|██████▋ | 246169/371472 [9:01:46<10:45:41, 3.23it/s] 66%|██████▋ | 246170/371472 [9:01:47<10:15:37, 3.39it/s] 66%|██████▋ | 246171/371472 [9:01:47<10:13:32, 3.40it/s] 66%|██████▋ | 246172/371472 [9:01:47<10:21:01, 3.36it/s] 66%|██████▋ | 246173/371472 [9:01:47<10:13:39, 3.40it/s] 66%|██████▋ | 246174/371472 [9:01:48<10:27:32, 3.33it/s] 66%|██████▋ | 246175/371472 [9:01:48<10:10:40, 3.42it/s] 66%|██████▋ | 246176/371472 [9:01:48<10:40:57, 3.26it/s] 66%|██████▋ | 246177/371472 [9:01:49<10:46:37, 3.23it/s] 66%|██████▋ | 246178/371472 [9:01:49<10:27:18, 3.33it/s] 66%|██████▋ | 246179/371472 [9:01:49<10:00:39, 3.48it/s] 66%|██████▋ | 246180/371472 [9:01:50<10:15:08, 3.39it/s] {'loss': 2.7052, 'learning_rate': 4.0372018358508043e-07, 'epoch': 10.6} + 66%|██████▋ | 246180/371472 [9:01:50<10:15:08, 3.39it/s] 66%|██████▋ | 246181/371472 [9:01:50<10:35:19, 3.29it/s] 66%|██████▋ | 246182/371472 [9:01:50<10:58:23, 3.17it/s] 66%|██████▋ | 246183/371472 [9:01:50<10:38:49, 3.27it/s] 66%|██████▋ | 246184/371472 [9:01:51<10:25:30, 3.34it/s] 66%|██████▋ | 246185/371472 [9:01:51<10:15:49, 3.39it/s] 66%|██████▋ | 246186/371472 [9:01:51<10:18:40, 3.38it/s] 66%|██████▋ | 246187/371472 [9:01:52<10:35:18, 3.29it/s] 66%|██████▋ | 246188/371472 [9:01:52<10:09:51, 3.42it/s] 66%|██████▋ | 246189/371472 [9:01:52<10:24:12, 3.35it/s] 66%|██████▋ | 246190/371472 [9:01:53<10:04:42, 3.45it/s] 66%|██████▋ | 246191/371472 [9:01:53<10:11:27, 3.41it/s] 66%|██████▋ | 246192/371472 [9:01:53<11:12:45, 3.10it/s] 66%|██████▋ | 246193/371472 [9:01:54<10:56:07, 3.18it/s] 66%|██████▋ | 246194/371472 [9:01:54<11:15:44, 3.09it/s] 66%|██████▋ | 246195/371472 [9:01:54<10:45:16, 3.24it/s] 66%|██████▋ | 246196/371472 [9:01:54<10:30:27, 3.31it/s] 66%|██████▋ | 246197/371472 [9:01:55<10:30:49, 3.31it/s] 66%|██████▋ | 246198/371472 [9:01:55<10:23:08, 3.35it/s] 66%|██████▋ | 246199/371472 [9:01:55<10:13:43, 3.40it/s] 66%|██████▋ | 246200/371472 [9:01:56<10:01:58, 3.47it/s] {'loss': 2.7495, 'learning_rate': 4.036717016096016e-07, 'epoch': 10.6} + 66%|██████▋ | 246200/371472 [9:01:56<10:01:58, 3.47it/s] 66%|██████▋ | 246201/371472 [9:01:56<10:45:57, 3.23it/s] 66%|██████▋ | 246202/371472 [9:01:56<10:22:32, 3.35it/s] 66%|██████▋ | 246203/371472 [9:01:56<10:16:23, 3.39it/s] 66%|██████▋ | 246204/371472 [9:01:57<10:28:45, 3.32it/s] 66%|██████▋ | 246205/371472 [9:01:57<10:16:56, 3.38it/s] 66%|██████▋ | 246206/371472 [9:01:57<10:26:19, 3.33it/s] 66%|██████▋ | 246207/371472 [9:01:58<10:18:49, 3.37it/s] 66%|██████▋ | 246208/371472 [9:01:58<10:51:07, 3.21it/s] 66%|██████▋ | 246209/371472 [9:01:58<11:01:22, 3.16it/s] 66%|██████▋ | 246210/371472 [9:01:59<10:39:25, 3.26it/s] 66%|██████▋ | 246211/371472 [9:01:59<10:31:17, 3.31it/s] 66%|██████▋ | 246212/371472 [9:01:59<10:59:51, 3.16it/s] 66%|██████▋ | 246213/371472 [9:02:00<10:41:59, 3.25it/s] 66%|██████▋ | 246214/371472 [9:02:00<10:29:37, 3.32it/s] 66%|██████▋ | 246215/371472 [9:02:00<10:32:14, 3.30it/s] 66%|██████▋ | 246216/371472 [9:02:00<10:13:20, 3.40it/s] 66%|██████▋ | 246217/371472 [9:02:01<10:31:37, 3.31it/s] 66%|██████▋ | 246218/371472 [9:02:01<10:34:16, 3.29it/s] 66%|██████▋ | 246219/371472 [9:02:01<10:23:06, 3.35it/s] 66%|██████▋ | 246220/371472 [9:02:02<10:21:05, 3.36it/s] {'loss': 2.6622, 'learning_rate': 4.036232196341227e-07, 'epoch': 10.61} + 66%|██████▋ | 246220/371472 [9:02:02<10:21:05, 3.36it/s] 66%|██████▋ | 246221/371472 [9:02:02<10:50:15, 3.21it/s] 66%|██████▋ | 246222/371472 [9:02:02<10:22:56, 3.35it/s] 66%|██████▋ | 246223/371472 [9:02:03<10:50:31, 3.21it/s] 66%|██████▋ | 246224/371472 [9:02:03<11:04:32, 3.14it/s] 66%|██████▋ | 246225/371472 [9:02:03<10:41:20, 3.25it/s] 66%|██████▋ | 246226/371472 [9:02:04<11:00:55, 3.16it/s] 66%|██████▋ | 246227/371472 [9:02:04<12:23:29, 2.81it/s] 66%|██████▋ | 246228/371472 [9:02:04<11:24:34, 3.05it/s] 66%|██████▋ | 246229/371472 [9:02:05<11:39:16, 2.99it/s] 66%|██████▋ | 246230/371472 [9:02:05<11:06:49, 3.13it/s] 66%|██████▋ | 246231/371472 [9:02:05<10:46:09, 3.23it/s] 66%|██████▋ | 246232/371472 [9:02:05<10:36:24, 3.28it/s] 66%|██████▋ | 246233/371472 [9:02:06<10:24:56, 3.34it/s] 66%|██████▋ | 246234/371472 [9:02:06<10:27:06, 3.33it/s] 66%|██████▋ | 246235/371472 [9:02:06<10:36:51, 3.28it/s] 66%|██████▋ | 246236/371472 [9:02:07<10:32:30, 3.30it/s] 66%|██████▋ | 246237/371472 [9:02:07<10:22:15, 3.35it/s] 66%|██████▋ | 246238/371472 [9:02:07<10:37:38, 3.27it/s] 66%|██████▋ | 246239/371472 [9:02:08<10:47:46, 3.22it/s] 66%|██████▋ | 246240/371472 [9:02:08<10:42:45, 3.25it/s] {'loss': 2.6962, 'learning_rate': 4.035747376586438e-07, 'epoch': 10.61} + 66%|██████▋ | 246240/371472 [9:02:08<10:42:45, 3.25it/s] 66%|██████▋ | 246241/371472 [9:02:08<10:36:55, 3.28it/s] 66%|██████▋ | 246242/371472 [9:02:08<10:19:56, 3.37it/s] 66%|██████▋ | 246243/371472 [9:02:09<10:49:17, 3.21it/s] 66%|██████▋ | 246244/371472 [9:02:09<11:20:09, 3.07it/s] 66%|██████▋ | 246245/371472 [9:02:09<11:08:36, 3.12it/s] 66%|██████▋ | 246246/371472 [9:02:10<10:39:08, 3.27it/s] 66%|██████▋ | 246247/371472 [9:02:10<10:54:50, 3.19it/s] 66%|██████▋ | 246248/371472 [9:02:10<10:37:48, 3.27it/s] 66%|██████▋ | 246249/371472 [9:02:11<10:24:53, 3.34it/s] 66%|██████▋ | 246250/371472 [9:02:11<10:24:19, 3.34it/s] 66%|██████▋ | 246251/371472 [9:02:11<10:02:41, 3.46it/s] 66%|██████▋ | 246252/371472 [9:02:12<10:00:07, 3.48it/s] 66%|██████▋ | 246253/371472 [9:02:12<9:45:20, 3.57it/s] 66%|██████▋ | 246254/371472 [9:02:12<9:45:25, 3.56it/s] 66%|██████▋ | 246255/371472 [9:02:12<10:26:57, 3.33it/s] 66%|██████▋ | 246256/371472 [9:02:13<10:28:30, 3.32it/s] 66%|██████▋ | 246257/371472 [9:02:13<10:33:04, 3.30it/s] 66%|██████▋ | 246258/371472 [9:02:13<10:27:18, 3.33it/s] 66%|██████▋ | 246259/371472 [9:02:14<10:19:10, 3.37it/s] 66%|██████▋ | 246260/371472 [9:02:14<10:27:26, 3.33it/s] {'loss': 2.8449, 'learning_rate': 4.035262556831649e-07, 'epoch': 10.61} + 66%|██████▋ | 246260/371472 [9:02:14<10:27:26, 3.33it/s] 66%|██████▋ | 246261/371472 [9:02:14<10:26:31, 3.33it/s] 66%|██████▋ | 246262/371472 [9:02:15<10:20:09, 3.37it/s] 66%|██████▋ | 246263/371472 [9:02:15<10:09:41, 3.42it/s] 66%|██████▋ | 246264/371472 [9:02:15<10:19:02, 3.37it/s] 66%|██████▋ | 246265/371472 [9:02:15<11:09:04, 3.12it/s] 66%|██████▋ | 246266/371472 [9:02:16<10:48:56, 3.22it/s] 66%|██████▋ | 246267/371472 [9:02:16<11:20:52, 3.06it/s] 66%|██████▋ | 246268/371472 [9:02:17<12:06:53, 2.87it/s] 66%|██████▋ | 246269/371472 [9:02:17<11:29:03, 3.03it/s] 66%|██████▋ | 246270/371472 [9:02:17<11:22:53, 3.06it/s] 66%|██████▋ | 246271/371472 [9:02:17<10:58:57, 3.17it/s] 66%|██████▋ | 246272/371472 [9:02:18<10:31:38, 3.30it/s] 66%|██████▋ | 246273/371472 [9:02:18<10:20:06, 3.36it/s] 66%|██████▋ | 246274/371472 [9:02:18<10:07:19, 3.44it/s] 66%|██████▋ | 246275/371472 [9:02:19<9:58:43, 3.49it/s] 66%|██████▋ | 246276/371472 [9:02:19<10:22:04, 3.35it/s] 66%|██████▋ | 246277/371472 [9:02:19<10:07:46, 3.43it/s] 66%|██████▋ | 246278/371472 [9:02:20<11:05:13, 3.14it/s] 66%|██████▋ | 246279/371472 [9:02:20<10:41:16, 3.25it/s] 66%|██████▋ | 246280/371472 [9:02:20<10:25:35, 3.34it/s] {'loss': 2.749, 'learning_rate': 4.0347777370768605e-07, 'epoch': 10.61} + 66%|██████▋ | 246280/371472 [9:02:20<10:25:35, 3.34it/s] 66%|██████▋ | 246281/371472 [9:02:20<10:22:41, 3.35it/s] 66%|██████▋ | 246282/371472 [9:02:21<10:15:40, 3.39it/s] 66%|██████▋ | 246283/371472 [9:02:21<10:08:29, 3.43it/s] 66%|██████▋ | 246284/371472 [9:02:21<10:05:26, 3.45it/s] 66%|██████▋ | 246285/371472 [9:02:22<10:00:26, 3.47it/s] 66%|██████▋ | 246286/371472 [9:02:22<10:12:20, 3.41it/s] 66%|██████▋ | 246287/371472 [9:02:22<9:53:12, 3.52it/s] 66%|██████▋ | 246288/371472 [9:02:22<10:00:07, 3.48it/s] 66%|██████▋ | 246289/371472 [9:02:23<9:53:03, 3.52it/s] 66%|██████▋ | 246290/371472 [9:02:23<10:14:54, 3.39it/s] 66%|██████▋ | 246291/371472 [9:02:23<9:58:49, 3.48it/s] 66%|██████▋ | 246292/371472 [9:02:24<10:02:56, 3.46it/s] 66%|██████▋ | 246293/371472 [9:02:24<10:48:11, 3.22it/s] 66%|██████▋ | 246294/371472 [9:02:24<10:51:11, 3.20it/s] 66%|██████▋ | 246295/371472 [9:02:24<10:26:31, 3.33it/s] 66%|██████▋ | 246296/371472 [9:02:25<10:28:36, 3.32it/s] 66%|██████▋ | 246297/371472 [9:02:25<10:24:05, 3.34it/s] 66%|██████▋ | 246298/371472 [9:02:25<10:19:16, 3.37it/s] 66%|██████▋ | 246299/371472 [9:02:26<10:11:10, 3.41it/s] 66%|██████▋ | 246300/371472 [9:02:26<10:39:22, 3.26it/s] {'loss': 2.8367, 'learning_rate': 4.0342929173220707e-07, 'epoch': 10.61} + 66%|██████▋ | 246300/371472 [9:02:26<10:39:22, 3.26it/s] 66%|██████▋ | 246301/371472 [9:02:26<10:31:30, 3.30it/s] 66%|██████▋ | 246302/371472 [9:02:27<10:13:53, 3.40it/s] 66%|██████▋ | 246303/371472 [9:02:27<10:22:42, 3.35it/s] 66%|██████▋ | 246304/371472 [9:02:27<10:16:16, 3.39it/s] 66%|██████▋ | 246305/371472 [9:02:27<10:19:43, 3.37it/s] 66%|██████▋ | 246306/371472 [9:02:28<10:10:04, 3.42it/s] 66%|██████▋ | 246307/371472 [9:02:28<13:06:47, 2.65it/s] 66%|██████▋ | 246308/371472 [9:02:29<12:54:06, 2.69it/s] 66%|██████▋ | 246309/371472 [9:02:29<12:57:29, 2.68it/s] 66%|██████▋ | 246310/371472 [9:02:29<12:06:32, 2.87it/s] 66%|██████▋ | 246311/371472 [9:02:30<12:05:44, 2.87it/s] 66%|██████▋ | 246312/371472 [9:02:30<11:26:25, 3.04it/s] 66%|██████▋ | 246313/371472 [9:02:30<11:22:16, 3.06it/s] 66%|██████▋ | 246314/371472 [9:02:31<11:16:51, 3.08it/s] 66%|██████▋ | 246315/371472 [9:02:31<10:45:44, 3.23it/s] 66%|██████▋ | 246316/371472 [9:02:31<12:09:27, 2.86it/s] 66%|██████▋ | 246317/371472 [9:02:32<11:22:05, 3.06it/s] 66%|██████▋ | 246318/371472 [9:02:32<11:32:44, 3.01it/s] 66%|██████▋ | 246319/371472 [9:02:32<11:01:36, 3.15it/s] 66%|██████▋ | 246320/371472 [9:02:33<10:46:43, 3.23it/s] {'loss': 2.6259, 'learning_rate': 4.0338080975672825e-07, 'epoch': 10.61} + 66%|██████▋ | 246320/371472 [9:02:33<10:46:43, 3.23it/s] 66%|██████▋ | 246321/371472 [9:02:33<10:45:16, 3.23it/s] 66%|██████▋ | 246322/371472 [9:02:33<11:50:16, 2.94it/s] 66%|██████▋ | 246323/371472 [9:02:34<11:29:04, 3.03it/s] 66%|██████▋ | 246324/371472 [9:02:34<11:17:40, 3.08it/s] 66%|██████▋ | 246325/371472 [9:02:34<11:24:45, 3.05it/s] 66%|██████▋ | 246326/371472 [9:02:34<10:55:57, 3.18it/s] 66%|██████▋ | 246327/371472 [9:02:35<10:37:39, 3.27it/s] 66%|██████▋ | 246328/371472 [9:02:35<11:10:30, 3.11it/s] 66%|██████▋ | 246329/371472 [9:02:35<10:37:24, 3.27it/s] 66%|██████▋ | 246330/371472 [9:02:36<11:00:15, 3.16it/s] 66%|██████▋ | 246331/371472 [9:02:36<10:38:24, 3.27it/s] 66%|██████▋ | 246332/371472 [9:02:36<10:22:32, 3.35it/s] 66%|██████▋ | 246333/371472 [9:02:37<10:23:56, 3.34it/s] 66%|██████▋ | 246334/371472 [9:02:37<10:21:01, 3.36it/s] 66%|██████▋ | 246335/371472 [9:02:37<10:45:56, 3.23it/s] 66%|██████▋ | 246336/371472 [9:02:38<10:47:38, 3.22it/s] 66%|██████▋ | 246337/371472 [9:02:38<10:48:53, 3.21it/s] 66%|██████▋ | 246338/371472 [9:02:38<10:44:31, 3.24it/s] 66%|██████▋ | 246339/371472 [9:02:38<10:28:12, 3.32it/s] 66%|██████▋ | 246340/371472 [9:02:39<11:06:29, 3.13it/s] {'loss': 2.49, 'learning_rate': 4.033323277812493e-07, 'epoch': 10.61} + 66%|██████▋ | 246340/371472 [9:02:39<11:06:29, 3.13it/s] 66%|██████▋ | 246341/371472 [9:02:39<10:43:10, 3.24it/s] 66%|██████▋ | 246342/371472 [9:02:39<10:29:55, 3.31it/s] 66%|██████▋ | 246343/371472 [9:02:40<10:11:01, 3.41it/s] 66%|██████▋ | 246344/371472 [9:02:40<10:19:34, 3.37it/s] 66%|██████▋ | 246345/371472 [9:02:40<10:35:55, 3.28it/s] 66%|██████▋ | 246346/371472 [9:02:41<10:31:21, 3.30it/s] 66%|██████▋ | 246347/371472 [9:02:41<10:23:06, 3.35it/s] 66%|██████▋ | 246348/371472 [9:02:41<11:01:07, 3.15it/s] 66%|██████▋ | 246349/371472 [9:02:41<10:29:31, 3.31it/s] 66%|██████▋ | 246350/371472 [9:02:42<10:18:26, 3.37it/s] 66%|██████▋ | 246351/371472 [9:02:42<10:33:22, 3.29it/s] 66%|██████▋ | 246352/371472 [9:02:42<10:34:15, 3.29it/s] 66%|██████▋ | 246353/371472 [9:02:43<10:33:45, 3.29it/s] 66%|██████▋ | 246354/371472 [9:02:43<10:33:20, 3.29it/s] 66%|██████▋ | 246355/371472 [9:02:43<10:27:34, 3.32it/s] 66%|██████▋ | 246356/371472 [9:02:44<10:27:11, 3.32it/s] 66%|██████▋ | 246357/371472 [9:02:44<10:07:30, 3.43it/s] 66%|██████▋ | 246358/371472 [9:02:44<10:13:57, 3.40it/s] 66%|██████▋ | 246359/371472 [9:02:44<10:21:44, 3.35it/s] 66%|██████▋ | 246360/371472 [9:02:45<10:20:52, 3.36it/s] {'loss': 2.8884, 'learning_rate': 4.0328384580577045e-07, 'epoch': 10.61} + 66%|██████▋ | 246360/371472 [9:02:45<10:20:52, 3.36it/s] 66%|██████▋ | 246361/371472 [9:02:45<10:26:04, 3.33it/s] 66%|██████▋ | 246362/371472 [9:02:45<10:35:00, 3.28it/s] 66%|██████▋ | 246363/371472 [9:02:46<10:35:17, 3.28it/s] 66%|██████▋ | 246364/371472 [9:02:46<10:56:09, 3.18it/s] 66%|██████▋ | 246365/371472 [9:02:46<10:50:08, 3.21it/s] 66%|██████▋ | 246366/371472 [9:02:47<10:41:30, 3.25it/s] 66%|██████▋ | 246367/371472 [9:02:47<10:32:25, 3.30it/s] 66%|██████▋ | 246368/371472 [9:02:47<11:15:42, 3.09it/s] 66%|██████▋ | 246369/371472 [9:02:48<11:31:50, 3.01it/s] 66%|██████▋ | 246370/371472 [9:02:48<11:38:31, 2.98it/s] 66%|██████▋ | 246371/371472 [9:02:48<11:13:01, 3.10it/s] 66%|██████▋ | 246372/371472 [9:02:49<10:59:31, 3.16it/s] 66%|██████▋ | 246373/371472 [9:02:49<11:55:52, 2.91it/s] 66%|██████▋ | 246374/371472 [9:02:49<11:57:57, 2.90it/s] 66%|██████▋ | 246375/371472 [9:02:50<11:26:54, 3.04it/s] 66%|██████▋ | 246376/371472 [9:02:50<10:59:54, 3.16it/s] 66%|██████▋ | 246377/371472 [9:02:50<10:38:46, 3.26it/s] 66%|██████▋ | 246378/371472 [9:02:50<10:16:34, 3.38it/s] 66%|██████▋ | 246379/371472 [9:02:51<10:06:13, 3.44it/s] 66%|██████▋ | 246380/371472 [9:02:51<10:03:26, 3.45it/s] {'loss': 2.5794, 'learning_rate': 4.032353638302915e-07, 'epoch': 10.61} + 66%|██████▋ | 246380/371472 [9:02:51<10:03:26, 3.45it/s] 66%|██████▋ | 246381/371472 [9:02:51<10:01:32, 3.47it/s] 66%|██████▋ | 246382/371472 [9:02:52<10:02:40, 3.46it/s] 66%|██████▋ | 246383/371472 [9:02:52<10:07:39, 3.43it/s] 66%|██████▋ | 246384/371472 [9:02:52<10:03:35, 3.45it/s] 66%|██████▋ | 246385/371472 [9:02:52<9:58:53, 3.48it/s] 66%|██████▋ | 246386/371472 [9:02:53<11:16:30, 3.08it/s] 66%|██████▋ | 246387/371472 [9:02:53<11:14:59, 3.09it/s] 66%|██████▋ | 246388/371472 [9:02:53<10:36:42, 3.27it/s] 66%|██████▋ | 246389/371472 [9:02:54<10:13:03, 3.40it/s] 66%|██████▋ | 246390/371472 [9:02:54<10:18:20, 3.37it/s] 66%|██████▋ | 246391/371472 [9:02:54<10:18:40, 3.37it/s] 66%|██████▋ | 246392/371472 [9:02:55<10:24:07, 3.34it/s] 66%|██████▋ | 246393/371472 [9:02:55<10:12:01, 3.41it/s] 66%|██████▋ | 246394/371472 [9:02:55<10:01:03, 3.47it/s] 66%|██████▋ | 246395/371472 [9:02:56<10:00:50, 3.47it/s] 66%|██████▋ | 246396/371472 [9:02:56<10:01:02, 3.47it/s] 66%|██████▋ | 246397/371472 [9:02:56<10:15:44, 3.39it/s] 66%|██████▋ | 246398/371472 [9:02:56<10:29:48, 3.31it/s] 66%|██████▋ | 246399/371472 [9:02:57<10:20:56, 3.36it/s] 66%|██████▋ | 246400/371472 [9:02:57<10:14:12, 3.39it/s] {'loss': 2.7271, 'learning_rate': 4.031868818548126e-07, 'epoch': 10.61} + 66%|██████▋ | 246400/371472 [9:02:57<10:14:12, 3.39it/s] 66%|██████▋ | 246401/371472 [9:02:57<10:15:17, 3.39it/s] 66%|██████▋ | 246402/371472 [9:02:58<10:22:49, 3.35it/s] 66%|██████▋ | 246403/371472 [9:02:58<10:27:06, 3.32it/s] 66%|██████▋ | 246404/371472 [9:02:58<10:18:27, 3.37it/s] 66%|██████▋ | 246405/371472 [9:02:59<10:32:22, 3.30it/s] 66%|██████▋ | 246406/371472 [9:02:59<10:08:43, 3.42it/s] 66%|██████▋ | 246407/371472 [9:02:59<9:56:08, 3.50it/s] 66%|██████▋ | 246408/371472 [9:02:59<9:52:56, 3.52it/s] 66%|██████▋ | 246409/371472 [9:03:00<9:54:06, 3.51it/s] 66%|██████▋ | 246410/371472 [9:03:00<10:32:22, 3.30it/s] 66%|██████▋ | 246411/371472 [9:03:00<10:18:12, 3.37it/s] 66%|██████▋ | 246412/371472 [9:03:01<10:15:58, 3.38it/s] 66%|██████▋ | 246413/371472 [9:03:01<10:25:55, 3.33it/s] 66%|██████▋ | 246414/371472 [9:03:01<10:12:32, 3.40it/s] 66%|██████▋ | 246415/371472 [9:03:01<10:09:43, 3.42it/s] 66%|██████▋ | 246416/371472 [9:03:02<10:23:21, 3.34it/s] 66%|██████▋ | 246417/371472 [9:03:02<11:02:02, 3.15it/s] 66%|██████▋ | 246418/371472 [9:03:02<10:48:40, 3.21it/s] 66%|██████▋ | 246419/371472 [9:03:03<11:26:20, 3.04it/s] 66%|██████▋ | 246420/371472 [9:03:03<11:12:43, 3.10it/s] {'loss': 2.7578, 'learning_rate': 4.031383998793337e-07, 'epoch': 10.61} + 66%|██████▋ | 246420/371472 [9:03:03<11:12:43, 3.10it/s] 66%|██████▋ | 246421/371472 [9:03:03<10:48:24, 3.21it/s] 66%|██████▋ | 246422/371472 [9:03:04<10:39:15, 3.26it/s] 66%|██████▋ | 246423/371472 [9:03:04<10:15:50, 3.38it/s] 66%|██████▋ | 246424/371472 [9:03:04<10:13:37, 3.40it/s] 66%|██████▋ | 246425/371472 [9:03:04<9:52:31, 3.52it/s] 66%|██████▋ | 246426/371472 [9:03:05<10:06:54, 3.43it/s] 66%|██████▋ | 246427/371472 [9:03:05<10:07:55, 3.43it/s] 66%|██████▋ | 246428/371472 [9:03:05<10:03:48, 3.45it/s] 66%|██████▋ | 246429/371472 [9:03:06<10:04:24, 3.45it/s] 66%|██████▋ | 246430/371472 [9:03:06<10:13:42, 3.40it/s] 66%|██████▋ | 246431/371472 [9:03:06<10:56:11, 3.18it/s] 66%|██████▋ | 246432/371472 [9:03:07<12:34:10, 2.76it/s] 66%|██████▋ | 246433/371472 [9:03:07<13:07:38, 2.65it/s] 66%|██████▋ | 246434/371472 [9:03:08<12:25:38, 2.79it/s] 66%|██████▋ | 246435/371472 [9:03:08<11:44:53, 2.96it/s] 66%|██████▋ | 246436/371472 [9:03:08<11:08:27, 3.12it/s] 66%|██████▋ | 246437/371472 [9:03:08<11:00:55, 3.15it/s] 66%|██████▋ | 246438/371472 [9:03:09<10:57:46, 3.17it/s] 66%|██████▋ | 246439/371472 [9:03:09<10:46:56, 3.22it/s] 66%|██████▋ | 246440/371472 [9:03:09<11:05:16, 3.13it/s] {'loss': 2.6441, 'learning_rate': 4.030899179038548e-07, 'epoch': 10.61} + 66%|██████▋ | 246440/371472 [9:03:09<11:05:16, 3.13it/s] 66%|██████▋ | 246441/371472 [9:03:10<10:35:27, 3.28it/s] 66%|██████▋ | 246442/371472 [9:03:10<10:17:21, 3.38it/s] 66%|██████▋ | 246443/371472 [9:03:10<10:09:30, 3.42it/s] 66%|██████▋ | 246444/371472 [9:03:11<10:38:19, 3.26it/s] 66%|██████▋ | 246445/371472 [9:03:11<10:19:38, 3.36it/s] 66%|██████▋ | 246446/371472 [9:03:11<10:52:07, 3.20it/s] 66%|██████▋ | 246447/371472 [9:03:11<10:27:16, 3.32it/s] 66%|██████▋ | 246448/371472 [9:03:12<10:10:23, 3.41it/s] 66%|██████▋ | 246449/371472 [9:03:12<9:58:54, 3.48it/s] 66%|██████▋ | 246450/371472 [9:03:12<10:14:19, 3.39it/s] 66%|██████▋ | 246451/371472 [9:03:13<10:34:58, 3.28it/s] 66%|██████▋ | 246452/371472 [9:03:13<10:45:16, 3.23it/s] 66%|██████▋ | 246453/371472 [9:03:13<10:58:37, 3.16it/s] 66%|██████▋ | 246454/371472 [9:03:14<10:51:52, 3.20it/s] 66%|██████▋ | 246455/371472 [9:03:14<11:00:21, 3.16it/s] 66%|██████▋ | 246456/371472 [9:03:14<10:39:10, 3.26it/s] 66%|██████▋ | 246457/371472 [9:03:14<10:23:30, 3.34it/s] 66%|██████▋ | 246458/371472 [9:03:15<10:09:47, 3.42it/s] 66%|██████▋ | 246459/371472 [9:03:15<10:33:05, 3.29it/s] 66%|██████▋ | 246460/371472 [9:03:15<10:42:07, 3.24it/s] {'loss': 2.6346, 'learning_rate': 4.0304143592837596e-07, 'epoch': 10.62} + 66%|██████▋ | 246460/371472 [9:03:15<10:42:07, 3.24it/s] 66%|██████▋ | 246461/371472 [9:03:16<10:23:35, 3.34it/s] 66%|██████▋ | 246462/371472 [9:03:16<10:24:42, 3.34it/s] 66%|██████▋ | 246463/371472 [9:03:16<10:15:26, 3.39it/s] 66%|██████▋ | 246464/371472 [9:03:17<10:14:01, 3.39it/s] 66%|██████▋ | 246465/371472 [9:03:17<10:02:42, 3.46it/s] 66%|██████▋ | 246466/371472 [9:03:17<10:36:11, 3.27it/s] 66%|██████▋ | 246467/371472 [9:03:18<11:03:36, 3.14it/s] 66%|██████▋ | 246468/371472 [9:03:18<10:31:49, 3.30it/s] 66%|██████▋ | 246469/371472 [9:03:18<10:29:51, 3.31it/s] 66%|██████▋ | 246470/371472 [9:03:18<10:40:09, 3.25it/s] 66%|██████▋ | 246471/371472 [9:03:19<10:30:43, 3.30it/s] 66%|██████▋ | 246472/371472 [9:03:19<10:50:23, 3.20it/s] 66%|██████▋ | 246473/371472 [9:03:19<11:42:35, 2.97it/s] 66%|██████▋ | 246474/371472 [9:03:20<11:03:29, 3.14it/s] 66%|██████▋ | 246475/371472 [9:03:20<10:45:00, 3.23it/s] 66%|██████▋ | 246476/371472 [9:03:20<10:29:26, 3.31it/s] 66%|██████▋ | 246477/371472 [9:03:21<10:07:28, 3.43it/s] 66%|██████▋ | 246478/371472 [9:03:21<10:07:50, 3.43it/s] 66%|██████▋ | 246479/371472 [9:03:21<10:11:55, 3.40it/s] 66%|██████▋ | 246480/371472 [9:03:21<10:29:06, 3.31it/s] {'loss': 2.6772, 'learning_rate': 4.0299295395289703e-07, 'epoch': 10.62} + 66%|██████▋ | 246480/371472 [9:03:21<10:29:06, 3.31it/s] 66%|██████▋ | 246481/371472 [9:03:22<10:23:07, 3.34it/s] 66%|██████▋ | 246482/371472 [9:03:22<10:10:12, 3.41it/s] 66%|██████▋ | 246483/371472 [9:03:22<10:51:08, 3.20it/s] 66%|██████▋ | 246484/371472 [9:03:23<10:33:54, 3.29it/s] 66%|██████▋ | 246485/371472 [9:03:23<10:40:01, 3.25it/s] 66%|██████▋ | 246486/371472 [9:03:23<10:18:02, 3.37it/s] 66%|██████▋ | 246487/371472 [9:03:24<10:07:15, 3.43it/s] 66%|██████▋ | 246488/371472 [9:03:24<10:41:17, 3.25it/s] 66%|██████▋ | 246489/371472 [9:03:24<11:01:24, 3.15it/s] 66%|██████▋ | 246490/371472 [9:03:24<10:25:59, 3.33it/s] 66%|██████▋ | 246491/371472 [9:03:25<10:13:55, 3.39it/s] 66%|██████▋ | 246492/371472 [9:03:25<9:57:56, 3.48it/s] 66%|██████▋ | 246493/371472 [9:03:25<10:20:48, 3.36it/s] 66%|██████▋ | 246494/371472 [9:03:26<10:07:17, 3.43it/s] 66%|██████▋ | 246495/371472 [9:03:26<10:11:47, 3.40it/s] 66%|██████▋ | 246496/371472 [9:03:26<10:31:16, 3.30it/s] 66%|██████▋ | 246497/371472 [9:03:27<10:17:13, 3.37it/s] 66%|██████▋ | 246498/371472 [9:03:27<10:15:14, 3.39it/s] 66%|██████▋ | 246499/371472 [9:03:27<10:03:41, 3.45it/s] 66%|██████▋ | 246500/371472 [9:03:27<10:03:24, 3.45it/s] {'loss': 2.7151, 'learning_rate': 4.0294447197741816e-07, 'epoch': 10.62} + 66%|██████▋ | 246500/371472 [9:03:27<10:03:24, 3.45it/s] 66%|██████▋ | 246501/371472 [9:03:28<10:18:08, 3.37it/s] 66%|██████▋ | 246502/371472 [9:03:28<10:27:07, 3.32it/s] 66%|██████▋ | 246503/371472 [9:03:28<10:40:08, 3.25it/s] 66%|██████▋ | 246504/371472 [9:03:29<10:21:30, 3.35it/s] 66%|██████▋ | 246505/371472 [9:03:29<10:16:59, 3.38it/s] 66%|██████▋ | 246506/371472 [9:03:29<10:16:44, 3.38it/s] 66%|██████▋ | 246507/371472 [9:03:29<10:20:08, 3.36it/s] 66%|██████▋ | 246508/371472 [9:03:30<10:31:13, 3.30it/s] 66%|██████▋ | 246509/371472 [9:03:30<10:17:55, 3.37it/s] 66%|██████▋ | 246510/371472 [9:03:30<10:23:37, 3.34it/s] 66%|██████▋ | 246511/371472 [9:03:31<10:11:16, 3.41it/s] 66%|██████▋ | 246512/371472 [9:03:31<10:29:38, 3.31it/s] 66%|██████▋ | 246513/371472 [9:03:31<11:26:20, 3.03it/s] 66%|██████▋ | 246514/371472 [9:03:32<10:48:21, 3.21it/s] 66%|██████▋ | 246515/371472 [9:03:32<10:28:19, 3.31it/s] 66%|██████▋ | 246516/371472 [9:03:32<10:27:27, 3.32it/s] 66%|██████▋ | 246517/371472 [9:03:33<10:10:24, 3.41it/s] 66%|██████▋ | 246518/371472 [9:03:33<10:11:44, 3.40it/s] 66%|██████▋ | 246519/371472 [9:03:33<10:29:38, 3.31it/s] 66%|██████▋ | 246520/371472 [9:03:33<10:40:19, 3.25it/s] {'loss': 2.613, 'learning_rate': 4.0289599000193923e-07, 'epoch': 10.62} + 66%|██████▋ | 246520/371472 [9:03:33<10:40:19, 3.25it/s] 66%|██████▋ | 246521/371472 [9:03:34<10:27:54, 3.32it/s] 66%|██████▋ | 246522/371472 [9:03:34<10:22:00, 3.35it/s] 66%|██████▋ | 246523/371472 [9:03:34<10:23:16, 3.34it/s] 66%|██████▋ | 246524/371472 [9:03:35<10:14:31, 3.39it/s] 66%|██████▋ | 246525/371472 [9:03:35<10:14:59, 3.39it/s] 66%|██████▋ | 246526/371472 [9:03:35<10:43:01, 3.24it/s] 66%|██████▋ | 246527/371472 [9:03:36<10:13:24, 3.39it/s] 66%|██████▋ | 246528/371472 [9:03:36<9:57:11, 3.49it/s] 66%|██████▋ | 246529/371472 [9:03:36<9:51:50, 3.52it/s] 66%|██████▋ | 246530/371472 [9:03:36<10:11:02, 3.41it/s] 66%|██████▋ | 246531/371472 [9:03:37<9:56:09, 3.49it/s] 66%|██████▋ | 246532/371472 [9:03:37<9:58:10, 3.48it/s] 66%|██████▋ | 246533/371472 [9:03:37<9:51:18, 3.52it/s] 66%|██████▋ | 246534/371472 [9:03:38<10:14:41, 3.39it/s] 66%|██████▋ | 246535/371472 [9:03:38<10:08:36, 3.42it/s] 66%|██████▋ | 246536/371472 [9:03:38<10:20:25, 3.36it/s] 66%|██████▋ | 246537/371472 [9:03:38<10:30:10, 3.30it/s] 66%|██████▋ | 246538/371472 [9:03:39<10:18:05, 3.37it/s] 66%|██████▋ | 246539/371472 [9:03:39<10:06:30, 3.43it/s] 66%|██████▋ | 246540/371472 [9:03:39<10:25:43, 3.33it/s] {'loss': 2.8516, 'learning_rate': 4.028475080264604e-07, 'epoch': 10.62} + 66%|██████▋ | 246540/371472 [9:03:39<10:25:43, 3.33it/s] 66%|██████▋ | 246541/371472 [9:03:40<10:48:30, 3.21it/s] 66%|██████▋ | 246542/371472 [9:03:40<10:59:38, 3.16it/s] 66%|██████▋ | 246543/371472 [9:03:40<11:15:38, 3.08it/s] 66%|██████▋ | 246544/371472 [9:03:41<10:45:11, 3.23it/s] 66%|██████▋ | 246545/371472 [9:03:41<10:36:08, 3.27it/s] 66%|██████▋ | 246546/371472 [9:03:41<10:41:29, 3.25it/s] 66%|██████▋ | 246547/371472 [9:03:42<10:27:29, 3.32it/s] 66%|██████▋ | 246548/371472 [9:03:42<10:23:08, 3.34it/s] 66%|██████▋ | 246549/371472 [9:03:42<10:14:00, 3.39it/s] 66%|██████▋ | 246550/371472 [9:03:42<10:49:55, 3.20it/s] 66%|██████▋ | 246551/371472 [9:03:43<10:37:13, 3.27it/s] 66%|██████▋ | 246552/371472 [9:03:43<10:32:21, 3.29it/s] 66%|██████▋ | 246553/371472 [9:03:43<10:46:03, 3.22it/s] 66%|██████▋ | 246554/371472 [9:03:44<11:08:10, 3.12it/s] 66%|██████▋ | 246555/371472 [9:03:44<11:08:26, 3.11it/s] 66%|██████▋ | 246556/371472 [9:03:44<11:24:29, 3.04it/s] 66%|██████▋ | 246557/371472 [9:03:45<10:52:12, 3.19it/s] 66%|██████▋ | 246558/371472 [9:03:45<10:33:07, 3.29it/s] 66%|██████▋ | 246559/371472 [9:03:45<11:47:50, 2.94it/s] 66%|██████▋ | 246560/371472 [9:03:46<11:16:55, 3.08it/s] {'loss': 2.7367, 'learning_rate': 4.027990260509814e-07, 'epoch': 10.62} + 66%|██████▋ | 246560/371472 [9:03:46<11:16:55, 3.08it/s] 66%|██████▋ | 246561/371472 [9:03:46<11:15:49, 3.08it/s] 66%|██████▋ | 246562/371472 [9:03:46<11:11:57, 3.10it/s] 66%|██████▋ | 246563/371472 [9:03:47<10:47:13, 3.22it/s] 66%|██████▋ | 246564/371472 [9:03:47<10:32:21, 3.29it/s] 66%|██████▋ | 246565/371472 [9:03:47<10:27:15, 3.32it/s] 66%|██████▋ | 246566/371472 [9:03:47<10:39:21, 3.26it/s] 66%|██████▋ | 246567/371472 [9:03:48<11:06:17, 3.12it/s] 66%|██████▋ | 246568/371472 [9:03:48<10:48:52, 3.21it/s] 66%|██████▋ | 246569/371472 [9:03:48<10:51:39, 3.19it/s] 66%|██████▋ | 246570/371472 [9:03:49<10:45:19, 3.23it/s] 66%|██████▋ | 246571/371472 [9:03:49<10:25:03, 3.33it/s] 66%|██████▋ | 246572/371472 [9:03:49<10:09:57, 3.41it/s] 66%|██████▋ | 246573/371472 [9:03:50<9:53:12, 3.51it/s] 66%|██████▋ | 246574/371472 [9:03:50<10:09:05, 3.42it/s] 66%|██████▋ | 246575/371472 [9:03:50<10:07:38, 3.43it/s] 66%|██████▋ | 246576/371472 [9:03:50<10:19:44, 3.36it/s] 66%|██████▋ | 246577/371472 [9:03:51<10:04:55, 3.44it/s] 66%|██████▋ | 246578/371472 [9:03:51<9:59:40, 3.47it/s] 66%|██████▋ | 246579/371472 [9:03:51<11:03:15, 3.14it/s] 66%|██████▋ | 246580/371472 [9:03:52<11:05:23, 3.13it/s] {'loss': 2.7396, 'learning_rate': 4.0275054407550266e-07, 'epoch': 10.62} + 66%|██████▋ | 246580/371472 [9:03:52<11:05:23, 3.13it/s] 66%|██████▋ | 246581/371472 [9:03:52<11:28:15, 3.02it/s] 66%|██████▋ | 246582/371472 [9:03:52<11:50:14, 2.93it/s] 66%|██████▋ | 246583/371472 [9:03:53<11:08:54, 3.11it/s] 66%|██████▋ | 246584/371472 [9:03:53<11:01:55, 3.14it/s] 66%|██████▋ | 246585/371472 [9:03:53<10:50:10, 3.20it/s] 66%|██████▋ | 246586/371472 [9:03:54<11:15:04, 3.08it/s] 66%|██████▋ | 246587/371472 [9:03:54<10:43:38, 3.23it/s] 66%|██████▋ | 246588/371472 [9:03:54<10:29:15, 3.31it/s] 66%|██████▋ | 246589/371472 [9:03:55<10:58:39, 3.16it/s] 66%|██████▋ | 246590/371472 [9:03:55<10:58:49, 3.16it/s] 66%|██████▋ | 246591/371472 [9:03:55<11:51:49, 2.92it/s] 66%|██████▋ | 246592/371472 [9:03:56<12:28:57, 2.78it/s] 66%|██████▋ | 246593/371472 [9:03:56<12:23:55, 2.80it/s] 66%|██████▋ | 246594/371472 [9:03:56<11:43:32, 2.96it/s] 66%|██████▋ | 246595/371472 [9:03:57<11:59:53, 2.89it/s] 66%|██████▋ | 246596/371472 [9:03:57<11:44:07, 2.96it/s] 66%|██████▋ | 246597/371472 [9:03:57<11:32:03, 3.01it/s] 66%|██████▋ | 246598/371472 [9:03:58<11:44:09, 2.96it/s] 66%|██████▋ | 246599/371472 [9:03:58<11:08:52, 3.11it/s] 66%|██████▋ | 246600/371472 [9:03:58<11:16:48, 3.08it/s] {'loss': 2.7509, 'learning_rate': 4.027020621000237e-07, 'epoch': 10.62} + 66%|██████▋ | 246600/371472 [9:03:58<11:16:48, 3.08it/s] 66%|██████▋ | 246601/371472 [9:03:59<11:18:53, 3.07it/s] 66%|██████▋ | 246602/371472 [9:03:59<11:00:03, 3.15it/s] 66%|██████▋ | 246603/371472 [9:03:59<10:53:04, 3.19it/s] 66%|██████▋ | 246604/371472 [9:04:00<10:52:24, 3.19it/s] 66%|██████▋ | 246605/371472 [9:04:00<10:59:20, 3.16it/s] 66%|██████▋ | 246606/371472 [9:04:00<10:30:30, 3.30it/s] 66%|██████▋ | 246607/371472 [9:04:01<11:06:45, 3.12it/s] 66%|██████▋ | 246608/371472 [9:04:01<10:52:06, 3.19it/s] 66%|██████▋ | 246609/371472 [9:04:01<10:35:06, 3.28it/s] 66%|██████▋ | 246610/371472 [9:04:01<10:54:42, 3.18it/s] 66%|██████▋ | 246611/371472 [9:04:02<10:34:45, 3.28it/s] 66%|██████▋ | 246612/371472 [9:04:02<10:19:30, 3.36it/s] 66%|██████▋ | 246613/371472 [9:04:02<10:17:38, 3.37it/s] 66%|██████▋ | 246614/371472 [9:04:03<10:04:55, 3.44it/s] 66%|██████▋ | 246615/371472 [9:04:03<10:02:22, 3.45it/s] 66%|██████▋ | 246616/371472 [9:04:03<10:14:12, 3.39it/s] 66%|██████▋ | 246617/371472 [9:04:03<10:15:53, 3.38it/s] 66%|██████▋ | 246618/371472 [9:04:04<10:41:26, 3.24it/s] 66%|██████▋ | 246619/371472 [9:04:04<10:22:39, 3.34it/s] 66%|██████▋ | 246620/371472 [9:04:04<10:19:11, 3.36it/s] {'loss': 2.6947, 'learning_rate': 4.026535801245448e-07, 'epoch': 10.62} + 66%|██████▋ | 246620/371472 [9:04:04<10:19:11, 3.36it/s] 66%|██████▋ | 246621/371472 [9:04:05<10:26:55, 3.32it/s] 66%|██████▋ | 246622/371472 [9:04:05<10:11:30, 3.40it/s] 66%|██████▋ | 246623/371472 [9:04:05<10:03:43, 3.45it/s] 66%|██████▋ | 246624/371472 [9:04:06<10:10:49, 3.41it/s] 66%|██████▋ | 246625/371472 [9:04:06<10:13:25, 3.39it/s] 66%|██████▋ | 246626/371472 [9:04:06<10:04:09, 3.44it/s] 66%|██████▋ | 246627/371472 [9:04:06<10:30:57, 3.30it/s] 66%|██████▋ | 246628/371472 [9:04:07<10:52:27, 3.19it/s] 66%|██████▋ | 246629/371472 [9:04:07<10:42:21, 3.24it/s] 66%|██████▋ | 246630/371472 [9:04:07<10:24:57, 3.33it/s] 66%|██████▋ | 246631/371472 [9:04:08<10:16:23, 3.38it/s] 66%|██████▋ | 246632/371472 [9:04:08<10:57:54, 3.16it/s] 66%|██████▋ | 246633/371472 [9:04:08<10:54:32, 3.18it/s] 66%|██████▋ | 246634/371472 [9:04:09<10:31:48, 3.29it/s] 66%|██████▋ | 246635/371472 [9:04:09<10:22:33, 3.34it/s] 66%|██████▋ | 246636/371472 [9:04:09<11:04:00, 3.13it/s] 66%|██████▋ | 246637/371472 [9:04:10<10:47:45, 3.21it/s] 66%|██████▋ | 246638/371472 [9:04:10<10:14:49, 3.38it/s] 66%|██████▋ | 246639/371472 [9:04:10<11:02:52, 3.14it/s] 66%|██████▋ | 246640/371472 [9:04:11<11:06:45, 3.12it/s] {'loss': 2.8514, 'learning_rate': 4.0260509814906587e-07, 'epoch': 10.62} + 66%|██████▋ | 246640/371472 [9:04:11<11:06:45, 3.12it/s] 66%|██████▋ | 246641/371472 [9:04:11<10:33:13, 3.29it/s] 66%|██████▋ | 246642/371472 [9:04:11<10:30:17, 3.30it/s] 66%|██████▋ | 246643/371472 [9:04:11<10:23:47, 3.34it/s] 66%|██████▋ | 246644/371472 [9:04:12<10:25:53, 3.32it/s] 66%|██████▋ | 246645/371472 [9:04:12<10:13:31, 3.39it/s] 66%|██████▋ | 246646/371472 [9:04:12<10:17:11, 3.37it/s] 66%|██████▋ | 246647/371472 [9:04:13<10:18:46, 3.36it/s] 66%|██████▋ | 246648/371472 [9:04:13<10:04:13, 3.44it/s] 66%|██████▋ | 246649/371472 [9:04:13<10:10:55, 3.41it/s] 66%|██████▋ | 246650/371472 [9:04:13<10:28:38, 3.31it/s] 66%|██████▋ | 246651/371472 [9:04:14<10:36:26, 3.27it/s] 66%|██████▋ | 246652/371472 [9:04:14<10:53:56, 3.18it/s] 66%|██████▋ | 246653/371472 [9:04:14<11:01:15, 3.15it/s] 66%|██████▋ | 246654/371472 [9:04:15<10:45:59, 3.22it/s] 66%|██████▋ | 246655/371472 [9:04:15<10:58:34, 3.16it/s] 66%|██████▋ | 246656/371472 [9:04:16<12:24:14, 2.80it/s] 66%|██████▋ | 246657/371472 [9:04:16<11:42:33, 2.96it/s] 66%|██████▋ | 246658/371472 [9:04:16<11:10:44, 3.10it/s] 66%|██████▋ | 246659/371472 [9:04:16<11:27:00, 3.03it/s] 66%|██████▋ | 246660/371472 [9:04:17<11:03:32, 3.14it/s] {'loss': 2.8385, 'learning_rate': 4.0255661617358705e-07, 'epoch': 10.62} + 66%|██████▋ | 246660/371472 [9:04:17<11:03:32, 3.14it/s] 66%|██████▋ | 246661/371472 [9:04:17<11:05:44, 3.12it/s] 66%|██████▋ | 246662/371472 [9:04:17<11:03:03, 3.14it/s] 66%|██████▋ | 246663/371472 [9:04:18<10:48:48, 3.21it/s] 66%|██████▋ | 246664/371472 [9:04:18<10:55:35, 3.17it/s] 66%|██████▋ | 246665/371472 [9:04:18<10:40:39, 3.25it/s] 66%|██████▋ | 246666/371472 [9:04:19<10:21:32, 3.35it/s] 66%|██████▋ | 246667/371472 [9:04:19<9:56:22, 3.49it/s] 66%|██████▋ | 246668/371472 [9:04:19<10:33:39, 3.28it/s] 66%|██████▋ | 246669/371472 [9:04:19<10:22:34, 3.34it/s] 66%|██████▋ | 246670/371472 [9:04:20<10:12:46, 3.39it/s] 66%|██████▋ | 246671/371472 [9:04:20<10:03:23, 3.45it/s] 66%|██████▋ | 246672/371472 [9:04:20<10:10:49, 3.41it/s] 66%|██████▋ | 246673/371472 [9:04:21<10:01:09, 3.46it/s] 66%|██████▋ | 246674/371472 [9:04:21<10:26:43, 3.32it/s] 66%|██████▋ | 246675/371472 [9:04:21<10:22:54, 3.34it/s] 66%|██████▋ | 246676/371472 [9:04:22<10:07:26, 3.42it/s] 66%|██████▋ | 246677/371472 [9:04:22<9:57:57, 3.48it/s] 66%|██████▋ | 246678/371472 [9:04:22<10:52:15, 3.19it/s] 66%|██████▋ | 246679/371472 [9:04:22<10:32:46, 3.29it/s] 66%|██████▋ | 246680/371472 [9:04:23<10:17:26, 3.37it/s] {'loss': 2.7319, 'learning_rate': 4.0250813419810807e-07, 'epoch': 10.62} + 66%|██████▋ | 246680/371472 [9:04:23<10:17:26, 3.37it/s] 66%|██████▋ | 246681/371472 [9:04:23<10:17:19, 3.37it/s] 66%|██████▋ | 246682/371472 [9:04:23<10:16:14, 3.38it/s] 66%|██████▋ | 246683/371472 [9:04:24<10:06:58, 3.43it/s] 66%|██████▋ | 246684/371472 [9:04:24<10:37:47, 3.26it/s] 66%|██████▋ | 246685/371472 [9:04:24<11:09:10, 3.11it/s] 66%|██████▋ | 246686/371472 [9:04:25<10:53:48, 3.18it/s] 66%|██████▋ | 246687/371472 [9:04:25<11:54:12, 2.91it/s] 66%|██████▋ | 246688/371472 [9:04:25<11:13:03, 3.09it/s] 66%|██████▋ | 246689/371472 [9:04:26<10:44:44, 3.23it/s] 66%|██████▋ | 246690/371472 [9:04:26<10:33:22, 3.28it/s] 66%|██████▋ | 246691/371472 [9:04:26<10:14:38, 3.38it/s] 66%|██████▋ | 246692/371472 [9:04:26<10:19:38, 3.36it/s] 66%|██████▋ | 246693/371472 [9:04:27<11:00:10, 3.15it/s] 66%|██████▋ | 246694/371472 [9:04:27<11:03:24, 3.13it/s] 66%|██████▋ | 246695/371472 [9:04:27<10:40:27, 3.25it/s] 66%|██████▋ | 246696/371472 [9:04:28<11:16:13, 3.08it/s] 66%|██████▋ | 246697/371472 [9:04:28<10:49:03, 3.20it/s] 66%|██████▋ | 246698/371472 [9:04:28<10:25:53, 3.32it/s] 66%|██████▋ | 246699/371472 [9:04:29<10:38:09, 3.26it/s] 66%|██████▋ | 246700/371472 [9:04:29<10:29:24, 3.30it/s] {'loss': 2.5677, 'learning_rate': 4.0245965222262924e-07, 'epoch': 10.63} + 66%|██████▋ | 246700/371472 [9:04:29<10:29:24, 3.30it/s] 66%|██████▋ | 246701/371472 [9:04:29<10:09:48, 3.41it/s] 66%|██████▋ | 246702/371472 [9:04:29<10:13:19, 3.39it/s] 66%|██████▋ | 246703/371472 [9:04:30<10:18:00, 3.36it/s] 66%|██████▋ | 246704/371472 [9:04:30<10:28:28, 3.31it/s] 66%|██████▋ | 246705/371472 [9:04:30<10:40:45, 3.25it/s] 66%|██████▋ | 246706/371472 [9:04:31<11:00:30, 3.15it/s] 66%|██████▋ | 246707/371472 [9:04:31<10:45:30, 3.22it/s] 66%|██████▋ | 246708/371472 [9:04:31<11:00:05, 3.15it/s] 66%|██████▋ | 246709/371472 [9:04:32<10:48:07, 3.21it/s] 66%|██████▋ | 246710/371472 [9:04:32<10:37:50, 3.26it/s] 66%|██████▋ | 246711/371472 [9:04:32<10:55:43, 3.17it/s] 66%|██████▋ | 246712/371472 [9:04:33<10:30:37, 3.30it/s] 66%|██████▋ | 246713/371472 [9:04:33<10:31:12, 3.29it/s] 66%|██████▋ | 246714/371472 [9:04:33<10:25:41, 3.32it/s] 66%|██████▋ | 246715/371472 [9:04:33<10:00:31, 3.46it/s] 66%|██████▋ | 246716/371472 [9:04:34<9:44:38, 3.56it/s] 66%|██████▋ | 246717/371472 [9:04:34<9:49:11, 3.53it/s] 66%|██████▋ | 246718/371472 [9:04:34<10:10:02, 3.41it/s] 66%|██████▋ | 246719/371472 [9:04:35<10:11:52, 3.40it/s] 66%|██████▋ | 246720/371472 [9:04:35<10:07:20, 3.42it/s] {'loss': 2.5839, 'learning_rate': 4.024111702471503e-07, 'epoch': 10.63} + 66%|██████▋ | 246720/371472 [9:04:35<10:07:20, 3.42it/s] 66%|██████▋ | 246721/371472 [9:04:35<10:09:59, 3.41it/s] 66%|██████▋ | 246722/371472 [9:04:36<10:07:42, 3.42it/s] 66%|██████▋ | 246723/371472 [9:04:36<10:05:12, 3.44it/s] 66%|██████▋ | 246724/371472 [9:04:36<10:02:49, 3.45it/s] 66%|██████▋ | 246725/371472 [9:04:36<11:12:46, 3.09it/s] 66%|██████▋ | 246726/371472 [9:04:37<11:09:24, 3.11it/s] 66%|██████▋ | 246727/371472 [9:04:37<10:42:38, 3.24it/s] 66%|██████▋ | 246728/371472 [9:04:37<10:46:37, 3.22it/s] 66%|██████▋ | 246729/371472 [9:04:38<10:33:49, 3.28it/s] 66%|██████▋ | 246730/371472 [9:04:38<10:40:06, 3.25it/s] 66%|██████▋ | 246731/371472 [9:04:38<10:27:27, 3.31it/s] 66%|██████▋ | 246732/371472 [9:04:39<10:58:58, 3.15it/s] 66%|██████▋ | 246733/371472 [9:04:39<11:21:07, 3.05it/s] 66%|██████▋ | 246734/371472 [9:04:39<11:22:48, 3.04it/s] 66%|██████▋ | 246735/371472 [9:04:40<10:51:49, 3.19it/s] 66%|██████▋ | 246736/371472 [9:04:40<10:42:46, 3.23it/s] 66%|██████▋ | 246737/371472 [9:04:40<10:40:51, 3.24it/s] 66%|██████▋ | 246738/371472 [9:04:41<10:43:11, 3.23it/s] 66%|██████▋ | 246739/371472 [9:04:41<11:50:12, 2.93it/s] 66%|██████▋ | 246740/371472 [9:04:41<11:03:16, 3.13it/s] {'loss': 2.6964, 'learning_rate': 4.0236268827167144e-07, 'epoch': 10.63} + 66%|██████▋ | 246740/371472 [9:04:41<11:03:16, 3.13it/s] 66%|██████▋ | 246741/371472 [9:04:42<11:40:14, 2.97it/s] 66%|██████▋ | 246742/371472 [9:04:42<11:29:38, 3.01it/s] 66%|██████▋ | 246743/371472 [9:04:42<11:02:40, 3.14it/s] 66%|██████▋ | 246744/371472 [9:04:42<10:44:03, 3.23it/s] 66%|██████▋ | 246745/371472 [9:04:43<11:35:06, 2.99it/s] 66%|██████▋ | 246746/371472 [9:04:43<11:28:16, 3.02it/s] 66%|██████▋ | 246747/371472 [9:04:43<11:01:35, 3.14it/s] 66%|██████▋ | 246748/371472 [9:04:44<11:39:36, 2.97it/s] 66%|██████▋ | 246749/371472 [9:04:44<10:59:59, 3.15it/s] 66%|██████▋ | 246750/371472 [9:04:44<10:56:03, 3.17it/s] 66%|██████▋ | 246751/371472 [9:04:45<10:43:11, 3.23it/s] 66%|██████▋ | 246752/371472 [9:04:45<10:28:45, 3.31it/s] 66%|██████▋ | 246753/371472 [9:04:45<10:47:47, 3.21it/s] 66%|██████▋ | 246754/371472 [9:04:46<10:44:42, 3.22it/s] 66%|██████▋ | 246755/371472 [9:04:46<10:49:06, 3.20it/s] 66%|██████▋ | 246756/371472 [9:04:46<12:16:14, 2.82it/s] 66%|██████▋ | 246757/371472 [9:04:47<11:48:28, 2.93it/s] 66%|██████▋ | 246758/371472 [9:04:47<11:29:12, 3.02it/s] 66%|██████▋ | 246759/371472 [9:04:47<10:57:55, 3.16it/s] 66%|██████▋ | 246760/371472 [9:04:48<10:53:56, 3.18it/s] {'loss': 2.8188, 'learning_rate': 4.023142062961925e-07, 'epoch': 10.63} + 66%|██████▋ | 246760/371472 [9:04:48<10:53:56, 3.18it/s] 66%|██████▋ | 246761/371472 [9:04:48<11:32:41, 3.00it/s] 66%|██████▋ | 246762/371472 [9:04:48<11:00:48, 3.15it/s] 66%|██████▋ | 246763/371472 [9:04:49<10:38:36, 3.25it/s] 66%|██████▋ | 246764/371472 [9:04:49<10:32:14, 3.29it/s] 66%|██████▋ | 246765/371472 [9:04:49<10:17:49, 3.36it/s] 66%|██████▋ | 246766/371472 [9:04:49<10:03:29, 3.44it/s] 66%|██████▋ | 246767/371472 [9:04:50<10:01:10, 3.46it/s] 66%|██████▋ | 246768/371472 [9:04:50<10:00:26, 3.46it/s] 66%|██████▋ | 246769/371472 [9:04:50<9:58:05, 3.48it/s] 66%|██████▋ | 246770/371472 [9:04:51<9:45:04, 3.55it/s] 66%|██████▋ | 246771/371472 [9:04:51<9:49:06, 3.53it/s] 66%|██████▋ | 246772/371472 [9:04:51<10:08:59, 3.41it/s] 66%|██████▋ | 246773/371472 [9:04:51<10:03:44, 3.44it/s] 66%|██████▋ | 246774/371472 [9:04:52<9:56:26, 3.48it/s] 66%|██████▋ | 246775/371472 [9:04:52<10:01:11, 3.46it/s] 66%|██████▋ | 246776/371472 [9:04:52<9:55:10, 3.49it/s] 66%|██████▋ | 246777/371472 [9:04:53<9:46:06, 3.55it/s] 66%|██████▋ | 246778/371472 [9:04:53<9:33:34, 3.62it/s] 66%|██████▋ | 246779/371472 [9:04:53<10:36:53, 3.26it/s] 66%|█��████▋ | 246780/371472 [9:04:54<10:25:22, 3.32it/s] {'loss': 2.8455, 'learning_rate': 4.022657243207137e-07, 'epoch': 10.63} + 66%|██████▋ | 246780/371472 [9:04:54<10:25:22, 3.32it/s] 66%|██████▋ | 246781/371472 [9:04:54<10:28:37, 3.31it/s] 66%|██████▋ | 246782/371472 [9:04:54<10:33:41, 3.28it/s] 66%|██████▋ | 246783/371472 [9:04:54<10:20:56, 3.35it/s] 66%|██████▋ | 246784/371472 [9:04:55<10:06:29, 3.43it/s] 66%|██████▋ | 246785/371472 [9:04:55<9:47:52, 3.53it/s] 66%|██████▋ | 246786/371472 [9:04:55<9:54:06, 3.50it/s] 66%|██████▋ | 246787/371472 [9:04:56<9:50:07, 3.52it/s] 66%|██████▋ | 246788/371472 [9:04:56<9:45:16, 3.55it/s] 66%|██████▋ | 246789/371472 [9:04:56<10:01:14, 3.46it/s] 66%|██████▋ | 246790/371472 [9:04:56<9:56:20, 3.48it/s] 66%|██████▋ | 246791/371472 [9:04:57<9:55:13, 3.49it/s] 66%|██████▋ | 246792/371472 [9:04:57<9:56:52, 3.48it/s] 66%|██████▋ | 246793/371472 [9:04:57<10:05:47, 3.43it/s] 66%|██████▋ | 246794/371472 [9:04:58<10:34:16, 3.28it/s] 66%|██████▋ | 246795/371472 [9:04:58<10:22:55, 3.34it/s] 66%|██████▋ | 246796/371472 [9:04:58<10:21:29, 3.34it/s] 66%|██████▋ | 246797/371472 [9:04:59<10:59:57, 3.15it/s] 66%|██████▋ | 246798/371472 [9:04:59<10:40:51, 3.24it/s] 66%|██████▋ | 246799/371472 [9:04:59<10:34:41, 3.27it/s] 66%|██████▋ | 246800/371472 [9:04:59<10:45:58, 3.22it/s] {'loss': 2.7445, 'learning_rate': 4.022172423452347e-07, 'epoch': 10.63} + 66%|██████▋ | 246800/371472 [9:04:59<10:45:58, 3.22it/s] 66%|██████▋ | 246801/371472 [9:05:00<10:33:01, 3.28it/s] 66%|██████▋ | 246802/371472 [9:05:00<10:43:43, 3.23it/s] 66%|██████▋ | 246803/371472 [9:05:00<10:48:59, 3.20it/s] 66%|██████▋ | 246804/371472 [9:05:01<11:24:44, 3.03it/s] 66%|██████▋ | 246805/371472 [9:05:01<11:07:00, 3.12it/s] 66%|██████▋ | 246806/371472 [9:05:01<12:19:29, 2.81it/s] 66%|██████▋ | 246807/371472 [9:05:02<11:32:27, 3.00it/s] 66%|██████▋ | 246808/371472 [9:05:02<11:04:25, 3.13it/s] 66%|██████▋ | 246809/371472 [9:05:02<10:55:13, 3.17it/s] 66%|██████▋ | 246810/371472 [9:05:03<10:54:38, 3.17it/s] 66%|██████▋ | 246811/371472 [9:05:03<10:44:30, 3.22it/s] 66%|██████▋ | 246812/371472 [9:05:03<10:34:25, 3.27it/s] 66%|██████▋ | 246813/371472 [9:05:04<10:40:11, 3.25it/s] 66%|██████▋ | 246814/371472 [9:05:04<10:23:11, 3.33it/s] 66%|██████▋ | 246815/371472 [9:05:04<10:22:38, 3.34it/s] 66%|██████▋ | 246816/371472 [9:05:05<10:57:07, 3.16it/s] 66%|██████▋ | 246817/371472 [9:05:05<10:42:30, 3.23it/s] 66%|██████▋ | 246818/371472 [9:05:05<10:22:42, 3.34it/s] 66%|██████▋ | 246819/371472 [9:05:05<11:02:01, 3.14it/s] 66%|██████▋ | 246820/371472 [9:05:06<10:30:41, 3.29it/s] {'loss': 2.6141, 'learning_rate': 4.021687603697559e-07, 'epoch': 10.63} + 66%|██████▋ | 246820/371472 [9:05:06<10:30:41, 3.29it/s] 66%|██████▋ | 246821/371472 [9:05:06<10:25:42, 3.32it/s] 66%|██████▋ | 246822/371472 [9:05:06<10:24:20, 3.33it/s] 66%|██████▋ | 246823/371472 [9:05:07<10:36:37, 3.26it/s] 66%|██████▋ | 246824/371472 [9:05:07<10:30:33, 3.29it/s] 66%|██████▋ | 246825/371472 [9:05:07<10:24:06, 3.33it/s] 66%|██████▋ | 246826/371472 [9:05:08<10:36:12, 3.27it/s] 66%|██████▋ | 246827/371472 [9:05:08<10:15:09, 3.38it/s] 66%|██████▋ | 246828/371472 [9:05:08<10:13:10, 3.39it/s] 66%|██████▋ | 246829/371472 [9:05:08<10:27:08, 3.31it/s] 66%|██████▋ | 246830/371472 [9:05:09<10:21:57, 3.34it/s] 66%|██████▋ | 246831/371472 [9:05:09<10:28:32, 3.30it/s] 66%|██████▋ | 246832/371472 [9:05:09<10:45:29, 3.22it/s] 66%|██████▋ | 246833/371472 [9:05:10<10:39:43, 3.25it/s] 66%|██████▋ | 246834/371472 [9:05:10<10:14:12, 3.38it/s] 66%|██████▋ | 246835/371472 [9:05:10<10:14:44, 3.38it/s] 66%|██████▋ | 246836/371472 [9:05:11<10:39:22, 3.25it/s] 66%|██████▋ | 246837/371472 [9:05:11<10:15:16, 3.38it/s] 66%|██████▋ | 246838/371472 [9:05:11<10:10:12, 3.40it/s] 66%|██████▋ | 246839/371472 [9:05:11<10:08:27, 3.41it/s] 66%|██████▋ | 246840/371472 [9:05:12<10:03:25, 3.44it/s] {'loss': 2.6317, 'learning_rate': 4.0212027839427696e-07, 'epoch': 10.63} + 66%|██████▋ | 246840/371472 [9:05:12<10:03:25, 3.44it/s] 66%|██████▋ | 246841/371472 [9:05:12<10:21:00, 3.34it/s] 66%|██████▋ | 246842/371472 [9:05:12<10:22:04, 3.34it/s] 66%|██████▋ | 246843/371472 [9:05:13<10:05:49, 3.43it/s] 66%|██████▋ | 246844/371472 [9:05:13<10:09:06, 3.41it/s] 66%|██████▋ | 246845/371472 [9:05:13<10:08:18, 3.41it/s] 66%|██████▋ | 246846/371472 [9:05:14<10:31:26, 3.29it/s] 66%|██████▋ | 246847/371472 [9:05:14<10:25:16, 3.32it/s] 66%|██████▋ | 246848/371472 [9:05:14<10:12:40, 3.39it/s] 66%|██████▋ | 246849/371472 [9:05:14<10:27:36, 3.31it/s] 66%|██████▋ | 246850/371472 [9:05:15<10:49:03, 3.20it/s] 66%|██████▋ | 246851/371472 [9:05:15<10:36:03, 3.27it/s] 66%|██████▋ | 246852/371472 [9:05:15<10:16:01, 3.37it/s] 66%|██████▋ | 246853/371472 [9:05:16<10:27:16, 3.31it/s] 66%|██████▋ | 246854/371472 [9:05:16<10:57:39, 3.16it/s] 66%|██████▋ | 246855/371472 [9:05:16<10:58:12, 3.16it/s] 66%|██████▋ | 246856/371472 [9:05:17<10:35:10, 3.27it/s] 66%|██████▋ | 246857/371472 [9:05:17<10:13:29, 3.39it/s] 66%|██████▋ | 246858/371472 [9:05:17<10:01:42, 3.45it/s] 66%|██████▋ | 246859/371472 [9:05:17<10:45:52, 3.22it/s] 66%|██████▋ | 246860/371472 [9:05:18<10:58:23, 3.15it/s] {'loss': 2.7151, 'learning_rate': 4.020717964187981e-07, 'epoch': 10.63} + 66%|██████▋ | 246860/371472 [9:05:18<10:58:23, 3.15it/s] 66%|██████▋ | 246861/371472 [9:05:18<10:43:26, 3.23it/s] 66%|██████▋ | 246862/371472 [9:05:18<10:40:55, 3.24it/s] 66%|██████▋ | 246863/371472 [9:05:19<10:41:48, 3.24it/s] 66%|██████▋ | 246864/371472 [9:05:19<10:20:46, 3.35it/s] 66%|██████▋ | 246865/371472 [9:05:19<10:10:11, 3.40it/s] 66%|██████▋ | 246866/371472 [9:05:20<9:48:04, 3.53it/s] 66%|██████▋ | 246867/371472 [9:05:20<10:01:29, 3.45it/s] 66%|██████▋ | 246868/371472 [9:05:20<9:59:20, 3.47it/s] 66%|██████▋ | 246869/371472 [9:05:20<9:54:32, 3.49it/s] 66%|██████▋ | 246870/371472 [9:05:21<10:42:52, 3.23it/s] 66%|██████▋ | 246871/371472 [9:05:21<10:37:48, 3.26it/s] 66%|██████▋ | 246872/371472 [9:05:21<10:27:18, 3.31it/s] 66%|██████▋ | 246873/371472 [9:05:22<10:14:51, 3.38it/s] 66%|██████▋ | 246874/371472 [9:05:22<9:58:38, 3.47it/s] 66%|██████▋ | 246875/371472 [9:05:22<10:07:23, 3.42it/s] 66%|██████▋ | 246876/371472 [9:05:22<10:01:34, 3.45it/s] 66%|██████▋ | 246877/371472 [9:05:23<10:04:53, 3.43it/s] 66%|██████▋ | 246878/371472 [9:05:23<10:35:23, 3.27it/s] 66%|██████▋ | 246879/371472 [9:05:23<10:26:27, 3.31it/s] 66%|██████▋ | 246880/371472 [9:05:24<10:25:28, 3.32it/s] {'loss': 2.7095, 'learning_rate': 4.0202331444331915e-07, 'epoch': 10.63} + 66%|██████▋ | 246880/371472 [9:05:24<10:25:28, 3.32it/s] 66%|██████▋ | 246881/371472 [9:05:24<10:19:00, 3.35it/s] 66%|██████▋ | 246882/371472 [9:05:24<10:30:07, 3.30it/s] 66%|██████▋ | 246883/371472 [9:05:25<10:10:22, 3.40it/s] 66%|██████▋ | 246884/371472 [9:05:25<10:05:01, 3.43it/s] 66%|██████▋ | 246885/371472 [9:05:25<10:08:06, 3.41it/s] 66%|██████▋ | 246886/371472 [9:05:25<10:10:24, 3.40it/s] 66%|██████▋ | 246887/371472 [9:05:26<10:17:38, 3.36it/s] 66%|██████▋ | 246888/371472 [9:05:26<10:52:22, 3.18it/s] 66%|██████▋ | 246889/371472 [9:05:26<10:38:34, 3.25it/s] 66%|██████▋ | 246890/371472 [9:05:27<11:24:12, 3.03it/s] 66%|██████▋ | 246891/371472 [9:05:27<11:15:54, 3.07it/s] 66%|██████▋ | 246892/371472 [9:05:27<10:58:20, 3.15it/s] 66%|██████▋ | 246893/371472 [9:05:28<11:29:00, 3.01it/s] 66%|██████▋ | 246894/371472 [9:05:28<10:59:27, 3.15it/s] 66%|██████▋ | 246895/371472 [9:05:28<10:42:19, 3.23it/s] 66%|██████▋ | 246896/371472 [9:05:29<10:39:11, 3.25it/s] 66%|██████▋ | 246897/371472 [9:05:29<10:28:34, 3.30it/s] 66%|██████▋ | 246898/371472 [9:05:29<10:21:27, 3.34it/s] 66%|██████▋ | 246899/371472 [9:05:30<10:00:46, 3.46it/s] 66%|██████▋ | 246900/371472 [9:05:30<10:24:27, 3.32it/s] {'loss': 2.7266, 'learning_rate': 4.0197483246784033e-07, 'epoch': 10.63} + 66%|██████▋ | 246900/371472 [9:05:30<10:24:27, 3.32it/s] 66%|██████▋ | 246901/371472 [9:05:30<10:27:16, 3.31it/s] 66%|██████▋ | 246902/371472 [9:05:30<10:21:44, 3.34it/s] 66%|██████▋ | 246903/371472 [9:05:31<11:01:52, 3.14it/s] 66%|██████▋ | 246904/371472 [9:05:31<10:51:01, 3.19it/s] 66%|██████▋ | 246905/371472 [9:05:31<10:40:50, 3.24it/s] 66%|██████▋ | 246906/371472 [9:05:32<10:38:19, 3.25it/s] 66%|██████▋ | 246907/371472 [9:05:32<10:39:07, 3.25it/s] 66%|██████▋ | 246908/371472 [9:05:32<10:40:37, 3.24it/s] 66%|██████▋ | 246909/371472 [9:05:33<10:23:34, 3.33it/s] 66%|██████▋ | 246910/371472 [9:05:33<10:45:32, 3.22it/s] 66%|██████▋ | 246911/371472 [9:05:33<10:54:18, 3.17it/s] 66%|██████▋ | 246912/371472 [9:05:34<10:39:39, 3.25it/s] 66%|██████▋ | 246913/371472 [9:05:34<10:20:29, 3.35it/s] 66%|██████▋ | 246914/371472 [9:05:34<10:13:59, 3.38it/s] 66%|██████▋ | 246915/371472 [9:05:34<9:54:22, 3.49it/s] 66%|██████▋ | 246916/371472 [9:05:35<10:26:49, 3.31it/s] 66%|██████▋ | 246917/371472 [9:05:35<10:27:27, 3.31it/s] 66%|██████▋ | 246918/371472 [9:05:35<10:28:22, 3.30it/s] 66%|██████▋ | 246919/371472 [9:05:36<10:19:45, 3.35it/s] 66%|██████▋ | 246920/371472 [9:05:36<10:11:12, 3.40it/s] {'loss': 2.7092, 'learning_rate': 4.019263504923614e-07, 'epoch': 10.64} + 66%|██████▋ | 246920/371472 [9:05:36<10:11:12, 3.40it/s] 66%|██████▋ | 246921/371472 [9:05:36<9:55:45, 3.48it/s] 66%|██████▋ | 246922/371472 [9:05:36<10:04:35, 3.43it/s] 66%|██████▋ | 246923/371472 [9:05:37<10:07:37, 3.42it/s] 66%|██████▋ | 246924/371472 [9:05:37<10:19:56, 3.35it/s] 66%|██████▋ | 246925/371472 [9:05:37<10:34:38, 3.27it/s] 66%|██████▋ | 246926/371472 [9:05:38<10:28:05, 3.30it/s] 66%|██████▋ | 246927/371472 [9:05:38<10:20:07, 3.35it/s] 66%|██████▋ | 246928/371472 [9:05:38<10:10:06, 3.40it/s] 66%|██████▋ | 246929/371472 [9:05:39<10:28:08, 3.30it/s] 66%|██████▋ | 246930/371472 [9:05:39<11:16:14, 3.07it/s] 66%|██████▋ | 246931/371472 [9:05:39<10:56:11, 3.16it/s] 66%|██████▋ | 246932/371472 [9:05:40<10:44:11, 3.22it/s] 66%|██████▋ | 246933/371472 [9:05:40<10:27:29, 3.31it/s] 66%|██████▋ | 246934/371472 [9:05:40<10:10:57, 3.40it/s] 66%|██████▋ | 246935/371472 [9:05:40<10:10:07, 3.40it/s] 66%|██████▋ | 246936/371472 [9:05:41<10:06:48, 3.42it/s] 66%|██████▋ | 246937/371472 [9:05:41<10:28:11, 3.30it/s] 66%|██████▋ | 246938/371472 [9:05:41<10:27:29, 3.31it/s] 66%|██████▋ | 246939/371472 [9:05:42<10:11:27, 3.39it/s] 66%|██████▋ | 246940/371472 [9:05:42<10:30:15, 3.29it/s] {'loss': 2.8431, 'learning_rate': 4.018778685168824e-07, 'epoch': 10.64} + 66%|██████▋ | 246940/371472 [9:05:42<10:30:15, 3.29it/s] 66%|██████▋ | 246941/371472 [9:05:42<12:01:29, 2.88it/s] 66%|██████▋ | 246942/371472 [9:05:43<11:21:24, 3.05it/s] 66%|██████▋ | 246943/371472 [9:05:43<10:51:23, 3.19it/s] 66%|██████▋ | 246944/371472 [9:05:43<10:44:47, 3.22it/s] 66%|██████▋ | 246945/371472 [9:05:44<10:27:10, 3.31it/s] 66%|██████▋ | 246946/371472 [9:05:44<10:26:00, 3.32it/s] 66%|██████▋ | 246947/371472 [9:05:44<10:26:28, 3.31it/s] 66%|██████▋ | 246948/371472 [9:05:44<10:45:19, 3.22it/s] 66%|██████▋ | 246949/371472 [9:05:45<10:51:39, 3.18it/s] 66%|██████▋ | 246950/371472 [9:05:45<10:23:46, 3.33it/s] 66%|██████▋ | 246951/371472 [9:05:45<10:24:29, 3.32it/s] 66%|██████▋ | 246952/371472 [9:05:46<10:23:22, 3.33it/s] 66%|██████▋ | 246953/371472 [9:05:46<11:26:08, 3.02it/s] 66%|██████▋ | 246954/371472 [9:05:46<11:03:53, 3.13it/s] 66%|██████▋ | 246955/371472 [9:05:47<11:02:14, 3.13it/s] 66%|██████▋ | 246956/371472 [9:05:47<10:50:07, 3.19it/s] 66%|██████▋ | 246957/371472 [9:05:47<11:30:02, 3.01it/s] 66%|██████▋ | 246958/371472 [9:05:48<11:12:23, 3.09it/s] 66%|██████▋ | 246959/371472 [9:05:48<10:48:15, 3.20it/s] 66%|██████▋ | 246960/371472 [9:05:48<10:36:41, 3.26it/s] {'loss': 2.6595, 'learning_rate': 4.018293865414036e-07, 'epoch': 10.64} + 66%|██████▋ | 246960/371472 [9:05:48<10:36:41, 3.26it/s] 66%|██████▋ | 246961/371472 [9:05:49<10:53:32, 3.18it/s] 66%|██████▋ | 246962/371472 [9:05:49<11:04:15, 3.12it/s] 66%|██████▋ | 246963/371472 [9:05:49<10:51:37, 3.18it/s] 66%|██████▋ | 246964/371472 [9:05:50<10:41:28, 3.23it/s] 66%|██████▋ | 246965/371472 [9:05:50<13:16:08, 2.61it/s] 66%|██████▋ | 246966/371472 [9:05:50<12:18:05, 2.81it/s] 66%|██████▋ | 246967/371472 [9:05:51<11:40:43, 2.96it/s] 66%|██████▋ | 246968/371472 [9:05:51<11:01:40, 3.14it/s] 66%|██████▋ | 246969/371472 [9:05:51<10:49:49, 3.19it/s] 66%|██████▋ | 246970/371472 [9:05:52<10:36:05, 3.26it/s] 66%|██████▋ | 246971/371472 [9:05:52<10:26:38, 3.31it/s] 66%|██████▋ | 246972/371472 [9:05:52<10:32:59, 3.28it/s] 66%|██████▋ | 246973/371472 [9:05:52<10:29:43, 3.30it/s] 66%|██████▋ | 246974/371472 [9:05:53<10:13:33, 3.38it/s] 66%|██████▋ | 246975/371472 [9:05:53<10:30:23, 3.29it/s] 66%|██████▋ | 246976/371472 [9:05:53<10:09:53, 3.40it/s] 66%|██████▋ | 246977/371472 [9:05:54<9:58:10, 3.47it/s] 66%|██████▋ | 246978/371472 [9:05:54<10:10:37, 3.40it/s] 66%|██████▋ | 246979/371472 [9:05:54<10:37:32, 3.25it/s] 66%|██████▋ | 246980/371472 [9:05:55<10:46:00, 3.21it/s] {'loss': 2.7157, 'learning_rate': 4.0178090456592467e-07, 'epoch': 10.64} + 66%|██████▋ | 246980/371472 [9:05:55<10:46:00, 3.21it/s] 66%|██████▋ | 246981/371472 [9:05:55<10:39:22, 3.25it/s] 66%|██████▋ | 246982/371472 [9:05:55<10:17:42, 3.36it/s] 66%|██████▋ | 246983/371472 [9:05:55<10:34:48, 3.27it/s] 66%|██████▋ | 246984/371472 [9:05:56<10:19:54, 3.35it/s] 66%|██████▋ | 246985/371472 [9:05:56<10:09:57, 3.40it/s] 66%|██████▋ | 246986/371472 [9:05:56<10:07:50, 3.41it/s] 66%|██████▋ | 246987/371472 [9:05:57<12:04:01, 2.87it/s] 66%|██████▋ | 246988/371472 [9:05:57<11:43:33, 2.95it/s] 66%|██████▋ | 246989/371472 [9:05:57<11:29:54, 3.01it/s] 66%|██████▋ | 246990/371472 [9:05:58<11:37:25, 2.97it/s] 66%|██████▋ | 246991/371472 [9:05:58<11:31:10, 3.00it/s] 66%|██████▋ | 246992/371472 [9:05:58<10:54:53, 3.17it/s] 66%|██████▋ | 246993/371472 [9:05:59<10:51:31, 3.18it/s] 66%|██████▋ | 246994/371472 [9:05:59<10:55:08, 3.17it/s] 66%|██████▋ | 246995/371472 [9:05:59<10:47:57, 3.20it/s] 66%|██████▋ | 246996/371472 [9:06:00<10:31:49, 3.28it/s] 66%|██████▋ | 246997/371472 [9:06:00<10:46:09, 3.21it/s] 66%|██████▋ | 246998/371472 [9:06:00<10:28:58, 3.30it/s] 66%|██████▋ | 246999/371472 [9:06:00<10:13:12, 3.38it/s] 66%|██████▋ | 247000/371472 [9:06:01<10:01:02, 3.45it/s] {'loss': 2.5974, 'learning_rate': 4.0173242259044584e-07, 'epoch': 10.64} + 66%|██████▋ | 247000/371472 [9:06:01<10:01:02, 3.45it/s] 66%|██████▋ | 247001/371472 [9:06:01<10:17:27, 3.36it/s] 66%|██████▋ | 247002/371472 [9:06:01<10:34:03, 3.27it/s] 66%|██████▋ | 247003/371472 [9:06:02<10:30:33, 3.29it/s] 66%|██████▋ | 247004/371472 [9:06:02<10:51:47, 3.18it/s] 66%|██████▋ | 247005/371472 [9:06:02<10:29:01, 3.30it/s] 66%|██████▋ | 247006/371472 [9:06:03<10:32:36, 3.28it/s] 66%|██████▋ | 247007/371472 [9:06:03<10:29:49, 3.29it/s] 66%|██████▋ | 247008/371472 [9:06:03<10:25:02, 3.32it/s] 66%|██████▋ | 247009/371472 [9:06:03<10:06:32, 3.42it/s] 66%|██████▋ | 247010/371472 [9:06:04<10:15:42, 3.37it/s] 66%|██████▋ | 247011/371472 [9:06:04<10:02:17, 3.44it/s] 66%|██████▋ | 247012/371472 [9:06:04<9:52:21, 3.50it/s] 66%|██████▋ | 247013/371472 [9:06:05<10:05:42, 3.42it/s] 66%|██████▋ | 247014/371472 [9:06:05<10:26:19, 3.31it/s] 66%|██████▋ | 247015/371472 [9:06:05<10:08:10, 3.41it/s] 66%|██████▋ | 247016/371472 [9:06:06<10:14:00, 3.38it/s] 66%|██████▋ | 247017/371472 [9:06:06<10:08:34, 3.41it/s] 66%|██████▋ | 247018/371472 [9:06:06<10:37:37, 3.25it/s] 66%|██████▋ | 247019/371472 [9:06:06<10:14:40, 3.37it/s] 66%|██████▋ | 247020/371472 [9:06:07<10:11:39, 3.39it/s] {'loss': 2.7854, 'learning_rate': 4.0168394061496686e-07, 'epoch': 10.64} + 66%|██████▋ | 247020/371472 [9:06:07<10:11:39, 3.39it/s] 66%|██████▋ | 247021/371472 [9:06:07<10:10:42, 3.40it/s] 66%|██████▋ | 247022/371472 [9:06:07<10:03:58, 3.43it/s] 66%|██████▋ | 247023/371472 [9:06:08<10:34:35, 3.27it/s] 66%|██████▋ | 247024/371472 [9:06:08<10:21:58, 3.33it/s] 66%|██████▋ | 247025/371472 [9:06:08<10:54:43, 3.17it/s] 66%|██████▋ | 247026/371472 [9:06:09<10:48:45, 3.20it/s] 66%|██████▋ | 247027/371472 [9:06:09<10:31:18, 3.29it/s] 66%|██████▋ | 247028/371472 [9:06:09<10:29:15, 3.30it/s] 67%|██████▋ | 247029/371472 [9:06:09<10:11:20, 3.39it/s] 67%|██████▋ | 247030/371472 [9:06:10<10:05:54, 3.42it/s] 67%|██████▋ | 247031/371472 [9:06:10<10:32:06, 3.28it/s] 67%|██████▋ | 247032/371472 [9:06:10<10:27:10, 3.31it/s] 67%|██████▋ | 247033/371472 [9:06:11<10:30:24, 3.29it/s] 67%|██████▋ | 247034/371472 [9:06:11<10:27:46, 3.30it/s] 67%|██████▋ | 247035/371472 [9:06:11<10:24:01, 3.32it/s] 67%|██████▋ | 247036/371472 [9:06:12<10:01:16, 3.45it/s] 67%|██████▋ | 247037/371472 [9:06:12<10:28:53, 3.30it/s] 67%|██████▋ | 247038/371472 [9:06:12<10:20:59, 3.34it/s] 67%|██████▋ | 247039/371472 [9:06:12<10:25:13, 3.32it/s] 67%|██████▋ | 247040/371472 [9:06:13<10:14:25, 3.38it/s] {'loss': 2.5843, 'learning_rate': 4.0163545863948804e-07, 'epoch': 10.64} + 67%|██████▋ | 247040/371472 [9:06:13<10:14:25, 3.38it/s] 67%|██████▋ | 247041/371472 [9:06:13<10:18:17, 3.35it/s] 67%|██████▋ | 247042/371472 [9:06:13<10:21:34, 3.34it/s] 67%|██████▋ | 247043/371472 [9:06:14<10:18:32, 3.35it/s] 67%|██████▋ | 247044/371472 [9:06:14<10:05:39, 3.42it/s] 67%|██████▋ | 247045/371472 [9:06:14<9:59:59, 3.46it/s] 67%|██████▋ | 247046/371472 [9:06:14<10:00:09, 3.46it/s] 67%|██████▋ | 247047/371472 [9:06:15<9:45:16, 3.54it/s] 67%|██████▋ | 247048/371472 [9:06:15<9:51:25, 3.51it/s] 67%|██████▋ | 247049/371472 [9:06:15<9:49:41, 3.52it/s] 67%|██████▋ | 247050/371472 [9:06:16<10:10:19, 3.40it/s] 67%|██████▋ | 247051/371472 [9:06:16<10:21:14, 3.34it/s] 67%|██████▋ | 247052/371472 [9:06:16<10:15:04, 3.37it/s] 67%|██████▋ | 247053/371472 [9:06:17<10:00:21, 3.45it/s] 67%|██████▋ | 247054/371472 [9:06:17<9:47:18, 3.53it/s] 67%|██████▋ | 247055/371472 [9:06:17<9:34:19, 3.61it/s] 67%|██████▋ | 247056/371472 [9:06:17<9:37:39, 3.59it/s] 67%|██████▋ | 247057/371472 [9:06:18<9:43:29, 3.55it/s] 67%|██████▋ | 247058/371472 [9:06:18<9:54:10, 3.49it/s] 67%|██████▋ | 247059/371472 [9:06:18<9:57:34, 3.47it/s] 67%|██████▋ | 247060/371472 [9:06:19<10:35:34, 3.26it/s] {'loss': 2.8019, 'learning_rate': 4.0158697666400906e-07, 'epoch': 10.64} + 67%|██████▋ | 247060/371472 [9:06:19<10:35:34, 3.26it/s] 67%|██████▋ | 247061/371472 [9:06:19<10:30:40, 3.29it/s] 67%|██████▋ | 247062/371472 [9:06:19<10:26:18, 3.31it/s] 67%|██████▋ | 247063/371472 [9:06:19<10:27:39, 3.30it/s] 67%|██████▋ | 247064/371472 [9:06:20<10:24:21, 3.32it/s] 67%|██████▋ | 247065/371472 [9:06:20<10:40:31, 3.24it/s] 67%|██████▋ | 247066/371472 [9:06:20<10:45:46, 3.21it/s] 67%|██████▋ | 247067/371472 [9:06:21<11:08:24, 3.10it/s] 67%|██████▋ | 247068/371472 [9:06:21<11:25:05, 3.03it/s] 67%|██████▋ | 247069/371472 [9:06:21<11:43:28, 2.95it/s] 67%|██████▋ | 247070/371472 [9:06:22<11:26:49, 3.02it/s] 67%|██████▋ | 247071/371472 [9:06:22<11:03:52, 3.12it/s] 67%|██████▋ | 247072/371472 [9:06:22<10:43:44, 3.22it/s] 67%|██████▋ | 247073/371472 [9:06:23<10:38:13, 3.25it/s] 67%|██████▋ | 247074/371472 [9:06:23<10:44:01, 3.22it/s] 67%|██████▋ | 247075/371472 [9:06:23<11:05:22, 3.12it/s] 67%|██████▋ | 247076/371472 [9:06:24<10:46:16, 3.21it/s] 67%|██████▋ | 247077/371472 [9:06:24<11:22:51, 3.04it/s] 67%|██████▋ | 247078/371472 [9:06:24<11:00:13, 3.14it/s] 67%|██████�� | 247079/371472 [9:06:25<10:33:00, 3.28it/s] 67%|██████▋ | 247080/371472 [9:06:25<10:25:21, 3.32it/s] {'loss': 2.7425, 'learning_rate': 4.0153849468853024e-07, 'epoch': 10.64} + 67%|██████▋ | 247080/371472 [9:06:25<10:25:21, 3.32it/s] 67%|██████▋ | 247081/371472 [9:06:25<10:29:54, 3.29it/s] 67%|██████▋ | 247082/371472 [9:06:26<11:08:47, 3.10it/s] 67%|██████▋ | 247083/371472 [9:06:26<11:06:32, 3.11it/s] 67%|██████▋ | 247084/371472 [9:06:26<10:47:22, 3.20it/s] 67%|██████▋ | 247085/371472 [9:06:26<11:11:21, 3.09it/s] 67%|██████▋ | 247086/371472 [9:06:27<10:34:33, 3.27it/s] 67%|██████▋ | 247087/371472 [9:06:27<10:46:20, 3.21it/s] 67%|██████▋ | 247088/371472 [9:06:27<10:59:49, 3.14it/s] 67%|██████▋ | 247089/371472 [9:06:28<10:30:21, 3.29it/s] 67%|██████▋ | 247090/371472 [9:06:28<10:10:50, 3.39it/s] 67%|██████▋ | 247091/371472 [9:06:28<10:13:43, 3.38it/s] 67%|██████▋ | 247092/371472 [9:06:29<10:07:49, 3.41it/s] 67%|██████▋ | 247093/371472 [9:06:29<10:21:02, 3.34it/s] 67%|██████▋ | 247094/371472 [9:06:29<10:24:44, 3.32it/s] 67%|██████▋ | 247095/371472 [9:06:29<10:17:11, 3.36it/s] 67%|██████▋ | 247096/371472 [9:06:30<11:24:14, 3.03it/s] 67%|██████▋ | 247097/371472 [9:06:30<11:20:32, 3.05it/s] 67%|██████▋ | 247098/371472 [9:06:31<11:42:32, 2.95it/s] 67%|██████▋ | 247099/371472 [9:06:31<11:02:49, 3.13it/s] 67%|██████▋ | 247100/371472 [9:06:31<11:07:03, 3.11it/s] {'loss': 2.7409, 'learning_rate': 4.014900127130513e-07, 'epoch': 10.64} + 67%|██████▋ | 247100/371472 [9:06:31<11:07:03, 3.11it/s] 67%|██████▋ | 247101/371472 [9:06:31<11:01:56, 3.13it/s] 67%|██████▋ | 247102/371472 [9:06:32<11:58:09, 2.89it/s] 67%|██████▋ | 247103/371472 [9:06:32<11:29:38, 3.01it/s] 67%|██████▋ | 247104/371472 [9:06:33<11:43:52, 2.94it/s] 67%|██████▋ | 247105/371472 [9:06:33<11:04:11, 3.12it/s] 67%|██████▋ | 247106/371472 [9:06:33<11:12:39, 3.08it/s] 67%|██████▋ | 247107/371472 [9:06:33<10:31:16, 3.28it/s] 67%|██████▋ | 247108/371472 [9:06:34<10:17:17, 3.36it/s] 67%|██████▋ | 247109/371472 [9:06:34<10:35:19, 3.26it/s] 67%|██████▋ | 247110/371472 [9:06:34<10:32:04, 3.28it/s] 67%|██████▋ | 247111/371472 [9:06:35<10:04:41, 3.43it/s] 67%|██████▋ | 247112/371472 [9:06:35<10:59:14, 3.14it/s] 67%|██████▋ | 247113/371472 [9:06:35<11:54:52, 2.90it/s] 67%|██████▋ | 247114/371472 [9:06:36<11:19:43, 3.05it/s] 67%|██████▋ | 247115/371472 [9:06:36<10:41:21, 3.23it/s] 67%|██████▋ | 247116/371472 [9:06:36<10:18:27, 3.35it/s] 67%|██████▋ | 247117/371472 [9:06:36<10:09:13, 3.40it/s] 67%|██████▋ | 247118/371472 [9:06:37<10:03:37, 3.43it/s] 67%|██████▋ | 247119/371472 [9:06:37<10:20:21, 3.34it/s] 67%|██████▋ | 247120/371472 [9:06:37<10:57:50, 3.15it/s] {'loss': 2.8053, 'learning_rate': 4.0144153073757243e-07, 'epoch': 10.64} + 67%|██████▋ | 247120/371472 [9:06:37<10:57:50, 3.15it/s] 67%|██████▋ | 247121/371472 [9:06:38<11:25:13, 3.02it/s] 67%|██████▋ | 247122/371472 [9:06:38<11:14:15, 3.07it/s] 67%|██████▋ | 247123/371472 [9:06:38<10:46:34, 3.21it/s] 67%|██████▋ | 247124/371472 [9:06:39<10:17:17, 3.36it/s] 67%|██████▋ | 247125/371472 [9:06:39<10:40:17, 3.24it/s] 67%|██████▋ | 247126/371472 [9:06:39<10:39:19, 3.24it/s] 67%|██████▋ | 247127/371472 [9:06:40<10:29:23, 3.29it/s] 67%|██████▋ | 247128/371472 [9:06:40<10:07:33, 3.41it/s] 67%|██████▋ | 247129/371472 [9:06:40<9:53:17, 3.49it/s] 67%|██████▋ | 247130/371472 [9:06:40<10:09:31, 3.40it/s] 67%|██████▋ | 247131/371472 [9:06:41<9:56:32, 3.47it/s] 67%|██████▋ | 247132/371472 [9:06:41<10:08:51, 3.40it/s] 67%|██████▋ | 247133/371472 [9:06:41<10:15:41, 3.37it/s] 67%|██████▋ | 247134/371472 [9:06:42<9:52:04, 3.50it/s] 67%|██████▋ | 247135/371472 [9:06:42<10:15:53, 3.36it/s] 67%|██████▋ | 247136/371472 [9:06:42<10:13:07, 3.38it/s] 67%|██████▋ | 247137/371472 [9:06:42<10:10:10, 3.40it/s] 67%|██████▋ | 247138/371472 [9:06:43<10:10:35, 3.39it/s] 67%|█��████▋ | 247139/371472 [9:06:43<10:05:35, 3.42it/s] 67%|██████▋ | 247140/371472 [9:06:43<10:00:41, 3.45it/s] {'loss': 2.9431, 'learning_rate': 4.013930487620935e-07, 'epoch': 10.64} + 67%|██████▋ | 247140/371472 [9:06:43<10:00:41, 3.45it/s] 67%|██████▋ | 247141/371472 [9:06:44<10:07:16, 3.41it/s] 67%|██████▋ | 247142/371472 [9:06:44<10:57:26, 3.15it/s] 67%|██████▋ | 247143/371472 [9:06:44<10:44:51, 3.21it/s] 67%|██████▋ | 247144/371472 [9:06:45<10:34:26, 3.27it/s] 67%|██████▋ | 247145/371472 [9:06:45<10:35:26, 3.26it/s] 67%|██████▋ | 247146/371472 [9:06:45<10:21:43, 3.33it/s] 67%|██████▋ | 247147/371472 [9:06:45<10:04:09, 3.43it/s] 67%|██████▋ | 247148/371472 [9:06:46<10:16:18, 3.36it/s] 67%|██████▋ | 247149/371472 [9:06:46<10:09:36, 3.40it/s] 67%|██████▋ | 247150/371472 [9:06:46<10:13:22, 3.38it/s] 67%|██████▋ | 247151/371472 [9:06:47<9:55:21, 3.48it/s] 67%|██████▋ | 247152/371472 [9:06:47<9:49:35, 3.51it/s] 67%|██████▋ | 247153/371472 [9:06:47<10:04:11, 3.43it/s] 67%|██████▋ | 247154/371472 [9:06:48<10:35:18, 3.26it/s] 67%|██████▋ | 247155/371472 [9:06:48<10:50:24, 3.19it/s] 67%|██████▋ | 247156/371472 [9:06:48<11:00:33, 3.14it/s] 67%|██████▋ | 247157/371472 [9:06:49<10:40:33, 3.23it/s] 67%|██████▋ | 247158/371472 [9:06:49<10:19:08, 3.35it/s] 67%|██████▋ | 247159/371472 [9:06:49<10:09:12, 3.40it/s] 67%|██████▋ | 247160/371472 [9:06:49<10:11:18, 3.39it/s] {'loss': 2.8047, 'learning_rate': 4.013445667866147e-07, 'epoch': 10.65} + 67%|██████▋ | 247160/371472 [9:06:49<10:11:18, 3.39it/s] 67%|██████▋ | 247161/371472 [9:06:50<10:09:55, 3.40it/s] 67%|██████▋ | 247162/371472 [9:06:50<10:05:51, 3.42it/s] 67%|██████▋ | 247163/371472 [9:06:50<10:01:11, 3.45it/s] 67%|██████▋ | 247164/371472 [9:06:51<10:06:40, 3.42it/s] 67%|██████▋ | 247165/371472 [9:06:51<9:55:09, 3.48it/s] 67%|██████▋ | 247166/371472 [9:06:51<9:53:42, 3.49it/s] 67%|██████▋ | 247167/371472 [9:06:51<9:58:13, 3.46it/s] 67%|██████▋ | 247168/371472 [9:06:52<10:06:30, 3.42it/s] 67%|██████▋ | 247169/371472 [9:06:52<10:36:17, 3.26it/s] 67%|██████▋ | 247170/371472 [9:06:52<10:33:42, 3.27it/s] 67%|██████▋ | 247171/371472 [9:06:53<10:49:38, 3.19it/s] 67%|██████▋ | 247172/371472 [9:06:53<10:31:38, 3.28it/s] 67%|██████▋ | 247173/371472 [9:06:53<10:19:22, 3.34it/s] 67%|██████▋ | 247174/371472 [9:06:54<10:18:34, 3.35it/s] 67%|██████▋ | 247175/371472 [9:06:54<11:20:20, 3.04it/s] 67%|██████▋ | 247176/371472 [9:06:54<11:08:55, 3.10it/s] 67%|██████▋ | 247177/371472 [9:06:55<10:56:45, 3.15it/s] 67%|██████▋ | 247178/371472 [9:06:55<10:29:58, 3.29it/s] 67%|██████▋ | 247179/371472 [9:06:55<12:08:37, 2.84it/s] 67%|██████▋ | 247180/371472 [9:06:56<11:27:22, 3.01it/s] {'loss': 2.6465, 'learning_rate': 4.0129608481113575e-07, 'epoch': 10.65} + 67%|██████▋ | 247180/371472 [9:06:56<11:27:22, 3.01it/s] 67%|██████▋ | 247181/371472 [9:06:56<11:09:11, 3.10it/s] 67%|██████▋ | 247182/371472 [9:06:56<10:47:40, 3.20it/s] 67%|██████▋ | 247183/371472 [9:06:56<10:33:21, 3.27it/s] 67%|██████▋ | 247184/371472 [9:06:57<10:15:44, 3.36it/s] 67%|██████▋ | 247185/371472 [9:06:57<10:11:36, 3.39it/s] 67%|██████▋ | 247186/371472 [9:06:57<10:25:52, 3.31it/s] 67%|██████▋ | 247187/371472 [9:06:58<10:26:29, 3.31it/s] 67%|██████▋ | 247188/371472 [9:06:58<10:20:16, 3.34it/s] 67%|██████▋ | 247189/371472 [9:06:58<10:20:24, 3.34it/s] 67%|██████▋ | 247190/371472 [9:06:59<10:48:14, 3.20it/s] 67%|██████▋ | 247191/371472 [9:06:59<10:21:19, 3.33it/s] 67%|██████▋ | 247192/371472 [9:06:59<10:21:05, 3.33it/s] 67%|██████▋ | 247193/371472 [9:06:59<10:20:13, 3.34it/s] 67%|██████▋ | 247194/371472 [9:07:00<10:27:02, 3.30it/s] 67%|██████▋ | 247195/371472 [9:07:00<11:00:55, 3.13it/s] 67%|██████▋ | 247196/371472 [9:07:00<10:31:04, 3.28it/s] 67%|██████▋ | 247197/371472 [9:07:01<10:20:02, 3.34it/s] 67%|██████▋ | 247198/371472 [9:07:01<10:28:45, 3.29it/s] 67%|██████▋ | 247199/371472 [9:07:01<10:31:30, 3.28it/s] 67%|██████▋ | 247200/371472 [9:07:02<10:21:36, 3.33it/s] {'loss': 2.6798, 'learning_rate': 4.012476028356569e-07, 'epoch': 10.65} + 67%|██████▋ | 247200/371472 [9:07:02<10:21:36, 3.33it/s] 67%|██████▋ | 247201/371472 [9:07:02<10:19:18, 3.34it/s] 67%|██████▋ | 247202/371472 [9:07:02<10:18:07, 3.35it/s] 67%|██████▋ | 247203/371472 [9:07:03<10:48:20, 3.19it/s] 67%|██████▋ | 247204/371472 [9:07:03<11:11:08, 3.09it/s] 67%|██████▋ | 247205/371472 [9:07:03<11:34:17, 2.98it/s] 67%|██████▋ | 247206/371472 [9:07:04<11:17:52, 3.06it/s] 67%|██████▋ | 247207/371472 [9:07:04<10:37:28, 3.25it/s] 67%|██████▋ | 247208/371472 [9:07:04<10:22:41, 3.33it/s] 67%|██████▋ | 247209/371472 [9:07:04<10:15:51, 3.36it/s] 67%|██████▋ | 247210/371472 [9:07:05<10:08:39, 3.40it/s] 67%|██████▋ | 247211/371472 [9:07:05<10:40:29, 3.23it/s] 67%|██████▋ | 247212/371472 [9:07:05<10:13:32, 3.38it/s] 67%|██████▋ | 247213/371472 [9:07:06<10:11:23, 3.39it/s] 67%|██████▋ | 247214/371472 [9:07:06<10:31:55, 3.28it/s] 67%|██████▋ | 247215/371472 [9:07:06<10:27:33, 3.30it/s] 67%|██████▋ | 247216/371472 [9:07:06<10:28:24, 3.30it/s] 67%|██████▋ | 247217/371472 [9:07:07<10:46:54, 3.20it/s] 67%|██████▋ | 247218/371472 [9:07:07<10:31:51, 3.28it/s] 67%|██████▋ | 247219/371472 [9:07:07<10:26:57, 3.30it/s] 67%|██████▋ | 247220/371472 [9:07:08<10:28:02, 3.30it/s] {'loss': 2.6021, 'learning_rate': 4.0119912086017795e-07, 'epoch': 10.65} + 67%|██████▋ | 247220/371472 [9:07:08<10:28:02, 3.30it/s] 67%|██████▋ | 247221/371472 [9:07:08<10:22:45, 3.33it/s] 67%|██████▋ | 247222/371472 [9:07:08<10:13:55, 3.37it/s] 67%|██████▋ | 247223/371472 [9:07:09<10:31:17, 3.28it/s] 67%|██████▋ | 247224/371472 [9:07:09<10:15:46, 3.36it/s] 67%|██████▋ | 247225/371472 [9:07:09<10:04:56, 3.42it/s] 67%|██████▋ | 247226/371472 [9:07:09<9:59:12, 3.46it/s] 67%|██████▋ | 247227/371472 [9:07:10<10:22:19, 3.33it/s] 67%|██████▋ | 247228/371472 [9:07:10<10:22:51, 3.32it/s] 67%|██████▋ | 247229/371472 [9:07:10<11:01:45, 3.13it/s] 67%|██████▋ | 247230/371472 [9:07:11<10:45:43, 3.21it/s] 67%|██████▋ | 247231/371472 [9:07:11<10:33:56, 3.27it/s] 67%|██████▋ | 247232/371472 [9:07:11<11:01:05, 3.13it/s] 67%|██████▋ | 247233/371472 [9:07:12<10:25:03, 3.31it/s] 67%|██████▋ | 247234/371472 [9:07:12<10:27:27, 3.30it/s] 67%|██████▋ | 247235/371472 [9:07:12<10:34:28, 3.26it/s] 67%|██████▋ | 247236/371472 [9:07:13<10:21:09, 3.33it/s] 67%|██████▋ | 247237/371472 [9:07:13<10:03:44, 3.43it/s] 67%|██████▋ | 247238/371472 [9:07:13<11:05:52, 3.11it/s] 67%|██████▋ | 247239/371472 [9:07:14<10:53:42, 3.17it/s] 67%|██████▋ | 247240/371472 [9:07:14<10:41:37, 3.23it/s] {'loss': 2.795, 'learning_rate': 4.0115063888469907e-07, 'epoch': 10.65} + 67%|██████▋ | 247240/371472 [9:07:14<10:41:37, 3.23it/s] 67%|██████▋ | 247241/371472 [9:07:14<10:43:32, 3.22it/s] 67%|██████▋ | 247242/371472 [9:07:14<10:34:49, 3.26it/s] 67%|██████▋ | 247243/371472 [9:07:15<10:22:50, 3.32it/s] 67%|██████▋ | 247244/371472 [9:07:15<10:25:51, 3.31it/s] 67%|██████▋ | 247245/371472 [9:07:15<10:20:25, 3.34it/s] 67%|██████▋ | 247246/371472 [9:07:16<10:14:31, 3.37it/s] 67%|██████▋ | 247247/371472 [9:07:16<9:44:38, 3.54it/s] 67%|██████▋ | 247248/371472 [9:07:16<9:40:30, 3.57it/s] 67%|██████▋ | 247249/371472 [9:07:16<9:46:36, 3.53it/s] 67%|██████▋ | 247250/371472 [9:07:17<9:50:15, 3.51it/s] 67%|██████▋ | 247251/371472 [9:07:17<9:52:19, 3.50it/s] 67%|██████▋ | 247252/371472 [9:07:17<10:39:26, 3.24it/s] 67%|██████▋ | 247253/371472 [9:07:18<11:03:34, 3.12it/s] 67%|██████▋ | 247254/371472 [9:07:18<10:38:26, 3.24it/s] 67%|██████▋ | 247255/371472 [9:07:18<10:27:25, 3.30it/s] 67%|██████▋ | 247256/371472 [9:07:19<10:12:50, 3.38it/s] 67%|██████▋ | 247257/371472 [9:07:19<10:04:11, 3.43it/s] 67%|██████▋ | 247258/371472 [9:07:19<10:47:14, 3.20it/s] 67%|██████▋ | 247259/371472 [9:07:19<10:22:52, 3.32it/s] 67%|██████▋ | 247260/371472 [9:07:20<10:30:36, 3.28it/s] {'loss': 2.6861, 'learning_rate': 4.0110215690922014e-07, 'epoch': 10.65} + 67%|██████▋ | 247260/371472 [9:07:20<10:30:36, 3.28it/s] 67%|██████▋ | 247261/371472 [9:07:20<10:20:05, 3.34it/s] 67%|██████▋ | 247262/371472 [9:07:20<10:15:23, 3.36it/s] 67%|██████▋ | 247263/371472 [9:07:21<9:58:34, 3.46it/s] 67%|██████▋ | 247264/371472 [9:07:21<9:56:36, 3.47it/s] 67%|██████▋ | 247265/371472 [9:07:21<10:05:22, 3.42it/s] 67%|██████▋ | 247266/371472 [9:07:22<9:59:43, 3.45it/s] 67%|██████▋ | 247267/371472 [9:07:22<9:44:10, 3.54it/s] 67%|██████▋ | 247268/371472 [9:07:22<9:43:29, 3.55it/s] 67%|██████▋ | 247269/371472 [9:07:22<10:22:21, 3.33it/s] 67%|██████▋ | 247270/371472 [9:07:23<10:13:35, 3.37it/s] 67%|██████▋ | 247271/371472 [9:07:23<10:43:36, 3.22it/s] 67%|██████▋ | 247272/371472 [9:07:23<11:13:24, 3.07it/s] 67%|██████▋ | 247273/371472 [9:07:24<11:51:16, 2.91it/s] 67%|██████▋ | 247274/371472 [9:07:24<12:25:18, 2.78it/s] 67%|██████▋ | 247275/371472 [9:07:24<11:45:55, 2.93it/s] 67%|██████▋ | 247276/371472 [9:07:25<11:44:51, 2.94it/s] 67%|██████▋ | 247277/371472 [9:07:25<11:07:57, 3.10it/s] 67%|██████▋ | 247278/371472 [9:07:25<10:49:25, 3.19it/s] 67%|██████▋ | 247279/371472 [9:07:26<11:56:40, 2.89it/s] 67%|██████▋ | 247280/371472 [9:07:26<11:17:33, 3.05it/s] {'loss': 2.6823, 'learning_rate': 4.010536749337413e-07, 'epoch': 10.65} + 67%|██████▋ | 247280/371472 [9:07:26<11:17:33, 3.05it/s] 67%|██████▋ | 247281/371472 [9:07:26<11:26:18, 3.02it/s] 67%|██████▋ | 247282/371472 [9:07:27<10:58:46, 3.14it/s] 67%|██████▋ | 247283/371472 [9:07:27<10:34:10, 3.26it/s] 67%|██████▋ | 247284/371472 [9:07:27<10:34:07, 3.26it/s] 67%|██████▋ | 247285/371472 [9:07:28<10:58:44, 3.14it/s] 67%|██████▋ | 247286/371472 [9:07:28<10:47:38, 3.20it/s] 67%|██████▋ | 247287/371472 [9:07:28<10:28:45, 3.29it/s] 67%|██████▋ | 247288/371472 [9:07:28<10:05:56, 3.42it/s] 67%|██████▋ | 247289/371472 [9:07:29<10:36:18, 3.25it/s] 67%|██████▋ | 247290/371472 [9:07:29<10:46:35, 3.20it/s] 67%|██████▋ | 247291/371472 [9:07:29<10:51:25, 3.18it/s] 67%|██████▋ | 247292/371472 [9:07:30<10:38:38, 3.24it/s] 67%|██████▋ | 247293/371472 [9:07:30<11:24:40, 3.02it/s] 67%|██████▋ | 247294/371472 [9:07:30<11:02:02, 3.13it/s] 67%|██████▋ | 247295/371472 [9:07:31<10:56:08, 3.15it/s] 67%|██████▋ | 247296/371472 [9:07:31<10:25:30, 3.31it/s] 67%|██████▋ | 247297/371472 [9:07:31<10:25:59, 3.31it/s] 67%|██████▋ | 247298/371472 [9:07:32<10:50:07, 3.18it/s] 67%|██████▋ | 247299/371472 [9:07:32<11:03:02, 3.12it/s] 67%|██████▋ | 247300/371472 [9:07:32<11:06:19, 3.11it/s] {'loss': 2.7419, 'learning_rate': 4.010051929582624e-07, 'epoch': 10.65} + 67%|██████▋ | 247300/371472 [9:07:32<11:06:19, 3.11it/s] 67%|██████▋ | 247301/371472 [9:07:33<11:02:24, 3.12it/s] 67%|██████▋ | 247302/371472 [9:07:33<10:59:49, 3.14it/s] 67%|██████▋ | 247303/371472 [9:07:33<10:39:30, 3.24it/s] 67%|██████▋ | 247304/371472 [9:07:34<10:24:10, 3.32it/s] 67%|██████▋ | 247305/371472 [9:07:34<11:02:13, 3.13it/s] 67%|██████▋ | 247306/371472 [9:07:34<10:40:34, 3.23it/s] 67%|██████▋ | 247307/371472 [9:07:34<10:21:03, 3.33it/s] 67%|██████▋ | 247308/371472 [9:07:35<10:36:49, 3.25it/s] 67%|██████▋ | 247309/371472 [9:07:35<10:06:59, 3.41it/s] 67%|██████▋ | 247310/371472 [9:07:35<10:03:16, 3.43it/s] 67%|██████▋ | 247311/371472 [9:07:36<9:57:42, 3.46it/s] 67%|██████▋ | 247312/371472 [9:07:36<10:23:55, 3.32it/s] 67%|██████▋ | 247313/371472 [9:07:36<10:07:09, 3.41it/s] 67%|██████▋ | 247314/371472 [9:07:37<9:59:09, 3.45it/s] 67%|██████▋ | 247315/371472 [9:07:37<10:14:28, 3.37it/s] 67%|██████▋ | 247316/371472 [9:07:37<9:50:28, 3.50it/s] 67%|██████▋ | 247317/371472 [9:07:37<10:08:28, 3.40it/s] 67%|██████▋ | 247318/371472 [9:07:38<10:09:45, 3.39it/s] 67%|██████▋ | 247319/371472 [9:07:38<10:40:38, 3.23it/s] 67%|██████▋ | 247320/371472 [9:07:38<10:19:49, 3.34it/s] {'loss': 2.5948, 'learning_rate': 4.009567109827835e-07, 'epoch': 10.65} + 67%|██████▋ | 247320/371472 [9:07:38<10:19:49, 3.34it/s] 67%|██████▋ | 247321/371472 [9:07:39<10:39:44, 3.23it/s] 67%|██████▋ | 247322/371472 [9:07:39<10:23:37, 3.32it/s] 67%|██████▋ | 247323/371472 [9:07:39<10:29:42, 3.29it/s] 67%|██████▋ | 247324/371472 [9:07:40<10:22:21, 3.32it/s] 67%|██████▋ | 247325/371472 [9:07:40<10:07:48, 3.40it/s] 67%|██████▋ | 247326/371472 [9:07:40<11:11:01, 3.08it/s] 67%|██████▋ | 247327/371472 [9:07:41<11:06:16, 3.11it/s] 67%|██████▋ | 247328/371472 [9:07:41<10:29:03, 3.29it/s] 67%|██████▋ | 247329/371472 [9:07:41<10:43:21, 3.22it/s] 67%|██████▋ | 247330/371472 [9:07:41<10:32:30, 3.27it/s] 67%|██████▋ | 247331/371472 [9:07:42<10:51:36, 3.18it/s] 67%|██████▋ | 247332/371472 [9:07:42<10:49:00, 3.19it/s] 67%|██████▋ | 247333/371472 [9:07:42<10:35:10, 3.26it/s] 67%|██████▋ | 247334/371472 [9:07:43<10:12:28, 3.38it/s] 67%|██████▋ | 247335/371472 [9:07:43<10:00:53, 3.44it/s] 67%|██████▋ | 247336/371472 [9:07:43<10:02:54, 3.43it/s] 67%|██████▋ | 247337/371472 [9:07:43<9:56:15, 3.47it/s] 67%|██████▋ | 247338/371472 [9:07:44<9:55:55, 3.47it/s] 67%|██████▋ | 247339/371472 [9:07:44<10:34:20, 3.26it/s] 67%|██████▋ | 247340/371472 [9:07:44<10:09:49, 3.39it/s] {'loss': 2.6084, 'learning_rate': 4.009082290073046e-07, 'epoch': 10.65} + 67%|██████▋ | 247340/371472 [9:07:44<10:09:49, 3.39it/s] 67%|██████▋ | 247341/371472 [9:07:45<10:27:53, 3.29it/s] 67%|██████▋ | 247342/371472 [9:07:45<10:17:45, 3.35it/s] 67%|██████▋ | 247343/371472 [9:07:45<10:06:26, 3.41it/s] 67%|██████▋ | 247344/371472 [9:07:46<9:58:14, 3.46it/s] 67%|██████▋ | 247345/371472 [9:07:46<9:49:04, 3.51it/s] 67%|██████▋ | 247346/371472 [9:07:46<9:48:50, 3.51it/s] 67%|██████▋ | 247347/371472 [9:07:46<9:48:31, 3.52it/s] 67%|██████▋ | 247348/371472 [9:07:47<9:46:28, 3.53it/s] 67%|██████▋ | 247349/371472 [9:07:47<9:52:58, 3.49it/s] 67%|██████▋ | 247350/371472 [9:07:47<9:58:57, 3.45it/s] 67%|██████▋ | 247351/371472 [9:07:48<10:14:53, 3.36it/s] 67%|██████▋ | 247352/371472 [9:07:48<10:01:19, 3.44it/s] 67%|██████▋ | 247353/371472 [9:07:48<10:16:10, 3.36it/s] 67%|██████▋ | 247354/371472 [9:07:48<10:03:51, 3.43it/s] 67%|██████▋ | 247355/371472 [9:07:49<9:51:53, 3.49it/s] 67%|██████▋ | 247356/371472 [9:07:49<9:53:27, 3.49it/s] 67%|██████▋ | 247357/371472 [9:07:49<9:54:49, 3.48it/s] 67%|██████▋ | 247358/371472 [9:07:50<10:10:30, 3.39it/s] 67%|██████▋ | 247359/371472 [9:07:50<9:52:58, 3.49it/s] 67%|██████▋ | 247360/371472 [9:07:50<9:59:47, 3.45it/s] {'loss': 2.8237, 'learning_rate': 4.0085974703182577e-07, 'epoch': 10.65} + 67%|██████▋ | 247360/371472 [9:07:50<9:59:47, 3.45it/s] 67%|██████▋ | 247361/371472 [9:07:50<10:03:39, 3.43it/s] 67%|██████▋ | 247362/371472 [9:07:51<10:18:08, 3.35it/s] 67%|██████▋ | 247363/371472 [9:07:51<10:04:18, 3.42it/s] 67%|██████▋ | 247364/371472 [9:07:51<10:24:03, 3.31it/s] 67%|██████▋ | 247365/371472 [9:07:52<10:20:54, 3.33it/s] 67%|██████▋ | 247366/371472 [9:07:52<10:06:27, 3.41it/s] 67%|██████▋ | 247367/371472 [9:07:52<10:17:37, 3.35it/s] 67%|██████▋ | 247368/371472 [9:07:53<10:14:32, 3.37it/s] 67%|██████▋ | 247369/371472 [9:07:53<10:05:19, 3.42it/s] 67%|██████▋ | 247370/371472 [9:07:53<9:57:11, 3.46it/s] 67%|██████▋ | 247371/371472 [9:07:53<10:16:41, 3.35it/s] 67%|██████▋ | 247372/371472 [9:07:54<10:34:10, 3.26it/s] 67%|██████▋ | 247373/371472 [9:07:54<10:27:26, 3.30it/s] 67%|██████▋ | 247374/371472 [9:07:54<10:25:15, 3.31it/s] 67%|██████▋ | 247375/371472 [9:07:55<10:28:04, 3.29it/s] 67%|██████▋ | 247376/371472 [9:07:55<10:24:34, 3.31it/s] 67%|██████▋ | 247377/371472 [9:07:55<10:18:48, 3.34it/s] 67%|██████▋ | 247378/371472 [9:07:56<10:08:22, 3.40it/s] 67%|██████▋ | 247379/371472 [9:07:56<10:23:56, 3.31it/s] 67%|██████▋ | 247380/371472 [9:07:56<10:20:57, 3.33it/s] {'loss': 2.7328, 'learning_rate': 4.008112650563468e-07, 'epoch': 10.66} + 67%|██████▋ | 247380/371472 [9:07:56<10:20:57, 3.33it/s] 67%|██████▋ | 247381/371472 [9:07:56<10:07:32, 3.40it/s] 67%|██████▋ | 247382/371472 [9:07:57<10:05:38, 3.41it/s] 67%|██████▋ | 247383/371472 [9:07:57<10:04:58, 3.42it/s] 67%|██████▋ | 247384/371472 [9:07:57<10:10:28, 3.39it/s] 67%|██████▋ | 247385/371472 [9:07:58<10:16:53, 3.35it/s] 67%|██████▋ | 247386/371472 [9:07:58<10:44:06, 3.21it/s] 67%|██████▋ | 247387/371472 [9:07:58<10:44:19, 3.21it/s] 67%|██████▋ | 247388/371472 [9:07:59<10:37:34, 3.24it/s] 67%|██████▋ | 247389/371472 [9:07:59<10:14:25, 3.37it/s] 67%|██████▋ | 247390/371472 [9:07:59<10:26:00, 3.30it/s] 67%|██████▋ | 247391/371472 [9:07:59<10:05:33, 3.42it/s] 67%|██████▋ | 247392/371472 [9:08:00<10:00:45, 3.44it/s] 67%|██████▋ | 247393/371472 [9:08:00<9:58:13, 3.46it/s] 67%|██████▋ | 247394/371472 [9:08:00<10:22:43, 3.32it/s] 67%|██████▋ | 247395/371472 [9:08:01<10:04:48, 3.42it/s] 67%|██████▋ | 247396/371472 [9:08:01<10:38:44, 3.24it/s] 67%|██████▋ | 247397/371472 [9:08:01<11:02:54, 3.12it/s] 67%|██████▋ | 247398/371472 [9:08:02<10:52:43, 3.17it/s] 67%|██████▋ | 247399/371472 [9:08:02<10:27:14, 3.30it/s] 67%|██████▋ | 247400/371472 [9:08:02<11:16:03, 3.06it/s] {'loss': 2.7196, 'learning_rate': 4.0076278308086796e-07, 'epoch': 10.66} + 67%|██████▋ | 247400/371472 [9:08:02<11:16:03, 3.06it/s] 67%|██████▋ | 247401/371472 [9:08:03<10:52:09, 3.17it/s] 67%|██████▋ | 247402/371472 [9:08:03<10:47:48, 3.19it/s] 67%|██████▋ | 247403/371472 [9:08:03<11:05:39, 3.11it/s] 67%|██████▋ | 247404/371472 [9:08:03<10:45:23, 3.20it/s] 67%|██████▋ | 247405/371472 [9:08:04<10:29:18, 3.29it/s] 67%|██████▋ | 247406/371472 [9:08:04<11:04:07, 3.11it/s] 67%|██████▋ | 247407/371472 [9:08:04<11:24:24, 3.02it/s] 67%|██████▋ | 247408/371472 [9:08:05<10:41:26, 3.22it/s] 67%|██████▋ | 247409/371472 [9:08:05<10:15:10, 3.36it/s] 67%|██████▋ | 247410/371472 [9:08:05<10:26:55, 3.30it/s] 67%|██████▋ | 247411/371472 [9:08:06<10:35:15, 3.25it/s] 67%|██████▋ | 247412/371472 [9:08:06<10:14:51, 3.36it/s] 67%|██████▋ | 247413/371472 [9:08:06<10:01:04, 3.44it/s] 67%|██████▋ | 247414/371472 [9:08:07<10:51:57, 3.17it/s] 67%|██████▋ | 247415/371472 [9:08:07<11:25:56, 3.01it/s] 67%|██████▋ | 247416/371472 [9:08:07<11:00:56, 3.13it/s] 67%|██████▋ | 247417/371472 [9:08:08<11:19:52, 3.04it/s] 67%|██████▋ | 247418/371472 [9:08:08<10:42:41, 3.22it/s] 67%|██████▋ | 247419/371472 [9:08:08<10:18:41, 3.34it/s] 67%|██████▋ | 247420/371472 [9:08:08<10:06:59, 3.41it/s] {'loss': 2.7776, 'learning_rate': 4.00714301105389e-07, 'epoch': 10.66} + 67%|██████▋ | 247420/371472 [9:08:08<10:06:59, 3.41it/s] 67%|██████▋ | 247421/371472 [9:08:09<11:08:16, 3.09it/s] 67%|██████▋ | 247422/371472 [9:08:09<10:36:00, 3.25it/s] 67%|██████▋ | 247423/371472 [9:08:09<10:28:20, 3.29it/s] 67%|██████▋ | 247424/371472 [9:08:10<10:21:40, 3.33it/s] 67%|██████▋ | 247425/371472 [9:08:10<10:13:46, 3.37it/s] 67%|██████▋ | 247426/371472 [9:08:10<10:39:38, 3.23it/s] 67%|██████▋ | 247427/371472 [9:08:11<10:25:16, 3.31it/s] 67%|██████▋ | 247428/371472 [9:08:11<10:39:44, 3.23it/s] 67%|██████▋ | 247429/371472 [9:08:11<10:27:54, 3.29it/s] 67%|██████▋ | 247430/371472 [9:08:12<10:55:04, 3.16it/s] 67%|██████▋ | 247431/371472 [9:08:12<10:35:42, 3.25it/s] 67%|██████▋ | 247432/371472 [9:08:12<10:53:12, 3.16it/s] 67%|██████▋ | 247433/371472 [9:08:12<10:58:30, 3.14it/s] 67%|██████▋ | 247434/371472 [9:08:13<11:48:37, 2.92it/s] 67%|██████▋ | 247435/371472 [9:08:13<11:12:25, 3.07it/s] 67%|██████▋ | 247436/371472 [9:08:13<11:05:27, 3.11it/s] 67%|██████▋ | 247437/371472 [9:08:14<10:45:44, 3.20it/s] 67%|██████▋ | 247438/371472 [9:08:14<11:43:24, 2.94it/s] 67%|██████▋ | 247439/371472 [9:08:15<11:54:16, 2.89it/s] 67%|██████▋ | 247440/371472 [9:08:15<11:20:30, 3.04it/s] {'loss': 2.5936, 'learning_rate': 4.0066581912991016e-07, 'epoch': 10.66} + 67%|██████▋ | 247440/371472 [9:08:15<11:20:30, 3.04it/s] 67%|██████▋ | 247441/371472 [9:08:15<10:55:53, 3.15it/s] 67%|██████▋ | 247442/371472 [9:08:15<10:30:32, 3.28it/s] 67%|██████▋ | 247443/371472 [9:08:16<11:34:04, 2.98it/s] 67%|██████▋ | 247444/371472 [9:08:16<11:09:23, 3.09it/s] 67%|██████▋ | 247445/371472 [9:08:16<10:49:03, 3.18it/s] 67%|██████▋ | 247446/371472 [9:08:17<10:36:05, 3.25it/s] 67%|██████▋ | 247447/371472 [9:08:17<10:31:18, 3.27it/s] 67%|██████▋ | 247448/371472 [9:08:17<10:19:15, 3.34it/s] 67%|██████▋ | 247449/371472 [9:08:18<10:14:29, 3.36it/s] 67%|██████▋ | 247450/371472 [9:08:18<10:21:50, 3.32it/s] 67%|██████▋ | 247451/371472 [9:08:18<10:37:07, 3.24it/s] 67%|██████▋ | 247452/371472 [9:08:18<10:11:48, 3.38it/s] 67%|██████▋ | 247453/371472 [9:08:19<9:51:27, 3.49it/s] 67%|██████▋ | 247454/371472 [9:08:19<9:47:08, 3.52it/s] 67%|██████▋ | 247455/371472 [9:08:19<10:02:00, 3.43it/s] 67%|██████▋ | 247456/371472 [9:08:20<9:49:22, 3.51it/s] 67%|██████▋ | 247457/371472 [9:08:20<9:56:59, 3.46it/s] 67%|██████▋ | 247458/371472 [9:08:20<9:53:19, 3.48it/s] 67%|██████▋ | 247459/371472 [9:08:20<9:47:56, 3.52it/s] 67%|██████▋ | 247460/371472 [9:08:21<9:40:36, 3.56it/s] {'loss': 2.7986, 'learning_rate': 4.0061733715443123e-07, 'epoch': 10.66} + 67%|██████▋ | 247460/371472 [9:08:21<9:40:36, 3.56it/s] 67%|██████▋ | 247461/371472 [9:08:21<9:53:04, 3.48it/s] 67%|██████▋ | 247462/371472 [9:08:21<9:46:58, 3.52it/s] 67%|██████▋ | 247463/371472 [9:08:22<9:48:04, 3.51it/s] 67%|██████▋ | 247464/371472 [9:08:22<9:43:02, 3.54it/s] 67%|██████▋ | 247465/371472 [9:08:22<9:38:06, 3.58it/s] 67%|██████▋ | 247466/371472 [9:08:22<9:48:19, 3.51it/s] 67%|██████▋ | 247467/371472 [9:08:23<9:52:54, 3.49it/s] 67%|██████▋ | 247468/371472 [9:08:23<9:53:44, 3.48it/s] 67%|██████▋ | 247469/371472 [9:08:23<10:07:51, 3.40it/s] 67%|██████▋ | 247470/371472 [9:08:24<9:52:44, 3.49it/s] 67%|██████▋ | 247471/371472 [9:08:24<10:02:53, 3.43it/s] 67%|██████▋ | 247472/371472 [9:08:24<9:56:29, 3.46it/s] 67%|██████▋ | 247473/371472 [9:08:24<10:11:16, 3.38it/s] 67%|██████▋ | 247474/371472 [9:08:25<10:02:29, 3.43it/s] 67%|██████▋ | 247475/371472 [9:08:25<10:13:27, 3.37it/s] 67%|██████▋ | 247476/371472 [9:08:25<9:59:41, 3.45it/s] 67%|██████▋ | 247477/371472 [9:08:26<10:00:38, 3.44it/s] 67%|██████▋ | 247478/371472 [9:08:26<10:19:32, 3.34it/s] 67%|██████▋ | 247479/371472 [9:08:26<11:10:36, 3.08it/s] 67%|██████▋ | 247480/371472 [9:08:27<11:14:23, 3.06it/s] {'loss': 2.5998, 'learning_rate': 4.005688551789523e-07, 'epoch': 10.66} + 67%|██████▋ | 247480/371472 [9:08:27<11:14:23, 3.06it/s] 67%|██████▋ | 247481/371472 [9:08:27<10:43:05, 3.21it/s] 67%|██████▋ | 247482/371472 [9:08:27<10:55:26, 3.15it/s] 67%|██████▋ | 247483/371472 [9:08:28<10:52:32, 3.17it/s] 67%|██████▋ | 247484/371472 [9:08:28<10:35:14, 3.25it/s] 67%|██████▋ | 247485/371472 [9:08:28<10:46:34, 3.20it/s] 67%|██████▋ | 247486/371472 [9:08:29<10:53:21, 3.16it/s] 67%|██████▋ | 247487/371472 [9:08:29<10:50:44, 3.18it/s] 67%|██████▋ | 247488/371472 [9:08:29<10:50:06, 3.18it/s] 67%|██████▋ | 247489/371472 [9:08:29<10:35:10, 3.25it/s] 67%|██████▋ | 247490/371472 [9:08:30<10:22:22, 3.32it/s] 67%|██████▋ | 247491/371472 [9:08:30<10:22:43, 3.32it/s] 67%|██████▋ | 247492/371472 [9:08:30<10:46:13, 3.20it/s] 67%|██████▋ | 247493/371472 [9:08:31<10:29:59, 3.28it/s] 67%|██████▋ | 247494/371472 [9:08:31<10:42:58, 3.21it/s] 67%|██████▋ | 247495/371472 [9:08:31<10:15:09, 3.36it/s] 67%|██████▋ | 247496/371472 [9:08:32<10:02:30, 3.43it/s] 67%|██████▋ | 247497/371472 [9:08:32<10:05:32, 3.41it/s] 67%|██████▋ | 247498/371472 [9:08:32<9:56:46, 3.46it/s] 67%|██████▋ | 247499/371472 [9:08:32<9:40:49, 3.56it/s] 67%|██████▋ | 247500/371472 [9:08:33<9:53:43, 3.48it/s] {'loss': 2.7327, 'learning_rate': 4.005203732034734e-07, 'epoch': 10.66} + 67%|██████▋ | 247500/371472 [9:08:33<9:53:43, 3.48it/s] 67%|██████▋ | 247501/371472 [9:08:33<10:17:05, 3.35it/s] 67%|██████▋ | 247502/371472 [9:08:33<10:26:07, 3.30it/s] 67%|██████▋ | 247503/371472 [9:08:34<10:30:53, 3.27it/s] 67%|██████▋ | 247504/371472 [9:08:34<10:43:24, 3.21it/s] 67%|██████▋ | 247505/371472 [9:08:34<10:37:51, 3.24it/s] 67%|██████▋ | 247506/371472 [9:08:35<10:28:10, 3.29it/s] 67%|██████▋ | 247507/371472 [9:08:35<10:12:25, 3.37it/s] 67%|██████▋ | 247508/371472 [9:08:35<10:10:19, 3.39it/s] 67%|██████▋ | 247509/371472 [9:08:35<10:04:30, 3.42it/s] 67%|██████▋ | 247510/371472 [9:08:36<9:57:16, 3.46it/s] 67%|██████▋ | 247511/371472 [9:08:36<10:03:30, 3.42it/s] 67%|██████▋ | 247512/371472 [9:08:36<10:21:26, 3.32it/s] 67%|██████▋ | 247513/371472 [9:08:37<10:20:17, 3.33it/s] 67%|██████▋ | 247514/371472 [9:08:37<10:18:13, 3.34it/s] 67%|██████▋ | 247515/371472 [9:08:37<10:26:27, 3.30it/s] 67%|██████▋ | 247516/371472 [9:08:38<10:25:40, 3.30it/s] 67%|██████▋ | 247517/371472 [9:08:38<10:35:10, 3.25it/s] 67%|██████▋ | 247518/371472 [9:08:38<10:35:46, 3.25it/s] 67%|██████▋ | 247519/371472 [9:08:38<10:20:25, 3.33it/s] 67%|██████▋ | 247520/371472 [9:08:39<10:25:21, 3.30it/s] {'loss': 2.7414, 'learning_rate': 4.004718912279945e-07, 'epoch': 10.66} + 67%|██████▋ | 247520/371472 [9:08:39<10:25:21, 3.30it/s] 67%|██████▋ | 247521/371472 [9:08:39<10:40:13, 3.23it/s] 67%|██████▋ | 247522/371472 [9:08:39<10:35:18, 3.25it/s] 67%|██████▋ | 247523/371472 [9:08:40<10:22:45, 3.32it/s] 67%|██████▋ | 247524/371472 [9:08:40<11:13:36, 3.07it/s] 67%|██████▋ | 247525/371472 [9:08:40<11:00:16, 3.13it/s] 67%|██████▋ | 247526/371472 [9:08:41<11:36:21, 2.97it/s] 67%|██████▋ | 247527/371472 [9:08:41<11:09:36, 3.08it/s] 67%|██████▋ | 247528/371472 [9:08:41<10:59:08, 3.13it/s] 67%|██████▋ | 247529/371472 [9:08:42<11:13:10, 3.07it/s] 67%|██████▋ | 247530/371472 [9:08:42<10:55:21, 3.15it/s] 67%|██████▋ | 247531/371472 [9:08:42<10:55:01, 3.15it/s] 67%|██████▋ | 247532/371472 [9:08:43<10:35:46, 3.25it/s] 67%|██████▋ | 247533/371472 [9:08:43<10:23:21, 3.31it/s] 67%|██████▋ | 247534/371472 [9:08:43<10:53:35, 3.16it/s] 67%|██████▋ | 247535/371472 [9:08:43<10:49:39, 3.18it/s] 67%|██████▋ | 247536/371472 [9:08:44<10:34:24, 3.26it/s] 67%|██████▋ | 247537/371472 [9:08:44<10:23:08, 3.31it/s] 67%|██████▋ | 247538/371472 [9:08:45<12:27:33, 2.76it/s] 67%|██████▋ | 247539/371472 [9:08:45<11:31:36, 2.99it/s] 67%|██████▋ | 247540/371472 [9:08:45<11:00:54, 3.13it/s] {'loss': 2.661, 'learning_rate': 4.004234092525157e-07, 'epoch': 10.66} + 67%|██████▋ | 247540/371472 [9:08:45<11:00:54, 3.13it/s] 67%|██████▋ | 247541/371472 [9:08:45<10:45:50, 3.20it/s] 67%|██████▋ | 247542/371472 [9:08:46<10:27:19, 3.29it/s] 67%|██████▋ | 247543/371472 [9:08:46<10:19:47, 3.33it/s] 67%|██████▋ | 247544/371472 [9:08:46<10:23:52, 3.31it/s] 67%|██████▋ | 247545/371472 [9:08:47<10:18:59, 3.34it/s] 67%|██████▋ | 247546/371472 [9:08:47<11:31:09, 2.99it/s] 67%|██████▋ | 247547/371472 [9:08:47<11:16:28, 3.05it/s] 67%|██████▋ | 247548/371472 [9:08:48<11:01:54, 3.12it/s] 67%|██████▋ | 247549/371472 [9:08:48<10:50:10, 3.18it/s] 67%|██████▋ | 247550/371472 [9:08:48<10:28:14, 3.29it/s] 67%|██████▋ | 247551/371472 [9:08:49<11:13:44, 3.07it/s] 67%|██████▋ | 247552/371472 [9:08:49<11:47:51, 2.92it/s] 67%|██████▋ | 247553/371472 [9:08:49<11:16:31, 3.05it/s] 67%|██████▋ | 247554/371472 [9:08:50<10:59:44, 3.13it/s] 67%|██████▋ | 247555/371472 [9:08:50<11:07:12, 3.10it/s] 67%|██████▋ | 247556/371472 [9:08:50<10:38:53, 3.23it/s] 67%|██████▋ | 247557/371472 [9:08:50<10:17:32, 3.34it/s] 67%|██████▋ | 247558/371472 [9:08:51<10:51:50, 3.17it/s] 67%|██████▋ | 247559/371472 [9:08:51<10:45:12, 3.20it/s] 67%|██████▋ | 247560/371472 [9:08:51<10:17:36, 3.34it/s] {'loss': 2.7196, 'learning_rate': 4.0037492727703675e-07, 'epoch': 10.66} + 67%|██████▋ | 247560/371472 [9:08:51<10:17:36, 3.34it/s] 67%|██████▋ | 247561/371472 [9:08:52<11:47:12, 2.92it/s] 67%|██████▋ | 247562/371472 [9:08:52<11:37:49, 2.96it/s] 67%|██████▋ | 247563/371472 [9:08:52<11:22:18, 3.03it/s] 67%|██████▋ | 247564/371472 [9:08:53<10:59:52, 3.13it/s] 67%|██████▋ | 247565/371472 [9:08:53<10:42:19, 3.22it/s] 67%|██████▋ | 247566/371472 [9:08:53<10:26:45, 3.29it/s] 67%|██████▋ | 247567/371472 [9:08:54<10:15:06, 3.36it/s] 67%|██████▋ | 247568/371472 [9:08:54<10:01:27, 3.43it/s] 67%|██████▋ | 247569/371472 [9:08:54<10:01:46, 3.43it/s] 67%|██████▋ | 247570/371472 [9:08:54<10:01:56, 3.43it/s] 67%|██████▋ | 247571/371472 [9:08:55<9:57:19, 3.46it/s] 67%|██████▋ | 247572/371472 [9:08:55<10:03:53, 3.42it/s] 67%|██████▋ | 247573/371472 [9:08:55<10:03:57, 3.42it/s] 67%|██████▋ | 247574/371472 [9:08:56<9:51:08, 3.49it/s] 67%|██████▋ | 247575/371472 [9:08:56<9:41:38, 3.55it/s] 67%|██████▋ | 247576/371472 [9:08:56<9:41:41, 3.55it/s] 67%|██████▋ | 247577/371472 [9:08:56<9:32:06, 3.61it/s] 67%|██████▋ | 247578/371472 [9:08:57<9:34:09, 3.60it/s] 67%|██████▋ | 247579/371472 [9:08:57<10:14:39, 3.36it/s] 67%|██████▋ | 247580/371472 [9:08:57<10:30:31, 3.27it/s] {'loss': 2.8726, 'learning_rate': 4.0032644530155787e-07, 'epoch': 10.66} + 67%|██████▋ | 247580/371472 [9:08:57<10:30:31, 3.27it/s] 67%|██████▋ | 247581/371472 [9:08:58<10:42:19, 3.21it/s] 67%|██████▋ | 247582/371472 [9:08:58<10:22:49, 3.32it/s] 67%|██████▋ | 247583/371472 [9:08:58<10:19:35, 3.33it/s] 67%|██████▋ | 247584/371472 [9:08:59<11:56:50, 2.88it/s] 67%|██████▋ | 247585/371472 [9:08:59<11:22:48, 3.02it/s] 67%|██████▋ | 247586/371472 [9:08:59<11:22:18, 3.03it/s] 67%|██████▋ | 247587/371472 [9:09:00<11:23:26, 3.02it/s] 67%|██████▋ | 247588/371472 [9:09:00<10:59:48, 3.13it/s] 67%|██████▋ | 247589/371472 [9:09:00<10:42:35, 3.21it/s] 67%|██████▋ | 247590/371472 [9:09:01<11:03:13, 3.11it/s] 67%|██████▋ | 247591/371472 [9:09:01<10:38:37, 3.23it/s] 67%|██████▋ | 247592/371472 [9:09:01<10:49:01, 3.18it/s] 67%|██████▋ | 247593/371472 [9:09:02<10:36:17, 3.24it/s] 67%|██████▋ | 247594/371472 [9:09:02<10:28:45, 3.28it/s] 67%|██████▋ | 247595/371472 [9:09:02<11:17:18, 3.05it/s] 67%|██████▋ | 247596/371472 [9:09:03<11:06:31, 3.10it/s] 67%|██████▋ | 247597/371472 [9:09:03<10:31:13, 3.27it/s] 67%|██████▋ | 247598/371472 [9:09:03<10:24:01, 3.31it/s] 67%|██████▋ | 247599/371472 [9:09:03<10:15:15, 3.36it/s] 67%|██████▋ | 247600/371472 [9:09:04<10:56:34, 3.14it/s] {'loss': 2.7016, 'learning_rate': 4.0027796332607894e-07, 'epoch': 10.66} + 67%|██████▋ | 247600/371472 [9:09:04<10:56:34, 3.14it/s] 67%|██████▋ | 247601/371472 [9:09:04<10:22:52, 3.31it/s] 67%|██████▋ | 247602/371472 [9:09:04<10:30:09, 3.28it/s] 67%|██████▋ | 247603/371472 [9:09:05<10:15:00, 3.36it/s] 67%|██████▋ | 247604/371472 [9:09:05<10:06:05, 3.41it/s] 67%|██████▋ | 247605/371472 [9:09:05<10:02:16, 3.43it/s] 67%|██████▋ | 247606/371472 [9:09:05<9:56:49, 3.46it/s] 67%|██████▋ | 247607/371472 [9:09:06<10:50:58, 3.17it/s] 67%|██████▋ | 247608/371472 [9:09:06<11:20:13, 3.03it/s] 67%|██████▋ | 247609/371472 [9:09:06<10:53:48, 3.16it/s] 67%|██████▋ | 247610/371472 [9:09:07<10:47:27, 3.19it/s] 67%|██████▋ | 247611/371472 [9:09:07<11:09:37, 3.08it/s] 67%|██████▋ | 247612/371472 [9:09:07<10:50:55, 3.17it/s] 67%|██████▋ | 247613/371472 [9:09:08<10:32:09, 3.27it/s] 67%|██████▋ | 247614/371472 [9:09:08<10:13:30, 3.36it/s] 67%|██████▋ | 247615/371472 [9:09:08<10:04:35, 3.41it/s] 67%|██████▋ | 247616/371472 [9:09:09<10:02:46, 3.42it/s] 67%|██████▋ | 247617/371472 [9:09:09<10:06:15, 3.40it/s] 67%|████���█▋ | 247618/371472 [9:09:09<10:26:05, 3.30it/s] 67%|██████▋ | 247619/371472 [9:09:10<10:40:18, 3.22it/s] 67%|██████▋ | 247620/371472 [9:09:10<10:25:41, 3.30it/s] {'loss': 2.793, 'learning_rate': 4.002294813506001e-07, 'epoch': 10.67} + 67%|██████▋ | 247620/371472 [9:09:10<10:25:41, 3.30it/s] 67%|██████▋ | 247621/371472 [9:09:10<10:31:17, 3.27it/s] 67%|██████▋ | 247622/371472 [9:09:10<10:35:58, 3.25it/s] 67%|██████▋ | 247623/371472 [9:09:11<10:21:08, 3.32it/s] 67%|██████▋ | 247624/371472 [9:09:11<10:01:13, 3.43it/s] 67%|██████▋ | 247625/371472 [9:09:11<9:51:33, 3.49it/s] 67%|██████▋ | 247626/371472 [9:09:12<9:55:14, 3.47it/s] 67%|██████▋ | 247627/371472 [9:09:12<9:57:09, 3.46it/s] 67%|██████▋ | 247628/371472 [9:09:12<9:58:46, 3.45it/s] 67%|██████▋ | 247629/371472 [9:09:12<10:25:33, 3.30it/s] 67%|██████▋ | 247630/371472 [9:09:13<10:28:21, 3.28it/s] 67%|██████▋ | 247631/371472 [9:09:13<10:10:24, 3.38it/s] 67%|██████▋ | 247632/371472 [9:09:13<10:04:59, 3.41it/s] 67%|██████▋ | 247633/371472 [9:09:14<10:43:49, 3.21it/s] 67%|██████▋ | 247634/371472 [9:09:14<10:22:35, 3.32it/s] 67%|██████▋ | 247635/371472 [9:09:14<9:56:51, 3.46it/s] 67%|██████▋ | 247636/371472 [9:09:15<9:53:39, 3.48it/s] 67%|██████▋ | 247637/371472 [9:09:15<10:12:21, 3.37it/s] 67%|██████▋ | 247638/371472 [9:09:15<10:47:58, 3.19it/s] 67%|██████▋ | 247639/371472 [9:09:15<10:46:04, 3.19it/s] 67%|██████▋ | 247640/371472 [9:09:16<10:26:49, 3.29it/s] {'loss': 2.6924, 'learning_rate': 4.0018099937512114e-07, 'epoch': 10.67} + 67%|██████▋ | 247640/371472 [9:09:16<10:26:49, 3.29it/s] 67%|██████▋ | 247641/371472 [9:09:16<10:15:45, 3.35it/s] 67%|██████▋ | 247642/371472 [9:09:16<10:08:19, 3.39it/s] 67%|██████▋ | 247643/371472 [9:09:17<10:06:14, 3.40it/s] 67%|██████▋ | 247644/371472 [9:09:17<9:56:01, 3.46it/s] 67%|██████▋ | 247645/371472 [9:09:17<9:48:29, 3.51it/s] 67%|██████▋ | 247646/371472 [9:09:18<10:09:12, 3.39it/s] 67%|██████▋ | 247647/371472 [9:09:18<10:00:49, 3.43it/s] 67%|██████▋ | 247648/371472 [9:09:18<10:53:14, 3.16it/s] 67%|██████▋ | 247649/371472 [9:09:18<10:28:02, 3.29it/s] 67%|██████▋ | 247650/371472 [9:09:19<10:16:03, 3.35it/s] 67%|██████▋ | 247651/371472 [9:09:19<10:44:02, 3.20it/s] 67%|██████▋ | 247652/371472 [9:09:19<10:24:40, 3.30it/s] 67%|██████▋ | 247653/371472 [9:09:20<10:23:48, 3.31it/s] 67%|██████▋ | 247654/371472 [9:09:20<10:18:03, 3.34it/s] 67%|██████▋ | 247655/371472 [9:09:20<10:07:28, 3.40it/s] 67%|██████▋ | 247656/371472 [9:09:21<10:00:05, 3.44it/s] 67%|██████▋ | 247657/371472 [9:09:21<10:00:56, 3.43it/s] 67%|██████▋ | 247658/371472 [9:09:21<9:46:34, 3.52it/s] 67%|██████▋ | 247659/371472 [9:09:21<9:36:17, 3.58it/s] 67%|██████▋ | 247660/371472 [9:09:22<9:39:38, 3.56it/s] {'loss': 2.9092, 'learning_rate': 4.001325173996423e-07, 'epoch': 10.67} + 67%|██████▋ | 247660/371472 [9:09:22<9:39:38, 3.56it/s] 67%|██████▋ | 247661/371472 [9:09:22<9:40:03, 3.56it/s] 67%|██████▋ | 247662/371472 [9:09:22<9:43:35, 3.54it/s] 67%|██████▋ | 247663/371472 [9:09:22<9:44:47, 3.53it/s] 67%|██████▋ | 247664/371472 [9:09:23<9:40:06, 3.56it/s] 67%|██████▋ | 247665/371472 [9:09:23<9:36:47, 3.58it/s] 67%|██████▋ | 247666/371472 [9:09:23<9:45:01, 3.53it/s] 67%|██████▋ | 247667/371472 [9:09:24<9:58:21, 3.45it/s] 67%|██████▋ | 247668/371472 [9:09:24<10:26:01, 3.30it/s] 67%|██████▋ | 247669/371472 [9:09:24<10:10:00, 3.38it/s] 67%|██████▋ | 247670/371472 [9:09:25<9:58:35, 3.45it/s] 67%|██████▋ | 247671/371472 [9:09:25<9:48:54, 3.50it/s] 67%|██████▋ | 247672/371472 [9:09:25<9:41:43, 3.55it/s] 67%|██████▋ | 247673/371472 [9:09:25<9:49:50, 3.50it/s] 67%|██████▋ | 247674/371472 [9:09:26<10:26:11, 3.29it/s] 67%|██████▋ | 247675/371472 [9:09:26<10:27:58, 3.29it/s] 67%|██████▋ | 247676/371472 [9:09:26<10:24:02, 3.31it/s] 67%|██████▋ | 247677/371472 [9:09:27<10:54:04, 3.15it/s] 67%|██████��� | 247678/371472 [9:09:27<11:01:11, 3.12it/s] 67%|██████▋ | 247679/371472 [9:09:27<10:34:54, 3.25it/s] 67%|██████▋ | 247680/371472 [9:09:28<10:24:38, 3.30it/s] {'loss': 2.668, 'learning_rate': 4.000840354241634e-07, 'epoch': 10.67} + 67%|██████▋ | 247680/371472 [9:09:28<10:24:38, 3.30it/s] 67%|██████▋ | 247681/371472 [9:09:28<10:05:45, 3.41it/s] 67%|██████▋ | 247682/371472 [9:09:28<9:57:17, 3.45it/s] 67%|██████▋ | 247683/371472 [9:09:28<10:52:28, 3.16it/s] 67%|██████▋ | 247684/371472 [9:09:29<10:32:20, 3.26it/s] 67%|██████▋ | 247685/371472 [9:09:29<10:36:29, 3.24it/s] 67%|██████▋ | 247686/371472 [9:09:29<10:20:31, 3.32it/s] 67%|██████▋ | 247687/371472 [9:09:30<10:07:47, 3.39it/s] 67%|██████▋ | 247688/371472 [9:09:30<10:48:46, 3.18it/s] 67%|██████▋ | 247689/371472 [9:09:30<10:42:43, 3.21it/s] 67%|██████▋ | 247690/371472 [9:09:31<10:30:47, 3.27it/s] 67%|██████▋ | 247691/371472 [9:09:31<10:34:36, 3.25it/s] 67%|██████▋ | 247692/371472 [9:09:31<10:21:26, 3.32it/s] 67%|██████▋ | 247693/371472 [9:09:32<10:18:29, 3.34it/s] 67%|██████▋ | 247694/371472 [9:09:32<10:30:06, 3.27it/s] 67%|██████▋ | 247695/371472 [9:09:32<10:31:32, 3.27it/s] 67%|██████▋ | 247696/371472 [9:09:32<10:13:34, 3.36it/s] 67%|██████▋ | 247697/371472 [9:09:33<10:04:48, 3.41it/s] 67%|██████▋ | 247698/371472 [9:09:33<10:03:33, 3.42it/s] 67%|██████▋ | 247699/371472 [9:09:33<9:55:22, 3.46it/s] 67%|██████▋ | 247700/371472 [9:09:34<9:55:53, 3.46it/s] {'loss': 2.7045, 'learning_rate': 4.000355534486845e-07, 'epoch': 10.67} + 67%|██████▋ | 247700/371472 [9:09:34<9:55:53, 3.46it/s] 67%|██████▋ | 247701/371472 [9:09:34<9:55:57, 3.46it/s] 67%|██████▋ | 247702/371472 [9:09:34<9:39:11, 3.56it/s] 67%|██████▋ | 247703/371472 [9:09:34<9:28:18, 3.63it/s] 67%|██████▋ | 247704/371472 [9:09:35<9:24:03, 3.66it/s] 67%|██████▋ | 247705/371472 [9:09:35<9:31:02, 3.61it/s] 67%|██████▋ | 247706/371472 [9:09:35<9:18:28, 3.69it/s] 67%|██████▋ | 247707/371472 [9:09:35<9:29:44, 3.62it/s] 67%|██████▋ | 247708/371472 [9:09:36<9:50:05, 3.50it/s] 67%|██████▋ | 247709/371472 [9:09:36<10:02:57, 3.42it/s] 67%|██████▋ | 247710/371472 [9:09:36<10:00:14, 3.44it/s] 67%|██████▋ | 247711/371472 [9:09:37<10:01:22, 3.43it/s] 67%|██████▋ | 247712/371472 [9:09:37<9:50:54, 3.49it/s] 67%|██████▋ | 247713/371472 [9:09:37<9:52:29, 3.48it/s] 67%|██████▋ | 247714/371472 [9:09:38<10:21:50, 3.32it/s] 67%|██████▋ | 247715/371472 [9:09:38<10:05:59, 3.40it/s] 67%|██████▋ | 247716/371472 [9:09:38<10:05:46, 3.40it/s] 67%|██████▋ | 247717/371472 [9:09:38<10:03:38, 3.42it/s] 67%|██████▋ | 247718/371472 [9:09:39<9:57:03, 3.45it/s] 67%|██████▋ | 247719/371472 [9:09:39<10:10:59, 3.38it/s] 67%|██████▋ | 247720/371472 [9:09:39<11:04:27, 3.10it/s] {'loss': 2.6134, 'learning_rate': 3.999870714732056e-07, 'epoch': 10.67} + 67%|██████▋ | 247720/371472 [9:09:39<11:04:27, 3.10it/s] 67%|██████▋ | 247721/371472 [9:09:40<10:41:42, 3.21it/s] 67%|██████▋ | 247722/371472 [9:09:40<11:01:30, 3.12it/s] 67%|██████▋ | 247723/371472 [9:09:40<10:49:59, 3.17it/s] 67%|██████▋ | 247724/371472 [9:09:41<10:13:36, 3.36it/s] 67%|██████▋ | 247725/371472 [9:09:41<10:06:05, 3.40it/s] 67%|██████▋ | 247726/371472 [9:09:41<9:55:00, 3.47it/s] 67%|██████▋ | 247727/371472 [9:09:41<10:17:09, 3.34it/s] 67%|██████▋ | 247728/371472 [9:09:42<10:59:36, 3.13it/s] 67%|██████▋ | 247729/371472 [9:09:42<10:24:59, 3.30it/s] 67%|██████▋ | 247730/371472 [9:09:43<11:29:34, 2.99it/s] 67%|██████▋ | 247731/371472 [9:09:43<11:01:18, 3.12it/s] 67%|██████▋ | 247732/371472 [9:09:43<10:28:35, 3.28it/s] 67%|██████▋ | 247733/371472 [9:09:43<10:18:13, 3.34it/s] 67%|██████▋ | 247734/371472 [9:09:44<10:08:23, 3.39it/s] 67%|██████▋ | 247735/371472 [9:09:44<11:15:19, 3.05it/s] 67%|██████▋ | 247736/371472 [9:09:44<10:56:58, 3.14it/s] 67%|██████▋ | 247737/371472 [9:09:45<12:20:30, 2.78it/s] 67%|██████��� | 247738/371472 [9:09:45<11:32:35, 2.98it/s] 67%|██████▋ | 247739/371472 [9:09:45<11:04:19, 3.10it/s] 67%|██████▋ | 247740/371472 [9:09:46<11:08:35, 3.08it/s] {'loss': 2.5507, 'learning_rate': 3.9993858949772676e-07, 'epoch': 10.67} + 67%|██████▋ | 247740/371472 [9:09:46<11:08:35, 3.08it/s] 67%|██████▋ | 247741/371472 [9:09:46<10:36:23, 3.24it/s] 67%|██████▋ | 247742/371472 [9:09:46<10:33:56, 3.25it/s] 67%|██████▋ | 247743/371472 [9:09:47<10:46:30, 3.19it/s] 67%|██████▋ | 247744/371472 [9:09:47<10:28:54, 3.28it/s] 67%|██████▋ | 247745/371472 [9:09:47<10:23:13, 3.31it/s] 67%|██████▋ | 247746/371472 [9:09:47<10:26:41, 3.29it/s] 67%|██████▋ | 247747/371472 [9:09:48<10:14:28, 3.36it/s] 67%|██████▋ | 247748/371472 [9:09:48<10:06:36, 3.40it/s] 67%|██████▋ | 247749/371472 [9:09:48<10:10:38, 3.38it/s] 67%|██████▋ | 247750/371472 [9:09:49<11:03:20, 3.11it/s] 67%|██████▋ | 247751/371472 [9:09:49<10:56:21, 3.14it/s] 67%|██████▋ | 247752/371472 [9:09:49<10:44:52, 3.20it/s] 67%|██████▋ | 247753/371472 [9:09:50<10:37:14, 3.24it/s] 67%|██████▋ | 247754/371472 [9:09:50<11:04:27, 3.10it/s] 67%|██████▋ | 247755/371472 [9:09:50<10:45:51, 3.19it/s] 67%|██████▋ | 247756/371472 [9:09:51<10:30:03, 3.27it/s] 67%|██████▋ | 247757/371472 [9:09:51<10:16:56, 3.34it/s] 67%|██████▋ | 247758/371472 [9:09:51<10:06:50, 3.40it/s] 67%|██████▋ | 247759/371472 [9:09:51<9:48:48, 3.50it/s] 67%|██████▋ | 247760/371472 [9:09:52<9:58:22, 3.45it/s] {'loss': 2.8916, 'learning_rate': 3.998901075222478e-07, 'epoch': 10.67} + 67%|██████▋ | 247760/371472 [9:09:52<9:58:22, 3.45it/s] 67%|██████▋ | 247761/371472 [9:09:52<9:59:59, 3.44it/s] 67%|██████▋ | 247762/371472 [9:09:52<9:49:55, 3.50it/s] 67%|██████▋ | 247763/371472 [9:09:53<9:48:31, 3.50it/s] 67%|██████▋ | 247764/371472 [9:09:53<10:21:36, 3.32it/s] 67%|██████▋ | 247765/371472 [9:09:53<10:02:14, 3.42it/s] 67%|██████▋ | 247766/371472 [9:09:53<9:57:32, 3.45it/s] 67%|██████▋ | 247767/371472 [9:09:54<10:06:22, 3.40it/s] 67%|██████▋ | 247768/371472 [9:09:54<10:17:54, 3.34it/s] 67%|██████▋ | 247769/371472 [9:09:54<10:49:32, 3.17it/s] 67%|██████▋ | 247770/371472 [9:09:55<11:14:04, 3.06it/s] 67%|██████▋ | 247771/371472 [9:09:55<10:42:05, 3.21it/s] 67%|██████▋ | 247772/371472 [9:09:55<10:28:00, 3.28it/s] 67%|██████▋ | 247773/371472 [9:09:56<10:07:22, 3.39it/s] 67%|██████▋ | 247774/371472 [9:09:56<10:03:23, 3.42it/s] 67%|██████▋ | 247775/371472 [9:09:56<9:46:50, 3.51it/s] 67%|██████▋ | 247776/371472 [9:09:56<9:56:17, 3.46it/s] 67%|██████▋ | 247777/371472 [9:09:57<9:55:07, 3.46it/s] 67%|██████▋ | 247778/371472 [9:09:57<10:01:39, 3.43it/s] 67%|██████▋ | 247779/371472 [9:09:57<9:54:43, 3.47it/s] 67%|██████▋ | 247780/371472 [9:09:58<10:10:58, 3.37it/s] {'loss': 2.878, 'learning_rate': 3.9984162554676896e-07, 'epoch': 10.67} + 67%|██████▋ | 247780/371472 [9:09:58<10:10:58, 3.37it/s] 67%|██████▋ | 247781/371472 [9:09:58<10:30:53, 3.27it/s] 67%|██████▋ | 247782/371472 [9:09:58<11:21:46, 3.02it/s] 67%|██████▋ | 247783/371472 [9:09:59<10:49:19, 3.17it/s] 67%|██████▋ | 247784/371472 [9:09:59<10:35:39, 3.24it/s] 67%|██████▋ | 247785/371472 [9:09:59<10:22:15, 3.31it/s] 67%|██████▋ | 247786/371472 [9:10:00<10:18:06, 3.34it/s] 67%|██████▋ | 247787/371472 [9:10:00<10:32:20, 3.26it/s] 67%|██████▋ | 247788/371472 [9:10:00<10:31:31, 3.26it/s] 67%|██████▋ | 247789/371472 [9:10:00<10:22:03, 3.31it/s] 67%|██████▋ | 247790/371472 [9:10:01<10:51:40, 3.16it/s] 67%|██████▋ | 247791/371472 [9:10:01<10:26:49, 3.29it/s] 67%|██████▋ | 247792/371472 [9:10:01<10:18:37, 3.33it/s] 67%|██████▋ | 247793/371472 [9:10:02<10:41:27, 3.21it/s] 67%|██████▋ | 247794/371472 [9:10:02<11:43:24, 2.93it/s] 67%|██████▋ | 247795/371472 [9:10:02<11:20:19, 3.03it/s] 67%|██████▋ | 247796/371472 [9:10:03<11:15:18, 3.05it/s] 67%|██████▋ | 247797/371472 [9:10:03<11:35:46, 2.96it/s] 67%|████���█▋ | 247798/371472 [9:10:03<11:09:48, 3.08it/s] 67%|██████▋ | 247799/371472 [9:10:04<11:15:26, 3.05it/s] 67%|██████▋ | 247800/371472 [9:10:04<11:23:30, 3.02it/s] {'loss': 2.6711, 'learning_rate': 3.9979314357129003e-07, 'epoch': 10.67} + 67%|██████▋ | 247800/371472 [9:10:04<11:23:30, 3.02it/s] 67%|██████▋ | 247801/371472 [9:10:04<10:53:33, 3.15it/s] 67%|██████▋ | 247802/371472 [9:10:05<10:38:20, 3.23it/s] 67%|██████▋ | 247803/371472 [9:10:05<10:22:57, 3.31it/s] 67%|██████▋ | 247804/371472 [9:10:05<10:31:19, 3.26it/s] 67%|██████▋ | 247805/371472 [9:10:06<11:24:32, 3.01it/s] 67%|██████▋ | 247806/371472 [9:10:06<11:03:30, 3.11it/s] 67%|██████▋ | 247807/371472 [9:10:06<10:32:46, 3.26it/s] 67%|██████▋ | 247808/371472 [9:10:07<10:31:11, 3.27it/s] 67%|██████▋ | 247809/371472 [9:10:07<10:20:55, 3.32it/s] 67%|██████▋ | 247810/371472 [9:10:07<10:29:38, 3.27it/s] 67%|██████▋ | 247811/371472 [9:10:07<10:31:02, 3.27it/s] 67%|██████▋ | 247812/371472 [9:10:08<10:18:18, 3.33it/s] 67%|██████▋ | 247813/371472 [9:10:08<10:02:55, 3.42it/s] 67%|██████▋ | 247814/371472 [9:10:08<9:57:55, 3.45it/s] 67%|██████▋ | 247815/371472 [9:10:09<10:12:33, 3.36it/s] 67%|██████▋ | 247816/371472 [9:10:09<10:00:17, 3.43it/s] 67%|██████▋ | 247817/371472 [9:10:09<9:53:04, 3.47it/s] 67%|██████▋ | 247818/371472 [9:10:09<9:50:30, 3.49it/s] 67%|██████▋ | 247819/371472 [9:10:10<9:50:24, 3.49it/s] 67%|██████▋ | 247820/371472 [9:10:10<10:18:18, 3.33it/s] {'loss': 2.7284, 'learning_rate': 3.9974466159581115e-07, 'epoch': 10.67} + 67%|██████▋ | 247820/371472 [9:10:10<10:18:18, 3.33it/s] 67%|██████▋ | 247821/371472 [9:10:10<10:16:58, 3.34it/s] 67%|██████▋ | 247822/371472 [9:10:11<9:58:57, 3.44it/s] 67%|██████▋ | 247823/371472 [9:10:11<10:12:16, 3.37it/s] 67%|██████▋ | 247824/371472 [9:10:11<10:13:16, 3.36it/s] 67%|██████▋ | 247825/371472 [9:10:12<10:40:21, 3.22it/s] 67%|██████▋ | 247826/371472 [9:10:12<11:04:00, 3.10it/s] 67%|██████▋ | 247827/371472 [9:10:12<10:55:04, 3.15it/s] 67%|██████▋ | 247828/371472 [9:10:13<11:04:52, 3.10it/s] 67%|██████▋ | 247829/371472 [9:10:13<10:56:57, 3.14it/s] 67%|██████▋ | 247830/371472 [9:10:13<10:39:13, 3.22it/s] 67%|██████▋ | 247831/371472 [9:10:13<11:06:07, 3.09it/s] 67%|██████▋ | 247832/371472 [9:10:14<11:51:44, 2.90it/s] 67%|██████▋ | 247833/371472 [9:10:14<11:11:27, 3.07it/s] 67%|██████▋ | 247834/371472 [9:10:15<11:44:32, 2.92it/s] 67%|██████▋ | 247835/371472 [9:10:15<11:06:19, 3.09it/s] 67%|██████▋ | 247836/371472 [9:10:15<11:00:46, 3.12it/s] 67%|██████▋ | 247837/371472 [9:10:15<10:39:37, 3.22it/s] 67%|██████▋ | 247838/371472 [9:10:16<10:21:11, 3.32it/s] 67%|██████▋ | 247839/371472 [9:10:16<10:11:51, 3.37it/s] 67%|██████▋ | 247840/371472 [9:10:16<10:58:24, 3.13it/s] {'loss': 2.7175, 'learning_rate': 3.9969617962033217e-07, 'epoch': 10.67} + 67%|██████▋ | 247840/371472 [9:10:16<10:58:24, 3.13it/s] 67%|██████▋ | 247841/371472 [9:10:17<10:35:13, 3.24it/s] 67%|██████▋ | 247842/371472 [9:10:17<11:22:06, 3.02it/s] 67%|██████▋ | 247843/371472 [9:10:17<10:49:28, 3.17it/s] 67%|██████▋ | 247844/371472 [9:10:18<11:00:28, 3.12it/s] 67%|██████▋ | 247845/371472 [9:10:18<12:08:35, 2.83it/s] 67%|██████▋ | 247846/371472 [9:10:18<11:28:24, 2.99it/s] 67%|██████▋ | 247847/371472 [9:10:19<10:58:34, 3.13it/s] 67%|██████▋ | 247848/371472 [9:10:19<11:40:24, 2.94it/s] 67%|██████▋ | 247849/371472 [9:10:19<11:16:50, 3.04it/s] 67%|██████▋ | 247850/371472 [9:10:20<11:39:16, 2.95it/s] 67%|██████▋ | 247851/371472 [9:10:20<11:06:58, 3.09it/s] 67%|██████▋ | 247852/371472 [9:10:20<10:29:55, 3.27it/s] 67%|██████▋ | 247853/371472 [9:10:21<10:08:41, 3.38it/s] 67%|██████▋ | 247854/371472 [9:10:21<10:03:45, 3.41it/s] 67%|██████▋ | 247855/371472 [9:10:21<10:13:49, 3.36it/s] 67%|██████▋ | 247856/371472 [9:10:21<10:14:52, 3.35it/s] 67%|██████▋ | 247857/371472 [9:10:22<9:51:35, 3.48it/s] 67%|██████▋ | 247858/371472 [9:10:22<9:54:53, 3.46it/s] 67%|██████▋ | 247859/371472 [9:10:22<10:32:34, 3.26it/s] 67%|██████▋ | 247860/371472 [9:10:23<11:10:02, 3.07it/s] {'loss': 2.908, 'learning_rate': 3.996476976448534e-07, 'epoch': 10.68} + 67%|██████▋ | 247860/371472 [9:10:23<11:10:02, 3.07it/s] 67%|██████▋ | 247861/371472 [9:10:23<11:03:51, 3.10it/s] 67%|██████▋ | 247862/371472 [9:10:23<10:37:24, 3.23it/s] 67%|██████▋ | 247863/371472 [9:10:24<10:30:31, 3.27it/s] 67%|██████▋ | 247864/371472 [9:10:24<10:10:23, 3.38it/s] 67%|██████▋ | 247865/371472 [9:10:24<10:02:16, 3.42it/s] 67%|██████▋ | 247866/371472 [9:10:24<9:53:36, 3.47it/s] 67%|██████▋ | 247867/371472 [9:10:25<9:55:30, 3.46it/s] 67%|██████▋ | 247868/371472 [9:10:25<9:49:47, 3.49it/s] 67%|██████▋ | 247869/371472 [9:10:25<10:03:33, 3.41it/s] 67%|██████▋ | 247870/371472 [9:10:26<9:57:56, 3.45it/s] 67%|██████▋ | 247871/371472 [9:10:26<9:53:14, 3.47it/s] 67%|██████▋ | 247872/371472 [9:10:26<9:55:49, 3.46it/s] 67%|██████▋ | 247873/371472 [9:10:26<10:09:47, 3.38it/s] 67%|██████▋ | 247874/371472 [9:10:27<10:00:03, 3.43it/s] 67%|██████▋ | 247875/371472 [9:10:27<10:07:21, 3.39it/s] 67%|██████▋ | 247876/371472 [9:10:27<10:51:00, 3.16it/s] 67%|██████▋ | 247877/371472 [9:10:28<10:43:51, 3.20it/s] 67%|██████▋ | 247878/371472 [9:10:28<10:40:22, 3.22it/s] 67%|██████▋ | 247879/371472 [9:10:28<10:25:15, 3.29it/s] 67%|██████▋ | 247880/371472 [9:10:29<10:24:31, 3.30it/s] {'loss': 2.6948, 'learning_rate': 3.995992156693744e-07, 'epoch': 10.68} + 67%|██████▋ | 247880/371472 [9:10:29<10:24:31, 3.30it/s] 67%|██████▋ | 247881/371472 [9:10:29<10:20:57, 3.32it/s] 67%|██████▋ | 247882/371472 [9:10:29<10:12:00, 3.37it/s] 67%|██████▋ | 247883/371472 [9:10:30<10:33:58, 3.25it/s] 67%|██████▋ | 247884/371472 [9:10:30<10:18:54, 3.33it/s] 67%|██████▋ | 247885/371472 [9:10:30<10:12:43, 3.36it/s] 67%|██████▋ | 247886/371472 [9:10:30<9:58:45, 3.44it/s] 67%|██████▋ | 247887/371472 [9:10:31<10:04:46, 3.41it/s] 67%|██████▋ | 247888/371472 [9:10:31<10:09:52, 3.38it/s] 67%|██████▋ | 247889/371472 [9:10:31<11:21:55, 3.02it/s] 67%|██████▋ | 247890/371472 [9:10:32<10:59:14, 3.12it/s] 67%|██████▋ | 247891/371472 [9:10:32<10:45:07, 3.19it/s] 67%|██████▋ | 247892/371472 [9:10:32<10:22:15, 3.31it/s] 67%|██████▋ | 247893/371472 [9:10:33<10:13:56, 3.35it/s] 67%|██████▋ | 247894/371472 [9:10:33<10:05:05, 3.40it/s] 67%|██████▋ | 247895/371472 [9:10:33<10:04:11, 3.41it/s] 67%|██████▋ | 247896/371472 [9:10:33<10:03:29, 3.41it/s] 67%|██████▋ | 247897/371472 [9:10:34<10:33:26, 3.25it/s] 67%|██████▋ | 247898/371472 [9:10:34<10:16:24, 3.34it/s] 67%|██████▋ | 247899/371472 [9:10:34<9:56:10, 3.45it/s] 67%|██████▋ | 247900/371472 [9:10:35<9:42:53, 3.53it/s] {'loss': 2.7499, 'learning_rate': 3.995507336938956e-07, 'epoch': 10.68} + 67%|██████▋ | 247900/371472 [9:10:35<9:42:53, 3.53it/s] 67%|██████▋ | 247901/371472 [9:10:35<9:54:54, 3.46it/s] 67%|██████▋ | 247902/371472 [9:10:35<11:14:53, 3.05it/s] 67%|██████▋ | 247903/371472 [9:10:36<10:59:34, 3.12it/s] 67%|██████▋ | 247904/371472 [9:10:36<10:30:24, 3.27it/s] 67%|██████▋ | 247905/371472 [9:10:36<11:00:24, 3.12it/s] 67%|██████▋ | 247906/371472 [9:10:37<10:59:28, 3.12it/s] 67%|██████▋ | 247907/371472 [9:10:37<10:56:06, 3.14it/s] 67%|██████▋ | 247908/371472 [9:10:37<10:40:28, 3.22it/s] 67%|██████▋ | 247909/371472 [9:10:37<10:47:20, 3.18it/s] 67%|██████▋ | 247910/371472 [9:10:38<10:35:47, 3.24it/s] 67%|██████▋ | 247911/371472 [9:10:38<10:23:41, 3.30it/s] 67%|██████▋ | 247912/371472 [9:10:38<10:06:56, 3.39it/s] 67%|██████▋ | 247913/371472 [9:10:39<9:54:13, 3.47it/s] 67%|██████▋ | 247914/371472 [9:10:39<10:02:23, 3.42it/s] 67%|██████▋ | 247915/371472 [9:10:39<9:57:07, 3.45it/s] 67%|██████▋ | 247916/371472 [9:10:40<9:58:39, 3.44it/s] 67%|██████▋ | 247917/371472 [9:10:40<9:59:53, 3.43it/s] 67%|██████▋ | 247918/371472 [9:10:40<10:56:08, 3.14it/s] 67%|██████▋ | 247919/371472 [9:10:40<10:31:14, 3.26it/s] 67%|██████▋ | 247920/371472 [9:10:41<10:30:28, 3.27it/s] {'loss': 2.6621, 'learning_rate': 3.9950225171841667e-07, 'epoch': 10.68} + 67%|██████▋ | 247920/371472 [9:10:41<10:30:28, 3.27it/s] 67%|██████▋ | 247921/371472 [9:10:41<10:08:30, 3.38it/s] 67%|██████▋ | 247922/371472 [9:10:41<10:06:57, 3.39it/s] 67%|██████▋ | 247923/371472 [9:10:42<10:01:20, 3.42it/s] 67%|██████▋ | 247924/371472 [9:10:42<10:53:46, 3.15it/s] 67%|██████▋ | 247925/371472 [9:10:42<10:31:04, 3.26it/s] 67%|██████▋ | 247926/371472 [9:10:43<10:15:22, 3.35it/s] 67%|██████▋ | 247927/371472 [9:10:43<10:36:32, 3.23it/s] 67%|██████▋ | 247928/371472 [9:10:43<10:36:17, 3.24it/s] 67%|██████▋ | 247929/371472 [9:10:43<10:34:15, 3.25it/s] 67%|██████▋ | 247930/371472 [9:10:44<10:40:13, 3.22it/s] 67%|██████▋ | 247931/371472 [9:10:44<10:34:22, 3.25it/s] 67%|██████▋ | 247932/371472 [9:10:44<10:52:56, 3.15it/s] 67%|██████▋ | 247933/371472 [9:10:45<10:27:41, 3.28it/s] 67%|██████▋ | 247934/371472 [9:10:45<10:11:39, 3.37it/s] 67%|██████▋ | 247935/371472 [9:10:45<9:57:33, 3.45it/s] 67%|██████▋ | 247936/371472 [9:10:46<9:46:39, 3.51it/s] 67%|██████▋ | 247937/371472 [9:10:46<9:47:40, 3.50it/s] 67%|██████▋ | 247938/371472 [9:10:46<9:38:57, 3.56it/s] 67%|██████▋ | 247939/371472 [9:10:47<10:42:33, 3.20it/s] 67%|██████▋ | 247940/371472 [9:10:47<10:38:51, 3.22it/s] {'loss': 2.6516, 'learning_rate': 3.994537697429378e-07, 'epoch': 10.68} + 67%|██████▋ | 247940/371472 [9:10:47<10:38:51, 3.22it/s] 67%|██████▋ | 247941/371472 [9:10:47<10:24:22, 3.30it/s] 67%|██████▋ | 247942/371472 [9:10:47<10:27:40, 3.28it/s] 67%|██████▋ | 247943/371472 [9:10:48<10:01:30, 3.42it/s] 67%|██████▋ | 247944/371472 [9:10:48<10:10:27, 3.37it/s] 67%|██████▋ | 247945/371472 [9:10:48<10:18:10, 3.33it/s] 67%|██████▋ | 247946/371472 [9:10:49<10:14:44, 3.35it/s] 67%|██████▋ | 247947/371472 [9:10:49<10:16:20, 3.34it/s] 67%|██████▋ | 247948/371472 [9:10:49<10:01:56, 3.42it/s] 67%|██████▋ | 247949/371472 [9:10:49<9:47:23, 3.50it/s] 67%|██████▋ | 247950/371472 [9:10:50<9:47:13, 3.51it/s] 67%|██████▋ | 247951/371472 [9:10:50<10:38:41, 3.22it/s] 67%|██████▋ | 247952/371472 [9:10:50<10:45:59, 3.19it/s] 67%|██████▋ | 247953/371472 [9:10:51<10:24:51, 3.29it/s] 67%|██████▋ | 247954/371472 [9:10:51<10:26:32, 3.29it/s] 67%|██████▋ | 247955/371472 [9:10:51<10:35:09, 3.24it/s] 67%|██████▋ | 247956/371472 [9:10:52<10:18:50, 3.33it/s] 67%|██████▋ | 247957/371472 [9:10:52<10:17:56, 3.33it/s] 67%|██████▋ | 247958/371472 [9:10:52<10:08:57, 3.38it/s] 67%|██████▋ | 247959/371472 [9:10:52<10:25:20, 3.29it/s] 67%|██████▋ | 247960/371472 [9:10:53<10:17:44, 3.33it/s] {'loss': 2.7411, 'learning_rate': 3.9940528776745886e-07, 'epoch': 10.68} + 67%|██████▋ | 247960/371472 [9:10:53<10:17:44, 3.33it/s] 67%|██████▋ | 247961/371472 [9:10:53<9:55:26, 3.46it/s] 67%|██████▋ | 247962/371472 [9:10:53<9:45:37, 3.52it/s] 67%|██████▋ | 247963/371472 [9:10:54<9:36:56, 3.57it/s] 67%|██████▋ | 247964/371472 [9:10:54<9:57:51, 3.44it/s] 67%|██████▋ | 247965/371472 [9:10:54<10:05:49, 3.40it/s] 67%|██████▋ | 247966/371472 [9:10:54<9:53:36, 3.47it/s] 67%|██████▋ | 247967/371472 [9:10:55<9:51:34, 3.48it/s] 67%|██████▋ | 247968/371472 [9:10:55<9:54:47, 3.46it/s] 67%|██████▋ | 247969/371472 [9:10:55<9:42:23, 3.53it/s] 67%|██████▋ | 247970/371472 [9:10:56<9:37:19, 3.57it/s] 67%|██████▋ | 247971/371472 [9:10:56<9:31:53, 3.60it/s] 67%|██████▋ | 247972/371472 [9:10:56<9:29:48, 3.61it/s] 67%|██████▋ | 247973/371472 [9:10:56<9:31:13, 3.60it/s] 67%|██████▋ | 247974/371472 [9:10:57<9:47:35, 3.50it/s] 67%|██████▋ | 247975/371472 [9:10:57<9:36:34, 3.57it/s] 67%|██████▋ | 247976/371472 [9:10:57<9:36:45, 3.57it/s] 67%|██████▋ | 247977/371472 [9:10:58<9:57:52, 3.44it/s] 67%|���█████▋ | 247978/371472 [9:10:58<10:01:21, 3.42it/s] 67%|██████▋ | 247979/371472 [9:10:58<10:24:32, 3.30it/s] 67%|██████▋ | 247980/371472 [9:10:59<10:48:17, 3.17it/s] {'loss': 2.8197, 'learning_rate': 3.9935680579198004e-07, 'epoch': 10.68} + 67%|██████▋ | 247980/371472 [9:10:59<10:48:17, 3.17it/s] 67%|██████▋ | 247981/371472 [9:10:59<10:30:03, 3.27it/s] 67%|██████▋ | 247982/371472 [9:10:59<10:45:51, 3.19it/s] 67%|██████▋ | 247983/371472 [9:10:59<10:47:47, 3.18it/s] 67%|██████▋ | 247984/371472 [9:11:00<10:30:04, 3.27it/s] 67%|██████▋ | 247985/371472 [9:11:00<10:09:50, 3.37it/s] 67%|██████▋ | 247986/371472 [9:11:00<10:11:35, 3.37it/s] 67%|██████▋ | 247987/371472 [9:11:01<9:59:12, 3.43it/s] 67%|██████▋ | 247988/371472 [9:11:01<10:14:08, 3.35it/s] 67%|██████▋ | 247989/371472 [9:11:01<10:08:18, 3.38it/s] 67%|██████▋ | 247990/371472 [9:11:02<10:13:18, 3.36it/s] 67%|██████▋ | 247991/371472 [9:11:02<10:16:41, 3.34it/s] 67%|██████▋ | 247992/371472 [9:11:02<10:30:09, 3.27it/s] 67%|██████▋ | 247993/371472 [9:11:03<11:08:37, 3.08it/s] 67%|██████▋ | 247994/371472 [9:11:03<11:24:19, 3.01it/s] 67%|██████▋ | 247995/371472 [9:11:03<11:29:09, 2.99it/s] 67%|██████▋ | 247996/371472 [9:11:04<11:26:35, 3.00it/s] 67%|██████▋ | 247997/371472 [9:11:04<11:05:44, 3.09it/s] 67%|██████▋ | 247998/371472 [9:11:04<11:20:31, 3.02it/s] 67%|██████▋ | 247999/371472 [9:11:05<11:24:41, 3.01it/s] 67%|██████▋ | 248000/371472 [9:11:05<11:08:52, 3.08it/s] {'loss': 2.8438, 'learning_rate': 3.993083238165011e-07, 'epoch': 10.68} + 67%|██████▋ | 248000/371472 [9:11:05<11:08:52, 3.08it/s] 67%|██████▋ | 248001/371472 [9:11:05<10:53:56, 3.15it/s] 67%|██████▋ | 248002/371472 [9:11:05<10:53:45, 3.15it/s] 67%|██████▋ | 248003/371472 [9:11:06<10:35:16, 3.24it/s] 67%|██████▋ | 248004/371472 [9:11:06<10:23:36, 3.30it/s] 67%|██████▋ | 248005/371472 [9:11:06<10:16:34, 3.34it/s] 67%|██████▋ | 248006/371472 [9:11:07<10:23:35, 3.30it/s] 67%|██████▋ | 248007/371472 [9:11:07<10:16:43, 3.34it/s] 67%|██████▋ | 248008/371472 [9:11:07<10:13:49, 3.35it/s] 67%|██████▋ | 248009/371472 [9:11:08<10:40:14, 3.21it/s] 67%|██████▋ | 248010/371472 [9:11:08<10:40:20, 3.21it/s] 67%|██████▋ | 248011/371472 [9:11:08<10:28:55, 3.27it/s] 67%|██████▋ | 248012/371472 [9:11:08<10:26:22, 3.28it/s] 67%|██████▋ | 248013/371472 [9:11:09<10:31:03, 3.26it/s] 67%|██████▋ | 248014/371472 [9:11:09<10:24:15, 3.30it/s] 67%|██████▋ | 248015/371472 [9:11:09<11:08:52, 3.08it/s] 67%|██████▋ | 248016/371472 [9:11:10<11:33:58, 2.96it/s] 67%|██████▋ | 248017/371472 [9:11:10<11:27:50, 2.99it/s] 67%|██████▋ | 248018/371472 [9:11:10<10:50:57, 3.16it/s] 67%|██████▋ | 248019/371472 [9:11:11<10:41:13, 3.21it/s] 67%|██████▋ | 248020/371472 [9:11:11<10:34:37, 3.24it/s] {'loss': 2.7967, 'learning_rate': 3.9925984184102213e-07, 'epoch': 10.68} + 67%|██████▋ | 248020/371472 [9:11:11<10:34:37, 3.24it/s] 67%|██████▋ | 248021/371472 [9:11:11<10:24:26, 3.30it/s] 67%|██████▋ | 248022/371472 [9:11:12<10:12:08, 3.36it/s] 67%|██████▋ | 248023/371472 [9:11:12<9:59:36, 3.43it/s] 67%|██████▋ | 248024/371472 [9:11:12<10:01:21, 3.42it/s] 67%|██████▋ | 248025/371472 [9:11:12<9:51:48, 3.48it/s] 67%|██████▋ | 248026/371472 [9:11:13<10:46:01, 3.18it/s] 67%|██████▋ | 248027/371472 [9:11:13<10:59:43, 3.12it/s] 67%|██████▋ | 248028/371472 [9:11:13<10:32:39, 3.25it/s] 67%|██████▋ | 248029/371472 [9:11:14<10:26:39, 3.28it/s] 67%|██████▋ | 248030/371472 [9:11:14<10:12:50, 3.36it/s] 67%|██████▋ | 248031/371472 [9:11:14<10:14:22, 3.35it/s] 67%|██████▋ | 248032/371472 [9:11:15<10:14:22, 3.35it/s] 67%|██████▋ | 248033/371472 [9:11:15<10:16:59, 3.33it/s] 67%|██████▋ | 248034/371472 [9:11:15<10:18:57, 3.32it/s] 67%|██████▋ | 248035/371472 [9:11:16<10:24:43, 3.29it/s] 67%|██████▋ | 248036/371472 [9:11:16<10:22:56, 3.30it/s] 67%|██████▋ | 248037/371472 [9:11:16<9:58:29, 3.44it/s] 67%|██████▋ | 248038/371472 [9:11:16<9:40:28, 3.54it/s] 67%|██████▋ | 248039/371472 [9:11:17<9:49:08, 3.49it/s] 67%|██████▋ | 248040/371472 [9:11:17<9:53:07, 3.47it/s] {'loss': 2.8629, 'learning_rate': 3.992113598655433e-07, 'epoch': 10.68} + 67%|██████▋ | 248040/371472 [9:11:17<9:53:07, 3.47it/s] 67%|██████▋ | 248041/371472 [9:11:17<10:01:26, 3.42it/s] 67%|██████▋ | 248042/371472 [9:11:18<10:21:14, 3.31it/s] 67%|██████▋ | 248043/371472 [9:11:18<10:05:48, 3.40it/s] 67%|██████▋ | 248044/371472 [9:11:18<9:58:17, 3.44it/s] 67%|██████▋ | 248045/371472 [9:11:18<9:59:52, 3.43it/s] 67%|██████▋ | 248046/371472 [9:11:19<9:45:57, 3.51it/s] 67%|██████▋ | 248047/371472 [9:11:19<9:52:49, 3.47it/s] 67%|██████▋ | 248048/371472 [9:11:19<9:35:45, 3.57it/s] 67%|██████▋ | 248049/371472 [9:11:20<10:44:08, 3.19it/s] 67%|██████▋ | 248050/371472 [9:11:20<10:24:44, 3.29it/s] 67%|██████▋ | 248051/371472 [9:11:20<10:27:09, 3.28it/s] 67%|██████▋ | 248052/371472 [9:11:21<10:16:53, 3.33it/s] 67%|██████▋ | 248053/371472 [9:11:21<11:20:55, 3.02it/s] 67%|██████▋ | 248054/371472 [9:11:21<10:38:24, 3.22it/s] 67%|██████▋ | 248055/371472 [9:11:22<10:37:29, 3.23it/s] 67%|██████▋ | 248056/371472 [9:11:22<10:21:03, 3.31it/s] 67%|██████▋ | 248057/371472 [9:11:22<10:23:04, 3.30it/s] 67%|██████▋ | 248058/371472 [9:11:22<10:10:07, 3.37it/s] 67%|██████▋ | 248059/371472 [9:11:23<11:03:39, 3.10it/s] 67%|██████▋ | 248060/371472 [9:11:23<10:39:50, 3.21it/s] {'loss': 2.7564, 'learning_rate': 3.991628778900644e-07, 'epoch': 10.68} + 67%|██████▋ | 248060/371472 [9:11:23<10:39:50, 3.21it/s] 67%|██████▋ | 248061/371472 [9:11:23<10:31:08, 3.26it/s] 67%|██████▋ | 248062/371472 [9:11:24<10:24:42, 3.29it/s] 67%|██████▋ | 248063/371472 [9:11:24<10:00:15, 3.43it/s] 67%|██████▋ | 248064/371472 [9:11:24<10:17:15, 3.33it/s] 67%|██████▋ | 248065/371472 [9:11:24<10:05:00, 3.40it/s] 67%|██████▋ | 248066/371472 [9:11:25<10:10:17, 3.37it/s] 67%|██████▋ | 248067/371472 [9:11:25<9:56:52, 3.45it/s] 67%|██████▋ | 248068/371472 [9:11:25<9:45:27, 3.51it/s] 67%|██████▋ | 248069/371472 [9:11:26<9:45:16, 3.51it/s] 67%|██████▋ | 248070/371472 [9:11:26<9:52:22, 3.47it/s] 67%|██████▋ | 248071/371472 [9:11:26<9:48:45, 3.49it/s] 67%|██████▋ | 248072/371472 [9:11:26<9:49:26, 3.49it/s] 67%|██████▋ | 248073/371472 [9:11:27<9:48:31, 3.49it/s] 67%|██████▋ | 248074/371472 [9:11:27<10:14:42, 3.35it/s] 67%|██████▋ | 248075/371472 [9:11:27<10:03:59, 3.41it/s] 67%|██████▋ | 248076/371472 [9:11:28<9:53:00, 3.47it/s] 67%|██████▋ | 248077/371472 [9:11:28<9:52:09, 3.47it/s] 67%|██████▋ | 248078/371472 [9:11:28<9:47:26, 3.50it/s] 67%|██████▋ | 248079/371472 [9:11:28<9:33:28, 3.59it/s] 67%|██████▋ | 248080/371472 [9:11:29<9:34:38, 3.58it/s] {'loss': 2.9037, 'learning_rate': 3.991143959145855e-07, 'epoch': 10.69} + 67%|██████▋ | 248080/371472 [9:11:29<9:34:38, 3.58it/s] 67%|██████▋ | 248081/371472 [9:11:29<10:13:23, 3.35it/s] 67%|██████▋ | 248082/371472 [9:11:29<10:05:44, 3.40it/s] 67%|██████▋ | 248083/371472 [9:11:30<10:24:08, 3.29it/s] 67%|██████▋ | 248084/371472 [9:11:30<10:29:28, 3.27it/s] 67%|██████▋ | 248085/371472 [9:11:30<10:00:35, 3.42it/s] 67%|██████▋ | 248086/371472 [9:11:31<11:04:58, 3.09it/s] 67%|██████▋ | 248087/371472 [9:11:31<10:49:14, 3.17it/s] 67%|██████▋ | 248088/371472 [9:11:31<10:33:06, 3.25it/s] 67%|██████▋ | 248089/371472 [9:11:32<10:14:06, 3.35it/s] 67%|██████▋ | 248090/371472 [9:11:32<10:08:57, 3.38it/s] 67%|██████▋ | 248091/371472 [9:11:32<10:09:14, 3.38it/s] 67%|██████▋ | 248092/371472 [9:11:32<9:52:19, 3.47it/s] 67%|██████▋ | 248093/371472 [9:11:33<10:08:37, 3.38it/s] 67%|██████▋ | 248094/371472 [9:11:33<10:01:14, 3.42it/s] 67%|██████▋ | 248095/371472 [9:11:33<9:51:28, 3.48it/s] 67%|██████▋ | 248096/371472 [9:11:34<10:02:08, 3.41it/s] 67%|██████▋ | 248097/371472 [9:11:34<10:26:23, 3.28it/s] 67%|██████▋ | 248098/371472 [9:11:34<10:12:24, 3.36it/s] 67%|██████▋ | 248099/371472 [9:11:34<9:51:44, 3.47it/s] 67%|██████▋ | 248100/371472 [9:11:35<9:35:49, 3.57it/s] {'loss': 2.5739, 'learning_rate': 3.990659139391066e-07, 'epoch': 10.69} + 67%|██████▋ | 248100/371472 [9:11:35<9:35:49, 3.57it/s] 67%|██████▋ | 248101/371472 [9:11:35<9:54:15, 3.46it/s] 67%|██████▋ | 248102/371472 [9:11:35<10:13:06, 3.35it/s] 67%|██████▋ | 248103/371472 [9:11:36<11:16:27, 3.04it/s] 67%|██████▋ | 248104/371472 [9:11:36<11:36:22, 2.95it/s] 67%|██████▋ | 248105/371472 [9:11:36<10:56:05, 3.13it/s] 67%|██████▋ | 248106/371472 [9:11:37<10:23:19, 3.30it/s] 67%|██████▋ | 248107/371472 [9:11:37<10:19:19, 3.32it/s] 67%|██████▋ | 248108/371472 [9:11:37<10:19:56, 3.32it/s] 67%|██████▋ | 248109/371472 [9:11:38<11:07:51, 3.08it/s] 67%|██████▋ | 248110/371472 [9:11:38<11:10:08, 3.07it/s] 67%|██████▋ | 248111/371472 [9:11:38<10:42:30, 3.20it/s] 67%|██████▋ | 248112/371472 [9:11:39<10:43:13, 3.20it/s] 67%|██████▋ | 248113/371472 [9:11:39<10:38:44, 3.22it/s] 67%|██████▋ | 248114/371472 [9:11:39<10:21:29, 3.31it/s] 67%|██████▋ | 248115/371472 [9:11:39<10:10:09, 3.37it/s] 67%|██████▋ | 248116/371472 [9:11:40<10:08:23, 3.38it/s] 67%|██████▋ | 248117/371472 [9:11:40<9:51:40, 3.47it/s] 67%|██████▋ | 248118/371472 [9:11:40<11:33:45, 2.96it/s] 67%|██████▋ | 248119/371472 [9:11:41<11:04:36, 3.09it/s] 67%|██████▋ | 248120/371472 [9:11:41<10:27:07, 3.28it/s] {'loss': 2.4965, 'learning_rate': 3.9901743196362775e-07, 'epoch': 10.69} + 67%|██████▋ | 248120/371472 [9:11:41<10:27:07, 3.28it/s] 67%|██████▋ | 248121/371472 [9:11:41<10:19:57, 3.32it/s] 67%|██████▋ | 248122/371472 [9:11:42<10:12:39, 3.36it/s] 67%|██████▋ | 248123/371472 [9:11:42<9:59:20, 3.43it/s] 67%|██████▋ | 248124/371472 [9:11:42<9:54:42, 3.46it/s] 67%|██████▋ | 248125/371472 [9:11:43<10:31:13, 3.26it/s] 67%|██████▋ | 248126/371472 [9:11:43<10:07:59, 3.38it/s] 67%|██████▋ | 248127/371472 [9:11:43<10:00:51, 3.42it/s] 67%|██████▋ | 248128/371472 [9:11:43<9:59:53, 3.43it/s] 67%|██████▋ | 248129/371472 [9:11:44<10:54:49, 3.14it/s] 67%|██████▋ | 248130/371472 [9:11:44<11:18:04, 3.03it/s] 67%|██████▋ | 248131/371472 [9:11:44<10:54:45, 3.14it/s] 67%|██████▋ | 248132/371472 [9:11:45<10:26:54, 3.28it/s] 67%|██████▋ | 248133/371472 [9:11:45<10:13:22, 3.35it/s] 67%|██████▋ | 248134/371472 [9:11:45<10:14:05, 3.35it/s] 67%|██████▋ | 248135/371472 [9:11:46<10:36:58, 3.23it/s] 67%|██████▋ | 248136/371472 [9:11:46<10:36:35, 3.23it/s] 67%|██████▋ | 248137/371472 [9:11:46<10:33:13, 3.25it/s] 67%|██████▋ | 248138/371472 [9:11:46<10:27:52, 3.27it/s] 67%|██████▋ | 248139/371472 [9:11:47<10:12:07, 3.36it/s] 67%|██████▋ | 248140/371472 [9:11:47<10:13:50, 3.35it/s] {'loss': 2.7925, 'learning_rate': 3.9896894998814877e-07, 'epoch': 10.69} + 67%|██████▋ | 248140/371472 [9:11:47<10:13:50, 3.35it/s] 67%|██████▋ | 248141/371472 [9:11:47<10:16:35, 3.33it/s] 67%|██████▋ | 248142/371472 [9:11:48<10:09:31, 3.37it/s] 67%|██████▋ | 248143/371472 [9:11:48<11:06:24, 3.08it/s] 67%|██████▋ | 248144/371472 [9:11:48<10:56:20, 3.13it/s] 67%|██████▋ | 248145/371472 [9:11:49<10:32:16, 3.25it/s] 67%|██████▋ | 248146/371472 [9:11:49<10:13:57, 3.35it/s] 67%|██████▋ | 248147/371472 [9:11:49<10:39:53, 3.21it/s] 67%|██████▋ | 248148/371472 [9:11:50<10:26:36, 3.28it/s] 67%|██████▋ | 248149/371472 [9:11:50<10:08:00, 3.38it/s] 67%|██████▋ | 248150/371472 [9:11:50<10:01:23, 3.42it/s] 67%|██████▋ | 248151/371472 [9:11:50<10:43:58, 3.19it/s] 67%|██████▋ | 248152/371472 [9:11:51<10:20:31, 3.31it/s] 67%|██████▋ | 248153/371472 [9:11:51<10:16:33, 3.33it/s] 67%|██████▋ | 248154/371472 [9:11:51<10:15:09, 3.34it/s] 67%|██████▋ | 248155/371472 [9:11:52<10:34:19, 3.24it/s] 67%|██████▋ | 248156/371472 [9:11:52<10:09:53, 3.37it/s] 67%|██████▋ | 248157/371472 [9:11:52<10:15:44, 3.34it/s] 67%|██████▋ | 248158/371472 [9:11:53<10:37:02, 3.23it/s] 67%|██████▋ | 248159/371472 [9:11:53<10:07:52, 3.38it/s] 67%|██████▋ | 248160/371472 [9:11:53<10:21:43, 3.31it/s] {'loss': 2.7307, 'learning_rate': 3.9892046801266995e-07, 'epoch': 10.69} + 67%|██████▋ | 248160/371472 [9:11:53<10:21:43, 3.31it/s] 67%|██████▋ | 248161/371472 [9:11:54<10:52:56, 3.15it/s] 67%|██████▋ | 248162/371472 [9:11:54<10:27:59, 3.27it/s] 67%|██████▋ | 248163/371472 [9:11:54<10:22:09, 3.30it/s] 67%|██████▋ | 248164/371472 [9:11:54<9:48:27, 3.49it/s] 67%|██████▋ | 248165/371472 [9:11:55<9:32:32, 3.59it/s] 67%|██████▋ | 248166/371472 [9:11:55<9:40:03, 3.54it/s] 67%|██████▋ | 248167/371472 [9:11:55<10:33:58, 3.24it/s] 67%|██████▋ | 248168/371472 [9:11:56<10:38:10, 3.22it/s] 67%|██████▋ | 248169/371472 [9:11:56<10:20:02, 3.31it/s] 67%|██████▋ | 248170/371472 [9:11:56<10:11:32, 3.36it/s] 67%|██████▋ | 248171/371472 [9:11:56<10:20:02, 3.31it/s] 67%|██████▋ | 248172/371472 [9:11:57<10:10:29, 3.37it/s] 67%|██████▋ | 248173/371472 [9:11:57<10:08:16, 3.38it/s] 67%|██████▋ | 248174/371472 [9:11:57<10:14:14, 3.35it/s] 67%|██████▋ | 248175/371472 [9:11:58<10:08:49, 3.38it/s] 67%|██████▋ | 248176/371472 [9:11:58<9:40:07, 3.54it/s] 67%|██████▋ | 248177/371472 [9:11:58<9:43:50, 3.52it/s] 67%|██████▋ | 248178/371472 [9:11:58<9:52:15, 3.47it/s] 67%|██████▋ | 248179/371472 [9:11:59<9:51:36, 3.47it/s] 67%|██████▋ | 248180/371472 [9:11:59<9:42:55, 3.53it/s] {'loss': 2.6534, 'learning_rate': 3.98871986037191e-07, 'epoch': 10.69} + 67%|██████▋ | 248180/371472 [9:11:59<9:42:55, 3.53it/s] 67%|██████▋ | 248181/371472 [9:11:59<9:47:08, 3.50it/s] 67%|██████▋ | 248182/371472 [9:12:00<9:43:28, 3.52it/s] 67%|██████▋ | 248183/371472 [9:12:00<9:31:07, 3.60it/s] 67%|██████▋ | 248184/371472 [9:12:00<9:34:44, 3.58it/s] 67%|██████▋ | 248185/371472 [9:12:01<10:33:02, 3.25it/s] 67%|██████▋ | 248186/371472 [9:12:01<10:07:31, 3.38it/s] 67%|██████▋ | 248187/371472 [9:12:01<10:26:05, 3.28it/s] 67%|██████▋ | 248188/371472 [9:12:01<10:19:00, 3.32it/s] 67%|██████▋ | 248189/371472 [9:12:02<10:09:42, 3.37it/s] 67%|██████▋ | 248190/371472 [9:12:02<9:52:43, 3.47it/s] 67%|██████▋ | 248191/371472 [9:12:02<10:23:19, 3.30it/s] 67%|██████▋ | 248192/371472 [9:12:03<10:01:17, 3.42it/s] 67%|██████▋ | 248193/371472 [9:12:03<10:01:24, 3.42it/s] 67%|██████▋ | 248194/371472 [9:12:03<10:04:24, 3.40it/s] 67%|██████▋ | 248195/371472 [9:12:03<9:48:16, 3.49it/s] 67%|██████▋ | 248196/371472 [9:12:04<10:08:42, 3.38it/s] 67%|██████▋ | 248197/371472 [9:12:04<10:03:57, 3.40it/s] 67%|██████▋ | 248198/371472 [9:12:04<10:09:39, 3.37it/s] 67%|██████▋ | 248199/371472 [9:12:05<10:07:22, 3.38it/s] 67%|██████▋ | 248200/371472 [9:12:05<9:51:13, 3.48it/s] {'loss': 2.664, 'learning_rate': 3.9882350406171215e-07, 'epoch': 10.69} + 67%|██████▋ | 248200/371472 [9:12:05<9:51:13, 3.48it/s] 67%|██████▋ | 248201/371472 [9:12:05<10:12:01, 3.36it/s] 67%|██████▋ | 248202/371472 [9:12:06<10:34:05, 3.24it/s] 67%|██████▋ | 248203/371472 [9:12:06<11:03:41, 3.10it/s] 67%|██████▋ | 248204/371472 [9:12:06<11:24:24, 3.00it/s] 67%|██████▋ | 248205/371472 [9:12:07<11:01:53, 3.10it/s] 67%|██████▋ | 248206/371472 [9:12:07<10:51:56, 3.15it/s] 67%|██████▋ | 248207/371472 [9:12:07<10:28:45, 3.27it/s] 67%|██████▋ | 248208/371472 [9:12:07<10:16:13, 3.33it/s] 67%|██████▋ | 248209/371472 [9:12:08<10:45:26, 3.18it/s] 67%|██████▋ | 248210/371472 [9:12:08<10:51:10, 3.15it/s] 67%|██████▋ | 248211/371472 [9:12:08<10:27:11, 3.28it/s] 67%|██████▋ | 248212/371472 [9:12:09<11:19:55, 3.02it/s] 67%|██████▋ | 248213/371472 [9:12:09<10:47:04, 3.17it/s] 67%|██████▋ | 248214/371472 [9:12:09<11:22:38, 3.01it/s] 67%|██████▋ | 248215/371472 [9:12:10<10:52:57, 3.15it/s] 67%|██████▋ | 248216/371472 [9:12:10<10:28:58, 3.27it/s] 67%|██████▋ | 248217/371472 [9:12:10<10:39:01, 3.21it/s] 67%|██████▋ | 248218/371472 [9:12:11<10:17:13, 3.33it/s] 67%|██████▋ | 248219/371472 [9:12:11<10:31:55, 3.25it/s] 67%|██████▋ | 248220/371472 [9:12:11<10:25:04, 3.29it/s] {'loss': 2.8853, 'learning_rate': 3.987750220862332e-07, 'epoch': 10.69} + 67%|██████▋ | 248220/371472 [9:12:11<10:25:04, 3.29it/s] 67%|██████▋ | 248221/371472 [9:12:12<10:31:09, 3.25it/s] 67%|██████▋ | 248222/371472 [9:12:12<10:50:06, 3.16it/s] 67%|██████▋ | 248223/371472 [9:12:12<10:20:56, 3.31it/s] 67%|██████▋ | 248224/371472 [9:12:12<10:32:34, 3.25it/s] 67%|██████▋ | 248225/371472 [9:12:13<10:34:49, 3.24it/s] 67%|██████▋ | 248226/371472 [9:12:13<10:29:45, 3.26it/s] 67%|██████▋ | 248227/371472 [9:12:13<10:14:04, 3.35it/s] 67%|██████▋ | 248228/371472 [9:12:14<10:52:20, 3.15it/s] 67%|██████▋ | 248229/371472 [9:12:14<10:32:09, 3.25it/s] 67%|██████▋ | 248230/371472 [9:12:14<10:19:17, 3.32it/s] 67%|██████▋ | 248231/371472 [9:12:15<10:08:50, 3.37it/s] 67%|██████▋ | 248232/371472 [9:12:15<9:51:07, 3.47it/s] 67%|██████▋ | 248233/371472 [9:12:15<9:57:09, 3.44it/s] 67%|██████▋ | 248234/371472 [9:12:15<9:58:06, 3.43it/s] 67%|██████▋ | 248235/371472 [9:12:16<9:46:18, 3.50it/s] 67%|██████▋ | 248236/371472 [9:12:16<9:40:05, 3.54it/s] 67%|██████▋ | 248237/371472 [9:12:16<9:40:08, 3.54it/s] 67%|██████▋ | 248238/371472 [9:12:17<9:50:47, 3.48it/s] 67%|██████▋ | 248239/371472 [9:12:17<10:03:35, 3.40it/s] 67%|██████▋ | 248240/371472 [9:12:17<10:02:52, 3.41it/s] {'loss': 2.7974, 'learning_rate': 3.987265401107544e-07, 'epoch': 10.69} + 67%|██████▋ | 248240/371472 [9:12:17<10:02:52, 3.41it/s] 67%|██████▋ | 248241/371472 [9:12:17<10:01:57, 3.41it/s] 67%|██████▋ | 248242/371472 [9:12:18<10:15:52, 3.33it/s] 67%|██████▋ | 248243/371472 [9:12:18<10:10:40, 3.36it/s] 67%|██████▋ | 248244/371472 [9:12:18<10:40:14, 3.21it/s] 67%|██████▋ | 248245/371472 [9:12:19<10:34:55, 3.23it/s] 67%|██████▋ | 248246/371472 [9:12:19<10:18:27, 3.32it/s] 67%|██████▋ | 248247/371472 [9:12:19<9:52:14, 3.47it/s] 67%|██████▋ | 248248/371472 [9:12:19<9:39:32, 3.54it/s] 67%|██████▋ | 248249/371472 [9:12:20<9:36:43, 3.56it/s] 67%|██████▋ | 248250/371472 [9:12:20<10:27:41, 3.27it/s] 67%|██████▋ | 248251/371472 [9:12:20<10:02:54, 3.41it/s] 67%|██████▋ | 248252/371472 [9:12:21<10:01:21, 3.42it/s] 67%|██████▋ | 248253/371472 [9:12:21<9:58:52, 3.43it/s] 67%|██████▋ | 248254/371472 [9:12:21<10:04:02, 3.40it/s] 67%|██████▋ | 248255/371472 [9:12:22<9:59:24, 3.43it/s] 67%|██████▋ | 248256/371472 [9:12:22<10:03:53, 3.40it/s] 67%|██████▋ | 248257/371472 [9:12:22<9:53:44, 3.46it/s] 67%|██████▋ | 248258/371472 [9:12:22<9:54:25, 3.45it/s] 67%|██████▋ | 248259/371472 [9:12:23<9:54:13, 3.46it/s] 67%|██████▋ | 248260/371472 [9:12:23<9:51:49, 3.47it/s] {'loss': 2.8183, 'learning_rate': 3.9867805813527547e-07, 'epoch': 10.69} + 67%|██████▋ | 248260/371472 [9:12:23<9:51:49, 3.47it/s] 67%|██████▋ | 248261/371472 [9:12:23<9:55:00, 3.45it/s] 67%|██████▋ | 248262/371472 [9:12:24<9:57:10, 3.44it/s] 67%|██████▋ | 248263/371472 [9:12:24<10:04:31, 3.40it/s] 67%|██████▋ | 248264/371472 [9:12:24<10:11:14, 3.36it/s] 67%|██████▋ | 248265/371472 [9:12:25<10:15:00, 3.34it/s] 67%|██████▋ | 248266/371472 [9:12:25<10:02:57, 3.41it/s] 67%|██████▋ | 248267/371472 [9:12:25<10:07:13, 3.38it/s] 67%|██████▋ | 248268/371472 [9:12:25<9:58:48, 3.43it/s] 67%|██████▋ | 248269/371472 [9:12:26<10:00:55, 3.42it/s] 67%|██████▋ | 248270/371472 [9:12:26<10:44:16, 3.19it/s] 67%|██████▋ | 248271/371472 [9:12:26<10:25:00, 3.29it/s] 67%|██████▋ | 248272/371472 [9:12:27<10:09:52, 3.37it/s] 67%|██████▋ | 248273/371472 [9:12:27<10:21:33, 3.30it/s] 67%|██████▋ | 248274/371472 [9:12:27<10:29:54, 3.26it/s] 67%|██████▋ | 248275/371472 [9:12:28<10:24:27, 3.29it/s] 67%|██████▋ | 248276/371472 [9:12:28<10:01:17, 3.41it/s] 67%|██████▋ | 248277/371472 [9:12:28<10:04:11, 3.40it/s] 67%|██████▋ | 248278/371472 [9:12:28<9:46:07, 3.50it/s] 67%|██████▋ | 248279/371472 [9:12:29<9:34:56, 3.57it/s] 67%|██████▋ | 248280/371472 [9:12:29<9:45:02, 3.51it/s] {'loss': 2.7635, 'learning_rate': 3.986295761597966e-07, 'epoch': 10.69} + 67%|██████▋ | 248280/371472 [9:12:29<9:45:02, 3.51it/s] 67%|██████▋ | 248281/371472 [9:12:29<9:52:18, 3.47it/s] 67%|██████▋ | 248282/371472 [9:12:29<9:44:22, 3.51it/s] 67%|██████▋ | 248283/371472 [9:12:30<9:43:58, 3.52it/s] 67%|██████▋ | 248284/371472 [9:12:30<9:37:21, 3.56it/s] 67%|██████▋ | 248285/371472 [9:12:30<9:56:09, 3.44it/s] 67%|██████▋ | 248286/371472 [9:12:31<9:43:46, 3.52it/s] 67%|██████▋ | 248287/371472 [9:12:31<9:35:54, 3.56it/s] 67%|██████▋ | 248288/371472 [9:12:31<9:40:12, 3.54it/s] 67%|██████▋ | 248289/371472 [9:12:31<9:40:26, 3.54it/s] 67%|██████▋ | 248290/371472 [9:12:32<9:34:29, 3.57it/s] 67%|██████▋ | 248291/371472 [9:12:32<9:18:29, 3.68it/s] 67%|██████▋ | 248292/371472 [9:12:32<9:20:00, 3.67it/s] 67%|██████▋ | 248293/371472 [9:12:33<9:29:52, 3.60it/s] 67%|██████▋ | 248294/371472 [9:12:33<9:44:53, 3.51it/s] 67%|██████▋ | 248295/371472 [9:12:33<9:57:10, 3.44it/s] 67%|██████▋ | 248296/371472 [9:12:33<9:50:24, 3.48it/s] 67%|██████▋ | 248297/371472 [9:12:34<9:32:38, 3.58it/s] 67%|██████▋ | 248298/371472 [9:12:34<10:11:25, 3.36it/s] 67%|██████▋ | 248299/371472 [9:12:34<9:54:02, 3.46it/s] 67%|██████▋ | 248300/371472 [9:12:35<10:03:30, 3.40it/s] {'loss': 2.7195, 'learning_rate': 3.9858109418431766e-07, 'epoch': 10.69} + 67%|██████▋ | 248300/371472 [9:12:35<10:03:30, 3.40it/s] 67%|██████▋ | 248301/371472 [9:12:35<9:52:12, 3.47it/s] 67%|██████▋ | 248302/371472 [9:12:35<10:05:26, 3.39it/s] 67%|██████▋ | 248303/371472 [9:12:35<9:56:14, 3.44it/s] 67%|██████▋ | 248304/371472 [9:12:36<9:47:46, 3.49it/s] 67%|██████▋ | 248305/371472 [9:12:36<10:06:17, 3.39it/s] 67%|██████▋ | 248306/371472 [9:12:36<9:49:38, 3.48it/s] 67%|██████▋ | 248307/371472 [9:12:37<9:51:59, 3.47it/s] 67%|██████▋ | 248308/371472 [9:12:37<9:56:05, 3.44it/s] 67%|██████▋ | 248309/371472 [9:12:37<10:25:52, 3.28it/s] 67%|██████▋ | 248310/371472 [9:12:38<10:46:03, 3.18it/s] 67%|██████▋ | 248311/371472 [9:12:38<11:01:31, 3.10it/s] 67%|██████▋ | 248312/371472 [9:12:38<10:33:51, 3.24it/s] 67%|██████▋ | 248313/371472 [9:12:39<10:41:59, 3.20it/s] 67%|██████▋ | 248314/371472 [9:12:39<10:37:44, 3.22it/s] 67%|██████▋ | 248315/371472 [9:12:39<10:15:22, 3.34it/s] 67%|██████▋ | 248316/371472 [9:12:39<10:49:42, 3.16it/s] 67%|██████▋ | 248317/371472 [9:12:40<10:42:41, 3.19it/s] 67%|██████▋ | 248318/371472 [9:12:40<10:42:46, 3.19it/s] 67%|██████▋ | 248319/371472 [9:12:40<10:24:17, 3.29it/s] 67%|██████▋ | 248320/371472 [9:12:41<10:30:41, 3.25it/s] {'loss': 2.7906, 'learning_rate': 3.9853261220883884e-07, 'epoch': 10.7} + 67%|██████▋ | 248320/371472 [9:12:41<10:30:41, 3.25it/s] 67%|██████▋ | 248321/371472 [9:12:41<10:28:46, 3.26it/s] 67%|██████▋ | 248322/371472 [9:12:41<10:10:28, 3.36it/s] 67%|██████▋ | 248323/371472 [9:12:42<10:29:04, 3.26it/s] 67%|██████▋ | 248324/371472 [9:12:42<10:00:28, 3.42it/s] 67%|██████▋ | 248325/371472 [9:12:42<10:12:25, 3.35it/s] 67%|██████▋ | 248326/371472 [9:12:42<9:55:12, 3.45it/s] 67%|██████▋ | 248327/371472 [9:12:43<9:52:28, 3.46it/s] 67%|██████▋ | 248328/371472 [9:12:43<10:09:42, 3.37it/s] 67%|██████▋ | 248329/371472 [9:12:43<10:23:49, 3.29it/s] 67%|██████▋ | 248330/371472 [9:12:44<10:14:19, 3.34it/s] 67%|██████▋ | 248331/371472 [9:12:44<10:06:27, 3.38it/s] 67%|██████▋ | 248332/371472 [9:12:44<10:08:29, 3.37it/s] 67%|██████▋ | 248333/371472 [9:12:45<9:59:21, 3.42it/s] 67%|██████▋ | 248334/371472 [9:12:45<9:54:50, 3.45it/s] 67%|██████▋ | 248335/371472 [9:12:45<10:06:29, 3.38it/s] 67%|██████▋ | 248336/371472 [9:12:45<10:07:57, 3.38it/s] 67%|██████▋ | 248337/371472 [9:12:46<9:51:42, 3.47it/s] 67%|██████▋ | 248338/371472 [9:12:46<10:04:20, 3.40it/s] 67%|██████▋ | 248339/371472 [9:12:46<9:51:25, 3.47it/s] 67%|██████▋ | 248340/371472 [9:12:47<10:02:37, 3.41it/s] {'loss': 2.5707, 'learning_rate': 3.9848413023335986e-07, 'epoch': 10.7} + 67%|██████▋ | 248340/371472 [9:12:47<10:02:37, 3.41it/s] 67%|██████▋ | 248341/371472 [9:12:47<10:09:59, 3.36it/s] 67%|██████▋ | 248342/371472 [9:12:47<10:12:24, 3.35it/s] 67%|██████▋ | 248343/371472 [9:12:47<10:16:35, 3.33it/s] 67%|██████▋ | 248344/371472 [9:12:48<10:08:22, 3.37it/s] 67%|██████▋ | 248345/371472 [9:12:48<10:35:38, 3.23it/s] 67%|██████▋ | 248346/371472 [9:12:48<10:19:30, 3.31it/s] 67%|██████▋ | 248347/371472 [9:12:49<10:02:56, 3.40it/s] 67%|██████▋ | 248348/371472 [9:12:49<10:00:43, 3.42it/s] 67%|██████▋ | 248349/371472 [9:12:49<9:58:19, 3.43it/s] 67%|██████▋ | 248350/371472 [9:12:50<9:54:37, 3.45it/s] 67%|██████▋ | 248351/371472 [9:12:50<9:50:20, 3.48it/s] 67%|██████▋ | 248352/371472 [9:12:50<9:29:53, 3.60it/s] 67%|██████▋ | 248353/371472 [9:12:50<9:14:10, 3.70it/s] 67%|██████▋ | 248354/371472 [9:12:51<9:28:10, 3.61it/s] 67%|██████▋ | 248355/371472 [9:12:51<9:56:59, 3.44it/s] 67%|██████▋ | 248356/371472 [9:12:51<9:52:32, 3.46it/s] 67%|██████▋ | 248357/371472 [9:12:52<10:12:14, 3.35it/s] 67%|██████▋ | 248358/371472 [9:12:52<9:50:40, 3.47it/s] 67%|██████▋ | 248359/371472 [9:12:52<9:49:48, 3.48it/s] 67%|██████▋ | 248360/371472 [9:12:53<11:59:43, 2.85it/s] {'loss': 2.7345, 'learning_rate': 3.984356482578811e-07, 'epoch': 10.7} + 67%|██████▋ | 248360/371472 [9:12:53<11:59:43, 2.85it/s] 67%|██████▋ | 248361/371472 [9:12:53<11:19:08, 3.02it/s] 67%|██████▋ | 248362/371472 [9:12:53<10:36:32, 3.22it/s] 67%|██████▋ | 248363/371472 [9:12:53<10:14:48, 3.34it/s] 67%|██████▋ | 248364/371472 [9:12:54<9:57:03, 3.44it/s] 67%|██████▋ | 248365/371472 [9:12:54<10:18:10, 3.32it/s] 67%|██████▋ | 248366/371472 [9:12:54<10:02:50, 3.40it/s] 67%|██████▋ | 248367/371472 [9:12:55<10:04:20, 3.40it/s] 67%|██████▋ | 248368/371472 [9:12:55<10:31:19, 3.25it/s] 67%|██████▋ | 248369/371472 [9:12:55<10:13:32, 3.34it/s] 67%|██████▋ | 248370/371472 [9:12:56<10:47:40, 3.17it/s] 67%|██████▋ | 248371/371472 [9:12:56<10:55:31, 3.13it/s] 67%|██████▋ | 248372/371472 [9:12:56<10:17:07, 3.32it/s] 67%|██████▋ | 248373/371472 [9:12:56<10:11:00, 3.36it/s] 67%|██████▋ | 248374/371472 [9:12:57<9:56:50, 3.44it/s] 67%|██████▋ | 248375/371472 [9:12:57<9:53:18, 3.46it/s] 67%|██████▋ | 248376/371472 [9:12:57<10:20:01, 3.31it/s] 67%|██████▋ | 248377/371472 [9:12:58<10:31:16, 3.25it/s] 67%|██████▋ | 248378/371472 [9:12:58<10:05:44, 3.39it/s] 67%|██████▋ | 248379/371472 [9:12:58<9:56:49, 3.44it/s] 67%|██████▋ | 248380/371472 [9:12:59<10:02:44, 3.40it/s] {'loss': 2.7566, 'learning_rate': 3.983871662824021e-07, 'epoch': 10.7} + 67%|██████▋ | 248380/371472 [9:12:59<10:02:44, 3.40it/s] 67%|██████▋ | 248381/371472 [9:12:59<9:43:00, 3.52it/s] 67%|██████▋ | 248382/371472 [9:12:59<9:42:51, 3.52it/s] 67%|██████▋ | 248383/371472 [9:12:59<9:46:03, 3.50it/s] 67%|██████▋ | 248384/371472 [9:13:00<9:39:06, 3.54it/s] 67%|██████▋ | 248385/371472 [9:13:00<9:33:44, 3.58it/s] 67%|██████▋ | 248386/371472 [9:13:00<9:35:35, 3.56it/s] 67%|██████▋ | 248387/371472 [9:13:00<9:34:36, 3.57it/s] 67%|██████▋ | 248388/371472 [9:13:01<10:11:06, 3.36it/s] 67%|██████▋ | 248389/371472 [9:13:01<10:01:49, 3.41it/s] 67%|██████▋ | 248390/371472 [9:13:01<10:26:52, 3.27it/s] 67%|██████▋ | 248391/371472 [9:13:02<10:14:46, 3.34it/s] 67%|██████▋ | 248392/371472 [9:13:02<10:16:11, 3.33it/s] 67%|██████▋ | 248393/371472 [9:13:02<10:02:40, 3.40it/s] 67%|██████▋ | 248394/371472 [9:13:03<11:07:16, 3.07it/s] 67%|██████▋ | 248395/371472 [9:13:03<11:10:38, 3.06it/s] 67%|██████▋ | 248396/371472 [9:13:03<10:59:10, 3.11it/s] 67%|██████▋ | 248397/371472 [9:13:04<10:49:40, 3.16it/s] 67%|��█████▋ | 248398/371472 [9:13:04<10:32:39, 3.24it/s] 67%|██████▋ | 248399/371472 [9:13:04<10:16:51, 3.33it/s] 67%|██████▋ | 248400/371472 [9:13:04<10:16:07, 3.33it/s] {'loss': 2.7558, 'learning_rate': 3.9833868430692323e-07, 'epoch': 10.7} + 67%|██████▋ | 248400/371472 [9:13:04<10:16:07, 3.33it/s] 67%|██████▋ | 248401/371472 [9:13:05<9:48:40, 3.48it/s] 67%|██████▋ | 248402/371472 [9:13:05<10:21:28, 3.30it/s] 67%|██████▋ | 248403/371472 [9:13:05<10:10:19, 3.36it/s] 67%|██████▋ | 248404/371472 [9:13:06<9:55:55, 3.44it/s] 67%|██████▋ | 248405/371472 [9:13:06<10:01:18, 3.41it/s] 67%|██████▋ | 248406/371472 [9:13:06<10:37:16, 3.22it/s] 67%|██████▋ | 248407/371472 [9:13:07<10:25:08, 3.28it/s] 67%|██████▋ | 248408/371472 [9:13:07<10:12:03, 3.35it/s] 67%|██████▋ | 248409/371472 [9:13:07<10:15:05, 3.33it/s] 67%|██████▋ | 248410/371472 [9:13:07<10:04:37, 3.39it/s] 67%|██████▋ | 248411/371472 [9:13:08<10:12:16, 3.35it/s] 67%|██████▋ | 248412/371472 [9:13:08<10:20:57, 3.30it/s] 67%|██████▋ | 248413/371472 [9:13:08<10:15:51, 3.33it/s] 67%|██████▋ | 248414/371472 [9:13:09<10:23:12, 3.29it/s] 67%|██████▋ | 248415/371472 [9:13:09<10:16:37, 3.33it/s] 67%|██████▋ | 248416/371472 [9:13:09<10:34:09, 3.23it/s] 67%|██████▋ | 248417/371472 [9:13:10<10:19:46, 3.31it/s] 67%|██████▋ | 248418/371472 [9:13:10<9:57:29, 3.43it/s] 67%|██████▋ | 248419/371472 [9:13:10<10:16:43, 3.33it/s] 67%|██████▋ | 248420/371472 [9:13:10<10:06:54, 3.38it/s] {'loss': 2.8115, 'learning_rate': 3.982902023314443e-07, 'epoch': 10.7} + 67%|██████▋ | 248420/371472 [9:13:10<10:06:54, 3.38it/s] 67%|██████▋ | 248421/371472 [9:13:11<9:45:39, 3.50it/s] 67%|██████▋ | 248422/371472 [9:13:11<9:32:46, 3.58it/s] 67%|██████▋ | 248423/371472 [9:13:11<10:01:52, 3.41it/s] 67%|██████▋ | 248424/371472 [9:13:12<10:46:30, 3.17it/s] 67%|██████▋ | 248425/371472 [9:13:12<10:32:01, 3.24it/s] 67%|██████▋ | 248426/371472 [9:13:12<10:20:11, 3.31it/s] 67%|██████▋ | 248427/371472 [9:13:13<10:02:40, 3.40it/s] 67%|██████▋ | 248428/371472 [9:13:13<9:53:00, 3.46it/s] 67%|██████▋ | 248429/371472 [9:13:13<9:48:58, 3.48it/s] 67%|██████▋ | 248430/371472 [9:13:13<9:52:33, 3.46it/s] 67%|██████▋ | 248431/371472 [9:13:14<9:52:01, 3.46it/s] 67%|██████▋ | 248432/371472 [9:13:14<9:43:22, 3.52it/s] 67%|██████▋ | 248433/371472 [9:13:14<9:37:09, 3.55it/s] 67%|██████▋ | 248434/371472 [9:13:15<10:07:23, 3.38it/s] 67%|██████▋ | 248435/371472 [9:13:15<9:56:47, 3.44it/s] 67%|██████▋ | 248436/371472 [9:13:15<10:00:42, 3.41it/s] 67%|██████▋ | 248437/371472 [9:13:15<10:01:39, 3.41it/s] 67%|██████▋ | 248438/371472 [9:13:16<10:01:52, 3.41it/s] 67%|██████▋ | 248439/371472 [9:13:16<10:37:32, 3.22it/s] 67%|██████▋ | 248440/371472 [9:13:16<10:11:23, 3.35it/s] {'loss': 2.7139, 'learning_rate': 3.982417203559655e-07, 'epoch': 10.7} + 67%|██████▋ | 248440/371472 [9:13:16<10:11:23, 3.35it/s] 67%|██████▋ | 248441/371472 [9:13:17<10:06:56, 3.38it/s] 67%|██████▋ | 248442/371472 [9:13:17<10:27:10, 3.27it/s] 67%|██████▋ | 248443/371472 [9:13:18<13:02:57, 2.62it/s] 67%|██████▋ | 248444/371472 [9:13:18<12:17:51, 2.78it/s] 67%|██████▋ | 248445/371472 [9:13:18<11:37:27, 2.94it/s] 67%|██████▋ | 248446/371472 [9:13:18<11:37:23, 2.94it/s] 67%|██████▋ | 248447/371472 [9:13:19<11:07:44, 3.07it/s] 67%|██████▋ | 248448/371472 [9:13:19<10:52:02, 3.14it/s] 67%|██████▋ | 248449/371472 [9:13:19<10:52:19, 3.14it/s] 67%|██████▋ | 248450/371472 [9:13:20<10:39:01, 3.21it/s] 67%|██████▋ | 248451/371472 [9:13:20<10:30:28, 3.25it/s] 67%|██████▋ | 248452/371472 [9:13:20<10:39:30, 3.21it/s] 67%|██████▋ | 248453/371472 [9:13:21<10:16:00, 3.33it/s] 67%|██████▋ | 248454/371472 [9:13:21<9:59:50, 3.42it/s] 67%|██████▋ | 248455/371472 [9:13:21<11:23:18, 3.00it/s] 67%|██████▋ | 248456/371472 [9:13:22<11:36:12, 2.94it/s] 67%|██████▋ | 248457/371472 [9:13:22<11:16:25, 3.03it/s] 67%|██████▋ | 248458/371472 [9:13:22<11:06:36, 3.08it/s] 67%|██████▋ | 248459/371472 [9:13:23<11:05:30, 3.08it/s] 67%|██████▋ | 248460/371472 [9:13:23<10:47:11, 3.17it/s] {'loss': 2.6825, 'learning_rate': 3.981932383804865e-07, 'epoch': 10.7} + 67%|██████▋ | 248460/371472 [9:13:23<10:47:11, 3.17it/s] 67%|██████▋ | 248461/371472 [9:13:23<11:20:33, 3.01it/s] 67%|██████▋ | 248462/371472 [9:13:23<10:44:12, 3.18it/s] 67%|██████▋ | 248463/371472 [9:13:24<10:42:26, 3.19it/s] 67%|██████▋ | 248464/371472 [9:13:24<10:22:49, 3.29it/s] 67%|██████▋ | 248465/371472 [9:13:24<10:02:47, 3.40it/s] 67%|██████▋ | 248466/371472 [9:13:25<9:49:41, 3.48it/s] 67%|██████▋ | 248467/371472 [9:13:25<9:50:40, 3.47it/s] 67%|██████▋ | 248468/371472 [9:13:25<9:42:59, 3.52it/s] 67%|██████▋ | 248469/371472 [9:13:25<9:40:54, 3.53it/s] 67%|██████▋ | 248470/371472 [9:13:26<9:43:56, 3.51it/s] 67%|██████▋ | 248471/371472 [9:13:26<9:36:59, 3.55it/s] 67%|██████▋ | 248472/371472 [9:13:26<9:48:32, 3.48it/s] 67%|██████▋ | 248473/371472 [9:13:27<9:46:59, 3.49it/s] 67%|██████▋ | 248474/371472 [9:13:27<10:27:26, 3.27it/s] 67%|██████▋ | 248475/371472 [9:13:27<10:09:42, 3.36it/s] 67%|██████▋ | 248476/371472 [9:13:28<10:15:34, 3.33it/s] 67%|██████▋ | 248477/371472 [9:13:28<9:59:41, 3.42it/s] 67%|██████▋ | 248478/371472 [9:13:28<10:27:11, 3.27it/s] 67%|██████▋ | 248479/371472 [9:13:28<10:06:05, 3.38it/s] 67%|██████▋ | 248480/371472 [9:13:29<10:40:16, 3.20it/s] {'loss': 2.7256, 'learning_rate': 3.981447564050077e-07, 'epoch': 10.7} + 67%|██████▋ | 248480/371472 [9:13:29<10:40:16, 3.20it/s] 67%|██████▋ | 248481/371472 [9:13:29<10:48:49, 3.16it/s] 67%|██████▋ | 248482/371472 [9:13:29<10:47:48, 3.16it/s] 67%|██████▋ | 248483/371472 [9:13:30<10:11:47, 3.35it/s] 67%|██████▋ | 248484/371472 [9:13:30<9:58:56, 3.42it/s] 67%|██████▋ | 248485/371472 [9:13:30<10:10:33, 3.36it/s] 67%|██████▋ | 248486/371472 [9:13:31<9:56:23, 3.44it/s] 67%|██████▋ | 248487/371472 [9:13:31<10:13:02, 3.34it/s] 67%|██████▋ | 248488/371472 [9:13:31<9:51:06, 3.47it/s] 67%|██████▋ | 248489/371472 [9:13:31<9:33:10, 3.58it/s] 67%|██████▋ | 248490/371472 [9:13:32<9:48:34, 3.48it/s] 67%|██████▋ | 248491/371472 [9:13:32<10:16:56, 3.32it/s] 67%|██████▋ | 248492/371472 [9:13:32<10:16:17, 3.33it/s] 67%|██████▋ | 248493/371472 [9:13:33<10:34:47, 3.23it/s] 67%|██████▋ | 248494/371472 [9:13:33<10:54:04, 3.13it/s] 67%|██████▋ | 248495/371472 [9:13:33<10:38:28, 3.21it/s] 67%|██████▋ | 248496/371472 [9:13:34<10:07:23, 3.37it/s] 67%|██████▋ | 248497/371472 [9:13:34<10:59:49, 3.11it/s] 67%|██████▋ | 248498/371472 [9:13:34<11:37:10, 2.94it/s] 67%|██████▋ | 248499/371472 [9:13:35<11:09:28, 3.06it/s] 67%|██████▋ | 248500/371472 [9:13:35<10:36:09, 3.22it/s] {'loss': 2.8035, 'learning_rate': 3.9809627442952875e-07, 'epoch': 10.7} + 67%|██████▋ | 248500/371472 [9:13:35<10:36:09, 3.22it/s] 67%|██████▋ | 248501/371472 [9:13:35<10:50:30, 3.15it/s] 67%|██████▋ | 248502/371472 [9:13:36<10:35:50, 3.22it/s] 67%|██████▋ | 248503/371472 [9:13:36<11:48:14, 2.89it/s] 67%|██████▋ | 248504/371472 [9:13:36<11:11:21, 3.05it/s] 67%|██████▋ | 248505/371472 [9:13:37<10:39:36, 3.20it/s] 67%|██████▋ | 248506/371472 [9:13:37<10:14:53, 3.33it/s] 67%|██████▋ | 248507/371472 [9:13:37<10:23:48, 3.29it/s] 67%|██████▋ | 248508/371472 [9:13:37<10:27:06, 3.27it/s] 67%|██████▋ | 248509/371472 [9:13:38<10:59:15, 3.11it/s] 67%|██████▋ | 248510/371472 [9:13:38<10:33:02, 3.24it/s] 67%|██████▋ | 248511/371472 [9:13:38<10:12:35, 3.35it/s] 67%|██████▋ | 248512/371472 [9:13:39<10:08:26, 3.37it/s] 67%|██████▋ | 248513/371472 [9:13:39<9:45:37, 3.50it/s] 67%|██████▋ | 248514/371472 [9:13:39<10:05:31, 3.38it/s] 67%|██████▋ | 248515/371472 [9:13:40<10:54:50, 3.13it/s] 67%|██████▋ | 248516/371472 [9:13:40<10:46:46, 3.17it/s] 67%|██████▋ | 248517/371472 [9:13:40<10:40:57, 3.20it/s] 67%|██████▋ | 248518/371472 [9:13:40<10:40:10, 3.20it/s] 67%|██████▋ | 248519/371472 [9:13:41<10:25:19, 3.28it/s] 67%|██████▋ | 248520/371472 [9:13:41<10:12:24, 3.35it/s] {'loss': 2.7793, 'learning_rate': 3.9804779245404987e-07, 'epoch': 10.7} + 67%|██████▋ | 248520/371472 [9:13:41<10:12:24, 3.35it/s] 67%|██████▋ | 248521/371472 [9:13:41<10:05:59, 3.38it/s] 67%|██████▋ | 248522/371472 [9:13:42<10:44:41, 3.18it/s] 67%|██████▋ | 248523/371472 [9:13:42<10:36:51, 3.22it/s] 67%|██████▋ | 248524/371472 [9:13:42<10:41:21, 3.19it/s] 67%|██████▋ | 248525/371472 [9:13:43<10:48:02, 3.16it/s] 67%|██████▋ | 248526/371472 [9:13:43<11:39:45, 2.93it/s] 67%|██████▋ | 248527/371472 [9:13:43<11:07:05, 3.07it/s] 67%|██████▋ | 248528/371472 [9:13:44<11:33:11, 2.96it/s] 67%|██████▋ | 248529/371472 [9:13:44<12:43:03, 2.69it/s] 67%|██████▋ | 248530/371472 [9:13:44<11:57:51, 2.85it/s] 67%|██████▋ | 248531/371472 [9:13:45<11:14:00, 3.04it/s] 67%|██████▋ | 248532/371472 [9:13:45<10:46:19, 3.17it/s] 67%|██████▋ | 248533/371472 [9:13:45<10:32:01, 3.24it/s] 67%|██████▋ | 248534/371472 [9:13:46<10:17:40, 3.32it/s] 67%|██████▋ | 248535/371472 [9:13:46<10:12:18, 3.35it/s] 67%|██████▋ | 248536/371472 [9:13:46<10:08:24, 3.37it/s] 67%|██████▋ | 248537/371472 [9:13:46<9:58:53, 3.42it/s] 67%|██████▋ | 248538/371472 [9:13:47<9:44:55, 3.50it/s] 67%|██████▋ | 248539/371472 [9:13:47<9:47:21, 3.49it/s] 67%|██████▋ | 248540/371472 [9:13:47<9:39:24, 3.54it/s] {'loss': 2.7632, 'learning_rate': 3.9799931047857094e-07, 'epoch': 10.71} + 67%|██████▋ | 248540/371472 [9:13:47<9:39:24, 3.54it/s] 67%|██████▋ | 248541/371472 [9:13:48<9:48:50, 3.48it/s] 67%|██████▋ | 248542/371472 [9:13:48<11:18:33, 3.02it/s] 67%|██████▋ | 248543/371472 [9:13:48<10:43:52, 3.18it/s] 67%|██████▋ | 248544/371472 [9:13:49<10:40:48, 3.20it/s] 67%|██████▋ | 248545/371472 [9:13:49<10:14:17, 3.34it/s] 67%|██████▋ | 248546/371472 [9:13:49<10:22:22, 3.29it/s] 67%|██████▋ | 248547/371472 [9:13:49<9:54:42, 3.44it/s] 67%|██████▋ | 248548/371472 [9:13:50<10:02:46, 3.40it/s] 67%|██████▋ | 248549/371472 [9:13:50<10:07:23, 3.37it/s] 67%|██████▋ | 248550/371472 [9:13:50<10:00:21, 3.41it/s] 67%|██████▋ | 248551/371472 [9:13:51<9:44:21, 3.51it/s] 67%|██████▋ | 248552/371472 [9:13:51<9:50:17, 3.47it/s] 67%|██████▋ | 248553/371472 [9:13:51<10:39:55, 3.20it/s] 67%|██████▋ | 248554/371472 [9:13:52<10:28:03, 3.26it/s] 67%|██████▋ | 248555/371472 [9:13:52<10:07:39, 3.37it/s] 67%|██████▋ | 248556/371472 [9:13:52<9:53:16, 3.45it/s] 67%|██████▋ | 248557/371472 [9:13:52<9:43:30, 3.51it/s] 67%|██████▋ | 248558/371472 [9:13:53<9:34:19, 3.57it/s] 67%|██████▋ | 248559/371472 [9:13:53<10:04:34, 3.39it/s] 67%|██████▋ | 248560/371472 [9:13:53<9:52:02, 3.46it/s] {'loss': 2.7417, 'learning_rate': 3.97950828503092e-07, 'epoch': 10.71} + 67%|██████▋ | 248560/371472 [9:13:53<9:52:02, 3.46it/s] 67%|██████▋ | 248561/371472 [9:13:54<9:47:33, 3.49it/s] 67%|██████▋ | 248562/371472 [9:13:54<9:54:25, 3.45it/s] 67%|██████▋ | 248563/371472 [9:13:54<10:01:45, 3.40it/s] 67%|██████▋ | 248564/371472 [9:13:54<9:50:55, 3.47it/s] 67%|██████▋ | 248565/371472 [9:13:55<9:51:55, 3.46it/s] 67%|██████▋ | 248566/371472 [9:13:55<9:43:12, 3.51it/s] 67%|██████▋ | 248567/371472 [9:13:55<10:32:06, 3.24it/s] 67%|██████▋ | 248568/371472 [9:13:56<10:09:16, 3.36it/s] 67%|██████▋ | 248569/371472 [9:13:56<10:17:18, 3.32it/s] 67%|██████▋ | 248570/371472 [9:13:56<10:25:14, 3.28it/s] 67%|██████▋ | 248571/371472 [9:13:57<10:06:37, 3.38it/s] 67%|██████▋ | 248572/371472 [9:13:57<10:03:14, 3.40it/s] 67%|██████▋ | 248573/371472 [9:13:57<9:56:16, 3.44it/s] 67%|██████▋ | 248574/371472 [9:13:57<10:15:48, 3.33it/s] 67%|██████▋ | 248575/371472 [9:13:58<10:05:58, 3.38it/s] 67%|██████▋ | 248576/371472 [9:13:58<9:59:09, 3.42it/s] 67%|██████▋ | 248577/371472 [9:13:58<9:52:37, 3.46it/s] 67%|██████▋ | 248578/371472 [9:13:59<9:55:00, 3.44it/s] 67%|██████▋ | 248579/371472 [9:13:59<9:52:14, 3.46it/s] 67%|██████▋ | 248580/371472 [9:13:59<9:49:01, 3.48it/s] {'loss': 2.6433, 'learning_rate': 3.9790234652761314e-07, 'epoch': 10.71} + 67%|██████▋ | 248580/371472 [9:13:59<9:49:01, 3.48it/s] 67%|██████▋ | 248581/371472 [9:13:59<9:34:19, 3.57it/s] 67%|██████▋ | 248582/371472 [9:14:00<9:50:15, 3.47it/s] 67%|██████▋ | 248583/371472 [9:14:00<9:57:33, 3.43it/s] 67%|██████▋ | 248584/371472 [9:14:00<9:55:09, 3.44it/s] 67%|██████▋ | 248585/371472 [9:14:01<10:33:33, 3.23it/s] 67%|██████▋ | 248586/371472 [9:14:01<10:30:00, 3.25it/s] 67%|██████▋ | 248587/371472 [9:14:01<10:31:44, 3.24it/s] 67%|██████▋ | 248588/371472 [9:14:02<10:00:33, 3.41it/s] 67%|██████▋ | 248589/371472 [9:14:02<10:11:34, 3.35it/s] 67%|██████▋ | 248590/371472 [9:14:02<10:02:30, 3.40it/s] 67%|██████▋ | 248591/371472 [9:14:02<9:46:19, 3.49it/s] 67%|██████▋ | 248592/371472 [9:14:03<9:36:36, 3.55it/s] 67%|██████▋ | 248593/371472 [9:14:03<9:29:35, 3.60it/s] 67%|██████▋ | 248594/371472 [9:14:03<9:30:35, 3.59it/s] 67%|██████▋ | 248595/371472 [9:14:03<9:37:09, 3.55it/s] 67%|██████▋ | 248596/371472 [9:14:04<9:51:14, 3.46it/s] 67%|██████▋ | 248597/371472 [9:14:04<9:53:12, 3.45it/s] 67%|██████▋ | 248598/371472 [9:14:04<9:48:43, 3.48it/s] 67%|██████▋ | 248599/371472 [9:14:05<9:48:35, 3.48it/s] 67%|██████▋ | 248600/371472 [9:14:05<9:56:44, 3.43it/s] {'loss': 2.7302, 'learning_rate': 3.978538645521342e-07, 'epoch': 10.71} + 67%|██████▋ | 248600/371472 [9:14:05<9:56:44, 3.43it/s] 67%|██████▋ | 248601/371472 [9:14:05<9:52:05, 3.46it/s] 67%|██████▋ | 248602/371472 [9:14:06<10:30:51, 3.25it/s] 67%|██████▋ | 248603/371472 [9:14:06<10:26:37, 3.27it/s] 67%|██████▋ | 248604/371472 [9:14:06<10:49:41, 3.15it/s] 67%|██████▋ | 248605/371472 [9:14:07<10:17:45, 3.31it/s] 67%|██████▋ | 248606/371472 [9:14:07<10:17:15, 3.32it/s] 67%|██████▋ | 248607/371472 [9:14:07<10:03:00, 3.40it/s] 67%|██████▋ | 248608/371472 [9:14:07<10:08:41, 3.36it/s] 67%|██████▋ | 248609/371472 [9:14:08<9:59:51, 3.41it/s] 67%|██████▋ | 248610/371472 [9:14:08<9:50:47, 3.47it/s] 67%|██████▋ | 248611/371472 [9:14:08<10:16:46, 3.32it/s] 67%|██████▋ | 248612/371472 [9:14:09<10:27:41, 3.26it/s] 67%|██████▋ | 248613/371472 [9:14:09<10:02:47, 3.40it/s] 67%|██████▋ | 248614/371472 [9:14:09<10:10:10, 3.36it/s] 67%|██████▋ | 248615/371472 [9:14:09<10:05:56, 3.38it/s] 67%|██████▋ | 248616/371472 [9:14:10<9:43:05, 3.51it/s] 67%|██████▋ | 248617/371472 [9:14:10<9:46:31, 3.49it/s] 67%|██████▋ | 248618/371472 [9:14:10<9:58:18, 3.42it/s] 67%|██████▋ | 248619/371472 [9:14:11<10:25:03, 3.28it/s] 67%|██████▋ | 248620/371472 [9:14:11<10:23:06, 3.29it/s] {'loss': 2.6308, 'learning_rate': 3.978053825766554e-07, 'epoch': 10.71} + 67%|██████▋ | 248620/371472 [9:14:11<10:23:06, 3.29it/s] 67%|██████▋ | 248621/371472 [9:14:11<10:29:36, 3.25it/s] 67%|██████▋ | 248622/371472 [9:14:12<10:33:57, 3.23it/s] 67%|██████▋ | 248623/371472 [9:14:12<10:22:14, 3.29it/s] 67%|██████▋ | 248624/371472 [9:14:12<10:15:16, 3.33it/s] 67%|██████▋ | 248625/371472 [9:14:12<9:59:32, 3.42it/s] 67%|██████▋ | 248626/371472 [9:14:13<10:14:41, 3.33it/s] 67%|██████▋ | 248627/371472 [9:14:13<10:48:32, 3.16it/s] 67%|██████▋ | 248628/371472 [9:14:13<10:28:14, 3.26it/s] 67%|██████▋ | 248629/371472 [9:14:14<10:12:25, 3.34it/s] 67%|██████▋ | 248630/371472 [9:14:14<10:03:25, 3.39it/s] 67%|██████▋ | 248631/371472 [9:14:14<9:59:55, 3.41it/s] 67%|██████▋ | 248632/371472 [9:14:15<9:48:59, 3.48it/s] 67%|██████▋ | 248633/371472 [9:14:15<9:44:47, 3.50it/s] 67%|██████▋ | 248634/371472 [9:14:15<9:56:37, 3.43it/s] 67%|██████▋ | 248635/371472 [9:14:15<9:47:06, 3.49it/s] 67%|██████▋ | 248636/371472 [9:14:16<9:53:35, 3.45it/s] 67%|██████▋ | 248637/371472 [9:14:16<9:46:39, 3.49it/s] 67%|████��█▋ | 248638/371472 [9:14:16<9:34:47, 3.56it/s] 67%|██████▋ | 248639/371472 [9:14:17<9:57:43, 3.42it/s] 67%|██████▋ | 248640/371472 [9:14:17<10:02:41, 3.40it/s] {'loss': 2.7982, 'learning_rate': 3.9775690060117646e-07, 'epoch': 10.71} + 67%|██████▋ | 248640/371472 [9:14:17<10:02:41, 3.40it/s] 67%|██████▋ | 248641/371472 [9:14:17<10:01:13, 3.40it/s] 67%|██████▋ | 248642/371472 [9:14:17<10:16:32, 3.32it/s] 67%|██████▋ | 248643/371472 [9:14:18<10:42:12, 3.19it/s] 67%|██████▋ | 248644/371472 [9:14:18<11:28:27, 2.97it/s] 67%|██████▋ | 248645/371472 [9:14:19<11:23:01, 3.00it/s] 67%|██████▋ | 248646/371472 [9:14:19<11:53:25, 2.87it/s] 67%|██████▋ | 248647/371472 [9:14:19<11:25:47, 2.98it/s] 67%|██████▋ | 248648/371472 [9:14:19<10:44:05, 3.18it/s] 67%|██████▋ | 248649/371472 [9:14:20<10:26:33, 3.27it/s] 67%|██████▋ | 248650/371472 [9:14:20<10:06:18, 3.38it/s] 67%|██████▋ | 248651/371472 [9:14:20<9:57:59, 3.42it/s] 67%|██████▋ | 248652/371472 [9:14:21<10:21:14, 3.30it/s] 67%|██████▋ | 248653/371472 [9:14:21<10:11:33, 3.35it/s] 67%|██████▋ | 248654/371472 [9:14:21<10:15:59, 3.32it/s] 67%|██████▋ | 248655/371472 [9:14:22<10:16:35, 3.32it/s] 67%|██████▋ | 248656/371472 [9:14:22<9:59:06, 3.42it/s] 67%|██████▋ | 248657/371472 [9:14:22<10:07:17, 3.37it/s] 67%|██████▋ | 248658/371472 [9:14:22<10:07:02, 3.37it/s] 67%|██████▋ | 248659/371472 [9:14:23<10:10:33, 3.35it/s] 67%|██████▋ | 248660/371472 [9:14:23<10:19:53, 3.30it/s] {'loss': 2.8, 'learning_rate': 3.977084186256976e-07, 'epoch': 10.71} + 67%|██████▋ | 248660/371472 [9:14:23<10:19:53, 3.30it/s] 67%|██████▋ | 248661/371472 [9:14:23<10:38:25, 3.21it/s] 67%|██████▋ | 248662/371472 [9:14:24<10:55:56, 3.12it/s] 67%|██████▋ | 248663/371472 [9:14:24<10:38:19, 3.21it/s] 67%|██████▋ | 248664/371472 [9:14:24<10:21:11, 3.29it/s] 67%|██████▋ | 248665/371472 [9:14:25<10:07:39, 3.37it/s] 67%|██████▋ | 248666/371472 [9:14:25<10:03:34, 3.39it/s] 67%|██████▋ | 248667/371472 [9:14:25<10:11:12, 3.35it/s] 67%|██████▋ | 248668/371472 [9:14:25<9:56:14, 3.43it/s] 67%|██████▋ | 248669/371472 [9:14:26<9:57:59, 3.42it/s] 67%|██████▋ | 248670/371472 [9:14:26<9:45:23, 3.50it/s] 67%|██████▋ | 248671/371472 [9:14:26<10:00:24, 3.41it/s] 67%|██████▋ | 248672/371472 [9:14:27<10:01:46, 3.40it/s] 67%|██████▋ | 248673/371472 [9:14:27<9:57:12, 3.43it/s] 67%|██████▋ | 248674/371472 [9:14:27<10:38:59, 3.20it/s] 67%|██████▋ | 248675/371472 [9:14:28<10:42:35, 3.18it/s] 67%|██████▋ | 248676/371472 [9:14:28<11:08:14, 3.06it/s] 67%|██████▋ | 248677/371472 [9:14:28<11:11:20, 3.05it/s] 67%|██████▋ | 248678/371472 [9:14:29<10:54:15, 3.13it/s] 67%|██████▋ | 248679/371472 [9:14:29<10:44:27, 3.18it/s] 67%|██████▋ | 248680/371472 [9:14:29<10:31:11, 3.24it/s] {'loss': 2.5638, 'learning_rate': 3.9765993665021866e-07, 'epoch': 10.71} + 67%|██████▋ | 248680/371472 [9:14:29<10:31:11, 3.24it/s] 67%|██████▋ | 248681/371472 [9:14:29<10:35:23, 3.22it/s] 67%|██████▋ | 248682/371472 [9:14:30<10:19:07, 3.31it/s] 67%|██████▋ | 248683/371472 [9:14:30<10:10:33, 3.35it/s] 67%|██████▋ | 248684/371472 [9:14:30<10:10:28, 3.35it/s] 67%|██████▋ | 248685/371472 [9:14:31<10:19:48, 3.30it/s] 67%|██████▋ | 248686/371472 [9:14:31<11:08:58, 3.06it/s] 67%|██████▋ | 248687/371472 [9:14:31<10:37:37, 3.21it/s] 67%|██████▋ | 248688/371472 [9:14:32<10:47:35, 3.16it/s] 67%|██████▋ | 248689/371472 [9:14:32<10:30:18, 3.25it/s] 67%|██████▋ | 248690/371472 [9:14:32<10:18:04, 3.31it/s] 67%|██████▋ | 248691/371472 [9:14:33<11:04:05, 3.08it/s] 67%|██████▋ | 248692/371472 [9:14:33<10:56:13, 3.12it/s] 67%|██████▋ | 248693/371472 [9:14:33<10:43:07, 3.18it/s] 67%|██████▋ | 248694/371472 [9:14:34<10:29:26, 3.25it/s] 67%|██████▋ | 248695/371472 [9:14:34<10:41:19, 3.19it/s] 67%|██████▋ | 248696/371472 [9:14:34<11:05:55, 3.07it/s] 67%|██████▋ | 248697/371472 [9:14:35<11:15:08, 3.03it/s] 67%|██████▋ | 248698/371472 [9:14:35<10:37:28, 3.21it/s] 67%|██████▋ | 248699/371472 [9:14:35<10:34:53, 3.22it/s] 67%|██████▋ | 248700/371472 [9:14:35<10:18:57, 3.31it/s] {'loss': 2.6755, 'learning_rate': 3.9761145467473983e-07, 'epoch': 10.71} + 67%|██████▋ | 248700/371472 [9:14:35<10:18:57, 3.31it/s] 67%|██████▋ | 248701/371472 [9:14:36<10:16:25, 3.32it/s] 67%|██████▋ | 248702/371472 [9:14:36<10:21:46, 3.29it/s] 67%|██████▋ | 248703/371472 [9:14:36<10:15:49, 3.32it/s] 67%|██████▋ | 248704/371472 [9:14:37<9:59:55, 3.41it/s] 67%|██████▋ | 248705/371472 [9:14:37<10:35:40, 3.22it/s] 67%|██████▋ | 248706/371472 [9:14:37<10:44:33, 3.17it/s] 67%|██████▋ | 248707/371472 [9:14:38<10:55:41, 3.12it/s] 67%|██████▋ | 248708/371472 [9:14:38<11:04:56, 3.08it/s] 67%|██████▋ | 248709/371472 [9:14:38<10:30:35, 3.24it/s] 67%|██████▋ | 248710/371472 [9:14:38<10:17:50, 3.31it/s] 67%|██████▋ | 248711/371472 [9:14:39<10:02:27, 3.40it/s] 67%|██████▋ | 248712/371472 [9:14:39<10:07:43, 3.37it/s] 67%|██████▋ | 248713/371472 [9:14:39<10:12:15, 3.34it/s] 67%|██████▋ | 248714/371472 [9:14:40<10:12:57, 3.34it/s] 67%|██████▋ | 248715/371472 [9:14:40<9:59:03, 3.42it/s] 67%|██████▋ | 248716/371472 [9:14:40<9:49:54, 3.47it/s] 67%|██████▋ | 248717/371472 [9:14:40<9:44:14, 3.50it/s] 67%|██████▋ | 248718/371472 [9:14:41<9:46:49, 3.49it/s] 67%|██████▋ | 248719/371472 [9:14:41<10:03:02, 3.39it/s] 67%|██████▋ | 248720/371472 [9:14:41<9:54:59, 3.44it/s] {'loss': 2.6702, 'learning_rate': 3.9756297269926085e-07, 'epoch': 10.71} + 67%|██████▋ | 248720/371472 [9:14:41<9:54:59, 3.44it/s] 67%|██████▋ | 248721/371472 [9:14:42<10:17:35, 3.31it/s] 67%|██████▋ | 248722/371472 [9:14:42<10:17:26, 3.31it/s] 67%|██████▋ | 248723/371472 [9:14:42<10:50:24, 3.15it/s] 67%|██████▋ | 248724/371472 [9:14:43<10:42:48, 3.18it/s] 67%|██████▋ | 248725/371472 [9:14:43<10:19:58, 3.30it/s] 67%|██████▋ | 248726/371472 [9:14:43<10:09:02, 3.36it/s] 67%|██████▋ | 248727/371472 [9:14:43<9:53:49, 3.45it/s] 67%|██████▋ | 248728/371472 [9:14:44<10:38:05, 3.21it/s] 67%|██████▋ | 248729/371472 [9:14:44<10:14:05, 3.33it/s] 67%|██████▋ | 248730/371472 [9:14:44<10:10:15, 3.35it/s] 67%|██████▋ | 248731/371472 [9:14:45<10:01:00, 3.40it/s] 67%|██████▋ | 248732/371472 [9:14:45<9:48:27, 3.48it/s] 67%|██████▋ | 248733/371472 [9:14:45<9:33:24, 3.57it/s] 67%|██████▋ | 248734/371472 [9:14:46<10:03:12, 3.39it/s] 67%|██████▋ | 248735/371472 [9:14:46<10:19:46, 3.30it/s] 67%|██████▋ | 248736/371472 [9:14:46<10:12:09, 3.34it/s] 67%|██████▋ | 248737/371472 [9:14:46<10:05:43, 3.38it/s] 67%|██████▋ | 248738/371472 [9:14:47<10:04:55, 3.38it/s] 67%|██████▋ | 248739/371472 [9:14:47<9:48:58, 3.47it/s] 67%|██████▋ | 248740/371472 [9:14:47<10:05:09, 3.38it/s] {'loss': 2.6553, 'learning_rate': 3.9751449072378203e-07, 'epoch': 10.71} + 67%|██████▋ | 248740/371472 [9:14:47<10:05:09, 3.38it/s] 67%|██████▋ | 248741/371472 [9:14:48<10:20:48, 3.29it/s] 67%|██████▋ | 248742/371472 [9:14:48<10:05:44, 3.38it/s] 67%|██████▋ | 248743/371472 [9:14:48<9:54:18, 3.44it/s] 67%|██████▋ | 248744/371472 [9:14:49<9:43:43, 3.50it/s] 67%|██████▋ | 248745/371472 [9:14:49<9:43:23, 3.51it/s] 67%|██████▋ | 248746/371472 [9:14:49<10:01:40, 3.40it/s] 67%|██████▋ | 248747/371472 [9:14:49<10:34:22, 3.22it/s] 67%|██████▋ | 248748/371472 [9:14:50<10:32:31, 3.23it/s] 67%|██████▋ | 248749/371472 [9:14:50<10:18:14, 3.31it/s] 67%|██████▋ | 248750/371472 [9:14:50<10:06:12, 3.37it/s] 67%|██████▋ | 248751/371472 [9:14:51<10:04:18, 3.38it/s] 67%|██████▋ | 248752/371472 [9:14:51<10:53:54, 3.13it/s] 67%|██████▋ | 248753/371472 [9:14:51<11:13:01, 3.04it/s] 67%|██████▋ | 248754/371472 [9:14:52<10:53:19, 3.13it/s] 67%|██████▋ | 248755/371472 [9:14:52<10:29:07, 3.25it/s] 67%|██████▋ | 248756/371472 [9:14:52<10:25:13, 3.27it/s] 67%|██████▋ | 248757/371472 [9:14:53<10:07:46, 3.37it/s] 67%|██████▋ | 248758/371472 [9:14:53<10:00:32, 3.41it/s] 67%|██████▋ | 248759/371472 [9:14:53<9:50:30, 3.46it/s] 67%|██████▋ | 248760/371472 [9:14:53<9:58:34, 3.42it/s] {'loss': 2.8686, 'learning_rate': 3.974660087483031e-07, 'epoch': 10.71} + 67%|██████▋ | 248760/371472 [9:14:53<9:58:34, 3.42it/s] 67%|██████▋ | 248761/371472 [9:14:54<10:21:34, 3.29it/s] 67%|██████▋ | 248762/371472 [9:14:54<10:32:42, 3.23it/s] 67%|██████▋ | 248763/371472 [9:14:54<10:19:16, 3.30it/s] 67%|██████▋ | 248764/371472 [9:14:55<10:00:53, 3.40it/s] 67%|██████▋ | 248765/371472 [9:14:55<9:50:13, 3.46it/s] 67%|██████▋ | 248766/371472 [9:14:55<9:55:09, 3.44it/s] 67%|██████▋ | 248767/371472 [9:14:55<9:39:41, 3.53it/s] 67%|██████▋ | 248768/371472 [9:14:56<9:40:48, 3.52it/s] 67%|██████▋ | 248769/371472 [9:14:56<10:20:04, 3.30it/s] 67%|██████▋ | 248770/371472 [9:14:56<10:38:19, 3.20it/s] 67%|██████▋ | 248771/371472 [9:14:57<10:11:30, 3.34it/s] 67%|██████▋ | 248772/371472 [9:14:57<9:59:22, 3.41it/s] 67%|██████▋ | 248773/371472 [9:14:57<10:25:27, 3.27it/s] 67%|██████▋ | 248774/371472 [9:14:58<10:27:43, 3.26it/s] 67%|██████▋ | 248775/371472 [9:14:58<10:00:06, 3.41it/s] 67%|██████▋ | 248776/371472 [9:14:58<10:47:46, 3.16it/s] 67%|██████▋ | 248777/371472 [9:14:58<10:30:46, 3.24it/s] 67%|██████▋ | 248778/371472 [9:14:59<10:08:17, 3.36it/s] 67%|██████▋ | 248779/371472 [9:14:59<10:00:55, 3.40it/s] 67%|██████▋ | 248780/371472 [9:14:59<9:51:25, 3.46it/s] {'loss': 2.6178, 'learning_rate': 3.974175267728243e-07, 'epoch': 10.72} + 67%|██████▋ | 248780/371472 [9:14:59<9:51:25, 3.46it/s] 67%|██████▋ | 248781/371472 [9:15:00<9:49:33, 3.47it/s] 67%|██████▋ | 248782/371472 [9:15:00<10:22:54, 3.28it/s] 67%|██████▋ | 248783/371472 [9:15:00<10:14:59, 3.32it/s] 67%|██████▋ | 248784/371472 [9:15:01<9:56:21, 3.43it/s] 67%|██████▋ | 248785/371472 [9:15:01<9:51:55, 3.45it/s] 67%|██████▋ | 248786/371472 [9:15:01<10:01:18, 3.40it/s] 67%|██████▋ | 248787/371472 [9:15:01<10:10:18, 3.35it/s] 67%|██████▋ | 248788/371472 [9:15:02<10:31:13, 3.24it/s] 67%|██████▋ | 248789/371472 [9:15:02<10:17:45, 3.31it/s] 67%|██████▋ | 248790/371472 [9:15:02<10:33:45, 3.23it/s] 67%|██████▋ | 248791/371472 [9:15:03<10:41:02, 3.19it/s] 67%|██████▋ | 248792/371472 [9:15:03<10:13:50, 3.33it/s] 67%|██████▋ | 248793/371472 [9:15:03<10:20:38, 3.29it/s] 67%|██████▋ | 248794/371472 [9:15:04<10:48:35, 3.15it/s] 67%|██████▋ | 248795/371472 [9:15:04<10:39:26, 3.20it/s] 67%|██████▋ | 248796/371472 [9:15:04<10:17:24, 3.31it/s] 67%|██████▋ | 248797/371472 [9:15:04<9:58:35, 3.42it/s] 67%|██████▋ | 248798/371472 [9:15:05<9:59:24, 3.41it/s] 67%|██████▋ | 248799/371472 [9:15:05<10:21:45, 3.29it/s] 67%|██████▋ | 248800/371472 [9:15:05<10:12:09, 3.34it/s] {'loss': 2.7126, 'learning_rate': 3.973690447973453e-07, 'epoch': 10.72} + 67%|██████▋ | 248800/371472 [9:15:05<10:12:09, 3.34it/s] 67%|██████▋ | 248801/371472 [9:15:06<9:52:58, 3.45it/s] 67%|██████▋ | 248802/371472 [9:15:06<10:08:34, 3.36it/s] 67%|██████▋ | 248803/371472 [9:15:06<10:48:17, 3.15it/s] 67%|██████▋ | 248804/371472 [9:15:07<10:24:30, 3.27it/s] 67%|██████▋ | 248805/371472 [9:15:07<10:08:00, 3.36it/s] 67%|██████▋ | 248806/371472 [9:15:07<10:04:32, 3.38it/s] 67%|██████▋ | 248807/371472 [9:15:08<10:44:38, 3.17it/s] 67%|██████▋ | 248808/371472 [9:15:08<10:56:25, 3.11it/s] 67%|██████▋ | 248809/371472 [9:15:08<10:31:57, 3.23it/s] 67%|██████▋ | 248810/371472 [9:15:08<10:27:34, 3.26it/s] 67%|██████▋ | 248811/371472 [9:15:09<10:07:13, 3.37it/s] 67%|██████▋ | 248812/371472 [9:15:09<10:01:52, 3.40it/s] 67%|██████▋ | 248813/371472 [9:15:09<10:21:29, 3.29it/s] 67%|██████▋ | 248814/371472 [9:15:10<10:31:43, 3.24it/s] 67%|██████▋ | 248815/371472 [9:15:10<10:52:29, 3.13it/s] 67%|██████▋ | 248816/371472 [9:15:10<10:14:11, 3.33it/s] 67%|██████▋ | 248817/371472 [9:15:11<10:37:47, 3.21it/s] 67%|██████▋ | 248818/371472 [9:15:11<10:17:39, 3.31it/s] 67%|██████▋ | 248819/371472 [9:15:11<9:57:52, 3.42it/s] 67%|██████▋ | 248820/371472 [9:15:11<10:21:02, 3.29it/s] {'loss': 2.6747, 'learning_rate': 3.973205628218665e-07, 'epoch': 10.72} + 67%|██████▋ | 248820/371472 [9:15:11<10:21:02, 3.29it/s] 67%|██████▋ | 248821/371472 [9:15:12<10:08:25, 3.36it/s] 67%|██████▋ | 248822/371472 [9:15:12<9:48:58, 3.47it/s] 67%|██████▋ | 248823/371472 [9:15:12<9:39:16, 3.53it/s] 67%|██████▋ | 248824/371472 [9:15:13<10:40:57, 3.19it/s] 67%|██████▋ | 248825/371472 [9:15:13<11:10:39, 3.05it/s] 67%|██████▋ | 248826/371472 [9:15:13<10:49:48, 3.15it/s] 67%|██████▋ | 248827/371472 [9:15:14<10:33:59, 3.22it/s] 67%|██████▋ | 248828/371472 [9:15:14<10:17:21, 3.31it/s] 67%|██████▋ | 248829/371472 [9:15:14<10:02:42, 3.39it/s] 67%|██████▋ | 248830/371472 [9:15:14<10:01:48, 3.40it/s] 67%|██████▋ | 248831/371472 [9:15:15<10:22:46, 3.28it/s] 67%|██████▋ | 248832/371472 [9:15:15<10:57:12, 3.11it/s] 67%|██████▋ | 248833/371472 [9:15:15<10:18:26, 3.31it/s] 67%|██████▋ | 248834/371472 [9:15:16<10:02:01, 3.40it/s] 67%|██████▋ | 248835/371472 [9:15:16<9:48:01, 3.48it/s] 67%|██████▋ | 248836/371472 [9:15:16<9:59:40, 3.41it/s] 67%|██████▋ | 248837/371472 [9:15:17<10:10:07, 3.35it/s] 67%|██████▋ | 248838/371472 [9:15:17<9:54:03, 3.44it/s] 67%|██████▋ | 248839/371472 [9:15:17<10:29:02, 3.25it/s] 67%|██████▋ | 248840/371472 [9:15:18<10:42:40, 3.18it/s] {'loss': 2.6992, 'learning_rate': 3.972720808463875e-07, 'epoch': 10.72} + 67%|██████▋ | 248840/371472 [9:15:18<10:42:40, 3.18it/s] 67%|██████▋ | 248841/371472 [9:15:18<11:22:50, 2.99it/s] 67%|██████▋ | 248842/371472 [9:15:18<11:07:51, 3.06it/s] 67%|██████▋ | 248843/371472 [9:15:19<10:41:17, 3.19it/s] 67%|██████▋ | 248844/371472 [9:15:19<10:36:01, 3.21it/s] 67%|██████▋ | 248845/371472 [9:15:19<12:25:34, 2.74it/s] 67%|██████▋ | 248846/371472 [9:15:20<11:23:54, 2.99it/s] 67%|██████▋ | 248847/371472 [9:15:20<10:47:41, 3.16it/s] 67%|██████▋ | 248848/371472 [9:15:20<10:31:00, 3.24it/s] 67%|██████▋ | 248849/371472 [9:15:20<10:22:53, 3.28it/s] 67%|██████▋ | 248850/371472 [9:15:21<10:50:10, 3.14it/s] 67%|██████▋ | 248851/371472 [9:15:21<10:29:04, 3.25it/s] 67%|██████▋ | 248852/371472 [9:15:21<10:29:58, 3.24it/s] 67%|██████▋ | 248853/371472 [9:15:22<9:54:30, 3.44it/s] 67%|██████▋ | 248854/371472 [9:15:22<10:01:34, 3.40it/s] 67%|██████▋ | 248855/371472 [9:15:22<10:04:34, 3.38it/s] 67%|██████▋ | 248856/371472 [9:15:23<10:15:23, 3.32it/s] 67%|██████▋ | 248857/371472 [9:15:23<10:04:25, 3.38it/s] 67%|██████▋ | 248858/371472 [9:15:23<10:11:13, 3.34it/s] 67%|██████▋ | 248859/371472 [9:15:23<9:56:27, 3.43it/s] 67%|██████▋ | 248860/371472 [9:15:24<9:52:21, 3.45it/s] {'loss': 2.5974, 'learning_rate': 3.9722359887090867e-07, 'epoch': 10.72} + 67%|██████▋ | 248860/371472 [9:15:24<9:52:21, 3.45it/s] 67%|██████▋ | 248861/371472 [9:15:24<9:47:24, 3.48it/s] 67%|██████▋ | 248862/371472 [9:15:24<9:58:27, 3.41it/s] 67%|██████▋ | 248863/371472 [9:15:25<10:00:28, 3.40it/s] 67%|██████▋ | 248864/371472 [9:15:25<11:23:15, 2.99it/s] 67%|██████▋ | 248865/371472 [9:15:25<11:05:09, 3.07it/s] 67%|██████▋ | 248866/371472 [9:15:26<11:03:07, 3.08it/s] 67%|██████▋ | 248867/371472 [9:15:26<10:30:56, 3.24it/s] 67%|██████▋ | 248868/371472 [9:15:26<10:53:31, 3.13it/s] 67%|██████▋ | 248869/371472 [9:15:27<10:37:25, 3.21it/s] 67%|██████▋ | 248870/371472 [9:15:27<10:21:35, 3.29it/s] 67%|██████▋ | 248871/371472 [9:15:27<10:29:52, 3.24it/s] 67%|██████▋ | 248872/371472 [9:15:27<10:33:00, 3.23it/s] 67%|██████▋ | 248873/371472 [9:15:28<10:08:33, 3.36it/s] 67%|██████▋ | 248874/371472 [9:15:28<10:08:33, 3.36it/s] 67%|██████▋ | 248875/371472 [9:15:28<10:09:41, 3.35it/s] 67%|██████▋ | 248876/371472 [9:15:29<10:06:34, 3.37it/s] 67%|██████▋ | 248877/371472 [9:15:29<9:59:09, 3.41it/s] 67%|██████▋ | 248878/371472 [9:15:29<10:08:25, 3.36it/s] 67%|██████▋ | 248879/371472 [9:15:30<10:12:22, 3.34it/s] 67%|██████▋ | 248880/371472 [9:15:30<10:06:58, 3.37it/s] {'loss': 2.5828, 'learning_rate': 3.9717511689542974e-07, 'epoch': 10.72} + 67%|██████▋ | 248880/371472 [9:15:30<10:06:58, 3.37it/s] 67%|██████▋ | 248881/371472 [9:15:30<10:07:10, 3.37it/s] 67%|██████▋ | 248882/371472 [9:15:30<10:03:03, 3.39it/s] 67%|██████▋ | 248883/371472 [9:15:31<10:14:19, 3.33it/s] 67%|██████▋ | 248884/371472 [9:15:31<10:02:41, 3.39it/s] 67%|██████▋ | 248885/371472 [9:15:31<10:01:05, 3.40it/s] 67%|██████▋ | 248886/371472 [9:15:32<10:33:54, 3.22it/s] 67%|██████▋ | 248887/371472 [9:15:32<10:12:35, 3.34it/s] 67%|██████▋ | 248888/371472 [9:15:32<10:02:38, 3.39it/s] 67%|██████▋ | 248889/371472 [9:15:33<10:01:17, 3.40it/s] 67%|██████▋ | 248890/371472 [9:15:33<10:06:19, 3.37it/s] 67%|██████▋ | 248891/371472 [9:15:33<10:06:29, 3.37it/s] 67%|██████▋ | 248892/371472 [9:15:33<10:15:01, 3.32it/s] 67%|██████▋ | 248893/371472 [9:15:34<11:13:00, 3.04it/s] 67%|██████▋ | 248894/371472 [9:15:34<10:46:49, 3.16it/s] 67%|██████▋ | 248895/371472 [9:15:34<10:23:47, 3.28it/s] 67%|██████▋ | 248896/371472 [9:15:35<10:17:18, 3.31it/s] 67%|██████▋ | 248897/371472 [9:15:35<10:05:28, 3.37it/s] 67%|██████▋ | 248898/371472 [9:15:35<10:42:54, 3.18it/s] 67%|██████▋ | 248899/371472 [9:15:36<10:35:49, 3.21it/s] 67%|██████▋ | 248900/371472 [9:15:36<10:23:26, 3.28it/s] {'loss': 2.6206, 'learning_rate': 3.9712663491995087e-07, 'epoch': 10.72} + 67%|██████▋ | 248900/371472 [9:15:36<10:23:26, 3.28it/s] 67%|██████▋ | 248901/371472 [9:15:36<10:18:50, 3.30it/s] 67%|██████▋ | 248902/371472 [9:15:36<10:06:41, 3.37it/s] 67%|██████▋ | 248903/371472 [9:15:37<10:07:18, 3.36it/s] 67%|██████▋ | 248904/371472 [9:15:37<10:26:47, 3.26it/s] 67%|██████▋ | 248905/371472 [9:15:37<10:32:27, 3.23it/s] 67%|██████▋ | 248906/371472 [9:15:38<10:28:35, 3.25it/s] 67%|██████▋ | 248907/371472 [9:15:38<11:13:26, 3.03it/s] 67%|██████▋ | 248908/371472 [9:15:38<10:51:23, 3.14it/s] 67%|██████▋ | 248909/371472 [9:15:39<10:38:36, 3.20it/s] 67%|██████▋ | 248910/371472 [9:15:39<10:26:17, 3.26it/s] 67%|██████▋ | 248911/371472 [9:15:39<11:11:24, 3.04it/s] 67%|██████▋ | 248912/371472 [9:15:40<10:37:33, 3.20it/s] 67%|██████▋ | 248913/371472 [9:15:40<10:53:32, 3.13it/s] 67%|██████▋ | 248914/371472 [9:15:40<10:37:50, 3.20it/s] 67%|██████▋ | 248915/371472 [9:15:41<10:26:01, 3.26it/s] 67%|██████▋ | 248916/371472 [9:15:41<10:20:09, 3.29it/s] 67%|██████▋ | 248917/371472 [9:15:41<10:01:55, 3.39it/s] 67%|██████▋ | 248918/371472 [9:15:41<9:54:17, 3.44it/s] 67%|██████▋ | 248919/371472 [9:15:42<9:52:25, 3.45it/s] 67%|██████▋ | 248920/371472 [9:15:42<10:04:49, 3.38it/s] {'loss': 2.5613, 'learning_rate': 3.9707815294447194e-07, 'epoch': 10.72} + 67%|██████▋ | 248920/371472 [9:15:42<10:04:49, 3.38it/s] 67%|██████▋ | 248921/371472 [9:15:42<10:07:44, 3.36it/s] 67%|██████▋ | 248922/371472 [9:15:43<10:14:45, 3.32it/s] 67%|██████▋ | 248923/371472 [9:15:43<10:21:21, 3.29it/s] 67%|██████▋ | 248924/371472 [9:15:43<10:08:57, 3.35it/s] 67%|██████▋ | 248925/371472 [9:15:44<10:20:05, 3.29it/s] 67%|██████▋ | 248926/371472 [9:15:44<10:01:31, 3.40it/s] 67%|██████▋ | 248927/371472 [9:15:44<10:17:58, 3.30it/s] 67%|██████▋ | 248928/371472 [9:15:44<10:36:03, 3.21it/s] 67%|██████▋ | 248929/371472 [9:15:45<10:15:12, 3.32it/s] 67%|██████▋ | 248930/371472 [9:15:45<10:49:16, 3.15it/s] 67%|██████▋ | 248931/371472 [9:15:45<11:28:40, 2.97it/s] 67%|██████▋ | 248932/371472 [9:15:46<11:16:09, 3.02it/s] 67%|██████▋ | 248933/371472 [9:15:46<11:02:16, 3.08it/s] 67%|██████▋ | 248934/371472 [9:15:46<10:34:26, 3.22it/s] 67%|██████▋ | 248935/371472 [9:15:47<10:19:54, 3.29it/s] 67%|██████▋ | 248936/371472 [9:15:47<10:09:47, 3.35it/s] 67%|██████▋ | 248937/371472 [9:15:47<9:59:26, 3.41it/s] 67%|██████▋ | 248938/371472 [9:15:48<10:11:11, 3.34it/s] 67%|██████▋ | 248939/371472 [9:15:48<10:00:04, 3.40it/s] 67%|██████▋ | 248940/371472 [9:15:48<9:55:00, 3.43it/s] {'loss': 2.7067, 'learning_rate': 3.970296709689931e-07, 'epoch': 10.72} + 67%|██████▋ | 248940/371472 [9:15:48<9:55:00, 3.43it/s] 67%|██████▋ | 248941/371472 [9:15:49<10:56:30, 3.11it/s] 67%|██████▋ | 248942/371472 [9:15:49<11:13:36, 3.03it/s] 67%|██████▋ | 248943/371472 [9:15:49<10:41:16, 3.18it/s] 67%|██████▋ | 248944/371472 [9:15:49<10:46:54, 3.16it/s] 67%|██████▋ | 248945/371472 [9:15:50<10:25:14, 3.27it/s] 67%|██████▋ | 248946/371472 [9:15:50<10:58:44, 3.10it/s] 67%|██████▋ | 248947/371472 [9:15:50<10:44:23, 3.17it/s] 67%|██████▋ | 248948/371472 [9:15:51<10:27:03, 3.26it/s] 67%|██████▋ | 248949/371472 [9:15:51<10:29:51, 3.24it/s] 67%|██████▋ | 248950/371472 [9:15:51<10:31:22, 3.23it/s] 67%|██████▋ | 248951/371472 [9:15:52<10:38:15, 3.20it/s] 67%|██████▋ | 248952/371472 [9:15:52<10:26:01, 3.26it/s] 67%|██████▋ | 248953/371472 [9:15:52<10:20:53, 3.29it/s] 67%|██████▋ | 248954/371472 [9:15:53<10:10:54, 3.34it/s] 67%|██████▋ | 248955/371472 [9:15:53<9:50:39, 3.46it/s] 67%|██████▋ | 248956/371472 [9:15:53<9:43:48, 3.50it/s] 67%|██████▋ | 248957/371472 [9:15:53<9:44:31, 3.49it/s] 67%|██████▋ | 248958/371472 [9:15:54<10:22:38, 3.28it/s] 67%|██████▋ | 248959/371472 [9:15:54<10:25:21, 3.27it/s] 67%|██████▋ | 248960/371472 [9:15:54<10:10:03, 3.35it/s] {'loss': 2.6689, 'learning_rate': 3.969811889935142e-07, 'epoch': 10.72} + 67%|██████▋ | 248960/371472 [9:15:54<10:10:03, 3.35it/s] 67%|██████▋ | 248961/371472 [9:15:55<10:09:29, 3.35it/s] 67%|██████▋ | 248962/371472 [9:15:55<10:11:27, 3.34it/s] 67%|██████▋ | 248963/371472 [9:15:55<9:57:26, 3.42it/s] 67%|██████▋ | 248964/371472 [9:15:55<9:51:51, 3.45it/s] 67%|██████▋ | 248965/371472 [9:15:56<9:47:51, 3.47it/s] 67%|██████▋ | 248966/371472 [9:15:56<9:48:55, 3.47it/s] 67%|██████▋ | 248967/371472 [9:15:56<9:43:06, 3.50it/s] 67%|██████▋ | 248968/371472 [9:15:57<9:58:50, 3.41it/s] 67%|██████▋ | 248969/371472 [9:15:57<10:21:51, 3.28it/s] 67%|██████▋ | 248970/371472 [9:15:57<10:24:29, 3.27it/s] 67%|██████▋ | 248971/371472 [9:15:58<10:25:07, 3.27it/s] 67%|██████▋ | 248972/371472 [9:15:58<10:36:20, 3.21it/s] 67%|██████▋ | 248973/371472 [9:15:58<10:18:50, 3.30it/s] 67%|██████▋ | 248974/371472 [9:15:58<10:19:42, 3.29it/s] 67%|██████▋ | 248975/371472 [9:15:59<10:45:57, 3.16it/s] 67%|██████▋ | 248976/371472 [9:15:59<10:49:17, 3.14it/s] 67%|██████▋ | 248977/371472 [9:16:00<11:18:27, 3.01it/s] 67%|██████▋ | 248978/371472 [9:16:00<10:47:01, 3.16it/s] 67%|██████▋ | 248979/371472 [9:16:00<10:30:15, 3.24it/s] 67%|██████▋ | 248980/371472 [9:16:00<10:16:07, 3.31it/s] {'loss': 2.6738, 'learning_rate': 3.969327070180353e-07, 'epoch': 10.72} + 67%|██████▋ | 248980/371472 [9:16:00<10:16:07, 3.31it/s] 67%|██████▋ | 248981/371472 [9:16:01<10:10:43, 3.34it/s] 67%|██████▋ | 248982/371472 [9:16:01<10:31:45, 3.23it/s] 67%|██████▋ | 248983/371472 [9:16:01<10:18:03, 3.30it/s] 67%|██████▋ | 248984/371472 [9:16:02<10:19:30, 3.30it/s] 67%|██████▋ | 248985/371472 [9:16:02<10:02:31, 3.39it/s] 67%|██████▋ | 248986/371472 [9:16:02<10:07:03, 3.36it/s] 67%|██████▋ | 248987/371472 [9:16:02<10:01:48, 3.39it/s] 67%|██████▋ | 248988/371472 [9:16:03<10:00:39, 3.40it/s] 67%|██████▋ | 248989/371472 [9:16:03<10:24:27, 3.27it/s] 67%|██████▋ | 248990/371472 [9:16:03<10:19:17, 3.30it/s] 67%|██████▋ | 248991/371472 [9:16:04<10:12:43, 3.33it/s] 67%|██████▋ | 248992/371472 [9:16:04<9:44:21, 3.49it/s] 67%|██████▋ | 248993/371472 [9:16:04<9:52:53, 3.44it/s] 67%|██████▋ | 248994/371472 [9:16:05<9:54:46, 3.43it/s] 67%|██████▋ | 248995/371472 [9:16:05<9:46:40, 3.48it/s] 67%|██████▋ | 248996/371472 [9:16:05<9:48:47, 3.47it/s] 67%|██████▋ | 248997/371472 [9:16:05<9:38:18, 3.53it/s] 67%|██████▋ | 248998/371472 [9:16:06<10:23:40, 3.27it/s] 67%|██████▋ | 248999/371472 [9:16:06<10:35:28, 3.21it/s] 67%|██████▋ | 249000/371472 [9:16:06<11:26:38, 2.97it/s] {'loss': 2.6238, 'learning_rate': 3.968842250425564e-07, 'epoch': 10.72} + 67%|██████▋ | 249000/371472 [9:16:06<11:26:38, 2.97it/s] 67%|██████▋ | 249001/371472 [9:16:07<10:59:54, 3.09it/s] 67%|██████▋ | 249002/371472 [9:16:07<10:47:03, 3.15it/s] 67%|██████▋ | 249003/371472 [9:16:07<10:46:02, 3.16it/s] 67%|██████▋ | 249004/371472 [9:16:08<10:36:13, 3.21it/s] 67%|██████▋ | 249005/371472 [9:16:08<10:11:17, 3.34it/s] 67%|██████▋ | 249006/371472 [9:16:08<10:04:22, 3.38it/s] 67%|██████▋ | 249007/371472 [9:16:09<10:08:29, 3.35it/s] 67%|██████▋ | 249008/371472 [9:16:09<9:47:38, 3.47it/s] 67%|██████▋ | 249009/371472 [9:16:09<9:47:17, 3.48it/s] 67%|██████▋ | 249010/371472 [9:16:09<9:42:41, 3.50it/s] 67%|██████▋ | 249011/371472 [9:16:10<10:01:20, 3.39it/s] 67%|██████▋ | 249012/371472 [9:16:10<10:20:17, 3.29it/s] 67%|██████▋ | 249013/371472 [9:16:10<10:19:13, 3.30it/s] 67%|██████▋ | 249014/371472 [9:16:11<10:26:17, 3.26it/s] 67%|██████▋ | 249015/371472 [9:16:11<10:43:15, 3.17it/s] 67%|██████▋ | 249016/371472 [9:16:11<10:52:12, 3.13it/s] 67%|██████▋ | 249017/371472 [9:16:12<10:27:05, 3.25it/s] 67%|██████▋ | 249018/371472 [9:16:12<10:41:35, 3.18it/s] 67%|██████▋ | 249019/371472 [9:16:12<10:13:55, 3.32it/s] 67%|██████▋ | 249020/371472 [9:16:12<10:09:00, 3.35it/s] {'loss': 2.9424, 'learning_rate': 3.968357430670775e-07, 'epoch': 10.73} + 67%|██████▋ | 249020/371472 [9:16:12<10:09:00, 3.35it/s] 67%|██████▋ | 249021/371472 [9:16:13<9:54:04, 3.44it/s] 67%|██████▋ | 249022/371472 [9:16:13<10:17:41, 3.30it/s] 67%|██████▋ | 249023/371472 [9:16:13<10:08:05, 3.36it/s] 67%|██████▋ | 249024/371472 [9:16:14<9:54:11, 3.43it/s] 67%|██████▋ | 249025/371472 [9:16:14<10:08:28, 3.35it/s] 67%|██████▋ | 249026/371472 [9:16:14<10:03:10, 3.38it/s] 67%|██████▋ | 249027/371472 [9:16:14<9:56:18, 3.42it/s] 67%|██████▋ | 249028/371472 [9:16:15<10:01:42, 3.39it/s] 67%|██████▋ | 249029/371472 [9:16:15<10:05:32, 3.37it/s] 67%|██████▋ | 249030/371472 [9:16:15<10:21:31, 3.28it/s] 67%|██████▋ | 249031/371472 [9:16:16<10:17:01, 3.31it/s] 67%|██████▋ | 249032/371472 [9:16:16<10:19:45, 3.29it/s] 67%|██████▋ | 249033/371472 [9:16:16<11:12:37, 3.03it/s] 67%|██████▋ | 249034/371472 [9:16:17<10:53:49, 3.12it/s] 67%|██████▋ | 249035/371472 [9:16:17<10:34:52, 3.21it/s] 67%|██████▋ | 249036/371472 [9:16:17<10:46:41, 3.16it/s] 67%|██████▋ | 249037/371472 [9:16:18<11:06:53, 3.06it/s] 67%|██████▋ | 249038/371472 [9:16:18<10:22:13, 3.28it/s] 67%|██████▋ | 249039/371472 [9:16:18<10:17:09, 3.31it/s] 67%|██████▋ | 249040/371472 [9:16:19<10:09:44, 3.35it/s] {'loss': 2.8769, 'learning_rate': 3.967872610915986e-07, 'epoch': 10.73} + 67%|██████▋ | 249040/371472 [9:16:19<10:09:44, 3.35it/s] 67%|██████▋ | 249041/371472 [9:16:19<10:01:17, 3.39it/s] 67%|██████▋ | 249042/371472 [9:16:19<10:02:40, 3.39it/s] 67%|██████▋ | 249043/371472 [9:16:19<9:46:15, 3.48it/s] 67%|██████▋ | 249044/371472 [9:16:20<9:56:58, 3.42it/s] 67%|██████▋ | 249045/371472 [9:16:20<10:33:07, 3.22it/s] 67%|██████▋ | 249046/371472 [9:16:20<10:55:00, 3.12it/s] 67%|██████▋ | 249047/371472 [9:16:21<10:39:55, 3.19it/s] 67%|██████▋ | 249048/371472 [9:16:21<10:39:30, 3.19it/s] 67%|██████▋ | 249049/371472 [9:16:21<10:16:09, 3.31it/s] 67%|██████▋ | 249050/371472 [9:16:22<10:01:17, 3.39it/s] 67%|██████▋ | 249051/371472 [9:16:22<9:59:46, 3.40it/s] 67%|██████▋ | 249052/371472 [9:16:22<9:56:03, 3.42it/s] 67%|██████▋ | 249053/371472 [9:16:22<9:50:25, 3.46it/s] 67%|██████▋ | 249054/371472 [9:16:23<10:45:52, 3.16it/s] 67%|██████▋ | 249055/371472 [9:16:23<10:25:39, 3.26it/s] 67%|██████▋ | 249056/371472 [9:16:23<10:26:16, 3.26it/s] 67%|██████▋ | 249057/371472 [9:16:24<10:18:31, 3.30it/s] 67%|██████▋ | 249058/371472 [9:16:24<9:57:54, 3.41it/s] 67%|██████▋ | 249059/371472 [9:16:24<10:04:07, 3.38it/s] 67%|██████▋ | 249060/371472 [9:16:25<9:54:46, 3.43it/s] {'loss': 2.7418, 'learning_rate': 3.9673877911611976e-07, 'epoch': 10.73} + 67%|██████▋ | 249060/371472 [9:16:25<9:54:46, 3.43it/s] 67%|██████▋ | 249061/371472 [9:16:25<9:48:09, 3.47it/s] 67%|██████▋ | 249062/371472 [9:16:25<9:49:56, 3.46it/s] 67%|██████▋ | 249063/371472 [9:16:25<9:36:26, 3.54it/s] 67%|██████▋ | 249064/371472 [9:16:26<9:44:40, 3.49it/s] 67%|██████▋ | 249065/371472 [9:16:26<9:44:18, 3.49it/s] 67%|██████▋ | 249066/371472 [9:16:26<9:56:22, 3.42it/s] 67%|██████▋ | 249067/371472 [9:16:27<10:07:35, 3.36it/s] 67%|██████▋ | 249068/371472 [9:16:27<9:45:42, 3.48it/s] 67%|██████▋ | 249069/371472 [9:16:27<9:50:52, 3.45it/s] 67%|██████▋ | 249070/371472 [9:16:27<9:58:56, 3.41it/s] 67%|██████▋ | 249071/371472 [9:16:28<9:58:58, 3.41it/s] 67%|██████▋ | 249072/371472 [9:16:28<9:56:02, 3.42it/s] 67%|██████▋ | 249073/371472 [9:16:28<9:45:47, 3.48it/s] 67%|██████▋ | 249074/371472 [9:16:29<9:45:39, 3.48it/s] 67%|██████▋ | 249075/371472 [9:16:29<9:38:35, 3.53it/s] 67%|██████▋ | 249076/371472 [9:16:29<9:51:07, 3.45it/s] 67%|██████▋ | 249077/371472 [9:16:29<10:35:23, 3.21it/s] 67%|██████▋ | 249078/371472 [9:16:30<10:13:21, 3.33it/s] 67%|██████▋ | 249079/371472 [9:16:30<10:20:06, 3.29it/s] 67%|██████▋ | 249080/371472 [9:16:30<10:24:19, 3.27it/s] {'loss': 2.6448, 'learning_rate': 3.9669029714064083e-07, 'epoch': 10.73} + 67%|██████▋ | 249080/371472 [9:16:30<10:24:19, 3.27it/s] 67%|██████▋ | 249081/371472 [9:16:31<10:06:59, 3.36it/s] 67%|██████▋ | 249082/371472 [9:16:31<10:02:48, 3.38it/s] 67%|██████▋ | 249083/371472 [9:16:31<10:40:02, 3.19it/s] 67%|██████▋ | 249084/371472 [9:16:32<10:17:46, 3.30it/s] 67%|██████▋ | 249085/371472 [9:16:32<10:04:20, 3.38it/s] 67%|██████▋ | 249086/371472 [9:16:32<9:50:27, 3.45it/s] 67%|██████▋ | 249087/371472 [9:16:32<9:44:00, 3.49it/s] 67%|██████▋ | 249088/371472 [9:16:33<9:30:37, 3.57it/s] 67%|██████▋ | 249089/371472 [9:16:33<10:17:14, 3.30it/s] 67%|██████▋ | 249090/371472 [9:16:33<10:30:23, 3.24it/s] 67%|██████▋ | 249091/371472 [9:16:34<10:56:53, 3.11it/s] 67%|██████▋ | 249092/371472 [9:16:34<10:43:26, 3.17it/s] 67%|██████▋ | 249093/371472 [9:16:34<11:07:47, 3.05it/s] 67%|██████▋ | 249094/371472 [9:16:35<11:09:23, 3.05it/s] 67%|██████▋ | 249095/371472 [9:16:35<10:35:28, 3.21it/s] 67%|██████▋ | 249096/371472 [9:16:35<10:26:49, 3.25it/s] 67%|██████▋ | 249097/371472 [9:16:36<10:18:41, 3.30it/s] 67%|██████▋ | 249098/371472 [9:16:36<9:50:44, 3.45it/s] 67%|██████▋ | 249099/371472 [9:16:36<10:04:56, 3.37it/s] 67%|██████▋ | 249100/371472 [9:16:36<9:48:51, 3.46it/s] {'loss': 2.7918, 'learning_rate': 3.9664181516516185e-07, 'epoch': 10.73} + 67%|██████▋ | 249100/371472 [9:16:36<9:48:51, 3.46it/s] 67%|██████▋ | 249101/371472 [9:16:37<10:12:55, 3.33it/s] 67%|██████▋ | 249102/371472 [9:16:37<10:08:56, 3.35it/s] 67%|██████▋ | 249103/371472 [9:16:37<10:46:41, 3.15it/s] 67%|██████▋ | 249104/371472 [9:16:38<10:31:13, 3.23it/s] 67%|██████▋ | 249105/371472 [9:16:38<10:19:28, 3.29it/s] 67%|██████▋ | 249106/371472 [9:16:38<10:03:16, 3.38it/s] 67%|██████▋ | 249107/371472 [9:16:39<10:00:17, 3.40it/s] 67%|██████▋ | 249108/371472 [9:16:39<10:04:06, 3.38it/s] 67%|██████▋ | 249109/371472 [9:16:39<10:31:31, 3.23it/s] 67%|██████▋ | 249110/371472 [9:16:40<10:56:18, 3.11it/s] 67%|██████▋ | 249111/371472 [9:16:40<10:18:52, 3.30it/s] 67%|██████▋ | 249112/371472 [9:16:40<10:07:54, 3.35it/s] 67%|██████▋ | 249113/371472 [9:16:40<10:16:51, 3.31it/s] 67%|██████▋ | 249114/371472 [9:16:41<10:15:36, 3.31it/s] 67%|██████▋ | 249115/371472 [9:16:41<10:35:14, 3.21it/s] 67%|██████▋ | 249116/371472 [9:16:41<10:17:20, 3.30it/s] 67%|██████▋ | 249117/371472 [9:16:42<10:14:11, 3.32it/s] 67%|██████▋ | 249118/371472 [9:16:42<10:36:07, 3.21it/s] 67%|██████▋ | 249119/371472 [9:16:42<10:16:21, 3.31it/s] 67%|██████▋ | 249120/371472 [9:16:43<10:08:18, 3.35it/s] {'loss': 2.6833, 'learning_rate': 3.96593333189683e-07, 'epoch': 10.73} + 67%|██████▋ | 249120/371472 [9:16:43<10:08:18, 3.35it/s] 67%|██████▋ | 249121/371472 [9:16:43<10:02:07, 3.39it/s] 67%|██████▋ | 249122/371472 [9:16:43<10:33:30, 3.22it/s] 67%|██████▋ | 249123/371472 [9:16:43<10:14:03, 3.32it/s] 67%|██████▋ | 249124/371472 [9:16:44<10:19:10, 3.29it/s] 67%|██████▋ | 249125/371472 [9:16:44<11:25:49, 2.97it/s] 67%|██████▋ | 249126/371472 [9:16:44<11:19:25, 3.00it/s] 67%|██████▋ | 249127/371472 [9:16:45<10:41:29, 3.18it/s] 67%|██████▋ | 249128/371472 [9:16:45<10:34:43, 3.21it/s] 67%|██████▋ | 249129/371472 [9:16:45<10:25:09, 3.26it/s] 67%|██████▋ | 249130/371472 [9:16:46<10:03:21, 3.38it/s] 67%|██████▋ | 249131/371472 [9:16:46<10:09:07, 3.35it/s] 67%|██████▋ | 249132/371472 [9:16:46<10:49:46, 3.14it/s] 67%|██████▋ | 249133/371472 [9:16:47<10:34:16, 3.21it/s] 67%|██████▋ | 249134/371472 [9:16:47<10:20:35, 3.29it/s] 67%|██████▋ | 249135/371472 [9:16:47<10:20:30, 3.29it/s] 67%|██████▋ | 249136/371472 [9:16:48<10:33:40, 3.22it/s] 67%|██████▋ | 249137/371472 [9:16:48<10:37:19, 3.20it/s] 67%|██████▋ | 249138/371472 [9:16:48<10:28:40, 3.24it/s] 67%|██████▋ | 249139/371472 [9:16:48<10:26:53, 3.25it/s] 67%|██████▋ | 249140/371472 [9:16:49<10:59:55, 3.09it/s] {'loss': 2.7579, 'learning_rate': 3.965448512142041e-07, 'epoch': 10.73} + 67%|██████▋ | 249140/371472 [9:16:49<10:59:55, 3.09it/s] 67%|██████▋ | 249141/371472 [9:16:49<10:54:22, 3.12it/s] 67%|██████▋ | 249142/371472 [9:16:49<10:44:58, 3.16it/s] 67%|██████▋ | 249143/371472 [9:16:50<10:31:21, 3.23it/s] 67%|██████▋ | 249144/371472 [9:16:50<10:22:48, 3.27it/s] 67%|██████▋ | 249145/371472 [9:16:50<10:24:58, 3.26it/s] 67%|██████▋ | 249146/371472 [9:16:51<10:33:18, 3.22it/s] 67%|██████▋ | 249147/371472 [9:16:51<11:01:15, 3.08it/s] 67%|██████▋ | 249148/371472 [9:16:51<10:34:12, 3.21it/s] 67%|██████▋ | 249149/371472 [9:16:52<10:26:21, 3.25it/s] 67%|██████▋ | 249150/371472 [9:16:52<10:44:09, 3.16it/s] 67%|██████▋ | 249151/371472 [9:16:52<10:18:56, 3.29it/s] 67%|██████▋ | 249152/371472 [9:16:52<10:16:43, 3.31it/s] 67%|██████▋ | 249153/371472 [9:16:53<10:22:25, 3.28it/s] 67%|██████▋ | 249154/371472 [9:16:53<10:02:14, 3.39it/s] 67%|██████▋ | 249155/371472 [9:16:53<10:10:50, 3.34it/s] 67%|██████▋ | 249156/371472 [9:16:54<10:07:11, 3.36it/s] 67%|██████▋ | 249157/371472 [9:16:54<11:36:57, 2.92it/s] 67%|██████▋ | 249158/371472 [9:16:54<10:51:28, 3.13it/s] 67%|██████▋ | 249159/371472 [9:16:55<10:31:42, 3.23it/s] 67%|██████▋ | 249160/371472 [9:16:55<10:38:10, 3.19it/s] {'loss': 2.6477, 'learning_rate': 3.964963692387252e-07, 'epoch': 10.73} + 67%|██████▋ | 249160/371472 [9:16:55<10:38:10, 3.19it/s] 67%|██████▋ | 249161/371472 [9:16:55<10:36:08, 3.20it/s] 67%|██████▋ | 249162/371472 [9:16:56<10:14:34, 3.32it/s] 67%|██████▋ | 249163/371472 [9:16:56<9:57:09, 3.41it/s] 67%|██████▋ | 249164/371472 [9:16:56<9:54:53, 3.43it/s] 67%|██████▋ | 249165/371472 [9:16:56<9:51:22, 3.45it/s] 67%|██████▋ | 249166/371472 [9:16:57<9:45:02, 3.48it/s] 67%|██████▋ | 249167/371472 [9:16:57<9:55:56, 3.42it/s] 67%|██████▋ | 249168/371472 [9:16:57<10:22:51, 3.27it/s] 67%|██████▋ | 249169/371472 [9:16:58<10:30:22, 3.23it/s] 67%|██████▋ | 249170/371472 [9:16:58<10:21:29, 3.28it/s] 67%|██████▋ | 249171/371472 [9:16:58<10:17:14, 3.30it/s] 67%|██████▋ | 249172/371472 [9:16:59<10:21:32, 3.28it/s] 67%|██████▋ | 249173/371472 [9:16:59<10:19:36, 3.29it/s] 67%|██████▋ | 249174/371472 [9:16:59<10:10:35, 3.34it/s] 67%|██████▋ | 249175/371472 [9:16:59<10:09:00, 3.35it/s] 67%|██████▋ | 249176/371472 [9:17:00<10:03:16, 3.38it/s] 67%|██████▋ | 249177/371472 [9:17:00<10:01:28, 3.39it/s] 67%|██████▋ | 249178/371472 [9:17:00<11:01:16, 3.08it/s] 67%|██████▋ | 249179/371472 [9:17:01<11:26:58, 2.97it/s] 67%|██████▋ | 249180/371472 [9:17:01<11:00:28, 3.09it/s] {'loss': 2.8299, 'learning_rate': 3.964478872632463e-07, 'epoch': 10.73} + 67%|██████▋ | 249180/371472 [9:17:01<11:00:28, 3.09it/s] 67%|██████▋ | 249181/371472 [9:17:01<10:42:46, 3.17it/s] 67%|██████▋ | 249182/371472 [9:17:02<10:14:07, 3.32it/s] 67%|██████▋ | 249183/371472 [9:17:02<10:25:32, 3.26it/s] 67%|██████▋ | 249184/371472 [9:17:02<10:13:25, 3.32it/s] 67%|██████▋ | 249185/371472 [9:17:03<10:38:34, 3.19it/s] 67%|██████▋ | 249186/371472 [9:17:03<10:41:08, 3.18it/s] 67%|██████▋ | 249187/371472 [9:17:03<10:23:42, 3.27it/s] 67%|██████▋ | 249188/371472 [9:17:03<10:17:14, 3.30it/s] 67%|██████▋ | 249189/371472 [9:17:04<10:44:00, 3.16it/s] 67%|██████▋ | 249190/371472 [9:17:04<10:28:25, 3.24it/s] 67%|██████▋ | 249191/371472 [9:17:04<10:18:31, 3.29it/s] 67%|██████▋ | 249192/371472 [9:17:05<9:48:04, 3.47it/s] 67%|██████▋ | 249193/371472 [9:17:05<9:53:22, 3.43it/s] 67%|██████▋ | 249194/371472 [9:17:05<9:34:31, 3.55it/s] 67%|██████▋ | 249195/371472 [9:17:06<9:43:52, 3.49it/s] 67%|██████▋ | 249196/371472 [9:17:06<9:37:54, 3.53it/s] 67%|██████▋ | 249197/371472 [9:17:06<9:31:29, 3.57it/s] 67%|██████▋ | 249198/371472 [9:17:06<10:22:32, 3.27it/s] 67%|██████▋ | 249199/371472 [9:17:07<9:52:04, 3.44it/s] 67%|██████▋ | 249200/371472 [9:17:07<10:16:19, 3.31it/s] {'loss': 2.8103, 'learning_rate': 3.963994052877674e-07, 'epoch': 10.73} + 67%|██████▋ | 249200/371472 [9:17:07<10:16:19, 3.31it/s] 67%|██████▋ | 249201/371472 [9:17:07<11:03:39, 3.07it/s] 67%|██████▋ | 249202/371472 [9:17:08<10:32:07, 3.22it/s] 67%|██████▋ | 249203/371472 [9:17:08<10:11:52, 3.33it/s] 67%|██████▋ | 249204/371472 [9:17:08<9:54:48, 3.43it/s] 67%|██████▋ | 249205/371472 [9:17:09<10:15:30, 3.31it/s] 67%|██████▋ | 249206/371472 [9:17:09<9:57:36, 3.41it/s] 67%|██████▋ | 249207/371472 [9:17:09<10:03:40, 3.38it/s] 67%|██████▋ | 249208/371472 [9:17:09<10:19:09, 3.29it/s] 67%|██████▋ | 249209/371472 [9:17:10<10:34:41, 3.21it/s] 67%|██████▋ | 249210/371472 [9:17:10<10:33:34, 3.22it/s] 67%|██████▋ | 249211/371472 [9:17:10<10:18:13, 3.30it/s] 67%|██████▋ | 249212/371472 [9:17:11<10:14:01, 3.32it/s] 67%|██████▋ | 249213/371472 [9:17:11<9:55:03, 3.42it/s] 67%|██████▋ | 249214/371472 [9:17:11<10:37:52, 3.19it/s] 67%|██████▋ | 249215/371472 [9:17:12<10:31:29, 3.23it/s] 67%|██████▋ | 249216/371472 [9:17:12<10:36:48, 3.20it/s] 67%|██████▋ | 249217/371472 [9:17:12<11:23:46, 2.98it/s] 67%|██████▋ | 249218/371472 [9:17:13<10:55:27, 3.11it/s] 67%|██████▋ | 249219/371472 [9:17:13<10:46:51, 3.15it/s] 67%|██████▋ | 249220/371472 [9:17:13<10:27:19, 3.25it/s] {'loss': 2.8128, 'learning_rate': 3.963509233122885e-07, 'epoch': 10.73} + 67%|██████▋ | 249220/371472 [9:17:13<10:27:19, 3.25it/s] 67%|██████▋ | 249221/371472 [9:17:13<10:20:14, 3.29it/s] 67%|██████▋ | 249222/371472 [9:17:14<10:08:41, 3.35it/s] 67%|██████▋ | 249223/371472 [9:17:14<10:03:24, 3.38it/s] 67%|██████▋ | 249224/371472 [9:17:14<9:59:37, 3.40it/s] 67%|██████▋ | 249225/371472 [9:17:15<10:04:31, 3.37it/s] 67%|██████▋ | 249226/371472 [9:17:15<9:42:25, 3.50it/s] 67%|██████▋ | 249227/371472 [9:17:15<9:43:18, 3.49it/s] 67%|██████▋ | 249228/371472 [9:17:16<9:54:39, 3.43it/s] 67%|██████▋ | 249229/371472 [9:17:16<9:35:01, 3.54it/s] 67%|██████▋ | 249230/371472 [9:17:16<11:17:16, 3.01it/s] 67%|██████▋ | 249231/371472 [9:17:17<10:57:37, 3.10it/s] 67%|██████▋ | 249232/371472 [9:17:17<10:38:30, 3.19it/s] 67%|██████▋ | 249233/371472 [9:17:17<10:03:36, 3.38it/s] 67%|██████▋ | 249234/371472 [9:17:17<10:25:19, 3.26it/s] 67%|██████▋ | 249235/371472 [9:17:18<10:13:55, 3.32it/s] 67%|██████▋ | 249236/371472 [9:17:18<10:39:46, 3.18it/s] 67%|██████▋ | 249237/371472 [9:17:18<10:43:10, 3.17it/s] 67%|██████▋ | 249238/371472 [9:17:19<10:46:29, 3.15it/s] 67%|██████▋ | 249239/371472 [9:17:19<10:23:12, 3.27it/s] 67%|██████▋ | 249240/371472 [9:17:19<10:26:27, 3.25it/s] {'loss': 2.5805, 'learning_rate': 3.9630244133680966e-07, 'epoch': 10.74} + 67%|██████▋ | 249240/371472 [9:17:19<10:26:27, 3.25it/s] 67%|██████▋ | 249241/371472 [9:17:20<10:26:20, 3.25it/s] 67%|██████▋ | 249242/371472 [9:17:20<10:53:06, 3.12it/s] 67%|██████▋ | 249243/371472 [9:17:20<10:20:49, 3.28it/s] 67%|██████▋ | 249244/371472 [9:17:21<10:31:05, 3.23it/s] 67%|██████▋ | 249245/371472 [9:17:21<10:13:34, 3.32it/s] 67%|██████▋ | 249246/371472 [9:17:21<10:17:29, 3.30it/s] 67%|██████▋ | 249247/371472 [9:17:21<10:18:06, 3.30it/s] 67%|██████▋ | 249248/371472 [9:17:22<10:06:23, 3.36it/s] 67%|██████▋ | 249249/371472 [9:17:22<10:03:57, 3.37it/s] 67%|██████▋ | 249250/371472 [9:17:22<9:57:26, 3.41it/s] 67%|██████▋ | 249251/371472 [9:17:23<10:00:19, 3.39it/s] 67%|██████▋ | 249252/371472 [9:17:23<9:50:34, 3.45it/s] 67%|██████▋ | 249253/371472 [9:17:23<9:40:12, 3.51it/s] 67%|██████▋ | 249254/371472 [9:17:23<9:27:41, 3.59it/s] 67%|██████▋ | 249255/371472 [9:17:24<9:20:26, 3.63it/s] 67%|██████▋ | 249256/371472 [9:17:24<10:25:29, 3.26it/s] 67%|██████▋ | 249257/371472 [9:17:24<10:17:56, 3.30it/s] 67%|██████▋ | 249258/371472 [9:17:25<11:25:08, 2.97it/s] 67%|██████▋ | 249259/371472 [9:17:25<11:02:36, 3.07it/s] 67%|██████▋ | 249260/371472 [9:17:25<10:45:22, 3.16it/s] {'loss': 2.7847, 'learning_rate': 3.9625395936133074e-07, 'epoch': 10.74} + 67%|██████▋ | 249260/371472 [9:17:25<10:45:22, 3.16it/s] 67%|██████▋ | 249261/371472 [9:17:26<10:31:09, 3.23it/s] 67%|██████▋ | 249262/371472 [9:17:26<10:14:04, 3.32it/s] 67%|██████▋ | 249263/371472 [9:17:26<10:14:00, 3.32it/s] 67%|██████▋ | 249264/371472 [9:17:26<9:53:01, 3.43it/s] 67%|██████▋ | 249265/371472 [9:17:27<9:53:11, 3.43it/s] 67%|██████▋ | 249266/371472 [9:17:27<9:45:09, 3.48it/s] 67%|██████▋ | 249267/371472 [9:17:27<9:37:41, 3.53it/s] 67%|██████▋ | 249268/371472 [9:17:28<10:00:24, 3.39it/s] 67%|██████▋ | 249269/371472 [9:17:28<9:44:13, 3.49it/s] 67%|██████▋ | 249270/371472 [9:17:28<9:54:41, 3.42it/s] 67%|██████▋ | 249271/371472 [9:17:29<10:05:58, 3.36it/s] 67%|██████▋ | 249272/371472 [9:17:29<9:55:15, 3.42it/s] 67%|██████▋ | 249273/371472 [9:17:29<9:53:11, 3.43it/s] 67%|██████▋ | 249274/371472 [9:17:29<9:50:58, 3.45it/s] 67%|██████▋ | 249275/371472 [9:17:30<10:07:19, 3.35it/s] 67%|██████▋ | 249276/371472 [9:17:30<10:17:02, 3.30it/s] 67%|██████▋ | 249277/371472 [9:17:30<10:11:41, 3.33it/s] 67%|██████▋ | 249278/371472 [9:17:31<10:13:20, 3.32it/s] 67%|██████▋ | 249279/371472 [9:17:31<10:03:19, 3.38it/s] 67%|██████▋ | 249280/371472 [9:17:31<10:08:52, 3.34it/s] {'loss': 2.6476, 'learning_rate': 3.9620547738585186e-07, 'epoch': 10.74} + 67%|██████▋ | 249280/371472 [9:17:31<10:08:52, 3.34it/s] 67%|██████▋ | 249281/371472 [9:17:31<10:01:59, 3.38it/s] 67%|██████▋ | 249282/371472 [9:17:32<9:56:00, 3.42it/s] 67%|██████▋ | 249283/371472 [9:17:32<9:43:04, 3.49it/s] 67%|██████▋ | 249284/371472 [9:17:32<9:34:03, 3.55it/s] 67%|██████▋ | 249285/371472 [9:17:33<9:29:09, 3.58it/s] 67%|██████▋ | 249286/371472 [9:17:33<9:36:08, 3.53it/s] 67%|██████▋ | 249287/371472 [9:17:33<9:40:51, 3.51it/s] 67%|██████▋ | 249288/371472 [9:17:34<10:11:22, 3.33it/s] 67%|██████▋ | 249289/371472 [9:17:34<10:02:56, 3.38it/s] 67%|██████▋ | 249290/371472 [9:17:34<10:52:05, 3.12it/s] 67%|██████▋ | 249291/371472 [9:17:34<10:31:31, 3.22it/s] 67%|██████▋ | 249292/371472 [9:17:35<10:14:28, 3.31it/s] 67%|██████▋ | 249293/371472 [9:17:35<10:03:53, 3.37it/s] 67%|██████▋ | 249294/371472 [9:17:35<10:08:24, 3.35it/s] 67%|██████▋ | 249295/371472 [9:17:36<9:48:57, 3.46it/s] 67%|██████▋ | 249296/371472 [9:17:36<9:49:20, 3.46it/s] 67%|██████▋ | 249297/371472 [9:17:36<9:38:56, 3.52it/s] 67%|██████▋ | 249298/371472 [9:17:36<9:56:09, 3.42it/s] 67%|██████▋ | 249299/371472 [9:17:37<10:01:25, 3.39it/s] 67%|██████▋ | 249300/371472 [9:17:37<9:45:29, 3.48it/s] {'loss': 2.559, 'learning_rate': 3.9615699541037293e-07, 'epoch': 10.74} + 67%|██████▋ | 249300/371472 [9:17:37<9:45:29, 3.48it/s] 67%|██████▋ | 249301/371472 [9:17:37<10:32:22, 3.22it/s] 67%|██████▋ | 249302/371472 [9:17:38<10:10:58, 3.33it/s] 67%|██████▋ | 249303/371472 [9:17:38<10:21:40, 3.28it/s] 67%|██████▋ | 249304/371472 [9:17:38<10:13:23, 3.32it/s] 67%|██████▋ | 249305/371472 [9:17:39<10:03:49, 3.37it/s] 67%|██████▋ | 249306/371472 [9:17:39<10:27:00, 3.25it/s] 67%|██████▋ | 249307/371472 [9:17:39<10:32:20, 3.22it/s] 67%|██████▋ | 249308/371472 [9:17:40<10:37:46, 3.19it/s] 67%|██████▋ | 249309/371472 [9:17:40<10:12:47, 3.32it/s] 67%|██████▋ | 249310/371472 [9:17:40<9:57:33, 3.41it/s] 67%|██████▋ | 249311/371472 [9:17:40<9:54:38, 3.42it/s] 67%|██████▋ | 249312/371472 [9:17:41<9:46:09, 3.47it/s] 67%|██████▋ | 249313/371472 [9:17:41<9:27:21, 3.59it/s] 67%|██████▋ | 249314/371472 [9:17:41<9:18:11, 3.65it/s] 67%|██████▋ | 249315/371472 [9:17:42<9:39:30, 3.51it/s] 67%|██████▋ | 249316/371472 [9:17:42<9:45:07, 3.48it/s] 67%|██████▋ | 249317/371472 [9:17:42<9:55:50, 3.42it/s] 67%|██████▋ | 249318/371472 [9:17:42<9:45:59, 3.47it/s] 67%|██████▋ | 249319/371472 [9:17:43<10:14:40, 3.31it/s] 67%|██████▋ | 249320/371472 [9:17:43<10:09:08, 3.34it/s] {'loss': 2.6233, 'learning_rate': 3.961085134348941e-07, 'epoch': 10.74} + 67%|██████▋ | 249320/371472 [9:17:43<10:09:08, 3.34it/s] 67%|██████▋ | 249321/371472 [9:17:43<9:57:35, 3.41it/s] 67%|██████▋ | 249322/371472 [9:17:44<10:01:38, 3.38it/s] 67%|██████▋ | 249323/371472 [9:17:44<9:55:50, 3.42it/s] 67%|██████▋ | 249324/371472 [9:17:44<9:49:28, 3.45it/s] 67%|██████▋ | 249325/371472 [9:17:44<9:53:48, 3.43it/s] 67%|██████▋ | 249326/371472 [9:17:45<10:35:29, 3.20it/s] 67%|██████▋ | 249327/371472 [9:17:45<10:05:12, 3.36it/s] 67%|██████▋ | 249328/371472 [9:17:45<10:06:42, 3.36it/s] 67%|██████▋ | 249329/371472 [9:17:46<10:03:22, 3.37it/s] 67%|██████▋ | 249330/371472 [9:17:46<9:57:12, 3.41it/s] 67%|██████▋ | 249331/371472 [9:17:46<9:41:15, 3.50it/s] 67%|██████▋ | 249332/371472 [9:17:47<9:48:19, 3.46it/s] 67%|██████▋ | 249333/371472 [9:17:47<9:46:19, 3.47it/s] 67%|██████▋ | 249334/371472 [9:17:47<9:37:05, 3.53it/s] 67%|██████▋ | 249335/371472 [9:17:47<9:32:03, 3.56it/s] 67%|██████▋ | 249336/371472 [9:17:48<9:37:49, 3.52it/s] 67%|██████▋ | 249337/371472 [9:17:48<9:29:52, 3.57it/s] 67%|██████▋ | 249338/371472 [9:17:48<9:13:42, 3.68it/s] 67%|██████▋ | 249339/371472 [9:17:48<9:49:13, 3.45it/s] 67%|██████▋ | 249340/371472 [9:17:49<10:48:17, 3.14it/s] {'loss': 2.8206, 'learning_rate': 3.960600314594152e-07, 'epoch': 10.74} + 67%|██████▋ | 249340/371472 [9:17:49<10:48:17, 3.14it/s] 67%|██████▋ | 249341/371472 [9:17:49<10:33:57, 3.21it/s] 67%|██████▋ | 249342/371472 [9:17:49<10:27:55, 3.24it/s] 67%|██████▋ | 249343/371472 [9:17:50<10:48:24, 3.14it/s] 67%|██████▋ | 249344/371472 [9:17:50<11:19:35, 3.00it/s] 67%|██████▋ | 249345/371472 [9:17:50<10:55:15, 3.11it/s] 67%|██████▋ | 249346/371472 [9:17:51<10:42:42, 3.17it/s] 67%|██████▋ | 249347/371472 [9:17:51<10:15:49, 3.31it/s] 67%|██████▋ | 249348/371472 [9:17:51<10:05:14, 3.36it/s] 67%|██████▋ | 249349/371472 [9:17:52<9:54:28, 3.42it/s] 67%|██████▋ | 249350/371472 [9:17:52<9:37:56, 3.52it/s] 67%|██████▋ | 249351/371472 [9:17:52<9:36:32, 3.53it/s] 67%|██████▋ | 249352/371472 [9:17:52<9:24:51, 3.60it/s] 67%|██████▋ | 249353/371472 [9:17:53<9:45:24, 3.48it/s] 67%|██████▋ | 249354/371472 [9:17:53<9:50:32, 3.45it/s] 67%|██████▋ | 249355/371472 [9:17:53<9:45:12, 3.48it/s] 67%|██████▋ | 249356/371472 [9:17:54<9:45:26, 3.48it/s] 67%|██████▋ | 249357/371472 [9:17:54<9:34:10, 3.54it/s] 67%|██████▋ | 249358/371472 [9:17:54<9:36:20, 3.53it/s] 67%|██████▋ | 249359/371472 [9:17:55<10:58:13, 3.09it/s] 67%|██████▋ | 249360/371472 [9:17:55<10:46:43, 3.15it/s] {'loss': 2.7888, 'learning_rate': 3.960115494839363e-07, 'epoch': 10.74} + 67%|██████▋ | 249360/371472 [9:17:55<10:46:43, 3.15it/s] 67%|██████▋ | 249361/371472 [9:17:55<10:36:27, 3.20it/s] 67%|██████▋ | 249362/371472 [9:17:56<10:44:36, 3.16it/s] 67%|██████▋ | 249363/371472 [9:17:56<10:23:49, 3.26it/s] 67%|██████▋ | 249364/371472 [9:17:56<10:03:32, 3.37it/s] 67%|██████▋ | 249365/371472 [9:17:56<10:14:00, 3.31it/s] 67%|██████▋ | 249366/371472 [9:17:57<9:59:18, 3.40it/s] 67%|██████▋ | 249367/371472 [9:17:57<9:59:21, 3.40it/s] 67%|██████▋ | 249368/371472 [9:17:57<9:49:35, 3.45it/s] 67%|██████▋ | 249369/371472 [9:17:58<9:58:19, 3.40it/s] 67%|██████▋ | 249370/371472 [9:17:58<9:53:07, 3.43it/s] 67%|██████▋ | 249371/371472 [9:17:58<10:57:05, 3.10it/s] 67%|██████▋ | 249372/371472 [9:17:59<10:41:01, 3.17it/s] 67%|██████▋ | 249373/371472 [9:17:59<10:34:29, 3.21it/s] 67%|██████▋ | 249374/371472 [9:17:59<10:32:10, 3.22it/s] 67%|██████▋ | 249375/371472 [9:17:59<10:08:57, 3.34it/s] 67%|██████▋ | 249376/371472 [9:18:00<10:08:02, 3.35it/s] 67%|██████▋ | 249377/371472 [9:18:00<10:16:53, 3.30it/s] 67%|██████▋ | 249378/371472 [9:18:00<10:14:01, 3.31it/s] 67%|██████▋ | 249379/371472 [9:18:01<10:08:39, 3.34it/s] 67%|██████▋ | 249380/371472 [9:18:01<10:11:02, 3.33it/s] {'loss': 2.7177, 'learning_rate': 3.959630675084574e-07, 'epoch': 10.74} + 67%|██████▋ | 249380/371472 [9:18:01<10:11:02, 3.33it/s] 67%|██████▋ | 249381/371472 [9:18:01<10:19:39, 3.28it/s] 67%|██████▋ | 249382/371472 [9:18:02<10:39:33, 3.18it/s] 67%|██████▋ | 249383/371472 [9:18:02<10:15:32, 3.31it/s] 67%|██████▋ | 249384/371472 [9:18:02<10:14:05, 3.31it/s] 67%|██████▋ | 249385/371472 [9:18:02<10:03:51, 3.37it/s] 67%|██████▋ | 249386/371472 [9:18:03<10:10:25, 3.33it/s] 67%|██████▋ | 249387/371472 [9:18:03<10:03:50, 3.37it/s] 67%|██████▋ | 249388/371472 [9:18:03<10:26:58, 3.25it/s] 67%|██████▋ | 249389/371472 [9:18:04<10:15:05, 3.31it/s] 67%|██████▋ | 249390/371472 [9:18:04<10:01:37, 3.38it/s] 67%|██████▋ | 249391/371472 [9:18:04<10:51:42, 3.12it/s] 67%|██████▋ | 249392/371472 [9:18:05<10:40:04, 3.18it/s] 67%|██████▋ | 249393/371472 [9:18:05<10:18:19, 3.29it/s] 67%|██████▋ | 249394/371472 [9:18:05<10:06:32, 3.35it/s] 67%|██████▋ | 249395/371472 [9:18:05<10:00:22, 3.39it/s] 67%|██████▋ | 249396/371472 [9:18:06<9:49:23, 3.45it/s] 67%|██████▋ | 249397/371472 [9:18:06<9:42:58, 3.49it/s] 67%|██████▋ | 249398/371472 [9:18:06<9:48:31, 3.46it/s] 67%|██████▋ | 249399/371472 [9:18:07<9:56:36, 3.41it/s] 67%|██████▋ | 249400/371472 [9:18:07<10:04:22, 3.37it/s] {'loss': 2.7815, 'learning_rate': 3.9591458553297855e-07, 'epoch': 10.74} + 67%|██████▋ | 249400/371472 [9:18:07<10:04:22, 3.37it/s] 67%|██████▋ | 249401/371472 [9:18:07<10:11:07, 3.33it/s] 67%|██████▋ | 249402/371472 [9:18:08<10:25:49, 3.25it/s] 67%|██████▋ | 249403/371472 [9:18:08<10:05:17, 3.36it/s] 67%|██████▋ | 249404/371472 [9:18:08<9:57:13, 3.41it/s] 67%|██████▋ | 249405/371472 [9:18:08<9:45:33, 3.47it/s] 67%|██████▋ | 249406/371472 [9:18:09<11:03:17, 3.07it/s] 67%|██████▋ | 249407/371472 [9:18:09<11:09:16, 3.04it/s] 67%|██████▋ | 249408/371472 [9:18:09<10:55:58, 3.10it/s] 67%|██████▋ | 249409/371472 [9:18:10<10:41:33, 3.17it/s] 67%|██████▋ | 249410/371472 [9:18:10<10:47:09, 3.14it/s] 67%|██████▋ | 249411/371472 [9:18:10<10:47:36, 3.14it/s] 67%|██████▋ | 249412/371472 [9:18:11<10:28:44, 3.24it/s] 67%|██████▋ | 249413/371472 [9:18:11<10:23:52, 3.26it/s] 67%|██████▋ | 249414/371472 [9:18:11<10:42:42, 3.17it/s] 67%|██████▋ | 249415/371472 [9:18:12<11:02:49, 3.07it/s] 67%|██████▋ | 249416/371472 [9:18:12<10:50:31, 3.13it/s] 67%|██████▋ | 249417/371472 [9:18:12<10:27:12, 3.24it/s] 67%|██████▋ | 249418/371472 [9:18:13<10:29:08, 3.23it/s] 67%|██████▋ | 249419/371472 [9:18:13<10:47:33, 3.14it/s] 67%|██████▋ | 249420/371472 [9:18:13<10:42:37, 3.17it/s] {'loss': 2.8109, 'learning_rate': 3.9586610355749957e-07, 'epoch': 10.74} + 67%|██████▋ | 249420/371472 [9:18:13<10:42:37, 3.17it/s] 67%|██████▋ | 249421/371472 [9:18:13<10:19:29, 3.28it/s] 67%|██████▋ | 249422/371472 [9:18:14<10:07:16, 3.35it/s] 67%|██████▋ | 249423/371472 [9:18:14<9:52:08, 3.44it/s] 67%|██████▋ | 249424/371472 [9:18:15<11:43:18, 2.89it/s] 67%|██████▋ | 249425/371472 [9:18:15<11:06:51, 3.05it/s] 67%|██████▋ | 249426/371472 [9:18:15<10:58:53, 3.09it/s] 67%|██████▋ | 249427/371472 [9:18:15<11:11:55, 3.03it/s] 67%|██████▋ | 249428/371472 [9:18:16<10:45:48, 3.15it/s] 67%|██████▋ | 249429/371472 [9:18:16<10:33:58, 3.21it/s] 67%|██████▋ | 249430/371472 [9:18:16<10:17:23, 3.29it/s] 67%|██████▋ | 249431/371472 [9:18:17<10:26:17, 3.25it/s] 67%|██████▋ | 249432/371472 [9:18:17<10:26:24, 3.25it/s] 67%|██████▋ | 249433/371472 [9:18:17<10:16:47, 3.30it/s] 67%|██████▋ | 249434/371472 [9:18:18<10:18:29, 3.29it/s] 67%|██████▋ | 249435/371472 [9:18:18<10:16:55, 3.30it/s] 67%|██████▋ | 249436/371472 [9:18:18<10:02:30, 3.38it/s] 67%|██████▋ | 249437/371472 [9:18:18<9:43:21, 3.49it/s] 67%|██████▋ | 249438/371472 [9:18:19<9:58:34, 3.40it/s] 67%|██████▋ | 249439/371472 [9:18:19<9:45:07, 3.48it/s] 67%|██████▋ | 249440/371472 [9:18:19<10:57:38, 3.09it/s] {'loss': 2.6889, 'learning_rate': 3.9581762158202075e-07, 'epoch': 10.74} + 67%|██████▋ | 249440/371472 [9:18:19<10:57:38, 3.09it/s] 67%|██████▋ | 249441/371472 [9:18:20<10:45:35, 3.15it/s] 67%|██████▋ | 249442/371472 [9:18:20<10:19:51, 3.28it/s] 67%|██████▋ | 249443/371472 [9:18:20<12:30:16, 2.71it/s] 67%|██████▋ | 249444/371472 [9:18:21<11:45:24, 2.88it/s] 67%|██████▋ | 249445/371472 [9:18:21<11:45:35, 2.88it/s] 67%|██████▋ | 249446/371472 [9:18:21<11:50:23, 2.86it/s] 67%|██████▋ | 249447/371472 [9:18:22<11:19:11, 2.99it/s] 67%|██████▋ | 249448/371472 [9:18:22<10:43:20, 3.16it/s] 67%|██████▋ | 249449/371472 [9:18:22<10:26:49, 3.24it/s] 67%|██████▋ | 249450/371472 [9:18:23<10:08:49, 3.34it/s] 67%|██████▋ | 249451/371472 [9:18:23<9:50:12, 3.45it/s] 67%|██████▋ | 249452/371472 [9:18:23<9:34:06, 3.54it/s] 67%|██████▋ | 249453/371472 [9:18:23<10:11:49, 3.32it/s] 67%|██████▋ | 249454/371472 [9:18:24<9:56:10, 3.41it/s] 67%|██████▋ | 249455/371472 [9:18:24<9:46:55, 3.46it/s] 67%|██████▋ | 249456/371472 [9:18:24<9:51:30, 3.44it/s] 67%|██████▋ | 249457/371472 [9:18:25<9:51:54, 3.44it/s] 67%|██████▋ | 249458/371472 [9:18:25<9:41:14, 3.50it/s] 67%|██████▋ | 249459/371472 [9:18:25<9:52:31, 3.43it/s] 67%|██████▋ | 249460/371472 [9:18:25<9:47:14, 3.46it/s] {'loss': 2.7934, 'learning_rate': 3.957691396065418e-07, 'epoch': 10.74} + 67%|██████▋ | 249460/371472 [9:18:26<9:47:14, 3.46it/s] 67%|██████▋ | 249461/371472 [9:18:26<9:42:05, 3.49it/s] 67%|██████▋ | 249462/371472 [9:18:26<9:34:06, 3.54it/s] 67%|██████▋ | 249463/371472 [9:18:26<9:36:14, 3.53it/s] 67%|██████▋ | 249464/371472 [9:18:27<10:04:49, 3.36it/s] 67%|██████▋ | 249465/371472 [9:18:27<10:19:15, 3.28it/s] 67%|██████▋ | 249466/371472 [9:18:27<10:48:03, 3.14it/s] 67%|██████▋ | 249467/371472 [9:18:28<10:46:18, 3.15it/s] 67%|██████▋ | 249468/371472 [9:18:28<10:25:18, 3.25it/s] 67%|██████▋ | 249469/371472 [9:18:28<10:23:44, 3.26it/s] 67%|██████▋ | 249470/371472 [9:18:29<10:34:06, 3.21it/s] 67%|██████▋ | 249471/371472 [9:18:29<10:40:48, 3.17it/s] 67%|██████▋ | 249472/371472 [9:18:29<10:56:20, 3.10it/s] 67%|██████▋ | 249473/371472 [9:18:30<10:36:52, 3.19it/s] 67%|██████▋ | 249474/371472 [9:18:30<11:32:11, 2.94it/s] 67%|██████▋ | 249475/371472 [9:18:30<11:05:47, 3.05it/s] 67%|██████▋ | 249476/371472 [9:18:31<10:40:51, 3.17it/s] 67%|██████▋ | 249477/371472 [9:18:31<10:54:16, 3.11it/s] 67%|██████▋ | 249478/371472 [9:18:31<10:20:32, 3.28it/s] 67%|██████▋ | 249479/371472 [9:18:31<9:55:05, 3.42it/s] 67%|██████▋ | 249480/371472 [9:18:32<10:08:20, 3.34it/s] {'loss': 2.6443, 'learning_rate': 3.9572065763106295e-07, 'epoch': 10.75} + 67%|██████▋ | 249480/371472 [9:18:32<10:08:20, 3.34it/s] 67%|██████▋ | 249481/371472 [9:18:32<10:21:11, 3.27it/s] 67%|██████▋ | 249482/371472 [9:18:32<10:05:33, 3.36it/s] 67%|██████▋ | 249483/371472 [9:18:33<10:17:43, 3.29it/s] 67%|██████▋ | 249484/371472 [9:18:33<10:41:17, 3.17it/s] 67%|██████▋ | 249485/371472 [9:18:33<10:10:33, 3.33it/s] 67%|██████▋ | 249486/371472 [9:18:34<10:01:47, 3.38it/s] 67%|██████▋ | 249487/371472 [9:18:34<10:02:00, 3.38it/s] 67%|██████▋ | 249488/371472 [9:18:34<9:52:18, 3.43it/s] 67%|██████▋ | 249489/371472 [9:18:34<10:21:58, 3.27it/s] 67%|██████▋ | 249490/371472 [9:18:35<10:07:20, 3.35it/s] 67%|██████▋ | 249491/371472 [9:18:35<9:56:13, 3.41it/s] 67%|██████▋ | 249492/371472 [9:18:35<9:49:43, 3.45it/s] 67%|██████▋ | 249493/371472 [9:18:36<9:42:43, 3.49it/s] 67%|██████▋ | 249494/371472 [9:18:36<9:45:36, 3.47it/s] 67%|██████▋ | 249495/371472 [9:18:36<10:02:48, 3.37it/s] 67%|██████▋ | 249496/371472 [9:18:36<9:53:27, 3.43it/s] 67%|██████▋ | 249497/371472 [9:18:37<9:40:45, 3.50it/s] 67%|██████▋ | 249498/371472 [9:18:37<10:02:27, 3.37it/s] 67%|██████▋ | 249499/371472 [9:18:37<10:02:29, 3.37it/s] 67%|██████▋ | 249500/371472 [9:18:38<10:07:13, 3.35it/s] {'loss': 2.6251, 'learning_rate': 3.95672175655584e-07, 'epoch': 10.75} + 67%|██████▋ | 249500/371472 [9:18:38<10:07:13, 3.35it/s] 67%|██████▋ | 249501/371472 [9:18:38<9:53:22, 3.43it/s] 67%|██████▋ | 249502/371472 [9:18:38<9:39:10, 3.51it/s] 67%|██████▋ | 249503/371472 [9:18:38<9:50:08, 3.44it/s] 67%|██████▋ | 249504/371472 [9:18:39<10:11:05, 3.33it/s] 67%|██████▋ | 249505/371472 [9:18:39<9:56:41, 3.41it/s] 67%|██████▋ | 249506/371472 [9:18:39<9:54:46, 3.42it/s] 67%|██████▋ | 249507/371472 [9:18:40<10:38:15, 3.18it/s] 67%|██████▋ | 249508/371472 [9:18:40<10:14:34, 3.31it/s] 67%|██████▋ | 249509/371472 [9:18:40<10:04:19, 3.36it/s] 67%|██████▋ | 249510/371472 [9:18:41<9:55:20, 3.41it/s] 67%|██████▋ | 249511/371472 [9:18:41<10:27:28, 3.24it/s] 67%|██████▋ | 249512/371472 [9:18:41<10:47:57, 3.14it/s] 67%|██████▋ | 249513/371472 [9:18:42<10:20:51, 3.27it/s] 67%|██████▋ | 249514/371472 [9:18:42<11:48:39, 2.87it/s] 67%|██████▋ | 249515/371472 [9:18:42<11:03:34, 3.06it/s] 67%|██████▋ | 249516/371472 [9:18:43<10:47:10, 3.14it/s] 67%|██████▋ | 249517/371472 [9:18:43<10:58:37, 3.09it/s] 67%|██████▋ | 249518/371472 [9:18:43<10:49:34, 3.13it/s] 67%|██████▋ | 249519/371472 [9:18:43<10:15:47, 3.30it/s] 67%|██████▋ | 249520/371472 [9:18:44<9:57:11, 3.40it/s] {'loss': 2.783, 'learning_rate': 3.956236936801052e-07, 'epoch': 10.75} + 67%|██████▋ | 249520/371472 [9:18:44<9:57:11, 3.40it/s] 67%|██████▋ | 249521/371472 [9:18:44<9:48:28, 3.45it/s] 67%|██████▋ | 249522/371472 [9:18:44<9:55:43, 3.41it/s] 67%|██████▋ | 249523/371472 [9:18:45<9:42:11, 3.49it/s] 67%|██████▋ | 249524/371472 [9:18:45<11:20:07, 2.99it/s] 67%|██████▋ | 249525/371472 [9:18:45<11:09:23, 3.04it/s] 67%|██████▋ | 249526/371472 [9:18:46<11:01:13, 3.07it/s] 67%|██████▋ | 249527/371472 [9:18:46<11:04:08, 3.06it/s] 67%|██████▋ | 249528/371472 [9:18:46<10:38:38, 3.18it/s] 67%|██████▋ | 249529/371472 [9:18:47<10:52:56, 3.11it/s] 67%|██████▋ | 249530/371472 [9:18:47<10:23:12, 3.26it/s] 67%|██████▋ | 249531/371472 [9:18:47<10:05:16, 3.36it/s] 67%|██████▋ | 249532/371472 [9:18:48<10:36:32, 3.19it/s] 67%|██████▋ | 249533/371472 [9:18:48<10:23:16, 3.26it/s] 67%|██████▋ | 249534/371472 [9:18:48<10:17:17, 3.29it/s] 67%|██████▋ | 249535/371472 [9:18:48<10:05:34, 3.36it/s] 67%|██████▋ | 249536/371472 [9:18:49<10:32:59, 3.21it/s] 67%|██████▋ | 249537/371472 [9:18:49<10:47:46, 3.14it/s] 67%|██████▋ | 249538/371472 [9:18:49<10:22:38, 3.26it/s] 67%|██████▋ | 249539/371472 [9:18:50<10:13:33, 3.31it/s] 67%|██████▋ | 249540/371472 [9:18:50<9:57:57, 3.40it/s] {'loss': 2.6905, 'learning_rate': 3.955752117046262e-07, 'epoch': 10.75} + 67%|██████▋ | 249540/371472 [9:18:50<9:57:57, 3.40it/s] 67%|██████▋ | 249541/371472 [9:18:50<10:00:15, 3.39it/s] 67%|██████▋ | 249542/371472 [9:18:51<10:16:39, 3.30it/s] 67%|██████▋ | 249543/371472 [9:18:51<10:20:27, 3.28it/s] 67%|██████▋ | 249544/371472 [9:18:51<10:08:15, 3.34it/s] 67%|██████▋ | 249545/371472 [9:18:51<10:14:49, 3.31it/s] 67%|██████▋ | 249546/371472 [9:18:52<10:50:46, 3.12it/s] 67%|██████▋ | 249547/371472 [9:18:52<10:22:08, 3.27it/s] 67%|██████▋ | 249548/371472 [9:18:52<10:28:45, 3.23it/s] 67%|██████▋ | 249549/371472 [9:18:53<10:46:30, 3.14it/s] 67%|██████▋ | 249550/371472 [9:18:53<10:42:52, 3.16it/s] 67%|██████▋ | 249551/371472 [9:18:53<10:32:33, 3.21it/s] 67%|██████▋ | 249552/371472 [9:18:54<10:16:26, 3.30it/s] 67%|██████▋ | 249553/371472 [9:18:54<10:13:55, 3.31it/s] 67%|██████▋ | 249554/371472 [9:18:54<9:55:24, 3.41it/s] 67%|██████▋ | 249555/371472 [9:18:55<10:39:28, 3.18it/s] 67%|██████▋ | 249556/371472 [9:18:55<10:29:34, 3.23it/s] 67%|██████▋ | 249557/371472 [9:18:55<10:12:30, 3.32it/s] 67%|██████▋ | 249558/371472 [9:18:55<10:02:40, 3.37it/s] 67%|██████▋ | 249559/371472 [9:18:56<9:49:24, 3.45it/s] 67%|██████▋ | 249560/371472 [9:18:56<9:51:26, 3.44it/s] {'loss': 2.6168, 'learning_rate': 3.955267297291474e-07, 'epoch': 10.75} + 67%|██████▋ | 249560/371472 [9:18:56<9:51:26, 3.44it/s] 67%|██████▋ | 249561/371472 [9:18:56<9:37:23, 3.52it/s] 67%|██████▋ | 249562/371472 [9:18:57<9:59:00, 3.39it/s] 67%|██████▋ | 249563/371472 [9:18:57<9:54:22, 3.42it/s] 67%|██████▋ | 249564/371472 [9:18:57<9:38:35, 3.51it/s] 67%|██████▋ | 249565/371472 [9:18:57<9:37:56, 3.52it/s] 67%|██████▋ | 249566/371472 [9:18:58<9:56:00, 3.41it/s] 67%|██████▋ | 249567/371472 [9:18:58<9:35:00, 3.53it/s] 67%|██████▋ | 249568/371472 [9:18:58<9:36:19, 3.53it/s] 67%|██████▋ | 249569/371472 [9:18:59<9:29:15, 3.57it/s] 67%|██████▋ | 249570/371472 [9:18:59<9:49:28, 3.45it/s] 67%|██████▋ | 249571/371472 [9:18:59<9:35:21, 3.53it/s] 67%|██████▋ | 249572/371472 [9:18:59<9:59:22, 3.39it/s] 67%|██████▋ | 249573/371472 [9:19:00<10:03:13, 3.37it/s] 67%|██████▋ | 249574/371472 [9:19:00<9:44:50, 3.47it/s] 67%|██████▋ | 249575/371472 [9:19:00<9:34:41, 3.54it/s] 67%|██████▋ | 249576/371472 [9:19:01<9:36:06, 3.53it/s] 67%|██████▋ | 249577/371472 [9:19:01<9:40:02, 3.50it/s] 67%|██████▋ | 249578/371472 [9:19:01<9:43:51, 3.48it/s] 67%|██████▋ | 249579/371472 [9:19:01<9:56:07, 3.41it/s] 67%|██████▋ | 249580/371472 [9:19:02<9:53:27, 3.42it/s] {'loss': 2.6612, 'learning_rate': 3.9547824775366846e-07, 'epoch': 10.75} + 67%|██████▋ | 249580/371472 [9:19:02<9:53:27, 3.42it/s] 67%|██████▋ | 249581/371472 [9:19:02<9:32:12, 3.55it/s] 67%|██████▋ | 249582/371472 [9:19:02<9:33:28, 3.54it/s] 67%|██████▋ | 249583/371472 [9:19:03<9:56:28, 3.41it/s] 67%|██████▋ | 249584/371472 [9:19:03<10:11:00, 3.32it/s] 67%|██████▋ | 249585/371472 [9:19:03<10:03:05, 3.37it/s] 67%|██████▋ | 249586/371472 [9:19:04<9:51:51, 3.43it/s] 67%|██████▋ | 249587/371472 [9:19:04<9:55:06, 3.41it/s] 67%|██████▋ | 249588/371472 [9:19:04<9:49:12, 3.45it/s] 67%|██████▋ | 249589/371472 [9:19:04<9:32:41, 3.55it/s] 67%|██████▋ | 249590/371472 [9:19:05<9:50:37, 3.44it/s] 67%|██████▋ | 249591/371472 [9:19:05<10:32:16, 3.21it/s] 67%|██████▋ | 249592/371472 [9:19:05<10:26:54, 3.24it/s] 67%|██████▋ | 249593/371472 [9:19:06<10:01:04, 3.38it/s] 67%|██████▋ | 249594/371472 [9:19:06<9:55:17, 3.41it/s] 67%|██████▋ | 249595/371472 [9:19:06<10:00:05, 3.38it/s] 67%|██████▋ | 249596/371472 [9:19:06<9:45:14, 3.47it/s] 67%|██████▋ | 249597/371472 [9:19:07<9:45:11, 3.47it/s] 67%|██████▋ | 249598/371472 [9:19:07<10:03:30, 3.37it/s] 67%|██████▋ | 249599/371472 [9:19:07<9:52:52, 3.43it/s] 67%|██████▋ | 249600/371472 [9:19:08<9:57:32, 3.40it/s] {'loss': 2.7306, 'learning_rate': 3.954297657781896e-07, 'epoch': 10.75} + 67%|██████▋ | 249600/371472 [9:19:08<9:57:32, 3.40it/s] 67%|██████▋ | 249601/371472 [9:19:08<9:59:14, 3.39it/s] 67%|██████▋ | 249602/371472 [9:19:08<9:46:05, 3.47it/s] 67%|██████▋ | 249603/371472 [9:19:09<10:10:38, 3.33it/s] 67%|██████▋ | 249604/371472 [9:19:09<10:02:28, 3.37it/s] 67%|██████▋ | 249605/371472 [9:19:09<9:53:16, 3.42it/s] 67%|██████▋ | 249606/371472 [9:19:09<9:45:27, 3.47it/s] 67%|██████▋ | 249607/371472 [9:19:10<9:55:41, 3.41it/s] 67%|██████▋ | 249608/371472 [9:19:10<9:34:26, 3.54it/s] 67%|██████▋ | 249609/371472 [9:19:10<9:24:32, 3.60it/s] 67%|██████▋ | 249610/371472 [9:19:11<10:10:26, 3.33it/s] 67%|██████▋ | 249611/371472 [9:19:11<10:17:24, 3.29it/s] 67%|██████▋ | 249612/371472 [9:19:11<10:10:36, 3.33it/s] 67%|██████▋ | 249613/371472 [9:19:11<10:09:52, 3.33it/s] 67%|██████▋ | 249614/371472 [9:19:12<10:57:59, 3.09it/s] 67%|██████▋ | 249615/371472 [9:19:12<10:40:38, 3.17it/s] 67%|██████▋ | 249616/371472 [9:19:12<10:44:36, 3.15it/s] 67%|██████▋ | 249617/371472 [9:19:13<10:46:31, 3.14it/s] 67%|██████▋ | 249618/371472 [9:19:13<11:00:44, 3.07it/s] 67%|██████▋ | 249619/371472 [9:19:13<10:39:37, 3.18it/s] 67%|██████▋ | 249620/371472 [9:19:14<10:28:07, 3.23it/s] {'loss': 2.7064, 'learning_rate': 3.953812838027106e-07, 'epoch': 10.75} + 67%|██████▋ | 249620/371472 [9:19:14<10:28:07, 3.23it/s] 67%|██████▋ | 249621/371472 [9:19:14<10:39:34, 3.18it/s] 67%|██████▋ | 249622/371472 [9:19:14<10:18:28, 3.28it/s] 67%|██████▋ | 249623/371472 [9:19:15<10:16:47, 3.29it/s] 67%|██████▋ | 249624/371472 [9:19:15<10:34:36, 3.20it/s] 67%|██████▋ | 249625/371472 [9:19:15<10:26:55, 3.24it/s] 67%|██████▋ | 249626/371472 [9:19:16<10:32:40, 3.21it/s] 67%|██████▋ | 249627/371472 [9:19:16<10:49:52, 3.12it/s] 67%|██████▋ | 249628/371472 [9:19:16<10:51:06, 3.12it/s] 67%|██████▋ | 249629/371472 [9:19:17<10:38:08, 3.18it/s] 67%|██████▋ | 249630/371472 [9:19:17<10:39:21, 3.18it/s] 67%|██████▋ | 249631/371472 [9:19:17<10:39:13, 3.18it/s] 67%|██████▋ | 249632/371472 [9:19:17<10:33:01, 3.21it/s] 67%|██████▋ | 249633/371472 [9:19:18<10:46:45, 3.14it/s] 67%|██████▋ | 249634/371472 [9:19:18<10:43:55, 3.15it/s] 67%|██████▋ | 249635/371472 [9:19:18<10:33:31, 3.21it/s] 67%|██████▋ | 249636/371472 [9:19:19<10:16:32, 3.29it/s] 67%|██████▋ | 249637/371472 [9:19:19<10:02:33, 3.37it/s] 67%|██████▋ | 249638/371472 [9:19:19<10:06:56, 3.35it/s] 67%|██████▋ | 249639/371472 [9:19:20<10:42:21, 3.16it/s] 67%|██████▋ | 249640/371472 [9:19:20<10:30:24, 3.22it/s] {'loss': 2.7647, 'learning_rate': 3.9533280182723173e-07, 'epoch': 10.75} + 67%|██████▋ | 249640/371472 [9:19:20<10:30:24, 3.22it/s] 67%|██████▋ | 249641/371472 [9:19:20<10:19:18, 3.28it/s] 67%|██████▋ | 249642/371472 [9:19:21<10:07:41, 3.34it/s] 67%|██████▋ | 249643/371472 [9:19:21<10:04:17, 3.36it/s] 67%|██████▋ | 249644/371472 [9:19:21<10:08:30, 3.34it/s] 67%|██████▋ | 249645/371472 [9:19:21<9:58:06, 3.39it/s] 67%|██████▋ | 249646/371472 [9:19:22<10:04:28, 3.36it/s] 67%|██████▋ | 249647/371472 [9:19:22<9:51:26, 3.43it/s] 67%|██████▋ | 249648/371472 [9:19:22<9:57:23, 3.40it/s] 67%|██████▋ | 249649/371472 [9:19:23<9:58:50, 3.39it/s] 67%|██████▋ | 249650/371472 [9:19:23<9:59:18, 3.39it/s] 67%|██████▋ | 249651/371472 [9:19:23<9:39:07, 3.51it/s] 67%|██████▋ | 249652/371472 [9:19:23<9:39:49, 3.50it/s] 67%|██████▋ | 249653/371472 [9:19:24<10:11:11, 3.32it/s] 67%|██████▋ | 249654/371472 [9:19:24<10:30:43, 3.22it/s] 67%|██████▋ | 249655/371472 [9:19:24<10:17:37, 3.29it/s] 67%|██████▋ | 249656/371472 [9:19:25<10:08:08, 3.34it/s] 67%|██████▋ | 249657/371472 [9:19:25<9:59:36, 3.39it/s] 67%|██████▋ | 249658/371472 [9:19:25<10:57:09, 3.09it/s] 67%|██████▋ | 249659/371472 [9:19:26<10:41:38, 3.16it/s] 67%|██████▋ | 249660/371472 [9:19:26<10:18:24, 3.28it/s] {'loss': 2.6554, 'learning_rate': 3.9528431985175285e-07, 'epoch': 10.75} + 67%|██████▋ | 249660/371472 [9:19:26<10:18:24, 3.28it/s] 67%|██████▋ | 249661/371472 [9:19:26<10:19:19, 3.28it/s] 67%|██████▋ | 249662/371472 [9:19:27<10:01:53, 3.37it/s] 67%|██████▋ | 249663/371472 [9:19:27<9:56:23, 3.40it/s] 67%|██████▋ | 249664/371472 [9:19:27<9:58:56, 3.39it/s] 67%|██████▋ | 249665/371472 [9:19:27<10:24:25, 3.25it/s] 67%|██████▋ | 249666/371472 [9:19:28<10:20:12, 3.27it/s] 67%|██████▋ | 249667/371472 [9:19:28<10:01:34, 3.37it/s] 67%|██████▋ | 249668/371472 [9:19:28<10:08:13, 3.34it/s] 67%|██████▋ | 249669/371472 [9:19:29<10:00:50, 3.38it/s] 67%|██████▋ | 249670/371472 [9:19:29<10:26:20, 3.24it/s] 67%|██████▋ | 249671/371472 [9:19:29<10:21:15, 3.27it/s] 67%|██████▋ | 249672/371472 [9:19:30<10:24:50, 3.25it/s] 67%|██████▋ | 249673/371472 [9:19:30<10:15:38, 3.30it/s] 67%|██████▋ | 249674/371472 [9:19:30<9:58:06, 3.39it/s] 67%|██████▋ | 249675/371472 [9:19:30<9:50:50, 3.44it/s] 67%|██████▋ | 249676/371472 [9:19:31<9:59:22, 3.39it/s] 67%|██████▋ | 249677/371472 [9:19:31<9:55:36, 3.41it/s] 67%|██████▋ | 249678/371472 [9:19:31<9:56:12, 3.40it/s] 67%|██████▋ | 249679/371472 [9:19:32<9:53:45, 3.42it/s] 67%|██████▋ | 249680/371472 [9:19:32<9:55:30, 3.41it/s] {'loss': 2.7716, 'learning_rate': 3.952358378762739e-07, 'epoch': 10.75} + 67%|██████▋ | 249680/371472 [9:19:32<9:55:30, 3.41it/s] 67%|██████▋ | 249681/371472 [9:19:32<9:56:07, 3.41it/s] 67%|██████▋ | 249682/371472 [9:19:32<9:49:52, 3.44it/s] 67%|██████▋ | 249683/371472 [9:19:33<9:37:23, 3.52it/s] 67%|██████▋ | 249684/371472 [9:19:33<9:45:23, 3.47it/s] 67%|██████▋ | 249685/371472 [9:19:33<10:09:04, 3.33it/s] 67%|██████▋ | 249686/371472 [9:19:34<10:18:47, 3.28it/s] 67%|██████▋ | 249687/371472 [9:19:34<10:19:01, 3.28it/s] 67%|██████▋ | 249688/371472 [9:19:34<9:50:38, 3.44it/s] 67%|██████▋ | 249689/371472 [9:19:35<9:54:19, 3.42it/s] 67%|██████▋ | 249690/371472 [9:19:35<9:52:42, 3.42it/s] 67%|██████▋ | 249691/371472 [9:19:35<9:52:31, 3.43it/s] 67%|██████▋ | 249692/371472 [9:19:35<9:58:59, 3.39it/s] 67%|██████▋ | 249693/371472 [9:19:36<9:53:18, 3.42it/s] 67%|██████▋ | 249694/371472 [9:19:36<9:36:21, 3.52it/s] 67%|██████▋ | 249695/371472 [9:19:36<9:32:03, 3.55it/s] 67%|██████▋ | 249696/371472 [9:19:37<9:32:58, 3.54it/s] 67%|██████▋ | 249697/371472 [9:19:37<9:30:44, 3.56it/s] 67%|██████▋ | 249698/371472 [9:19:37<9:30:30, 3.56it/s] 67%|██████▋ | 249699/371472 [9:19:37<10:04:33, 3.36it/s] 67%|██████▋ | 249700/371472 [9:19:38<10:03:21, 3.36it/s] {'loss': 2.9324, 'learning_rate': 3.951873559007951e-07, 'epoch': 10.76} + 67%|██████▋ | 249700/371472 [9:19:38<10:03:21, 3.36it/s] 67%|██████▋ | 249701/371472 [9:19:38<10:04:28, 3.36it/s] 67%|██████▋ | 249702/371472 [9:19:38<9:52:31, 3.43it/s] 67%|██████▋ | 249703/371472 [9:19:39<9:58:25, 3.39it/s] 67%|██████▋ | 249704/371472 [9:19:39<9:55:50, 3.41it/s] 67%|██████▋ | 249705/371472 [9:19:39<9:58:29, 3.39it/s] 67%|██████▋ | 249706/371472 [9:19:39<9:43:25, 3.48it/s] 67%|██████▋ | 249707/371472 [9:19:40<9:50:59, 3.43it/s] 67%|██████▋ | 249708/371472 [9:19:40<9:51:15, 3.43it/s] 67%|██████▋ | 249709/371472 [9:19:40<9:48:12, 3.45it/s] 67%|██████▋ | 249710/371472 [9:19:41<9:45:16, 3.47it/s] 67%|██████▋ | 249711/371472 [9:19:41<9:37:45, 3.51it/s] 67%|██████▋ | 249712/371472 [9:19:41<9:32:19, 3.55it/s] 67%|██████▋ | 249713/371472 [9:19:41<9:21:13, 3.62it/s] 67%|██████▋ | 249714/371472 [9:19:42<9:24:07, 3.60it/s] 67%|██████▋ | 249715/371472 [9:19:42<9:26:20, 3.58it/s] 67%|██████▋ | 249716/371472 [9:19:42<9:33:54, 3.54it/s] 67%|██████▋ | 249717/371472 [9:19:43<10:22:24, 3.26it/s] 67%|███��██▋ | 249718/371472 [9:19:43<10:21:55, 3.26it/s] 67%|██████▋ | 249719/371472 [9:19:43<10:19:16, 3.28it/s] 67%|██████▋ | 249720/371472 [9:19:44<10:06:17, 3.35it/s] {'loss': 2.5961, 'learning_rate': 3.951388739253162e-07, 'epoch': 10.76} + 67%|██████▋ | 249720/371472 [9:19:44<10:06:17, 3.35it/s] 67%|██████▋ | 249721/371472 [9:19:44<10:34:28, 3.20it/s] 67%|██████▋ | 249722/371472 [9:19:44<10:36:37, 3.19it/s] 67%|██████▋ | 249723/371472 [9:19:44<10:26:59, 3.24it/s] 67%|██████▋ | 249724/371472 [9:19:45<10:18:14, 3.28it/s] 67%|██████▋ | 249725/371472 [9:19:45<10:01:26, 3.37it/s] 67%|██████▋ | 249726/371472 [9:19:45<9:52:38, 3.42it/s] 67%|██████▋ | 249727/371472 [9:19:46<10:10:16, 3.32it/s] 67%|██████▋ | 249728/371472 [9:19:46<9:53:05, 3.42it/s] 67%|██████▋ | 249729/371472 [9:19:46<9:56:48, 3.40it/s] 67%|██████▋ | 249730/371472 [9:19:47<10:04:33, 3.36it/s] 67%|██████▋ | 249731/371472 [9:19:47<9:40:20, 3.50it/s] 67%|██████▋ | 249732/371472 [9:19:47<9:46:00, 3.46it/s] 67%|██████▋ | 249733/371472 [9:19:47<9:45:34, 3.46it/s] 67%|██████▋ | 249734/371472 [9:19:48<10:24:20, 3.25it/s] 67%|██████▋ | 249735/371472 [9:19:48<10:16:06, 3.29it/s] 67%|██████▋ | 249736/371472 [9:19:48<10:07:38, 3.34it/s] 67%|██████▋ | 249737/371472 [9:19:49<9:49:46, 3.44it/s] 67%|██████▋ | 249738/371472 [9:19:49<9:48:58, 3.44it/s] 67%|██████▋ | 249739/371472 [9:19:49<10:15:25, 3.30it/s] 67%|██████▋ | 249740/371472 [9:19:49<9:58:04, 3.39it/s] {'loss': 2.7217, 'learning_rate': 3.950903919498373e-07, 'epoch': 10.76} + 67%|██████▋ | 249740/371472 [9:19:50<9:58:04, 3.39it/s] 67%|██████▋ | 249741/371472 [9:19:50<10:09:56, 3.33it/s] 67%|██████▋ | 249742/371472 [9:19:50<10:10:36, 3.32it/s] 67%|██████▋ | 249743/371472 [9:19:50<9:57:33, 3.40it/s] 67%|██████▋ | 249744/371472 [9:19:51<10:09:44, 3.33it/s] 67%|██████▋ | 249745/371472 [9:19:51<10:03:58, 3.36it/s] 67%|██████▋ | 249746/371472 [9:19:51<11:29:16, 2.94it/s] 67%|██████▋ | 249747/371472 [9:19:52<11:37:02, 2.91it/s] 67%|██████▋ | 249748/371472 [9:19:52<11:07:30, 3.04it/s] 67%|██████▋ | 249749/371472 [9:19:52<10:51:57, 3.11it/s] 67%|██████▋ | 249750/371472 [9:19:53<10:27:51, 3.23it/s] 67%|██████▋ | 249751/371472 [9:19:53<10:14:41, 3.30it/s] 67%|██████▋ | 249752/371472 [9:19:53<9:53:18, 3.42it/s] 67%|██████▋ | 249753/371472 [9:19:54<9:43:59, 3.47it/s] 67%|██████▋ | 249754/371472 [9:19:54<9:40:01, 3.50it/s] 67%|██████▋ | 249755/371472 [9:19:54<10:11:54, 3.32it/s] 67%|██████▋ | 249756/371472 [9:19:54<9:50:50, 3.43it/s] 67%|██████▋ | 249757/371472 [9:19:55<9:32:43, 3.54it/s] 67%|██████▋ | 249758/371472 [9:19:55<9:53:20, 3.42it/s] 67%|██████▋ | 249759/371472 [9:19:55<10:18:10, 3.28it/s] 67%|██████▋ | 249760/371472 [9:19:56<10:06:54, 3.34it/s] {'loss': 2.6498, 'learning_rate': 3.9504190997435837e-07, 'epoch': 10.76} + 67%|██████▋ | 249760/371472 [9:19:56<10:06:54, 3.34it/s] 67%|██████▋ | 249761/371472 [9:19:56<9:54:23, 3.41it/s] 67%|██████▋ | 249762/371472 [9:19:56<10:08:19, 3.33it/s] 67%|██████▋ | 249763/371472 [9:19:56<9:58:47, 3.39it/s] 67%|██████▋ | 249764/371472 [9:19:57<10:10:22, 3.32it/s] 67%|██████▋ | 249765/371472 [9:19:57<10:45:19, 3.14it/s] 67%|██████▋ | 249766/371472 [9:19:58<11:18:29, 2.99it/s] 67%|██████▋ | 249767/371472 [9:19:58<10:57:47, 3.08it/s] 67%|██████▋ | 249768/371472 [9:19:58<10:59:03, 3.08it/s] 67%|██████▋ | 249769/371472 [9:19:58<10:45:32, 3.14it/s] 67%|██████▋ | 249770/371472 [9:19:59<10:17:24, 3.29it/s] 67%|██████▋ | 249771/371472 [9:19:59<9:59:00, 3.39it/s] 67%|██████▋ | 249772/371472 [9:19:59<9:55:37, 3.41it/s] 67%|██████▋ | 249773/371472 [9:20:00<10:45:33, 3.14it/s] 67%|██████▋ | 249774/371472 [9:20:00<10:23:19, 3.25it/s] 67%|██████▋ | 249775/371472 [9:20:00<9:56:14, 3.40it/s] 67%|██████▋ | 249776/371472 [9:20:00<9:58:30, 3.39it/s] 67%|██████▋ | 249777/371472 [9:20:01<9:42:19, 3.48it/s] 67%|███���██▋ | 249778/371472 [9:20:01<9:48:38, 3.45it/s] 67%|██████▋ | 249779/371472 [9:20:01<10:14:46, 3.30it/s] 67%|██████▋ | 249780/371472 [9:20:02<10:06:04, 3.35it/s] {'loss': 2.8038, 'learning_rate': 3.9499342799887955e-07, 'epoch': 10.76} + 67%|██████▋ | 249780/371472 [9:20:02<10:06:04, 3.35it/s] 67%|██████▋ | 249781/371472 [9:20:02<10:15:41, 3.29it/s] 67%|██████▋ | 249782/371472 [9:20:02<10:05:46, 3.35it/s] 67%|██████▋ | 249783/371472 [9:20:03<10:19:45, 3.27it/s] 67%|██████▋ | 249784/371472 [9:20:03<10:02:19, 3.37it/s] 67%|██████▋ | 249785/371472 [9:20:03<10:29:48, 3.22it/s] 67%|██████▋ | 249786/371472 [9:20:04<10:47:59, 3.13it/s] 67%|██████▋ | 249787/371472 [9:20:04<10:20:09, 3.27it/s] 67%|██████▋ | 249788/371472 [9:20:04<10:07:31, 3.34it/s] 67%|██████▋ | 249789/371472 [9:20:04<10:21:46, 3.26it/s] 67%|██████▋ | 249790/371472 [9:20:05<10:15:02, 3.30it/s] 67%|██████▋ | 249791/371472 [9:20:05<10:10:12, 3.32it/s] 67%|██████▋ | 249792/371472 [9:20:05<10:51:36, 3.11it/s] 67%|██████▋ | 249793/371472 [9:20:06<10:39:44, 3.17it/s] 67%|██████▋ | 249794/371472 [9:20:06<10:29:16, 3.22it/s] 67%|██████▋ | 249795/371472 [9:20:06<10:15:15, 3.30it/s] 67%|██████▋ | 249796/371472 [9:20:07<10:47:03, 3.13it/s] 67%|██████▋ | 249797/371472 [9:20:07<10:14:20, 3.30it/s] 67%|██████▋ | 249798/371472 [9:20:07<9:52:22, 3.42it/s] 67%|██████▋ | 249799/371472 [9:20:07<9:51:58, 3.43it/s] 67%|██████▋ | 249800/371472 [9:20:08<9:49:05, 3.44it/s] {'loss': 2.7005, 'learning_rate': 3.9494494602340057e-07, 'epoch': 10.76} + 67%|██████▋ | 249800/371472 [9:20:08<9:49:05, 3.44it/s] 67%|██████▋ | 249801/371472 [9:20:08<10:22:42, 3.26it/s] 67%|██████▋ | 249802/371472 [9:20:08<10:22:26, 3.26it/s] 67%|██████▋ | 249803/371472 [9:20:09<10:09:55, 3.32it/s] 67%|██████▋ | 249804/371472 [9:20:09<9:49:40, 3.44it/s] 67%|██████▋ | 249805/371472 [9:20:09<10:17:48, 3.28it/s] 67%|██████▋ | 249806/371472 [9:20:10<10:08:07, 3.33it/s] 67%|██████▋ | 249807/371472 [9:20:10<10:06:52, 3.34it/s] 67%|██████▋ | 249808/371472 [9:20:10<9:48:14, 3.45it/s] 67%|██████▋ | 249809/371472 [9:20:10<9:43:49, 3.47it/s] 67%|██████▋ | 249810/371472 [9:20:11<9:55:45, 3.40it/s] 67%|██████▋ | 249811/371472 [9:20:11<10:14:32, 3.30it/s] 67%|██████▋ | 249812/371472 [9:20:11<9:54:11, 3.41it/s] 67%|██████▋ | 249813/371472 [9:20:12<9:41:39, 3.49it/s] 67%|██████▋ | 249814/371472 [9:20:12<10:05:20, 3.35it/s] 67%|██████▋ | 249815/371472 [9:20:12<9:40:47, 3.49it/s] 67%|██████▋ | 249816/371472 [9:20:12<9:41:47, 3.49it/s] 67%|██████▋ | 249817/371472 [9:20:13<10:04:16, 3.36it/s] 67%|██████▋ | 249818/371472 [9:20:13<10:12:00, 3.31it/s] 67%|██████▋ | 249819/371472 [9:20:13<10:36:53, 3.18it/s] 67%|██████▋ | 249820/371472 [9:20:14<10:06:32, 3.34it/s] {'loss': 2.8195, 'learning_rate': 3.9489646404792174e-07, 'epoch': 10.76} + 67%|██████▋ | 249820/371472 [9:20:14<10:06:32, 3.34it/s] 67%|██████▋ | 249821/371472 [9:20:14<9:56:05, 3.40it/s] 67%|██████▋ | 249822/371472 [9:20:14<9:41:28, 3.49it/s] 67%|██████▋ | 249823/371472 [9:20:15<10:13:34, 3.30it/s] 67%|██████▋ | 249824/371472 [9:20:15<10:04:36, 3.35it/s] 67%|██████▋ | 249825/371472 [9:20:15<10:05:32, 3.35it/s] 67%|██████▋ | 249826/371472 [9:20:16<10:22:13, 3.26it/s] 67%|██████▋ | 249827/371472 [9:20:16<10:36:41, 3.18it/s] 67%|██████▋ | 249828/371472 [9:20:16<10:17:23, 3.28it/s] 67%|██████▋ | 249829/371472 [9:20:16<10:02:49, 3.36it/s] 67%|██████▋ | 249830/371472 [9:20:17<9:51:44, 3.43it/s] 67%|██████▋ | 249831/371472 [9:20:17<10:18:05, 3.28it/s] 67%|██████▋ | 249832/371472 [9:20:17<9:59:17, 3.38it/s] 67%|██████▋ | 249833/371472 [9:20:18<10:09:46, 3.32it/s] 67%|██████▋ | 249834/371472 [9:20:18<10:00:15, 3.38it/s] 67%|██████▋ | 249835/371472 [9:20:18<9:38:19, 3.51it/s] 67%|██████▋ | 249836/371472 [9:20:18<9:25:34, 3.58it/s] 67%|██████▋ | 249837/371472 [9:20:19<9:26:13, 3.58it/s] 67%|██████▋ | 249838/371472 [9:20:19<9:34:13, 3.53it/s] 67%|██████▋ | 249839/371472 [9:20:19<10:14:42, 3.30it/s] 67%|██████▋ | 249840/371472 [9:20:20<10:12:51, 3.31it/s] {'loss': 2.6579, 'learning_rate': 3.948479820724428e-07, 'epoch': 10.76} + 67%|██████▋ | 249840/371472 [9:20:20<10:12:51, 3.31it/s] 67%|██████▋ | 249841/371472 [9:20:20<10:13:41, 3.30it/s] 67%|██████▋ | 249842/371472 [9:20:20<10:20:18, 3.27it/s] 67%|██████▋ | 249843/371472 [9:20:21<10:16:35, 3.29it/s] 67%|██████▋ | 249844/371472 [9:20:21<10:26:30, 3.24it/s] 67%|██████▋ | 249845/371472 [9:20:21<10:22:01, 3.26it/s] 67%|██████▋ | 249846/371472 [9:20:21<10:04:23, 3.35it/s] 67%|██████▋ | 249847/371472 [9:20:22<9:51:33, 3.43it/s] 67%|██████▋ | 249848/371472 [9:20:22<10:06:42, 3.34it/s] 67%|██████▋ | 249849/371472 [9:20:22<10:32:15, 3.21it/s] 67%|██████▋ | 249850/371472 [9:20:23<10:24:19, 3.25it/s] 67%|██████▋ | 249851/371472 [9:20:23<10:20:53, 3.26it/s] 67%|██████▋ | 249852/371472 [9:20:23<10:15:20, 3.29it/s] 67%|██████▋ | 249853/371472 [9:20:24<9:59:52, 3.38it/s] 67%|██████▋ | 249854/371472 [9:20:24<10:02:33, 3.36it/s] 67%|██████▋ | 249855/371472 [9:20:24<10:05:33, 3.35it/s] 67%|██████▋ | 249856/371472 [9:20:24<10:04:23, 3.35it/s] 67%|██████▋ | 249857/371472 [9:20:25<10:11:03, 3.32it/s] 67%|██████▋ | 249858/371472 [9:20:25<10:03:49, 3.36it/s] 67%|██████▋ | 249859/371472 [9:20:25<9:51:19, 3.43it/s] 67%|██████▋ | 249860/371472 [9:20:26<9:42:49, 3.48it/s] {'loss': 2.7223, 'learning_rate': 3.9479950009696394e-07, 'epoch': 10.76} + 67%|██████▋ | 249860/371472 [9:20:26<9:42:49, 3.48it/s] 67%|██████▋ | 249861/371472 [9:20:26<9:42:53, 3.48it/s] 67%|██████▋ | 249862/371472 [9:20:26<10:13:32, 3.30it/s] 67%|██████▋ | 249863/371472 [9:20:27<10:58:51, 3.08it/s] 67%|██████▋ | 249864/371472 [9:20:27<10:49:17, 3.12it/s] 67%|██████▋ | 249865/371472 [9:20:27<11:24:35, 2.96it/s] 67%|██████▋ | 249866/371472 [9:20:28<11:49:06, 2.86it/s] 67%|██████▋ | 249867/371472 [9:20:28<11:33:41, 2.92it/s] 67%|██████▋ | 249868/371472 [9:20:28<11:12:54, 3.01it/s] 67%|██████▋ | 249869/371472 [9:20:29<10:32:38, 3.20it/s] 67%|██████▋ | 249870/371472 [9:20:29<10:12:28, 3.31it/s] 67%|██████▋ | 249871/371472 [9:20:29<10:02:09, 3.37it/s] 67%|██████▋ | 249872/371472 [9:20:29<9:51:26, 3.43it/s] 67%|██████▋ | 249873/371472 [9:20:30<10:22:00, 3.26it/s] 67%|██████▋ | 249874/371472 [9:20:30<10:39:54, 3.17it/s] 67%|██████▋ | 249875/371472 [9:20:30<10:15:58, 3.29it/s] 67%|██████▋ | 249876/371472 [9:20:31<10:08:09, 3.33it/s] 67%|██████▋ | 249877/371472 [9:20:31<10:38:40, 3.17it/s] 67%|██████▋ | 249878/371472 [9:20:31<10:32:23, 3.20it/s] 67%|██████▋ | 249879/371472 [9:20:32<10:15:06, 3.29it/s] 67%|██████▋ | 249880/371472 [9:20:32<10:12:54, 3.31it/s] {'loss': 2.6784, 'learning_rate': 3.94751018121485e-07, 'epoch': 10.76} + 67%|██████▋ | 249880/371472 [9:20:32<10:12:54, 3.31it/s] 67%|██████▋ | 249881/371472 [9:20:32<10:14:09, 3.30it/s] 67%|██████▋ | 249882/371472 [9:20:33<10:01:02, 3.37it/s] 67%|██████▋ | 249883/371472 [9:20:33<11:08:49, 3.03it/s] 67%|██████▋ | 249884/371472 [9:20:33<10:51:05, 3.11it/s] 67%|██████▋ | 249885/371472 [9:20:34<10:29:19, 3.22it/s] 67%|██████▋ | 249886/371472 [9:20:34<10:13:28, 3.30it/s] 67%|██████▋ | 249887/371472 [9:20:34<10:08:08, 3.33it/s] 67%|██████▋ | 249888/371472 [9:20:34<10:20:56, 3.26it/s] 67%|██████▋ | 249889/371472 [9:20:35<10:05:57, 3.34it/s] 67%|██████▋ | 249890/371472 [9:20:35<10:01:04, 3.37it/s] 67%|██████▋ | 249891/371472 [9:20:35<10:23:56, 3.25it/s] 67%|██████▋ | 249892/371472 [9:20:36<10:20:25, 3.27it/s] 67%|██████▋ | 249893/371472 [9:20:36<10:34:15, 3.19it/s] 67%|██████▋ | 249894/371472 [9:20:36<10:09:29, 3.32it/s] 67%|██████▋ | 249895/371472 [9:20:36<9:42:14, 3.48it/s] 67%|██████▋ | 249896/371472 [9:20:37<9:45:44, 3.46it/s] 67%|██████▋ | 249897/371472 [9:20:37<9:33:17, 3.53it/s] 67%|██████▋ | 249898/371472 [9:20:37<9:49:49, 3.44it/s] 67%|██████▋ | 249899/371472 [9:20:38<10:13:46, 3.30it/s] 67%|██████▋ | 249900/371472 [9:20:38<9:53:39, 3.41it/s] {'loss': 2.6136, 'learning_rate': 3.947025361460062e-07, 'epoch': 10.76} + 67%|██████▋ | 249900/371472 [9:20:38<9:53:39, 3.41it/s] 67%|██████▋ | 249901/371472 [9:20:38<9:47:31, 3.45it/s] 67%|██████▋ | 249902/371472 [9:20:39<9:51:27, 3.43it/s] 67%|██████▋ | 249903/371472 [9:20:39<9:40:02, 3.49it/s] 67%|██████▋ | 249904/371472 [9:20:39<9:49:50, 3.44it/s] 67%|██████▋ | 249905/371472 [9:20:39<10:01:06, 3.37it/s] 67%|██████▋ | 249906/371472 [9:20:40<10:04:08, 3.35it/s] 67%|██████▋ | 249907/371472 [9:20:40<10:07:20, 3.34it/s] 67%|██████▋ | 249908/371472 [9:20:40<10:24:08, 3.25it/s] 67%|██████▋ | 249909/371472 [9:20:41<10:07:19, 3.34it/s] 67%|██████▋ | 249910/371472 [9:20:41<10:39:56, 3.17it/s] 67%|██████▋ | 249911/371472 [9:20:41<10:24:31, 3.24it/s] 67%|██████▋ | 249912/371472 [9:20:42<10:30:29, 3.21it/s] 67%|██████▋ | 249913/371472 [9:20:42<10:17:20, 3.28it/s] 67%|██████▋ | 249914/371472 [9:20:42<10:02:40, 3.36it/s] 67%|██████▋ | 249915/371472 [9:20:42<9:54:57, 3.41it/s] 67%|██████▋ | 249916/371472 [9:20:43<9:59:15, 3.38it/s] 67%|██████▋ | 249917/371472 [9:20:43<9:48:14, 3.44it/s] 67%|██████▋ | 249918/371472 [9:20:43<9:40:59, 3.49it/s] 67%|██████▋ | 249919/371472 [9:20:44<10:25:46, 3.24it/s] 67%|██████▋ | 249920/371472 [9:20:44<10:16:10, 3.29it/s] {'loss': 2.656, 'learning_rate': 3.946540541705272e-07, 'epoch': 10.76} + 67%|██████▋ | 249920/371472 [9:20:44<10:16:10, 3.29it/s] 67%|██████▋ | 249921/371472 [9:20:44<11:33:17, 2.92it/s] 67%|██████▋ | 249922/371472 [9:20:45<11:14:21, 3.00it/s] 67%|██████▋ | 249923/371472 [9:20:45<10:42:43, 3.15it/s] 67%|██████▋ | 249924/371472 [9:20:45<10:14:41, 3.30it/s] 67%|██████▋ | 249925/371472 [9:20:46<10:34:44, 3.19it/s] 67%|██████▋ | 249926/371472 [9:20:46<10:33:09, 3.20it/s] 67%|██████▋ | 249927/371472 [9:20:46<10:32:51, 3.20it/s] 67%|██████▋ | 249928/371472 [9:20:46<10:05:34, 3.35it/s] 67%|██████▋ | 249929/371472 [9:20:47<10:30:40, 3.21it/s] 67%|██████▋ | 249930/371472 [9:20:47<10:55:27, 3.09it/s] 67%|██████▋ | 249931/371472 [9:20:47<10:43:49, 3.15it/s] 67%|██████▋ | 249932/371472 [9:20:48<10:16:25, 3.29it/s] 67%|██████▋ | 249933/371472 [9:20:48<10:06:35, 3.34it/s] 67%|██████▋ | 249934/371472 [9:20:48<11:07:52, 3.03it/s] 67%|██████▋ | 249935/371472 [9:20:49<10:45:40, 3.14it/s] 67%|██████▋ | 249936/371472 [9:20:49<10:19:05, 3.27it/s] 67%|██████▋ | 249937/371472 [9:20:49<10:06:39, 3.34it/s] 67%|██████▋ | 249938/371472 [9:20:50<10:52:54, 3.10it/s] 67%|██████▋ | 249939/371472 [9:20:50<10:23:51, 3.25it/s] 67%|██████▋ | 249940/371472 [9:20:50<10:04:55, 3.35it/s] {'loss': 2.6751, 'learning_rate': 3.946055721950484e-07, 'epoch': 10.77} + 67%|██████▋ | 249940/371472 [9:20:50<10:04:55, 3.35it/s] 67%|██████▋ | 249941/371472 [9:20:51<9:57:03, 3.39it/s] 67%|██████▋ | 249942/371472 [9:20:51<9:44:56, 3.46it/s] 67%|██████▋ | 249943/371472 [9:20:51<9:48:32, 3.44it/s] 67%|██████▋ | 249944/371472 [9:20:51<9:42:34, 3.48it/s] 67%|██████▋ | 249945/371472 [9:20:52<9:43:58, 3.47it/s] 67%|██████▋ | 249946/371472 [9:20:52<9:39:17, 3.50it/s] 67%|██████▋ | 249947/371472 [9:20:52<9:44:20, 3.47it/s] 67%|██████▋ | 249948/371472 [9:20:53<10:15:32, 3.29it/s] 67%|██████▋ | 249949/371472 [9:20:53<10:15:19, 3.29it/s] 67%|██████▋ | 249950/371472 [9:20:53<10:32:57, 3.20it/s] 67%|██████▋ | 249951/371472 [9:20:54<10:38:29, 3.17it/s] 67%|██████▋ | 249952/371472 [9:20:54<10:16:11, 3.29it/s] 67%|██████▋ | 249953/371472 [9:20:54<10:10:53, 3.32it/s] 67%|██████▋ | 249954/371472 [9:20:54<10:34:49, 3.19it/s] 67%|██████▋ | 249955/371472 [9:20:55<10:20:14, 3.27it/s] 67%|██████▋ | 249956/371472 [9:20:55<9:56:59, 3.39it/s] 67%|██████▋ | 249957/371472 [9:20:55<10:06:39, 3.34it/s] 67%|█���████▋ | 249958/371472 [9:20:56<10:12:24, 3.31it/s] 67%|██████▋ | 249959/371472 [9:20:56<9:51:09, 3.43it/s] 67%|██████▋ | 249960/371472 [9:20:56<9:50:01, 3.43it/s] {'loss': 2.6143, 'learning_rate': 3.9455709021956946e-07, 'epoch': 10.77} + 67%|██████▋ | 249960/371472 [9:20:56<9:50:01, 3.43it/s] 67%|██████▋ | 249961/371472 [9:20:56<9:39:57, 3.49it/s] 67%|██████▋ | 249962/371472 [9:20:57<9:42:02, 3.48it/s] 67%|██████▋ | 249963/371472 [9:20:57<10:06:51, 3.34it/s] 67%|██████▋ | 249964/371472 [9:20:57<10:07:16, 3.33it/s] 67%|██████▋ | 249965/371472 [9:20:58<10:28:12, 3.22it/s] 67%|██████▋ | 249966/371472 [9:20:58<10:26:22, 3.23it/s] 67%|██████▋ | 249967/371472 [9:20:58<10:36:36, 3.18it/s] 67%|██████▋ | 249968/371472 [9:20:59<10:55:17, 3.09it/s] 67%|██████▋ | 249969/371472 [9:20:59<10:54:21, 3.09it/s] 67%|██████▋ | 249970/371472 [9:20:59<10:29:27, 3.22it/s] 67%|██████▋ | 249971/371472 [9:21:00<10:53:18, 3.10it/s] 67%|██████▋ | 249972/371472 [9:21:00<10:45:18, 3.14it/s] 67%|██████▋ | 249973/371472 [9:21:00<10:26:53, 3.23it/s] 67%|██████▋ | 249974/371472 [9:21:01<10:42:46, 3.15it/s] 67%|██████▋ | 249975/371472 [9:21:01<11:01:31, 3.06it/s] 67%|██████▋ | 249976/371472 [9:21:01<10:53:01, 3.10it/s] 67%|██████▋ | 249977/371472 [9:21:02<10:25:10, 3.24it/s] 67%|██████▋ | 249978/371472 [9:21:02<10:27:05, 3.23it/s] 67%|██████▋ | 249979/371472 [9:21:02<10:12:08, 3.31it/s] 67%|██████▋ | 249980/371472 [9:21:02<9:59:51, 3.38it/s] {'loss': 2.6903, 'learning_rate': 3.945086082440906e-07, 'epoch': 10.77} + 67%|██████▋ | 249980/371472 [9:21:02<9:59:51, 3.38it/s] 67%|██████▋ | 249981/371472 [9:21:03<9:54:38, 3.41it/s] 67%|██████▋ | 249982/371472 [9:21:03<9:45:33, 3.46it/s] 67%|██████▋ | 249983/371472 [9:21:03<10:10:32, 3.32it/s] 67%|██████▋ | 249984/371472 [9:21:04<9:56:39, 3.39it/s] 67%|██████▋ | 249985/371472 [9:21:04<9:49:52, 3.43it/s] 67%|██████▋ | 249986/371472 [9:21:04<9:56:17, 3.40it/s] 67%|██████▋ | 249987/371472 [9:21:04<9:56:09, 3.40it/s] 67%|██████▋ | 249988/371472 [9:21:05<9:45:15, 3.46it/s] 67%|██████▋ | 249989/371472 [9:21:05<9:52:58, 3.41it/s] 67%|██████▋ | 249990/371472 [9:21:05<9:51:18, 3.42it/s] 67%|██████▋ | 249991/371472 [9:21:06<9:34:26, 3.52it/s] 67%|██████▋ | 249992/371472 [9:21:06<9:41:01, 3.48it/s] 67%|██████▋ | 249993/371472 [9:21:06<9:38:52, 3.50it/s] 67%|██████▋ | 249994/371472 [9:21:06<9:28:07, 3.56it/s] 67%|██████▋ | 249995/371472 [9:21:07<9:27:33, 3.57it/s] 67%|██████▋ | 249996/371472 [9:21:07<9:33:25, 3.53it/s] 67%|██████▋ | 249997/371472 [9:21:07<9:33:00, 3.53it/s] 67%|██████▋ | 249998/371472 [9:21:08<10:11:37, 3.31it/s] 67%|██████▋ | 249999/371472 [9:21:08<10:08:58, 3.32it/s] 67%|██████▋ | 250000/371472 [9:21:08<10:03:34, 3.35it/s] {'loss': 2.6309, 'learning_rate': 3.9446012626861165e-07, 'epoch': 10.77} + 67%|██████▋ | 250000/371472 [9:21:08<10:03:34, 3.35it/s] 67%|██████▋ | 250001/371472 [9:21:08<9:53:22, 3.41it/s] 67%|██████▋ | 250002/371472 [9:21:09<10:30:48, 3.21it/s] 67%|██████▋ | 250003/371472 [9:21:09<10:12:01, 3.31it/s] 67%|██████▋ | 250004/371472 [9:21:09<10:14:38, 3.29it/s] 67%|██████▋ | 250005/371472 [9:21:10<11:20:18, 2.98it/s] 67%|██████▋ | 250006/371472 [9:21:10<11:01:02, 3.06it/s] 67%|██████▋ | 250007/371472 [9:21:10<10:51:36, 3.11it/s] 67%|██████▋ | 250008/371472 [9:21:11<10:30:09, 3.21it/s] 67%|██████▋ | 250009/371472 [9:21:11<10:31:32, 3.21it/s] 67%|██████▋ | 250010/371472 [9:21:11<10:17:49, 3.28it/s] 67%|██████▋ | 250011/371472 [9:21:12<9:51:22, 3.42it/s] 67%|██████▋ | 250012/371472 [9:21:12<9:44:17, 3.46it/s] 67%|██████▋ | 250013/371472 [9:21:12<9:40:14, 3.49it/s] 67%|██████▋ | 250014/371472 [9:21:12<9:45:11, 3.46it/s] 67%|██████▋ | 250015/371472 [9:21:13<9:43:26, 3.47it/s] 67%|██████▋ | 250016/371472 [9:21:13<9:54:07, 3.41it/s] 67%|██████▋ | 250017/371472 [9:21:13<9:54:08, 3.41it/s] 67%|█████��▋ | 250018/371472 [9:21:14<9:46:58, 3.45it/s] 67%|██████▋ | 250019/371472 [9:21:14<10:04:10, 3.35it/s] 67%|██████▋ | 250020/371472 [9:21:14<10:35:48, 3.18it/s] {'loss': 2.7828, 'learning_rate': 3.9441164429313283e-07, 'epoch': 10.77} + 67%|██████▋ | 250020/371472 [9:21:14<10:35:48, 3.18it/s] 67%|██████▋ | 250021/371472 [9:21:15<10:04:03, 3.35it/s] 67%|██████▋ | 250022/371472 [9:21:15<9:52:34, 3.42it/s] 67%|██████▋ | 250023/371472 [9:21:15<9:50:26, 3.43it/s] 67%|██████▋ | 250024/371472 [9:21:15<10:15:04, 3.29it/s] 67%|██████▋ | 250025/371472 [9:21:16<10:10:51, 3.31it/s] 67%|██████▋ | 250026/371472 [9:21:16<11:12:51, 3.01it/s] 67%|██████▋ | 250027/371472 [9:21:16<10:39:21, 3.17it/s] 67%|██████▋ | 250028/371472 [9:21:17<10:09:16, 3.32it/s] 67%|██████▋ | 250029/371472 [9:21:17<9:57:05, 3.39it/s] 67%|██████▋ | 250030/371472 [9:21:17<9:56:38, 3.39it/s] 67%|██████▋ | 250031/371472 [9:21:18<9:59:58, 3.37it/s] 67%|██████▋ | 250032/371472 [9:21:18<9:56:27, 3.39it/s] 67%|██████▋ | 250033/371472 [9:21:18<9:55:20, 3.40it/s] 67%|██████▋ | 250034/371472 [9:21:18<10:02:04, 3.36it/s] 67%|██████▋ | 250035/371472 [9:21:19<10:19:16, 3.27it/s] 67%|██████▋ | 250036/371472 [9:21:19<10:18:57, 3.27it/s] 67%|██████▋ | 250037/371472 [9:21:19<10:00:18, 3.37it/s] 67%|██████▋ | 250038/371472 [9:21:20<10:07:18, 3.33it/s] 67%|██████▋ | 250039/371472 [9:21:20<10:15:45, 3.29it/s] 67%|██████▋ | 250040/371472 [9:21:20<10:09:01, 3.32it/s] {'loss': 2.8303, 'learning_rate': 3.943631623176539e-07, 'epoch': 10.77} + 67%|██████▋ | 250040/371472 [9:21:20<10:09:01, 3.32it/s] 67%|██████▋ | 250041/371472 [9:21:21<9:55:28, 3.40it/s] 67%|██████▋ | 250042/371472 [9:21:21<9:42:00, 3.48it/s] 67%|██████▋ | 250043/371472 [9:21:21<10:06:52, 3.33it/s] 67%|██████▋ | 250044/371472 [9:21:21<10:15:34, 3.29it/s] 67%|██████▋ | 250045/371472 [9:21:22<10:27:15, 3.23it/s] 67%|██████▋ | 250046/371472 [9:21:22<10:12:28, 3.30it/s] 67%|██████▋ | 250047/371472 [9:21:22<9:59:23, 3.38it/s] 67%|██████▋ | 250048/371472 [9:21:23<10:10:28, 3.32it/s] 67%|██████▋ | 250049/371472 [9:21:23<9:59:21, 3.38it/s] 67%|██████▋ | 250050/371472 [9:21:23<10:11:27, 3.31it/s] 67%|██████▋ | 250051/371472 [9:21:24<10:51:26, 3.11it/s] 67%|██████▋ | 250052/371472 [9:21:24<10:39:38, 3.16it/s] 67%|██████▋ | 250053/371472 [9:21:24<10:15:42, 3.29it/s] 67%|██████▋ | 250054/371472 [9:21:25<10:05:24, 3.34it/s] 67%|██████▋ | 250055/371472 [9:21:25<9:50:32, 3.43it/s] 67%|██████▋ | 250056/371472 [9:21:25<10:05:00, 3.34it/s] 67%|██████▋ | 250057/371472 [9:21:25<9:46:39, 3.45it/s] 67%|██████▋ | 250058/371472 [9:21:26<9:42:44, 3.47it/s] 67%|██████▋ | 250059/371472 [9:21:26<9:42:23, 3.47it/s] 67%|██████▋ | 250060/371472 [9:21:26<9:35:45, 3.51it/s] {'loss': 2.6908, 'learning_rate': 3.94314680342175e-07, 'epoch': 10.77} + 67%|██████▋ | 250060/371472 [9:21:26<9:35:45, 3.51it/s] 67%|██████▋ | 250061/371472 [9:21:27<9:28:18, 3.56it/s] 67%|██████▋ | 250062/371472 [9:21:27<9:26:25, 3.57it/s] 67%|██████▋ | 250063/371472 [9:21:27<9:31:09, 3.54it/s] 67%|██████▋ | 250064/371472 [9:21:27<9:21:59, 3.60it/s] 67%|██████▋ | 250065/371472 [9:21:28<10:07:26, 3.33it/s] 67%|██████▋ | 250066/371472 [9:21:28<9:54:58, 3.40it/s] 67%|██████▋ | 250067/371472 [9:21:28<9:53:59, 3.41it/s] 67%|██████▋ | 250068/371472 [9:21:29<10:06:09, 3.34it/s] 67%|██████▋ | 250069/371472 [9:21:29<9:53:00, 3.41it/s] 67%|██████▋ | 250070/371472 [9:21:29<10:02:57, 3.36it/s] 67%|██████▋ | 250071/371472 [9:21:29<10:22:46, 3.25it/s] 67%|██████▋ | 250072/371472 [9:21:30<11:20:12, 2.97it/s] 67%|██████▋ | 250073/371472 [9:21:30<11:04:12, 3.05it/s] 67%|██████▋ | 250074/371472 [9:21:31<10:44:41, 3.14it/s] 67%|██████▋ | 250075/371472 [9:21:31<10:28:13, 3.22it/s] 67%|██████▋ | 250076/371472 [9:21:31<10:30:48, 3.21it/s] 67%|██████▋ | 250077/371472 [9:21:31<11:18:25, 2.98it/s] 67%|██████▋ | 250078/371472 [9:21:32<10:44:40, 3.14it/s] 67%|██████▋ | 250079/371472 [9:21:32<10:47:03, 3.13it/s] 67%|██████▋ | 250080/371472 [9:21:32<10:33:16, 3.19it/s] {'loss': 2.7573, 'learning_rate': 3.942661983666961e-07, 'epoch': 10.77} + 67%|██████▋ | 250080/371472 [9:21:32<10:33:16, 3.19it/s] 67%|██████▋ | 250081/371472 [9:21:33<10:42:30, 3.15it/s] 67%|██████▋ | 250082/371472 [9:21:33<10:58:05, 3.07it/s] 67%|██████▋ | 250083/371472 [9:21:33<10:28:16, 3.22it/s] 67%|██████▋ | 250084/371472 [9:21:34<10:40:34, 3.16it/s] 67%|██████▋ | 250085/371472 [9:21:34<10:48:16, 3.12it/s] 67%|██████▋ | 250086/371472 [9:21:34<10:38:54, 3.17it/s] 67%|██████▋ | 250087/371472 [9:21:35<10:30:03, 3.21it/s] 67%|██████▋ | 250088/371472 [9:21:35<10:39:35, 3.16it/s] 67%|██████▋ | 250089/371472 [9:21:35<10:32:04, 3.20it/s] 67%|██████▋ | 250090/371472 [9:21:36<10:55:37, 3.09it/s] 67%|██████▋ | 250091/371472 [9:21:36<10:28:10, 3.22it/s] 67%|██████▋ | 250092/371472 [9:21:36<10:06:11, 3.34it/s] 67%|██████▋ | 250093/371472 [9:21:36<10:26:27, 3.23it/s] 67%|██████▋ | 250094/371472 [9:21:37<10:09:02, 3.32it/s] 67%|██████▋ | 250095/371472 [9:21:37<9:58:53, 3.38it/s] 67%|██████▋ | 250096/371472 [9:21:37<10:20:21, 3.26it/s] 67%|██████▋ | 250097/371472 [9:21:38<10:27:55, 3.22it/s] 67%|██████▋ | 250098/371472 [9:21:38<10:19:30, 3.27it/s] 67%|██████▋ | 250099/371472 [9:21:38<10:07:34, 3.33it/s] 67%|██████▋ | 250100/371472 [9:21:39<9:49:25, 3.43it/s] {'loss': 2.6386, 'learning_rate': 3.9421771639121727e-07, 'epoch': 10.77} + 67%|██████▋ | 250100/371472 [9:21:39<9:49:25, 3.43it/s] 67%|██████▋ | 250101/371472 [9:21:39<9:41:29, 3.48it/s] 67%|██████▋ | 250102/371472 [9:21:39<9:41:28, 3.48it/s] 67%|██████▋ | 250103/371472 [9:21:39<9:43:09, 3.47it/s] 67%|██████▋ | 250104/371472 [9:21:40<9:44:04, 3.46it/s] 67%|██████▋ | 250105/371472 [9:21:40<10:03:11, 3.35it/s] 67%|██████▋ | 250106/371472 [9:21:40<10:44:08, 3.14it/s] 67%|██████▋ | 250107/371472 [9:21:41<10:33:48, 3.19it/s] 67%|██████▋ | 250108/371472 [9:21:41<10:21:47, 3.25it/s] 67%|██████▋ | 250109/371472 [9:21:41<10:38:39, 3.17it/s] 67%|██████▋ | 250110/371472 [9:21:42<10:09:35, 3.32it/s] 67%|██████▋ | 250111/371472 [9:21:42<10:29:40, 3.21it/s] 67%|██████▋ | 250112/371472 [9:21:42<11:21:54, 2.97it/s] 67%|██████▋ | 250113/371472 [9:21:43<10:57:44, 3.08it/s] 67%|██████▋ | 250114/371472 [9:21:43<10:38:53, 3.17it/s] 67%|██████▋ | 250115/371472 [9:21:43<10:17:09, 3.28it/s] 67%|██████▋ | 250116/371472 [9:21:43<10:11:24, 3.31it/s] 67%|██████▋ | 250117/371472 [9:21:44<9:53:41, 3.41it/s] 67%|██████▋ | 250118/371472 [9:21:44<10:00:04, 3.37it/s] 67%|██████▋ | 250119/371472 [9:21:44<10:29:18, 3.21it/s] 67%|██████▋ | 250120/371472 [9:21:45<10:00:32, 3.37it/s] {'loss': 2.6396, 'learning_rate': 3.941692344157383e-07, 'epoch': 10.77} + 67%|██████▋ | 250120/371472 [9:21:45<10:00:32, 3.37it/s] 67%|██████▋ | 250121/371472 [9:21:45<9:50:19, 3.43it/s] 67%|██████▋ | 250122/371472 [9:21:45<9:38:07, 3.50it/s] 67%|██████▋ | 250123/371472 [9:21:46<9:39:30, 3.49it/s] 67%|██████▋ | 250124/371472 [9:21:46<9:42:02, 3.47it/s] 67%|██████▋ | 250125/371472 [9:21:46<10:15:47, 3.28it/s] 67%|██████▋ | 250126/371472 [9:21:46<10:08:59, 3.32it/s] 67%|██████▋ | 250127/371472 [9:21:47<10:06:06, 3.34it/s] 67%|██████▋ | 250128/371472 [9:21:47<10:01:28, 3.36it/s] 67%|██████▋ | 250129/371472 [9:21:47<10:00:49, 3.37it/s] 67%|██████▋ | 250130/371472 [9:21:48<10:05:23, 3.34it/s] 67%|██████▋ | 250131/371472 [9:21:48<9:59:48, 3.37it/s] 67%|██████▋ | 250132/371472 [9:21:48<9:59:19, 3.37it/s] 67%|██████▋ | 250133/371472 [9:21:48<9:45:55, 3.45it/s] 67%|██████▋ | 250134/371472 [9:21:49<10:05:30, 3.34it/s] 67%|██████▋ | 250135/371472 [9:21:49<10:15:36, 3.28it/s] 67%|██████▋ | 250136/371472 [9:21:49<10:05:28, 3.34it/s] 67%|██████▋ | 250137/371472 [9:21:50<9:46:23, 3.45it/s] 67%|██████▋ | 250138/371472 [9:21:50<10:18:28, 3.27it/s] 67%|██████▋ | 250139/371472 [9:21:50<9:54:52, 3.40it/s] 67%|██████▋ | 250140/371472 [9:21:51<9:56:43, 3.39it/s] {'loss': 2.688, 'learning_rate': 3.941207524402595e-07, 'epoch': 10.77} + 67%|██████▋ | 250140/371472 [9:21:51<9:56:43, 3.39it/s] 67%|██████▋ | 250141/371472 [9:21:51<10:01:39, 3.36it/s] 67%|██████▋ | 250142/371472 [9:21:51<10:00:12, 3.37it/s] 67%|██████▋ | 250143/371472 [9:21:51<9:49:22, 3.43it/s] 67%|██████▋ | 250144/371472 [9:21:52<9:42:39, 3.47it/s] 67%|██████▋ | 250145/371472 [9:21:52<10:15:32, 3.29it/s] 67%|██████▋ | 250146/371472 [9:21:52<10:06:18, 3.34it/s] 67%|██████▋ | 250147/371472 [9:21:53<9:56:12, 3.39it/s] 67%|██████▋ | 250148/371472 [9:21:53<10:05:51, 3.34it/s] 67%|██████▋ | 250149/371472 [9:21:53<9:50:26, 3.42it/s] 67%|██████▋ | 250150/371472 [9:21:54<10:08:31, 3.32it/s] 67%|██████▋ | 250151/371472 [9:21:54<10:06:46, 3.33it/s] 67%|██████▋ | 250152/371472 [9:21:54<9:35:40, 3.51it/s] 67%|██████▋ | 250153/371472 [9:21:54<9:31:13, 3.54it/s] 67%|██████▋ | 250154/371472 [9:21:55<9:23:31, 3.59it/s] 67%|██████▋ | 250155/371472 [9:21:55<9:20:28, 3.61it/s] 67%|██████▋ | 250156/371472 [9:21:55<9:27:47, 3.56it/s] 67%|██████▋ | 250157/371472 [9:21:56<9:44:29, 3.46it/s] 67%|██████▋ | 250158/371472 [9:21:56<10:14:50, 3.29it/s] 67%|██████▋ | 250159/371472 [9:21:56<10:13:02, 3.30it/s] 67%|██████▋ | 250160/371472 [9:21:56<9:50:35, 3.42it/s] {'loss': 2.7022, 'learning_rate': 3.9407227046478054e-07, 'epoch': 10.77} + 67%|██████▋ | 250160/371472 [9:21:56<9:50:35, 3.42it/s] 67%|██████▋ | 250161/371472 [9:21:57<9:43:37, 3.46it/s] 67%|██████▋ | 250162/371472 [9:21:57<9:32:41, 3.53it/s] 67%|██████▋ | 250163/371472 [9:21:57<10:49:36, 3.11it/s] 67%|██████▋ | 250164/371472 [9:21:58<10:56:22, 3.08it/s] 67%|██████▋ | 250165/371472 [9:21:58<10:20:12, 3.26it/s] 67%|██████▋ | 250166/371472 [9:21:58<10:33:15, 3.19it/s] 67%|██████▋ | 250167/371472 [9:21:59<10:28:52, 3.21it/s] 67%|██████▋ | 250168/371472 [9:21:59<10:13:42, 3.29it/s] 67%|██████▋ | 250169/371472 [9:21:59<10:01:27, 3.36it/s] 67%|██████▋ | 250170/371472 [9:22:00<10:25:34, 3.23it/s] 67%|██████▋ | 250171/371472 [9:22:00<10:06:50, 3.33it/s] 67%|██████▋ | 250172/371472 [9:22:00<11:02:13, 3.05it/s] 67%|██████▋ | 250173/371472 [9:22:01<10:57:52, 3.07it/s] 67%|██████▋ | 250174/371472 [9:22:01<10:44:15, 3.14it/s] 67%|██████▋ | 250175/371472 [9:22:01<10:22:31, 3.25it/s] 67%|██████▋ | 250176/371472 [9:22:02<11:17:33, 2.98it/s] 67%|██████▋ | 250177/371472 [9:22:02<11:08:39, 3.02it/s] 67%|██████▋ | 250178/371472 [9:22:02<10:32:35, 3.20it/s] 67%|██████▋ | 250179/371472 [9:22:02<10:15:01, 3.29it/s] 67%|██████▋ | 250180/371472 [9:22:03<10:24:35, 3.24it/s] {'loss': 2.5293, 'learning_rate': 3.9402378848930156e-07, 'epoch': 10.78} + 67%|██████▋ | 250180/371472 [9:22:03<10:24:35, 3.24it/s] 67%|██████▋ | 250181/371472 [9:22:03<10:02:15, 3.36it/s] 67%|██████▋ | 250182/371472 [9:22:03<9:47:09, 3.44it/s] 67%|██████▋ | 250183/371472 [9:22:04<10:20:27, 3.26it/s] 67%|██████▋ | 250184/371472 [9:22:04<10:00:26, 3.37it/s] 67%|██████▋ | 250185/371472 [9:22:04<10:03:28, 3.35it/s] 67%|██████▋ | 250186/371472 [9:22:04<10:04:48, 3.34it/s] 67%|██████▋ | 250187/371472 [9:22:05<9:58:52, 3.38it/s] 67%|██████▋ | 250188/371472 [9:22:05<10:13:02, 3.30it/s] 67%|██████▋ | 250189/371472 [9:22:05<10:09:05, 3.32it/s] 67%|██████▋ | 250190/371472 [9:22:06<10:26:51, 3.22it/s] 67%|██████▋ | 250191/371472 [9:22:06<10:05:51, 3.34it/s] 67%|██████▋ | 250192/371472 [9:22:06<10:24:06, 3.24it/s] 67%|██████▋ | 250193/371472 [9:22:07<11:11:03, 3.01it/s] 67%|██████▋ | 250194/371472 [9:22:07<10:32:52, 3.19it/s] 67%|██████▋ | 250195/371472 [9:22:07<10:36:34, 3.18it/s] 67%|██████▋ | 250196/371472 [9:22:08<10:43:26, 3.14it/s] 67%|██████▋ | 250197/371472 [9:22:08<10:24:03, 3.24it/s] 67%|█████��▋ | 250198/371472 [9:22:08<10:20:01, 3.26it/s] 67%|██████▋ | 250199/371472 [9:22:09<10:19:32, 3.26it/s] 67%|██████▋ | 250200/371472 [9:22:09<10:37:49, 3.17it/s] {'loss': 2.5658, 'learning_rate': 3.9397530651382274e-07, 'epoch': 10.78} + 67%|██████▋ | 250200/371472 [9:22:09<10:37:49, 3.17it/s] 67%|██████▋ | 250201/371472 [9:22:09<10:24:02, 3.24it/s] 67%|██████▋ | 250202/371472 [9:22:09<10:31:40, 3.20it/s] 67%|██████▋ | 250203/371472 [9:22:10<10:40:21, 3.16it/s] 67%|██████▋ | 250204/371472 [9:22:10<10:23:19, 3.24it/s] 67%|██████▋ | 250205/371472 [9:22:10<10:14:40, 3.29it/s] 67%|██████▋ | 250206/371472 [9:22:11<9:56:30, 3.39it/s] 67%|██████▋ | 250207/371472 [9:22:11<9:54:01, 3.40it/s] 67%|██████▋ | 250208/371472 [9:22:11<9:49:03, 3.43it/s] 67%|██████▋ | 250209/371472 [9:22:12<9:50:36, 3.42it/s] 67%|██████▋ | 250210/371472 [9:22:12<10:28:15, 3.22it/s] 67%|██████▋ | 250211/371472 [9:22:12<11:08:04, 3.03it/s] 67%|██████▋ | 250212/371472 [9:22:13<10:49:40, 3.11it/s] 67%|██████▋ | 250213/371472 [9:22:13<11:33:34, 2.91it/s] 67%|██████▋ | 250214/371472 [9:22:13<11:09:57, 3.02it/s] 67%|██████▋ | 250215/371472 [9:22:14<10:33:22, 3.19it/s] 67%|██████▋ | 250216/371472 [9:22:14<10:02:25, 3.35it/s] 67%|██████▋ | 250217/371472 [9:22:14<9:36:36, 3.50it/s] 67%|██████▋ | 250218/371472 [9:22:14<9:41:12, 3.48it/s] 67%|██████▋ | 250219/371472 [9:22:15<10:06:47, 3.33it/s] 67%|██████▋ | 250220/371472 [9:22:15<10:50:12, 3.11it/s] {'loss': 2.6185, 'learning_rate': 3.939268245383438e-07, 'epoch': 10.78} + 67%|██████▋ | 250220/371472 [9:22:15<10:50:12, 3.11it/s] 67%|██████▋ | 250221/371472 [9:22:15<10:32:54, 3.19it/s] 67%|██████▋ | 250222/371472 [9:22:16<10:39:38, 3.16it/s] 67%|██████▋ | 250223/371472 [9:22:16<10:06:45, 3.33it/s] 67%|██████▋ | 250224/371472 [9:22:16<9:55:27, 3.39it/s] 67%|██████▋ | 250225/371472 [9:22:16<9:30:14, 3.54it/s] 67%|██████▋ | 250226/371472 [9:22:17<9:47:36, 3.44it/s] 67%|██████▋ | 250227/371472 [9:22:17<10:52:11, 3.10it/s] 67%|██████▋ | 250228/371472 [9:22:17<10:26:01, 3.23it/s] 67%|██████▋ | 250229/371472 [9:22:18<10:09:40, 3.31it/s] 67%|██████▋ | 250230/371472 [9:22:18<9:49:26, 3.43it/s] 67%|██████▋ | 250231/371472 [9:22:18<9:43:38, 3.46it/s] 67%|██████▋ | 250232/371472 [9:22:19<10:36:32, 3.17it/s] 67%|██████▋ | 250233/371472 [9:22:19<9:57:30, 3.38it/s] 67%|██████▋ | 250234/371472 [9:22:19<9:46:31, 3.45it/s] 67%|██████▋ | 250235/371472 [9:22:20<10:56:47, 3.08it/s] 67%|██████▋ | 250236/371472 [9:22:20<10:27:52, 3.22it/s] 67%|██████▋ | 250237/371472 [9:22:20<10:33:01, 3.19it/s] 67%|██████▋ | 250238/371472 [9:22:20<10:22:25, 3.25it/s] 67%|██████▋ | 250239/371472 [9:22:21<10:23:33, 3.24it/s] 67%|██████▋ | 250240/371472 [9:22:21<10:06:31, 3.33it/s] {'loss': 2.7524, 'learning_rate': 3.9387834256286493e-07, 'epoch': 10.78} + 67%|██████▋ | 250240/371472 [9:22:21<10:06:31, 3.33it/s] 67%|██████▋ | 250241/371472 [9:22:21<9:55:41, 3.39it/s] 67%|██████▋ | 250242/371472 [9:22:22<10:22:20, 3.25it/s] 67%|██████▋ | 250243/371472 [9:22:22<9:56:50, 3.39it/s] 67%|██████▋ | 250244/371472 [9:22:22<9:40:28, 3.48it/s] 67%|██████▋ | 250245/371472 [9:22:22<9:28:09, 3.56it/s] 67%|██████▋ | 250246/371472 [9:22:23<9:46:46, 3.44it/s] 67%|██████▋ | 250247/371472 [9:22:23<9:47:52, 3.44it/s] 67%|██████▋ | 250248/371472 [9:22:24<11:09:46, 3.02it/s] 67%|██████▋ | 250249/371472 [9:22:24<10:42:20, 3.15it/s] 67%|██████▋ | 250250/371472 [9:22:24<10:14:02, 3.29it/s] 67%|██████▋ | 250251/371472 [9:22:24<10:33:14, 3.19it/s] 67%|██████▋ | 250252/371472 [9:22:25<10:30:51, 3.20it/s] 67%|██████▋ | 250253/371472 [9:22:25<10:15:45, 3.28it/s] 67%|██████▋ | 250254/371472 [9:22:25<10:34:55, 3.18it/s] 67%|██████▋ | 250255/371472 [9:22:26<10:00:25, 3.36it/s] 67%|██████▋ | 250256/371472 [9:22:26<10:06:42, 3.33it/s] 67%|██████▋ | 250257/371472 [9:22:26<10:24:24, 3.24it/s] 67%|████��█▋ | 250258/371472 [9:22:27<10:34:06, 3.19it/s] 67%|██████▋ | 250259/371472 [9:22:27<11:11:18, 3.01it/s] 67%|██████▋ | 250260/371472 [9:22:27<10:40:06, 3.16it/s] {'loss': 2.7248, 'learning_rate': 3.93829860587386e-07, 'epoch': 10.78} + 67%|██████▋ | 250260/371472 [9:22:27<10:40:06, 3.16it/s] 67%|██████▋ | 250261/371472 [9:22:28<10:50:28, 3.11it/s] 67%|██████▋ | 250262/371472 [9:22:28<10:31:12, 3.20it/s] 67%|██████▋ | 250263/371472 [9:22:28<10:27:17, 3.22it/s] 67%|██████▋ | 250264/371472 [9:22:28<10:46:47, 3.12it/s] 67%|██████▋ | 250265/371472 [9:22:29<10:32:07, 3.20it/s] 67%|██████▋ | 250266/371472 [9:22:29<10:03:07, 3.35it/s] 67%|██████▋ | 250267/371472 [9:22:29<9:50:46, 3.42it/s] 67%|██████▋ | 250268/371472 [9:22:30<10:00:56, 3.36it/s] 67%|██████▋ | 250269/371472 [9:22:30<9:41:31, 3.47it/s] 67%|██████▋ | 250270/371472 [9:22:30<10:06:55, 3.33it/s] 67%|██████▋ | 250271/371472 [9:22:31<9:59:42, 3.37it/s] 67%|██████▋ | 250272/371472 [9:22:31<10:24:27, 3.23it/s] 67%|██████▋ | 250273/371472 [9:22:31<10:02:23, 3.35it/s] 67%|██████▋ | 250274/371472 [9:22:31<9:51:38, 3.41it/s] 67%|██████▋ | 250275/371472 [9:22:32<9:54:20, 3.40it/s] 67%|██████▋ | 250276/371472 [9:22:32<10:22:41, 3.24it/s] 67%|██████▋ | 250277/371472 [9:22:32<9:59:14, 3.37it/s] 67%|██████▋ | 250278/371472 [9:22:33<9:54:34, 3.40it/s] 67%|██████▋ | 250279/371472 [9:22:33<10:01:36, 3.36it/s] 67%|██████▋ | 250280/371472 [9:22:33<10:50:03, 3.11it/s] {'loss': 2.7396, 'learning_rate': 3.937813786119072e-07, 'epoch': 10.78} + 67%|██████▋ | 250280/371472 [9:22:33<10:50:03, 3.11it/s] 67%|██████▋ | 250281/371472 [9:22:34<10:45:59, 3.13it/s] 67%|██████▋ | 250282/371472 [9:22:34<10:45:45, 3.13it/s] 67%|██████▋ | 250283/371472 [9:22:34<10:10:13, 3.31it/s] 67%|██████▋ | 250284/371472 [9:22:35<10:24:26, 3.23it/s] 67%|██████▋ | 250285/371472 [9:22:35<10:02:50, 3.35it/s] 67%|██████▋ | 250286/371472 [9:22:35<10:18:22, 3.27it/s] 67%|██████▋ | 250287/371472 [9:22:35<10:01:25, 3.36it/s] 67%|██████▋ | 250288/371472 [9:22:36<9:42:40, 3.47it/s] 67%|██████▋ | 250289/371472 [9:22:36<9:31:47, 3.53it/s] 67%|██████▋ | 250290/371472 [9:22:36<9:19:13, 3.61it/s] 67%|██████▋ | 250291/371472 [9:22:37<9:57:12, 3.38it/s] 67%|██████▋ | 250292/371472 [9:22:37<9:47:05, 3.44it/s] 67%|██████▋ | 250293/371472 [9:22:37<9:29:50, 3.54it/s] 67%|██████▋ | 250294/371472 [9:22:37<9:37:31, 3.50it/s] 67%|██████▋ | 250295/371472 [9:22:38<10:06:09, 3.33it/s] 67%|██████▋ | 250296/371472 [9:22:38<9:46:16, 3.44it/s] 67%|██████▋ | 250297/371472 [9:22:38<9:38:59, 3.49it/s] 67%|██████▋ | 250298/371472 [9:22:39<9:41:03, 3.48it/s] 67%|██████▋ | 250299/371472 [9:22:39<9:45:39, 3.45it/s] 67%|██████▋ | 250300/371472 [9:22:39<10:06:02, 3.33it/s] {'loss': 2.7933, 'learning_rate': 3.937328966364282e-07, 'epoch': 10.78} + 67%|██████▋ | 250300/371472 [9:22:39<10:06:02, 3.33it/s] 67%|██████▋ | 250301/371472 [9:22:39<10:02:17, 3.35it/s] 67%|██████▋ | 250302/371472 [9:22:40<9:53:31, 3.40it/s] 67%|██████▋ | 250303/371472 [9:22:40<9:33:10, 3.52it/s] 67%|██████▋ | 250304/371472 [9:22:40<9:50:31, 3.42it/s] 67%|██████▋ | 250305/371472 [9:22:41<9:43:15, 3.46it/s] 67%|██████▋ | 250306/371472 [9:22:41<10:15:12, 3.28it/s] 67%|██████▋ | 250307/371472 [9:22:41<10:03:37, 3.35it/s] 67%|██████▋ | 250308/371472 [9:22:41<9:47:03, 3.44it/s] 67%|██████▋ | 250309/371472 [9:22:42<9:45:28, 3.45it/s] 67%|██████▋ | 250310/371472 [9:22:42<10:02:12, 3.35it/s] 67%|██████▋ | 250311/371472 [9:22:42<10:16:42, 3.27it/s] 67%|██████▋ | 250312/371472 [9:22:43<10:04:58, 3.34it/s] 67%|██████▋ | 250313/371472 [9:22:43<10:03:07, 3.35it/s] 67%|██████▋ | 250314/371472 [9:22:43<10:02:22, 3.35it/s] 67%|██████▋ | 250315/371472 [9:22:44<9:56:47, 3.38it/s] 67%|██████▋ | 250316/371472 [9:22:44<9:54:10, 3.40it/s] 67%|██████▋ | 250317/371472 [9:22:44<9:57:34, 3.38it/s] 67%|██████��� | 250318/371472 [9:22:44<10:00:02, 3.37it/s] 67%|██████▋ | 250319/371472 [9:22:45<9:54:46, 3.39it/s] 67%|██████▋ | 250320/371472 [9:22:45<10:12:32, 3.30it/s] {'loss': 2.7059, 'learning_rate': 3.936844146609494e-07, 'epoch': 10.78} + 67%|██████▋ | 250320/371472 [9:22:45<10:12:32, 3.30it/s] 67%|██████▋ | 250321/371472 [9:22:45<10:38:57, 3.16it/s] 67%|██████▋ | 250322/371472 [9:22:46<10:29:11, 3.21it/s] 67%|██████▋ | 250323/371472 [9:22:46<10:20:36, 3.25it/s] 67%|██████▋ | 250324/371472 [9:22:46<10:19:36, 3.26it/s] 67%|██████▋ | 250325/371472 [9:22:47<11:03:08, 3.04it/s] 67%|██████▋ | 250326/371472 [9:22:47<10:39:20, 3.16it/s] 67%|██████▋ | 250327/371472 [9:22:47<10:22:07, 3.25it/s] 67%|██████▋ | 250328/371472 [9:22:48<10:41:23, 3.15it/s] 67%|██████▋ | 250329/371472 [9:22:48<10:16:35, 3.27it/s] 67%|██████▋ | 250330/371472 [9:22:48<10:03:42, 3.34it/s] 67%|██████▋ | 250331/371472 [9:22:48<9:49:06, 3.43it/s] 67%|██████▋ | 250332/371472 [9:22:49<9:42:35, 3.47it/s] 67%|██████▋ | 250333/371472 [9:22:49<9:41:40, 3.47it/s] 67%|██████▋ | 250334/371472 [9:22:49<9:31:51, 3.53it/s] 67%|██████▋ | 250335/371472 [9:22:50<9:49:24, 3.43it/s] 67%|██████▋ | 250336/371472 [9:22:50<10:26:19, 3.22it/s] 67%|██████▋ | 250337/371472 [9:22:50<10:13:06, 3.29it/s] 67%|██████▋ | 250338/371472 [9:22:51<9:50:43, 3.42it/s] 67%|██████▋ | 250339/371472 [9:22:51<9:43:10, 3.46it/s] 67%|██████▋ | 250340/371472 [9:22:51<10:05:44, 3.33it/s] {'loss': 2.74, 'learning_rate': 3.9363593268547045e-07, 'epoch': 10.78} + 67%|██████▋ | 250340/371472 [9:22:51<10:05:44, 3.33it/s] 67%|██████▋ | 250341/371472 [9:22:51<9:52:31, 3.41it/s] 67%|██████▋ | 250342/371472 [9:22:52<9:41:19, 3.47it/s] 67%|██████▋ | 250343/371472 [9:22:52<9:49:44, 3.42it/s] 67%|██████▋ | 250344/371472 [9:22:52<10:23:16, 3.24it/s] 67%|██████▋ | 250345/371472 [9:22:53<10:15:54, 3.28it/s] 67%|██████▋ | 250346/371472 [9:22:53<10:04:50, 3.34it/s] 67%|██████▋ | 250347/371472 [9:22:53<9:54:43, 3.39it/s] 67%|██████▋ | 250348/371472 [9:22:54<10:17:30, 3.27it/s] 67%|██████▋ | 250349/371472 [9:22:54<9:57:13, 3.38it/s] 67%|██████▋ | 250350/371472 [9:22:54<10:06:44, 3.33it/s] 67%|██████▋ | 250351/371472 [9:22:54<10:00:18, 3.36it/s] 67%|██████▋ | 250352/371472 [9:22:55<10:02:38, 3.35it/s] 67%|██████▋ | 250353/371472 [9:22:55<9:54:28, 3.40it/s] 67%|██████▋ | 250354/371472 [9:22:55<9:51:32, 3.41it/s] 67%|██████▋ | 250355/371472 [9:22:56<9:43:00, 3.46it/s] 67%|██████▋ | 250356/371472 [9:22:56<9:37:41, 3.49it/s] 67%|██████▋ | 250357/371472 [9:22:56<9:40:39, 3.48it/s] 67%|██████▋ | 250358/371472 [9:22:56<9:36:40, 3.50it/s] 67%|██████▋ | 250359/371472 [9:22:57<9:27:08, 3.56it/s] 67%|██████▋ | 250360/371472 [9:22:57<9:23:53, 3.58it/s] {'loss': 2.7157, 'learning_rate': 3.9358745070999157e-07, 'epoch': 10.78} + 67%|██████▋ | 250360/371472 [9:22:57<9:23:53, 3.58it/s] 67%|██████▋ | 250361/371472 [9:22:57<9:38:08, 3.49it/s] 67%|██████▋ | 250362/371472 [9:22:58<9:52:10, 3.41it/s] 67%|██████▋ | 250363/371472 [9:22:58<10:07:08, 3.32it/s] 67%|██████▋ | 250364/371472 [9:22:58<10:00:12, 3.36it/s] 67%|██████▋ | 250365/371472 [9:22:58<9:41:55, 3.47it/s] 67%|██████▋ | 250366/371472 [9:22:59<9:35:05, 3.51it/s] 67%|██████▋ | 250367/371472 [9:22:59<9:28:32, 3.55it/s] 67%|██████▋ | 250368/371472 [9:22:59<10:46:47, 3.12it/s] 67%|██████▋ | 250369/371472 [9:23:00<10:24:39, 3.23it/s] 67%|██████▋ | 250370/371472 [9:23:00<10:18:24, 3.26it/s] 67%|██████▋ | 250371/371472 [9:23:00<10:05:31, 3.33it/s] 67%|██████▋ | 250372/371472 [9:23:01<10:09:58, 3.31it/s] 67%|██████▋ | 250373/371472 [9:23:01<10:01:08, 3.36it/s] 67%|██████▋ | 250374/371472 [9:23:01<9:58:38, 3.37it/s] 67%|██████▋ | 250375/371472 [9:23:01<9:51:40, 3.41it/s] 67%|██████▋ | 250376/371472 [9:23:02<11:00:23, 3.06it/s] 67%|██████▋ | 250377/371472 [9:23:02<10:33:12, 3.19it/s] 67%|██████▋ | 250378/371472 [9:23:02<10:38:23, 3.16it/s] 67%|██████▋ | 250379/371472 [9:23:03<10:20:22, 3.25it/s] 67%|██████▋ | 250380/371472 [9:23:03<10:24:54, 3.23it/s] {'loss': 2.8119, 'learning_rate': 3.9353896873451264e-07, 'epoch': 10.78} + 67%|██████▋ | 250380/371472 [9:23:03<10:24:54, 3.23it/s] 67%|██████▋ | 250381/371472 [9:23:03<10:20:27, 3.25it/s] 67%|██████▋ | 250382/371472 [9:23:04<10:13:24, 3.29it/s] 67%|██████▋ | 250383/371472 [9:23:04<10:22:51, 3.24it/s] 67%|██████▋ | 250384/371472 [9:23:04<10:06:44, 3.33it/s] 67%|██████▋ | 250385/371472 [9:23:05<9:58:34, 3.37it/s] 67%|██████▋ | 250386/371472 [9:23:05<10:06:17, 3.33it/s] 67%|██████▋ | 250387/371472 [9:23:05<9:53:20, 3.40it/s] 67%|██████▋ | 250388/371472 [9:23:05<9:40:39, 3.48it/s] 67%|██████▋ | 250389/371472 [9:23:06<10:04:09, 3.34it/s] 67%|██████▋ | 250390/371472 [9:23:06<10:12:26, 3.30it/s] 67%|██████▋ | 250391/371472 [9:23:06<9:51:50, 3.41it/s] 67%|██████▋ | 250392/371472 [9:23:07<10:28:55, 3.21it/s] 67%|██████▋ | 250393/371472 [9:23:07<10:46:04, 3.12it/s] 67%|██████▋ | 250394/371472 [9:23:07<10:46:02, 3.12it/s] 67%|██████▋ | 250395/371472 [9:23:08<10:46:26, 3.12it/s] 67%|██████▋ | 250396/371472 [9:23:08<10:47:31, 3.12it/s] 67%|██████▋ | 250397/371472 [9:23:08<10:48:01, 3.11it/s] 67%|██████▋ | 250398/371472 [9:23:09<10:43:12, 3.14it/s] 67%|██████▋ | 250399/371472 [9:23:09<10:50:52, 3.10it/s] 67%|██████▋ | 250400/371472 [9:23:09<10:34:26, 3.18it/s] {'loss': 2.6462, 'learning_rate': 3.934904867590338e-07, 'epoch': 10.79} + 67%|██████▋ | 250400/371472 [9:23:09<10:34:26, 3.18it/s] 67%|██████▋ | 250401/371472 [9:23:10<10:54:32, 3.08it/s] 67%|██████▋ | 250402/371472 [9:23:10<10:27:32, 3.22it/s] 67%|██████▋ | 250403/371472 [9:23:10<10:22:34, 3.24it/s] 67%|██████▋ | 250404/371472 [9:23:10<10:21:01, 3.25it/s] 67%|██████▋ | 250405/371472 [9:23:11<10:04:07, 3.34it/s] 67%|██████▋ | 250406/371472 [9:23:11<10:04:08, 3.34it/s] 67%|██████▋ | 250407/371472 [9:23:11<9:54:49, 3.39it/s] 67%|██████▋ | 250408/371472 [9:23:12<10:13:01, 3.29it/s] 67%|██████▋ | 250409/371472 [9:23:12<10:05:17, 3.33it/s] 67%|██████▋ | 250410/371472 [9:23:12<10:32:14, 3.19it/s] 67%|██████▋ | 250411/371472 [9:23:13<10:42:13, 3.14it/s] 67%|██████▋ | 250412/371472 [9:23:13<11:06:18, 3.03it/s] 67%|██████▋ | 250413/371472 [9:23:13<10:49:41, 3.11it/s] 67%|██████▋ | 250414/371472 [9:23:14<10:34:08, 3.18it/s] 67%|██████▋ | 250415/371472 [9:23:14<11:14:54, 2.99it/s] 67%|██████▋ | 250416/371472 [9:23:14<11:00:56, 3.05it/s] 67%|██████▋ | 250417/371472 [9:23:15<10:31:22, 3.20it/s] 67%|██████▋ | 250418/371472 [9:23:15<10:26:08, 3.22it/s] 67%|██████▋ | 250419/371472 [9:23:15<10:36:39, 3.17it/s] 67%|██████▋ | 250420/371472 [9:23:15<10:20:44, 3.25it/s] {'loss': 2.6425, 'learning_rate': 3.934420047835549e-07, 'epoch': 10.79} + 67%|██████▋ | 250420/371472 [9:23:15<10:20:44, 3.25it/s] 67%|██████▋ | 250421/371472 [9:23:16<11:00:05, 3.06it/s] 67%|██████▋ | 250422/371472 [9:23:16<10:52:32, 3.09it/s] 67%|██████▋ | 250423/371472 [9:23:16<10:25:01, 3.23it/s] 67%|██████▋ | 250424/371472 [9:23:17<10:14:18, 3.28it/s] 67%|██████▋ | 250425/371472 [9:23:17<10:21:20, 3.25it/s] 67%|██████▋ | 250426/371472 [9:23:17<10:20:05, 3.25it/s] 67%|██████▋ | 250427/371472 [9:23:18<10:08:32, 3.32it/s] 67%|██████▋ | 250428/371472 [9:23:18<10:24:26, 3.23it/s] 67%|██████▋ | 250429/371472 [9:23:18<10:56:12, 3.07it/s] 67%|██████▋ | 250430/371472 [9:23:19<10:42:36, 3.14it/s] 67%|██████▋ | 250431/371472 [9:23:19<11:03:05, 3.04it/s] 67%|██████▋ | 250432/371472 [9:23:19<10:38:30, 3.16it/s] 67%|██████▋ | 250433/371472 [9:23:20<10:17:56, 3.26it/s] 67%|██████▋ | 250434/371472 [9:23:20<10:07:05, 3.32it/s] 67%|██████▋ | 250435/371472 [9:23:20<9:46:27, 3.44it/s] 67%|██████▋ | 250436/371472 [9:23:20<9:31:11, 3.53it/s] 67%|██████▋ | 250437/371472 [9:23:21<9:51:08, 3.41it/s] 67%|█████���▋ | 250438/371472 [9:23:21<9:41:18, 3.47it/s] 67%|██████▋ | 250439/371472 [9:23:21<9:43:13, 3.46it/s] 67%|██████▋ | 250440/371472 [9:23:22<9:57:11, 3.38it/s] {'loss': 2.6152, 'learning_rate': 3.93393522808076e-07, 'epoch': 10.79} + 67%|██████▋ | 250440/371472 [9:23:22<9:57:11, 3.38it/s] 67%|██████▋ | 250441/371472 [9:23:22<10:11:57, 3.30it/s] 67%|██████▋ | 250442/371472 [9:23:22<10:47:24, 3.12it/s] 67%|██████▋ | 250443/371472 [9:23:23<10:15:52, 3.28it/s] 67%|██████▋ | 250444/371472 [9:23:23<10:14:49, 3.28it/s] 67%|██████▋ | 250445/371472 [9:23:23<10:01:07, 3.36it/s] 67%|██████▋ | 250446/371472 [9:23:24<11:00:32, 3.05it/s] 67%|██████▋ | 250447/371472 [9:23:24<10:35:50, 3.17it/s] 67%|██████▋ | 250448/371472 [9:23:24<10:12:20, 3.29it/s] 67%|██████▋ | 250449/371472 [9:23:24<11:11:15, 3.00it/s] 67%|██████▋ | 250450/371472 [9:23:25<10:32:14, 3.19it/s] 67%|██████▋ | 250451/371472 [9:23:25<10:14:04, 3.28it/s] 67%|██████▋ | 250452/371472 [9:23:25<10:23:36, 3.23it/s] 67%|██████▋ | 250453/371472 [9:23:26<10:39:35, 3.15it/s] 67%|██████▋ | 250454/371472 [9:23:26<10:12:19, 3.29it/s] 67%|██████▋ | 250455/371472 [9:23:26<10:01:44, 3.35it/s] 67%|██████▋ | 250456/371472 [9:23:27<10:15:24, 3.28it/s] 67%|██████▋ | 250457/371472 [9:23:27<9:57:23, 3.38it/s] 67%|██████▋ | 250458/371472 [9:23:27<9:58:54, 3.37it/s] 67%|██████▋ | 250459/371472 [9:23:27<9:56:19, 3.38it/s] 67%|██████▋ | 250460/371472 [9:23:28<10:03:49, 3.34it/s] {'loss': 2.6737, 'learning_rate': 3.933450408325971e-07, 'epoch': 10.79} + 67%|██████▋ | 250460/371472 [9:23:28<10:03:49, 3.34it/s] 67%|██████▋ | 250461/371472 [9:23:28<10:10:25, 3.30it/s] 67%|██████▋ | 250462/371472 [9:23:28<10:37:48, 3.16it/s] 67%|██████▋ | 250463/371472 [9:23:29<10:59:29, 3.06it/s] 67%|██████▋ | 250464/371472 [9:23:29<10:28:44, 3.21it/s] 67%|██████▋ | 250465/371472 [9:23:29<10:11:52, 3.30it/s] 67%|██████▋ | 250466/371472 [9:23:30<10:17:46, 3.26it/s] 67%|██████▋ | 250467/371472 [9:23:30<10:19:08, 3.26it/s] 67%|██████▋ | 250468/371472 [9:23:30<10:08:25, 3.31it/s] 67%|██████▋ | 250469/371472 [9:23:31<10:04:26, 3.34it/s] 67%|██████▋ | 250470/371472 [9:23:31<9:52:38, 3.40it/s] 67%|██████▋ | 250471/371472 [9:23:31<10:23:19, 3.24it/s] 67%|██████▋ | 250472/371472 [9:23:31<10:13:42, 3.29it/s] 67%|██████▋ | 250473/371472 [9:23:32<10:13:47, 3.29it/s] 67%|██████▋ | 250474/371472 [9:23:32<10:33:30, 3.18it/s] 67%|██████▋ | 250475/371472 [9:23:32<10:14:59, 3.28it/s] 67%|██████▋ | 250476/371472 [9:23:33<9:51:50, 3.41it/s] 67%|██████▋ | 250477/371472 [9:23:33<9:59:51, 3.36it/s] 67%|██████▋ | 250478/371472 [9:23:33<10:06:57, 3.32it/s] 67%|██████▋ | 250479/371472 [9:23:34<9:49:19, 3.42it/s] 67%|██████▋ | 250480/371472 [9:23:34<9:41:47, 3.47it/s] {'loss': 2.6755, 'learning_rate': 3.9329655885711827e-07, 'epoch': 10.79} + 67%|██████▋ | 250480/371472 [9:23:34<9:41:47, 3.47it/s] 67%|██████▋ | 250481/371472 [9:23:34<9:50:05, 3.42it/s] 67%|██████▋ | 250482/371472 [9:23:34<10:09:36, 3.31it/s] 67%|██████▋ | 250483/371472 [9:23:35<10:19:40, 3.25it/s] 67%|██████▋ | 250484/371472 [9:23:35<10:34:05, 3.18it/s] 67%|██████▋ | 250485/371472 [9:23:35<10:25:12, 3.23it/s] 67%|██████▋ | 250486/371472 [9:23:36<10:08:01, 3.32it/s] 67%|██████▋ | 250487/371472 [9:23:36<10:07:43, 3.32it/s] 67%|██████▋ | 250488/371472 [9:23:36<10:00:05, 3.36it/s] 67%|██████▋ | 250489/371472 [9:23:37<10:07:05, 3.32it/s] 67%|██████▋ | 250490/371472 [9:23:37<9:56:48, 3.38it/s] 67%|██████▋ | 250491/371472 [9:23:37<11:10:02, 3.01it/s] 67%|██████▋ | 250492/371472 [9:23:38<10:51:07, 3.10it/s] 67%|██████▋ | 250493/371472 [9:23:38<10:30:42, 3.20it/s] 67%|██████▋ | 250494/371472 [9:23:38<10:16:35, 3.27it/s] 67%|██████▋ | 250495/371472 [9:23:38<10:16:53, 3.27it/s] 67%|██████▋ | 250496/371472 [9:23:39<10:52:39, 3.09it/s] 67%|██████▋ | 250497/371472 [9:23:39<10:22:48, 3.24it/s] 67%|██████▋ | 250498/371472 [9:23:39<10:24:40, 3.23it/s] 67%|██████▋ | 250499/371472 [9:23:40<10:18:21, 3.26it/s] 67%|██████▋ | 250500/371472 [9:23:40<10:59:17, 3.06it/s] {'loss': 2.8558, 'learning_rate': 3.932480768816393e-07, 'epoch': 10.79} + 67%|██████▋ | 250500/371472 [9:23:40<10:59:17, 3.06it/s] 67%|██████▋ | 250501/371472 [9:23:40<11:05:34, 3.03it/s] 67%|██████▋ | 250502/371472 [9:23:41<10:48:33, 3.11it/s] 67%|██████▋ | 250503/371472 [9:23:41<11:22:11, 2.96it/s] 67%|██████▋ | 250504/371472 [9:23:41<11:06:41, 3.02it/s] 67%|██████▋ | 250505/371472 [9:23:42<10:37:31, 3.16it/s] 67%|██████▋ | 250506/371472 [9:23:42<10:18:36, 3.26it/s] 67%|██████▋ | 250507/371472 [9:23:42<10:30:42, 3.20it/s] 67%|██████▋ | 250508/371472 [9:23:43<10:09:47, 3.31it/s] 67%|██████▋ | 250509/371472 [9:23:43<10:10:00, 3.30it/s] 67%|██████▋ | 250510/371472 [9:23:43<10:51:17, 3.10it/s] 67%|██████▋ | 250511/371472 [9:23:44<10:30:34, 3.20it/s] 67%|██████▋ | 250512/371472 [9:23:44<10:08:19, 3.31it/s] 67%|██████▋ | 250513/371472 [9:23:44<9:59:47, 3.36it/s] 67%|██████▋ | 250514/371472 [9:23:44<9:46:44, 3.44it/s] 67%|██████▋ | 250515/371472 [9:23:45<9:48:31, 3.43it/s] 67%|██████▋ | 250516/371472 [9:23:45<9:49:27, 3.42it/s] 67%|██████▋ | 250517/371472 [9:23:45<9:57:08, 3.38it/s] 67%|██████▋ | 250518/371472 [9:23:46<9:58:34, 3.37it/s] 67%|██████▋ | 250519/371472 [9:23:46<9:59:49, 3.36it/s] 67%|██████▋ | 250520/371472 [9:23:46<9:40:26, 3.47it/s] {'loss': 2.7448, 'learning_rate': 3.9319959490616046e-07, 'epoch': 10.79} + 67%|██████▋ | 250520/371472 [9:23:46<9:40:26, 3.47it/s] 67%|██████▋ | 250521/371472 [9:23:46<9:44:49, 3.45it/s] 67%|██████▋ | 250522/371472 [9:23:47<9:51:48, 3.41it/s] 67%|██████▋ | 250523/371472 [9:23:47<9:34:47, 3.51it/s] 67%|██████▋ | 250524/371472 [9:23:47<9:39:32, 3.48it/s] 67%|██████▋ | 250525/371472 [9:23:48<10:02:58, 3.34it/s] 67%|██████▋ | 250526/371472 [9:23:48<9:55:30, 3.38it/s] 67%|██████▋ | 250527/371472 [9:23:48<10:11:14, 3.30it/s] 67%|██████▋ | 250528/371472 [9:23:48<10:00:30, 3.36it/s] 67%|██████▋ | 250529/371472 [9:23:49<10:11:53, 3.29it/s] 67%|██████▋ | 250530/371472 [9:23:49<10:12:24, 3.29it/s] 67%|██████▋ | 250531/371472 [9:23:49<9:58:05, 3.37it/s] 67%|██████▋ | 250532/371472 [9:23:50<9:47:01, 3.43it/s] 67%|██████▋ | 250533/371472 [9:23:50<9:44:49, 3.45it/s] 67%|██████▋ | 250534/371472 [9:23:50<9:44:38, 3.45it/s] 67%|██████▋ | 250535/371472 [9:23:51<9:43:22, 3.46it/s] 67%|██████▋ | 250536/371472 [9:23:51<9:49:48, 3.42it/s] 67%|██████▋ | 250537/371472 [9:23:51<9:54:06, 3.39it/s] 67%|██████▋ | 250538/371472 [9:23:51<9:50:28, 3.41it/s] 67%|██████▋ | 250539/371472 [9:23:52<9:41:18, 3.47it/s] 67%|██████▋ | 250540/371472 [9:23:52<9:49:52, 3.42it/s] {'loss': 2.6994, 'learning_rate': 3.9315111293068153e-07, 'epoch': 10.79} + 67%|██████▋ | 250540/371472 [9:23:52<9:49:52, 3.42it/s] 67%|██████▋ | 250541/371472 [9:23:52<9:57:27, 3.37it/s] 67%|██████▋ | 250542/371472 [9:23:53<9:51:44, 3.41it/s] 67%|██████▋ | 250543/371472 [9:23:53<10:26:44, 3.22it/s] 67%|██████▋ | 250544/371472 [9:23:53<10:16:43, 3.27it/s] 67%|██████▋ | 250545/371472 [9:23:54<9:53:03, 3.40it/s] 67%|██████▋ | 250546/371472 [9:23:54<9:38:51, 3.48it/s] 67%|██████▋ | 250547/371472 [9:23:54<10:15:07, 3.28it/s] 67%|██████▋ | 250548/371472 [9:23:54<9:57:53, 3.37it/s] 67%|██████▋ | 250549/371472 [9:23:55<9:40:01, 3.47it/s] 67%|██████▋ | 250550/371472 [9:23:55<9:42:50, 3.46it/s] 67%|██████▋ | 250551/371472 [9:23:55<9:37:54, 3.49it/s] 67%|██████▋ | 250552/371472 [9:23:56<9:49:56, 3.42it/s] 67%|██████▋ | 250553/371472 [9:23:56<9:38:24, 3.48it/s] 67%|██████▋ | 250554/371472 [9:23:56<9:38:15, 3.49it/s] 67%|██████▋ | 250555/371472 [9:23:56<9:33:35, 3.51it/s] 67%|██████▋ | 250556/371472 [9:23:57<10:01:16, 3.35it/s] 67%|██████▋ | 250557/371472 [9:23:57<10:04:32, 3.33it/s] 67%|██████▋ | 250558/371472 [9:23:57<10:10:16, 3.30it/s] 67%|██████▋ | 250559/371472 [9:23:58<10:36:31, 3.17it/s] 67%|██████▋ | 250560/371472 [9:23:58<10:36:18, 3.17it/s] {'loss': 2.8555, 'learning_rate': 3.931026309552027e-07, 'epoch': 10.79} + 67%|██████▋ | 250560/371472 [9:23:58<10:36:18, 3.17it/s] 67%|██████▋ | 250561/371472 [9:23:58<10:10:14, 3.30it/s] 67%|██████▋ | 250562/371472 [9:23:59<10:16:17, 3.27it/s] 67%|██████▋ | 250563/371472 [9:23:59<9:55:15, 3.39it/s] 67%|██████▋ | 250564/371472 [9:23:59<10:21:45, 3.24it/s] 67%|██████▋ | 250565/371472 [9:24:00<10:40:49, 3.14it/s] 67%|██████▋ | 250566/371472 [9:24:00<10:53:38, 3.08it/s] 67%|██████▋ | 250567/371472 [9:24:00<10:54:24, 3.08it/s] 67%|██████▋ | 250568/371472 [9:24:01<10:40:28, 3.15it/s] 67%|██████▋ | 250569/371472 [9:24:01<10:11:08, 3.30it/s] 67%|██████▋ | 250570/371472 [9:24:01<9:57:26, 3.37it/s] 67%|██████▋ | 250571/371472 [9:24:01<9:40:05, 3.47it/s] 67%|██████▋ | 250572/371472 [9:24:02<9:41:47, 3.46it/s] 67%|██████▋ | 250573/371472 [9:24:02<10:05:48, 3.33it/s] 67%|██████▋ | 250574/371472 [9:24:02<9:55:14, 3.39it/s] 67%|██████▋ | 250575/371472 [9:24:03<10:29:47, 3.20it/s] 67%|██████▋ | 250576/371472 [9:24:03<10:15:40, 3.27it/s] 67%|██████▋ | 250577/371472 [9:24:03<11:09:05, 3.01it/s] 67%|██████▋ | 250578/371472 [9:24:04<10:44:09, 3.13it/s] 67%|██████▋ | 250579/371472 [9:24:04<11:12:38, 3.00it/s] 67%|██████▋ | 250580/371472 [9:24:04<11:29:34, 2.92it/s] {'loss': 2.6691, 'learning_rate': 3.9305414897972373e-07, 'epoch': 10.79} + 67%|██████▋ | 250580/371472 [9:24:04<11:29:34, 2.92it/s] 67%|██████▋ | 250581/371472 [9:24:05<10:54:25, 3.08it/s] 67%|██████▋ | 250582/371472 [9:24:05<10:38:16, 3.16it/s] 67%|██████▋ | 250583/371472 [9:24:05<10:43:47, 3.13it/s] 67%|██████▋ | 250584/371472 [9:24:05<10:17:12, 3.26it/s] 67%|██████▋ | 250585/371472 [9:24:06<11:19:23, 2.97it/s] 67%|██████▋ | 250586/371472 [9:24:06<11:39:10, 2.88it/s] 67%|██████▋ | 250587/371472 [9:24:07<11:02:12, 3.04it/s] 67%|██████▋ | 250588/371472 [9:24:07<11:29:39, 2.92it/s] 67%|██████▋ | 250589/371472 [9:24:07<10:46:14, 3.12it/s] 67%|██████▋ | 250590/371472 [9:24:07<10:35:39, 3.17it/s] 67%|██████▋ | 250591/371472 [9:24:08<10:13:46, 3.28it/s] 67%|██████▋ | 250592/371472 [9:24:08<10:16:03, 3.27it/s] 67%|██████▋ | 250593/371472 [9:24:08<10:02:17, 3.34it/s] 67%|██████▋ | 250594/371472 [9:24:09<9:58:14, 3.37it/s] 67%|██████▋ | 250595/371472 [9:24:09<11:44:11, 2.86it/s] 67%|██████▋ | 250596/371472 [9:24:09<11:10:13, 3.01it/s] 67%|██████▋ | 250597/371472 [9:24:10<10:44:00, 3.13it/s] 67%|██████▋ | 250598/371472 [9:24:10<10:32:57, 3.18it/s] 67%|██████▋ | 250599/371472 [9:24:10<10:08:30, 3.31it/s] 67%|██████▋ | 250600/371472 [9:24:11<9:55:29, 3.38it/s] {'loss': 2.7229, 'learning_rate': 3.930056670042449e-07, 'epoch': 10.79} + 67%|██████▋ | 250600/371472 [9:24:11<9:55:29, 3.38it/s] 67%|██████▋ | 250601/371472 [9:24:11<9:55:01, 3.39it/s] 67%|██████▋ | 250602/371472 [9:24:11<9:40:35, 3.47it/s] 67%|██████▋ | 250603/371472 [9:24:11<9:41:11, 3.47it/s] 67%|██████▋ | 250604/371472 [9:24:12<9:47:59, 3.43it/s] 67%|██████▋ | 250605/371472 [9:24:12<9:43:47, 3.45it/s] 67%|██████▋ | 250606/371472 [9:24:12<9:48:58, 3.42it/s] 67%|██████▋ | 250607/371472 [9:24:13<9:38:50, 3.48it/s] 67%|██████▋ | 250608/371472 [9:24:13<9:40:44, 3.47it/s] 67%|██████▋ | 250609/371472 [9:24:13<9:28:26, 3.54it/s] 67%|██████▋ | 250610/371472 [9:24:13<9:24:08, 3.57it/s] 67%|██████▋ | 250611/371472 [9:24:14<10:00:35, 3.35it/s] 67%|██████▋ | 250612/371472 [9:24:14<9:55:12, 3.38it/s] 67%|██████▋ | 250613/371472 [9:24:14<9:56:39, 3.38it/s] 67%|██████▋ | 250614/371472 [9:24:15<9:47:09, 3.43it/s] 67%|██████▋ | 250615/371472 [9:24:15<9:41:31, 3.46it/s] 67%|██████▋ | 250616/371472 [9:24:15<9:32:19, 3.52it/s] 67%|██████▋ | 250617/371472 [9:24:15<9:36:23, 3.49it/s] 67%|██████▋ | 250618/371472 [9:24:16<10:11:20, 3.29it/s] 67%|██████▋ | 250619/371472 [9:24:16<9:50:51, 3.41it/s] 67%|██████▋ | 250620/371472 [9:24:16<9:37:11, 3.49it/s] {'loss': 2.672, 'learning_rate': 3.929571850287659e-07, 'epoch': 10.79} + 67%|██████▋ | 250620/371472 [9:24:16<9:37:11, 3.49it/s] 67%|██████▋ | 250621/371472 [9:24:17<10:06:42, 3.32it/s] 67%|██████▋ | 250622/371472 [9:24:17<10:58:31, 3.06it/s] 67%|██████▋ | 250623/371472 [9:24:17<10:28:12, 3.21it/s] 67%|██████▋ | 250624/371472 [9:24:18<11:45:31, 2.85it/s] 67%|██████▋ | 250625/371472 [9:24:18<10:59:30, 3.05it/s] 67%|██████▋ | 250626/371472 [9:24:18<10:34:05, 3.18it/s] 67%|██████▋ | 250627/371472 [9:24:19<10:33:32, 3.18it/s] 67%|██████▋ | 250628/371472 [9:24:19<10:20:27, 3.25it/s] 67%|██████▋ | 250629/371472 [9:24:19<10:05:57, 3.32it/s] 67%|██████▋ | 250630/371472 [9:24:20<10:11:08, 3.30it/s] 67%|██████▋ | 250631/371472 [9:24:20<10:22:56, 3.23it/s] 67%|██████▋ | 250632/371472 [9:24:20<9:58:22, 3.37it/s] 67%|██████▋ | 250633/371472 [9:24:20<10:04:09, 3.33it/s] 67%|██████▋ | 250634/371472 [9:24:21<9:52:18, 3.40it/s] 67%|██████▋ | 250635/371472 [9:24:21<9:54:25, 3.39it/s] 67%|██████▋ | 250636/371472 [9:24:21<9:40:16, 3.47it/s] 67%|██████▋ | 250637/371472 [9:24:22<9:50:59, 3.41it/s] 67%|██████▋ | 250638/371472 [9:24:22<9:33:04, 3.51it/s] 67%|██████▋ | 250639/371472 [9:24:22<9:52:44, 3.40it/s] 67%|██████▋ | 250640/371472 [9:24:22<10:05:43, 3.32it/s] {'loss': 2.6419, 'learning_rate': 3.929087030532871e-07, 'epoch': 10.8} + 67%|██████▋ | 250640/371472 [9:24:22<10:05:43, 3.32it/s] 67%|██████▋ | 250641/371472 [9:24:23<9:59:26, 3.36it/s] 67%|██████▋ | 250642/371472 [9:24:23<9:47:17, 3.43it/s] 67%|██████▋ | 250643/371472 [9:24:23<9:37:31, 3.49it/s] 67%|██████▋ | 250644/371472 [9:24:24<9:35:18, 3.50it/s] 67%|██████▋ | 250645/371472 [9:24:24<9:35:36, 3.50it/s] 67%|██████▋ | 250646/371472 [9:24:24<9:24:21, 3.57it/s] 67%|██████▋ | 250647/371472 [9:24:25<10:25:58, 3.22it/s] 67%|██████▋ | 250648/371472 [9:24:25<10:08:06, 3.31it/s] 67%|██████▋ | 250649/371472 [9:24:25<10:11:39, 3.29it/s] 67%|██████▋ | 250650/371472 [9:24:25<10:00:14, 3.35it/s] 67%|██████▋ | 250651/371472 [9:24:26<10:12:19, 3.29it/s] 67%|██████▋ | 250652/371472 [9:24:26<10:35:02, 3.17it/s] 67%|██████▋ | 250653/371472 [9:24:26<10:14:05, 3.28it/s] 67%|██████▋ | 250654/371472 [9:24:27<10:07:49, 3.31it/s] 67%|██████▋ | 250655/371472 [9:24:27<9:48:08, 3.42it/s] 67%|██████▋ | 250656/371472 [9:24:27<9:48:56, 3.42it/s] 67%|██████▋ | 250657/371472 [9:24:28<10:02:44, 3.34it/s] 67%|██████▋ | 250658/371472 [9:24:28<10:05:14, 3.33it/s] 67%|██████▋ | 250659/371472 [9:24:28<10:39:26, 3.15it/s] 67%|██████▋ | 250660/371472 [9:24:28<10:25:03, 3.22it/s] {'loss': 2.6856, 'learning_rate': 3.928602210778082e-07, 'epoch': 10.8} + 67%|██████▋ | 250660/371472 [9:24:28<10:25:03, 3.22it/s] 67%|██████▋ | 250661/371472 [9:24:29<10:32:30, 3.18it/s] 67%|██████▋ | 250662/371472 [9:24:29<10:19:30, 3.25it/s] 67%|██████▋ | 250663/371472 [9:24:29<10:16:24, 3.27it/s] 67%|██████▋ | 250664/371472 [9:24:30<11:06:13, 3.02it/s] 67%|██████▋ | 250665/371472 [9:24:30<11:09:11, 3.01it/s] 67%|██████▋ | 250666/371472 [9:24:30<10:28:51, 3.20it/s] 67%|██████▋ | 250667/371472 [9:24:31<10:35:33, 3.17it/s] 67%|██████▋ | 250668/371472 [9:24:31<10:07:44, 3.31it/s] 67%|██████▋ | 250669/371472 [9:24:31<9:55:41, 3.38it/s] 67%|██████▋ | 250670/371472 [9:24:32<10:08:34, 3.31it/s] 67%|██████▋ | 250671/371472 [9:24:32<9:52:56, 3.40it/s] 67%|██████▋ | 250672/371472 [9:24:32<9:51:17, 3.40it/s] 67%|██████▋ | 250673/371472 [9:24:32<10:06:11, 3.32it/s] 67%|██████▋ | 250674/371472 [9:24:33<9:56:17, 3.38it/s] 67%|██████▋ | 250675/371472 [9:24:33<9:51:18, 3.40it/s] 67%|██████▋ | 250676/371472 [9:24:33<10:52:19, 3.09it/s] 67%|██████▋ | 250677/371472 [9:24:34<11:16:32, 2.98it/s] 67%|██████▋ | 250678/371472 [9:24:34<10:49:05, 3.10it/s] 67%|██████▋ | 250679/371472 [9:24:34<10:29:41, 3.20it/s] 67%|██████▋ | 250680/371472 [9:24:35<10:11:13, 3.29it/s] {'loss': 2.6191, 'learning_rate': 3.928117391023293e-07, 'epoch': 10.8} + 67%|██████▋ | 250680/371472 [9:24:35<10:11:13, 3.29it/s] 67%|██████▋ | 250681/371472 [9:24:35<9:54:44, 3.38it/s] 67%|██████▋ | 250682/371472 [9:24:35<9:52:13, 3.40it/s] 67%|██████▋ | 250683/371472 [9:24:36<9:39:48, 3.47it/s] 67%|██████▋ | 250684/371472 [9:24:36<10:00:11, 3.35it/s] 67%|██████▋ | 250685/371472 [9:24:36<10:16:27, 3.27it/s] 67%|██████▋ | 250686/371472 [9:24:36<10:32:06, 3.18it/s] 67%|██████▋ | 250687/371472 [9:24:37<10:33:54, 3.18it/s] 67%|██████▋ | 250688/371472 [9:24:37<10:56:37, 3.07it/s] 67%|██████▋ | 250689/371472 [9:24:38<11:16:00, 2.98it/s] 67%|██████▋ | 250690/371472 [9:24:38<11:10:01, 3.00it/s] 67%|██████▋ | 250691/371472 [9:24:38<10:45:20, 3.12it/s] 67%|██████▋ | 250692/371472 [9:24:38<10:17:04, 3.26it/s] 67%|██████▋ | 250693/371472 [9:24:39<10:01:32, 3.35it/s] 67%|██████▋ | 250694/371472 [9:24:39<9:52:25, 3.40it/s] 67%|██████▋ | 250695/371472 [9:24:39<10:04:15, 3.33it/s] 67%|██████▋ | 250696/371472 [9:24:40<9:52:12, 3.40it/s] 67%|██████▋ | 250697/371472 [9:24:40<9:59:57, 3.36it/s] 67%|██████▋ | 250698/371472 [9:24:40<9:54:43, 3.38it/s] 67%|██████▋ | 250699/371472 [9:24:40<9:51:34, 3.40it/s] 67%|██████▋ | 250700/371472 [9:24:41<10:14:42, 3.27it/s] {'loss': 2.626, 'learning_rate': 3.9276325712685037e-07, 'epoch': 10.8} + 67%|██████▋ | 250700/371472 [9:24:41<10:14:42, 3.27it/s] 67%|██████▋ | 250701/371472 [9:24:41<10:02:24, 3.34it/s] 67%|██████▋ | 250702/371472 [9:24:41<10:03:40, 3.33it/s] 67%|██████▋ | 250703/371472 [9:24:42<9:57:49, 3.37it/s] 67%|██████▋ | 250704/371472 [9:24:42<9:49:49, 3.41it/s] 67%|██████▋ | 250705/371472 [9:24:42<9:39:49, 3.47it/s] 67%|██████▋ | 250706/371472 [9:24:43<9:33:16, 3.51it/s] 67%|██████▋ | 250707/371472 [9:24:43<9:43:42, 3.45it/s] 67%|██████▋ | 250708/371472 [9:24:43<9:41:29, 3.46it/s] 67%|██████▋ | 250709/371472 [9:24:43<9:36:42, 3.49it/s] 67%|██████▋ | 250710/371472 [9:24:44<9:49:06, 3.42it/s] 67%|██████▋ | 250711/371472 [9:24:44<9:51:08, 3.40it/s] 67%|██████▋ | 250712/371472 [9:24:44<10:21:25, 3.24it/s] 67%|██████▋ | 250713/371472 [9:24:45<10:03:28, 3.34it/s] 67%|██████▋ | 250714/371472 [9:24:45<10:01:02, 3.35it/s] 67%|██████▋ | 250715/371472 [9:24:45<9:58:49, 3.36it/s] 67%|██████▋ | 250716/371472 [9:24:46<11:38:34, 2.88it/s] 67%|██████▋ | 250717/371472 [9:24:46<11:01:24, 3.04it/s] 67%|██████▋ | 250718/371472 [9:24:46<10:41:11, 3.14it/s] 67%|██████▋ | 250719/371472 [9:24:46<10:03:48, 3.33it/s] 67%|██████▋ | 250720/371472 [9:24:47<9:56:38, 3.37it/s] {'loss': 2.7637, 'learning_rate': 3.927147751513715e-07, 'epoch': 10.8} + 67%|██████▋ | 250720/371472 [9:24:47<9:56:38, 3.37it/s] 67%|██████▋ | 250721/371472 [9:24:47<10:01:08, 3.35it/s] 67%|██████▋ | 250722/371472 [9:24:47<9:53:24, 3.39it/s] 67%|██████▋ | 250723/371472 [9:24:48<10:25:30, 3.22it/s] 67%|██████▋ | 250724/371472 [9:24:48<10:25:58, 3.21it/s] 67%|██████▋ | 250725/371472 [9:24:48<10:28:15, 3.20it/s] 67%|██████▋ | 250726/371472 [9:24:49<10:17:58, 3.26it/s] 67%|██████▋ | 250727/371472 [9:24:49<9:57:28, 3.37it/s] 67%|██████▋ | 250728/371472 [9:24:49<10:32:58, 3.18it/s] 67%|██████▋ | 250729/371472 [9:24:50<10:31:16, 3.19it/s] 67%|██████▋ | 250730/371472 [9:24:50<10:25:58, 3.21it/s] 67%|██████▋ | 250731/371472 [9:24:50<10:25:22, 3.22it/s] 67%|██████▋ | 250732/371472 [9:24:50<10:04:01, 3.33it/s] 67%|██████▋ | 250733/371472 [9:24:51<10:16:54, 3.26it/s] 67%|██████▋ | 250734/371472 [9:24:51<10:04:47, 3.33it/s] 67%|██████▋ | 250735/371472 [9:24:51<9:58:59, 3.36it/s] 67%|██████▋ | 250736/371472 [9:24:52<9:40:08, 3.47it/s] 67%|██████▋ | 250737/371472 [9:24:52<9:30:30, 3.53it/s] 67%|██████▋ | 250738/371472 [9:24:52<10:05:44, 3.32it/s] 67%|██████▋ | 250739/371472 [9:24:53<9:48:06, 3.42it/s] 67%|██████▋ | 250740/371472 [9:24:53<9:57:30, 3.37it/s] {'loss': 2.7734, 'learning_rate': 3.926662931758926e-07, 'epoch': 10.8} + 67%|██████▋ | 250740/371472 [9:24:53<9:57:30, 3.37it/s] 67%|██████▋ | 250741/371472 [9:24:53<9:47:57, 3.42it/s] 67%|██████▋ | 250742/371472 [9:24:53<9:37:31, 3.48it/s] 67%|██████▋ | 250743/371472 [9:24:54<9:37:23, 3.48it/s] 68%|██████▊ | 250744/371472 [9:24:54<9:35:45, 3.49it/s] 68%|██████▊ | 250745/371472 [9:24:54<9:41:37, 3.46it/s] 68%|██████▊ | 250746/371472 [9:24:55<9:54:39, 3.38it/s] 68%|██████▊ | 250747/371472 [9:24:55<9:57:26, 3.37it/s] 68%|██████▊ | 250748/371472 [9:24:55<10:43:40, 3.13it/s] 68%|██████▊ | 250749/371472 [9:24:56<10:31:15, 3.19it/s] 68%|██████▊ | 250750/371472 [9:24:56<10:24:34, 3.22it/s] 68%|██████▊ | 250751/371472 [9:24:56<10:33:17, 3.18it/s] 68%|██████▊ | 250752/371472 [9:24:56<10:34:16, 3.17it/s] 68%|██████▊ | 250753/371472 [9:24:57<10:24:10, 3.22it/s] 68%|██████▊ | 250754/371472 [9:24:57<10:11:45, 3.29it/s] 68%|██████▊ | 250755/371472 [9:24:57<11:07:13, 3.02it/s] 68%|██████▊ | 250756/371472 [9:24:58<10:41:05, 3.14it/s] 68%|██████▊ | 250757/371472 [9:24:58<10:24:40, 3.22it/s] 68%|██████▊ | 250758/371472 [9:24:58<9:58:40, 3.36it/s] 68%|██████▊ | 250759/371472 [9:24:59<9:51:51, 3.40it/s] 68%|██████▊ | 250760/371472 [9:24:59<10:29:19, 3.20it/s] {'loss': 2.8191, 'learning_rate': 3.9261781120041364e-07, 'epoch': 10.8} + 68%|██████▊ | 250760/371472 [9:24:59<10:29:19, 3.20it/s] 68%|██████▊ | 250761/371472 [9:24:59<10:18:06, 3.25it/s] 68%|██████▊ | 250762/371472 [9:25:00<9:52:56, 3.39it/s] 68%|██████▊ | 250763/371472 [9:25:00<9:45:59, 3.43it/s] 68%|██████▊ | 250764/371472 [9:25:00<9:39:22, 3.47it/s] 68%|██████▊ | 250765/371472 [9:25:00<9:42:02, 3.46it/s] 68%|██████▊ | 250766/371472 [9:25:01<9:38:24, 3.48it/s] 68%|██████▊ | 250767/371472 [9:25:01<9:58:18, 3.36it/s] 68%|██████▊ | 250768/371472 [9:25:01<10:35:52, 3.16it/s] 68%|██████▊ | 250769/371472 [9:25:02<10:46:30, 3.11it/s] 68%|██████▊ | 250770/371472 [9:25:02<10:29:46, 3.19it/s] 68%|██████▊ | 250771/371472 [9:25:02<10:11:58, 3.29it/s] 68%|██████▊ | 250772/371472 [9:25:03<10:27:33, 3.21it/s] 68%|██████▊ | 250773/371472 [9:25:03<10:33:32, 3.18it/s] 68%|██████▊ | 250774/371472 [9:25:03<10:21:27, 3.24it/s] 68%|██████▊ | 250775/371472 [9:25:04<10:25:27, 3.22it/s] 68%|██████▊ | 250776/371472 [9:25:04<10:07:39, 3.31it/s] 68%|██████▊ | 250777/371472 [9:25:04<9:59:42, 3.35it/s] 68%|██████▊ | 250778/371472 [9:25:04<9:50:19, 3.41it/s] 68%|██████▊ | 250779/371472 [9:25:05<9:36:23, 3.49it/s] 68%|██████▊ | 250780/371472 [9:25:05<9:37:55, 3.48it/s] {'loss': 2.6136, 'learning_rate': 3.925693292249348e-07, 'epoch': 10.8} + 68%|██████▊ | 250780/371472 [9:25:05<9:37:55, 3.48it/s] 68%|██████▊ | 250781/371472 [9:25:05<10:21:24, 3.24it/s] 68%|██████▊ | 250782/371472 [9:25:06<9:57:29, 3.37it/s] 68%|██████▊ | 250783/371472 [9:25:06<9:59:20, 3.36it/s] 68%|██████▊ | 250784/371472 [9:25:06<9:48:38, 3.42it/s] 68%|██████▊ | 250785/371472 [9:25:06<10:20:34, 3.24it/s] 68%|██████▊ | 250786/371472 [9:25:07<10:12:07, 3.29it/s] 68%|██████▊ | 250787/371472 [9:25:07<10:17:01, 3.26it/s] 68%|██████▊ | 250788/371472 [9:25:07<10:40:15, 3.14it/s] 68%|██████▊ | 250789/371472 [9:25:08<10:43:32, 3.13it/s] 68%|██████▊ | 250790/371472 [9:25:08<10:20:29, 3.24it/s] 68%|██████▊ | 250791/371472 [9:25:08<11:19:25, 2.96it/s] 68%|██████▊ | 250792/371472 [9:25:09<11:43:49, 2.86it/s] 68%|██████▊ | 250793/371472 [9:25:09<12:15:15, 2.74it/s] 68%|██████▊ | 250794/371472 [9:25:10<12:56:25, 2.59it/s] 68%|██████▊ | 250795/371472 [9:25:10<12:10:38, 2.75it/s] 68%|██████▊ | 250796/371472 [9:25:10<11:48:35, 2.84it/s] 68%|██████▊ | 250797/371472 [9:25:11<11:20:31, 2.96it/s] 68%|██████▊ | 250798/371472 [9:25:11<11:00:00, 3.05it/s] 68%|██████▊ | 250799/371472 [9:25:11<10:34:31, 3.17it/s] 68%|██████▊ | 250800/371472 [9:25:11<10:19:10, 3.25it/s] {'loss': 2.6629, 'learning_rate': 3.925208472494559e-07, 'epoch': 10.8} + 68%|██████▊ | 250800/371472 [9:25:11<10:19:10, 3.25it/s] 68%|██████▊ | 250801/371472 [9:25:12<10:13:56, 3.28it/s] 68%|██████▊ | 250802/371472 [9:25:12<10:12:57, 3.28it/s] 68%|██████▊ | 250803/371472 [9:25:12<10:03:52, 3.33it/s] 68%|██████▊ | 250804/371472 [9:25:13<9:55:37, 3.38it/s] 68%|██████▊ | 250805/371472 [9:25:13<9:44:42, 3.44it/s] 68%|██████▊ | 250806/371472 [9:25:13<9:40:11, 3.47it/s] 68%|██████▊ | 250807/371472 [9:25:13<9:37:45, 3.48it/s] 68%|██████▊ | 250808/371472 [9:25:14<10:32:48, 3.18it/s] 68%|██████▊ | 250809/371472 [9:25:14<10:18:16, 3.25it/s] 68%|██████▊ | 250810/371472 [9:25:14<10:06:06, 3.32it/s] 68%|██████▊ | 250811/371472 [9:25:15<10:06:32, 3.32it/s] 68%|██████▊ | 250812/371472 [9:25:15<9:46:18, 3.43it/s] 68%|██████▊ | 250813/371472 [9:25:15<9:40:56, 3.46it/s] 68%|██████▊ | 250814/371472 [9:25:16<9:49:50, 3.41it/s] 68%|██████▊ | 250815/371472 [9:25:16<9:55:54, 3.37it/s] 68%|██████▊ | 250816/371472 [9:25:16<9:55:15, 3.38it/s] 68%|██████▊ | 250817/371472 [9:25:16<9:43:36, 3.45it/s] 68%|██████▊ | 250818/371472 [9:25:17<10:08:04, 3.31it/s] 68%|██████▊ | 250819/371472 [9:25:17<10:25:22, 3.22it/s] 68%|██████▊ | 250820/371472 [9:25:18<11:00:10, 3.05it/s] {'loss': 2.6255, 'learning_rate': 3.92472365273977e-07, 'epoch': 10.8} + 68%|██████▊ | 250820/371472 [9:25:18<11:00:10, 3.05it/s] 68%|██████▊ | 250821/371472 [9:25:18<11:12:40, 2.99it/s] 68%|██████▊ | 250822/371472 [9:25:18<11:07:31, 3.01it/s] 68%|██████▊ | 250823/371472 [9:25:18<10:36:18, 3.16it/s] 68%|██████▊ | 250824/371472 [9:25:19<10:28:47, 3.20it/s] 68%|██████▊ | 250825/371472 [9:25:19<10:23:52, 3.22it/s] 68%|██████▊ | 250826/371472 [9:25:19<10:12:55, 3.28it/s] 68%|██████▊ | 250827/371472 [9:25:20<10:12:51, 3.28it/s] 68%|██████▊ | 250828/371472 [9:25:20<10:14:02, 3.27it/s] 68%|██████▊ | 250829/371472 [9:25:21<12:31:09, 2.68it/s] 68%|██████▊ | 250830/371472 [9:25:21<11:34:36, 2.89it/s] 68%|██████▊ | 250831/371472 [9:25:21<11:03:58, 3.03it/s] 68%|██████▊ | 250832/371472 [9:25:21<10:43:38, 3.12it/s] 68%|██████▊ | 250833/371472 [9:25:22<10:18:16, 3.25it/s] 68%|██████▊ | 250834/371472 [9:25:22<10:09:47, 3.30it/s] 68%|██████▊ | 250835/371472 [9:25:22<10:17:25, 3.26it/s] 68%|██████▊ | 250836/371472 [9:25:23<10:05:23, 3.32it/s] 68%|██████▊ | 250837/371472 [9:25:23<10:03:18, 3.33it/s] 68%|██████▊ | 250838/371472 [9:25:23<10:09:02, 3.30it/s] 68%|██████▊ | 250839/371472 [9:25:23<9:56:22, 3.37it/s] 68%|██████▊ | 250840/371472 [9:25:24<9:43:20, 3.45it/s] {'loss': 2.5363, 'learning_rate': 3.924238832984981e-07, 'epoch': 10.8} + 68%|██████▊ | 250840/371472 [9:25:24<9:43:20, 3.45it/s] 68%|██████▊ | 250841/371472 [9:25:24<9:52:40, 3.39it/s] 68%|██████▊ | 250842/371472 [9:25:24<10:10:10, 3.29it/s] 68%|██████▊ | 250843/371472 [9:25:25<9:52:41, 3.39it/s] 68%|██████▊ | 250844/371472 [9:25:25<9:49:03, 3.41it/s] 68%|██████▊ | 250845/371472 [9:25:25<10:16:36, 3.26it/s] 68%|██████▊ | 250846/371472 [9:25:26<10:47:46, 3.10it/s] 68%|██████▊ | 250847/371472 [9:25:26<10:26:09, 3.21it/s] 68%|██████▊ | 250848/371472 [9:25:26<10:46:48, 3.11it/s] 68%|██████▊ | 250849/371472 [9:25:27<10:25:27, 3.21it/s] 68%|██████▊ | 250850/371472 [9:25:27<10:16:58, 3.26it/s] 68%|██████▊ | 250851/371472 [9:25:27<10:26:26, 3.21it/s] 68%|██████▊ | 250852/371472 [9:25:27<10:16:11, 3.26it/s] 68%|██████▊ | 250853/371472 [9:25:28<10:06:46, 3.31it/s] 68%|██████▊ | 250854/371472 [9:25:28<9:50:27, 3.40it/s] 68%|██████▊ | 250855/371472 [9:25:28<10:28:44, 3.20it/s] 68%|██████▊ | 250856/371472 [9:25:29<10:03:45, 3.33it/s] 68%|██████▊ | 250857/371472 [9:25:29<9:55:15, 3.38it/s] 68%|██████▊ | 250858/371472 [9:25:29<9:53:06, 3.39it/s] 68%|██████▊ | 250859/371472 [9:25:30<9:46:44, 3.43it/s] 68%|██████▊ | 250860/371472 [9:25:30<9:45:58, 3.43it/s] {'loss': 2.7397, 'learning_rate': 3.9237540132301926e-07, 'epoch': 10.81} + 68%|██████▊ | 250860/371472 [9:25:30<9:45:58, 3.43it/s] 68%|██████▊ | 250861/371472 [9:25:30<10:35:53, 3.16it/s] 68%|██████▊ | 250862/371472 [9:25:31<11:07:45, 3.01it/s] 68%|██████▊ | 250863/371472 [9:25:31<10:50:22, 3.09it/s] 68%|██████▊ | 250864/371472 [9:25:31<10:52:02, 3.08it/s] 68%|██████▊ | 250865/371472 [9:25:31<10:26:00, 3.21it/s] 68%|██████▊ | 250866/371472 [9:25:32<10:04:48, 3.32it/s] 68%|██████▊ | 250867/371472 [9:25:32<9:59:07, 3.36it/s] 68%|██████▊ | 250868/371472 [9:25:32<9:57:31, 3.36it/s] 68%|██████▊ | 250869/371472 [9:25:33<9:59:45, 3.35it/s] 68%|██████▊ | 250870/371472 [9:25:33<10:28:08, 3.20it/s] 68%|██████▊ | 250871/371472 [9:25:33<10:23:55, 3.22it/s] 68%|██████▊ | 250872/371472 [9:25:34<10:00:56, 3.34it/s] 68%|██████▊ | 250873/371472 [9:25:34<9:59:14, 3.35it/s] 68%|██████▊ | 250874/371472 [9:25:34<10:00:49, 3.35it/s] 68%|██████▊ | 250875/371472 [9:25:34<10:03:51, 3.33it/s] 68%|██████▊ | 250876/371472 [9:25:35<10:08:23, 3.30it/s] 68%|██████▊ | 250877/371472 [9:25:35<10:23:59, 3.22it/s] 68%|██████▊ | 250878/371472 [9:25:35<10:10:59, 3.29it/s] 68%|██████▊ | 250879/371472 [9:25:36<10:09:08, 3.30it/s] 68%|██████▊ | 250880/371472 [9:25:36<9:54:29, 3.38it/s] {'loss': 2.7255, 'learning_rate': 3.923269193475403e-07, 'epoch': 10.81} + 68%|██████▊ | 250880/371472 [9:25:36<9:54:29, 3.38it/s] 68%|██████▊ | 250881/371472 [9:25:36<10:00:21, 3.35it/s] 68%|██████▊ | 250882/371472 [9:25:37<10:18:25, 3.25it/s] 68%|██████▊ | 250883/371472 [9:25:37<11:09:18, 3.00it/s] 68%|██████▊ | 250884/371472 [9:25:37<10:48:09, 3.10it/s] 68%|██████▊ | 250885/371472 [9:25:38<10:49:22, 3.09it/s] 68%|██████▊ | 250886/371472 [9:25:38<10:38:23, 3.15it/s] 68%|██████▊ | 250887/371472 [9:25:38<10:12:17, 3.28it/s] 68%|██████▊ | 250888/371472 [9:25:38<9:58:00, 3.36it/s] 68%|██████▊ | 250889/371472 [9:25:39<9:40:26, 3.46it/s] 68%|██████▊ | 250890/371472 [9:25:39<9:57:45, 3.36it/s] 68%|██████▊ | 250891/371472 [9:25:39<9:40:35, 3.46it/s] 68%|██████▊ | 250892/371472 [9:25:40<9:46:16, 3.43it/s] 68%|██████▊ | 250893/371472 [9:25:40<9:45:10, 3.43it/s] 68%|██████▊ | 250894/371472 [9:25:40<10:01:37, 3.34it/s] 68%|██████▊ | 250895/371472 [9:25:41<9:58:01, 3.36it/s] 68%|██████▊ | 250896/371472 [9:25:41<10:00:05, 3.35it/s] 68%|██████▊ | 250897/371472 [9:25:41<9:59:02, 3.35it/s] 68%|██████▊ | 250898/371472 [9:25:41<9:58:50, 3.36it/s] 68%|██████▊ | 250899/371472 [9:25:42<9:40:07, 3.46it/s] 68%|██████▊ | 250900/371472 [9:25:42<9:51:04, 3.40it/s] {'loss': 2.6742, 'learning_rate': 3.9227843737206146e-07, 'epoch': 10.81} + 68%|██████▊ | 250900/371472 [9:25:42<9:51:04, 3.40it/s] 68%|██████▊ | 250901/371472 [9:25:42<9:38:05, 3.48it/s] 68%|██████▊ | 250902/371472 [9:25:43<9:37:34, 3.48it/s] 68%|██████▊ | 250903/371472 [9:25:43<9:30:25, 3.52it/s] 68%|██████▊ | 250904/371472 [9:25:43<10:37:21, 3.15it/s] 68%|██████▊ | 250905/371472 [9:25:43<10:16:05, 3.26it/s] 68%|██████▊ | 250906/371472 [9:25:44<10:20:07, 3.24it/s] 68%|██████▊ | 250907/371472 [9:25:44<9:54:21, 3.38it/s] 68%|██████▊ | 250908/371472 [9:25:44<9:45:17, 3.43it/s] 68%|██████▊ | 250909/371472 [9:25:45<10:46:34, 3.11it/s] 68%|██████▊ | 250910/371472 [9:25:45<10:26:58, 3.20it/s] 68%|██████▊ | 250911/371472 [9:25:45<10:16:48, 3.26it/s] 68%|██████▊ | 250912/371472 [9:25:46<10:42:58, 3.13it/s] 68%|██████▊ | 250913/371472 [9:25:46<11:34:23, 2.89it/s] 68%|██████▊ | 250914/371472 [9:25:46<11:17:10, 2.97it/s] 68%|██████▊ | 250915/371472 [9:25:47<10:38:32, 3.15it/s] 68%|██████▊ | 250916/371472 [9:25:47<10:21:15, 3.23it/s] 68%|██████▊ | 250917/371472 [9:25:47<10:30:26, 3.19it/s] 68%|██████▊ | 250918/371472 [9:25:48<10:26:06, 3.21it/s] 68%|���█████▊ | 250919/371472 [9:25:48<12:24:51, 2.70it/s] 68%|██████▊ | 250920/371472 [9:25:48<12:21:40, 2.71it/s] {'loss': 2.5995, 'learning_rate': 3.9222995539658253e-07, 'epoch': 10.81} + 68%|██████▊ | 250920/371472 [9:25:48<12:21:40, 2.71it/s] 68%|██████▊ | 250921/371472 [9:25:49<11:37:27, 2.88it/s] 68%|██████▊ | 250922/371472 [9:25:49<11:02:07, 3.03it/s] 68%|██████▊ | 250923/371472 [9:25:49<10:43:17, 3.12it/s] 68%|██████▊ | 250924/371472 [9:25:50<10:33:49, 3.17it/s] 68%|██████▊ | 250925/371472 [9:25:50<10:13:21, 3.28it/s] 68%|██████▊ | 250926/371472 [9:25:50<10:18:51, 3.25it/s] 68%|██████▊ | 250927/371472 [9:25:51<10:05:08, 3.32it/s] 68%|██████▊ | 250928/371472 [9:25:51<9:54:45, 3.38it/s] 68%|██████▊ | 250929/371472 [9:25:51<10:37:09, 3.15it/s] 68%|██████▊ | 250930/371472 [9:25:51<10:35:26, 3.16it/s] 68%|██████▊ | 250931/371472 [9:25:52<10:26:49, 3.21it/s] 68%|██████▊ | 250932/371472 [9:25:52<10:55:57, 3.06it/s] 68%|██████▊ | 250933/371472 [9:25:52<10:32:49, 3.17it/s] 68%|██████▊ | 250934/371472 [9:25:53<10:19:18, 3.24it/s] 68%|██████▊ | 250935/371472 [9:25:53<10:28:25, 3.20it/s] 68%|██████▊ | 250936/371472 [9:25:53<10:37:20, 3.15it/s] 68%|██████▊ | 250937/371472 [9:25:54<10:23:59, 3.22it/s] 68%|██████▊ | 250938/371472 [9:25:54<10:19:09, 3.24it/s] 68%|██████▊ | 250939/371472 [9:25:54<10:16:59, 3.26it/s] 68%|██████▊ | 250940/371472 [9:25:55<10:37:08, 3.15it/s] {'loss': 2.6999, 'learning_rate': 3.9218147342110365e-07, 'epoch': 10.81} + 68%|██████▊ | 250940/371472 [9:25:55<10:37:08, 3.15it/s] 68%|██████▊ | 250941/371472 [9:25:55<10:29:21, 3.19it/s] 68%|██████▊ | 250942/371472 [9:25:55<10:06:00, 3.31it/s] 68%|██████▊ | 250943/371472 [9:25:56<10:33:08, 3.17it/s] 68%|██████▊ | 250944/371472 [9:25:56<10:17:41, 3.25it/s] 68%|██████▊ | 250945/371472 [9:25:56<11:10:17, 3.00it/s] 68%|██████▊ | 250946/371472 [9:25:57<10:57:13, 3.06it/s] 68%|██████▊ | 250947/371472 [9:25:57<11:02:15, 3.03it/s] 68%|██████▊ | 250948/371472 [9:25:57<10:46:54, 3.11it/s] 68%|██████▊ | 250949/371472 [9:25:58<10:47:45, 3.10it/s] 68%|██████▊ | 250950/371472 [9:25:58<10:43:41, 3.12it/s] 68%|██████▊ | 250951/371472 [9:25:58<10:25:18, 3.21it/s] 68%|██████▊ | 250952/371472 [9:25:58<10:25:04, 3.21it/s] 68%|██████▊ | 250953/371472 [9:25:59<10:13:17, 3.28it/s] 68%|██████▊ | 250954/371472 [9:25:59<10:12:08, 3.28it/s] 68%|██████▊ | 250955/371472 [9:25:59<10:26:39, 3.21it/s] 68%|██████▊ | 250956/371472 [9:26:00<10:13:35, 3.27it/s] 68%|██████▊ | 250957/371472 [9:26:00<9:59:06, 3.35it/s] 68%|██████▊ | 250958/371472 [9:26:00<9:43:44, 3.44it/s] 68%|██████▊ | 250959/371472 [9:26:00<9:38:09, 3.47it/s] 68%|██████▊ | 250960/371472 [9:26:01<9:40:55, 3.46it/s] {'loss': 2.5971, 'learning_rate': 3.921329914456247e-07, 'epoch': 10.81} + 68%|██████▊ | 250960/371472 [9:26:01<9:40:55, 3.46it/s] 68%|██████▊ | 250961/371472 [9:26:01<9:50:55, 3.40it/s] 68%|██████▊ | 250962/371472 [9:26:01<9:52:04, 3.39it/s] 68%|██████▊ | 250963/371472 [9:26:02<9:55:23, 3.37it/s] 68%|██████▊ | 250964/371472 [9:26:02<9:43:02, 3.44it/s] 68%|██████▊ | 250965/371472 [9:26:02<10:00:43, 3.34it/s] 68%|██████▊ | 250966/371472 [9:26:03<9:55:07, 3.37it/s] 68%|██████▊ | 250967/371472 [9:26:03<9:46:58, 3.42it/s] 68%|██████▊ | 250968/371472 [9:26:03<9:46:47, 3.42it/s] 68%|██████▊ | 250969/371472 [9:26:03<9:59:32, 3.35it/s] 68%|██████▊ | 250970/371472 [9:26:04<9:56:33, 3.37it/s] 68%|██████▊ | 250971/371472 [9:26:04<10:18:03, 3.25it/s] 68%|██████▊ | 250972/371472 [9:26:04<10:07:25, 3.31it/s] 68%|██████▊ | 250973/371472 [9:26:05<9:56:22, 3.37it/s] 68%|██████▊ | 250974/371472 [9:26:05<10:06:07, 3.31it/s] 68%|██████▊ | 250975/371472 [9:26:05<10:09:37, 3.29it/s] 68%|██████▊ | 250976/371472 [9:26:06<10:10:59, 3.29it/s] 68%|██████▊ | 250977/371472 [9:26:06<10:08:06, 3.30it/s] 68%|██████▊ | 250978/371472 [9:26:06<9:55:43, 3.37it/s] 68%|██████▊ | 250979/371472 [9:26:06<10:00:57, 3.34it/s] 68%|██████▊ | 250980/371472 [9:26:07<10:11:06, 3.29it/s] {'loss': 2.8558, 'learning_rate': 3.9208450947014585e-07, 'epoch': 10.81} + 68%|██████▊ | 250980/371472 [9:26:07<10:11:06, 3.29it/s] 68%|██████▊ | 250981/371472 [9:26:07<9:54:03, 3.38it/s] 68%|██████▊ | 250982/371472 [9:26:07<9:38:15, 3.47it/s] 68%|██████▊ | 250983/371472 [9:26:08<9:47:06, 3.42it/s] 68%|██████▊ | 250984/371472 [9:26:08<9:44:54, 3.43it/s] 68%|██████▊ | 250985/371472 [9:26:08<9:40:51, 3.46it/s] 68%|██████▊ | 250986/371472 [9:26:09<9:56:42, 3.37it/s] 68%|██████▊ | 250987/371472 [9:26:09<10:24:20, 3.22it/s] 68%|██████▊ | 250988/371472 [9:26:09<10:06:37, 3.31it/s] 68%|██████▊ | 250989/371472 [9:26:09<10:07:23, 3.31it/s] 68%|██████▊ | 250990/371472 [9:26:10<10:35:25, 3.16it/s] 68%|██████▊ | 250991/371472 [9:26:10<10:09:12, 3.30it/s] 68%|██████▊ | 250992/371472 [9:26:10<9:51:03, 3.40it/s] 68%|██████▊ | 250993/371472 [9:26:11<9:41:18, 3.45it/s] 68%|██████▊ | 250994/371472 [9:26:11<9:41:33, 3.45it/s] 68%|██████▊ | 250995/371472 [9:26:11<10:33:38, 3.17it/s] 68%|██████▊ | 250996/371472 [9:26:12<10:19:10, 3.24it/s] 68%|██████▊ | 250997/371472 [9:26:12<10:23:47, 3.22it/s] 68%|██████▊ | 250998/371472 [9:26:12<10:24:30, 3.22it/s] 68%|██████▊ | 250999/371472 [9:26:13<10:15:06, 3.26it/s] 68%|██████▊ | 251000/371472 [9:26:13<10:28:02, 3.20it/s] {'loss': 2.5188, 'learning_rate': 3.920360274946669e-07, 'epoch': 10.81} + 68%|██████▊ | 251000/371472 [9:26:13<10:28:02, 3.20it/s] 68%|██████▊ | 251001/371472 [9:26:13<10:05:16, 3.32it/s] 68%|██████▊ | 251002/371472 [9:26:13<10:09:19, 3.30it/s] 68%|██████▊ | 251003/371472 [9:26:14<10:33:07, 3.17it/s] 68%|██████▊ | 251004/371472 [9:26:14<10:37:35, 3.15it/s] 68%|██████▊ | 251005/371472 [9:26:14<10:44:04, 3.12it/s] 68%|██████▊ | 251006/371472 [9:26:15<10:15:40, 3.26it/s] 68%|██████▊ | 251007/371472 [9:26:15<9:50:06, 3.40it/s] 68%|██████▊ | 251008/371472 [9:26:15<9:42:09, 3.45it/s] 68%|██████▊ | 251009/371472 [9:26:15<9:27:55, 3.54it/s] 68%|██████▊ | 251010/371472 [9:26:16<9:45:52, 3.43it/s] 68%|██████▊ | 251011/371472 [9:26:16<9:51:28, 3.39it/s] 68%|██████▊ | 251012/371472 [9:26:16<10:26:33, 3.20it/s] 68%|██████▊ | 251013/371472 [9:26:17<10:09:22, 3.29it/s] 68%|██████▊ | 251014/371472 [9:26:17<10:06:43, 3.31it/s] 68%|██████▊ | 251015/371472 [9:26:17<11:06:48, 3.01it/s] 68%|██████▊ | 251016/371472 [9:26:18<10:57:48, 3.05it/s] 68%|██████▊ | 251017/371472 [9:26:18<11:14:09, 2.98it/s] 68%|██████▊ | 251018/371472 [9:26:18<10:49:33, 3.09it/s] 68%|██████▊ | 251019/371472 [9:26:19<10:25:26, 3.21it/s] 68%|██████▊ | 251020/371472 [9:26:19<10:12:01, 3.28it/s] {'loss': 2.6637, 'learning_rate': 3.919875455191881e-07, 'epoch': 10.81} + 68%|██████▊ | 251020/371472 [9:26:19<10:12:01, 3.28it/s] 68%|██████▊ | 251021/371472 [9:26:19<10:11:47, 3.28it/s] 68%|██████▊ | 251022/371472 [9:26:20<10:19:01, 3.24it/s] 68%|██████▊ | 251023/371472 [9:26:20<10:12:24, 3.28it/s] 68%|██████▊ | 251024/371472 [9:26:20<10:41:45, 3.13it/s] 68%|██████▊ | 251025/371472 [9:26:21<10:47:22, 3.10it/s] 68%|██████▊ | 251026/371472 [9:26:21<11:10:39, 2.99it/s] 68%|██████▊ | 251027/371472 [9:26:21<11:04:23, 3.02it/s] 68%|██████▊ | 251028/371472 [9:26:22<10:36:18, 3.15it/s] 68%|██████▊ | 251029/371472 [9:26:22<10:41:00, 3.13it/s] 68%|██████▊ | 251030/371472 [9:26:22<10:28:39, 3.19it/s] 68%|██████▊ | 251031/371472 [9:26:22<10:18:06, 3.25it/s] 68%|██████▊ | 251032/371472 [9:26:23<10:24:40, 3.21it/s] 68%|██████▊ | 251033/371472 [9:26:23<10:25:54, 3.21it/s] 68%|██████▊ | 251034/371472 [9:26:23<10:24:05, 3.22it/s] 68%|██████▊ | 251035/371472 [9:26:24<11:16:50, 2.97it/s] 68%|██████▊ | 251036/371472 [9:26:24<10:58:31, 3.05it/s] 68%|██████▊ | 251037/371472 [9:26:24<10:51:24, 3.08it/s] 68%|██████▊ | 251038/371472 [9:26:25<10:40:10, 3.14it/s] 68%|██████▊ | 251039/371472 [9:26:25<10:29:45, 3.19it/s] 68%|██████▊ | 251040/371472 [9:26:25<10:37:43, 3.15it/s] {'loss': 2.6799, 'learning_rate': 3.9193906354370917e-07, 'epoch': 10.81} + 68%|██████▊ | 251040/371472 [9:26:25<10:37:43, 3.15it/s] 68%|██████▊ | 251041/371472 [9:26:26<10:48:40, 3.09it/s] 68%|██████▊ | 251042/371472 [9:26:26<10:17:56, 3.25it/s] 68%|██████▊ | 251043/371472 [9:26:26<10:40:05, 3.14it/s] 68%|██████▊ | 251044/371472 [9:26:27<10:53:36, 3.07it/s] 68%|██████▊ | 251045/371472 [9:26:27<10:51:06, 3.08it/s] 68%|██████▊ | 251046/371472 [9:26:27<10:30:38, 3.18it/s] 68%|██████▊ | 251047/371472 [9:26:28<10:25:55, 3.21it/s] 68%|██████▊ | 251048/371472 [9:26:28<10:22:26, 3.22it/s] 68%|██████▊ | 251049/371472 [9:26:28<10:06:52, 3.31it/s] 68%|██████▊ | 251050/371472 [9:26:28<10:05:40, 3.31it/s] 68%|██████▊ | 251051/371472 [9:26:29<10:01:08, 3.34it/s] 68%|██████▊ | 251052/371472 [9:26:29<10:34:15, 3.16it/s] 68%|██████▊ | 251053/371472 [9:26:29<10:46:46, 3.10it/s] 68%|██████▊ | 251054/371472 [9:26:30<10:30:53, 3.18it/s] 68%|██████▊ | 251055/371472 [9:26:30<10:15:51, 3.26it/s] 68%|██████▊ | 251056/371472 [9:26:30<10:00:54, 3.34it/s] 68%|██████▊ | 251057/371472 [9:26:31<10:02:59, 3.33it/s] 68%|██████▊ | 251058/371472 [9:26:31<10:02:28, 3.33it/s] 68%|██████▊ | 251059/371472 [9:26:31<10:23:02, 3.22it/s] 68%|██████▊ | 251060/371472 [9:26:32<10:09:13, 3.29it/s] {'loss': 2.7201, 'learning_rate': 3.918905815682303e-07, 'epoch': 10.81} + 68%|██████▊ | 251060/371472 [9:26:32<10:09:13, 3.29it/s] 68%|██████▊ | 251061/371472 [9:26:32<10:18:51, 3.24it/s] 68%|██████▊ | 251062/371472 [9:26:32<10:13:10, 3.27it/s] 68%|██████▊ | 251063/371472 [9:26:33<10:31:30, 3.18it/s] 68%|██████▊ | 251064/371472 [9:26:33<10:21:04, 3.23it/s] 68%|██████▊ | 251065/371472 [9:26:33<10:01:27, 3.34it/s] 68%|██████▊ | 251066/371472 [9:26:33<9:47:36, 3.42it/s] 68%|██████▊ | 251067/371472 [9:26:34<9:33:17, 3.50it/s] 68%|██████▊ | 251068/371472 [9:26:34<9:32:09, 3.51it/s] 68%|██████▊ | 251069/371472 [9:26:34<9:40:13, 3.46it/s] 68%|██████▊ | 251070/371472 [9:26:35<9:45:52, 3.43it/s] 68%|██████▊ | 251071/371472 [9:26:35<9:51:47, 3.39it/s] 68%|██████▊ | 251072/371472 [9:26:35<9:53:41, 3.38it/s] 68%|██████▊ | 251073/371472 [9:26:35<9:43:15, 3.44it/s] 68%|██████▊ | 251074/371472 [9:26:36<10:24:48, 3.21it/s] 68%|██████▊ | 251075/371472 [9:26:36<10:08:03, 3.30it/s] 68%|██████▊ | 251076/371472 [9:26:36<9:49:20, 3.40it/s] 68%|██████▊ | 251077/371472 [9:26:37<10:20:07, 3.24it/s] 68%|██████▊ | 251078/371472 [9:26:37<10:06:31, 3.31it/s] 68%|██████▊ | 251079/371472 [9:26:37<9:53:26, 3.38it/s] 68%|██████▊ | 251080/371472 [9:26:38<10:04:34, 3.32it/s] {'loss': 2.7163, 'learning_rate': 3.9184209959275136e-07, 'epoch': 10.81} + 68%|██████▊ | 251080/371472 [9:26:38<10:04:34, 3.32it/s] 68%|██████▊ | 251081/371472 [9:26:38<9:55:13, 3.37it/s] 68%|██████▊ | 251082/371472 [9:26:38<9:58:15, 3.35it/s] 68%|██████▊ | 251083/371472 [9:26:38<9:43:46, 3.44it/s] 68%|██████▊ | 251084/371472 [9:26:39<9:50:31, 3.40it/s] 68%|██████▊ | 251085/371472 [9:26:39<10:08:56, 3.29it/s] 68%|██████▊ | 251086/371472 [9:26:39<10:04:15, 3.32it/s] 68%|██████▊ | 251087/371472 [9:26:40<10:58:12, 3.05it/s] 68%|██████▊ | 251088/371472 [9:26:40<11:40:41, 2.86it/s] 68%|██████▊ | 251089/371472 [9:26:40<10:46:42, 3.10it/s] 68%|██████▊ | 251090/371472 [9:26:41<10:30:12, 3.18it/s] 68%|██████▊ | 251091/371472 [9:26:41<10:12:52, 3.27it/s] 68%|██████▊ | 251092/371472 [9:26:41<10:04:57, 3.32it/s] 68%|██████▊ | 251093/371472 [9:26:42<10:09:40, 3.29it/s] 68%|██████▊ | 251094/371472 [9:26:42<9:58:50, 3.35it/s] 68%|██████▊ | 251095/371472 [9:26:42<10:10:36, 3.29it/s] 68%|██████▊ | 251096/371472 [9:26:43<10:44:57, 3.11it/s] 68%|██████▊ | 251097/371472 [9:26:43<10:24:20, 3.21it/s] 68%|██████▊ | 251098/371472 [9:26:43<12:13:58, 2.73it/s] 68%|██████▊ | 251099/371472 [9:26:44<11:27:06, 2.92it/s] 68%|██████▊ | 251100/371472 [9:26:44<10:48:30, 3.09it/s] {'loss': 2.7782, 'learning_rate': 3.9179361761727254e-07, 'epoch': 10.82} + 68%|██████▊ | 251100/371472 [9:26:44<10:48:30, 3.09it/s] 68%|██████▊ | 251101/371472 [9:26:44<10:33:18, 3.17it/s] 68%|██████▊ | 251102/371472 [9:26:44<10:28:59, 3.19it/s] 68%|██████▊ | 251103/371472 [9:26:45<10:23:04, 3.22it/s] 68%|██████▊ | 251104/371472 [9:26:45<10:26:37, 3.20it/s] 68%|██████▊ | 251105/371472 [9:26:45<10:10:25, 3.29it/s] 68%|██████▊ | 251106/371472 [9:26:46<9:48:57, 3.41it/s] 68%|██████▊ | 251107/371472 [9:26:46<9:29:34, 3.52it/s] 68%|██████▊ | 251108/371472 [9:26:46<9:58:20, 3.35it/s] 68%|██████▊ | 251109/371472 [9:26:47<9:53:06, 3.38it/s] 68%|██████▊ | 251110/371472 [9:26:47<9:37:46, 3.47it/s] 68%|██████▊ | 251111/371472 [9:26:47<9:29:54, 3.52it/s] 68%|██████▊ | 251112/371472 [9:26:47<9:39:43, 3.46it/s] 68%|██████▊ | 251113/371472 [9:26:48<9:48:51, 3.41it/s] 68%|██████▊ | 251114/371472 [9:26:48<11:36:22, 2.88it/s] 68%|██████▊ | 251115/371472 [9:26:48<10:59:25, 3.04it/s] 68%|██████▊ | 251116/371472 [9:26:49<10:53:01, 3.07it/s] 68%|██████▊ | 251117/371472 [9:26:49<10:21:25, 3.23it/s] 68%|██████▊ | 251118/371472 [9:26:49<10:25:20, 3.21it/s] 68%|██████▊ | 251119/371472 [9:26:50<10:28:22, 3.19it/s] 68%|██████▊ | 251120/371472 [9:26:50<10:09:47, 3.29it/s] {'loss': 2.6108, 'learning_rate': 3.917451356417936e-07, 'epoch': 10.82} + 68%|██████▊ | 251120/371472 [9:26:50<10:09:47, 3.29it/s] 68%|██████▊ | 251121/371472 [9:26:50<11:08:32, 3.00it/s] 68%|██████▊ | 251122/371472 [9:26:51<10:39:50, 3.13it/s] 68%|██████▊ | 251123/371472 [9:26:51<10:24:28, 3.21it/s] 68%|██████▊ | 251124/371472 [9:26:51<10:05:07, 3.31it/s] 68%|██████▊ | 251125/371472 [9:26:52<10:11:28, 3.28it/s] 68%|██████▊ | 251126/371472 [9:26:52<9:53:05, 3.38it/s] 68%|██████▊ | 251127/371472 [9:26:52<9:50:09, 3.40it/s] 68%|██████▊ | 251128/371472 [9:26:52<9:34:52, 3.49it/s] 68%|██████▊ | 251129/371472 [9:26:53<9:36:40, 3.48it/s] 68%|██████▊ | 251130/371472 [9:26:53<9:53:04, 3.38it/s] 68%|██████▊ | 251131/371472 [9:26:53<9:41:39, 3.45it/s] 68%|██████▊ | 251132/371472 [9:26:54<9:53:44, 3.38it/s] 68%|██████▊ | 251133/371472 [9:26:54<9:44:16, 3.43it/s] 68%|██████▊ | 251134/371472 [9:26:54<9:35:24, 3.49it/s] 68%|██████▊ | 251135/371472 [9:26:54<9:43:45, 3.44it/s] 68%|██████▊ | 251136/371472 [9:26:55<9:38:30, 3.47it/s] 68%|██████▊ | 251137/371472 [9:26:55<9:22:35, 3.56it/s] 68%|██████▊ | 251138/371472 [9:26:55<9:32:25, 3.50it/s] 68%|██████▊ | 251139/371472 [9:26:56<9:39:45, 3.46it/s] 68%|██████▊ | 251140/371472 [9:26:56<9:54:47, 3.37it/s] {'loss': 2.7799, 'learning_rate': 3.9169665366631474e-07, 'epoch': 10.82} + 68%|██████▊ | 251140/371472 [9:26:56<9:54:47, 3.37it/s] 68%|██████▊ | 251141/371472 [9:26:56<10:03:07, 3.33it/s] 68%|██████▊ | 251142/371472 [9:26:56<9:50:01, 3.40it/s] 68%|██████▊ | 251143/371472 [9:26:57<9:45:11, 3.43it/s] 68%|██████▊ | 251144/371472 [9:26:57<9:59:06, 3.35it/s] 68%|██████▊ | 251145/371472 [9:26:57<9:52:05, 3.39it/s] 68%|██████▊ | 251146/371472 [9:26:58<9:52:45, 3.38it/s] 68%|██████▊ | 251147/371472 [9:26:58<9:51:50, 3.39it/s] 68%|██████▊ | 251148/371472 [9:26:58<11:28:10, 2.91it/s] 68%|██████▊ | 251149/371472 [9:26:59<10:52:40, 3.07it/s] 68%|██████▊ | 251150/371472 [9:26:59<10:44:39, 3.11it/s] 68%|██████▊ | 251151/371472 [9:26:59<10:29:01, 3.19it/s] 68%|██████▊ | 251152/371472 [9:27:00<10:07:48, 3.30it/s] 68%|██████▊ | 251153/371472 [9:27:00<9:55:17, 3.37it/s] 68%|██████▊ | 251154/371472 [9:27:00<9:36:42, 3.48it/s] 68%|██████▊ | 251155/371472 [9:27:00<9:41:02, 3.45it/s] 68%|██████▊ | 251156/371472 [9:27:01<9:43:09, 3.44it/s] 68%|██████▊ | 251157/371472 [9:27:01<10:02:29, 3.33it/s] 68%|██████▊ | 251158/371472 [9:27:01<10:18:06, 3.24it/s] 68%|██���███▊ | 251159/371472 [9:27:02<10:01:42, 3.33it/s] 68%|██████▊ | 251160/371472 [9:27:02<10:30:40, 3.18it/s] {'loss': 2.7628, 'learning_rate': 3.916481716908358e-07, 'epoch': 10.82} + 68%|██████▊ | 251160/371472 [9:27:02<10:30:40, 3.18it/s] 68%|██████▊ | 251161/371472 [9:27:02<10:49:02, 3.09it/s] 68%|██████▊ | 251162/371472 [9:27:03<10:26:58, 3.20it/s] 68%|██████▊ | 251163/371472 [9:27:03<10:35:11, 3.16it/s] 68%|██████▊ | 251164/371472 [9:27:03<10:22:14, 3.22it/s] 68%|██████▊ | 251165/371472 [9:27:04<10:26:42, 3.20it/s] 68%|██████▊ | 251166/371472 [9:27:04<10:42:02, 3.12it/s] 68%|██████▊ | 251167/371472 [9:27:04<10:44:16, 3.11it/s] 68%|██████▊ | 251168/371472 [9:27:04<10:04:47, 3.32it/s] 68%|██████▊ | 251169/371472 [9:27:05<10:57:00, 3.05it/s] 68%|██████▊ | 251170/371472 [9:27:05<11:08:33, 3.00it/s] 68%|██████▊ | 251171/371472 [9:27:05<10:44:55, 3.11it/s] 68%|██████▊ | 251172/371472 [9:27:06<11:41:08, 2.86it/s] 68%|██████▊ | 251173/371472 [9:27:06<11:37:45, 2.87it/s] 68%|██████▊ | 251174/371472 [9:27:07<11:08:48, 3.00it/s] 68%|██████▊ | 251175/371472 [9:27:07<10:31:32, 3.17it/s] 68%|██████▊ | 251176/371472 [9:27:07<10:12:33, 3.27it/s] 68%|██████▊ | 251177/371472 [9:27:07<10:33:32, 3.16it/s] 68%|██████▊ | 251178/371472 [9:27:08<10:25:07, 3.21it/s] 68%|██████▊ | 251179/371472 [9:27:08<10:04:16, 3.32it/s] 68%|██████▊ | 251180/371472 [9:27:08<9:50:41, 3.39it/s] {'loss': 2.6062, 'learning_rate': 3.91599689715357e-07, 'epoch': 10.82} + 68%|██████▊ | 251180/371472 [9:27:08<9:50:41, 3.39it/s] 68%|██████▊ | 251181/371472 [9:27:09<9:53:13, 3.38it/s] 68%|██████▊ | 251182/371472 [9:27:09<9:40:03, 3.46it/s] 68%|██████▊ | 251183/371472 [9:27:09<9:36:52, 3.48it/s] 68%|██████▊ | 251184/371472 [9:27:09<9:31:55, 3.51it/s] 68%|██████▊ | 251185/371472 [9:27:10<10:11:38, 3.28it/s] 68%|██████▊ | 251186/371472 [9:27:10<10:25:16, 3.21it/s] 68%|██████▊ | 251187/371472 [9:27:10<10:04:58, 3.31it/s] 68%|██████▊ | 251188/371472 [9:27:11<9:52:15, 3.38it/s] 68%|██████▊ | 251189/371472 [9:27:11<9:48:04, 3.41it/s] 68%|██████▊ | 251190/371472 [9:27:11<9:42:14, 3.44it/s] 68%|██████▊ | 251191/371472 [9:27:12<9:55:56, 3.36it/s] 68%|██████▊ | 251192/371472 [9:27:12<9:50:24, 3.40it/s] 68%|██████▊ | 251193/371472 [9:27:12<9:47:40, 3.41it/s] 68%|██████▊ | 251194/371472 [9:27:12<9:53:41, 3.38it/s] 68%|██████▊ | 251195/371472 [9:27:13<9:50:39, 3.39it/s] 68%|██████▊ | 251196/371472 [9:27:13<9:57:10, 3.36it/s] 68%|██████▊ | 251197/371472 [9:27:13<9:51:39, 3.39it/s] 68%|██████▊ | 251198/371472 [9:27:14<9:49:41, 3.40it/s] 68%|██████▊ | 251199/371472 [9:27:14<10:11:09, 3.28it/s] 68%|██████▊ | 251200/371472 [9:27:14<10:15:44, 3.26it/s] {'loss': 2.8156, 'learning_rate': 3.91551207739878e-07, 'epoch': 10.82} + 68%|██████▊ | 251200/371472 [9:27:14<10:15:44, 3.26it/s] 68%|██████▊ | 251201/371472 [9:27:15<10:08:31, 3.29it/s] 68%|██████▊ | 251202/371472 [9:27:15<10:14:27, 3.26it/s] 68%|██████▊ | 251203/371472 [9:27:15<10:06:25, 3.31it/s] 68%|██████▊ | 251204/371472 [9:27:15<10:26:44, 3.20it/s] 68%|██████▊ | 251205/371472 [9:27:16<10:14:36, 3.26it/s] 68%|██████▊ | 251206/371472 [9:27:16<10:05:34, 3.31it/s] 68%|██████▊ | 251207/371472 [9:27:16<9:54:57, 3.37it/s] 68%|██████▊ | 251208/371472 [9:27:17<9:50:17, 3.40it/s] 68%|██████▊ | 251209/371472 [9:27:17<9:52:18, 3.38it/s] 68%|██████▊ | 251210/371472 [9:27:17<9:54:37, 3.37it/s] 68%|██████▊ | 251211/371472 [9:27:18<10:08:52, 3.29it/s] 68%|██████▊ | 251212/371472 [9:27:18<9:54:16, 3.37it/s] 68%|██████▊ | 251213/371472 [9:27:18<9:46:33, 3.42it/s] 68%|██████▊ | 251214/371472 [9:27:18<9:42:48, 3.44it/s] 68%|██████▊ | 251215/371472 [9:27:19<9:52:48, 3.38it/s] 68%|██████▊ | 251216/371472 [9:27:19<10:07:00, 3.30it/s] 68%|██████▊ | 251217/371472 [9:27:19<10:06:42, 3.30it/s] 68%|██████▊ | 251218/371472 [9:27:20<10:03:05, 3.32it/s] 68%|██████▊ | 251219/371472 [9:27:20<10:29:50, 3.18it/s] 68%|██████▊ | 251220/371472 [9:27:20<11:48:48, 2.83it/s] {'loss': 2.7541, 'learning_rate': 3.915027257643992e-07, 'epoch': 10.82} + 68%|██████▊ | 251220/371472 [9:27:20<11:48:48, 2.83it/s] 68%|██████▊ | 251221/371472 [9:27:21<11:17:49, 2.96it/s] 68%|██████▊ | 251222/371472 [9:27:21<10:42:15, 3.12it/s] 68%|██████▊ | 251223/371472 [9:27:21<10:50:01, 3.08it/s] 68%|██████▊ | 251224/371472 [9:27:22<10:47:53, 3.09it/s] 68%|██████▊ | 251225/371472 [9:27:22<10:26:33, 3.20it/s] 68%|██████▊ | 251226/371472 [9:27:22<10:09:51, 3.29it/s] 68%|██████▊ | 251227/371472 [9:27:23<10:18:03, 3.24it/s] 68%|██████▊ | 251228/371472 [9:27:23<10:17:54, 3.24it/s] 68%|██████▊ | 251229/371472 [9:27:23<9:56:48, 3.36it/s] 68%|██████▊ | 251230/371472 [9:27:23<10:12:05, 3.27it/s] 68%|██████▊ | 251231/371472 [9:27:24<9:59:50, 3.34it/s] 68%|██████▊ | 251232/371472 [9:27:24<9:38:59, 3.46it/s] 68%|██████▊ | 251233/371472 [9:27:24<9:48:16, 3.41it/s] 68%|██████▊ | 251234/371472 [9:27:25<9:54:15, 3.37it/s] 68%|██████▊ | 251235/371472 [9:27:25<11:50:59, 2.82it/s] 68%|██████▊ | 251236/371472 [9:27:25<11:30:06, 2.90it/s] 68%|██████▊ | 251237/371472 [9:27:26<10:45:02, 3.11it/s] 68%|██████▊ | 251238/371472 [9:27:26<10:14:17, 3.26it/s] 68%|██████▊ | 251239/371472 [9:27:26<10:30:52, 3.18it/s] 68%|██████▊ | 251240/371472 [9:27:27<10:36:58, 3.15it/s] {'loss': 2.7093, 'learning_rate': 3.9145424378892025e-07, 'epoch': 10.82} + 68%|██████▊ | 251240/371472 [9:27:27<10:36:58, 3.15it/s] 68%|██████▊ | 251241/371472 [9:27:27<10:09:13, 3.29it/s] 68%|██████▊ | 251242/371472 [9:27:27<10:06:29, 3.30it/s] 68%|██████▊ | 251243/371472 [9:27:27<10:03:55, 3.32it/s] 68%|██████▊ | 251244/371472 [9:27:28<9:45:00, 3.43it/s] 68%|██████▊ | 251245/371472 [9:27:28<9:37:06, 3.47it/s] 68%|██████▊ | 251246/371472 [9:27:28<10:08:54, 3.29it/s] 68%|██████▊ | 251247/371472 [9:27:29<9:59:41, 3.34it/s] 68%|██████▊ | 251248/371472 [9:27:29<10:29:00, 3.19it/s] 68%|██████▊ | 251249/371472 [9:27:29<10:19:06, 3.24it/s] 68%|██████▊ | 251250/371472 [9:27:30<10:20:22, 3.23it/s] 68%|██████▊ | 251251/371472 [9:27:30<10:03:20, 3.32it/s] 68%|██████▊ | 251252/371472 [9:27:30<10:16:49, 3.25it/s] 68%|██████▊ | 251253/371472 [9:27:31<10:38:24, 3.14it/s] 68%|██████▊ | 251254/371472 [9:27:31<10:16:24, 3.25it/s] 68%|██████▊ | 251255/371472 [9:27:31<10:00:38, 3.34it/s] 68%|██████▊ | 251256/371472 [9:27:31<10:13:16, 3.27it/s] 68%|██████▊ | 251257/371472 [9:27:32<10:06:21, 3.30it/s] 68%|██████▊ | 251258/371472 [9:27:32<10:15:54, 3.25it/s] 68%|██████▊ | 251259/371472 [9:27:32<10:14:40, 3.26it/s] 68%|██████▊ | 251260/371472 [9:27:33<10:47:16, 3.10it/s] {'loss': 2.7304, 'learning_rate': 3.9140576181344127e-07, 'epoch': 10.82} + 68%|██████▊ | 251260/371472 [9:27:33<10:47:16, 3.10it/s] 68%|██████▊ | 251261/371472 [9:27:33<10:35:45, 3.15it/s] 68%|██████▊ | 251262/371472 [9:27:33<11:12:28, 2.98it/s] 68%|██████▊ | 251263/371472 [9:27:34<10:54:44, 3.06it/s] 68%|██████▊ | 251264/371472 [9:27:34<10:36:40, 3.15it/s] 68%|██████▊ | 251265/371472 [9:27:34<10:13:36, 3.27it/s] 68%|██████▊ | 251266/371472 [9:27:35<10:19:34, 3.23it/s] 68%|██████▊ | 251267/371472 [9:27:35<10:17:33, 3.24it/s] 68%|██████▊ | 251268/371472 [9:27:35<10:13:12, 3.27it/s] 68%|██████▊ | 251269/371472 [9:27:36<10:15:19, 3.26it/s] 68%|██████▊ | 251270/371472 [9:27:36<9:58:45, 3.35it/s] 68%|██████▊ | 251271/371472 [9:27:36<10:00:05, 3.34it/s] 68%|██████▊ | 251272/371472 [9:27:36<10:05:18, 3.31it/s] 68%|██████▊ | 251273/371472 [9:27:37<10:07:02, 3.30it/s] 68%|██████▊ | 251274/371472 [9:27:37<10:15:09, 3.26it/s] 68%|██████▊ | 251275/371472 [9:27:37<10:14:51, 3.26it/s] 68%|██████▊ | 251276/371472 [9:27:38<10:37:42, 3.14it/s] 68%|██████▊ | 251277/371472 [9:27:38<10:21:10, 3.22it/s] 68%|██████▊ | 251278/371472 [9:27:38<10:10:26, 3.28it/s] 68%|██���███▊ | 251279/371472 [9:27:39<10:00:20, 3.34it/s] 68%|██████▊ | 251280/371472 [9:27:39<10:11:38, 3.28it/s] {'loss': 2.7137, 'learning_rate': 3.9135727983796245e-07, 'epoch': 10.82} + 68%|██████▊ | 251280/371472 [9:27:39<10:11:38, 3.28it/s] 68%|██████▊ | 251281/371472 [9:27:39<10:00:46, 3.33it/s] 68%|██████▊ | 251282/371472 [9:27:39<9:56:18, 3.36it/s] 68%|██████▊ | 251283/371472 [9:27:40<9:58:02, 3.35it/s] 68%|██████▊ | 251284/371472 [9:27:40<10:10:06, 3.28it/s] 68%|██████▊ | 251285/371472 [9:27:40<10:13:07, 3.27it/s] 68%|██████▊ | 251286/371472 [9:27:41<10:19:58, 3.23it/s] 68%|██████▊ | 251287/371472 [9:27:41<10:12:29, 3.27it/s] 68%|██████▊ | 251288/371472 [9:27:41<10:02:14, 3.33it/s] 68%|██████▊ | 251289/371472 [9:27:42<10:14:29, 3.26it/s] 68%|██████▊ | 251290/371472 [9:27:42<10:34:30, 3.16it/s] 68%|██████▊ | 251291/371472 [9:27:42<10:13:46, 3.26it/s] 68%|██████▊ | 251292/371472 [9:27:43<10:41:39, 3.12it/s] 68%|██████▊ | 251293/371472 [9:27:43<10:28:23, 3.19it/s] 68%|██████▊ | 251294/371472 [9:27:43<10:17:01, 3.25it/s] 68%|██████▊ | 251295/371472 [9:27:43<10:05:26, 3.31it/s] 68%|██████▊ | 251296/371472 [9:27:44<9:51:40, 3.39it/s] 68%|██████▊ | 251297/371472 [9:27:44<9:56:56, 3.36it/s] 68%|██████▊ | 251298/371472 [9:27:44<9:47:28, 3.41it/s] 68%|██████▊ | 251299/371472 [9:27:45<9:35:51, 3.48it/s] 68%|██████▊ | 251300/371472 [9:27:45<9:34:42, 3.49it/s] {'loss': 2.7261, 'learning_rate': 3.913087978624835e-07, 'epoch': 10.82} + 68%|██████▊ | 251300/371472 [9:27:45<9:34:42, 3.49it/s] 68%|██████▊ | 251301/371472 [9:27:45<10:11:10, 3.28it/s] 68%|██████▊ | 251302/371472 [9:27:46<10:13:02, 3.27it/s] 68%|██████▊ | 251303/371472 [9:27:46<10:09:48, 3.28it/s] 68%|██████▊ | 251304/371472 [9:27:46<10:15:05, 3.26it/s] 68%|██████▊ | 251305/371472 [9:27:46<9:51:42, 3.38it/s] 68%|██████▊ | 251306/371472 [9:27:47<9:59:07, 3.34it/s] 68%|██████▊ | 251307/371472 [9:27:47<9:52:16, 3.38it/s] 68%|██████▊ | 251308/371472 [9:27:47<10:28:32, 3.19it/s] 68%|██████▊ | 251309/371472 [9:27:48<10:10:38, 3.28it/s] 68%|██████▊ | 251310/371472 [9:27:48<10:09:38, 3.29it/s] 68%|██████▊ | 251311/371472 [9:27:48<9:58:27, 3.35it/s] 68%|██████▊ | 251312/371472 [9:27:49<9:53:07, 3.38it/s] 68%|██████▊ | 251313/371472 [9:27:49<9:50:12, 3.39it/s] 68%|██████▊ | 251314/371472 [9:27:49<10:12:54, 3.27it/s] 68%|██████▊ | 251315/371472 [9:27:49<10:06:32, 3.30it/s] 68%|██████▊ | 251316/371472 [9:27:50<11:19:18, 2.95it/s] 68%|██████▊ | 251317/371472 [9:27:50<10:49:30, 3.08it/s] 68%|██████▊ | 251318/371472 [9:27:51<11:37:41, 2.87it/s] 68%|██████▊ | 251319/371472 [9:27:51<11:04:11, 3.01it/s] 68%|██████▊ | 251320/371472 [9:27:51<10:29:41, 3.18it/s] {'loss': 2.7054, 'learning_rate': 3.9126031588700465e-07, 'epoch': 10.82} + 68%|██████▊ | 251320/371472 [9:27:51<10:29:41, 3.18it/s] 68%|██████▊ | 251321/371472 [9:27:51<10:37:19, 3.14it/s] 68%|██████▊ | 251322/371472 [9:27:52<10:07:04, 3.30it/s] 68%|██████▊ | 251323/371472 [9:27:52<10:01:13, 3.33it/s] 68%|██████▊ | 251324/371472 [9:27:52<10:08:05, 3.29it/s] 68%|██████▊ | 251325/371472 [9:27:53<9:46:23, 3.41it/s] 68%|██████▊ | 251326/371472 [9:27:53<9:39:32, 3.46it/s] 68%|██████▊ | 251327/371472 [9:27:53<10:46:43, 3.10it/s] 68%|██████▊ | 251328/371472 [9:27:54<10:16:09, 3.25it/s] 68%|██████▊ | 251329/371472 [9:27:54<9:55:24, 3.36it/s] 68%|██████▊ | 251330/371472 [9:27:54<9:40:47, 3.45it/s] 68%|██████▊ | 251331/371472 [9:27:54<9:36:36, 3.47it/s] 68%|██████▊ | 251332/371472 [9:27:55<10:14:48, 3.26it/s] 68%|██████▊ | 251333/371472 [9:27:55<10:19:52, 3.23it/s] 68%|██████▊ | 251334/371472 [9:27:55<10:02:03, 3.33it/s] 68%|██████▊ | 251335/371472 [9:27:56<9:54:06, 3.37it/s] 68%|██████▊ | 251336/371472 [9:27:56<9:48:52, 3.40it/s] 68%|██████▊ | 251337/371472 [9:27:56<9:41:51, 3.44it/s] 68%|██████▊ | 251338/371472 [9:27:57<10:56:53, 3.05it/s] 68%|██��███▊ | 251339/371472 [9:27:57<10:57:31, 3.05it/s] 68%|██████▊ | 251340/371472 [9:27:57<10:37:26, 3.14it/s] {'loss': 2.783, 'learning_rate': 3.912118339115257e-07, 'epoch': 10.83} + 68%|██████▊ | 251340/371472 [9:27:57<10:37:26, 3.14it/s] 68%|██████▊ | 251341/371472 [9:27:58<10:32:14, 3.17it/s] 68%|██████▊ | 251342/371472 [9:27:58<10:44:18, 3.11it/s] 68%|██████▊ | 251343/371472 [9:27:58<10:29:02, 3.18it/s] 68%|██████▊ | 251344/371472 [9:27:59<10:22:14, 3.22it/s] 68%|██████▊ | 251345/371472 [9:27:59<10:50:48, 3.08it/s] 68%|██████▊ | 251346/371472 [9:27:59<10:30:29, 3.18it/s] 68%|██████▊ | 251347/371472 [9:28:00<10:59:32, 3.04it/s] 68%|██████▊ | 251348/371472 [9:28:00<10:31:41, 3.17it/s] 68%|██████▊ | 251349/371472 [9:28:00<10:07:39, 3.29it/s] 68%|██████▊ | 251350/371472 [9:28:00<10:01:07, 3.33it/s] 68%|██████▊ | 251351/371472 [9:28:01<10:10:02, 3.28it/s] 68%|██████▊ | 251352/371472 [9:28:01<10:34:43, 3.15it/s] 68%|██████▊ | 251353/371472 [9:28:01<10:33:27, 3.16it/s] 68%|██████▊ | 251354/371472 [9:28:02<10:54:49, 3.06it/s] 68%|██████▊ | 251355/371472 [9:28:02<10:21:56, 3.22it/s] 68%|██████▊ | 251356/371472 [9:28:02<10:29:35, 3.18it/s] 68%|██████▊ | 251357/371472 [9:28:03<10:24:14, 3.21it/s] 68%|██████▊ | 251358/371472 [9:28:03<10:37:14, 3.14it/s] 68%|██████▊ | 251359/371472 [9:28:03<10:13:14, 3.26it/s] 68%|██████▊ | 251360/371472 [9:28:03<9:50:04, 3.39it/s] {'loss': 2.7209, 'learning_rate': 3.911633519360469e-07, 'epoch': 10.83} + 68%|██████▊ | 251360/371472 [9:28:03<9:50:04, 3.39it/s] 68%|██████▊ | 251361/371472 [9:28:04<10:03:50, 3.32it/s] 68%|██████▊ | 251362/371472 [9:28:04<10:06:26, 3.30it/s] 68%|██████▊ | 251363/371472 [9:28:04<10:32:52, 3.16it/s] 68%|██████▊ | 251364/371472 [9:28:05<10:18:15, 3.24it/s] 68%|██████▊ | 251365/371472 [9:28:05<10:18:40, 3.24it/s] 68%|██████▊ | 251366/371472 [9:28:05<10:13:51, 3.26it/s] 68%|██████▊ | 251367/371472 [9:28:06<10:19:20, 3.23it/s] 68%|██████▊ | 251368/371472 [9:28:06<10:02:41, 3.32it/s] 68%|██████▊ | 251369/371472 [9:28:06<9:47:51, 3.41it/s] 68%|██████▊ | 251370/371472 [9:28:07<9:44:22, 3.43it/s] 68%|██████▊ | 251371/371472 [9:28:07<9:58:21, 3.35it/s] 68%|██████▊ | 251372/371472 [9:28:07<10:11:27, 3.27it/s] 68%|██████▊ | 251373/371472 [9:28:07<9:55:53, 3.36it/s] 68%|██████▊ | 251374/371472 [9:28:08<9:49:51, 3.39it/s] 68%|██████▊ | 251375/371472 [9:28:08<9:41:46, 3.44it/s] 68%|██████▊ | 251376/371472 [9:28:08<9:37:12, 3.47it/s] 68%|██████▊ | 251377/371472 [9:28:09<9:42:08, 3.44it/s] 68%|██████▊ | 251378/371472 [9:28:09<9:36:01, 3.47it/s] 68%|██████▊ | 251379/371472 [9:28:09<9:42:53, 3.43it/s] 68%|██████▊ | 251380/371472 [9:28:09<9:45:44, 3.42it/s] {'loss': 2.6814, 'learning_rate': 3.911148699605679e-07, 'epoch': 10.83} + 68%|██████▊ | 251380/371472 [9:28:09<9:45:44, 3.42it/s] 68%|██████▊ | 251381/371472 [9:28:10<9:42:58, 3.43it/s] 68%|██████▊ | 251382/371472 [9:28:10<9:41:45, 3.44it/s] 68%|██████▊ | 251383/371472 [9:28:10<10:05:49, 3.30it/s] 68%|██████▊ | 251384/371472 [9:28:11<9:43:12, 3.43it/s] 68%|██████▊ | 251385/371472 [9:28:11<9:47:51, 3.40it/s] 68%|██████▊ | 251386/371472 [9:28:11<9:52:39, 3.38it/s] 68%|██████▊ | 251387/371472 [9:28:12<10:13:28, 3.26it/s] 68%|██████▊ | 251388/371472 [9:28:12<10:25:46, 3.20it/s] 68%|██████▊ | 251389/371472 [9:28:12<10:11:38, 3.27it/s] 68%|██████▊ | 251390/371472 [9:28:12<10:13:26, 3.26it/s] 68%|██████▊ | 251391/371472 [9:28:13<11:00:22, 3.03it/s] 68%|██████▊ | 251392/371472 [9:28:13<11:09:17, 2.99it/s] 68%|██████▊ | 251393/371472 [9:28:14<10:57:49, 3.04it/s] 68%|██████▊ | 251394/371472 [9:28:14<10:58:59, 3.04it/s] 68%|██████▊ | 251395/371472 [9:28:14<10:25:34, 3.20it/s] 68%|██████▊ | 251396/371472 [9:28:14<10:37:40, 3.14it/s] 68%|██████▊ | 251397/371472 [9:28:15<10:25:25, 3.20it/s] 68%|██████▊ | 251398/371472 [9:28:15<10:27:27, 3.19it/s] 68%|████���█▊ | 251399/371472 [9:28:15<10:21:47, 3.22it/s] 68%|██████▊ | 251400/371472 [9:28:16<10:48:35, 3.09it/s] {'loss': 2.5566, 'learning_rate': 3.9106638798508904e-07, 'epoch': 10.83} + 68%|██████▊ | 251400/371472 [9:28:16<10:48:35, 3.09it/s] 68%|██████▊ | 251401/371472 [9:28:16<10:29:00, 3.18it/s] 68%|██████▊ | 251402/371472 [9:28:16<10:06:04, 3.30it/s] 68%|██████▊ | 251403/371472 [9:28:17<10:35:40, 3.15it/s] 68%|██████▊ | 251404/371472 [9:28:17<10:40:08, 3.13it/s] 68%|██████▊ | 251405/371472 [9:28:17<10:32:51, 3.16it/s] 68%|██████▊ | 251406/371472 [9:28:18<10:45:11, 3.10it/s] 68%|██████▊ | 251407/371472 [9:28:18<10:50:09, 3.08it/s] 68%|██████▊ | 251408/371472 [9:28:18<10:45:38, 3.10it/s] 68%|██████▊ | 251409/371472 [9:28:19<10:22:49, 3.21it/s] 68%|██████▊ | 251410/371472 [9:28:19<10:23:08, 3.21it/s] 68%|██████▊ | 251411/371472 [9:28:19<10:55:09, 3.05it/s] 68%|██████▊ | 251412/371472 [9:28:20<11:02:27, 3.02it/s] 68%|██████▊ | 251413/371472 [9:28:20<10:47:59, 3.09it/s] 68%|██████▊ | 251414/371472 [9:28:20<10:33:27, 3.16it/s] 68%|██████▊ | 251415/371472 [9:28:20<10:26:33, 3.19it/s] 68%|██████▊ | 251416/371472 [9:28:21<10:36:17, 3.14it/s] 68%|██████▊ | 251417/371472 [9:28:21<10:19:17, 3.23it/s] 68%|██████▊ | 251418/371472 [9:28:21<10:39:55, 3.13it/s] 68%|██████▊ | 251419/371472 [9:28:22<10:25:04, 3.20it/s] 68%|██████▊ | 251420/371472 [9:28:22<10:30:37, 3.17it/s] {'loss': 2.7102, 'learning_rate': 3.9101790600961016e-07, 'epoch': 10.83} + 68%|██████▊ | 251420/371472 [9:28:22<10:30:37, 3.17it/s] 68%|██████▊ | 251421/371472 [9:28:22<10:12:22, 3.27it/s] 68%|██████▊ | 251422/371472 [9:28:23<9:59:00, 3.34it/s] 68%|██████▊ | 251423/371472 [9:28:23<10:44:49, 3.10it/s] 68%|██████▊ | 251424/371472 [9:28:23<11:01:32, 3.02it/s] 68%|██████▊ | 251425/371472 [9:28:24<10:43:59, 3.11it/s] 68%|██████▊ | 251426/371472 [9:28:24<10:23:47, 3.21it/s] 68%|██████▊ | 251427/371472 [9:28:24<10:04:49, 3.31it/s] 68%|██████▊ | 251428/371472 [9:28:25<10:24:25, 3.20it/s] 68%|██████▊ | 251429/371472 [9:28:25<10:33:06, 3.16it/s] 68%|██████▊ | 251430/371472 [9:28:25<10:04:51, 3.31it/s] 68%|██████▊ | 251431/371472 [9:28:25<10:02:32, 3.32it/s] 68%|██████▊ | 251432/371472 [9:28:26<9:50:20, 3.39it/s] 68%|██████▊ | 251433/371472 [9:28:26<9:48:42, 3.40it/s] 68%|██████▊ | 251434/371472 [9:28:26<9:52:07, 3.38it/s] 68%|██████▊ | 251435/371472 [9:28:27<9:54:52, 3.36it/s] 68%|██████▊ | 251436/371472 [9:28:27<10:13:16, 3.26it/s] 68%|██████▊ | 251437/371472 [9:28:27<10:08:17, 3.29it/s] 68%|██████▊ | 251438/371472 [9:28:28<10:19:11, 3.23it/s] 68%|██████▊ | 251439/371472 [9:28:28<10:10:31, 3.28it/s] 68%|██████▊ | 251440/371472 [9:28:28<10:01:04, 3.33it/s] {'loss': 2.625, 'learning_rate': 3.909694240341313e-07, 'epoch': 10.83} + 68%|██████▊ | 251440/371472 [9:28:28<10:01:04, 3.33it/s] 68%|██████▊ | 251441/371472 [9:28:28<9:51:30, 3.38it/s] 68%|██████▊ | 251442/371472 [9:28:29<10:13:27, 3.26it/s] 68%|██████▊ | 251443/371472 [9:28:29<10:00:08, 3.33it/s] 68%|██████▊ | 251444/371472 [9:28:29<9:47:14, 3.41it/s] 68%|██████▊ | 251445/371472 [9:28:30<9:38:53, 3.46it/s] 68%|██████▊ | 251446/371472 [9:28:30<10:17:04, 3.24it/s] 68%|██████▊ | 251447/371472 [9:28:30<10:21:08, 3.22it/s] 68%|██████▊ | 251448/371472 [9:28:31<10:09:02, 3.28it/s] 68%|██████▊ | 251449/371472 [9:28:31<9:51:18, 3.38it/s] 68%|██████▊ | 251450/371472 [9:28:31<10:11:04, 3.27it/s] 68%|██████▊ | 251451/371472 [9:28:31<9:48:53, 3.40it/s] 68%|██████▊ | 251452/371472 [9:28:32<9:49:01, 3.40it/s] 68%|██████▊ | 251453/371472 [9:28:32<9:35:36, 3.48it/s] 68%|██████▊ | 251454/371472 [9:28:32<9:29:39, 3.51it/s] 68%|██████▊ | 251455/371472 [9:28:33<9:52:41, 3.37it/s] 68%|██████▊ | 251456/371472 [9:28:33<10:34:21, 3.15it/s] 68%|██████▊ | 251457/371472 [9:28:33<10:33:12, 3.16it/s] 68%|██████▊ | 251458/371472 [9:28:34<10:38:13, 3.13it/s] 68%|██���███▊ | 251459/371472 [9:28:34<10:26:40, 3.19it/s] 68%|██████▊ | 251460/371472 [9:28:34<10:47:57, 3.09it/s] {'loss': 2.739, 'learning_rate': 3.9092094205865236e-07, 'epoch': 10.83} + 68%|██████▊ | 251460/371472 [9:28:34<10:47:57, 3.09it/s] 68%|██████▊ | 251461/371472 [9:28:35<11:05:02, 3.01it/s] 68%|██████▊ | 251462/371472 [9:28:35<10:24:13, 3.20it/s] 68%|██████▊ | 251463/371472 [9:28:35<10:05:29, 3.30it/s] 68%|██████▊ | 251464/371472 [9:28:35<10:07:52, 3.29it/s] 68%|██████▊ | 251465/371472 [9:28:36<9:53:21, 3.37it/s] 68%|██████▊ | 251466/371472 [9:28:36<10:03:45, 3.31it/s] 68%|██████▊ | 251467/371472 [9:28:36<9:58:59, 3.34it/s] 68%|██████▊ | 251468/371472 [9:28:37<9:52:24, 3.38it/s] 68%|██████▊ | 251469/371472 [9:28:37<9:45:20, 3.42it/s] 68%|██████▊ | 251470/371472 [9:28:37<9:46:47, 3.41it/s] 68%|██████▊ | 251471/371472 [9:28:38<10:17:56, 3.24it/s] 68%|██████▊ | 251472/371472 [9:28:38<10:13:09, 3.26it/s] 68%|██████▊ | 251473/371472 [9:28:38<10:02:06, 3.32it/s] 68%|██████▊ | 251474/371472 [9:28:39<10:26:08, 3.19it/s] 68%|██████▊ | 251475/371472 [9:28:39<10:07:08, 3.29it/s] 68%|██████▊ | 251476/371472 [9:28:39<9:51:59, 3.38it/s] 68%|██████▊ | 251477/371472 [9:28:39<9:41:18, 3.44it/s] 68%|██████▊ | 251478/371472 [9:28:40<9:36:23, 3.47it/s] 68%|██████▊ | 251479/371472 [9:28:40<9:45:43, 3.41it/s] 68%|██████▊ | 251480/371472 [9:28:40<10:18:51, 3.23it/s] {'loss': 2.7268, 'learning_rate': 3.9087246008317354e-07, 'epoch': 10.83} + 68%|██████▊ | 251480/371472 [9:28:40<10:18:51, 3.23it/s] 68%|██████▊ | 251481/371472 [9:28:41<10:56:00, 3.05it/s] 68%|██████▊ | 251482/371472 [9:28:41<10:42:32, 3.11it/s] 68%|██████▊ | 251483/371472 [9:28:41<12:30:32, 2.66it/s] 68%|██████▊ | 251484/371472 [9:28:42<12:10:03, 2.74it/s] 68%|██████▊ | 251485/371472 [9:28:42<11:40:35, 2.85it/s] 68%|██████▊ | 251486/371472 [9:28:42<11:34:52, 2.88it/s] 68%|██████▊ | 251487/371472 [9:28:43<11:14:09, 2.97it/s] 68%|██████▊ | 251488/371472 [9:28:43<10:39:21, 3.13it/s] 68%|██████▊ | 251489/371472 [9:28:43<10:23:06, 3.21it/s] 68%|██████▊ | 251490/371472 [9:28:44<10:16:24, 3.24it/s] 68%|██████▊ | 251491/371472 [9:28:44<10:08:33, 3.29it/s] 68%|██████▊ | 251492/371472 [9:28:44<9:57:09, 3.35it/s] 68%|██████▊ | 251493/371472 [9:28:45<9:59:56, 3.33it/s] 68%|██████▊ | 251494/371472 [9:28:45<9:45:45, 3.41it/s] 68%|██████▊ | 251495/371472 [9:28:45<9:37:49, 3.46it/s] 68%|██████▊ | 251496/371472 [9:28:45<9:47:17, 3.40it/s] 68%|██████▊ | 251497/371472 [9:28:46<10:02:23, 3.32it/s] 68%|██████▊ | 251498/371472 [9:28:46<9:56:47, 3.35it/s] 68%|██████▊ | 251499/371472 [9:28:46<10:13:53, 3.26it/s] 68%|██████▊ | 251500/371472 [9:28:47<10:11:41, 3.27it/s] {'loss': 2.5722, 'learning_rate': 3.908239781076946e-07, 'epoch': 10.83} + 68%|██████▊ | 251500/371472 [9:28:47<10:11:41, 3.27it/s] 68%|██████▊ | 251501/371472 [9:28:47<11:00:26, 3.03it/s] 68%|██████▊ | 251502/371472 [9:28:47<10:35:04, 3.15it/s] 68%|██████▊ | 251503/371472 [9:28:48<10:50:02, 3.08it/s] 68%|██████▊ | 251504/371472 [9:28:48<11:25:47, 2.92it/s] 68%|██████▊ | 251505/371472 [9:28:48<11:52:25, 2.81it/s] 68%|██████▊ | 251506/371472 [9:28:49<12:06:18, 2.75it/s] 68%|██████▊ | 251507/371472 [9:28:49<12:03:21, 2.76it/s] 68%|██████▊ | 251508/371472 [9:28:50<11:50:25, 2.81it/s] 68%|██████▊ | 251509/371472 [9:28:50<12:06:18, 2.75it/s] 68%|██████▊ | 251510/371472 [9:28:50<11:46:43, 2.83it/s] 68%|██████▊ | 251511/371472 [9:28:51<11:24:04, 2.92it/s] 68%|██████▊ | 251512/371472 [9:28:51<10:52:40, 3.06it/s] 68%|██████▊ | 251513/371472 [9:28:51<10:30:46, 3.17it/s] 68%|██████▊ | 251514/371472 [9:28:51<10:27:48, 3.18it/s] 68%|██████▊ | 251515/371472 [9:28:52<10:25:54, 3.19it/s] 68%|██████▊ | 251516/371472 [9:28:52<10:19:42, 3.23it/s] 68%|██████▊ | 251517/371472 [9:28:52<10:06:28, 3.30it/s] 68%|██████▊ | 251518/371472 [9:28:53<9:47:57, 3.40it/s] 68%|██████▊ | 251519/371472 [9:28:53<9:37:10, 3.46it/s] 68%|██████▊ | 251520/371472 [9:28:53<9:23:58, 3.54it/s] {'loss': 2.8868, 'learning_rate': 3.9077549613221573e-07, 'epoch': 10.83} + 68%|██████▊ | 251520/371472 [9:28:53<9:23:58, 3.54it/s] 68%|██████▊ | 251521/371472 [9:28:53<9:25:02, 3.54it/s] 68%|██████▊ | 251522/371472 [9:28:54<9:22:32, 3.55it/s] 68%|██████▊ | 251523/371472 [9:28:54<9:27:20, 3.52it/s] 68%|██████▊ | 251524/371472 [9:28:54<9:30:06, 3.51it/s] 68%|██████▊ | 251525/371472 [9:28:55<9:47:33, 3.40it/s] 68%|██████▊ | 251526/371472 [9:28:55<9:52:40, 3.37it/s] 68%|██████▊ | 251527/371472 [9:28:55<9:58:04, 3.34it/s] 68%|██████▊ | 251528/371472 [9:28:56<10:20:49, 3.22it/s] 68%|██████▊ | 251529/371472 [9:28:56<14:28:10, 2.30it/s] 68%|██████▊ | 251530/371472 [9:28:57<12:58:47, 2.57it/s] 68%|██████▊ | 251531/371472 [9:28:57<13:03:07, 2.55it/s] 68%|██████▊ | 251532/371472 [9:28:57<12:09:55, 2.74it/s] 68%|██████▊ | 251533/371472 [9:28:58<11:35:52, 2.87it/s] 68%|██████▊ | 251534/371472 [9:28:58<10:51:15, 3.07it/s] 68%|██████▊ | 251535/371472 [9:28:58<10:27:44, 3.18it/s] 68%|██████▊ | 251536/371472 [9:28:58<10:19:08, 3.23it/s] 68%|██████▊ | 251537/371472 [9:28:59<10:03:58, 3.31it/s] 68%|██████▊ | 251538/371472 [9:28:59<10:00:32, 3.33it/s] 68%|██████▊ | 251539/371472 [9:28:59<10:40:23, 3.12it/s] 68%|██████▊ | 251540/371472 [9:29:00<10:26:40, 3.19it/s] {'loss': 2.6677, 'learning_rate': 3.907270141567368e-07, 'epoch': 10.83} + 68%|██████▊ | 251540/371472 [9:29:00<10:26:40, 3.19it/s] 68%|██████▊ | 251541/371472 [9:29:00<10:17:22, 3.24it/s] 68%|██████▊ | 251542/371472 [9:29:00<10:13:53, 3.26it/s] 68%|██████▊ | 251543/371472 [9:29:01<10:03:26, 3.31it/s] 68%|██████▊ | 251544/371472 [9:29:01<10:09:58, 3.28it/s] 68%|██████▊ | 251545/371472 [9:29:01<9:51:04, 3.38it/s] 68%|██████▊ | 251546/371472 [9:29:01<10:10:16, 3.28it/s] 68%|██████▊ | 251547/371472 [9:29:02<10:36:35, 3.14it/s] 68%|██████▊ | 251548/371472 [9:29:02<10:11:01, 3.27it/s] 68%|██████▊ | 251549/371472 [9:29:02<10:07:15, 3.29it/s] 68%|██████▊ | 251550/371472 [9:29:03<10:23:23, 3.21it/s] 68%|██████▊ | 251551/371472 [9:29:03<10:12:29, 3.26it/s] 68%|██████▊ | 251552/371472 [9:29:03<10:07:48, 3.29it/s] 68%|██████▊ | 251553/371472 [9:29:04<9:58:16, 3.34it/s] 68%|██████▊ | 251554/371472 [9:29:04<9:42:38, 3.43it/s] 68%|██████▊ | 251555/371472 [9:29:04<9:34:08, 3.48it/s] 68%|██████▊ | 251556/371472 [9:29:04<9:50:02, 3.39it/s] 68%|██████▊ | 251557/371472 [9:29:05<9:49:54, 3.39it/s] 68%|██████▊ | 251558/371472 [9:29:05<9:40:59, 3.44it/s] 68%|██████▊ | 251559/371472 [9:29:05<9:47:35, 3.40it/s] 68%|██████▊ | 251560/371472 [9:29:06<9:45:58, 3.41it/s] {'loss': 2.8517, 'learning_rate': 3.90678532181258e-07, 'epoch': 10.84} + 68%|██████▊ | 251560/371472 [9:29:06<9:45:58, 3.41it/s] 68%|██████▊ | 251561/371472 [9:29:06<10:12:26, 3.26it/s] 68%|██████▊ | 251562/371472 [9:29:06<10:08:35, 3.28it/s] 68%|██████▊ | 251563/371472 [9:29:07<9:59:49, 3.33it/s] 68%|██████▊ | 251564/371472 [9:29:07<10:00:31, 3.33it/s] 68%|██████▊ | 251565/371472 [9:29:07<9:59:18, 3.33it/s] 68%|██████▊ | 251566/371472 [9:29:07<10:01:58, 3.32it/s] 68%|██████▊ | 251567/371472 [9:29:08<9:52:45, 3.37it/s] 68%|██████▊ | 251568/371472 [9:29:08<9:42:21, 3.43it/s] 68%|██████▊ | 251569/371472 [9:29:08<9:38:13, 3.46it/s] 68%|██████▊ | 251570/371472 [9:29:09<9:35:25, 3.47it/s] 68%|██████▊ | 251571/371472 [9:29:09<9:42:00, 3.43it/s] 68%|██████▊ | 251572/371472 [9:29:09<9:43:21, 3.43it/s] 68%|██████▊ | 251573/371472 [9:29:09<9:45:43, 3.41it/s] 68%|██████▊ | 251574/371472 [9:29:10<9:46:21, 3.41it/s] 68%|██████▊ | 251575/371472 [9:29:10<10:07:02, 3.29it/s] 68%|██████▊ | 251576/371472 [9:29:10<9:56:41, 3.35it/s] 68%|██████▊ | 251577/371472 [9:29:11<9:52:44, 3.37it/s] 68%|██████▊ | 251578/371472 [9:29:11<9:45:26, 3.41it/s] 68%|██████��� | 251579/371472 [9:29:11<9:45:28, 3.41it/s] 68%|██████▊ | 251580/371472 [9:29:12<9:58:44, 3.34it/s] {'loss': 2.587, 'learning_rate': 3.90630050205779e-07, 'epoch': 10.84} + 68%|██████▊ | 251580/371472 [9:29:12<9:58:44, 3.34it/s] 68%|██████▊ | 251581/371472 [9:29:12<9:58:50, 3.34it/s] 68%|██████▊ | 251582/371472 [9:29:12<9:53:38, 3.37it/s] 68%|██████▊ | 251583/371472 [9:29:12<10:11:51, 3.27it/s] 68%|██████▊ | 251584/371472 [9:29:13<10:13:16, 3.26it/s] 68%|██████▊ | 251585/371472 [9:29:13<11:04:38, 3.01it/s] 68%|██████▊ | 251586/371472 [9:29:13<10:37:37, 3.13it/s] 68%|██████▊ | 251587/371472 [9:29:14<10:50:41, 3.07it/s] 68%|██████▊ | 251588/371472 [9:29:14<11:17:03, 2.95it/s] 68%|██████▊ | 251589/371472 [9:29:14<10:49:20, 3.08it/s] 68%|██████▊ | 251590/371472 [9:29:15<10:30:00, 3.17it/s] 68%|██████▊ | 251591/371472 [9:29:15<10:12:11, 3.26it/s] 68%|██████▊ | 251592/371472 [9:29:15<9:58:53, 3.34it/s] 68%|██████▊ | 251593/371472 [9:29:16<9:59:45, 3.33it/s] 68%|██████▊ | 251594/371472 [9:29:16<9:54:54, 3.36it/s] 68%|██████▊ | 251595/371472 [9:29:16<9:58:03, 3.34it/s] 68%|██████▊ | 251596/371472 [9:29:17<9:58:47, 3.34it/s] 68%|██████▊ | 251597/371472 [9:29:17<10:23:04, 3.21it/s] 68%|██████▊ | 251598/371472 [9:29:17<10:10:58, 3.27it/s] 68%|██████▊ | 251599/371472 [9:29:17<10:10:56, 3.27it/s] 68%|██████▊ | 251600/371472 [9:29:18<10:07:10, 3.29it/s] {'loss': 2.752, 'learning_rate': 3.905815682303002e-07, 'epoch': 10.84} + 68%|██████▊ | 251600/371472 [9:29:18<10:07:10, 3.29it/s] 68%|██████▊ | 251601/371472 [9:29:18<9:53:50, 3.36it/s] 68%|██████▊ | 251602/371472 [9:29:18<9:59:18, 3.33it/s] 68%|██████▊ | 251603/371472 [9:29:19<9:47:48, 3.40it/s] 68%|██████▊ | 251604/371472 [9:29:19<10:23:04, 3.21it/s] 68%|██████▊ | 251605/371472 [9:29:19<10:25:38, 3.19it/s] 68%|██████▊ | 251606/371472 [9:29:20<10:32:27, 3.16it/s] 68%|██████▊ | 251607/371472 [9:29:20<10:39:35, 3.12it/s] 68%|██████▊ | 251608/371472 [9:29:20<10:11:16, 3.27it/s] 68%|██████▊ | 251609/371472 [9:29:21<10:01:09, 3.32it/s] 68%|██████▊ | 251610/371472 [9:29:21<10:05:44, 3.30it/s] 68%|██████▊ | 251611/371472 [9:29:21<9:58:17, 3.34it/s] 68%|██████▊ | 251612/371472 [9:29:21<10:05:48, 3.30it/s] 68%|██████▊ | 251613/371472 [9:29:22<10:56:54, 3.04it/s] 68%|██████▊ | 251614/371472 [9:29:22<10:22:54, 3.21it/s] 68%|██████▊ | 251615/371472 [9:29:22<10:16:46, 3.24it/s] 68%|██████▊ | 251616/371472 [9:29:23<10:14:43, 3.25it/s] 68%|██████▊ | 251617/371472 [9:29:23<9:54:12, 3.36it/s] 68%|██████▊ | 251618/371472 [9:29:23<10:04:26, 3.30it/s] 68%|██████▊ | 251619/371472 [9:29:24<10:05:42, 3.30it/s] 68%|██████▊ | 251620/371472 [9:29:24<10:15:40, 3.24it/s] {'loss': 2.7607, 'learning_rate': 3.9053308625482125e-07, 'epoch': 10.84} + 68%|██████▊ | 251620/371472 [9:29:24<10:15:40, 3.24it/s] 68%|██████▊ | 251621/371472 [9:29:24<9:55:43, 3.35it/s] 68%|██████▊ | 251622/371472 [9:29:25<10:37:42, 3.13it/s] 68%|██████▊ | 251623/371472 [9:29:25<10:22:33, 3.21it/s] 68%|██████▊ | 251624/371472 [9:29:25<10:03:11, 3.31it/s] 68%|██████▊ | 251625/371472 [9:29:25<10:09:15, 3.28it/s] 68%|██████▊ | 251626/371472 [9:29:26<9:56:49, 3.35it/s] 68%|██████▊ | 251627/371472 [9:29:26<9:51:06, 3.38it/s] 68%|██████▊ | 251628/371472 [9:29:26<9:41:01, 3.44it/s] 68%|██████▊ | 251629/371472 [9:29:27<9:44:47, 3.42it/s] 68%|██████▊ | 251630/371472 [9:29:27<9:24:27, 3.54it/s] 68%|██████▊ | 251631/371472 [9:29:27<9:31:11, 3.50it/s] 68%|██████▊ | 251632/371472 [9:29:27<9:43:42, 3.42it/s] 68%|██████▊ | 251633/371472 [9:29:28<10:00:10, 3.33it/s] 68%|██████▊ | 251634/371472 [9:29:28<10:03:00, 3.31it/s] 68%|██████▊ | 251635/371472 [9:29:28<10:00:24, 3.33it/s] 68%|██████▊ | 251636/371472 [9:29:29<9:48:24, 3.39it/s] 68%|██████▊ | 251637/371472 [9:29:29<9:37:34, 3.46it/s] 68%|██████▊ | 251638/371472 [9:29:29<10:02:24, 3.32it/s] 68%|██████▊ | 251639/371472 [9:29:30<9:56:27, 3.35it/s] 68%|██████▊ | 251640/371472 [9:29:30<9:49:27, 3.39it/s] {'loss': 2.5829, 'learning_rate': 3.9048460427934237e-07, 'epoch': 10.84} + 68%|██████▊ | 251640/371472 [9:29:30<9:49:27, 3.39it/s] 68%|██████▊ | 251641/371472 [9:29:30<10:08:48, 3.28it/s] 68%|██████▊ | 251642/371472 [9:29:30<10:00:49, 3.32it/s] 68%|██████▊ | 251643/371472 [9:29:31<9:52:21, 3.37it/s] 68%|██████▊ | 251644/371472 [9:29:31<10:18:34, 3.23it/s] 68%|██████▊ | 251645/371472 [9:29:31<10:34:33, 3.15it/s] 68%|██████▊ | 251646/371472 [9:29:32<10:19:30, 3.22it/s] 68%|██████▊ | 251647/371472 [9:29:32<9:57:08, 3.34it/s] 68%|██████▊ | 251648/371472 [9:29:32<9:43:18, 3.42it/s] 68%|██████▊ | 251649/371472 [9:29:33<9:49:35, 3.39it/s] 68%|██████▊ | 251650/371472 [9:29:33<9:39:51, 3.44it/s] 68%|██████▊ | 251651/371472 [9:29:33<9:34:00, 3.48it/s] 68%|██████▊ | 251652/371472 [9:29:33<9:34:06, 3.48it/s] 68%|██████▊ | 251653/371472 [9:29:34<9:45:43, 3.41it/s] 68%|██████▊ | 251654/371472 [9:29:34<9:39:46, 3.44it/s] 68%|██████▊ | 251655/371472 [9:29:34<9:44:16, 3.42it/s] 68%|██████▊ | 251656/371472 [9:29:35<9:31:43, 3.49it/s] 68%|██████▊ | 251657/371472 [9:29:35<9:57:51, 3.34it/s] 68%|██████▊ | 251658/371472 [9:29:35<9:54:16, 3.36it/s] 68%|██████▊ | 251659/371472 [9:29:36<10:09:45, 3.27it/s] 68%|██████▊ | 251660/371472 [9:29:36<9:49:42, 3.39it/s] {'loss': 2.7927, 'learning_rate': 3.9043612230386344e-07, 'epoch': 10.84} + 68%|██████▊ | 251660/371472 [9:29:36<9:49:42, 3.39it/s] 68%|██████▊ | 251661/371472 [9:29:36<9:35:52, 3.47it/s] 68%|██████▊ | 251662/371472 [9:29:36<9:52:24, 3.37it/s] 68%|██████▊ | 251663/371472 [9:29:37<9:40:54, 3.44it/s] 68%|██████▊ | 251664/371472 [9:29:37<9:22:56, 3.55it/s] 68%|██████▊ | 251665/371472 [9:29:37<9:24:49, 3.54it/s] 68%|██████▊ | 251666/371472 [9:29:38<9:45:34, 3.41it/s] 68%|██████▊ | 251667/371472 [9:29:38<9:46:15, 3.41it/s] 68%|██████▊ | 251668/371472 [9:29:38<9:39:36, 3.44it/s] 68%|██████▊ | 251669/371472 [9:29:38<9:48:37, 3.39it/s] 68%|██████▊ | 251670/371472 [9:29:39<9:53:03, 3.37it/s] 68%|██████▊ | 251671/371472 [9:29:39<9:31:19, 3.49it/s] 68%|██████▊ | 251672/371472 [9:29:39<10:16:50, 3.24it/s] 68%|██████▊ | 251673/371472 [9:29:40<10:03:29, 3.31it/s] 68%|██████▊ | 251674/371472 [9:29:40<9:51:44, 3.37it/s] 68%|██████▊ | 251675/371472 [9:29:40<9:44:45, 3.41it/s] 68%|██████▊ | 251676/371472 [9:29:41<10:42:35, 3.11it/s] 68%|██████▊ | 251677/371472 [9:29:41<10:26:45, 3.19it/s] 68%|██████▊ | 251678/371472 [9:29:41<10:30:26, 3.17it/s] 68%|██████▊ | 251679/371472 [9:29:42<10:28:08, 3.18it/s] 68%|██████▊ | 251680/371472 [9:29:42<9:59:08, 3.33it/s] {'loss': 2.798, 'learning_rate': 3.903876403283846e-07, 'epoch': 10.84} + 68%|██████▊ | 251680/371472 [9:29:42<9:59:08, 3.33it/s] 68%|██████▊ | 251681/371472 [9:29:42<9:52:04, 3.37it/s] 68%|██████▊ | 251682/371472 [9:29:42<9:51:49, 3.37it/s] 68%|██████▊ | 251683/371472 [9:29:43<10:17:29, 3.23it/s] 68%|██████▊ | 251684/371472 [9:29:43<10:08:57, 3.28it/s] 68%|██████▊ | 251685/371472 [9:29:43<9:55:40, 3.35it/s] 68%|██████▊ | 251686/371472 [9:29:44<9:51:01, 3.38it/s] 68%|██████▊ | 251687/371472 [9:29:44<9:56:19, 3.35it/s] 68%|██████▊ | 251688/371472 [9:29:44<9:51:39, 3.37it/s] 68%|██████▊ | 251689/371472 [9:29:44<9:37:27, 3.46it/s] 68%|██████▊ | 251690/371472 [9:29:45<10:07:19, 3.29it/s] 68%|██████▊ | 251691/371472 [9:29:45<9:47:54, 3.40it/s] 68%|██████▊ | 251692/371472 [9:29:45<9:54:05, 3.36it/s] 68%|██████▊ | 251693/371472 [9:29:46<9:53:07, 3.37it/s] 68%|██████▊ | 251694/371472 [9:29:46<9:50:18, 3.38it/s] 68%|██████▊ | 251695/371472 [9:29:46<9:46:33, 3.40it/s] 68%|██████▊ | 251696/371472 [9:29:47<9:32:11, 3.49it/s] 68%|██████▊ | 251697/371472 [9:29:47<9:26:51, 3.52it/s] 68%|██████▊ | 251698/371472 [9:29:47<10:16:23, 3.24it/s] 68%|██████▊ | 251699/371472 [9:29:47<10:20:45, 3.22it/s] 68%|██████▊ | 251700/371472 [9:29:48<10:06:21, 3.29it/s] {'loss': 2.8178, 'learning_rate': 3.9033915835290564e-07, 'epoch': 10.84} + 68%|██████▊ | 251700/371472 [9:29:48<10:06:21, 3.29it/s] 68%|██████▊ | 251701/371472 [9:29:48<9:48:47, 3.39it/s] 68%|██████▊ | 251702/371472 [9:29:48<9:40:15, 3.44it/s] 68%|██████▊ | 251703/371472 [9:29:49<10:00:06, 3.33it/s] 68%|██████▊ | 251704/371472 [9:29:49<10:14:21, 3.25it/s] 68%|██████▊ | 251705/371472 [9:29:49<10:02:07, 3.32it/s] 68%|██████▊ | 251706/371472 [9:29:50<10:56:08, 3.04it/s] 68%|██████▊ | 251707/371472 [9:29:50<10:27:30, 3.18it/s] 68%|██████▊ | 251708/371472 [9:29:50<11:15:36, 2.95it/s] 68%|██████▊ | 251709/371472 [9:29:51<10:53:27, 3.05it/s] 68%|██████▊ | 251710/371472 [9:29:51<11:08:38, 2.99it/s] 68%|██████▊ | 251711/371472 [9:29:51<10:38:04, 3.13it/s] 68%|██████▊ | 251712/371472 [9:29:52<10:11:30, 3.26it/s] 68%|██████▊ | 251713/371472 [9:29:52<10:04:06, 3.30it/s] 68%|██████▊ | 251714/371472 [9:29:52<9:56:16, 3.35it/s] 68%|██████▊ | 251715/371472 [9:29:52<10:00:49, 3.32it/s] 68%|██████▊ | 251716/371472 [9:29:53<9:54:31, 3.36it/s] 68%|██████▊ | 251717/371472 [9:29:53<9:42:12, 3.43it/s] 68%|██████▊ | 251718/371472 [9:29:53<9:36:34, 3.46it/s] 68%|██████▊ | 251719/371472 [9:29:54<9:42:59, 3.42it/s] 68%|██████▊ | 251720/371472 [9:29:54<9:46:51, 3.40it/s] {'loss': 2.7898, 'learning_rate': 3.902906763774268e-07, 'epoch': 10.84} + 68%|██████▊ | 251720/371472 [9:29:54<9:46:51, 3.40it/s] 68%|██████▊ | 251721/371472 [9:29:54<9:41:56, 3.43it/s] 68%|██████▊ | 251722/371472 [9:29:54<9:28:13, 3.51it/s] 68%|██████▊ | 251723/371472 [9:29:55<9:15:44, 3.59it/s] 68%|██████▊ | 251724/371472 [9:29:55<9:24:01, 3.54it/s] 68%|██████▊ | 251725/371472 [9:29:55<9:26:55, 3.52it/s] 68%|██████▊ | 251726/371472 [9:29:56<9:46:04, 3.41it/s] 68%|██████▊ | 251727/371472 [9:29:56<10:11:31, 3.26it/s] 68%|██████▊ | 251728/371472 [9:29:56<10:26:27, 3.19it/s] 68%|██████▊ | 251729/371472 [9:29:57<10:12:06, 3.26it/s] 68%|██████▊ | 251730/371472 [9:29:57<9:55:22, 3.35it/s] 68%|██████▊ | 251731/371472 [9:29:57<9:36:06, 3.46it/s] 68%|██████▊ | 251732/371472 [9:29:57<10:02:25, 3.31it/s] 68%|██████▊ | 251733/371472 [9:29:58<9:57:32, 3.34it/s] 68%|██████▊ | 251734/371472 [9:29:58<9:53:47, 3.36it/s] 68%|██████▊ | 251735/371472 [9:29:58<9:52:50, 3.37it/s] 68%|██████▊ | 251736/371472 [9:29:59<9:45:40, 3.41it/s] 68%|██████▊ | 251737/371472 [9:29:59<9:57:06, 3.34it/s] 68%|██████▊ | 251738/371472 [9:29:59<10:53:47, 3.05it/s] 68%|██████▊ | 251739/371472 [9:30:00<10:30:42, 3.16it/s] 68%|██████▊ | 251740/371472 [9:30:00<10:32:01, 3.16it/s] {'loss': 2.8232, 'learning_rate': 3.902421944019479e-07, 'epoch': 10.84} + 68%|██████▊ | 251740/371472 [9:30:00<10:32:01, 3.16it/s] 68%|██████▊ | 251741/371472 [9:30:00<10:10:15, 3.27it/s] 68%|██████▊ | 251742/371472 [9:30:00<10:00:51, 3.32it/s] 68%|██████▊ | 251743/371472 [9:30:01<9:59:38, 3.33it/s] 68%|██████▊ | 251744/371472 [9:30:01<10:23:00, 3.20it/s] 68%|██████▊ | 251745/371472 [9:30:01<10:50:17, 3.07it/s] 68%|██████▊ | 251746/371472 [9:30:02<10:46:16, 3.09it/s] 68%|██████▊ | 251747/371472 [9:30:02<10:23:48, 3.20it/s] 68%|██████▊ | 251748/371472 [9:30:02<10:18:09, 3.23it/s] 68%|██████▊ | 251749/371472 [9:30:03<10:16:08, 3.24it/s] 68%|██████▊ | 251750/371472 [9:30:03<10:17:44, 3.23it/s] 68%|██████▊ | 251751/371472 [9:30:03<10:10:48, 3.27it/s] 68%|██████▊ | 251752/371472 [9:30:04<10:11:03, 3.27it/s] 68%|██████▊ | 251753/371472 [9:30:04<10:17:50, 3.23it/s] 68%|██████▊ | 251754/371472 [9:30:04<9:57:03, 3.34it/s] 68%|██████▊ | 251755/371472 [9:30:04<10:00:10, 3.32it/s] 68%|██████▊ | 251756/371472 [9:30:05<9:47:07, 3.40it/s] 68%|██████▊ | 251757/371472 [9:30:05<10:08:01, 3.28it/s] 68%|██████▊ | 251758/371472 [9:30:05<9:43:02, 3.42it/s] 68%|██████▊ | 251759/371472 [9:30:06<9:48:05, 3.39it/s] 68%|██████▊ | 251760/371472 [9:30:06<9:46:27, 3.40it/s] {'loss': 2.8107, 'learning_rate': 3.90193712426469e-07, 'epoch': 10.84} + 68%|██████▊ | 251760/371472 [9:30:06<9:46:27, 3.40it/s] 68%|██████▊ | 251761/371472 [9:30:06<10:00:26, 3.32it/s] 68%|██████▊ | 251762/371472 [9:30:07<10:08:16, 3.28it/s] 68%|██████▊ | 251763/371472 [9:30:07<9:42:30, 3.43it/s] 68%|██████▊ | 251764/371472 [9:30:07<9:44:37, 3.41it/s] 68%|██████▊ | 251765/371472 [9:30:07<9:44:06, 3.42it/s] 68%|██████▊ | 251766/371472 [9:30:08<9:38:41, 3.45it/s] 68%|██████▊ | 251767/371472 [9:30:08<9:52:42, 3.37it/s] 68%|██████▊ | 251768/371472 [9:30:08<9:44:36, 3.41it/s] 68%|██████▊ | 251769/371472 [9:30:09<10:22:57, 3.20it/s] 68%|██████▊ | 251770/371472 [9:30:09<10:19:28, 3.22it/s] 68%|██████▊ | 251771/371472 [9:30:09<9:55:53, 3.35it/s] 68%|██████▊ | 251772/371472 [9:30:10<9:59:01, 3.33it/s] 68%|██████▊ | 251773/371472 [9:30:10<10:31:28, 3.16it/s] 68%|██████▊ | 251774/371472 [9:30:10<10:13:21, 3.25it/s] 68%|██████▊ | 251775/371472 [9:30:10<10:09:34, 3.27it/s] 68%|██████▊ | 251776/371472 [9:30:11<10:00:40, 3.32it/s] 68%|██████▊ | 251777/371472 [9:30:11<9:51:45, 3.37it/s] 68%|██████▊ | 251778/371472 [9:30:11<10:07:05, 3.29it/s] 68%|██████▊ | 251779/371472 [9:30:12<10:04:18, 3.30it/s] 68%|██████▊ | 251780/371472 [9:30:12<9:44:29, 3.41it/s] {'loss': 2.6956, 'learning_rate': 3.901452304509901e-07, 'epoch': 10.84} + 68%|██████▊ | 251780/371472 [9:30:12<9:44:29, 3.41it/s] 68%|██████▊ | 251781/371472 [9:30:12<9:46:48, 3.40it/s] 68%|██████▊ | 251782/371472 [9:30:13<9:50:26, 3.38it/s] 68%|██████▊ | 251783/371472 [9:30:13<10:42:24, 3.11it/s] 68%|██████▊ | 251784/371472 [9:30:13<10:28:21, 3.17it/s] 68%|██████▊ | 251785/371472 [9:30:14<10:03:46, 3.30it/s] 68%|██████▊ | 251786/371472 [9:30:14<10:25:46, 3.19it/s] 68%|██████▊ | 251787/371472 [9:30:14<10:40:15, 3.12it/s] 68%|██████▊ | 251788/371472 [9:30:14<10:21:30, 3.21it/s] 68%|██████▊ | 251789/371472 [9:30:15<10:33:35, 3.15it/s] 68%|██████▊ | 251790/371472 [9:30:15<10:38:33, 3.12it/s] 68%|██████▊ | 251791/371472 [9:30:15<10:22:45, 3.20it/s] 68%|██████▊ | 251792/371472 [9:30:16<10:07:35, 3.28it/s] 68%|██████▊ | 251793/371472 [9:30:16<9:44:21, 3.41it/s] 68%|██████▊ | 251794/371472 [9:30:16<9:39:44, 3.44it/s] 68%|██████▊ | 251795/371472 [9:30:17<9:48:09, 3.39it/s] 68%|██████▊ | 251796/371472 [9:30:17<10:01:13, 3.32it/s] 68%|██████▊ | 251797/371472 [9:30:17<10:02:27, 3.31it/s] 68%|██████▊ | 251798/371472 [9:30:18<10:07:53, 3.28it/s] 68%|██████▊ | 251799/371472 [9:30:18<10:09:46, 3.27it/s] 68%|██████▊ | 251800/371472 [9:30:18<9:49:39, 3.38it/s] {'loss': 2.7122, 'learning_rate': 3.9009674847551116e-07, 'epoch': 10.85} + 68%|██████▊ | 251800/371472 [9:30:18<9:49:39, 3.38it/s] 68%|██████▊ | 251801/371472 [9:30:18<9:59:39, 3.33it/s] 68%|██████▊ | 251802/371472 [9:30:19<9:45:48, 3.40it/s] 68%|██████▊ | 251803/371472 [9:30:19<9:32:05, 3.49it/s] 68%|██████▊ | 251804/371472 [9:30:19<9:27:29, 3.51it/s] 68%|██████▊ | 251805/371472 [9:30:20<9:48:12, 3.39it/s] 68%|██████▊ | 251806/371472 [9:30:20<9:35:02, 3.47it/s] 68%|██████▊ | 251807/371472 [9:30:20<9:29:47, 3.50it/s] 68%|██████▊ | 251808/371472 [9:30:20<10:30:39, 3.16it/s] 68%|██████▊ | 251809/371472 [9:30:21<10:08:26, 3.28it/s] 68%|██████▊ | 251810/371472 [9:30:21<9:53:35, 3.36it/s] 68%|██████▊ | 251811/371472 [9:30:21<9:40:32, 3.44it/s] 68%|██████▊ | 251812/371472 [9:30:22<9:36:12, 3.46it/s] 68%|██████▊ | 251813/371472 [9:30:22<9:49:28, 3.38it/s] 68%|██████▊ | 251814/371472 [9:30:22<10:10:01, 3.27it/s] 68%|██████▊ | 251815/371472 [9:30:23<9:59:25, 3.33it/s] 68%|██████▊ | 251816/371472 [9:30:23<10:14:03, 3.25it/s] 68%|██████▊ | 251817/371472 [9:30:23<9:56:36, 3.34it/s] 68%|██████▊ | 251818/371472 [9:30:23<9:43:30, 3.42it/s] 68%|██████▊ | 251819/371472 [9:30:24<10:25:17, 3.19it/s] 68%|██████▊ | 251820/371472 [9:30:24<9:58:47, 3.33it/s] {'loss': 2.9492, 'learning_rate': 3.9004826650003233e-07, 'epoch': 10.85} + 68%|██████▊ | 251820/371472 [9:30:24<9:58:47, 3.33it/s] 68%|██████▊ | 251821/371472 [9:30:24<10:14:18, 3.25it/s] 68%|██████▊ | 251822/371472 [9:30:25<10:15:25, 3.24it/s] 68%|██████▊ | 251823/371472 [9:30:25<9:54:12, 3.36it/s] 68%|██████▊ | 251824/371472 [9:30:25<10:59:19, 3.02it/s] 68%|██████▊ | 251825/371472 [9:30:26<10:32:26, 3.15it/s] 68%|██████▊ | 251826/371472 [9:30:26<10:32:42, 3.15it/s] 68%|██████▊ | 251827/371472 [9:30:26<10:17:25, 3.23it/s] 68%|██████▊ | 251828/371472 [9:30:27<9:50:56, 3.37it/s] 68%|██████▊ | 251829/371472 [9:30:27<9:48:37, 3.39it/s] 68%|██████▊ | 251830/371472 [9:30:27<10:07:39, 3.28it/s] 68%|██████▊ | 251831/371472 [9:30:27<10:17:29, 3.23it/s] 68%|██████▊ | 251832/371472 [9:30:28<9:59:52, 3.32it/s] 68%|██████▊ | 251833/371472 [9:30:28<9:52:09, 3.37it/s] 68%|██████▊ | 251834/371472 [9:30:28<9:45:10, 3.41it/s] 68%|██████▊ | 251835/371472 [9:30:29<9:37:45, 3.45it/s] 68%|██████▊ | 251836/371472 [9:30:29<9:45:53, 3.40it/s] 68%|██████▊ | 251837/371472 [9:30:29<9:23:55, 3.54it/s] 68%|██████▊ | 251838/371472 [9:30:30<10:14:50, 3.24it/s] 68%|██████▊ | 251839/371472 [9:30:30<9:58:02, 3.33it/s] 68%|██████▊ | 251840/371472 [9:30:30<9:43:51, 3.42it/s] {'loss': 2.6983, 'learning_rate': 3.8999978452455335e-07, 'epoch': 10.85} + 68%|██████▊ | 251840/371472 [9:30:30<9:43:51, 3.42it/s] 68%|██████▊ | 251841/371472 [9:30:30<10:10:20, 3.27it/s] 68%|██████▊ | 251842/371472 [9:30:31<9:48:58, 3.39it/s] 68%|██████▊ | 251843/371472 [9:30:31<10:01:09, 3.32it/s] 68%|██████▊ | 251844/371472 [9:30:31<9:43:38, 3.42it/s] 68%|██████▊ | 251845/371472 [9:30:32<9:32:40, 3.48it/s] 68%|██████▊ | 251846/371472 [9:30:32<9:40:26, 3.43it/s] 68%|██████▊ | 251847/371472 [9:30:32<9:37:36, 3.45it/s] 68%|██████▊ | 251848/371472 [9:30:32<9:36:54, 3.46it/s] 68%|██████▊ | 251849/371472 [9:30:33<9:42:02, 3.43it/s] 68%|██████▊ | 251850/371472 [9:30:33<9:42:41, 3.42it/s] 68%|██████▊ | 251851/371472 [9:30:33<9:37:26, 3.45it/s] 68%|██████▊ | 251852/371472 [9:30:34<9:37:54, 3.45it/s] 68%|██████▊ | 251853/371472 [9:30:34<10:00:24, 3.32it/s] 68%|██████▊ | 251854/371472 [9:30:34<10:12:45, 3.25it/s] 68%|██████▊ | 251855/371472 [9:30:35<10:03:04, 3.31it/s] 68%|██████▊ | 251856/371472 [9:30:35<10:14:00, 3.25it/s] 68%|██████▊ | 251857/371472 [9:30:35<9:54:10, 3.36it/s] 68%|██████▊ | 251858/371472 [9:30:35<9:52:44, 3.36it/s] 68%|██████▊ | 251859/371472 [9:30:36<9:56:12, 3.34it/s] 68%|██████▊ | 251860/371472 [9:30:36<9:35:57, 3.46it/s] {'loss': 2.6794, 'learning_rate': 3.8995130254907453e-07, 'epoch': 10.85} + 68%|██████▊ | 251860/371472 [9:30:36<9:35:57, 3.46it/s] 68%|██████▊ | 251861/371472 [9:30:36<9:32:32, 3.48it/s] 68%|██████▊ | 251862/371472 [9:30:37<9:38:36, 3.45it/s] 68%|██████▊ | 251863/371472 [9:30:37<9:29:11, 3.50it/s] 68%|██████▊ | 251864/371472 [9:30:37<9:33:19, 3.48it/s] 68%|██████▊ | 251865/371472 [9:30:37<9:36:28, 3.46it/s] 68%|██████▊ | 251866/371472 [9:30:38<9:51:01, 3.37it/s] 68%|██████▊ | 251867/371472 [9:30:38<10:37:11, 3.13it/s] 68%|██████▊ | 251868/371472 [9:30:38<10:14:55, 3.24it/s] 68%|██████▊ | 251869/371472 [9:30:39<9:53:06, 3.36it/s] 68%|██████▊ | 251870/371472 [9:30:39<9:48:25, 3.39it/s] 68%|██████▊ | 251871/371472 [9:30:39<9:47:40, 3.39it/s] 68%|██████▊ | 251872/371472 [9:30:40<9:48:50, 3.39it/s] 68%|██████▊ | 251873/371472 [9:30:40<10:01:09, 3.32it/s] 68%|██████▊ | 251874/371472 [9:30:40<9:41:31, 3.43it/s] 68%|██████▊ | 251875/371472 [9:30:40<9:45:20, 3.41it/s] 68%|██████▊ | 251876/371472 [9:30:41<9:48:36, 3.39it/s] 68%|██████▊ | 251877/371472 [9:30:41<9:52:00, 3.37it/s] 68%|██████▊ | 251878/371472 [9:30:41<9:31:40, 3.49it/s] 68%|██████▊ | 251879/371472 [9:30:42<9:47:18, 3.39it/s] 68%|██████▊ | 251880/371472 [9:30:42<9:56:22, 3.34it/s] {'loss': 2.6689, 'learning_rate': 3.899028205735956e-07, 'epoch': 10.85} + 68%|██████▊ | 251880/371472 [9:30:42<9:56:22, 3.34it/s] 68%|██████▊ | 251881/371472 [9:30:42<9:44:31, 3.41it/s] 68%|██████▊ | 251882/371472 [9:30:42<9:36:25, 3.46it/s] 68%|██████▊ | 251883/371472 [9:30:43<9:36:38, 3.46it/s] 68%|██████▊ | 251884/371472 [9:30:43<10:30:17, 3.16it/s] 68%|██████▊ | 251885/371472 [9:30:43<10:23:40, 3.20it/s] 68%|██████▊ | 251886/371472 [9:30:44<10:29:25, 3.17it/s] 68%|██████▊ | 251887/371472 [9:30:44<10:09:58, 3.27it/s] 68%|██████▊ | 251888/371472 [9:30:44<10:22:57, 3.20it/s] 68%|██████▊ | 251889/371472 [9:30:45<10:12:24, 3.25it/s] 68%|██████▊ | 251890/371472 [9:30:45<10:28:21, 3.17it/s] 68%|██████▊ | 251891/371472 [9:30:45<10:20:26, 3.21it/s] 68%|██████▊ | 251892/371472 [9:30:46<9:58:34, 3.33it/s] 68%|██████▊ | 251893/371472 [9:30:46<9:48:24, 3.39it/s] 68%|██████▊ | 251894/371472 [9:30:46<10:02:25, 3.31it/s] 68%|██████▊ | 251895/371472 [9:30:47<9:59:19, 3.33it/s] 68%|██████▊ | 251896/371472 [9:30:47<9:48:07, 3.39it/s] 68%|██████▊ | 251897/371472 [9:30:47<9:36:10, 3.46it/s] 68%|██████▊ | 251898/371472 [9:30:47<10:09:18, 3.27it/s] 68%|██████▊ | 251899/371472 [9:30:48<9:58:29, 3.33it/s] 68%|██████▊ | 251900/371472 [9:30:48<13:41:48, 2.42it/s] {'loss': 2.717, 'learning_rate': 3.898543385981167e-07, 'epoch': 10.85} + 68%|██████▊ | 251900/371472 [9:30:48<13:41:48, 2.42it/s] 68%|██████▊ | 251901/371472 [9:30:49<12:25:11, 2.67it/s] 68%|██████▊ | 251902/371472 [9:30:49<11:49:31, 2.81it/s] 68%|██████▊ | 251903/371472 [9:30:49<11:41:52, 2.84it/s] 68%|██████▊ | 251904/371472 [9:30:50<11:14:07, 2.96it/s] 68%|██████▊ | 251905/371472 [9:30:50<11:23:12, 2.92it/s] 68%|██████▊ | 251906/371472 [9:30:50<10:50:51, 3.06it/s] 68%|██████▊ | 251907/371472 [9:30:51<10:32:08, 3.15it/s] 68%|██████▊ | 251908/371472 [9:30:51<10:04:48, 3.29it/s] 68%|██████▊ | 251909/371472 [9:30:51<9:44:00, 3.41it/s] 68%|██████▊ | 251910/371472 [9:30:51<10:42:59, 3.10it/s] 68%|██████▊ | 251911/371472 [9:30:52<10:31:18, 3.16it/s] 68%|██████▊ | 251912/371472 [9:30:52<10:31:55, 3.15it/s] 68%|██████▊ | 251913/371472 [9:30:52<10:19:35, 3.22it/s] 68%|██████▊ | 251914/371472 [9:30:53<10:11:20, 3.26it/s] 68%|██████▊ | 251915/371472 [9:30:53<10:10:51, 3.26it/s] 68%|██████▊ | 251916/371472 [9:30:53<10:02:51, 3.31it/s] 68%|██████▊ | 251917/371472 [9:30:54<10:00:24, 3.32it/s] 68%|██████▊ | 251918/371472 [9:30:54<9:48:05, 3.39it/s] 68%|██████▊ | 251919/371472 [9:30:54<9:41:51, 3.42it/s] 68%|██████▊ | 251920/371472 [9:30:54<9:40:32, 3.43it/s] {'loss': 2.7677, 'learning_rate': 3.898058566226378e-07, 'epoch': 10.85} + 68%|██████▊ | 251920/371472 [9:30:54<9:40:32, 3.43it/s] 68%|██████▊ | 251921/371472 [9:30:55<9:37:01, 3.45it/s] 68%|██████▊ | 251922/371472 [9:30:55<10:01:42, 3.31it/s] 68%|██████▊ | 251923/371472 [9:30:55<9:56:18, 3.34it/s] 68%|██████▊ | 251924/371472 [9:30:56<9:52:06, 3.37it/s] 68%|██████▊ | 251925/371472 [9:30:56<9:43:25, 3.42it/s] 68%|██████▊ | 251926/371472 [9:30:56<9:32:43, 3.48it/s] 68%|██████▊ | 251927/371472 [9:30:57<9:37:32, 3.45it/s] 68%|██████▊ | 251928/371472 [9:30:57<9:47:00, 3.39it/s] 68%|██████▊ | 251929/371472 [9:30:57<9:37:02, 3.45it/s] 68%|██████▊ | 251930/371472 [9:30:57<9:32:22, 3.48it/s] 68%|██████▊ | 251931/371472 [9:30:58<9:42:31, 3.42it/s] 68%|██████▊ | 251932/371472 [9:30:58<9:41:28, 3.43it/s] 68%|██████▊ | 251933/371472 [9:30:58<9:29:10, 3.50it/s] 68%|██████▊ | 251934/371472 [9:30:59<9:41:03, 3.43it/s] 68%|██████▊ | 251935/371472 [9:30:59<9:34:36, 3.47it/s] 68%|██████▊ | 251936/371472 [9:30:59<9:53:36, 3.36it/s] 68%|██████▊ | 251937/371472 [9:30:59<9:35:03, 3.46it/s] 68%|██████▊ | 251938/371472 [9:31:00<10:08:14, 3.28it/s] 68%|██████▊ | 251939/371472 [9:31:00<11:04:03, 3.00it/s] 68%|██████▊ | 251940/371472 [9:31:00<10:29:03, 3.17it/s] {'loss': 2.7662, 'learning_rate': 3.89757374647159e-07, 'epoch': 10.85} + 68%|██████▊ | 251940/371472 [9:31:00<10:29:03, 3.17it/s] 68%|██████▊ | 251941/371472 [9:31:01<10:12:05, 3.25it/s] 68%|██████▊ | 251942/371472 [9:31:01<9:43:02, 3.42it/s] 68%|██████▊ | 251943/371472 [9:31:01<9:34:49, 3.47it/s] 68%|██████▊ | 251944/371472 [9:31:02<10:39:43, 3.11it/s] 68%|██████▊ | 251945/371472 [9:31:02<10:22:45, 3.20it/s] 68%|██████▊ | 251946/371472 [9:31:02<10:18:14, 3.22it/s] 68%|██████▊ | 251947/371472 [9:31:03<10:19:00, 3.22it/s] 68%|██████▊ | 251948/371472 [9:31:03<10:11:34, 3.26it/s] 68%|██████▊ | 251949/371472 [9:31:03<10:06:55, 3.28it/s] 68%|██████▊ | 251950/371472 [9:31:03<10:00:18, 3.32it/s] 68%|██████▊ | 251951/371472 [9:31:04<10:00:50, 3.32it/s] 68%|██████▊ | 251952/371472 [9:31:04<9:49:10, 3.38it/s] 68%|██████▊ | 251953/371472 [9:31:04<9:44:02, 3.41it/s] 68%|██████▊ | 251954/371472 [9:31:05<10:34:03, 3.14it/s] 68%|██████▊ | 251955/371472 [9:31:05<10:46:28, 3.08it/s] 68%|██████▊ | 251956/371472 [9:31:05<10:19:43, 3.21it/s] 68%|██████▊ | 251957/371472 [9:31:06<11:12:03, 2.96it/s] 68%|██████▊ | 251958/371472 [9:31:06<10:40:21, 3.11it/s] 68%|██████▊ | 251959/371472 [9:31:06<10:19:44, 3.21it/s] 68%|██████▊ | 251960/371472 [9:31:07<10:31:22, 3.15it/s] {'loss': 2.6663, 'learning_rate': 3.8970889267168e-07, 'epoch': 10.85} + 68%|██████▊ | 251960/371472 [9:31:07<10:31:22, 3.15it/s] 68%|██████▊ | 251961/371472 [9:31:07<10:16:41, 3.23it/s] 68%|██████▊ | 251962/371472 [9:31:07<10:25:52, 3.18it/s] 68%|██████▊ | 251963/371472 [9:31:08<10:30:47, 3.16it/s] 68%|██████▊ | 251964/371472 [9:31:08<10:34:00, 3.14it/s] 68%|██████▊ | 251965/371472 [9:31:08<10:06:32, 3.28it/s] 68%|██████▊ | 251966/371472 [9:31:09<10:43:48, 3.09it/s] 68%|██████▊ | 251967/371472 [9:31:09<10:17:13, 3.23it/s] 68%|██████▊ | 251968/371472 [9:31:09<10:00:58, 3.31it/s] 68%|██████▊ | 251969/371472 [9:31:09<9:52:30, 3.36it/s] 68%|██████▊ | 251970/371472 [9:31:10<9:59:36, 3.32it/s] 68%|██████▊ | 251971/371472 [9:31:10<9:48:37, 3.38it/s] 68%|██████▊ | 251972/371472 [9:31:10<9:37:25, 3.45it/s] 68%|██████▊ | 251973/371472 [9:31:11<10:14:19, 3.24it/s] 68%|██████▊ | 251974/371472 [9:31:11<10:11:05, 3.26it/s] 68%|██████▊ | 251975/371472 [9:31:11<9:55:50, 3.34it/s] 68%|██████▊ | 251976/371472 [9:31:11<10:03:31, 3.30it/s] 68%|██████▊ | 251977/371472 [9:31:12<10:37:26, 3.12it/s] 68%|██████▊ | 251978/371472 [9:31:12<10:47:17, 3.08it/s] 68%|██████▊ | 251979/371472 [9:31:12<10:43:51, 3.09it/s] 68%|██████▊ | 251980/371472 [9:31:13<10:26:36, 3.18it/s] {'loss': 2.8586, 'learning_rate': 3.8966041069620117e-07, 'epoch': 10.85} + 68%|██████▊ | 251980/371472 [9:31:13<10:26:36, 3.18it/s] 68%|██████▊ | 251981/371472 [9:31:13<10:08:44, 3.27it/s] 68%|██████▊ | 251982/371472 [9:31:13<9:57:26, 3.33it/s] 68%|██████▊ | 251983/371472 [9:31:14<10:06:24, 3.28it/s] 68%|██████▊ | 251984/371472 [9:31:14<10:01:16, 3.31it/s] 68%|██████▊ | 251985/371472 [9:31:14<9:59:14, 3.32it/s] 68%|██████▊ | 251986/371472 [9:31:15<9:49:32, 3.38it/s] 68%|██████▊ | 251987/371472 [9:31:15<9:44:09, 3.41it/s] 68%|██████▊ | 251988/371472 [9:31:15<9:30:53, 3.49it/s] 68%|██████▊ | 251989/371472 [9:31:15<9:35:30, 3.46it/s] 68%|██████▊ | 251990/371472 [9:31:16<9:31:13, 3.49it/s] 68%|██████▊ | 251991/371472 [9:31:16<9:21:19, 3.55it/s] 68%|██████▊ | 251992/371472 [9:31:16<9:15:57, 3.58it/s] 68%|██████▊ | 251993/371472 [9:31:17<9:09:37, 3.62it/s] 68%|██████▊ | 251994/371472 [9:31:17<9:09:16, 3.63it/s] 68%|██████▊ | 251995/371472 [9:31:17<9:14:40, 3.59it/s] 68%|██████▊ | 251996/371472 [9:31:17<9:26:46, 3.51it/s] 68%|██████▊ | 251997/371472 [9:31:18<9:35:24, 3.46it/s] 68%|██████▊ | 251998/371472 [9:31:18<9:41:38, 3.42it/s] 68%|██████▊ | 251999/371472 [9:31:18<9:38:26, 3.44it/s] 68%|██████▊ | 252000/371472 [9:31:19<9:44:37, 3.41it/s] {'loss': 2.632, 'learning_rate': 3.8961192872072224e-07, 'epoch': 10.85} + 68%|██████▊ | 252000/371472 [9:31:19<9:44:37, 3.41it/s] 68%|██████▊ | 252001/371472 [9:31:19<9:43:25, 3.41it/s] 68%|██████▊ | 252002/371472 [9:31:19<9:40:33, 3.43it/s] 68%|██████▊ | 252003/371472 [9:31:19<9:35:55, 3.46it/s] 68%|██████▊ | 252004/371472 [9:31:20<9:34:43, 3.46it/s] 68%|██████▊ | 252005/371472 [9:31:20<9:39:34, 3.44it/s] 68%|██████▊ | 252006/371472 [9:31:20<9:47:41, 3.39it/s] 68%|██████▊ | 252007/371472 [9:31:21<9:54:57, 3.35it/s] 68%|██████▊ | 252008/371472 [9:31:21<9:55:55, 3.34it/s] 68%|██████▊ | 252009/371472 [9:31:21<9:58:37, 3.33it/s] 68%|██████▊ | 252010/371472 [9:31:22<9:56:09, 3.34it/s] 68%|██████▊ | 252011/371472 [9:31:22<11:13:51, 2.95it/s] 68%|██████▊ | 252012/371472 [9:31:22<11:08:35, 2.98it/s] 68%|██████▊ | 252013/371472 [9:31:23<10:50:24, 3.06it/s] 68%|██████▊ | 252014/371472 [9:31:23<10:15:12, 3.24it/s] 68%|██████▊ | 252015/371472 [9:31:23<10:10:04, 3.26it/s] 68%|██████▊ | 252016/371472 [9:31:23<9:48:37, 3.38it/s] 68%|██████▊ | 252017/371472 [9:31:24<10:58:12, 3.02it/s] 68%|██████▊ | 252018/371472 [9:31:24<10:39:19, 3.11it/s] 68%|██████▊ | 252019/371472 [9:31:24<10:12:16, 3.25it/s] 68%|██████▊ | 252020/371472 [9:31:25<10:49:03, 3.07it/s] {'loss': 2.6612, 'learning_rate': 3.8956344674524337e-07, 'epoch': 10.85} + 68%|██████▊ | 252020/371472 [9:31:25<10:49:03, 3.07it/s] 68%|██████▊ | 252021/371472 [9:31:25<11:07:28, 2.98it/s] 68%|██████▊ | 252022/371472 [9:31:25<10:36:24, 3.13it/s] 68%|██████▊ | 252023/371472 [9:31:26<10:08:03, 3.27it/s] 68%|██████▊ | 252024/371472 [9:31:26<9:45:39, 3.40it/s] 68%|██████▊ | 252025/371472 [9:31:26<9:26:21, 3.52it/s] 68%|██████▊ | 252026/371472 [9:31:27<9:29:44, 3.49it/s] 68%|██████▊ | 252027/371472 [9:31:27<9:10:55, 3.61it/s] 68%|██████▊ | 252028/371472 [9:31:27<9:07:35, 3.64it/s] 68%|██████▊ | 252029/371472 [9:31:27<9:21:40, 3.54it/s] 68%|██████▊ | 252030/371472 [9:31:28<9:43:29, 3.41it/s] 68%|██████▊ | 252031/371472 [9:31:28<9:37:18, 3.45it/s] 68%|██████▊ | 252032/371472 [9:31:28<9:43:03, 3.41it/s] 68%|██████▊ | 252033/371472 [9:31:29<9:51:17, 3.37it/s] 68%|██████▊ | 252034/371472 [9:31:29<9:42:36, 3.42it/s] 68%|██████▊ | 252035/371472 [9:31:29<9:52:34, 3.36it/s] 68%|██████▊ | 252036/371472 [9:31:29<9:32:07, 3.48it/s] 68%|██████▊ | 252037/371472 [9:31:30<9:42:26, 3.42it/s] 68%|██████▊ | 252038/371472 [9:31:30<9:24:04, 3.53it/s] 68%|██████▊ | 252039/371472 [9:31:30<9:40:42, 3.43it/s] 68%|██████▊ | 252040/371472 [9:31:31<9:44:38, 3.40it/s] {'loss': 2.8107, 'learning_rate': 3.8951496476976444e-07, 'epoch': 10.86} + 68%|██████▊ | 252040/371472 [9:31:31<9:44:38, 3.40it/s] 68%|██████▊ | 252041/371472 [9:31:31<9:39:28, 3.44it/s] 68%|██████▊ | 252042/371472 [9:31:31<9:23:41, 3.53it/s] 68%|██████▊ | 252043/371472 [9:31:31<9:25:52, 3.52it/s] 68%|██████▊ | 252044/371472 [9:31:32<9:21:11, 3.55it/s] 68%|██████▊ | 252045/371472 [9:31:32<9:23:21, 3.53it/s] 68%|██████▊ | 252046/371472 [9:31:32<9:19:34, 3.56it/s] 68%|██████▊ | 252047/371472 [9:31:33<9:30:14, 3.49it/s] 68%|██████▊ | 252048/371472 [9:31:33<9:40:56, 3.43it/s] 68%|██████▊ | 252049/371472 [9:31:33<10:01:53, 3.31it/s] 68%|██████▊ | 252050/371472 [9:31:34<10:17:40, 3.22it/s] 68%|██████▊ | 252051/371472 [9:31:34<10:38:40, 3.12it/s] 68%|██████▊ | 252052/371472 [9:31:34<10:46:00, 3.08it/s] 68%|██████▊ | 252053/371472 [9:31:35<10:49:07, 3.07it/s] 68%|██████▊ | 252054/371472 [9:31:35<10:29:52, 3.16it/s] 68%|██████▊ | 252055/371472 [9:31:35<10:06:30, 3.28it/s] 68%|██████▊ | 252056/371472 [9:31:35<9:58:49, 3.32it/s] 68%|██████▊ | 252057/371472 [9:31:36<10:10:19, 3.26it/s] 68%|██████▊ | 252058/371472 [9:31:36<10:13:45, 3.24it/s] 68%|██████▊ | 252059/371472 [9:31:36<10:03:36, 3.30it/s] 68%|██████▊ | 252060/371472 [9:31:37<9:49:58, 3.37it/s] {'loss': 2.6739, 'learning_rate': 3.894664827942856e-07, 'epoch': 10.86} + 68%|██████▊ | 252060/371472 [9:31:37<9:49:58, 3.37it/s] 68%|██████▊ | 252061/371472 [9:31:37<10:07:00, 3.28it/s] 68%|██████▊ | 252062/371472 [9:31:37<10:12:43, 3.25it/s] 68%|██████▊ | 252063/371472 [9:31:38<10:07:47, 3.27it/s] 68%|██████▊ | 252064/371472 [9:31:38<9:54:45, 3.35it/s] 68%|██████▊ | 252065/371472 [9:31:38<9:55:03, 3.34it/s] 68%|██████▊ | 252066/371472 [9:31:38<9:46:07, 3.40it/s] 68%|██████▊ | 252067/371472 [9:31:39<9:45:12, 3.40it/s] 68%|██████▊ | 252068/371472 [9:31:39<9:44:43, 3.40it/s] 68%|██████▊ | 252069/371472 [9:31:39<10:14:50, 3.24it/s] 68%|██████▊ | 252070/371472 [9:31:40<10:10:54, 3.26it/s] 68%|██████▊ | 252071/371472 [9:31:40<10:24:24, 3.19it/s] 68%|██████▊ | 252072/371472 [9:31:40<10:09:06, 3.27it/s] 68%|██████▊ | 252073/371472 [9:31:41<9:58:41, 3.32it/s] 68%|██████▊ | 252074/371472 [9:31:41<9:51:03, 3.37it/s] 68%|██████▊ | 252075/371472 [9:31:41<9:40:10, 3.43it/s] 68%|██████▊ | 252076/371472 [9:31:41<10:29:34, 3.16it/s] 68%|██████▊ | 252077/371472 [9:31:42<10:12:53, 3.25it/s] 68%|██████▊ | 252078/371472 [9:31:42<10:33:03, 3.14it/s] 68%|██████▊ | 252079/371472 [9:31:42<10:12:20, 3.25it/s] 68%|██████▊ | 252080/371472 [9:31:43<10:24:18, 3.19it/s] {'loss': 2.7777, 'learning_rate': 3.8941800081880663e-07, 'epoch': 10.86} + 68%|██████▊ | 252080/371472 [9:31:43<10:24:18, 3.19it/s] 68%|██████▊ | 252081/371472 [9:31:43<10:13:08, 3.25it/s] 68%|██████▊ | 252082/371472 [9:31:43<9:50:06, 3.37it/s] 68%|██████▊ | 252083/371472 [9:31:44<9:35:49, 3.46it/s] 68%|██████▊ | 252084/371472 [9:31:44<10:48:53, 3.07it/s] 68%|██████▊ | 252085/371472 [9:31:44<11:02:44, 3.00it/s] 68%|██████▊ | 252086/371472 [9:31:45<11:42:19, 2.83it/s] 68%|██████▊ | 252087/371472 [9:31:45<11:09:40, 2.97it/s] 68%|██████▊ | 252088/371472 [9:31:45<10:56:14, 3.03it/s] 68%|██████▊ | 252089/371472 [9:31:46<10:24:29, 3.19it/s] 68%|██████▊ | 252090/371472 [9:31:46<10:40:52, 3.10it/s] 68%|██████▊ | 252091/371472 [9:31:46<10:27:49, 3.17it/s] 68%|██████▊ | 252092/371472 [9:31:47<10:15:41, 3.23it/s] 68%|██████▊ | 252093/371472 [9:31:47<9:54:07, 3.35it/s] 68%|██████▊ | 252094/371472 [9:31:47<10:17:44, 3.22it/s] 68%|██████▊ | 252095/371472 [9:31:47<10:09:48, 3.26it/s] 68%|██████▊ | 252096/371472 [9:31:48<9:56:01, 3.34it/s] 68%|██████▊ | 252097/371472 [9:31:48<9:51:24, 3.36it/s] 68%|██████▊ | 252098/371472 [9:31:48<9:53:31, 3.35it/s] 68%|██████▊ | 252099/371472 [9:31:49<10:16:52, 3.23it/s] 68%|██████▊ | 252100/371472 [9:31:49<10:17:28, 3.22it/s] {'loss': 2.7407, 'learning_rate': 3.893695188433278e-07, 'epoch': 10.86} + 68%|██████▊ | 252100/371472 [9:31:49<10:17:28, 3.22it/s] 68%|██████▊ | 252101/371472 [9:31:49<10:02:38, 3.30it/s] 68%|██████▊ | 252102/371472 [9:31:50<9:50:52, 3.37it/s] 68%|██████▊ | 252103/371472 [9:31:50<9:46:10, 3.39it/s] 68%|██████▊ | 252104/371472 [9:31:50<9:46:13, 3.39it/s] 68%|██████▊ | 252105/371472 [9:31:50<9:21:10, 3.55it/s] 68%|██████▊ | 252106/371472 [9:31:51<9:35:27, 3.46it/s] 68%|██████▊ | 252107/371472 [9:31:51<9:30:19, 3.49it/s] 68%|██████▊ | 252108/371472 [9:31:51<9:19:40, 3.55it/s] 68%|██████▊ | 252109/371472 [9:31:52<9:29:15, 3.49it/s] 68%|██████▊ | 252110/371472 [9:31:52<9:29:16, 3.49it/s] 68%|██████▊ | 252111/371472 [9:31:52<10:18:32, 3.22it/s] 68%|██████▊ | 252112/371472 [9:31:52<10:10:47, 3.26it/s] 68%|██████▊ | 252113/371472 [9:31:53<10:16:27, 3.23it/s] 68%|██████▊ | 252114/371472 [9:31:53<10:19:16, 3.21it/s] 68%|██████▊ | 252115/371472 [9:31:53<10:14:25, 3.24it/s] 68%|██████▊ | 252116/371472 [9:31:54<10:13:05, 3.24it/s] 68%|██████▊ | 252117/371472 [9:31:54<10:51:50, 3.05it/s] 68%|██████▊ | 252118/371472 [9:31:54<10:37:37, 3.12it/s] 68%|██████▊ | 252119/371472 [9:31:55<10:23:00, 3.19it/s] 68%|██████▊ | 252120/371472 [9:31:55<10:01:31, 3.31it/s] {'loss': 2.6793, 'learning_rate': 3.893210368678489e-07, 'epoch': 10.86} + 68%|██████▊ | 252120/371472 [9:31:55<10:01:31, 3.31it/s] 68%|██████▊ | 252121/371472 [9:31:55<10:03:25, 3.30it/s] 68%|██████▊ | 252122/371472 [9:31:56<9:50:57, 3.37it/s] 68%|██████▊ | 252123/371472 [9:31:56<9:55:39, 3.34it/s] 68%|██████▊ | 252124/371472 [9:31:56<10:02:17, 3.30it/s] 68%|██████▊ | 252125/371472 [9:31:56<9:53:14, 3.35it/s] 68%|██████▊ | 252126/371472 [9:31:57<10:22:15, 3.20it/s] 68%|██████▊ | 252127/371472 [9:31:57<10:23:15, 3.19it/s] 68%|██████▊ | 252128/371472 [9:31:57<10:06:06, 3.28it/s] 68%|██████▊ | 252129/371472 [9:31:58<10:06:25, 3.28it/s] 68%|██████▊ | 252130/371472 [9:31:58<9:53:34, 3.35it/s] 68%|██████▊ | 252131/371472 [9:31:58<9:53:31, 3.35it/s] 68%|██████▊ | 252132/371472 [9:31:59<9:50:20, 3.37it/s] 68%|██████▊ | 252133/371472 [9:31:59<10:22:31, 3.20it/s] 68%|██████▊ | 252134/371472 [9:31:59<10:07:44, 3.27it/s] 68%|██████▊ | 252135/371472 [9:32:00<10:32:05, 3.15it/s] 68%|██████▊ | 252136/371472 [9:32:00<10:35:29, 3.13it/s] 68%|██████▊ | 252137/371472 [9:32:00<10:17:21, 3.22it/s] 68%|██████▊ | 252138/371472 [9:32:00<10:10:59, 3.26it/s] 68%|██████▊ | 252139/371472 [9:32:01<10:33:09, 3.14it/s] 68%|██████▊ | 252140/371472 [9:32:01<10:29:39, 3.16it/s] {'loss': 2.5803, 'learning_rate': 3.8927255489237e-07, 'epoch': 10.86} + 68%|██████▊ | 252140/371472 [9:32:01<10:29:39, 3.16it/s] 68%|██████▊ | 252141/371472 [9:32:01<10:14:00, 3.24it/s] 68%|██████▊ | 252142/371472 [9:32:02<10:14:30, 3.24it/s] 68%|██████▊ | 252143/371472 [9:32:02<10:14:48, 3.23it/s] 68%|██████▊ | 252144/371472 [9:32:02<10:42:40, 3.09it/s] 68%|██████▊ | 252145/371472 [9:32:03<10:18:57, 3.21it/s] 68%|██████▊ | 252146/371472 [9:32:03<10:05:18, 3.29it/s] 68%|██████▊ | 252147/371472 [9:32:03<9:43:16, 3.41it/s] 68%|██████▊ | 252148/371472 [9:32:04<9:39:17, 3.43it/s] 68%|██████▊ | 252149/371472 [9:32:04<9:41:02, 3.42it/s] 68%|██████▊ | 252150/371472 [9:32:04<9:45:06, 3.40it/s] 68%|██████▊ | 252151/371472 [9:32:04<9:42:03, 3.42it/s] 68%|██████▊ | 252152/371472 [9:32:05<10:19:49, 3.21it/s] 68%|██████▊ | 252153/371472 [9:32:05<10:42:42, 3.09it/s] 68%|██████▊ | 252154/371472 [9:32:05<10:28:33, 3.16it/s] 68%|██████▊ | 252155/371472 [9:32:06<10:45:15, 3.08it/s] 68%|██████▊ | 252156/371472 [9:32:06<10:24:59, 3.18it/s] 68%|██████▊ | 252157/371472 [9:32:06<10:25:19, 3.18it/s] 68%|██████▊ | 252158/371472 [9:32:07<10:10:04, 3.26it/s] 68%|██████▊ | 252159/371472 [9:32:07<10:07:51, 3.27it/s] 68%|██████▊ | 252160/371472 [9:32:07<10:18:27, 3.22it/s] {'loss': 2.8207, 'learning_rate': 3.892240729168911e-07, 'epoch': 10.86} + 68%|██████▊ | 252160/371472 [9:32:07<10:18:27, 3.22it/s] 68%|██████▊ | 252161/371472 [9:32:08<10:08:55, 3.27it/s] 68%|██████▊ | 252162/371472 [9:32:08<10:41:10, 3.10it/s] 68%|██████▊ | 252163/371472 [9:32:08<10:27:57, 3.17it/s] 68%|██████▊ | 252164/371472 [9:32:09<10:32:49, 3.14it/s] 68%|██████▊ | 252165/371472 [9:32:09<10:18:54, 3.21it/s] 68%|██████▊ | 252166/371472 [9:32:09<10:18:18, 3.22it/s] 68%|██████▊ | 252167/371472 [9:32:09<10:13:25, 3.24it/s] 68%|██████▊ | 252168/371472 [9:32:10<10:07:23, 3.27it/s] 68%|██████▊ | 252169/371472 [9:32:10<10:01:30, 3.31it/s] 68%|██████▊ | 252170/371472 [9:32:10<9:58:54, 3.32it/s] 68%|██████▊ | 252171/371472 [9:32:11<10:00:51, 3.31it/s] 68%|██████▊ | 252172/371472 [9:32:11<9:45:21, 3.40it/s] 68%|██████▊ | 252173/371472 [9:32:11<10:18:44, 3.21it/s] 68%|██████▊ | 252174/371472 [9:32:12<9:58:56, 3.32it/s] 68%|██████▊ | 252175/371472 [9:32:12<10:00:28, 3.31it/s] 68%|██████▊ | 252176/371472 [9:32:12<9:37:41, 3.44it/s] 68%|██████▊ | 252177/371472 [9:32:12<10:09:39, 3.26it/s] 68%|██████▊ | 252178/371472 [9:32:13<10:20:41, 3.20it/s] 68%|██████▊ | 252179/371472 [9:32:13<10:25:50, 3.18it/s] 68%|██████▊ | 252180/371472 [9:32:13<9:55:54, 3.34it/s] {'loss': 2.7134, 'learning_rate': 3.8917559094141226e-07, 'epoch': 10.86} + 68%|██████▊ | 252180/371472 [9:32:13<9:55:54, 3.34it/s] 68%|██████▊ | 252181/371472 [9:32:14<9:55:39, 3.34it/s] 68%|██████▊ | 252182/371472 [9:32:14<10:30:20, 3.15it/s] 68%|██████▊ | 252183/371472 [9:32:14<10:55:03, 3.04it/s] 68%|██████▊ | 252184/371472 [9:32:15<10:37:11, 3.12it/s] 68%|██████▊ | 252185/371472 [9:32:15<10:33:32, 3.14it/s] 68%|██████▊ | 252186/371472 [9:32:15<10:20:16, 3.21it/s] 68%|██████▊ | 252187/371472 [9:32:16<10:52:08, 3.05it/s] 68%|██████▊ | 252188/371472 [9:32:16<10:26:42, 3.17it/s] 68%|██████▊ | 252189/371472 [9:32:16<10:19:01, 3.21it/s] 68%|██████▊ | 252190/371472 [9:32:17<10:08:55, 3.26it/s] 68%|██████▊ | 252191/371472 [9:32:17<10:14:22, 3.24it/s] 68%|██████▊ | 252192/371472 [9:32:17<10:15:52, 3.23it/s] 68%|██████▊ | 252193/371472 [9:32:18<10:25:49, 3.18it/s] 68%|██████▊ | 252194/371472 [9:32:18<10:32:06, 3.15it/s] 68%|██████▊ | 252195/371472 [9:32:18<10:45:15, 3.08it/s] 68%|██████▊ | 252196/371472 [9:32:18<10:23:14, 3.19it/s] 68%|██████▊ | 252197/371472 [9:32:19<10:03:20, 3.29it/s] 68%|██████▊ | 252198/371472 [9:32:19<10:05:10, 3.28it/s] 68%|██████▊ | 252199/371472 [9:32:19<10:17:24, 3.22it/s] 68%|██████▊ | 252200/371472 [9:32:20<10:12:12, 3.25it/s] {'loss': 2.4867, 'learning_rate': 3.8912710896593333e-07, 'epoch': 10.86} + 68%|██████▊ | 252200/371472 [9:32:20<10:12:12, 3.25it/s] 68%|██████▊ | 252201/371472 [9:32:20<10:20:42, 3.20it/s] 68%|██████▊ | 252202/371472 [9:32:20<10:13:31, 3.24it/s] 68%|██████▊ | 252203/371472 [9:32:21<10:18:05, 3.22it/s] 68%|██████▊ | 252204/371472 [9:32:21<10:05:46, 3.28it/s] 68%|██████▊ | 252205/371472 [9:32:21<10:11:14, 3.25it/s] 68%|██████▊ | 252206/371472 [9:32:22<10:11:10, 3.25it/s] 68%|██████▊ | 252207/371472 [9:32:22<10:07:41, 3.27it/s] 68%|██████▊ | 252208/371472 [9:32:22<10:12:15, 3.25it/s] 68%|██████▊ | 252209/371472 [9:32:22<10:14:50, 3.23it/s] 68%|██████▊ | 252210/371472 [9:32:23<10:02:59, 3.30it/s] 68%|██████▊ | 252211/371472 [9:32:23<10:07:20, 3.27it/s] 68%|██████▊ | 252212/371472 [9:32:23<10:20:31, 3.20it/s] 68%|██████▊ | 252213/371472 [9:32:24<11:05:54, 2.98it/s] 68%|██████▊ | 252214/371472 [9:32:24<10:42:37, 3.09it/s] 68%|██████▊ | 252215/371472 [9:32:24<10:42:42, 3.09it/s] 68%|██████▊ | 252216/371472 [9:32:25<10:37:26, 3.12it/s] 68%|██████▊ | 252217/371472 [9:32:25<10:23:47, 3.19it/s] 68%|██████▊ | 252218/371472 [9:32:25<10:26:55, 3.17it/s] 68%|██████▊ | 252219/371472 [9:32:26<10:19:16, 3.21it/s] 68%|██████▊ | 252220/371472 [9:32:26<10:15:55, 3.23it/s] {'loss': 2.6984, 'learning_rate': 3.8907862699045445e-07, 'epoch': 10.86} + 68%|██████▊ | 252220/371472 [9:32:26<10:15:55, 3.23it/s] 68%|██████▊ | 252221/371472 [9:32:26<10:13:14, 3.24it/s] 68%|██████▊ | 252222/371472 [9:32:27<10:04:35, 3.29it/s] 68%|██████▊ | 252223/371472 [9:32:27<9:58:35, 3.32it/s] 68%|██████▊ | 252224/371472 [9:32:27<10:05:44, 3.28it/s] 68%|██████▊ | 252225/371472 [9:32:27<10:15:52, 3.23it/s] 68%|██████▊ | 252226/371472 [9:32:28<10:29:04, 3.16it/s] 68%|██████▊ | 252227/371472 [9:32:28<10:15:28, 3.23it/s] 68%|██████▊ | 252228/371472 [9:32:28<10:20:27, 3.20it/s] 68%|██████▊ | 252229/371472 [9:32:29<10:08:42, 3.26it/s] 68%|██████▊ | 252230/371472 [9:32:29<10:39:47, 3.11it/s] 68%|██████▊ | 252231/371472 [9:32:29<10:25:36, 3.18it/s] 68%|██████▊ | 252232/371472 [9:32:30<10:19:45, 3.21it/s] 68%|██████▊ | 252233/371472 [9:32:30<9:59:48, 3.31it/s] 68%|██████▊ | 252234/371472 [9:32:30<10:31:29, 3.15it/s] 68%|██████▊ | 252235/371472 [9:32:31<10:17:07, 3.22it/s] 68%|██████▊ | 252236/371472 [9:32:31<10:11:23, 3.25it/s] 68%|██████▊ | 252237/371472 [9:32:31<9:50:56, 3.36it/s] 68%|██████▊ | 252238/371472 [9:32:31<9:36:46, 3.45it/s] 68%|██████▊ | 252239/371472 [9:32:32<9:37:23, 3.44it/s] 68%|██████▊ | 252240/371472 [9:32:32<9:33:55, 3.46it/s] {'loss': 2.7083, 'learning_rate': 3.890301450149755e-07, 'epoch': 10.86} + 68%|██████▊ | 252240/371472 [9:32:32<9:33:55, 3.46it/s] 68%|██████▊ | 252241/371472 [9:32:32<9:29:05, 3.49it/s] 68%|██████▊ | 252242/371472 [9:32:33<9:19:20, 3.55it/s] 68%|██████▊ | 252243/371472 [9:32:33<9:16:57, 3.57it/s] 68%|██████▊ | 252244/371472 [9:32:33<9:22:37, 3.53it/s] 68%|██████▊ | 252245/371472 [9:32:34<11:32:16, 2.87it/s] 68%|██████▊ | 252246/371472 [9:32:34<11:01:59, 3.00it/s] 68%|██████▊ | 252247/371472 [9:32:34<10:38:41, 3.11it/s] 68%|██████▊ | 252248/371472 [9:32:34<10:05:54, 3.28it/s] 68%|██████▊ | 252249/371472 [9:32:35<9:52:17, 3.35it/s] 68%|██████▊ | 252250/371472 [9:32:35<9:42:52, 3.41it/s] 68%|██████▊ | 252251/371472 [9:32:35<10:15:40, 3.23it/s] 68%|██████▊ | 252252/371472 [9:32:36<10:16:45, 3.22it/s] 68%|██████▊ | 252253/371472 [9:32:36<10:26:16, 3.17it/s] 68%|██████▊ | 252254/371472 [9:32:36<10:28:26, 3.16it/s] 68%|██████▊ | 252255/371472 [9:32:37<10:14:37, 3.23it/s] 68%|██████▊ | 252256/371472 [9:32:37<9:54:54, 3.34it/s] 68%|██████▊ | 252257/371472 [9:32:37<10:34:20, 3.13it/s] 68%|██████▊ | 252258/371472 [9:32:38<10:21:22, 3.20it/s] 68%|██████▊ | 252259/371472 [9:32:38<11:13:01, 2.95it/s] 68%|██████▊ | 252260/371472 [9:32:38<10:29:35, 3.16it/s] {'loss': 2.6565, 'learning_rate': 3.889816630394967e-07, 'epoch': 10.87} + 68%|██████▊ | 252260/371472 [9:32:38<10:29:35, 3.16it/s] 68%|██████▊ | 252261/371472 [9:32:39<10:31:54, 3.14it/s] 68%|██████▊ | 252262/371472 [9:32:39<10:19:43, 3.21it/s] 68%|██████▊ | 252263/371472 [9:32:39<10:07:23, 3.27it/s] 68%|██████▊ | 252264/371472 [9:32:39<10:04:58, 3.28it/s] 68%|██████▊ | 252265/371472 [9:32:40<9:54:28, 3.34it/s] 68%|██████▊ | 252266/371472 [9:32:40<9:48:10, 3.38it/s] 68%|██████▊ | 252267/371472 [9:32:40<9:47:18, 3.38it/s] 68%|██████▊ | 252268/371472 [9:32:41<9:30:26, 3.48it/s] 68%|██████▊ | 252269/371472 [9:32:41<10:09:04, 3.26it/s] 68%|██████▊ | 252270/371472 [9:32:41<10:08:10, 3.27it/s] 68%|██████▊ | 252271/371472 [9:32:42<10:32:15, 3.14it/s] 68%|██████▊ | 252272/371472 [9:32:42<10:05:20, 3.28it/s] 68%|██████▊ | 252273/371472 [9:32:42<10:47:11, 3.07it/s] 68%|██████▊ | 252274/371472 [9:32:43<10:25:02, 3.18it/s] 68%|██████▊ | 252275/371472 [9:32:43<10:04:44, 3.29it/s] 68%|██████▊ | 252276/371472 [9:32:43<9:57:57, 3.32it/s] 68%|██████▊ | 252277/371472 [9:32:43<9:52:32, 3.35it/s] 68%|██████▊ | 252278/371472 [9:32:44<9:41:33, 3.42it/s] 68%|██████▊ | 252279/371472 [9:32:44<9:41:46, 3.41it/s] 68%|██████▊ | 252280/371472 [9:32:44<9:35:32, 3.45it/s] {'loss': 2.6843, 'learning_rate': 3.889331810640177e-07, 'epoch': 10.87} + 68%|██████▊ | 252280/371472 [9:32:44<9:35:32, 3.45it/s] 68%|██████▊ | 252281/371472 [9:32:45<9:31:51, 3.47it/s] 68%|██████▊ | 252282/371472 [9:32:45<9:56:16, 3.33it/s] 68%|██████▊ | 252283/371472 [9:32:45<9:42:45, 3.41it/s] 68%|██████▊ | 252284/371472 [9:32:45<9:26:13, 3.51it/s] 68%|██████▊ | 252285/371472 [9:32:46<9:29:27, 3.49it/s] 68%|██████▊ | 252286/371472 [9:32:46<10:36:49, 3.12it/s] 68%|██████▊ | 252287/371472 [9:32:46<10:23:33, 3.19it/s] 68%|██████▊ | 252288/371472 [9:32:47<10:20:20, 3.20it/s] 68%|██████▊ | 252289/371472 [9:32:47<10:01:36, 3.30it/s] 68%|██████▊ | 252290/371472 [9:32:47<9:42:04, 3.41it/s] 68%|██████▊ | 252291/371472 [9:32:48<10:01:05, 3.30it/s] 68%|██████▊ | 252292/371472 [9:32:48<10:11:40, 3.25it/s] 68%|██████▊ | 252293/371472 [9:32:48<10:14:33, 3.23it/s] 68%|██████▊ | 252294/371472 [9:32:49<10:00:37, 3.31it/s] 68%|██████▊ | 252295/371472 [9:32:49<9:54:34, 3.34it/s] 68%|██████▊ | 252296/371472 [9:32:49<10:03:21, 3.29it/s] 68%|██████▊ | 252297/371472 [9:32:49<9:45:08, 3.39it/s] 68%|██████▊ | 252298/371472 [9:32:50<9:46:37, 3.39it/s] 68%|██████▊ | 252299/371472 [9:32:50<9:24:31, 3.52it/s] 68%|██████▊ | 252300/371472 [9:32:50<9:14:10, 3.58it/s] {'loss': 2.622, 'learning_rate': 3.888846990885389e-07, 'epoch': 10.87} + 68%|██████▊ | 252300/371472 [9:32:50<9:14:10, 3.58it/s] 68%|██████▊ | 252301/371472 [9:32:51<9:23:10, 3.53it/s] 68%|██████▊ | 252302/371472 [9:32:51<9:26:33, 3.51it/s] 68%|██████▊ | 252303/371472 [9:32:51<9:35:13, 3.45it/s] 68%|██████▊ | 252304/371472 [9:32:51<9:21:14, 3.54it/s] 68%|██████▊ | 252305/371472 [9:32:52<9:45:17, 3.39it/s] 68%|██████▊ | 252306/371472 [9:32:52<10:18:37, 3.21it/s] 68%|██████▊ | 252307/371472 [9:32:52<9:52:03, 3.35it/s] 68%|██████▊ | 252308/371472 [9:32:53<11:21:16, 2.92it/s] 68%|██████▊ | 252309/371472 [9:32:53<10:49:31, 3.06it/s] 68%|██████▊ | 252310/371472 [9:32:53<10:30:22, 3.15it/s] 68%|██████▊ | 252311/371472 [9:32:54<10:15:18, 3.23it/s] 68%|██████▊ | 252312/371472 [9:32:54<10:02:22, 3.30it/s] 68%|██████▊ | 252313/371472 [9:32:54<9:58:40, 3.32it/s] 68%|██████▊ | 252314/371472 [9:32:55<9:55:59, 3.33it/s] 68%|██████▊ | 252315/371472 [9:32:55<9:57:58, 3.32it/s] 68%|██████▊ | 252316/371472 [9:32:55<9:55:22, 3.34it/s] 68%|██████▊ | 252317/371472 [9:32:55<9:43:36, 3.40it/s] 68%|██████▊ | 252318/371472 [9:32:56<9:23:36, 3.52it/s] 68%|██████▊ | 252319/371472 [9:32:56<9:41:25, 3.42it/s] 68%|██████▊ | 252320/371472 [9:32:56<9:50:43, 3.36it/s] {'loss': 2.6381, 'learning_rate': 3.8883621711305997e-07, 'epoch': 10.87} + 68%|██████▊ | 252320/371472 [9:32:56<9:50:43, 3.36it/s] 68%|██████▊ | 252321/371472 [9:32:57<9:45:54, 3.39it/s] 68%|██████▊ | 252322/371472 [9:32:57<9:47:46, 3.38it/s] 68%|██████▊ | 252323/371472 [9:32:57<9:44:34, 3.40it/s] 68%|██████▊ | 252324/371472 [9:32:57<9:32:01, 3.47it/s] 68%|██████▊ | 252325/371472 [9:32:58<9:20:01, 3.55it/s] 68%|██████▊ | 252326/371472 [9:32:58<9:29:28, 3.49it/s] 68%|██████▊ | 252327/371472 [9:32:58<9:40:48, 3.42it/s] 68%|██████▊ | 252328/371472 [9:32:59<9:58:14, 3.32it/s] 68%|██████▊ | 252329/371472 [9:32:59<10:10:29, 3.25it/s] 68%|██████▊ | 252330/371472 [9:32:59<10:08:24, 3.26it/s] 68%|██████▊ | 252331/371472 [9:33:00<10:12:54, 3.24it/s] 68%|██████▊ | 252332/371472 [9:33:00<9:53:15, 3.35it/s] 68%|██████▊ | 252333/371472 [9:33:00<10:15:07, 3.23it/s] 68%|██████▊ | 252334/371472 [9:33:01<10:32:36, 3.14it/s] 68%|██████▊ | 252335/371472 [9:33:01<10:21:15, 3.20it/s] 68%|██████▊ | 252336/371472 [9:33:01<10:11:07, 3.25it/s] 68%|██████▊ | 252337/371472 [9:33:01<10:12:36, 3.24it/s] 68%|██████▊ | 252338/371472 [9:33:02<10:06:03, 3.28it/s] 68%|██████▊ | 252339/371472 [9:33:02<9:51:40, 3.36it/s] 68%|██████▊ | 252340/371472 [9:33:02<9:49:39, 3.37it/s] {'loss': 2.7306, 'learning_rate': 3.88787735137581e-07, 'epoch': 10.87} + 68%|██████▊ | 252340/371472 [9:33:02<9:49:39, 3.37it/s] 68%|██████▊ | 252341/371472 [9:33:03<9:46:50, 3.38it/s] 68%|██████▊ | 252342/371472 [9:33:03<9:37:42, 3.44it/s] 68%|██████▊ | 252343/371472 [9:33:03<9:34:59, 3.45it/s] 68%|██████▊ | 252344/371472 [9:33:03<9:21:57, 3.53it/s] 68%|██████▊ | 252345/371472 [9:33:04<9:37:06, 3.44it/s] 68%|██████▊ | 252346/371472 [9:33:04<9:23:25, 3.52it/s] 68%|██████▊ | 252347/371472 [9:33:04<9:18:14, 3.56it/s] 68%|██████▊ | 252348/371472 [9:33:05<9:22:06, 3.53it/s] 68%|██████▊ | 252349/371472 [9:33:05<9:17:08, 3.56it/s] 68%|██████▊ | 252350/371472 [9:33:05<9:32:07, 3.47it/s] 68%|██████▊ | 252351/371472 [9:33:05<9:52:47, 3.35it/s] 68%|██████▊ | 252352/371472 [9:33:06<9:55:20, 3.33it/s] 68%|██████▊ | 252353/371472 [9:33:06<9:51:23, 3.36it/s] 68%|██████▊ | 252354/371472 [9:33:06<9:51:58, 3.35it/s] 68%|██████▊ | 252355/371472 [9:33:07<9:36:51, 3.44it/s] 68%|██████▊ | 252356/371472 [9:33:07<9:41:17, 3.42it/s] 68%|██████▊ | 252357/371472 [9:33:07<9:33:28, 3.46it/s] 68%|██████▊ | 252358/371472 [9:33:08<9:43:35, 3.40it/s] 68%|██████▊ | 252359/371472 [9:33:08<9:30:03, 3.48it/s] 68%|██████▊ | 252360/371472 [9:33:08<9:30:14, 3.48it/s] {'loss': 2.6979, 'learning_rate': 3.8873925316210216e-07, 'epoch': 10.87} + 68%|██████▊ | 252360/371472 [9:33:08<9:30:14, 3.48it/s] 68%|██████▊ | 252361/371472 [9:33:08<9:33:24, 3.46it/s] 68%|██████▊ | 252362/371472 [9:33:09<9:26:34, 3.50it/s] 68%|██████▊ | 252363/371472 [9:33:09<9:34:00, 3.46it/s] 68%|██████▊ | 252364/371472 [9:33:09<9:38:52, 3.43it/s] 68%|██████▊ | 252365/371472 [9:33:10<14:12:06, 2.33it/s] 68%|██████▊ | 252366/371472 [9:33:10<13:19:11, 2.48it/s] 68%|██████▊ | 252367/371472 [9:33:11<12:11:19, 2.71it/s] 68%|██████▊ | 252368/371472 [9:33:11<11:26:09, 2.89it/s] 68%|██████▊ | 252369/371472 [9:33:11<10:54:04, 3.03it/s] 68%|██████▊ | 252370/371472 [9:33:11<10:22:05, 3.19it/s] 68%|██████▊ | 252371/371472 [9:33:12<10:07:30, 3.27it/s] 68%|██████▊ | 252372/371472 [9:33:12<10:02:49, 3.29it/s] 68%|██████▊ | 252373/371472 [9:33:12<10:08:33, 3.26it/s] 68%|██████▊ | 252374/371472 [9:33:13<9:58:31, 3.32it/s] 68%|██████▊ | 252375/371472 [9:33:13<10:00:24, 3.31it/s] 68%|██████▊ | 252376/371472 [9:33:13<10:12:25, 3.24it/s] 68%|██████▊ | 252377/371472 [9:33:14<10:54:10, 3.03it/s] 68%|██████▊ | 252378/371472 [9:33:14<10:40:58, 3.10it/s] 68%|██████▊ | 252379/371472 [9:33:14<10:15:28, 3.22it/s] 68%|██████▊ | 252380/371472 [9:33:15<9:58:48, 3.31it/s] {'loss': 2.6787, 'learning_rate': 3.8869077118662323e-07, 'epoch': 10.87} + 68%|██████▊ | 252380/371472 [9:33:15<9:58:48, 3.31it/s] 68%|██████▊ | 252381/371472 [9:33:15<10:29:45, 3.15it/s] 68%|██████▊ | 252382/371472 [9:33:15<10:35:44, 3.12it/s] 68%|██████▊ | 252383/371472 [9:33:16<10:16:38, 3.22it/s] 68%|██████▊ | 252384/371472 [9:33:16<10:03:17, 3.29it/s] 68%|██████▊ | 252385/371472 [9:33:16<10:13:20, 3.24it/s] 68%|██████▊ | 252386/371472 [9:33:16<10:06:37, 3.27it/s] 68%|██████▊ | 252387/371472 [9:33:17<10:02:41, 3.29it/s] 68%|██████▊ | 252388/371472 [9:33:17<9:55:41, 3.33it/s] 68%|██████▊ | 252389/371472 [9:33:17<10:12:26, 3.24it/s] 68%|██████▊ | 252390/371472 [9:33:18<10:35:56, 3.12it/s] 68%|██████▊ | 252391/371472 [9:33:18<10:24:59, 3.18it/s] 68%|██████▊ | 252392/371472 [9:33:18<10:10:20, 3.25it/s] 68%|██████▊ | 252393/371472 [9:33:19<9:52:24, 3.35it/s] 68%|██████▊ | 252394/371472 [9:33:19<9:58:19, 3.32it/s] 68%|██████▊ | 252395/371472 [9:33:19<10:29:41, 3.15it/s] 68%|██████▊ | 252396/371472 [9:33:20<10:24:27, 3.18it/s] 68%|██████▊ | 252397/371472 [9:33:20<10:16:28, 3.22it/s] 68%|██████▊ | 252398/371472 [9:33:20<9:57:54, 3.32it/s] 68%|██████▊ | 252399/371472 [9:33:20<10:24:55, 3.18it/s] 68%|██████▊ | 252400/371472 [9:33:21<10:03:37, 3.29it/s] {'loss': 2.7172, 'learning_rate': 3.8864228921114436e-07, 'epoch': 10.87} + 68%|██████▊ | 252400/371472 [9:33:21<10:03:37, 3.29it/s] 68%|██████▊ | 252401/371472 [9:33:21<10:23:17, 3.18it/s] 68%|██████▊ | 252402/371472 [9:33:21<9:59:21, 3.31it/s] 68%|██████▊ | 252403/371472 [9:33:22<10:05:50, 3.28it/s] 68%|██████▊ | 252404/371472 [9:33:22<9:49:38, 3.37it/s] 68%|██████▊ | 252405/371472 [9:33:22<9:52:01, 3.35it/s] 68%|██████▊ | 252406/371472 [9:33:23<9:51:41, 3.35it/s] 68%|██████▊ | 252407/371472 [9:33:23<9:39:29, 3.42it/s] 68%|██████▊ | 252408/371472 [9:33:23<9:54:02, 3.34it/s] 68%|██████▊ | 252409/371472 [9:33:23<10:10:51, 3.25it/s] 68%|██████▊ | 252410/371472 [9:33:24<10:20:19, 3.20it/s] 68%|██████▊ | 252411/371472 [9:33:24<10:48:03, 3.06it/s] 68%|██████▊ | 252412/371472 [9:33:24<10:26:23, 3.17it/s] 68%|██████▊ | 252413/371472 [9:33:25<10:10:16, 3.25it/s] 68%|██████▊ | 252414/371472 [9:33:25<10:01:33, 3.30it/s] 68%|██████▊ | 252415/371472 [9:33:25<9:58:37, 3.31it/s] 68%|██████▊ | 252416/371472 [9:33:26<10:06:16, 3.27it/s] 68%|██████▊ | 252417/371472 [9:33:26<9:52:16, 3.35it/s] 68%|██████▊ | 252418/371472 [9:33:26<9:46:42, 3.38it/s] 68%|██████▊ | 252419/371472 [9:33:27<9:54:44, 3.34it/s] 68%|██████▊ | 252420/371472 [9:33:27<9:35:22, 3.45it/s] {'loss': 2.636, 'learning_rate': 3.8859380723566543e-07, 'epoch': 10.87} + 68%|██████▊ | 252420/371472 [9:33:27<9:35:22, 3.45it/s] 68%|██████▊ | 252421/371472 [9:33:27<9:32:19, 3.47it/s] 68%|██████▊ | 252422/371472 [9:33:27<10:30:34, 3.15it/s] 68%|██████▊ | 252423/371472 [9:33:28<10:15:10, 3.23it/s] 68%|██████▊ | 252424/371472 [9:33:28<10:08:12, 3.26it/s] 68%|██████▊ | 252425/371472 [9:33:28<9:45:21, 3.39it/s] 68%|██████▊ | 252426/371472 [9:33:29<9:43:58, 3.40it/s] 68%|██████▊ | 252427/371472 [9:33:29<9:43:47, 3.40it/s] 68%|██████▊ | 252428/371472 [9:33:29<9:53:59, 3.34it/s] 68%|██████▊ | 252429/371472 [9:33:30<10:37:56, 3.11it/s] 68%|██████▊ | 252430/371472 [9:33:30<10:27:44, 3.16it/s] 68%|██████▊ | 252431/371472 [9:33:30<11:39:17, 2.84it/s] 68%|██████▊ | 252432/371472 [9:33:31<12:12:11, 2.71it/s] 68%|██████▊ | 252433/371472 [9:33:31<12:36:53, 2.62it/s] 68%|██████▊ | 252434/371472 [9:33:32<12:31:33, 2.64it/s] 68%|██████▊ | 252435/371472 [9:33:32<11:52:50, 2.78it/s] 68%|██████▊ | 252436/371472 [9:33:32<11:27:59, 2.88it/s] 68%|██████▊ | 252437/371472 [9:33:32<10:54:58, 3.03it/s] 68%|██████▊ | 252438/371472 [9:33:33<10:39:34, 3.10it/s] 68%|██████▊ | 252439/371472 [9:33:33<10:25:15, 3.17it/s] 68%|██████▊ | 252440/371472 [9:33:33<10:05:52, 3.27it/s] {'loss': 2.6099, 'learning_rate': 3.885453252601866e-07, 'epoch': 10.87} + 68%|██████▊ | 252440/371472 [9:33:33<10:05:52, 3.27it/s] 68%|██████▊ | 252441/371472 [9:33:34<10:05:41, 3.28it/s] 68%|██████▊ | 252442/371472 [9:33:34<10:06:01, 3.27it/s] 68%|██████▊ | 252443/371472 [9:33:34<9:58:48, 3.31it/s] 68%|██████▊ | 252444/371472 [9:33:35<10:10:18, 3.25it/s] 68%|██████▊ | 252445/371472 [9:33:35<10:05:43, 3.28it/s] 68%|██████▊ | 252446/371472 [9:33:35<10:02:58, 3.29it/s] 68%|██████▊ | 252447/371472 [9:33:35<9:52:14, 3.35it/s] 68%|██████▊ | 252448/371472 [9:33:36<9:56:19, 3.33it/s] 68%|██████▊ | 252449/371472 [9:33:36<9:48:38, 3.37it/s] 68%|██████▊ | 252450/371472 [9:33:36<9:37:13, 3.44it/s] 68%|██████▊ | 252451/371472 [9:33:37<10:17:34, 3.21it/s] 68%|██████▊ | 252452/371472 [9:33:37<9:52:37, 3.35it/s] 68%|██████▊ | 252453/371472 [9:33:37<9:57:41, 3.32it/s] 68%|██████▊ | 252454/371472 [9:33:38<10:06:20, 3.27it/s] 68%|██████▊ | 252455/371472 [9:33:38<9:52:15, 3.35it/s] 68%|██████▊ | 252456/371472 [9:33:38<10:07:58, 3.26it/s] 68%|██████▊ | 252457/371472 [9:33:38<9:57:31, 3.32it/s] 68%|██████▊ | 252458/371472 [9:33:39<10:02:09, 3.29it/s] 68%|██████▊ | 252459/371472 [9:33:39<9:51:54, 3.35it/s] 68%|██████▊ | 252460/371472 [9:33:39<10:14:16, 3.23it/s] {'loss': 2.6221, 'learning_rate': 3.884968432847077e-07, 'epoch': 10.87} + 68%|██████▊ | 252460/371472 [9:33:39<10:14:16, 3.23it/s] 68%|██████▊ | 252461/371472 [9:33:40<9:52:13, 3.35it/s] 68%|██████▊ | 252462/371472 [9:33:40<9:53:34, 3.34it/s] 68%|██████▊ | 252463/371472 [9:33:40<9:48:34, 3.37it/s] 68%|██████▊ | 252464/371472 [9:33:41<9:50:42, 3.36it/s] 68%|██████▊ | 252465/371472 [9:33:41<9:28:14, 3.49it/s] 68%|██████▊ | 252466/371472 [9:33:41<9:18:33, 3.55it/s] 68%|██████▊ | 252467/371472 [9:33:41<9:17:14, 3.56it/s] 68%|██████▊ | 252468/371472 [9:33:42<9:17:57, 3.55it/s] 68%|██████▊ | 252469/371472 [9:33:42<9:14:49, 3.57it/s] 68%|██████▊ | 252470/371472 [9:33:42<9:22:14, 3.53it/s] 68%|██████▊ | 252471/371472 [9:33:42<9:22:52, 3.52it/s] 68%|██████▊ | 252472/371472 [9:33:43<9:32:00, 3.47it/s] 68%|██████▊ | 252473/371472 [9:33:43<9:55:56, 3.33it/s] 68%|██████▊ | 252474/371472 [9:33:43<9:50:05, 3.36it/s] 68%|██████▊ | 252475/371472 [9:33:44<9:28:46, 3.49it/s] 68%|██████▊ | 252476/371472 [9:33:44<9:12:06, 3.59it/s] 68%|██████▊ | 252477/371472 [9:33:44<9:19:33, 3.54it/s] 68%|██████▊ | 252478/371472 [9:33:45<9:27:08, 3.50it/s] 68%|██████▊ | 252479/371472 [9:33:45<9:30:07, 3.48it/s] 68%|██████▊ | 252480/371472 [9:33:45<9:47:16, 3.38it/s] {'loss': 2.6044, 'learning_rate': 3.884483613092288e-07, 'epoch': 10.87} + 68%|██████▊ | 252480/371472 [9:33:45<9:47:16, 3.38it/s] 68%|██████▊ | 252481/371472 [9:33:45<9:50:23, 3.36it/s] 68%|██████▊ | 252482/371472 [9:33:46<9:40:11, 3.42it/s] 68%|██████▊ | 252483/371472 [9:33:46<9:31:08, 3.47it/s] 68%|██████▊ | 252484/371472 [9:33:46<9:17:42, 3.56it/s] 68%|██████▊ | 252485/371472 [9:33:47<9:39:37, 3.42it/s] 68%|██████▊ | 252486/371472 [9:33:47<9:30:38, 3.48it/s] 68%|██████▊ | 252487/371472 [9:33:47<9:24:57, 3.51it/s] 68%|██████▊ | 252488/371472 [9:33:47<9:36:04, 3.44it/s] 68%|██████▊ | 252489/371472 [9:33:48<9:35:18, 3.45it/s] 68%|██████▊ | 252490/371472 [9:33:48<9:47:30, 3.38it/s] 68%|██████▊ | 252491/371472 [9:33:48<9:42:41, 3.40it/s] 68%|██████▊ | 252492/371472 [9:33:49<9:54:14, 3.34it/s] 68%|██████▊ | 252493/371472 [9:33:49<9:37:45, 3.43it/s] 68%|██████▊ | 252494/371472 [9:33:49<9:21:54, 3.53it/s] 68%|██████▊ | 252495/371472 [9:33:49<9:39:13, 3.42it/s] 68%|██████▊ | 252496/371472 [9:33:50<9:26:56, 3.50it/s] 68%|██████▊ | 252497/371472 [9:33:50<9:57:51, 3.32it/s] 68%|██████▊ | 252498/371472 [9:33:50<9:46:57, 3.38it/s] 68%|██████▊ | 252499/371472 [9:33:51<9:48:55, 3.37it/s] 68%|██████▊ | 252500/371472 [9:33:51<9:47:37, 3.37it/s] {'loss': 2.7876, 'learning_rate': 3.8839987933374993e-07, 'epoch': 10.88} + 68%|██████▊ | 252500/371472 [9:33:51<9:47:37, 3.37it/s] 68%|██████▊ | 252501/371472 [9:33:51<9:48:17, 3.37it/s] 68%|██████▊ | 252502/371472 [9:33:52<9:56:30, 3.32it/s] 68%|██████▊ | 252503/371472 [9:33:52<10:13:39, 3.23it/s] 68%|██████▊ | 252504/371472 [9:33:52<9:52:48, 3.34it/s] 68%|██████▊ | 252505/371472 [9:33:52<9:46:42, 3.38it/s] 68%|██████▊ | 252506/371472 [9:33:53<9:37:25, 3.43it/s] 68%|██████▊ | 252507/371472 [9:33:53<9:39:28, 3.42it/s] 68%|██████▊ | 252508/371472 [9:33:53<9:44:39, 3.39it/s] 68%|██████▊ | 252509/371472 [9:33:54<9:36:04, 3.44it/s] 68%|██████▊ | 252510/371472 [9:33:54<9:38:59, 3.42it/s] 68%|██████▊ | 252511/371472 [9:33:54<9:43:15, 3.40it/s] 68%|██████▊ | 252512/371472 [9:33:54<9:40:42, 3.41it/s] 68%|██████▊ | 252513/371472 [9:33:55<9:34:16, 3.45it/s] 68%|██████▊ | 252514/371472 [9:33:55<9:26:53, 3.50it/s] 68%|██████▊ | 252515/371472 [9:33:55<9:25:01, 3.51it/s] 68%|██████▊ | 252516/371472 [9:33:56<10:01:08, 3.30it/s] 68%|██████▊ | 252517/371472 [9:33:56<9:42:39, 3.40it/s] 68%|██████▊ | 252518/371472 [9:33:56<9:28:26, 3.49it/s] 68%|██████▊ | 252519/371472 [9:33:57<9:32:57, 3.46it/s] 68%|██████▊ | 252520/371472 [9:33:57<9:21:42, 3.53it/s] {'loss': 2.8552, 'learning_rate': 3.8835139735827105e-07, 'epoch': 10.88} + 68%|██████▊ | 252520/371472 [9:33:57<9:21:42, 3.53it/s] 68%|██████▊ | 252521/371472 [9:33:57<9:08:52, 3.61it/s] 68%|██████▊ | 252522/371472 [9:33:57<9:15:18, 3.57it/s] 68%|██████▊ | 252523/371472 [9:33:58<9:44:11, 3.39it/s] 68%|██████▊ | 252524/371472 [9:33:58<9:51:20, 3.35it/s] 68%|██████▊ | 252525/371472 [9:33:58<9:56:41, 3.32it/s] 68%|██████▊ | 252526/371472 [9:33:59<10:08:02, 3.26it/s] 68%|██████▊ | 252527/371472 [9:33:59<10:17:28, 3.21it/s] 68%|██████▊ | 252528/371472 [9:33:59<10:12:24, 3.24it/s] 68%|██████▊ | 252529/371472 [9:34:00<10:04:35, 3.28it/s] 68%|██████▊ | 252530/371472 [9:34:00<9:42:39, 3.40it/s] 68%|██████▊ | 252531/371472 [9:34:00<9:42:51, 3.40it/s] 68%|██████▊ | 252532/371472 [9:34:00<9:36:06, 3.44it/s] 68%|██████▊ | 252533/371472 [9:34:01<9:28:17, 3.49it/s] 68%|██████▊ | 252534/371472 [9:34:01<9:40:40, 3.41it/s] 68%|██████▊ | 252535/371472 [9:34:01<9:32:18, 3.46it/s] 68%|██████▊ | 252536/371472 [9:34:02<9:38:18, 3.43it/s] 68%|██████▊ | 252537/371472 [9:34:02<9:41:04, 3.41it/s] 68%|██████▊ | 252538/371472 [9:34:02<9:45:21, 3.39it/s] 68%|██████▊ | 252539/371472 [9:34:02<9:38:08, 3.43it/s] 68%|██████▊ | 252540/371472 [9:34:03<9:42:25, 3.40it/s] {'loss': 2.7441, 'learning_rate': 3.8830291538279207e-07, 'epoch': 10.88} + 68%|██████▊ | 252540/371472 [9:34:03<9:42:25, 3.40it/s] 68%|██████▊ | 252541/371472 [9:34:03<10:57:42, 3.01it/s] 68%|██████▊ | 252542/371472 [9:34:03<10:29:44, 3.15it/s] 68%|██████▊ | 252543/371472 [9:34:04<10:13:34, 3.23it/s] 68%|██████▊ | 252544/371472 [9:34:04<9:55:32, 3.33it/s] 68%|██████▊ | 252545/371472 [9:34:04<10:00:42, 3.30it/s] 68%|██████▊ | 252546/371472 [9:34:05<10:18:14, 3.21it/s] 68%|██████▊ | 252547/371472 [9:34:05<9:51:07, 3.35it/s] 68%|██████▊ | 252548/371472 [9:34:05<9:41:22, 3.41it/s] 68%|██████▊ | 252549/371472 [9:34:05<9:32:29, 3.46it/s] 68%|██████▊ | 252550/371472 [9:34:06<9:24:14, 3.51it/s] 68%|██████▊ | 252551/371472 [9:34:06<9:31:46, 3.47it/s] 68%|██████▊ | 252552/371472 [9:34:06<9:50:55, 3.35it/s] 68%|██████▊ | 252553/371472 [9:34:07<9:50:02, 3.36it/s] 68%|██████▊ | 252554/371472 [9:34:07<10:38:33, 3.10it/s] 68%|██████▊ | 252555/371472 [9:34:07<10:35:21, 3.12it/s] 68%|██████▊ | 252556/371472 [9:34:08<10:15:00, 3.22it/s] 68%|██████▊ | 252557/371472 [9:34:08<10:09:19, 3.25it/s] 68%|██████▊ | 252558/371472 [9:34:08<10:20:29, 3.19it/s] 68%|██████▊ | 252559/371472 [9:34:09<11:45:04, 2.81it/s] 68%|██████▊ | 252560/371472 [9:34:09<11:15:21, 2.93it/s] {'loss': 2.8744, 'learning_rate': 3.8825443340731325e-07, 'epoch': 10.88} + 68%|██████▊ | 252560/371472 [9:34:09<11:15:21, 2.93it/s] 68%|██████▊ | 252561/371472 [9:34:09<10:48:22, 3.06it/s] 68%|██████▊ | 252562/371472 [9:34:10<10:36:28, 3.11it/s] 68%|██████▊ | 252563/371472 [9:34:10<10:19:28, 3.20it/s] 68%|██████▊ | 252564/371472 [9:34:10<9:58:16, 3.31it/s] 68%|██████▊ | 252565/371472 [9:34:10<9:38:06, 3.43it/s] 68%|██████▊ | 252566/371472 [9:34:11<10:13:15, 3.23it/s] 68%|██████▊ | 252567/371472 [9:34:11<9:55:59, 3.33it/s] 68%|██████▊ | 252568/371472 [9:34:11<10:15:46, 3.22it/s] 68%|██████▊ | 252569/371472 [9:34:12<10:08:11, 3.26it/s] 68%|██████▊ | 252570/371472 [9:34:12<9:47:05, 3.38it/s] 68%|██████▊ | 252571/371472 [9:34:12<10:11:43, 3.24it/s] 68%|██████▊ | 252572/371472 [9:34:13<10:05:37, 3.27it/s] 68%|██████▊ | 252573/371472 [9:34:13<9:47:50, 3.37it/s] 68%|██████▊ | 252574/371472 [9:34:13<9:36:59, 3.43it/s] 68%|██████▊ | 252575/371472 [9:34:13<9:29:18, 3.48it/s] 68%|██████▊ | 252576/371472 [9:34:14<9:25:17, 3.51it/s] 68%|██████▊ | 252577/371472 [9:34:14<10:06:35, 3.27it/s] 68%|██████▊ | 252578/371472 [9:34:14<9:46:38, 3.38it/s] 68%|██████▊ | 252579/371472 [9:34:15<9:35:58, 3.44it/s] 68%|██████▊ | 252580/371472 [9:34:15<11:04:42, 2.98it/s] {'loss': 2.5416, 'learning_rate': 3.882059514318343e-07, 'epoch': 10.88} + 68%|██████▊ | 252580/371472 [9:34:15<11:04:42, 2.98it/s] 68%|██████▊ | 252581/371472 [9:34:15<10:44:51, 3.07it/s] 68%|██████▊ | 252582/371472 [9:34:16<10:11:16, 3.24it/s] 68%|██████▊ | 252583/371472 [9:34:16<10:00:15, 3.30it/s] 68%|██████▊ | 252584/371472 [9:34:16<9:37:28, 3.43it/s] 68%|██████▊ | 252585/371472 [9:34:16<9:34:26, 3.45it/s] 68%|██████▊ | 252586/371472 [9:34:17<9:27:52, 3.49it/s] 68%|██████▊ | 252587/371472 [9:34:17<9:32:50, 3.46it/s] 68%|██████▊ | 252588/371472 [9:34:17<9:55:05, 3.33it/s] 68%|██████▊ | 252589/371472 [9:34:18<9:49:25, 3.36it/s] 68%|██████▊ | 252590/371472 [9:34:18<9:47:51, 3.37it/s] 68%|██████▊ | 252591/371472 [9:34:18<9:32:28, 3.46it/s] 68%|██████▊ | 252592/371472 [9:34:19<9:31:15, 3.47it/s] 68%|██████▊ | 252593/371472 [9:34:19<9:35:57, 3.44it/s] 68%|██████▊ | 252594/371472 [9:34:19<9:36:40, 3.44it/s] 68%|██████▊ | 252595/371472 [9:34:19<10:02:24, 3.29it/s] 68%|██████▊ | 252596/371472 [9:34:20<10:10:09, 3.25it/s] 68%|██████▊ | 252597/371472 [9:34:20<10:03:43, 3.28it/s] 68%|██████▊ | 252598/371472 [9:34:20<10:00:03, 3.30it/s] 68%|██████▊ | 252599/371472 [9:34:21<9:44:43, 3.39it/s] 68%|██████▊ | 252600/371472 [9:34:21<9:35:29, 3.44it/s] {'loss': 2.6524, 'learning_rate': 3.8815746945635544e-07, 'epoch': 10.88} + 68%|██████▊ | 252600/371472 [9:34:21<9:35:29, 3.44it/s] 68%|██████▊ | 252601/371472 [9:34:21<9:34:04, 3.45it/s] 68%|██████▊ | 252602/371472 [9:34:22<9:30:43, 3.47it/s] 68%|██████▊ | 252603/371472 [9:34:22<9:36:35, 3.44it/s] 68%|██████▊ | 252604/371472 [9:34:22<9:30:23, 3.47it/s] 68%|██████▊ | 252605/371472 [9:34:22<9:20:20, 3.54it/s] 68%|██████▊ | 252606/371472 [9:34:23<9:33:43, 3.45it/s] 68%|██████▊ | 252607/371472 [9:34:23<9:21:46, 3.53it/s] 68%|██████▊ | 252608/371472 [9:34:23<9:23:17, 3.52it/s] 68%|██████▊ | 252609/371472 [9:34:24<9:27:10, 3.49it/s] 68%|██████▊ | 252610/371472 [9:34:24<9:54:57, 3.33it/s] 68%|██████▊ | 252611/371472 [9:34:24<9:39:43, 3.42it/s] 68%|██████▊ | 252612/371472 [9:34:24<9:45:24, 3.38it/s] 68%|██████▊ | 252613/371472 [9:34:25<9:33:20, 3.46it/s] 68%|██████▊ | 252614/371472 [9:34:25<9:28:46, 3.48it/s] 68%|██████▊ | 252615/371472 [9:34:25<9:26:28, 3.50it/s] 68%|██████▊ | 252616/371472 [9:34:26<9:22:23, 3.52it/s] 68%|██████▊ | 252617/371472 [9:34:26<10:05:36, 3.27it/s] 68%|██████▊ | 252618/371472 [9:34:26<10:04:28, 3.28it/s] 68%|██████▊ | 252619/371472 [9:34:26<9:50:36, 3.35it/s] 68%|██████▊ | 252620/371472 [9:34:27<9:48:11, 3.37it/s] {'loss': 2.6658, 'learning_rate': 3.881089874808765e-07, 'epoch': 10.88} + 68%|██████▊ | 252620/371472 [9:34:27<9:48:11, 3.37it/s] 68%|██████▊ | 252621/371472 [9:34:27<9:38:57, 3.42it/s] 68%|██████▊ | 252622/371472 [9:34:27<9:26:40, 3.50it/s] 68%|██████▊ | 252623/371472 [9:34:28<9:43:03, 3.40it/s] 68%|██████▊ | 252624/371472 [9:34:28<9:49:04, 3.36it/s] 68%|██████▊ | 252625/371472 [9:34:28<9:38:41, 3.42it/s] 68%|██████▊ | 252626/371472 [9:34:29<9:57:19, 3.32it/s] 68%|██████▊ | 252627/371472 [9:34:29<9:50:30, 3.35it/s] 68%|██████▊ | 252628/371472 [9:34:29<10:04:57, 3.27it/s] 68%|██████▊ | 252629/371472 [9:34:29<9:39:32, 3.42it/s] 68%|██████▊ | 252630/371472 [9:34:30<9:47:57, 3.37it/s] 68%|██████▊ | 252631/371472 [9:34:30<9:50:38, 3.35it/s] 68%|██████▊ | 252632/371472 [9:34:30<10:02:25, 3.29it/s] 68%|██████▊ | 252633/371472 [9:34:31<10:18:02, 3.20it/s] 68%|██████▊ | 252634/371472 [9:34:31<10:08:04, 3.26it/s] 68%|██████▊ | 252635/371472 [9:34:31<10:05:23, 3.27it/s] 68%|██████▊ | 252636/371472 [9:34:32<9:36:39, 3.43it/s] 68%|██████▊ | 252637/371472 [9:34:32<9:28:26, 3.48it/s] 68%|██████▊ | 252638/371472 [9:34:32<10:03:49, 3.28it/s] 68%|██████▊ | 252639/371472 [9:34:32<9:39:36, 3.42it/s] 68%|██████▊ | 252640/371472 [9:34:33<9:29:08, 3.48it/s] {'loss': 2.6793, 'learning_rate': 3.880605055053977e-07, 'epoch': 10.88} + 68%|██████▊ | 252640/371472 [9:34:33<9:29:08, 3.48it/s] 68%|██████▊ | 252641/371472 [9:34:33<9:53:53, 3.33it/s] 68%|██████▊ | 252642/371472 [9:34:33<9:57:18, 3.32it/s] 68%|██████▊ | 252643/371472 [9:34:34<10:04:11, 3.28it/s] 68%|██████▊ | 252644/371472 [9:34:34<9:49:08, 3.36it/s] 68%|██████▊ | 252645/371472 [9:34:34<10:09:28, 3.25it/s] 68%|██████▊ | 252646/371472 [9:34:35<9:48:32, 3.36it/s] 68%|██████▊ | 252647/371472 [9:34:35<10:00:37, 3.30it/s] 68%|██████▊ | 252648/371472 [9:34:35<10:40:00, 3.09it/s] 68%|██████▊ | 252649/371472 [9:34:35<10:16:35, 3.21it/s] 68%|██████▊ | 252650/371472 [9:34:36<10:02:32, 3.29it/s] 68%|██████▊ | 252651/371472 [9:34:36<9:49:23, 3.36it/s] 68%|██████▊ | 252652/371472 [9:34:36<9:36:54, 3.43it/s] 68%|██████▊ | 252653/371472 [9:34:37<9:19:40, 3.54it/s] 68%|██████▊ | 252654/371472 [9:34:37<9:23:13, 3.52it/s] 68%|██████▊ | 252655/371472 [9:34:37<9:58:07, 3.31it/s] 68%|██████▊ | 252656/371472 [9:34:38<10:07:16, 3.26it/s] 68%|██████▊ | 252657/371472 [9:34:38<9:49:54, 3.36it/s] 68%|██████▊ | 252658/371472 [9:34:38<9:53:11, 3.34it/s] 68%|██████▊ | 252659/371472 [9:34:38<9:36:25, 3.44it/s] 68%|██████▊ | 252660/371472 [9:34:39<9:39:17, 3.42it/s] {'loss': 2.7381, 'learning_rate': 3.880120235299187e-07, 'epoch': 10.88} + 68%|██████▊ | 252660/371472 [9:34:39<9:39:17, 3.42it/s] 68%|██████▊ | 252661/371472 [9:34:39<9:42:40, 3.40it/s] 68%|██████▊ | 252662/371472 [9:34:39<9:31:28, 3.46it/s] 68%|██████▊ | 252663/371472 [9:34:40<9:16:47, 3.56it/s] 68%|██████▊ | 252664/371472 [9:34:40<9:55:07, 3.33it/s] 68%|██████▊ | 252665/371472 [9:34:40<9:46:13, 3.38it/s] 68%|██████▊ | 252666/371472 [9:34:40<9:39:39, 3.42it/s] 68%|██████▊ | 252667/371472 [9:34:41<9:33:24, 3.45it/s] 68%|██████▊ | 252668/371472 [9:34:41<9:27:02, 3.49it/s] 68%|██████▊ | 252669/371472 [9:34:41<9:37:56, 3.43it/s] 68%|██████▊ | 252670/371472 [9:34:42<9:57:01, 3.32it/s] 68%|██████▊ | 252671/371472 [9:34:42<9:44:43, 3.39it/s] 68%|██████▊ | 252672/371472 [9:34:42<9:38:48, 3.42it/s] 68%|██████▊ | 252673/371472 [9:34:42<9:21:11, 3.53it/s] 68%|██████▊ | 252674/371472 [9:34:43<9:38:38, 3.42it/s] 68%|██████▊ | 252675/371472 [9:34:43<9:26:34, 3.49it/s] 68%|██████▊ | 252676/371472 [9:34:43<9:49:50, 3.36it/s] 68%|██████▊ | 252677/371472 [9:34:44<9:33:17, 3.45it/s] 68%|██████▊ | 252678/371472 [9:34:44<9:31:23, 3.47it/s] 68%|██████▊ | 252679/371472 [9:34:44<9:28:40, 3.48it/s] 68%|██████▊ | 252680/371472 [9:34:45<9:45:25, 3.38it/s] {'loss': 2.6739, 'learning_rate': 3.879635415544399e-07, 'epoch': 10.88} + 68%|██████▊ | 252680/371472 [9:34:45<9:45:25, 3.38it/s] 68%|██████▊ | 252681/371472 [9:34:45<9:33:15, 3.45it/s] 68%|██████▊ | 252682/371472 [9:34:45<9:31:08, 3.47it/s] 68%|██████▊ | 252683/371472 [9:34:45<10:01:21, 3.29it/s] 68%|██████▊ | 252684/371472 [9:34:46<9:49:51, 3.36it/s] 68%|██████▊ | 252685/371472 [9:34:46<9:34:48, 3.44it/s] 68%|██████▊ | 252686/371472 [9:34:46<9:41:16, 3.41it/s] 68%|██████▊ | 252687/371472 [9:34:47<9:47:41, 3.37it/s] 68%|██████▊ | 252688/371472 [9:34:47<9:49:10, 3.36it/s] 68%|██████▊ | 252689/371472 [9:34:47<9:34:48, 3.44it/s] 68%|██████▊ | 252690/371472 [9:34:47<9:42:15, 3.40it/s] 68%|██████▊ | 252691/371472 [9:34:48<9:45:31, 3.38it/s] 68%|██████▊ | 252692/371472 [9:34:48<9:37:05, 3.43it/s] 68%|██████▊ | 252693/371472 [9:34:48<10:16:29, 3.21it/s] 68%|██████▊ | 252694/371472 [9:34:49<10:28:20, 3.15it/s] 68%|██████▊ | 252695/371472 [9:34:49<10:05:00, 3.27it/s] 68%|██████▊ | 252696/371472 [9:34:49<10:16:48, 3.21it/s] 68%|██████▊ | 252697/371472 [9:34:50<9:48:19, 3.36it/s] 68%|██████▊ | 252698/371472 [9:34:50<9:50:01, 3.36it/s] 68%|██████▊ | 252699/371472 [9:34:50<9:58:32, 3.31it/s] 68%|██████▊ | 252700/371472 [9:34:51<10:02:51, 3.28it/s] {'loss': 2.7802, 'learning_rate': 3.8791505957896096e-07, 'epoch': 10.88} + 68%|██████▊ | 252700/371472 [9:34:51<10:02:51, 3.28it/s] 68%|██████▊ | 252701/371472 [9:34:51<9:59:14, 3.30it/s] 68%|██████▊ | 252702/371472 [9:34:51<10:02:56, 3.28it/s] 68%|██████▊ | 252703/371472 [9:34:51<10:05:41, 3.27it/s] 68%|██████▊ | 252704/371472 [9:34:52<9:51:05, 3.35it/s] 68%|██████▊ | 252705/371472 [9:34:52<10:04:13, 3.28it/s] 68%|██████▊ | 252706/371472 [9:34:52<9:40:22, 3.41it/s] 68%|██████▊ | 252707/371472 [9:34:53<9:23:29, 3.51it/s] 68%|██████▊ | 252708/371472 [9:34:53<9:13:34, 3.58it/s] 68%|██████▊ | 252709/371472 [9:34:53<9:25:12, 3.50it/s] 68%|██████▊ | 252710/371472 [9:34:53<9:22:12, 3.52it/s] 68%|██████▊ | 252711/371472 [9:34:54<9:41:19, 3.40it/s] 68%|██████▊ | 252712/371472 [9:34:54<10:39:18, 3.10it/s] 68%|██████▊ | 252713/371472 [9:34:54<10:11:43, 3.24it/s] 68%|██████▊ | 252714/371472 [9:34:55<10:15:41, 3.21it/s] 68%|██████▊ | 252715/371472 [9:34:55<9:50:39, 3.35it/s] 68%|██████▊ | 252716/371472 [9:34:55<9:46:25, 3.38it/s] 68%|██████▊ | 252717/371472 [9:34:56<9:27:29, 3.49it/s] 68%|██████▊ | 252718/371472 [9:34:56<9:53:43, 3.33it/s] 68%|██████▊ | 252719/371472 [9:34:56<10:34:15, 3.12it/s] 68%|██████▊ | 252720/371472 [9:34:57<10:38:40, 3.10it/s] {'loss': 2.6218, 'learning_rate': 3.878665776034821e-07, 'epoch': 10.89} + 68%|██████▊ | 252720/371472 [9:34:57<10:38:40, 3.10it/s] 68%|██████▊ | 252721/371472 [9:34:57<10:11:42, 3.24it/s] 68%|██████▊ | 252722/371472 [9:34:57<9:55:09, 3.33it/s] 68%|██████▊ | 252723/371472 [9:34:57<10:11:54, 3.23it/s] 68%|██████▊ | 252724/371472 [9:34:58<10:12:47, 3.23it/s] 68%|██████▊ | 252725/371472 [9:34:58<9:55:13, 3.33it/s] 68%|██████▊ | 252726/371472 [9:34:58<10:06:07, 3.27it/s] 68%|██████▊ | 252727/371472 [9:34:59<10:25:32, 3.16it/s] 68%|██████▊ | 252728/371472 [9:34:59<10:12:17, 3.23it/s] 68%|██████▊ | 252729/371472 [9:34:59<9:56:19, 3.32it/s] 68%|██████▊ | 252730/371472 [9:35:00<9:47:14, 3.37it/s] 68%|██████▊ | 252731/371472 [9:35:00<9:24:19, 3.51it/s] 68%|██████▊ | 252732/371472 [9:35:00<9:21:19, 3.53it/s] 68%|██████▊ | 252733/371472 [9:35:00<9:56:05, 3.32it/s] 68%|██████▊ | 252734/371472 [9:35:01<9:37:06, 3.43it/s] 68%|██████▊ | 252735/371472 [9:35:01<9:23:04, 3.51it/s] 68%|██████▊ | 252736/371472 [9:35:01<9:15:56, 3.56it/s] 68%|██████▊ | 252737/371472 [9:35:02<9:21:53, 3.52it/s] 68%|██████▊ | 252738/371472 [9:35:02<9:55:05, 3.33it/s] 68%|██████▊ | 252739/371472 [9:35:02<9:35:41, 3.44it/s] 68%|██████▊ | 252740/371472 [9:35:02<9:27:34, 3.49it/s] {'loss': 2.9297, 'learning_rate': 3.8781809562800316e-07, 'epoch': 10.89} + 68%|██████▊ | 252740/371472 [9:35:02<9:27:34, 3.49it/s] 68%|██████▊ | 252741/371472 [9:35:03<9:19:04, 3.54it/s] 68%|██████▊ | 252742/371472 [9:35:03<9:11:34, 3.59it/s] 68%|██████▊ | 252743/371472 [9:35:03<9:30:14, 3.47it/s] 68%|██████▊ | 252744/371472 [9:35:04<9:42:30, 3.40it/s] 68%|██████▊ | 252745/371472 [9:35:04<9:22:22, 3.52it/s] 68%|██████▊ | 252746/371472 [9:35:04<9:36:46, 3.43it/s] 68%|██████▊ | 252747/371472 [9:35:05<10:36:34, 3.11it/s] 68%|██████▊ | 252748/371472 [9:35:05<10:13:30, 3.23it/s] 68%|██████▊ | 252749/371472 [9:35:05<10:55:40, 3.02it/s] 68%|██████▊ | 252750/371472 [9:35:06<10:39:43, 3.09it/s] 68%|██████▊ | 252751/371472 [9:35:06<10:28:57, 3.15it/s] 68%|██████▊ | 252752/371472 [9:35:06<10:29:09, 3.14it/s] 68%|██████▊ | 252753/371472 [9:35:06<10:19:10, 3.20it/s] 68%|██████▊ | 252754/371472 [9:35:07<9:55:01, 3.33it/s] 68%|██████▊ | 252755/371472 [9:35:07<9:50:45, 3.35it/s] 68%|██████▊ | 252756/371472 [9:35:07<10:20:23, 3.19it/s] 68%|██████▊ | 252757/371472 [9:35:08<9:55:37, 3.32it/s] 68%|██████▊ | 252758/371472 [9:35:08<9:34:05, 3.45it/s] 68%|██████▊ | 252759/371472 [9:35:08<9:59:57, 3.30it/s] 68%|██████▊ | 252760/371472 [9:35:09<9:45:07, 3.38it/s] {'loss': 2.6872, 'learning_rate': 3.877696136525243e-07, 'epoch': 10.89} + 68%|██████▊ | 252760/371472 [9:35:09<9:45:07, 3.38it/s] 68%|██████▊ | 252761/371472 [9:35:09<9:45:56, 3.38it/s] 68%|██████▊ | 252762/371472 [9:35:09<10:13:01, 3.23it/s] 68%|██████▊ | 252763/371472 [9:35:09<10:05:31, 3.27it/s] 68%|██████▊ | 252764/371472 [9:35:10<10:00:31, 3.29it/s] 68%|██████▊ | 252765/371472 [9:35:10<9:47:35, 3.37it/s] 68%|██████▊ | 252766/371472 [9:35:10<9:49:20, 3.36it/s] 68%|██████▊ | 252767/371472 [9:35:11<10:20:02, 3.19it/s] 68%|██████▊ | 252768/371472 [9:35:11<10:01:29, 3.29it/s] 68%|██████▊ | 252769/371472 [9:35:11<9:52:44, 3.34it/s] 68%|██████▊ | 252770/371472 [9:35:12<9:59:41, 3.30it/s] 68%|██████▊ | 252771/371472 [9:35:12<10:05:33, 3.27it/s] 68%|██████▊ | 252772/371472 [9:35:12<10:22:23, 3.18it/s] 68%|██████▊ | 252773/371472 [9:35:13<10:31:21, 3.13it/s] 68%|██████▊ | 252774/371472 [9:35:13<10:45:15, 3.07it/s] 68%|██████▊ | 252775/371472 [9:35:13<10:57:46, 3.01it/s] 68%|██████▊ | 252776/371472 [9:35:14<11:23:49, 2.89it/s] 68%|██████▊ | 252777/371472 [9:35:14<11:19:56, 2.91it/s] 68%|██████▊ | 252778/371472 [9:35:14<10:42:53, 3.08it/s] 68%|██████▊ | 252779/371472 [9:35:15<10:12:52, 3.23it/s] 68%|██████▊ | 252780/371472 [9:35:15<10:05:38, 3.27it/s] {'loss': 2.768, 'learning_rate': 3.8772113167704535e-07, 'epoch': 10.89} + 68%|██████▊ | 252780/371472 [9:35:15<10:05:38, 3.27it/s] 68%|██████▊ | 252781/371472 [9:35:15<9:44:39, 3.38it/s] 68%|██████▊ | 252782/371472 [9:35:15<10:33:30, 3.12it/s] 68%|██████▊ | 252783/371472 [9:35:16<10:13:58, 3.22it/s] 68%|██████▊ | 252784/371472 [9:35:16<10:07:15, 3.26it/s] 68%|██████▊ | 252785/371472 [9:35:16<9:43:23, 3.39it/s] 68%|██████▊ | 252786/371472 [9:35:17<9:45:50, 3.38it/s] 68%|██████▊ | 252787/371472 [9:35:17<9:36:52, 3.43it/s] 68%|██████▊ | 252788/371472 [9:35:17<9:38:44, 3.42it/s] 68%|██████▊ | 252789/371472 [9:35:18<9:53:06, 3.34it/s] 68%|██████▊ | 252790/371472 [9:35:18<9:55:22, 3.32it/s] 68%|██████▊ | 252791/371472 [9:35:18<9:36:10, 3.43it/s] 68%|██████▊ | 252792/371472 [9:35:18<9:40:07, 3.41it/s] 68%|██████▊ | 252793/371472 [9:35:19<10:11:03, 3.24it/s] 68%|██████▊ | 252794/371472 [9:35:19<9:58:20, 3.31it/s] 68%|██████▊ | 252795/371472 [9:35:19<10:07:31, 3.26it/s] 68%|██████▊ | 252796/371472 [9:35:20<9:46:50, 3.37it/s] 68%|██████▊ | 252797/371472 [9:35:20<10:06:36, 3.26it/s] 68%|██████▊ | 252798/371472 [9:35:20<9:56:23, 3.32it/s] 68%|██████▊ | 252799/371472 [9:35:21<9:48:17, 3.36it/s] 68%|██████▊ | 252800/371472 [9:35:21<9:44:51, 3.38it/s] {'loss': 2.6201, 'learning_rate': 3.8767264970156653e-07, 'epoch': 10.89} + 68%|██████▊ | 252800/371472 [9:35:21<9:44:51, 3.38it/s] 68%|██████▊ | 252801/371472 [9:35:21<9:51:45, 3.34it/s] 68%|██████▊ | 252802/371472 [9:35:21<9:35:57, 3.43it/s] 68%|██████▊ | 252803/371472 [9:35:22<10:46:24, 3.06it/s] 68%|██████▊ | 252804/371472 [9:35:22<10:23:13, 3.17it/s] 68%|██████▊ | 252805/371472 [9:35:22<10:15:53, 3.21it/s] 68%|██████▊ | 252806/371472 [9:35:23<9:54:02, 3.33it/s] 68%|██████▊ | 252807/371472 [9:35:23<9:57:50, 3.31it/s] 68%|██████▊ | 252808/371472 [9:35:23<9:54:16, 3.33it/s] 68%|██████▊ | 252809/371472 [9:35:24<9:47:06, 3.37it/s] 68%|██████▊ | 252810/371472 [9:35:24<9:40:03, 3.41it/s] 68%|██████▊ | 252811/371472 [9:35:24<10:30:04, 3.14it/s] 68%|██████▊ | 252812/371472 [9:35:24<10:10:19, 3.24it/s] 68%|██████▊ | 252813/371472 [9:35:25<10:00:18, 3.29it/s] 68%|██████▊ | 252814/371472 [9:35:25<9:42:00, 3.40it/s] 68%|██████▊ | 252815/371472 [9:35:25<9:40:04, 3.41it/s] 68%|██████▊ | 252816/371472 [9:35:26<9:40:27, 3.41it/s] 68%|██████▊ | 252817/371472 [9:35:26<9:29:06, 3.47it/s] 68%|██████▊ | 252818/371472 [9:35:26<10:16:18, 3.21it/s] 68%|██████▊ | 252819/371472 [9:35:27<10:03:16, 3.28it/s] 68%|██████▊ | 252820/371472 [9:35:27<9:57:19, 3.31it/s] {'loss': 2.7861, 'learning_rate': 3.876241677260876e-07, 'epoch': 10.89} + 68%|██████▊ | 252820/371472 [9:35:27<9:57:19, 3.31it/s] 68%|██████▊ | 252821/371472 [9:35:27<10:09:09, 3.25it/s] 68%|██████▊ | 252822/371472 [9:35:28<10:13:58, 3.22it/s] 68%|██████▊ | 252823/371472 [9:35:28<10:06:26, 3.26it/s] 68%|██████▊ | 252824/371472 [9:35:28<9:49:36, 3.35it/s] 68%|██████▊ | 252825/371472 [9:35:28<10:26:02, 3.16it/s] 68%|██████▊ | 252826/371472 [9:35:29<10:09:56, 3.24it/s] 68%|██████▊ | 252827/371472 [9:35:29<9:53:10, 3.33it/s] 68%|██████▊ | 252828/371472 [9:35:29<9:53:39, 3.33it/s] 68%|██████▊ | 252829/371472 [9:35:30<9:47:35, 3.37it/s] 68%|██████▊ | 252830/371472 [9:35:30<9:40:39, 3.41it/s] 68%|██████▊ | 252831/371472 [9:35:30<9:23:27, 3.51it/s] 68%|██████▊ | 252832/371472 [9:35:30<9:42:22, 3.40it/s] 68%|██████▊ | 252833/371472 [9:35:31<9:41:10, 3.40it/s] 68%|██████▊ | 252834/371472 [9:35:31<9:28:30, 3.48it/s] 68%|██████▊ | 252835/371472 [9:35:31<9:47:01, 3.37it/s] 68%|██████▊ | 252836/371472 [9:35:32<9:42:10, 3.40it/s] 68%|██████▊ | 252837/371472 [9:35:32<9:42:24, 3.39it/s] 68%|██████▊ | 252838/371472 [9:35:32<9:35:42, 3.43it/s] 68%|██████▊ | 252839/371472 [9:35:33<9:49:27, 3.35it/s] 68%|██████▊ | 252840/371472 [9:35:33<10:41:31, 3.08it/s] {'loss': 2.6204, 'learning_rate': 3.875756857506087e-07, 'epoch': 10.89} + 68%|██████▊ | 252840/371472 [9:35:33<10:41:31, 3.08it/s] 68%|██████▊ | 252841/371472 [9:35:33<10:22:03, 3.18it/s] 68%|██████▊ | 252842/371472 [9:35:33<10:00:00, 3.30it/s] 68%|██████▊ | 252843/371472 [9:35:34<9:44:31, 3.38it/s] 68%|██████▊ | 252844/371472 [9:35:34<10:59:18, 3.00it/s] 68%|██████▊ | 252845/371472 [9:35:35<10:46:49, 3.06it/s] 68%|██████▊ | 252846/371472 [9:35:35<10:25:02, 3.16it/s] 68%|██████▊ | 252847/371472 [9:35:35<10:47:59, 3.05it/s] 68%|██████▊ | 252848/371472 [9:35:35<10:56:39, 3.01it/s] 68%|██████▊ | 252849/371472 [9:35:36<11:24:50, 2.89it/s] 68%|██████▊ | 252850/371472 [9:35:36<11:11:51, 2.94it/s] 68%|██████▊ | 252851/371472 [9:35:36<10:49:33, 3.04it/s] 68%|██████▊ | 252852/371472 [9:35:37<10:24:01, 3.17it/s] 68%|██████▊ | 252853/371472 [9:35:37<10:06:03, 3.26it/s] 68%|██████▊ | 252854/371472 [9:35:37<10:04:56, 3.27it/s] 68%|██████▊ | 252855/371472 [9:35:38<10:00:41, 3.29it/s] 68%|██████▊ | 252856/371472 [9:35:38<9:50:31, 3.35it/s] 68%|██████▊ | 252857/371472 [9:35:38<10:15:58, 3.21it/s] 68%|██████▊ | 252858/371472 [9:35:39<10:46:54, 3.06it/s] 68%|██████▊ | 252859/371472 [9:35:39<10:53:08, 3.03it/s] 68%|██████▊ | 252860/371472 [9:35:39<10:29:59, 3.14it/s] {'loss': 2.6845, 'learning_rate': 3.875272037751298e-07, 'epoch': 10.89} + 68%|██████▊ | 252860/371472 [9:35:39<10:29:59, 3.14it/s] 68%|██████▊ | 252861/371472 [9:35:40<10:11:45, 3.23it/s] 68%|██████▊ | 252862/371472 [9:35:40<9:48:49, 3.36it/s] 68%|██████▊ | 252863/371472 [9:35:40<11:25:27, 2.88it/s] 68%|██████▊ | 252864/371472 [9:35:41<10:58:24, 3.00it/s] 68%|██████▊ | 252865/371472 [9:35:41<11:14:10, 2.93it/s] 68%|██████▊ | 252866/371472 [9:35:41<11:13:28, 2.94it/s] 68%|██████▊ | 252867/371472 [9:35:42<10:45:05, 3.06it/s] 68%|██████▊ | 252868/371472 [9:35:42<10:28:33, 3.14it/s] 68%|██████▊ | 252869/371472 [9:35:42<10:10:45, 3.24it/s] 68%|██████▊ | 252870/371472 [9:35:42<9:46:18, 3.37it/s] 68%|██████▊ | 252871/371472 [9:35:43<9:34:51, 3.44it/s] 68%|██████▊ | 252872/371472 [9:35:43<9:29:17, 3.47it/s] 68%|██████▊ | 252873/371472 [9:35:43<9:24:16, 3.50it/s] 68%|██████▊ | 252874/371472 [9:35:44<9:48:21, 3.36it/s] 68%|██████▊ | 252875/371472 [9:35:44<9:47:24, 3.36it/s] 68%|██████▊ | 252876/371472 [9:35:44<9:41:05, 3.40it/s] 68%|██████▊ | 252877/371472 [9:35:45<9:45:51, 3.37it/s] 68%|██████▊ | 252878/371472 [9:35:45<9:34:37, 3.44it/s] 68%|██████▊ | 252879/371472 [9:35:45<9:36:28, 3.43it/s] 68%|██████▊ | 252880/371472 [9:35:45<9:38:01, 3.42it/s] {'loss': 2.8161, 'learning_rate': 3.8747872179965087e-07, 'epoch': 10.89} + 68%|██████▊ | 252880/371472 [9:35:45<9:38:01, 3.42it/s] 68%|██████▊ | 252881/371472 [9:35:46<9:48:09, 3.36it/s] 68%|██████▊ | 252882/371472 [9:35:46<9:50:43, 3.35it/s] 68%|██████▊ | 252883/371472 [9:35:46<9:59:44, 3.30it/s] 68%|██████▊ | 252884/371472 [9:35:47<9:50:17, 3.35it/s] 68%|██████▊ | 252885/371472 [9:35:47<9:51:04, 3.34it/s] 68%|██████▊ | 252886/371472 [9:35:47<9:47:41, 3.36it/s] 68%|██████▊ | 252887/371472 [9:35:48<10:09:03, 3.25it/s] 68%|██████▊ | 252888/371472 [9:35:48<9:53:21, 3.33it/s] 68%|██████▊ | 252889/371472 [9:35:48<9:39:00, 3.41it/s] 68%|██████▊ | 252890/371472 [9:35:48<9:24:09, 3.50it/s] 68%|██████▊ | 252891/371472 [9:35:49<10:07:14, 3.25it/s] 68%|██████▊ | 252892/371472 [9:35:49<10:32:13, 3.13it/s] 68%|██████▊ | 252893/371472 [9:35:49<11:03:13, 2.98it/s] 68%|██████▊ | 252894/371472 [9:35:50<10:51:02, 3.04it/s] 68%|██████▊ | 252895/371472 [9:35:50<10:29:09, 3.14it/s] 68%|██████▊ | 252896/371472 [9:35:50<10:19:52, 3.19it/s] 68%|██████▊ | 252897/371472 [9:35:51<10:49:13, 3.04it/s] 68%|██████▊ | 252898/371472 [9:35:51<10:40:52, 3.08it/s] 68%|██████▊ | 252899/371472 [9:35:51<10:56:15, 3.01it/s] 68%|██████▊ | 252900/371472 [9:35:52<10:34:08, 3.12it/s] {'loss': 2.5913, 'learning_rate': 3.8743023982417205e-07, 'epoch': 10.89} + 68%|██████▊ | 252900/371472 [9:35:52<10:34:08, 3.12it/s] 68%|██████▊ | 252901/371472 [9:35:52<10:43:55, 3.07it/s] 68%|██████▊ | 252902/371472 [9:35:52<10:17:20, 3.20it/s] 68%|██████▊ | 252903/371472 [9:35:53<10:21:50, 3.18it/s] 68%|██████▊ | 252904/371472 [9:35:53<10:20:07, 3.19it/s] 68%|██████▊ | 252905/371472 [9:35:53<10:18:41, 3.19it/s] 68%|██████▊ | 252906/371472 [9:35:54<11:00:14, 2.99it/s] 68%|██████▊ | 252907/371472 [9:35:54<11:10:30, 2.95it/s] 68%|██████▊ | 252908/371472 [9:35:54<11:05:02, 2.97it/s] 68%|██████▊ | 252909/371472 [9:35:55<11:46:03, 2.80it/s] 68%|██████▊ | 252910/371472 [9:35:55<11:08:04, 2.96it/s] 68%|██████▊ | 252911/371472 [9:35:55<10:55:16, 3.02it/s] 68%|██████▊ | 252912/371472 [9:35:56<10:33:26, 3.12it/s] 68%|██████▊ | 252913/371472 [9:35:56<10:31:27, 3.13it/s] 68%|██████▊ | 252914/371472 [9:35:56<10:19:19, 3.19it/s] 68%|██████▊ | 252915/371472 [9:35:57<10:07:34, 3.25it/s] 68%|██████▊ | 252916/371472 [9:35:57<9:57:59, 3.30it/s] 68%|██████▊ | 252917/371472 [9:35:57<9:54:14, 3.33it/s] 68%|██████▊ | 252918/371472 [9:35:57<10:07:59, 3.25it/s] 68%|██████▊ | 252919/371472 [9:35:58<10:23:03, 3.17it/s] 68%|██████▊ | 252920/371472 [9:35:58<10:12:05, 3.23it/s] {'loss': 2.7811, 'learning_rate': 3.8738175784869307e-07, 'epoch': 10.89} + 68%|██████▊ | 252920/371472 [9:35:58<10:12:05, 3.23it/s] 68%|██████▊ | 252921/371472 [9:35:58<10:07:10, 3.25it/s] 68%|██████▊ | 252922/371472 [9:35:59<10:08:40, 3.25it/s] 68%|██████▊ | 252923/371472 [9:35:59<10:01:13, 3.29it/s] 68%|██████▊ | 252924/371472 [9:35:59<10:09:28, 3.24it/s] 68%|██████▊ | 252925/371472 [9:36:00<10:03:21, 3.27it/s] 68%|██████▊ | 252926/371472 [9:36:00<10:01:02, 3.29it/s] 68%|██████▊ | 252927/371472 [9:36:00<9:55:47, 3.32it/s] 68%|██████▊ | 252928/371472 [9:36:00<10:02:32, 3.28it/s] 68%|██████▊ | 252929/371472 [9:36:01<9:59:12, 3.30it/s] 68%|██████▊ | 252930/371472 [9:36:01<10:18:56, 3.19it/s] 68%|██████▊ | 252931/371472 [9:36:01<10:00:18, 3.29it/s] 68%|██████▊ | 252932/371472 [9:36:02<9:48:45, 3.36it/s] 68%|██████▊ | 252933/371472 [9:36:02<9:35:58, 3.43it/s] 68%|██████▊ | 252934/371472 [9:36:02<9:24:08, 3.50it/s] 68%|██████▊ | 252935/371472 [9:36:03<9:35:27, 3.43it/s] 68%|██████▊ | 252936/371472 [9:36:03<9:33:34, 3.44it/s] 68%|██████▊ | 252937/371472 [9:36:03<9:19:18, 3.53it/s] 68%|██████▊ | 252938/371472 [9:36:03<9:24:01, 3.50it/s] 68%|██████▊ | 252939/371472 [9:36:04<9:40:59, 3.40it/s] 68%|██████▊ | 252940/371472 [9:36:04<9:32:42, 3.45it/s] {'loss': 2.5047, 'learning_rate': 3.8733327587321424e-07, 'epoch': 10.89} + 68%|██████▊ | 252940/371472 [9:36:04<9:32:42, 3.45it/s] 68%|██████▊ | 252941/371472 [9:36:04<9:53:22, 3.33it/s] 68%|██████▊ | 252942/371472 [9:36:05<10:00:26, 3.29it/s] 68%|██████▊ | 252943/371472 [9:36:05<9:34:12, 3.44it/s] 68%|██████▊ | 252944/371472 [9:36:05<9:57:03, 3.31it/s] 68%|██████▊ | 252945/371472 [9:36:06<10:06:02, 3.26it/s] 68%|██████▊ | 252946/371472 [9:36:06<9:52:59, 3.33it/s] 68%|██████▊ | 252947/371472 [9:36:06<9:41:36, 3.40it/s] 68%|██████▊ | 252948/371472 [9:36:06<9:59:59, 3.29it/s] 68%|██████▊ | 252949/371472 [9:36:07<10:39:23, 3.09it/s] 68%|██████▊ | 252950/371472 [9:36:07<10:29:31, 3.14it/s] 68%|██████▊ | 252951/371472 [9:36:07<10:56:29, 3.01it/s] 68%|██████▊ | 252952/371472 [9:36:08<10:37:52, 3.10it/s] 68%|██████▊ | 252953/371472 [9:36:08<10:17:10, 3.20it/s] 68%|██████▊ | 252954/371472 [9:36:08<10:05:32, 3.26it/s] 68%|██████▊ | 252955/371472 [9:36:09<9:52:10, 3.34it/s] 68%|██████▊ | 252956/371472 [9:36:09<9:56:27, 3.31it/s] 68%|██████▊ | 252957/371472 [9:36:09<9:58:55, 3.30it/s] 68%|██████▊ | 252958/371472 [9:36:10<10:17:54, 3.20it/s] 68%|██████▊ | 252959/371472 [9:36:10<10:15:14, 3.21it/s] 68%|██████▊ | 252960/371472 [9:36:10<10:11:14, 3.23it/s] {'loss': 2.6822, 'learning_rate': 3.872847938977353e-07, 'epoch': 10.9} + 68%|██████▊ | 252960/371472 [9:36:10<10:11:14, 3.23it/s] 68%|██████▊ | 252961/371472 [9:36:10<10:12:36, 3.22it/s] 68%|██████▊ | 252962/371472 [9:36:11<9:45:07, 3.38it/s] 68%|██████▊ | 252963/371472 [9:36:11<9:48:10, 3.36it/s] 68%|██████▊ | 252964/371472 [9:36:11<9:48:14, 3.36it/s] 68%|██████▊ | 252965/371472 [9:36:12<9:26:26, 3.49it/s] 68%|██████▊ | 252966/371472 [9:36:12<9:34:21, 3.44it/s] 68%|██████▊ | 252967/371472 [9:36:12<10:32:51, 3.12it/s] 68%|██████▊ | 252968/371472 [9:36:13<10:08:32, 3.25it/s] 68%|██████▊ | 252969/371472 [9:36:13<9:54:03, 3.32it/s] 68%|██████▊ | 252970/371472 [9:36:13<9:39:03, 3.41it/s] 68%|██████▊ | 252971/371472 [9:36:13<9:39:31, 3.41it/s] 68%|██████▊ | 252972/371472 [9:36:14<9:47:18, 3.36it/s] 68%|██████▊ | 252973/371472 [9:36:14<9:36:28, 3.43it/s] 68%|██████▊ | 252974/371472 [9:36:14<9:36:16, 3.43it/s] 68%|██████▊ | 252975/371472 [9:36:15<9:29:27, 3.47it/s] 68%|██████▊ | 252976/371472 [9:36:15<9:20:44, 3.52it/s] 68%|██████▊ | 252977/371472 [9:36:15<10:06:28, 3.26it/s] 68%|██████▊ | 252978/371472 [9:36:16<9:49:13, 3.35it/s] 68%|██████▊ | 252979/371472 [9:36:16<11:26:57, 2.87it/s] 68%|██████▊ | 252980/371472 [9:36:16<11:09:42, 2.95it/s] {'loss': 2.7796, 'learning_rate': 3.8723631192225644e-07, 'epoch': 10.9} + 68%|██████▊ | 252980/371472 [9:36:16<11:09:42, 2.95it/s] 68%|██████▊ | 252981/371472 [9:36:17<10:50:27, 3.04it/s] 68%|██████▊ | 252982/371472 [9:36:17<10:18:58, 3.19it/s] 68%|██████▊ | 252983/371472 [9:36:17<10:11:24, 3.23it/s] 68%|██████▊ | 252984/371472 [9:36:17<10:06:54, 3.25it/s] 68%|██████▊ | 252985/371472 [9:36:18<9:53:28, 3.33it/s] 68%|██████▊ | 252986/371472 [9:36:18<9:43:47, 3.38it/s] 68%|██████▊ | 252987/371472 [9:36:18<9:40:51, 3.40it/s] 68%|██████▊ | 252988/371472 [9:36:19<9:34:18, 3.44it/s] 68%|██████▊ | 252989/371472 [9:36:19<10:00:11, 3.29it/s] 68%|██████▊ | 252990/371472 [9:36:19<10:14:31, 3.21it/s] 68%|██████▊ | 252991/371472 [9:36:20<10:01:36, 3.28it/s] 68%|██████▊ | 252992/371472 [9:36:20<9:48:34, 3.35it/s] 68%|██████▊ | 252993/371472 [9:36:20<9:46:10, 3.37it/s] 68%|██████▊ | 252994/371472 [9:36:20<9:53:27, 3.33it/s] 68%|██████▊ | 252995/371472 [9:36:21<9:52:35, 3.33it/s] 68%|██████▊ | 252996/371472 [9:36:21<9:43:36, 3.38it/s] 68%|██████▊ | 252997/371472 [9:36:21<9:37:06, 3.42it/s] 68%|██████▊ | 252998/371472 [9:36:22<9:38:35, 3.41it/s] 68%|██████▊ | 252999/371472 [9:36:22<9:31:03, 3.46it/s] 68%|██████▊ | 253000/371472 [9:36:22<9:22:18, 3.51it/s] {'loss': 2.7107, 'learning_rate': 3.871878299467775e-07, 'epoch': 10.9} + 68%|██████▊ | 253000/371472 [9:36:22<9:22:18, 3.51it/s] 68%|██████▊ | 253001/371472 [9:36:22<9:22:19, 3.51it/s] 68%|██████▊ | 253002/371472 [9:36:23<9:46:47, 3.36it/s] 68%|██████▊ | 253003/371472 [9:36:23<9:54:55, 3.32it/s] 68%|██████▊ | 253004/371472 [9:36:23<10:08:30, 3.24it/s] 68%|██████▊ | 253005/371472 [9:36:24<9:48:37, 3.35it/s] 68%|██████▊ | 253006/371472 [9:36:24<9:32:35, 3.45it/s] 68%|██████▊ | 253007/371472 [9:36:24<9:31:15, 3.46it/s] 68%|██████▊ | 253008/371472 [9:36:25<9:28:56, 3.47it/s] 68%|██████▊ | 253009/371472 [9:36:25<9:27:49, 3.48it/s] 68%|██████▊ | 253010/371472 [9:36:25<9:33:41, 3.44it/s] 68%|██████▊ | 253011/371472 [9:36:25<9:43:29, 3.38it/s] 68%|██████▊ | 253012/371472 [9:36:26<9:58:13, 3.30it/s] 68%|██████▊ | 253013/371472 [9:36:26<9:42:24, 3.39it/s] 68%|██████▊ | 253014/371472 [9:36:26<9:49:39, 3.35it/s] 68%|██████▊ | 253015/371472 [9:36:27<9:57:53, 3.30it/s] 68%|██████▊ | 253016/371472 [9:36:27<9:40:23, 3.40it/s] 68%|██████▊ | 253017/371472 [9:36:27<9:36:55, 3.42it/s] 68%|██████▊ | 253018/371472 [9:36:28<9:50:00, 3.35it/s] 68%|██████▊ | 253019/371472 [9:36:28<9:54:37, 3.32it/s] 68%|██████▊ | 253020/371472 [9:36:28<9:49:47, 3.35it/s] {'loss': 2.6998, 'learning_rate': 3.871393479712987e-07, 'epoch': 10.9} + 68%|██████▊ | 253020/371472 [9:36:28<9:49:47, 3.35it/s] 68%|██████▊ | 253021/371472 [9:36:28<9:54:35, 3.32it/s] 68%|██████▊ | 253022/371472 [9:36:29<9:42:56, 3.39it/s] 68%|██████▊ | 253023/371472 [9:36:29<10:07:50, 3.25it/s] 68%|██████▊ | 253024/371472 [9:36:29<10:14:28, 3.21it/s] 68%|██████▊ | 253025/371472 [9:36:30<10:19:39, 3.19it/s] 68%|██████▊ | 253026/371472 [9:36:30<10:05:30, 3.26it/s] 68%|██████▊ | 253027/371472 [9:36:30<10:15:26, 3.21it/s] 68%|██████▊ | 253028/371472 [9:36:31<10:32:18, 3.12it/s] 68%|██████▊ | 253029/371472 [9:36:31<10:15:55, 3.21it/s] 68%|██████▊ | 253030/371472 [9:36:31<10:06:38, 3.25it/s] 68%|██████▊ | 253031/371472 [9:36:32<9:58:12, 3.30it/s] 68%|██████▊ | 253032/371472 [9:36:32<9:52:53, 3.33it/s] 68%|██████▊ | 253033/371472 [9:36:32<9:42:57, 3.39it/s] 68%|██████▊ | 253034/371472 [9:36:32<9:48:52, 3.35it/s] 68%|██████▊ | 253035/371472 [9:36:33<9:32:19, 3.45it/s] 68%|██████▊ | 253036/371472 [9:36:33<9:47:17, 3.36it/s] 68%|██████▊ | 253037/371472 [9:36:33<9:40:32, 3.40it/s] 68%|██████▊ | 253038/371472 [9:36:34<10:12:13, 3.22it/s] 68%|██████▊ | 253039/371472 [9:36:34<9:50:45, 3.34it/s] 68%|██████▊ | 253040/371472 [9:36:34<10:23:46, 3.16it/s] {'loss': 2.5733, 'learning_rate': 3.870908659958197e-07, 'epoch': 10.9} + 68%|██████▊ | 253040/371472 [9:36:34<10:23:46, 3.16it/s] 68%|██████▊ | 253041/371472 [9:36:35<10:09:57, 3.24it/s] 68%|██████▊ | 253042/371472 [9:36:35<9:49:58, 3.35it/s] 68%|██████▊ | 253043/371472 [9:36:35<9:35:52, 3.43it/s] 68%|██████▊ | 253044/371472 [9:36:35<9:54:18, 3.32it/s] 68%|██████▊ | 253045/371472 [9:36:36<9:42:08, 3.39it/s] 68%|██████▊ | 253046/371472 [9:36:36<9:47:43, 3.36it/s] 68%|██████▊ | 253047/371472 [9:36:36<10:19:24, 3.19it/s] 68%|██████▊ | 253048/371472 [9:36:37<10:13:25, 3.22it/s] 68%|██████▊ | 253049/371472 [9:36:37<9:56:39, 3.31it/s] 68%|██████▊ | 253050/371472 [9:36:37<9:57:17, 3.30it/s] 68%|██████▊ | 253051/371472 [9:36:38<9:56:01, 3.31it/s] 68%|██████▊ | 253052/371472 [9:36:38<9:43:50, 3.38it/s] 68%|██████▊ | 253053/371472 [9:36:38<10:09:39, 3.24it/s] 68%|██████▊ | 253054/371472 [9:36:39<10:51:22, 3.03it/s] 68%|██████▊ | 253055/371472 [9:36:39<10:18:42, 3.19it/s] 68%|██████▊ | 253056/371472 [9:36:39<10:07:31, 3.25it/s] 68%|██████▊ | 253057/371472 [9:36:39<10:08:30, 3.24it/s] 68%|██████▊ | 253058/371472 [9:36:40<9:48:31, 3.35it/s] 68%|██████▊ | 253059/371472 [9:36:40<9:30:10, 3.46it/s] 68%|██████▊ | 253060/371472 [9:36:40<9:16:44, 3.54it/s] {'loss': 2.6319, 'learning_rate': 3.870423840203409e-07, 'epoch': 10.9} + 68%|██████▊ | 253060/371472 [9:36:40<9:16:44, 3.54it/s] 68%|██████▊ | 253061/371472 [9:36:41<9:23:36, 3.50it/s] 68%|██████▊ | 253062/371472 [9:36:41<9:25:42, 3.49it/s] 68%|██████▊ | 253063/371472 [9:36:41<9:40:12, 3.40it/s] 68%|██████▊ | 253064/371472 [9:36:41<9:41:12, 3.40it/s] 68%|██████▊ | 253065/371472 [9:36:42<9:27:24, 3.48it/s] 68%|██████▊ | 253066/371472 [9:36:42<9:46:12, 3.37it/s] 68%|██████▊ | 253067/371472 [9:36:42<10:49:01, 3.04it/s] 68%|██████▊ | 253068/371472 [9:36:43<10:39:48, 3.08it/s] 68%|██████▊ | 253069/371472 [9:36:43<10:16:30, 3.20it/s] 68%|██████▊ | 253070/371472 [9:36:43<10:02:53, 3.27it/s] 68%|██████▊ | 253071/371472 [9:36:44<9:53:13, 3.33it/s] 68%|██████▊ | 253072/371472 [9:36:44<9:47:00, 3.36it/s] 68%|██████▊ | 253073/371472 [9:36:44<10:21:53, 3.17it/s] 68%|██████▊ | 253074/371472 [9:36:45<10:07:13, 3.25it/s] 68%|██████▊ | 253075/371472 [9:36:45<10:01:01, 3.28it/s] 68%|██████▊ | 253076/371472 [9:36:45<9:54:49, 3.32it/s] 68%|██████▊ | 253077/371472 [9:36:45<10:18:20, 3.19it/s] 68%|██████▊ | 253078/371472 [9:36:46<10:14:17, 3.21it/s] 68%|██████▊ | 253079/371472 [9:36:46<9:55:11, 3.32it/s] 68%|██████▊ | 253080/371472 [9:36:46<9:45:09, 3.37it/s] {'loss': 2.6217, 'learning_rate': 3.8699390204486195e-07, 'epoch': 10.9} + 68%|██████▊ | 253080/371472 [9:36:46<9:45:09, 3.37it/s] 68%|██████▊ | 253081/371472 [9:36:47<9:43:16, 3.38it/s] 68%|██████▊ | 253082/371472 [9:36:47<9:45:28, 3.37it/s] 68%|██████▊ | 253083/371472 [9:36:47<9:39:31, 3.40it/s] 68%|██████▊ | 253084/371472 [9:36:48<10:04:19, 3.27it/s] 68%|██████▊ | 253085/371472 [9:36:48<10:06:35, 3.25it/s] 68%|██████▊ | 253086/371472 [9:36:48<10:29:31, 3.13it/s] 68%|██████▊ | 253087/371472 [9:36:48<10:16:49, 3.20it/s] 68%|██████▊ | 253088/371472 [9:36:49<9:58:21, 3.30it/s] 68%|██████▊ | 253089/371472 [9:36:49<9:53:44, 3.32it/s] 68%|██████▊ | 253090/371472 [9:36:49<9:33:17, 3.44it/s] 68%|██████▊ | 253091/371472 [9:36:50<10:02:20, 3.28it/s] 68%|██████▊ | 253092/371472 [9:36:50<10:25:51, 3.15it/s] 68%|██████▊ | 253093/371472 [9:36:50<10:04:30, 3.26it/s] 68%|██████▊ | 253094/371472 [9:36:51<10:03:57, 3.27it/s] 68%|██████▊ | 253095/371472 [9:36:51<10:51:51, 3.03it/s] 68%|██████▊ | 253096/371472 [9:36:51<10:47:10, 3.05it/s] 68%|██████▊ | 253097/371472 [9:36:52<10:39:20, 3.09it/s] 68%|██████▊ | 253098/371472 [9:36:52<10:05:50, 3.26it/s] 68%|██████▊ | 253099/371472 [9:36:52<10:08:11, 3.24it/s] 68%|██████▊ | 253100/371472 [9:36:53<9:54:58, 3.32it/s] {'loss': 2.7137, 'learning_rate': 3.869454200693831e-07, 'epoch': 10.9} + 68%|██████▊ | 253100/371472 [9:36:53<9:54:58, 3.32it/s] 68%|██████▊ | 253101/371472 [9:36:53<9:58:15, 3.30it/s] 68%|██████▊ | 253102/371472 [9:36:53<9:54:25, 3.32it/s] 68%|██████▊ | 253103/371472 [9:36:53<9:36:23, 3.42it/s] 68%|██████▊ | 253104/371472 [9:36:54<9:41:41, 3.39it/s] 68%|██████▊ | 253105/371472 [9:36:54<9:44:06, 3.38it/s] 68%|██████▊ | 253106/371472 [9:36:54<9:40:31, 3.40it/s] 68%|██████▊ | 253107/371472 [9:36:55<9:34:34, 3.43it/s] 68%|██████▊ | 253108/371472 [9:36:55<9:48:53, 3.35it/s] 68%|██████▊ | 253109/371472 [9:36:55<9:47:53, 3.36it/s] 68%|██████▊ | 253110/371472 [9:36:55<9:36:34, 3.42it/s] 68%|██████▊ | 253111/371472 [9:36:56<9:42:31, 3.39it/s] 68%|██████▊ | 253112/371472 [9:36:56<9:50:36, 3.34it/s] 68%|██████▊ | 253113/371472 [9:36:56<9:50:52, 3.34it/s] 68%|██████▊ | 253114/371472 [9:36:57<9:36:42, 3.42it/s] 68%|██████▊ | 253115/371472 [9:36:57<9:42:12, 3.39it/s] 68%|██████▊ | 253116/371472 [9:36:57<9:41:29, 3.39it/s] 68%|██████▊ | 253117/371472 [9:36:58<9:51:51, 3.33it/s] 68%|██████▊ | 253118/371472 [9:36:58<9:51:36, 3.33it/s] 68%|██████▊ | 253119/371472 [9:36:58<9:45:42, 3.37it/s] 68%|██████▊ | 253120/371472 [9:36:58<9:45:16, 3.37it/s] {'loss': 2.6858, 'learning_rate': 3.8689693809390415e-07, 'epoch': 10.9} + 68%|██████▊ | 253120/371472 [9:36:58<9:45:16, 3.37it/s] 68%|██████▊ | 253121/371472 [9:36:59<9:46:44, 3.36it/s] 68%|██████▊ | 253122/371472 [9:36:59<9:37:51, 3.41it/s] 68%|██████▊ | 253123/371472 [9:36:59<9:35:12, 3.43it/s] 68%|██████▊ | 253124/371472 [9:37:00<10:05:48, 3.26it/s] 68%|██████▊ | 253125/371472 [9:37:00<9:54:33, 3.32it/s] 68%|██████▊ | 253126/371472 [9:37:00<9:38:56, 3.41it/s] 68%|██████▊ | 253127/371472 [9:37:01<10:06:16, 3.25it/s] 68%|██████▊ | 253128/371472 [9:37:01<10:41:47, 3.07it/s] 68%|██████▊ | 253129/371472 [9:37:01<10:31:39, 3.12it/s] 68%|██████▊ | 253130/371472 [9:37:02<10:16:53, 3.20it/s] 68%|██████▊ | 253131/371472 [9:37:02<10:11:35, 3.22it/s] 68%|██████▊ | 253132/371472 [9:37:02<10:02:55, 3.27it/s] 68%|██████▊ | 253133/371472 [9:37:02<9:47:11, 3.36it/s] 68%|██████▊ | 253134/371472 [9:37:03<9:49:22, 3.35it/s] 68%|██████▊ | 253135/371472 [9:37:03<10:29:42, 3.13it/s] 68%|██████▊ | 253136/371472 [9:37:03<10:16:27, 3.20it/s] 68%|██████▊ | 253137/371472 [9:37:04<9:54:02, 3.32it/s] 68%|██████▊ | 253138/371472 [9:37:04<9:54:38, 3.32it/s] 68%|██████▊ | 253139/371472 [9:37:04<10:38:39, 3.09it/s] 68%|██████▊ | 253140/371472 [9:37:05<10:17:23, 3.19it/s] {'loss': 2.5383, 'learning_rate': 3.8684845611842533e-07, 'epoch': 10.9} + 68%|██████▊ | 253140/371472 [9:37:05<10:17:23, 3.19it/s] 68%|██████▊ | 253141/371472 [9:37:05<9:50:26, 3.34it/s] 68%|██████▊ | 253142/371472 [9:37:05<9:40:45, 3.40it/s] 68%|██████▊ | 253143/371472 [9:37:05<9:35:47, 3.43it/s] 68%|██████▊ | 253144/371472 [9:37:06<9:42:04, 3.39it/s] 68%|██████▊ | 253145/371472 [9:37:06<9:33:58, 3.44it/s] 68%|██████▊ | 253146/371472 [9:37:06<9:34:25, 3.43it/s] 68%|██████▊ | 253147/371472 [9:37:07<9:40:05, 3.40it/s] 68%|██████▊ | 253148/371472 [9:37:07<9:48:30, 3.35it/s] 68%|██████▊ | 253149/371472 [9:37:07<9:43:42, 3.38it/s] 68%|██████▊ | 253150/371472 [9:37:08<10:02:54, 3.27it/s] 68%|██████▊ | 253151/371472 [9:37:08<10:29:29, 3.13it/s] 68%|██████▊ | 253152/371472 [9:37:08<10:38:13, 3.09it/s] 68%|██████▊ | 253153/371472 [9:37:08<10:12:32, 3.22it/s] 68%|██████▊ | 253154/371472 [9:37:09<10:11:31, 3.22it/s] 68%|██████▊ | 253155/371472 [9:37:09<9:55:53, 3.31it/s] 68%|██████▊ | 253156/371472 [9:37:09<10:38:32, 3.09it/s] 68%|██████▊ | 253157/371472 [9:37:10<10:52:45, 3.02it/s] 68%|██████▊ | 253158/371472 [9:37:10<11:11:48, 2.94it/s] 68%|██████▊ | 253159/371472 [9:37:11<11:04:51, 2.97it/s] 68%|██████▊ | 253160/371472 [9:37:11<10:32:11, 3.12it/s] {'loss': 2.6669, 'learning_rate': 3.8679997414294635e-07, 'epoch': 10.9} + 68%|██████▊ | 253160/371472 [9:37:11<10:32:11, 3.12it/s] 68%|██████▊ | 253161/371472 [9:37:11<10:14:53, 3.21it/s] 68%|██████▊ | 253162/371472 [9:37:11<10:42:20, 3.07it/s] 68%|██████▊ | 253163/371472 [9:37:12<10:43:09, 3.07it/s] 68%|██████▊ | 253164/371472 [9:37:12<10:16:13, 3.20it/s] 68%|██████▊ | 253165/371472 [9:37:12<10:01:51, 3.28it/s] 68%|██████▊ | 253166/371472 [9:37:13<9:43:46, 3.38it/s] 68%|██████▊ | 253167/371472 [9:37:13<9:27:07, 3.48it/s] 68%|██████▊ | 253168/371472 [9:37:13<9:21:02, 3.51it/s] 68%|██████▊ | 253169/371472 [9:37:13<9:37:55, 3.41it/s] 68%|██████▊ | 253170/371472 [9:37:14<9:39:02, 3.41it/s] 68%|██████▊ | 253171/371472 [9:37:14<9:51:11, 3.34it/s] 68%|██████▊ | 253172/371472 [9:37:14<9:40:31, 3.40it/s] 68%|██████▊ | 253173/371472 [9:37:15<9:42:27, 3.39it/s] 68%|██████▊ | 253174/371472 [9:37:15<9:55:44, 3.31it/s] 68%|██████▊ | 253175/371472 [9:37:15<9:42:52, 3.38it/s] 68%|██████▊ | 253176/371472 [9:37:16<9:43:59, 3.38it/s] 68%|██████▊ | 253177/371472 [9:37:16<9:36:35, 3.42it/s] 68%|██████▊ | 253178/371472 [9:37:16<10:42:30, 3.07it/s] 68%|██████▊ | 253179/371472 [9:37:17<10:44:40, 3.06it/s] 68%|██████▊ | 253180/371472 [9:37:17<10:18:07, 3.19it/s] {'loss': 2.7556, 'learning_rate': 3.8675149216746747e-07, 'epoch': 10.9} + 68%|██████▊ | 253180/371472 [9:37:17<10:18:07, 3.19it/s] 68%|██████▊ | 253181/371472 [9:37:17<9:55:24, 3.31it/s] 68%|██████▊ | 253182/371472 [9:37:17<10:15:53, 3.20it/s] 68%|██████▊ | 253183/371472 [9:37:18<10:00:54, 3.28it/s] 68%|██████▊ | 253184/371472 [9:37:18<10:04:34, 3.26it/s] 68%|██████▊ | 253185/371472 [9:37:18<9:50:23, 3.34it/s] 68%|██████▊ | 253186/371472 [9:37:19<9:41:09, 3.39it/s] 68%|██████▊ | 253187/371472 [9:37:19<10:21:54, 3.17it/s] 68%|██████▊ | 253188/371472 [9:37:19<10:17:03, 3.19it/s] 68%|██████▊ | 253189/371472 [9:37:20<10:11:41, 3.22it/s] 68%|██████▊ | 253190/371472 [9:37:20<10:19:41, 3.18it/s] 68%|██████▊ | 253191/371472 [9:37:20<10:04:45, 3.26it/s] 68%|██████▊ | 253192/371472 [9:37:21<9:54:30, 3.32it/s] 68%|██████▊ | 253193/371472 [9:37:21<9:34:31, 3.43it/s] 68%|██████▊ | 253194/371472 [9:37:21<9:50:39, 3.34it/s] 68%|██████▊ | 253195/371472 [9:37:21<9:41:02, 3.39it/s] 68%|██████▊ | 253196/371472 [9:37:22<10:06:51, 3.25it/s] 68%|██████▊ | 253197/371472 [9:37:22<10:02:58, 3.27it/s] 68%|██████▊ | 253198/371472 [9:37:22<10:00:29, 3.28it/s] 68%|██████▊ | 253199/371472 [9:37:23<9:57:37, 3.30it/s] 68%|██████▊ | 253200/371472 [9:37:23<9:45:22, 3.37it/s] {'loss': 2.7454, 'learning_rate': 3.867030101919886e-07, 'epoch': 10.91} + 68%|██████▊ | 253200/371472 [9:37:23<9:45:22, 3.37it/s] 68%|██████▊ | 253201/371472 [9:37:23<9:39:15, 3.40it/s] 68%|��█████▊ | 253202/371472 [9:37:23<9:39:15, 3.40it/s] 68%|██████▊ | 253203/371472 [9:37:24<9:32:21, 3.44it/s] 68%|██████▊ | 253204/371472 [9:37:24<9:25:54, 3.48it/s] 68%|██████▊ | 253205/371472 [9:37:24<9:27:14, 3.47it/s] 68%|██████▊ | 253206/371472 [9:37:25<9:28:47, 3.47it/s] 68%|██████▊ | 253207/371472 [9:37:25<9:45:25, 3.37it/s] 68%|██████▊ | 253208/371472 [9:37:25<9:40:05, 3.40it/s] 68%|██████▊ | 253209/371472 [9:37:26<9:46:45, 3.36it/s] 68%|██████▊ | 253210/371472 [9:37:26<9:36:28, 3.42it/s] 68%|██████▊ | 253211/371472 [9:37:26<9:39:38, 3.40it/s] 68%|██████▊ | 253212/371472 [9:37:26<9:47:28, 3.36it/s] 68%|██████▊ | 253213/371472 [9:37:27<9:42:10, 3.39it/s] 68%|██████▊ | 253214/371472 [9:37:27<9:51:22, 3.33it/s] 68%|██████▊ | 253215/371472 [9:37:27<9:49:36, 3.34it/s] 68%|██████▊ | 253216/371472 [9:37:28<9:57:51, 3.30it/s] 68%|██████▊ | 253217/371472 [9:37:28<9:48:06, 3.35it/s] 68%|██████▊ | 253218/371472 [9:37:28<9:37:48, 3.41it/s] 68%|██████▊ | 253219/371472 [9:37:29<9:57:54, 3.30it/s] 68%|██████▊ | 253220/371472 [9:37:29<9:46:28, 3.36it/s] {'loss': 2.6015, 'learning_rate': 3.866545282165097e-07, 'epoch': 10.91} + 68%|██████▊ | 253220/371472 [9:37:29<9:46:28, 3.36it/s] 68%|██████▊ | 253221/371472 [9:37:29<11:03:43, 2.97it/s] 68%|██████▊ | 253222/371472 [9:37:30<10:27:06, 3.14it/s] 68%|██████▊ | 253223/371472 [9:37:30<10:12:23, 3.22it/s] 68%|██████▊ | 253224/371472 [9:37:30<9:52:07, 3.33it/s] 68%|██████▊ | 253225/371472 [9:37:30<9:58:38, 3.29it/s] 68%|██████▊ | 253226/371472 [9:37:31<10:03:30, 3.27it/s] 68%|██████▊ | 253227/371472 [9:37:31<10:03:55, 3.26it/s] 68%|██████▊ | 253228/371472 [9:37:31<10:02:00, 3.27it/s] 68%|██████▊ | 253229/371472 [9:37:32<9:39:27, 3.40it/s] 68%|██████▊ | 253230/371472 [9:37:32<9:49:28, 3.34it/s] 68%|██████▊ | 253231/371472 [9:37:32<9:43:38, 3.38it/s] 68%|██████▊ | 253232/371472 [9:37:32<9:46:23, 3.36it/s] 68%|██████▊ | 253233/371472 [9:37:33<10:38:56, 3.08it/s] 68%|██████▊ | 253234/371472 [9:37:33<10:26:27, 3.15it/s] 68%|██████▊ | 253235/371472 [9:37:33<10:01:43, 3.27it/s] 68%|██████▊ | 253236/371472 [9:37:34<9:46:19, 3.36it/s] 68%|██████▊ | 253237/371472 [9:37:34<9:30:52, 3.45it/s] 68%|██████▊ | 253238/371472 [9:37:34<9:21:58, 3.51it/s] 68%|██████▊ | 253239/371472 [9:37:35<9:29:42, 3.46it/s] 68%|██████▊ | 253240/371472 [9:37:35<9:24:00, 3.49it/s] {'loss': 2.7307, 'learning_rate': 3.866060462410308e-07, 'epoch': 10.91} + 68%|██████▊ | 253240/371472 [9:37:35<9:24:00, 3.49it/s] 68%|██████▊ | 253241/371472 [9:37:35<9:21:40, 3.51it/s] 68%|██████▊ | 253242/371472 [9:37:35<9:14:56, 3.55it/s] 68%|██████▊ | 253243/371472 [9:37:36<9:24:36, 3.49it/s] 68%|██████▊ | 253244/371472 [9:37:36<9:16:52, 3.54it/s] 68%|██████▊ | 253245/371472 [9:37:36<9:26:08, 3.48it/s] 68%|██████▊ | 253246/371472 [9:37:37<9:34:08, 3.43it/s] 68%|██████▊ | 253247/371472 [9:37:37<9:30:45, 3.45it/s] 68%|██████▊ | 253248/371472 [9:37:37<9:36:13, 3.42it/s] 68%|██████▊ | 253249/371472 [9:37:37<9:41:41, 3.39it/s] 68%|██████▊ | 253250/371472 [9:37:38<10:24:42, 3.15it/s] 68%|██████▊ | 253251/371472 [9:37:38<9:56:12, 3.30it/s] 68%|██████▊ | 253252/371472 [9:37:38<9:49:04, 3.34it/s] 68%|██████▊ | 253253/371472 [9:37:39<10:08:05, 3.24it/s] 68%|██████▊ | 253254/371472 [9:37:39<9:54:36, 3.31it/s] 68%|██████▊ | 253255/371472 [9:37:39<10:11:07, 3.22it/s] 68%|██████▊ | 253256/371472 [9:37:40<10:14:30, 3.21it/s] 68%|██████▊ | 253257/371472 [9:37:40<10:17:23, 3.19it/s] 68%|██████▊ | 253258/371472 [9:37:40<10:00:55, 3.28it/s] 68%|██████▊ | 253259/371472 [9:37:41<9:40:06, 3.40it/s] 68%|██████▊ | 253260/371472 [9:37:41<9:51:40, 3.33it/s] {'loss': 2.7609, 'learning_rate': 3.8655756426555197e-07, 'epoch': 10.91} + 68%|██████▊ | 253260/371472 [9:37:41<9:51:40, 3.33it/s] 68%|██████▊ | 253261/371472 [9:37:41<10:01:13, 3.28it/s] 68%|██████▊ | 253262/371472 [9:37:42<10:27:00, 3.14it/s] 68%|██████▊ | 253263/371472 [9:37:42<10:38:58, 3.08it/s] 68%|██████▊ | 253264/371472 [9:37:42<10:31:56, 3.12it/s] 68%|██████▊ | 253265/371472 [9:37:42<10:11:39, 3.22it/s] 68%|██████▊ | 253266/371472 [9:37:43<10:06:53, 3.25it/s] 68%|██████▊ | 253267/371472 [9:37:43<9:49:06, 3.34it/s] 68%|██████▊ | 253268/371472 [9:37:43<9:39:56, 3.40it/s] 68%|██████▊ | 253269/371472 [9:37:44<10:38:05, 3.09it/s] 68%|██████▊ | 253270/371472 [9:37:44<10:12:46, 3.21it/s] 68%|██████▊ | 253271/371472 [9:37:44<10:09:09, 3.23it/s] 68%|██████▊ | 253272/371472 [9:37:45<10:09:37, 3.23it/s] 68%|██████▊ | 253273/371472 [9:37:45<9:48:43, 3.35it/s] 68%|██████▊ | 253274/371472 [9:37:45<9:35:10, 3.42it/s] 68%|██████▊ | 253275/371472 [9:37:45<9:38:38, 3.40it/s] 68%|██████▊ | 253276/371472 [9:37:46<9:34:24, 3.43it/s] 68%|██████▊ | 253277/371472 [9:37:46<9:38:14, 3.41it/s] 68%|██████▊ | 253278/371472 [9:37:46<9:30:49, 3.45it/s] 68%|██████▊ | 253279/371472 [9:37:47<10:28:05, 3.14it/s] 68%|██████▊ | 253280/371472 [9:37:47<10:09:54, 3.23it/s] {'loss': 2.6122, 'learning_rate': 3.8650908229007304e-07, 'epoch': 10.91} + 68%|██████▊ | 253280/371472 [9:37:47<10:09:54, 3.23it/s] 68%|██████▊ | 253281/371472 [9:37:47<10:11:44, 3.22it/s] 68%|██████▊ | 253282/371472 [9:37:48<10:29:11, 3.13it/s] 68%|██████▊ | 253283/371472 [9:37:48<10:13:45, 3.21it/s] 68%|██████▊ | 253284/371472 [9:37:48<10:01:22, 3.28it/s] 68%|██████▊ | 253285/371472 [9:37:49<10:09:18, 3.23it/s] 68%|██████▊ | 253286/371472 [9:37:49<9:59:33, 3.29it/s] 68%|██████▊ | 253287/371472 [9:37:49<9:54:30, 3.31it/s] 68%|██████▊ | 253288/371472 [9:37:49<9:54:26, 3.31it/s] 68%|██████▊ | 253289/371472 [9:37:50<10:35:00, 3.10it/s] 68%|██████▊ | 253290/371472 [9:37:50<10:22:15, 3.17it/s] 68%|██████▊ | 253291/371472 [9:37:50<9:58:24, 3.29it/s] 68%|██████▊ | 253292/371472 [9:37:51<9:51:23, 3.33it/s] 68%|██████▊ | 253293/371472 [9:37:51<9:42:04, 3.38it/s] 68%|██████▊ | 253294/371472 [9:37:51<9:28:29, 3.46it/s] 68%|██████▊ | 253295/371472 [9:37:52<9:59:41, 3.28it/s] 68%|██████▊ | 253296/371472 [9:37:52<9:54:52, 3.31it/s] 68%|██████▊ | 253297/371472 [9:37:52<9:42:40, 3.38it/s] 68%|██████▊ | 253298/371472 [9:37:52<9:58:52, 3.29it/s] 68%|██████▊ | 253299/371472 [9:37:53<10:00:11, 3.28it/s] 68%|██████▊ | 253300/371472 [9:37:53<9:47:25, 3.35it/s] {'loss': 2.6757, 'learning_rate': 3.8646060031459416e-07, 'epoch': 10.91} + 68%|██████▊ | 253300/371472 [9:37:53<9:47:25, 3.35it/s] 68%|██████▊ | 253301/371472 [9:37:53<9:45:06, 3.37it/s] 68%|██████▊ | 253302/371472 [9:37:54<9:38:29, 3.40it/s] 68%|██████▊ | 253303/371472 [9:37:54<9:30:08, 3.45it/s] 68%|██████▊ | 253304/371472 [9:37:54<9:23:10, 3.50it/s] 68%|██████▊ | 253305/371472 [9:37:54<9:21:16, 3.51it/s] 68%|██████▊ | 253306/371472 [9:37:55<9:50:38, 3.33it/s] 68%|██████▊ | 253307/371472 [9:37:55<9:35:41, 3.42it/s] 68%|██████▊ | 253308/371472 [9:37:55<9:21:34, 3.51it/s] 68%|██████▊ | 253309/371472 [9:37:56<9:34:14, 3.43it/s] 68%|██████▊ | 253310/371472 [9:37:56<9:31:56, 3.44it/s] 68%|██████▊ | 253311/371472 [9:37:56<9:54:14, 3.31it/s] 68%|██████▊ | 253312/371472 [9:37:57<9:33:57, 3.43it/s] 68%|██████▊ | 253313/371472 [9:37:57<9:25:24, 3.48it/s] 68%|██████▊ | 253314/371472 [9:37:57<9:46:10, 3.36it/s] 68%|██████▊ | 253315/371472 [9:37:57<9:52:05, 3.33it/s] 68%|██████▊ | 253316/371472 [9:37:58<9:41:08, 3.39it/s] 68%|██████▊ | 253317/371472 [9:37:58<9:32:42, 3.44it/s] 68%|██████▊ | 253318/371472 [9:37:58<9:45:33, 3.36it/s] 68%|██████▊ | 253319/371472 [9:37:59<9:51:22, 3.33it/s] 68%|██████▊ | 253320/371472 [9:37:59<9:44:15, 3.37it/s] {'loss': 2.6429, 'learning_rate': 3.8641211833911524e-07, 'epoch': 10.91} + 68%|██████▊ | 253320/371472 [9:37:59<9:44:15, 3.37it/s] 68%|██████▊ | 253321/371472 [9:37:59<9:43:33, 3.37it/s] 68%|██████▊ | 253322/371472 [9:38:00<10:12:32, 3.21it/s] 68%|██████▊ | 253323/371472 [9:38:00<10:00:32, 3.28it/s] 68%|██████▊ | 253324/371472 [9:38:00<10:40:59, 3.07it/s] 68%|██████▊ | 253325/371472 [9:38:01<10:36:02, 3.10it/s] 68%|██████▊ | 253326/371472 [9:38:01<10:16:51, 3.19it/s] 68%|██████▊ | 253327/371472 [9:38:01<10:29:22, 3.13it/s] 68%|██████▊ | 253328/371472 [9:38:01<10:14:52, 3.20it/s] 68%|██████▊ | 253329/371472 [9:38:02<9:47:58, 3.35it/s] 68%|██████▊ | 253330/371472 [9:38:02<9:39:19, 3.40it/s] 68%|██████▊ | 253331/371472 [9:38:02<9:56:24, 3.30it/s] 68%|██████▊ | 253332/371472 [9:38:03<9:56:20, 3.30it/s] 68%|██████▊ | 253333/371472 [9:38:03<9:39:20, 3.40it/s] 68%|██████▊ | 253334/371472 [9:38:03<9:40:12, 3.39it/s] 68%|██████▊ | 253335/371472 [9:38:04<9:52:04, 3.33it/s] 68%|██████▊ | 253336/371472 [9:38:04<9:48:00, 3.35it/s] 68%|██████▊ | 253337/371472 [9:38:04<9:34:40, 3.43it/s] 68%|██████▊ | 253338/371472 [9:38:04<9:29:24, 3.46it/s] 68%|██████▊ | 253339/371472 [9:38:05<9:56:39, 3.30it/s] 68%|██████▊ | 253340/371472 [9:38:05<9:49:54, 3.34it/s] {'loss': 2.7088, 'learning_rate': 3.863636363636364e-07, 'epoch': 10.91} + 68%|██████▊ | 253340/371472 [9:38:05<9:49:54, 3.34it/s] 68%|██████▊ | 253341/371472 [9:38:05<9:54:37, 3.31it/s] 68%|██████▊ | 253342/371472 [9:38:06<9:36:42, 3.41it/s] 68%|██████▊ | 253343/371472 [9:38:06<9:39:16, 3.40it/s] 68%|██████▊ | 253344/371472 [9:38:06<9:41:50, 3.38it/s] 68%|██████▊ | 253345/371472 [9:38:06<9:33:22, 3.43it/s] 68%|██████▊ | 253346/371472 [9:38:07<9:25:06, 3.48it/s] 68%|██████▊ | 253347/371472 [9:38:07<9:27:00, 3.47it/s] 68%|██████▊ | 253348/371472 [9:38:07<9:24:35, 3.49it/s] 68%|██████▊ | 253349/371472 [9:38:08<9:47:37, 3.35it/s] 68%|██████▊ | 253350/371472 [9:38:08<9:28:38, 3.46it/s] 68%|██████▊ | 253351/371472 [9:38:08<9:31:55, 3.44it/s] 68%|██████▊ | 253352/371472 [9:38:08<9:31:54, 3.44it/s] 68%|██████▊ | 253353/371472 [9:38:09<9:41:08, 3.39it/s] 68%|██████▊ | 253354/371472 [9:38:09<9:46:39, 3.36it/s] 68%|██████▊ | 253355/371472 [9:38:09<9:36:03, 3.42it/s] 68%|██████▊ | 253356/371472 [9:38:10<10:01:23, 3.27it/s] 68%|██████▊ | 253357/371472 [9:38:10<10:17:30, 3.19it/s] 68%|██████▊ | 253358/371472 [9:38:10<10:38:46, 3.08it/s] 68%|██████▊ | 253359/371472 [9:38:11<10:44:19, 3.06it/s] 68%|██████▊ | 253360/371472 [9:38:11<10:07:09, 3.24it/s] {'loss': 2.628, 'learning_rate': 3.8631515438815743e-07, 'epoch': 10.91} + 68%|██████▊ | 253360/371472 [9:38:11<10:07:09, 3.24it/s] 68%|██████▊ | 253361/371472 [9:38:11<9:59:58, 3.28it/s] 68%|██████▊ | 253362/371472 [9:38:12<10:06:47, 3.24it/s] 68%|██████▊ | 253363/371472 [9:38:12<9:43:59, 3.37it/s] 68%|██████▊ | 253364/371472 [9:38:12<9:36:13, 3.42it/s] 68%|██████▊ | 253365/371472 [9:38:12<10:00:35, 3.28it/s] 68%|██████▊ | 253366/371472 [9:38:13<10:55:45, 3.00it/s] 68%|██████▊ | 253367/371472 [9:38:13<11:10:26, 2.94it/s] 68%|██████▊ | 253368/371472 [9:38:14<11:30:02, 2.85it/s] 68%|██████▊ | 253369/371472 [9:38:14<11:35:07, 2.83it/s] 68%|██████▊ | 253370/371472 [9:38:14<10:58:06, 2.99it/s] 68%|██████▊ | 253371/371472 [9:38:15<11:01:34, 2.98it/s] 68%|██████▊ | 253372/371472 [9:38:15<11:00:38, 2.98it/s] 68%|██████▊ | 253373/371472 [9:38:15<10:24:25, 3.15it/s] 68%|██████▊ | 253374/371472 [9:38:15<9:57:14, 3.30it/s] 68%|██████▊ | 253375/371472 [9:38:16<10:26:42, 3.14it/s] 68%|██████▊ | 253376/371472 [9:38:16<10:09:13, 3.23it/s] 68%|██████▊ | 253377/371472 [9:38:16<10:32:32, 3.11it/s] 68%|██████▊ | 253378/371472 [9:38:17<10:26:41, 3.14it/s] 68%|██████▊ | 253379/371472 [9:38:17<10:05:25, 3.25it/s] 68%|██████▊ | 253380/371472 [9:38:17<9:59:29, 3.28it/s] {'loss': 2.842, 'learning_rate': 3.862666724126786e-07, 'epoch': 10.91} + 68%|██████▊ | 253380/371472 [9:38:17<9:59:29, 3.28it/s] 68%|██████▊ | 253381/371472 [9:38:18<9:59:24, 3.28it/s] 68%|██████▊ | 253382/371472 [9:38:18<10:18:22, 3.18it/s] 68%|���█████▊ | 253383/371472 [9:38:18<10:10:17, 3.22it/s] 68%|██████▊ | 253384/371472 [9:38:19<10:19:47, 3.18it/s] 68%|██████▊ | 253385/371472 [9:38:19<10:07:32, 3.24it/s] 68%|██████▊ | 253386/371472 [9:38:19<9:50:26, 3.33it/s] 68%|██████▊ | 253387/371472 [9:38:19<9:31:32, 3.44it/s] 68%|██████▊ | 253388/371472 [9:38:20<9:26:32, 3.47it/s] 68%|██████▊ | 253389/371472 [9:38:20<9:28:20, 3.46it/s] 68%|██████▊ | 253390/371472 [9:38:20<9:31:26, 3.44it/s] 68%|██████▊ | 253391/371472 [9:38:21<9:29:56, 3.45it/s] 68%|██████▊ | 253392/371472 [9:38:21<9:19:52, 3.52it/s] 68%|██████▊ | 253393/371472 [9:38:21<9:34:08, 3.43it/s] 68%|██████▊ | 253394/371472 [9:38:22<9:46:05, 3.36it/s] 68%|██████▊ | 253395/371472 [9:38:22<9:41:41, 3.38it/s] 68%|██████▊ | 253396/371472 [9:38:22<9:32:57, 3.43it/s] 68%|██████▊ | 253397/371472 [9:38:22<9:49:06, 3.34it/s] 68%|██████▊ | 253398/371472 [9:38:23<9:35:34, 3.42it/s] 68%|██████▊ | 253399/371472 [9:38:23<9:35:11, 3.42it/s] 68%|██████▊ | 253400/371472 [9:38:23<10:02:27, 3.27it/s] {'loss': 2.764, 'learning_rate': 3.862181904371997e-07, 'epoch': 10.91} + 68%|██████▊ | 253400/371472 [9:38:23<10:02:27, 3.27it/s] 68%|██████▊ | 253401/371472 [9:38:24<9:54:09, 3.31it/s] 68%|██████▊ | 253402/371472 [9:38:24<9:49:39, 3.34it/s] 68%|██████▊ | 253403/371472 [9:38:24<9:42:13, 3.38it/s] 68%|██████▊ | 253404/371472 [9:38:25<9:41:52, 3.38it/s] 68%|██████▊ | 253405/371472 [9:38:25<11:06:42, 2.95it/s] 68%|██████▊ | 253406/371472 [9:38:25<10:37:27, 3.09it/s] 68%|██████▊ | 253407/371472 [9:38:26<10:12:51, 3.21it/s] 68%|██████▊ | 253408/371472 [9:38:26<10:00:54, 3.27it/s] 68%|██████▊ | 253409/371472 [9:38:26<9:38:07, 3.40it/s] 68%|██████▊ | 253410/371472 [9:38:26<9:50:17, 3.33it/s] 68%|██████▊ | 253411/371472 [9:38:27<10:36:59, 3.09it/s] 68%|██████▊ | 253412/371472 [9:38:27<10:12:47, 3.21it/s] 68%|██████▊ | 253413/371472 [9:38:27<9:58:37, 3.29it/s] 68%|██████▊ | 253414/371472 [9:38:28<9:59:47, 3.28it/s] 68%|██████▊ | 253415/371472 [9:38:28<9:50:38, 3.33it/s] 68%|██████▊ | 253416/371472 [9:38:28<9:40:22, 3.39it/s] 68%|██████▊ | 253417/371472 [9:38:29<9:48:28, 3.34it/s] 68%|██████▊ | 253418/371472 [9:38:29<9:41:46, 3.38it/s] 68%|██████▊ | 253419/371472 [9:38:29<9:39:06, 3.40it/s] 68%|██████▊ | 253420/371472 [9:38:29<9:42:00, 3.38it/s] {'loss': 2.6701, 'learning_rate': 3.861697084617207e-07, 'epoch': 10.92} + 68%|██████▊ | 253420/371472 [9:38:29<9:42:00, 3.38it/s] 68%|██████▊ | 253421/371472 [9:38:30<9:50:40, 3.33it/s] 68%|██████▊ | 253422/371472 [9:38:30<9:56:46, 3.30it/s] 68%|██████▊ | 253423/371472 [9:38:30<9:49:11, 3.34it/s] 68%|██████▊ | 253424/371472 [9:38:31<9:54:42, 3.31it/s] 68%|██████▊ | 253425/371472 [9:38:31<9:55:04, 3.31it/s] 68%|██████▊ | 253426/371472 [9:38:31<9:42:03, 3.38it/s] 68%|██████▊ | 253427/371472 [9:38:32<9:50:23, 3.33it/s] 68%|██████▊ | 253428/371472 [9:38:32<9:41:03, 3.39it/s] 68%|██████▊ | 253429/371472 [9:38:32<9:42:18, 3.38it/s] 68%|██████▊ | 253430/371472 [9:38:32<9:56:09, 3.30it/s] 68%|██████▊ | 253431/371472 [9:38:33<9:34:25, 3.42it/s] 68%|██████▊ | 253432/371472 [9:38:33<9:48:52, 3.34it/s] 68%|██████▊ | 253433/371472 [9:38:33<9:32:58, 3.43it/s] 68%|██████▊ | 253434/371472 [9:38:34<9:31:50, 3.44it/s] 68%|██████▊ | 253435/371472 [9:38:34<9:21:19, 3.50it/s] 68%|██████▊ | 253436/371472 [9:38:34<9:22:54, 3.49it/s] 68%|██████▊ | 253437/371472 [9:38:34<9:23:22, 3.49it/s] 68%|██████▊ | 253438/371472 [9:38:35<9:34:13, 3.43it/s] 68%|██████▊ | 253439/371472 [9:38:35<10:13:47, 3.20it/s] 68%|██████▊ | 253440/371472 [9:38:35<10:15:17, 3.20it/s] {'loss': 2.7695, 'learning_rate': 3.861212264862419e-07, 'epoch': 10.92} + 68%|██████▊ | 253440/371472 [9:38:35<10:15:17, 3.20it/s] 68%|██████▊ | 253441/371472 [9:38:36<9:54:30, 3.31it/s] 68%|██████▊ | 253442/371472 [9:38:36<9:40:04, 3.39it/s] 68%|██████▊ | 253443/371472 [9:38:36<9:33:27, 3.43it/s] 68%|██████▊ | 253444/371472 [9:38:37<9:26:30, 3.47it/s] 68%|██████▊ | 253445/371472 [9:38:37<9:14:15, 3.55it/s] 68%|██████▊ | 253446/371472 [9:38:37<9:21:03, 3.51it/s] 68%|██████▊ | 253447/371472 [9:38:37<9:39:00, 3.40it/s] 68%|██████▊ | 253448/371472 [9:38:38<9:30:34, 3.45it/s] 68%|██████▊ | 253449/371472 [9:38:38<9:35:43, 3.42it/s] 68%|██████▊ | 253450/371472 [9:38:38<9:26:41, 3.47it/s] 68%|██████▊ | 253451/371472 [9:38:39<9:50:12, 3.33it/s] 68%|██████▊ | 253452/371472 [9:38:39<9:48:08, 3.34it/s] 68%|██████▊ | 253453/371472 [9:38:39<9:53:25, 3.31it/s] 68%|██████▊ | 253454/371472 [9:38:39<9:38:42, 3.40it/s] 68%|██████▊ | 253455/371472 [9:38:40<9:25:45, 3.48it/s] 68%|██████▊ | 253456/371472 [9:38:40<9:23:04, 3.49it/s] 68%|██████▊ | 253457/371472 [9:38:40<9:33:51, 3.43it/s] 68%|██████▊ | 253458/371472 [9:38:41<9:41:11, 3.38it/s] 68%|██████▊ | 253459/371472 [9:38:41<9:41:51, 3.38it/s] 68%|██████▊ | 253460/371472 [9:38:41<9:50:22, 3.33it/s] {'loss': 2.8627, 'learning_rate': 3.8607274451076295e-07, 'epoch': 10.92} + 68%|██████▊ | 253460/371472 [9:38:41<9:50:22, 3.33it/s] 68%|██████▊ | 253461/371472 [9:38:41<9:37:47, 3.40it/s] 68%|██████▊ | 253462/371472 [9:38:42<9:19:20, 3.52it/s] 68%|██████▊ | 253463/371472 [9:38:42<9:21:49, 3.50it/s] 68%|██████▊ | 253464/371472 [9:38:42<9:38:21, 3.40it/s] 68%|██████▊ | 253465/371472 [9:38:43<9:30:31, 3.45it/s] 68%|██████▊ | 253466/371472 [9:38:43<9:25:48, 3.48it/s] 68%|██████▊ | 253467/371472 [9:38:43<9:16:54, 3.53it/s] 68%|██████▊ | 253468/371472 [9:38:44<9:32:43, 3.43it/s] 68%|██████▊ | 253469/371472 [9:38:44<9:33:17, 3.43it/s] 68%|██████▊ | 253470/371472 [9:38:44<9:49:25, 3.34it/s] 68%|██████▊ | 253471/371472 [9:38:44<9:58:45, 3.28it/s] 68%|██████▊ | 253472/371472 [9:38:45<10:11:53, 3.21it/s] 68%|██████▊ | 253473/371472 [9:38:45<10:07:33, 3.24it/s] 68%|██████▊ | 253474/371472 [9:38:45<9:52:57, 3.32it/s] 68%|██████▊ | 253475/371472 [9:38:46<9:59:58, 3.28it/s] 68%|██████▊ | 253476/371472 [9:38:46<10:54:30, 3.00it/s] 68%|██████▊ | 253477/371472 [9:38:46<10:34:00, 3.10it/s] 68%|██████▊ | 253478/371472 [9:38:47<10:28:07, 3.13it/s] 68%|██████▊ | 253479/371472 [9:38:47<10:09:57, 3.22it/s] 68%|██████▊ | 253480/371472 [9:38:47<10:11:02, 3.22it/s] {'loss': 2.904, 'learning_rate': 3.8602426253528407e-07, 'epoch': 10.92} + 68%|██████▊ | 253480/371472 [9:38:47<10:11:02, 3.22it/s] 68%|██████▊ | 253481/371472 [9:38:48<10:21:03, 3.17it/s] 68%|██████▊ | 253482/371472 [9:38:48<10:19:20, 3.18it/s] 68%|██████▊ | 253483/371472 [9:38:48<10:11:50, 3.21it/s] 68%|██████▊ | 253484/371472 [9:38:48<9:46:46, 3.35it/s] 68%|██████▊ | 253485/371472 [9:38:49<10:03:23, 3.26it/s] 68%|██████▊ | 253486/371472 [9:38:49<10:08:21, 3.23it/s] 68%|██████▊ | 253487/371472 [9:38:49<9:59:26, 3.28it/s] 68%|██████▊ | 253488/371472 [9:38:50<10:14:07, 3.20it/s] 68%|██████▊ | 253489/371472 [9:38:50<10:43:41, 3.05it/s] 68%|██████▊ | 253490/371472 [9:38:50<10:16:14, 3.19it/s] 68%|██████▊ | 253491/371472 [9:38:51<9:58:34, 3.29it/s] 68%|██████▊ | 253492/371472 [9:38:51<9:38:48, 3.40it/s] 68%|██████▊ | 253493/371472 [9:38:51<10:05:55, 3.25it/s] 68%|██████▊ | 253494/371472 [9:38:52<9:58:11, 3.29it/s] 68%|██████▊ | 253495/371472 [9:38:52<9:41:01, 3.38it/s] 68%|██████▊ | 253496/371472 [9:38:52<10:41:17, 3.07it/s] 68%|██████▊ | 253497/371472 [9:38:53<10:50:44, 3.02it/s] 68%|██████▊ | 253498/371472 [9:38:53<10:24:59, 3.15it/s] 68%|██████▊ | 253499/371472 [9:38:53<10:04:44, 3.25it/s] 68%|██████▊ | 253500/371472 [9:38:53<9:54:37, 3.31it/s] {'loss': 2.7059, 'learning_rate': 3.8597578055980514e-07, 'epoch': 10.92} + 68%|██████▊ | 253500/371472 [9:38:53<9:54:37, 3.31it/s] 68%|██████▊ | 253501/371472 [9:38:54<9:51:42, 3.32it/s] 68%|██████▊ | 253502/371472 [9:38:54<11:08:34, 2.94it/s] 68%|██████▊ | 253503/371472 [9:38:54<10:34:37, 3.10it/s] 68%|██████▊ | 253504/371472 [9:38:55<10:39:33, 3.07it/s] 68%|██████▊ | 253505/371472 [9:38:55<10:02:16, 3.26it/s] 68%|██████▊ | 253506/371472 [9:38:55<9:49:05, 3.34it/s] 68%|██████▊ | 253507/371472 [9:38:56<9:42:35, 3.37it/s] 68%|██████▊ | 253508/371472 [9:38:56<9:49:55, 3.33it/s] 68%|██████▊ | 253509/371472 [9:38:56<9:52:54, 3.32it/s] 68%|██████▊ | 253510/371472 [9:38:57<9:49:22, 3.34it/s] 68%|██████▊ | 253511/371472 [9:38:57<9:51:40, 3.32it/s] 68%|██████▊ | 253512/371472 [9:38:57<9:52:08, 3.32it/s] 68%|██████▊ | 253513/371472 [9:38:57<9:38:02, 3.40it/s] 68%|██████▊ | 253514/371472 [9:38:58<9:55:31, 3.30it/s] 68%|██████▊ | 253515/371472 [9:38:58<9:59:40, 3.28it/s] 68%|██████▊ | 253516/371472 [9:38:58<10:03:03, 3.26it/s] 68%|██████▊ | 253517/371472 [9:38:59<9:34:27, 3.42it/s] 68%|██████▊ | 253518/371472 [9:38:59<10:19:29, 3.17it/s] 68%|██████▊ | 253519/371472 [9:38:59<10:49:21, 3.03it/s] 68%|██████▊ | 253520/371472 [9:39:00<11:03:37, 2.96it/s] {'loss': 2.782, 'learning_rate': 3.859272985843263e-07, 'epoch': 10.92} + 68%|██████▊ | 253520/371472 [9:39:00<11:03:37, 2.96it/s] 68%|██████▊ | 253521/371472 [9:39:00<10:52:41, 3.01it/s] 68%|██████▊ | 253522/371472 [9:39:00<10:26:43, 3.14it/s] 68%|██████▊ | 253523/371472 [9:39:01<10:07:13, 3.24it/s] 68%|██████▊ | 253524/371472 [9:39:01<9:58:56, 3.28it/s] 68%|██████▊ | 253525/371472 [9:39:01<9:56:40, 3.29it/s] 68%|██████▊ | 253526/371472 [9:39:01<9:36:26, 3.41it/s] 68%|██████▊ | 253527/371472 [9:39:02<10:46:43, 3.04it/s] 68%|██████▊ | 253528/371472 [9:39:02<10:48:55, 3.03it/s] 68%|██████▊ | 253529/371472 [9:39:03<10:40:13, 3.07it/s] 68%|██████▊ | 253530/371472 [9:39:03<10:40:37, 3.07it/s] 68%|██████▊ | 253531/371472 [9:39:03<10:14:51, 3.20it/s] 68%|██████▊ | 253532/371472 [9:39:03<10:06:09, 3.24it/s] 68%|██████▊ | 253533/371472 [9:39:04<9:53:32, 3.31it/s] 68%|██████▊ | 253534/371472 [9:39:04<10:00:38, 3.27it/s] 68%|██████▊ | 253535/371472 [9:39:04<9:52:07, 3.32it/s] 68%|██████▊ | 253536/371472 [9:39:05<9:46:12, 3.35it/s] 68%|██████▊ | 253537/371472 [9:39:05<10:00:59, 3.27it/s] 68%|██████▊ | 253538/371472 [9:39:05<11:07:07, 2.95it/s] 68%|██████▊ | 253539/371472 [9:39:06<10:44:39, 3.05it/s] 68%|██████▊ | 253540/371472 [9:39:06<10:23:08, 3.15it/s] {'loss': 2.6654, 'learning_rate': 3.858788166088474e-07, 'epoch': 10.92} + 68%|██████▊ | 253540/371472 [9:39:06<10:23:08, 3.15it/s] 68%|██████▊ | 253541/371472 [9:39:06<10:10:46, 3.22it/s] 68%|██████▊ | 253542/371472 [9:39:07<9:47:42, 3.34it/s] 68%|██████▊ | 253543/371472 [9:39:07<9:30:13, 3.45it/s] 68%|██████▊ | 253544/371472 [9:39:07<9:25:40, 3.47it/s] 68%|██████▊ | 253545/371472 [9:39:07<10:08:29, 3.23it/s] 68%|██████▊ | 253546/371472 [9:39:08<9:50:20, 3.33it/s] 68%|██████▊ | 253547/371472 [9:39:08<9:29:20, 3.45it/s] 68%|██████▊ | 253548/371472 [9:39:08<9:49:56, 3.33it/s] 68%|██████▊ | 253549/371472 [9:39:09<9:34:55, 3.42it/s] 68%|██████▊ | 253550/371472 [9:39:09<9:23:43, 3.49it/s] 68%|██████▊ | 253551/371472 [9:39:09<9:05:07, 3.61it/s] 68%|██████▊ | 253552/371472 [9:39:09<9:01:37, 3.63it/s] 68%|██████▊ | 253553/371472 [9:39:10<9:09:54, 3.57it/s] 68%|██████▊ | 253554/371472 [9:39:10<9:16:31, 3.53it/s] 68%|██████▊ | 253555/371472 [9:39:10<9:46:55, 3.35it/s] 68%|██████▊ | 253556/371472 [9:39:11<10:07:32, 3.23it/s] 68%|██████▊ | 253557/371472 [9:39:11<9:55:18, 3.30it/s] 68%|██████▊ | 253558/371472 [9:39:11<9:51:58, 3.32it/s] 68%|██████▊ | 253559/371472 [9:39:11<9:40:14, 3.39it/s] 68%|██████▊ | 253560/371472 [9:39:12<9:59:45, 3.28it/s] {'loss': 2.8532, 'learning_rate': 3.858303346333685e-07, 'epoch': 10.92} + 68%|██████▊ | 253560/371472 [9:39:12<9:59:45, 3.28it/s] 68%|██████▊ | 253561/371472 [9:39:12<9:56:50, 3.29it/s] 68%|██████▊ | 253562/371472 [9:39:12<9:51:04, 3.32it/s] 68%|██████▊ | 253563/371472 [9:39:13<9:54:47, 3.30it/s] 68%|██████▊ | 253564/371472 [9:39:13<9:47:34, 3.34it/s] 68%|██████▊ | 253565/371472 [9:39:13<9:40:16, 3.39it/s] 68%|██████▊ | 253566/371472 [9:39:14<9:30:29, 3.44it/s] 68%|██████▊ | 253567/371472 [9:39:14<9:27:22, 3.46it/s] 68%|██████▊ | 253568/371472 [9:39:14<9:19:17, 3.51it/s] 68%|██████▊ | 253569/371472 [9:39:14<9:18:05, 3.52it/s] 68%|██████▊ | 253570/371472 [9:39:15<9:17:14, 3.53it/s] 68%|██████▊ | 253571/371472 [9:39:15<9:42:39, 3.37it/s] 68%|██████▊ | 253572/371472 [9:39:15<9:43:56, 3.37it/s] 68%|██████▊ | 253573/371472 [9:39:16<10:18:47, 3.18it/s] 68%|██████▊ | 253574/371472 [9:39:16<10:24:19, 3.15it/s] 68%|██████▊ | 253575/371472 [9:39:16<10:08:57, 3.23it/s] 68%|██████▊ | 253576/371472 [9:39:17<10:05:22, 3.25it/s] 68%|██████▊ | 253577/371472 [9:39:17<10:20:36, 3.17it/s] 68%|██████▊ | 253578/371472 [9:39:17<10:14:51, 3.20it/s] 68%|██████▊ | 253579/371472 [9:39:18<10:13:49, 3.20it/s] 68%|██████▊ | 253580/371472 [9:39:18<10:02:09, 3.26it/s] {'loss': 2.6286, 'learning_rate': 3.857818526578896e-07, 'epoch': 10.92} + 68%|██████▊ | 253580/371472 [9:39:18<10:02:09, 3.26it/s] 68%|██████▊ | 253581/371472 [9:39:18<9:53:19, 3.31it/s] 68%|██████▊ | 253582/371472 [9:39:18<9:41:26, 3.38it/s] 68%|██████▊ | 253583/371472 [9:39:19<10:02:39, 3.26it/s] 68%|██████▊ | 253584/371472 [9:39:19<10:00:42, 3.27it/s] 68%|██████▊ | 253585/371472 [9:39:19<9:49:57, 3.33it/s] 68%|██████▊ | 253586/371472 [9:39:20<9:45:01, 3.36it/s] 68%|██████▊ | 253587/371472 [9:39:20<10:20:32, 3.17it/s] 68%|██████▊ | 253588/371472 [9:39:20<9:48:17, 3.34it/s] 68%|██████▊ | 253589/371472 [9:39:21<9:26:28, 3.47it/s] 68%|██████▊ | 253590/371472 [9:39:21<9:42:42, 3.37it/s] 68%|██████▊ | 253591/371472 [9:39:21<9:41:26, 3.38it/s] 68%|██████▊ | 253592/371472 [9:39:21<9:40:37, 3.38it/s] 68%|██████▊ | 253593/371472 [9:39:22<10:25:21, 3.14it/s] 68%|██████▊ | 253594/371472 [9:39:22<10:46:00, 3.04it/s] 68%|██████▊ | 253595/371472 [9:39:22<10:15:25, 3.19it/s] 68%|██████▊ | 253596/371472 [9:39:23<10:10:37, 3.22it/s] 68%|██████▊ | 253597/371472 [9:39:23<9:53:53, 3.31it/s] 68%|██████▊ | 253598/371472 [9:39:23<9:31:29, 3.44it/s] 68%|██████▊ | 253599/371472 [9:39:24<9:21:00, 3.50it/s] 68%|██████▊ | 253600/371472 [9:39:24<9:31:12, 3.44it/s] {'loss': 2.7574, 'learning_rate': 3.8573337068241077e-07, 'epoch': 10.92} + 68%|██████▊ | 253600/371472 [9:39:24<9:31:12, 3.44it/s] 68%|██████▊ | 253601/371472 [9:39:24<9:23:58, 3.48it/s] 68%|██████▊ | 253602/371472 [9:39:24<9:29:55, 3.45it/s] 68%|██████▊ | 253603/371472 [9:39:25<9:51:27, 3.32it/s] 68%|██████▊ | 253604/371472 [9:39:25<9:42:14, 3.37it/s] 68%|██████▊ | 253605/371472 [9:39:25<9:59:57, 3.27it/s] 68%|██████▊ | 253606/371472 [9:39:26<10:10:40, 3.22it/s] 68%|██████▊ | 253607/371472 [9:39:26<10:23:41, 3.15it/s] 68%|██████▊ | 253608/371472 [9:39:26<10:14:20, 3.20it/s] 68%|██████▊ | 253609/371472 [9:39:27<10:07:46, 3.23it/s] 68%|██████▊ | 253610/371472 [9:39:27<10:15:28, 3.19it/s] 68%|██████▊ | 253611/371472 [9:39:27<10:27:31, 3.13it/s] 68%|██████▊ | 253612/371472 [9:39:28<10:32:06, 3.11it/s] 68%|██████▊ | 253613/371472 [9:39:28<10:21:02, 3.16it/s] 68%|██████▊ | 253614/371472 [9:39:28<10:15:11, 3.19it/s] 68%|██████▊ | 253615/371472 [9:39:29<11:44:46, 2.79it/s] 68%|██████▊ | 253616/371472 [9:39:29<11:28:56, 2.85it/s] 68%|██████▊ | 253617/371472 [9:39:29<11:20:23, 2.89it/s] 68%|██████▊ | 253618/371472 [9:39:30<10:55:29, 3.00it/s] 68%|██████▊ | 253619/371472 [9:39:30<10:45:49, 3.04it/s] 68%|██████▊ | 253620/371472 [9:39:30<10:24:23, 3.15it/s] {'loss': 2.4667, 'learning_rate': 3.856848887069318e-07, 'epoch': 10.92} + 68%|██████▊ | 253620/371472 [9:39:30<10:24:23, 3.15it/s] 68%|██████▊ | 253621/371472 [9:39:31<10:25:16, 3.14it/s] 68%|██████▊ | 253622/371472 [9:39:31<10:10:06, 3.22it/s] 68%|██████▊ | 253623/371472 [9:39:31<10:05:00, 3.25it/s] 68%|███��██▊ | 253624/371472 [9:39:31<9:49:25, 3.33it/s] 68%|██████▊ | 253625/371472 [9:39:32<10:29:24, 3.12it/s] 68%|██████▊ | 253626/371472 [9:39:32<10:06:17, 3.24it/s] 68%|██████▊ | 253627/371472 [9:39:32<9:54:28, 3.30it/s] 68%|██████▊ | 253628/371472 [9:39:33<9:31:51, 3.43it/s] 68%|██████▊ | 253629/371472 [9:39:33<10:45:13, 3.04it/s] 68%|██████▊ | 253630/371472 [9:39:33<10:53:42, 3.00it/s] 68%|██████▊ | 253631/371472 [9:39:34<10:39:29, 3.07it/s] 68%|██████▊ | 253632/371472 [9:39:34<11:05:56, 2.95it/s] 68%|██████▊ | 253633/371472 [9:39:34<11:03:52, 2.96it/s] 68%|██████▊ | 253634/371472 [9:39:35<11:13:13, 2.92it/s] 68%|██████▊ | 253635/371472 [9:39:35<11:15:31, 2.91it/s] 68%|██████▊ | 253636/371472 [9:39:36<11:30:00, 2.85it/s] 68%|██████▊ | 253637/371472 [9:39:36<10:59:14, 2.98it/s] 68%|██████▊ | 253638/371472 [9:39:36<10:56:50, 2.99it/s] 68%|██████▊ | 253639/371472 [9:39:36<10:37:12, 3.08it/s] 68%|██████▊ | 253640/371472 [9:39:37<10:13:08, 3.20it/s] {'loss': 2.7814, 'learning_rate': 3.8563640673145296e-07, 'epoch': 10.92} + 68%|██████▊ | 253640/371472 [9:39:37<10:13:08, 3.20it/s] 68%|██████▊ | 253641/371472 [9:39:37<10:54:12, 3.00it/s] 68%|██████▊ | 253642/371472 [9:39:37<10:44:16, 3.05it/s] 68%|██████▊ | 253643/371472 [9:39:38<10:29:54, 3.12it/s] 68%|██████▊ | 253644/371472 [9:39:38<10:07:41, 3.23it/s] 68%|██████▊ | 253645/371472 [9:39:38<10:02:30, 3.26it/s] 68%|██████▊ | 253646/371472 [9:39:39<9:57:21, 3.29it/s] 68%|██████▊ | 253647/371472 [9:39:39<9:34:06, 3.42it/s] 68%|██████▊ | 253648/371472 [9:39:39<9:46:16, 3.35it/s] 68%|██████▊ | 253649/371472 [9:39:39<9:42:19, 3.37it/s] 68%|██████▊ | 253650/371472 [9:39:40<9:33:07, 3.43it/s] 68%|██████▊ | 253651/371472 [9:39:40<9:44:34, 3.36it/s] 68%|██████▊ | 253652/371472 [9:39:40<9:52:50, 3.31it/s] 68%|██████▊ | 253653/371472 [9:39:41<9:36:42, 3.40it/s] 68%|██████▊ | 253654/371472 [9:39:41<9:56:45, 3.29it/s] 68%|██████▊ | 253655/371472 [9:39:41<10:01:50, 3.26it/s] 68%|██████▊ | 253656/371472 [9:39:42<9:38:51, 3.39it/s] 68%|██████▊ | 253657/371472 [9:39:42<9:31:07, 3.44it/s] 68%|██████▊ | 253658/371472 [9:39:42<9:28:14, 3.46it/s] 68%|██████▊ | 253659/371472 [9:39:42<9:33:31, 3.42it/s] 68%|██████▊ | 253660/371472 [9:39:43<9:37:41, 3.40it/s] {'loss': 2.8162, 'learning_rate': 3.8558792475597403e-07, 'epoch': 10.93} + 68%|██████▊ | 253660/371472 [9:39:43<9:37:41, 3.40it/s] 68%|██████▊ | 253661/371472 [9:39:43<9:48:53, 3.33it/s] 68%|██████▊ | 253662/371472 [9:39:43<9:37:37, 3.40it/s] 68%|██████▊ | 253663/371472 [9:39:44<9:45:06, 3.36it/s] 68%|██████▊ | 253664/371472 [9:39:44<9:46:32, 3.35it/s] 68%|██████▊ | 253665/371472 [9:39:44<9:29:19, 3.45it/s] 68%|██████▊ | 253666/371472 [9:39:45<9:51:11, 3.32it/s] 68%|██████▊ | 253667/371472 [9:39:45<10:17:37, 3.18it/s] 68%|██████▊ | 253668/371472 [9:39:45<10:09:46, 3.22it/s] 68%|██████▊ | 253669/371472 [9:39:46<10:28:10, 3.13it/s] 68%|██████▊ | 253670/371472 [9:39:46<9:58:37, 3.28it/s] 68%|██████▊ | 253671/371472 [9:39:46<9:52:21, 3.31it/s] 68%|██████▊ | 253672/371472 [9:39:46<9:35:40, 3.41it/s] 68%|██████▊ | 253673/371472 [9:39:47<9:38:03, 3.40it/s] 68%|██████▊ | 253674/371472 [9:39:47<9:55:17, 3.30it/s] 68%|██████▊ | 253675/371472 [9:39:47<10:03:46, 3.25it/s] 68%|██████▊ | 253676/371472 [9:39:48<10:13:18, 3.20it/s] 68%|██████▊ | 253677/371472 [9:39:48<10:12:35, 3.20it/s] 68%|██████▊ | 253678/371472 [9:39:48<11:03:25, 2.96it/s] 68%|██████▊ | 253679/371472 [9:39:49<10:24:20, 3.14it/s] 68%|██████▊ | 253680/371472 [9:39:49<12:03:37, 2.71it/s] {'loss': 2.5512, 'learning_rate': 3.8553944278049516e-07, 'epoch': 10.93} + 68%|██████▊ | 253680/371472 [9:39:49<12:03:37, 2.71it/s] 68%|██████▊ | 253681/371472 [9:39:49<11:16:33, 2.90it/s] 68%|██████▊ | 253682/371472 [9:39:50<11:21:01, 2.88it/s] 68%|██████▊ | 253683/371472 [9:39:50<10:40:35, 3.06it/s] 68%|██████�� | 253684/371472 [9:39:50<10:37:18, 3.08it/s] 68%|██████▊ | 253685/371472 [9:39:51<10:07:42, 3.23it/s] 68%|██████▊ | 253686/371472 [9:39:51<10:08:51, 3.22it/s] 68%|██████▊ | 253687/371472 [9:39:51<9:48:00, 3.34it/s] 68%|██████▊ | 253688/371472 [9:39:51<9:39:13, 3.39it/s] 68%|██████▊ | 253689/371472 [9:39:52<10:02:55, 3.26it/s] 68%|██████▊ | 253690/371472 [9:39:52<9:46:04, 3.35it/s] 68%|██████▊ | 253691/371472 [9:39:52<9:58:00, 3.28it/s] 68%|██████▊ | 253692/371472 [9:39:53<9:39:38, 3.39it/s] 68%|██████▊ | 253693/371472 [9:39:53<9:48:58, 3.33it/s] 68%|██████▊ | 253694/371472 [9:39:53<10:10:41, 3.21it/s] 68%|██████▊ | 253695/371472 [9:39:54<10:16:32, 3.18it/s] 68%|██████▊ | 253696/371472 [9:39:54<9:47:45, 3.34it/s] 68%|██████▊ | 253697/371472 [9:39:54<9:50:08, 3.33it/s] 68%|██████▊ | 253698/371472 [9:39:55<10:30:45, 3.11it/s] 68%|██████▊ | 253699/371472 [9:39:55<10:18:39, 3.17it/s] 68%|██████▊ | 253700/371472 [9:39:55<10:10:39, 3.21it/s] {'loss': 2.7637, 'learning_rate': 3.8549096080501623e-07, 'epoch': 10.93} + 68%|██████▊ | 253700/371472 [9:39:55<10:10:39, 3.21it/s] 68%|██████▊ | 253701/371472 [9:39:55<10:05:42, 3.24it/s] 68%|██████▊ | 253702/371472 [9:39:56<10:29:51, 3.12it/s] 68%|██████▊ | 253703/371472 [9:39:56<10:23:24, 3.15it/s] 68%|██████▊ | 253704/371472 [9:39:56<9:57:19, 3.29it/s] 68%|██████▊ | 253705/371472 [9:39:57<10:49:25, 3.02it/s] 68%|██████▊ | 253706/371472 [9:39:57<10:19:56, 3.17it/s] 68%|██████▊ | 253707/371472 [9:39:57<10:16:11, 3.19it/s] 68%|██████▊ | 253708/371472 [9:39:58<10:11:58, 3.21it/s] 68%|██████▊ | 253709/371472 [9:39:58<9:50:42, 3.32it/s] 68%|██████▊ | 253710/371472 [9:39:58<9:49:28, 3.33it/s] 68%|██████▊ | 253711/371472 [9:39:59<9:26:45, 3.46it/s] 68%|██████▊ | 253712/371472 [9:39:59<9:18:02, 3.52it/s] 68%|██████▊ | 253713/371472 [9:39:59<9:35:46, 3.41it/s] 68%|██████▊ | 253714/371472 [9:39:59<9:38:18, 3.39it/s] 68%|██████▊ | 253715/371472 [9:40:00<9:28:57, 3.45it/s] 68%|██████▊ | 253716/371472 [9:40:00<9:19:00, 3.51it/s] 68%|██████▊ | 253717/371472 [9:40:00<9:23:22, 3.48it/s] 68%|██████▊ | 253718/371472 [9:40:01<9:27:55, 3.46it/s] 68%|██████▊ | 253719/371472 [9:40:01<9:36:06, 3.41it/s] 68%|██████▊ | 253720/371472 [9:40:01<9:22:26, 3.49it/s] {'loss': 2.6994, 'learning_rate': 3.854424788295374e-07, 'epoch': 10.93} + 68%|██████▊ | 253720/371472 [9:40:01<9:22:26, 3.49it/s] 68%|██████▊ | 253721/371472 [9:40:01<9:21:39, 3.49it/s] 68%|██████▊ | 253722/371472 [9:40:02<9:34:22, 3.42it/s] 68%|██████▊ | 253723/371472 [9:40:02<9:16:18, 3.53it/s] 68%|██████▊ | 253724/371472 [9:40:02<9:12:24, 3.55it/s] 68%|██████▊ | 253725/371472 [9:40:03<9:20:56, 3.50it/s] 68%|██████▊ | 253726/371472 [9:40:03<10:16:18, 3.18it/s] 68%|██████▊ | 253727/371472 [9:40:03<10:46:06, 3.04it/s] 68%|██████▊ | 253728/371472 [9:40:04<10:13:44, 3.20it/s] 68%|██████▊ | 253729/371472 [9:40:04<9:57:07, 3.29it/s] 68%|██████▊ | 253730/371472 [9:40:04<9:44:05, 3.36it/s] 68%|██████▊ | 253731/371472 [9:40:04<9:35:07, 3.41it/s] 68%|██████▊ | 253732/371472 [9:40:05<9:32:49, 3.43it/s] 68%|██████▊ | 253733/371472 [9:40:05<9:22:30, 3.49it/s] 68%|██████▊ | 253734/371472 [9:40:05<9:26:10, 3.47it/s] 68%|██████▊ | 253735/371472 [9:40:06<9:25:51, 3.47it/s] 68%|██████▊ | 253736/371472 [9:40:06<9:45:04, 3.35it/s] 68%|██████▊ | 253737/371472 [9:40:06<9:35:42, 3.41it/s] 68%|██████▊ | 253738/371472 [9:40:06<9:30:11, 3.44it/s] 68%|██████▊ | 253739/371472 [9:40:07<9:33:06, 3.42it/s] 68%|██████▊ | 253740/371472 [9:40:07<9:39:51, 3.38it/s] {'loss': 2.7207, 'learning_rate': 3.853939968540584e-07, 'epoch': 10.93} + 68%|██████▊ | 253740/371472 [9:40:07<9:39:51, 3.38it/s] 68%|██████▊ | 253741/371472 [9:40:07<10:11:04, 3.21it/s] 68%|██████▊ | 253742/371472 [9:40:08<9:41:06, 3.38it/s] 68%|██████▊ | 253743/371472 [9:40:08<9:47:44, 3.34it/s] 68%|██████▊ | 253744/371472 [9:40:08<10:08:55, 3.22it/s] 68%|██████▊ | 253745/371472 [9:40:09<10:00:50, 3.27it/s] 68%|██████▊ | 253746/371472 [9:40:09<9:51:49, 3.32it/s] 68%|██████▊ | 253747/371472 [9:40:09<9:37:09, 3.40it/s] 68%|██████▊ | 253748/371472 [9:40:09<9:28:14, 3.45it/s] 68%|██████▊ | 253749/371472 [9:40:10<9:36:35, 3.40it/s] 68%|██████▊ | 253750/371472 [9:40:10<9:31:50, 3.43it/s] 68%|██████▊ | 253751/371472 [9:40:10<9:33:54, 3.42it/s] 68%|██████▊ | 253752/371472 [9:40:11<9:27:48, 3.46it/s] 68%|██████▊ | 253753/371472 [9:40:11<9:27:21, 3.46it/s] 68%|██████▊ | 253754/371472 [9:40:11<9:40:20, 3.38it/s] 68%|██████▊ | 253755/371472 [9:40:12<9:30:37, 3.44it/s] 68%|██████▊ | 253756/371472 [9:40:12<10:34:19, 3.09it/s] 68%|██████▊ | 253757/371472 [9:40:12<10:20:42, 3.16it/s] 68%|██████▊ | 253758/371472 [9:40:12<9:59:59, 3.27it/s] 68%|██████▊ | 253759/371472 [9:40:13<9:49:32, 3.33it/s] 68%|██████▊ | 253760/371472 [9:40:13<9:34:54, 3.41it/s] {'loss': 2.6725, 'learning_rate': 3.853455148785796e-07, 'epoch': 10.93} + 68%|██████▊ | 253760/371472 [9:40:13<9:34:54, 3.41it/s] 68%|██████▊ | 253761/371472 [9:40:13<9:36:34, 3.40it/s] 68%|██████▊ | 253762/371472 [9:40:14<9:52:13, 3.31it/s] 68%|██████▊ | 253763/371472 [9:40:14<10:01:10, 3.26it/s] 68%|██████▊ | 253764/371472 [9:40:14<9:47:11, 3.34it/s] 68%|██████▊ | 253765/371472 [9:40:15<9:33:22, 3.42it/s] 68%|██████▊ | 253766/371472 [9:40:15<9:37:25, 3.40it/s] 68%|██████▊ | 253767/371472 [9:40:15<9:36:44, 3.40it/s] 68%|██████▊ | 253768/371472 [9:40:15<9:52:41, 3.31it/s] 68%|██████▊ | 253769/371472 [9:40:16<9:54:09, 3.30it/s] 68%|██████▊ | 253770/371472 [9:40:16<10:32:54, 3.10it/s] 68%|██████▊ | 253771/371472 [9:40:16<9:50:32, 3.32it/s] 68%|██████▊ | 253772/371472 [9:40:17<9:43:35, 3.36it/s] 68%|██████▊ | 253773/371472 [9:40:17<9:31:43, 3.43it/s] 68%|██████▊ | 253774/371472 [9:40:17<9:52:32, 3.31it/s] 68%|██████▊ | 253775/371472 [9:40:18<10:28:05, 3.12it/s] 68%|██████▊ | 253776/371472 [9:40:18<10:32:25, 3.10it/s] 68%|██████▊ | 253777/371472 [9:40:18<10:18:07, 3.17it/s] 68%|██████▊ | 253778/371472 [9:40:19<11:01:03, 2.97it/s] 68%|██████▊ | 253779/371472 [9:40:19<10:50:09, 3.02it/s] 68%|██████▊ | 253780/371472 [9:40:19<10:33:02, 3.10it/s] {'loss': 2.6883, 'learning_rate': 3.852970329031007e-07, 'epoch': 10.93} + 68%|██████▊ | 253780/371472 [9:40:19<10:33:02, 3.10it/s] 68%|██████▊ | 253781/371472 [9:40:20<10:14:53, 3.19it/s] 68%|██████▊ | 253782/371472 [9:40:20<9:52:52, 3.31it/s] 68%|██████▊ | 253783/371472 [9:40:20<9:39:35, 3.38it/s] 68%|██████▊ | 253784/371472 [9:40:20<9:42:08, 3.37it/s] 68%|██████▊ | 253785/371472 [9:40:21<9:33:19, 3.42it/s] 68%|██████▊ | 253786/371472 [9:40:21<9:28:40, 3.45it/s] 68%|██████▊ | 253787/371472 [9:40:21<9:59:06, 3.27it/s] 68%|██████▊ | 253788/371472 [9:40:22<9:46:02, 3.35it/s] 68%|██████▊ | 253789/371472 [9:40:22<9:24:57, 3.47it/s] 68%|██████▊ | 253790/371472 [9:40:22<9:10:00, 3.57it/s] 68%|██████▊ | 253791/371472 [9:40:22<9:19:39, 3.50it/s] 68%|██████▊ | 253792/371472 [9:40:23<9:12:24, 3.55it/s] 68%|██████▊ | 253793/371472 [9:40:23<9:47:16, 3.34it/s] 68%|██████▊ | 253794/371472 [9:40:23<9:45:09, 3.35it/s] 68%|██████▊ | 253795/371472 [9:40:24<9:57:16, 3.28it/s] 68%|██████▊ | 253796/371472 [9:40:24<9:48:51, 3.33it/s] 68%|██████▊ | 253797/371472 [9:40:24<9:34:16, 3.42it/s] 68%|██████▊ | 253798/371472 [9:40:25<9:30:40, 3.44it/s] 68%|██████▊ | 253799/371472 [9:40:25<9:27:58, 3.45it/s] 68%|██████▊ | 253800/371472 [9:40:25<9:47:22, 3.34it/s] {'loss': 2.5921, 'learning_rate': 3.852485509276218e-07, 'epoch': 10.93} + 68%|██████▊ | 253800/371472 [9:40:25<9:47:22, 3.34it/s] 68%|██████▊ | 253801/371472 [9:40:25<9:44:52, 3.35it/s] 68%|██████▊ | 253802/371472 [9:40:26<9:29:04, 3.45it/s] 68%|██████▊ | 253803/371472 [9:40:26<9:16:10, 3.53it/s] 68%|██████▊ | 253804/371472 [9:40:26<9:18:52, 3.51it/s] 68%|��█████▊ | 253805/371472 [9:40:27<9:12:59, 3.55it/s] 68%|██████▊ | 253806/371472 [9:40:27<9:10:48, 3.56it/s] 68%|██████▊ | 253807/371472 [9:40:27<9:08:57, 3.57it/s] 68%|██████▊ | 253808/371472 [9:40:27<9:14:30, 3.54it/s] 68%|██████▊ | 253809/371472 [9:40:28<9:43:26, 3.36it/s] 68%|██████▊ | 253810/371472 [9:40:28<10:03:32, 3.25it/s] 68%|██████▊ | 253811/371472 [9:40:28<9:41:03, 3.37it/s] 68%|██████▊ | 253812/371472 [9:40:29<9:35:20, 3.41it/s] 68%|██████▊ | 253813/371472 [9:40:29<9:30:56, 3.43it/s] 68%|██████▊ | 253814/371472 [9:40:29<9:37:49, 3.39it/s] 68%|██████▊ | 253815/371472 [9:40:29<9:34:01, 3.42it/s] 68%|██████▊ | 253816/371472 [9:40:30<9:33:46, 3.42it/s] 68%|██████▊ | 253817/371472 [9:40:30<9:18:12, 3.51it/s] 68%|██████▊ | 253818/371472 [9:40:30<9:58:42, 3.28it/s] 68%|██████▊ | 253819/371472 [9:40:31<9:46:49, 3.34it/s] 68%|██████▊ | 253820/371472 [9:40:31<9:42:57, 3.36it/s] {'loss': 2.684, 'learning_rate': 3.8520006895214287e-07, 'epoch': 10.93} + 68%|██████▊ | 253820/371472 [9:40:31<9:42:57, 3.36it/s] 68%|██████▊ | 253821/371472 [9:40:31<9:34:34, 3.41it/s] 68%|██████▊ | 253822/371472 [9:40:32<9:32:12, 3.43it/s] 68%|██████▊ | 253823/371472 [9:40:32<9:26:10, 3.46it/s] 68%|██████▊ | 253824/371472 [9:40:32<9:20:38, 3.50it/s] 68%|██████▊ | 253825/371472 [9:40:32<9:36:16, 3.40it/s] 68%|██████▊ | 253826/371472 [9:40:33<9:29:52, 3.44it/s] 68%|██████▊ | 253827/371472 [9:40:33<9:34:26, 3.41it/s] 68%|██████▊ | 253828/371472 [9:40:33<9:34:30, 3.41it/s] 68%|██████▊ | 253829/371472 [9:40:34<9:31:45, 3.43it/s] 68%|██████▊ | 253830/371472 [9:40:34<9:28:04, 3.45it/s] 68%|██████▊ | 253831/371472 [9:40:34<9:18:37, 3.51it/s] 68%|██████▊ | 253832/371472 [9:40:34<9:03:41, 3.61it/s] 68%|██████▊ | 253833/371472 [9:40:35<9:05:25, 3.59it/s] 68%|██████▊ | 253834/371472 [9:40:35<9:45:31, 3.35it/s] 68%|██████▊ | 253835/371472 [9:40:35<9:42:26, 3.37it/s] 68%|██████▊ | 253836/371472 [9:40:36<9:52:30, 3.31it/s] 68%|██████▊ | 253837/371472 [9:40:36<9:36:36, 3.40it/s] 68%|██████▊ | 253838/371472 [9:40:36<10:24:27, 3.14it/s] 68%|██████▊ | 253839/371472 [9:40:37<9:53:33, 3.30it/s] 68%|██████▊ | 253840/371472 [9:40:37<9:39:31, 3.38it/s] {'loss': 2.6772, 'learning_rate': 3.8515158697666405e-07, 'epoch': 10.93} + 68%|██████▊ | 253840/371472 [9:40:37<9:39:31, 3.38it/s] 68%|██████▊ | 253841/371472 [9:40:37<9:33:43, 3.42it/s] 68%|██████▊ | 253842/371472 [9:40:37<9:53:48, 3.30it/s] 68%|██████▊ | 253843/371472 [9:40:38<9:39:09, 3.39it/s] 68%|██████▊ | 253844/371472 [9:40:38<9:51:23, 3.32it/s] 68%|██████▊ | 253845/371472 [9:40:38<9:39:33, 3.38it/s] 68%|██████▊ | 253846/371472 [9:40:39<9:39:29, 3.38it/s] 68%|██████▊ | 253847/371472 [9:40:39<9:56:26, 3.29it/s] 68%|██████▊ | 253848/371472 [9:40:39<9:41:57, 3.37it/s] 68%|██████▊ | 253849/371472 [9:40:39<9:37:58, 3.39it/s] 68%|██████▊ | 253850/371472 [9:40:40<9:42:50, 3.36it/s] 68%|██████▊ | 253851/371472 [9:40:40<9:43:42, 3.36it/s] 68%|██████▊ | 253852/371472 [9:40:40<9:25:07, 3.47it/s] 68%|██████▊ | 253853/371472 [9:40:41<9:20:08, 3.50it/s] 68%|██████▊ | 253854/371472 [9:40:41<9:28:30, 3.45it/s] 68%|██████▊ | 253855/371472 [9:40:41<9:27:09, 3.46it/s] 68%|██████▊ | 253856/371472 [9:40:42<9:33:04, 3.42it/s] 68%|██████▊ | 253857/371472 [9:40:42<9:17:37, 3.52it/s] 68%|██████▊ | 253858/371472 [9:40:42<10:03:46, 3.25it/s] 68%|██████▊ | 253859/371472 [9:40:43<10:43:41, 3.05it/s] 68%|██████▊ | 253860/371472 [9:40:43<10:17:56, 3.17it/s] {'loss': 2.8591, 'learning_rate': 3.8510310500118507e-07, 'epoch': 10.93} + 68%|██████▊ | 253860/371472 [9:40:43<10:17:56, 3.17it/s] 68%|██████▊ | 253861/371472 [9:40:43<9:50:09, 3.32it/s] 68%|██████▊ | 253862/371472 [9:40:43<9:50:44, 3.32it/s] 68%|██████▊ | 253863/371472 [9:40:44<9:35:56, 3.40it/s] 68%|██████▊ | 253864/371472 [9:40:44<10:23:55, 3.14it/s] 68%|██████▊ | 253865/371472 [9:40:44<9:54:43, 3.30it/s] 68%|██████▊ | 253866/371472 [9:40:45<10:31:20, 3.10it/s] 68%|██████▊ | 253867/371472 [9:40:45<10:32:08, 3.10it/s] 68%|██████▊ | 253868/371472 [9:40:45<10:58:09, 2.98it/s] 68%|██████▊ | 253869/371472 [9:40:46<10:43:10, 3.05it/s] 68%|██████▊ | 253870/371472 [9:40:46<10:25:58, 3.13it/s] 68%|██████▊ | 253871/371472 [9:40:46<10:27:02, 3.13it/s] 68%|██████▊ | 253872/371472 [9:40:47<10:19:42, 3.16it/s] 68%|██████▊ | 253873/371472 [9:40:47<9:48:32, 3.33it/s] 68%|██████▊ | 253874/371472 [9:40:47<10:07:49, 3.22it/s] 68%|██████▊ | 253875/371472 [9:40:48<10:24:05, 3.14it/s] 68%|██████▊ | 253876/371472 [9:40:48<10:19:17, 3.16it/s] 68%|██████▊ | 253877/371472 [9:40:48<10:35:03, 3.09it/s] 68%|██████▊ | 253878/371472 [9:40:48<10:11:17, 3.21it/s] 68%|██████▊ | 253879/371472 [9:40:49<9:57:28, 3.28it/s] 68%|██████▊ | 253880/371472 [9:40:49<10:19:01, 3.17it/s] {'loss': 2.7687, 'learning_rate': 3.8505462302570624e-07, 'epoch': 10.94} + 68%|██████▊ | 253880/371472 [9:40:49<10:19:01, 3.17it/s] 68%|██████▊ | 253881/371472 [9:40:49<10:20:41, 3.16it/s] 68%|██████▊ | 253882/371472 [9:40:50<9:57:24, 3.28it/s] 68%|██████▊ | 253883/371472 [9:40:50<9:40:02, 3.38it/s] 68%|██████▊ | 253884/371472 [9:40:50<9:27:47, 3.45it/s] 68%|██████▊ | 253885/371472 [9:40:51<9:31:30, 3.43it/s] 68%|██████▊ | 253886/371472 [9:40:51<9:28:29, 3.45it/s] 68%|██████▊ | 253887/371472 [9:40:51<9:51:43, 3.31it/s] 68%|██████▊ | 253888/371472 [9:40:51<9:28:59, 3.44it/s] 68%|██████▊ | 253889/371472 [9:40:52<9:23:36, 3.48it/s] 68%|██████▊ | 253890/371472 [9:40:52<9:33:14, 3.42it/s] 68%|██████▊ | 253891/371472 [9:40:52<9:17:57, 3.51it/s] 68%|██████▊ | 253892/371472 [9:40:53<9:28:05, 3.45it/s] 68%|██████▊ | 253893/371472 [9:40:53<9:44:42, 3.35it/s] 68%|██████▊ | 253894/371472 [9:40:53<10:06:40, 3.23it/s] 68%|██████▊ | 253895/371472 [9:40:54<10:13:59, 3.19it/s] 68%|██████▊ | 253896/371472 [9:40:54<9:51:46, 3.31it/s] 68%|██████▊ | 253897/371472 [9:40:54<9:39:47, 3.38it/s] 68%|██████▊ | 253898/371472 [9:40:54<9:44:54, 3.35it/s] 68%|██████▊ | 253899/371472 [9:40:55<9:32:36, 3.42it/s] 68%|██████▊ | 253900/371472 [9:40:55<9:45:56, 3.34it/s] {'loss': 2.6965, 'learning_rate': 3.850061410502273e-07, 'epoch': 10.94} + 68%|██████▊ | 253900/371472 [9:40:55<9:45:56, 3.34it/s] 68%|██████▊ | 253901/371472 [9:40:55<10:11:59, 3.20it/s] 68%|██████▊ | 253902/371472 [9:40:56<9:50:16, 3.32it/s] 68%|██████▊ | 253903/371472 [9:40:56<9:42:08, 3.37it/s] 68%|██████▊ | 253904/371472 [9:40:56<10:02:28, 3.25it/s] 68%|██████▊ | 253905/371472 [9:40:57<9:49:27, 3.32it/s] 68%|██████▊ | 253906/371472 [9:40:57<9:40:06, 3.38it/s] 68%|██████▊ | 253907/371472 [9:40:57<10:04:05, 3.24it/s] 68%|██████▊ | 253908/371472 [9:40:57<9:52:55, 3.30it/s] 68%|██████▊ | 253909/371472 [9:40:58<9:49:55, 3.32it/s] 68%|██████▊ | 253910/371472 [9:40:58<9:54:32, 3.30it/s] 68%|██████▊ | 253911/371472 [9:40:58<9:51:33, 3.31it/s] 68%|██████▊ | 253912/371472 [9:40:59<9:47:11, 3.34it/s] 68%|██████▊ | 253913/371472 [9:40:59<10:17:39, 3.17it/s] 68%|██████▊ | 253914/371472 [9:40:59<10:00:46, 3.26it/s] 68%|██████▊ | 253915/371472 [9:41:00<9:55:15, 3.29it/s] 68%|██████▊ | 253916/371472 [9:41:00<9:54:53, 3.29it/s] 68%|██████▊ | 253917/371472 [9:41:00<9:48:26, 3.33it/s] 68%|██████▊ | 253918/371472 [9:41:00<9:36:16, 3.40it/s] 68%|██████▊ | 253919/371472 [9:41:01<9:41:36, 3.37it/s] 68%|██████▊ | 253920/371472 [9:41:01<9:24:53, 3.47it/s] {'loss': 2.8039, 'learning_rate': 3.8495765907474844e-07, 'epoch': 10.94} + 68%|██████▊ | 253920/371472 [9:41:01<9:24:53, 3.47it/s] 68%|██████▊ | 253921/371472 [9:41:01<9:20:46, 3.49it/s] 68%|██████▊ | 253922/371472 [9:41:02<9:34:18, 3.41it/s] 68%|██████▊ | 253923/371472 [9:41:02<10:24:25, 3.14it/s] 68%|██████▊ | 253924/371472 [9:41:02<10:01:31, 3.26it/s] 68%|██████▊ | 253925/371472 [9:41:03<9:40:38, 3.37it/s] 68%|██████▊ | 253926/371472 [9:41:03<10:00:08, 3.26it/s] 68%|██████▊ | 253927/371472 [9:41:03<9:44:49, 3.35it/s] 68%|██████▊ | 253928/371472 [9:41:03<9:36:54, 3.40it/s] 68%|██████▊ | 253929/371472 [9:41:04<9:20:02, 3.50it/s] 68%|██████▊ | 253930/371472 [9:41:04<9:13:26, 3.54it/s] 68%|██████▊ | 253931/371472 [9:41:04<9:18:17, 3.51it/s] 68%|██████▊ | 253932/371472 [9:41:05<9:13:27, 3.54it/s] 68%|██████▊ | 253933/371472 [9:41:05<9:11:12, 3.55it/s] 68%|██████▊ | 253934/371472 [9:41:05<9:42:07, 3.37it/s] 68%|██████▊ | 253935/371472 [9:41:05<9:50:13, 3.32it/s] 68%|██████▊ | 253936/371472 [9:41:06<9:36:40, 3.40it/s] 68%|██████▊ | 253937/371472 [9:41:06<9:37:05, 3.39it/s] 68%|██████▊ | 253938/371472 [9:41:06<9:18:15, 3.51it/s] 68%|██████▊ | 253939/371472 [9:41:07<9:12:13, 3.55it/s] 68%|██████▊ | 253940/371472 [9:41:07<9:25:19, 3.47it/s] {'loss': 2.7694, 'learning_rate': 3.849091770992695e-07, 'epoch': 10.94} + 68%|██████▊ | 253940/371472 [9:41:07<9:25:19, 3.47it/s] 68%|██████▊ | 253941/371472 [9:41:07<9:38:01, 3.39it/s] 68%|██████▊ | 253942/371472 [9:41:08<9:48:17, 3.33it/s] 68%|██████▊ | 253943/371472 [9:41:08<10:07:51, 3.22it/s] 68%|██████▊ | 253944/371472 [9:41:08<10:25:27, 3.13it/s] 68%|██████▊ | 253945/371472 [9:41:08<10:08:13, 3.22it/s] 68%|██████▊ | 253946/371472 [9:41:09<9:50:49, 3.32it/s] 68%|██████▊ | 253947/371472 [9:41:09<9:58:01, 3.28it/s] 68%|██████▊ | 253948/371472 [9:41:09<9:46:49, 3.34it/s] 68%|██████▊ | 253949/371472 [9:41:10<9:55:46, 3.29it/s] 68%|██████▊ | 253950/371472 [9:41:10<9:58:21, 3.27it/s] 68%|██████▊ | 253951/371472 [9:41:10<9:47:49, 3.33it/s] 68%|██████▊ | 253952/371472 [9:41:11<10:03:12, 3.25it/s] 68%|██████▊ | 253953/371472 [9:41:11<9:38:58, 3.38it/s] 68%|██████▊ | 253954/371472 [9:41:11<9:42:40, 3.36it/s] 68%|██████▊ | 253955/371472 [9:41:11<9:27:16, 3.45it/s] 68%|██████▊ | 253956/371472 [9:41:12<9:14:34, 3.53it/s] 68%|██████▊ | 253957/371472 [9:41:12<9:03:17, 3.61it/s] 68%|██████▊ | 253958/371472 [9:41:12<9:16:04, 3.52it/s] 68%|██████▊ | 253959/371472 [9:41:13<10:22:54, 3.14it/s] 68%|██████▊ | 253960/371472 [9:41:13<10:40:47, 3.06it/s] {'loss': 2.7452, 'learning_rate': 3.848606951237906e-07, 'epoch': 10.94} + 68%|██████▊ | 253960/371472 [9:41:13<10:40:47, 3.06it/s] 68%|██████▊ | 253961/371472 [9:41:13<10:27:48, 3.12it/s] 68%|██████▊ | 253962/371472 [9:41:14<10:36:28, 3.08it/s] 68%|██████▊ | 253963/371472 [9:41:14<10:15:42, 3.18it/s] 68%|██████▊ | 253964/371472 [9:41:14<10:11:28, 3.20it/s] 68%|██████▊ | 253965/371472 [9:41:15<10:04:38, 3.24it/s] 68%|██████▊ | 253966/371472 [9:41:15<9:52:45, 3.30it/s] 68%|██████▊ | 253967/371472 [9:41:15<10:20:38, 3.16it/s] 68%|██████▊ | 253968/371472 [9:41:15<10:07:02, 3.23it/s] 68%|██████▊ | 253969/371472 [9:41:16<9:52:44, 3.30it/s] 68%|██████▊ | 253970/371472 [9:41:16<9:45:09, 3.35it/s] 68%|██████▊ | 253971/371472 [9:41:16<9:29:15, 3.44it/s] 68%|██████▊ | 253972/371472 [9:41:17<10:04:32, 3.24it/s] 68%|██████▊ | 253973/371472 [9:41:17<9:34:51, 3.41it/s] 68%|██████▊ | 253974/371472 [9:41:17<9:49:39, 3.32it/s] 68%|██████▊ | 253975/371472 [9:41:18<9:43:30, 3.36it/s] 68%|██████▊ | 253976/371472 [9:41:18<9:22:01, 3.48it/s] 68%|██████▊ | 253977/371472 [9:41:18<9:40:59, 3.37it/s] 68%|██████▊ | 253978/371472 [9:41:18<9:17:37, 3.51it/s] 68%|██████▊ | 253979/371472 [9:41:19<9:31:02, 3.43it/s] 68%|██████▊ | 253980/371472 [9:41:19<9:27:30, 3.45it/s] {'loss': 2.4987, 'learning_rate': 3.8481221314831176e-07, 'epoch': 10.94} + 68%|██████▊ | 253980/371472 [9:41:19<9:27:30, 3.45it/s] 68%|██████▊ | 253981/371472 [9:41:19<9:28:36, 3.44it/s] 68%|██████▊ | 253982/371472 [9:41:20<9:28:03, 3.45it/s] 68%|██████▊ | 253983/371472 [9:41:20<9:13:12, 3.54it/s] 68%|██████▊ | 253984/371472 [9:41:20<9:12:01, 3.55it/s] 68%|██████▊ | 253985/371472 [9:41:20<9:16:49, 3.52it/s] 68%|████���█▊ | 253986/371472 [9:41:21<9:26:32, 3.46it/s] 68%|██████▊ | 253987/371472 [9:41:21<9:42:39, 3.36it/s] 68%|██████▊ | 253988/371472 [9:41:21<9:34:28, 3.41it/s] 68%|██████▊ | 253989/371472 [9:41:22<10:32:53, 3.09it/s] 68%|██████▊ | 253990/371472 [9:41:22<10:22:18, 3.15it/s] 68%|██████▊ | 253991/371472 [9:41:22<10:06:39, 3.23it/s] 68%|██████▊ | 253992/371472 [9:41:23<10:28:28, 3.12it/s] 68%|██████▊ | 253993/371472 [9:41:23<10:19:30, 3.16it/s] 68%|██████▊ | 253994/371472 [9:41:23<10:17:55, 3.17it/s] 68%|██████▊ | 253995/371472 [9:41:24<10:10:12, 3.21it/s] 68%|██████▊ | 253996/371472 [9:41:24<10:34:20, 3.09it/s] 68%|██████▊ | 253997/371472 [9:41:24<10:40:34, 3.06it/s] 68%|██████▊ | 253998/371472 [9:41:25<11:08:06, 2.93it/s] 68%|██████▊ | 253999/371472 [9:41:25<10:29:35, 3.11it/s] 68%|██████▊ | 254000/371472 [9:41:25<10:06:37, 3.23it/s] {'loss': 2.5379, 'learning_rate': 3.847637311728328e-07, 'epoch': 10.94} + 68%|██████▊ | 254000/371472 [9:41:25<10:06:37, 3.23it/s] 68%|██████▊ | 254001/371472 [9:41:25<9:45:51, 3.34it/s] 68%|██████▊ | 254002/371472 [9:41:26<9:25:02, 3.46it/s] 68%|██████▊ | 254003/371472 [9:41:26<9:39:45, 3.38it/s] 68%|██████▊ | 254004/371472 [9:41:26<9:33:19, 3.41it/s] 68%|██████▊ | 254005/371472 [9:41:27<9:44:06, 3.35it/s] 68%|██████▊ | 254006/371472 [9:41:27<9:35:26, 3.40it/s] 68%|██████▊ | 254007/371472 [9:41:27<9:13:08, 3.54it/s] 68%|██████▊ | 254008/371472 [9:41:27<9:29:14, 3.44it/s] 68%|██████▊ | 254009/371472 [9:41:28<9:11:39, 3.55it/s] 68%|██████▊ | 254010/371472 [9:41:28<9:35:02, 3.40it/s] 68%|██████▊ | 254011/371472 [9:41:28<9:34:22, 3.41it/s] 68%|██████▊ | 254012/371472 [9:41:29<9:56:16, 3.28it/s] 68%|██████▊ | 254013/371472 [9:41:29<9:47:23, 3.33it/s] 68%|██████▊ | 254014/371472 [9:41:29<10:09:41, 3.21it/s] 68%|██████▊ | 254015/371472 [9:41:30<10:18:18, 3.17it/s] 68%|██████▊ | 254016/371472 [9:41:30<10:18:17, 3.17it/s] 68%|██████▊ | 254017/371472 [9:41:30<10:07:52, 3.22it/s] 68%|██████▊ | 254018/371472 [9:41:31<10:17:21, 3.17it/s] 68%|██████▊ | 254019/371472 [9:41:31<10:00:50, 3.26it/s] 68%|██████▊ | 254020/371472 [9:41:31<9:38:13, 3.39it/s] {'loss': 2.8784, 'learning_rate': 3.8471524919735396e-07, 'epoch': 10.94} + 68%|██████▊ | 254020/371472 [9:41:31<9:38:13, 3.39it/s] 68%|██████▊ | 254021/371472 [9:41:31<9:43:34, 3.35it/s] 68%|██████▊ | 254022/371472 [9:41:32<9:47:18, 3.33it/s] 68%|██████▊ | 254023/371472 [9:41:32<9:37:37, 3.39it/s] 68%|██████▊ | 254024/371472 [9:41:32<10:14:25, 3.19it/s] 68%|██████▊ | 254025/371472 [9:41:33<10:03:40, 3.24it/s] 68%|██████▊ | 254026/371472 [9:41:33<9:55:05, 3.29it/s] 68%|██████▊ | 254027/371472 [9:41:33<9:56:03, 3.28it/s] 68%|██████▊ | 254028/371472 [9:41:34<9:46:09, 3.34it/s] 68%|██████▊ | 254029/371472 [9:41:34<9:39:53, 3.38it/s] 68%|██████▊ | 254030/371472 [9:41:34<9:26:16, 3.46it/s] 68%|██████▊ | 254031/371472 [9:41:34<9:22:45, 3.48it/s] 68%|██████▊ | 254032/371472 [9:41:35<9:04:25, 3.60it/s] 68%|██████▊ | 254033/371472 [9:41:35<9:08:42, 3.57it/s] 68%|██████▊ | 254034/371472 [9:41:35<9:21:00, 3.49it/s] 68%|██████▊ | 254035/371472 [9:41:36<9:23:17, 3.47it/s] 68%|██████▊ | 254036/371472 [9:41:36<9:16:19, 3.52it/s] 68%|██████▊ | 254037/371472 [9:41:36<9:33:16, 3.41it/s] 68%|██████▊ | 254038/371472 [9:41:36<9:26:48, 3.45it/s] 68%|██████▊ | 254039/371472 [9:41:37<9:13:27, 3.54it/s] 68%|██████▊ | 254040/371472 [9:41:37<9:13:06, 3.54it/s] {'loss': 2.7421, 'learning_rate': 3.8466676722187503e-07, 'epoch': 10.94} + 68%|██████▊ | 254040/371472 [9:41:37<9:13:06, 3.54it/s] 68%|██████▊ | 254041/371472 [9:41:37<9:23:06, 3.48it/s] 68%|██████▊ | 254042/371472 [9:41:38<9:35:38, 3.40it/s] 68%|██████▊ | 254043/371472 [9:41:38<9:22:24, 3.48it/s] 68%|██████▊ | 254044/371472 [9:41:38<9:22:42, 3.48it/s] 68%|██████▊ | 254045/371472 [9:41:38<9:29:48, 3.43it/s] 68%|██████▊ | 254046/371472 [9:41:39<9:24:08, 3.47it/s] 68%|██████▊ | 254047/371472 [9:41:39<9:30:05, 3.43it/s] 68%|██████▊ | 254048/371472 [9:41:39<9:26:56, 3.45it/s] 68%|██████▊ | 254049/371472 [9:41:40<9:33:46, 3.41it/s] 68%|██████▊ | 254050/371472 [9:41:40<9:40:11, 3.37it/s] 68%|██████▊ | 254051/371472 [9:41:40<10:10:08, 3.21it/s] 68%|██████▊ | 254052/371472 [9:41:41<10:02:23, 3.25it/s] 68%|██████▊ | 254053/371472 [9:41:41<10:22:37, 3.14it/s] 68%|██████▊ | 254054/371472 [9:41:41<10:00:14, 3.26it/s] 68%|██████▊ | 254055/371472 [9:41:41<10:01:12, 3.26it/s] 68%|██████▊ | 254056/371472 [9:41:42<9:44:26, 3.35it/s] 68%|██████▊ | 254057/371472 [9:41:42<9:55:55, 3.28it/s] 68%|██████▊ | 254058/371472 [9:41:42<10:04:03, 3.24it/s] 68%|██████▊ | 254059/371472 [9:41:43<10:09:58, 3.21it/s] 68%|██████▊ | 254060/371472 [9:41:43<9:51:12, 3.31it/s] {'loss': 2.7414, 'learning_rate': 3.8461828524639615e-07, 'epoch': 10.94} + 68%|██████▊ | 254060/371472 [9:41:43<9:51:12, 3.31it/s] 68%|██████▊ | 254061/371472 [9:41:43<9:36:17, 3.40it/s] 68%|██████▊ | 254062/371472 [9:41:44<9:40:07, 3.37it/s] 68%|██████▊ | 254063/371472 [9:41:44<9:27:34, 3.45it/s] 68%|██████▊ | 254064/371472 [9:41:44<9:22:42, 3.48it/s] 68%|██████▊ | 254065/371472 [9:41:44<9:27:29, 3.45it/s] 68%|██████▊ | 254066/371472 [9:41:45<9:29:45, 3.43it/s] 68%|██████▊ | 254067/371472 [9:41:45<9:23:03, 3.48it/s] 68%|██████▊ | 254068/371472 [9:41:45<9:30:53, 3.43it/s] 68%|██████▊ | 254069/371472 [9:41:46<9:29:04, 3.44it/s] 68%|██████▊ | 254070/371472 [9:41:46<9:30:54, 3.43it/s] 68%|██████▊ | 254071/371472 [9:41:46<9:26:05, 3.46it/s] 68%|██████▊ | 254072/371472 [9:41:46<9:25:43, 3.46it/s] 68%|██████▊ | 254073/371472 [9:41:47<9:40:53, 3.37it/s] 68%|██████▊ | 254074/371472 [9:41:47<9:52:38, 3.30it/s] 68%|██████▊ | 254075/371472 [9:41:47<9:44:05, 3.35it/s] 68%|██████▊ | 254076/371472 [9:41:48<9:47:13, 3.33it/s] 68%|██████▊ | 254077/371472 [9:41:48<10:07:16, 3.22it/s] 68%|██████▊ | 254078/371472 [9:41:48<10:04:25, 3.24it/s] 68%|██████▊ | 254079/371472 [9:41:49<10:40:15, 3.06it/s] 68%|██████▊ | 254080/371472 [9:41:49<11:01:37, 2.96it/s] {'loss': 2.8405, 'learning_rate': 3.845698032709172e-07, 'epoch': 10.94} + 68%|██████▊ | 254080/371472 [9:41:49<11:01:37, 2.96it/s] 68%|██████▊ | 254081/371472 [9:41:49<10:26:02, 3.13it/s] 68%|██████▊ | 254082/371472 [9:41:50<10:02:37, 3.25it/s] 68%|██████▊ | 254083/371472 [9:41:50<10:14:28, 3.18it/s] 68%|██████▊ | 254084/371472 [9:41:50<10:34:29, 3.08it/s] 68%|██████▊ | 254085/371472 [9:41:51<10:17:05, 3.17it/s] 68%|██████▊ | 254086/371472 [9:41:51<10:06:58, 3.22it/s] 68%|██████▊ | 254087/371472 [9:41:51<9:59:04, 3.27it/s] 68%|██████▊ | 254088/371472 [9:41:51<9:58:04, 3.27it/s] 68%|██████▊ | 254089/371472 [9:41:52<9:54:20, 3.29it/s] 68%|██████▊ | 254090/371472 [9:41:52<10:08:43, 3.21it/s] 68%|██████▊ | 254091/371472 [9:41:52<10:01:22, 3.25it/s] 68%|██████▊ | 254092/371472 [9:41:53<10:05:43, 3.23it/s] 68%|██████▊ | 254093/371472 [9:41:53<10:28:59, 3.11it/s] 68%|██████▊ | 254094/371472 [9:41:53<10:42:19, 3.05it/s] 68%|██████▊ | 254095/371472 [9:41:54<10:22:45, 3.14it/s] 68%|██████▊ | 254096/371472 [9:41:54<10:05:31, 3.23it/s] 68%|██████▊ | 254097/371472 [9:41:54<9:49:27, 3.32it/s] 68%|██████▊ | 254098/371472 [9:41:55<9:33:43, 3.41it/s] 68%|██████▊ | 254099/371472 [9:41:55<9:34:19, 3.41it/s] 68%|██████▊ | 254100/371472 [9:41:55<9:24:57, 3.46it/s] {'loss': 2.7445, 'learning_rate': 3.845213212954384e-07, 'epoch': 10.94} + 68%|██████▊ | 254100/371472 [9:41:55<9:24:57, 3.46it/s] 68%|██████▊ | 254101/371472 [9:41:55<9:24:05, 3.47it/s] 68%|██████▊ | 254102/371472 [9:41:56<9:18:48, 3.50it/s] 68%|██████▊ | 254103/371472 [9:41:56<9:23:34, 3.47it/s] 68%|██████▊ | 254104/371472 [9:41:56<9:18:37, 3.50it/s] 68%|██████▊ | 254105/371472 [9:41:57<9:24:47, 3.46it/s] 68%|██████▊ | 254106/371472 [9:41:57<9:46:38, 3.33it/s] 68%|██████▊ | 254107/371472 [9:41:57<9:37:51, 3.39it/s] 68%|██████▊ | 254108/371472 [9:41:58<10:02:22, 3.25it/s] 68%|██████▊ | 254109/371472 [9:41:58<9:43:57, 3.35it/s] 68%|██████▊ | 254110/371472 [9:41:58<9:31:42, 3.42it/s] 68%|██████▊ | 254111/371472 [9:41:58<10:51:07, 3.00it/s] 68%|██████▊ | 254112/371472 [9:41:59<10:36:47, 3.07it/s] 68%|██████▊ | 254113/371472 [9:41:59<10:32:54, 3.09it/s] 68%|██████▊ | 254114/371472 [9:41:59<10:01:00, 3.25it/s] 68%|██████▊ | 254115/371472 [9:42:00<10:11:38, 3.20it/s] 68%|██████▊ | 254116/371472 [9:42:00<10:00:01, 3.26it/s] 68%|██████▊ | 254117/371472 [9:42:00<10:40:36, 3.05it/s] 68%|██████▊ | 254118/371472 [9:42:01<10:26:21, 3.12it/s] 68%|██████▊ | 254119/371472 [9:42:01<10:08:56, 3.21it/s] 68%|██████▊ | 254120/371472 [9:42:01<9:53:18, 3.30it/s] {'loss': 2.8707, 'learning_rate': 3.844728393199594e-07, 'epoch': 10.95} + 68%|██████▊ | 254120/371472 [9:42:01<9:53:18, 3.30it/s] 68%|██████▊ | 254121/371472 [9:42:02<9:50:15, 3.31it/s] 68%|██████▊ | 254122/371472 [9:42:02<9:41:20, 3.36it/s] 68%|██████▊ | 254123/371472 [9:42:02<10:23:10, 3.14it/s] 68%|██████▊ | 254124/371472 [9:42:02<10:02:22, 3.25it/s] 68%|██████▊ | 254125/371472 [9:42:03<10:08:08, 3.22it/s] 68%|██████▊ | 254126/371472 [9:42:03<9:53:34, 3.29it/s] 68%|██████▊ | 254127/371472 [9:42:03<9:47:47, 3.33it/s] 68%|██████▊ | 254128/371472 [9:42:04<9:23:46, 3.47it/s] 68%|██████▊ | 254129/371472 [9:42:04<9:56:29, 3.28it/s] 68%|██████▊ | 254130/371472 [9:42:04<9:43:03, 3.35it/s] 68%|██████▊ | 254131/371472 [9:42:05<9:52:09, 3.30it/s] 68%|██████▊ | 254132/371472 [9:42:05<9:43:38, 3.35it/s] 68%|██████▊ | 254133/371472 [9:42:05<10:09:39, 3.21it/s] 68%|██████▊ | 254134/371472 [9:42:05<9:47:02, 3.33it/s] 68%|██████▊ | 254135/371472 [9:42:06<10:00:36, 3.26it/s] 68%|██████▊ | 254136/371472 [9:42:06<9:57:50, 3.27it/s] 68%|██████▊ | 254137/371472 [9:42:06<9:35:27, 3.40it/s] 68%|██████▊ | 254138/371472 [9:42:07<9:31:10, 3.42it/s] 68%|██████▊ | 254139/371472 [9:42:07<9:24:11, 3.47it/s] 68%|██████▊ | 254140/371472 [9:42:07<9:46:10, 3.34it/s] {'loss': 2.7064, 'learning_rate': 3.844243573444806e-07, 'epoch': 10.95} + 68%|██████▊ | 254140/371472 [9:42:07<9:46:10, 3.34it/s] 68%|██████▊ | 254141/371472 [9:42:08<9:48:48, 3.32it/s] 68%|██████▊ | 254142/371472 [9:42:08<9:40:00, 3.37it/s] 68%|██████▊ | 254143/371472 [9:42:08<9:36:14, 3.39it/s] 68%|██████▊ | 254144/371472 [9:42:09<10:02:04, 3.25it/s] 68%|██████▊ | 254145/371472 [9:42:09<9:57:55, 3.27it/s] 68%|██████▊ | 254146/371472 [9:42:09<10:05:02, 3.23it/s] 68%|██████▊ | 254147/371472 [9:42:09<9:52:19, 3.30it/s] 68%|██████▊ | 254148/371472 [9:42:10<9:39:58, 3.37it/s] 68%|██████▊ | 254149/371472 [9:42:10<10:14:55, 3.18it/s] 68%|██████▊ | 254150/371472 [9:42:10<10:02:41, 3.24it/s] 68%|██████▊ | 254151/371472 [9:42:11<10:06:26, 3.22it/s] 68%|██████▊ | 254152/371472 [9:42:11<10:42:36, 3.04it/s] 68%|██████▊ | 254153/371472 [9:42:11<10:14:53, 3.18it/s] 68%|██████▊ | 254154/371472 [9:42:12<9:57:48, 3.27it/s] 68%|██████▊ | 254155/371472 [9:42:12<11:03:18, 2.95it/s] 68%|██████▊ | 254156/371472 [9:42:12<10:49:31, 3.01it/s] 68%|██████▊ | 254157/371472 [9:42:13<10:37:34, 3.07it/s] 68%|██████▊ | 254158/371472 [9:42:13<10:37:18, 3.07it/s] 68%|██████▊ | 254159/371472 [9:42:13<10:18:21, 3.16it/s] 68%|██████▊ | 254160/371472 [9:42:14<9:50:01, 3.31it/s] {'loss': 2.6081, 'learning_rate': 3.8437587536900167e-07, 'epoch': 10.95} + 68%|██████▊ | 254160/371472 [9:42:14<9:50:01, 3.31it/s] 68%|██████▊ | 254161/371472 [9:42:14<10:04:41, 3.23it/s] 68%|██████▊ | 254162/371472 [9:42:14<9:46:47, 3.33it/s] 68%|██████▊ | 254163/371472 [9:42:14<9:34:06, 3.41it/s] 68%|██████▊ | 254164/371472 [9:42:15<9:20:28, 3.49it/s] 68%|██████▊ | 254165/371472 [9:42:15<9:13:43, 3.53it/s] 68%|██████▊ | 254166/371472 [9:42:15<9:18:58, 3.50it/s] 68%|���█████▊ | 254167/371472 [9:42:16<9:18:00, 3.50it/s] 68%|██████▊ | 254168/371472 [9:42:16<9:16:27, 3.51it/s] 68%|██████▊ | 254169/371472 [9:42:16<9:12:05, 3.54it/s] 68%|██████▊ | 254170/371472 [9:42:16<9:05:46, 3.58it/s] 68%|██████▊ | 254171/371472 [9:42:17<9:17:56, 3.50it/s] 68%|██████▊ | 254172/371472 [9:42:17<9:23:35, 3.47it/s] 68%|██████▊ | 254173/371472 [9:42:17<9:12:22, 3.54it/s] 68%|██████▊ | 254174/371472 [9:42:18<9:24:50, 3.46it/s] 68%|██████▊ | 254175/371472 [9:42:18<9:18:39, 3.50it/s] 68%|██████▊ | 254176/371472 [9:42:18<9:29:22, 3.43it/s] 68%|██████▊ | 254177/371472 [9:42:18<10:01:08, 3.25it/s] 68%|██████▊ | 254178/371472 [9:42:19<9:50:50, 3.31it/s] 68%|██████▊ | 254179/371472 [9:42:19<9:52:46, 3.30it/s] 68%|██████▊ | 254180/371472 [9:42:19<9:50:54, 3.31it/s] {'loss': 2.8442, 'learning_rate': 3.843273933935228e-07, 'epoch': 10.95} + 68%|██████▊ | 254180/371472 [9:42:19<9:50:54, 3.31it/s] 68%|██████▊ | 254181/371472 [9:42:20<11:24:03, 2.86it/s] 68%|██████▊ | 254182/371472 [9:42:20<10:45:24, 3.03it/s] 68%|██████▊ | 254183/371472 [9:42:20<10:31:53, 3.09it/s] 68%|██████▊ | 254184/371472 [9:42:21<11:08:21, 2.92it/s] 68%|██████▊ | 254185/371472 [9:42:21<10:38:02, 3.06it/s] 68%|██████▊ | 254186/371472 [9:42:21<10:13:58, 3.18it/s] 68%|██████▊ | 254187/371472 [9:42:22<9:45:21, 3.34it/s] 68%|██████▊ | 254188/371472 [9:42:22<10:24:01, 3.13it/s] 68%|██████▊ | 254189/371472 [9:42:22<10:14:36, 3.18it/s] 68%|██████▊ | 254190/371472 [9:42:23<10:00:31, 3.25it/s] 68%|██████▊ | 254191/371472 [9:42:23<10:12:38, 3.19it/s] 68%|██████▊ | 254192/371472 [9:42:23<9:53:09, 3.30it/s] 68%|██████▊ | 254193/371472 [9:42:24<9:56:25, 3.28it/s] 68%|██████▊ | 254194/371472 [9:42:24<10:12:59, 3.19it/s] 68%|██████▊ | 254195/371472 [9:42:24<10:23:08, 3.14it/s] 68%|██████▊ | 254196/371472 [9:42:24<10:07:38, 3.22it/s] 68%|██████▊ | 254197/371472 [9:42:25<9:45:32, 3.34it/s] 68%|██████▊ | 254198/371472 [9:42:25<9:34:28, 3.40it/s] 68%|██████▊ | 254199/371472 [9:42:25<9:45:45, 3.34it/s] 68%|██████▊ | 254200/371472 [9:42:26<9:39:28, 3.37it/s] {'loss': 2.7482, 'learning_rate': 3.8427891141804386e-07, 'epoch': 10.95} + 68%|██████▊ | 254200/371472 [9:42:26<9:39:28, 3.37it/s] 68%|██████▊ | 254201/371472 [9:42:26<10:01:16, 3.25it/s] 68%|██████▊ | 254202/371472 [9:42:26<9:41:24, 3.36it/s] 68%|██████▊ | 254203/371472 [9:42:27<9:40:51, 3.36it/s] 68%|██████▊ | 254204/371472 [9:42:27<9:36:36, 3.39it/s] 68%|██████▊ | 254205/371472 [9:42:27<9:21:10, 3.48it/s] 68%|██████▊ | 254206/371472 [9:42:27<9:24:06, 3.46it/s] 68%|██████▊ | 254207/371472 [9:42:28<9:27:59, 3.44it/s] 68%|██████▊ | 254208/371472 [9:42:28<9:41:03, 3.36it/s] 68%|██████▊ | 254209/371472 [9:42:28<9:38:18, 3.38it/s] 68%|██████▊ | 254210/371472 [9:42:29<10:28:40, 3.11it/s] 68%|██████▊ | 254211/371472 [9:42:29<9:59:23, 3.26it/s] 68%|██████▊ | 254212/371472 [9:42:29<10:06:43, 3.22it/s] 68%|██████▊ | 254213/371472 [9:42:30<10:01:37, 3.25it/s] 68%|██████▊ | 254214/371472 [9:42:30<9:40:48, 3.36it/s] 68%|██████▊ | 254215/371472 [9:42:30<10:22:10, 3.14it/s] 68%|██████▊ | 254216/371472 [9:42:30<10:05:03, 3.23it/s] 68%|██████▊ | 254217/371472 [9:42:31<10:06:05, 3.22it/s] 68%|██████▊ | 254218/371472 [9:42:31<9:55:54, 3.28it/s] 68%|██████▊ | 254219/371472 [9:42:32<11:45:41, 2.77it/s] 68%|██████▊ | 254220/371472 [9:42:32<10:51:46, 3.00it/s] {'loss': 2.7234, 'learning_rate': 3.8423042944256504e-07, 'epoch': 10.95} + 68%|██████▊ | 254220/371472 [9:42:32<10:51:46, 3.00it/s] 68%|██████▊ | 254221/371472 [9:42:32<10:22:47, 3.14it/s] 68%|██████▊ | 254222/371472 [9:42:32<10:28:31, 3.11it/s] 68%|██████▊ | 254223/371472 [9:42:33<10:16:35, 3.17it/s] 68%|██████▊ | 254224/371472 [9:42:33<9:53:35, 3.29it/s] 68%|██████▊ | 254225/371472 [9:42:33<9:38:56, 3.38it/s] 68%|██████▊ | 254226/371472 [9:42:34<9:52:25, 3.30it/s] 68%|██████▊ | 254227/371472 [9:42:34<9:30:57, 3.42it/s] 68%|██████▊ | 254228/371472 [9:42:34<9:34:36, 3.40it/s] 68%|██████▊ | 254229/371472 [9:42:34<9:17:08, 3.51it/s] 68%|██████▊ | 254230/371472 [9:42:35<9:09:42, 3.55it/s] 68%|██████▊ | 254231/371472 [9:42:35<9:15:55, 3.51it/s] 68%|██████▊ | 254232/371472 [9:42:35<9:30:39, 3.42it/s] 68%|██████▊ | 254233/371472 [9:42:36<11:04:32, 2.94it/s] 68%|██████▊ | 254234/371472 [9:42:36<10:28:24, 3.11it/s] 68%|██████▊ | 254235/371472 [9:42:36<10:04:32, 3.23it/s] 68%|██████▊ | 254236/371472 [9:42:37<9:37:18, 3.38it/s] 68%|██████▊ | 254237/371472 [9:42:37<10:33:20, 3.09it/s] 68%|██████▊ | 254238/371472 [9:42:37<11:02:47, 2.95it/s] 68%|██████▊ | 254239/371472 [9:42:38<10:38:42, 3.06it/s] 68%|██████▊ | 254240/371472 [9:42:38<10:39:34, 3.05it/s] {'loss': 2.7027, 'learning_rate': 3.841819474670861e-07, 'epoch': 10.95} + 68%|██████▊ | 254240/371472 [9:42:38<10:39:34, 3.05it/s] 68%|██████▊ | 254241/371472 [9:42:38<10:21:56, 3.14it/s] 68%|██████▊ | 254242/371472 [9:42:39<10:09:12, 3.21it/s] 68%|██████▊ | 254243/371472 [9:42:39<9:45:19, 3.34it/s] 68%|██████▊ | 254244/371472 [9:42:39<9:28:38, 3.44it/s] 68%|██████▊ | 254245/371472 [9:42:39<9:23:45, 3.47it/s] 68%|██████▊ | 254246/371472 [9:42:40<9:27:02, 3.45it/s] 68%|██████▊ | 254247/371472 [9:42:40<9:53:30, 3.29it/s] 68%|██████▊ | 254248/371472 [9:42:40<9:43:25, 3.35it/s] 68%|██████▊ | 254249/371472 [9:42:41<9:55:08, 3.28it/s] 68%|██████▊ | 254250/371472 [9:42:41<9:55:05, 3.28it/s] 68%|██████▊ | 254251/371472 [9:42:41<9:39:04, 3.37it/s] 68%|██████▊ | 254252/371472 [9:42:42<9:46:29, 3.33it/s] 68%|██████▊ | 254253/371472 [9:42:42<10:19:23, 3.15it/s] 68%|██████▊ | 254254/371472 [9:42:42<10:12:58, 3.19it/s] 68%|██████▊ | 254255/371472 [9:42:42<9:43:46, 3.35it/s] 68%|██████▊ | 254256/371472 [9:42:43<9:27:08, 3.44it/s] 68%|██████▊ | 254257/371472 [9:42:43<9:38:24, 3.38it/s] 68%|██████▊ | 254258/371472 [9:42:43<10:27:12, 3.11it/s] 68%|██████▊ | 254259/371472 [9:42:44<10:04:08, 3.23it/s] 68%|██████▊ | 254260/371472 [9:42:44<9:37:37, 3.38it/s] {'loss': 2.8861, 'learning_rate': 3.8413346549160724e-07, 'epoch': 10.95} + 68%|██████▊ | 254260/371472 [9:42:44<9:37:37, 3.38it/s] 68%|██████▊ | 254261/371472 [9:42:44<9:55:13, 3.28it/s] 68%|██████▊ | 254262/371472 [9:42:45<9:36:29, 3.39it/s] 68%|██████▊ | 254263/371472 [9:42:45<9:31:52, 3.42it/s] 68%|██████▊ | 254264/371472 [9:42:45<9:49:00, 3.32it/s] 68%|██████▊ | 254265/371472 [9:42:45<9:44:53, 3.34it/s] 68%|██████▊ | 254266/371472 [9:42:46<9:33:54, 3.40it/s] 68%|██████▊ | 254267/371472 [9:42:46<9:51:15, 3.30it/s] 68%|██████▊ | 254268/371472 [9:42:46<9:35:55, 3.39it/s] 68%|██████▊ | 254269/371472 [9:42:47<9:28:15, 3.44it/s] 68%|██████▊ | 254270/371472 [9:42:47<9:20:44, 3.48it/s] 68%|██████▊ | 254271/371472 [9:42:47<9:16:06, 3.51it/s] 68%|██████▊ | 254272/371472 [9:42:48<9:23:56, 3.46it/s] 68%|██████▊ | 254273/371472 [9:42:48<9:26:17, 3.45it/s] 68%|██████▊ | 254274/371472 [9:42:48<9:20:25, 3.49it/s] 68%|██████▊ | 254275/371472 [9:42:48<9:18:11, 3.50it/s] 68%|██████▊ | 254276/371472 [9:42:49<9:21:59, 3.48it/s] 68%|██████▊ | 254277/371472 [9:42:49<9:22:49, 3.47it/s] 68%|██████▊ | 254278/371472 [9:42:49<9:43:05, 3.35it/s] 68%|██████▊ | 254279/371472 [9:42:50<9:35:07, 3.40it/s] 68%|██████▊ | 254280/371472 [9:42:50<9:36:01, 3.39it/s] {'loss': 2.6787, 'learning_rate': 3.8408498351612836e-07, 'epoch': 10.95} + 68%|██████▊ | 254280/371472 [9:42:50<9:36:01, 3.39it/s] 68%|██████▊ | 254281/371472 [9:42:50<9:25:57, 3.45it/s] 68%|██████▊ | 254282/371472 [9:42:50<10:03:57, 3.23it/s] 68%|██████▊ | 254283/371472 [9:42:51<10:29:08, 3.10it/s] 68%|██████▊ | 254284/371472 [9:42:51<10:08:02, 3.21it/s] 68%|██████▊ | 254285/371472 [9:42:51<9:42:07, 3.36it/s] 68%|██████▊ | 254286/371472 [9:42:52<10:01:45, 3.25it/s] 68%|██████▊ | 254287/371472 [9:42:52<9:44:19, 3.34it/s] 68%|██████▊ | 254288/371472 [9:42:52<9:30:08, 3.43it/s] 68%|██████▊ | 254289/371472 [9:42:53<10:17:18, 3.16it/s] 68%|██████▊ | 254290/371472 [9:42:53<10:20:25, 3.15it/s] 68%|██████▊ | 254291/371472 [9:42:53<10:18:26, 3.16it/s] 68%|██████▊ | 254292/371472 [9:42:54<10:04:02, 3.23it/s] 68%|██████▊ | 254293/371472 [9:42:54<9:46:10, 3.33it/s] 68%|██████▊ | 254294/371472 [9:42:54<9:38:53, 3.37it/s] 68%|██████▊ | 254295/371472 [9:42:54<9:39:56, 3.37it/s] 68%|██████▊ | 254296/371472 [9:42:55<9:45:34, 3.34it/s] 68%|██████▊ | 254297/371472 [9:42:55<9:50:45, 3.31it/s] 68%|██████▊ | 254298/371472 [9:42:55<10:02:42, 3.24it/s] 68%|██████▊ | 254299/371472 [9:42:56<10:04:17, 3.23it/s] 68%|██████▊ | 254300/371472 [9:42:56<10:00:59, 3.25it/s] {'loss': 2.6898, 'learning_rate': 3.840365015406495e-07, 'epoch': 10.95} + 68%|██████▊ | 254300/371472 [9:42:56<10:00:59, 3.25it/s] 68%|██████▊ | 254301/371472 [9:42:56<10:04:02, 3.23it/s] 68%|██████▊ | 254302/371472 [9:42:57<9:46:59, 3.33it/s] 68%|██████▊ | 254303/371472 [9:42:57<9:55:24, 3.28it/s] 68%|██████▊ | 254304/371472 [9:42:57<9:42:30, 3.35it/s] 68%|██████▊ | 254305/371472 [9:42:57<9:47:21, 3.32it/s] 68%|██████▊ | 254306/371472 [9:42:58<9:33:31, 3.40it/s] 68%|██████▊ | 254307/371472 [9:42:58<9:19:27, 3.49it/s] 68%|██████▊ | 254308/371472 [9:42:58<9:51:52, 3.30it/s] 68%|██████▊ | 254309/371472 [9:42:59<9:58:53, 3.26it/s] 68%|██████▊ | 254310/371472 [9:42:59<9:48:40, 3.32it/s] 68%|██████▊ | 254311/371472 [9:42:59<9:50:31, 3.31it/s] 68%|██████▊ | 254312/371472 [9:43:00<9:52:57, 3.29it/s] 68%|██████▊ | 254313/371472 [9:43:00<9:47:11, 3.33it/s] 68%|██████▊ | 254314/371472 [9:43:00<10:02:15, 3.24it/s] 68%|██████▊ | 254315/371472 [9:43:01<10:06:47, 3.22it/s] 68%|██████▊ | 254316/371472 [9:43:01<9:58:18, 3.26it/s] 68%|██████▊ | 254317/371472 [9:43:01<9:48:04, 3.32it/s] 68%|██████▊ | 254318/371472 [9:43:01<9:24:59, 3.46it/s] 68%|██████▊ | 254319/371472 [9:43:02<9:08:56, 3.56it/s] 68%|██████▊ | 254320/371472 [9:43:02<9:04:54, 3.58it/s] {'loss': 2.6418, 'learning_rate': 3.839880195651705e-07, 'epoch': 10.95} + 68%|██████▊ | 254320/371472 [9:43:02<9:04:54, 3.58it/s] 68%|██████▊ | 254321/371472 [9:43:02<9:19:08, 3.49it/s] 68%|██████▊ | 254322/371472 [9:43:02<9:16:23, 3.51it/s] 68%|██████▊ | 254323/371472 [9:43:03<9:20:48, 3.48it/s] 68%|██████▊ | 254324/371472 [9:43:03<9:19:33, 3.49it/s] 68%|██████▊ | 254325/371472 [9:43:03<9:28:35, 3.43it/s] 68%|██████▊ | 254326/371472 [9:43:04<9:28:55, 3.43it/s] 68%|██████▊ | 254327/371472 [9:43:04<9:35:53, 3.39it/s] 68%|██████▊ | 254328/371472 [9:43:04<11:31:44, 2.82it/s] 68%|██████▊ | 254329/371472 [9:43:05<11:24:24, 2.85it/s] 68%|██████▊ | 254330/371472 [9:43:05<10:43:32, 3.03it/s] 68%|██████▊ | 254331/371472 [9:43:05<10:17:04, 3.16it/s] 68%|██████▊ | 254332/371472 [9:43:06<10:02:45, 3.24it/s] 68%|██████▊ | 254333/371472 [9:43:06<9:50:35, 3.31it/s] 68%|██████▊ | 254334/371472 [9:43:06<9:43:47, 3.34it/s] 68%|██████▊ | 254335/371472 [9:43:07<9:38:32, 3.37it/s] 68%|██████▊ | 254336/371472 [9:43:07<9:51:49, 3.30it/s] 68%|██████▊ | 254337/371472 [9:43:07<9:54:46, 3.28it/s] 68%|██████▊ | 254338/371472 [9:43:07<9:56:06, 3.27it/s] 68%|██████▊ | 254339/371472 [9:43:08<9:40:05, 3.37it/s] 68%|██████▊ | 254340/371472 [9:43:08<9:56:54, 3.27it/s] {'loss': 2.6845, 'learning_rate': 3.839395375896917e-07, 'epoch': 10.95} + 68%|██████▊ | 254340/371472 [9:43:08<9:56:54, 3.27it/s] 68%|██████▊ | 254341/371472 [9:43:08<9:39:09, 3.37it/s] 68%|██████▊ | 254342/371472 [9:43:09<9:27:09, 3.44it/s] 68%|██████▊ | 254343/371472 [9:43:09<9:25:27, 3.45it/s] 68%|██████▊ | 254344/371472 [9:43:09<9:32:35, 3.41it/s] 68%|██████▊ | 254345/371472 [9:43:09<9:32:49, 3.41it/s] 68%|██████▊ | 254346/371472 [9:43:10<10:15:10, 3.17it/s] 68%|██████▊ | 254347/371472 [9:43:10<10:12:30, 3.19it/s] 68%|██████▊ | 254348/371472 [9:43:10<9:41:19, 3.36it/s] 68%|██████▊ | 254349/371472 [9:43:11<9:31:45, 3.41it/s] 68%|██████▊ | 254350/371472 [9:43:11<9:38:23, 3.37it/s] 68%|██████▊ | 254351/371472 [9:43:11<9:14:57, 3.52it/s] 68%|██████▊ | 254352/371472 [9:43:12<9:29:25, 3.43it/s] 68%|██████▊ | 254353/371472 [9:43:12<9:20:48, 3.48it/s] 68%|██████▊ | 254354/371472 [9:43:12<9:45:38, 3.33it/s] 68%|██████▊ | 254355/371472 [9:43:12<9:33:01, 3.41it/s] 68%|██████▊ | 254356/371472 [9:43:13<9:18:35, 3.49it/s] 68%|██████▊ | 254357/371472 [9:43:13<9:16:55, 3.50it/s] 68%|██████▊ | 254358/371472 [9:43:13<9:17:38, 3.50it/s] 68%|██████▊ | 254359/371472 [9:43:14<9:37:36, 3.38it/s] 68%|██████▊ | 254360/371472 [9:43:14<10:30:22, 3.10it/s] {'loss': 2.6792, 'learning_rate': 3.8389105561421275e-07, 'epoch': 10.96} + 68%|██████▊ | 254360/371472 [9:43:14<10:30:22, 3.10it/s] 68%|██████▊ | 254361/371472 [9:43:14<10:03:55, 3.23it/s] 68%|██████▊ | 254362/371472 [9:43:15<10:00:17, 3.25it/s] 68%|██████▊ | 254363/371472 [9:43:15<10:34:22, 3.08it/s] 68%|██████▊ | 254364/371472 [9:43:15<10:31:17, 3.09it/s] 68%|██████▊ | 254365/371472 [9:43:16<10:22:50, 3.13it/s] 68%|██████▊ | 254366/371472 [9:43:16<9:53:13, 3.29it/s] 68%|██████▊ | 254367/371472 [9:43:16<9:39:46, 3.37it/s] 68%|██████▊ | 254368/371472 [9:43:16<9:45:58, 3.33it/s] 68%|██████▊ | 254369/371472 [9:43:17<9:52:23, 3.29it/s] 68%|██████▊ | 254370/371472 [9:43:17<10:32:02, 3.09it/s] 68%|██████▊ | 254371/371472 [9:43:18<11:05:08, 2.93it/s] 68%|██████▊ | 254372/371472 [9:43:18<10:41:52, 3.04it/s] 68%|██████▊ | 254373/371472 [9:43:18<10:02:14, 3.24it/s] 68%|██████▊ | 254374/371472 [9:43:18<10:46:17, 3.02it/s] 68%|██████▊ | 254375/371472 [9:43:19<11:20:48, 2.87it/s] 68%|██████▊ | 254376/371472 [9:43:19<10:32:21, 3.09it/s] 68%|██████▊ | 254377/371472 [9:43:19<10:29:14, 3.10it/s] 68%|██████▊ | 254378/371472 [9:43:20<10:11:53, 3.19it/s] 68%|██████▊ | 254379/371472 [9:43:20<10:07:29, 3.21it/s] 68%|██████▊ | 254380/371472 [9:43:20<10:03:59, 3.23it/s] {'loss': 2.7364, 'learning_rate': 3.838425736387339e-07, 'epoch': 10.96} + 68%|██████▊ | 254380/371472 [9:43:20<10:03:59, 3.23it/s] 68%|██████▊ | 254381/371472 [9:43:21<10:07:23, 3.21it/s] 68%|██████▊ | 254382/371472 [9:43:21<10:28:39, 3.10it/s] 68%|██████▊ | 254383/371472 [9:43:21<10:02:59, 3.24it/s] 68%|██████▊ | 254384/371472 [9:43:22<9:55:35, 3.28it/s] 68%|██████▊ | 254385/371472 [9:43:22<9:52:45, 3.29it/s] 68%|██████▊ | 254386/371472 [9:43:22<9:51:24, 3.30it/s] 68%|██████▊ | 254387/371472 [9:43:22<9:55:56, 3.27it/s] 68%|██████▊ | 254388/371472 [9:43:23<9:57:01, 3.27it/s] 68%|██████▊ | 254389/371472 [9:43:23<9:33:44, 3.40it/s] 68%|██████▊ | 254390/371472 [9:43:23<10:32:47, 3.08it/s] 68%|██████▊ | 254391/371472 [9:43:24<10:05:21, 3.22it/s] 68%|██████▊ | 254392/371472 [9:43:24<9:53:54, 3.29it/s] 68%|██████▊ | 254393/371472 [9:43:24<9:48:58, 3.31it/s] 68%|██████▊ | 254394/371472 [9:43:25<9:32:52, 3.41it/s] 68%|██████▊ | 254395/371472 [9:43:25<9:26:43, 3.44it/s] 68%|██████▊ | 254396/371472 [9:43:25<9:32:36, 3.41it/s] 68%|██████▊ | 254397/371472 [9:43:25<9:26:07, 3.45it/s] 68%|██████▊ | 254398/371472 [9:43:26<9:31:11, 3.42it/s] 68%|██████▊ | 254399/371472 [9:43:26<9:25:21, 3.45it/s] 68%|██████▊ | 254400/371472 [9:43:26<9:53:35, 3.29it/s] {'loss': 2.6681, 'learning_rate': 3.8379409166325495e-07, 'epoch': 10.96} + 68%|██████▊ | 254400/371472 [9:43:26<9:53:35, 3.29it/s] 68%|██████▊ | 254401/371472 [9:43:27<10:05:41, 3.22it/s] 68%|██████▊ | 254402/371472 [9:43:27<9:40:51, 3.36it/s] 68%|██████▊ | 254403/371472 [9:43:27<9:43:40, 3.34it/s] 68%|██████▊ | 254404/371472 [9:43:28<9:51:03, 3.30it/s] 68%|██████▊ | 254405/371472 [9:43:28<9:30:43, 3.42it/s] 68%|██████▊ | 254406/371472 [9:43:28<9:30:39, 3.42it/s] 68%|██████▊ | 254407/371472 [9:43:28<9:28:15, 3.43it/s] 68%|█████���▊ | 254408/371472 [9:43:29<9:09:55, 3.55it/s] 68%|██████▊ | 254409/371472 [9:43:29<9:18:52, 3.49it/s] 68%|██████▊ | 254410/371472 [9:43:29<9:16:30, 3.51it/s] 68%|██████▊ | 254411/371472 [9:43:30<9:07:43, 3.56it/s] 68%|██████▊ | 254412/371472 [9:43:30<9:04:41, 3.58it/s] 68%|██████▊ | 254413/371472 [9:43:30<9:20:51, 3.48it/s] 68%|██████▊ | 254414/371472 [9:43:30<9:30:54, 3.42it/s] 68%|██████▊ | 254415/371472 [9:43:31<9:14:32, 3.52it/s] 68%|██████▊ | 254416/371472 [9:43:31<9:26:11, 3.45it/s] 68%|██████▊ | 254417/371472 [9:43:31<9:19:57, 3.48it/s] 68%|██████▊ | 254418/371472 [9:43:32<9:01:37, 3.60it/s] 68%|██████▊ | 254419/371472 [9:43:32<9:02:31, 3.60it/s] 68%|██████▊ | 254420/371472 [9:43:32<9:15:45, 3.51it/s] {'loss': 2.7149, 'learning_rate': 3.8374560968777613e-07, 'epoch': 10.96} + 68%|██████▊ | 254420/371472 [9:43:32<9:15:45, 3.51it/s] 68%|██████▊ | 254421/371472 [9:43:32<9:23:53, 3.46it/s] 68%|██████▊ | 254422/371472 [9:43:33<9:19:47, 3.48it/s] 68%|██████▊ | 254423/371472 [9:43:33<9:20:55, 3.48it/s] 68%|██████▊ | 254424/371472 [9:43:33<9:04:01, 3.59it/s] 68%|██████▊ | 254425/371472 [9:43:34<9:05:34, 3.58it/s] 68%|██████▊ | 254426/371472 [9:43:34<9:09:33, 3.55it/s] 68%|██████▊ | 254427/371472 [9:43:34<9:01:10, 3.60it/s] 68%|██████▊ | 254428/371472 [9:43:34<9:00:06, 3.61it/s] 68%|██████▊ | 254429/371472 [9:43:35<9:40:14, 3.36it/s] 68%|██████▊ | 254430/371472 [9:43:35<9:26:47, 3.44it/s] 68%|██████▊ | 254431/371472 [9:43:35<9:20:08, 3.48it/s] 68%|██████▊ | 254432/371472 [9:43:36<10:07:12, 3.21it/s] 68%|██████▊ | 254433/371472 [9:43:36<10:16:58, 3.16it/s] 68%|██████▊ | 254434/371472 [9:43:36<10:12:10, 3.19it/s] 68%|██████▊ | 254435/371472 [9:43:37<9:50:44, 3.30it/s] 68%|██████▊ | 254436/371472 [9:43:37<9:49:33, 3.31it/s] 68%|██████▊ | 254437/371472 [9:43:37<10:00:55, 3.25it/s] 68%|██████▊ | 254438/371472 [9:43:38<10:25:46, 3.12it/s] 68%|██████▊ | 254439/371472 [9:43:38<10:04:23, 3.23it/s] 68%|██████▊ | 254440/371472 [9:43:38<10:15:37, 3.17it/s] {'loss': 2.7263, 'learning_rate': 3.8369712771229715e-07, 'epoch': 10.96} + 68%|██████▊ | 254440/371472 [9:43:38<10:15:37, 3.17it/s] 68%|██████▊ | 254441/371472 [9:43:38<9:53:20, 3.29it/s] 68%|██████▊ | 254442/371472 [9:43:39<9:48:33, 3.31it/s] 68%|██████▊ | 254443/371472 [9:43:39<9:32:56, 3.40it/s] 68%|██████▊ | 254444/371472 [9:43:39<9:21:35, 3.47it/s] 68%|██████▊ | 254445/371472 [9:43:40<9:42:38, 3.35it/s] 68%|██████▊ | 254446/371472 [9:43:40<9:57:06, 3.27it/s] 68%|██████▊ | 254447/371472 [9:43:40<9:49:18, 3.31it/s] 68%|██████▊ | 254448/371472 [9:43:40<9:24:03, 3.46it/s] 68%|██████▊ | 254449/371472 [9:43:41<9:34:39, 3.39it/s] 68%|██████▊ | 254450/371472 [9:43:41<9:23:43, 3.46it/s] 68%|██████▊ | 254451/371472 [9:43:41<9:08:31, 3.56it/s] 68%|██████▊ | 254452/371472 [9:43:42<8:57:35, 3.63it/s] 68%|██████▊ | 254453/371472 [9:43:42<9:02:12, 3.60it/s] 68%|██████▊ | 254454/371472 [9:43:42<8:58:40, 3.62it/s] 68%|██████▊ | 254455/371472 [9:43:42<9:12:19, 3.53it/s] 68%|██████▊ | 254456/371472 [9:43:43<9:28:31, 3.43it/s] 68%|██████▊ | 254457/371472 [9:43:43<9:14:19, 3.52it/s] 68%|██████▊ | 254458/371472 [9:43:43<9:10:43, 3.54it/s] 69%|██████▊ | 254459/371472 [9:43:44<9:20:01, 3.48it/s] 69%|██████▊ | 254460/371472 [9:43:44<10:03:28, 3.23it/s] {'loss': 2.6764, 'learning_rate': 3.836486457368183e-07, 'epoch': 10.96} + 69%|██████▊ | 254460/371472 [9:43:44<10:03:28, 3.23it/s] 69%|██████▊ | 254461/371472 [9:43:44<9:46:03, 3.33it/s] 69%|██████▊ | 254462/371472 [9:43:45<9:46:08, 3.33it/s] 69%|██████▊ | 254463/371472 [9:43:45<9:30:57, 3.42it/s] 69%|██████▊ | 254464/371472 [9:43:45<9:33:18, 3.40it/s] 69%|██████▊ | 254465/371472 [9:43:45<9:27:50, 3.43it/s] 69%|██████▊ | 254466/371472 [9:43:46<9:31:34, 3.41it/s] 69%|██████▊ | 254467/371472 [9:43:46<9:28:39, 3.43it/s] 69%|██████▊ | 254468/371472 [9:43:46<9:36:29, 3.38it/s] 69%|██████▊ | 254469/371472 [9:43:47<9:34:47, 3.39it/s] 69%|██████▊ | 254470/371472 [9:43:47<9:30:32, 3.42it/s] 69%|██████▊ | 254471/371472 [9:43:47<9:29:37, 3.42it/s] 69%|██████▊ | 254472/371472 [9:43:47<9:46:25, 3.33it/s] 69%|██████▊ | 254473/371472 [9:43:48<9:45:34, 3.33it/s] 69%|██████▊ | 254474/371472 [9:43:48<9:35:03, 3.39it/s] 69%|██████▊ | 254475/371472 [9:43:48<9:20:53, 3.48it/s] 69%|██████▊ | 254476/371472 [9:43:49<9:03:29, 3.59it/s] 69%|██████▊ | 254477/371472 [9:43:49<9:04:14, 3.58it/s] 69%|██████▊ | 254478/371472 [9:43:49<9:57:51, 3.26it/s] 69%|██████▊ | 254479/371472 [9:43:49<9:43:25, 3.34it/s] 69%|██████▊ | 254480/371472 [9:43:50<9:28:41, 3.43it/s] {'loss': 2.803, 'learning_rate': 3.836001637613394e-07, 'epoch': 10.96} + 69%|██████▊ | 254480/371472 [9:43:50<9:28:41, 3.43it/s] 69%|██████▊ | 254481/371472 [9:43:50<9:55:58, 3.27it/s] 69%|██████▊ | 254482/371472 [9:43:50<9:50:02, 3.30it/s] 69%|██████▊ | 254483/371472 [9:43:51<9:34:14, 3.40it/s] 69%|██████▊ | 254484/371472 [9:43:51<9:20:58, 3.48it/s] 69%|██████▊ | 254485/371472 [9:43:51<9:12:06, 3.53it/s] 69%|██████▊ | 254486/371472 [9:43:52<9:34:45, 3.39it/s] 69%|██████▊ | 254487/371472 [9:43:52<9:18:12, 3.49it/s] 69%|██████▊ | 254488/371472 [9:43:52<9:06:52, 3.57it/s] 69%|██████▊ | 254489/371472 [9:43:52<9:23:12, 3.46it/s] 69%|██████▊ | 254490/371472 [9:43:53<9:27:54, 3.43it/s] 69%|██████▊ | 254491/371472 [9:43:53<9:22:47, 3.46it/s] 69%|██████▊ | 254492/371472 [9:43:53<9:15:46, 3.51it/s] 69%|██████▊ | 254493/371472 [9:43:54<9:33:28, 3.40it/s] 69%|██████▊ | 254494/371472 [9:43:54<10:15:42, 3.17it/s] 69%|██████▊ | 254495/371472 [9:43:54<10:03:26, 3.23it/s] 69%|██████▊ | 254496/371472 [9:43:54<9:47:38, 3.32it/s] 69%|██████▊ | 254497/371472 [9:43:55<9:44:18, 3.34it/s] 69%|██████▊ | 254498/371472 [9:43:55<10:04:37, 3.22it/s] 69%|██████▊ | 254499/371472 [9:43:55<9:57:05, 3.27it/s] 69%|██████▊ | 254500/371472 [9:43:56<9:31:12, 3.41it/s] {'loss': 2.7733, 'learning_rate': 3.835516817858604e-07, 'epoch': 10.96} + 69%|██████▊ | 254500/371472 [9:43:56<9:31:12, 3.41it/s] 69%|██████▊ | 254501/371472 [9:43:56<9:28:27, 3.43it/s] 69%|██████▊ | 254502/371472 [9:43:56<9:32:46, 3.40it/s] 69%|██████▊ | 254503/371472 [9:43:57<9:44:38, 3.33it/s] 69%|██████▊ | 254504/371472 [9:43:57<9:24:44, 3.45it/s] 69%|██████▊ | 254505/371472 [9:43:57<9:16:48, 3.50it/s] 69%|██████▊ | 254506/371472 [9:43:57<9:35:25, 3.39it/s] 69%|██████▊ | 254507/371472 [9:43:58<9:59:37, 3.25it/s] 69%|██████▊ | 254508/371472 [9:43:58<11:05:38, 2.93it/s] 69%|██████▊ | 254509/371472 [9:43:59<11:29:03, 2.83it/s] 69%|██████▊ | 254510/371472 [9:43:59<10:59:34, 2.96it/s] 69%|██████▊ | 254511/371472 [9:43:59<10:48:20, 3.01it/s] 69%|██████▊ | 254512/371472 [9:43:59<10:22:11, 3.13it/s] 69%|██████▊ | 254513/371472 [9:44:00<10:35:15, 3.07it/s] 69%|██████▊ | 254514/371472 [9:44:00<10:11:07, 3.19it/s] 69%|██████▊ | 254515/371472 [9:44:00<9:54:37, 3.28it/s] 69%|██████▊ | 254516/371472 [9:44:01<9:54:53, 3.28it/s] 69%|██████▊ | 254517/371472 [9:44:01<9:44:29, 3.33it/s] 69%|██████▊ | 254518/371472 [9:44:01<9:50:03, 3.30it/s] 69%|██████▊ | 254519/371472 [9:44:02<9:42:07, 3.35it/s] 69%|██████▊ | 254520/371472 [9:44:02<9:30:09, 3.42it/s] {'loss': 2.6544, 'learning_rate': 3.835031998103816e-07, 'epoch': 10.96} + 69%|██████▊ | 254520/371472 [9:44:02<9:30:09, 3.42it/s] 69%|██████▊ | 254521/371472 [9:44:02<9:23:38, 3.46it/s] 69%|██████▊ | 254522/371472 [9:44:02<9:31:44, 3.41it/s] 69%|██████▊ | 254523/371472 [9:44:03<9:20:59, 3.47it/s] 69%|██████▊ | 254524/371472 [9:44:03<9:14:43, 3.51it/s] 69%|██████▊ | 254525/371472 [9:44:03<9:39:38, 3.36it/s] 69%|██████▊ | 254526/371472 [9:44:04<9:31:00, 3.41it/s] 69%|██████▊ | 254527/371472 [9:44:04<9:20:16, 3.48it/s] 69%|██████▊ | 254528/371472 [9:44:04<9:23:02, 3.46it/s] 69%|██████��� | 254529/371472 [9:44:04<9:15:53, 3.51it/s] 69%|██████▊ | 254530/371472 [9:44:05<9:01:50, 3.60it/s] 69%|██████▊ | 254531/371472 [9:44:05<9:00:36, 3.61it/s] 69%|██████▊ | 254532/371472 [9:44:05<9:02:57, 3.59it/s] 69%|██████▊ | 254533/371472 [9:44:06<9:02:52, 3.59it/s] 69%|██████▊ | 254534/371472 [9:44:06<8:56:38, 3.63it/s] 69%|██████▊ | 254535/371472 [9:44:06<8:50:12, 3.68it/s] 69%|██████▊ | 254536/371472 [9:44:06<9:08:49, 3.55it/s] 69%|██████▊ | 254537/371472 [9:44:07<9:07:15, 3.56it/s] 69%|██████▊ | 254538/371472 [9:44:07<9:04:45, 3.58it/s] 69%|██████▊ | 254539/371472 [9:44:07<9:09:39, 3.55it/s] 69%|██████▊ | 254540/371472 [9:44:08<9:20:03, 3.48it/s] {'loss': 2.836, 'learning_rate': 3.8345471783490266e-07, 'epoch': 10.96} + 69%|██████▊ | 254540/371472 [9:44:08<9:20:03, 3.48it/s] 69%|██████▊ | 254541/371472 [9:44:08<9:01:05, 3.60it/s] 69%|██████▊ | 254542/371472 [9:44:08<9:03:43, 3.58it/s] 69%|██████▊ | 254543/371472 [9:44:08<9:19:14, 3.48it/s] 69%|██████▊ | 254544/371472 [9:44:09<9:51:32, 3.29it/s] 69%|██████▊ | 254545/371472 [9:44:09<9:39:52, 3.36it/s] 69%|██████▊ | 254546/371472 [9:44:09<9:28:37, 3.43it/s] 69%|██████▊ | 254547/371472 [9:44:10<9:12:09, 3.53it/s] 69%|██████▊ | 254548/371472 [9:44:10<9:06:34, 3.57it/s] 69%|██████▊ | 254549/371472 [9:44:10<9:01:32, 3.60it/s] 69%|██████▊ | 254550/371472 [9:44:10<8:57:24, 3.63it/s] 69%|██████▊ | 254551/371472 [9:44:11<9:04:44, 3.58it/s] 69%|██████▊ | 254552/371472 [9:44:11<9:06:16, 3.57it/s] 69%|██████▊ | 254553/371472 [9:44:11<9:57:43, 3.26it/s] 69%|██████▊ | 254554/371472 [9:44:12<9:55:01, 3.27it/s] 69%|██████▊ | 254555/371472 [9:44:12<9:36:56, 3.38it/s] 69%|██████▊ | 254556/371472 [9:44:12<9:15:48, 3.51it/s] 69%|██████▊ | 254557/371472 [9:44:12<8:57:46, 3.62it/s] 69%|██████▊ | 254558/371472 [9:44:13<9:19:23, 3.48it/s] 69%|██████▊ | 254559/371472 [9:44:13<9:14:49, 3.51it/s] 69%|██████▊ | 254560/371472 [9:44:13<9:22:26, 3.46it/s] {'loss': 2.7806, 'learning_rate': 3.834062358594238e-07, 'epoch': 10.96} + 69%|██████▊ | 254560/371472 [9:44:13<9:22:26, 3.46it/s] 69%|██████▊ | 254561/371472 [9:44:14<9:11:08, 3.54it/s] 69%|██████▊ | 254562/371472 [9:44:14<9:10:00, 3.54it/s] 69%|██████▊ | 254563/371472 [9:44:14<10:10:13, 3.19it/s] 69%|██████▊ | 254564/371472 [9:44:15<10:25:33, 3.11it/s] 69%|██████▊ | 254565/371472 [9:44:15<10:06:07, 3.21it/s] 69%|██████▊ | 254566/371472 [9:44:15<9:51:58, 3.29it/s] 69%|██████▊ | 254567/371472 [9:44:15<9:27:27, 3.43it/s] 69%|██████▊ | 254568/371472 [9:44:16<9:27:27, 3.43it/s] 69%|██████▊ | 254569/371472 [9:44:16<9:13:02, 3.52it/s] 69%|██████▊ | 254570/371472 [9:44:16<9:48:57, 3.31it/s] 69%|██████▊ | 254571/371472 [9:44:17<10:08:18, 3.20it/s] 69%|██████▊ | 254572/371472 [9:44:17<9:47:11, 3.32it/s] 69%|██████▊ | 254573/371472 [9:44:17<9:52:49, 3.29it/s] 69%|██████▊ | 254574/371472 [9:44:18<10:17:13, 3.16it/s] 69%|██████▊ | 254575/371472 [9:44:18<10:42:32, 3.03it/s] 69%|██████▊ | 254576/371472 [9:44:18<10:13:46, 3.17it/s] 69%|██████▊ | 254577/371472 [9:44:18<9:39:08, 3.36it/s] 69%|██████▊ | 254578/371472 [9:44:19<9:40:24, 3.36it/s] 69%|██████▊ | 254579/371472 [9:44:19<9:24:04, 3.45it/s] 69%|██████▊ | 254580/371472 [9:44:19<9:15:19, 3.51it/s] {'loss': 2.7177, 'learning_rate': 3.8335775388394486e-07, 'epoch': 10.97} + 69%|██████▊ | 254580/371472 [9:44:19<9:15:19, 3.51it/s] 69%|██████▊ | 254581/371472 [9:44:20<9:11:29, 3.53it/s] 69%|██████▊ | 254582/371472 [9:44:20<9:06:33, 3.56it/s] 69%|██████▊ | 254583/371472 [9:44:20<9:26:30, 3.44it/s] 69%|██████▊ | 254584/371472 [9:44:21<10:59:40, 2.95it/s] 69%|██████▊ | 254585/371472 [9:44:21<10:22:33, 3.13it/s] 69%|██████▊ | 254586/371472 [9:44:21<10:07:51, 3.20it/s] 69%|██████▊ | 254587/371472 [9:44:21<10:00:35, 3.24it/s] 69%|██████▊ | 254588/371472 [9:44:22<9:44:27, 3.33it/s] 69%|██████▊ | 254589/371472 [9:44:22<9:40:31, 3.36it/s] 69%|██████▊ | 254590/371472 [9:44:22<9:36:44, 3.38it/s] 69%|██████▊ | 254591/371472 [9:44:23<9:21:33, 3.47it/s] 69%|██████▊ | 254592/371472 [9:44:23<9:46:35, 3.32it/s] 69%|██████▊ | 254593/371472 [9:44:23<9:53:20, 3.28it/s] 69%|██████▊ | 254594/371472 [9:44:24<9:27:41, 3.43it/s] 69%|██████▊ | 254595/371472 [9:44:24<9:17:05, 3.50it/s] 69%|██████▊ | 254596/371472 [9:44:24<9:17:06, 3.50it/s] 69%|██████▊ | 254597/371472 [9:44:24<9:33:16, 3.40it/s] 69%|██████▊ | 254598/371472 [9:44:25<9:22:23, 3.46it/s] 69%|██████▊ | 254599/371472 [9:44:25<9:20:57, 3.47it/s] 69%|██████▊ | 254600/371472 [9:44:25<9:41:25, 3.35it/s] {'loss': 2.6857, 'learning_rate': 3.8330927190846604e-07, 'epoch': 10.97} + 69%|██████▊ | 254600/371472 [9:44:25<9:41:25, 3.35it/s] 69%|██████▊ | 254601/371472 [9:44:26<9:24:49, 3.45it/s] 69%|██████▊ | 254602/371472 [9:44:26<9:24:03, 3.45it/s] 69%|██████▊ | 254603/371472 [9:44:26<9:19:49, 3.48it/s] 69%|██████▊ | 254604/371472 [9:44:26<9:12:51, 3.52it/s] 69%|██████▊ | 254605/371472 [9:44:27<9:14:48, 3.51it/s] 69%|██████▊ | 254606/371472 [9:44:27<9:29:29, 3.42it/s] 69%|██████▊ | 254607/371472 [9:44:27<9:18:57, 3.48it/s] 69%|██████▊ | 254608/371472 [9:44:28<9:41:11, 3.35it/s] 69%|██████▊ | 254609/371472 [9:44:28<9:26:58, 3.44it/s] 69%|██████▊ | 254610/371472 [9:44:28<9:20:08, 3.48it/s] 69%|██████▊ | 254611/371472 [9:44:28<8:57:37, 3.62it/s] 69%|██████▊ | 254612/371472 [9:44:29<9:14:51, 3.51it/s] 69%|██████▊ | 254613/371472 [9:44:29<9:48:32, 3.31it/s] 69%|██████▊ | 254614/371472 [9:44:29<10:08:30, 3.20it/s] 69%|██████▊ | 254615/371472 [9:44:30<9:48:43, 3.31it/s] 69%|██████▊ | 254616/371472 [9:44:30<9:56:45, 3.26it/s] 69%|██████▊ | 254617/371472 [9:44:30<9:53:23, 3.28it/s] 69%|██████▊ | 254618/371472 [9:44:31<9:31:30, 3.41it/s] 69%|██████▊ | 254619/371472 [9:44:31<9:15:06, 3.51it/s] 69%|██████▊ | 254620/371472 [9:44:31<9:18:36, 3.49it/s] {'loss': 2.9224, 'learning_rate': 3.832607899329871e-07, 'epoch': 10.97} + 69%|██████▊ | 254620/371472 [9:44:31<9:18:36, 3.49it/s] 69%|██████▊ | 254621/371472 [9:44:31<9:09:38, 3.54it/s] 69%|██████▊ | 254622/371472 [9:44:32<9:21:47, 3.47it/s] 69%|██████▊ | 254623/371472 [9:44:32<10:25:16, 3.11it/s] 69%|██████▊ | 254624/371472 [9:44:32<9:44:08, 3.33it/s] 69%|██████▊ | 254625/371472 [9:44:33<9:45:35, 3.33it/s] 69%|██████▊ | 254626/371472 [9:44:33<9:32:47, 3.40it/s] 69%|██████▊ | 254627/371472 [9:44:33<9:18:59, 3.48it/s] 69%|██████▊ | 254628/371472 [9:44:33<9:26:16, 3.44it/s] 69%|██████▊ | 254629/371472 [9:44:34<9:26:17, 3.44it/s] 69%|██████▊ | 254630/371472 [9:44:34<9:21:43, 3.47it/s] 69%|██████▊ | 254631/371472 [9:44:34<9:12:57, 3.52it/s] 69%|██████▊ | 254632/371472 [9:44:35<9:40:15, 3.36it/s] 69%|██████▊ | 254633/371472 [9:44:35<9:36:11, 3.38it/s] 69%|██████▊ | 254634/371472 [9:44:35<9:51:22, 3.29it/s] 69%|██████▊ | 254635/371472 [9:44:36<9:49:12, 3.30it/s] 69%|██████▊ | 254636/371472 [9:44:36<9:52:34, 3.29it/s] 69%|██████▊ | 254637/371472 [9:44:36<9:46:07, 3.32it/s] 69%|██████▊ | 254638/371472 [9:44:36<9:42:51, 3.34it/s] 69%|██████▊ | 254639/371472 [9:44:37<9:15:36, 3.50it/s] 69%|██████▊ | 254640/371472 [9:44:37<8:59:54, 3.61it/s] {'loss': 2.6148, 'learning_rate': 3.8321230795750823e-07, 'epoch': 10.97} + 69%|██████▊ | 254640/371472 [9:44:37<8:59:54, 3.61it/s] 69%|██████▊ | 254641/371472 [9:44:37<9:12:58, 3.52it/s] 69%|██████▊ | 254642/371472 [9:44:38<9:24:33, 3.45it/s] 69%|██████▊ | 254643/371472 [9:44:38<9:30:33, 3.41it/s] 69%|██████▊ | 254644/371472 [9:44:38<9:40:49, 3.35it/s] 69%|██████▊ | 254645/371472 [9:44:39<9:51:39, 3.29it/s] 69%|██████▊ | 254646/371472 [9:44:39<9:32:36, 3.40it/s] 69%|██████▊ | 254647/371472 [9:44:39<9:45:55, 3.32it/s] 69%|██████▊ | 254648/371472 [9:44:39<9:42:07, 3.34it/s] 69%|██████▊ | 254649/371472 [9:44:40<9:58:57, 3.25it/s] 69%|██████▊ | 254650/371472 [9:44:40<9:55:25, 3.27it/s] 69%|██████▊ | 254651/371472 [9:44:40<9:42:44, 3.34it/s] 69%|██████▊ | 254652/371472 [9:44:41<9:30:24, 3.41it/s] 69%|██████▊ | 254653/371472 [9:44:41<9:53:43, 3.28it/s] 69%|██████▊ | 254654/371472 [9:44:41<9:40:07, 3.36it/s] 69%|██████▊ | 254655/371472 [9:44:41<9:20:40, 3.47it/s] 69%|██████▊ | 254656/371472 [9:44:42<9:17:38, 3.49it/s] 69%|██████▊ | 254657/371472 [9:44:42<9:01:08, 3.60it/s] 69%|██████▊ | 254658/371472 [9:44:42<9:11:48, 3.53it/s] 69%|██████▊ | 254659/371472 [9:44:43<9:33:31, 3.39it/s] 69%|██████▊ | 254660/371472 [9:44:43<9:31:08, 3.41it/s] {'loss': 2.7, 'learning_rate': 3.831638259820293e-07, 'epoch': 10.97} + 69%|██████▊ | 254660/371472 [9:44:43<9:31:08, 3.41it/s] 69%|██████▊ | 254661/371472 [9:44:43<9:38:25, 3.37it/s] 69%|██████▊ | 254662/371472 [9:44:44<10:29:09, 3.09it/s] 69%|██████▊ | 254663/371472 [9:44:44<10:03:48, 3.22it/s] 69%|██████▊ | 254664/371472 [9:44:44<9:48:23, 3.31it/s] 69%|██████▊ | 254665/371472 [9:44:44<10:00:49, 3.24it/s] 69%|██████▊ | 254666/371472 [9:44:45<9:48:08, 3.31it/s] 69%|██████▊ | 254667/371472 [9:44:45<9:30:59, 3.41it/s] 69%|██████▊ | 254668/371472 [9:44:45<9:25:56, 3.44it/s] 69%|██████▊ | 254669/371472 [9:44:46<9:20:26, 3.47it/s] 69%|██████▊ | 254670/371472 [9:44:46<9:18:41, 3.48it/s] 69%|██████▊ | 254671/371472 [9:44:46<9:23:58, 3.45it/s] 69%|██████▊ | 254672/371472 [9:44:46<9:21:40, 3.47it/s] 69%|██████▊ | 254673/371472 [9:44:47<9:59:17, 3.25it/s] 69%|██████▊ | 254674/371472 [9:44:47<9:56:58, 3.26it/s] 69%|██████▊ | 254675/371472 [9:44:47<10:01:18, 3.24it/s] 69%|██████▊ | 254676/371472 [9:44:48<9:44:08, 3.33it/s] 69%|██████▊ | 254677/371472 [9:44:48<9:30:45, 3.41it/s] 69%|██████▊ | 254678/371472 [9:44:48<9:26:59, 3.43it/s] 69%|██████▊ | 254679/371472 [9:44:49<9:09:18, 3.54it/s] 69%|██████▊ | 254680/371472 [9:44:49<9:20:52, 3.47it/s] {'loss': 2.7589, 'learning_rate': 3.831153440065505e-07, 'epoch': 10.97} + 69%|██████▊ | 254680/371472 [9:44:49<9:20:52, 3.47it/s] 69%|██████▊ | 254681/371472 [9:44:49<9:09:33, 3.54it/s] 69%|██████▊ | 254682/371472 [9:44:49<9:43:07, 3.34it/s] 69%|██████▊ | 254683/371472 [9:44:50<9:21:40, 3.47it/s] 69%|██████▊ | 254684/371472 [9:44:50<9:06:27, 3.56it/s] 69%|██████▊ | 254685/371472 [9:44:50<9:16:13, 3.50it/s] 69%|██████▊ | 254686/371472 [9:44:51<9:40:59, 3.35it/s] 69%|██████▊ | 254687/371472 [9:44:51<10:25:09, 3.11it/s] 69%|██████▊ | 254688/371472 [9:44:51<9:55:29, 3.27it/s] 69%|██████▊ | 254689/371472 [9:44:52<10:34:34, 3.07it/s] 69%|██████▊ | 254690/371472 [9:44:52<9:57:23, 3.26it/s] 69%|██████▊ | 254691/371472 [9:44:52<9:34:18, 3.39it/s] 69%|██████▊ | 254692/371472 [9:44:52<9:39:51, 3.36it/s] 69%|██████▊ | 254693/371472 [9:44:53<9:49:40, 3.30it/s] 69%|██████▊ | 254694/371472 [9:44:53<9:20:34, 3.47it/s] 69%|██████▊ | 254695/371472 [9:44:53<9:35:42, 3.38it/s] 69%|██████▊ | 254696/371472 [9:44:54<10:27:30, 3.10it/s] 69%|██████▊ | 254697/371472 [9:44:54<10:43:30, 3.02it/s] 69%|██████▊ | 254698/371472 [9:44:54<11:00:05, 2.95it/s] 69%|██████▊ | 254699/371472 [9:44:55<10:38:52, 3.05it/s] 69%|██████▊ | 254700/371472 [9:44:55<10:43:10, 3.03it/s] {'loss': 2.7418, 'learning_rate': 3.830668620310715e-07, 'epoch': 10.97} + 69%|██████▊ | 254700/371472 [9:44:55<10:43:10, 3.03it/s] 69%|██████▊ | 254701/371472 [9:44:55<10:16:33, 3.16it/s] 69%|██████▊ | 254702/371472 [9:44:56<9:44:49, 3.33it/s] 69%|██████▊ | 254703/371472 [9:44:56<9:56:19, 3.26it/s] 69%|██████▊ | 254704/371472 [9:44:56<10:18:06, 3.15it/s] 69%|██████▊ | 254705/371472 [9:44:57<10:19:20, 3.14it/s] 69%|██████▊ | 254706/371472 [9:44:57<10:12:15, 3.18it/s] 69%|██████▊ | 254707/371472 [9:44:57<10:31:37, 3.08it/s] 69%|██████▊ | 254708/371472 [9:44:58<10:21:19, 3.13it/s] 69%|██████▊ | 254709/371472 [9:44:58<10:07:47, 3.20it/s] 69%|██████▊ | 254710/371472 [9:44:58<10:10:03, 3.19it/s] 69%|██████▊ | 254711/371472 [9:44:58<9:43:46, 3.33it/s] 69%|██████▊ | 254712/371472 [9:44:59<9:25:21, 3.44it/s] 69%|██████▊ | 254713/371472 [9:44:59<10:13:59, 3.17it/s] 69%|██████▊ | 254714/371472 [9:44:59<9:51:17, 3.29it/s] 69%|██████▊ | 254715/371472 [9:45:00<10:00:00, 3.24it/s] 69%|██████▊ | 254716/371472 [9:45:00<9:52:32, 3.28it/s] 69%|██████▊ | 254717/371472 [9:45:00<10:10:57, 3.18it/s] 69%|██████▊ | 254718/371472 [9:45:01<10:40:50, 3.04it/s] 69%|██████▊ | 254719/371472 [9:45:01<10:23:55, 3.12it/s] 69%|██████▊ | 254720/371472 [9:45:01<10:30:35, 3.09it/s] {'loss': 2.5423, 'learning_rate': 3.830183800555927e-07, 'epoch': 10.97} + 69%|██████▊ | 254720/371472 [9:45:01<10:30:35, 3.09it/s] 69%|██████▊ | 254721/371472 [9:45:02<10:00:19, 3.24it/s] 69%|██████▊ | 254722/371472 [9:45:02<9:49:31, 3.30it/s] 69%|██████▊ | 254723/371472 [9:45:02<9:38:09, 3.37it/s] 69%|██████▊ | 254724/371472 [9:45:02<9:34:02, 3.39it/s] 69%|██████▊ | 254725/371472 [9:45:03<9:49:01, 3.30it/s] 69%|██████▊ | 254726/371472 [9:45:03<9:31:08, 3.41it/s] 69%|██████▊ | 254727/371472 [9:45:03<9:11:01, 3.53it/s] 69%|██████▊ | 254728/371472 [9:45:04<9:36:42, 3.37it/s] 69%|██████▊ | 254729/371472 [9:45:04<9:42:38, 3.34it/s] 69%|██████▊ | 254730/371472 [9:45:04<9:44:27, 3.33it/s] 69%|██████▊ | 254731/371472 [9:45:05<9:25:40, 3.44it/s] 69%|██████▊ | 254732/371472 [9:45:05<9:52:47, 3.28it/s] 69%|██████▊ | 254733/371472 [9:45:05<10:01:56, 3.23it/s] 69%|██████▊ | 254734/371472 [9:45:05<9:48:45, 3.30it/s] 69%|██████▊ | 254735/371472 [9:45:06<9:46:23, 3.32it/s] 69%|██████▊ | 254736/371472 [9:45:06<9:57:50, 3.25it/s] 69%|██████▊ | 254737/371472 [9:45:06<10:15:14, 3.16it/s] 69%|██████▊ | 254738/371472 [9:45:07<9:44:00, 3.33it/s] 69%|██████▊ | 254739/371472 [9:45:07<9:40:05, 3.35it/s] 69%|██████▊ | 254740/371472 [9:45:07<9:40:38, 3.35it/s] {'loss': 2.7039, 'learning_rate': 3.8296989808011375e-07, 'epoch': 10.97} + 69%|██████▊ | 254740/371472 [9:45:07<9:40:38, 3.35it/s] 69%|██████▊ | 254741/371472 [9:45:08<9:46:21, 3.32it/s] 69%|██████▊ | 254742/371472 [9:45:08<10:02:16, 3.23it/s] 69%|██████▊ | 254743/371472 [9:45:08<9:46:27, 3.32it/s] 69%|██████▊ | 254744/371472 [9:45:08<9:25:08, 3.44it/s] 69%|██████▊ | 254745/371472 [9:45:09<9:17:02, 3.49it/s] 69%|██████▊ | 254746/371472 [9:45:09<9:05:17, 3.57it/s] 69%|██████▊ | 254747/371472 [9:45:09<9:23:37, 3.45it/s] 69%|██████▊ | 254748/371472 [9:45:10<9:13:52, 3.51it/s] 69%|██████▊ | 254749/371472 [9:45:10<9:23:56, 3.45it/s] 69%|██████▊ | 254750/371472 [9:45:10<9:17:05, 3.49it/s] 69%|██████▊ | 254751/371472 [9:45:10<9:16:47, 3.49it/s] 69%|██████▊ | 254752/371472 [9:45:11<9:13:24, 3.52it/s] 69%|██████▊ | 254753/371472 [9:45:11<9:11:32, 3.53it/s] 69%|██████▊ | 254754/371472 [9:45:11<9:12:33, 3.52it/s] 69%|██████▊ | 254755/371472 [9:45:12<9:28:53, 3.42it/s] 69%|██████▊ | 254756/371472 [9:45:12<9:17:31, 3.49it/s] 69%|██████▊ | 254757/371472 [9:45:12<9:03:09, 3.58it/s] 69%|██████▊ | 254758/371472 [9:45:12<9:12:48, 3.52it/s] 69%|██████▊ | 254759/371472 [9:45:13<9:31:27, 3.40it/s] 69%|██████▊ | 254760/371472 [9:45:13<9:22:18, 3.46it/s] {'loss': 2.8299, 'learning_rate': 3.8292141610463487e-07, 'epoch': 10.97} + 69%|██████▊ | 254760/371472 [9:45:13<9:22:18, 3.46it/s] 69%|██████▊ | 254761/371472 [9:45:13<9:21:49, 3.46it/s] 69%|██████▊ | 254762/371472 [9:45:14<9:31:54, 3.40it/s] 69%|██████▊ | 254763/371472 [9:45:14<9:10:03, 3.54it/s] 69%|██████▊ | 254764/371472 [9:45:14<9:14:18, 3.51it/s] 69%|██████▊ | 254765/371472 [9:45:14<9:25:00, 3.44it/s] 69%|██████▊ | 254766/371472 [9:45:15<9:12:30, 3.52it/s] 69%|██████▊ | 254767/371472 [9:45:15<9:16:59, 3.49it/s] 69%|██████▊ | 254768/371472 [9:45:15<9:20:20, 3.47it/s] 69%|██████▊ | 254769/371472 [9:45:16<9:17:03, 3.49it/s] 69%|██████▊ | 254770/371472 [9:45:16<9:59:50, 3.24it/s] 69%|█████���▊ | 254771/371472 [9:45:16<9:59:56, 3.24it/s] 69%|██████▊ | 254772/371472 [9:45:17<9:35:14, 3.38it/s] 69%|██████▊ | 254773/371472 [9:45:17<9:28:29, 3.42it/s] 69%|██████▊ | 254774/371472 [9:45:17<9:33:55, 3.39it/s] 69%|██████▊ | 254775/371472 [9:45:17<9:29:35, 3.41it/s] 69%|██████▊ | 254776/371472 [9:45:18<9:23:50, 3.45it/s] 69%|██████▊ | 254777/371472 [9:45:18<9:11:32, 3.53it/s] 69%|██████▊ | 254778/371472 [9:45:18<9:08:43, 3.54it/s] 69%|██████▊ | 254779/371472 [9:45:19<9:01:34, 3.59it/s] 69%|██████▊ | 254780/371472 [9:45:19<9:02:24, 3.59it/s] {'loss': 2.6977, 'learning_rate': 3.8287293412915594e-07, 'epoch': 10.97} + 69%|██████▊ | 254780/371472 [9:45:19<9:02:24, 3.59it/s] 69%|██████▊ | 254781/371472 [9:45:19<8:55:41, 3.63it/s] 69%|██████▊ | 254782/371472 [9:45:19<8:59:22, 3.61it/s] 69%|██████▊ | 254783/371472 [9:45:20<8:59:05, 3.61it/s] 69%|██████▊ | 254784/371472 [9:45:20<9:05:23, 3.57it/s] 69%|██████▊ | 254785/371472 [9:45:20<9:20:35, 3.47it/s] 69%|██████▊ | 254786/371472 [9:45:21<9:15:50, 3.50it/s] 69%|██████▊ | 254787/371472 [9:45:21<9:26:02, 3.44it/s] 69%|██████▊ | 254788/371472 [9:45:21<9:23:29, 3.45it/s] 69%|██████▊ | 254789/371472 [9:45:21<9:29:51, 3.41it/s] 69%|██████▊ | 254790/371472 [9:45:22<9:24:25, 3.45it/s] 69%|██████▊ | 254791/371472 [9:45:22<9:25:01, 3.44it/s] 69%|██████▊ | 254792/371472 [9:45:22<9:44:27, 3.33it/s] 69%|██████▊ | 254793/371472 [9:45:23<9:58:14, 3.25it/s] 69%|██████▊ | 254794/371472 [9:45:23<9:39:42, 3.35it/s] 69%|██████▊ | 254795/371472 [9:45:23<9:50:33, 3.29it/s] 69%|██████▊ | 254796/371472 [9:45:23<9:36:05, 3.38it/s] 69%|██████▊ | 254797/371472 [9:45:24<9:19:48, 3.47it/s] 69%|██████▊ | 254798/371472 [9:45:24<9:13:07, 3.52it/s] 69%|██████▊ | 254799/371472 [9:45:24<9:25:27, 3.44it/s] 69%|██████▊ | 254800/371472 [9:45:25<9:21:46, 3.46it/s] {'loss': 2.6704, 'learning_rate': 3.828244521536771e-07, 'epoch': 10.97} + 69%|██████▊ | 254800/371472 [9:45:25<9:21:46, 3.46it/s] 69%|██████▊ | 254801/371472 [9:45:25<9:29:59, 3.41it/s] 69%|██████▊ | 254802/371472 [9:45:25<9:55:22, 3.27it/s] 69%|██████▊ | 254803/371472 [9:45:26<9:50:41, 3.29it/s] 69%|██████▊ | 254804/371472 [9:45:26<9:56:08, 3.26it/s] 69%|██████▊ | 254805/371472 [9:45:26<10:27:27, 3.10it/s] 69%|██████▊ | 254806/371472 [9:45:27<10:12:23, 3.18it/s] 69%|██████▊ | 254807/371472 [9:45:27<10:03:29, 3.22it/s] 69%|██████▊ | 254808/371472 [9:45:27<9:46:57, 3.31it/s] 69%|██████▊ | 254809/371472 [9:45:27<9:46:15, 3.32it/s] 69%|██████▊ | 254810/371472 [9:45:28<9:35:31, 3.38it/s] 69%|██████▊ | 254811/371472 [9:45:28<9:29:20, 3.42it/s] 69%|██████▊ | 254812/371472 [9:45:28<9:23:48, 3.45it/s] 69%|██████▊ | 254813/371472 [9:45:29<9:18:13, 3.48it/s] 69%|██████▊ | 254814/371472 [9:45:29<9:10:30, 3.53it/s] 69%|██████▊ | 254815/371472 [9:45:29<9:34:07, 3.39it/s] 69%|██████▊ | 254816/371472 [9:45:29<9:19:31, 3.47it/s] 69%|██████▊ | 254817/371472 [9:45:30<9:34:06, 3.39it/s] 69%|██████▊ | 254818/371472 [9:45:30<9:38:48, 3.36it/s] 69%|██████▊ | 254819/371472 [9:45:30<10:07:28, 3.20it/s] 69%|██████▊ | 254820/371472 [9:45:31<10:10:24, 3.19it/s] {'loss': 2.6605, 'learning_rate': 3.8277597017819814e-07, 'epoch': 10.98} + 69%|██████▊ | 254820/371472 [9:45:31<10:10:24, 3.19it/s] 69%|██████▊ | 254821/371472 [9:45:31<9:44:08, 3.33it/s] 69%|██████▊ | 254822/371472 [9:45:31<9:24:07, 3.45it/s] 69%|██████▊ | 254823/371472 [9:45:32<9:38:33, 3.36it/s] 69%|██████▊ | 254824/371472 [9:45:32<9:43:28, 3.33it/s] 69%|██████▊ | 254825/371472 [9:45:32<10:31:51, 3.08it/s] 69%|██████▊ | 254826/371472 [9:45:33<10:23:06, 3.12it/s] 69%|██████▊ | 254827/371472 [9:45:33<10:00:37, 3.24it/s] 69%|██████▊ | 254828/371472 [9:45:33<9:32:33, 3.40it/s] 69%|██████▊ | 254829/371472 [9:45:33<9:34:08, 3.39it/s] 69%|██████▊ | 254830/371472 [9:45:34<9:22:08, 3.46it/s] 69%|██████▊ | 254831/371472 [9:45:34<9:14:31, 3.51it/s] 69%|██████▊ | 254832/371472 [9:45:34<9:19:25, 3.48it/s] 69%|██████▊ | 254833/371472 [9:45:35<9:35:37, 3.38it/s] 69%|██████▊ | 254834/371472 [9:45:35<9:26:42, 3.43it/s] 69%|██████▊ | 254835/371472 [9:45:35<9:27:07, 3.43it/s] 69%|██████▊ | 254836/371472 [9:45:35<9:19:20, 3.48it/s] 69%|██████▊ | 254837/371472 [9:45:36<9:22:46, 3.45it/s] 69%|██████▊ | 254838/371472 [9:45:36<9:44:59, 3.32it/s] 69%|██████▊ | 254839/371472 [9:45:36<10:07:58, 3.20it/s] 69%|██████▊ | 254840/371472 [9:45:37<10:05:03, 3.21it/s] {'loss': 2.5926, 'learning_rate': 3.827274882027193e-07, 'epoch': 10.98} + 69%|██████▊ | 254840/371472 [9:45:37<10:05:03, 3.21it/s] 69%|██████▊ | 254841/371472 [9:45:37<9:47:21, 3.31it/s] 69%|██████▊ | 254842/371472 [9:45:37<9:57:11, 3.25it/s] 69%|██████▊ | 254843/371472 [9:45:38<9:51:40, 3.29it/s] 69%|██████▊ | 254844/371472 [9:45:38<10:07:57, 3.20it/s] 69%|██████▊ | 254845/371472 [9:45:38<10:10:47, 3.18it/s] 69%|██████▊ | 254846/371472 [9:45:39<10:37:01, 3.05it/s] 69%|██████▊ | 254847/371472 [9:45:39<10:47:20, 3.00it/s] 69%|██████▊ | 254848/371472 [9:45:39<10:13:24, 3.17it/s] 69%|██████▊ | 254849/371472 [9:45:39<9:44:10, 3.33it/s] 69%|██████▊ | 254850/371472 [9:45:40<9:37:21, 3.37it/s] 69%|██████▊ | 254851/371472 [9:45:40<9:25:41, 3.44it/s] 69%|██████▊ | 254852/371472 [9:45:40<9:16:55, 3.49it/s] 69%|██████▊ | 254853/371472 [9:45:41<9:54:34, 3.27it/s] 69%|██████▊ | 254854/371472 [9:45:41<9:32:35, 3.39it/s] 69%|██████▊ | 254855/371472 [9:45:41<9:40:25, 3.35it/s] 69%|██████▊ | 254856/371472 [9:45:41<9:19:24, 3.47it/s] 69%|██████▊ | 254857/371472 [9:45:42<9:40:28, 3.35it/s] 69%|██████▊ | 254858/371472 [9:45:42<9:36:49, 3.37it/s] 69%|██████▊ | 254859/371472 [9:45:42<9:46:58, 3.31it/s] 69%|██████▊ | 254860/371472 [9:45:43<9:31:34, 3.40it/s] {'loss': 2.6429, 'learning_rate': 3.826790062272404e-07, 'epoch': 10.98} + 69%|██████▊ | 254860/371472 [9:45:43<9:31:34, 3.40it/s] 69%|██████▊ | 254861/371472 [9:45:43<9:45:47, 3.32it/s] 69%|██████▊ | 254862/371472 [9:45:43<9:32:10, 3.40it/s] 69%|██████▊ | 254863/371472 [9:45:44<9:15:53, 3.50it/s] 69%|██████▊ | 254864/371472 [9:45:44<9:29:04, 3.42it/s] 69%|██████▊ | 254865/371472 [9:45:44<9:41:33, 3.34it/s] 69%|██████▊ | 254866/371472 [9:45:44<9:21:37, 3.46it/s] 69%|██████▊ | 254867/371472 [9:45:45<8:57:20, 3.62it/s] 69%|██████▊ | 254868/371472 [9:45:45<9:14:04, 3.51it/s] 69%|██████▊ | 254869/371472 [9:45:45<9:37:55, 3.36it/s] 69%|██████▊ | 254870/371472 [9:45:46<9:18:21, 3.48it/s] 69%|██████▊ | 254871/371472 [9:45:46<9:17:04, 3.49it/s] 69%|██████▊ | 254872/371472 [9:45:46<10:22:52, 3.12it/s] 69%|██████▊ | 254873/371472 [9:45:47<10:17:01, 3.15it/s] 69%|██████▊ | 254874/371472 [9:45:47<10:26:44, 3.10it/s] 69%|██████▊ | 254875/371472 [9:45:47<9:59:42, 3.24it/s] 69%|██████▊ | 254876/371472 [9:45:48<10:01:41, 3.23it/s] 69%|██████▊ | 254877/371472 [9:45:48<10:26:18, 3.10it/s] 69%|██████▊ | 254878/371472 [9:45:48<10:04:33, 3.21it/s] 69%|██████▊ | 254879/371472 [9:45:48<10:03:20, 3.22it/s] 69%|██████▊ | 254880/371472 [9:45:49<9:38:24, 3.36it/s] {'loss': 2.55, 'learning_rate': 3.826305242517615e-07, 'epoch': 10.98} + 69%|██████▊ | 254880/371472 [9:45:49<9:38:24, 3.36it/s] 69%|██████▊ | 254881/371472 [9:45:49<10:40:53, 3.03it/s] 69%|██████▊ | 254882/371472 [9:45:49<10:10:53, 3.18it/s] 69%|██████▊ | 254883/371472 [9:45:50<9:33:26, 3.39it/s] 69%|██████▊ | 254884/371472 [9:45:50<9:15:18, 3.50it/s] 69%|██████▊ | 254885/371472 [9:45:50<9:12:06, 3.52it/s] 69%|██████▊ | 254886/371472 [9:45:51<9:28:10, 3.42it/s] 69%|██████▊ | 254887/371472 [9:45:51<9:15:58, 3.49it/s] 69%|██████▊ | 254888/371472 [9:45:51<9:20:09, 3.47it/s] 69%|██████▊ | 254889/371472 [9:45:51<9:23:02, 3.45it/s] 69%|██████▊ | 254890/371472 [9:45:52<9:29:15, 3.41it/s] 69%|██████▊ | 254891/371472 [9:45:52<9:16:16, 3.49it/s] 69%|██████▊ | 254892/371472 [9:45:52<9:55:28, 3.26it/s] 69%|██████▊ | 254893/371472 [9:45:53<9:30:06, 3.41it/s] 69%|██████▊ | 254894/371472 [9:45:53<9:18:47, 3.48it/s] 69%|██████▊ | 254895/371472 [9:45:53<9:13:33, 3.51it/s] 69%|██████▊ | 254896/371472 [9:45:53<9:37:50, 3.36it/s] 69%|██████▊ | 254897/371472 [9:45:54<9:23:34, 3.45it/s] 69%|██████▊ | 254898/371472 [9:45:54<9:26:44, 3.43it/s] 69%|██████▊ | 254899/371472 [9:45:54<9:25:50, 3.43it/s] 69%|██████▊ | 254900/371472 [9:45:55<9:33:18, 3.39it/s] {'loss': 2.7065, 'learning_rate': 3.825820422762826e-07, 'epoch': 10.98} + 69%|██████▊ | 254900/371472 [9:45:55<9:33:18, 3.39it/s] 69%|██████▊ | 254901/371472 [9:45:55<9:19:14, 3.47it/s] 69%|██████▊ | 254902/371472 [9:45:55<9:22:26, 3.45it/s] 69%|██████▊ | 254903/371472 [9:45:55<9:09:45, 3.53it/s] 69%|██████▊ | 254904/371472 [9:45:56<9:10:36, 3.53it/s] 69%|██████▊ | 254905/371472 [9:45:56<9:25:51, 3.43it/s] 69%|██████▊ | 254906/371472 [9:45:56<9:37:07, 3.37it/s] 69%|██████▊ | 254907/371472 [9:45:57<9:51:04, 3.29it/s] 69%|██████▊ | 254908/371472 [9:45:57<9:42:35, 3.33it/s] 69%|██████▊ | 254909/371472 [9:45:57<9:22:14, 3.46it/s] 69%|██████▊ | 254910/371472 [9:45:58<9:35:21, 3.38it/s] 69%|██████▊ | 254911/371472 [9:45:58<9:09:39, 3.53it/s] 69%|██████▊ | 254912/371472 [9:45:58<9:03:58, 3.57it/s] 69%|██████▊ | 254913/371472 [9:45:59<12:36:15, 2.57it/s] 69%|██████▊ | 254914/371472 [9:45:59<11:27:33, 2.83it/s] 69%|██████▊ | 254915/371472 [9:45:59<10:46:03, 3.01it/s] 69%|██████▊ | 254916/371472 [9:46:00<10:08:14, 3.19it/s] 69%|██████▊ | 254917/371472 [9:46:00<9:47:43, 3.31it/s] 69%|██████▊ | 254918/371472 [9:46:00<9:24:00, 3.44it/s] 69%|██████▊ | 254919/371472 [9:46:00<9:16:01, 3.49it/s] 69%|██████▊ | 254920/371472 [9:46:01<10:10:27, 3.18it/s] {'loss': 2.6166, 'learning_rate': 3.8253356030080376e-07, 'epoch': 10.98} + 69%|██████▊ | 254920/371472 [9:46:01<10:10:27, 3.18it/s] 69%|██████▊ | 254921/371472 [9:46:01<10:07:13, 3.20it/s] 69%|██████▊ | 254922/371472 [9:46:01<9:58:41, 3.24it/s] 69%|██████▊ | 254923/371472 [9:46:02<9:49:08, 3.30it/s] 69%|██████▊ | 254924/371472 [9:46:02<9:44:11, 3.33it/s] 69%|██████▊ | 254925/371472 [9:46:02<9:32:03, 3.40it/s] 69%|██████▊ | 254926/371472 [9:46:02<9:32:10, 3.39it/s] 69%|██████▊ | 254927/371472 [9:46:03<9:30:55, 3.40it/s] 69%|██████▊ | 254928/371472 [9:46:03<9:51:10, 3.29it/s] 69%|██████▊ | 254929/371472 [9:46:03<9:33:44, 3.39it/s] 69%|██████▊ | 254930/371472 [9:46:04<9:20:45, 3.46it/s] 69%|██████▊ | 254931/371472 [9:46:04<10:14:09, 3.16it/s] 69%|██████▊ | 254932/371472 [9:46:04<9:58:47, 3.24it/s] 69%|██████▊ | 254933/371472 [9:46:05<9:35:00, 3.38it/s] 69%|██████▊ | 254934/371472 [9:46:05<9:43:10, 3.33it/s] 69%|██████▊ | 254935/371472 [9:46:05<9:51:34, 3.28it/s] 69%|██████▊ | 254936/371472 [9:46:05<9:34:33, 3.38it/s] 69%|██████▊ | 254937/371472 [9:46:06<9:57:04, 3.25it/s] 69%|██████▊ | 254938/371472 [9:46:06<10:12:10, 3.17it/s] 69%|██████▊ | 254939/371472 [9:46:06<9:47:03, 3.31it/s] 69%|██████▊ | 254940/371472 [9:46:07<9:33:20, 3.39it/s] {'loss': 2.7052, 'learning_rate': 3.824850783253248e-07, 'epoch': 10.98} + 69%|██████▊ | 254940/371472 [9:46:07<9:33:20, 3.39it/s] 69%|██████▊ | 254941/371472 [9:46:07<9:33:53, 3.38it/s] 69%|██████▊ | 254942/371472 [9:46:07<9:28:53, 3.41it/s] 69%|██████▊ | 254943/371472 [9:46:08<11:07:18, 2.91it/s] 69%|██████▊ | 254944/371472 [9:46:08<10:28:28, 3.09it/s] 69%|██████▊ | 254945/371472 [9:46:08<10:03:09, 3.22it/s] 69%|██████▊ | 254946/371472 [9:46:09<9:43:12, 3.33it/s] 69%|██████▊ | 254947/371472 [9:46:09<9:28:13, 3.42it/s] 69%|██████▊ | 254948/371472 [9:46:09<9:34:48, 3.38it/s] 69%|██████▊ | 254949/371472 [9:46:09<9:40:50, 3.34it/s] 69%|██████▊ | 254950/371472 [9:46:10<9:25:29, 3.43it/s] 69%|██████▊ | 254951/371472 [9:46:10<9:19:13, 3.47it/s] 69%|██████▊ | 254952/371472 [9:46:10<9:24:37, 3.44it/s] 69%|██████▊ | 254953/371472 [9:46:11<9:18:05, 3.48it/s] 69%|██████▊ | 254954/371472 [9:46:11<9:26:36, 3.43it/s] 69%|██████▊ | 254955/371472 [9:46:11<9:35:40, 3.37it/s] 69%|██████▊ | 254956/371472 [9:46:12<9:34:13, 3.38it/s] 69%|██████▊ | 254957/371472 [9:46:12<9:31:10, 3.40it/s] 69%|██████▊ | 254958/371472 [9:46:12<9:33:20, 3.39it/s] 69%|██████▊ | 254959/371472 [9:46:12<9:31:54, 3.40it/s] 69%|██████▊ | 254960/371472 [9:46:13<9:27:51, 3.42it/s] {'loss': 2.9047, 'learning_rate': 3.824365963498459e-07, 'epoch': 10.98} + 69%|██████▊ | 254960/371472 [9:46:13<9:27:51, 3.42it/s] 69%|██████▊ | 254961/371472 [9:46:13<9:34:38, 3.38it/s] 69%|██████▊ | 254962/371472 [9:46:13<10:05:00, 3.21it/s] 69%|██████▊ | 254963/371472 [9:46:14<9:56:42, 3.25it/s] 69%|██████▊ | 254964/371472 [9:46:14<9:34:51, 3.38it/s] 69%|██████▊ | 254965/371472 [9:46:14<9:25:43, 3.43it/s] 69%|██████▊ | 254966/371472 [9:46:14<9:19:44, 3.47it/s] 69%|██████▊ | 254967/371472 [9:46:15<9:32:59, 3.39it/s] 69%|██████▊ | 254968/371472 [9:46:15<9:35:17, 3.38it/s] 69%|██████▊ | 254969/371472 [9:46:15<9:23:24, 3.45it/s] 69%|██████▊ | 254970/371472 [9:46:16<9:32:11, 3.39it/s] 69%|██████▊ | 254971/371472 [9:46:16<9:32:21, 3.39it/s] 69%|██████▊ | 254972/371472 [9:46:16<9:25:58, 3.43it/s] 69%|██████▊ | 254973/371472 [9:46:17<9:53:21, 3.27it/s] 69%|██████▊ | 254974/371472 [9:46:17<9:42:17, 3.33it/s] 69%|██████▊ | 254975/371472 [9:46:17<9:26:59, 3.42it/s] 69%|██████▊ | 254976/371472 [9:46:17<9:37:28, 3.36it/s] 69%|██████▊ | 254977/371472 [9:46:18<9:38:59, 3.35it/s] 69%|██████▊ | 254978/371472 [9:46:18<9:40:30, 3.34it/s] 69%|██████▊ | 254979/371472 [9:46:18<10:19:09, 3.14it/s] 69%|██████▊ | 254980/371472 [9:46:19<9:54:18, 3.27it/s] {'loss': 2.6298, 'learning_rate': 3.8238811437436703e-07, 'epoch': 10.98} + 69%|██████▊ | 254980/371472 [9:46:19<9:54:18, 3.27it/s] 69%|██████▊ | 254981/371472 [9:46:19<9:51:56, 3.28it/s] 69%|██████▊ | 254982/371472 [9:46:19<9:34:19, 3.38it/s] 69%|██████▊ | 254983/371472 [9:46:20<9:20:14, 3.47it/s] 69%|██████▊ | 254984/371472 [9:46:20<9:06:16, 3.55it/s] 69%|██████▊ | 254985/371472 [9:46:20<8:52:45, 3.64it/s] 69%|██████▊ | 254986/371472 [9:46:20<8:52:43, 3.64it/s] 69%|██████▊ | 254987/371472 [9:46:21<9:02:34, 3.58it/s] 69%|██████▊ | 254988/371472 [9:46:21<9:15:05, 3.50it/s] 69%|██████▊ | 254989/371472 [9:46:21<9:24:40, 3.44it/s] 69%|██████▊ | 254990/371472 [9:46:22<9:42:34, 3.33it/s] 69%|██████▊ | 254991/371472 [9:46:22<9:41:30, 3.34it/s] 69%|██████▊ | 254992/371472 [9:46:22<9:42:49, 3.33it/s] 69%|██████▊ | 254993/371472 [9:46:22<9:46:04, 3.31it/s] 69%|██████▊ | 254994/371472 [9:46:23<9:21:32, 3.46it/s] 69%|██████▊ | 254995/371472 [9:46:23<10:23:02, 3.12it/s] 69%|██████▊ | 254996/371472 [9:46:23<9:53:58, 3.27it/s] 69%|██████▊ | 254997/371472 [9:46:24<10:01:39, 3.23it/s] 69%|██████▊ | 254998/371472 [9:46:24<9:48:50, 3.30it/s] 69%|██████▊ | 254999/371472 [9:46:24<9:59:32, 3.24it/s] 69%|██████▊ | 255000/371472 [9:46:25<9:38:52, 3.35it/s] {'loss': 2.7228, 'learning_rate': 3.8233963239888815e-07, 'epoch': 10.98} + 69%|██████▊ | 255000/371472 [9:46:25<9:38:52, 3.35it/s] 69%|██████▊ | 255001/371472 [9:46:25<9:21:54, 3.45it/s] 69%|██████▊ | 255002/371472 [9:46:25<9:39:18, 3.35it/s] 69%|██████▊ | 255003/371472 [9:46:25<9:19:49, 3.47it/s] 69%|██████▊ | 255004/371472 [9:46:26<9:48:50, 3.30it/s] 69%|██████▊ | 255005/371472 [9:46:26<10:02:06, 3.22it/s] 69%|██████▊ | 255006/371472 [9:46:26<9:54:38, 3.26it/s] 69%|██████▊ | 255007/371472 [9:46:27<9:35:41, 3.37it/s] 69%|██████▊ | 255008/371472 [9:46:27<9:35:48, 3.37it/s] 69%|██████▊ | 255009/371472 [9:46:27<9:36:02, 3.37it/s] 69%|██████▊ | 255010/371472 [9:46:28<9:42:30, 3.33it/s] 69%|██████▊ | 255011/371472 [9:46:28<9:29:18, 3.41it/s] 69%|██████▊ | 255012/371472 [9:46:28<9:37:25, 3.36it/s] 69%|████���█▊ | 255013/371472 [9:46:28<9:22:21, 3.45it/s] 69%|██████▊ | 255014/371472 [9:46:29<9:23:41, 3.44it/s] 69%|██████▊ | 255015/371472 [9:46:29<9:38:02, 3.36it/s] 69%|██████▊ | 255016/371472 [9:46:29<9:33:46, 3.38it/s] 69%|██████▊ | 255017/371472 [9:46:30<9:18:33, 3.47it/s] 69%|██████▊ | 255018/371472 [9:46:30<9:13:51, 3.50it/s] 69%|██████▊ | 255019/371472 [9:46:30<9:32:06, 3.39it/s] 69%|██████▊ | 255020/371472 [9:46:30<9:13:45, 3.50it/s] {'loss': 2.809, 'learning_rate': 3.822911504234092e-07, 'epoch': 10.98} + 69%|██████▊ | 255020/371472 [9:46:30<9:13:45, 3.50it/s] 69%|██████▊ | 255021/371472 [9:46:31<9:12:30, 3.51it/s] 69%|██████▊ | 255022/371472 [9:46:31<9:26:57, 3.42it/s] 69%|██████▊ | 255023/371472 [9:46:31<10:08:58, 3.19it/s] 69%|██████▊ | 255024/371472 [9:46:32<9:53:55, 3.27it/s] 69%|██████▊ | 255025/371472 [9:46:32<10:25:28, 3.10it/s] 69%|██████▊ | 255026/371472 [9:46:32<10:09:04, 3.19it/s] 69%|██████▊ | 255027/371472 [9:46:33<10:04:16, 3.21it/s] 69%|██████▊ | 255028/371472 [9:46:33<10:18:17, 3.14it/s] 69%|██████▊ | 255029/371472 [9:46:33<10:09:05, 3.19it/s] 69%|██████▊ | 255030/371472 [9:46:34<9:49:17, 3.29it/s] 69%|██████▊ | 255031/371472 [9:46:34<10:04:35, 3.21it/s] 69%|██████▊ | 255032/371472 [9:46:34<10:05:29, 3.21it/s] 69%|██████▊ | 255033/371472 [9:46:35<10:17:19, 3.14it/s] 69%|██████▊ | 255034/371472 [9:46:35<10:32:10, 3.07it/s] 69%|██████▊ | 255035/371472 [9:46:35<10:51:06, 2.98it/s] 69%|██████▊ | 255036/371472 [9:46:36<10:54:24, 2.97it/s] 69%|██████▊ | 255037/371472 [9:46:36<10:24:00, 3.11it/s] 69%|██████▊ | 255038/371472 [9:46:36<10:25:06, 3.10it/s] 69%|██████▊ | 255039/371472 [9:46:36<10:12:19, 3.17it/s] 69%|██████▊ | 255040/371472 [9:46:37<10:08:51, 3.19it/s] {'loss': 2.7017, 'learning_rate': 3.822426684479303e-07, 'epoch': 10.99} + 69%|██████▊ | 255040/371472 [9:46:37<10:08:51, 3.19it/s] 69%|██████▊ | 255041/371472 [9:46:37<10:32:01, 3.07it/s] 69%|██████▊ | 255042/371472 [9:46:38<10:51:53, 2.98it/s] 69%|██████▊ | 255043/371472 [9:46:38<10:16:26, 3.15it/s] 69%|██████▊ | 255044/371472 [9:46:38<9:50:28, 3.29it/s] 69%|██████▊ | 255045/371472 [9:46:38<9:25:36, 3.43it/s] 69%|██████▊ | 255046/371472 [9:46:39<9:17:35, 3.48it/s] 69%|██████▊ | 255047/371472 [9:46:39<9:46:18, 3.31it/s] 69%|██████▊ | 255048/371472 [9:46:39<10:05:59, 3.20it/s] 69%|██████▊ | 255049/371472 [9:46:40<9:36:05, 3.37it/s] 69%|██████▊ | 255050/371472 [9:46:40<9:31:05, 3.40it/s] 69%|██████▊ | 255051/371472 [9:46:40<9:35:07, 3.37it/s] 69%|██████▊ | 255052/371472 [9:46:40<9:38:33, 3.35it/s] 69%|██████▊ | 255053/371472 [9:46:41<9:30:24, 3.40it/s] 69%|██████▊ | 255054/371472 [9:46:41<9:22:25, 3.45it/s] 69%|██████▊ | 255055/371472 [9:46:41<9:33:17, 3.38it/s] 69%|██████▊ | 255056/371472 [9:46:42<10:13:41, 3.16it/s] 69%|██████▊ | 255057/371472 [9:46:42<9:48:12, 3.30it/s] 69%|██████▊ | 255058/371472 [9:46:42<9:31:08, 3.40it/s] 69%|██████▊ | 255059/371472 [9:46:43<9:45:17, 3.31it/s] 69%|██████▊ | 255060/371472 [9:46:43<9:25:05, 3.43it/s] {'loss': 2.7488, 'learning_rate': 3.821941864724515e-07, 'epoch': 10.99} + 69%|██████▊ | 255060/371472 [9:46:43<9:25:05, 3.43it/s] 69%|██████▊ | 255061/371472 [9:46:43<9:36:44, 3.36it/s] 69%|██████▊ | 255062/371472 [9:46:43<9:30:31, 3.40it/s] 69%|██████▊ | 255063/371472 [9:46:44<9:36:15, 3.37it/s] 69%|██████▊ | 255064/371472 [9:46:44<9:29:02, 3.41it/s] 69%|██████▊ | 255065/371472 [9:46:44<9:20:52, 3.46it/s] 69%|██████▊ | 255066/371472 [9:46:45<10:20:18, 3.13it/s] 69%|██████▊ | 255067/371472 [9:46:45<9:55:44, 3.26it/s] 69%|██████▊ | 255068/371472 [9:46:45<10:12:13, 3.17it/s] 69%|██████▊ | 255069/371472 [9:46:46<9:53:07, 3.27it/s] 69%|██████▊ | 255070/371472 [9:46:46<11:21:42, 2.85it/s] 69%|██████▊ | 255071/371472 [9:46:46<10:32:37, 3.07it/s] 69%|██████▊ | 255072/371472 [9:46:47<10:11:30, 3.17it/s] 69%|██████▊ | 255073/371472 [9:46:47<9:51:57, 3.28it/s] 69%|██████▊ | 255074/371472 [9:46:47<9:35:25, 3.37it/s] 69%|██████▊ | 255075/371472 [9:46:47<9:22:16, 3.45it/s] 69%|██████▊ | 255076/371472 [9:46:48<9:12:43, 3.51it/s] 69%|██████▊ | 255077/371472 [9:46:48<9:10:42, 3.52it/s] 69%|██████▊ | 255078/371472 [9:46:48<9:11:55, 3.51it/s] 69%|██████▊ | 255079/371472 [9:46:48<8:51:39, 3.65it/s] 69%|██████▊ | 255080/371472 [9:46:49<8:45:55, 3.69it/s] {'loss': 2.7029, 'learning_rate': 3.821457044969725e-07, 'epoch': 10.99} + 69%|██████▊ | 255080/371472 [9:46:49<8:45:55, 3.69it/s] 69%|██████▊ | 255081/371472 [9:46:49<9:12:12, 3.51it/s] 69%|██████▊ | 255082/371472 [9:46:49<9:01:12, 3.58it/s] 69%|██████▊ | 255083/371472 [9:46:50<9:14:41, 3.50it/s] 69%|██████▊ | 255084/371472 [9:46:50<9:00:36, 3.59it/s] 69%|██████▊ | 255085/371472 [9:46:50<9:03:25, 3.57it/s] 69%|██████▊ | 255086/371472 [9:46:50<9:16:17, 3.49it/s] 69%|██████▊ | 255087/371472 [9:46:51<9:04:47, 3.56it/s] 69%|██████▊ | 255088/371472 [9:46:51<9:24:01, 3.44it/s] 69%|██████▊ | 255089/371472 [9:46:51<9:21:34, 3.45it/s] 69%|██████▊ | 255090/371472 [9:46:52<9:07:04, 3.55it/s] 69%|██████▊ | 255091/371472 [9:46:52<9:11:43, 3.52it/s] 69%|██████▊ | 255092/371472 [9:46:52<9:02:28, 3.58it/s] 69%|██████▊ | 255093/371472 [9:46:52<8:56:51, 3.61it/s] 69%|██████▊ | 255094/371472 [9:46:53<8:57:14, 3.61it/s] 69%|██████▊ | 255095/371472 [9:46:53<9:12:43, 3.51it/s] 69%|██████▊ | 255096/371472 [9:46:53<9:16:35, 3.48it/s] 69%|██████▊ | 255097/371472 [9:46:54<8:54:01, 3.63it/s] 69%|██████▊ | 255098/371472 [9:46:54<9:13:57, 3.50it/s] 69%|██████▊ | 255099/371472 [9:46:54<8:59:10, 3.60it/s] 69%|██████▊ | 255100/371472 [9:46:55<10:02:41, 3.22it/s] {'loss': 2.5678, 'learning_rate': 3.8209722252149367e-07, 'epoch': 10.99} + 69%|██████▊ | 255100/371472 [9:46:55<10:02:41, 3.22it/s] 69%|██████▊ | 255101/371472 [9:46:55<10:42:29, 3.02it/s] 69%|██████▊ | 255102/371472 [9:46:55<10:13:30, 3.16it/s] 69%|██████▊ | 255103/371472 [9:46:55<9:43:29, 3.32it/s] 69%|██████▊ | 255104/371472 [9:46:56<9:27:33, 3.42it/s] 69%|██████▊ | 255105/371472 [9:46:56<9:19:30, 3.47it/s] 69%|██████▊ | 255106/371472 [9:46:56<9:08:10, 3.54it/s] 69%|██████▊ | 255107/371472 [9:46:57<8:59:45, 3.59it/s] 69%|██████▊ | 255108/371472 [9:46:57<8:59:04, 3.60it/s] 69%|██████▊ | 255109/371472 [9:46:57<8:57:01, 3.61it/s] 69%|██████▊ | 255110/371472 [9:46:57<9:06:22, 3.55it/s] 69%|██████▊ | 255111/371472 [9:46:58<9:36:33, 3.36it/s] 69%|██████▊ | 255112/371472 [9:46:58<9:25:52, 3.43it/s] 69%|██████▊ | 255113/371472 [9:46:58<9:08:09, 3.54it/s] 69%|██████▊ | 255114/371472 [9:46:59<9:18:42, 3.47it/s] 69%|██████▊ | 255115/371472 [9:46:59<9:52:30, 3.27it/s] 69%|██████▊ | 255116/371472 [9:46:59<9:40:55, 3.34it/s] 69%|██████▊ | 255117/371472 [9:46:59<9:33:52, 3.38it/s] 69%|██████▊ | 255118/371472 [9:47:00<9:22:28, 3.45it/s] 69%|██████▊ | 255119/371472 [9:47:00<9:06:20, 3.55it/s] 69%|██████▊ | 255120/371472 [9:47:00<9:36:57, 3.36it/s] {'loss': 2.6809, 'learning_rate': 3.820487405460147e-07, 'epoch': 10.99} + 69%|██████▊ | 255120/371472 [9:47:00<9:36:57, 3.36it/s] 69%|██████▊ | 255121/371472 [9:47:01<10:17:58, 3.14it/s] 69%|██████▊ | 255122/371472 [9:47:01<10:03:30, 3.21it/s] 69%|██████▊ | 255123/371472 [9:47:01<10:10:47, 3.17it/s] 69%|██████▊ | 255124/371472 [9:47:02<9:46:34, 3.31it/s] 69%|██████▊ | 255125/371472 [9:47:02<9:33:41, 3.38it/s] 69%|██████▊ | 255126/371472 [9:47:02<11:17:47, 2.86it/s] 69%|██████▊ | 255127/371472 [9:47:03<10:28:44, 3.08it/s] 69%|██████▊ | 255128/371472 [9:47:03<9:50:14, 3.29it/s] 69%|██████▊ | 255129/371472 [9:47:03<9:37:25, 3.36it/s] 69%|██████▊ | 255130/371472 [9:47:03<9:20:31, 3.46it/s] 69%|██████▊ | 255131/371472 [9:47:04<9:45:09, 3.31it/s] 69%|██████▊ | 255132/371472 [9:47:04<10:06:56, 3.19it/s] 69%|██████▊ | 255133/371472 [9:47:04<9:49:11, 3.29it/s] 69%|██████▊ | 255134/371472 [9:47:05<9:37:16, 3.36it/s] 69%|██████▊ | 255135/371472 [9:47:05<9:26:12, 3.42it/s] 69%|██████▊ | 255136/371472 [9:47:05<9:33:13, 3.38it/s] 69%|██████▊ | 255137/371472 [9:47:06<10:11:49, 3.17it/s] 69%|██████▊ | 255138/371472 [9:47:06<9:50:47, 3.28it/s] 69%|██████▊ | 255139/371472 [9:47:06<9:41:49, 3.33it/s] 69%|██████▊ | 255140/371472 [9:47:06<9:17:08, 3.48it/s] {'loss': 2.7815, 'learning_rate': 3.8200025857053587e-07, 'epoch': 10.99} + 69%|██████▊ | 255140/371472 [9:47:06<9:17:08, 3.48it/s] 69%|██████▊ | 255141/371472 [9:47:07<9:47:01, 3.30it/s] 69%|██████▊ | 255142/371472 [9:47:07<9:27:57, 3.41it/s] 69%|██████▊ | 255143/371472 [9:47:07<9:44:23, 3.32it/s] 69%|██████▊ | 255144/371472 [9:47:08<9:42:13, 3.33it/s] 69%|██████▊ | 255145/371472 [9:47:08<10:05:53, 3.20it/s] 69%|██████▊ | 255146/371472 [9:47:08<9:50:53, 3.28it/s] 69%|██████▊ | 255147/371472 [9:47:09<9:42:57, 3.33it/s] 69%|██████▊ | 255148/371472 [9:47:09<9:33:40, 3.38it/s] 69%|██████▊ | 255149/371472 [9:47:09<9:15:51, 3.49it/s] 69%|██████▊ | 255150/371472 [9:47:09<9:34:41, 3.37it/s] 69%|██████▊ | 255151/371472 [9:47:10<9:45:08, 3.31it/s] 69%|██████▊ | 255152/371472 [9:47:10<9:44:41, 3.32it/s] 69%|██████▊ | 255153/371472 [9:47:10<9:24:59, 3.43it/s] 69%|██████▊ | 255154/371472 [9:47:11<9:35:12, 3.37it/s] 69%|██████▊ | 255155/371472 [9:47:11<9:24:35, 3.43it/s] 69%|██████▊ | 255156/371472 [9:47:11<10:10:19, 3.18it/s] 69%|██████▊ | 255157/371472 [9:47:12<9:52:16, 3.27it/s] 69%|██████▊ | 255158/371472 [9:47:12<9:30:49, 3.40it/s] 69%|██████▊ | 255159/371472 [9:47:12<9:39:13, 3.35it/s] 69%|██████▊ | 255160/371472 [9:47:12<9:25:27, 3.43it/s] {'loss': 2.5145, 'learning_rate': 3.8195177659505694e-07, 'epoch': 10.99} + 69%|██████▊ | 255160/371472 [9:47:12<9:25:27, 3.43it/s] 69%|██████▊ | 255161/371472 [9:47:13<9:08:06, 3.54it/s] 69%|██████▊ | 255162/371472 [9:47:13<9:08:04, 3.54it/s] 69%|██████▊ | 255163/371472 [9:47:13<9:01:33, 3.58it/s] 69%|██████▊ | 255164/371472 [9:47:14<9:10:40, 3.52it/s] 69%|██████▊ | 255165/371472 [9:47:14<9:14:46, 3.49it/s] 69%|██████▊ | 255166/371472 [9:47:14<9:31:02, 3.39it/s] 69%|██████▊ | 255167/371472 [9:47:14<9:15:51, 3.49it/s] 69%|██████▊ | 255168/371472 [9:47:15<9:24:22, 3.43it/s] 69%|██████▊ | 255169/371472 [9:47:15<9:23:27, 3.44it/s] 69%|██████▊ | 255170/371472 [9:47:15<9:23:42, 3.44it/s] 69%|██████▊ | 255171/371472 [9:47:16<9:22:47, 3.44it/s] 69%|██████▊ | 255172/371472 [9:47:16<9:22:24, 3.45it/s] 69%|██████▊ | 255173/371472 [9:47:16<9:32:34, 3.39it/s] 69%|██████▊ | 255174/371472 [9:47:16<9:28:48, 3.41it/s] 69%|██████▊ | 255175/371472 [9:47:17<9:12:45, 3.51it/s] 69%|██████▊ | 255176/371472 [9:47:17<9:06:45, 3.54it/s] 69%|██████▊ | 255177/371472 [9:47:17<9:07:55, 3.54it/s] 69%|██████▊ | 255178/371472 [9:47:18<9:32:12, 3.39it/s] 69%|██████▊ | 255179/371472 [9:47:18<9:24:35, 3.43it/s] 69%|██████▊ | 255180/371472 [9:47:18<9:30:22, 3.40it/s] {'loss': 2.6788, 'learning_rate': 3.819032946195781e-07, 'epoch': 10.99} + 69%|██████▊ | 255180/371472 [9:47:18<9:30:22, 3.40it/s] 69%|██████▊ | 255181/371472 [9:47:18<9:23:27, 3.44it/s] 69%|██████▊ | 255182/371472 [9:47:19<9:59:59, 3.23it/s] 69%|██████▊ | 255183/371472 [9:47:19<9:56:35, 3.25it/s] 69%|██████▊ | 255184/371472 [9:47:19<9:46:45, 3.30it/s] 69%|██████▊ | 255185/371472 [9:47:20<9:30:26, 3.40it/s] 69%|██████▊ | 255186/371472 [9:47:20<9:31:54, 3.39it/s] 69%|██████▊ | 255187/371472 [9:47:20<9:23:11, 3.44it/s] 69%|██████▊ | 255188/371472 [9:47:21<9:15:45, 3.49it/s] 69%|██████▊ | 255189/371472 [9:47:21<9:16:00, 3.49it/s] 69%|██████▊ | 255190/371472 [9:47:21<9:04:54, 3.56it/s] 69%|██████▊ | 255191/371472 [9:47:21<9:02:33, 3.57it/s] 69%|██████▊ | 255192/371472 [9:47:22<9:47:23, 3.30it/s] 69%|██████▊ | 255193/371472 [9:47:22<9:28:05, 3.41it/s] 69%|██████▊ | 255194/371472 [9:47:22<9:29:15, 3.40it/s] 69%|██████▊ | 255195/371472 [9:47:23<9:23:18, 3.44it/s] 69%|██████▊ | 255196/371472 [9:47:23<9:12:05, 3.51it/s] 69%|██████▊ | 255197/371472 [9:47:23<8:59:22, 3.59it/s] 69%|██████▊ | 255198/371472 [9:47:23<9:20:03, 3.46it/s] 69%|██████▊ | 255199/371472 [9:47:24<10:10:18, 3.18it/s] 69%|██████▊ | 255200/371472 [9:47:24<9:45:19, 3.31it/s] {'loss': 2.9221, 'learning_rate': 3.8185481264409913e-07, 'epoch': 10.99} + 69%|██████▊ | 255200/371472 [9:47:24<9:45:19, 3.31it/s] 69%|██████▊ | 255201/371472 [9:47:24<9:35:55, 3.36it/s] 69%|██████▊ | 255202/371472 [9:47:25<9:48:25, 3.29it/s] 69%|██████▊ | 255203/371472 [9:47:25<9:25:09, 3.43it/s] 69%|██████▊ | 255204/371472 [9:47:25<9:19:53, 3.46it/s] 69%|██████▊ | 255205/371472 [9:47:26<9:04:48, 3.56it/s] 69%|██████▊ | 255206/371472 [9:47:26<9:24:30, 3.43it/s] 69%|██████▊ | 255207/371472 [9:47:26<9:13:57, 3.50it/s] 69%|██████▊ | 255208/371472 [9:47:26<9:11:13, 3.52it/s] 69%|██████▊ | 255209/371472 [9:47:27<9:05:52, 3.55it/s] 69%|██████▊ | 255210/371472 [9:47:27<9:15:21, 3.49it/s] 69%|██████▊ | 255211/371472 [9:47:27<9:13:11, 3.50it/s] 69%|██████▊ | 255212/371472 [9:47:28<9:23:48, 3.44it/s] 69%|██████▊ | 255213/371472 [9:47:28<9:38:09, 3.35it/s] 69%|██████▊ | 255214/371472 [9:47:28<9:32:28, 3.38it/s] 69%|██████▊ | 255215/371472 [9:47:28<9:25:38, 3.43it/s] 69%|██████▊ | 255216/371472 [9:47:29<9:36:54, 3.36it/s] 69%|██████▊ | 255217/371472 [9:47:29<9:49:50, 3.28it/s] 69%|██████▊ | 255218/371472 [9:47:29<9:34:44, 3.37it/s] 69%|██████▊ | 255219/371472 [9:47:30<9:12:42, 3.51it/s] 69%|██████▊ | 255220/371472 [9:47:30<9:00:00, 3.59it/s] {'loss': 2.7145, 'learning_rate': 3.818063306686203e-07, 'epoch': 10.99} + 69%|██████▊ | 255220/371472 [9:47:30<9:00:00, 3.59it/s] 69%|██████▊ | 255221/371472 [9:47:30<9:15:41, 3.49it/s] 69%|██████▊ | 255222/371472 [9:47:30<9:23:27, 3.44it/s] 69%|██████▊ | 255223/371472 [9:47:31<9:05:55, 3.55it/s] 69%|██████▊ | 255224/371472 [9:47:31<8:58:49, 3.60it/s] 69%|██████▊ | 255225/371472 [9:47:31<9:10:34, 3.52it/s] 69%|██████▊ | 255226/371472 [9:47:32<9:07:20, 3.54it/s] 69%|██████▊ | 255227/371472 [9:47:32<9:13:37, 3.50it/s] 69%|██████▊ | 255228/371472 [9:47:32<9:14:45, 3.49it/s] 69%|██████▊ | 255229/371472 [9:47:32<9:21:53, 3.45it/s] 69%|██████▊ | 255230/371472 [9:47:33<9:18:06, 3.47it/s] 69%|██████▊ | 255231/371472 [9:47:33<9:18:23, 3.47it/s] 69%|██████▊ | 255232/371472 [9:47:33<9:12:19, 3.51it/s] 69%|██████▊ | 255233/371472 [9:47:34<9:13:18, 3.50it/s] 69%|██████▊ | 255234/371472 [9:47:34<9:06:30, 3.54it/s] 69%|██████▊ | 255235/371472 [9:47:34<9:31:03, 3.39it/s] 69%|██████▊ | 255236/371472 [9:47:35<9:38:59, 3.35it/s] 69%|██████▊ | 255237/371472 [9:47:35<9:24:13, 3.43it/s] 69%|██████▊ | 255238/371472 [9:47:35<9:41:07, 3.33it/s] 69%|██████▊ | 255239/371472 [9:47:35<9:41:38, 3.33it/s] 69%|██████▊ | 255240/371472 [9:47:36<9:26:44, 3.42it/s] {'loss': 2.6509, 'learning_rate': 3.817578486931414e-07, 'epoch': 10.99} + 69%|██████▊ | 255240/371472 [9:47:36<9:26:44, 3.42it/s] 69%|██████▊ | 255241/371472 [9:47:36<9:18:22, 3.47it/s] 69%|██████▊ | 255242/371472 [9:47:36<9:13:21, 3.50it/s] 69%|██████▊ | 255243/371472 [9:47:37<9:49:22, 3.29it/s] 69%|██████▊ | 255244/371472 [9:47:37<9:43:22, 3.32it/s] 69%|██████▊ | 255245/371472 [9:47:37<9:44:30, 3.31it/s] 69%|██████▊ | 255246/371472 [9:47:38<10:04:26, 3.20it/s] 69%|██████▊ | 255247/371472 [9:47:38<9:41:26, 3.33it/s] 69%|██████▊ | 255248/371472 [9:47:38<9:20:11, 3.46it/s] 69%|██████▊ | 255249/371472 [9:47:38<9:29:38, 3.40it/s] 69%|██████▊ | 255250/371472 [9:47:39<9:56:07, 3.25it/s] 69%|██████▊ | 255251/371472 [9:47:39<10:08:45, 3.18it/s] 69%|██████▊ | 255252/371472 [9:47:39<9:44:39, 3.31it/s] 69%|██████▊ | 255253/371472 [9:47:40<9:34:22, 3.37it/s] 69%|██████▊ | 255254/371472 [9:47:40<9:18:00, 3.47it/s] 69%|███��██▊ | 255255/371472 [9:47:40<9:18:06, 3.47it/s] 69%|██████▊ | 255256/371472 [9:47:40<9:37:10, 3.36it/s] 69%|██████▊ | 255257/371472 [9:47:41<9:18:03, 3.47it/s] 69%|██████▊ | 255258/371472 [9:47:41<9:57:22, 3.24it/s] 69%|██████▊ | 255259/371472 [9:47:41<9:24:57, 3.43it/s] 69%|██████▊ | 255260/371472 [9:47:42<9:22:13, 3.45it/s] {'loss': 2.7283, 'learning_rate': 3.817093667176625e-07, 'epoch': 10.99} + 69%|██████▊ | 255260/371472 [9:47:42<9:22:13, 3.45it/s] 69%|██████▊ | 255261/371472 [9:47:42<9:23:23, 3.44it/s] 69%|██████▊ | 255262/371472 [9:47:42<9:19:54, 3.46it/s] 69%|██████▊ | 255263/371472 [9:47:43<9:29:43, 3.40it/s] 69%|██████▊ | 255264/371472 [9:47:43<9:43:00, 3.32it/s] 69%|██████▊ | 255265/371472 [9:47:43<9:58:03, 3.24it/s] 69%|██████▊ | 255266/371472 [9:47:43<9:30:38, 3.39it/s] 69%|██████▊ | 255267/371472 [9:47:44<9:19:29, 3.46it/s] 69%|██████▊ | 255268/371472 [9:47:44<9:09:36, 3.52it/s] 69%|██████▊ | 255269/371472 [9:47:44<9:18:36, 3.47it/s] 69%|██████▊ | 255270/371472 [9:47:45<9:58:23, 3.24it/s] 69%|██████▊ | 255271/371472 [9:47:45<10:05:19, 3.20it/s] 69%|██████▊ | 255272/371472 [9:47:45<9:46:11, 3.30it/s] 69%|██████▊ | 255273/371472 [9:47:46<10:16:43, 3.14it/s] 69%|██████▊ | 255274/371472 [9:47:46<10:40:11, 3.03it/s] 69%|██████▊ | 255275/371472 [9:47:46<9:56:21, 3.25it/s] 69%|██████▊ | 255276/371472 [9:47:46<9:27:57, 3.41it/s] 69%|██████▊ | 255277/371472 [9:47:47<9:04:50, 3.55it/s] 69%|██████▊ | 255278/371472 [9:47:47<9:41:31, 3.33it/s] 69%|██████▊ | 255279/371472 [9:47:47<10:24:01, 3.10it/s] 69%|██████▊ | 255280/371472 [9:47:48<10:01:51, 3.22it/s] {'loss': 2.6823, 'learning_rate': 3.816608847421836e-07, 'epoch': 11.0} + 69%|██████▊ | 255280/371472 [9:47:48<10:01:51, 3.22it/s] 69%|██████▊ | 255281/371472 [9:47:48<9:44:13, 3.31it/s] 69%|██████▊ | 255282/371472 [9:47:48<9:38:09, 3.35it/s] 69%|██████▊ | 255283/371472 [9:47:49<9:27:21, 3.41it/s] 69%|██████▊ | 255284/371472 [9:47:49<9:52:02, 3.27it/s] 69%|██████▊ | 255285/371472 [9:47:49<9:50:13, 3.28it/s] 69%|██████▊ | 255286/371472 [9:47:50<10:22:44, 3.11it/s] 69%|██████▊ | 255287/371472 [9:47:50<11:07:41, 2.90it/s] 69%|██████▊ | 255288/371472 [9:47:50<11:11:33, 2.88it/s] 69%|██████▊ | 255289/371472 [9:47:51<10:48:34, 2.99it/s] 69%|██████▊ | 255290/371472 [9:47:51<10:11:15, 3.17it/s] 69%|██████▊ | 255291/371472 [9:47:51<9:58:09, 3.24it/s] 69%|██████▊ | 255292/371472 [9:47:51<10:07:35, 3.19it/s] 69%|██████▊ | 255293/371472 [9:47:52<10:22:14, 3.11it/s] 69%|██████▊ | 255294/371472 [9:47:52<10:17:37, 3.14it/s] 69%|██████▊ | 255295/371472 [9:47:52<10:01:54, 3.22it/s] 69%|██████▊ | 255296/371472 [9:47:53<10:01:25, 3.22it/s] 69%|██████▊ | 255297/371472 [9:47:53<10:20:38, 3.12it/s] 69%|██████▊ | 255298/371472 [9:47:53<10:06:53, 3.19it/s] 69%|██████▊ | 255299/371472 [9:47:54<9:44:28, 3.31it/s] 69%|██████▊ | 255300/371472 [9:47:54<9:31:14, 3.39it/s] {'loss': 2.7784, 'learning_rate': 3.8161240276670475e-07, 'epoch': 11.0} + 69%|██████▊ | 255300/371472 [9:47:54<9:31:14, 3.39it/s] 69%|██████▊ | 255301/371472 [9:47:54<9:33:54, 3.37it/s] 69%|██████▊ | 255302/371472 [9:47:55<9:35:44, 3.36it/s] 69%|██████▊ | 255303/371472 [9:47:55<9:41:40, 3.33it/s] 69%|██████▊ | 255304/371472 [9:47:55<9:20:11, 3.46it/s] 69%|██████▊ | 255305/371472 [9:47:55<9:05:18, 3.55it/s] 69%|██████▊ | 255306/371472 [9:47:56<9:00:11, 3.58it/s] 69%|██████▊ | 255307/371472 [9:47:56<9:02:10, 3.57it/s] 69%|██████▊ | 255308/371472 [9:47:56<9:17:35, 3.47it/s] 69%|██████▊ | 255309/371472 [9:47:57<9:50:28, 3.28it/s] 69%|██████▊ | 255310/371472 [9:47:57<9:31:47, 3.39it/s] 69%|██████▊ | 255311/371472 [9:47:57<9:14:02, 3.49it/s] 69%|██████▊ | 255312/371472 [9:47:57<9:17:31, 3.47it/s] 69%|██████▊ | 255313/371472 [9:47:58<9:15:56, 3.48it/s] 69%|██████▊ | 255314/371472 [9:47:58<9:42:25, 3.32it/s] 69%|██████▊ | 255315/371472 [9:47:58<9:38:50, 3.34it/s] 69%|██████▊ | 255316/371472 [9:47:59<9:17:36, 3.47it/s] 69%|██████▊ | 255317/371472 [9:47:59<9:17:54, 3.47it/s] 69%|██████▊ | 255318/371472 [9:47:59<9:13:54, 3.49it/s] 69%|██████▊ | 255319/371472 [9:47:59<9:23:57, 3.43it/s] 69%|██████▊ | 255320/371472 [9:48:00<9:14:57, 3.49it/s] {'loss': 2.8586, 'learning_rate': 3.815639207912258e-07, 'epoch': 11.0} + 69%|██████▊ | 255320/371472 [9:48:00<9:14:57, 3.49it/s] 69%|██████▊ | 255321/371472 [9:48:00<9:07:15, 3.54it/s] 69%|██████▊ | 255322/371472 [9:48:00<9:20:58, 3.45it/s] 69%|██████▊ | 255323/371472 [9:48:01<9:35:03, 3.37it/s] 69%|██████▊ | 255324/371472 [9:48:01<9:17:20, 3.47it/s] 69%|██████▊ | 255325/371472 [9:48:01<9:13:18, 3.50it/s] 69%|██████▊ | 255326/371472 [9:48:02<9:35:59, 3.36it/s] 69%|██████▊ | 255327/371472 [9:48:02<9:21:16, 3.45it/s] 69%|██████▊ | 255328/371472 [9:48:02<9:20:53, 3.45it/s] 69%|██████▊ | 255329/371472 [9:48:02<9:32:34, 3.38it/s] 69%|██████▊ | 255330/371472 [9:48:03<9:27:25, 3.41it/s] 69%|██████▊ | 255331/371472 [9:48:03<9:12:11, 3.51it/s] 69%|██████▊ | 255332/371472 [9:48:03<9:21:57, 3.44it/s] 69%|██████▊ | 255333/371472 [9:48:04<9:08:30, 3.53it/s] 69%|██████▊ | 255334/371472 [9:48:04<8:58:55, 3.59it/s] 69%|██████▊ | 255335/371472 [9:48:04<9:05:32, 3.55it/s] 69%|██████▊ | 255336/371472 [9:48:04<9:22:15, 3.44it/s] 69%|██████▊ | 255337/371472 [9:48:05<9:28:14, 3.41it/s] 69%|██████▊ | 255338/371472 [9:48:05<9:17:17, 3.47it/s] 69%|██████▊ | 255339/371472 [9:48:05<9:12:51, 3.50it/s] 69%|██████▊ | 255340/371472 [9:48:05<9:01:22, 3.58it/s] {'loss': 2.7214, 'learning_rate': 3.8151543881574695e-07, 'epoch': 11.0} + 69%|██████▊ | 255340/371472 [9:48:05<9:01:22, 3.58it/s] 69%|██████▊ | 255341/371472 [9:48:06<9:18:36, 3.46it/s] 69%|██████▊ | 255342/371472 [9:48:06<9:13:26, 3.50it/s] 69%|██████▊ | 255343/371472 [9:48:06<9:28:48, 3.40it/s] 69%|██████▊ | 255344/371472 [9:48:07<9:15:35, 3.48it/s] 69%|██████▊ | 255345/371472 [9:48:07<9:27:09, 3.41it/s] 69%|██████▊ | 255346/371472 [9:48:07<9:26:50, 3.41it/s] 69%|██████▊ | 255347/371472 [9:48:08<9:25:33, 3.42it/s] 69%|██████▊ | 255348/371472 [9:48:08<10:19:36, 3.12it/s] 69%|██████▊ | 255349/371472 [9:48:08<10:34:35, 3.05it/s] 69%|██████▊ | 255350/371472 [9:48:09<10:23:13, 3.11it/s] 69%|██████▊ | 255351/371472 [9:48:09<10:01:37, 3.22it/s] 69%|██████▊ | 255352/371472 [9:48:09<9:51:42, 3.27it/s] 69%|██████▊ | 255353/371472 [9:48:10<10:23:13, 3.11it/s] 69%|██████▊ | 255354/371472 [9:48:10<10:15:14, 3.15it/s] 69%|██████▊ | 255355/371472 [9:48:10<9:57:46, 3.24it/s] 69%|██████▊ | 255356/371472 [9:48:10<9:30:04, 3.39it/s] 69%|██████▊ | 255357/371472 [9:48:11<9:34:10, 3.37it/s] 69%|██████▊ | 255358/371472 [9:48:11<9:23:35, 3.43it/s] 69%|██████▊ | 255359/371472 [9:48:11<9:13:55, 3.49it/s] 69%|██████▊ | 255360/371472 [9:48:12<9:06:13, 3.54it/s] {'loss': 2.7323, 'learning_rate': 3.81466956840268e-07, 'epoch': 11.0} + 69%|██████▊ | 255360/371472 [9:48:12<9:06:13, 3.54it/s] 69%|██████▊ | 255361/371472 [9:48:12<8:58:32, 3.59it/s] 69%|██████▊ | 255362/371472 [9:48:12<8:44:25, 3.69it/s] 69%|██████▊ | 255363/371472 [9:48:12<8:58:41, 3.59it/s] 69%|██████▊ | 255364/371472 [9:48:13<9:22:28, 3.44it/s] 69%|██████▊ | 255365/371472 [9:48:13<9:37:43, 3.35it/s] 69%|██████▊ | 255366/371472 [9:48:13<9:19:51, 3.46it/s] 69%|██████▊ | 255367/371472 [9:48:14<9:27:01, 3.41it/s] 69%|██████▊ | 255368/371472 [9:48:14<9:18:10, 3.47it/s] 69%|██████▊ | 255369/371472 [9:48:14<9:11:38, 3.51it/s] 69%|██████▊ | 255370/371472 [9:48:14<10:09:05, 3.18it/s] 69%|██████▊ | 255371/371472 [9:48:15<9:49:36, 3.28it/s] 69%|██████▊ | 255372/371472 [9:48:15<9:45:33, 3.30it/s] 69%|██████▊ | 255373/371472 [9:48:15<9:29:50, 3.40it/s] 69%|██████▊ | 255374/371472 [9:48:16<9:29:35, 3.40it/s] 69%|██████▊ | 255375/371472 [9:48:16<9:27:54, 3.41it/s] 69%|████���█▊ | 255376/371472 [9:48:16<9:18:26, 3.46it/s] 69%|██████▊ | 255377/371472 [9:48:17<9:50:30, 3.28it/s] 69%|██████▊ | 255378/371472 [9:48:17<9:29:12, 3.40it/s] 69%|██████▊ | 255379/371472 [9:48:17<9:28:40, 3.40it/s] 69%|██████▊ | 255380/371472 [9:48:17<9:24:55, 3.43it/s] {'loss': 2.657, 'learning_rate': 3.814184748647892e-07, 'epoch': 11.0} + 69%|██████▊ | 255380/371472 [9:48:17<9:24:55, 3.43it/s] 69%|██████▊ | 255381/371472 [9:48:18<9:16:17, 3.48it/s] 69%|██████▊ | 255382/371472 [9:48:18<9:23:57, 3.43it/s] 69%|██████▊ | 255383/371472 [9:48:18<9:22:34, 3.44it/s] 69%|██████▊ | 255384/371472 [9:48:19<9:18:45, 3.46it/s] 69%|██████▊ | 255385/371472 [9:48:19<9:21:41, 3.44it/s] 69%|██████▊ | 255386/371472 [9:48:19<9:19:31, 3.46it/s] 69%|██████▉ | 255387/371472 [9:48:19<10:07:11, 3.19it/s]Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41. +Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2} +/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock. + self.pid = os.fork() + 69%|██████▉ | 255388/371472 [9:48:41<214:28:13, 6.65s/it] 69%|██████▉ | 255389/371472 [9:48:41<153:10:02, 4.75s/it] 69%|██████▉ | 255390/371472 [9:48:42<109:56:04, 3.41s/it] 69%|██████▉ | 255391/371472 [9:48:42<79:39:56, 2.47s/it] 69%|██████▉ | 255392/371472 [9:48:42<58:32:01, 1.82s/it] 69%|██████▉ | 255393/371472 [9:48:42<44:22:48, 1.38s/it] 69%|██████▉ | 255394/371472 [9:48:43<34:02:58, 1.06s/it] 69%|██████▉ | 255395/371472 [9:48:43<27:45:42, 1.16it/s] 69%|██████▉ | 255396/371472 [9:48:43<22:14:57, 1.45it/s] 69%|██████▉ | 255397/371472 [9:48:44<19:38:33, 1.64it/s] 69%|██████▉ | 255398/371472 [9:48:44<16:45:36, 1.92it/s] 69%|██████▉ | 255399/371472 [9:48:44<14:25:58, 2.23it/s] 69%|██████▉ | 255400/371472 [9:48:45<13:07:37, 2.46it/s] {'loss': 2.6821, 'learning_rate': 3.813699928893102e-07, 'epoch': 11.0} + 69%|██████▉ | 255400/371472 [9:48:45<13:07:37, 2.46it/s] 69%|██████▉ | 255401/371472 [9:48:45<12:55:22, 2.49it/s] 69%|██████▉ | 255402/371472 [9:48:45<11:54:35, 2.71it/s] 69%|██████▉ | 255403/371472 [9:48:46<11:16:52, 2.86it/s] 69%|██████▉ | 255404/371472 [9:48:46<10:46:20, 2.99it/s] 69%|██████▉ | 255405/371472 [9:48:46<10:32:00, 3.06it/s] 69%|██████▉ | 255406/371472 [9:48:47<10:16:13, 3.14it/s] 69%|██████▉ | 255407/371472 [9:48:47<10:18:36, 3.13it/s] 69%|██████▉ | 255408/371472 [9:48:47<10:20:48, 3.12it/s] 69%|██████▉ | 255409/371472 [9:48:48<10:59:23, 2.93it/s] 69%|██████▉ | 255410/371472 [9:48:48<11:34:06, 2.79it/s] 69%|██████▉ | 255411/371472 [9:48:48<11:02:14, 2.92it/s] 69%|██████▉ | 255412/371472 [9:48:49<10:34:46, 3.05it/s] 69%|██████▉ | 255413/371472 [9:48:49<10:31:33, 3.06it/s] 69%|██████▉ | 255414/371472 [9:48:49<10:13:22, 3.15it/s] 69%|██████▉ | 255415/371472 [9:48:50<10:01:17, 3.22it/s] 69%|██████▉ | 255416/371472 [9:48:50<10:01:58, 3.21it/s] 69%|██████▉ | 255417/371472 [9:48:50<10:03:46, 3.20it/s] 69%|██████▉ | 255418/371472 [9:48:51<10:20:45, 3.12it/s] 69%|██████▉ | 255419/371472 [9:48:51<10:26:40, 3.09it/s] 69%|██████▉ | 255420/371472 [9:48:51<10:15:21, 3.14it/s] {'loss': 2.6677, 'learning_rate': 3.813215109138314e-07, 'epoch': 11.0} + 69%|██████▉ | 255420/371472 [9:48:51<10:15:21, 3.14it/s] 69%|██████▉ | 255421/371472 [9:48:52<10:01:45, 3.21it/s] 69%|██████▉ | 255422/371472 [9:48:52<10:10:46, 3.17it/s] 69%|██████▉ | 255423/371472 [9:48:52<9:55:40, 3.25it/s] 69%|██████▉ | 255424/371472 [9:48:52<9:59:10, 3.23it/s] 69%|██████▉ | 255425/371472 [9:48:53<10:00:37, 3.22it/s] 69%|██████▉ | 255426/371472 [9:48:53<10:19:29, 3.12it/s] 69%|██████▉ | 255427/371472 [9:48:53<10:13:34, 3.15it/s] 69%|██████▉ | 255428/371472 [9:48:54<9:58:38, 3.23it/s] 69%|██████▉ | 255429/371472 [9:48:54<10:19:17, 3.12it/s] 69%|██████▉ | 255430/371472 [9:48:54<10:02:30, 3.21it/s] 69%|██████▉ | 255431/371472 [9:48:55<10:03:35, 3.20it/s] 69%|██████▉ | 255432/371472 [9:48:55<10:49:24, 2.98it/s] 69%|██████▉ | 255433/371472 [9:48:55<10:53:38, 2.96it/s] 69%|██████▉ | 255434/371472 [9:48:56<10:55:52, 2.95it/s] 69%|██████▉ | 255435/371472 [9:48:56<11:43:13, 2.75it/s] 69%|██████▉ | 255436/371472 [9:48:56<11:19:32, 2.85it/s] 69%|██████▉ | 255437/371472 [9:48:57<10:56:21, 2.95it/s] 69%|██████▉ | 255438/371472 [9:48:57<11:16:33, 2.86it/s] 69%|██████▉ | 255439/371472 [9:48:57<11:09:50, 2.89it/s] 69%|██████▉ | 255440/371472 [9:48:58<10:54:27, 2.95it/s] {'loss': 2.6476, 'learning_rate': 3.8127302893835247e-07, 'epoch': 11.0} + 69%|██████▉ | 255440/371472 [9:48:58<10:54:27, 2.95it/s] 69%|██████▉ | 255441/371472 [9:48:58<10:38:42, 3.03it/s] 69%|██████▉ | 255442/371472 [9:48:58<10:24:30, 3.10it/s] 69%|██████▉ | 255443/371472 [9:48:59<10:52:26, 2.96it/s] 69%|██████▉ | 255444/371472 [9:48:59<10:49:39, 2.98it/s] 69%|██████▉ | 255445/371472 [9:48:59<10:36:48, 3.04it/s] 69%|██████▉ | 255446/371472 [9:49:00<10:41:41, 3.01it/s] 69%|██████▉ | 255447/371472 [9:49:00<11:05:00, 2.91it/s] 69%|██████▉ | 255448/371472 [9:49:01<11:16:20, 2.86it/s] 69%|██████▉ | 255449/371472 [9:49:01<11:12:15, 2.88it/s] 69%|██████▉ | 255450/371472 [9:49:01<11:12:27, 2.88it/s] 69%|██████▉ | 255451/371472 [9:49:02<11:00:53, 2.93it/s] 69%|██████▉ | 255452/371472 [9:49:02<10:51:48, 2.97it/s] 69%|██████▉ | 255453/371472 [9:49:02<10:30:36, 3.07it/s] 69%|██████▉ | 255454/371472 [9:49:02<10:26:40, 3.09it/s] 69%|██████▉ | 255455/371472 [9:49:03<10:26:11, 3.09it/s] 69%|██████▉ | 255456/371472 [9:49:03<10:08:51, 3.18it/s] 69%|██████▉ | 255457/371472 [9:49:03<9:56:50, 3.24it/s] 69%|██████▉ | 255458/371472 [9:49:04<10:01:08, 3.22it/s] 69%|██████▉ | 255459/371472 [9:49:04<10:12:50, 3.16it/s] 69%|██████▉ | 255460/371472 [9:49:04<9:57:46, 3.23it/s] {'loss': 2.6695, 'learning_rate': 3.812245469628736e-07, 'epoch': 11.0} + 69%|██████▉ | 255460/371472 [9:49:04<9:57:46, 3.23it/s] 69%|██████▉ | 255461/371472 [9:49:05<10:10:27, 3.17it/s] 69%|██████▉ | 255462/371472 [9:49:05<10:01:27, 3.21it/s] 69%|██████▉ | 255463/371472 [9:49:05<10:14:15, 3.15it/s] 69%|██████▉ | 255464/371472 [9:49:06<10:13:42, 3.15it/s] 69%|██████▉ | 255465/371472 [9:49:06<10:11:35, 3.16it/s] 69%|██████▉ | 255466/371472 [9:49:06<9:45:08, 3.30it/s] 69%|██████▉ | 255467/371472 [9:49:06<9:27:26, 3.41it/s] 69%|██████▉ | 255468/371472 [9:49:07<9:39:18, 3.34it/s] 69%|██████▉ | 255469/371472 [9:49:07<10:36:33, 3.04it/s] 69%|██████▉ | 255470/371472 [9:49:08<10:37:46, 3.03it/s] 69%|██████▉ | 255471/371472 [9:49:08<10:23:10, 3.10it/s] 69%|██████▉ | 255472/371472 [9:49:08<10:14:28, 3.15it/s] 69%|██████▉ | 255473/371472 [9:49:09<10:53:02, 2.96it/s] 69%|██████▉ | 255474/371472 [9:49:09<11:04:04, 2.91it/s] 69%|██████▉ | 255475/371472 [9:49:09<10:53:44, 2.96it/s] 69%|██████▉ | 255476/371472 [9:49:10<10:49:27, 2.98it/s] 69%|██████▉ | 255477/371472 [9:49:10<10:21:54, 3.11it/s] 69%|██████▉ | 255478/371472 [9:49:10<11:41:06, 2.76it/s] 69%|██████▉ | 255479/371472 [9:49:11<11:40:07, 2.76it/s] 69%|██████▉ | 255480/371472 [9:49:11<11:49:39, 2.72it/s] {'loss': 2.5307, 'learning_rate': 3.8117606498739466e-07, 'epoch': 11.0} + 69%|██████▉ | 255480/371472 [9:49:11<11:49:39, 2.72it/s] 69%|██████▉ | 255481/371472 [9:49:11<11:54:06, 2.71it/s] 69%|██████▉ | 255482/371472 [9:49:12<12:27:46, 2.59it/s] 69%|██████▉ | 255483/371472 [9:49:12<11:46:04, 2.74it/s] 69%|██████▉ | 255484/371472 [9:49:12<11:05:31, 2.90it/s] 69%|██████▉ | 255485/371472 [9:49:13<10:28:08, 3.08it/s] 69%|██████▉ | 255486/371472 [9:49:13<10:24:25, 3.10it/s] 69%|██████▉ | 255487/371472 [9:49:13<9:56:40, 3.24it/s] 69%|██████▉ | 255488/371472 [9:49:14<9:51:54, 3.27it/s] 69%|██████▉ | 255489/371472 [9:49:14<10:06:12, 3.19it/s] 69%|██████▉ | 255490/371472 [9:49:14<10:10:04, 3.17it/s] 69%|██████▉ | 255491/371472 [9:49:15<10:04:08, 3.20it/s] 69%|██████▉ | 255492/371472 [9:49:15<11:29:37, 2.80it/s] 69%|██████▉ | 255493/371472 [9:49:15<11:29:48, 2.80it/s] 69%|██████▉ | 255494/371472 [9:49:16<11:08:47, 2.89it/s] 69%|██████▉ | 255495/371472 [9:49:16<10:54:12, 2.95it/s] 69%|██████▉ | 255496/371472 [9:49:16<10:37:46, 3.03it/s] 69%|██████▉ | 255497/371472 [9:49:17<10:37:27, 3.03it/s] 69%|██████▉ | 255498/371472 [9:49:17<10:48:33, 2.98it/s] 69%|██████▉ | 255499/371472 [9:49:17<11:06:24, 2.90it/s] 69%|██████▉ | 255500/371472 [9:49:18<10:49:53, 2.97it/s] {'loss': 2.7569, 'learning_rate': 3.8112758301191584e-07, 'epoch': 11.0} + 69%|██████▉ | 255500/371472 [9:49:18<10:49:53, 2.97it/s] 69%|██████▉ | 255501/371472 [9:49:18<10:48:46, 2.98it/s] 69%|██████▉ | 255502/371472 [9:49:18<10:20:20, 3.12it/s] 69%|██████▉ | 255503/371472 [9:49:19<10:06:51, 3.18it/s] 69%|██████▉ | 255504/371472 [9:49:19<10:26:55, 3.08it/s] 69%|██████▉ | 255505/371472 [9:49:19<10:48:20, 2.98it/s] 69%|██████▉ | 255506/371472 [9:49:20<10:42:10, 3.01it/s] 69%|██████▉ | 255507/371472 [9:49:20<10:12:50, 3.15it/s] 69%|██████▉ | 255508/371472 [9:49:20<10:15:45, 3.14it/s] 69%|██████▉ | 255509/371472 [9:49:21<10:07:17, 3.18it/s] \ No newline at end of file