diff --git "a/run-2024-07-20T09:26:09+00:00.log" "b/run-2024-07-20T09:26:09+00:00.log" --- "a/run-2024-07-20T09:26:09+00:00.log" +++ "b/run-2024-07-20T09:26:09+00:00.log" @@ -2364,4 +2364,1169 @@ Non-default generation parameters: {'max_length': 200, 'early_stopping': True, ' 88%|████████▊ | 325100/371472 [3:42:01<3:41:29, 3.49it/s] 88%|████████▊ | 325101/371472 [3:42:01<3:52:05, 3.33it/s] 88%|████████▊ | 325102/371472 [3:42:01<3:46:24, 3.41it/s] 88%|████████▊ | 325103/371472 [3:42:02<3:48:31, 3.38it/s] 88%|████████▊ | 325104/371472 [3:42:02<3:49:43, 3.36it/s] 88%|████████▊ | 325105/371472 [3:42:02<3:54:45, 3.29it/s] 88%|████████▊ | 325106/371472 [3:42:03<3:45:06, 3.43it/s] 88%|████████▊ | 325107/371472 [3:42:03<3:56:52, 3.26it/s] 88%|████████▊ | 325108/371472 [3:42:03<4:18:26, 2.99it/s] 88%|████████▊ | 325109/371472 [3:42:04<4:29:57, 2.86it/s] 88%|████████▊ | 325110/371472 [3:42:04<4:22:30, 2.94it/s] 88%|████████▊ | 325111/371472 [3:42:04<4:22:55, 2.94it/s] 88%|████████▊ | 325112/371472 [3:42:05<4:13:14, 3.05it/s] 88%|████████▊ | 325113/371472 [3:42:05<4:26:50, 2.90it/s] 88%|████████▊ | 325114/371472 [3:42:05<4:15:03, 3.03it/s] 88%|████████▊ | 325115/371472 [3:42:06<4:05:07, 3.15it/s] 88%|████████▊ | 325116/371472 [3:42:06<4:07:56, 3.12it/s] 88%|████████▊ | 325117/371472 [3:42:06<4:10:09, 3.09it/s] 88%|████████▊ | 325118/371472 [3:42:07<4:06:14, 3.14it/s] 88%|████████▊ | 325119/371472 [3:42:07<4:02:02, 3.19it/s] 88%|████████▊ | 325120/371472 [3:42:07<4:00:08, 3.22it/s] {'loss': 2.6083, 'learning_rate': 2.1236182636988508e-07, 'epoch': 14.0} 88%|████████▊ | 325120/371472 [3:42:07<4:00:08, 3.22it/s] 88%|████████▊ | 325121/371472 [3:42:07<4:03:19, 3.17it/s] 88%|████████▊ | 325122/371472 [3:42:08<3:57:29, 3.25it/s] 88%|████████▊ | 325123/371472 [3:42:08<4:01:03, 3.20it/s] 88%|████████▊ | 325124/371472 [3:42:08<3:47:31, 3.40it/s] 88%|████████▊ | 325125/371472 [3:42:09<3:41:22, 3.49it/s] 88%|████████▊ | 325126/371472 [3:42:09<3:40:10, 3.51it/s] 88%|████████▊ | 325127/371472 [3:42:09<3:36:49, 3.56it/s] 88%|████████▊ | 325128/371472 [3:42:09<3:40:22, 3.50it/s] 88%|████████▊ | 325129/371472 [3:42:10<3:41:51, 3.48it/s] 88%|████████▊ | 325130/371472 [3:42:10<3:51:21, 3.34it/s] 88%|████████▊ | 325131/371472 [3:42:10<3:49:59, 3.36it/s] 88%|████████▊ | 325132/371472 [3:42:11<4:10:19, 3.09it/s] 88%|████████▊ | 325133/371472 [3:42:11<4:42:44, 2.73it/s] 88%|████████▊ | 325134/371472 [3:42:11<4:20:16, 2.97it/s] 88%|████████▊ | 325135/371472 [3:42:12<4:00:59, 3.20it/s] 88%|████████▊ | 325136/371472 [3:42:12<4:10:43, 3.08it/s] 88%|████████▊ | 325137/371472 [3:42:12<4:03:49, 3.17it/s] 88%|████████▊ | 325138/371472 [3:42:13<3:58:15, 3.24it/s] 88%|████████▊ | 325139/371472 [3:42:13<4:06:31, 3.13it/s] 88%|████████▊ | 325140/371472 [3:42:13<3:50:53, 3.34it/s] {'loss': 2.6165, 'learning_rate': 2.1231334439440626e-07, 'epoch': 14.0} 88%|████████▊ | 325140/371472 [3:42:13<3:50:53, 3.34it/s] 88%|████████▊ | 325141/371472 [3:42:14<4:02:59, 3.18it/s] 88%|████████▊ | 325142/371472 [3:42:14<3:59:29, 3.22it/s] 88%|████████▊ | 325143/371472 [3:42:14<4:02:56, 3.18it/s] 88%|████████▊ | 325144/371472 [3:42:15<4:09:21, 3.10it/s] 88%|████████▊ | 325145/371472 [3:42:15<4:10:24, 3.08it/s] 88%|████████▊ | 325146/371472 [3:42:15<4:02:13, 3.19it/s] 88%|████████▊ | 325147/371472 [3:42:15<3:50:11, 3.35it/s] 88%|████████▊ | 325148/371472 [3:42:16<3:47:48, 3.39it/s] 88%|████████▊ | 325149/371472 [3:42:16<3:57:54, 3.25it/s] 88%|████████▊ | 325150/371472 [3:42:16<4:00:11, 3.21it/s] 88%|████████▊ | 325151/371472 [3:42:17<3:53:50, 3.30it/s] 88%|████████▊ | 325152/371472 [3:42:17<3:57:24, 3.25it/s] 88%|████████▊ | 325153/371472 [3:42:17<3:56:26, 3.27it/s] 88%|████████▊ | 325154/371472 [3:42:18<3:54:35, 3.29it/s] 88%|████████▊ | 325155/371472 [3:42:18<3:51:49, 3.33it/s] 88%|████████▊ | 325156/371472 [3:42:18<3:45:18, 3.43it/s] 88%|████████▊ | 325157/371472 [3:42:18<3:52:11, 3.32it/s] 88%|████████▊ | 325158/371472 [3:42:19<3:47:04, 3.40it/s] 88%|████████▊ | 325159/371472 [3:42:19<3:38:22, 3.53it/s] 88%|████████▊ | 325160/371472 [3:42:19<3:50:55, 3.34it/s] {'loss': 2.5899, 'learning_rate': 2.1226486241892733e-07, 'epoch': 14.01} - 88%|████████▊ | 325160/371472 [3:42:19<3:50:55, 3.34it/s] 88%|████████▊ | 325161/371472 [3:42:20<3:46:44, 3.40it/s] 88%|████████▊ | 325162/371472 [3:42:20<3:46:28, 3.41it/s] 88%|████████▊ | 325163/371472 [3:42:20<3:45:52, 3.42it/s] 88%|████████▊ | 325164/371472 [3:42:21<3:48:07, 3.38it/s] 88%|████████▊ | 325165/371472 [3:42:21<3:48:42, 3.37it/s] 88%|████████▊ | 325166/371472 [3:42:21<3:48:46, 3.37it/s] 88%|████████▊ | 325167/371472 [3:42:21<4:02:27, 3.18it/s] 88%|████████▊ | 325168/371472 [3:42:22<3:59:00, 3.23it/s] \ No newline at end of file + 88%|████████▊ | 325160/371472 [3:42:19<3:50:55, 3.34it/s] 88%|████████▊ | 325161/371472 [3:42:20<3:46:44, 3.40it/s] 88%|████████▊ | 325162/371472 [3:42:20<3:46:28, 3.41it/s] 88%|████████▊ | 325163/371472 [3:42:20<3:45:52, 3.42it/s] 88%|████████▊ | 325164/371472 [3:42:21<3:48:07, 3.38it/s] 88%|████████▊ | 325165/371472 [3:42:21<3:48:42, 3.37it/s] 88%|████████▊ | 325166/371472 [3:42:21<3:48:46, 3.37it/s] 88%|████████▊ | 325167/371472 [3:42:21<4:02:27, 3.18it/s] 88%|████████▊ | 325168/371472 [3:42:22<3:59:00, 3.23it/s] 88%|████████▊ | 325169/371472 [3:42:22<4:55:31, 2.61it/s] 88%|████████▊ | 325170/371472 [3:42:23<4:31:56, 2.84it/s] 88%|████████▊ | 325171/371472 [3:42:23<4:26:20, 2.90it/s] 88%|████████▊ | 325172/371472 [3:42:23<4:06:08, 3.14it/s] 88%|████████▊ | 325173/371472 [3:42:24<4:13:40, 3.04it/s] 88%|████████▊ | 325174/371472 [3:42:24<4:02:03, 3.19it/s] 88%|████████▊ | 325175/371472 [3:42:24<4:00:16, 3.21it/s] 88%|████████▊ | 325176/371472 [3:42:24<3:50:54, 3.34it/s] 88%|████████▊ | 325177/371472 [3:42:25<3:52:46, 3.31it/s] 88%|████████▊ | 325178/371472 [3:42:25<3:52:16, 3.32it/s] 88%|████████▊ | 325179/371472 [3:42:25<3:49:00, 3.37it/s] 88%|████████▊ | 325180/371472 [3:42:26<3:39:49, 3.51it/s] {'loss': 2.6185, 'learning_rate': 2.1221638044344846e-07, 'epoch': 14.01} + 88%|████████▊ | 325180/371472 [3:42:26<3:39:49, 3.51it/s] 88%|████████▊ | 325181/371472 [3:42:26<3:36:29, 3.56it/s] 88%|████████▊ | 325182/371472 [3:42:26<3:36:41, 3.56it/s] 88%|████████▊ | 325183/371472 [3:42:26<3:48:57, 3.37it/s] 88%|████████▊ | 325184/371472 [3:42:27<3:44:54, 3.43it/s] 88%|████████▊ | 325185/371472 [3:42:27<3:36:46, 3.56it/s] 88%|████████▊ | 325186/371472 [3:42:27<3:33:04, 3.62it/s] 88%|████████▊ | 325187/371472 [3:42:28<3:41:13, 3.49it/s] 88%|████████▊ | 325188/371472 [3:42:28<3:41:47, 3.48it/s] 88%|████████▊ | 325189/371472 [3:42:28<3:55:18, 3.28it/s] 88%|████████▊ | 325190/371472 [3:42:28<3:41:36, 3.48it/s] 88%|████████▊ | 325191/371472 [3:42:29<3:48:46, 3.37it/s] 88%|████████▊ | 325192/371472 [3:42:29<3:50:09, 3.35it/s] 88%|████████▊ | 325193/371472 [3:42:29<4:02:39, 3.18it/s] 88%|████████▊ | 325194/371472 [3:42:30<3:53:14, 3.31it/s] 88%|████████▊ | 325195/371472 [3:42:30<3:55:08, 3.28it/s] 88%|████████▊ | 325196/371472 [3:42:30<3:54:39, 3.29it/s] 88%|████████▊ | 325197/371472 [3:42:31<3:50:52, 3.34it/s] 88%|████████▊ | 325198/371472 [3:42:31<3:58:33, 3.23it/s] 88%|████████▊ | 325199/371472 [3:42:31<3:46:14, 3.41it/s] 88%|████████▊ | 325200/371472 [3:42:31<3:36:41, 3.56it/s] {'loss': 2.548, 'learning_rate': 2.1216789846796953e-07, 'epoch': 14.01} + 88%|████████▊ | 325200/371472 [3:42:31<3:36:41, 3.56it/s] 88%|████████▊ | 325201/371472 [3:42:32<3:40:34, 3.50it/s] 88%|████████▊ | 325202/371472 [3:42:32<3:42:39, 3.46it/s] 88%|████████▊ | 325203/371472 [3:42:32<3:48:29, 3.37it/s] 88%|████████▊ | 325204/371472 [3:42:33<3:44:28, 3.44it/s] 88%|████████▊ | 325205/371472 [3:42:33<4:10:24, 3.08it/s] 88%|████████▊ | 325206/371472 [3:42:33<3:57:49, 3.24it/s] 88%|████████▊ | 325207/371472 [3:42:34<3:59:07, 3.22it/s] 88%|████████▊ | 325208/371472 [3:42:34<3:50:15, 3.35it/s] 88%|████████▊ | 325209/371472 [3:42:34<4:24:24, 2.92it/s] 88%|████████▊ | 325210/371472 [3:42:35<4:26:47, 2.89it/s] 88%|████████▊ | 325211/371472 [3:42:35<4:10:45, 3.07it/s] 88%|████████▊ | 325212/371472 [3:42:35<4:18:23, 2.98it/s] 88%|████████▊ | 325213/371472 [3:42:36<4:13:00, 3.05it/s] 88%|████████▊ | 325214/371472 [3:42:36<4:02:58, 3.17it/s] 88%|████████▊ | 325215/371472 [3:42:36<4:03:54, 3.16it/s] 88%|████████▊ | 325216/371472 [3:42:37<3:56:43, 3.26it/s] 88%|████████▊ | 325217/371472 [3:42:37<3:56:13, 3.26it/s] 88%|████████▊ | 325218/371472 [3:42:37<3:58:03, 3.24it/s] 88%|████████▊ | 325219/371472 [3:42:37<3:51:59, 3.32it/s] 88%|████████▊ | 325220/371472 [3:42:38<3:41:07, 3.49it/s] {'loss': 2.6094, 'learning_rate': 2.121194164924907e-07, 'epoch': 14.01} + 88%|████████▊ | 325220/371472 [3:42:38<3:41:07, 3.49it/s] 88%|████████▊ | 325221/371472 [3:42:38<3:38:22, 3.53it/s] 88%|████████▊ | 325222/371472 [3:42:38<3:51:29, 3.33it/s] 88%|████████▊ | 325223/371472 [3:42:39<3:38:11, 3.53it/s] 88%|████████▊ | 325224/371472 [3:42:39<3:31:31, 3.64it/s] 88%|████████▊ | 325225/371472 [3:42:39<3:28:24, 3.70it/s] 88%|████████▊ | 325226/371472 [3:42:39<3:22:16, 3.81it/s] 88%|████████▊ | 325227/371472 [3:42:40<3:27:10, 3.72it/s] 88%|████████▊ | 325228/371472 [3:42:40<3:27:27, 3.72it/s] 88%|████████▊ | 325229/371472 [3:42:40<3:44:41, 3.43it/s] 88%|████████▊ | 325230/371472 [3:42:40<3:40:46, 3.49it/s] 88%|████████▊ | 325231/371472 [3:42:41<3:36:26, 3.56it/s] 88%|████████▊ | 325232/371472 [3:42:41<3:34:31, 3.59it/s] 88%|████████▊ | 325233/371472 [3:42:41<3:47:27, 3.39it/s] 88%|████████▊ | 325234/371472 [3:42:42<3:38:28, 3.53it/s] 88%|████████▊ | 325235/371472 [3:42:42<3:32:02, 3.63it/s] 88%|████████▊ | 325236/371472 [3:42:42<3:39:05, 3.52it/s] 88%|████████▊ | 325237/371472 [3:42:42<3:40:00, 3.50it/s] 88%|████████▊ | 325238/371472 [3:42:43<3:49:38, 3.36it/s] 88%|████████▊ | 325239/371472 [3:42:43<3:47:09, 3.39it/s] 88%|████████▊ | 325240/371472 [3:42:43<3:43:32, 3.45it/s] {'loss': 2.7087, 'learning_rate': 2.1207093451701175e-07, 'epoch': 14.01} + 88%|████████▊ | 325240/371472 [3:42:43<3:43:32, 3.45it/s] 88%|████████▊ | 325241/371472 [3:42:44<3:34:23, 3.59it/s] 88%|████████▊ | 325242/371472 [3:42:44<3:37:41, 3.54it/s] 88%|████████▊ | 325243/371472 [3:42:44<3:32:56, 3.62it/s] 88%|████████▊ | 325244/371472 [3:42:44<3:36:08, 3.56it/s] 88%|████████▊ | 325245/371472 [3:42:45<3:38:46, 3.52it/s] 88%|████████▊ | 325246/371472 [3:42:45<3:37:57, 3.53it/s] 88%|████████▊ | 325247/371472 [3:42:45<3:49:40, 3.35it/s] 88%|████████▊ | 325248/371472 [3:42:46<3:44:14, 3.44it/s] 88%|████████▊ | 325249/371472 [3:42:46<3:38:58, 3.52it/s] 88%|████████▊ | 325250/371472 [3:42:46<3:38:16, 3.53it/s] 88%|████████▊ | 325251/371472 [3:42:46<3:34:07, 3.60it/s] 88%|████████▊ | 325252/371472 [3:42:47<3:43:21, 3.45it/s] 88%|████████▊ | 325253/371472 [3:42:47<3:42:22, 3.46it/s] 88%|████████▊ | 325254/371472 [3:42:47<3:43:33, 3.45it/s] 88%|████████▊ | 325255/371472 [3:42:48<3:59:12, 3.22it/s] 88%|████████▊ | 325256/371472 [3:42:48<3:47:43, 3.38it/s] 88%|████████▊ | 325257/371472 [3:42:48<4:08:39, 3.10it/s] 88%|████████▊ | 325258/371472 [3:42:49<4:03:23, 3.16it/s] 88%|████████▊ | 325259/371472 [3:42:49<3:59:05, 3.22it/s] 88%|████████▊ | 325260/371472 [3:42:49<3:45:46, 3.41it/s] {'loss': 2.6854, 'learning_rate': 2.120224525415329e-07, 'epoch': 14.01} + 88%|████████▊ | 325260/371472 [3:42:49<3:45:46, 3.41it/s] 88%|████████▊ | 325261/371472 [3:42:50<4:22:10, 2.94it/s] 88%|████████▊ | 325262/371472 [3:42:50<4:02:59, 3.17it/s] 88%|████████▊ | 325263/371472 [3:42:50<4:08:35, 3.10it/s] 88%|█████���██▊ | 325264/371472 [3:42:50<3:55:36, 3.27it/s] 88%|████████▊ | 325265/371472 [3:42:51<3:51:16, 3.33it/s] 88%|████████▊ | 325266/371472 [3:42:51<3:39:47, 3.50it/s] 88%|████████▊ | 325267/371472 [3:42:51<3:34:46, 3.59it/s] 88%|████████▊ | 325268/371472 [3:42:52<3:26:15, 3.73it/s] 88%|████████▊ | 325269/371472 [3:42:52<3:39:59, 3.50it/s] 88%|████████▊ | 325270/371472 [3:42:52<3:40:32, 3.49it/s] 88%|████████▊ | 325271/371472 [3:42:53<3:59:34, 3.21it/s] 88%|████████▊ | 325272/371472 [3:42:53<4:08:52, 3.09it/s] 88%|████████▊ | 325273/371472 [3:42:53<3:57:11, 3.25it/s] 88%|████████▊ | 325274/371472 [3:42:53<3:48:56, 3.36it/s] 88%|████████▊ | 325275/371472 [3:42:54<3:44:37, 3.43it/s] 88%|████████▊ | 325276/371472 [3:42:54<3:49:57, 3.35it/s] 88%|████████▊ | 325277/371472 [3:42:54<3:52:03, 3.32it/s] 88%|████████▊ | 325278/371472 [3:42:55<3:56:15, 3.26it/s] 88%|████████▊ | 325279/371472 [3:42:55<3:51:56, 3.32it/s] 88%|████████▊ | 325280/371472 [3:42:55<3:51:21, 3.33it/s] {'loss': 2.5596, 'learning_rate': 2.1197397056605397e-07, 'epoch': 14.01} + 88%|████████▊ | 325280/371472 [3:42:55<3:51:21, 3.33it/s] 88%|████████▊ | 325281/371472 [3:42:56<3:55:55, 3.26it/s] 88%|████████▊ | 325282/371472 [3:42:56<3:47:38, 3.38it/s] 88%|████████▊ | 325283/371472 [3:42:56<3:47:49, 3.38it/s] 88%|████████▊ | 325284/371472 [3:42:56<3:45:32, 3.41it/s] 88%|████████▊ | 325285/371472 [3:42:57<3:54:21, 3.28it/s] 88%|████████▊ | 325286/371472 [3:42:57<4:07:08, 3.11it/s] 88%|████████▊ | 325287/371472 [3:42:57<4:03:26, 3.16it/s] 88%|████████▊ | 325288/371472 [3:42:58<4:07:52, 3.11it/s] 88%|████████▊ | 325289/371472 [3:42:58<4:16:35, 3.00it/s] 88%|████████▊ | 325290/371472 [3:42:58<4:02:45, 3.17it/s] 88%|████████▊ | 325291/371472 [3:42:59<3:57:04, 3.25it/s] 88%|████████▊ | 325292/371472 [3:42:59<3:58:09, 3.23it/s] 88%|████████▊ | 325293/371472 [3:42:59<3:50:05, 3.34it/s] 88%|████████▊ | 325294/371472 [3:42:59<3:40:44, 3.49it/s] 88%|████████▊ | 325295/371472 [3:43:00<3:50:29, 3.34it/s] 88%|████████▊ | 325296/371472 [3:43:00<3:37:48, 3.53it/s] 88%|████████▊ | 325297/371472 [3:43:00<3:53:19, 3.30it/s] 88%|████████▊ | 325298/371472 [3:43:01<3:59:10, 3.22it/s] 88%|████████▊ | 325299/371472 [3:43:01<4:00:02, 3.21it/s] 88%|████████▊ | 325300/371472 [3:43:01<3:57:19, 3.24it/s] {'loss': 2.6496, 'learning_rate': 2.1192548859057512e-07, 'epoch': 14.01} + 88%|████████▊ | 325300/371472 [3:43:01<3:57:19, 3.24it/s] 88%|████████▊ | 325301/371472 [3:43:02<4:08:48, 3.09it/s] 88%|████████▊ | 325302/371472 [3:43:02<3:55:25, 3.27it/s] 88%|████████▊ | 325303/371472 [3:43:02<3:52:32, 3.31it/s] 88%|████████▊ | 325304/371472 [3:43:03<3:52:33, 3.31it/s] 88%|████████▊ | 325305/371472 [3:43:03<3:57:29, 3.24it/s] 88%|████████▊ | 325306/371472 [3:43:03<3:57:41, 3.24it/s] 88%|████████▊ | 325307/371472 [3:43:04<4:01:21, 3.19it/s] 88%|████████▊ | 325308/371472 [3:43:04<4:02:31, 3.17it/s] 88%|████████▊ | 325309/371472 [3:43:04<3:53:38, 3.29it/s] 88%|████████▊ | 325310/371472 [3:43:04<3:43:31, 3.44it/s] 88%|████████▊ | 325311/371472 [3:43:05<3:35:34, 3.57it/s] 88%|████████▊ | 325312/371472 [3:43:05<3:44:36, 3.43it/s] 88%|████████▊ | 325313/371472 [3:43:05<3:39:30, 3.50it/s] 88%|████████▊ | 325314/371472 [3:43:05<3:33:34, 3.60it/s] 88%|████████▊ | 325315/371472 [3:43:06<3:48:36, 3.37it/s] 88%|████████▊ | 325316/371472 [3:43:06<3:56:08, 3.26it/s] 88%|████████▊ | 325317/371472 [3:43:06<3:52:03, 3.31it/s] 88%|████████▊ | 325318/371472 [3:43:07<3:53:20, 3.30it/s] 88%|████████▊ | 325319/371472 [3:43:07<3:51:25, 3.32it/s] 88%|████████▊ | 325320/371472 [3:43:07<3:50:24, 3.34it/s] {'loss': 2.6619, 'learning_rate': 2.1187700661509617e-07, 'epoch': 14.01} + 88%|████████▊ | 325320/371472 [3:43:07<3:50:24, 3.34it/s] 88%|████████▊ | 325321/371472 [3:43:08<3:45:24, 3.41it/s] 88%|████████▊ | 325322/371472 [3:43:08<3:37:25, 3.54it/s] 88%|████████▊ | 325323/371472 [3:43:08<3:52:53, 3.30it/s] 88%|████████▊ | 325324/371472 [3:43:09<3:43:53, 3.44it/s] 88%|████████▊ | 325325/371472 [3:43:09<3:50:41, 3.33it/s] 88%|████████▊ | 325326/371472 [3:43:09<3:43:14, 3.45it/s] 88%|████████▊ | 325327/371472 [3:43:09<3:44:29, 3.43it/s] 88%|████████▊ | 325328/371472 [3:43:10<3:42:02, 3.46it/s] 88%|████████▊ | 325329/371472 [3:43:10<3:45:32, 3.41it/s] 88%|████████▊ | 325330/371472 [3:43:10<3:42:39, 3.45it/s] 88%|████████▊ | 325331/371472 [3:43:11<3:54:06, 3.28it/s] 88%|████████▊ | 325332/371472 [3:43:11<3:47:29, 3.38it/s] 88%|████████▊ | 325333/371472 [3:43:11<3:52:21, 3.31it/s] 88%|████████▊ | 325334/371472 [3:43:11<3:52:46, 3.30it/s] 88%|████████▊ | 325335/371472 [3:43:12<3:46:09, 3.40it/s] 88%|████████▊ | 325336/371472 [3:43:12<3:55:44, 3.26it/s] 88%|████████▊ | 325337/371472 [3:43:12<3:57:12, 3.24it/s] 88%|████████▊ | 325338/371472 [3:43:13<3:57:41, 3.23it/s] 88%|████████▊ | 325339/371472 [3:43:13<3:47:38, 3.38it/s] 88%|████████▊ | 325340/371472 [3:43:13<4:09:27, 3.08it/s] {'loss': 2.5829, 'learning_rate': 2.1182852463961735e-07, 'epoch': 14.01} + 88%|████████▊ | 325340/371472 [3:43:13<4:09:27, 3.08it/s] 88%|████████▊ | 325341/371472 [3:43:14<3:56:33, 3.25it/s] 88%|████████▊ | 325342/371472 [3:43:14<3:47:54, 3.37it/s] 88%|████████▊ | 325343/371472 [3:43:14<3:44:01, 3.43it/s] 88%|████████▊ | 325344/371472 [3:43:14<3:41:36, 3.47it/s] 88%|████████▊ | 325345/371472 [3:43:15<3:51:00, 3.33it/s] 88%|████████▊ | 325346/371472 [3:43:15<4:04:34, 3.14it/s] 88%|████████▊ | 325347/371472 [3:43:16<4:22:04, 2.93it/s] 88%|████████▊ | 325348/371472 [3:43:16<4:08:18, 3.10it/s] 88%|████████▊ | 325349/371472 [3:43:16<4:10:03, 3.07it/s] 88%|████████▊ | 325350/371472 [3:43:16<4:06:41, 3.12it/s] 88%|████████▊ | 325351/371472 [3:43:17<4:02:18, 3.17it/s] 88%|████████▊ | 325352/371472 [3:43:17<4:06:57, 3.11it/s] 88%|████████▊ | 325353/371472 [3:43:17<4:04:58, 3.14it/s] 88%|████████▊ | 325354/371472 [3:43:18<3:49:31, 3.35it/s] 88%|████████▊ | 325355/371472 [3:43:18<3:48:24, 3.37it/s] 88%|████████▊ | 325356/371472 [3:43:18<3:37:27, 3.53it/s] 88%|████████▊ | 325357/371472 [3:43:19<3:33:55, 3.59it/s] 88%|████████▊ | 325358/371472 [3:43:19<3:30:05, 3.66it/s] 88%|████████▊ | 325359/371472 [3:43:19<3:56:07, 3.25it/s] 88%|████████▊ | 325360/371472 [3:43:19<3:51:18, 3.32it/s] {'loss': 2.5924, 'learning_rate': 2.117800426641384e-07, 'epoch': 14.01} + 88%|████████▊ | 325360/371472 [3:43:19<3:51:18, 3.32it/s] 88%|████████▊ | 325361/371472 [3:43:20<3:45:21, 3.41it/s] 88%|████████▊ | 325362/371472 [3:43:20<3:38:10, 3.52it/s] 88%|████████▊ | 325363/371472 [3:43:20<3:30:52, 3.64it/s] 88%|████████▊ | 325364/371472 [3:43:20<3:27:19, 3.71it/s] 88%|████████▊ | 325365/371472 [3:43:21<3:35:41, 3.56it/s] 88%|████████▊ | 325366/371472 [3:43:21<3:32:12, 3.62it/s] 88%|████████▊ | 325367/371472 [3:43:21<3:34:21, 3.58it/s] 88%|████████▊ | 325368/371472 [3:43:22<3:40:09, 3.49it/s] 88%|████████▊ | 325369/371472 [3:43:22<3:54:25, 3.28it/s] 88%|████████▊ | 325370/371472 [3:43:22<3:56:23, 3.25it/s] 88%|████████▊ | 325371/371472 [3:43:23<3:52:41, 3.30it/s] 88%|████████▊ | 325372/371472 [3:43:23<3:44:26, 3.42it/s] 88%|████████▊ | 325373/371472 [3:43:23<3:36:16, 3.55it/s] 88%|████████▊ | 325374/371472 [3:43:23<3:43:28, 3.44it/s] 88%|████████▊ | 325375/371472 [3:43:24<3:37:03, 3.54it/s] 88%|████████▊ | 325376/371472 [3:43:24<3:39:26, 3.50it/s] 88%|████████▊ | 325377/371472 [3:43:24<3:36:09, 3.55it/s] 88%|████████▊ | 325378/371472 [3:43:25<3:34:37, 3.58it/s] 88%|████████▊ | 325379/371472 [3:43:25<3:27:55, 3.69it/s] 88%|████████▊ | 325380/371472 [3:43:25<3:36:15, 3.55it/s] {'loss': 2.6212, 'learning_rate': 2.1173156068865954e-07, 'epoch': 14.01} + 88%|████████▊ | 325380/371472 [3:43:25<3:36:15, 3.55it/s] 88%|████████▊ | 325381/371472 [3:43:25<3:44:28, 3.42it/s] 88%|████████▊ | 325382/371472 [3:43:26<3:43:14, 3.44it/s] 88%|████████▊ | 325383/371472 [3:43:26<3:39:05, 3.51it/s] 88%|████████▊ | 325384/371472 [3:43:26<3:56:52, 3.24it/s] 88%|████████▊ | 325385/371472 [3:43:27<4:01:51, 3.18it/s] 88%|████████▊ | 325386/371472 [3:43:27<3:52:05, 3.31it/s] 88%|████████▊ | 325387/371472 [3:43:27<3:49:31, 3.35it/s] 88%|████████▊ | 325388/371472 [3:43:28<3:57:21, 3.24it/s] 88%|████████▊ | 325389/371472 [3:43:28<3:48:41, 3.36it/s] 88%|████████▊ | 325390/371472 [3:43:28<3:48:46, 3.36it/s] 88%|████████▊ | 325391/371472 [3:43:28<3:44:49, 3.42it/s] 88%|████████▊ | 325392/371472 [3:43:29<3:46:18, 3.39it/s] 88%|████████▊ | 325393/371472 [3:43:29<3:36:40, 3.54it/s] 88%|████████▊ | 325394/371472 [3:43:29<3:29:56, 3.66it/s] 88%|████████▊ | 325395/371472 [3:43:29<3:24:30, 3.76it/s] 88%|████████▊ | 325396/371472 [3:43:30<3:20:56, 3.82it/s] 88%|████████▊ | 325397/371472 [3:43:30<3:19:47, 3.84it/s] 88%|████████▊ | 325398/371472 [3:43:30<3:24:35, 3.75it/s] 88%|████████▊ | 325399/371472 [3:43:31<3:21:16, 3.81it/s] 88%|████████▊ | 325400/371472 [3:43:31<3:36:48, 3.54it/s] {'loss': 2.705, 'learning_rate': 2.1168307871318061e-07, 'epoch': 14.02} + 88%|████████▊ | 325400/371472 [3:43:31<3:36:48, 3.54it/s] 88%|████████▊ | 325401/371472 [3:43:31<3:33:27, 3.60it/s] 88%|████████▊ | 325402/371472 [3:43:31<3:24:29, 3.75it/s] 88%|████████▊ | 325403/371472 [3:43:32<3:26:44, 3.71it/s] 88%|████████▊ | 325404/371472 [3:43:32<3:29:22, 3.67it/s] 88%|████████▊ | 325405/371472 [3:43:32<3:34:07, 3.59it/s] 88%|████████▊ | 325406/371472 [3:43:32<3:35:47, 3.56it/s] 88%|████████▊ | 325407/371472 [3:43:33<3:40:24, 3.48it/s] 88%|████████▊ | 325408/371472 [3:43:33<3:35:07, 3.57it/s] 88%|████████▊ | 325409/371472 [3:43:33<3:41:32, 3.47it/s] 88%|████████▊ | 325410/371472 [3:43:34<3:41:19, 3.47it/s] 88%|████████▊ | 325411/371472 [3:43:34<3:56:48, 3.24it/s] 88%|████████▊ | 325412/371472 [3:43:34<3:51:11, 3.32it/s] 88%|████████▊ | 325413/371472 [3:43:35<3:44:58, 3.41it/s] 88%|████████▊ | 325414/371472 [3:43:35<3:36:47, 3.54it/s] 88%|████████▊ | 325415/371472 [3:43:35<3:50:05, 3.34it/s] 88%|████████▊ | 325416/371472 [3:43:35<3:55:42, 3.26it/s] 88%|████████▊ | 325417/371472 [3:43:36<3:46:38, 3.39it/s] 88%|████████▊ | 325418/371472 [3:43:36<3:52:43, 3.30it/s] 88%|████████▊ | 325419/371472 [3:43:36<3:42:04, 3.46it/s] 88%|████████▊ | 325420/371472 [3:43:37<3:46:45, 3.38it/s] {'loss': 2.862, 'learning_rate': 2.1163459673770177e-07, 'epoch': 14.02} + 88%|████████▊ | 325420/371472 [3:43:37<3:46:45, 3.38it/s] 88%|████████▊ | 325421/371472 [3:43:37<3:52:18, 3.30it/s] 88%|████████▊ | 325422/371472 [3:43:37<3:45:40, 3.40it/s] 88%|████████▊ | 325423/371472 [3:43:38<3:54:57, 3.27it/s] 88%|████████▊ | 325424/371472 [3:43:38<3:44:38, 3.42it/s] 88%|████████▊ | 325425/371472 [3:43:38<3:36:25, 3.55it/s] 88%|████████▊ | 325426/371472 [3:43:38<3:46:38, 3.39it/s] 88%|████████▊ | 325427/371472 [3:43:39<3:46:49, 3.38it/s] 88%|████████▊ | 325428/371472 [3:43:39<3:38:48, 3.51it/s] 88%|████████▊ | 325429/371472 [3:43:39<3:36:12, 3.55it/s] 88%|████████▊ | 325430/371472 [3:43:39<3:31:34, 3.63it/s] 88%|████████▊ | 325431/371472 [3:43:40<3:27:20, 3.70it/s] 88%|████████▊ | 325432/371472 [3:43:40<3:26:31, 3.72it/s] 88%|████████▊ | 325433/371472 [3:43:40<4:02:14, 3.17it/s] 88%|████████▊ | 325434/371472 [3:43:41<3:56:50, 3.24it/s] 88%|████████▊ | 325435/371472 [3:43:41<3:57:35, 3.23it/s] 88%|████████▊ | 325436/371472 [3:43:41<4:02:24, 3.17it/s] 88%|████████▊ | 325437/371472 [3:43:42<3:59:37, 3.20it/s] 88%|████████▊ | 325438/371472 [3:43:42<3:48:04, 3.36it/s] 88%|█████��██▊ | 325439/371472 [3:43:42<3:39:26, 3.50it/s] 88%|████████▊ | 325440/371472 [3:43:43<3:47:05, 3.38it/s] {'loss': 2.6015, 'learning_rate': 2.115861147622228e-07, 'epoch': 14.02} + 88%|████████▊ | 325440/371472 [3:43:43<3:47:05, 3.38it/s] 88%|████████▊ | 325441/371472 [3:43:43<3:52:01, 3.31it/s] 88%|████████▊ | 325442/371472 [3:43:43<3:51:12, 3.32it/s] 88%|████████▊ | 325443/371472 [3:43:44<4:05:15, 3.13it/s] 88%|████████▊ | 325444/371472 [3:43:44<4:07:38, 3.10it/s] 88%|████████▊ | 325445/371472 [3:43:44<4:11:34, 3.05it/s] 88%|████████▊ | 325446/371472 [3:43:44<4:05:12, 3.13it/s] 88%|████████▊ | 325447/371472 [3:43:45<3:54:08, 3.28it/s] 88%|████████▊ | 325448/371472 [3:43:45<4:22:37, 2.92it/s] 88%|████████▊ | 325449/371472 [3:43:45<4:11:22, 3.05it/s] 88%|████████▊ | 325450/371472 [3:43:46<3:55:47, 3.25it/s] 88%|████████▊ | 325451/371472 [3:43:46<3:55:43, 3.25it/s] 88%|████████▊ | 325452/371472 [3:43:46<3:55:23, 3.26it/s] 88%|████████▊ | 325453/371472 [3:43:47<3:49:17, 3.35it/s] 88%|████████▊ | 325454/371472 [3:43:47<3:47:00, 3.38it/s] 88%|████████▊ | 325455/371472 [3:43:47<3:46:38, 3.38it/s] 88%|████████▊ | 325456/371472 [3:43:47<3:43:57, 3.42it/s] 88%|████████▊ | 325457/371472 [3:43:48<3:32:31, 3.61it/s] 88%|████████▊ | 325458/371472 [3:43:48<3:30:22, 3.65it/s] 88%|████████▊ | 325459/371472 [3:43:48<3:33:28, 3.59it/s] 88%|████████▊ | 325460/371472 [3:43:49<3:30:14, 3.65it/s] {'loss': 2.5867, 'learning_rate': 2.11537632786744e-07, 'epoch': 14.02} + 88%|████████▊ | 325460/371472 [3:43:49<3:30:14, 3.65it/s] 88%|████████▊ | 325461/371472 [3:43:49<3:30:27, 3.64it/s] 88%|████████▊ | 325462/371472 [3:43:49<3:22:11, 3.79it/s] 88%|████████▊ | 325463/371472 [3:43:49<3:27:32, 3.69it/s] 88%|████████▊ | 325464/371472 [3:43:50<3:24:11, 3.76it/s] 88%|████████▊ | 325465/371472 [3:43:50<3:18:57, 3.85it/s] 88%|████████▊ | 325466/371472 [3:43:50<3:19:47, 3.84it/s] 88%|████████▊ | 325467/371472 [3:43:50<3:19:09, 3.85it/s] 88%|████████▊ | 325468/371472 [3:43:51<3:16:51, 3.89it/s] 88%|████████▊ | 325469/371472 [3:43:51<3:32:43, 3.60it/s] 88%|████████▊ | 325470/371472 [3:43:51<3:26:05, 3.72it/s] 88%|████████▊ | 325471/371472 [3:43:51<3:25:48, 3.73it/s] 88%|████████▊ | 325472/371472 [3:43:52<3:28:49, 3.67it/s] 88%|████████▊ | 325473/371472 [3:43:52<3:29:33, 3.66it/s] 88%|████████▊ | 325474/371472 [3:43:52<3:27:16, 3.70it/s] 88%|████████▊ | 325475/371472 [3:43:53<3:28:19, 3.68it/s] 88%|████████▊ | 325476/371472 [3:43:53<3:48:39, 3.35it/s] 88%|████████▊ | 325477/371472 [3:43:53<3:44:42, 3.41it/s] 88%|████████▊ | 325478/371472 [3:43:54<4:05:06, 3.13it/s] 88%|████████▊ | 325479/371472 [3:43:54<3:45:14, 3.40it/s] 88%|████████▊ | 325480/371472 [3:43:54<3:45:01, 3.41it/s] {'loss': 2.7102, 'learning_rate': 2.1148915081126503e-07, 'epoch': 14.02} + 88%|████████▊ | 325480/371472 [3:43:54<3:45:01, 3.41it/s] 88%|████████▊ | 325481/371472 [3:43:54<3:43:11, 3.43it/s] 88%|████████▊ | 325482/371472 [3:43:55<3:39:13, 3.50it/s] 88%|████████▊ | 325483/371472 [3:43:55<3:28:42, 3.67it/s] 88%|████████▊ | 325484/371472 [3:43:55<3:53:00, 3.29it/s] 88%|████████▊ | 325485/371472 [3:43:56<3:46:01, 3.39it/s] 88%|████████▊ | 325486/371472 [3:43:56<3:35:28, 3.56it/s] 88%|████████▊ | 325487/371472 [3:43:56<3:35:26, 3.56it/s] 88%|████████▊ | 325488/371472 [3:43:56<3:32:21, 3.61it/s] 88%|████████▊ | 325489/371472 [3:43:57<3:46:05, 3.39it/s] 88%|████████▊ | 325490/371472 [3:43:57<3:40:03, 3.48it/s] 88%|████████▊ | 325491/371472 [3:43:57<3:49:27, 3.34it/s] 88%|████████▊ | 325492/371472 [3:43:58<3:53:45, 3.28it/s] 88%|████████▊ | 325493/371472 [3:43:58<3:44:02, 3.42it/s] 88%|████████▊ | 325494/371472 [3:43:58<3:37:33, 3.52it/s] 88%|████████▊ | 325495/371472 [3:43:58<3:26:38, 3.71it/s] 88%|████████▊ | 325496/371472 [3:43:59<3:40:33, 3.47it/s] 88%|████████▊ | 325497/371472 [3:43:59<3:42:10, 3.45it/s] 88%|████████▊ | 325498/371472 [3:43:59<3:33:46, 3.58it/s] 88%|████████▊ | 325499/371472 [3:43:59<3:26:00, 3.72it/s] 88%|████████▊ | 325500/371472 [3:44:00<3:18:44, 3.86it/s] {'loss': 2.4939, 'learning_rate': 2.1144066883578618e-07, 'epoch': 14.02} + 88%|████████▊ | 325500/371472 [3:44:00<3:18:44, 3.86it/s] 88%|████████▊ | 325501/371472 [3:44:00<3:17:47, 3.87it/s] 88%|████████▊ | 325502/371472 [3:44:00<3:27:33, 3.69it/s] 88%|████████▊ | 325503/371472 [3:44:01<3:44:26, 3.41it/s] 88%|████████▊ | 325504/371472 [3:44:01<3:42:39, 3.44it/s] 88%|████████▊ | 325505/371472 [3:44:01<3:34:26, 3.57it/s] 88%|████████▊ | 325506/371472 [3:44:01<3:28:11, 3.68it/s] 88%|████████▊ | 325507/371472 [3:44:02<3:22:32, 3.78it/s] 88%|████████▊ | 325508/371472 [3:44:02<3:18:57, 3.85it/s] 88%|████████▊ | 325509/371472 [3:44:02<3:16:57, 3.89it/s] 88%|████████▊ | 325510/371472 [3:44:03<3:32:39, 3.60it/s] 88%|████████▊ | 325511/371472 [3:44:03<3:27:26, 3.69it/s] 88%|████████▊ | 325512/371472 [3:44:03<3:23:31, 3.76it/s] 88%|████████▊ | 325513/371472 [3:44:03<3:24:15, 3.75it/s] 88%|████████▊ | 325514/371472 [3:44:04<3:49:18, 3.34it/s] 88%|████████▊ | 325515/371472 [3:44:04<3:46:46, 3.38it/s] 88%|████████▊ | 325516/371472 [3:44:04<3:33:22, 3.59it/s] 88%|████████▊ | 325517/371472 [3:44:04<3:41:07, 3.46it/s] 88%|████████▊ | 325518/371472 [3:44:05<3:59:37, 3.20it/s] 88%|████████▊ | 325519/371472 [3:44:05<4:06:11, 3.11it/s] 88%|████████▊ | 325520/371472 [3:44:05<3:58:52, 3.21it/s] {'loss': 2.577, 'learning_rate': 2.1139218686030725e-07, 'epoch': 14.02} + 88%|████████▊ | 325520/371472 [3:44:05<3:58:52, 3.21it/s] 88%|████████▊ | 325521/371472 [3:44:06<3:41:56, 3.45it/s] 88%|████████▊ | 325522/371472 [3:44:06<3:41:37, 3.46it/s] 88%|████████▊ | 325523/371472 [3:44:06<3:36:26, 3.54it/s] 88%|████████▊ | 325524/371472 [3:44:07<3:31:56, 3.61it/s] 88%|████████▊ | 325525/371472 [3:44:07<3:40:22, 3.47it/s] 88%|████████▊ | 325526/371472 [3:44:07<3:32:27, 3.60it/s] 88%|████████▊ | 325527/371472 [3:44:07<3:25:15, 3.73it/s] 88%|████████▊ | 325528/371472 [3:44:08<3:22:23, 3.78it/s] 88%|████████▊ | 325529/371472 [3:44:08<3:28:42, 3.67it/s] 88%|████████▊ | 325530/371472 [3:44:08<3:20:27, 3.82it/s] 88%|████████▊ | 325531/371472 [3:44:08<3:18:00, 3.87it/s] 88%|████████▊ | 325532/371472 [3:44:09<3:17:09, 3.88it/s] 88%|████████▊ | 325533/371472 [3:44:09<3:21:32, 3.80it/s] 88%|████████▊ | 325534/371472 [3:44:09<3:24:27, 3.74it/s] 88%|████████▊ | 325535/371472 [3:44:09<3:24:52, 3.74it/s] 88%|████████▊ | 325536/371472 [3:44:10<3:26:44, 3.70it/s] 88%|████████▊ | 325537/371472 [3:44:10<3:22:22, 3.78it/s] 88%|████████▊ | 325538/371472 [3:44:10<3:33:05, 3.59it/s] 88%|████████▊ | 325539/371472 [3:44:11<3:29:39, 3.65it/s] 88%|████████▊ | 325540/371472 [3:44:11<3:30:24, 3.64it/s] {'loss': 2.5337, 'learning_rate': 2.113437048848284e-07, 'epoch': 14.02} + 88%|████████▊ | 325540/371472 [3:44:11<3:30:24, 3.64it/s] 88%|████████▊ | 325541/371472 [3:44:11<3:32:58, 3.59it/s] 88%|████████▊ | 325542/371472 [3:44:11<3:26:26, 3.71it/s] 88%|████████▊ | 325543/371472 [3:44:12<3:19:05, 3.84it/s] 88%|████████▊ | 325544/371472 [3:44:12<3:15:29, 3.92it/s] 88%|████████▊ | 325545/371472 [3:44:12<3:20:18, 3.82it/s] 88%|████████▊ | 325546/371472 [3:44:12<3:27:45, 3.68it/s] 88%|████████▊ | 325547/371472 [3:44:13<3:22:05, 3.79it/s] 88%|████████▊ | 325548/371472 [3:44:13<3:16:46, 3.89it/s] 88%|████████▊ | 325549/371472 [3:44:13<3:23:49, 3.76it/s] 88%|████████▊ | 325550/371472 [3:44:14<3:28:42, 3.67it/s] 88%|████████▊ | 325551/371472 [3:44:14<3:19:12, 3.84it/s] 88%|████████▊ | 325552/371472 [3:44:14<3:27:57, 3.68it/s] 88%|████████▊ | 325553/371472 [3:44:14<3:28:37, 3.67it/s] 88%|██��█████▊ | 325554/371472 [3:44:15<3:34:26, 3.57it/s] 88%|████████▊ | 325555/371472 [3:44:15<3:41:59, 3.45it/s] 88%|████████▊ | 325556/371472 [3:44:15<3:39:53, 3.48it/s] 88%|████████▊ | 325557/371472 [3:44:15<3:32:26, 3.60it/s] 88%|████████▊ | 325558/371472 [3:44:16<3:31:00, 3.63it/s] 88%|████████▊ | 325559/371472 [3:44:16<3:42:24, 3.44it/s] 88%|████████▊ | 325560/371472 [3:44:16<3:35:57, 3.54it/s] {'loss': 2.657, 'learning_rate': 2.1129522290934945e-07, 'epoch': 14.02} + 88%|████████▊ | 325560/371472 [3:44:16<3:35:57, 3.54it/s] 88%|████████▊ | 325561/371472 [3:44:17<3:31:43, 3.61it/s] 88%|████████▊ | 325562/371472 [3:44:17<3:40:45, 3.47it/s] 88%|████████▊ | 325563/371472 [3:44:17<3:57:37, 3.22it/s] 88%|████████▊ | 325564/371472 [3:44:18<3:45:08, 3.40it/s] 88%|████████▊ | 325565/371472 [3:44:18<3:47:59, 3.36it/s] 88%|████████▊ | 325566/371472 [3:44:18<3:51:41, 3.30it/s] 88%|████████▊ | 325567/371472 [3:44:18<3:46:43, 3.37it/s] 88%|████████▊ | 325568/371472 [3:44:19<3:39:06, 3.49it/s] 88%|████████▊ | 325569/371472 [3:44:19<3:35:19, 3.55it/s] 88%|████████▊ | 325570/371472 [3:44:19<3:40:09, 3.47it/s] 88%|████████▊ | 325571/371472 [3:44:19<3:30:08, 3.64it/s] 88%|████████▊ | 325572/371472 [3:44:20<3:43:27, 3.42it/s] 88%|████████▊ | 325573/371472 [3:44:20<3:35:29, 3.55it/s] 88%|████████▊ | 325574/371472 [3:44:20<3:31:52, 3.61it/s] 88%|████████▊ | 325575/371472 [3:44:21<3:25:13, 3.73it/s] 88%|████████▊ | 325576/371472 [3:44:21<3:26:19, 3.71it/s] 88%|████████▊ | 325577/371472 [3:44:21<3:34:16, 3.57it/s] 88%|████████▊ | 325578/371472 [3:44:21<3:35:37, 3.55it/s] 88%|████████▊ | 325579/371472 [3:44:22<3:28:26, 3.67it/s] 88%|████████▊ | 325580/371472 [3:44:22<3:27:04, 3.69it/s] {'loss': 2.7199, 'learning_rate': 2.1124674093387052e-07, 'epoch': 14.02} + 88%|████████▊ | 325580/371472 [3:44:22<3:27:04, 3.69it/s] 88%|████████▊ | 325581/371472 [3:44:22<3:29:50, 3.64it/s] 88%|████████▊ | 325582/371472 [3:44:23<3:24:35, 3.74it/s] 88%|████████▊ | 325583/371472 [3:44:23<3:33:39, 3.58it/s] 88%|████████▊ | 325584/371472 [3:44:23<3:30:19, 3.64it/s] 88%|████████▊ | 325585/371472 [3:44:23<3:56:44, 3.23it/s] 88%|████████▊ | 325586/371472 [3:44:24<3:44:37, 3.40it/s] 88%|████████▊ | 325587/371472 [3:44:24<3:45:48, 3.39it/s] 88%|████████▊ | 325588/371472 [3:44:24<4:04:32, 3.13it/s] 88%|████████▊ | 325589/371472 [3:44:25<3:52:48, 3.28it/s] 88%|████████▊ | 325590/371472 [3:44:25<3:41:29, 3.45it/s] 88%|████████▊ | 325591/371472 [3:44:25<3:45:56, 3.38it/s] 88%|████████▊ | 325592/371472 [3:44:26<3:40:20, 3.47it/s] 88%|████████▊ | 325593/371472 [3:44:26<3:30:27, 3.63it/s] 88%|████████▊ | 325594/371472 [3:44:26<3:30:11, 3.64it/s] 88%|████████▊ | 325595/371472 [3:44:26<3:38:33, 3.50it/s] 88%|████████▊ | 325596/371472 [3:44:27<3:30:55, 3.62it/s] 88%|████████▊ | 325597/371472 [3:44:27<3:25:23, 3.72it/s] 88%|████████▊ | 325598/371472 [3:44:27<3:25:46, 3.72it/s] 88%|████████▊ | 325599/371472 [3:44:27<3:21:13, 3.80it/s] 88%|████████▊ | 325600/371472 [3:44:28<3:25:45, 3.72it/s] {'loss': 2.5682, 'learning_rate': 2.111982589583917e-07, 'epoch': 14.02} + 88%|████████▊ | 325600/371472 [3:44:28<3:25:45, 3.72it/s] 88%|████████▊ | 325601/371472 [3:44:28<3:20:23, 3.82it/s] 88%|████████▊ | 325602/371472 [3:44:28<3:22:18, 3.78it/s] 88%|████████▊ | 325603/371472 [3:44:28<3:24:47, 3.73it/s] 88%|████████▊ | 325604/371472 [3:44:29<3:22:06, 3.78it/s] 88%|████████▊ | 325605/371472 [3:44:29<3:35:01, 3.56it/s] 88%|████████▊ | 325606/371472 [3:44:29<3:27:11, 3.69it/s] 88%|████████▊ | 325607/371472 [3:44:30<3:46:24, 3.38it/s] 88%|████████▊ | 325608/371472 [3:44:30<3:43:15, 3.42it/s] 88%|████████▊ | 325609/371472 [3:44:30<3:39:58, 3.47it/s] 88%|████████▊ | 325610/371472 [3:44:30<3:43:47, 3.42it/s] 88%|████████▊ | 325611/371472 [3:44:31<3:35:36, 3.55it/s] 88%|████████▊ | 325612/371472 [3:44:31<3:30:13, 3.64it/s] 88%|████████▊ | 325613/371472 [3:44:31<3:36:23, 3.53it/s] 88%|████████▊ | 325614/371472 [3:44:32<3:26:07, 3.71it/s] 88%|████████▊ | 325615/371472 [3:44:32<3:24:40, 3.73it/s] 88%|████████▊ | 325616/371472 [3:44:32<3:25:12, 3.72it/s] 88%|████████▊ | 325617/371472 [3:44:32<3:28:45, 3.66it/s] 88%|████████▊ | 325618/371472 [3:44:33<3:27:05, 3.69it/s] 88%|████████▊ | 325619/371472 [3:44:33<3:23:43, 3.75it/s] 88%|████████▊ | 325620/371472 [3:44:33<3:22:11, 3.78it/s] {'loss': 2.5102, 'learning_rate': 2.1114977698291274e-07, 'epoch': 14.03} + 88%|████████▊ | 325620/371472 [3:44:33<3:22:11, 3.78it/s] 88%|████████▊ | 325621/371472 [3:44:33<3:18:04, 3.86it/s] 88%|████████▊ | 325622/371472 [3:44:34<3:19:30, 3.83it/s] 88%|████████▊ | 325623/371472 [3:44:34<3:30:44, 3.63it/s] 88%|████████▊ | 325624/371472 [3:44:34<3:34:27, 3.56it/s] 88%|████████▊ | 325625/371472 [3:44:35<3:29:28, 3.65it/s] 88%|████████▊ | 325626/371472 [3:44:35<3:46:23, 3.38it/s] 88%|████████▊ | 325627/371472 [3:44:35<3:40:24, 3.47it/s] 88%|████████▊ | 325628/371472 [3:44:35<3:45:49, 3.38it/s] 88%|████████▊ | 325629/371472 [3:44:36<3:56:38, 3.23it/s] 88%|████████▊ | 325630/371472 [3:44:36<3:41:34, 3.45it/s] 88%|████████▊ | 325631/371472 [3:44:36<3:52:12, 3.29it/s] 88%|████████▊ | 325632/371472 [3:44:37<3:42:12, 3.44it/s] 88%|████████▊ | 325633/371472 [3:44:37<3:32:11, 3.60it/s] 88%|████████▊ | 325634/371472 [3:44:37<3:28:30, 3.66it/s] 88%|████████▊ | 325635/371472 [3:44:37<3:41:12, 3.45it/s] 88%|████████▊ | 325636/371472 [3:44:38<3:55:59, 3.24it/s] 88%|████████▊ | 325637/371472 [3:44:38<4:04:07, 3.13it/s] 88%|████████▊ | 325638/371472 [3:44:38<3:58:23, 3.20it/s] 88%|████████▊ | 325639/371472 [3:44:39<3:44:46, 3.40it/s] 88%|████████▊ | 325640/371472 [3:44:39<3:35:17, 3.55it/s] {'loss': 2.5134, 'learning_rate': 2.111012950074339e-07, 'epoch': 14.03} + 88%|████████▊ | 325640/371472 [3:44:39<3:35:17, 3.55it/s] 88%|████████▊ | 325641/371472 [3:44:39<3:33:42, 3.57it/s] 88%|████████▊ | 325642/371472 [3:44:40<3:33:40, 3.57it/s] 88%|████████▊ | 325643/371472 [3:44:40<3:39:25, 3.48it/s] 88%|████████▊ | 325644/371472 [3:44:40<3:28:07, 3.67it/s] 88%|████████▊ | 325645/371472 [3:44:40<3:40:15, 3.47it/s] 88%|████████▊ | 325646/371472 [3:44:41<3:31:20, 3.61it/s] 88%|████████▊ | 325647/371472 [3:44:41<3:29:06, 3.65it/s] 88%|████████▊ | 325648/371472 [3:44:41<3:40:45, 3.46it/s] 88%|████████▊ | 325649/371472 [3:44:41<3:35:22, 3.55it/s] 88%|████████▊ | 325650/371472 [3:44:42<3:32:15, 3.60it/s] 88%|████████▊ | 325651/371472 [3:44:42<3:28:29, 3.66it/s] 88%|████████▊ | 325652/371472 [3:44:42<3:24:51, 3.73it/s] 88%|████████▊ | 325653/371472 [3:44:43<3:28:06, 3.67it/s] 88%|████████▊ | 325654/371472 [3:44:43<3:28:13, 3.67it/s] 88%|████████▊ | 325655/371472 [3:44:43<3:21:21, 3.79it/s] 88%|████████▊ | 325656/371472 [3:44:43<3:48:27, 3.34it/s] 88%|████████▊ | 325657/371472 [3:44:44<3:37:22, 3.51it/s] 88%|████████▊ | 325658/371472 [3:44:44<3:30:00, 3.64it/s] 88%|████████▊ | 325659/371472 [3:44:44<3:29:54, 3.64it/s] 88%|████████▊ | 325660/371472 [3:44:44<3:26:12, 3.70it/s] {'loss': 2.7421, 'learning_rate': 2.1105281303195497e-07, 'epoch': 14.03} + 88%|████████▊ | 325660/371472 [3:44:45<3:26:12, 3.70it/s] 88%|████████▊ | 325661/371472 [3:44:45<3:44:39, 3.40it/s] 88%|████████▊ | 325662/371472 [3:44:45<3:40:06, 3.47it/s] 88%|████████▊ | 325663/371472 [3:44:45<3:27:33, 3.68it/s] 88%|████████▊ | 325664/371472 [3:44:46<3:19:25, 3.83it/s] 88%|████████▊ | 325665/371472 [3:44:46<3:25:01, 3.72it/s] 88%|████████▊ | 325666/371472 [3:44:46<3:32:50, 3.59it/s] 88%|████████▊ | 325667/371472 [3:44:46<3:31:17, 3.61it/s] 88%|████████▊ | 325668/371472 [3:44:47<3:38:05, 3.50it/s] 88%|████████▊ | 325669/371472 [3:44:47<3:37:30, 3.51it/s] 88%|████████▊ | 325670/371472 [3:44:47<3:29:50, 3.64it/s] 88%|████████▊ | 325671/371472 [3:44:48<3:25:44, 3.71it/s] 88%|████████▊ | 325672/371472 [3:44:48<3:19:56, 3.82it/s] 88%|████████▊ | 325673/371472 [3:44:48<3:23:39, 3.75it/s] 88%|████████▊ | 325674/371472 [3:44:48<3:14:32, 3.92it/s] 88%|████████▊ | 325675/371472 [3:44:49<3:42:10, 3.44it/s] 88%|████████▊ | 325676/371472 [3:44:49<3:35:58, 3.53it/s] 88%|████████▊ | 325677/371472 [3:44:49<3:41:12, 3.45it/s] 88%|████████▊ | 325678/371472 [3:44:49<3:31:23, 3.61it/s] 88%|████████▊ | 325679/371472 [3:44:50<3:36:30, 3.53it/s] 88%|████████▊ | 325680/371472 [3:44:50<3:29:29, 3.64it/s] {'loss': 2.6455, 'learning_rate': 2.1100433105647612e-07, 'epoch': 14.03} + 88%|████████▊ | 325680/371472 [3:44:50<3:29:29, 3.64it/s] 88%|████████▊ | 325681/371472 [3:44:50<3:27:16, 3.68it/s] 88%|████████▊ | 325682/371472 [3:44:51<3:27:19, 3.68it/s] 88%|████████▊ | 325683/371472 [3:44:51<3:23:02, 3.76it/s] 88%|████████▊ | 325684/371472 [3:44:51<3:26:47, 3.69it/s] 88%|████████▊ | 325685/371472 [3:44:51<3:34:22, 3.56it/s] 88%|████████▊ | 325686/371472 [3:44:52<3:36:29, 3.52it/s] 88%|████████▊ | 325687/371472 [3:44:52<3:33:07, 3.58it/s] 88%|████████▊ | 325688/371472 [3:44:52<3:34:21, 3.56it/s] 88%|████████▊ | 325689/371472 [3:44:53<3:31:36, 3.61it/s] 88%|████████▊ | 325690/371472 [3:44:53<3:24:33, 3.73it/s] 88%|████████▊ | 325691/371472 [3:44:53<3:21:52, 3.78it/s] 88%|████████▊ | 325692/371472 [3:44:53<3:24:02, 3.74it/s] 88%|████████▊ | 325693/371472 [3:44:54<3:23:15, 3.75it/s] 88%|████████▊ | 325694/371472 [3:44:54<3:26:42, 3.69it/s] 88%|████████▊ | 325695/371472 [3:44:54<3:27:11, 3.68it/s] 88%|████████▊ | 325696/371472 [3:44:54<3:29:05, 3.65it/s] 88%|████████▊ | 325697/371472 [3:44:55<3:37:38, 3.51it/s] 88%|████████▊ | 325698/371472 [3:44:55<3:31:57, 3.60it/s] 88%|████████▊ | 325699/371472 [3:44:55<3:33:05, 3.58it/s] 88%|████████▊ | 325700/371472 [3:44:56<3:37:07, 3.51it/s] {'loss': 2.6902, 'learning_rate': 2.1095584908099716e-07, 'epoch': 14.03} + 88%|████████▊ | 325700/371472 [3:44:56<3:37:07, 3.51it/s] 88%|████████▊ | 325701/371472 [3:44:56<3:49:34, 3.32it/s] 88%|████████▊ | 325702/371472 [3:44:56<3:58:24, 3.20it/s] 88%|████████▊ | 325703/371472 [3:44:57<3:53:34, 3.27it/s] 88%|████████▊ | 325704/371472 [3:44:57<3:40:29, 3.46it/s] 88%|████████▊ | 325705/371472 [3:44:57<3:33:26, 3.57it/s] 88%|████████▊ | 325706/371472 [3:44:57<3:36:42, 3.52it/s] 88%|████████▊ | 325707/371472 [3:44:58<3:36:36, 3.52it/s] 88%|████████▊ | 325708/371472 [3:44:58<3:27:07, 3.68it/s] 88%|████████▊ | 325709/371472 [3:44:58<3:21:52, 3.78it/s] 88%|████████▊ | 325710/371472 [3:44:58<3:22:41, 3.76it/s] 88%|████████▊ | 325711/371472 [3:44:59<3:49:26, 3.32it/s] 88%|████████▊ | 325712/371472 [3:44:59<3:53:17, 3.27it/s] 88%|████████▊ | 325713/371472 [3:44:59<3:56:46, 3.22it/s] 88%|████████▊ | 325714/371472 [3:45:00<3:54:09, 3.26it/s] 88%|████████▊ | 325715/371472 [3:45:00<3:53:48, 3.26it/s] 88%|████████▊ | 325716/371472 [3:45:00<3:50:21, 3.31it/s] 88%|████████▊ | 325717/371472 [3:45:01<3:44:48, 3.39it/s] 88%|████████▊ | 325718/371472 [3:45:01<3:37:55, 3.50it/s] 88%|████████▊ | 325719/371472 [3:45:01<3:37:07, 3.51it/s] 88%|████████▊ | 325720/371472 [3:45:01<3:37:29, 3.51it/s] {'loss': 2.6748, 'learning_rate': 2.1090736710551834e-07, 'epoch': 14.03} + 88%|████████▊ | 325720/371472 [3:45:01<3:37:29, 3.51it/s] 88%|████████▊ | 325721/371472 [3:45:02<3:35:57, 3.53it/s] 88%|████████▊ | 325722/371472 [3:45:02<3:33:09, 3.58it/s] 88%|████████▊ | 325723/371472 [3:45:02<3:30:54, 3.62it/s] 88%|████████▊ | 325724/371472 [3:45:03<3:45:37, 3.38it/s] 88%|████████▊ | 325725/371472 [3:45:03<3:39:00, 3.48it/s] 88%|████████▊ | 325726/371472 [3:45:03<3:30:12, 3.63it/s] 88%|████████▊ | 325727/371472 [3:45:03<3:25:05, 3.72it/s] 88%|████████▊ | 325728/371472 [3:45:04<3:24:56, 3.72it/s] 88%|████████▊ | 325729/371472 [3:45:04<3:32:45, 3.58it/s] 88%|████████▊ | 325730/371472 [3:45:04<3:36:33, 3.52it/s] 88%|████████▊ | 325731/371472 [3:45:04<3:30:34, 3.62it/s] 88%|████████▊ | 325732/371472 [3:45:05<3:24:20, 3.73it/s] 88%|████████▊ | 325733/371472 [3:45:05<3:18:47, 3.83it/s] 88%|████████▊ | 325734/371472 [3:45:05<3:23:56, 3.74it/s] 88%|████████▊ | 325735/371472 [3:45:06<3:25:08, 3.72it/s] 88%|████████▊ | 325736/371472 [3:45:06<3:22:02, 3.77it/s] 88%|████████▊ | 325737/371472 [3:45:06<3:36:30, 3.52it/s] 88%|████████▊ | 325738/371472 [3:45:06<3:58:46, 3.19it/s] 88%|████████▊ | 325739/371472 [3:45:07<3:54:21, 3.25it/s] 88%|████████▊ | 325740/371472 [3:45:07<3:42:33, 3.42it/s] {'loss': 2.5247, 'learning_rate': 2.1085888513003939e-07, 'epoch': 14.03} + 88%|████████▊ | 325740/371472 [3:45:07<3:42:33, 3.42it/s] 88%|████████▊ | 325741/371472 [3:45:07<3:36:41, 3.52it/s] 88%|████████▊ | 325742/371472 [3:45:08<3:38:47, 3.48it/s] 88%|████████▊ | 325743/371472 [3:45:08<3:32:52, 3.58it/s] 88%|████████▊ | 325744/371472 [3:45:08<3:32:29, 3.59it/s] 88%|████████▊ | 325745/371472 [3:45:08<3:38:35, 3.49it/s] 88%|████████▊ | 325746/371472 [3:45:09<3:31:14, 3.61it/s] 88%|████████▊ | 325747/371472 [3:45:09<3:27:33, 3.67it/s] 88%|████████▊ | 325748/371472 [3:45:09<3:31:15, 3.61it/s] 88%|████████▊ | 325749/371472 [3:45:10<3:36:06, 3.53it/s] 88%|████████▊ | 325750/371472 [3:45:10<3:36:23, 3.52it/s] 88%|████████▊ | 325751/371472 [3:45:10<3:30:20, 3.62it/s] 88%|████████▊ | 325752/371472 [3:45:10<3:22:20, 3.77it/s] 88%|████████▊ | 325753/371472 [3:45:11<3:30:08, 3.63it/s] 88%|████████▊ | 325754/371472 [3:45:11<3:34:56, 3.54it/s] 88%|████████▊ | 325755/371472 [3:45:11<3:48:16, 3.34it/s] 88%|████████▊ | 325756/371472 [3:45:12<3:38:56, 3.48it/s] 88%|████████▊ | 325757/371472 [3:45:12<3:32:01, 3.59it/s] 88%|████████▊ | 325758/371472 [3:45:12<3:33:20, 3.57it/s] 88%|████████▊ | 325759/371472 [3:45:12<3:28:49, 3.65it/s] 88%|████████▊ | 325760/371472 [3:45:13<3:32:13, 3.59it/s] {'loss': 2.7505, 'learning_rate': 2.1081040315456054e-07, 'epoch': 14.03} + 88%|████████▊ | 325760/371472 [3:45:13<3:32:13, 3.59it/s] 88%|████████▊ | 325761/371472 [3:45:13<3:54:09, 3.25it/s] 88%|████████▊ | 325762/371472 [3:45:13<3:48:28, 3.33it/s] 88%|████████▊ | 325763/371472 [3:45:14<3:54:13, 3.25it/s] 88%|████████▊ | 325764/371472 [3:45:14<3:47:11, 3.35it/s] 88%|████████▊ | 325765/371472 [3:45:14<3:39:38, 3.47it/s] 88%|████████▊ | 325766/371472 [3:45:14<3:34:56, 3.54it/s] 88%|████████▊ | 325767/371472 [3:45:15<3:39:01, 3.48it/s] 88%|████████▊ | 325768/371472 [3:45:15<3:31:52, 3.60it/s] 88%|████████▊ | 325769/371472 [3:45:15<3:25:50, 3.70it/s] 88%|████████▊ | 325770/371472 [3:45:15<3:24:11, 3.73it/s] 88%|████████▊ | 325771/371472 [3:45:16<3:49:03, 3.33it/s] 88%|████████▊ | 325772/371472 [3:45:16<3:41:39, 3.44it/s] 88%|████████▊ | 325773/371472 [3:45:16<3:34:23, 3.55it/s] 88%|████████▊ | 325774/371472 [3:45:17<3:32:31, 3.58it/s] 88%|████████▊ | 325775/371472 [3:45:17<3:27:39, 3.67it/s] 88%|████████▊ | 325776/371472 [3:45:17<3:19:12, 3.82it/s] 88%|████████▊ | 325777/371472 [3:45:17<3:24:19, 3.73it/s] 88%|████████▊ | 325778/371472 [3:45:18<3:26:44, 3.68it/s] 88%|████████▊ | 325779/371472 [3:45:18<3:26:24, 3.69it/s] 88%|████████▊ | 325780/371472 [3:45:18<3:31:20, 3.60it/s] {'loss': 2.5805, 'learning_rate': 2.107619211790816e-07, 'epoch': 14.03} + 88%|████████▊ | 325780/371472 [3:45:18<3:31:20, 3.60it/s] 88%|████████▊ | 325781/371472 [3:45:19<3:35:16, 3.54it/s] 88%|████████▊ | 325782/371472 [3:45:19<3:34:36, 3.55it/s] 88%|████████▊ | 325783/371472 [3:45:19<3:33:22, 3.57it/s] 88%|████████▊ | 325784/371472 [3:45:19<3:26:57, 3.68it/s] 88%|████████▊ | 325785/371472 [3:45:20<3:35:12, 3.54it/s] 88%|████████▊ | 325786/371472 [3:45:20<3:27:34, 3.67it/s] 88%|████████▊ | 325787/371472 [3:45:20<3:34:27, 3.55it/s] 88%|████████▊ | 325788/371472 [3:45:20<3:30:07, 3.62it/s] 88%|████████▊ | 325789/371472 [3:45:21<3:32:28, 3.58it/s] 88%|████████▊ | 325790/371472 [3:45:21<3:38:41, 3.48it/s] 88%|████████▊ | 325791/371472 [3:45:21<3:50:03, 3.31it/s] 88%|████████▊ | 325792/371472 [3:45:22<3:39:20, 3.47it/s] 88%|████████▊ | 325793/371472 [3:45:22<3:29:58, 3.63it/s] 88%|████████▊ | 325794/371472 [3:45:22<3:30:56, 3.61it/s] 88%|████████▊ | 325795/371472 [3:45:22<3:29:31, 3.63it/s] 88%|████████▊ | 325796/371472 [3:45:23<3:25:12, 3.71it/s] 88%|████████▊ | 325797/371472 [3:45:23<3:23:54, 3.73it/s] 88%|████████▊ | 325798/371472 [3:45:23<3:17:50, 3.85it/s] 88%|████████▊ | 325799/371472 [3:45:24<3:32:50, 3.58it/s] 88%|████████▊ | 325800/371472 [3:45:24<3:27:36, 3.67it/s] {'loss': 2.6958, 'learning_rate': 2.1071343920360276e-07, 'epoch': 14.03} + 88%|████████▊ | 325800/371472 [3:45:24<3:27:36, 3.67it/s] 88%|████████▊ | 325801/371472 [3:45:24<3:42:28, 3.42it/s] 88%|████████▊ | 325802/371472 [3:45:24<3:52:43, 3.27it/s] 88%|████████▊ | 325803/371472 [3:45:25<3:48:02, 3.34it/s] 88%|████████▊ | 325804/371472 [3:45:25<3:51:07, 3.29it/s] 88%|████████▊ | 325805/371472 [3:45:25<3:53:29, 3.26it/s] 88%|████████▊ | 325806/371472 [3:45:26<3:42:11, 3.43it/s] 88%|████████▊ | 325807/371472 [3:45:26<3:52:49, 3.27it/s] 88%|████████▊ | 325808/371472 [3:45:26<3:58:28, 3.19it/s] 88%|████████▊ | 325809/371472 [3:45:27<3:41:44, 3.43it/s] 88%|████████▊ | 325810/371472 [3:45:27<3:38:08, 3.49it/s] 88%|████████▊ | 325811/371472 [3:45:27<3:38:13, 3.49it/s] 88%|████████▊ | 325812/371472 [3:45:27<3:30:06, 3.62it/s] 88%|████████▊ | 325813/371472 [3:45:28<3:30:12, 3.62it/s] 88%|████████▊ | 325814/371472 [3:45:28<3:31:50, 3.59it/s] 88%|████████▊ | 325815/371472 [3:45:28<3:39:38, 3.46it/s] 88%|████████▊ | 325816/371472 [3:45:29<3:49:45, 3.31it/s] 88%|████████▊ | 325817/371472 [3:45:29<3:32:49, 3.58it/s] 88%|████████▊ | 325818/371472 [3:45:29<3:31:46, 3.59it/s] 88%|████████▊ | 325819/371472 [3:45:29<3:29:17, 3.64it/s] 88%|████████▊ | 325820/371472 [3:45:30<3:21:24, 3.78it/s] {'loss': 2.5002, 'learning_rate': 2.106649572281238e-07, 'epoch': 14.03} + 88%|████████▊ | 325820/371472 [3:45:30<3:21:24, 3.78it/s] 88%|████████▊ | 325821/371472 [3:45:30<3:18:57, 3.82it/s] 88%|████████▊ | 325822/371472 [3:45:30<3:19:37, 3.81it/s] 88%|████████▊ | 325823/371472 [3:45:30<3:19:17, 3.82it/s] 88%|████████▊ | 325824/371472 [3:45:31<3:26:13, 3.69it/s] 88%|████████▊ | 325825/371472 [3:45:31<3:24:14, 3.73it/s] 88%|████████▊ | 325826/371472 [3:45:31<3:25:00, 3.71it/s] 88%|████████▊ | 325827/371472 [3:45:31<3:23:38, 3.74it/s] 88%|████████▊ | 325828/371472 [3:45:32<3:19:56, 3.80it/s] 88%|████████▊ | 325829/371472 [3:45:32<3:31:43, 3.59it/s] 88%|████████▊ | 325830/371472 [3:45:32<3:30:47, 3.61it/s] 88%|████████▊ | 325831/371472 [3:45:33<3:36:10, 3.52it/s] 88%|████████▊ | 325832/371472 [3:45:33<3:51:42, 3.28it/s] 88%|████████▊ | 325833/371472 [3:45:33<3:40:15, 3.45it/s] 88%|████████▊ | 325834/371472 [3:45:33<3:36:20, 3.52it/s] 88%|████████▊ | 325835/371472 [3:45:34<3:27:42, 3.66it/s] 88%|████████▊ | 325836/371472 [3:45:34<3:21:24, 3.78it/s] 88%|████████▊ | 325837/371472 [3:45:34<3:32:10, 3.58it/s] 88%|████████▊ | 325838/371472 [3:45:35<3:28:30, 3.65it/s] 88%|████████▊ | 325839/371472 [3:45:35<3:23:04, 3.75it/s] 88%|████████▊ | 325840/371472 [3:45:35<3:30:54, 3.61it/s] {'loss': 2.5137, 'learning_rate': 2.1061647525264498e-07, 'epoch': 14.03} + 88%|████████▊ | 325840/371472 [3:45:35<3:30:54, 3.61it/s] 88%|█████��██▊ | 325841/371472 [3:45:35<3:25:17, 3.70it/s] 88%|████████▊ | 325842/371472 [3:45:36<3:28:45, 3.64it/s] 88%|████████▊ | 325843/371472 [3:45:36<3:30:19, 3.62it/s] 88%|████████▊ | 325844/371472 [3:45:36<3:36:11, 3.52it/s] 88%|████████▊ | 325845/371472 [3:45:37<3:41:27, 3.43it/s] 88%|████████▊ | 325846/371472 [3:45:37<3:41:57, 3.43it/s] 88%|████████▊ | 325847/371472 [3:45:37<3:36:30, 3.51it/s] 88%|████████▊ | 325848/371472 [3:45:37<3:36:01, 3.52it/s] 88%|████████▊ | 325849/371472 [3:45:38<3:31:18, 3.60it/s] 88%|████████▊ | 325850/371472 [3:45:38<3:35:42, 3.53it/s] 88%|████████▊ | 325851/371472 [3:45:38<3:25:47, 3.69it/s] 88%|████████▊ | 325852/371472 [3:45:38<3:26:44, 3.68it/s] 88%|████████▊ | 325853/371472 [3:45:39<3:24:54, 3.71it/s] 88%|████████▊ | 325854/371472 [3:45:39<3:20:33, 3.79it/s] 88%|████████▊ | 325855/371472 [3:45:39<3:18:55, 3.82it/s] 88%|████████▊ | 325856/371472 [3:45:39<3:18:31, 3.83it/s] 88%|████████▊ | 325857/371472 [3:45:40<3:24:15, 3.72it/s] 88%|████████▊ | 325858/371472 [3:45:40<3:32:09, 3.58it/s] 88%|████████▊ | 325859/371472 [3:45:40<3:34:34, 3.54it/s] 88%|████████▊ | 325860/371472 [3:45:41<3:30:53, 3.60it/s] {'loss': 2.6137, 'learning_rate': 2.1056799327716605e-07, 'epoch': 14.04} + 88%|████████▊ | 325860/371472 [3:45:41<3:30:53, 3.60it/s] 88%|████████▊ | 325861/371472 [3:45:41<3:25:40, 3.70it/s] 88%|████████▊ | 325862/371472 [3:45:41<3:22:23, 3.76it/s] 88%|████████▊ | 325863/371472 [3:45:41<3:30:30, 3.61it/s] 88%|████████▊ | 325864/371472 [3:45:42<3:23:10, 3.74it/s] 88%|████████▊ | 325865/371472 [3:45:42<3:37:23, 3.50it/s] 88%|████████▊ | 325866/371472 [3:45:42<3:29:42, 3.62it/s] 88%|████████▊ | 325867/371472 [3:45:43<3:35:10, 3.53it/s] 88%|████████▊ | 325868/371472 [3:45:43<3:26:39, 3.68it/s] 88%|████████▊ | 325869/371472 [3:45:43<3:20:43, 3.79it/s] 88%|████████▊ | 325870/371472 [3:45:43<3:17:17, 3.85it/s] 88%|████████▊ | 325871/371472 [3:45:44<3:14:12, 3.91it/s] 88%|████████▊ | 325872/371472 [3:45:44<3:20:25, 3.79it/s] 88%|████████▊ | 325873/371472 [3:45:44<3:21:23, 3.77it/s] 88%|████████▊ | 325874/371472 [3:45:44<3:19:25, 3.81it/s] 88%|████████▊ | 325875/371472 [3:45:45<3:18:50, 3.82it/s] 88%|████████▊ | 325876/371472 [3:45:45<3:19:15, 3.81it/s] 88%|████████▊ | 325877/371472 [3:45:45<3:15:49, 3.88it/s] 88%|████████▊ | 325878/371472 [3:45:45<3:24:09, 3.72it/s] 88%|████████▊ | 325879/371472 [3:45:46<3:20:04, 3.80it/s] 88%|████████▊ | 325880/371472 [3:45:46<3:29:53, 3.62it/s] {'loss': 2.8274, 'learning_rate': 2.1051951130168718e-07, 'epoch': 14.04} + 88%|████████▊ | 325880/371472 [3:45:46<3:29:53, 3.62it/s] 88%|████████▊ | 325881/371472 [3:45:46<3:31:00, 3.60it/s] 88%|████████▊ | 325882/371472 [3:45:47<3:26:15, 3.68it/s] 88%|████████▊ | 325883/371472 [3:45:47<3:23:46, 3.73it/s] 88%|████████▊ | 325884/371472 [3:45:47<3:23:34, 3.73it/s] 88%|████████▊ | 325885/371472 [3:45:47<3:24:45, 3.71it/s] 88%|████████▊ | 325886/371472 [3:45:48<3:26:15, 3.68it/s] 88%|████████▊ | 325887/371472 [3:45:48<3:31:25, 3.59it/s] 88%|████████▊ | 325888/371472 [3:45:48<3:49:43, 3.31it/s] 88%|████████▊ | 325889/371472 [3:45:49<3:41:43, 3.43it/s] 88%|████████▊ | 325890/371472 [3:45:49<3:38:02, 3.48it/s] 88%|████████▊ | 325891/371472 [3:45:49<3:33:43, 3.55it/s] 88%|████████▊ | 325892/371472 [3:45:49<3:31:15, 3.60it/s] 88%|████████▊ | 325893/371472 [3:45:50<3:36:26, 3.51it/s] 88%|████████▊ | 325894/371472 [3:45:50<3:36:52, 3.50it/s] 88%|████████▊ | 325895/371472 [3:45:50<3:28:10, 3.65it/s] 88%|████████▊ | 325896/371472 [3:45:50<3:22:26, 3.75it/s] 88%|████████▊ | 325897/371472 [3:45:51<3:21:22, 3.77it/s] 88%|████████▊ | 325898/371472 [3:45:51<3:30:22, 3.61it/s] 88%|████████▊ | 325899/371472 [3:45:51<3:26:38, 3.68it/s] 88%|████████▊ | 325900/371472 [3:45:51<3:18:51, 3.82it/s] {'loss': 2.5865, 'learning_rate': 2.1047102932620825e-07, 'epoch': 14.04} + 88%|████████▊ | 325900/371472 [3:45:51<3:18:51, 3.82it/s] 88%|████████▊ | 325901/371472 [3:45:52<3:23:17, 3.74it/s] 88%|████████▊ | 325902/371472 [3:45:52<3:19:03, 3.82it/s] 88%|████████▊ | 325903/371472 [3:45:52<3:17:40, 3.84it/s] 88%|████████▊ | 325904/371472 [3:45:53<3:40:08, 3.45it/s] 88%|████████▊ | 325905/371472 [3:45:53<3:49:21, 3.31it/s] 88%|████████▊ | 325906/371472 [3:45:53<3:48:45, 3.32it/s] 88%|████████▊ | 325907/371472 [3:45:54<4:18:37, 2.94it/s] 88%|████████▊ | 325908/371472 [3:45:54<4:18:03, 2.94it/s] 88%|████████▊ | 325909/371472 [3:45:54<4:02:32, 3.13it/s] 88%|████████▊ | 325910/371472 [3:45:55<3:47:21, 3.34it/s] 88%|████████▊ | 325911/371472 [3:45:55<3:37:23, 3.49it/s] 88%|████████▊ | 325912/371472 [3:45:55<3:30:06, 3.61it/s] 88%|████████▊ | 325913/371472 [3:45:55<3:33:06, 3.56it/s] 88%|████████▊ | 325914/371472 [3:45:56<3:38:35, 3.47it/s] 88%|████████▊ | 325915/371472 [3:45:56<3:28:33, 3.64it/s] 88%|████████▊ | 325916/371472 [3:45:56<3:31:09, 3.60it/s] 88%|████████▊ | 325917/371472 [3:45:57<3:46:04, 3.36it/s] 88%|████████▊ | 325918/371472 [3:45:57<3:35:00, 3.53it/s] 88%|████████▊ | 325919/371472 [3:45:57<3:25:28, 3.69it/s] 88%|████████▊ | 325920/371472 [3:45:57<3:29:42, 3.62it/s] {'loss': 2.6856, 'learning_rate': 2.1042254735072943e-07, 'epoch': 14.04} + 88%|████████▊ | 325920/371472 [3:45:57<3:29:42, 3.62it/s] 88%|████████▊ | 325921/371472 [3:45:58<3:23:47, 3.73it/s] 88%|████████▊ | 325922/371472 [3:45:58<3:25:46, 3.69it/s] 88%|████████▊ | 325923/371472 [3:45:58<3:36:58, 3.50it/s] 88%|████████▊ | 325924/371472 [3:45:59<4:14:53, 2.98it/s] 88%|████████▊ | 325925/371472 [3:45:59<4:08:28, 3.06it/s] 88%|████████▊ | 325926/371472 [3:45:59<3:50:05, 3.30it/s] 88%|████████▊ | 325927/371472 [3:46:00<3:57:43, 3.19it/s] 88%|████████▊ | 325928/371472 [3:46:00<3:45:29, 3.37it/s] 88%|████████▊ | 325929/371472 [3:46:00<3:49:30, 3.31it/s] 88%|████████▊ | 325930/371472 [3:46:00<4:04:44, 3.10it/s] 88%|████████▊ | 325931/371472 [3:46:01<3:49:55, 3.30it/s] 88%|████████▊ | 325932/371472 [3:46:01<3:51:11, 3.28it/s] 88%|████████▊ | 325933/371472 [3:46:01<3:45:45, 3.36it/s] 88%|████████▊ | 325934/371472 [3:46:02<3:35:29, 3.52it/s] 88%|████████▊ | 325935/371472 [3:46:02<3:32:18, 3.57it/s] 88%|████████▊ | 325936/371472 [3:46:02<3:32:15, 3.58it/s] 88%|████████▊ | 325937/371472 [3:46:02<3:28:10, 3.65it/s] 88%|████████▊ | 325938/371472 [3:46:03<3:19:58, 3.80it/s] 88%|████████▊ | 325939/371472 [3:46:03<3:46:25, 3.35it/s] 88%|████████▊ | 325940/371472 [3:46:03<3:32:59, 3.56it/s] {'loss': 2.6057, 'learning_rate': 2.1037406537525047e-07, 'epoch': 14.04} + 88%|████████▊ | 325940/371472 [3:46:03<3:32:59, 3.56it/s] 88%|████████▊ | 325941/371472 [3:46:03<3:22:50, 3.74it/s] 88%|████████▊ | 325942/371472 [3:46:04<3:26:28, 3.68it/s] 88%|████████▊ | 325943/371472 [3:46:04<3:32:40, 3.57it/s] 88%|████████▊ | 325944/371472 [3:46:04<3:39:20, 3.46it/s] 88%|████████▊ | 325945/371472 [3:46:05<3:35:03, 3.53it/s] 88%|████████▊ | 325946/371472 [3:46:05<4:37:55, 2.73it/s] 88%|████████▊ | 325947/371472 [3:46:05<4:19:50, 2.92it/s] 88%|████████▊ | 325948/371472 [3:46:06<4:04:56, 3.10it/s] 88%|████████▊ | 325949/371472 [3:46:06<4:15:38, 2.97it/s] 88%|████████▊ | 325950/371472 [3:46:06<4:25:42, 2.86it/s] 88%|████████▊ | 325951/371472 [3:46:07<4:08:13, 3.06it/s] 88%|████████▊ | 325952/371472 [3:46:07<4:01:05, 3.15it/s] 88%|████████▊ | 325953/371472 [3:46:07<3:54:50, 3.23it/s] 88%|████████▊ | 325954/371472 [3:46:08<3:43:29, 3.39it/s] 88%|████████▊ | 325955/371472 [3:46:08<4:00:32, 3.15it/s] 88%|████████▊ | 325956/371472 [3:46:08<3:47:48, 3.33it/s] 88%|████████▊ | 325957/371472 [3:46:08<3:34:49, 3.53it/s] 88%|████████▊ | 325958/371472 [3:46:09<3:49:44, 3.30it/s] 88%|████████▊ | 325959/371472 [3:46:09<3:42:37, 3.41it/s] 88%|████████▊ | 325960/371472 [3:46:09<3:38:34, 3.47it/s] {'loss': 2.4845, 'learning_rate': 2.1032558339977162e-07, 'epoch': 14.04} + 88%|████████▊ | 325960/371472 [3:46:09<3:38:34, 3.47it/s] 88%|████████▊ | 325961/371472 [3:46:10<3:34:22, 3.54it/s] 88%|████████▊ | 325962/371472 [3:46:10<3:33:21, 3.55it/s] 88%|████████▊ | 325963/371472 [3:46:10<3:29:29, 3.62it/s] 88%|████████▊ | 325964/371472 [3:46:10<3:21:24, 3.77it/s] 88%|████████▊ | 325965/371472 [3:46:11<3:17:20, 3.84it/s] 88%|████████▊ | 325966/371472 [3:46:11<3:20:22, 3.79it/s] 88%|████████▊ | 325967/371472 [3:46:11<3:16:29, 3.86it/s] 88%|████████▊ | 325968/371472 [3:46:11<3:23:32, 3.73it/s] 88%|████████▊ | 325969/371472 [3:46:12<3:35:35, 3.52it/s] 88%|████████▊ | 325970/371472 [3:46:12<3:29:04, 3.63it/s] 88%|████████▊ | 325971/371472 [3:46:12<3:44:07, 3.38it/s] 88%|████████▊ | 325972/371472 [3:46:13<3:47:22, 3.34it/s] 88%|████████▊ | 325973/371472 [3:46:13<3:42:20, 3.41it/s] 88%|████████▊ | 325974/371472 [3:46:13<3:38:41, 3.47it/s] 88%|████████▊ | 325975/371472 [3:46:14<3:35:13, 3.52it/s] 88%|████████▊ | 325976/371472 [3:46:14<3:34:55, 3.53it/s] 88%|████████▊ | 325977/371472 [3:46:14<3:33:06, 3.56it/s] 88%|████████▊ | 325978/371472 [3:46:14<3:50:17, 3.29it/s] 88%|████████▊ | 325979/371472 [3:46:15<3:42:39, 3.41it/s] 88%|████████▊ | 325980/371472 [3:46:15<3:39:02, 3.46it/s] {'loss': 2.5936, 'learning_rate': 2.102771014242927e-07, 'epoch': 14.04} + 88%|████████▊ | 325980/371472 [3:46:15<3:39:02, 3.46it/s] 88%|████████▊ | 325981/371472 [3:46:15<3:28:49, 3.63it/s] 88%|████████▊ | 325982/371472 [3:46:16<3:26:55, 3.66it/s] 88%|████████▊ | 325983/371472 [3:46:16<3:17:54, 3.83it/s] 88%|████████▊ | 325984/371472 [3:46:16<3:32:45, 3.56it/s] 88%|████████▊ | 325985/371472 [3:46:16<3:40:55, 3.43it/s] 88%|████████▊ | 325986/371472 [3:46:17<3:37:33, 3.48it/s] 88%|████████▊ | 325987/371472 [3:46:17<3:40:34, 3.44it/s] 88%|████████▊ | 325988/371472 [3:46:17<3:30:45, 3.60it/s] 88%|████████▊ | 325989/371472 [3:46:17<3:22:46, 3.74it/s] 88%|████████▊ | 325990/371472 [3:46:18<3:26:06, 3.68it/s] 88%|████████▊ | 325991/371472 [3:46:18<3:28:09, 3.64it/s] 88%|████████▊ | 325992/371472 [3:46:18<3:30:31, 3.60it/s] 88%|████████▊ | 325993/371472 [3:46:19<3:33:25, 3.55it/s] 88%|████████▊ | 325994/371472 [3:46:19<3:46:56, 3.34it/s] 88%|████████▊ | 325995/371472 [3:46:19<3:37:09, 3.49it/s] 88%|████████▊ | 325996/371472 [3:46:19<3:23:24, 3.73it/s] 88%|████████▊ | 325997/371472 [3:46:20<3:23:13, 3.73it/s] 88%|████████▊ | 325998/371472 [3:46:20<3:18:43, 3.81it/s] 88%|████████▊ | 325999/371472 [3:46:20<3:19:58, 3.79it/s] 88%|████████▊ | 326000/371472 [3:46:20<3:17:23, 3.84it/s] {'loss': 2.6098, 'learning_rate': 2.1022861944881384e-07, 'epoch': 14.04} + 88%|████████▊ | 326000/371472 [3:46:20<3:17:23, 3.84it/s] 88%|████████▊ | 326001/371472 [3:46:21<3:29:18, 3.62it/s] 88%|████████▊ | 326002/371472 [3:46:21<3:34:41, 3.53it/s] 88%|████████▊ | 326003/371472 [3:46:21<3:53:04, 3.25it/s] 88%|████████▊ | 326004/371472 [3:46:22<3:42:50, 3.40it/s] 88%|████████▊ | 326005/371472 [3:46:22<3:38:38, 3.47it/s] 88%|████████▊ | 326006/371472 [3:46:22<3:24:20, 3.71it/s] 88%|████████▊ | 326007/371472 [3:46:23<3:30:25, 3.60it/s] 88%|████████▊ | 326008/371472 [3:46:23<3:26:43, 3.67it/s] 88%|████████▊ | 326009/371472 [3:46:23<3:43:07, 3.40it/s] 88%|████████▊ | 326010/371472 [3:46:23<3:50:38, 3.29it/s] 88%|████████▊ | 326011/371472 [3:46:24<3:36:50, 3.49it/s] 88%|████████▊ | 326012/371472 [3:46:24<3:38:01, 3.48it/s] 88%|████████▊ | 326013/371472 [3:46:24<3:28:51, 3.63it/s] 88%|████████▊ | 326014/371472 [3:46:24<3:23:55, 3.72it/s] 88%|████████▊ | 326015/371472 [3:46:25<3:31:21, 3.58it/s] 88%|█████���██▊ | 326016/371472 [3:46:25<3:25:32, 3.69it/s] 88%|████████▊ | 326017/371472 [3:46:25<3:21:45, 3.75it/s] 88%|████████▊ | 326018/371472 [3:46:26<3:23:10, 3.73it/s] 88%|████████▊ | 326019/371472 [3:46:26<3:27:01, 3.66it/s] 88%|████████▊ | 326020/371472 [3:46:26<3:24:41, 3.70it/s] {'loss': 2.6872, 'learning_rate': 2.101801374733349e-07, 'epoch': 14.04} + 88%|████████▊ | 326020/371472 [3:46:26<3:24:41, 3.70it/s] 88%|████████▊ | 326021/371472 [3:46:26<3:28:02, 3.64it/s] 88%|████████▊ | 326022/371472 [3:46:27<3:30:11, 3.60it/s] 88%|████████▊ | 326023/371472 [3:46:27<3:32:48, 3.56it/s] 88%|████████▊ | 326024/371472 [3:46:27<3:30:13, 3.60it/s] 88%|████████▊ | 326025/371472 [3:46:27<3:22:06, 3.75it/s] 88%|████████▊ | 326026/371472 [3:46:28<3:26:51, 3.66it/s] 88%|████████▊ | 326027/371472 [3:46:28<3:20:58, 3.77it/s] 88%|████████▊ | 326028/371472 [3:46:28<3:25:44, 3.68it/s] 88%|████████▊ | 326029/371472 [3:46:29<3:26:29, 3.67it/s] 88%|████████▊ | 326030/371472 [3:46:29<3:31:05, 3.59it/s] 88%|████████▊ | 326031/371472 [3:46:29<3:28:07, 3.64it/s] 88%|████████▊ | 326032/371472 [3:46:29<3:40:09, 3.44it/s] 88%|████████▊ | 326033/371472 [3:46:30<3:36:37, 3.50it/s] 88%|████████▊ | 326034/371472 [3:46:30<3:38:18, 3.47it/s] 88%|████████▊ | 326035/371472 [3:46:30<3:34:52, 3.52it/s] 88%|████████▊ | 326036/371472 [3:46:31<3:39:17, 3.45it/s] 88%|████████▊ | 326037/371472 [3:46:31<3:31:56, 3.57it/s] 88%|████████▊ | 326038/371472 [3:46:31<3:22:34, 3.74it/s] 88%|████████▊ | 326039/371472 [3:46:31<3:17:57, 3.83it/s] 88%|████████▊ | 326040/371472 [3:46:32<3:25:14, 3.69it/s] {'loss': 2.6471, 'learning_rate': 2.1013165549785604e-07, 'epoch': 14.04} + 88%|████████▊ | 326040/371472 [3:46:32<3:25:14, 3.69it/s] 88%|████████▊ | 326041/371472 [3:46:32<3:19:44, 3.79it/s] 88%|████████▊ | 326042/371472 [3:46:32<3:19:28, 3.80it/s] 88%|████████▊ | 326043/371472 [3:46:32<3:18:41, 3.81it/s] 88%|████████▊ | 326044/371472 [3:46:33<3:29:09, 3.62it/s] 88%|████████▊ | 326045/371472 [3:46:33<3:33:03, 3.55it/s] 88%|████████▊ | 326046/371472 [3:46:33<3:26:59, 3.66it/s] 88%|████████▊ | 326047/371472 [3:46:34<3:23:20, 3.72it/s] 88%|████████▊ | 326048/371472 [3:46:34<3:23:08, 3.73it/s] 88%|████████▊ | 326049/371472 [3:46:34<3:19:36, 3.79it/s] 88%|████████▊ | 326050/371472 [3:46:34<3:37:26, 3.48it/s] 88%|████████▊ | 326051/371472 [3:46:35<3:30:20, 3.60it/s] 88%|████████▊ | 326052/371472 [3:46:35<3:27:26, 3.65it/s] 88%|████████▊ | 326053/371472 [3:46:35<3:27:35, 3.65it/s] 88%|████████▊ | 326054/371472 [3:46:36<3:42:53, 3.40it/s] 88%|████████▊ | 326055/371472 [3:46:36<3:37:15, 3.48it/s] 88%|████████▊ | 326056/371472 [3:46:36<3:38:45, 3.46it/s] 88%|████████▊ | 326057/371472 [3:46:36<3:31:00, 3.59it/s] 88%|████████▊ | 326058/371472 [3:46:37<3:25:59, 3.67it/s] 88%|████████▊ | 326059/371472 [3:46:37<3:27:19, 3.65it/s] 88%|████████▊ | 326060/371472 [3:46:37<3:24:55, 3.69it/s] {'loss': 2.6495, 'learning_rate': 2.100831735223771e-07, 'epoch': 14.04} + 88%|████████▊ | 326060/371472 [3:46:37<3:24:55, 3.69it/s] 88%|████████▊ | 326061/371472 [3:46:37<3:21:23, 3.76it/s] 88%|████████▊ | 326062/371472 [3:46:38<3:26:13, 3.67it/s] 88%|████████▊ | 326063/371472 [3:46:38<3:23:51, 3.71it/s] 88%|████████▊ | 326064/371472 [3:46:38<3:21:20, 3.76it/s] 88%|████████▊ | 326065/371472 [3:46:38<3:24:22, 3.70it/s] 88%|████████▊ | 326066/371472 [3:46:39<3:28:22, 3.63it/s] 88%|████████▊ | 326067/371472 [3:46:39<3:28:03, 3.64it/s] 88%|████████▊ | 326068/371472 [3:46:39<3:36:28, 3.50it/s] 88%|████████▊ | 326069/371472 [3:46:40<3:29:32, 3.61it/s] 88%|████████▊ | 326070/371472 [3:46:40<3:26:49, 3.66it/s] 88%|████████▊ | 326071/371472 [3:46:40<3:29:49, 3.61it/s] 88%|████████▊ | 326072/371472 [3:46:40<3:28:48, 3.62it/s] 88%|████████▊ | 326073/371472 [3:46:41<3:22:54, 3.73it/s] 88%|████████▊ | 326074/371472 [3:46:41<3:22:39, 3.73it/s] 88%|████████▊ | 326075/371472 [3:46:41<3:26:23, 3.67it/s] 88%|████████▊ | 326076/371472 [3:46:41<3:21:50, 3.75it/s] 88%|████████▊ | 326077/371472 [3:46:42<3:15:23, 3.87it/s] 88%|████████▊ | 326078/371472 [3:46:42<3:17:20, 3.83it/s] 88%|████████▊ | 326079/371472 [3:46:42<3:28:36, 3.63it/s] 88%|████████▊ | 326080/371472 [3:46:43<3:21:34, 3.75it/s] {'loss': 2.5127, 'learning_rate': 2.1003469154689826e-07, 'epoch': 14.04} + 88%|████████▊ | 326080/371472 [3:46:43<3:21:34, 3.75it/s] 88%|████████▊ | 326081/371472 [3:46:43<3:25:44, 3.68it/s] 88%|████████▊ | 326082/371472 [3:46:43<3:25:04, 3.69it/s] 88%|████████▊ | 326083/371472 [3:46:43<3:36:14, 3.50it/s] 88%|████████▊ | 326084/371472 [3:46:44<3:44:57, 3.36it/s] 88%|████████▊ | 326085/371472 [3:46:44<3:53:07, 3.24it/s] 88%|████████▊ | 326086/371472 [3:46:44<3:41:50, 3.41it/s] 88%|████████▊ | 326087/371472 [3:46:45<3:32:14, 3.56it/s] 88%|████████▊ | 326088/371472 [3:46:45<3:35:33, 3.51it/s] 88%|████████▊ | 326089/371472 [3:46:45<3:25:39, 3.68it/s] 88%|████████▊ | 326090/371472 [3:46:45<3:18:24, 3.81it/s] 88%|████████▊ | 326091/371472 [3:46:46<3:21:09, 3.76it/s] 88%|████████▊ | 326092/371472 [3:46:46<3:21:24, 3.76it/s] 88%|████████▊ | 326093/371472 [3:46:46<3:37:22, 3.48it/s] 88%|████████▊ | 326094/371472 [3:46:46<3:26:47, 3.66it/s] 88%|████████▊ | 326095/371472 [3:46:47<3:26:32, 3.66it/s] 88%|████████▊ | 326096/371472 [3:46:47<3:22:47, 3.73it/s] 88%|████████▊ | 326097/371472 [3:46:47<3:23:19, 3.72it/s] 88%|████████▊ | 326098/371472 [3:46:48<3:20:52, 3.76it/s] 88%|████████▊ | 326099/371472 [3:46:48<3:23:49, 3.71it/s] 88%|████████▊ | 326100/371472 [3:46:48<4:31:20, 2.79it/s] {'loss': 2.5968, 'learning_rate': 2.0998620957141933e-07, 'epoch': 14.05} + 88%|████████▊ | 326100/371472 [3:46:48<4:31:20, 2.79it/s] 88%|████████▊ | 326101/371472 [3:46:49<4:24:15, 2.86it/s] 88%|████████▊ | 326102/371472 [3:46:49<4:22:39, 2.88it/s] 88%|████████▊ | 326103/371472 [3:46:49<4:04:26, 3.09it/s] 88%|████████▊ | 326104/371472 [3:46:50<3:47:58, 3.32it/s] 88%|████████▊ | 326105/371472 [3:46:50<3:37:09, 3.48it/s] 88%|████████▊ | 326106/371472 [3:46:50<3:30:06, 3.60it/s] 88%|████████▊ | 326107/371472 [3:46:50<3:25:22, 3.68it/s] 88%|████████▊ | 326108/371472 [3:46:51<3:22:59, 3.72it/s] 88%|████████▊ | 326109/371472 [3:46:51<3:21:12, 3.76it/s] 88%|████████▊ | 326110/371472 [3:46:51<3:22:09, 3.74it/s] 88%|████████▊ | 326111/371472 [3:46:51<3:30:58, 3.58it/s] 88%|████████▊ | 326112/371472 [3:46:52<3:27:30, 3.64it/s] 88%|████████▊ | 326113/371472 [3:46:52<3:34:26, 3.53it/s] 88%|████████▊ | 326114/371472 [3:46:52<3:37:03, 3.48it/s] 88%|████████▊ | 326115/371472 [3:46:53<3:34:18, 3.53it/s] 88%|████████▊ | 326116/371472 [3:46:53<3:32:11, 3.56it/s] 88%|████████▊ | 326117/371472 [3:46:53<3:42:08, 3.40it/s] 88%|████████▊ | 326118/371472 [3:46:53<3:37:49, 3.47it/s] 88%|████████▊ | 326119/371472 [3:46:54<3:39:16, 3.45it/s] 88%|████████▊ | 326120/371472 [3:46:54<3:27:29, 3.64it/s] {'loss': 2.7458, 'learning_rate': 2.0993772759594038e-07, 'epoch': 14.05} + 88%|████████▊ | 326120/371472 [3:46:54<3:27:29, 3.64it/s] 88%|████████▊ | 326121/371472 [3:46:54<3:21:53, 3.74it/s] 88%|████████▊ | 326122/371472 [3:46:55<3:24:15, 3.70it/s] 88%|████████▊ | 326123/371472 [3:46:55<3:20:33, 3.77it/s] 88%|████████▊ | 326124/371472 [3:46:55<3:43:41, 3.38it/s] 88%|████████▊ | 326125/371472 [3:46:55<3:56:10, 3.20it/s] 88%|████████▊ | 326126/371472 [3:46:56<3:45:23, 3.35it/s] 88%|████████▊ | 326127/371472 [3:46:56<3:36:48, 3.49it/s] 88%|████████▊ | 326128/371472 [3:46:56<3:52:53, 3.24it/s] 88%|████████▊ | 326129/371472 [3:46:57<3:45:03, 3.36it/s] 88%|████████▊ | 326130/371472 [3:46:57<3:38:23, 3.46it/s] 88%|█���██████▊ | 326131/371472 [3:46:57<3:28:49, 3.62it/s] 88%|████████▊ | 326132/371472 [3:46:57<3:38:09, 3.46it/s] 88%|████████▊ | 326133/371472 [3:46:58<3:28:26, 3.63it/s] 88%|████████▊ | 326134/371472 [3:46:58<3:26:12, 3.66it/s] 88%|████████▊ | 326135/371472 [3:46:58<3:18:53, 3.80it/s] 88%|████████▊ | 326136/371472 [3:46:58<3:13:58, 3.90it/s] 88%|████████▊ | 326137/371472 [3:46:59<3:15:41, 3.86it/s] 88%|████████▊ | 326138/371472 [3:46:59<3:29:03, 3.61it/s] 88%|████████▊ | 326139/371472 [3:46:59<3:33:29, 3.54it/s] 88%|████████▊ | 326140/371472 [3:47:00<3:24:29, 3.69it/s] {'loss': 2.5864, 'learning_rate': 2.0988924562046153e-07, 'epoch': 14.05} + 88%|████████▊ | 326140/371472 [3:47:00<3:24:29, 3.69it/s] 88%|████████▊ | 326141/371472 [3:47:00<3:21:17, 3.75it/s] 88%|████████▊ | 326142/371472 [3:47:00<3:25:36, 3.67it/s] 88%|████████▊ | 326143/371472 [3:47:00<3:26:06, 3.67it/s] 88%|████████▊ | 326144/371472 [3:47:01<3:49:32, 3.29it/s] 88%|████████▊ | 326145/371472 [3:47:01<3:37:29, 3.47it/s] 88%|████████▊ | 326146/371472 [3:47:01<3:38:21, 3.46it/s] 88%|████████▊ | 326147/371472 [3:47:02<3:37:51, 3.47it/s] 88%|████████▊ | 326148/371472 [3:47:02<3:48:51, 3.30it/s] 88%|████████▊ | 326149/371472 [3:47:02<3:39:57, 3.43it/s] 88%|████████▊ | 326150/371472 [3:47:02<3:34:23, 3.52it/s] 88%|████████▊ | 326151/371472 [3:47:03<3:34:30, 3.52it/s] 88%|████████▊ | 326152/371472 [3:47:03<3:26:35, 3.66it/s] 88%|████████▊ | 326153/371472 [3:47:03<3:31:43, 3.57it/s] 88%|████████▊ | 326154/371472 [3:47:04<3:30:02, 3.60it/s] 88%|████████▊ | 326155/371472 [3:47:04<3:36:20, 3.49it/s] 88%|████████▊ | 326156/371472 [3:47:04<3:47:34, 3.32it/s] 88%|████████▊ | 326157/371472 [3:47:04<3:34:36, 3.52it/s] 88%|████████▊ | 326158/371472 [3:47:05<3:29:55, 3.60it/s] 88%|████████▊ | 326159/371472 [3:47:05<3:27:29, 3.64it/s] 88%|████████▊ | 326160/371472 [3:47:05<3:28:26, 3.62it/s] {'loss': 2.6283, 'learning_rate': 2.098407636449826e-07, 'epoch': 14.05} + 88%|████████▊ | 326160/371472 [3:47:05<3:28:26, 3.62it/s] 88%|████████▊ | 326161/371472 [3:47:06<3:38:40, 3.45it/s] 88%|████████▊ | 326162/371472 [3:47:06<3:37:08, 3.48it/s] 88%|████████▊ | 326163/371472 [3:47:06<3:39:20, 3.44it/s] 88%|████████▊ | 326164/371472 [3:47:06<3:42:41, 3.39it/s] 88%|████████▊ | 326165/371472 [3:47:07<3:44:35, 3.36it/s] 88%|████████▊ | 326166/371472 [3:47:07<3:44:18, 3.37it/s] 88%|████████▊ | 326167/371472 [3:47:07<3:31:50, 3.56it/s] 88%|████████▊ | 326168/371472 [3:47:08<3:26:59, 3.65it/s] 88%|████████▊ | 326169/371472 [3:47:08<3:32:18, 3.56it/s] 88%|████████▊ | 326170/371472 [3:47:08<3:22:10, 3.73it/s] 88%|████████▊ | 326171/371472 [3:47:08<3:16:03, 3.85it/s] 88%|████████▊ | 326172/371472 [3:47:09<3:15:42, 3.86it/s] 88%|████████▊ | 326173/371472 [3:47:09<3:09:53, 3.98it/s] 88%|████████▊ | 326174/371472 [3:47:09<3:10:40, 3.96it/s] 88%|████████▊ | 326175/371472 [3:47:09<3:22:29, 3.73it/s] 88%|████████▊ | 326176/371472 [3:47:10<3:28:21, 3.62it/s] 88%|████████▊ | 326177/371472 [3:47:10<3:28:23, 3.62it/s] 88%|████████▊ | 326178/371472 [3:47:10<3:23:50, 3.70it/s] 88%|████████▊ | 326179/371472 [3:47:11<3:26:01, 3.66it/s] 88%|████████▊ | 326180/371472 [3:47:11<3:24:41, 3.69it/s] {'loss': 2.514, 'learning_rate': 2.0979228166950375e-07, 'epoch': 14.05} + 88%|████████▊ | 326180/371472 [3:47:11<3:24:41, 3.69it/s] 88%|████████▊ | 326181/371472 [3:47:11<3:19:45, 3.78it/s] 88%|████████▊ | 326182/371472 [3:47:11<3:19:42, 3.78it/s] 88%|████████▊ | 326183/371472 [3:47:12<3:28:14, 3.62it/s] 88%|████████▊ | 326184/371472 [3:47:12<3:44:24, 3.36it/s] 88%|████████▊ | 326185/371472 [3:47:12<3:48:44, 3.30it/s] 88%|████████▊ | 326186/371472 [3:47:13<3:52:09, 3.25it/s] 88%|████████▊ | 326187/371472 [3:47:13<3:40:55, 3.42it/s] 88%|████████▊ | 326188/371472 [3:47:13<3:33:44, 3.53it/s] 88%|████████▊ | 326189/371472 [3:47:13<3:32:57, 3.54it/s] 88%|████████▊ | 326190/371472 [3:47:14<3:32:45, 3.55it/s] 88%|████████▊ | 326191/371472 [3:47:14<3:34:39, 3.52it/s] 88%|████████▊ | 326192/371472 [3:47:14<3:25:33, 3.67it/s] 88%|████████▊ | 326193/371472 [3:47:14<3:20:40, 3.76it/s] 88%|████████▊ | 326194/371472 [3:47:15<3:20:48, 3.76it/s] 88%|████████▊ | 326195/371472 [3:47:15<3:21:32, 3.74it/s] 88%|████████▊ | 326196/371472 [3:47:15<3:26:44, 3.65it/s] 88%|████████▊ | 326197/371472 [3:47:16<3:33:03, 3.54it/s] 88%|████████▊ | 326198/371472 [3:47:16<3:33:27, 3.53it/s] 88%|████████▊ | 326199/371472 [3:47:16<3:36:24, 3.49it/s] 88%|████████▊ | 326200/371472 [3:47:16<3:36:26, 3.49it/s] {'loss': 2.5769, 'learning_rate': 2.097437996940248e-07, 'epoch': 14.05} + 88%|████████▊ | 326200/371472 [3:47:16<3:36:26, 3.49it/s] 88%|████████▊ | 326201/371472 [3:47:17<3:40:05, 3.43it/s] 88%|████████▊ | 326202/371472 [3:47:17<3:35:45, 3.50it/s] 88%|████████▊ | 326203/371472 [3:47:17<3:31:08, 3.57it/s] 88%|████████▊ | 326204/371472 [3:47:18<3:23:44, 3.70it/s] 88%|████████▊ | 326205/371472 [3:47:18<3:18:29, 3.80it/s] 88%|████████▊ | 326206/371472 [3:47:18<3:34:19, 3.52it/s] 88%|████████▊ | 326207/371472 [3:47:18<3:30:48, 3.58it/s] 88%|████████▊ | 326208/371472 [3:47:19<3:25:14, 3.68it/s] 88%|████████▊ | 326209/371472 [3:47:19<3:28:50, 3.61it/s] 88%|████████▊ | 326210/371472 [3:47:19<3:26:13, 3.66it/s] 88%|████████▊ | 326211/371472 [3:47:19<3:32:09, 3.56it/s] 88%|████████▊ | 326212/371472 [3:47:20<3:30:52, 3.58it/s] 88%|████████▊ | 326213/371472 [3:47:20<3:28:42, 3.61it/s] 88%|████████▊ | 326214/371472 [3:47:20<3:23:15, 3.71it/s] 88%|████████▊ | 326215/371472 [3:47:21<3:25:26, 3.67it/s] 88%|████████▊ | 326216/371472 [3:47:21<3:33:39, 3.53it/s] 88%|████████▊ | 326217/371472 [3:47:21<3:28:19, 3.62it/s] 88%|████████▊ | 326218/371472 [3:47:21<3:23:00, 3.72it/s] 88%|████████▊ | 326219/371472 [3:47:22<3:18:09, 3.81it/s] 88%|████████▊ | 326220/371472 [3:47:22<3:16:42, 3.83it/s] {'loss': 2.5135, 'learning_rate': 2.0969531771854597e-07, 'epoch': 14.05} + 88%|████████▊ | 326220/371472 [3:47:22<3:16:42, 3.83it/s] 88%|████████▊ | 326221/371472 [3:47:22<3:14:52, 3.87it/s] 88%|████████▊ | 326222/371472 [3:47:22<3:21:01, 3.75it/s] 88%|████████▊ | 326223/371472 [3:47:23<3:17:24, 3.82it/s] 88%|████████▊ | 326224/371472 [3:47:23<3:22:27, 3.72it/s] 88%|████████▊ | 326225/371472 [3:47:23<3:30:07, 3.59it/s] 88%|████████▊ | 326226/371472 [3:47:24<3:48:22, 3.30it/s] 88%|████████▊ | 326227/371472 [3:47:24<3:42:14, 3.39it/s] 88%|████████▊ | 326228/371472 [3:47:24<3:45:42, 3.34it/s] 88%|████████▊ | 326229/371472 [3:47:25<3:51:56, 3.25it/s] 88%|████████▊ | 326230/371472 [3:47:25<3:39:19, 3.44it/s] 88%|████████▊ | 326231/371472 [3:47:25<3:28:55, 3.61it/s] 88%|████████▊ | 326232/371472 [3:47:25<3:24:42, 3.68it/s] 88%|████████▊ | 326233/371472 [3:47:26<3:19:59, 3.77it/s] 88%|████████▊ | 326234/371472 [3:47:26<3:30:25, 3.58it/s] 88%|████████▊ | 326235/371472 [3:47:26<3:24:16, 3.69it/s] 88%|████████▊ | 326236/371472 [3:47:26<3:23:49, 3.70it/s] 88%|████████▊ | 326237/371472 [3:47:27<3:19:06, 3.79it/s] 88%|████████▊ | 326238/371472 [3:47:27<3:18:42, 3.79it/s] 88%|████████▊ | 326239/371472 [3:47:27<3:56:38, 3.19it/s] 88%|████████▊ | 326240/371472 [3:47:28<3:39:23, 3.44it/s] {'loss': 2.649, 'learning_rate': 2.0964683574306705e-07, 'epoch': 14.05} + 88%|████████▊ | 326240/371472 [3:47:28<3:39:23, 3.44it/s] 88%|████████▊ | 326241/371472 [3:47:28<3:36:02, 3.49it/s] 88%|████████▊ | 326242/371472 [3:47:28<3:29:40, 3.60it/s] 88%|████████▊ | 326243/371472 [3:47:28<3:30:47, 3.58it/s] 88%|████████▊ | 326244/371472 [3:47:29<3:33:32, 3.53it/s] 88%|████████▊ | 326245/371472 [3:47:29<3:28:57, 3.61it/s] 88%|████████▊ | 326246/371472 [3:47:29<3:21:53, 3.73it/s] 88%|████████▊ | 326247/371472 [3:47:29<3:15:15, 3.86it/s] 88%|████████▊ | 326248/371472 [3:47:30<3:26:47, 3.65it/s] 88%|████████▊ | 326249/371472 [3:47:30<3:22:25, 3.72it/s] 88%|████████▊ | 326250/371472 [3:47:30<3:21:19, 3.74it/s] 88%|████████▊ | 326251/371472 [3:47:31<3:30:05, 3.59it/s] 88%|████████▊ | 326252/371472 [3:47:31<3:25:00, 3.68it/s] 88%|████████▊ | 326253/371472 [3:47:31<3:37:08, 3.47it/s] 88%|████████▊ | 326254/371472 [3:47:31<3:35:52, 3.49it/s] 88%|████████▊ | 326255/371472 [3:47:32<3:41:52, 3.40it/s] 88%|████████▊ | 326256/371472 [3:47:32<3:29:48, 3.59it/s] 88%|████████▊ | 326257/371472 [3:47:32<3:23:31, 3.70it/s] 88%|████████▊ | 326258/371472 [3:47:32<3:17:50, 3.81it/s] 88%|████████▊ | 326259/371472 [3:47:33<3:14:14, 3.88it/s] 88%|████████▊ | 326260/371472 [3:47:33<3:20:23, 3.76it/s] {'loss': 2.7837, 'learning_rate': 2.0959835376758817e-07, 'epoch': 14.05} + 88%|████████▊ | 326260/371472 [3:47:33<3:20:23, 3.76it/s] 88%|████████▊ | 326261/371472 [3:47:33<3:18:12, 3.80it/s] 88%|████████▊ | 326262/371472 [3:47:34<3:17:00, 3.82it/s] 88%|████████▊ | 326263/371472 [3:47:34<3:19:31, 3.78it/s] 88%|████████▊ | 326264/371472 [3:47:34<3:20:50, 3.75it/s] 88%|████████▊ | 326265/371472 [3:47:34<3:30:44, 3.58it/s] 88%|████████▊ | 326266/371472 [3:47:35<3:33:43, 3.53it/s] 88%|████████▊ | 326267/371472 [3:47:35<3:49:09, 3.29it/s] 88%|████████▊ | 326268/371472 [3:47:35<3:54:21, 3.21it/s] 88%|████████▊ | 326269/371472 [3:47:36<3:48:30, 3.30it/s] 88%|████████▊ | 326270/371472 [3:47:36<3:43:18, 3.37it/s] 88%|████████▊ | 326271/371472 [3:47:36<3:38:56, 3.44it/s] 88%|████████▊ | 326272/371472 [3:47:36<3:27:55, 3.62it/s] 88%|████████▊ | 326273/371472 [3:47:37<3:28:03, 3.62it/s] 88%|████████▊ | 326274/371472 [3:47:37<3:25:57, 3.66it/s] 88%|████████▊ | 326275/371472 [3:47:37<3:41:27, 3.40it/s] 88%|████████▊ | 326276/371472 [3:47:38<3:30:35, 3.58it/s] 88%|████████▊ | 326277/371472 [3:47:38<3:34:15, 3.52it/s] 88%|████████▊ | 326278/371472 [3:47:38<3:44:34, 3.35it/s] 88%|████████▊ | 326279/371472 [3:47:38<3:49:44, 3.28it/s] 88%|████████▊ | 326280/371472 [3:47:39<3:43:35, 3.37it/s] {'loss': 2.4911, 'learning_rate': 2.0954987179210922e-07, 'epoch': 14.05} + 88%|████████▊ | 326280/371472 [3:47:39<3:43:35, 3.37it/s] 88%|████████▊ | 326281/371472 [3:47:39<3:41:15, 3.40it/s] 88%|████████▊ | 326282/371472 [3:47:39<3:48:43, 3.29it/s] 88%|████████▊ | 326283/371472 [3:47:40<4:02:30, 3.11it/s] 88%|████████▊ | 326284/371472 [3:47:40<3:53:36, 3.22it/s] 88%|████████▊ | 326285/371472 [3:47:40<3:58:38, 3.16it/s] 88%|████████▊ | 326286/371472 [3:47:41<3:39:24, 3.43it/s] 88%|████████▊ | 326287/371472 [3:47:41<3:34:42, 3.51it/s] 88%|████████▊ | 326288/371472 [3:47:41<3:46:57, 3.32it/s] 88%|████████▊ | 326289/371472 [3:47:42<3:49:33, 3.28it/s] 88%|████████▊ | 326290/371472 [3:47:42<3:48:19, 3.30it/s] 88%|████████▊ | 326291/371472 [3:47:42<3:44:25, 3.36it/s] 88%|████████▊ | 326292/371472 [3:47:42<3:40:26, 3.42it/s] 88%|████████▊ | 326293/371472 [3:47:43<3:39:43, 3.43it/s] 88%|████████▊ | 326294/371472 [3:47:43<3:32:18, 3.55it/s] 88%|████████▊ | 326295/371472 [3:47:43<3:37:23, 3.46it/s] 88%|████████▊ | 326296/371472 [3:47:44<3:40:31, 3.41it/s] 88%|████████▊ | 326297/371472 [3:47:44<3:32:59, 3.54it/s] 88%|████████▊ | 326298/371472 [3:47:44<3:33:13, 3.53it/s] 88%|████████▊ | 326299/371472 [3:47:44<3:23:54, 3.69it/s] 88%|████████▊ | 326300/371472 [3:47:45<3:18:11, 3.80it/s] {'loss': 2.6214, 'learning_rate': 2.0950138981663042e-07, 'epoch': 14.05} + 88%|████████▊ | 326300/371472 [3:47:45<3:18:11, 3.80it/s] 88%|████████▊ | 326301/371472 [3:47:45<3:48:06, 3.30it/s] 88%|████████▊ | 326302/371472 [3:47:45<3:35:17, 3.50it/s] 88%|████████�� | 326303/371472 [3:47:46<3:36:48, 3.47it/s] 88%|████████▊ | 326304/371472 [3:47:46<3:30:19, 3.58it/s] 88%|████████▊ | 326305/371472 [3:47:46<3:34:37, 3.51it/s] 88%|████████▊ | 326306/371472 [3:47:46<3:27:41, 3.62it/s] 88%|████████▊ | 326307/371472 [3:47:47<3:24:05, 3.69it/s] 88%|████████▊ | 326308/371472 [3:47:47<3:23:38, 3.70it/s] 88%|████████▊ | 326309/371472 [3:47:47<3:18:08, 3.80it/s] 88%|████████▊ | 326310/371472 [3:47:47<3:20:28, 3.75it/s] 88%|████████▊ | 326311/371472 [3:47:48<3:29:38, 3.59it/s] 88%|████████▊ | 326312/371472 [3:47:48<3:37:31, 3.46it/s] 88%|████████▊ | 326313/371472 [3:47:48<3:51:32, 3.25it/s] 88%|████████▊ | 326314/371472 [3:47:49<3:40:07, 3.42it/s] 88%|████████▊ | 326315/371472 [3:47:49<3:31:40, 3.56it/s] 88%|████████▊ | 326316/371472 [3:47:49<3:24:55, 3.67it/s] 88%|████████▊ | 326317/371472 [3:47:49<3:21:56, 3.73it/s] 88%|████████▊ | 326318/371472 [3:47:50<3:20:24, 3.76it/s] 88%|████████▊ | 326319/371472 [3:47:50<3:59:28, 3.14it/s] 88%|████████▊ | 326320/371472 [3:47:50<3:54:37, 3.21it/s] {'loss': 2.6214, 'learning_rate': 2.0945290784115146e-07, 'epoch': 14.06} + 88%|████████▊ | 326320/371472 [3:47:50<3:54:37, 3.21it/s] 88%|████████▊ | 326321/371472 [3:47:51<3:51:44, 3.25it/s] 88%|████████▊ | 326322/371472 [3:47:51<3:58:50, 3.15it/s] 88%|████████▊ | 326323/371472 [3:47:51<3:57:01, 3.17it/s] 88%|████████▊ | 326324/371472 [3:47:52<3:44:44, 3.35it/s] 88%|████████▊ | 326325/371472 [3:47:52<3:46:06, 3.33it/s] 88%|████████▊ | 326326/371472 [3:47:52<3:38:48, 3.44it/s] 88%|████████▊ | 326327/371472 [3:47:52<3:33:36, 3.52it/s] 88%|████████▊ | 326328/371472 [3:47:53<3:43:43, 3.36it/s] 88%|████████▊ | 326329/371472 [3:47:53<3:46:26, 3.32it/s] 88%|████████▊ | 326330/371472 [3:47:53<3:33:20, 3.53it/s] 88%|████████▊ | 326331/371472 [3:47:54<3:29:17, 3.59it/s] 88%|████████▊ | 326332/371472 [3:47:54<3:24:48, 3.67it/s] 88%|████████▊ | 326333/371472 [3:47:54<3:54:22, 3.21it/s] 88%|████████▊ | 326334/371472 [3:47:55<3:50:42, 3.26it/s] 88%|████████▊ | 326335/371472 [3:47:55<3:54:30, 3.21it/s] 88%|████████▊ | 326336/371472 [3:47:55<3:50:52, 3.26it/s] 88%|████████▊ | 326337/371472 [3:47:55<3:46:49, 3.32it/s] 88%|████████▊ | 326338/371472 [3:47:56<3:38:31, 3.44it/s] 88%|████████▊ | 326339/371472 [3:47:56<3:37:31, 3.46it/s] 88%|████████▊ | 326340/371472 [3:47:56<3:40:25, 3.41it/s] {'loss': 2.6949, 'learning_rate': 2.0940442586567262e-07, 'epoch': 14.06} + 88%|████████▊ | 326340/371472 [3:47:56<3:40:25, 3.41it/s] 88%|████████▊ | 326341/371472 [3:47:57<3:34:09, 3.51it/s] 88%|████████▊ | 326342/371472 [3:47:57<3:28:51, 3.60it/s] 88%|████████▊ | 326343/371472 [3:47:57<3:43:34, 3.36it/s] 88%|████████▊ | 326344/371472 [3:47:57<3:36:14, 3.48it/s] 88%|████████▊ | 326345/371472 [3:47:58<3:31:40, 3.55it/s] 88%|████████▊ | 326346/371472 [3:47:58<3:27:37, 3.62it/s] 88%|████████▊ | 326347/371472 [3:47:58<3:39:59, 3.42it/s] 88%|████████▊ | 326348/371472 [3:47:59<3:26:18, 3.65it/s] 88%|████████▊ | 326349/371472 [3:47:59<3:35:08, 3.50it/s] 88%|████████▊ | 326350/371472 [3:47:59<3:30:38, 3.57it/s] 88%|████████▊ | 326351/371472 [3:47:59<3:26:26, 3.64it/s] 88%|████████▊ | 326352/371472 [3:48:00<3:22:05, 3.72it/s] 88%|████████▊ | 326353/371472 [3:48:00<3:31:22, 3.56it/s] 88%|████████▊ | 326354/371472 [3:48:00<3:35:04, 3.50it/s] 88%|████████▊ | 326355/371472 [3:48:01<3:38:02, 3.45it/s] 88%|████████▊ | 326356/371472 [3:48:01<3:44:39, 3.35it/s] 88%|████████▊ | 326357/371472 [3:48:01<3:42:40, 3.38it/s] 88%|████████▊ | 326358/371472 [3:48:01<3:49:26, 3.28it/s] 88%|████████▊ | 326359/371472 [3:48:02<3:41:45, 3.39it/s] 88%|████████▊ | 326360/371472 [3:48:02<3:42:57, 3.37it/s] {'loss': 2.5636, 'learning_rate': 2.093559438901937e-07, 'epoch': 14.06} + 88%|████████▊ | 326360/371472 [3:48:02<3:42:57, 3.37it/s] 88%|████████▊ | 326361/371472 [3:48:02<3:35:25, 3.49it/s] 88%|████████▊ | 326362/371472 [3:48:03<3:30:23, 3.57it/s] 88%|████████▊ | 326363/371472 [3:48:03<3:32:00, 3.55it/s] 88%|████████▊ | 326364/371472 [3:48:03<3:28:25, 3.61it/s] 88%|████████▊ | 326365/371472 [3:48:03<3:39:08, 3.43it/s] 88%|████████▊ | 326366/371472 [3:48:04<3:44:11, 3.35it/s] 88%|████████▊ | 326367/371472 [3:48:04<3:33:55, 3.51it/s] 88%|████████▊ | 326368/371472 [3:48:04<3:38:06, 3.45it/s] 88%|████████▊ | 326369/371472 [3:48:05<3:25:50, 3.65it/s] 88%|████████▊ | 326370/371472 [3:48:05<3:29:49, 3.58it/s] 88%|████████▊ | 326371/371472 [3:48:05<3:28:43, 3.60it/s] 88%|████████▊ | 326372/371472 [3:48:05<3:35:58, 3.48it/s] 88%|████████▊ | 326373/371472 [3:48:06<3:30:53, 3.56it/s] 88%|████████▊ | 326374/371472 [3:48:06<3:47:59, 3.30it/s] 88%|████████▊ | 326375/371472 [3:48:06<3:40:54, 3.40it/s] 88%|████████▊ | 326376/371472 [3:48:07<3:36:24, 3.47it/s] 88%|████████▊ | 326377/371472 [3:48:07<3:35:26, 3.49it/s] 88%|████████▊ | 326378/371472 [3:48:07<3:35:23, 3.49it/s] 88%|████████▊ | 326379/371472 [3:48:07<3:29:13, 3.59it/s] 88%|████████▊ | 326380/371472 [3:48:08<3:30:11, 3.58it/s] {'loss': 2.5917, 'learning_rate': 2.0930746191471484e-07, 'epoch': 14.06} + 88%|████████▊ | 326380/371472 [3:48:08<3:30:11, 3.58it/s] 88%|████████▊ | 326381/371472 [3:48:08<3:26:14, 3.64it/s] 88%|████████▊ | 326382/371472 [3:48:08<3:50:41, 3.26it/s] 88%|████████▊ | 326383/371472 [3:48:09<3:43:09, 3.37it/s] 88%|████████▊ | 326384/371472 [3:48:09<3:35:34, 3.49it/s] 88%|████████▊ | 326385/371472 [3:48:09<3:31:34, 3.55it/s] 88%|████████▊ | 326386/371472 [3:48:09<3:30:16, 3.57it/s] 88%|████████▊ | 326387/371472 [3:48:10<3:45:43, 3.33it/s] 88%|████████▊ | 326388/371472 [3:48:10<3:38:44, 3.44it/s] 88%|████████▊ | 326389/371472 [3:48:10<3:35:09, 3.49it/s] 88%|████████▊ | 326390/371472 [3:48:11<3:26:17, 3.64it/s] 88%|████████▊ | 326391/371472 [3:48:11<3:25:29, 3.66it/s] 88%|████████▊ | 326392/371472 [3:48:11<3:22:30, 3.71it/s] 88%|████████▊ | 326393/371472 [3:48:11<3:21:20, 3.73it/s] 88%|████████▊ | 326394/371472 [3:48:12<3:25:31, 3.66it/s] 88%|████████▊ | 326395/371472 [3:48:12<3:20:44, 3.74it/s] 88%|████████▊ | 326396/371472 [3:48:12<3:23:12, 3.70it/s] 88%|████████▊ | 326397/371472 [3:48:13<3:43:26, 3.36it/s] 88%|████████▊ | 326398/371472 [3:48:13<3:46:51, 3.31it/s] 88%|████████▊ | 326399/371472 [3:48:13<3:34:36, 3.50it/s] 88%|████████▊ | 326400/371472 [3:48:13<3:32:41, 3.53it/s] {'loss': 2.5601, 'learning_rate': 2.0925897993923588e-07, 'epoch': 14.06} + 88%|████████▊ | 326400/371472 [3:48:13<3:32:41, 3.53it/s] 88%|████████▊ | 326401/371472 [3:48:14<4:12:28, 2.98it/s] 88%|████████▊ | 326402/371472 [3:48:14<4:10:32, 3.00it/s] 88%|████████▊ | 326403/371472 [3:48:14<4:13:31, 2.96it/s] 88%|████████▊ | 326404/371472 [3:48:15<4:21:07, 2.88it/s] 88%|████████▊ | 326405/371472 [3:48:15<4:15:50, 2.94it/s] 88%|████████▊ | 326406/371472 [3:48:16<4:29:14, 2.79it/s] 88%|████████▊ | 326407/371472 [3:48:16<4:12:51, 2.97it/s] 88%|████████▊ | 326408/371472 [3:48:16<4:05:24, 3.06it/s] 88%|████████▊ | 326409/371472 [3:48:16<3:49:12, 3.28it/s] 88%|████████▊ | 326410/371472 [3:48:17<3:35:54, 3.48it/s] 88%|████████▊ | 326411/371472 [3:48:17<3:31:50, 3.55it/s] 88%|████████▊ | 326412/371472 [3:48:17<3:27:22, 3.62it/s] 88%|████████▊ | 326413/371472 [3:48:17<3:18:41, 3.78it/s] 88%|████████▊ | 326414/371472 [3:48:18<4:09:44, 3.01it/s] 88%|████████▊ | 326415/371472 [3:48:18<4:08:02, 3.03it/s] 88%|████████▊ | 326416/371472 [3:48:19<4:05:52, 3.05it/s] 88%|████████▊ | 326417/371472 [3:48:19<4:02:01, 3.10it/s] 88%|████████▊ | 326418/371472 [3:48:19<3:47:03, 3.31it/s] 88%|████████▊ | 326419/371472 [3:48:19<3:43:11, 3.36it/s] 88%|████████▊ | 326420/371472 [3:48:20<3:44:59, 3.34it/s] {'loss': 2.6498, 'learning_rate': 2.0921049796375706e-07, 'epoch': 14.06} + 88%|████████▊ | 326420/371472 [3:48:20<3:44:59, 3.34it/s] 88%|████████▊ | 326421/371472 [3:48:20<3:45:18, 3.33it/s] 88%|████████▊ | 326422/371472 [3:48:20<3:38:30, 3.44it/s] 88%|████████▊ | 326423/371472 [3:48:21<3:27:01, 3.63it/s] 88%|████████▊ | 326424/371472 [3:48:21<3:29:11, 3.59it/s] 88%|████████▊ | 326425/371472 [3:48:21<3:25:11, 3.66it/s] 88%|████████▊ | 326426/371472 [3:48:21<3:34:53, 3.49it/s] 88%|████████▊ | 326427/371472 [3:48:22<3:29:35, 3.58it/s] 88%|████████▊ | 326428/371472 [3:48:22<3:26:23, 3.64it/s] 88%|████████▊ | 326429/371472 [3:48:22<3:30:38, 3.56it/s] 88%|████████▊ | 326430/371472 [3:48:22<3:26:07, 3.64it/s] 88%|████████▊ | 326431/371472 [3:48:23<3:24:08, 3.68it/s] 88%|████████▊ | 326432/371472 [3:48:23<3:15:46, 3.83it/s] 88%|████████▊ | 326433/371472 [3:48:23<3:18:54, 3.77it/s] 88%|████████▊ | 326434/371472 [3:48:24<3:38:05, 3.44it/s] 88%|████████▊ | 326435/371472 [3:48:24<3:31:48, 3.54it/s] 88%|████████▊ | 326436/371472 [3:48:24<3:38:08, 3.44it/s] 88%|████████▊ | 326437/371472 [3:48:24<3:27:35, 3.62it/s] 88%|████████▊ | 326438/371472 [3:48:25<3:44:47, 3.34it/s] 88%|████████▊ | 326439/371472 [3:48:25<3:39:07, 3.43it/s] 88%|████████▊ | 326440/371472 [3:48:25<3:30:02, 3.57it/s] {'loss': 2.6198, 'learning_rate': 2.091620159882781e-07, 'epoch': 14.06} + 88%|████████▊ | 326440/371472 [3:48:25<3:30:02, 3.57it/s] 88%|████████▊ | 326441/371472 [3:48:26<3:24:47, 3.66it/s] 88%|████████▊ | 326442/371472 [3:48:26<3:26:44, 3.63it/s] 88%|████████▊ | 326443/371472 [3:48:26<3:23:46, 3.68it/s] 88%|████████▊ | 326444/371472 [3:48:26<3:24:18, 3.67it/s] 88%|████████▊ | 326445/371472 [3:48:27<3:31:50, 3.54it/s] 88%|████████▊ | 326446/371472 [3:48:27<3:20:28, 3.74it/s] 88%|████████▊ | 326447/371472 [3:48:27<3:20:10, 3.75it/s] 88%|████████▊ | 326448/371472 [3:48:27<3:14:25, 3.86it/s] 88%|████████▊ | 326449/371472 [3:48:28<3:13:16, 3.88it/s] 88%|████████▊ | 326450/371472 [3:48:28<3:14:46, 3.85it/s] 88%|████████▊ | 326451/371472 [3:48:28<3:14:50, 3.85it/s] 88%|████████▊ | 326452/371472 [3:48:29<3:21:57, 3.72it/s] 88%|████████▊ | 326453/371472 [3:48:29<3:22:49, 3.70it/s] 88%|████████▊ | 326454/371472 [3:48:29<3:19:16, 3.77it/s] 88%|████████▊ | 326455/371472 [3:48:29<3:31:41, 3.54it/s] 88%|████████▊ | 326456/371472 [3:48:30<3:24:41, 3.67it/s] 88%|████████▊ | 326457/371472 [3:48:30<3:24:11, 3.67it/s] 88%|████████▊ | 326458/371472 [3:48:30<3:25:46, 3.65it/s] 88%|████████▊ | 326459/371472 [3:48:30<3:19:04, 3.77it/s] 88%|████████▊ | 326460/371472 [3:48:31<3:26:05, 3.64it/s] {'loss': 2.5998, 'learning_rate': 2.0911353401279928e-07, 'epoch': 14.06} + 88%|████████▊ | 326460/371472 [3:48:31<3:26:05, 3.64it/s] 88%|████████▊ | 326461/371472 [3:48:31<3:21:23, 3.72it/s] 88%|████████▊ | 326462/371472 [3:48:31<3:22:11, 3.71it/s] 88%|████████▊ | 326463/371472 [3:48:32<3:27:02, 3.62it/s] 88%|████████▊ | 326464/371472 [3:48:32<3:30:52, 3.56it/s] 88%|████████▊ | 326465/371472 [3:48:32<3:22:29, 3.70it/s] 88%|████████▊ | 326466/371472 [3:48:32<3:57:11, 3.16it/s] 88%|████████▊ | 326467/371472 [3:48:33<3:50:51, 3.25it/s] 88%|████████▊ | 326468/371472 [3:48:33<3:40:17, 3.40it/s] 88%|████████▊ | 326469/371472 [3:48:33<3:30:02, 3.57it/s] 88%|████████▊ | 326470/371472 [3:48:34<3:21:45, 3.72it/s] 88%|████████▊ | 326471/371472 [3:48:34<3:22:17, 3.71it/s] 88%|████████▊ | 326472/371472 [3:48:34<3:22:42, 3.70it/s] 88%|████████▊ | 326473/371472 [3:48:34<3:23:23, 3.69it/s] 88%|████████▊ | 326474/371472 [3:48:35<3:21:03, 3.73it/s] 88%|████████▊ | 326475/371472 [3:48:35<3:21:37, 3.72it/s] 88%|████████▊ | 326476/371472 [3:48:35<3:25:51, 3.64it/s] 88%|████████▊ | 326477/371472 [3:48:35<3:27:04, 3.62it/s] 88%|████████�� | 326478/371472 [3:48:36<3:24:47, 3.66it/s] 88%|████████▊ | 326479/371472 [3:48:36<3:34:05, 3.50it/s] 88%|████████▊ | 326480/371472 [3:48:36<3:34:46, 3.49it/s] {'loss': 2.6851, 'learning_rate': 2.0906505203732033e-07, 'epoch': 14.06} + 88%|████████▊ | 326480/371472 [3:48:36<3:34:46, 3.49it/s] 88%|████████▊ | 326481/371472 [3:48:37<3:38:40, 3.43it/s] 88%|████████▊ | 326482/371472 [3:48:37<3:34:58, 3.49it/s] 88%|████████▊ | 326483/371472 [3:48:37<3:23:56, 3.68it/s] 88%|████████▊ | 326484/371472 [3:48:37<3:26:55, 3.62it/s] 88%|████████▊ | 326485/371472 [3:48:38<3:20:19, 3.74it/s] 88%|████████▊ | 326486/371472 [3:48:38<3:34:30, 3.50it/s] 88%|████████▊ | 326487/371472 [3:48:38<3:36:10, 3.47it/s] 88%|████████▊ | 326488/371472 [3:48:39<3:40:19, 3.40it/s] 88%|████████▊ | 326489/371472 [3:48:39<3:30:52, 3.56it/s] 88%|████████▊ | 326490/371472 [3:48:39<3:53:02, 3.22it/s] 88%|████████▊ | 326491/371472 [3:48:39<3:46:41, 3.31it/s] 88%|████████▊ | 326492/371472 [3:48:40<3:44:45, 3.34it/s] 88%|████████▊ | 326493/371472 [3:48:40<3:32:43, 3.52it/s] 88%|████████▊ | 326494/371472 [3:48:40<3:30:55, 3.55it/s] 88%|████████▊ | 326495/371472 [3:48:41<3:35:20, 3.48it/s] 88%|████████▊ | 326496/371472 [3:48:41<3:27:49, 3.61it/s] 88%|████████▊ | 326497/371472 [3:48:41<3:23:38, 3.68it/s] 88%|████████▊ | 326498/371472 [3:48:41<3:18:51, 3.77it/s] 88%|████████▊ | 326499/371472 [3:48:42<3:28:49, 3.59it/s] 88%|████████▊ | 326500/371472 [3:48:42<3:46:51, 3.30it/s] {'loss': 2.5409, 'learning_rate': 2.0901657006184148e-07, 'epoch': 14.06} + 88%|████████▊ | 326500/371472 [3:48:42<3:46:51, 3.30it/s] 88%|████████▊ | 326501/371472 [3:48:42<3:35:14, 3.48it/s] 88%|████████▊ | 326502/371472 [3:48:43<3:38:52, 3.42it/s] 88%|████████▊ | 326503/371472 [3:48:43<3:43:26, 3.35it/s] 88%|████████▊ | 326504/371472 [3:48:43<3:44:57, 3.33it/s] 88%|████████▊ | 326505/371472 [3:48:43<3:34:26, 3.49it/s] 88%|████████▊ | 326506/371472 [3:48:44<3:35:40, 3.47it/s] 88%|████████▊ | 326507/371472 [3:48:44<3:34:25, 3.50it/s] 88%|████████▊ | 326508/371472 [3:48:44<3:27:26, 3.61it/s] 88%|████████▊ | 326509/371472 [3:48:45<3:24:02, 3.67it/s] 88%|████████▊ | 326510/371472 [3:48:45<3:18:59, 3.77it/s] 88%|████████▊ | 326511/371472 [3:48:45<3:20:35, 3.74it/s] 88%|████████▊ | 326512/371472 [3:48:45<3:28:41, 3.59it/s] 88%|████████▊ | 326513/371472 [3:48:46<3:24:28, 3.66it/s] 88%|████████▊ | 326514/371472 [3:48:46<3:25:10, 3.65it/s] 88%|████████▊ | 326515/371472 [3:48:46<3:32:54, 3.52it/s] 88%|████████▊ | 326516/371472 [3:48:47<3:35:29, 3.48it/s] 88%|████████▊ | 326517/371472 [3:48:47<3:44:43, 3.33it/s] 88%|████████▊ | 326518/371472 [3:48:47<3:41:00, 3.39it/s] 88%|████████▊ | 326519/371472 [3:48:47<3:30:38, 3.56it/s] 88%|████████▊ | 326520/371472 [3:48:48<3:29:02, 3.58it/s] {'loss': 2.6197, 'learning_rate': 2.0896808808636252e-07, 'epoch': 14.06} + 88%|████████▊ | 326520/371472 [3:48:48<3:29:02, 3.58it/s] 88%|████████▊ | 326521/371472 [3:48:48<3:27:07, 3.62it/s] 88%|████████▊ | 326522/371472 [3:48:48<3:34:46, 3.49it/s] 88%|████████▊ | 326523/371472 [3:48:49<3:34:08, 3.50it/s] 88%|████████▊ | 326524/371472 [3:48:49<3:26:11, 3.63it/s] 88%|████████▊ | 326525/371472 [3:48:49<3:45:07, 3.33it/s] 88%|████████▊ | 326526/371472 [3:48:49<3:34:01, 3.50it/s] 88%|████████▊ | 326527/371472 [3:48:50<3:44:40, 3.33it/s] 88%|████████▊ | 326528/371472 [3:48:50<3:36:30, 3.46it/s] 88%|████████▊ | 326529/371472 [3:48:50<3:39:34, 3.41it/s] 88%|████████▊ | 326530/371472 [3:48:51<3:34:04, 3.50it/s] 88%|████████▊ | 326531/371472 [3:48:51<3:24:48, 3.66it/s] 88%|████████▊ | 326532/371472 [3:48:51<3:34:04, 3.50it/s] 88%|████████▊ | 326533/371472 [3:48:51<3:32:21, 3.53it/s] 88%|████████▊ | 326534/371472 [3:48:52<3:38:46, 3.42it/s] 88%|████████▊ | 326535/371472 [3:48:52<3:35:19, 3.48it/s] 88%|████████▊ | 326536/371472 [3:48:52<3:23:39, 3.68it/s] 88%|████████▊ | 326537/371472 [3:48:52<3:22:26, 3.70it/s] 88%|████████▊ | 326538/371472 [3:48:53<3:21:48, 3.71it/s] 88%|████████▊ | 326539/371472 [3:48:53<3:25:23, 3.65it/s] 88%|████████▊ | 326540/371472 [3:48:53<3:24:32, 3.66it/s] {'loss': 2.602, 'learning_rate': 2.089196061108837e-07, 'epoch': 14.06} + 88%|████████▊ | 326540/371472 [3:48:53<3:24:32, 3.66it/s] 88%|████████▊ | 326541/371472 [3:48:54<3:21:50, 3.71it/s] 88%|████████▊ | 326542/371472 [3:48:54<3:42:04, 3.37it/s] 88%|████████▊ | 326543/371472 [3:48:54<3:34:56, 3.48it/s] 88%|████████▊ | 326544/371472 [3:48:54<3:28:17, 3.59it/s] 88%|████████▊ | 326545/371472 [3:48:55<3:20:22, 3.74it/s] 88%|████████▊ | 326546/371472 [3:48:55<3:21:36, 3.71it/s] 88%|████████▊ | 326547/371472 [3:48:55<3:21:49, 3.71it/s] 88%|████████▊ | 326548/371472 [3:48:56<3:21:15, 3.72it/s] 88%|████████▊ | 326549/371472 [3:48:56<3:34:49, 3.49it/s] 88%|████████▊ | 326550/371472 [3:48:56<3:34:50, 3.48it/s] 88%|████████▊ | 326551/371472 [3:48:56<3:23:55, 3.67it/s] 88%|████████▊ | 326552/371472 [3:48:57<3:37:17, 3.45it/s] 88%|████████▊ | 326553/371472 [3:48:57<3:28:29, 3.59it/s] 88%|████████▊ | 326554/371472 [3:48:57<3:26:18, 3.63it/s] 88%|████████▊ | 326555/371472 [3:48:58<3:32:42, 3.52it/s] 88%|████████▊ | 326556/371472 [3:48:58<3:28:07, 3.60it/s] 88%|████████▊ | 326557/371472 [3:48:58<3:33:12, 3.51it/s] 88%|████████▊ | 326558/371472 [3:48:58<3:39:51, 3.40it/s] 88%|████████▊ | 326559/371472 [3:48:59<3:29:17, 3.58it/s] 88%|████████▊ | 326560/371472 [3:48:59<3:43:29, 3.35it/s] {'loss': 2.6465, 'learning_rate': 2.0887112413540477e-07, 'epoch': 14.07} + 88%|████████▊ | 326560/371472 [3:48:59<3:43:29, 3.35it/s] 88%|████████▊ | 326561/371472 [3:48:59<3:34:46, 3.49it/s] 88%|████████▊ | 326562/371472 [3:49:00<3:50:39, 3.24it/s] 88%|████████▊ | 326563/371472 [3:49:00<3:39:21, 3.41it/s] 88%|████████▊ | 326564/371472 [3:49:00<3:46:53, 3.30it/s] 88%|████████▊ | 326565/371472 [3:49:00<3:33:32, 3.50it/s] 88%|████████▊ | 326566/371472 [3:49:01<3:30:07, 3.56it/s] 88%|████████▊ | 326567/371472 [3:49:01<3:27:32, 3.61it/s] 88%|████████▊ | 326568/371472 [3:49:01<3:27:42, 3.60it/s] 88%|████████▊ | 326569/371472 [3:49:02<3:33:58, 3.50it/s] 88%|████████▊ | 326570/371472 [3:49:02<3:24:44, 3.66it/s] 88%|████████▊ | 326571/371472 [3:49:02<3:36:04, 3.46it/s] 88%|████████▊ | 326572/371472 [3:49:02<3:49:23, 3.26it/s] 88%|████████▊ | 326573/371472 [3:49:03<3:35:23, 3.47it/s] 88%|████████▊ | 326574/371472 [3:49:03<3:28:56, 3.58it/s] 88%|████████▊ | 326575/371472 [3:49:03<3:36:11, 3.46it/s] 88%|████████▊ | 326576/371472 [3:49:04<3:54:39, 3.19it/s] 88%|████████▊ | 326577/371472 [3:49:04<3:56:30, 3.16it/s] 88%|████████▊ | 326578/371472 [3:49:04<3:57:05, 3.16it/s] 88%|████████▊ | 326579/371472 [3:49:05<3:43:46, 3.34it/s] 88%|████████▊ | 326580/371472 [3:49:05<3:46:23, 3.31it/s] {'loss': 2.5901, 'learning_rate': 2.088226421599259e-07, 'epoch': 14.07} + 88%|████████▊ | 326580/371472 [3:49:05<3:46:23, 3.31it/s] 88%|████████▊ | 326581/371472 [3:49:05<3:36:37, 3.45it/s] 88%|████████▊ | 326582/371472 [3:49:05<3:25:32, 3.64it/s] 88%|████████▊ | 326583/371472 [3:49:06<3:17:45, 3.78it/s] 88%|████████▊ | 326584/371472 [3:49:06<3:18:45, 3.76it/s] 88%|████████▊ | 326585/371472 [3:49:06<3:25:33, 3.64it/s] 88%|████████▊ | 326586/371472 [3:49:06<3:19:55, 3.74it/s] 88%|████████▊ | 326587/371472 [3:49:07<3:25:17, 3.64it/s] 88%|████████▊ | 326588/371472 [3:49:07<3:28:47, 3.58it/s] 88%|████████▊ | 326589/371472 [3:49:07<3:35:07, 3.48it/s] 88%|████████▊ | 326590/371472 [3:49:08<3:31:26, 3.54it/s] 88%|████████▊ | 326591/371472 [3:49:08<3:29:55, 3.56it/s] 88%|████████▊ | 326592/371472 [3:49:08<3:26:02, 3.63it/s] 88%|████████▊ | 326593/371472 [3:49:08<3:28:20, 3.59it/s] 88%|████████▊ | 326594/371472 [3:49:09<3:24:44, 3.65it/s] 88%|████████▊ | 326595/371472 [3:49:09<3:29:49, 3.56it/s] 88%|████████▊ | 326596/371472 [3:49:09<3:45:54, 3.31it/s] 88%|████████▊ | 326597/371472 [3:49:10<3:34:43, 3.48it/s] 88%|████████▊ | 326598/371472 [3:49:10<3:29:14, 3.57it/s] 88%|████████▊ | 326599/371472 [3:49:10<3:30:51, 3.55it/s] 88%|████████▊ | 326600/371472 [3:49:10<3:29:09, 3.58it/s] {'loss': 2.629, 'learning_rate': 2.0877416018444697e-07, 'epoch': 14.07} + 88%|████████▊ | 326600/371472 [3:49:10<3:29:09, 3.58it/s] 88%|████████▊ | 326601/371472 [3:49:11<3:25:18, 3.64it/s] 88%|████████▊ | 326602/371472 [3:49:11<3:24:11, 3.66it/s] 88%|████████▊ | 326603/371472 [3:49:11<3:16:14, 3.81it/s] 88%|████████▊ | 326604/371472 [3:49:11<3:22:11, 3.70it/s] 88%|████████▊ | 326605/371472 [3:49:12<3:19:01, 3.76it/s] 88%|████████▊ | 326606/371472 [3:49:12<3:41:14, 3.38it/s] 88%|████████▊ | 326607/371472 [3:49:12<3:29:36, 3.57it/s] 88%|████████▊ | 326608/371472 [3:49:13<3:22:12, 3.70it/s] 88%|████████▊ | 326609/371472 [3:49:13<3:23:52, 3.67it/s] 88%|████████▊ | 326610/371472 [3:49:13<3:32:01, 3.53it/s] 88%|████████▊ | 326611/371472 [3:49:13<3:47:32, 3.29it/s] 88%|████████▊ | 326612/371472 [3:49:14<3:42:34, 3.36it/s] 88%|████████▊ | 326613/371472 [3:49:14<3:51:08, 3.23it/s] 88%|████████▊ | 326614/371472 [3:49:14<3:39:59, 3.40it/s] 88%|████████▊ | 326615/371472 [3:49:15<3:37:36, 3.44it/s] 88%|████████▊ | 326616/371472 [3:49:15<3:40:08, 3.40it/s] 88%|████████▊ | 326617/371472 [3:49:15<3:27:14, 3.61it/s] 88%|████████▊ | 326618/371472 [3:49:15<3:25:37, 3.64it/s] 88%|████████▊ | 326619/371472 [3:49:16<3:24:52, 3.65it/s] 88%|████████▊ | 326620/371472 [3:49:16<3:28:15, 3.59it/s] {'loss': 2.7136, 'learning_rate': 2.0872567820896812e-07, 'epoch': 14.07} + 88%|████████▊ | 326620/371472 [3:49:16<3:28:15, 3.59it/s] 88%|████████▊ | 326621/371472 [3:49:16<3:34:56, 3.48it/s] 88%|████████▊ | 326622/371472 [3:49:17<3:27:49, 3.60it/s] 88%|████████▊ | 326623/371472 [3:49:17<3:23:20, 3.68it/s] 88%|████████▊ | 326624/371472 [3:49:17<3:23:17, 3.68it/s] 88%|████████▊ | 326625/371472 [3:49:17<3:44:52, 3.32it/s] 88%|████████▊ | 326626/371472 [3:49:18<3:39:37, 3.40it/s] 88%|████████▊ | 326627/371472 [3:49:18<3:59:33, 3.12it/s] 88%|████████▊ | 326628/371472 [3:49:18<4:02:52, 3.08it/s] 88%|████████▊ | 326629/371472 [3:49:19<3:54:33, 3.19it/s] 88%|████████▊ | 326630/371472 [3:49:19<4:01:56, 3.09it/s] 88%|████████▊ | 326631/371472 [3:49:19<3:55:31, 3.17it/s] 88%|████████▊ | 326632/371472 [3:49:20<4:00:56, 3.10it/s] 88%|████████▊ | 326633/371472 [3:49:20<3:45:00, 3.32it/s] 88%|████████▊ | 326634/371472 [3:49:20<3:47:46, 3.28it/s] 88%|████████▊ | 326635/371472 [3:49:21<3:34:34, 3.48it/s] 88%|████████▊ | 326636/371472 [3:49:21<3:37:33, 3.43it/s] 88%|████████▊ | 326637/371472 [3:49:21<3:25:53, 3.63it/s] 88%|████████▊ | 326638/371472 [3:49:21<3:28:51, 3.58it/s] 88%|████████▊ | 326639/371472 [3:49:22<3:33:43, 3.50it/s] 88%|████████▊ | 326640/371472 [3:49:22<3:29:11, 3.57it/s] {'loss': 2.5696, 'learning_rate': 2.0867719623348916e-07, 'epoch': 14.07} + 88%|████████▊ | 326640/371472 [3:49:22<3:29:11, 3.57it/s] 88%|████████▊ | 326641/371472 [3:49:22<3:25:00, 3.64it/s] 88%|████████▊ | 326642/371472 [3:49:22<3:21:55, 3.70it/s] 88%|████████▊ | 326643/371472 [3:49:23<3:17:51, 3.78it/s] 88%|████████▊ | 326644/371472 [3:49:23<3:18:59, 3.75it/s] 88%|████████▊ | 326645/371472 [3:49:23<3:21:39, 3.70it/s] 88%|████████▊ | 326646/371472 [3:49:24<3:33:03, 3.51it/s] 88%|████████▊ | 326647/371472 [3:49:24<3:26:29, 3.62it/s] 88%|████████▊ | 326648/371472 [3:49:24<3:23:59, 3.66it/s] 88%|████████▊ | 326649/371472 [3:49:24<3:30:49, 3.54it/s] 88%|████████▊ | 326650/371472 [3:49:25<3:25:44, 3.63it/s] 88%|████████▊ | 326651/371472 [3:49:25<3:19:51, 3.74it/s] 88%|████████▊ | 326652/371472 [3:49:25<3:25:12, 3.64it/s] 88%|████████▊ | 326653/371472 [3:49:26<3:31:40, 3.53it/s] 88%|████████▊ | 326654/371472 [3:49:26<3:28:26, 3.58it/s] 88%|████████▊ | 326655/371472 [3:49:26<3:28:05, 3.59it/s] 88%|████████▊ | 326656/371472 [3:49:26<3:25:22, 3.64it/s] 88%|████████▊ | 326657/371472 [3:49:27<3:22:18, 3.69it/s] 88%|████████▊ | 326658/371472 [3:49:27<3:28:52, 3.58it/s] 88%|████████▊ | 326659/371472 [3:49:27<3:47:24, 3.28it/s] 88%|████████▊ | 326660/371472 [3:49:28<3:54:41, 3.18it/s] {'loss': 2.8239, 'learning_rate': 2.0862871425801024e-07, 'epoch': 14.07} + 88%|████████▊ | 326660/371472 [3:49:28<3:54:41, 3.18it/s] 88%|████████▊ | 326661/371472 [3:49:28<3:42:55, 3.35it/s] 88%|████████▊ | 326662/371472 [3:49:28<3:34:02, 3.49it/s] 88%|████████▊ | 326663/371472 [3:49:28<3:26:29, 3.62it/s] 88%|████████▊ | 326664/371472 [3:49:29<3:34:22, 3.48it/s] 88%|████████▊ | 326665/371472 [3:49:29<3:29:29, 3.56it/s] 88%|████████▊ | 326666/371472 [3:49:29<3:30:20, 3.55it/s] 88%|████████▊ | 326667/371472 [3:49:30<3:27:27, 3.60it/s] 88%|████████▊ | 326668/371472 [3:49:30<3:29:15, 3.57it/s] 88%|████████▊ | 326669/371472 [3:49:30<3:33:23, 3.50it/s] 88%|████████▊ | 326670/371472 [3:49:30<3:43:50, 3.34it/s] 88%|████████▊ | 326671/371472 [3:49:31<3:56:02, 3.16it/s] 88%|████████▊ | 326672/371472 [3:49:31<3:45:46, 3.31it/s] 88%|████████▊ | 326673/371472 [3:49:31<3:49:18, 3.26it/s] 88%|████████▊ | 326674/371472 [3:49:32<3:48:14, 3.27it/s] 88%|████████▊ | 326675/371472 [3:49:32<3:39:35, 3.40it/s] 88%|████████▊ | 326676/371472 [3:49:32<3:30:28, 3.55it/s] 88%|████████▊ | 326677/371472 [3:49:33<3:40:28, 3.39it/s] 88%|████████▊ | 326678/371472 [3:49:33<3:38:01, 3.42it/s] 88%|████████▊ | 326679/371472 [3:49:33<3:38:03, 3.42it/s] 88%|████████▊ | 326680/371472 [3:49:33<3:47:58, 3.27it/s] {'loss': 2.6161, 'learning_rate': 2.085802322825314e-07, 'epoch': 14.07} + 88%|████████▊ | 326680/371472 [3:49:33<3:47:58, 3.27it/s] 88%|████████▊ | 326681/371472 [3:49:34<3:47:11, 3.29it/s] 88%|████████▊ | 326682/371472 [3:49:34<3:37:41, 3.43it/s] 88%|████████▊ | 326683/371472 [3:49:34<3:46:46, 3.29it/s] 88%|████████▊ | 326684/371472 [3:49:35<3:38:39, 3.41it/s] 88%|████████▊ | 326685/371472 [3:49:35<3:33:23, 3.50it/s] 88%|████████▊ | 326686/371472 [3:49:35<3:38:17, 3.42it/s] 88%|████████▊ | 326687/371472 [3:49:35<3:28:08, 3.59it/s] 88%|████████▊ | 326688/371472 [3:49:36<3:21:53, 3.70it/s] 88%|████████▊ | 326689/371472 [3:49:36<3:25:05, 3.64it/s] 88%|████████▊ | 326690/371472 [3:49:36<3:26:58, 3.61it/s] 88%|████████▊ | 326691/371472 [3:49:37<3:27:37, 3.59it/s] 88%|████████▊ | 326692/371472 [3:49:37<3:22:30, 3.69it/s] 88%|████████▊ | 326693/371472 [3:49:37<3:26:50, 3.61it/s] 88%|████████▊ | 326694/371472 [3:49:37<3:33:51, 3.49it/s] 88%|████████▊ | 326695/371472 [3:49:38<3:29:30, 3.56it/s] 88%|████████▊ | 326696/371472 [3:49:38<3:32:34, 3.51it/s] 88%|████████▊ | 326697/371472 [3:49:38<4:32:27, 2.74it/s] 88%|████████▊ | 326698/371472 [3:49:39<4:11:28, 2.97it/s] 88%|████████▊ | 326699/371472 [3:49:39<3:50:02, 3.24it/s] 88%|████████▊ | 326700/371472 [3:49:39<3:55:22, 3.17it/s] {'loss': 2.6433, 'learning_rate': 2.0853175030705246e-07, 'epoch': 14.07} + 88%|████████▊ | 326700/371472 [3:49:39<3:55:22, 3.17it/s] 88%|████████▊ | 326701/371472 [3:49:40<3:46:24, 3.30it/s] 88%|████████▊ | 326702/371472 [3:49:40<3:45:28, 3.31it/s] 88%|████████▊ | 326703/371472 [3:49:40<3:39:42, 3.40it/s] 88%|████████▊ | 326704/371472 [3:49:40<3:26:18, 3.62it/s] 88%|████████▊ | 326705/371472 [3:49:41<3:21:41, 3.70it/s] 88%|████████▊ | 326706/371472 [3:49:41<3:21:14, 3.71it/s] 88%|████████▊ | 326707/371472 [3:49:41<3:21:54, 3.70it/s] 88%|████████▊ | 326708/371472 [3:49:41<3:22:35, 3.68it/s] 88%|████████▊ | 326709/371472 [3:49:42<3:28:30, 3.58it/s] 88%|████████▊ | 326710/371472 [3:49:42<3:29:42, 3.56it/s] 88%|████████▊ | 326711/371472 [3:49:42<3:43:19, 3.34it/s] 88%|████████▊ | 326712/371472 [3:49:43<3:36:59, 3.44it/s] 88%|████████▊ | 326713/371472 [3:49:43<3:26:35, 3.61it/s] 88%|████████▊ | 326714/371472 [3:49:43<3:35:28, 3.46it/s] 88%|████████▊ | 326715/371472 [3:49:44<3:40:49, 3.38it/s] 88%|████████▊ | 326716/371472 [3:49:44<3:40:19, 3.39it/s] 88%|████████▊ | 326717/371472 [3:49:44<3:34:17, 3.48it/s] 88%|████████▊ | 326718/371472 [3:49:44<3:21:31, 3.70it/s] 88%|████████▊ | 326719/371472 [3:49:45<3:24:53, 3.64it/s] 88%|████████▊ | 326720/371472 [3:49:45<3:48:32, 3.26it/s] {'loss': 2.6542, 'learning_rate': 2.084832683315736e-07, 'epoch': 14.07} + 88%|████████▊ | 326720/371472 [3:49:45<3:48:32, 3.26it/s] 88%|████████▊ | 326721/371472 [3:49:45<3:38:11, 3.42it/s] 88%|████████▊ | 326722/371472 [3:49:46<3:41:21, 3.37it/s] 88%|████████▊ | 326723/371472 [3:49:46<3:30:46, 3.54it/s] 88%|████████▊ | 326724/371472 [3:49:46<3:29:10, 3.57it/s] 88%|████████▊ | 326725/371472 [3:49:46<3:31:47, 3.52it/s] 88%|████████▊ | 326726/371472 [3:49:47<3:32:36, 3.51it/s] 88%|████████▊ | 326727/371472 [3:49:47<3:37:06, 3.43it/s] 88%|████████▊ | 326728/371472 [3:49:47<3:46:09, 3.30it/s] 88%|████████▊ | 326729/371472 [3:49:48<3:37:02, 3.44it/s] 88%|████████▊ | 326730/371472 [3:49:48<3:28:13, 3.58it/s] 88%|████████▊ | 326731/371472 [3:49:48<3:26:21, 3.61it/s] 88%|████████▊ | 326732/371472 [3:49:48<3:21:45, 3.70it/s] 88%|████████▊ | 326733/371472 [3:49:49<3:26:51, 3.60it/s] 88%|████████▊ | 326734/371472 [3:49:49<3:33:19, 3.50it/s] 88%|████████▊ | 326735/371472 [3:49:49<3:30:19, 3.55it/s] 88%|████████▊ | 326736/371472 [3:49:50<3:40:24, 3.38it/s] 88%|████████▊ | 326737/371472 [3:49:50<3:35:19, 3.46it/s] 88%|████████▊ | 326738/371472 [3:49:50<3:35:31, 3.46it/s] 88%|████████▊ | 326739/371472 [3:49:50<3:29:53, 3.55it/s] 88%|████████▊ | 326740/371472 [3:49:51<3:29:26, 3.56it/s] {'loss': 2.552, 'learning_rate': 2.0843478635609468e-07, 'epoch': 14.07} + 88%|████████▊ | 326740/371472 [3:49:51<3:29:26, 3.56it/s] 88%|████████▊ | 326741/371472 [3:49:51<3:24:14, 3.65it/s] 88%|████████▊ | 326742/371472 [3:49:51<3:23:36, 3.66it/s] 88%|████████▊ | 326743/371472 [3:49:51<3:21:39, 3.70it/s] 88%|████████▊ | 326744/371472 [3:49:52<3:30:49, 3.54it/s] 88%|████████▊ | 326745/371472 [3:49:52<3:22:47, 3.68it/s] 88%|████████▊ | 326746/371472 [3:49:52<3:19:47, 3.73it/s] 88%|████████▊ | 326747/371472 [3:49:53<3:16:14, 3.80it/s] 88%|████████▊ | 326748/371472 [3:49:53<3:18:49, 3.75it/s] 88%|████████▊ | 326749/371472 [3:49:53<3:27:04, 3.60it/s] 88%|████████▊ | 326750/371472 [3:49:53<3:27:21, 3.59it/s] 88%|████████▊ | 326751/371472 [3:49:54<3:41:27, 3.37it/s] 88%|████████▊ | 326752/371472 [3:49:54<3:33:49, 3.49it/s] 88%|████████▊ | 326753/371472 [3:49:54<3:33:20, 3.49it/s] 88%|████████▊ | 326754/371472 [3:49:55<3:37:45, 3.42it/s] 88%|████████▊ | 326755/371472 [3:49:55<3:29:51, 3.55it/s] 88%|████████▊ | 326756/371472 [3:49:55<3:20:19, 3.72it/s] 88%|████████▊ | 326757/371472 [3:49:55<3:20:10, 3.72it/s] 88%|████████▊ | 326758/371472 [3:49:56<3:17:46, 3.77it/s] 88%|████████▊ | 326759/371472 [3:49:56<3:16:50, 3.79it/s] 88%|████████▊ | 326760/371472 [3:49:56<3:25:56, 3.62it/s] {'loss': 2.4334, 'learning_rate': 2.0838630438061583e-07, 'epoch': 14.07} + 88%|████████▊ | 326760/371472 [3:49:56<3:25:56, 3.62it/s] 88%|████████▊ | 326761/371472 [3:49:57<3:40:11, 3.38it/s] 88%|████████▊ | 326762/371472 [3:49:57<3:38:20, 3.41it/s] 88%|████████▊ | 326763/371472 [3:49:57<3:34:10, 3.48it/s] 88%|████████▊ | 326764/371472 [3:49:57<3:30:38, 3.54it/s] 88%|████████▊ | 326765/371472 [3:49:58<3:20:22, 3.72it/s] 88%|████████▊ | 326766/371472 [3:49:58<3:23:56, 3.65it/s] 88%|████████▊ | 326767/371472 [3:49:58<3:21:31, 3.70it/s] 88%|████████▊ | 326768/371472 [3:49:58<3:20:43, 3.71it/s] 88%|████████▊ | 326769/371472 [3:49:59<3:27:53, 3.58it/s] 88%|████████▊ | 326770/371472 [3:49:59<3:17:36, 3.77it/s] 88%|████████▊ | 326771/371472 [3:49:59<3:20:35, 3.71it/s] 88%|████████▊ | 326772/371472 [3:49:59<3:18:24, 3.75it/s] 88%|████████▊ | 326773/371472 [3:50:00<3:13:40, 3.85it/s] 88%|████████▊ | 326774/371472 [3:50:00<3:14:56, 3.82it/s] 88%|████████▊ | 326775/371472 [3:50:00<3:14:18, 3.83it/s] 88%|████████▊ | 326776/371472 [3:50:00<3:13:03, 3.86it/s] 88%|████████▊ | 326777/371472 [3:50:01<3:09:24, 3.93it/s] 88%|████████▊ | 326778/371472 [3:50:01<3:07:44, 3.97it/s] 88%|████████▊ | 326779/371472 [3:50:01<3:08:07, 3.96it/s] 88%|████████▊ | 326780/371472 [3:50:01<3:07:50, 3.97it/s] {'loss': 2.6701, 'learning_rate': 2.0833782240513688e-07, 'epoch': 14.08} + 88%|████████▊ | 326780/371472 [3:50:01<3:07:50, 3.97it/s] 88%|████████▊ | 326781/371472 [3:50:02<3:11:33, 3.89it/s] 88%|████████▊ | 326782/371472 [3:50:02<3:09:57, 3.92it/s] 88%|████████▊ | 326783/371472 [3:50:02<3:29:03, 3.56it/s] 88%|████████▊ | 326784/371472 [3:50:03<3:27:46, 3.58it/s] 88%|████████▊ | 326785/371472 [3:50:03<3:28:24, 3.57it/s] 88%|████████▊ | 326786/371472 [3:50:03<3:29:34, 3.55it/s] 88%|████████▊ | 326787/371472 [3:50:03<3:26:56, 3.60it/s] 88%|████████▊ | 326788/371472 [3:50:04<3:21:26, 3.70it/s] 88%|████████▊ | 326789/371472 [3:50:04<3:27:25, 3.59it/s] 88%|████████▊ | 326790/371472 [3:50:04<3:22:32, 3.68it/s] 88%|████████▊ | 326791/371472 [3:50:05<3:34:38, 3.47it/s] 88%|████████▊ | 326792/371472 [3:50:05<3:33:20, 3.49it/s] 88%|████████▊ | 326793/371472 [3:50:05<3:41:40, 3.36it/s] 88%|████████▊ | 326794/371472 [3:50:05<3:39:47, 3.39it/s] 88%|████████▊ | 326795/371472 [3:50:06<3:39:55, 3.39it/s] 88%|████████▊ | 326796/371472 [3:50:06<3:30:21, 3.54it/s] 88%|████████▊ | 326797/371472 [3:50:06<3:29:12, 3.56it/s] 88%|████████▊ | 326798/371472 [3:50:07<3:24:20, 3.64it/s] 88%|████████▊ | 326799/371472 [3:50:07<3:27:52, 3.58it/s] 88%|████████▊ | 326800/371472 [3:50:07<3:28:07, 3.58it/s] {'loss': 2.5073, 'learning_rate': 2.0828934042965805e-07, 'epoch': 14.08} + 88%|████████▊ | 326800/371472 [3:50:07<3:28:07, 3.58it/s] 88%|████████▊ | 326801/371472 [3:50:07<3:28:00, 3.58it/s] 88%|████████▊ | 326802/371472 [3:50:08<3:22:31, 3.68it/s] 88%|████████▊ | 326803/371472 [3:50:08<3:24:13, 3.65it/s] 88%|████████▊ | 326804/371472 [3:50:08<3:20:34, 3.71it/s] 88%|████████▊ | 326805/371472 [3:50:08<3:16:40, 3.79it/s] 88%|████████▊ | 326806/371472 [3:50:09<3:11:55, 3.88it/s] 88%|████████▊ | 326807/371472 [3:50:09<3:14:35, 3.83it/s] 88%|████████▊ | 326808/371472 [3:50:09<3:13:20, 3.85it/s] 88%|████████▊ | 326809/371472 [3:50:10<3:23:22, 3.66it/s] 88%|████████▊ | 326810/371472 [3:50:10<3:18:33, 3.75it/s] 88%|████████▊ | 326811/371472 [3:50:10<3:24:24, 3.64it/s] 88%|████████▊ | 326812/371472 [3:50:10<3:27:11, 3.59it/s] 88%|████████▊ | 326813/371472 [3:50:11<3:25:10, 3.63it/s] 88%|████████▊ | 326814/371472 [3:50:11<3:22:11, 3.68it/s] 88%|████████▊ | 326815/371472 [3:50:11<3:28:50, 3.56it/s] 88%|████████▊ | 326816/371472 [3:50:12<3:43:29, 3.33it/s] 88%|████████▊ | 326817/371472 [3:50:12<3:43:35, 3.33it/s] 88%|████████▊ | 326818/371472 [3:50:12<3:38:22, 3.41it/s] 88%|████████▊ | 326819/371472 [3:50:12<3:45:33, 3.30it/s] 88%|████████▊ | 326820/371472 [3:50:13<3:40:33, 3.37it/s] {'loss': 2.6875, 'learning_rate': 2.082408584541791e-07, 'epoch': 14.08} + 88%|████████▊ | 326820/371472 [3:50:13<3:40:33, 3.37it/s] 88%|████████▊ | 326821/371472 [3:50:13<3:47:40, 3.27it/s] 88%|████████▊ | 326822/371472 [3:50:13<3:39:13, 3.39it/s] 88%|████████▊ | 326823/371472 [3:50:14<3:34:45, 3.47it/s] 88%|████████▊ | 326824/371472 [3:50:14<3:24:35, 3.64it/s] 88%|████████▊ | 326825/371472 [3:50:14<3:27:03, 3.59it/s] 88%|████████▊ | 326826/371472 [3:50:14<3:29:20, 3.55it/s] 88%|████████▊ | 326827/371472 [3:50:15<3:29:03, 3.56it/s] 88%|████████▊ | 326828/371472 [3:50:15<3:20:55, 3.70it/s] 88%|████████▊ | 326829/371472 [3:50:15<3:28:34, 3.57it/s] 88%|████████▊ | 326830/371472 [3:50:16<3:21:59, 3.68it/s] 88%|████████▊ | 326831/371472 [3:50:16<3:16:58, 3.78it/s] 88%|████████▊ | 326832/371472 [3:50:16<3:12:47, 3.86it/s] 88%|████████▊ | 326833/371472 [3:50:16<3:18:55, 3.74it/s] 88%|████████▊ | 326834/371472 [3:50:17<3:17:10, 3.77it/s] 88%|████████▊ | 326835/371472 [3:50:17<3:13:15, 3.85it/s] 88%|████████▊ | 326836/371472 [3:50:17<3:11:50, 3.88it/s] 88%|████████▊ | 326837/371472 [3:50:17<3:11:26, 3.89it/s] 88%|████████▊ | 326838/371472 [3:50:18<3:12:08, 3.87it/s] 88%|████████▊ | 326839/371472 [3:50:18<3:28:14, 3.57it/s] 88%|████████▊ | 326840/371472 [3:50:18<3:27:20, 3.59it/s] {'loss': 2.6327, 'learning_rate': 2.0819237647870025e-07, 'epoch': 14.08} + 88%|████████▊ | 326840/371472 [3:50:18<3:27:20, 3.59it/s] 88%|████████▊ | 326841/371472 [3:50:18<3:25:00, 3.63it/s] 88%|████████▊ | 326842/371472 [3:50:19<3:29:16, 3.55it/s] 88%|████████▊ | 326843/371472 [3:50:19<3:25:39, 3.62it/s] 88%|████████▊ | 326844/371472 [3:50:19<3:20:00, 3.72it/s] 88%|████████▊ | 326845/371472 [3:50:20<3:20:50, 3.70it/s] 88%|████████▊ | 326846/371472 [3:50:20<3:24:49, 3.63it/s] 88%|████████▊ | 326847/371472 [3:50:20<3:33:48, 3.48it/s] 88%|████████▊ | 326848/371472 [3:50:20<3:35:10, 3.46it/s] 88%|████████▊ | 326849/371472 [3:50:21<4:04:54, 3.04it/s] 88%|████████▊ | 326850/371472 [3:50:21<3:46:10, 3.29it/s] 88%|████████▊ | 326851/371472 [3:50:21<3:39:33, 3.39it/s] 88%|████████▊ | 326852/371472 [3:50:22<3:43:00, 3.33it/s] 88%|████████▊ | 326853/371472 [3:50:22<3:36:57, 3.43it/s] 88%|████████▊ | 326854/371472 [3:50:22<3:31:47, 3.51it/s] 88%|████████▊ | 326855/371472 [3:50:22<3:25:11, 3.62it/s] 88%|████████▊ | 326856/371472 [3:50:23<3:26:19, 3.60it/s] 88%|████████▊ | 326857/371472 [3:50:23<3:23:11, 3.66it/s] 88%|████████▊ | 326858/371472 [3:50:23<3:54:27, 3.17it/s] 88%|████████▊ | 326859/371472 [3:50:24<3:46:39, 3.28it/s] 88%|████████▊ | 326860/371472 [3:50:24<3:36:37, 3.43it/s] {'loss': 2.5871, 'learning_rate': 2.0814389450322132e-07, 'epoch': 14.08} + 88%|████████▊ | 326860/371472 [3:50:24<3:36:37, 3.43it/s] 88%|████████▊ | 326861/371472 [3:50:24<3:29:15, 3.55it/s] 88%|████████▊ | 326862/371472 [3:50:25<3:27:20, 3.59it/s] 88%|████████▊ | 326863/371472 [3:50:25<3:24:51, 3.63it/s] 88%|████████▊ | 326864/371472 [3:50:25<3:37:13, 3.42it/s] 88%|████████▊ | 326865/371472 [3:50:25<3:33:49, 3.48it/s] 88%|████████▊ | 326866/371472 [3:50:26<3:29:20, 3.55it/s] 88%|████████▊ | 326867/371472 [3:50:26<3:26:55, 3.59it/s] 88%|████████▊ | 326868/371472 [3:50:26<3:27:37, 3.58it/s] 88%|████████▊ | 326869/371472 [3:50:27<3:36:53, 3.43it/s] 88%|████████▊ | 326870/371472 [3:50:27<3:30:04, 3.54it/s] 88%|████████▊ | 326871/371472 [3:50:27<3:26:51, 3.59it/s] 88%|████████▊ | 326872/371472 [3:50:27<3:23:28, 3.65it/s] 88%|████████▊ | 326873/371472 [3:50:28<3:21:21, 3.69it/s] 88%|████████▊ | 326874/371472 [3:50:28<3:21:13, 3.69it/s] 88%|████████▊ | 326875/371472 [3:50:28<3:12:43, 3.86it/s] 88%|████████▊ | 326876/371472 [3:50:28<3:15:55, 3.79it/s] 88%|████████▊ | 326877/371472 [3:50:29<3:11:10, 3.89it/s] 88%|████████▊ | 326878/371472 [3:50:29<3:09:03, 3.93it/s] 88%|████████▊ | 326879/371472 [3:50:29<3:08:47, 3.94it/s] 88%|████████▊ | 326880/371472 [3:50:29<3:36:59, 3.42it/s] {'loss': 2.5354, 'learning_rate': 2.0809541252774245e-07, 'epoch': 14.08} + 88%|████████▊ | 326880/371472 [3:50:29<3:36:59, 3.42it/s] 88%|████████▊ | 326881/371472 [3:50:30<3:35:49, 3.44it/s] 88%|████████▊ | 326882/371472 [3:50:30<3:29:19, 3.55it/s] 88%|████████▊ | 326883/371472 [3:50:30<3:25:07, 3.62it/s] 88%|████████▊ | 326884/371472 [3:50:31<3:21:24, 3.69it/s] 88%|████████▊ | 326885/371472 [3:50:31<3:15:53, 3.79it/s] 88%|████████▊ | 326886/371472 [3:50:31<3:17:47, 3.76it/s] 88%|████████▊ | 326887/371472 [3:50:31<3:22:57, 3.66it/s] 88%|████████▊ | 326888/371472 [3:50:32<3:22:36, 3.67it/s] 88%|████████▊ | 326889/371472 [3:50:32<3:24:03, 3.64it/s] 88%|████████▊ | 326890/371472 [3:50:32<3:22:26, 3.67it/s] 88%|████████▊ | 326891/371472 [3:50:32<3:25:16, 3.62it/s] 88%|████████▊ | 326892/371472 [3:50:33<3:23:18, 3.65it/s] 88%|████████▊ | 326893/371472 [3:50:33<3:21:31, 3.69it/s] 88%|████████▊ | 326894/371472 [3:50:33<3:46:32, 3.28it/s] 88%|████████▊ | 326895/371472 [3:50:34<3:40:49, 3.36it/s] 88%|████████▊ | 326896/371472 [3:50:34<3:40:35, 3.37it/s] 88%|████████▊ | 326897/371472 [3:50:34<3:56:31, 3.14it/s] 88%|████████▊ | 326898/371472 [3:50:35<3:44:25, 3.31it/s] 88%|████████▊ | 326899/371472 [3:50:35<3:41:02, 3.36it/s] 88%|████████▊ | 326900/371472 [3:50:35<3:43:29, 3.32it/s] {'loss': 2.6025, 'learning_rate': 2.0804693055226352e-07, 'epoch': 14.08} + 88%|████████▊ | 326900/371472 [3:50:35<3:43:29, 3.32it/s] 88%|████████▊ | 326901/371472 [3:50:35<3:36:26, 3.43it/s] 88%|████████▊ | 326902/371472 [3:50:36<3:25:39, 3.61it/s] 88%|████████▊ | 326903/371472 [3:50:36<3:41:12, 3.36it/s] 88%|████████▊ | 326904/371472 [3:50:36<3:32:19, 3.50it/s] 88%|████████▊ | 326905/371472 [3:50:37<3:22:19, 3.67it/s] 88%|████████▊ | 326906/371472 [3:50:37<3:17:57, 3.75it/s] 88%|████████▊ | 326907/371472 [3:50:37<3:24:13, 3.64it/s] 88%|████████▊ | 326908/371472 [3:50:37<3:19:37, 3.72it/s] 88%|████████▊ | 326909/371472 [3:50:38<3:16:57, 3.77it/s] 88%|████████▊ | 326910/371472 [3:50:38<3:19:47, 3.72it/s] 88%|████████▊ | 326911/371472 [3:50:38<3:26:23, 3.60it/s] 88%|████████▊ | 326912/371472 [3:50:38<3:28:33, 3.56it/s] 88%|████████▊ | 326913/371472 [3:50:39<3:25:16, 3.62it/s] 88%|████████▊ | 326914/371472 [3:50:39<3:22:09, 3.67it/s] 88%|████████▊ | 326915/371472 [3:50:39<3:15:57, 3.79it/s] 88%|████████▊ | 326916/371472 [3:50:39<3:07:29, 3.96it/s] 88%|████████▊ | 326917/371472 [3:50:40<3:09:18, 3.92it/s] 88%|████████▊ | 326918/371472 [3:50:40<3:21:34, 3.68it/s] 88%|████████▊ | 326919/371472 [3:50:40<3:15:54, 3.79it/s] 88%|████████▊ | 326920/371472 [3:50:41<3:15:15, 3.80it/s] {'loss': 2.602, 'learning_rate': 2.079984485767847e-07, 'epoch': 14.08} + 88%|████████▊ | 326920/371472 [3:50:41<3:15:15, 3.80it/s] 88%|████████▊ | 326921/371472 [3:50:41<3:50:48, 3.22it/s] 88%|████████▊ | 326922/371472 [3:50:41<3:36:37, 3.43it/s] 88%|████████▊ | 326923/371472 [3:50:41<3:33:29, 3.48it/s] 88%|████████▊ | 326924/371472 [3:50:42<3:30:40, 3.52it/s] 88%|████████▊ | 326925/371472 [3:50:42<3:21:26, 3.69it/s] 88%|████████▊ | 326926/371472 [3:50:42<3:28:28, 3.56it/s] 88%|████████▊ | 326927/371472 [3:50:43<3:24:15, 3.63it/s] 88%|████████▊ | 326928/371472 [3:50:43<3:34:02, 3.47it/s] 88%|████████▊ | 326929/371472 [3:50:43<3:48:45, 3.25it/s] 88%|████████▊ | 326930/371472 [3:50:44<3:40:32, 3.37it/s] 88%|████████▊ | 326931/371472 [3:50:44<3:33:52, 3.47it/s] 88%|████████▊ | 326932/371472 [3:50:44<3:21:27, 3.68it/s] 88%|████████▊ | 326933/371472 [3:50:44<3:24:08, 3.64it/s] 88%|████████▊ | 326934/371472 [3:50:45<3:24:18, 3.63it/s] 88%|████████▊ | 326935/371472 [3:50:45<3:28:30, 3.56it/s] 88%|████████▊ | 326936/371472 [3:50:45<3:18:03, 3.75it/s] 88%|████████▊ | 326937/371472 [3:50:45<3:33:59, 3.47it/s] 88%|████████▊ | 326938/371472 [3:50:46<3:46:07, 3.28it/s] 88%|████████▊ | 326939/371472 [3:50:46<3:38:47, 3.39it/s] 88%|████████▊ | 326940/371472 [3:50:46<3:35:47, 3.44it/s] {'loss': 2.6622, 'learning_rate': 2.0794996660130577e-07, 'epoch': 14.08} + 88%|████████▊ | 326940/371472 [3:50:46<3:35:47, 3.44it/s] 88%|████████▊ | 326941/371472 [3:50:47<3:44:32, 3.31it/s] 88%|████████▊ | 326942/371472 [3:50:47<3:37:30, 3.41it/s] 88%|████████▊ | 326943/371472 [3:50:47<3:31:08, 3.51it/s] 88%|████████▊ | 326944/371472 [3:50:47<3:25:34, 3.61it/s] 88%|████████▊ | 326945/371472 [3:50:48<3:20:46, 3.70it/s] 88%|████████▊ | 326946/371472 [3:50:48<3:16:34, 3.78it/s] 88%|████████▊ | 326947/371472 [3:50:48<3:25:16, 3.61it/s] 88%|████████▊ | 326948/371472 [3:50:49<3:32:30, 3.49it/s] 88%|████████▊ | 326949/371472 [3:50:49<3:46:49, 3.27it/s] 88%|████████▊ | 326950/371472 [3:50:49<3:36:16, 3.43it/s] 88%|████████▊ | 326951/371472 [3:50:50<3:47:03, 3.27it/s] 88%|████████▊ | 326952/371472 [3:50:50<3:37:02, 3.42it/s] 88%|████████▊ | 326953/371472 [3:50:50<3:45:48, 3.29it/s] 88%|████████▊ | 326954/371472 [3:50:50<3:35:32, 3.44it/s] 88%|████████▊ | 326955/371472 [3:50:51<3:39:13, 3.38it/s] 88%|████████▊ | 326956/371472 [3:50:51<3:28:25, 3.56it/s] 88%|████████▊ | 326957/371472 [3:50:51<3:25:39, 3.61it/s] 88%|████████▊ | 326958/371472 [3:50:52<3:30:47, 3.52it/s] 88%|████████▊ | 326959/371472 [3:50:52<3:28:42, 3.55it/s] 88%|████████▊ | 326960/371472 [3:50:52<3:20:23, 3.70it/s] {'loss': 2.6982, 'learning_rate': 2.079014846258269e-07, 'epoch': 14.08} + 88%|████████▊ | 326960/371472 [3:50:52<3:20:23, 3.70it/s] 88%|████████▊ | 326961/371472 [3:50:52<3:17:51, 3.75it/s] 88%|████████▊ | 326962/371472 [3:50:53<3:20:19, 3.70it/s] 88%|████████▊ | 326963/371472 [3:50:53<3:24:59, 3.62it/s] 88%|████████▊ | 326964/371472 [3:50:53<3:22:07, 3.67it/s] 88%|████████▊ | 326965/371472 [3:50:53<3:28:54, 3.55it/s] 88%|████████▊ | 326966/371472 [3:50:54<3:32:24, 3.49it/s] 88%|████████▊ | 326967/371472 [3:50:54<3:34:35, 3.46it/s] 88%|████████▊ | 326968/371472 [3:50:54<3:26:29, 3.59it/s] 88%|████████▊ | 326969/371472 [3:50:55<3:21:20, 3.68it/s] 88%|████████▊ | 326970/371472 [3:50:55<3:14:20, 3.82it/s] 88%|████████▊ | 326971/371472 [3:50:55<3:20:52, 3.69it/s] 88%|████████▊ | 326972/371472 [3:50:55<3:23:04, 3.65it/s] 88%|████████▊ | 326973/371472 [3:50:56<3:23:37, 3.64it/s] 88%|████████▊ | 326974/371472 [3:50:56<3:23:57, 3.64it/s] 88%|████████▊ | 326975/371472 [3:50:56<3:27:35, 3.57it/s] 88%|████████▊ | 326976/371472 [3:50:56<3:22:27, 3.66it/s] 88%|████████▊ | 326977/371472 [3:50:57<3:16:04, 3.78it/s] 88%|████████▊ | 326978/371472 [3:50:57<3:16:25, 3.78it/s] 88%|████████▊ | 326979/371472 [3:50:57<3:20:21, 3.70it/s] 88%|████████▊ | 326980/371472 [3:50:57<3:22:22, 3.66it/s] {'loss': 2.5206, 'learning_rate': 2.0785300265034796e-07, 'epoch': 14.08} + 88%|████████▊ | 326980/371472 [3:50:58<3:22:22, 3.66it/s] 88%|████████▊ | 326981/371472 [3:50:58<3:25:28, 3.61it/s] 88%|████████▊ | 326982/371472 [3:50:58<3:19:51, 3.71it/s] 88%|████████▊ | 326983/371472 [3:50:58<3:18:39, 3.73it/s] 88%|████████▊ | 326984/371472 [3:50:59<3:33:03, 3.48it/s] 88%|████████▊ | 326985/371472 [3:50:59<3:38:20, 3.40it/s] 88%|████████▊ | 326986/371472 [3:50:59<3:35:44, 3.44it/s] 88%|████████▊ | 326987/371472 [3:51:00<3:39:33, 3.38it/s] 88%|████████▊ | 326988/371472 [3:51:00<3:37:26, 3.41it/s] 88%|████████▊ | 326989/371472 [3:51:00<3:31:58, 3.50it/s] 88%|████████▊ | 326990/371472 [3:51:00<3:37:39, 3.41it/s] 88%|████████▊ | 326991/371472 [3:51:01<3:32:13, 3.49it/s] 88%|████████▊ | 326992/371472 [3:51:01<3:28:09, 3.56it/s] 88%|████████▊ | 326993/371472 [3:51:01<3:53:06, 3.18it/s] 88%|████████▊ | 326994/371472 [3:51:02<3:39:03, 3.38it/s] 88%|████████▊ | 326995/371472 [3:51:02<3:31:12, 3.51it/s] 88%|████████▊ | 326996/371472 [3:51:02<3:20:24, 3.70it/s] 88%|████████▊ | 326997/371472 [3:51:02<3:18:58, 3.73it/s] 88%|████████▊ | 326998/371472 [3:51:03<3:18:42, 3.73it/s] 88%|████████▊ | 326999/371472 [3:51:03<3:26:46, 3.58it/s] 88%|████████▊ | 327000/371472 [3:51:03<3:19:17, 3.72it/s] {'loss': 2.5749, 'learning_rate': 2.0780452067486914e-07, 'epoch': 14.08} + 88%|████████▊ | 327000/371472 [3:51:03<3:19:17, 3.72it/s] 88%|████████▊ | 327001/371472 [3:51:03<3:17:32, 3.75it/s] 88%|████████▊ | 327002/371472 [3:51:04<3:13:44, 3.83it/s] 88%|████████▊ | 327003/371472 [3:51:04<3:24:36, 3.62it/s] 88%|████████▊ | 327004/371472 [3:51:04<3:25:11, 3.61it/s] 88%|████████▊ | 327005/371472 [3:51:05<3:20:22, 3.70it/s] 88%|████████▊ | 327006/371472 [3:51:05<3:15:52, 3.78it/s] 88%|████████▊ | 327007/371472 [3:51:05<3:19:58, 3.71it/s] 88%|████████▊ | 327008/371472 [3:51:05<3:21:45, 3.67it/s] 88%|████████▊ | 327009/371472 [3:51:06<3:15:18, 3.79it/s] 88%|████████▊ | 327010/371472 [3:51:06<3:14:26, 3.81it/s] 88%|████████▊ | 327011/371472 [3:51:06<3:22:01, 3.67it/s] 88%|████████▊ | 327012/371472 [3:51:06<3:23:50, 3.64it/s] 88%|████████▊ | 327013/371472 [3:51:07<3:22:36, 3.66it/s] 88%|████████▊ | 327014/371472 [3:51:07<3:26:10, 3.59it/s] 88%|████████▊ | 327015/371472 [3:51:07<3:25:11, 3.61it/s] 88%|████████▊ | 327016/371472 [3:51:08<3:26:19, 3.59it/s] 88%|████████▊ | 327017/371472 [3:51:08<3:20:46, 3.69it/s] 88%|████████▊ | 327018/371472 [3:51:08<3:28:28, 3.55it/s] 88%|████████▊ | 327019/371472 [3:51:08<3:19:22, 3.72it/s] 88%|████████▊ | 327020/371472 [3:51:09<3:19:17, 3.72it/s] {'loss': 2.509, 'learning_rate': 2.0775603869939018e-07, 'epoch': 14.09} + 88%|████████▊ | 327020/371472 [3:51:09<3:19:17, 3.72it/s] 88%|████████▊ | 327021/371472 [3:51:09<3:17:37, 3.75it/s] 88%|████████▊ | 327022/371472 [3:51:09<3:17:44, 3.75it/s] 88%|████████▊ | 327023/371472 [3:51:09<3:14:45, 3.80it/s] 88%|████████▊ | 327024/371472 [3:51:10<3:16:26, 3.77it/s] 88%|████████▊ | 327025/371472 [3:51:10<3:29:20, 3.54it/s] 88%|████████▊ | 327026/371472 [3:51:10<3:28:33, 3.55it/s] 88%|████████▊ | 327027/371472 [3:51:11<3:23:03, 3.65it/s] 88%|████████▊ | 327028/371472 [3:51:11<3:16:37, 3.77it/s] 88%|████████▊ | 327029/371472 [3:51:11<3:08:15, 3.93it/s] 88%|████████▊ | 327030/371472 [3:51:11<3:07:53, 3.94it/s] 88%|████████▊ | 327031/371472 [3:51:12<3:15:47, 3.78it/s] 88%|████████▊ | 327032/371472 [3:51:12<3:16:06, 3.78it/s] 88%|████████▊ | 327033/371472 [3:51:12<3:24:59, 3.61it/s] 88%|████████▊ | 327034/371472 [3:51:12<3:15:15, 3.79it/s] 88%|████████▊ | 327035/371472 [3:51:13<3:14:08, 3.81it/s] 88%|████████▊ | 327036/371472 [3:51:13<3:40:29, 3.36it/s] 88%|████████▊ | 327037/371472 [3:51:13<3:32:21, 3.49it/s] 88%|████████▊ | 327038/371472 [3:51:13<3:23:00, 3.65it/s] 88%|████████▊ | 327039/371472 [3:51:14<3:20:59, 3.68it/s] 88%|████████▊ | 327040/371472 [3:51:14<3:33:35, 3.47it/s] {'loss': 2.6565, 'learning_rate': 2.0770755672391134e-07, 'epoch': 14.09} + 88%|████████▊ | 327040/371472 [3:51:14<3:33:35, 3.47it/s] 88%|████████▊ | 327041/371472 [3:51:14<3:35:39, 3.43it/s] 88%|████████▊ | 327042/371472 [3:51:15<3:23:38, 3.64it/s] 88%|████████▊ | 327043/371472 [3:51:15<3:21:10, 3.68it/s] 88%|████████▊ | 327044/371472 [3:51:15<3:33:38, 3.47it/s] 88%|████████▊ | 327045/371472 [3:51:15<3:23:05, 3.65it/s] 88%|████████▊ | 327046/371472 [3:51:16<3:15:58, 3.78it/s] 88%|████████▊ | 327047/371472 [3:51:16<3:15:52, 3.78it/s] 88%|████████▊ | 327048/371472 [3:51:16<3:17:46, 3.74it/s] 88%|████████▊ | 327049/371472 [3:51:17<3:23:40, 3.64it/s] 88%|████████▊ | 327050/371472 [3:51:17<3:23:01, 3.65it/s] 88%|████████▊ | 327051/371472 [3:51:17<3:27:29, 3.57it/s] 88%|████████▊ | 327052/371472 [3:51:17<3:24:34, 3.62it/s] 88%|████████▊ | 327053/371472 [3:51:18<3:29:02, 3.54it/s] 88%|████████▊ | 327054/371472 [3:51:18<3:19:34, 3.71it/s] 88%|████████�� | 327055/371472 [3:51:18<3:26:22, 3.59it/s] 88%|████████▊ | 327056/371472 [3:51:18<3:21:32, 3.67it/s] 88%|████████▊ | 327057/371472 [3:51:19<3:29:31, 3.53it/s] 88%|████████▊ | 327058/371472 [3:51:19<3:20:51, 3.69it/s] 88%|████████▊ | 327059/371472 [3:51:19<3:18:36, 3.73it/s] 88%|████████▊ | 327060/371472 [3:51:20<3:23:01, 3.65it/s] {'loss': 2.4305, 'learning_rate': 2.076590747484324e-07, 'epoch': 14.09} + 88%|████████▊ | 327060/371472 [3:51:20<3:23:01, 3.65it/s] 88%|████████▊ | 327061/371472 [3:51:20<3:17:08, 3.75it/s] 88%|████████▊ | 327062/371472 [3:51:20<3:14:42, 3.80it/s] 88%|████████▊ | 327063/371472 [3:51:20<3:12:58, 3.84it/s] 88%|████████▊ | 327064/371472 [3:51:21<3:10:25, 3.89it/s] 88%|████████▊ | 327065/371472 [3:51:21<3:11:16, 3.87it/s] 88%|████████▊ | 327066/371472 [3:51:21<3:12:07, 3.85it/s] 88%|████████▊ | 327067/371472 [3:51:21<3:15:00, 3.80it/s] 88%|████████▊ | 327068/371472 [3:51:22<3:23:37, 3.63it/s] 88%|████████▊ | 327069/371472 [3:51:22<3:27:47, 3.56it/s] 88%|████████▊ | 327070/371472 [3:51:22<3:33:23, 3.47it/s] 88%|████████▊ | 327071/371472 [3:51:22<3:24:13, 3.62it/s] 88%|████████▊ | 327072/371472 [3:51:23<3:27:07, 3.57it/s] 88%|████████▊ | 327073/371472 [3:51:23<3:27:01, 3.57it/s] 88%|████████▊ | 327074/371472 [3:51:23<3:31:55, 3.49it/s] 88%|████████▊ | 327075/371472 [3:51:24<3:23:33, 3.63it/s] 88%|████████▊ | 327076/371472 [3:51:24<3:17:40, 3.74it/s] 88%|████████▊ | 327077/371472 [3:51:24<3:28:25, 3.55it/s] 88%|████████▊ | 327078/371472 [3:51:24<3:21:07, 3.68it/s] 88%|████████▊ | 327079/371472 [3:51:25<3:21:51, 3.67it/s] 88%|████████▊ | 327080/371472 [3:51:25<3:17:03, 3.75it/s] {'loss': 2.6392, 'learning_rate': 2.0761059277295356e-07, 'epoch': 14.09} + 88%|████████▊ | 327080/371472 [3:51:25<3:17:03, 3.75it/s] 88%|████████▊ | 327081/371472 [3:51:25<3:14:25, 3.81it/s] 88%|████████▊ | 327082/371472 [3:51:25<3:19:32, 3.71it/s] 88%|████████▊ | 327083/371472 [3:51:26<3:22:03, 3.66it/s] 88%|████████▊ | 327084/371472 [3:51:26<3:22:57, 3.64it/s] 88%|████████▊ | 327085/371472 [3:51:26<3:44:04, 3.30it/s] 88%|████████▊ | 327086/371472 [3:51:27<3:36:27, 3.42it/s] 88%|████████▊ | 327087/371472 [3:51:27<3:30:46, 3.51it/s] 88%|████████▊ | 327088/371472 [3:51:27<3:34:11, 3.45it/s] 88%|████████▊ | 327089/371472 [3:51:28<3:40:24, 3.36it/s] 88%|████████▊ | 327090/371472 [3:51:28<3:42:41, 3.32it/s] 88%|████████▊ | 327091/371472 [3:51:28<3:46:25, 3.27it/s] 88%|████████▊ | 327092/371472 [3:51:28<3:38:58, 3.38it/s] 88%|████████▊ | 327093/371472 [3:51:29<3:34:52, 3.44it/s] 88%|████████▊ | 327094/371472 [3:51:29<3:40:23, 3.36it/s] 88%|████████▊ | 327095/371472 [3:51:29<3:31:04, 3.50it/s] 88%|████████▊ | 327096/371472 [3:51:30<3:28:30, 3.55it/s] 88%|████████▊ | 327097/371472 [3:51:30<3:34:21, 3.45it/s] 88%|████████▊ | 327098/371472 [3:51:30<3:52:28, 3.18it/s] 88%|████████▊ | 327099/371472 [3:51:31<3:40:17, 3.36it/s] 88%|████████▊ | 327100/371472 [3:51:31<3:49:44, 3.22it/s] {'loss': 2.5178, 'learning_rate': 2.075621107974746e-07, 'epoch': 14.09} + 88%|████████▊ | 327100/371472 [3:51:31<3:49:44, 3.22it/s] 88%|████████▊ | 327101/371472 [3:51:31<3:43:33, 3.31it/s] 88%|████████▊ | 327102/371472 [3:51:31<3:31:36, 3.49it/s] 88%|████████▊ | 327103/371472 [3:51:32<3:27:00, 3.57it/s] 88%|████████▊ | 327104/371472 [3:51:32<3:24:23, 3.62it/s] 88%|████████▊ | 327105/371472 [3:51:32<3:19:12, 3.71it/s] 88%|████████▊ | 327106/371472 [3:51:32<3:16:40, 3.76it/s] 88%|████████▊ | 327107/371472 [3:51:33<3:27:50, 3.56it/s] 88%|████████▊ | 327108/371472 [3:51:33<3:21:51, 3.66it/s] 88%|████████▊ | 327109/371472 [3:51:33<3:31:24, 3.50it/s] 88%|████████▊ | 327110/371472 [3:51:34<3:25:19, 3.60it/s] 88%|████████▊ | 327111/371472 [3:51:34<3:21:55, 3.66it/s] 88%|████████▊ | 327112/371472 [3:51:34<3:26:05, 3.59it/s] 88%|████████▊ | 327113/371472 [3:51:34<3:27:22, 3.57it/s] 88%|████████▊ | 327114/371472 [3:51:35<3:29:50, 3.52it/s] 88%|████████▊ | 327115/371472 [3:51:35<3:21:11, 3.67it/s] 88%|████████▊ | 327116/371472 [3:51:35<3:21:32, 3.67it/s] 88%|████████▊ | 327117/371472 [3:51:36<3:24:45, 3.61it/s] 88%|████████▊ | 327118/371472 [3:51:36<3:18:50, 3.72it/s] 88%|████████▊ | 327119/371472 [3:51:36<3:28:25, 3.55it/s] 88%|████████▊ | 327120/371472 [3:51:36<3:28:10, 3.55it/s] {'loss': 2.5122, 'learning_rate': 2.0751362882199578e-07, 'epoch': 14.09} + 88%|████████▊ | 327120/371472 [3:51:36<3:28:10, 3.55it/s] 88%|████████▊ | 327121/371472 [3:51:37<3:26:05, 3.59it/s] 88%|████████▊ | 327122/371472 [3:51:37<3:20:29, 3.69it/s] 88%|████████▊ | 327123/371472 [3:51:37<3:29:12, 3.53it/s] 88%|████████▊ | 327124/371472 [3:51:38<3:50:10, 3.21it/s] 88%|████████▊ | 327125/371472 [3:51:38<3:40:39, 3.35it/s] 88%|████████▊ | 327126/371472 [3:51:38<3:27:42, 3.56it/s] 88%|████████▊ | 327127/371472 [3:51:38<3:23:54, 3.62it/s] 88%|████████▊ | 327128/371472 [3:51:39<3:20:22, 3.69it/s] 88%|████████▊ | 327129/371472 [3:51:39<3:19:40, 3.70it/s] 88%|████████▊ | 327130/371472 [3:51:39<3:26:31, 3.58it/s] 88%|████████▊ | 327131/371472 [3:51:39<3:22:44, 3.65it/s] 88%|████████▊ | 327132/371472 [3:51:40<3:19:17, 3.71it/s] 88%|████████▊ | 327133/371472 [3:51:40<3:18:50, 3.72it/s] 88%|████████▊ | 327134/371472 [3:51:40<3:25:28, 3.60it/s] 88%|████████▊ | 327135/371472 [3:51:41<3:25:22, 3.60it/s] 88%|████████▊ | 327136/371472 [3:51:41<3:20:52, 3.68it/s] 88%|████████▊ | 327137/371472 [3:51:41<3:16:50, 3.75it/s] 88%|████████▊ | 327138/371472 [3:51:41<3:38:59, 3.37it/s] 88%|████████▊ | 327139/371472 [3:51:42<3:39:54, 3.36it/s] 88%|████████▊ | 327140/371472 [3:51:42<3:43:34, 3.30it/s] {'loss': 2.574, 'learning_rate': 2.0746514684651683e-07, 'epoch': 14.09} + 88%|████████▊ | 327140/371472 [3:51:42<3:43:34, 3.30it/s] 88%|████████▊ | 327141/371472 [3:51:42<3:41:18, 3.34it/s] 88%|████████▊ | 327142/371472 [3:51:43<3:34:45, 3.44it/s] 88%|████████▊ | 327143/371472 [3:51:43<3:31:56, 3.49it/s] 88%|████████▊ | 327144/371472 [3:51:43<3:39:57, 3.36it/s] 88%|████████▊ | 327145/371472 [3:51:43<3:33:54, 3.45it/s] 88%|████████▊ | 327146/371472 [3:51:44<3:31:03, 3.50it/s] 88%|████████▊ | 327147/371472 [3:51:44<3:31:47, 3.49it/s] 88%|████████▊ | 327148/371472 [3:51:44<3:26:57, 3.57it/s] 88%|████████▊ | 327149/371472 [3:51:45<3:17:42, 3.74it/s] 88%|████████▊ | 327150/371472 [3:51:45<3:13:27, 3.82it/s] 88%|████████▊ | 327151/371472 [3:51:45<3:11:09, 3.86it/s] 88%|████████▊ | 327152/371472 [3:51:46<4:09:01, 2.97it/s] 88%|████████▊ | 327153/371472 [3:51:46<3:52:59, 3.17it/s] 88%|████████▊ | 327154/371472 [3:51:46<3:42:49, 3.31it/s] 88%|████████▊ | 327155/371472 [3:51:46<3:30:02, 3.52it/s] 88%|████████▊ | 327156/371472 [3:51:47<3:31:42, 3.49it/s] 88%|████████▊ | 327157/371472 [3:51:47<3:33:00, 3.47it/s] 88%|████████▊ | 327158/371472 [3:51:47<3:24:30, 3.61it/s] 88%|████████▊ | 327159/371472 [3:51:48<3:36:19, 3.41it/s] 88%|████████▊ | 327160/371472 [3:51:48<3:27:42, 3.56it/s] {'loss': 2.643, 'learning_rate': 2.0741666487103798e-07, 'epoch': 14.09} + 88%|████████▊ | 327160/371472 [3:51:48<3:27:42, 3.56it/s] 88%|████████▊ | 327161/371472 [3:51:48<3:20:27, 3.68it/s] 88%|████████▊ | 327162/371472 [3:51:48<3:19:01, 3.71it/s] 88%|████████▊ | 327163/371472 [3:51:49<3:12:20, 3.84it/s] 88%|████████▊ | 327164/371472 [3:51:49<3:13:18, 3.82it/s] 88%|████████▊ | 327165/371472 [3:51:49<3:16:16, 3.76it/s] 88%|████████▊ | 327166/371472 [3:51:49<3:12:27, 3.84it/s] 88%|████████▊ | 327167/371472 [3:51:50<3:31:34, 3.49it/s] 88%|████████▊ | 327168/371472 [3:51:50<3:19:28, 3.70it/s] 88%|████████▊ | 327169/371472 [3:51:50<3:50:59, 3.20it/s] 88%|█████���██▊ | 327170/371472 [3:51:51<4:21:05, 2.83it/s] 88%|████████▊ | 327171/371472 [3:51:51<4:16:31, 2.88it/s] 88%|████████▊ | 327172/371472 [3:51:51<4:04:43, 3.02it/s] 88%|████████▊ | 327173/371472 [3:51:52<3:54:56, 3.14it/s] 88%|████████▊ | 327174/371472 [3:51:52<3:48:06, 3.24it/s] 88%|████████▊ | 327175/371472 [3:51:52<3:36:56, 3.40it/s] 88%|████████▊ | 327176/371472 [3:51:52<3:33:54, 3.45it/s] 88%|████████▊ | 327177/371472 [3:51:53<3:39:51, 3.36it/s] 88%|████████▊ | 327178/371472 [3:51:53<3:36:17, 3.41it/s] 88%|████████▊ | 327179/371472 [3:51:53<3:36:46, 3.41it/s] 88%|████████▊ | 327180/371472 [3:51:54<3:29:07, 3.53it/s] {'loss': 2.5705, 'learning_rate': 2.0736818289555905e-07, 'epoch': 14.09} + 88%|████████▊ | 327180/371472 [3:51:54<3:29:07, 3.53it/s] 88%|████████▊ | 327181/371472 [3:51:54<3:19:47, 3.69it/s] 88%|████████▊ | 327182/371472 [3:51:54<3:21:47, 3.66it/s] 88%|████████▊ | 327183/371472 [3:51:54<3:19:36, 3.70it/s] 88%|████████▊ | 327184/371472 [3:51:55<3:15:59, 3.77it/s] 88%|████████▊ | 327185/371472 [3:51:55<3:17:10, 3.74it/s] 88%|████████▊ | 327186/371472 [3:51:55<3:15:28, 3.78it/s] 88%|████████▊ | 327187/371472 [3:51:55<3:12:06, 3.84it/s] 88%|████████▊ | 327188/371472 [3:51:56<3:20:51, 3.67it/s] 88%|████████▊ | 327189/371472 [3:51:56<3:27:06, 3.56it/s] 88%|████████▊ | 327190/371472 [3:51:56<3:46:15, 3.26it/s] 88%|████████▊ | 327191/371472 [3:51:57<3:39:12, 3.37it/s] 88%|████████▊ | 327192/371472 [3:51:57<3:37:37, 3.39it/s] 88%|████████▊ | 327193/371472 [3:51:57<3:54:32, 3.15it/s] 88%|████████▊ | 327194/371472 [3:51:58<3:39:22, 3.36it/s] 88%|████████▊ | 327195/371472 [3:51:58<3:31:11, 3.49it/s] 88%|████████▊ | 327196/371472 [3:51:58<3:24:55, 3.60it/s] 88%|████████▊ | 327197/371472 [3:51:58<3:43:34, 3.30it/s] 88%|████████▊ | 327198/371472 [3:51:59<3:41:06, 3.34it/s] 88%|████████▊ | 327199/371472 [3:51:59<3:28:22, 3.54it/s] 88%|████████▊ | 327200/371472 [3:51:59<3:35:12, 3.43it/s] {'loss': 2.5766, 'learning_rate': 2.073197009200801e-07, 'epoch': 14.09} + 88%|████████▊ | 327200/371472 [3:51:59<3:35:12, 3.43it/s] 88%|████████▊ | 327201/371472 [3:52:00<3:25:10, 3.60it/s] 88%|████████▊ | 327202/371472 [3:52:00<3:22:07, 3.65it/s] 88%|████████▊ | 327203/371472 [3:52:00<3:21:50, 3.66it/s] 88%|████████▊ | 327204/371472 [3:52:00<3:16:57, 3.75it/s] 88%|████████▊ | 327205/371472 [3:52:01<3:15:17, 3.78it/s] 88%|████████▊ | 327206/371472 [3:52:01<3:19:17, 3.70it/s] 88%|████████▊ | 327207/371472 [3:52:01<3:13:17, 3.82it/s] 88%|████████▊ | 327208/371472 [3:52:01<3:23:28, 3.63it/s] 88%|████████▊ | 327209/371472 [3:52:02<3:21:04, 3.67it/s] 88%|████████▊ | 327210/371472 [3:52:02<3:26:32, 3.57it/s] 88%|████████▊ | 327211/371472 [3:52:02<3:30:35, 3.50it/s] 88%|████████▊ | 327212/371472 [3:52:03<3:27:17, 3.56it/s] 88%|████████▊ | 327213/371472 [3:52:03<3:40:42, 3.34it/s] 88%|████████▊ | 327214/371472 [3:52:03<3:31:58, 3.48it/s] 88%|████████▊ | 327215/371472 [3:52:03<3:23:23, 3.63it/s] 88%|████████▊ | 327216/371472 [3:52:04<3:25:57, 3.58it/s] 88%|████████▊ | 327217/371472 [3:52:04<3:24:37, 3.60it/s] 88%|████████▊ | 327218/371472 [3:52:04<3:18:30, 3.72it/s] 88%|████████▊ | 327219/371472 [3:52:05<3:16:01, 3.76it/s] 88%|████████▊ | 327220/371472 [3:52:05<3:12:35, 3.83it/s] {'loss': 2.6371, 'learning_rate': 2.0727121894460124e-07, 'epoch': 14.09} + 88%|████████▊ | 327220/371472 [3:52:05<3:12:35, 3.83it/s] 88%|████████▊ | 327221/371472 [3:52:05<3:16:59, 3.74it/s] 88%|████████▊ | 327222/371472 [3:52:05<3:21:47, 3.65it/s] 88%|████████▊ | 327223/371472 [3:52:06<3:27:03, 3.56it/s] 88%|████████▊ | 327224/371472 [3:52:06<3:22:19, 3.65it/s] 88%|████████▊ | 327225/371472 [3:52:06<3:26:40, 3.57it/s] 88%|████████▊ | 327226/371472 [3:52:06<3:34:11, 3.44it/s] 88%|████████▊ | 327227/371472 [3:52:07<3:29:30, 3.52it/s] 88%|████████▊ | 327228/371472 [3:52:07<3:26:11, 3.58it/s] 88%|████████▊ | 327229/371472 [3:52:07<3:21:40, 3.66it/s] 88%|████████▊ | 327230/371472 [3:52:08<3:29:12, 3.52it/s] 88%|████████▊ | 327231/371472 [3:52:08<3:37:20, 3.39it/s] 88%|████████▊ | 327232/371472 [3:52:08<3:24:08, 3.61it/s] 88%|████████▊ | 327233/371472 [3:52:08<3:31:33, 3.49it/s] 88%|████████▊ | 327234/371472 [3:52:09<3:26:19, 3.57it/s] 88%|████████▊ | 327235/371472 [3:52:09<3:22:42, 3.64it/s] 88%|████████▊ | 327236/371472 [3:52:09<3:19:50, 3.69it/s] 88%|████████▊ | 327237/371472 [3:52:09<3:10:41, 3.87it/s] 88%|████████▊ | 327238/371472 [3:52:10<3:06:12, 3.96it/s] 88%|████████▊ | 327239/371472 [3:52:10<3:02:04, 4.05it/s] 88%|████████▊ | 327240/371472 [3:52:10<3:03:51, 4.01it/s] {'loss': 2.6091, 'learning_rate': 2.0722273696912231e-07, 'epoch': 14.09} + 88%|████████▊ | 327240/371472 [3:52:10<3:03:51, 4.01it/s] 88%|████████▊ | 327241/371472 [3:52:11<3:22:01, 3.65it/s] 88%|████████▊ | 327242/371472 [3:52:11<3:18:27, 3.71it/s] 88%|████████▊ | 327243/371472 [3:52:11<3:28:12, 3.54it/s] 88%|████████▊ | 327244/371472 [3:52:11<3:28:21, 3.54it/s] 88%|████████▊ | 327245/371472 [3:52:12<3:24:45, 3.60it/s] 88%|████████▊ | 327246/371472 [3:52:12<3:24:07, 3.61it/s] 88%|████████▊ | 327247/371472 [3:52:12<3:15:51, 3.76it/s] 88%|████████▊ | 327248/371472 [3:52:12<3:14:10, 3.80it/s] 88%|████████▊ | 327249/371472 [3:52:13<3:13:52, 3.80it/s] 88%|████████▊ | 327250/371472 [3:52:13<3:17:13, 3.74it/s] 88%|████████▊ | 327251/371472 [3:52:13<3:18:51, 3.71it/s] 88%|████████▊ | 327252/371472 [3:52:14<3:16:29, 3.75it/s] 88%|████████▊ | 327253/371472 [3:52:14<3:14:25, 3.79it/s] 88%|████████▊ | 327254/371472 [3:52:14<3:17:03, 3.74it/s] 88%|████████▊ | 327255/371472 [3:52:15<3:58:48, 3.09it/s] 88%|████████▊ | 327256/371472 [3:52:15<3:45:45, 3.26it/s] 88%|████████▊ | 327257/371472 [3:52:15<3:41:20, 3.33it/s] 88%|████████▊ | 327258/371472 [3:52:15<3:34:29, 3.44it/s] 88%|████████▊ | 327259/371472 [3:52:16<3:30:22, 3.50it/s] 88%|████████▊ | 327260/371472 [3:52:16<3:27:05, 3.56it/s] {'loss': 2.6952, 'learning_rate': 2.0717425499364347e-07, 'epoch': 14.1} + 88%|████████▊ | 327260/371472 [3:52:16<3:27:05, 3.56it/s] 88%|████████▊ | 327261/371472 [3:52:16<3:27:18, 3.55it/s] 88%|████████▊ | 327262/371472 [3:52:16<3:19:11, 3.70it/s] 88%|████████▊ | 327263/371472 [3:52:17<3:25:29, 3.59it/s] 88%|████████▊ | 327264/371472 [3:52:17<3:21:33, 3.66it/s] 88%|████████▊ | 327265/371472 [3:52:17<3:18:31, 3.71it/s] 88%|████████▊ | 327266/371472 [3:52:17<3:16:49, 3.74it/s] 88%|████████▊ | 327267/371472 [3:52:18<3:14:22, 3.79it/s] 88%|████████▊ | 327268/371472 [3:52:18<3:12:38, 3.82it/s] 88%|████████▊ | 327269/371472 [3:52:18<3:24:47, 3.60it/s] 88%|████████▊ | 327270/371472 [3:52:19<3:19:05, 3.70it/s] 88%|████████▊ | 327271/371472 [3:52:19<3:18:02, 3.72it/s] 88%|████████▊ | 327272/371472 [3:52:19<3:35:57, 3.41it/s] 88%|████████▊ | 327273/371472 [3:52:19<3:33:55, 3.44it/s] 88%|████████▊ | 327274/371472 [3:52:20<3:29:36, 3.51it/s] 88%|████████▊ | 327275/371472 [3:52:20<3:36:01, 3.41it/s] 88%|████████▊ | 327276/371472 [3:52:20<3:25:55, 3.58it/s] 88%|████████▊ | 327277/371472 [3:52:21<3:15:20, 3.77it/s] 88%|████████▊ | 327278/371472 [3:52:21<3:19:32, 3.69it/s] 88%|████████▊ | 327279/371472 [3:52:21<3:34:52, 3.43it/s] 88%|████████▊ | 327280/371472 [3:52:21<3:28:02, 3.54it/s] {'loss': 2.5807, 'learning_rate': 2.071257730181645e-07, 'epoch': 14.1} + 88%|████████▊ | 327280/371472 [3:52:21<3:28:02, 3.54it/s] 88%|████████▊ | 327281/371472 [3:52:22<3:31:08, 3.49it/s] 88%|████████▊ | 327282/371472 [3:52:22<3:36:01, 3.41it/s] 88%|████████▊ | 327283/371472 [3:52:22<3:28:33, 3.53it/s] 88%|████████▊ | 327284/371472 [3:52:23<3:23:03, 3.63it/s] 88%|████████▊ | 327285/371472 [3:52:23<3:18:53, 3.70it/s] 88%|████████▊ | 327286/371472 [3:52:23<3:20:00, 3.68it/s] 88%|████████▊ | 327287/371472 [3:52:23<3:19:21, 3.69it/s] 88%|████████▊ | 327288/371472 [3:52:24<3:24:39, 3.60it/s] 88%|████████▊ | 327289/371472 [3:52:24<3:44:01, 3.29it/s] 88%|████████▊ | 327290/371472 [3:52:24<3:34:52, 3.43it/s] 88%|████████▊ | 327291/371472 [3:52:25<3:36:00, 3.41it/s] 88%|████████▊ | 327292/371472 [3:52:25<3:34:44, 3.43it/s] 88%|████████▊ | 327293/371472 [3:52:25<3:28:20, 3.53it/s] 88%|████████▊ | 327294/371472 [3:52:25<3:31:30, 3.48it/s] 88%|████████▊ | 327295/371472 [3:52:26<3:38:13, 3.37it/s] 88%|████████▊ | 327296/371472 [3:52:26<3:35:18, 3.42it/s] 88%|████████▊ | 327297/371472 [3:52:26<3:39:22, 3.36it/s] 88%|████████▊ | 327298/371472 [3:52:27<3:35:14, 3.42it/s] 88%|████████▊ | 327299/371472 [3:52:27<3:28:19, 3.53it/s] 88%|████████▊ | 327300/371472 [3:52:27<3:20:20, 3.67it/s] {'loss': 2.6771, 'learning_rate': 2.070772910426857e-07, 'epoch': 14.1} + 88%|████████▊ | 327300/371472 [3:52:27<3:20:20, 3.67it/s] 88%|████████▊ | 327301/371472 [3:52:27<3:17:00, 3.74it/s] 88%|████████▊ | 327302/371472 [3:52:28<3:11:31, 3.84it/s] 88%|████████▊ | 327303/371472 [3:52:28<3:11:05, 3.85it/s] 88%|████████▊ | 327304/371472 [3:52:28<3:11:11, 3.85it/s] 88%|████████▊ | 327305/371472 [3:52:28<3:27:02, 3.56it/s] 88%|████████▊ | 327306/371472 [3:52:29<3:25:39, 3.58it/s] 88%|████████▊ | 327307/371472 [3:52:29<3:35:08, 3.42it/s] 88%|████████▊ | 327308/371472 [3:52:29<3:41:45, 3.32it/s] 88%|████████▊ | 327309/371472 [3:52:30<3:34:01, 3.44it/s] 88%|████████▊ | 327310/371472 [3:52:30<3:31:07, 3.49it/s] 88%|████████▊ | 327311/371472 [3:52:30<3:21:54, 3.65it/s] 88%|████████▊ | 327312/371472 [3:52:30<3:20:36, 3.67it/s] 88%|████████▊ | 327313/371472 [3:52:31<3:18:44, 3.70it/s] 88%|████████▊ | 327314/371472 [3:52:31<3:16:46, 3.74it/s] 88%|████████▊ | 327315/371472 [3:52:31<3:13:55, 3.79it/s] 88%|████████▊ | 327316/371472 [3:52:31<3:14:52, 3.78it/s] 88%|████████▊ | 327317/371472 [3:52:32<3:13:19, 3.81it/s] 88%|████████▊ | 327318/371472 [3:52:32<3:21:11, 3.66it/s] 88%|████████▊ | 327319/371472 [3:52:32<3:20:14, 3.68it/s] 88%|████████▊ | 327320/371472 [3:52:33<3:16:37, 3.74it/s] {'loss': 2.5527, 'learning_rate': 2.0702880906720676e-07, 'epoch': 14.1} + 88%|████████▊ | 327320/371472 [3:52:33<3:16:37, 3.74it/s] 88%|████████▊ | 327321/371472 [3:52:33<3:16:05, 3.75it/s] 88%|████████▊ | 327322/371472 [3:52:33<3:17:40, 3.72it/s] 88%|████████▊ | 327323/371472 [3:52:33<3:22:38, 3.63it/s] 88%|████████▊ | 327324/371472 [3:52:34<3:23:46, 3.61it/s] 88%|████████▊ | 327325/371472 [3:52:34<3:18:48, 3.70it/s] 88%|████████▊ | 327326/371472 [3:52:34<3:30:39, 3.49it/s] 88%|████████▊ | 327327/371472 [3:52:35<3:25:18, 3.58it/s] 88%|████████▊ | 327328/371472 [3:52:35<3:18:23, 3.71it/s] 88%|████████▊ | 327329/371472 [3:52:35<3:15:04, 3.77it/s] 88%|████████▊ | 327330/371472 [3:52:35<3:15:37, 3.76it/s] 88%|████████▊ | 327331/371472 [3:52:36<3:21:43, 3.65it/s] 88%|████████▊ | 327332/371472 [3:52:36<3:19:20, 3.69it/s] 88%|████████▊ | 327333/371472 [3:52:36<3:20:01, 3.68it/s] 88%|████████▊ | 327334/371472 [3:52:36<3:30:40, 3.49it/s] 88%|████████▊ | 327335/371472 [3:52:37<3:37:16, 3.39it/s] 88%|████████▊ | 327336/371472 [3:52:37<3:27:44, 3.54it/s] 88%|████████▊ | 327337/371472 [3:52:37<3:22:40, 3.63it/s] 88%|████████▊ | 327338/371472 [3:52:38<3:35:08, 3.42it/s] 88%|████████▊ | 327339/371472 [3:52:38<3:48:32, 3.22it/s] 88%|████████▊ | 327340/371472 [3:52:38<4:05:08, 3.00it/s] {'loss': 2.5488, 'learning_rate': 2.0698032709172788e-07, 'epoch': 14.1} + 88%|████████▊ | 327340/371472 [3:52:38<4:05:08, 3.00it/s] 88%|████████▊ | 327341/371472 [3:52:39<3:48:24, 3.22it/s] 88%|████████▊ | 327342/371472 [3:52:39<3:37:00, 3.39it/s] 88%|████████▊ | 327343/371472 [3:52:39<3:37:58, 3.37it/s] 88%|████████▊ | 327344/371472 [3:52:39<3:31:10, 3.48it/s] 88%|████████▊ | 327345/371472 [3:52:40<3:31:46, 3.47it/s] 88%|████████▊ | 327346/371472 [3:52:40<3:23:23, 3.62it/s] 88%|████████▊ | 327347/371472 [3:52:40<3:21:34, 3.65it/s] 88%|████████▊ | 327348/371472 [3:52:40<3:22:53, 3.62it/s] 88%|████████▊ | 327349/371472 [3:52:41<3:26:08, 3.57it/s] 88%|████████▊ | 327350/371472 [3:52:41<3:34:20, 3.43it/s] 88%|████████▊ | 327351/371472 [3:52:41<3:52:43, 3.16it/s] 88%|████████▊ | 327352/371472 [3:52:42<3:55:38, 3.12it/s] 88%|████████▊ | 327353/371472 [3:52:42<3:42:52, 3.30it/s] 88%|████████▊ | 327354/371472 [3:52:42<3:46:57, 3.24it/s] 88%|████████▊ | 327355/371472 [3:52:43<3:44:01, 3.28it/s] 88%|████████▊ | 327356/371472 [3:52:43<3:32:14, 3.46it/s] 88%|████████▊ | 327357/371472 [3:52:43<3:26:37, 3.56it/s] 88%|████████▊ | 327358/371472 [3:52:43<3:30:33, 3.49it/s] 88%|████████▊ | 327359/371472 [3:52:44<3:38:15, 3.37it/s] 88%|████████▊ | 327360/371472 [3:52:44<3:33:36, 3.44it/s] {'loss': 2.6285, 'learning_rate': 2.0693184511624896e-07, 'epoch': 14.1} + 88%|████████▊ | 327360/371472 [3:52:44<3:33:36, 3.44it/s] 88%|████████▊ | 327361/371472 [3:52:44<3:37:50, 3.37it/s] 88%|████████▊ | 327362/371472 [3:52:45<3:39:15, 3.35it/s] 88%|████████▊ | 327363/371472 [3:52:45<3:37:43, 3.38it/s] 88%|████████▊ | 327364/371472 [3:52:45<3:31:25, 3.48it/s] 88%|████████▊ | 327365/371472 [3:52:46<3:27:07, 3.55it/s] 88%|████████▊ | 327366/371472 [3:52:46<3:38:51, 3.36it/s] 88%|████████▊ | 327367/371472 [3:52:46<3:34:08, 3.43it/s] 88%|████████▊ | 327368/371472 [3:52:46<3:25:12, 3.58it/s] 88%|████████▊ | 327369/371472 [3:52:47<3:32:39, 3.46it/s] 88%|████████▊ | 327370/371472 [3:52:47<3:36:39, 3.39it/s] 88%|████████▊ | 327371/371472 [3:52:47<3:29:59, 3.50it/s] 88%|████████▊ | 327372/371472 [3:52:48<3:24:53, 3.59it/s] 88%|████████▊ | 327373/371472 [3:52:48<3:53:00, 3.15it/s] 88%|████████▊ | 327374/371472 [3:52:48<3:41:28, 3.32it/s] 88%|████████▊ | 327375/371472 [3:52:49<3:38:35, 3.36it/s] 88%|████████▊ | 327376/371472 [3:52:49<3:32:16, 3.46it/s] 88%|████████▊ | 327377/371472 [3:52:49<3:27:00, 3.55it/s] 88%|████████▊ | 327378/371472 [3:52:49<3:23:28, 3.61it/s] 88%|████████▊ | 327379/371472 [3:52:50<3:31:18, 3.48it/s] 88%|████████▊ | 327380/371472 [3:52:50<3:27:04, 3.55it/s] {'loss': 2.504, 'learning_rate': 2.0688336314077013e-07, 'epoch': 14.1} + 88%|████████▊ | 327380/371472 [3:52:50<3:27:04, 3.55it/s] 88%|████████▊ | 327381/371472 [3:52:50<3:33:49, 3.44it/s] 88%|████████▊ | 327382/371472 [3:52:50<3:24:48, 3.59it/s] 88%|████████▊ | 327383/371472 [3:52:51<3:35:53, 3.40it/s] 88%|████████▊ | 327384/371472 [3:52:51<3:29:57, 3.50it/s] 88%|████████▊ | 327385/371472 [3:52:51<3:36:47, 3.39it/s] 88%|████████▊ | 327386/371472 [3:52:52<3:25:01, 3.58it/s] 88%|████████▊ | 327387/371472 [3:52:52<3:39:35, 3.35it/s] 88%|████████▊ | 327388/371472 [3:52:52<3:44:34, 3.27it/s] 88%|████████▊ | 327389/371472 [3:52:53<3:37:02, 3.39it/s] 88%|████████▊ | 327390/371472 [3:52:53<3:25:41, 3.57it/s] 88%|████████▊ | 327391/371472 [3:52:53<3:42:08, 3.31it/s] 88%|████████▊ | 327392/371472 [3:52:53<3:30:39, 3.49it/s] 88%|████████▊ | 327393/371472 [3:52:54<3:33:06, 3.45it/s] 88%|████████▊ | 327394/371472 [3:52:54<3:33:31, 3.44it/s] 88%|████████▊ | 327395/371472 [3:52:54<3:31:58, 3.47it/s] 88%|████████▊ | 327396/371472 [3:52:55<3:49:20, 3.20it/s] 88%|████████▊ | 327397/371472 [3:52:55<4:09:50, 2.94it/s] 88%|████████▊ | 327398/371472 [3:52:55<3:52:07, 3.16it/s] 88%|████████▊ | 327399/371472 [3:52:56<3:41:05, 3.32it/s] 88%|████████▊ | 327400/371472 [3:52:56<3:40:40, 3.33it/s] {'loss': 2.5926, 'learning_rate': 2.0683488116529118e-07, 'epoch': 14.1} + 88%|████████▊ | 327400/371472 [3:52:56<3:40:40, 3.33it/s] 88%|████████▊ | 327401/371472 [3:52:56<3:42:39, 3.30it/s] 88%|████████▊ | 327402/371472 [3:52:56<3:40:48, 3.33it/s] 88%|████████▊ | 327403/371472 [3:52:57<3:49:17, 3.20it/s] 88%|████████▊ | 327404/371472 [3:52:57<3:48:27, 3.21it/s] 88%|████████▊ | 327405/371472 [3:52:57<3:52:23, 3.16it/s] 88%|████████▊ | 327406/371472 [3:52:58<3:43:01, 3.29it/s] 88%|████████▊ | 327407/371472 [3:52:58<3:35:29, 3.41it/s] 88%|████████▊ | 327408/371472 [3:52:58<3:33:10, 3.44it/s] 88%|████████▊ | 327409/371472 [3:52:59<3:43:31, 3.29it/s] 88%|████████▊ | 327410/371472 [3:52:59<3:57:01, 3.10it/s] 88%|████████▊ | 327411/371472 [3:52:59<3:37:55, 3.37it/s] 88%|████████▊ | 327412/371472 [3:52:59<3:28:00, 3.53it/s] 88%|████████▊ | 327413/371472 [3:53:00<3:27:15, 3.54it/s] 88%|████████▊ | 327414/371472 [3:53:00<3:33:55, 3.43it/s] 88%|████████▊ | 327415/371472 [3:53:00<3:53:43, 3.14it/s] 88%|████████▊ | 327416/371472 [3:53:01<3:45:37, 3.25it/s] 88%|████████▊ | 327417/371472 [3:53:01<3:38:43, 3.36it/s] 88%|████████▊ | 327418/371472 [3:53:01<3:27:37, 3.54it/s] 88%|████████▊ | 327419/371472 [3:53:02<3:40:45, 3.33it/s] 88%|████████▊ | 327420/371472 [3:53:02<3:36:07, 3.40it/s] {'loss': 2.4549, 'learning_rate': 2.0678639918981233e-07, 'epoch': 14.1} + 88%|████████▊ | 327420/371472 [3:53:02<3:36:07, 3.40it/s] 88%|████████▊ | 327421/371472 [3:53:02<3:29:06, 3.51it/s] 88%|████████▊ | 327422/371472 [3:53:02<3:26:52, 3.55it/s] 88%|████████▊ | 327423/371472 [3:53:03<3:26:09, 3.56it/s] 88%|████████▊ | 327424/371472 [3:53:03<3:20:53, 3.65it/s] 88%|████████▊ | 327425/371472 [3:53:03<3:30:30, 3.49it/s] 88%|████████▊ | 327426/371472 [3:53:04<3:24:10, 3.60it/s] 88%|████████▊ | 327427/371472 [3:53:04<3:26:31, 3.55it/s] 88%|████████▊ | 327428/371472 [3:53:04<3:21:39, 3.64it/s] 88%|████████▊ | 327429/371472 [3:53:04<3:17:50, 3.71it/s] 88%|████████▊ | 327430/371472 [3:53:05<3:22:27, 3.63it/s] 88%|████████▊ | 327431/371472 [3:53:05<3:20:55, 3.65it/s] 88%|████████▊ | 327432/371472 [3:53:05<3:17:07, 3.72it/s] 88%|████████▊ | 327433/371472 [3:53:05<3:18:48, 3.69it/s] 88%|████████▊ | 327434/371472 [3:53:06<3:22:13, 3.63it/s] 88%|████████▊ | 327435/371472 [3:53:06<3:46:45, 3.24it/s] 88%|████████▊ | 327436/371472 [3:53:06<3:43:43, 3.28it/s] 88%|████████▊ | 327437/371472 [3:53:07<3:36:50, 3.38it/s] 88%|████████▊ | 327438/371472 [3:53:07<3:32:34, 3.45it/s] 88%|████████▊ | 327439/371472 [3:53:07<3:34:33, 3.42it/s] 88%|████████▊ | 327440/371472 [3:53:08<3:35:45, 3.40it/s] {'loss': 2.5193, 'learning_rate': 2.067379172143334e-07, 'epoch': 14.1} + 88%|████████▊ | 327440/371472 [3:53:08<3:35:45, 3.40it/s] 88%|████████▊ | 327441/371472 [3:53:08<3:46:10, 3.24it/s] 88%|████████▊ | 327442/371472 [3:53:08<3:35:56, 3.40it/s] 88%|████████▊ | 327443/371472 [3:53:08<3:43:42, 3.28it/s] 88%|████████▊ | 327444/371472 [3:53:09<3:30:58, 3.48it/s] 88%|████████▊ | 327445/371472 [3:53:09<3:24:17, 3.59it/s] 88%|████████▊ | 327446/371472 [3:53:09<3:19:10, 3.68it/s] 88%|████████▊ | 327447/371472 [3:53:10<3:32:37, 3.45it/s] 88%|████████▊ | 327448/371472 [3:53:10<3:21:59, 3.63it/s] 88%|████████▊ | 327449/371472 [3:53:10<3:24:19, 3.59it/s] 88%|████████▊ | 327450/371472 [3:53:10<3:19:38, 3.68it/s] 88%|████████▊ | 327451/371472 [3:53:11<3:30:04, 3.49it/s] 88%|████████▊ | 327452/371472 [3:53:11<3:35:42, 3.40it/s] 88%|████████▊ | 327453/371472 [3:53:11<3:29:35, 3.50it/s] 88%|████████▊ | 327454/371472 [3:53:12<3:28:20, 3.52it/s] 88%|████████▊ | 327455/371472 [3:53:12<3:28:44, 3.51it/s] 88%|████████▊ | 327456/371472 [3:53:12<3:30:14, 3.49it/s] 88%|████████▊ | 327457/371472 [3:53:12<3:20:21, 3.66it/s] 88%|████████▊ | 327458/371472 [3:53:13<3:17:17, 3.72it/s] 88%|████████▊ | 327459/371472 [3:53:13<3:21:58, 3.63it/s] 88%|████████▊ | 327460/371472 [3:53:13<3:23:35, 3.60it/s] {'loss': 2.7701, 'learning_rate': 2.0668943523885455e-07, 'epoch': 14.1} + 88%|████████▊ | 327460/371472 [3:53:13<3:23:35, 3.60it/s] 88%|████████▊ | 327461/371472 [3:53:13<3:19:27, 3.68it/s] 88%|████████▊ | 327462/371472 [3:53:14<3:21:52, 3.63it/s] 88%|████████▊ | 327463/371472 [3:53:14<3:13:36, 3.79it/s] 88%|████████▊ | 327464/371472 [3:53:14<3:23:50, 3.60it/s] 88%|████████▊ | 327465/371472 [3:53:15<3:21:20, 3.64it/s] 88%|████████▊ | 327466/371472 [3:53:15<3:17:36, 3.71it/s] 88%|████████▊ | 327467/371472 [3:53:15<3:19:43, 3.67it/s] 88%|████████▊ | 327468/371472 [3:53:15<3:28:28, 3.52it/s] 88%|████████▊ | 327469/371472 [3:53:16<3:23:04, 3.61it/s] 88%|████████▊ | 327470/371472 [3:53:16<3:22:53, 3.61it/s] 88%|████████▊ | 327471/371472 [3:53:16<3:29:58, 3.49it/s] 88%|████████▊ | 327472/371472 [3:53:17<3:33:59, 3.43it/s] 88%|████████▊ | 327473/371472 [3:53:17<3:43:11, 3.29it/s] 88%|████████▊ | 327474/371472 [3:53:17<3:30:16, 3.49it/s] 88%|████████▊ | 327475/371472 [3:53:17<3:28:06, 3.52it/s] 88%|████████▊ | 327476/371472 [3:53:18<3:32:13, 3.46it/s] 88%|████████▊ | 327477/371472 [3:53:18<3:37:50, 3.37it/s] 88%|████████▊ | 327478/371472 [3:53:18<3:31:40, 3.46it/s] 88%|████████▊ | 327479/371472 [3:53:19<3:23:07, 3.61it/s] 88%|████████▊ | 327480/371472 [3:53:19<3:24:49, 3.58it/s] {'loss': 2.5079, 'learning_rate': 2.066409532633756e-07, 'epoch': 14.11} + 88%|████████▊ | 327480/371472 [3:53:19<3:24:49, 3.58it/s] 88%|████████▊ | 327481/371472 [3:53:19<3:17:30, 3.71it/s] 88%|████████▊ | 327482/371472 [3:53:19<3:12:47, 3.80it/s] 88%|████████▊ | 327483/371472 [3:53:20<3:18:42, 3.69it/s] 88%|████████▊ | 327484/371472 [3:53:20<3:14:20, 3.77it/s] 88%|████████▊ | 327485/371472 [3:53:20<3:19:07, 3.68it/s] 88%|████████▊ | 327486/371472 [3:53:20<3:21:37, 3.64it/s] 88%|████████▊ | 327487/371472 [3:53:21<3:15:29, 3.75it/s] 88%|████████▊ | 327488/371472 [3:53:21<3:31:48, 3.46it/s] 88%|████████▊ | 327489/371472 [3:53:21<3:25:51, 3.56it/s] 88%|████████▊ | 327490/371472 [3:53:22<3:26:24, 3.55it/s] 88%|████████▊ | 327491/371472 [3:53:22<3:18:22, 3.70it/s] 88%|████████▊ | 327492/371472 [3:53:22<3:23:51, 3.60it/s] 88%|████████▊ | 327493/371472 [3:53:22<3:27:08, 3.54it/s] 88%|████████▊ | 327494/371472 [3:53:23<3:30:28, 3.48it/s] 88%|████████▊ | 327495/371472 [3:53:23<3:22:08, 3.63it/s] 88%|████████▊ | 327496/371472 [3:53:23<3:22:01, 3.63it/s] 88%|████████▊ | 327497/371472 [3:53:23<3:30:09, 3.49it/s] 88%|████████▊ | 327498/371472 [3:53:24<3:29:25, 3.50it/s] 88%|████████▊ | 327499/371472 [3:53:24<3:25:53, 3.56it/s] 88%|████████▊ | 327500/371472 [3:53:24<3:30:04, 3.49it/s] {'loss': 2.4918, 'learning_rate': 2.0659247128789677e-07, 'epoch': 14.11} + 88%|████████▊ | 327500/371472 [3:53:24<3:30:04, 3.49it/s] 88%|████████▊ | 327501/371472 [3:53:25<3:37:18, 3.37it/s] 88%|████████▊ | 327502/371472 [3:53:25<3:27:23, 3.53it/s] 88%|████████▊ | 327503/371472 [3:53:25<3:28:56, 3.51it/s] 88%|████████▊ | 327504/371472 [3:53:26<3:37:54, 3.36it/s] 88%|████████▊ | 327505/371472 [3:53:26<3:24:37, 3.58it/s] 88%|████████▊ | 327506/371472 [3:53:26<3:35:27, 3.40it/s] 88%|████████▊ | 327507/371472 [3:53:26<3:48:57, 3.20it/s] 88%|████████▊ | 327508/371472 [3:53:27<3:34:33, 3.42it/s] 88%|████████▊ | 327509/371472 [3:53:27<3:26:43, 3.54it/s] 88%|████████▊ | 327510/371472 [3:53:27<3:27:37, 3.53it/s] 88%|████████▊ | 327511/371472 [3:53:28<3:23:56, 3.59it/s] 88%|████████▊ | 327512/371472 [3:53:28<3:28:17, 3.52it/s] 88%|████████▊ | 327513/371472 [3:53:28<3:27:37, 3.53it/s] 88%|████████▊ | 327514/371472 [3:53:28<3:35:05, 3.41it/s] 88%|████████▊ | 327515/371472 [3:53:29<3:29:06, 3.50it/s] 88%|████████▊ | 327516/371472 [3:53:29<3:28:31, 3.51it/s] 88%|████████▊ | 327517/371472 [3:53:29<3:26:56, 3.54it/s] 88%|████████▊ | 327518/371472 [3:53:29<3:22:24, 3.62it/s] 88%|████████▊ | 327519/371472 [3:53:30<3:25:22, 3.57it/s] 88%|████████▊ | 327520/371472 [3:53:30<3:15:12, 3.75it/s] {'loss': 2.5357, 'learning_rate': 2.0654398931241782e-07, 'epoch': 14.11} + 88%|████████▊ | 327520/371472 [3:53:30<3:15:12, 3.75it/s] 88%|████████▊ | 327521/371472 [3:53:30<3:12:18, 3.81it/s] 88%|████████▊ | 327522/371472 [3:53:31<3:26:49, 3.54it/s] 88%|████████▊ | 327523/371472 [3:53:31<3:22:13, 3.62it/s] 88%|████████▊ | 327524/371472 [3:53:31<3:24:26, 3.58it/s] 88%|████████▊ | 327525/371472 [3:53:31<3:15:09, 3.75it/s] 88%|████████▊ | 327526/371472 [3:53:32<3:12:58, 3.80it/s] 88%|████████▊ | 327527/371472 [3:53:32<3:34:52, 3.41it/s] 88%|████████▊ | 327528/371472 [3:53:32<3:27:21, 3.53it/s] 88%|████████▊ | 327529/371472 [3:53:33<3:21:04, 3.64it/s] 88%|████████▊ | 327530/371472 [3:53:33<3:21:21, 3.64it/s] 88%|████████▊ | 327531/371472 [3:53:33<3:21:11, 3.64it/s] 88%|████████▊ | 327532/371472 [3:53:33<3:22:35, 3.61it/s] 88%|████████▊ | 327533/371472 [3:53:34<3:23:39, 3.60it/s] 88%|████████▊ | 327534/371472 [3:53:34<3:14:34, 3.76it/s] 88%|████████▊ | 327535/371472 [3:53:34<3:16:55, 3.72it/s] 88%|████████▊ | 327536/371472 [3:53:34<3:18:40, 3.69it/s] 88%|████████▊ | 327537/371472 [3:53:35<3:20:30, 3.65it/s] 88%|████████▊ | 327538/371472 [3:53:35<3:16:11, 3.73it/s] 88%|████████▊ | 327539/371472 [3:53:35<3:10:08, 3.85it/s] 88%|████████▊ | 327540/371472 [3:53:35<3:08:13, 3.89it/s] {'loss': 2.6307, 'learning_rate': 2.0649550733693897e-07, 'epoch': 14.11} + 88%|████████▊ | 327540/371472 [3:53:35<3:08:13, 3.89it/s] 88%|████████▊ | 327541/371472 [3:53:36<3:28:09, 3.52it/s] 88%|████████▊ | 327542/371472 [3:53:36<3:25:45, 3.56it/s] 88%|████████▊ | 327543/371472 [3:53:36<3:15:59, 3.74it/s] 88%|████████▊ | 327544/371472 [3:53:37<3:13:39, 3.78it/s] 88%|████████▊ | 327545/371472 [3:53:37<3:14:21, 3.77it/s] 88%|████████▊ | 327546/371472 [3:53:37<3:16:24, 3.73it/s] 88%|████████▊ | 327547/371472 [3:53:37<3:22:06, 3.62it/s] 88%|████████▊ | 327548/371472 [3:53:38<3:17:17, 3.71it/s] 88%|████████▊ | 327549/371472 [3:53:38<3:21:30, 3.63it/s] 88%|████████▊ | 327550/371472 [3:53:38<3:32:39, 3.44it/s] 88%|████████▊ | 327551/371472 [3:53:39<3:36:14, 3.39it/s] 88%|████████▊ | 327552/371472 [3:53:39<3:27:57, 3.52it/s] 88%|████████▊ | 327553/371472 [3:53:39<3:42:21, 3.29it/s] 88%|████████▊ | 327554/371472 [3:53:39<3:32:51, 3.44it/s] 88%|████████▊ | 327555/371472 [3:53:40<3:26:57, 3.54it/s] 88%|████████▊ | 327556/371472 [3:53:40<3:23:45, 3.59it/s] 88%|████████▊ | 327557/371472 [3:53:40<3:28:14, 3.51it/s] 88%|████████▊ | 327558/371472 [3:53:41<3:30:16, 3.48it/s] 88%|████████▊ | 327559/371472 [3:53:41<3:37:40, 3.36it/s] 88%|████████▊ | 327560/371472 [3:53:41<3:28:45, 3.51it/s] {'loss': 2.4225, 'learning_rate': 2.0644702536146004e-07, 'epoch': 14.11} + 88%|████████▊ | 327560/371472 [3:53:41<3:28:45, 3.51it/s] 88%|████████▊ | 327561/371472 [3:53:41<3:36:55, 3.37it/s] 88%|████████▊ | 327562/371472 [3:53:42<3:28:20, 3.51it/s] 88%|████████▊ | 327563/371472 [3:53:42<3:31:01, 3.47it/s] 88%|████████▊ | 327564/371472 [3:53:42<3:28:44, 3.51it/s] 88%|████████▊ | 327565/371472 [3:53:43<3:42:55, 3.28it/s] 88%|████████▊ | 327566/371472 [3:53:43<3:33:35, 3.43it/s] 88%|████████▊ | 327567/371472 [3:53:43<3:30:23, 3.48it/s] 88%|████████▊ | 327568/371472 [3:53:44<3:37:51, 3.36it/s] 88%|████████▊ | 327569/371472 [3:53:44<3:26:02, 3.55it/s] 88%|████████▊ | 327570/371472 [3:53:44<3:20:30, 3.65it/s] 88%|████████▊ | 327571/371472 [3:53:44<3:19:42, 3.66it/s] 88%|████████▊ | 327572/371472 [3:53:45<3:19:28, 3.67it/s] 88%|████████▊ | 327573/371472 [3:53:45<3:13:46, 3.78it/s] 88%|████████▊ | 327574/371472 [3:53:45<3:14:15, 3.77it/s] 88%|████████▊ | 327575/371472 [3:53:45<3:14:23, 3.76it/s] 88%|████████▊ | 327576/371472 [3:53:46<3:08:34, 3.88it/s] 88%|████████▊ | 327577/371472 [3:53:46<3:09:19, 3.86it/s] 88%|████████▊ | 327578/371472 [3:53:46<3:29:46, 3.49it/s] 88%|████████▊ | 327579/371472 [3:53:46<3:19:42, 3.66it/s] 88%|████████▊ | 327580/371472 [3:53:47<3:15:19, 3.75it/s] {'loss': 2.6836, 'learning_rate': 2.063985433859812e-07, 'epoch': 14.11} + 88%|████████▊ | 327580/371472 [3:53:47<3:15:19, 3.75it/s] 88%|████████▊ | 327581/371472 [3:53:47<3:25:16, 3.56it/s] 88%|████████▊ | 327582/371472 [3:53:47<3:16:50, 3.72it/s] 88%|████████▊ | 327583/371472 [3:53:48<3:18:34, 3.68it/s] 88%|████████▊ | 327584/371472 [3:53:48<3:18:57, 3.68it/s] 88%|████████▊ | 327585/371472 [3:53:48<3:17:29, 3.70it/s] 88%|████████▊ | 327586/371472 [3:53:48<3:18:25, 3.69it/s] 88%|████████▊ | 327587/371472 [3:53:49<3:19:39, 3.66it/s] 88%|████████▊ | 327588/371472 [3:53:49<3:21:35, 3.63it/s] 88%|████████▊ | 327589/371472 [3:53:49<3:18:01, 3.69it/s] 88%|████████▊ | 327590/371472 [3:53:49<3:35:24, 3.40it/s] 88%|████████▊ | 327591/371472 [3:53:50<3:33:37, 3.42it/s] 88%|████████▊ | 327592/371472 [3:53:50<3:25:02, 3.57it/s] 88%|████████▊ | 327593/371472 [3:53:50<3:14:45, 3.76it/s] 88%|████████▊ | 327594/371472 [3:53:51<3:08:25, 3.88it/s] 88%|████████▊ | 327595/371472 [3:53:51<3:08:29, 3.88it/s] 88%|████████▊ | 327596/371472 [3:53:51<3:14:40, 3.76it/s] 88%|████████▊ | 327597/371472 [3:53:51<3:12:44, 3.79it/s] 88%|████████▊ | 327598/371472 [3:53:52<3:17:18, 3.71it/s] 88%|████████▊ | 327599/371472 [3:53:52<3:20:20, 3.65it/s] 88%|████████▊ | 327600/371472 [3:53:52<3:17:11, 3.71it/s] {'loss': 2.4589, 'learning_rate': 2.0635006141050224e-07, 'epoch': 14.11} + 88%|████████▊ | 327600/371472 [3:53:52<3:17:11, 3.71it/s] 88%|████████▊ | 327601/371472 [3:53:52<3:18:07, 3.69it/s] 88%|████████▊ | 327602/371472 [3:53:53<3:13:27, 3.78it/s] 88%|████████▊ | 327603/371472 [3:53:53<3:43:58, 3.26it/s] 88%|████████▊ | 327604/371472 [3:53:53<3:44:21, 3.26it/s] 88%|████████▊ | 327605/371472 [3:53:54<3:29:17, 3.49it/s] 88%|████████▊ | 327606/371472 [3:53:54<3:26:23, 3.54it/s] 88%|████████▊ | 327607/371472 [3:53:54<3:26:32, 3.54it/s] 88%|████████▊ | 327608/371472 [3:53:54<3:31:23, 3.46it/s] 88%|████████▊ | 327609/371472 [3:53:55<3:27:38, 3.52it/s] 88%|████████▊ | 327610/371472 [3:53:55<3:23:52, 3.59it/s] 88%|████████▊ | 327611/371472 [3:53:55<3:17:59, 3.69it/s] 88%|████████▊ | 327612/371472 [3:53:56<3:25:25, 3.56it/s] 88%|████████▊ | 327613/371472 [3:53:56<3:22:33, 3.61it/s] 88%|████████▊ | 327614/371472 [3:53:56<3:19:04, 3.67it/s] 88%|████████▊ | 327615/371472 [3:53:56<3:17:07, 3.71it/s] 88%|████████▊ | 327616/371472 [3:53:57<3:11:53, 3.81it/s] 88%|████████▊ | 327617/371472 [3:53:57<3:19:33, 3.66it/s] 88%|████████▊ | 327618/371472 [3:53:57<3:28:19, 3.51it/s] 88%|████████▊ | 327619/371472 [3:53:57<3:25:49, 3.55it/s] 88%|████████▊ | 327620/371472 [3:53:58<3:21:18, 3.63it/s] {'loss': 2.6186, 'learning_rate': 2.0630157943502341e-07, 'epoch': 14.11} + 88%|████████▊ | 327620/371472 [3:53:58<3:21:18, 3.63it/s] 88%|████████▊ | 327621/371472 [3:53:58<3:16:12, 3.72it/s] 88%|████████▊ | 327622/371472 [3:53:58<3:27:05, 3.53it/s] 88%|████████▊ | 327623/371472 [3:53:59<3:24:11, 3.58it/s] 88%|████████▊ | 327624/371472 [3:53:59<3:21:14, 3.63it/s] 88%|████████▊ | 327625/371472 [3:53:59<3:25:26, 3.56it/s] 88%|████████▊ | 327626/371472 [3:53:59<3:31:11, 3.46it/s] 88%|████████▊ | 327627/371472 [3:54:00<3:30:59, 3.46it/s] 88%|████████▊ | 327628/371472 [3:54:00<3:28:39, 3.50it/s] 88%|████████▊ | 327629/371472 [3:54:00<3:46:05, 3.23it/s] 88%|████████▊ | 327630/371472 [3:54:01<3:35:17, 3.39it/s] 88%|████████▊ | 327631/371472 [3:54:01<3:33:11, 3.43it/s] 88%|████████▊ | 327632/371472 [3:54:01<3:28:13, 3.51it/s] 88%|████████▊ | 327633/371472 [3:54:02<3:43:46, 3.27it/s] 88%|████████▊ | 327634/371472 [3:54:02<3:47:17, 3.21it/s] 88%|████████▊ | 327635/371472 [3:54:02<3:45:07, 3.25it/s] 88%|████████▊ | 327636/371472 [3:54:02<3:35:02, 3.40it/s] 88%|████████▊ | 327637/371472 [3:54:03<3:36:01, 3.38it/s] 88%|████████▊ | 327638/371472 [3:54:03<3:31:22, 3.46it/s] 88%|████████▊ | 327639/371472 [3:54:03<3:38:57, 3.34it/s] 88%|████████▊ | 327640/371472 [3:54:04<3:27:53, 3.51it/s] {'loss': 2.5458, 'learning_rate': 2.0625309745954449e-07, 'epoch': 14.11} + 88%|████████▊ | 327640/371472 [3:54:04<3:27:53, 3.51it/s] 88%|████████▊ | 327641/371472 [3:54:04<3:19:06, 3.67it/s] 88%|████████▊ | 327642/371472 [3:54:04<3:21:15, 3.63it/s] 88%|████████▊ | 327643/371472 [3:54:04<3:17:47, 3.69it/s] 88%|████████▊ | 327644/371472 [3:54:05<3:14:35, 3.75it/s] 88%|████████▊ | 327645/371472 [3:54:05<3:16:11, 3.72it/s] 88%|████████▊ | 327646/371472 [3:54:05<3:15:11, 3.74it/s] 88%|████████▊ | 327647/371472 [3:54:05<3:21:48, 3.62it/s] 88%|████████▊ | 327648/371472 [3:54:06<3:25:57, 3.55it/s] 88%|████████▊ | 327649/371472 [3:54:06<3:26:18, 3.54it/s] 88%|████████▊ | 327650/371472 [3:54:06<3:20:55, 3.64it/s] 88%|████████▊ | 327651/371472 [3:54:07<3:29:28, 3.49it/s] 88%|████████▊ | 327652/371472 [3:54:07<3:24:53, 3.56it/s] 88%|████████▊ | 327653/371472 [3:54:07<3:21:07, 3.63it/s] 88%|████████▊ | 327654/371472 [3:54:07<3:20:41, 3.64it/s] 88%|████████▊ | 327655/371472 [3:54:08<3:22:55, 3.60it/s] 88%|████████▊ | 327656/371472 [3:54:08<3:29:15, 3.49it/s] 88%|████████▊ | 327657/371472 [3:54:08<3:35:02, 3.40it/s] 88%|████████▊ | 327658/371472 [3:54:09<3:44:01, 3.26it/s] 88%|████████▊ | 327659/371472 [3:54:09<3:42:01, 3.29it/s] 88%|████████▊ | 327660/371472 [3:54:09<3:43:32, 3.27it/s] {'loss': 2.5726, 'learning_rate': 2.062046154840656e-07, 'epoch': 14.11} + 88%|████████▊ | 327660/371472 [3:54:09<3:43:32, 3.27it/s] 88%|████████▊ | 327661/371472 [3:54:10<3:32:15, 3.44it/s] 88%|████████▊ | 327662/371472 [3:54:10<3:29:47, 3.48it/s] 88%|████████▊ | 327663/371472 [3:54:10<3:30:39, 3.47it/s] 88%|████████▊ | 327664/371472 [3:54:10<3:35:16, 3.39it/s] 88%|████████▊ | 327665/371472 [3:54:11<3:39:36, 3.32it/s] 88%|████████▊ | 327666/371472 [3:54:11<3:51:55, 3.15it/s] 88%|████████▊ | 327667/371472 [3:54:11<3:52:09, 3.14it/s] 88%|████████▊ | 327668/371472 [3:54:12<3:46:32, 3.22it/s] 88%|████████▊ | 327669/371472 [3:54:12<3:33:58, 3.41it/s] 88%|████████▊ | 327670/371472 [3:54:12<3:38:20, 3.34it/s] 88%|████████▊ | 327671/371472 [3:54:13<3:40:44, 3.31it/s] 88%|████████▊ | 327672/371472 [3:54:13<3:46:50, 3.22it/s] 88%|████████▊ | 327673/371472 [3:54:13<3:36:28, 3.37it/s] 88%|████████▊ | 327674/371472 [3:54:13<3:26:49, 3.53it/s] 88%|████████▊ | 327675/371472 [3:54:14<3:33:02, 3.43it/s] 88%|████████▊ | 327676/371472 [3:54:14<3:47:52, 3.20it/s] 88%|████████▊ | 327677/371472 [3:54:14<3:33:56, 3.41it/s] 88%|████████▊ | 327678/371472 [3:54:15<3:28:41, 3.50it/s] 88%|████████▊ | 327679/371472 [3:54:15<3:20:01, 3.65it/s] 88%|████████▊ | 327680/371472 [3:54:15<3:20:14, 3.65it/s] {'loss': 2.449, 'learning_rate': 2.0615613350858668e-07, 'epoch': 14.11} + 88%|████████▊ | 327680/371472 [3:54:15<3:20:14, 3.65it/s] 88%|████████▊ | 327681/371472 [3:54:15<3:18:14, 3.68it/s] 88%|████████▊ | 327682/371472 [3:54:16<3:10:46, 3.83it/s] 88%|████████▊ | 327683/371472 [3:54:16<3:16:23, 3.72it/s] 88%|████████▊ | 327684/371472 [3:54:16<3:17:58, 3.69it/s] 88%|████████▊ | 327685/371472 [3:54:16<3:18:17, 3.68it/s] 88%|████████▊ | 327686/371472 [3:54:17<3:23:35, 3.58it/s] 88%|████████▊ | 327687/371472 [3:54:17<3:18:51, 3.67it/s] 88%|████████▊ | 327688/371472 [3:54:17<3:12:11, 3.80it/s] 88%|████████▊ | 327689/371472 [3:54:18<3:09:42, 3.85it/s] 88%|████████▊ | 327690/371472 [3:54:18<3:10:51, 3.82it/s] 88%|████████▊ | 327691/371472 [3:54:18<3:14:07, 3.76it/s] 88%|████████▊ | 327692/371472 [3:54:18<3:15:08, 3.74it/s] 88%|████████▊ | 327693/371472 [3:54:19<3:13:03, 3.78it/s] 88%|████████▊ | 327694/371472 [3:54:19<3:17:00, 3.70it/s] 88%|████████▊ | 327695/371472 [3:54:19<3:11:30, 3.81it/s] 88%|████████▊ | 327696/371472 [3:54:19<3:16:16, 3.72it/s] 88%|████████▊ | 327697/371472 [3:54:20<3:41:25, 3.29it/s] 88%|████████▊ | 327698/371472 [3:54:20<3:37:18, 3.36it/s] 88%|████████▊ | 327699/371472 [3:54:20<3:33:10, 3.42it/s] 88%|████████▊ | 327700/371472 [3:54:21<3:39:43, 3.32it/s] {'loss': 2.6367, 'learning_rate': 2.0610765153310786e-07, 'epoch': 14.11} + 88%|████████▊ | 327700/371472 [3:54:21<3:39:43, 3.32it/s] 88%|████████▊ | 327701/371472 [3:54:21<3:32:25, 3.43it/s] 88%|████████▊ | 327702/371472 [3:54:21<3:31:21, 3.45it/s] 88%|████████▊ | 327703/371472 [3:54:22<3:32:20, 3.44it/s] 88%|████████▊ | 327704/371472 [3:54:22<3:29:14, 3.49it/s] 88%|████████▊ | 327705/371472 [3:54:22<3:31:30, 3.45it/s] 88%|████████▊ | 327706/371472 [3:54:22<3:39:19, 3.33it/s] 88%|████████▊ | 327707/371472 [3:54:23<3:37:20, 3.36it/s] 88%|████████▊ | 327708/371472 [3:54:23<3:30:33, 3.46it/s] 88%|████████▊ | 327709/371472 [3:54:23<3:29:46, 3.48it/s] 88%|████████▊ | 327710/371472 [3:54:24<3:32:44, 3.43it/s] 88%|████████▊ | 327711/371472 [3:54:24<3:37:33, 3.35it/s] 88%|████████▊ | 327712/371472 [3:54:24<3:32:46, 3.43it/s] 88%|████████▊ | 327713/371472 [3:54:24<3:24:41, 3.56it/s] 88%|████████▊ | 327714/371472 [3:54:25<3:27:28, 3.52it/s] 88%|████████▊ | 327715/371472 [3:54:25<3:19:15, 3.66it/s] 88%|████████▊ | 327716/371472 [3:54:25<3:19:23, 3.66it/s] 88%|████████▊ | 327717/371472 [3:54:25<3:18:53, 3.67it/s] 88%|████████▊ | 327718/371472 [3:54:26<3:19:10, 3.66it/s] 88%|████████▊ | 327719/371472 [3:54:26<3:19:23, 3.66it/s] 88%|████████▊ | 327720/371472 [3:54:26<3:22:14, 3.61it/s] {'loss': 2.5798, 'learning_rate': 2.060591695576289e-07, 'epoch': 14.12} + 88%|████████▊ | 327720/371472 [3:54:26<3:22:14, 3.61it/s] 88%|████████▊ | 327721/371472 [3:54:27<3:15:25, 3.73it/s] 88%|████████▊ | 327722/371472 [3:54:27<3:16:37, 3.71it/s] 88%|████████▊ | 327723/371472 [3:54:27<3:09:41, 3.84it/s] 88%|████████▊ | 327724/371472 [3:54:27<3:08:18, 3.87it/s] 88%|████████▊ | 327725/371472 [3:54:28<3:13:28, 3.77it/s] 88%|████████▊ | 327726/371472 [3:54:28<3:10:16, 3.83it/s] 88%|████████▊ | 327727/371472 [3:54:28<3:13:19, 3.77it/s] 88%|████████▊ | 327728/371472 [3:54:28<3:24:36, 3.56it/s] 88%|████████▊ | 327729/371472 [3:54:29<3:16:07, 3.72it/s] 88%|████████▊ | 327730/371472 [3:54:29<3:22:46, 3.60it/s] 88%|████████▊ | 327731/371472 [3:54:29<3:20:49, 3.63it/s] 88%|████████▊ | 327732/371472 [3:54:30<3:22:47, 3.59it/s] 88%|████████▊ | 327733/371472 [3:54:30<3:16:29, 3.71it/s] 88%|████████▊ | 327734/371472 [3:54:30<3:18:40, 3.67it/s] 88%|████████▊ | 327735/371472 [3:54:30<3:44:54, 3.24it/s] 88%|████████▊ | 327736/371472 [3:54:31<3:41:18, 3.29it/s] 88%|████████▊ | 327737/371472 [3:54:31<3:36:02, 3.37it/s] 88%|████████▊ | 327738/371472 [3:54:31<3:30:53, 3.46it/s] 88%|████████▊ | 327739/371472 [3:54:32<3:32:17, 3.43it/s] 88%|████████▊ | 327740/371472 [3:54:32<3:31:08, 3.45it/s] {'loss': 2.6088, 'learning_rate': 2.0601068758214995e-07, 'epoch': 14.12} + 88%|████████▊ | 327740/371472 [3:54:32<3:31:08, 3.45it/s] 88%|████████▊ | 327741/371472 [3:54:32<3:29:07, 3.49it/s] 88%|████████▊ | 327742/371472 [3:54:32<3:22:41, 3.60it/s] 88%|████████▊ | 327743/371472 [3:54:33<3:20:10, 3.64it/s] 88%|████████▊ | 327744/371472 [3:54:33<3:22:50, 3.59it/s] 88%|████████▊ | 327745/371472 [3:54:33<3:17:56, 3.68it/s] 88%|████████▊ | 327746/371472 [3:54:33<3:11:11, 3.81it/s] 88%|████████��� | 327747/371472 [3:54:34<3:15:43, 3.72it/s] 88%|████████▊ | 327748/371472 [3:54:34<3:14:25, 3.75it/s] 88%|████████▊ | 327749/371472 [3:54:34<3:13:13, 3.77it/s] 88%|████████▊ | 327750/371472 [3:54:35<3:08:57, 3.86it/s] 88%|████████▊ | 327751/371472 [3:54:35<3:14:20, 3.75it/s] 88%|████████▊ | 327752/371472 [3:54:35<3:13:02, 3.77it/s] 88%|████████▊ | 327753/371472 [3:54:35<3:21:18, 3.62it/s] 88%|████████▊ | 327754/371472 [3:54:36<3:28:35, 3.49it/s] 88%|████████▊ | 327755/371472 [3:54:36<3:24:01, 3.57it/s] 88%|████████▊ | 327756/371472 [3:54:36<3:47:12, 3.21it/s] 88%|████████▊ | 327757/371472 [3:54:37<3:47:23, 3.20it/s] 88%|████████▊ | 327758/371472 [3:54:37<3:43:02, 3.27it/s] 88%|████████▊ | 327759/371472 [3:54:37<3:39:08, 3.32it/s] 88%|████████▊ | 327760/371472 [3:54:38<3:34:57, 3.39it/s] {'loss': 2.7447, 'learning_rate': 2.0596220560667113e-07, 'epoch': 14.12} + 88%|████████▊ | 327760/371472 [3:54:38<3:34:57, 3.39it/s] 88%|████████▊ | 327761/371472 [3:54:38<3:38:32, 3.33it/s] 88%|████████▊ | 327762/371472 [3:54:38<3:47:06, 3.21it/s] 88%|████████▊ | 327763/371472 [3:54:38<3:46:35, 3.21it/s] 88%|████████▊ | 327764/371472 [3:54:39<3:41:52, 3.28it/s] 88%|████████▊ | 327765/371472 [3:54:39<3:35:09, 3.39it/s] 88%|████████▊ | 327766/371472 [3:54:39<3:31:24, 3.45it/s] 88%|████████▊ | 327767/371472 [3:54:40<3:29:06, 3.48it/s] 88%|████████▊ | 327768/371472 [3:54:40<3:28:42, 3.49it/s] 88%|████████▊ | 327769/371472 [3:54:40<3:34:13, 3.40it/s] 88%|████████▊ | 327770/371472 [3:54:40<3:33:45, 3.41it/s] 88%|████████▊ | 327771/371472 [3:54:41<3:38:32, 3.33it/s] 88%|████████▊ | 327772/371472 [3:54:41<3:36:15, 3.37it/s] 88%|████████▊ | 327773/371472 [3:54:41<3:36:57, 3.36it/s] 88%|████████▊ | 327774/371472 [3:54:42<3:34:32, 3.39it/s] 88%|████████▊ | 327775/371472 [3:54:42<3:33:55, 3.40it/s] 88%|████████▊ | 327776/371472 [3:54:42<3:32:44, 3.42it/s] 88%|████████▊ | 327777/371472 [3:54:43<3:42:58, 3.27it/s] 88%|████████▊ | 327778/371472 [3:54:43<3:36:28, 3.36it/s] 88%|████████▊ | 327779/371472 [3:54:43<3:32:38, 3.42it/s] 88%|████████▊ | 327780/371472 [3:54:44<3:52:44, 3.13it/s] {'loss': 2.4584, 'learning_rate': 2.0591372363119217e-07, 'epoch': 14.12} + 88%|████████▊ | 327780/371472 [3:54:44<3:52:44, 3.13it/s] 88%|████████▊ | 327781/371472 [3:54:44<3:48:18, 3.19it/s] 88%|████████▊ | 327782/371472 [3:54:44<3:41:33, 3.29it/s] 88%|████████▊ | 327783/371472 [3:54:44<3:37:21, 3.35it/s] 88%|████████▊ | 327784/371472 [3:54:45<3:40:24, 3.30it/s] 88%|████████▊ | 327785/371472 [3:54:45<3:46:27, 3.22it/s] 88%|████████▊ | 327786/371472 [3:54:45<3:30:59, 3.45it/s] 88%|████████▊ | 327787/371472 [3:54:46<3:32:05, 3.43it/s] 88%|████████▊ | 327788/371472 [3:54:46<3:29:03, 3.48it/s] 88%|████████▊ | 327789/371472 [3:54:46<3:34:27, 3.39it/s] 88%|████████▊ | 327790/371472 [3:54:46<3:32:25, 3.43it/s] 88%|████████▊ | 327791/371472 [3:54:47<3:27:49, 3.50it/s] 88%|████████▊ | 327792/371472 [3:54:47<3:21:03, 3.62it/s] 88%|████████▊ | 327793/371472 [3:54:47<3:15:05, 3.73it/s] 88%|████████▊ | 327794/371472 [3:54:47<3:11:52, 3.79it/s] 88%|████████▊ | 327795/371472 [3:54:48<4:20:33, 2.79it/s] 88%|████████▊ | 327796/371472 [3:54:48<4:05:59, 2.96it/s] 88%|████████▊ | 327797/371472 [3:54:49<4:00:18, 3.03it/s] 88%|████████▊ | 327798/371472 [3:54:49<3:41:53, 3.28it/s] 88%|████████▊ | 327799/371472 [3:54:49<3:46:35, 3.21it/s] 88%|████████▊ | 327800/371472 [3:54:50<3:36:10, 3.37it/s] {'loss': 2.6853, 'learning_rate': 2.0586524165571332e-07, 'epoch': 14.12} + 88%|████████▊ | 327800/371472 [3:54:50<3:36:10, 3.37it/s] 88%|████████▊ | 327801/371472 [3:54:50<3:29:15, 3.48it/s] 88%|████████▊ | 327802/371472 [3:54:50<3:30:08, 3.46it/s] 88%|████████▊ | 327803/371472 [3:54:50<3:28:42, 3.49it/s] 88%|████████▊ | 327804/371472 [3:54:51<3:55:43, 3.09it/s] 88%|████████▊ | 327805/371472 [3:54:51<3:42:42, 3.27it/s] 88%|████████▊ | 327806/371472 [3:54:51<3:36:28, 3.36it/s] 88%|████████▊ | 327807/371472 [3:54:52<3:28:05, 3.50it/s] 88%|████████▊ | 327808/371472 [3:54:52<3:34:23, 3.39it/s] 88%|████████▊ | 327809/371472 [3:54:52<3:51:51, 3.14it/s] 88%|████████▊ | 327810/371472 [3:54:53<3:41:03, 3.29it/s] 88%|████████▊ | 327811/371472 [3:54:53<3:32:16, 3.43it/s] 88%|████████▊ | 327812/371472 [3:54:53<3:57:06, 3.07it/s] 88%|████████▊ | 327813/371472 [3:54:54<4:20:06, 2.80it/s] 88%|████████▊ | 327814/371472 [3:54:54<3:57:32, 3.06it/s] 88%|████████▊ | 327815/371472 [3:54:54<3:55:54, 3.08it/s] 88%|████████▊ | 327816/371472 [3:54:54<3:42:06, 3.28it/s] 88%|████████▊ | 327817/371472 [3:54:55<3:36:03, 3.37it/s] 88%|████████▊ | 327818/371472 [3:54:55<3:28:40, 3.49it/s] 88%|████████▊ | 327819/371472 [3:54:55<3:25:57, 3.53it/s] 88%|████████▊ | 327820/371472 [3:54:56<3:31:13, 3.44it/s] {'loss': 2.4444, 'learning_rate': 2.058167596802344e-07, 'epoch': 14.12} + 88%|████████▊ | 327820/371472 [3:54:56<3:31:13, 3.44it/s] 88%|████████▊ | 327821/371472 [3:54:56<3:18:46, 3.66it/s] 88%|████████▊ | 327822/371472 [3:54:56<3:28:50, 3.48it/s] 88%|████████▊ | 327823/371472 [3:54:56<3:26:55, 3.52it/s] 88%|████████▊ | 327824/371472 [3:54:57<3:22:29, 3.59it/s] 88%|████████▊ | 327825/371472 [3:54:57<3:30:49, 3.45it/s] 88%|████████▊ | 327826/371472 [3:54:57<3:26:08, 3.53it/s] 88%|████████▊ | 327827/371472 [3:54:58<3:36:57, 3.35it/s] 88%|████████▊ | 327828/371472 [3:54:58<3:33:51, 3.40it/s] 88%|████████▊ | 327829/371472 [3:54:58<3:32:27, 3.42it/s] 88%|████████▊ | 327830/371472 [3:54:58<3:25:31, 3.54it/s] 88%|████████▊ | 327831/371472 [3:54:59<3:17:22, 3.68it/s] 88%|████████▊ | 327832/371472 [3:54:59<3:24:32, 3.56it/s] 88%|████████▊ | 327833/371472 [3:54:59<3:22:44, 3.59it/s] 88%|████████▊ | 327834/371472 [3:55:00<3:22:00, 3.60it/s] 88%|████████▊ | 327835/371472 [3:55:00<3:30:25, 3.46it/s] 88%|████████▊ | 327836/371472 [3:55:00<3:30:56, 3.45it/s] 88%|████████▊ | 327837/371472 [3:55:00<3:24:51, 3.55it/s] 88%|████████▊ | 327838/371472 [3:55:01<3:38:51, 3.32it/s] 88%|████████▊ | 327839/371472 [3:55:01<3:39:11, 3.32it/s] 88%|████████▊ | 327840/371472 [3:55:01<3:29:52, 3.46it/s] {'loss': 2.4476, 'learning_rate': 2.0576827770475554e-07, 'epoch': 14.12} + 88%|████████▊ | 327840/371472 [3:55:01<3:29:52, 3.46it/s] 88%|████████▊ | 327841/371472 [3:55:02<3:29:30, 3.47it/s] 88%|████████▊ | 327842/371472 [3:55:02<3:24:48, 3.55it/s] 88%|████████▊ | 327843/371472 [3:55:02<3:27:22, 3.51it/s] 88%|████████▊ | 327844/371472 [3:55:02<3:25:43, 3.53it/s] 88%|████████▊ | 327845/371472 [3:55:03<3:26:17, 3.52it/s] 88%|████████▊ | 327846/371472 [3:55:03<3:26:54, 3.51it/s] 88%|████████▊ | 327847/371472 [3:55:03<3:39:02, 3.32it/s] 88%|████████▊ | 327848/371472 [3:55:04<3:32:30, 3.42it/s] 88%|████████▊ | 327849/371472 [3:55:04<3:40:02, 3.30it/s] 88%|████████▊ | 327850/371472 [3:55:04<3:27:01, 3.51it/s] 88%|████████▊ | 327851/371472 [3:55:04<3:21:10, 3.61it/s] 88%|████████▊ | 327852/371472 [3:55:05<3:16:13, 3.70it/s] 88%|████████▊ | 327853/371472 [3:55:05<3:17:27, 3.68it/s] 88%|████████▊ | 327854/371472 [3:55:05<3:18:12, 3.67it/s] 88%|████████▊ | 327855/371472 [3:55:06<3:18:26, 3.66it/s] 88%|████████▊ | 327856/371472 [3:55:06<3:18:16, 3.67it/s] 88%|████████▊ | 327857/371472 [3:55:06<3:34:22, 3.39it/s] 88%|████████▊ | 327858/371472 [3:55:06<3:35:21, 3.38it/s] 88%|████████▊ | 327859/371472 [3:55:07<3:31:58, 3.43it/s] 88%|████████▊ | 327860/371472 [3:55:07<3:23:45, 3.57it/s] {'loss': 2.7241, 'learning_rate': 2.057197957292766e-07, 'epoch': 14.12} + 88%|████████▊ | 327860/371472 [3:55:07<3:23:45, 3.57it/s] 88%|████████▊ | 327861/371472 [3:55:07<3:17:04, 3.69it/s] 88%|████���███▊ | 327862/371472 [3:55:08<3:31:49, 3.43it/s] 88%|████████▊ | 327863/371472 [3:55:08<3:27:42, 3.50it/s] 88%|████████▊ | 327864/371472 [3:55:08<3:22:43, 3.59it/s] 88%|████████▊ | 327865/371472 [3:55:08<3:22:13, 3.59it/s] 88%|████████▊ | 327866/371472 [3:55:09<3:20:00, 3.63it/s] 88%|████████▊ | 327867/371472 [3:55:09<3:30:58, 3.44it/s] 88%|████████▊ | 327868/371472 [3:55:09<3:30:29, 3.45it/s] 88%|████████▊ | 327869/371472 [3:55:10<3:55:11, 3.09it/s] 88%|████████▊ | 327870/371472 [3:55:10<3:44:01, 3.24it/s] 88%|████████▊ | 327871/371472 [3:55:10<3:47:10, 3.20it/s] 88%|████████▊ | 327872/371472 [3:55:11<3:35:52, 3.37it/s] 88%|████████▊ | 327873/371472 [3:55:11<4:00:43, 3.02it/s] 88%|████████▊ | 327874/371472 [3:55:11<3:44:14, 3.24it/s] 88%|████████▊ | 327875/371472 [3:55:11<3:40:05, 3.30it/s] 88%|████████▊ | 327876/371472 [3:55:12<3:33:51, 3.40it/s] 88%|████████▊ | 327877/371472 [3:55:12<3:39:37, 3.31it/s] 88%|████████▊ | 327878/371472 [3:55:12<3:28:13, 3.49it/s] 88%|████████▊ | 327879/371472 [3:55:13<3:18:17, 3.66it/s] 88%|████████▊ | 327880/371472 [3:55:13<3:17:46, 3.67it/s] {'loss': 2.5864, 'learning_rate': 2.0567131375379777e-07, 'epoch': 14.12} + 88%|████████▊ | 327880/371472 [3:55:13<3:17:46, 3.67it/s] 88%|████████▊ | 327881/371472 [3:55:13<3:21:25, 3.61it/s] 88%|████████▊ | 327882/371472 [3:55:13<3:22:29, 3.59it/s] 88%|████████▊ | 327883/371472 [3:55:14<3:15:17, 3.72it/s] 88%|████████▊ | 327884/371472 [3:55:14<3:14:56, 3.73it/s] 88%|████████▊ | 327885/371472 [3:55:14<3:13:48, 3.75it/s] 88%|████████▊ | 327886/371472 [3:55:14<3:14:31, 3.73it/s] 88%|████████▊ | 327887/371472 [3:55:15<3:14:58, 3.73it/s] 88%|████████▊ | 327888/371472 [3:55:15<3:26:24, 3.52it/s] 88%|████████▊ | 327889/371472 [3:55:15<3:26:11, 3.52it/s] 88%|████████▊ | 327890/371472 [3:55:16<3:25:52, 3.53it/s] 88%|████████▊ | 327891/371472 [3:55:16<3:45:49, 3.22it/s] 88%|████████▊ | 327892/371472 [3:55:16<3:46:37, 3.21it/s] 88%|████████▊ | 327893/371472 [3:55:17<3:49:30, 3.16it/s] 88%|████████▊ | 327894/371472 [3:55:17<3:39:50, 3.30it/s] 88%|████████▊ | 327895/371472 [3:55:17<3:29:51, 3.46it/s] 88%|████████▊ | 327896/371472 [3:55:17<3:21:37, 3.60it/s] 88%|████████▊ | 327897/371472 [3:55:18<3:29:36, 3.46it/s] 88%|████████▊ | 327898/371472 [3:55:18<3:23:59, 3.56it/s] 88%|████████▊ | 327899/371472 [3:55:18<3:21:14, 3.61it/s] 88%|████████▊ | 327900/371472 [3:55:19<3:30:40, 3.45it/s] {'loss': 2.5999, 'learning_rate': 2.056228317783188e-07, 'epoch': 14.12} + 88%|████████▊ | 327900/371472 [3:55:19<3:30:40, 3.45it/s] 88%|████████▊ | 327901/371472 [3:55:19<3:20:40, 3.62it/s] 88%|████████▊ | 327902/371472 [3:55:19<3:19:33, 3.64it/s] 88%|████████▊ | 327903/371472 [3:55:19<3:14:51, 3.73it/s] 88%|████████▊ | 327904/371472 [3:55:20<3:13:55, 3.74it/s] 88%|████████▊ | 327905/371472 [3:55:20<3:11:57, 3.78it/s] 88%|████████▊ | 327906/371472 [3:55:20<3:28:46, 3.48it/s] 88%|████████▊ | 327907/371472 [3:55:20<3:20:20, 3.62it/s] 88%|████████▊ | 327908/371472 [3:55:21<3:16:38, 3.69it/s] 88%|████████▊ | 327909/371472 [3:55:21<3:18:46, 3.65it/s] 88%|████████▊ | 327910/371472 [3:55:21<3:16:22, 3.70it/s] 88%|████████▊ | 327911/371472 [3:55:22<3:18:16, 3.66it/s] 88%|████████▊ | 327912/371472 [3:55:22<3:17:40, 3.67it/s] 88%|████████▊ | 327913/371472 [3:55:22<3:23:33, 3.57it/s] 88%|████████▊ | 327914/371472 [3:55:22<3:37:36, 3.34it/s] 88%|████████▊ | 327915/371472 [3:55:23<3:35:04, 3.38it/s] 88%|████████▊ | 327916/371472 [3:55:23<3:29:22, 3.47it/s] 88%|████████▊ | 327917/371472 [3:55:23<3:37:08, 3.34it/s] 88%|████████▊ | 327918/371472 [3:55:24<3:39:22, 3.31it/s] 88%|████████▊ | 327919/371472 [3:55:24<3:29:10, 3.47it/s] 88%|████████▊ | 327920/371472 [3:55:24<3:25:47, 3.53it/s] {'loss': 2.5752, 'learning_rate': 2.0557434980283996e-07, 'epoch': 14.12} + 88%|████████▊ | 327920/371472 [3:55:24<3:25:47, 3.53it/s] 88%|████████▊ | 327921/371472 [3:55:25<3:42:02, 3.27it/s] 88%|████████▊ | 327922/371472 [3:55:25<3:36:14, 3.36it/s] 88%|████████▊ | 327923/371472 [3:55:25<3:34:24, 3.39it/s] 88%|████████▊ | 327924/371472 [3:55:25<3:29:03, 3.47it/s] 88%|████████▊ | 327925/371472 [3:55:26<3:35:09, 3.37it/s] 88%|████████▊ | 327926/371472 [3:55:26<3:33:56, 3.39it/s] 88%|████████▊ | 327927/371472 [3:55:26<3:33:31, 3.40it/s] 88%|████████▊ | 327928/371472 [3:55:27<3:37:59, 3.33it/s] 88%|████████▊ | 327929/371472 [3:55:27<3:34:23, 3.39it/s] 88%|████████▊ | 327930/371472 [3:55:27<3:27:40, 3.49it/s] 88%|████████▊ | 327931/371472 [3:55:27<3:21:44, 3.60it/s] 88%|████████▊ | 327932/371472 [3:55:28<3:18:17, 3.66it/s] 88%|████████▊ | 327933/371472 [3:55:28<3:19:27, 3.64it/s] 88%|████████▊ | 327934/371472 [3:55:28<3:11:39, 3.79it/s] 88%|████████▊ | 327935/371472 [3:55:28<3:12:38, 3.77it/s] 88%|████████▊ | 327936/371472 [3:55:29<3:16:20, 3.70it/s] 88%|████████▊ | 327937/371472 [3:55:29<3:21:27, 3.60it/s] 88%|████████▊ | 327938/371472 [3:55:29<3:33:08, 3.40it/s] 88%|████████▊ | 327939/371472 [3:55:30<3:25:38, 3.53it/s] 88%|████████▊ | 327940/371472 [3:55:30<3:27:48, 3.49it/s] {'loss': 2.6883, 'learning_rate': 2.0552586782736103e-07, 'epoch': 14.12} + 88%|████████▊ | 327940/371472 [3:55:30<3:27:48, 3.49it/s] 88%|████████▊ | 327941/371472 [3:55:30<3:34:24, 3.38it/s] 88%|████████▊ | 327942/371472 [3:55:31<3:51:40, 3.13it/s] 88%|████████▊ | 327943/371472 [3:55:31<4:01:00, 3.01it/s] 88%|████████▊ | 327944/371472 [3:55:31<3:54:22, 3.10it/s] 88%|████████▊ | 327945/371472 [3:55:32<3:54:31, 3.09it/s] 88%|████████▊ | 327946/371472 [3:55:32<3:41:50, 3.27it/s] 88%|████████▊ | 327947/371472 [3:55:32<3:34:37, 3.38it/s] 88%|████████▊ | 327948/371472 [3:55:32<3:36:46, 3.35it/s] 88%|████████▊ | 327949/371472 [3:55:33<3:31:57, 3.42it/s] 88%|████████▊ | 327950/371472 [3:55:33<3:22:50, 3.58it/s] 88%|████████▊ | 327951/371472 [3:55:33<3:29:00, 3.47it/s] 88%|████████▊ | 327952/371472 [3:55:33<3:20:08, 3.62it/s] 88%|████████▊ | 327953/371472 [3:55:34<3:19:49, 3.63it/s] 88%|████████▊ | 327954/371472 [3:55:34<3:19:55, 3.63it/s] 88%|████████▊ | 327955/371472 [3:55:34<3:23:14, 3.57it/s] 88%|████████▊ | 327956/371472 [3:55:35<3:37:06, 3.34it/s] 88%|████████▊ | 327957/371472 [3:55:35<3:47:59, 3.18it/s] 88%|████████▊ | 327958/371472 [3:55:35<3:43:53, 3.24it/s] 88%|████████▊ | 327959/371472 [3:55:36<3:43:27, 3.25it/s] 88%|████████▊ | 327960/371472 [3:55:36<3:34:46, 3.38it/s] {'loss': 2.5918, 'learning_rate': 2.0547738585188219e-07, 'epoch': 14.13} + 88%|████████▊ | 327960/371472 [3:55:36<3:34:46, 3.38it/s] 88%|████████▊ | 327961/371472 [3:55:36<3:42:20, 3.26it/s] 88%|████████▊ | 327962/371472 [3:55:37<3:42:09, 3.26it/s] 88%|████████▊ | 327963/371472 [3:55:37<3:38:26, 3.32it/s] 88%|████████▊ | 327964/371472 [3:55:37<3:36:24, 3.35it/s] 88%|████████▊ | 327965/371472 [3:55:37<3:28:41, 3.47it/s] 88%|████████▊ | 327966/371472 [3:55:38<3:28:35, 3.48it/s] 88%|████████▊ | 327967/371472 [3:55:38<3:25:33, 3.53it/s] 88%|████████▊ | 327968/371472 [3:55:38<3:27:04, 3.50it/s] 88%|████████▊ | 327969/371472 [3:55:38<3:21:36, 3.60it/s] 88%|████████▊ | 327970/371472 [3:55:39<3:19:56, 3.63it/s] 88%|████████▊ | 327971/371472 [3:55:39<3:16:01, 3.70it/s] 88%|████████▊ | 327972/371472 [3:55:39<3:10:54, 3.80it/s] 88%|████████▊ | 327973/371472 [3:55:40<3:10:45, 3.80it/s] 88%|████████▊ | 327974/371472 [3:55:40<3:10:08, 3.81it/s] 88%|████████▊ | 327975/371472 [3:55:40<3:21:58, 3.59it/s] 88%|████████▊ | 327976/371472 [3:55:40<3:20:32, 3.61it/s] 88%|████████▊ | 327977/371472 [3:55:41<3:27:08, 3.50it/s] 88%|████████▊ | 327978/371472 [3:55:41<3:20:43, 3.61it/s] 88%|████████▊ | 327979/371472 [3:55:41<3:16:48, 3.68it/s] 88%|████████▊ | 327980/371472 [3:55:42<3:34:16, 3.38it/s] {'loss': 2.5221, 'learning_rate': 2.0542890387640323e-07, 'epoch': 14.13} + 88%|████████▊ | 327980/371472 [3:55:42<3:34:16, 3.38it/s] 88%|████████▊ | 327981/371472 [3:55:42<3:33:39, 3.39it/s] 88%|████████▊ | 327982/371472 [3:55:42<3:25:09, 3.53it/s] 88%|████████▊ | 327983/371472 [3:55:42<3:23:18, 3.57it/s] 88%|████████▊ | 327984/371472 [3:55:43<3:17:51, 3.66it/s] 88%|████████▊ | 327985/371472 [3:55:43<3:15:35, 3.71it/s] 88%|████████▊ | 327986/371472 [3:55:43<3:25:46, 3.52it/s] 88%|████████▊ | 327987/371472 [3:55:43<3:27:24, 3.49it/s] 88%|████████▊ | 327988/371472 [3:55:44<3:24:52, 3.54it/s] 88%|████████▊ | 327989/371472 [3:55:44<3:38:24, 3.32it/s] 88%|████████▊ | 327990/371472 [3:55:44<3:35:09, 3.37it/s] 88%|████████▊ | 327991/371472 [3:55:45<3:51:26, 3.13it/s] 88%|████████▊ | 327992/371472 [3:55:45<3:42:02, 3.26it/s] 88%|████████▊ | 327993/371472 [3:55:45<3:44:25, 3.23it/s] 88%|████████▊ | 327994/371472 [3:55:46<3:38:20, 3.32it/s] 88%|████████▊ | 327995/371472 [3:55:46<3:35:18, 3.37it/s] 88%|████████▊ | 327996/371472 [3:55:46<3:32:17, 3.41it/s] 88%|████████▊ | 327997/371472 [3:55:47<3:35:16, 3.37it/s] 88%|████████▊ | 327998/371472 [3:55:47<3:29:57, 3.45it/s] 88%|████████▊ | 327999/371472 [3:55:47<3:29:08, 3.46it/s] 88%|████████▊ | 328000/371472 [3:55:47<3:28:56, 3.47it/s] {'loss': 2.6786, 'learning_rate': 2.053804219009244e-07, 'epoch': 14.13} + 88%|████████▊ | 328000/371472 [3:55:47<3:28:56, 3.47it/s] 88%|████████▊ | 328001/371472 [3:55:48<3:28:40, 3.47it/s] 88%|████████▊ | 328002/371472 [3:55:48<3:28:30, 3.47it/s] 88%|████████▊ | 328003/371472 [3:55:48<3:28:12, 3.48it/s] 88%|████████▊ | 328004/371472 [3:55:49<3:29:16, 3.46it/s] 88%|████████▊ | 328005/371472 [3:55:49<3:27:01, 3.50it/s] 88%|████████▊ | 328006/371472 [3:55:49<3:21:45, 3.59it/s] 88%|████████▊ | 328007/371472 [3:55:49<3:22:19, 3.58it/s] 88%|████████▊ | 328008/371472 [3:55:50<3:19:34, 3.63it/s] 88%|████████▊ | 328009/371472 [3:55:50<3:17:28, 3.67it/s] 88%|████████▊ | 328010/371472 [3:55:50<3:16:32, 3.69it/s] 88%|████████▊ | 328011/371472 [3:55:50<3:27:21, 3.49it/s] 88%|████████▊ | 328012/371472 [3:55:51<3:18:33, 3.65it/s] 88%|████████▊ | 328013/371472 [3:55:51<3:17:32, 3.67it/s] 88%|████████▊ | 328014/371472 [3:55:51<3:29:23, 3.46it/s] 88%|████████▊ | 328015/371472 [3:55:52<3:26:09, 3.51it/s] 88%|████████▊ | 328016/371472 [3:55:52<3:24:35, 3.54it/s] 88%|████████▊ | 328017/371472 [3:55:52<3:36:09, 3.35it/s] 88%|████████▊ | 328018/371472 [3:55:52<3:30:24, 3.44it/s] 88%|████████▊ | 328019/371472 [3:55:53<3:34:06, 3.38it/s] 88%|████████▊ | 328020/371472 [3:55:53<3:27:14, 3.49it/s] {'loss': 2.4778, 'learning_rate': 2.0533193992544548e-07, 'epoch': 14.13} + 88%|████████▊ | 328020/371472 [3:55:53<3:27:14, 3.49it/s] 88%|████████▊ | 328021/371472 [3:55:53<3:21:41, 3.59it/s] 88%|████████▊ | 328022/371472 [3:55:54<3:19:58, 3.62it/s] 88%|████████▊ | 328023/371472 [3:55:54<3:21:10, 3.60it/s] 88%|████████▊ | 328024/371472 [3:55:54<3:27:58, 3.48it/s] 88%|████████▊ | 328025/371472 [3:55:54<3:27:55, 3.48it/s] 88%|████████▊ | 328026/371472 [3:55:55<3:26:59, 3.50it/s] 88%|████████▊ | 328027/371472 [3:55:55<3:25:53, 3.52it/s] 88%|████████▊ | 328028/371472 [3:55:55<3:22:07, 3.58it/s] 88%|████████▊ | 328029/371472 [3:55:56<3:22:27, 3.58it/s] 88%|████████▊ | 328030/371472 [3:55:56<3:17:07, 3.67it/s] 88%|████████▊ | 328031/371472 [3:55:56<3:20:06, 3.62it/s] 88%|████████▊ | 328032/371472 [3:55:56<3:12:06, 3.77it/s] 88%|████████▊ | 328033/371472 [3:55:57<3:17:44, 3.66it/s] 88%|████████▊ | 328034/371472 [3:55:57<3:16:56, 3.68it/s] 88%|████████▊ | 328035/371472 [3:55:57<3:24:17, 3.54it/s] 88%|████████▊ | 328036/371472 [3:55:57<3:18:28, 3.65it/s] 88%|████���███▊ | 328037/371472 [3:55:58<3:25:22, 3.52it/s] 88%|████████▊ | 328038/371472 [3:55:58<3:29:31, 3.45it/s] 88%|████████▊ | 328039/371472 [3:55:58<3:26:24, 3.51it/s] 88%|████████▊ | 328040/371472 [3:55:59<3:27:55, 3.48it/s] {'loss': 2.5681, 'learning_rate': 2.052834579499666e-07, 'epoch': 14.13} + 88%|████████▊ | 328040/371472 [3:55:59<3:27:55, 3.48it/s] 88%|████████▊ | 328041/371472 [3:55:59<3:38:33, 3.31it/s] 88%|████████▊ | 328042/371472 [3:55:59<3:45:59, 3.20it/s] 88%|████████▊ | 328043/371472 [3:56:00<3:42:31, 3.25it/s] 88%|████████▊ | 328044/371472 [3:56:00<3:45:24, 3.21it/s] 88%|████████▊ | 328045/371472 [3:56:00<3:43:56, 3.23it/s] 88%|████████▊ | 328046/371472 [3:56:01<3:35:52, 3.35it/s] 88%|████████▊ | 328047/371472 [3:56:01<3:30:35, 3.44it/s] 88%|████████▊ | 328048/371472 [3:56:01<3:23:53, 3.55it/s] 88%|████████▊ | 328049/371472 [3:56:01<3:21:39, 3.59it/s] 88%|████████▊ | 328050/371472 [3:56:02<3:28:20, 3.47it/s] 88%|████████▊ | 328051/371472 [3:56:02<3:30:33, 3.44it/s] 88%|████████▊ | 328052/371472 [3:56:02<3:31:02, 3.43it/s] 88%|████████▊ | 328053/371472 [3:56:03<3:54:15, 3.09it/s] 88%|████████▊ | 328054/371472 [3:56:03<3:49:36, 3.15it/s] 88%|████████▊ | 328055/371472 [3:56:03<4:24:28, 2.74it/s] 88%|████████▊ | 328056/371472 [3:56:04<3:59:31, 3.02it/s] 88%|████████▊ | 328057/371472 [3:56:04<3:50:20, 3.14it/s] 88%|████████▊ | 328058/371472 [3:56:04<3:43:47, 3.23it/s] 88%|████████▊ | 328059/371472 [3:56:05<3:45:56, 3.20it/s] 88%|████████▊ | 328060/371472 [3:56:05<3:35:12, 3.36it/s] {'loss': 2.5637, 'learning_rate': 2.0523497597448765e-07, 'epoch': 14.13} + 88%|████████▊ | 328060/371472 [3:56:05<3:35:12, 3.36it/s] 88%|████████▊ | 328061/371472 [3:56:05<3:34:44, 3.37it/s] 88%|████████▊ | 328062/371472 [3:56:05<3:33:44, 3.38it/s] 88%|████████▊ | 328063/371472 [3:56:06<3:34:44, 3.37it/s] 88%|████████▊ | 328064/371472 [3:56:06<3:27:35, 3.49it/s] 88%|████████▊ | 328065/371472 [3:56:06<3:28:20, 3.47it/s] 88%|████████▊ | 328066/371472 [3:56:07<3:27:09, 3.49it/s] 88%|████████▊ | 328067/371472 [3:56:07<3:25:41, 3.52it/s] 88%|████████▊ | 328068/371472 [3:56:07<3:23:23, 3.56it/s] 88%|████████▊ | 328069/371472 [3:56:07<3:27:51, 3.48it/s] 88%|████████▊ | 328070/371472 [3:56:08<3:33:25, 3.39it/s] 88%|████████▊ | 328071/371472 [3:56:08<3:33:01, 3.40it/s] 88%|████████▊ | 328072/371472 [3:56:08<3:38:09, 3.32it/s] 88%|████████▊ | 328073/371472 [3:56:09<3:33:47, 3.38it/s] 88%|████████▊ | 328074/371472 [3:56:09<3:32:46, 3.40it/s] 88%|████████▊ | 328075/371472 [3:56:09<3:26:54, 3.50it/s] 88%|████████▊ | 328076/371472 [3:56:09<3:23:12, 3.56it/s] 88%|████████▊ | 328077/371472 [3:56:10<3:31:10, 3.42it/s] 88%|████████▊ | 328078/371472 [3:56:10<3:53:09, 3.10it/s] 88%|████████▊ | 328079/371472 [3:56:10<3:54:19, 3.09it/s] 88%|████████▊ | 328080/371472 [3:56:11<3:48:28, 3.17it/s] {'loss': 2.5302, 'learning_rate': 2.0518649399900885e-07, 'epoch': 14.13} + 88%|████████▊ | 328080/371472 [3:56:11<3:48:28, 3.17it/s] 88%|████████▊ | 328081/371472 [3:56:11<3:38:53, 3.30it/s] 88%|████████▊ | 328082/371472 [3:56:11<3:33:23, 3.39it/s] 88%|████████▊ | 328083/371472 [3:56:12<3:33:29, 3.39it/s] 88%|████████▊ | 328084/371472 [3:56:12<3:36:18, 3.34it/s] 88%|████████▊ | 328085/371472 [3:56:12<3:37:20, 3.33it/s] 88%|████████▊ | 328086/371472 [3:56:12<3:30:57, 3.43it/s] 88%|████████▊ | 328087/371472 [3:56:13<3:31:07, 3.42it/s] 88%|████████▊ | 328088/371472 [3:56:13<3:28:53, 3.46it/s] 88%|████████▊ | 328089/371472 [3:56:13<3:27:24, 3.49it/s] 88%|████████▊ | 328090/371472 [3:56:14<3:32:42, 3.40it/s] 88%|████████▊ | 328091/371472 [3:56:14<3:29:34, 3.45it/s] 88%|████████▊ | 328092/371472 [3:56:14<3:39:50, 3.29it/s] 88%|████████▊ | 328093/371472 [3:56:15<3:36:19, 3.34it/s] 88%|████████▊ | 328094/371472 [3:56:15<3:23:32, 3.55it/s] 88%|████████▊ | 328095/371472 [3:56:15<3:22:39, 3.57it/s] 88%|████████▊ | 328096/371472 [3:56:15<3:20:55, 3.60it/s] 88%|████████▊ | 328097/371472 [3:56:16<3:21:53, 3.58it/s] 88%|████████▊ | 328098/371472 [3:56:16<3:22:11, 3.58it/s] 88%|████████▊ | 328099/371472 [3:56:16<3:22:08, 3.58it/s] 88%|████████▊ | 328100/371472 [3:56:16<3:19:59, 3.61it/s] {'loss': 2.7689, 'learning_rate': 2.051380120235299e-07, 'epoch': 14.13} + 88%|████████▊ | 328100/371472 [3:56:16<3:19:59, 3.61it/s] 88%|████████▊ | 328101/371472 [3:56:17<3:18:58, 3.63it/s] 88%|████████▊ | 328102/371472 [3:56:17<3:29:58, 3.44it/s] 88%|████████▊ | 328103/371472 [3:56:17<3:29:11, 3.46it/s] 88%|████████▊ | 328104/371472 [3:56:18<3:22:35, 3.57it/s] 88%|████████▊ | 328105/371472 [3:56:18<3:21:12, 3.59it/s] 88%|████████▊ | 328106/371472 [3:56:18<3:19:42, 3.62it/s] 88%|████████▊ | 328107/371472 [3:56:18<3:17:48, 3.65it/s] 88%|████████▊ | 328108/371472 [3:56:19<3:23:23, 3.55it/s] 88%|████████▊ | 328109/371472 [3:56:19<3:52:37, 3.11it/s] 88%|████████▊ | 328110/371472 [3:56:19<3:47:02, 3.18it/s] 88%|████████▊ | 328111/371472 [3:56:20<3:40:39, 3.28it/s] 88%|████████▊ | 328112/371472 [3:56:20<3:32:32, 3.40it/s] 88%|████████▊ | 328113/371472 [3:56:20<3:22:03, 3.58it/s] 88%|████████▊ | 328114/371472 [3:56:20<3:18:55, 3.63it/s] 88%|████████▊ | 328115/371472 [3:56:21<3:26:31, 3.50it/s] 88%|████████▊ | 328116/371472 [3:56:21<3:22:47, 3.56it/s] 88%|████████▊ | 328117/371472 [3:56:21<3:30:51, 3.43it/s] 88%|████████▊ | 328118/371472 [3:56:22<3:24:40, 3.53it/s] 88%|████████▊ | 328119/371472 [3:56:22<3:17:49, 3.65it/s] 88%|████████▊ | 328120/371472 [3:56:22<3:23:23, 3.55it/s] {'loss': 2.5556, 'learning_rate': 2.0508953004805105e-07, 'epoch': 14.13} + 88%|████████▊ | 328120/371472 [3:56:22<3:23:23, 3.55it/s] 88%|████████▊ | 328121/371472 [3:56:23<3:38:06, 3.31it/s] 88%|████████▊ | 328122/371472 [3:56:23<3:28:49, 3.46it/s] 88%|████████▊ | 328123/371472 [3:56:23<3:24:52, 3.53it/s] 88%|████████▊ | 328124/371472 [3:56:23<3:18:51, 3.63it/s] 88%|████████▊ | 328125/371472 [3:56:24<3:13:57, 3.72it/s] 88%|████████▊ | 328126/371472 [3:56:24<3:10:41, 3.79it/s] 88%|████████▊ | 328127/371472 [3:56:24<3:09:36, 3.81it/s] 88%|████████▊ | 328128/371472 [3:56:24<3:23:57, 3.54it/s] 88%|████████▊ | 328129/371472 [3:56:25<3:20:15, 3.61it/s] 88%|████████▊ | 328130/371472 [3:56:25<3:11:37, 3.77it/s] 88%|████████▊ | 328131/371472 [3:56:25<3:20:27, 3.60it/s] 88%|████████▊ | 328132/371472 [3:56:26<3:23:39, 3.55it/s] 88%|████████▊ | 328133/371472 [3:56:26<3:25:05, 3.52it/s] 88%|████████▊ | 328134/371472 [3:56:26<3:34:10, 3.37it/s] 88%|████████▊ | 328135/371472 [3:56:27<3:49:41, 3.14it/s] 88%|████████▊ | 328136/371472 [3:56:27<3:50:18, 3.14it/s] 88%|████████▊ | 328137/371472 [3:56:27<3:56:44, 3.05it/s] 88%|████████▊ | 328138/371472 [3:56:27<3:54:08, 3.08it/s] 88%|████████▊ | 328139/371472 [3:56:28<3:37:10, 3.33it/s] 88%|████████▊ | 328140/371472 [3:56:28<3:33:45, 3.38it/s] {'loss': 2.6551, 'learning_rate': 2.0504104807257212e-07, 'epoch': 14.13} + 88%|████████▊ | 328140/371472 [3:56:28<3:33:45, 3.38it/s] 88%|████████▊ | 328141/371472 [3:56:28<3:25:28, 3.51it/s] 88%|████████▊ | 328142/371472 [3:56:29<3:23:00, 3.56it/s] 88%|████████▊ | 328143/371472 [3:56:29<3:21:00, 3.59it/s] 88%|████████▊ | 328144/371472 [3:56:29<3:20:01, 3.61it/s] 88%|████████▊ | 328145/371472 [3:56:29<3:17:07, 3.66it/s] 88%|████████▊ | 328146/371472 [3:56:30<3:28:31, 3.46it/s] 88%|████████▊ | 328147/371472 [3:56:30<3:26:44, 3.49it/s] 88%|████████▊ | 328148/371472 [3:56:30<3:34:07, 3.37it/s] 88%|████████▊ | 328149/371472 [3:56:31<3:27:38, 3.48it/s] 88%|████████▊ | 328150/371472 [3:56:31<3:25:32, 3.51it/s] 88%|████████▊ | 328151/371472 [3:56:31<3:37:17, 3.32it/s] 88%|���███████▊ | 328152/371472 [3:56:32<3:50:13, 3.14it/s] 88%|████████▊ | 328153/371472 [3:56:32<3:42:58, 3.24it/s] 88%|████████▊ | 328154/371472 [3:56:32<3:38:22, 3.31it/s] 88%|████████▊ | 328155/371472 [3:56:32<3:48:12, 3.16it/s] 88%|████████▊ | 328156/371472 [3:56:33<3:36:51, 3.33it/s] 88%|████████▊ | 328157/371472 [3:56:33<3:39:33, 3.29it/s] 88%|████████▊ | 328158/371472 [3:56:33<3:36:41, 3.33it/s] 88%|████████▊ | 328159/371472 [3:56:34<3:26:45, 3.49it/s] 88%|████████▊ | 328160/371472 [3:56:34<3:19:44, 3.61it/s] {'loss': 2.5965, 'learning_rate': 2.0499256609709327e-07, 'epoch': 14.13} + 88%|████████▊ | 328160/371472 [3:56:34<3:19:44, 3.61it/s] 88%|████████▊ | 328161/371472 [3:56:34<3:24:24, 3.53it/s] 88%|████████▊ | 328162/371472 [3:56:34<3:28:52, 3.46it/s] 88%|████████▊ | 328163/371472 [3:56:35<3:32:06, 3.40it/s] 88%|████████▊ | 328164/371472 [3:56:35<3:28:45, 3.46it/s] 88%|████████▊ | 328165/371472 [3:56:35<3:25:09, 3.52it/s] 88%|████████▊ | 328166/371472 [3:56:36<3:28:51, 3.46it/s] 88%|████████▊ | 328167/371472 [3:56:36<3:22:43, 3.56it/s] 88%|████████▊ | 328168/371472 [3:56:36<3:28:37, 3.46it/s] 88%|████████▊ | 328169/371472 [3:56:36<3:23:30, 3.55it/s] 88%|████████▊ | 328170/371472 [3:56:37<3:48:25, 3.16it/s] 88%|████████▊ | 328171/371472 [3:56:37<3:42:39, 3.24it/s] 88%|████████▊ | 328172/371472 [3:56:37<3:44:47, 3.21it/s] 88%|████████▊ | 328173/371472 [3:56:38<3:50:16, 3.13it/s] 88%|████████▊ | 328174/371472 [3:56:38<3:36:09, 3.34it/s] 88%|████████▊ | 328175/371472 [3:56:38<3:24:57, 3.52it/s] 88%|████████▊ | 328176/371472 [3:56:39<3:29:55, 3.44it/s] 88%|████████▊ | 328177/371472 [3:56:39<3:20:28, 3.60it/s] 88%|████████▊ | 328178/371472 [3:56:39<3:15:58, 3.68it/s] 88%|████████▊ | 328179/371472 [3:56:39<3:24:15, 3.53it/s] 88%|████████▊ | 328180/371472 [3:56:40<3:23:11, 3.55it/s] {'loss': 2.692, 'learning_rate': 2.0494408412161432e-07, 'epoch': 14.14} + 88%|████████▊ | 328180/371472 [3:56:40<3:23:11, 3.55it/s] 88%|████████▊ | 328181/371472 [3:56:40<3:23:08, 3.55it/s] 88%|████████▊ | 328182/371472 [3:56:40<3:20:13, 3.60it/s] 88%|████████▊ | 328183/371472 [3:56:40<3:15:53, 3.68it/s] 88%|████████▊ | 328184/371472 [3:56:41<3:15:24, 3.69it/s] 88%|████████▊ | 328185/371472 [3:56:41<3:13:25, 3.73it/s] 88%|████████▊ | 328186/371472 [3:56:41<3:12:09, 3.75it/s] 88%|████████▊ | 328187/371472 [3:56:42<3:18:15, 3.64it/s] 88%|████████▊ | 328188/371472 [3:56:42<3:20:17, 3.60it/s] 88%|████████▊ | 328189/371472 [3:56:42<3:23:56, 3.54it/s] 88%|████████▊ | 328190/371472 [3:56:42<3:27:02, 3.48it/s] 88%|████████▊ | 328191/371472 [3:56:43<3:19:55, 3.61it/s] 88%|████████▊ | 328192/371472 [3:56:43<3:18:16, 3.64it/s] 88%|████████▊ | 328193/371472 [3:56:43<3:15:44, 3.69it/s] 88%|████████▊ | 328194/371472 [3:56:44<3:26:10, 3.50it/s] 88%|████████▊ | 328195/371472 [3:56:44<3:38:43, 3.30it/s] 88%|████████▊ | 328196/371472 [3:56:44<3:34:58, 3.35it/s] 88%|████████▊ | 328197/371472 [3:56:44<3:24:45, 3.52it/s] 88%|████████▊ | 328198/371472 [3:56:45<3:24:08, 3.53it/s] 88%|████████▊ | 328199/371472 [3:56:45<3:19:09, 3.62it/s] 88%|████████▊ | 328200/371472 [3:56:45<3:17:12, 3.66it/s] {'loss': 2.5998, 'learning_rate': 2.048956021461355e-07, 'epoch': 14.14} + 88%|████████▊ | 328200/371472 [3:56:45<3:17:12, 3.66it/s] 88%|████████▊ | 328201/371472 [3:56:46<3:22:30, 3.56it/s] 88%|████████▊ | 328202/371472 [3:56:46<3:22:11, 3.57it/s] 88%|████████▊ | 328203/371472 [3:56:46<3:16:15, 3.67it/s] 88%|████████▊ | 328204/371472 [3:56:46<3:14:28, 3.71it/s] 88%|████████▊ | 328205/371472 [3:56:47<3:14:37, 3.71it/s] 88%|████████▊ | 328206/371472 [3:56:47<3:08:12, 3.83it/s] 88%|████████▊ | 328207/371472 [3:56:47<3:11:06, 3.77it/s] 88%|████████▊ | 328208/371472 [3:56:47<3:07:59, 3.84it/s] 88%|████████▊ | 328209/371472 [3:56:48<3:16:10, 3.68it/s] 88%|████████▊ | 328210/371472 [3:56:48<3:11:52, 3.76it/s] 88%|████████▊ | 328211/371472 [3:56:48<3:12:56, 3.74it/s] 88%|████████▊ | 328212/371472 [3:56:48<3:12:00, 3.75it/s] 88%|████████▊ | 328213/371472 [3:56:49<3:08:45, 3.82it/s] 88%|████████▊ | 328214/371472 [3:56:49<3:12:58, 3.74it/s] 88%|████████▊ | 328215/371472 [3:56:49<3:30:02, 3.43it/s] 88%|████████▊ | 328216/371472 [3:56:50<3:31:59, 3.40it/s] 88%|████████▊ | 328217/371472 [3:56:50<3:30:11, 3.43it/s] 88%|████████▊ | 328218/371472 [3:56:50<3:39:09, 3.29it/s] 88%|████████▊ | 328219/371472 [3:56:51<4:01:12, 2.99it/s] 88%|████████▊ | 328220/371472 [3:56:51<3:50:37, 3.13it/s] {'loss': 2.5066, 'learning_rate': 2.0484712017065654e-07, 'epoch': 14.14} + 88%|████████▊ | 328220/371472 [3:56:51<3:50:37, 3.13it/s] 88%|████████▊ | 328221/371472 [3:56:51<3:40:15, 3.27it/s] 88%|████████▊ | 328222/371472 [3:56:52<3:41:41, 3.25it/s] 88%|████████▊ | 328223/371472 [3:56:52<3:30:25, 3.43it/s] 88%|████████▊ | 328224/371472 [3:56:52<3:41:59, 3.25it/s] 88%|████████▊ | 328225/371472 [3:56:52<3:45:11, 3.20it/s] 88%|████████▊ | 328226/371472 [3:56:53<3:42:46, 3.24it/s] 88%|████████▊ | 328227/371472 [3:56:53<3:33:21, 3.38it/s] 88%|████████▊ | 328228/371472 [3:56:53<3:32:57, 3.38it/s] 88%|████████▊ | 328229/371472 [3:56:54<3:27:56, 3.47it/s] 88%|████████▊ | 328230/371472 [3:56:54<3:23:35, 3.54it/s] 88%|████████▊ | 328231/371472 [3:56:54<3:26:11, 3.50it/s] 88%|████████▊ | 328232/371472 [3:56:54<3:19:57, 3.60it/s] 88%|████████▊ | 328233/371472 [3:56:55<3:37:24, 3.31it/s] 88%|████████▊ | 328234/371472 [3:56:55<3:49:25, 3.14it/s] 88%|████████▊ | 328235/371472 [3:56:55<3:35:01, 3.35it/s] 88%|████████▊ | 328236/371472 [3:56:56<3:39:07, 3.29it/s] 88%|████████▊ | 328237/371472 [3:56:56<3:28:25, 3.46it/s] 88%|████████▊ | 328238/371472 [3:56:56<3:27:10, 3.48it/s] 88%|████████▊ | 328239/371472 [3:56:56<3:22:14, 3.56it/s] 88%|████████▊ | 328240/371472 [3:56:57<3:12:53, 3.74it/s] {'loss': 2.5264, 'learning_rate': 2.0479863819517772e-07, 'epoch': 14.14} + 88%|████████▊ | 328240/371472 [3:56:57<3:12:53, 3.74it/s] 88%|████████▊ | 328241/371472 [3:56:57<3:26:52, 3.48it/s] 88%|████████▊ | 328242/371472 [3:56:57<3:26:21, 3.49it/s] 88%|████████▊ | 328243/371472 [3:56:58<3:21:26, 3.58it/s] 88%|████████▊ | 328244/371472 [3:56:58<3:35:13, 3.35it/s] 88%|████████▊ | 328245/371472 [3:56:58<3:29:35, 3.44it/s] 88%|████████▊ | 328246/371472 [3:56:58<3:23:49, 3.53it/s] 88%|████████▊ | 328247/371472 [3:56:59<3:21:35, 3.57it/s] 88%|████████▊ | 328248/371472 [3:56:59<3:41:38, 3.25it/s] 88%|████████▊ | 328249/371472 [3:56:59<3:30:59, 3.41it/s] 88%|████████▊ | 328250/371472 [3:57:00<3:44:22, 3.21it/s] 88%|████████▊ | 328251/371472 [3:57:00<3:33:55, 3.37it/s] 88%|████████▊ | 328252/371472 [3:57:00<3:29:17, 3.44it/s] 88%|████████▊ | 328253/371472 [3:57:01<3:25:01, 3.51it/s] 88%|████████▊ | 328254/371472 [3:57:01<3:20:02, 3.60it/s] 88%|████████▊ | 328255/371472 [3:57:01<3:28:07, 3.46it/s] 88%|████████▊ | 328256/371472 [3:57:01<3:21:47, 3.57it/s] 88%|████████▊ | 328257/371472 [3:57:02<3:18:55, 3.62it/s] 88%|████████▊ | 328258/371472 [3:57:02<3:22:44, 3.55it/s] 88%|████████▊ | 328259/371472 [3:57:02<3:28:22, 3.46it/s] 88%|████████▊ | 328260/371472 [3:57:03<3:22:28, 3.56it/s] {'loss': 2.5356, 'learning_rate': 2.0475015621969876e-07, 'epoch': 14.14} + 88%|████████▊ | 328260/371472 [3:57:03<3:22:28, 3.56it/s] 88%|████████▊ | 328261/371472 [3:57:03<3:19:26, 3.61it/s] 88%|████████▊ | 328262/371472 [3:57:03<3:16:55, 3.66it/s] 88%|████████▊ | 328263/371472 [3:57:03<3:30:41, 3.42it/s] 88%|████████▊ | 328264/371472 [3:57:04<3:31:44, 3.40it/s] 88%|████████▊ | 328265/371472 [3:57:04<3:25:02, 3.51it/s] 88%|████████▊ | 328266/371472 [3:57:04<3:23:21, 3.54it/s] 88%|████████▊ | 328267/371472 [3:57:05<3:21:43, 3.57it/s] 88%|████████▊ | 328268/371472 [3:57:05<3:34:27, 3.36it/s] 88%|████████▊ | 328269/371472 [3:57:05<3:28:02, 3.46it/s] 88%|████████▊ | 328270/371472 [3:57:05<3:23:52, 3.53it/s] 88%|████████▊ | 328271/371472 [3:57:06<3:33:21, 3.37it/s] 88%|████████▊ | 328272/371472 [3:57:06<3:35:55, 3.33it/s] 88%|████████▊ | 328273/371472 [3:57:06<3:32:56, 3.38it/s] 88%|████████▊ | 328274/371472 [3:57:07<3:24:48, 3.52it/s] 88%|████████▊ | 328275/371472 [3:57:07<3:25:37, 3.50it/s] 88%|████████▊ | 328276/371472 [3:57:07<3:16:41, 3.66it/s] 88%|████████▊ | 328277/371472 [3:57:07<3:15:55, 3.67it/s] 88%|████████▊ | 328278/371472 [3:57:08<3:20:04, 3.60it/s] 88%|████████▊ | 328279/371472 [3:57:08<3:30:04, 3.43it/s] 88%|████████▊ | 328280/371472 [3:57:08<3:28:54, 3.45it/s] {'loss': 2.7004, 'learning_rate': 2.0470167424421983e-07, 'epoch': 14.14} + 88%|████████▊ | 328280/371472 [3:57:08<3:28:54, 3.45it/s] 88%|████████▊ | 328281/371472 [3:57:09<3:31:45, 3.40it/s] 88%|████████▊ | 328282/371472 [3:57:09<3:23:02, 3.55it/s] 88%|████████▊ | 328283/371472 [3:57:09<3:20:23, 3.59it/s] 88%|████████▊ | 328284/371472 [3:57:09<3:32:41, 3.38it/s] 88%|████████▊ | 328285/371472 [3:57:10<3:41:12, 3.25it/s] 88%|████████▊ | 328286/371472 [3:57:10<3:32:54, 3.38it/s] 88%|████████▊ | 328287/371472 [3:57:10<3:34:12, 3.36it/s] 88%|████████▊ | 328288/371472 [3:57:11<3:22:12, 3.56it/s] 88%|████████▊ | 328289/371472 [3:57:11<3:17:08, 3.65it/s] 88%|████████▊ | 328290/371472 [3:57:11<3:33:12, 3.38it/s] 88%|████████▊ | 328291/371472 [3:57:11<3:29:49, 3.43it/s] 88%|████████▊ | 328292/371472 [3:57:12<3:26:24, 3.49it/s] 88%|████████▊ | 328293/371472 [3:57:12<3:47:16, 3.17it/s] 88%|████████▊ | 328294/371472 [3:57:12<3:41:20, 3.25it/s] 88%|████████▊ | 328295/371472 [3:57:13<3:29:15, 3.44it/s] 88%|████████▊ | 328296/371472 [3:57:13<3:29:18, 3.44it/s] 88%|████████▊ | 328297/371472 [3:57:13<3:29:06, 3.44it/s] 88%|████████▊ | 328298/371472 [3:57:13<3:19:48, 3.60it/s] 88%|████████▊ | 328299/371472 [3:57:14<3:13:14, 3.72it/s] 88%|████████▊ | 328300/371472 [3:57:14<3:20:19, 3.59it/s] {'loss': 2.6231, 'learning_rate': 2.0465319226874096e-07, 'epoch': 14.14} + 88%|████████▊ | 328300/371472 [3:57:14<3:20:19, 3.59it/s] 88%|████████▊ | 328301/371472 [3:57:14<3:31:24, 3.40it/s] 88%|████████▊ | 328302/371472 [3:57:15<3:35:43, 3.34it/s] 88%|████████▊ | 328303/371472 [3:57:15<3:31:44, 3.40it/s] 88%|████████▊ | 328304/371472 [3:57:15<3:23:40, 3.53it/s] 88%|████████▊ | 328305/371472 [3:57:16<3:24:02, 3.53it/s] 88%|████████▊ | 328306/371472 [3:57:16<3:29:00, 3.44it/s] 88%|████████▊ | 328307/371472 [3:57:16<3:25:15, 3.50it/s] 88%|████████▊ | 328308/371472 [3:57:16<3:21:34, 3.57it/s] 88%|████████▊ | 328309/371472 [3:57:17<3:20:52, 3.58it/s] 88%|████████▊ | 328310/371472 [3:57:17<3:24:18, 3.52it/s] 88%|████████▊ | 328311/371472 [3:57:17<3:24:30, 3.52it/s] 88%|████████▊ | 328312/371472 [3:57:18<3:32:36, 3.38it/s] 88%|████████▊ | 328313/371472 [3:57:18<3:24:07, 3.52it/s] 88%|████████▊ | 328314/371472 [3:57:18<3:19:41, 3.60it/s] 88%|████████▊ | 328315/371472 [3:57:18<3:14:25, 3.70it/s] 88%|████████▊ | 328316/371472 [3:57:19<3:33:58, 3.36it/s] 88%|████████▊ | 328317/371472 [3:57:19<3:36:57, 3.32it/s] 88%|████████▊ | 328318/371472 [3:57:19<3:45:17, 3.19it/s] 88%|████████▊ | 328319/371472 [3:57:20<3:42:30, 3.23it/s] 88%|████████▊ | 328320/371472 [3:57:20<3:30:51, 3.41it/s] {'loss': 2.6344, 'learning_rate': 2.0460471029326203e-07, 'epoch': 14.14} + 88%|████████▊ | 328320/371472 [3:57:20<3:30:51, 3.41it/s] 88%|████████▊ | 328321/371472 [3:57:20<3:31:03, 3.41it/s] 88%|████████▊ | 328322/371472 [3:57:20<3:20:34, 3.59it/s] 88%|████████▊ | 328323/371472 [3:57:21<3:14:18, 3.70it/s] 88%|████████▊ | 328324/371472 [3:57:21<3:20:47, 3.58it/s] 88%|████████▊ | 328325/371472 [3:57:21<3:21:24, 3.57it/s] 88%|████████▊ | 328326/371472 [3:57:22<3:33:10, 3.37it/s] 88%|████████▊ | 328327/371472 [3:57:22<3:52:22, 3.09it/s] 88%|████████▊ | 328328/371472 [3:57:22<3:36:11, 3.33it/s] 88%|████████▊ | 328329/371472 [3:57:22<3:33:46, 3.36it/s] 88%|████████▊ | 328330/371472 [3:57:23<3:27:20, 3.47it/s] 88%|████████▊ | 328331/371472 [3:57:23<3:34:51, 3.35it/s] 88%|████████▊ | 328332/371472 [3:57:23<3:25:54, 3.49it/s] 88%|████████▊ | 328333/371472 [3:57:24<3:33:59, 3.36it/s] 88%|████████▊ | 328334/371472 [3:57:24<3:20:47, 3.58it/s] 88%|████████▊ | 328335/371472 [3:57:24<3:17:54, 3.63it/s] 88%|████████▊ | 328336/371472 [3:57:24<3:18:12, 3.63it/s] 88%|████████▊ | 328337/371472 [3:57:25<3:14:04, 3.70it/s] 88%|████████▊ | 328338/371472 [3:57:25<3:20:26, 3.59it/s] 88%|████████▊ | 328339/371472 [3:57:25<3:19:33, 3.60it/s] 88%|████████▊ | 328340/371472 [3:57:26<3:18:50, 3.62it/s] {'loss': 2.6045, 'learning_rate': 2.045562283177832e-07, 'epoch': 14.14} + 88%|████████▊ | 328340/371472 [3:57:26<3:18:50, 3.62it/s] 88%|████████▊ | 328341/371472 [3:57:26<3:24:36, 3.51it/s] 88%|████████▊ | 328342/371472 [3:57:26<3:24:39, 3.51it/s] 88%|████████▊ | 328343/371472 [3:57:26<3:22:07, 3.56it/s] 88%|████████▊ | 328344/371472 [3:57:27<3:24:23, 3.52it/s] 88%|████████▊ | 328345/371472 [3:57:27<3:20:29, 3.59it/s] 88%|████████▊ | 328346/371472 [3:57:27<3:12:44, 3.73it/s] 88%|████████▊ | 328347/371472 [3:57:27<3:13:25, 3.72it/s] 88%|████████▊ | 328348/371472 [3:57:28<3:13:16, 3.72it/s] 88%|████████▊ | 328349/371472 [3:57:28<3:07:43, 3.83it/s] 88%|████████▊ | 328350/371472 [3:57:28<3:09:54, 3.78it/s] 88%|████████▊ | 328351/371472 [3:57:29<3:16:03, 3.67it/s] 88%|████████▊ | 328352/371472 [3:57:29<3:15:28, 3.68it/s] 88%|████████▊ | 328353/371472 [3:57:29<3:21:45, 3.56it/s] 88%|████████▊ | 328354/371472 [3:57:29<3:24:21, 3.52it/s] 88%|████████▊ | 328355/371472 [3:57:30<3:16:21, 3.66it/s] 88%|████████▊ | 328356/371472 [3:57:30<3:09:48, 3.79it/s] 88%|████████▊ | 328357/371472 [3:57:30<3:15:17, 3.68it/s] 88%|████████▊ | 328358/371472 [3:57:30<3:16:35, 3.66it/s] 88%|████████▊ | 328359/371472 [3:57:31<3:20:27, 3.58it/s] 88%|████████▊ | 328360/371472 [3:57:31<3:13:45, 3.71it/s] {'loss': 2.7304, 'learning_rate': 2.0450774634230422e-07, 'epoch': 14.14} + 88%|████████▊ | 328360/371472 [3:57:31<3:13:45, 3.71it/s] 88%|████████▊ | 328361/371472 [3:57:31<3:09:26, 3.79it/s] 88%|████████▊ | 328362/371472 [3:57:32<3:15:53, 3.67it/s] 88%|████████▊ | 328363/371472 [3:57:32<3:24:09, 3.52it/s] 88%|████████▊ | 328364/371472 [3:57:32<3:22:31, 3.55it/s] 88%|████████▊ | 328365/371472 [3:57:32<3:14:52, 3.69it/s] 88%|████████▊ | 328366/371472 [3:57:33<3:19:41, 3.60it/s] 88%|████████▊ | 328367/371472 [3:57:33<3:28:21, 3.45it/s] 88%|████████▊ | 328368/371472 [3:57:33<3:27:04, 3.47it/s] 88%|████████▊ | 328369/371472 [3:57:34<3:21:22, 3.57it/s] 88%|████████▊ | 328370/371472 [3:57:34<3:20:34, 3.58it/s] 88%|████████▊ | 328371/371472 [3:57:34<3:16:29, 3.66it/s] 88%|████████▊ | 328372/371472 [3:57:34<3:09:29, 3.79it/s] 88%|████████▊ | 328373/371472 [3:57:35<3:06:04, 3.86it/s] 88%|████████▊ | 328374/371472 [3:57:35<3:17:52, 3.63it/s] 88%|████████▊ | 328375/371472 [3:57:35<3:20:01, 3.59it/s] 88%|████████▊ | 328376/371472 [3:57:35<3:26:21, 3.48it/s] 88%|████████▊ | 328377/371472 [3:57:36<3:25:09, 3.50it/s] 88%|████████▊ | 328378/371472 [3:57:36<3:19:59, 3.59it/s] 88%|████████▊ | 328379/371472 [3:57:36<3:16:01, 3.66it/s] 88%|████████▊ | 328380/371472 [3:57:37<3:15:08, 3.68it/s] {'loss': 2.7115, 'learning_rate': 2.044592643668254e-07, 'epoch': 14.14} + 88%|████████▊ | 328380/371472 [3:57:37<3:15:08, 3.68it/s] 88%|████████▊ | 328381/371472 [3:57:37<3:33:48, 3.36it/s] 88%|████████▊ | 328382/371472 [3:57:37<3:28:30, 3.44it/s] 88%|████████▊ | 328383/371472 [3:57:37<3:18:52, 3.61it/s] 88%|████████▊ | 328384/371472 [3:57:38<3:15:52, 3.67it/s] 88%|████████▊ | 328385/371472 [3:57:38<3:12:42, 3.73it/s] 88%|████████▊ | 328386/371472 [3:57:38<3:16:35, 3.65it/s] 88%|████████▊ | 328387/371472 [3:57:39<3:25:55, 3.49it/s] 88%|████████▊ | 328388/371472 [3:57:39<3:24:30, 3.51it/s] 88%|████████▊ | 328389/371472 [3:57:39<3:24:22, 3.51it/s] 88%|████████▊ | 328390/371472 [3:57:39<3:20:37, 3.58it/s] 88%|████████▊ | 328391/371472 [3:57:40<3:23:38, 3.53it/s] 88%|████████▊ | 328392/371472 [3:57:40<3:17:53, 3.63it/s] 88%|████████▊ | 328393/371472 [3:57:40<3:13:16, 3.71it/s] 88%|████████▊ | 328394/371472 [3:57:40<3:17:54, 3.63it/s] 88%|████████▊ | 328395/371472 [3:57:41<3:32:37, 3.38it/s] 88%|████████▊ | 328396/371472 [3:57:41<3:27:00, 3.47it/s] 88%|████████▊ | 328397/371472 [3:57:41<3:34:15, 3.35it/s] 88%|████████▊ | 328398/371472 [3:57:42<3:57:36, 3.02it/s] 88%|████████▊ | 328399/371472 [3:57:42<3:40:01, 3.26it/s] 88%|████████▊ | 328400/371472 [3:57:42<3:39:27, 3.27it/s] {'loss': 2.6907, 'learning_rate': 2.044107823913465e-07, 'epoch': 14.14} + 88%|████████▊ | 328400/371472 [3:57:42<3:39:27, 3.27it/s] 88%|████████▊ | 328401/371472 [3:57:43<3:34:45, 3.34it/s] 88%|████████▊ | 328402/371472 [3:57:43<3:32:54, 3.37it/s] 88%|████████▊ | 328403/371472 [3:57:43<3:56:05, 3.04it/s] 88%|████████▊ | 328404/371472 [3:57:44<3:39:23, 3.27it/s] 88%|████████▊ | 328405/371472 [3:57:44<3:30:47, 3.41it/s] 88%|████████▊ | 328406/371472 [3:57:44<3:28:24, 3.44it/s] 88%|████████▊ | 328407/371472 [3:57:44<3:24:34, 3.51it/s] 88%|████████▊ | 328408/371472 [3:57:45<3:22:03, 3.55it/s] 88%|████████▊ | 328409/371472 [3:57:45<3:23:17, 3.53it/s] 88%|████████▊ | 328410/371472 [3:57:45<3:20:23, 3.58it/s] 88%|████████▊ | 328411/371472 [3:57:46<3:32:39, 3.37it/s] 88%|████████▊ | 328412/371472 [3:57:46<3:37:47, 3.30it/s] 88%|████████▊ | 328413/371472 [3:57:46<3:31:14, 3.40it/s] 88%|████████▊ | 328414/371472 [3:57:46<3:27:47, 3.45it/s] 88%|████████▊ | 328415/371472 [3:57:47<3:21:46, 3.56it/s] 88%|████████▊ | 328416/371472 [3:57:47<3:20:53, 3.57it/s] 88%|████████▊ | 328417/371472 [3:57:47<3:25:27, 3.49it/s] 88%|████████▊ | 328418/371472 [3:57:48<3:17:43, 3.63it/s] 88%|████████▊ | 328419/371472 [3:57:48<3:14:55, 3.68it/s] 88%|████████▊ | 328420/371472 [3:57:48<3:15:09, 3.68it/s] {'loss': 2.4737, 'learning_rate': 2.043623004158676e-07, 'epoch': 14.15} + 88%|████████▊ | 328420/371472 [3:57:48<3:15:09, 3.68it/s] 88%|████████▊ | 328421/371472 [3:57:48<3:26:04, 3.48it/s] 88%|████████▊ | 328422/371472 [3:57:49<3:16:29, 3.65it/s] 88%|████████▊ | 328423/371472 [3:57:49<3:11:12, 3.75it/s] 88%|████████▊ | 328424/371472 [3:57:49<3:19:21, 3.60it/s] 88%|████████▊ | 328425/371472 [3:57:49<3:11:17, 3.75it/s] 88%|████████▊ | 328426/371472 [3:57:50<3:13:31, 3.71it/s] 88%|████████▊ | 328427/371472 [3:57:50<3:18:37, 3.61it/s] 88%|████████▊ | 328428/371472 [3:57:50<3:20:31, 3.58it/s] 88%|████████▊ | 328429/371472 [3:57:51<3:21:56, 3.55it/s] 88%|████████▊ | 328430/371472 [3:57:51<3:16:30, 3.65it/s] 88%|████████▊ | 328431/371472 [3:57:51<3:09:28, 3.79it/s] 88%|████████▊ | 328432/371472 [3:57:51<3:08:49, 3.80it/s] 88%|████████▊ | 328433/371472 [3:57:52<3:22:01, 3.55it/s] 88%|████████▊ | 328434/371472 [3:57:52<3:16:16, 3.65it/s] 88%|████████▊ | 328435/371472 [3:57:52<3:26:11, 3.48it/s] 88%|████████▊ | 328436/371472 [3:57:53<3:20:00, 3.59it/s] 88%|████████▊ | 328437/371472 [3:57:53<3:14:16, 3.69it/s] 88%|████████▊ | 328438/371472 [3:57:53<3:27:17, 3.46it/s] 88%|████████▊ | 328439/371472 [3:57:53<3:22:40, 3.54it/s] 88%|████████▊ | 328440/371472 [3:57:54<3:30:59, 3.40it/s] {'loss': 2.6369, 'learning_rate': 2.0431381844038867e-07, 'epoch': 14.15} + 88%|████████▊ | 328440/371472 [3:57:54<3:30:59, 3.40it/s] 88%|████████▊ | 328441/371472 [3:57:54<3:28:21, 3.44it/s] 88%|████████▊ | 328442/371472 [3:57:54<3:27:51, 3.45it/s] 88%|████████▊ | 328443/371472 [3:57:55<3:27:25, 3.46it/s] 88%|████████▊ | 328444/371472 [3:57:55<3:31:58, 3.38it/s] 88%|████████▊ | 328445/371472 [3:57:55<3:22:18, 3.54it/s] 88%|████████▊ | 328446/371472 [3:57:55<3:22:01, 3.55it/s] 88%|████████▊ | 328447/371472 [3:57:56<4:06:15, 2.91it/s] 88%|████████▊ | 328448/371472 [3:57:56<4:01:14, 2.97it/s] 88%|████████▊ | 328449/371472 [3:57:57<3:53:00, 3.08it/s] 88%|████████▊ | 328450/371472 [3:57:57<3:48:17, 3.14it/s] 88%|████████▊ | 328451/371472 [3:57:57<3:43:04, 3.21it/s] 88%|████████▊ | 328452/371472 [3:57:57<3:38:48, 3.28it/s] 88%|████████▊ | 328453/371472 [3:57:58<3:53:27, 3.07it/s] 88%|████████▊ | 328454/371472 [3:57:58<3:48:46, 3.13it/s] 88%|████████▊ | 328455/371472 [3:57:58<3:58:37, 3.00it/s] 88%|████████▊ | 328456/371472 [3:57:59<3:45:17, 3.18it/s] 88%|████████▊ | 328457/371472 [3:57:59<3:38:25, 3.28it/s] 88%|████████▊ | 328458/371472 [3:57:59<3:31:37, 3.39it/s] 88%|████████▊ | 328459/371472 [3:58:00<3:26:51, 3.47it/s] 88%|████████▊ | 328460/371472 [3:58:00<3:16:57, 3.64it/s] {'loss': 2.6047, 'learning_rate': 2.0426533646490985e-07, 'epoch': 14.15} + 88%|████████▊ | 328460/371472 [3:58:00<3:16:57, 3.64it/s] 88%|████████▊ | 328461/371472 [3:58:00<3:18:35, 3.61it/s] 88%|████████▊ | 328462/371472 [3:58:00<3:16:05, 3.66it/s] 88%|████████▊ | 328463/371472 [3:58:01<3:16:19, 3.65it/s] 88%|████████▊ | 328464/371472 [3:58:01<3:29:09, 3.43it/s] 88%|████████▊ | 328465/371472 [3:58:01<3:20:58, 3.57it/s] 88%|████████▊ | 328466/371472 [3:58:02<3:34:04, 3.35it/s] 88%|████████▊ | 328467/371472 [3:58:02<3:22:40, 3.54it/s] 88%|████████▊ | 328468/371472 [3:58:02<3:18:57, 3.60it/s] 88%|████████▊ | 328469/371472 [3:58:02<3:22:47, 3.53it/s] 88%|████████▊ | 328470/371472 [3:58:03<3:28:38, 3.44it/s] 88%|████████▊ | 328471/371472 [3:58:03<3:40:15, 3.25it/s] 88%|████████▊ | 328472/371472 [3:58:03<3:32:09, 3.38it/s] 88%|████████▊ | 328473/371472 [3:58:04<3:25:39, 3.48it/s] 88%|████████▊ | 328474/371472 [3:58:04<3:43:37, 3.20it/s] 88%|████████▊ | 328475/371472 [3:58:04<3:41:20, 3.24it/s] 88%|████████▊ | 328476/371472 [3:58:04<3:30:28, 3.40it/s] 88%|████████▊ | 328477/371472 [3:58:05<3:22:52, 3.53it/s] 88%|████████▊ | 328478/371472 [3:58:05<3:52:00, 3.09it/s] 88%|████████▊ | 328479/371472 [3:58:05<3:48:19, 3.14it/s] 88%|████████▊ | 328480/371472 [3:58:06<3:36:33, 3.31it/s] {'loss': 2.5723, 'learning_rate': 2.042168544894309e-07, 'epoch': 14.15} + 88%|████████▊ | 328480/371472 [3:58:06<3:36:33, 3.31it/s] 88%|████████▊ | 328481/371472 [3:58:06<3:39:31, 3.26it/s] 88%|████████▊ | 328482/371472 [3:58:06<3:30:53, 3.40it/s] 88%|████████▊ | 328483/371472 [3:58:07<3:24:47, 3.50it/s] 88%|████████▊ | 328484/371472 [3:58:07<3:19:39, 3.59it/s] 88%|████████▊ | 328485/371472 [3:58:07<3:20:08, 3.58it/s] 88%|████████▊ | 328486/371472 [3:58:07<3:32:44, 3.37it/s] 88%|████████▊ | 328487/371472 [3:58:08<3:36:42, 3.31it/s] 88%|████████▊ | 328488/371472 [3:58:08<3:36:28, 3.31it/s] 88%|████████▊ | 328489/371472 [3:58:08<3:32:45, 3.37it/s] 88%|████████▊ | 328490/371472 [3:58:09<3:36:32, 3.31it/s] 88%|████████▊ | 328491/371472 [3:58:09<3:24:48, 3.50it/s] 88%|████████▊ | 328492/371472 [3:58:09<3:19:53, 3.58it/s] 88%|████████▊ | 328493/371472 [3:58:09<3:20:08, 3.58it/s] 88%|████████▊ | 328494/371472 [3:58:10<3:16:08, 3.65it/s] 88%|████████▊ | 328495/371472 [3:58:10<3:14:24, 3.68it/s] 88%|████████▊ | 328496/371472 [3:58:10<3:14:05, 3.69it/s] 88%|████████▊ | 328497/371472 [3:58:11<3:30:51, 3.40it/s] 88%|████████▊ | 328498/371472 [3:58:11<3:27:49, 3.45it/s] 88%|████████▊ | 328499/371472 [3:58:11<3:28:17, 3.44it/s] 88%|████████▊ | 328500/371472 [3:58:11<3:38:08, 3.28it/s] {'loss': 2.584, 'learning_rate': 2.0416837251395204e-07, 'epoch': 14.15} + 88%|████████▊ | 328500/371472 [3:58:11<3:38:08, 3.28it/s] 88%|████████▊ | 328501/371472 [3:58:12<3:31:45, 3.38it/s] 88%|████████▊ | 328502/371472 [3:58:12<3:35:50, 3.32it/s] 88%|████████▊ | 328503/371472 [3:58:12<3:26:47, 3.46it/s] 88%|████████▊ | 328504/371472 [3:58:13<3:28:58, 3.43it/s] 88%|████████▊ | 328505/371472 [3:58:13<3:24:01, 3.51it/s] 88%|████████▊ | 328506/371472 [3:58:13<3:23:22, 3.52it/s] 88%|████████▊ | 328507/371472 [3:58:13<3:21:35, 3.55it/s] 88%|████████▊ | 328508/371472 [3:58:14<3:16:38, 3.64it/s] 88%|████████▊ | 328509/371472 [3:58:14<3:20:40, 3.57it/s] 88%|████████▊ | 328510/371472 [3:58:14<3:18:41, 3.60it/s] 88%|████████▊ | 328511/371472 [3:58:15<3:20:20, 3.57it/s] 88%|████████▊ | 328512/371472 [3:58:15<3:21:50, 3.55it/s] 88%|████████▊ | 328513/371472 [3:58:15<3:26:44, 3.46it/s] 88%|████████▊ | 328514/371472 [3:58:15<3:18:29, 3.61it/s] 88%|████████▊ | 328515/371472 [3:58:16<3:27:57, 3.44it/s] 88%|████████▊ | 328516/371472 [3:58:16<3:29:24, 3.42it/s] 88%|████████▊ | 328517/371472 [3:58:16<3:32:11, 3.37it/s] 88%|████████▊ | 328518/371472 [3:58:17<3:24:35, 3.50it/s] 88%|████████▊ | 328519/371472 [3:58:17<3:18:17, 3.61it/s] 88%|████████▊ | 328520/371472 [3:58:17<3:22:23, 3.54it/s] {'loss': 2.6698, 'learning_rate': 2.0411989053847311e-07, 'epoch': 14.15} + 88%|████████▊ | 328520/371472 [3:58:17<3:22:23, 3.54it/s] 88%|████████▊ | 328521/371472 [3:58:17<3:24:15, 3.50it/s] 88%|████████▊ | 328522/371472 [3:58:18<3:46:42, 3.16it/s] 88%|████████▊ | 328523/371472 [3:58:18<3:39:45, 3.26it/s] 88%|████████▊ | 328524/371472 [3:58:18<3:46:41, 3.16it/s] 88%|████████▊ | 328525/371472 [3:58:19<3:37:05, 3.30it/s] 88%|████████▊ | 328526/371472 [3:58:19<3:28:09, 3.44it/s] 88%|████████▊ | 328527/371472 [3:58:19<3:24:33, 3.50it/s] 88%|████████▊ | 328528/371472 [3:58:20<3:24:06, 3.51it/s] 88%|████████▊ | 328529/371472 [3:58:20<3:41:44, 3.23it/s] 88%|████████▊ | 328530/371472 [3:58:20<3:40:05, 3.25it/s] 88%|████████▊ | 328531/371472 [3:58:20<3:30:25, 3.40it/s] 88%|████████▊ | 328532/371472 [3:58:21<3:20:55, 3.56it/s] 88%|████████▊ | 328533/371472 [3:58:21<3:23:44, 3.51it/s] 88%|████████▊ | 328534/371472 [3:58:21<3:33:31, 3.35it/s] 88%|████████▊ | 328535/371472 [3:58:22<3:26:30, 3.47it/s] 88%|████████▊ | 328536/371472 [3:58:22<3:18:51, 3.60it/s] 88%|████████▊ | 328537/371472 [3:58:22<3:13:04, 3.71it/s] 88%|████████▊ | 328538/371472 [3:58:23<3:36:11, 3.31it/s] 88%|████████▊ | 328539/371472 [3:58:23<3:28:43, 3.43it/s] 88%|████████▊ | 328540/371472 [3:58:23<3:23:18, 3.52it/s] {'loss': 2.5686, 'learning_rate': 2.0407140856299426e-07, 'epoch': 14.15} + 88%|████████▊ | 328540/371472 [3:58:23<3:23:18, 3.52it/s] 88%|████████▊ | 328541/371472 [3:58:23<3:21:07, 3.56it/s] 88%|████████▊ | 328542/371472 [3:58:24<3:23:10, 3.52it/s] 88%|████████▊ | 328543/371472 [3:58:24<3:32:26, 3.37it/s] 88%|████████▊ | 328544/371472 [3:58:24<3:31:47, 3.38it/s] 88%|████████▊ | 328545/371472 [3:58:25<3:38:32, 3.27it/s] 88%|████████▊ | 328546/371472 [3:58:25<3:42:37, 3.21it/s] 88%|████████▊ | 328547/371472 [3:58:25<3:33:02, 3.36it/s] 88%|████████▊ | 328548/371472 [3:58:25<3:30:32, 3.40it/s] 88%|████████▊ | 328549/371472 [3:58:26<3:27:31, 3.45it/s] 88%|████████▊ | 328550/371472 [3:58:26<3:30:11, 3.40it/s] 88%|████████▊ | 328551/371472 [3:58:26<3:29:08, 3.42it/s] 88%|████████▊ | 328552/371472 [3:58:27<3:30:29, 3.40it/s] 88%|████████▊ | 328553/371472 [3:58:27<3:27:57, 3.44it/s] 88%|████████▊ | 328554/371472 [3:58:27<3:24:59, 3.49it/s] 88%|████████▊ | 328555/371472 [3:58:28<3:46:56, 3.15it/s] 88%|████████▊ | 328556/371472 [3:58:28<3:40:01, 3.25it/s] 88%|████████▊ | 328557/371472 [3:58:28<3:30:40, 3.40it/s] 88%|████████▊ | 328558/371472 [3:58:28<3:25:26, 3.48it/s] 88%|████████▊ | 328559/371472 [3:58:29<3:19:46, 3.58it/s] 88%|████████▊ | 328560/371472 [3:58:29<3:30:03, 3.40it/s] {'loss': 2.6313, 'learning_rate': 2.040229265875153e-07, 'epoch': 14.15} + 88%|████████▊ | 328560/371472 [3:58:29<3:30:03, 3.40it/s] 88%|████████▊ | 328561/371472 [3:58:29<3:25:42, 3.48it/s] 88%|████████▊ | 328562/371472 [3:58:30<3:30:16, 3.40it/s] 88%|████████▊ | 328563/371472 [3:58:30<3:32:24, 3.37it/s] 88%|████████▊ | 328564/371472 [3:58:30<3:26:21, 3.47it/s] 88%|████████▊ | 328565/371472 [3:58:30<3:21:16, 3.55it/s] 88%|████████▊ | 328566/371472 [3:58:31<3:22:06, 3.54it/s] 88%|████████▊ | 328567/371472 [3:58:31<3:24:58, 3.49it/s] 88%|████████▊ | 328568/371472 [3:58:31<3:25:00, 3.49it/s] 88%|████████▊ | 328569/371472 [3:58:32<3:37:54, 3.28it/s] 88%|████████▊ | 328570/371472 [3:58:32<3:32:50, 3.36it/s] 88%|████████▊ | 328571/371472 [3:58:32<3:31:44, 3.38it/s] 88%|████████▊ | 328572/371472 [3:58:32<3:21:50, 3.54it/s] 88%|████████▊ | 328573/371472 [3:58:33<3:14:12, 3.68it/s] 88%|████████▊ | 328574/371472 [3:58:33<3:30:08, 3.40it/s] 88%|████████▊ | 328575/371472 [3:58:33<3:50:44, 3.10it/s] 88%|████████▊ | 328576/371472 [3:58:34<4:00:45, 2.97it/s] 88%|████████▊ | 328577/371472 [3:58:34<3:47:10, 3.15it/s] 88%|████████▊ | 328578/371472 [3:58:34<3:44:32, 3.18it/s] 88%|████████▊ | 328579/371472 [3:58:35<3:35:10, 3.32it/s] 88%|████████▊ | 328580/371472 [3:58:35<3:29:16, 3.42it/s] {'loss': 2.4553, 'learning_rate': 2.039744446120365e-07, 'epoch': 14.15} + 88%|████████▊ | 328580/371472 [3:58:35<3:29:16, 3.42it/s] 88%|████████▊ | 328581/371472 [3:58:35<3:24:13, 3.50it/s] 88%|████████▊ | 328582/371472 [3:58:35<3:20:52, 3.56it/s] 88%|████████▊ | 328583/371472 [3:58:36<3:25:51, 3.47it/s] 88%|████████▊ | 328584/371472 [3:58:36<3:30:40, 3.39it/s] 88%|████████▊ | 328585/371472 [3:58:36<3:37:02, 3.29it/s] 88%|████████▊ | 328586/371472 [3:58:37<3:43:29, 3.20it/s] 88%|████████▊ | 328587/371472 [3:58:37<3:35:23, 3.32it/s] 88%|████████▊ | 328588/371472 [3:58:37<3:40:19, 3.24it/s] 88%|████████▊ | 328589/371472 [3:58:38<4:12:49, 2.83it/s] 88%|████████▊ | 328590/371472 [3:58:38<3:59:09, 2.99it/s] 88%|████████▊ | 328591/371472 [3:58:38<3:57:53, 3.00it/s] 88%|████████▊ | 328592/371472 [3:58:39<3:37:34, 3.28it/s] 88%|████████▊ | 328593/371472 [3:58:39<3:42:59, 3.20it/s] 88%|████████▊ | 328594/371472 [3:58:39<3:46:15, 3.16it/s] 88%|████████▊ | 328595/371472 [3:58:40<3:36:12, 3.31it/s] 88%|████████▊ | 328596/371472 [3:58:40<3:37:17, 3.29it/s] 88%|████████▊ | 328597/371472 [3:58:40<3:41:54, 3.22it/s] 88%|████████▊ | 328598/371472 [3:58:41<3:52:27, 3.07it/s] 88%|████████▊ | 328599/371472 [3:58:41<3:47:16, 3.14it/s] 88%|████████▊ | 328600/371472 [3:58:41<3:41:55, 3.22it/s] {'loss': 2.5679, 'learning_rate': 2.0392596263655753e-07, 'epoch': 14.15} + 88%|████████▊ | 328600/371472 [3:58:41<3:41:55, 3.22it/s] 88%|████████▊ | 328601/371472 [3:58:41<3:29:48, 3.41it/s] 88%|████████▊ | 328602/371472 [3:58:42<3:29:38, 3.41it/s] 88%|████████▊ | 328603/371472 [3:58:42<3:38:35, 3.27it/s] 88%|████████▊ | 328604/371472 [3:58:42<3:29:45, 3.41it/s] 88%|████████▊ | 328605/371472 [3:58:43<3:35:23, 3.32it/s] 88%|████████▊ | 328606/371472 [3:58:43<3:35:50, 3.31it/s] 88%|████████▊ | 328607/371472 [3:58:43<3:41:43, 3.22it/s] 88%|████████▊ | 328608/371472 [3:58:43<3:32:22, 3.36it/s] 88%|████████▊ | 328609/371472 [3:58:44<3:31:56, 3.37it/s] 88%|████████▊ | 328610/371472 [3:58:44<3:24:07, 3.50it/s] 88%|████████▊ | 328611/371472 [3:58:44<3:22:05, 3.53it/s] 88%|████████▊ | 328612/371472 [3:58:45<3:21:36, 3.54it/s] 88%|████████▊ | 328613/371472 [3:58:45<3:21:51, 3.54it/s] 88%|████���███▊ | 328614/371472 [3:58:45<3:21:09, 3.55it/s] 88%|████████▊ | 328615/371472 [3:58:45<3:16:12, 3.64it/s] 88%|████████▊ | 328616/371472 [3:58:46<3:26:45, 3.45it/s] 88%|████████▊ | 328617/371472 [3:58:46<3:25:05, 3.48it/s] 88%|████████▊ | 328618/371472 [3:58:46<3:25:44, 3.47it/s] 88%|████████▊ | 328619/371472 [3:58:47<3:34:45, 3.33it/s] 88%|████████▊ | 328620/371472 [3:58:47<3:32:38, 3.36it/s] {'loss': 2.6164, 'learning_rate': 2.0387748066107868e-07, 'epoch': 14.15} + 88%|████████▊ | 328620/371472 [3:58:47<3:32:38, 3.36it/s] 88%|████████▊ | 328621/371472 [3:58:47<3:23:14, 3.51it/s] 88%|████████▊ | 328622/371472 [3:58:48<3:41:30, 3.22it/s] 88%|████████▊ | 328623/371472 [3:58:48<3:41:22, 3.23it/s] 88%|████████▊ | 328624/371472 [3:58:48<3:38:32, 3.27it/s] 88%|████████▊ | 328625/371472 [3:58:48<3:27:34, 3.44it/s] 88%|████████▊ | 328626/371472 [3:58:49<3:21:29, 3.54it/s] 88%|████████▊ | 328627/371472 [3:58:49<3:29:29, 3.41it/s] 88%|████████▊ | 328628/371472 [3:58:49<3:23:14, 3.51it/s] 88%|████████▊ | 328629/371472 [3:58:50<3:20:57, 3.55it/s] 88%|████████▊ | 328630/371472 [3:58:50<3:25:02, 3.48it/s] 88%|████████▊ | 328631/371472 [3:58:50<3:25:15, 3.48it/s] 88%|████████▊ | 328632/371472 [3:58:50<3:29:14, 3.41it/s] 88%|████████▊ | 328633/371472 [3:58:51<3:28:37, 3.42it/s] 88%|████████▊ | 328634/371472 [3:58:51<3:29:34, 3.41it/s] 88%|████████▊ | 328635/371472 [3:58:51<3:24:47, 3.49it/s] 88%|████████▊ | 328636/371472 [3:58:52<3:26:01, 3.47it/s] 88%|████████▊ | 328637/371472 [3:58:52<3:23:30, 3.51it/s] 88%|████████▊ | 328638/371472 [3:58:52<3:16:33, 3.63it/s] 88%|████████▊ | 328639/371472 [3:58:52<3:22:48, 3.52it/s] 88%|████████▊ | 328640/371472 [3:58:53<3:14:20, 3.67it/s] {'loss': 2.6186, 'learning_rate': 2.0382899868559975e-07, 'epoch': 14.16} + 88%|████████▊ | 328640/371472 [3:58:53<3:14:20, 3.67it/s] 88%|████████▊ | 328641/371472 [3:58:53<3:10:34, 3.75it/s] 88%|████████▊ | 328642/371472 [3:58:53<3:04:15, 3.87it/s] 88%|████████▊ | 328643/371472 [3:58:54<3:26:29, 3.46it/s] 88%|████████▊ | 328644/371472 [3:58:54<3:16:43, 3.63it/s] 88%|████████▊ | 328645/371472 [3:58:54<3:15:56, 3.64it/s] 88%|████████▊ | 328646/371472 [3:58:54<3:24:24, 3.49it/s] 88%|████████▊ | 328647/371472 [3:58:55<3:22:23, 3.53it/s] 88%|████████▊ | 328648/371472 [3:58:55<3:18:25, 3.60it/s] 88%|████████▊ | 328649/371472 [3:58:55<3:23:29, 3.51it/s] 88%|████████▊ | 328650/371472 [3:58:56<3:29:02, 3.41it/s] 88%|████████▊ | 328651/371472 [3:58:56<3:19:02, 3.59it/s] 88%|████████▊ | 328652/371472 [3:58:56<3:27:17, 3.44it/s] 88%|████████▊ | 328653/371472 [3:58:56<3:18:52, 3.59it/s] 88%|████████▊ | 328654/371472 [3:58:57<3:14:35, 3.67it/s] 88%|████████▊ | 328655/371472 [3:58:57<3:25:12, 3.48it/s] 88%|████████▊ | 328656/371472 [3:58:57<3:33:58, 3.33it/s] 88%|████████▊ | 328657/371472 [3:58:58<3:41:11, 3.23it/s] 88%|████████▊ | 328658/371472 [3:58:58<3:31:28, 3.37it/s] 88%|████████▊ | 328659/371472 [3:58:58<3:20:45, 3.55it/s] 88%|████████▊ | 328660/371472 [3:58:58<3:31:56, 3.37it/s] {'loss': 2.5828, 'learning_rate': 2.0378051671012088e-07, 'epoch': 14.16} + 88%|████████▊ | 328660/371472 [3:58:58<3:31:56, 3.37it/s] 88%|████████▊ | 328661/371472 [3:58:59<3:29:28, 3.41it/s] 88%|████████▊ | 328662/371472 [3:58:59<3:24:46, 3.48it/s] 88%|████████▊ | 328663/371472 [3:58:59<3:16:19, 3.63it/s] 88%|████████▊ | 328664/371472 [3:59:00<3:19:06, 3.58it/s] 88%|████████▊ | 328665/371472 [3:59:00<3:12:56, 3.70it/s] 88%|████████▊ | 328666/371472 [3:59:00<3:13:05, 3.69it/s] 88%|████████▊ | 328667/371472 [3:59:00<3:11:30, 3.73it/s] 88%|████████▊ | 328668/371472 [3:59:01<3:09:53, 3.76it/s] 88%|████████▊ | 328669/371472 [3:59:01<3:17:58, 3.60it/s] 88%|████████▊ | 328670/371472 [3:59:01<3:16:04, 3.64it/s] 88%|████████▊ | 328671/371472 [3:59:01<3:20:59, 3.55it/s] 88%|████████▊ | 328672/371472 [3:59:02<3:20:28, 3.56it/s] 88%|████████▊ | 328673/371472 [3:59:02<3:26:10, 3.46it/s] 88%|████████▊ | 328674/371472 [3:59:02<3:32:04, 3.36it/s] 88%|████████▊ | 328675/371472 [3:59:03<3:37:38, 3.28it/s] 88%|████████▊ | 328676/371472 [3:59:03<3:29:52, 3.40it/s] 88%|████████▊ | 328677/371472 [3:59:03<3:28:50, 3.42it/s] 88%|████████▊ | 328678/371472 [3:59:03<3:22:36, 3.52it/s] 88%|████████▊ | 328679/371472 [3:59:04<3:34:04, 3.33it/s] 88%|████████▊ | 328680/371472 [3:59:04<3:24:53, 3.48it/s] {'loss': 2.6503, 'learning_rate': 2.0373203473464195e-07, 'epoch': 14.16} + 88%|████████▊ | 328680/371472 [3:59:04<3:24:53, 3.48it/s] 88%|████████▊ | 328681/371472 [3:59:04<3:22:18, 3.53it/s] 88%|████████▊ | 328682/371472 [3:59:05<3:28:05, 3.43it/s] 88%|████████▊ | 328683/371472 [3:59:05<3:20:01, 3.57it/s] 88%|████████▊ | 328684/371472 [3:59:05<3:25:08, 3.48it/s] 88%|████████▊ | 328685/371472 [3:59:05<3:19:03, 3.58it/s] 88%|████████▊ | 328686/371472 [3:59:06<3:16:51, 3.62it/s] 88%|████████▊ | 328687/371472 [3:59:06<3:21:17, 3.54it/s] 88%|████████▊ | 328688/371472 [3:59:06<3:21:12, 3.54it/s] 88%|████████▊ | 328689/371472 [3:59:07<3:15:45, 3.64it/s] 88%|████████▊ | 328690/371472 [3:59:07<3:22:25, 3.52it/s] 88%|████████▊ | 328691/371472 [3:59:07<3:13:37, 3.68it/s] 88%|████████▊ | 328692/371472 [3:59:07<3:11:19, 3.73it/s] 88%|████████▊ | 328693/371472 [3:59:08<3:11:11, 3.73it/s] 88%|████████▊ | 328694/371472 [3:59:08<3:20:52, 3.55it/s] 88%|████████▊ | 328695/371472 [3:59:08<3:18:02, 3.60it/s] 88%|████████▊ | 328696/371472 [3:59:09<3:21:07, 3.54it/s] 88%|████████▊ | 328697/371472 [3:59:09<3:29:33, 3.40it/s] 88%|████████▊ | 328698/371472 [3:59:09<3:32:06, 3.36it/s] 88%|████████▊ | 328699/371472 [3:59:09<3:25:35, 3.47it/s] 88%|████████▊ | 328700/371472 [3:59:10<3:23:47, 3.50it/s] {'loss': 2.5059, 'learning_rate': 2.0368355275916313e-07, 'epoch': 14.16} + 88%|████████▊ | 328700/371472 [3:59:10<3:23:47, 3.50it/s] 88%|████████▊ | 328701/371472 [3:59:10<3:24:18, 3.49it/s] 88%|████████▊ | 328702/371472 [3:59:10<3:16:44, 3.62it/s] 88%|████████▊ | 328703/371472 [3:59:11<3:12:07, 3.71it/s] 88%|████████▊ | 328704/371472 [3:59:11<3:11:00, 3.73it/s] 88%|████████▊ | 328705/371472 [3:59:11<3:14:05, 3.67it/s] 88%|████████▊ | 328706/371472 [3:59:11<3:12:37, 3.70it/s] 88%|████████▊ | 328707/371472 [3:59:12<3:06:53, 3.81it/s] 88%|████████▊ | 328708/371472 [3:59:12<3:02:42, 3.90it/s] 88%|████████▊ | 328709/371472 [3:59:12<3:27:05, 3.44it/s] 88%|████████▊ | 328710/371472 [3:59:13<3:37:34, 3.28it/s] 88%|████████▊ | 328711/371472 [3:59:13<3:31:17, 3.37it/s] 88%|████████▊ | 328712/371472 [3:59:13<3:26:06, 3.46it/s] 88%|████████▊ | 328713/371472 [3:59:13<3:25:44, 3.46it/s] 88%|████████▊ | 328714/371472 [3:59:14<3:26:23, 3.45it/s] 88%|████████▊ | 328715/371472 [3:59:14<3:39:27, 3.25it/s] 88%|████████▊ | 328716/371472 [3:59:14<3:32:21, 3.36it/s] 88%|████████▊ | 328717/371472 [3:59:15<3:37:40, 3.27it/s] 88%|████████▊ | 328718/371472 [3:59:15<3:31:45, 3.36it/s] 88%|████████▊ | 328719/371472 [3:59:15<3:22:12, 3.52it/s] 88%|████████▊ | 328720/371472 [3:59:15<3:19:17, 3.58it/s] {'loss': 2.6934, 'learning_rate': 2.036350707836842e-07, 'epoch': 14.16} + 88%|████████▊ | 328720/371472 [3:59:15<3:19:17, 3.58it/s] 88%|████████▊ | 328721/371472 [3:59:16<3:16:28, 3.63it/s] 88%|████████▊ | 328722/371472 [3:59:16<3:13:19, 3.69it/s] 88%|████████▊ | 328723/371472 [3:59:16<3:41:33, 3.22it/s] 88%|████████▊ | 328724/371472 [3:59:17<3:31:00, 3.38it/s] 88%|████████▊ | 328725/371472 [3:59:17<3:18:45, 3.58it/s] 88%|████████▊ | 328726/371472 [3:59:17<3:27:04, 3.44it/s] 88%|████████▊ | 328727/371472 [3:59:18<3:48:53, 3.11it/s] 88%|████████▊ | 328728/371472 [3:59:18<3:47:55, 3.13it/s] 88%|████████▊ | 328729/371472 [3:59:18<3:52:11, 3.07it/s] 88%|████████▊ | 328730/371472 [3:59:18<3:36:38, 3.29it/s] 88%|████████▊ | 328731/371472 [3:59:19<3:33:43, 3.33it/s] 88%|████████▊ | 328732/371472 [3:59:19<3:26:01, 3.46it/s] 88%|████████▊ | 328733/371472 [3:59:19<3:17:33, 3.61it/s] 88%|████████▊ | 328734/371472 [3:59:20<3:16:01, 3.63it/s] 88%|████████▊ | 328735/371472 [3:59:20<3:13:17, 3.69it/s] 88%|████████▊ | 328736/371472 [3:59:20<3:06:54, 3.81it/s] 88%|████████▊ | 328737/371472 [3:59:20<3:08:20, 3.78it/s] 88%|████████▊ | 328738/371472 [3:59:21<3:05:13, 3.85it/s] 88%|████████▊ | 328739/371472 [3:59:21<3:13:08, 3.69it/s] 88%|████████▊ | 328740/371472 [3:59:21<3:17:40, 3.60it/s] {'loss': 2.5057, 'learning_rate': 2.0358658880820532e-07, 'epoch': 14.16} + 88%|████████▊ | 328740/371472 [3:59:21<3:17:40, 3.60it/s] 88%|████████▊ | 328741/371472 [3:59:21<3:13:20, 3.68it/s] 88%|████████▊ | 328742/371472 [3:59:22<3:25:34, 3.46it/s] 88%|████████▊ | 328743/371472 [3:59:22<3:29:40, 3.40it/s] 88%|████████▊ | 328744/371472 [3:59:22<3:25:32, 3.46it/s] 88%|████████▊ | 328745/371472 [3:59:23<3:26:28, 3.45it/s] 88%|████████▊ | 328746/371472 [3:59:23<3:22:42, 3.51it/s] 88%|████████▊ | 328747/371472 [3:59:23<3:28:30, 3.42it/s] 88%|████████▊ | 328748/371472 [3:59:23<3:23:00, 3.51it/s] 88%|████████▊ | 328749/371472 [3:59:24<3:22:22, 3.52it/s] 88%|████████▊ | 328750/371472 [3:59:24<3:17:17, 3.61it/s] 88%|████████▊ | 328751/371472 [3:59:24<3:14:23, 3.66it/s] 88%|████████▊ | 328752/371472 [3:59:25<3:17:27, 3.61it/s] 89%|████████▊ | 328753/371472 [3:59:25<3:20:44, 3.55it/s] 89%|████████▊ | 328754/371472 [3:59:25<3:20:20, 3.55it/s] 89%|████████▊ | 328755/371472 [3:59:25<3:17:33, 3.60it/s] 89%|████████▊ | 328756/371472 [3:59:26<3:15:24, 3.64it/s] 89%|████████▊ | 328757/371472 [3:59:26<3:13:43, 3.67it/s] 89%|████████▊ | 328758/371472 [3:59:26<3:19:56, 3.56it/s] 89%|████████▊ | 328759/371472 [3:59:27<3:22:55, 3.51it/s] 89%|████████▊ | 328760/371472 [3:59:27<3:23:16, 3.50it/s] {'loss': 2.57, 'learning_rate': 2.035381068327264e-07, 'epoch': 14.16} + 89%|████████▊ | 328760/371472 [3:59:27<3:23:16, 3.50it/s] 89%|████████▊ | 328761/371472 [3:59:27<3:21:14, 3.54it/s] 89%|████████▊ | 328762/371472 [3:59:27<3:14:05, 3.67it/s] 89%|████████▊ | 328763/371472 [3:59:28<3:10:59, 3.73it/s] 89%|████████▊ | 328764/371472 [3:59:28<3:17:43, 3.60it/s] 89%|████████▊ | 328765/371472 [3:59:28<3:25:55, 3.46it/s] 89%|████████▊ | 328766/371472 [3:59:29<3:30:50, 3.38it/s] 89%|████████▊ | 328767/371472 [3:59:29<3:25:40, 3.46it/s] 89%|████████▊ | 328768/371472 [3:59:29<3:33:35, 3.33it/s] 89%|████████▊ | 328769/371472 [3:59:29<3:27:59, 3.42it/s] 89%|████████▊ | 328770/371472 [3:59:30<3:46:26, 3.14it/s] 89%|████████▊ | 328771/371472 [3:59:30<3:42:28, 3.20it/s] 89%|████████▊ | 328772/371472 [3:59:30<3:33:53, 3.33it/s] 89%|████████▊ | 328773/371472 [3:59:31<3:45:10, 3.16it/s] 89%|████████▊ | 328774/371472 [3:59:31<3:36:37, 3.29it/s] 89%|████████▊ | 328775/371472 [3:59:31<3:31:47, 3.36it/s] 89%|████████▊ | 328776/371472 [3:59:31<3:23:34, 3.50it/s] 89%|████████▊ | 328777/371472 [3:59:32<3:28:42, 3.41it/s] 89%|████████▊ | 328778/371472 [3:59:32<3:35:00, 3.31it/s] 89%|████████▊ | 328779/371472 [3:59:32<3:28:13, 3.42it/s] 89%|████████▊ | 328780/371472 [3:59:33<3:21:01, 3.54it/s] {'loss': 2.5908, 'learning_rate': 2.0348962485724757e-07, 'epoch': 14.16} + 89%|████████▊ | 328780/371472 [3:59:33<3:21:01, 3.54it/s] 89%|████████▊ | 328781/371472 [3:59:33<3:14:01, 3.67it/s] 89%|████████▊ | 328782/371472 [3:59:33<3:13:59, 3.67it/s] 89%|████████▊ | 328783/371472 [3:59:33<3:09:23, 3.76it/s] 89%|████████▊ | 328784/371472 [3:59:34<3:09:24, 3.76it/s] 89%|████████▊ | 328785/371472 [3:59:34<3:09:36, 3.75it/s] 89%|████████▊ | 328786/371472 [3:59:34<3:21:57, 3.52it/s] 89%|████████▊ | 328787/371472 [3:59:35<3:24:13, 3.48it/s] 89%|████████▊ | 328788/371472 [3:59:35<3:24:27, 3.48it/s] 89%|████████▊ | 328789/371472 [3:59:35<3:21:46, 3.53it/s] 89%|████████▊ | 328790/371472 [3:59:35<3:19:58, 3.56it/s] 89%|████████▊ | 328791/371472 [3:59:36<3:14:52, 3.65it/s] 89%|████████▊ | 328792/371472 [3:59:36<3:22:21, 3.52it/s] 89%|████████▊ | 328793/371472 [3:59:36<3:25:11, 3.47it/s] 89%|████████▊ | 328794/371472 [3:59:37<3:25:36, 3.46it/s] 89%|████████▊ | 328795/371472 [3:59:37<3:17:35, 3.60it/s] 89%|████████▊ | 328796/371472 [3:59:37<3:14:25, 3.66it/s] 89%|████████▊ | 328797/371472 [3:59:37<3:10:36, 3.73it/s] 89%|████████▊ | 328798/371472 [3:59:38<3:11:21, 3.72it/s] 89%|████████▊ | 328799/371472 [3:59:38<3:10:37, 3.73it/s] 89%|████████▊ | 328800/371472 [3:59:38<3:35:44, 3.30it/s] {'loss': 2.6793, 'learning_rate': 2.0344114288176862e-07, 'epoch': 14.16} + 89%|████████▊ | 328800/371472 [3:59:38<3:35:44, 3.30it/s] 89%|████████▊ | 328801/371472 [3:59:39<3:27:46, 3.42it/s] 89%|████████▊ | 328802/371472 [3:59:39<3:24:23, 3.48it/s] 89%|████████▊ | 328803/371472 [3:59:39<3:37:09, 3.27it/s] 89%|████████▊ | 328804/371472 [3:59:39<3:29:15, 3.40it/s] 89%|████████▊ | 328805/371472 [3:59:40<3:17:46, 3.60it/s] 89%|████████▊ | 328806/371472 [3:59:40<3:25:22, 3.46it/s] 89%|████████▊ | 328807/371472 [3:59:40<3:33:32, 3.33it/s] 89%|████████▊ | 328808/371472 [3:59:41<3:25:20, 3.46it/s] 89%|████████▊ | 328809/371472 [3:59:41<3:26:35, 3.44it/s] 89%|████████▊ | 328810/371472 [3:59:41<3:39:09, 3.24it/s] 89%|████████▊ | 328811/371472 [3:59:41<3:31:14, 3.37it/s] 89%|████████▊ | 328812/371472 [3:59:42<3:30:52, 3.37it/s] 89%|████████▊ | 328813/371472 [3:59:42<3:35:52, 3.29it/s] 89%|████████▊ | 328814/371472 [3:59:42<3:29:38, 3.39it/s] 89%|████████▊ | 328815/371472 [3:59:43<3:25:35, 3.46it/s] 89%|████████▊ | 328816/371472 [3:59:43<3:24:29, 3.48it/s] 89%|████████▊ | 328817/371472 [3:59:43<3:16:31, 3.62it/s] 89%|████████▊ | 328818/371472 [3:59:44<3:33:20, 3.33it/s] 89%|████████▊ | 328819/371472 [3:59:44<3:33:45, 3.33it/s] 89%|████████▊ | 328820/371472 [3:59:44<3:26:04, 3.45it/s] {'loss': 2.6085, 'learning_rate': 2.0339266090628966e-07, 'epoch': 14.16} + 89%|████████▊ | 328820/371472 [3:59:44<3:26:04, 3.45it/s] 89%|████████▊ | 328821/371472 [3:59:44<3:25:52, 3.45it/s] 89%|████████▊ | 328822/371472 [3:59:45<3:23:51, 3.49it/s] 89%|████████▊ | 328823/371472 [3:59:45<3:25:51, 3.45it/s] 89%|████████▊ | 328824/371472 [3:59:45<3:22:58, 3.50it/s] 89%|████████▊ | 328825/371472 [3:59:46<3:23:54, 3.49it/s] 89%|████████▊ | 328826/371472 [3:59:46<3:25:55, 3.45it/s] 89%|████████▊ | 328827/371472 [3:59:46<3:21:18, 3.53it/s] 89%|████████▊ | 328828/371472 [3:59:46<3:14:27, 3.66it/s] 89%|████████▊ | 328829/371472 [3:59:47<3:21:13, 3.53it/s] 89%|████████▊ | 328830/371472 [3:59:47<3:41:41, 3.21it/s] 89%|████████▊ | 328831/371472 [3:59:47<3:36:07, 3.29it/s] 89%|████████▊ | 328832/371472 [3:59:48<3:29:56, 3.39it/s] 89%|████████▊ | 328833/371472 [3:59:48<3:33:01, 3.34it/s] 89%|████████▊ | 328834/371472 [3:59:48<3:35:56, 3.29it/s] 89%|████████▊ | 328835/371472 [3:59:49<3:37:40, 3.26it/s] 89%|████████▊ | 328836/371472 [3:59:49<3:35:22, 3.30it/s] 89%|████████▊ | 328837/371472 [3:59:49<3:26:37, 3.44it/s] 89%|████████▊ | 328838/371472 [3:59:49<3:29:48, 3.39it/s] 89%|████████▊ | 328839/371472 [3:59:50<3:26:37, 3.44it/s] 89%|████████▊ | 328840/371472 [3:59:50<3:19:44, 3.56it/s] {'loss': 2.5505, 'learning_rate': 2.0334417893081084e-07, 'epoch': 14.16} + 89%|████████▊ | 328840/371472 [3:59:50<3:19:44, 3.56it/s] 89%|████████▊ | 328841/371472 [3:59:50<3:20:47, 3.54it/s] 89%|████████▊ | 328842/371472 [3:59:51<3:22:11, 3.51it/s] 89%|████████▊ | 328843/371472 [3:59:51<3:18:54, 3.57it/s] 89%|████████▊ | 328844/371472 [3:59:51<3:18:22, 3.58it/s] 89%|████████▊ | 328845/371472 [3:59:51<3:18:40, 3.58it/s] 89%|████████▊ | 328846/371472 [3:59:52<3:13:37, 3.67it/s] 89%|████████▊ | 328847/371472 [3:59:52<3:18:19, 3.58it/s] 89%|████████▊ | 328848/371472 [3:59:52<3:33:04, 3.33it/s] 89%|████████▊ | 328849/371472 [3:59:53<3:32:28, 3.34it/s] 89%|████████▊ | 328850/371472 [3:59:53<3:27:16, 3.43it/s] 89%|████████▊ | 328851/371472 [3:59:53<3:25:39, 3.45it/s] 89%|████████▊ | 328852/371472 [3:59:53<3:23:24, 3.49it/s] 89%|████████▊ | 328853/371472 [3:59:54<3:26:48, 3.43it/s] 89%|████████▊ | 328854/371472 [3:59:54<3:37:51, 3.26it/s] 89%|████████▊ | 328855/371472 [3:59:54<3:26:04, 3.45it/s] 89%|████████▊ | 328856/371472 [3:59:55<3:19:17, 3.56it/s] 89%|████████▊ | 328857/371472 [3:59:55<3:18:37, 3.58it/s] 89%|████████▊ | 328858/371472 [3:59:55<3:17:40, 3.59it/s] 89%|████████▊ | 328859/371472 [3:59:55<3:18:32, 3.58it/s] 89%|████████▊ | 328860/371472 [3:59:56<3:12:20, 3.69it/s] {'loss': 2.7867, 'learning_rate': 2.0329569695533189e-07, 'epoch': 14.16} + 89%|████████▊ | 328860/371472 [3:59:56<3:12:20, 3.69it/s] 89%|████████▊ | 328861/371472 [3:59:56<3:16:48, 3.61it/s] 89%|████████▊ | 328862/371472 [3:59:56<3:16:47, 3.61it/s] 89%|████████▊ | 328863/371472 [3:59:57<3:25:45, 3.45it/s] 89%|████████▊ | 328864/371472 [3:59:57<3:21:13, 3.53it/s] 89%|████████▊ | 328865/371472 [3:59:57<3:29:00, 3.40it/s] 89%|████████▊ | 328866/371472 [3:59:57<3:25:35, 3.45it/s] 89%|████████▊ | 328867/371472 [3:59:58<3:26:18, 3.44it/s] 89%|████████▊ | 328868/371472 [3:59:58<3:33:09, 3.33it/s] 89%|████████▊ | 328869/371472 [3:59:58<3:26:02, 3.45it/s] 89%|████████▊ | 328870/371472 [3:59:59<3:19:23, 3.56it/s] 89%|████████▊ | 328871/371472 [3:59:59<3:32:57, 3.33it/s] 89%|████████▊ | 328872/371472 [3:59:59<3:35:09, 3.30it/s] 89%|████████▊ | 328873/371472 [3:59:59<3:37:35, 3.26it/s] 89%|████████▊ | 328874/371472 [4:00:00<3:29:19, 3.39it/s] 89%|████████▊ | 328875/371472 [4:00:00<3:41:32, 3.20it/s] 89%|████████▊ | 328876/371472 [4:00:00<3:30:36, 3.37it/s] 89%|████████▊ | 328877/371472 [4:00:01<3:27:26, 3.42it/s] 89%|████████▊ | 328878/371472 [4:00:01<3:26:58, 3.43it/s] 89%|████████▊ | 328879/371472 [4:00:01<3:19:25, 3.56it/s] 89%|████████▊ | 328880/371472 [4:00:02<3:29:42, 3.39it/s] {'loss': 2.7212, 'learning_rate': 2.0324721497985304e-07, 'epoch': 14.17} + 89%|████████▊ | 328880/371472 [4:00:02<3:29:42, 3.39it/s] 89%|████████▊ | 328881/371472 [4:00:02<3:21:01, 3.53it/s] 89%|████████▊ | 328882/371472 [4:00:02<3:26:49, 3.43it/s] 89%|████████▊ | 328883/371472 [4:00:02<3:19:37, 3.56it/s] 89%|████████▊ | 328884/371472 [4:00:03<3:14:43, 3.65it/s] 89%|████████▊ | 328885/371472 [4:00:03<3:24:40, 3.47it/s] 89%|████████▊ | 328886/371472 [4:00:03<3:15:28, 3.63it/s] 89%|████████▊ | 328887/371472 [4:00:04<3:31:40, 3.35it/s] 89%|████████▊ | 328888/371472 [4:00:04<3:33:26, 3.33it/s] 89%|████████▊ | 328889/371472 [4:00:04<3:32:00, 3.35it/s] 89%|████████▊ | 328890/371472 [4:00:04<3:29:16, 3.39it/s] 89%|████████▊ | 328891/371472 [4:00:05<3:25:26, 3.45it/s] 89%|████████▊ | 328892/371472 [4:00:05<3:19:58, 3.55it/s] 89%|████████▊ | 328893/371472 [4:00:05<3:20:20, 3.54it/s] 89%|████████▊ | 328894/371472 [4:00:06<3:22:06, 3.51it/s] 89%|████████▊ | 328895/371472 [4:00:06<3:27:24, 3.42it/s] 89%|████████▊ | 328896/371472 [4:00:06<3:42:42, 3.19it/s] 89%|████████▊ | 328897/371472 [4:00:06<3:34:30, 3.31it/s] 89%|████████▊ | 328898/371472 [4:00:07<3:33:11, 3.33it/s] 89%|████████▊ | 328899/371472 [4:00:07<3:28:33, 3.40it/s] 89%|████████▊ | 328900/371472 [4:00:07<3:45:26, 3.15it/s] {'loss': 2.5915, 'learning_rate': 2.031987330043741e-07, 'epoch': 14.17} + 89%|████████▊ | 328900/371472 [4:00:07<3:45:26, 3.15it/s] 89%|███████���▊ | 328901/371472 [4:00:08<3:35:43, 3.29it/s] 89%|████████▊ | 328902/371472 [4:00:08<3:35:07, 3.30it/s] 89%|████████▊ | 328903/371472 [4:00:08<3:31:52, 3.35it/s] 89%|████████▊ | 328904/371472 [4:00:09<3:22:36, 3.50it/s] 89%|████████▊ | 328905/371472 [4:00:09<3:35:13, 3.30it/s] 89%|████████▊ | 328906/371472 [4:00:09<3:31:54, 3.35it/s] 89%|████████▊ | 328907/371472 [4:00:10<3:42:05, 3.19it/s] 89%|████████▊ | 328908/371472 [4:00:10<3:33:45, 3.32it/s] 89%|████████▊ | 328909/371472 [4:00:10<3:36:09, 3.28it/s] 89%|████████▊ | 328910/371472 [4:00:10<3:30:27, 3.37it/s] 89%|████████▊ | 328911/371472 [4:00:11<3:24:36, 3.47it/s] 89%|████████▊ | 328912/371472 [4:00:11<3:17:33, 3.59it/s] 89%|████████▊ | 328913/371472 [4:00:11<3:22:38, 3.50it/s] 89%|████████▊ | 328914/371472 [4:00:11<3:18:08, 3.58it/s] 89%|████████▊ | 328915/371472 [4:00:12<3:22:14, 3.51it/s] 89%|████████▊ | 328916/371472 [4:00:12<3:15:56, 3.62it/s] 89%|████████▊ | 328917/371472 [4:00:12<3:14:46, 3.64it/s] 89%|████████▊ | 328918/371472 [4:00:13<3:14:24, 3.65it/s] 89%|████████▊ | 328919/371472 [4:00:13<3:16:17, 3.61it/s] 89%|████████▊ | 328920/371472 [4:00:13<3:13:19, 3.67it/s] {'loss': 2.5987, 'learning_rate': 2.0315025102889526e-07, 'epoch': 14.17} + 89%|████████▊ | 328920/371472 [4:00:13<3:13:19, 3.67it/s] 89%|████████▊ | 328921/371472 [4:00:13<3:16:44, 3.60it/s] 89%|████████▊ | 328922/371472 [4:00:14<3:13:31, 3.66it/s] 89%|████████▊ | 328923/371472 [4:00:14<3:08:54, 3.75it/s] 89%|████████▊ | 328924/371472 [4:00:14<3:07:25, 3.78it/s] 89%|████████▊ | 328925/371472 [4:00:14<3:14:48, 3.64it/s] 89%|████████▊ | 328926/371472 [4:00:15<3:10:22, 3.72it/s] 89%|████████▊ | 328927/371472 [4:00:15<3:05:56, 3.81it/s] 89%|████████▊ | 328928/371472 [4:00:15<3:05:21, 3.83it/s] 89%|████████▊ | 328929/371472 [4:00:16<3:06:37, 3.80it/s] 89%|████████▊ | 328930/371472 [4:00:16<3:04:39, 3.84it/s] 89%|████████▊ | 328931/371472 [4:00:16<3:11:32, 3.70it/s] 89%|████████▊ | 328932/371472 [4:00:16<3:16:36, 3.61it/s] 89%|████████▊ | 328933/371472 [4:00:17<3:14:19, 3.65it/s] 89%|████████▊ | 328934/371472 [4:00:17<3:23:55, 3.48it/s] 89%|████████▊ | 328935/371472 [4:00:17<3:25:22, 3.45it/s] 89%|████████▊ | 328936/371472 [4:00:18<3:23:00, 3.49it/s] 89%|████████▊ | 328937/371472 [4:00:18<3:26:32, 3.43it/s] 89%|████████▊ | 328938/371472 [4:00:18<3:31:50, 3.35it/s] 89%|████████▊ | 328939/371472 [4:00:18<3:17:53, 3.58it/s] 89%|████████▊ | 328940/371472 [4:00:19<3:14:12, 3.65it/s] {'loss': 2.5591, 'learning_rate': 2.031017690534163e-07, 'epoch': 14.17} + 89%|████████▊ | 328940/371472 [4:00:19<3:14:12, 3.65it/s] 89%|████████▊ | 328941/371472 [4:00:19<3:14:07, 3.65it/s] 89%|████████▊ | 328942/371472 [4:00:19<3:16:32, 3.61it/s] 89%|████████▊ | 328943/371472 [4:00:19<3:13:17, 3.67it/s] 89%|████████▊ | 328944/371472 [4:00:20<3:19:20, 3.56it/s] 89%|████████▊ | 328945/371472 [4:00:20<3:14:23, 3.65it/s] 89%|████████▊ | 328946/371472 [4:00:20<3:14:06, 3.65it/s] 89%|████████▊ | 328947/371472 [4:00:21<3:11:04, 3.71it/s] 89%|████████▊ | 328948/371472 [4:00:21<3:09:44, 3.74it/s] 89%|████████▊ | 328949/371472 [4:00:21<3:07:13, 3.79it/s] 89%|████████▊ | 328950/371472 [4:00:21<3:25:37, 3.45it/s] 89%|████████▊ | 328951/371472 [4:00:22<3:22:38, 3.50it/s] 89%|████████▊ | 328952/371472 [4:00:22<3:28:06, 3.41it/s] 89%|████████▊ | 328953/371472 [4:00:22<3:27:38, 3.41it/s] 89%|████████▊ | 328954/371472 [4:00:23<3:21:55, 3.51it/s] 89%|████████▊ | 328955/371472 [4:00:23<3:38:04, 3.25it/s] 89%|████████▊ | 328956/371472 [4:00:23<3:29:47, 3.38it/s] 89%|████████▊ | 328957/371472 [4:00:23<3:25:00, 3.46it/s] 89%|████████▊ | 328958/371472 [4:00:24<3:20:39, 3.53it/s] 89%|████████▊ | 328959/371472 [4:00:24<3:17:24, 3.59it/s] 89%|████████▊ | 328960/371472 [4:00:24<3:18:44, 3.57it/s] {'loss': 2.5458, 'learning_rate': 2.0305328707793748e-07, 'epoch': 14.17} + 89%|████████▊ | 328960/371472 [4:00:24<3:18:44, 3.57it/s] 89%|████████▊ | 328961/371472 [4:00:25<3:20:25, 3.53it/s] 89%|████████▊ | 328962/371472 [4:00:25<3:26:09, 3.44it/s] 89%|████████▊ | 328963/371472 [4:00:25<3:38:39, 3.24it/s] 89%|████████▊ | 328964/371472 [4:00:25<3:26:39, 3.43it/s] 89%|████████▊ | 328965/371472 [4:00:26<3:18:21, 3.57it/s] 89%|████████▊ | 328966/371472 [4:00:26<3:16:12, 3.61it/s] 89%|████████▊ | 328967/371472 [4:00:26<3:28:49, 3.39it/s] 89%|████████▊ | 328968/371472 [4:00:27<3:27:50, 3.41it/s] 89%|████████▊ | 328969/371472 [4:00:27<3:25:56, 3.44it/s] 89%|████████▊ | 328970/371472 [4:00:27<3:40:57, 3.21it/s] 89%|████████▊ | 328971/371472 [4:00:28<3:29:05, 3.39it/s] 89%|████████▊ | 328972/371472 [4:00:28<3:36:37, 3.27it/s] 89%|████████▊ | 328973/371472 [4:00:28<3:30:30, 3.36it/s] 89%|████████▊ | 328974/371472 [4:00:28<3:24:26, 3.46it/s] 89%|████████▊ | 328975/371472 [4:00:29<3:16:22, 3.61it/s] 89%|████████▊ | 328976/371472 [4:00:29<3:29:16, 3.38it/s] 89%|████████▊ | 328977/371472 [4:00:29<3:20:35, 3.53it/s] 89%|████████▊ | 328978/371472 [4:00:30<3:21:50, 3.51it/s] 89%|████████▊ | 328979/371472 [4:00:30<3:12:09, 3.69it/s] 89%|████████▊ | 328980/371472 [4:00:30<3:13:45, 3.66it/s] {'loss': 2.6632, 'learning_rate': 2.0300480510245853e-07, 'epoch': 14.17} + 89%|████████▊ | 328980/371472 [4:00:30<3:13:45, 3.66it/s] 89%|████████▊ | 328981/371472 [4:00:30<3:15:04, 3.63it/s] 89%|████████▊ | 328982/371472 [4:00:31<3:07:00, 3.79it/s] 89%|████████▊ | 328983/371472 [4:00:31<3:09:25, 3.74it/s] 89%|████████▊ | 328984/371472 [4:00:31<3:15:10, 3.63it/s] 89%|████████▊ | 328985/371472 [4:00:31<3:20:21, 3.53it/s] 89%|████████▊ | 328986/371472 [4:00:32<3:24:56, 3.46it/s] 89%|████████▊ | 328987/371472 [4:00:32<3:19:30, 3.55it/s] 89%|████████▊ | 328988/371472 [4:00:32<3:12:14, 3.68it/s] 89%|████████▊ | 328989/371472 [4:00:33<3:12:13, 3.68it/s] 89%|████████▊ | 328990/371472 [4:00:33<3:10:36, 3.71it/s] 89%|████████▊ | 328991/371472 [4:00:33<3:10:57, 3.71it/s] 89%|████████▊ | 328992/371472 [4:00:33<3:09:11, 3.74it/s] 89%|████████▊ | 328993/371472 [4:00:34<3:12:53, 3.67it/s] 89%|████████▊ | 328994/371472 [4:00:34<3:23:05, 3.49it/s] 89%|████████▊ | 328995/371472 [4:00:34<3:34:37, 3.30it/s] 89%|████████▊ | 328996/371472 [4:00:35<3:27:09, 3.42it/s] 89%|████████▊ | 328997/371472 [4:00:35<3:28:03, 3.40it/s] 89%|████████▊ | 328998/371472 [4:00:35<3:19:17, 3.55it/s] 89%|████████▊ | 328999/371472 [4:00:35<3:32:55, 3.32it/s] 89%|████████▊ | 329000/371472 [4:00:36<3:31:52, 3.34it/s] {'loss': 2.6286, 'learning_rate': 2.0295632312697968e-07, 'epoch': 14.17} + 89%|████████▊ | 329000/371472 [4:00:36<3:31:52, 3.34it/s] 89%|████████▊ | 329001/371472 [4:00:36<3:26:06, 3.43it/s] 89%|████████▊ | 329002/371472 [4:00:36<3:24:20, 3.46it/s] 89%|████████▊ | 329003/371472 [4:00:37<3:34:28, 3.30it/s] 89%|████████▊ | 329004/371472 [4:00:37<3:38:49, 3.23it/s] 89%|████████▊ | 329005/371472 [4:00:37<3:28:52, 3.39it/s] 89%|████████▊ | 329006/371472 [4:00:37<3:20:09, 3.54it/s] 89%|████████▊ | 329007/371472 [4:00:38<3:21:06, 3.52it/s] 89%|████████▊ | 329008/371472 [4:00:38<3:19:55, 3.54it/s] 89%|████████▊ | 329009/371472 [4:00:38<3:21:12, 3.52it/s] 89%|████████▊ | 329010/371472 [4:00:39<3:15:58, 3.61it/s] 89%|████████▊ | 329011/371472 [4:00:39<3:19:13, 3.55it/s] 89%|████████▊ | 329012/371472 [4:00:39<3:14:18, 3.64it/s] 89%|████████▊ | 329013/371472 [4:00:39<3:17:09, 3.59it/s] 89%|████████▊ | 329014/371472 [4:00:40<3:19:06, 3.55it/s] 89%|████████▊ | 329015/371472 [4:00:40<3:20:36, 3.53it/s] 89%|████████▊ | 329016/371472 [4:00:40<3:14:03, 3.65it/s] 89%|████████▊ | 329017/371472 [4:00:41<3:22:03, 3.50it/s] 89%|████████▊ | 329018/371472 [4:00:41<3:27:00, 3.42it/s] 89%|████████▊ | 329019/371472 [4:00:41<3:30:30, 3.36it/s] 89%|████████▊ | 329020/371472 [4:00:41<3:30:18, 3.36it/s] {'loss': 2.4723, 'learning_rate': 2.0290784115150075e-07, 'epoch': 14.17} + 89%|████████▊ | 329020/371472 [4:00:41<3:30:18, 3.36it/s] 89%|████████▊ | 329021/371472 [4:00:42<3:24:49, 3.45it/s] 89%|████████▊ | 329022/371472 [4:00:42<3:23:19, 3.48it/s] 89%|████████▊ | 329023/371472 [4:00:42<3:26:20, 3.43it/s] 89%|████████▊ | 329024/371472 [4:00:43<3:23:02, 3.48it/s] 89%|████████▊ | 329025/371472 [4:00:43<3:16:47, 3.59it/s] 89%|████████▊ | 329026/371472 [4:00:43<3:10:20, 3.72it/s] 89%|████████▊ | 329027/371472 [4:00:43<3:08:16, 3.76it/s] 89%|████████▊ | 329028/371472 [4:00:44<3:19:43, 3.54it/s] 89%|████████▊ | 329029/371472 [4:00:44<3:19:48, 3.54it/s] 89%|████████▊ | 329030/371472 [4:00:44<3:26:48, 3.42it/s] 89%|████████▊ | 329031/371472 [4:00:45<3:17:31, 3.58it/s] 89%|████████▊ | 329032/371472 [4:00:45<3:19:22, 3.55it/s] 89%|████████▊ | 329033/371472 [4:00:45<3:19:36, 3.54it/s] 89%|████████▊ | 329034/371472 [4:00:45<3:25:14, 3.45it/s] 89%|████████▊ | 329035/371472 [4:00:46<3:19:48, 3.54it/s] 89%|████████▊ | 329036/371472 [4:00:46<3:15:34, 3.62it/s] 89%|████████▊ | 329037/371472 [4:00:46<3:17:00, 3.59it/s] 89%|████████▊ | 329038/371472 [4:00:47<3:19:13, 3.55it/s] 89%|████████▊ | 329039/371472 [4:00:47<3:13:42, 3.65it/s] 89%|████████▊ | 329040/371472 [4:00:47<3:36:20, 3.27it/s] {'loss': 2.6061, 'learning_rate': 2.028593591760219e-07, 'epoch': 14.17} + 89%|████████▊ | 329040/371472 [4:00:47<3:36:20, 3.27it/s] 89%|████████▊ | 329041/371472 [4:00:48<3:55:01, 3.01it/s] 89%|████████▊ | 329042/371472 [4:00:48<3:39:23, 3.22it/s] 89%|████████▊ | 329043/371472 [4:00:48<3:36:48, 3.26it/s] 89%|████████▊ | 329044/371472 [4:00:48<3:27:39, 3.41it/s] 89%|████████▊ | 329045/371472 [4:00:49<3:30:31, 3.36it/s] 89%|████████▊ | 329046/371472 [4:00:49<3:21:17, 3.51it/s] 89%|████████▊ | 329047/371472 [4:00:49<3:18:11, 3.57it/s] 89%|████████▊ | 329048/371472 [4:00:49<3:13:55, 3.65it/s] 89%|████████▊ | 329049/371472 [4:00:50<3:31:15, 3.35it/s] 89%|████████▊ | 329050/371472 [4:00:50<3:24:21, 3.46it/s] 89%|████████▊ | 329051/371472 [4:00:50<3:16:32, 3.60it/s] 89%|████████▊ | 329052/371472 [4:00:51<3:12:05, 3.68it/s] 89%|████████▊ | 329053/371472 [4:00:51<3:08:50, 3.74it/s] 89%|████████▊ | 329054/371472 [4:00:51<3:08:17, 3.75it/s] 89%|████████▊ | 329055/371472 [4:00:51<3:09:58, 3.72it/s] 89%|████████▊ | 329056/371472 [4:00:52<3:13:14, 3.66it/s] 89%|████████▊ | 329057/371472 [4:00:52<3:23:54, 3.47it/s] 89%|████████▊ | 329058/371472 [4:00:52<3:28:56, 3.38it/s] 89%|████████▊ | 329059/371472 [4:00:53<3:39:16, 3.22it/s] 89%|████████▊ | 329060/371472 [4:00:53<3:26:38, 3.42it/s] {'loss': 2.5959, 'learning_rate': 2.0281087720054294e-07, 'epoch': 14.17} + 89%|████████▊ | 329060/371472 [4:00:53<3:26:38, 3.42it/s] 89%|████████▊ | 329061/371472 [4:00:53<3:22:19, 3.49it/s] 89%|████████▊ | 329062/371472 [4:00:54<3:39:42, 3.22it/s] 89%|████████▊ | 329063/371472 [4:00:54<3:27:31, 3.41it/s] 89%|████████▊ | 329064/371472 [4:00:54<3:38:15, 3.24it/s] 89%|████████▊ | 329065/371472 [4:00:54<3:28:54, 3.38it/s] 89%|████████▊ | 329066/371472 [4:00:55<3:24:32, 3.46it/s] 89%|████████▊ | 329067/371472 [4:00:55<3:35:01, 3.29it/s] 89%|████████▊ | 329068/371472 [4:00:55<3:36:20, 3.27it/s] 89%|████████▊ | 329069/371472 [4:00:56<3:30:03, 3.36it/s] 89%|████████▊ | 329070/371472 [4:00:56<3:26:46, 3.42it/s] 89%|████████▊ | 329071/371472 [4:00:56<3:28:37, 3.39it/s] 89%|████████▊ | 329072/371472 [4:00:56<3:21:48, 3.50it/s] 89%|████████▊ | 329073/371472 [4:00:57<3:16:12, 3.60it/s] 89%|████████▊ | 329074/371472 [4:00:57<3:26:58, 3.41it/s] 89%|████████▊ | 329075/371472 [4:00:57<3:43:50, 3.16it/s] 89%|███████���▊ | 329076/371472 [4:00:58<3:44:11, 3.15it/s] 89%|████████▊ | 329077/371472 [4:00:58<3:38:04, 3.24it/s] 89%|████████▊ | 329078/371472 [4:00:58<3:29:12, 3.38it/s] 89%|████████▊ | 329079/371472 [4:00:59<3:19:24, 3.54it/s] 89%|████████▊ | 329080/371472 [4:00:59<3:19:59, 3.53it/s] {'loss': 2.6348, 'learning_rate': 2.0276239522506412e-07, 'epoch': 14.17} + 89%|████████▊ | 329080/371472 [4:00:59<3:19:59, 3.53it/s] 89%|████████▊ | 329081/371472 [4:00:59<3:19:57, 3.53it/s] 89%|████████▊ | 329082/371472 [4:00:59<3:14:42, 3.63it/s] 89%|████████▊ | 329083/371472 [4:01:00<3:12:02, 3.68it/s] 89%|████████▊ | 329084/371472 [4:01:00<3:25:53, 3.43it/s] 89%|████████▊ | 329085/371472 [4:01:00<3:22:31, 3.49it/s] 89%|████████▊ | 329086/371472 [4:01:00<3:17:12, 3.58it/s] 89%|████████▊ | 329087/371472 [4:01:01<3:13:23, 3.65it/s] 89%|████████▊ | 329088/371472 [4:01:01<3:13:08, 3.66it/s] 89%|████████▊ | 329089/371472 [4:01:01<3:16:46, 3.59it/s] 89%|████████▊ | 329090/371472 [4:01:02<3:14:01, 3.64it/s] 89%|████████▊ | 329091/371472 [4:01:02<3:11:40, 3.69it/s] 89%|████████▊ | 329092/371472 [4:01:02<3:15:52, 3.61it/s] 89%|████████▊ | 329093/371472 [4:01:02<3:17:01, 3.59it/s] 89%|████████▊ | 329094/371472 [4:01:03<3:11:46, 3.68it/s] 89%|████████▊ | 329095/371472 [4:01:03<3:15:28, 3.61it/s] 89%|████████▊ | 329096/371472 [4:01:03<3:15:32, 3.61it/s] 89%|████████▊ | 329097/371472 [4:01:03<3:12:43, 3.66it/s] 89%|████████▊ | 329098/371472 [4:01:04<3:28:25, 3.39it/s] 89%|████████▊ | 329099/371472 [4:01:04<3:23:19, 3.47it/s] 89%|████████▊ | 329100/371472 [4:01:04<3:21:41, 3.50it/s] {'loss': 2.5327, 'learning_rate': 2.027139132495852e-07, 'epoch': 14.17} + 89%|████████▊ | 329100/371472 [4:01:04<3:21:41, 3.50it/s] 89%|████████▊ | 329101/371472 [4:01:05<3:14:33, 3.63it/s] 89%|████████▊ | 329102/371472 [4:01:05<3:11:28, 3.69it/s] 89%|████████▊ | 329103/371472 [4:01:05<3:08:51, 3.74it/s] 89%|████████▊ | 329104/371472 [4:01:05<3:13:25, 3.65it/s] 89%|████████▊ | 329105/371472 [4:01:06<3:16:17, 3.60it/s] 89%|████████▊ | 329106/371472 [4:01:06<3:32:38, 3.32it/s] 89%|████████▊ | 329107/371472 [4:01:06<3:38:06, 3.24it/s] 89%|████████▊ | 329108/371472 [4:01:07<3:29:26, 3.37it/s] 89%|████████▊ | 329109/371472 [4:01:07<3:20:11, 3.53it/s] 89%|████████▊ | 329110/371472 [4:01:07<3:14:46, 3.62it/s] 89%|████████▊ | 329111/371472 [4:01:07<3:13:31, 3.65it/s] 89%|████████▊ | 329112/371472 [4:01:08<3:11:01, 3.70it/s] 89%|████████▊ | 329113/371472 [4:01:08<3:13:33, 3.65it/s] 89%|████████▊ | 329114/371472 [4:01:08<3:11:11, 3.69it/s] 89%|████████▊ | 329115/371472 [4:01:09<3:21:24, 3.51it/s] 89%|████████▊ | 329116/371472 [4:01:09<3:17:44, 3.57it/s] 89%|████████▊ | 329117/371472 [4:01:09<3:17:32, 3.57it/s] 89%|████████▊ | 329118/371472 [4:01:09<3:15:22, 3.61it/s] 89%|████████▊ | 329119/371472 [4:01:10<3:19:05, 3.55it/s] 89%|████████▊ | 329120/371472 [4:01:10<3:25:02, 3.44it/s] {'loss': 2.738, 'learning_rate': 2.0266543127410632e-07, 'epoch': 14.18} + 89%|████████▊ | 329120/371472 [4:01:10<3:25:02, 3.44it/s] 89%|████████▊ | 329121/371472 [4:01:10<3:26:02, 3.43it/s] 89%|████████▊ | 329122/371472 [4:01:11<3:15:55, 3.60it/s] 89%|████████▊ | 329123/371472 [4:01:11<3:43:52, 3.15it/s] 89%|████████▊ | 329124/371472 [4:01:11<3:34:47, 3.29it/s] 89%|████████▊ | 329125/371472 [4:01:12<3:28:59, 3.38it/s] 89%|████████▊ | 329126/371472 [4:01:12<3:28:00, 3.39it/s] 89%|████████▊ | 329127/371472 [4:01:12<3:28:46, 3.38it/s] 89%|████████▊ | 329128/371472 [4:01:12<3:21:24, 3.50it/s] 89%|████████▊ | 329129/371472 [4:01:13<3:33:41, 3.30it/s] 89%|████████▊ | 329130/371472 [4:01:13<3:21:55, 3.49it/s] 89%|████████▊ | 329131/371472 [4:01:13<3:19:13, 3.54it/s] 89%|████████▊ | 329132/371472 [4:01:13<3:12:47, 3.66it/s] 89%|████████▊ | 329133/371472 [4:01:14<3:13:16, 3.65it/s] 89%|████████▊ | 329134/371472 [4:01:14<3:08:05, 3.75it/s] 89%|████████▊ | 329135/371472 [4:01:14<3:04:42, 3.82it/s] 89%|████████▊ | 329136/371472 [4:01:15<3:10:49, 3.70it/s] 89%|████████▊ | 329137/371472 [4:01:15<3:25:43, 3.43it/s] 89%|████████▊ | 329138/371472 [4:01:15<3:20:42, 3.52it/s] 89%|████████▊ | 329139/371472 [4:01:15<3:25:16, 3.44it/s] 89%|████████▊ | 329140/371472 [4:01:16<3:22:07, 3.49it/s] {'loss': 2.6292, 'learning_rate': 2.026169492986274e-07, 'epoch': 14.18} + 89%|████████▊ | 329140/371472 [4:01:16<3:22:07, 3.49it/s] 89%|████████▊ | 329141/371472 [4:01:16<3:23:04, 3.47it/s] 89%|████████▊ | 329142/371472 [4:01:16<3:21:14, 3.51it/s] 89%|████████▊ | 329143/371472 [4:01:17<3:18:47, 3.55it/s] 89%|████████▊ | 329144/371472 [4:01:17<3:14:33, 3.63it/s] 89%|████████▊ | 329145/371472 [4:01:17<3:19:51, 3.53it/s] 89%|████████▊ | 329146/371472 [4:01:17<3:10:44, 3.70it/s] 89%|████████▊ | 329147/371472 [4:01:18<3:09:36, 3.72it/s] 89%|████████▊ | 329148/371472 [4:01:18<3:18:42, 3.55it/s] 89%|████████▊ | 329149/371472 [4:01:18<3:17:02, 3.58it/s] 89%|████████▊ | 329150/371472 [4:01:19<3:22:20, 3.49it/s] 89%|████████▊ | 329151/371472 [4:01:19<3:21:44, 3.50it/s] 89%|████████▊ | 329152/371472 [4:01:19<3:16:28, 3.59it/s] 89%|████████▊ | 329153/371472 [4:01:19<3:24:42, 3.45it/s] 89%|████████▊ | 329154/371472 [4:01:20<3:26:43, 3.41it/s] 89%|████████▊ | 329155/371472 [4:01:20<3:40:38, 3.20it/s] 89%|████████▊ | 329156/371472 [4:01:20<3:27:45, 3.39it/s] 89%|████████▊ | 329157/371472 [4:01:21<3:28:20, 3.39it/s] 89%|████████▊ | 329158/371472 [4:01:21<3:31:34, 3.33it/s] 89%|████████▊ | 329159/371472 [4:01:21<3:25:52, 3.43it/s] 89%|████████▊ | 329160/371472 [4:01:21<3:14:32, 3.63it/s] {'loss': 2.7782, 'learning_rate': 2.0256846732314857e-07, 'epoch': 14.18} + 89%|████████▊ | 329160/371472 [4:01:21<3:14:32, 3.63it/s] 89%|████████▊ | 329161/371472 [4:01:22<3:21:21, 3.50it/s] 89%|████████▊ | 329162/371472 [4:01:22<3:19:17, 3.54it/s] 89%|████████▊ | 329163/371472 [4:01:22<3:30:42, 3.35it/s] 89%|████████▊ | 329164/371472 [4:01:23<3:31:51, 3.33it/s] 89%|████████▊ | 329165/371472 [4:01:23<3:37:21, 3.24it/s] 89%|████████▊ | 329166/371472 [4:01:23<3:33:57, 3.30it/s] 89%|████████▊ | 329167/371472 [4:01:24<3:23:58, 3.46it/s] 89%|████████▊ | 329168/371472 [4:01:24<3:25:40, 3.43it/s] 89%|████████▊ | 329169/371472 [4:01:24<3:24:04, 3.45it/s] 89%|████████▊ | 329170/371472 [4:01:24<3:15:57, 3.60it/s] 89%|████████▊ | 329171/371472 [4:01:25<3:23:45, 3.46it/s] 89%|████████▊ | 329172/371472 [4:01:25<3:18:19, 3.55it/s] 89%|████████▊ | 329173/371472 [4:01:25<3:17:23, 3.57it/s] 89%|████████▊ | 329174/371472 [4:01:26<3:20:15, 3.52it/s] 89%|████████▊ | 329175/371472 [4:01:26<3:23:44, 3.46it/s] 89%|████████▊ | 329176/371472 [4:01:26<3:39:41, 3.21it/s] 89%|████████▊ | 329177/371472 [4:01:26<3:29:04, 3.37it/s] 89%|████████▊ | 329178/371472 [4:01:27<3:26:57, 3.41it/s] 89%|████████▊ | 329179/371472 [4:01:27<3:31:42, 3.33it/s] 89%|████████▊ | 329180/371472 [4:01:27<3:25:28, 3.43it/s] {'loss': 2.6756, 'learning_rate': 2.025199853476696e-07, 'epoch': 14.18} + 89%|████████▊ | 329180/371472 [4:01:27<3:25:28, 3.43it/s] 89%|████████▊ | 329181/371472 [4:01:28<3:15:43, 3.60it/s] 89%|████████▊ | 329182/371472 [4:01:28<3:12:04, 3.67it/s] 89%|████████▊ | 329183/371472 [4:01:28<3:08:22, 3.74it/s] 89%|████████▊ | 329184/371472 [4:01:28<3:01:34, 3.88it/s] 89%|████████▊ | 329185/371472 [4:01:29<3:04:58, 3.81it/s] 89%|████████▊ | 329186/371472 [4:01:29<3:09:28, 3.72it/s] 89%|████████▊ | 329187/371472 [4:01:29<3:13:45, 3.64it/s] 89%|████████▊ | 329188/371472 [4:01:29<3:17:28, 3.57it/s] 89%|████████▊ | 329189/371472 [4:01:30<3:16:46, 3.58it/s] 89%|████████▊ | 329190/371472 [4:01:30<3:19:02, 3.54it/s] 89%|████████▊ | 329191/371472 [4:01:30<3:18:39, 3.55it/s] 89%|████████▊ | 329192/371472 [4:01:31<3:28:36, 3.38it/s] 89%|████████▊ | 329193/371472 [4:01:31<4:47:25, 2.45it/s] 89%|████████▊ | 329194/371472 [4:01:32<4:15:24, 2.76it/s] 89%|████████▊ | 329195/371472 [4:01:32<3:57:24, 2.97it/s] 89%|████████▊ | 329196/371472 [4:01:32<3:47:20, 3.10it/s] 89%|████████▊ | 329197/371472 [4:01:32<3:36:00, 3.26it/s] 89%|████████▊ | 329198/371472 [4:01:33<3:29:31, 3.36it/s] 89%|████████▊ | 329199/371472 [4:01:33<3:23:37, 3.46it/s] 89%|████████▊ | 329200/371472 [4:01:33<3:38:29, 3.22it/s] {'loss': 2.5928, 'learning_rate': 2.0247150337219076e-07, 'epoch': 14.18} + 89%|████████▊ | 329200/371472 [4:01:33<3:38:29, 3.22it/s] 89%|████████▊ | 329201/371472 [4:01:34<3:41:18, 3.18it/s] 89%|████████▊ | 329202/371472 [4:01:34<3:28:33, 3.38it/s] 89%|████████▊ | 329203/371472 [4:01:34<3:44:13, 3.14it/s] 89%|████████▊ | 329204/371472 [4:01:34<3:29:53, 3.36it/s] 89%|████████▊ | 329205/371472 [4:01:35<3:30:38, 3.34it/s] 89%|████████▊ | 329206/371472 [4:01:35<3:34:27, 3.28it/s] 89%|████████▊ | 329207/371472 [4:01:35<3:22:25, 3.48it/s] 89%|████████▊ | 329208/371472 [4:01:36<3:19:56, 3.52it/s] 89%|████████▊ | 329209/371472 [4:01:36<3:21:12, 3.50it/s] 89%|████████▊ | 329210/371472 [4:01:36<3:19:38, 3.53it/s] 89%|████████▊ | 329211/371472 [4:01:36<3:17:25, 3.57it/s] 89%|████████▊ | 329212/371472 [4:01:37<3:12:58, 3.65it/s] 89%|████████▊ | 329213/371472 [4:01:37<3:20:24, 3.51it/s] 89%|████████▊ | 329214/371472 [4:01:37<3:18:47, 3.54it/s] 89%|████████▊ | 329215/371472 [4:01:38<3:21:06, 3.50it/s] 89%|████████▊ | 329216/371472 [4:01:38<3:17:13, 3.57it/s] 89%|████████▊ | 329217/371472 [4:01:38<3:20:01, 3.52it/s] 89%|████████▊ | 329218/371472 [4:01:39<3:30:22, 3.35it/s] 89%|████████▊ | 329219/371472 [4:01:39<3:32:45, 3.31it/s] 89%|████████▊ | 329220/371472 [4:01:39<3:29:33, 3.36it/s] {'loss': 2.6703, 'learning_rate': 2.0242302139671183e-07, 'epoch': 14.18} + 89%|████████▊ | 329220/371472 [4:01:39<3:29:33, 3.36it/s] 89%|████████▊ | 329221/371472 [4:01:39<3:28:39, 3.37it/s] 89%|████████▊ | 329222/371472 [4:01:40<3:18:18, 3.55it/s] 89%|████████▊ | 329223/371472 [4:01:40<3:14:12, 3.63it/s] 89%|████████▊ | 329224/371472 [4:01:40<3:11:52, 3.67it/s] 89%|████████▊ | 329225/371472 [4:01:40<3:18:48, 3.54it/s] 89%|████████▊ | 329226/371472 [4:01:41<3:15:15, 3.61it/s] 89%|████████▊ | 329227/371472 [4:01:41<3:19:35, 3.53it/s] 89%|████████▊ | 329228/371472 [4:01:41<3:19:54, 3.52it/s] 89%|████████▊ | 329229/371472 [4:01:42<3:13:20, 3.64it/s] 89%|████████▊ | 329230/371472 [4:01:42<3:10:46, 3.69it/s] 89%|████████▊ | 329231/371472 [4:01:42<3:11:18, 3.68it/s] 89%|████████▊ | 329232/371472 [4:01:42<3:17:24, 3.57it/s] 89%|████████▊ | 329233/371472 [4:01:43<3:12:38, 3.65it/s] 89%|████████▊ | 329234/371472 [4:01:43<3:15:04, 3.61it/s] 89%|████████▊ | 329235/371472 [4:01:43<3:12:08, 3.66it/s] 89%|████████▊ | 329236/371472 [4:01:44<3:14:37, 3.62it/s] 89%|████████▊ | 329237/371472 [4:01:44<3:14:31, 3.62it/s] 89%|████████▊ | 329238/371472 [4:01:44<3:13:09, 3.64it/s] 89%|████████▊ | 329239/371472 [4:01:44<3:14:53, 3.61it/s] 89%|████████▊ | 329240/371472 [4:01:45<3:18:03, 3.55it/s] {'loss': 2.5994, 'learning_rate': 2.0237453942123298e-07, 'epoch': 14.18} + 89%|████████▊ | 329240/371472 [4:01:45<3:18:03, 3.55it/s] 89%|████████▊ | 329241/371472 [4:01:45<3:28:38, 3.37it/s] 89%|████████▊ | 329242/371472 [4:01:45<3:20:17, 3.51it/s] 89%|████████▊ | 329243/371472 [4:01:45<3:15:04, 3.61it/s] 89%|████████▊ | 329244/371472 [4:01:46<3:12:31, 3.66it/s] 89%|████████▊ | 329245/371472 [4:01:46<3:11:17, 3.68it/s] 89%|████████▊ | 329246/371472 [4:01:46<3:35:24, 3.27it/s] 89%|████████▊ | 329247/371472 [4:01:47<3:33:41, 3.29it/s] 89%|████████▊ | 329248/371472 [4:01:47<3:39:56, 3.20it/s] 89%|████████▊ | 329249/371472 [4:01:47<3:33:49, 3.29it/s] 89%|████████▊ | 329250/371472 [4:01:48<3:33:49, 3.29it/s] 89%|████████▊ | 329251/371472 [4:01:48<3:33:26, 3.30it/s] 89%|████████▊ | 329252/371472 [4:01:48<3:29:43, 3.36it/s] 89%|████████▊ | 329253/371472 [4:01:48<3:18:46, 3.54it/s] 89%|████████▊ | 329254/371472 [4:01:49<3:14:01, 3.63it/s] 89%|████████▊ | 329255/371472 [4:01:49<3:11:49, 3.67it/s] 89%|████████▊ | 329256/371472 [4:01:49<3:14:48, 3.61it/s] 89%|████████▊ | 329257/371472 [4:01:50<3:23:09, 3.46it/s] 89%|████████▊ | 329258/371472 [4:01:50<3:13:44, 3.63it/s] 89%|████████▊ | 329259/371472 [4:01:50<3:21:36, 3.49it/s] 89%|████████▊ | 329260/371472 [4:01:50<3:19:56, 3.52it/s] {'loss': 2.4292, 'learning_rate': 2.0232605744575403e-07, 'epoch': 14.18} + 89%|████████▊ | 329260/371472 [4:01:50<3:19:56, 3.52it/s] 89%|████████▊ | 329261/371472 [4:01:51<3:17:46, 3.56it/s] 89%|████████▊ | 329262/371472 [4:01:51<3:42:25, 3.16it/s] 89%|████████▊ | 329263/371472 [4:01:51<3:30:41, 3.34it/s] 89%|████████▊ | 329264/371472 [4:01:52<3:29:52, 3.35it/s] 89%|████████▊ | 329265/371472 [4:01:52<3:39:19, 3.21it/s] 89%|████████▊ | 329266/371472 [4:01:52<3:28:26, 3.37it/s] 89%|████████▊ | 329267/371472 [4:01:53<3:26:56, 3.40it/s] 89%|████████▊ | 329268/371472 [4:01:53<3:22:14, 3.48it/s] 89%|████████▊ | 329269/371472 [4:01:53<3:33:05, 3.30it/s] 89%|████████▊ | 329270/371472 [4:01:53<3:22:19, 3.48it/s] 89%|████████▊ | 329271/371472 [4:01:54<3:33:48, 3.29it/s] 89%|████████▊ | 329272/371472 [4:01:54<3:37:54, 3.23it/s] 89%|████████▊ | 329273/371472 [4:01:54<3:31:03, 3.33it/s] 89%|████████▊ | 329274/371472 [4:01:55<3:32:07, 3.32it/s] 89%|████████▊ | 329275/371472 [4:01:55<3:25:26, 3.42it/s] 89%|████████▊ | 329276/371472 [4:01:55<3:18:06, 3.55it/s] 89%|████████▊ | 329277/371472 [4:01:55<3:11:10, 3.68it/s] 89%|████████▊ | 329278/371472 [4:01:56<3:10:03, 3.70it/s] 89%|████████▊ | 329279/371472 [4:01:56<3:14:38, 3.61it/s] 89%|████████▊ | 329280/371472 [4:01:56<3:23:42, 3.45it/s] {'loss': 2.512, 'learning_rate': 2.022775754702752e-07, 'epoch': 14.18} + 89%|████████▊ | 329280/371472 [4:01:56<3:23:42, 3.45it/s] 89%|████████▊ | 329281/371472 [4:01:57<3:23:23, 3.46it/s] 89%|████████▊ | 329282/371472 [4:01:57<3:23:16, 3.46it/s] 89%|████████▊ | 329283/371472 [4:01:57<3:52:25, 3.03it/s] 89%|████████▊ | 329284/371472 [4:01:58<3:35:59, 3.26it/s] 89%|████████▊ | 329285/371472 [4:01:58<3:26:15, 3.41it/s] 89%|████████▊ | 329286/371472 [4:01:58<3:32:25, 3.31it/s] 89%|████████▊ | 329287/371472 [4:01:58<3:24:47, 3.43it/s] 89%|████████▊ | 329288/371472 [4:01:59<3:18:09, 3.55it/s] 89%|████████▊ | 329289/371472 [4:01:59<3:21:54, 3.48it/s] 89%|████████▊ | 329290/371472 [4:01:59<3:28:29, 3.37it/s] 89%|████████▊ | 329291/371472 [4:02:00<3:26:37, 3.40it/s] 89%|████████▊ | 329292/371472 [4:02:00<3:20:59, 3.50it/s] 89%|████████▊ | 329293/371472 [4:02:00<3:28:36, 3.37it/s] 89%|████████▊ | 329294/371472 [4:02:00<3:21:22, 3.49it/s] 89%|████████▊ | 329295/371472 [4:02:01<3:18:56, 3.53it/s] 89%|████████▊ | 329296/371472 [4:02:01<3:33:53, 3.29it/s] 89%|████████▊ | 329297/371472 [4:02:01<3:25:59, 3.41it/s] 89%|████████▊ | 329298/371472 [4:02:02<3:27:34, 3.39it/s] 89%|████████▊ | 329299/371472 [4:02:02<3:26:23, 3.41it/s] 89%|████████▊ | 329300/371472 [4:02:02<3:40:27, 3.19it/s] {'loss': 2.5576, 'learning_rate': 2.0222909349479625e-07, 'epoch': 14.18} + 89%|████████▊ | 329300/371472 [4:02:02<3:40:27, 3.19it/s] 89%|████████▊ | 329301/371472 [4:02:03<3:30:01, 3.35it/s] 89%|████████▊ | 329302/371472 [4:02:03<3:23:58, 3.45it/s] 89%|████████▊ | 329303/371472 [4:02:03<3:14:42, 3.61it/s] 89%|████████▊ | 329304/371472 [4:02:03<3:20:34, 3.50it/s] 89%|████████▊ | 329305/371472 [4:02:04<3:23:25, 3.45it/s] 89%|████████▊ | 329306/371472 [4:02:04<3:27:39, 3.38it/s] 89%|████████▊ | 329307/371472 [4:02:04<3:18:35, 3.54it/s] 89%|████████▊ | 329308/371472 [4:02:05<3:31:23, 3.32it/s] 89%|████████▊ | 329309/371472 [4:02:05<3:21:52, 3.48it/s] 89%|████████▊ | 329310/371472 [4:02:05<3:18:06, 3.55it/s] 89%|████████▊ | 329311/371472 [4:02:05<3:23:12, 3.46it/s] 89%|████████▊ | 329312/371472 [4:02:06<3:14:43, 3.61it/s] 89%|████████▊ | 329313/371472 [4:02:06<3:14:26, 3.61it/s] 89%|████████▊ | 329314/371472 [4:02:06<3:39:08, 3.21it/s] 89%|████████▊ | 329315/371472 [4:02:07<3:30:39, 3.34it/s] 89%|████████▊ | 329316/371472 [4:02:07<3:21:56, 3.48it/s] 89%|████████▊ | 329317/371472 [4:02:07<3:23:57, 3.44it/s] 89%|████████▊ | 329318/371472 [4:02:07<3:20:55, 3.50it/s] 89%|████████▊ | 329319/371472 [4:02:08<3:22:25, 3.47it/s] 89%|████████▊ | 329320/371472 [4:02:08<3:17:41, 3.55it/s] {'loss': 2.6813, 'learning_rate': 2.021806115193174e-07, 'epoch': 14.18} + 89%|████████▊ | 329320/371472 [4:02:08<3:17:41, 3.55it/s] 89%|████████▊ | 329321/371472 [4:02:08<3:15:42, 3.59it/s] 89%|████████▊ | 329322/371472 [4:02:09<3:14:04, 3.62it/s] 89%|████████▊ | 329323/371472 [4:02:09<3:12:09, 3.66it/s] 89%|████████▊ | 329324/371472 [4:02:09<3:20:32, 3.50it/s] 89%|████████▊ | 329325/371472 [4:02:09<3:16:09, 3.58it/s] 89%|████████▊ | 329326/371472 [4:02:10<3:15:46, 3.59it/s] 89%|████████▊ | 329327/371472 [4:02:10<3:08:45, 3.72it/s] 89%|████████▊ | 329328/371472 [4:02:10<3:10:13, 3.69it/s] 89%|████████▊ | 329329/371472 [4:02:10<3:10:19, 3.69it/s] 89%|████████▊ | 329330/371472 [4:02:11<3:28:46, 3.36it/s] 89%|████████▊ | 329331/371472 [4:02:11<3:34:13, 3.28it/s] 89%|████████▊ | 329332/371472 [4:02:11<3:23:10, 3.46it/s] 89%|████████▊ | 329333/371472 [4:02:12<3:12:30, 3.65it/s] 89%|████████▊ | 329334/371472 [4:02:12<3:16:57, 3.57it/s] 89%|████████▊ | 329335/371472 [4:02:12<3:12:54, 3.64it/s] 89%|████████▊ | 329336/371472 [4:02:12<3:10:53, 3.68it/s] 89%|████████▊ | 329337/371472 [4:02:13<3:05:52, 3.78it/s] 89%|████████▊ | 329338/371472 [4:02:13<3:17:24, 3.56it/s] 89%|████████▊ | 329339/371472 [4:02:13<3:15:20, 3.59it/s] 89%|████████▊ | 329340/371472 [4:02:14<3:18:46, 3.53it/s] {'loss': 2.5658, 'learning_rate': 2.0213212954383847e-07, 'epoch': 14.19} + 89%|████████▊ | 329340/371472 [4:02:14<3:18:46, 3.53it/s] 89%|████████▊ | 329341/371472 [4:02:14<3:37:30, 3.23it/s] 89%|████████▊ | 329342/371472 [4:02:14<3:43:29, 3.14it/s] 89%|████████▊ | 329343/371472 [4:02:15<3:28:58, 3.36it/s] 89%|████████▊ | 329344/371472 [4:02:15<3:18:17, 3.54it/s] 89%|████████▊ | 329345/371472 [4:02:15<3:59:35, 2.93it/s] 89%|████████▊ | 329346/371472 [4:02:16<3:54:30, 2.99it/s] 89%|████████▊ | 329347/371472 [4:02:16<3:49:00, 3.07it/s] 89%|████████▊ | 329348/371472 [4:02:16<3:41:12, 3.17it/s] 89%|████████▊ | 329349/371472 [4:02:16<3:37:43, 3.22it/s] 89%|████████▊ | 329350/371472 [4:02:17<3:28:32, 3.37it/s] 89%|████████▊ | 329351/371472 [4:02:17<3:23:17, 3.45it/s] 89%|████████▊ | 329352/371472 [4:02:17<3:17:22, 3.56it/s] 89%|████████▊ | 329353/371472 [4:02:18<3:16:37, 3.57it/s] 89%|████████▊ | 329354/371472 [4:02:18<3:22:34, 3.47it/s] 89%|████████▊ | 329355/371472 [4:02:18<3:19:02, 3.53it/s] 89%|████████▊ | 329356/371472 [4:02:18<3:21:57, 3.48it/s] 89%|████████▊ | 329357/371472 [4:02:19<3:24:04, 3.44it/s] 89%|████████▊ | 329358/371472 [4:02:19<3:23:25, 3.45it/s] 89%|████████▊ | 329359/371472 [4:02:19<3:29:55, 3.34it/s] 89%|████████▊ | 329360/371472 [4:02:20<3:18:56, 3.53it/s] {'loss': 2.6545, 'learning_rate': 2.0208364756835955e-07, 'epoch': 14.19} + 89%|████████▊ | 329360/371472 [4:02:20<3:18:56, 3.53it/s] 89%|████████▊ | 329361/371472 [4:02:20<3:21:06, 3.49it/s] 89%|████████▊ | 329362/371472 [4:02:20<3:18:46, 3.53it/s] 89%|████████▊ | 329363/371472 [4:02:20<3:12:05, 3.65it/s] 89%|████████▊ | 329364/371472 [4:02:21<3:07:52, 3.74it/s] 89%|████████▊ | 329365/371472 [4:02:21<3:01:30, 3.87it/s] 89%|████████▊ | 329366/371472 [4:02:21<3:05:13, 3.79it/s] 89%|████████▊ | 329367/371472 [4:02:21<3:13:27, 3.63it/s] 89%|████████▊ | 329368/371472 [4:02:22<3:21:36, 3.48it/s] 89%|████████▊ | 329369/371472 [4:02:22<3:19:18, 3.52it/s] 89%|████████▊ | 329370/371472 [4:02:22<3:20:51, 3.49it/s] 89%|████████▊ | 329371/371472 [4:02:23<3:13:55, 3.62it/s] 89%|████████▊ | 329372/371472 [4:02:23<3:21:36, 3.48it/s] 89%|████████▊ | 329373/371472 [4:02:23<3:25:33, 3.41it/s] 89%|████████▊ | 329374/371472 [4:02:24<3:32:28, 3.30it/s] 89%|████████▊ | 329375/371472 [4:02:24<3:23:12, 3.45it/s] 89%|████████▊ | 329376/371472 [4:02:24<3:17:05, 3.56it/s] 89%|████████▊ | 329377/371472 [4:02:24<3:19:54, 3.51it/s] 89%|████████▊ | 329378/371472 [4:02:25<3:22:51, 3.46it/s] 89%|████████▊ | 329379/371472 [4:02:25<3:29:12, 3.35it/s] 89%|████████▊ | 329380/371472 [4:02:25<3:32:01, 3.31it/s] {'loss': 2.5554, 'learning_rate': 2.0203516559288067e-07, 'epoch': 14.19} + 89%|████████▊ | 329380/371472 [4:02:25<3:32:01, 3.31it/s] 89%|████████▊ | 329381/371472 [4:02:26<3:24:05, 3.44it/s] 89%|████████▊ | 329382/371472 [4:02:26<3:17:52, 3.55it/s] 89%|████████▊ | 329383/371472 [4:02:26<3:22:42, 3.46it/s] 89%|████████▊ | 329384/371472 [4:02:26<3:23:11, 3.45it/s] 89%|████████▊ | 329385/371472 [4:02:27<3:17:16, 3.56it/s] 89%|████████▊ | 329386/371472 [4:02:27<3:26:57, 3.39it/s] 89%|████████▊ | 329387/371472 [4:02:27<3:18:36, 3.53it/s] 89%|████████▊ | 329388/371472 [4:02:27<3:08:25, 3.72it/s] 89%|████████▊ | 329389/371472 [4:02:28<3:15:12, 3.59it/s] 89%|████████▊ | 329390/371472 [4:02:28<3:17:52, 3.54it/s] 89%|████████▊ | 329391/371472 [4:02:28<3:29:36, 3.35it/s] 89%|████████▊ | 329392/371472 [4:02:29<3:28:27, 3.36it/s] 89%|████████▊ | 329393/371472 [4:02:29<3:21:56, 3.47it/s] 89%|████████▊ | 329394/371472 [4:02:29<3:19:01, 3.52it/s] 89%|████████▊ | 329395/371472 [4:02:30<3:16:12, 3.57it/s] 89%|████████▊ | 329396/371472 [4:02:30<3:14:20, 3.61it/s] 89%|████████▊ | 329397/371472 [4:02:30<3:12:51, 3.64it/s] 89%|████████▊ | 329398/371472 [4:02:30<3:39:57, 3.19it/s] 89%|████████▊ | 329399/371472 [4:02:31<3:27:56, 3.37it/s] 89%|████████▊ | 329400/371472 [4:02:31<3:27:34, 3.38it/s] {'loss': 2.6162, 'learning_rate': 2.0198668361740174e-07, 'epoch': 14.19} + 89%|████████▊ | 329400/371472 [4:02:31<3:27:34, 3.38it/s] 89%|████████▊ | 329401/371472 [4:02:31<3:20:06, 3.50it/s] 89%|████████▊ | 329402/371472 [4:02:32<3:25:48, 3.41it/s] 89%|████████▊ | 329403/371472 [4:02:32<3:22:53, 3.46it/s] 89%|████████▊ | 329404/371472 [4:02:32<3:32:20, 3.30it/s] 89%|████████▊ | 329405/371472 [4:02:32<3:29:25, 3.35it/s] 89%|████████▊ | 329406/371472 [4:02:33<3:25:16, 3.42it/s] 89%|████████▊ | 329407/371472 [4:02:33<3:29:05, 3.35it/s] 89%|████████▊ | 329408/371472 [4:02:33<3:45:52, 3.10it/s] 89%|████████▊ | 329409/371472 [4:02:34<3:31:50, 3.31it/s] 89%|████████▊ | 329410/371472 [4:02:34<3:24:37, 3.43it/s] 89%|████████▊ | 329411/371472 [4:02:34<3:28:55, 3.36it/s] 89%|████████▊ | 329412/371472 [4:02:35<3:22:57, 3.45it/s] 89%|████████▊ | 329413/371472 [4:02:35<3:18:43, 3.53it/s] 89%|████████▊ | 329414/371472 [4:02:35<3:13:06, 3.63it/s] 89%|████████▊ | 329415/371472 [4:02:35<3:13:17, 3.63it/s] 89%|████████▊ | 329416/371472 [4:02:36<3:12:02, 3.65it/s] 89%|████████▊ | 329417/371472 [4:02:36<3:16:35, 3.57it/s] 89%|████████▊ | 329418/371472 [4:02:36<3:36:16, 3.24it/s] 89%|████████▊ | 329419/371472 [4:02:37<3:28:57, 3.35it/s] 89%|████████▊ | 329420/371472 [4:02:37<3:18:49, 3.53it/s] {'loss': 2.5343, 'learning_rate': 2.0193820164192292e-07, 'epoch': 14.19} + 89%|████████▊ | 329420/371472 [4:02:37<3:18:49, 3.53it/s] 89%|████████▊ | 329421/371472 [4:02:37<3:11:45, 3.65it/s] 89%|████████▊ | 329422/371472 [4:02:37<3:10:14, 3.68it/s] 89%|████████▊ | 329423/371472 [4:02:38<3:10:00, 3.69it/s] 89%|████████▊ | 329424/371472 [4:02:38<3:05:31, 3.78it/s] 89%|████████▊ | 329425/371472 [4:02:38<3:13:05, 3.63it/s] 89%|████████▊ | 329426/371472 [4:02:38<3:11:38, 3.66it/s] 89%|████████▊ | 329427/371472 [4:02:39<3:27:08, 3.38it/s] 89%|████████▊ | 329428/371472 [4:02:39<3:25:54, 3.40it/s] 89%|████████▊ | 329429/371472 [4:02:39<3:18:01, 3.54it/s] 89%|████████▊ | 329430/371472 [4:02:40<3:14:42, 3.60it/s] 89%|████████▊ | 329431/371472 [4:02:40<3:11:54, 3.65it/s] 89%|████████▊ | 329432/371472 [4:02:40<3:14:40, 3.60it/s] 89%|████████▊ | 329433/371472 [4:02:40<3:12:38, 3.64it/s] 89%|████████▊ | 329434/371472 [4:02:41<3:11:04, 3.67it/s] 89%|████████▊ | 329435/371472 [4:02:41<3:20:31, 3.49it/s] 89%|████████▊ | 329436/371472 [4:02:41<3:19:29, 3.51it/s] 89%|████████▊ | 329437/371472 [4:02:42<3:23:42, 3.44it/s] 89%|████████▊ | 329438/371472 [4:02:42<3:28:39, 3.36it/s] 89%|████████▊ | 329439/371472 [4:02:42<3:27:11, 3.38it/s] 89%|████████▊ | 329440/371472 [4:02:42<3:25:31, 3.41it/s] {'loss': 2.5276, 'learning_rate': 2.0188971966644396e-07, 'epoch': 14.19} + 89%|████████▊ | 329440/371472 [4:02:42<3:25:31, 3.41it/s] 89%|████████▊ | 329441/371472 [4:02:43<3:25:31, 3.41it/s] 89%|████████▊ | 329442/371472 [4:02:43<3:20:34, 3.49it/s] 89%|████████▊ | 329443/371472 [4:02:43<3:22:09, 3.47it/s] 89%|████████▊ | 329444/371472 [4:02:44<3:57:59, 2.94it/s] 89%|████████▊ | 329445/371472 [4:02:44<3:41:37, 3.16it/s] 89%|████████▊ | 329446/371472 [4:02:44<3:28:54, 3.35it/s] 89%|████████▊ | 329447/371472 [4:02:45<3:41:37, 3.16it/s] 89%|████████▊ | 329448/371472 [4:02:45<3:41:11, 3.17it/s] 89%|████████▊ | 329449/371472 [4:02:45<3:34:09, 3.27it/s] 89%|████████▊ | 329450/371472 [4:02:46<3:23:48, 3.44it/s] 89%|████████▊ | 329451/371472 [4:02:46<3:43:07, 3.14it/s] 89%|████████▊ | 329452/371472 [4:02:46<3:36:10, 3.24it/s] 89%|████████▊ | 329453/371472 [4:02:46<3:34:01, 3.27it/s] 89%|████████▊ | 329454/371472 [4:02:47<3:23:16, 3.45it/s] 89%|████████▊ | 329455/371472 [4:02:47<3:16:33, 3.56it/s] 89%|████████▊ | 329456/371472 [4:02:47<3:16:03, 3.57it/s] 89%|████████▊ | 329457/371472 [4:02:48<3:17:29, 3.55it/s] 89%|████████▊ | 329458/371472 [4:02:48<3:15:04, 3.59it/s] 89%|████████▊ | 329459/371472 [4:02:48<3:24:28, 3.42it/s] 89%|████████▊ | 329460/371472 [4:02:48<3:19:22, 3.51it/s] {'loss': 2.6356, 'learning_rate': 2.0184123769096512e-07, 'epoch': 14.19} + 89%|████████▊ | 329460/371472 [4:02:48<3:19:22, 3.51it/s] 89%|████████▊ | 329461/371472 [4:02:49<3:29:43, 3.34it/s] 89%|████████▊ | 329462/371472 [4:02:49<3:17:52, 3.54it/s] 89%|████████▊ | 329463/371472 [4:02:49<3:26:17, 3.39it/s] 89%|████████▊ | 329464/371472 [4:02:50<3:18:22, 3.53it/s] 89%|████████▊ | 329465/371472 [4:02:50<3:10:26, 3.68it/s] 89%|████████▊ | 329466/371472 [4:02:50<3:19:04, 3.52it/s] 89%|████████▊ | 329467/371472 [4:02:51<3:34:23, 3.27it/s] 89%|████████▊ | 329468/371472 [4:02:51<3:30:27, 3.33it/s] 89%|████████▊ | 329469/371472 [4:02:51<3:19:56, 3.50it/s] 89%|████████▊ | 329470/371472 [4:02:51<3:18:25, 3.53it/s] 89%|████████▊ | 329471/371472 [4:02:52<3:10:57, 3.67it/s] 89%|████████▊ | 329472/371472 [4:02:52<3:16:30, 3.56it/s] 89%|████████▊ | 329473/371472 [4:02:52<3:14:11, 3.60it/s] 89%|████████▊ | 329474/371472 [4:02:52<3:08:26, 3.71it/s] 89%|████████▊ | 329475/371472 [4:02:53<3:19:20, 3.51it/s] 89%|████████▊ | 329476/371472 [4:02:53<3:19:12, 3.51it/s] 89%|████████▊ | 329477/371472 [4:02:53<3:23:35, 3.44it/s] 89%|████████▊ | 329478/371472 [4:02:54<3:15:30, 3.58it/s] 89%|████████▊ | 329479/371472 [4:02:54<3:08:38, 3.71it/s] 89%|████████▊ | 329480/371472 [4:02:54<3:20:50, 3.48it/s] {'loss': 2.6236, 'learning_rate': 2.0179275571548619e-07, 'epoch': 14.19} + 89%|████████▊ | 329480/371472 [4:02:54<3:20:50, 3.48it/s] 89%|████████▊ | 329481/371472 [4:02:54<3:18:15, 3.53it/s] 89%|████████▊ | 329482/371472 [4:02:55<3:11:37, 3.65it/s] 89%|████████▊ | 329483/371472 [4:02:55<3:16:24, 3.56it/s] 89%|████████▊ | 329484/371472 [4:02:55<3:27:58, 3.36it/s] 89%|████████▊ | 329485/371472 [4:02:56<3:19:55, 3.50it/s] 89%|████████▊ | 329486/371472 [4:02:56<3:29:17, 3.34it/s] 89%|████████▊ | 329487/371472 [4:02:56<3:27:53, 3.37it/s] 89%|████████▊ | 329488/371472 [4:02:56<3:21:39, 3.47it/s] 89%|████████▊ | 329489/371472 [4:02:57<3:31:42, 3.31it/s] 89%|████████▊ | 329490/371472 [4:02:57<3:25:59, 3.40it/s] 89%|████████▊ | 329491/371472 [4:02:57<3:22:34, 3.45it/s] 89%|████████▊ | 329492/371472 [4:02:58<3:18:41, 3.52it/s] 89%|████████▊ | 329493/371472 [4:02:58<3:31:48, 3.30it/s] 89%|████████▊ | 329494/371472 [4:02:58<3:31:48, 3.30it/s] 89%|████████▊ | 329495/371472 [4:02:59<3:26:14, 3.39it/s] 89%|████████▊ | 329496/371472 [4:02:59<3:21:08, 3.48it/s] 89%|████████▊ | 329497/371472 [4:02:59<3:16:56, 3.55it/s] 89%|████████▊ | 329498/371472 [4:02:59<3:22:42, 3.45it/s] 89%|████████▊ | 329499/371472 [4:03:00<3:25:58, 3.40it/s] 89%|████████▊ | 329500/371472 [4:03:00<3:25:04, 3.41it/s] {'loss': 2.526, 'learning_rate': 2.0174427374000734e-07, 'epoch': 14.19} + 89%|████████▊ | 329500/371472 [4:03:00<3:25:04, 3.41it/s] 89%|████████▊ | 329501/371472 [4:03:00<3:19:00, 3.52it/s] 89%|████████▊ | 329502/371472 [4:03:01<3:16:14, 3.56it/s] 89%|████████▊ | 329503/371472 [4:03:01<3:16:12, 3.57it/s] 89%|████████▊ | 329504/371472 [4:03:01<3:20:29, 3.49it/s] 89%|████████▊ | 329505/371472 [4:03:01<3:24:44, 3.42it/s] 89%|████████▊ | 329506/371472 [4:03:02<3:28:16, 3.36it/s] 89%|████████▊ | 329507/371472 [4:03:02<3:22:33, 3.45it/s] 89%|████████▊ | 329508/371472 [4:03:02<3:16:38, 3.56it/s] 89%|████████▊ | 329509/371472 [4:03:03<3:15:41, 3.57it/s] 89%|████████▊ | 329510/371472 [4:03:03<3:09:25, 3.69it/s] 89%|████████▊ | 329511/371472 [4:03:03<3:05:21, 3.77it/s] 89%|████████▊ | 329512/371472 [4:03:03<3:14:01, 3.60it/s] 89%|████████▊ | 329513/371472 [4:03:04<3:11:35, 3.65it/s] 89%|████████▊ | 329514/371472 [4:03:04<3:20:41, 3.48it/s] 89%|████████▊ | 329515/371472 [4:03:04<3:17:26, 3.54it/s] 89%|████████▊ | 329516/371472 [4:03:04<3:13:13, 3.62it/s] 89%|████████▊ | 329517/371472 [4:03:05<3:21:06, 3.48it/s] 89%|████████▊ | 329518/371472 [4:03:05<3:10:38, 3.67it/s] 89%|████████▊ | 329519/371472 [4:03:05<3:02:56, 3.82it/s] 89%|████████▊ | 329520/371472 [4:03:06<3:20:18, 3.49it/s] {'loss': 2.5021, 'learning_rate': 2.0169579176452838e-07, 'epoch': 14.19} + 89%|████████▊ | 329520/371472 [4:03:06<3:20:18, 3.49it/s] 89%|████████▊ | 329521/371472 [4:03:06<3:15:35, 3.57it/s] 89%|████████▊ | 329522/371472 [4:03:06<3:31:39, 3.30it/s] 89%|████████▊ | 329523/371472 [4:03:06<3:19:54, 3.50it/s] 89%|████████▊ | 329524/371472 [4:03:07<3:34:39, 3.26it/s] 89%|████████▊ | 329525/371472 [4:03:07<3:25:17, 3.41it/s] 89%|████████▊ | 329526/371472 [4:03:07<3:21:49, 3.46it/s] 89%|████████▊ | 329527/371472 [4:03:08<3:27:03, 3.38it/s] 89%|████████▊ | 329528/371472 [4:03:08<3:31:31, 3.30it/s] 89%|████████▊ | 329529/371472 [4:03:08<3:26:14, 3.39it/s] 89%|████████▊ | 329530/371472 [4:03:09<3:20:08, 3.49it/s] 89%|████████▊ | 329531/371472 [4:03:09<3:17:51, 3.53it/s] 89%|████████▊ | 329532/371472 [4:03:09<3:14:40, 3.59it/s] 89%|████████▊ | 329533/371472 [4:03:09<3:24:00, 3.43it/s] 89%|████████▊ | 329534/371472 [4:03:10<3:24:05, 3.42it/s] 89%|████████▊ | 329535/371472 [4:03:10<3:23:46, 3.43it/s] 89%|████████▊ | 329536/371472 [4:03:10<3:21:03, 3.48it/s] 89%|████████▊ | 329537/371472 [4:03:11<3:56:55, 2.95it/s] 89%|████████▊ | 329538/371472 [4:03:11<3:38:24, 3.20it/s] 89%|████████▊ | 329539/371472 [4:03:11<3:29:45, 3.33it/s] 89%|████████▊ | 329540/371472 [4:03:12<3:31:50, 3.30it/s] {'loss': 2.5861, 'learning_rate': 2.0164730978904956e-07, 'epoch': 14.19} + 89%|████████▊ | 329540/371472 [4:03:12<3:31:50, 3.30it/s] 89%|████████▊ | 329541/371472 [4:03:12<3:19:34, 3.50it/s] 89%|████████▊ | 329542/371472 [4:03:12<3:17:09, 3.54it/s] 89%|████████▊ | 329543/371472 [4:03:12<3:19:58, 3.49it/s] 89%|████████▊ | 329544/371472 [4:03:13<3:17:35, 3.54it/s] 89%|████████▊ | 329545/371472 [4:03:13<3:11:17, 3.65it/s] 89%|████████▊ | 329546/371472 [4:03:13<3:14:09, 3.60it/s] 89%|████████▊ | 329547/371472 [4:03:13<3:13:43, 3.61it/s] 89%|████████▊ | 329548/371472 [4:03:14<3:10:53, 3.66it/s] 89%|████████▊ | 329549/371472 [4:03:14<3:25:25, 3.40it/s] 89%|████████▊ | 329550/371472 [4:03:14<3:17:29, 3.54it/s] 89%|████████▊ | 329551/371472 [4:03:15<3:12:12, 3.64it/s] 89%|████████▊ | 329552/371472 [4:03:15<3:07:01, 3.74it/s] 89%|████████▊ | 329553/371472 [4:03:15<3:37:19, 3.21it/s] 89%|████████▊ | 329554/371472 [4:03:15<3:26:07, 3.39it/s] 89%|████████▊ | 329555/371472 [4:03:16<3:19:53, 3.49it/s] 89%|████████▊ | 329556/371472 [4:03:16<3:24:54, 3.41it/s] 89%|████████▊ | 329557/371472 [4:03:16<3:21:50, 3.46it/s] 89%|████████▊ | 329558/371472 [4:03:17<3:14:42, 3.59it/s] 89%|████████▊ | 329559/371472 [4:03:17<3:07:26, 3.73it/s] 89%|████████▊ | 329560/371472 [4:03:17<3:06:34, 3.74it/s] {'loss': 2.5766, 'learning_rate': 2.015988278135706e-07, 'epoch': 14.19} + 89%|████████▊ | 329560/371472 [4:03:17<3:06:34, 3.74it/s] 89%|████████▊ | 329561/371472 [4:03:17<3:04:34, 3.78it/s] 89%|████████▊ | 329562/371472 [4:03:18<3:08:31, 3.71it/s] 89%|████████▊ | 329563/371472 [4:03:18<3:09:31, 3.69it/s] 89%|████████▊ | 329564/371472 [4:03:18<3:13:35, 3.61it/s] 89%|████████▊ | 329565/371472 [4:03:18<3:04:09, 3.79it/s] 89%|████████▊ | 329566/371472 [4:03:19<3:17:31, 3.54it/s] 89%|████████▊ | 329567/371472 [4:03:19<3:09:35, 3.68it/s] 89%|████████▊ | 329568/371472 [4:03:19<3:05:08, 3.77it/s] 89%|████████▊ | 329569/371472 [4:03:20<3:06:51, 3.74it/s] 89%|████████▊ | 329570/371472 [4:03:20<3:00:11, 3.88it/s] 89%|████████▊ | 329571/371472 [4:03:20<3:01:34, 3.85it/s] 89%|████████▊ | 329572/371472 [4:03:20<3:00:13, 3.87it/s] 89%|████████▊ | 329573/371472 [4:03:21<3:16:16, 3.56it/s] 89%|████████▊ | 329574/371472 [4:03:21<3:18:10, 3.52it/s] 89%|████████▊ | 329575/371472 [4:03:21<3:23:08, 3.44it/s] 89%|████████▊ | 329576/371472 [4:03:21<3:16:15, 3.56it/s] 89%|████████▊ | 329577/371472 [4:03:22<3:20:22, 3.48it/s] 89%|████████▊ | 329578/371472 [4:03:22<3:24:21, 3.42it/s] 89%|████████▊ | 329579/371472 [4:03:22<3:20:34, 3.48it/s] 89%|████████▊ | 329580/371472 [4:03:23<3:17:23, 3.54it/s] {'loss': 2.5968, 'learning_rate': 2.0155034583809176e-07, 'epoch': 14.2} + 89%|████████▊ | 329580/371472 [4:03:23<3:17:23, 3.54it/s] 89%|████████▊ | 329581/371472 [4:03:23<3:21:08, 3.47it/s] 89%|████████▊ | 329582/371472 [4:03:23<3:16:09, 3.56it/s] 89%|████████▊ | 329583/371472 [4:03:24<3:28:04, 3.36it/s] 89%|████████▊ | 329584/371472 [4:03:24<3:25:15, 3.40it/s] 89%|████████▊ | 329585/371472 [4:03:24<3:34:07, 3.26it/s] 89%|████████▊ | 329586/371472 [4:03:24<3:24:34, 3.41it/s] 89%|████████▊ | 329587/371472 [4:03:25<3:35:07, 3.24it/s] 89%|████████▊ | 329588/371472 [4:03:25<3:34:03, 3.26it/s] 89%|████████▊ | 329589/371472 [4:03:25<3:26:05, 3.39it/s] 89%|████████▊ | 329590/371472 [4:03:26<3:35:56, 3.23it/s] 89%|████████▊ | 329591/371472 [4:03:26<3:30:54, 3.31it/s] 89%|████████▊ | 329592/371472 [4:03:26<3:26:03, 3.39it/s] 89%|████████▊ | 329593/371472 [4:03:27<3:27:19, 3.37it/s] 89%|████████▊ | 329594/371472 [4:03:27<3:29:48, 3.33it/s] 89%|████████▊ | 329595/371472 [4:03:27<3:32:21, 3.29it/s] 89%|████████▊ | 329596/371472 [4:03:27<3:26:25, 3.38it/s] 89%|████████▊ | 329597/371472 [4:03:28<3:23:45, 3.43it/s] 89%|████████▊ | 329598/371472 [4:03:28<3:32:19, 3.29it/s] 89%|████████▊ | 329599/371472 [4:03:28<3:38:13, 3.20it/s] 89%|████████▊ | 329600/371472 [4:03:29<3:24:08, 3.42it/s] {'loss': 2.5368, 'learning_rate': 2.0150186386261283e-07, 'epoch': 14.2} + 89%|████████▊ | 329600/371472 [4:03:29<3:24:08, 3.42it/s] 89%|████████▊ | 329601/371472 [4:03:29<3:19:12, 3.50it/s] 89%|████████▊ | 329602/371472 [4:03:29<3:23:21, 3.43it/s] 89%|████████▊ | 329603/371472 [4:03:29<3:24:01, 3.42it/s] 89%|████████▊ | 329604/371472 [4:03:30<3:21:45, 3.46it/s] 89%|████████▊ | 329605/371472 [4:03:30<3:18:23, 3.52it/s] 89%|████████▊ | 329606/371472 [4:03:30<3:23:34, 3.43it/s] 89%|████████▊ | 329607/371472 [4:03:31<3:19:11, 3.50it/s] 89%|████████▊ | 329608/371472 [4:03:31<3:13:53, 3.60it/s] 89%|████████▊ | 329609/371472 [4:03:31<3:16:50, 3.54it/s] 89%|████████▊ | 329610/371472 [4:03:31<3:10:26, 3.66it/s] 89%|████████▊ | 329611/371472 [4:03:32<3:18:24, 3.52it/s] 89%|████████▊ | 329612/371472 [4:03:32<3:09:45, 3.68it/s] 89%|████████▊ | 329613/371472 [4:03:32<3:14:18, 3.59it/s] 89%|████████▊ | 329614/371472 [4:03:33<3:14:45, 3.58it/s] 89%|████████▊ | 329615/371472 [4:03:33<3:16:21, 3.55it/s] 89%|████████▊ | 329616/371472 [4:03:33<4:04:54, 2.85it/s] 89%|████████▊ | 329617/371472 [4:03:34<3:54:13, 2.98it/s] 89%|████████▊ | 329618/371472 [4:03:34<3:38:56, 3.19it/s] 89%|████████▊ | 329619/371472 [4:03:34<3:27:59, 3.35it/s] 89%|████████▊ | 329620/371472 [4:03:34<3:17:09, 3.54it/s] {'loss': 2.5951, 'learning_rate': 2.0145338188713398e-07, 'epoch': 14.2} + 89%|████████▊ | 329620/371472 [4:03:34<3:17:09, 3.54it/s] 89%|████████▊ | 329621/371472 [4:03:35<3:14:52, 3.58it/s] 89%|████████▊ | 329622/371472 [4:03:35<3:26:12, 3.38it/s] 89%|████████▊ | 329623/371472 [4:03:35<3:20:32, 3.48it/s] 89%|████████▊ | 329624/371472 [4:03:36<3:26:44, 3.37it/s] 89%|████████▊ | 329625/371472 [4:03:36<3:21:47, 3.46it/s] 89%|████████▊ | 329626/371472 [4:03:36<3:20:45, 3.47it/s] 89%|████████▊ | 329627/371472 [4:03:36<3:12:13, 3.63it/s] 89%|████████▊ | 329628/371472 [4:03:37<3:05:45, 3.75it/s] 89%|████████▊ | 329629/371472 [4:03:37<3:04:39, 3.78it/s] 89%|████████▊ | 329630/371472 [4:03:37<3:03:36, 3.80it/s] 89%|████████▊ | 329631/371472 [4:03:37<3:02:03, 3.83it/s] 89%|████████▊ | 329632/371472 [4:03:38<2:56:24, 3.95it/s] 89%|████████▊ | 329633/371472 [4:03:38<2:55:10, 3.98it/s] 89%|████████▊ | 329634/371472 [4:03:38<3:37:14, 3.21it/s] 89%|████████▊ | 329635/371472 [4:03:39<3:23:51, 3.42it/s] 89%|████████▊ | 329636/371472 [4:03:39<3:14:22, 3.59it/s] 89%|████████▊ | 329637/371472 [4:03:39<3:17:24, 3.53it/s] 89%|████████▊ | 329638/371472 [4:03:39<3:14:47, 3.58it/s] 89%|████████▊ | 329639/371472 [4:03:40<3:23:01, 3.43it/s] 89%|████████▊ | 329640/371472 [4:03:40<3:25:43, 3.39it/s] {'loss': 2.6412, 'learning_rate': 2.0140489991165502e-07, 'epoch': 14.2} + 89%|████████▊ | 329640/371472 [4:03:40<3:25:43, 3.39it/s] 89%|████████▊ | 329641/371472 [4:03:40<3:27:12, 3.36it/s] 89%|████████▊ | 329642/371472 [4:03:41<3:20:12, 3.48it/s] 89%|████████▊ | 329643/371472 [4:03:41<3:17:46, 3.52it/s] 89%|████████▊ | 329644/371472 [4:03:41<3:23:25, 3.43it/s] 89%|████████▊ | 329645/371472 [4:03:41<3:17:39, 3.53it/s] 89%|████████▊ | 329646/371472 [4:03:42<3:23:42, 3.42it/s] 89%|████████▊ | 329647/371472 [4:03:42<3:27:23, 3.36it/s] 89%|████████▊ | 329648/371472 [4:03:42<3:16:02, 3.56it/s] 89%|████████▊ | 329649/371472 [4:03:43<3:11:51, 3.63it/s] 89%|████████▊ | 329650/371472 [4:03:43<3:11:53, 3.63it/s] 89%|████████▊ | 329651/371472 [4:03:43<3:14:20, 3.59it/s] 89%|████████▊ | 329652/371472 [4:03:43<3:13:56, 3.59it/s] 89%|███████��▊ | 329653/371472 [4:03:44<3:14:54, 3.58it/s] 89%|████████▊ | 329654/371472 [4:03:44<3:14:37, 3.58it/s] 89%|████████▊ | 329655/371472 [4:03:44<3:13:09, 3.61it/s] 89%|████████▊ | 329656/371472 [4:03:45<3:06:20, 3.74it/s] 89%|████████▊ | 329657/371472 [4:03:45<3:06:10, 3.74it/s] 89%|████████▊ | 329658/371472 [4:03:45<3:00:51, 3.85it/s] 89%|████████▊ | 329659/371472 [4:03:45<3:06:20, 3.74it/s] 89%|████████▊ | 329660/371472 [4:03:46<3:08:36, 3.69it/s] {'loss': 2.682, 'learning_rate': 2.013564179361762e-07, 'epoch': 14.2} + 89%|████████▊ | 329660/371472 [4:03:46<3:08:36, 3.69it/s] 89%|████████▊ | 329661/371472 [4:03:46<3:03:11, 3.80it/s] 89%|████████▊ | 329662/371472 [4:03:46<3:20:23, 3.48it/s] 89%|████████▊ | 329663/371472 [4:03:46<3:19:55, 3.49it/s] 89%|████████▊ | 329664/371472 [4:03:47<3:28:45, 3.34it/s] 89%|████████▊ | 329665/371472 [4:03:47<3:28:42, 3.34it/s] 89%|████████▊ | 329666/371472 [4:03:47<3:16:54, 3.54it/s] 89%|████████▊ | 329667/371472 [4:03:48<3:19:36, 3.49it/s] 89%|████████▊ | 329668/371472 [4:03:48<3:14:13, 3.59it/s] 89%|████████▊ | 329669/371472 [4:03:48<3:18:02, 3.52it/s] 89%|████████▊ | 329670/371472 [4:03:48<3:14:06, 3.59it/s] 89%|████████▊ | 329671/371472 [4:03:49<3:08:39, 3.69it/s] 89%|████████▊ | 329672/371472 [4:03:49<3:09:23, 3.68it/s] 89%|████████▊ | 329673/371472 [4:03:49<3:15:42, 3.56it/s] 89%|████████▊ | 329674/371472 [4:03:50<3:17:48, 3.52it/s] 89%|████████▊ | 329675/371472 [4:03:50<3:33:40, 3.26it/s] 89%|████████▊ | 329676/371472 [4:03:50<3:25:00, 3.40it/s] 89%|████████▊ | 329677/371472 [4:03:50<3:19:54, 3.48it/s] 89%|████████▊ | 329678/371472 [4:03:51<3:18:40, 3.51it/s] 89%|████████▊ | 329679/371472 [4:03:51<3:13:19, 3.60it/s] 89%|████████▊ | 329680/371472 [4:03:51<3:07:15, 3.72it/s] {'loss': 2.6015, 'learning_rate': 2.0130793596069725e-07, 'epoch': 14.2} + 89%|████████▊ | 329680/371472 [4:03:51<3:07:15, 3.72it/s] 89%|████████▊ | 329681/371472 [4:03:52<3:03:24, 3.80it/s] 89%|████████▉ | 329682/371472 [4:03:52<3:06:17, 3.74it/s] 89%|████████▉ | 329683/371472 [4:03:52<2:59:19, 3.88it/s] 89%|████████▉ | 329684/371472 [4:03:52<3:01:04, 3.85it/s] 89%|████████▉ | 329685/371472 [4:03:53<3:27:50, 3.35it/s] 89%|████████▉ | 329686/371472 [4:03:53<3:19:17, 3.49it/s] 89%|████████▉ | 329687/371472 [4:03:53<3:09:53, 3.67it/s] 89%|████████▉ | 329688/371472 [4:03:53<3:06:30, 3.73it/s] 89%|████████▉ | 329689/371472 [4:03:54<3:03:12, 3.80it/s] 89%|████████▉ | 329690/371472 [4:03:54<3:02:06, 3.82it/s] 89%|████████▉ | 329691/371472 [4:03:54<3:02:38, 3.81it/s] 89%|████████▉ | 329692/371472 [4:03:54<3:00:52, 3.85it/s] 89%|████████▉ | 329693/371472 [4:03:55<3:05:21, 3.76it/s] 89%|████████▉ | 329694/371472 [4:03:55<3:06:41, 3.73it/s] 89%|████████▉ | 329695/371472 [4:03:55<3:21:45, 3.45it/s] 89%|████████▉ | 329696/371472 [4:03:56<3:20:29, 3.47it/s] 89%|████████▉ | 329697/371472 [4:03:56<3:13:32, 3.60it/s] 89%|████████▉ | 329698/371472 [4:03:56<3:31:19, 3.29it/s] 89%|████████▉ | 329699/371472 [4:03:56<3:17:46, 3.52it/s] 89%|████████▉ | 329700/371472 [4:03:57<3:34:10, 3.25it/s] {'loss': 2.8202, 'learning_rate': 2.012594539852184e-07, 'epoch': 14.2} + 89%|████████▉ | 329700/371472 [4:03:57<3:34:10, 3.25it/s] 89%|████████▉ | 329701/371472 [4:03:57<3:21:54, 3.45it/s] 89%|████████▉ | 329702/371472 [4:03:57<3:24:28, 3.40it/s] 89%|████████▉ | 329703/371472 [4:03:58<3:33:33, 3.26it/s] 89%|████████▉ | 329704/371472 [4:03:58<3:29:16, 3.33it/s] 89%|████████▉ | 329705/371472 [4:03:58<3:25:21, 3.39it/s] 89%|████████▉ | 329706/371472 [4:03:59<3:24:46, 3.40it/s] 89%|████████▉ | 329707/371472 [4:03:59<3:30:30, 3.31it/s] 89%|████████▉ | 329708/371472 [4:03:59<3:28:50, 3.33it/s] 89%|████████▉ | 329709/371472 [4:04:00<3:31:20, 3.29it/s] 89%|████████▉ | 329710/371472 [4:04:00<3:37:51, 3.19it/s] 89%|████████▉ | 329711/371472 [4:04:00<3:31:29, 3.29it/s] 89%|████████▉ | 329712/371472 [4:04:00<3:17:10, 3.53it/s] 89%|████████▉ | 329713/371472 [4:04:01<3:13:03, 3.61it/s] 89%|████████▉ | 329714/371472 [4:04:01<3:10:27, 3.65it/s] 89%|████████▉ | 329715/371472 [4:04:01<3:05:07, 3.76it/s] 89%|████████▉ | 329716/371472 [4:04:01<3:03:18, 3.80it/s] 89%|████████▉ | 329717/371472 [4:04:02<3:09:56, 3.66it/s] 89%|████████▉ | 329718/371472 [4:04:02<3:06:03, 3.74it/s] 89%|████████▉ | 329719/371472 [4:04:02<3:04:45, 3.77it/s] 89%|████████▉ | 329720/371472 [4:04:02<3:01:52, 3.83it/s] {'loss': 2.6653, 'learning_rate': 2.0121097200973947e-07, 'epoch': 14.2} + 89%|████████▉ | 329720/371472 [4:04:02<3:01:52, 3.83it/s] 89%|████████▉ | 329721/371472 [4:04:03<3:03:52, 3.78it/s] 89%|████████▉ | 329722/371472 [4:04:03<2:57:59, 3.91it/s] 89%|████████▉ | 329723/371472 [4:04:03<3:01:02, 3.84it/s] 89%|████████▉ | 329724/371472 [4:04:04<3:13:39, 3.59it/s] 89%|████████▉ | 329725/371472 [4:04:04<3:25:11, 3.39it/s] 89%|████████▉ | 329726/371472 [4:04:04<3:20:16, 3.47it/s] 89%|████████▉ | 329727/371472 [4:04:04<3:18:44, 3.50it/s] 89%|████████▉ | 329728/371472 [4:04:05<3:18:49, 3.50it/s] 89%|████████▉ | 329729/371472 [4:04:05<3:16:58, 3.53it/s] 89%|████████▉ | 329730/371472 [4:04:05<3:17:42, 3.52it/s] 89%|████████▉ | 329731/371472 [4:04:06<3:25:43, 3.38it/s] 89%|████████▉ | 329732/371472 [4:04:06<3:16:05, 3.55it/s] 89%|████████▉ | 329733/371472 [4:04:06<3:16:37, 3.54it/s] 89%|████████▉ | 329734/371472 [4:04:06<3:15:04, 3.57it/s] 89%|████████▉ | 329735/371472 [4:04:07<3:13:06, 3.60it/s] 89%|████████▉ | 329736/371472 [4:04:07<3:17:08, 3.53it/s] 89%|████████▉ | 329737/371472 [4:04:07<3:14:25, 3.58it/s] 89%|████████▉ | 329738/371472 [4:04:08<3:18:42, 3.50it/s] 89%|████████▉ | 329739/371472 [4:04:08<3:09:24, 3.67it/s] 89%|████████▉ | 329740/371472 [4:04:08<3:05:18, 3.75it/s] {'loss': 2.558, 'learning_rate': 2.0116249003426062e-07, 'epoch': 14.2} + 89%|████████▉ | 329740/371472 [4:04:08<3:05:18, 3.75it/s] 89%|████████▉ | 329741/371472 [4:04:08<3:06:25, 3.73it/s] 89%|████████▉ | 329742/371472 [4:04:09<3:01:36, 3.83it/s] 89%|████████▉ | 329743/371472 [4:04:09<3:02:53, 3.80it/s] 89%|████████▉ | 329744/371472 [4:04:09<3:02:39, 3.81it/s] 89%|████████▉ | 329745/371472 [4:04:09<3:04:11, 3.78it/s] 89%|████████▉ | 329746/371472 [4:04:10<3:51:51, 3.00it/s] 89%|████████▉ | 329747/371472 [4:04:10<3:38:30, 3.18it/s] 89%|████████▉ | 329748/371472 [4:04:10<3:37:42, 3.19it/s] 89%|████████▉ | 329749/371472 [4:04:11<3:37:31, 3.20it/s] 89%|████████▉ | 329750/371472 [4:04:11<3:36:35, 3.21it/s] 89%|████████▉ | 329751/371472 [4:04:11<3:31:25, 3.29it/s] 89%|████████▉ | 329752/371472 [4:04:12<3:25:22, 3.39it/s] 89%|████████▉ | 329753/371472 [4:04:12<3:14:06, 3.58it/s] 89%|████████▉ | 329754/371472 [4:04:12<3:25:51, 3.38it/s] 89%|████████▉ | 329755/371472 [4:04:12<3:20:29, 3.47it/s] 89%|████████▉ | 329756/371472 [4:04:13<3:16:11, 3.54it/s] 89%|████████▉ | 329757/371472 [4:04:13<3:11:25, 3.63it/s] 89%|████████▉ | 329758/371472 [4:04:13<3:20:24, 3.47it/s] 89%|████████▉ | 329759/371472 [4:04:14<3:12:59, 3.60it/s] 89%|████████▉ | 329760/371472 [4:04:14<3:11:44, 3.63it/s] {'loss': 2.7746, 'learning_rate': 2.0111400805878166e-07, 'epoch': 14.2} + 89%|████████▉ | 329760/371472 [4:04:14<3:11:44, 3.63it/s] 89%|████████▉ | 329761/371472 [4:04:14<3:13:07, 3.60it/s] 89%|████████▉ | 329762/371472 [4:04:15<3:30:41, 3.30it/s] 89%|████████▉ | 329763/371472 [4:04:15<3:24:13, 3.40it/s] 89%|████████▉ | 329764/371472 [4:04:15<3:20:22, 3.47it/s] 89%|████████▉ | 329765/371472 [4:04:15<3:30:07, 3.31it/s] 89%|████████▉ | 329766/371472 [4:04:16<3:23:30, 3.42it/s] 89%|████████▉ | 329767/371472 [4:04:16<3:24:44, 3.40it/s] 89%|██████���█▉ | 329768/371472 [4:04:16<3:29:13, 3.32it/s] 89%|████████▉ | 329769/371472 [4:04:17<3:24:43, 3.40it/s] 89%|████████▉ | 329770/371472 [4:04:17<3:16:55, 3.53it/s] 89%|████████▉ | 329771/371472 [4:04:17<3:09:53, 3.66it/s] 89%|████████▉ | 329772/371472 [4:04:17<3:07:23, 3.71it/s] 89%|████████▉ | 329773/371472 [4:04:18<3:04:13, 3.77it/s] 89%|████████▉ | 329774/371472 [4:04:18<3:04:19, 3.77it/s] 89%|████████▉ | 329775/371472 [4:04:18<3:00:47, 3.84it/s] 89%|████████▉ | 329776/371472 [4:04:18<3:04:11, 3.77it/s] 89%|████████▉ | 329777/371472 [4:04:19<3:23:52, 3.41it/s] 89%|████████▉ | 329778/371472 [4:04:19<3:27:16, 3.35it/s] 89%|████████▉ | 329779/371472 [4:04:19<3:16:21, 3.54it/s] 89%|████████▉ | 329780/371472 [4:04:20<3:20:36, 3.46it/s] {'loss': 2.5992, 'learning_rate': 2.0106552608330284e-07, 'epoch': 14.2} + 89%|████████▉ | 329780/371472 [4:04:20<3:20:36, 3.46it/s] 89%|████████▉ | 329781/371472 [4:04:20<3:43:21, 3.11it/s] 89%|████████▉ | 329782/371472 [4:04:20<3:32:28, 3.27it/s] 89%|████████▉ | 329783/371472 [4:04:21<3:26:41, 3.36it/s] 89%|████████▉ | 329784/371472 [4:04:21<3:28:03, 3.34it/s] 89%|████████▉ | 329785/371472 [4:04:21<3:14:35, 3.57it/s] 89%|████████▉ | 329786/371472 [4:04:21<3:14:05, 3.58it/s] 89%|████████▉ | 329787/371472 [4:04:22<3:17:50, 3.51it/s] 89%|████████▉ | 329788/371472 [4:04:22<3:13:40, 3.59it/s] 89%|████████▉ | 329789/371472 [4:04:22<3:10:51, 3.64it/s] 89%|████████▉ | 329790/371472 [4:04:22<3:08:10, 3.69it/s] 89%|████████▉ | 329791/371472 [4:04:23<3:07:20, 3.71it/s] 89%|████████▉ | 329792/371472 [4:04:23<3:09:13, 3.67it/s] 89%|████████▉ | 329793/371472 [4:04:23<3:30:47, 3.30it/s] 89%|████████▉ | 329794/371472 [4:04:24<3:31:57, 3.28it/s] 89%|████████▉ | 329795/371472 [4:04:24<3:27:07, 3.35it/s] 89%|████████▉ | 329796/371472 [4:04:24<3:29:02, 3.32it/s] 89%|████████▉ | 329797/371472 [4:04:25<3:34:57, 3.23it/s] 89%|████████▉ | 329798/371472 [4:04:25<3:32:28, 3.27it/s] 89%|████████▉ | 329799/371472 [4:04:25<3:27:13, 3.35it/s] 89%|████████▉ | 329800/371472 [4:04:25<3:24:37, 3.39it/s] {'loss': 2.4453, 'learning_rate': 2.010170441078239e-07, 'epoch': 14.21} + 89%|████████▉ | 329800/371472 [4:04:25<3:24:37, 3.39it/s] 89%|████████▉ | 329801/371472 [4:04:26<3:32:22, 3.27it/s] 89%|████████▉ | 329802/371472 [4:04:26<3:25:09, 3.39it/s] 89%|████████▉ | 329803/371472 [4:04:26<3:18:19, 3.50it/s] 89%|████████▉ | 329804/371472 [4:04:27<3:18:30, 3.50it/s] 89%|████████▉ | 329805/371472 [4:04:27<3:16:59, 3.53it/s] 89%|████████▉ | 329806/371472 [4:04:27<3:20:50, 3.46it/s] 89%|████████▉ | 329807/371472 [4:04:27<3:21:16, 3.45it/s] 89%|████████▉ | 329808/371472 [4:04:28<3:22:42, 3.43it/s] 89%|████████▉ | 329809/371472 [4:04:28<3:17:10, 3.52it/s] 89%|████████▉ | 329810/371472 [4:04:28<3:14:41, 3.57it/s] 89%|████████▉ | 329811/371472 [4:04:29<3:06:56, 3.71it/s] 89%|████████▉ | 329812/371472 [4:04:29<3:12:09, 3.61it/s] 89%|████████▉ | 329813/371472 [4:04:29<3:11:44, 3.62it/s] 89%|████████▉ | 329814/371472 [4:04:29<3:09:10, 3.67it/s] 89%|████████▉ | 329815/371472 [4:04:30<3:08:40, 3.68it/s] 89%|████████▉ | 329816/371472 [4:04:30<3:05:08, 3.75it/s] 89%|████████▉ | 329817/371472 [4:04:30<3:08:14, 3.69it/s] 89%|████████▉ | 329818/371472 [4:04:30<3:06:39, 3.72it/s] 89%|████████▉ | 329819/371472 [4:04:31<3:13:23, 3.59it/s] 89%|████████▉ | 329820/371472 [4:04:31<3:14:38, 3.57it/s] {'loss': 2.6176, 'learning_rate': 2.0096856213234504e-07, 'epoch': 14.21} + 89%|████████▉ | 329820/371472 [4:04:31<3:14:38, 3.57it/s] 89%|████████▉ | 329821/371472 [4:04:31<3:13:09, 3.59it/s] 89%|████████▉ | 329822/371472 [4:04:32<3:13:36, 3.59it/s] 89%|████████▉ | 329823/371472 [4:04:32<3:11:03, 3.63it/s] 89%|████████▉ | 329824/371472 [4:04:32<3:14:27, 3.57it/s] 89%|████████▉ | 329825/371472 [4:04:32<3:08:03, 3.69it/s] 89%|████████▉ | 329826/371472 [4:04:33<3:12:09, 3.61it/s] 89%|████████▉ | 329827/371472 [4:04:33<3:08:13, 3.69it/s] 89%|████████▉ | 329828/371472 [4:04:33<3:10:54, 3.64it/s] 89%|████████▉ | 329829/371472 [4:04:33<3:06:56, 3.71it/s] 89%|████████▉ | 329830/371472 [4:04:34<3:12:16, 3.61it/s] 89%|████████▉ | 329831/371472 [4:04:34<3:08:07, 3.69it/s] 89%|████████▉ | 329832/371472 [4:04:34<3:08:38, 3.68it/s] 89%|████████▉ | 329833/371472 [4:04:35<3:12:38, 3.60it/s] 89%|████████▉ | 329834/371472 [4:04:35<3:08:45, 3.68it/s] 89%|████████▉ | 329835/371472 [4:04:35<3:02:40, 3.80it/s] 89%|████████▉ | 329836/371472 [4:04:35<3:05:24, 3.74it/s] 89%|████████▉ | 329837/371472 [4:04:36<3:13:56, 3.58it/s] 89%|████████▉ | 329838/371472 [4:04:36<3:10:15, 3.65it/s] 89%|████████▉ | 329839/371472 [4:04:36<3:12:33, 3.60it/s] 89%|████████▉ | 329840/371472 [4:04:37<3:27:18, 3.35it/s] {'loss': 2.5654, 'learning_rate': 2.0092008015686608e-07, 'epoch': 14.21} + 89%|████████▉ | 329840/371472 [4:04:37<3:27:18, 3.35it/s] 89%|████████▉ | 329841/371472 [4:04:37<3:16:12, 3.54it/s] 89%|████████▉ | 329842/371472 [4:04:37<3:36:20, 3.21it/s] 89%|████████▉ | 329843/371472 [4:04:38<3:50:22, 3.01it/s] 89%|████████▉ | 329844/371472 [4:04:38<3:46:14, 3.07it/s] 89%|████████▉ | 329845/371472 [4:04:38<3:43:16, 3.11it/s] 89%|████████▉ | 329846/371472 [4:04:38<3:32:31, 3.26it/s] 89%|████████▉ | 329847/371472 [4:04:39<3:20:17, 3.46it/s] 89%|████████▉ | 329848/371472 [4:04:39<3:32:25, 3.27it/s] 89%|████████▉ | 329849/371472 [4:04:39<3:44:39, 3.09it/s] 89%|████████▉ | 329850/371472 [4:04:40<3:47:39, 3.05it/s] 89%|████████▉ | 329851/371472 [4:04:40<3:37:30, 3.19it/s] 89%|████████▉ | 329852/371472 [4:04:40<3:46:30, 3.06it/s] 89%|████████▉ | 329853/371472 [4:04:41<3:31:31, 3.28it/s] 89%|████████▉ | 329854/371472 [4:04:41<3:26:07, 3.37it/s] 89%|████████▉ | 329855/371472 [4:04:41<3:18:21, 3.50it/s] 89%|████████▉ | 329856/371472 [4:04:42<3:25:36, 3.37it/s] 89%|████████▉ | 329857/371472 [4:04:42<3:21:01, 3.45it/s] 89%|████████▉ | 329858/371472 [4:04:42<3:19:19, 3.48it/s] 89%|████████▉ | 329859/371472 [4:04:42<3:23:02, 3.42it/s] 89%|████████▉ | 329860/371472 [4:04:43<3:16:31, 3.53it/s] {'loss': 2.5254, 'learning_rate': 2.0087159818138729e-07, 'epoch': 14.21} + 89%|████████▉ | 329860/371472 [4:04:43<3:16:31, 3.53it/s] 89%|████████▉ | 329861/371472 [4:04:43<3:25:03, 3.38it/s] 89%|████████▉ | 329862/371472 [4:04:43<3:21:51, 3.44it/s] 89%|████████▉ | 329863/371472 [4:04:44<3:24:06, 3.40it/s] 89%|████████▉ | 329864/371472 [4:04:44<3:20:50, 3.45it/s] 89%|████████▉ | 329865/371472 [4:04:44<3:14:50, 3.56it/s] 89%|████████▉ | 329866/371472 [4:04:44<3:15:11, 3.55it/s] 89%|████████▉ | 329867/371472 [4:04:45<3:11:12, 3.63it/s] 89%|████████▉ | 329868/371472 [4:04:45<3:07:50, 3.69it/s] 89%|████████▉ | 329869/371472 [4:04:45<3:19:44, 3.47it/s] 89%|████████▉ | 329870/371472 [4:04:46<3:26:01, 3.37it/s] 89%|████████▉ | 329871/371472 [4:04:46<3:17:00, 3.52it/s] 89%|████████▉ | 329872/371472 [4:04:46<3:13:00, 3.59it/s] 89%|████████▉ | 329873/371472 [4:04:46<3:13:27, 3.58it/s] 89%|████████▉ | 329874/371472 [4:04:47<3:13:46, 3.58it/s] 89%|████████▉ | 329875/371472 [4:04:47<3:18:11, 3.50it/s] 89%|████████▉ | 329876/371472 [4:04:47<3:17:45, 3.51it/s] 89%|████████▉ | 329877/371472 [4:04:47<3:09:33, 3.66it/s] 89%|████████▉ | 329878/371472 [4:04:48<3:18:34, 3.49it/s] 89%|████████▉ | 329879/371472 [4:04:48<3:16:19, 3.53it/s] 89%|████████▉ | 329880/371472 [4:04:48<3:17:24, 3.51it/s] {'loss': 2.6811, 'learning_rate': 2.0082311620590833e-07, 'epoch': 14.21} + 89%|████████▉ | 329880/371472 [4:04:48<3:17:24, 3.51it/s] 89%|████████▉ | 329881/371472 [4:04:49<3:24:02, 3.40it/s] 89%|████████▉ | 329882/371472 [4:04:49<3:25:55, 3.37it/s] 89%|██���█████▉ | 329883/371472 [4:04:49<3:32:08, 3.27it/s] 89%|████████▉ | 329884/371472 [4:04:50<3:23:51, 3.40it/s] 89%|████████▉ | 329885/371472 [4:04:50<3:32:31, 3.26it/s] 89%|████████▉ | 329886/371472 [4:04:50<3:21:45, 3.44it/s] 89%|████████▉ | 329887/371472 [4:04:50<3:23:43, 3.40it/s] 89%|████████▉ | 329888/371472 [4:04:51<3:15:09, 3.55it/s] 89%|████████▉ | 329889/371472 [4:04:51<3:21:48, 3.43it/s] 89%|████████▉ | 329890/371472 [4:04:51<3:22:17, 3.43it/s] 89%|████████▉ | 329891/371472 [4:04:52<3:17:25, 3.51it/s] 89%|████████▉ | 329892/371472 [4:04:52<3:17:18, 3.51it/s] 89%|████████▉ | 329893/371472 [4:04:52<3:17:18, 3.51it/s] 89%|████████▉ | 329894/371472 [4:04:52<3:29:03, 3.31it/s] 89%|████████▉ | 329895/371472 [4:04:53<3:18:26, 3.49it/s] 89%|████████▉ | 329896/371472 [4:04:53<3:36:31, 3.20it/s] 89%|████████▉ | 329897/371472 [4:04:53<3:31:55, 3.27it/s] 89%|████████▉ | 329898/371472 [4:04:54<3:22:32, 3.42it/s] 89%|████████▉ | 329899/371472 [4:04:54<3:19:46, 3.47it/s] 89%|████████▉ | 329900/371472 [4:04:54<3:10:02, 3.65it/s] {'loss': 2.4981, 'learning_rate': 2.0077463423042938e-07, 'epoch': 14.21} + 89%|████████▉ | 329900/371472 [4:04:54<3:10:02, 3.65it/s] 89%|████████▉ | 329901/371472 [4:04:55<3:29:18, 3.31it/s] 89%|████████▉ | 329902/371472 [4:04:55<3:18:20, 3.49it/s] 89%|████████▉ | 329903/371472 [4:04:55<3:24:21, 3.39it/s] 89%|████████▉ | 329904/371472 [4:04:55<3:25:59, 3.36it/s] 89%|████████▉ | 329905/371472 [4:04:56<3:24:13, 3.39it/s] 89%|████████▉ | 329906/371472 [4:04:56<3:28:36, 3.32it/s] 89%|████████▉ | 329907/371472 [4:04:56<3:36:27, 3.20it/s] 89%|████████▉ | 329908/371472 [4:04:57<3:27:58, 3.33it/s] 89%|████████▉ | 329909/371472 [4:04:57<3:22:22, 3.42it/s] 89%|████████▉ | 329910/371472 [4:04:57<3:13:56, 3.57it/s] 89%|████████▉ | 329911/371472 [4:04:57<3:17:13, 3.51it/s] 89%|████████▉ | 329912/371472 [4:04:58<3:10:14, 3.64it/s] 89%|████████▉ | 329913/371472 [4:04:58<3:15:08, 3.55it/s] 89%|████████▉ | 329914/371472 [4:04:58<3:19:27, 3.47it/s] 89%|████████▉ | 329915/371472 [4:04:59<3:15:32, 3.54it/s] 89%|████████▉ | 329916/371472 [4:04:59<3:12:47, 3.59it/s] 89%|████████▉ | 329917/371472 [4:04:59<3:07:43, 3.69it/s] 89%|████████▉ | 329918/371472 [4:04:59<3:13:01, 3.59it/s] 89%|████████▉ | 329919/371472 [4:05:00<3:11:25, 3.62it/s] 89%|████████▉ | 329920/371472 [4:05:00<3:10:28, 3.64it/s] {'loss': 2.8439, 'learning_rate': 2.0072615225495055e-07, 'epoch': 14.21} + 89%|████████▉ | 329920/371472 [4:05:00<3:10:28, 3.64it/s] 89%|████████▉ | 329921/371472 [4:05:00<3:05:39, 3.73it/s] 89%|████████▉ | 329922/371472 [4:05:00<3:04:36, 3.75it/s] 89%|████████▉ | 329923/371472 [4:05:01<2:58:48, 3.87it/s] 89%|████████▉ | 329924/371472 [4:05:01<3:06:15, 3.72it/s] 89%|████████▉ | 329925/371472 [4:05:01<3:12:48, 3.59it/s] 89%|████████▉ | 329926/371472 [4:05:02<3:09:38, 3.65it/s] 89%|████████▉ | 329927/371472 [4:05:02<3:14:53, 3.55it/s] 89%|████████▉ | 329928/371472 [4:05:02<3:06:57, 3.70it/s] 89%|████████▉ | 329929/371472 [4:05:02<3:03:03, 3.78it/s] 89%|████████▉ | 329930/371472 [4:05:03<2:55:53, 3.94it/s] 89%|████████▉ | 329931/371472 [4:05:03<2:58:16, 3.88it/s] 89%|████████▉ | 329932/371472 [4:05:03<2:55:59, 3.93it/s] 89%|████████▉ | 329933/371472 [4:05:03<2:57:54, 3.89it/s] 89%|████████▉ | 329934/371472 [4:05:04<3:03:06, 3.78it/s] 89%|████████▉ | 329935/371472 [4:05:04<3:00:10, 3.84it/s] 89%|████████▉ | 329936/371472 [4:05:04<3:04:14, 3.76it/s] 89%|████████▉ | 329937/371472 [4:05:04<3:02:02, 3.80it/s] 89%|████████▉ | 329938/371472 [4:05:05<3:04:30, 3.75it/s] 89%|████████▉ | 329939/371472 [4:05:05<3:13:32, 3.58it/s] 89%|████████▉ | 329940/371472 [4:05:05<3:23:19, 3.40it/s] {'loss': 2.6659, 'learning_rate': 2.006776702794716e-07, 'epoch': 14.21} + 89%|████████▉ | 329940/371472 [4:05:05<3:23:19, 3.40it/s] 89%|████████▉ | 329941/371472 [4:05:06<3:25:43, 3.36it/s] 89%|████████▉ | 329942/371472 [4:05:06<3:26:51, 3.35it/s] 89%|████████▉ | 329943/371472 [4:05:06<3:31:28, 3.27it/s] 89%|████████▉ | 329944/371472 [4:05:07<3:25:15, 3.37it/s] 89%|████████▉ | 329945/371472 [4:05:07<3:23:46, 3.40it/s] 89%|████████▉ | 329946/371472 [4:05:07<3:13:57, 3.57it/s] 89%|████████▉ | 329947/371472 [4:05:07<3:07:23, 3.69it/s] 89%|████████▉ | 329948/371472 [4:05:08<3:06:15, 3.72it/s] 89%|████████▉ | 329949/371472 [4:05:08<3:12:32, 3.59it/s] 89%|████████▉ | 329950/371472 [4:05:08<3:23:28, 3.40it/s] 89%|████████▉ | 329951/371472 [4:05:09<3:25:23, 3.37it/s] 89%|████████▉ | 329952/371472 [4:05:09<3:33:11, 3.25it/s] 89%|████████▉ | 329953/371472 [4:05:09<3:40:10, 3.14it/s] 89%|████████▉ | 329954/371472 [4:05:09<3:36:11, 3.20it/s] 89%|████████▉ | 329955/371472 [4:05:10<3:25:45, 3.36it/s] 89%|████████▉ | 329956/371472 [4:05:10<3:19:37, 3.47it/s] 89%|████████▉ | 329957/371472 [4:05:10<3:09:17, 3.66it/s] 89%|████████▉ | 329958/371472 [4:05:11<3:09:04, 3.66it/s] 89%|████████▉ | 329959/371472 [4:05:11<3:14:13, 3.56it/s] 89%|████████▉ | 329960/371472 [4:05:11<3:27:09, 3.34it/s] {'loss': 2.454, 'learning_rate': 2.0062918830399275e-07, 'epoch': 14.21} + 89%|████████▉ | 329960/371472 [4:05:11<3:27:09, 3.34it/s] 89%|████████▉ | 329961/371472 [4:05:12<3:32:07, 3.26it/s] 89%|████████▉ | 329962/371472 [4:05:12<3:37:00, 3.19it/s] 89%|████████▉ | 329963/371472 [4:05:12<3:32:28, 3.26it/s] 89%|████████▉ | 329964/371472 [4:05:12<3:28:59, 3.31it/s] 89%|████████▉ | 329965/371472 [4:05:13<3:31:08, 3.28it/s] 89%|████████▉ | 329966/371472 [4:05:13<3:23:12, 3.40it/s] 89%|████████▉ | 329967/371472 [4:05:13<3:13:02, 3.58it/s] 89%|████████▉ | 329968/371472 [4:05:14<3:10:22, 3.63it/s] 89%|████████▉ | 329969/371472 [4:05:14<3:05:11, 3.74it/s] 89%|████████▉ | 329970/371472 [4:05:14<3:13:20, 3.58it/s] 89%|████████▉ | 329971/371472 [4:05:14<3:21:34, 3.43it/s] 89%|████████▉ | 329972/371472 [4:05:15<3:12:47, 3.59it/s] 89%|████████▉ | 329973/371472 [4:05:15<3:18:42, 3.48it/s] 89%|████████▉ | 329974/371472 [4:05:15<3:24:00, 3.39it/s] 89%|████████▉ | 329975/371472 [4:05:16<3:24:41, 3.38it/s] 89%|████████▉ | 329976/371472 [4:05:16<3:25:05, 3.37it/s] 89%|████████▉ | 329977/371472 [4:05:16<3:28:55, 3.31it/s] 89%|████████▉ | 329978/371472 [4:05:17<3:55:20, 2.94it/s] 89%|████████▉ | 329979/371472 [4:05:17<4:01:59, 2.86it/s] 89%|████████▉ | 329980/371472 [4:05:17<4:01:38, 2.86it/s] {'loss': 2.594, 'learning_rate': 2.0058070632851382e-07, 'epoch': 14.21} + 89%|████████▉ | 329980/371472 [4:05:17<4:01:38, 2.86it/s] 89%|████████▉ | 329981/371472 [4:05:18<3:46:45, 3.05it/s] 89%|████████▉ | 329982/371472 [4:05:18<3:41:02, 3.13it/s] 89%|████████▉ | 329983/371472 [4:05:18<3:54:36, 2.95it/s] 89%|████████▉ | 329984/371472 [4:05:19<3:34:09, 3.23it/s] 89%|████████▉ | 329985/371472 [4:05:19<3:29:23, 3.30it/s] 89%|████████▉ | 329986/371472 [4:05:19<3:23:27, 3.40it/s] 89%|████████▉ | 329987/371472 [4:05:19<3:27:40, 3.33it/s] 89%|████████▉ | 329988/371472 [4:05:20<3:18:29, 3.48it/s] 89%|████████▉ | 329989/371472 [4:05:20<3:12:06, 3.60it/s] 89%|████████▉ | 329990/371472 [4:05:20<3:16:14, 3.52it/s] 89%|████████▉ | 329991/371472 [4:05:20<3:11:25, 3.61it/s] 89%|████████▉ | 329992/371472 [4:05:21<3:11:26, 3.61it/s] 89%|████████▉ | 329993/371472 [4:05:21<3:08:20, 3.67it/s] 89%|████████▉ | 329994/371472 [4:05:21<3:10:38, 3.63it/s] 89%|████████▉ | 329995/371472 [4:05:22<3:23:19, 3.40it/s] 89%|████████▉ | 329996/371472 [4:05:22<3:14:02, 3.56it/s] 89%|████████▉ | 329997/371472 [4:05:22<3:19:51, 3.46it/s] 89%|████████▉ | 329998/371472 [4:05:22<3:14:48, 3.55it/s] 89%|████████▉ | 329999/371472 [4:05:23<3:13:11, 3.58it/s] 89%|████████▉ | 330000/371472 [4:05:23<3:10:43, 3.62it/s] {'loss': 2.4966, 'learning_rate': 2.0053222435303497e-07, 'epoch': 14.21} + 89%|████████▉ | 330000/371472 [4:05:23<3:10:43, 3.62it/s] 89%|████████▉ | 330001/371472 [4:05:23<3:03:36, 3.76it/s] 89%|████████▉ | 330002/371472 [4:05:24<3:12:05, 3.60it/s] 89%|████████▉ | 330003/371472 [4:05:24<3:32:24, 3.25it/s] 89%|████████▉ | 330004/371472 [4:05:24<3:24:26, 3.38it/s] 89%|████████▉ | 330005/371472 [4:05:24<3:17:27, 3.50it/s] 89%|████████▉ | 330006/371472 [4:05:25<3:16:59, 3.51it/s] 89%|████████▉ | 330007/371472 [4:05:25<3:15:15, 3.54it/s] 89%|████████▉ | 330008/371472 [4:05:25<3:10:40, 3.62it/s] 89%|████████▉ | 330009/371472 [4:05:26<3:09:14, 3.65it/s] 89%|████████▉ | 330010/371472 [4:05:26<3:02:04, 3.80it/s] 89%|████████▉ | 330011/371472 [4:05:26<3:01:44, 3.80it/s] 89%|████████▉ | 330012/371472 [4:05:26<3:07:45, 3.68it/s] 89%|████████▉ | 330013/371472 [4:05:27<3:08:36, 3.66it/s] 89%|████████▉ | 330014/371472 [4:05:27<3:12:24, 3.59it/s] 89%|████████▉ | 330015/371472 [4:05:27<3:11:46, 3.60it/s] 89%|████████▉ | 330016/371472 [4:05:27<3:11:23, 3.61it/s] 89%|████████▉ | 330017/371472 [4:05:28<3:11:23, 3.61it/s] 89%|████████▉ | 330018/371472 [4:05:28<3:08:26, 3.67it/s] 89%|████████▉ | 330019/371472 [4:05:28<3:16:31, 3.52it/s] 89%|████████▉ | 330020/371472 [4:05:29<3:14:33, 3.55it/s] {'loss': 2.6551, 'learning_rate': 2.0048374237755602e-07, 'epoch': 14.21} + 89%|████████▉ | 330020/371472 [4:05:29<3:14:33, 3.55it/s] 89%|████████▉ | 330021/371472 [4:05:29<3:13:08, 3.58it/s] 89%|████████▉ | 330022/371472 [4:05:29<3:19:24, 3.46it/s] 89%|████████▉ | 330023/371472 [4:05:29<3:28:54, 3.31it/s] 89%|████████▉ | 330024/371472 [4:05:30<3:42:03, 3.11it/s] 89%|████████▉ | 330025/371472 [4:05:30<3:51:54, 2.98it/s] 89%|████████▉ | 330026/371472 [4:05:31<3:45:53, 3.06it/s] 89%|████████▉ | 330027/371472 [4:05:31<3:34:16, 3.22it/s] 89%|████████▉ | 330028/371472 [4:05:31<3:29:18, 3.30it/s] 89%|████████▉ | 330029/371472 [4:05:31<3:20:46, 3.44it/s] 89%|████████▉ | 330030/371472 [4:05:32<3:18:34, 3.48it/s] 89%|████████▉ | 330031/371472 [4:05:32<3:38:32, 3.16it/s] 89%|████████▉ | 330032/371472 [4:05:32<3:23:57, 3.39it/s] 89%|████████▉ | 330033/371472 [4:05:33<3:20:19, 3.45it/s] 89%|████████▉ | 330034/371472 [4:05:33<3:16:35, 3.51it/s] 89%|████████▉ | 330035/371472 [4:05:33<3:06:51, 3.70it/s] 89%|████████▉ | 330036/371472 [4:05:33<3:02:39, 3.78it/s] 89%|████████▉ | 330037/371472 [4:05:34<2:58:34, 3.87it/s] 89%|████████▉ | 330038/371472 [4:05:34<2:59:33, 3.85it/s] 89%|████████▉ | 330039/371472 [4:05:34<3:01:12, 3.81it/s] 89%|████████▉ | 330040/371472 [4:05:34<3:03:09, 3.77it/s] {'loss': 2.4553, 'learning_rate': 2.004352604020772e-07, 'epoch': 14.22} + 89%|████████▉ | 330040/371472 [4:05:34<3:03:09, 3.77it/s] 89%|████████▉ | 330041/371472 [4:05:35<3:00:11, 3.83it/s] 89%|████████▉ | 330042/371472 [4:05:35<3:15:41, 3.53it/s] 89%|████████▉ | 330043/371472 [4:05:35<3:13:01, 3.58it/s] 89%|████████▉ | 330044/371472 [4:05:35<3:11:05, 3.61it/s] 89%|████████▉ | 330045/371472 [4:05:36<3:13:18, 3.57it/s] 89%|████████▉ | 330046/371472 [4:05:36<3:13:49, 3.56it/s] 89%|████████▉ | 330047/371472 [4:05:36<3:12:49, 3.58it/s] 89%|████████▉ | 330048/371472 [4:05:37<3:08:42, 3.66it/s] 89%|████████▉ | 330049/371472 [4:05:37<3:02:37, 3.78it/s] 89%|████████▉ | 330050/371472 [4:05:37<2:57:13, 3.90it/s] 89%|████████▉ | 330051/371472 [4:05:37<3:08:10, 3.67it/s] 89%|████████▉ | 330052/371472 [4:05:38<3:05:26, 3.72it/s] 89%|████████▉ | 330053/371472 [4:05:38<3:05:16, 3.73it/s] 89%|████████▉ | 330054/371472 [4:05:38<3:07:10, 3.69it/s] 89%|████████▉ | 330055/371472 [4:05:38<3:14:53, 3.54it/s] 89%|████████▉ | 330056/371472 [4:05:39<3:12:32, 3.58it/s] 89%|████████▉ | 330057/371472 [4:05:39<3:10:11, 3.63it/s] 89%|████████▉ | 330058/371472 [4:05:39<3:34:26, 3.22it/s] 89%|████████▉ | 330059/371472 [4:05:40<3:31:03, 3.27it/s] 89%|████████▉ | 330060/371472 [4:05:40<3:19:18, 3.46it/s] {'loss': 2.7358, 'learning_rate': 2.0038677842659827e-07, 'epoch': 14.22} + 89%|████████▉ | 330060/371472 [4:05:40<3:19:18, 3.46it/s] 89%|████████▉ | 330061/371472 [4:05:40<3:15:22, 3.53it/s] 89%|████████▉ | 330062/371472 [4:05:40<3:07:33, 3.68it/s] 89%|████████▉ | 330063/371472 [4:05:41<3:08:59, 3.65it/s] 89%|████████▉ | 330064/371472 [4:05:41<3:10:40, 3.62it/s] 89%|████████▉ | 330065/371472 [4:05:41<3:09:47, 3.64it/s] 89%|████████▉ | 330066/371472 [4:05:42<3:09:27, 3.64it/s] 89%|████████▉ | 330067/371472 [4:05:42<3:13:00, 3.58it/s] 89%|████████▉ | 330068/371472 [4:05:42<3:11:30, 3.60it/s] 89%|████████▉ | 330069/371472 [4:05:42<3:23:19, 3.39it/s] 89%|████████▉ | 330070/371472 [4:05:43<3:20:02, 3.45it/s] 89%|████████▉ | 330071/371472 [4:05:43<3:11:51, 3.60it/s] 89%|████████▉ | 330072/371472 [4:05:43<3:15:30, 3.53it/s] 89%|████████▉ | 330073/371472 [4:05:44<3:12:34, 3.58it/s] 89%|████████▉ | 330074/371472 [4:05:44<3:03:05, 3.77it/s] 89%|████████▉ | 330075/371472 [4:05:44<3:03:16, 3.76it/s] 89%|████████▉ | 330076/371472 [4:05:44<3:04:40, 3.74it/s] 89%|████████▉ | 330077/371472 [4:05:45<3:15:17, 3.53it/s] 89%|████████▉ | 330078/371472 [4:05:45<3:32:48, 3.24it/s] 89%|████████▉ | 330079/371472 [4:05:45<3:23:05, 3.40it/s] 89%|████████▉ | 330080/371472 [4:05:46<3:15:52, 3.52it/s] {'loss': 2.7593, 'learning_rate': 2.003382964511194e-07, 'epoch': 14.22} + 89%|████████▉ | 330080/371472 [4:05:46<3:15:52, 3.52it/s] 89%|████████▉ | 330081/371472 [4:05:46<3:09:40, 3.64it/s] 89%|████████▉ | 330082/371472 [4:05:46<3:07:00, 3.69it/s] 89%|████████▉ | 330083/371472 [4:05:46<3:07:58, 3.67it/s] 89%|████████▉ | 330084/371472 [4:05:47<3:05:01, 3.73it/s] 89%|████████▉ | 330085/371472 [4:05:47<3:22:01, 3.41it/s] 89%|████████▉ | 330086/371472 [4:05:47<3:12:03, 3.59it/s] 89%|████████▉ | 330087/371472 [4:05:47<3:05:06, 3.73it/s] 89%|████████▉ | 330088/371472 [4:05:48<3:05:29, 3.72it/s] 89%|████████▉ | 330089/371472 [4:05:48<3:24:36, 3.37it/s] 89%|████████▉ | 330090/371472 [4:05:48<3:18:02, 3.48it/s] 89%|████████▉ | 330091/371472 [4:05:49<3:14:38, 3.54it/s] 89%|████████▉ | 330092/371472 [4:05:49<3:12:32, 3.58it/s] 89%|████████▉ | 330093/371472 [4:05:49<3:04:48, 3.73it/s] 89%|████████▉ | 330094/371472 [4:05:49<3:07:53, 3.67it/s] 89%|████████▉ | 330095/371472 [4:05:50<3:02:35, 3.78it/s] 89%|████████▉ | 330096/371472 [4:05:50<2:59:56, 3.83it/s] 89%|████████▉ | 330097/371472 [4:05:50<3:02:41, 3.77it/s] 89%|████████▉ | 330098/371472 [4:05:50<3:08:57, 3.65it/s] 89%|████████▉ | 330099/371472 [4:05:51<3:08:22, 3.66it/s] 89%|████████▉ | 330100/371472 [4:05:51<3:11:11, 3.61it/s] {'loss': 2.6432, 'learning_rate': 2.0028981447564046e-07, 'epoch': 14.22} + 89%|████████▉ | 330100/371472 [4:05:51<3:11:11, 3.61it/s] 89%|████████▉ | 330101/371472 [4:05:51<3:05:23, 3.72it/s] 89%|████████▉ | 330102/371472 [4:05:52<3:05:56, 3.71it/s] 89%|████████▉ | 330103/371472 [4:05:52<3:08:41, 3.65it/s] 89%|████████▉ | 330104/371472 [4:05:52<3:09:08, 3.65it/s] 89%|████████▉ | 330105/371472 [4:05:52<3:23:42, 3.38it/s] 89%|████████▉ | 330106/371472 [4:05:53<3:18:18, 3.48it/s] 89%|████████▉ | 330107/371472 [4:05:53<3:29:31, 3.29it/s] 89%|████████▉ | 330108/371472 [4:05:53<3:36:27, 3.18it/s] 89%|████████▉ | 330109/371472 [4:05:54<3:51:47, 2.97it/s] 89%|████████▉ | 330110/371472 [4:05:54<3:50:18, 2.99it/s] 89%|████████▉ | 330111/371472 [4:05:54<3:44:55, 3.06it/s] 89%|████████▉ | 330112/371472 [4:05:55<3:31:03, 3.27it/s] 89%|████████▉ | 330113/371472 [4:05:55<3:21:59, 3.41it/s] 89%|████████▉ | 330114/371472 [4:05:55<3:12:18, 3.58it/s] 89%|████████▉ | 330115/371472 [4:05:55<3:07:46, 3.67it/s] 89%|████████▉ | 330116/371472 [4:05:56<3:17:03, 3.50it/s] 89%|████████▉ | 330117/371472 [4:05:56<3:14:30, 3.54it/s] 89%|████████▉ | 330118/371472 [4:05:56<3:12:38, 3.58it/s] 89%|████████▉ | 330119/371472 [4:05:57<3:22:04, 3.41it/s] 89%|████████▉ | 330120/371472 [4:05:57<3:11:12, 3.60it/s] {'loss': 2.5327, 'learning_rate': 2.0024133250016164e-07, 'epoch': 14.22} + 89%|████████▉ | 330120/371472 [4:05:57<3:11:12, 3.60it/s] 89%|████████▉ | 330121/371472 [4:05:57<3:08:14, 3.66it/s] 89%|████████▉ | 330122/371472 [4:05:57<3:08:31, 3.66it/s] 89%|████████▉ | 330123/371472 [4:05:58<3:12:01, 3.59it/s] 89%|████████▉ | 330124/371472 [4:05:58<3:05:03, 3.72it/s] 89%|████████▉ | 330125/371472 [4:05:58<3:07:55, 3.67it/s] 89%|████████▉ | 330126/371472 [4:05:59<3:11:55, 3.59it/s] 89%|████████▉ | 330127/371472 [4:05:59<3:09:05, 3.64it/s] 89%|████████▉ | 330128/371472 [4:05:59<3:13:48, 3.56it/s] 89%|████████▉ | 330129/371472 [4:05:59<3:12:49, 3.57it/s] 89%|████████▉ | 330130/371472 [4:06:00<3:08:57, 3.65it/s] 89%|████████▉ | 330131/371472 [4:06:00<3:10:58, 3.61it/s] 89%|████████▉ | 330132/371472 [4:06:00<3:14:37, 3.54it/s] 89%|████████▉ | 330133/371472 [4:06:00<3:10:00, 3.63it/s] 89%|████████▉ | 330134/371472 [4:06:01<3:54:07, 2.94it/s] 89%|████████▉ | 330135/371472 [4:06:01<3:44:08, 3.07it/s] 89%|████████▉ | 330136/371472 [4:06:01<3:24:40, 3.37it/s] 89%|████████▉ | 330137/371472 [4:06:02<3:13:23, 3.56it/s] 89%|████████▉ | 330138/371472 [4:06:02<3:17:48, 3.48it/s] 89%|████████▉ | 330139/371472 [4:06:02<3:31:59, 3.25it/s] 89%|████████▉ | 330140/371472 [4:06:03<3:38:23, 3.15it/s] {'loss': 2.4931, 'learning_rate': 2.0019285052468266e-07, 'epoch': 14.22} + 89%|████████▉ | 330140/371472 [4:06:03<3:38:23, 3.15it/s] 89%|████████▉ | 330141/371472 [4:06:03<3:31:56, 3.25it/s] 89%|████████▉ | 330142/371472 [4:06:03<3:18:55, 3.46it/s] 89%|████████▉ | 330143/371472 [4:06:04<3:14:55, 3.53it/s] 89%|████████▉ | 330144/371472 [4:06:04<3:14:51, 3.53it/s] 89%|████████▉ | 330145/371472 [4:06:04<3:15:05, 3.53it/s] 89%|████████▉ | 330146/371472 [4:06:04<3:11:23, 3.60it/s] 89%|████████▉ | 330147/371472 [4:06:05<3:13:16, 3.56it/s] 89%|████████▉ | 330148/371472 [4:06:05<3:27:25, 3.32it/s] 89%|████████▉ | 330149/371472 [4:06:05<3:17:40, 3.48it/s] 89%|████████▉ | 330150/371472 [4:06:06<3:15:36, 3.52it/s] 89%|████████▉ | 330151/371472 [4:06:06<3:17:54, 3.48it/s] 89%|████████▉ | 330152/371472 [4:06:06<3:13:28, 3.56it/s] 89%|████████▉ | 330153/371472 [4:06:06<3:07:22, 3.68it/s] 89%|████████▉ | 330154/371472 [4:06:07<3:18:36, 3.47it/s] 89%|████████▉ | 330155/371472 [4:06:07<3:30:34, 3.27it/s] 89%|████████▉ | 330156/371472 [4:06:07<3:32:04, 3.25it/s] 89%|████████▉ | 330157/371472 [4:06:08<3:21:46, 3.41it/s] 89%|████████▉ | 330158/371472 [4:06:08<3:25:38, 3.35it/s] 89%|████████▉ | 330159/371472 [4:06:08<3:22:25, 3.40it/s] 89%|████████▉ | 330160/371472 [4:06:08<3:23:59, 3.38it/s] {'loss': 2.5199, 'learning_rate': 2.0014436854920383e-07, 'epoch': 14.22} + 89%|████████▉ | 330160/371472 [4:06:08<3:23:59, 3.38it/s] 89%|████████▉ | 330161/371472 [4:06:09<3:24:07, 3.37it/s] 89%|████████▉ | 330162/371472 [4:06:09<3:26:28, 3.33it/s] 89%|████████▉ | 330163/371472 [4:06:09<3:21:21, 3.42it/s] 89%|████████▉ | 330164/371472 [4:06:10<3:11:37, 3.59it/s] 89%|████████▉ | 330165/371472 [4:06:10<3:09:31, 3.63it/s] 89%|████████▉ | 330166/371472 [4:06:10<3:08:03, 3.66it/s] 89%|████████▉ | 330167/371472 [4:06:10<3:05:46, 3.71it/s] 89%|████████▉ | 330168/371472 [4:06:11<3:08:41, 3.65it/s] 89%|████████▉ | 330169/371472 [4:06:11<3:07:06, 3.68it/s] 89%|████████▉ | 330170/371472 [4:06:11<3:15:15, 3.53it/s] 89%|████████▉ | 330171/371472 [4:06:12<3:10:17, 3.62it/s] 89%|████████▉ | 330172/371472 [4:06:12<3:15:13, 3.53it/s] 89%|████████▉ | 330173/371472 [4:06:12<3:08:00, 3.66it/s] 89%|████████▉ | 330174/371472 [4:06:12<3:12:06, 3.58it/s] 89%|████████▉ | 330175/371472 [4:06:13<3:06:33, 3.69it/s] 89%|████████▉ | 330176/371472 [4:06:13<3:11:36, 3.59it/s] 89%|████████▉ | 330177/371472 [4:06:13<3:10:18, 3.62it/s] 89%|████████▉ | 330178/371472 [4:06:13<3:05:02, 3.72it/s] 89%|████████▉ | 330179/371472 [4:06:14<3:08:40, 3.65it/s] 89%|████████▉ | 330180/371472 [4:06:14<3:10:50, 3.61it/s] {'loss': 2.6703, 'learning_rate': 2.0009588657372493e-07, 'epoch': 14.22} + 89%|████████▉ | 330180/371472 [4:06:14<3:10:50, 3.61it/s] 89%|████████▉ | 330181/371472 [4:06:14<3:10:26, 3.61it/s] 89%|████████▉ | 330182/371472 [4:06:15<3:07:39, 3.67it/s] 89%|████████▉ | 330183/371472 [4:06:15<3:06:03, 3.70it/s] 89%|████████▉ | 330184/371472 [4:06:15<3:01:10, 3.80it/s] 89%|████████▉ | 330185/371472 [4:06:15<3:09:57, 3.62it/s] 89%|████████▉ | 330186/371472 [4:06:16<3:25:27, 3.35it/s] 89%|████████▉ | 330187/371472 [4:06:16<3:25:13, 3.35it/s] 89%|████████▉ | 330188/371472 [4:06:16<3:30:40, 3.27it/s] 89%|████████▉ | 330189/371472 [4:06:17<3:36:01, 3.19it/s] 89%|████████▉ | 330190/371472 [4:06:17<3:35:01, 3.20it/s] 89%|████████▉ | 330191/371472 [4:06:17<3:25:24, 3.35it/s] 89%|████████▉ | 330192/371472 [4:06:18<3:20:52, 3.43it/s] 89%|████████▉ | 330193/371472 [4:06:18<3:34:48, 3.20it/s] 89%|████████▉ | 330194/371472 [4:06:18<3:21:47, 3.41it/s] 89%|████████▉ | 330195/371472 [4:06:18<3:17:20, 3.49it/s] 89%|████████▉ | 330196/371472 [4:06:19<3:28:07, 3.31it/s] 89%|████████▉ | 330197/371472 [4:06:19<3:16:40, 3.50it/s] 89%|████████▉ | 330198/371472 [4:06:19<3:17:14, 3.49it/s] 89%|████████▉ | 330199/371472 [4:06:20<3:29:33, 3.28it/s] 89%|████████▉ | 330200/371472 [4:06:20<3:33:33, 3.22it/s] {'loss': 2.5269, 'learning_rate': 2.0004740459824603e-07, 'epoch': 14.22} + 89%|████████▉ | 330200/371472 [4:06:20<3:33:33, 3.22it/s] 89%|████████▉ | 330201/371472 [4:06:20<3:23:47, 3.38it/s] 89%|████████▉ | 330202/371472 [4:06:20<3:18:21, 3.47it/s] 89%|████████▉ | 330203/371472 [4:06:21<3:11:59, 3.58it/s] 89%|████████▉ | 330204/371472 [4:06:21<3:28:06, 3.30it/s] 89%|████████▉ | 330205/371472 [4:06:21<3:25:39, 3.34it/s] 89%|████████▉ | 330206/371472 [4:06:22<3:17:25, 3.48it/s] 89%|████████▉ | 330207/371472 [4:06:22<3:11:49, 3.59it/s] 89%|████████▉ | 330208/371472 [4:06:22<3:28:53, 3.29it/s] 89%|████████▉ | 330209/371472 [4:06:23<3:27:35, 3.31it/s] 89%|████████▉ | 330210/371472 [4:06:23<3:21:03, 3.42it/s] 89%|████████▉ | 330211/371472 [4:06:23<3:09:55, 3.62it/s] 89%|████████▉ | 330212/371472 [4:06:23<3:14:28, 3.54it/s] 89%|████████▉ | 330213/371472 [4:06:24<3:14:46, 3.53it/s] 89%|████████▉ | 330214/371472 [4:06:24<3:14:05, 3.54it/s] 89%|████████▉ | 330215/371472 [4:06:24<3:16:54, 3.49it/s] 89%|████████▉ | 330216/371472 [4:06:24<3:12:29, 3.57it/s] 89%|████████▉ | 330217/371472 [4:06:25<3:21:30, 3.41it/s] 89%|████████▉ | 330218/371472 [4:06:25<3:15:30, 3.52it/s] 89%|████████▉ | 330219/371472 [4:06:25<3:13:05, 3.56it/s] 89%|████████▉ | 330220/371472 [4:06:26<3:09:07, 3.64it/s] {'loss': 2.6215, 'learning_rate': 1.999989226227671e-07, 'epoch': 14.22} + 89%|████████▉ | 330220/371472 [4:06:26<3:09:07, 3.64it/s] 89%|████████▉ | 330221/371472 [4:06:26<3:10:25, 3.61it/s] 89%|████████▉ | 330222/371472 [4:06:26<3:11:32, 3.59it/s] 89%|████████▉ | 330223/371472 [4:06:26<3:08:19, 3.65it/s] 89%|████████▉ | 330224/371472 [4:06:27<3:08:32, 3.65it/s] 89%|████████▉ | 330225/371472 [4:06:27<3:06:57, 3.68it/s] 89%|████████▉ | 330226/371472 [4:06:27<3:06:37, 3.68it/s] 89%|████████▉ | 330227/371472 [4:06:28<3:05:39, 3.70it/s] 89%|████████▉ | 330228/371472 [4:06:28<3:03:46, 3.74it/s] 89%|████████▉ | 330229/371472 [4:06:28<3:00:59, 3.80it/s] 89%|████████▉ | 330230/371472 [4:06:28<3:04:54, 3.72it/s] 89%|████████▉ | 330231/371472 [4:06:29<3:21:28, 3.41it/s] 89%|████████▉ | 330232/371472 [4:06:29<3:49:50, 2.99it/s] 89%|████████▉ | 330233/371472 [4:06:29<3:45:40, 3.05it/s] 89%|████████▉ | 330234/371472 [4:06:30<3:30:36, 3.26it/s] 89%|████████▉ | 330235/371472 [4:06:30<3:16:32, 3.50it/s] 89%|████████▉ | 330236/371472 [4:06:30<3:20:19, 3.43it/s] 89%|████████▉ | 330237/371472 [4:06:30<3:15:04, 3.52it/s] 89%|████████▉ | 330238/371472 [4:06:31<3:12:12, 3.58it/s] 89%|████████▉ | 330239/371472 [4:06:31<3:04:36, 3.72it/s] 89%|████████▉ | 330240/371472 [4:06:31<3:17:27, 3.48it/s] {'loss': 2.5763, 'learning_rate': 1.9995044064728828e-07, 'epoch': 14.22} + 89%|████████▉ | 330240/371472 [4:06:31<3:17:27, 3.48it/s] 89%|████████▉ | 330241/371472 [4:06:32<3:14:33, 3.53it/s] 89%|████████▉ | 330242/371472 [4:06:32<3:08:09, 3.65it/s] 89%|████████▉ | 330243/371472 [4:06:32<3:14:17, 3.54it/s] 89%|████████▉ | 330244/371472 [4:06:32<3:14:31, 3.53it/s] 89%|████████▉ | 330245/371472 [4:06:33<3:22:43, 3.39it/s] 89%|████████▉ | 330246/371472 [4:06:33<3:33:51, 3.21it/s] 89%|████████▉ | 330247/371472 [4:06:33<3:37:04, 3.17it/s] 89%|████████▉ | 330248/371472 [4:06:34<3:25:07, 3.35it/s] 89%|████████▉ | 330249/371472 [4:06:34<3:34:41, 3.20it/s] 89%|████████▉ | 330250/371472 [4:06:34<3:25:36, 3.34it/s] 89%|████████▉ | 330251/371472 [4:06:35<3:16:51, 3.49it/s] 89%|████████▉ | 330252/371472 [4:06:35<3:20:57, 3.42it/s] 89%|████████▉ | 330253/371472 [4:06:35<3:28:18, 3.30it/s] 89%|████████▉ | 330254/371472 [4:06:35<3:24:16, 3.36it/s] 89%|████████▉ | 330255/371472 [4:06:36<3:18:43, 3.46it/s] 89%|████████▉ | 330256/371472 [4:06:36<3:17:20, 3.48it/s] 89%|████████▉ | 330257/371472 [4:06:36<3:17:23, 3.48it/s] 89%|████████▉ | 330258/371472 [4:06:37<3:15:29, 3.51it/s] 89%|████████▉ | 330259/371472 [4:06:37<3:04:37, 3.72it/s] 89%|████████▉ | 330260/371472 [4:06:37<3:03:30, 3.74it/s] {'loss': 2.7285, 'learning_rate': 1.9990195867180932e-07, 'epoch': 14.22} + 89%|████████▉ | 330260/371472 [4:06:37<3:03:30, 3.74it/s] 89%|████████▉ | 330261/371472 [4:06:37<3:24:45, 3.35it/s] 89%|████████▉ | 330262/371472 [4:06:38<3:31:01, 3.25it/s] 89%|████████▉ | 330263/371472 [4:06:38<3:20:45, 3.42it/s] 89%|████████▉ | 330264/371472 [4:06:38<3:12:47, 3.56it/s] 89%|████████▉ | 330265/371472 [4:06:39<3:36:04, 3.18it/s] 89%|████████▉ | 330266/371472 [4:06:39<3:37:00, 3.16it/s] 89%|████████▉ | 330267/371472 [4:06:39<3:23:12, 3.38it/s] 89%|████████▉ | 330268/371472 [4:06:40<3:16:59, 3.49it/s] 89%|████████▉ | 330269/371472 [4:06:40<3:18:09, 3.47it/s] 89%|████████▉ | 330270/371472 [4:06:40<3:13:52, 3.54it/s] 89%|████████▉ | 330271/371472 [4:06:40<3:11:49, 3.58it/s] 89%|████████▉ | 330272/371472 [4:06:41<3:15:51, 3.51it/s] 89%|████████▉ | 330273/371472 [4:06:41<3:11:21, 3.59it/s] 89%|████████▉ | 330274/371472 [4:06:41<3:09:05, 3.63it/s] 89%|████████▉ | 330275/371472 [4:06:41<3:07:55, 3.65it/s] 89%|████████▉ | 330276/371472 [4:06:42<3:02:40, 3.76it/s] 89%|████████▉ | 330277/371472 [4:06:42<2:55:16, 3.92it/s] 89%|████████▉ | 330278/371472 [4:06:42<3:06:28, 3.68it/s] 89%|████████▉ | 330279/371472 [4:06:43<3:05:30, 3.70it/s] 89%|████████▉ | 330280/371472 [4:06:43<3:05:20, 3.70it/s] {'loss': 2.523, 'learning_rate': 1.9985347669633048e-07, 'epoch': 14.23} + 89%|████████▉ | 330280/371472 [4:06:43<3:05:20, 3.70it/s] 89%|████████▉ | 330281/371472 [4:06:43<3:00:18, 3.81it/s] 89%|████████▉ | 330282/371472 [4:06:43<3:08:18, 3.65it/s] 89%|████████▉ | 330283/371472 [4:06:44<3:10:03, 3.61it/s] 89%|████████▉ | 330284/371472 [4:06:44<3:13:28, 3.55it/s] 89%|████████▉ | 330285/371472 [4:06:44<3:06:49, 3.67it/s] 89%|████████▉ | 330286/371472 [4:06:44<3:05:39, 3.70it/s] 89%|████████▉ | 330287/371472 [4:06:45<3:00:50, 3.80it/s] 89%|████████▉ | 330288/371472 [4:06:45<3:14:32, 3.53it/s] 89%|████████▉ | 330289/371472 [4:06:45<3:25:40, 3.34it/s] 89%|████████▉ | 330290/371472 [4:06:46<3:23:18, 3.38it/s] 89%|████████▉ | 330291/371472 [4:06:46<3:19:03, 3.45it/s] 89%|████████▉ | 330292/371472 [4:06:46<3:29:04, 3.28it/s] 89%|████████▉ | 330293/371472 [4:06:47<3:24:58, 3.35it/s] 89%|████████▉ | 330294/371472 [4:06:47<3:23:58, 3.36it/s] 89%|████████▉ | 330295/371472 [4:06:47<3:31:29, 3.24it/s] 89%|████████▉ | 330296/371472 [4:06:47<3:37:13, 3.16it/s] 89%|████████▉ | 330297/371472 [4:06:48<3:23:41, 3.37it/s] 89%|████████▉ | 330298/371472 [4:06:48<3:24:51, 3.35it/s] 89%|████████▉ | 330299/371472 [4:06:48<3:30:42, 3.26it/s] 89%|████████▉ | 330300/371472 [4:06:49<3:30:50, 3.25it/s] {'loss': 2.5758, 'learning_rate': 1.9980499472085155e-07, 'epoch': 14.23} + 89%|████████▉ | 330300/371472 [4:06:49<3:30:50, 3.25it/s] 89%|████████▉ | 330301/371472 [4:06:49<3:32:56, 3.22it/s] 89%|████████▉ | 330302/371472 [4:06:49<3:22:40, 3.39it/s] 89%|████████▉ | 330303/371472 [4:06:50<3:18:43, 3.45it/s] 89%|████████▉ | 330304/371472 [4:06:50<3:14:47, 3.52it/s] 89%|████████▉ | 330305/371472 [4:06:50<3:15:23, 3.51it/s] 89%|████████▉ | 330306/371472 [4:06:50<3:10:32, 3.60it/s] 89%|████████▉ | 330307/371472 [4:06:51<3:21:39, 3.40it/s] 89%|████████▉ | 330308/371472 [4:06:51<3:22:32, 3.39it/s] 89%|████████▉ | 330309/371472 [4:06:51<3:13:09, 3.55it/s] 89%|████████▉ | 330310/371472 [4:06:51<3:04:59, 3.71it/s] 89%|████████▉ | 330311/371472 [4:06:52<3:11:39, 3.58it/s] 89%|████████▉ | 330312/371472 [4:06:52<3:08:26, 3.64it/s] 89%|████████▉ | 330313/371472 [4:06:52<3:09:42, 3.62it/s] 89%|████████▉ | 330314/371472 [4:06:53<3:03:11, 3.74it/s] 89%|████████▉ | 330315/371472 [4:06:53<2:59:50, 3.81it/s] 89%|████████▉ | 330316/371472 [4:06:53<3:03:39, 3.73it/s] 89%|████████▉ | 330317/371472 [4:06:53<3:28:46, 3.29it/s] 89%|████████▉ | 330318/371472 [4:06:54<3:21:24, 3.41it/s] 89%|████████▉ | 330319/371472 [4:06:54<3:17:56, 3.47it/s] 89%|████████▉ | 330320/371472 [4:06:54<3:16:13, 3.50it/s] {'loss': 2.4564, 'learning_rate': 1.997565127453727e-07, 'epoch': 14.23} + 89%|████████▉ | 330320/371472 [4:06:54<3:16:13, 3.50it/s] 89%|████████▉ | 330321/371472 [4:06:55<3:16:26, 3.49it/s] 89%|████████▉ | 330322/371472 [4:06:55<3:36:19, 3.17it/s] 89%|████████▉ | 330323/371472 [4:06:55<3:26:17, 3.32it/s] 89%|████████▉ | 330324/371472 [4:06:56<3:21:32, 3.40it/s] 89%|████████▉ | 330325/371472 [4:06:56<3:26:33, 3.32it/s] 89%|████████▉ | 330326/371472 [4:06:56<3:25:32, 3.34it/s] 89%|████████▉ | 330327/371472 [4:06:56<3:17:07, 3.48it/s] 89%|████████▉ | 330328/371472 [4:06:57<3:15:48, 3.50it/s] 89%|████████▉ | 330329/371472 [4:06:57<3:25:58, 3.33it/s] 89%|████████▉ | 330330/371472 [4:06:57<3:33:28, 3.21it/s] 89%|████████▉ | 330331/371472 [4:06:58<3:23:56, 3.36it/s] 89%|████████▉ | 330332/371472 [4:06:58<3:27:21, 3.31it/s] 89%|████████▉ | 330333/371472 [4:06:58<3:20:51, 3.41it/s] 89%|████████▉ | 330334/371472 [4:06:58<3:16:50, 3.48it/s] 89%|████████▉ | 330335/371472 [4:06:59<3:07:46, 3.65it/s] 89%|████████▉ | 330336/371472 [4:06:59<3:07:27, 3.66it/s] 89%|████████▉ | 330337/371472 [4:06:59<3:01:35, 3.78it/s] 89%|████████▉ | 330338/371472 [4:06:59<2:57:28, 3.86it/s] 89%|████████▉ | 330339/371472 [4:07:00<2:57:18, 3.87it/s] 89%|████████▉ | 330340/371472 [4:07:00<3:00:00, 3.81it/s] {'loss': 2.5022, 'learning_rate': 1.9970803076989374e-07, 'epoch': 14.23} + 89%|████████▉ | 330340/371472 [4:07:00<3:00:00, 3.81it/s] 89%|████████▉ | 330341/371472 [4:07:00<3:00:21, 3.80it/s] 89%|████████▉ | 330342/371472 [4:07:01<3:11:52, 3.57it/s] 89%|████████▉ | 330343/371472 [4:07:01<3:07:31, 3.66it/s] 89%|████████▉ | 330344/371472 [4:07:01<3:04:40, 3.71it/s] 89%|█████��██▉ | 330345/371472 [4:07:01<3:03:25, 3.74it/s] 89%|████████▉ | 330346/371472 [4:07:02<3:04:20, 3.72it/s] 89%|████████▉ | 330347/371472 [4:07:02<3:23:27, 3.37it/s] 89%|████████▉ | 330348/371472 [4:07:02<3:39:41, 3.12it/s] 89%|████████▉ | 330349/371472 [4:07:03<3:28:43, 3.28it/s] 89%|████████▉ | 330350/371472 [4:07:03<3:17:56, 3.46it/s] 89%|████████▉ | 330351/371472 [4:07:03<3:12:16, 3.56it/s] 89%|████████▉ | 330352/371472 [4:07:03<3:15:50, 3.50it/s] 89%|████████▉ | 330353/371472 [4:07:04<3:13:16, 3.55it/s] 89%|████████▉ | 330354/371472 [4:07:04<3:22:23, 3.39it/s] 89%|████████▉ | 330355/371472 [4:07:04<3:17:59, 3.46it/s] 89%|████████▉ | 330356/371472 [4:07:05<3:20:40, 3.41it/s] 89%|████████▉ | 330357/371472 [4:07:05<3:11:54, 3.57it/s] 89%|████████▉ | 330358/371472 [4:07:05<3:20:03, 3.43it/s] 89%|████████▉ | 330359/371472 [4:07:05<3:18:35, 3.45it/s] 89%|████████▉ | 330360/371472 [4:07:06<3:10:26, 3.60it/s] {'loss': 2.6425, 'learning_rate': 1.9965954879441495e-07, 'epoch': 14.23} + 89%|████████▉ | 330360/371472 [4:07:06<3:10:26, 3.60it/s] 89%|████████▉ | 330361/371472 [4:07:06<3:11:47, 3.57it/s] 89%|████████▉ | 330362/371472 [4:07:06<3:14:14, 3.53it/s] 89%|████████▉ | 330363/371472 [4:07:07<3:20:06, 3.42it/s] 89%|████████▉ | 330364/371472 [4:07:07<3:08:36, 3.63it/s] 89%|████████▉ | 330365/371472 [4:07:07<3:10:21, 3.60it/s] 89%|████████▉ | 330366/371472 [4:07:07<3:19:50, 3.43it/s] 89%|████████▉ | 330367/371472 [4:07:08<3:15:10, 3.51it/s] 89%|████████▉ | 330368/371472 [4:07:08<3:25:26, 3.33it/s] 89%|████████▉ | 330369/371472 [4:07:08<3:22:04, 3.39it/s] 89%|████████▉ | 330370/371472 [4:07:09<3:16:18, 3.49it/s] 89%|████████▉ | 330371/371472 [4:07:09<3:11:15, 3.58it/s] 89%|████████▉ | 330372/371472 [4:07:09<3:09:26, 3.62it/s] 89%|████████▉ | 330373/371472 [4:07:09<3:04:26, 3.71it/s] 89%|████████▉ | 330374/371472 [4:07:10<3:13:05, 3.55it/s] 89%|████████▉ | 330375/371472 [4:07:10<3:13:14, 3.54it/s] 89%|████████▉ | 330376/371472 [4:07:10<3:15:23, 3.51it/s] 89%|████████▉ | 330377/371472 [4:07:11<3:19:02, 3.44it/s] 89%|████████▉ | 330378/371472 [4:07:11<3:27:49, 3.30it/s] 89%|████████▉ | 330379/371472 [4:07:11<3:25:37, 3.33it/s] 89%|████████▉ | 330380/371472 [4:07:12<3:25:19, 3.34it/s] {'loss': 2.5238, 'learning_rate': 1.9961106681893597e-07, 'epoch': 14.23} + 89%|████████▉ | 330380/371472 [4:07:12<3:25:19, 3.34it/s] 89%|████████▉ | 330381/371472 [4:07:12<3:17:41, 3.46it/s] 89%|████████▉ | 330382/371472 [4:07:12<3:21:59, 3.39it/s] 89%|████████▉ | 330383/371472 [4:07:12<3:12:37, 3.56it/s] 89%|████████▉ | 330384/371472 [4:07:13<3:10:17, 3.60it/s] 89%|████████▉ | 330385/371472 [4:07:13<3:01:10, 3.78it/s] 89%|████████▉ | 330386/371472 [4:07:13<3:00:01, 3.80it/s] 89%|████████▉ | 330387/371472 [4:07:13<2:58:13, 3.84it/s] 89%|████████▉ | 330388/371472 [4:07:14<3:03:41, 3.73it/s] 89%|████████▉ | 330389/371472 [4:07:14<3:03:11, 3.74it/s] 89%|████████▉ | 330390/371472 [4:07:14<3:02:42, 3.75it/s] 89%|████████▉ | 330391/371472 [4:07:14<2:59:07, 3.82it/s] 89%|████████▉ | 330392/371472 [4:07:15<3:08:08, 3.64it/s] 89%|████████▉ | 330393/371472 [4:07:15<3:09:24, 3.61it/s] 89%|████████▉ | 330394/371472 [4:07:15<3:08:37, 3.63it/s] 89%|████████▉ | 330395/371472 [4:07:16<3:07:24, 3.65it/s] 89%|████████▉ | 330396/371472 [4:07:16<3:10:51, 3.59it/s] 89%|████████▉ | 330397/371472 [4:07:16<3:04:49, 3.70it/s] 89%|████████▉ | 330398/371472 [4:07:16<3:07:10, 3.66it/s] 89%|████████▉ | 330399/371472 [4:07:17<3:06:57, 3.66it/s] 89%|████████▉ | 330400/371472 [4:07:17<3:08:26, 3.63it/s] {'loss': 2.6752, 'learning_rate': 1.9956258484345712e-07, 'epoch': 14.23} + 89%|████████▉ | 330400/371472 [4:07:17<3:08:26, 3.63it/s] 89%|████████▉ | 330401/371472 [4:07:17<3:04:41, 3.71it/s] 89%|████████▉ | 330402/371472 [4:07:17<3:05:14, 3.70it/s] 89%|████████▉ | 330403/371472 [4:07:18<3:04:18, 3.71it/s] 89%|████████▉ | 330404/371472 [4:07:18<3:11:38, 3.57it/s] 89%|████████▉ | 330405/371472 [4:07:18<3:05:35, 3.69it/s] 89%|████████▉ | 330406/371472 [4:07:19<3:09:12, 3.62it/s] 89%|████████▉ | 330407/371472 [4:07:19<3:12:19, 3.56it/s] 89%|████████▉ | 330408/371472 [4:07:19<3:09:27, 3.61it/s] 89%|████████▉ | 330409/371472 [4:07:19<3:14:06, 3.53it/s] 89%|████████▉ | 330410/371472 [4:07:20<3:09:43, 3.61it/s] 89%|████████▉ | 330411/371472 [4:07:20<3:09:04, 3.62it/s] 89%|████████▉ | 330412/371472 [4:07:20<3:09:57, 3.60it/s] 89%|████████▉ | 330413/371472 [4:07:21<3:10:47, 3.59it/s] 89%|████████▉ | 330414/371472 [4:07:21<3:21:51, 3.39it/s] 89%|████████▉ | 330415/371472 [4:07:21<3:13:20, 3.54it/s] 89%|████████▉ | 330416/371472 [4:07:21<3:18:03, 3.45it/s] 89%|████████▉ | 330417/371472 [4:07:22<3:11:28, 3.57it/s] 89%|████████▉ | 330418/371472 [4:07:22<3:10:50, 3.59it/s] 89%|████████▉ | 330419/371472 [4:07:22<3:04:32, 3.71it/s] 89%|████████▉ | 330420/371472 [4:07:22<3:04:54, 3.70it/s] {'loss': 2.4676, 'learning_rate': 1.995141028679782e-07, 'epoch': 14.23} + 89%|████████▉ | 330420/371472 [4:07:22<3:04:54, 3.70it/s] 89%|████████▉ | 330421/371472 [4:07:23<3:05:37, 3.69it/s] 89%|████████▉ | 330422/371472 [4:07:23<3:27:37, 3.30it/s] 89%|████████▉ | 330423/371472 [4:07:23<3:19:11, 3.43it/s] 89%|████████▉ | 330424/371472 [4:07:24<3:18:43, 3.44it/s] 89%|████████▉ | 330425/371472 [4:07:24<3:16:07, 3.49it/s] 89%|████████▉ | 330426/371472 [4:07:24<3:08:15, 3.63it/s] 89%|████████▉ | 330427/371472 [4:07:25<3:19:06, 3.44it/s] 89%|████████▉ | 330428/371472 [4:07:25<3:16:20, 3.48it/s] 89%|████████▉ | 330429/371472 [4:07:25<3:17:06, 3.47it/s] 89%|████████▉ | 330430/371472 [4:07:25<3:10:10, 3.60it/s] 89%|████████▉ | 330431/371472 [4:07:26<3:07:27, 3.65it/s] 89%|████████▉ | 330432/371472 [4:07:26<3:09:23, 3.61it/s] 89%|████████▉ | 330433/371472 [4:07:26<3:08:20, 3.63it/s] 89%|████████▉ | 330434/371472 [4:07:26<3:05:55, 3.68it/s] 89%|████████▉ | 330435/371472 [4:07:27<3:13:50, 3.53it/s] 89%|████████▉ | 330436/371472 [4:07:27<3:09:31, 3.61it/s] 89%|████████▉ | 330437/371472 [4:07:27<3:08:33, 3.63it/s] 89%|████████▉ | 330438/371472 [4:07:28<3:08:59, 3.62it/s] 89%|████████▉ | 330439/371472 [4:07:28<3:07:17, 3.65it/s] 89%|████████▉ | 330440/371472 [4:07:28<3:10:07, 3.60it/s] {'loss': 2.504, 'learning_rate': 1.9946562089249926e-07, 'epoch': 14.23} + 89%|████████▉ | 330440/371472 [4:07:28<3:10:07, 3.60it/s] 89%|████████▉ | 330441/371472 [4:07:28<3:20:16, 3.41it/s] 89%|████████▉ | 330442/371472 [4:07:29<3:16:52, 3.47it/s] 89%|████████▉ | 330443/371472 [4:07:29<3:14:59, 3.51it/s] 89%|████████▉ | 330444/371472 [4:07:29<3:09:29, 3.61it/s] 89%|████████▉ | 330445/371472 [4:07:30<3:14:24, 3.52it/s] 89%|████████▉ | 330446/371472 [4:07:30<3:11:50, 3.56it/s] 89%|████████▉ | 330447/371472 [4:07:30<3:09:08, 3.61it/s] 89%|████████▉ | 330448/371472 [4:07:30<3:03:13, 3.73it/s] 89%|████████▉ | 330449/371472 [4:07:31<3:01:41, 3.76it/s] 89%|████████▉ | 330450/371472 [4:07:31<3:02:24, 3.75it/s] 89%|████████▉ | 330451/371472 [4:07:31<3:13:22, 3.54it/s] 89%|████████▉ | 330452/371472 [4:07:31<3:11:06, 3.58it/s] 89%|████████▉ | 330453/371472 [4:07:32<3:20:11, 3.41it/s] 89%|████████▉ | 330454/371472 [4:07:32<3:13:11, 3.54it/s] 89%|████████▉ | 330455/371472 [4:07:32<3:12:36, 3.55it/s] 89%|████████▉ | 330456/371472 [4:07:33<3:20:47, 3.40it/s] 89%|████████▉ | 330457/371472 [4:07:33<3:17:37, 3.46it/s] 89%|████████▉ | 330458/371472 [4:07:33<3:08:48, 3.62it/s] 89%|████████▉ | 330459/371472 [4:07:33<3:03:00, 3.73it/s] 89%|████████▉ | 330460/371472 [4:07:34<3:02:19, 3.75it/s] {'loss': 2.664, 'learning_rate': 1.9941713891702038e-07, 'epoch': 14.23} + 89%|████████▉ | 330460/371472 [4:07:34<3:02:19, 3.75it/s] 89%|████████▉ | 330461/371472 [4:07:34<3:08:09, 3.63it/s] 89%|████████▉ | 330462/371472 [4:07:34<3:06:12, 3.67it/s] 89%|████████▉ | 330463/371472 [4:07:35<3:01:40, 3.76it/s] 89%|████████▉ | 330464/371472 [4:07:35<3:02:42, 3.74it/s] 89%|████████▉ | 330465/371472 [4:07:35<3:02:06, 3.75it/s] 89%|████████▉ | 330466/371472 [4:07:35<3:15:49, 3.49it/s] 89%|████████▉ | 330467/371472 [4:07:36<3:34:26, 3.19it/s] 89%|████████▉ | 330468/371472 [4:07:36<3:35:26, 3.17it/s] 89%|████████▉ | 330469/371472 [4:07:36<3:32:45, 3.21it/s] 89%|████████▉ | 330470/371472 [4:07:37<3:31:25, 3.23it/s] 89%|████████▉ | 330471/371472 [4:07:37<3:25:42, 3.32it/s] 89%|████████▉ | 330472/371472 [4:07:37<3:16:51, 3.47it/s] 89%|████████▉ | 330473/371472 [4:07:38<3:21:31, 3.39it/s] 89%|████████▉ | 330474/371472 [4:07:38<3:15:38, 3.49it/s] 89%|████████▉ | 330475/371472 [4:07:38<3:34:02, 3.19it/s] 89%|████████▉ | 330476/371472 [4:07:38<3:23:08, 3.36it/s] 89%|████████▉ | 330477/371472 [4:07:39<3:11:33, 3.57it/s] 89%|████████▉ | 330478/371472 [4:07:39<3:11:28, 3.57it/s] 89%|████████▉ | 330479/371472 [4:07:39<3:14:30, 3.51it/s] 89%|████████▉ | 330480/371472 [4:07:40<3:11:30, 3.57it/s] {'loss': 2.59, 'learning_rate': 1.9936865694154146e-07, 'epoch': 14.23} + 89%|████████▉ | 330480/371472 [4:07:40<3:11:30, 3.57it/s] 89%|████████▉ | 330481/371472 [4:07:40<3:14:22, 3.51it/s] 89%|████████▉ | 330482/371472 [4:07:40<3:12:37, 3.55it/s] 89%|████████▉ | 330483/371472 [4:07:40<3:07:50, 3.64it/s] 89%|████████▉ | 330484/371472 [4:07:41<3:02:50, 3.74it/s] 89%|████████▉ | 330485/371472 [4:07:41<3:23:12, 3.36it/s] 89%|████████▉ | 330486/371472 [4:07:41<3:16:13, 3.48it/s] 89%|████████▉ | 330487/371472 [4:07:41<3:08:38, 3.62it/s] 89%|████████▉ | 330488/371472 [4:07:42<3:15:29, 3.49it/s] 89%|████████▉ | 330489/371472 [4:07:42<3:11:09, 3.57it/s] 89%|████████▉ | 330490/371472 [4:07:42<3:10:41, 3.58it/s] 89%|████████▉ | 330491/371472 [4:07:43<3:21:58, 3.38it/s] 89%|████████▉ | 330492/371472 [4:07:43<3:13:30, 3.53it/s] 89%|████████▉ | 330493/371472 [4:07:43<3:06:33, 3.66it/s] 89%|████████▉ | 330494/371472 [4:07:43<3:02:34, 3.74it/s] 89%|████████▉ | 330495/371472 [4:07:44<3:06:49, 3.66it/s] 89%|████████▉ | 330496/371472 [4:07:44<3:05:32, 3.68it/s] 89%|████████▉ | 330497/371472 [4:07:44<3:04:03, 3.71it/s] 89%|████████▉ | 330498/371472 [4:07:45<3:10:54, 3.58it/s] 89%|████████▉ | 330499/371472 [4:07:45<3:12:42, 3.54it/s] 89%|████████▉ | 330500/371472 [4:07:45<3:11:58, 3.56it/s] {'loss': 2.5515, 'learning_rate': 1.9932017496606263e-07, 'epoch': 14.24} + 89%|████████▉ | 330500/371472 [4:07:45<3:11:58, 3.56it/s] 89%|████████▉ | 330501/371472 [4:07:45<3:03:17, 3.73it/s] 89%|████████▉ | 330502/371472 [4:07:46<3:01:06, 3.77it/s] 89%|████████▉ | 330503/371472 [4:07:46<3:04:18, 3.70it/s] 89%|████████▉ | 330504/371472 [4:07:46<3:04:59, 3.69it/s] 89%|████████▉ | 330505/371472 [4:07:46<3:05:17, 3.68it/s] 89%|████████▉ | 330506/371472 [4:07:47<3:02:52, 3.73it/s] 89%|████████▉ | 330507/371472 [4:07:47<3:01:59, 3.75it/s] 89%|████████▉ | 330508/371472 [4:07:47<3:15:44, 3.49it/s] 89%|████████▉ | 330509/371472 [4:07:48<3:11:48, 3.56it/s] 89%|████████▉ | 330510/371472 [4:07:48<3:23:52, 3.35it/s] 89%|████████▉ | 330511/371472 [4:07:48<3:30:40, 3.24it/s] 89%|████████▉ | 330512/371472 [4:07:48<3:20:28, 3.41it/s] 89%|████████▉ | 330513/371472 [4:07:49<3:36:44, 3.15it/s] 89%|████████▉ | 330514/371472 [4:07:49<3:44:50, 3.04it/s] 89%|████████▉ | 330515/371472 [4:07:50<3:35:26, 3.17it/s] 89%|████████▉ | 330516/371472 [4:07:50<3:31:18, 3.23it/s] 89%|████████▉ | 330517/371472 [4:07:50<3:22:55, 3.36it/s] 89%|████████▉ | 330518/371472 [4:07:50<3:11:51, 3.56it/s] 89%|████████▉ | 330519/371472 [4:07:51<3:14:57, 3.50it/s] 89%|██████��█▉ | 330520/371472 [4:07:51<3:12:11, 3.55it/s] {'loss': 2.48, 'learning_rate': 1.9927169299058368e-07, 'epoch': 14.24} + 89%|████████▉ | 330520/371472 [4:07:51<3:12:11, 3.55it/s] 89%|████████▉ | 330521/371472 [4:07:51<3:08:47, 3.62it/s] 89%|████████▉ | 330522/371472 [4:07:51<3:05:52, 3.67it/s] 89%|████████▉ | 330523/371472 [4:07:52<2:59:56, 3.79it/s] 89%|████████▉ | 330524/371472 [4:07:52<2:56:57, 3.86it/s] 89%|████████▉ | 330525/371472 [4:07:52<2:57:10, 3.85it/s] 89%|████████▉ | 330526/371472 [4:07:52<2:58:42, 3.82it/s] 89%|████████▉ | 330527/371472 [4:07:53<3:03:29, 3.72it/s] 89%|████████▉ | 330528/371472 [4:07:53<3:10:40, 3.58it/s] 89%|████████▉ | 330529/371472 [4:07:54<5:01:37, 2.26it/s] 89%|████████▉ | 330530/371472 [4:07:54<4:22:15, 2.60it/s] 89%|████████▉ | 330531/371472 [4:07:54<4:01:58, 2.82it/s] 89%|████████▉ | 330532/371472 [4:07:55<3:43:27, 3.05it/s] 89%|████████▉ | 330533/371472 [4:07:55<3:40:58, 3.09it/s] 89%|████████▉ | 330534/371472 [4:07:55<3:36:15, 3.16it/s] 89%|████████▉ | 330535/371472 [4:07:56<3:25:58, 3.31it/s] 89%|████████▉ | 330536/371472 [4:07:56<3:13:34, 3.52it/s] 89%|████████▉ | 330537/371472 [4:07:56<3:17:08, 3.46it/s] 89%|████████▉ | 330538/371472 [4:07:56<3:15:46, 3.48it/s] 89%|████████▉ | 330539/371472 [4:07:57<3:11:53, 3.56it/s] 89%|████████▉ | 330540/371472 [4:07:57<3:13:26, 3.53it/s] {'loss': 2.4761, 'learning_rate': 1.9922321101510483e-07, 'epoch': 14.24} + 89%|████████▉ | 330540/371472 [4:07:57<3:13:26, 3.53it/s] 89%|████████▉ | 330541/371472 [4:07:57<3:13:20, 3.53it/s] 89%|████████▉ | 330542/371472 [4:07:57<3:06:36, 3.66it/s] 89%|████████▉ | 330543/371472 [4:07:58<3:18:50, 3.43it/s] 89%|████████▉ | 330544/371472 [4:07:58<3:23:04, 3.36it/s] 89%|████████▉ | 330545/371472 [4:07:58<3:22:47, 3.36it/s] 89%|████████▉ | 330546/371472 [4:07:59<3:23:49, 3.35it/s] 89%|████████▉ | 330547/371472 [4:07:59<3:17:15, 3.46it/s] 89%|████████▉ | 330548/371472 [4:07:59<3:10:49, 3.57it/s] 89%|████████▉ | 330549/371472 [4:08:00<3:26:20, 3.31it/s] 89%|████████▉ | 330550/371472 [4:08:00<3:24:35, 3.33it/s] 89%|████████▉ | 330551/371472 [4:08:00<3:23:44, 3.35it/s] 89%|████████▉ | 330552/371472 [4:08:00<3:32:01, 3.22it/s] 89%|████████▉ | 330553/371472 [4:08:01<3:36:34, 3.15it/s] 89%|████████▉ | 330554/371472 [4:08:01<3:19:34, 3.42it/s] 89%|████████▉ | 330555/371472 [4:08:01<3:14:31, 3.51it/s] 89%|████████▉ | 330556/371472 [4:08:02<3:08:15, 3.62it/s] 89%|████████▉ | 330557/371472 [4:08:02<4:04:31, 2.79it/s] 89%|████████▉ | 330558/371472 [4:08:02<3:40:02, 3.10it/s] 89%|████████▉ | 330559/371472 [4:08:03<3:23:19, 3.35it/s] 89%|████████▉ | 330560/371472 [4:08:03<3:12:27, 3.54it/s] {'loss': 2.6843, 'learning_rate': 1.991747290396259e-07, 'epoch': 14.24} + 89%|████████▉ | 330560/371472 [4:08:03<3:12:27, 3.54it/s] 89%|████████▉ | 330561/371472 [4:08:03<3:07:27, 3.64it/s] 89%|████████▉ | 330562/371472 [4:08:03<3:16:36, 3.47it/s] 89%|████████▉ | 330563/371472 [4:08:04<3:16:16, 3.47it/s] 89%|████████▉ | 330564/371472 [4:08:04<3:28:16, 3.27it/s] 89%|████████▉ | 330565/371472 [4:08:04<3:25:39, 3.32it/s] 89%|████████▉ | 330566/371472 [4:08:05<3:14:45, 3.50it/s] 89%|████████▉ | 330567/371472 [4:08:05<3:09:31, 3.60it/s] 89%|████████▉ | 330568/371472 [4:08:05<3:01:24, 3.76it/s] 89%|████████▉ | 330569/371472 [4:08:05<2:59:30, 3.80it/s] 89%|████████▉ | 330570/371472 [4:08:06<2:56:13, 3.87it/s] 89%|████████▉ | 330571/371472 [4:08:06<2:56:33, 3.86it/s] 89%|████████▉ | 330572/371472 [4:08:06<3:05:40, 3.67it/s] 89%|████████▉ | 330573/371472 [4:08:06<3:07:04, 3.64it/s] 89%|████████▉ | 330574/371472 [4:08:07<3:10:41, 3.57it/s] 89%|████████▉ | 330575/371472 [4:08:07<3:12:15, 3.55it/s] 89%|████████▉ | 330576/371472 [4:08:07<3:07:27, 3.64it/s] 89%|████████▉ | 330577/371472 [4:08:08<3:20:59, 3.39it/s] 89%|████████▉ | 330578/371472 [4:08:08<3:13:41, 3.52it/s] 89%|████████▉ | 330579/371472 [4:08:08<3:09:42, 3.59it/s] 89%|████████▉ | 330580/371472 [4:08:08<3:07:12, 3.64it/s] {'loss': 2.5665, 'learning_rate': 1.9912624706414705e-07, 'epoch': 14.24} + 89%|████████▉ | 330580/371472 [4:08:08<3:07:12, 3.64it/s] 89%|████████▉ | 330581/371472 [4:08:09<3:13:00, 3.53it/s] 89%|████████▉ | 330582/371472 [4:08:09<3:09:23, 3.60it/s] 89%|████████▉ | 330583/371472 [4:08:09<3:08:53, 3.61it/s] 89%|████████▉ | 330584/371472 [4:08:10<3:01:50, 3.75it/s] 89%|████████▉ | 330585/371472 [4:08:10<3:16:57, 3.46it/s] 89%|████████▉ | 330586/371472 [4:08:10<3:10:39, 3.57it/s] 89%|████████▉ | 330587/371472 [4:08:10<3:05:54, 3.67it/s] 89%|████████▉ | 330588/371472 [4:08:11<3:06:28, 3.65it/s] 89%|████████▉ | 330589/371472 [4:08:11<3:25:09, 3.32it/s] 89%|████████▉ | 330590/371472 [4:08:11<3:34:17, 3.18it/s] 89%|████████▉ | 330591/371472 [4:08:12<3:32:38, 3.20it/s] 89%|████████▉ | 330592/371472 [4:08:12<3:20:19, 3.40it/s] 89%|████████▉ | 330593/371472 [4:08:12<3:16:00, 3.48it/s] 89%|████████▉ | 330594/371472 [4:08:12<3:13:49, 3.52it/s] 89%|████████▉ | 330595/371472 [4:08:13<3:17:42, 3.45it/s] 89%|████████▉ | 330596/371472 [4:08:13<3:47:43, 2.99it/s] 89%|████████▉ | 330597/371472 [4:08:13<3:35:05, 3.17it/s] 89%|████████▉ | 330598/371472 [4:08:14<3:26:51, 3.29it/s] 89%|████████▉ | 330599/371472 [4:08:14<3:18:30, 3.43it/s] 89%|████████▉ | 330600/371472 [4:08:14<3:08:00, 3.62it/s] {'loss': 2.5285, 'learning_rate': 1.9907776508866812e-07, 'epoch': 14.24} + 89%|████████▉ | 330600/371472 [4:08:14<3:08:00, 3.62it/s] 89%|████████▉ | 330601/371472 [4:08:15<3:02:53, 3.72it/s] 89%|████████▉ | 330602/371472 [4:08:15<3:02:28, 3.73it/s] 89%|████████▉ | 330603/371472 [4:08:15<3:08:53, 3.61it/s] 89%|████████▉ | 330604/371472 [4:08:15<3:10:11, 3.58it/s] 89%|████████▉ | 330605/371472 [4:08:16<3:09:08, 3.60it/s] 89%|████████▉ | 330606/371472 [4:08:16<3:22:12, 3.37it/s] 89%|████████▉ | 330607/371472 [4:08:16<3:24:01, 3.34it/s] 89%|████████▉ | 330608/371472 [4:08:17<3:21:21, 3.38it/s] 89%|████████▉ | 330609/371472 [4:08:17<3:15:28, 3.48it/s] 89%|████████▉ | 330610/371472 [4:08:17<3:08:42, 3.61it/s] 89%|████████▉ | 330611/371472 [4:08:17<3:05:43, 3.67it/s] 89%|████████▉ | 330612/371472 [4:08:18<3:02:39, 3.73it/s] 89%|████████▉ | 330613/371472 [4:08:18<3:00:03, 3.78it/s] 89%|████████▉ | 330614/371472 [4:08:18<3:08:56, 3.60it/s] 89%|████████▉ | 330615/371472 [4:08:18<3:09:57, 3.58it/s] 89%|████████▉ | 330616/371472 [4:08:19<3:09:29, 3.59it/s] 89%|████████▉ | 330617/371472 [4:08:19<3:12:02, 3.55it/s] 89%|████████▉ | 330618/371472 [4:08:19<3:08:44, 3.61it/s] 89%|████████▉ | 330619/371472 [4:08:20<3:03:42, 3.71it/s] 89%|████████▉ | 330620/371472 [4:08:20<3:04:12, 3.70it/s] {'loss': 2.6818, 'learning_rate': 1.9902928311318927e-07, 'epoch': 14.24} + 89%|████████▉ | 330620/371472 [4:08:20<3:04:12, 3.70it/s] 89%|████████▉ | 330621/371472 [4:08:20<3:21:07, 3.39it/s] 89%|████████▉ | 330622/371472 [4:08:21<3:33:12, 3.19it/s] 89%|████████▉ | 330623/371472 [4:08:21<3:19:10, 3.42it/s] 89%|████████▉ | 330624/371472 [4:08:21<3:11:14, 3.56it/s] 89%|████████▉ | 330625/371472 [4:08:21<3:14:36, 3.50it/s] 89%|████████▉ | 330626/371472 [4:08:22<3:07:45, 3.63it/s] 89%|████████▉ | 330627/371472 [4:08:22<3:08:48, 3.61it/s] 89%|████████▉ | 330628/371472 [4:08:22<3:16:47, 3.46it/s] 89%|████████▉ | 330629/371472 [4:08:22<3:08:12, 3.62it/s] 89%|████████▉ | 330630/371472 [4:08:23<3:09:24, 3.59it/s] 89%|████████▉ | 330631/371472 [4:08:23<3:05:50, 3.66it/s] 89%|████████▉ | 330632/371472 [4:08:23<3:00:51, 3.76it/s] 89%|████████▉ | 330633/371472 [4:08:23<3:01:52, 3.74it/s] 89%|████████▉ | 330634/371472 [4:08:24<2:57:31, 3.83it/s] 89%|████████▉ | 330635/371472 [4:08:24<2:56:07, 3.86it/s] 89%|████████▉ | 330636/371472 [4:08:24<3:03:52, 3.70it/s] 89%|████████▉ | 330637/371472 [4:08:25<3:03:50, 3.70it/s] 89%|████████▉ | 330638/371472 [4:08:25<3:06:05, 3.66it/s] 89%|████████▉ | 330639/371472 [4:08:25<3:28:17, 3.27it/s] 89%|████████▉ | 330640/371472 [4:08:25<3:23:39, 3.34it/s] {'loss': 2.6861, 'learning_rate': 1.9898080113771032e-07, 'epoch': 14.24} + 89%|████████▉ | 330640/371472 [4:08:25<3:23:39, 3.34it/s] 89%|████████▉ | 330641/371472 [4:08:26<3:19:07, 3.42it/s] 89%|████████▉ | 330642/371472 [4:08:26<3:23:48, 3.34it/s] 89%|████████▉ | 330643/371472 [4:08:26<3:31:24, 3.22it/s] 89%|████████▉ | 330644/371472 [4:08:27<3:20:36, 3.39it/s] 89%|████████▉ | 330645/371472 [4:08:27<3:12:03, 3.54it/s] 89%|████████▉ | 330646/371472 [4:08:27<3:11:22, 3.56it/s] 89%|████████▉ | 330647/371472 [4:08:28<3:18:36, 3.43it/s] 89%|████████▉ | 330648/371472 [4:08:28<3:33:01, 3.19it/s] 89%|████████▉ | 330649/371472 [4:08:28<3:28:47, 3.26it/s] 89%|████████▉ | 330650/371472 [4:08:28<3:17:55, 3.44it/s] 89%|████████▉ | 330651/371472 [4:08:29<3:26:16, 3.30it/s] 89%|████████▉ | 330652/371472 [4:08:29<3:15:21, 3.48it/s] 89%|████████▉ | 330653/371472 [4:08:29<3:19:44, 3.41it/s] 89%|████████▉ | 330654/371472 [4:08:30<3:12:59, 3.52it/s] 89%|████████▉ | 330655/371472 [4:08:30<3:13:51, 3.51it/s] 89%|████████▉ | 330656/371472 [4:08:30<3:11:46, 3.55it/s] 89%|████████▉ | 330657/371472 [4:08:30<3:22:17, 3.36it/s] 89%|████████▉ | 330658/371472 [4:08:31<3:22:17, 3.36it/s] 89%|████████▉ | 330659/371472 [4:08:31<3:44:00, 3.04it/s] 89%|████████▉ | 330660/371472 [4:08:31<3:40:31, 3.08it/s] {'loss': 2.6274, 'learning_rate': 1.9893231916223147e-07, 'epoch': 14.24} + 89%|████████▉ | 330660/371472 [4:08:31<3:40:31, 3.08it/s] 89%|████████▉ | 330661/371472 [4:08:32<3:44:56, 3.02it/s] 89%|████████▉ | 330662/371472 [4:08:32<3:28:07, 3.27it/s] 89%|████████▉ | 330663/371472 [4:08:32<3:32:46, 3.20it/s] 89%|████████▉ | 330664/371472 [4:08:33<3:41:19, 3.07it/s] 89%|████████▉ | 330665/371472 [4:08:33<3:33:45, 3.18it/s] 89%|████████▉ | 330666/371472 [4:08:33<3:26:52, 3.29it/s] 89%|████████▉ | 330667/371472 [4:08:34<3:19:52, 3.40it/s] 89%|████████▉ | 330668/371472 [4:08:34<3:17:48, 3.44it/s] 89%|████████▉ | 330669/371472 [4:08:34<3:12:13, 3.54it/s] 89%|████████▉ | 330670/371472 [4:08:34<3:22:15, 3.36it/s] 89%|████████▉ | 330671/371472 [4:08:35<3:13:05, 3.52it/s] 89%|████████▉ | 330672/371472 [4:08:35<3:06:44, 3.64it/s] 89%|████████▉ | 330673/371472 [4:08:35<3:02:41, 3.72it/s] 89%|████████▉ | 330674/371472 [4:08:36<3:21:44, 3.37it/s] 89%|████████▉ | 330675/371472 [4:08:36<3:43:26, 3.04it/s] 89%|████████▉ | 330676/371472 [4:08:36<3:39:21, 3.10it/s] 89%|████████▉ | 330677/371472 [4:08:37<3:51:26, 2.94it/s] 89%|████████▉ | 330678/371472 [4:08:37<3:34:06, 3.18it/s] 89%|████████▉ | 330679/371472 [4:08:37<3:23:12, 3.35it/s] 89%|████████▉ | 330680/371472 [4:08:37<3:17:40, 3.44it/s] {'loss': 2.6109, 'learning_rate': 1.9888383718675254e-07, 'epoch': 14.24} + 89%|████████▉ | 330680/371472 [4:08:37<3:17:40, 3.44it/s] 89%|████████▉ | 330681/371472 [4:08:38<3:18:19, 3.43it/s] 89%|████████▉ | 330682/371472 [4:08:38<3:18:41, 3.42it/s] 89%|████████▉ | 330683/371472 [4:08:38<3:20:42, 3.39it/s] 89%|████████▉ | 330684/371472 [4:08:39<3:20:22, 3.39it/s] 89%|████████▉ | 330685/371472 [4:08:39<3:41:14, 3.07it/s] 89%|████████▉ | 330686/371472 [4:08:39<3:48:26, 2.98it/s] 89%|████████▉ | 330687/371472 [4:08:40<3:36:19, 3.14it/s] 89%|████████▉ | 330688/371472 [4:08:40<3:28:03, 3.27it/s] 89%|████████▉ | 330689/371472 [4:08:40<3:18:46, 3.42it/s] 89%|████████▉ | 330690/371472 [4:08:41<3:14:01, 3.50it/s] 89%|████████▉ | 330691/371472 [4:08:41<3:07:25, 3.63it/s] 89%|████████▉ | 330692/371472 [4:08:41<3:13:44, 3.51it/s] 89%|████████▉ | 330693/371472 [4:08:41<3:13:43, 3.51it/s] 89%|████████▉ | 330694/371472 [4:08:42<3:06:22, 3.65it/s] 89%|████████▉ | 330695/371472 [4:08:42<3:02:06, 3.73it/s] 89%|████████▉ | 330696/371472 [4:08:42<3:08:26, 3.61it/s] 89%|████████▉ | 330697/371472 [4:08:42<3:05:57, 3.65it/s] 89%|████████▉ | 330698/371472 [4:08:43<3:06:52, 3.64it/s] 89%|████████▉ | 330699/371472 [4:08:43<3:09:03, 3.59it/s] 89%|████████▉ | 330700/371472 [4:08:43<3:17:52, 3.43it/s] {'loss': 2.5062, 'learning_rate': 1.988353552112737e-07, 'epoch': 14.24} + 89%|████████▉ | 330700/371472 [4:08:43<3:17:52, 3.43it/s] 89%|████████▉ | 330701/371472 [4:08:44<3:11:43, 3.54it/s] 89%|████████▉ | 330702/371472 [4:08:44<3:05:44, 3.66it/s] 89%|████████▉ | 330703/371472 [4:08:44<3:16:03, 3.47it/s] 89%|████████▉ | 330704/371472 [4:08:44<3:14:16, 3.50it/s] 89%|████████▉ | 330705/371472 [4:08:45<3:10:29, 3.57it/s] 89%|████████▉ | 330706/371472 [4:08:45<3:17:26, 3.44it/s] 89%|████████▉ | 330707/371472 [4:08:45<3:13:38, 3.51it/s] 89%|████████▉ | 330708/371472 [4:08:46<3:20:03, 3.40it/s] 89%|████████▉ | 330709/371472 [4:08:46<3:19:36, 3.40it/s] 89%|████████▉ | 330710/371472 [4:08:46<3:15:04, 3.48it/s] 89%|████████▉ | 330711/371472 [4:08:46<3:23:14, 3.34it/s] 89%|████████▉ | 330712/371472 [4:08:47<3:12:14, 3.53it/s] 89%|████████▉ | 330713/371472 [4:08:47<3:34:45, 3.16it/s] 89%|████████▉ | 330714/371472 [4:08:47<3:23:14, 3.34it/s] 89%|████████▉ | 330715/371472 [4:08:48<3:14:02, 3.50it/s] 89%|████████▉ | 330716/371472 [4:08:48<3:07:29, 3.62it/s] 89%|████████▉ | 330717/371472 [4:08:48<3:07:42, 3.62it/s] 89%|████████▉ | 330718/371472 [4:08:48<3:13:32, 3.51it/s] 89%|████████▉ | 330719/371472 [4:08:49<3:20:00, 3.40it/s] 89%|████████▉ | 330720/371472 [4:08:49<3:16:57, 3.45it/s] {'loss': 2.5264, 'learning_rate': 1.9878687323579474e-07, 'epoch': 14.24} + 89%|████████▉ | 330720/371472 [4:08:49<3:16:57, 3.45it/s] 89%|████████▉ | 330721/371472 [4:08:49<3:14:09, 3.50it/s] 89%|████████▉ | 330722/371472 [4:08:50<3:19:23, 3.41it/s] 89%|████████▉ | 330723/371472 [4:08:50<3:21:34, 3.37it/s] 89%|████████▉ | 330724/371472 [4:08:50<3:18:09, 3.43it/s] 89%|████████▉ | 330725/371472 [4:08:51<3:11:32, 3.55it/s] 89%|████████▉ | 330726/371472 [4:08:51<3:40:02, 3.09it/s] 89%|████████▉ | 330727/371472 [4:08:51<3:33:04, 3.19it/s] 89%|████████▉ | 330728/371472 [4:08:52<3:27:42, 3.27it/s] 89%|████████▉ | 330729/371472 [4:08:52<3:23:48, 3.33it/s] 89%|████████▉ | 330730/371472 [4:08:52<3:13:20, 3.51it/s] 89%|████████▉ | 330731/371472 [4:08:52<3:13:24, 3.51it/s] 89%|████████▉ | 330732/371472 [4:08:53<3:04:20, 3.68it/s] 89%|████████▉ | 330733/371472 [4:08:53<3:06:16, 3.65it/s] 89%|████████▉ | 330734/371472 [4:08:53<3:09:09, 3.59it/s] 89%|████████▉ | 330735/371472 [4:08:53<3:02:58, 3.71it/s] 89%|████████▉ | 330736/371472 [4:08:54<3:06:34, 3.64it/s] 89%|████████▉ | 330737/371472 [4:08:54<3:18:15, 3.42it/s] 89%|████████▉ | 330738/371472 [4:08:54<3:10:27, 3.56it/s] 89%|████████▉ | 330739/371472 [4:08:55<3:04:38, 3.68it/s] 89%|████████▉ | 330740/371472 [4:08:55<3:15:10, 3.48it/s] {'loss': 2.7055, 'learning_rate': 1.9873839126031591e-07, 'epoch': 14.25} + 89%|████████▉ | 330740/371472 [4:08:55<3:15:10, 3.48it/s] 89%|████████▉ | 330741/371472 [4:08:55<3:12:47, 3.52it/s] 89%|████████▉ | 330742/371472 [4:08:55<3:14:37, 3.49it/s] 89%|████████▉ | 330743/371472 [4:08:56<3:16:48, 3.45it/s] 89%|████████▉ | 330744/371472 [4:08:56<3:19:28, 3.40it/s] 89%|████████▉ | 330745/371472 [4:08:56<3:31:52, 3.20it/s] 89%|████████▉ | 330746/371472 [4:08:57<3:29:43, 3.24it/s] 89%|████████▉ | 330747/371472 [4:08:57<3:20:52, 3.38it/s] 89%|████████▉ | 330748/371472 [4:08:57<3:16:22, 3.46it/s] 89%|████████▉ | 330749/371472 [4:08:58<3:28:44, 3.25it/s] 89%|████████▉ | 330750/371472 [4:08:58<3:20:23, 3.39it/s] 89%|████████▉ | 330751/371472 [4:08:58<3:15:11, 3.48it/s] 89%|████████▉ | 330752/371472 [4:08:58<3:16:18, 3.46it/s] 89%|████████▉ | 330753/371472 [4:08:59<3:20:03, 3.39it/s] 89%|████████▉ | 330754/371472 [4:08:59<3:24:09, 3.32it/s] 89%|████████▉ | 330755/371472 [4:08:59<3:16:49, 3.45it/s] 89%|████████▉ | 330756/371472 [4:09:00<3:16:23, 3.46it/s] 89%|████████▉ | 330757/371472 [4:09:00<3:16:59, 3.44it/s] 89%|████████▉ | 330758/371472 [4:09:00<3:14:15, 3.49it/s] 89%|████████▉ | 330759/371472 [4:09:00<3:20:25, 3.39it/s] 89%|████████▉ | 330760/371472 [4:09:01<3:14:35, 3.49it/s] {'loss': 2.5887, 'learning_rate': 1.9868990928483696e-07, 'epoch': 14.25} + 89%|████████▉ | 330760/371472 [4:09:01<3:14:35, 3.49it/s] 89%|████████▉ | 330761/371472 [4:09:01<3:20:26, 3.39it/s] 89%|████████▉ | 330762/371472 [4:09:01<3:10:24, 3.56it/s] 89%|████████▉ | 330763/371472 [4:09:02<3:12:15, 3.53it/s] 89%|████████▉ | 330764/371472 [4:09:02<3:12:07, 3.53it/s] 89%|████████▉ | 330765/371472 [4:09:02<3:08:22, 3.60it/s] 89%|████████▉ | 330766/371472 [4:09:02<3:03:28, 3.70it/s] 89%|████████▉ | 330767/371472 [4:09:03<3:04:31, 3.68it/s] 89%|████████▉ | 330768/371472 [4:09:03<3:05:28, 3.66it/s] 89%|████████▉ | 330769/371472 [4:09:03<3:01:26, 3.74it/s] 89%|████████▉ | 330770/371472 [4:09:03<3:00:14, 3.76it/s] 89%|████████▉ | 330771/371472 [4:09:04<2:58:31, 3.80it/s] 89%|████████▉ | 330772/371472 [4:09:04<2:58:34, 3.80it/s] 89%|████████▉ | 330773/371472 [4:09:04<3:02:58, 3.71it/s] 89%|████████▉ | 330774/371472 [4:09:05<3:08:21, 3.60it/s] 89%|████████▉ | 330775/371472 [4:09:05<3:16:46, 3.45it/s] 89%|████████▉ | 330776/371472 [4:09:05<3:23:53, 3.33it/s] 89%|████████▉ | 330777/371472 [4:09:05<3:23:09, 3.34it/s] 89%|████████▉ | 330778/371472 [4:09:06<3:28:40, 3.25it/s] 89%|████████▉ | 330779/371472 [4:09:06<3:25:29, 3.30it/s] 89%|████████▉ | 330780/371472 [4:09:06<3:16:29, 3.45it/s] {'loss': 2.5158, 'learning_rate': 1.986414273093581e-07, 'epoch': 14.25} + 89%|████████▉ | 330780/371472 [4:09:06<3:16:29, 3.45it/s] 89%|████████▉ | 330781/371472 [4:09:07<3:30:09, 3.23it/s] 89%|████████▉ | 330782/371472 [4:09:07<3:34:38, 3.16it/s] 89%|████████▉ | 330783/371472 [4:09:07<3:32:36, 3.19it/s] 89%|████████▉ | 330784/371472 [4:09:08<3:23:05, 3.34it/s] 89%|████████▉ | 330785/371472 [4:09:08<3:14:57, 3.48it/s] 89%|████████▉ | 330786/371472 [4:09:08<3:19:39, 3.40it/s] 89%|████████▉ | 330787/371472 [4:09:08<3:20:23, 3.38it/s] 89%|████████▉ | 330788/371472 [4:09:09<3:14:56, 3.48it/s] 89%|████████▉ | 330789/371472 [4:09:09<3:09:57, 3.57it/s] 89%|████████▉ | 330790/371472 [4:09:09<3:10:57, 3.55it/s] 89%|████████▉ | 330791/371472 [4:09:10<3:08:45, 3.59it/s] 89%|████████▉ | 330792/371472 [4:09:10<3:16:11, 3.46it/s] 89%|████████▉ | 330793/371472 [4:09:10<3:10:39, 3.56it/s] 89%|████████▉ | 330794/371472 [4:09:10<3:13:44, 3.50it/s] 89%|████████▉ | 330795/371472 [4:09:11<3:15:18, 3.47it/s] 89%|████████▉ | 330796/371472 [4:09:11<3:10:31, 3.56it/s] 89%|████████▉ | 330797/371472 [4:09:11<3:16:30, 3.45it/s] 89%|████████▉ | 330798/371472 [4:09:12<3:05:08, 3.66it/s] 89%|████████▉ | 330799/371472 [4:09:12<3:00:34, 3.75it/s] 89%|████████▉ | 330800/371472 [4:09:12<2:58:35, 3.80it/s] {'loss': 2.6444, 'learning_rate': 1.9859294533387918e-07, 'epoch': 14.25} + 89%|████████▉ | 330800/371472 [4:09:12<2:58:35, 3.80it/s] 89%|████████▉ | 330801/371472 [4:09:12<3:12:49, 3.52it/s] 89%|████████▉ | 330802/371472 [4:09:13<3:13:23, 3.50it/s] 89%|████████▉ | 330803/371472 [4:09:13<3:03:33, 3.69it/s] 89%|████████▉ | 330804/371472 [4:09:13<2:59:57, 3.77it/s] 89%|████████▉ | 330805/371472 [4:09:13<2:56:06, 3.85it/s] 89%|████████▉ | 330806/371472 [4:09:14<3:21:46, 3.36it/s] 89%|████████▉ | 330807/371472 [4:09:14<3:14:10, 3.49it/s] 89%|████████▉ | 330808/371472 [4:09:14<3:08:26, 3.60it/s] 89%|████████▉ | 330809/371472 [4:09:15<3:03:36, 3.69it/s] 89%|████████▉ | 330810/371472 [4:09:15<3:19:17, 3.40it/s] 89%|████████▉ | 330811/371472 [4:09:15<3:15:48, 3.46it/s] 89%|████████▉ | 330812/371472 [4:09:15<3:10:25, 3.56it/s] 89%|████████▉ | 330813/371472 [4:09:16<3:17:28, 3.43it/s] 89%|████████▉ | 330814/371472 [4:09:16<3:34:56, 3.15it/s] 89%|████████▉ | 330815/371472 [4:09:16<3:25:50, 3.29it/s] 89%|████████▉ | 330816/371472 [4:09:17<3:15:18, 3.47it/s] 89%|████████▉ | 330817/371472 [4:09:17<3:31:48, 3.20it/s] 89%|████████▉ | 330818/371472 [4:09:17<3:22:57, 3.34it/s] 89%|████████▉ | 330819/371472 [4:09:18<3:16:31, 3.45it/s] 89%|████████▉ | 330820/371472 [4:09:18<3:22:57, 3.34it/s] {'loss': 2.5475, 'learning_rate': 1.9854446335840033e-07, 'epoch': 14.25} + 89%|████████▉ | 330820/371472 [4:09:18<3:22:57, 3.34it/s] 89%|████████▉ | 330821/371472 [4:09:18<3:21:33, 3.36it/s] 89%|████████▉ | 330822/371472 [4:09:19<3:33:53, 3.17it/s] 89%|████████▉ | 330823/371472 [4:09:19<3:22:58, 3.34it/s] 89%|████████▉ | 330824/371472 [4:09:19<3:34:35, 3.16it/s] 89%|████████▉ | 330825/371472 [4:09:20<3:45:30, 3.00it/s] 89%|████████▉ | 330826/371472 [4:09:20<3:32:22, 3.19it/s] 89%|████████▉ | 330827/371472 [4:09:20<3:28:29, 3.25it/s] 89%|████████▉ | 330828/371472 [4:09:20<3:22:18, 3.35it/s] 89%|████████▉ | 330829/371472 [4:09:21<3:14:34, 3.48it/s] 89%|████████▉ | 330830/371472 [4:09:21<3:09:17, 3.58it/s] 89%|████████▉ | 330831/371472 [4:09:21<3:09:02, 3.58it/s] 89%|████████▉ | 330832/371472 [4:09:22<3:19:14, 3.40it/s] 89%|████████▉ | 330833/371472 [4:09:22<3:20:42, 3.37it/s] 89%|████████▉ | 330834/371472 [4:09:22<3:26:58, 3.27it/s] 89%|████████▉ | 330835/371472 [4:09:22<3:22:44, 3.34it/s] 89%|████████▉ | 330836/371472 [4:09:23<3:25:57, 3.29it/s] 89%|████████▉ | 330837/371472 [4:09:23<3:19:36, 3.39it/s] 89%|████████▉ | 330838/371472 [4:09:23<3:38:59, 3.09it/s] 89%|████████▉ | 330839/371472 [4:09:24<3:26:57, 3.27it/s] 89%|████████▉ | 330840/371472 [4:09:24<3:14:41, 3.48it/s] {'loss': 2.6158, 'learning_rate': 1.9849598138292138e-07, 'epoch': 14.25} + 89%|████████▉ | 330840/371472 [4:09:24<3:14:41, 3.48it/s] 89%|████████▉ | 330841/371472 [4:09:24<3:06:31, 3.63it/s] 89%|████████▉ | 330842/371472 [4:09:24<3:03:50, 3.68it/s] 89%|████████▉ | 330843/371472 [4:09:25<3:03:41, 3.69it/s] 89%|████████▉ | 330844/371472 [4:09:25<3:04:44, 3.67it/s] 89%|████████▉ | 330845/371472 [4:09:25<3:01:34, 3.73it/s] 89%|████████▉ | 330846/371472 [4:09:26<3:07:30, 3.61it/s] 89%|████████▉ | 330847/371472 [4:09:26<3:23:46, 3.32it/s] 89%|████████▉ | 330848/371472 [4:09:26<3:24:45, 3.31it/s] 89%|████████▉ | 330849/371472 [4:09:26<3:15:45, 3.46it/s] 89%|████████▉ | 330850/371472 [4:09:27<3:15:58, 3.45it/s] 89%|████████▉ | 330851/371472 [4:09:27<3:09:34, 3.57it/s] 89%|████████▉ | 330852/371472 [4:09:27<3:11:11, 3.54it/s] 89%|████████▉ | 330853/371472 [4:09:28<3:05:51, 3.64it/s] 89%|████████▉ | 330854/371472 [4:09:28<3:02:20, 3.71it/s] 89%|████████▉ | 330855/371472 [4:09:28<2:57:17, 3.82it/s] 89%|████████▉ | 330856/371472 [4:09:28<2:59:31, 3.77it/s] 89%|████████▉ | 330857/371472 [4:09:29<3:16:44, 3.44it/s] 89%|████████▉ | 330858/371472 [4:09:29<3:10:12, 3.56it/s] 89%|████████▉ | 330859/371472 [4:09:29<3:04:38, 3.67it/s] 89%|████████▉ | 330860/371472 [4:09:30<3:16:29, 3.44it/s] {'loss': 2.441, 'learning_rate': 1.9844749940744255e-07, 'epoch': 14.25} + 89%|████████▉ | 330860/371472 [4:09:30<3:16:29, 3.44it/s] 89%|████████▉ | 330861/371472 [4:09:30<3:08:44, 3.59it/s] 89%|████████▉ | 330862/371472 [4:09:30<3:13:15, 3.50it/s] 89%|████████▉ | 330863/371472 [4:09:30<3:25:53, 3.29it/s] 89%|████████▉ | 330864/371472 [4:09:31<3:21:52, 3.35it/s] 89%|████████▉ | 330865/371472 [4:09:31<3:16:48, 3.44it/s] 89%|████████▉ | 330866/371472 [4:09:31<3:12:02, 3.52it/s] 89%|████████▉ | 330867/371472 [4:09:31<3:04:53, 3.66it/s] 89%|████████▉ | 330868/371472 [4:09:32<3:05:51, 3.64it/s] 89%|████████▉ | 330869/371472 [4:09:32<3:09:05, 3.58it/s] 89%|████████▉ | 330870/371472 [4:09:32<3:12:11, 3.52it/s] 89%|████████▉ | 330871/371472 [4:09:33<3:10:06, 3.56it/s] 89%|████████▉ | 330872/371472 [4:09:33<3:10:31, 3.55it/s] 89%|████████▉ | 330873/371472 [4:09:33<3:19:24, 3.39it/s] 89%|████████▉ | 330874/371472 [4:09:33<3:09:34, 3.57it/s] 89%|████████▉ | 330875/371472 [4:09:34<3:06:17, 3.63it/s] 89%|████████▉ | 330876/371472 [4:09:34<3:17:21, 3.43it/s] 89%|████████▉ | 330877/371472 [4:09:34<3:20:12, 3.38it/s] 89%|████████▉ | 330878/371472 [4:09:35<3:22:14, 3.35it/s] 89%|████████▉ | 330879/371472 [4:09:35<3:24:37, 3.31it/s] 89%|████████▉ | 330880/371472 [4:09:35<3:18:05, 3.42it/s] {'loss': 2.6782, 'learning_rate': 1.9839901743196363e-07, 'epoch': 14.25} + 89%|████████▉ | 330880/371472 [4:09:35<3:18:05, 3.42it/s] 89%|████████▉ | 330881/371472 [4:09:36<3:11:21, 3.54it/s] 89%|████████▉ | 330882/371472 [4:09:36<3:09:03, 3.58it/s] 89%|████████▉ | 330883/371472 [4:09:36<3:06:30, 3.63it/s] 89%|████████▉ | 330884/371472 [4:09:36<3:06:44, 3.62it/s] 89%|████████▉ | 330885/371472 [4:09:37<3:10:33, 3.55it/s] 89%|████████▉ | 330886/371472 [4:09:37<3:37:12, 3.11it/s] 89%|████████▉ | 330887/371472 [4:09:37<3:34:54, 3.15it/s] 89%|████████▉ | 330888/371472 [4:09:38<3:27:28, 3.26it/s] 89%|████████▉ | 330889/371472 [4:09:38<3:23:34, 3.32it/s] 89%|████████▉ | 330890/371472 [4:09:38<3:22:02, 3.35it/s] 89%|████████▉ | 330891/371472 [4:09:38<3:09:36, 3.57it/s] 89%|████████▉ | 330892/371472 [4:09:39<3:25:16, 3.29it/s] 89%|████████▉ | 330893/371472 [4:09:39<3:26:39, 3.27it/s] 89%|████████▉ | 330894/371472 [4:09:39<3:15:52, 3.45it/s] 89%|████████▉ | 330895/371472 [4:09:40<3:22:57, 3.33it/s] 89%|████████▉ | 330896/371472 [4:09:40<3:16:47, 3.44it/s] 89%|████████▉ | 330897/371472 [4:09:40<3:13:39, 3.49it/s] 89%|████████▉ | 330898/371472 [4:09:41<3:15:41, 3.46it/s] 89%|████████▉ | 330899/371472 [4:09:41<3:15:14, 3.46it/s] 89%|████████▉ | 330900/371472 [4:09:41<3:15:53, 3.45it/s] {'loss': 2.6698, 'learning_rate': 1.9835053545648475e-07, 'epoch': 14.25} + 89%|████████▉ | 330900/371472 [4:09:41<3:15:53, 3.45it/s] 89%|████████▉ | 330901/371472 [4:09:41<3:17:24, 3.43it/s] 89%|████████▉ | 330902/371472 [4:09:42<3:15:12, 3.46it/s] 89%|████████▉ | 330903/371472 [4:09:42<3:17:26, 3.42it/s] 89%|████████▉ | 330904/371472 [4:09:42<3:25:41, 3.29it/s] 89%|████████▉ | 330905/371472 [4:09:43<3:17:03, 3.43it/s] 89%|████████▉ | 330906/371472 [4:09:43<3:09:29, 3.57it/s] 89%|████████▉ | 330907/371472 [4:09:43<3:09:27, 3.57it/s] 89%|████████▉ | 330908/371472 [4:09:43<3:04:17, 3.67it/s] 89%|████████▉ | 330909/371472 [4:09:44<3:01:09, 3.73it/s] 89%|████████▉ | 330910/371472 [4:09:44<3:01:34, 3.72it/s] 89%|████████▉ | 330911/371472 [4:09:44<3:07:20, 3.61it/s] 89%|████████▉ | 330912/371472 [4:09:45<3:12:25, 3.51it/s] 89%|████████▉ | 330913/371472 [4:09:45<3:23:17, 3.33it/s] 89%|████████▉ | 330914/371472 [4:09:45<3:17:08, 3.43it/s] 89%|████████▉ | 330915/371472 [4:09:45<3:09:48, 3.56it/s] 89%|████████▉ | 330916/371472 [4:09:46<3:12:08, 3.52it/s] 89%|████████▉ | 330917/371472 [4:09:46<3:16:13, 3.44it/s] 89%|████████▉ | 330918/371472 [4:09:46<3:18:17, 3.41it/s] 89%|████████▉ | 330919/371472 [4:09:47<3:15:57, 3.45it/s] 89%|████████▉ | 330920/371472 [4:09:47<3:17:03, 3.43it/s] {'loss': 2.6545, 'learning_rate': 1.9830205348100582e-07, 'epoch': 14.25} + 89%|████████▉ | 330920/371472 [4:09:47<3:17:03, 3.43it/s] 89%|████████▉ | 330921/371472 [4:09:47<3:15:39, 3.45it/s] 89%|█████���██▉ | 330922/371472 [4:09:47<3:11:41, 3.53it/s] 89%|████████▉ | 330923/371472 [4:09:48<3:07:27, 3.61it/s] 89%|████████▉ | 330924/371472 [4:09:48<3:07:43, 3.60it/s] 89%|████████▉ | 330925/371472 [4:09:48<3:12:12, 3.52it/s] 89%|████████▉ | 330926/371472 [4:09:48<3:07:21, 3.61it/s] 89%|████████▉ | 330927/371472 [4:09:49<3:03:02, 3.69it/s] 89%|████████▉ | 330928/371472 [4:09:49<3:02:11, 3.71it/s] 89%|████████▉ | 330929/371472 [4:09:49<3:11:24, 3.53it/s] 89%|████████▉ | 330930/371472 [4:09:50<3:14:54, 3.47it/s] 89%|████████▉ | 330931/371472 [4:09:50<3:13:06, 3.50it/s] 89%|████████▉ | 330932/371472 [4:09:50<3:17:09, 3.43it/s] 89%|████████▉ | 330933/371472 [4:09:50<3:14:08, 3.48it/s] 89%|████████▉ | 330934/371472 [4:09:51<3:12:26, 3.51it/s] 89%|████████▉ | 330935/371472 [4:09:51<3:11:30, 3.53it/s] 89%|████████▉ | 330936/371472 [4:09:51<3:11:43, 3.52it/s] 89%|████████▉ | 330937/371472 [4:09:52<3:04:40, 3.66it/s] 89%|████████▉ | 330938/371472 [4:09:52<3:19:39, 3.38it/s] 89%|████████▉ | 330939/371472 [4:09:52<3:14:54, 3.47it/s] 89%|████████▉ | 330940/371472 [4:09:53<3:21:28, 3.35it/s] {'loss': 2.5486, 'learning_rate': 1.98253571505527e-07, 'epoch': 14.25} + 89%|████████▉ | 330940/371472 [4:09:53<3:21:28, 3.35it/s] 89%|████████▉ | 330941/371472 [4:09:53<3:16:29, 3.44it/s] 89%|████████▉ | 330942/371472 [4:09:53<3:16:06, 3.44it/s] 89%|████████▉ | 330943/371472 [4:09:53<3:13:25, 3.49it/s] 89%|████████▉ | 330944/371472 [4:09:54<3:15:07, 3.46it/s] 89%|████████▉ | 330945/371472 [4:09:54<3:13:12, 3.50it/s] 89%|████████▉ | 330946/371472 [4:09:54<3:11:49, 3.52it/s] 89%|████████▉ | 330947/371472 [4:09:54<3:09:03, 3.57it/s] 89%|████████▉ | 330948/371472 [4:09:55<3:05:19, 3.64it/s] 89%|████████▉ | 330949/371472 [4:09:55<3:15:06, 3.46it/s] 89%|████████▉ | 330950/371472 [4:09:55<3:26:40, 3.27it/s] 89%|████████▉ | 330951/371472 [4:09:56<3:15:45, 3.45it/s] 89%|████████▉ | 330952/371472 [4:09:56<3:13:54, 3.48it/s] 89%|████████▉ | 330953/371472 [4:09:56<3:12:25, 3.51it/s] 89%|████████▉ | 330954/371472 [4:09:57<3:15:15, 3.46it/s] 89%|████████▉ | 330955/371472 [4:09:57<3:14:48, 3.47it/s] 89%|████████▉ | 330956/371472 [4:09:57<3:14:12, 3.48it/s] 89%|████████▉ | 330957/371472 [4:09:57<3:11:54, 3.52it/s] 89%|████████▉ | 330958/371472 [4:09:58<3:10:45, 3.54it/s] 89%|████████▉ | 330959/371472 [4:09:58<3:10:54, 3.54it/s] 89%|████████▉ | 330960/371472 [4:09:58<3:14:18, 3.47it/s] {'loss': 2.5391, 'learning_rate': 1.9820508953004804e-07, 'epoch': 14.26} + 89%|████████▉ | 330960/371472 [4:09:58<3:14:18, 3.47it/s] 89%|████████▉ | 330961/371472 [4:09:59<3:24:38, 3.30it/s] 89%|████████▉ | 330962/371472 [4:09:59<3:16:16, 3.44it/s] 89%|████████▉ | 330963/371472 [4:09:59<3:19:42, 3.38it/s] 89%|████████▉ | 330964/371472 [4:09:59<3:11:04, 3.53it/s] 89%|████████▉ | 330965/371472 [4:10:00<3:10:17, 3.55it/s] 89%|████████▉ | 330966/371472 [4:10:00<3:13:46, 3.48it/s] 89%|████████▉ | 330967/371472 [4:10:00<3:29:13, 3.23it/s] 89%|████████▉ | 330968/371472 [4:10:01<3:17:39, 3.42it/s] 89%|████████▉ | 330969/371472 [4:10:01<3:12:30, 3.51it/s] 89%|████████▉ | 330970/371472 [4:10:01<3:09:01, 3.57it/s] 89%|████████▉ | 330971/371472 [4:10:01<3:08:43, 3.58it/s] 89%|████████▉ | 330972/371472 [4:10:02<3:20:12, 3.37it/s] 89%|████████▉ | 330973/371472 [4:10:02<3:27:46, 3.25it/s] 89%|████████▉ | 330974/371472 [4:10:02<3:22:05, 3.34it/s] 89%|████████▉ | 330975/371472 [4:10:03<3:29:43, 3.22it/s] 89%|████████▉ | 330976/371472 [4:10:03<3:26:33, 3.27it/s] 89%|████████▉ | 330977/371472 [4:10:03<3:19:07, 3.39it/s] 89%|████████▉ | 330978/371472 [4:10:04<3:08:26, 3.58it/s] 89%|████████▉ | 330979/371472 [4:10:04<3:10:13, 3.55it/s] 89%|████████▉ | 330980/371472 [4:10:04<3:04:37, 3.66it/s] {'loss': 2.6845, 'learning_rate': 1.981566075545691e-07, 'epoch': 14.26} + 89%|████████▉ | 330980/371472 [4:10:04<3:04:37, 3.66it/s] 89%|████████▉ | 330981/371472 [4:10:04<3:18:31, 3.40it/s] 89%|████████▉ | 330982/371472 [4:10:05<3:37:36, 3.10it/s] 89%|████████▉ | 330983/371472 [4:10:05<3:37:49, 3.10it/s] 89%|████████▉ | 330984/371472 [4:10:05<3:33:13, 3.16it/s] 89%|████████▉ | 330985/371472 [4:10:06<3:27:51, 3.25it/s] 89%|████████▉ | 330986/371472 [4:10:06<3:22:58, 3.32it/s] 89%|████████▉ | 330987/371472 [4:10:06<3:27:35, 3.25it/s] 89%|████████▉ | 330988/371472 [4:10:07<3:17:08, 3.42it/s] 89%|████████▉ | 330989/371472 [4:10:07<3:14:20, 3.47it/s] 89%|████████▉ | 330990/371472 [4:10:07<3:08:29, 3.58it/s] 89%|████████▉ | 330991/371472 [4:10:07<3:18:20, 3.40it/s] 89%|████████▉ | 330992/371472 [4:10:08<3:08:36, 3.58it/s] 89%|████████▉ | 330993/371472 [4:10:08<3:14:00, 3.48it/s] 89%|████████▉ | 330994/371472 [4:10:08<3:06:17, 3.62it/s] 89%|████████▉ | 330995/371472 [4:10:09<3:22:27, 3.33it/s] 89%|████████▉ | 330996/371472 [4:10:09<3:20:12, 3.37it/s] 89%|████████▉ | 330997/371472 [4:10:09<3:20:23, 3.37it/s] 89%|████████▉ | 330998/371472 [4:10:09<3:23:44, 3.31it/s] 89%|████████▉ | 330999/371472 [4:10:10<3:25:26, 3.28it/s] 89%|████████▉ | 331000/371472 [4:10:10<3:29:42, 3.22it/s] {'loss': 2.3916, 'learning_rate': 1.9810812557909027e-07, 'epoch': 14.26} + 89%|████████▉ | 331000/371472 [4:10:10<3:29:42, 3.22it/s] 89%|████████▉ | 331001/371472 [4:10:10<3:23:16, 3.32it/s] 89%|████████▉ | 331002/371472 [4:10:11<3:11:24, 3.52it/s] 89%|████████▉ | 331003/371472 [4:10:11<3:19:03, 3.39it/s] 89%|████████▉ | 331004/371472 [4:10:11<3:15:41, 3.45it/s] 89%|████████▉ | 331005/371472 [4:10:12<3:11:46, 3.52it/s] 89%|████████▉ | 331006/371472 [4:10:12<3:24:07, 3.30it/s] 89%|████████▉ | 331007/371472 [4:10:12<3:24:09, 3.30it/s] 89%|████████▉ | 331008/371472 [4:10:12<3:19:44, 3.38it/s] 89%|████████▉ | 331009/371472 [4:10:13<3:13:11, 3.49it/s] 89%|████████▉ | 331010/371472 [4:10:13<3:18:50, 3.39it/s] 89%|████████▉ | 331011/371472 [4:10:13<3:12:57, 3.49it/s] 89%|████████▉ | 331012/371472 [4:10:14<3:14:38, 3.46it/s] 89%|████████▉ | 331013/371472 [4:10:14<3:14:20, 3.47it/s] 89%|████████▉ | 331014/371472 [4:10:14<3:18:24, 3.40it/s] 89%|████████▉ | 331015/371472 [4:10:14<3:12:02, 3.51it/s] 89%|████████▉ | 331016/371472 [4:10:15<3:12:48, 3.50it/s] 89%|████████▉ | 331017/371472 [4:10:15<3:04:36, 3.65it/s] 89%|████████▉ | 331018/371472 [4:10:15<3:01:40, 3.71it/s] 89%|████████▉ | 331019/371472 [4:10:16<3:07:44, 3.59it/s] 89%|████████▉ | 331020/371472 [4:10:16<3:09:19, 3.56it/s] {'loss': 2.6396, 'learning_rate': 1.9805964360361129e-07, 'epoch': 14.26} + 89%|████████▉ | 331020/371472 [4:10:16<3:09:19, 3.56it/s] 89%|████████▉ | 331021/371472 [4:10:16<3:10:58, 3.53it/s] 89%|████████▉ | 331022/371472 [4:10:16<3:18:25, 3.40it/s] 89%|████████▉ | 331023/371472 [4:10:17<3:08:00, 3.59it/s] 89%|████████▉ | 331024/371472 [4:10:17<3:06:03, 3.62it/s] 89%|████████▉ | 331025/371472 [4:10:17<3:07:05, 3.60it/s] 89%|████████▉ | 331026/371472 [4:10:17<3:05:20, 3.64it/s] 89%|████████▉ | 331027/371472 [4:10:18<3:15:34, 3.45it/s] 89%|████████▉ | 331028/371472 [4:10:18<3:22:51, 3.32it/s] 89%|████████▉ | 331029/371472 [4:10:18<3:15:44, 3.44it/s] 89%|████████▉ | 331030/371472 [4:10:19<3:09:17, 3.56it/s] 89%|████████▉ | 331031/371472 [4:10:19<3:15:05, 3.45it/s] 89%|████████▉ | 331032/371472 [4:10:19<3:08:24, 3.58it/s] 89%|████████▉ | 331033/371472 [4:10:20<3:18:26, 3.40it/s] 89%|████████▉ | 331034/371472 [4:10:20<3:21:21, 3.35it/s] 89%|████████▉ | 331035/371472 [4:10:20<3:14:29, 3.47it/s] 89%|████████▉ | 331036/371472 [4:10:20<3:09:41, 3.55it/s] 89%|████████▉ | 331037/371472 [4:10:21<3:09:22, 3.56it/s] 89%|████████▉ | 331038/371472 [4:10:21<3:19:41, 3.37it/s] 89%|████████▉ | 331039/371472 [4:10:21<3:32:49, 3.17it/s] 89%|████████▉ | 331040/371472 [4:10:22<3:33:01, 3.16it/s] {'loss': 2.6673, 'learning_rate': 1.9801116162813246e-07, 'epoch': 14.26} + 89%|████████▉ | 331040/371472 [4:10:22<3:33:01, 3.16it/s] 89%|████████▉ | 331041/371472 [4:10:22<3:29:01, 3.22it/s] 89%|████████▉ | 331042/371472 [4:10:22<3:15:11, 3.45it/s] 89%|████████▉ | 331043/371472 [4:10:22<3:09:57, 3.55it/s] 89%|████████▉ | 331044/371472 [4:10:23<3:05:57, 3.62it/s] 89%|████████▉ | 331045/371472 [4:10:23<3:03:43, 3.67it/s] 89%|████████▉ | 331046/371472 [4:10:23<3:05:28, 3.63it/s] 89%|████████▉ | 331047/371472 [4:10:24<3:05:43, 3.63it/s] 89%|████████▉ | 331048/371472 [4:10:24<3:01:31, 3.71it/s] 89%|████████▉ | 331049/371472 [4:10:24<3:04:56, 3.64it/s] 89%|████████▉ | 331050/371472 [4:10:24<3:02:53, 3.68it/s] 89%|████████▉ | 331051/371472 [4:10:25<3:08:07, 3.58it/s] 89%|████████▉ | 331052/371472 [4:10:25<3:09:58, 3.55it/s] 89%|████████▉ | 331053/371472 [4:10:25<3:23:54, 3.30it/s] 89%|████████▉ | 331054/371472 [4:10:26<3:27:50, 3.24it/s] 89%|████████▉ | 331055/371472 [4:10:26<3:28:22, 3.23it/s] 89%|████████▉ | 331056/371472 [4:10:26<3:31:55, 3.18it/s] 89%|████████▉ | 331057/371472 [4:10:27<3:23:52, 3.30it/s] 89%|████████▉ | 331058/371472 [4:10:27<3:21:18, 3.35it/s] 89%|████████▉ | 331059/371472 [4:10:27<3:16:09, 3.43it/s] 89%|████████▉ | 331060/371472 [4:10:27<3:10:34, 3.53it/s] {'loss': 2.5822, 'learning_rate': 1.9796267965265353e-07, 'epoch': 14.26} + 89%|████████▉ | 331060/371472 [4:10:27<3:10:34, 3.53it/s] 89%|████████▉ | 331061/371472 [4:10:28<3:09:52, 3.55it/s] 89%|████████▉ | 331062/371472 [4:10:28<3:04:26, 3.65it/s] 89%|████████▉ | 331063/371472 [4:10:28<3:13:46, 3.48it/s] 89%|████████▉ | 331064/371472 [4:10:29<3:27:19, 3.25it/s] 89%|████████▉ | 331065/371472 [4:10:29<3:29:20, 3.22it/s] 89%|████████▉ | 331066/371472 [4:10:29<3:16:46, 3.42it/s] 89%|████████▉ | 331067/371472 [4:10:29<3:12:46, 3.49it/s] 89%|████████▉ | 331068/371472 [4:10:30<3:21:56, 3.33it/s] 89%|████████▉ | 331069/371472 [4:10:30<3:20:09, 3.36it/s] 89%|████████▉ | 331070/371472 [4:10:30<3:13:22, 3.48it/s] 89%|████████▉ | 331071/371472 [4:10:31<3:25:15, 3.28it/s] 89%|████████▉ | 331072/371472 [4:10:31<3:25:03, 3.28it/s] 89%|████████▉ | 331073/371472 [4:10:31<3:23:35, 3.31it/s] 89%|████████▉ | 331074/371472 [4:10:32<3:10:50, 3.53it/s] 89%|████████▉ | 331075/371472 [4:10:32<3:08:05, 3.58it/s] 89%|████████▉ | 331076/371472 [4:10:32<3:03:21, 3.67it/s] 89%|████████▉ | 331077/371472 [4:10:32<3:07:54, 3.58it/s] 89%|████████▉ | 331078/371472 [4:10:33<3:24:34, 3.29it/s] 89%|████████▉ | 331079/371472 [4:10:33<3:19:23, 3.38it/s] 89%|████████▉ | 331080/371472 [4:10:33<3:19:10, 3.38it/s] {'loss': 2.5869, 'learning_rate': 1.9791419767717469e-07, 'epoch': 14.26} + 89%|████████▉ | 331080/371472 [4:10:33<3:19:10, 3.38it/s] 89%|████████▉ | 331081/371472 [4:10:34<3:27:44, 3.24it/s] 89%|████████▉ | 331082/371472 [4:10:34<3:18:06, 3.40it/s] 89%|████████▉ | 331083/371472 [4:10:34<3:15:06, 3.45it/s] 89%|████████▉ | 331084/371472 [4:10:34<3:15:15, 3.45it/s] 89%|████████▉ | 331085/371472 [4:10:35<3:07:17, 3.59it/s] 89%|████████▉ | 331086/371472 [4:10:35<3:05:58, 3.62it/s] 89%|████████▉ | 331087/371472 [4:10:35<3:05:22, 3.63it/s] 89%|████████▉ | 331088/371472 [4:10:35<3:04:31, 3.65it/s] 89%|████████▉ | 331089/371472 [4:10:36<3:02:32, 3.69it/s] 89%|████████▉ | 331090/371472 [4:10:36<3:04:56, 3.64it/s] 89%|████████▉ | 331091/371472 [4:10:36<2:58:38, 3.77it/s] 89%|████████▉ | 331092/371472 [4:10:37<2:57:29, 3.79it/s] 89%|████████▉ | 331093/371472 [4:10:37<2:57:33, 3.79it/s] 89%|████████▉ | 331094/371472 [4:10:37<2:57:16, 3.80it/s] 89%|████████▉ | 331095/371472 [4:10:37<3:07:00, 3.60it/s] 89%|████████▉ | 331096/371472 [4:10:38<3:10:15, 3.54it/s] 89%|█████��██▉ | 331097/371472 [4:10:38<3:10:15, 3.54it/s] 89%|████████▉ | 331098/371472 [4:10:38<3:10:40, 3.53it/s] 89%|████████▉ | 331099/371472 [4:10:39<3:15:32, 3.44it/s] 89%|████████▉ | 331100/371472 [4:10:39<3:19:31, 3.37it/s] {'loss': 2.5835, 'learning_rate': 1.9786571570169573e-07, 'epoch': 14.26} + 89%|████████▉ | 331100/371472 [4:10:39<3:19:31, 3.37it/s] 89%|████████▉ | 331101/371472 [4:10:39<3:23:31, 3.31it/s] 89%|████████▉ | 331102/371472 [4:10:39<3:23:10, 3.31it/s] 89%|████████▉ | 331103/371472 [4:10:40<3:18:19, 3.39it/s] 89%|████████▉ | 331104/371472 [4:10:40<3:15:34, 3.44it/s] 89%|████████▉ | 331105/371472 [4:10:40<3:22:54, 3.32it/s] 89%|████████▉ | 331106/371472 [4:10:41<3:16:27, 3.42it/s] 89%|████████▉ | 331107/371472 [4:10:41<3:11:38, 3.51it/s] 89%|████████▉ | 331108/371472 [4:10:41<3:19:24, 3.37it/s] 89%|████████▉ | 331109/371472 [4:10:41<3:10:55, 3.52it/s] 89%|████████▉ | 331110/371472 [4:10:42<3:23:51, 3.30it/s] 89%|████████▉ | 331111/371472 [4:10:42<3:17:06, 3.41it/s] 89%|████████▉ | 331112/371472 [4:10:42<3:13:20, 3.48it/s] 89%|████████▉ | 331113/371472 [4:10:43<3:09:26, 3.55it/s] 89%|████████▉ | 331114/371472 [4:10:43<3:09:17, 3.55it/s] 89%|████████▉ | 331115/371472 [4:10:43<3:07:41, 3.58it/s] 89%|████████▉ | 331116/371472 [4:10:43<3:07:42, 3.58it/s] 89%|████████▉ | 331117/371472 [4:10:44<3:27:36, 3.24it/s] 89%|████████▉ | 331118/371472 [4:10:44<3:28:26, 3.23it/s] 89%|████████▉ | 331119/371472 [4:10:44<3:27:15, 3.24it/s] 89%|████████▉ | 331120/371472 [4:10:45<3:14:12, 3.46it/s] {'loss': 2.6657, 'learning_rate': 1.978172337262169e-07, 'epoch': 14.26} + 89%|████████▉ | 331120/371472 [4:10:45<3:14:12, 3.46it/s] 89%|████████▉ | 331121/371472 [4:10:45<3:11:48, 3.51it/s] 89%|████████▉ | 331122/371472 [4:10:45<3:11:15, 3.52it/s] 89%|████████▉ | 331123/371472 [4:10:46<3:26:17, 3.26it/s] 89%|████████▉ | 331124/371472 [4:10:46<3:17:19, 3.41it/s] 89%|████████▉ | 331125/371472 [4:10:46<3:21:32, 3.34it/s] 89%|████████▉ | 331126/371472 [4:10:47<3:30:34, 3.19it/s] 89%|████████▉ | 331127/371472 [4:10:47<3:29:30, 3.21it/s] 89%|████████▉ | 331128/371472 [4:10:47<3:20:03, 3.36it/s] 89%|████████▉ | 331129/371472 [4:10:47<3:30:24, 3.20it/s] 89%|████████▉ | 331130/371472 [4:10:48<3:20:00, 3.36it/s] 89%|████████▉ | 331131/371472 [4:10:48<3:16:47, 3.42it/s] 89%|████████▉ | 331132/371472 [4:10:48<3:36:17, 3.11it/s] 89%|████████▉ | 331133/371472 [4:10:49<3:25:27, 3.27it/s] 89%|████████▉ | 331134/371472 [4:10:49<3:25:29, 3.27it/s] 89%|████████▉ | 331135/371472 [4:10:49<3:16:15, 3.43it/s] 89%|████████▉ | 331136/371472 [4:10:50<3:09:18, 3.55it/s] 89%|████████▉ | 331137/371472 [4:10:50<3:10:07, 3.54it/s] 89%|████████▉ | 331138/371472 [4:10:50<3:07:01, 3.59it/s] 89%|████████▉ | 331139/371472 [4:10:50<3:06:38, 3.60it/s] 89%|████████▉ | 331140/371472 [4:10:51<3:00:51, 3.72it/s] {'loss': 2.6775, 'learning_rate': 1.9776875175073798e-07, 'epoch': 14.26} + 89%|████████▉ | 331140/371472 [4:10:51<3:00:51, 3.72it/s] 89%|████████▉ | 331141/371472 [4:10:51<3:05:42, 3.62it/s] 89%|████████▉ | 331142/371472 [4:10:51<3:11:26, 3.51it/s] 89%|████████▉ | 331143/371472 [4:10:51<3:02:49, 3.68it/s] 89%|████████▉ | 331144/371472 [4:10:52<3:10:51, 3.52it/s] 89%|████████▉ | 331145/371472 [4:10:52<3:07:34, 3.58it/s] 89%|████████▉ | 331146/371472 [4:10:52<3:06:26, 3.60it/s] 89%|████████▉ | 331147/371472 [4:10:53<3:09:11, 3.55it/s] 89%|████████▉ | 331148/371472 [4:10:53<3:06:31, 3.60it/s] 89%|████████▉ | 331149/371472 [4:10:53<3:05:19, 3.63it/s] 89%|████████▉ | 331150/371472 [4:10:53<3:02:33, 3.68it/s] 89%|████████▉ | 331151/371472 [4:10:54<3:04:55, 3.63it/s] 89%|████████▉ | 331152/371472 [4:10:54<3:00:48, 3.72it/s] 89%|████████▉ | 331153/371472 [4:10:54<3:07:13, 3.59it/s] 89%|████████▉ | 331154/371472 [4:10:54<3:08:52, 3.56it/s] 89%|████████▉ | 331155/371472 [4:10:55<3:05:22, 3.62it/s] 89%|████████▉ | 331156/371472 [4:10:55<3:06:46, 3.60it/s] 89%|████████▉ | 331157/371472 [4:10:55<3:09:04, 3.55it/s] 89%|████████▉ | 331158/371472 [4:10:56<3:09:32, 3.54it/s] 89%|████████▉ | 331159/371472 [4:10:56<3:06:37, 3.60it/s] 89%|████████▉ | 331160/371472 [4:10:56<3:14:37, 3.45it/s] {'loss': 2.5308, 'learning_rate': 1.977202697752591e-07, 'epoch': 14.26} + 89%|████████▉ | 331160/371472 [4:10:56<3:14:37, 3.45it/s] 89%|████████▉ | 331161/371472 [4:10:56<3:11:34, 3.51it/s] 89%|████████▉ | 331162/371472 [4:10:57<3:11:28, 3.51it/s] 89%|████████▉ | 331163/371472 [4:10:57<3:22:31, 3.32it/s] 89%|████████▉ | 331164/371472 [4:10:57<3:18:17, 3.39it/s] 89%|████████▉ | 331165/371472 [4:10:58<3:17:26, 3.40it/s] 89%|████████▉ | 331166/371472 [4:10:58<3:18:27, 3.39it/s] 89%|████████▉ | 331167/371472 [4:10:58<3:22:07, 3.32it/s] 89%|████████▉ | 331168/371472 [4:10:59<3:15:36, 3.43it/s] 89%|████████▉ | 331169/371472 [4:10:59<3:13:26, 3.47it/s] 89%|████████▉ | 331170/371472 [4:10:59<3:16:47, 3.41it/s] 89%|████████▉ | 331171/371472 [4:10:59<3:07:42, 3.58it/s] 89%|████████▉ | 331172/371472 [4:11:00<3:21:08, 3.34it/s] 89%|████████▉ | 331173/371472 [4:11:00<3:13:17, 3.47it/s] 89%|████████▉ | 331174/371472 [4:11:00<3:12:16, 3.49it/s] 89%|████████▉ | 331175/371472 [4:11:01<3:03:08, 3.67it/s] 89%|████████▉ | 331176/371472 [4:11:01<3:01:47, 3.69it/s] 89%|████████▉ | 331177/371472 [4:11:01<3:02:16, 3.68it/s] 89%|████████▉ | 331178/371472 [4:11:01<3:08:08, 3.57it/s] 89%|████████▉ | 331179/371472 [4:11:02<3:04:51, 3.63it/s] 89%|████████▉ | 331180/371472 [4:11:02<3:01:10, 3.71it/s] {'loss': 2.5507, 'learning_rate': 1.9767178779978018e-07, 'epoch': 14.26} + 89%|████████▉ | 331180/371472 [4:11:02<3:01:10, 3.71it/s] 89%|████████▉ | 331181/371472 [4:11:02<2:56:41, 3.80it/s] 89%|████████▉ | 331182/371472 [4:11:03<3:25:12, 3.27it/s] 89%|████████▉ | 331183/371472 [4:11:03<3:25:45, 3.26it/s] 89%|████████▉ | 331184/371472 [4:11:03<3:30:20, 3.19it/s] 89%|████████▉ | 331185/371472 [4:11:03<3:25:34, 3.27it/s] 89%|████████▉ | 331186/371472 [4:11:04<3:12:40, 3.48it/s] 89%|████████▉ | 331187/371472 [4:11:04<3:14:37, 3.45it/s] 89%|████████▉ | 331188/371472 [4:11:04<3:15:56, 3.43it/s] 89%|████████▉ | 331189/371472 [4:11:05<3:10:11, 3.53it/s] 89%|████████▉ | 331190/371472 [4:11:05<3:12:13, 3.49it/s] 89%|████████▉ | 331191/371472 [4:11:05<3:11:23, 3.51it/s] 89%|████████▉ | 331192/371472 [4:11:05<3:08:21, 3.56it/s] 89%|████████▉ | 331193/371472 [4:11:06<3:04:14, 3.64it/s] 89%|████████▉ | 331194/371472 [4:11:06<3:02:09, 3.69it/s] 89%|████████▉ | 331195/371472 [4:11:06<3:06:12, 3.61it/s] 89%|████████▉ | 331196/371472 [4:11:06<3:06:53, 3.59it/s] 89%|████████▉ | 331197/371472 [4:11:07<3:22:58, 3.31it/s] 89%|████████▉ | 331198/371472 [4:11:07<3:26:07, 3.26it/s] 89%|████████▉ | 331199/371472 [4:11:08<3:31:59, 3.17it/s] 89%|████████▉ | 331200/371472 [4:11:08<3:31:27, 3.17it/s] {'loss': 2.5392, 'learning_rate': 1.9762330582430135e-07, 'epoch': 14.27} + 89%|████████▉ | 331200/371472 [4:11:08<3:31:27, 3.17it/s] 89%|████████▉ | 331201/371472 [4:11:08<3:28:55, 3.21it/s] 89%|████████▉ | 331202/371472 [4:11:08<3:21:42, 3.33it/s] 89%|████████▉ | 331203/371472 [4:11:09<3:18:47, 3.38it/s] 89%|████████▉ | 331204/371472 [4:11:09<3:14:54, 3.44it/s] 89%|████████▉ | 331205/371472 [4:11:09<3:07:09, 3.59it/s] 89%|████████▉ | 331206/371472 [4:11:10<3:11:16, 3.51it/s] 89%|████████▉ | 331207/371472 [4:11:10<3:12:56, 3.48it/s] 89%|████████▉ | 331208/371472 [4:11:10<3:23:38, 3.30it/s] 89%|████████▉ | 331209/371472 [4:11:10<3:12:20, 3.49it/s] 89%|████████▉ | 331210/371472 [4:11:11<3:24:50, 3.28it/s] 89%|████████▉ | 331211/371472 [4:11:11<3:19:09, 3.37it/s] 89%|█��██████▉ | 331212/371472 [4:11:11<3:17:46, 3.39it/s] 89%|████████▉ | 331213/371472 [4:11:12<3:09:54, 3.53it/s] 89%|████████▉ | 331214/371472 [4:11:12<3:12:57, 3.48it/s] 89%|████████▉ | 331215/371472 [4:11:12<3:07:18, 3.58it/s] 89%|████████▉ | 331216/371472 [4:11:12<3:10:39, 3.52it/s] 89%|████████▉ | 331217/371472 [4:11:13<3:01:23, 3.70it/s] 89%|████████▉ | 331218/371472 [4:11:13<2:56:00, 3.81it/s] 89%|████████▉ | 331219/371472 [4:11:13<2:55:56, 3.81it/s] 89%|████████▉ | 331220/371472 [4:11:13<2:55:49, 3.82it/s] {'loss': 2.7014, 'learning_rate': 1.975748238488224e-07, 'epoch': 14.27} + 89%|████████▉ | 331220/371472 [4:11:13<2:55:49, 3.82it/s] 89%|████████▉ | 331221/371472 [4:11:14<2:58:46, 3.75it/s] 89%|████████▉ | 331222/371472 [4:11:14<3:04:01, 3.65it/s] 89%|████████▉ | 331223/371472 [4:11:14<3:13:48, 3.46it/s] 89%|████████▉ | 331224/371472 [4:11:15<3:16:02, 3.42it/s] 89%|████████▉ | 331225/371472 [4:11:15<3:16:56, 3.41it/s] 89%|████████▉ | 331226/371472 [4:11:15<3:17:40, 3.39it/s] 89%|████████▉ | 331227/371472 [4:11:16<3:33:12, 3.15it/s] 89%|████████▉ | 331228/371472 [4:11:16<3:28:02, 3.22it/s] 89%|████████▉ | 331229/371472 [4:11:16<3:23:57, 3.29it/s] 89%|████████▉ | 331230/371472 [4:11:16<3:22:35, 3.31it/s] 89%|████████▉ | 331231/371472 [4:11:17<3:27:38, 3.23it/s] 89%|████████▉ | 331232/371472 [4:11:17<3:27:13, 3.24it/s] 89%|████████▉ | 331233/371472 [4:11:17<3:22:17, 3.32it/s] 89%|████████▉ | 331234/371472 [4:11:18<3:16:52, 3.41it/s] 89%|████████▉ | 331235/371472 [4:11:18<3:12:50, 3.48it/s] 89%|████████▉ | 331236/371472 [4:11:18<3:11:13, 3.51it/s] 89%|████████▉ | 331237/371472 [4:11:18<3:06:33, 3.59it/s] 89%|████████▉ | 331238/371472 [4:11:19<3:03:58, 3.64it/s] 89%|████████▉ | 331239/371472 [4:11:19<3:01:53, 3.69it/s] 89%|████████▉ | 331240/371472 [4:11:19<3:16:58, 3.40it/s] {'loss': 2.7447, 'learning_rate': 1.9752634187334355e-07, 'epoch': 14.27} + 89%|████████▉ | 331240/371472 [4:11:19<3:16:58, 3.40it/s] 89%|████████▉ | 331241/371472 [4:11:20<3:18:05, 3.38it/s] 89%|████████▉ | 331242/371472 [4:11:20<3:09:26, 3.54it/s] 89%|████████▉ | 331243/371472 [4:11:20<3:05:48, 3.61it/s] 89%|████████▉ | 331244/371472 [4:11:20<3:09:03, 3.55it/s] 89%|████████▉ | 331245/371472 [4:11:21<3:04:23, 3.64it/s] 89%|████████▉ | 331246/371472 [4:11:21<3:06:37, 3.59it/s] 89%|████████▉ | 331247/371472 [4:11:21<3:03:43, 3.65it/s] 89%|████████▉ | 331248/371472 [4:11:22<3:02:20, 3.68it/s] 89%|████████▉ | 331249/371472 [4:11:22<3:03:22, 3.66it/s] 89%|████████▉ | 331250/371472 [4:11:22<3:05:25, 3.62it/s] 89%|████████▉ | 331251/371472 [4:11:22<3:04:20, 3.64it/s] 89%|████████▉ | 331252/371472 [4:11:23<3:04:40, 3.63it/s] 89%|████████▉ | 331253/371472 [4:11:23<3:04:42, 3.63it/s] 89%|████████▉ | 331254/371472 [4:11:23<3:04:02, 3.64it/s] 89%|████████▉ | 331255/371472 [4:11:23<3:03:40, 3.65it/s] 89%|████████▉ | 331256/371472 [4:11:24<3:00:40, 3.71it/s] 89%|████████▉ | 331257/371472 [4:11:24<3:06:03, 3.60it/s] 89%|████████▉ | 331258/371472 [4:11:24<3:14:05, 3.45it/s] 89%|████████▉ | 331259/371472 [4:11:25<3:13:31, 3.46it/s] 89%|████████▉ | 331260/371472 [4:11:25<3:17:07, 3.40it/s] {'loss': 2.7136, 'learning_rate': 1.9747785989786462e-07, 'epoch': 14.27} + 89%|████████▉ | 331260/371472 [4:11:25<3:17:07, 3.40it/s] 89%|████████▉ | 331261/371472 [4:11:25<3:16:35, 3.41it/s] 89%|████████▉ | 331262/371472 [4:11:26<3:19:23, 3.36it/s] 89%|████████▉ | 331263/371472 [4:11:26<3:17:25, 3.39it/s] 89%|████████▉ | 331264/371472 [4:11:26<3:12:59, 3.47it/s] 89%|████████▉ | 331265/371472 [4:11:26<3:30:45, 3.18it/s] 89%|████████▉ | 331266/371472 [4:11:27<3:22:40, 3.31it/s] 89%|████████▉ | 331267/371472 [4:11:27<3:20:55, 3.33it/s] 89%|████████▉ | 331268/371472 [4:11:27<3:17:48, 3.39it/s] 89%|████████▉ | 331269/371472 [4:11:28<3:14:56, 3.44it/s] 89%|████████▉ | 331270/371472 [4:11:28<3:07:49, 3.57it/s] 89%|████████▉ | 331271/371472 [4:11:28<3:03:07, 3.66it/s] 89%|████████▉ | 331272/371472 [4:11:28<2:59:51, 3.73it/s] 89%|████████▉ | 331273/371472 [4:11:29<3:04:11, 3.64it/s] 89%|████████▉ | 331274/371472 [4:11:29<3:06:28, 3.59it/s] 89%|████████▉ | 331275/371472 [4:11:29<3:06:08, 3.60it/s] 89%|████████▉ | 331276/371472 [4:11:30<3:24:55, 3.27it/s] 89%|████████▉ | 331277/371472 [4:11:30<3:14:43, 3.44it/s] 89%|████████▉ | 331278/371472 [4:11:30<3:18:58, 3.37it/s] 89%|████████▉ | 331279/371472 [4:11:30<3:13:09, 3.47it/s] 89%|████████▉ | 331280/371472 [4:11:31<3:12:18, 3.48it/s] {'loss': 2.5458, 'learning_rate': 1.9742937792238577e-07, 'epoch': 14.27} + 89%|████████▉ | 331280/371472 [4:11:31<3:12:18, 3.48it/s] 89%|████████▉ | 331281/371472 [4:11:31<3:08:55, 3.55it/s] 89%|████████▉ | 331282/371472 [4:11:31<3:06:48, 3.59it/s] 89%|████████▉ | 331283/371472 [4:11:32<3:04:00, 3.64it/s] 89%|████████▉ | 331284/371472 [4:11:32<3:15:16, 3.43it/s] 89%|████████▉ | 331285/371472 [4:11:32<3:12:58, 3.47it/s] 89%|████████▉ | 331286/371472 [4:11:32<3:17:56, 3.38it/s] 89%|████████▉ | 331287/371472 [4:11:33<3:08:05, 3.56it/s] 89%|████████▉ | 331288/371472 [4:11:33<3:05:56, 3.60it/s] 89%|████████▉ | 331289/371472 [4:11:33<3:09:08, 3.54it/s] 89%|████████▉ | 331290/371472 [4:11:34<3:04:44, 3.62it/s] 89%|████████▉ | 331291/371472 [4:11:34<3:02:36, 3.67it/s] 89%|████████▉ | 331292/371472 [4:11:34<3:04:05, 3.64it/s] 89%|████████▉ | 331293/371472 [4:11:34<3:28:15, 3.22it/s] 89%|████████▉ | 331294/371472 [4:11:35<3:29:44, 3.19it/s] 89%|████████▉ | 331295/371472 [4:11:35<3:27:17, 3.23it/s] 89%|████████▉ | 331296/371472 [4:11:35<3:16:40, 3.40it/s] 89%|████████▉ | 331297/371472 [4:11:36<3:19:56, 3.35it/s] 89%|████████▉ | 331298/371472 [4:11:36<3:17:33, 3.39it/s] 89%|████████▉ | 331299/371472 [4:11:36<3:14:10, 3.45it/s] 89%|████████▉ | 331300/371472 [4:11:36<3:12:03, 3.49it/s] {'loss': 2.565, 'learning_rate': 1.9738089594690682e-07, 'epoch': 14.27} + 89%|████████▉ | 331300/371472 [4:11:36<3:12:03, 3.49it/s] 89%|████████▉ | 331301/371472 [4:11:37<3:14:00, 3.45it/s] 89%|████████▉ | 331302/371472 [4:11:37<3:10:18, 3.52it/s] 89%|████████▉ | 331303/371472 [4:11:37<3:01:55, 3.68it/s] 89%|████████▉ | 331304/371472 [4:11:38<2:59:02, 3.74it/s] 89%|████████▉ | 331305/371472 [4:11:38<2:57:56, 3.76it/s] 89%|████████▉ | 331306/371472 [4:11:38<3:06:02, 3.60it/s] 89%|████████▉ | 331307/371472 [4:11:38<3:06:16, 3.59it/s] 89%|████████▉ | 331308/371472 [4:11:39<3:10:09, 3.52it/s] 89%|████████▉ | 331309/371472 [4:11:39<3:23:33, 3.29it/s] 89%|████████▉ | 331310/371472 [4:11:39<3:34:03, 3.13it/s] 89%|████████▉ | 331311/371472 [4:11:40<3:40:32, 3.03it/s] 89%|████████▉ | 331312/371472 [4:11:40<3:38:16, 3.07it/s] 89%|████████▉ | 331313/371472 [4:11:40<3:30:38, 3.18it/s] 89%|████████▉ | 331314/371472 [4:11:41<3:23:37, 3.29it/s] 89%|████████▉ | 331315/371472 [4:11:41<3:17:30, 3.39it/s] 89%|████████▉ | 331316/371472 [4:11:41<3:10:29, 3.51it/s] 89%|████████▉ | 331317/371472 [4:11:41<3:09:25, 3.53it/s] 89%|████████▉ | 331318/371472 [4:11:42<3:04:31, 3.63it/s] 89%|████████▉ | 331319/371472 [4:11:42<3:08:20, 3.55it/s] 89%|████████▉ | 331320/371472 [4:11:42<3:15:34, 3.42it/s] {'loss': 2.5227, 'learning_rate': 1.97332413971428e-07, 'epoch': 14.27} + 89%|████████▉ | 331320/371472 [4:11:42<3:15:34, 3.42it/s] 89%|████████▉ | 331321/371472 [4:11:43<3:10:28, 3.51it/s] 89%|████████▉ | 331322/371472 [4:11:43<3:08:57, 3.54it/s] 89%|████████▉ | 331323/371472 [4:11:43<3:07:47, 3.56it/s] 89%|████████▉ | 331324/371472 [4:11:43<3:10:43, 3.51it/s] 89%|████████▉ | 331325/371472 [4:11:44<3:02:57, 3.66it/s] 89%|████████▉ | 331326/371472 [4:11:44<3:16:55, 3.40it/s] 89%|████████▉ | 331327/371472 [4:11:44<3:32:44, 3.14it/s] 89%|████████▉ | 331328/371472 [4:11:45<3:37:55, 3.07it/s] 89%|████████▉ | 331329/371472 [4:11:45<3:36:13, 3.09it/s] 89%|████████▉ | 331330/371472 [4:11:45<3:25:05, 3.26it/s] 89%|████████▉ | 331331/371472 [4:11:46<3:16:53, 3.40it/s] 89%|████████▉ | 331332/371472 [4:11:46<3:06:36, 3.58it/s] 89%|████████▉ | 331333/371472 [4:11:46<3:13:22, 3.46it/s] 89%|████████▉ | 331334/371472 [4:11:46<3:14:40, 3.44it/s] 89%|████████▉ | 331335/371472 [4:11:47<3:10:02, 3.52it/s] 89%|████████▉ | 331336/371472 [4:11:47<3:12:14, 3.48it/s] 89%|████████▉ | 331337/371472 [4:11:47<3:08:17, 3.55it/s] 89%|████████▉ | 331338/371472 [4:11:48<3:33:09, 3.14it/s] 89%|████████▉ | 331339/371472 [4:11:48<3:28:36, 3.21it/s] 89%|████████▉ | 331340/371472 [4:11:48<3:20:13, 3.34it/s] {'loss': 2.5149, 'learning_rate': 1.9728393199594904e-07, 'epoch': 14.27} + 89%|████████▉ | 331340/371472 [4:11:48<3:20:13, 3.34it/s] 89%|████████▉ | 331341/371472 [4:11:49<3:27:28, 3.22it/s] 89%|████████▉ | 331342/371472 [4:11:49<3:15:50, 3.42it/s] 89%|████████▉ | 331343/371472 [4:11:49<3:09:09, 3.54it/s] 89%|████████▉ | 331344/371472 [4:11:49<3:05:41, 3.60it/s] 89%|████████▉ | 331345/371472 [4:11:50<3:07:17, 3.57it/s] 89%|████████▉ | 331346/371472 [4:11:50<3:06:08, 3.59it/s] 89%|████████▉ | 331347/371472 [4:11:50<3:06:08, 3.59it/s] 89%|████████▉ | 331348/371472 [4:11:50<3:07:53, 3.56it/s] 89%|████████▉ | 331349/371472 [4:11:51<3:04:20, 3.63it/s] 89%|████████▉ | 331350/371472 [4:11:51<3:13:09, 3.46it/s] 89%|████████▉ | 331351/371472 [4:11:51<3:15:41, 3.42it/s] 89%|████████▉ | 331352/371472 [4:11:52<3:09:47, 3.52it/s] 89%|████████▉ | 331353/371472 [4:11:52<3:07:55, 3.56it/s] 89%|████████▉ | 331354/371472 [4:11:52<3:16:54, 3.40it/s] 89%|████████▉ | 331355/371472 [4:11:53<3:13:34, 3.45it/s] 89%|████████▉ | 331356/371472 [4:11:53<3:10:34, 3.51it/s] 89%|████████▉ | 331357/371472 [4:11:53<3:07:16, 3.57it/s] 89%|████████▉ | 331358/371472 [4:11:53<3:04:59, 3.61it/s] 89%|████████▉ | 331359/371472 [4:11:54<3:06:55, 3.58it/s] 89%|████████▉ | 331360/371472 [4:11:54<3:01:48, 3.68it/s] {'loss': 2.6294, 'learning_rate': 1.972354500204702e-07, 'epoch': 14.27} + 89%|████████▉ | 331360/371472 [4:11:54<3:01:48, 3.68it/s] 89%|████████▉ | 331361/371472 [4:11:54<3:04:07, 3.63it/s] 89%|████████▉ | 331362/371472 [4:11:54<3:18:23, 3.37it/s] 89%|████████▉ | 331363/371472 [4:11:55<3:23:42, 3.28it/s] 89%|████████▉ | 331364/371472 [4:11:55<3:18:15, 3.37it/s] 89%|████████▉ | 331365/371472 [4:11:55<3:10:46, 3.50it/s] 89%|████████▉ | 331366/371472 [4:11:56<3:17:05, 3.39it/s] 89%|████████▉ | 331367/371472 [4:11:56<3:20:16, 3.34it/s] 89%|████████▉ | 331368/371472 [4:11:56<3:21:06, 3.32it/s] 89%|████████▉ | 331369/371472 [4:11:57<3:16:08, 3.41it/s] 89%|████████▉ | 331370/371472 [4:11:57<3:29:08, 3.20it/s] 89%|████████▉ | 331371/371472 [4:11:57<3:35:49, 3.10it/s] 89%|████████▉ | 331372/371472 [4:11:58<3:35:01, 3.11it/s] 89%|████████▉ | 331373/371472 [4:11:58<3:40:13, 3.03it/s] 89%|████████▉ | 331374/371472 [4:11:58<3:37:01, 3.08it/s] 89%|████████▉ | 331375/371472 [4:11:59<3:31:49, 3.15it/s] 89%|████████▉ | 331376/371472 [4:11:59<3:22:24, 3.30it/s] 89%|████████▉ | 331377/371472 [4:11:59<3:25:56, 3.24it/s] 89%|████████▉ | 331378/371472 [4:11:59<3:16:54, 3.39it/s] 89%|████████▉ | 331379/371472 [4:12:00<3:19:14, 3.35it/s] 89%|████████▉ | 331380/371472 [4:12:00<3:35:09, 3.11it/s] {'loss': 2.5741, 'learning_rate': 1.9718696804499126e-07, 'epoch': 14.27} + 89%|████████▉ | 331380/371472 [4:12:00<3:35:09, 3.11it/s] 89%|████████▉ | 331381/371472 [4:12:00<3:25:28, 3.25it/s] 89%|████████▉ | 331382/371472 [4:12:01<3:24:57, 3.26it/s] 89%|████████▉ | 331383/371472 [4:12:01<3:15:43, 3.41it/s] 89%|████████▉ | 331384/371472 [4:12:01<3:10:47, 3.50it/s] 89%|████████▉ | 331385/371472 [4:12:01<3:09:56, 3.52it/s] 89%|████████▉ | 331386/371472 [4:12:02<3:03:52, 3.63it/s] 89%|████████▉ | 331387/371472 [4:12:02<3:07:02, 3.57it/s] 89%|████████▉ | 331388/371472 [4:12:02<3:12:52, 3.46it/s] 89%|████████▉ | 331389/371472 [4:12:03<3:08:51, 3.54it/s] 89%|████████▉ | 331390/371472 [4:12:03<3:01:58, 3.67it/s] 89%|████████▉ | 331391/371472 [4:12:03<3:01:10, 3.69it/s] 89%|████████▉ | 331392/371472 [4:12:03<3:09:58, 3.52it/s] 89%|████████▉ | 331393/371472 [4:12:04<3:11:57, 3.48it/s] 89%|████████▉ | 331394/371472 [4:12:04<3:09:31, 3.52it/s] 89%|████████▉ | 331395/371472 [4:12:04<3:10:56, 3.50it/s] 89%|████████▉ | 331396/371472 [4:12:05<3:28:49, 3.20it/s] 89%|████████▉ | 331397/371472 [4:12:05<3:36:42, 3.08it/s] 89%|████████▉ | 331398/371472 [4:12:05<3:30:11, 3.18it/s] 89%|████████▉ | 331399/371472 [4:12:06<3:21:16, 3.32it/s] 89%|████████▉ | 331400/371472 [4:12:06<3:14:47, 3.43it/s] {'loss': 2.4625, 'learning_rate': 1.971384860695124e-07, 'epoch': 14.27} + 89%|████████▉ | 331400/371472 [4:12:06<3:14:47, 3.43it/s] 89%|████████▉ | 331401/371472 [4:12:06<3:12:57, 3.46it/s] 89%|████████▉ | 331402/371472 [4:12:06<3:05:53, 3.59it/s] 89%|████████▉ | 331403/371472 [4:12:07<3:01:04, 3.69it/s] 89%|████████▉ | 331404/371472 [4:12:07<2:59:56, 3.71it/s] 89%|████████▉ | 331405/371472 [4:12:07<2:58:23, 3.74it/s] 89%|████████▉ | 331406/371472 [4:12:07<3:00:03, 3.71it/s] 89%|████████▉ | 331407/371472 [4:12:08<3:09:43, 3.52it/s] 89%|████████▉ | 331408/371472 [4:12:08<3:05:00, 3.61it/s] 89%|████████▉ | 331409/371472 [4:12:08<3:03:34, 3.64it/s] 89%|████████▉ | 331410/371472 [4:12:09<3:01:57, 3.67it/s] 89%|████████▉ | 331411/371472 [4:12:09<3:08:52, 3.54it/s] 89%|████████▉ | 331412/371472 [4:12:09<3:13:03, 3.46it/s] 89%|████████▉ | 331413/371472 [4:12:09<3:10:02, 3.51it/s] 89%|████████▉ | 331414/371472 [4:12:10<3:05:30, 3.60it/s] 89%|████████▉ | 331415/371472 [4:12:10<3:15:24, 3.42it/s] 89%|████████▉ | 331416/371472 [4:12:10<3:10:53, 3.50it/s] 89%|████████▉ | 331417/371472 [4:12:11<3:24:19, 3.27it/s] 89%|████████▉ | 331418/371472 [4:12:11<3:16:10, 3.40it/s] 89%|████████▉ | 331419/371472 [4:12:11<3:08:42, 3.54it/s] 89%|████████▉ | 331420/371472 [4:12:11<3:08:30, 3.54it/s] {'loss': 2.7991, 'learning_rate': 1.9709000409403346e-07, 'epoch': 14.27} + 89%|████████▉ | 331420/371472 [4:12:11<3:08:30, 3.54it/s] 89%|████████▉ | 331421/371472 [4:12:12<3:04:50, 3.61it/s] 89%|████████▉ | 331422/371472 [4:12:12<3:04:46, 3.61it/s] 89%|████████▉ | 331423/371472 [4:12:12<3:22:05, 3.30it/s] 89%|████████▉ | 331424/371472 [4:12:13<3:17:18, 3.38it/s] 89%|████████▉ | 331425/371472 [4:12:13<3:06:28, 3.58it/s] 89%|████████▉ | 331426/371472 [4:12:13<3:21:47, 3.31it/s] 89%|████████▉ | 331427/371472 [4:12:14<3:16:00, 3.41it/s] 89%|████████▉ | 331428/371472 [4:12:14<3:09:38, 3.52it/s] 89%|████████▉ | 331429/371472 [4:12:14<3:02:26, 3.66it/s] 89%|████████▉ | 331430/371472 [4:12:14<3:07:27, 3.56it/s] 89%|████████▉ | 331431/371472 [4:12:15<3:08:05, 3.55it/s] 89%|████████▉ | 331432/371472 [4:12:15<3:04:00, 3.63it/s] 89%|████████▉ | 331433/371472 [4:12:15<3:01:55, 3.67it/s] 89%|████████▉ | 331434/371472 [4:12:15<3:00:51, 3.69it/s] 89%|████████▉ | 331435/371472 [4:12:16<3:01:26, 3.68it/s] 89%|████████▉ | 331436/371472 [4:12:16<3:10:55, 3.50it/s] 89%|████████▉ | 331437/371472 [4:12:16<3:15:02, 3.42it/s] 89%|████████▉ | 331438/371472 [4:12:17<3:13:54, 3.44it/s] 89%|████████▉ | 331439/371472 [4:12:17<3:13:35, 3.45it/s] 89%|████████▉ | 331440/371472 [4:12:17<3:18:09, 3.37it/s] {'loss': 2.5004, 'learning_rate': 1.9704152211855463e-07, 'epoch': 14.28} + 89%|████████▉ | 331440/371472 [4:12:17<3:18:09, 3.37it/s] 89%|████████▉ | 331441/371472 [4:12:17<3:20:20, 3.33it/s] 89%|████████▉ | 331442/371472 [4:12:18<3:18:20, 3.36it/s] 89%|████████▉ | 331443/371472 [4:12:18<3:12:42, 3.46it/s] 89%|████████▉ | 331444/371472 [4:12:18<3:05:43, 3.59it/s] 89%|████████▉ | 331445/371472 [4:12:19<3:04:11, 3.62it/s] 89%|████████▉ | 331446/371472 [4:12:19<3:07:54, 3.55it/s] 89%|████████▉ | 331447/371472 [4:12:19<3:02:50, 3.65it/s] 89%|████████▉ | 331448/371472 [4:12:19<3:17:08, 3.38it/s] 89%|████████▉ | 331449/371472 [4:12:20<3:13:53, 3.44it/s] 89%|████████▉ | 331450/371472 [4:12:20<3:07:30, 3.56it/s] 89%|████████▉ | 331451/371472 [4:12:20<3:08:26, 3.54it/s] 89%|████████▉ | 331452/371472 [4:12:21<3:12:34, 3.46it/s] 89%|████████▉ | 331453/371472 [4:12:21<3:08:27, 3.54it/s] 89%|████████▉ | 331454/371472 [4:12:21<3:01:17, 3.68it/s] 89%|████████▉ | 331455/371472 [4:12:21<3:01:02, 3.68it/s] 89%|████████▉ | 331456/371472 [4:12:22<3:03:58, 3.63it/s] 89%|████████▉ | 331457/371472 [4:12:22<2:59:52, 3.71it/s] 89%|████████▉ | 331458/371472 [4:12:22<3:05:38, 3.59it/s] 89%|████████▉ | 331459/371472 [4:12:22<3:01:58, 3.66it/s] 89%|████████▉ | 331460/371472 [4:12:23<3:09:39, 3.52it/s] {'loss': 2.5922, 'learning_rate': 1.9699304014307568e-07, 'epoch': 14.28} + 89%|████████▉ | 331460/371472 [4:12:23<3:09:39, 3.52it/s] 89%|████████▉ | 331461/371472 [4:12:23<3:27:48, 3.21it/s] 89%|████████▉ | 331462/371472 [4:12:23<3:21:16, 3.31it/s] 89%|████████▉ | 331463/371472 [4:12:24<3:26:34, 3.23it/s] 89%|████████▉ | 331464/371472 [4:12:24<3:18:01, 3.37it/s] 89%|████████▉ | 331465/371472 [4:12:24<3:15:08, 3.42it/s] 89%|████████▉ | 331466/371472 [4:12:25<3:07:02, 3.56it/s] 89%|████████▉ | 331467/371472 [4:12:25<3:07:57, 3.55it/s] 89%|████████▉ | 331468/371472 [4:12:25<3:06:26, 3.58it/s] 89%|████████▉ | 331469/371472 [4:12:25<3:04:40, 3.61it/s] 89%|████████▉ | 331470/371472 [4:12:26<3:10:42, 3.50it/s] 89%|████████▉ | 331471/371472 [4:12:26<3:07:06, 3.56it/s] 89%|████████▉ | 331472/371472 [4:12:26<3:07:18, 3.56it/s] 89%|████████▉ | 331473/371472 [4:12:27<3:06:26, 3.58it/s] 89%|████████▉ | 331474/371472 [4:12:27<3:00:45, 3.69it/s] 89%|████████▉ | 331475/371472 [4:12:27<2:59:01, 3.72it/s] 89%|████████▉ | 331476/371472 [4:12:27<3:03:23, 3.63it/s] 89%|████████▉ | 331477/371472 [4:12:28<2:59:06, 3.72it/s] 89%|████████▉ | 331478/371472 [4:12:28<3:03:05, 3.64it/s] 89%|████████▉ | 331479/371472 [4:12:28<3:00:11, 3.70it/s] 89%|████████▉ | 331480/371472 [4:12:28<2:59:59, 3.70it/s] {'loss': 2.7885, 'learning_rate': 1.9694455816759683e-07, 'epoch': 14.28} + 89%|████████▉ | 331480/371472 [4:12:28<2:59:59, 3.70it/s] 89%|████████▉ | 331481/371472 [4:12:29<3:03:28, 3.63it/s] 89%|████████▉ | 331482/371472 [4:12:29<3:04:06, 3.62it/s] 89%|████████▉ | 331483/371472 [4:12:29<3:03:24, 3.63it/s] 89%|████████▉ | 331484/371472 [4:12:30<3:01:06, 3.68it/s] 89%|████████▉ | 331485/371472 [4:12:30<3:25:20, 3.25it/s] 89%|████████▉ | 331486/371472 [4:12:30<3:16:07, 3.40it/s] 89%|████████▉ | 331487/371472 [4:12:30<3:09:42, 3.51it/s] 89%|████████▉ | 331488/371472 [4:12:31<3:12:39, 3.46it/s] 89%|████████▉ | 331489/371472 [4:12:31<3:14:09, 3.43it/s] 89%|████████▉ | 331490/371472 [4:12:31<3:11:44, 3.48it/s] 89%|████████▉ | 331491/371472 [4:12:32<3:18:49, 3.35it/s] 89%|████████▉ | 331492/371472 [4:12:32<3:23:11, 3.28it/s] 89%|████████▉ | 331493/371472 [4:12:32<3:38:22, 3.05it/s] 89%|████████▉ | 331494/371472 [4:12:33<3:25:25, 3.24it/s] 89%|████████▉ | 331495/371472 [4:12:33<3:34:30, 3.11it/s] 89%|████████▉ | 331496/371472 [4:12:33<3:21:44, 3.30it/s] 89%|████████▉ | 331497/371472 [4:12:34<3:42:36, 2.99it/s] 89%|████████▉ | 331498/371472 [4:12:34<3:37:09, 3.07it/s] 89%|████████▉ | 331499/371472 [4:12:34<3:33:05, 3.13it/s] 89%|████████▉ | 331500/371472 [4:12:35<3:24:21, 3.26it/s] {'loss': 2.412, 'learning_rate': 1.968960761921179e-07, 'epoch': 14.28} + 89%|████████▉ | 331500/371472 [4:12:35<3:24:21, 3.26it/s] 89%|████████▉ | 331501/371472 [4:12:35<3:19:59, 3.33it/s] 89%|████████▉ | 331502/371472 [4:12:35<3:17:58, 3.36it/s] 89%|████████▉ | 331503/371472 [4:12:35<3:13:33, 3.44it/s] 89%|████████▉ | 331504/371472 [4:12:36<3:14:50, 3.42it/s] 89%|████████▉ | 331505/371472 [4:12:36<3:07:11, 3.56it/s] 89%|████████▉ | 331506/371472 [4:12:36<3:16:52, 3.38it/s] 89%|████████▉ | 331507/371472 [4:12:37<3:14:32, 3.42it/s] 89%|████████▉ | 331508/371472 [4:12:37<3:08:52, 3.53it/s] 89%|████████▉ | 331509/371472 [4:12:37<3:16:10, 3.40it/s] 89%|████████▉ | 331510/371472 [4:12:37<3:15:47, 3.40it/s] 89%|████████▉ | 331511/371472 [4:12:38<3:11:44, 3.47it/s] 89%|████████▉ | 331512/371472 [4:12:38<3:03:27, 3.63it/s] 89%|████████▉ | 331513/371472 [4:12:38<3:16:45, 3.38it/s] 89%|████████▉ | 331514/371472 [4:12:39<3:13:35, 3.44it/s] 89%|████████▉ | 331515/371472 [4:12:39<3:09:17, 3.52it/s] 89%|████████▉ | 331516/371472 [4:12:39<3:09:04, 3.52it/s] 89%|████████▉ | 331517/371472 [4:12:39<3:13:51, 3.43it/s] 89%|████████▉ | 331518/371472 [4:12:40<3:08:39, 3.53it/s] 89%|████████▉ | 331519/371472 [4:12:40<3:07:53, 3.54it/s] 89%|████████▉ | 331520/371472 [4:12:40<3:03:58, 3.62it/s] {'loss': 2.6446, 'learning_rate': 1.9684759421663897e-07, 'epoch': 14.28} + 89%|████████▉ | 331520/371472 [4:12:40<3:03:58, 3.62it/s] 89%|████████▉ | 331521/371472 [4:12:40<3:02:42, 3.64it/s] 89%|████████▉ | 331522/371472 [4:12:41<2:58:53, 3.72it/s] 89%|████████▉ | 331523/371472 [4:12:41<3:00:16, 3.69it/s] 89%|████████▉ | 331524/371472 [4:12:41<3:02:05, 3.66it/s] 89%|████████▉ | 331525/371472 [4:12:42<3:03:34, 3.63it/s] 89%|████████▉ | 331526/371472 [4:12:42<3:08:45, 3.53it/s] 89%|████████▉ | 331527/371472 [4:12:42<3:07:42, 3.55it/s] 89%|████████▉ | 331528/371472 [4:12:42<3:01:15, 3.67it/s] 89%|████████▉ | 331529/371472 [4:12:43<3:08:14, 3.54it/s] 89%|████████▉ | 331530/371472 [4:12:43<3:09:54, 3.51it/s] 89%|████████▉ | 331531/371472 [4:12:43<3:07:23, 3.55it/s] 89%|████████▉ | 331532/371472 [4:12:44<3:05:13, 3.59it/s] 89%|████████▉ | 331533/371472 [4:12:44<3:06:09, 3.58it/s] 89%|████████▉ | 331534/371472 [4:12:44<3:28:39, 3.19it/s] 89%|████████▉ | 331535/371472 [4:12:44<3:20:50, 3.31it/s] 89%|████████▉ | 331536/371472 [4:12:45<3:17:09, 3.38it/s] 89%|████████▉ | 331537/371472 [4:12:45<3:19:10, 3.34it/s] 89%|████████▉ | 331538/371472 [4:12:45<3:15:04, 3.41it/s] 89%|████████▉ | 331539/371472 [4:12:46<3:13:17, 3.44it/s] 89%|████████▉ | 331540/371472 [4:12:46<3:19:52, 3.33it/s] {'loss': 2.5314, 'learning_rate': 1.967991122411601e-07, 'epoch': 14.28} + 89%|████████▉ | 331540/371472 [4:12:46<3:19:52, 3.33it/s] 89%|████████▉ | 331541/371472 [4:12:46<3:26:44, 3.22it/s] 89%|████████▉ | 331542/371472 [4:12:47<3:19:00, 3.34it/s] 89%|████████▉ | 331543/371472 [4:12:47<3:15:51, 3.40it/s] 89%|████████▉ | 331544/371472 [4:12:47<3:19:29, 3.34it/s] 89%|████████▉ | 331545/371472 [4:12:47<3:18:26, 3.35it/s] 89%|████████▉ | 331546/371472 [4:12:48<3:13:45, 3.43it/s] 89%|████████▉ | 331547/371472 [4:12:48<3:08:15, 3.53it/s] 89%|████████▉ | 331548/371472 [4:12:48<3:09:12, 3.52it/s] 89%|████████▉ | 331549/371472 [4:12:49<3:09:15, 3.52it/s] 89%|████████▉ | 331550/371472 [4:12:49<3:17:45, 3.36it/s] 89%|████████▉ | 331551/371472 [4:12:49<3:21:45, 3.30it/s] 89%|████████▉ | 331552/371472 [4:12:49<3:12:49, 3.45it/s] 89%|████████▉ | 331553/371472 [4:12:50<3:14:55, 3.41it/s] 89%|████████▉ | 331554/371472 [4:12:50<3:06:48, 3.56it/s] 89%|████████▉ | 331555/371472 [4:12:50<3:02:53, 3.64it/s] 89%|████████▉ | 331556/371472 [4:12:51<3:05:08, 3.59it/s] 89%|████████▉ | 331557/371472 [4:12:51<3:05:48, 3.58it/s] 89%|████████▉ | 331558/371472 [4:12:51<3:09:04, 3.52it/s] 89%|████████▉ | 331559/371472 [4:12:51<2:59:39, 3.70it/s] 89%|████████▉ | 331560/371472 [4:12:52<3:03:36, 3.62it/s] {'loss': 2.6604, 'learning_rate': 1.9675063026568117e-07, 'epoch': 14.28} + 89%|████████▉ | 331560/371472 [4:12:52<3:03:36, 3.62it/s] 89%|████████▉ | 331561/371472 [4:12:52<3:01:55, 3.66it/s] 89%|████████▉ | 331562/371472 [4:12:52<3:00:03, 3.69it/s] 89%|████████▉ | 331563/371472 [4:12:53<3:03:38, 3.62it/s] 89%|████████▉ | 331564/371472 [4:12:53<3:01:30, 3.66it/s] 89%|████████▉ | 331565/371472 [4:12:53<3:02:09, 3.65it/s] 89%|████████▉ | 331566/371472 [4:12:53<3:15:41, 3.40it/s] 89%|████████▉ | 331567/371472 [4:12:54<3:12:25, 3.46it/s] 89%|████████▉ | 331568/371472 [4:12:54<3:11:54, 3.47it/s] 89%|████████▉ | 331569/371472 [4:12:54<3:07:51, 3.54it/s] 89%|████████▉ | 331570/371472 [4:12:55<3:12:19, 3.46it/s] 89%|████████▉ | 331571/371472 [4:12:55<3:15:00, 3.41it/s] 89%|████████▉ | 331572/371472 [4:12:55<3:14:43, 3.42it/s] 89%|████████▉ | 331573/371472 [4:12:55<3:17:58, 3.36it/s] 89%|████████▉ | 331574/371472 [4:12:56<3:17:25, 3.37it/s] 89%|████████▉ | 331575/371472 [4:12:56<3:28:25, 3.19it/s] 89%|████████▉ | 331576/371472 [4:12:56<3:20:26, 3.32it/s] 89%|████████▉ | 331577/371472 [4:12:57<3:41:12, 3.01it/s] 89%|████████▉ | 331578/371472 [4:12:57<3:30:26, 3.16it/s] 89%|████████▉ | 331579/371472 [4:12:57<3:27:29, 3.20it/s] 89%|████████▉ | 331580/371472 [4:12:58<3:29:33, 3.17it/s] {'loss': 2.5079, 'learning_rate': 1.9670214829020235e-07, 'epoch': 14.28} + 89%|████████▉ | 331580/371472 [4:12:58<3:29:33, 3.17it/s] 89%|████████▉ | 331581/371472 [4:12:58<3:23:18, 3.27it/s] 89%|████████▉ | 331582/371472 [4:12:58<3:31:45, 3.14it/s] 89%|████████▉ | 331583/371472 [4:12:59<3:28:31, 3.19it/s] 89%|████████▉ | 331584/371472 [4:12:59<3:21:45, 3.29it/s] 89%|████████▉ | 331585/371472 [4:12:59<3:34:07, 3.10it/s] 89%|████████▉ | 331586/371472 [4:12:59<3:20:55, 3.31it/s] 89%|████████▉ | 331587/371472 [4:13:00<3:10:33, 3.49it/s] 89%|████████▉ | 331588/371472 [4:13:00<3:06:34, 3.56it/s] 89%|████████▉ | 331589/371472 [4:13:00<3:04:18, 3.61it/s] 89%|████████▉ | 331590/371472 [4:13:01<3:02:07, 3.65it/s] 89%|████████▉ | 331591/371472 [4:13:01<3:06:56, 3.56it/s] 89%|████████▉ | 331592/371472 [4:13:01<3:02:46, 3.64it/s] 89%|████████▉ | 331593/371472 [4:13:01<3:04:00, 3.61it/s] 89%|████████▉ | 331594/371472 [4:13:02<3:05:07, 3.59it/s] 89%|████████▉ | 331595/371472 [4:13:02<3:10:23, 3.49it/s] 89%|████████▉ | 331596/371472 [4:13:02<3:10:57, 3.48it/s] 89%|████████▉ | 331597/371472 [4:13:03<3:13:07, 3.44it/s] 89%|████████▉ | 331598/371472 [4:13:03<3:09:10, 3.51it/s] 89%|████████▉ | 331599/371472 [4:13:03<3:07:48, 3.54it/s] 89%|████████▉ | 331600/371472 [4:13:03<3:09:01, 3.52it/s] {'loss': 2.5743, 'learning_rate': 1.966536663147234e-07, 'epoch': 14.28} + 89%|████████▉ | 331600/371472 [4:13:03<3:09:01, 3.52it/s] 89%|████████▉ | 331601/371472 [4:13:04<3:15:09, 3.40it/s] 89%|████████▉ | 331602/371472 [4:13:04<3:13:40, 3.43it/s] 89%|████████▉ | 331603/371472 [4:13:04<3:06:56, 3.55it/s] 89%|████████▉ | 331604/371472 [4:13:05<3:03:44, 3.62it/s] 89%|████████▉ | 331605/371472 [4:13:05<2:57:19, 3.75it/s] 89%|████████▉ | 331606/371472 [4:13:05<2:57:34, 3.74it/s] 89%|████████▉ | 331607/371472 [4:13:05<3:09:22, 3.51it/s] 89%|████████▉ | 331608/371472 [4:13:06<3:15:07, 3.41it/s] 89%|████████▉ | 331609/371472 [4:13:06<3:10:47, 3.48it/s] 89%|████████▉ | 331610/371472 [4:13:06<3:13:01, 3.44it/s] 89%|████████▉ | 331611/371472 [4:13:07<3:15:22, 3.40it/s] 89%|████████▉ | 331612/371472 [4:13:07<3:12:45, 3.45it/s] 89%|████████▉ | 331613/371472 [4:13:07<3:20:12, 3.32it/s] 89%|████████▉ | 331614/371472 [4:13:07<3:08:37, 3.52it/s] 89%|████████▉ | 331615/371472 [4:13:08<3:13:54, 3.43it/s] 89%|████████▉ | 331616/371472 [4:13:08<3:10:40, 3.48it/s] 89%|████████▉ | 331617/371472 [4:13:08<3:21:34, 3.30it/s] 89%|████████▉ | 331618/371472 [4:13:09<3:11:47, 3.46it/s] 89%|████████▉ | 331619/371472 [4:13:09<3:06:21, 3.56it/s] 89%|████████▉ | 331620/371472 [4:13:09<3:01:00, 3.67it/s] {'loss': 2.7382, 'learning_rate': 1.9660518433924452e-07, 'epoch': 14.28} + 89%|████████▉ | 331620/371472 [4:13:09<3:01:00, 3.67it/s] 89%|████████▉ | 331621/371472 [4:13:09<3:07:56, 3.53it/s] 89%|████████▉ | 331622/371472 [4:13:10<3:00:02, 3.69it/s] 89%|████████▉ | 331623/371472 [4:13:10<2:59:55, 3.69it/s] 89%|████████▉ | 331624/371472 [4:13:10<3:03:36, 3.62it/s] 89%|████████▉ | 331625/371472 [4:13:11<3:13:02, 3.44it/s] 89%|████████▉ | 331626/371472 [4:13:11<3:12:12, 3.46it/s] 89%|████████▉ | 331627/371472 [4:13:11<3:05:36, 3.58it/s] 89%|████████▉ | 331628/371472 [4:13:11<3:06:38, 3.56it/s] 89%|████████▉ | 331629/371472 [4:13:12<3:20:46, 3.31it/s] 89%|████████▉ | 331630/371472 [4:13:12<3:15:28, 3.40it/s] 89%|████████▉ | 331631/371472 [4:13:12<3:15:19, 3.40it/s] 89%|████████▉ | 331632/371472 [4:13:13<3:15:23, 3.40it/s] 89%|████████▉ | 331633/371472 [4:13:13<3:12:45, 3.44it/s] 89%|████████▉ | 331634/371472 [4:13:13<3:06:46, 3.55it/s] 89%|████████▉ | 331635/371472 [4:13:13<3:13:39, 3.43it/s] 89%|████████▉ | 331636/371472 [4:13:14<3:12:05, 3.46it/s] 89%|████████▉ | 331637/371472 [4:13:14<3:06:53, 3.55it/s] 89%|████████▉ | 331638/371472 [4:13:14<3:05:48, 3.57it/s] 89%|████████▉ | 331639/371472 [4:13:15<3:10:35, 3.48it/s] 89%|████████▉ | 331640/371472 [4:13:15<3:21:27, 3.30it/s] {'loss': 2.7292, 'learning_rate': 1.9655670236376561e-07, 'epoch': 14.28} + 89%|████████▉ | 331640/371472 [4:13:15<3:21:27, 3.30it/s] 89%|████████▉ | 331641/371472 [4:13:15<3:12:11, 3.45it/s] 89%|████████▉ | 331642/371472 [4:13:15<3:08:15, 3.53it/s] 89%|████████▉ | 331643/371472 [4:13:16<3:07:25, 3.54it/s] 89%|████████▉ | 331644/371472 [4:13:16<3:05:53, 3.57it/s] 89%|████████▉ | 331645/371472 [4:13:16<3:11:07, 3.47it/s] 89%|████████▉ | 331646/371472 [4:13:17<3:06:44, 3.55it/s] 89%|████████▉ | 331647/371472 [4:13:17<3:05:18, 3.58it/s] 89%|████████▉ | 331648/371472 [4:13:17<3:06:26, 3.56it/s] 89%|████████▉ | 331649/371472 [4:13:17<3:12:16, 3.45it/s] 89%|████████▉ | 331650/371472 [4:13:18<3:04:51, 3.59it/s] 89%|████████▉ | 331651/371472 [4:13:18<3:15:15, 3.40it/s] 89%|████████▉ | 331652/371472 [4:13:18<3:20:42, 3.31it/s] 89%|████████▉ | 331653/371472 [4:13:19<3:14:16, 3.42it/s] 89%|████████▉ | 331654/371472 [4:13:19<3:09:58, 3.49it/s] 89%|████████▉ | 331655/371472 [4:13:19<3:05:52, 3.57it/s] 89%|████████▉ | 331656/371472 [4:13:19<3:02:15, 3.64it/s] 89%|████████▉ | 331657/371472 [4:13:20<2:59:31, 3.70it/s] 89%|████████▉ | 331658/371472 [4:13:20<3:02:46, 3.63it/s] 89%|████████▉ | 331659/371472 [4:13:20<3:13:12, 3.43it/s] 89%|████████▉ | 331660/371472 [4:13:21<3:12:39, 3.44it/s] {'loss': 2.5718, 'learning_rate': 1.9650822038828676e-07, 'epoch': 14.29} + 89%|████████▉ | 331660/371472 [4:13:21<3:12:39, 3.44it/s] 89%|████████▉ | 331661/371472 [4:13:21<3:04:19, 3.60it/s] 89%|████████▉ | 331662/371472 [4:13:21<3:09:04, 3.51it/s] 89%|████████▉ | 331663/371472 [4:13:21<3:09:44, 3.50it/s] 89%|████████▉ | 331664/371472 [4:13:22<3:04:28, 3.60it/s] 89%|████████▉ | 331665/371472 [4:13:22<3:06:44, 3.55it/s] 89%|████████▉ | 331666/371472 [4:13:22<3:05:44, 3.57it/s] 89%|████████▉ | 331667/371472 [4:13:23<3:20:19, 3.31it/s] 89%|████████▉ | 331668/371472 [4:13:23<3:30:15, 3.16it/s] 89%|████████▉ | 331669/371472 [4:13:23<3:18:57, 3.33it/s] 89%|████████▉ | 331670/371472 [4:13:23<3:15:34, 3.39it/s] 89%|████████▉ | 331671/371472 [4:13:24<3:24:46, 3.24it/s] 89%|████████▉ | 331672/371472 [4:13:24<3:26:36, 3.21it/s] 89%|████████▉ | 331673/371472 [4:13:24<3:33:46, 3.10it/s] 89%|█████��██▉ | 331674/371472 [4:13:25<3:23:32, 3.26it/s] 89%|████████▉ | 331675/371472 [4:13:25<3:17:23, 3.36it/s] 89%|████████▉ | 331676/371472 [4:13:25<3:09:20, 3.50it/s] 89%|████████▉ | 331677/371472 [4:13:26<3:20:01, 3.32it/s] 89%|████████▉ | 331678/371472 [4:13:26<3:36:28, 3.06it/s] 89%|████████▉ | 331679/371472 [4:13:26<3:26:04, 3.22it/s] 89%|████████▉ | 331680/371472 [4:13:27<3:24:54, 3.24it/s] {'loss': 2.4583, 'learning_rate': 1.964597384128078e-07, 'epoch': 14.29} + 89%|████████▉ | 331680/371472 [4:13:27<3:24:54, 3.24it/s] 89%|████████▉ | 331681/371472 [4:13:27<3:23:30, 3.26it/s] 89%|████████▉ | 331682/371472 [4:13:27<3:12:01, 3.45it/s] 89%|████████▉ | 331683/371472 [4:13:28<3:36:07, 3.07it/s] 89%|████████▉ | 331684/371472 [4:13:28<3:57:06, 2.80it/s] 89%|████████▉ | 331685/371472 [4:13:28<3:36:47, 3.06it/s] 89%|████████▉ | 331686/371472 [4:13:29<3:30:26, 3.15it/s] 89%|████████▉ | 331687/371472 [4:13:29<3:38:40, 3.03it/s] 89%|████████▉ | 331688/371472 [4:13:29<3:23:03, 3.27it/s] 89%|████████▉ | 331689/371472 [4:13:29<3:15:42, 3.39it/s] 89%|████████▉ | 331690/371472 [4:13:30<3:09:47, 3.49it/s] 89%|████████▉ | 331691/371472 [4:13:30<3:13:52, 3.42it/s] 89%|████████▉ | 331692/371472 [4:13:30<3:09:39, 3.50it/s] 89%|████████▉ | 331693/371472 [4:13:31<3:03:50, 3.61it/s] 89%|████████▉ | 331694/371472 [4:13:31<3:15:20, 3.39it/s] 89%|████████▉ | 331695/371472 [4:13:31<3:15:01, 3.40it/s] 89%|████████▉ | 331696/371472 [4:13:31<3:10:54, 3.47it/s] 89%|████████▉ | 331697/371472 [4:13:32<3:08:47, 3.51it/s] 89%|████████▉ | 331698/371472 [4:13:32<3:22:27, 3.27it/s] 89%|████████▉ | 331699/371472 [4:13:32<3:19:13, 3.33it/s] 89%|████████▉ | 331700/371472 [4:13:33<3:10:52, 3.47it/s] {'loss': 2.6099, 'learning_rate': 1.96411256437329e-07, 'epoch': 14.29} + 89%|████████▉ | 331700/371472 [4:13:33<3:10:52, 3.47it/s] 89%|████████▉ | 331701/371472 [4:13:33<3:04:33, 3.59it/s] 89%|████████▉ | 331702/371472 [4:13:33<3:04:31, 3.59it/s] 89%|████████▉ | 331703/371472 [4:13:33<3:10:47, 3.47it/s] 89%|████████▉ | 331704/371472 [4:13:34<3:06:55, 3.55it/s] 89%|████████▉ | 331705/371472 [4:13:34<3:21:56, 3.28it/s] 89%|████████▉ | 331706/371472 [4:13:34<3:18:21, 3.34it/s] 89%|████████▉ | 331707/371472 [4:13:35<3:20:04, 3.31it/s] 89%|████████▉ | 331708/371472 [4:13:35<3:21:09, 3.29it/s] 89%|████████▉ | 331709/371472 [4:13:35<3:17:23, 3.36it/s] 89%|████████▉ | 331710/371472 [4:13:36<3:20:17, 3.31it/s] 89%|████████▉ | 331711/371472 [4:13:36<3:20:47, 3.30it/s] 89%|████████▉ | 331712/371472 [4:13:36<3:19:59, 3.31it/s] 89%|████████▉ | 331713/371472 [4:13:36<3:15:23, 3.39it/s] 89%|████████▉ | 331714/371472 [4:13:37<3:07:58, 3.53it/s] 89%|████████▉ | 331715/371472 [4:13:37<3:05:40, 3.57it/s] 89%|████████▉ | 331716/371472 [4:13:37<3:00:25, 3.67it/s] 89%|████████▉ | 331717/371472 [4:13:38<3:05:03, 3.58it/s] 89%|████████▉ | 331718/371472 [4:13:38<3:03:47, 3.60it/s] 89%|████████▉ | 331719/371472 [4:13:38<2:58:19, 3.72it/s] 89%|████████▉ | 331720/371472 [4:13:38<2:57:32, 3.73it/s] {'loss': 2.519, 'learning_rate': 1.9636277446185003e-07, 'epoch': 14.29} + 89%|████████▉ | 331720/371472 [4:13:38<2:57:32, 3.73it/s] 89%|████████▉ | 331721/371472 [4:13:39<3:10:51, 3.47it/s] 89%|████████▉ | 331722/371472 [4:13:39<3:29:55, 3.16it/s] 89%|████████▉ | 331723/371472 [4:13:39<3:24:27, 3.24it/s] 89%|████████▉ | 331724/371472 [4:13:40<3:20:23, 3.31it/s] 89%|████████▉ | 331725/371472 [4:13:40<3:17:07, 3.36it/s] 89%|████████▉ | 331726/371472 [4:13:40<3:10:55, 3.47it/s] 89%|████████▉ | 331727/371472 [4:13:40<3:15:58, 3.38it/s] 89%|████████▉ | 331728/371472 [4:13:41<3:11:56, 3.45it/s] 89%|████████▉ | 331729/371472 [4:13:41<3:07:10, 3.54it/s] 89%|████████▉ | 331730/371472 [4:13:41<3:04:29, 3.59it/s] 89%|████████▉ | 331731/371472 [4:13:42<2:59:42, 3.69it/s] 89%|████████▉ | 331732/371472 [4:13:42<2:58:20, 3.71it/s] 89%|████████▉ | 331733/371472 [4:13:42<2:55:26, 3.78it/s] 89%|████████▉ | 331734/371472 [4:13:42<2:55:37, 3.77it/s] 89%|████████▉ | 331735/371472 [4:13:43<2:58:53, 3.70it/s] 89%|████████▉ | 331736/371472 [4:13:43<3:19:16, 3.32it/s] 89%|████████▉ | 331737/371472 [4:13:43<3:13:39, 3.42it/s] 89%|████████▉ | 331738/371472 [4:13:44<3:09:56, 3.49it/s] 89%|████████▉ | 331739/371472 [4:13:44<3:23:58, 3.25it/s] 89%|████████▉ | 331740/371472 [4:13:44<3:23:11, 3.26it/s] {'loss': 2.6473, 'learning_rate': 1.9631429248637118e-07, 'epoch': 14.29} + 89%|████████▉ | 331740/371472 [4:13:44<3:23:11, 3.26it/s] 89%|████████▉ | 331741/371472 [4:13:44<3:12:46, 3.43it/s] 89%|████████▉ | 331742/371472 [4:13:45<3:12:19, 3.44it/s] 89%|████████▉ | 331743/371472 [4:13:45<3:17:57, 3.34it/s] 89%|████████▉ | 331744/371472 [4:13:45<3:14:19, 3.41it/s] 89%|████████▉ | 331745/371472 [4:13:46<3:18:24, 3.34it/s] 89%|████████▉ | 331746/371472 [4:13:46<3:11:04, 3.47it/s] 89%|████████▉ | 331747/371472 [4:13:46<3:23:48, 3.25it/s] 89%|████████▉ | 331748/371472 [4:13:47<3:18:17, 3.34it/s] 89%|████████▉ | 331749/371472 [4:13:47<3:14:33, 3.40it/s] 89%|████████▉ | 331750/371472 [4:13:47<3:09:28, 3.49it/s] 89%|████████▉ | 331751/371472 [4:13:47<3:03:40, 3.60it/s] 89%|████████▉ | 331752/371472 [4:13:48<3:02:14, 3.63it/s] 89%|████████▉ | 331753/371472 [4:13:48<3:08:43, 3.51it/s] 89%|████████▉ | 331754/371472 [4:13:48<3:04:59, 3.58it/s] 89%|████████▉ | 331755/371472 [4:13:48<2:58:26, 3.71it/s] 89%|████████▉ | 331756/371472 [4:13:49<3:52:31, 2.85it/s] 89%|████████▉ | 331757/371472 [4:13:49<3:52:56, 2.84it/s] 89%|████████▉ | 331758/371472 [4:13:50<4:02:43, 2.73it/s] 89%|████████▉ | 331759/371472 [4:13:50<3:45:59, 2.93it/s] 89%|████████▉ | 331760/371472 [4:13:50<3:33:00, 3.11it/s] {'loss': 2.5738, 'learning_rate': 1.9626581051089225e-07, 'epoch': 14.29} + 89%|████████▉ | 331760/371472 [4:13:50<3:33:00, 3.11it/s] 89%|████████▉ | 331761/371472 [4:13:51<3:17:36, 3.35it/s] 89%|████████▉ | 331762/371472 [4:13:51<3:24:19, 3.24it/s] 89%|████████▉ | 331763/371472 [4:13:51<3:14:18, 3.41it/s] 89%|████████▉ | 331764/371472 [4:13:51<3:17:54, 3.34it/s] 89%|████████▉ | 331765/371472 [4:13:52<3:21:09, 3.29it/s] 89%|████████▉ | 331766/371472 [4:13:52<3:33:07, 3.11it/s] 89%|████████▉ | 331767/371472 [4:13:52<3:26:11, 3.21it/s] 89%|████████▉ | 331768/371472 [4:13:53<3:24:37, 3.23it/s] 89%|████████▉ | 331769/371472 [4:13:53<3:17:02, 3.36it/s] 89%|████████▉ | 331770/371472 [4:13:53<3:17:14, 3.35it/s] 89%|████████▉ | 331771/371472 [4:13:54<3:14:33, 3.40it/s] 89%|████████▉ | 331772/371472 [4:13:54<3:14:18, 3.41it/s] 89%|████████▉ | 331773/371472 [4:13:54<3:16:09, 3.37it/s] 89%|████████▉ | 331774/371472 [4:13:54<3:09:15, 3.50it/s] 89%|████████▉ | 331775/371472 [4:13:55<3:02:32, 3.62it/s] 89%|████████▉ | 331776/371472 [4:13:55<3:04:11, 3.59it/s] 89%|████████▉ | 331777/371472 [4:13:55<3:02:41, 3.62it/s] 89%|████████▉ | 331778/371472 [4:13:56<3:03:34, 3.60it/s] 89%|████████▉ | 331779/371472 [4:13:56<3:21:44, 3.28it/s] 89%|████████▉ | 331780/371472 [4:13:56<3:25:09, 3.22it/s] {'loss': 2.7053, 'learning_rate': 1.962173285354134e-07, 'epoch': 14.29} + 89%|████████▉ | 331780/371472 [4:13:56<3:25:09, 3.22it/s] 89%|████████▉ | 331781/371472 [4:13:57<3:23:34, 3.25it/s] 89%|████████▉ | 331782/371472 [4:13:57<3:30:48, 3.14it/s] 89%|████████▉ | 331783/371472 [4:13:57<3:27:24, 3.19it/s] 89%|████████▉ | 331784/371472 [4:13:57<3:20:43, 3.30it/s] 89%|████████▉ | 331785/371472 [4:13:58<3:21:08, 3.29it/s] 89%|████████▉ | 331786/371472 [4:13:58<3:21:28, 3.28it/s] 89%|████████▉ | 331787/371472 [4:13:58<3:20:31, 3.30it/s] 89%|████████▉ | 331788/371472 [4:13:59<3:18:19, 3.33it/s] 89%|██��█████▉ | 331789/371472 [4:13:59<3:16:16, 3.37it/s] 89%|████████▉ | 331790/371472 [4:13:59<3:13:07, 3.42it/s] 89%|████████▉ | 331791/371472 [4:13:59<3:12:51, 3.43it/s] 89%|████████▉ | 331792/371472 [4:14:00<3:06:20, 3.55it/s] 89%|████████▉ | 331793/371472 [4:14:00<3:11:31, 3.45it/s] 89%|████████▉ | 331794/371472 [4:14:00<3:11:25, 3.45it/s] 89%|████████▉ | 331795/371472 [4:14:01<3:10:14, 3.48it/s] 89%|████████▉ | 331796/371472 [4:14:01<3:06:55, 3.54it/s] 89%|████████▉ | 331797/371472 [4:14:01<3:41:43, 2.98it/s] 89%|████████▉ | 331798/371472 [4:14:02<3:46:14, 2.92it/s] 89%|████████▉ | 331799/371472 [4:14:02<3:41:28, 2.99it/s] 89%|████████▉ | 331800/371472 [4:14:02<3:38:20, 3.03it/s] {'loss': 2.5313, 'learning_rate': 1.9616884655993445e-07, 'epoch': 14.29} + 89%|████████▉ | 331800/371472 [4:14:02<3:38:20, 3.03it/s] 89%|████████▉ | 331801/371472 [4:14:03<3:23:05, 3.26it/s] 89%|████████▉ | 331802/371472 [4:14:03<3:19:53, 3.31it/s] 89%|████████▉ | 331803/371472 [4:14:03<3:37:23, 3.04it/s] 89%|████████▉ | 331804/371472 [4:14:04<3:36:52, 3.05it/s] 89%|████████▉ | 331805/371472 [4:14:04<3:34:46, 3.08it/s] 89%|████████▉ | 331806/371472 [4:14:04<3:24:15, 3.24it/s] 89%|████████▉ | 331807/371472 [4:14:04<3:17:27, 3.35it/s] 89%|████████▉ | 331808/371472 [4:14:05<3:11:39, 3.45it/s] 89%|████████▉ | 331809/371472 [4:14:05<3:08:25, 3.51it/s] 89%|████████▉ | 331810/371472 [4:14:05<2:59:54, 3.67it/s] 89%|████████▉ | 331811/371472 [4:14:06<2:58:03, 3.71it/s] 89%|████████▉ | 331812/371472 [4:14:06<2:59:34, 3.68it/s] 89%|████████▉ | 331813/371472 [4:14:06<3:20:53, 3.29it/s] 89%|████████▉ | 331814/371472 [4:14:06<3:20:38, 3.29it/s] 89%|████████▉ | 331815/371472 [4:14:07<3:51:56, 2.85it/s] 89%|████████▉ | 331816/371472 [4:14:07<3:36:54, 3.05it/s] 89%|████████▉ | 331817/371472 [4:14:08<3:28:32, 3.17it/s] 89%|████████▉ | 331818/371472 [4:14:08<3:23:15, 3.25it/s] 89%|████████▉ | 331819/371472 [4:14:08<3:20:08, 3.30it/s] 89%|████████▉ | 331820/371472 [4:14:08<3:12:00, 3.44it/s] {'loss': 2.5603, 'learning_rate': 1.9612036458445563e-07, 'epoch': 14.29} + 89%|████████▉ | 331820/371472 [4:14:08<3:12:00, 3.44it/s] 89%|████████▉ | 331821/371472 [4:14:09<3:11:54, 3.44it/s] 89%|████████▉ | 331822/371472 [4:14:09<3:13:07, 3.42it/s] 89%|████████▉ | 331823/371472 [4:14:09<3:16:24, 3.36it/s] 89%|████████▉ | 331824/371472 [4:14:10<3:12:51, 3.43it/s] 89%|████████▉ | 331825/371472 [4:14:10<3:10:24, 3.47it/s] 89%|████████▉ | 331826/371472 [4:14:10<3:09:49, 3.48it/s] 89%|████████▉ | 331827/371472 [4:14:10<3:04:13, 3.59it/s] 89%|████████▉ | 331828/371472 [4:14:11<3:02:52, 3.61it/s] 89%|████████▉ | 331829/371472 [4:14:11<3:05:19, 3.57it/s] 89%|████████▉ | 331830/371472 [4:14:11<3:07:06, 3.53it/s] 89%|████████▉ | 331831/371472 [4:14:11<3:06:26, 3.54it/s] 89%|████████▉ | 331832/371472 [4:14:12<3:03:56, 3.59it/s] 89%|████████▉ | 331833/371472 [4:14:12<3:10:06, 3.48it/s] 89%|████████▉ | 331834/371472 [4:14:12<3:08:09, 3.51it/s] 89%|████████▉ | 331835/371472 [4:14:13<3:16:38, 3.36it/s] 89%|████████▉ | 331836/371472 [4:14:13<3:22:34, 3.26it/s] 89%|████████▉ | 331837/371472 [4:14:13<3:17:05, 3.35it/s] 89%|████████▉ | 331838/371472 [4:14:14<3:07:11, 3.53it/s] 89%|████████▉ | 331839/371472 [4:14:14<3:13:12, 3.42it/s] 89%|████████▉ | 331840/371472 [4:14:14<3:06:42, 3.54it/s] {'loss': 2.6287, 'learning_rate': 1.960718826089767e-07, 'epoch': 14.29} + 89%|████████▉ | 331840/371472 [4:14:14<3:06:42, 3.54it/s] 89%|████████▉ | 331841/371472 [4:14:14<3:05:41, 3.56it/s] 89%|████████▉ | 331842/371472 [4:14:15<3:05:08, 3.57it/s] 89%|████████▉ | 331843/371472 [4:14:15<3:05:45, 3.56it/s] 89%|████████▉ | 331844/371472 [4:14:15<3:03:40, 3.60it/s] 89%|████████▉ | 331845/371472 [4:14:16<3:09:55, 3.48it/s] 89%|████████▉ | 331846/371472 [4:14:16<3:08:54, 3.50it/s] 89%|████████▉ | 331847/371472 [4:14:16<3:15:34, 3.38it/s] 89%|████████▉ | 331848/371472 [4:14:16<3:10:13, 3.47it/s] 89%|████████▉ | 331849/371472 [4:14:17<3:15:44, 3.37it/s] 89%|████████▉ | 331850/371472 [4:14:17<3:21:15, 3.28it/s] 89%|████████▉ | 331851/371472 [4:14:17<3:35:12, 3.07it/s] 89%|████████▉ | 331852/371472 [4:14:18<3:23:58, 3.24it/s] 89%|████████▉ | 331853/371472 [4:14:18<3:17:12, 3.35it/s] 89%|████████▉ | 331854/371472 [4:14:18<3:16:32, 3.36it/s] 89%|████████▉ | 331855/371472 [4:14:18<3:06:25, 3.54it/s] 89%|████████▉ | 331856/371472 [4:14:19<3:07:31, 3.52it/s] 89%|████████▉ | 331857/371472 [4:14:19<3:03:19, 3.60it/s] 89%|████████▉ | 331858/371472 [4:14:19<2:59:20, 3.68it/s] 89%|████████▉ | 331859/371472 [4:14:20<3:04:15, 3.58it/s] 89%|████████▉ | 331860/371472 [4:14:20<3:05:48, 3.55it/s] {'loss': 2.7383, 'learning_rate': 1.9602340063349782e-07, 'epoch': 14.29} + 89%|████████▉ | 331860/371472 [4:14:20<3:05:48, 3.55it/s] 89%|████████▉ | 331861/371472 [4:14:20<3:05:44, 3.55it/s] 89%|████████▉ | 331862/371472 [4:14:20<3:08:46, 3.50it/s] 89%|████████▉ | 331863/371472 [4:14:21<3:04:49, 3.57it/s] 89%|████████▉ | 331864/371472 [4:14:21<3:03:18, 3.60it/s] 89%|████████▉ | 331865/371472 [4:14:21<3:07:41, 3.52it/s] 89%|████████▉ | 331866/371472 [4:14:22<3:09:48, 3.48it/s] 89%|████████▉ | 331867/371472 [4:14:22<3:03:07, 3.60it/s] 89%|████████▉ | 331868/371472 [4:14:22<3:00:16, 3.66it/s] 89%|████████▉ | 331869/371472 [4:14:22<3:06:48, 3.53it/s] 89%|████████▉ | 331870/371472 [4:14:23<3:29:58, 3.14it/s] 89%|████████▉ | 331871/371472 [4:14:23<3:22:56, 3.25it/s] 89%|████████▉ | 331872/371472 [4:14:23<3:22:15, 3.26it/s] 89%|████████▉ | 331873/371472 [4:14:24<3:16:34, 3.36it/s] 89%|████████▉ | 331874/371472 [4:14:24<3:16:48, 3.35it/s] 89%|████████▉ | 331875/371472 [4:14:24<3:12:48, 3.42it/s] 89%|████████▉ | 331876/371472 [4:14:25<3:04:58, 3.57it/s] 89%|████████▉ | 331877/371472 [4:14:25<3:03:29, 3.60it/s] 89%|████████▉ | 331878/371472 [4:14:25<3:02:11, 3.62it/s] 89%|████████▉ | 331879/371472 [4:14:25<3:05:02, 3.57it/s] 89%|████████▉ | 331880/371472 [4:14:26<3:07:42, 3.52it/s] {'loss': 2.8386, 'learning_rate': 1.959749186580189e-07, 'epoch': 14.29} + 89%|████████▉ | 331880/371472 [4:14:26<3:07:42, 3.52it/s] 89%|████████▉ | 331881/371472 [4:14:26<3:07:03, 3.53it/s] 89%|████████▉ | 331882/371472 [4:14:26<3:23:29, 3.24it/s] 89%|████████▉ | 331883/371472 [4:14:27<3:26:01, 3.20it/s] 89%|████████▉ | 331884/371472 [4:14:27<3:22:42, 3.25it/s] 89%|████████▉ | 331885/371472 [4:14:27<3:15:23, 3.38it/s] 89%|████████▉ | 331886/371472 [4:14:27<3:11:26, 3.45it/s] 89%|████████▉ | 331887/371472 [4:14:28<3:09:59, 3.47it/s] 89%|████████▉ | 331888/371472 [4:14:28<3:06:05, 3.55it/s] 89%|████████▉ | 331889/371472 [4:14:28<2:58:30, 3.70it/s] 89%|████████▉ | 331890/371472 [4:14:29<3:23:21, 3.24it/s] 89%|████████▉ | 331891/371472 [4:14:29<3:18:37, 3.32it/s] 89%|████████▉ | 331892/371472 [4:14:29<3:27:55, 3.17it/s] 89%|████████▉ | 331893/371472 [4:14:30<3:23:59, 3.23it/s] 89%|████████▉ | 331894/371472 [4:14:30<3:30:54, 3.13it/s] 89%|████████▉ | 331895/371472 [4:14:30<3:23:13, 3.25it/s] 89%|████████▉ | 331896/371472 [4:14:30<3:13:29, 3.41it/s] 89%|████████▉ | 331897/371472 [4:14:31<3:08:48, 3.49it/s] 89%|████████▉ | 331898/371472 [4:14:31<3:07:03, 3.53it/s] 89%|████████▉ | 331899/371472 [4:14:31<3:08:36, 3.50it/s] 89%|████████▉ | 331900/371472 [4:14:32<3:04:34, 3.57it/s] {'loss': 2.4935, 'learning_rate': 1.9592643668254007e-07, 'epoch': 14.3} + 89%|████████▉ | 331900/371472 [4:14:32<3:04:34, 3.57it/s] 89%|████████▉ | 331901/371472 [4:14:32<3:11:02, 3.45it/s] 89%|████████▉ | 331902/371472 [4:14:32<3:07:42, 3.51it/s] 89%|████████▉ | 331903/371472 [4:14:32<3:05:59, 3.55it/s] 89%|████████▉ | 331904/371472 [4:14:33<3:04:34, 3.57it/s] 89%|████████▉ | 331905/371472 [4:14:33<3:07:44, 3.51it/s] 89%|████████▉ | 331906/371472 [4:14:33<3:02:41, 3.61it/s] 89%|████████▉ | 331907/371472 [4:14:34<3:01:27, 3.63it/s] 89%|████████▉ | 331908/371472 [4:14:34<2:59:24, 3.68it/s] 89%|████████▉ | 331909/371472 [4:14:34<2:58:39, 3.69it/s] 89%|████████▉ | 331910/371472 [4:14:34<3:12:41, 3.42it/s] 89%|████████▉ | 331911/371472 [4:14:35<3:25:22, 3.21it/s] 89%|████████▉ | 331912/371472 [4:14:35<3:09:47, 3.47it/s] 89%|████████▉ | 331913/371472 [4:14:35<3:04:02, 3.58it/s] 89%|████████▉ | 331914/371472 [4:14:36<3:08:39, 3.49it/s] 89%|████████▉ | 331915/371472 [4:14:36<3:03:11, 3.60it/s] 89%|████████▉ | 331916/371472 [4:14:36<3:02:47, 3.61it/s] 89%|████████▉ | 331917/371472 [4:14:36<3:01:50, 3.63it/s] 89%|████████▉ | 331918/371472 [4:14:37<3:10:47, 3.46it/s] 89%|████████▉ | 331919/371472 [4:14:37<3:02:04, 3.62it/s] 89%|████████▉ | 331920/371472 [4:14:37<2:59:21, 3.68it/s] {'loss': 2.6157, 'learning_rate': 1.958779547070611e-07, 'epoch': 14.3} + 89%|████████▉ | 331920/371472 [4:14:37<2:59:21, 3.68it/s] 89%|████████▉ | 331921/371472 [4:14:37<3:04:53, 3.57it/s] 89%|████████▉ | 331922/371472 [4:14:38<2:58:53, 3.68it/s] 89%|████████▉ | 331923/371472 [4:14:38<3:08:12, 3.50it/s] 89%|████████▉ | 331924/371472 [4:14:38<3:15:19, 3.37it/s] 89%|████████▉ | 331925/371472 [4:14:39<3:12:28, 3.42it/s] 89%|████████▉ | 331926/371472 [4:14:39<3:11:19, 3.44it/s] 89%|████████▉ | 331927/371472 [4:14:39<3:04:26, 3.57it/s] 89%|████████▉ | 331928/371472 [4:14:39<3:03:57, 3.58it/s] 89%|████████▉ | 331929/371472 [4:14:40<3:14:33, 3.39it/s] 89%|████████▉ | 331930/371472 [4:14:40<3:07:15, 3.52it/s] 89%|████████▉ | 331931/371472 [4:14:40<3:05:31, 3.55it/s] 89%|████████▉ | 331932/371472 [4:14:41<3:08:36, 3.49it/s] 89%|████████▉ | 331933/371472 [4:14:41<3:07:03, 3.52it/s] 89%|████████▉ | 331934/371472 [4:14:41<3:05:53, 3.54it/s] 89%|████████▉ | 331935/371472 [4:14:41<3:03:53, 3.58it/s] 89%|████████▉ | 331936/371472 [4:14:42<3:07:59, 3.51it/s] 89%|████████▉ | 331937/371472 [4:14:42<3:06:28, 3.53it/s] 89%|████████▉ | 331938/371472 [4:14:42<3:03:43, 3.59it/s] 89%|████████▉ | 331939/371472 [4:14:43<3:04:08, 3.58it/s] 89%|████████▉ | 331940/371472 [4:14:43<3:06:53, 3.53it/s] {'loss': 2.7505, 'learning_rate': 1.9582947273158227e-07, 'epoch': 14.3} + 89%|████████▉ | 331940/371472 [4:14:43<3:06:53, 3.53it/s] 89%|████████▉ | 331941/371472 [4:14:43<3:13:29, 3.41it/s] 89%|████████▉ | 331942/371472 [4:14:43<3:10:13, 3.46it/s] 89%|████████▉ | 331943/371472 [4:14:44<3:02:57, 3.60it/s] 89%|████████▉ | 331944/371472 [4:14:44<2:58:58, 3.68it/s] 89%|████████▉ | 331945/371472 [4:14:44<3:01:25, 3.63it/s] 89%|████████▉ | 331946/371472 [4:14:45<3:05:07, 3.56it/s] 89%|████████▉ | 331947/371472 [4:14:45<3:08:58, 3.49it/s] 89%|████████▉ | 331948/371472 [4:14:45<3:06:31, 3.53it/s] 89%|████████▉ | 331949/371472 [4:14:45<2:59:21, 3.67it/s] 89%|████████▉ | 331950/371472 [4:14:46<3:18:21, 3.32it/s] 89%|████████▉ | 331951/371472 [4:14:46<3:18:19, 3.32it/s] 89%|████████▉ | 331952/371472 [4:14:46<3:14:20, 3.39it/s] 89%|████████▉ | 331953/371472 [4:14:47<3:06:49, 3.53it/s] 89%|████████▉ | 331954/371472 [4:14:47<3:10:58, 3.45it/s] 89%|████████▉ | 331955/371472 [4:14:47<3:03:59, 3.58it/s] 89%|████████▉ | 331956/371472 [4:14:47<3:08:19, 3.50it/s] 89%|████████▉ | 331957/371472 [4:14:48<3:06:43, 3.53it/s] 89%|████████▉ | 331958/371472 [4:14:48<3:08:26, 3.49it/s] 89%|████████▉ | 331959/371472 [4:14:48<3:01:05, 3.64it/s] 89%|████████▉ | 331960/371472 [4:14:49<2:57:20, 3.71it/s] {'loss': 2.6382, 'learning_rate': 1.9578099075610337e-07, 'epoch': 14.3} + 89%|████████▉ | 331960/371472 [4:14:49<2:57:20, 3.71it/s] 89%|████████▉ | 331961/371472 [4:14:49<3:07:20, 3.52it/s] 89%|████████▉ | 331962/371472 [4:14:49<3:02:38, 3.61it/s] 89%|████████▉ | 331963/371472 [4:14:49<3:07:04, 3.52it/s] 89%|████████▉ | 331964/371472 [4:14:50<2:59:01, 3.68it/s] 89%|████████▉ | 331965/371472 [4:14:50<3:20:25, 3.29it/s] 89%|████████▉ | 331966/371472 [4:14:50<3:10:31, 3.46it/s] 89%|████████▉ | 331967/371472 [4:14:51<3:02:51, 3.60it/s] 89%|████████▉ | 331968/371472 [4:14:51<3:02:00, 3.62it/s] 89%|████████▉ | 331969/371472 [4:14:51<3:05:29, 3.55it/s] 89%|████████▉ | 331970/371472 [4:14:51<3:06:39, 3.53it/s] 89%|████████▉ | 331971/371472 [4:14:52<3:08:35, 3.49it/s] 89%|████████▉ | 331972/371472 [4:14:52<3:01:05, 3.64it/s] 89%|████████▉ | 331973/371472 [4:14:52<3:12:54, 3.41it/s] 89%|████████▉ | 331974/371472 [4:14:53<3:05:52, 3.54it/s] 89%|████████▉ | 331975/371472 [4:14:53<2:59:43, 3.66it/s] 89%|████████▉ | 331976/371472 [4:14:53<3:05:45, 3.54it/s] 89%|████████▉ | 331977/371472 [4:14:53<3:01:06, 3.63it/s] 89%|████████▉ | 331978/371472 [4:14:54<2:57:21, 3.71it/s] 89%|████████▉ | 331979/371472 [4:14:54<2:53:58, 3.78it/s] 89%|████████▉ | 331980/371472 [4:14:54<2:59:11, 3.67it/s] {'loss': 2.6148, 'learning_rate': 1.9573250878062446e-07, 'epoch': 14.3} + 89%|████████▉ | 331980/371472 [4:14:54<2:59:11, 3.67it/s] 89%|████████▉ | 331981/371472 [4:14:54<3:04:06, 3.57it/s] 89%|████████▉ | 331982/371472 [4:14:55<3:08:43, 3.49it/s] 89%|████████▉ | 331983/371472 [4:14:55<3:06:25, 3.53it/s] 89%|████████▉ | 331984/371472 [4:14:55<3:04:04, 3.58it/s] 89%|████████▉ | 331985/371472 [4:14:56<3:11:42, 3.43it/s] 89%|████████▉ | 331986/371472 [4:14:56<3:03:57, 3.58it/s] 89%|████████▉ | 331987/371472 [4:14:56<3:04:25, 3.57it/s] 89%|████████▉ | 331988/371472 [4:14:56<3:05:43, 3.54it/s] 89%|████████▉ | 331989/371472 [4:14:57<3:11:07, 3.44it/s] 89%|████████▉ | 331990/371472 [4:14:57<3:15:13, 3.37it/s] 89%|████████▉ | 331991/371472 [4:14:57<3:15:01, 3.37it/s] 89%|████████▉ | 331992/371472 [4:14:58<3:16:59, 3.34it/s] 89%|████████▉ | 331993/371472 [4:14:58<3:16:15, 3.35it/s] 89%|████████▉ | 331994/371472 [4:14:58<3:11:18, 3.44it/s] 89%|████████▉ | 331995/371472 [4:14:59<3:23:42, 3.23it/s] 89%|████████▉ | 331996/371472 [4:14:59<3:29:34, 3.14it/s] 89%|████████▉ | 331997/371472 [4:14:59<3:23:08, 3.24it/s] 89%|████████▉ | 331998/371472 [4:15:00<3:21:09, 3.27it/s] 89%|████████▉ | 331999/371472 [4:15:00<3:16:25, 3.35it/s] 89%|████████▉ | 332000/371472 [4:15:00<3:12:41, 3.41it/s] {'loss': 2.5819, 'learning_rate': 1.9568402680514554e-07, 'epoch': 14.3} + 89%|████████▉ | 332000/371472 [4:15:00<3:12:41, 3.41it/s] 89%|████████▉ | 332001/371472 [4:15:00<3:07:49, 3.50it/s] 89%|████████▉ | 332002/371472 [4:15:01<3:02:11, 3.61it/s] 89%|████████▉ | 332003/371472 [4:15:01<3:04:09, 3.57it/s] 89%|████████▉ | 332004/371472 [4:15:01<3:01:53, 3.62it/s] 89%|████████▉ | 332005/371472 [4:15:01<3:12:50, 3.41it/s] 89%|████████▉ | 332006/371472 [4:15:02<3:08:52, 3.48it/s] 89%|████████▉ | 332007/371472 [4:15:02<3:02:39, 3.60it/s] 89%|████████▉ | 332008/371472 [4:15:02<3:03:58, 3.58it/s] 89%|████████▉ | 332009/371472 [4:15:03<3:03:46, 3.58it/s] 89%|████████▉ | 332010/371472 [4:15:03<3:03:24, 3.59it/s] 89%|████████▉ | 332011/371472 [4:15:03<3:02:47, 3.60it/s] 89%|████████▉ | 332012/371472 [4:15:03<3:03:46, 3.58it/s] 89%|████████▉ | 332013/371472 [4:15:04<3:15:17, 3.37it/s] 89%|████████▉ | 332014/371472 [4:15:04<3:13:44, 3.39it/s] 89%|████████▉ | 332015/371472 [4:15:04<3:11:59, 3.43it/s] 89%|████████▉ | 332016/371472 [4:15:05<3:10:28, 3.45it/s] 89%|████████▉ | 332017/371472 [4:15:05<3:05:55, 3.54it/s] 89%|████████▉ | 332018/371472 [4:15:05<3:09:38, 3.47it/s] 89%|████████▉ | 332019/371472 [4:15:05<3:04:43, 3.56it/s] 89%|████████▉ | 332020/371472 [4:15:06<3:06:02, 3.53it/s] {'loss': 2.6886, 'learning_rate': 1.956355448296667e-07, 'epoch': 14.3} + 89%|████████▉ | 332020/371472 [4:15:06<3:06:02, 3.53it/s] 89%|████████▉ | 332021/371472 [4:15:06<3:14:47, 3.38it/s] 89%|████████▉ | 332022/371472 [4:15:06<3:13:43, 3.39it/s] 89%|████████▉ | 332023/371472 [4:15:07<3:09:59, 3.46it/s] 89%|████████▉ | 332024/371472 [4:15:07<3:09:06, 3.48it/s] 89%|████████▉ | 332025/371472 [4:15:07<3:05:02, 3.55it/s] 89%|████████▉ | 332026/371472 [4:15:07<3:15:08, 3.37it/s] 89%|████████▉ | 332027/371472 [4:15:08<3:10:39, 3.45it/s] 89%|████████▉ | 332028/371472 [4:15:08<3:05:46, 3.54it/s] 89%|████████▉ | 332029/371472 [4:15:08<3:02:54, 3.59it/s] 89%|████████▉ | 332030/371472 [4:15:09<3:04:08, 3.57it/s] 89%|████████▉ | 332031/371472 [4:15:09<3:00:57, 3.63it/s] 89%|████████▉ | 332032/371472 [4:15:09<3:02:35, 3.60it/s] 89%|████████▉ | 332033/371472 [4:15:09<3:06:45, 3.52it/s] 89%|████████▉ | 332034/371472 [4:15:10<3:05:40, 3.54it/s] 89%|████████▉ | 332035/371472 [4:15:10<3:06:56, 3.52it/s] 89%|████████▉ | 332036/371472 [4:15:10<3:05:35, 3.54it/s] 89%|████████▉ | 332037/371472 [4:15:11<3:07:43, 3.50it/s] 89%|████████▉ | 332038/371472 [4:15:11<3:06:33, 3.52it/s] 89%|████████▉ | 332039/371472 [4:15:11<3:07:00, 3.51it/s] 89%|████████▉ | 332040/371472 [4:15:11<3:04:15, 3.57it/s] {'loss': 2.5853, 'learning_rate': 1.9558706285418776e-07, 'epoch': 14.3} + 89%|████████▉ | 332040/371472 [4:15:11<3:04:15, 3.57it/s] 89%|████████▉ | 332041/371472 [4:15:12<3:00:21, 3.64it/s] 89%|████████▉ | 332042/371472 [4:15:12<2:58:55, 3.67it/s] 89%|████████▉ | 332043/371472 [4:15:12<2:57:42, 3.70it/s] 89%|████████▉ | 332044/371472 [4:15:12<2:59:30, 3.66it/s] 89%|████████▉ | 332045/371472 [4:15:13<3:07:52, 3.50it/s] 89%|████████▉ | 332046/371472 [4:15:13<3:09:31, 3.47it/s] 89%|████████▉ | 332047/371472 [4:15:13<3:24:43, 3.21it/s] 89%|████████▉ | 332048/371472 [4:15:14<3:28:33, 3.15it/s] 89%|████████▉ | 332049/371472 [4:15:14<3:24:10, 3.22it/s] 89%|████████▉ | 332050/371472 [4:15:14<3:21:11, 3.27it/s] 89%|████████▉ | 332051/371472 [4:15:15<3:15:48, 3.36it/s] 89%|████████▉ | 332052/371472 [4:15:15<3:08:25, 3.49it/s] 89%|████████▉ | 332053/371472 [4:15:15<3:06:29, 3.52it/s] 89%|████████▉ | 332054/371472 [4:15:16<3:13:14, 3.40it/s] 89%|████████▉ | 332055/371472 [4:15:16<3:09:09, 3.47it/s] 89%|████████▉ | 332056/371472 [4:15:16<3:11:32, 3.43it/s] 89%|████████▉ | 332057/371472 [4:15:16<3:11:07, 3.44it/s] 89%|████████▉ | 332058/371472 [4:15:17<3:05:45, 3.54it/s] 89%|████████▉ | 332059/371472 [4:15:17<3:04:20, 3.56it/s] 89%|████████▉ | 332060/371472 [4:15:17<2:59:20, 3.66it/s] {'loss': 2.5626, 'learning_rate': 1.955385808787088e-07, 'epoch': 14.3} + 89%|████████▉ | 332060/371472 [4:15:17<2:59:20, 3.66it/s] 89%|████████▉ | 332061/371472 [4:15:17<3:00:12, 3.65it/s] 89%|████████▉ | 332062/371472 [4:15:18<3:17:56, 3.32it/s] 89%|████████▉ | 332063/371472 [4:15:18<3:09:51, 3.46it/s] 89%|████████▉ | 332064/371472 [4:15:18<3:07:24, 3.50it/s] 89%|████████▉ | 332065/371472 [4:15:19<3:03:11, 3.59it/s] 89%|████████▉ | 332066/371472 [4:15:19<3:08:25, 3.49it/s] 89%|████████▉ | 332067/371472 [4:15:19<3:05:52, 3.53it/s] 89%|████████▉ | 332068/371472 [4:15:20<3:13:24, 3.40it/s] 89%|████████▉ | 332069/371472 [4:15:20<3:10:14, 3.45it/s] 89%|████████▉ | 332070/371472 [4:15:20<3:02:59, 3.59it/s] 89%|████████▉ | 332071/371472 [4:15:20<3:01:29, 3.62it/s] 89%|████████▉ | 332072/371472 [4:15:21<3:00:22, 3.64it/s] 89%|████████▉ | 332073/371472 [4:15:21<2:57:09, 3.71it/s] 89%|████████▉ | 332074/371472 [4:15:21<2:58:04, 3.69it/s] 89%|████████▉ | 332075/371472 [4:15:21<3:04:13, 3.56it/s] 89%|████████▉ | 332076/371472 [4:15:22<3:02:35, 3.60it/s] 89%|████████▉ | 332077/371472 [4:15:22<2:58:06, 3.69it/s] 89%|████████▉ | 332078/371472 [4:15:22<3:01:14, 3.62it/s] 89%|████████▉ | 332079/371472 [4:15:23<3:00:23, 3.64it/s] 89%|████████▉ | 332080/371472 [4:15:23<3:00:54, 3.63it/s] {'loss': 2.646, 'learning_rate': 1.9549009890322998e-07, 'epoch': 14.3} + 89%|████████▉ | 332080/371472 [4:15:23<3:00:54, 3.63it/s] 89%|████████▉ | 332081/371472 [4:15:23<3:11:08, 3.43it/s] 89%|████████▉ | 332082/371472 [4:15:23<3:01:24, 3.62it/s] 89%|████████▉ | 332083/371472 [4:15:24<3:01:57, 3.61it/s] 89%|████████▉ | 332084/371472 [4:15:24<3:09:16, 3.47it/s] 89%|████████▉ | 332085/371472 [4:15:24<3:32:58, 3.08it/s] 89%|████████▉ | 332086/371472 [4:15:25<3:19:38, 3.29it/s] 89%|████████▉ | 332087/371472 [4:15:25<3:09:59, 3.45it/s] 89%|████████▉ | 332088/371472 [4:15:25<3:04:30, 3.56it/s] 89%|████████▉ | 332089/371472 [4:15:25<3:02:15, 3.60it/s] 89%|████████▉ | 332090/371472 [4:15:26<3:06:19, 3.52it/s] 89%|████████▉ | 332091/371472 [4:15:26<3:04:29, 3.56it/s] 89%|████████▉ | 332092/371472 [4:15:26<3:16:08, 3.35it/s] 89%|████████▉ | 332093/371472 [4:15:27<3:14:53, 3.37it/s] 89%|████████▉ | 332094/371472 [4:15:27<3:11:21, 3.43it/s] 89%|████████▉ | 332095/371472 [4:15:27<3:12:32, 3.41it/s] 89%|████████▉ | 332096/371472 [4:15:28<3:20:04, 3.28it/s] 89%|████████▉ | 332097/371472 [4:15:28<3:17:25, 3.32it/s] 89%|████████▉ | 332098/371472 [4:15:28<3:16:03, 3.35it/s] 89%|████████▉ | 332099/371472 [4:15:28<3:08:49, 3.48it/s] 89%|████████▉ | 332100/371472 [4:15:29<3:09:03, 3.47it/s] {'loss': 2.5197, 'learning_rate': 1.9544161692775103e-07, 'epoch': 14.3} + 89%|████████▉ | 332100/371472 [4:15:29<3:09:03, 3.47it/s] 89%|████████▉ | 332101/371472 [4:15:29<3:05:14, 3.54it/s] 89%|████████▉ | 332102/371472 [4:15:29<3:02:40, 3.59it/s] 89%|████████▉ | 332103/371472 [4:15:30<3:16:21, 3.34it/s] 89%|████████▉ | 332104/371472 [4:15:30<3:19:42, 3.29it/s] 89%|████████▉ | 332105/371472 [4:15:30<3:18:56, 3.30it/s] 89%|████████▉ | 332106/371472 [4:15:31<3:35:38, 3.04it/s] 89%|████████▉ | 332107/371472 [4:15:31<3:32:58, 3.08it/s] 89%|████████▉ | 332108/371472 [4:15:31<3:23:48, 3.22it/s] 89%|████████▉ | 332109/371472 [4:15:31<3:14:51, 3.37it/s] 89%|████████▉ | 332110/371472 [4:15:32<3:09:56, 3.45it/s] 89%|████████▉ | 332111/371472 [4:15:32<3:07:53, 3.49it/s] 89%|████████▉ | 332112/371472 [4:15:32<3:09:40, 3.46it/s] 89%|████████▉ | 332113/371472 [4:15:32<3:00:37, 3.63it/s] 89%|████████▉ | 332114/371472 [4:15:33<3:11:41, 3.42it/s] 89%|████████▉ | 332115/371472 [4:15:33<3:02:00, 3.60it/s] 89%|████████▉ | 332116/371472 [4:15:33<3:07:08, 3.50it/s] 89%|████████▉ | 332117/371472 [4:15:34<3:05:30, 3.54it/s] 89%|████████▉ | 332118/371472 [4:15:34<3:00:15, 3.64it/s] 89%|████████▉ | 332119/371472 [4:15:34<3:12:43, 3.40it/s] 89%|████████▉ | 332120/371472 [4:15:35<3:10:26, 3.44it/s] {'loss': 2.5781, 'learning_rate': 1.9539313495227218e-07, 'epoch': 14.31} + 89%|████████▉ | 332120/371472 [4:15:35<3:10:26, 3.44it/s] 89%|████████▉ | 332121/371472 [4:15:35<3:13:45, 3.39it/s] 89%|████████▉ | 332122/371472 [4:15:35<3:08:47, 3.47it/s] 89%|████████▉ | 332123/371472 [4:15:35<3:03:32, 3.57it/s] 89%|████████▉ | 332124/371472 [4:15:36<3:14:40, 3.37it/s] 89%|████████▉ | 332125/371472 [4:15:36<3:12:30, 3.41it/s] 89%|████████▉ | 332126/371472 [4:15:36<3:25:31, 3.19it/s] 89%|████████▉ | 332127/371472 [4:15:37<3:16:50, 3.33it/s] 89%|████████▉ | 332128/371472 [4:15:37<3:06:56, 3.51it/s] 89%|████████▉ | 332129/371472 [4:15:37<3:06:35, 3.51it/s] 89%|████████▉ | 332130/371472 [4:15:37<3:09:21, 3.46it/s] 89%|████████▉ | 332131/371472 [4:15:38<3:09:36, 3.46it/s] 89%|████████▉ | 332132/371472 [4:15:38<3:08:15, 3.48it/s] 89%|████████▉ | 332133/371472 [4:15:38<3:24:57, 3.20it/s] 89%|████████▉ | 332134/371472 [4:15:39<3:21:55, 3.25it/s] 89%|████████▉ | 332135/371472 [4:15:39<3:11:06, 3.43it/s] 89%|████████▉ | 332136/371472 [4:15:39<3:18:17, 3.31it/s] 89%|████████▉ | 332137/371472 [4:15:40<3:10:41, 3.44it/s] 89%|████████▉ | 332138/371472 [4:15:40<3:07:13, 3.50it/s] 89%|████████▉ | 332139/371472 [4:15:40<3:11:27, 3.42it/s] 89%|████████▉ | 332140/371472 [4:15:40<3:23:22, 3.22it/s] {'loss': 2.6782, 'learning_rate': 1.9534465297679325e-07, 'epoch': 14.31} + 89%|████████▉ | 332140/371472 [4:15:40<3:23:22, 3.22it/s] 89%|████████▉ | 332141/371472 [4:15:41<3:16:15, 3.34it/s] 89%|████████▉ | 332142/371472 [4:15:41<3:08:13, 3.48it/s] 89%|████████▉ | 332143/371472 [4:15:41<3:11:47, 3.42it/s] 89%|████████▉ | 332144/371472 [4:15:42<3:13:10, 3.39it/s] 89%|████████▉ | 332145/371472 [4:15:42<3:06:36, 3.51it/s] 89%|████████▉ | 332146/371472 [4:15:42<3:04:19, 3.56it/s] 89%|████████▉ | 332147/371472 [4:15:42<3:06:36, 3.51it/s] 89%|████████▉ | 332148/371472 [4:15:43<3:02:52, 3.58it/s] 89%|████████▉ | 332149/371472 [4:15:43<3:02:00, 3.60it/s] 89%|████████▉ | 332150/371472 [4:15:43<3:00:56, 3.62it/s] 89%|████████▉ | 332151/371472 [4:15:44<3:05:26, 3.53it/s] 89%|████████▉ | 332152/371472 [4:15:44<3:18:12, 3.31it/s] 89%|████████▉ | 332153/371472 [4:15:44<3:10:01, 3.45it/s] 89%|████████▉ | 332154/371472 [4:15:44<3:08:03, 3.48it/s] 89%|████████▉ | 332155/371472 [4:15:45<3:06:50, 3.51it/s] 89%|████████▉ | 332156/371472 [4:15:45<2:59:49, 3.64it/s] 89%|████████▉ | 332157/371472 [4:15:45<3:01:24, 3.61it/s] 89%|████████▉ | 332158/371472 [4:15:46<3:00:57, 3.62it/s] 89%|████████▉ | 332159/371472 [4:15:46<3:01:50, 3.60it/s] 89%|████████▉ | 332160/371472 [4:15:46<2:57:21, 3.69it/s] {'loss': 2.5861, 'learning_rate': 1.952961710013144e-07, 'epoch': 14.31} + 89%|████████▉ | 332160/371472 [4:15:46<2:57:21, 3.69it/s] 89%|████████▉ | 332161/371472 [4:15:46<3:06:55, 3.50it/s] 89%|████████▉ | 332162/371472 [4:15:47<3:18:34, 3.30it/s] 89%|████████▉ | 332163/371472 [4:15:47<3:08:48, 3.47it/s] 89%|████████▉ | 332164/371472 [4:15:47<3:21:43, 3.25it/s] 89%|████████▉ | 332165/371472 [4:15:48<3:15:22, 3.35it/s] 89%|████████▉ | 332166/371472 [4:15:48<3:21:37, 3.25it/s] 89%|████████▉ | 332167/371472 [4:15:48<3:22:02, 3.24it/s] 89%|████████▉ | 332168/371472 [4:15:49<3:12:06, 3.41it/s] 89%|████████▉ | 332169/371472 [4:15:49<3:35:41, 3.04it/s] 89%|████████▉ | 332170/371472 [4:15:49<3:32:46, 3.08it/s] 89%|████████▉ | 332171/371472 [4:15:50<3:40:31, 2.97it/s] 89%|████████▉ | 332172/371472 [4:15:50<3:40:45, 2.97it/s] 89%|████████▉ | 332173/371472 [4:15:50<3:24:46, 3.20it/s] 89%|████████▉ | 332174/371472 [4:15:50<3:13:48, 3.38it/s] 89%|████████▉ | 332175/371472 [4:15:51<3:08:55, 3.47it/s] 89%|████████▉ | 332176/371472 [4:15:51<3:07:17, 3.50it/s] 89%|████████▉ | 332177/371472 [4:15:51<3:02:55, 3.58it/s] 89%|████████▉ | 332178/371472 [4:15:52<3:07:24, 3.49it/s] 89%|████████▉ | 332179/371472 [4:15:52<3:01:51, 3.60it/s] 89%|████████▉ | 332180/371472 [4:15:52<3:00:48, 3.62it/s] {'loss': 2.6391, 'learning_rate': 1.9524768902583544e-07, 'epoch': 14.31} + 89%|████████▉ | 332180/371472 [4:15:52<3:00:48, 3.62it/s] 89%|████████▉ | 332181/371472 [4:15:52<2:58:51, 3.66it/s] 89%|████████▉ | 332182/371472 [4:15:53<3:06:23, 3.51it/s] 89%|████████▉ | 332183/371472 [4:15:53<3:02:51, 3.58it/s] 89%|████████▉ | 332184/371472 [4:15:53<3:07:41, 3.49it/s] 89%|████████▉ | 332185/371472 [4:15:54<3:12:16, 3.41it/s] 89%|████████▉ | 332186/371472 [4:15:54<3:07:01, 3.50it/s] 89%|████████▉ | 332187/371472 [4:15:54<3:03:36, 3.57it/s] 89%|████████▉ | 332188/371472 [4:15:54<2:57:36, 3.69it/s] 89%|████████▉ | 332189/371472 [4:15:55<2:57:19, 3.69it/s] 89%|████████▉ | 332190/371472 [4:15:55<2:55:20, 3.73it/s] 89%|████████▉ | 332191/371472 [4:15:55<2:54:37, 3.75it/s] 89%|████████▉ | 332192/371472 [4:15:55<2:54:10, 3.76it/s] 89%|████████▉ | 332193/371472 [4:15:56<3:04:12, 3.55it/s] 89%|████████▉ | 332194/371472 [4:15:56<3:07:52, 3.48it/s] 89%|████████▉ | 332195/371472 [4:15:56<3:10:47, 3.43it/s] 89%|████████▉ | 332196/371472 [4:15:57<3:04:56, 3.54it/s] 89%|████████▉ | 332197/371472 [4:15:57<3:07:34, 3.49it/s] 89%|████████▉ | 332198/371472 [4:15:57<3:13:24, 3.38it/s] 89%|████████▉ | 332199/371472 [4:15:57<3:05:54, 3.52it/s] 89%|████████▉ | 332200/371472 [4:15:58<3:24:32, 3.20it/s] {'loss': 2.6668, 'learning_rate': 1.9519920705035662e-07, 'epoch': 14.31} + 89%|████████▉ | 332200/371472 [4:15:58<3:24:32, 3.20it/s] 89%|████████▉ | 332201/371472 [4:15:58<3:21:18, 3.25it/s] 89%|████████▉ | 332202/371472 [4:15:58<3:12:57, 3.39it/s] 89%|████████▉ | 332203/371472 [4:15:59<3:28:09, 3.14it/s] 89%|████████▉ | 332204/371472 [4:15:59<3:19:15, 3.28it/s] 89%|████████▉ | 332205/371472 [4:15:59<3:13:41, 3.38it/s] 89%|████████▉ | 332206/371472 [4:16:00<3:21:43, 3.24it/s] 89%|████████▉ | 332207/371472 [4:16:00<3:12:59, 3.39it/s] 89%|████████▉ | 332208/371472 [4:16:00<3:11:15, 3.42it/s] 89%|████████▉ | 332209/371472 [4:16:00<3:07:29, 3.49it/s] 89%|████████▉ | 332210/371472 [4:16:01<3:01:28, 3.61it/s] 89%|████████▉ | 332211/371472 [4:16:01<3:01:45, 3.60it/s] 89%|████████▉ | 332212/371472 [4:16:01<3:03:25, 3.57it/s] 89%|████████▉ | 332213/371472 [4:16:02<2:55:55, 3.72it/s] 89%|████████▉ | 332214/371472 [4:16:02<2:57:16, 3.69it/s] 89%|████████▉ | 332215/371472 [4:16:02<2:50:28, 3.84it/s] 89%|████████▉ | 332216/371472 [4:16:02<3:02:01, 3.59it/s] 89%|████████▉ | 332217/371472 [4:16:03<3:11:06, 3.42it/s] 89%|████████▉ | 332218/371472 [4:16:03<3:13:03, 3.39it/s] 89%|████████▉ | 332219/371472 [4:16:03<3:15:15, 3.35it/s] 89%|████████▉ | 332220/371472 [4:16:04<3:15:01, 3.35it/s] {'loss': 2.5859, 'learning_rate': 1.951507250748777e-07, 'epoch': 14.31} + 89%|████████▉ | 332220/371472 [4:16:04<3:15:01, 3.35it/s] 89%|████████▉ | 332221/371472 [4:16:04<3:10:28, 3.43it/s] 89%|████████▉ | 332222/371472 [4:16:04<3:07:59, 3.48it/s] 89%|████████▉ | 332223/371472 [4:16:04<3:10:28, 3.43it/s] 89%|████████▉ | 332224/371472 [4:16:05<3:10:16, 3.44it/s] 89%|████████▉ | 332225/371472 [4:16:05<3:12:04, 3.41it/s] 89%|████████▉ | 332226/371472 [4:16:05<3:08:00, 3.48it/s] 89%|████████▉ | 332227/371472 [4:16:06<3:07:18, 3.49it/s] 89%|████████▉ | 332228/371472 [4:16:06<3:07:06, 3.50it/s] 89%|████████▉ | 332229/371472 [4:16:06<3:13:20, 3.38it/s] 89%|████████▉ | 332230/371472 [4:16:07<3:17:12, 3.32it/s] 89%|████████▉ | 332231/371472 [4:16:07<3:16:25, 3.33it/s] 89%|████████▉ | 332232/371472 [4:16:07<3:11:10, 3.42it/s] 89%|████████▉ | 332233/371472 [4:16:07<3:08:01, 3.48it/s] 89%|████████▉ | 332234/371472 [4:16:08<3:18:29, 3.29it/s] 89%|████████▉ | 332235/371472 [4:16:08<3:29:43, 3.12it/s] 89%|████████▉ | 332236/371472 [4:16:08<3:22:24, 3.23it/s] 89%|████████▉ | 332237/371472 [4:16:09<3:16:53, 3.32it/s] 89%|████████▉ | 332238/371472 [4:16:09<3:22:47, 3.22it/s] 89%|████████▉ | 332239/371472 [4:16:09<3:11:00, 3.42it/s] 89%|████████▉ | 332240/371472 [4:16:09<3:09:06, 3.46it/s] {'loss': 2.5037, 'learning_rate': 1.9510224309939882e-07, 'epoch': 14.31} + 89%|████████▉ | 332240/371472 [4:16:09<3:09:06, 3.46it/s] 89%|████████▉ | 332241/371472 [4:16:10<3:03:59, 3.55it/s] 89%|████████▉ | 332242/371472 [4:16:10<3:00:57, 3.61it/s] 89%|████████▉ | 332243/371472 [4:16:10<2:56:23, 3.71it/s] 89%|████████▉ | 332244/371472 [4:16:11<2:56:58, 3.69it/s] 89%|████████▉ | 332245/371472 [4:16:11<2:58:10, 3.67it/s] 89%|████████▉ | 332246/371472 [4:16:11<2:56:12, 3.71it/s] 89%|████████▉ | 332247/371472 [4:16:11<3:12:54, 3.39it/s] 89%|████████▉ | 332248/371472 [4:16:12<3:20:01, 3.27it/s] 89%|████████▉ | 332249/371472 [4:16:12<3:14:17, 3.36it/s] 89%|████████▉ | 332250/371472 [4:16:12<3:31:08, 3.10it/s] 89%|████████▉ | 332251/371472 [4:16:13<3:18:49, 3.29it/s] 89%|████████▉ | 332252/371472 [4:16:13<3:22:24, 3.23it/s] 89%|████████▉ | 332253/371472 [4:16:13<3:16:10, 3.33it/s] 89%|████████▉ | 332254/371472 [4:16:14<3:07:36, 3.48it/s] 89%|████████▉ | 332255/371472 [4:16:14<3:11:51, 3.41it/s] 89%|████████▉ | 332256/371472 [4:16:14<3:04:48, 3.54it/s] 89%|████████▉ | 332257/371472 [4:16:14<3:03:45, 3.56it/s] 89%|████████▉ | 332258/371472 [4:16:15<3:01:19, 3.60it/s] 89%|████████▉ | 332259/371472 [4:16:15<3:01:32, 3.60it/s] 89%|████████▉ | 332260/371472 [4:16:15<3:07:28, 3.49it/s] {'loss': 2.764, 'learning_rate': 1.950537611239199e-07, 'epoch': 14.31} + 89%|████████▉ | 332260/371472 [4:16:15<3:07:28, 3.49it/s] 89%|████████▉ | 332261/371472 [4:16:16<3:12:18, 3.40it/s] 89%|████████▉ | 332262/371472 [4:16:16<3:20:07, 3.27it/s] 89%|████████▉ | 332263/371472 [4:16:16<3:23:47, 3.21it/s] 89%|████████▉ | 332264/371472 [4:16:16<3:17:35, 3.31it/s] 89%|████████▉ | 332265/371472 [4:16:17<3:14:11, 3.37it/s] 89%|████████▉ | 332266/371472 [4:16:17<3:09:55, 3.44it/s] 89%|████████▉ | 332267/371472 [4:16:17<3:08:21, 3.47it/s] 89%|████████▉ | 332268/371472 [4:16:18<3:05:09, 3.53it/s] 89%|████████▉ | 332269/371472 [4:16:18<3:30:44, 3.10it/s] 89%|████████▉ | 332270/371472 [4:16:18<3:25:12, 3.18it/s] 89%|████████▉ | 332271/371472 [4:16:19<3:14:06, 3.37it/s] 89%|████████▉ | 332272/371472 [4:16:19<3:24:21, 3.20it/s] 89%|████████▉ | 332273/371472 [4:16:19<3:11:09, 3.42it/s] 89%|████████▉ | 332274/371472 [4:16:19<3:08:57, 3.46it/s] 89%|████████▉ | 332275/371472 [4:16:20<3:08:19, 3.47it/s] 89%|████████▉ | 332276/371472 [4:16:20<3:10:45, 3.42it/s] 89%|████████▉ | 332277/371472 [4:16:20<3:06:54, 3.50it/s] 89%|████████▉ | 332278/371472 [4:16:21<3:08:10, 3.47it/s] 89%|████████▉ | 332279/371472 [4:16:21<3:04:43, 3.54it/s] 89%|████████▉ | 332280/371472 [4:16:21<3:22:00, 3.23it/s] {'loss': 2.4548, 'learning_rate': 1.9500527914844107e-07, 'epoch': 14.31} + 89%|████████▉ | 332280/371472 [4:16:21<3:22:00, 3.23it/s] 89%|████████▉ | 332281/371472 [4:16:22<3:17:59, 3.30it/s] 89%|████████▉ | 332282/371472 [4:16:22<3:24:46, 3.19it/s] 89%|████████▉ | 332283/371472 [4:16:22<3:23:05, 3.22it/s] 89%|████████▉ | 332284/371472 [4:16:22<3:13:39, 3.37it/s] 89%|████████▉ | 332285/371472 [4:16:23<3:09:10, 3.45it/s] 89%|████████▉ | 332286/371472 [4:16:23<3:04:47, 3.53it/s] 89%|████████▉ | 332287/371472 [4:16:23<3:00:08, 3.63it/s] 89%|████████▉ | 332288/371472 [4:16:24<3:02:46, 3.57it/s] 89%|████████▉ | 332289/371472 [4:16:24<2:59:33, 3.64it/s] 89%|████████▉ | 332290/371472 [4:16:24<3:05:17, 3.52it/s] 89%|████████▉ | 332291/371472 [4:16:24<3:03:55, 3.55it/s] 89%|████████▉ | 332292/371472 [4:16:25<3:11:33, 3.41it/s] 89%|████████▉ | 332293/371472 [4:16:25<3:36:38, 3.01it/s] 89%|████████▉ | 332294/371472 [4:16:25<3:23:36, 3.21it/s] 89%|████████▉ | 332295/371472 [4:16:26<3:11:16, 3.41it/s] 89%|████████▉ | 332296/371472 [4:16:26<3:16:33, 3.32it/s] 89%|████████▉ | 332297/371472 [4:16:26<3:16:48, 3.32it/s] 89%|████████▉ | 332298/371472 [4:16:27<3:13:35, 3.37it/s] 89%|████████▉ | 332299/371472 [4:16:27<3:09:13, 3.45it/s] 89%|████████▉ | 332300/371472 [4:16:27<3:15:31, 3.34it/s] {'loss': 2.6337, 'learning_rate': 1.949567971729621e-07, 'epoch': 14.31} + 89%|████████▉ | 332300/371472 [4:16:27<3:15:31, 3.34it/s] 89%|████████▉ | 332301/371472 [4:16:27<3:16:16, 3.33it/s] 89%|████████▉ | 332302/371472 [4:16:28<3:08:24, 3.46it/s] 89%|████████▉ | 332303/371472 [4:16:28<3:03:58, 3.55it/s] 89%|████████▉ | 332304/371472 [4:16:28<3:03:42, 3.55it/s] 89%|████████▉ | 332305/371472 [4:16:29<3:16:28, 3.32it/s] 89%|████████▉ | 332306/371472 [4:16:29<3:09:38, 3.44it/s] 89%|████████▉ | 332307/371472 [4:16:29<3:13:27, 3.37it/s] 89%|████████▉ | 332308/371472 [4:16:29<3:07:34, 3.48it/s] 89%|████████▉ | 332309/371472 [4:16:30<3:00:50, 3.61it/s] 89%|████████▉ | 332310/371472 [4:16:30<3:03:54, 3.55it/s] 89%|████████▉ | 332311/371472 [4:16:30<3:17:52, 3.30it/s] 89%|████████▉ | 332312/371472 [4:16:31<3:12:55, 3.38it/s] 89%|████████▉ | 332313/371472 [4:16:31<3:08:47, 3.46it/s] 89%|████████▉ | 332314/371472 [4:16:31<3:00:01, 3.63it/s] 89%|████████▉ | 332315/371472 [4:16:32<3:19:47, 3.27it/s] 89%|████████▉ | 332316/371472 [4:16:32<3:10:42, 3.42it/s] 89%|████████▉ | 332317/371472 [4:16:32<3:16:24, 3.32it/s] 89%|████████▉ | 332318/371472 [4:16:32<3:08:23, 3.46it/s] 89%|████████▉ | 332319/371472 [4:16:33<3:07:52, 3.47it/s] 89%|████████▉ | 332320/371472 [4:16:33<3:04:10, 3.54it/s] {'loss': 2.5539, 'learning_rate': 1.9490831519748326e-07, 'epoch': 14.31} + 89%|████████▉ | 332320/371472 [4:16:33<3:04:10, 3.54it/s] 89%|████████▉ | 332321/371472 [4:16:33<3:07:16, 3.48it/s] 89%|████████▉ | 332322/371472 [4:16:33<3:08:43, 3.46it/s] 89%|████████▉ | 332323/371472 [4:16:34<3:08:23, 3.46it/s] 89%|████████▉ | 332324/371472 [4:16:34<3:17:13, 3.31it/s] 89%|████████▉ | 332325/371472 [4:16:34<3:10:59, 3.42it/s] 89%|████████▉ | 332326/371472 [4:16:35<3:09:40, 3.44it/s] 89%|████████▉ | 332327/371472 [4:16:35<3:16:21, 3.32it/s] 89%|████████▉ | 332328/371472 [4:16:35<3:22:06, 3.23it/s] 89%|████████▉ | 332329/371472 [4:16:36<3:14:30, 3.35it/s] 89%|████████▉ | 332330/371472 [4:16:36<3:12:15, 3.39it/s] 89%|████████▉ | 332331/371472 [4:16:36<3:09:55, 3.43it/s] 89%|████████▉ | 332332/371472 [4:16:37<3:25:20, 3.18it/s] 89%|████████▉ | 332333/371472 [4:16:37<3:20:20, 3.26it/s] 89%|████████▉ | 332334/371472 [4:16:37<3:10:27, 3.42it/s] 89%|████████▉ | 332335/371472 [4:16:37<3:25:47, 3.17it/s] 89%|████████▉ | 332336/371472 [4:16:38<3:28:31, 3.13it/s] 89%|████████▉ | 332337/371472 [4:16:38<3:23:46, 3.20it/s] 89%|████████▉ | 332338/371472 [4:16:38<3:18:25, 3.29it/s] 89%|████████▉ | 332339/371472 [4:16:39<3:23:13, 3.21it/s] 89%|████████▉ | 332340/371472 [4:16:39<3:28:13, 3.13it/s] {'loss': 2.6331, 'learning_rate': 1.9485983322200433e-07, 'epoch': 14.31} + 89%|████████▉ | 332340/371472 [4:16:39<3:28:13, 3.13it/s] 89%|████████▉ | 332341/371472 [4:16:39<3:31:32, 3.08it/s] 89%|████████▉ | 332342/371472 [4:16:40<3:26:54, 3.15it/s] 89%|████████▉ | 332343/371472 [4:16:40<3:15:00, 3.34it/s] 89%|████████▉ | 332344/371472 [4:16:40<3:09:32, 3.44it/s] 89%|████████▉ | 332345/371472 [4:16:40<3:04:17, 3.54it/s] 89%|████████▉ | 332346/371472 [4:16:41<3:07:07, 3.48it/s] 89%|████████▉ | 332347/371472 [4:16:41<3:00:26, 3.61it/s] 89%|████████▉ | 332348/371472 [4:16:41<2:58:42, 3.65it/s] 89%|████████▉ | 332349/371472 [4:16:42<2:54:19, 3.74it/s] 89%|████████▉ | 332350/371472 [4:16:42<3:02:17, 3.58it/s] 89%|████████▉ | 332351/371472 [4:16:42<3:05:29, 3.51it/s] 89%|████████▉ | 332352/371472 [4:16:42<3:15:38, 3.33it/s] 89%|████████▉ | 332353/371472 [4:16:43<3:15:40, 3.33it/s] 89%|████████▉ | 332354/371472 [4:16:43<3:07:56, 3.47it/s] 89%|████████▉ | 332355/371472 [4:16:43<3:03:19, 3.56it/s] 89%|████████▉ | 332356/371472 [4:16:44<3:05:39, 3.51it/s] 89%|████████▉ | 332357/371472 [4:16:44<2:58:54, 3.64it/s] 89%|████████▉ | 332358/371472 [4:16:44<3:09:13, 3.45it/s] 89%|████████▉ | 332359/371472 [4:16:44<3:02:01, 3.58it/s] 89%|████████▉ | 332360/371472 [4:16:45<3:09:34, 3.44it/s] {'loss': 2.5254, 'learning_rate': 1.9481135124652548e-07, 'epoch': 14.32} + 89%|████████▉ | 332360/371472 [4:16:45<3:09:34, 3.44it/s] 89%|████████▉ | 332361/371472 [4:16:45<3:05:46, 3.51it/s] 89%|████████▉ | 332362/371472 [4:16:45<3:04:40, 3.53it/s] 89%|████████▉ | 332363/371472 [4:16:46<2:58:09, 3.66it/s] 89%|████████▉ | 332364/371472 [4:16:46<2:58:09, 3.66it/s] 89%|████████▉ | 332365/371472 [4:16:46<2:58:37, 3.65it/s] 89%|████████▉ | 332366/371472 [4:16:46<2:54:51, 3.73it/s] 89%|████████▉ | 332367/371472 [4:16:47<2:59:10, 3.64it/s] 89%|████████▉ | 332368/371472 [4:16:47<2:58:36, 3.65it/s] 89%|████████▉ | 332369/371472 [4:16:47<2:59:49, 3.62it/s] 89%|████████▉ | 332370/371472 [4:16:47<3:05:46, 3.51it/s] 89%|████████▉ | 332371/371472 [4:16:48<3:04:28, 3.53it/s] 89%|████████▉ | 332372/371472 [4:16:48<3:03:46, 3.55it/s] 89%|████████▉ | 332373/371472 [4:16:48<3:02:49, 3.56it/s] 89%|████████▉ | 332374/371472 [4:16:49<3:02:54, 3.56it/s] 89%|████████▉ | 332375/371472 [4:16:49<2:59:07, 3.64it/s] 89%|████████▉ | 332376/371472 [4:16:49<2:58:32, 3.65it/s] 89%|████████▉ | 332377/371472 [4:16:49<3:00:32, 3.61it/s] 89%|████████▉ | 332378/371472 [4:16:50<3:03:29, 3.55it/s] 89%|████████▉ | 332379/371472 [4:16:50<3:00:12, 3.62it/s] 89%|████████▉ | 332380/371472 [4:16:50<3:03:02, 3.56it/s] {'loss': 2.6452, 'learning_rate': 1.9476286927104656e-07, 'epoch': 14.32} + 89%|████████▉ | 332380/371472 [4:16:50<3:03:02, 3.56it/s] 89%|████████▉ | 332381/371472 [4:16:51<3:05:31, 3.51it/s] 89%|████████▉ | 332382/371472 [4:16:51<2:59:30, 3.63it/s] 89%|████████▉ | 332383/371472 [4:16:51<3:20:27, 3.25it/s] 89%|████████▉ | 332384/371472 [4:16:51<3:11:19, 3.41it/s] 89%|████████▉ | 332385/371472 [4:16:52<3:11:11, 3.41it/s] 89%|████████▉ | 332386/371472 [4:16:52<3:07:49, 3.47it/s] 89%|████████▉ | 332387/371472 [4:16:52<3:06:06, 3.50it/s] 89%|████████▉ | 332388/371472 [4:16:53<3:01:09, 3.60it/s] 89%|████████▉ | 332389/371472 [4:16:53<3:11:57, 3.39it/s] 89%|████████▉ | 332390/371472 [4:16:53<3:12:45, 3.38it/s] 89%|████████▉ | 332391/371472 [4:16:53<3:08:13, 3.46it/s] 89%|████████▉ | 332392/371472 [4:16:54<3:00:52, 3.60it/s] 89%|████████▉ | 332393/371472 [4:16:54<3:05:24, 3.51it/s] 89%|████████▉ | 332394/371472 [4:16:54<3:01:23, 3.59it/s] 89%|████████▉ | 332395/371472 [4:16:55<3:14:21, 3.35it/s] 89%|████████▉ | 332396/371472 [4:16:55<3:31:23, 3.08it/s] 89%|████████▉ | 332397/371472 [4:16:55<3:26:54, 3.15it/s] 89%|████████▉ | 332398/371472 [4:16:56<3:18:13, 3.29it/s] 89%|████████▉ | 332399/371472 [4:16:56<3:11:26, 3.40it/s] 89%|████████▉ | 332400/371472 [4:16:56<3:21:36, 3.23it/s] {'loss': 2.5895, 'learning_rate': 1.947143872955677e-07, 'epoch': 14.32} + 89%|████████▉ | 332400/371472 [4:16:56<3:21:36, 3.23it/s] 89%|████████▉ | 332401/371472 [4:16:57<3:18:01, 3.29it/s] 89%|████████▉ | 332402/371472 [4:16:57<3:28:28, 3.12it/s] 89%|████████▉ | 332403/371472 [4:16:57<3:24:08, 3.19it/s] 89%|████████▉ | 332404/371472 [4:16:57<3:15:39, 3.33it/s] 89%|████████▉ | 332405/371472 [4:16:58<3:05:20, 3.51it/s] 89%|████████▉ | 332406/371472 [4:16:58<2:58:41, 3.64it/s] 89%|████████▉ | 332407/371472 [4:16:58<2:56:13, 3.69it/s] 89%|████████▉ | 332408/371472 [4:16:58<3:02:47, 3.56it/s] 89%|████████▉ | 332409/371472 [4:16:59<2:57:57, 3.66it/s] 89%|████████▉ | 332410/371472 [4:16:59<2:48:42, 3.86it/s] 89%|████████▉ | 332411/371472 [4:16:59<2:53:07, 3.76it/s] 89%|████████▉ | 332412/371472 [4:17:00<2:52:37, 3.77it/s] 89%|████████▉ | 332413/371472 [4:17:00<2:59:38, 3.62it/s] 89%|████████▉ | 332414/371472 [4:17:00<3:04:03, 3.54it/s] 89%|████████▉ | 332415/371472 [4:17:00<3:06:09, 3.50it/s] 89%|████████▉ | 332416/371472 [4:17:01<3:05:56, 3.50it/s] 89%|████████▉ | 332417/371472 [4:17:01<3:02:53, 3.56it/s] 89%|████████▉ | 332418/371472 [4:17:01<2:56:11, 3.69it/s] 89%|████████▉ | 332419/371472 [4:17:01<2:55:47, 3.70it/s] 89%|████████▉ | 332420/371472 [4:17:02<2:56:16, 3.69it/s] {'loss': 2.6433, 'learning_rate': 1.9466590532008875e-07, 'epoch': 14.32} + 89%|████████▉ | 332420/371472 [4:17:02<2:56:16, 3.69it/s] 89%|████████▉ | 332421/371472 [4:17:02<2:53:11, 3.76it/s] 89%|████████▉ | 332422/371472 [4:17:02<2:53:16, 3.76it/s] 89%|████████▉ | 332423/371472 [4:17:03<2:55:34, 3.71it/s] 89%|████████▉ | 332424/371472 [4:17:03<2:50:39, 3.81it/s] 89%|████████▉ | 332425/371472 [4:17:03<3:01:16, 3.59it/s] 89%|████████▉ | 332426/371472 [4:17:03<2:56:45, 3.68it/s] 89%|████████▉ | 332427/371472 [4:17:04<3:20:19, 3.25it/s] 89%|████████▉ | 332428/371472 [4:17:04<3:08:32, 3.45it/s] 89%|████████▉ | 332429/371472 [4:17:04<3:03:23, 3.55it/s] 89%|████████▉ | 332430/371472 [4:17:05<3:08:21, 3.45it/s] 89%|████████▉ | 332431/371472 [4:17:05<3:00:21, 3.61it/s] 89%|████████▉ | 332432/371472 [4:17:05<2:59:25, 3.63it/s] 89%|████████▉ | 332433/371472 [4:17:05<2:57:34, 3.66it/s] 89%|████████▉ | 332434/371472 [4:17:06<2:56:26, 3.69it/s] 89%|████████▉ | 332435/371472 [4:17:06<3:01:10, 3.59it/s] 89%|████████▉ | 332436/371472 [4:17:06<3:10:41, 3.41it/s] 89%|████████▉ | 332437/371472 [4:17:07<3:01:28, 3.58it/s] 89%|████████▉ | 332438/371472 [4:17:07<3:13:35, 3.36it/s] 89%|████████▉ | 332439/371472 [4:17:07<3:26:10, 3.16it/s] 89%|████████▉ | 332440/371472 [4:17:07<3:13:06, 3.37it/s] {'loss': 2.6665, 'learning_rate': 1.946174233446099e-07, 'epoch': 14.32} + 89%|████████▉ | 332440/371472 [4:17:07<3:13:06, 3.37it/s] 89%|████████▉ | 332441/371472 [4:17:08<3:09:40, 3.43it/s] 89%|████████▉ | 332442/371472 [4:17:08<3:09:27, 3.43it/s] 89%|████████▉ | 332443/371472 [4:17:08<3:14:19, 3.35it/s] 89%|████████▉ | 332444/371472 [4:17:09<3:11:52, 3.39it/s] 89%|████████▉ | 332445/371472 [4:17:09<3:02:53, 3.56it/s] 89%|████████▉ | 332446/371472 [4:17:09<2:59:25, 3.63it/s] 89%|████████▉ | 332447/371472 [4:17:09<2:55:55, 3.70it/s] 89%|████████▉ | 332448/371472 [4:17:10<3:25:39, 3.16it/s] 89%|████████▉ | 332449/371472 [4:17:10<3:50:29, 2.82it/s] 89%|████████▉ | 332450/371472 [4:17:11<3:39:55, 2.96it/s] 89%|████████▉ | 332451/371472 [4:17:11<3:24:58, 3.17it/s] 89%|████████▉ | 332452/371472 [4:17:11<3:16:48, 3.30it/s] 89%|████████▉ | 332453/371472 [4:17:11<3:06:26, 3.49it/s] 89%|████████▉ | 332454/371472 [4:17:12<3:12:17, 3.38it/s] 89%|████████▉ | 332455/371472 [4:17:12<3:15:26, 3.33it/s] 89%|████████▉ | 332456/371472 [4:17:12<3:13:05, 3.37it/s] 89%|████████▉ | 332457/371472 [4:17:13<3:17:30, 3.29it/s] 89%|████████▉ | 332458/371472 [4:17:13<3:15:50, 3.32it/s] 89%|████████▉ | 332459/371472 [4:17:13<3:10:14, 3.42it/s] 89%|████████▉ | 332460/371472 [4:17:13<3:05:53, 3.50it/s] {'loss': 2.4366, 'learning_rate': 1.9456894136913097e-07, 'epoch': 14.32} + 89%|████████▉ | 332460/371472 [4:17:13<3:05:53, 3.50it/s] 89%|████████▉ | 332461/371472 [4:17:14<3:11:55, 3.39it/s] 89%|████████▉ | 332462/371472 [4:17:14<3:12:26, 3.38it/s] 89%|████████▉ | 332463/371472 [4:17:14<3:25:19, 3.17it/s] 89%|████████▉ | 332464/371472 [4:17:15<3:33:25, 3.05it/s] 89%|████████▉ | 332465/371472 [4:17:15<3:20:41, 3.24it/s] 89%|████████▉ | 332466/371472 [4:17:15<3:05:18, 3.51it/s] 89%|████████▉ | 332467/371472 [4:17:16<3:05:09, 3.51it/s] 90%|████████▉ | 332468/371472 [4:17:16<3:28:26, 3.12it/s] 90%|████████▉ | 332469/371472 [4:17:16<3:20:47, 3.24it/s] 90%|████████▉ | 332470/371472 [4:17:16<3:11:09, 3.40it/s] 90%|████████▉ | 332471/371472 [4:17:17<3:13:56, 3.35it/s] 90%|████████▉ | 332472/371472 [4:17:17<3:20:59, 3.23it/s] 90%|████████▉ | 332473/371472 [4:17:17<3:08:44, 3.44it/s] 90%|████████▉ | 332474/371472 [4:17:18<3:06:48, 3.48it/s] 90%|████████▉ | 332475/371472 [4:17:18<3:14:22, 3.34it/s] 90%|████████▉ | 332476/371472 [4:17:18<3:17:34, 3.29it/s] 90%|████████▉ | 332477/371472 [4:17:19<3:12:34, 3.38it/s] 90%|████████▉ | 332478/371472 [4:17:19<3:06:17, 3.49it/s] 90%|████████▉ | 332479/371472 [4:17:19<3:03:49, 3.54it/s] 90%|████████▉ | 332480/371472 [4:17:19<3:04:07, 3.53it/s] {'loss': 2.6425, 'learning_rate': 1.9452045939365212e-07, 'epoch': 14.32} + 90%|████████▉ | 332480/371472 [4:17:19<3:04:07, 3.53it/s] 90%|████████▉ | 332481/371472 [4:17:20<3:16:21, 3.31it/s] 90%|████████▉ | 332482/371472 [4:17:20<3:22:01, 3.22it/s] 90%|████████▉ | 332483/371472 [4:17:20<3:09:56, 3.42it/s] 90%|████████▉ | 332484/371472 [4:17:21<3:25:34, 3.16it/s] 90%|████████▉ | 332485/371472 [4:17:21<3:16:01, 3.31it/s] 90%|████████▉ | 332486/371472 [4:17:21<3:19:53, 3.25it/s] 90%|████████▉ | 332487/371472 [4:17:22<3:14:49, 3.33it/s] 90%|████████▉ | 332488/371472 [4:17:22<3:09:10, 3.43it/s] 90%|████████▉ | 332489/371472 [4:17:22<3:18:48, 3.27it/s] 90%|████████▉ | 332490/371472 [4:17:22<3:19:59, 3.25it/s] 90%|████████▉ | 332491/371472 [4:17:23<3:14:38, 3.34it/s] 90%|████████▉ | 332492/371472 [4:17:23<3:06:12, 3.49it/s] 90%|████████▉ | 332493/371472 [4:17:23<3:08:42, 3.44it/s] 90%|████████▉ | 332494/371472 [4:17:24<3:03:54, 3.53it/s] 90%|████████▉ | 332495/371472 [4:17:24<2:54:38, 3.72it/s] 90%|████████▉ | 332496/371472 [4:17:24<2:55:16, 3.71it/s] 90%|████████▉ | 332497/371472 [4:17:24<3:09:55, 3.42it/s] 90%|████████▉ | 332498/371472 [4:17:25<3:04:25, 3.52it/s] 90%|████████▉ | 332499/371472 [4:17:25<2:58:30, 3.64it/s] 90%|████████▉ | 332500/371472 [4:17:25<2:53:08, 3.75it/s] {'loss': 2.4537, 'learning_rate': 1.9447197741817317e-07, 'epoch': 14.32} + 90%|████████▉ | 332500/371472 [4:17:25<2:53:08, 3.75it/s] 90%|████████▉ | 332501/371472 [4:17:25<2:50:35, 3.81it/s] 90%|████████▉ | 332502/371472 [4:17:26<2:47:51, 3.87it/s] 90%|████████▉ | 332503/371472 [4:17:26<2:55:24, 3.70it/s] 90%|████████▉ | 332504/371472 [4:17:26<3:07:49, 3.46it/s] 90%|████████▉ | 332505/371472 [4:17:27<3:04:06, 3.53it/s] 90%|████████▉ | 332506/371472 [4:17:27<3:05:32, 3.50it/s] 90%|████████▉ | 332507/371472 [4:17:27<2:59:50, 3.61it/s] 90%|████████▉ | 332508/371472 [4:17:27<3:02:42, 3.55it/s] 90%|████████▉ | 332509/371472 [4:17:28<2:56:11, 3.69it/s] 90%|████████▉ | 332510/371472 [4:17:28<2:56:12, 3.69it/s] 90%|████████▉ | 332511/371472 [4:17:28<2:57:18, 3.66it/s] 90%|████████▉ | 332512/371472 [4:17:28<2:50:45, 3.80it/s] 90%|████████▉ | 332513/371472 [4:17:29<2:50:15, 3.81it/s] 90%|████████▉ | 332514/371472 [4:17:29<2:50:53, 3.80it/s] 90%|████████▉ | 332515/371472 [4:17:29<2:47:02, 3.89it/s] 90%|████████▉ | 332516/371472 [4:17:30<2:50:34, 3.81it/s] 90%|████████▉ | 332517/371472 [4:17:30<2:55:16, 3.70it/s] 90%|████████▉ | 332518/371472 [4:17:30<2:50:45, 3.80it/s] 90%|████████▉ | 332519/371472 [4:17:30<2:54:51, 3.71it/s] 90%|████████▉ | 332520/371472 [4:17:31<3:06:10, 3.49it/s] {'loss': 2.6706, 'learning_rate': 1.9442349544269435e-07, 'epoch': 14.32} + 90%|████████▉ | 332520/371472 [4:17:31<3:06:10, 3.49it/s] 90%|████████▉ | 332521/371472 [4:17:31<3:06:22, 3.48it/s] 90%|████████▉ | 332522/371472 [4:17:31<3:07:16, 3.47it/s] 90%|████████▉ | 332523/371472 [4:17:32<3:05:13, 3.50it/s] 90%|████████▉ | 332524/371472 [4:17:32<3:11:18, 3.39it/s] 90%|████████▉ | 332525/371472 [4:17:32<3:14:52, 3.33it/s] 90%|████████▉ | 332526/371472 [4:17:32<3:03:45, 3.53it/s] 90%|████████▉ | 332527/371472 [4:17:33<3:12:25, 3.37it/s] 90%|████████▉ | 332528/371472 [4:17:33<3:03:21, 3.54it/s] 90%|████████▉ | 332529/371472 [4:17:33<3:11:20, 3.39it/s] 90%|████████▉ | 332530/371472 [4:17:34<3:06:58, 3.47it/s] 90%|████████▉ | 332531/371472 [4:17:34<3:01:31, 3.58it/s] 90%|████████▉ | 332532/371472 [4:17:34<3:11:27, 3.39it/s] 90%|████████▉ | 332533/371472 [4:17:34<3:08:59, 3.43it/s] 90%|████████▉ | 332534/371472 [4:17:35<3:01:38, 3.57it/s] 90%|████████▉ | 332535/371472 [4:17:35<3:06:54, 3.47it/s] 90%|████████▉ | 332536/371472 [4:17:35<2:58:51, 3.63it/s] 90%|████████▉ | 332537/371472 [4:17:36<2:56:42, 3.67it/s] 90%|████████▉ | 332538/371472 [4:17:36<2:57:42, 3.65it/s] 90%|████████▉ | 332539/371472 [4:17:36<2:56:21, 3.68it/s] 90%|████████▉ | 332540/371472 [4:17:36<3:13:49, 3.35it/s] {'loss': 2.7051, 'learning_rate': 1.943750134672154e-07, 'epoch': 14.32} + 90%|████████▉ | 332540/371472 [4:17:36<3:13:49, 3.35it/s] 90%|████████▉ | 332541/371472 [4:17:37<3:15:06, 3.33it/s] 90%|████████▉ | 332542/371472 [4:17:37<3:03:57, 3.53it/s] 90%|████████▉ | 332543/371472 [4:17:37<3:14:07, 3.34it/s] 90%|████████▉ | 332544/371472 [4:17:38<3:04:55, 3.51it/s] 90%|████████▉ | 332545/371472 [4:17:38<2:59:14, 3.62it/s] 90%|████████▉ | 332546/371472 [4:17:38<2:57:03, 3.66it/s] 90%|████████▉ | 332547/371472 [4:17:38<2:59:03, 3.62it/s] 90%|████████▉ | 332548/371472 [4:17:39<3:00:49, 3.59it/s] 90%|████████▉ | 332549/371472 [4:17:39<2:57:58, 3.64it/s] 90%|████████▉ | 332550/371472 [4:17:39<2:51:43, 3.78it/s] 90%|████████▉ | 332551/371472 [4:17:39<2:51:13, 3.79it/s] 90%|████████▉ | 332552/371472 [4:17:40<2:54:29, 3.72it/s] 90%|████████▉ | 332553/371472 [4:17:40<2:56:23, 3.68it/s] 90%|████████▉ | 332554/371472 [4:17:40<2:51:56, 3.77it/s] 90%|████████▉ | 332555/371472 [4:17:41<3:08:29, 3.44it/s] 90%|████████▉ | 332556/371472 [4:17:41<3:03:29, 3.53it/s] 90%|████████▉ | 332557/371472 [4:17:41<3:26:08, 3.15it/s] 90%|████████▉ | 332558/371472 [4:17:42<3:21:33, 3.22it/s] 90%|████████▉ | 332559/371472 [4:17:42<3:16:26, 3.30it/s] 90%|████████▉ | 332560/371472 [4:17:42<3:07:34, 3.46it/s] {'loss': 2.523, 'learning_rate': 1.9432653149173654e-07, 'epoch': 14.32} + 90%|████████▉ | 332560/371472 [4:17:42<3:07:34, 3.46it/s] 90%|████████▉ | 332561/371472 [4:17:42<3:03:50, 3.53it/s] 90%|████████▉ | 332562/371472 [4:17:43<2:58:34, 3.63it/s] 90%|████████▉ | 332563/371472 [4:17:43<2:53:44, 3.73it/s] 90%|████████▉ | 332564/371472 [4:17:43<2:56:17, 3.68it/s] 90%|████████▉ | 332565/371472 [4:17:43<3:05:21, 3.50it/s] 90%|████████▉ | 332566/371472 [4:17:44<3:26:52, 3.13it/s] 90%|████████▉ | 332567/371472 [4:17:44<3:14:51, 3.33it/s] 90%|████████▉ | 332568/371472 [4:17:44<3:20:24, 3.24it/s] 90%|████████▉ | 332569/371472 [4:17:45<3:27:04, 3.13it/s] 90%|████████▉ | 332570/371472 [4:17:45<3:16:09, 3.31it/s] 90%|████████▉ | 332571/371472 [4:17:45<3:09:14, 3.43it/s] 90%|████████▉ | 332572/371472 [4:17:46<3:07:57, 3.45it/s] 90%|████████▉ | 332573/371472 [4:17:46<3:05:32, 3.49it/s] 90%|████████▉ | 332574/371472 [4:17:46<2:58:39, 3.63it/s] 90%|████████▉ | 332575/371472 [4:17:46<3:07:15, 3.46it/s] 90%|████████▉ | 332576/371472 [4:17:47<3:09:05, 3.43it/s] 90%|████████▉ | 332577/371472 [4:17:47<3:15:00, 3.32it/s] 90%|████████▉ | 332578/371472 [4:17:47<3:11:21, 3.39it/s] 90%|████████▉ | 332579/371472 [4:17:48<3:09:37, 3.42it/s] 90%|████████▉ | 332580/371472 [4:17:48<3:03:39, 3.53it/s] {'loss': 2.4584, 'learning_rate': 1.9427804951625761e-07, 'epoch': 14.32} + 90%|████████▉ | 332580/371472 [4:17:48<3:03:39, 3.53it/s] 90%|████████▉ | 332581/371472 [4:17:48<2:58:17, 3.64it/s] 90%|████████▉ | 332582/371472 [4:17:48<2:59:18, 3.61it/s] 90%|████████▉ | 332583/371472 [4:17:49<3:03:55, 3.52it/s] 90%|████████▉ | 332584/371472 [4:17:49<2:59:37, 3.61it/s] 90%|████████▉ | 332585/371472 [4:17:49<3:16:02, 3.31it/s] 90%|████████▉ | 332586/371472 [4:17:50<3:14:58, 3.32it/s] 90%|████████▉ | 332587/371472 [4:17:50<3:07:44, 3.45it/s] 90%|████████▉ | 332588/371472 [4:17:50<2:58:33, 3.63it/s] 90%|████████▉ | 332589/371472 [4:17:50<2:57:38, 3.65it/s] 90%|████████▉ | 332590/371472 [4:17:51<2:56:24, 3.67it/s] 90%|████████▉ | 332591/371472 [4:17:51<2:52:50, 3.75it/s] 90%|████████▉ | 332592/371472 [4:17:51<3:00:34, 3.59it/s] 90%|████████▉ | 332593/371472 [4:17:52<3:02:11, 3.56it/s] 90%|████████▉ | 332594/371472 [4:17:52<2:59:37, 3.61it/s] 90%|████████▉ | 332595/371472 [4:17:52<2:56:34, 3.67it/s] 90%|████████▉ | 332596/371472 [4:17:52<2:57:56, 3.64it/s] 90%|████████▉ | 332597/371472 [4:17:53<3:04:39, 3.51it/s] 90%|████████▉ | 332598/371472 [4:17:53<2:59:29, 3.61it/s] 90%|████████▉ | 332599/371472 [4:17:53<2:51:50, 3.77it/s] 90%|████████▉ | 332600/371472 [4:17:53<2:50:53, 3.79it/s] {'loss': 2.6631, 'learning_rate': 1.9422956754077869e-07, 'epoch': 14.33} + 90%|████████▉ | 332600/371472 [4:17:53<2:50:53, 3.79it/s] 90%|████████▉ | 332601/371472 [4:17:54<2:53:09, 3.74it/s] 90%|████████▉ | 332602/371472 [4:17:54<2:51:08, 3.79it/s] 90%|████████▉ | 332603/371472 [4:17:54<2:48:14, 3.85it/s] 90%|████████▉ | 332604/371472 [4:17:55<2:54:45, 3.71it/s] 90%|████████▉ | 332605/371472 [4:17:55<2:48:49, 3.84it/s] 90%|████████▉ | 332606/371472 [4:17:55<2:48:15, 3.85it/s] 90%|████████▉ | 332607/371472 [4:17:55<2:48:46, 3.84it/s] 90%|████████▉ | 332608/371472 [4:17:56<3:08:24, 3.44it/s] 90%|████████▉ | 332609/371472 [4:17:56<3:01:38, 3.57it/s] 90%|████████▉ | 332610/371472 [4:17:56<3:13:06, 3.35it/s] 90%|████████▉ | 332611/371472 [4:17:56<3:05:05, 3.50it/s] 90%|████████▉ | 332612/371472 [4:17:57<3:21:51, 3.21it/s] 90%|████████▉ | 332613/371472 [4:17:57<3:16:21, 3.30it/s] 90%|████████▉ | 332614/371472 [4:17:57<3:23:09, 3.19it/s] 90%|████████▉ | 332615/371472 [4:17:58<3:12:37, 3.36it/s] 90%|████████▉ | 332616/371472 [4:17:58<3:07:10, 3.46it/s] 90%|████████▉ | 332617/371472 [4:17:58<3:06:30, 3.47it/s] 90%|████████▉ | 332618/371472 [4:17:59<3:01:43, 3.56it/s] 90%|████████▉ | 332619/371472 [4:17:59<3:09:05, 3.42it/s] 90%|████████▉ | 332620/371472 [4:17:59<3:01:00, 3.58it/s] {'loss': 2.6835, 'learning_rate': 1.941810855652998e-07, 'epoch': 14.33} + 90%|████████▉ | 332620/371472 [4:17:59<3:01:00, 3.58it/s] 90%|████████▉ | 332621/371472 [4:17:59<3:02:43, 3.54it/s] 90%|████████▉ | 332622/371472 [4:18:00<3:08:48, 3.43it/s] 90%|████████▉ | 332623/371472 [4:18:00<2:59:08, 3.61it/s] 90%|████████▉ | 332624/371472 [4:18:00<3:06:50, 3.47it/s] 90%|████████▉ | 332625/371472 [4:18:01<3:06:52, 3.46it/s] 90%|████████▉ | 332626/371472 [4:18:01<2:59:12, 3.61it/s] 90%|████████▉ | 332627/371472 [4:18:01<3:08:03, 3.44it/s] 90%|████████▉ | 332628/371472 [4:18:01<3:01:31, 3.57it/s] 90%|████████▉ | 332629/371472 [4:18:02<2:59:22, 3.61it/s] 90%|████████▉ | 332630/371472 [4:18:02<2:58:07, 3.63it/s] 90%|████████▉ | 332631/371472 [4:18:02<2:56:32, 3.67it/s] 90%|████████▉ | 332632/371472 [4:18:02<3:00:12, 3.59it/s] 90%|████████▉ | 332633/371472 [4:18:03<3:09:46, 3.41it/s] 90%|████████▉ | 332634/371472 [4:18:03<3:03:20, 3.53it/s] 90%|████████▉ | 332635/371472 [4:18:04<3:34:14, 3.02it/s] 90%|████████▉ | 332636/371472 [4:18:04<3:31:01, 3.07it/s] 90%|████████▉ | 332637/371472 [4:18:04<3:32:26, 3.05it/s] 90%|████████▉ | 332638/371472 [4:18:04<3:19:19, 3.25it/s] 90%|████████▉ | 332639/371472 [4:18:05<3:14:51, 3.32it/s] 90%|████████▉ | 332640/371472 [4:18:05<3:05:53, 3.48it/s] {'loss': 2.8009, 'learning_rate': 1.9413260358982088e-07, 'epoch': 14.33} + 90%|████████▉ | 332640/371472 [4:18:05<3:05:53, 3.48it/s] 90%|████████▉ | 332641/371472 [4:18:05<3:13:19, 3.35it/s] 90%|████████▉ | 332642/371472 [4:18:06<3:05:22, 3.49it/s] 90%|████████▉ | 332643/371472 [4:18:06<3:10:05, 3.40it/s] 90%|████████▉ | 332644/371472 [4:18:06<3:14:13, 3.33it/s] 90%|████████▉ | 332645/371472 [4:18:06<3:13:26, 3.35it/s] 90%|████████▉ | 332646/371472 [4:18:07<3:12:19, 3.36it/s] 90%|████████▉ | 332647/371472 [4:18:07<3:08:53, 3.43it/s] 90%|████████▉ | 332648/371472 [4:18:07<3:02:13, 3.55it/s] 90%|████████▉ | 332649/371472 [4:18:08<3:03:17, 3.53it/s] 90%|████████▉ | 332650/371472 [4:18:08<3:07:07, 3.46it/s] 90%|████████▉ | 332651/371472 [4:18:08<2:59:52, 3.60it/s] 90%|████████▉ | 332652/371472 [4:18:08<2:58:07, 3.63it/s] 90%|████████▉ | 332653/371472 [4:18:09<2:51:30, 3.77it/s] 90%|████████▉ | 332654/371472 [4:18:09<2:47:26, 3.86it/s] 90%|████████▉ | 332655/371472 [4:18:09<2:54:31, 3.71it/s] 90%|██���█████▉ | 332656/371472 [4:18:09<2:58:50, 3.62it/s] 90%|████████▉ | 332657/371472 [4:18:10<2:51:06, 3.78it/s] 90%|████████▉ | 332658/371472 [4:18:10<3:01:27, 3.57it/s] 90%|████████▉ | 332659/371472 [4:18:10<3:04:26, 3.51it/s] 90%|████████▉ | 332660/371472 [4:18:11<2:59:17, 3.61it/s] {'loss': 2.5667, 'learning_rate': 1.9408412161434206e-07, 'epoch': 14.33} + 90%|████████▉ | 332660/371472 [4:18:11<2:59:17, 3.61it/s] 90%|████████▉ | 332661/371472 [4:18:11<2:57:02, 3.65it/s] 90%|████████▉ | 332662/371472 [4:18:11<2:52:22, 3.75it/s] 90%|████████▉ | 332663/371472 [4:18:11<3:11:54, 3.37it/s] 90%|████████▉ | 332664/371472 [4:18:12<3:05:55, 3.48it/s] 90%|████████▉ | 332665/371472 [4:18:12<2:57:30, 3.64it/s] 90%|████████▉ | 332666/371472 [4:18:12<2:59:57, 3.59it/s] 90%|████████▉ | 332667/371472 [4:18:13<3:00:51, 3.58it/s] 90%|████████▉ | 332668/371472 [4:18:13<3:03:22, 3.53it/s] 90%|████████▉ | 332669/371472 [4:18:13<3:18:55, 3.25it/s] 90%|████████▉ | 332670/371472 [4:18:13<3:11:07, 3.38it/s] 90%|████████▉ | 332671/371472 [4:18:14<3:11:32, 3.38it/s] 90%|████████▉ | 332672/371472 [4:18:14<3:18:09, 3.26it/s] 90%|████████▉ | 332673/371472 [4:18:14<3:09:14, 3.42it/s] 90%|████████▉ | 332674/371472 [4:18:15<3:04:14, 3.51it/s] 90%|████████▉ | 332675/371472 [4:18:15<3:04:52, 3.50it/s] 90%|████████▉ | 332676/371472 [4:18:15<2:54:38, 3.70it/s] 90%|████████▉ | 332677/371472 [4:18:16<3:15:31, 3.31it/s] 90%|████████▉ | 332678/371472 [4:18:16<3:30:35, 3.07it/s] 90%|████████▉ | 332679/371472 [4:18:16<3:30:57, 3.06it/s] 90%|████████▉ | 332680/371472 [4:18:17<3:18:25, 3.26it/s] {'loss': 2.6475, 'learning_rate': 1.940356396388631e-07, 'epoch': 14.33} + 90%|████████▉ | 332680/371472 [4:18:17<3:18:25, 3.26it/s] 90%|████████▉ | 332681/371472 [4:18:17<3:11:36, 3.37it/s] 90%|████████▉ | 332682/371472 [4:18:17<3:07:03, 3.46it/s] 90%|████████▉ | 332683/371472 [4:18:17<3:10:59, 3.38it/s] 90%|████████▉ | 332684/371472 [4:18:18<3:05:51, 3.48it/s] 90%|████████▉ | 332685/371472 [4:18:18<3:00:13, 3.59it/s] 90%|████████▉ | 332686/371472 [4:18:18<2:56:35, 3.66it/s] 90%|████████▉ | 332687/371472 [4:18:18<2:50:38, 3.79it/s] 90%|████████▉ | 332688/371472 [4:18:19<2:52:16, 3.75it/s] 90%|████████▉ | 332689/371472 [4:18:19<2:49:19, 3.82it/s] 90%|████████▉ | 332690/371472 [4:18:19<2:47:55, 3.85it/s] 90%|████████▉ | 332691/371472 [4:18:19<2:46:55, 3.87it/s] 90%|████████▉ | 332692/371472 [4:18:20<2:51:26, 3.77it/s] 90%|████████▉ | 332693/371472 [4:18:20<2:56:43, 3.66it/s] 90%|████████▉ | 332694/371472 [4:18:20<2:54:48, 3.70it/s] 90%|████████▉ | 332695/371472 [4:18:21<2:55:32, 3.68it/s] 90%|████████▉ | 332696/371472 [4:18:21<2:49:51, 3.80it/s] 90%|████████▉ | 332697/371472 [4:18:21<2:55:19, 3.69it/s] 90%|████████▉ | 332698/371472 [4:18:21<2:54:29, 3.70it/s] 90%|████████▉ | 332699/371472 [4:18:22<3:12:32, 3.36it/s] 90%|████████▉ | 332700/371472 [4:18:22<3:08:47, 3.42it/s] {'loss': 2.6094, 'learning_rate': 1.9398715766338426e-07, 'epoch': 14.33} + 90%|████████▉ | 332700/371472 [4:18:22<3:08:47, 3.42it/s] 90%|████████▉ | 332701/371472 [4:18:22<3:23:00, 3.18it/s] 90%|████████▉ | 332702/371472 [4:18:23<3:09:06, 3.42it/s] 90%|████████▉ | 332703/371472 [4:18:23<3:06:50, 3.46it/s] 90%|████████▉ | 332704/371472 [4:18:23<3:01:56, 3.55it/s] 90%|████████▉ | 332705/371472 [4:18:23<2:54:18, 3.71it/s] 90%|████████▉ | 332706/371472 [4:18:24<2:50:30, 3.79it/s] 90%|████████▉ | 332707/371472 [4:18:24<2:46:05, 3.89it/s] 90%|████████▉ | 332708/371472 [4:18:24<2:47:10, 3.86it/s] 90%|████████▉ | 332709/371472 [4:18:24<2:46:54, 3.87it/s] 90%|████████▉ | 332710/371472 [4:18:25<3:01:45, 3.55it/s] 90%|████████▉ | 332711/371472 [4:18:25<3:05:39, 3.48it/s] 90%|████████▉ | 332712/371472 [4:18:25<3:07:48, 3.44it/s] 90%|████████▉ | 332713/371472 [4:18:26<3:03:47, 3.51it/s] 90%|████████▉ | 332714/371472 [4:18:26<3:22:54, 3.18it/s] 90%|████████▉ | 332715/371472 [4:18:26<3:24:59, 3.15it/s] 90%|████████▉ | 332716/371472 [4:18:27<3:20:20, 3.22it/s] 90%|████████▉ | 332717/371472 [4:18:27<3:13:55, 3.33it/s] 90%|████████▉ | 332718/371472 [4:18:27<3:12:26, 3.36it/s] 90%|████████▉ | 332719/371472 [4:18:27<3:16:03, 3.29it/s] 90%|████████▉ | 332720/371472 [4:18:28<3:04:04, 3.51it/s] {'loss': 2.4633, 'learning_rate': 1.9393867568790533e-07, 'epoch': 14.33} + 90%|████████▉ | 332720/371472 [4:18:28<3:04:04, 3.51it/s] 90%|████████▉ | 332721/371472 [4:18:28<3:02:09, 3.55it/s] 90%|████████▉ | 332722/371472 [4:18:28<3:19:17, 3.24it/s] 90%|████████▉ | 332723/371472 [4:18:29<3:12:13, 3.36it/s] 90%|████████▉ | 332724/371472 [4:18:29<3:06:08, 3.47it/s] 90%|████████▉ | 332725/371472 [4:18:29<3:10:59, 3.38it/s] 90%|████████▉ | 332726/371472 [4:18:30<3:11:09, 3.38it/s] 90%|████████▉ | 332727/371472 [4:18:30<3:24:43, 3.15it/s] 90%|████████▉ | 332728/371472 [4:18:30<3:35:40, 2.99it/s] 90%|████████▉ | 332729/371472 [4:18:31<3:18:05, 3.26it/s] 90%|████████▉ | 332730/371472 [4:18:31<3:15:34, 3.30it/s] 90%|████████▉ | 332731/371472 [4:18:31<3:03:14, 3.52it/s] 90%|████████▉ | 332732/371472 [4:18:31<3:10:14, 3.39it/s] 90%|████████▉ | 332733/371472 [4:18:32<3:04:13, 3.50it/s] 90%|████████▉ | 332734/371472 [4:18:32<3:32:11, 3.04it/s] 90%|████████▉ | 332735/371472 [4:18:32<3:17:57, 3.26it/s] 90%|████████▉ | 332736/371472 [4:18:33<3:17:08, 3.27it/s] 90%|████████▉ | 332737/371472 [4:18:33<3:12:12, 3.36it/s] 90%|████████▉ | 332738/371472 [4:18:33<3:04:47, 3.49it/s] 90%|████████▉ | 332739/371472 [4:18:33<3:02:49, 3.53it/s] 90%|████████▉ | 332740/371472 [4:18:34<3:21:36, 3.20it/s] {'loss': 2.57, 'learning_rate': 1.9389019371242648e-07, 'epoch': 14.33} + 90%|████████▉ | 332740/371472 [4:18:34<3:21:36, 3.20it/s] 90%|████████▉ | 332741/371472 [4:18:34<3:15:10, 3.31it/s] 90%|████████▉ | 332742/371472 [4:18:34<3:17:26, 3.27it/s] 90%|████████▉ | 332743/371472 [4:18:35<3:07:39, 3.44it/s] 90%|████████▉ | 332744/371472 [4:18:35<3:15:43, 3.30it/s] 90%|████████▉ | 332745/371472 [4:18:35<3:10:37, 3.39it/s] 90%|████████▉ | 332746/371472 [4:18:36<3:02:44, 3.53it/s] 90%|████████▉ | 332747/371472 [4:18:36<3:00:11, 3.58it/s] 90%|████████▉ | 332748/371472 [4:18:36<3:06:27, 3.46it/s] 90%|████████▉ | 332749/371472 [4:18:36<3:04:57, 3.49it/s] 90%|████████▉ | 332750/371472 [4:18:37<3:19:58, 3.23it/s] 90%|████████▉ | 332751/371472 [4:18:37<3:20:13, 3.22it/s] 90%|████████▉ | 332752/371472 [4:18:37<3:11:33, 3.37it/s] 90%|████████▉ | 332753/371472 [4:18:38<3:07:10, 3.45it/s] 90%|████████▉ | 332754/371472 [4:18:38<2:59:47, 3.59it/s] 90%|████████▉ | 332755/371472 [4:18:38<2:56:12, 3.66it/s] 90%|████████▉ | 332756/371472 [4:18:38<2:49:58, 3.80it/s] 90%|████████▉ | 332757/371472 [4:18:39<2:45:33, 3.90it/s] 90%|████████▉ | 332758/371472 [4:18:39<2:49:24, 3.81it/s] 90%|████████▉ | 332759/371472 [4:18:39<2:51:54, 3.75it/s] 90%|████████▉ | 332760/371472 [4:18:39<2:53:11, 3.73it/s] {'loss': 2.5228, 'learning_rate': 1.9384171173694752e-07, 'epoch': 14.33} + 90%|████████▉ | 332760/371472 [4:18:39<2:53:11, 3.73it/s] 90%|████████▉ | 332761/371472 [4:18:40<3:02:56, 3.53it/s] 90%|████████▉ | 332762/371472 [4:18:40<2:57:35, 3.63it/s] 90%|████████▉ | 332763/371472 [4:18:40<2:55:26, 3.68it/s] 90%|████████▉ | 332764/371472 [4:18:41<2:56:30, 3.66it/s] 90%|████████▉ | 332765/371472 [4:18:41<2:58:20, 3.62it/s] 90%|████████▉ | 332766/371472 [4:18:41<2:55:46, 3.67it/s] 90%|████████▉ | 332767/371472 [4:18:41<2:54:16, 3.70it/s] 90%|████████▉ | 332768/371472 [4:18:42<2:53:04, 3.73it/s] 90%|████████▉ | 332769/371472 [4:18:42<2:56:54, 3.65it/s] 90%|████████▉ | 332770/371472 [4:18:42<2:54:14, 3.70it/s] 90%|████████▉ | 332771/371472 [4:18:42<3:01:57, 3.54it/s] 90%|████████▉ | 332772/371472 [4:18:43<2:58:49, 3.61it/s] 90%|████████▉ | 332773/371472 [4:18:43<2:56:53, 3.65it/s] 90%|████████▉ | 332774/371472 [4:18:43<2:59:45, 3.59it/s] 90%|████████▉ | 332775/371472 [4:18:44<2:53:46, 3.71it/s] 90%|████████▉ | 332776/371472 [4:18:44<2:55:57, 3.67it/s] 90%|████████▉ | 332777/371472 [4:18:44<3:15:04, 3.31it/s] 90%|████████▉ | 332778/371472 [4:18:44<3:07:52, 3.43it/s] 90%|████████▉ | 332779/371472 [4:18:45<3:01:59, 3.54it/s] 90%|████████▉ | 332780/371472 [4:18:45<3:00:24, 3.57it/s] {'loss': 2.5863, 'learning_rate': 1.937932297614687e-07, 'epoch': 14.33} + 90%|████████▉ | 332780/371472 [4:18:45<3:00:24, 3.57it/s] 90%|████████▉ | 332781/371472 [4:18:45<2:57:51, 3.63it/s] 90%|████████▉ | 332782/371472 [4:18:46<2:57:07, 3.64it/s] 90%|████████▉ | 332783/371472 [4:18:46<3:03:17, 3.52it/s] 90%|████████▉ | 332784/371472 [4:18:46<2:59:46, 3.59it/s] 90%|████████▉ | 332785/371472 [4:18:46<2:53:27, 3.72it/s] 90%|████████▉ | 332786/371472 [4:18:47<2:48:33, 3.83it/s] 90%|████████▉ | 332787/371472 [4:18:47<2:53:08, 3.72it/s] 90%|████████▉ | 332788/371472 [4:18:47<2:57:02, 3.64it/s] 90%|████████▉ | 332789/371472 [4:18:47<2:51:51, 3.75it/s] 90%|████████▉ | 332790/371472 [4:18:48<2:49:56, 3.79it/s] 90%|████████▉ | 332791/371472 [4:18:48<3:00:46, 3.57it/s] 90%|████████▉ | 332792/371472 [4:18:48<2:58:54, 3.60it/s] 90%|████████▉ | 332793/371472 [4:18:49<2:58:36, 3.61it/s] 90%|████████▉ | 332794/371472 [4:18:49<3:04:12, 3.50it/s] 90%|████████▉ | 332795/371472 [4:18:49<3:06:28, 3.46it/s] 90%|████████▉ | 332796/371472 [4:18:49<2:57:29, 3.63it/s] 90%|████████▉ | 332797/371472 [4:18:50<3:02:31, 3.53it/s] 90%|████████▉ | 332798/371472 [4:18:50<2:54:48, 3.69it/s] 90%|████████▉ | 332799/371472 [4:18:50<3:05:07, 3.48it/s] 90%|████████▉ | 332800/371472 [4:18:51<3:10:52, 3.38it/s] {'loss': 2.6231, 'learning_rate': 1.9374474778598972e-07, 'epoch': 14.33} + 90%|████████▉ | 332800/371472 [4:18:51<3:10:52, 3.38it/s] 90%|████████▉ | 332801/371472 [4:18:51<3:13:54, 3.32it/s] 90%|████████▉ | 332802/371472 [4:18:51<3:04:50, 3.49it/s] 90%|████████▉ | 332803/371472 [4:18:51<3:02:54, 3.52it/s] 90%|████████▉ | 332804/371472 [4:18:52<2:57:56, 3.62it/s] 90%|████████▉ | 332805/371472 [4:18:52<2:58:04, 3.62it/s] 90%|████████▉ | 332806/371472 [4:18:52<2:53:22, 3.72it/s] 90%|████████▉ | 332807/371472 [4:18:52<2:57:58, 3.62it/s] 90%|████████▉ | 332808/371472 [4:18:53<2:57:51, 3.62it/s] 90%|████████▉ | 332809/371472 [4:18:53<3:03:44, 3.51it/s] 90%|████████▉ | 332810/371472 [4:18:53<3:01:01, 3.56it/s] 90%|████████▉ | 332811/371472 [4:18:54<2:56:54, 3.64it/s] 90%|████████▉ | 332812/371472 [4:18:54<2:54:04, 3.70it/s] 90%|████████▉ | 332813/371472 [4:18:54<2:53:34, 3.71it/s] 90%|████████▉ | 332814/371472 [4:18:54<2:54:44, 3.69it/s] 90%|████████▉ | 332815/371472 [4:18:55<2:51:51, 3.75it/s] 90%|████████▉ | 332816/371472 [4:18:55<2:55:15, 3.68it/s] 90%|████████▉ | 332817/371472 [4:18:55<2:53:18, 3.72it/s] 90%|████████▉ | 332818/371472 [4:18:55<2:51:44, 3.75it/s] 90%|████████▉ | 332819/371472 [4:18:56<2:52:39, 3.73it/s] 90%|████████▉ | 332820/371472 [4:18:56<2:51:32, 3.76it/s] {'loss': 2.6002, 'learning_rate': 1.936962658105109e-07, 'epoch': 14.34} + 90%|████████▉ | 332820/371472 [4:18:56<2:51:32, 3.76it/s] 90%|████████▉ | 332821/371472 [4:18:56<3:02:51, 3.52it/s] 90%|████████▉ | 332822/371472 [4:18:57<3:18:38, 3.24it/s] 90%|████████▉ | 332823/371472 [4:18:57<3:13:46, 3.32it/s] 90%|████████▉ | 332824/371472 [4:18:57<3:06:12, 3.46it/s] 90%|████████▉ | 332825/371472 [4:18:58<3:08:59, 3.41it/s] 90%|████████▉ | 332826/371472 [4:18:58<3:13:00, 3.34it/s] 90%|████████▉ | 332827/371472 [4:18:58<3:10:04, 3.39it/s] 90%|████████▉ | 332828/371472 [4:18:58<3:08:54, 3.41it/s] 90%|████████▉ | 332829/371472 [4:18:59<3:25:36, 3.13it/s] 90%|████████▉ | 332830/371472 [4:18:59<3:13:45, 3.32it/s] 90%|████████▉ | 332831/371472 [4:18:59<3:10:11, 3.39it/s] 90%|████████▉ | 332832/371472 [4:19:00<3:07:40, 3.43it/s] 90%|████████▉ | 332833/371472 [4:19:00<3:02:27, 3.53it/s] 90%|████████▉ | 332834/371472 [4:19:00<2:58:17, 3.61it/s] 90%|████████▉ | 332835/371472 [4:19:00<2:53:16, 3.72it/s] 90%|████████▉ | 332836/371472 [4:19:01<3:03:15, 3.51it/s] 90%|████████▉ | 332837/371472 [4:19:01<3:11:32, 3.36it/s] 90%|████████▉ | 332838/371472 [4:19:01<3:06:56, 3.44it/s] 90%|████████▉ | 332839/371472 [4:19:02<3:09:39, 3.39it/s] 90%|████████▉ | 332840/371472 [4:19:02<3:01:13, 3.55it/s] {'loss': 2.5584, 'learning_rate': 1.9364778383503197e-07, 'epoch': 14.34} + 90%|████████▉ | 332840/371472 [4:19:02<3:01:13, 3.55it/s] 90%|████████▉ | 332841/371472 [4:19:02<2:58:17, 3.61it/s] 90%|████████▉ | 332842/371472 [4:19:03<3:14:43, 3.31it/s] 90%|████████▉ | 332843/371472 [4:19:03<3:15:42, 3.29it/s] 90%|████████▉ | 332844/371472 [4:19:03<3:08:10, 3.42it/s] 90%|████████▉ | 332845/371472 [4:19:03<3:04:57, 3.48it/s] 90%|████████▉ | 332846/371472 [4:19:04<3:05:19, 3.47it/s] 90%|████████▉ | 332847/371472 [4:19:04<3:15:34, 3.29it/s] 90%|████████▉ | 332848/371472 [4:19:04<3:06:54, 3.44it/s] 90%|████████▉ | 332849/371472 [4:19:05<3:01:00, 3.56it/s] 90%|████████▉ | 332850/371472 [4:19:05<2:59:40, 3.58it/s] 90%|████████▉ | 332851/371472 [4:19:05<3:08:26, 3.42it/s] 90%|████████▉ | 332852/371472 [4:19:05<3:19:39, 3.22it/s] 90%|████████▉ | 332853/371472 [4:19:06<3:12:43, 3.34it/s] 90%|████████▉ | 332854/371472 [4:19:06<3:09:32, 3.40it/s] 90%|████████▉ | 332855/371472 [4:19:06<3:15:06, 3.30it/s] 90%|████████▉ | 332856/371472 [4:19:07<3:10:00, 3.39it/s] 90%|████████▉ | 332857/371472 [4:19:07<4:01:12, 2.67it/s] 90%|████████▉ | 332858/371472 [4:19:07<3:42:01, 2.90it/s] 90%|████████▉ | 332859/371472 [4:19:08<3:49:23, 2.81it/s] 90%|████████▉ | 332860/371472 [4:19:08<3:46:58, 2.84it/s] {'loss': 2.4926, 'learning_rate': 1.9359930185955312e-07, 'epoch': 14.34} + 90%|████████▉ | 332860/371472 [4:19:08<3:46:58, 2.84it/s] 90%|████████▉ | 332861/371472 [4:19:08<3:39:49, 2.93it/s] 90%|████████▉ | 332862/371472 [4:19:09<3:35:22, 2.99it/s] 90%|████████▉ | 332863/371472 [4:19:09<3:28:16, 3.09it/s] 90%|████████▉ | 332864/371472 [4:19:09<3:18:48, 3.24it/s] 90%|████████▉ | 332865/371472 [4:19:10<3:33:07, 3.02it/s] 90%|████████▉ | 332866/371472 [4:19:10<3:21:35, 3.19it/s] 90%|████████▉ | 332867/371472 [4:19:10<3:12:31, 3.34it/s] 90%|████████▉ | 332868/371472 [4:19:11<3:04:27, 3.49it/s] 90%|████████▉ | 332869/371472 [4:19:11<3:00:13, 3.57it/s] 90%|████████▉ | 332870/371472 [4:19:11<3:06:01, 3.46it/s] 90%|████████▉ | 332871/371472 [4:19:11<3:01:57, 3.54it/s] 90%|████████▉ | 332872/371472 [4:19:12<3:06:10, 3.46it/s] 90%|████████▉ | 332873/371472 [4:19:12<3:04:15, 3.49it/s] 90%|████████▉ | 332874/371472 [4:19:12<3:04:00, 3.50it/s] 90%|████████▉ | 332875/371472 [4:19:13<2:59:36, 3.58it/s] 90%|████████▉ | 332876/371472 [4:19:13<2:53:59, 3.70it/s] 90%|████████▉ | 332877/371472 [4:19:13<2:53:18, 3.71it/s] 90%|████████▉ | 332878/371472 [4:19:13<2:50:21, 3.78it/s] 90%|████████▉ | 332879/371472 [4:19:14<2:52:11, 3.74it/s] 90%|████████▉ | 332880/371472 [4:19:14<3:01:17, 3.55it/s] {'loss': 2.595, 'learning_rate': 1.9355081988407416e-07, 'epoch': 14.34} + 90%|████████▉ | 332880/371472 [4:19:14<3:01:17, 3.55it/s] 90%|████████▉ | 332881/371472 [4:19:14<2:57:05, 3.63it/s] 90%|████████▉ | 332882/371472 [4:19:14<2:50:21, 3.78it/s] 90%|████████▉ | 332883/371472 [4:19:15<2:55:17, 3.67it/s] 90%|████████▉ | 332884/371472 [4:19:15<2:50:08, 3.78it/s] 90%|████████▉ | 332885/371472 [4:19:15<2:52:12, 3.73it/s] 90%|████████▉ | 332886/371472 [4:19:15<2:48:16, 3.82it/s] 90%|████████▉ | 332887/371472 [4:19:16<2:48:03, 3.83it/s] 90%|████████▉ | 332888/371472 [4:19:16<2:54:08, 3.69it/s] 90%|████████▉ | 332889/371472 [4:19:16<2:58:01, 3.61it/s] 90%|████████▉ | 332890/371472 [4:19:17<2:56:06, 3.65it/s] 90%|████████▉ | 332891/371472 [4:19:17<2:54:08, 3.69it/s] 90%|████████▉ | 332892/371472 [4:19:17<2:59:09, 3.59it/s] 90%|████████▉ | 332893/371472 [4:19:17<3:10:59, 3.37it/s] 90%|████████▉ | 332894/371472 [4:19:18<3:03:47, 3.50it/s] 90%|████████▉ | 332895/371472 [4:19:18<3:00:29, 3.56it/s] 90%|████████▉ | 332896/371472 [4:19:18<2:58:11, 3.61it/s] 90%|████████▉ | 332897/371472 [4:19:19<3:01:20, 3.55it/s] 90%|████████▉ | 332898/371472 [4:19:19<3:05:34, 3.46it/s] 90%|████████▉ | 332899/371472 [4:19:19<3:03:21, 3.51it/s] 90%|████████▉ | 332900/371472 [4:19:19<3:02:13, 3.53it/s] {'loss': 2.5737, 'learning_rate': 1.9350233790859534e-07, 'epoch': 14.34} + 90%|████████▉ | 332900/371472 [4:19:19<3:02:13, 3.53it/s] 90%|████████▉ | 332901/371472 [4:19:20<2:57:12, 3.63it/s] 90%|████████▉ | 332902/371472 [4:19:20<2:53:54, 3.70it/s] 90%|████████▉ | 332903/371472 [4:19:20<2:56:49, 3.64it/s] 90%|████████▉ | 332904/371472 [4:19:20<2:50:50, 3.76it/s] 90%|████████▉ | 332905/371472 [4:19:21<2:54:26, 3.68it/s] 90%|████████▉ | 332906/371472 [4:19:21<2:55:45, 3.66it/s] 90%|████████▉ | 332907/371472 [4:19:21<3:08:17, 3.41it/s] 90%|████████▉ | 332908/371472 [4:19:22<3:02:27, 3.52it/s] 90%|████████▉ | 332909/371472 [4:19:22<3:04:18, 3.49it/s] 90%|████████▉ | 332910/371472 [4:19:22<3:05:35, 3.46it/s] 90%|████████▉ | 332911/371472 [4:19:22<2:58:20, 3.60it/s] 90%|████████▉ | 332912/371472 [4:19:23<2:56:33, 3.64it/s] 90%|████████▉ | 332913/371472 [4:19:23<2:53:42, 3.70it/s] 90%|████████▉ | 332914/371472 [4:19:23<3:12:00, 3.35it/s] 90%|████████▉ | 332915/371472 [4:19:24<3:04:29, 3.48it/s] 90%|████████▉ | 332916/371472 [4:19:24<3:02:15, 3.53it/s] 90%|████████▉ | 332917/371472 [4:19:24<3:06:52, 3.44it/s] 90%|████████▉ | 332918/371472 [4:19:25<3:08:53, 3.40it/s] 90%|████████▉ | 332919/371472 [4:19:25<3:02:29, 3.52it/s] 90%|████████▉ | 332920/371472 [4:19:25<2:57:00, 3.63it/s] {'loss': 2.5501, 'learning_rate': 1.934538559331164e-07, 'epoch': 14.34} + 90%|████████▉ | 332920/371472 [4:19:25<2:57:00, 3.63it/s] 90%|████████▉ | 332921/371472 [4:19:25<2:53:52, 3.70it/s] 90%|████████▉ | 332922/371472 [4:19:26<2:51:40, 3.74it/s] 90%|████████▉ | 332923/371472 [4:19:26<2:51:41, 3.74it/s] 90%|████████▉ | 332924/371472 [4:19:26<3:00:43, 3.55it/s] 90%|████████▉ | 332925/371472 [4:19:26<2:58:33, 3.60it/s] 90%|████████▉ | 332926/371472 [4:19:27<2:58:22, 3.60it/s] 90%|████████▉ | 332927/371472 [4:19:27<2:57:06, 3.63it/s] 90%|████████▉ | 332928/371472 [4:19:27<2:58:46, 3.59it/s] 90%|████████▉ | 332929/371472 [4:19:28<2:57:25, 3.62it/s] 90%|████████▉ | 332930/371472 [4:19:28<3:07:33, 3.42it/s] 90%|████████▉ | 332931/371472 [4:19:28<2:59:29, 3.58it/s] 90%|████████▉ | 332932/371472 [4:19:28<2:59:54, 3.57it/s] 90%|████████▉ | 332933/371472 [4:19:29<3:09:05, 3.40it/s] 90%|████████▉ | 332934/371472 [4:19:29<3:03:06, 3.51it/s] 90%|████████▉ | 332935/371472 [4:19:29<3:10:39, 3.37it/s] 90%|████████▉ | 332936/371472 [4:19:30<3:18:39, 3.23it/s] 90%|████████▉ | 332937/371472 [4:19:30<3:10:25, 3.37it/s] 90%|████████▉ | 332938/371472 [4:19:30<3:07:02, 3.43it/s] 90%|████████▉ | 332939/371472 [4:19:30<3:07:22, 3.43it/s] 90%|████████▉ | 332940/371472 [4:19:31<3:01:35, 3.54it/s] {'loss': 2.7005, 'learning_rate': 1.9340537395763754e-07, 'epoch': 14.34} + 90%|████████▉ | 332940/371472 [4:19:31<3:01:35, 3.54it/s] 90%|████████▉ | 332941/371472 [4:19:31<3:05:02, 3.47it/s] 90%|████████▉ | 332942/371472 [4:19:31<3:05:16, 3.47it/s] 90%|████████▉ | 332943/371472 [4:19:32<3:13:41, 3.32it/s] 90%|████████▉ | 332944/371472 [4:19:32<3:05:40, 3.46it/s] 90%|████████▉ | 332945/371472 [4:19:32<3:01:40, 3.53it/s] 90%|████████▉ | 332946/371472 [4:19:32<2:57:16, 3.62it/s] 90%|████████▉ | 332947/371472 [4:19:33<2:59:12, 3.58it/s] 90%|████████▉ | 332948/371472 [4:19:33<3:01:52, 3.53it/s] 90%|████████▉ | 332949/371472 [4:19:33<3:01:35, 3.54it/s] 90%|████████▉ | 332950/371472 [4:19:34<3:09:04, 3.40it/s] 90%|████████▉ | 332951/371472 [4:19:34<3:09:32, 3.39it/s] 90%|████████▉ | 332952/371472 [4:19:34<3:16:34, 3.27it/s] 90%|████████▉ | 332953/371472 [4:19:35<3:09:47, 3.38it/s] 90%|████████▉ | 332954/371472 [4:19:35<3:17:21, 3.25it/s] 90%|████████▉ | 332955/371472 [4:19:35<3:13:32, 3.32it/s] 90%|████████▉ | 332956/371472 [4:19:35<3:07:57, 3.42it/s] 90%|████████▉ | 332957/371472 [4:19:36<3:02:45, 3.51it/s] 90%|████████▉ | 332958/371472 [4:19:36<2:58:45, 3.59it/s] 90%|████████▉ | 332959/371472 [4:19:36<3:02:16, 3.52it/s] 90%|████████▉ | 332960/371472 [4:19:37<3:02:25, 3.52it/s] {'loss': 2.6157, 'learning_rate': 1.933568919821586e-07, 'epoch': 14.34} + 90%|████████▉ | 332960/371472 [4:19:37<3:02:25, 3.52it/s] 90%|████████▉ | 332961/371472 [4:19:37<3:13:55, 3.31it/s] 90%|████████▉ | 332962/371472 [4:19:37<3:06:18, 3.44it/s] 90%|████████▉ | 332963/371472 [4:19:37<3:09:38, 3.38it/s] 90%|████████▉ | 332964/371472 [4:19:38<3:15:49, 3.28it/s] 90%|████████▉ | 332965/371472 [4:19:38<3:11:38, 3.35it/s] 90%|████████▉ | 332966/371472 [4:19:38<3:08:37, 3.40it/s] 90%|████████▉ | 332967/371472 [4:19:39<3:06:47, 3.44it/s] 90%|████████▉ | 332968/371472 [4:19:39<3:11:26, 3.35it/s] 90%|████████▉ | 332969/371472 [4:19:39<3:10:36, 3.37it/s] 90%|████████▉ | 332970/371472 [4:19:40<3:18:01, 3.24it/s] 90%|████████▉ | 332971/371472 [4:19:40<3:26:44, 3.10it/s] 90%|████████▉ | 332972/371472 [4:19:40<3:27:27, 3.09it/s] 90%|████████▉ | 332973/371472 [4:19:40<3:16:40, 3.26it/s] 90%|████████▉ | 332974/371472 [4:19:41<3:26:28, 3.11it/s] 90%|████████▉ | 332975/371472 [4:19:41<3:26:25, 3.11it/s] 90%|████████▉ | 332976/371472 [4:19:41<3:15:24, 3.28it/s] 90%|████████▉ | 332977/371472 [4:19:42<3:13:45, 3.31it/s] 90%|████████▉ | 332978/371472 [4:19:42<3:15:08, 3.29it/s] 90%|████████▉ | 332979/371472 [4:19:42<3:05:17, 3.46it/s] 90%|████████▉ | 332980/371472 [4:19:43<3:04:07, 3.48it/s] {'loss': 2.5972, 'learning_rate': 1.9330841000667979e-07, 'epoch': 14.34} + 90%|████████▉ | 332980/371472 [4:19:43<3:04:07, 3.48it/s] 90%|████████▉ | 332981/371472 [4:19:43<2:59:41, 3.57it/s] 90%|████████▉ | 332982/371472 [4:19:43<2:56:03, 3.64it/s] 90%|████████▉ | 332983/371472 [4:19:43<2:50:08, 3.77it/s] 90%|████████▉ | 332984/371472 [4:19:44<2:52:49, 3.71it/s] 90%|████████▉ | 332985/371472 [4:19:44<2:57:25, 3.62it/s] 90%|████████▉ | 332986/371472 [4:19:44<2:55:56, 3.65it/s] 90%|████████▉ | 332987/371472 [4:19:44<2:56:38, 3.63it/s] 90%|████████▉ | 332988/371472 [4:19:45<2:54:12, 3.68it/s] 90%|████████▉ | 332989/371472 [4:19:45<2:48:56, 3.80it/s] 90%|████████▉ | 332990/371472 [4:19:45<2:56:38, 3.63it/s] 90%|████████▉ | 332991/371472 [4:19:46<2:57:26, 3.61it/s] 90%|████████▉ | 332992/371472 [4:19:46<3:02:57, 3.51it/s] 90%|████████▉ | 332993/371472 [4:19:46<3:05:00, 3.47it/s] 90%|████████▉ | 332994/371472 [4:19:46<3:02:14, 3.52it/s] 90%|████████▉ | 332995/371472 [4:19:47<2:56:14, 3.64it/s] 90%|████████▉ | 332996/371472 [4:19:47<3:05:17, 3.46it/s] 90%|████████▉ | 332997/371472 [4:19:47<3:04:45, 3.47it/s] 90%|████████▉ | 332998/371472 [4:19:48<3:05:56, 3.45it/s] 90%|████████▉ | 332999/371472 [4:19:48<3:06:17, 3.44it/s] 90%|████████▉ | 333000/371472 [4:19:48<3:02:23, 3.52it/s] {'loss': 2.6967, 'learning_rate': 1.9325992803120083e-07, 'epoch': 14.34} + 90%|████████▉ | 333000/371472 [4:19:48<3:02:23, 3.52it/s] 90%|████████▉ | 333001/371472 [4:19:48<3:05:40, 3.45it/s] 90%|████████▉ | 333002/371472 [4:19:49<3:15:05, 3.29it/s] 90%|████████▉ | 333003/371472 [4:19:49<3:19:08, 3.22it/s] 90%|████████▉ | 333004/371472 [4:19:49<3:15:21, 3.28it/s] 90%|████████▉ | 333005/371472 [4:19:50<3:11:47, 3.34it/s] 90%|████████▉ | 333006/371472 [4:19:50<3:10:25, 3.37it/s] 90%|████████▉ | 333007/371472 [4:19:50<3:04:40, 3.47it/s] 90%|████████▉ | 333008/371472 [4:19:51<3:03:23, 3.50it/s] 90%|████████▉ | 333009/371472 [4:19:51<2:56:09, 3.64it/s] 90%|████████▉ | 333010/371472 [4:19:51<2:51:40, 3.73it/s] 90%|████████▉ | 333011/371472 [4:19:51<2:49:56, 3.77it/s] 90%|████████▉ | 333012/371472 [4:19:52<2:56:33, 3.63it/s] 90%|████████▉ | 333013/371472 [4:19:52<2:54:08, 3.68it/s] 90%|████████▉ | 333014/371472 [4:19:52<2:57:44, 3.61it/s] 90%|████████▉ | 333015/371472 [4:19:52<2:57:48, 3.60it/s] 90%|████████▉ | 333016/371472 [4:19:53<2:59:34, 3.57it/s] 90%|████████▉ | 333017/371472 [4:19:53<3:03:59, 3.48it/s] 90%|████████▉ | 333018/371472 [4:19:53<3:02:14, 3.52it/s] 90%|████████▉ | 333019/371472 [4:19:54<2:59:35, 3.57it/s] 90%|████████▉ | 333020/371472 [4:19:54<2:59:32, 3.57it/s] {'loss': 2.7307, 'learning_rate': 1.9321144605572198e-07, 'epoch': 14.34} + 90%|████████▉ | 333020/371472 [4:19:54<2:59:32, 3.57it/s] 90%|████████▉ | 333021/371472 [4:19:54<3:02:40, 3.51it/s] 90%|████████▉ | 333022/371472 [4:19:54<3:03:22, 3.49it/s] 90%|████████▉ | 333023/371472 [4:19:55<2:59:48, 3.56it/s] 90%|████████▉ | 333024/371472 [4:19:55<3:01:15, 3.54it/s] 90%|████████▉ | 333025/371472 [4:19:55<3:03:58, 3.48it/s] 90%|████████▉ | 333026/371472 [4:19:56<3:03:05, 3.50it/s] 90%|████████▉ | 333027/371472 [4:19:56<3:03:34, 3.49it/s] 90%|████████▉ | 333028/371472 [4:19:56<3:00:59, 3.54it/s] 90%|████████▉ | 333029/371472 [4:19:56<3:03:20, 3.49it/s] 90%|████████▉ | 333030/371472 [4:19:57<3:01:42, 3.53it/s] 90%|████████▉ | 333031/371472 [4:19:57<3:04:16, 3.48it/s] 90%|████████▉ | 333032/371472 [4:19:57<3:02:55, 3.50it/s] 90%|████████▉ | 333033/371472 [4:19:58<3:04:42, 3.47it/s] 90%|████████▉ | 333034/371472 [4:19:58<3:00:38, 3.55it/s] 90%|████████▉ | 333035/371472 [4:19:58<3:03:36, 3.49it/s] 90%|████████▉ | 333036/371472 [4:19:58<2:56:07, 3.64it/s] 90%|████████▉ | 333037/371472 [4:19:59<2:54:34, 3.67it/s] 90%|████████▉ | 333038/371472 [4:19:59<2:59:57, 3.56it/s] 90%|████████▉ | 333039/371472 [4:19:59<2:58:55, 3.58it/s] 90%|████████▉ | 333040/371472 [4:19:59<2:56:37, 3.63it/s] {'loss': 2.6198, 'learning_rate': 1.9316296408024305e-07, 'epoch': 14.34} + 90%|████████▉ | 333040/371472 [4:19:59<2:56:37, 3.63it/s] 90%|████████▉ | 333041/371472 [4:20:00<3:01:23, 3.53it/s] 90%|████████▉ | 333042/371472 [4:20:00<2:56:16, 3.63it/s] 90%|████████▉ | 333043/371472 [4:20:00<2:55:28, 3.65it/s] 90%|████████▉ | 333044/371472 [4:20:01<2:52:13, 3.72it/s] 90%|████████▉ | 333045/371472 [4:20:01<2:58:06, 3.60it/s] 90%|████████▉ | 333046/371472 [4:20:01<2:57:02, 3.62it/s] 90%|████████▉ | 333047/371472 [4:20:01<2:56:40, 3.62it/s] 90%|████████▉ | 333048/371472 [4:20:02<3:08:11, 3.40it/s] 90%|████████▉ | 333049/371472 [4:20:02<3:03:32, 3.49it/s] 90%|████████▉ | 333050/371472 [4:20:02<3:02:01, 3.52it/s] 90%|████████▉ | 333051/371472 [4:20:03<2:56:17, 3.63it/s] 90%|████████▉ | 333052/371472 [4:20:03<2:53:33, 3.69it/s] 90%|████████▉ | 333053/371472 [4:20:03<2:50:47, 3.75it/s] 90%|████████▉ | 333054/371472 [4:20:03<2:53:36, 3.69it/s] 90%|████████▉ | 333055/371472 [4:20:04<2:51:39, 3.73it/s] 90%|████████▉ | 333056/371472 [4:20:04<2:54:36, 3.67it/s] 90%|████████▉ | 333057/371472 [4:20:04<2:55:08, 3.66it/s] 90%|████████▉ | 333058/371472 [4:20:04<2:55:13, 3.65it/s] 90%|████████▉ | 333059/371472 [4:20:05<2:51:18, 3.74it/s] 90%|████████▉ | 333060/371472 [4:20:05<2:58:45, 3.58it/s] {'loss': 2.4553, 'learning_rate': 1.931144821047642e-07, 'epoch': 14.35} + 90%|████████▉ | 333060/371472 [4:20:05<2:58:45, 3.58it/s] 90%|████████▉ | 333061/371472 [4:20:05<2:58:05, 3.59it/s] 90%|████████▉ | 333062/371472 [4:20:06<2:59:50, 3.56it/s] 90%|████████▉ | 333063/371472 [4:20:06<3:04:28, 3.47it/s] 90%|████████▉ | 333064/371472 [4:20:06<3:05:22, 3.45it/s] 90%|████████▉ | 333065/371472 [4:20:06<3:02:18, 3.51it/s] 90%|████████▉ | 333066/371472 [4:20:07<2:53:54, 3.68it/s] 90%|████████▉ | 333067/371472 [4:20:07<2:51:07, 3.74it/s] 90%|████████▉ | 333068/371472 [4:20:07<2:51:03, 3.74it/s] 90%|████████▉ | 333069/371472 [4:20:07<2:52:21, 3.71it/s] 90%|████████▉ | 333070/371472 [4:20:08<2:59:26, 3.57it/s] 90%|████████▉ | 333071/371472 [4:20:08<3:02:23, 3.51it/s] 90%|████████▉ | 333072/371472 [4:20:08<3:06:28, 3.43it/s] 90%|████████▉ | 333073/371472 [4:20:09<3:03:40, 3.48it/s] 90%|████████▉ | 333074/371472 [4:20:09<3:04:56, 3.46it/s] 90%|████████▉ | 333075/371472 [4:20:09<3:11:07, 3.35it/s] 90%|████████▉ | 333076/371472 [4:20:10<3:05:12, 3.46it/s] 90%|████████▉ | 333077/371472 [4:20:10<3:11:32, 3.34it/s] 90%|████████▉ | 333078/371472 [4:20:10<3:07:19, 3.42it/s] 90%|████████▉ | 333079/371472 [4:20:10<3:05:56, 3.44it/s] 90%|████████▉ | 333080/371472 [4:20:11<3:00:43, 3.54it/s] {'loss': 2.6497, 'learning_rate': 1.9306600012928525e-07, 'epoch': 14.35} + 90%|████████▉ | 333080/371472 [4:20:11<3:00:43, 3.54it/s] 90%|████████▉ | 333081/371472 [4:20:11<3:05:51, 3.44it/s] 90%|████████▉ | 333082/371472 [4:20:11<3:02:06, 3.51it/s] 90%|████████▉ | 333083/371472 [4:20:12<3:02:02, 3.51it/s] 90%|████████▉ | 333084/371472 [4:20:12<2:58:26, 3.59it/s] 90%|████████▉ | 333085/371472 [4:20:12<2:55:25, 3.65it/s] 90%|████████▉ | 333086/371472 [4:20:12<2:56:50, 3.62it/s] 90%|████████▉ | 333087/371472 [4:20:13<2:52:23, 3.71it/s] 90%|████████▉ | 333088/371472 [4:20:13<2:50:53, 3.74it/s] 90%|████████▉ | 333089/371472 [4:20:13<3:07:25, 3.41it/s] 90%|████████▉ | 333090/371472 [4:20:13<2:58:00, 3.59it/s] 90%|████████▉ | 333091/371472 [4:20:14<2:54:40, 3.66it/s] 90%|████████▉ | 333092/371472 [4:20:14<2:54:43, 3.66it/s] 90%|████████▉ | 333093/371472 [4:20:14<2:52:39, 3.70it/s] 90%|████████▉ | 333094/371472 [4:20:15<2:57:28, 3.60it/s] 90%|████████▉ | 333095/371472 [4:20:15<2:55:45, 3.64it/s] 90%|████████▉ | 333096/371472 [4:20:15<3:02:30, 3.50it/s] 90%|████████▉ | 333097/371472 [4:20:15<2:59:20, 3.57it/s] 90%|████████▉ | 333098/371472 [4:20:16<3:00:22, 3.55it/s] 90%|████████▉ | 333099/371472 [4:20:16<3:05:19, 3.45it/s] 90%|████████▉ | 333100/371472 [4:20:16<3:04:33, 3.47it/s] {'loss': 2.6282, 'learning_rate': 1.9301751815380643e-07, 'epoch': 14.35} + 90%|████████▉ | 333100/371472 [4:20:16<3:04:33, 3.47it/s] 90%|████████▉ | 333101/371472 [4:20:17<2:57:34, 3.60it/s] 90%|████████▉ | 333102/371472 [4:20:17<2:57:50, 3.60it/s] 90%|████████▉ | 333103/371472 [4:20:17<2:55:06, 3.65it/s] 90%|████████▉ | 333104/371472 [4:20:17<2:53:44, 3.68it/s] 90%|████████▉ | 333105/371472 [4:20:18<3:01:04, 3.53it/s] 90%|████████▉ | 333106/371472 [4:20:18<2:59:02, 3.57it/s] 90%|████████▉ | 333107/371472 [4:20:18<2:58:01, 3.59it/s] 90%|████████▉ | 333108/371472 [4:20:18<2:53:20, 3.69it/s] 90%|████████▉ | 333109/371472 [4:20:19<3:16:27, 3.25it/s] 90%|████████▉ | 333110/371472 [4:20:19<3:11:36, 3.34it/s] 90%|████████▉ | 333111/371472 [4:20:19<3:03:11, 3.49it/s] 90%|████████▉ | 333112/371472 [4:20:20<2:56:30, 3.62it/s] 90%|████████▉ | 333113/371472 [4:20:20<3:11:01, 3.35it/s] 90%|████████▉ | 333114/371472 [4:20:20<3:01:52, 3.51it/s] 90%|████████▉ | 333115/371472 [4:20:21<3:09:27, 3.37it/s] 90%|████████▉ | 333116/371472 [4:20:21<3:07:34, 3.41it/s] 90%|████████▉ | 333117/371472 [4:20:21<3:04:16, 3.47it/s] 90%|████████▉ | 333118/371472 [4:20:21<3:10:01, 3.36it/s] 90%|████████▉ | 333119/371472 [4:20:22<3:02:03, 3.51it/s] 90%|████████▉ | 333120/371472 [4:20:22<3:00:45, 3.54it/s] {'loss': 2.638, 'learning_rate': 1.9296903617832747e-07, 'epoch': 14.35} + 90%|████████▉ | 333120/371472 [4:20:22<3:00:45, 3.54it/s] 90%|████████▉ | 333121/371472 [4:20:22<3:04:24, 3.47it/s] 90%|████████▉ | 333122/371472 [4:20:23<2:59:31, 3.56it/s] 90%|████████▉ | 333123/371472 [4:20:23<2:58:02, 3.59it/s] 90%|████████▉ | 333124/371472 [4:20:23<2:51:21, 3.73it/s] 90%|████████▉ | 333125/371472 [4:20:23<3:01:48, 3.52it/s] 90%|████████▉ | 333126/371472 [4:20:24<3:04:02, 3.47it/s] 90%|████████▉ | 333127/371472 [4:20:24<2:58:37, 3.58it/s] 90%|████████▉ | 333128/371472 [4:20:24<2:57:27, 3.60it/s] 90%|████████▉ | 333129/371472 [4:20:25<3:02:14, 3.51it/s] 90%|████████▉ | 333130/371472 [4:20:25<3:34:33, 2.98it/s] 90%|████████▉ | 333131/371472 [4:20:25<3:23:46, 3.14it/s] 90%|████████▉ | 333132/371472 [4:20:26<3:20:43, 3.18it/s] 90%|████████▉ | 333133/371472 [4:20:26<3:20:59, 3.18it/s] 90%|████████▉ | 333134/371472 [4:20:26<3:28:23, 3.07it/s] 90%|████████▉ | 333135/371472 [4:20:27<3:16:15, 3.26it/s] 90%|████████▉ | 333136/371472 [4:20:27<3:03:45, 3.48it/s] 90%|████████▉ | 333137/371472 [4:20:27<3:17:46, 3.23it/s] 90%|████████▉ | 333138/371472 [4:20:27<3:07:05, 3.41it/s] 90%|████████▉ | 333139/371472 [4:20:28<3:32:54, 3.00it/s] 90%|████████▉ | 333140/371472 [4:20:28<3:21:44, 3.17it/s] {'loss': 2.587, 'learning_rate': 1.9292055420284852e-07, 'epoch': 14.35} + 90%|████████▉ | 333140/371472 [4:20:28<3:21:44, 3.17it/s] 90%|████████▉ | 333141/371472 [4:20:28<3:17:57, 3.23it/s] 90%|████████▉ | 333142/371472 [4:20:29<3:10:08, 3.36it/s] 90%|████████▉ | 333143/371472 [4:20:29<3:09:25, 3.37it/s] 90%|████████▉ | 333144/371472 [4:20:29<3:02:56, 3.49it/s] 90%|████████▉ | 333145/371472 [4:20:29<2:58:28, 3.58it/s] 90%|████████▉ | 333146/371472 [4:20:30<2:51:59, 3.71it/s] 90%|████████▉ | 333147/371472 [4:20:30<2:52:34, 3.70it/s] 90%|████████▉ | 333148/371472 [4:20:30<3:15:15, 3.27it/s] 90%|████████▉ | 333149/371472 [4:20:31<3:13:19, 3.30it/s] 90%|████████▉ | 333150/371472 [4:20:31<3:11:59, 3.33it/s] 90%|████████▉ | 333151/371472 [4:20:31<3:11:48, 3.33it/s] 90%|████████▉ | 333152/371472 [4:20:32<3:08:12, 3.39it/s] 90%|████████▉ | 333153/371472 [4:20:32<3:03:20, 3.48it/s] 90%|████████▉ | 333154/371472 [4:20:32<3:05:50, 3.44it/s] 90%|████████▉ | 333155/371472 [4:20:32<3:00:59, 3.53it/s] 90%|████████▉ | 333156/371472 [4:20:33<3:08:23, 3.39it/s] 90%|████████▉ | 333157/371472 [4:20:33<3:00:49, 3.53it/s] 90%|████████▉ | 333158/371472 [4:20:33<2:58:34, 3.58it/s] 90%|████████▉ | 333159/371472 [4:20:33<2:52:48, 3.70it/s] 90%|████████▉ | 333160/371472 [4:20:34<2:56:12, 3.62it/s] {'loss': 2.5188, 'learning_rate': 1.928720722273697e-07, 'epoch': 14.35} + 90%|████████▉ | 333160/371472 [4:20:34<2:56:12, 3.62it/s] 90%|████████▉ | 333161/371472 [4:20:34<2:52:35, 3.70it/s] 90%|████████▉ | 333162/371472 [4:20:34<2:52:51, 3.69it/s] 90%|████████▉ | 333163/371472 [4:20:35<2:53:23, 3.68it/s] 90%|████████▉ | 333164/371472 [4:20:35<2:53:19, 3.68it/s] 90%|████████▉ | 333165/371472 [4:20:35<2:53:30, 3.68it/s] 90%|████████▉ | 333166/371472 [4:20:35<2:54:51, 3.65it/s] 90%|████████▉ | 333167/371472 [4:20:36<2:53:00, 3.69it/s] 90%|████████▉ | 333168/371472 [4:20:36<2:53:42, 3.68it/s] 90%|████████▉ | 333169/371472 [4:20:36<3:04:23, 3.46it/s] 90%|████████▉ | 333170/371472 [4:20:37<3:03:37, 3.48it/s] 90%|████████▉ | 333171/371472 [4:20:37<3:03:52, 3.47it/s] 90%|████████▉ | 333172/371472 [4:20:37<3:05:11, 3.45it/s] 90%|████████▉ | 333173/371472 [4:20:37<3:08:21, 3.39it/s] 90%|████████▉ | 333174/371472 [4:20:38<3:04:05, 3.47it/s] 90%|████████▉ | 333175/371472 [4:20:38<3:04:30, 3.46it/s] 90%|████████▉ | 333176/371472 [4:20:38<3:09:44, 3.36it/s] 90%|████████▉ | 333177/371472 [4:20:39<3:21:25, 3.17it/s] 90%|████████▉ | 333178/371472 [4:20:39<3:11:13, 3.34it/s] 90%|████████▉ | 333179/371472 [4:20:39<3:22:03, 3.16it/s] 90%|████████▉ | 333180/371472 [4:20:40<3:21:10, 3.17it/s] {'loss': 2.5317, 'learning_rate': 1.9282359025189074e-07, 'epoch': 14.35} + 90%|████████▉ | 333180/371472 [4:20:40<3:21:10, 3.17it/s] 90%|████████▉ | 333181/371472 [4:20:40<3:21:34, 3.17it/s] 90%|████████▉ | 333182/371472 [4:20:40<3:30:19, 3.03it/s] 90%|████████▉ | 333183/371472 [4:20:41<3:20:02, 3.19it/s] 90%|████████▉ | 333184/371472 [4:20:41<3:30:06, 3.04it/s] 90%|████████▉ | 333185/371472 [4:20:41<3:18:59, 3.21it/s] 90%|████████▉ | 333186/371472 [4:20:41<3:14:08, 3.29it/s] 90%|████████▉ | 333187/371472 [4:20:42<3:08:10, 3.39it/s] 90%|████████▉ | 333188/371472 [4:20:42<3:00:55, 3.53it/s] 90%|████████▉ | 333189/371472 [4:20:42<2:59:04, 3.56it/s] 90%|████████▉ | 333190/371472 [4:20:43<3:18:44, 3.21it/s] 90%|████████▉ | 333191/371472 [4:20:43<3:16:04, 3.25it/s] 90%|████████▉ | 333192/371472 [4:20:43<3:21:57, 3.16it/s] 90%|████████▉ | 333193/371472 [4:20:44<3:14:23, 3.28it/s] 90%|████████▉ | 333194/371472 [4:20:44<3:06:34, 3.42it/s] 90%|████████▉ | 333195/371472 [4:20:44<3:21:39, 3.16it/s] 90%|████████▉ | 333196/371472 [4:20:44<3:15:06, 3.27it/s] 90%|████████▉ | 333197/371472 [4:20:45<3:11:55, 3.32it/s] 90%|████████▉ | 333198/371472 [4:20:45<3:03:46, 3.47it/s] 90%|████████▉ | 333199/371472 [4:20:45<3:01:32, 3.51it/s] 90%|████████▉ | 333200/371472 [4:20:46<2:56:56, 3.60it/s] {'loss': 2.708, 'learning_rate': 1.927751082764119e-07, 'epoch': 14.35} + 90%|████████▉ | 333200/371472 [4:20:46<2:56:56, 3.60it/s] 90%|████████▉ | 333201/371472 [4:20:46<2:58:41, 3.57it/s] 90%|████████▉ | 333202/371472 [4:20:46<3:08:08, 3.39it/s] 90%|████████▉ | 333203/371472 [4:20:46<3:01:05, 3.52it/s] 90%|████████▉ | 333204/371472 [4:20:47<3:03:03, 3.48it/s] 90%|████████▉ | 333205/371472 [4:20:47<3:08:05, 3.39it/s] 90%|████████▉ | 333206/371472 [4:20:47<3:07:19, 3.40it/s] 90%|████████▉ | 333207/371472 [4:20:48<3:09:55, 3.36it/s] 90%|████████▉ | 333208/371472 [4:20:48<3:03:51, 3.47it/s] 90%|████████▉ | 333209/371472 [4:20:48<3:15:14, 3.27it/s] 90%|████████▉ | 333210/371472 [4:20:49<3:06:45, 3.41it/s] 90%|████████▉ | 333211/371472 [4:20:49<3:05:59, 3.43it/s] 90%|████████▉ | 333212/371472 [4:20:49<3:03:27, 3.48it/s] 90%|████████▉ | 333213/371472 [4:20:49<3:05:09, 3.44it/s] 90%|████████▉ | 333214/371472 [4:20:50<3:03:19, 3.48it/s] 90%|████████▉ | 333215/371472 [4:20:50<3:26:52, 3.08it/s] 90%|████████▉ | 333216/371472 [4:20:50<3:30:15, 3.03it/s] 90%|████████▉ | 333217/371472 [4:20:51<3:16:00, 3.25it/s] 90%|████████▉ | 333218/371472 [4:20:51<3:08:36, 3.38it/s] 90%|████████▉ | 333219/371472 [4:20:51<3:04:39, 3.45it/s] 90%|████████▉ | 333220/371472 [4:20:52<3:01:59, 3.50it/s] {'loss': 2.5211, 'learning_rate': 1.9272662630093296e-07, 'epoch': 14.35} + 90%|████████▉ | 333220/371472 [4:20:52<3:01:59, 3.50it/s] 90%|████████▉ | 333221/371472 [4:20:52<2:58:05, 3.58it/s] 90%|████████▉ | 333222/371472 [4:20:52<2:57:49, 3.59it/s] 90%|████████▉ | 333223/371472 [4:20:52<2:57:42, 3.59it/s] 90%|████████▉ | 333224/371472 [4:20:53<2:57:35, 3.59it/s] 90%|████████▉ | 333225/371472 [4:20:53<2:55:08, 3.64it/s] 90%|████████▉ | 333226/371472 [4:20:53<2:59:51, 3.54it/s] 90%|████████▉ | 333227/371472 [4:20:54<3:24:31, 3.12it/s] 90%|████████▉ | 333228/371472 [4:20:54<3:13:07, 3.30it/s] 90%|████████▉ | 333229/371472 [4:20:54<3:03:01, 3.48it/s] 90%|████████▉ | 333230/371472 [4:20:54<2:58:56, 3.56it/s] 90%|████████▉ | 333231/371472 [4:20:55<2:58:56, 3.56it/s] 90%|████████▉ | 333232/371472 [4:20:55<2:56:42, 3.61it/s] 90%|████████▉ | 333233/371472 [4:20:55<3:04:15, 3.46it/s] 90%|████████▉ | 333234/371472 [4:20:56<3:02:46, 3.49it/s] 90%|████████▉ | 333235/371472 [4:20:56<3:18:03, 3.22it/s] 90%|████████▉ | 333236/371472 [4:20:56<3:15:49, 3.25it/s] 90%|████████▉ | 333237/371472 [4:20:56<3:11:43, 3.32it/s] 90%|████████▉ | 333238/371472 [4:20:57<3:12:46, 3.31it/s] 90%|████████▉ | 333239/371472 [4:20:57<3:15:08, 3.27it/s] 90%|████████▉ | 333240/371472 [4:20:57<3:09:55, 3.35it/s] {'loss': 2.4548, 'learning_rate': 1.926781443254541e-07, 'epoch': 14.35} + 90%|████████▉ | 333240/371472 [4:20:57<3:09:55, 3.35it/s] 90%|████████▉ | 333241/371472 [4:20:58<3:10:21, 3.35it/s] 90%|████████▉ | 333242/371472 [4:20:58<3:09:38, 3.36it/s] 90%|████████▉ | 333243/371472 [4:20:58<3:05:15, 3.44it/s] 90%|████████▉ | 333244/371472 [4:20:59<3:01:29, 3.51it/s] 90%|████████▉ | 333245/371472 [4:20:59<3:00:20, 3.53it/s] 90%|████████▉ | 333246/371472 [4:20:59<3:01:34, 3.51it/s] 90%|████████▉ | 333247/371472 [4:20:59<3:09:10, 3.37it/s] 90%|████████▉ | 333248/371472 [4:21:00<3:10:57, 3.34it/s] 90%|████████▉ | 333249/371472 [4:21:00<3:16:11, 3.25it/s] 90%|████████▉ | 333250/371472 [4:21:00<3:10:08, 3.35it/s] 90%|████████▉ | 333251/371472 [4:21:01<3:06:59, 3.41it/s] 90%|████████▉ | 333252/371472 [4:21:01<3:17:31, 3.22it/s] 90%|████████▉ | 333253/371472 [4:21:01<3:14:34, 3.27it/s] 90%|████████▉ | 333254/371472 [4:21:02<3:13:43, 3.29it/s] 90%|████████▉ | 333255/371472 [4:21:02<3:09:59, 3.35it/s] 90%|████████▉ | 333256/371472 [4:21:02<3:05:02, 3.44it/s] 90%|████████▉ | 333257/371472 [4:21:02<3:02:56, 3.48it/s] 90%|████████▉ | 333258/371472 [4:21:03<3:16:16, 3.25it/s] 90%|████████▉ | 333259/371472 [4:21:03<3:12:15, 3.31it/s] 90%|████████▉ | 333260/371472 [4:21:03<3:10:59, 3.33it/s] {'loss': 2.5561, 'learning_rate': 1.9262966234997516e-07, 'epoch': 14.35} + 90%|████████▉ | 333260/371472 [4:21:03<3:10:59, 3.33it/s] 90%|████████▉ | 333261/371472 [4:21:04<3:15:37, 3.26it/s] 90%|████████▉ | 333262/371472 [4:21:04<3:05:42, 3.43it/s] 90%|████████▉ | 333263/371472 [4:21:04<3:13:12, 3.30it/s] 90%|████████▉ | 333264/371472 [4:21:04<3:06:18, 3.42it/s] 90%|████████▉ | 333265/371472 [4:21:05<3:08:06, 3.39it/s] 90%|████████▉ | 333266/371472 [4:21:05<3:09:50, 3.35it/s] 90%|████████▉ | 333267/371472 [4:21:05<3:03:29, 3.47it/s] 90%|████████▉ | 333268/371472 [4:21:06<3:05:43, 3.43it/s] 90%|████████▉ | 333269/371472 [4:21:06<2:59:48, 3.54it/s] 90%|████████▉ | 333270/371472 [4:21:06<2:58:36, 3.56it/s] 90%|████████▉ | 333271/371472 [4:21:06<2:54:06, 3.66it/s] 90%|████████▉ | 333272/371472 [4:21:07<2:51:57, 3.70it/s] 90%|████████▉ | 333273/371472 [4:21:07<3:05:22, 3.43it/s] 90%|████████▉ | 333274/371472 [4:21:07<3:00:52, 3.52it/s] 90%|████████▉ | 333275/371472 [4:21:08<3:16:17, 3.24it/s] 90%|████████▉ | 333276/371472 [4:21:08<3:11:23, 3.33it/s] 90%|████████▉ | 333277/371472 [4:21:08<3:04:42, 3.45it/s] 90%|████████▉ | 333278/371472 [4:21:09<3:01:02, 3.52it/s] 90%|████████▉ | 333279/371472 [4:21:09<2:58:02, 3.58it/s] 90%|████████▉ | 333280/371472 [4:21:09<3:00:41, 3.52it/s] {'loss': 2.5424, 'learning_rate': 1.9258118037449633e-07, 'epoch': 14.35} + 90%|████████▉ | 333280/371472 [4:21:09<3:00:41, 3.52it/s] 90%|████████▉ | 333281/371472 [4:21:09<3:06:07, 3.42it/s] 90%|████████▉ | 333282/371472 [4:21:10<3:01:20, 3.51it/s] 90%|████████▉ | 333283/371472 [4:21:10<3:09:21, 3.36it/s] 90%|████████▉ | 333284/371472 [4:21:10<3:13:48, 3.28it/s] 90%|████████▉ | 333285/371472 [4:21:11<3:07:11, 3.40it/s] 90%|████████▉ | 333286/371472 [4:21:11<3:18:35, 3.20it/s] 90%|████████▉ | 333287/371472 [4:21:11<3:17:54, 3.22it/s] 90%|████████▉ | 333288/371472 [4:21:11<3:08:42, 3.37it/s] 90%|████████▉ | 333289/371472 [4:21:12<3:01:28, 3.51it/s] 90%|████████▉ | 333290/371472 [4:21:12<3:21:07, 3.16it/s] 90%|████████▉ | 333291/371472 [4:21:12<3:13:55, 3.28it/s] 90%|████████▉ | 333292/371472 [4:21:13<3:11:19, 3.33it/s] 90%|████████▉ | 333293/371472 [4:21:13<3:09:43, 3.35it/s] 90%|████████▉ | 333294/371472 [4:21:13<3:09:35, 3.36it/s] 90%|████████▉ | 333295/371472 [4:21:14<3:07:17, 3.40it/s] 90%|████████▉ | 333296/371472 [4:21:14<3:07:34, 3.39it/s] 90%|████████▉ | 333297/371472 [4:21:14<3:04:44, 3.44it/s] 90%|████████▉ | 333298/371472 [4:21:15<3:17:54, 3.21it/s] 90%|████████▉ | 333299/371472 [4:21:15<3:09:04, 3.36it/s] 90%|████████▉ | 333300/371472 [4:21:15<3:12:07, 3.31it/s] {'loss': 2.5594, 'learning_rate': 1.925326983990174e-07, 'epoch': 14.36} + 90%|████████▉ | 333300/371472 [4:21:15<3:12:07, 3.31it/s] 90%|████████▉ | 333301/371472 [4:21:15<3:12:15, 3.31it/s] 90%|████████▉ | 333302/371472 [4:21:16<3:19:07, 3.19it/s] 90%|████████▉ | 333303/371472 [4:21:16<3:16:39, 3.23it/s] 90%|████████▉ | 333304/371472 [4:21:16<3:14:19, 3.27it/s] 90%|████████▉ | 333305/371472 [4:21:17<3:10:54, 3.33it/s] 90%|████████▉ | 333306/371472 [4:21:17<3:08:55, 3.37it/s] 90%|████████▉ | 333307/371472 [4:21:17<3:02:13, 3.49it/s] 90%|████████▉ | 333308/371472 [4:21:17<3:08:00, 3.38it/s] 90%|████████▉ | 333309/371472 [4:21:18<3:02:39, 3.48it/s] 90%|████████▉ | 333310/371472 [4:21:18<3:04:15, 3.45it/s] 90%|████████▉ | 333311/371472 [4:21:18<3:01:06, 3.51it/s] 90%|████████▉ | 333312/371472 [4:21:19<2:56:24, 3.61it/s] 90%|████████▉ | 333313/371472 [4:21:19<3:10:08, 3.34it/s] 90%|████████▉ | 333314/371472 [4:21:19<3:05:29, 3.43it/s] 90%|████████▉ | 333315/371472 [4:21:20<3:06:43, 3.41it/s] 90%|████████▉ | 333316/371472 [4:21:20<3:16:29, 3.24it/s] 90%|████████▉ | 333317/371472 [4:21:20<3:13:47, 3.28it/s] 90%|████████▉ | 333318/371472 [4:21:20<3:08:55, 3.37it/s] 90%|████████▉ | 333319/371472 [4:21:21<3:02:34, 3.48it/s] 90%|████████▉ | 333320/371472 [4:21:21<2:56:38, 3.60it/s] {'loss': 2.5578, 'learning_rate': 1.9248421642353853e-07, 'epoch': 14.36} + 90%|████████▉ | 333320/371472 [4:21:21<2:56:38, 3.60it/s] 90%|████████▉ | 333321/371472 [4:21:21<2:55:31, 3.62it/s] 90%|████████▉ | 333322/371472 [4:21:21<2:54:50, 3.64it/s] 90%|████████▉ | 333323/371472 [4:21:22<2:52:01, 3.70it/s] 90%|████████▉ | 333324/371472 [4:21:22<2:52:40, 3.68it/s] 90%|████████▉ | 333325/371472 [4:21:22<2:50:13, 3.73it/s] 90%|████████▉ | 333326/371472 [4:21:23<2:53:42, 3.66it/s] 90%|████████▉ | 333327/371472 [4:21:23<2:54:29, 3.64it/s] 90%|████████▉ | 333328/371472 [4:21:23<2:57:26, 3.58it/s] 90%|████████▉ | 333329/371472 [4:21:23<3:02:35, 3.48it/s] 90%|████████▉ | 333330/371472 [4:21:24<3:12:25, 3.30it/s] 90%|████████▉ | 333331/371472 [4:21:24<3:09:47, 3.35it/s] 90%|████████▉ | 333332/371472 [4:21:24<3:03:32, 3.46it/s] 90%|████████▉ | 333333/371472 [4:21:25<2:56:57, 3.59it/s] 90%|████████▉ | 333334/371472 [4:21:25<2:57:51, 3.57it/s] 90%|████████▉ | 333335/371472 [4:21:25<2:58:40, 3.56it/s] 90%|████████▉ | 333336/371472 [4:21:25<3:04:11, 3.45it/s] 90%|████████▉ | 333337/371472 [4:21:26<3:05:41, 3.42it/s] 90%|████████▉ | 333338/371472 [4:21:26<2:59:31, 3.54it/s] 90%|████████▉ | 333339/371472 [4:21:26<3:00:49, 3.51it/s] 90%|████████▉ | 333340/371472 [4:21:27<3:01:35, 3.50it/s] {'loss': 2.6253, 'learning_rate': 1.924357344480596e-07, 'epoch': 14.36} + 90%|████████▉ | 333340/371472 [4:21:27<3:01:35, 3.50it/s] 90%|████████▉ | 333341/371472 [4:21:27<2:59:55, 3.53it/s] 90%|████████▉ | 333342/371472 [4:21:27<3:10:00, 3.34it/s] 90%|████████▉ | 333343/371472 [4:21:27<3:05:16, 3.43it/s] 90%|████████▉ | 333344/371472 [4:21:28<2:56:43, 3.60it/s] 90%|████████▉ | 333345/371472 [4:21:28<2:55:26, 3.62it/s] 90%|████████▉ | 333346/371472 [4:21:28<2:58:23, 3.56it/s] 90%|████████▉ | 333347/371472 [4:21:29<3:00:17, 3.52it/s] 90%|████████▉ | 333348/371472 [4:21:29<3:00:50, 3.51it/s] 90%|████████▉ | 333349/371472 [4:21:29<2:57:09, 3.59it/s] 90%|████████▉ | 333350/371472 [4:21:29<2:55:53, 3.61it/s] 90%|████████▉ | 333351/371472 [4:21:30<3:00:45, 3.51it/s] 90%|████████▉ | 333352/371472 [4:21:30<2:55:20, 3.62it/s] 90%|████████▉ | 333353/371472 [4:21:30<2:53:42, 3.66it/s] 90%|████████▉ | 333354/371472 [4:21:30<2:50:35, 3.72it/s] 90%|████████▉ | 333355/371472 [4:21:31<2:48:32, 3.77it/s] 90%|████████▉ | 333356/371472 [4:21:31<2:51:50, 3.70it/s] 90%|████████▉ | 333357/371472 [4:21:31<2:51:35, 3.70it/s] 90%|████████▉ | 333358/371472 [4:21:32<2:50:40, 3.72it/s] 90%|████████▉ | 333359/371472 [4:21:32<2:48:08, 3.78it/s] 90%|████████▉ | 333360/371472 [4:21:32<2:55:31, 3.62it/s] {'loss': 2.7186, 'learning_rate': 1.9238725247258078e-07, 'epoch': 14.36} + 90%|████████▉ | 333360/371472 [4:21:32<2:55:31, 3.62it/s] 90%|████████▉ | 333361/371472 [4:21:32<2:55:55, 3.61it/s] 90%|████████▉ | 333362/371472 [4:21:33<2:55:49, 3.61it/s] 90%|████████▉ | 333363/371472 [4:21:33<2:59:43, 3.53it/s] 90%|████████▉ | 333364/371472 [4:21:33<2:57:37, 3.58it/s] 90%|████████▉ | 333365/371472 [4:21:34<3:03:18, 3.46it/s] 90%|████████▉ | 333366/371472 [4:21:34<2:59:44, 3.53it/s] 90%|████████▉ | 333367/371472 [4:21:34<3:08:40, 3.37it/s] 90%|████████▉ | 333368/371472 [4:21:34<3:07:43, 3.38it/s] 90%|████████▉ | 333369/371472 [4:21:35<3:06:52, 3.40it/s] 90%|████████▉ | 333370/371472 [4:21:35<3:05:41, 3.42it/s] 90%|████████▉ | 333371/371472 [4:21:35<3:12:57, 3.29it/s] 90%|████████▉ | 333372/371472 [4:21:36<3:11:06, 3.32it/s] 90%|████████▉ | 333373/371472 [4:21:36<3:15:46, 3.24it/s] 90%|████████▉ | 333374/371472 [4:21:36<3:16:49, 3.23it/s] 90%|████████▉ | 333375/371472 [4:21:37<3:26:54, 3.07it/s] 90%|████████▉ | 333376/371472 [4:21:37<3:21:10, 3.16it/s] 90%|████████▉ | 333377/371472 [4:21:37<3:11:19, 3.32it/s] 90%|████████▉ | 333378/371472 [4:21:38<3:13:47, 3.28it/s] 90%|████████▉ | 333379/371472 [4:21:38<3:13:30, 3.28it/s] 90%|████████▉ | 333380/371472 [4:21:38<3:02:31, 3.48it/s] {'loss': 2.579, 'learning_rate': 1.9233877049710182e-07, 'epoch': 14.36} + 90%|████████▉ | 333380/371472 [4:21:38<3:02:31, 3.48it/s] 90%|████████▉ | 333381/371472 [4:21:38<2:59:52, 3.53it/s] 90%|████████▉ | 333382/371472 [4:21:39<2:59:26, 3.54it/s] 90%|████████▉ | 333383/371472 [4:21:39<2:52:55, 3.67it/s] 90%|████████▉ | 333384/371472 [4:21:39<2:53:25, 3.66it/s] 90%|████████▉ | 333385/371472 [4:21:39<2:51:34, 3.70it/s] 90%|████████▉ | 333386/371472 [4:21:40<2:57:44, 3.57it/s] 90%|████████▉ | 333387/371472 [4:21:40<2:51:08, 3.71it/s] 90%|████████▉ | 333388/371472 [4:21:40<2:53:35, 3.66it/s] 90%|████████▉ | 333389/371472 [4:21:41<2:59:51, 3.53it/s] 90%|████████▉ | 333390/371472 [4:21:41<3:02:23, 3.48it/s] 90%|████████▉ | 333391/371472 [4:21:41<3:07:59, 3.38it/s] 90%|████████▉ | 333392/371472 [4:21:41<3:05:38, 3.42it/s] 90%|████████▉ | 333393/371472 [4:21:42<3:01:59, 3.49it/s] 90%|████████▉ | 333394/371472 [4:21:42<3:02:20, 3.48it/s] 90%|████████▉ | 333395/371472 [4:21:42<3:08:13, 3.37it/s] 90%|████████▉ | 333396/371472 [4:21:43<3:15:28, 3.25it/s] 90%|████████▉ | 333397/371472 [4:21:43<3:10:40, 3.33it/s] 90%|████████▉ | 333398/371472 [4:21:43<3:06:04, 3.41it/s] 90%|████████▉ | 333399/371472 [4:21:44<3:27:22, 3.06it/s] 90%|████████▉ | 333400/371472 [4:21:44<3:26:53, 3.07it/s] {'loss': 2.4764, 'learning_rate': 1.9229028852162295e-07, 'epoch': 14.36} + 90%|████████▉ | 333400/371472 [4:21:44<3:26:53, 3.07it/s] 90%|████████▉ | 333401/371472 [4:21:44<3:12:37, 3.29it/s] 90%|████████▉ | 333402/371472 [4:21:44<3:07:15, 3.39it/s] 90%|████████▉ | 333403/371472 [4:21:45<3:03:20, 3.46it/s] 90%|████████▉ | 333404/371472 [4:21:45<3:04:58, 3.43it/s] 90%|████████▉ | 333405/371472 [4:21:45<3:02:25, 3.48it/s] 90%|████████▉ | 333406/371472 [4:21:46<3:07:44, 3.38it/s] 90%|████████▉ | 333407/371472 [4:21:46<3:30:42, 3.01it/s] 90%|████████▉ | 333408/371472 [4:21:46<3:26:21, 3.07it/s] 90%|████████▉ | 333409/371472 [4:21:47<3:15:04, 3.25it/s] 90%|████████▉ | 333410/371472 [4:21:47<3:06:30, 3.40it/s] 90%|████████▉ | 333411/371472 [4:21:47<3:01:05, 3.50it/s] 90%|████████▉ | 333412/371472 [4:21:47<2:59:43, 3.53it/s] 90%|████████▉ | 333413/371472 [4:21:48<2:56:46, 3.59it/s] 90%|████████▉ | 333414/371472 [4:21:48<2:54:57, 3.63it/s] 90%|████████▉ | 333415/371472 [4:21:48<2:58:06, 3.56it/s] 90%|████████▉ | 333416/371472 [4:21:49<3:10:54, 3.32it/s] 90%|████████▉ | 333417/371472 [4:21:49<3:18:34, 3.19it/s] 90%|████████▉ | 333418/371472 [4:21:49<3:13:37, 3.28it/s] 90%|████████▉ | 333419/371472 [4:21:50<3:11:04, 3.32it/s] 90%|████████▉ | 333420/371472 [4:21:50<3:04:51, 3.43it/s] {'loss': 2.6434, 'learning_rate': 1.9224180654614405e-07, 'epoch': 14.36} + 90%|████████▉ | 333420/371472 [4:21:50<3:04:51, 3.43it/s] 90%|████████▉ | 333421/371472 [4:21:50<3:11:03, 3.32it/s] 90%|████████▉ | 333422/371472 [4:21:50<3:04:02, 3.45it/s] 90%|████████▉ | 333423/371472 [4:21:51<3:06:32, 3.40it/s] 90%|████████▉ | 333424/371472 [4:21:51<3:03:49, 3.45it/s] 90%|████████▉ | 333425/371472 [4:21:51<2:57:48, 3.57it/s] 90%|████████▉ | 333426/371472 [4:21:52<3:17:25, 3.21it/s] 90%|████████▉ | 333427/371472 [4:21:52<3:19:32, 3.18it/s] 90%|████████▉ | 333428/371472 [4:21:52<3:13:25, 3.28it/s] 90%|████████▉ | 333429/371472 [4:21:53<3:05:32, 3.42it/s] 90%|████████▉ | 333430/371472 [4:21:53<3:00:23, 3.51it/s] 90%|████████▉ | 333431/371472 [4:21:53<2:56:19, 3.60it/s] 90%|████████▉ | 333432/371472 [4:21:53<2:54:41, 3.63it/s] 90%|████████▉ | 333433/371472 [4:21:54<2:53:14, 3.66it/s] 90%|████████▉ | 333434/371472 [4:21:54<2:53:09, 3.66it/s] 90%|████████▉ | 333435/371472 [4:21:54<2:52:24, 3.68it/s] 90%|████████▉ | 333436/371472 [4:21:54<2:51:35, 3.69it/s] 90%|████████▉ | 333437/371472 [4:21:55<2:54:42, 3.63it/s] 90%|████████▉ | 333438/371472 [4:21:55<3:00:41, 3.51it/s] 90%|████████▉ | 333439/371472 [4:21:55<2:56:49, 3.58it/s] 90%|████████▉ | 333440/371472 [4:21:56<3:08:28, 3.36it/s] {'loss': 2.6498, 'learning_rate': 1.921933245706652e-07, 'epoch': 14.36} + 90%|████████▉ | 333440/371472 [4:21:56<3:08:28, 3.36it/s] 90%|████████▉ | 333441/371472 [4:21:56<3:16:03, 3.23it/s] 90%|████████▉ | 333442/371472 [4:21:56<3:22:06, 3.14it/s] 90%|████████▉ | 333443/371472 [4:21:57<3:20:48, 3.16it/s] 90%|████████▉ | 333444/371472 [4:21:57<3:12:22, 3.29it/s] 90%|████████▉ | 333445/371472 [4:21:57<3:13:29, 3.28it/s] 90%|████████▉ | 333446/371472 [4:21:57<3:07:08, 3.39it/s] 90%|████████▉ | 333447/371472 [4:21:58<3:02:53, 3.47it/s] 90%|████████▉ | 333448/371472 [4:21:58<3:03:52, 3.45it/s] 90%|████████▉ | 333449/371472 [4:21:58<2:59:41, 3.53it/s] 90%|████████▉ | 333450/371472 [4:21:59<2:53:16, 3.66it/s] 90%|████████▉ | 333451/371472 [4:21:59<2:54:36, 3.63it/s] 90%|████████▉ | 333452/371472 [4:21:59<2:58:23, 3.55it/s] 90%|████████▉ | 333453/371472 [4:21:59<2:56:00, 3.60it/s] 90%|████████▉ | 333454/371472 [4:22:00<3:00:33, 3.51it/s] 90%|████████▉ | 333455/371472 [4:22:00<2:59:37, 3.53it/s] 90%|████████▉ | 333456/371472 [4:22:00<2:56:24, 3.59it/s] 90%|████████▉ | 333457/371472 [4:22:00<2:48:32, 3.76it/s] 90%|████████▉ | 333458/371472 [4:22:01<2:57:51, 3.56it/s] 90%|████████▉ | 333459/371472 [4:22:01<2:54:44, 3.63it/s] 90%|████████▉ | 333460/371472 [4:22:01<2:49:31, 3.74it/s] {'loss': 2.5829, 'learning_rate': 1.9214484259518624e-07, 'epoch': 14.36} + 90%|████████▉ | 333460/371472 [4:22:01<2:49:31, 3.74it/s] 90%|████████▉ | 333461/371472 [4:22:02<2:54:04, 3.64it/s] 90%|████████▉ | 333462/371472 [4:22:02<2:49:38, 3.73it/s] 90%|████████▉ | 333463/371472 [4:22:02<2:58:01, 3.56it/s] 90%|████████▉ | 333464/371472 [4:22:02<2:53:45, 3.65it/s] 90%|████████▉ | 333465/371472 [4:22:03<2:52:03, 3.68it/s] 90%|████████▉ | 333466/371472 [4:22:03<2:48:59, 3.75it/s] 90%|████████▉ | 333467/371472 [4:22:03<2:53:17, 3.66it/s] 90%|████████▉ | 333468/371472 [4:22:03<2:52:04, 3.68it/s] 90%|████████▉ | 333469/371472 [4:22:04<2:54:30, 3.63it/s] 90%|████████▉ | 333470/371472 [4:22:04<2:58:17, 3.55it/s] 90%|████████▉ | 333471/371472 [4:22:04<2:57:52, 3.56it/s] 90%|████████▉ | 333472/371472 [4:22:05<2:57:34, 3.57it/s] 90%|████████▉ | 333473/371472 [4:22:05<2:54:23, 3.63it/s] 90%|████████▉ | 333474/371472 [4:22:05<3:00:43, 3.50it/s] 90%|████████▉ | 333475/371472 [4:22:05<2:58:22, 3.55it/s] 90%|████████▉ | 333476/371472 [4:22:06<2:57:30, 3.57it/s] 90%|████████▉ | 333477/371472 [4:22:06<3:00:54, 3.50it/s] 90%|████████▉ | 333478/371472 [4:22:06<3:04:43, 3.43it/s] 90%|████████▉ | 333479/371472 [4:22:07<3:10:19, 3.33it/s] 90%|████████▉ | 333480/371472 [4:22:07<3:03:50, 3.44it/s] {'loss': 2.5793, 'learning_rate': 1.9209636061970742e-07, 'epoch': 14.36} + 90%|████████▉ | 333480/371472 [4:22:07<3:03:50, 3.44it/s] 90%|████████▉ | 333481/371472 [4:22:07<2:58:42, 3.54it/s] 90%|████████▉ | 333482/371472 [4:22:07<2:56:17, 3.59it/s] 90%|████████▉ | 333483/371472 [4:22:08<2:59:16, 3.53it/s] 90%|████████▉ | 333484/371472 [4:22:08<2:57:56, 3.56it/s] 90%|████████▉ | 333485/371472 [4:22:08<3:03:15, 3.45it/s] 90%|████████▉ | 333486/371472 [4:22:09<3:01:09, 3.49it/s] 90%|████████▉ | 333487/371472 [4:22:09<2:59:04, 3.54it/s] 90%|████████▉ | 333488/371472 [4:22:09<2:53:47, 3.64it/s] 90%|████████▉ | 333489/371472 [4:22:09<2:50:43, 3.71it/s] 90%|████████▉ | 333490/371472 [4:22:10<2:51:10, 3.70it/s] 90%|████████▉ | 333491/371472 [4:22:10<2:49:41, 3.73it/s] 90%|████████▉ | 333492/371472 [4:22:10<2:56:37, 3.58it/s] 90%|████████▉ | 333493/371472 [4:22:10<2:52:59, 3.66it/s] 90%|████████▉ | 333494/371472 [4:22:11<2:56:08, 3.59it/s] 90%|████████▉ | 333495/371472 [4:22:11<2:59:39, 3.52it/s] 90%|████████▉ | 333496/371472 [4:22:11<3:01:12, 3.49it/s] 90%|████████▉ | 333497/371472 [4:22:12<3:13:09, 3.28it/s] 90%|████████▉ | 333498/371472 [4:22:12<3:07:28, 3.38it/s] 90%|████████▉ | 333499/371472 [4:22:12<3:09:46, 3.33it/s] 90%|████████▉ | 333500/371472 [4:22:13<3:16:00, 3.23it/s] {'loss': 2.5894, 'learning_rate': 1.9204787864422847e-07, 'epoch': 14.36} + 90%|████████▉ | 333500/371472 [4:22:13<3:16:00, 3.23it/s] 90%|████████▉ | 333501/371472 [4:22:13<3:17:18, 3.21it/s] 90%|████████▉ | 333502/371472 [4:22:13<3:07:52, 3.37it/s] 90%|████████▉ | 333503/371472 [4:22:13<3:03:54, 3.44it/s] 90%|████████▉ | 333504/371472 [4:22:14<2:55:01, 3.62it/s] 90%|████████▉ | 333505/371472 [4:22:14<2:57:17, 3.57it/s] 90%|████████▉ | 333506/371472 [4:22:14<3:04:27, 3.43it/s] 90%|████████▉ | 333507/371472 [4:22:15<3:05:55, 3.40it/s] 90%|████████▉ | 333508/371472 [4:22:15<3:03:26, 3.45it/s] 90%|████████▉ | 333509/371472 [4:22:15<3:00:49, 3.50it/s] 90%|████████▉ | 333510/371472 [4:22:15<2:57:24, 3.57it/s] 90%|████████▉ | 333511/371472 [4:22:16<2:55:03, 3.61it/s] 90%|████████▉ | 333512/371472 [4:22:16<3:01:16, 3.49it/s] 90%|████████▉ | 333513/371472 [4:22:16<3:03:16, 3.45it/s] 90%|████████▉ | 333514/371472 [4:22:17<3:00:27, 3.51it/s] 90%|████████▉ | 333515/371472 [4:22:17<3:00:40, 3.50it/s] 90%|████████▉ | 333516/371472 [4:22:17<3:01:08, 3.49it/s] 90%|████████▉ | 333517/371472 [4:22:17<2:53:02, 3.66it/s] 90%|████████▉ | 333518/371472 [4:22:18<2:53:06, 3.65it/s] 90%|████████▉ | 333519/371472 [4:22:18<2:57:02, 3.57it/s] 90%|████████▉ | 333520/371472 [4:22:18<2:55:48, 3.60it/s] {'loss': 2.3995, 'learning_rate': 1.9199939666874962e-07, 'epoch': 14.37} + 90%|████████▉ | 333520/371472 [4:22:18<2:55:48, 3.60it/s] 90%|████████▉ | 333521/371472 [4:22:19<3:06:54, 3.38it/s] 90%|████████▉ | 333522/371472 [4:22:19<3:06:19, 3.39it/s] 90%|████████▉ | 333523/371472 [4:22:19<3:08:13, 3.36it/s] 90%|████████▉ | 333524/371472 [4:22:20<3:15:20, 3.24it/s] 90%|████████▉ | 333525/371472 [4:22:20<3:09:42, 3.33it/s] 90%|████████▉ | 333526/371472 [4:22:20<3:01:06, 3.49it/s] 90%|████████▉ | 333527/371472 [4:22:20<2:54:06, 3.63it/s] 90%|████████▉ | 333528/371472 [4:22:21<3:03:11, 3.45it/s] 90%|████████▉ | 333529/371472 [4:22:21<2:57:18, 3.57it/s] 90%|████████▉ | 333530/371472 [4:22:21<2:54:01, 3.63it/s] 90%|████████▉ | 333531/371472 [4:22:21<2:52:53, 3.66it/s] 90%|████████▉ | 333532/371472 [4:22:22<2:52:01, 3.68it/s] 90%|████████▉ | 333533/371472 [4:22:22<2:49:12, 3.74it/s] 90%|████████▉ | 333534/371472 [4:22:22<2:45:51, 3.81it/s] 90%|████████▉ | 333535/371472 [4:22:23<2:58:44, 3.54it/s] 90%|████████▉ | 333536/371472 [4:22:23<2:55:38, 3.60it/s] 90%|████████▉ | 333537/371472 [4:22:23<2:52:24, 3.67it/s] 90%|████████▉ | 333538/371472 [4:22:23<2:50:16, 3.71it/s] 90%|████████▉ | 333539/371472 [4:22:24<3:08:30, 3.35it/s] 90%|████████▉ | 333540/371472 [4:22:24<3:10:54, 3.31it/s] {'loss': 2.7329, 'learning_rate': 1.919509146932707e-07, 'epoch': 14.37} + 90%|████████▉ | 333540/371472 [4:22:24<3:10:54, 3.31it/s] 90%|████████▉ | 333541/371472 [4:22:24<3:05:39, 3.41it/s] 90%|████████▉ | 333542/371472 [4:22:25<3:22:40, 3.12it/s] 90%|████████▉ | 333543/371472 [4:22:25<3:11:03, 3.31it/s] 90%|████████▉ | 333544/371472 [4:22:25<3:02:57, 3.45it/s] 90%|████████▉ | 333545/371472 [4:22:25<2:58:08, 3.55it/s] 90%|████████▉ | 333546/371472 [4:22:26<2:57:43, 3.56it/s] 90%|████████▉ | 333547/371472 [4:22:26<2:53:53, 3.63it/s] 90%|████████▉ | 333548/371472 [4:22:26<2:57:27, 3.56it/s] 90%|████████▉ | 333549/371472 [4:22:27<2:59:54, 3.51it/s] 90%|████████▉ | 333550/371472 [4:22:27<2:52:45, 3.66it/s] 90%|████████▉ | 333551/371472 [4:22:27<2:54:24, 3.62it/s] 90%|████████▉ | 333552/371472 [4:22:27<2:51:39, 3.68it/s] 90%|████████▉ | 333553/371472 [4:22:28<2:53:04, 3.65it/s] 90%|████████▉ | 333554/371472 [4:22:28<2:54:19, 3.63it/s] 90%|████████▉ | 333555/371472 [4:22:28<3:04:34, 3.42it/s] 90%|████████▉ | 333556/371472 [4:22:29<2:59:46, 3.51it/s] 90%|████████▉ | 333557/371472 [4:22:29<3:01:02, 3.49it/s] 90%|████████▉ | 333558/371472 [4:22:29<3:03:39, 3.44it/s] 90%|████████▉ | 333559/371472 [4:22:29<2:56:17, 3.58it/s] 90%|████████▉ | 333560/371472 [4:22:30<3:02:38, 3.46it/s] {'loss': 2.5861, 'learning_rate': 1.9190243271779184e-07, 'epoch': 14.37} + 90%|████████▉ | 333560/371472 [4:22:30<3:02:38, 3.46it/s] 90%|████████▉ | 333561/371472 [4:22:30<3:00:41, 3.50it/s] 90%|████████▉ | 333562/371472 [4:22:30<2:59:10, 3.53it/s] 90%|████████▉ | 333563/371472 [4:22:31<3:06:45, 3.38it/s] 90%|████████▉ | 333564/371472 [4:22:31<3:03:13, 3.45it/s] 90%|████████▉ | 333565/371472 [4:22:31<3:05:07, 3.41it/s] 90%|████████▉ | 333566/371472 [4:22:31<3:04:04, 3.43it/s] 90%|████████▉ | 333567/371472 [4:22:32<3:04:20, 3.43it/s] 90%|████████▉ | 333568/371472 [4:22:32<3:10:01, 3.32it/s] 90%|████████▉ | 333569/371472 [4:22:32<3:03:27, 3.44it/s] 90%|████████▉ | 333570/371472 [4:22:33<2:55:30, 3.60it/s] 90%|████████▉ | 333571/371472 [4:22:33<3:04:20, 3.43it/s] 90%|████████▉ | 333572/371472 [4:22:33<3:02:01, 3.47it/s] 90%|████████▉ | 333573/371472 [4:22:33<3:06:00, 3.40it/s] 90%|████████▉ | 333574/371472 [4:22:34<3:03:37, 3.44it/s] 90%|████████▉ | 333575/371472 [4:22:34<3:28:11, 3.03it/s] 90%|████████▉ | 333576/371472 [4:22:34<3:21:41, 3.13it/s] 90%|████████▉ | 333577/371472 [4:22:35<3:14:53, 3.24it/s] 90%|████████▉ | 333578/371472 [4:22:35<3:16:30, 3.21it/s] 90%|████████▉ | 333579/371472 [4:22:35<3:23:24, 3.10it/s] 90%|████████▉ | 333580/371472 [4:22:36<3:19:48, 3.16it/s] {'loss': 2.6629, 'learning_rate': 1.9185395074231288e-07, 'epoch': 14.37} + 90%|████████▉ | 333580/371472 [4:22:36<3:19:48, 3.16it/s] 90%|████████▉ | 333581/371472 [4:22:36<3:16:12, 3.22it/s] 90%|████████▉ | 333582/371472 [4:22:36<3:07:02, 3.38it/s] 90%|████████▉ | 333583/371472 [4:22:37<2:59:57, 3.51it/s] 90%|████████▉ | 333584/371472 [4:22:37<2:57:15, 3.56it/s] 90%|████████▉ | 333585/371472 [4:22:37<3:02:49, 3.45it/s] 90%|████████▉ | 333586/371472 [4:22:37<3:00:43, 3.49it/s] 90%|████████▉ | 333587/371472 [4:22:38<2:56:31, 3.58it/s] 90%|████████▉ | 333588/371472 [4:22:38<3:01:56, 3.47it/s] 90%|████████▉ | 333589/371472 [4:22:38<3:00:21, 3.50it/s] 90%|████████▉ | 333590/371472 [4:22:39<3:02:40, 3.46it/s] 90%|████████▉ | 333591/371472 [4:22:39<2:59:12, 3.52it/s] 90%|████████▉ | 333592/371472 [4:22:39<2:56:10, 3.58it/s] 90%|████████▉ | 333593/371472 [4:22:39<3:15:04, 3.24it/s] 90%|████████▉ | 333594/371472 [4:22:40<3:07:46, 3.36it/s] 90%|████████▉ | 333595/371472 [4:22:40<3:07:26, 3.37it/s] 90%|████████▉ | 333596/371472 [4:22:40<3:10:36, 3.31it/s] 90%|████████▉ | 333597/371472 [4:22:41<3:05:54, 3.40it/s] 90%|████████▉ | 333598/371472 [4:22:41<2:59:02, 3.53it/s] 90%|████████▉ | 333599/371472 [4:22:41<2:57:52, 3.55it/s] 90%|████████▉ | 333600/371472 [4:22:41<2:58:06, 3.54it/s] {'loss': 2.537, 'learning_rate': 1.9180546876683406e-07, 'epoch': 14.37} + 90%|████████▉ | 333600/371472 [4:22:41<2:58:06, 3.54it/s] 90%|████████▉ | 333601/371472 [4:22:42<2:54:20, 3.62it/s] 90%|████████▉ | 333602/371472 [4:22:42<3:09:45, 3.33it/s] 90%|████████▉ | 333603/371472 [4:22:42<3:05:44, 3.40it/s] 90%|████████▉ | 333604/371472 [4:22:43<2:56:58, 3.57it/s] 90%|████████▉ | 333605/371472 [4:22:43<3:07:47, 3.36it/s] 90%|████████▉ | 333606/371472 [4:22:43<3:07:14, 3.37it/s] 90%|████████▉ | 333607/371472 [4:22:43<3:00:58, 3.49it/s] 90%|████████▉ | 333608/371472 [4:22:44<3:04:55, 3.41it/s] 90%|████████▉ | 333609/371472 [4:22:44<2:58:25, 3.54it/s] 90%|████████▉ | 333610/371472 [4:22:44<2:51:53, 3.67it/s] 90%|████████▉ | 333611/371472 [4:22:45<2:50:24, 3.70it/s] 90%|████████▉ | 333612/371472 [4:22:45<2:55:33, 3.59it/s] 90%|████████▉ | 333613/371472 [4:22:45<2:58:46, 3.53it/s] 90%|████████▉ | 333614/371472 [4:22:45<3:05:40, 3.40it/s] 90%|████████▉ | 333615/371472 [4:22:46<3:03:25, 3.44it/s] 90%|████████▉ | 333616/371472 [4:22:46<3:05:12, 3.41it/s] 90%|████████▉ | 333617/371472 [4:22:46<3:20:11, 3.15it/s] 90%|████████▉ | 333618/371472 [4:22:47<3:16:12, 3.22it/s] 90%|████████▉ | 333619/371472 [4:22:47<3:13:26, 3.26it/s] 90%|████████▉ | 333620/371472 [4:22:47<3:24:16, 3.09it/s] {'loss': 2.6925, 'learning_rate': 1.9175698679135513e-07, 'epoch': 14.37} + 90%|████████▉ | 333620/371472 [4:22:47<3:24:16, 3.09it/s] 90%|████████▉ | 333621/371472 [4:22:48<3:08:47, 3.34it/s] 90%|████████▉ | 333622/371472 [4:22:48<3:00:54, 3.49it/s] 90%|████████▉ | 333623/371472 [4:22:48<3:00:27, 3.50it/s] 90%|████████▉ | 333624/371472 [4:22:48<3:04:33, 3.42it/s] 90%|████████▉ | 333625/371472 [4:22:49<3:11:17, 3.30it/s] 90%|████████▉ | 333626/371472 [4:22:49<3:16:50, 3.20it/s] 90%|████████▉ | 333627/371472 [4:22:49<3:06:06, 3.39it/s] 90%|████████▉ | 333628/371472 [4:22:50<3:00:41, 3.49it/s] 90%|████████▉ | 333629/371472 [4:22:50<3:10:55, 3.30it/s] 90%|████████▉ | 333630/371472 [4:22:50<3:11:27, 3.29it/s] 90%|████████▉ | 333631/371472 [4:22:51<3:05:12, 3.41it/s] 90%|████████▉ | 333632/371472 [4:22:51<3:14:30, 3.24it/s] 90%|████████▉ | 333633/371472 [4:22:51<3:06:13, 3.39it/s] 90%|████████▉ | 333634/371472 [4:22:52<3:20:02, 3.15it/s] 90%|████████▉ | 333635/371472 [4:22:52<3:23:45, 3.09it/s] 90%|████████▉ | 333636/371472 [4:22:52<3:30:19, 3.00it/s] 90%|████████▉ | 333637/371472 [4:22:53<3:23:36, 3.10it/s] 90%|████████▉ | 333638/371472 [4:22:53<3:26:36, 3.05it/s] 90%|████████▉ | 333639/371472 [4:22:53<3:40:17, 2.86it/s] 90%|████████▉ | 333640/371472 [4:22:54<3:22:44, 3.11it/s] {'loss': 2.6224, 'learning_rate': 1.9170850481587626e-07, 'epoch': 14.37} + 90%|████████▉ | 333640/371472 [4:22:54<3:22:44, 3.11it/s] 90%|████████▉ | 333641/371472 [4:22:54<3:12:16, 3.28it/s] 90%|████████▉ | 333642/371472 [4:22:54<3:13:31, 3.26it/s] 90%|████████▉ | 333643/371472 [4:22:54<3:17:56, 3.19it/s] 90%|████████▉ | 333644/371472 [4:22:55<3:10:21, 3.31it/s] 90%|████████▉ | 333645/371472 [4:22:55<3:08:56, 3.34it/s] 90%|████████▉ | 333646/371472 [4:22:55<3:40:13, 2.86it/s] 90%|████████▉ | 333647/371472 [4:22:56<3:25:09, 3.07it/s] 90%|████████▉ | 333648/371472 [4:22:56<3:16:48, 3.20it/s] 90%|████████▉ | 333649/371472 [4:22:56<3:10:36, 3.31it/s] 90%|████████▉ | 333650/371472 [4:22:57<3:05:46, 3.39it/s] 90%|████████▉ | 333651/371472 [4:22:57<3:01:24, 3.47it/s] 90%|████████▉ | 333652/371472 [4:22:57<3:13:28, 3.26it/s] 90%|████████▉ | 333653/371472 [4:22:58<3:13:48, 3.25it/s] 90%|████████▉ | 333654/371472 [4:22:58<3:10:28, 3.31it/s] 90%|████████▉ | 333655/371472 [4:22:58<3:06:48, 3.37it/s] 90%|████████▉ | 333656/371472 [4:22:58<3:00:52, 3.48it/s] 90%|████████▉ | 333657/371472 [4:22:59<2:54:58, 3.60it/s] 90%|████████▉ | 333658/371472 [4:22:59<2:55:46, 3.59it/s] 90%|████████▉ | 333659/371472 [4:22:59<2:55:00, 3.60it/s] 90%|████████▉ | 333660/371472 [4:22:59<2:59:47, 3.51it/s] {'loss': 2.5203, 'learning_rate': 1.9166002284039733e-07, 'epoch': 14.37} + 90%|████████▉ | 333660/371472 [4:22:59<2:59:47, 3.51it/s] 90%|████████▉ | 333661/371472 [4:23:00<2:53:10, 3.64it/s] 90%|████████▉ | 333662/371472 [4:23:00<2:59:14, 3.52it/s] 90%|████████▉ | 333663/371472 [4:23:00<3:08:37, 3.34it/s] 90%|████████▉ | 333664/371472 [4:23:01<3:07:16, 3.36it/s] 90%|████████▉ | 333665/371472 [4:23:01<3:00:55, 3.48it/s] 90%|████████▉ | 333666/371472 [4:23:01<3:02:35, 3.45it/s] 90%|████████▉ | 333667/371472 [4:23:02<3:01:19, 3.47it/s] 90%|████████▉ | 333668/371472 [4:23:02<3:12:16, 3.28it/s] 90%|████████▉ | 333669/371472 [4:23:02<3:04:37, 3.41it/s] 90%|████████▉ | 333670/371472 [4:23:02<3:03:50, 3.43it/s] 90%|████████▉ | 333671/371472 [4:23:03<3:09:03, 3.33it/s] 90%|████████▉ | 333672/371472 [4:23:03<3:03:18, 3.44it/s] 90%|████████▉ | 333673/371472 [4:23:03<3:04:59, 3.41it/s] 90%|████████▉ | 333674/371472 [4:23:04<3:03:02, 3.44it/s] 90%|████████▉ | 333675/371472 [4:23:04<2:56:26, 3.57it/s] 90%|████████▉ | 333676/371472 [4:23:04<2:57:47, 3.54it/s] 90%|████████▉ | 333677/371472 [4:23:04<3:01:36, 3.47it/s] 90%|████████▉ | 333678/371472 [4:23:05<2:59:42, 3.51it/s] 90%|████████▉ | 333679/371472 [4:23:05<3:11:11, 3.29it/s] 90%|████████▉ | 333680/371472 [4:23:05<3:01:33, 3.47it/s] {'loss': 2.4317, 'learning_rate': 1.916115408649184e-07, 'epoch': 14.37} + 90%|████████▉ | 333680/371472 [4:23:05<3:01:33, 3.47it/s] 90%|████████▉ | 333681/371472 [4:23:06<3:18:57, 3.17it/s] 90%|████████▉ | 333682/371472 [4:23:06<3:16:20, 3.21it/s] 90%|████████▉ | 333683/371472 [4:23:06<3:17:39, 3.19it/s] 90%|████████▉ | 333684/371472 [4:23:07<3:06:47, 3.37it/s] 90%|████████▉ | 333685/371472 [4:23:07<3:15:10, 3.23it/s] 90%|████████▉ | 333686/371472 [4:23:07<3:04:55, 3.41it/s] 90%|████████▉ | 333687/371472 [4:23:07<3:10:31, 3.31it/s] 90%|████████▉ | 333688/371472 [4:23:08<3:01:56, 3.46it/s] 90%|████████▉ | 333689/371472 [4:23:08<2:57:56, 3.54it/s] 90%|████████▉ | 333690/371472 [4:23:08<3:06:45, 3.37it/s] 90%|████████▉ | 333691/371472 [4:23:09<3:09:35, 3.32it/s] 90%|████████▉ | 333692/371472 [4:23:09<3:03:51, 3.42it/s] 90%|████████▉ | 333693/371472 [4:23:09<3:04:03, 3.42it/s] 90%|████████▉ | 333694/371472 [4:23:10<3:04:06, 3.42it/s] 90%|██████���█▉ | 333695/371472 [4:23:10<2:57:38, 3.54it/s] 90%|████████▉ | 333696/371472 [4:23:10<2:53:10, 3.64it/s] 90%|████████▉ | 333697/371472 [4:23:10<2:51:31, 3.67it/s] 90%|████████▉ | 333698/371472 [4:23:11<2:51:12, 3.68it/s] 90%|████████▉ | 333699/371472 [4:23:11<2:53:05, 3.64it/s] 90%|████████▉ | 333700/371472 [4:23:11<2:56:22, 3.57it/s] {'loss': 2.5561, 'learning_rate': 1.9156305888943952e-07, 'epoch': 14.37} + 90%|████████▉ | 333700/371472 [4:23:11<2:56:22, 3.57it/s] 90%|████████▉ | 333701/371472 [4:23:11<2:53:30, 3.63it/s] 90%|████████▉ | 333702/371472 [4:23:12<3:01:58, 3.46it/s] 90%|████████▉ | 333703/371472 [4:23:12<2:51:55, 3.66it/s] 90%|████████▉ | 333704/371472 [4:23:12<2:53:15, 3.63it/s] 90%|████████▉ | 333705/371472 [4:23:12<2:51:10, 3.68it/s] 90%|████████▉ | 333706/371472 [4:23:13<2:55:59, 3.58it/s] 90%|████████▉ | 333707/371472 [4:23:13<3:06:43, 3.37it/s] 90%|████████▉ | 333708/371472 [4:23:13<2:58:09, 3.53it/s] 90%|████████▉ | 333709/371472 [4:23:14<3:06:46, 3.37it/s] 90%|████████▉ | 333710/371472 [4:23:14<3:07:05, 3.36it/s] 90%|████████▉ | 333711/371472 [4:23:14<2:59:45, 3.50it/s] 90%|████████▉ | 333712/371472 [4:23:15<2:58:13, 3.53it/s] 90%|████████▉ | 333713/371472 [4:23:15<2:51:52, 3.66it/s] 90%|████████▉ | 333714/371472 [4:23:15<3:03:28, 3.43it/s] 90%|████████▉ | 333715/371472 [4:23:15<3:12:23, 3.27it/s] 90%|████████▉ | 333716/371472 [4:23:16<3:08:58, 3.33it/s] 90%|████████▉ | 333717/371472 [4:23:16<3:11:16, 3.29it/s] 90%|████████▉ | 333718/371472 [4:23:16<3:09:07, 3.33it/s] 90%|████████▉ | 333719/371472 [4:23:17<3:07:15, 3.36it/s] 90%|████████▉ | 333720/371472 [4:23:17<3:08:10, 3.34it/s] {'loss': 2.5688, 'learning_rate': 1.915145769139606e-07, 'epoch': 14.37} + 90%|████████▉ | 333720/371472 [4:23:17<3:08:10, 3.34it/s] 90%|████████▉ | 333721/371472 [4:23:17<3:03:41, 3.43it/s] 90%|████████▉ | 333722/371472 [4:23:18<3:01:01, 3.48it/s] 90%|████████▉ | 333723/371472 [4:23:18<3:19:33, 3.15it/s] 90%|████████▉ | 333724/371472 [4:23:18<3:14:28, 3.24it/s] 90%|████████▉ | 333725/371472 [4:23:18<3:05:41, 3.39it/s] 90%|████████▉ | 333726/371472 [4:23:19<3:05:05, 3.40it/s] 90%|████████▉ | 333727/371472 [4:23:19<2:59:17, 3.51it/s] 90%|████████▉ | 333728/371472 [4:23:19<3:08:56, 3.33it/s] 90%|████████▉ | 333729/371472 [4:23:20<3:03:04, 3.44it/s] 90%|████████▉ | 333730/371472 [4:23:20<3:05:30, 3.39it/s] 90%|████████▉ | 333731/371472 [4:23:20<3:19:59, 3.15it/s] 90%|████████▉ | 333732/371472 [4:23:21<3:05:56, 3.38it/s] 90%|████████▉ | 333733/371472 [4:23:21<3:04:30, 3.41it/s] 90%|████████▉ | 333734/371472 [4:23:21<3:03:57, 3.42it/s] 90%|████████▉ | 333735/371472 [4:23:21<3:01:26, 3.47it/s] 90%|████████▉ | 333736/371472 [4:23:22<2:59:09, 3.51it/s] 90%|████████▉ | 333737/371472 [4:23:22<2:55:02, 3.59it/s] 90%|████████▉ | 333738/371472 [4:23:22<2:51:58, 3.66it/s] 90%|████████▉ | 333739/371472 [4:23:22<2:48:41, 3.73it/s] 90%|████████▉ | 333740/371472 [4:23:23<2:46:00, 3.79it/s] {'loss': 2.5172, 'learning_rate': 1.9146609493848175e-07, 'epoch': 14.37} + 90%|████████▉ | 333740/371472 [4:23:23<2:46:00, 3.79it/s] 90%|████████▉ | 333741/371472 [4:23:23<2:47:24, 3.76it/s] 90%|████████▉ | 333742/371472 [4:23:23<2:49:35, 3.71it/s] 90%|████████▉ | 333743/371472 [4:23:23<2:45:58, 3.79it/s] 90%|████████▉ | 333744/371472 [4:23:24<2:49:18, 3.71it/s] 90%|████████▉ | 333745/371472 [4:23:24<2:53:47, 3.62it/s] 90%|████████▉ | 333746/371472 [4:23:24<2:53:29, 3.62it/s] 90%|████████▉ | 333747/371472 [4:23:25<2:50:35, 3.69it/s] 90%|████████▉ | 333748/371472 [4:23:25<3:01:48, 3.46it/s] 90%|████████▉ | 333749/371472 [4:23:25<2:59:11, 3.51it/s] 90%|████████▉ | 333750/371472 [4:23:26<3:03:17, 3.43it/s] 90%|████████▉ | 333751/371472 [4:23:26<3:06:17, 3.37it/s] 90%|████████▉ | 333752/371472 [4:23:26<3:08:55, 3.33it/s] 90%|████████▉ | 333753/371472 [4:23:26<3:09:10, 3.32it/s] 90%|████████▉ | 333754/371472 [4:23:27<3:10:35, 3.30it/s] 90%|████████▉ | 333755/371472 [4:23:27<3:03:41, 3.42it/s] 90%|████████▉ | 333756/371472 [4:23:27<2:55:33, 3.58it/s] 90%|████████▉ | 333757/371472 [4:23:28<2:54:29, 3.60it/s] 90%|████████▉ | 333758/371472 [4:23:28<2:51:44, 3.66it/s] 90%|████████▉ | 333759/371472 [4:23:28<2:50:31, 3.69it/s] 90%|████████▉ | 333760/371472 [4:23:28<3:00:19, 3.49it/s] {'loss': 2.6316, 'learning_rate': 1.9141761296300282e-07, 'epoch': 14.38} + 90%|████████▉ | 333760/371472 [4:23:28<3:00:19, 3.49it/s] 90%|████████▉ | 333761/371472 [4:23:29<2:57:27, 3.54it/s] 90%|████████▉ | 333762/371472 [4:23:29<2:55:31, 3.58it/s] 90%|████████▉ | 333763/371472 [4:23:29<2:53:35, 3.62it/s] 90%|████████▉ | 333764/371472 [4:23:29<2:49:15, 3.71it/s] 90%|████████▉ | 333765/371472 [4:23:30<2:44:43, 3.82it/s] 90%|████████▉ | 333766/371472 [4:23:30<2:43:12, 3.85it/s] 90%|████████▉ | 333767/371472 [4:23:30<2:51:23, 3.67it/s] 90%|████████▉ | 333768/371472 [4:23:31<2:47:39, 3.75it/s] 90%|████████▉ | 333769/371472 [4:23:31<2:52:03, 3.65it/s] 90%|████████▉ | 333770/371472 [4:23:31<2:56:21, 3.56it/s] 90%|████████▉ | 333771/371472 [4:23:31<2:56:58, 3.55it/s] 90%|████████▉ | 333772/371472 [4:23:32<3:03:47, 3.42it/s] 90%|████████▉ | 333773/371472 [4:23:32<2:54:52, 3.59it/s] 90%|████████▉ | 333774/371472 [4:23:32<2:48:59, 3.72it/s] 90%|████████▉ | 333775/371472 [4:23:32<2:43:53, 3.83it/s] 90%|████████▉ | 333776/371472 [4:23:33<2:47:43, 3.75it/s] 90%|████████▉ | 333777/371472 [4:23:33<2:46:44, 3.77it/s] 90%|████████▉ | 333778/371472 [4:23:33<2:58:23, 3.52it/s] 90%|████████▉ | 333779/371472 [4:23:34<3:02:26, 3.44it/s] 90%|████████▉ | 333780/371472 [4:23:34<3:11:19, 3.28it/s] {'loss': 2.6162, 'learning_rate': 1.9136913098752397e-07, 'epoch': 14.38} + 90%|████████▉ | 333780/371472 [4:23:34<3:11:19, 3.28it/s] 90%|████████▉ | 333781/371472 [4:23:34<3:05:06, 3.39it/s] 90%|████████▉ | 333782/371472 [4:23:35<3:09:13, 3.32it/s] 90%|████████▉ | 333783/371472 [4:23:35<3:01:13, 3.47it/s] 90%|████████▉ | 333784/371472 [4:23:35<2:57:39, 3.54it/s] 90%|████████▉ | 333785/371472 [4:23:35<2:52:05, 3.65it/s] 90%|████████▉ | 333786/371472 [4:23:36<2:47:58, 3.74it/s] 90%|████████▉ | 333787/371472 [4:23:36<2:46:21, 3.78it/s] 90%|████████▉ | 333788/371472 [4:23:36<2:56:21, 3.56it/s] 90%|████████▉ | 333789/371472 [4:23:36<2:51:55, 3.65it/s] 90%|████████▉ | 333790/371472 [4:23:37<2:50:36, 3.68it/s] 90%|████████▉ | 333791/371472 [4:23:37<2:51:46, 3.66it/s] 90%|████████▉ | 333792/371472 [4:23:37<2:57:43, 3.53it/s] 90%|████████▉ | 333793/371472 [4:23:38<2:52:19, 3.64it/s] 90%|████████▉ | 333794/371472 [4:23:38<2:54:03, 3.61it/s] 90%|████████▉ | 333795/371472 [4:23:38<2:53:16, 3.62it/s] 90%|████████▉ | 333796/371472 [4:23:38<2:54:07, 3.61it/s] 90%|████████▉ | 333797/371472 [4:23:39<2:56:29, 3.56it/s] 90%|████████▉ | 333798/371472 [4:23:39<2:59:04, 3.51it/s] 90%|████████▉ | 333799/371472 [4:23:39<3:28:41, 3.01it/s] 90%|████████▉ | 333800/371472 [4:23:40<3:20:55, 3.12it/s] {'loss': 2.6452, 'learning_rate': 1.9132064901204504e-07, 'epoch': 14.38} + 90%|████████▉ | 333800/371472 [4:23:40<3:20:55, 3.12it/s] 90%|████████▉ | 333801/371472 [4:23:40<3:12:33, 3.26it/s] 90%|████████▉ | 333802/371472 [4:23:40<3:10:44, 3.29it/s] 90%|████████▉ | 333803/371472 [4:23:41<3:14:39, 3.23it/s] 90%|████████▉ | 333804/371472 [4:23:41<3:11:40, 3.28it/s] 90%|████████▉ | 333805/371472 [4:23:41<3:28:49, 3.01it/s] 90%|████████▉ | 333806/371472 [4:23:42<3:27:07, 3.03it/s] 90%|████████▉ | 333807/371472 [4:23:42<3:23:55, 3.08it/s] 90%|████████▉ | 333808/371472 [4:23:42<3:11:13, 3.28it/s] 90%|████████▉ | 333809/371472 [4:23:42<3:08:45, 3.33it/s] 90%|████████▉ | 333810/371472 [4:23:43<3:03:01, 3.43it/s] 90%|████████▉ | 333811/371472 [4:23:43<3:00:05, 3.49it/s] 90%|████████▉ | 333812/371472 [4:23:43<3:05:48, 3.38it/s] 90%|████████▉ | 333813/371472 [4:23:44<3:10:45, 3.29it/s] 90%|████████▉ | 333814/371472 [4:23:44<3:05:52, 3.38it/s] 90%|████████▉ | 333815/371472 [4:23:44<3:01:12, 3.46it/s] 90%|████████▉ | 333816/371472 [4:23:44<2:54:24, 3.60it/s] 90%|████████▉ | 333817/371472 [4:23:45<2:51:28, 3.66it/s] 90%|████████▉ | 333818/371472 [4:23:45<2:52:21, 3.64it/s] 90%|████████▉ | 333819/371472 [4:23:45<2:52:46, 3.63it/s] 90%|████████▉ | 333820/371472 [4:23:45<2:49:32, 3.70it/s] {'loss': 2.7176, 'learning_rate': 1.912721670365662e-07, 'epoch': 14.38} + 90%|████████▉ | 333820/371472 [4:23:46<2:49:32, 3.70it/s] 90%|████████▉ | 333821/371472 [4:23:46<2:54:57, 3.59it/s] 90%|████████▉ | 333822/371472 [4:23:46<2:58:05, 3.52it/s] 90%|████████▉ | 333823/371472 [4:23:46<3:03:22, 3.42it/s] 90%|████████▉ | 333824/371472 [4:23:47<2:59:06, 3.50it/s] 90%|████████▉ | 333825/371472 [4:23:47<2:55:26, 3.58it/s] 90%|████████▉ | 333826/371472 [4:23:47<2:52:53, 3.63it/s] 90%|████████▉ | 333827/371472 [4:23:47<2:49:09, 3.71it/s] 90%|████████▉ | 333828/371472 [4:23:48<2:52:19, 3.64it/s] 90%|████████▉ | 333829/371472 [4:23:48<2:54:45, 3.59it/s] 90%|████████▉ | 333830/371472 [4:23:48<2:57:43, 3.53it/s] 90%|████████▉ | 333831/371472 [4:23:49<2:58:07, 3.52it/s] 90%|████████▉ | 333832/371472 [4:23:49<2:54:28, 3.60it/s] 90%|████████▉ | 333833/371472 [4:23:49<2:52:52, 3.63it/s] 90%|████████▉ | 333834/371472 [4:23:49<3:01:35, 3.45it/s] 90%|████████▉ | 333835/371472 [4:23:50<2:58:57, 3.51it/s] 90%|████████▉ | 333836/371472 [4:23:50<2:57:05, 3.54it/s] 90%|████████▉ | 333837/371472 [4:23:50<2:57:10, 3.54it/s] 90%|████████▉ | 333838/371472 [4:23:51<2:51:30, 3.66it/s] 90%|████████▉ | 333839/371472 [4:23:51<2:56:52, 3.55it/s] 90%|████████▉ | 333840/371472 [4:23:51<2:53:02, 3.62it/s] {'loss': 2.5501, 'learning_rate': 1.9122368506108724e-07, 'epoch': 14.38} + 90%|████████▉ | 333840/371472 [4:23:51<2:53:02, 3.62it/s] 90%|████████▉ | 333841/371472 [4:23:51<2:50:53, 3.67it/s] 90%|████████▉ | 333842/371472 [4:23:52<2:50:02, 3.69it/s] 90%|████████▉ | 333843/371472 [4:23:52<3:01:00, 3.46it/s] 90%|████████▉ | 333844/371472 [4:23:52<3:03:08, 3.42it/s] 90%|████████▉ | 333845/371472 [4:23:53<2:59:52, 3.49it/s] 90%|████████▉ | 333846/371472 [4:23:53<2:52:51, 3.63it/s] 90%|████████▉ | 333847/371472 [4:23:53<2:50:48, 3.67it/s] 90%|████████▉ | 333848/371472 [4:23:53<2:49:36, 3.70it/s] 90%|████████▉ | 333849/371472 [4:23:54<2:49:31, 3.70it/s] 90%|████████▉ | 333850/371472 [4:23:54<2:56:24, 3.55it/s] 90%|████████▉ | 333851/371472 [4:23:54<2:49:40, 3.70it/s] 90%|████████▉ | 333852/371472 [4:23:54<2:48:28, 3.72it/s] 90%|████████▉ | 333853/371472 [4:23:55<2:52:17, 3.64it/s] 90%|████████▉ | 333854/371472 [4:23:55<2:55:57, 3.56it/s] 90%|████████▉ | 333855/371472 [4:23:55<2:52:27, 3.64it/s] 90%|████████▉ | 333856/371472 [4:23:56<2:57:52, 3.52it/s] 90%|████████▉ | 333857/371472 [4:23:56<3:03:50, 3.41it/s] 90%|████████▉ | 333858/371472 [4:23:56<3:06:17, 3.37it/s] 90%|████████▉ | 333859/371472 [4:23:56<3:01:29, 3.45it/s] 90%|████████▉ | 333860/371472 [4:23:57<3:02:39, 3.43it/s] {'loss': 2.5236, 'learning_rate': 1.9117520308560841e-07, 'epoch': 14.38} + 90%|████████▉ | 333860/371472 [4:23:57<3:02:39, 3.43it/s] 90%|████████▉ | 333861/371472 [4:23:57<3:05:46, 3.37it/s] 90%|████████▉ | 333862/371472 [4:23:57<3:08:20, 3.33it/s] 90%|████████▉ | 333863/371472 [4:23:58<3:05:27, 3.38it/s] 90%|████████▉ | 333864/371472 [4:23:58<3:13:00, 3.25it/s] 90%|████████▉ | 333865/371472 [4:23:58<3:12:09, 3.26it/s] 90%|████████▉ | 333866/371472 [4:23:59<3:00:22, 3.47it/s] 90%|████████▉ | 333867/371472 [4:23:59<3:13:09, 3.24it/s] 90%|████████▉ | 333868/371472 [4:23:59<3:06:33, 3.36it/s] 90%|████████▉ | 333869/371472 [4:23:59<2:59:47, 3.49it/s] 90%|████████▉ | 333870/371472 [4:24:00<2:55:57, 3.56it/s] 90%|████████▉ | 333871/371472 [4:24:00<2:55:02, 3.58it/s] 90%|████████▉ | 333872/371472 [4:24:00<2:55:50, 3.56it/s] 90%|████████▉ | 333873/371472 [4:24:01<3:01:41, 3.45it/s] 90%|████████▉ | 333874/371472 [4:24:01<2:59:18, 3.49it/s] 90%|████████▉ | 333875/371472 [4:24:01<3:00:24, 3.47it/s] 90%|████████▉ | 333876/371472 [4:24:01<2:54:02, 3.60it/s] 90%|████████▉ | 333877/371472 [4:24:02<2:50:55, 3.67it/s] 90%|████████▉ | 333878/371472 [4:24:02<2:48:48, 3.71it/s] 90%|████████▉ | 333879/371472 [4:24:02<2:54:13, 3.60it/s] 90%|████████▉ | 333880/371472 [4:24:03<2:54:59, 3.58it/s] {'loss': 2.5268, 'learning_rate': 1.9112672111012946e-07, 'epoch': 14.38} + 90%|████████▉ | 333880/371472 [4:24:03<2:54:59, 3.58it/s] 90%|████████▉ | 333881/371472 [4:24:03<2:50:19, 3.68it/s] 90%|████████▉ | 333882/371472 [4:24:03<2:48:31, 3.72it/s] 90%|████████▉ | 333883/371472 [4:24:03<2:52:11, 3.64it/s] 90%|████████▉ | 333884/371472 [4:24:04<2:49:12, 3.70it/s] 90%|████████▉ | 333885/371472 [4:24:04<3:15:00, 3.21it/s] 90%|████████▉ | 333886/371472 [4:24:04<3:12:44, 3.25it/s] 90%|████████▉ | 333887/371472 [4:24:05<3:04:30, 3.40it/s] 90%|████████▉ | 333888/371472 [4:24:05<3:20:51, 3.12it/s] 90%|████████▉ | 333889/371472 [4:24:05<3:13:10, 3.24it/s] 90%|████████▉ | 333890/371472 [4:24:05<3:02:36, 3.43it/s] 90%|████████▉ | 333891/371472 [4:24:06<2:54:24, 3.59it/s] 90%|████████▉ | 333892/371472 [4:24:06<2:51:59, 3.64it/s] 90%|████████▉ | 333893/371472 [4:24:06<2:53:25, 3.61it/s] 90%|████████▉ | 333894/371472 [4:24:07<2:57:21, 3.53it/s] 90%|████████▉ | 333895/371472 [4:24:07<3:10:34, 3.29it/s] 90%|████████▉ | 333896/371472 [4:24:07<2:58:34, 3.51it/s] 90%|████████▉ | 333897/371472 [4:24:07<2:57:16, 3.53it/s] 90%|████████▉ | 333898/371472 [4:24:08<2:56:52, 3.54it/s] 90%|████████▉ | 333899/371472 [4:24:08<2:55:07, 3.58it/s] 90%|████████▉ | 333900/371472 [4:24:08<2:52:52, 3.62it/s] {'loss': 2.6182, 'learning_rate': 1.910782391346506e-07, 'epoch': 14.38} + 90%|████████▉ | 333900/371472 [4:24:08<2:52:52, 3.62it/s] 90%|████████▉ | 333901/371472 [4:24:09<2:53:53, 3.60it/s] 90%|████████▉ | 333902/371472 [4:24:09<2:49:48, 3.69it/s] 90%|████████▉ | 333903/371472 [4:24:09<2:53:34, 3.61it/s] 90%|████████▉ | 333904/371472 [4:24:09<3:08:10, 3.33it/s] 90%|████████▉ | 333905/371472 [4:24:10<3:08:16, 3.33it/s] 90%|████████▉ | 333906/371472 [4:24:10<3:01:49, 3.44it/s] 90%|████████▉ | 333907/371472 [4:24:10<2:57:30, 3.53it/s] 90%|████████▉ | 333908/371472 [4:24:11<2:56:15, 3.55it/s] 90%|████████▉ | 333909/371472 [4:24:11<2:54:35, 3.59it/s] 90%|████████▉ | 333910/371472 [4:24:11<2:52:35, 3.63it/s] 90%|████████▉ | 333911/371472 [4:24:11<2:50:02, 3.68it/s] 90%|████████▉ | 333912/371472 [4:24:12<2:51:29, 3.65it/s] 90%|████████▉ | 333913/371472 [4:24:12<2:48:56, 3.71it/s] 90%|████████▉ | 333914/371472 [4:24:12<2:49:39, 3.69it/s] 90%|████████▉ | 333915/371472 [4:24:12<2:51:58, 3.64it/s] 90%|████████▉ | 333916/371472 [4:24:13<3:01:29, 3.45it/s] 90%|████████▉ | 333917/371472 [4:24:13<3:01:01, 3.46it/s] 90%|████████▉ | 333918/371472 [4:24:13<2:57:30, 3.53it/s] 90%|████████▉ | 333919/371472 [4:24:14<3:01:36, 3.45it/s] 90%|████████▉ | 333920/371472 [4:24:14<2:59:08, 3.49it/s] {'loss': 2.6905, 'learning_rate': 1.9102975715917168e-07, 'epoch': 14.38} + 90%|████████▉ | 333920/371472 [4:24:14<2:59:08, 3.49it/s] 90%|████████▉ | 333921/371472 [4:24:14<2:53:05, 3.62it/s] 90%|████████▉ | 333922/371472 [4:24:14<2:58:37, 3.50it/s] 90%|████████▉ | 333923/371472 [4:24:15<2:56:20, 3.55it/s] 90%|████████▉ | 333924/371472 [4:24:15<2:53:05, 3.62it/s] 90%|████████▉ | 333925/371472 [4:24:15<2:48:59, 3.70it/s] 90%|████████▉ | 333926/371472 [4:24:16<2:54:15, 3.59it/s] 90%|████████▉ | 333927/371472 [4:24:16<2:49:48, 3.68it/s] 90%|████████▉ | 333928/371472 [4:24:16<2:49:47, 3.69it/s] 90%|████████▉ | 333929/371472 [4:24:16<2:51:23, 3.65it/s] 90%|████████▉ | 333930/371472 [4:24:17<2:45:22, 3.78it/s] 90%|████████▉ | 333931/371472 [4:24:17<2:44:48, 3.80it/s] 90%|████████▉ | 333932/371472 [4:24:17<2:48:03, 3.72it/s] 90%|████████▉ | 333933/371472 [4:24:17<2:48:00, 3.72it/s] 90%|████████▉ | 333934/371472 [4:24:18<2:47:25, 3.74it/s] 90%|████████▉ | 333935/371472 [4:24:18<2:51:34, 3.65it/s] 90%|████████▉ | 333936/371472 [4:24:18<2:44:37, 3.80it/s] 90%|████████▉ | 333937/371472 [4:24:19<2:59:36, 3.48it/s] 90%|████████▉ | 333938/371472 [4:24:19<2:51:47, 3.64it/s] 90%|████████▉ | 333939/371472 [4:24:19<2:48:52, 3.70it/s] 90%|████████▉ | 333940/371472 [4:24:19<2:46:58, 3.75it/s] {'loss': 2.5728, 'learning_rate': 1.9098127518369283e-07, 'epoch': 14.38} + 90%|████████▉ | 333940/371472 [4:24:19<2:46:58, 3.75it/s] 90%|████████▉ | 333941/371472 [4:24:20<2:51:46, 3.64it/s] 90%|████████▉ | 333942/371472 [4:24:20<2:55:23, 3.57it/s] 90%|████████▉ | 333943/371472 [4:24:20<2:53:30, 3.60it/s] 90%|████████▉ | 333944/371472 [4:24:20<2:54:23, 3.59it/s] 90%|████████▉ | 333945/371472 [4:24:21<3:06:18, 3.36it/s] 90%|████████▉ | 333946/371472 [4:24:21<3:00:48, 3.46it/s] 90%|████████▉ | 333947/371472 [4:24:21<2:51:49, 3.64it/s] 90%|████████▉ | 333948/371472 [4:24:22<2:58:48, 3.50it/s] 90%|████████▉ | 333949/371472 [4:24:22<2:53:12, 3.61it/s] 90%|████████▉ | 333950/371472 [4:24:22<2:48:32, 3.71it/s] 90%|████████▉ | 333951/371472 [4:24:22<2:46:39, 3.75it/s] 90%|████████▉ | 333952/371472 [4:24:23<2:58:05, 3.51it/s] 90%|████████▉ | 333953/371472 [4:24:23<2:50:06, 3.68it/s] 90%|████████▉ | 333954/371472 [4:24:23<2:46:48, 3.75it/s] 90%|████████▉ | 333955/371472 [4:24:24<2:53:10, 3.61it/s] 90%|████████▉ | 333956/371472 [4:24:24<2:51:12, 3.65it/s] 90%|████████▉ | 333957/371472 [4:24:24<2:51:47, 3.64it/s] 90%|████████▉ | 333958/371472 [4:24:24<3:02:50, 3.42it/s] 90%|████████▉ | 333959/371472 [4:24:25<3:05:39, 3.37it/s] 90%|████████▉ | 333960/371472 [4:24:25<2:55:46, 3.56it/s] {'loss': 2.6816, 'learning_rate': 1.9093279320821388e-07, 'epoch': 14.38} + 90%|████████▉ | 333960/371472 [4:24:25<2:55:46, 3.56it/s] 90%|████████▉ | 333961/371472 [4:24:25<2:54:05, 3.59it/s] 90%|████████▉ | 333962/371472 [4:24:26<3:12:57, 3.24it/s] 90%|████████▉ | 333963/371472 [4:24:26<3:02:08, 3.43it/s] 90%|████████▉ | 333964/371472 [4:24:26<2:59:49, 3.48it/s] 90%|████████▉ | 333965/371472 [4:24:26<2:55:07, 3.57it/s] 90%|████████▉ | 333966/371472 [4:24:27<3:00:43, 3.46it/s] 90%|████████▉ | 333967/371472 [4:24:27<2:54:22, 3.58it/s] 90%|████████▉ | 333968/371472 [4:24:27<2:46:21, 3.76it/s] 90%|████████▉ | 333969/371472 [4:24:27<2:50:32, 3.67it/s] 90%|████████▉ | 333970/371472 [4:24:28<2:45:03, 3.79it/s] 90%|████████▉ | 333971/371472 [4:24:28<2:45:24, 3.78it/s] 90%|████████▉ | 333972/371472 [4:24:28<2:47:06, 3.74it/s] 90%|████████▉ | 333973/371472 [4:24:29<2:47:33, 3.73it/s] 90%|████████▉ | 333974/371472 [4:24:29<2:45:49, 3.77it/s] 90%|████████▉ | 333975/371472 [4:24:29<2:44:54, 3.79it/s] 90%|████████▉ | 333976/371472 [4:24:29<2:44:48, 3.79it/s] 90%|████████▉ | 333977/371472 [4:24:30<2:43:00, 3.83it/s] 90%|████████▉ | 333978/371472 [4:24:30<2:54:29, 3.58it/s] 90%|████████▉ | 333979/371472 [4:24:30<2:50:27, 3.67it/s] 90%|████████▉ | 333980/371472 [4:24:30<2:44:53, 3.79it/s] {'loss': 2.5127, 'learning_rate': 1.9088431123273505e-07, 'epoch': 14.39} + 90%|████████▉ | 333980/371472 [4:24:30<2:44:53, 3.79it/s] 90%|████████▉ | 333981/371472 [4:24:31<2:53:13, 3.61it/s] 90%|████████�� | 333982/371472 [4:24:31<2:49:20, 3.69it/s] 90%|████████▉ | 333983/371472 [4:24:31<2:55:44, 3.56it/s] 90%|████████▉ | 333984/371472 [4:24:32<3:02:09, 3.43it/s] 90%|████████▉ | 333985/371472 [4:24:32<3:00:58, 3.45it/s] 90%|████████▉ | 333986/371472 [4:24:32<2:53:34, 3.60it/s] 90%|████████▉ | 333987/371472 [4:24:32<2:55:14, 3.56it/s] 90%|████████▉ | 333988/371472 [4:24:33<2:50:38, 3.66it/s] 90%|████████▉ | 333989/371472 [4:24:33<2:48:00, 3.72it/s] 90%|████████▉ | 333990/371472 [4:24:33<2:52:03, 3.63it/s] 90%|████████▉ | 333991/371472 [4:24:34<2:57:36, 3.52it/s] 90%|████████▉ | 333992/371472 [4:24:34<2:53:10, 3.61it/s] 90%|████████▉ | 333993/371472 [4:24:34<2:49:12, 3.69it/s] 90%|████████▉ | 333994/371472 [4:24:34<2:53:42, 3.60it/s] 90%|████████▉ | 333995/371472 [4:24:35<3:04:31, 3.38it/s] 90%|████████▉ | 333996/371472 [4:24:35<2:54:50, 3.57it/s] 90%|████████▉ | 333997/371472 [4:24:35<2:49:53, 3.68it/s] 90%|████████▉ | 333998/371472 [4:24:35<2:45:03, 3.78it/s] 90%|████████▉ | 333999/371472 [4:24:36<2:52:00, 3.63it/s] 90%|████████▉ | 334000/371472 [4:24:36<3:00:18, 3.46it/s] {'loss': 2.657, 'learning_rate': 1.9083582925725613e-07, 'epoch': 14.39} + 90%|████████▉ | 334000/371472 [4:24:36<3:00:18, 3.46it/s] 90%|████████▉ | 334001/371472 [4:24:36<3:07:37, 3.33it/s] 90%|████████▉ | 334002/371472 [4:24:37<3:05:11, 3.37it/s] 90%|████████▉ | 334003/371472 [4:24:37<3:02:07, 3.43it/s] 90%|████████▉ | 334004/371472 [4:24:37<2:57:38, 3.52it/s] 90%|████████▉ | 334005/371472 [4:24:37<2:59:30, 3.48it/s] 90%|████████▉ | 334006/371472 [4:24:38<2:55:06, 3.57it/s] 90%|████████▉ | 334007/371472 [4:24:38<2:52:25, 3.62it/s] 90%|████████▉ | 334008/371472 [4:24:38<3:21:18, 3.10it/s] 90%|████████▉ | 334009/371472 [4:24:39<3:05:27, 3.37it/s] 90%|████████▉ | 334010/371472 [4:24:39<3:04:51, 3.38it/s] 90%|████████▉ | 334011/371472 [4:24:39<3:03:20, 3.41it/s] 90%|████████▉ | 334012/371472 [4:24:40<2:55:57, 3.55it/s] 90%|████████▉ | 334013/371472 [4:24:40<2:49:24, 3.69it/s] 90%|████████▉ | 334014/371472 [4:24:40<2:48:32, 3.70it/s] 90%|████████▉ | 334015/371472 [4:24:40<2:50:59, 3.65it/s] 90%|████████▉ | 334016/371472 [4:24:41<2:53:24, 3.60it/s] 90%|████████▉ | 334017/371472 [4:24:41<3:06:17, 3.35it/s] 90%|████████▉ | 334018/371472 [4:24:41<2:56:39, 3.53it/s] 90%|████████▉ | 334019/371472 [4:24:41<2:53:37, 3.60it/s] 90%|████████▉ | 334020/371472 [4:24:42<2:57:27, 3.52it/s] {'loss': 2.5883, 'learning_rate': 1.9078734728177725e-07, 'epoch': 14.39} + 90%|████████▉ | 334020/371472 [4:24:42<2:57:27, 3.52it/s] 90%|████████▉ | 334021/371472 [4:24:42<2:56:09, 3.54it/s] 90%|████████▉ | 334022/371472 [4:24:42<2:51:26, 3.64it/s] 90%|████████▉ | 334023/371472 [4:24:43<3:03:38, 3.40it/s] 90%|████████▉ | 334024/371472 [4:24:43<2:57:29, 3.52it/s] 90%|████████▉ | 334025/371472 [4:24:43<3:06:08, 3.35it/s] 90%|████████▉ | 334026/371472 [4:24:43<2:56:53, 3.53it/s] 90%|████████▉ | 334027/371472 [4:24:44<2:57:13, 3.52it/s] 90%|████████▉ | 334028/371472 [4:24:44<2:59:39, 3.47it/s] 90%|████████▉ | 334029/371472 [4:24:44<2:56:37, 3.53it/s] 90%|████████▉ | 334030/371472 [4:24:45<2:50:59, 3.65it/s] 90%|████████▉ | 334031/371472 [4:24:45<2:50:42, 3.66it/s] 90%|████████▉ | 334032/371472 [4:24:45<2:56:21, 3.54it/s] 90%|████████▉ | 334033/371472 [4:24:45<2:49:36, 3.68it/s] 90%|████████▉ | 334034/371472 [4:24:46<2:55:00, 3.57it/s] 90%|████████▉ | 334035/371472 [4:24:46<2:52:22, 3.62it/s] 90%|████████▉ | 334036/371472 [4:24:46<2:58:54, 3.49it/s] 90%|████████▉ | 334037/371472 [4:24:47<2:53:10, 3.60it/s] 90%|████████▉ | 334038/371472 [4:24:47<3:02:54, 3.41it/s] 90%|████████▉ | 334039/371472 [4:24:47<2:54:17, 3.58it/s] 90%|████████▉ | 334040/371472 [4:24:47<3:05:02, 3.37it/s] {'loss': 2.6041, 'learning_rate': 1.9073886530629832e-07, 'epoch': 14.39} + 90%|████████▉ | 334040/371472 [4:24:47<3:05:02, 3.37it/s] 90%|████████▉ | 334041/371472 [4:24:48<2:56:15, 3.54it/s] 90%|████████▉ | 334042/371472 [4:24:48<3:01:42, 3.43it/s] 90%|████████▉ | 334043/371472 [4:24:48<2:53:11, 3.60it/s] 90%|████████▉ | 334044/371472 [4:24:49<2:49:54, 3.67it/s] 90%|████████▉ | 334045/371472 [4:24:49<2:51:08, 3.64it/s] 90%|████████▉ | 334046/371472 [4:24:49<2:55:38, 3.55it/s] 90%|████████▉ | 334047/371472 [4:24:49<2:55:23, 3.56it/s] 90%|████████▉ | 334048/371472 [4:24:50<2:49:30, 3.68it/s] 90%|████████▉ | 334049/371472 [4:24:50<2:54:05, 3.58it/s] 90%|████████▉ | 334050/371472 [4:24:50<2:51:07, 3.64it/s] 90%|████████▉ | 334051/371472 [4:24:50<2:58:34, 3.49it/s] 90%|████████▉ | 334052/371472 [4:24:51<3:05:44, 3.36it/s] 90%|████████▉ | 334053/371472 [4:24:51<3:05:44, 3.36it/s] 90%|████████▉ | 334054/371472 [4:24:51<3:10:18, 3.28it/s] 90%|████████▉ | 334055/371472 [4:24:52<3:03:26, 3.40it/s] 90%|████████▉ | 334056/371472 [4:24:52<2:55:24, 3.56it/s] 90%|████████▉ | 334057/371472 [4:24:52<2:56:17, 3.54it/s] 90%|████████▉ | 334058/371472 [4:24:52<2:49:22, 3.68it/s] 90%|████████▉ | 334059/371472 [4:24:53<3:03:39, 3.40it/s] 90%|████████▉ | 334060/371472 [4:24:53<3:06:26, 3.34it/s] {'loss': 2.5847, 'learning_rate': 1.906903833308195e-07, 'epoch': 14.39} + 90%|████████▉ | 334060/371472 [4:24:53<3:06:26, 3.34it/s] 90%|████████▉ | 334061/371472 [4:24:53<3:14:45, 3.20it/s] 90%|████████▉ | 334062/371472 [4:24:54<3:06:57, 3.33it/s] 90%|████████▉ | 334063/371472 [4:24:54<2:59:34, 3.47it/s] 90%|████████▉ | 334064/371472 [4:24:54<2:54:17, 3.58it/s] 90%|████████▉ | 334065/371472 [4:24:55<2:58:28, 3.49it/s] 90%|████████▉ | 334066/371472 [4:24:55<3:03:40, 3.39it/s] 90%|████████▉ | 334067/371472 [4:24:55<2:59:10, 3.48it/s] 90%|████████▉ | 334068/371472 [4:24:55<3:02:21, 3.42it/s] 90%|████████▉ | 334069/371472 [4:24:56<2:51:36, 3.63it/s] 90%|████████▉ | 334070/371472 [4:24:56<2:48:30, 3.70it/s] 90%|████████▉ | 334071/371472 [4:24:56<2:54:06, 3.58it/s] 90%|████████▉ | 334072/371472 [4:24:57<2:54:16, 3.58it/s] 90%|████████▉ | 334073/371472 [4:24:57<2:51:24, 3.64it/s] 90%|████████▉ | 334074/371472 [4:24:57<2:49:25, 3.68it/s] 90%|████████▉ | 334075/371472 [4:24:57<2:54:00, 3.58it/s] 90%|████████▉ | 334076/371472 [4:24:58<2:49:03, 3.69it/s] 90%|████████▉ | 334077/371472 [4:24:58<2:51:13, 3.64it/s] 90%|████████▉ | 334078/371472 [4:24:58<2:49:48, 3.67it/s] 90%|████████▉ | 334079/371472 [4:24:58<2:48:56, 3.69it/s] 90%|████████▉ | 334080/371472 [4:24:59<2:50:28, 3.66it/s] {'loss': 2.6398, 'learning_rate': 1.9064190135534054e-07, 'epoch': 14.39} + 90%|████████▉ | 334080/371472 [4:24:59<2:50:28, 3.66it/s] 90%|████████▉ | 334081/371472 [4:24:59<2:49:30, 3.68it/s] 90%|████████▉ | 334082/371472 [4:24:59<2:52:44, 3.61it/s] 90%|████████▉ | 334083/371472 [4:25:00<2:51:48, 3.63it/s] 90%|████████▉ | 334084/371472 [4:25:00<2:47:08, 3.73it/s] 90%|████████▉ | 334085/371472 [4:25:00<2:42:10, 3.84it/s] 90%|████████▉ | 334086/371472 [4:25:00<2:40:22, 3.89it/s] 90%|████████▉ | 334087/371472 [4:25:01<2:40:21, 3.89it/s] 90%|████████▉ | 334088/371472 [4:25:01<2:52:15, 3.62it/s] 90%|████████▉ | 334089/371472 [4:25:01<3:01:59, 3.42it/s] 90%|████████▉ | 334090/371472 [4:25:01<2:54:56, 3.56it/s] 90%|████████▉ | 334091/371472 [4:25:02<3:07:42, 3.32it/s] 90%|████████▉ | 334092/371472 [4:25:02<3:05:00, 3.37it/s] 90%|████████▉ | 334093/371472 [4:25:02<2:55:57, 3.54it/s] 90%|████████▉ | 334094/371472 [4:25:03<2:50:23, 3.66it/s] 90%|████████▉ | 334095/371472 [4:25:03<2:49:48, 3.67it/s] 90%|████████▉ | 334096/371472 [4:25:03<2:49:31, 3.67it/s] 90%|████████▉ | 334097/371472 [4:25:03<2:51:42, 3.63it/s] 90%|████████▉ | 334098/371472 [4:25:04<2:56:03, 3.54it/s] 90%|████████▉ | 334099/371472 [4:25:04<2:52:52, 3.60it/s] 90%|████████▉ | 334100/371472 [4:25:04<2:49:45, 3.67it/s] {'loss': 2.8185, 'learning_rate': 1.905934193798617e-07, 'epoch': 14.39} + 90%|████████▉ | 334100/371472 [4:25:04<2:49:45, 3.67it/s] 90%|████████▉ | 334101/371472 [4:25:05<2:55:42, 3.54it/s] 90%|████████▉ | 334102/371472 [4:25:05<2:54:16, 3.57it/s] 90%|████████▉ | 334103/371472 [4:25:05<2:55:21, 3.55it/s] 90%|████████▉ | 334104/371472 [4:25:05<2:51:56, 3.62it/s] 90%|████████▉ | 334105/371472 [4:25:06<2:50:29, 3.65it/s] 90%|████████▉ | 334106/371472 [4:25:06<2:46:03, 3.75it/s] 90%|████████▉ | 334107/371472 [4:25:06<2:48:43, 3.69it/s] 90%|████████▉ | 334108/371472 [4:25:07<3:02:37, 3.41it/s] 90%|████████▉ | 334109/371472 [4:25:07<3:04:15, 3.38it/s] 90%|████████▉ | 334110/371472 [4:25:07<3:05:38, 3.35it/s] 90%|████████▉ | 334111/371472 [4:25:07<2:53:31, 3.59it/s] 90%|████████▉ | 334112/371472 [4:25:08<2:51:12, 3.64it/s] 90%|████████▉ | 334113/371472 [4:25:08<2:58:23, 3.49it/s] 90%|████████▉ | 334114/371472 [4:25:08<3:12:51, 3.23it/s] 90%|████████▉ | 334115/371472 [4:25:09<3:00:36, 3.45it/s] 90%|████████▉ | 334116/371472 [4:25:09<3:08:16, 3.31it/s] 90%|████████▉ | 334117/371472 [4:25:09<3:00:59, 3.44it/s] 90%|████████▉ | 334118/371472 [4:25:09<3:03:40, 3.39it/s] 90%|████████▉ | 334119/371472 [4:25:10<3:02:19, 3.41it/s] 90%|████████▉ | 334120/371472 [4:25:10<2:51:20, 3.63it/s] {'loss': 2.5429, 'learning_rate': 1.9054493740438277e-07, 'epoch': 14.39} + 90%|████████▉ | 334120/371472 [4:25:10<2:51:20, 3.63it/s] 90%|████████▉ | 334121/371472 [4:25:10<2:49:18, 3.68it/s] 90%|████████▉ | 334122/371472 [4:25:10<2:48:37, 3.69it/s] 90%|████████▉ | 334123/371472 [4:25:11<2:58:20, 3.49it/s] 90%|████████▉ | 334124/371472 [4:25:11<2:51:02, 3.64it/s] 90%|████████▉ | 334125/371472 [4:25:11<2:46:23, 3.74it/s] 90%|████████▉ | 334126/371472 [4:25:12<2:44:41, 3.78it/s] 90%|████████▉ | 334127/371472 [4:25:12<2:46:14, 3.74it/s] 90%|████████▉ | 334128/371472 [4:25:12<2:40:03, 3.89it/s] 90%|████████▉ | 334129/371472 [4:25:12<2:43:25, 3.81it/s] 90%|████████▉ | 334130/371472 [4:25:13<2:47:55, 3.71it/s] 90%|████████▉ | 334131/371472 [4:25:13<2:47:56, 3.71it/s] 90%|████████▉ | 334132/371472 [4:25:13<3:02:06, 3.42it/s] 90%|████████▉ | 334133/371472 [4:25:14<2:56:04, 3.53it/s] 90%|████████▉ | 334134/371472 [4:25:14<3:05:49, 3.35it/s] 90%|████████▉ | 334135/371472 [4:25:14<3:03:02, 3.40it/s] 90%|████████▉ | 334136/371472 [4:25:14<2:54:49, 3.56it/s] 90%|████████▉ | 334137/371472 [4:25:15<2:49:05, 3.68it/s] 90%|████████▉ | 334138/371472 [4:25:15<3:01:29, 3.43it/s] 90%|████████▉ | 334139/371472 [4:25:15<3:01:18, 3.43it/s] 90%|████████▉ | 334140/371472 [4:25:16<2:51:10, 3.63it/s] {'loss': 2.7483, 'learning_rate': 1.9049645542890392e-07, 'epoch': 14.39} + 90%|████████▉ | 334140/371472 [4:25:16<2:51:10, 3.63it/s] 90%|████████▉ | 334141/371472 [4:25:16<3:08:45, 3.30it/s] 90%|████████▉ | 334142/371472 [4:25:16<3:04:43, 3.37it/s] 90%|████████▉ | 334143/371472 [4:25:16<3:08:13, 3.31it/s] 90%|████████▉ | 334144/371472 [4:25:17<3:05:00, 3.36it/s] 90%|████████▉ | 334145/371472 [4:25:17<3:13:31, 3.21it/s] 90%|████████▉ | 334146/371472 [4:25:17<3:01:58, 3.42it/s] 90%|████████▉ | 334147/371472 [4:25:18<3:00:13, 3.45it/s] 90%|████████▉ | 334148/371472 [4:25:18<2:57:00, 3.51it/s] 90%|████████▉ | 334149/371472 [4:25:18<2:51:27, 3.63it/s] 90%|████████▉ | 334150/371472 [4:25:18<2:47:00, 3.72it/s] 90%|████████▉ | 334151/371472 [4:25:19<2:47:30, 3.71it/s] 90%|████████▉ | 334152/371472 [4:25:19<2:47:17, 3.72it/s] 90%|████████▉ | 334153/371472 [4:25:19<2:57:04, 3.51it/s] 90%|████████▉ | 334154/371472 [4:25:20<2:50:58, 3.64it/s] 90%|████████▉ | 334155/371472 [4:25:20<2:45:11, 3.77it/s] 90%|████████▉ | 334156/371472 [4:25:20<2:39:36, 3.90it/s] 90%|████████▉ | 334157/371472 [4:25:20<2:38:14, 3.93it/s] 90%|████████▉ | 334158/371472 [4:25:21<2:54:21, 3.57it/s] 90%|████████▉ | 334159/371472 [4:25:21<3:01:54, 3.42it/s] 90%|████████▉ | 334160/371472 [4:25:21<3:10:36, 3.26it/s] {'loss': 2.543, 'learning_rate': 1.90447973453425e-07, 'epoch': 14.39} + 90%|████████▉ | 334160/371472 [4:25:21<3:10:36, 3.26it/s] 90%|████████▉ | 334161/371472 [4:25:22<3:06:58, 3.33it/s] 90%|████████▉ | 334162/371472 [4:25:22<3:04:29, 3.37it/s] 90%|████████▉ | 334163/371472 [4:25:22<3:04:23, 3.37it/s] 90%|████████▉ | 334164/371472 [4:25:22<3:03:11, 3.39it/s] 90%|████████▉ | 334165/371472 [4:25:23<3:10:16, 3.27it/s] 90%|████████▉ | 334166/371472 [4:25:23<3:04:02, 3.38it/s] 90%|████████▉ | 334167/371472 [4:25:23<2:56:40, 3.52it/s] 90%|████████▉ | 334168/371472 [4:25:24<2:55:13, 3.55it/s] 90%|████████▉ | 334169/371472 [4:25:24<2:49:23, 3.67it/s] 90%|████████▉ | 334170/371472 [4:25:24<2:57:52, 3.50it/s] 90%|████████▉ | 334171/371472 [4:25:24<2:51:55, 3.62it/s] 90%|████████▉ | 334172/371472 [4:25:25<2:47:46, 3.71it/s] 90%|████████▉ | 334173/371472 [4:25:25<2:55:28, 3.54it/s] 90%|████████▉ | 334174/371472 [4:25:25<2:55:23, 3.54it/s] 90%|████████▉ | 334175/371472 [4:25:25<2:51:57, 3.62it/s] 90%|████████▉ | 334176/371472 [4:25:26<2:49:23, 3.67it/s] 90%|████████▉ | 334177/371472 [4:25:26<2:54:29, 3.56it/s] 90%|████████▉ | 334178/371472 [4:25:26<3:00:06, 3.45it/s] 90%|████████▉ | 334179/371472 [4:25:27<3:06:40, 3.33it/s] 90%|████████▉ | 334180/371472 [4:25:27<2:57:41, 3.50it/s] {'loss': 2.5561, 'learning_rate': 1.9039949147794614e-07, 'epoch': 14.39} + 90%|████████▉ | 334180/371472 [4:25:27<2:57:41, 3.50it/s] 90%|████████▉ | 334181/371472 [4:25:27<2:51:55, 3.61it/s] 90%|████████▉ | 334182/371472 [4:25:27<2:51:20, 3.63it/s] 90%|████████▉ | 334183/371472 [4:25:28<2:54:22, 3.56it/s] 90%|████████▉ | 334184/371472 [4:25:28<2:48:01, 3.70it/s] 90%|████████▉ | 334185/371472 [4:25:28<2:46:44, 3.73it/s] 90%|████████▉ | 334186/371472 [4:25:29<2:46:34, 3.73it/s] 90%|████████▉ | 334187/371472 [4:25:29<3:29:14, 2.97it/s] 90%|████████▉ | 334188/371472 [4:25:29<3:11:45, 3.24it/s] 90%|████████▉ | 334189/371472 [4:25:30<2:58:24, 3.48it/s] 90%|████████▉ | 334190/371472 [4:25:30<2:53:10, 3.59it/s] 90%|████████▉ | 334191/371472 [4:25:30<2:56:44, 3.52it/s] 90%|████████▉ | 334192/371472 [4:25:30<2:58:16, 3.49it/s] 90%|████████▉ | 334193/371472 [4:25:31<3:04:38, 3.36it/s] 90%|████████▉ | 334194/371472 [4:25:31<2:58:01, 3.49it/s] 90%|████████▉ | 334195/371472 [4:25:31<3:00:00, 3.45it/s] 90%|████████▉ | 334196/371472 [4:25:32<2:59:29, 3.46it/s] 90%|████████▉ | 334197/371472 [4:25:32<2:57:54, 3.49it/s] 90%|████████▉ | 334198/371472 [4:25:32<2:58:12, 3.49it/s] 90%|████████▉ | 334199/371472 [4:25:32<2:58:48, 3.47it/s] 90%|████████▉ | 334200/371472 [4:25:33<2:56:12, 3.53it/s] {'loss': 2.6713, 'learning_rate': 1.9035100950246718e-07, 'epoch': 14.39} + 90%|████████▉ | 334200/371472 [4:25:33<2:56:12, 3.53it/s] 90%|████████▉ | 334201/371472 [4:25:33<3:04:24, 3.37it/s] 90%|████████▉ | 334202/371472 [4:25:33<3:01:50, 3.42it/s] 90%|████████▉ | 334203/371472 [4:25:34<2:54:41, 3.56it/s] 90%|████████▉ | 334204/371472 [4:25:34<2:47:50, 3.70it/s] 90%|████████▉ | 334205/371472 [4:25:34<2:41:41, 3.84it/s] 90%|████████▉ | 334206/371472 [4:25:34<2:59:58, 3.45it/s] 90%|████████▉ | 334207/371472 [4:25:35<2:57:35, 3.50it/s] 90%|████████▉ | 334208/371472 [4:25:35<2:49:33, 3.66it/s] 90%|████████▉ | 334209/371472 [4:25:35<2:45:44, 3.75it/s] 90%|████████▉ | 334210/371472 [4:25:35<2:51:57, 3.61it/s] 90%|████████▉ | 334211/371472 [4:25:36<2:46:25, 3.73it/s] 90%|████████▉ | 334212/371472 [4:25:36<2:58:14, 3.48it/s] 90%|████████▉ | 334213/371472 [4:25:36<3:03:04, 3.39it/s] 90%|████████▉ | 334214/371472 [4:25:37<3:03:53, 3.38it/s] 90%|████████▉ | 334215/371472 [4:25:37<3:02:28, 3.40it/s] 90%|████████▉ | 334216/371472 [4:25:37<2:53:05, 3.59it/s] 90%|████████▉ | 334217/371472 [4:25:37<2:45:39, 3.75it/s] 90%|████████▉ | 334218/371472 [4:25:38<2:41:02, 3.86it/s] 90%|████████▉ | 334219/371472 [4:25:38<3:02:34, 3.40it/s] 90%|████████▉ | 334220/371472 [4:25:38<3:04:27, 3.37it/s] {'loss': 2.5461, 'learning_rate': 1.9030252752698823e-07, 'epoch': 14.4} + 90%|████████▉ | 334220/371472 [4:25:38<3:04:27, 3.37it/s] 90%|████████▉ | 334221/371472 [4:25:39<3:13:46, 3.20it/s] 90%|████████▉ | 334222/371472 [4:25:39<3:06:38, 3.33it/s] 90%|████████▉ | 334223/371472 [4:25:39<3:06:57, 3.32it/s] 90%|████████▉ | 334224/371472 [4:25:40<2:58:37, 3.48it/s] 90%|████████▉ | 334225/371472 [4:25:40<2:54:44, 3.55it/s] 90%|████████▉ | 334226/371472 [4:25:40<2:55:49, 3.53it/s] 90%|████████▉ | 334227/371472 [4:25:40<2:50:10, 3.65it/s] 90%|████████▉ | 334228/371472 [4:25:41<2:45:37, 3.75it/s] 90%|████████▉ | 334229/371472 [4:25:41<2:50:40, 3.64it/s] 90%|████████▉ | 334230/371472 [4:25:41<2:44:57, 3.76it/s] 90%|████████▉ | 334231/371472 [4:25:41<2:47:11, 3.71it/s] 90%|████████▉ | 334232/371472 [4:25:42<2:45:25, 3.75it/s] 90%|████████▉ | 334233/371472 [4:25:42<2:41:33, 3.84it/s] 90%|████████▉ | 334234/371472 [4:25:42<2:41:56, 3.83it/s] 90%|████████▉ | 334235/371472 [4:25:42<2:44:59, 3.76it/s] 90%|████████▉ | 334236/371472 [4:25:43<2:48:03, 3.69it/s] 90%|████████▉ | 334237/371472 [4:25:43<2:47:29, 3.71it/s] 90%|████████▉ | 334238/371472 [4:25:43<2:45:05, 3.76it/s] 90%|████████▉ | 334239/371472 [4:25:44<2:52:40, 3.59it/s] 90%|████████▉ | 334240/371472 [4:25:44<2:56:36, 3.51it/s] {'loss': 2.4818, 'learning_rate': 1.902540455515094e-07, 'epoch': 14.4} + 90%|████████▉ | 334240/371472 [4:25:44<2:56:36, 3.51it/s] 90%|████████▉ | 334241/371472 [4:25:44<2:57:25, 3.50it/s] 90%|████████▉ | 334242/371472 [4:25:44<3:06:27, 3.33it/s] 90%|████████▉ | 334243/371472 [4:25:45<3:04:10, 3.37it/s] 90%|████████▉ | 334244/371472 [4:25:45<2:54:01, 3.57it/s] 90%|████████▉ | 334245/371472 [4:25:45<3:03:20, 3.38it/s] 90%|████████▉ | 334246/371472 [4:25:46<2:54:50, 3.55it/s] 90%|████████▉ | 334247/371472 [4:25:46<2:47:16, 3.71it/s] 90%|████████▉ | 334248/371472 [4:25:46<2:56:24, 3.52it/s] 90%|████████▉ | 334249/371472 [4:25:46<2:51:27, 3.62it/s] 90%|████████▉ | 334250/371472 [4:25:47<2:46:48, 3.72it/s] 90%|████████▉ | 334251/371472 [4:25:47<2:44:01, 3.78it/s] 90%|████████▉ | 334252/371472 [4:25:47<2:55:20, 3.54it/s] 90%|████████▉ | 334253/371472 [4:25:48<2:58:42, 3.47it/s] 90%|████████▉ | 334254/371472 [4:25:48<3:06:16, 3.33it/s] 90%|████████▉ | 334255/371472 [4:25:48<3:04:55, 3.35it/s] 90%|████████▉ | 334256/371472 [4:25:48<3:05:36, 3.34it/s] 90%|████████▉ | 334257/371472 [4:25:49<3:02:49, 3.39it/s] 90%|████████▉ | 334258/371472 [4:25:49<3:10:11, 3.26it/s] 90%|████████▉ | 334259/371472 [4:25:49<3:01:51, 3.41it/s] 90%|████████▉ | 334260/371472 [4:25:50<2:55:31, 3.53it/s] {'loss': 2.6773, 'learning_rate': 1.9020556357603045e-07, 'epoch': 14.4} + 90%|████████▉ | 334260/371472 [4:25:50<2:55:31, 3.53it/s] 90%|████████▉ | 334261/371472 [4:25:50<2:54:13, 3.56it/s] 90%|████████▉ | 334262/371472 [4:25:50<2:54:51, 3.55it/s] 90%|████████▉ | 334263/371472 [4:25:50<2:51:28, 3.62it/s] 90%|████████▉ | 334264/371472 [4:25:51<2:43:33, 3.79it/s] 90%|████████▉ | 334265/371472 [4:25:51<2:41:32, 3.84it/s] 90%|████████▉ | 334266/371472 [4:25:51<2:41:53, 3.83it/s] 90%|████████▉ | 334267/371472 [4:25:51<2:45:52, 3.74it/s] 90%|████████▉ | 334268/371472 [4:25:52<2:46:08, 3.73it/s] 90%|████████▉ | 334269/371472 [4:25:52<3:00:01, 3.44it/s] 90%|████████▉ | 334270/371472 [4:25:52<3:03:56, 3.37it/s] 90%|████████▉ | 334271/371472 [4:25:53<2:54:49, 3.55it/s] 90%|██████��█▉ | 334272/371472 [4:25:53<2:48:39, 3.68it/s] 90%|████████▉ | 334273/371472 [4:25:53<2:48:03, 3.69it/s] 90%|████████▉ | 334274/371472 [4:25:53<2:46:19, 3.73it/s] 90%|████████▉ | 334275/371472 [4:25:54<2:48:43, 3.67it/s] 90%|████████▉ | 334276/371472 [4:25:54<2:49:34, 3.66it/s] 90%|████████▉ | 334277/371472 [4:25:54<2:57:05, 3.50it/s] 90%|████████▉ | 334278/371472 [4:25:55<3:02:35, 3.40it/s] 90%|████████▉ | 334279/371472 [4:25:55<3:03:57, 3.37it/s] 90%|████████▉ | 334280/371472 [4:25:55<2:59:38, 3.45it/s] {'loss': 2.7351, 'learning_rate': 1.901570816005516e-07, 'epoch': 14.4} + 90%|████████▉ | 334280/371472 [4:25:55<2:59:38, 3.45it/s] 90%|████████▉ | 334281/371472 [4:25:55<2:50:54, 3.63it/s] 90%|████████▉ | 334282/371472 [4:25:56<2:54:14, 3.56it/s] 90%|████████▉ | 334283/371472 [4:25:56<2:50:32, 3.63it/s] 90%|████████▉ | 334284/371472 [4:25:56<2:52:32, 3.59it/s] 90%|████████▉ | 334285/371472 [4:25:56<2:49:43, 3.65it/s] 90%|████████▉ | 334286/371472 [4:25:57<2:53:18, 3.58it/s] 90%|████████▉ | 334287/371472 [4:25:57<2:50:58, 3.62it/s] 90%|████████▉ | 334288/371472 [4:25:57<2:46:43, 3.72it/s] 90%|████████▉ | 334289/371472 [4:25:58<2:56:48, 3.51it/s] 90%|████████▉ | 334290/371472 [4:25:58<3:01:28, 3.41it/s] 90%|████████▉ | 334291/371472 [4:25:58<3:32:19, 2.92it/s] 90%|████████▉ | 334292/371472 [4:25:59<3:19:33, 3.11it/s] 90%|████████▉ | 334293/371472 [4:25:59<3:15:02, 3.18it/s] 90%|████████▉ | 334294/371472 [4:25:59<3:13:48, 3.20it/s] 90%|████████▉ | 334295/371472 [4:26:00<3:03:36, 3.37it/s] 90%|████████▉ | 334296/371472 [4:26:00<2:58:28, 3.47it/s] 90%|████████▉ | 334297/371472 [4:26:00<2:58:40, 3.47it/s] 90%|████████▉ | 334298/371472 [4:26:00<3:01:08, 3.42it/s] 90%|████████▉ | 334299/371472 [4:26:01<2:56:32, 3.51it/s] 90%|████████▉ | 334300/371472 [4:26:01<2:56:48, 3.50it/s] {'loss': 2.4528, 'learning_rate': 1.9010859962507267e-07, 'epoch': 14.4} + 90%|████████▉ | 334300/371472 [4:26:01<2:56:48, 3.50it/s] 90%|████████▉ | 334301/371472 [4:26:01<3:04:17, 3.36it/s] 90%|████████▉ | 334302/371472 [4:26:02<2:57:12, 3.50it/s] 90%|████████▉ | 334303/371472 [4:26:02<2:51:04, 3.62it/s] 90%|████████▉ | 334304/371472 [4:26:02<2:49:57, 3.64it/s] 90%|████████▉ | 334305/371472 [4:26:02<2:59:24, 3.45it/s] 90%|████████▉ | 334306/371472 [4:26:03<2:56:44, 3.50it/s] 90%|████████▉ | 334307/371472 [4:26:03<2:52:10, 3.60it/s] 90%|████████▉ | 334308/371472 [4:26:03<3:16:37, 3.15it/s] 90%|████████▉ | 334309/371472 [4:26:04<3:13:37, 3.20it/s] 90%|████████▉ | 334310/371472 [4:26:04<3:02:03, 3.40it/s] 90%|████████▉ | 334311/371472 [4:26:04<2:58:07, 3.48it/s] 90%|████████▉ | 334312/371472 [4:26:04<2:49:32, 3.65it/s] 90%|████████▉ | 334313/371472 [4:26:05<2:47:12, 3.70it/s] 90%|████████▉ | 334314/371472 [4:26:05<2:44:30, 3.76it/s] 90%|████████▉ | 334315/371472 [4:26:05<2:42:44, 3.81it/s] 90%|████████▉ | 334316/371472 [4:26:05<2:46:17, 3.72it/s] 90%|████████▉ | 334317/371472 [4:26:06<2:43:55, 3.78it/s] 90%|████████▉ | 334318/371472 [4:26:06<2:44:53, 3.76it/s] 90%|████████▉ | 334319/371472 [4:26:06<2:46:35, 3.72it/s] 90%|████████▉ | 334320/371472 [4:26:07<2:47:59, 3.69it/s] {'loss': 2.6231, 'learning_rate': 1.9006011764959383e-07, 'epoch': 14.4} + 90%|████████▉ | 334320/371472 [4:26:07<2:47:59, 3.69it/s] 90%|████████▉ | 334321/371472 [4:26:07<2:44:42, 3.76it/s] 90%|████████▉ | 334322/371472 [4:26:07<2:50:40, 3.63it/s] 90%|████████▉ | 334323/371472 [4:26:07<2:57:09, 3.49it/s] 90%|████████▉ | 334324/371472 [4:26:08<2:53:01, 3.58it/s] 90%|█████████ | 334325/371472 [4:26:08<2:47:10, 3.70it/s] 90%|█████████ | 334326/371472 [4:26:08<2:41:26, 3.83it/s] 90%|█████████ | 334327/371472 [4:26:08<2:36:20, 3.96it/s] 90%|█████████ | 334328/371472 [4:26:09<2:39:59, 3.87it/s] 90%|█████████ | 334329/371472 [4:26:09<2:52:46, 3.58it/s] 90%|█████████ | 334330/371472 [4:26:09<2:47:10, 3.70it/s] 90%|█████████ | 334331/371472 [4:26:10<2:54:21, 3.55it/s] 90%|█████████ | 334332/371472 [4:26:10<3:09:01, 3.27it/s] 90%|█████████ | 334333/371472 [4:26:10<3:11:35, 3.23it/s] 90%|█████████ | 334334/371472 [4:26:11<3:08:51, 3.28it/s] 90%|█████████ | 334335/371472 [4:26:11<3:03:17, 3.38it/s] 90%|█████████ | 334336/371472 [4:26:11<3:05:40, 3.33it/s] 90%|█████████ | 334337/371472 [4:26:11<2:57:17, 3.49it/s] 90%|█████████ | 334338/371472 [4:26:12<2:49:39, 3.65it/s] 90%|█████████ | 334339/371472 [4:26:12<3:05:29, 3.34it/s] 90%|█████████ | 334340/371472 [4:26:12<2:55:50, 3.52it/s] {'loss': 2.6343, 'learning_rate': 1.9001163567411487e-07, 'epoch': 14.4} + 90%|█████████ | 334340/371472 [4:26:12<2:55:50, 3.52it/s] 90%|█████████ | 334341/371472 [4:26:12<2:57:39, 3.48it/s] 90%|█████████ | 334342/371472 [4:26:13<2:52:17, 3.59it/s] 90%|█████████ | 334343/371472 [4:26:13<2:47:50, 3.69it/s] 90%|█████████ | 334344/371472 [4:26:13<2:55:53, 3.52it/s] 90%|█████████ | 334345/371472 [4:26:14<2:55:46, 3.52it/s] 90%|█████████ | 334346/371472 [4:26:14<3:12:17, 3.22it/s] 90%|█████████ | 334347/371472 [4:26:14<3:20:43, 3.08it/s] 90%|█████████ | 334348/371472 [4:26:15<3:06:43, 3.31it/s] 90%|█████████ | 334349/371472 [4:26:15<2:58:40, 3.46it/s] 90%|█████████ | 334350/371472 [4:26:15<3:03:54, 3.36it/s] 90%|█████████ | 334351/371472 [4:26:15<2:59:21, 3.45it/s] 90%|█████████ | 334352/371472 [4:26:16<2:59:17, 3.45it/s] 90%|█████████ | 334353/371472 [4:26:16<3:00:57, 3.42it/s] 90%|█████████ | 334354/371472 [4:26:16<3:18:18, 3.12it/s] 90%|█████████ | 334355/371472 [4:26:17<3:15:54, 3.16it/s] 90%|█████████ | 334356/371472 [4:26:17<3:13:11, 3.20it/s] 90%|█████████ | 334357/371472 [4:26:17<3:18:26, 3.12it/s] 90%|█████████ | 334358/371472 [4:26:18<3:05:32, 3.33it/s] 90%|█████████ | 334359/371472 [4:26:18<3:05:16, 3.34it/s] 90%|█████████ | 334360/371472 [4:26:18<2:54:34, 3.54it/s] {'loss': 2.7862, 'learning_rate': 1.8996315369863605e-07, 'epoch': 14.4} + 90%|█████████ | 334360/371472 [4:26:18<2:54:34, 3.54it/s] 90%|█████████ | 334361/371472 [4:26:18<2:52:29, 3.59it/s] 90%|█████████ | 334362/371472 [4:26:19<2:46:54, 3.71it/s] 90%|█████████ | 334363/371472 [4:26:19<2:55:15, 3.53it/s] 90%|█████████ | 334364/371472 [4:26:19<2:54:19, 3.55it/s] 90%|█████████ | 334365/371472 [4:26:20<2:48:49, 3.66it/s] 90%|█████████ | 334366/371472 [4:26:20<2:56:03, 3.51it/s] 90%|█████████ | 334367/371472 [4:26:20<2:48:56, 3.66it/s] 90%|█████████ | 334368/371472 [4:26:20<2:42:04, 3.82it/s] 90%|█████████ | 334369/371472 [4:26:21<2:47:48, 3.69it/s] 90%|█████████ | 334370/371472 [4:26:21<2:46:37, 3.71it/s] 90%|█████████ | 334371/371472 [4:26:21<2:44:50, 3.75it/s] 90%|█████████ | 334372/371472 [4:26:21<2:45:12, 3.74it/s] 90%|█████████ | 334373/371472 [4:26:22<2:42:46, 3.80it/s] 90%|█████████ | 334374/371472 [4:26:22<2:42:46, 3.80it/s] 90%|█████████ | 334375/371472 [4:26:22<2:42:56, 3.79it/s] 90%|█████████ | 334376/371472 [4:26:22<2:38:50, 3.89it/s] 90%|█████████ | 334377/371472 [4:26:23<2:41:22, 3.83it/s] 90%|█████████ | 334378/371472 [4:26:23<2:36:40, 3.95it/s] 90%|█████████ | 334379/371472 [4:26:23<2:52:40, 3.58it/s] 90%|█████████ | 334380/371472 [4:26:24<2:56:09, 3.51it/s] {'loss': 2.7393, 'learning_rate': 1.8991467172315712e-07, 'epoch': 14.4} + 90%|█████████ | 334380/371472 [4:26:24<2:56:09, 3.51it/s] 90%|█████████ | 334381/371472 [4:26:24<3:00:57, 3.42it/s] 90%|█████████ | 334382/371472 [4:26:24<2:53:46, 3.56it/s] 90%|█████████ | 334383/371472 [4:26:24<2:52:09, 3.59it/s] 90%|█████████ | 334384/371472 [4:26:25<2:44:38, 3.75it/s] 90%|█████████ | 334385/371472 [4:26:25<2:47:21, 3.69it/s] 90%|█████████ | 334386/371472 [4:26:25<2:51:50, 3.60it/s] 90%|████���████ | 334387/371472 [4:26:26<2:55:17, 3.53it/s] 90%|█████████ | 334388/371472 [4:26:26<3:00:42, 3.42it/s] 90%|█████████ | 334389/371472 [4:26:26<2:54:34, 3.54it/s] 90%|█████████ | 334390/371472 [4:26:26<2:51:44, 3.60it/s] 90%|█████████ | 334391/371472 [4:26:27<2:48:31, 3.67it/s] 90%|█████████ | 334392/371472 [4:26:27<2:47:03, 3.70it/s] 90%|█████████ | 334393/371472 [4:26:27<2:58:32, 3.46it/s] 90%|█████████ | 334394/371472 [4:26:27<2:50:36, 3.62it/s] 90%|█████████ | 334395/371472 [4:26:28<2:48:46, 3.66it/s] 90%|█████████ | 334396/371472 [4:26:28<2:50:40, 3.62it/s] 90%|█████████ | 334397/371472 [4:26:28<2:42:48, 3.80it/s] 90%|█████████ | 334398/371472 [4:26:29<2:50:20, 3.63it/s] 90%|█████████ | 334399/371472 [4:26:29<2:49:09, 3.65it/s] 90%|█████████ | 334400/371472 [4:26:29<2:53:05, 3.57it/s] {'loss': 2.6667, 'learning_rate': 1.8986618974767824e-07, 'epoch': 14.4} + 90%|█████████ | 334400/371472 [4:26:29<2:53:05, 3.57it/s] 90%|█████████ | 334401/371472 [4:26:29<2:48:40, 3.66it/s] 90%|█████████ | 334402/371472 [4:26:30<2:45:42, 3.73it/s] 90%|█████████ | 334403/371472 [4:26:30<2:51:37, 3.60it/s] 90%|█████████ | 334404/371472 [4:26:30<2:43:16, 3.78it/s] 90%|█████████ | 334405/371472 [4:26:30<2:54:03, 3.55it/s] 90%|█████████ | 334406/371472 [4:26:31<2:52:19, 3.58it/s] 90%|█████████ | 334407/371472 [4:26:31<3:00:38, 3.42it/s] 90%|█████████ | 334408/371472 [4:26:31<2:54:57, 3.53it/s] 90%|█████████ | 334409/371472 [4:26:32<2:57:20, 3.48it/s] 90%|█████████ | 334410/371472 [4:26:32<2:55:12, 3.53it/s] 90%|█████████ | 334411/371472 [4:26:32<3:10:47, 3.24it/s] 90%|█████████ | 334412/371472 [4:26:33<3:08:59, 3.27it/s] 90%|█████████ | 334413/371472 [4:26:33<2:58:00, 3.47it/s] 90%|█████████ | 334414/371472 [4:26:33<2:58:35, 3.46it/s] 90%|█████████ | 334415/371472 [4:26:33<2:56:08, 3.51it/s] 90%|█████████ | 334416/371472 [4:26:34<2:48:30, 3.67it/s] 90%|█████████ | 334417/371472 [4:26:34<2:45:28, 3.73it/s] 90%|█████████ | 334418/371472 [4:26:34<2:46:19, 3.71it/s] 90%|█████████ | 334419/371472 [4:26:34<2:50:15, 3.63it/s] 90%|█████████ | 334420/371472 [4:26:35<2:56:12, 3.50it/s] {'loss': 2.5224, 'learning_rate': 1.8981770777219932e-07, 'epoch': 14.4} + 90%|█████████ | 334420/371472 [4:26:35<2:56:12, 3.50it/s] 90%|█████████ | 334421/371472 [4:26:35<2:51:36, 3.60it/s] 90%|█████████ | 334422/371472 [4:26:35<2:44:14, 3.76it/s] 90%|█████████ | 334423/371472 [4:26:36<2:58:46, 3.45it/s] 90%|█████████ | 334424/371472 [4:26:36<2:53:41, 3.55it/s] 90%|█████████ | 334425/371472 [4:26:36<2:57:02, 3.49it/s] 90%|█████████ | 334426/371472 [4:26:36<3:03:15, 3.37it/s] 90%|█████████ | 334427/371472 [4:26:37<2:56:43, 3.49it/s] 90%|█████████ | 334428/371472 [4:26:37<2:54:30, 3.54it/s] 90%|█████████ | 334429/371472 [4:26:37<2:48:31, 3.66it/s] 90%|█████████ | 334430/371472 [4:26:38<2:48:53, 3.66it/s] 90%|█████████ | 334431/371472 [4:26:38<2:45:06, 3.74it/s] 90%|█████████ | 334432/371472 [4:26:38<2:48:30, 3.66it/s] 90%|█████████ | 334433/371472 [4:26:38<2:57:36, 3.48it/s] 90%|█████████ | 334434/371472 [4:26:39<2:55:56, 3.51it/s] 90%|█████████ | 334435/371472 [4:26:39<2:51:51, 3.59it/s] 90%|█████████ | 334436/371472 [4:26:39<2:53:21, 3.56it/s] 90%|█████████ | 334437/371472 [4:26:40<3:01:11, 3.41it/s] 90%|█████████ | 334438/371472 [4:26:40<2:55:44, 3.51it/s] 90%|█████████ | 334439/371472 [4:26:40<2:54:56, 3.53it/s] 90%|█████████ | 334440/371472 [4:26:40<2:46:21, 3.71it/s] {'loss': 2.6811, 'learning_rate': 1.897692257967205e-07, 'epoch': 14.4} + 90%|█████████ | 334440/371472 [4:26:40<2:46:21, 3.71it/s] 90%|█████████ | 334441/371472 [4:26:41<2:49:34, 3.64it/s] 90%|█████████ | 334442/371472 [4:26:41<2:49:41, 3.64it/s] 90%|█████████ | 334443/371472 [4:26:41<2:45:56, 3.72it/s] 90%|█████████ | 334444/371472 [4:26:41<2:45:31, 3.73it/s] 90%|█████████ | 334445/371472 [4:26:42<2:42:49, 3.79it/s] 90%|█████████ | 334446/371472 [4:26:42<2:40:01, 3.86it/s] 90%|█████████ | 334447/371472 [4:26:42<2:37:34, 3.92it/s] 90%|█████████ | 334448/371472 [4:26:42<2:40:43, 3.84it/s] 90%|█████████ | 334449/371472 [4:26:43<2:41:17, 3.83it/s] 90%|█████████ | 334450/371472 [4:26:43<2:44:47, 3.74it/s] 90%|█████████ | 334451/371472 [4:26:43<2:50:17, 3.62it/s] 90%|█████████ | 334452/371472 [4:26:44<2:50:46, 3.61it/s] 90%|█████████ | 334453/371472 [4:26:44<3:07:36, 3.29it/s] 90%|█████████ | 334454/371472 [4:26:44<3:11:47, 3.22it/s] 90%|█████████ | 334455/371472 [4:26:45<3:02:21, 3.38it/s] 90%|█████████ | 334456/371472 [4:26:45<3:01:31, 3.40it/s] 90%|█████████ | 334457/371472 [4:26:45<2:57:17, 3.48it/s] 90%|█████████ | 334458/371472 [4:26:45<3:01:13, 3.40it/s] 90%|█████████ | 334459/371472 [4:26:46<3:05:10, 3.33it/s] 90%|█████████ | 334460/371472 [4:26:46<2:54:00, 3.54it/s] {'loss': 2.4879, 'learning_rate': 1.8972074382124154e-07, 'epoch': 14.41} + 90%|█████████ | 334460/371472 [4:26:46<2:54:00, 3.54it/s] 90%|█████████ | 334461/371472 [4:26:46<2:54:27, 3.54it/s] 90%|█████████ | 334462/371472 [4:26:46<2:50:26, 3.62it/s] 90%|█████████ | 334463/371472 [4:26:47<2:48:59, 3.65it/s] 90%|█████████ | 334464/371472 [4:26:47<2:44:49, 3.74it/s] 90%|█████████ | 334465/371472 [4:26:47<2:42:54, 3.79it/s] 90%|█████████ | 334466/371472 [4:26:48<2:54:15, 3.54it/s] 90%|█████████ | 334467/371472 [4:26:48<2:51:15, 3.60it/s] 90%|█████████ | 334468/371472 [4:26:48<2:57:54, 3.47it/s] 90%|█████████ | 334469/371472 [4:26:49<3:11:49, 3.21it/s] 90%|█████████ | 334470/371472 [4:26:49<3:10:58, 3.23it/s] 90%|█████████ | 334471/371472 [4:26:49<3:02:31, 3.38it/s] 90%|█████████ | 334472/371472 [4:26:49<2:57:57, 3.47it/s] 90%|█████████ | 334473/371472 [4:26:50<2:55:11, 3.52it/s] 90%|█████████ | 334474/371472 [4:26:50<2:56:37, 3.49it/s] 90%|█████████ | 334475/371472 [4:26:50<2:48:35, 3.66it/s] 90%|█████████ | 334476/371472 [4:26:50<2:55:31, 3.51it/s] 90%|█████████ | 334477/371472 [4:26:51<2:57:29, 3.47it/s] 90%|█████████ | 334478/371472 [4:26:51<3:00:01, 3.42it/s] 90%|█████████ | 334479/371472 [4:26:51<2:56:04, 3.50it/s] 90%|█████████ | 334480/371472 [4:26:52<3:00:15, 3.42it/s] {'loss': 2.4633, 'learning_rate': 1.896722618457627e-07, 'epoch': 14.41} + 90%|█████████ | 334480/371472 [4:26:52<3:00:15, 3.42it/s] 90%|█████████ | 334481/371472 [4:26:52<2:52:50, 3.57it/s] 90%|█████████ | 334482/371472 [4:26:52<2:53:48, 3.55it/s] 90%|█████████ | 334483/371472 [4:26:52<2:49:55, 3.63it/s] 90%|█████████ | 334484/371472 [4:26:53<2:48:05, 3.67it/s] 90%|█████████ | 334485/371472 [4:26:53<2:56:30, 3.49it/s] 90%|█████████ | 334486/371472 [4:26:53<2:48:11, 3.66it/s] 90%|█████████ | 334487/371472 [4:26:54<2:42:40, 3.79it/s] 90%|█████████ | 334488/371472 [4:26:54<3:01:15, 3.40it/s] 90%|█████████ | 334489/371472 [4:26:54<3:02:56, 3.37it/s] 90%|█████████ | 334490/371472 [4:26:54<2:54:57, 3.52it/s] 90%|█████████ | 334491/371472 [4:26:55<2:49:21, 3.64it/s] 90%|█████████ | 334492/371472 [4:26:55<3:03:28, 3.36it/s] 90%|█████████ | 334493/371472 [4:26:55<3:02:43, 3.37it/s] 90%|█████████ | 334494/371472 [4:26:56<3:01:53, 3.39it/s] 90%|█████████ | 334495/371472 [4:26:56<3:03:36, 3.36it/s] 90%|█████████ | 334496/371472 [4:26:56<3:07:41, 3.28it/s] 90%|█████████ | 334497/371472 [4:26:57<3:00:00, 3.42it/s] 90%|█████████ | 334498/371472 [4:26:57<2:57:14, 3.48it/s] 90%|█████████ | 334499/371472 [4:26:57<2:47:52, 3.67it/s] 90%|█████████ | 334500/371472 [4:26:57<2:42:58, 3.78it/s] {'loss': 2.4411, 'learning_rate': 1.8962377987028376e-07, 'epoch': 14.41} + 90%|█████████ | 334500/371472 [4:26:57<2:42:58, 3.78it/s] 90%|█████████ | 334501/371472 [4:26:58<2:49:30, 3.64it/s] 90%|█���███████ | 334502/371472 [4:26:58<2:44:38, 3.74it/s] 90%|█████████ | 334503/371472 [4:26:58<2:43:53, 3.76it/s] 90%|█████████ | 334504/371472 [4:26:58<2:47:10, 3.69it/s] 90%|█████████ | 334505/371472 [4:26:59<2:44:35, 3.74it/s] 90%|█████████ | 334506/371472 [4:26:59<2:41:06, 3.82it/s] 90%|█████████ | 334507/371472 [4:26:59<2:50:19, 3.62it/s] 90%|█████████ | 334508/371472 [4:27:00<2:57:08, 3.48it/s] 90%|█████████ | 334509/371472 [4:27:00<2:50:17, 3.62it/s] 90%|█████████ | 334510/371472 [4:27:00<2:47:32, 3.68it/s] 90%|█████████ | 334511/371472 [4:27:00<2:49:49, 3.63it/s] 90%|█████████ | 334512/371472 [4:27:01<2:57:23, 3.47it/s] 90%|█████████ | 334513/371472 [4:27:01<3:01:50, 3.39it/s] 90%|█████████ | 334514/371472 [4:27:01<2:50:56, 3.60it/s] 90%|█████████ | 334515/371472 [4:27:01<2:52:47, 3.56it/s] 90%|█████████ | 334516/371472 [4:27:02<2:54:45, 3.52it/s] 90%|█████████ | 334517/371472 [4:27:02<2:50:01, 3.62it/s] 90%|█████████ | 334518/371472 [4:27:02<2:59:07, 3.44it/s] 90%|█████████ | 334519/371472 [4:27:03<2:55:12, 3.52it/s] 90%|█████████ | 334520/371472 [4:27:03<2:50:22, 3.61it/s] {'loss': 2.6143, 'learning_rate': 1.895752978948049e-07, 'epoch': 14.41} + 90%|█████████ | 334520/371472 [4:27:03<2:50:22, 3.61it/s] 90%|█████████ | 334521/371472 [4:27:03<2:43:58, 3.76it/s] 90%|█████████ | 334522/371472 [4:27:03<2:46:12, 3.71it/s] 90%|█████████ | 334523/371472 [4:27:04<2:44:43, 3.74it/s] 90%|█████████ | 334524/371472 [4:27:04<2:43:11, 3.77it/s] 90%|█████████ | 334525/371472 [4:27:04<2:50:08, 3.62it/s] 90%|█████████ | 334526/371472 [4:27:04<2:45:03, 3.73it/s] 90%|█████████ | 334527/371472 [4:27:05<2:47:31, 3.68it/s] 90%|█████████ | 334528/371472 [4:27:05<2:47:43, 3.67it/s] 90%|█████████ | 334529/371472 [4:27:05<2:51:35, 3.59it/s] 90%|█████████ | 334530/371472 [4:27:06<2:55:13, 3.51it/s] 90%|█████████ | 334531/371472 [4:27:06<2:56:05, 3.50it/s] 90%|█████████ | 334532/371472 [4:27:06<3:09:00, 3.26it/s] 90%|█████████ | 334533/371472 [4:27:06<2:55:56, 3.50it/s] 90%|█████████ | 334534/371472 [4:27:07<2:53:12, 3.55it/s] 90%|█████████ | 334535/371472 [4:27:07<2:46:14, 3.70it/s] 90%|█████████ | 334536/371472 [4:27:07<2:41:17, 3.82it/s] 90%|█████████ | 334537/371472 [4:27:08<2:42:39, 3.78it/s] 90%|█████████ | 334538/371472 [4:27:08<2:50:36, 3.61it/s] 90%|█████████ | 334539/371472 [4:27:08<2:53:39, 3.54it/s] 90%|█████████ | 334540/371472 [4:27:08<2:45:56, 3.71it/s] {'loss': 2.494, 'learning_rate': 1.8952681591932596e-07, 'epoch': 14.41} + 90%|█████████ | 334540/371472 [4:27:08<2:45:56, 3.71it/s] 90%|█████████ | 334541/371472 [4:27:09<2:54:55, 3.52it/s] 90%|█████████ | 334542/371472 [4:27:09<2:50:13, 3.62it/s] 90%|█████████ | 334543/371472 [4:27:09<2:44:18, 3.75it/s] 90%|█████████ | 334544/371472 [4:27:10<3:05:47, 3.31it/s] 90%|█████████ | 334545/371472 [4:27:10<2:57:29, 3.47it/s] 90%|█████████ | 334546/371472 [4:27:10<2:54:54, 3.52it/s] 90%|█████████ | 334547/371472 [4:27:10<2:56:01, 3.50it/s] 90%|█████████ | 334548/371472 [4:27:11<3:12:30, 3.20it/s] 90%|█████████ | 334549/371472 [4:27:11<3:02:07, 3.38it/s] 90%|█████████ | 334550/371472 [4:27:11<3:05:32, 3.32it/s] 90%|█████████ | 334551/371472 [4:27:12<3:01:48, 3.38it/s] 90%|█████████ | 334552/371472 [4:27:12<3:12:09, 3.20it/s] 90%|█████████ | 334553/371472 [4:27:12<3:02:15, 3.38it/s] 90%|█████████ | 334554/371472 [4:27:13<3:00:39, 3.41it/s] 90%|█████████ | 334555/371472 [4:27:13<2:59:52, 3.42it/s] 90%|█████████ | 334556/371472 [4:27:13<2:53:45, 3.54it/s] 90%|█████████ | 334557/371472 [4:27:13<3:05:15, 3.32it/s] 90%|█████████ | 334558/371472 [4:27:14<3:08:50, 3.26it/s] 90%|█████████ | 334559/371472 [4:27:14<2:58:41, 3.44it/s] 90%|█████████ | 334560/371472 [4:27:14<3:00:37, 3.41it/s] {'loss': 2.4457, 'learning_rate': 1.8947833394384713e-07, 'epoch': 14.41} + 90%|█████████ | 334560/371472 [4:27:14<3:00:37, 3.41it/s] 90%|█████████ | 334561/371472 [4:27:15<2:55:15, 3.51it/s] 90%|█████████ | 334562/371472 [4:27:15<2:52:23, 3.57it/s] 90%|█████████ | 334563/371472 [4:27:15<2:49:32, 3.63it/s] 90%|█████████ | 334564/371472 [4:27:15<2:51:50, 3.58it/s] 90%|█████████ | 334565/371472 [4:27:16<2:59:15, 3.43it/s] 90%|█████████ | 334566/371472 [4:27:16<2:56:12, 3.49it/s] 90%|█████████ | 334567/371472 [4:27:16<2:59:15, 3.43it/s] 90%|█████████ | 334568/371472 [4:27:17<2:56:14, 3.49it/s] 90%|█████████ | 334569/371472 [4:27:17<2:49:32, 3.63it/s] 90%|█████████ | 334570/371472 [4:27:17<2:52:32, 3.56it/s] 90%|█████████ | 334571/371472 [4:27:17<2:45:50, 3.71it/s] 90%|█████████ | 334572/371472 [4:27:18<3:01:58, 3.38it/s] 90%|█████████ | 334573/371472 [4:27:18<3:15:37, 3.14it/s] 90%|█████████ | 334574/371472 [4:27:18<3:03:40, 3.35it/s] 90%|█████████ | 334575/371472 [4:27:19<2:54:47, 3.52it/s] 90%|█████████ | 334576/371472 [4:27:19<2:52:00, 3.57it/s] 90%|█████████ | 334577/371472 [4:27:19<2:48:25, 3.65it/s] 90%|█████████ | 334578/371472 [4:27:19<2:48:10, 3.66it/s] 90%|█████████ | 334579/371472 [4:27:20<3:05:38, 3.31it/s] 90%|█████████ | 334580/371472 [4:27:20<2:59:34, 3.42it/s] {'loss': 2.4504, 'learning_rate': 1.8942985196836815e-07, 'epoch': 14.41} + 90%|█████████ | 334580/371472 [4:27:20<2:59:34, 3.42it/s] 90%|█████████ | 334581/371472 [4:27:20<2:56:51, 3.48it/s] 90%|█████████ | 334582/371472 [4:27:21<3:03:24, 3.35it/s] 90%|█████████ | 334583/371472 [4:27:21<2:56:41, 3.48it/s] 90%|█████████ | 334584/371472 [4:27:21<2:48:47, 3.64it/s] 90%|█████████ | 334585/371472 [4:27:21<2:55:03, 3.51it/s] 90%|█████████ | 334586/371472 [4:27:22<2:51:59, 3.57it/s] 90%|█████████ | 334587/371472 [4:27:22<2:45:45, 3.71it/s] 90%|█████████ | 334588/371472 [4:27:22<2:46:34, 3.69it/s] 90%|█████████ | 334589/371472 [4:27:22<2:39:36, 3.85it/s] 90%|█████████ | 334590/371472 [4:27:23<2:38:00, 3.89it/s] 90%|█████████ | 334591/371472 [4:27:23<2:36:48, 3.92it/s] 90%|█████████ | 334592/371472 [4:27:23<2:39:28, 3.85it/s] 90%|█████████ | 334593/371472 [4:27:23<2:41:18, 3.81it/s] 90%|█████████ | 334594/371472 [4:27:24<2:40:38, 3.83it/s] 90%|█████████ | 334595/371472 [4:27:24<2:45:55, 3.70it/s] 90%|█████████ | 334596/371472 [4:27:24<2:48:32, 3.65it/s] 90%|█████████ | 334597/371472 [4:27:25<3:04:05, 3.34it/s] 90%|█████████ | 334598/371472 [4:27:25<3:07:56, 3.27it/s] 90%|█████████ | 334599/371472 [4:27:25<3:08:49, 3.25it/s] 90%|█████████ | 334600/371472 [4:27:26<3:04:05, 3.34it/s] {'loss': 2.7062, 'learning_rate': 1.8938136999288933e-07, 'epoch': 14.41} + 90%|█████████ | 334600/371472 [4:27:26<3:04:05, 3.34it/s] 90%|█████████ | 334601/371472 [4:27:26<3:15:27, 3.14it/s] 90%|█████████ | 334602/371472 [4:27:26<3:12:26, 3.19it/s] 90%|█████████ | 334603/371472 [4:27:27<3:06:21, 3.30it/s] 90%|█████████ | 334604/371472 [4:27:27<2:54:32, 3.52it/s] 90%|█████████ | 334605/371472 [4:27:27<2:52:39, 3.56it/s] 90%|█████████ | 334606/371472 [4:27:27<2:59:32, 3.42it/s] 90%|█████████ | 334607/371472 [4:27:28<3:02:03, 3.37it/s] 90%|█████████ | 334608/371472 [4:27:28<2:56:55, 3.47it/s] 90%|█████████ | 334609/371472 [4:27:28<2:58:06, 3.45it/s] 90%|█████████ | 334610/371472 [4:27:29<2:57:43, 3.46it/s] 90%|█████████ | 334611/371472 [4:27:29<2:56:41, 3.48it/s] 90%|█████████ | 334612/371472 [4:27:29<2:48:08, 3.65it/s] 90%|█████████ | 334613/371472 [4:27:29<2:42:33, 3.78it/s] 90%|█████████ | 334614/371472 [4:27:30<2:45:54, 3.70it/s] 90%|█████████ | 334615/371472 [4:27:30<2:48:46, 3.64it/s] 90%|█████████ | 334616/371472 [4:27:30<2:49:38, 3.62it/s] 90%|█████████ | 334617/371472 [4:27:30<2:50:27, 3.60it/s] 90%|█████████ | 334618/371472 [4:27:31<3:17:48, 3.11it/s] 90%|█████████ | 334619/371472 [4:27:31<3:19:06, 3.08it/s] 90%|█████████ | 334620/371472 [4:27:31<3:14:44, 3.15it/s] {'loss': 2.6154, 'learning_rate': 1.893328880174104e-07, 'epoch': 14.41} + 90%|█████████ | 334620/371472 [4:27:31<3:14:44, 3.15it/s] 90%|█████████ | 334621/371472 [4:27:32<3:03:08, 3.35it/s] 90%|█████████ | 334622/371472 [4:27:32<2:55:33, 3.50it/s] 90%|█████████ | 334623/371472 [4:27:32<2:53:42, 3.54it/s] 90%|█████████ | 334624/371472 [4:27:33<2:49:07, 3.63it/s] 90%|█████████ | 334625/371472 [4:27:33<2:59:53, 3.41it/s] 90%|█████████ | 334626/371472 [4:27:33<2:53:25, 3.54it/s] 90%|█████████ | 334627/371472 [4:27:33<2:49:51, 3.62it/s] 90%|█████████ | 334628/371472 [4:27:34<2:59:39, 3.42it/s] 90%|█████████ | 334629/371472 [4:27:34<2:56:44, 3.47it/s] 90%|█████████ | 334630/371472 [4:27:34<2:54:27, 3.52it/s] 90%|█████████ | 334631/371472 [4:27:35<2:50:03, 3.61it/s] 90%|█████████ | 334632/371472 [4:27:35<2:46:01, 3.70it/s] 90%|█████████ | 334633/371472 [4:27:35<2:48:16, 3.65it/s] 90%|█████████ | 334634/371472 [4:27:35<2:51:06, 3.59it/s] 90%|█████████ | 334635/371472 [4:27:36<2:50:20, 3.60it/s] 90%|█████████ | 334636/371472 [4:27:36<2:46:30, 3.69it/s] 90%|█████████ | 334637/371472 [4:27:36<2:53:45, 3.53it/s] 90%|█████████ | 334638/371472 [4:27:36<2:54:16, 3.52it/s] 90%|█████████ | 334639/371472 [4:27:37<2:48:35, 3.64it/s] 90%|█████████ | 334640/371472 [4:27:37<2:42:44, 3.77it/s] {'loss': 2.6459, 'learning_rate': 1.8928440604193155e-07, 'epoch': 14.41} + 90%|█████████ | 334640/371472 [4:27:37<2:42:44, 3.77it/s] 90%|█████████ | 334641/371472 [4:27:37<2:42:43, 3.77it/s] 90%|█████████ | 334642/371472 [4:27:37<2:36:01, 3.93it/s] 90%|█████████ | 334643/371472 [4:27:38<2:38:45, 3.87it/s] 90%|█████████ | 334644/371472 [4:27:38<2:50:25, 3.60it/s] 90%|█████████ | 334645/371472 [4:27:38<2:44:05, 3.74it/s] 90%|█████████ | 334646/371472 [4:27:39<2:43:32, 3.75it/s] 90%|█████████ | 334647/371472 [4:27:39<2:41:05, 3.81it/s] 90%|█████████ | 334648/371472 [4:27:39<2:43:25, 3.76it/s] 90%|█████████ | 334649/371472 [4:27:39<2:44:48, 3.72it/s] 90%|█████████ | 334650/371472 [4:27:40<2:43:57, 3.74it/s] 90%|█████████ | 334651/371472 [4:27:40<2:42:10, 3.78it/s] 90%|█████████ | 334652/371472 [4:27:40<2:42:39, 3.77it/s] 90%|█████████ | 334653/371472 [4:27:40<2:40:39, 3.82it/s] 90%|█████████ | 334654/371472 [4:27:41<2:38:22, 3.87it/s] 90%|█████████ | 334655/371472 [4:27:41<2:40:30, 3.82it/s] 90%|█████████ | 334656/371472 [4:27:41<2:37:10, 3.90it/s] 90%|█████████ | 334657/371472 [4:27:42<2:56:43, 3.47it/s] 90%|█████████ | 334658/371472 [4:27:42<2:48:53, 3.63it/s] 90%|█████████ | 334659/371472 [4:27:42<2:42:37, 3.77it/s] 90%|█████████ | 334660/371472 [4:27:42<2:42:36, 3.77it/s] {'loss': 2.7839, 'learning_rate': 1.892359240664526e-07, 'epoch': 14.41} + 90%|█████████ | 334660/371472 [4:27:42<2:42:36, 3.77it/s] 90%|█████████ | 334661/371472 [4:27:43<2:44:18, 3.73it/s] 90%|█████████ | 334662/371472 [4:27:43<2:44:17, 3.73it/s] 90%|█████████ | 334663/371472 [4:27:43<2:42:52, 3.77it/s] 90%|█████████ | 334664/371472 [4:27:43<2:44:48, 3.72it/s] 90%|█████████ | 334665/371472 [4:27:44<2:56:14, 3.48it/s] 90%|█████████ | 334666/371472 [4:27:44<2:54:43, 3.51it/s] 90%|█████████ | 334667/371472 [4:27:44<3:00:39, 3.40it/s] 90%|█████████ | 334668/371472 [4:27:45<2:52:16, 3.56it/s] 90%|█████████ | 334669/371472 [4:27:45<2:45:48, 3.70it/s] 90%|█████████ | 334670/371472 [4:27:45<2:43:29, 3.75it/s] 90%|█████████ | 334671/371472 [4:27:45<2:43:29, 3.75it/s] 90%|█████████ | 334672/371472 [4:27:46<2:45:21, 3.71it/s] 90%|█████████ | 334673/371472 [4:27:46<2:46:58, 3.67it/s] 90%|█████████ | 334674/371472 [4:27:46<3:05:10, 3.31it/s] 90%|█████████ | 334675/371472 [4:27:47<3:05:52, 3.30it/s] 90%|█████████ | 334676/371472 [4:27:47<3:03:37, 3.34it/s] 90%|█████████ | 334677/371472 [4:27:47<2:56:28, 3.47it/s] 90%|█████████ | 334678/371472 [4:27:47<2:57:33, 3.45it/s] 90%|█████████ | 334679/371472 [4:27:48<2:52:38, 3.55it/s] 90%|█████████ | 334680/371472 [4:27:48<2:52:28, 3.56it/s] {'loss': 2.5214, 'learning_rate': 1.8918744209097377e-07, 'epoch': 14.42} + 90%|█████████ | 334680/371472 [4:27:48<2:52:28, 3.56it/s] 90%|█████████ | 334681/371472 [4:27:48<2:56:01, 3.48it/s] 90%|█████████ | 334682/371472 [4:27:49<2:59:37, 3.41it/s] 90%|█████████ | 334683/371472 [4:27:49<3:00:15, 3.40it/s] 90%|█████████ | 334684/371472 [4:27:49<2:58:08, 3.44it/s] 90%|█████████ | 334685/371472 [4:27:49<2:54:04, 3.52it/s] 90%|█████████ | 334686/371472 [4:27:50<3:10:55, 3.21it/s] 90%|█████████ | 334687/371472 [4:27:50<3:02:26, 3.36it/s] 90%|█████████ | 334688/371472 [4:27:50<3:04:01, 3.33it/s] 90%|█████████ | 334689/371472 [4:27:51<2:58:43, 3.43it/s] 90%|█████████ | 334690/371472 [4:27:51<2:54:36, 3.51it/s] 90%|█████████ | 334691/371472 [4:27:51<2:52:27, 3.55it/s] 90%|█████████ | 334692/371472 [4:27:51<2:53:00, 3.54it/s] 90%|█████████ | 334693/371472 [4:27:52<2:48:36, 3.64it/s] 90%|█████████ | 334694/371472 [4:27:52<2:45:41, 3.70it/s] 90%|█████████ | 334695/371472 [4:27:52<2:50:02, 3.60it/s] 90%|█████████ | 334696/371472 [4:27:52<2:45:17, 3.71it/s] 90%|█████████ | 334697/371472 [4:27:53<2:43:49, 3.74it/s] 90%|█████████ | 334698/371472 [4:27:53<2:39:45, 3.84it/s] 90%|█████████ | 334699/371472 [4:27:53<2:47:34, 3.66it/s] 90%|█████████ | 334700/371472 [4:27:54<2:51:28, 3.57it/s] {'loss': 2.6788, 'learning_rate': 1.8913896011549485e-07, 'epoch': 14.42} + 90%|█████████ | 334700/371472 [4:27:54<2:51:28, 3.57it/s] 90%|█████████ | 334701/371472 [4:27:54<3:01:41, 3.37it/s] 90%|█████████ | 334702/371472 [4:27:54<2:53:52, 3.52it/s] 90%|█████████ | 334703/371472 [4:27:54<2:49:48, 3.61it/s] 90%|█████████ | 334704/371472 [4:27:55<2:57:01, 3.46it/s] 90%|█████████ | 334705/371472 [4:27:55<2:50:55, 3.58it/s] 90%|█████████ | 334706/371472 [4:27:55<3:03:05, 3.35it/s] 90%|█████████ | 334707/371472 [4:27:56<2:59:03, 3.42it/s] 90%|█████████ | 334708/371472 [4:27:56<2:55:24, 3.49it/s] 90%|█████████ | 334709/371472 [4:27:56<2:54:33, 3.51it/s] 90%|█████████ | 334710/371472 [4:27:57<3:10:12, 3.22it/s] 90%|█████████ | 334711/371472 [4:27:57<3:07:24, 3.27it/s] 90%|█████████ | 334712/371472 [4:27:57<3:09:12, 3.24it/s] 90%|█████████ | 334713/371472 [4:27:57<3:04:34, 3.32it/s] 90%|█████████ | 334714/371472 [4:27:58<2:54:49, 3.50it/s] 90%|█████████ | 334715/371472 [4:27:58<3:03:01, 3.35it/s] 90%|█████████ | 334716/371472 [4:27:58<2:57:04, 3.46it/s] 90%|█████████ | 334717/371472 [4:27:59<3:05:44, 3.30it/s] 90%|█████████ | 334718/371472 [4:27:59<3:08:46, 3.25it/s] 90%|█████████ | 334719/371472 [4:27:59<3:14:37, 3.15it/s] 90%|█████████ | 334720/371472 [4:28:00<3:06:49, 3.28it/s] {'loss': 2.657, 'learning_rate': 1.8909047814001597e-07, 'epoch': 14.42} + 90%|█████████ | 334720/371472 [4:28:00<3:06:49, 3.28it/s] 90%|█████████ | 334721/371472 [4:28:00<3:13:09, 3.17it/s] 90%|█████████ | 334722/371472 [4:28:00<3:06:27, 3.28it/s] 90%|█████████ | 334723/371472 [4:28:00<2:58:21, 3.43it/s] 90%|█████████ | 334724/371472 [4:28:01<2:51:30, 3.57it/s] 90%|█████████ | 334725/371472 [4:28:01<2:46:53, 3.67it/s] 90%|█████████ | 334726/371472 [4:28:01<2:43:31, 3.75it/s] 90%|█████████ | 334727/371472 [4:28:01<2:42:35, 3.77it/s] 90%|█████████ | 334728/371472 [4:28:02<2:41:13, 3.80it/s] 90%|█████████ | 334729/371472 [4:28:02<2:43:27, 3.75it/s] 90%|█████████ | 334730/371472 [4:28:02<2:48:35, 3.63it/s] 90%|█████████ | 334731/371472 [4:28:03<2:53:33, 3.53it/s] 90%|█████████ | 334732/371472 [4:28:03<2:54:39, 3.51it/s] 90%|█████████ | 334733/371472 [4:28:03<2:56:15, 3.47it/s] 90%|█████████ | 334734/371472 [4:28:03<2:57:38, 3.45it/s] 90%|█████████ | 334735/371472 [4:28:04<2:51:58, 3.56it/s] 90%|█████████ | 334736/371472 [4:28:04<2:45:42, 3.69it/s] 90%|█████████ | 334737/371472 [4:28:04<2:58:10, 3.44it/s] 90%|█████████ | 334738/371472 [4:28:05<2:50:23, 3.59it/s] 90%|█████████ | 334739/371472 [4:28:05<3:01:14, 3.38it/s] 90%|█████████ | 334740/371472 [4:28:05<2:51:46, 3.56it/s] {'loss': 2.5923, 'learning_rate': 1.8904199616453704e-07, 'epoch': 14.42} + 90%|█████████ | 334740/371472 [4:28:05<2:51:46, 3.56it/s] 90%|█████████ | 334741/371472 [4:28:05<2:50:49, 3.58it/s] 90%|█████████ | 334742/371472 [4:28:06<2:49:15, 3.62it/s] 90%|█████████ | 334743/371472 [4:28:06<2:48:49, 3.63it/s] 90%|█████████ | 334744/371472 [4:28:06<2:51:15, 3.57it/s] 90%|█████████ | 334745/371472 [4:28:07<2:57:52, 3.44it/s] 90%|█████████ | 334746/371472 [4:28:07<2:50:03, 3.60it/s] 90%|█████████ | 334747/371472 [4:28:07<2:56:28, 3.47it/s] 90%|█████████ | 334748/371472 [4:28:08<3:15:46, 3.13it/s] 90%|█████████ | 334749/371472 [4:28:08<3:05:22, 3.30it/s] 90%|█████████ | 334750/371472 [4:28:08<2:56:01, 3.48it/s] 90%|█████████ | 334751/371472 [4:28:08<2:54:08, 3.51it/s] 90%|█████████ | 334752/371472 [4:28:09<3:24:28, 2.99it/s] 90%|█████████ | 334753/371472 [4:28:09<3:12:18, 3.18it/s] 90%|█████████ | 334754/371472 [4:28:09<3:01:18, 3.38it/s] 90%|█████████ | 334755/371472 [4:28:10<3:05:48, 3.29it/s] 90%|█████████ | 334756/371472 [4:28:10<3:02:12, 3.36it/s] 90%|█████████ | 334757/371472 [4:28:10<3:16:17, 3.12it/s] 90%|█████████ | 334758/371472 [4:28:11<3:05:11, 3.30it/s] 90%|█████████ | 334759/371472 [4:28:11<2:54:54, 3.50it/s] 90%|█████████ | 334760/371472 [4:28:11<3:02:18, 3.36it/s] {'loss': 2.6079, 'learning_rate': 1.889935141890581e-07, 'epoch': 14.42} + 90%|█████████ | 334760/371472 [4:28:11<3:02:18, 3.36it/s] 90%|█████████ | 334761/371472 [4:28:11<2:59:11, 3.41it/s] 90%|█████████ | 334762/371472 [4:28:12<2:58:08, 3.43it/s] 90%|█████████ | 334763/371472 [4:28:12<2:54:06, 3.51it/s] 90%|█████████ | 334764/371472 [4:28:12<2:52:44, 3.54it/s] 90%|█████████ | 334765/371472 [4:28:12<2:45:52, 3.69it/s] 90%|█████████ | 334766/371472 [4:28:13<2:41:02, 3.80it/s] 90%|█████████ | 334767/371472 [4:28:13<2:54:45, 3.50it/s] 90%|█████████ | 334768/371472 [4:28:13<2:51:08, 3.57it/s] 90%|█████████ | 334769/371472 [4:28:14<2:50:38, 3.58it/s] 90%|█████████ | 334770/371472 [4:28:14<2:48:13, 3.64it/s] 90%|█████████ | 334771/371472 [4:28:14<2:41:55, 3.78it/s] 90%|█████████ | 334772/371472 [4:28:14<2:41:47, 3.78it/s] 90%|█████████ | 334773/371472 [4:28:15<2:42:56, 3.75it/s] 90%|█████████ | 334774/371472 [4:28:15<3:03:09, 3.34it/s] 90%|█████████ | 334775/371472 [4:28:15<2:52:47, 3.54it/s] 90%|█████████ | 334776/371472 [4:28:15<2:43:58, 3.73it/s] 90%|█████████ | 334777/371472 [4:28:16<2:48:42, 3.63it/s] 90%|█████████ | 334778/371472 [4:28:16<2:45:46, 3.69it/s] 90%|█████████ | 334779/371472 [4:28:16<3:09:03, 3.23it/s] 90%|█████████ | 334780/371472 [4:28:17<3:04:23, 3.32it/s] {'loss': 2.7267, 'learning_rate': 1.8894503221357926e-07, 'epoch': 14.42} + 90%|█████████ | 334780/371472 [4:28:17<3:04:23, 3.32it/s] 90%|█████████ | 334781/371472 [4:28:17<3:06:00, 3.29it/s] 90%|█████████ | 334782/371472 [4:28:17<3:04:12, 3.32it/s] 90%|█████████ | 334783/371472 [4:28:18<2:59:11, 3.41it/s] 90%|█████████ | 334784/371472 [4:28:18<2:58:34, 3.42it/s] 90%|█████████ | 334785/371472 [4:28:18<2:56:07, 3.47it/s] 90%|█████████ | 334786/371472 [4:28:18<2:49:49, 3.60it/s] 90%|█████████ | 334787/371472 [4:28:19<3:07:20, 3.26it/s] 90%|█████████ | 334788/371472 [4:28:19<3:08:12, 3.25it/s] 90%|█████████ | 334789/371472 [4:28:19<3:13:58, 3.15it/s] 90%|█████████ | 334790/371472 [4:28:20<3:22:00, 3.03it/s] 90%|█████████ | 334791/371472 [4:28:20<3:08:58, 3.24it/s] 90%|█████████ | 334792/371472 [4:28:20<3:08:12, 3.25it/s] 90%|█████████ | 334793/371472 [4:28:21<3:10:12, 3.21it/s] 90%|█████████ | 334794/371472 [4:28:21<3:28:52, 2.93it/s] 90%|█████████ | 334795/371472 [4:28:21<3:26:17, 2.96it/s] 90%|█████████ | 334796/371472 [4:28:22<3:19:15, 3.07it/s] 90%|█████████ | 334797/371472 [4:28:22<3:07:50, 3.25it/s] 90%|█████████ | 334798/371472 [4:28:22<3:01:23, 3.37it/s] 90%|█████████ | 334799/371472 [4:28:23<3:00:37, 3.38it/s] 90%|█████████ | 334800/371472 [4:28:23<2:57:35, 3.44it/s] {'loss': 2.6234, 'learning_rate': 1.888965502381003e-07, 'epoch': 14.42} + 90%|█████████ | 334800/371472 [4:28:23<2:57:35, 3.44it/s] 90%|█████████ | 334801/371472 [4:28:23<2:51:46, 3.56it/s] 90%|█████████ | 334802/371472 [4:28:23<2:48:10, 3.63it/s] 90%|█████████ | 334803/371472 [4:28:24<2:44:03, 3.73it/s] 90%|█████████ | 334804/371472 [4:28:24<2:41:08, 3.79it/s] 90%|█████████ | 334805/371472 [4:28:24<2:39:20, 3.84it/s] 90%|█████████ | 334806/371472 [4:28:24<2:41:45, 3.78it/s] 90%|█████████ | 334807/371472 [4:28:25<2:52:06, 3.55it/s] 90%|█████████ | 334808/371472 [4:28:25<2:47:01, 3.66it/s] 90%|█████████ | 334809/371472 [4:28:25<2:43:55, 3.73it/s] 90%|█████████ | 334810/371472 [4:28:26<2:53:48, 3.52it/s] 90%|█████████ | 334811/371472 [4:28:26<2:47:59, 3.64it/s] 90%|█████████ | 334812/371472 [4:28:26<2:52:00, 3.55it/s] 90%|█████████ | 334813/371472 [4:28:26<3:03:30, 3.33it/s] 90%|█████████ | 334814/371472 [4:28:27<2:58:19, 3.43it/s] 90%|█████████ | 334815/371472 [4:28:27<2:59:54, 3.40it/s] 90%|█████████ | 334816/371472 [4:28:27<2:51:11, 3.57it/s] 90%|█████████ | 334817/371472 [4:28:28<2:45:39, 3.69it/s] 90%|█████████ | 334818/371472 [4:28:28<2:46:53, 3.66it/s] 90%|█████████ | 334819/371472 [4:28:28<2:46:08, 3.68it/s] 90%|█████████ | 334820/371472 [4:28:28<2:42:06, 3.77it/s] {'loss': 2.5956, 'learning_rate': 1.8884806826262149e-07, 'epoch': 14.42} + 90%|█████████ | 334820/371472 [4:28:28<2:42:06, 3.77it/s] 90%|█████████ | 334821/371472 [4:28:29<2:47:06, 3.66it/s] 90%|█████████ | 334822/371472 [4:28:29<2:44:50, 3.71it/s] 90%|█████████ | 334823/371472 [4:28:29<2:57:15, 3.45it/s] 90%|█████████ | 334824/371472 [4:28:29<2:55:30, 3.48it/s] 90%|█████████ | 334825/371472 [4:28:30<2:49:39, 3.60it/s] 90%|█████████ | 334826/371472 [4:28:30<2:54:05, 3.51it/s] 90%|█████████ | 334827/371472 [4:28:30<3:02:15, 3.35it/s] 90%|█████████ | 334828/371472 [4:28:31<2:59:08, 3.41it/s] 90%|█████████ | 334829/371472 [4:28:31<2:56:59, 3.45it/s] 90%|█████████ | 334830/371472 [4:28:31<3:00:19, 3.39it/s] 90%|█████████ | 334831/371472 [4:28:32<3:04:12, 3.32it/s] 90%|█████████ | 334832/371472 [4:28:32<3:10:50, 3.20it/s] 90%|█████████ | 334833/371472 [4:28:32<3:14:02, 3.15it/s] 90%|█████████ | 334834/371472 [4:28:32<3:04:59, 3.30it/s] 90%|█████████ | 334835/371472 [4:28:33<2:52:43, 3.54it/s] 90%|█████████ | 334836/371472 [4:28:33<2:46:06, 3.68it/s] 90%|█████████ | 334837/371472 [4:28:33<2:47:09, 3.65it/s] 90%|█████████ | 334838/371472 [4:28:34<2:45:58, 3.68it/s] 90%|█████████ | 334839/371472 [4:28:34<2:39:52, 3.82it/s] 90%|█████████ | 334840/371472 [4:28:34<2:45:00, 3.70it/s] {'loss': 2.5753, 'learning_rate': 1.8879958628714253e-07, 'epoch': 14.42} + 90%|█████████ | 334840/371472 [4:28:34<2:45:00, 3.70it/s] 90%|█████████ | 334841/371472 [4:28:34<2:41:03, 3.79it/s] 90%|█████████ | 334842/371472 [4:28:35<2:55:36, 3.48it/s] 90%|█████████ | 334843/371472 [4:28:35<2:49:11, 3.61it/s] 90%|█████████ | 334844/371472 [4:28:35<2:43:32, 3.73it/s] 90%|█████████ | 334845/371472 [4:28:35<2:36:31, 3.90it/s] 90%|█████████ | 334846/371472 [4:28:36<2:46:17, 3.67it/s] 90%|█████████ | 334847/371472 [4:28:36<2:43:06, 3.74it/s] 90%|█████████ | 334848/371472 [4:28:36<2:47:52, 3.64it/s] 90%|█████████ | 334849/371472 [4:28:37<2:53:30, 3.52it/s] 90%|█████████ | 334850/371472 [4:28:37<2:53:18, 3.52it/s] 90%|█████████ | 334851/371472 [4:28:37<2:47:29, 3.64it/s] 90%|█████████ | 334852/371472 [4:28:37<2:50:28, 3.58it/s] 90%|█████████ | 334853/371472 [4:28:38<2:42:58, 3.74it/s] 90%|█████████ | 334854/371472 [4:28:38<3:17:53, 3.08it/s] 90%|█████████ | 334855/371472 [4:28:38<3:22:17, 3.02it/s] 90%|█████████ | 334856/371472 [4:28:39<3:08:18, 3.24it/s] 90%|█████████ | 334857/371472 [4:28:39<3:02:53, 3.34it/s] 90%|█████████ | 334858/371472 [4:28:39<2:57:04, 3.45it/s] 90%|█████████ | 334859/371472 [4:28:40<3:11:55, 3.18it/s] 90%|█████████ | 334860/371472 [4:28:40<3:01:34, 3.36it/s] {'loss': 2.5857, 'learning_rate': 1.8875110431166368e-07, 'epoch': 14.42} + 90%|█████████ | 334860/371472 [4:28:40<3:01:34, 3.36it/s] 90%|█████████ | 334861/371472 [4:28:40<2:56:54, 3.45it/s] 90%|█████████ | 334862/371472 [4:28:40<2:54:18, 3.50it/s] 90%|█████████ | 334863/371472 [4:28:41<2:55:22, 3.48it/s] 90%|█████████ | 334864/371472 [4:28:41<3:13:34, 3.15it/s] 90%|█████████ | 334865/371472 [4:28:41<3:24:55, 2.98it/s] 90%|█████████ | 334866/371472 [4:28:42<3:11:27, 3.19it/s] 90%|█████████ | 334867/371472 [4:28:42<3:16:46, 3.10it/s] 90%|█████████ | 334868/371472 [4:28:42<3:05:59, 3.28it/s] 90%|█████████ | 334869/371472 [4:28:43<2:57:49, 3.43it/s] 90%|█████████ | 334870/371472 [4:28:43<2:52:58, 3.53it/s] 90%|█████████ | 334871/371472 [4:28:43<3:00:11, 3.39it/s] 90%|█████████ | 334872/371472 [4:28:43<2:58:26, 3.42it/s] 90%|█████████ | 334873/371472 [4:28:44<2:58:25, 3.42it/s] 90%|█████████ | 334874/371472 [4:28:44<2:47:43, 3.64it/s] 90%|█████████ | 334875/371472 [4:28:44<2:53:41, 3.51it/s] 90%|█████████ | 334876/371472 [4:28:45<2:48:40, 3.62it/s] 90%|█████████ | 334877/371472 [4:28:45<2:45:21, 3.69it/s] 90%|█████████ | 334878/371472 [4:28:45<2:47:35, 3.64it/s] 90%|█████████ | 334879/371472 [4:28:45<2:46:55, 3.65it/s] 90%|█████████ | 334880/371472 [4:28:46<2:46:39, 3.66it/s] {'loss': 2.5525, 'learning_rate': 1.8870262233618475e-07, 'epoch': 14.42} + 90%|█████████ | 334880/371472 [4:28:46<2:46:39, 3.66it/s] 90%|█████████ | 334881/371472 [4:28:46<2:57:08, 3.44it/s] 90%|█████████ | 334882/371472 [4:28:46<2:56:07, 3.46it/s] 90%|█████████ | 334883/371472 [4:28:47<3:13:06, 3.16it/s] 90%|█████████ | 334884/371472 [4:28:47<3:05:28, 3.29it/s] 90%|█████████ | 334885/371472 [4:28:47<2:57:48, 3.43it/s] 90%|█████████ | 334886/371472 [4:28:47<3:03:37, 3.32it/s] 90%|█████████ | 334887/371472 [4:28:48<2:57:02, 3.44it/s] 90%|█████████ | 334888/371472 [4:28:48<2:53:29, 3.51it/s] 90%|█████████ | 334889/371472 [4:28:48<2:51:08, 3.56it/s] 90%|█████████ | 334890/371472 [4:28:49<2:55:08, 3.48it/s] 90%|█████████ | 334891/371472 [4:28:49<2:56:49, 3.45it/s] 90%|█████████ | 334892/371472 [4:28:49<2:53:08, 3.52it/s] 90%|█████████ | 334893/371472 [4:28:49<2:48:00, 3.63it/s] 90%|█████████ | 334894/371472 [4:28:50<2:44:08, 3.71it/s] 90%|█████████ | 334895/371472 [4:28:50<2:52:13, 3.54it/s] 90%|█████████ | 334896/371472 [4:28:50<3:07:19, 3.25it/s] 90%|█████████ | 334897/371472 [4:28:51<2:56:04, 3.46it/s] 90%|█████████ | 334898/371472 [4:28:51<2:50:06, 3.58it/s] 90%|█████████ | 334899/371472 [4:28:51<2:56:05, 3.46it/s] 90%|█████████ | 334900/371472 [4:28:52<3:05:55, 3.28it/s] {'loss': 2.5752, 'learning_rate': 1.886541403607059e-07, 'epoch': 14.42} + 90%|█████████ | 334900/371472 [4:28:52<3:05:55, 3.28it/s] 90%|█████████ | 334901/371472 [4:28:52<3:00:14, 3.38it/s] 90%|█████████ | 334902/371472 [4:28:52<2:56:50, 3.45it/s] 90%|█████████ | 334903/371472 [4:28:52<2:58:42, 3.41it/s] 90%|█████████ | 334904/371472 [4:28:53<2:55:03, 3.48it/s] 90%|█████████ | 334905/371472 [4:28:53<3:01:53, 3.35it/s] 90%|█████████ | 334906/371472 [4:28:53<2:56:28, 3.45it/s] 90%|█████████ | 334907/371472 [4:28:54<3:13:28, 3.15it/s] 90%|█████████ | 334908/371472 [4:28:54<3:09:29, 3.22it/s] 90%|█████████ | 334909/371472 [4:28:54<3:02:53, 3.33it/s] 90%|█████████ | 334910/371472 [4:28:54<2:57:53, 3.43it/s] 90%|█████████ | 334911/371472 [4:28:55<2:53:21, 3.51it/s] 90%|█████████ | 334912/371472 [4:28:55<2:55:22, 3.47it/s] 90%|█████████ | 334913/371472 [4:28:55<3:08:58, 3.22it/s] 90%|█████████ | 334914/371472 [4:28:56<3:16:29, 3.10it/s] 90%|█████████ | 334915/371472 [4:28:56<3:15:11, 3.12it/s] 90%|█████████ | 334916/371472 [4:28:56<3:07:13, 3.25it/s] 90%|█████████ | 334917/371472 [4:28:57<3:03:35, 3.32it/s] 90%|█████████ | 334918/371472 [4:28:57<3:00:00, 3.38it/s] 90%|█████████ | 334919/371472 [4:28:57<2:57:42, 3.43it/s] 90%|█████████ | 334920/371472 [4:28:58<3:08:00, 3.24it/s] {'loss': 2.5443, 'learning_rate': 1.8860565838522695e-07, 'epoch': 14.43} + 90%|█████████ | 334920/371472 [4:28:58<3:08:00, 3.24it/s] 90%|█████████ | 334921/371472 [4:28:58<3:04:36, 3.30it/s] 90%|█████████ | 334922/371472 [4:28:58<2:57:45, 3.43it/s] 90%|█████████ | 334923/371472 [4:28:58<2:58:18, 3.42it/s] 90%|█████████ | 334924/371472 [4:28:59<2:52:45, 3.53it/s] 90%|█████████ | 334925/371472 [4:28:59<3:09:05, 3.22it/s] 90%|█████████ | 334926/371472 [4:28:59<3:04:58, 3.29it/s] 90%|█████████ | 334927/371472 [4:29:00<2:59:38, 3.39it/s] 90%|█████████ | 334928/371472 [4:29:00<2:58:32, 3.41it/s] 90%|█████████ | 334929/371472 [4:29:00<2:59:06, 3.40it/s] 90%|█████████ | 334930/371472 [4:29:01<3:34:06, 2.84it/s] 90%|█████████ | 334931/371472 [4:29:01<3:15:39, 3.11it/s] 90%|█████████ | 334932/371472 [4:29:01<3:17:10, 3.09it/s] 90%|█████████ | 334933/371472 [4:29:01<3:05:58, 3.27it/s] 90%|█████████ | 334934/371472 [4:29:02<3:10:19, 3.20it/s] 90%|█████████ | 334935/371472 [4:29:02<3:07:02, 3.26it/s] 90%|█████████ | 334936/371472 [4:29:02<3:00:25, 3.37it/s] 90%|█████████ | 334937/371472 [4:29:03<3:13:55, 3.14it/s] 90%|█████████ | 334938/371472 [4:29:03<3:05:18, 3.29it/s] 90%|█████████ | 334939/371472 [4:29:03<2:57:49, 3.42it/s] 90%|█████████ | 334940/371472 [4:29:04<2:54:49, 3.48it/s] {'loss': 2.6076, 'learning_rate': 1.8855717640974813e-07, 'epoch': 14.43} + 90%|█████████ | 334940/371472 [4:29:04<2:54:49, 3.48it/s] 90%|█████████ | 334941/371472 [4:29:04<2:47:41, 3.63it/s] 90%|█████████ | 334942/371472 [4:29:04<2:45:14, 3.68it/s] 90%|█████████ | 334943/371472 [4:29:04<2:49:24, 3.59it/s] 90%|█████████ | 334944/371472 [4:29:05<2:43:32, 3.72it/s] 90%|█████████ | 334945/371472 [4:29:05<2:46:37, 3.65it/s] 90%|█████████ | 334946/371472 [4:29:05<2:41:13, 3.78it/s] 90%|█████████ | 334947/371472 [4:29:05<2:53:54, 3.50it/s] 90%|█████████ | 334948/371472 [4:29:06<2:57:37, 3.43it/s] 90%|█████████ | 334949/371472 [4:29:06<2:54:31, 3.49it/s] 90%|█████████ | 334950/371472 [4:29:06<2:48:32, 3.61it/s] 90%|█████████ | 334951/371472 [4:29:07<2:45:33, 3.68it/s] 90%|█████████ | 334952/371472 [4:29:07<2:51:10, 3.56it/s] 90%|█████████ | 334953/371472 [4:29:07<2:56:27, 3.45it/s] 90%|█████████ | 334954/371472 [4:29:07<2:51:23, 3.55it/s] 90%|█████████ | 334955/371472 [4:29:08<2:53:05, 3.52it/s] 90%|█████████ | 334956/371472 [4:29:08<2:48:03, 3.62it/s] 90%|█████████ | 334957/371472 [4:29:08<2:56:52, 3.44it/s] 90%|█████████ | 334958/371472 [4:29:09<2:50:35, 3.57it/s] 90%|█████████ | 334959/371472 [4:29:09<2:48:14, 3.62it/s] 90%|█████████ | 334960/371472 [4:29:09<2:47:38, 3.63it/s] {'loss': 2.6715, 'learning_rate': 1.8850869443426917e-07, 'epoch': 14.43} + 90%|█████████ | 334960/371472 [4:29:09<2:47:38, 3.63it/s] 90%|█████████ | 334961/371472 [4:29:09<2:46:07, 3.66it/s] 90%|█████████ | 334962/371472 [4:29:10<2:50:53, 3.56it/s] 90%|█████████ | 334963/371472 [4:29:10<2:41:25, 3.77it/s] 90%|████��████ | 334964/371472 [4:29:10<2:43:24, 3.72it/s] 90%|█████████ | 334965/371472 [4:29:10<2:41:33, 3.77it/s] 90%|█████████ | 334966/371472 [4:29:11<2:39:41, 3.81it/s] 90%|█████████ | 334967/371472 [4:29:11<2:45:14, 3.68it/s] 90%|█████████ | 334968/371472 [4:29:11<2:48:46, 3.60it/s] 90%|█████████ | 334969/371472 [4:29:12<2:57:36, 3.43it/s] 90%|█████████ | 334970/371472 [4:29:12<2:59:45, 3.38it/s] 90%|█████████ | 334971/371472 [4:29:12<2:52:14, 3.53it/s] 90%|█████████ | 334972/371472 [4:29:12<2:54:53, 3.48it/s] 90%|█████████ | 334973/371472 [4:29:13<2:48:09, 3.62it/s] 90%|█████████ | 334974/371472 [4:29:13<2:48:31, 3.61it/s] 90%|█████████ | 334975/371472 [4:29:13<2:48:55, 3.60it/s] 90%|█████████ | 334976/371472 [4:29:14<2:54:41, 3.48it/s] 90%|█████████ | 334977/371472 [4:29:14<2:50:31, 3.57it/s] 90%|█████████ | 334978/371472 [4:29:14<2:54:26, 3.49it/s] 90%|█████████ | 334979/371472 [4:29:14<2:56:41, 3.44it/s] 90%|█████████ | 334980/371472 [4:29:15<2:50:48, 3.56it/s] {'loss': 2.64, 'learning_rate': 1.8846021245879032e-07, 'epoch': 14.43} + 90%|█████████ | 334980/371472 [4:29:15<2:50:48, 3.56it/s] 90%|█████████ | 334981/371472 [4:29:15<2:46:12, 3.66it/s] 90%|█████████ | 334982/371472 [4:29:15<2:54:10, 3.49it/s] 90%|█████████ | 334983/371472 [4:29:16<3:04:58, 3.29it/s] 90%|█████████ | 334984/371472 [4:29:16<3:09:13, 3.21it/s] 90%|█████████ | 334985/371472 [4:29:16<3:19:12, 3.05it/s] 90%|█████████ | 334986/371472 [4:29:17<3:17:29, 3.08it/s] 90%|█████████ | 334987/371472 [4:29:17<3:11:13, 3.18it/s] 90%|█████████ | 334988/371472 [4:29:17<3:01:12, 3.36it/s] 90%|█████████ | 334989/371472 [4:29:17<2:55:52, 3.46it/s] 90%|█████████ | 334990/371472 [4:29:18<2:45:19, 3.68it/s] 90%|█████████ | 334991/371472 [4:29:18<3:02:31, 3.33it/s] 90%|█████████ | 334992/371472 [4:29:18<3:03:24, 3.32it/s] 90%|█████████ | 334993/371472 [4:29:19<2:55:39, 3.46it/s] 90%|█████████ | 334994/371472 [4:29:19<2:51:37, 3.54it/s] 90%|█████████ | 334995/371472 [4:29:19<2:45:23, 3.68it/s] 90%|█████████ | 334996/371472 [4:29:19<2:43:01, 3.73it/s] 90%|█████████ | 334997/371472 [4:29:20<2:41:12, 3.77it/s] 90%|█████████ | 334998/371472 [4:29:20<2:41:01, 3.78it/s] 90%|█████████ | 334999/371472 [4:29:20<2:51:03, 3.55it/s] 90%|█████████ | 335000/371472 [4:29:20<2:43:48, 3.71it/s] {'loss': 2.5165, 'learning_rate': 1.884117304833114e-07, 'epoch': 14.43} + 90%|█████████ | 335000/371472 [4:29:20<2:43:48, 3.71it/s] 90%|█████████ | 335001/371472 [4:29:21<2:49:09, 3.59it/s] 90%|█████████ | 335002/371472 [4:29:21<2:43:53, 3.71it/s] 90%|█████████ | 335003/371472 [4:29:21<2:52:13, 3.53it/s] 90%|█████████ | 335004/371472 [4:29:22<2:47:06, 3.64it/s] 90%|█████████ | 335005/371472 [4:29:22<2:47:27, 3.63it/s] 90%|█████████ | 335006/371472 [4:29:22<2:39:14, 3.82it/s] 90%|█████████ | 335007/371472 [4:29:22<2:36:40, 3.88it/s] 90%|█████████ | 335008/371472 [4:29:23<2:40:02, 3.80it/s] 90%|█████████ | 335009/371472 [4:29:23<2:44:44, 3.69it/s] 90%|█████████ | 335010/371472 [4:29:23<2:40:14, 3.79it/s] 90%|█████████ | 335011/371472 [4:29:23<2:41:49, 3.76it/s] 90%|█████████ | 335012/371472 [4:29:24<2:54:23, 3.48it/s] 90%|█████████ | 335013/371472 [4:29:24<2:51:14, 3.55it/s] 90%|█████████ | 335014/371472 [4:29:24<2:52:35, 3.52it/s] 90%|█████████ | 335015/371472 [4:29:25<2:48:03, 3.62it/s] 90%|█████████ | 335016/371472 [4:29:25<2:50:18, 3.57it/s] 90%|█████████ | 335017/371472 [4:29:25<2:51:08, 3.55it/s] 90%|█████████ | 335018/371472 [4:29:25<3:01:19, 3.35it/s] 90%|█████████ | 335019/371472 [4:29:26<2:55:31, 3.46it/s] 90%|█████████ | 335020/371472 [4:29:26<3:01:16, 3.35it/s] {'loss': 2.6189, 'learning_rate': 1.8836324850783255e-07, 'epoch': 14.43} + 90%|█████████ | 335020/371472 [4:29:26<3:01:16, 3.35it/s] 90%|█████████ | 335021/371472 [4:29:26<3:03:12, 3.32it/s] 90%|█████████ | 335022/371472 [4:29:27<2:55:32, 3.46it/s] 90%|█████████ | 335023/371472 [4:29:27<2:49:37, 3.58it/s] 90%|█████████ | 335024/371472 [4:29:27<3:03:32, 3.31it/s] 90%|█████████ | 335025/371472 [4:29:28<2:58:16, 3.41it/s] 90%|█████████ | 335026/371472 [4:29:28<2:57:04, 3.43it/s] 90%|█████████ | 335027/371472 [4:29:28<2:54:32, 3.48it/s] 90%|█████████ | 335028/371472 [4:29:28<3:06:21, 3.26it/s] 90%|█████████ | 335029/371472 [4:29:29<2:56:54, 3.43it/s] 90%|█████████ | 335030/371472 [4:29:29<2:59:04, 3.39it/s] 90%|█████████ | 335031/371472 [4:29:29<2:59:10, 3.39it/s] 90%|█████████ | 335032/371472 [4:29:30<2:55:33, 3.46it/s] 90%|█████████ | 335033/371472 [4:29:30<2:49:45, 3.58it/s] 90%|█████████ | 335034/371472 [4:29:30<2:54:07, 3.49it/s] 90%|█████████ | 335035/371472 [4:29:30<2:53:54, 3.49it/s] 90%|█████████ | 335036/371472 [4:29:31<2:49:13, 3.59it/s] 90%|█████████ | 335037/371472 [4:29:31<2:59:24, 3.38it/s] 90%|█████████ | 335038/371472 [4:29:31<3:03:11, 3.31it/s] 90%|█████████ | 335039/371472 [4:29:32<2:59:14, 3.39it/s] 90%|█████████ | 335040/371472 [4:29:32<2:54:52, 3.47it/s] {'loss': 2.6228, 'learning_rate': 1.883147665323536e-07, 'epoch': 14.43} + 90%|█████████ | 335040/371472 [4:29:32<2:54:52, 3.47it/s] 90%|█████████ | 335041/371472 [4:29:32<3:00:43, 3.36it/s] 90%|█████████ | 335042/371472 [4:29:32<2:54:57, 3.47it/s] 90%|█████████ | 335043/371472 [4:29:33<2:48:35, 3.60it/s] 90%|█████████ | 335044/371472 [4:29:33<2:53:25, 3.50it/s] 90%|█████████ | 335045/371472 [4:29:33<2:53:56, 3.49it/s] 90%|█████████ | 335046/371472 [4:29:34<2:56:23, 3.44it/s] 90%|█████████ | 335047/371472 [4:29:34<3:00:05, 3.37it/s] 90%|█████████ | 335048/371472 [4:29:34<3:06:10, 3.26it/s] 90%|█████████ | 335049/371472 [4:29:35<3:02:46, 3.32it/s] 90%|█████████ | 335050/371472 [4:29:35<3:09:16, 3.21it/s] 90%|█████████ | 335051/371472 [4:29:35<2:59:05, 3.39it/s] 90%|█████████ | 335052/371472 [4:29:35<2:51:57, 3.53it/s] 90%|█████████ | 335053/371472 [4:29:36<2:43:59, 3.70it/s] 90%|█████████ | 335054/371472 [4:29:36<2:44:01, 3.70it/s] 90%|█████████ | 335055/371472 [4:29:36<2:47:24, 3.63it/s] 90%|█████████ | 335056/371472 [4:29:36<2:40:19, 3.79it/s] 90%|█████████ | 335057/371472 [4:29:37<2:41:11, 3.77it/s] 90%|█████████ | 335058/371472 [4:29:37<2:41:56, 3.75it/s] 90%|█████████ | 335059/371472 [4:29:37<2:42:21, 3.74it/s] 90%|█████████ | 335060/371472 [4:29:38<2:41:41, 3.75it/s] {'loss': 2.6561, 'learning_rate': 1.8826628455687477e-07, 'epoch': 14.43} + 90%|█████████ | 335060/371472 [4:29:38<2:41:41, 3.75it/s] 90%|█████████ | 335061/371472 [4:29:38<2:37:15, 3.86it/s] 90%|█████████ | 335062/371472 [4:29:38<2:45:26, 3.67it/s] 90%|█████████ | 335063/371472 [4:29:38<3:02:06, 3.33it/s] 90%|█████████ | 335064/371472 [4:29:39<3:09:10, 3.21it/s] 90%|█████████ | 335065/371472 [4:29:39<2:55:46, 3.45it/s] 90%|█████████ | 335066/371472 [4:29:39<3:00:41, 3.36it/s] 90%|█████████ | 335067/371472 [4:29:40<2:52:10, 3.52it/s] 90%|█████████ | 335068/371472 [4:29:40<2:48:22, 3.60it/s] 90%|█████████ | 335069/371472 [4:29:40<2:52:10, 3.52it/s] 90%|█████████ | 335070/371472 [4:29:40<3:02:50, 3.32it/s] 90%|█████████ | 335071/371472 [4:29:41<2:57:56, 3.41it/s] 90%|█████████ | 335072/371472 [4:29:41<2:59:17, 3.38it/s] 90%|█████████ | 335073/371472 [4:29:41<2:51:44, 3.53it/s] 90%|█████████ | 335074/371472 [4:29:42<2:47:51, 3.61it/s] 90%|█████████ | 335075/371472 [4:29:42<2:48:42, 3.60it/s] 90%|█████████ | 335076/371472 [4:29:42<2:47:40, 3.62it/s] 90%|█████████ | 335077/371472 [4:29:42<2:52:57, 3.51it/s] 90%|█████████ | 335078/371472 [4:29:43<2:48:33, 3.60it/s] 90%|█████████ | 335079/371472 [4:29:43<2:45:18, 3.67it/s] 90%|█████████ | 335080/371472 [4:29:43<2:39:58, 3.79it/s] {'loss': 2.4965, 'learning_rate': 1.8821780258139584e-07, 'epoch': 14.43} + 90%|█████████ | 335080/371472 [4:29:43<2:39:58, 3.79it/s] 90%|█████████ | 335081/371472 [4:29:43<2:48:16, 3.60it/s] 90%|█████████ | 335082/371472 [4:29:44<2:47:34, 3.62it/s] 90%|█████████ | 335083/371472 [4:29:44<2:55:13, 3.46it/s] 90%|█████████ | 335084/371472 [4:29:44<2:58:55, 3.39it/s] 90%|█████████ | 335085/371472 [4:29:45<2:51:52, 3.53it/s] 90%|█████████ | 335086/371472 [4:29:45<2:53:54, 3.49it/s] 90%|█████████ | 335087/371472 [4:29:45<2:59:15, 3.38it/s] 90%|█████████ | 335088/371472 [4:29:46<2:50:11, 3.56it/s] 90%|█████████ | 335089/371472 [4:29:46<3:01:07, 3.35it/s] 90%|█████████ | 335090/371472 [4:29:46<2:59:41, 3.37it/s] 90%|█████████ | 335091/371472 [4:29:46<2:52:41, 3.51it/s] 90%|█████████ | 335092/371472 [4:29:47<3:00:15, 3.36it/s] 90%|█████████ | 335093/371472 [4:29:47<2:59:46, 3.37it/s] 90%|█████████ | 335094/371472 [4:29:47<2:53:58, 3.49it/s] 90%|█████████ | 335095/371472 [4:29:48<2:56:46, 3.43it/s] 90%|█████████ | 335096/371472 [4:29:48<2:58:50, 3.39it/s] 90%|█████████ | 335097/371472 [4:29:48<3:00:29, 3.36it/s] 90%|█████████ | 335098/371472 [4:29:48<2:51:45, 3.53it/s] 90%|█████████ | 335099/371472 [4:29:49<2:46:38, 3.64it/s] 90%|█████████ | 335100/371472 [4:29:49<2:49:35, 3.57it/s] {'loss': 2.7071, 'learning_rate': 1.8816932060591696e-07, 'epoch': 14.43} + 90%|█████████ | 335100/371472 [4:29:49<2:49:35, 3.57it/s] 90%|█████████ | 335101/371472 [4:29:49<2:51:19, 3.54it/s] 90%|█████████ | 335102/371472 [4:29:50<2:55:32, 3.45it/s] 90%|█████████ | 335103/371472 [4:29:50<2:49:58, 3.57it/s] 90%|█████████ | 335104/371472 [4:29:50<2:45:11, 3.67it/s] 90%|█████████ | 335105/371472 [4:29:50<2:46:38, 3.64it/s] 90%|█████████ | 335106/371472 [4:29:51<2:46:53, 3.63it/s] 90%|█████████ | 335107/371472 [4:29:51<2:45:27, 3.66it/s] 90%|█████████ | 335108/371472 [4:29:51<2:48:54, 3.59it/s] 90%|█████████ | 335109/371472 [4:29:51<2:49:22, 3.58it/s] 90%|█████████ | 335110/371472 [4:29:52<2:58:27, 3.40it/s] 90%|█████████ | 335111/371472 [4:29:52<2:55:45, 3.45it/s] 90%|█████████ | 335112/371472 [4:29:52<2:59:55, 3.37it/s] 90%|█████████ | 335113/371472 [4:29:53<3:23:18, 2.98it/s] 90%|█████████ | 335114/371472 [4:29:53<3:09:00, 3.21it/s] 90%|█████████ | 335115/371472 [4:29:53<3:08:48, 3.21it/s] 90%|█████████ | 335116/371472 [4:29:54<3:07:23, 3.23it/s] 90%|█████████ | 335117/371472 [4:29:54<3:02:18, 3.32it/s] 90%|█████████ | 335118/371472 [4:29:54<2:59:47, 3.37it/s] 90%|█████████ | 335119/371472 [4:29:55<2:54:50, 3.47it/s] 90%|█████████ | 335120/371472 [4:29:55<2:50:08, 3.56it/s] {'loss': 2.5633, 'learning_rate': 1.8812083863043804e-07, 'epoch': 14.43} + 90%|█████████ | 335120/371472 [4:29:55<2:50:08, 3.56it/s] 90%|█████████ | 335121/371472 [4:29:55<2:56:01, 3.44it/s] 90%|█████████ | 335122/371472 [4:29:55<2:49:39, 3.57it/s] 90%|█████████ | 335123/371472 [4:29:56<3:03:48, 3.30it/s] 90%|█████████ | 335124/371472 [4:29:56<2:58:31, 3.39it/s] 90%|█████████ | 335125/371472 [4:29:56<2:57:02, 3.42it/s] 90%|█████████ | 335126/371472 [4:29:57<3:04:56, 3.28it/s] 90%|█████████ | 335127/371472 [4:29:57<2:58:03, 3.40it/s] 90%|█████████ | 335128/371472 [4:29:57<2:49:02, 3.58it/s] 90%|█████████ | 335129/371472 [4:29:57<2:49:19, 3.58it/s] 90%|█████████ | 335130/371472 [4:29:58<2:50:50, 3.55it/s] 90%|█████████ | 335131/371472 [4:29:58<2:45:14, 3.67it/s] 90%|█████████ | 335132/371472 [4:29:58<2:46:28, 3.64it/s] 90%|█████████ | 335133/371472 [4:29:59<2:47:00, 3.63it/s] 90%|█████████ | 335134/371472 [4:29:59<2:45:08, 3.67it/s] 90%|█████████ | 335135/371472 [4:29:59<2:48:01, 3.60it/s] 90%|█████████ | 335136/371472 [4:29:59<2:45:53, 3.65it/s] 90%|█████████ | 335137/371472 [4:30:00<2:50:33, 3.55it/s] 90%|█████████ | 335138/371472 [4:30:00<2:44:27, 3.68it/s] 90%|█████���███ | 335139/371472 [4:30:00<2:43:43, 3.70it/s] 90%|█████████ | 335140/371472 [4:30:00<2:41:49, 3.74it/s] {'loss': 2.5293, 'learning_rate': 1.880723566549592e-07, 'epoch': 14.44} + 90%|█████████ | 335140/371472 [4:30:00<2:41:49, 3.74it/s] 90%|█████████ | 335141/371472 [4:30:01<3:49:51, 2.63it/s] 90%|█████████ | 335142/371472 [4:30:01<3:32:11, 2.85it/s] 90%|█████████ | 335143/371472 [4:30:02<3:11:53, 3.16it/s] 90%|█████████ | 335144/371472 [4:30:02<3:03:16, 3.30it/s] 90%|█████████ | 335145/371472 [4:30:02<3:11:55, 3.15it/s] 90%|█████████ | 335146/371472 [4:30:02<3:02:05, 3.32it/s] 90%|█████████ | 335147/371472 [4:30:03<3:00:02, 3.36it/s] 90%|█████████ | 335148/371472 [4:30:03<3:04:21, 3.28it/s] 90%|█████████ | 335149/371472 [4:30:03<2:54:04, 3.48it/s] 90%|█████████ | 335150/371472 [4:30:04<2:49:43, 3.57it/s] 90%|█████████ | 335151/371472 [4:30:04<2:45:49, 3.65it/s] 90%|█████████ | 335152/371472 [4:30:04<2:43:53, 3.69it/s] 90%|█████████ | 335153/371472 [4:30:04<2:48:08, 3.60it/s] 90%|█████████ | 335154/371472 [4:30:05<2:54:08, 3.48it/s] 90%|█████████ | 335155/371472 [4:30:05<2:51:27, 3.53it/s] 90%|█████████ | 335156/371472 [4:30:05<2:49:26, 3.57it/s] 90%|█████████ | 335157/371472 [4:30:06<2:46:39, 3.63it/s] 90%|█████████ | 335158/371472 [4:30:06<2:39:50, 3.79it/s] 90%|█████████ | 335159/371472 [4:30:06<2:48:22, 3.59it/s] 90%|█████████ | 335160/371472 [4:30:06<2:50:24, 3.55it/s] {'loss': 2.7117, 'learning_rate': 1.8802387467948026e-07, 'epoch': 14.44} + 90%|█████████ | 335160/371472 [4:30:06<2:50:24, 3.55it/s] 90%|█████████ | 335161/371472 [4:30:07<2:56:15, 3.43it/s] 90%|█████████ | 335162/371472 [4:30:07<2:57:20, 3.41it/s] 90%|█████████ | 335163/371472 [4:30:07<3:04:08, 3.29it/s] 90%|█████████ | 335164/371472 [4:30:08<2:55:12, 3.45it/s] 90%|█████████ | 335165/371472 [4:30:08<2:49:42, 3.57it/s] 90%|█████████ | 335166/371472 [4:30:08<2:45:13, 3.66it/s] 90%|█████████ | 335167/371472 [4:30:08<2:46:00, 3.65it/s] 90%|█████████ | 335168/371472 [4:30:09<2:44:02, 3.69it/s] 90%|█████████ | 335169/371472 [4:30:09<2:45:25, 3.66it/s] 90%|█████████ | 335170/371472 [4:30:09<2:43:31, 3.70it/s] 90%|█████████ | 335171/371472 [4:30:09<2:39:50, 3.79it/s] 90%|█████████ | 335172/371472 [4:30:10<2:52:40, 3.50it/s] 90%|█████████ | 335173/371472 [4:30:10<2:59:20, 3.37it/s] 90%|█████████ | 335174/371472 [4:30:10<3:01:19, 3.34it/s] 90%|█████████ | 335175/371472 [4:30:11<2:57:17, 3.41it/s] 90%|█████████ | 335176/371472 [4:30:11<2:51:08, 3.53it/s] 90%|█████████ | 335177/371472 [4:30:11<2:50:17, 3.55it/s] 90%|█████████ | 335178/371472 [4:30:11<2:48:50, 3.58it/s] 90%|█████████ | 335179/371472 [4:30:12<2:53:56, 3.48it/s] 90%|█████████ | 335180/371472 [4:30:12<3:10:08, 3.18it/s] {'loss': 2.6234, 'learning_rate': 1.8797539270400138e-07, 'epoch': 14.44} + 90%|█████████ | 335180/371472 [4:30:12<3:10:08, 3.18it/s] 90%|█████████ | 335181/371472 [4:30:12<3:01:33, 3.33it/s] 90%|█████████ | 335182/371472 [4:30:13<2:58:16, 3.39it/s] 90%|█████████ | 335183/371472 [4:30:13<2:47:47, 3.60it/s] 90%|█████████ | 335184/371472 [4:30:13<2:56:38, 3.42it/s] 90%|█████████ | 335185/371472 [4:30:14<2:52:12, 3.51it/s] 90%|█████████ | 335186/371472 [4:30:14<2:54:20, 3.47it/s] 90%|█████████ | 335187/371472 [4:30:14<2:50:15, 3.55it/s] 90%|█████████ | 335188/371472 [4:30:14<2:48:37, 3.59it/s] 90%|█████████ | 335189/371472 [4:30:15<2:54:34, 3.46it/s] 90%|█████████ | 335190/371472 [4:30:15<2:54:23, 3.47it/s] 90%|█████████ | 335191/371472 [4:30:15<3:15:30, 3.09it/s] 90%|█████████ | 335192/371472 [4:30:16<3:08:45, 3.20it/s] 90%|█████████ | 335193/371472 [4:30:16<2:57:44, 3.40it/s] 90%|█████████ | 335194/371472 [4:30:16<2:57:16, 3.41it/s] 90%|█████████ | 335195/371472 [4:30:16<2:52:13, 3.51it/s] 90%|█████████ | 335196/371472 [4:30:17<2:47:54, 3.60it/s] 90%|█████████ | 335197/371472 [4:30:17<2:44:14, 3.68it/s] 90%|█████████ | 335198/371472 [4:30:17<2:42:33, 3.72it/s] 90%|█████████ | 335199/371472 [4:30:18<2:43:55, 3.69it/s] 90%|█████████ | 335200/371472 [4:30:18<2:46:18, 3.64it/s] {'loss': 2.4454, 'learning_rate': 1.8792691072852248e-07, 'epoch': 14.44} + 90%|█████████ | 335200/371472 [4:30:18<2:46:18, 3.64it/s] 90%|█████████ | 335201/371472 [4:30:18<2:47:51, 3.60it/s] 90%|█████████ | 335202/371472 [4:30:18<2:42:31, 3.72it/s] 90%|█████████ | 335203/371472 [4:30:19<2:49:03, 3.58it/s] 90%|█████████ | 335204/371472 [4:30:19<2:51:02, 3.53it/s] 90%|█████████ | 335205/371472 [4:30:19<2:47:04, 3.62it/s] 90%|█████████ | 335206/371472 [4:30:19<2:43:50, 3.69it/s] 90%|█████████ | 335207/371472 [4:30:20<3:02:04, 3.32it/s] 90%|█████████ | 335208/371472 [4:30:20<3:00:50, 3.34it/s] 90%|█████████ | 335209/371472 [4:30:20<3:01:29, 3.33it/s] 90%|█████████ | 335210/371472 [4:30:21<2:55:14, 3.45it/s] 90%|█████████ | 335211/371472 [4:30:21<3:00:10, 3.35it/s] 90%|█████████ | 335212/371472 [4:30:21<2:51:48, 3.52it/s] 90%|█████████ | 335213/371472 [4:30:22<2:51:29, 3.52it/s] 90%|█████████ | 335214/371472 [4:30:22<2:49:50, 3.56it/s] 90%|█████████ | 335215/371472 [4:30:22<2:48:19, 3.59it/s] 90%|█████████ | 335216/371472 [4:30:22<2:49:43, 3.56it/s] 90%|█████████ | 335217/371472 [4:30:23<2:47:27, 3.61it/s] 90%|█████████ | 335218/371472 [4:30:23<2:43:09, 3.70it/s] 90%|█████████ | 335219/371472 [4:30:23<2:46:27, 3.63it/s] 90%|█████████ | 335220/371472 [4:30:23<2:45:42, 3.65it/s] {'loss': 2.5429, 'learning_rate': 1.8787842875304363e-07, 'epoch': 14.44} + 90%|█████████ | 335220/371472 [4:30:23<2:45:42, 3.65it/s] 90%|█████████ | 335221/371472 [4:30:24<2:46:31, 3.63it/s] 90%|█████████ | 335222/371472 [4:30:24<2:43:59, 3.68it/s] 90%|█████████ | 335223/371472 [4:30:24<2:50:31, 3.54it/s] 90%|█████████ | 335224/371472 [4:30:25<2:48:46, 3.58it/s] 90%|█████████ | 335225/371472 [4:30:25<2:47:12, 3.61it/s] 90%|█████████ | 335226/371472 [4:30:25<2:49:50, 3.56it/s] 90%|█████████ | 335227/371472 [4:30:25<2:52:08, 3.51it/s] 90%|█████████ | 335228/371472 [4:30:26<2:51:49, 3.52it/s] 90%|█████████ | 335229/371472 [4:30:26<2:59:57, 3.36it/s] 90%|█████████ | 335230/371472 [4:30:26<2:55:28, 3.44it/s] 90%|█████████ | 335231/371472 [4:30:27<2:53:16, 3.49it/s] 90%|█████████ | 335232/371472 [4:30:27<3:03:23, 3.29it/s] 90%|█████████ | 335233/371472 [4:30:27<2:57:50, 3.40it/s] 90%|█████████ | 335234/371472 [4:30:28<3:04:41, 3.27it/s] 90%|█████████ | 335235/371472 [4:30:28<3:03:20, 3.29it/s] 90%|█████████ | 335236/371472 [4:30:28<2:56:47, 3.42it/s] 90%|█████████ | 335237/371472 [4:30:28<2:52:27, 3.50it/s] 90%|█████████ | 335238/371472 [4:30:29<2:53:44, 3.48it/s] 90%|█████████ | 335239/371472 [4:30:29<2:51:30, 3.52it/s] 90%|█████████ | 335240/371472 [4:30:29<2:50:28, 3.54it/s] {'loss': 2.5376, 'learning_rate': 1.8782994677756468e-07, 'epoch': 14.44} + 90%|█████████ | 335240/371472 [4:30:29<2:50:28, 3.54it/s] 90%|█████████ | 335241/371472 [4:30:30<2:58:35, 3.38it/s] 90%|█████████ | 335242/371472 [4:30:30<2:54:02, 3.47it/s] 90%|█████████ | 335243/371472 [4:30:30<2:55:19, 3.44it/s] 90%|█████████ | 335244/371472 [4:30:30<2:56:00, 3.43it/s] 90%|█████████ | 335245/371472 [4:30:31<2:56:04, 3.43it/s] 90%|█████████ | 335246/371472 [4:30:31<3:12:18, 3.14it/s] 90%|█████████ | 335247/371472 [4:30:31<3:00:20, 3.35it/s] 90%|█████████ | 335248/371472 [4:30:32<2:58:36, 3.38it/s] 90%|█████████ | 335249/371472 [4:30:32<2:57:19, 3.40it/s] 90%|█████████ | 335250/371472 [4:30:32<3:01:45, 3.32it/s] 90%|█████████ | 335251/371472 [4:30:32<2:56:15, 3.42it/s] 90%|█████████ | 335252/371472 [4:30:33<2:51:02, 3.53it/s] 90%|█████████ | 335253/371472 [4:30:33<2:47:33, 3.60it/s] 90%|█████████ | 335254/371472 [4:30:33<2:53:27, 3.48it/s] 90%|█████████ | 335255/371472 [4:30:34<2:48:40, 3.58it/s] 90%|█████████ | 335256/371472 [4:30:34<2:44:44, 3.66it/s] 90%|█████████ | 335257/371472 [4:30:34<2:43:19, 3.70it/s] 90%|█████████ | 335258/371472 [4:30:34<2:46:49, 3.62it/s] 90%|█████████ | 335259/371472 [4:30:35<2:53:47, 3.47it/s] 90%|█████████ | 335260/371472 [4:30:35<2:55:45, 3.43it/s] {'loss': 2.4945, 'learning_rate': 1.8778146480208585e-07, 'epoch': 14.44} + 90%|█████████ | 335260/371472 [4:30:35<2:55:45, 3.43it/s] 90%|█████████ | 335261/371472 [4:30:35<2:52:48, 3.49it/s] 90%|█████████ | 335262/371472 [4:30:36<2:44:55, 3.66it/s] 90%|█████████ | 335263/371472 [4:30:36<2:50:42, 3.54it/s] 90%|█████████ | 335264/371472 [4:30:36<2:53:57, 3.47it/s] 90%|█████████ | 335265/371472 [4:30:36<2:53:20, 3.48it/s] 90%|█████████ | 335266/371472 [4:30:37<2:49:13, 3.57it/s] 90%|█████████ | 335267/371472 [4:30:37<2:51:28, 3.52it/s] 90%|█████████ | 335268/371472 [4:30:37<3:03:42, 3.28it/s] 90%|█████████ | 335269/371472 [4:30:38<2:55:32, 3.44it/s] 90%|█████████ | 335270/371472 [4:30:38<2:59:51, 3.35it/s] 90%|█████████ | 335271/371472 [4:30:38<2:54:37, 3.45it/s] 90%|█████████ | 335272/371472 [4:30:38<2:59:25, 3.36it/s] 90%|█████████ | 335273/371472 [4:30:39<2:55:20, 3.44it/s] 90%|█████████ | 335274/371472 [4:30:39<3:02:05, 3.31it/s] 90%|█████████ | 335275/371472 [4:30:39<2:54:42, 3.45it/s] 90%|█████████ | 335276/371472 [4:30:40<2:53:45, 3.47it/s] 90%|█████████ | 335277/371472 [4:30:40<2:48:40, 3.58it/s] 90%|█████████ | 335278/371472 [4:30:40<2:52:08, 3.50it/s] 90%|█████████ | 335279/371472 [4:30:40<2:48:37, 3.58it/s] 90%|█████████ | 335280/371472 [4:30:41<2:45:24, 3.65it/s] {'loss': 2.7709, 'learning_rate': 1.877329828266069e-07, 'epoch': 14.44} + 90%|█████████ | 335280/371472 [4:30:41<2:45:24, 3.65it/s] 90%|█████████ | 335281/371472 [4:30:41<2:44:19, 3.67it/s] 90%|█████████ | 335282/371472 [4:30:41<2:54:37, 3.45it/s] 90%|█████████ | 335283/371472 [4:30:42<2:52:34, 3.50it/s] 90%|█████████ | 335284/371472 [4:30:42<2:47:36, 3.60it/s] 90%|█████████ | 335285/371472 [4:30:42<2:45:45, 3.64it/s] 90%|█████████ | 335286/371472 [4:30:42<2:44:08, 3.67it/s] 90%|█████████ | 335287/371472 [4:30:43<2:52:56, 3.49it/s] 90%|█████████ | 335288/371472 [4:30:43<3:02:20, 3.31it/s] 90%|█████████ | 335289/371472 [4:30:43<2:51:27, 3.52it/s] 90%|█████████ | 335290/371472 [4:30:44<2:50:18, 3.54it/s] 90%|█████████ | 335291/371472 [4:30:44<2:46:31, 3.62it/s] 90%|█████████ | 335292/371472 [4:30:44<3:11:06, 3.16it/s] 90%|█████████ | 335293/371472 [4:30:45<3:05:48, 3.25it/s] 90%|█████████ | 335294/371472 [4:30:45<3:02:54, 3.30it/s] 90%|█████████ | 335295/371472 [4:30:45<3:09:52, 3.18it/s] 90%|█████████ | 335296/371472 [4:30:45<3:02:43, 3.30it/s] 90%|█████████ | 335297/371472 [4:30:46<3:02:58, 3.29it/s] 90%|█████████ | 335298/371472 [4:30:46<3:07:26, 3.22it/s] 90%|█████████ | 335299/371472 [4:30:46<2:54:57, 3.45it/s] 90%|█████████ | 335300/371472 [4:30:47<2:48:44, 3.57it/s] {'loss': 2.6673, 'learning_rate': 1.8768450085112794e-07, 'epoch': 14.44} + 90%|█████████ | 335300/371472 [4:30:47<2:48:44, 3.57it/s] 90%|█████████ | 335301/371472 [4:30:47<2:46:34, 3.62it/s] 90%|█████████ | 335302/371472 [4:30:47<2:47:57, 3.59it/s] 90%|█████████ | 335303/371472 [4:30:47<2:51:53, 3.51it/s] 90%|█████████ | 335304/371472 [4:30:48<2:49:42, 3.55it/s] 90%|█████████ | 335305/371472 [4:30:48<2:49:50, 3.55it/s] 90%|█████████ | 335306/371472 [4:30:48<2:54:53, 3.45it/s] 90%|█████████ | 335307/371472 [4:30:49<3:03:25, 3.29it/s] 90%|█████████ | 335308/371472 [4:30:49<2:57:42, 3.39it/s] 90%|█████████ | 335309/371472 [4:30:49<2:47:04, 3.61it/s] 90%|█████████ | 335310/371472 [4:30:49<2:57:51, 3.39it/s] 90%|█████████ | 335311/371472 [4:30:50<3:00:19, 3.34it/s] 90%|█████████ | 335312/371472 [4:30:50<2:56:41, 3.41it/s] 90%|█████████ | 335313/371472 [4:30:50<2:52:47, 3.49it/s] 90%|█████████ | 335314/371472 [4:30:51<2:57:01, 3.40it/s] 90%|█████████ | 335315/371472 [4:30:51<2:53:41, 3.47it/s] 90%|█████████ | 335316/371472 [4:30:51<2:57:40, 3.39it/s] 90%|█████████ | 335317/371472 [4:30:52<3:05:25, 3.25it/s] 90%|█████████ | 335318/371472 [4:30:52<2:57:03, 3.40it/s] 90%|█████████ | 335319/371472 [4:30:52<2:52:31, 3.49it/s] 90%|█████████ | 335320/371472 [4:30:52<2:48:52, 3.57it/s] {'loss': 2.6634, 'learning_rate': 1.8763601887564912e-07, 'epoch': 14.44} + 90%|█████████ | 335320/371472 [4:30:52<2:48:52, 3.57it/s] 90%|█████████ | 335321/371472 [4:30:53<2:49:30, 3.55it/s] 90%|█████████ | 335322/371472 [4:30:53<2:45:23, 3.64it/s] 90%|█████████ | 335323/371472 [4:30:53<2:49:05, 3.56it/s] 90%|█████████ | 335324/371472 [4:30:53<2:46:43, 3.61it/s] 90%|█████████ | 335325/371472 [4:30:54<2:45:36, 3.64it/s] 90%|█████████ | 335326/371472 [4:30:54<2:43:36, 3.68it/s] 90%|█████████ | 335327/371472 [4:30:54<2:39:15, 3.78it/s] 90%|█████████ | 335328/371472 [4:30:55<2:45:50, 3.63it/s] 90%|█████████ | 335329/371472 [4:30:55<2:40:55, 3.74it/s] 90%|█████████ | 335330/371472 [4:30:55<2:44:04, 3.67it/s] 90%|█████████ | 335331/371472 [4:30:55<2:42:39, 3.70it/s] 90%|█████████ | 335332/371472 [4:30:56<2:40:58, 3.74it/s] 90%|█████████ | 335333/371472 [4:30:56<2:36:40, 3.84it/s] 90%|█████████ | 335334/371472 [4:30:56<2:43:39, 3.68it/s] 90%|█████████ | 335335/371472 [4:30:56<2:49:33, 3.55it/s] 90%|█████████ | 335336/371472 [4:30:57<2:48:23, 3.58it/s] 90%|█████████ | 335337/371472 [4:30:57<2:47:12, 3.60it/s] 90%|█████████ | 335338/371472 [4:30:57<2:46:02, 3.63it/s] 90%|█████████ | 335339/371472 [4:30:58<2:48:20, 3.58it/s] 90%|█████████ | 335340/371472 [4:30:58<2:50:54, 3.52it/s] {'loss': 2.6563, 'learning_rate': 1.8758753690017017e-07, 'epoch': 14.44} + 90%|█████████ | 335340/371472 [4:30:58<2:50:54, 3.52it/s] 90%|█████████ | 335341/371472 [4:30:58<2:45:30, 3.64it/s] 90%|█████████ | 335342/371472 [4:30:58<2:42:15, 3.71it/s] 90%|█████████ | 335343/371472 [4:30:59<2:42:46, 3.70it/s] 90%|█████████ | 335344/371472 [4:30:59<2:40:32, 3.75it/s] 90%|█████████ | 335345/371472 [4:30:59<2:41:40, 3.72it/s] 90%|█████████ | 335346/371472 [4:30:59<2:36:52, 3.84it/s] 90%|█████████ | 335347/371472 [4:31:00<2:41:12, 3.73it/s] 90%|█████████ | 335348/371472 [4:31:00<2:41:13, 3.73it/s] 90%|█████████ | 335349/371472 [4:31:00<2:44:30, 3.66it/s] 90%|█████████ | 335350/371472 [4:31:01<2:51:17, 3.51it/s] 90%|█████████ | 335351/371472 [4:31:01<2:42:42, 3.70it/s] 90%|█████████ | 335352/371472 [4:31:01<2:42:59, 3.69it/s] 90%|█████████ | 335353/371472 [4:31:01<2:44:26, 3.66it/s] 90%|█████████ | 335354/371472 [4:31:02<2:45:14, 3.64it/s] 90%|█████████ | 335355/371472 [4:31:02<2:57:32, 3.39it/s] 90%|█████████ | 335356/371472 [4:31:02<3:15:16, 3.08it/s] 90%|█████████ | 335357/371472 [4:31:03<3:09:59, 3.17it/s] 90%|█████████ | 335358/371472 [4:31:03<3:00:26, 3.34it/s] 90%|█████████ | 335359/371472 [4:31:03<3:02:50, 3.29it/s] 90%|█████████ | 335360/371472 [4:31:04<3:01:46, 3.31it/s] {'loss': 2.6622, 'learning_rate': 1.8753905492469132e-07, 'epoch': 14.44} + 90%|█████████ | 335360/371472 [4:31:04<3:01:46, 3.31it/s] 90%|█████████ | 335361/371472 [4:31:04<2:54:55, 3.44it/s] 90%|█████████ | 335362/371472 [4:31:04<2:53:10, 3.48it/s] 90%|█████████ | 335363/371472 [4:31:04<2:55:36, 3.43it/s] 90%|█████████ | 335364/371472 [4:31:05<2:51:08, 3.52it/s] 90%|█████████ | 335365/371472 [4:31:05<2:50:54, 3.52it/s] 90%|█████████ | 335366/371472 [4:31:05<3:10:24, 3.16it/s] 90%|█████████ | 335367/371472 [4:31:06<3:09:03, 3.18it/s] 90%|█████████ | 335368/371472 [4:31:06<3:08:58, 3.18it/s] 90%|█████████ | 335369/371472 [4:31:06<3:03:27, 3.28it/s] 90%|█████████ | 335370/371472 [4:31:07<2:57:50, 3.38it/s] 90%|█████████ | 335371/371472 [4:31:07<2:58:28, 3.37it/s] 90%|█████████ | 335372/371472 [4:31:07<2:55:29, 3.43it/s] 90%|█████████ | 335373/371472 [4:31:07<2:55:57, 3.42it/s] 90%|█████████ | 335374/371472 [4:31:08<2:54:36, 3.45it/s] 90%|█████████ | 335375/371472 [4:31:08<2:50:08, 3.54it/s] 90%|█████████ | 335376/371472 [4:31:08<2:48:46, 3.56it/s] 90%|█████████ | 335377/371472 [4:31:09<3:01:49, 3.31it/s] 90%|█████████ | 335378/371472 [4:31:09<3:14:58, 3.09it/s] 90%|█████████ | 335379/371472 [4:31:09<2:59:31, 3.35it/s] 90%|█████████ | 335380/371472 [4:31:09<2:51:14, 3.51it/s] {'loss': 2.4427, 'learning_rate': 1.874905729492124e-07, 'epoch': 14.45} + 90%|█████████ | 335380/371472 [4:31:09<2:51:14, 3.51it/s] 90%|█████████ | 335381/371472 [4:31:10<2:45:41, 3.63it/s] 90%|█████████ | 335382/371472 [4:31:10<2:47:34, 3.59it/s] 90%|█████████ | 335383/371472 [4:31:10<2:42:58, 3.69it/s] 90%|█████████ | 335384/371472 [4:31:10<2:38:44, 3.79it/s] 90%|█████████ | 335385/371472 [4:31:11<2:35:59, 3.86it/s] 90%|█████████ | 335386/371472 [4:31:11<2:39:24, 3.77it/s] 90%|█████████ | 335387/371472 [4:31:11<2:42:04, 3.71it/s] 90%|█████████ | 335388/371472 [4:31:12<2:48:51, 3.56it/s] 90%|█████████ | 335389/371472 [4:31:12<2:51:26, 3.51it/s] 90%|█████████ | 335390/371472 [4:31:12<2:44:36, 3.65it/s] 90%|█████████ | 335391/371472 [4:31:12<2:54:34, 3.44it/s] 90%|█████████ | 335392/371472 [4:31:13<3:00:47, 3.33it/s] 90%|█████████ | 335393/371472 [4:31:13<2:48:24, 3.57it/s] 90%|█████████ | 335394/371472 [4:31:13<2:52:52, 3.48it/s] 90%|█████████ | 335395/371472 [4:31:14<2:47:33, 3.59it/s] 90%|█████████ | 335396/371472 [4:31:14<2:52:23, 3.49it/s] 90%|█████████ | 335397/371472 [4:31:14<2:45:58, 3.62it/s] 90%|█████████ | 335398/371472 [4:31:14<2:44:27, 3.66it/s] 90%|█████████ | 335399/371472 [4:31:15<2:42:52, 3.69it/s] 90%|█████████ | 335400/371472 [4:31:15<2:39:57, 3.76it/s] {'loss': 2.6766, 'learning_rate': 1.8744209097373357e-07, 'epoch': 14.45} + 90%|█████████ | 335400/371472 [4:31:15<2:39:57, 3.76it/s] 90%|█████████ | 335401/371472 [4:31:15<2:37:19, 3.82it/s] 90%|█████████ | 335402/371472 [4:31:15<2:41:17, 3.73it/s] 90%|█████████ | 335403/371472 [4:31:16<2:40:46, 3.74it/s] 90%|█████████ | 335404/371472 [4:31:16<2:45:30, 3.63it/s] 90%|█████████ | 335405/371472 [4:31:16<2:46:49, 3.60it/s] 90%|█████████ | 335406/371472 [4:31:17<2:44:15, 3.66it/s] 90%|█████████ | 335407/371472 [4:31:17<2:57:49, 3.38it/s] 90%|█████████ | 335408/371472 [4:31:17<2:53:52, 3.46it/s] 90%|█████████ | 335409/371472 [4:31:18<3:01:49, 3.31it/s] 90%|█████████ | 335410/371472 [4:31:18<2:52:19, 3.49it/s] 90%|█████████ | 335411/371472 [4:31:18<2:46:35, 3.61it/s] 90%|█████████ | 335412/371472 [4:31:18<2:58:19, 3.37it/s] 90%|█████████ | 335413/371472 [4:31:19<3:04:39, 3.25it/s] 90%|█████████ | 335414/371472 [4:31:19<2:59:31, 3.35it/s] 90%|█████████ | 335415/371472 [4:31:19<2:53:30, 3.46it/s] 90%|█████████ | 335416/371472 [4:31:19<2:45:02, 3.64it/s] 90%|█████████ | 335417/371472 [4:31:20<2:42:14, 3.70it/s] 90%|█████████ | 335418/371472 [4:31:20<2:38:42, 3.79it/s] 90%|█████████ | 335419/371472 [4:31:20<2:38:30, 3.79it/s] 90%|█████████ | 335420/371472 [4:31:21<2:40:02, 3.75it/s] {'loss': 2.6146, 'learning_rate': 1.873936089982546e-07, 'epoch': 14.45} + 90%|█████████ | 335420/371472 [4:31:21<2:40:02, 3.75it/s] 90%|█████████ | 335421/371472 [4:31:21<2:47:26, 3.59it/s] 90%|█████████ | 335422/371472 [4:31:21<2:50:23, 3.53it/s] 90%|█████████ | 335423/371472 [4:31:21<2:48:02, 3.58it/s] 90%|█████████ | 335424/371472 [4:31:22<2:51:02, 3.51it/s] 90%|█████████ | 335425/371472 [4:31:22<3:00:43, 3.32it/s] 90%|█████████ | 335426/371472 [4:31:22<3:05:21, 3.24it/s] 90%|█████████ | 335427/371472 [4:31:23<3:04:16, 3.26it/s] 90%|█████████ | 335428/371472 [4:31:23<2:50:45, 3.52it/s] 90%|█████████ | 335429/371472 [4:31:23<3:06:01, 3.23it/s] 90%|█████████ | 335430/371472 [4:31:24<2:56:37, 3.40it/s] 90%|█████████ | 335431/371472 [4:31:24<2:55:16, 3.43it/s] 90%|█████████ | 335432/371472 [4:31:24<2:48:29, 3.56it/s] 90%|█████████ | 335433/371472 [4:31:24<2:51:16, 3.51it/s] 90%|█████████ | 335434/371472 [4:31:25<2:49:15, 3.55it/s] 90%|█████████ | 335435/371472 [4:31:25<2:54:29, 3.44it/s] 90%|█████████ | 335436/371472 [4:31:25<2:51:51, 3.49it/s] 90%|█████████ | 335437/371472 [4:31:26<2:58:06, 3.37it/s] 90%|█████████ | 335438/371472 [4:31:26<2:48:09, 3.57it/s] 90%|█████████ | 335439/371472 [4:31:26<2:51:19, 3.51it/s] 90%|█████████ | 335440/371472 [4:31:26<2:51:00, 3.51it/s] {'loss': 2.6179, 'learning_rate': 1.8734512702277576e-07, 'epoch': 14.45} + 90%|█████████ | 335440/371472 [4:31:26<2:51:00, 3.51it/s] 90%|█████████ | 335441/371472 [4:31:27<3:02:59, 3.28it/s] 90%|█████████ | 335442/371472 [4:31:27<2:52:56, 3.47it/s] 90%|█████████ | 335443/371472 [4:31:27<2:52:36, 3.48it/s] 90%|█████████ | 335444/371472 [4:31:28<2:47:02, 3.59it/s] 90%|█████████ | 335445/371472 [4:31:28<2:47:29, 3.58it/s] 90%|█████████ | 335446/371472 [4:31:28<2:59:56, 3.34it/s] 90%|█████████ | 335447/371472 [4:31:28<3:00:56, 3.32it/s] 90%|█████████ | 335448/371472 [4:31:29<2:57:18, 3.39it/s] 90%|█████████ | 335449/371472 [4:31:29<2:50:32, 3.52it/s] 90%|█████████ | 335450/371472 [4:31:29<2:49:01, 3.55it/s] 90%|█████████ | 335451/371472 [4:31:30<2:50:57, 3.51it/s] 90%|█████████ | 335452/371472 [4:31:30<3:00:46, 3.32it/s] 90%|█████████ | 335453/371472 [4:31:30<2:55:33, 3.42it/s] 90%|█████████ | 335454/371472 [4:31:30<2:52:04, 3.49it/s] 90%|█████████ | 335455/371472 [4:31:31<2:43:28, 3.67it/s] 90%|█████████ | 335456/371472 [4:31:31<2:38:28, 3.79it/s] 90%|█████████ | 335457/371472 [4:31:31<2:37:22, 3.81it/s] 90%|█████████ | 335458/371472 [4:31:31<2:42:40, 3.69it/s] 90%|█████████ | 335459/371472 [4:31:32<2:43:39, 3.67it/s] 90%|█████████ | 335460/371472 [4:31:32<2:51:02, 3.51it/s] {'loss': 2.5001, 'learning_rate': 1.8729664504729683e-07, 'epoch': 14.45} + 90%|█████████ | 335460/371472 [4:31:32<2:51:02, 3.51it/s] 90%|█████████ | 335461/371472 [4:31:32<2:49:50, 3.53it/s] 90%|█████████ | 335462/371472 [4:31:33<2:51:12, 3.51it/s] 90%|█████████ | 335463/371472 [4:31:33<2:53:28, 3.46it/s] 90%|█████████ | 335464/371472 [4:31:33<3:08:04, 3.19it/s] 90%|█████████ | 335465/371472 [4:31:34<3:01:43, 3.30it/s] 90%|█████████ | 335466/371472 [4:31:34<3:03:24, 3.27it/s] 90%|█████████ | 335467/371472 [4:31:34<3:02:19, 3.29it/s] 90%|█████████ | 335468/371472 [4:31:34<2:56:26, 3.40it/s] 90%|█████████ | 335469/371472 [4:31:35<2:48:11, 3.57it/s] 90%|█████████ | 335470/371472 [4:31:35<2:46:55, 3.59it/s] 90%|█████████ | 335471/371472 [4:31:35<2:52:08, 3.49it/s] 90%|█████████ | 335472/371472 [4:31:36<2:57:28, 3.38it/s] 90%|█████████ | 335473/371472 [4:31:36<2:53:27, 3.46it/s] 90%|█████████ | 335474/371472 [4:31:36<2:54:13, 3.44it/s] 90%|█████████ | 335475/371472 [4:31:36<2:54:39, 3.43it/s] 90%|█████████ | 335476/371472 [4:31:37<2:53:48, 3.45it/s] 90%|█████████ | 335477/371472 [4:31:37<2:54:31, 3.44it/s] 90%|█████████ | 335478/371472 [4:31:37<2:52:41, 3.47it/s] 90%|█████████ | 335479/371472 [4:31:38<2:54:26, 3.44it/s] 90%|█████████ | 335480/371472 [4:31:38<2:59:54, 3.33it/s] {'loss': 2.4733, 'learning_rate': 1.8724816307181796e-07, 'epoch': 14.45} + 90%|█████████ | 335480/371472 [4:31:38<2:59:54, 3.33it/s] 90%|█████████ | 335481/371472 [4:31:38<3:00:43, 3.32it/s] 90%|█████████ | 335482/371472 [4:31:39<3:00:04, 3.33it/s] 90%|█████████ | 335483/371472 [4:31:39<3:03:26, 3.27it/s] 90%|█████████ | 335484/371472 [4:31:39<2:53:16, 3.46it/s] 90%|█████████ | 335485/371472 [4:31:39<2:48:50, 3.55it/s] 90%|█████████ | 335486/371472 [4:31:40<2:47:39, 3.58it/s] 90%|█████████ | 335487/371472 [4:31:40<2:55:31, 3.42it/s] 90%|█████████ | 335488/371472 [4:31:40<2:49:34, 3.54it/s] 90%|█████████ | 335489/371472 [4:31:40<2:44:41, 3.64it/s] 90%|█████████ | 335490/371472 [4:31:41<2:41:00, 3.72it/s] 90%|█████████ | 335491/371472 [4:31:41<2:36:52, 3.82it/s] 90%|█████████ | 335492/371472 [4:31:41<2:42:38, 3.69it/s] 90%|█████████ | 335493/371472 [4:31:42<2:47:20, 3.58it/s] 90%|█████████ | 335494/371472 [4:31:42<2:56:58, 3.39it/s] 90%|█████████ | 335495/371472 [4:31:42<2:51:48, 3.49it/s] 90%|█████████ | 335496/371472 [4:31:42<2:48:39, 3.56it/s] 90%|█████████ | 335497/371472 [4:31:43<2:45:05, 3.63it/s] 90%|█████████ | 335498/371472 [4:31:43<2:41:36, 3.71it/s] 90%|█████████ | 335499/371472 [4:31:43<2:47:43, 3.57it/s] 90%|█████████ | 335500/371472 [4:31:44<2:45:43, 3.62it/s] {'loss': 2.6842, 'learning_rate': 1.8719968109633903e-07, 'epoch': 14.45} + 90%|█████████ | 335500/371472 [4:31:44<2:45:43, 3.62it/s] 90%|█████████ | 335501/371472 [4:31:44<2:47:59, 3.57it/s] 90%|█████████ | 335502/371472 [4:31:44<2:40:01, 3.75it/s] 90%|█████████ | 335503/371472 [4:31:44<2:38:16, 3.79it/s] 90%|█████████ | 335504/371472 [4:31:45<2:50:31, 3.52it/s] 90%|█████████ | 335505/371472 [4:31:45<2:50:00, 3.53it/s] 90%|█████████ | 335506/371472 [4:31:45<2:50:13, 3.52it/s] 90%|█████████ | 335507/371472 [4:31:45<2:49:58, 3.53it/s] 90%|█████████ | 335508/371472 [4:31:46<2:53:12, 3.46it/s] 90%|█████████ | 335509/371472 [4:31:46<2:59:54, 3.33it/s] 90%|█████████ | 335510/371472 [4:31:46<2:56:05, 3.40it/s] 90%|█████████ | 335511/371472 [4:31:47<2:50:04, 3.52it/s] 90%|█████████ | 335512/371472 [4:31:47<2:43:07, 3.67it/s] 90%|█████████ | 335513/371472 [4:31:47<2:45:05, 3.63it/s] 90%|█████████ | 335514/371472 [4:31:48<2:53:52, 3.45it/s] 90%|█████████ | 335515/371472 [4:31:48<2:44:42, 3.64it/s] 90%|█████████ | 335516/371472 [4:31:48<2:45:56, 3.61it/s] 90%|█████████ | 335517/371472 [4:31:48<2:54:03, 3.44it/s] 90%|█████████ | 335518/371472 [4:31:49<3:02:22, 3.29it/s] 90%|█████████ | 335519/371472 [4:31:49<2:55:45, 3.41it/s] 90%|█████████ | 335520/371472 [4:31:49<2:49:33, 3.53it/s] {'loss': 2.5988, 'learning_rate': 1.8715119912086018e-07, 'epoch': 14.45} + 90%|█████████ | 335520/371472 [4:31:49<2:49:33, 3.53it/s] 90%|█████████ | 335521/371472 [4:31:50<2:49:45, 3.53it/s] 90%|█████████ | 335522/371472 [4:31:50<2:51:18, 3.50it/s] 90%|█████████ | 335523/371472 [4:31:50<2:54:47, 3.43it/s] 90%|█████████ | 335524/371472 [4:31:50<2:53:22, 3.46it/s] 90%|█████████ | 335525/371472 [4:31:51<2:51:33, 3.49it/s] 90%|█████████ | 335526/371472 [4:31:51<2:50:33, 3.51it/s] 90%|█████████ | 335527/371472 [4:31:51<2:46:46, 3.59it/s] 90%|█████████ | 335528/371472 [4:31:51<2:44:45, 3.64it/s] 90%|█████████ | 335529/371472 [4:31:52<2:49:19, 3.54it/s] 90%|█████████ | 335530/371472 [4:31:52<2:58:49, 3.35it/s] 90%|█████████ | 335531/371472 [4:31:52<2:50:12, 3.52it/s] 90%|█████████ | 335532/371472 [4:31:53<2:44:33, 3.64it/s] 90%|█████████ | 335533/371472 [4:31:53<2:40:39, 3.73it/s] 90%|█████████ | 335534/371472 [4:31:53<2:41:08, 3.72it/s] 90%|█████████ | 335535/371472 [4:31:53<2:46:10, 3.60it/s] 90%|█████████ | 335536/371472 [4:31:54<2:50:29, 3.51it/s] 90%|█████████ | 335537/371472 [4:31:54<2:51:47, 3.49it/s] 90%|█████████ | 335538/371472 [4:31:54<2:49:16, 3.54it/s] 90%|█████████ | 335539/371472 [4:31:55<2:58:01, 3.36it/s] 90%|█████████ | 335540/371472 [4:31:55<3:14:14, 3.08it/s] {'loss': 2.5853, 'learning_rate': 1.8710271714538125e-07, 'epoch': 14.45} + 90%|█████████ | 335540/371472 [4:31:55<3:14:14, 3.08it/s] 90%|███���█████ | 335541/371472 [4:31:55<3:06:27, 3.21it/s] 90%|█████████ | 335542/371472 [4:31:56<2:55:34, 3.41it/s] 90%|█████████ | 335543/371472 [4:31:56<2:47:07, 3.58it/s] 90%|█████████ | 335544/371472 [4:31:56<2:46:51, 3.59it/s] 90%|█████████ | 335545/371472 [4:31:56<2:38:10, 3.79it/s] 90%|█████████ | 335546/371472 [4:31:57<2:46:31, 3.60it/s] 90%|█████████ | 335547/371472 [4:31:57<2:45:28, 3.62it/s] 90%|█████████ | 335548/371472 [4:31:57<2:37:34, 3.80it/s] 90%|█████████ | 335549/371472 [4:31:57<2:39:36, 3.75it/s] 90%|█████████ | 335550/371472 [4:31:58<2:41:23, 3.71it/s] 90%|█████████ | 335551/371472 [4:31:58<2:42:33, 3.68it/s] 90%|█████████ | 335552/371472 [4:31:58<2:42:14, 3.69it/s] 90%|█████████ | 335553/371472 [4:31:58<2:39:30, 3.75it/s] 90%|█████████ | 335554/371472 [4:31:59<2:43:07, 3.67it/s] 90%|█████████ | 335555/371472 [4:31:59<2:40:37, 3.73it/s] 90%|█████████ | 335556/371472 [4:31:59<2:39:32, 3.75it/s] 90%|█████████ | 335557/371472 [4:32:00<2:34:59, 3.86it/s] 90%|█████████ | 335558/371472 [4:32:00<2:39:43, 3.75it/s] 90%|█████████ | 335559/371472 [4:32:00<2:38:17, 3.78it/s] 90%|█████████ | 335560/371472 [4:32:00<2:39:20, 3.76it/s] {'loss': 2.6677, 'learning_rate': 1.870542351699024e-07, 'epoch': 14.45} + 90%|█████████ | 335560/371472 [4:32:00<2:39:20, 3.76it/s] 90%|█████████ | 335561/371472 [4:32:01<2:46:15, 3.60it/s] 90%|█████████ | 335562/371472 [4:32:01<2:44:09, 3.65it/s] 90%|█████████ | 335563/371472 [4:32:01<2:54:50, 3.42it/s] 90%|█████████ | 335564/371472 [4:32:02<3:05:53, 3.22it/s] 90%|█████████ | 335565/371472 [4:32:02<3:07:04, 3.20it/s] 90%|█████████ | 335566/371472 [4:32:02<2:57:50, 3.36it/s] 90%|█████████ | 335567/371472 [4:32:02<2:51:08, 3.50it/s] 90%|█████████ | 335568/371472 [4:32:03<2:44:51, 3.63it/s] 90%|█████████ | 335569/371472 [4:32:03<2:42:06, 3.69it/s] 90%|█████████ | 335570/371472 [4:32:03<2:41:37, 3.70it/s] 90%|█████████ | 335571/371472 [4:32:03<2:40:37, 3.73it/s] 90%|█████████ | 335572/371472 [4:32:04<2:44:36, 3.64it/s] 90%|█████████ | 335573/371472 [4:32:04<2:48:14, 3.56it/s] 90%|█████████ | 335574/371472 [4:32:04<2:56:36, 3.39it/s] 90%|█████████ | 335575/371472 [4:32:05<2:55:57, 3.40it/s] 90%|█████████ | 335576/371472 [4:32:05<2:49:41, 3.53it/s] 90%|█████████ | 335577/371472 [4:32:05<2:47:01, 3.58it/s] 90%|█████████ | 335578/371472 [4:32:05<2:39:51, 3.74it/s] 90%|█████████ | 335579/371472 [4:32:06<2:39:24, 3.75it/s] 90%|█████████ | 335580/371472 [4:32:06<2:44:00, 3.65it/s] {'loss': 2.4537, 'learning_rate': 1.8700575319442347e-07, 'epoch': 14.45} + 90%|█████████ | 335580/371472 [4:32:06<2:44:00, 3.65it/s] 90%|█████████ | 335581/371472 [4:32:06<2:48:01, 3.56it/s] 90%|█████████ | 335582/371472 [4:32:07<2:48:12, 3.56it/s] 90%|█████████ | 335583/371472 [4:32:07<2:52:54, 3.46it/s] 90%|█████████ | 335584/371472 [4:32:07<2:50:14, 3.51it/s] 90%|█████████ | 335585/371472 [4:32:07<2:44:52, 3.63it/s] 90%|█████████ | 335586/371472 [4:32:08<2:37:52, 3.79it/s] 90%|█████████ | 335587/371472 [4:32:08<2:43:37, 3.66it/s] 90%|█████████ | 335588/371472 [4:32:08<2:41:58, 3.69it/s] 90%|█████████ | 335589/371472 [4:32:08<2:37:07, 3.81it/s] 90%|█████████ | 335590/371472 [4:32:09<2:37:57, 3.79it/s] 90%|█████████ | 335591/371472 [4:32:09<2:57:09, 3.38it/s] 90%|█████████ | 335592/371472 [4:32:09<2:51:34, 3.49it/s] 90%|█████████ | 335593/371472 [4:32:10<2:50:06, 3.52it/s] 90%|█████████ | 335594/371472 [4:32:10<2:55:10, 3.41it/s] 90%|█████████ | 335595/371472 [4:32:10<2:54:06, 3.43it/s] 90%|█████████ | 335596/371472 [4:32:11<2:48:27, 3.55it/s] 90%|█████████ | 335597/371472 [4:32:11<3:02:39, 3.27it/s] 90%|█████████ | 335598/371472 [4:32:11<3:08:27, 3.17it/s] 90%|█████████ | 335599/371472 [4:32:12<3:04:56, 3.23it/s] 90%|█████████ | 335600/371472 [4:32:12<2:54:17, 3.43it/s] {'loss': 2.6151, 'learning_rate': 1.8695727121894462e-07, 'epoch': 14.45} + 90%|█████████ | 335600/371472 [4:32:12<2:54:17, 3.43it/s] 90%|█████████ | 335601/371472 [4:32:12<2:59:25, 3.33it/s] 90%|█████████ | 335602/371472 [4:32:12<2:49:43, 3.52it/s] 90%|█████████ | 335603/371472 [4:32:13<2:43:25, 3.66it/s] 90%|█████████ | 335604/371472 [4:32:13<2:51:53, 3.48it/s] 90%|█████████ | 335605/371472 [4:32:13<2:50:32, 3.51it/s] 90%|█████████ | 335606/371472 [4:32:13<2:54:50, 3.42it/s] 90%|█████████ | 335607/371472 [4:32:14<2:57:50, 3.36it/s] 90%|█████████ | 335608/371472 [4:32:14<2:51:56, 3.48it/s] 90%|█████████ | 335609/371472 [4:32:14<2:50:03, 3.51it/s] 90%|█████████ | 335610/371472 [4:32:15<2:52:07, 3.47it/s] 90%|█████████ | 335611/371472 [4:32:15<2:47:36, 3.57it/s] 90%|█████████ | 335612/371472 [4:32:15<2:42:03, 3.69it/s] 90%|█████████ | 335613/371472 [4:32:15<2:43:48, 3.65it/s] 90%|█████████ | 335614/371472 [4:32:16<2:35:52, 3.83it/s] 90%|█████████ | 335615/371472 [4:32:16<2:33:10, 3.90it/s] 90%|█████████ | 335616/371472 [4:32:16<2:49:41, 3.52it/s] 90%|█████████ | 335617/371472 [4:32:17<2:45:07, 3.62it/s] 90%|█████████ | 335618/371472 [4:32:17<3:05:31, 3.22it/s] 90%|█████████ | 335619/371472 [4:32:17<3:03:04, 3.26it/s] 90%|█████████ | 335620/371472 [4:32:17<3:02:04, 3.28it/s] {'loss': 2.5345, 'learning_rate': 1.8690878924346567e-07, 'epoch': 14.46} + 90%|█████████ | 335620/371472 [4:32:17<3:02:04, 3.28it/s] 90%|█████████ | 335621/371472 [4:32:18<2:52:11, 3.47it/s] 90%|█████████ | 335622/371472 [4:32:18<2:46:52, 3.58it/s] 90%|█████████ | 335623/371472 [4:32:18<2:45:53, 3.60it/s] 90%|█████████ | 335624/371472 [4:32:19<2:46:27, 3.59it/s] 90%|█████████ | 335625/371472 [4:32:19<2:46:32, 3.59it/s] 90%|█████████ | 335626/371472 [4:32:19<2:50:45, 3.50it/s] 90%|█████████ | 335627/371472 [4:32:19<2:45:31, 3.61it/s] 90%|█████████ | 335628/371472 [4:32:20<2:57:39, 3.36it/s] 90%|█████████ | 335629/371472 [4:32:20<2:51:52, 3.48it/s] 90%|█████████ | 335630/371472 [4:32:20<2:49:12, 3.53it/s] 90%|█████████ | 335631/371472 [4:32:21<2:41:55, 3.69it/s] 90%|█████████ | 335632/371472 [4:32:21<2:39:33, 3.74it/s] 90%|█████████ | 335633/371472 [4:32:21<2:39:54, 3.74it/s] 90%|█████████ | 335634/371472 [4:32:21<2:36:51, 3.81it/s] 90%|█████████ | 335635/371472 [4:32:22<2:57:18, 3.37it/s] 90%|█████████ | 335636/371472 [4:32:22<2:47:27, 3.57it/s] 90%|█████████ | 335637/371472 [4:32:22<2:47:07, 3.57it/s] 90%|█████████ | 335638/371472 [4:32:22<2:43:43, 3.65it/s] 90%|█████████ | 335639/371472 [4:32:23<2:37:45, 3.79it/s] 90%|█████████ | 335640/371472 [4:32:23<2:42:03, 3.69it/s] {'loss': 2.5777, 'learning_rate': 1.8686030726798685e-07, 'epoch': 14.46} + 90%|█████████ | 335640/371472 [4:32:23<2:42:03, 3.69it/s] 90%|█████████ | 335641/371472 [4:32:23<2:43:54, 3.64it/s] 90%|█████████ | 335642/371472 [4:32:24<2:40:42, 3.72it/s] 90%|█████████ | 335643/371472 [4:32:24<2:45:55, 3.60it/s] 90%|█████████ | 335644/371472 [4:32:24<3:07:22, 3.19it/s] 90%|█████████ | 335645/371472 [4:32:24<3:01:52, 3.28it/s] 90%|█████████ | 335646/371472 [4:32:25<2:52:39, 3.46it/s] 90%|█████████ | 335647/371472 [4:32:25<3:03:48, 3.25it/s] 90%|█████████ | 335648/371472 [4:32:25<3:17:35, 3.02it/s] 90%|█████████ | 335649/371472 [4:32:26<3:05:58, 3.21it/s] 90%|█████████ | 335650/371472 [4:32:26<3:10:18, 3.14it/s] 90%|█████████ | 335651/371472 [4:32:26<3:07:49, 3.18it/s] 90%|█████████ | 335652/371472 [4:32:27<2:58:06, 3.35it/s] 90%|█████████ | 335653/371472 [4:32:27<2:53:25, 3.44it/s] 90%|█████████ | 335654/371472 [4:32:27<2:47:13, 3.57it/s] 90%|█████████ | 335655/371472 [4:32:27<2:41:58, 3.69it/s] 90%|█████████ | 335656/371472 [4:32:28<2:57:55, 3.35it/s] 90%|█████████ | 335657/371472 [4:32:28<2:47:46, 3.56it/s] 90%|█████████ | 335658/371472 [4:32:28<2:44:29, 3.63it/s] 90%|█████████ | 335659/371472 [4:32:29<2:36:48, 3.81it/s] 90%|█████████ | 335660/371472 [4:32:29<2:34:47, 3.86it/s] {'loss': 2.532, 'learning_rate': 1.868118252925079e-07, 'epoch': 14.46} + 90%|█████████ | 335660/371472 [4:32:29<2:34:47, 3.86it/s] 90%|█████████ | 335661/371472 [4:32:29<2:33:33, 3.89it/s] 90%|█████████ | 335662/371472 [4:32:29<2:43:27, 3.65it/s] 90%|█████████ | 335663/371472 [4:32:30<2:45:46, 3.60it/s] 90%|█████████ | 335664/371472 [4:32:30<2:44:06, 3.64it/s] 90%|█████████ | 335665/371472 [4:32:30<2:45:39, 3.60it/s] 90%|█████████ | 335666/371472 [4:32:31<2:53:05, 3.45it/s] 90%|█████████ | 335667/371472 [4:32:31<2:44:35, 3.63it/s] 90%|█████████ | 335668/371472 [4:32:31<2:43:10, 3.66it/s] 90%|█████████ | 335669/371472 [4:32:31<2:47:40, 3.56it/s] 90%|█████████ | 335670/371472 [4:32:32<2:57:18, 3.37it/s] 90%|█████████ | 335671/371472 [4:32:32<3:00:06, 3.31it/s] 90%|█████████ | 335672/371472 [4:32:32<3:02:05, 3.28it/s] 90%|█████████ | 335673/371472 [4:32:33<2:54:14, 3.42it/s] 90%|█████████ | 335674/371472 [4:32:33<2:58:41, 3.34it/s] 90%|█████████ | 335675/371472 [4:32:33<2:47:21, 3.56it/s] 90%|█████████ | 335676/371472 [4:32:33<3:03:42, 3.25it/s] 90%|█████████ | 335677/371472 [4:32:34<2:58:38, 3.34it/s] 90%|█████████ | 335678/371472 [4:32:34<2:56:39, 3.38it/s] 90%|█████████ | 335679/371472 [4:32:34<2:58:12, 3.35it/s] 90%|█████████ | 335680/371472 [4:32:35<3:03:11, 3.26it/s] {'loss': 2.5491, 'learning_rate': 1.8676334331702904e-07, 'epoch': 14.46} + 90%|█████████ | 335680/371472 [4:32:35<3:03:11, 3.26it/s] 90%|█████████ | 335681/371472 [4:32:35<3:00:54, 3.30it/s] 90%|█████████ | 335682/371472 [4:32:35<2:58:01, 3.35it/s] 90%|█████████ | 335683/371472 [4:32:35<2:50:38, 3.50it/s] 90%|█████████ | 335684/371472 [4:32:36<2:50:46, 3.49it/s] 90%|█████████ | 335685/371472 [4:32:36<2:51:10, 3.48it/s] 90%|█████████ | 335686/371472 [4:32:36<3:04:09, 3.24it/s] 90%|█████████ | 335687/371472 [4:32:37<2:58:20, 3.34it/s] 90%|█████████ | 335688/371472 [4:32:37<3:00:29, 3.30it/s] 90%|█████████ | 335689/371472 [4:32:37<2:54:08, 3.42it/s] 90%|█████████ | 335690/371472 [4:32:38<2:49:11, 3.52it/s] 90%|█████████ | 335691/371472 [4:32:38<2:46:49, 3.57it/s] 90%|█████████ | 335692/371472 [4:32:38<2:39:20, 3.74it/s] 90%|█████████ | 335693/371472 [4:32:38<2:39:28, 3.74it/s] 90%|█████████ | 335694/371472 [4:32:39<2:38:10, 3.77it/s] 90%|█████████ | 335695/371472 [4:32:39<2:38:29, 3.76it/s] 90%|█████████ | 335696/371472 [4:32:39<2:35:07, 3.84it/s] 90%|█████████ | 335697/371472 [4:32:39<2:46:56, 3.57it/s] 90%|█████████ | 335698/371472 [4:32:40<2:41:50, 3.68it/s] 90%|█████████ | 335699/371472 [4:32:40<2:47:12, 3.57it/s] 90%|█████████ | 335700/371472 [4:32:40<2:44:45, 3.62it/s] {'loss': 2.6189, 'learning_rate': 1.8671486134155011e-07, 'epoch': 14.46} + 90%|█████████ | 335700/371472 [4:32:40<2:44:45, 3.62it/s] 90%|█████████ | 335701/371472 [4:32:40<2:38:36, 3.76it/s] 90%|█████████ | 335702/371472 [4:32:41<2:37:13, 3.79it/s] 90%|█████████ | 335703/371472 [4:32:41<2:52:10, 3.46it/s] 90%|█████████ | 335704/371472 [4:32:41<2:56:37, 3.38it/s] 90%|█████████ | 335705/371472 [4:32:42<3:10:06, 3.14it/s] 90%|█████████ | 335706/371472 [4:32:42<2:59:14, 3.33it/s] 90%|█████████ | 335707/371472 [4:32:42<2:51:19, 3.48it/s] 90%|█████████ | 335708/371472 [4:32:43<2:49:40, 3.51it/s] 90%|█████████ | 335709/371472 [4:32:43<3:02:24, 3.27it/s] 90%|█████████ | 335710/371472 [4:32:43<2:53:12, 3.44it/s] 90%|█████████ | 335711/371472 [4:32:43<2:51:25, 3.48it/s] 90%|█████████ | 335712/371472 [4:32:44<3:09:35, 3.14it/s] 90%|█████████ | 335713/371472 [4:32:44<3:01:08, 3.29it/s] 90%|█████████ | 335714/371472 [4:32:44<2:53:41, 3.43it/s] 90%|█████████ | 335715/371472 [4:32:45<2:45:15, 3.61it/s] 90%|███��█████ | 335716/371472 [4:32:45<2:46:45, 3.57it/s] 90%|█████████ | 335717/371472 [4:32:45<2:49:46, 3.51it/s] 90%|█████████ | 335718/371472 [4:32:46<2:51:18, 3.48it/s] 90%|█████████ | 335719/371472 [4:32:46<2:43:53, 3.64it/s] 90%|█████████ | 335720/371472 [4:32:46<2:55:10, 3.40it/s] {'loss': 2.5919, 'learning_rate': 1.8666637936607127e-07, 'epoch': 14.46} + 90%|█████████ | 335720/371472 [4:32:46<2:55:10, 3.40it/s] 90%|█████████ | 335721/371472 [4:32:46<2:59:45, 3.31it/s] 90%|█████████ | 335722/371472 [4:32:47<2:53:25, 3.44it/s] 90%|█████████ | 335723/371472 [4:32:47<2:49:35, 3.51it/s] 90%|█████████ | 335724/371472 [4:32:47<2:47:44, 3.55it/s] 90%|█████████ | 335725/371472 [4:32:48<2:52:22, 3.46it/s] 90%|█████████ | 335726/371472 [4:32:48<2:47:43, 3.55it/s] 90%|█████████ | 335727/371472 [4:32:48<2:59:59, 3.31it/s] 90%|█████████ | 335728/371472 [4:32:49<3:15:08, 3.05it/s] 90%|█████████ | 335729/371472 [4:32:49<3:06:38, 3.19it/s] 90%|█████████ | 335730/371472 [4:32:49<3:00:50, 3.29it/s] 90%|█████████ | 335731/371472 [4:32:49<2:53:32, 3.43it/s] 90%|█████████ | 335732/371472 [4:32:50<2:58:15, 3.34it/s] 90%|█████████ | 335733/371472 [4:32:50<2:58:04, 3.34it/s] 90%|█████████ | 335734/371472 [4:32:50<2:49:52, 3.51it/s] 90%|█████████ | 335735/371472 [4:32:50<2:47:40, 3.55it/s] 90%|█████████ | 335736/371472 [4:32:51<2:41:05, 3.70it/s] 90%|█████████ | 335737/371472 [4:32:51<2:39:37, 3.73it/s] 90%|█████████ | 335738/371472 [4:32:51<2:38:25, 3.76it/s] 90%|█████████ | 335739/371472 [4:32:52<2:40:40, 3.71it/s] 90%|█████████ | 335740/371472 [4:32:52<2:40:59, 3.70it/s] {'loss': 2.5821, 'learning_rate': 1.866178973905923e-07, 'epoch': 14.46} + 90%|█████████ | 335740/371472 [4:32:52<2:40:59, 3.70it/s] 90%|█████████ | 335741/371472 [4:32:52<2:54:44, 3.41it/s] 90%|█████████ | 335742/371472 [4:32:52<2:54:37, 3.41it/s] 90%|█████████ | 335743/371472 [4:32:53<2:47:28, 3.56it/s] 90%|█████████ | 335744/371472 [4:32:53<2:43:35, 3.64it/s] 90%|█████████ | 335745/371472 [4:32:53<2:38:24, 3.76it/s] 90%|█████████ | 335746/371472 [4:32:54<2:47:07, 3.56it/s] 90%|█████████ | 335747/371472 [4:32:54<2:46:10, 3.58it/s] 90%|█████████ | 335748/371472 [4:32:54<2:44:48, 3.61it/s] 90%|█████████ | 335749/371472 [4:32:54<2:48:25, 3.53it/s] 90%|█████████ | 335750/371472 [4:32:55<2:54:13, 3.42it/s] 90%|█████████ | 335751/371472 [4:32:55<2:45:53, 3.59it/s] 90%|█████████ | 335752/371472 [4:32:55<2:40:04, 3.72it/s] 90%|█████████ | 335753/371472 [4:32:55<2:39:17, 3.74it/s] 90%|█████████ | 335754/371472 [4:32:56<2:35:57, 3.82it/s] 90%|█████████ | 335755/371472 [4:32:56<2:34:04, 3.86it/s] 90%|█████████ | 335756/371472 [4:32:56<2:37:32, 3.78it/s] 90%|█████████ | 335757/371472 [4:32:57<2:40:15, 3.71it/s] 90%|█████████ | 335758/371472 [4:32:57<2:40:58, 3.70it/s] 90%|█████████ | 335759/371472 [4:32:57<2:38:50, 3.75it/s] 90%|█████████ | 335760/371472 [4:32:57<2:50:17, 3.50it/s] {'loss': 2.5806, 'learning_rate': 1.865694154151135e-07, 'epoch': 14.46} + 90%|█████████ | 335760/371472 [4:32:57<2:50:17, 3.50it/s] 90%|█████████ | 335761/371472 [4:32:58<2:48:38, 3.53it/s] 90%|█████████ | 335762/371472 [4:32:58<2:46:45, 3.57it/s] 90%|█████████ | 335763/371472 [4:32:58<2:52:45, 3.45it/s] 90%|█████████ | 335764/371472 [4:32:58<2:43:42, 3.64it/s] 90%|█████████ | 335765/371472 [4:32:59<2:42:07, 3.67it/s] 90%|█████████ | 335766/371472 [4:32:59<2:42:37, 3.66it/s] 90%|█████████ | 335767/371472 [4:32:59<2:39:08, 3.74it/s] 90%|█████████ | 335768/371472 [4:33:00<2:38:07, 3.76it/s] 90%|█████████ | 335769/371472 [4:33:00<2:34:28, 3.85it/s] 90%|█████████ | 335770/371472 [4:33:00<2:34:39, 3.85it/s] 90%|█████████ | 335771/371472 [4:33:00<2:33:12, 3.88it/s] 90%|█████████ | 335772/371472 [4:33:01<2:36:47, 3.79it/s] 90%|█████████ | 335773/371472 [4:33:01<2:37:08, 3.79it/s] 90%|█████████ | 335774/371472 [4:33:01<2:41:34, 3.68it/s] 90%|█████████ | 335775/371472 [4:33:01<2:46:27, 3.57it/s] 90%|█████████ | 335776/371472 [4:33:02<2:47:56, 3.54it/s] 90%|█████████ | 335777/371472 [4:33:02<2:46:54, 3.56it/s] 90%|█████████ | 335778/371472 [4:33:02<2:40:44, 3.70it/s] 90%|█████████ | 335779/371472 [4:33:02<2:38:54, 3.74it/s] 90%|█████████ | 335780/371472 [4:33:03<2:40:52, 3.70it/s] {'loss': 2.6219, 'learning_rate': 1.8652093343963456e-07, 'epoch': 14.46} + 90%|█████████ | 335780/371472 [4:33:03<2:40:52, 3.70it/s] 90%|█████████ | 335781/371472 [4:33:03<2:42:11, 3.67it/s] 90%|█████████ | 335782/371472 [4:33:03<2:48:20, 3.53it/s] 90%|█████████ | 335783/371472 [4:33:04<2:48:06, 3.54it/s] 90%|█████████ | 335784/371472 [4:33:04<2:39:59, 3.72it/s] 90%|█████████ | 335785/371472 [4:33:04<2:52:54, 3.44it/s] 90%|█████████ | 335786/371472 [4:33:04<2:48:43, 3.52it/s] 90%|█████████ | 335787/371472 [4:33:05<2:48:14, 3.54it/s] 90%|█████████ | 335788/371472 [4:33:05<2:55:27, 3.39it/s] 90%|█████████ | 335789/371472 [4:33:05<2:52:55, 3.44it/s] 90%|█████████ | 335790/371472 [4:33:06<2:46:36, 3.57it/s] 90%|█████████ | 335791/371472 [4:33:06<2:43:16, 3.64it/s] 90%|█████████ | 335792/371472 [4:33:06<2:55:46, 3.38it/s] 90%|█████████ | 335793/371472 [4:33:06<2:44:12, 3.62it/s] 90%|█████████ | 335794/371472 [4:33:07<2:44:49, 3.61it/s] 90%|█████████ | 335795/371472 [4:33:07<2:55:21, 3.39it/s] 90%|█████████ | 335796/371472 [4:33:07<2:58:31, 3.33it/s] 90%|█████████ | 335797/371472 [4:33:08<2:50:37, 3.48it/s] 90%|█████████ | 335798/371472 [4:33:08<2:46:13, 3.58it/s] 90%|█████████ | 335799/371472 [4:33:08<2:45:13, 3.60it/s] 90%|█████████ | 335800/371472 [4:33:08<2:49:10, 3.51it/s] {'loss': 2.5689, 'learning_rate': 1.8647245146415568e-07, 'epoch': 14.46} + 90%|█████████ | 335800/371472 [4:33:08<2:49:10, 3.51it/s] 90%|█████████ | 335801/371472 [4:33:09<2:47:02, 3.56it/s] 90%|█████████ | 335802/371472 [4:33:09<2:44:00, 3.62it/s] 90%|█████████ | 335803/371472 [4:33:09<2:43:18, 3.64it/s] 90%|█████████ | 335804/371472 [4:33:10<2:43:04, 3.65it/s] 90%|█████████ | 335805/371472 [4:33:10<2:42:45, 3.65it/s] 90%|█████████ | 335806/371472 [4:33:10<2:50:04, 3.50it/s] 90%|█████████ | 335807/371472 [4:33:10<2:44:19, 3.62it/s] 90%|█████████ | 335808/371472 [4:33:11<2:44:54, 3.60it/s] 90%|█████████ | 335809/371472 [4:33:11<2:44:16, 3.62it/s] 90%|█████████ | 335810/371472 [4:33:11<2:41:38, 3.68it/s] 90%|█████████ | 335811/371472 [4:33:12<2:44:49, 3.61it/s] 90%|█████████ | 335812/371472 [4:33:12<2:48:02, 3.54it/s] 90%|█████████ | 335813/371472 [4:33:12<2:50:22, 3.49it/s] 90%|█████████ | 335814/371472 [4:33:12<2:49:03, 3.52it/s] 90%|█████████ | 335815/371472 [4:33:13<2:47:45, 3.54it/s] 90%|█████████ | 335816/371472 [4:33:13<2:58:25, 3.33it/s] 90%|█████████ | 335817/371472 [4:33:13<2:49:01, 3.52it/s] 90%|█████████ | 335818/371472 [4:33:13<2:43:44, 3.63it/s] 90%|█████████ | 335819/371472 [4:33:14<2:43:23, 3.64it/s] 90%|█████████ | 335820/371472 [4:33:14<2:41:49, 3.67it/s] {'loss': 2.5367, 'learning_rate': 1.8642396948867676e-07, 'epoch': 14.46} + 90%|█████████ | 335820/371472 [4:33:14<2:41:49, 3.67it/s] 90%|█████████ | 335821/371472 [4:33:14<2:40:28, 3.70it/s] 90%|█████████ | 335822/371472 [4:33:15<2:36:06, 3.81it/s] 90%|█████████ | 335823/371472 [4:33:15<2:35:17, 3.83it/s] 90%|█████████ | 335824/371472 [4:33:15<2:44:53, 3.60it/s] 90%|█████████ | 335825/371472 [4:33:15<2:44:40, 3.61it/s] 90%|█████████ | 335826/371472 [4:33:16<3:17:25, 3.01it/s] 90%|█████████ | 335827/371472 [4:33:16<2:59:55, 3.30it/s] 90%|█████████ | 335828/371472 [4:33:16<2:52:45, 3.44it/s] 90%|█████████ | 335829/371472 [4:33:17<2:57:54, 3.34it/s] 90%|█████████ | 335830/371472 [4:33:17<2:52:41, 3.44it/s] 90%|█████████ | 335831/371472 [4:33:17<2:46:20, 3.57it/s] 90%|█████████ | 335832/371472 [4:33:17<2:42:06, 3.66it/s] 90%|█████████ | 335833/371472 [4:33:18<2:43:25, 3.63it/s] 90%|█████████ | 335834/371472 [4:33:18<2:42:26, 3.66it/s] 90%|█████████ | 335835/371472 [4:33:18<2:41:27, 3.68it/s] 90%|█████████ | 335836/371472 [4:33:19<2:58:27, 3.33it/s] 90%|█████████ | 335837/371472 [4:33:19<3:17:30, 3.01it/s] 90%|█████████ | 335838/371472 [4:33:19<3:08:30, 3.15it/s] 90%|█████████ | 335839/371472 [4:33:20<2:58:18, 3.33it/s] 90%|█████████ | 335840/371472 [4:33:20<2:53:50, 3.42it/s] {'loss': 2.6006, 'learning_rate': 1.8637548751319783e-07, 'epoch': 14.47} + 90%|█████████ | 335840/371472 [4:33:20<2:53:50, 3.42it/s] 90%|█████████ | 335841/371472 [4:33:20<2:47:25, 3.55it/s] 90%|█████████ | 335842/371472 [4:33:20<2:46:22, 3.57it/s] 90%|█████████ | 335843/371472 [4:33:21<2:54:44, 3.40it/s] 90%|█████████ | 335844/371472 [4:33:21<2:56:42, 3.36it/s] 90%|█████████ | 335845/371472 [4:33:21<2:51:37, 3.46it/s] 90%|█████████ | 335846/371472 [4:33:22<2:50:36, 3.48it/s] 90%|█████████ | 335847/371472 [4:33:22<2:51:18, 3.47it/s] 90%|█████████ | 335848/371472 [4:33:22<2:49:04, 3.51it/s] 90%|█████████ | 335849/371472 [4:33:22<2:42:49, 3.65it/s] 90%|█████████ | 335850/371472 [4:33:23<2:41:28, 3.68it/s] 90%|█████████ | 335851/371472 [4:33:23<2:39:26, 3.72it/s] 90%|█████████ | 335852/371472 [4:33:23<2:40:04, 3.71it/s] 90%|█████████ | 335853/371472 [4:33:23<2:39:59, 3.71it/s] 90%|█████████ | 335854/371472 [4:33:24<2:42:00, 3.66it/s] 90%|█████████ | 335855/371472 [4:33:24<2:39:00, 3.73it/s] 90%|█████████ | 335856/371472 [4:33:24<2:37:37, 3.77it/s] 90%|█████████ | 335857/371472 [4:33:25<2:32:23, 3.90it/s] 90%|█████████ | 335858/371472 [4:33:25<2:49:40, 3.50it/s] 90%|█████████ | 335859/371472 [4:33:25<2:46:18, 3.57it/s] 90%|█████████ | 335860/371472 [4:33:25<2:42:00, 3.66it/s] {'loss': 2.5307, 'learning_rate': 1.8632700553771898e-07, 'epoch': 14.47} + 90%|█████████ | 335860/371472 [4:33:25<2:42:00, 3.66it/s] 90%|█████████ | 335861/371472 [4:33:26<2:46:34, 3.56it/s] 90%|█████████ | 335862/371472 [4:33:26<2:43:27, 3.63it/s] 90%|█████████ | 335863/371472 [4:33:26<2:45:02, 3.60it/s] 90%|█████████ | 335864/371472 [4:33:27<2:44:48, 3.60it/s] 90%|█████████ | 335865/371472 [4:33:27<2:56:21, 3.36it/s] 90%|█████████ | 335866/371472 [4:33:27<2:49:35, 3.50it/s] 90%|█████████ | 335867/371472 [4:33:27<2:50:19, 3.48it/s] 90%|█████████ | 335868/371472 [4:33:28<2:45:55, 3.58it/s] 90%|█████████ | 335869/371472 [4:33:28<2:57:33, 3.34it/s] 90%|█████████ | 335870/371472 [4:33:28<2:46:54, 3.55it/s] 90%|█████████ | 335871/371472 [4:33:28<2:42:24, 3.65it/s] 90%|█████████ | 335872/371472 [4:33:29<2:46:09, 3.57it/s] 90%|█████████ | 335873/371472 [4:33:29<2:49:50, 3.49it/s] 90%|█████████ | 335874/371472 [4:33:29<2:40:15, 3.70it/s] 90%|█████████ | 335875/371472 [4:33:30<2:39:55, 3.71it/s] 90%|█████████ | 335876/371472 [4:33:30<2:36:42, 3.79it/s] 90%|█████████ | 335877/371472 [4:33:30<2:43:18, 3.63it/s] 90%|█████████ | 335878/371472 [4:33:30<2:44:07, 3.61it/s] 90%|█████████ | 335879/371472 [4:33:31<2:41:51, 3.67it/s] 90%|█████████ | 335880/371472 [4:33:31<2:56:33, 3.36it/s] {'loss': 2.6742, 'learning_rate': 1.8627852356224002e-07, 'epoch': 14.47} + 90%|█████████ | 335880/371472 [4:33:31<2:56:33, 3.36it/s] 90%|█████████ | 335881/371472 [4:33:31<3:00:24, 3.29it/s] 90%|█████████ | 335882/371472 [4:33:32<2:56:02, 3.37it/s] 90%|█████████ | 335883/371472 [4:33:32<2:47:50, 3.53it/s] 90%|█████████ | 335884/371472 [4:33:32<2:41:51, 3.66it/s] 90%|█████████ | 335885/371472 [4:33:32<2:40:27, 3.70it/s] 90%|█████████ | 335886/371472 [4:33:33<2:42:15, 3.66it/s] 90%|█████████ | 335887/371472 [4:33:33<2:42:15, 3.66it/s] 90%|█████████ | 335888/371472 [4:33:33<2:36:24, 3.79it/s] 90%|█████████ | 335889/371472 [4:33:33<2:37:34, 3.76it/s] 90%|█████████ | 335890/371472 [4:33:34<2:35:21, 3.82it/s] 90%|█████████ | 335891/371472 [4:33:34<2:51:26, 3.46it/s] 90%|█████████ | 335892/371472 [4:33:34<2:49:16, 3.50it/s] 90%|█████████ | 335893/371472 [4:33:35<2:49:22, 3.50it/s] 90%|█████████ | 335894/371472 [4:33:35<2:42:38, 3.65it/s] 90%|█████████ | 335895/371472 [4:33:35<2:39:16, 3.72it/s] 90%|█████████ | 335896/371472 [4:33:35<2:38:40, 3.74it/s] 90%|█████████ | 335897/371472 [4:33:36<2:47:57, 3.53it/s] 90%|█████████ | 335898/371472 [4:33:36<2:57:35, 3.34it/s] 90%|█████████ | 335899/371472 [4:33:36<2:57:58, 3.33it/s] 90%|█████████ | 335900/371472 [4:33:37<2:58:29, 3.32it/s] {'loss': 2.6068, 'learning_rate': 1.862300415867612e-07, 'epoch': 14.47} + 90%|█████████ | 335900/371472 [4:33:37<2:58:29, 3.32it/s] 90%|█████████ | 335901/371472 [4:33:37<2:57:05, 3.35it/s] 90%|█████████ | 335902/371472 [4:33:37<2:53:28, 3.42it/s] 90%|█████████ | 335903/371472 [4:33:38<2:49:27, 3.50it/s] 90%|█████████ | 335904/371472 [4:33:38<2:41:32, 3.67it/s] 90%|█████████ | 335905/371472 [4:33:38<2:43:47, 3.62it/s] 90%|█████████ | 335906/371472 [4:33:38<2:37:29, 3.76it/s] 90%|█████████ | 335907/371472 [4:33:39<2:46:34, 3.56it/s] 90%|█████████ | 335908/371472 [4:33:39<2:44:24, 3.61it/s] 90%|█████████ | 335909/371472 [4:33:39<2:54:11, 3.40it/s] 90%|█████████ | 335910/371472 [4:33:40<2:57:48, 3.33it/s] 90%|█████████ | 335911/371472 [4:33:40<2:52:57, 3.43it/s] 90%|█████████ | 335912/371472 [4:33:40<2:46:49, 3.55it/s] 90%|█████████ | 335913/371472 [4:33:40<2:53:21, 3.42it/s] 90%|█████████ | 335914/371472 [4:33:41<2:56:05, 3.37it/s] 90%|█████████ | 335915/371472 [4:33:41<2:52:51, 3.43it/s] 90%|█████████ | 335916/371472 [4:33:41<2:45:46, 3.57it/s] 90%|█████████ | 335917/371472 [4:33:41<2:38:48, 3.73it/s] 90%|█████████ | 335918/371472 [4:33:42<2:47:58, 3.53it/s] 90%|█████████ | 335919/371472 [4:33:42<2:43:46, 3.62it/s] 90%|█████████ | 335920/371472 [4:33:42<2:41:41, 3.66it/s] {'loss': 2.4895, 'learning_rate': 1.8618155961128224e-07, 'epoch': 14.47} + 90%|█████████ | 335920/371472 [4:33:42<2:41:41, 3.66it/s] 90%|█████████ | 335921/371472 [4:33:43<2:40:27, 3.69it/s] 90%|█████████ | 335922/371472 [4:33:43<2:46:11, 3.57it/s] 90%|█████████ | 335923/371472 [4:33:43<2:40:37, 3.69it/s] 90%|█████████ | 335924/371472 [4:33:43<2:40:22, 3.69it/s] 90%|█████████ | 335925/371472 [4:33:44<2:44:49, 3.59it/s] 90%|█████████ | 335926/371472 [4:33:44<2:49:36, 3.49it/s] 90%|█████████ | 335927/371472 [4:33:44<2:46:47, 3.55it/s] 90%|█████████ | 335928/371472 [4:33:45<2:43:32, 3.62it/s] 90%|█████████ | 335929/371472 [4:33:45<2:40:13, 3.70it/s] 90%|█████████ | 335930/371472 [4:33:45<2:47:43, 3.53it/s] 90%|█████████ | 335931/371472 [4:33:45<2:46:08, 3.57it/s] 90%|█████████ | 335932/371472 [4:33:46<2:42:47, 3.64it/s] 90%|█████████ | 335933/371472 [4:33:46<2:40:10, 3.70it/s] 90%|█████████ | 335934/371472 [4:33:46<2:45:51, 3.57it/s] 90%|█████████ | 335935/371472 [4:33:46<2:42:10, 3.65it/s] 90%|█████████ | 335936/371472 [4:33:47<2:41:07, 3.68it/s] 90%|█████████ | 335937/371472 [4:33:47<2:37:25, 3.76it/s] 90%|█████████ | 335938/371472 [4:33:47<2:34:23, 3.84it/s] 90%|█████████ | 335939/371472 [4:33:47<2:35:40, 3.80it/s] 90%|█████████ | 335940/371472 [4:33:48<2:37:21, 3.76it/s] {'loss': 2.5679, 'learning_rate': 1.8613307763580342e-07, 'epoch': 14.47} + 90%|█████████ | 335940/371472 [4:33:48<2:37:21, 3.76it/s] 90%|█████████ | 335941/371472 [4:33:48<2:38:15, 3.74it/s] 90%|█████████ | 335942/371472 [4:33:48<2:44:33, 3.60it/s] 90%|█████████ | 335943/371472 [4:33:49<2:45:41, 3.57it/s] 90%|█████████ | 335944/371472 [4:33:49<2:45:07, 3.59it/s] 90%|█████████ | 335945/371472 [4:33:49<2:39:55, 3.70it/s] 90%|█████████ | 335946/371472 [4:33:49<2:40:15, 3.69it/s] 90%|█████████ | 335947/371472 [4:33:50<2:40:03, 3.70it/s] 90%|█████████ | 335948/371472 [4:33:50<2:39:35, 3.71it/s] 90%|█████████ | 335949/371472 [4:33:50<2:56:51, 3.35it/s] 90%|█████████ | 335950/371472 [4:33:51<2:49:57, 3.48it/s] 90%|█████████ | 335951/371472 [4:33:51<2:44:42, 3.59it/s] 90%|█████████ | 335952/371472 [4:33:51<2:42:14, 3.65it/s] 90%|█████████ | 335953/371472 [4:33:51<2:46:58, 3.55it/s] 90%|█████████ | 335954/371472 [4:33:52<2:48:45, 3.51it/s] 90%|█████████ | 335955/371472 [4:33:52<2:46:47, 3.55it/s] 90%|█████████ | 335956/371472 [4:33:52<2:42:35, 3.64it/s] 90%|█████████ | 335957/371472 [4:33:52<2:38:10, 3.74it/s] 90%|█████████ | 335958/371472 [4:33:53<2:36:25, 3.78it/s] 90%|█████████ | 335959/371472 [4:33:53<2:32:21, 3.88it/s] 90%|█████████ | 335960/371472 [4:33:53<2:33:23, 3.86it/s] {'loss': 2.7321, 'learning_rate': 1.8608459566032447e-07, 'epoch': 14.47} + 90%|█████████ | 335960/371472 [4:33:53<2:33:23, 3.86it/s] 90%|█████████ | 335961/371472 [4:33:54<2:38:39, 3.73it/s] 90%|█████████ | 335962/371472 [4:33:54<2:41:58, 3.65it/s] 90%|█████████ | 335963/371472 [4:33:54<2:41:46, 3.66it/s] 90%|█████████ | 335964/371472 [4:33:54<2:46:40, 3.55it/s] 90%|█████████ | 335965/371472 [4:33:55<2:55:43, 3.37it/s] 90%|█████████ | 335966/371472 [4:33:55<2:55:19, 3.38it/s] 90%|█████████ | 335967/371472 [4:33:55<2:52:18, 3.43it/s] 90%|█████████ | 335968/371472 [4:33:56<2:45:55, 3.57it/s] 90%|█████████ | 335969/371472 [4:33:56<2:41:16, 3.67it/s] 90%|█████████ | 335970/371472 [4:33:56<2:37:10, 3.76it/s] 90%|█████████ | 335971/371472 [4:33:56<2:42:59, 3.63it/s] 90%|█████████ | 335972/371472 [4:33:57<2:42:18, 3.65it/s] 90%|█████████ | 335973/371472 [4:33:57<2:45:24, 3.58it/s] 90%|█████████ | 335974/371472 [4:33:57<2:57:51, 3.33it/s] 90%|█████████ | 335975/371472 [4:33:58<2:53:59, 3.40it/s] 90%|█████████ | 335976/371472 [4:33:58<2:46:51, 3.55it/s] 90%|█████████ | 335977/371472 [4:33:58<2:54:08, 3.40it/s] 90%|█████████ | 335978/371472 [4:33:58<2:50:21, 3.47it/s] 90%|█████████ | 335979/371472 [4:33:59<2:48:06, 3.52it/s] 90%|█████████ | 335980/371472 [4:33:59<2:45:29, 3.57it/s] {'loss': 2.5233, 'learning_rate': 1.8603611368484562e-07, 'epoch': 14.47} + 90%|█████████ | 335980/371472 [4:33:59<2:45:29, 3.57it/s] 90%|█████████ | 335981/371472 [4:33:59<2:48:39, 3.51it/s] 90%|█████████ | 335982/371472 [4:33:59<2:41:57, 3.65it/s] 90%|█████████ | 335983/371472 [4:34:00<2:40:28, 3.69it/s] 90%|█████████ | 335984/371472 [4:34:00<2:36:39, 3.78it/s] 90%|█████████ | 335985/371472 [4:34:00<2:45:26, 3.57it/s] 90%|█████████ | 335986/371472 [4:34:01<2:46:16, 3.56it/s] 90%|█████████ | 335987/371472 [4:34:01<2:48:02, 3.52it/s] 90%|█████████ | 335988/371472 [4:34:01<2:39:55, 3.70it/s] 90%|█████████ | 335989/371472 [4:34:01<2:37:21, 3.76it/s] 90%|█████████ | 335990/371472 [4:34:02<2:47:17, 3.54it/s] 90%|█████████ | 335991/371472 [4:34:02<2:50:08, 3.48it/s] 90%|█████████ | 335992/371472 [4:34:02<2:55:02, 3.38it/s] 90%|█████████ | 335993/371472 [4:34:03<2:45:45, 3.57it/s] 90%|█████████ | 335994/371472 [4:34:03<2:49:42, 3.48it/s] 90%|█████████ | 335995/371472 [4:34:03<2:44:39, 3.59it/s] 90%|█████████ | 335996/371472 [4:34:03<2:50:23, 3.47it/s] 90%|█████████ | 335997/371472 [4:34:04<2:54:59, 3.38it/s] 90%|█████████ | 335998/371472 [4:34:04<2:46:16, 3.56it/s] 90%|█████████ | 335999/371472 [4:34:04<2:46:55, 3.54it/s] 90%|█████████ | 336000/371472 [4:34:05<2:44:45, 3.59it/s] {'loss': 2.6011, 'learning_rate': 1.8598763170936666e-07, 'epoch': 14.47} + 90%|█████████ | 336000/371472 [4:34:05<2:44:45, 3.59it/s] 90%|█████████ | 336001/371472 [4:34:05<2:45:41, 3.57it/s] 90%|█████████ | 336002/371472 [4:34:05<2:50:00, 3.48it/s] 90%|█████████ | 336003/371472 [4:34:05<2:43:31, 3.61it/s] 90%|█████████ | 336004/371472 [4:34:06<2:40:55, 3.67it/s] 90%|█████████ | 336005/371472 [4:34:06<2:42:12, 3.64it/s] 90%|█████████ | 336006/371472 [4:34:06<2:53:53, 3.40it/s] 90%|█████████ | 336007/371472 [4:34:07<2:51:35, 3.44it/s] 90%|█████████ | 336008/371472 [4:34:07<2:45:43, 3.57it/s] 90%|█████████ | 336009/371472 [4:34:07<2:45:58, 3.56it/s] 90%|█████████ | 336010/371472 [4:34:07<2:57:39, 3.33it/s] 90%|█████████ | 336011/371472 [4:34:08<2:51:30, 3.45it/s] 90%|█████████ | 336012/371472 [4:34:08<2:43:13, 3.62it/s] 90%|█████████ | 336013/371472 [4:34:08<2:53:40, 3.40it/s] 90%|█████████ | 336014/371472 [4:34:09<2:48:19, 3.51it/s] 90%|█████████ | 336015/371472 [4:34:09<3:04:26, 3.20it/s] 90%|█████████ | 336016/371472 [4:34:09<2:59:11, 3.30it/s] 90%|█████████ | 336017/371472 [4:34:09<2:57:24, 3.33it/s] 90%|█████████ | 336018/371472 [4:34:10<2:48:34, 3.51it/s] 90%|█████████ | 336019/371472 [4:34:10<2:47:51, 3.52it/s] 90%|█████████ | 336020/371472 [4:34:10<2:46:04, 3.56it/s] {'loss': 2.6345, 'learning_rate': 1.8593914973388784e-07, 'epoch': 14.47} + 90%|█████████ | 336020/371472 [4:34:10<2:46:04, 3.56it/s] 90%|█████████ | 336021/371472 [4:34:11<2:44:59, 3.58it/s] 90%|█████████ | 336022/371472 [4:34:11<2:39:30, 3.70it/s] 90%|█████████ | 336023/371472 [4:34:11<2:37:29, 3.75it/s] 90%|█████████ | 336024/371472 [4:34:11<2:33:59, 3.84it/s] 90%|█████████ | 336025/371472 [4:34:12<2:43:15, 3.62it/s] 90%|█████████ | 336026/371472 [4:34:12<2:39:27, 3.70it/s] 90%|█████████ | 336027/371472 [4:34:12<2:36:35, 3.77it/s] 90%|█████████ | 336028/371472 [4:34:12<2:34:19, 3.83it/s] 90%|█████████ | 336029/371472 [4:34:13<2:39:35, 3.70it/s] 90%|█████████ | 336030/371472 [4:34:13<2:43:41, 3.61it/s] 90%|█████████ | 336031/371472 [4:34:13<2:45:12, 3.58it/s] 90%|█████████ | 336032/371472 [4:34:14<2:40:10, 3.69it/s] 90%|█████████ | 336033/371472 [4:34:14<2:41:34, 3.66it/s] 90%|█████████ | 336034/371472 [4:34:14<2:47:16, 3.53it/s] 90%|█████████ | 336035/371472 [4:34:14<2:46:58, 3.54it/s] 90%|█████████ | 336036/371472 [4:34:15<2:40:13, 3.69it/s] 90%|█████████ | 336037/371472 [4:34:15<2:59:30, 3.29it/s] 90%|█████████ | 336038/371472 [4:34:15<3:04:51, 3.19it/s] 90%|█████████ | 336039/371472 [4:34:16<3:07:50, 3.14it/s] 90%|█████████ | 336040/371472 [4:34:16<2:52:53, 3.42it/s] {'loss': 2.3949, 'learning_rate': 1.8589066775840889e-07, 'epoch': 14.47} + 90%|█████████ | 336040/371472 [4:34:16<2:52:53, 3.42it/s] 90%|█████████ | 336041/371472 [4:34:16<2:48:08, 3.51it/s] 90%|█████████ | 336042/371472 [4:34:16<2:53:10, 3.41it/s] 90%|█████████ | 336043/371472 [4:34:17<2:48:56, 3.50it/s] 90%|█████████ | 336044/371472 [4:34:17<2:49:30, 3.48it/s] 90%|█████████ | 336045/371472 [4:34:17<2:44:47, 3.58it/s] 90%|█████████ | 336046/371472 [4:34:18<2:39:28, 3.70it/s] 90%|█████████ | 336047/371472 [4:34:18<2:49:18, 3.49it/s] 90%|█████████ | 336048/371472 [4:34:18<2:41:30, 3.66it/s] 90%|█████████ | 336049/371472 [4:34:18<2:34:24, 3.82it/s] 90%|█████████ | 336050/371472 [4:34:19<2:38:47, 3.72it/s] 90%|█████████ | 336051/371472 [4:34:19<2:36:23, 3.77it/s] 90%|█████████ | 336052/371472 [4:34:19<2:35:35, 3.79it/s] 90%|█████████ | 336053/371472 [4:34:19<2:35:05, 3.81it/s] 90%|█████████ | 336054/371472 [4:34:20<2:41:07, 3.66it/s] 90%|█████████ | 336055/371472 [4:34:20<2:42:27, 3.63it/s] 90%|█████████ | 336056/371472 [4:34:20<2:41:43, 3.65it/s] 90%|█████████ | 336057/371472 [4:34:21<2:38:14, 3.73it/s] 90%|█████████ | 336058/371472 [4:34:21<2:43:36, 3.61it/s] 90%|█████████ | 336059/371472 [4:34:21<2:46:47, 3.54it/s] 90%|█████████ | 336060/371472 [4:34:21<2:42:06, 3.64it/s] {'loss': 2.6121, 'learning_rate': 1.8584218578293004e-07, 'epoch': 14.47} + 90%|█████████ | 336060/371472 [4:34:21<2:42:06, 3.64it/s] 90%|█████████ | 336061/371472 [4:34:22<2:38:45, 3.72it/s] 90%|█████████ | 336062/371472 [4:34:22<2:42:05, 3.64it/s] 90%|█████████ | 336063/371472 [4:34:22<2:42:06, 3.64it/s] 90%|█████████ | 336064/371472 [4:34:22<2:40:38, 3.67it/s] 90%|█████████ | 336065/371472 [4:34:23<2:41:08, 3.66it/s] 90%|█████████ | 336066/371472 [4:34:23<2:40:24, 3.68it/s] 90%|█████████ | 336067/371472 [4:34:23<2:38:28, 3.72it/s] 90%|█████████ | 336068/371472 [4:34:24<2:43:06, 3.62it/s] 90%|█████████ | 336069/371472 [4:34:24<2:45:26, 3.57it/s] 90%|█████████ | 336070/371472 [4:34:24<2:43:25, 3.61it/s] 90%|█████████ | 336071/371472 [4:34:24<2:42:38, 3.63it/s] 90%|█████████ | 336072/371472 [4:34:25<2:36:19, 3.77it/s] 90%|█████████ | 336073/371472 [4:34:25<2:36:53, 3.76it/s] 90%|█████████ | 336074/371472 [4:34:25<2:36:33, 3.77it/s] 90%|█████████ | 336075/371472 [4:34:25<2:35:08, 3.80it/s] 90%|█████████ | 336076/371472 [4:34:26<2:57:20, 3.33it/s] 90%|█████████ | 336077/371472 [4:34:26<2:53:29, 3.40it/s] 90%|█████████ | 336078/371472 [4:34:26<2:47:25, 3.52it/s] 90%|█████████ | 336079/371472 [4:34:27<2:47:00, 3.53it/s] 90%|█████████ | 336080/371472 [4:34:27<2:44:47, 3.58it/s] {'loss': 2.6152, 'learning_rate': 1.857937038074511e-07, 'epoch': 14.48} + 90%|█████████ | 336080/371472 [4:34:27<2:44:47, 3.58it/s] 90%|█████████ | 336081/371472 [4:34:27<2:41:51, 3.64it/s] 90%|█████████ | 336082/371472 [4:34:28<2:58:15, 3.31it/s] 90%|█████████ | 336083/371472 [4:34:28<3:02:11, 3.24it/s] 90%|█████████ | 336084/371472 [4:34:28<3:14:30, 3.03it/s] 90%|█████████ | 336085/371472 [4:34:29<3:07:07, 3.15it/s] 90%|█████████ | 336086/371472 [4:34:29<2:57:39, 3.32it/s] 90%|█████████ | 336087/371472 [4:34:29<2:49:22, 3.48it/s] 90%|█████████ | 336088/371472 [4:34:29<2:42:42, 3.62it/s] 90%|█████████ | 336089/371472 [4:34:30<2:41:44, 3.65it/s] 90%|█████████ | 336090/371472 [4:34:30<2:37:05, 3.75it/s] 90%|█████████ | 336091/371472 [4:34:30<2:35:11, 3.80it/s] 90%|█████████ | 336092/371472 [4:34:30<2:44:03, 3.59it/s] 90%|█████████ | 336093/371472 [4:34:31<2:43:49, 3.60it/s] 90%|█████████ | 336094/371472 [4:34:31<2:52:18, 3.42it/s] 90%|█████████ | 336095/371472 [4:34:31<2:47:23, 3.52it/s] 90%|█████████ | 336096/371472 [4:34:31<2:39:59, 3.69it/s] 90%|█████████ | 336097/371472 [4:34:32<2:37:51, 3.73it/s] 90%|█████████ | 336098/371472 [4:34:32<2:41:20, 3.65it/s] 90%|█████████ | 336099/371472 [4:34:32<2:37:02, 3.75it/s] 90%|█████████ | 336100/371472 [4:34:33<2:37:34, 3.74it/s] {'loss': 2.5647, 'learning_rate': 1.8574522183197226e-07, 'epoch': 14.48} + 90%|█████████ | 336100/371472 [4:34:33<2:37:34, 3.74it/s] 90%|█████████ | 336101/371472 [4:34:33<2:37:02, 3.75it/s] 90%|█████████ | 336102/371472 [4:34:33<2:37:34, 3.74it/s] 90%|█████████ | 336103/371472 [4:34:33<2:34:04, 3.83it/s] 90%|█████████ | 336104/371472 [4:34:34<2:37:17, 3.75it/s] 90%|█████████ | 336105/371472 [4:34:34<2:32:21, 3.87it/s] 90%|█████████ | 336106/371472 [4:34:34<2:30:18, 3.92it/s] 90%|█████████ | 336107/371472 [4:34:34<2:29:20, 3.95it/s] 90%|█████████ | 336108/371472 [4:34:35<2:32:15, 3.87it/s] 90%|█████████ | 336109/371472 [4:34:35<2:27:32, 3.99it/s] 90%|█████████ | 336110/371472 [4:34:35<2:25:20, 4.06it/s] 90%|█████████ | 336111/371472 [4:34:35<2:27:32, 3.99it/s] 90%|█████████ | 336112/371472 [4:34:36<2:23:43, 4.10it/s] 90%|█████████ | 336113/371472 [4:34:36<2:23:45, 4.10it/s] 90%|█████████ | 336114/371472 [4:34:36<2:30:19, 3.92it/s] 90%|█████████ | 336115/371472 [4:34:36<2:35:05, 3.80it/s] 90%|█████████ | 336116/371472 [4:34:37<2:34:36, 3.81it/s] 90%|█████████ | 336117/371472 [4:34:37<2:34:55, 3.80it/s] 90%|█████████ | 336118/371472 [4:34:37<2:44:59, 3.57it/s] 90%|█████████ | 336119/371472 [4:34:38<2:50:19, 3.46it/s] 90%|█████████ | 336120/371472 [4:34:38<2:51:01, 3.45it/s] {'loss': 2.7412, 'learning_rate': 1.856967398564933e-07, 'epoch': 14.48} + 90%|█████████ | 336120/371472 [4:34:38<2:51:01, 3.45it/s] 90%|█████████ | 336121/371472 [4:34:38<2:58:25, 3.30it/s] 90%|█████████ | 336122/371472 [4:34:38<2:55:15, 3.36it/s] 90%|█████████ | 336123/371472 [4:34:39<2:52:51, 3.41it/s] 90%|█████████ | 336124/371472 [4:34:39<2:58:17, 3.30it/s] 90%|█████████ | 336125/371472 [4:34:39<2:48:09, 3.50it/s] 90%|█████████ | 336126/371472 [4:34:40<2:42:58, 3.61it/s] 90%|█████████ | 336127/371472 [4:34:40<2:52:07, 3.42it/s] 90%|█████████ | 336128/371472 [4:34:40<2:46:00, 3.55it/s] 90%|█████████ | 336129/371472 [4:34:40<2:42:45, 3.62it/s] 90%|█████████ | 336130/371472 [4:34:41<2:43:12, 3.61it/s] 90%|█████████ | 336131/371472 [4:34:41<3:08:42, 3.12it/s] 90%|█████████ | 336132/371472 [4:34:41<3:03:53, 3.20it/s] 90%|█████████ | 336133/371472 [4:34:42<2:57:34, 3.32it/s] 90%|█████████ | 336134/371472 [4:34:42<3:04:34, 3.19it/s] 90%|█████████ | 336135/371472 [4:34:42<2:56:09, 3.34it/s] 90%|█████████ | 336136/371472 [4:34:43<3:02:59, 3.22it/s] 90%|█████████ | 336137/371472 [4:34:43<2:50:40, 3.45it/s] 90%|█████████ | 336138/371472 [4:34:43<2:51:34, 3.43it/s] 90%|█████████ | 336139/371472 [4:34:44<3:17:15, 2.99it/s] 90%|█████████ | 336140/371472 [4:34:44<3:08:55, 3.12it/s] {'loss': 2.502, 'learning_rate': 1.8564825788101448e-07, 'epoch': 14.48} + 90%|█████████ | 336140/371472 [4:34:44<3:08:55, 3.12it/s] 90%|█████████ | 336141/371472 [4:34:44<3:03:16, 3.21it/s] 90%|█████████ | 336142/371472 [4:34:44<2:55:18, 3.36it/s] 90%|█████████ | 336143/371472 [4:34:45<3:00:40, 3.26it/s] 90%|█████████ | 336144/371472 [4:34:45<3:18:10, 2.97it/s] 90%|█████████ | 336145/371472 [4:34:45<3:06:53, 3.15it/s] 90%|█████████ | 336146/371472 [4:34:46<2:54:31, 3.37it/s] 90%|█████████ | 336147/371472 [4:34:46<2:48:12, 3.50it/s] 90%|█████████ | 336148/371472 [4:34:46<2:50:48, 3.45it/s] 90%|█████████ | 336149/371472 [4:34:47<2:59:55, 3.27it/s] 90%|█████████ | 336150/371472 [4:34:47<3:16:30, 3.00it/s] 90%|█████████ | 336151/371472 [4:34:47<3:03:15, 3.21it/s] 90%|█████████ | 336152/371472 [4:34:48<3:03:52, 3.20it/s] 90%|█████████ | 336153/371472 [4:34:48<3:01:39, 3.24it/s] 90%|█████████ | 336154/371472 [4:34:48<3:09:20, 3.11it/s] 90%|█████████ | 336155/371472 [4:34:49<3:08:30, 3.12it/s] 90%|█████████ | 336156/371472 [4:34:49<3:02:46, 3.22it/s] 90%|█████████ | 336157/371472 [4:34:49<2:54:40, 3.37it/s] 90%|█████████ | 336158/371472 [4:34:49<2:54:46, 3.37it/s] 90%|█████████ | 336159/371472 [4:34:50<3:06:49, 3.15it/s] 90%|█████████ | 336160/371472 [4:34:50<2:59:13, 3.28it/s] {'loss': 2.5522, 'learning_rate': 1.8559977590553555e-07, 'epoch': 14.48} + 90%|█████████ | 336160/371472 [4:34:50<2:59:13, 3.28it/s] 90%|█████████ | 336161/371472 [4:34:50<2:58:10, 3.30it/s] 90%|█████████ | 336162/371472 [4:34:51<2:48:31, 3.49it/s] 90%|█████████ | 336163/371472 [4:34:51<2:42:04, 3.63it/s] 90%|█████████ | 336164/371472 [4:34:51<2:47:13, 3.52it/s] 90%|█████████ | 336165/371472 [4:34:51<2:46:02, 3.54it/s] 90%|█████████ | 336166/371472 [4:34:52<2:39:49, 3.68it/s] 90%|█████████ | 336167/371472 [4:34:52<2:40:15, 3.67it/s] 90%|█████████ | 336168/371472 [4:34:52<2:39:00, 3.70it/s] 90%|█████████ | 336169/371472 [4:34:52<2:46:23, 3.54it/s] 90%|█████████ | 336170/371472 [4:34:53<2:45:42, 3.55it/s] 90%|█████████ | 336171/371472 [4:34:53<2:45:58, 3.54it/s] 90%|█████████ | 336172/371472 [4:34:53<2:50:55, 3.44it/s] 90%|█████████ | 336173/371472 [4:34:54<2:48:46, 3.49it/s] 90%|█████████ | 336174/371472 [4:34:54<2:48:35, 3.49it/s] 90%|█████████ | 336175/371472 [4:34:54<2:52:04, 3.42it/s] 90%|█████████ | 336176/371472 [4:34:55<2:55:50, 3.35it/s] 90%|█████████ | 336177/371472 [4:34:55<3:06:18, 3.16it/s] 90%|██████���██ | 336178/371472 [4:34:55<3:03:28, 3.21it/s] 90%|█████████ | 336179/371472 [4:34:55<2:58:11, 3.30it/s] 90%|█████████ | 336180/371472 [4:34:56<2:48:29, 3.49it/s] {'loss': 2.5612, 'learning_rate': 1.8555129393005668e-07, 'epoch': 14.48} + 90%|█████████ | 336180/371472 [4:34:56<2:48:29, 3.49it/s] 90%|█████████ | 336181/371472 [4:34:56<2:58:26, 3.30it/s] 90%|█████████ | 336182/371472 [4:34:56<2:51:14, 3.43it/s] 91%|█████████ | 336183/371472 [4:34:57<2:49:22, 3.47it/s] 91%|█████████ | 336184/371472 [4:34:57<2:54:07, 3.38it/s] 91%|█████████ | 336185/371472 [4:34:57<2:54:14, 3.38it/s] 91%|█████████ | 336186/371472 [4:34:57<2:45:12, 3.56it/s] 91%|█████████ | 336187/371472 [4:34:58<2:49:58, 3.46it/s] 91%|█████████ | 336188/371472 [4:34:58<2:47:36, 3.51it/s] 91%|█████████ | 336189/371472 [4:34:58<2:40:01, 3.67it/s] 91%|█████████ | 336190/371472 [4:34:59<2:41:30, 3.64it/s] 91%|█████████ | 336191/371472 [4:34:59<2:42:21, 3.62it/s] 91%|█████████ | 336192/371472 [4:34:59<2:58:56, 3.29it/s] 91%|█████████ | 336193/371472 [4:34:59<2:49:45, 3.46it/s] 91%|█████████ | 336194/371472 [4:35:00<2:52:26, 3.41it/s] 91%|█████████ | 336195/371472 [4:35:00<2:47:54, 3.50it/s] 91%|█████████ | 336196/371472 [4:35:00<2:41:06, 3.65it/s] 91%|█████████ | 336197/371472 [4:35:01<2:39:40, 3.68it/s] 91%|█████████ | 336198/371472 [4:35:01<2:50:12, 3.45it/s] 91%|█████████ | 336199/371472 [4:35:01<2:48:57, 3.48it/s] 91%|█████████ | 336200/371472 [4:35:01<2:45:20, 3.56it/s] {'loss': 2.5573, 'learning_rate': 1.8550281195457775e-07, 'epoch': 14.48} + 91%|█████████ | 336200/371472 [4:35:01<2:45:20, 3.56it/s] 91%|█████████ | 336201/371472 [4:35:02<2:44:54, 3.56it/s] 91%|█████████ | 336202/371472 [4:35:02<2:42:59, 3.61it/s] 91%|█████████ | 336203/371472 [4:35:02<2:47:00, 3.52it/s] 91%|█████████ | 336204/371472 [4:35:03<2:42:46, 3.61it/s] 91%|█████████ | 336205/371472 [4:35:03<2:44:05, 3.58it/s] 91%|█████████ | 336206/371472 [4:35:03<2:39:00, 3.70it/s] 91%|█████████ | 336207/371472 [4:35:03<2:42:43, 3.61it/s] 91%|█████████ | 336208/371472 [4:35:04<2:41:30, 3.64it/s] 91%|█████████ | 336209/371472 [4:35:04<2:43:52, 3.59it/s] 91%|█████████ | 336210/371472 [4:35:04<2:54:41, 3.36it/s] 91%|█████████ | 336211/371472 [4:35:05<2:45:14, 3.56it/s] 91%|█████████ | 336212/371472 [4:35:05<2:37:30, 3.73it/s] 91%|█████████ | 336213/371472 [4:35:05<2:36:57, 3.74it/s] 91%|█████████ | 336214/371472 [4:35:05<2:35:40, 3.77it/s] 91%|█████████ | 336215/371472 [4:35:06<2:31:49, 3.87it/s] 91%|█████████ | 336216/371472 [4:35:06<2:27:36, 3.98it/s] 91%|█████████ | 336217/371472 [4:35:06<2:34:47, 3.80it/s] 91%|█████████ | 336218/371472 [4:35:06<3:00:07, 3.26it/s] 91%|█████████ | 336219/371472 [4:35:07<3:08:13, 3.12it/s] 91%|█████████ | 336220/371472 [4:35:07<3:06:55, 3.14it/s] {'loss': 2.5753, 'learning_rate': 1.8545432997909893e-07, 'epoch': 14.48} + 91%|█████████ | 336220/371472 [4:35:07<3:06:55, 3.14it/s] 91%|█████████ | 336221/371472 [4:35:07<2:56:36, 3.33it/s] 91%|█████████ | 336222/371472 [4:35:08<2:47:57, 3.50it/s] 91%|█████████ | 336223/371472 [4:35:08<2:37:50, 3.72it/s] 91%|█████████ | 336224/371472 [4:35:08<2:34:13, 3.81it/s] 91%|█████████ | 336225/371472 [4:35:08<2:32:51, 3.84it/s] 91%|█████████ | 336226/371472 [4:35:09<2:38:42, 3.70it/s] 91%|█████████ | 336227/371472 [4:35:09<2:33:13, 3.83it/s] 91%|█████████ | 336228/371472 [4:35:09<2:39:32, 3.68it/s] 91%|█████████ | 336229/371472 [4:35:10<2:42:16, 3.62it/s] 91%|█████████ | 336230/371472 [4:35:10<2:42:43, 3.61it/s] 91%|█████████ | 336231/371472 [4:35:10<2:36:55, 3.74it/s] 91%|█████████ | 336232/371472 [4:35:10<2:39:34, 3.68it/s] 91%|█████████ | 336233/371472 [4:35:11<2:38:37, 3.70it/s] 91%|█████████ | 336234/371472 [4:35:11<2:37:11, 3.74it/s] 91%|█████████ | 336235/371472 [4:35:11<2:39:16, 3.69it/s] 91%|█████████ | 336236/371472 [4:35:11<2:51:17, 3.43it/s] 91%|█████████ | 336237/371472 [4:35:12<2:46:00, 3.54it/s] 91%|█████████ | 336238/371472 [4:35:12<2:42:08, 3.62it/s] 91%|█████████ | 336239/371472 [4:35:12<2:35:14, 3.78it/s] 91%|█████████ | 336240/371472 [4:35:12<2:39:01, 3.69it/s] {'loss': 2.5301, 'learning_rate': 1.8540584800361997e-07, 'epoch': 14.48} + 91%|█████████ | 336240/371472 [4:35:12<2:39:01, 3.69it/s] 91%|█████████ | 336241/371472 [4:35:13<2:42:03, 3.62it/s] 91%|█████████ | 336242/371472 [4:35:13<2:38:21, 3.71it/s] 91%|█████████ | 336243/371472 [4:35:13<2:42:26, 3.61it/s] 91%|█████████ | 336244/371472 [4:35:14<2:44:35, 3.57it/s] 91%|█████████ | 336245/371472 [4:35:14<2:43:09, 3.60it/s] 91%|█████████ | 336246/371472 [4:35:14<2:37:52, 3.72it/s] 91%|█████████ | 336247/371472 [4:35:14<2:37:50, 3.72it/s] 91%|█████████ | 336248/371472 [4:35:15<2:38:31, 3.70it/s] 91%|█████████ | 336249/371472 [4:35:15<2:47:55, 3.50it/s] 91%|█████████ | 336250/371472 [4:35:15<2:42:28, 3.61it/s] 91%|█████████ | 336251/371472 [4:35:16<2:40:46, 3.65it/s] 91%|█████████ | 336252/371472 [4:35:16<2:35:28, 3.78it/s] 91%|█████████ | 336253/371472 [4:35:16<2:44:11, 3.58it/s] 91%|█████████ | 336254/371472 [4:35:16<2:46:49, 3.52it/s] 91%|█████████ | 336255/371472 [4:35:17<2:48:24, 3.49it/s] 91%|█████████ | 336256/371472 [4:35:17<2:46:14, 3.53it/s] 91%|█████████ | 336257/371472 [4:35:17<2:37:34, 3.72it/s] 91%|█████████ | 336258/371472 [4:35:17<2:34:28, 3.80it/s] 91%|█████████ | 336259/371472 [4:35:18<2:32:00, 3.86it/s] 91%|█████████ | 336260/371472 [4:35:18<2:36:43, 3.74it/s] {'loss': 2.696, 'learning_rate': 1.8535736602814112e-07, 'epoch': 14.48} + 91%|█████████ | 336260/371472 [4:35:18<2:36:43, 3.74it/s] 91%|█████████ | 336261/371472 [4:35:18<2:38:30, 3.70it/s] 91%|█████████ | 336262/371472 [4:35:19<2:42:09, 3.62it/s] 91%|█████████ | 336263/371472 [4:35:19<2:38:53, 3.69it/s] 91%|█████████ | 336264/371472 [4:35:19<2:36:07, 3.76it/s] 91%|█████████ | 336265/371472 [4:35:19<2:36:39, 3.75it/s] 91%|█████████ | 336266/371472 [4:35:20<2:38:49, 3.69it/s] 91%|█████████ | 336267/371472 [4:35:20<2:36:27, 3.75it/s] 91%|█████████ | 336268/371472 [4:35:20<3:18:25, 2.96it/s] 91%|█████████ | 336269/371472 [4:35:21<3:01:47, 3.23it/s] 91%|█████████ | 336270/371472 [4:35:21<3:00:34, 3.25it/s] 91%|█████████ | 336271/371472 [4:35:21<2:59:48, 3.26it/s] 91%|█████████ | 336272/371472 [4:35:22<3:12:28, 3.05it/s] 91%|█████████ | 336273/371472 [4:35:22<3:01:00, 3.24it/s] 91%|█████████ | 336274/371472 [4:35:22<3:04:50, 3.17it/s] 91%|█████████ | 336275/371472 [4:35:22<2:55:51, 3.34it/s] 91%|█████████ | 336276/371472 [4:35:23<2:53:38, 3.38it/s] 91%|█████████ | 336277/371472 [4:35:23<2:44:42, 3.56it/s] 91%|█████████ | 336278/371472 [4:35:23<2:43:24, 3.59it/s] 91%|█████████ | 336279/371472 [4:35:24<2:43:19, 3.59it/s] 91%|█████████ | 336280/371472 [4:35:24<2:43:23, 3.59it/s] {'loss': 2.6997, 'learning_rate': 1.853088840526622e-07, 'epoch': 14.48} + 91%|█████████ | 336280/371472 [4:35:24<2:43:23, 3.59it/s] 91%|█████████ | 336281/371472 [4:35:24<2:40:14, 3.66it/s] 91%|█████████ | 336282/371472 [4:35:24<2:45:15, 3.55it/s] 91%|█████████ | 336283/371472 [4:35:25<2:53:34, 3.38it/s] 91%|█████████ | 336284/371472 [4:35:25<2:49:17, 3.46it/s] 91%|█████████ | 336285/371472 [4:35:25<2:40:08, 3.66it/s] 91%|█████████ | 336286/371472 [4:35:25<2:33:57, 3.81it/s] 91%|█████████ | 336287/371472 [4:35:26<2:42:23, 3.61it/s] 91%|█████████ | 336288/371472 [4:35:26<2:44:06, 3.57it/s] 91%|█████████ | 336289/371472 [4:35:26<2:43:02, 3.60it/s] 91%|█████████ | 336290/371472 [4:35:27<2:43:52, 3.58it/s] 91%|█████████ | 336291/371472 [4:35:27<2:50:02, 3.45it/s] 91%|█████████ | 336292/371472 [4:35:27<3:05:20, 3.16it/s] 91%|██���██████ | 336293/371472 [4:35:28<2:53:59, 3.37it/s] 91%|█████████ | 336294/371472 [4:35:28<2:47:13, 3.51it/s] 91%|█████████ | 336295/371472 [4:35:28<2:45:55, 3.53it/s] 91%|█████████ | 336296/371472 [4:35:28<2:43:14, 3.59it/s] 91%|█████████ | 336297/371472 [4:35:29<2:36:18, 3.75it/s] 91%|█████████ | 336298/371472 [4:35:29<2:40:11, 3.66it/s] 91%|█████████ | 336299/371472 [4:35:29<2:43:15, 3.59it/s] 91%|█████████ | 336300/371472 [4:35:29<2:39:05, 3.68it/s] {'loss': 2.5177, 'learning_rate': 1.8526040207718334e-07, 'epoch': 14.49} + 91%|█████████ | 336300/371472 [4:35:29<2:39:05, 3.68it/s] 91%|█████████ | 336301/371472 [4:35:30<2:58:20, 3.29it/s] 91%|█████████ | 336302/371472 [4:35:30<2:51:31, 3.42it/s] 91%|█████████ | 336303/371472 [4:35:30<2:48:45, 3.47it/s] 91%|█████████ | 336304/371472 [4:35:31<2:50:46, 3.43it/s] 91%|█████████ | 336305/371472 [4:35:31<2:47:30, 3.50it/s] 91%|█████████ | 336306/371472 [4:35:31<2:55:31, 3.34it/s] 91%|█████████ | 336307/371472 [4:35:32<2:48:16, 3.48it/s] 91%|█████████ | 336308/371472 [4:35:32<2:41:51, 3.62it/s] 91%|█████████ | 336309/371472 [4:35:32<2:33:03, 3.83it/s] 91%|█████████ | 336310/371472 [4:35:32<2:49:39, 3.45it/s] 91%|█████████ | 336311/371472 [4:35:33<2:50:30, 3.44it/s] 91%|█████████ | 336312/371472 [4:35:33<2:43:04, 3.59it/s] 91%|█████████ | 336313/371472 [4:35:33<2:58:46, 3.28it/s] 91%|█████████ | 336314/371472 [4:35:34<2:57:28, 3.30it/s] 91%|█████████ | 336315/371472 [4:35:34<3:01:09, 3.23it/s] 91%|█████████ | 336316/371472 [4:35:34<3:00:20, 3.25it/s] 91%|█████████ | 336317/371472 [4:35:34<2:57:47, 3.30it/s] 91%|█████████ | 336318/371472 [4:35:35<2:49:46, 3.45it/s] 91%|█████████ | 336319/371472 [4:35:35<2:44:17, 3.57it/s] 91%|█████████ | 336320/371472 [4:35:35<2:45:41, 3.54it/s] {'loss': 2.5498, 'learning_rate': 1.852119201017044e-07, 'epoch': 14.49} + 91%|█████████ | 336320/371472 [4:35:35<2:45:41, 3.54it/s] 91%|█████████ | 336321/371472 [4:35:36<2:40:47, 3.64it/s] 91%|█████████ | 336322/371472 [4:35:36<2:37:13, 3.73it/s] 91%|█████████ | 336323/371472 [4:35:36<2:43:31, 3.58it/s] 91%|█████████ | 336324/371472 [4:35:36<2:45:31, 3.54it/s] 91%|█████████ | 336325/371472 [4:35:37<2:52:52, 3.39it/s] 91%|█████████ | 336326/371472 [4:35:37<2:49:01, 3.47it/s] 91%|█████████ | 336327/371472 [4:35:37<2:53:40, 3.37it/s] 91%|█████████ | 336328/371472 [4:35:38<2:58:18, 3.29it/s] 91%|█████████ | 336329/371472 [4:35:38<2:50:58, 3.43it/s] 91%|█████████ | 336330/371472 [4:35:38<2:47:26, 3.50it/s] 91%|█████████ | 336331/371472 [4:35:38<2:48:02, 3.49it/s] 91%|█████████ | 336332/371472 [4:35:39<2:40:58, 3.64it/s] 91%|█████████ | 336333/371472 [4:35:39<2:50:20, 3.44it/s] 91%|█████████ | 336334/371472 [4:35:39<2:44:49, 3.55it/s] 91%|█████████ | 336335/371472 [4:35:40<2:48:12, 3.48it/s] 91%|█████████ | 336336/371472 [4:35:40<2:48:58, 3.47it/s] 91%|█████████ | 336337/371472 [4:35:40<2:48:17, 3.48it/s] 91%|█████████ | 336338/371472 [4:35:40<2:53:05, 3.38it/s] 91%|█████████ | 336339/371472 [4:35:41<2:47:56, 3.49it/s] 91%|█████████ | 336340/371472 [4:35:41<2:42:54, 3.59it/s] {'loss': 2.5846, 'learning_rate': 1.8516343812622557e-07, 'epoch': 14.49} + 91%|█████████ | 336340/371472 [4:35:41<2:42:54, 3.59it/s] 91%|█████████ | 336341/371472 [4:35:41<2:50:39, 3.43it/s] 91%|█████████ | 336342/371472 [4:35:42<2:43:22, 3.58it/s] 91%|█████████ | 336343/371472 [4:35:42<2:39:08, 3.68it/s] 91%|█████████ | 336344/371472 [4:35:42<2:36:41, 3.74it/s] 91%|█████████ | 336345/371472 [4:35:42<2:33:14, 3.82it/s] 91%|█████████ | 336346/371472 [4:35:43<2:40:38, 3.64it/s] 91%|█████████ | 336347/371472 [4:35:43<2:45:00, 3.55it/s] 91%|█████████ | 336348/371472 [4:35:43<2:40:24, 3.65it/s] 91%|█████████ | 336349/371472 [4:35:43<2:35:56, 3.75it/s] 91%|█████████ | 336350/371472 [4:35:44<2:43:51, 3.57it/s] 91%|█████████ | 336351/371472 [4:35:44<2:41:10, 3.63it/s] 91%|█████████ | 336352/371472 [4:35:44<2:37:46, 3.71it/s] 91%|█████████ | 336353/371472 [4:35:45<2:40:52, 3.64it/s] 91%|█████████ | 336354/371472 [4:35:45<2:38:20, 3.70it/s] 91%|█████████ | 336355/371472 [4:35:45<2:38:03, 3.70it/s] 91%|█████████ | 336356/371472 [4:35:45<2:43:54, 3.57it/s] 91%|█████████ | 336357/371472 [4:35:46<2:44:43, 3.55it/s] 91%|█████████ | 336358/371472 [4:35:46<2:38:05, 3.70it/s] 91%|█████████ | 336359/371472 [4:35:46<2:40:43, 3.64it/s] 91%|█████████ | 336360/371472 [4:35:47<2:49:53, 3.44it/s] {'loss': 2.618, 'learning_rate': 1.8511495615074659e-07, 'epoch': 14.49} + 91%|█████████ | 336360/371472 [4:35:47<2:49:53, 3.44it/s] 91%|█████████ | 336361/371472 [4:35:47<2:48:17, 3.48it/s] 91%|█████████ | 336362/371472 [4:35:47<2:38:20, 3.70it/s] 91%|█████████ | 336363/371472 [4:35:47<2:43:19, 3.58it/s] 91%|█████████ | 336364/371472 [4:35:48<2:39:51, 3.66it/s] 91%|█████████ | 336365/371472 [4:35:48<2:41:51, 3.62it/s] 91%|█████████ | 336366/371472 [4:35:48<2:36:29, 3.74it/s] 91%|█████████ | 336367/371472 [4:35:48<2:32:12, 3.84it/s] 91%|█████████ | 336368/371472 [4:35:49<2:33:17, 3.82it/s] 91%|█████████ | 336369/371472 [4:35:49<2:31:13, 3.87it/s] 91%|█████████ | 336370/371472 [4:35:49<2:35:00, 3.77it/s] 91%|█████████ | 336371/371472 [4:35:49<2:39:53, 3.66it/s] 91%|█████████ | 336372/371472 [4:35:50<2:51:05, 3.42it/s] 91%|█████████ | 336373/371472 [4:35:50<2:58:28, 3.28it/s] 91%|█████████ | 336374/371472 [4:35:50<3:01:41, 3.22it/s] 91%|█████████ | 336375/371472 [4:35:51<3:00:51, 3.23it/s] 91%|█████████ | 336376/371472 [4:35:51<3:03:21, 3.19it/s] 91%|█████████ | 336377/371472 [4:35:51<2:52:05, 3.40it/s] 91%|█████████ | 336378/371472 [4:35:52<2:46:02, 3.52it/s] 91%|█████████ | 336379/371472 [4:35:52<2:41:14, 3.63it/s] 91%|█████████ | 336380/371472 [4:35:52<2:40:00, 3.66it/s] {'loss': 2.6411, 'learning_rate': 1.8506647417526766e-07, 'epoch': 14.49} + 91%|█████████ | 336380/371472 [4:35:52<2:40:00, 3.66it/s] 91%|█████████ | 336381/371472 [4:35:52<2:35:23, 3.76it/s] 91%|█████████ | 336382/371472 [4:35:53<2:40:26, 3.65it/s] 91%|█████████ | 336383/371472 [4:35:53<2:38:39, 3.69it/s] 91%|█████████ | 336384/371472 [4:35:53<2:55:46, 3.33it/s] 91%|█████████ | 336385/371472 [4:35:54<2:58:23, 3.28it/s] 91%|█████████ | 336386/371472 [4:35:54<2:49:51, 3.44it/s] 91%|█████████ | 336387/371472 [4:35:54<2:41:50, 3.61it/s] 91%|█████████ | 336388/371472 [4:35:54<2:42:35, 3.60it/s] 91%|█████████ | 336389/371472 [4:35:55<2:41:33, 3.62it/s] 91%|█████████ | 336390/371472 [4:35:55<3:00:27, 3.24it/s] 91%|█████████ | 336391/371472 [4:35:55<3:06:22, 3.14it/s] 91%|█████████ | 336392/371472 [4:35:56<3:04:04, 3.18it/s] 91%|█████████ | 336393/371472 [4:35:56<2:56:03, 3.32it/s] 91%|█████████ | 336394/371472 [4:35:56<2:50:46, 3.42it/s] 91%|█████████ | 336395/371472 [4:35:57<2:50:26, 3.43it/s] 91%|█████████ | 336396/371472 [4:35:57<2:50:49, 3.42it/s] 91%|█████████ | 336397/371472 [4:35:57<2:47:18, 3.49it/s] 91%|█████████ | 336398/371472 [4:35:57<2:43:21, 3.58it/s] 91%|█████████ | 336399/371472 [4:35:58<2:42:06, 3.61it/s] 91%|█████████ | 336400/371472 [4:35:58<2:53:51, 3.36it/s] {'loss': 2.5752, 'learning_rate': 1.8501799219978883e-07, 'epoch': 14.49} + 91%|█████████ | 336400/371472 [4:35:58<2:53:51, 3.36it/s] 91%|█████████ | 336401/371472 [4:35:58<3:00:58, 3.23it/s] 91%|█████████ | 336402/371472 [4:35:59<2:53:33, 3.37it/s] 91%|█████████ | 336403/371472 [4:35:59<2:45:46, 3.53it/s] 91%|█████████ | 336404/371472 [4:35:59<2:36:01, 3.75it/s] 91%|█████████ | 336405/371472 [4:35:59<2:35:11, 3.77it/s] 91%|█████████ | 336406/371472 [4:36:00<2:38:28, 3.69it/s] 91%|█████████ | 336407/371472 [4:36:00<2:37:23, 3.71it/s] 91%|█████████ | 336408/371472 [4:36:00<2:48:10, 3.48it/s] 91%|█████████ | 336409/371472 [4:36:00<2:48:19, 3.47it/s] 91%|█████████ | 336410/371472 [4:36:01<2:44:06, 3.56it/s] 91%|█████████ | 336411/371472 [4:36:01<2:48:07, 3.48it/s] 91%|█████████ | 336412/371472 [4:36:01<2:43:19, 3.58it/s] 91%|█████████ | 336413/371472 [4:36:02<2:37:15, 3.72it/s] 91%|█████████ | 336414/371472 [4:36:02<2:33:45, 3.80it/s] 91%|█████████ | 336415/371472 [4:36:02<2:28:53, 3.92it/s] 91%|█████████ | 336416/371472 [4:36:02<2:28:55, 3.92it/s] 91%|█████████ | 336417/371472 [4:36:03<2:31:29, 3.86it/s] 91%|█████████ | 336418/371472 [4:36:03<2:32:02, 3.84it/s] 91%|█████████ | 336419/371472 [4:36:03<2:37:21, 3.71it/s] 91%|█████████ | 336420/371472 [4:36:03<2:33:41, 3.80it/s] {'loss': 2.5783, 'learning_rate': 1.849695102243099e-07, 'epoch': 14.49} + 91%|█████████ | 336420/371472 [4:36:03<2:33:41, 3.80it/s] 91%|█████████ | 336421/371472 [4:36:04<2:43:59, 3.56it/s] 91%|█████████ | 336422/371472 [4:36:04<2:50:59, 3.42it/s] 91%|█████████ | 336423/371472 [4:36:04<2:44:29, 3.55it/s] 91%|█████████ | 336424/371472 [4:36:05<2:59:27, 3.26it/s] 91%|█████████ | 336425/371472 [4:36:05<2:53:17, 3.37it/s] 91%|█████████ | 336426/371472 [4:36:05<2:56:08, 3.32it/s] 91%|█████████ | 336427/371472 [4:36:06<3:02:39, 3.20it/s] 91%|█████████ | 336428/371472 [4:36:06<3:09:23, 3.08it/s] 91%|█████████ | 336429/371472 [4:36:06<3:00:49, 3.23it/s] 91%|█████████ | 336430/371472 [4:36:06<2:53:40, 3.36it/s] 91%|█████████ | 336431/371472 [4:36:07<2:53:46, 3.36it/s] 91%|█████████ | 336432/371472 [4:36:07<2:46:11, 3.51it/s] 91%|█████████ | 336433/371472 [4:36:07<2:47:14, 3.49it/s] 91%|█████████ | 336434/371472 [4:36:08<2:38:57, 3.67it/s] 91%|█████████ | 336435/371472 [4:36:08<2:37:33, 3.71it/s] 91%|█████████ | 336436/371472 [4:36:08<2:38:57, 3.67it/s] 91%|█████████ | 336437/371472 [4:36:08<2:38:10, 3.69it/s] 91%|█████████ | 336438/371472 [4:36:09<2:52:45, 3.38it/s] 91%|█████████ | 336439/371472 [4:36:09<3:04:14, 3.17it/s] 91%|█████████ | 336440/371472 [4:36:09<2:52:08, 3.39it/s] {'loss': 2.4926, 'learning_rate': 1.8492102824883103e-07, 'epoch': 14.49} + 91%|█████████ | 336440/371472 [4:36:09<2:52:08, 3.39it/s] 91%|█████████ | 336441/371472 [4:36:10<2:54:30, 3.35it/s] 91%|█████████ | 336442/371472 [4:36:10<2:43:24, 3.57it/s] 91%|█████████ | 336443/371472 [4:36:10<2:47:28, 3.49it/s] 91%|█████████ | 336444/371472 [4:36:10<2:45:35, 3.53it/s] 91%|█████████ | 336445/371472 [4:36:11<2:54:35, 3.34it/s] 91%|█████████ | 336446/371472 [4:36:11<2:52:32, 3.38it/s] 91%|█████████ | 336447/371472 [4:36:11<2:48:16, 3.47it/s] 91%|█████████ | 336448/371472 [4:36:12<2:44:08, 3.56it/s] 91%|█████████ | 336449/371472 [4:36:12<2:46:15, 3.51it/s] 91%|█████████ | 336450/371472 [4:36:12<2:44:25, 3.55it/s] 91%|█████████ | 336451/371472 [4:36:13<2:56:53, 3.30it/s] 91%|█████████ | 336452/371472 [4:36:13<2:49:00, 3.45it/s] 91%|█████████ | 336453/371472 [4:36:13<2:51:32, 3.40it/s] 91%|█████████ | 336454/371472 [4:36:13<2:49:55, 3.43it/s] 91%|█████████ | 336455/371472 [4:36:14<2:51:02, 3.41it/s] 91%|█████████ | 336456/371472 [4:36:14<2:47:15, 3.49it/s] 91%|█████████ | 336457/371472 [4:36:14<2:54:18, 3.35it/s] 91%|█████████ | 336458/371472 [4:36:15<3:00:07, 3.24it/s] 91%|█████████ | 336459/371472 [4:36:15<2:54:41, 3.34it/s] 91%|█████████ | 336460/371472 [4:36:15<2:50:30, 3.42it/s] {'loss': 2.5619, 'learning_rate': 1.848725462733521e-07, 'epoch': 14.49} + 91%|█████████ | 336460/371472 [4:36:15<2:50:30, 3.42it/s] 91%|█████████ | 336461/371472 [4:36:15<2:43:20, 3.57it/s] 91%|█████████ | 336462/371472 [4:36:16<2:42:12, 3.60it/s] 91%|█████████ | 336463/371472 [4:36:16<2:49:20, 3.45it/s] 91%|█████████ | 336464/371472 [4:36:16<2:56:38, 3.30it/s] 91%|█████████ | 336465/371472 [4:36:17<2:52:51, 3.38it/s] 91%|█████████ | 336466/371472 [4:36:17<2:43:36, 3.57it/s] 91%|█████████ | 336467/371472 [4:36:17<2:48:45, 3.46it/s] 91%|█████████ | 336468/371472 [4:36:17<2:41:36, 3.61it/s] 91%|█████████ | 336469/371472 [4:36:18<2:42:31, 3.59it/s] 91%|█████████ | 336470/371472 [4:36:18<2:36:30, 3.73it/s] 91%|█████████ | 336471/371472 [4:36:18<2:48:06, 3.47it/s] 91%|█████████ | 336472/371472 [4:36:19<3:11:08, 3.05it/s] 91%|█████████ | 336473/371472 [4:36:19<2:54:45, 3.34it/s] 91%|█████████ | 336474/371472 [4:36:19<2:49:02, 3.45it/s] 91%|█████████ | 336475/371472 [4:36:19<2:43:50, 3.56it/s] 91%|█████████ | 336476/371472 [4:36:20<2:46:20, 3.51it/s] 91%|█████████ | 336477/371472 [4:36:20<2:43:57, 3.56it/s] 91%|█████████ | 336478/371472 [4:36:20<2:45:33, 3.52it/s] 91%|█████████ | 336479/371472 [4:36:21<2:43:16, 3.57it/s] 91%|█████████ | 336480/371472 [4:36:21<2:42:42, 3.58it/s] {'loss': 2.6067, 'learning_rate': 1.8482406429787328e-07, 'epoch': 14.49} + 91%|█████████ | 336480/371472 [4:36:21<2:42:42, 3.58it/s] 91%|█████████ | 336481/371472 [4:36:21<2:41:30, 3.61it/s] 91%|█████████ | 336482/371472 [4:36:21<2:43:19, 3.57it/s] 91%|█████████ | 336483/371472 [4:36:22<2:42:44, 3.58it/s] 91%|█████████ | 336484/371472 [4:36:22<2:42:40, 3.58it/s] 91%|█████████ | 336485/371472 [4:36:22<2:37:23, 3.70it/s] 91%|█████████ | 336486/371472 [4:36:23<2:43:41, 3.56it/s] 91%|█████████ | 336487/371472 [4:36:23<2:43:14, 3.57it/s] 91%|█████████ | 336488/371472 [4:36:23<2:47:30, 3.48it/s] 91%|█████████ | 336489/371472 [4:36:23<2:43:41, 3.56it/s] 91%|█████████ | 336490/371472 [4:36:24<3:16:25, 2.97it/s] 91%|█████████ | 336491/371472 [4:36:24<3:05:34, 3.14it/s] 91%|█████████ | 336492/371472 [4:36:24<2:53:54, 3.35it/s] 91%|█████████ | 336493/371472 [4:36:25<2:53:42, 3.36it/s] 91%|█████████ | 336494/371472 [4:36:25<3:01:21, 3.21it/s] 91%|█████████ | 336495/371472 [4:36:25<2:52:20, 3.38it/s] 91%|█████████ | 336496/371472 [4:36:26<2:45:00, 3.53it/s] 91%|█████████ | 336497/371472 [4:36:26<2:39:36, 3.65it/s] 91%|█████████ | 336498/371472 [4:36:26<2:42:23, 3.59it/s] 91%|█████████ | 336499/371472 [4:36:26<2:48:26, 3.46it/s] 91%|█████████ | 336500/371472 [4:36:27<2:50:03, 3.43it/s] {'loss': 2.6574, 'learning_rate': 1.8477558232239432e-07, 'epoch': 14.49} + 91%|█████████ | 336500/371472 [4:36:27<2:50:03, 3.43it/s] 91%|█████████ | 336501/371472 [4:36:27<2:43:07, 3.57it/s] 91%|█████████ | 336502/371472 [4:36:27<2:43:52, 3.56it/s] 91%|█████████ | 336503/371472 [4:36:28<2:48:26, 3.46it/s] 91%|█████████ | 336504/371472 [4:36:28<2:45:36, 3.52it/s] 91%|█████████ | 336505/371472 [4:36:28<2:48:05, 3.47it/s] 91%|█████████ | 336506/371472 [4:36:28<2:42:48, 3.58it/s] 91%|█████████ | 336507/371472 [4:36:29<2:41:05, 3.62it/s] 91%|█████████ | 336508/371472 [4:36:29<2:43:33, 3.56it/s] 91%|█████████ | 336509/371472 [4:36:29<2:39:06, 3.66it/s] 91%|█████████ | 336510/371472 [4:36:29<2:48:39, 3.45it/s] 91%|█████████ | 336511/371472 [4:36:30<2:53:21, 3.36it/s] 91%|█████████ | 336512/371472 [4:36:30<2:46:52, 3.49it/s] 91%|█████████ | 336513/371472 [4:36:31<3:16:38, 2.96it/s] 91%|█████████ | 336514/371472 [4:36:31<3:10:36, 3.06it/s] 91%|█████████ | 336515/371472 [4:36:31<3:04:32, 3.16it/s] 91%|█████████ | 336516/371472 [4:36:31<2:54:26, 3.34it/s] 91%|█████████ | 336517/371472 [4:36:32<3:07:31, 3.11it/s] 91%|█████████ | 336518/371472 [4:36:32<2:58:39, 3.26it/s] 91%|█████████ | 336519/371472 [4:36:32<2:57:04, 3.29it/s] 91%|█████████ | 336520/371472 [4:36:33<2:46:51, 3.49it/s] {'loss': 2.6785, 'learning_rate': 1.8472710034691548e-07, 'epoch': 14.49} + 91%|█████████ | 336520/371472 [4:36:33<2:46:51, 3.49it/s] 91%|█████████ | 336521/371472 [4:36:33<2:37:33, 3.70it/s] 91%|█████████ | 336522/371472 [4:36:33<2:39:29, 3.65it/s] 91%|█████████ | 336523/371472 [4:36:33<2:50:40, 3.41it/s] 91%|█████████ | 336524/371472 [4:36:34<2:45:48, 3.51it/s] 91%|█████████ | 336525/371472 [4:36:34<2:49:26, 3.44it/s] 91%|█████████ | 336526/371472 [4:36:34<2:37:34, 3.70it/s] 91%|█████████ | 336527/371472 [4:36:35<2:46:31, 3.50it/s] 91%|█████████ | 336528/371472 [4:36:35<2:50:59, 3.41it/s] 91%|█████████ | 336529/371472 [4:36:35<2:48:45, 3.45it/s] 91%|█████████ | 336530/371472 [4:36:35<2:51:50, 3.39it/s] 91%|█████████ | 336531/371472 [4:36:36<2:46:44, 3.49it/s] 91%|█████████ | 336532/371472 [4:36:36<2:42:22, 3.59it/s] 91%|█████████ | 336533/371472 [4:36:36<2:40:17, 3.63it/s] 91%|█████████ | 336534/371472 [4:36:36<2:41:03, 3.62it/s] 91%|█████████ | 336535/371472 [4:36:37<2:38:40, 3.67it/s] 91%|█████████ | 336536/371472 [4:36:37<2:38:18, 3.68it/s] 91%|█████████ | 336537/371472 [4:36:37<2:39:30, 3.65it/s] 91%|█████████ | 336538/371472 [4:36:38<2:36:14, 3.73it/s] 91%|█████████ | 336539/371472 [4:36:38<2:40:00, 3.64it/s] 91%|█████████ | 336540/371472 [4:36:38<2:52:53, 3.37it/s] {'loss': 2.5385, 'learning_rate': 1.8467861837143655e-07, 'epoch': 14.5} + 91%|█████████ | 336540/371472 [4:36:38<2:52:53, 3.37it/s] 91%|█████████ | 336541/371472 [4:36:38<2:49:10, 3.44it/s] 91%|█████████ | 336542/371472 [4:36:39<2:58:49, 3.26it/s] 91%|█████████ | 336543/371472 [4:36:39<2:50:25, 3.42it/s] 91%|█████████ | 336544/371472 [4:36:39<2:43:54, 3.55it/s] 91%|█████████ | 336545/371472 [4:36:40<2:38:15, 3.68it/s] 91%|█████████ | 336546/371472 [4:36:40<2:36:31, 3.72it/s] 91%|█████████ | 336547/371472 [4:36:40<2:38:57, 3.66it/s] 91%|█████████ | 336548/371472 [4:36:40<2:45:50, 3.51it/s] 91%|█████████ | 336549/371472 [4:36:41<2:43:12, 3.57it/s] 91%|█████████ | 336550/371472 [4:36:41<2:42:39, 3.58it/s] 91%|█████████ | 336551/371472 [4:36:41<2:44:47, 3.53it/s] 91%|█████████ | 336552/371472 [4:36:42<2:40:34, 3.62it/s] 91%|█████████ | 336553/371472 [4:36:42<2:39:57, 3.64it/s] 91%|█████████ | 336554/371472 [4:36:42<2:44:11, 3.54it/s] 91%|█████████ | 336555/371472 [4:36:42<2:42:28, 3.58it/s] 91%|█████████ | 336556/371472 [4:36:43<2:42:35, 3.58it/s] 91%|█████████ | 336557/371472 [4:36:43<2:36:52, 3.71it/s] 91%|█████████ | 336558/371472 [4:36:43<2:49:46, 3.43it/s] 91%|█████████ | 336559/371472 [4:36:44<2:58:14, 3.26it/s] 91%|█████████ | 336560/371472 [4:36:44<2:48:38, 3.45it/s] {'loss': 2.649, 'learning_rate': 1.846301363959577e-07, 'epoch': 14.5} + 91%|█████████ | 336560/371472 [4:36:44<2:48:38, 3.45it/s] 91%|█████████ | 336561/371472 [4:36:44<2:52:15, 3.38it/s] 91%|█████████ | 336562/371472 [4:36:44<2:42:04, 3.59it/s] 91%|█████████ | 336563/371472 [4:36:45<2:42:43, 3.58it/s] 91%|█████████ | 336564/371472 [4:36:45<2:39:43, 3.64it/s] 91%|█████████ | 336565/371472 [4:36:45<2:34:45, 3.76it/s] 91%|█████████ | 336566/371472 [4:36:45<2:33:02, 3.80it/s] 91%|█████████ | 336567/371472 [4:36:46<2:38:44, 3.66it/s] 91%|█████████ | 336568/371472 [4:36:46<2:49:19, 3.44it/s] 91%|█████████ | 336569/371472 [4:36:46<2:44:24, 3.54it/s] 91%|█████████ | 336570/371472 [4:36:47<2:56:27, 3.30it/s] 91%|█████████ | 336571/371472 [4:36:47<2:59:17, 3.24it/s] 91%|█████████ | 336572/371472 [4:36:47<2:48:21, 3.45it/s] 91%|█████████ | 336573/371472 [4:36:48<2:42:24, 3.58it/s] 91%|█████████ | 336574/371472 [4:36:48<2:47:09, 3.48it/s] 91%|█████████ | 336575/371472 [4:36:48<2:58:11, 3.26it/s] 91%|█████████ | 336576/371472 [4:36:48<2:47:40, 3.47it/s] 91%|█████████ | 336577/371472 [4:36:49<2:42:50, 3.57it/s] 91%|█████████ | 336578/371472 [4:36:49<2:47:40, 3.47it/s] 91%|█████████ | 336579/371472 [4:36:49<2:48:04, 3.46it/s] 91%|█████████ | 336580/371472 [4:36:50<2:38:35, 3.67it/s] {'loss': 2.703, 'learning_rate': 1.8458165442047874e-07, 'epoch': 14.5} + 91%|█████████ | 336580/371472 [4:36:50<2:38:35, 3.67it/s] 91%|█████████ | 336581/371472 [4:36:50<2:42:53, 3.57it/s] 91%|█████████ | 336582/371472 [4:36:50<2:38:07, 3.68it/s] 91%|█████████ | 336583/371472 [4:36:50<2:40:19, 3.63it/s] 91%|█████████ | 336584/371472 [4:36:51<2:35:16, 3.74it/s] 91%|█████████ | 336585/371472 [4:36:51<2:30:46, 3.86it/s] 91%|█████████ | 336586/371472 [4:36:51<2:33:00, 3.80it/s] 91%|█████████ | 336587/371472 [4:36:51<2:40:00, 3.63it/s] 91%|█████████ | 336588/371472 [4:36:52<2:34:22, 3.77it/s] 91%|█████████ | 336589/371472 [4:36:52<2:36:07, 3.72it/s] 91%|█████████ | 336590/371472 [4:36:52<2:53:48, 3.34it/s] 91%|█████████ | 336591/371472 [4:36:53<3:06:47, 3.11it/s] 91%|█████████ | 336592/371472 [4:36:53<3:27:24, 2.80it/s] 91%|█████████ | 336593/371472 [4:36:53<3:17:12, 2.95it/s] 91%|█████████ | 336594/371472 [4:36:54<3:04:32, 3.15it/s] 91%|█████████ | 336595/371472 [4:36:54<2:59:10, 3.24it/s] 91%|█████████ | 336596/371472 [4:36:54<2:55:34, 3.31it/s] 91%|█████████ | 336597/371472 [4:36:55<2:51:49, 3.38it/s] 91%|█████████ | 336598/371472 [4:36:55<3:00:27, 3.22it/s] 91%|█████████ | 336599/371472 [4:36:55<2:56:14, 3.30it/s] 91%|█████████ | 336600/371472 [4:36:56<3:20:40, 2.90it/s] {'loss': 2.6855, 'learning_rate': 1.8453317244499992e-07, 'epoch': 14.5} + 91%|█████████ | 336600/371472 [4:36:56<3:20:40, 2.90it/s] 91%|█████████ | 336601/371472 [4:36:56<3:30:41, 2.76it/s] 91%|█████████ | 336602/371472 [4:36:56<3:30:42, 2.76it/s] 91%|█████████ | 336603/371472 [4:36:57<3:10:28, 3.05it/s] 91%|█████████ | 336604/371472 [4:36:57<3:03:59, 3.16it/s] 91%|█████████ | 336605/371472 [4:36:57<3:12:05, 3.03it/s] 91%|█████████ | 336606/371472 [4:36:58<2:59:39, 3.23it/s] 91%|█████████ | 336607/371472 [4:36:58<2:50:56, 3.40it/s] 91%|█████████ | 336608/371472 [4:36:58<2:43:37, 3.55it/s] 91%|█████████ | 336609/371472 [4:36:58<2:41:33, 3.60it/s] 91%|█████████ | 336610/371472 [4:36:59<2:37:15, 3.69it/s] 91%|█████████ | 336611/371472 [4:36:59<2:35:45, 3.73it/s] 91%|█████████ | 336612/371472 [4:36:59<2:38:17, 3.67it/s] 91%|█████████ | 336613/371472 [4:36:59<2:42:43, 3.57it/s] 91%|█████████ | 336614/371472 [4:37:00<2:41:18, 3.60it/s] 91%|█████████ | 336615/371472 [4:37:00<2:39:19, 3.65it/s] 91%|█████████ | 336616/371472 [4:37:00<2:37:41, 3.68it/s] 91%|█████████ | 336617/371472 [4:37:00<2:39:25, 3.64it/s] 91%|█████████ | 336618/371472 [4:37:01<2:44:59, 3.52it/s] 91%|█████████ | 336619/371472 [4:37:01<2:42:33, 3.57it/s] 91%|█████████ | 336620/371472 [4:37:01<2:42:01, 3.58it/s] {'loss': 2.6806, 'learning_rate': 1.8448469046952096e-07, 'epoch': 14.5} + 91%|█████████ | 336620/371472 [4:37:01<2:42:01, 3.58it/s] 91%|█████████ | 336621/371472 [4:37:02<2:54:54, 3.32it/s] 91%|█████████ | 336622/371472 [4:37:02<2:47:23, 3.47it/s] 91%|█████████ | 336623/371472 [4:37:02<2:59:27, 3.24it/s] 91%|█████████ | 336624/371472 [4:37:03<2:48:17, 3.45it/s] 91%|█████████ | 336625/371472 [4:37:03<2:45:31, 3.51it/s] 91%|█████████ | 336626/371472 [4:37:03<2:39:25, 3.64it/s] 91%|█████████ | 336627/371472 [4:37:03<2:43:49, 3.54it/s] 91%|█████████ | 336628/371472 [4:37:04<2:41:02, 3.61it/s] 91%|█████████ | 336629/371472 [4:37:04<2:49:01, 3.44it/s] 91%|█████████ | 336630/371472 [4:37:04<2:43:06, 3.56it/s] 91%|█████████ | 336631/371472 [4:37:04<2:40:59, 3.61it/s] 91%|█████████ | 336632/371472 [4:37:05<2:39:28, 3.64it/s] 91%|█████████ | 336633/371472 [4:37:05<2:39:43, 3.64it/s] 91%|█████████ | 336634/371472 [4:37:05<2:45:55, 3.50it/s] 91%|█████████ | 336635/371472 [4:37:06<2:58:03, 3.26it/s] 91%|█████████ | 336636/371472 [4:37:06<3:04:48, 3.14it/s] 91%|█████████ | 336637/371472 [4:37:06<3:00:21, 3.22it/s] 91%|█████████ | 336638/371472 [4:37:07<2:54:26, 3.33it/s] 91%|█████████ | 336639/371472 [4:37:07<2:44:58, 3.52it/s] 91%|█████████ | 336640/371472 [4:37:07<2:50:39, 3.40it/s] {'loss': 2.6524, 'learning_rate': 1.8443620849404212e-07, 'epoch': 14.5} + 91%|█████████ | 336640/371472 [4:37:07<2:50:39, 3.40it/s] 91%|█████████ | 336641/371472 [4:37:07<2:49:57, 3.42it/s] 91%|█████████ | 336642/371472 [4:37:08<2:45:08, 3.52it/s] 91%|█████████ | 336643/371472 [4:37:08<2:51:09, 3.39it/s] 91%|█████████ | 336644/371472 [4:37:08<2:46:41, 3.48it/s] 91%|█████████ | 336645/371472 [4:37:09<2:38:30, 3.66it/s] 91%|█████████ | 336646/371472 [4:37:09<2:42:28, 3.57it/s] 91%|█████████ | 336647/371472 [4:37:09<2:46:40, 3.48it/s] 91%|█████████ | 336648/371472 [4:37:09<2:43:24, 3.55it/s] 91%|█████████ | 336649/371472 [4:37:10<2:56:50, 3.28it/s] 91%|█████████ | 336650/371472 [4:37:10<2:49:11, 3.43it/s] 91%|█████████ | 336651/371472 [4:37:10<2:59:22, 3.24it/s] 91%|█████████ | 336652/371472 [4:37:11<2:50:56, 3.40it/s] 91%|█████████ | 336653/371472 [4:37:11<2:48:34, 3.44it/s] 91%|█████████ | 336654/371472 [4:37:11<2:44:32, 3.53it/s] 91%|█████████ | 336655/371472 [4:37:11<2:38:35, 3.66it/s] 91%|█████████ | 336656/371472 [4:37:12<2:47:40, 3.46it/s] 91%|█████████ | 336657/371472 [4:37:12<2:44:07, 3.54it/s] 91%|█████████ | 336658/371472 [4:37:12<2:48:17, 3.45it/s] 91%|█████████ | 336659/371472 [4:37:13<2:50:40, 3.40it/s] 91%|█████████ | 336660/371472 [4:37:13<2:45:01, 3.52it/s] {'loss': 2.6472, 'learning_rate': 1.843877265185632e-07, 'epoch': 14.5} + 91%|█████████ | 336660/371472 [4:37:13<2:45:01, 3.52it/s] 91%|█████████ | 336661/371472 [4:37:13<2:42:11, 3.58it/s] 91%|█████████ | 336662/371472 [4:37:13<2:44:21, 3.53it/s] 91%|█████████ | 336663/371472 [4:37:14<2:41:35, 3.59it/s] 91%|█████████ | 336664/371472 [4:37:14<2:47:34, 3.46it/s] 91%|█████████ | 336665/371472 [4:37:14<2:50:21, 3.41it/s] 91%|█████████ | 336666/371472 [4:37:15<2:44:25, 3.53it/s] 91%|█████████ | 336667/371472 [4:37:15<2:42:18, 3.57it/s] 91%|█████████ | 336668/371472 [4:37:15<2:50:28, 3.40it/s] 91%|█████████ | 336669/371472 [4:37:16<2:47:33, 3.46it/s] 91%|█████████ | 336670/371472 [4:37:16<2:45:35, 3.50it/s] 91%|█████████ | 336671/371472 [4:37:16<2:43:10, 3.55it/s] 91%|█████████ | 336672/371472 [4:37:16<3:00:19, 3.22it/s] 91%|█████████ | 336673/371472 [4:37:17<2:53:06, 3.35it/s] 91%|█████████ | 336674/371472 [4:37:17<2:47:24, 3.46it/s] 91%|█████████ | 336675/371472 [4:37:17<2:44:10, 3.53it/s] 91%|█████████ | 336676/371472 [4:37:18<2:45:42, 3.50it/s] 91%|█████████ | 336677/371472 [4:37:18<2:41:16, 3.60it/s] 91%|█████████ | 336678/371472 [4:37:18<2:42:26, 3.57it/s] 91%|█████████ | 336679/371472 [4:37:18<2:41:47, 3.58it/s] 91%|█████████ | 336680/371472 [4:37:19<2:40:38, 3.61it/s] {'loss': 2.5838, 'learning_rate': 1.8433924454308434e-07, 'epoch': 14.5} + 91%|█████████ | 336680/371472 [4:37:19<2:40:38, 3.61it/s] 91%|█████████ | 336681/371472 [4:37:19<2:37:40, 3.68it/s] 91%|█████████ | 336682/371472 [4:37:19<2:37:49, 3.67it/s] 91%|█████████ | 336683/371472 [4:37:19<2:35:36, 3.73it/s] 91%|█████████ | 336684/371472 [4:37:20<2:34:36, 3.75it/s] 91%|█████████ | 336685/371472 [4:37:20<2:41:52, 3.58it/s] 91%|█████████ | 336686/371472 [4:37:20<2:35:40, 3.72it/s] 91%|█████████ | 336687/371472 [4:37:20<2:32:48, 3.79it/s] 91%|█████████ | 336688/371472 [4:37:21<2:37:38, 3.68it/s] 91%|█████████ | 336689/371472 [4:37:21<2:51:01, 3.39it/s] 91%|█████████ | 336690/371472 [4:37:21<2:48:13, 3.45it/s] 91%|█████████ | 336691/371472 [4:37:22<2:44:14, 3.53it/s] 91%|█████████ | 336692/371472 [4:37:22<2:45:45, 3.50it/s] 91%|█████████ | 336693/371472 [4:37:22<2:50:06, 3.41it/s] 91%|█████████ | 336694/371472 [4:37:23<2:48:03, 3.45it/s] 91%|█████████ | 336695/371472 [4:37:23<2:46:43, 3.48it/s] 91%|█████████ | 336696/371472 [4:37:23<2:42:56, 3.56it/s] 91%|█████████ | 336697/371472 [4:37:23<2:42:10, 3.57it/s] 91%|█████████ | 336698/371472 [4:37:24<2:44:16, 3.53it/s] 91%|█████████ | 336699/371472 [4:37:24<2:42:30, 3.57it/s] 91%|█████████ | 336700/371472 [4:37:24<2:40:48, 3.60it/s] {'loss': 2.657, 'learning_rate': 1.8429076256760538e-07, 'epoch': 14.5} + 91%|█████████ | 336700/371472 [4:37:24<2:40:48, 3.60it/s] 91%|█████████ | 336701/371472 [4:37:24<2:37:33, 3.68it/s] 91%|█████████ | 336702/371472 [4:37:25<2:42:31, 3.57it/s] 91%|█████████ | 336703/371472 [4:37:25<2:44:15, 3.53it/s] 91%|█████████ | 336704/371472 [4:37:25<2:43:10, 3.55it/s] 91%|█████████ | 336705/371472 [4:37:26<2:40:05, 3.62it/s] 91%|█████████ | 336706/371472 [4:37:26<2:34:59, 3.74it/s] 91%|█████████ | 336707/371472 [4:37:26<2:35:02, 3.74it/s] 91%|█████████ | 336708/371472 [4:37:26<2:40:26, 3.61it/s] 91%|█████████ | 336709/371472 [4:37:27<2:59:48, 3.22it/s] 91%|█████████ | 336710/371472 [4:37:27<2:58:04, 3.25it/s] 91%|█████████ | 336711/371472 [4:37:27<2:57:25, 3.27it/s] 91%|█████████ | 336712/371472 [4:37:28<2:55:22, 3.30it/s] 91%|█████████ | 336713/371472 [4:37:28<2:50:39, 3.39it/s] 91%|█████████ | 336714/371472 [4:37:28<2:43:08, 3.55it/s] 91%|█████████ | 336715/371472 [4:37:29<2:50:13, 3.40it/s] 91%|█████████ | 336716/371472 [4:37:29<2:44:14, 3.53it/s] 91%|█████████ | 336717/371472 [4:37:29<2:49:04, 3.43it/s] 91%|█████████ | 336718/371472 [4:37:29<2:43:43, 3.54it/s] 91%|█████████ | 336719/371472 [4:37:30<2:48:58, 3.43it/s] 91%|█████████ | 336720/371472 [4:37:30<2:50:44, 3.39it/s] {'loss': 2.6172, 'learning_rate': 1.8424228059212656e-07, 'epoch': 14.5} + 91%|█████████ | 336720/371472 [4:37:30<2:50:44, 3.39it/s] 91%|█████████ | 336721/371472 [4:37:30<2:50:59, 3.39it/s] 91%|█████████ | 336722/371472 [4:37:31<2:45:21, 3.50it/s] 91%|█████████ | 336723/371472 [4:37:31<2:54:19, 3.32it/s] 91%|█████████ | 336724/371472 [4:37:31<2:54:03, 3.33it/s] 91%|█████████ | 336725/371472 [4:37:31<2:43:36, 3.54it/s] 91%|█████████ | 336726/371472 [4:37:32<2:37:43, 3.67it/s] 91%|█████████ | 336727/371472 [4:37:32<2:38:26, 3.66it/s] 91%|█████████ | 336728/371472 [4:37:32<2:36:44, 3.69it/s] 91%|█████████ | 336729/371472 [4:37:33<2:51:48, 3.37it/s] 91%|█████████ | 336730/371472 [4:37:33<2:46:39, 3.47it/s] 91%|█████████ | 336731/371472 [4:37:33<2:42:03, 3.57it/s] 91%|█████████ | 336732/371472 [4:37:33<2:46:36, 3.48it/s] 91%|█████████ | 336733/371472 [4:37:34<2:38:21, 3.66it/s] 91%|█████████ | 336734/371472 [4:37:34<2:39:08, 3.64it/s] 91%|█████████ | 336735/371472 [4:37:34<2:44:46, 3.51it/s] 91%|█████████ | 336736/371472 [4:37:35<2:49:08, 3.42it/s] 91%|█████████ | 336737/371472 [4:37:35<2:53:28, 3.34it/s] 91%|█████████ | 336738/371472 [4:37:35<2:57:49, 3.26it/s] 91%|█████████ | 336739/371472 [4:37:35<2:52:02, 3.36it/s] 91%|█████████ | 336740/371472 [4:37:36<2:48:51, 3.43it/s] {'loss': 2.4239, 'learning_rate': 1.841937986166476e-07, 'epoch': 14.5} + 91%|█████████ | 336740/371472 [4:37:36<2:48:51, 3.43it/s] 91%|█████████ | 336741/371472 [4:37:36<2:47:54, 3.45it/s] 91%|█████████ | 336742/371472 [4:37:36<2:42:46, 3.56it/s] 91%|█████████ | 336743/371472 [4:37:37<2:41:38, 3.58it/s] 91%|█████████ | 336744/371472 [4:37:37<2:37:58, 3.66it/s] 91%|█████████ | 336745/371472 [4:37:37<2:36:48, 3.69it/s] 91%|█████████ | 336746/371472 [4:37:37<2:52:31, 3.35it/s] 91%|█████████ | 336747/371472 [4:37:38<2:54:40, 3.31it/s] 91%|█████████ | 336748/371472 [4:37:38<2:47:42, 3.45it/s] 91%|█████████ | 336749/371472 [4:37:38<2:46:06, 3.48it/s] 91%|█████████ | 336750/371472 [4:37:39<2:38:10, 3.66it/s] 91%|█████████ | 336751/371472 [4:37:39<2:36:20, 3.70it/s] 91%|█████████ | 336752/371472 [4:37:39<2:33:14, 3.78it/s] 91%|█████████ | 336753/371472 [4:37:39<2:39:31, 3.63it/s] 91%|█████████ | 336754/371472 [4:37:40<2:41:02, 3.59it/s] 91%|█████████ | 336755/371472 [4:37:40<2:40:47, 3.60it/s] 91%|█████████ | 336756/371472 [4:37:40<2:43:29, 3.54it/s] 91%|█████████ | 336757/371472 [4:37:41<2:46:55, 3.47it/s] 91%|█████████ | 336758/371472 [4:37:41<2:43:02, 3.55it/s] 91%|█████████ | 336759/371472 [4:37:41<2:40:58, 3.59it/s] 91%|█████████ | 336760/371472 [4:37:41<2:39:06, 3.64it/s] {'loss': 2.7046, 'learning_rate': 1.8414531664116876e-07, 'epoch': 14.5} + 91%|█████████ | 336760/371472 [4:37:41<2:39:06, 3.64it/s] 91%|█████████ | 336761/371472 [4:37:42<2:39:36, 3.62it/s] 91%|█████████ | 336762/371472 [4:37:42<2:59:44, 3.22it/s] 91%|█████████ | 336763/371472 [4:37:42<2:50:51, 3.39it/s] 91%|█████████ | 336764/371472 [4:37:43<2:45:43, 3.49it/s] 91%|█████████ | 336765/371472 [4:37:43<2:43:40, 3.53it/s] 91%|█████████ | 336766/371472 [4:37:43<2:41:40, 3.58it/s] 91%|█████████ | 336767/371472 [4:37:43<2:43:42, 3.53it/s] 91%|█████████ | 336768/371472 [4:37:44<2:40:34, 3.60it/s] 91%|█████████ | 336769/371472 [4:37:44<2:46:22, 3.48it/s] 91%|█████████ | 336770/371472 [4:37:44<2:55:31, 3.29it/s] 91%|█████████ | 336771/371472 [4:37:45<2:50:55, 3.38it/s] 91%|█████████ | 336772/371472 [4:37:45<2:45:56, 3.49it/s] 91%|█████████ | 336773/371472 [4:37:45<2:39:59, 3.61it/s] 91%|█████████ | 336774/371472 [4:37:45<2:39:27, 3.63it/s] 91%|█████████ | 336775/371472 [4:37:46<2:33:13, 3.77it/s] 91%|█████████ | 336776/371472 [4:37:46<2:37:46, 3.67it/s] 91%|█████████ | 336777/371472 [4:37:46<2:41:30, 3.58it/s] 91%|█████████ | 336778/371472 [4:37:47<2:57:33, 3.26it/s] 91%|█████████ | 336779/371472 [4:37:47<2:59:42, 3.22it/s] 91%|█████████ | 336780/371472 [4:37:47<3:06:02, 3.11it/s] {'loss': 2.713, 'learning_rate': 1.8409683466568983e-07, 'epoch': 14.51} + 91%|█████████ | 336780/371472 [4:37:47<3:06:02, 3.11it/s] 91%|█████████ | 336781/371472 [4:37:47<2:56:17, 3.28it/s] 91%|█████████ | 336782/371472 [4:37:48<2:47:57, 3.44it/s] 91%|█████████ | 336783/371472 [4:37:48<2:42:41, 3.55it/s] 91%|█████████ | 336784/371472 [4:37:48<2:45:28, 3.49it/s] 91%|█████████ | 336785/371472 [4:37:49<2:41:53, 3.57it/s] 91%|█████████ | 336786/371472 [4:37:49<2:52:30, 3.35it/s] 91%|█████████ | 336787/371472 [4:37:49<2:44:05, 3.52it/s] 91%|█████████ | 336788/371472 [4:37:49<2:47:26, 3.45it/s] 91%|█████████ | 336789/371472 [4:37:50<2:54:46, 3.31it/s] 91%|█████████ | 336790/371472 [4:37:50<2:52:49, 3.34it/s] 91%|█████████ | 336791/371472 [4:37:50<2:45:13, 3.50it/s] 91%|█████████ | 336792/371472 [4:37:51<2:44:03, 3.52it/s] 91%|█████████ | 336793/371472 [4:37:51<2:42:29, 3.56it/s] 91%|█████████ | 336794/371472 [4:37:51<2:41:17, 3.58it/s] 91%|█████████ | 336795/371472 [4:37:51<2:37:58, 3.66it/s] 91%|█████████ | 336796/371472 [4:37:52<2:33:56, 3.75it/s] 91%|█████████ | 336797/371472 [4:37:52<2:31:23, 3.82it/s] 91%|█████████ | 336798/371472 [4:37:52<2:29:34, 3.86it/s] 91%|█████████ | 336799/371472 [4:37:52<2:38:58, 3.64it/s] 91%|█████████ | 336800/371472 [4:37:53<2:40:41, 3.60it/s] {'loss': 2.638, 'learning_rate': 1.8404835269021098e-07, 'epoch': 14.51} + 91%|█████████ | 336800/371472 [4:37:53<2:40:41, 3.60it/s] 91%|█████████ | 336801/371472 [4:37:53<2:46:31, 3.47it/s] 91%|█████████ | 336802/371472 [4:37:53<2:49:44, 3.40it/s] 91%|█████████ | 336803/371472 [4:37:54<2:57:44, 3.25it/s] 91%|█████████ | 336804/371472 [4:37:54<2:51:42, 3.36it/s] 91%|█████████ | 336805/371472 [4:37:54<2:45:20, 3.49it/s] 91%|█████████ | 336806/371472 [4:37:55<2:58:30, 3.24it/s] 91%|█████████ | 336807/371472 [4:37:55<2:57:48, 3.25it/s] 91%|█████████ | 336808/371472 [4:37:55<2:54:00, 3.32it/s] 91%|█████████ | 336809/371472 [4:37:56<2:56:06, 3.28it/s] 91%|█████████ | 336810/371472 [4:37:56<2:55:13, 3.30it/s] 91%|█████████ | 336811/371472 [4:37:56<2:48:02, 3.44it/s] 91%|█████████ | 336812/371472 [4:37:57<3:25:40, 2.81it/s] 91%|█████████ | 336813/371472 [4:37:57<3:20:06, 2.89it/s] 91%|█████████ | 336814/371472 [4:37:57<3:07:27, 3.08it/s] 91%|█████████ | 336815/371472 [4:37:57<3:00:37, 3.20it/s] 91%|█████████ | 336816/371472 [4:37:58<2:52:25, 3.35it/s] 91%|█████████ | 336817/371472 [4:37:58<2:42:49, 3.55it/s] 91%|█████████ | 336818/371472 [4:37:58<2:57:33, 3.25it/s] 91%|█████████ | 336819/371472 [4:37:59<2:52:54, 3.34it/s] 91%|█████████ | 336820/371472 [4:37:59<2:49:40, 3.40it/s] {'loss': 2.4432, 'learning_rate': 1.8399987071473202e-07, 'epoch': 14.51} + 91%|█████████ | 336820/371472 [4:37:59<2:49:40, 3.40it/s] 91%|█████████ | 336821/371472 [4:37:59<2:52:45, 3.34it/s] 91%|█████████ | 336822/371472 [4:38:00<2:47:18, 3.45it/s] 91%|█████████ | 336823/371472 [4:38:00<2:52:37, 3.35it/s] 91%|█████████ | 336824/371472 [4:38:00<2:54:08, 3.32it/s] 91%|█████████ | 336825/371472 [4:38:00<2:46:08, 3.48it/s] 91%|█████████ | 336826/371472 [4:38:01<2:40:52, 3.59it/s] 91%|█████████ | 336827/371472 [4:38:01<2:39:25, 3.62it/s] 91%|█████████ | 336828/371472 [4:38:01<2:50:33, 3.39it/s] 91%|█████████ | 336829/371472 [4:38:02<2:48:30, 3.43it/s] 91%|█████████ | 336830/371472 [4:38:02<2:47:51, 3.44it/s] 91%|█████████ | 336831/371472 [4:38:02<2:45:08, 3.50it/s] 91%|█████████ | 336832/371472 [4:38:02<2:46:29, 3.47it/s] 91%|█████████ | 336833/371472 [4:38:03<2:42:40, 3.55it/s] 91%|█████████ | 336834/371472 [4:38:03<2:34:47, 3.73it/s] 91%|█████████ | 336835/371472 [4:38:03<2:29:52, 3.85it/s] 91%|█████████ | 336836/371472 [4:38:03<2:32:32, 3.78it/s] 91%|█████████ | 336837/371472 [4:38:04<2:29:53, 3.85it/s] 91%|█████████ | 336838/371472 [4:38:04<2:27:54, 3.90it/s] 91%|█████████ | 336839/371472 [4:38:04<2:31:45, 3.80it/s] 91%|█████████ | 336840/371472 [4:38:04<2:29:43, 3.86it/s] {'loss': 2.6298, 'learning_rate': 1.839513887392532e-07, 'epoch': 14.51} + 91%|█████████ | 336840/371472 [4:38:04<2:29:43, 3.86it/s] 91%|█████████ | 336841/371472 [4:38:05<2:35:13, 3.72it/s] 91%|█████████ | 336842/371472 [4:38:05<2:32:48, 3.78it/s] 91%|█████████ | 336843/371472 [4:38:05<2:35:52, 3.70it/s] 91%|█████████ | 336844/371472 [4:38:06<2:32:41, 3.78it/s] 91%|█████████ | 336845/371472 [4:38:06<2:39:15, 3.62it/s] 91%|█████████ | 336846/371472 [4:38:06<2:39:27, 3.62it/s] 91%|█████████ | 336847/371472 [4:38:06<2:38:01, 3.65it/s] 91%|█████████ | 336848/371472 [4:38:07<2:35:37, 3.71it/s] 91%|█████████ | 336849/371472 [4:38:07<2:36:45, 3.68it/s] 91%|█████████ | 336850/371472 [4:38:07<2:36:07, 3.70it/s] 91%|█████████ | 336851/371472 [4:38:07<2:37:36, 3.66it/s] 91%|█████████ | 336852/371472 [4:38:08<2:39:23, 3.62it/s] 91%|█████████ | 336853/371472 [4:38:08<2:47:00, 3.45it/s] 91%|█████████ | 336854/371472 [4:38:08<2:46:17, 3.47it/s] 91%|█████████ | 336855/371472 [4:38:09<2:46:34, 3.46it/s] 91%|█████████ | 336856/371472 [4:38:09<2:39:44, 3.61it/s] 91%|█████████ | 336857/371472 [4:38:09<2:46:46, 3.46it/s] 91%|█████████ | 336858/371472 [4:38:09<2:46:26, 3.47it/s] 91%|█████████ | 336859/371472 [4:38:10<2:41:00, 3.58it/s] 91%|█████████ | 336860/371472 [4:38:10<2:47:28, 3.44it/s] {'loss': 2.5205, 'learning_rate': 1.8390290676377427e-07, 'epoch': 14.51} + 91%|█████████ | 336860/371472 [4:38:10<2:47:28, 3.44it/s] 91%|█████████ | 336861/371472 [4:38:10<3:01:41, 3.17it/s] 91%|█████████ | 336862/371472 [4:38:11<2:53:27, 3.33it/s] 91%|█████████ | 336863/371472 [4:38:11<2:46:51, 3.46it/s] 91%|█████████ | 336864/371472 [4:38:11<2:43:16, 3.53it/s] 91%|█████████ | 336865/371472 [4:38:12<2:45:23, 3.49it/s] 91%|█████████ | 336866/371472 [4:38:12<2:38:48, 3.63it/s] 91%|█████████ | 336867/371472 [4:38:12<2:44:30, 3.51it/s] 91%|█████████ | 336868/371472 [4:38:12<2:59:19, 3.22it/s] 91%|█████████ | 336869/371472 [4:38:13<2:49:39, 3.40it/s] 91%|██████��██ | 336870/371472 [4:38:13<2:46:29, 3.46it/s] 91%|█████████ | 336871/371472 [4:38:13<2:43:13, 3.53it/s] 91%|█████████ | 336872/371472 [4:38:14<2:41:58, 3.56it/s] 91%|█████████ | 336873/371472 [4:38:14<2:58:18, 3.23it/s] 91%|█████████ | 336874/371472 [4:38:14<2:48:17, 3.43it/s] 91%|█████████ | 336875/371472 [4:38:14<2:52:42, 3.34it/s] 91%|█████████ | 336876/371472 [4:38:15<2:47:38, 3.44it/s] 91%|█████████ | 336877/371472 [4:38:15<2:45:04, 3.49it/s] 91%|█████████ | 336878/371472 [4:38:15<2:42:34, 3.55it/s] 91%|█████████ | 336879/371472 [4:38:16<2:41:10, 3.58it/s] 91%|█████████ | 336880/371472 [4:38:16<2:38:45, 3.63it/s] {'loss': 2.627, 'learning_rate': 1.838544247882954e-07, 'epoch': 14.51} + 91%|█████████ | 336880/371472 [4:38:16<2:38:45, 3.63it/s] 91%|█████████ | 336881/371472 [4:38:16<2:59:13, 3.22it/s] 91%|█████████ | 336882/371472 [4:38:16<2:53:16, 3.33it/s] 91%|█████████ | 336883/371472 [4:38:17<3:08:15, 3.06it/s] 91%|█████████ | 336884/371472 [4:38:17<2:54:46, 3.30it/s] 91%|█████████ | 336885/371472 [4:38:17<2:48:44, 3.42it/s] 91%|█████████ | 336886/371472 [4:38:18<2:47:49, 3.43it/s] 91%|█████████ | 336887/371472 [4:38:18<2:41:12, 3.58it/s] 91%|█████████ | 336888/371472 [4:38:18<2:39:40, 3.61it/s] 91%|█████████ | 336889/371472 [4:38:19<2:45:18, 3.49it/s] 91%|█████████ | 336890/371472 [4:38:19<2:38:48, 3.63it/s] 91%|█████████ | 336891/371472 [4:38:19<2:44:21, 3.51it/s] 91%|█████████ | 336892/371472 [4:38:19<2:37:49, 3.65it/s] 91%|█████████ | 336893/371472 [4:38:20<2:33:01, 3.77it/s] 91%|█████████ | 336894/371472 [4:38:20<2:31:12, 3.81it/s] 91%|█████████ | 336895/371472 [4:38:20<2:37:32, 3.66it/s] 91%|█████████ | 336896/371472 [4:38:20<2:39:33, 3.61it/s] 91%|█████████ | 336897/371472 [4:38:21<2:35:13, 3.71it/s] 91%|█████████ | 336898/371472 [4:38:21<2:32:42, 3.77it/s] 91%|█████████ | 336899/371472 [4:38:21<2:34:17, 3.73it/s] 91%|█████████ | 336900/371472 [4:38:22<2:47:12, 3.45it/s] {'loss': 2.7548, 'learning_rate': 1.8380594281281647e-07, 'epoch': 14.51} + 91%|█████████ | 336900/371472 [4:38:22<2:47:12, 3.45it/s] 91%|█████████ | 336901/371472 [4:38:22<2:50:51, 3.37it/s] 91%|█████████ | 336902/371472 [4:38:22<2:50:41, 3.38it/s] 91%|█████████ | 336903/371472 [4:38:22<2:43:23, 3.53it/s] 91%|█████████ | 336904/371472 [4:38:23<2:38:44, 3.63it/s] 91%|█████████ | 336905/371472 [4:38:23<2:43:03, 3.53it/s] 91%|█████████ | 336906/371472 [4:38:23<2:45:43, 3.48it/s] 91%|█████████ | 336907/371472 [4:38:24<2:39:05, 3.62it/s] 91%|█████████ | 336908/371472 [4:38:24<2:38:31, 3.63it/s] 91%|█████████ | 336909/371472 [4:38:24<2:40:21, 3.59it/s] 91%|█████████ | 336910/371472 [4:38:24<2:40:04, 3.60it/s] 91%|█████████ | 336911/371472 [4:38:25<2:35:34, 3.70it/s] 91%|█████████ | 336912/371472 [4:38:25<2:38:27, 3.63it/s] 91%|█████████ | 336913/371472 [4:38:25<2:39:41, 3.61it/s] 91%|█████████ | 336914/371472 [4:38:25<2:37:17, 3.66it/s] 91%|█████████ | 336915/371472 [4:38:26<2:38:03, 3.64it/s] 91%|█████████ | 336916/371472 [4:38:26<2:51:45, 3.35it/s] 91%|█████████ | 336917/371472 [4:38:26<2:53:35, 3.32it/s] 91%|█████████ | 336918/371472 [4:38:27<2:54:38, 3.30it/s] 91%|█████████ | 336919/371472 [4:38:27<2:52:15, 3.34it/s] 91%|█████████ | 336920/371472 [4:38:27<2:55:05, 3.29it/s] {'loss': 2.6773, 'learning_rate': 1.8375746083733754e-07, 'epoch': 14.51} + 91%|█████████ | 336920/371472 [4:38:27<2:55:05, 3.29it/s] 91%|█████████ | 336921/371472 [4:38:28<2:50:25, 3.38it/s] 91%|█████████ | 336922/371472 [4:38:28<2:51:56, 3.35it/s] 91%|█████████ | 336923/371472 [4:38:28<2:50:18, 3.38it/s] 91%|█████████ | 336924/371472 [4:38:28<2:47:32, 3.44it/s] 91%|█████████ | 336925/371472 [4:38:29<2:39:34, 3.61it/s] 91%|█████████ | 336926/371472 [4:38:29<2:34:30, 3.73it/s] 91%|█████████ | 336927/371472 [4:38:29<2:39:02, 3.62it/s] 91%|█████████ | 336928/371472 [4:38:29<2:36:14, 3.69it/s] 91%|█████████ | 336929/371472 [4:38:30<2:36:58, 3.67it/s] 91%|█████████ | 336930/371472 [4:38:30<2:36:09, 3.69it/s] 91%|█████████ | 336931/371472 [4:38:30<2:58:49, 3.22it/s] 91%|█████████ | 336932/371472 [4:38:31<2:56:14, 3.27it/s] 91%|█████████ | 336933/371472 [4:38:31<2:53:27, 3.32it/s] 91%|█████████ | 336934/371472 [4:38:31<2:55:31, 3.28it/s] 91%|█████████ | 336935/371472 [4:38:32<2:47:17, 3.44it/s] 91%|█████████ | 336936/371472 [4:38:32<2:51:16, 3.36it/s] 91%|█████████ | 336937/371472 [4:38:32<2:44:39, 3.50it/s] 91%|█████████ | 336938/371472 [4:38:32<2:46:26, 3.46it/s] 91%|█████████ | 336939/371472 [4:38:33<2:40:56, 3.58it/s] 91%|█████████ | 336940/371472 [4:38:33<2:42:23, 3.54it/s] {'loss': 2.5411, 'learning_rate': 1.837089788618587e-07, 'epoch': 14.51} + 91%|█████████ | 336940/371472 [4:38:33<2:42:23, 3.54it/s] 91%|█████████ | 336941/371472 [4:38:33<2:48:43, 3.41it/s] 91%|█████████ | 336942/371472 [4:38:34<2:46:19, 3.46it/s] 91%|█████████ | 336943/371472 [4:38:34<2:45:52, 3.47it/s] 91%|█████████ | 336944/371472 [4:38:34<2:45:24, 3.48it/s] 91%|█████████ | 336945/371472 [4:38:34<2:41:12, 3.57it/s] 91%|█████████ | 336946/371472 [4:38:35<2:37:55, 3.64it/s] 91%|█████████ | 336947/371472 [4:38:35<2:39:41, 3.60it/s] 91%|█████████ | 336948/371472 [4:38:35<2:39:47, 3.60it/s] 91%|█████████ | 336949/371472 [4:38:36<2:57:51, 3.23it/s] 91%|█████████ | 336950/371472 [4:38:36<3:01:38, 3.17it/s] 91%|█████████ | 336951/371472 [4:38:36<2:59:27, 3.21it/s] 91%|█████████ | 336952/371472 [4:38:37<2:53:09, 3.32it/s] 91%|█████████ | 336953/371472 [4:38:37<2:47:09, 3.44it/s] 91%|█████████ | 336954/371472 [4:38:37<2:45:04, 3.48it/s] 91%|█████████ | 336955/371472 [4:38:37<2:57:26, 3.24it/s] 91%|█████████ | 336956/371472 [4:38:38<3:06:00, 3.09it/s] 91%|█████████ | 336957/371472 [4:38:38<3:09:32, 3.03it/s] 91%|█████████ | 336958/371472 [4:38:38<2:59:55, 3.20it/s] 91%|█████████ | 336959/371472 [4:38:39<2:54:03, 3.30it/s] 91%|█████████ | 336960/371472 [4:38:39<2:53:54, 3.31it/s] {'loss': 2.5742, 'learning_rate': 1.8366049688637974e-07, 'epoch': 14.51} + 91%|█████████ | 336960/371472 [4:38:39<2:53:54, 3.31it/s] 91%|█████████ | 336961/371472 [4:38:39<2:53:27, 3.32it/s] 91%|█████████ | 336962/371472 [4:38:40<2:45:57, 3.47it/s] 91%|█████████ | 336963/371472 [4:38:40<2:43:05, 3.53it/s] 91%|█████████ | 336964/371472 [4:38:40<2:47:05, 3.44it/s] 91%|█████████ | 336965/371472 [4:38:40<2:44:43, 3.49it/s] 91%|█████████ | 336966/371472 [4:38:41<3:03:23, 3.14it/s] 91%|█████████ | 336967/371472 [4:38:41<2:51:30, 3.35it/s] 91%|█████████ | 336968/371472 [4:38:41<2:43:50, 3.51it/s] 91%|█████████ | 336969/371472 [4:38:42<2:39:46, 3.60it/s] 91%|█████████ | 336970/371472 [4:38:42<2:39:24, 3.61it/s] 91%|█████████ | 336971/371472 [4:38:42<2:58:10, 3.23it/s] 91%|█████████ | 336972/371472 [4:38:42<2:46:40, 3.45it/s] 91%|█████████ | 336973/371472 [4:38:43<2:43:40, 3.51it/s] 91%|█████████ | 336974/371472 [4:38:43<2:41:16, 3.57it/s] 91%|█████████ | 336975/371472 [4:38:43<3:03:12, 3.14it/s] 91%|█████████ | 336976/371472 [4:38:44<2:56:25, 3.26it/s] 91%|█████████ | 336977/371472 [4:38:44<2:48:14, 3.42it/s] 91%|█████████ | 336978/371472 [4:38:44<2:40:14, 3.59it/s] 91%|█████████ | 336979/371472 [4:38:44<2:37:25, 3.65it/s] 91%|█████████ | 336980/371472 [4:38:45<2:37:10, 3.66it/s] {'loss': 2.7246, 'learning_rate': 1.8361201491090091e-07, 'epoch': 14.51} + 91%|█████████ | 336980/371472 [4:38:45<2:37:10, 3.66it/s] 91%|█████████ | 336981/371472 [4:38:45<2:40:05, 3.59it/s] 91%|█████████ | 336982/371472 [4:38:45<2:43:26, 3.52it/s] 91%|█████████ | 336983/371472 [4:38:46<2:41:33, 3.56it/s] 91%|█████████ | 336984/371472 [4:38:46<2:38:31, 3.63it/s] 91%|█████████ | 336985/371472 [4:38:46<2:55:40, 3.27it/s] 91%|█████████ | 336986/371472 [4:38:47<2:50:18, 3.37it/s] 91%|█████████ | 336987/371472 [4:38:47<2:45:47, 3.47it/s] 91%|█████████ | 336988/371472 [4:38:47<2:46:39, 3.45it/s] 91%|█████████ | 336989/371472 [4:38:47<2:42:09, 3.54it/s] 91%|█████████ | 336990/371472 [4:38:48<2:46:11, 3.46it/s] 91%|█████████ | 336991/371472 [4:38:48<2:46:47, 3.45it/s] 91%|█████████ | 336992/371472 [4:38:48<2:44:09, 3.50it/s] 91%|█████████ | 336993/371472 [4:38:48<2:39:02, 3.61it/s] 91%|█████████ | 336994/371472 [4:38:49<2:42:27, 3.54it/s] 91%|█████████ | 336995/371472 [4:38:49<2:38:54, 3.62it/s] 91%|█████████ | 336996/371472 [4:38:49<2:43:04, 3.52it/s] 91%|█████████ | 336997/371472 [4:38:50<2:36:31, 3.67it/s] 91%|█████████ | 336998/371472 [4:38:50<2:38:59, 3.61it/s] 91%|█████████ | 336999/371472 [4:38:50<2:36:11, 3.68it/s] 91%|█████████ | 337000/371472 [4:38:50<2:42:50, 3.53it/s] {'loss': 2.6607, 'learning_rate': 1.8356353293542196e-07, 'epoch': 14.52} + 91%|█████████ | 337000/371472 [4:38:50<2:42:50, 3.53it/s] 91%|█████████ | 337001/371472 [4:38:51<2:39:32, 3.60it/s] 91%|█████████ | 337002/371472 [4:38:51<2:35:58, 3.68it/s] 91%|█████████ | 337003/371472 [4:38:51<2:37:33, 3.65it/s] 91%|█████████ | 337004/371472 [4:38:52<2:42:14, 3.54it/s] 91%|█████████ | 337005/371472 [4:38:52<2:48:37, 3.41it/s] 91%|█████████ | 337006/371472 [4:38:52<2:46:33, 3.45it/s] 91%|█████████ | 337007/371472 [4:38:52<2:54:26, 3.29it/s] 91%|█████████ | 337008/371472 [4:38:53<2:52:16, 3.33it/s] 91%|█████████ | 337009/371472 [4:38:53<2:44:59, 3.48it/s] 91%|█████████ | 337010/371472 [4:38:53<2:38:06, 3.63it/s] 91%|█████████ | 337011/371472 [4:38:54<2:43:14, 3.52it/s] 91%|█████████ | 337012/371472 [4:38:54<2:53:41, 3.31it/s] 91%|█████████ | 337013/371472 [4:38:54<2:49:52, 3.38it/s] 91%|█████████ | 337014/371472 [4:38:54<2:45:34, 3.47it/s] 91%|█████████ | 337015/371472 [4:38:55<2:44:35, 3.49it/s] 91%|█████████ | 337016/371472 [4:38:55<2:45:22, 3.47it/s] 91%|█████████ | 337017/371472 [4:38:55<2:44:04, 3.50it/s] 91%|█████████ | 337018/371472 [4:38:56<2:40:44, 3.57it/s] 91%|█████████ | 337019/371472 [4:38:56<2:45:54, 3.46it/s] 91%|█████████ | 337020/371472 [4:38:56<2:49:20, 3.39it/s] {'loss': 2.5649, 'learning_rate': 1.835150509599431e-07, 'epoch': 14.52} + 91%|█████████ | 337020/371472 [4:38:56<2:49:20, 3.39it/s] 91%|█████████ | 337021/371472 [4:38:56<2:45:20, 3.47it/s] 91%|█████████ | 337022/371472 [4:38:57<2:46:34, 3.45it/s] 91%|█████████ | 337023/371472 [4:38:57<2:49:24, 3.39it/s] 91%|█████████ | 337024/371472 [4:38:57<2:43:48, 3.50it/s] 91%|█████████ | 337025/371472 [4:38:58<2:43:23, 3.51it/s] 91%|█████████ | 337026/371472 [4:38:58<2:55:47, 3.27it/s] 91%|█████████ | 337027/371472 [4:38:58<3:03:46, 3.12it/s] 91%|█████████ | 337028/371472 [4:38:59<2:57:33, 3.23it/s] 91%|█████████ | 337029/371472 [4:38:59<2:52:40, 3.32it/s] 91%|█████████ | 337030/371472 [4:38:59<2:49:49, 3.38it/s] 91%|█████████ | 337031/371472 [4:38:59<2:43:16, 3.52it/s] 91%|█████████ | 337032/371472 [4:39:00<2:42:11, 3.54it/s] 91%|█████████ | 337033/371472 [4:39:00<3:22:01, 2.84it/s] 91%|█████████ | 337034/371472 [4:39:01<3:18:41, 2.89it/s] 91%|█████████ | 337035/371472 [4:39:01<3:03:41, 3.12it/s] 91%|█████████ | 337036/371472 [4:39:01<2:53:57, 3.30it/s] 91%|█████████ | 337037/371472 [4:39:01<2:53:06, 3.32it/s] 91%|█████████ | 337038/371472 [4:39:02<2:46:59, 3.44it/s] 91%|█████████ | 337039/371472 [4:39:02<2:56:47, 3.25it/s] 91%|█████████ | 337040/371472 [4:39:02<3:15:18, 2.94it/s] {'loss': 2.7537, 'learning_rate': 1.8346656898446418e-07, 'epoch': 14.52} + 91%|█████████ | 337040/371472 [4:39:02<3:15:18, 2.94it/s] 91%|█████████ | 337041/371472 [4:39:03<3:06:37, 3.07it/s] 91%|█████████ | 337042/371472 [4:39:03<3:02:51, 3.14it/s] 91%|█████████ | 337043/371472 [4:39:03<2:56:16, 3.26it/s] 91%|█████████ | 337044/371472 [4:39:04<3:04:44, 3.11it/s] 91%|█████████ | 337045/371472 [4:39:04<2:55:41, 3.27it/s] 91%|█████████ | 337046/371472 [4:39:04<2:56:58, 3.24it/s] 91%|█████████ | 337047/371472 [4:39:05<2:51:05, 3.35it/s] 91%|█████████ | 337048/371472 [4:39:05<2:55:04, 3.28it/s] 91%|█████████ | 337049/371472 [4:39:05<2:55:16, 3.27it/s] 91%|█████████ | 337050/371472 [4:39:05<2:47:47, 3.42it/s] 91%|█████████ | 337051/371472 [4:39:06<2:52:15, 3.33it/s] 91%|█████████ | 337052/371472 [4:39:06<2:51:51, 3.34it/s] 91%|█████████ | 337053/371472 [4:39:06<2:49:06, 3.39it/s] 91%|█████████ | 337054/371472 [4:39:07<2:45:05, 3.47it/s] 91%|█████████ | 337055/371472 [4:39:07<2:41:27, 3.55it/s] 91%|█████████ | 337056/371472 [4:39:07<2:45:25, 3.47it/s] 91%|█████████ | 337057/371472 [4:39:07<2:39:43, 3.59it/s] 91%|█████████ | 337058/371472 [4:39:08<2:38:05, 3.63it/s] 91%|█████████ | 337059/371472 [4:39:08<2:42:43, 3.52it/s] 91%|█████████ | 337060/371472 [4:39:08<2:40:09, 3.58it/s] {'loss': 2.5292, 'learning_rate': 1.8341808700898533e-07, 'epoch': 14.52} + 91%|█████████ | 337060/371472 [4:39:08<2:40:09, 3.58it/s] 91%|█████████ | 337061/371472 [4:39:09<2:45:52, 3.46it/s] 91%|█████████ | 337062/371472 [4:39:09<2:41:08, 3.56it/s] 91%|█████████ | 337063/371472 [4:39:09<2:39:23, 3.60it/s] 91%|█████████ | 337064/371472 [4:39:09<2:37:29, 3.64it/s] 91%|█████████ | 337065/371472 [4:39:10<2:36:04, 3.67it/s] 91%|█████████ | 337066/371472 [4:39:10<2:35:49, 3.68it/s] 91%|█████████ | 337067/371472 [4:39:10<2:31:53, 3.78it/s] 91%|█████████ | 337068/371472 [4:39:10<2:33:01, 3.75it/s] 91%|█████████ | 337069/371472 [4:39:11<2:30:44, 3.80it/s] 91%|█████████ | 337070/371472 [4:39:11<2:32:58, 3.75it/s] 91%|█████████ | 337071/371472 [4:39:11<2:32:41, 3.75it/s] 91%|█████████ | 337072/371472 [4:39:11<2:32:40, 3.76it/s] 91%|█████████ | 337073/371472 [4:39:12<2:29:45, 3.83it/s] 91%|█████████ | 337074/371472 [4:39:12<2:27:13, 3.89it/s] 91%|█████████ | 337075/371472 [4:39:12<2:31:43, 3.78it/s] 91%|█████████ | 337076/371472 [4:39:13<2:35:28, 3.69it/s] 91%|█████████ | 337077/371472 [4:39:13<2:32:52, 3.75it/s] 91%|█████████ | 337078/371472 [4:39:13<2:47:51, 3.42it/s] 91%|█████████ | 337079/371472 [4:39:13<2:45:44, 3.46it/s] 91%|█████████ | 337080/371472 [4:39:14<2:42:16, 3.53it/s] {'loss': 2.5679, 'learning_rate': 1.8336960503350638e-07, 'epoch': 14.52} + 91%|█████████ | 337080/371472 [4:39:14<2:42:16, 3.53it/s] 91%|█████████ | 337081/371472 [4:39:14<2:44:12, 3.49it/s] 91%|█████████ | 337082/371472 [4:39:14<2:45:35, 3.46it/s] 91%|█████████ | 337083/371472 [4:39:15<2:55:02, 3.27it/s] 91%|█████████ | 337084/371472 [4:39:15<2:51:24, 3.34it/s] 91%|█████████ | 337085/371472 [4:39:15<2:41:35, 3.55it/s] 91%|█████████ | 337086/371472 [4:39:15<2:37:16, 3.64it/s] 91%|█████████ | 337087/371472 [4:39:16<2:42:50, 3.52it/s] 91%|█████████ | 337088/371472 [4:39:16<2:34:11, 3.72it/s] 91%|█████████ | 337089/371472 [4:39:16<2:31:55, 3.77it/s] 91%|█████████ | 337090/371472 [4:39:16<2:35:05, 3.69it/s] 91%|█████████ | 337091/371472 [4:39:17<2:37:03, 3.65it/s] 91%|█████████ | 337092/371472 [4:39:17<2:33:44, 3.73it/s] 91%|█████████ | 337093/371472 [4:39:17<2:56:30, 3.25it/s] 91%|█████████ | 337094/371472 [4:39:18<2:52:14, 3.33it/s] 91%|█████████ | 337095/371472 [4:39:18<2:48:06, 3.41it/s] 91%|█████████ | 337096/371472 [4:39:18<2:38:16, 3.62it/s] 91%|█████████ | 337097/371472 [4:39:18<2:33:19, 3.74it/s] 91%|█████████ | 337098/371472 [4:39:19<2:36:46, 3.65it/s] 91%|█████████ | 337099/371472 [4:39:19<2:42:08, 3.53it/s] 91%|█████████ | 337100/371472 [4:39:19<2:37:12, 3.64it/s] {'loss': 2.612, 'learning_rate': 1.8332112305802755e-07, 'epoch': 14.52} + 91%|█████████ | 337100/371472 [4:39:19<2:37:12, 3.64it/s] 91%|█████████ | 337101/371472 [4:39:20<2:32:01, 3.77it/s] 91%|█████████ | 337102/371472 [4:39:20<2:33:01, 3.74it/s] 91%|█████████ | 337103/371472 [4:39:20<2:38:22, 3.62it/s] 91%|█████████ | 337104/371472 [4:39:20<2:35:34, 3.68it/s] 91%|█████████ | 337105/371472 [4:39:21<2:39:20, 3.59it/s] 91%|█████████ | 337106/371472 [4:39:21<2:35:42, 3.68it/s] 91%|█████████ | 337107/371472 [4:39:21<2:33:29, 3.73it/s] 91%|█████████ | 337108/371472 [4:39:22<2:39:22, 3.59it/s] 91%|█████████ | 337109/371472 [4:39:22<2:46:58, 3.43it/s] 91%|█████████ | 337110/371472 [4:39:22<2:47:03, 3.43it/s] 91%|█████████ | 337111/371472 [4:39:22<2:40:47, 3.56it/s] 91%|█████████ | 337112/371472 [4:39:23<2:50:33, 3.36it/s] 91%|█████████ | 337113/371472 [4:39:23<2:49:30, 3.38it/s] 91%|█████████ | 337114/371472 [4:39:23<2:42:45, 3.52it/s] 91%|█████████ | 337115/371472 [4:39:24<2:39:13, 3.60it/s] 91%|█████████ | 337116/371472 [4:39:24<2:38:19, 3.62it/s] 91%|█████████ | 337117/371472 [4:39:24<2:33:29, 3.73it/s] 91%|█████████ | 337118/371472 [4:39:24<2:48:18, 3.40it/s] 91%|█████████ | 337119/371472 [4:39:25<2:36:10, 3.67it/s] 91%|█████████ | 337120/371472 [4:39:25<2:42:43, 3.52it/s] {'loss': 2.529, 'learning_rate': 1.8327264108254863e-07, 'epoch': 14.52} + 91%|█████████ | 337120/371472 [4:39:25<2:42:43, 3.52it/s] 91%|█████████ | 337121/371472 [4:39:25<2:40:54, 3.56it/s] 91%|█████████ | 337122/371472 [4:39:26<2:46:06, 3.45it/s] 91%|█████████ | 337123/371472 [4:39:26<2:40:22, 3.57it/s] 91%|█████████ | 337124/371472 [4:39:26<2:44:21, 3.48it/s] 91%|█████████ | 337125/371472 [4:39:26<2:49:32, 3.38it/s] 91%|█████████ | 337126/371472 [4:39:27<2:43:05, 3.51it/s] 91%|█████████ | 337127/371472 [4:39:27<2:34:38, 3.70it/s] 91%|█████████ | 337128/371472 [4:39:27<2:37:17, 3.64it/s] 91%|█████████ | 337129/371472 [4:39:27<2:36:51, 3.65it/s] 91%|█████████ | 337130/371472 [4:39:28<2:47:32, 3.42it/s] 91%|█████████ | 337131/371472 [4:39:28<2:39:32, 3.59it/s] 91%|█████████ | 337132/371472 [4:39:28<2:36:50, 3.65it/s] 91%|█████████ | 337133/371472 [4:39:29<2:34:29, 3.70it/s] 91%|█████████ | 337134/371472 [4:39:29<2:33:28, 3.73it/s] 91%|█████████ | 337135/371472 [4:39:29<2:41:45, 3.54it/s] 91%|█████████ | 337136/371472 [4:39:29<2:47:12, 3.42it/s] 91%|█████████ | 337137/371472 [4:39:30<2:40:27, 3.57it/s] 91%|█████████ | 337138/371472 [4:39:30<2:43:40, 3.50it/s] 91%|█████████ | 337139/371472 [4:39:30<2:39:46, 3.58it/s] 91%|█████████ | 337140/371472 [4:39:31<2:53:45, 3.29it/s] {'loss': 2.5375, 'learning_rate': 1.8322415910706975e-07, 'epoch': 14.52} + 91%|█████████ | 337140/371472 [4:39:31<2:53:45, 3.29it/s] 91%|█████████ | 337141/371472 [4:39:31<2:54:28, 3.28it/s] 91%|█████████ | 337142/371472 [4:39:31<2:46:41, 3.43it/s] 91%|█████████ | 337143/371472 [4:39:31<2:45:52, 3.45it/s] 91%|█████████ | 337144/371472 [4:39:32<2:47:01, 3.43it/s] 91%|█████████ | 337145/371472 [4:39:32<2:38:47, 3.60it/s] 91%|█████████ | 337146/371472 [4:39:32<2:33:43, 3.72it/s] 91%|█████████ | 337147/371472 [4:39:33<2:35:06, 3.69it/s] 91%|█████████ | 337148/371472 [4:39:33<2:34:17, 3.71it/s] 91%|█████████ | 337149/371472 [4:39:33<2:28:20, 3.86it/s] 91%|█████████ | 337150/371472 [4:39:33<2:33:43, 3.72it/s] 91%|█████████ | 337151/371472 [4:39:34<2:40:55, 3.55it/s] 91%|█████████ | 337152/371472 [4:39:34<2:44:26, 3.48it/s] 91%|█████████ | 337153/371472 [4:39:34<2:40:58, 3.55it/s] 91%|█████████ | 337154/371472 [4:39:35<2:45:13, 3.46it/s] 91%|█████████ | 337155/371472 [4:39:35<2:40:43, 3.56it/s] 91%|█████████ | 337156/371472 [4:39:35<2:41:10, 3.55it/s] 91%|█████████ | 337157/371472 [4:39:35<2:39:16, 3.59it/s] 91%|█████████ | 337158/371472 [4:39:36<2:44:40, 3.47it/s] 91%|█████████ | 337159/371472 [4:39:36<2:45:10, 3.46it/s] 91%|███���█████ | 337160/371472 [4:39:36<2:42:35, 3.52it/s] {'loss': 2.4589, 'learning_rate': 1.8317567713159082e-07, 'epoch': 14.52} + 91%|█████████ | 337160/371472 [4:39:36<2:42:35, 3.52it/s] 91%|█████████ | 337161/371472 [4:39:37<2:41:36, 3.54it/s] 91%|█████████ | 337162/371472 [4:39:37<2:38:16, 3.61it/s] 91%|█████████ | 337163/371472 [4:39:37<2:40:50, 3.56it/s] 91%|█████████ | 337164/371472 [4:39:37<2:39:43, 3.58it/s] 91%|█████████ | 337165/371472 [4:39:38<2:42:51, 3.51it/s] 91%|█████████ | 337166/371472 [4:39:38<2:42:55, 3.51it/s] 91%|█████████ | 337167/371472 [4:39:38<2:40:59, 3.55it/s] 91%|█████████ | 337168/371472 [4:39:39<2:48:27, 3.39it/s] 91%|█████████ | 337169/371472 [4:39:39<2:46:08, 3.44it/s] 91%|█████████ | 337170/371472 [4:39:39<2:48:49, 3.39it/s] 91%|█████████ | 337171/371472 [4:39:39<2:53:21, 3.30it/s] 91%|█████████ | 337172/371472 [4:39:40<2:58:28, 3.20it/s] 91%|█████████ | 337173/371472 [4:39:40<2:58:12, 3.21it/s] 91%|█████████ | 337174/371472 [4:39:40<2:48:28, 3.39it/s] 91%|█████████ | 337175/371472 [4:39:41<2:44:26, 3.48it/s] 91%|█████████ | 337176/371472 [4:39:41<2:38:40, 3.60it/s] 91%|█████████ | 337177/371472 [4:39:41<2:52:50, 3.31it/s] 91%|█████████ | 337178/371472 [4:39:41<2:44:05, 3.48it/s] 91%|█████████ | 337179/371472 [4:39:42<2:38:52, 3.60it/s] 91%|█████████ | 337180/371472 [4:39:42<2:47:56, 3.40it/s] {'loss': 2.4873, 'learning_rate': 1.83127195156112e-07, 'epoch': 14.52} + 91%|█████████ | 337180/371472 [4:39:42<2:47:56, 3.40it/s] 91%|█████████ | 337181/371472 [4:39:42<2:42:36, 3.51it/s] 91%|█████████ | 337182/371472 [4:39:43<2:48:55, 3.38it/s] 91%|█████████ | 337183/371472 [4:39:43<2:45:40, 3.45it/s] 91%|█████████ | 337184/371472 [4:39:43<2:42:35, 3.51it/s] 91%|█████████ | 337185/371472 [4:39:44<2:51:41, 3.33it/s] 91%|█████████ | 337186/371472 [4:39:44<2:53:33, 3.29it/s] 91%|█████████ | 337187/371472 [4:39:44<2:47:34, 3.41it/s] 91%|█████████ | 337188/371472 [4:39:44<2:48:15, 3.40it/s] 91%|█████████ | 337189/371472 [4:39:45<2:47:59, 3.40it/s] 91%|█████████ | 337190/371472 [4:39:45<2:44:17, 3.48it/s] 91%|█████████ | 337191/371472 [4:39:45<2:38:44, 3.60it/s] 91%|█████████ | 337192/371472 [4:39:45<2:35:09, 3.68it/s] 91%|█████████ | 337193/371472 [4:39:46<2:48:57, 3.38it/s] 91%|█████████ | 337194/371472 [4:39:46<2:43:30, 3.49it/s] 91%|█████████ | 337195/371472 [4:39:46<2:46:23, 3.43it/s] 91%|█████████ | 337196/371472 [4:39:47<2:39:33, 3.58it/s] 91%|█████████ | 337197/371472 [4:39:47<2:31:27, 3.77it/s] 91%|█████████ | 337198/371472 [4:39:47<2:32:51, 3.74it/s] 91%|█████████ | 337199/371472 [4:39:47<2:35:13, 3.68it/s] 91%|█████████ | 337200/371472 [4:39:48<2:44:53, 3.46it/s] {'loss': 2.5065, 'learning_rate': 1.8307871318063304e-07, 'epoch': 14.52} + 91%|█████████ | 337200/371472 [4:39:48<2:44:53, 3.46it/s] 91%|█████████ | 337201/371472 [4:39:48<3:04:43, 3.09it/s] 91%|█████████ | 337202/371472 [4:39:48<2:55:08, 3.26it/s] 91%|█████████ | 337203/371472 [4:39:49<3:25:03, 2.79it/s] 91%|█████████ | 337204/371472 [4:39:49<3:20:02, 2.86it/s] 91%|█████████ | 337205/371472 [4:39:49<3:04:09, 3.10it/s] 91%|█████████ | 337206/371472 [4:39:50<2:57:37, 3.22it/s] 91%|█████████ | 337207/371472 [4:39:50<2:52:58, 3.30it/s] 91%|█████████ | 337208/371472 [4:39:50<2:57:45, 3.21it/s] 91%|█████████ | 337209/371472 [4:39:51<2:50:03, 3.36it/s] 91%|█████████ | 337210/371472 [4:39:51<2:41:42, 3.53it/s] 91%|█████████ | 337211/371472 [4:39:51<2:37:07, 3.63it/s] 91%|█████████ | 337212/371472 [4:39:51<2:39:07, 3.59it/s] 91%|█████████ | 337213/371472 [4:39:52<2:36:02, 3.66it/s] 91%|█████████ | 337214/371472 [4:39:52<2:38:08, 3.61it/s] 91%|█████████ | 337215/371472 [4:39:52<2:36:43, 3.64it/s] 91%|█████████ | 337216/371472 [4:39:53<2:50:21, 3.35it/s] 91%|█████████ | 337217/371472 [4:39:53<2:52:32, 3.31it/s] 91%|█████████ | 337218/371472 [4:39:53<2:44:32, 3.47it/s] 91%|█████████ | 337219/371472 [4:39:53<2:45:32, 3.45it/s] 91%|█████████ | 337220/371472 [4:39:54<2:40:55, 3.55it/s] {'loss': 2.4964, 'learning_rate': 1.830302312051542e-07, 'epoch': 14.52} + 91%|█████████ | 337220/371472 [4:39:54<2:40:55, 3.55it/s] 91%|█████████ | 337221/371472 [4:39:54<2:32:14, 3.75it/s] 91%|█████████ | 337222/371472 [4:39:54<2:32:55, 3.73it/s] 91%|█████████ | 337223/371472 [4:39:55<2:31:30, 3.77it/s] 91%|█████████ | 337224/371472 [4:39:55<2:28:58, 3.83it/s] 91%|█████████ | 337225/371472 [4:39:55<2:27:27, 3.87it/s] 91%|█████████ | 337226/371472 [4:39:55<2:39:13, 3.58it/s] 91%|█████████ | 337227/371472 [4:39:56<2:44:00, 3.48it/s] 91%|█████████ | 337228/371472 [4:39:56<2:47:58, 3.40it/s] 91%|█████████ | 337229/371472 [4:39:56<2:44:26, 3.47it/s] 91%|█████████ | 337230/371472 [4:39:57<2:45:37, 3.45it/s] 91%|█████████ | 337231/371472 [4:39:57<2:43:28, 3.49it/s] 91%|█████████ | 337232/371472 [4:39:57<2:42:09, 3.52it/s] 91%|█████████ | 337233/371472 [4:39:57<2:48:59, 3.38it/s] 91%|█████████ | 337234/371472 [4:39:58<2:49:02, 3.38it/s] 91%|█████████ | 337235/371472 [4:39:58<2:47:09, 3.41it/s] 91%|█████████ | 337236/371472 [4:39:58<2:55:38, 3.25it/s] 91%|█████████ | 337237/371472 [4:39:59<2:51:03, 3.34it/s] 91%|█████████ | 337238/371472 [4:39:59<2:44:45, 3.46it/s] 91%|█████████ | 337239/371472 [4:39:59<2:47:18, 3.41it/s] 91%|█████████ | 337240/371472 [4:39:59<2:51:48, 3.32it/s] {'loss': 2.6586, 'learning_rate': 1.8298174922967527e-07, 'epoch': 14.53} + 91%|█████████ | 337240/371472 [4:40:00<2:51:48, 3.32it/s] 91%|█████████ | 337241/371472 [4:40:00<2:43:17, 3.49it/s] 91%|█████████ | 337242/371472 [4:40:00<2:46:22, 3.43it/s] 91%|█████████ | 337243/371472 [4:40:00<2:48:21, 3.39it/s] 91%|█████████ | 337244/371472 [4:40:01<2:47:51, 3.40it/s] 91%|█████████ | 337245/371472 [4:40:01<2:49:52, 3.36it/s] 91%|█████████ | 337246/371472 [4:40:01<2:46:09, 3.43it/s] 91%|█████████ | 337247/371472 [4:40:02<2:42:32, 3.51it/s] 91%|█████████ | 337248/371472 [4:40:02<2:40:31, 3.55it/s] 91%|█████████ | 337249/371472 [4:40:02<2:42:58, 3.50it/s] 91%|█████████ | 337250/371472 [4:40:02<2:40:13, 3.56it/s] 91%|█████████ | 337251/371472 [4:40:03<2:36:35, 3.64it/s] 91%|█████████ | 337252/371472 [4:40:03<2:32:42, 3.73it/s] 91%|█████████ | 337253/371472 [4:40:03<2:27:26, 3.87it/s] 91%|█████████ | 337254/371472 [4:40:03<2:26:12, 3.90it/s] 91%|█████████ | 337255/371472 [4:40:04<2:36:36, 3.64it/s] 91%|█████████ | 337256/371472 [4:40:04<2:34:52, 3.68it/s] 91%|█████████ | 337257/371472 [4:40:04<2:32:46, 3.73it/s] 91%|█████████ | 337258/371472 [4:40:04<2:40:13, 3.56it/s] 91%|█████████ | 337259/371472 [4:40:05<2:39:05, 3.58it/s] 91%|█████████ | 337260/371472 [4:40:05<2:32:59, 3.73it/s] {'loss': 2.5288, 'learning_rate': 1.829332672541964e-07, 'epoch': 14.53} + 91%|█████████ | 337260/371472 [4:40:05<2:32:59, 3.73it/s] 91%|█████████ | 337261/371472 [4:40:05<2:52:44, 3.30it/s] 91%|█████████ | 337262/371472 [4:40:06<2:43:42, 3.48it/s] 91%|█████████ | 337263/371472 [4:40:06<2:38:06, 3.61it/s] 91%|█████████ | 337264/371472 [4:40:06<3:09:25, 3.01it/s] 91%|█████████ | 337265/371472 [4:40:07<3:00:44, 3.15it/s] 91%|█████████ | 337266/371472 [4:40:07<2:50:11, 3.35it/s] 91%|█████████ | 337267/371472 [4:40:07<2:44:58, 3.46it/s] 91%|█████████ | 337268/371472 [4:40:07<2:41:27, 3.53it/s] 91%|█████████ | 337269/371472 [4:40:08<2:55:04, 3.26it/s] 91%|█████████ | 337270/371472 [4:40:08<2:55:06, 3.26it/s] 91%|█████████ | 337271/371472 [4:40:08<2:56:04, 3.24it/s] 91%|█████████ | 337272/371472 [4:40:09<2:48:55, 3.37it/s] 91%|█████████ | 337273/371472 [4:40:09<2:47:51, 3.40it/s] 91%|█████████ | 337274/371472 [4:40:09<2:42:27, 3.51it/s] 91%|█████████ | 337275/371472 [4:40:10<2:44:21, 3.47it/s] 91%|█████████ | 337276/371472 [4:40:10<2:42:44, 3.50it/s] 91%|█████████ | 337277/371472 [4:40:10<2:48:57, 3.37it/s] 91%|█████████ | 337278/371472 [4:40:10<2:45:22, 3.45it/s] 91%|█████████ | 337279/371472 [4:40:11<2:41:55, 3.52it/s] 91%|█████████ | 337280/371472 [4:40:11<2:38:45, 3.59it/s] {'loss': 2.6021, 'learning_rate': 1.8288478527871746e-07, 'epoch': 14.53} + 91%|█████████ | 337280/371472 [4:40:11<2:38:45, 3.59it/s] 91%|█████████ | 337281/371472 [4:40:11<2:41:56, 3.52it/s] 91%|█████████ | 337282/371472 [4:40:12<2:39:23, 3.57it/s] 91%|█████████ | 337283/371472 [4:40:12<2:47:30, 3.40it/s] 91%|█████████ | 337284/371472 [4:40:12<2:37:41, 3.61it/s] 91%|█████████ | 337285/371472 [4:40:12<2:31:30, 3.76it/s] 91%|█████████ | 337286/371472 [4:40:13<2:31:53, 3.75it/s] 91%|█████████ | 337287/371472 [4:40:13<2:29:11, 3.82it/s] 91%|█████████ | 337288/371472 [4:40:13<2:26:31, 3.89it/s] 91%|█████████ | 337289/371472 [4:40:13<2:31:17, 3.77it/s] 91%|█████████ | 337290/371472 [4:40:14<2:38:52, 3.59it/s] 91%|█████████ | 337291/371472 [4:40:14<2:33:28, 3.71it/s] 91%|█████████ | 337292/371472 [4:40:14<2:35:06, 3.67it/s] 91%|█████████ | 337293/371472 [4:40:14<2:29:25, 3.81it/s] 91%|█████████ | 337294/371472 [4:40:15<2:31:25, 3.76it/s] 91%|█████████ | 337295/371472 [4:40:15<2:41:29, 3.53it/s] 91%|█████████ | 337296/371472 [4:40:15<2:39:19, 3.58it/s] 91%|█████████ | 337297/371472 [4:40:16<2:38:57, 3.58it/s] 91%|█████████ | 337298/371472 [4:40:16<2:35:08, 3.67it/s] 91%|█████████ | 337299/371472 [4:40:16<2:31:34, 3.76it/s] 91%|█████████ | 337300/371472 [4:40:16<2:34:40, 3.68it/s] {'loss': 2.6757, 'learning_rate': 1.828363033032386e-07, 'epoch': 14.53} + 91%|█████████ | 337300/371472 [4:40:16<2:34:40, 3.68it/s] 91%|█████████ | 337301/371472 [4:40:17<2:34:01, 3.70it/s] 91%|█████████ | 337302/371472 [4:40:17<2:37:36, 3.61it/s] 91%|█████████ | 337303/371472 [4:40:17<2:31:22, 3.76it/s] 91%|█████████ | 337304/371472 [4:40:17<2:34:15, 3.69it/s] 91%|█████████ | 337305/371472 [4:40:18<2:36:53, 3.63it/s] 91%|█████████ | 337306/371472 [4:40:18<2:29:50, 3.80it/s] 91%|█████████ | 337307/371472 [4:40:18<2:32:24, 3.74it/s] 91%|█████████ | 337308/371472 [4:40:19<2:34:10, 3.69it/s] 91%|█████████ | 337309/371472 [4:40:19<2:32:27, 3.73it/s] 91%|█████████ | 337310/371472 [4:40:19<2:32:34, 3.73it/s] 91%|█████████ | 337311/371472 [4:40:19<2:28:15, 3.84it/s] 91%|█████████ | 337312/371472 [4:40:20<2:28:53, 3.82it/s] 91%|█████████ | 337313/371472 [4:40:20<2:29:21, 3.81it/s] 91%|█████████ | 337314/371472 [4:40:20<2:37:13, 3.62it/s] 91%|█████████ | 337315/371472 [4:40:20<2:36:55, 3.63it/s] 91%|█████████ | 337316/371472 [4:40:21<2:53:08, 3.29it/s] 91%|█████████ | 337317/371472 [4:40:21<2:47:43, 3.39it/s] 91%|█████████ | 337318/371472 [4:40:21<2:38:54, 3.58it/s] 91%|█████████ | 337319/371472 [4:40:22<2:37:29, 3.61it/s] 91%|█████████ | 337320/371472 [4:40:22<3:11:22, 2.97it/s] {'loss': 2.4936, 'learning_rate': 1.8278782132775968e-07, 'epoch': 14.53} + 91%|█████████ | 337320/371472 [4:40:22<3:11:22, 2.97it/s] 91%|█████████ | 337321/371472 [4:40:22<3:12:14, 2.96it/s] 91%|█████████ | 337322/371472 [4:40:23<3:10:15, 2.99it/s] 91%|█████████ | 337323/371472 [4:40:23<3:08:59, 3.01it/s] 91%|█████████ | 337324/371472 [4:40:23<2:53:27, 3.28it/s] 91%|█████████ | 337325/371472 [4:40:24<2:43:10, 3.49it/s] 91%|█████████ | 337326/371472 [4:40:24<2:38:30, 3.59it/s] 91%|█████████ | 337327/371472 [4:40:24<2:35:32, 3.66it/s] 91%|█████████ | 337328/371472 [4:40:24<2:39:42, 3.56it/s] 91%|█████████ | 337329/371472 [4:40:25<2:33:38, 3.70it/s] 91%|█████████ | 337330/371472 [4:40:25<2:35:04, 3.67it/s] 91%|█████████ | 337331/371472 [4:40:25<2:38:45, 3.58it/s] 91%|█████████ | 337332/371472 [4:40:25<2:40:27, 3.55it/s] 91%|█████████ | 337333/371472 [4:40:26<2:45:56, 3.43it/s] 91%|█████████ | 337334/371472 [4:40:26<2:41:30, 3.52it/s] 91%|█████████ | 337335/371472 [4:40:26<2:40:14, 3.55it/s] 91%|█████████ | 337336/371472 [4:40:27<2:35:59, 3.65it/s] 91%|█████████ | 337337/371472 [4:40:27<2:35:25, 3.66it/s] 91%|█████████ | 337338/371472 [4:40:27<2:44:03, 3.47it/s] 91%|█████████ | 337339/371472 [4:40:27<2:43:03, 3.49it/s] 91%|█████████ | 337340/371472 [4:40:28<2:46:44, 3.41it/s] {'loss': 2.593, 'learning_rate': 1.8273933935228084e-07, 'epoch': 14.53} + 91%|█████████ | 337340/371472 [4:40:28<2:46:44, 3.41it/s] 91%|█████████ | 337341/371472 [4:40:28<2:40:17, 3.55it/s] 91%|█████████ | 337342/371472 [4:40:28<2:33:25, 3.71it/s] 91%|█████████ | 337343/371472 [4:40:29<2:37:05, 3.62it/s] 91%|█████████ | 337344/371472 [4:40:29<2:29:49, 3.80it/s] 91%|█████████ | 337345/371472 [4:40:29<2:35:17, 3.66it/s] 91%|█████████ | 337346/371472 [4:40:29<2:29:27, 3.81it/s] 91%|█████████ | 337347/371472 [4:40:30<2:34:41, 3.68it/s] 91%|█████████ | 337348/371472 [4:40:30<2:50:01, 3.34it/s] 91%|█████████ | 337349/371472 [4:40:30<2:41:21, 3.52it/s] 91%|█████████ | 337350/371472 [4:40:31<2:40:54, 3.53it/s] 91%|█████████ | 337351/371472 [4:40:31<2:39:34, 3.56it/s] 91%|█████████ | 337352/371472 [4:40:31<2:40:49, 3.54it/s] 91%|█████████ | 337353/371472 [4:40:31<2:36:14, 3.64it/s] 91%|█████████ | 337354/371472 [4:40:32<2:33:13, 3.71it/s] 91%|█████████ | 337355/371472 [4:40:32<2:33:35, 3.70it/s] 91%|█████████ | 337356/371472 [4:40:32<2:30:25, 3.78it/s] 91%|█████████ | 337357/371472 [4:40:32<2:29:09, 3.81it/s] 91%|█████████ | 337358/371472 [4:40:33<2:31:40, 3.75it/s] 91%|█████████ | 337359/371472 [4:40:33<2:43:27, 3.48it/s] 91%|█████████ | 337360/371472 [4:40:33<2:41:12, 3.53it/s] {'loss': 2.6045, 'learning_rate': 1.826908573768019e-07, 'epoch': 14.53} + 91%|█████████ | 337360/371472 [4:40:33<2:41:12, 3.53it/s] 91%|█████████ | 337361/371472 [4:40:33<2:34:46, 3.67it/s] 91%|█████████ | 337362/371472 [4:40:34<2:39:07, 3.57it/s] 91%|█████████ | 337363/371472 [4:40:34<2:34:35, 3.68it/s] 91%|█████████ | 337364/371472 [4:40:34<2:43:49, 3.47it/s] 91%|█████████ | 337365/371472 [4:40:35<2:40:22, 3.54it/s] 91%|█████████ | 337366/371472 [4:40:35<2:40:17, 3.55it/s] 91%|█████████ | 337367/371472 [4:40:35<2:42:59, 3.49it/s] 91%|█████████ | 337368/371472 [4:40:35<2:36:53, 3.62it/s] 91%|█████████ | 337369/371472 [4:40:36<2:48:59, 3.36it/s] 91%|█████████ | 337370/371472 [4:40:36<2:43:06, 3.48it/s] 91%|█████████ | 337371/371472 [4:40:36<2:40:26, 3.54it/s] 91%|█████████ | 337372/371472 [4:40:37<2:35:53, 3.65it/s] 91%|█████████ | 337373/371472 [4:40:37<2:33:41, 3.70it/s] 91%|█████████ | 337374/371472 [4:40:37<2:36:58, 3.62it/s] 91%|█████████ | 337375/371472 [4:40:37<2:38:58, 3.57it/s] 91%|█████████ | 337376/371472 [4:40:38<2:36:13, 3.64it/s] 91%|█████████ | 337377/371472 [4:40:38<2:42:30, 3.50it/s] 91%|█████████ | 337378/371472 [4:40:38<2:42:27, 3.50it/s] 91%|█████████ | 337379/371472 [4:40:39<2:35:52, 3.65it/s] 91%|█████████ | 337380/371472 [4:40:39<2:36:26, 3.63it/s] {'loss': 2.5138, 'learning_rate': 1.8264237540132306e-07, 'epoch': 14.53} + 91%|█████████ | 337380/371472 [4:40:39<2:36:26, 3.63it/s] 91%|█████████ | 337381/371472 [4:40:39<2:36:58, 3.62it/s] 91%|█████████ | 337382/371472 [4:40:39<2:34:01, 3.69it/s] 91%|█████████ | 337383/371472 [4:40:40<2:27:58, 3.84it/s] 91%|█████████ | 337384/371472 [4:40:40<2:27:42, 3.85it/s] 91%|█████████ | 337385/371472 [4:40:40<2:37:48, 3.60it/s] 91%|█████████ | 337386/371472 [4:40:40<2:36:27, 3.63it/s] 91%|█████████ | 337387/371472 [4:40:41<2:32:36, 3.72it/s] 91%|█████████ | 337388/371472 [4:40:41<2:30:25, 3.78it/s] 91%|█████████ | 337389/371472 [4:40:41<2:25:59, 3.89it/s] 91%|█████████ | 337390/371472 [4:40:41<2:25:18, 3.91it/s] 91%|█████████ | 337391/371472 [4:40:42<2:29:33, 3.80it/s] 91%|█████████ | 337392/371472 [4:40:42<2:32:19, 3.73it/s] 91%|█████████ | 337393/371472 [4:40:42<2:31:17, 3.75it/s] 91%|█████████ | 337394/371472 [4:40:43<2:29:55, 3.79it/s] 91%|█████████ | 337395/371472 [4:40:43<2:29:04, 3.81it/s] 91%|█████████ | 337396/371472 [4:40:43<2:40:12, 3.54it/s] 91%|█████████ | 337397/371472 [4:40:43<2:42:53, 3.49it/s] 91%|█████████ | 337398/371472 [4:40:44<2:40:50, 3.53it/s] 91%|█████████ | 337399/371472 [4:40:44<2:42:50, 3.49it/s] 91%|█████████ | 337400/371472 [4:40:44<2:38:20, 3.59it/s] {'loss': 2.7271, 'learning_rate': 1.825938934258441e-07, 'epoch': 14.53} + 91%|█████████ | 337400/371472 [4:40:44<2:38:20, 3.59it/s] 91%|█████████ | 337401/371472 [4:40:44<2:32:48, 3.72it/s] 91%|█████████ | 337402/371472 [4:40:45<2:38:42, 3.58it/s] 91%|█████████ | 337403/371472 [4:40:45<2:48:57, 3.36it/s] 91%|█████████ | 337404/371472 [4:40:45<2:42:45, 3.49it/s] 91%|█████████ | 337405/371472 [4:40:46<2:38:47, 3.58it/s] 91%|█████████ | 337406/371472 [4:40:46<2:36:59, 3.62it/s] 91%|█████████ | 337407/371472 [4:40:46<2:44:09, 3.46it/s] 91%|█████████ | 337408/371472 [4:40:47<2:41:02, 3.53it/s] 91%|█████████ | 337409/371472 [4:40:47<2:34:37, 3.67it/s] 91%|█████████ | 337410/371472 [4:40:47<2:38:13, 3.59it/s] 91%|█████████ | 337411/371472 [4:40:47<2:36:04, 3.64it/s] 91%|█████████ | 337412/371472 [4:40:48<2:32:54, 3.71it/s] 91%|█████████ | 337413/371472 [4:40:48<2:35:19, 3.65it/s] 91%|█████████ | 337414/371472 [4:40:48<2:47:15, 3.39it/s] 91%|█████████ | 337415/371472 [4:40:48<2:42:18, 3.50it/s] 91%|█████████ | 337416/371472 [4:40:49<2:45:28, 3.43it/s] 91%|█████████ | 337417/371472 [4:40:49<2:37:20, 3.61it/s] 91%|█████████ | 337418/371472 [4:40:49<2:40:41, 3.53it/s] 91%|█████████ | 337419/371472 [4:40:50<2:32:53, 3.71it/s] 91%|█████████ | 337420/371472 [4:40:50<2:31:11, 3.75it/s] {'loss': 2.5677, 'learning_rate': 1.8254541145036528e-07, 'epoch': 14.53} + 91%|█████████ | 337420/371472 [4:40:50<2:31:11, 3.75it/s] 91%|█████████ | 337421/371472 [4:40:50<2:33:35, 3.69it/s] 91%|█████████ | 337422/371472 [4:40:50<2:49:55, 3.34it/s] 91%|█████████ | 337423/371472 [4:40:51<2:43:23, 3.47it/s] 91%|█████████ | 337424/371472 [4:40:51<2:43:45, 3.47it/s] 91%|█████████ | 337425/371472 [4:40:51<2:36:45, 3.62it/s] 91%|█████████ | 337426/371472 [4:40:52<2:45:17, 3.43it/s] 91%|█████████ | 337427/371472 [4:40:52<2:54:37, 3.25it/s] 91%|█████████ | 337428/371472 [4:40:52<2:44:35, 3.45it/s] 91%|█████████ | 337429/371472 [4:40:52<2:41:08, 3.52it/s] 91%|█████████ | 337430/371472 [4:40:53<2:34:24, 3.67it/s] 91%|█████████ | 337431/371472 [4:40:53<2:30:02, 3.78it/s] 91%|█████████ | 337432/371472 [4:40:53<2:28:49, 3.81it/s] 91%|█████████ | 337433/371472 [4:40:53<2:27:42, 3.84it/s] 91%|█████████ | 337434/371472 [4:40:54<2:27:57, 3.83it/s] 91%|█████████ | 337435/371472 [4:40:54<2:28:07, 3.83it/s] 91%|█████████ | 337436/371472 [4:40:54<2:32:48, 3.71it/s] 91%|█████████ | 337437/371472 [4:40:55<2:34:06, 3.68it/s] 91%|█████████ | 337438/371472 [4:40:55<2:47:36, 3.38it/s] 91%|█████████ | 337439/371472 [4:40:55<2:51:33, 3.31it/s] 91%|█████████ | 337440/371472 [4:40:56<2:50:51, 3.32it/s] {'loss': 2.484, 'learning_rate': 1.8249692947488633e-07, 'epoch': 14.53} + 91%|█████████ | 337440/371472 [4:40:56<2:50:51, 3.32it/s] 91%|█████████ | 337441/371472 [4:40:56<2:47:36, 3.38it/s] 91%|█████████ | 337442/371472 [4:40:56<2:52:24, 3.29it/s] 91%|█████████ | 337443/371472 [4:40:56<2:43:16, 3.47it/s] 91%|█████████ | 337444/371472 [4:40:57<2:37:03, 3.61it/s] 91%|█████████ | 337445/371472 [4:40:57<2:40:51, 3.53it/s] 91%|█████████ | 337446/371472 [4:40:57<2:41:38, 3.51it/s] 91%|███████��█ | 337447/371472 [4:40:58<2:56:01, 3.22it/s] 91%|█████████ | 337448/371472 [4:40:58<2:47:37, 3.38it/s] 91%|█████████ | 337449/371472 [4:40:58<2:39:02, 3.57it/s] 91%|█████████ | 337450/371472 [4:40:58<2:42:17, 3.49it/s] 91%|█████████ | 337451/371472 [4:40:59<2:46:36, 3.40it/s] 91%|█████████ | 337452/371472 [4:40:59<2:42:11, 3.50it/s] 91%|█████████ | 337453/371472 [4:40:59<2:35:38, 3.64it/s] 91%|█████████ | 337454/371472 [4:41:00<2:38:58, 3.57it/s] 91%|█████████ | 337455/371472 [4:41:00<2:42:44, 3.48it/s] 91%|█████████ | 337456/371472 [4:41:00<2:41:30, 3.51it/s] 91%|█████████ | 337457/371472 [4:41:00<2:33:13, 3.70it/s] 91%|█████████ | 337458/371472 [4:41:01<2:34:57, 3.66it/s] 91%|█████████ | 337459/371472 [4:41:01<2:37:07, 3.61it/s] 91%|█████████ | 337460/371472 [4:41:01<2:31:34, 3.74it/s] {'loss': 2.6673, 'learning_rate': 1.8244844749940737e-07, 'epoch': 14.54} + 91%|█████████ | 337460/371472 [4:41:01<2:31:34, 3.74it/s] 91%|█████████ | 337461/371472 [4:41:01<2:27:04, 3.85it/s] 91%|█████████ | 337462/371472 [4:41:02<2:26:34, 3.87it/s] 91%|█████████ | 337463/371472 [4:41:02<2:28:13, 3.82it/s] 91%|█████████ | 337464/371472 [4:41:02<2:30:05, 3.78it/s] 91%|█████████ | 337465/371472 [4:41:02<2:28:19, 3.82it/s] 91%|█████████ | 337466/371472 [4:41:03<2:28:09, 3.83it/s] 91%|█████████ | 337467/371472 [4:41:03<2:37:57, 3.59it/s] 91%|█████████ | 337468/371472 [4:41:03<2:32:57, 3.71it/s] 91%|█████████ | 337469/371472 [4:41:04<2:33:39, 3.69it/s] 91%|█████████ | 337470/371472 [4:41:04<2:31:02, 3.75it/s] 91%|█████████ | 337471/371472 [4:41:04<2:30:04, 3.78it/s] 91%|█████████ | 337472/371472 [4:41:04<2:31:03, 3.75it/s] 91%|█████████ | 337473/371472 [4:41:05<2:38:24, 3.58it/s] 91%|█████████ | 337474/371472 [4:41:05<2:40:13, 3.54it/s] 91%|█████████ | 337475/371472 [4:41:05<2:36:08, 3.63it/s] 91%|█████████ | 337476/371472 [4:41:05<2:32:12, 3.72it/s] 91%|█████████ | 337477/371472 [4:41:06<2:38:48, 3.57it/s] 91%|█████████ | 337478/371472 [4:41:06<2:36:47, 3.61it/s] 91%|█████████ | 337479/371472 [4:41:06<2:40:15, 3.54it/s] 91%|█████████ | 337480/371472 [4:41:07<2:54:03, 3.25it/s] {'loss': 2.5696, 'learning_rate': 1.8239996552392855e-07, 'epoch': 14.54} + 91%|█████████ | 337480/371472 [4:41:07<2:54:03, 3.25it/s] 91%|█████████ | 337481/371472 [4:41:07<2:49:38, 3.34it/s] 91%|█████████ | 337482/371472 [4:41:07<2:48:01, 3.37it/s] 91%|█████████ | 337483/371472 [4:41:07<2:40:59, 3.52it/s] 91%|█████████ | 337484/371472 [4:41:08<3:03:56, 3.08it/s] 91%|█████████ | 337485/371472 [4:41:08<2:52:16, 3.29it/s] 91%|█████████ | 337486/371472 [4:41:08<2:49:44, 3.34it/s] 91%|█████████ | 337487/371472 [4:41:09<2:50:43, 3.32it/s] 91%|█████████ | 337488/371472 [4:41:09<2:40:42, 3.52it/s] 91%|█████████ | 337489/371472 [4:41:09<2:44:00, 3.45it/s] 91%|█████████ | 337490/371472 [4:41:10<2:40:23, 3.53it/s] 91%|█████████ | 337491/371472 [4:41:10<2:36:14, 3.62it/s] 91%|█████████ | 337492/371472 [4:41:10<2:40:44, 3.52it/s] 91%|█████████ | 337493/371472 [4:41:10<2:38:10, 3.58it/s] 91%|█████████ | 337494/371472 [4:41:11<2:45:00, 3.43it/s] 91%|█████████ | 337495/371472 [4:41:11<2:39:17, 3.56it/s] 91%|█████████ | 337496/371472 [4:41:11<2:48:32, 3.36it/s] 91%|█████████ | 337497/371472 [4:41:12<3:02:48, 3.10it/s] 91%|█████████ | 337498/371472 [4:41:12<2:49:37, 3.34it/s] 91%|█████████ | 337499/371472 [4:41:12<2:45:11, 3.43it/s] 91%|█████████ | 337500/371472 [4:41:12<2:41:08, 3.51it/s] {'loss': 2.632, 'learning_rate': 1.8235148354844962e-07, 'epoch': 14.54} + 91%|█████████ | 337500/371472 [4:41:12<2:41:08, 3.51it/s] 91%|█████████ | 337501/371472 [4:41:13<3:04:14, 3.07it/s] 91%|█████████ | 337502/371472 [4:41:13<2:57:21, 3.19it/s] 91%|█████████ | 337503/371472 [4:41:13<2:50:51, 3.31it/s] 91%|█████████ | 337504/371472 [4:41:14<2:41:51, 3.50it/s] 91%|█████████ | 337505/371472 [4:41:14<2:44:18, 3.45it/s] 91%|█████████ | 337506/371472 [4:41:14<2:43:07, 3.47it/s] 91%|█████████ | 337507/371472 [4:41:15<2:44:46, 3.44it/s] 91%|█████████ | 337508/371472 [4:41:15<2:56:47, 3.20it/s] 91%|█████████ | 337509/371472 [4:41:15<2:52:46, 3.28it/s] 91%|█████████ | 337510/371472 [4:41:16<2:52:42, 3.28it/s] 91%|█████████ | 337511/371472 [4:41:16<2:45:49, 3.41it/s] 91%|█████████ | 337512/371472 [4:41:16<2:39:12, 3.56it/s] 91%|█████████ | 337513/371472 [4:41:16<2:42:00, 3.49it/s] 91%|█████████ | 337514/371472 [4:41:17<2:36:31, 3.62it/s] 91%|█████████ | 337515/371472 [4:41:17<2:42:31, 3.48it/s] 91%|█████████ | 337516/371472 [4:41:17<2:39:31, 3.55it/s] 91%|█████████ | 337517/371472 [4:41:17<2:34:25, 3.66it/s] 91%|█████████ | 337518/371472 [4:41:18<2:39:13, 3.55it/s] 91%|█████████ | 337519/371472 [4:41:18<2:40:54, 3.52it/s] 91%|█████████ | 337520/371472 [4:41:18<2:35:15, 3.64it/s] {'loss': 2.6452, 'learning_rate': 1.8230300157297074e-07, 'epoch': 14.54} + 91%|█████████ | 337520/371472 [4:41:18<2:35:15, 3.64it/s] 91%|█████████ | 337521/371472 [4:41:19<2:30:28, 3.76it/s] 91%|█████████ | 337522/371472 [4:41:19<2:30:19, 3.76it/s] 91%|█████████ | 337523/371472 [4:41:19<2:35:58, 3.63it/s] 91%|█████████ | 337524/371472 [4:41:19<2:31:31, 3.73it/s] 91%|█████████ | 337525/371472 [4:41:20<2:32:33, 3.71it/s] 91%|█████████ | 337526/371472 [4:41:20<2:33:10, 3.69it/s] 91%|█████████ | 337527/371472 [4:41:20<2:39:39, 3.54it/s] 91%|█████████ | 337528/371472 [4:41:20<2:33:00, 3.70it/s] 91%|█████████ | 337529/371472 [4:41:21<3:06:12, 3.04it/s] 91%|█████████ | 337530/371472 [4:41:21<2:59:14, 3.16it/s] 91%|█████████ | 337531/371472 [4:41:21<2:47:18, 3.38it/s] 91%|█████████ | 337532/371472 [4:41:22<2:38:28, 3.57it/s] 91%|█████████ | 337533/371472 [4:41:22<2:42:50, 3.47it/s] 91%|█████████ | 337534/371472 [4:41:23<4:03:06, 2.33it/s] 91%|█████████ | 337535/371472 [4:41:23<3:35:11, 2.63it/s] 91%|█████████ | 337536/371472 [4:41:23<3:11:32, 2.95it/s] 91%|█████████ | 337537/371472 [4:41:24<2:57:58, 3.18it/s] 91%|█████████ | 337538/371472 [4:41:24<2:55:37, 3.22it/s] 91%|█████████ | 337539/371472 [4:41:24<2:44:40, 3.43it/s] 91%|█████████ | 337540/371472 [4:41:24<2:36:04, 3.62it/s] {'loss': 2.5718, 'learning_rate': 1.822545195974918e-07, 'epoch': 14.54} + 91%|█████████ | 337540/371472 [4:41:24<2:36:04, 3.62it/s] 91%|█████████ | 337541/371472 [4:41:25<2:35:53, 3.63it/s] 91%|█████████ | 337542/371472 [4:41:25<2:33:30, 3.68it/s] 91%|█████████ | 337543/371472 [4:41:25<2:28:21, 3.81it/s] 91%|█████████ | 337544/371472 [4:41:25<2:28:46, 3.80it/s] 91%|█████████ | 337545/371472 [4:41:26<2:25:43, 3.88it/s] 91%|█████████ | 337546/371472 [4:41:26<2:26:08, 3.87it/s] 91%|█████████ | 337547/371472 [4:41:26<2:27:32, 3.83it/s] 91%|█████████ | 337548/371472 [4:41:26<2:32:33, 3.71it/s] 91%|█████████ | 337549/371472 [4:41:27<2:32:24, 3.71it/s] 91%|█████████ | 337550/371472 [4:41:27<2:32:02, 3.72it/s] 91%|█████████ | 337551/371472 [4:41:27<2:27:42, 3.83it/s] 91%|█████████ | 337552/371472 [4:41:27<2:27:23, 3.84it/s] 91%|█████████ | 337553/371472 [4:41:28<2:29:36, 3.78it/s] 91%|█████████ | 337554/371472 [4:41:28<2:32:22, 3.71it/s] 91%|█████████ | 337555/371472 [4:41:28<2:28:14, 3.81it/s] 91%|█████████ | 337556/371472 [4:41:29<2:26:36, 3.86it/s] 91%|█████████ | 337557/371472 [4:41:29<2:36:41, 3.61it/s] 91%|█████████ | 337558/371472 [4:41:29<2:32:46, 3.70it/s] 91%|█████████ | 337559/371472 [4:41:29<2:29:53, 3.77it/s] 91%|█████████ | 337560/371472 [4:41:30<2:28:27, 3.81it/s] {'loss': 2.7629, 'learning_rate': 1.82206037622013e-07, 'epoch': 14.54} + 91%|█████████ | 337560/371472 [4:41:30<2:28:27, 3.81it/s] 91%|█████████ | 337561/371472 [4:41:30<2:25:22, 3.89it/s] 91%|████���████ | 337562/371472 [4:41:30<2:25:51, 3.87it/s] 91%|█████████ | 337563/371472 [4:41:30<2:32:13, 3.71it/s] 91%|█████████ | 337564/371472 [4:41:31<2:27:28, 3.83it/s] 91%|█████████ | 337565/371472 [4:41:31<2:32:21, 3.71it/s] 91%|█████████ | 337566/371472 [4:41:31<2:45:37, 3.41it/s] 91%|█████████ | 337567/371472 [4:41:32<2:40:59, 3.51it/s] 91%|█████████ | 337568/371472 [4:41:32<2:33:59, 3.67it/s] 91%|█████████ | 337569/371472 [4:41:32<2:28:52, 3.80it/s] 91%|█████████ | 337570/371472 [4:41:32<2:32:47, 3.70it/s] 91%|█████████ | 337571/371472 [4:41:33<2:32:35, 3.70it/s] 91%|█████████ | 337572/371472 [4:41:33<2:27:12, 3.84it/s] 91%|█████████ | 337573/371472 [4:41:33<2:34:53, 3.65it/s] 91%|█████████ | 337574/371472 [4:41:33<2:37:00, 3.60it/s] 91%|█████████ | 337575/371472 [4:41:34<2:42:09, 3.48it/s] 91%|█████████ | 337576/371472 [4:41:34<2:37:42, 3.58it/s] 91%|█████████ | 337577/371472 [4:41:34<2:36:30, 3.61it/s] 91%|█████████ | 337578/371472 [4:41:35<2:40:35, 3.52it/s] 91%|█████████ | 337579/371472 [4:41:35<2:39:35, 3.54it/s] 91%|█████████ | 337580/371472 [4:41:35<2:41:05, 3.51it/s] {'loss': 2.5936, 'learning_rate': 1.8215755564653404e-07, 'epoch': 14.54} + 91%|█████████ | 337580/371472 [4:41:35<2:41:05, 3.51it/s] 91%|█████████ | 337581/371472 [4:41:35<2:45:01, 3.42it/s] 91%|█████████ | 337582/371472 [4:41:36<2:46:04, 3.40it/s] 91%|█████████ | 337583/371472 [4:41:36<2:46:56, 3.38it/s] 91%|█████████ | 337584/371472 [4:41:36<2:45:00, 3.42it/s] 91%|█████████ | 337585/371472 [4:41:37<2:47:06, 3.38it/s] 91%|█████████ | 337586/371472 [4:41:37<2:46:16, 3.40it/s] 91%|█████████ | 337587/371472 [4:41:37<2:48:59, 3.34it/s] 91%|█████████ | 337588/371472 [4:41:38<2:44:49, 3.43it/s] 91%|█████████ | 337589/371472 [4:41:38<2:42:25, 3.48it/s] 91%|█████████ | 337590/371472 [4:41:38<2:37:34, 3.58it/s] 91%|█████████ | 337591/371472 [4:41:38<2:33:51, 3.67it/s] 91%|█████████ | 337592/371472 [4:41:39<2:32:15, 3.71it/s] 91%|█████████ | 337593/371472 [4:41:39<2:43:39, 3.45it/s] 91%|█████████ | 337594/371472 [4:41:39<2:46:32, 3.39it/s] 91%|█████████ | 337595/371472 [4:41:40<2:48:28, 3.35it/s] 91%|█████████ | 337596/371472 [4:41:40<2:40:36, 3.52it/s] 91%|█████████ | 337597/371472 [4:41:40<2:43:38, 3.45it/s] 91%|█████████ | 337598/371472 [4:41:40<2:50:57, 3.30it/s] 91%|█████████ | 337599/371472 [4:41:41<2:49:29, 3.33it/s] 91%|█████████ | 337600/371472 [4:41:41<2:39:21, 3.54it/s] {'loss': 2.5221, 'learning_rate': 1.821090736710552e-07, 'epoch': 14.54} + 91%|█████████ | 337600/371472 [4:41:41<2:39:21, 3.54it/s] 91%|█████████ | 337601/371472 [4:41:41<2:36:34, 3.61it/s] 91%|█████████ | 337602/371472 [4:41:41<2:32:55, 3.69it/s] 91%|█████████ | 337603/371472 [4:41:42<2:42:40, 3.47it/s] 91%|█████████ | 337604/371472 [4:41:42<2:50:47, 3.31it/s] 91%|█████████ | 337605/371472 [4:41:42<2:48:19, 3.35it/s] 91%|█████████ | 337606/371472 [4:41:43<2:45:26, 3.41it/s] 91%|█████████ | 337607/371472 [4:41:43<2:53:10, 3.26it/s] 91%|█████████ | 337608/371472 [4:41:43<2:44:06, 3.44it/s] 91%|█████████ | 337609/371472 [4:41:44<2:49:34, 3.33it/s] 91%|█████████ | 337610/371472 [4:41:44<2:40:33, 3.52it/s] 91%|█████████ | 337611/371472 [4:41:44<2:36:02, 3.62it/s] 91%|█████████ | 337612/371472 [4:41:44<2:34:31, 3.65it/s] 91%|█████████ | 337613/371472 [4:41:45<2:27:53, 3.82it/s] 91%|█████████ | 337614/371472 [4:41:45<2:26:30, 3.85it/s] 91%|█████████ | 337615/371472 [4:41:45<2:30:23, 3.75it/s] 91%|█████████ | 337616/371472 [4:41:45<2:30:37, 3.75it/s] 91%|█████████ | 337617/371472 [4:41:46<2:47:43, 3.36it/s] 91%|█████████ | 337618/371472 [4:41:46<2:53:41, 3.25it/s] 91%|█████████ | 337619/371472 [4:41:46<2:51:48, 3.28it/s] 91%|█████████ | 337620/371472 [4:41:47<2:47:20, 3.37it/s] {'loss': 2.7465, 'learning_rate': 1.8206059169557626e-07, 'epoch': 14.54} + 91%|█████████ | 337620/371472 [4:41:47<2:47:20, 3.37it/s] 91%|█████████ | 337621/371472 [4:41:47<2:43:27, 3.45it/s] 91%|█████████ | 337622/371472 [4:41:47<2:35:55, 3.62it/s] 91%|█████████ | 337623/371472 [4:41:48<2:42:28, 3.47it/s] 91%|█████████ | 337624/371472 [4:41:48<2:40:43, 3.51it/s] 91%|█████████ | 337625/371472 [4:41:48<2:36:25, 3.61it/s] 91%|█████████ | 337626/371472 [4:41:48<2:32:50, 3.69it/s] 91%|█████████ | 337627/371472 [4:41:49<2:31:43, 3.72it/s] 91%|█████████ | 337628/371472 [4:41:49<2:44:22, 3.43it/s] 91%|█████████ | 337629/371472 [4:41:49<2:38:23, 3.56it/s] 91%|█████████ | 337630/371472 [4:41:49<2:34:33, 3.65it/s] 91%|█████████ | 337631/371472 [4:41:50<2:36:17, 3.61it/s] 91%|█████████ | 337632/371472 [4:41:50<2:45:58, 3.40it/s] 91%|█████████ | 337633/371472 [4:41:50<2:37:44, 3.58it/s] 91%|█████████ | 337634/371472 [4:41:51<2:33:26, 3.68it/s] 91%|█████████ | 337635/371472 [4:41:51<2:31:58, 3.71it/s] 91%|█████████ | 337636/371472 [4:41:51<2:32:23, 3.70it/s] 91%|█████████ | 337637/371472 [4:41:51<2:32:22, 3.70it/s] 91%|█████████ | 337638/371472 [4:41:52<2:38:56, 3.55it/s] 91%|█████████ | 337639/371472 [4:41:52<2:33:40, 3.67it/s] 91%|█████████ | 337640/371472 [4:41:52<2:34:22, 3.65it/s] {'loss': 2.7285, 'learning_rate': 1.820121097200974e-07, 'epoch': 14.54} + 91%|█████████ | 337640/371472 [4:41:52<2:34:22, 3.65it/s] 91%|█████████ | 337641/371472 [4:41:53<2:37:49, 3.57it/s] 91%|█████████ | 337642/371472 [4:41:53<2:38:31, 3.56it/s] 91%|█████████ | 337643/371472 [4:41:53<2:38:05, 3.57it/s] 91%|█████████ | 337644/371472 [4:41:53<2:34:45, 3.64it/s] 91%|█████████ | 337645/371472 [4:41:54<2:31:27, 3.72it/s] 91%|█████████ | 337646/371472 [4:41:54<2:42:07, 3.48it/s] 91%|█████████ | 337647/371472 [4:41:54<2:49:16, 3.33it/s] 91%|█████████ | 337648/371472 [4:41:55<2:45:39, 3.40it/s] 91%|█████████ | 337649/371472 [4:41:55<2:39:52, 3.53it/s] 91%|█████████ | 337650/371472 [4:41:55<2:36:24, 3.60it/s] 91%|█████████ | 337651/371472 [4:41:55<2:33:39, 3.67it/s] 91%|█████████ | 337652/371472 [4:41:56<2:35:08, 3.63it/s] 91%|█████████ | 337653/371472 [4:41:56<2:32:59, 3.68it/s] 91%|█████████ | 337654/371472 [4:41:56<2:39:34, 3.53it/s] 91%|█████████ | 337655/371472 [4:41:56<2:46:13, 3.39it/s] 91%|█████████ | 337656/371472 [4:41:57<2:44:14, 3.43it/s] 91%|█████████ | 337657/371472 [4:41:57<2:37:33, 3.58it/s] 91%|█████████ | 337658/371472 [4:41:57<2:44:00, 3.44it/s] 91%|█████████ | 337659/371472 [4:41:58<2:38:15, 3.56it/s] 91%|█████████ | 337660/371472 [4:41:58<2:42:00, 3.48it/s] {'loss': 2.6008, 'learning_rate': 1.8196362774461846e-07, 'epoch': 14.54} + 91%|█████████ | 337660/371472 [4:41:58<2:42:00, 3.48it/s] 91%|█████████ | 337661/371472 [4:41:58<2:41:07, 3.50it/s] 91%|█████████ | 337662/371472 [4:41:58<2:38:51, 3.55it/s] 91%|█████████ | 337663/371472 [4:41:59<2:40:09, 3.52it/s] 91%|█████████ | 337664/371472 [4:41:59<2:35:48, 3.62it/s] 91%|█████████ | 337665/371472 [4:41:59<2:31:25, 3.72it/s] 91%|█████████ | 337666/371472 [4:41:59<2:27:43, 3.81it/s] 91%|█████████ | 337667/371472 [4:42:00<2:24:32, 3.90it/s] 91%|█████████ | 337668/371472 [4:42:00<2:27:50, 3.81it/s] 91%|█████████ | 337669/371472 [4:42:00<2:44:29, 3.42it/s] 91%|█████████ | 337670/371472 [4:42:01<2:44:33, 3.42it/s] 91%|█████████ | 337671/371472 [4:42:01<2:48:25, 3.34it/s] 91%|█████████ | 337672/371472 [4:42:01<2:39:12, 3.54it/s] 91%|█████████ | 337673/371472 [4:42:02<2:40:50, 3.50it/s] 91%|█████████ | 337674/371472 [4:42:02<2:37:47, 3.57it/s] 91%|█████████ | 337675/371472 [4:42:02<2:31:55, 3.71it/s] 91%|█████████ | 337676/371472 [4:42:02<2:27:54, 3.81it/s] 91%|█████████ | 337677/371472 [4:42:03<2:24:56, 3.89it/s] 91%|█████████ | 337678/371472 [4:42:03<2:23:10, 3.93it/s] 91%|█████████ | 337679/371472 [4:42:03<2:27:52, 3.81it/s] 91%|█████████ | 337680/371472 [4:42:03<2:29:46, 3.76it/s] {'loss': 2.617, 'learning_rate': 1.8191514576913963e-07, 'epoch': 14.54} + 91%|█████████ | 337680/371472 [4:42:03<2:29:46, 3.76it/s] 91%|█████████ | 337681/371472 [4:42:04<2:34:32, 3.64it/s] 91%|█████████ | 337682/371472 [4:42:04<2:43:31, 3.44it/s] 91%|█████████ | 337683/371472 [4:42:04<2:42:55, 3.46it/s] 91%|█████████ | 337684/371472 [4:42:05<2:47:53, 3.35it/s] 91%|█████████ | 337685/371472 [4:42:05<2:59:45, 3.13it/s] 91%|█████████ | 337686/371472 [4:42:06<3:55:50, 2.39it/s] 91%|█████████ | 337687/371472 [4:42:06<3:54:09, 2.40it/s] 91%|█████████ | 337688/371472 [4:42:06<3:35:04, 2.62it/s] 91%|█████████ | 337689/371472 [4:42:07<3:16:23, 2.87it/s] 91%|█████████ | 337690/371472 [4:42:07<3:01:36, 3.10it/s] 91%|█████████ | 337691/371472 [4:42:07<2:53:38, 3.24it/s] 91%|█████████ | 337692/371472 [4:42:07<2:42:20, 3.47it/s] 91%|█████████ | 337693/371472 [4:42:08<2:37:40, 3.57it/s] 91%|█████████ | 337694/371472 [4:42:08<2:29:22, 3.77it/s] 91%|█████████ | 337695/371472 [4:42:08<2:26:19, 3.85it/s] 91%|█████████ | 337696/371472 [4:42:08<2:37:33, 3.57it/s] 91%|█████████ | 337697/371472 [4:42:09<2:34:23, 3.65it/s] 91%|█████████ | 337698/371472 [4:42:09<2:27:48, 3.81it/s] 91%|█████████ | 337699/371472 [4:42:09<2:30:54, 3.73it/s] 91%|█████████ | 337700/371472 [4:42:09<2:27:29, 3.82it/s] {'loss': 2.5139, 'learning_rate': 1.8186666379366068e-07, 'epoch': 14.55} + 91%|█████████ | 337700/371472 [4:42:09<2:27:29, 3.82it/s] 91%|█████████ | 337701/371472 [4:42:10<2:28:09, 3.80it/s] 91%|█████████ | 337702/371472 [4:42:10<2:25:23, 3.87it/s] 91%|█████████ | 337703/371472 [4:42:10<2:28:51, 3.78it/s] 91%|█████████ | 337704/371472 [4:42:11<2:32:23, 3.69it/s] 91%|█████████ | 337705/371472 [4:42:11<2:39:49, 3.52it/s] 91%|█████████ | 337706/371472 [4:42:11<2:47:55, 3.35it/s] 91%|█████████ | 337707/371472 [4:42:11<2:54:08, 3.23it/s] 91%|█████████ | 337708/371472 [4:42:12<2:56:56, 3.18it/s] 91%|█████████ | 337709/371472 [4:42:12<2:47:54, 3.35it/s] 91%|█████████ | 337710/371472 [4:42:12<2:39:16, 3.53it/s] 91%|█████████ | 337711/371472 [4:42:13<2:35:14, 3.62it/s] 91%|█████████ | 337712/371472 [4:42:13<2:54:54, 3.22it/s] 91%|█████████ | 337713/371472 [4:42:13<2:52:43, 3.26it/s] 91%|█████████ | 337714/371472 [4:42:14<2:49:58, 3.31it/s] 91%|█████████ | 337715/371472 [4:42:14<2:45:09, 3.41it/s] 91%|█████████ | 337716/371472 [4:42:14<2:44:02, 3.43it/s] 91%|█████████ | 337717/371472 [4:42:14<2:38:43, 3.54it/s] 91%|█████████ | 337718/371472 [4:42:15<2:38:46, 3.54it/s] 91%|█████████ | 337719/371472 [4:42:15<2:32:26, 3.69it/s] 91%|█████████ | 337720/371472 [4:42:15<2:28:16, 3.79it/s] {'loss': 2.7798, 'learning_rate': 1.8181818181818186e-07, 'epoch': 14.55} + 91%|█████████ | 337720/371472 [4:42:15<2:28:16, 3.79it/s] 91%|█████████ | 337721/371472 [4:42:15<2:25:45, 3.86it/s] 91%|█████████ | 337722/371472 [4:42:16<2:33:56, 3.65it/s] 91%|█████████ | 337723/371472 [4:42:16<2:32:43, 3.68it/s] 91%|█████████ | 337724/371472 [4:42:16<2:37:45, 3.57it/s] 91%|█████████ | 337725/371472 [4:42:17<2:34:33, 3.64it/s] 91%|█████████ | 337726/371472 [4:42:17<2:38:16, 3.55it/s] 91%|█████████ | 337727/371472 [4:42:17<2:39:35, 3.52it/s] 91%|█████████ | 337728/371472 [4:42:17<2:35:37, 3.61it/s] 91%|█████████ | 337729/371472 [4:42:18<2:47:26, 3.36it/s] 91%|█████████ | 337730/371472 [4:42:18<2:41:56, 3.47it/s] 91%|█████████ | 337731/371472 [4:42:18<2:51:00, 3.29it/s] 91%|█████████ | 337732/371472 [4:42:19<2:45:20, 3.40it/s] 91%|█████████ | 337733/371472 [4:42:19<2:40:37, 3.50it/s] 91%|█████████ | 337734/371472 [4:42:19<2:36:24, 3.59it/s] 91%|█████████ | 337735/371472 [4:42:19<2:32:51, 3.68it/s] 91%|█████████ | 337736/371472 [4:42:20<2:37:55, 3.56it/s] 91%|████��████ | 337737/371472 [4:42:20<2:36:51, 3.58it/s] 91%|█████████ | 337738/371472 [4:42:20<2:40:20, 3.51it/s] 91%|█████████ | 337739/371472 [4:42:21<2:40:03, 3.51it/s] 91%|█████████ | 337740/371472 [4:42:21<2:35:09, 3.62it/s] {'loss': 2.5051, 'learning_rate': 1.817696998427029e-07, 'epoch': 14.55} + 91%|█████████ | 337740/371472 [4:42:21<2:35:09, 3.62it/s] 91%|█████████ | 337741/371472 [4:42:21<2:43:11, 3.44it/s] 91%|█████████ | 337742/371472 [4:42:21<2:41:01, 3.49it/s] 91%|█████████ | 337743/371472 [4:42:22<2:46:10, 3.38it/s] 91%|█████████ | 337744/371472 [4:42:22<2:47:42, 3.35it/s] 91%|█████████ | 337745/371472 [4:42:22<2:42:49, 3.45it/s] 91%|█████████ | 337746/371472 [4:42:23<2:49:12, 3.32it/s] 91%|█████████ | 337747/371472 [4:42:23<2:44:39, 3.41it/s] 91%|█████████ | 337748/371472 [4:42:23<2:37:24, 3.57it/s] 91%|█████████ | 337749/371472 [4:42:24<2:49:03, 3.32it/s] 91%|█████████ | 337750/371472 [4:42:24<2:44:48, 3.41it/s] 91%|█████████ | 337751/371472 [4:42:24<2:50:59, 3.29it/s] 91%|█████████ | 337752/371472 [4:42:24<2:42:37, 3.46it/s] 91%|█████████ | 337753/371472 [4:42:25<2:40:36, 3.50it/s] 91%|█████████ | 337754/371472 [4:42:25<2:40:17, 3.51it/s] 91%|█████████ | 337755/371472 [4:42:25<2:37:19, 3.57it/s] 91%|█████████ | 337756/371472 [4:42:25<2:39:49, 3.52it/s] 91%|█████████ | 337757/371472 [4:42:26<2:35:52, 3.60it/s] 91%|█████████ | 337758/371472 [4:42:26<2:38:50, 3.54it/s] 91%|█████████ | 337759/371472 [4:42:26<2:41:00, 3.49it/s] 91%|█████████ | 337760/371472 [4:42:27<2:39:38, 3.52it/s] {'loss': 2.6823, 'learning_rate': 1.8172121786722405e-07, 'epoch': 14.55} + 91%|█████████ | 337760/371472 [4:42:27<2:39:38, 3.52it/s] 91%|█████████ | 337761/371472 [4:42:27<2:34:58, 3.63it/s] 91%|█████████ | 337762/371472 [4:42:27<2:37:51, 3.56it/s] 91%|█████████ | 337763/371472 [4:42:28<3:06:48, 3.01it/s] 91%|█████████ | 337764/371472 [4:42:28<2:54:39, 3.22it/s] 91%|█████████ | 337765/371472 [4:42:28<2:47:03, 3.36it/s] 91%|█████████ | 337766/371472 [4:42:28<2:49:54, 3.31it/s] 91%|█████████ | 337767/371472 [4:42:29<2:54:02, 3.23it/s] 91%|█████████ | 337768/371472 [4:42:29<3:00:31, 3.11it/s] 91%|█████████ | 337769/371472 [4:42:29<2:56:44, 3.18it/s] 91%|█████████ | 337770/371472 [4:42:30<2:54:56, 3.21it/s] 91%|█████████ | 337771/371472 [4:42:30<2:47:53, 3.35it/s] 91%|█████████ | 337772/371472 [4:42:30<2:37:44, 3.56it/s] 91%|█████████ | 337773/371472 [4:42:31<2:41:57, 3.47it/s] 91%|█████████ | 337774/371472 [4:42:31<2:35:01, 3.62it/s] 91%|█████████ | 337775/371472 [4:42:31<2:32:57, 3.67it/s] 91%|█████████ | 337776/371472 [4:42:31<2:40:39, 3.50it/s] 91%|█████████ | 337777/371472 [4:42:32<2:33:24, 3.66it/s] 91%|█████████ | 337778/371472 [4:42:32<2:30:40, 3.73it/s] 91%|█████████ | 337779/371472 [4:42:32<2:34:05, 3.64it/s] 91%|█████████ | 337780/371472 [4:42:32<2:30:07, 3.74it/s] {'loss': 2.4576, 'learning_rate': 1.816727358917451e-07, 'epoch': 14.55} + 91%|█████████ | 337780/371472 [4:42:32<2:30:07, 3.74it/s] 91%|█████████ | 337781/371472 [4:42:33<2:33:38, 3.65it/s] 91%|█████████ | 337782/371472 [4:42:33<2:46:50, 3.37it/s] 91%|█████████ | 337783/371472 [4:42:33<2:39:00, 3.53it/s] 91%|█████████ | 337784/371472 [4:42:34<2:35:45, 3.60it/s] 91%|█████████ | 337785/371472 [4:42:34<2:49:04, 3.32it/s] 91%|█████████ | 337786/371472 [4:42:34<2:48:13, 3.34it/s] 91%|█████████ | 337787/371472 [4:42:34<2:41:50, 3.47it/s] 91%|█████████ | 337788/371472 [4:42:35<2:36:12, 3.59it/s] 91%|█████████ | 337789/371472 [4:42:35<2:32:38, 3.68it/s] 91%|█████████ | 337790/371472 [4:42:35<2:33:36, 3.65it/s] 91%|█████████ | 337791/371472 [4:42:36<2:31:44, 3.70it/s] 91%|█████████ | 337792/371472 [4:42:36<2:35:42, 3.61it/s] 91%|█████████ | 337793/371472 [4:42:36<2:37:39, 3.56it/s] 91%|█████████ | 337794/371472 [4:42:36<2:34:05, 3.64it/s] 91%|█████████ | 337795/371472 [4:42:37<2:31:30, 3.70it/s] 91%|█████████ | 337796/371472 [4:42:37<2:40:25, 3.50it/s] 91%|█████████ | 337797/371472 [4:42:37<2:39:09, 3.53it/s] 91%|█████████ | 337798/371472 [4:42:38<2:44:59, 3.40it/s] 91%|█████████ | 337799/371472 [4:42:38<2:39:32, 3.52it/s] 91%|█████████ | 337800/371472 [4:42:38<2:34:18, 3.64it/s] {'loss': 2.6677, 'learning_rate': 1.8162425391626627e-07, 'epoch': 14.55} + 91%|█████████ | 337800/371472 [4:42:38<2:34:18, 3.64it/s] 91%|█████████ | 337801/371472 [4:42:38<2:31:11, 3.71it/s] 91%|█████████ | 337802/371472 [4:42:39<2:27:13, 3.81it/s] 91%|█████████ | 337803/371472 [4:42:39<2:37:06, 3.57it/s] 91%|█████████ | 337804/371472 [4:42:39<2:34:02, 3.64it/s] 91%|█████████ | 337805/371472 [4:42:39<2:35:03, 3.62it/s] 91%|█████████ | 337806/371472 [4:42:40<2:37:05, 3.57it/s] 91%|█████████ | 337807/371472 [4:42:40<2:33:52, 3.65it/s] 91%|█████████ | 337808/371472 [4:42:40<2:33:14, 3.66it/s] 91%|█████████ | 337809/371472 [4:42:41<2:34:54, 3.62it/s] 91%|█████████ | 337810/371472 [4:42:41<2:32:25, 3.68it/s] 91%|█████████ | 337811/371472 [4:42:41<2:50:20, 3.29it/s] 91%|█████████ | 337812/371472 [4:42:41<2:42:14, 3.46it/s] 91%|█████████ | 337813/371472 [4:42:42<2:39:32, 3.52it/s] 91%|█████████ | 337814/371472 [4:42:42<2:45:10, 3.40it/s] 91%|█████████ | 337815/371472 [4:42:42<2:35:40, 3.60it/s] 91%|█████████ | 337816/371472 [4:42:43<2:32:14, 3.68it/s] 91%|█████████ | 337817/371472 [4:42:43<2:28:37, 3.77it/s] 91%|█████████ | 337818/371472 [4:42:43<2:30:38, 3.72it/s] 91%|█████████ | 337819/371472 [4:42:43<2:27:37, 3.80it/s] 91%|█████████ | 337820/371472 [4:42:44<2:31:28, 3.70it/s] {'loss': 2.4616, 'learning_rate': 1.8157577194078732e-07, 'epoch': 14.55} + 91%|█████████ | 337820/371472 [4:42:44<2:31:28, 3.70it/s] 91%|█████████ | 337821/371472 [4:42:44<2:33:39, 3.65it/s] 91%|█████████ | 337822/371472 [4:42:44<2:31:14, 3.71it/s] 91%|█████████ | 337823/371472 [4:42:44<2:32:03, 3.69it/s] 91%|█████████ | 337824/371472 [4:42:45<2:42:13, 3.46it/s] 91%|█████████ | 337825/371472 [4:42:45<2:36:23, 3.59it/s] 91%|█████████ | 337826/371472 [4:42:45<2:30:41, 3.72it/s] 91%|█████████ | 337827/371472 [4:42:46<2:36:28, 3.58it/s] 91%|█████████ | 337828/371472 [4:42:46<2:33:08, 3.66it/s] 91%|█████████ | 337829/371472 [4:42:46<2:33:25, 3.65it/s] 91%|█████████ | 337830/371472 [4:42:46<2:38:24, 3.54it/s] 91%|█████████ | 337831/371472 [4:42:47<2:34:22, 3.63it/s] 91%|█████████ | 337832/371472 [4:42:47<2:36:45, 3.58it/s] 91%|█████████ | 337833/371472 [4:42:47<2:29:47, 3.74it/s] 91%|█████████ | 337834/371472 [4:42:47<2:34:25, 3.63it/s] 91%|█████████ | 337835/371472 [4:42:48<2:34:04, 3.64it/s] 91%|█████████ | 337836/371472 [4:42:48<2:41:16, 3.48it/s] 91%|█████████ | 337837/371472 [4:42:48<2:40:01, 3.50it/s] 91%|█████████ | 337838/371472 [4:42:49<2:46:24, 3.37it/s] 91%|█████████ | 337839/371472 [4:42:49<2:51:30, 3.27it/s] 91%|█████████ | 337840/371472 [4:42:49<2:42:26, 3.45it/s] {'loss': 2.529, 'learning_rate': 1.8152728996530847e-07, 'epoch': 14.55} + 91%|█████████ | 337840/371472 [4:42:49<2:42:26, 3.45it/s] 91%|█████████ | 337841/371472 [4:42:49<2:35:18, 3.61it/s] 91%|█████████ | 337842/371472 [4:42:50<2:31:08, 3.71it/s] 91%|█████████ | 337843/371472 [4:42:50<2:28:53, 3.76it/s] 91%|█████████ | 337844/371472 [4:42:50<2:35:11, 3.61it/s] 91%|█████████ | 337845/371472 [4:42:51<2:35:11, 3.61it/s] 91%|█████████ | 337846/371472 [4:42:51<2:35:03, 3.61it/s] 91%|█████████ | 337847/371472 [4:42:51<2:35:05, 3.61it/s] 91%|█████████ | 337848/371472 [4:42:51<2:28:12, 3.78it/s] 91%|█████████ | 337849/371472 [4:42:52<2:27:16, 3.80it/s] 91%|█████████ | 337850/371472 [4:42:52<2:25:22, 3.85it/s] 91%|█████████ | 337851/371472 [4:42:52<2:36:56, 3.57it/s] 91%|█████████ | 337852/371472 [4:42:52<2:30:05, 3.73it/s] 91%|█████████ | 337853/371472 [4:42:53<2:34:16, 3.63it/s] 91%|█████████ | 337854/371472 [4:42:53<2:33:40, 3.65it/s] 91%|█████████ | 337855/371472 [4:42:53<2:31:46, 3.69it/s] 91%|█████████ | 337856/371472 [4:42:54<2:30:16, 3.73it/s] 91%|█████████ | 337857/371472 [4:42:54<2:33:00, 3.66it/s] 91%|█████████ | 337858/371472 [4:42:54<2:28:28, 3.77it/s] 91%|█████████ | 337859/371472 [4:42:54<2:38:40, 3.53it/s] 91%|█████████ | 337860/371472 [4:42:55<2:39:16, 3.52it/s] {'loss': 2.5993, 'learning_rate': 1.8147880798982954e-07, 'epoch': 14.55} + 91%|█████████ | 337860/371472 [4:42:55<2:39:16, 3.52it/s] 91%|█████████ | 337861/371472 [4:42:55<2:48:56, 3.32it/s] 91%|█████████ | 337862/371472 [4:42:55<2:46:31, 3.36it/s] 91%|█████████ | 337863/371472 [4:42:56<2:41:46, 3.46it/s] 91%|█████████ | 337864/371472 [4:42:56<2:36:35, 3.58it/s] 91%|█████████ | 337865/371472 [4:42:56<2:39:08, 3.52it/s] 91%|█████████ | 337866/371472 [4:42:56<2:34:02, 3.64it/s] 91%|█████████ | 337867/371472 [4:42:57<2:30:34, 3.72it/s] 91%|█████████ | 337868/371472 [4:42:57<2:30:18, 3.73it/s] 91%|█████████ | 337869/371472 [4:42:57<2:26:52, 3.81it/s] 91%|█████████ | 337870/371472 [4:42:57<2:33:12, 3.66it/s] 91%|█████████ | 337871/371472 [4:42:58<2:41:38, 3.46it/s] 91%|█████████ | 337872/371472 [4:42:58<2:40:39, 3.49it/s] 91%|█████████ | 337873/371472 [4:42:58<2:33:53, 3.64it/s] 91%|█████████ | 337874/371472 [4:42:59<2:32:22, 3.67it/s] 91%|█████████ | 337875/371472 [4:42:59<2:25:14, 3.86it/s] 91%|█████████ | 337876/371472 [4:42:59<2:24:33, 3.87it/s] 91%|█████████ | 337877/371472 [4:42:59<2:24:21, 3.88it/s] 91%|█████████ | 337878/371472 [4:43:00<2:40:46, 3.48it/s] 91%|█████████ | 337879/371472 [4:43:00<2:40:36, 3.49it/s] 91%|█████████ | 337880/371472 [4:43:00<2:38:46, 3.53it/s] {'loss': 2.7066, 'learning_rate': 1.814303260143507e-07, 'epoch': 14.55} + 91%|█████████ | 337880/371472 [4:43:00<2:38:46, 3.53it/s] 91%|█████████ | 337881/371472 [4:43:00<2:35:12, 3.61it/s] 91%|█████████ | 337882/371472 [4:43:01<2:32:12, 3.68it/s] 91%|█████████ | 337883/371472 [4:43:01<2:36:01, 3.59it/s] 91%|█████████ | 337884/371472 [4:43:01<2:47:22, 3.34it/s] 91%|█████████ | 337885/371472 [4:43:02<2:40:35, 3.49it/s] 91%|█████████ | 337886/371472 [4:43:02<2:39:49, 3.50it/s] 91%|█████████ | 337887/371472 [4:43:02<2:39:58, 3.50it/s] 91%|█████████ | 337888/371472 [4:43:02<2:35:56, 3.59it/s] 91%|█████████ | 337889/371472 [4:43:03<2:36:44, 3.57it/s] 91%|█████████ | 337890/371472 [4:43:03<2:32:26, 3.67it/s] 91%|█████████ | 337891/371472 [4:43:03<2:30:46, 3.71it/s] 91%|█████████ | 337892/371472 [4:43:04<2:34:40, 3.62it/s] 91%|█████████ | 337893/371472 [4:43:04<2:39:58, 3.50it/s] 91%|█████████ | 337894/371472 [4:43:04<2:40:22, 3.49it/s] 91%|█████████ | 337895/371472 [4:43:04<2:36:53, 3.57it/s] 91%|█████████ | 337896/371472 [4:43:05<2:37:30, 3.55it/s] 91%|█████████ | 337897/371472 [4:43:05<2:47:59, 3.33it/s] 91%|█████████ | 337898/371472 [4:43:06<3:12:24, 2.91it/s] 91%|█████████ | 337899/371472 [4:43:06<3:02:24, 3.07it/s] 91%|█████████ | 337900/371472 [4:43:06<2:55:45, 3.18it/s] {'loss': 2.5157, 'learning_rate': 1.8138184403887174e-07, 'epoch': 14.55} + 91%|█████████ | 337900/371472 [4:43:06<2:55:45, 3.18it/s] 91%|█████████ | 337901/371472 [4:43:06<2:52:53, 3.24it/s] 91%|█████████ | 337902/371472 [4:43:07<2:41:20, 3.47it/s] 91%|█████████ | 337903/371472 [4:43:07<2:44:14, 3.41it/s] 91%|█████████ | 337904/371472 [4:43:07<2:40:33, 3.48it/s] 91%|█████████ | 337905/371472 [4:43:07<2:32:42, 3.66it/s] 91%|█████████ | 337906/371472 [4:43:08<2:29:23, 3.74it/s] 91%|█████████ | 337907/371472 [4:43:08<2:36:47, 3.57it/s] 91%|█████████ | 337908/371472 [4:43:08<2:38:06, 3.54it/s] 91%|█████████ | 337909/371472 [4:43:09<2:33:11, 3.65it/s] 91%|█████████ | 337910/371472 [4:43:09<2:30:31, 3.72it/s] 91%|█████████ | 337911/371472 [4:43:09<2:48:39, 3.32it/s] 91%|█████████ | 337912/371472 [4:43:09<2:44:25, 3.40it/s] 91%|█████████ | 337913/371472 [4:43:10<2:42:30, 3.44it/s] 91%|█████████ | 337914/371472 [4:43:10<2:39:06, 3.52it/s] 91%|█████████ | 337915/371472 [4:43:10<2:37:42, 3.55it/s] 91%|█████████ | 337916/371472 [4:43:11<2:35:28, 3.60it/s] 91%|█████████ | 337917/371472 [4:43:11<2:39:12, 3.51it/s] 91%|█████████ | 337918/371472 [4:43:11<2:38:28, 3.53it/s] 91%|█████████ | 337919/371472 [4:43:11<2:34:38, 3.62it/s] 91%|█████████ | 337920/371472 [4:43:12<2:31:55, 3.68it/s] {'loss': 2.6327, 'learning_rate': 1.8133336206339291e-07, 'epoch': 14.55} + 91%|█████████ | 337920/371472 [4:43:12<2:31:55, 3.68it/s] 91%|█████████ | 337921/371472 [4:43:12<2:32:07, 3.68it/s] 91%|█████████ | 337922/371472 [4:43:12<2:36:19, 3.58it/s] 91%|█████████ | 337923/371472 [4:43:12<2:33:08, 3.65it/s] 91%|█████████ | 337924/371472 [4:43:13<2:28:26, 3.77it/s] 91%|█████████ | 337925/371472 [4:43:13<2:38:45, 3.52it/s] 91%|█████████ | 337926/371472 [4:43:13<2:35:24, 3.60it/s] 91%|█████████ | 337927/371472 [4:43:14<2:32:46, 3.66it/s] 91%|█████████ | 337928/371472 [4:43:14<2:32:24, 3.67it/s] 91%|█████████ | 337929/371472 [4:43:14<2:34:38, 3.62it/s] 91%|█████████ | 337930/371472 [4:43:14<2:36:36, 3.57it/s] 91%|█████████ | 337931/371472 [4:43:15<2:32:27, 3.67it/s] 91%|█████████ | 337932/371472 [4:43:15<2:34:24, 3.62it/s] 91%|█████████ | 337933/371472 [4:43:15<2:34:32, 3.62it/s] 91%|█████████ | 337934/371472 [4:43:16<2:35:33, 3.59it/s] 91%|█████████ | 337935/371472 [4:43:16<2:38:04, 3.54it/s] 91%|█████████ | 337936/371472 [4:43:16<2:47:25, 3.34it/s] 91%|█████████ | 337937/371472 [4:43:16<2:41:55, 3.45it/s] 91%|█████████ | 337938/371472 [4:43:17<2:49:18, 3.30it/s] 91%|█████████ | 337939/371472 [4:43:17<2:46:08, 3.36it/s] 91%|█████████ | 337940/371472 [4:43:17<2:35:39, 3.59it/s] {'loss': 2.5274, 'learning_rate': 1.8128488008791399e-07, 'epoch': 14.56} + 91%|█████████ | 337940/371472 [4:43:17<2:35:39, 3.59it/s] 91%|█████████ | 337941/371472 [4:43:18<2:45:24, 3.38it/s] 91%|█████████ | 337942/371472 [4:43:18<2:40:40, 3.48it/s] 91%|█████████ | 337943/371472 [4:43:18<2:40:45, 3.48it/s] 91%|█████████ | 337944/371472 [4:43:18<2:35:08, 3.60it/s] 91%|█████████ | 337945/371472 [4:43:19<2:44:59, 3.39it/s] 91%|█████████ | 337946/371472 [4:43:19<2:42:06, 3.45it/s] 91%|█████████ | 337947/371472 [4:43:19<2:33:47, 3.63it/s] 91%|█████████ | 337948/371472 [4:43:20<2:28:28, 3.76it/s] 91%|█████████ | 337949/371472 [4:43:20<2:28:29, 3.76it/s] 91%|█████████ | 337950/371472 [4:43:20<2:32:04, 3.67it/s] 91%|█████████ | 337951/371472 [4:43:20<2:30:25, 3.71it/s] 91%|█████████ | 337952/371472 [4:43:21<2:26:37, 3.81it/s] 91%|█████████ | 337953/371472 [4:43:21<2:27:45, 3.78it/s] 91%|█████████ | 337954/371472 [4:43:21<2:29:09, 3.75it/s] 91%|█████████ | 337955/371472 [4:43:21<2:41:22, 3.46it/s] 91%|█████████ | 337956/371472 [4:43:22<2:38:44, 3.52it/s] 91%|█████████ | 337957/371472 [4:43:22<2:35:07, 3.60it/s] 91%|█████████ | 337958/371472 [4:43:22<2:33:22, 3.64it/s] 91%|█████████ | 337959/371472 [4:43:23<2:57:14, 3.15it/s] 91%|█████████ | 337960/371472 [4:43:23<2:46:45, 3.35it/s] {'loss': 2.477, 'learning_rate': 1.812363981124351e-07, 'epoch': 14.56} + 91%|█████████ | 337960/371472 [4:43:23<2:46:45, 3.35it/s] 91%|█████████ | 337961/371472 [4:43:23<2:43:20, 3.42it/s] 91%|█████████ | 337962/371472 [4:43:23<2:40:05, 3.49it/s] 91%|█████████ | 337963/371472 [4:43:24<2:34:45, 3.61it/s] 91%|█████████ | 337964/371472 [4:43:24<2:35:16, 3.60it/s] 91%|█████████ | 337965/371472 [4:43:24<2:36:55, 3.56it/s] 91%|█████████ | 337966/371472 [4:43:25<2:38:52, 3.51it/s] 91%|█████████ | 337967/371472 [4:43:25<2:31:57, 3.67it/s] 91%|█████████ | 337968/371472 [4:43:25<2:40:30, 3.48it/s] 91%|█████████ | 337969/371472 [4:43:26<2:47:59, 3.32it/s] 91%|█████████ | 337970/371472 [4:43:26<2:46:42, 3.35it/s] 91%|█████████ | 337971/371472 [4:43:26<2:43:38, 3.41it/s] 91%|█████████ | 337972/371472 [4:43:26<2:49:48, 3.29it/s] 91%|█████████ | 337973/371472 [4:43:27<2:42:47, 3.43it/s] 91%|█████████ | 337974/371472 [4:43:27<2:46:08, 3.36it/s] 91%|█████████ | 337975/371472 [4:43:27<2:46:31, 3.35it/s] 91%|█████████ | 337976/371472 [4:43:28<2:44:41, 3.39it/s] 91%|█████████ | 337977/371472 [4:43:28<2:57:44, 3.14it/s] 91%|█████████ | 337978/371472 [4:43:28<2:58:38, 3.12it/s] 91%|█████████ | 337979/371472 [4:43:29<2:58:10, 3.13it/s] 91%|█████████ | 337980/371472 [4:43:29<2:50:02, 3.28it/s] {'loss': 2.5406, 'learning_rate': 1.8118791613695618e-07, 'epoch': 14.56} + 91%|█████████ | 337980/371472 [4:43:29<2:50:02, 3.28it/s] 91%|█████████ | 337981/371472 [4:43:29<2:44:42, 3.39it/s] 91%|█████████ | 337982/371472 [4:43:29<2:45:53, 3.36it/s] 91%|█████████ | 337983/371472 [4:43:30<2:41:27, 3.46it/s] 91%|█████████ | 337984/371472 [4:43:30<2:47:38, 3.33it/s] 91%|█████████ | 337985/371472 [4:43:30<2:46:48, 3.35it/s] 91%|█████████ | 337986/371472 [4:43:31<2:40:31, 3.48it/s] 91%|█████████ | 337987/371472 [4:43:31<2:49:08, 3.30it/s] 91%|█████████ | 337988/371472 [4:43:31<2:45:09, 3.38it/s] 91%|█████████ | 337989/371472 [4:43:31<2:42:42, 3.43it/s] 91%|█████████ | 337990/371472 [4:43:32<2:36:57, 3.56it/s] 91%|█████████ | 337991/371472 [4:43:32<2:30:01, 3.72it/s] 91%|█████████ | 337992/371472 [4:43:32<2:26:57, 3.80it/s] 91%|█████████ | 337993/371472 [4:43:33<2:34:34, 3.61it/s] 91%|█████████ | 337994/371472 [4:43:33<2:48:10, 3.32it/s] 91%|█████████ | 337995/371472 [4:43:33<2:43:45, 3.41it/s] 91%|█████████ | 337996/371472 [4:43:33<2:44:39, 3.39it/s] 91%|█████████ | 337997/371472 [4:43:34<2:43:20, 3.42it/s] 91%|█████████ | 337998/371472 [4:43:34<2:38:20, 3.52it/s] 91%|█████████ | 337999/371472 [4:43:34<2:33:10, 3.64it/s] 91%|█████████ | 338000/371472 [4:43:35<2:31:44, 3.68it/s] {'loss': 2.6239, 'learning_rate': 1.8113943416147725e-07, 'epoch': 14.56} + 91%|█████████ | 338000/371472 [4:43:35<2:31:44, 3.68it/s] 91%|█████████ | 338001/371472 [4:43:35<2:34:52, 3.60it/s] 91%|█████████ | 338002/371472 [4:43:35<2:34:59, 3.60it/s] 91%|█████████ | 338003/371472 [4:43:35<2:31:01, 3.69it/s] 91%|█████████ | 338004/371472 [4:43:36<2:33:20, 3.64it/s] 91%|█████████ | 338005/371472 [4:43:36<2:42:42, 3.43it/s] 91%|█████████ | 338006/371472 [4:43:36<2:43:51, 3.40it/s] 91%|█████████ | 338007/371472 [4:43:37<2:47:59, 3.32it/s] 91%|█████████ | 338008/371472 [4:43:37<2:53:32, 3.21it/s] 91%|█████████ | 338009/371472 [4:43:37<2:52:46, 3.23it/s] 91%|█████████ | 338010/371472 [4:43:38<2:54:37, 3.19it/s] 91%|█████████ | 338011/371472 [4:43:38<2:57:47, 3.14it/s] 91%|█████████ | 338012/371472 [4:43:38<2:51:17, 3.26it/s] 91%|█████████ | 338013/371472 [4:43:38<2:42:14, 3.44it/s] 91%|█████████ | 338014/371472 [4:43:39<2:38:54, 3.51it/s] 91%|█████████ | 338015/371472 [4:43:39<2:37:09, 3.55it/s] 91%|█████████ | 338016/371472 [4:43:39<2:32:34, 3.65it/s] 91%|█████████ | 338017/371472 [4:43:39<2:32:38, 3.65it/s] 91%|█████████ | 338018/371472 [4:43:40<2:36:07, 3.57it/s] 91%|█████████ | 338019/371472 [4:43:40<2:39:01, 3.51it/s] 91%|█████████ | 338020/371472 [4:43:40<2:33:47, 3.63it/s] {'loss': 2.4821, 'learning_rate': 1.810909521859984e-07, 'epoch': 14.56} + 91%|█████████ | 338020/371472 [4:43:40<2:33:47, 3.63it/s] 91%|█████████ | 338021/371472 [4:43:41<2:33:24, 3.63it/s] 91%|█████████ | 338022/371472 [4:43:41<2:32:24, 3.66it/s] 91%|█████████ | 338023/371472 [4:43:41<2:35:09, 3.59it/s] 91%|████████��� | 338024/371472 [4:43:41<2:33:46, 3.63it/s] 91%|█████████ | 338025/371472 [4:43:42<2:40:28, 3.47it/s] 91%|█████████ | 338026/371472 [4:43:42<2:33:12, 3.64it/s] 91%|█████████ | 338027/371472 [4:43:42<2:35:41, 3.58it/s] 91%|█████████ | 338028/371472 [4:43:43<2:38:47, 3.51it/s] 91%|█████████ | 338029/371472 [4:43:43<2:40:11, 3.48it/s] 91%|█████████ | 338030/371472 [4:43:43<2:37:03, 3.55it/s] 91%|█████████ | 338031/371472 [4:43:43<2:31:24, 3.68it/s] 91%|█████████ | 338032/371472 [4:43:44<2:32:56, 3.64it/s] 91%|█████████ | 338033/371472 [4:43:44<2:38:20, 3.52it/s] 91%|█████████ | 338034/371472 [4:43:44<2:57:24, 3.14it/s] 91%|█████████ | 338035/371472 [4:43:45<2:58:38, 3.12it/s] 91%|█████████ | 338036/371472 [4:43:45<2:53:20, 3.21it/s] 91%|█████████ | 338037/371472 [4:43:45<2:45:15, 3.37it/s] 91%|█████████ | 338038/371472 [4:43:46<2:43:42, 3.40it/s] 91%|█████████ | 338039/371472 [4:43:46<2:38:16, 3.52it/s] 91%|█████████ | 338040/371472 [4:43:46<2:45:14, 3.37it/s] {'loss': 2.6172, 'learning_rate': 1.8104247021051945e-07, 'epoch': 14.56} + 91%|█████████ | 338040/371472 [4:43:46<2:45:14, 3.37it/s] 91%|█████████ | 338041/371472 [4:43:46<2:43:18, 3.41it/s] 91%|█████████ | 338042/371472 [4:43:47<2:45:56, 3.36it/s] 91%|█████████ | 338043/371472 [4:43:47<2:47:10, 3.33it/s] 91%|█████████ | 338044/371472 [4:43:47<2:39:37, 3.49it/s] 91%|█████████ | 338045/371472 [4:43:48<2:36:06, 3.57it/s] 91%|█████████ | 338046/371472 [4:43:48<2:45:47, 3.36it/s] 91%|█████████ | 338047/371472 [4:43:48<2:39:43, 3.49it/s] 91%|█████████ | 338048/371472 [4:43:48<2:39:10, 3.50it/s] 91%|█████████ | 338049/371472 [4:43:49<2:36:29, 3.56it/s] 91%|█████████ | 338050/371472 [4:43:49<2:35:40, 3.58it/s] 91%|█████████ | 338051/371472 [4:43:49<2:32:55, 3.64it/s] 91%|█████████ | 338052/371472 [4:43:50<2:30:42, 3.70it/s] 91%|█████████ | 338053/371472 [4:43:50<2:38:53, 3.51it/s] 91%|█████████ | 338054/371472 [4:43:50<2:59:04, 3.11it/s] 91%|█████████ | 338055/371472 [4:43:51<3:16:18, 2.84it/s] 91%|█████████ | 338056/371472 [4:43:51<3:02:48, 3.05it/s] 91%|█████████ | 338057/371472 [4:43:51<2:52:20, 3.23it/s] 91%|█████████ | 338058/371472 [4:43:51<2:47:35, 3.32it/s] 91%|█████████ | 338059/371472 [4:43:52<2:40:58, 3.46it/s] 91%|█████████ | 338060/371472 [4:43:52<2:35:21, 3.58it/s] {'loss': 2.6636, 'learning_rate': 1.8099398823504063e-07, 'epoch': 14.56} + 91%|█████████ | 338060/371472 [4:43:52<2:35:21, 3.58it/s] 91%|█████████ | 338061/371472 [4:43:52<2:55:07, 3.18it/s] 91%|█████████ | 338062/371472 [4:43:53<2:59:26, 3.10it/s] 91%|█████████ | 338063/371472 [4:43:53<2:51:22, 3.25it/s] 91%|█████████ | 338064/371472 [4:43:53<2:41:58, 3.44it/s] 91%|█████████ | 338065/371472 [4:43:54<2:40:41, 3.47it/s] 91%|█████████ | 338066/371472 [4:43:54<2:34:10, 3.61it/s] 91%|█████████ | 338067/371472 [4:43:54<2:33:11, 3.63it/s] 91%|█████████ | 338068/371472 [4:43:54<2:36:44, 3.55it/s] 91%|█████████ | 338069/371472 [4:43:55<2:36:49, 3.55it/s] 91%|█████████ | 338070/371472 [4:43:55<2:39:39, 3.49it/s] 91%|█████████ | 338071/371472 [4:43:55<2:37:51, 3.53it/s] 91%|█████████ | 338072/371472 [4:43:55<2:31:23, 3.68it/s] 91%|█████████ | 338073/371472 [4:43:56<2:42:21, 3.43it/s] 91%|█████████ | 338074/371472 [4:43:56<2:44:14, 3.39it/s] 91%|█████████ | 338075/371472 [4:43:56<2:43:54, 3.40it/s] 91%|█████████ | 338076/371472 [4:43:57<2:39:01, 3.50it/s] 91%|█████████ | 338077/371472 [4:43:57<3:00:31, 3.08it/s] 91%|█████████ | 338078/371472 [4:43:57<2:51:10, 3.25it/s] 91%|█████████ | 338079/371472 [4:43:58<2:42:08, 3.43it/s] 91%|█████████ | 338080/371472 [4:43:58<2:41:44, 3.44it/s] {'loss': 2.7485, 'learning_rate': 1.8094550625956167e-07, 'epoch': 14.56} + 91%|█████████ | 338080/371472 [4:43:58<2:41:44, 3.44it/s] 91%|█████████ | 338081/371472 [4:43:58<2:40:04, 3.48it/s] 91%|█████████ | 338082/371472 [4:43:58<2:35:21, 3.58it/s] 91%|█████████ | 338083/371472 [4:43:59<2:40:47, 3.46it/s] 91%|█████████ | 338084/371472 [4:43:59<2:36:33, 3.55it/s] 91%|█████████ | 338085/371472 [4:43:59<2:34:40, 3.60it/s] 91%|█████████ | 338086/371472 [4:44:00<2:33:10, 3.63it/s] 91%|█████████ | 338087/371472 [4:44:00<2:29:08, 3.73it/s] 91%|█████████ | 338088/371472 [4:44:00<2:26:49, 3.79it/s] 91%|█████████ | 338089/371472 [4:44:00<2:24:38, 3.85it/s] 91%|█████████ | 338090/371472 [4:44:01<2:32:40, 3.64it/s] 91%|█████████ | 338091/371472 [4:44:01<2:31:29, 3.67it/s] 91%|█████████ | 338092/371472 [4:44:01<2:32:44, 3.64it/s] 91%|█████████ | 338093/371472 [4:44:01<2:28:13, 3.75it/s] 91%|█████████ | 338094/371472 [4:44:02<2:26:35, 3.79it/s] 91%|█████████ | 338095/371472 [4:44:02<2:26:36, 3.79it/s] 91%|█████████ | 338096/371472 [4:44:02<2:23:40, 3.87it/s] 91%|█████████ | 338097/371472 [4:44:02<2:22:47, 3.90it/s] 91%|█████████ | 338098/371472 [4:44:03<2:25:49, 3.81it/s] 91%|█████████ | 338099/371472 [4:44:03<2:40:51, 3.46it/s] 91%|█████████ | 338100/371472 [4:44:03<2:39:02, 3.50it/s] {'loss': 2.6885, 'learning_rate': 1.8089702428408282e-07, 'epoch': 14.56} + 91%|█████████ | 338100/371472 [4:44:03<2:39:02, 3.50it/s] 91%|█████████ | 338101/371472 [4:44:04<2:40:12, 3.47it/s] 91%|█████████ | 338102/371472 [4:44:04<2:34:43, 3.59it/s] 91%|█████████ | 338103/371472 [4:44:04<2:40:16, 3.47it/s] 91%|█████████ | 338104/371472 [4:44:05<2:46:33, 3.34it/s] 91%|█████████ | 338105/371472 [4:44:05<2:55:23, 3.17it/s] 91%|█████████ | 338106/371472 [4:44:05<2:47:25, 3.32it/s] 91%|█████████ | 338107/371472 [4:44:05<2:47:58, 3.31it/s] 91%|█████████ | 338108/371472 [4:44:06<2:43:05, 3.41it/s] 91%|█████████ | 338109/371472 [4:44:06<2:52:06, 3.23it/s] 91%|█████████ | 338110/371472 [4:44:06<2:46:03, 3.35it/s] 91%|█████████ | 338111/371472 [4:44:07<2:41:36, 3.44it/s] 91%|█████████ | 338112/371472 [4:44:07<2:37:08, 3.54it/s] 91%|█████████ | 338113/371472 [4:44:07<2:32:20, 3.65it/s] 91%|█████████ | 338114/371472 [4:44:07<2:32:50, 3.64it/s] 91%|█████████ | 338115/371472 [4:44:08<2:37:23, 3.53it/s] 91%|█████████ | 338116/371472 [4:44:08<2:36:10, 3.56it/s] 91%|█████████ | 338117/371472 [4:44:08<2:37:34, 3.53it/s] 91%|█████████ | 338118/371472 [4:44:09<2:52:15, 3.23it/s] 91%|█████████ | 338119/371472 [4:44:09<2:44:15, 3.38it/s] 91%|█████████ | 338120/371472 [4:44:09<2:36:33, 3.55it/s] {'loss': 2.6096, 'learning_rate': 1.808485423086039e-07, 'epoch': 14.56} + 91%|█████████ | 338120/371472 [4:44:09<2:36:33, 3.55it/s] 91%|█████████ | 338121/371472 [4:44:09<2:41:59, 3.43it/s] 91%|█████████ | 338122/371472 [4:44:10<2:36:28, 3.55it/s] 91%|█████████ | 338123/371472 [4:44:10<2:47:40, 3.31it/s] 91%|█████████ | 338124/371472 [4:44:10<2:40:41, 3.46it/s] 91%|█████████ | 338125/371472 [4:44:11<2:34:14, 3.60it/s] 91%|█████████ | 338126/371472 [4:44:11<2:37:26, 3.53it/s] 91%|█████████ | 338127/371472 [4:44:11<2:37:06, 3.54it/s] 91%|█████████ | 338128/371472 [4:44:11<2:40:12, 3.47it/s] 91%|█████████ | 338129/371472 [4:44:12<2:41:15, 3.45it/s] 91%|█████████ | 338130/371472 [4:44:12<2:39:09, 3.49it/s] 91%|█████████ | 338131/371472 [4:44:12<2:41:40, 3.44it/s] 91%|█████████ | 338132/371472 [4:44:13<2:41:18, 3.44it/s] 91%|█████████ | 338133/371472 [4:44:13<2:34:39, 3.59it/s] 91%|█████████ | 338134/371472 [4:44:13<2:41:13, 3.45it/s] 91%|█████████ | 338135/371472 [4:44:13<2:39:56, 3.47it/s] 91%|█████████ | 338136/371472 [4:44:14<2:40:52, 3.45it/s] 91%|█████████ | 338137/371472 [4:44:14<2:34:36, 3.59it/s] 91%|█████████ | 338138/371472 [4:44:14<2:37:35, 3.53it/s] 91%|█████████ | 338139/371472 [4:44:15<2:38:09, 3.51it/s] 91%|█████████ | 338140/371472 [4:44:15<2:36:09, 3.56it/s] {'loss': 2.4298, 'learning_rate': 1.8080006033312502e-07, 'epoch': 14.56} + 91%|█████████ | 338140/371472 [4:44:15<2:36:09, 3.56it/s] 91%|█████████ | 338141/371472 [4:44:15<2:41:18, 3.44it/s] 91%|█████████ | 338142/371472 [4:44:15<2:39:46, 3.48it/s] 91%|█████████ | 338143/371472 [4:44:16<2:42:06, 3.43it/s] 91%|█████████ | 338144/371472 [4:44:16<2:39:05, 3.49it/s] 91%|█████████ | 338145/371472 [4:44:16<2:37:41, 3.52it/s] 91%|█████████ | 338146/371472 [4:44:17<2:38:27, 3.51it/s] 91%|█████████ | 338147/371472 [4:44:17<2:34:11, 3.60it/s] 91%|█████████ | 338148/371472 [4:44:17<2:35:27, 3.57it/s] 91%|█████████ | 338149/371472 [4:44:17<2:36:03, 3.56it/s] 91%|█████████ | 338150/371472 [4:44:18<2:38:34, 3.50it/s] 91%|█████████ | 338151/371472 [4:44:18<2:38:15, 3.51it/s] 91%|█████████ | 338152/371472 [4:44:18<2:32:59, 3.63it/s] 91%|█████████ | 338153/371472 [4:44:19<2:32:21, 3.64it/s] 91%|█████████ | 338154/371472 [4:44:19<2:51:03, 3.25it/s] 91%|█████████ | 338155/371472 [4:44:19<2:51:58, 3.23it/s] 91%|█████████ | 338156/371472 [4:44:20<2:48:41, 3.29it/s] 91%|█████████ | 338157/371472 [4:44:20<2:38:07, 3.51it/s] 91%|█████████ | 338158/371472 [4:44:20<2:41:38, 3.43it/s] 91%|█████████ | 338159/371472 [4:44:20<2:36:43, 3.54it/s] 91%|█████████ | 338160/371472 [4:44:21<2:46:51, 3.33it/s] {'loss': 2.5278, 'learning_rate': 1.807515783576461e-07, 'epoch': 14.57} + 91%|█████████ | 338160/371472 [4:44:21<2:46:51, 3.33it/s] 91%|█████████ | 338161/371472 [4:44:21<2:41:46, 3.43it/s] 91%|█████████ | 338162/371472 [4:44:21<2:48:07, 3.30it/s] 91%|█████████ | 338163/371472 [4:44:22<2:40:28, 3.46it/s] 91%|█████████ | 338164/371472 [4:44:22<2:40:15, 3.46it/s] 91%|█████████ | 338165/371472 [4:44:22<2:37:46, 3.52it/s] 91%|█████████ | 338166/371472 [4:44:22<2:37:43, 3.52it/s] 91%|█████████ | 338167/371472 [4:44:23<2:33:06, 3.63it/s] 91%|█████████ | 338168/371472 [4:44:23<2:28:46, 3.73it/s] 91%|█████████ | 338169/371472 [4:44:23<2:34:27, 3.59it/s] 91%|█████████ | 338170/371472 [4:44:23<2:31:49, 3.66it/s] 91%|█████████ | 338171/371472 [4:44:24<2:31:16, 3.67it/s] 91%|█████████ | 338172/371472 [4:44:24<2:29:43, 3.71it/s] 91%|█████████ | 338173/371472 [4:44:24<2:32:50, 3.63it/s] 91%|█████████ | 338174/371472 [4:44:25<2:43:29, 3.39it/s] 91%|█████████ | 338175/371472 [4:44:25<2:50:44, 3.25it/s] 91%|█████████ | 338176/371472 [4:44:25<2:47:13, 3.32it/s] 91%|█████████ | 338177/371472 [4:44:26<2:40:59, 3.45it/s] 91%|█████████ | 338178/371472 [4:44:26<2:43:38, 3.39it/s] 91%|█████████ | 338179/371472 [4:44:26<2:38:13, 3.51it/s] 91%|█████████ | 338180/371472 [4:44:26<2:44:10, 3.38it/s] {'loss': 2.6301, 'learning_rate': 1.8070309638216727e-07, 'epoch': 14.57} + 91%|█████████ | 338180/371472 [4:44:26<2:44:10, 3.38it/s] 91%|█████████ | 338181/371472 [4:44:27<3:01:46, 3.05it/s] 91%|█████████ | 338182/371472 [4:44:27<2:50:13, 3.26it/s] 91%|█████████ | 338183/371472 [4:44:27<2:40:31, 3.46it/s] 91%|█████████ | 338184/371472 [4:44:28<2:50:18, 3.26it/s] 91%|█████████ | 338185/371472 [4:44:28<2:42:33, 3.41it/s] 91%|█████████ | 338186/371472 [4:44:28<2:38:35, 3.50it/s] 91%|█████████ | 338187/371472 [4:44:28<2:35:45, 3.56it/s] 91%|█████████ | 338188/371472 [4:44:29<2:37:09, 3.53it/s] 91%|█████████ | 338189/371472 [4:44:29<2:36:50, 3.54it/s] 91%|█████████ | 338190/371472 [4:44:29<2:46:05, 3.34it/s] 91%|█████████ | 338191/371472 [4:44:30<2:43:01, 3.40it/s] 91%|█████████ | 338192/371472 [4:44:30<2:38:25, 3.50it/s] 91%|█████████ | 338193/371472 [4:44:30<2:36:10, 3.55it/s] 91%|█████████ | 338194/371472 [4:44:30<2:33:46, 3.61it/s] 91%|█████████ | 338195/371472 [4:44:31<2:35:46, 3.56it/s] 91%|█████████ | 338196/371472 [4:44:31<2:37:18, 3.53it/s] 91%|█████████ | 338197/371472 [4:44:31<2:34:34, 3.59it/s] 91%|█████████ | 338198/371472 [4:44:32<2:32:56, 3.63it/s] 91%|████████��� | 338199/371472 [4:44:32<2:36:48, 3.54it/s] 91%|█████████ | 338200/371472 [4:44:32<2:41:18, 3.44it/s] {'loss': 2.6126, 'learning_rate': 1.8065461440668834e-07, 'epoch': 14.57} + 91%|█████████ | 338200/371472 [4:44:32<2:41:18, 3.44it/s] 91%|█████████ | 338201/371472 [4:44:32<2:38:42, 3.49it/s] 91%|█████████ | 338202/371472 [4:44:33<2:37:35, 3.52it/s] 91%|█████████ | 338203/371472 [4:44:33<2:39:12, 3.48it/s] 91%|█████████ | 338204/371472 [4:44:33<2:37:12, 3.53it/s] 91%|█████████ | 338205/371472 [4:44:34<2:39:39, 3.47it/s] 91%|█████████ | 338206/371472 [4:44:34<2:37:51, 3.51it/s] 91%|█████████ | 338207/371472 [4:44:34<2:41:53, 3.42it/s] 91%|█████████ | 338208/371472 [4:44:34<2:39:57, 3.47it/s] 91%|█████████ | 338209/371472 [4:44:35<2:41:24, 3.43it/s] 91%|█████████ | 338210/371472 [4:44:35<2:57:59, 3.11it/s] 91%|█████████ | 338211/371472 [4:44:35<2:50:58, 3.24it/s] 91%|█████████ | 338212/371472 [4:44:36<2:50:25, 3.25it/s] 91%|█████████ | 338213/371472 [4:44:36<2:46:36, 3.33it/s] 91%|█████████ | 338214/371472 [4:44:36<2:48:39, 3.29it/s] 91%|█████████ | 338215/371472 [4:44:37<2:48:02, 3.30it/s] 91%|█████████ | 338216/371472 [4:44:37<2:48:36, 3.29it/s] 91%|█████████ | 338217/371472 [4:44:37<2:52:37, 3.21it/s] 91%|█████████ | 338218/371472 [4:44:38<2:52:29, 3.21it/s] 91%|█████████ | 338219/371472 [4:44:38<2:43:56, 3.38it/s] 91%|█████████ | 338220/371472 [4:44:38<2:49:15, 3.27it/s] {'loss': 2.6192, 'learning_rate': 1.8060613243120946e-07, 'epoch': 14.57} + 91%|█████████ | 338220/371472 [4:44:38<2:49:15, 3.27it/s] 91%|█████████ | 338221/371472 [4:44:38<2:38:56, 3.49it/s] 91%|█████████ | 338222/371472 [4:44:39<2:32:49, 3.63it/s] 91%|█████████ | 338223/371472 [4:44:39<2:45:08, 3.36it/s] 91%|█████████ | 338224/371472 [4:44:39<2:39:27, 3.48it/s] 91%|█████████ | 338225/371472 [4:44:40<2:39:36, 3.47it/s] 91%|█████████ | 338226/371472 [4:44:40<2:36:02, 3.55it/s] 91%|█████████ | 338227/371472 [4:44:40<2:46:59, 3.32it/s] 91%|█████████ | 338228/371472 [4:44:41<3:00:24, 3.07it/s] 91%|█████████ | 338229/371472 [4:44:41<2:55:54, 3.15it/s] 91%|█████████ | 338230/371472 [4:44:41<2:51:54, 3.22it/s] 91%|█████████ | 338231/371472 [4:44:41<2:52:47, 3.21it/s] 91%|█████████ | 338232/371472 [4:44:42<3:02:19, 3.04it/s] 91%|█████████ | 338233/371472 [4:44:42<2:58:42, 3.10it/s] 91%|█████████ | 338234/371472 [4:44:42<2:54:12, 3.18it/s] 91%|█████████ | 338235/371472 [4:44:43<2:49:36, 3.27it/s] 91%|█████████ | 338236/371472 [4:44:43<2:45:27, 3.35it/s] 91%|█████████ | 338237/371472 [4:44:43<2:43:57, 3.38it/s] 91%|█████████ | 338238/371472 [4:44:44<2:44:04, 3.38it/s] 91%|█████████ | 338239/371472 [4:44:44<2:43:55, 3.38it/s] 91%|█████████ | 338240/371472 [4:44:44<2:42:05, 3.42it/s] {'loss': 2.5981, 'learning_rate': 1.8055765045573054e-07, 'epoch': 14.57} + 91%|█████████ | 338240/371472 [4:44:44<2:42:05, 3.42it/s] 91%|█████████ | 338241/371472 [4:44:44<2:38:38, 3.49it/s] 91%|█████████ | 338242/371472 [4:44:45<2:38:14, 3.50it/s] 91%|█████████ | 338243/371472 [4:44:45<2:42:19, 3.41it/s] 91%|█████████ | 338244/371472 [4:44:45<2:47:54, 3.30it/s] 91%|█████████ | 338245/371472 [4:44:46<2:36:27, 3.54it/s] 91%|█████████ | 338246/371472 [4:44:46<2:36:40, 3.53it/s] 91%|█████████ | 338247/371472 [4:44:46<2:44:24, 3.37it/s] 91%|█████████ | 338248/371472 [4:44:47<2:44:36, 3.36it/s] 91%|█████████ | 338249/371472 [4:44:47<2:38:46, 3.49it/s] 91%|█████████ | 338250/371472 [4:44:47<2:36:08, 3.55it/s] 91%|█████████ | 338251/371472 [4:44:47<2:49:04, 3.27it/s] 91%|█████████ | 338252/371472 [4:44:48<2:48:15, 3.29it/s] 91%|█████████ | 338253/371472 [4:44:48<2:44:42, 3.36it/s] 91%|█████████ | 338254/371472 [4:44:48<2:36:49, 3.53it/s] 91%|█████████ | 338255/371472 [4:44:49<2:34:25, 3.59it/s] 91%|█████████ | 338256/371472 [4:44:49<2:36:00, 3.55it/s] 91%|█████████ | 338257/371472 [4:44:49<2:35:51, 3.55it/s] 91%|█████████ | 338258/371472 [4:44:49<2:34:04, 3.59it/s] 91%|█████████ | 338259/371472 [4:44:50<2:41:53, 3.42it/s] 91%|█████████ | 338260/371472 [4:44:50<2:36:44, 3.53it/s] {'loss': 2.4902, 'learning_rate': 1.805091684802517e-07, 'epoch': 14.57} + 91%|█████████ | 338260/371472 [4:44:50<2:36:44, 3.53it/s] 91%|█████████ | 338261/371472 [4:44:50<2:31:11, 3.66it/s] 91%|█████████ | 338262/371472 [4:44:50<2:31:01, 3.67it/s] 91%|█████████ | 338263/371472 [4:44:51<2:33:44, 3.60it/s] 91%|█████████ | 338264/371472 [4:44:51<2:30:18, 3.68it/s] 91%|█████████ | 338265/371472 [4:44:51<2:32:39, 3.63it/s] 91%|█████████ | 338266/371472 [4:44:52<2:41:22, 3.43it/s] 91%|█████████ | 338267/371472 [4:44:52<2:33:36, 3.60it/s] 91%|█████████ | 338268/371472 [4:44:52<2:31:55, 3.64it/s] 91%|█████████ | 338269/371472 [4:44:52<2:28:09, 3.74it/s] 91%|█████████ | 338270/371472 [4:44:53<2:36:13, 3.54it/s] 91%|█████████ | 338271/371472 [4:44:53<2:33:47, 3.60it/s] 91%|█████████ | 338272/371472 [4:44:53<2:38:19, 3.49it/s] 91%|█████████ | 338273/371472 [4:44:54<2:32:43, 3.62it/s] 91%|█████████ | 338274/371472 [4:44:54<2:32:11, 3.64it/s] 91%|█████████ | 338275/371472 [4:44:54<2:27:05, 3.76it/s] 91%|█████████ | 338276/371472 [4:44:54<2:28:14, 3.73it/s] 91%|█████████ | 338277/371472 [4:44:55<2:26:44, 3.77it/s] 91%|█████████ | 338278/371472 [4:44:55<2:30:39, 3.67it/s] 91%|█████████ | 338279/371472 [4:44:55<2:34:45, 3.57it/s] 91%|█████████ | 338280/371472 [4:44:55<2:34:54, 3.57it/s] {'loss': 2.7589, 'learning_rate': 1.8046068650477276e-07, 'epoch': 14.57} + 91%|█████████ | 338280/371472 [4:44:55<2:34:54, 3.57it/s] 91%|█████████ | 338281/371472 [4:44:56<2:29:50, 3.69it/s] 91%|█████████ | 338282/371472 [4:44:56<2:31:50, 3.64it/s] 91%|█████████ | 338283/371472 [4:44:56<2:37:07, 3.52it/s] 91%|█████████ | 338284/371472 [4:44:57<2:37:27, 3.51it/s] 91%|█████████ | 338285/371472 [4:44:57<2:37:24, 3.51it/s] 91%|█████████ | 338286/371472 [4:44:57<2:33:09, 3.61it/s] 91%|█████████ | 338287/371472 [4:44:57<2:40:59, 3.44it/s] 91%|█████████ | 338288/371472 [4:44:58<2:36:17, 3.54it/s] 91%|█████████ | 338289/371472 [4:44:58<2:46:11, 3.33it/s] 91%|█████████ | 338290/371472 [4:44:58<2:44:44, 3.36it/s] 91%|█████████ | 338291/371472 [4:44:59<2:39:44, 3.46it/s] 91%|█████████ | 338292/371472 [4:44:59<2:37:36, 3.51it/s] 91%|█████████ | 338293/371472 [4:44:59<2:32:28, 3.63it/s] 91%|█████████ | 338294/371472 [4:44:59<2:37:02, 3.52it/s] 91%|█████████ | 338295/371472 [4:45:00<2:44:07, 3.37it/s] 91%|█████████ | 338296/371472 [4:45:00<2:40:38, 3.44it/s] 91%|█████████ | 338297/371472 [4:45:00<2:43:25, 3.38it/s] 91%|█████████ | 338298/371472 [4:45:01<2:49:49, 3.26it/s] 91%|█████████ | 338299/371472 [4:45:01<2:52:45, 3.20it/s] 91%|█████████ | 338300/371472 [4:45:01<2:42:53, 3.39it/s] {'loss': 2.5826, 'learning_rate': 1.804122045292939e-07, 'epoch': 14.57} + 91%|█████████ | 338300/371472 [4:45:01<2:42:53, 3.39it/s] 91%|█████████ | 338301/371472 [4:45:02<2:46:27, 3.32it/s] 91%|█████████ | 338302/371472 [4:45:02<2:40:19, 3.45it/s] 91%|█████████ | 338303/371472 [4:45:02<2:37:16, 3.51it/s] 91%|█████████ | 338304/371472 [4:45:02<2:34:16, 3.58it/s] 91%|█████████ | 338305/371472 [4:45:03<2:32:37, 3.62it/s] 91%|█████████ | 338306/371472 [4:45:03<2:32:43, 3.62it/s] 91%|█████████ | 338307/371472 [4:45:03<2:29:41, 3.69it/s] 91%|█████████ | 338308/371472 [4:45:03<2:27:33, 3.75it/s] 91%|█████████ | 338309/371472 [4:45:04<2:33:37, 3.60it/s] 91%|█████████ | 338310/371472 [4:45:04<2:40:57, 3.43it/s] 91%|█████████ | 338311/371472 [4:45:04<2:36:22, 3.53it/s] 91%|█████████ | 338312/371472 [4:45:05<2:32:13, 3.63it/s] 91%|█████████ | 338313/371472 [4:45:05<2:34:23, 3.58it/s] 91%|████���████ | 338314/371472 [4:45:05<2:32:23, 3.63it/s] 91%|█████████ | 338315/371472 [4:45:05<2:31:37, 3.64it/s] 91%|█████████ | 338316/371472 [4:45:06<2:35:23, 3.56it/s] 91%|█████████ | 338317/371472 [4:45:06<2:35:12, 3.56it/s] 91%|█████████ | 338318/371472 [4:45:06<2:37:45, 3.50it/s] 91%|█████████ | 338319/371472 [4:45:07<2:32:42, 3.62it/s] 91%|█████████ | 338320/371472 [4:45:07<2:29:01, 3.71it/s] {'loss': 2.5593, 'learning_rate': 1.8036372255381498e-07, 'epoch': 14.57} + 91%|█████████ | 338320/371472 [4:45:07<2:29:01, 3.71it/s] 91%|█████████ | 338321/371472 [4:45:07<2:50:08, 3.25it/s] 91%|█████████ | 338322/371472 [4:45:08<3:00:22, 3.06it/s] 91%|█████████ | 338323/371472 [4:45:08<2:50:50, 3.23it/s] 91%|█████████ | 338324/371472 [4:45:08<2:41:13, 3.43it/s] 91%|█████████ | 338325/371472 [4:45:08<2:34:56, 3.57it/s] 91%|█████████ | 338326/371472 [4:45:09<2:41:32, 3.42it/s] 91%|█████████ | 338327/371472 [4:45:09<2:33:32, 3.60it/s] 91%|█████████ | 338328/371472 [4:45:09<2:31:12, 3.65it/s] 91%|█████████ | 338329/371472 [4:45:09<2:29:27, 3.70it/s] 91%|█████████ | 338330/371472 [4:45:10<2:40:49, 3.43it/s] 91%|█████████ | 338331/371472 [4:45:10<2:36:56, 3.52it/s] 91%|█████████ | 338332/371472 [4:45:10<2:33:50, 3.59it/s] 91%|█████████ | 338333/371472 [4:45:11<2:29:12, 3.70it/s] 91%|█████████ | 338334/371472 [4:45:11<2:28:49, 3.71it/s] 91%|█████████ | 338335/371472 [4:45:11<2:26:39, 3.77it/s] 91%|█████████ | 338336/371472 [4:45:11<2:28:49, 3.71it/s] 91%|█████████ | 338337/371472 [4:45:12<2:42:00, 3.41it/s] 91%|█████████ | 338338/371472 [4:45:12<2:42:38, 3.40it/s] 91%|█████████ | 338339/371472 [4:45:12<2:42:52, 3.39it/s] 91%|█████████ | 338340/371472 [4:45:13<2:42:41, 3.39it/s] {'loss': 2.6938, 'learning_rate': 1.8031524057833613e-07, 'epoch': 14.57} + 91%|█████████ | 338340/371472 [4:45:13<2:42:41, 3.39it/s] 91%|█████████ | 338341/371472 [4:45:13<2:38:41, 3.48it/s] 91%|█████████ | 338342/371472 [4:45:13<2:37:31, 3.51it/s] 91%|█████████ | 338343/371472 [4:45:13<2:34:34, 3.57it/s] 91%|█████████ | 338344/371472 [4:45:14<2:38:58, 3.47it/s] 91%|█████████ | 338345/371472 [4:45:14<2:51:41, 3.22it/s] 91%|█████████ | 338346/371472 [4:45:14<2:41:45, 3.41it/s] 91%|█████████ | 338347/371472 [4:45:15<2:50:44, 3.23it/s] 91%|█████████ | 338348/371472 [4:45:15<2:42:54, 3.39it/s] 91%|█████████ | 338349/371472 [4:45:15<2:40:01, 3.45it/s] 91%|█████████ | 338350/371472 [4:45:16<2:45:10, 3.34it/s] 91%|█████████ | 338351/371472 [4:45:16<2:41:02, 3.43it/s] 91%|█████████ | 338352/371472 [4:45:16<2:43:08, 3.38it/s] 91%|█████████ | 338353/371472 [4:45:16<2:39:18, 3.46it/s] 91%|█████████ | 338354/371472 [4:45:17<2:33:05, 3.61it/s] 91%|█████████ | 338355/371472 [4:45:17<2:27:34, 3.74it/s] 91%|█████████ | 338356/371472 [4:45:17<2:25:47, 3.79it/s] 91%|█████████ | 338357/371472 [4:45:17<2:26:03, 3.78it/s] 91%|█████████ | 338358/371472 [4:45:18<2:24:11, 3.83it/s] 91%|█████████ | 338359/371472 [4:45:18<2:31:00, 3.65it/s] 91%|█████████ | 338360/371472 [4:45:18<2:29:44, 3.69it/s] {'loss': 2.5349, 'learning_rate': 1.8026675860285718e-07, 'epoch': 14.57} + 91%|█████████ | 338360/371472 [4:45:18<2:29:44, 3.69it/s] 91%|█████████ | 338361/371472 [4:45:19<2:46:09, 3.32it/s] 91%|█████████ | 338362/371472 [4:45:19<2:37:27, 3.50it/s] 91%|█████████ | 338363/371472 [4:45:19<2:35:17, 3.55it/s] 91%|█████████ | 338364/371472 [4:45:19<2:35:04, 3.56it/s] 91%|█████████ | 338365/371472 [4:45:20<2:33:35, 3.59it/s] 91%|█████████ | 338366/371472 [4:45:20<2:31:36, 3.64it/s] 91%|█████████ | 338367/371472 [4:45:20<2:33:45, 3.59it/s] 91%|█████████ | 338368/371472 [4:45:21<2:45:13, 3.34it/s] 91%|█████████ | 338369/371472 [4:45:21<2:44:11, 3.36it/s] 91%|█████████ | 338370/371472 [4:45:21<2:36:54, 3.52it/s] 91%|█████████ | 338371/371472 [4:45:21<2:31:34, 3.64it/s] 91%|█████████ | 338372/371472 [4:45:22<2:31:32, 3.64it/s] 91%|█████████ | 338373/371472 [4:45:22<2:27:39, 3.74it/s] 91%|█████████ | 338374/371472 [4:45:22<2:28:30, 3.71it/s] 91%|█████████ | 338375/371472 [4:45:22<2:29:58, 3.68it/s] 91%|█████████ | 338376/371472 [4:45:23<2:27:05, 3.75it/s] 91%|█████████ | 338377/371472 [4:45:23<2:36:43, 3.52it/s] 91%|█████████ | 338378/371472 [4:45:23<2:31:00, 3.65it/s] 91%|█████████ | 338379/371472 [4:45:24<2:31:07, 3.65it/s] 91%|█████████ | 338380/371472 [4:45:24<2:33:51, 3.58it/s] {'loss': 2.5463, 'learning_rate': 1.8021827662737835e-07, 'epoch': 14.57} + 91%|█████████ | 338380/371472 [4:45:24<2:33:51, 3.58it/s] 91%|█████████ | 338381/371472 [4:45:24<2:32:44, 3.61it/s] 91%|█████████ | 338382/371472 [4:45:24<2:42:26, 3.40it/s] 91%|█████████ | 338383/371472 [4:45:25<2:40:49, 3.43it/s] 91%|█████████ | 338384/371472 [4:45:25<2:36:58, 3.51it/s] 91%|█████████ | 338385/371472 [4:45:25<2:33:47, 3.59it/s] 91%|█████████ | 338386/371472 [4:45:26<2:40:41, 3.43it/s] 91%|█████████ | 338387/371472 [4:45:26<2:38:21, 3.48it/s] 91%|█████████ | 338388/371472 [4:45:26<2:34:58, 3.56it/s] 91%|█████████ | 338389/371472 [4:45:26<2:37:13, 3.51it/s] 91%|█████████ | 338390/371472 [4:45:27<2:33:18, 3.60it/s] 91%|█████████ | 338391/371472 [4:45:27<2:32:56, 3.61it/s] 91%|█████████ | 338392/371472 [4:45:27<2:33:59, 3.58it/s] 91%|█████████ | 338393/371472 [4:45:28<2:43:09, 3.38it/s] 91%|█████████ | 338394/371472 [4:45:28<2:41:48, 3.41it/s] 91%|█████████ | 338395/371472 [4:45:28<2:36:21, 3.53it/s] 91%|█████████ | 338396/371472 [4:45:28<2:29:24, 3.69it/s] 91%|█████████ | 338397/371472 [4:45:29<2:25:58, 3.78it/s] 91%|█████████ | 338398/371472 [4:45:29<2:35:41, 3.54it/s] 91%|█████████ | 338399/371472 [4:45:29<2:29:49, 3.68it/s] 91%|█████████ | 338400/371472 [4:45:29<2:28:56, 3.70it/s] {'loss': 2.5486, 'learning_rate': 1.801697946518994e-07, 'epoch': 14.58} + 91%|█████████ | 338400/371472 [4:45:29<2:28:56, 3.70it/s] 91%|█████████ | 338401/371472 [4:45:30<2:45:36, 3.33it/s] 91%|█████████ | 338402/371472 [4:45:30<2:42:53, 3.38it/s] 91%|█████████ | 338403/371472 [4:45:30<2:37:32, 3.50it/s] 91%|█████████ | 338404/371472 [4:45:31<2:38:21, 3.48it/s] 91%|█████████ | 338405/371472 [4:45:31<2:37:25, 3.50it/s] 91%|█████████ | 338406/371472 [4:45:31<2:31:23, 3.64it/s] 91%|█████████ | 338407/371472 [4:45:32<2:42:49, 3.38it/s] 91%|█████████ | 338408/371472 [4:45:32<2:43:14, 3.38it/s] 91%|█████████ | 338409/371472 [4:45:32<2:49:52, 3.24it/s] 91%|█████████ | 338410/371472 [4:45:32<2:43:34, 3.37it/s] 91%|█████████ | 338411/371472 [4:45:33<2:45:27, 3.33it/s] 91%|█████████ | 338412/371472 [4:45:33<2:40:41, 3.43it/s] 91%|█████████ | 338413/371472 [4:45:33<2:51:53, 3.21it/s] 91%|█████████ | 338414/371472 [4:45:34<2:40:46, 3.43it/s] 91%|█████████ | 338415/371472 [4:45:34<3:02:03, 3.03it/s] 91%|█████████ | 338416/371472 [4:45:34<2:51:47, 3.21it/s] 91%|█████████ | 338417/371472 [4:45:35<2:40:37, 3.43it/s] 91%|█████████ | 338418/371472 [4:45:35<2:36:30, 3.52it/s] 91%|█████████ | 338419/371472 [4:45:35<2:35:21, 3.55it/s] 91%|█████████ | 338420/371472 [4:45:35<2:50:15, 3.24it/s] {'loss': 2.604, 'learning_rate': 1.8012131267642055e-07, 'epoch': 14.58} + 91%|█████████ | 338420/371472 [4:45:35<2:50:15, 3.24it/s] 91%|█████████ | 338421/371472 [4:45:36<2:43:43, 3.36it/s] 91%|█████████ | 338422/371472 [4:45:36<2:42:15, 3.39it/s] 91%|█████████ | 338423/371472 [4:45:36<2:48:18, 3.27it/s] 91%|█████████ | 338424/371472 [4:45:37<2:44:40, 3.34it/s] 91%|█████████ | 338425/371472 [4:45:37<2:39:14, 3.46it/s] 91%|█████████ | 338426/371472 [4:45:37<2:40:38, 3.43it/s] 91%|█████████ | 338427/371472 [4:45:37<2:38:07, 3.48it/s] 91%|█████████ | 338428/371472 [4:45:38<2:42:35, 3.39it/s] 91%|���████████ | 338429/371472 [4:45:38<2:43:25, 3.37it/s] 91%|█████████ | 338430/371472 [4:45:38<2:40:46, 3.43it/s] 91%|█████████ | 338431/371472 [4:45:39<2:46:28, 3.31it/s] 91%|█████████ | 338432/371472 [4:45:39<2:38:12, 3.48it/s] 91%|█████████ | 338433/371472 [4:45:39<2:34:23, 3.57it/s] 91%|█████████ | 338434/371472 [4:45:40<2:34:57, 3.55it/s] 91%|█████████ | 338435/371472 [4:45:40<2:32:35, 3.61it/s] 91%|█████████ | 338436/371472 [4:45:40<2:27:58, 3.72it/s] 91%|█████████ | 338437/371472 [4:45:40<2:25:29, 3.78it/s] 91%|█████████ | 338438/371472 [4:45:41<2:33:50, 3.58it/s] 91%|█████████ | 338439/371472 [4:45:41<2:36:04, 3.53it/s] 91%|█████████ | 338440/371472 [4:45:41<2:31:01, 3.65it/s] {'loss': 2.5268, 'learning_rate': 1.8007283070094162e-07, 'epoch': 14.58} + 91%|█████████ | 338440/371472 [4:45:41<2:31:01, 3.65it/s] 91%|█████████ | 338441/371472 [4:45:41<2:28:11, 3.71it/s] 91%|█████████ | 338442/371472 [4:45:42<2:33:27, 3.59it/s] 91%|█████████ | 338443/371472 [4:45:42<2:34:55, 3.55it/s] 91%|█████████ | 338444/371472 [4:45:42<2:30:01, 3.67it/s] 91%|█████████ | 338445/371472 [4:45:42<2:27:42, 3.73it/s] 91%|█████████ | 338446/371472 [4:45:43<2:28:17, 3.71it/s] 91%|█████████ | 338447/371472 [4:45:43<2:33:30, 3.59it/s] 91%|█████████ | 338448/371472 [4:45:43<2:34:16, 3.57it/s] 91%|█████████ | 338449/371472 [4:45:44<2:30:48, 3.65it/s] 91%|█████████ | 338450/371472 [4:45:44<2:27:49, 3.72it/s] 91%|█████████ | 338451/371472 [4:45:44<2:26:16, 3.76it/s] 91%|█████████ | 338452/371472 [4:45:44<2:38:30, 3.47it/s] 91%|█████████ | 338453/371472 [4:45:45<2:37:27, 3.50it/s] 91%|█████████ | 338454/371472 [4:45:45<2:39:55, 3.44it/s] 91%|█████████ | 338455/371472 [4:45:45<2:48:47, 3.26it/s] 91%|█████████ | 338456/371472 [4:45:46<2:43:46, 3.36it/s] 91%|█████████ | 338457/371472 [4:45:46<2:40:34, 3.43it/s] 91%|█████████ | 338458/371472 [4:45:46<2:47:46, 3.28it/s] 91%|█████████ | 338459/371472 [4:45:47<2:49:05, 3.25it/s] 91%|█████████ | 338460/371472 [4:45:47<2:43:38, 3.36it/s] {'loss': 2.5312, 'learning_rate': 1.8002434872546277e-07, 'epoch': 14.58} + 91%|█████████ | 338460/371472 [4:45:47<2:43:38, 3.36it/s] 91%|█████████ | 338461/371472 [4:45:47<2:37:11, 3.50it/s] 91%|█████████ | 338462/371472 [4:45:47<2:33:10, 3.59it/s] 91%|█████████ | 338463/371472 [4:45:48<2:27:55, 3.72it/s] 91%|█████████ | 338464/371472 [4:45:48<2:26:07, 3.77it/s] 91%|█████████ | 338465/371472 [4:45:48<2:22:54, 3.85it/s] 91%|█████████ | 338466/371472 [4:45:49<2:45:10, 3.33it/s] 91%|█████████ | 338467/371472 [4:45:49<2:42:23, 3.39it/s] 91%|█████████ | 338468/371472 [4:45:49<2:38:00, 3.48it/s] 91%|█████████ | 338469/371472 [4:45:49<2:47:03, 3.29it/s] 91%|█████████ | 338470/371472 [4:45:50<2:40:46, 3.42it/s] 91%|█████████ | 338471/371472 [4:45:50<2:40:06, 3.44it/s] 91%|█████████ | 338472/371472 [4:45:50<2:46:52, 3.30it/s] 91%|█████████ | 338473/371472 [4:45:51<2:38:23, 3.47it/s] 91%|█████████ | 338474/371472 [4:45:51<2:35:33, 3.54it/s] 91%|█████████ | 338475/371472 [4:45:51<2:29:19, 3.68it/s] 91%|█████████ | 338476/371472 [4:45:51<2:28:13, 3.71it/s] 91%|█████████ | 338477/371472 [4:45:52<2:25:34, 3.78it/s] 91%|█████████ | 338478/371472 [4:45:52<2:28:09, 3.71it/s] 91%|█████████ | 338479/371472 [4:45:52<2:28:17, 3.71it/s] 91%|█████████ | 338480/371472 [4:45:52<2:29:42, 3.67it/s] {'loss': 2.5899, 'learning_rate': 1.7997586674998382e-07, 'epoch': 14.58} + 91%|█████████ | 338480/371472 [4:45:52<2:29:42, 3.67it/s] 91%|█████████ | 338481/371472 [4:45:53<2:40:41, 3.42it/s] 91%|█████████ | 338482/371472 [4:45:53<2:40:25, 3.43it/s] 91%|█████████ | 338483/371472 [4:45:53<2:38:18, 3.47it/s] 91%|█████████ | 338484/371472 [4:45:54<2:35:44, 3.53it/s] 91%|█████████ | 338485/371472 [4:45:54<2:37:56, 3.48it/s] 91%|█████████ | 338486/371472 [4:45:54<2:35:53, 3.53it/s] 91%|█████████ | 338487/371472 [4:45:54<2:32:33, 3.60it/s] 91%|█████████ | 338488/371472 [4:45:55<2:37:59, 3.48it/s] 91%|█████████ | 338489/371472 [4:45:55<2:37:28, 3.49it/s] 91%|█████████ | 338490/371472 [4:45:55<2:34:44, 3.55it/s] 91%|█████████ | 338491/371472 [4:45:56<2:34:59, 3.55it/s] 91%|█████████ | 338492/371472 [4:45:56<2:28:01, 3.71it/s] 91%|█████████ | 338493/371472 [4:45:56<2:24:21, 3.81it/s] 91%|█████████ | 338494/371472 [4:45:56<2:25:00, 3.79it/s] 91%|█████████ | 338495/371472 [4:45:57<2:21:48, 3.88it/s] 91%|█████████ | 338496/371472 [4:45:57<2:21:21, 3.89it/s] 91%|█████████ | 338497/371472 [4:45:57<2:24:34, 3.80it/s] 91%|█████████ | 338498/371472 [4:45:57<2:32:13, 3.61it/s] 91%|█████████ | 338499/371472 [4:45:58<2:32:25, 3.61it/s] 91%|█████████ | 338500/371472 [4:45:58<2:39:03, 3.45it/s] {'loss': 2.4637, 'learning_rate': 1.79927384774505e-07, 'epoch': 14.58} + 91%|█████████ | 338500/371472 [4:45:58<2:39:03, 3.45it/s] 91%|█████████ | 338501/371472 [4:45:58<2:36:39, 3.51it/s] 91%|█████████ | 338502/371472 [4:45:59<2:46:52, 3.29it/s] 91%|█████████ | 338503/371472 [4:45:59<2:42:22, 3.38it/s] 91%|█████████ | 338504/371472 [4:45:59<2:38:10, 3.47it/s] 91%|█████████ | 338505/371472 [4:46:00<2:40:44, 3.42it/s] 91%|█████████ | 338506/371472 [4:46:00<2:37:23, 3.49it/s] 91%|█████████ | 338507/371472 [4:46:00<2:32:06, 3.61it/s] 91%|█████████ | 338508/371472 [4:46:00<2:29:33, 3.67it/s] 91%|█████████ | 338509/371472 [4:46:01<2:29:56, 3.66it/s] 91%|█████████ | 338510/371472 [4:46:01<2:34:06, 3.56it/s] 91%|█████████ | 338511/371472 [4:46:01<2:32:50, 3.59it/s] 91%|█████████ | 338512/371472 [4:46:01<2:35:13, 3.54it/s] 91%|█████████ | 338513/371472 [4:46:02<2:34:05, 3.56it/s] 91%|█████████ | 338514/371472 [4:46:02<2:38:52, 3.46it/s] 91%|█████████ | 338515/371472 [4:46:02<2:29:36, 3.67it/s] 91%|█████████ | 338516/371472 [4:46:03<2:30:00, 3.66it/s] 91%|█████████ | 338517/371472 [4:46:03<2:42:31, 3.38it/s] 91%|█████████ | 338518/371472 [4:46:03<2:40:23, 3.42it/s] 91%|█████████ | 338519/371472 [4:46:03<2:38:53, 3.46it/s] 91%|█████████ | 338520/371472 [4:46:04<2:37:07, 3.50it/s] {'loss': 2.6968, 'learning_rate': 1.7987890279902604e-07, 'epoch': 14.58} + 91%|█████████ | 338520/371472 [4:46:04<2:37:07, 3.50it/s] 91%|█████████ | 338521/371472 [4:46:04<2:42:17, 3.38it/s] 91%|█████████ | 338522/371472 [4:46:04<2:43:59, 3.35it/s] 91%|█████████ | 338523/371472 [4:46:05<2:36:07, 3.52it/s] 91%|█████████ | 338524/371472 [4:46:05<2:32:51, 3.59it/s] 91%|█████████ | 338525/371472 [4:46:05<2:34:00, 3.57it/s] 91%|█████████ | 338526/371472 [4:46:05<2:39:12, 3.45it/s] 91%|█████████ | 338527/371472 [4:46:06<2:39:12, 3.45it/s] 91%|█████████ | 338528/371472 [4:46:06<2:40:39, 3.42it/s] 91%|█████████ | 338529/371472 [4:46:06<2:44:29, 3.34it/s] 91%|█████████ | 338530/371472 [4:46:07<2:40:08, 3.43it/s] 91%|█████████ | 338531/371472 [4:46:07<2:46:31, 3.30it/s] 91%|█████████ | 338532/371472 [4:46:07<2:46:04, 3.31it/s] 91%|█████████ | 338533/371472 [4:46:08<2:36:10, 3.52it/s] 91%|█████████ | 338534/371472 [4:46:08<2:37:12, 3.49it/s] 91%|█████████ | 338535/371472 [4:46:08<2:34:02, 3.56it/s] 91%|█████████ | 338536/371472 [4:46:08<2:50:49, 3.21it/s] 91%|█████████ | 338537/371472 [4:46:09<2:47:10, 3.28it/s] 91%|█████████ | 338538/371472 [4:46:09<2:54:36, 3.14it/s] 91%|█████████ | 338539/371472 [4:46:09<2:55:30, 3.13it/s] 91%|█████████ | 338540/371472 [4:46:10<2:51:45, 3.20it/s] {'loss': 2.5094, 'learning_rate': 1.7983042082354708e-07, 'epoch': 14.58} + 91%|█████████ | 338540/371472 [4:46:10<2:51:45, 3.20it/s] 91%|█████████ | 338541/371472 [4:46:10<2:47:25, 3.28it/s] 91%|█████████ | 338542/371472 [4:46:10<3:04:23, 2.98it/s] 91%|█████████ | 338543/371472 [4:46:11<3:01:41, 3.02it/s] 91%|█████████ | 338544/371472 [4:46:11<3:04:57, 2.97it/s] 91%|█████████ | 338545/371472 [4:46:11<3:00:46, 3.04it/s] 91%|█████████ | 338546/371472 [4:46:12<2:54:03, 3.15it/s] 91%|█████████ | 338547/371472 [4:46:12<2:45:08, 3.32it/s] 91%|█████████ | 338548/371472 [4:46:12<2:44:43, 3.33it/s] 91%|█████████ | 338549/371472 [4:46:13<2:52:09, 3.19it/s] 91%|█████████ | 338550/371472 [4:46:13<2:52:24, 3.18it/s] 91%|█████████ | 338551/371472 [4:46:13<2:51:56, 3.19it/s] 91%|█████████ | 338552/371472 [4:46:13<2:42:29, 3.38it/s] 91%|█████████ | 338553/371472 [4:46:14<2:39:44, 3.43it/s] 91%|█████████ | 338554/371472 [4:46:14<2:38:55, 3.45it/s] 91%|█████████ | 338555/371472 [4:46:14<2:34:23, 3.55it/s] 91%|█████████ | 338556/371472 [4:46:15<2:28:41, 3.69it/s] 91%|█████████ | 338557/371472 [4:46:15<2:25:11, 3.78it/s] 91%|█████████ | 338558/371472 [4:46:15<2:35:26, 3.53it/s] 91%|█████████ | 338559/371472 [4:46:15<2:32:22, 3.60it/s] 91%|█████████ | 338560/371472 [4:46:16<2:32:28, 3.60it/s] {'loss': 2.5594, 'learning_rate': 1.7978193884806826e-07, 'epoch': 14.58} + 91%|█████████ | 338560/371472 [4:46:16<2:32:28, 3.60it/s] 91%|█████████ | 338561/371472 [4:46:16<2:33:30, 3.57it/s] 91%|█████████ | 338562/371472 [4:46:16<2:46:37, 3.29it/s] 91%|█████████ | 338563/371472 [4:46:17<2:46:41, 3.29it/s] 91%|█████████ | 338564/371472 [4:46:17<2:40:23, 3.42it/s] 91%|█████████ | 338565/371472 [4:46:17<2:43:36, 3.35it/s] 91%|█████████ | 338566/371472 [4:46:17<2:37:20, 3.49it/s] 91%|█████████ | 338567/371472 [4:46:18<2:53:36, 3.16it/s] 91%|█████████ | 338568/371472 [4:46:18<2:44:14, 3.34it/s] 91%|█████████ | 338569/371472 [4:46:18<2:37:36, 3.48it/s] 91%|█████████ | 338570/371472 [4:46:19<2:31:44, 3.61it/s] 91%|█████████ | 338571/371472 [4:46:19<2:31:52, 3.61it/s] 91%|█████████ | 338572/371472 [4:46:19<2:45:27, 3.31it/s] 91%|█████████ | 338573/371472 [4:46:20<2:46:06, 3.30it/s] 91%|█████████ | 338574/371472 [4:46:20<2:37:28, 3.48it/s] 91%|█████████ | 338575/371472 [4:46:20<2:38:23, 3.46it/s] 91%|█████████ | 338576/371472 [4:46:20<2:40:47, 3.41it/s] 91%|█████████ | 338577/371472 [4:46:21<2:34:42, 3.54it/s] 91%|█████████ | 338578/371472 [4:46:21<2:30:34, 3.64it/s] 91%|█████████ | 338579/371472 [4:46:21<2:28:32, 3.69it/s] 91%|█████████ | 338580/371472 [4:46:21<2:30:07, 3.65it/s] {'loss': 2.5752, 'learning_rate': 1.7973345687258933e-07, 'epoch': 14.58} + 91%|█████████ | 338580/371472 [4:46:21<2:30:07, 3.65it/s] 91%|█████████ | 338581/371472 [4:46:22<2:37:53, 3.47it/s] 91%|█████████ | 338582/371472 [4:46:22<2:36:43, 3.50it/s] 91%|█████████ | 338583/371472 [4:46:22<2:31:04, 3.63it/s] 91%|█████████ | 338584/371472 [4:46:23<2:44:41, 3.33it/s] 91%|█████████ | 338585/371472 [4:46:23<2:51:54, 3.19it/s] 91%|█████████ | 338586/371472 [4:46:23<2:45:28, 3.31it/s] 91%|█████████ | 338587/371472 [4:46:24<2:48:45, 3.25it/s] 91%|█████████ | 338588/371472 [4:46:24<2:41:30, 3.39it/s] 91%|█████████ | 338589/371472 [4:46:24<2:53:03, 3.17it/s] 91%|█████████ | 338590/371472 [4:46:25<2:46:59, 3.28it/s] 91%|█████████ | 338591/371472 [4:46:25<2:38:31, 3.46it/s] 91%|█████████ | 338592/371472 [4:46:25<2:43:44, 3.35it/s] 91%|█████████ | 338593/371472 [4:46:25<2:47:23, 3.27it/s] 91%|█████████ | 338594/371472 [4:46:26<2:54:55, 3.13it/s] 91%|█████████ | 338595/371472 [4:46:26<2:48:54, 3.24it/s] 91%|█████████ | 338596/371472 [4:46:26<2:44:31, 3.33it/s] 91%|█████████ | 338597/371472 [4:46:27<2:48:45, 3.25it/s] 91%|█████████ | 338598/371472 [4:46:27<2:45:58, 3.30it/s] 91%|█████████ | 338599/371472 [4:46:27<2:38:24, 3.46it/s] 91%|█████████ | 338600/371472 [4:46:27<2:32:35, 3.59it/s] {'loss': 2.6366, 'learning_rate': 1.7968497489711046e-07, 'epoch': 14.58} + 91%|█████████ | 338600/371472 [4:46:27<2:32:35, 3.59it/s] 91%|█████████ | 338601/371472 [4:46:28<2:36:31, 3.50it/s] 91%|█████████ | 338602/371472 [4:46:28<2:32:33, 3.59it/s] 91%|█████████ | 338603/371472 [4:46:28<2:32:12, 3.60it/s] 91%|█████████ | 338604/371472 [4:46:29<2:36:24, 3.50it/s] 91%|█████████ | 338605/371472 [4:46:29<2:30:12, 3.65it/s] 91%|█████████ | 338606/371472 [4:46:29<2:31:40, 3.61it/s] 91%|█████████ | 338607/371472 [4:46:29<2:35:46, 3.52it/s] 91%|█████████ | 338608/371472 [4:46:30<2:34:34, 3.54it/s] 91%|█████████ | 338609/371472 [4:46:30<2:38:05, 3.46it/s] 91%|█████████ | 338610/371472 [4:46:30<2:49:43, 3.23it/s] 91%|█████████ | 338611/371472 [4:46:31<2:55:40, 3.12it/s] 91%|█████████ | 338612/371472 [4:46:31<2:52:41, 3.17it/s] 91%|█████████ | 338613/371472 [4:46:31<3:08:57, 2.90it/s] 91%|█████████ | 338614/371472 [4:46:32<2:59:25, 3.05it/s] 91%|█████████ | 338615/371472 [4:46:32<2:53:13, 3.16it/s] 91%|█████████ | 338616/371472 [4:46:32<2:46:00, 3.30it/s] 91%|█████████ | 338617/371472 [4:46:33<2:38:50, 3.45it/s] 91%|█████████ | 338618/371472 [4:46:33<2:45:24, 3.31it/s] 91%|█████████ | 338619/371472 [4:46:33<2:37:00, 3.49it/s] 91%|█████████ | 338620/371472 [4:46:33<2:45:35, 3.31it/s] {'loss': 2.6288, 'learning_rate': 1.7963649292163153e-07, 'epoch': 14.59} + 91%|█████████ | 338620/371472 [4:46:33<2:45:35, 3.31it/s] 91%|█████████ | 338621/371472 [4:46:34<2:52:26, 3.18it/s] 91%|█████████ | 338622/371472 [4:46:34<2:44:18, 3.33it/s] 91%|█████████ | 338623/371472 [4:46:34<2:45:10, 3.31it/s] 91%|█████████ | 338624/371472 [4:46:35<2:40:04, 3.42it/s] 91%|█████████ | 338625/371472 [4:46:35<2:32:45, 3.58it/s] 91%|█████████ | 338626/371472 [4:46:35<2:37:26, 3.48it/s] 91%|█████████ | 338627/371472 [4:46:35<2:39:43, 3.43it/s] 91%|█████████ | 338628/371472 [4:46:36<2:33:59, 3.55it/s] 91%|█████████ | 338629/371472 [4:46:36<2:37:57, 3.47it/s] 91%|█████████ | 338630/371472 [4:46:36<2:40:59, 3.40it/s] 91%|█████████ | 338631/371472 [4:46:37<2:36:15, 3.50it/s] 91%|█████████ | 338632/371472 [4:46:37<2:31:51, 3.60it/s] 91%|█████████ | 338633/371472 [4:46:37<2:30:18, 3.64it/s] 91%|█████████ | 338634/371472 [4:46:37<2:30:15, 3.64it/s] 91%|█████████ | 338635/371472 [4:46:38<2:35:30, 3.52it/s] 91%|█████████ | 338636/371472 [4:46:38<2:32:58, 3.58it/s] 91%|█████████ | 338637/371472 [4:46:38<2:29:37, 3.66it/s] 91%|█████████ | 338638/371472 [4:46:39<2:32:08, 3.60it/s] 91%|█████████ | 338639/371472 [4:46:39<2:27:41, 3.71it/s] 91%|█████████ | 338640/371472 [4:46:39<2:45:06, 3.31it/s] {'loss': 2.6625, 'learning_rate': 1.795880109461527e-07, 'epoch': 14.59} + 91%|█████████ | 338640/371472 [4:46:39<2:45:06, 3.31it/s] 91%|█████████ | 338641/371472 [4:46:39<2:36:01, 3.51it/s] 91%|█████████ | 338642/371472 [4:46:40<2:31:59, 3.60it/s] 91%|█████████ | 338643/371472 [4:46:40<2:31:59, 3.60it/s] 91%|█████████ | 338644/371472 [4:46:40<2:37:11, 3.48it/s] 91%|█████████ | 338645/371472 [4:46:41<2:32:25, 3.59it/s] 91%|█████████ | 338646/371472 [4:46:41<2:32:31, 3.59it/s] 91%|█████████ | 338647/371472 [4:46:41<2:31:39, 3.61it/s] 91%|█████████ | 338648/371472 [4:46:41<2:40:58, 3.40it/s] 91%|█████████ | 338649/371472 [4:46:42<2:38:06, 3.46it/s] 91%|█████████ | 338650/371472 [4:46:42<2:32:23, 3.59it/s] 91%|█████████ | 338651/371472 [4:46:42<2:30:33, 3.63it/s] 91%|█████████ | 338652/371472 [4:46:43<2:39:06, 3.44it/s] 91%|█████████ | 338653/371472 [4:46:43<2:44:05, 3.33it/s] 91%|█████████ | 338654/371472 [4:46:43<2:44:51, 3.32it/s] 91%|█████████ | 338655/371472 [4:46:44<2:50:31, 3.21it/s] 91%|█████████ | 338656/371472 [4:46:44<2:52:48, 3.16it/s] 91%|█████████ | 338657/371472 [4:46:44<2:44:08, 3.33it/s] 91%|█████████ | 338658/371472 [4:46:44<2:38:42, 3.45it/s] 91%|█████████ | 338659/371472 [4:46:45<2:35:46, 3.51it/s] 91%|█████████ | 338660/371472 [4:46:45<2:33:37, 3.56it/s] {'loss': 2.5961, 'learning_rate': 1.7953952897067375e-07, 'epoch': 14.59} + 91%|█████████ | 338660/371472 [4:46:45<2:33:37, 3.56it/s] 91%|█████████ | 338661/371472 [4:46:45<2:32:44, 3.58it/s] 91%|█████████ | 338662/371472 [4:46:45<2:27:53, 3.70it/s] 91%|█████████ | 338663/371472 [4:46:46<2:31:07, 3.62it/s] 91%|█████████ | 338664/371472 [4:46:46<2:32:54, 3.58it/s] 91%|█████████ | 338665/371472 [4:46:46<2:34:53, 3.53it/s] 91%|█████████ | 338666/371472 [4:46:47<2:33:11, 3.57it/s] 91%|█████████ | 338667/371472 [4:46:47<2:39:01, 3.44it/s] 91%|█████████ | 338668/371472 [4:46:47<2:39:17, 3.43it/s] 91%|█████████ | 338669/371472 [4:46:47<2:39:07, 3.44it/s] 91%|█████████ | 338670/371472 [4:46:48<2:36:21, 3.50it/s] 91%|█████████ | 338671/371472 [4:46:48<2:31:34, 3.61it/s] 91%|█████████ | 338672/371472 [4:46:48<2:30:36, 3.63it/s] 91%|█████████ | 338673/371472 [4:46:49<2:34:11, 3.55it/s] 91%|█████████ | 338674/371472 [4:46:49<2:29:17, 3.66it/s] 91%|█████████ | 338675/371472 [4:46:49<2:27:15, 3.71it/s] 91%|█████████ | 338676/371472 [4:46:49<2:21:48, 3.85it/s] 91%|█████████ | 338677/371472 [4:46:50<2:21:08, 3.87it/s] 91%|█████████ | 338678/371472 [4:46:50<2:18:20, 3.95it/s] 91%|█████████ | 338679/371472 [4:46:50<2:18:35, 3.94it/s] 91%|█████████ | 338680/371472 [4:46:50<2:25:40, 3.75it/s] {'loss': 2.6832, 'learning_rate': 1.794910469951949e-07, 'epoch': 14.59} + 91%|█████████ | 338680/371472 [4:46:50<2:25:40, 3.75it/s] 91%|█████████ | 338681/371472 [4:46:51<2:26:53, 3.72it/s] 91%|█████████ | 338682/371472 [4:46:51<2:25:44, 3.75it/s] 91%|█████████ | 338683/371472 [4:46:51<2:26:30, 3.73it/s] 91%|█████████ | 338684/371472 [4:46:52<2:37:40, 3.47it/s] 91%|█████████ | 338685/371472 [4:46:52<2:31:04, 3.62it/s] 91%|█████████ | 338686/371472 [4:46:52<2:36:20, 3.50it/s] 91%|█████████ | 338687/371472 [4:46:52<2:35:38, 3.51it/s] 91%|█████████ | 338688/371472 [4:46:53<2:43:19, 3.35it/s] 91%|█████████ | 338689/371472 [4:46:53<2:36:12, 3.50it/s] 91%|█████████ | 338690/371472 [4:46:53<2:40:02, 3.41it/s] 91%|█████████ | 338691/371472 [4:46:54<2:35:13, 3.52it/s] 91%|█████████ | 338692/371472 [4:46:54<2:30:53, 3.62it/s] 91%|█████████ | 338693/371472 [4:46:54<2:30:29, 3.63it/s] 91%|█████████ | 338694/371472 [4:46:54<2:30:06, 3.64it/s] 91%|█████████ | 338695/371472 [4:46:55<2:27:25, 3.71it/s] 91%|█████████ | 338696/371472 [4:46:55<2:23:36, 3.80it/s] 91%|█████████ | 338697/371472 [4:46:55<2:23:41, 3.80it/s] 91%|█████████ | 338698/371472 [4:46:55<2:28:20, 3.68it/s] 91%|█████████ | 338699/371472 [4:46:56<2:37:54, 3.46it/s] 91%|█████████ | 338700/371472 [4:46:56<2:33:47, 3.55it/s] {'loss': 2.6731, 'learning_rate': 1.7944256501971597e-07, 'epoch': 14.59} + 91%|█████████ | 338700/371472 [4:46:56<2:33:47, 3.55it/s] 91%|█████████ | 338701/371472 [4:46:56<2:37:54, 3.46it/s] 91%|█████████ | 338702/371472 [4:46:57<2:31:45, 3.60it/s] 91%|█████████ | 338703/371472 [4:46:57<2:37:22, 3.47it/s] 91%|█████████ | 338704/371472 [4:46:57<2:41:04, 3.39it/s] 91%|█████████ | 338705/371472 [4:46:57<2:44:57, 3.31it/s] 91%|█████████ | 338706/371472 [4:46:58<2:49:41, 3.22it/s] 91%|█████████ | 338707/371472 [4:46:58<2:41:28, 3.38it/s] 91%|█████████ | 338708/371472 [4:46:58<2:42:29, 3.36it/s] 91%|█████████ | 338709/371472 [4:46:59<2:34:54, 3.52it/s] 91%|█████████ | 338710/371472 [4:46:59<2:38:08, 3.45it/s] 91%|█████████ | 338711/371472 [4:46:59<2:35:44, 3.51it/s] 91%|█████████ | 338712/371472 [4:46:59<2:31:12, 3.61it/s] 91%|█████████ | 338713/371472 [4:47:00<2:26:40, 3.72it/s] 91%|█████████ | 338714/371472 [4:47:00<2:23:24, 3.81it/s] 91%|█████████ | 338715/371472 [4:47:00<2:25:58, 3.74it/s] 91%|█████████ | 338716/371472 [4:47:01<2:29:12, 3.66it/s] 91%|█████████ | 338717/371472 [4:47:01<2:29:32, 3.65it/s] 91%|█████████ | 338718/371472 [4:47:01<2:31:45, 3.60it/s] 91%|█████████ | 338719/371472 [4:47:01<2:33:10, 3.56it/s] 91%|█████████ | 338720/371472 [4:47:02<2:33:14, 3.56it/s] {'loss': 2.5802, 'learning_rate': 1.7939408304423712e-07, 'epoch': 14.59} + 91%|█████████ | 338720/371472 [4:47:02<2:33:14, 3.56it/s] 91%|█████████ | 338721/371472 [4:47:02<2:28:20, 3.68it/s] 91%|█████████ | 338722/371472 [4:47:02<2:35:37, 3.51it/s] 91%|█████████ | 338723/371472 [4:47:03<2:38:15, 3.45it/s] 91%|█████████ | 338724/371472 [4:47:03<2:34:09, 3.54it/s] 91%|█████████ | 338725/371472 [4:47:03<2:39:03, 3.43it/s] 91%|█████████ | 338726/371472 [4:47:03<2:38:41, 3.44it/s] 91%|█████████ | 338727/371472 [4:47:04<2:52:06, 3.17it/s] 91%|█████████ | 338728/371472 [4:47:04<3:09:49, 2.87it/s] 91%|█████████ | 338729/371472 [4:47:04<2:59:26, 3.04it/s] 91%|█████████ | 338730/371472 [4:47:05<2:53:14, 3.15it/s] 91%|█████████ | 338731/371472 [4:47:05<2:43:47, 3.33it/s] 91%|█████████ | 338732/371472 [4:47:05<2:39:18, 3.43it/s] 91%|█████████ | 338733/371472 [4:47:06<2:40:05, 3.41it/s] 91%|█████████ | 338734/371472 [4:47:06<2:37:10, 3.47it/s] 91%|█████████ | 338735/371472 [4:47:06<2:36:40, 3.48it/s] 91%|█████████ | 338736/371472 [4:47:06<2:41:05, 3.39it/s] 91%|█████████ | 338737/371472 [4:47:07<2:37:54, 3.45it/s] 91%|█████████ | 338738/371472 [4:47:07<2:32:36, 3.57it/s] 91%|█████████ | 338739/371472 [4:47:07<2:28:44, 3.67it/s] 91%|█████████ | 338740/371472 [4:47:08<2:29:32, 3.65it/s] {'loss': 2.4937, 'learning_rate': 1.7934560106875817e-07, 'epoch': 14.59} + 91%|█████████ | 338740/371472 [4:47:08<2:29:32, 3.65it/s] 91%|█████████ | 338741/371472 [4:47:08<2:34:09, 3.54it/s] 91%|█████████ | 338742/371472 [4:47:08<2:34:06, 3.54it/s] 91%|█████████ | 338743/371472 [4:47:08<2:29:36, 3.65it/s] 91%|█████████ | 338744/371472 [4:47:09<2:26:21, 3.73it/s] 91%|█████████ | 338745/371472 [4:47:09<2:27:05, 3.71it/s] 91%|█████████ | 338746/371472 [4:47:09<2:30:36, 3.62it/s] 91%|█████████ | 338747/371472 [4:47:09<2:29:01, 3.66it/s] 91%|█████████ | 338748/371472 [4:47:10<2:30:30, 3.62it/s] 91%|█████████ | 338749/371472 [4:47:10<2:30:17, 3.63it/s] 91%|█████████ | 338750/371472 [4:47:10<2:38:03, 3.45it/s] 91%|█████████ | 338751/371472 [4:47:11<2:33:13, 3.56it/s] 91%|█████████ | 338752/371472 [4:47:11<2:32:12, 3.58it/s] 91%|█████████ | 338753/371472 [4:47:11<2:34:05, 3.54it/s] 91%|█████████ | 338754/371472 [4:47:11<2:29:49, 3.64it/s] 91%|█████████ | 338755/371472 [4:47:12<2:28:29, 3.67it/s] 91%|█████████ | 338756/371472 [4:47:12<2:28:57, 3.66it/s] 91%|█████████ | 338757/371472 [4:47:12<2:25:05, 3.76it/s] 91%|█████████ | 338758/371472 [4:47:13<2:32:13, 3.58it/s] 91%|█████████ | 338759/371472 [4:47:13<2:35:31, 3.51it/s] 91%|█████████ | 338760/371472 [4:47:13<3:04:38, 2.95it/s] {'loss': 2.6134, 'learning_rate': 1.7929711909327935e-07, 'epoch': 14.59} + 91%|█████████ | 338760/371472 [4:47:13<3:04:38, 2.95it/s] 91%|█████████ | 338761/371472 [4:47:14<2:52:44, 3.16it/s] 91%|█████████ | 338762/371472 [4:47:14<2:48:46, 3.23it/s] 91%|█████████ | 338763/371472 [4:47:14<3:05:31, 2.94it/s] 91%|█████████ | 338764/371472 [4:47:15<2:56:51, 3.08it/s] 91%|█████████ | 338765/371472 [4:47:15<2:44:40, 3.31it/s] 91%|█████████ | 338766/371472 [4:47:15<2:42:07, 3.36it/s] 91%|█████████ | 338767/371472 [4:47:15<2:43:49, 3.33it/s] 91%|█████████ | 338768/371472 [4:47:16<2:42:36, 3.35it/s] 91%|█████████ | 338769/371472 [4:47:16<2:40:20, 3.40it/s] 91%|█████████ | 338770/371472 [4:47:16<2:50:13, 3.20it/s] 91%|█████████ | 338771/371472 [4:47:17<2:54:41, 3.12it/s] 91%|█████████ | 338772/371472 [4:47:17<2:47:46, 3.25it/s] 91%|█████████ | 338773/371472 [4:47:17<2:41:02, 3.38it/s] 91%|█████████ | 338774/371472 [4:47:18<2:43:25, 3.33it/s] 91%|█████████ | 338775/371472 [4:47:18<2:46:00, 3.28it/s] 91%|█████████ | 338776/371472 [4:47:18<2:42:34, 3.35it/s] 91%|█████████ | 338777/371472 [4:47:18<2:42:56, 3.34it/s] 91%|█████████ | 338778/371472 [4:47:19<2:33:43, 3.54it/s] 91%|█████████ | 338779/371472 [4:47:19<2:34:20, 3.53it/s] 91%|█████████ | 338780/371472 [4:47:19<2:31:55, 3.59it/s] {'loss': 2.4656, 'learning_rate': 1.792486371178004e-07, 'epoch': 14.59} + 91%|█████████ | 338780/371472 [4:47:19<2:31:55, 3.59it/s] 91%|█████████ | 338781/371472 [4:47:20<2:35:53, 3.49it/s] 91%|█████████ | 338782/371472 [4:47:20<2:45:32, 3.29it/s] 91%|█████████ | 338783/371472 [4:47:20<2:39:46, 3.41it/s] 91%|█████████ | 338784/371472 [4:47:20<2:40:28, 3.39it/s] 91%|█████████ | 338785/371472 [4:47:21<2:39:51, 3.41it/s] 91%|█████████ | 338786/371472 [4:47:21<2:47:43, 3.25it/s] 91%|█████████ | 338787/371472 [4:47:21<2:45:28, 3.29it/s] 91%|█████████ | 338788/371472 [4:47:22<2:37:45, 3.45it/s] 91%|█████████ | 338789/371472 [4:47:22<2:30:18, 3.62it/s] 91%|█████████ | 338790/371472 [4:47:22<2:29:25, 3.65it/s] 91%|█████████ | 338791/371472 [4:47:22<2:31:24, 3.60it/s] 91%|█████████ | 338792/371472 [4:47:23<2:37:08, 3.47it/s] 91%|█████████ | 338793/371472 [4:47:23<2:36:01, 3.49it/s] 91%|█████████ | 338794/371472 [4:47:23<2:31:50, 3.59it/s] 91%|█████████ | 338795/371472 [4:47:24<2:29:51, 3.63it/s] 91%|█████████ | 338796/371472 [4:47:24<2:26:11, 3.73it/s] 91%|█████████ | 338797/371472 [4:47:24<2:25:58, 3.73it/s] 91%|█████████ | 338798/371472 [4:47:24<2:29:47, 3.64it/s] 91%|█████████ | 338799/371472 [4:47:25<2:41:17, 3.38it/s] 91%|█████████ | 338800/371472 [4:47:25<2:33:15, 3.55it/s] {'loss': 2.6006, 'learning_rate': 1.7920015514232154e-07, 'epoch': 14.59} + 91%|█████████ | 338800/371472 [4:47:25<2:33:15, 3.55it/s] 91%|█████████ | 338801/371472 [4:47:25<2:35:58, 3.49it/s] 91%|█████████ | 338802/371472 [4:47:25<2:33:34, 3.55it/s] 91%|█████████ | 338803/371472 [4:47:26<2:34:45, 3.52it/s] 91%|█████████ | 338804/371472 [4:47:26<2:33:23, 3.55it/s] 91%|█████████ | 338805/371472 [4:47:26<2:33:27, 3.55it/s] 91%|█████████ | 338806/371472 [4:47:27<2:28:24, 3.67it/s] 91%|█████████ | 338807/371472 [4:47:27<2:24:41, 3.76it/s] 91%|█████████ | 338808/371472 [4:47:27<2:35:19, 3.50it/s] 91%|█████████ | 338809/371472 [4:47:27<2:34:31, 3.52it/s] 91%|█████████ | 338810/371472 [4:47:28<2:35:19, 3.50it/s] 91%|█████████ | 338811/371472 [4:47:28<2:43:14, 3.33it/s] 91%|█████████ | 338812/371472 [4:47:28<2:35:59, 3.49it/s] 91%|█████████ | 338813/371472 [4:47:29<2:36:58, 3.47it/s] 91%|█████████ | 338814/371472 [4:47:29<2:35:45, 3.49it/s] 91%|█████████ | 338815/371472 [4:47:29<2:39:06, 3.42it/s] 91%|█████████ | 338816/371472 [4:47:29<2:33:29, 3.55it/s] 91%|█████████ | 338817/371472 [4:47:30<2:38:39, 3.43it/s] 91%|█████████ | 338818/371472 [4:47:30<2:34:32, 3.52it/s] 91%|█████████ | 338819/371472 [4:47:30<2:30:18, 3.62it/s] 91%|█████████ | 338820/371472 [4:47:31<2:28:44, 3.66it/s] {'loss': 2.5593, 'learning_rate': 1.7915167316684261e-07, 'epoch': 14.59} + 91%|█████████ | 338820/371472 [4:47:31<2:28:44, 3.66it/s] 91%|█████████ | 338821/371472 [4:47:31<2:29:31, 3.64it/s] 91%|█████████ | 338822/371472 [4:47:31<2:34:09, 3.53it/s] 91%|█████████ | 338823/371472 [4:47:31<2:30:59, 3.60it/s] 91%|█████████ | 338824/371472 [4:47:32<2:46:14, 3.27it/s] 91%|█████████ | 338825/371472 [4:47:32<2:50:16, 3.20it/s] 91%|█████████ | 338826/371472 [4:47:32<2:46:24, 3.27it/s] 91%|█████████ | 338827/371472 [4:47:33<2:37:09, 3.46it/s] 91%|█████████ | 338828/371472 [4:47:33<2:30:25, 3.62it/s] 91%|█████████ | 338829/371472 [4:47:33<2:25:02, 3.75it/s] 91%|█████████ | 338830/371472 [4:47:33<2:21:00, 3.86it/s] 91%|█████████ | 338831/371472 [4:47:34<2:20:37, 3.87it/s] 91%|█████████ | 338832/371472 [4:47:34<2:27:04, 3.70it/s] 91%|█████████ | 338833/371472 [4:47:34<2:28:53, 3.65it/s] 91%|█████████ | 338834/371472 [4:47:34<2:26:59, 3.70it/s] 91%|█████████ | 338835/371472 [4:47:35<2:36:36, 3.47it/s] 91%|█████████ | 338836/371472 [4:47:35<2:38:06, 3.44it/s] 91%|█████████ | 338837/371472 [4:47:35<2:35:55, 3.49it/s] 91%|█████████ | 338838/371472 [4:47:36<2:35:07, 3.51it/s] 91%|█████████ | 338839/371472 [4:47:36<2:29:34, 3.64it/s] 91%|█████████ | 338840/371472 [4:47:36<2:29:47, 3.63it/s] {'loss': 2.6959, 'learning_rate': 1.7910319119136377e-07, 'epoch': 14.59} + 91%|█████████ | 338840/371472 [4:47:36<2:29:47, 3.63it/s] 91%|█████████ | 338841/371472 [4:47:37<2:33:55, 3.53it/s] 91%|█████████ | 338842/371472 [4:47:37<2:31:18, 3.59it/s] 91%|█████████ | 338843/371472 [4:47:37<2:29:30, 3.64it/s] 91%|█████████ | 338844/371472 [4:47:37<2:26:35, 3.71it/s] 91%|█████████ | 338845/371472 [4:47:38<2:23:14, 3.80it/s] 91%|█████████ | 338846/371472 [4:47:38<2:24:14, 3.77it/s] 91%|█████████ | 338847/371472 [4:47:38<2:25:44, 3.73it/s] 91%|█████████ | 338848/371472 [4:47:38<2:23:30, 3.79it/s] 91%|█████████ | 338849/371472 [4:47:39<2:34:39, 3.52it/s] 91%|█████████ | 338850/371472 [4:47:39<2:32:55, 3.56it/s] 91%|█████████ | 338851/371472 [4:47:39<2:31:21, 3.59it/s] 91%|█████████ | 338852/371472 [4:47:40<2:38:55, 3.42it/s] 91%|█████████ | 338853/371472 [4:47:40<2:45:51, 3.28it/s] 91%|█████████ | 338854/371472 [4:47:40<2:36:55, 3.46it/s] 91%|█████████ | 338855/371472 [4:47:40<2:40:35, 3.39it/s] 91%|█████████ | 338856/371472 [4:47:41<2:39:17, 3.41it/s] 91%|█████████ | 338857/371472 [4:47:41<2:49:40, 3.20it/s] 91%|█████████ | 338858/371472 [4:47:41<2:39:32, 3.41it/s] 91%|█████████ | 338859/371472 [4:47:42<2:43:45, 3.32it/s] 91%|█████████ | 338860/371472 [4:47:42<2:41:17, 3.37it/s] {'loss': 2.5178, 'learning_rate': 1.790547092158848e-07, 'epoch': 14.6} + 91%|█████████ | 338860/371472 [4:47:42<2:41:17, 3.37it/s] 91%|█████████ | 338861/371472 [4:47:42<2:35:07, 3.50it/s] 91%|█████████ | 338862/371472 [4:47:42<2:29:52, 3.63it/s] 91%|█████████ | 338863/371472 [4:47:43<2:31:12, 3.59it/s] 91%|█████████ | 338864/371472 [4:47:43<2:29:48, 3.63it/s] 91%|█████████ | 338865/371472 [4:47:43<2:27:11, 3.69it/s] 91%|█████████ | 338866/371472 [4:47:44<2:33:02, 3.55it/s] 91%|█████████ | 338867/371472 [4:47:44<2:27:17, 3.69it/s] 91%|█████████ | 338868/371472 [4:47:44<2:29:36, 3.63it/s] 91%|█████████ | 338869/371472 [4:47:44<2:28:45, 3.65it/s] 91%|█████████ | 338870/371472 [4:47:45<2:33:35, 3.54it/s] 91%|█████████ | 338871/371472 [4:47:45<2:32:09, 3.57it/s] 91%|█████████ | 338872/371472 [4:47:45<2:30:48, 3.60it/s] 91%|█████████ | 338873/371472 [4:47:46<2:30:55, 3.60it/s] 91%|█████████ | 338874/371472 [4:47:46<2:30:58, 3.60it/s] 91%|█████████ | 338875/371472 [4:47:46<2:31:13, 3.59it/s] 91%|█████████ | 338876/371472 [4:47:46<2:33:07, 3.55it/s] 91%|█████████ | 338877/371472 [4:47:47<2:36:03, 3.48it/s] 91%|█████████ | 338878/371472 [4:47:47<2:32:17, 3.57it/s] 91%|█████████ | 338879/371472 [4:47:47<2:37:38, 3.45it/s] 91%|█████████ | 338880/371472 [4:47:48<2:34:19, 3.52it/s] {'loss': 2.5967, 'learning_rate': 1.79006227240406e-07, 'epoch': 14.6} + 91%|█████████ | 338880/371472 [4:47:48<2:34:19, 3.52it/s] 91%|█████████ | 338881/371472 [4:47:48<2:29:51, 3.62it/s] 91%|█████████ | 338882/371472 [4:47:48<2:28:06, 3.67it/s] 91%|█████████ | 338883/371472 [4:47:48<2:38:07, 3.44it/s] 91%|█████████ | 338884/371472 [4:47:49<2:35:11, 3.50it/s] 91%|█████████ | 338885/371472 [4:47:49<2:30:07, 3.62it/s] 91%|█████████ | 338886/371472 [4:47:49<2:29:50, 3.62it/s] 91%|█████████ | 338887/371472 [4:47:49<2:28:21, 3.66it/s] 91%|█████████ | 338888/371472 [4:47:50<2:27:25, 3.68it/s] 91%|█████████ | 338889/371472 [4:47:50<2:24:07, 3.77it/s] 91%|█████████ | 338890/371472 [4:47:50<2:30:33, 3.61it/s] 91%|████���████ | 338891/371472 [4:47:51<2:51:19, 3.17it/s] 91%|█████████ | 338892/371472 [4:47:51<2:45:40, 3.28it/s] 91%|█████████ | 338893/371472 [4:47:51<2:39:28, 3.40it/s] 91%|█████████ | 338894/371472 [4:47:51<2:36:44, 3.46it/s] 91%|█████████ | 338895/371472 [4:47:52<2:33:55, 3.53it/s] 91%|█████████ | 338896/371472 [4:47:52<2:28:57, 3.64it/s] 91%|█████████ | 338897/371472 [4:47:52<2:34:21, 3.52it/s] 91%|█████████ | 338898/371472 [4:47:53<2:33:27, 3.54it/s] 91%|█████████ | 338899/371472 [4:47:53<2:30:07, 3.62it/s] 91%|█████████ | 338900/371472 [4:47:53<2:29:29, 3.63it/s] {'loss': 2.5897, 'learning_rate': 1.7895774526492706e-07, 'epoch': 14.6} + 91%|█████████ | 338900/371472 [4:47:53<2:29:29, 3.63it/s] 91%|█████████ | 338901/371472 [4:47:53<2:34:36, 3.51it/s] 91%|█████████ | 338902/371472 [4:47:54<2:28:48, 3.65it/s] 91%|█████████ | 338903/371472 [4:47:54<2:35:21, 3.49it/s] 91%|█████████ | 338904/371472 [4:47:54<2:44:33, 3.30it/s] 91%|█████████ | 338905/371472 [4:47:55<2:34:00, 3.52it/s] 91%|█████████ | 338906/371472 [4:47:55<2:29:46, 3.62it/s] 91%|█████████ | 338907/371472 [4:47:55<2:32:01, 3.57it/s] 91%|█████████ | 338908/371472 [4:47:55<2:29:54, 3.62it/s] 91%|█████████ | 338909/371472 [4:47:56<2:39:00, 3.41it/s] 91%|█████████ | 338910/371472 [4:47:56<2:30:30, 3.61it/s] 91%|█████████ | 338911/371472 [4:47:56<2:32:40, 3.55it/s] 91%|█████████ | 338912/371472 [4:47:57<2:33:18, 3.54it/s] 91%|█████████ | 338913/371472 [4:47:57<2:31:34, 3.58it/s] 91%|█████████ | 338914/371472 [4:47:57<2:32:44, 3.55it/s] 91%|█████████ | 338915/371472 [4:47:57<2:28:45, 3.65it/s] 91%|█████████ | 338916/371472 [4:47:58<2:25:57, 3.72it/s] 91%|█████████ | 338917/371472 [4:47:58<3:00:35, 3.00it/s] 91%|█████████ | 338918/371472 [4:47:58<2:57:15, 3.06it/s] 91%|█████████ | 338919/371472 [4:47:59<2:48:10, 3.23it/s] 91%|█████████ | 338920/371472 [4:47:59<2:40:13, 3.39it/s] {'loss': 2.496, 'learning_rate': 1.7890926328944818e-07, 'epoch': 14.6} + 91%|█████████ | 338920/371472 [4:47:59<2:40:13, 3.39it/s] 91%|█████████ | 338921/371472 [4:47:59<2:36:54, 3.46it/s] 91%|█████████ | 338922/371472 [4:47:59<2:32:05, 3.57it/s] 91%|█████████ | 338923/371472 [4:48:00<2:25:16, 3.73it/s] 91%|█████████ | 338924/371472 [4:48:00<2:34:27, 3.51it/s] 91%|█████████ | 338925/371472 [4:48:00<2:30:28, 3.60it/s] 91%|█████████ | 338926/371472 [4:48:01<2:43:18, 3.32it/s] 91%|█████████ | 338927/371472 [4:48:01<2:41:17, 3.36it/s] 91%|█████████ | 338928/371472 [4:48:01<2:35:00, 3.50it/s] 91%|█████████ | 338929/371472 [4:48:01<2:34:47, 3.50it/s] 91%|█████████ | 338930/371472 [4:48:02<2:35:05, 3.50it/s] 91%|█████████ | 338931/371472 [4:48:02<2:29:48, 3.62it/s] 91%|█████████ | 338932/371472 [4:48:02<2:34:21, 3.51it/s] 91%|█████████ | 338933/371472 [4:48:03<2:32:45, 3.55it/s] 91%|█████████ | 338934/371472 [4:48:03<2:32:21, 3.56it/s] 91%|█████████ | 338935/371472 [4:48:03<2:43:49, 3.31it/s] 91%|█████████ | 338936/371472 [4:48:04<2:37:50, 3.44it/s] 91%|█████████ | 338937/371472 [4:48:04<2:28:30, 3.65it/s] 91%|█████████ | 338938/371472 [4:48:04<2:26:04, 3.71it/s] 91%|█████████ | 338939/371472 [4:48:04<2:25:49, 3.72it/s] 91%|█████████ | 338940/371472 [4:48:05<2:23:24, 3.78it/s] {'loss': 2.7591, 'learning_rate': 1.7886078131396925e-07, 'epoch': 14.6} + 91%|█████████ | 338940/371472 [4:48:05<2:23:24, 3.78it/s] 91%|█████████ | 338941/371472 [4:48:05<2:26:02, 3.71it/s] 91%|█████████ | 338942/371472 [4:48:05<2:45:18, 3.28it/s] 91%|█████████ | 338943/371472 [4:48:05<2:41:41, 3.35it/s] 91%|█████████ | 338944/371472 [4:48:06<2:38:42, 3.42it/s] 91%|█████████ | 338945/371472 [4:48:06<2:48:19, 3.22it/s] 91%|█████████ | 338946/371472 [4:48:06<2:40:07, 3.39it/s] 91%|█████████ | 338947/371472 [4:48:07<2:35:50, 3.48it/s] 91%|█████████ | 338948/371472 [4:48:07<2:40:57, 3.37it/s] 91%|█████████ | 338949/371472 [4:48:07<2:38:22, 3.42it/s] 91%|█████████ | 338950/371472 [4:48:08<2:39:10, 3.41it/s] 91%|█████████ | 338951/371472 [4:48:08<2:40:18, 3.38it/s] 91%|█████████ | 338952/371472 [4:48:08<2:33:25, 3.53it/s] 91%|█████████ | 338953/371472 [4:48:08<2:40:09, 3.38it/s] 91%|█████████ | 338954/371472 [4:48:09<2:51:11, 3.17it/s] 91%|█████████ | 338955/371472 [4:48:09<2:41:58, 3.35it/s] 91%|█████████ | 338956/371472 [4:48:09<2:44:12, 3.30it/s] 91%|█████████ | 338957/371472 [4:48:10<2:44:12, 3.30it/s] 91%|█████████ | 338958/371472 [4:48:10<2:47:20, 3.24it/s] 91%|█████████ | 338959/371472 [4:48:10<2:42:48, 3.33it/s] 91%|█████████ | 338960/371472 [4:48:11<3:18:50, 2.73it/s] {'loss': 2.5345, 'learning_rate': 1.7881229933849043e-07, 'epoch': 14.6} + 91%|█████████ | 338960/371472 [4:48:11<3:18:50, 2.73it/s] 91%|█████████ | 338961/371472 [4:48:11<3:01:24, 2.99it/s] 91%|█████████ | 338962/371472 [4:48:11<2:49:15, 3.20it/s] 91%|█████████ | 338963/371472 [4:48:12<2:40:16, 3.38it/s] 91%|█████████ | 338964/371472 [4:48:12<2:33:21, 3.53it/s] 91%|█████████ | 338965/371472 [4:48:12<2:33:29, 3.53it/s] 91%|█████████ | 338966/371472 [4:48:12<2:42:02, 3.34it/s] 91%|█████████ | 338967/371472 [4:48:13<2:43:25, 3.32it/s] 91%|█████████ | 338968/371472 [4:48:13<2:34:21, 3.51it/s] 91%|█████████▏| 338969/371472 [4:48:13<2:31:34, 3.57it/s] 91%|█████████▏| 338970/371472 [4:48:14<2:31:51, 3.57it/s] 91%|█████████▏| 338971/371472 [4:48:14<2:27:25, 3.67it/s] 91%|█████████▏| 338972/371472 [4:48:14<2:29:15, 3.63it/s] 91%|█████████▏| 338973/371472 [4:48:14<2:26:06, 3.71it/s] 91%|█████████▏| 338974/371472 [4:48:15<2:38:54, 3.41it/s] 91%|█████████▏| 338975/371472 [4:48:15<2:39:24, 3.40it/s] 91%|█████████▏| 338976/371472 [4:48:15<2:37:40, 3.43it/s] 91%|█████████▏| 338977/371472 [4:48:15<2:30:46, 3.59it/s] 91%|█████████▏| 338978/371472 [4:48:16<2:23:39, 3.77it/s] 91%|█████████▏| 338979/371472 [4:48:16<2:22:59, 3.79it/s] 91%|█████████▏| 338980/371472 [4:48:16<2:37:31, 3.44it/s] {'loss': 2.7493, 'learning_rate': 1.7876381736301148e-07, 'epoch': 14.6} + 91%|█████████▏| 338980/371472 [4:48:16<2:37:31, 3.44it/s] 91%|█████████▏| 338981/371472 [4:48:17<2:34:20, 3.51it/s] 91%|█████████▏| 338982/371472 [4:48:17<2:54:28, 3.10it/s] 91%|█████████▏| 338983/371472 [4:48:17<2:58:18, 3.04it/s] 91%|█████████▏| 338984/371472 [4:48:18<2:49:56, 3.19it/s] 91%|█████████▏| 338985/371472 [4:48:18<2:42:50, 3.32it/s] 91%|█████████▏| 338986/371472 [4:48:18<2:51:46, 3.15it/s] 91%|█████████▏| 338987/371472 [4:48:19<2:42:56, 3.32it/s] 91%|█████████▏| 338988/371472 [4:48:19<2:36:10, 3.47it/s] 91%|█████████▏| 338989/371472 [4:48:19<2:32:47, 3.54it/s] 91%|█████████▏| 338990/371472 [4:48:19<2:31:06, 3.58it/s] 91%|█████████▏| 338991/371472 [4:48:20<2:33:30, 3.53it/s] 91%|█████████▏| 338992/371472 [4:48:20<2:35:47, 3.47it/s] 91%|█████████▏| 338993/371472 [4:48:20<2:34:04, 3.51it/s] 91%|█████████▏| 338994/371472 [4:48:20<2:31:10, 3.58it/s] 91%|█████████▏| 338995/371472 [4:48:21<2:32:16, 3.55it/s] 91%|█████████▏| 338996/371472 [4:48:21<2:27:44, 3.66it/s] 91%|█████████▏| 338997/371472 [4:48:21<2:26:13, 3.70it/s] 91%|█████████▏| 338998/371472 [4:48:22<2:24:40, 3.74it/s] 91%|█████████▏| 338999/371472 [4:48:22<2:22:15, 3.80it/s] 91%|█████████▏| 339000/371472 [4:48:22<2:21:00, 3.84it/s] {'loss': 2.694, 'learning_rate': 1.7871533538753263e-07, 'epoch': 14.6} + 91%|█████████▏| 339000/371472 [4:48:22<2:21:00, 3.84it/s] 91%|█████████▏| 339001/371472 [4:48:22<2:31:46, 3.57it/s] 91%|█████████▏| 339002/371472 [4:48:23<2:36:41, 3.45it/s] 91%|█████████▏| 339003/371472 [4:48:23<2:37:56, 3.43it/s] 91%|█████████▏| 339004/371472 [4:48:23<2:49:56, 3.18it/s] 91%|█████████▏| 339005/371472 [4:48:24<2:47:24, 3.23it/s] 91%|█████████▏| 339006/371472 [4:48:24<2:42:37, 3.33it/s] 91%|█████████▏| 339007/371472 [4:48:24<2:38:08, 3.42it/s] 91%|█████████▏| 339008/371472 [4:48:24<2:34:38, 3.50it/s] 91%|█████████▏| 339009/371472 [4:48:25<2:59:14, 3.02it/s] 91%|█████████▏| 339010/371472 [4:48:25<2:51:02, 3.16it/s] 91%|█████████▏| 339011/371472 [4:48:25<2:41:18, 3.35it/s] 91%|█████████▏| 339012/371472 [4:48:26<2:40:57, 3.36it/s] 91%|█████████▏| 339013/371472 [4:48:26<2:33:17, 3.53it/s] 91%|█████████▏| 339014/371472 [4:48:26<2:37:40, 3.43it/s] 91%|█████████▏| 339015/371472 [4:48:27<2:33:00, 3.54it/s] 91%|█████████▏| 339016/371472 [4:48:27<2:43:28, 3.31it/s] 91%|█████████▏| 339017/371472 [4:48:27<2:40:12, 3.38it/s] 91%|█████████▏| 339018/371472 [4:48:28<2:45:54, 3.26it/s] 91%|█████████▏| 339019/371472 [4:48:28<2:42:35, 3.33it/s] 91%|█████████▏| 339020/371472 [4:48:28<2:38:38, 3.41it/s] {'loss': 2.5914, 'learning_rate': 1.786668534120537e-07, 'epoch': 14.6} + 91%|█████████▏| 339020/371472 [4:48:28<2:38:38, 3.41it/s] 91%|█████████▏| 339021/371472 [4:48:28<2:30:23, 3.60it/s] 91%|█████████▏| 339022/371472 [4:48:29<2:25:05, 3.73it/s] 91%|█████████▏| 339023/371472 [4:48:29<2:27:01, 3.68it/s] 91%|█████████▏| 339024/371472 [4:48:29<2:22:22, 3.80it/s] 91%|█████████▏| 339025/371472 [4:48:29<2:20:14, 3.86it/s] 91%|█████████▏| 339026/371472 [4:48:30<2:20:28, 3.85it/s] 91%|█████████▏| 339027/371472 [4:48:30<2:23:27, 3.77it/s] 91%|█████████▏| 339028/371472 [4:48:30<2:23:40, 3.76it/s] 91%|█████████▏| 339029/371472 [4:48:30<2:28:32, 3.64it/s] 91%|█████████▏| 339030/371472 [4:48:31<2:29:45, 3.61it/s] 91%|█████████▏| 339031/371472 [4:48:31<2:33:56, 3.51it/s] 91%|█████████▏| 339032/371472 [4:48:31<2:32:48, 3.54it/s] 91%|█████████▏| 339033/371472 [4:48:32<2:29:36, 3.61it/s] 91%|█████████▏| 339034/371472 [4:48:32<2:35:28, 3.48it/s] 91%|█████████▏| 339035/371472 [4:48:32<2:34:45, 3.49it/s] 91%|█████████▏| 339036/371472 [4:48:32<2:34:39, 3.50it/s] 91%|█████████▏| 339037/371472 [4:48:33<2:41:52, 3.34it/s] 91%|█████████▏| 339038/371472 [4:48:33<2:38:57, 3.40it/s] 91%|█████████▏| 339039/371472 [4:48:33<2:35:28, 3.48it/s] 91%|█████████▏| 339040/371472 [4:48:34<2:45:08, 3.27it/s] {'loss': 2.5996, 'learning_rate': 1.7861837143657485e-07, 'epoch': 14.6} + 91%|█████████▏| 339040/371472 [4:48:34<2:45:08, 3.27it/s] 91%|█████████▏| 339041/371472 [4:48:34<2:41:31, 3.35it/s] 91%|█████████▏| 339042/371472 [4:48:34<2:37:48, 3.42it/s] 91%|█████████▏| 339043/371472 [4:48:35<2:36:16, 3.46it/s] 91%|█████████▏| 339044/371472 [4:48:35<2:34:47, 3.49it/s] 91%|█████████▏| 339045/371472 [4:48:35<2:38:42, 3.41it/s] 91%|█████████▏| 339046/371472 [4:48:35<2:46:02, 3.25it/s] 91%|█████████▏| 339047/371472 [4:48:36<2:37:39, 3.43it/s] 91%|█████████▏| 339048/371472 [4:48:36<2:34:49, 3.49it/s] 91%|█████████▏| 339049/371472 [4:48:36<2:34:22, 3.50it/s] 91%|█████████▏| 339050/371472 [4:48:37<2:28:22, 3.64it/s] 91%|█████████▏| 339051/371472 [4:48:37<2:36:37, 3.45it/s] 91%|█████████▏| 339052/371472 [4:48:37<2:28:49, 3.63it/s] 91%|█████████▏| 339053/371472 [4:48:37<2:28:42, 3.63it/s] 91%|█████████▏| 339054/371472 [4:48:38<2:29:33, 3.61it/s] 91%|█████████▏| 339055/371472 [4:48:38<2:27:53, 3.65it/s] 91%|█████████▏| 339056/371472 [4:48:38<2:26:15, 3.69it/s] 91%|█████████▏| 339057/371472 [4:48:38<2:22:52, 3.78it/s] 91%|█████████▏| 339058/371472 [4:48:39<2:25:44, 3.71it/s] 91%|█████████▏| 339059/371472 [4:48:39<2:31:17, 3.57it/s] 91%|█████████▏| 339060/371472 [4:48:39<2:28:22, 3.64it/s] {'loss': 2.7092, 'learning_rate': 1.785698894610959e-07, 'epoch': 14.6} + 91%|█████████▏| 339060/371472 [4:48:39<2:28:22, 3.64it/s] 91%|█████████▏| 339061/371472 [4:48:40<2:30:50, 3.58it/s] 91%|█████████▏| 339062/371472 [4:48:40<2:31:04, 3.58it/s] 91%|█████████▏| 339063/371472 [4:48:40<2:31:11, 3.57it/s] 91%|█████████▏| 339064/371472 [4:48:40<2:26:16, 3.69it/s] 91%|█████████▏| 339065/371472 [4:48:41<2:24:16, 3.74it/s] 91%|█████████▏| 339066/371472 [4:48:41<2:28:29, 3.64it/s] 91%|█████████▏| 339067/371472 [4:48:41<2:25:16, 3.72it/s] 91%|█████████▏| 339068/371472 [4:48:41<2:26:57, 3.67it/s] 91%|█████████▏| 339069/371472 [4:48:42<2:24:20, 3.74it/s] 91%|█████████▏| 339070/371472 [4:48:42<2:25:18, 3.72it/s] 91%|█████████▏| 339071/371472 [4:48:42<2:23:36, 3.76it/s] 91%|█████████▏| 339072/371472 [4:48:42<2:19:07, 3.88it/s] 91%|█████████▏| 339073/371472 [4:48:43<2:20:16, 3.85it/s] 91%|█████████▏| 339074/371472 [4:48:43<2:22:59, 3.78it/s] 91%|█████████▏| 339075/371472 [4:48:43<2:28:45, 3.63it/s] 91%|█████████▏| 339076/371472 [4:48:44<2:29:39, 3.61it/s] 91%|█████████▏| 339077/371472 [4:48:44<2:32:54, 3.53it/s] 91%|█████████▏| 339078/371472 [4:48:44<2:28:06, 3.65it/s] 91%|█████████▏| 339079/371472 [4:48:44<2:27:59, 3.65it/s] 91%|█████████▏| 339080/371472 [4:48:45<2:26:14, 3.69it/s] {'loss': 2.6678, 'learning_rate': 1.7852140748561697e-07, 'epoch': 14.6} + 91%|█████████▏| 339080/371472 [4:48:45<2:26:14, 3.69it/s] 91%|█████████▏| 339081/371472 [4:48:45<2:33:38, 3.51it/s] 91%|█████████▏| 339082/371472 [4:48:45<2:35:05, 3.48it/s] 91%|█████████▏| 339083/371472 [4:48:46<2:31:17, 3.57it/s] 91%|█████████▏| 339084/371472 [4:48:46<2:33:56, 3.51it/s] 91%|█████████▏| 339085/371472 [4:48:46<2:33:52, 3.51it/s] 91%|█████████▏| 339086/371472 [4:48:46<2:31:54, 3.55it/s] 91%|█████████▏| 339087/371472 [4:48:47<2:29:45, 3.60it/s] 91%|█████████▏| 339088/371472 [4:48:47<2:30:27, 3.59it/s] 91%|█████████▏| 339089/371472 [4:48:47<2:29:21, 3.61it/s] 91%|█████████▏| 339090/371472 [4:48:47<2:22:31, 3.79it/s] 91%|█████████▏| 339091/371472 [4:48:48<2:37:20, 3.43it/s] 91%|█████████▏| 339092/371472 [4:48:48<2:40:35, 3.36it/s] 91%|█████████▏| 339093/371472 [4:48:48<2:39:57, 3.37it/s] 91%|█████████▏| 339094/371472 [4:48:49<2:35:15, 3.48it/s] 91%|█████████▏| 339095/371472 [4:48:49<2:30:22, 3.59it/s] 91%|█████████▏| 339096/371472 [4:48:49<2:28:45, 3.63it/s] 91%|█████████▏| 339097/371472 [4:48:50<2:30:39, 3.58it/s] 91%|█████████▏| 339098/371472 [4:48:50<2:29:19, 3.61it/s] 91%|█████████▏| 339099/371472 [4:48:50<2:25:52, 3.70it/s] 91%|█████████▏| 339100/371472 [4:48:50<2:22:24, 3.79it/s] {'loss': 2.7574, 'learning_rate': 1.7847292551013812e-07, 'epoch': 14.61} + 91%|█████████▏| 339100/371472 [4:48:50<2:22:24, 3.79it/s] 91%|█████████▏| 339101/371472 [4:48:51<2:31:47, 3.55it/s] 91%|█████████▏| 339102/371472 [4:48:51<2:41:08, 3.35it/s] 91%|█████████▏| 339103/371472 [4:48:51<2:33:49, 3.51it/s] 91%|█████████▏| 339104/371472 [4:48:51<2:34:30, 3.49it/s] 91%|█████████▏| 339105/371472 [4:48:52<2:29:24, 3.61it/s] 91%|█████████▏| 339106/371472 [4:48:52<2:32:42, 3.53it/s] 91%|█████████▏| 339107/371472 [4:48:52<2:29:06, 3.62it/s] 91%|█████████▏| 339108/371472 [4:48:53<2:35:55, 3.46it/s] 91%|█████████▏| 339109/371472 [4:48:53<2:40:21, 3.36it/s] 91%|█████████▏| 339110/371472 [4:48:53<2:41:20, 3.34it/s] 91%|█████████▏| 339111/371472 [4:48:54<2:47:09, 3.23it/s] 91%|█████████▏| 339112/371472 [4:48:54<2:52:20, 3.13it/s] 91%|█████████▏| 339113/371472 [4:48:54<3:01:08, 2.98it/s] 91%|█████████▏| 339114/371472 [4:48:55<3:02:43, 2.95it/s] 91%|█████████▏| 339115/371472 [4:48:55<2:53:27, 3.11it/s] 91%|█████████▏| 339116/371472 [4:48:55<2:51:10, 3.15it/s] 91%|█████████▏| 339117/371472 [4:48:55<2:40:40, 3.36it/s] 91%|█████████▏| 339118/371472 [4:48:56<2:40:30, 3.36it/s] 91%|█████████▏| 339119/371472 [4:48:56<2:39:55, 3.37it/s] 91%|█████████▏| 339120/371472 [4:48:56<2:38:25, 3.40it/s] {'loss': 2.5218, 'learning_rate': 1.7842444353465916e-07, 'epoch': 14.61} + 91%|█████████▏| 339120/371472 [4:48:56<2:38:25, 3.40it/s] 91%|█████████▏| 339121/371472 [4:48:57<2:48:07, 3.21it/s] 91%|█████████▏| 339122/371472 [4:48:57<2:45:51, 3.25it/s] 91%|█████████▏| 339123/371472 [4:48:57<2:40:13, 3.36it/s] 91%|█████████▏| 339124/371472 [4:48:58<2:30:31, 3.58it/s] 91%|█████████▏| 339125/371472 [4:48:58<2:29:00, 3.62it/s] 91%|█████████▏| 339126/371472 [4:48:58<2:32:20, 3.54it/s] 91%|█████████▏| 339127/371472 [4:48:58<2:33:51, 3.50it/s] 91%|█████████▏| 339128/371472 [4:48:59<2:33:34, 3.51it/s] 91%|█████████▏| 339129/371472 [4:48:59<2:31:10, 3.57it/s] 91%|█████████▏| 339130/371472 [4:48:59<2:27:18, 3.66it/s] 91%|█████████▏| 339131/371472 [4:48:59<2:23:47, 3.75it/s] 91%|█████████▏| 339132/371472 [4:49:00<2:23:37, 3.75it/s] 91%|█████████▏| 339133/371472 [4:49:00<2:28:25, 3.63it/s] 91%|█████████▏| 339134/371472 [4:49:00<2:35:37, 3.46it/s] 91%|█████████▏| 339135/371472 [4:49:01<2:30:07, 3.59it/s] 91%|█████████▏| 339136/371472 [4:49:01<2:25:05, 3.71it/s] 91%|█████████▏| 339137/371472 [4:49:01<2:24:55, 3.72it/s] 91%|█████████▏| 339138/371472 [4:49:01<2:25:26, 3.71it/s] 91%|█████████▏| 339139/371472 [4:49:02<2:20:58, 3.82it/s] 91%|█████████▏| 339140/371472 [4:49:02<2:24:17, 3.73it/s] {'loss': 2.6695, 'learning_rate': 1.7837596155918034e-07, 'epoch': 14.61} + 91%|█████████▏| 339140/371472 [4:49:02<2:24:17, 3.73it/s] 91%|█████████▏| 339141/371472 [4:49:02<2:25:09, 3.71it/s] 91%|█████████▏| 339142/371472 [4:49:02<2:30:30, 3.58it/s] 91%|█████████▏| 339143/371472 [4:49:03<2:43:41, 3.29it/s] 91%|█████████▏| 339144/371472 [4:49:03<2:50:41, 3.16it/s] 91%|█████████▏| 339145/371472 [4:49:03<2:42:03, 3.32it/s] 91%|█████████▏| 339146/371472 [4:49:04<2:56:36, 3.05it/s] 91%|█████████▏| 339147/371472 [4:49:04<2:48:12, 3.20it/s] 91%|█████████▏| 339148/371472 [4:49:04<2:41:57, 3.33it/s] 91%|█████████▏| 339149/371472 [4:49:05<2:34:32, 3.49it/s] 91%|█████████▏| 339150/371472 [4:49:05<2:31:56, 3.55it/s] 91%|█████████▏| 339151/371472 [4:49:05<2:29:29, 3.60it/s] 91%|█████████▏| 339152/371472 [4:49:05<2:27:54, 3.64it/s] 91%|█████████▏| 339153/371472 [4:49:06<2:30:34, 3.58it/s] 91%|█████████▏| 339154/371472 [4:49:06<2:33:12, 3.52it/s] 91%|█████████▏| 339155/371472 [4:49:06<2:29:29, 3.60it/s] 91%|█████████▏| 339156/371472 [4:49:07<2:26:45, 3.67it/s] 91%|█████████▏| 339157/371472 [4:49:07<2:32:21, 3.53it/s] 91%|█████████▏| 339158/371472 [4:49:07<2:32:37, 3.53it/s] 91%|█████████▏| 339159/371472 [4:49:07<2:35:42, 3.46it/s] 91%|█████████▏| 339160/371472 [4:49:08<2:36:20, 3.44it/s] {'loss': 2.4204, 'learning_rate': 1.7832747958370139e-07, 'epoch': 14.61} + 91%|█████████▏| 339160/371472 [4:49:08<2:36:20, 3.44it/s] 91%|█████████▏| 339161/371472 [4:49:08<2:34:34, 3.48it/s] 91%|█████████▏| 339162/371472 [4:49:08<2:31:39, 3.55it/s] 91%|█████████▏| 339163/371472 [4:49:09<2:27:34, 3.65it/s] 91%|█████████▏| 339164/371472 [4:49:09<2:34:16, 3.49it/s] 91%|█████████▏| 339165/371472 [4:49:09<2:31:03, 3.56it/s] 91%|█████████▏| 339166/371472 [4:49:09<2:32:54, 3.52it/s] 91%|█████████▏| 339167/371472 [4:49:10<2:28:44, 3.62it/s] 91%|█████████▏| 339168/371472 [4:49:10<2:26:49, 3.67it/s] 91%|█████████▏| 339169/371472 [4:49:10<2:22:01, 3.79it/s] 91%|█████████▏| 339170/371472 [4:49:10<2:28:32, 3.62it/s] 91%|█████████▏| 339171/371472 [4:49:11<2:46:17, 3.24it/s] 91%|█████████▏| 339172/371472 [4:49:11<2:39:58, 3.37it/s] 91%|█████████▏| 339173/371472 [4:49:11<2:43:49, 3.29it/s] 91%|█████████▏| 339174/371472 [4:49:12<2:41:40, 3.33it/s] 91%|█████████▏| 339175/371472 [4:49:12<2:44:27, 3.27it/s] 91%|█████████▏| 339176/371472 [4:49:12<2:48:31, 3.19it/s] 91%|█████████▏| 339177/371472 [4:49:13<2:42:49, 3.31it/s] 91%|█████████▏| 339178/371472 [4:49:13<2:39:52, 3.37it/s] 91%|█████████▏| 339179/371472 [4:49:13<2:39:38, 3.37it/s] 91%|█████████▏| 339180/371472 [4:49:14<2:39:43, 3.37it/s] {'loss': 2.5512, 'learning_rate': 1.7827899760822254e-07, 'epoch': 14.61} + 91%|█████████▏| 339180/371472 [4:49:14<2:39:43, 3.37it/s] 91%|█████████▏| 339181/371472 [4:49:14<2:33:17, 3.51it/s] 91%|█████████▏| 339182/371472 [4:49:14<2:37:50, 3.41it/s] 91%|█████████▏| 339183/371472 [4:49:14<2:46:35, 3.23it/s] 91%|█████████▏| 339184/371472 [4:49:15<2:43:22, 3.29it/s] 91%|█████████▏| 339185/371472 [4:49:15<2:35:58, 3.45it/s] 91%|█████████▏| 339186/371472 [4:49:15<2:37:40, 3.41it/s] 91%|█████████▏| 339187/371472 [4:49:16<2:37:29, 3.42it/s] 91%|█████████▏| 339188/371472 [4:49:16<2:38:50, 3.39it/s] 91%|█████████▏| 339189/371472 [4:49:16<2:36:25, 3.44it/s] 91%|█████████▏| 339190/371472 [4:49:16<2:30:03, 3.59it/s] 91%|█████████▏| 339191/371472 [4:49:17<2:36:26, 3.44it/s] 91%|█████████▏| 339192/371472 [4:49:17<2:41:54, 3.32it/s] 91%|█████████▏| 339193/371472 [4:49:17<2:52:45, 3.11it/s] 91%|█████████▏| 339194/371472 [4:49:18<2:46:24, 3.23it/s] 91%|█████████▏| 339195/371472 [4:49:18<2:48:11, 3.20it/s] 91%|█████████▏| 339196/371472 [4:49:18<2:46:15, 3.24it/s] 91%|█████████▏| 339197/371472 [4:49:19<2:40:34, 3.35it/s] 91%|█████████▏| 339198/371472 [4:49:19<2:34:08, 3.49it/s] 91%|█████████▏| 339199/371472 [4:49:19<2:33:10, 3.51it/s] 91%|█████████▏| 339200/371472 [4:49:19<2:29:13, 3.60it/s] {'loss': 2.6443, 'learning_rate': 1.782305156327436e-07, 'epoch': 14.61} + 91%|█████████▏| 339200/371472 [4:49:19<2:29:13, 3.60it/s] 91%|█████████▏| 339201/371472 [4:49:20<2:34:10, 3.49it/s] 91%|█████████▏| 339202/371472 [4:49:20<2:33:40, 3.50it/s] 91%|█████████▏| 339203/371472 [4:49:20<2:45:18, 3.25it/s] 91%|█████████▏| 339204/371472 [4:49:21<2:37:45, 3.41it/s] 91%|█████████▏| 339205/371472 [4:49:21<2:47:34, 3.21it/s] 91%|█████████▏| 339206/371472 [4:49:21<2:45:27, 3.25it/s] 91%|█████████▏| 339207/371472 [4:49:22<2:44:31, 3.27it/s] 91%|█████████▏| 339208/371472 [4:49:22<2:37:19, 3.42it/s] 91%|█████████▏| 339209/371472 [4:49:22<2:29:03, 3.61it/s] 91%|█████████▏| 339210/371472 [4:49:22<2:27:12, 3.65it/s] 91%|█████████▏| 339211/371472 [4:49:23<2:26:36, 3.67it/s] 91%|█████████▏| 339212/371472 [4:49:23<2:30:37, 3.57it/s] 91%|█████████▏| 339213/371472 [4:49:23<2:47:37, 3.21it/s] 91%|█████████▏| 339214/371472 [4:49:24<2:45:57, 3.24it/s] 91%|█████████▏| 339215/371472 [4:49:24<2:41:15, 3.33it/s] 91%|█████████▏| 339216/371472 [4:49:24<2:37:26, 3.41it/s] 91%|█████████▏| 339217/371472 [4:49:24<2:38:28, 3.39it/s] 91%|█████████▏| 339218/371472 [4:49:25<2:38:05, 3.40it/s] 91%|█████████▏| 339219/371472 [4:49:25<2:40:24, 3.35it/s] 91%|█████████▏| 339220/371472 [4:49:25<2:39:33, 3.37it/s] {'loss': 2.6351, 'learning_rate': 1.7818203365726476e-07, 'epoch': 14.61} + 91%|█████████▏| 339220/371472 [4:49:25<2:39:33, 3.37it/s] 91%|█████████▏| 339221/371472 [4:49:26<2:35:55, 3.45it/s] 91%|█████████▏| 339222/371472 [4:49:26<2:43:39, 3.28it/s] 91%|█████████▏| 339223/371472 [4:49:26<2:45:39, 3.24it/s] 91%|█████████▏| 339224/371472 [4:49:27<2:46:14, 3.23it/s] 91%|█████████▏| 339225/371472 [4:49:27<2:47:05, 3.22it/s] 91%|█████████▏| 339226/371472 [4:49:27<2:50:10, 3.16it/s] 91%|█████████▏| 339227/371472 [4:49:28<2:41:28, 3.33it/s] 91%|█████████▏| 339228/371472 [4:49:28<2:40:49, 3.34it/s] 91%|█████████▏| 339229/371472 [4:49:28<2:41:04, 3.34it/s] 91%|█████████▏| 339230/371472 [4:49:28<2:38:45, 3.38it/s] 91%|█████████▏| 339231/371472 [4:49:29<2:34:52, 3.47it/s] 91%|█████████▏| 339232/371472 [4:49:29<2:32:48, 3.52it/s] 91%|█████████▏| 339233/371472 [4:49:29<2:30:04, 3.58it/s] 91%|█████████▏| 339234/371472 [4:49:30<2:37:18, 3.42it/s] 91%|█████████▏| 339235/371472 [4:49:30<2:37:18, 3.42it/s] 91%|█████████▏| 339236/371472 [4:49:30<2:34:38, 3.47it/s] 91%|█████████▏| 339237/371472 [4:49:30<2:38:11, 3.40it/s] 91%|█████████▏| 339238/371472 [4:49:31<2:31:46, 3.54it/s] 91%|█████████▏| 339239/371472 [4:49:31<2:30:52, 3.56it/s] 91%|█████████▏| 339240/371472 [4:49:31<2:25:29, 3.69it/s] {'loss': 2.6114, 'learning_rate': 1.781335516817858e-07, 'epoch': 14.61} + 91%|█████████▏| 339240/371472 [4:49:31<2:25:29, 3.69it/s] 91%|█████████▏| 339241/371472 [4:49:31<2:24:40, 3.71it/s] 91%|█████████▏| 339242/371472 [4:49:32<2:27:04, 3.65it/s] 91%|█████████▏| 339243/371472 [4:49:32<2:24:36, 3.71it/s] 91%|█████████▏| 339244/371472 [4:49:32<2:21:26, 3.80it/s] 91%|█████████▏| 339245/371472 [4:49:33<2:27:09, 3.65it/s] 91%|█████████▏| 339246/371472 [4:49:33<2:27:40, 3.64it/s] 91%|█████████▏| 339247/371472 [4:49:33<2:28:51, 3.61it/s] 91%|█████████▏| 339248/371472 [4:49:33<2:24:49, 3.71it/s] 91%|█████████▏| 339249/371472 [4:49:34<2:33:17, 3.50it/s] 91%|█████████▏| 339250/371472 [4:49:34<2:28:55, 3.61it/s] 91%|█████████▏| 339251/371472 [4:49:34<2:32:02, 3.53it/s] 91%|█████████▏| 339252/371472 [4:49:35<2:29:41, 3.59it/s] 91%|█████████▏| 339253/371472 [4:49:35<2:35:50, 3.45it/s] 91%|█████████▏| 339254/371472 [4:49:35<2:31:36, 3.54it/s] 91%|█████████▏| 339255/371472 [4:49:35<2:30:08, 3.58it/s] 91%|█████████▏| 339256/371472 [4:49:36<2:27:22, 3.64it/s] 91%|█████████▏| 339257/371472 [4:49:36<2:31:05, 3.55it/s] 91%|█████████▏| 339258/371472 [4:49:36<2:31:23, 3.55it/s] 91%|█████████▏| 339259/371472 [4:49:37<2:34:31, 3.47it/s] 91%|█████████▏| 339260/371472 [4:49:37<2:32:43, 3.52it/s] {'loss': 2.749, 'learning_rate': 1.7808506970630698e-07, 'epoch': 14.61} + 91%|█████████▏| 339260/371472 [4:49:37<2:32:43, 3.52it/s] 91%|█████████▏| 339261/371472 [4:49:37<2:33:45, 3.49it/s] 91%|█████████▏| 339262/371472 [4:49:37<2:28:51, 3.61it/s] 91%|█████████▏| 339263/371472 [4:49:38<2:33:13, 3.50it/s] 91%|█████████▏| 339264/371472 [4:49:38<2:28:48, 3.61it/s] 91%|█████████▏| 339265/371472 [4:49:38<2:27:02, 3.65it/s] 91%|█████████▏| 339266/371472 [4:49:38<2:27:55, 3.63it/s] 91%|█████████▏| 339267/371472 [4:49:39<2:24:39, 3.71it/s] 91%|█████████▏| 339268/371472 [4:49:39<2:24:23, 3.72it/s] 91%|█████████▏| 339269/371472 [4:49:39<2:25:03, 3.70it/s] 91%|█████████▏| 339270/371472 [4:49:40<2:29:56, 3.58it/s] 91%|█████████▏| 339271/371472 [4:49:40<2:26:11, 3.67it/s] 91%|█████████▏| 339272/371472 [4:49:40<2:31:42, 3.54it/s] 91%|█████████▏| 339273/371472 [4:49:40<2:44:15, 3.27it/s] 91%|█████████▏| 339274/371472 [4:49:41<2:35:58, 3.44it/s] 91%|█████████▏| 339275/371472 [4:49:41<2:29:22, 3.59it/s] 91%|█████████▏| 339276/371472 [4:49:41<2:36:48, 3.42it/s] 91%|█████████▏| 339277/371472 [4:49:42<2:50:18, 3.15it/s] 91%|█████████▏| 339278/371472 [4:49:42<2:42:24, 3.30it/s] 91%|█████████▏| 339279/371472 [4:49:42<2:40:14, 3.35it/s] 91%|█████████▏| 339280/371472 [4:49:43<2:37:37, 3.40it/s] {'loss': 2.5861, 'learning_rate': 1.7803658773082805e-07, 'epoch': 14.61} + 91%|█████████▏| 339280/371472 [4:49:43<2:37:37, 3.40it/s] 91%|█████████▏| 339281/371472 [4:49:43<2:51:01, 3.14it/s] 91%|█████████▏| 339282/371472 [4:49:43<2:43:09, 3.29it/s] 91%|█████████▏| 339283/371472 [4:49:43<2:37:50, 3.40it/s] 91%|█████████▏| 339284/371472 [4:49:44<2:38:16, 3.39it/s] 91%|█████████▏| 339285/371472 [4:49:44<2:35:28, 3.45it/s] 91%|█████████▏| 339286/371472 [4:49:44<2:29:46, 3.58it/s] 91%|█████████▏| 339287/371472 [4:49:45<2:30:47, 3.56it/s] 91%|█████████▏| 339288/371472 [4:49:45<2:28:52, 3.60it/s] 91%|█████████▏| 339289/371472 [4:49:45<2:23:07, 3.75it/s] 91%|█████████▏| 339290/371472 [4:49:45<2:22:16, 3.77it/s] 91%|█████████▏| 339291/371472 [4:49:46<2:20:23, 3.82it/s] 91%|█████████▏| 339292/371472 [4:49:46<2:25:06, 3.70it/s] 91%|█████████▏| 339293/371472 [4:49:46<2:25:22, 3.69it/s] 91%|█████████▏| 339294/371472 [4:49:46<2:24:29, 3.71it/s] 91%|█████████▏| 339295/371472 [4:49:47<2:22:24, 3.77it/s] 91%|█████████▏| 339296/371472 [4:49:47<2:21:09, 3.80it/s] 91%|█████████▏| 339297/371472 [4:49:47<2:43:52, 3.27it/s] 91%|█████████▏| 339298/371472 [4:49:48<2:37:47, 3.40it/s] 91%|█████████▏| 339299/371472 [4:49:48<2:29:50, 3.58it/s] 91%|█████████▏| 339300/371472 [4:49:48<2:30:25, 3.56it/s] {'loss': 2.5905, 'learning_rate': 1.7798810575534918e-07, 'epoch': 14.61} + 91%|█████████▏| 339300/371472 [4:49:48<2:30:25, 3.56it/s] 91%|█████████▏| 339301/371472 [4:49:48<2:25:24, 3.69it/s] 91%|█████████▏| 339302/371472 [4:49:49<2:24:30, 3.71it/s] 91%|█████████▏| 339303/371472 [4:49:49<2:23:25, 3.74it/s] 91%|█████████▏| 339304/371472 [4:49:49<2:22:28, 3.76it/s] 91%|█████████▏| 339305/371472 [4:49:50<2:39:31, 3.36it/s] 91%|█████████▏| 339306/371472 [4:49:50<2:40:10, 3.35it/s] 91%|█████████▏| 339307/371472 [4:49:50<2:37:48, 3.40it/s] 91%|█████████▏| 339308/371472 [4:49:50<2:35:47, 3.44it/s] 91%|█████████▏| 339309/371472 [4:49:51<2:30:46, 3.56it/s] 91%|█████████▏| 339310/371472 [4:49:51<2:32:40, 3.51it/s] 91%|█████████▏| 339311/371472 [4:49:51<2:26:56, 3.65it/s] 91%|█████████▏| 339312/371472 [4:49:51<2:22:25, 3.76it/s] 91%|█████████▏| 339313/371472 [4:49:52<2:30:14, 3.57it/s] 91%|█████████▏| 339314/371472 [4:49:52<2:26:45, 3.65it/s] 91%|█████████▏| 339315/371472 [4:49:52<2:27:05, 3.64it/s] 91%|█████████▏| 339316/371472 [4:49:53<2:26:04, 3.67it/s] 91%|█████████▏| 339317/371472 [4:49:53<2:30:46, 3.55it/s] 91%|█████████▏| 339318/371472 [4:49:53<2:27:57, 3.62it/s] 91%|█████████▏| 339319/371472 [4:49:53<2:22:55, 3.75it/s] 91%|█████████▏| 339320/371472 [4:49:54<2:21:36, 3.78it/s] {'loss': 2.454, 'learning_rate': 1.7793962377987022e-07, 'epoch': 14.62} + 91%|█████████▏| 339320/371472 [4:49:54<2:21:36, 3.78it/s] 91%|█████████▏| 339321/371472 [4:49:54<2:21:49, 3.78it/s] 91%|█████████▏| 339322/371472 [4:49:54<2:33:11, 3.50it/s] 91%|█████████▏| 339323/371472 [4:49:55<2:35:37, 3.44it/s] 91%|█████████▏| 339324/371472 [4:49:55<2:27:42, 3.63it/s] 91%|█████████▏| 339325/371472 [4:49:55<2:28:26, 3.61it/s] 91%|█████████▏| 339326/371472 [4:49:55<2:30:21, 3.56it/s] 91%|█████████▏| 339327/371472 [4:49:56<2:29:18, 3.59it/s] 91%|█████████▏| 339328/371472 [4:49:56<2:27:33, 3.63it/s] 91%|█████████▏| 339329/371472 [4:49:56<2:28:33, 3.61it/s] 91%|█████████▏| 339330/371472 [4:49:57<2:38:49, 3.37it/s] 91%|█████████▏| 339331/371472 [4:49:57<2:36:36, 3.42it/s] 91%|█████████▏| 339332/371472 [4:49:57<2:33:22, 3.49it/s] 91%|█████████▏| 339333/371472 [4:49:57<2:28:32, 3.61it/s] 91%|█████████▏| 339334/371472 [4:49:58<2:30:09, 3.57it/s] 91%|█████████▏| 339335/371472 [4:49:58<2:38:40, 3.38it/s] 91%|█████████▏| 339336/371472 [4:49:58<2:45:54, 3.23it/s] 91%|█████████▏| 339337/371472 [4:49:59<2:40:44, 3.33it/s] 91%|█████████▏| 339338/371472 [4:49:59<2:33:59, 3.48it/s] 91%|█████████▏| 339339/371472 [4:49:59<2:34:13, 3.47it/s] 91%|█████████▏| 339340/371472 [4:49:59<2:30:06, 3.57it/s] {'loss': 2.6315, 'learning_rate': 1.7789114180439143e-07, 'epoch': 14.62} + 91%|█████████▏| 339340/371472 [4:49:59<2:30:06, 3.57it/s] 91%|█████████▏| 339341/371472 [4:50:00<2:32:18, 3.52it/s] 91%|█████████▏| 339342/371472 [4:50:00<2:33:52, 3.48it/s] 91%|█████████▏| 339343/371472 [4:50:00<2:35:24, 3.45it/s] 91%|█████████▏| 339344/371472 [4:50:01<2:30:07, 3.57it/s] 91%|█████████▏| 339345/371472 [4:50:01<2:43:09, 3.28it/s] 91%|█████████▏| 339346/371472 [4:50:01<2:45:19, 3.24it/s] 91%|█████████▏| 339347/371472 [4:50:02<2:46:41, 3.21it/s] 91%|█████████▏| 339348/371472 [4:50:02<2:39:27, 3.36it/s] 91%|█████████▏| 339349/371472 [4:50:02<2:42:25, 3.30it/s] 91%|█████████▏| 339350/371472 [4:50:02<2:37:32, 3.40it/s] 91%|█████████▏| 339351/371472 [4:50:03<2:33:35, 3.49it/s] 91%|█████████▏| 339352/371472 [4:50:03<2:33:52, 3.48it/s] 91%|█████████▏| 339353/371472 [4:50:03<2:33:37, 3.48it/s] 91%|█████████▏| 339354/371472 [4:50:03<2:28:51, 3.60it/s] 91%|█████████▏| 339355/371472 [4:50:04<2:22:08, 3.77it/s] 91%|█████████▏| 339356/371472 [4:50:04<2:24:56, 3.69it/s] 91%|█████████▏| 339357/371472 [4:50:04<2:38:09, 3.38it/s] 91%|█████████▏| 339358/371472 [4:50:05<2:42:12, 3.30it/s] 91%|█████████▏| 339359/371472 [4:50:05<2:38:20, 3.38it/s] 91%|█████████▏| 339360/371472 [4:50:05<2:30:13, 3.56it/s] {'loss': 2.6015, 'learning_rate': 1.7784265982891247e-07, 'epoch': 14.62} + 91%|█████████▏| 339360/371472 [4:50:05<2:30:13, 3.56it/s] 91%|█████████▏| 339361/371472 [4:50:05<2:29:39, 3.58it/s] 91%|█████████▏| 339362/371472 [4:50:06<2:32:26, 3.51it/s] 91%|█████████▏| 339363/371472 [4:50:06<2:32:25, 3.51it/s] 91%|█████████▏| 339364/371472 [4:50:06<2:25:57, 3.67it/s] 91%|█████████▏| 339365/371472 [4:50:07<2:36:00, 3.43it/s] 91%|█████████▏| 339366/371472 [4:50:07<2:43:52, 3.27it/s] 91%|█████████▏| 339367/371472 [4:50:07<2:38:28, 3.38it/s] 91%|█████████▏| 339368/371472 [4:50:08<2:38:47, 3.37it/s] 91%|█████████▏| 339369/371472 [4:50:08<2:35:04, 3.45it/s] 91%|█████████▏| 339370/371472 [4:50:08<2:35:52, 3.43it/s] 91%|█████████▏| 339371/371472 [4:50:08<2:30:11, 3.56it/s] 91%|█████████▏| 339372/371472 [4:50:09<2:42:01, 3.30it/s] 91%|█████████▏| 339373/371472 [4:50:09<2:39:04, 3.36it/s] 91%|█████████▏| 339374/371472 [4:50:09<2:31:32, 3.53it/s] 91%|█████████▏| 339375/371472 [4:50:10<2:31:01, 3.54it/s] 91%|█████████▏| 339376/371472 [4:50:10<2:29:17, 3.58it/s] 91%|█████████▏| 339377/371472 [4:50:10<2:33:56, 3.47it/s] 91%|█████████▏| 339378/371472 [4:50:10<2:39:40, 3.35it/s] 91%|█████████▏| 339379/371472 [4:50:11<2:35:29, 3.44it/s] 91%|█████████▏| 339380/371472 [4:50:11<2:31:11, 3.54it/s] {'loss': 2.5794, 'learning_rate': 1.7779417785343362e-07, 'epoch': 14.62} + 91%|█████████▏| 339380/371472 [4:50:11<2:31:11, 3.54it/s] 91%|█████████▏| 339381/371472 [4:50:11<2:32:03, 3.52it/s] 91%|█████████▏| 339382/371472 [4:50:12<2:28:25, 3.60it/s] 91%|█████████▏| 339383/371472 [4:50:12<2:31:56, 3.52it/s] 91%|█████████▏| 339384/371472 [4:50:12<2:27:26, 3.63it/s] 91%|█████████▏| 339385/371472 [4:50:12<2:28:25, 3.60it/s] 91%|█████████▏| 339386/371472 [4:50:13<2:24:12, 3.71it/s] 91%|█████████▏| 339387/371472 [4:50:13<2:26:51, 3.64it/s] 91%|█████████▏| 339388/371472 [4:50:13<2:27:00, 3.64it/s] 91%|█████████▏| 339389/371472 [4:50:13<2:29:07, 3.59it/s] 91%|█████████▏| 339390/371472 [4:50:14<2:33:15, 3.49it/s] 91%|█████████▏| 339391/371472 [4:50:14<2:32:17, 3.51it/s] 91%|█████████▏| 339392/371472 [4:50:14<2:43:14, 3.28it/s] 91%|█████████▏| 339393/371472 [4:50:15<2:49:18, 3.16it/s] 91%|█████████▏| 339394/371472 [4:50:15<2:47:24, 3.19it/s] 91%|█████████▏| 339395/371472 [4:50:15<2:52:57, 3.09it/s] 91%|█████████▏| 339396/371472 [4:50:16<2:41:41, 3.31it/s] 91%|█████████▏| 339397/371472 [4:50:16<2:39:17, 3.36it/s] 91%|█████████▏| 339398/371472 [4:50:16<2:35:19, 3.44it/s] 91%|█████████▏| 339399/371472 [4:50:17<2:41:50, 3.30it/s] 91%|█████████▏| 339400/371472 [4:50:17<2:41:32, 3.31it/s] {'loss': 2.5939, 'learning_rate': 1.777456958779547e-07, 'epoch': 14.62} + 91%|█████████▏| 339400/371472 [4:50:17<2:41:32, 3.31it/s] 91%|█████████▏| 339401/371472 [4:50:17<2:49:47, 3.15it/s] 91%|█████████▏| 339402/371472 [4:50:18<3:21:50, 2.65it/s] 91%|█████████▏| 339403/371472 [4:50:18<3:00:46, 2.96it/s] 91%|█████████▏| 339404/371472 [4:50:18<2:49:15, 3.16it/s] 91%|█████████▏| 339405/371472 [4:50:19<2:51:11, 3.12it/s] 91%|█████████▏| 339406/371472 [4:50:19<2:39:46, 3.34it/s] 91%|█████████▏| 339407/371472 [4:50:19<2:35:55, 3.43it/s] 91%|█████████▏| 339408/371472 [4:50:19<2:34:43, 3.45it/s] 91%|█████████▏| 339409/371472 [4:50:20<2:30:59, 3.54it/s] 91%|█████████▏| 339410/371472 [4:50:20<2:33:07, 3.49it/s] 91%|█████████▏| 339411/371472 [4:50:20<2:41:47, 3.30it/s] 91%|█████████▏| 339412/371472 [4:50:21<2:34:35, 3.46it/s] 91%|█████████▏| 339413/371472 [4:50:21<2:46:30, 3.21it/s] 91%|█████████▏| 339414/371472 [4:50:21<2:39:14, 3.36it/s] 91%|█████████▏| 339415/371472 [4:50:21<2:36:44, 3.41it/s] 91%|█████████▏| 339416/371472 [4:50:22<2:30:17, 3.55it/s] 91%|█████████▏| 339417/371472 [4:50:22<2:50:50, 3.13it/s] 91%|█████████▏| 339418/371472 [4:50:22<2:49:44, 3.15it/s] 91%|█████████▏| 339419/371472 [4:50:23<2:53:26, 3.08it/s] 91%|█████████▏| 339420/371472 [4:50:23<2:52:18, 3.10it/s] {'loss': 2.6136, 'learning_rate': 1.7769721390247584e-07, 'epoch': 14.62} + 91%|█████████▏| 339420/371472 [4:50:23<2:52:18, 3.10it/s] 91%|█████████▏| 339421/371472 [4:50:23<2:56:39, 3.02it/s] 91%|█████████▏| 339422/371472 [4:50:24<2:44:04, 3.26it/s] 91%|█████████▏| 339423/371472 [4:50:24<2:54:44, 3.06it/s] 91%|█████████▏| 339424/371472 [4:50:24<2:42:02, 3.30it/s] 91%|█████████▏| 339425/371472 [4:50:25<2:32:05, 3.51it/s] 91%|█████████▏| 339426/371472 [4:50:25<2:28:43, 3.59it/s] 91%|█████████▏| 339427/371472 [4:50:25<2:37:07, 3.40it/s] 91%|█████████▏| 339428/371472 [4:50:25<2:31:28, 3.53it/s] 91%|█████████▏| 339429/371472 [4:50:26<2:30:46, 3.54it/s] 91%|█████████▏| 339430/371472 [4:50:26<2:34:38, 3.45it/s] 91%|█████████▏| 339431/371472 [4:50:26<2:32:52, 3.49it/s] 91%|█████████▏| 339432/371472 [4:50:27<2:29:42, 3.57it/s] 91%|█████████▏| 339433/371472 [4:50:27<2:30:00, 3.56it/s] 91%|█████████▏| 339434/371472 [4:50:27<2:27:01, 3.63it/s] 91%|█████████▏| 339435/371472 [4:50:27<2:39:32, 3.35it/s] 91%|█████████▏| 339436/371472 [4:50:28<2:38:46, 3.36it/s] 91%|█████████▏| 339437/371472 [4:50:28<2:37:50, 3.38it/s] 91%|█████████▏| 339438/371472 [4:50:28<2:44:40, 3.24it/s] 91%|█████████▏| 339439/371472 [4:50:29<2:38:41, 3.36it/s] 91%|█████████▏| 339440/371472 [4:50:29<2:33:29, 3.48it/s] {'loss': 2.6159, 'learning_rate': 1.776487319269969e-07, 'epoch': 14.62} + 91%|█████████▏| 339440/371472 [4:50:29<2:33:29, 3.48it/s] 91%|█████████▏| 339441/371472 [4:50:29<2:36:56, 3.40it/s] 91%|█████████▏| 339442/371472 [4:50:29<2:33:35, 3.48it/s] 91%|█████████▏| 339443/371472 [4:50:30<2:31:32, 3.52it/s] 91%|█████████▏| 339444/371472 [4:50:30<2:30:04, 3.56it/s] 91%|█████████▏| 339445/371472 [4:50:30<2:36:04, 3.42it/s] 91%|█████████▏| 339446/371472 [4:50:31<2:39:09, 3.35it/s] 91%|█████████▏| 339447/371472 [4:50:31<2:34:55, 3.45it/s] 91%|█████████▏| 339448/371472 [4:50:31<2:39:30, 3.35it/s] 91%|█████████▏| 339449/371472 [4:50:32<2:38:08, 3.37it/s] 91%|█████████▏| 339450/371472 [4:50:32<2:39:20, 3.35it/s] 91%|█████████▏| 339451/371472 [4:50:32<2:56:50, 3.02it/s] 91%|█████████▏| 339452/371472 [4:50:32<2:45:31, 3.22it/s] 91%|█████████▏| 339453/371472 [4:50:33<2:44:12, 3.25it/s] 91%|█████████▏| 339454/371472 [4:50:33<2:36:23, 3.41it/s] 91%|█████████▏| 339455/371472 [4:50:33<2:34:17, 3.46it/s] 91%|█████████▏| 339456/371472 [4:50:34<2:27:55, 3.61it/s] 91%|█████████▏| 339457/371472 [4:50:34<2:33:20, 3.48it/s] 91%|█████████▏| 339458/371472 [4:50:34<2:29:17, 3.57it/s] 91%|█████████▏| 339459/371472 [4:50:34<2:26:32, 3.64it/s] 91%|█████████▏| 339460/371472 [4:50:35<2:25:34, 3.67it/s] {'loss': 2.6264, 'learning_rate': 1.7760024995151807e-07, 'epoch': 14.62} + 91%|█████████▏| 339460/371472 [4:50:35<2:25:34, 3.67it/s] 91%|█████████▏| 339461/371472 [4:50:35<2:28:39, 3.59it/s] 91%|█████████▏| 339462/371472 [4:50:35<2:26:09, 3.65it/s] 91%|█████████▏| 339463/371472 [4:50:35<2:23:44, 3.71it/s] 91%|█████████▏| 339464/371472 [4:50:36<2:23:32, 3.72it/s] 91%|█████████▏| 339465/371472 [4:50:36<2:29:59, 3.56it/s] 91%|█████████▏| 339466/371472 [4:50:36<2:38:10, 3.37it/s] 91%|█████████▏| 339467/371472 [4:50:37<2:39:40, 3.34it/s] 91%|█████████▏| 339468/371472 [4:50:37<2:32:40, 3.49it/s] 91%|█████████▏| 339469/371472 [4:50:37<2:29:35, 3.57it/s] 91%|█████████▏| 339470/371472 [4:50:37<2:23:12, 3.72it/s] 91%|█████████▏| 339471/371472 [4:50:38<2:21:22, 3.77it/s] 91%|█████████▏| 339472/371472 [4:50:38<2:26:40, 3.64it/s] 91%|█████████▏| 339473/371472 [4:50:38<2:20:21, 3.80it/s] 91%|█████████▏| 339474/371472 [4:50:39<2:28:23, 3.59it/s] 91%|█████████▏| 339475/371472 [4:50:39<2:31:22, 3.52it/s] 91%|█████████▏| 339476/371472 [4:50:39<2:29:05, 3.58it/s] 91%|█████████▏| 339477/371472 [4:50:39<2:28:10, 3.60it/s] 91%|█████████▏| 339478/371472 [4:50:40<2:28:11, 3.60it/s] 91%|█████████▏| 339479/371472 [4:50:40<2:23:43, 3.71it/s] 91%|█████████▏| 339480/371472 [4:50:40<2:24:43, 3.68it/s] {'loss': 2.4667, 'learning_rate': 1.775517679760391e-07, 'epoch': 14.62} + 91%|█████████▏| 339480/371472 [4:50:40<2:24:43, 3.68it/s] 91%|█████████▏| 339481/371472 [4:50:40<2:23:03, 3.73it/s] 91%|█████████▏| 339482/371472 [4:50:41<2:22:22, 3.74it/s] 91%|█████████▏| 339483/371472 [4:50:41<2:35:06, 3.44it/s] 91%|█████████▏| 339484/371472 [4:50:41<2:29:49, 3.56it/s] 91%|█████████▏| 339485/371472 [4:50:42<2:24:43, 3.68it/s] 91%|█████████▏| 339486/371472 [4:50:42<2:27:41, 3.61it/s] 91%|█████████▏| 339487/371472 [4:50:42<2:33:15, 3.48it/s] 91%|█████████▏| 339488/371472 [4:50:42<2:26:43, 3.63it/s] 91%|█████████▏| 339489/371472 [4:50:43<2:20:40, 3.79it/s] 91%|█████████▏| 339490/371472 [4:50:43<2:16:30, 3.90it/s] 91%|█████████▏| 339491/371472 [4:50:43<2:19:08, 3.83it/s] 91%|█████████▏| 339492/371472 [4:50:43<2:22:20, 3.74it/s] 91%|█████████▏| 339493/371472 [4:50:44<2:28:49, 3.58it/s] 91%|█████████▏| 339494/371472 [4:50:44<2:25:12, 3.67it/s] 91%|█████████▏| 339495/371472 [4:50:44<2:27:04, 3.62it/s] 91%|█████████▏| 339496/371472 [4:50:45<2:23:58, 3.70it/s] 91%|█████████▏| 339497/371472 [4:50:45<2:26:24, 3.64it/s] 91%|█████████▏| 339498/371472 [4:50:45<2:27:39, 3.61it/s] 91%|█████████▏| 339499/371472 [4:50:46<2:39:18, 3.35it/s] 91%|█████████▏| 339500/371472 [4:50:46<2:40:00, 3.33it/s] {'loss': 2.6404, 'learning_rate': 1.775032860005603e-07, 'epoch': 14.62} + 91%|█████████▏| 339500/371472 [4:50:46<2:40:00, 3.33it/s] 91%|█████████▏| 339501/371472 [4:50:46<2:36:14, 3.41it/s] 91%|█████████▏| 339502/371472 [4:50:46<2:39:40, 3.34it/s] 91%|█████████▏| 339503/371472 [4:50:47<2:36:56, 3.40it/s] 91%|█████████▏| 339504/371472 [4:50:47<2:29:47, 3.56it/s] 91%|█████████▏| 339505/371472 [4:50:47<2:26:43, 3.63it/s] 91%|█████████▏| 339506/371472 [4:50:48<2:37:03, 3.39it/s] 91%|█████████▏| 339507/371472 [4:50:48<2:37:59, 3.37it/s] 91%|█████████▏| 339508/371472 [4:50:48<2:31:42, 3.51it/s] 91%|█████████▏| 339509/371472 [4:50:48<2:27:32, 3.61it/s] 91%|█████████▏| 339510/371472 [4:50:49<2:30:25, 3.54it/s] 91%|█████████▏| 339511/371472 [4:50:49<2:29:25, 3.56it/s] 91%|█████████▏| 339512/371472 [4:50:49<2:32:08, 3.50it/s] 91%|█████████▏| 339513/371472 [4:50:50<2:33:16, 3.48it/s] 91%|█████████▏| 339514/371472 [4:50:50<2:29:28, 3.56it/s] 91%|█████████▏| 339515/371472 [4:50:50<2:28:52, 3.58it/s] 91%|█████████▏| 339516/371472 [4:50:50<2:27:55, 3.60it/s] 91%|█████████▏| 339517/371472 [4:50:51<2:27:39, 3.61it/s] 91%|█████████▏| 339518/371472 [4:50:51<2:28:11, 3.59it/s] 91%|█████████▏| 339519/371472 [4:50:51<2:26:01, 3.65it/s] 91%|█████████▏| 339520/371472 [4:50:51<2:24:38, 3.68it/s] {'loss': 2.664, 'learning_rate': 1.7745480402508133e-07, 'epoch': 14.62} + 91%|█████████▏| 339520/371472 [4:50:51<2:24:38, 3.68it/s] 91%|█████████▏| 339521/371472 [4:50:52<2:24:20, 3.69it/s] 91%|█████████▏| 339522/371472 [4:50:52<2:46:42, 3.19it/s] 91%|█████████▏| 339523/371472 [4:50:52<2:45:04, 3.23it/s] 91%|█████████▏| 339524/371472 [4:50:53<2:36:31, 3.40it/s] 91%|█████████▏| 339525/371472 [4:50:53<2:30:44, 3.53it/s] 91%|█████████▏| 339526/371472 [4:50:53<2:37:19, 3.38it/s] 91%|█████████▏| 339527/371472 [4:50:54<2:32:59, 3.48it/s] 91%|█████████▏| 339528/371472 [4:50:54<2:28:37, 3.58it/s] 91%|█████████▏| 339529/371472 [4:50:54<2:29:52, 3.55it/s] 91%|█████████▏| 339530/371472 [4:50:54<2:26:07, 3.64it/s] 91%|█████████▏| 339531/371472 [4:50:55<2:23:25, 3.71it/s] 91%|█████████▏| 339532/371472 [4:50:55<3:15:42, 2.72it/s] 91%|█████████▏| 339533/371472 [4:50:55<2:55:44, 3.03it/s] 91%|█████████▏| 339534/371472 [4:50:56<2:54:09, 3.06it/s] 91%|█████████▏| 339535/371472 [4:50:56<2:46:19, 3.20it/s] 91%|█████████▏| 339536/371472 [4:50:56<2:42:35, 3.27it/s] 91%|█████████▏| 339537/371472 [4:50:57<3:04:11, 2.89it/s] 91%|█████████▏| 339538/371472 [4:50:57<3:00:15, 2.95it/s] 91%|█████████▏| 339539/371472 [4:50:57<2:53:09, 3.07it/s] 91%|█████████▏| 339540/371472 [4:50:58<2:51:34, 3.10it/s] {'loss': 2.6269, 'learning_rate': 1.7740632204960248e-07, 'epoch': 14.62} + 91%|█████████▏| 339540/371472 [4:50:58<2:51:34, 3.10it/s] 91%|█████████▏| 339541/371472 [4:50:58<2:44:03, 3.24it/s] 91%|█████████▏| 339542/371472 [4:50:58<2:43:39, 3.25it/s] 91%|█████████▏| 339543/371472 [4:50:59<2:44:35, 3.23it/s] 91%|█████████▏| 339544/371472 [4:50:59<2:36:34, 3.40it/s] 91%|█████████▏| 339545/371472 [4:50:59<2:38:20, 3.36it/s] 91%|█████████▏| 339546/371472 [4:50:59<2:37:29, 3.38it/s] 91%|█████████▏| 339547/371472 [4:51:00<2:35:57, 3.41it/s] 91%|█████████▏| 339548/371472 [4:51:00<2:35:01, 3.43it/s] 91%|█████████▏| 339549/371472 [4:51:00<2:39:03, 3.35it/s] 91%|█████████▏| 339550/371472 [4:51:01<2:33:35, 3.46it/s] 91%|█████████▏| 339551/371472 [4:51:01<2:42:57, 3.26it/s] 91%|█████████▏| 339552/371472 [4:51:01<2:39:07, 3.34it/s] 91%|█████████▏| 339553/371472 [4:51:01<2:33:43, 3.46it/s] 91%|█████████▏| 339554/371472 [4:51:02<2:29:34, 3.56it/s] 91%|█████████▏| 339555/371472 [4:51:02<2:26:55, 3.62it/s] 91%|█████████▏| 339556/371472 [4:51:02<2:27:51, 3.60it/s] 91%|█████████▏| 339557/371472 [4:51:03<2:27:25, 3.61it/s] 91%|█████████▏| 339558/371472 [4:51:03<2:34:22, 3.45it/s] 91%|█████████▏| 339559/371472 [4:51:03<2:39:57, 3.32it/s] 91%|█████████▏| 339560/371472 [4:51:03<2:33:48, 3.46it/s] {'loss': 2.4913, 'learning_rate': 1.7735784007412353e-07, 'epoch': 14.63} + 91%|█████████▏| 339560/371472 [4:51:03<2:33:48, 3.46it/s] 91%|█████████▏| 339561/371472 [4:51:04<2:35:39, 3.42it/s] 91%|█████████▏| 339562/371472 [4:51:04<2:33:39, 3.46it/s] 91%|█████████▏| 339563/371472 [4:51:04<2:38:15, 3.36it/s] 91%|█████████▏| 339564/371472 [4:51:05<2:30:53, 3.52it/s] 91%|█████████▏| 339565/371472 [4:51:05<2:42:31, 3.27it/s] 91%|█████████▏| 339566/371472 [4:51:05<2:41:13, 3.30it/s] 91%|█████████▏| 339567/371472 [4:51:06<2:33:19, 3.47it/s] 91%|█████████▏| 339568/371472 [4:51:06<2:35:59, 3.41it/s] 91%|█████████▏| 339569/371472 [4:51:06<2:40:27, 3.31it/s] 91%|█████████▏| 339570/371472 [4:51:06<2:33:49, 3.46it/s] 91%|█████████▏| 339571/371472 [4:51:07<2:30:55, 3.52it/s] 91%|█████████▏| 339572/371472 [4:51:07<2:32:47, 3.48it/s] 91%|█████████▏| 339573/371472 [4:51:07<2:29:24, 3.56it/s] 91%|█████████▏| 339574/371472 [4:51:08<2:26:36, 3.63it/s] 91%|█████████▏| 339575/371472 [4:51:08<2:30:40, 3.53it/s] 91%|█████████▏| 339576/371472 [4:51:08<2:31:43, 3.50it/s] 91%|█████████▏| 339577/371472 [4:51:08<2:33:00, 3.47it/s] 91%|█████████▏| 339578/371472 [4:51:09<2:34:27, 3.44it/s] 91%|█████████▏| 339579/371472 [4:51:09<2:35:01, 3.43it/s] 91%|█████████▏| 339580/371472 [4:51:09<2:32:25, 3.49it/s] {'loss': 2.5596, 'learning_rate': 1.773093580986447e-07, 'epoch': 14.63} + 91%|█████████▏| 339580/371472 [4:51:09<2:32:25, 3.49it/s] 91%|█████████▏| 339581/371472 [4:51:10<2:29:12, 3.56it/s] 91%|█████████▏| 339582/371472 [4:51:10<2:25:10, 3.66it/s] 91%|█████████▏| 339583/371472 [4:51:10<2:27:10, 3.61it/s] 91%|█████████▏| 339584/371472 [4:51:10<2:34:55, 3.43it/s] 91%|█████████▏| 339585/371472 [4:51:11<2:33:58, 3.45it/s] 91%|█████████▏| 339586/371472 [4:51:11<2:32:52, 3.48it/s] 91%|█████████▏| 339587/371472 [4:51:11<2:30:16, 3.54it/s] 91%|█████████▏| 339588/371472 [4:51:11<2:27:43, 3.60it/s] 91%|█████████▏| 339589/371472 [4:51:12<2:31:37, 3.50it/s] 91%|█████████▏| 339590/371472 [4:51:12<2:29:30, 3.55it/s] 91%|█████████▏| 339591/371472 [4:51:12<2:26:52, 3.62it/s] 91%|█████████▏| 339592/371472 [4:51:13<2:28:37, 3.57it/s] 91%|█████████▏| 339593/371472 [4:51:13<2:24:20, 3.68it/s] 91%|█████████▏| 339594/371472 [4:51:13<2:35:24, 3.42it/s] 91%|█████████▏| 339595/371472 [4:51:13<2:27:57, 3.59it/s] 91%|█████████▏| 339596/371472 [4:51:14<2:30:12, 3.54it/s] 91%|█████████▏| 339597/371472 [4:51:14<2:31:23, 3.51it/s] 91%|█████████▏| 339598/371472 [4:51:14<2:40:46, 3.30it/s] 91%|█████████▏| 339599/371472 [4:51:15<2:33:16, 3.47it/s] 91%|█████████▏| 339600/371472 [4:51:15<2:29:42, 3.55it/s] {'loss': 2.5817, 'learning_rate': 1.7726087612316578e-07, 'epoch': 14.63} + 91%|█████████▏| 339600/371472 [4:51:15<2:29:42, 3.55it/s] 91%|█████████▏| 339601/371472 [4:51:15<2:25:49, 3.64it/s] 91%|█████████▏| 339602/371472 [4:51:15<2:24:21, 3.68it/s] 91%|█████████▏| 339603/371472 [4:51:16<2:23:40, 3.70it/s] 91%|█████████▏| 339604/371472 [4:51:16<2:37:58, 3.36it/s] 91%|█████████▏| 339605/371472 [4:51:16<2:29:16, 3.56it/s] 91%|█████████▏| 339606/371472 [4:51:17<2:25:02, 3.66it/s] 91%|█████████▏| 339607/371472 [4:51:17<2:22:05, 3.74it/s] 91%|█████████▏| 339608/371472 [4:51:17<2:26:17, 3.63it/s] 91%|█████████▏| 339609/371472 [4:51:17<2:36:46, 3.39it/s] 91%|█████████▏| 339610/371472 [4:51:18<2:44:34, 3.23it/s] 91%|█████████▏| 339611/371472 [4:51:18<2:33:49, 3.45it/s] 91%|█████████▏| 339612/371472 [4:51:18<2:33:42, 3.45it/s] 91%|█████████▏| 339613/371472 [4:51:19<2:31:05, 3.51it/s] 91%|█████████▏| 339614/371472 [4:51:19<2:34:37, 3.43it/s] 91%|█████████▏| 339615/371472 [4:51:19<2:26:34, 3.62it/s] 91%|█████████▏| 339616/371472 [4:51:19<2:31:36, 3.50it/s] 91%|█████████▏| 339617/371472 [4:51:20<2:38:36, 3.35it/s] 91%|█████████▏| 339618/371472 [4:51:20<2:45:32, 3.21it/s] 91%|█████████▏| 339619/371472 [4:51:20<2:41:49, 3.28it/s] 91%|█████████▏| 339620/371472 [4:51:21<2:34:49, 3.43it/s] {'loss': 2.5338, 'learning_rate': 1.772123941476868e-07, 'epoch': 14.63} + 91%|█████████▏| 339620/371472 [4:51:21<2:34:49, 3.43it/s] 91%|█████████▏| 339621/371472 [4:51:21<2:37:19, 3.37it/s] 91%|█████████▏| 339622/371472 [4:51:21<2:27:18, 3.60it/s] 91%|█████████▏| 339623/371472 [4:51:21<2:27:09, 3.61it/s] 91%|█████████▏| 339624/371472 [4:51:22<2:20:45, 3.77it/s] 91%|█████████▏| 339625/371472 [4:51:22<2:21:35, 3.75it/s] 91%|█████████▏| 339626/371472 [4:51:22<2:29:53, 3.54it/s] 91%|█████████▏| 339627/371472 [4:51:23<2:25:54, 3.64it/s] 91%|█████████▏| 339628/371472 [4:51:23<2:29:10, 3.56it/s] 91%|█████████▏| 339629/371472 [4:51:23<2:22:41, 3.72it/s] 91%|█████████▏| 339630/371472 [4:51:23<2:22:35, 3.72it/s] 91%|█████████▏| 339631/371472 [4:51:24<2:27:08, 3.61it/s] 91%|█████████▏| 339632/371472 [4:51:24<2:31:15, 3.51it/s] 91%|█████████▏| 339633/371472 [4:51:24<2:28:34, 3.57it/s] 91%|█████████▏| 339634/371472 [4:51:25<2:26:55, 3.61it/s] 91%|█████████▏| 339635/371472 [4:51:25<2:22:43, 3.72it/s] 91%|█████████▏| 339636/371472 [4:51:25<2:33:48, 3.45it/s] 91%|█████████▏| 339637/371472 [4:51:25<2:31:46, 3.50it/s] 91%|█████████▏| 339638/371472 [4:51:26<2:35:54, 3.40it/s] 91%|█████████▏| 339639/371472 [4:51:26<2:32:14, 3.48it/s] 91%|█████████▏| 339640/371472 [4:51:26<2:31:46, 3.50it/s] {'loss': 2.6306, 'learning_rate': 1.7716391217220797e-07, 'epoch': 14.63} + 91%|█████████▏| 339640/371472 [4:51:26<2:31:46, 3.50it/s] 91%|█████████▏| 339641/371472 [4:51:27<2:28:45, 3.57it/s] 91%|█████████▏| 339642/371472 [4:51:27<2:27:36, 3.59it/s] 91%|█████████▏| 339643/371472 [4:51:27<2:31:45, 3.50it/s] 91%|█████████▏| 339644/371472 [4:51:27<2:33:11, 3.46it/s] 91%|█████████▏| 339645/371472 [4:51:28<2:38:49, 3.34it/s] 91%|█████████▏| 339646/371472 [4:51:28<2:40:49, 3.30it/s] 91%|█████████▏| 339647/371472 [4:51:28<2:39:00, 3.34it/s] 91%|█████████▏| 339648/371472 [4:51:29<2:37:49, 3.36it/s] 91%|█████████▏| 339649/371472 [4:51:29<2:32:32, 3.48it/s] 91%|█████████▏| 339650/371472 [4:51:29<2:26:23, 3.62it/s] 91%|█████████▏| 339651/371472 [4:51:29<2:20:07, 3.78it/s] 91%|█████████▏| 339652/371472 [4:51:30<2:24:45, 3.66it/s] 91%|█████████▏| 339653/371472 [4:51:30<2:24:02, 3.68it/s] 91%|█████████▏| 339654/371472 [4:51:30<2:19:58, 3.79it/s] 91%|█████████▏| 339655/371472 [4:51:30<2:22:26, 3.72it/s] 91%|█████████▏| 339656/371472 [4:51:31<2:23:01, 3.71it/s] 91%|█████████▏| 339657/371472 [4:51:31<2:23:03, 3.71it/s] 91%|█████████▏| 339658/371472 [4:51:31<2:22:18, 3.73it/s] 91%|█████████▏| 339659/371472 [4:51:31<2:18:43, 3.82it/s] 91%|█████████▏| 339660/371472 [4:51:32<2:20:16, 3.78it/s] {'loss': 2.6348, 'learning_rate': 1.7711543019672907e-07, 'epoch': 14.63} + 91%|█████████▏| 339660/371472 [4:51:32<2:20:16, 3.78it/s] 91%|█████████▏| 339661/371472 [4:51:32<2:36:32, 3.39it/s] 91%|█████████▏| 339662/371472 [4:51:32<2:29:10, 3.55it/s] 91%|█████████▏| 339663/371472 [4:51:33<2:30:35, 3.52it/s] 91%|█████████▏| 339664/371472 [4:51:33<2:26:22, 3.62it/s] 91%|█████████▏| 339665/371472 [4:51:33<2:26:46, 3.61it/s] 91%|█████████▏| 339666/371472 [4:51:33<2:22:23, 3.72it/s] 91%|█████████▏| 339667/371472 [4:51:34<2:24:25, 3.67it/s] 91%|█████████▏| 339668/371472 [4:51:34<2:27:32, 3.59it/s] 91%|█████████▏| 339669/371472 [4:51:34<2:24:41, 3.66it/s] 91%|█████████▏| 339670/371472 [4:51:35<2:31:39, 3.50it/s] 91%|█████████▏| 339671/371472 [4:51:35<2:37:35, 3.36it/s] 91%|█████████▏| 339672/371472 [4:51:35<2:31:12, 3.51it/s] 91%|█████████▏| 339673/371472 [4:51:36<2:51:25, 3.09it/s] 91%|█████████▏| 339674/371472 [4:51:36<2:42:23, 3.26it/s] 91%|█████████▏| 339675/371472 [4:51:36<2:46:17, 3.19it/s] 91%|█████████▏| 339676/371472 [4:51:36<2:39:00, 3.33it/s] 91%|█████████▏| 339677/371472 [4:51:37<2:54:25, 3.04it/s] 91%|█████████▏| 339678/371472 [4:51:37<2:50:53, 3.10it/s] 91%|█████████▏| 339679/371472 [4:51:37<2:47:19, 3.17it/s] 91%|█████████▏| 339680/371472 [4:51:38<3:02:43, 2.90it/s] {'loss': 2.5321, 'learning_rate': 1.7706694822125017e-07, 'epoch': 14.63} + 91%|█████████▏| 339680/371472 [4:51:38<3:02:43, 2.90it/s] 91%|█████████▏| 339681/371472 [4:51:38<3:18:22, 2.67it/s] 91%|█████████▏| 339682/371472 [4:51:39<2:59:29, 2.95it/s] 91%|█████████▏| 339683/371472 [4:51:39<2:47:44, 3.16it/s] 91%|█████████▏| 339684/371472 [4:51:39<2:51:57, 3.08it/s] 91%|█████████▏| 339685/371472 [4:51:39<2:44:02, 3.23it/s] 91%|█████████▏| 339686/371472 [4:51:40<2:36:50, 3.38it/s] 91%|█████████▏| 339687/371472 [4:51:40<2:30:58, 3.51it/s] 91%|█████████▏| 339688/371472 [4:51:40<2:33:29, 3.45it/s] 91%|█████████▏| 339689/371472 [4:51:41<2:31:11, 3.50it/s] 91%|█████████▏| 339690/371472 [4:51:41<2:28:44, 3.56it/s] 91%|█████████▏| 339691/371472 [4:51:41<2:28:41, 3.56it/s] 91%|█████████▏| 339692/371472 [4:51:41<2:26:19, 3.62it/s] 91%|█████████▏| 339693/371472 [4:51:42<2:28:50, 3.56it/s] 91%|█████████▏| 339694/371472 [4:51:42<2:36:52, 3.38it/s] 91%|█████████▏| 339695/371472 [4:51:42<2:30:42, 3.51it/s] 91%|█████████▏| 339696/371472 [4:51:43<2:25:50, 3.63it/s] 91%|█████████▏| 339697/371472 [4:51:43<2:29:02, 3.55it/s] 91%|█████████▏| 339698/371472 [4:51:43<2:39:08, 3.33it/s] 91%|█████████▏| 339699/371472 [4:51:43<2:44:09, 3.23it/s] 91%|█████████▏| 339700/371472 [4:51:44<2:39:46, 3.31it/s] {'loss': 2.5812, 'learning_rate': 1.7701846624577124e-07, 'epoch': 14.63} + 91%|█████████▏| 339700/371472 [4:51:44<2:39:46, 3.31it/s] 91%|█████████▏| 339701/371472 [4:51:44<2:36:15, 3.39it/s] 91%|█████████▏| 339702/371472 [4:51:44<2:35:06, 3.41it/s] 91%|█████████▏| 339703/371472 [4:51:45<2:32:39, 3.47it/s] 91%|█████████▏| 339704/371472 [4:51:45<2:30:14, 3.52it/s] 91%|█████████▏| 339705/371472 [4:51:45<2:27:17, 3.59it/s] 91%|█████████▏| 339706/371472 [4:51:45<2:30:30, 3.52it/s] 91%|█████████▏| 339707/371472 [4:51:46<2:31:00, 3.51it/s] 91%|█████████▏| 339708/371472 [4:51:46<2:28:48, 3.56it/s] 91%|█████████▏| 339709/371472 [4:51:46<2:30:28, 3.52it/s] 91%|█████████▏| 339710/371472 [4:51:47<2:25:41, 3.63it/s] 91%|█████████▏| 339711/371472 [4:51:47<2:31:09, 3.50it/s] 91%|█████████▏| 339712/371472 [4:51:47<2:34:04, 3.44it/s] 91%|█████████▏| 339713/371472 [4:51:47<2:27:00, 3.60it/s] 91%|█████████▏| 339714/371472 [4:51:48<2:24:43, 3.66it/s] 91%|█████████▏| 339715/371472 [4:51:48<2:26:30, 3.61it/s] 91%|█████████▏| 339716/371472 [4:51:48<2:21:00, 3.75it/s] 91%|█████████▏| 339717/371472 [4:51:48<2:21:01, 3.75it/s] 91%|█████████▏| 339718/371472 [4:51:49<2:20:45, 3.76it/s] 91%|█████████▏| 339719/371472 [4:51:49<2:34:08, 3.43it/s] 91%|█████████▏| 339720/371472 [4:51:49<2:38:14, 3.34it/s] {'loss': 2.6164, 'learning_rate': 1.7696998427029242e-07, 'epoch': 14.63} + 91%|█████████▏| 339720/371472 [4:51:49<2:38:14, 3.34it/s] 91%|█████████▏| 339721/371472 [4:51:50<2:29:42, 3.53it/s] 91%|█████████▏| 339722/371472 [4:51:50<2:31:26, 3.49it/s] 91%|█████████▏| 339723/371472 [4:51:50<2:32:08, 3.48it/s] 91%|█████████▏| 339724/371472 [4:51:50<2:25:53, 3.63it/s] 91%|█████████▏| 339725/371472 [4:51:51<2:26:57, 3.60it/s] 91%|█████████▏| 339726/371472 [4:51:51<2:24:09, 3.67it/s] 91%|█████████▏| 339727/371472 [4:51:51<2:18:39, 3.82it/s] 91%|█████████▏| 339728/371472 [4:51:52<2:17:11, 3.86it/s] 91%|█████████▏| 339729/371472 [4:51:52<2:20:12, 3.77it/s] 91%|█████████▏| 339730/371472 [4:51:52<2:22:24, 3.71it/s] 91%|█████████▏| 339731/371472 [4:51:52<2:19:58, 3.78it/s] 91%|█████████▏| 339732/371472 [4:51:53<2:18:33, 3.82it/s] 91%|█████████▏| 339733/371472 [4:51:53<2:20:16, 3.77it/s] 91%|█████████▏| 339734/371472 [4:51:53<2:25:59, 3.62it/s] 91%|█████████▏| 339735/371472 [4:51:54<2:37:01, 3.37it/s] 91%|█████████▏| 339736/371472 [4:51:54<2:42:31, 3.25it/s] 91%|█████████▏| 339737/371472 [4:51:54<2:35:03, 3.41it/s] 91%|█████████▏| 339738/371472 [4:51:54<2:36:54, 3.37it/s] 91%|█████████▏| 339739/371472 [4:51:55<2:30:50, 3.51it/s] 91%|███���█████▏| 339740/371472 [4:51:55<2:40:46, 3.29it/s] {'loss': 2.7126, 'learning_rate': 1.7692150229481346e-07, 'epoch': 14.63} + 91%|█████████▏| 339740/371472 [4:51:55<2:40:46, 3.29it/s] 91%|█████████▏| 339741/371472 [4:51:55<2:35:41, 3.40it/s] 91%|█████████▏| 339742/371472 [4:51:56<2:33:42, 3.44it/s] 91%|█████████▏| 339743/371472 [4:51:56<2:29:52, 3.53it/s] 91%|█████████▏| 339744/371472 [4:51:56<2:30:19, 3.52it/s] 91%|█████████▏| 339745/371472 [4:51:56<2:30:07, 3.52it/s] 91%|█████████▏| 339746/371472 [4:51:57<2:31:16, 3.50it/s] 91%|█████████▏| 339747/371472 [4:51:57<2:22:45, 3.70it/s] 91%|█████████▏| 339748/371472 [4:51:57<2:21:50, 3.73it/s] 91%|█████████▏| 339749/371472 [4:51:57<2:19:32, 3.79it/s] 91%|█████████▏| 339750/371472 [4:51:58<2:26:50, 3.60it/s] 91%|█████████▏| 339751/371472 [4:51:58<2:19:56, 3.78it/s] 91%|█████████▏| 339752/371472 [4:51:58<2:21:11, 3.74it/s] 91%|█████████▏| 339753/371472 [4:51:59<2:23:52, 3.67it/s] 91%|█████████▏| 339754/371472 [4:51:59<2:22:54, 3.70it/s] 91%|█████████▏| 339755/371472 [4:51:59<2:20:03, 3.77it/s] 91%|█████████▏| 339756/371472 [4:51:59<2:19:50, 3.78it/s] 91%|█████████▏| 339757/371472 [4:52:00<2:18:50, 3.81it/s] 91%|█████████▏| 339758/371472 [4:52:00<2:26:31, 3.61it/s] 91%|█████████▏| 339759/371472 [4:52:00<2:39:23, 3.32it/s] 91%|█████████▏| 339760/371472 [4:52:01<2:35:26, 3.40it/s] {'loss': 2.4992, 'learning_rate': 1.7687302031933462e-07, 'epoch': 14.63} + 91%|█████████▏| 339760/371472 [4:52:01<2:35:26, 3.40it/s] 91%|█████████▏| 339761/371472 [4:52:01<2:27:57, 3.57it/s] 91%|█████████▏| 339762/371472 [4:52:01<2:45:52, 3.19it/s] 91%|█████████▏| 339763/371472 [4:52:01<2:37:36, 3.35it/s] 91%|█████████▏| 339764/371472 [4:52:02<2:41:45, 3.27it/s] 91%|█████████▏| 339765/371472 [4:52:02<2:43:10, 3.24it/s] 91%|█████████▏| 339766/371472 [4:52:02<2:41:04, 3.28it/s] 91%|█████████▏| 339767/371472 [4:52:03<2:42:13, 3.26it/s] 91%|█████████▏| 339768/371472 [4:52:03<2:32:38, 3.46it/s] 91%|█████████▏| 339769/371472 [4:52:03<2:31:50, 3.48it/s] 91%|█████████▏| 339770/371472 [4:52:03<2:30:48, 3.50it/s] 91%|█████████▏| 339771/371472 [4:52:04<2:33:14, 3.45it/s] 91%|█████████▏| 339772/371472 [4:52:04<2:27:30, 3.58it/s] 91%|█████████▏| 339773/371472 [4:52:04<2:30:27, 3.51it/s] 91%|█████████▏| 339774/371472 [4:52:05<2:31:17, 3.49it/s] 91%|█████████▏| 339775/371472 [4:52:05<2:33:23, 3.44it/s] 91%|█████████▏| 339776/371472 [4:52:05<2:27:10, 3.59it/s] 91%|█████████▏| 339777/371472 [4:52:05<2:27:19, 3.59it/s] 91%|█████████▏| 339778/371472 [4:52:06<2:20:00, 3.77it/s] 91%|█████████▏| 339779/371472 [4:52:06<2:20:11, 3.77it/s] 91%|█████████▏| 339780/371472 [4:52:06<2:25:31, 3.63it/s] {'loss': 2.5162, 'learning_rate': 1.768245383438557e-07, 'epoch': 14.63} + 91%|█████████▏| 339780/371472 [4:52:06<2:25:31, 3.63it/s] 91%|█████████▏| 339781/371472 [4:52:07<2:32:46, 3.46it/s] 91%|█████████▏| 339782/371472 [4:52:07<2:33:14, 3.45it/s] 91%|█████████▏| 339783/371472 [4:52:07<2:27:12, 3.59it/s] 91%|█████████▏| 339784/371472 [4:52:07<2:31:36, 3.48it/s] 91%|█████████▏| 339785/371472 [4:52:08<2:34:49, 3.41it/s] 91%|█████████▏| 339786/371472 [4:52:08<2:44:23, 3.21it/s] 91%|█████████▏| 339787/371472 [4:52:08<2:37:43, 3.35it/s] 91%|█████████▏| 339788/371472 [4:52:09<2:33:09, 3.45it/s] 91%|█████████▏| 339789/371472 [4:52:09<2:38:23, 3.33it/s] 91%|█████████▏| 339790/371472 [4:52:09<2:34:23, 3.42it/s] 91%|█████████▏| 339791/371472 [4:52:10<2:33:37, 3.44it/s] 91%|█████████▏| 339792/371472 [4:52:10<2:45:41, 3.19it/s] 91%|█████████▏| 339793/371472 [4:52:10<2:36:39, 3.37it/s] 91%|█████████▏| 339794/371472 [4:52:10<2:30:16, 3.51it/s] 91%|█████████▏| 339795/371472 [4:52:11<2:27:11, 3.59it/s] 91%|█████████▏| 339796/371472 [4:52:11<2:23:55, 3.67it/s] 91%|█████████▏| 339797/371472 [4:52:11<2:24:17, 3.66it/s] 91%|█████████▏| 339798/371472 [4:52:11<2:18:41, 3.81it/s] 91%|█████████▏| 339799/371472 [4:52:12<2:21:38, 3.73it/s] 91%|█████████▏| 339800/371472 [4:52:12<2:26:39, 3.60it/s] {'loss': 2.3363, 'learning_rate': 1.7677605636837684e-07, 'epoch': 14.64} + 91%|█████████▏| 339800/371472 [4:52:12<2:26:39, 3.60it/s] 91%|█████████▏| 339801/371472 [4:52:12<2:34:22, 3.42it/s] 91%|█████████▏| 339802/371472 [4:52:13<2:33:10, 3.45it/s] 91%|█████████▏| 339803/371472 [4:52:13<2:27:57, 3.57it/s] 91%|█████████▏| 339804/371472 [4:52:13<2:30:36, 3.50it/s] 91%|█████████▏| 339805/371472 [4:52:13<2:30:16, 3.51it/s] 91%|█████████▏| 339806/371472 [4:52:14<2:23:47, 3.67it/s] 91%|█████████▏| 339807/371472 [4:52:14<2:21:51, 3.72it/s] 91%|█████████▏| 339808/371472 [4:52:14<2:23:29, 3.68it/s] 91%|█████████▏| 339809/371472 [4:52:15<2:32:22, 3.46it/s] 91%|█████████▏| 339810/371472 [4:52:15<2:27:38, 3.57it/s] 91%|█████████▏| 339811/371472 [4:52:15<2:24:44, 3.65it/s] 91%|█████████▏| 339812/371472 [4:52:15<2:27:25, 3.58it/s] 91%|█████████▏| 339813/371472 [4:52:16<2:36:06, 3.38it/s] 91%|█████████▏| 339814/371472 [4:52:16<2:29:04, 3.54it/s] 91%|█████████▏| 339815/371472 [4:52:16<2:32:48, 3.45it/s] 91%|█████████▏| 339816/371472 [4:52:17<2:33:20, 3.44it/s] 91%|█████████▏| 339817/371472 [4:52:17<2:27:32, 3.58it/s] 91%|█████████▏| 339818/371472 [4:52:17<2:31:07, 3.49it/s] 91%|█████████▏| 339819/371472 [4:52:17<2:37:19, 3.35it/s] 91%|█████████▏| 339820/371472 [4:52:18<2:43:08, 3.23it/s] {'loss': 2.531, 'learning_rate': 1.7672757439289788e-07, 'epoch': 14.64} + 91%|█████████▏| 339820/371472 [4:52:18<2:43:08, 3.23it/s] 91%|█████████▏| 339821/371472 [4:52:18<2:36:15, 3.38it/s] 91%|█████████▏| 339822/371472 [4:52:18<2:32:38, 3.46it/s] 91%|█████████▏| 339823/371472 [4:52:19<2:34:27, 3.42it/s] 91%|█████████▏| 339824/371472 [4:52:19<2:35:31, 3.39it/s] 91%|█████████▏| 339825/371472 [4:52:19<2:32:31, 3.46it/s] 91%|█████████▏| 339826/371472 [4:52:19<2:25:48, 3.62it/s] 91%|█████████▏| 339827/371472 [4:52:20<2:24:09, 3.66it/s] 91%|█████████▏| 339828/371472 [4:52:20<2:19:58, 3.77it/s] 91%|█████████▏| 339829/371472 [4:52:20<2:25:00, 3.64it/s] 91%|█████████▏| 339830/371472 [4:52:21<2:23:26, 3.68it/s] 91%|█████████▏| 339831/371472 [4:52:21<2:23:32, 3.67it/s] 91%|█████████▏| 339832/371472 [4:52:21<2:30:22, 3.51it/s] 91%|█████████▏| 339833/371472 [4:52:21<2:28:19, 3.55it/s] 91%|█████████▏| 339834/371472 [4:52:22<2:23:06, 3.68it/s] 91%|█████████▏| 339835/371472 [4:52:22<2:25:42, 3.62it/s] 91%|█████████▏| 339836/371472 [4:52:22<2:25:21, 3.63it/s] 91%|█████████▏| 339837/371472 [4:52:22<2:25:12, 3.63it/s] 91%|█████████▏| 339838/371472 [4:52:23<2:28:49, 3.54it/s] 91%|█████████▏| 339839/371472 [4:52:23<2:26:42, 3.59it/s] 91%|█████████▏| 339840/371472 [4:52:23<2:29:37, 3.52it/s] {'loss': 2.7059, 'learning_rate': 1.7667909241741906e-07, 'epoch': 14.64} + 91%|█████████▏| 339840/371472 [4:52:23<2:29:37, 3.52it/s] 91%|█████████▏| 339841/371472 [4:52:24<2:22:05, 3.71it/s] 91%|█████████▏| 339842/371472 [4:52:24<2:18:36, 3.80it/s] 91%|█████████▏| 339843/371472 [4:52:24<2:20:01, 3.76it/s] 91%|█████████▏| 339844/371472 [4:52:24<2:23:50, 3.66it/s] 91%|█████████▏| 339845/371472 [4:52:25<2:27:42, 3.57it/s] 91%|█████████▏| 339846/371472 [4:52:25<2:31:40, 3.48it/s] 91%|█████████▏| 339847/371472 [4:52:25<2:29:28, 3.53it/s] 91%|█████████▏| 339848/371472 [4:52:26<2:28:12, 3.56it/s] 91%|█████████▏| 339849/371472 [4:52:26<2:24:43, 3.64it/s] 91%|█████████▏| 339850/371472 [4:52:26<2:22:41, 3.69it/s] 91%|█████████▏| 339851/371472 [4:52:26<2:22:11, 3.71it/s] 91%|█████████▏| 339852/371472 [4:52:27<2:21:54, 3.71it/s] 91%|█████████▏| 339853/371472 [4:52:27<2:23:13, 3.68it/s] 91%|█████████▏| 339854/371472 [4:52:27<2:25:31, 3.62it/s] 91%|█████████▏| 339855/371472 [4:52:27<2:20:59, 3.74it/s] 91%|█████████▏| 339856/371472 [4:52:28<2:22:25, 3.70it/s] 91%|█████████▏| 339857/371472 [4:52:28<2:20:10, 3.76it/s] 91%|█████████▏| 339858/371472 [4:52:28<2:20:15, 3.76it/s] 91%|█████████▏| 339859/371472 [4:52:28<2:18:51, 3.79it/s] 91%|█████████▏| 339860/371472 [4:52:29<2:26:13, 3.60it/s] {'loss': 2.6678, 'learning_rate': 1.766306104419401e-07, 'epoch': 14.64} + 91%|█████████▏| 339860/371472 [4:52:29<2:26:13, 3.60it/s] 91%|█████████▏| 339861/371472 [4:52:29<2:22:55, 3.69it/s] 91%|█████████▏| 339862/371472 [4:52:29<2:15:56, 3.88it/s] 91%|█████████▏| 339863/371472 [4:52:30<2:14:46, 3.91it/s] 91%|█████████▏| 339864/371472 [4:52:30<2:13:04, 3.96it/s] 91%|█████████▏| 339865/371472 [4:52:30<2:13:38, 3.94it/s] 91%|█████████▏| 339866/371472 [4:52:30<2:24:55, 3.63it/s] 91%|█████████▏| 339867/371472 [4:52:31<2:35:48, 3.38it/s] 91%|█████████▏| 339868/371472 [4:52:31<2:29:35, 3.52it/s] 91%|█████████▏| 339869/371472 [4:52:31<2:31:14, 3.48it/s] 91%|█████████▏| 339870/371472 [4:52:32<2:40:54, 3.27it/s] 91%|█████████▏| 339871/371472 [4:52:32<2:36:22, 3.37it/s] 91%|█████████▏| 339872/371472 [4:52:32<2:32:49, 3.45it/s] 91%|█████████▏| 339873/371472 [4:52:32<2:27:00, 3.58it/s] 91%|█████████▏| 339874/371472 [4:52:33<2:28:06, 3.56it/s] 91%|█████████▏| 339875/371472 [4:52:33<2:32:23, 3.46it/s] 91%|█████████▏| 339876/371472 [4:52:33<2:29:54, 3.51it/s] 91%|█████████▏| 339877/371472 [4:52:34<2:30:12, 3.51it/s] 91%|█████████▏| 339878/371472 [4:52:34<2:28:54, 3.54it/s] 91%|█████████▏| 339879/371472 [4:52:34<2:29:42, 3.52it/s] 91%|█████████▏| 339880/371472 [4:52:34<2:25:05, 3.63it/s] {'loss': 2.7573, 'learning_rate': 1.7658212846646126e-07, 'epoch': 14.64} + 91%|█████████▏| 339880/371472 [4:52:34<2:25:05, 3.63it/s] 91%|█████████▏| 339881/371472 [4:52:35<2:33:13, 3.44it/s] 91%|█████████▏| 339882/371472 [4:52:35<2:33:12, 3.44it/s] 91%|█████████▏| 339883/371472 [4:52:35<2:27:07, 3.58it/s] 91%|█████████▏| 339884/371472 [4:52:35<2:23:35, 3.67it/s] 91%|█████████▏| 339885/371472 [4:52:36<2:19:20, 3.78it/s] 91%|█████████▏| 339886/371472 [4:52:36<2:16:47, 3.85it/s] 91%|█████████▏| 339887/371472 [4:52:36<2:21:56, 3.71it/s] 91%|█████████▏| 339888/371472 [4:52:37<2:20:35, 3.74it/s] 91%|█████████▏| 339889/371472 [4:52:37<2:21:03, 3.73it/s] 91%|█████████▏| 339890/371472 [4:52:37<2:19:30, 3.77it/s] 91%|█████████▏| 339891/371472 [4:52:37<2:19:30, 3.77it/s] 91%|█████████▏| 339892/371472 [4:52:38<2:18:09, 3.81it/s] 91%|█████████▏| 339893/371472 [4:52:38<2:20:01, 3.76it/s] 91%|█████████▏| 339894/371472 [4:52:38<2:18:00, 3.81it/s] 91%|█████████▏| 339895/371472 [4:52:38<2:21:04, 3.73it/s] 91%|█████████▏| 339896/371472 [4:52:39<2:30:36, 3.49it/s] 92%|█████████▏| 339897/371472 [4:52:39<2:29:15, 3.53it/s] 92%|█████████▏| 339898/371472 [4:52:39<2:26:36, 3.59it/s] 92%|█████████▏| 339899/371472 [4:52:40<2:42:22, 3.24it/s] 92%|█████████▏| 339900/371472 [4:52:40<2:45:24, 3.18it/s] {'loss': 2.6109, 'learning_rate': 1.7653364649098233e-07, 'epoch': 14.64} + 92%|█████████▏| 339900/371472 [4:52:40<2:45:24, 3.18it/s] 92%|█████████▏| 339901/371472 [4:52:40<2:37:03, 3.35it/s] 92%|█████████▏| 339902/371472 [4:52:41<2:37:26, 3.34it/s] 92%|█████████▏| 339903/371472 [4:52:41<2:32:44, 3.44it/s] 92%|█████████▏| 339904/371472 [4:52:41<2:30:31, 3.50it/s] 92%|█████████▏| 339905/371472 [4:52:41<2:34:27, 3.41it/s] 92%|█████████▏| 339906/371472 [4:52:42<2:31:08, 3.48it/s] 92%|█████████▏| 339907/371472 [4:52:42<2:33:28, 3.43it/s] 92%|█████████▏| 339908/371472 [4:52:42<2:26:44, 3.59it/s] 92%|█████████▏| 339909/371472 [4:52:42<2:24:25, 3.64it/s] 92%|█████████▏| 339910/371472 [4:52:43<2:22:24, 3.69it/s] 92%|█████████▏| 339911/371472 [4:52:43<2:26:29, 3.59it/s] 92%|█████████▏| 339912/371472 [4:52:43<2:23:32, 3.66it/s] 92%|█████████▏| 339913/371472 [4:52:44<2:25:44, 3.61it/s] 92%|█████████▏| 339914/371472 [4:52:44<2:26:38, 3.59it/s] 92%|█████████▏| 339915/371472 [4:52:44<2:30:19, 3.50it/s] 92%|█████████▏| 339916/371472 [4:52:44<2:30:39, 3.49it/s] 92%|█████████▏| 339917/371472 [4:52:45<2:37:11, 3.35it/s] 92%|█████████▏| 339918/371472 [4:52:45<2:34:43, 3.40it/s] 92%|█████████▏| 339919/371472 [4:52:45<2:30:44, 3.49it/s] 92%|█████████▏| 339920/371472 [4:52:46<2:41:32, 3.26it/s] {'loss': 2.5386, 'learning_rate': 1.7648516451550345e-07, 'epoch': 14.64} + 92%|█████████▏| 339920/371472 [4:52:46<2:41:32, 3.26it/s] 92%|█████████▏| 339921/371472 [4:52:46<2:48:10, 3.13it/s] 92%|█████████▏| 339922/371472 [4:52:46<2:41:56, 3.25it/s] 92%|█████████▏| 339923/371472 [4:52:47<2:32:39, 3.44it/s] 92%|█████████▏| 339924/371472 [4:52:47<2:36:53, 3.35it/s] 92%|█████████▏| 339925/371472 [4:52:47<2:30:33, 3.49it/s] 92%|█████████▏| 339926/371472 [4:52:47<2:32:47, 3.44it/s] 92%|█████████▏| 339927/371472 [4:52:48<2:42:42, 3.23it/s] 92%|█████████▏| 339928/371472 [4:52:48<2:40:59, 3.27it/s] 92%|█████████▏| 339929/371472 [4:52:48<2:49:15, 3.11it/s] 92%|█████████▏| 339930/371472 [4:52:49<2:42:16, 3.24it/s] 92%|█████████▏| 339931/371472 [4:52:49<2:37:18, 3.34it/s] 92%|█████████▏| 339932/371472 [4:52:49<2:38:39, 3.31it/s] 92%|█████████▏| 339933/371472 [4:52:50<2:28:26, 3.54it/s] 92%|█████████▏| 339934/371472 [4:52:50<2:32:55, 3.44it/s] 92%|█████████▏| 339935/371472 [4:52:50<2:29:39, 3.51it/s] 92%|█████████▏| 339936/371472 [4:52:50<2:25:18, 3.62it/s] 92%|█████████▏| 339937/371472 [4:52:51<2:24:52, 3.63it/s] 92%|█████████▏| 339938/371472 [4:52:51<2:24:41, 3.63it/s] 92%|█████████▏| 339939/371472 [4:52:51<2:21:35, 3.71it/s] 92%|█████████▏| 339940/371472 [4:52:51<2:16:34, 3.85it/s] {'loss': 2.582, 'learning_rate': 1.7643668254002452e-07, 'epoch': 14.64} + 92%|█████████▏| 339940/371472 [4:52:51<2:16:34, 3.85it/s] 92%|█████████▏| 339941/371472 [4:52:52<2:17:19, 3.83it/s] 92%|█████████▏| 339942/371472 [4:52:52<2:18:56, 3.78it/s] 92%|█████████▏| 339943/371472 [4:52:52<2:20:24, 3.74it/s] 92%|█████████▏| 339944/371472 [4:52:53<2:25:56, 3.60it/s] 92%|█████████▏| 339945/371472 [4:52:53<2:24:57, 3.62it/s] 92%|█████████▏| 339946/371472 [4:52:53<2:24:55, 3.63it/s] 92%|█████████▏| 339947/371472 [4:52:53<2:34:11, 3.41it/s] 92%|█████████▏| 339948/371472 [4:52:54<2:26:21, 3.59it/s] 92%|█████████▏| 339949/371472 [4:52:54<2:22:19, 3.69it/s] 92%|█████████▏| 339950/371472 [4:52:54<2:20:28, 3.74it/s] 92%|█████████▏| 339951/371472 [4:52:54<2:21:39, 3.71it/s] 92%|█████████▏| 339952/371472 [4:52:55<2:22:59, 3.67it/s] 92%|█████████▏| 339953/371472 [4:52:55<2:30:19, 3.49it/s] 92%|█████████▏| 339954/371472 [4:52:55<2:26:44, 3.58it/s] 92%|█████████▏| 339955/371472 [4:52:56<2:44:58, 3.18it/s] 92%|█████████▏| 339956/371472 [4:52:56<2:58:13, 2.95it/s] 92%|█████████▏| 339957/371472 [4:52:56<2:47:32, 3.14it/s] 92%|█████████▏| 339958/371472 [4:52:57<2:39:38, 3.29it/s] 92%|█████████▏| 339959/371472 [4:52:57<2:37:01, 3.34it/s] 92%|█████████▏| 339960/371472 [4:52:57<2:30:17, 3.49it/s] {'loss': 2.5466, 'learning_rate': 1.763882005645457e-07, 'epoch': 14.64} + 92%|█████████▏| 339960/371472 [4:52:57<2:30:17, 3.49it/s] 92%|█████████▏| 339961/371472 [4:52:57<2:27:19, 3.56it/s] 92%|█████████▏| 339962/371472 [4:52:58<2:22:20, 3.69it/s] 92%|█████████▏| 339963/371472 [4:52:58<2:23:12, 3.67it/s] 92%|█████████▏| 339964/371472 [4:52:58<2:23:31, 3.66it/s] 92%|█████████▏| 339965/371472 [4:52:59<2:29:59, 3.50it/s] 92%|█████████▏| 339966/371472 [4:52:59<2:26:30, 3.58it/s] 92%|█████████▏| 339967/371472 [4:52:59<2:47:51, 3.13it/s] 92%|█████████▏| 339968/371472 [4:53:00<2:37:08, 3.34it/s] 92%|█████████▏| 339969/371472 [4:53:00<2:37:08, 3.34it/s] 92%|█████████▏| 339970/371472 [4:53:00<2:46:29, 3.15it/s] 92%|█████████▏| 339971/371472 [4:53:00<2:38:25, 3.31it/s] 92%|█████████▏| 339972/371472 [4:53:01<2:40:26, 3.27it/s] 92%|█████████▏| 339973/371472 [4:53:01<2:36:51, 3.35it/s] 92%|█████████▏| 339974/371472 [4:53:01<2:36:10, 3.36it/s] 92%|█████████▏| 339975/371472 [4:53:02<2:32:18, 3.45it/s] 92%|█████████▏| 339976/371472 [4:53:02<2:35:28, 3.38it/s] 92%|█████████▏| 339977/371472 [4:53:02<2:39:09, 3.30it/s] 92%|█████████▏| 339978/371472 [4:53:03<2:38:35, 3.31it/s] 92%|█████████▏| 339979/371472 [4:53:03<2:32:23, 3.44it/s] 92%|█████████▏| 339980/371472 [4:53:03<2:28:18, 3.54it/s] {'loss': 2.6752, 'learning_rate': 1.7633971858906677e-07, 'epoch': 14.64} + 92%|█████████▏| 339980/371472 [4:53:03<2:28:18, 3.54it/s] 92%|█████████▏| 339981/371472 [4:53:03<2:23:38, 3.65it/s] 92%|█████████▏| 339982/371472 [4:53:04<2:37:20, 3.34it/s] 92%|█████████▏| 339983/371472 [4:53:04<2:36:07, 3.36it/s] 92%|█████████▏| 339984/371472 [4:53:04<2:32:48, 3.43it/s] 92%|█████████▏| 339985/371472 [4:53:05<2:33:55, 3.41it/s] 92%|█████████▏| 339986/371472 [4:53:05<2:31:40, 3.46it/s] 92%|█████████▏| 339987/371472 [4:53:05<2:27:56, 3.55it/s] 92%|█████████▏| 339988/371472 [4:53:05<2:31:50, 3.46it/s] 92%|█████████▏| 339989/371472 [4:53:06<2:45:27, 3.17it/s] 92%|█████████▏| 339990/371472 [4:53:06<2:41:58, 3.24it/s] 92%|█████████▏| 339991/371472 [4:53:06<2:32:17, 3.45it/s] 92%|█████████▏| 339992/371472 [4:53:07<2:32:30, 3.44it/s] 92%|█████████▏| 339993/371472 [4:53:07<2:36:10, 3.36it/s] 92%|█████████▏| 339994/371472 [4:53:07<2:40:57, 3.26it/s] 92%|█████████▏| 339995/371472 [4:53:08<2:35:20, 3.38it/s] 92%|█████████▏| 339996/371472 [4:53:08<2:32:25, 3.44it/s] 92%|█████████▏| 339997/371472 [4:53:08<2:37:09, 3.34it/s] 92%|█████████▏| 339998/371472 [4:53:08<2:32:39, 3.44it/s] 92%|█████████▏| 339999/371472 [4:53:09<2:30:15, 3.49it/s] 92%|█████████▏| 340000/371472 [4:53:09<2:37:21, 3.33it/s] {'loss': 2.6851, 'learning_rate': 1.762912366135879e-07, 'epoch': 14.64} + 92%|█████████▏| 340000/371472 [4:53:09<2:37:21, 3.33it/s] 92%|█████████▏| 340001/371472 [4:53:09<2:30:32, 3.48it/s] 92%|█████████▏| 340002/371472 [4:53:10<2:37:31, 3.33it/s] 92%|█████████▏| 340003/371472 [4:53:10<2:33:08, 3.42it/s] 92%|█████████▏| 340004/371472 [4:53:10<2:26:34, 3.58it/s] 92%|█████████▏| 340005/371472 [4:53:10<2:25:39, 3.60it/s] 92%|█████████▏| 340006/371472 [4:53:11<2:34:10, 3.40it/s] 92%|█████████▏| 340007/371472 [4:53:11<2:31:06, 3.47it/s] 92%|█████████▏| 340008/371472 [4:53:11<2:27:04, 3.57it/s] 92%|█████████▏| 340009/371472 [4:53:11<2:20:53, 3.72it/s] 92%|█████████▏| 340010/371472 [4:53:12<2:20:36, 3.73it/s] 92%|█████████▏| 340011/371472 [4:53:12<2:18:48, 3.78it/s] 92%|█████████▏| 340012/371472 [4:53:12<2:25:26, 3.61it/s] 92%|█████████▏| 340013/371472 [4:53:13<2:31:02, 3.47it/s] 92%|█████████▏| 340014/371472 [4:53:13<2:26:41, 3.57it/s] 92%|█████████▏| 340015/371472 [4:53:13<2:34:39, 3.39it/s] 92%|█████████▏| 340016/371472 [4:53:13<2:27:08, 3.56it/s] 92%|█████████▏| 340017/371472 [4:53:14<2:32:09, 3.45it/s] 92%|█████████▏| 340018/371472 [4:53:14<2:29:12, 3.51it/s] 92%|█████████▏| 340019/371472 [4:53:14<2:28:47, 3.52it/s] 92%|█████████▏| 340020/371472 [4:53:15<2:39:38, 3.28it/s] {'loss': 2.5789, 'learning_rate': 1.7624275463810897e-07, 'epoch': 14.65} + 92%|█████████▏| 340020/371472 [4:53:15<2:39:38, 3.28it/s] 92%|█████████▏| 340021/371472 [4:53:15<2:48:04, 3.12it/s] 92%|█████████▏| 340022/371472 [4:53:15<2:41:00, 3.26it/s] 92%|█████████▏| 340023/371472 [4:53:16<2:42:50, 3.22it/s] 92%|█████████▏| 340024/371472 [4:53:16<3:05:45, 2.82it/s] 92%|█████████▏| 340025/371472 [4:53:16<2:55:51, 2.98it/s] 92%|█████████▏| 340026/371472 [4:53:17<2:46:05, 3.16it/s] 92%|█████████▏| 340027/371472 [4:53:17<2:39:48, 3.28it/s] 92%|█████████▏| 340028/371472 [4:53:17<2:34:36, 3.39it/s] 92%|█████████▏| 340029/371472 [4:53:17<2:31:13, 3.47it/s] 92%|█████████▏| 340030/371472 [4:53:18<2:28:00, 3.54it/s] 92%|█████████▏| 340031/371472 [4:53:18<2:30:23, 3.48it/s] 92%|█████████▏| 340032/371472 [4:53:18<2:33:38, 3.41it/s] 92%|█████████▏| 340033/371472 [4:53:19<2:33:15, 3.42it/s] 92%|█████████▏| 340034/371472 [4:53:19<2:31:50, 3.45it/s] 92%|█████████▏| 340035/371472 [4:53:19<2:27:24, 3.55it/s] 92%|█████████▏| 340036/371472 [4:53:20<2:32:56, 3.43it/s] 92%|█████████▏| 340037/371472 [4:53:20<2:31:48, 3.45it/s] 92%|█████████▏| 340038/371472 [4:53:20<2:32:15, 3.44it/s] 92%|█████████▏| 340039/371472 [4:53:20<2:32:48, 3.43it/s] 92%|█████████▏| 340040/371472 [4:53:21<2:29:33, 3.50it/s] {'loss': 2.4464, 'learning_rate': 1.7619427266263015e-07, 'epoch': 14.65} + 92%|█████████▏| 340040/371472 [4:53:21<2:29:33, 3.50it/s] 92%|█████████▏| 340041/371472 [4:53:21<2:30:19, 3.48it/s] 92%|█████████▏| 340042/371472 [4:53:21<2:26:24, 3.58it/s] 92%|█████████▏| 340043/371472 [4:53:21<2:23:53, 3.64it/s] 92%|█████████▏| 340044/371472 [4:53:22<2:17:32, 3.81it/s] 92%|█████████▏| 340045/371472 [4:53:22<2:18:40, 3.78it/s] 92%|█████████▏| 340046/371472 [4:53:22<2:19:45, 3.75it/s] 92%|█████████▏| 340047/371472 [4:53:22<2:17:14, 3.82it/s] 92%|█████████▏| 340048/371472 [4:53:23<2:18:00, 3.79it/s] 92%|█████████▏| 340049/371472 [4:53:23<2:27:06, 3.56it/s] 92%|█████████▏| 340050/371472 [4:53:23<2:28:20, 3.53it/s] 92%|█████████▏| 340051/371472 [4:53:24<2:29:44, 3.50it/s] 92%|█████████▏| 340052/371472 [4:53:24<2:29:55, 3.49it/s] 92%|█████████▏| 340053/371472 [4:53:24<2:26:52, 3.57it/s] 92%|█████████▏| 340054/371472 [4:53:24<2:23:45, 3.64it/s] 92%|█████████▏| 340055/371472 [4:53:25<2:21:45, 3.69it/s] 92%|█████████▏| 340056/371472 [4:53:25<2:21:21, 3.70it/s] 92%|█████████▏| 340057/371472 [4:53:25<2:22:20, 3.68it/s] 92%|█████████▏| 340058/371472 [4:53:26<2:23:54, 3.64it/s] 92%|█████████▏| 340059/371472 [4:53:26<2:24:16, 3.63it/s] 92%|█████████▏| 340060/371472 [4:53:26<2:25:24, 3.60it/s] {'loss': 2.5959, 'learning_rate': 1.761457906871512e-07, 'epoch': 14.65} + 92%|█████████▏| 340060/371472 [4:53:26<2:25:24, 3.60it/s] 92%|█████████▏| 340061/371472 [4:53:26<2:34:39, 3.38it/s] 92%|█████████▏| 340062/371472 [4:53:27<2:31:00, 3.47it/s] 92%|█████████▏| 340063/371472 [4:53:27<2:27:49, 3.54it/s] 92%|█████████▏| 340064/371472 [4:53:27<2:28:48, 3.52it/s] 92%|█████████▏| 340065/371472 [4:53:28<2:30:11, 3.49it/s] 92%|█████████▏| 340066/371472 [4:53:28<2:25:52, 3.59it/s] 92%|█████████▏| 340067/371472 [4:53:28<2:33:08, 3.42it/s] 92%|█████████▏| 340068/371472 [4:53:28<2:28:38, 3.52it/s] 92%|█████████▏| 340069/371472 [4:53:29<2:28:21, 3.53it/s] 92%|█████████▏| 340070/371472 [4:53:29<2:41:26, 3.24it/s] 92%|█████████▏| 340071/371472 [4:53:29<2:31:59, 3.44it/s] 92%|█████████▏| 340072/371472 [4:53:30<2:25:33, 3.60it/s] 92%|█████████▏| 340073/371472 [4:53:30<2:41:29, 3.24it/s] 92%|█████████▏| 340074/371472 [4:53:30<2:33:26, 3.41it/s] 92%|█████████▏| 340075/371472 [4:53:30<2:32:00, 3.44it/s] 92%|█████████▏| 340076/371472 [4:53:31<2:38:50, 3.29it/s] 92%|█████████▏| 340077/371472 [4:53:31<2:32:38, 3.43it/s] 92%|████████���▏| 340078/371472 [4:53:31<2:25:07, 3.61it/s] 92%|█████████▏| 340079/371472 [4:53:32<2:24:22, 3.62it/s] 92%|█████████▏| 340080/371472 [4:53:32<2:28:58, 3.51it/s] {'loss': 2.6947, 'learning_rate': 1.7609730871167234e-07, 'epoch': 14.65} + 92%|█████████▏| 340080/371472 [4:53:32<2:28:58, 3.51it/s] 92%|█████████▏| 340081/371472 [4:53:32<2:25:05, 3.61it/s] 92%|█████████▏| 340082/371472 [4:53:32<2:28:53, 3.51it/s] 92%|█████████▏| 340083/371472 [4:53:33<2:31:43, 3.45it/s] 92%|█████████▏| 340084/371472 [4:53:33<2:27:30, 3.55it/s] 92%|█████████▏| 340085/371472 [4:53:33<2:22:45, 3.66it/s] 92%|█████████▏| 340086/371472 [4:53:34<2:19:20, 3.75it/s] 92%|█████████▏| 340087/371472 [4:53:34<2:17:20, 3.81it/s] 92%|█████████▏| 340088/371472 [4:53:34<2:18:00, 3.79it/s] 92%|█████████▏| 340089/371472 [4:53:34<2:24:07, 3.63it/s] 92%|█████████▏| 340090/371472 [4:53:35<2:22:00, 3.68it/s] 92%|█████████▏| 340091/371472 [4:53:35<2:18:39, 3.77it/s] 92%|█████████▏| 340092/371472 [4:53:35<2:16:51, 3.82it/s] 92%|█████████▏| 340093/371472 [4:53:35<2:17:16, 3.81it/s] 92%|█████████▏| 340094/371472 [4:53:36<2:27:33, 3.54it/s] 92%|█████████▏| 340095/371472 [4:53:36<2:29:05, 3.51it/s] 92%|█████████▏| 340096/371472 [4:53:36<2:33:35, 3.40it/s] 92%|█████████▏| 340097/371472 [4:53:37<2:28:20, 3.53it/s] 92%|█████████▏| 340098/371472 [4:53:37<2:27:57, 3.53it/s] 92%|█████████▏| 340099/371472 [4:53:37<2:22:42, 3.66it/s] 92%|█████████▏| 340100/371472 [4:53:37<2:24:31, 3.62it/s] {'loss': 2.5643, 'learning_rate': 1.760488267361934e-07, 'epoch': 14.65} + 92%|█████████▏| 340100/371472 [4:53:37<2:24:31, 3.62it/s] 92%|█████████▏| 340101/371472 [4:53:38<2:22:54, 3.66it/s] 92%|█████████▏| 340102/371472 [4:53:38<2:19:17, 3.75it/s] 92%|█████████▏| 340103/371472 [4:53:38<2:16:34, 3.83it/s] 92%|█████████▏| 340104/371472 [4:53:38<2:16:03, 3.84it/s] 92%|█████████▏| 340105/371472 [4:53:39<2:16:34, 3.83it/s] 92%|█████████▏| 340106/371472 [4:53:39<2:15:08, 3.87it/s] 92%|█████████▏| 340107/371472 [4:53:39<2:20:37, 3.72it/s] 92%|█████████▏| 340108/371472 [4:53:40<2:24:03, 3.63it/s] 92%|█████████▏| 340109/371472 [4:53:40<2:25:08, 3.60it/s] 92%|█████████▏| 340110/371472 [4:53:40<2:27:28, 3.54it/s] 92%|█████████▏| 340111/371472 [4:53:40<2:29:38, 3.49it/s] 92%|█████████▏| 340112/371472 [4:53:41<2:24:59, 3.60it/s] 92%|█████████▏| 340113/371472 [4:53:41<2:23:35, 3.64it/s] 92%|█████████▏| 340114/371472 [4:53:41<2:22:15, 3.67it/s] 92%|█████████▏| 340115/371472 [4:53:42<2:27:12, 3.55it/s] 92%|█████████▏| 340116/371472 [4:53:42<2:20:51, 3.71it/s] 92%|█████████▏| 340117/371472 [4:53:42<2:35:47, 3.35it/s] 92%|█████████▏| 340118/371472 [4:53:42<2:36:25, 3.34it/s] 92%|█████████▏| 340119/371472 [4:53:43<2:33:08, 3.41it/s] 92%|█████████▏| 340120/371472 [4:53:43<2:36:28, 3.34it/s] {'loss': 2.5277, 'learning_rate': 1.7600034476071446e-07, 'epoch': 14.65} + 92%|█████████▏| 340120/371472 [4:53:43<2:36:28, 3.34it/s] 92%|█████████▏| 340121/371472 [4:53:43<2:32:04, 3.44it/s] 92%|█████████▏| 340122/371472 [4:53:44<2:31:32, 3.45it/s] 92%|█████████▏| 340123/371472 [4:53:44<2:23:04, 3.65it/s] 92%|█████████▏| 340124/371472 [4:53:44<2:28:55, 3.51it/s] 92%|█████████▏| 340125/371472 [4:53:44<2:34:36, 3.38it/s] 92%|█████████▏| 340126/371472 [4:53:45<2:31:39, 3.44it/s] 92%|█████████▏| 340127/371472 [4:53:45<2:27:12, 3.55it/s] 92%|█████████▏| 340128/371472 [4:53:45<2:34:35, 3.38it/s] 92%|█████████▏| 340129/371472 [4:53:46<2:31:15, 3.45it/s] 92%|█████████▏| 340130/371472 [4:53:46<2:35:09, 3.37it/s] 92%|█████████▏| 340131/371472 [4:53:46<2:32:28, 3.43it/s] 92%|█████████▏| 340132/371472 [4:53:46<2:27:01, 3.55it/s] 92%|█████████▏| 340133/371472 [4:53:47<2:33:42, 3.40it/s] 92%|█████████▏| 340134/371472 [4:53:47<2:33:29, 3.40it/s] 92%|█████████▏| 340135/371472 [4:53:47<2:28:35, 3.51it/s] 92%|█████████▏| 340136/371472 [4:53:48<2:25:54, 3.58it/s] 92%|█████████▏| 340137/371472 [4:53:48<2:23:21, 3.64it/s] 92%|█████████▏| 340138/371472 [4:53:48<2:25:54, 3.58it/s] 92%|█████████▏| 340139/371472 [4:53:48<2:27:16, 3.55it/s] 92%|█████████▏| 340140/371472 [4:53:49<2:30:26, 3.47it/s] {'loss': 2.5237, 'learning_rate': 1.759518627852356e-07, 'epoch': 14.65} + 92%|█████████▏| 340140/371472 [4:53:49<2:30:26, 3.47it/s] 92%|█████████▏| 340141/371472 [4:53:49<2:26:04, 3.57it/s] 92%|█████████▏| 340142/371472 [4:53:49<2:31:46, 3.44it/s] 92%|█████████▏| 340143/371472 [4:53:50<2:35:11, 3.36it/s] 92%|█████████▏| 340144/371472 [4:53:50<2:29:26, 3.49it/s] 92%|█████████▏| 340145/371472 [4:53:50<2:29:57, 3.48it/s] 92%|█████████▏| 340146/371472 [4:53:50<2:26:07, 3.57it/s] 92%|█████████▏| 340147/371472 [4:53:51<2:23:33, 3.64it/s] 92%|█████████▏| 340148/371472 [4:53:51<2:28:43, 3.51it/s] 92%|█████████▏| 340149/371472 [4:53:51<2:31:38, 3.44it/s] 92%|█████████▏| 340150/371472 [4:53:52<2:29:20, 3.50it/s] 92%|█████████▏| 340151/371472 [4:53:52<2:31:59, 3.43it/s] 92%|█████████▏| 340152/371472 [4:53:52<2:28:50, 3.51it/s] 92%|█████████▏| 340153/371472 [4:53:52<2:22:43, 3.66it/s] 92%|█████████▏| 340154/371472 [4:53:53<2:19:37, 3.74it/s] 92%|█████████▏| 340155/371472 [4:53:53<2:19:51, 3.73it/s] 92%|█████████▏| 340156/371472 [4:53:53<2:20:57, 3.70it/s] 92%|█████████▏| 340157/371472 [4:53:54<2:32:05, 3.43it/s] 92%|█████████▏| 340158/371472 [4:53:54<2:29:42, 3.49it/s] 92%|█████████▏| 340159/371472 [4:53:54<2:28:30, 3.51it/s] 92%|█████████▏| 340160/371472 [4:53:54<2:24:42, 3.61it/s] {'loss': 2.6182, 'learning_rate': 1.7590338080975668e-07, 'epoch': 14.65} + 92%|█████████▏| 340160/371472 [4:53:54<2:24:42, 3.61it/s] 92%|█████████▏| 340161/371472 [4:53:55<2:22:43, 3.66it/s] 92%|█████████▏| 340162/371472 [4:53:55<2:21:19, 3.69it/s] 92%|█████████▏| 340163/371472 [4:53:55<2:24:18, 3.62it/s] 92%|█████████▏| 340164/371472 [4:53:55<2:31:00, 3.46it/s] 92%|█████████▏| 340165/371472 [4:53:56<2:25:07, 3.60it/s] 92%|█████████▏| 340166/371472 [4:53:56<2:31:15, 3.45it/s] 92%|█████████▏| 340167/371472 [4:53:56<2:26:56, 3.55it/s] 92%|█████████▏| 340168/371472 [4:53:57<2:30:25, 3.47it/s] 92%|█████████▏| 340169/371472 [4:53:57<2:29:29, 3.49it/s] 92%|█████████▏| 340170/371472 [4:53:57<2:24:41, 3.61it/s] 92%|█████████▏| 340171/371472 [4:53:57<2:22:12, 3.67it/s] 92%|█████████▏| 340172/371472 [4:53:58<2:19:40, 3.73it/s] 92%|█████████▏| 340173/371472 [4:53:58<2:17:16, 3.80it/s] 92%|█████████▏| 340174/371472 [4:53:58<2:22:10, 3.67it/s] 92%|█████████▏| 340175/371472 [4:53:58<2:21:43, 3.68it/s] 92%|█████████▏| 340176/371472 [4:53:59<2:21:44, 3.68it/s] 92%|█████████▏| 340177/371472 [4:53:59<2:37:19, 3.32it/s] 92%|█████████▏| 340178/371472 [4:53:59<2:35:16, 3.36it/s] 92%|█████████▏| 340179/371472 [4:54:00<2:33:53, 3.39it/s] 92%|█████████▏| 340180/371472 [4:54:00<2:31:41, 3.44it/s] {'loss': 2.6643, 'learning_rate': 1.7585489883427783e-07, 'epoch': 14.65} + 92%|█████████▏| 340180/371472 [4:54:00<2:31:41, 3.44it/s] 92%|█████████▏| 340181/371472 [4:54:00<2:29:43, 3.48it/s] 92%|█████████▏| 340182/371472 [4:54:01<2:36:13, 3.34it/s] 92%|█████████▏| 340183/371472 [4:54:01<2:29:38, 3.48it/s] 92%|█████████▏| 340184/371472 [4:54:01<2:29:33, 3.49it/s] 92%|█████████▏| 340185/371472 [4:54:01<2:34:05, 3.38it/s] 92%|█████████▏| 340186/371472 [4:54:02<2:29:43, 3.48it/s] 92%|█████████▏| 340187/371472 [4:54:02<2:26:04, 3.57it/s] 92%|█████████▏| 340188/371472 [4:54:02<2:21:18, 3.69it/s] 92%|█████████▏| 340189/371472 [4:54:03<2:22:46, 3.65it/s] 92%|███��█████▏| 340190/371472 [4:54:03<2:19:15, 3.74it/s] 92%|█████████▏| 340191/371472 [4:54:03<2:22:05, 3.67it/s] 92%|█████████▏| 340192/371472 [4:54:03<2:32:14, 3.42it/s] 92%|█████████▏| 340193/371472 [4:54:04<2:28:49, 3.50it/s] 92%|█████████▏| 340194/371472 [4:54:04<2:31:48, 3.43it/s] 92%|█████████▏| 340195/371472 [4:54:04<2:31:32, 3.44it/s] 92%|█████████▏| 340196/371472 [4:54:05<2:31:21, 3.44it/s] 92%|█████████▏| 340197/371472 [4:54:05<2:25:27, 3.58it/s] 92%|█████████▏| 340198/371472 [4:54:05<2:33:17, 3.40it/s] 92%|█████████▏| 340199/371472 [4:54:05<2:30:46, 3.46it/s] 92%|█████████▏| 340200/371472 [4:54:06<2:31:53, 3.43it/s] {'loss': 2.5842, 'learning_rate': 1.7580641685879888e-07, 'epoch': 14.65} + 92%|█████████▏| 340200/371472 [4:54:06<2:31:53, 3.43it/s] 92%|█████████▏| 340201/371472 [4:54:06<2:30:45, 3.46it/s] 92%|█████████▏| 340202/371472 [4:54:06<2:31:11, 3.45it/s] 92%|█████████▏| 340203/371472 [4:54:07<2:26:11, 3.56it/s] 92%|█████████▏| 340204/371472 [4:54:07<2:19:34, 3.73it/s] 92%|█████████▏| 340205/371472 [4:54:07<2:21:11, 3.69it/s] 92%|█████████▏| 340206/371472 [4:54:07<2:21:26, 3.68it/s] 92%|█████████▏| 340207/371472 [4:54:08<2:21:20, 3.69it/s] 92%|█████████▏| 340208/371472 [4:54:08<2:23:53, 3.62it/s] 92%|█████████▏| 340209/371472 [4:54:08<2:23:11, 3.64it/s] 92%|█████████▏| 340210/371472 [4:54:08<2:21:29, 3.68it/s] 92%|█████████▏| 340211/371472 [4:54:09<2:19:32, 3.73it/s] 92%|█████████▏| 340212/371472 [4:54:09<2:16:26, 3.82it/s] 92%|█████████▏| 340213/371472 [4:54:09<2:19:55, 3.72it/s] 92%|█████████▏| 340214/371472 [4:54:09<2:19:06, 3.74it/s] 92%|█████████▏| 340215/371472 [4:54:10<2:44:33, 3.17it/s] 92%|█████████▏| 340216/371472 [4:54:10<2:35:09, 3.36it/s] 92%|█████████▏| 340217/371472 [4:54:10<2:40:32, 3.24it/s] 92%|█████████▏| 340218/371472 [4:54:11<2:44:58, 3.16it/s] 92%|█████████▏| 340219/371472 [4:54:11<2:43:38, 3.18it/s] 92%|█████████▏| 340220/371472 [4:54:11<2:36:13, 3.33it/s] {'loss': 2.6119, 'learning_rate': 1.7575793488332005e-07, 'epoch': 14.65} + 92%|█████████▏| 340220/371472 [4:54:11<2:36:13, 3.33it/s] 92%|█████████▏| 340221/371472 [4:54:12<2:30:38, 3.46it/s] 92%|█████████▏| 340222/371472 [4:54:12<2:35:53, 3.34it/s] 92%|█████████▏| 340223/371472 [4:54:12<2:37:44, 3.30it/s] 92%|█████████▏| 340224/371472 [4:54:13<2:36:04, 3.34it/s] 92%|█████████▏| 340225/371472 [4:54:13<2:28:38, 3.50it/s] 92%|█████████▏| 340226/371472 [4:54:13<2:28:48, 3.50it/s] 92%|█████████▏| 340227/371472 [4:54:13<2:28:24, 3.51it/s] 92%|█████████▏| 340228/371472 [4:54:14<2:26:38, 3.55it/s] 92%|█████████▏| 340229/371472 [4:54:14<2:22:05, 3.66it/s] 92%|█████████▏| 340230/371472 [4:54:14<2:22:29, 3.65it/s] 92%|█████████▏| 340231/371472 [4:54:15<2:26:22, 3.56it/s] 92%|█████████▏| 340232/371472 [4:54:15<2:25:41, 3.57it/s] 92%|█████████▏| 340233/371472 [4:54:15<2:31:38, 3.43it/s] 92%|█████████▏| 340234/371472 [4:54:15<2:28:36, 3.50it/s] 92%|█████████▏| 340235/371472 [4:54:16<2:30:31, 3.46it/s] 92%|█████████▏| 340236/371472 [4:54:16<2:26:54, 3.54it/s] 92%|█████████▏| 340237/371472 [4:54:16<2:41:07, 3.23it/s] 92%|█████████▏| 340238/371472 [4:54:17<2:50:33, 3.05it/s] 92%|█████████▏| 340239/371472 [4:54:17<2:38:17, 3.29it/s] 92%|█████████▏| 340240/371472 [4:54:17<2:30:49, 3.45it/s] {'loss': 2.5157, 'learning_rate': 1.757094529078411e-07, 'epoch': 14.65} + 92%|█████████▏| 340240/371472 [4:54:17<2:30:49, 3.45it/s] 92%|█████████▏| 340241/371472 [4:54:18<2:33:07, 3.40it/s] 92%|█████████▏| 340242/371472 [4:54:18<2:30:07, 3.47it/s] 92%|█████████▏| 340243/371472 [4:54:18<2:22:42, 3.65it/s] 92%|█████████▏| 340244/371472 [4:54:18<2:22:41, 3.65it/s] 92%|█████████▏| 340245/371472 [4:54:19<2:25:28, 3.58it/s] 92%|█���███████▏| 340246/371472 [4:54:19<2:18:41, 3.75it/s] 92%|█████████▏| 340247/371472 [4:54:19<2:17:50, 3.78it/s] 92%|█████████▏| 340248/371472 [4:54:19<2:24:07, 3.61it/s] 92%|█████████▏| 340249/371472 [4:54:20<2:22:26, 3.65it/s] 92%|█████████▏| 340250/371472 [4:54:20<2:21:16, 3.68it/s] 92%|█████████▏| 340251/371472 [4:54:20<2:21:10, 3.69it/s] 92%|█████████▏| 340252/371472 [4:54:20<2:23:45, 3.62it/s] 92%|█████████▏| 340253/371472 [4:54:21<2:22:04, 3.66it/s] 92%|█████████▏| 340254/371472 [4:54:21<2:27:16, 3.53it/s] 92%|█████████▏| 340255/371472 [4:54:21<2:22:24, 3.65it/s] 92%|█████████▏| 340256/371472 [4:54:22<2:20:24, 3.71it/s] 92%|█████████▏| 340257/371472 [4:54:22<2:28:18, 3.51it/s] 92%|█████████▏| 340258/371472 [4:54:22<2:24:29, 3.60it/s] 92%|█████████▏| 340259/371472 [4:54:23<2:37:58, 3.29it/s] 92%|█████████▏| 340260/371472 [4:54:23<2:35:32, 3.34it/s] {'loss': 2.5149, 'learning_rate': 1.7566097093236225e-07, 'epoch': 14.66} + 92%|█████████▏| 340260/371472 [4:54:23<2:35:32, 3.34it/s] 92%|█████████▏| 340261/371472 [4:54:23<2:29:19, 3.48it/s] 92%|█████████▏| 340262/371472 [4:54:23<2:37:01, 3.31it/s] 92%|█████████▏| 340263/371472 [4:54:24<2:33:48, 3.38it/s] 92%|█████████▏| 340264/371472 [4:54:24<2:28:06, 3.51it/s] 92%|█████████▏| 340265/371472 [4:54:24<2:25:05, 3.58it/s] 92%|█████████▏| 340266/371472 [4:54:24<2:25:13, 3.58it/s] 92%|█████████▏| 340267/371472 [4:54:25<2:32:37, 3.41it/s] 92%|█████████▏| 340268/371472 [4:54:25<2:30:01, 3.47it/s] 92%|█████████▏| 340269/371472 [4:54:25<2:27:44, 3.52it/s] 92%|█████████▏| 340270/371472 [4:54:26<2:23:42, 3.62it/s] 92%|█████████▏| 340271/371472 [4:54:26<2:28:13, 3.51it/s] 92%|█████████▏| 340272/371472 [4:54:26<2:35:37, 3.34it/s] 92%|█████████▏| 340273/371472 [4:54:27<2:32:21, 3.41it/s] 92%|█████████▏| 340274/371472 [4:54:27<2:28:27, 3.50it/s] 92%|█████████▏| 340275/371472 [4:54:27<2:27:19, 3.53it/s] 92%|█████████▏| 340276/371472 [4:54:27<2:23:51, 3.61it/s] 92%|█████████▏| 340277/371472 [4:54:28<2:21:14, 3.68it/s] 92%|█████████▏| 340278/371472 [4:54:28<2:24:01, 3.61it/s] 92%|█████████▏| 340279/371472 [4:54:28<2:28:48, 3.49it/s] 92%|█████████▏| 340280/371472 [4:54:28<2:25:23, 3.58it/s] {'loss': 2.5127, 'learning_rate': 1.7561248895688332e-07, 'epoch': 14.66} + 92%|█████████▏| 340280/371472 [4:54:28<2:25:23, 3.58it/s] 92%|█████████▏| 340281/371472 [4:54:29<2:28:50, 3.49it/s] 92%|█████████▏| 340282/371472 [4:54:29<2:32:07, 3.42it/s] 92%|█████████▏| 340283/371472 [4:54:29<2:25:25, 3.57it/s] 92%|█████████▏| 340284/371472 [4:54:30<2:32:08, 3.42it/s] 92%|█████████▏| 340285/371472 [4:54:30<2:42:33, 3.20it/s] 92%|█████████▏| 340286/371472 [4:54:30<2:37:10, 3.31it/s] 92%|█████████▏| 340287/371472 [4:54:31<2:41:01, 3.23it/s] 92%|█████████▏| 340288/371472 [4:54:31<2:35:30, 3.34it/s] 92%|█████████▏| 340289/371472 [4:54:31<2:34:15, 3.37it/s] 92%|█████████▏| 340290/371472 [4:54:31<2:26:22, 3.55it/s] 92%|█████████▏| 340291/371472 [4:54:32<2:22:49, 3.64it/s] 92%|█████████▏| 340292/371472 [4:54:32<2:19:47, 3.72it/s] 92%|█████████▏| 340293/371472 [4:54:32<2:23:08, 3.63it/s] 92%|█████████▏| 340294/371472 [4:54:33<2:23:51, 3.61it/s] 92%|█████████▏| 340295/371472 [4:54:33<2:27:54, 3.51it/s] 92%|█████████▏| 340296/371472 [4:54:33<2:32:16, 3.41it/s] 92%|█████████▏| 340297/371472 [4:54:33<2:26:11, 3.55it/s] 92%|█████████▏| 340298/371472 [4:54:34<2:22:50, 3.64it/s] 92%|█████████▏| 340299/371472 [4:54:34<2:19:39, 3.72it/s] 92%|█████████▏| 340300/371472 [4:54:34<2:22:21, 3.65it/s] {'loss': 2.5413, 'learning_rate': 1.7556400698140447e-07, 'epoch': 14.66} + 92%|█████████▏| 340300/371472 [4:54:34<2:22:21, 3.65it/s] 92%|█████████▏| 340301/371472 [4:54:34<2:19:09, 3.73it/s] 92%|█████████▏| 340302/371472 [4:54:35<2:20:13, 3.70it/s] 92%|█████████▏| 340303/371472 [4:54:35<2:19:18, 3.73it/s] 92%|█████████▏| 340304/371472 [4:54:35<2:23:07, 3.63it/s] 92%|█████████▏| 340305/371472 [4:54:36<2:27:00, 3.53it/s] 92%|█████████▏| 340306/371472 [4:54:36<2:31:53, 3.42it/s] 92%|█████████▏| 340307/371472 [4:54:36<2:33:18, 3.39it/s] 92%|█████████▏| 340308/371472 [4:54:36<2:26:48, 3.54it/s] 92%|█████████▏| 340309/371472 [4:54:37<2:31:01, 3.44it/s] 92%|█████████▏| 340310/371472 [4:54:37<2:47:46, 3.10it/s] 92%|█████████▏| 340311/371472 [4:54:37<2:35:27, 3.34it/s] 92%|█████████▏| 340312/371472 [4:54:38<2:29:12, 3.48it/s] 92%|█████████▏| 340313/371472 [4:54:38<2:44:09, 3.16it/s] 92%|█████████▏| 340314/371472 [4:54:38<2:34:58, 3.35it/s] 92%|█████████▏| 340315/371472 [4:54:39<2:34:24, 3.36it/s] 92%|█████████▏| 340316/371472 [4:54:39<2:47:11, 3.11it/s] 92%|█████████▏| 340317/371472 [4:54:39<2:37:58, 3.29it/s] 92%|█████████▏| 340318/371472 [4:54:39<2:34:41, 3.36it/s] 92%|█████████▏| 340319/371472 [4:54:40<2:31:02, 3.44it/s] 92%|█████████▏| 340320/371472 [4:54:40<2:30:00, 3.46it/s] {'loss': 2.6602, 'learning_rate': 1.7551552500592552e-07, 'epoch': 14.66} + 92%|█████████▏| 340320/371472 [4:54:40<2:30:00, 3.46it/s] 92%|█████████▏| 340321/371472 [4:54:40<2:35:36, 3.34it/s] 92%|█████████▏| 340322/371472 [4:54:41<2:30:28, 3.45it/s] 92%|█████████▏| 340323/371472 [4:54:41<2:23:13, 3.62it/s] 92%|█████████▏| 340324/371472 [4:54:41<2:21:39, 3.66it/s] 92%|█████████▏| 340325/371472 [4:54:41<2:16:36, 3.80it/s] 92%|█████████▏| 340326/371472 [4:54:42<2:22:58, 3.63it/s] 92%|█████████▏| 340327/371472 [4:54:42<2:18:03, 3.76it/s] 92%|█████████▏| 340328/371472 [4:54:42<2:20:53, 3.68it/s] 92%|█████████▏| 340329/371472 [4:54:43<2:21:41, 3.66it/s] 92%|█████████▏| 340330/371472 [4:54:43<2:22:14, 3.65it/s] 92%|█████████▏| 340331/371472 [4:54:43<2:19:08, 3.73it/s] 92%|█████████▏| 340332/371472 [4:54:43<2:20:00, 3.71it/s] 92%|█████████▏| 340333/371472 [4:54:44<2:31:02, 3.44it/s] 92%|█████████▏| 340334/371472 [4:54:44<2:33:11, 3.39it/s] 92%|█████████▏| 340335/371472 [4:54:44<2:26:23, 3.55it/s] 92%|█████████▏| 340336/371472 [4:54:44<2:26:10, 3.55it/s] 92%|█████████▏| 340337/371472 [4:54:45<2:31:07, 3.43it/s] 92%|█████████▏| 340338/371472 [4:54:45<2:27:21, 3.52it/s] 92%|█████████▏| 340339/371472 [4:54:45<2:37:54, 3.29it/s] 92%|█████████▏| 340340/371472 [4:54:46<2:31:22, 3.43it/s] {'loss': 2.69, 'learning_rate': 1.754670430304467e-07, 'epoch': 14.66} + 92%|█████████▏| 340340/371472 [4:54:46<2:31:22, 3.43it/s] 92%|█████████▏| 340341/371472 [4:54:46<2:30:23, 3.45it/s] 92%|█████████▏| 340342/371472 [4:54:46<2:37:40, 3.29it/s] 92%|█████████▏| 340343/371472 [4:54:47<2:30:51, 3.44it/s] 92%|█████████▏| 340344/371472 [4:54:47<2:24:26, 3.59it/s] 92%|█████████▏| 340345/371472 [4:54:47<2:24:35, 3.59it/s] 92%|█████████▏| 340346/371472 [4:54:47<2:29:45, 3.46it/s] 92%|█████████▏| 340347/371472 [4:54:48<2:31:35, 3.42it/s] 92%|█████████▏| 340348/371472 [4:54:48<2:31:21, 3.43it/s] 92%|█████████▏| 340349/371472 [4:54:48<2:36:09, 3.32it/s] 92%|█████████▏| 340350/371472 [4:54:49<2:31:34, 3.42it/s] 92%|█████████▏| 340351/371472 [4:54:49<2:50:16, 3.05it/s] 92%|█████████▏| 340352/371472 [4:54:49<2:50:49, 3.04it/s] 92%|█████████▏| 340353/371472 [4:54:50<2:46:54, 3.11it/s] 92%|█████████▏| 340354/371472 [4:54:50<2:49:08, 3.07it/s] 92%|█████████▏| 340355/371472 [4:54:50<2:39:41, 3.25it/s] 92%|█████████▏| 340356/371472 [4:54:51<2:36:49, 3.31it/s] 92%|█████████▏| 340357/371472 [4:54:51<2:42:22, 3.19it/s] 92%|█████████▏| 340358/371472 [4:54:51<2:35:42, 3.33it/s] 92%|█████████▏| 340359/371472 [4:54:51<2:26:41, 3.54it/s] 92%|█████████▏| 340360/371472 [4:54:52<2:21:41, 3.66it/s] {'loss': 2.5693, 'learning_rate': 1.7541856105496777e-07, 'epoch': 14.66} + 92%|█████████▏| 340360/371472 [4:54:52<2:21:41, 3.66it/s] 92%|█████████▏| 340361/371472 [4:54:52<2:29:30, 3.47it/s] 92%|█████████▏| 340362/371472 [4:54:52<2:31:04, 3.43it/s] 92%|█████████▏| 340363/371472 [4:54:53<2:32:07, 3.41it/s] 92%|█████████▏| 340364/371472 [4:54:53<2:31:31, 3.42it/s] 92%|█████████▏| 340365/371472 [4:54:53<2:29:53, 3.46it/s] 92%|█████████▏| 340366/371472 [4:54:53<2:26:10, 3.55it/s] 92%|█████████▏| 340367/371472 [4:54:54<2:25:18, 3.57it/s] 92%|█████████▏| 340368/371472 [4:54:54<2:21:15, 3.67it/s] 92%|█████████▏| 340369/371472 [4:54:54<2:17:32, 3.77it/s] 92%|█████████▏| 340370/371472 [4:54:54<2:18:50, 3.73it/s] 92%|█████████▏| 340371/371472 [4:54:55<2:17:52, 3.76it/s] 92%|█████████▏| 340372/371472 [4:54:55<2:32:18, 3.40it/s] 92%|█████████▏| 340373/371472 [4:54:55<2:31:55, 3.41it/s] 92%|█████████▏| 340374/371472 [4:54:56<2:32:16, 3.40it/s] 92%|█████████▏| 340375/371472 [4:54:56<2:29:22, 3.47it/s] 92%|█████████▏| 340376/371472 [4:54:56<2:29:44, 3.46it/s] 92%|█████████▏| 340377/371472 [4:54:57<2:33:50, 3.37it/s] 92%|█████████▏| 340378/371472 [4:54:57<2:26:01, 3.55it/s] 92%|█████████▏| 340379/371472 [4:54:57<3:04:40, 2.81it/s] 92%|█████████▏| 340380/371472 [4:54:58<2:54:15, 2.97it/s] {'loss': 2.6219, 'learning_rate': 1.753700790794889e-07, 'epoch': 14.66} + 92%|█████████▏| 340380/371472 [4:54:58<2:54:15, 2.97it/s] 92%|█████████▏| 340381/371472 [4:54:58<2:42:36, 3.19it/s] 92%|█████████▏| 340382/371472 [4:54:58<2:33:50, 3.37it/s] 92%|█████████▏| 340383/371472 [4:54:58<2:38:04, 3.28it/s] 92%|█████████▏| 340384/371472 [4:54:59<2:32:29, 3.40it/s] 92%|█████████▏| 340385/371472 [4:54:59<2:39:51, 3.24it/s] 92%|█████████▏| 340386/371472 [4:54:59<2:34:41, 3.35it/s] 92%|█████████▏| 340387/371472 [4:55:00<2:29:49, 3.46it/s] 92%|█████████▏| 340388/371472 [4:55:00<2:39:30, 3.25it/s] 92%|█████████▏| 340389/371472 [4:55:00<2:32:11, 3.40it/s] 92%|█████████▏| 340390/371472 [4:55:00<2:26:52, 3.53it/s] 92%|█████████▏| 340391/371472 [4:55:01<2:35:42, 3.33it/s] 92%|█████████▏| 340392/371472 [4:55:01<2:35:28, 3.33it/s] 92%|█████████▏| 340393/371472 [4:55:01<2:31:49, 3.41it/s] 92%|█████████▏| 340394/371472 [4:55:02<2:24:39, 3.58it/s] 92%|█████████▏| 340395/371472 [4:55:02<2:25:25, 3.56it/s] 92%|█████████▏| 340396/371472 [4:55:02<2:31:56, 3.41it/s] 92%|█████████▏| 340397/371472 [4:55:02<2:26:58, 3.52it/s] 92%|█████████▏| 340398/371472 [4:55:03<2:22:41, 3.63it/s] 92%|█████████▏| 340399/371472 [4:55:03<2:22:10, 3.64it/s] 92%|█████████▏| 340400/371472 [4:55:03<2:20:19, 3.69it/s] {'loss': 2.6203, 'learning_rate': 1.7532159710400996e-07, 'epoch': 14.66} + 92%|█████████▏| 340400/371472 [4:55:03<2:20:19, 3.69it/s] 92%|█████████▏| 340401/371472 [4:55:04<2:16:19, 3.80it/s] 92%|█████████▏| 340402/371472 [4:55:04<2:18:14, 3.75it/s] 92%|█████████▏| 340403/371472 [4:55:04<2:19:08, 3.72it/s] 92%|█████████▏| 340404/371472 [4:55:04<2:27:02, 3.52it/s] 92%|█████████▏| 340405/371472 [4:55:05<2:32:04, 3.40it/s] 92%|█████████▏| 340406/371472 [4:55:05<2:30:12, 3.45it/s] 92%|█████████▏| 340407/371472 [4:55:05<2:23:02, 3.62it/s] 92%|█████████▏| 340408/371472 [4:55:06<2:27:50, 3.50it/s] 92%|█████████▏| 340409/371472 [4:55:06<2:42:18, 3.19it/s] 92%|█████████▏| 340410/371472 [4:55:06<2:37:37, 3.28it/s] 92%|█████████▏| 340411/371472 [4:55:07<2:40:19, 3.23it/s] 92%|█████████▏| 340412/371472 [4:55:07<2:38:33, 3.26it/s] 92%|█████████▏| 340413/371472 [4:55:07<2:38:17, 3.27it/s] 92%|█████████▏| 340414/371472 [4:55:07<2:41:53, 3.20it/s] 92%|█████████▏| 340415/371472 [4:55:08<2:40:12, 3.23it/s] 92%|█████████▏| 340416/371472 [4:55:08<2:36:19, 3.31it/s] 92%|█████████▏| 340417/371472 [4:55:08<2:29:36, 3.46it/s] 92%|█████████▏| 340418/371472 [4:55:09<2:28:58, 3.47it/s] 92%|█████████▏| 340419/371472 [4:55:09<2:33:21, 3.37it/s] 92%|█████████▏| 340420/371472 [4:55:09<2:26:36, 3.53it/s] {'loss': 2.6107, 'learning_rate': 1.7527311512853114e-07, 'epoch': 14.66} + 92%|█████████▏| 340420/371472 [4:55:09<2:26:36, 3.53it/s] 92%|█████████▏| 340421/371472 [4:55:09<2:29:37, 3.46it/s] 92%|█████████▏| 340422/371472 [4:55:10<2:33:44, 3.37it/s] 92%|█████████▏| 340423/371472 [4:55:10<2:36:11, 3.31it/s] 92%|█████████▏| 340424/371472 [4:55:10<2:34:26, 3.35it/s] 92%|█████████▏| 340425/371472 [4:55:11<2:28:05, 3.49it/s] 92%|█████████▏| 340426/371472 [4:55:11<2:29:33, 3.46it/s] 92%|█████████▏| 340427/371472 [4:55:11<2:23:43, 3.60it/s] 92%|█████████▏| 340428/371472 [4:55:11<2:26:51, 3.52it/s] 92%|█████████▏| 340429/371472 [4:55:12<2:21:31, 3.66it/s] 92%|█████████▏| 340430/371472 [4:55:12<2:27:32, 3.51it/s] 92%|█████████▏| 340431/371472 [4:55:12<2:33:28, 3.37it/s] 92%|█████████▏| 340432/371472 [4:55:13<2:33:21, 3.37it/s] 92%|█████████▏| 340433/371472 [4:55:13<2:29:38, 3.46it/s] 92%|█████████▏| 340434/371472 [4:55:13<2:30:59, 3.43it/s] 92%|█████████▏| 340435/371472 [4:55:14<2:27:29, 3.51it/s] 92%|█████████▏| 340436/371472 [4:55:14<2:34:07, 3.36it/s] 92%|█████████▏| 340437/371472 [4:55:14<2:42:37, 3.18it/s] 92%|█████████▏| 340438/371472 [4:55:14<2:34:41, 3.34it/s] 92%|█████████▏| 340439/371472 [4:55:15<2:37:06, 3.29it/s] 92%|█████████▏| 340440/371472 [4:55:15<2:40:35, 3.22it/s] {'loss': 2.5456, 'learning_rate': 1.7522463315305218e-07, 'epoch': 14.66} + 92%|█████████▏| 340440/371472 [4:55:15<2:40:35, 3.22it/s] 92%|█████████▏| 340441/371472 [4:55:15<2:32:19, 3.40it/s] 92%|█████████▏| 340442/371472 [4:55:16<2:23:59, 3.59it/s] 92%|█████████▏| 340443/371472 [4:55:16<2:24:36, 3.58it/s] 92%|█████████▏| 340444/371472 [4:55:16<2:22:58, 3.62it/s] 92%|█████████▏| 340445/371472 [4:55:16<2:17:31, 3.76it/s] 92%|█████████▏| 340446/371472 [4:55:17<2:21:36, 3.65it/s] 92%|█████████▏| 340447/371472 [4:55:17<2:25:27, 3.55it/s] 92%|█████████▏| 340448/371472 [4:55:17<2:22:27, 3.63it/s] 92%|█████████▏| 340449/371472 [4:55:18<2:20:29, 3.68it/s] 92%|█████████▏| 340450/371472 [4:55:18<2:15:26, 3.82it/s] 92%|█████████▏| 340451/371472 [4:55:18<2:15:02, 3.83it/s] 92%|█████████▏| 340452/371472 [4:55:18<2:15:17, 3.82it/s] 92%|█████████▏| 340453/371472 [4:55:19<2:14:45, 3.84it/s] 92%|█████████▏| 340454/371472 [4:55:19<2:25:59, 3.54it/s] 92%|█████████▏| 340455/371472 [4:55:19<2:21:10, 3.66it/s] 92%|█████████▏| 340456/371472 [4:55:19<2:25:34, 3.55it/s] 92%|█████████▏| 340457/371472 [4:55:20<2:22:58, 3.62it/s] 92%|█████████▏| 340458/371472 [4:55:20<2:33:13, 3.37it/s] 92%|█████████▏| 340459/371472 [4:55:20<2:30:49, 3.43it/s] 92%|█████████▏| 340460/371472 [4:55:21<2:27:56, 3.49it/s] {'loss': 2.6728, 'learning_rate': 1.7517615117757334e-07, 'epoch': 14.66} + 92%|█████████▏| 340460/371472 [4:55:21<2:27:56, 3.49it/s] 92%|█████████▏| 340461/371472 [4:55:21<2:27:28, 3.50it/s] 92%|█████████▏| 340462/371472 [4:55:21<2:22:00, 3.64it/s] 92%|█████████▏| 340463/371472 [4:55:21<2:18:27, 3.73it/s] 92%|█████████▏| 340464/371472 [4:55:22<2:23:33, 3.60it/s] 92%|█████████▏| 340465/371472 [4:55:22<2:21:34, 3.65it/s] 92%|█████████▏| 340466/371472 [4:55:22<2:20:30, 3.68it/s] 92%|█████████▏| 340467/371472 [4:55:22<2:21:26, 3.65it/s] 92%|█████████▏| 340468/371472 [4:55:23<2:27:21, 3.51it/s] 92%|█████████▏| 340469/371472 [4:55:23<2:35:07, 3.33it/s] 92%|█████████▏| 340470/371472 [4:55:23<2:32:34, 3.39it/s] 92%|█████████▏| 340471/371472 [4:55:24<2:28:27, 3.48it/s] 92%|█████████▏| 340472/371472 [4:55:24<2:24:14, 3.58it/s] 92%|█████████▏| 340473/371472 [4:55:24<2:23:08, 3.61it/s] 92%|█████████▏| 340474/371472 [4:55:25<2:28:33, 3.48it/s] 92%|█████████▏| 340475/371472 [4:55:25<2:35:20, 3.33it/s] 92%|█████████▏| 340476/371472 [4:55:25<2:32:58, 3.38it/s] 92%|█████████▏| 340477/371472 [4:55:26<2:56:34, 2.93it/s] 92%|█████████▏| 340478/371472 [4:55:26<2:52:46, 2.99it/s] 92%|█████████▏| 340479/371472 [4:55:26<2:47:13, 3.09it/s] 92%|█████████▏| 340480/371472 [4:55:27<2:45:49, 3.11it/s] {'loss': 2.5568, 'learning_rate': 1.751276692020944e-07, 'epoch': 14.67} + 92%|█████████▏| 340480/371472 [4:55:27<2:45:49, 3.11it/s] 92%|█████████▏| 340481/371472 [4:55:27<2:43:23, 3.16it/s] 92%|█████████▏| 340482/371472 [4:55:27<2:42:49, 3.17it/s] 92%|█████████▏| 340483/371472 [4:55:27<2:36:51, 3.29it/s] 92%|█████████▏| 340484/371472 [4:55:28<2:33:49, 3.36it/s] 92%|█████████▏| 340485/371472 [4:55:28<2:27:13, 3.51it/s] 92%|█████████▏| 340486/371472 [4:55:28<2:25:03, 3.56it/s] 92%|█████████▏| 340487/371472 [4:55:28<2:21:53, 3.64it/s] 92%|█████████▏| 340488/371472 [4:55:29<2:33:08, 3.37it/s] 92%|█████████▏| 340489/371472 [4:55:29<2:29:38, 3.45it/s] 92%|█████████▏| 340490/371472 [4:55:29<2:25:01, 3.56it/s] 92%|█████████▏| 340491/371472 [4:55:30<2:30:30, 3.43it/s] 92%|█████████▏| 340492/371472 [4:55:30<2:28:22, 3.48it/s] 92%|█████████▏| 340493/371472 [4:55:30<2:29:14, 3.46it/s] 92%|█████████▏| 340494/371472 [4:55:31<2:29:43, 3.45it/s] 92%|█████████▏| 340495/371472 [4:55:31<2:24:56, 3.56it/s] 92%|█████████▏| 340496/371472 [4:55:31<2:33:08, 3.37it/s] 92%|█████████▏| 340497/371472 [4:55:31<2:26:30, 3.52it/s] 92%|█████████▏| 340498/371472 [4:55:32<2:21:34, 3.65it/s] 92%|█████████▏| 340499/371472 [4:55:32<2:19:57, 3.69it/s] 92%|█████████▏| 340500/371472 [4:55:32<2:18:29, 3.73it/s] {'loss': 2.6637, 'learning_rate': 1.7507918722661556e-07, 'epoch': 14.67} + 92%|█████████▏| 340500/371472 [4:55:32<2:18:29, 3.73it/s] 92%|█████████▏| 340501/371472 [4:55:32<2:21:11, 3.66it/s] 92%|█████████▏| 340502/371472 [4:55:33<2:24:54, 3.56it/s] 92%|█████████▏| 340503/371472 [4:55:33<2:25:34, 3.55it/s] 92%|█████████▏| 340504/371472 [4:55:33<2:29:52, 3.44it/s] 92%|█████████▏| 340505/371472 [4:55:34<2:29:10, 3.46it/s] 92%|█████████▏| 340506/371472 [4:55:34<2:40:18, 3.22it/s] 92%|█████████▏| 340507/371472 [4:55:34<2:37:50, 3.27it/s] 92%|█████████▏| 340508/371472 [4:55:35<2:35:45, 3.31it/s] 92%|█████████▏| 340509/371472 [4:55:35<2:32:46, 3.38it/s] 92%|█████████▏| 340510/371472 [4:55:35<2:25:43, 3.54it/s] 92%|█████████▏| 340511/371472 [4:55:35<2:28:49, 3.47it/s] 92%|█████████▏| 340512/371472 [4:55:36<2:26:10, 3.53it/s] 92%|█████████▏| 340513/371472 [4:55:36<2:24:05, 3.58it/s] 92%|█████████▏| 340514/371472 [4:55:36<2:36:53, 3.29it/s] 92%|█████████▏| 340515/371472 [4:55:37<2:35:52, 3.31it/s] 92%|█████████▏| 340516/371472 [4:55:37<2:27:00, 3.51it/s] 92%|█████████▏| 340517/371472 [4:55:37<2:25:19, 3.55it/s] 92%|█████████▏| 340518/371472 [4:55:37<2:20:37, 3.67it/s] 92%|█████████▏| 340519/371472 [4:55:38<2:15:55, 3.80it/s] 92%|█████████▏| 340520/371472 [4:55:38<2:23:16, 3.60it/s] {'loss': 2.4692, 'learning_rate': 1.750307052511366e-07, 'epoch': 14.67} + 92%|█████████▏| 340520/371472 [4:55:38<2:23:16, 3.60it/s] 92%|█████████▏| 340521/371472 [4:55:38<2:21:26, 3.65it/s] 92%|█████████▏| 340522/371472 [4:55:38<2:25:28, 3.55it/s] 92%|█████████▏| 340523/371472 [4:55:39<2:20:39, 3.67it/s] 92%|█████████▏| 340524/371472 [4:55:39<2:23:35, 3.59it/s] 92%|█████████▏| 340525/371472 [4:55:39<2:25:44, 3.54it/s] 92%|█████████▏| 340526/371472 [4:55:40<2:20:55, 3.66it/s] 92%|█████████▏| 340527/371472 [4:55:40<2:26:08, 3.53it/s] 92%|████████��▏| 340528/371472 [4:55:40<2:32:19, 3.39it/s] 92%|█████████▏| 340529/371472 [4:55:41<2:32:45, 3.38it/s] 92%|█████████▏| 340530/371472 [4:55:41<2:31:00, 3.42it/s] 92%|█████████▏| 340531/371472 [4:55:41<2:28:16, 3.48it/s] 92%|█████████▏| 340532/371472 [4:55:41<2:27:16, 3.50it/s] 92%|█████████▏| 340533/371472 [4:55:42<2:28:31, 3.47it/s] 92%|█████████▏| 340534/371472 [4:55:42<2:38:33, 3.25it/s] 92%|█████████▏| 340535/371472 [4:55:42<2:33:39, 3.36it/s] 92%|█████████▏| 340536/371472 [4:55:43<2:30:48, 3.42it/s] 92%|█████████▏| 340537/371472 [4:55:43<2:42:21, 3.18it/s] 92%|█████████▏| 340538/371472 [4:55:43<2:48:01, 3.07it/s] 92%|█████████▏| 340539/371472 [4:55:44<2:38:10, 3.26it/s] 92%|█████████▏| 340540/371472 [4:55:44<2:32:27, 3.38it/s] {'loss': 2.6168, 'learning_rate': 1.7498222327565778e-07, 'epoch': 14.67} + 92%|█████████▏| 340540/371472 [4:55:44<2:32:27, 3.38it/s] 92%|█████████▏| 340541/371472 [4:55:44<2:29:30, 3.45it/s] 92%|█████████▏| 340542/371472 [4:55:44<2:29:50, 3.44it/s] 92%|█████████▏| 340543/371472 [4:55:45<2:33:09, 3.37it/s] 92%|█████████▏| 340544/371472 [4:55:45<2:40:53, 3.20it/s] 92%|█████████▏| 340545/371472 [4:55:45<2:38:22, 3.25it/s] 92%|█████████▏| 340546/371472 [4:55:46<2:39:13, 3.24it/s] 92%|█████████▏| 340547/371472 [4:55:46<2:33:03, 3.37it/s] 92%|█████████▏| 340548/371472 [4:55:46<2:32:31, 3.38it/s] 92%|█████████▏| 340549/371472 [4:55:47<2:43:33, 3.15it/s] 92%|█████████▏| 340550/371472 [4:55:47<2:34:51, 3.33it/s] 92%|█████████▏| 340551/371472 [4:55:47<2:28:23, 3.47it/s] 92%|█████████▏| 340552/371472 [4:55:47<2:30:46, 3.42it/s] 92%|█████████▏| 340553/371472 [4:55:48<2:22:10, 3.62it/s] 92%|█████████▏| 340554/371472 [4:55:48<2:20:38, 3.66it/s] 92%|█████████▏| 340555/371472 [4:55:48<2:26:23, 3.52it/s] 92%|█████████▏| 340556/371472 [4:55:48<2:26:34, 3.52it/s] 92%|█████████▏| 340557/371472 [4:55:49<2:26:36, 3.51it/s] 92%|█████████▏| 340558/371472 [4:55:49<2:28:46, 3.46it/s] 92%|█████████▏| 340559/371472 [4:55:49<2:28:01, 3.48it/s] 92%|█████████▏| 340560/371472 [4:55:50<2:24:21, 3.57it/s] {'loss': 2.7016, 'learning_rate': 1.7493374130017883e-07, 'epoch': 14.67} + 92%|█████████▏| 340560/371472 [4:55:50<2:24:21, 3.57it/s] 92%|█████████▏| 340561/371472 [4:55:50<2:23:14, 3.60it/s] 92%|█████████▏| 340562/371472 [4:55:50<2:31:49, 3.39it/s] 92%|█████████▏| 340563/371472 [4:55:51<2:42:28, 3.17it/s] 92%|█████████▏| 340564/371472 [4:55:51<2:39:32, 3.23it/s] 92%|█████████▏| 340565/371472 [4:55:51<2:34:53, 3.33it/s] 92%|█████████▏| 340566/371472 [4:55:51<2:37:17, 3.27it/s] 92%|█████████▏| 340567/371472 [4:55:52<2:34:22, 3.34it/s] 92%|█████████▏| 340568/371472 [4:55:52<3:06:48, 2.76it/s] 92%|█████████▏| 340569/371472 [4:55:53<2:58:17, 2.89it/s] 92%|█████████▏| 340570/371472 [4:55:53<2:47:06, 3.08it/s] 92%|█████████▏| 340571/371472 [4:55:53<2:45:10, 3.12it/s] 92%|█████████▏| 340572/371472 [4:55:53<2:35:11, 3.32it/s] 92%|█████████▏| 340573/371472 [4:55:54<2:29:55, 3.43it/s] 92%|█████████▏| 340574/371472 [4:55:54<2:26:49, 3.51it/s] 92%|█████████▏| 340575/371472 [4:55:54<2:27:50, 3.48it/s] 92%|█████████▏| 340576/371472 [4:55:55<2:22:39, 3.61it/s] 92%|█████████▏| 340577/371472 [4:55:55<2:23:11, 3.60it/s] 92%|█████████▏| 340578/371472 [4:55:55<2:21:44, 3.63it/s] 92%|█████████▏| 340579/371472 [4:55:55<2:20:56, 3.65it/s] 92%|█████████▏| 340580/371472 [4:55:56<2:23:20, 3.59it/s] {'loss': 2.6794, 'learning_rate': 1.7488525932469998e-07, 'epoch': 14.67} + 92%|█████████▏| 340580/371472 [4:55:56<2:23:20, 3.59it/s] 92%|█████████▏| 340581/371472 [4:55:56<2:26:35, 3.51it/s] 92%|█████████▏| 340582/371472 [4:55:56<2:27:34, 3.49it/s] 92%|█████████▏| 340583/371472 [4:55:57<2:27:52, 3.48it/s] 92%|█████████▏| 340584/371472 [4:55:57<2:24:55, 3.55it/s] 92%|█████████▏| 340585/371472 [4:55:57<2:22:14, 3.62it/s] 92%|█████████▏| 340586/371472 [4:55:57<2:20:17, 3.67it/s] 92%|█████████▏| 340587/371472 [4:55:58<2:19:50, 3.68it/s] 92%|█████████▏| 340588/371472 [4:55:58<2:17:46, 3.74it/s] 92%|█████████▏| 340589/371472 [4:55:58<2:16:07, 3.78it/s] 92%|█████████▏| 340590/371472 [4:55:58<2:14:36, 3.82it/s] 92%|█████████▏| 340591/371472 [4:55:59<2:15:29, 3.80it/s] 92%|█████████▏| 340592/371472 [4:55:59<2:14:03, 3.84it/s] 92%|█████████▏| 340593/371472 [4:55:59<2:14:50, 3.82it/s] 92%|█████████▏| 340594/371472 [4:55:59<2:12:52, 3.87it/s] 92%|█████████▏| 340595/371472 [4:56:00<2:16:37, 3.77it/s] 92%|█████████▏| 340596/371472 [4:56:00<2:14:12, 3.83it/s] 92%|█████████▏| 340597/371472 [4:56:00<2:19:18, 3.69it/s] 92%|█████████▏| 340598/371472 [4:56:00<2:16:42, 3.76it/s] 92%|█████████▏| 340599/371472 [4:56:01<2:22:22, 3.61it/s] 92%|█████████▏| 340600/371472 [4:56:01<2:22:00, 3.62it/s] {'loss': 2.698, 'learning_rate': 1.7483677734922105e-07, 'epoch': 14.67} + 92%|█████████▏| 340600/371472 [4:56:01<2:22:00, 3.62it/s] 92%|█████████▏| 340601/371472 [4:56:01<2:18:54, 3.70it/s] 92%|█████████▏| 340602/371472 [4:56:02<2:20:50, 3.65it/s] 92%|█████████▏| 340603/371472 [4:56:02<2:25:18, 3.54it/s] 92%|█████████▏| 340604/371472 [4:56:02<2:23:23, 3.59it/s] 92%|█████████▏| 340605/371472 [4:56:03<2:35:01, 3.32it/s] 92%|█████████▏| 340606/371472 [4:56:03<2:27:49, 3.48it/s] 92%|█████████▏| 340607/371472 [4:56:03<2:25:45, 3.53it/s] 92%|█████████▏| 340608/371472 [4:56:03<2:48:38, 3.05it/s] 92%|█████████▏| 340609/371472 [4:56:04<2:37:21, 3.27it/s] 92%|█████████▏| 340610/371472 [4:56:04<2:34:48, 3.32it/s] 92%|█████████▏| 340611/371472 [4:56:04<2:43:56, 3.14it/s] 92%|█████████▏| 340612/371472 [4:56:05<2:40:21, 3.21it/s] 92%|█████████▏| 340613/371472 [4:56:05<2:34:55, 3.32it/s] 92%|█████████▏| 340614/371472 [4:56:05<2:26:32, 3.51it/s] 92%|█████████▏| 340615/371472 [4:56:05<2:27:53, 3.48it/s] 92%|█████████▏| 340616/371472 [4:56:06<2:19:38, 3.68it/s] 92%|█████████▏| 340617/371472 [4:56:06<2:23:31, 3.58it/s] 92%|█████████▏| 340618/371472 [4:56:06<2:28:21, 3.47it/s] 92%|█████████▏| 340619/371472 [4:56:07<2:28:16, 3.47it/s] 92%|█████████▏| 340620/371472 [4:56:07<2:22:58, 3.60it/s] {'loss': 2.4233, 'learning_rate': 1.747882953737422e-07, 'epoch': 14.67} + 92%|█████████▏| 340620/371472 [4:56:07<2:22:58, 3.60it/s] 92%|█████████▏| 340621/371472 [4:56:07<2:24:11, 3.57it/s] 92%|█████████▏| 340622/371472 [4:56:07<2:30:51, 3.41it/s] 92%|█████████▏| 340623/371472 [4:56:08<2:31:19, 3.40it/s] 92%|█████████▏| 340624/371472 [4:56:08<2:30:40, 3.41it/s] 92%|█████████▏| 340625/371472 [4:56:08<2:22:53, 3.60it/s] 92%|█████████▏| 340626/371472 [4:56:09<2:40:18, 3.21it/s] 92%|█████████▏| 340627/371472 [4:56:09<2:37:38, 3.26it/s] 92%|█████████▏| 340628/371472 [4:56:09<2:35:20, 3.31it/s] 92%|█████████▏| 340629/371472 [4:56:10<2:32:39, 3.37it/s] 92%|█████████▏| 340630/371472 [4:56:10<2:34:45, 3.32it/s] 92%|█████████▏| 340631/371472 [4:56:10<2:42:22, 3.17it/s] 92%|█████████▏| 340632/371472 [4:56:10<2:33:05, 3.36it/s] 92%|█████████▏| 340633/371472 [4:56:11<2:28:53, 3.45it/s] 92%|█████████▏| 340634/371472 [4:56:11<2:30:01, 3.43it/s] 92%|█████████▏| 340635/371472 [4:56:11<2:46:24, 3.09it/s] 92%|█████████▏| 340636/371472 [4:56:12<2:38:40, 3.24it/s] 92%|█████████▏| 340637/371472 [4:56:12<2:35:32, 3.30it/s] 92%|█████████▏| 340638/371472 [4:56:12<2:29:37, 3.43it/s] 92%|█████████▏| 340639/371472 [4:56:13<2:47:22, 3.07it/s] 92%|█████████▏| 340640/371472 [4:56:13<2:36:11, 3.29it/s] {'loss': 2.6859, 'learning_rate': 1.7473981339826324e-07, 'epoch': 14.67} + 92%|█████████▏| 340640/371472 [4:56:13<2:36:11, 3.29it/s] 92%|█████████▏| 340641/371472 [4:56:13<2:36:55, 3.27it/s] 92%|█████████▏| 340642/371472 [4:56:14<2:30:44, 3.41it/s] 92%|█████████▏| 340643/371472 [4:56:14<2:25:01, 3.54it/s] 92%|█████████▏| 340644/371472 [4:56:14<2:22:34, 3.60it/s] 92%|█████████▏| 340645/371472 [4:56:14<2:36:58, 3.27it/s] 92%|█████████▏| 340646/371472 [4:56:15<2:37:55, 3.25it/s] 92%|█████████▏| 340647/371472 [4:56:15<2:31:37, 3.39it/s] 92%|█████████▏| 340648/371472 [4:56:15<2:29:40, 3.43it/s] 92%|█████████▏| 340649/371472 [4:56:16<2:27:32, 3.48it/s] 92%|█████████▏| 340650/371472 [4:56:16<2:27:47, 3.48it/s] 92%|█████████▏| 340651/371472 [4:56:16<2:21:03, 3.64it/s] 92%|█████████▏| 340652/371472 [4:56:16<2:19:56, 3.67it/s] 92%|█████████▏| 340653/371472 [4:56:17<2:19:14, 3.69it/s] 92%|█████████▏| 340654/371472 [4:56:17<2:22:12, 3.61it/s] 92%|█████████▏| 340655/371472 [4:56:17<2:20:17, 3.66it/s] 92%|█████████▏| 340656/371472 [4:56:17<2:17:50, 3.73it/s] 92%|█████████▏| 340657/371472 [4:56:18<2:23:01, 3.59it/s] 92%|█████████▏| 340658/371472 [4:56:18<2:21:17, 3.63it/s] 92%|█████████▏| 340659/371472 [4:56:18<2:21:44, 3.62it/s] 92%|█████████▏| 340660/371472 [4:56:19<2:21:45, 3.62it/s] {'loss': 2.8167, 'learning_rate': 1.7469133142278431e-07, 'epoch': 14.67} + 92%|█████████▏| 340660/371472 [4:56:19<2:21:45, 3.62it/s] 92%|█████████▏| 340661/371472 [4:56:19<2:17:52, 3.72it/s] 92%|█████████▏| 340662/371472 [4:56:19<2:17:19, 3.74it/s] 92%|█████████▏| 340663/371472 [4:56:19<2:18:50, 3.70it/s] 92%|█████████▏| 340664/371472 [4:56:20<2:15:36, 3.79it/s] 92%|█████████▏| 340665/371472 [4:56:20<2:28:25, 3.46it/s] 92%|█████████▏| 340666/371472 [4:56:20<2:20:48, 3.65it/s] 92%|█████████▏| 340667/371472 [4:56:20<2:19:48, 3.67it/s] 92%|█████████▏| 340668/371472 [4:56:21<2:34:07, 3.33it/s] 92%|█████████▏| 340669/371472 [4:56:21<2:30:44, 3.41it/s] 92%|█████████▏| 340670/371472 [4:56:21<2:26:48, 3.50it/s] 92%|█████████▏| 340671/371472 [4:56:22<2:31:55, 3.38it/s] 92%|█████████▏| 340672/371472 [4:56:22<2:31:01, 3.40it/s] 92%|█████████▏| 340673/371472 [4:56:22<2:30:02, 3.42it/s] 92%|█████████▏| 340674/371472 [4:56:23<2:27:33, 3.48it/s] 92%|█████████▏| 340675/371472 [4:56:23<2:22:56, 3.59it/s] 92%|█████████▏| 340676/371472 [4:56:23<2:20:10, 3.66it/s] 92%|█████████▏| 340677/371472 [4:56:23<2:31:19, 3.39it/s] 92%|█████████▏| 340678/371472 [4:56:24<2:30:26, 3.41it/s] 92%|█████████▏| 340679/371472 [4:56:24<2:33:22, 3.35it/s] 92%|█████████▏| 340680/371472 [4:56:24<2:40:03, 3.21it/s] {'loss': 2.643, 'learning_rate': 1.746428494473055e-07, 'epoch': 14.67} + 92%|█████████▏| 340680/371472 [4:56:24<2:40:03, 3.21it/s] 92%|█████████▏| 340681/371472 [4:56:25<2:31:23, 3.39it/s] 92%|█████████▏| 340682/371472 [4:56:25<2:25:56, 3.52it/s] 92%|█████████▏| 340683/371472 [4:56:25<2:18:49, 3.70it/s] 92%|█████████▏| 340684/371472 [4:56:25<2:23:08, 3.58it/s] 92%|█████████▏| 340685/371472 [4:56:26<2:32:10, 3.37it/s] 92%|█████████▏| 340686/371472 [4:56:26<2:39:27, 3.22it/s] 92%|█████████▏| 340687/371472 [4:56:26<2:31:04, 3.40it/s] 92%|█████████▏| 340688/371472 [4:56:27<2:29:09, 3.44it/s] 92%|█████████▏| 340689/371472 [4:56:27<2:25:53, 3.52it/s] 92%|█████████▏| 340690/371472 [4:56:27<2:25:17, 3.53it/s] 92%|█████████▏| 340691/371472 [4:56:27<2:24:49, 3.54it/s] 92%|█████████▏| 340692/371472 [4:56:28<2:25:59, 3.51it/s] 92%|█████████▏| 340693/371472 [4:56:28<2:28:31, 3.45it/s] 92%|█████████▏| 340694/371472 [4:56:28<2:31:12, 3.39it/s] 92%|█████████▏| 340695/371472 [4:56:29<2:24:06, 3.56it/s] 92%|█████████▏| 340696/371472 [4:56:29<2:22:36, 3.60it/s] 92%|█████████▏| 340697/371472 [4:56:29<2:21:12, 3.63it/s] 92%|█████████▏| 340698/371472 [4:56:29<2:25:54, 3.52it/s] 92%|█████████▏| 340699/371472 [4:56:30<2:18:42, 3.70it/s] 92%|█████████▏| 340700/371472 [4:56:30<2:21:05, 3.64it/s] {'loss': 2.5921, 'learning_rate': 1.7459436747182654e-07, 'epoch': 14.67} + 92%|█████████▏| 340700/371472 [4:56:30<2:21:05, 3.64it/s] 92%|█████████▏| 340701/371472 [4:56:30<2:40:01, 3.20it/s] 92%|█████████▏| 340702/371472 [4:56:31<2:36:02, 3.29it/s] 92%|█████████▏| 340703/371472 [4:56:31<2:31:29, 3.39it/s] 92%|█████████▏| 340704/371472 [4:56:31<2:28:20, 3.46it/s] 92%|█████████▏| 340705/371472 [4:56:31<2:26:10, 3.51it/s] 92%|█████████▏| 340706/371472 [4:56:32<2:23:34, 3.57it/s] 92%|█████████▏| 340707/371472 [4:56:32<2:34:11, 3.33it/s] 92%|█████████▏| 340708/371472 [4:56:32<2:28:05, 3.46it/s] 92%|█████████▏| 340709/371472 [4:56:33<2:29:26, 3.43it/s] 92%|█████████▏| 340710/371472 [4:56:33<2:26:46, 3.49it/s] 92%|█████████▏| 340711/371472 [4:56:33<2:31:33, 3.38it/s] 92%|█████████▏| 340712/371472 [4:56:33<2:25:20, 3.53it/s] 92%|█████████▏| 340713/371472 [4:56:34<2:21:39, 3.62it/s] 92%|█████████▏| 340714/371472 [4:56:34<2:21:27, 3.62it/s] 92%|█████████▏| 340715/371472 [4:56:34<2:17:13, 3.74it/s] 92%|█████████▏| 340716/371472 [4:56:35<2:17:53, 3.72it/s] 92%|█████████▏| 340717/371472 [4:56:35<2:20:54, 3.64it/s] 92%|█████████▏| 340718/371472 [4:56:35<2:25:09, 3.53it/s] 92%|█████████▏| 340719/371472 [4:56:35<2:31:08, 3.39it/s] 92%|█████████▏| 340720/371472 [4:56:36<2:25:41, 3.52it/s] {'loss': 2.5132, 'learning_rate': 1.745458854963477e-07, 'epoch': 14.68} + 92%|█████████▏| 340720/371472 [4:56:36<2:25:41, 3.52it/s] 92%|█████████▏| 340721/371472 [4:56:36<2:42:44, 3.15it/s] 92%|█████████▏| 340722/371472 [4:56:36<2:33:29, 3.34it/s] 92%|█████████▏| 340723/371472 [4:56:37<2:31:32, 3.38it/s] 92%|█████████▏| 340724/371472 [4:56:37<2:38:48, 3.23it/s] 92%|█████████▏| 340725/371472 [4:56:37<2:36:51, 3.27it/s] 92%|█████████▏| 340726/371472 [4:56:38<2:33:07, 3.35it/s] 92%|█████████▏| 340727/371472 [4:56:38<2:29:51, 3.42it/s] 92%|█████████▏| 340728/371472 [4:56:38<2:25:13, 3.53it/s] 92%|█████████▏| 340729/371472 [4:56:38<2:22:39, 3.59it/s] 92%|█████████▏| 340730/371472 [4:56:39<2:20:02, 3.66it/s] 92%|█████████▏| 340731/371472 [4:56:39<2:21:04, 3.63it/s] 92%|█████████▏| 340732/371472 [4:56:39<2:29:33, 3.43it/s] 92%|█████████▏| 340733/371472 [4:56:40<2:36:24, 3.28it/s] 92%|█████████▏| 340734/371472 [4:56:40<2:33:39, 3.33it/s] 92%|█████████▏| 340735/371472 [4:56:40<2:30:01, 3.41it/s] 92%|█████████▏| 340736/371472 [4:56:40<2:27:42, 3.47it/s] 92%|█████████▏| 340737/371472 [4:56:41<2:21:48, 3.61it/s] 92%|█████████▏| 340738/371472 [4:56:41<2:18:40, 3.69it/s] 92%|█████████▏| 340739/371472 [4:56:41<2:13:47, 3.83it/s] 92%|█████████▏| 340740/371472 [4:56:41<2:13:33, 3.83it/s] {'loss': 2.6309, 'learning_rate': 1.7449740352086876e-07, 'epoch': 14.68} + 92%|█████████▏| 340740/371472 [4:56:41<2:13:33, 3.83it/s] 92%|█████████▏| 340741/371472 [4:56:42<2:14:44, 3.80it/s] 92%|█████████▏| 340742/371472 [4:56:42<2:16:33, 3.75it/s] 92%|█████████▏| 340743/371472 [4:56:42<2:13:32, 3.84it/s] 92%|█████████▏| 340744/371472 [4:56:43<2:21:26, 3.62it/s] 92%|█████████▏| 340745/371472 [4:56:43<2:24:33, 3.54it/s] 92%|█████████▏| 340746/371472 [4:56:43<2:19:38, 3.67it/s] 92%|█████████▏| 340747/371472 [4:56:43<2:17:41, 3.72it/s] 92%|█████████▏| 340748/371472 [4:56:44<2:15:23, 3.78it/s] 92%|█████████▏| 340749/371472 [4:56:44<2:19:48, 3.66it/s] 92%|█████████▏| 340750/371472 [4:56:44<2:19:38, 3.67it/s] 92%|█████████▏| 340751/371472 [4:56:44<2:26:20, 3.50it/s] 92%|█████████▏| 340752/371472 [4:56:45<2:18:59, 3.68it/s] 92%|█████████▏| 340753/371472 [4:56:45<2:18:15, 3.70it/s] 92%|█████████▏| 340754/371472 [4:56:45<2:18:15, 3.70it/s] 92%|█████████▏| 340755/371472 [4:56:46<2:17:58, 3.71it/s] 92%|█████████▏| 340756/371472 [4:56:46<2:23:59, 3.56it/s] 92%|█████████▏| 340757/371472 [4:56:46<2:31:43, 3.37it/s] 92%|█████████▏| 340758/371472 [4:56:47<2:39:45, 3.20it/s] 92%|█████████▏| 340759/371472 [4:56:47<2:35:25, 3.29it/s] 92%|█████████▏| 340760/371472 [4:56:47<2:26:43, 3.49it/s] {'loss': 2.6264, 'learning_rate': 1.744489215453899e-07, 'epoch': 14.68} + 92%|█████████▏| 340760/371472 [4:56:47<2:26:43, 3.49it/s] 92%|█████████▏| 340761/371472 [4:56:47<2:30:49, 3.39it/s] 92%|█████████▏| 340762/371472 [4:56:48<2:26:19, 3.50it/s] 92%|█████████▏| 340763/371472 [4:56:48<2:42:34, 3.15it/s] 92%|█████████▏| 340764/371472 [4:56:48<2:36:31, 3.27it/s] 92%|█████████▏| 340765/371472 [4:56:49<2:30:37, 3.40it/s] 92%|█████████▏| 340766/371472 [4:56:49<2:24:43, 3.54it/s] 92%|█████████▏| 340767/371472 [4:56:49<2:30:14, 3.41it/s] 92%|█████████▏| 340768/371472 [4:56:49<2:30:10, 3.41it/s] 92%|█████████▏| 340769/371472 [4:56:50<2:27:52, 3.46it/s] 92%|█████████▏| 340770/371472 [4:56:50<2:25:51, 3.51it/s] 92%|█████████▏| 340771/371472 [4:56:50<2:23:01, 3.58it/s] 92%|█████████▏| 340772/371472 [4:56:51<2:23:47, 3.56it/s] 92%|█████████▏| 340773/371472 [4:56:51<2:25:55, 3.51it/s] 92%|█████████▏| 340774/371472 [4:56:51<2:25:25, 3.52it/s] 92%|█████████▏| 340775/371472 [4:56:51<2:25:36, 3.51it/s] 92%|█████████▏| 340776/371472 [4:56:52<2:23:51, 3.56it/s] 92%|█████████▏| 340777/371472 [4:56:52<2:20:42, 3.64it/s] 92%|█████████▏| 340778/371472 [4:56:52<2:22:18, 3.59it/s] 92%|█████████▏| 340779/371472 [4:56:52<2:20:42, 3.64it/s] 92%|█████████▏| 340780/371472 [4:56:53<2:20:53, 3.63it/s] {'loss': 2.4774, 'learning_rate': 1.7440043956991096e-07, 'epoch': 14.68} + 92%|█████████▏| 340780/371472 [4:56:53<2:20:53, 3.63it/s] 92%|█████████▏| 340781/371472 [4:56:53<2:23:36, 3.56it/s] 92%|█████████▏| 340782/371472 [4:56:53<2:21:27, 3.62it/s] 92%|█████████▏| 340783/371472 [4:56:54<2:18:37, 3.69it/s] 92%|█████████▏| 340784/371472 [4:56:54<2:17:00, 3.73it/s] 92%|█████████▏| 340785/371472 [4:56:54<2:18:39, 3.69it/s] 92%|█████████▏| 340786/371472 [4:56:54<2:24:33, 3.54it/s] 92%|█████████▏| 340787/371472 [4:56:55<2:19:33, 3.66it/s] 92%|█████████▏| 340788/371472 [4:56:55<2:18:46, 3.69it/s] 92%|█████████▏| 340789/371472 [4:56:55<2:37:19, 3.25it/s] 92%|█████████▏| 340790/371472 [4:56:56<2:28:40, 3.44it/s] 92%|█████████▏| 340791/371472 [4:56:56<2:34:15, 3.31it/s] 92%|█████████▏| 340792/371472 [4:56:56<2:32:33, 3.35it/s] 92%|█████████▏| 340793/371472 [4:56:57<2:38:44, 3.22it/s] 92%|█████████▏| 340794/371472 [4:56:57<2:38:29, 3.23it/s] 92%|█████████▏| 340795/371472 [4:56:57<2:35:20, 3.29it/s] 92%|█████████▏| 340796/371472 [4:56:57<2:37:53, 3.24it/s] 92%|█████████▏| 340797/371472 [4:56:58<2:32:03, 3.36it/s] 92%|█████████▏| 340798/371472 [4:56:58<2:31:50, 3.37it/s] 92%|█████████▏| 340799/371472 [4:56:58<2:24:31, 3.54it/s] 92%|█████████▏| 340800/371472 [4:56:59<2:28:50, 3.43it/s] {'loss': 2.5527, 'learning_rate': 1.7435195759443213e-07, 'epoch': 14.68} + 92%|█████████▏| 340800/371472 [4:56:59<2:28:50, 3.43it/s] 92%|█████████▏| 340801/371472 [4:56:59<2:32:34, 3.35it/s] 92%|█████████▏| 340802/371472 [4:56:59<2:27:18, 3.47it/s] 92%|█████████▏| 340803/371472 [4:56:59<2:26:25, 3.49it/s] 92%|█████████▏| 340804/371472 [4:57:00<2:23:15, 3.57it/s] 92%|█████████▏| 340805/371472 [4:57:00<2:18:17, 3.70it/s] 92%|█████████▏| 340806/371472 [4:57:00<2:23:23, 3.56it/s] 92%|█████████▏| 340807/371472 [4:57:01<2:20:48, 3.63it/s] 92%|█████████▏| 340808/371472 [4:57:01<2:24:33, 3.54it/s] 92%|█████████▏| 340809/371472 [4:57:01<2:20:10, 3.65it/s] 92%|█████████▏| 340810/371472 [4:57:01<2:22:12, 3.59it/s] 92%|█████████▏| 340811/371472 [4:57:02<2:23:25, 3.56it/s] 92%|█████████▏| 340812/371472 [4:57:02<2:19:45, 3.66it/s] 92%|█████████▏| 340813/371472 [4:57:02<2:19:10, 3.67it/s] 92%|█████████▏| 340814/371472 [4:57:02<2:23:37, 3.56it/s] 92%|█████████▏| 340815/371472 [4:57:03<2:26:37, 3.48it/s] 92%|█████████▏| 340816/371472 [4:57:03<2:34:10, 3.31it/s] 92%|█████████▏| 340817/371472 [4:57:03<2:28:32, 3.44it/s] 92%|█████████▏| 340818/371472 [4:57:04<2:25:59, 3.50it/s] 92%|█████████▏| 340819/371472 [4:57:04<2:23:56, 3.55it/s] 92%|█████████▏| 340820/371472 [4:57:04<2:19:28, 3.66it/s] {'loss': 2.5391, 'learning_rate': 1.7430347561895318e-07, 'epoch': 14.68} + 92%|█████████▏| 340820/371472 [4:57:04<2:19:28, 3.66it/s] 92%|█████████▏| 340821/371472 [4:57:04<2:19:13, 3.67it/s] 92%|█████████▏| 340822/371472 [4:57:05<2:16:03, 3.75it/s] 92%|█████████▏| 340823/371472 [4:57:05<2:25:19, 3.52it/s] 92%|█████████▏| 340824/371472 [4:57:05<2:27:31, 3.46it/s] 92%|█████████▏| 340825/371472 [4:57:06<2:23:27, 3.56it/s] 92%|█████████▏| 340826/371472 [4:57:06<2:22:07, 3.59it/s] 92%|█████████▏| 340827/371472 [4:57:06<2:31:10, 3.38it/s] 92%|█████████▏| 340828/371472 [4:57:06<2:29:08, 3.42it/s] 92%|█████████▏| 340829/371472 [4:57:07<2:28:16, 3.44it/s] 92%|█████████▏| 340830/371472 [4:57:07<2:25:42, 3.50it/s] 92%|█████████▏| 340831/371472 [4:57:07<2:21:48, 3.60it/s] 92%|█████████▏| 340832/371472 [4:57:08<2:26:55, 3.48it/s] 92%|█████████▏| 340833/371472 [4:57:08<2:31:09, 3.38it/s] 92%|█████████▏| 340834/371472 [4:57:08<2:26:13, 3.49it/s] 92%|█████████▏| 340835/371472 [4:57:08<2:21:01, 3.62it/s] 92%|█████████▏| 340836/371472 [4:57:09<2:27:27, 3.46it/s] 92%|█████████▏| 340837/371472 [4:57:09<2:26:07, 3.49it/s] 92%|█████████▏| 340838/371472 [4:57:09<2:23:00, 3.57it/s] 92%|█████████▏| 340839/371472 [4:57:10<2:20:09, 3.64it/s] 92%|█████████▏| 340840/371472 [4:57:10<2:22:10, 3.59it/s] {'loss': 2.5193, 'learning_rate': 1.7425499364347433e-07, 'epoch': 14.68} + 92%|█████████▏| 340840/371472 [4:57:10<2:22:10, 3.59it/s] 92%|█████████▏| 340841/371472 [4:57:10<2:19:28, 3.66it/s] 92%|█████████▏| 340842/371472 [4:57:10<2:14:55, 3.78it/s] 92%|█████████▏| 340843/371472 [4:57:11<2:20:35, 3.63it/s] 92%|█████████▏| 340844/371472 [4:57:11<2:18:09, 3.69it/s] 92%|█████████▏| 340845/371472 [4:57:11<2:14:42, 3.79it/s] 92%|█████████▏| 340846/371472 [4:57:11<2:13:59, 3.81it/s] 92%|█████████▏| 340847/371472 [4:57:12<2:12:22, 3.86it/s] 92%|█████████▏| 340848/371472 [4:57:12<2:24:03, 3.54it/s] 92%|█████████▏| 340849/371472 [4:57:12<2:22:29, 3.58it/s] 92%|█████████▏| 340850/371472 [4:57:13<2:19:36, 3.66it/s] 92%|█████████▏| 340851/371472 [4:57:13<2:22:50, 3.57it/s] 92%|█████████▏| 340852/371472 [4:57:13<2:17:45, 3.70it/s] 92%|█████████▏| 340853/371472 [4:57:13<2:33:21, 3.33it/s] 92%|█████████▏| 340854/371472 [4:57:14<2:37:43, 3.24it/s] 92%|█████████▏| 340855/371472 [4:57:14<2:45:53, 3.08it/s] 92%|█████████▏| 340856/371472 [4:57:14<2:45:43, 3.08it/s] 92%|█████████▏| 340857/371472 [4:57:15<2:37:06, 3.25it/s] 92%|█████████▏| 340858/371472 [4:57:15<2:41:43, 3.16it/s] 92%|█████████▏| 340859/371472 [4:57:15<2:43:26, 3.12it/s] 92%|█████████▏| 340860/371472 [4:57:16<2:37:50, 3.23it/s] {'loss': 2.6409, 'learning_rate': 1.742065116679954e-07, 'epoch': 14.68} + 92%|█████████▏| 340860/371472 [4:57:16<2:37:50, 3.23it/s] 92%|█████████▏| 340861/371472 [4:57:16<2:31:20, 3.37it/s] 92%|█████████▏| 340862/371472 [4:57:16<2:33:16, 3.33it/s] 92%|█████████▏| 340863/371472 [4:57:17<2:25:54, 3.50it/s] 92%|█████████▏| 340864/371472 [4:57:17<2:28:22, 3.44it/s] 92%|█████████▏| 340865/371472 [4:57:17<2:23:28, 3.56it/s] 92%|█████████▏| 340866/371472 [4:57:17<2:21:18, 3.61it/s] 92%|█████████▏| 340867/371472 [4:57:18<2:25:08, 3.51it/s] 92%|█████████▏| 340868/371472 [4:57:18<2:21:02, 3.62it/s] 92%|█████████▏| 340869/371472 [4:57:18<2:19:47, 3.65it/s] 92%|█████████▏| 340870/371472 [4:57:18<2:22:31, 3.58it/s] 92%|█████████▏| 340871/371472 [4:57:19<2:20:05, 3.64it/s] 92%|█████████▏| 340872/371472 [4:57:19<2:19:09, 3.66it/s] 92%|█████████▏| 340873/371472 [4:57:19<2:47:02, 3.05it/s] 92%|█████████▏| 340874/371472 [4:57:20<2:38:04, 3.23it/s] 92%|█████████▏| 340875/371472 [4:57:20<2:29:55, 3.40it/s] 92%|█████████▏| 340876/371472 [4:57:20<2:28:02, 3.44it/s] 92%|█████████▏| 340877/371472 [4:57:21<2:29:16, 3.42it/s] 92%|█████████▏| 340878/371472 [4:57:21<2:30:33, 3.39it/s] 92%|█████████▏| 340879/371472 [4:57:21<2:38:21, 3.22it/s] 92%|█████████▏| 340880/371472 [4:57:22<2:33:17, 3.33it/s] {'loss': 2.5824, 'learning_rate': 1.7415802969251655e-07, 'epoch': 14.68} + 92%|█████████▏| 340880/371472 [4:57:22<2:33:17, 3.33it/s] 92%|█████████▏| 340881/371472 [4:57:22<2:30:11, 3.39it/s] 92%|█████████▏| 340882/371472 [4:57:22<2:41:46, 3.15it/s] 92%|█████████▏| 340883/371472 [4:57:22<2:38:11, 3.22it/s] 92%|█████████▏| 340884/371472 [4:57:23<2:30:47, 3.38it/s] 92%|█████████▏| 340885/371472 [4:57:23<2:26:43, 3.47it/s] 92%|█████████▏| 340886/371472 [4:57:23<2:33:43, 3.32it/s] 92%|█████████▏| 340887/371472 [4:57:24<2:27:30, 3.46it/s] 92%|█████████▏| 340888/371472 [4:57:24<2:23:37, 3.55it/s] 92%|█████████▏| 340889/371472 [4:57:24<2:18:11, 3.69it/s] 92%|█████████▏| 340890/371472 [4:57:24<2:33:05, 3.33it/s] 92%|█████████▏| 340891/371472 [4:57:25<2:28:50, 3.42it/s] 92%|█████████▏| 340892/371472 [4:57:25<2:24:30, 3.53it/s] 92%|█████████▏| 340893/371472 [4:57:25<2:21:46, 3.59it/s] 92%|█████████▏| 340894/371472 [4:57:26<2:19:52, 3.64it/s] 92%|█████████▏| 340895/371472 [4:57:26<2:21:53, 3.59it/s] 92%|█████████▏| 340896/371472 [4:57:26<2:33:08, 3.33it/s] 92%|█████████▏| 340897/371472 [4:57:26<2:31:47, 3.36it/s] 92%|█████████▏| 340898/371472 [4:57:27<2:32:45, 3.34it/s] 92%|█████████▏| 340899/371472 [4:57:27<2:28:19, 3.44it/s] 92%|█████████▏| 340900/371472 [4:57:27<2:22:46, 3.57it/s] {'loss': 2.643, 'learning_rate': 1.741095477170376e-07, 'epoch': 14.68} + 92%|█████████▏| 340900/371472 [4:57:27<2:22:46, 3.57it/s] 92%|█████████▏| 340901/371472 [4:57:28<2:18:29, 3.68it/s] 92%|█████████▏| 340902/371472 [4:57:28<2:35:14, 3.28it/s] 92%|█████████▏| 340903/371472 [4:57:28<2:30:43, 3.38it/s] 92%|█████████▏| 340904/371472 [4:57:29<2:37:06, 3.24it/s] 92%|█████████▏| 340905/371472 [4:57:29<2:29:23, 3.41it/s] 92%|█████████▏| 340906/371472 [4:57:29<2:31:33, 3.36it/s] 92%|█████████▏| 340907/371472 [4:57:29<2:26:16, 3.48it/s] 92%|█████████▏| 340908/371472 [4:57:30<2:28:46, 3.42it/s] 92%|█████████▏| 340909/371472 [4:57:30<2:22:14, 3.58it/s] 92%|█████████▏| 340910/371472 [4:57:30<2:18:57, 3.67it/s] 92%|█████████▏| 340911/371472 [4:57:30<2:18:39, 3.67it/s] 92%|█████████▏| 340912/371472 [4:57:31<2:16:02, 3.74it/s] 92%|█████████▏| 340913/371472 [4:57:31<2:22:45, 3.57it/s] 92%|█████████▏| 340914/371472 [4:57:31<2:27:04, 3.46it/s] 92%|█████████▏| 340915/371472 [4:57:32<2:33:26, 3.32it/s] 92%|█████████▏| 340916/371472 [4:57:32<2:33:33, 3.32it/s] 92%|█████████▏| 340917/371472 [4:57:32<2:30:15, 3.39it/s] 92%|█████████▏| 340918/371472 [4:57:33<2:33:34, 3.32it/s] 92%|█████████▏| 340919/371472 [4:57:33<2:42:00, 3.14it/s] 92%|█████████▏| 340920/371472 [4:57:33<2:36:24, 3.26it/s] {'loss': 2.6, 'learning_rate': 1.7406106574155877e-07, 'epoch': 14.68} + 92%|█████████▏| 340920/371472 [4:57:33<2:36:24, 3.26it/s] 92%|█████████▏| 340921/371472 [4:57:33<2:32:09, 3.35it/s] 92%|█████████▏| 340922/371472 [4:57:34<2:26:06, 3.48it/s] 92%|█████████▏| 340923/371472 [4:57:34<2:21:12, 3.61it/s] 92%|█████████▏| 340924/371472 [4:57:34<2:22:05, 3.58it/s] 92%|█████████▏| 340925/371472 [4:57:35<2:29:50, 3.40it/s] 92%|█████████▏| 340926/371472 [4:57:35<2:28:17, 3.43it/s] 92%|█████████▏| 340927/371472 [4:57:35<2:31:04, 3.37it/s] 92%|█████████▏| 340928/371472 [4:57:35<2:27:02, 3.46it/s] 92%|█████████▏| 340929/371472 [4:57:36<2:27:25, 3.45it/s] 92%|█████████▏| 340930/371472 [4:57:36<2:36:22, 3.26it/s] 92%|█████████▏| 340931/371472 [4:57:36<2:35:19, 3.28it/s] 92%|█████████▏| 340932/371472 [4:57:37<2:32:43, 3.33it/s] 92%|█████████▏| 340933/371472 [4:57:37<2:31:32, 3.36it/s] 92%|█████████▏| 340934/371472 [4:57:37<2:36:44, 3.25it/s] 92%|█████████▏| 340935/371472 [4:57:38<2:36:47, 3.25it/s] 92%|█████████▏| 340936/371472 [4:57:38<2:26:14, 3.48it/s] 92%|█████████▏| 340937/371472 [4:57:38<2:21:25, 3.60it/s] 92%|█████████▏| 340938/371472 [4:57:38<2:27:46, 3.44it/s] 92%|█████████▏| 340939/371472 [4:57:39<2:22:54, 3.56it/s] 92%|█████████▏| 340940/371472 [4:57:39<2:25:35, 3.50it/s] {'loss': 2.5229, 'learning_rate': 1.7401258376607982e-07, 'epoch': 14.68} + 92%|█████████▏| 340940/371472 [4:57:39<2:25:35, 3.50it/s] 92%|█████████▏| 340941/371472 [4:57:39<2:25:47, 3.49it/s] 92%|█████████▏| 340942/371472 [4:57:40<2:31:40, 3.35it/s] 92%|█████████▏| 340943/371472 [4:57:40<2:28:48, 3.42it/s] 92%|█████████▏| 340944/371472 [4:57:40<2:34:41, 3.29it/s] 92%|█████████▏| 340945/371472 [4:57:40<2:32:33, 3.34it/s] 92%|█████████▏| 340946/371472 [4:57:41<2:28:11, 3.43it/s] 92%|█████████▏| 340947/371472 [4:57:41<2:28:11, 3.43it/s] 92%|█████████▏| 340948/371472 [4:57:41<2:27:43, 3.44it/s] 92%|█████████▏| 340949/371472 [4:57:42<2:27:22, 3.45it/s] 92%|█████████▏| 340950/371472 [4:57:42<2:24:48, 3.51it/s] 92%|█████████▏| 340951/371472 [4:57:42<2:23:46, 3.54it/s] 92%|█████████▏| 340952/371472 [4:57:42<2:21:59, 3.58it/s] 92%|█████████▏| 340953/371472 [4:57:43<2:17:38, 3.70it/s] 92%|█████████▏| 340954/371472 [4:57:43<2:38:09, 3.22it/s] 92%|█████████▏| 340955/371472 [4:57:43<2:34:39, 3.29it/s] 92%|█████████▏| 340956/371472 [4:57:44<2:32:27, 3.34it/s] 92%|█████████▏| 340957/371472 [4:57:44<2:28:26, 3.43it/s] 92%|█████████▏| 340958/371472 [4:57:44<2:24:07, 3.53it/s] 92%|█████████▏| 340959/371472 [4:57:45<2:40:59, 3.16it/s] 92%|█████████▏| 340960/371472 [4:57:45<2:39:01, 3.20it/s] {'loss': 2.4644, 'learning_rate': 1.7396410179060097e-07, 'epoch': 14.69} + 92%|█████████▏| 340960/371472 [4:57:45<2:39:01, 3.20it/s] 92%|█████████▏| 340961/371472 [4:57:45<2:30:03, 3.39it/s] 92%|█████████▏| 340962/371472 [4:57:45<2:28:39, 3.42it/s] 92%|█████████▏| 340963/371472 [4:57:46<2:28:47, 3.42it/s] 92%|█████████▏| 340964/371472 [4:57:46<2:22:24, 3.57it/s] 92%|█████████▏| 340965/371472 [4:57:46<2:24:31, 3.52it/s] 92%|█████████▏| 340966/371472 [4:57:47<2:22:02, 3.58it/s] 92%|█████████▏| 340967/371472 [4:57:47<2:20:09, 3.63it/s] 92%|█████████▏| 340968/371472 [4:57:47<2:18:58, 3.66it/s] 92%|█████████▏| 340969/371472 [4:57:47<2:19:54, 3.63it/s] 92%|█████████▏| 340970/371472 [4:57:48<2:17:48, 3.69it/s] 92%|█████████▏| 340971/371472 [4:57:48<2:22:38, 3.56it/s] 92%|█████████▏| 340972/371472 [4:57:48<2:17:47, 3.69it/s] 92%|█████████▏| 340973/371472 [4:57:48<2:18:54, 3.66it/s] 92%|█████████▏| 340974/371472 [4:57:49<2:23:13, 3.55it/s] 92%|█████████▏| 340975/371472 [4:57:49<2:19:29, 3.64it/s] 92%|█████████▏| 340976/371472 [4:57:49<2:31:56, 3.35it/s] 92%|█████████▏| 340977/371472 [4:57:50<2:30:02, 3.39it/s] 92%|█████████▏| 340978/371472 [4:57:50<2:25:43, 3.49it/s] 92%|█████████▏| 340979/371472 [4:57:50<2:32:09, 3.34it/s] 92%|█████████▏| 340980/371472 [4:57:51<2:30:33, 3.38it/s] {'loss': 2.6092, 'learning_rate': 1.7391561981512204e-07, 'epoch': 14.69} + 92%|█████████▏| 340980/371472 [4:57:51<2:30:33, 3.38it/s] 92%|█████████▏| 340981/371472 [4:57:51<2:21:08, 3.60it/s] 92%|█████████▏| 340982/371472 [4:57:51<2:18:30, 3.67it/s] 92%|█████████▏| 340983/371472 [4:57:51<2:20:06, 3.63it/s] 92%|█████████▏| 340984/371472 [4:57:52<2:23:38, 3.54it/s] 92%|█████████▏| 340985/371472 [4:57:52<2:22:42, 3.56it/s] 92%|█████████▏| 340986/371472 [4:57:52<2:49:25, 3.00it/s] 92%|█████████▏| 340987/371472 [4:57:53<2:38:57, 3.20it/s] 92%|█████████▏| 340988/371472 [4:57:53<2:39:28, 3.19it/s] 92%|█████████▏| 340989/371472 [4:57:53<2:41:20, 3.15it/s] 92%|█████████▏| 340990/371472 [4:57:54<2:32:18, 3.34it/s] 92%|█████████▏| 340991/371472 [4:57:54<2:31:47, 3.35it/s] 92%|█████████▏| 340992/371472 [4:57:54<2:28:55, 3.41it/s] 92%|█████████▏| 340993/371472 [4:57:54<2:26:14, 3.47it/s] 92%|█████████▏| 340994/371472 [4:57:55<2:34:18, 3.29it/s] 92%|█████████▏| 340995/371472 [4:57:55<2:31:08, 3.36it/s] 92%|█████████▏| 340996/371472 [4:57:55<2:30:24, 3.38it/s] 92%|█████████▏| 340997/371472 [4:57:56<2:31:00, 3.36it/s] 92%|█████████▏| 340998/371472 [4:57:56<2:35:13, 3.27it/s] 92%|█████████▏| 340999/371472 [4:57:56<2:29:17, 3.40it/s] 92%|█████████▏| 341000/371472 [4:57:57<2:41:42, 3.14it/s] {'loss': 2.5717, 'learning_rate': 1.738671378396432e-07, 'epoch': 14.69} + 92%|█████████▏| 341000/371472 [4:57:57<2:41:42, 3.14it/s] 92%|█████████▏| 341001/371472 [4:57:57<2:32:56, 3.32it/s] 92%|█████████▏| 341002/371472 [4:57:57<2:25:21, 3.49it/s] 92%|█████████▏| 341003/371472 [4:57:57<2:25:46, 3.48it/s] 92%|█████████▏| 341004/371472 [4:57:58<2:26:45, 3.46it/s] 92%|█████████▏| 341005/371472 [4:57:58<2:22:54, 3.55it/s] 92%|█████████▏| 341006/371472 [4:57:58<2:36:13, 3.25it/s] 92%|█████████▏| 341007/371472 [4:57:59<2:28:14, 3.43it/s] 92%|█████████▏| 341008/371472 [4:57:59<2:23:07, 3.55it/s] 92%|█████████▏| 341009/371472 [4:57:59<2:25:04, 3.50it/s] 92%|█████████▏| 341010/371472 [4:57:59<2:36:14, 3.25it/s] 92%|█████████▏| 341011/371472 [4:58:00<2:31:41, 3.35it/s] 92%|█████████▏| 341012/371472 [4:58:00<2:23:06, 3.55it/s] 92%|█████████▏| 341013/371472 [4:58:00<2:21:00, 3.60it/s] 92%|█████████▏| 341014/371472 [4:58:01<2:22:41, 3.56it/s] 92%|█████████▏| 341015/371472 [4:58:01<2:28:40, 3.41it/s] 92%|█████████▏| 341016/371472 [4:58:01<2:24:47, 3.51it/s] 92%|█████████▏| 341017/371472 [4:58:01<2:24:21, 3.52it/s] 92%|█████████▏| 341018/371472 [4:58:02<2:22:08, 3.57it/s] 92%|█████████▏| 341019/371472 [4:58:02<2:18:11, 3.67it/s] 92%|█████████▏| 341020/371472 [4:58:02<2:16:02, 3.73it/s] {'loss': 2.493, 'learning_rate': 1.7381865586416424e-07, 'epoch': 14.69} + 92%|█████████▏| 341020/371472 [4:58:02<2:16:02, 3.73it/s] 92%|█████████▏| 341021/371472 [4:58:03<2:21:10, 3.59it/s] 92%|█████████▏| 341022/371472 [4:58:03<2:37:07, 3.23it/s] 92%|█████████▏| 341023/371472 [4:58:03<2:30:39, 3.37it/s] 92%|█████████▏| 341024/371472 [4:58:03<2:26:45, 3.46it/s] 92%|█████████▏| 341025/371472 [4:58:04<2:22:19, 3.57it/s] 92%|█████████▏| 341026/371472 [4:58:04<2:19:07, 3.65it/s] 92%|█████████▏| 341027/371472 [4:58:04<2:34:05, 3.29it/s] 92%|█████████▏| 341028/371472 [4:58:05<2:31:05, 3.36it/s] 92%|█████████▏| 341029/371472 [4:58:05<2:38:05, 3.21it/s] 92%|█████████▏| 341030/371472 [4:58:05<2:31:15, 3.35it/s] 92%|█████████▏| 341031/371472 [4:58:06<2:30:55, 3.36it/s] 92%|█████████▏| 341032/371472 [4:58:06<2:23:25, 3.54it/s] 92%|█████████▏| 341033/371472 [4:58:06<2:25:33, 3.49it/s] 92%|█████████▏| 341034/371472 [4:58:06<2:38:04, 3.21it/s] 92%|█████████▏| 341035/371472 [4:58:07<2:30:38, 3.37it/s] 92%|█████████▏| 341036/371472 [4:58:07<2:30:36, 3.37it/s] 92%|█████████▏| 341037/371472 [4:58:07<2:34:04, 3.29it/s] 92%|█████████▏| 341038/371472 [4:58:08<2:40:43, 3.16it/s] 92%|█████████▏| 341039/371472 [4:58:08<2:29:26, 3.39it/s] 92%|█████████▏| 341040/371472 [4:58:08<2:24:58, 3.50it/s] {'loss': 2.4997, 'learning_rate': 1.7377017388868541e-07, 'epoch': 14.69} + 92%|█████████▏| 341040/371472 [4:58:08<2:24:58, 3.50it/s] 92%|█████████▏| 341041/371472 [4:58:08<2:18:44, 3.66it/s] 92%|█████████▏| 341042/371472 [4:58:09<2:29:14, 3.40it/s] 92%|█████████▏| 341043/371472 [4:58:09<2:35:13, 3.27it/s] 92%|█████████▏| 341044/371472 [4:58:09<2:28:14, 3.42it/s] 92%|█████████▏| 341045/371472 [4:58:10<2:20:33, 3.61it/s] 92%|█████████▏| 341046/371472 [4:58:10<2:27:38, 3.43it/s] 92%|█████████▏| 341047/371472 [4:58:10<2:28:04, 3.42it/s] 92%|█████████▏| 341048/371472 [4:58:10<2:27:15, 3.44it/s] 92%|█████████▏| 341049/371472 [4:58:11<2:22:05, 3.57it/s] 92%|█████████▏| 341050/371472 [4:58:11<2:22:08, 3.57it/s] 92%|█████████▏| 341051/371472 [4:58:11<2:25:06, 3.49it/s] 92%|█████████▏| 341052/371472 [4:58:12<2:24:24, 3.51it/s] 92%|█████████▏| 341053/371472 [4:58:12<2:25:58, 3.47it/s] 92%|█████████▏| 341054/371472 [4:58:12<2:21:15, 3.59it/s] 92%|█████████▏| 341055/371472 [4:58:12<2:17:45, 3.68it/s] 92%|█████████▏| 341056/371472 [4:58:13<2:14:51, 3.76it/s] 92%|█████████▏| 341057/371472 [4:58:13<2:17:17, 3.69it/s] 92%|█████████▏| 341058/371472 [4:58:13<2:16:30, 3.71it/s] 92%|█████████▏| 341059/371472 [4:58:14<2:23:46, 3.53it/s] 92%|█████████▏| 341060/371472 [4:58:14<2:30:28, 3.37it/s] {'loss': 2.4931, 'learning_rate': 1.7372169191320649e-07, 'epoch': 14.69} + 92%|█████████▏| 341060/371472 [4:58:14<2:30:28, 3.37it/s] 92%|█████████▏| 341061/371472 [4:58:14<2:23:44, 3.53it/s] 92%|█████████▏| 341062/371472 [4:58:14<2:16:14, 3.72it/s] 92%|█████████▏| 341063/371472 [4:58:15<2:30:31, 3.37it/s] 92%|█████████▏| 341064/371472 [4:58:15<2:26:13, 3.47it/s] 92%|█████████▏| 341065/371472 [4:58:15<2:30:50, 3.36it/s] 92%|█████████▏| 341066/371472 [4:58:16<2:26:33, 3.46it/s] 92%|█████████▏| 341067/371472 [4:58:16<2:23:17, 3.54it/s] 92%|█████████▏| 341068/371472 [4:58:16<2:22:47, 3.55it/s] 92%|█████████▏| 341069/371472 [4:58:16<2:26:31, 3.46it/s] 92%|█████████▏| 341070/371472 [4:58:17<2:22:22, 3.56it/s] 92%|█████████▏| 341071/371472 [4:58:17<2:26:31, 3.46it/s] 92%|█████████▏| 341072/371472 [4:58:17<2:31:03, 3.35it/s] 92%|█████████▏| 341073/371472 [4:58:18<2:25:07, 3.49it/s] 92%|█████████▏| 341074/371472 [4:58:18<2:18:46, 3.65it/s] 92%|█████████▏| 341075/371472 [4:58:18<2:19:36, 3.63it/s] 92%|█████████▏| 341076/371472 [4:58:18<2:27:52, 3.43it/s] 92%|█████████▏| 341077/371472 [4:58:19<2:37:00, 3.23it/s] 92%|█████████▏| 341078/371472 [4:58:19<2:29:50, 3.38it/s] 92%|█████████▏| 341079/371472 [4:58:19<2:29:51, 3.38it/s] 92%|█████████▏| 341080/371472 [4:58:20<2:28:45, 3.40it/s] {'loss': 2.5552, 'learning_rate': 1.736732099377276e-07, 'epoch': 14.69} + 92%|█████████▏| 341080/371472 [4:58:20<2:28:45, 3.40it/s] 92%|█████████▏| 341081/371472 [4:58:20<2:23:16, 3.54it/s] 92%|█████████▏| 341082/371472 [4:58:20<2:20:45, 3.60it/s] 92%|█████████▏| 341083/371472 [4:58:20<2:16:51, 3.70it/s] 92%|█████████▏| 341084/371472 [4:58:21<2:18:03, 3.67it/s] 92%|█████████▏| 341085/371472 [4:58:21<2:21:49, 3.57it/s] 92%|█████████▏| 341086/371472 [4:58:21<2:21:52, 3.57it/s] 92%|█████████▏| 341087/371472 [4:58:21<2:17:23, 3.69it/s] 92%|█████████▏| 341088/371472 [4:58:22<2:16:09, 3.72it/s] 92%|█████████▏| 341089/371472 [4:58:22<2:24:36, 3.50it/s] 92%|█████████▏| 341090/371472 [4:58:22<2:25:29, 3.48it/s] 92%|█████████▏| 341091/371472 [4:58:23<2:26:38, 3.45it/s] 92%|█████████▏| 341092/371472 [4:58:23<2:37:31, 3.21it/s] 92%|█████████▏| 341093/371472 [4:58:23<2:27:20, 3.44it/s] 92%|█████████▏| 341094/371472 [4:58:24<2:25:43, 3.47it/s] 92%|█████████▏| 341095/371472 [4:58:24<2:23:13, 3.53it/s] 92%|█████████▏| 341096/371472 [4:58:24<2:21:08, 3.59it/s] 92%|█████████▏| 341097/371472 [4:58:24<2:17:25, 3.68it/s] 92%|█████████▏| 341098/371472 [4:58:25<2:15:31, 3.74it/s] 92%|█████████▏| 341099/371472 [4:58:25<2:17:44, 3.68it/s] 92%|█████████▏| 341100/371472 [4:58:25<2:18:45, 3.65it/s] {'loss': 2.6086, 'learning_rate': 1.7362472796224866e-07, 'epoch': 14.69} + 92%|█████████▏| 341100/371472 [4:58:25<2:18:45, 3.65it/s] 92%|█████████▏| 341101/371472 [4:58:25<2:17:49, 3.67it/s] 92%|█████████▏| 341102/371472 [4:58:26<2:16:18, 3.71it/s] 92%|█████████▏| 341103/371472 [4:58:26<2:21:47, 3.57it/s] 92%|█████████▏| 341104/371472 [4:58:26<2:24:31, 3.50it/s] 92%|█████████▏| 341105/371472 [4:58:27<2:27:10, 3.44it/s] 92%|█████████▏| 341106/371472 [4:58:27<2:23:24, 3.53it/s] 92%|█████████▏| 341107/371472 [4:58:27<2:24:19, 3.51it/s] 92%|█████████▏| 341108/371472 [4:58:27<2:20:06, 3.61it/s] 92%|█████████▏| 341109/371472 [4:58:28<2:18:17, 3.66it/s] 92%|█████████▏| 341110/371472 [4:58:28<2:24:52, 3.49it/s] 92%|█████████▏| 341111/371472 [4:58:28<2:32:41, 3.31it/s] 92%|█████████▏| 341112/371472 [4:58:29<2:29:04, 3.39it/s] 92%|█████████▏| 341113/371472 [4:58:29<2:26:01, 3.47it/s] 92%|█████████▏| 341114/371472 [4:58:29<2:17:08, 3.69it/s] 92%|█████████▏| 341115/371472 [4:58:30<2:39:45, 3.17it/s] 92%|█████████▏| 341116/371472 [4:58:30<2:32:01, 3.33it/s] 92%|█████████▏| 341117/371472 [4:58:30<2:28:19, 3.41it/s] 92%|█████████▏| 341118/371472 [4:58:30<2:19:59, 3.61it/s] 92%|█████████▏| 341119/371472 [4:58:31<2:16:22, 3.71it/s] 92%|█████████▏| 341120/371472 [4:58:31<2:23:45, 3.52it/s] {'loss': 2.6389, 'learning_rate': 1.7357624598676986e-07, 'epoch': 14.69} + 92%|█████████▏| 341120/371472 [4:58:31<2:23:45, 3.52it/s] 92%|█████████▏| 341121/371472 [4:58:31<2:18:38, 3.65it/s] 92%|█████████▏| 341122/371472 [4:58:31<2:18:00, 3.67it/s] 92%|█████████▏| 341123/371472 [4:58:32<2:26:00, 3.46it/s] 92%|█████████▏| 341124/371472 [4:58:32<2:22:45, 3.54it/s] 92%|█████████▏| 341125/371472 [4:58:32<2:21:55, 3.56it/s] 92%|█████████▏| 341126/371472 [4:58:33<2:21:29, 3.57it/s] 92%|█████████▏| 341127/371472 [4:58:33<2:18:59, 3.64it/s] 92%|█████████▏| 341128/371472 [4:58:33<2:18:46, 3.64it/s] 92%|█████████▏| 341129/371472 [4:58:33<2:19:52, 3.62it/s] 92%|█████████▏| 341130/371472 [4:58:34<2:14:15, 3.77it/s] 92%|█████████▏| 341131/371472 [4:58:34<2:14:30, 3.76it/s] 92%|█████████▏| 341132/371472 [4:58:34<2:12:23, 3.82it/s] 92%|█████████▏| 341133/371472 [4:58:34<2:14:51, 3.75it/s] 92%|█████████▏| 341134/371472 [4:58:35<2:13:30, 3.79it/s] 92%|█████████▏| 341135/371472 [4:58:35<2:20:46, 3.59it/s] 92%|█████████▏| 341136/371472 [4:58:35<2:19:47, 3.62it/s] 92%|█████████▏| 341137/371472 [4:58:36<2:17:40, 3.67it/s] 92%|█████████▏| 341138/371472 [4:58:36<2:26:38, 3.45it/s] 92%|█████████▏| 341139/371472 [4:58:36<2:33:34, 3.29it/s] 92%|█████████▏| 341140/371472 [4:58:36<2:27:35, 3.43it/s] {'loss': 2.643, 'learning_rate': 1.735277640112909e-07, 'epoch': 14.69} + 92%|█████████▏| 341140/371472 [4:58:36<2:27:35, 3.43it/s] 92%|█████████▏| 341141/371472 [4:58:37<2:23:16, 3.53it/s] 92%|█████████▏| 341142/371472 [4:58:37<2:19:42, 3.62it/s] 92%|█████████▏| 341143/371472 [4:58:37<2:27:09, 3.43it/s] 92%|█████████▏| 341144/371472 [4:58:38<2:21:13, 3.58it/s] 92%|█████████▏| 341145/371472 [4:58:38<2:26:56, 3.44it/s] 92%|█████████▏| 341146/371472 [4:58:38<2:26:44, 3.44it/s] 92%|█████████▏| 341147/371472 [4:58:38<2:28:01, 3.41it/s] 92%|█████████▏| 341148/371472 [4:58:39<2:26:15, 3.46it/s] 92%|█████████▏| 341149/371472 [4:58:39<2:29:26, 3.38it/s] 92%|█████████▏| 341150/371472 [4:58:39<2:27:51, 3.42it/s] 92%|█████████▏| 341151/371472 [4:58:40<2:36:56, 3.22it/s] 92%|█████████▏| 341152/371472 [4:58:40<2:40:23, 3.15it/s] 92%|█████████▏| 341153/371472 [4:58:40<2:31:04, 3.34it/s] 92%|█████████▏| 341154/371472 [4:58:41<2:27:28, 3.43it/s] 92%|█████████▏| 341155/371472 [4:58:41<2:21:46, 3.56it/s] 92%|█████████▏| 341156/371472 [4:58:41<2:21:44, 3.56it/s] 92%|█████████▏| 341157/371472 [4:58:41<2:18:46, 3.64it/s] 92%|█████████▏| 341158/371472 [4:58:42<2:15:57, 3.72it/s] 92%|█████████▏| 341159/371472 [4:58:42<2:26:31, 3.45it/s] 92%|█████████▏| 341160/371472 [4:58:42<2:24:11, 3.50it/s] {'loss': 2.6484, 'learning_rate': 1.7347928203581206e-07, 'epoch': 14.69} + 92%|█████████▏| 341160/371472 [4:58:42<2:24:11, 3.50it/s] 92%|█████████▏| 341161/371472 [4:58:43<2:35:11, 3.26it/s] 92%|█████████▏| 341162/371472 [4:58:43<2:31:16, 3.34it/s] 92%|█████████▏| 341163/371472 [4:58:43<2:31:44, 3.33it/s] 92%|█████████▏| 341164/371472 [4:58:43<2:29:41, 3.37it/s] 92%|█████████▏| 341165/371472 [4:58:44<2:27:04, 3.43it/s] 92%|█████████▏| 341166/371472 [4:58:44<2:24:34, 3.49it/s] 92%|█████████▏| 341167/371472 [4:58:44<2:22:52, 3.54it/s] 92%|█████████▏| 341168/371472 [4:58:45<2:21:05, 3.58it/s] 92%|█████████▏| 341169/371472 [4:58:45<2:21:58, 3.56it/s] 92%|█████████▏| 341170/371472 [4:58:45<2:25:55, 3.46it/s] 92%|█████████▏| 341171/371472 [4:58:45<2:21:55, 3.56it/s] 92%|█████████▏| 341172/371472 [4:58:46<2:22:33, 3.54it/s] 92%|█████████▏| 341173/371472 [4:58:46<2:24:21, 3.50it/s] 92%|█████████▏| 341174/371472 [4:58:46<2:36:24, 3.23it/s] 92%|█████████▏| 341175/371472 [4:58:47<2:31:32, 3.33it/s] 92%|█████████▏| 341176/371472 [4:58:47<2:32:11, 3.32it/s] 92%|█████████▏| 341177/371472 [4:58:47<2:23:41, 3.51it/s] 92%|█████████▏| 341178/371472 [4:58:47<2:24:39, 3.49it/s] 92%|█████████▏| 341179/371472 [4:58:48<2:22:22, 3.55it/s] 92%|█████████▏| 341180/371472 [4:58:48<2:17:40, 3.67it/s] {'loss': 2.6341, 'learning_rate': 1.7343080006033313e-07, 'epoch': 14.7} + 92%|█████████▏| 341180/371472 [4:58:48<2:17:40, 3.67it/s] 92%|█████████▏| 341181/371472 [4:58:48<2:23:32, 3.52it/s] 92%|█████████▏| 341182/371472 [4:58:49<2:19:15, 3.63it/s] 92%|█████████▏| 341183/371472 [4:58:49<2:19:23, 3.62it/s] 92%|█████████▏| 341184/371472 [4:58:49<2:20:21, 3.60it/s] 92%|█████████▏| 341185/371472 [4:58:49<2:17:10, 3.68it/s] 92%|█████████▏| 341186/371472 [4:58:50<2:24:09, 3.50it/s] 92%|█████████▏| 341187/371472 [4:58:50<2:25:29, 3.47it/s] 92%|█████████▏| 341188/371472 [4:58:50<2:24:47, 3.49it/s] 92%|█████████▏| 341189/371472 [4:58:51<2:24:32, 3.49it/s] 92%|█████████▏| 341190/371472 [4:58:51<2:26:17, 3.45it/s] 92%|█████████▏| 341191/371472 [4:58:51<2:23:38, 3.51it/s] 92%|█████████▏| 341192/371472 [4:58:51<2:21:48, 3.56it/s] 92%|█████████▏| 341193/371472 [4:58:52<2:27:23, 3.42it/s] 92%|█████████▏| 341194/371472 [4:58:52<2:24:48, 3.48it/s] 92%|█████████▏| 341195/371472 [4:58:52<2:25:40, 3.46it/s] 92%|█████████▏| 341196/371472 [4:58:53<2:24:01, 3.50it/s] 92%|█████████▏| 341197/371472 [4:58:53<2:25:02, 3.48it/s] 92%|█████████▏| 341198/371472 [4:58:53<2:23:15, 3.52it/s] 92%|█████████▏| 341199/371472 [4:58:53<2:21:44, 3.56it/s] 92%|█████████▏| 341200/371472 [4:58:54<2:18:21, 3.65it/s] {'loss': 2.5684, 'learning_rate': 1.7338231808485417e-07, 'epoch': 14.7} + 92%|█████████▏| 341200/371472 [4:58:54<2:18:21, 3.65it/s] 92%|█████████▏| 341201/371472 [4:58:54<2:15:32, 3.72it/s] 92%|█████████▏| 341202/371472 [4:58:54<2:13:00, 3.79it/s] 92%|█████████▏| 341203/371472 [4:58:54<2:18:08, 3.65it/s] 92%|█████████▏| 341204/371472 [4:58:55<2:17:01, 3.68it/s] 92%|█████████▏| 341205/371472 [4:58:55<2:14:37, 3.75it/s] 92%|█████████▏| 341206/371472 [4:58:55<2:26:44, 3.44it/s] 92%|█████████▏| 341207/371472 [4:58:56<2:40:51, 3.14it/s] 92%|█████████▏| 341208/371472 [4:58:56<2:33:05, 3.29it/s] 92%|█████████▏| 341209/371472 [4:58:56<2:38:06, 3.19it/s] 92%|█████████▏| 341210/371472 [4:58:57<2:30:57, 3.34it/s] 92%|█████████▏| 341211/371472 [4:58:57<2:27:31, 3.42it/s] 92%|█████████▏| 341212/371472 [4:58:57<2:23:10, 3.52it/s] 92%|█████████▏| 341213/371472 [4:58:57<2:26:29, 3.44it/s] 92%|█████████▏| 341214/371472 [4:58:58<2:26:52, 3.43it/s] 92%|█████████▏| 341215/371472 [4:58:58<2:23:05, 3.52it/s] 92%|█████████▏| 341216/371472 [4:58:58<2:25:37, 3.46it/s] 92%|█████████▏| 341217/371472 [4:58:59<2:23:35, 3.51it/s] 92%|█████████▏| 341218/371472 [4:58:59<2:36:03, 3.23it/s] 92%|█████████▏| 341219/371472 [4:58:59<2:31:34, 3.33it/s] 92%|█████████▏| 341220/371472 [4:58:59<2:25:59, 3.45it/s] {'loss': 2.6155, 'learning_rate': 1.7333383610937532e-07, 'epoch': 14.7} + 92%|█████████▏| 341220/371472 [4:58:59<2:25:59, 3.45it/s] 92%|█████████▏| 341221/371472 [4:59:00<2:22:33, 3.54it/s] 92%|█████████▏| 341222/371472 [4:59:00<2:20:12, 3.60it/s] 92%|█████████▏| 341223/371472 [4:59:00<2:17:23, 3.67it/s] 92%|█████████▏| 341224/371472 [4:59:01<2:18:15, 3.65it/s] 92%|█████████▏| 341225/371472 [4:59:01<2:34:55, 3.25it/s] 92%|█████████▏| 341226/371472 [4:59:01<2:32:47, 3.30it/s] 92%|█████████▏| 341227/371472 [4:59:02<2:28:16, 3.40it/s] 92%|█████████▏| 341228/371472 [4:59:02<2:33:47, 3.28it/s] 92%|█████████▏| 341229/371472 [4:59:02<2:56:08, 2.86it/s] 92%|█████████▏| 341230/371472 [4:59:03<2:49:51, 2.97it/s] 92%|█████████▏| 341231/371472 [4:59:03<2:41:47, 3.12it/s] 92%|█████████▏| 341232/371472 [4:59:03<2:38:23, 3.18it/s] 92%|█████████▏| 341233/371472 [4:59:03<2:31:42, 3.32it/s] 92%|█████████▏| 341234/371472 [4:59:04<2:33:44, 3.28it/s] 92%|█████████▏| 341235/371472 [4:59:04<2:24:21, 3.49it/s] 92%|█████████▏| 341236/371472 [4:59:04<2:20:37, 3.58it/s] 92%|█████████▏| 341237/371472 [4:59:05<2:27:04, 3.43it/s] 92%|█████████▏| 341238/371472 [4:59:05<2:19:56, 3.60it/s] 92%|█████████▏| 341239/371472 [4:59:05<2:19:29, 3.61it/s] 92%|█████████▏| 341240/371472 [4:59:05<2:17:29, 3.66it/s] {'loss': 2.5842, 'learning_rate': 1.732853541338964e-07, 'epoch': 14.7} + 92%|█████████▏| 341240/371472 [4:59:05<2:17:29, 3.66it/s] 92%|█████████▏| 341241/371472 [4:59:06<2:29:38, 3.37it/s] 92%|█████████▏| 341242/371472 [4:59:06<2:26:55, 3.43it/s] 92%|█████████▏| 341243/371472 [4:59:06<2:33:25, 3.28it/s] 92%|█████████▏| 341244/371472 [4:59:07<2:28:12, 3.40it/s] 92%|█████████▏| 341245/371472 [4:59:07<2:22:42, 3.53it/s] 92%|█████████▏| 341246/371472 [4:59:07<2:20:20, 3.59it/s] 92%|█████████▏| 341247/371472 [4:59:07<2:21:28, 3.56it/s] 92%|█████████▏| 341248/371472 [4:59:08<2:20:01, 3.60it/s] 92%|█████████▏| 341249/371472 [4:59:08<2:41:19, 3.12it/s] 92%|█████████▏| 341250/371472 [4:59:08<2:34:59, 3.25it/s] 92%|█████████▏| 341251/371472 [4:59:09<2:35:17, 3.24it/s] 92%|█████████▏| 341252/371472 [4:59:09<2:29:09, 3.38it/s] 92%|█████████▏| 341253/371472 [4:59:09<2:23:25, 3.51it/s] 92%|█████████▏| 341254/371472 [4:59:09<2:20:43, 3.58it/s] 92%|█████████▏| 341255/371472 [4:59:10<2:21:24, 3.56it/s] 92%|█████████▏| 341256/371472 [4:59:10<2:17:03, 3.67it/s] 92%|█████████▏| 341257/371472 [4:59:10<2:15:36, 3.71it/s] 92%|█████████▏| 341258/371472 [4:59:11<2:26:15, 3.44it/s] 92%|█████████▏| 341259/371472 [4:59:11<2:21:35, 3.56it/s] 92%|█████████▏| 341260/371472 [4:59:11<2:21:44, 3.55it/s] {'loss': 2.6351, 'learning_rate': 1.7323687215841754e-07, 'epoch': 14.7} + 92%|█████████▏| 341260/371472 [4:59:11<2:21:44, 3.55it/s] 92%|█████████▏| 341261/371472 [4:59:12<2:29:56, 3.36it/s] 92%|█████████▏| 341262/371472 [4:59:12<2:25:33, 3.46it/s] 92%|█████████▏| 341263/371472 [4:59:12<2:23:59, 3.50it/s] 92%|█████████▏| 341264/371472 [4:59:12<2:21:47, 3.55it/s] 92%|█████████▏| 341265/371472 [4:59:13<2:18:17, 3.64it/s] 92%|█████████▏| 341266/371472 [4:59:13<2:17:06, 3.67it/s] 92%|█████████▏| 341267/371472 [4:59:13<2:16:46, 3.68it/s] 92%|█████████▏| 341268/371472 [4:59:13<2:15:09, 3.72it/s] 92%|█████████▏| 341269/371472 [4:59:14<2:17:56, 3.65it/s] 92%|█████████▏| 341270/371472 [4:59:14<2:22:19, 3.54it/s] 92%|█████████▏| 341271/371472 [4:59:14<2:29:39, 3.36it/s] 92%|█████████▏| 341272/371472 [4:59:15<2:26:14, 3.44it/s] 92%|█████████▏| 341273/371472 [4:59:15<2:27:52, 3.40it/s] 92%|█████████▏| 341274/371472 [4:59:15<2:27:23, 3.41it/s] 92%|█████████▏| 341275/371472 [4:59:15<2:27:05, 3.42it/s] 92%|█████████▏| 341276/371472 [4:59:16<2:27:14, 3.42it/s] 92%|█████████▏| 341277/371472 [4:59:16<2:23:58, 3.50it/s] 92%|█████████▏| 341278/371472 [4:59:16<2:23:19, 3.51it/s] 92%|█████████▏| 341279/371472 [4:59:17<2:24:02, 3.49it/s] 92%|█████████▏| 341280/371472 [4:59:17<2:30:36, 3.34it/s] {'loss': 2.5773, 'learning_rate': 1.731883901829386e-07, 'epoch': 14.7} + 92%|█████████▏| 341280/371472 [4:59:17<2:30:36, 3.34it/s] 92%|█████████▏| 341281/371472 [4:59:17<2:33:22, 3.28it/s] 92%|█████████▏| 341282/371472 [4:59:18<2:45:36, 3.04it/s] 92%|█████████▏| 341283/371472 [4:59:18<2:38:15, 3.18it/s] 92%|█████████▏| 341284/371472 [4:59:18<2:32:20, 3.30it/s] 92%|█████████▏| 341285/371472 [4:59:19<2:40:27, 3.14it/s] 92%|█████████▏| 341286/371472 [4:59:19<2:46:40, 3.02it/s] 92%|█████████▏| 341287/371472 [4:59:19<2:39:18, 3.16it/s] 92%|█████████▏| 341288/371472 [4:59:20<2:38:47, 3.17it/s] 92%|█████████▏| 341289/371472 [4:59:20<2:31:51, 3.31it/s] 92%|█████████▏| 341290/371472 [4:59:20<2:31:29, 3.32it/s] 92%|█████████▏| 341291/371472 [4:59:20<2:27:40, 3.41it/s] 92%|█████████▏| 341292/371472 [4:59:21<2:22:07, 3.54it/s] 92%|█████████▏| 341293/371472 [4:59:21<2:28:00, 3.40it/s] 92%|█████████▏| 341294/371472 [4:59:21<2:26:48, 3.43it/s] 92%|█████████▏| 341295/371472 [4:59:21<2:22:31, 3.53it/s] 92%|█████████▏| 341296/371472 [4:59:22<2:19:19, 3.61it/s] 92%|█████████▏| 341297/371472 [4:59:22<2:22:46, 3.52it/s] 92%|█████████▏| 341298/371472 [4:59:22<2:19:40, 3.60it/s] 92%|█████████▏| 341299/371472 [4:59:23<3:20:26, 2.51it/s] 92%|█████████▏| 341300/371472 [4:59:23<3:06:48, 2.69it/s] {'loss': 2.5023, 'learning_rate': 1.7313990820745977e-07, 'epoch': 14.7} + 92%|█████████▏| 341300/371472 [4:59:23<3:06:48, 2.69it/s] 92%|█████████▏| 341301/371472 [4:59:24<2:48:59, 2.98it/s] 92%|█████████▏| 341302/371472 [4:59:24<2:36:18, 3.22it/s] 92%|█████████▏| 341303/371472 [4:59:24<2:31:06, 3.33it/s] 92%|█████████▏| 341304/371472 [4:59:24<2:30:39, 3.34it/s] 92%|█████████▏| 341305/371472 [4:59:25<2:32:20, 3.30it/s] 92%|█████████▏| 341306/371472 [4:59:25<2:23:21, 3.51it/s] 92%|█████████▏| 341307/371472 [4:59:25<2:22:18, 3.53it/s] 92%|█████████▏| 341308/371472 [4:59:25<2:19:29, 3.60it/s] 92%|█████████▏| 341309/371472 [4:59:26<2:23:05, 3.51it/s] 92%|█████████▏| 341310/371472 [4:59:26<2:23:12, 3.51it/s] 92%|█████████▏| 341311/371472 [4:59:26<2:19:22, 3.61it/s] 92%|█████████▏| 341312/371472 [4:59:27<2:20:32, 3.58it/s] 92%|█████████▏| 341313/371472 [4:59:27<2:23:08, 3.51it/s] 92%|█████████▏| 341314/371472 [4:59:27<2:22:10, 3.54it/s] 92%|█████████▏| 341315/371472 [4:59:27<2:24:14, 3.48it/s] 92%|█████████▏| 341316/371472 [4:59:28<2:27:14, 3.41it/s] 92%|█████████▏| 341317/371472 [4:59:28<2:24:39, 3.47it/s] 92%|█████████▏| 341318/371472 [4:59:29<2:49:27, 2.97it/s] 92%|█████████���| 341319/371472 [4:59:29<2:37:07, 3.20it/s] 92%|█████████▏| 341320/371472 [4:59:29<2:26:03, 3.44it/s] {'loss': 2.5195, 'learning_rate': 1.7309142623198084e-07, 'epoch': 14.7} + 92%|█████████▏| 341320/371472 [4:59:29<2:26:03, 3.44it/s] 92%|█████████▏| 341321/371472 [4:59:29<2:21:50, 3.54it/s] 92%|█████████▏| 341322/371472 [4:59:30<2:23:47, 3.49it/s] 92%|█████████▏| 341323/371472 [4:59:30<2:21:26, 3.55it/s] 92%|█████████▏| 341324/371472 [4:59:30<2:20:53, 3.57it/s] 92%|█████████▏| 341325/371472 [4:59:30<2:15:08, 3.72it/s] 92%|█████████▏| 341326/371472 [4:59:31<2:24:49, 3.47it/s] 92%|█████████▏| 341327/371472 [4:59:31<2:24:32, 3.48it/s] 92%|█████████▏| 341328/371472 [4:59:31<2:19:21, 3.61it/s] 92%|█████████▏| 341329/371472 [4:59:31<2:16:40, 3.68it/s] 92%|█████████▏| 341330/371472 [4:59:32<2:14:22, 3.74it/s] 92%|█████████▏| 341331/371472 [4:59:32<2:16:10, 3.69it/s] 92%|█████████▏| 341332/371472 [4:59:32<2:12:44, 3.78it/s] 92%|█████████▏| 341333/371472 [4:59:33<2:21:37, 3.55it/s] 92%|█████████▏| 341334/371472 [4:59:33<2:16:45, 3.67it/s] 92%|█████████▏| 341335/371472 [4:59:33<2:21:03, 3.56it/s] 92%|█████████▏| 341336/371472 [4:59:33<2:22:25, 3.53it/s] 92%|█████████▏| 341337/371472 [4:59:34<2:28:32, 3.38it/s] 92%|█████████▏| 341338/371472 [4:59:34<2:25:29, 3.45it/s] 92%|█████████▏| 341339/371472 [4:59:34<2:19:21, 3.60it/s] 92%|█████████▏| 341340/371472 [4:59:35<2:15:51, 3.70it/s] {'loss': 2.543, 'learning_rate': 1.7304294425650196e-07, 'epoch': 14.7} + 92%|█████████▏| 341340/371472 [4:59:35<2:15:51, 3.70it/s] 92%|█████████▏| 341341/371472 [4:59:35<2:17:09, 3.66it/s] 92%|█████████▏| 341342/371472 [4:59:35<2:15:44, 3.70it/s] 92%|█████████▏| 341343/371472 [4:59:35<2:09:53, 3.87it/s] 92%|█████████▏| 341344/371472 [4:59:36<2:09:16, 3.88it/s] 92%|█████████▏| 341345/371472 [4:59:36<2:08:46, 3.90it/s] 92%|█████████▏| 341346/371472 [4:59:36<2:08:52, 3.90it/s] 92%|█████████▏| 341347/371472 [4:59:36<2:08:09, 3.92it/s] 92%|█████████▏| 341348/371472 [4:59:37<2:17:12, 3.66it/s] 92%|█████████▏| 341349/371472 [4:59:37<2:26:15, 3.43it/s] 92%|█████████▏| 341350/371472 [4:59:37<2:24:06, 3.48it/s] 92%|█████████▏| 341351/371472 [4:59:38<2:20:20, 3.58it/s] 92%|█████████▏| 341352/371472 [4:59:38<2:18:34, 3.62it/s] 92%|█████████▏| 341353/371472 [4:59:38<2:15:44, 3.70it/s] 92%|█████████▏| 341354/371472 [4:59:38<2:38:03, 3.18it/s] 92%|█████████▏| 341355/371472 [4:59:39<2:32:48, 3.28it/s] 92%|█████████▏| 341356/371472 [4:59:39<2:25:07, 3.46it/s] 92%|█████████▏| 341357/371472 [4:59:39<2:19:31, 3.60it/s] 92%|█████████▏| 341358/371472 [4:59:40<2:18:35, 3.62it/s] 92%|█████████▏| 341359/371472 [4:59:40<2:16:48, 3.67it/s] 92%|█████████▏| 341360/371472 [4:59:40<2:18:33, 3.62it/s] {'loss': 2.5258, 'learning_rate': 1.7299446228102303e-07, 'epoch': 14.7} + 92%|█████████▏| 341360/371472 [4:59:40<2:18:33, 3.62it/s] 92%|█████████▏| 341361/371472 [4:59:40<2:22:15, 3.53it/s] 92%|█████████▏| 341362/371472 [4:59:41<2:41:14, 3.11it/s] 92%|█████████▏| 341363/371472 [4:59:41<2:40:42, 3.12it/s] 92%|█████████▏| 341364/371472 [4:59:41<2:33:27, 3.27it/s] 92%|█████████▏| 341365/371472 [4:59:42<2:35:25, 3.23it/s] 92%|█████████▏| 341366/371472 [4:59:42<2:31:07, 3.32it/s] 92%|█████████▏| 341367/371472 [4:59:42<2:24:15, 3.48it/s] 92%|█████████▏| 341368/371472 [4:59:43<2:26:54, 3.42it/s] 92%|█████████▏| 341369/371472 [4:59:43<2:24:10, 3.48it/s] 92%|█████████▏| 341370/371472 [4:59:43<2:20:56, 3.56it/s] 92%|█████████▏| 341371/371472 [4:59:43<2:15:41, 3.70it/s] 92%|█████████▏| 341372/371472 [4:59:44<2:10:36, 3.84it/s] 92%|█████████▏| 341373/371472 [4:59:44<2:11:14, 3.82it/s] 92%|█████████▏| 341374/371472 [4:59:44<2:12:43, 3.78it/s] 92%|█████████▏| 341375/371472 [4:59:44<2:11:16, 3.82it/s] 92%|█████████▏| 341376/371472 [4:59:45<2:12:13, 3.79it/s] 92%|█████████▏| 341377/371472 [4:59:45<2:12:04, 3.80it/s] 92%|█████████▏| 341378/371472 [4:59:45<2:10:52, 3.83it/s] 92%|█████████▏| 341379/371472 [4:59:45<2:20:10, 3.58it/s] 92%|█████████▏| 341380/371472 [4:59:46<2:20:30, 3.57it/s] {'loss': 2.5802, 'learning_rate': 1.729459803055442e-07, 'epoch': 14.7} + 92%|█████████▏| 341380/371472 [4:59:46<2:20:30, 3.57it/s] 92%|█████████▏| 341381/371472 [4:59:46<2:41:35, 3.10it/s] 92%|█████████▏| 341382/371472 [4:59:46<2:31:38, 3.31it/s] 92%|█████████▏| 341383/371472 [4:59:47<2:28:54, 3.37it/s] 92%|█████████▏| 341384/371472 [4:59:47<2:30:34, 3.33it/s] 92%|█████████▏| 341385/371472 [4:59:47<2:28:26, 3.38it/s] 92%|█████████▏| 341386/371472 [4:59:48<2:27:25, 3.40it/s] 92%|█████████▏| 341387/371472 [4:59:48<2:20:22, 3.57it/s] 92%|█████████▏| 341388/371472 [4:59:48<2:28:44, 3.37it/s] 92%|█████████▏| 341389/371472 [4:59:48<2:34:27, 3.25it/s] 92%|█████████▏| 341390/371472 [4:59:49<2:33:20, 3.27it/s] 92%|█████████▏| 341391/371472 [4:59:49<2:24:44, 3.46it/s] 92%|█████████▏| 341392/371472 [4:59:49<2:32:12, 3.29it/s] 92%|█████████▏| 341393/371472 [4:59:50<2:29:49, 3.35it/s] 92%|█████████▏| 341394/371472 [4:59:50<2:28:58, 3.36it/s] 92%|█████████▏| 341395/371472 [4:59:50<2:28:45, 3.37it/s] 92%|█████████▏| 341396/371472 [4:59:50<2:20:48, 3.56it/s] 92%|█████████▏| 341397/371472 [4:59:51<2:20:32, 3.57it/s] 92%|█████████▏| 341398/371472 [4:59:51<2:20:04, 3.58it/s] 92%|█████████▏| 341399/371472 [4:59:51<2:20:40, 3.56it/s] 92%|█████████▏| 341400/371472 [4:59:52<2:23:09, 3.50it/s] {'loss': 2.6789, 'learning_rate': 1.7289749833006523e-07, 'epoch': 14.7} + 92%|█████████▏| 341400/371472 [4:59:52<2:23:09, 3.50it/s] 92%|█████████▏| 341401/371472 [4:59:52<2:33:27, 3.27it/s] 92%|█████████▏| 341402/371472 [4:59:52<2:37:54, 3.17it/s] 92%|█████████▏| 341403/371472 [4:59:53<2:39:19, 3.15it/s] 92%|█████████▏| 341404/371472 [4:59:53<2:56:37, 2.84it/s] 92%|█████████▏| 341405/371472 [4:59:53<2:49:08, 2.96it/s] 92%|█████████▏| 341406/371472 [4:59:54<2:37:09, 3.19it/s] 92%|█████████▏| 341407/371472 [4:59:54<2:29:48, 3.34it/s] 92%|█████████▏| 341408/371472 [4:59:54<2:23:53, 3.48it/s] 92%|█████████▏| 341409/371472 [4:59:54<2:20:31, 3.57it/s] 92%|█████████▏| 341410/371472 [4:59:55<2:20:55, 3.56it/s] 92%|█████████▏| 341411/371472 [4:59:55<2:15:24, 3.70it/s] 92%|█████████▏| 341412/371472 [4:59:55<2:15:25, 3.70it/s] 92%|█████████▏| 341413/371472 [4:59:56<2:15:15, 3.70it/s] 92%|█████████▏| 341414/371472 [4:59:56<2:12:47, 3.77it/s] 92%|█████████▏| 341415/371472 [4:59:56<2:28:24, 3.38it/s] 92%|█████████▏| 341416/371472 [4:59:56<2:25:33, 3.44it/s] 92%|█████████▏| 341417/371472 [4:59:57<2:31:34, 3.30it/s] 92%|█████████▏| 341418/371472 [4:59:57<2:25:18, 3.45it/s] 92%|█████████▏| 341419/371472 [4:59:57<2:34:43, 3.24it/s] 92%|█████████▏| 341420/371472 [4:59:58<2:32:28, 3.29it/s] {'loss': 2.5746, 'learning_rate': 1.728490163545864e-07, 'epoch': 14.71} + 92%|█████████▏| 341420/371472 [4:59:58<2:32:28, 3.29it/s] 92%|█████████▏| 341421/371472 [4:59:58<2:25:52, 3.43it/s] 92%|█████████▏| 341422/371472 [4:59:58<2:19:55, 3.58it/s] 92%|█████████▏| 341423/371472 [4:59:58<2:24:44, 3.46it/s] 92%|█████████▏| 341424/371472 [4:59:59<2:23:13, 3.50it/s] 92%|█████████▏| 341425/371472 [4:59:59<2:19:02, 3.60it/s] 92%|█████████▏| 341426/371472 [4:59:59<2:15:39, 3.69it/s] 92%|█████████▏| 341427/371472 [5:00:00<2:15:06, 3.71it/s] 92%|█████████▏| 341428/371472 [5:00:00<2:38:14, 3.16it/s] 92%|█████████▏| 341429/371472 [5:00:00<2:40:27, 3.12it/s] 92%|█████████▏| 341430/371472 [5:00:01<2:31:31, 3.30it/s] 92%|██████��██▏| 341431/371472 [5:00:01<2:24:04, 3.48it/s] 92%|█████████▏| 341432/371472 [5:00:01<2:19:37, 3.59it/s] 92%|█████████▏| 341433/371472 [5:00:01<2:22:12, 3.52it/s] 92%|█████████▏| 341434/371472 [5:00:02<2:29:26, 3.35it/s] 92%|█████████▏| 341435/371472 [5:00:02<2:34:04, 3.25it/s] 92%|█████████▏| 341436/371472 [5:00:02<2:38:55, 3.15it/s] 92%|█████████▏| 341437/371472 [5:00:03<2:35:03, 3.23it/s] 92%|█████████▏| 341438/371472 [5:00:03<2:39:05, 3.15it/s] 92%|█████████▏| 341439/371472 [5:00:03<2:31:52, 3.30it/s] 92%|█████████▏| 341440/371472 [5:00:04<2:31:35, 3.30it/s] {'loss': 2.5274, 'learning_rate': 1.728005343791075e-07, 'epoch': 14.71} + 92%|█████████▏| 341440/371472 [5:00:04<2:31:35, 3.30it/s] 92%|█████████▏| 341441/371472 [5:00:04<2:27:06, 3.40it/s] 92%|█████████▏| 341442/371472 [5:00:04<2:21:42, 3.53it/s] 92%|█████████▏| 341443/371472 [5:00:04<2:19:40, 3.58it/s] 92%|█████████▏| 341444/371472 [5:00:05<2:18:10, 3.62it/s] 92%|█████████▏| 341445/371472 [5:00:05<2:24:36, 3.46it/s] 92%|█████████▏| 341446/371472 [5:00:05<2:19:07, 3.60it/s] 92%|█████████▏| 341447/371472 [5:00:05<2:15:13, 3.70it/s] 92%|█████████▏| 341448/371472 [5:00:06<2:19:08, 3.60it/s] 92%|█████████▏| 341449/371472 [5:00:06<2:27:38, 3.39it/s] 92%|█████████▏| 341450/371472 [5:00:06<2:23:22, 3.49it/s] 92%|█████████▏| 341451/371472 [5:00:07<2:24:06, 3.47it/s] 92%|█████████▏| 341452/371472 [5:00:07<2:23:19, 3.49it/s] 92%|█████████▏| 341453/371472 [5:00:07<2:19:43, 3.58it/s] 92%|█████████▏| 341454/371472 [5:00:07<2:20:38, 3.56it/s] 92%|█████████▏| 341455/371472 [5:00:08<2:29:05, 3.36it/s] 92%|█████████▏| 341456/371472 [5:00:08<2:38:27, 3.16it/s] 92%|█████████▏| 341457/371472 [5:00:08<2:32:29, 3.28it/s] 92%|█████████▏| 341458/371472 [5:00:09<2:27:58, 3.38it/s] 92%|█████████▏| 341459/371472 [5:00:09<2:23:40, 3.48it/s] 92%|█████████▏| 341460/371472 [5:00:09<2:32:27, 3.28it/s] {'loss': 2.6513, 'learning_rate': 1.727520524036286e-07, 'epoch': 14.71} + 92%|█████████▏| 341460/371472 [5:00:09<2:32:27, 3.28it/s] 92%|█████████▏| 341461/371472 [5:00:10<2:33:31, 3.26it/s] 92%|█████████▏| 341462/371472 [5:00:10<2:26:41, 3.41it/s] 92%|█████████▏| 341463/371472 [5:00:10<2:23:34, 3.48it/s] 92%|█████████▏| 341464/371472 [5:00:10<2:21:49, 3.53it/s] 92%|█████████▏| 341465/371472 [5:00:11<2:19:56, 3.57it/s] 92%|█████████▏| 341466/371472 [5:00:11<2:19:57, 3.57it/s] 92%|█████████▏| 341467/371472 [5:00:11<2:16:42, 3.66it/s] 92%|█████████▏| 341468/371472 [5:00:12<2:17:42, 3.63it/s] 92%|█████████▏| 341469/371472 [5:00:12<2:23:18, 3.49it/s] 92%|█████████▏| 341470/371472 [5:00:12<2:20:13, 3.57it/s] 92%|█████████▏| 341471/371472 [5:00:12<2:19:02, 3.60it/s] 92%|█████████▏| 341472/371472 [5:00:13<2:16:35, 3.66it/s] 92%|█████████▏| 341473/371472 [5:00:13<2:16:40, 3.66it/s] 92%|█████████▏| 341474/371472 [5:00:13<2:25:19, 3.44it/s] 92%|█████████▏| 341475/371472 [5:00:14<2:24:36, 3.46it/s] 92%|█████████▏| 341476/371472 [5:00:14<2:20:02, 3.57it/s] 92%|█████████▏| 341477/371472 [5:00:14<2:18:36, 3.61it/s] 92%|█████████▏| 341478/371472 [5:00:14<2:21:05, 3.54it/s] 92%|█████████▏| 341479/371472 [5:00:15<2:29:24, 3.35it/s] 92%|█████████▏| 341480/371472 [5:00:15<2:29:10, 3.35it/s] {'loss': 2.4518, 'learning_rate': 1.7270357042814968e-07, 'epoch': 14.71} + 92%|█████████▏| 341480/371472 [5:00:15<2:29:10, 3.35it/s] 92%|█████████▏| 341481/371472 [5:00:15<2:22:11, 3.52it/s] 92%|█████████▏| 341482/371472 [5:00:16<2:20:37, 3.55it/s] 92%|█████████▏| 341483/371472 [5:00:16<2:24:10, 3.47it/s] 92%|█████████▏| 341484/371472 [5:00:16<2:31:54, 3.29it/s] 92%|█████████▏| 341485/371472 [5:00:17<2:37:07, 3.18it/s] 92%|█████████▏| 341486/371472 [5:00:17<2:29:49, 3.34it/s] 92%|████��████▏| 341487/371472 [5:00:17<2:26:04, 3.42it/s] 92%|█████████▏| 341488/371472 [5:00:17<2:22:29, 3.51it/s] 92%|█████████▏| 341489/371472 [5:00:18<2:30:06, 3.33it/s] 92%|█████████▏| 341490/371472 [5:00:18<2:22:52, 3.50it/s] 92%|█████████▏| 341491/371472 [5:00:18<2:20:48, 3.55it/s] 92%|█████████▏| 341492/371472 [5:00:18<2:16:57, 3.65it/s] 92%|█████████▏| 341493/371472 [5:00:19<2:19:35, 3.58it/s] 92%|█████████▏| 341494/371472 [5:00:19<2:14:48, 3.71it/s] 92%|█████████▏| 341495/371472 [5:00:19<2:26:10, 3.42it/s] 92%|█████████▏| 341496/371472 [5:00:20<2:25:21, 3.44it/s] 92%|█████████▏| 341497/371472 [5:00:20<2:20:25, 3.56it/s] 92%|█████████▏| 341498/371472 [5:00:20<2:16:39, 3.66it/s] 92%|█████████▏| 341499/371472 [5:00:20<2:24:36, 3.45it/s] 92%|█████████▏| 341500/371472 [5:00:21<2:27:07, 3.40it/s] {'loss': 2.741, 'learning_rate': 1.7265508845267085e-07, 'epoch': 14.71} + 92%|█████████▏| 341500/371472 [5:00:21<2:27:07, 3.40it/s] 92%|█████████▏| 341501/371472 [5:00:21<2:29:54, 3.33it/s] 92%|█████████▏| 341502/371472 [5:00:21<2:25:34, 3.43it/s] 92%|█████████▏| 341503/371472 [5:00:22<2:36:02, 3.20it/s] 92%|█████████▏| 341504/371472 [5:00:22<2:28:58, 3.35it/s] 92%|█████████▏| 341505/371472 [5:00:22<2:25:39, 3.43it/s] 92%|█████████▏| 341506/371472 [5:00:23<2:27:53, 3.38it/s] 92%|█████████▏| 341507/371472 [5:00:23<2:25:45, 3.43it/s] 92%|█████████▏| 341508/371472 [5:00:23<2:24:30, 3.46it/s] 92%|█████████▏| 341509/371472 [5:00:23<2:24:11, 3.46it/s] 92%|█████████▏| 341510/371472 [5:00:24<2:23:38, 3.48it/s] 92%|█████████▏| 341511/371472 [5:00:24<2:26:59, 3.40it/s] 92%|█████████▏| 341512/371472 [5:00:24<2:24:56, 3.45it/s] 92%|█████████▏| 341513/371472 [5:00:25<2:35:17, 3.22it/s] 92%|█████████▏| 341514/371472 [5:00:25<2:28:43, 3.36it/s] 92%|█████████▏| 341515/371472 [5:00:25<2:28:36, 3.36it/s] 92%|█████████▏| 341516/371472 [5:00:25<2:22:48, 3.50it/s] 92%|█████████▏| 341517/371472 [5:00:26<2:18:41, 3.60it/s] 92%|█████████▏| 341518/371472 [5:00:26<2:16:59, 3.64it/s] 92%|█████████▏| 341519/371472 [5:00:26<2:18:25, 3.61it/s] 92%|█████████▏| 341520/371472 [5:00:27<2:19:43, 3.57it/s] {'loss': 2.4868, 'learning_rate': 1.726066064771919e-07, 'epoch': 14.71} + 92%|█████████▏| 341520/371472 [5:00:27<2:19:43, 3.57it/s] 92%|█████████▏| 341521/371472 [5:00:27<2:28:16, 3.37it/s] 92%|█████████▏| 341522/371472 [5:00:27<2:24:59, 3.44it/s] 92%|█████████▏| 341523/371472 [5:00:27<2:21:24, 3.53it/s] 92%|█████████▏| 341524/371472 [5:00:28<2:21:36, 3.52it/s] 92%|█████████▏| 341525/371472 [5:00:28<2:20:45, 3.55it/s] 92%|█████████▏| 341526/371472 [5:00:28<2:26:31, 3.41it/s] 92%|█████████▏| 341527/371472 [5:00:29<2:20:04, 3.56it/s] 92%|█████████▏| 341528/371472 [5:00:29<2:22:21, 3.51it/s] 92%|█████████▏| 341529/371472 [5:00:29<2:18:35, 3.60it/s] 92%|█████████▏| 341530/371472 [5:00:29<2:13:14, 3.75it/s] 92%|█████████▏| 341531/371472 [5:00:30<2:12:03, 3.78it/s] 92%|█████████▏| 341532/371472 [5:00:30<2:14:46, 3.70it/s] 92%|█████████▏| 341533/371472 [5:00:30<2:13:23, 3.74it/s] 92%|█████████▏| 341534/371472 [5:00:30<2:14:39, 3.71it/s] 92%|█████████▏| 341535/371472 [5:00:31<2:21:03, 3.54it/s] 92%|█████████▏| 341536/371472 [5:00:31<2:21:39, 3.52it/s] 92%|█████████▏| 341537/371472 [5:00:31<2:32:47, 3.27it/s] 92%|█████████▏| 341538/371472 [5:00:32<2:23:52, 3.47it/s] 92%|█████████▏| 341539/371472 [5:00:32<2:22:55, 3.49it/s] 92%|█████████▏| 341540/371472 [5:00:32<2:19:39, 3.57it/s] {'loss': 2.5749, 'learning_rate': 1.7255812450171305e-07, 'epoch': 14.71} + 92%|█████████▏| 341540/371472 [5:00:32<2:19:39, 3.57it/s] 92%|█████████▏| 341541/371472 [5:00:33<2:30:54, 3.31it/s] 92%|█████████▏| 341542/371472 [5:00:33<2:24:00, 3.46it/s] 92%|██��██████▏| 341543/371472 [5:00:33<2:39:40, 3.12it/s] 92%|█████████▏| 341544/371472 [5:00:33<2:36:58, 3.18it/s] 92%|█████████▏| 341545/371472 [5:00:34<2:26:14, 3.41it/s] 92%|█████████▏| 341546/371472 [5:00:34<2:23:20, 3.48it/s] 92%|█████████▏| 341547/371472 [5:00:35<2:57:29, 2.81it/s] 92%|█████████▏| 341548/371472 [5:00:35<2:45:35, 3.01it/s] 92%|█████████▏| 341549/371472 [5:00:35<2:36:49, 3.18it/s] 92%|█████████▏| 341550/371472 [5:00:35<2:29:49, 3.33it/s] 92%|█████████▏| 341551/371472 [5:00:36<2:24:40, 3.45it/s] 92%|█████████▏| 341552/371472 [5:00:36<2:16:28, 3.65it/s] 92%|█████████▏| 341553/371472 [5:00:36<2:18:34, 3.60it/s] 92%|█████████▏| 341554/371472 [5:00:36<2:14:42, 3.70it/s] 92%|█████████▏| 341555/371472 [5:00:37<2:12:40, 3.76it/s] 92%|█████████▏| 341556/371472 [5:00:37<2:22:12, 3.51it/s] 92%|█████████▏| 341557/371472 [5:00:37<2:18:13, 3.61it/s] 92%|█████████▏| 341558/371472 [5:00:38<2:19:27, 3.58it/s] 92%|█████████▏| 341559/371472 [5:00:38<2:22:12, 3.51it/s] 92%|█████████▏| 341560/371472 [5:00:38<2:17:20, 3.63it/s] {'loss': 2.5644, 'learning_rate': 1.7250964252623412e-07, 'epoch': 14.71} + 92%|█████████▏| 341560/371472 [5:00:38<2:17:20, 3.63it/s] 92%|█████████▏| 341561/371472 [5:00:38<2:17:52, 3.62it/s] 92%|█████████▏| 341562/371472 [5:00:39<2:33:24, 3.25it/s] 92%|█████████▏| 341563/371472 [5:00:39<2:27:48, 3.37it/s] 92%|█████████▏| 341564/371472 [5:00:39<2:38:48, 3.14it/s] 92%|█████████▏| 341565/371472 [5:00:40<2:32:10, 3.28it/s] 92%|█████████▏| 341566/371472 [5:00:40<2:29:52, 3.33it/s] 92%|█████████▏| 341567/371472 [5:00:40<2:24:24, 3.45it/s] 92%|█████████▏| 341568/371472 [5:00:41<2:35:07, 3.21it/s] 92%|█████████▏| 341569/371472 [5:00:41<2:34:14, 3.23it/s] 92%|█████████▏| 341570/371472 [5:00:41<2:27:30, 3.38it/s] 92%|█████████▏| 341571/371472 [5:00:41<2:23:14, 3.48it/s] 92%|█████████▏| 341572/371472 [5:00:42<2:18:37, 3.59it/s] 92%|█████████▏| 341573/371472 [5:00:42<2:14:34, 3.70it/s] 92%|█████████▏| 341574/371472 [5:00:42<2:17:00, 3.64it/s] 92%|█████████▏| 341575/371472 [5:00:43<2:30:56, 3.30it/s] 92%|█████████▏| 341576/371472 [5:00:43<2:24:09, 3.46it/s] 92%|█████████▏| 341577/371472 [5:00:43<2:25:04, 3.43it/s] 92%|█████████▏| 341578/371472 [5:00:43<2:18:28, 3.60it/s] 92%|█████████▏| 341579/371472 [5:00:44<2:15:34, 3.67it/s] 92%|█████████▏| 341580/371472 [5:00:44<2:19:51, 3.56it/s] {'loss': 2.471, 'learning_rate': 1.7246116055075527e-07, 'epoch': 14.71} + 92%|█████████▏| 341580/371472 [5:00:44<2:19:51, 3.56it/s] 92%|█████████▏| 341581/371472 [5:00:44<2:18:27, 3.60it/s] 92%|█████████▏| 341582/371472 [5:00:44<2:17:33, 3.62it/s] 92%|█████████▏| 341583/371472 [5:00:45<2:12:16, 3.77it/s] 92%|█████████▏| 341584/371472 [5:00:45<2:15:36, 3.67it/s] 92%|█████████▏| 341585/371472 [5:00:45<2:18:16, 3.60it/s] 92%|█████████▏| 341586/371472 [5:00:46<2:29:22, 3.33it/s] 92%|█████████▏| 341587/371472 [5:00:46<2:41:59, 3.07it/s] 92%|█████████▏| 341588/371472 [5:00:46<2:33:32, 3.24it/s] 92%|█████████▏| 341589/371472 [5:00:47<2:27:34, 3.37it/s] 92%|█████████▏| 341590/371472 [5:00:47<2:27:08, 3.38it/s] 92%|█████████▏| 341591/371472 [5:00:47<2:24:51, 3.44it/s] 92%|█████████▏| 341592/371472 [5:00:47<2:19:31, 3.57it/s] 92%|█████████▏| 341593/371472 [5:00:48<2:16:48, 3.64it/s] 92%|█████████▏| 341594/371472 [5:00:48<2:16:16, 3.65it/s] 92%|█████████▏| 341595/371472 [5:00:48<2:22:11, 3.50it/s] 92%|█████████▏| 341596/371472 [5:00:49<2:32:45, 3.26it/s] 92%|█████████▏| 341597/371472 [5:00:49<2:22:11, 3.50it/s] 92%|█████████▏| 341598/371472 [5:00:49<2:19:15, 3.58it/s] 92%|█████████▏| 341599/371472 [5:00:49<2:19:06, 3.58it/s] 92%|█████████▏| 341600/371472 [5:00:50<2:24:21, 3.45it/s] {'loss': 2.4756, 'learning_rate': 1.7241267857527632e-07, 'epoch': 14.71} + 92%|█████████▏| 341600/371472 [5:00:50<2:24:21, 3.45it/s] 92%|█████████▏| 341601/371472 [5:00:50<2:19:00, 3.58it/s] 92%|█████████▏| 341602/371472 [5:00:50<2:19:11, 3.58it/s] 92%|█████████▏| 341603/371472 [5:00:50<2:13:58, 3.72it/s] 92%|█████████▏| 341604/371472 [5:00:51<2:11:08, 3.80it/s] 92%|█████████▏| 341605/371472 [5:00:51<2:10:37, 3.81it/s] 92%|█████████▏| 341606/371472 [5:00:51<2:11:12, 3.79it/s] 92%|█████████▏| 341607/371472 [5:00:52<2:11:41, 3.78it/s] 92%|█████████▏| 341608/371472 [5:00:52<2:11:43, 3.78it/s] 92%|█████████▏| 341609/371472 [5:00:52<2:10:49, 3.80it/s] 92%|█████████▏| 341610/371472 [5:00:52<2:17:17, 3.62it/s] 92%|█████████▏| 341611/371472 [5:00:53<2:16:19, 3.65it/s] 92%|█████████▏| 341612/371472 [5:00:53<2:18:07, 3.60it/s] 92%|█████████▏| 341613/371472 [5:00:53<2:14:07, 3.71it/s] 92%|█████████▏| 341614/371472 [5:00:53<2:14:40, 3.70it/s] 92%|█████████▏| 341615/371472 [5:00:54<2:30:24, 3.31it/s] 92%|█████████▏| 341616/371472 [5:00:54<2:32:46, 3.26it/s] 92%|█████████▏| 341617/371472 [5:00:54<2:26:31, 3.40it/s] 92%|█████████▏| 341618/371472 [5:00:55<2:21:47, 3.51it/s] 92%|█████████▏| 341619/371472 [5:00:55<2:27:52, 3.36it/s] 92%|█████████▏| 341620/371472 [5:00:55<2:34:17, 3.22it/s] {'loss': 2.7564, 'learning_rate': 1.723641965997975e-07, 'epoch': 14.71} + 92%|█████████▏| 341620/371472 [5:00:55<2:34:17, 3.22it/s] 92%|█████████▏| 341621/371472 [5:00:56<2:35:44, 3.19it/s] 92%|█████████▏| 341622/371472 [5:00:56<2:34:21, 3.22it/s] 92%|█████████▏| 341623/371472 [5:00:56<2:35:46, 3.19it/s] 92%|█████████▏| 341624/371472 [5:00:57<2:30:07, 3.31it/s] 92%|█████████▏| 341625/371472 [5:00:57<2:29:13, 3.33it/s] 92%|█████████▏| 341626/371472 [5:00:57<2:25:54, 3.41it/s] 92%|█████████▏| 341627/371472 [5:00:57<2:21:43, 3.51it/s] 92%|█████████▏| 341628/371472 [5:00:58<2:18:18, 3.60it/s] 92%|█████████▏| 341629/371472 [5:00:58<2:15:43, 3.66it/s] 92%|█████████▏| 341630/371472 [5:00:58<2:16:13, 3.65it/s] 92%|█████████▏| 341631/371472 [5:00:58<2:12:25, 3.76it/s] 92%|█████████▏| 341632/371472 [5:00:59<2:13:24, 3.73it/s] 92%|█████████▏| 341633/371472 [5:00:59<2:17:21, 3.62it/s] 92%|█████████▏| 341634/371472 [5:00:59<2:27:38, 3.37it/s] 92%|█████████▏| 341635/371472 [5:01:00<2:23:17, 3.47it/s] 92%|█████████▏| 341636/371472 [5:01:00<2:20:34, 3.54it/s] 92%|█████████▏| 341637/371472 [5:01:00<2:17:35, 3.61it/s] 92%|█████████▏| 341638/371472 [5:01:01<2:35:56, 3.19it/s] 92%|█████████▏| 341639/371472 [5:01:01<2:33:23, 3.24it/s] 92%|█████████▏| 341640/371472 [5:01:01<2:31:46, 3.28it/s] {'loss': 2.5753, 'learning_rate': 1.7231571462431854e-07, 'epoch': 14.72} + 92%|█████████▏| 341640/371472 [5:01:01<2:31:46, 3.28it/s] 92%|█████████▏| 341641/371472 [5:01:01<2:26:41, 3.39it/s] 92%|█████████▏| 341642/371472 [5:01:02<2:33:49, 3.23it/s] 92%|█████████▏| 341643/371472 [5:01:02<2:25:34, 3.42it/s] 92%|█████████▏| 341644/371472 [5:01:02<2:29:38, 3.32it/s] 92%|█████████▏| 341645/371472 [5:01:03<2:29:25, 3.33it/s] 92%|█████████▏| 341646/371472 [5:01:03<2:27:55, 3.36it/s] 92%|█████████▏| 341647/371472 [5:01:03<2:22:29, 3.49it/s] 92%|█████████▏| 341648/371472 [5:01:03<2:20:06, 3.55it/s] 92%|█████████▏| 341649/371472 [5:01:04<2:21:36, 3.51it/s] 92%|█████████▏| 341650/371472 [5:01:04<2:31:56, 3.27it/s] 92%|█████████▏| 341651/371472 [5:01:04<2:25:00, 3.43it/s] 92%|█████████▏| 341652/371472 [5:01:05<2:23:44, 3.46it/s] 92%|█████████▏| 341653/371472 [5:01:05<2:22:00, 3.50it/s] 92%|█████████▏| 341654/371472 [5:01:05<2:19:02, 3.57it/s] 92%|█████████▏| 341655/371472 [5:01:05<2:23:07, 3.47it/s] 92%|█████████▏| 341656/371472 [5:01:06<2:22:24, 3.49it/s] 92%|█████████▏| 341657/371472 [5:01:06<2:23:21, 3.47it/s] 92%|█████████▏| 341658/371472 [5:01:06<2:29:45, 3.32it/s] 92%|█████████▏| 341659/371472 [5:01:07<2:27:03, 3.38it/s] 92%|█████████▏| 341660/371472 [5:01:07<2:23:12, 3.47it/s] {'loss': 2.4489, 'learning_rate': 1.722672326488397e-07, 'epoch': 14.72} + 92%|█████████▏| 341660/371472 [5:01:07<2:23:12, 3.47it/s] 92%|█████████▏| 341661/371472 [5:01:07<2:18:24, 3.59it/s] 92%|█████████▏| 341662/371472 [5:01:07<2:15:44, 3.66it/s] 92%|█████████▏| 341663/371472 [5:01:08<2:18:09, 3.60it/s] 92%|█████████▏| 341664/371472 [5:01:08<2:31:36, 3.28it/s] 92%|█████████▏| 341665/371472 [5:01:08<2:23:45, 3.46it/s] 92%|█████████▏| 341666/371472 [5:01:09<2:22:12, 3.49it/s] 92%|█████████▏| 341667/371472 [5:01:09<2:29:35, 3.32it/s] 92%|█████████▏| 341668/371472 [5:01:09<2:23:00, 3.47it/s] 92%|█████████▏| 341669/371472 [5:01:10<2:28:15, 3.35it/s] 92%|█████████▏| 341670/371472 [5:01:10<2:23:23, 3.46it/s] 92%|█████████▏| 341671/371472 [5:01:10<2:34:49, 3.21it/s] 92%|█████████▏| 341672/371472 [5:01:11<2:40:20, 3.10it/s] 92%|█████████▏| 341673/371472 [5:01:11<2:31:00, 3.29it/s] 92%|█████████▏| 341674/371472 [5:01:11<2:27:28, 3.37it/s] 92%|█████████▏| 341675/371472 [5:01:11<2:29:32, 3.32it/s] 92%|█████████▏| 341676/371472 [5:01:12<2:25:44, 3.41it/s] 92%|█████████▏| 341677/371472 [5:01:12<2:23:21, 3.46it/s] 92%|█████████▏| 341678/371472 [5:01:12<2:18:36, 3.58it/s] 92%|█████████▏| 341679/371472 [5:01:12<2:15:14, 3.67it/s] 92%|█████████▏| 341680/371472 [5:01:13<2:19:00, 3.57it/s] {'loss': 2.5379, 'learning_rate': 1.7221875067336076e-07, 'epoch': 14.72} + 92%|█████████▏| 341680/371472 [5:01:13<2:19:00, 3.57it/s] 92%|█████████▏| 341681/371472 [5:01:13<2:22:00, 3.50it/s] 92%|█████████▏| 341682/371472 [5:01:13<2:24:13, 3.44it/s] 92%|█████████▏| 341683/371472 [5:01:14<2:27:27, 3.37it/s] 92%|█████████▏| 341684/371472 [5:01:14<2:24:47, 3.43it/s] 92%|█████████▏| 341685/371472 [5:01:14<2:21:56, 3.50it/s] 92%|█████████▏| 341686/371472 [5:01:15<2:30:22, 3.30it/s] 92%|█████████▏| 341687/371472 [5:01:15<2:29:04, 3.33it/s] 92%|█████████▏| 341688/371472 [5:01:15<2:25:41, 3.41it/s] 92%|█████████▏| 341689/371472 [5:01:15<2:24:17, 3.44it/s] 92%|█████████▏| 341690/371472 [5:01:16<2:24:51, 3.43it/s] 92%|█████████▏| 341691/371472 [5:01:16<2:19:10, 3.57it/s] 92%|█████████▏| 341692/371472 [5:01:16<2:20:50, 3.52it/s] 92%|█████████▏| 341693/371472 [5:01:17<2:25:51, 3.40it/s] 92%|█████████▏| 341694/371472 [5:01:17<2:46:08, 2.99it/s] 92%|█████████▏| 341695/371472 [5:01:17<2:33:34, 3.23it/s] 92%|█████████▏| 341696/371472 [5:01:18<2:30:37, 3.29it/s] 92%|█████████▏| 341697/371472 [5:01:18<2:24:14, 3.44it/s] 92%|█████████▏| 341698/371472 [5:01:18<2:27:08, 3.37it/s] 92%|█████████▏| 341699/371472 [5:01:18<2:28:13, 3.35it/s] 92%|█████████▏| 341700/371472 [5:01:19<2:29:17, 3.32it/s] {'loss': 2.5592, 'learning_rate': 1.7217026869788189e-07, 'epoch': 14.72} + 92%|█████████▏| 341700/371472 [5:01:19<2:29:17, 3.32it/s] 92%|█████████▏| 341701/371472 [5:01:19<2:20:08, 3.54it/s] 92%|█████████▏| 341702/371472 [5:01:19<2:28:35, 3.34it/s] 92%|█████████▏| 341703/371472 [5:01:20<2:21:24, 3.51it/s] 92%|█████████▏| 341704/371472 [5:01:20<2:22:19, 3.49it/s] 92%|█████████▏| 341705/371472 [5:01:20<2:18:09, 3.59it/s] 92%|█████████▏| 341706/371472 [5:01:20<2:16:26, 3.64it/s] 92%|█████████▏| 341707/371472 [5:01:21<2:13:03, 3.73it/s] 92%|█████████▏| 341708/371472 [5:01:21<2:11:52, 3.76it/s] 92%|█████████▏| 341709/371472 [5:01:21<2:17:19, 3.61it/s] 92%|█████████▏| 341710/371472 [5:01:21<2:16:09, 3.64it/s] 92%|█████████▏| 341711/371472 [5:01:22<2:14:39, 3.68it/s] 92%|█████████▏| 341712/371472 [5:01:22<2:33:57, 3.22it/s] 92%|█████████▏| 341713/371472 [5:01:22<2:25:33, 3.41it/s] 92%|█████████▏| 341714/371472 [5:01:23<2:17:22, 3.61it/s] 92%|█████████▏| 341715/371472 [5:01:23<2:21:53, 3.50it/s] 92%|█████████▏| 341716/371472 [5:01:23<2:24:40, 3.43it/s] 92%|█████████▏| 341717/371472 [5:01:24<2:25:41, 3.40it/s] 92%|█████████▏| 341718/371472 [5:01:24<2:25:54, 3.40it/s] 92%|█████████▏| 341719/371472 [5:01:24<2:24:30, 3.43it/s] 92%|█████████▏| 341720/371472 [5:01:24<2:30:15, 3.30it/s] {'loss': 2.4912, 'learning_rate': 1.7212178672240296e-07, 'epoch': 14.72} + 92%|█████████▏| 341720/371472 [5:01:24<2:30:15, 3.30it/s] 92%|█████████▏| 341721/371472 [5:01:25<2:32:38, 3.25it/s] 92%|█████████▏| 341722/371472 [5:01:25<2:30:46, 3.29it/s] 92%|█████████▏| 341723/371472 [5:01:25<2:24:31, 3.43it/s] 92%|█████████▏| 341724/371472 [5:01:26<2:27:02, 3.37it/s] 92%|█████████▏| 341725/371472 [5:01:26<2:33:32, 3.23it/s] 92%|█████████▏| 341726/371472 [5:01:26<2:32:15, 3.26it/s] 92%|█████████▏| 341727/371472 [5:01:27<2:33:56, 3.22it/s] 92%|█████████▏| 341728/371472 [5:01:27<2:29:03, 3.33it/s] 92%|█████████▏| 341729/371472 [5:01:27<2:27:27, 3.36it/s] 92%|█████████▏| 341730/371472 [5:01:27<2:26:55, 3.37it/s] 92%|█████████▏| 341731/371472 [5:01:28<2:37:32, 3.15it/s] 92%|█████████▏| 341732/371472 [5:01:28<2:39:26, 3.11it/s] 92%|█████████▏| 341733/371472 [5:01:28<2:29:17, 3.32it/s] 92%|█████████▏| 341734/371472 [5:01:29<2:22:33, 3.48it/s] 92%|█████████▏| 341735/371472 [5:01:29<2:17:36, 3.60it/s] 92%|█████████▏| 341736/371472 [5:01:29<2:26:30, 3.38it/s] 92%|█████████▏| 341737/371472 [5:01:29<2:20:38, 3.52it/s] 92%|█████████▏| 341738/371472 [5:01:30<2:26:36, 3.38it/s] 92%|█████████▏| 341739/371472 [5:01:30<2:21:32, 3.50it/s] 92%|█████████▏| 341740/371472 [5:01:30<2:28:36, 3.33it/s] {'loss': 2.5286, 'learning_rate': 1.7207330474692403e-07, 'epoch': 14.72} + 92%|█████████▏| 341740/371472 [5:01:30<2:28:36, 3.33it/s] 92%|█████████▏| 341741/371472 [5:01:31<2:22:51, 3.47it/s] 92%|█████████▏| 341742/371472 [5:01:31<2:16:32, 3.63it/s] 92%|█████████▏| 341743/371472 [5:01:31<2:21:43, 3.50it/s] 92%|█████████▏| 341744/371472 [5:01:32<2:25:05, 3.41it/s] 92%|█████████▏| 341745/371472 [5:01:32<2:19:12, 3.56it/s] 92%|█████████▏| 341746/371472 [5:01:32<2:17:42, 3.60it/s] 92%|█████████▏| 341747/371472 [5:01:32<2:17:04, 3.61it/s] 92%|█████████▏| 341748/371472 [5:01:33<2:24:00, 3.44it/s] 92%|█████████▏| 341749/371472 [5:01:33<2:28:16, 3.34it/s] 92%|█████████▏| 341750/371472 [5:01:33<2:25:29, 3.40it/s] 92%|█████████▏| 341751/371472 [5:01:34<2:26:58, 3.37it/s] 92%|█████████▏| 341752/371472 [5:01:34<2:22:14, 3.48it/s] 92%|█████████▏| 341753/371472 [5:01:34<2:19:37, 3.55it/s] 92%|█████████▏| 341754/371472 [5:01:34<2:22:24, 3.48it/s] 92%|█████████▏| 341755/371472 [5:01:35<2:18:46, 3.57it/s] 92%|█████████▏| 341756/371472 [5:01:35<2:15:52, 3.65it/s] 92%|█████████▏| 341757/371472 [5:01:35<2:19:11, 3.56it/s] 92%|█████████▏| 341758/371472 [5:01:35<2:18:42, 3.57it/s] 92%|█████████▏| 341759/371472 [5:01:36<2:19:47, 3.54it/s] 92%|█████████▏| 341760/371472 [5:01:36<2:18:33, 3.57it/s] {'loss': 2.6536, 'learning_rate': 1.720248227714452e-07, 'epoch': 14.72} + 92%|█████████▏| 341760/371472 [5:01:36<2:18:33, 3.57it/s] 92%|█████████▏| 341761/371472 [5:01:36<2:30:13, 3.30it/s] 92%|█████████▏| 341762/371472 [5:01:37<2:21:46, 3.49it/s] 92%|█████████▏| 341763/371472 [5:01:37<2:21:05, 3.51it/s] 92%|█████████▏| 341764/371472 [5:01:37<2:18:19, 3.58it/s] 92%|█████████▏| 341765/371472 [5:01:38<2:25:07, 3.41it/s] 92%|█████████▏| 341766/371472 [5:01:38<2:17:19, 3.61it/s] 92%|█████████▏| 341767/371472 [5:01:38<2:16:46, 3.62it/s] 92%|█████████▏| 341768/371472 [5:01:38<2:20:15, 3.53it/s] 92%|█████████▏| 341769/371472 [5:01:39<2:28:27, 3.33it/s] 92%|█████████▏| 341770/371472 [5:01:39<2:25:08, 3.41it/s] 92%|█████████▏| 341771/371472 [5:01:39<2:20:11, 3.53it/s] 92%|█████████▏| 341772/371472 [5:01:40<2:22:24, 3.48it/s] 92%|█████████▏| 341773/371472 [5:01:40<2:17:09, 3.61it/s] 92%|█████████▏| 341774/371472 [5:01:40<2:15:38, 3.65it/s] 92%|█████████▏| 341775/371472 [5:01:40<2:23:17, 3.45it/s] 92%|█████████▏| 341776/371472 [5:01:41<2:19:15, 3.55it/s] 92%|█████████▏| 341777/371472 [5:01:41<2:16:05, 3.64it/s] 92%|█████████▏| 341778/371472 [5:01:41<2:17:23, 3.60it/s] 92%|█████████▏| 341779/371472 [5:01:41<2:15:34, 3.65it/s] 92%|█████████▏| 341780/371472 [5:01:42<2:11:51, 3.75it/s] {'loss': 2.5995, 'learning_rate': 1.7197634079596625e-07, 'epoch': 14.72} + 92%|█████████▏| 341780/371472 [5:01:42<2:11:51, 3.75it/s] 92%|█████████▏| 341781/371472 [5:01:42<2:19:18, 3.55it/s] 92%|█████████▏| 341782/371472 [5:01:42<2:14:04, 3.69it/s] 92%|█████████▏| 341783/371472 [5:01:43<2:22:47, 3.47it/s] 92%|█████████▏| 341784/371472 [5:01:43<2:21:27, 3.50it/s] 92%|█████████▏| 341785/371472 [5:01:43<2:19:46, 3.54it/s] 92%|█████████▏| 341786/371472 [5:01:43<2:14:12, 3.69it/s] 92%|█████████▏| 341787/371472 [5:01:44<2:14:14, 3.69it/s] 92%|█████████▏| 341788/371472 [5:01:44<2:12:25, 3.74it/s] 92%|█████████▏| 341789/371472 [5:01:44<2:09:08, 3.83it/s] 92%|█████████▏| 341790/371472 [5:01:44<2:18:54, 3.56it/s] 92%|█████████▏| 341791/371472 [5:01:45<2:19:36, 3.54it/s] 92%|█████████▏| 341792/371472 [5:01:45<2:16:42, 3.62it/s] 92%|█████████▏| 341793/371472 [5:01:45<2:21:40, 3.49it/s] 92%|█████████▏| 341794/371472 [5:01:46<2:19:48, 3.54it/s] 92%|█████████▏| 341795/371472 [5:01:46<2:20:35, 3.52it/s] 92%|█████████▏| 341796/371472 [5:01:46<2:20:37, 3.52it/s] 92%|█████████▏| 341797/371472 [5:01:47<2:26:44, 3.37it/s] 92%|█████████▏| 341798/371472 [5:01:47<2:26:24, 3.38it/s] 92%|█████████▏| 341799/371472 [5:01:47<2:20:24, 3.52it/s] 92%|█████████▏| 341800/371472 [5:01:47<2:20:14, 3.53it/s] {'loss': 2.5196, 'learning_rate': 1.719278588204874e-07, 'epoch': 14.72} + 92%|█████████▏| 341800/371472 [5:01:47<2:20:14, 3.53it/s] 92%|█████████▏| 341801/371472 [5:01:48<2:24:22, 3.43it/s] 92%|█████████▏| 341802/371472 [5:01:48<2:20:26, 3.52it/s] 92%|█████████▏| 341803/371472 [5:01:48<2:17:36, 3.59it/s] 92%|█████████▏| 341804/371472 [5:01:48<2:13:38, 3.70it/s] 92%|█████████▏| 341805/371472 [5:01:49<2:17:04, 3.61it/s] 92%|█████████▏| 341806/371472 [5:01:49<2:20:37, 3.52it/s] 92%|█████████▏| 341807/371472 [5:01:49<2:21:50, 3.49it/s] 92%|█████████▏| 341808/371472 [5:01:50<2:24:06, 3.43it/s] 92%|█████████▏| 341809/371472 [5:01:50<2:16:58, 3.61it/s] 92%|█████████▏| 341810/371472 [5:01:50<2:19:14, 3.55it/s] 92%|█████████▏| 341811/371472 [5:01:50<2:16:22, 3.63it/s] 92%|█████████▏| 341812/371472 [5:01:51<2:26:01, 3.39it/s] 92%|█████████▏| 341813/371472 [5:01:51<2:38:55, 3.11it/s] 92%|█████████▏| 341814/371472 [5:01:51<2:31:41, 3.26it/s] 92%|█████████▏| 341815/371472 [5:01:52<2:26:14, 3.38it/s] 92%|█████████▏| 341816/371472 [5:01:52<2:31:57, 3.25it/s] 92%|█████████▏| 341817/371472 [5:01:52<2:31:50, 3.26it/s] 92%|█████████▏| 341818/371472 [5:01:53<2:24:17, 3.43it/s] 92%|█████████▏| 341819/371472 [5:01:53<2:22:23, 3.47it/s] 92%|█████████▏| 341820/371472 [5:01:53<2:22:19, 3.47it/s] {'loss': 2.5159, 'learning_rate': 1.7187937684500847e-07, 'epoch': 14.72} + 92%|█████████▏| 341820/371472 [5:01:53<2:22:19, 3.47it/s] 92%|█████████▏| 341821/371472 [5:01:53<2:20:12, 3.52it/s] 92%|█████████▏| 341822/371472 [5:01:54<2:17:34, 3.59it/s] 92%|█████████▏| 341823/371472 [5:01:54<2:19:16, 3.55it/s] 92%|█████████▏| 341824/371472 [5:01:54<2:14:48, 3.67it/s] 92%|█████████��| 341825/371472 [5:01:55<2:24:20, 3.42it/s] 92%|█████████▏| 341826/371472 [5:01:55<2:20:01, 3.53it/s] 92%|█████████▏| 341827/371472 [5:01:55<2:14:41, 3.67it/s] 92%|█████████▏| 341828/371472 [5:01:55<2:10:31, 3.79it/s] 92%|█████████▏| 341829/371472 [5:01:56<2:08:55, 3.83it/s] 92%|█████████▏| 341830/371472 [5:01:56<2:11:22, 3.76it/s] 92%|█████████▏| 341831/371472 [5:01:56<2:14:45, 3.67it/s] 92%|█████████▏| 341832/371472 [5:01:56<2:18:54, 3.56it/s] 92%|█████████▏| 341833/371472 [5:01:57<2:20:11, 3.52it/s] 92%|█████████▏| 341834/371472 [5:01:57<2:18:57, 3.55it/s] 92%|█████████▏| 341835/371472 [5:01:57<2:14:43, 3.67it/s] 92%|█████████▏| 341836/371472 [5:01:58<2:18:08, 3.58it/s] 92%|█████████▏| 341837/371472 [5:01:58<2:17:44, 3.59it/s] 92%|█████████▏| 341838/371472 [5:01:58<2:20:16, 3.52it/s] 92%|█████████▏| 341839/371472 [5:01:58<2:24:39, 3.41it/s] 92%|█████████▏| 341840/371472 [5:01:59<2:28:48, 3.32it/s] {'loss': 2.5153, 'learning_rate': 1.7183089486952962e-07, 'epoch': 14.72} + 92%|█████████▏| 341840/371472 [5:01:59<2:28:48, 3.32it/s] 92%|█████████▏| 341841/371472 [5:01:59<2:28:51, 3.32it/s] 92%|█████████▏| 341842/371472 [5:01:59<2:22:16, 3.47it/s] 92%|█████████▏| 341843/371472 [5:02:00<2:16:20, 3.62it/s] 92%|█████████▏| 341844/371472 [5:02:00<2:24:18, 3.42it/s] 92%|█████████▏| 341845/371472 [5:02:00<2:31:50, 3.25it/s] 92%|█████████▏| 341846/371472 [5:02:01<2:33:48, 3.21it/s] 92%|█████████▏| 341847/371472 [5:02:01<2:25:35, 3.39it/s] 92%|█████████▏| 341848/371472 [5:02:01<2:22:36, 3.46it/s] 92%|█████████▏| 341849/371472 [5:02:01<2:18:41, 3.56it/s] 92%|█████████▏| 341850/371472 [5:02:02<2:13:20, 3.70it/s] 92%|█████████▏| 341851/371472 [5:02:02<2:14:47, 3.66it/s] 92%|█████████▏| 341852/371472 [5:02:02<2:14:09, 3.68it/s] 92%|█████████▏| 341853/371472 [5:02:02<2:22:54, 3.45it/s] 92%|█████████▏| 341854/371472 [5:02:03<2:25:04, 3.40it/s] 92%|█████████▏| 341855/371472 [5:02:03<2:20:46, 3.51it/s] 92%|█████████▏| 341856/371472 [5:02:03<2:18:51, 3.55it/s] 92%|█████████▏| 341857/371472 [5:02:04<2:29:20, 3.30it/s] 92%|█████████▏| 341858/371472 [5:02:04<2:42:54, 3.03it/s] 92%|█████████▏| 341859/371472 [5:02:04<2:36:32, 3.15it/s] 92%|█████████▏| 341860/371472 [5:02:05<2:40:33, 3.07it/s] {'loss': 2.4343, 'learning_rate': 1.7178241289405067e-07, 'epoch': 14.72} + 92%|█████████▏| 341860/371472 [5:02:05<2:40:33, 3.07it/s] 92%|█████████▏| 341861/371472 [5:02:05<2:31:27, 3.26it/s] 92%|█████████▏| 341862/371472 [5:02:05<2:36:53, 3.15it/s] 92%|█████████▏| 341863/371472 [5:02:06<2:31:40, 3.25it/s] 92%|█████████▏| 341864/371472 [5:02:06<2:30:50, 3.27it/s] 92%|█████████▏| 341865/371472 [5:02:06<2:38:11, 3.12it/s] 92%|█████████▏| 341866/371472 [5:02:07<2:27:06, 3.35it/s] 92%|█████████▏| 341867/371472 [5:02:07<2:22:47, 3.46it/s] 92%|█████████▏| 341868/371472 [5:02:07<2:25:11, 3.40it/s] 92%|█████████▏| 341869/371472 [5:02:07<2:24:18, 3.42it/s] 92%|█████████▏| 341870/371472 [5:02:08<2:19:06, 3.55it/s] 92%|█████████▏| 341871/371472 [5:02:08<2:16:31, 3.61it/s] 92%|█████████▏| 341872/371472 [5:02:08<2:12:33, 3.72it/s] 92%|█████████▏| 341873/371472 [5:02:08<2:10:42, 3.77it/s] 92%|█████████▏| 341874/371472 [5:02:09<2:13:02, 3.71it/s] 92%|█████████▏| 341875/371472 [5:02:09<2:12:20, 3.73it/s] 92%|█████████▏| 341876/371472 [5:02:09<2:22:44, 3.46it/s] 92%|█████████▏| 341877/371472 [5:02:10<2:28:20, 3.33it/s] 92%|█████████▏| 341878/371472 [5:02:10<2:26:25, 3.37it/s] 92%|█████████▏| 341879/371472 [5:02:10<2:20:24, 3.51it/s] 92%|█████████▏| 341880/371472 [5:02:10<2:14:09, 3.68it/s] {'loss': 2.5033, 'learning_rate': 1.7173393091857185e-07, 'epoch': 14.73} + 92%|█████████▏| 341880/371472 [5:02:10<2:14:09, 3.68it/s] 92%|█████████▏| 341881/371472 [5:02:11<2:11:22, 3.75it/s] 92%|█████████▏| 341882/371472 [5:02:11<2:15:58, 3.63it/s] 92%|█████████▏| 341883/371472 [5:02:11<2:14:16, 3.67it/s] 92%|█████████▏| 341884/371472 [5:02:11<2:13:21, 3.70it/s] 92%|█████████▏| 341885/371472 [5:02:12<2:08:41, 3.83it/s] 92%|█████████▏| 341886/371472 [5:02:12<2:13:02, 3.71it/s] 92%|█████████▏| 341887/371472 [5:02:12<2:16:39, 3.61it/s] 92%|█████████▏| 341888/371472 [5:02:13<2:14:17, 3.67it/s] 92%|█████████▏| 341889/371472 [5:02:13<2:13:25, 3.70it/s] 92%|█████████▏| 341890/371472 [5:02:13<2:09:36, 3.80it/s] 92%|█████████▏| 341891/371472 [5:02:13<2:13:40, 3.69it/s] 92%|█████████▏| 341892/371472 [5:02:14<2:16:38, 3.61it/s] 92%|█████████▏| 341893/371472 [5:02:14<2:23:02, 3.45it/s] 92%|█████████▏| 341894/371472 [5:02:14<2:17:48, 3.58it/s] 92%|█████████▏| 341895/371472 [5:02:15<2:17:42, 3.58it/s] 92%|█████████▏| 341896/371472 [5:02:15<2:21:21, 3.49it/s] 92%|█████████▏| 341897/371472 [5:02:15<2:24:23, 3.41it/s] 92%|█████████▏| 341898/371472 [5:02:16<2:44:46, 2.99it/s] 92%|█████████▏| 341899/371472 [5:02:16<2:50:43, 2.89it/s] 92%|█████████▏| 341900/371472 [5:02:16<2:41:13, 3.06it/s] {'loss': 2.571, 'learning_rate': 1.716854489430929e-07, 'epoch': 14.73} + 92%|█████████▏| 341900/371472 [5:02:16<2:41:13, 3.06it/s] 92%|█████████▏| 341901/371472 [5:02:16<2:33:36, 3.21it/s] 92%|█████████▏| 341902/371472 [5:02:17<2:24:24, 3.41it/s] 92%|█████████▏| 341903/371472 [5:02:17<2:17:16, 3.59it/s] 92%|█████████▏| 341904/371472 [5:02:17<2:16:13, 3.62it/s] 92%|█████████▏| 341905/371472 [5:02:17<2:12:01, 3.73it/s] 92%|█████████▏| 341906/371472 [5:02:18<2:11:00, 3.76it/s] 92%|█████████▏| 341907/371472 [5:02:18<2:15:01, 3.65it/s] 92%|█████████▏| 341908/371472 [5:02:18<2:13:24, 3.69it/s] 92%|█████████▏| 341909/371472 [5:02:19<2:13:33, 3.69it/s] 92%|█████████▏| 341910/371472 [5:02:19<2:11:28, 3.75it/s] 92%|█████████▏| 341911/371472 [5:02:19<2:15:05, 3.65it/s] 92%|█████████▏| 341912/371472 [5:02:20<2:40:15, 3.07it/s] 92%|█████████▏| 341913/371472 [5:02:20<2:53:42, 2.84it/s] 92%|█████████▏| 341914/371472 [5:02:20<2:41:54, 3.04it/s] 92%|█████████▏| 341915/371472 [5:02:21<2:38:16, 3.11it/s] 92%|█████████▏| 341916/371472 [5:02:21<2:33:24, 3.21it/s] 92%|█████████▏| 341917/371472 [5:02:21<2:55:45, 2.80it/s] 92%|█████████▏| 341918/371472 [5:02:22<2:46:47, 2.95it/s] 92%|█████████▏| 341919/371472 [5:02:22<2:34:18, 3.19it/s] 92%|█████████▏| 341920/371472 [5:02:22<2:27:10, 3.35it/s] {'loss': 2.6179, 'learning_rate': 1.7163696696761404e-07, 'epoch': 14.73} + 92%|█████████▏| 341920/371472 [5:02:22<2:27:10, 3.35it/s] 92%|█████████▏| 341921/371472 [5:02:22<2:30:15, 3.28it/s] 92%|█████████▏| 341922/371472 [5:02:23<2:24:44, 3.40it/s] 92%|█████████▏| 341923/371472 [5:02:23<2:25:09, 3.39it/s] 92%|█████████▏| 341924/371472 [5:02:23<2:21:27, 3.48it/s] 92%|█████████▏| 341925/371472 [5:02:24<2:17:11, 3.59it/s] 92%|█████████▏| 341926/371472 [5:02:24<2:15:43, 3.63it/s] 92%|█████████▏| 341927/371472 [5:02:24<2:18:24, 3.56it/s] 92%|█████████▏| 341928/371472 [5:02:24<2:17:46, 3.57it/s] 92%|█████████▏| 341929/371472 [5:02:25<2:21:56, 3.47it/s] 92%|█████████▏| 341930/371472 [5:02:25<2:41:51, 3.04it/s] 92%|█████████▏| 341931/371472 [5:02:25<2:32:54, 3.22it/s] 92%|█████████▏| 341932/371472 [5:02:26<2:23:43, 3.43it/s] 92%|█████████▏| 341933/371472 [5:02:26<2:27:28, 3.34it/s] 92%|█████████▏| 341934/371472 [5:02:26<2:28:08, 3.32it/s] 92%|█████████▏| 341935/371472 [5:02:27<2:25:11, 3.39it/s] 92%|█████████▏| 341936/371472 [5:02:27<2:25:12, 3.39it/s] 92%|█████████▏| 341937/371472 [5:02:27<2:19:34, 3.53it/s] 92%|█████████▏| 341938/371472 [5:02:27<2:25:30, 3.38it/s] 92%|█████████▏| 341939/371472 [5:02:28<2:18:01, 3.57it/s] 92%|█████████▏| 341940/371472 [5:02:28<2:12:35, 3.71it/s] {'loss': 2.566, 'learning_rate': 1.7158848499213511e-07, 'epoch': 14.73} + 92%|█████████▏| 341940/371472 [5:02:28<2:12:35, 3.71it/s] 92%|█████████▏| 341941/371472 [5:02:28<2:32:18, 3.23it/s] 92%|█████████▏| 341942/371472 [5:02:29<2:30:47, 3.26it/s] 92%|█████████▏| 341943/371472 [5:02:29<2:23:51, 3.42it/s] 92%|█████████▏| 341944/371472 [5:02:29<2:30:51, 3.26it/s] 92%|█████████▏| 341945/371472 [5:02:29<2:26:39, 3.36it/s] 92%|█████████▏| 341946/371472 [5:02:30<2:33:07, 3.21it/s] 92%|█████████▏| 341947/371472 [5:02:30<2:24:32, 3.40it/s] 92%|█████████▏| 341948/371472 [5:02:30<2:22:39, 3.45it/s] 92%|█████████▏| 341949/371472 [5:02:31<2:25:55, 3.37it/s] 92%|█████████▏| 341950/371472 [5:02:31<2:21:58, 3.47it/s] 92%|█████████▏| 341951/371472 [5:02:31<2:32:34, 3.22it/s] 92%|█████████▏| 341952/371472 [5:02:32<2:28:30, 3.31it/s] 92%|█████████▏| 341953/371472 [5:02:32<2:24:18, 3.41it/s] 92%|█████████▏| 341954/371472 [5:02:32<2:26:14, 3.36it/s] 92%|█████████▏| 341955/371472 [5:02:32<2:19:25, 3.53it/s] 92%|█████████▏| 341956/371472 [5:02:33<2:18:53, 3.54it/s] 92%|█████████▏| 341957/371472 [5:02:33<2:17:20, 3.58it/s] 92%|█████████▏| 341958/371472 [5:02:33<2:14:50, 3.65it/s] 92%|█████████▏| 341959/371472 [5:02:34<2:37:44, 3.12it/s] 92%|█████████▏| 341960/371472 [5:02:34<2:29:43, 3.29it/s] {'loss': 2.538, 'learning_rate': 1.7154000301665626e-07, 'epoch': 14.73} + 92%|█████████▏| 341960/371472 [5:02:34<2:29:43, 3.29it/s] 92%|█████████▏| 341961/371472 [5:02:34<2:31:28, 3.25it/s] 92%|█████████▏| 341962/371472 [5:02:35<2:25:11, 3.39it/s] 92%|█████████▏| 341963/371472 [5:02:35<2:19:51, 3.52it/s] 92%|█████████▏| 341964/371472 [5:02:35<2:18:12, 3.56it/s] 92%|█████████▏| 341965/371472 [5:02:35<2:12:13, 3.72it/s] 92%|█████████▏| 341966/371472 [5:02:36<2:24:18, 3.41it/s] 92%|█████████▏| 341967/371472 [5:02:36<2:24:46, 3.40it/s] 92%|█████████▏| 341968/371472 [5:02:36<2:28:21, 3.31it/s] 92%|█████████▏| 341969/371472 [5:02:37<2:32:52, 3.22it/s] 92%|█████████▏| 341970/371472 [5:02:37<2:23:07, 3.44it/s] 92%|█████████▏| 341971/371472 [5:02:37<2:30:46, 3.26it/s] 92%|█████████▏| 341972/371472 [5:02:37<2:24:10, 3.41it/s] 92%|█████████▏| 341973/371472 [5:02:38<2:18:57, 3.54it/s] 92%|█████████▏| 341974/371472 [5:02:38<2:16:47, 3.59it/s] 92%|█████████▏| 341975/371472 [5:02:38<2:14:38, 3.65it/s] 92%|█████████▏| 341976/371472 [5:02:39<2:20:37, 3.50it/s] 92%|█████████▏| 341977/371472 [5:02:39<2:17:38, 3.57it/s] 92%|█████████▏| 341978/371472 [5:02:39<2:16:15, 3.61it/s] 92%|█████████▏| 341979/371472 [5:02:39<2:15:44, 3.62it/s] 92%|█████████▏| 341980/371472 [5:02:40<2:16:37, 3.60it/s] {'loss': 2.6944, 'learning_rate': 1.714915210411773e-07, 'epoch': 14.73} + 92%|█████████▏| 341980/371472 [5:02:40<2:16:37, 3.60it/s] 92%|█████████▏| 341981/371472 [5:02:40<2:23:19, 3.43it/s] 92%|█████████▏| 341982/371472 [5:02:40<2:25:24, 3.38it/s] 92%|█████████▏| 341983/371472 [5:02:41<2:22:35, 3.45it/s] 92%|█████████▏| 341984/371472 [5:02:41<2:26:41, 3.35it/s] 92%|█████████▏| 341985/371472 [5:02:41<2:23:12, 3.43it/s] 92%|█████████▏| 341986/371472 [5:02:41<2:22:27, 3.45it/s] 92%|█████████▏| 341987/371472 [5:02:42<2:17:53, 3.56it/s] 92%|█████████▏| 341988/371472 [5:02:42<2:14:39, 3.65it/s] 92%|█████████▏| 341989/371472 [5:02:42<2:19:18, 3.53it/s] 92%|█████████▏| 341990/371472 [5:02:43<2:23:43, 3.42it/s] 92%|█████████▏| 341991/371472 [5:02:43<2:14:18, 3.66it/s] 92%|█████████▏| 341992/371472 [5:02:43<2:12:40, 3.70it/s] 92%|█████████▏| 341993/371472 [5:02:43<2:18:26, 3.55it/s] 92%|█████████▏| 341994/371472 [5:02:44<2:12:22, 3.71it/s] 92%|█████████▏| 341995/371472 [5:02:44<2:10:01, 3.78it/s] 92%|█████████▏| 341996/371472 [5:02:44<2:11:27, 3.74it/s] 92%|█████████▏| 341997/371472 [5:02:44<2:11:21, 3.74it/s] 92%|█████████▏| 341998/371472 [5:02:45<2:13:26, 3.68it/s] 92%|█████████▏| 341999/371472 [5:02:45<2:17:49, 3.56it/s] 92%|█████████▏| 342000/371472 [5:02:45<2:13:03, 3.69it/s] {'loss': 2.6228, 'learning_rate': 1.714430390656985e-07, 'epoch': 14.73} + 92%|█████████▏| 342000/371472 [5:02:45<2:13:03, 3.69it/s] 92%|█████████▏| 342001/371472 [5:02:46<2:18:21, 3.55it/s] 92%|█████████▏| 342002/371472 [5:02:46<2:17:59, 3.56it/s] 92%|█████████▏| 342003/371472 [5:02:46<2:18:06, 3.56it/s] 92%|█████████▏| 342004/371472 [5:02:46<2:14:31, 3.65it/s] 92%|█████████▏| 342005/371472 [5:02:47<2:14:53, 3.64it/s] 92%|█████████▏| 342006/371472 [5:02:47<2:20:32, 3.49it/s] 92%|█████████▏| 342007/371472 [5:02:47<2:20:25, 3.50it/s] 92%|█████████▏| 342008/371472 [5:02:47<2:18:55, 3.53it/s] 92%|█████████▏| 342009/371472 [5:02:48<2:28:23, 3.31it/s] 92%|█████████▏| 342010/371472 [5:02:48<2:27:51, 3.32it/s] 92%|█████████▏| 342011/371472 [5:02:48<2:20:44, 3.49it/s] 92%|█████████▏| 342012/371472 [5:02:49<2:18:52, 3.54it/s] 92%|█████████▏| 342013/371472 [5:02:49<2:20:16, 3.50it/s] 92%|█████████▏| 342014/371472 [5:02:49<2:21:57, 3.46it/s] 92%|█████████▏| 342015/371472 [5:02:50<2:21:59, 3.46it/s] 92%|█████████▏| 342016/371472 [5:02:50<2:21:54, 3.46it/s] 92%|█████████▏| 342017/371472 [5:02:50<2:16:05, 3.61it/s] 92%|█████████▏| 342018/371472 [5:02:50<2:15:14, 3.63it/s] 92%|█████████▏| 342019/371472 [5:02:51<2:17:07, 3.58it/s] 92%|█████████▏| 342020/371472 [5:02:51<2:31:04, 3.25it/s] {'loss': 2.6103, 'learning_rate': 1.7139455709021953e-07, 'epoch': 14.73} + 92%|█████████▏| 342020/371472 [5:02:51<2:31:04, 3.25it/s] 92%|█████████▏| 342021/371472 [5:02:51<2:31:38, 3.24it/s] 92%|█████████▏| 342022/371472 [5:02:52<2:22:55, 3.43it/s] 92%|█████████▏| 342023/371472 [5:02:52<2:19:08, 3.53it/s] 92%|█████████▏| 342024/371472 [5:02:52<2:11:50, 3.72it/s] 92%|█████████▏| 342025/371472 [5:02:52<2:13:44, 3.67it/s] 92%|█████████▏| 342026/371472 [5:02:53<2:13:29, 3.68it/s] 92%|█████████▏| 342027/371472 [5:02:53<2:15:47, 3.61it/s] 92%|█████████▏| 342028/371472 [5:02:53<2:17:15, 3.58it/s] 92%|█████████▏| 342029/371472 [5:02:53<2:19:38, 3.51it/s] 92%|█████████▏| 342030/371472 [5:02:54<2:20:22, 3.50it/s] 92%|█████████▏| 342031/371472 [5:02:54<2:19:01, 3.53it/s] 92%|█████████▏| 342032/371472 [5:02:54<2:15:39, 3.62it/s] 92%|█████████▏| 342033/371472 [5:02:55<2:15:12, 3.63it/s] 92%|█████████▏| 342034/371472 [5:02:55<2:31:06, 3.25it/s] 92%|█████████▏| 342035/371472 [5:02:55<2:29:56, 3.27it/s] 92%|█████████▏| 342036/371472 [5:02:56<2:24:07, 3.40it/s] 92%|█████████▏| 342037/371472 [5:02:56<2:20:16, 3.50it/s] 92%|█████████▏| 342038/371472 [5:02:56<2:17:57, 3.56it/s] 92%|█████████▏| 342039/371472 [5:02:56<2:17:23, 3.57it/s] 92%|█████████▏| 342040/371472 [5:02:57<2:14:11, 3.66it/s] {'loss': 2.6205, 'learning_rate': 1.7134607511474068e-07, 'epoch': 14.73} + 92%|█████████▏| 342040/371472 [5:02:57<2:14:11, 3.66it/s] 92%|█████████▏| 342041/371472 [5:02:57<2:26:04, 3.36it/s] 92%|█████████▏| 342042/371472 [5:02:57<2:24:30, 3.39it/s] 92%|█████████▏| 342043/371472 [5:02:58<2:25:33, 3.37it/s] 92%|█████████▏| 342044/371472 [5:02:58<2:28:01, 3.31it/s] 92%|█████████▏| 342045/371472 [5:02:58<2:30:29, 3.26it/s] 92%|█████████▏| 342046/371472 [5:02:58<2:25:47, 3.36it/s] 92%|█████████▏| 342047/371472 [5:02:59<2:18:39, 3.54it/s] 92%|█████████▏| 342048/371472 [5:02:59<2:23:45, 3.41it/s] 92%|█████████▏| 342049/371472 [5:02:59<2:21:38, 3.46it/s] 92%|█████████▏| 342050/371472 [5:03:00<2:19:02, 3.53it/s] 92%|█████████▏| 342051/371472 [5:03:00<2:25:01, 3.38it/s] 92%|█████████▏| 342052/371472 [5:03:00<2:21:02, 3.48it/s] 92%|█████████▏| 342053/371472 [5:03:00<2:15:06, 3.63it/s] 92%|█████████▏| 342054/371472 [5:03:01<2:19:30, 3.51it/s] 92%|█████████▏| 342055/371472 [5:03:01<2:22:45, 3.43it/s] 92%|█████████▏| 342056/371472 [5:03:01<2:20:26, 3.49it/s] 92%|█████████▏| 342057/371472 [5:03:02<2:17:09, 3.57it/s] 92%|█████████▏| 342058/371472 [5:03:02<2:18:13, 3.55it/s] 92%|█████████▏| 342059/371472 [5:03:02<2:31:10, 3.24it/s] 92%|█████████▏| 342060/371472 [5:03:03<2:26:43, 3.34it/s] {'loss': 2.68, 'learning_rate': 1.7129759313926175e-07, 'epoch': 14.73} + 92%|█████████▏| 342060/371472 [5:03:03<2:26:43, 3.34it/s] 92%|█████████▏| 342061/371472 [5:03:03<2:24:53, 3.38it/s] 92%|█████████▏| 342062/371472 [5:03:03<2:23:03, 3.43it/s] 92%|█████████▏| 342063/371472 [5:03:03<2:21:07, 3.47it/s] 92%|█████████▏| 342064/371472 [5:03:04<2:16:36, 3.59it/s] 92%|█████████▏| 342065/371472 [5:03:04<2:15:28, 3.62it/s] 92%|█████████▏| 342066/371472 [5:03:04<2:21:21, 3.47it/s] 92%|█████████▏| 342067/371472 [5:03:04<2:20:04, 3.50it/s] 92%|█████████▏| 342068/371472 [5:03:05<2:32:07, 3.22it/s] 92%|█████████▏| 342069/371472 [5:03:05<2:30:08, 3.26it/s] 92%|█████████▏| 342070/371472 [5:03:05<2:30:13, 3.26it/s] 92%|█████████▏| 342071/371472 [5:03:06<2:24:05, 3.40it/s] 92%|█████████▏| 342072/371472 [5:03:06<2:25:22, 3.37it/s] 92%|█████████▏| 342073/371472 [5:03:06<2:26:52, 3.34it/s] 92%|█████████▏| 342074/371472 [5:03:07<2:30:03, 3.27it/s] 92%|█████████▏| 342075/371472 [5:03:07<2:28:04, 3.31it/s] 92%|█████████▏| 342076/371472 [5:03:07<2:23:09, 3.42it/s] 92%|█████████▏| 342077/371472 [5:03:08<2:48:41, 2.90it/s] 92%|█████████▏| 342078/371472 [5:03:08<2:36:43, 3.13it/s] 92%|█████████▏| 342079/371472 [5:03:08<2:31:53, 3.23it/s] 92%|█████████▏| 342080/371472 [5:03:09<2:34:06, 3.18it/s] {'loss': 2.6569, 'learning_rate': 1.712491111637829e-07, 'epoch': 14.73} + 92%|█████████▏| 342080/371472 [5:03:09<2:34:06, 3.18it/s] 92%|█████████▏| 342081/371472 [5:03:09<2:29:52, 3.27it/s] 92%|█████████▏| 342082/371472 [5:03:09<2:22:50, 3.43it/s] 92%|█████████▏| 342083/371472 [5:03:09<2:28:27, 3.30it/s] 92%|█████████▏| 342084/371472 [5:03:10<2:22:18, 3.44it/s] 92%|█████████▏| 342085/371472 [5:03:10<2:24:27, 3.39it/s] 92%|█████████▏| 342086/371472 [5:03:10<2:22:40, 3.43it/s] 92%|█████████▏| 342087/371472 [5:03:11<2:15:28, 3.61it/s] 92%|█████████▏| 342088/371472 [5:03:11<2:22:21, 3.44it/s] 92%|█████████▏| 342089/371472 [5:03:11<2:22:41, 3.43it/s] 92%|█████████▏| 342090/371472 [5:03:11<2:23:32, 3.41it/s] 92%|█████████▏| 342091/371472 [5:03:12<2:22:20, 3.44it/s] 92%|█████████▏| 342092/371472 [5:03:12<2:18:28, 3.54it/s] 92%|█████████▏| 342093/371472 [5:03:12<2:15:03, 3.63it/s] 92%|█████████▏| 342094/371472 [5:03:13<2:16:55, 3.58it/s] 92%|█████████▏| 342095/371472 [5:03:13<2:15:50, 3.60it/s] 92%|█████████▏| 342096/371472 [5:03:13<2:15:16, 3.62it/s] 92%|█████████▏| 342097/371472 [5:03:13<2:17:49, 3.55it/s] 92%|█████████▏| 342098/371472 [5:03:14<2:17:29, 3.56it/s] 92%|█████████▏| 342099/371472 [5:03:14<2:12:48, 3.69it/s] 92%|█████████▏| 342100/371472 [5:03:14<2:13:54, 3.66it/s] {'loss': 2.3767, 'learning_rate': 1.7120062918830395e-07, 'epoch': 14.73} + 92%|█████████▏| 342100/371472 [5:03:14<2:13:54, 3.66it/s] 92%|█████████▏| 342101/371472 [5:03:14<2:18:34, 3.53it/s] 92%|█████████▏| 342102/371472 [5:03:15<2:21:24, 3.46it/s] 92%|█████████▏| 342103/371472 [5:03:15<2:27:20, 3.32it/s] 92%|█████████▏| 342104/371472 [5:03:15<2:22:06, 3.44it/s] 92%|█████████▏| 342105/371472 [5:03:16<2:19:45, 3.50it/s] 92%|█████████▏| 342106/371472 [5:03:16<2:18:52, 3.52it/s] 92%|█████████▏| 342107/371472 [5:03:16<2:24:41, 3.38it/s] 92%|█████████▏| 342108/371472 [5:03:17<2:18:39, 3.53it/s] 92%|█████████▏| 342109/371472 [5:03:17<2:15:37, 3.61it/s] 92%|█████████▏| 342110/371472 [5:03:17<2:26:55, 3.33it/s] 92%|█████████▏| 342111/371472 [5:03:17<2:21:35, 3.46it/s] 92%|█████████▏| 342112/371472 [5:03:18<2:15:19, 3.62it/s] 92%|█████████▏| 342113/371472 [5:03:18<2:15:18, 3.62it/s] 92%|█████████▏| 342114/371472 [5:03:18<2:15:43, 3.61it/s] 92%|█████████▏| 342115/371472 [5:03:18<2:19:36, 3.50it/s] 92%|█████████▏| 342116/371472 [5:03:19<2:19:11, 3.51it/s] 92%|█████████▏| 342117/371472 [5:03:19<2:19:22, 3.51it/s] 92%|█████████▏| 342118/371472 [5:03:19<2:20:24, 3.48it/s] 92%|█████████▏| 342119/371472 [5:03:20<2:20:47, 3.47it/s] 92%|█████████▏| 342120/371472 [5:03:20<2:16:31, 3.58it/s] {'loss': 2.6719, 'learning_rate': 1.7115214721282513e-07, 'epoch': 14.74} + 92%|█████████▏| 342120/371472 [5:03:20<2:16:31, 3.58it/s] 92%|█████████▏| 342121/371472 [5:03:20<2:18:09, 3.54it/s] 92%|█████████▏| 342122/371472 [5:03:20<2:20:12, 3.49it/s] 92%|█████████▏| 342123/371472 [5:03:21<2:20:55, 3.47it/s] 92%|█████████▏| 342124/371472 [5:03:21<2:22:57, 3.42it/s] 92%|█████████▏| 342125/371472 [5:03:21<2:23:30, 3.41it/s] 92%|█████████▏| 342126/371472 [5:03:22<2:22:02, 3.44it/s] 92%|█████████▏| 342127/371472 [5:03:22<2:24:52, 3.38it/s] 92%|█████████▏| 342128/371472 [5:03:22<2:18:13, 3.54it/s] 92%|█████████▏| 342129/371472 [5:03:23<2:17:20, 3.56it/s] 92%|█████████▏| 342130/371472 [5:03:23<2:13:52, 3.65it/s] 92%|█████████▏| 342131/371472 [5:03:23<2:12:58, 3.68it/s] 92%|█████████▏| 342132/371472 [5:03:23<2:21:55, 3.45it/s] 92%|█████████▏| 342133/371472 [5:03:24<2:16:30, 3.58it/s] 92%|█████████▏| 342134/371472 [5:03:24<2:19:44, 3.50it/s] 92%|█████████▏| 342135/371472 [5:03:24<2:14:36, 3.63it/s] 92%|█████████▏| 342136/371472 [5:03:24<2:11:55, 3.71it/s] 92%|█████████▏| 342137/371472 [5:03:25<2:27:55, 3.31it/s] 92%|█████████▏| 342138/371472 [5:03:25<2:22:15, 3.44it/s] 92%|█████████▏| 342139/371472 [5:03:25<2:22:05, 3.44it/s] 92%|█████████▏| 342140/371472 [5:03:26<2:27:14, 3.32it/s] {'loss': 2.7518, 'learning_rate': 1.711036652373462e-07, 'epoch': 14.74} + 92%|█████████▏| 342140/371472 [5:03:26<2:27:14, 3.32it/s] 92%|█████████▏| 342141/371472 [5:03:26<2:33:05, 3.19it/s] 92%|█████████▏| 342142/371472 [5:03:26<2:52:51, 2.83it/s] 92%|█████████▏| 342143/371472 [5:03:27<2:43:24, 2.99it/s] 92%|█████████▏| 342144/371472 [5:03:27<2:37:44, 3.10it/s] 92%|█████████▏| 342145/371472 [5:03:27<2:32:19, 3.21it/s] 92%|█████████▏| 342146/371472 [5:03:28<2:28:52, 3.28it/s] 92%|█████████▏| 342147/371472 [5:03:28<2:28:58, 3.28it/s] 92%|█████████▏| 342148/371472 [5:03:28<2:25:35, 3.36it/s] 92%|█████████▏| 342149/371472 [5:03:28<2:18:41, 3.52it/s] 92%|█████████▏| 342150/371472 [5:03:29<2:21:53, 3.44it/s] 92%|█████████▏| 342151/371472 [5:03:29<2:34:59, 3.15it/s] 92%|█████████▏| 342152/371472 [5:03:29<2:28:08, 3.30it/s] 92%|█████████▏| 342153/371472 [5:03:30<2:26:45, 3.33it/s] 92%|█████████▏| 342154/371472 [5:03:30<2:22:22, 3.43it/s] 92%|█████████▏| 342155/371472 [5:03:30<2:22:48, 3.42it/s] 92%|█████████▏| 342156/371472 [5:03:31<2:33:41, 3.18it/s] 92%|█████████▏| 342157/371472 [5:03:31<2:26:30, 3.33it/s] 92%|█████████▏| 342158/371472 [5:03:31<2:36:46, 3.12it/s] 92%|█████████▏| 342159/371472 [5:03:32<2:32:28, 3.20it/s] 92%|█████████▏| 342160/371472 [5:03:32<2:28:01, 3.30it/s] {'loss': 2.6239, 'learning_rate': 1.7105518326186732e-07, 'epoch': 14.74} + 92%|█████████▏| 342160/371472 [5:03:32<2:28:01, 3.30it/s] 92%|█████████▏| 342161/371472 [5:03:32<2:24:36, 3.38it/s] 92%|█████████▏| 342162/371472 [5:03:32<2:25:31, 3.36it/s] 92%|█████████▏| 342163/371472 [5:03:33<2:22:38, 3.42it/s] 92%|█████████▏| 342164/371472 [5:03:33<2:22:24, 3.43it/s] 92%|█████████▏| 342165/371472 [5:03:33<2:27:30, 3.31it/s] 92%|█████████▏| 342166/371472 [5:03:34<2:20:35, 3.47it/s] 92%|█████████▏| 342167/371472 [5:03:34<2:15:59, 3.59it/s] 92%|█████████▏| 342168/371472 [5:03:34<2:12:47, 3.68it/s] 92%|█████████▏| 342169/371472 [5:03:34<2:12:06, 3.70it/s] 92%|█████████▏| 342170/371472 [5:03:35<2:08:30, 3.80it/s] 92%|█████████▏| 342171/371472 [5:03:35<2:10:10, 3.75it/s] 92%|█████████▏| 342172/371472 [5:03:35<2:12:01, 3.70it/s] 92%|█████████▏| 342173/371472 [5:03:35<2:17:29, 3.55it/s] 92%|█████████▏| 342174/371472 [5:03:36<2:14:08, 3.64it/s] 92%|█████████▏| 342175/371472 [5:03:36<2:13:47, 3.65it/s] 92%|█████████▏| 342176/371472 [5:03:36<2:19:39, 3.50it/s] 92%|█████████▏| 342177/371472 [5:03:37<2:18:51, 3.52it/s] 92%|█████████▏| 342178/371472 [5:03:37<2:23:30, 3.40it/s] 92%|█████████▏| 342179/371472 [5:03:37<2:19:37, 3.50it/s] 92%|█████████▏| 342180/371472 [5:03:37<2:14:14, 3.64it/s] {'loss': 2.6798, 'learning_rate': 1.710067012863884e-07, 'epoch': 14.74} + 92%|█████████▏| 342180/371472 [5:03:37<2:14:14, 3.64it/s] 92%|█████████▏| 342181/371472 [5:03:38<2:15:05, 3.61it/s] 92%|█████████▏| 342182/371472 [5:03:38<2:09:53, 3.76it/s] 92%|█████████▏| 342183/371472 [5:03:38<2:10:05, 3.75it/s] 92%|█████████▏| 342184/371472 [5:03:38<2:10:01, 3.75it/s] 92%|█████████▏| 342185/371472 [5:03:39<2:11:49, 3.70it/s] 92%|█████████▏| 342186/371472 [5:03:39<2:07:28, 3.83it/s] 92%|█████████▏| 342187/371472 [5:03:39<2:13:18, 3.66it/s] 92%|█████████▏| 342188/371472 [5:03:40<2:12:11, 3.69it/s] 92%|█████████▏| 342189/371472 [5:03:40<2:12:40, 3.68it/s] 92%|█████████▏| 342190/371472 [5:03:40<2:12:13, 3.69it/s] 92%|█████████▏| 342191/371472 [5:03:40<2:09:02, 3.78it/s] 92%|█████████▏| 342192/371472 [5:03:41<2:10:00, 3.75it/s] 92%|█████████▏| 342193/371472 [5:03:41<2:12:00, 3.70it/s] 92%|█████████▏| 342194/371472 [5:03:41<2:11:53, 3.70it/s] 92%|█████████▏| 342195/371472 [5:03:41<2:08:42, 3.79it/s] 92%|█████████▏| 342196/371472 [5:03:42<2:06:36, 3.85it/s] 92%|█████████▏| 342197/371472 [5:03:42<2:07:25, 3.83it/s] 92%|█████████▏| 342198/371472 [5:03:42<2:23:21, 3.40it/s] 92%|█████████▏| 342199/371472 [5:03:43<2:30:13, 3.25it/s] 92%|█████████▏| 342200/371472 [5:03:43<2:46:04, 2.94it/s] {'loss': 2.6244, 'learning_rate': 1.7095821931090957e-07, 'epoch': 14.74} + 92%|█████████▏| 342200/371472 [5:03:43<2:46:04, 2.94it/s] 92%|█████████▏| 342201/371472 [5:03:43<2:35:08, 3.14it/s] 92%|█████████▏| 342202/371472 [5:03:44<2:31:36, 3.22it/s] 92%|█████████▏| 342203/371472 [5:03:44<2:29:24, 3.27it/s] 92%|█████████▏| 342204/371472 [5:03:44<2:34:39, 3.15it/s] 92%|█████████▏| 342205/371472 [5:03:45<2:35:04, 3.15it/s] 92%|█████████▏| 342206/371472 [5:03:45<2:24:58, 3.36it/s] 92%|█████████▏| 342207/371472 [5:03:45<2:21:04, 3.46it/s] 92%|█████████▏| 342208/371472 [5:03:45<2:22:51, 3.41it/s] 92%|█████████▏| 342209/371472 [5:03:46<2:37:38, 3.09it/s] 92%|█████████▏| 342210/371472 [5:03:46<2:37:18, 3.10it/s] 92%|█████████▏| 342211/371472 [5:03:46<2:41:53, 3.01it/s] 92%|█████████▏| 342212/371472 [5:03:47<2:43:19, 2.99it/s] 92%|█████████▏| 342213/371472 [5:03:47<2:37:54, 3.09it/s] 92%|█████████▏| 342214/371472 [5:03:47<2:36:35, 3.11it/s] 92%|█████████▏| 342215/371472 [5:03:48<2:35:46, 3.13it/s] 92%|█████████▏| 342216/371472 [5:03:48<2:36:33, 3.11it/s] 92%|█████████▏| 342217/371472 [5:03:48<2:26:51, 3.32it/s] 92%|█████████▏| 342218/371472 [5:03:49<2:22:14, 3.43it/s] 92%|█████████▏| 342219/371472 [5:03:49<2:20:22, 3.47it/s] 92%|█████████▏| 342220/371472 [5:03:49<2:17:28, 3.55it/s] {'loss': 2.598, 'learning_rate': 1.7090973733543062e-07, 'epoch': 14.74} + 92%|█████████▏| 342220/371472 [5:03:49<2:17:28, 3.55it/s] 92%|█████████▏| 342221/371472 [5:03:49<2:16:46, 3.56it/s] 92%|█████████▏| 342222/371472 [5:03:50<2:15:29, 3.60it/s] 92%|█████████▏| 342223/371472 [5:03:50<2:20:41, 3.46it/s] 92%|█████████▏| 342224/371472 [5:03:50<2:21:49, 3.44it/s] 92%|█████████▏| 342225/371472 [5:03:51<2:17:31, 3.54it/s] 92%|█████████▏| 342226/371472 [5:03:51<2:17:28, 3.55it/s] 92%|█████████▏| 342227/371472 [5:03:51<2:17:50, 3.54it/s] 92%|█████████▏| 342228/371472 [5:03:51<2:16:29, 3.57it/s] 92%|█████████▏| 342229/371472 [5:03:52<2:19:00, 3.51it/s] 92%|█████████▏| 342230/371472 [5:03:52<2:24:25, 3.37it/s] 92%|█████████▏| 342231/371472 [5:03:52<2:22:58, 3.41it/s] 92%|█████████▏| 342232/371472 [5:03:53<2:19:23, 3.50it/s] 92%|█████████▏| 342233/371472 [5:03:53<2:23:31, 3.40it/s] 92%|█████████▏| 342234/371472 [5:03:53<2:22:48, 3.41it/s] 92%|█████████▏| 342235/371472 [5:03:53<2:16:39, 3.57it/s] 92%|█████████▏| 342236/371472 [5:03:54<2:20:51, 3.46it/s] 92%|█████████▏| 342237/371472 [5:03:54<2:33:36, 3.17it/s] 92%|█████████▏| 342238/371472 [5:03:54<2:25:37, 3.35it/s] 92%|█████████▏| 342239/371472 [5:03:55<2:21:52, 3.43it/s] 92%|█████████▏| 342240/371472 [5:03:55<2:20:08, 3.48it/s] {'loss': 2.5643, 'learning_rate': 1.7086125535995177e-07, 'epoch': 14.74} + 92%|█████████▏| 342240/371472 [5:03:55<2:20:08, 3.48it/s] 92%|█████████▏| 342241/371472 [5:03:55<2:21:43, 3.44it/s] 92%|█████████▏| 342242/371472 [5:03:56<2:18:17, 3.52it/s] 92%|█████████▏| 342243/371472 [5:03:56<2:27:48, 3.30it/s] 92%|█████████▏| 342244/371472 [5:03:56<2:27:56, 3.29it/s] 92%|█████████▏| 342245/371472 [5:03:56<2:20:49, 3.46it/s] 92%|█████████▏| 342246/371472 [5:03:57<2:26:14, 3.33it/s] 92%|█████████▏| 342247/371472 [5:03:57<2:28:19, 3.28it/s] 92%|█████████▏| 342248/371472 [5:03:57<2:23:13, 3.40it/s] 92%|█████████▏| 342249/371472 [5:03:58<2:28:52, 3.27it/s] 92%|█████████▏| 342250/371472 [5:03:58<2:26:37, 3.32it/s] 92%|█████████▏| 342251/371472 [5:03:58<2:19:56, 3.48it/s] 92%|█████████▏| 342252/371472 [5:03:58<2:15:22, 3.60it/s] 92%|█████████▏| 342253/371472 [5:03:59<2:21:13, 3.45it/s] 92%|█████████▏| 342254/371472 [5:03:59<2:19:05, 3.50it/s] 92%|█████████▏| 342255/371472 [5:03:59<2:20:59, 3.45it/s] 92%|█████████▏| 342256/371472 [5:04:00<2:27:51, 3.29it/s] 92%|█████████▏| 342257/371472 [5:04:00<2:28:16, 3.28it/s] 92%|█████████▏| 342258/371472 [5:04:00<2:21:31, 3.44it/s] 92%|█████████▏| 342259/371472 [5:04:01<2:19:51, 3.48it/s] 92%|█████████▏| 342260/371472 [5:04:01<2:25:05, 3.36it/s] {'loss': 2.6849, 'learning_rate': 1.7081277338447284e-07, 'epoch': 14.74} + 92%|█████████▏| 342260/371472 [5:04:01<2:25:05, 3.36it/s] 92%|█████████▏| 342261/371472 [5:04:01<2:23:20, 3.40it/s] 92%|█████████▏| 342262/371472 [5:04:01<2:23:44, 3.39it/s] 92%|█████████▏| 342263/371472 [5:04:02<2:19:44, 3.48it/s] 92%|█████████▏| 342264/371472 [5:04:02<2:21:42, 3.44it/s] 92%|█████████▏| 342265/371472 [5:04:02<2:18:23, 3.52it/s] 92%|█████████▏| 342266/371472 [5:04:03<2:19:22, 3.49it/s] 92%|█████████▏| 342267/371472 [5:04:03<2:16:52, 3.56it/s] 92%|█████████▏| 342268/371472 [5:04:03<2:17:07, 3.55it/s] 92%|█████████▏| 342269/371472 [5:04:03<2:15:55, 3.58it/s] 92%|█████████▏| 342270/371472 [5:04:04<2:18:52, 3.50it/s] 92%|█████████▏| 342271/371472 [5:04:04<2:18:21, 3.52it/s] 92%|█████████▏| 342272/371472 [5:04:04<2:17:25, 3.54it/s] 92%|█████████▏| 342273/371472 [5:04:05<2:16:18, 3.57it/s] 92%|█████████▏| 342274/371472 [5:04:05<2:24:05, 3.38it/s] 92%|█████████▏| 342275/371472 [5:04:05<2:26:09, 3.33it/s] 92%|█████████▏| 342276/371472 [5:04:05<2:19:52, 3.48it/s] 92%|█████████▏| 342277/371472 [5:04:06<2:18:52, 3.50it/s] 92%|█████████▏| 342278/371472 [5:04:06<2:28:34, 3.27it/s] 92%|█████████▏| 342279/371472 [5:04:06<2:20:28, 3.46it/s] 92%|█████████▏| 342280/371472 [5:04:07<2:29:57, 3.24it/s] {'loss': 2.5761, 'learning_rate': 1.7076429140899386e-07, 'epoch': 14.74} + 92%|█████████▏| 342280/371472 [5:04:07<2:29:57, 3.24it/s] 92%|█████████▏| 342281/371472 [5:04:07<2:24:02, 3.38it/s] 92%|█████████▏| 342282/371472 [5:04:07<2:19:37, 3.48it/s] 92%|█████████▏| 342283/371472 [5:04:08<2:23:24, 3.39it/s] 92%|█████████▏| 342284/371472 [5:04:08<2:20:41, 3.46it/s] 92%|█████████▏| 342285/371472 [5:04:08<2:23:14, 3.40it/s] 92%|█████████▏| 342286/371472 [5:04:08<2:18:20, 3.52it/s] 92%|█████████▏| 342287/371472 [5:04:09<2:11:54, 3.69it/s] 92%|█████████▏| 342288/371472 [5:04:09<2:14:52, 3.61it/s] 92%|█████████▏| 342289/371472 [5:04:09<2:32:24, 3.19it/s] 92%|█████████▏| 342290/371472 [5:04:10<2:25:28, 3.34it/s] 92%|█████████▏| 342291/371472 [5:04:10<2:21:45, 3.43it/s] 92%|█████████▏| 342292/371472 [5:04:10<2:21:54, 3.43it/s] 92%|█████████▏| 342293/371472 [5:04:10<2:20:14, 3.47it/s] 92%|█████████▏| 342294/371472 [5:04:11<2:20:24, 3.46it/s] 92%|█████████▏| 342295/371472 [5:04:11<2:23:54, 3.38it/s] 92%|█████████▏| 342296/371472 [5:04:11<2:20:57, 3.45it/s] 92%|█████████▏| 342297/371472 [5:04:12<2:19:43, 3.48it/s] 92%|█████████▏| 342298/371472 [5:04:12<2:21:47, 3.43it/s] 92%|█████████▏| 342299/371472 [5:04:12<2:14:49, 3.61it/s] 92%|█████████▏| 342300/371472 [5:04:12<2:22:32, 3.41it/s] {'loss': 2.5459, 'learning_rate': 1.7071580943351504e-07, 'epoch': 14.74} + 92%|█████████▏| 342300/371472 [5:04:12<2:22:32, 3.41it/s] 92%|█████████▏| 342301/371472 [5:04:13<2:24:19, 3.37it/s] 92%|█████████▏| 342302/371472 [5:04:13<2:29:33, 3.25it/s] 92%|█████████▏| 342303/371472 [5:04:13<2:24:11, 3.37it/s] 92%|█████████▏| 342304/371472 [5:04:14<2:16:48, 3.55it/s] 92%|█████████▏| 342305/371472 [5:04:14<2:16:17, 3.57it/s] 92%|█████████▏| 342306/371472 [5:04:14<2:12:49, 3.66it/s] 92%|█████████▏| 342307/371472 [5:04:14<2:11:00, 3.71it/s] 92%|█████████▏| 342308/371472 [5:04:15<2:08:54, 3.77it/s] 92%|█████████▏| 342309/371472 [5:04:15<2:14:15, 3.62it/s] 92%|█████████▏| 342310/371472 [5:04:15<2:09:56, 3.74it/s] 92%|█████████▏| 342311/371472 [5:04:16<2:19:03, 3.50it/s] 92%|█████████▏| 342312/371472 [5:04:16<2:23:10, 3.39it/s] 92%|█████████▏| 342313/371472 [5:04:16<2:24:23, 3.37it/s] 92%|█████████▏| 342314/371472 [5:04:16<2:29:57, 3.24it/s] 92%|█████████▏| 342315/371472 [5:04:17<2:31:19, 3.21it/s] 92%|█████████▏| 342316/371472 [5:04:17<2:29:45, 3.24it/s] 92%|█████████▏| 342317/371472 [5:04:17<2:27:32, 3.29it/s] 92%|█████████▏| 342318/371472 [5:04:18<2:25:40, 3.34it/s] 92%|█████████▏| 342319/371472 [5:04:18<2:20:59, 3.45it/s] 92%|█████████▏| 342320/371472 [5:04:18<2:17:48, 3.53it/s] {'loss': 2.6648, 'learning_rate': 1.706673274580361e-07, 'epoch': 14.74} + 92%|█████████▏| 342320/371472 [5:04:18<2:17:48, 3.53it/s] 92%|█████████▏| 342321/371472 [5:04:18<2:14:45, 3.61it/s] 92%|█████████▏| 342322/371472 [5:04:19<2:16:34, 3.56it/s] 92%|█████████▏| 342323/371472 [5:04:19<2:13:38, 3.64it/s] 92%|█████████▏| 342324/371472 [5:04:19<2:10:37, 3.72it/s] 92%|█████████▏| 342325/371472 [5:04:20<2:23:54, 3.38it/s] 92%|█████████▏| 342326/371472 [5:04:20<2:21:33, 3.43it/s] 92%|█████████▏| 342327/371472 [5:04:20<2:18:20, 3.51it/s] 92%|█████████▏| 342328/371472 [5:04:20<2:13:21, 3.64it/s] 92%|█████████▏| 342329/371472 [5:04:21<2:13:40, 3.63it/s] 92%|█████████▏| 342330/371472 [5:04:21<2:12:02, 3.68it/s] 92%|█████████▏| 342331/371472 [5:04:21<2:09:39, 3.75it/s] 92%|█████████▏| 342332/371472 [5:04:22<2:11:47, 3.69it/s] 92%|█████████▏| 342333/371472 [5:04:22<2:13:00, 3.65it/s] 92%|��████████▏| 342334/371472 [5:04:22<2:12:53, 3.65it/s] 92%|█████████▏| 342335/371472 [5:04:22<2:15:49, 3.58it/s] 92%|█████████▏| 342336/371472 [5:04:23<2:12:32, 3.66it/s] 92%|█████████▏| 342337/371472 [5:04:23<2:24:03, 3.37it/s] 92%|█████████▏| 342338/371472 [5:04:23<2:33:02, 3.17it/s] 92%|█████████▏| 342339/371472 [5:04:24<2:23:12, 3.39it/s] 92%|█████████▏| 342340/371472 [5:04:24<2:20:46, 3.45it/s] {'loss': 2.6256, 'learning_rate': 1.7061884548255726e-07, 'epoch': 14.75} + 92%|█████████▏| 342340/371472 [5:04:24<2:20:46, 3.45it/s] 92%|█████████▏| 342341/371472 [5:04:24<2:21:47, 3.42it/s] 92%|█████████▏| 342342/371472 [5:04:24<2:28:17, 3.27it/s] 92%|█████████▏| 342343/371472 [5:04:25<2:23:25, 3.39it/s] 92%|█████████▏| 342344/371472 [5:04:25<2:19:38, 3.48it/s] 92%|█████████▏| 342345/371472 [5:04:25<2:22:56, 3.40it/s] 92%|█████████▏| 342346/371472 [5:04:26<2:14:44, 3.60it/s] 92%|█████████▏| 342347/371472 [5:04:26<2:24:09, 3.37it/s] 92%|█████████▏| 342348/371472 [5:04:26<2:24:51, 3.35it/s] 92%|█████████▏| 342349/371472 [5:04:26<2:19:11, 3.49it/s] 92%|█████████▏| 342350/371472 [5:04:27<2:17:20, 3.53it/s] 92%|█████████▏| 342351/371472 [5:04:27<2:12:26, 3.66it/s] 92%|█████████▏| 342352/371472 [5:04:27<2:10:54, 3.71it/s] 92%|█████████▏| 342353/371472 [5:04:28<2:16:09, 3.56it/s] 92%|█████████▏| 342354/371472 [5:04:28<2:15:24, 3.58it/s] 92%|█████████▏| 342355/371472 [5:04:28<2:11:08, 3.70it/s] 92%|█████████▏| 342356/371472 [5:04:28<2:16:13, 3.56it/s] 92%|█████████▏| 342357/371472 [5:04:29<2:18:15, 3.51it/s] 92%|█████████▏| 342358/371472 [5:04:29<2:15:27, 3.58it/s] 92%|█████████▏| 342359/371472 [5:04:29<2:13:47, 3.63it/s] 92%|█████████▏| 342360/371472 [5:04:30<2:22:28, 3.41it/s] {'loss': 2.6155, 'learning_rate': 1.705703635070783e-07, 'epoch': 14.75} + 92%|█████████▏| 342360/371472 [5:04:30<2:22:28, 3.41it/s] 92%|█████████▏| 342361/371472 [5:04:30<2:16:52, 3.54it/s] 92%|█████████▏| 342362/371472 [5:04:30<2:18:54, 3.49it/s] 92%|█████████▏| 342363/371472 [5:04:30<2:15:14, 3.59it/s] 92%|█████████▏| 342364/371472 [5:04:31<2:16:08, 3.56it/s] 92%|█████████▏| 342365/371472 [5:04:31<2:20:29, 3.45it/s] 92%|█████████▏| 342366/371472 [5:04:31<2:19:48, 3.47it/s] 92%|█████████▏| 342367/371472 [5:04:32<2:19:38, 3.47it/s] 92%|█████████▏| 342368/371472 [5:04:32<2:17:27, 3.53it/s] 92%|█████████▏| 342369/371472 [5:04:32<2:12:32, 3.66it/s] 92%|█████████▏| 342370/371472 [5:04:32<2:08:39, 3.77it/s] 92%|█████████▏| 342371/371472 [5:04:33<2:11:07, 3.70it/s] 92%|█████████▏| 342372/371472 [5:04:33<2:15:28, 3.58it/s] 92%|█████████▏| 342373/371472 [5:04:33<2:11:04, 3.70it/s] 92%|█████████▏| 342374/371472 [5:04:33<2:06:26, 3.84it/s] 92%|█████████▏| 342375/371472 [5:04:34<2:04:28, 3.90it/s] 92%|█████████▏| 342376/371472 [5:04:34<2:24:09, 3.36it/s] 92%|█████████▏| 342377/371472 [5:04:34<2:21:36, 3.42it/s] 92%|█████████▏| 342378/371472 [5:04:35<2:17:37, 3.52it/s] 92%|█████████▏| 342379/371472 [5:04:35<2:22:48, 3.40it/s] 92%|█████████▏| 342380/371472 [5:04:35<2:31:17, 3.20it/s] {'loss': 2.62, 'learning_rate': 1.7052188153159948e-07, 'epoch': 14.75} + 92%|█████████▏| 342380/371472 [5:04:35<2:31:17, 3.20it/s] 92%|█████████▏| 342381/371472 [5:04:36<2:33:04, 3.17it/s] 92%|█████████▏| 342382/371472 [5:04:36<2:24:51, 3.35it/s] 92%|█████████▏| 342383/371472 [5:04:36<2:20:45, 3.44it/s] 92%|█████████▏| 342384/371472 [5:04:36<2:17:14, 3.53it/s] 92%|█████████▏| 342385/371472 [5:04:37<2:26:43, 3.30it/s] 92%|█████████▏| 342386/371472 [5:04:37<2:26:32, 3.31it/s] 92%|█████████▏| 342387/371472 [5:04:37<2:19:25, 3.48it/s] 92%|█████████▏| 342388/371472 [5:04:38<2:16:33, 3.55it/s] 92%|█████████▏| 342389/371472 [5:04:38<2:18:46, 3.49it/s] 92%|█████████▏| 342390/371472 [5:04:38<2:21:40, 3.42it/s] 92%|█████████▏| 342391/371472 [5:04:38<2:24:23, 3.36it/s] 92%|█████████▏| 342392/371472 [5:04:39<2:18:54, 3.49it/s] 92%|█████████▏| 342393/371472 [5:04:39<2:25:44, 3.33it/s] 92%|█████████▏| 342394/371472 [5:04:39<2:23:52, 3.37it/s] 92%|█████████▏| 342395/371472 [5:04:40<2:19:07, 3.48it/s] 92%|█████████▏| 342396/371472 [5:04:40<2:16:22, 3.55it/s] 92%|█████████▏| 342397/371472 [5:04:40<2:12:22, 3.66it/s] 92%|█████████▏| 342398/371472 [5:04:41<2:35:47, 3.11it/s] 92%|█████████▏| 342399/371472 [5:04:41<2:27:17, 3.29it/s] 92%|█████████▏| 342400/371472 [5:04:41<2:29:52, 3.23it/s] {'loss': 2.5477, 'learning_rate': 1.7047339955612055e-07, 'epoch': 14.75} + 92%|█████████▏| 342400/371472 [5:04:41<2:29:52, 3.23it/s] 92%|█████████▏| 342401/371472 [5:04:41<2:31:04, 3.21it/s] 92%|█████████▏| 342402/371472 [5:04:42<2:27:43, 3.28it/s] 92%|█████████▏| 342403/371472 [5:04:42<2:21:51, 3.42it/s] 92%|█████████▏| 342404/371472 [5:04:42<2:19:27, 3.47it/s] 92%|█████████▏| 342405/371472 [5:04:43<2:13:33, 3.63it/s] 92%|█████████▏| 342406/371472 [5:04:43<2:16:49, 3.54it/s] 92%|█████████▏| 342407/371472 [5:04:43<2:14:36, 3.60it/s] 92%|█████████▏| 342408/371472 [5:04:43<2:14:27, 3.60it/s] 92%|█████████▏| 342409/371472 [5:04:44<2:22:40, 3.40it/s] 92%|█████████▏| 342410/371472 [5:04:44<2:23:33, 3.37it/s] 92%|█████████▏| 342411/371472 [5:04:44<2:17:39, 3.52it/s] 92%|█████████▏| 342412/371472 [5:04:45<2:10:29, 3.71it/s] 92%|█████████▏| 342413/371472 [5:04:45<2:13:12, 3.64it/s] 92%|█████████▏| 342414/371472 [5:04:45<2:10:35, 3.71it/s] 92%|█████████▏| 342415/371472 [5:04:45<2:12:16, 3.66it/s] 92%|█████████▏| 342416/371472 [5:04:46<2:13:36, 3.62it/s] 92%|█████████▏| 342417/371472 [5:04:46<2:17:01, 3.53it/s] 92%|█████████▏| 342418/371472 [5:04:46<2:19:04, 3.48it/s] 92%|█████████▏| 342419/371472 [5:04:46<2:14:17, 3.61it/s] 92%|█████████▏| 342420/371472 [5:04:47<2:10:05, 3.72it/s] {'loss': 2.6321, 'learning_rate': 1.7042491758064168e-07, 'epoch': 14.75} + 92%|█████████▏| 342420/371472 [5:04:47<2:10:05, 3.72it/s] 92%|█████████▏| 342421/371472 [5:04:47<2:11:15, 3.69it/s] 92%|█████████▏| 342422/371472 [5:04:47<2:12:29, 3.65it/s] 92%|█████████▏| 342423/371472 [5:04:48<2:14:13, 3.61it/s] 92%|█████████▏| 342424/371472 [5:04:48<2:30:41, 3.21it/s] 92%|█████████▏| 342425/371472 [5:04:48<2:23:58, 3.36it/s] 92%|█████████▏| 342426/371472 [5:04:48<2:19:31, 3.47it/s] 92%|█████████▏| 342427/371472 [5:04:49<2:16:02, 3.56it/s] 92%|█████████▏| 342428/371472 [5:04:49<2:15:31, 3.57it/s] 92%|█████████▏| 342429/371472 [5:04:49<2:18:37, 3.49it/s] 92%|█████████▏| 342430/371472 [5:04:50<2:14:13, 3.61it/s] 92%|█████████▏| 342431/371472 [5:04:50<2:20:00, 3.46it/s] 92%|█████████▏| 342432/371472 [5:04:50<2:14:21, 3.60it/s] 92%|█████████▏| 342433/371472 [5:04:50<2:11:27, 3.68it/s] 92%|█████████▏| 342434/371472 [5:04:51<2:08:40, 3.76it/s] 92%|█████████▏| 342435/371472 [5:04:51<2:08:00, 3.78it/s] 92%|█████████▏| 342436/371472 [5:04:51<2:16:13, 3.55it/s] 92%|█████████▏| 342437/371472 [5:04:52<2:19:51, 3.46it/s] 92%|█████████▏| 342438/371472 [5:04:52<2:16:39, 3.54it/s] 92%|█████████▏| 342439/371472 [5:04:52<2:15:33, 3.57it/s] 92%|█████████▏| 342440/371472 [5:04:52<2:16:46, 3.54it/s] {'loss': 2.5903, 'learning_rate': 1.7037643560516275e-07, 'epoch': 14.75} + 92%|█████████▏| 342440/371472 [5:04:52<2:16:46, 3.54it/s] 92%|█████████▏| 342441/371472 [5:04:53<2:19:54, 3.46it/s] 92%|█████████▏| 342442/371472 [5:04:53<2:14:18, 3.60it/s] 92%|█████████▏| 342443/371472 [5:04:53<2:19:26, 3.47it/s] 92%|█████████▏| 342444/371472 [5:04:54<2:20:40, 3.44it/s] 92%|█████████▏| 342445/371472 [5:04:54<2:16:07, 3.55it/s] 92%|█████████▏| 342446/371472 [5:04:54<2:16:41, 3.54it/s] 92%|█████████▏| 342447/371472 [5:04:54<2:11:09, 3.69it/s] 92%|█████████▏| 342448/371472 [5:04:55<2:16:09, 3.55it/s] 92%|█████████▏| 342449/371472 [5:04:55<2:17:38, 3.51it/s] 92%|█████████▏| 342450/371472 [5:04:55<2:22:08, 3.40it/s] 92%|█████████▏| 342451/371472 [5:04:56<2:20:05, 3.45it/s] 92%|█████████▏| 342452/371472 [5:04:56<2:16:18, 3.55it/s] 92%|█████████▏| 342453/371472 [5:04:56<2:13:00, 3.64it/s] 92%|█████████▏| 342454/371472 [5:04:56<2:10:32, 3.70it/s] 92%|█████████▏| 342455/371472 [5:04:57<2:16:21, 3.55it/s] 92%|█████████▏| 342456/371472 [5:04:57<2:17:17, 3.52it/s] 92%|█████████▏| 342457/371472 [5:04:57<2:12:14, 3.66it/s] 92%|█████████▏| 342458/371472 [5:04:57<2:08:02, 3.78it/s] 92%|█████████▏| 342459/371472 [5:04:58<2:09:56, 3.72it/s] 92%|█████████▏| 342460/371472 [5:04:58<2:07:25, 3.79it/s] {'loss': 2.4972, 'learning_rate': 1.7032795362968393e-07, 'epoch': 14.75} + 92%|█████████▏| 342460/371472 [5:04:58<2:07:25, 3.79it/s] 92%|█████████▏| 342461/371472 [5:04:58<2:10:03, 3.72it/s] 92%|█████████▏| 342462/371472 [5:04:58<2:14:06, 3.61it/s] 92%|█████████▏| 342463/371472 [5:04:59<2:15:55, 3.56it/s] 92%|█████████▏| 342464/371472 [5:04:59<2:26:17, 3.30it/s] 92%|█████████▏| 342465/371472 [5:04:59<2:26:02, 3.31it/s] 92%|█████████▏| 342466/371472 [5:05:00<2:21:45, 3.41it/s] 92%|█████████▏| 342467/371472 [5:05:00<2:18:32, 3.49it/s] 92%|█████████▏| 342468/371472 [5:05:00<2:15:42, 3.56it/s] 92%|█████████▏| 342469/371472 [5:05:01<2:24:07, 3.35it/s] 92%|█████████▏| 342470/371472 [5:05:01<2:33:27, 3.15it/s] 92%|█████████▏| 342471/371472 [5:05:01<2:41:32, 2.99it/s] 92%|█████████▏| 342472/371472 [5:05:02<2:39:55, 3.02it/s] 92%|█████████▏| 342473/371472 [5:05:02<2:29:56, 3.22it/s] 92%|█████████▏| 342474/371472 [5:05:02<2:28:28, 3.26it/s] 92%|█████████▏| 342475/371472 [5:05:03<2:39:04, 3.04it/s] 92%|█████████▏| 342476/371472 [5:05:03<2:28:44, 3.25it/s] 92%|█████████▏| 342477/371472 [5:05:03<2:27:23, 3.28it/s] 92%|█████████▏| 342478/371472 [5:05:03<2:23:01, 3.38it/s] 92%|█████████▏| 342479/371472 [5:05:04<2:16:10, 3.55it/s] 92%|█████████▏| 342480/371472 [5:05:04<2:16:37, 3.54it/s] {'loss': 2.6274, 'learning_rate': 1.7027947165420497e-07, 'epoch': 14.75} + 92%|█████████▏| 342480/371472 [5:05:04<2:16:37, 3.54it/s] 92%|█████████▏| 342481/371472 [5:05:04<2:13:14, 3.63it/s] 92%|█████████▏| 342482/371472 [5:05:05<2:18:46, 3.48it/s] 92%|█████████▏| 342483/371472 [5:05:05<2:24:05, 3.35it/s] 92%|█████████▏| 342484/371472 [5:05:05<2:30:18, 3.21it/s] 92%|█████████▏| 342485/371472 [5:05:05<2:22:51, 3.38it/s] 92%|█████████▏| 342486/371472 [5:05:06<2:24:19, 3.35it/s] 92%|█████████▏| 342487/371472 [5:05:06<2:20:59, 3.43it/s] 92%|█████████▏| 342488/371472 [5:05:06<2:23:55, 3.36it/s] 92%|█████████▏| 342489/371472 [5:05:07<2:35:29, 3.11it/s] 92%|█████████▏| 342490/371472 [5:05:07<2:24:58, 3.33it/s] 92%|█████████▏| 342491/371472 [5:05:07<2:21:32, 3.41it/s] 92%|█████████▏| 342492/371472 [5:05:08<2:23:50, 3.36it/s] 92%|█████████▏| 342493/371472 [5:05:08<2:21:47, 3.41it/s] 92%|█████████▏| 342494/371472 [5:05:08<2:19:49, 3.45it/s] 92%|█████████▏| 342495/371472 [5:05:08<2:15:30, 3.56it/s] 92%|█████████▏| 342496/371472 [5:05:09<2:16:07, 3.55it/s] 92%|█████████▏| 342497/371472 [5:05:09<2:16:01, 3.55it/s] 92%|█████████▏| 342498/371472 [5:05:09<2:11:14, 3.68it/s] 92%|█████████▏| 342499/371472 [5:05:10<2:17:37, 3.51it/s] 92%|█████████▏| 342500/371472 [5:05:10<2:14:11, 3.60it/s] {'loss': 2.6541, 'learning_rate': 1.7023098967872612e-07, 'epoch': 14.75} + 92%|█████████▏| 342500/371472 [5:05:10<2:14:11, 3.60it/s] 92%|█████████▏| 342501/371472 [5:05:10<2:26:04, 3.31it/s] 92%|█████████▏| 342502/371472 [5:05:10<2:26:02, 3.31it/s] 92%|█████████▏| 342503/371472 [5:05:11<2:25:06, 3.33it/s] 92%|█████████▏| 342504/371472 [5:05:11<2:27:43, 3.27it/s] 92%|█████████▏| 342505/371472 [5:05:11<2:31:41, 3.18it/s] 92%|█████████▏| 342506/371472 [5:05:12<2:27:09, 3.28it/s] 92%|█████████▏| 342507/371472 [5:05:12<2:22:27, 3.39it/s] 92%|█████████▏| 342508/371472 [5:05:12<2:16:54, 3.53it/s] 92%|█████████▏| 342509/371472 [5:05:13<2:19:12, 3.47it/s] 92%|█████████▏| 342510/371472 [5:05:13<2:16:58, 3.52it/s] 92%|█████████▏| 342511/371472 [5:05:13<2:18:11, 3.49it/s] 92%|█████████▏| 342512/371472 [5:05:13<2:21:16, 3.42it/s] 92%|█████████▏| 342513/371472 [5:05:14<2:18:33, 3.48it/s] 92%|█████████▏| 342514/371472 [5:05:14<2:22:15, 3.39it/s] 92%|█████████▏| 342515/371472 [5:05:14<2:27:14, 3.28it/s] 92%|█████████▏| 342516/371472 [5:05:15<2:27:38, 3.27it/s] 92%|█████████▏| 342517/371472 [5:05:15<2:17:05, 3.52it/s] 92%|█████████▏| 342518/371472 [5:05:15<2:16:03, 3.55it/s] 92%|█████████▏| 342519/371472 [5:05:15<2:11:21, 3.67it/s] 92%|█████████▏| 342520/371472 [5:05:16<2:09:58, 3.71it/s] {'loss': 2.6049, 'learning_rate': 1.701825077032472e-07, 'epoch': 14.75} + 92%|█████████▏| 342520/371472 [5:05:16<2:09:58, 3.71it/s] 92%|█████████▏| 342521/371472 [5:05:16<2:24:45, 3.33it/s] 92%|█████████▏| 342522/371472 [5:05:16<2:30:39, 3.20it/s] 92%|█████████▏| 342523/371472 [5:05:17<2:25:35, 3.31it/s] 92%|█████████▏| 342524/371472 [5:05:17<2:32:19, 3.17it/s] 92%|█████████▏| 342525/371472 [5:05:17<2:33:35, 3.14it/s] 92%|█████████▏| 342526/371472 [5:05:18<2:36:10, 3.09it/s] 92%|█████████▏| 342527/371472 [5:05:18<2:25:25, 3.32it/s] 92%|█████████▏| 342528/371472 [5:05:18<2:22:11, 3.39it/s] 92%|█████████▏| 342529/371472 [5:05:18<2:18:14, 3.49it/s] 92%|█████████▏| 342530/371472 [5:05:19<2:12:44, 3.63it/s] 92%|█████████▏| 342531/371472 [5:05:19<2:15:05, 3.57it/s] 92%|█████████▏| 342532/371472 [5:05:19<2:13:29, 3.61it/s] 92%|█████████▏| 342533/371472 [5:05:20<2:13:35, 3.61it/s] 92%|█████████▏| 342534/371472 [5:05:20<2:14:28, 3.59it/s] 92%|█████████▏| 342535/371472 [5:05:20<2:11:25, 3.67it/s] 92%|█████████▏| 342536/371472 [5:05:20<2:10:04, 3.71it/s] 92%|█████████▏| 342537/371472 [5:05:21<2:14:04, 3.60it/s] 92%|█████████▏| 342538/371472 [5:05:21<2:22:54, 3.37it/s] 92%|█████████▏| 342539/371472 [5:05:21<2:32:58, 3.15it/s] 92%|█████████▏| 342540/371472 [5:05:22<2:36:10, 3.09it/s] {'loss': 2.5887, 'learning_rate': 1.7013402572776834e-07, 'epoch': 14.75} + 92%|█████████▏| 342540/371472 [5:05:22<2:36:10, 3.09it/s] 92%|█████████▏| 342541/371472 [5:05:22<2:29:23, 3.23it/s] 92%|█████████▏| 342542/371472 [5:05:22<2:30:04, 3.21it/s] 92%|█████████▏| 342543/371472 [5:05:23<2:24:53, 3.33it/s] 92%|█████████▏| 342544/371472 [5:05:23<2:26:19, 3.30it/s] 92%|█████████▏| 342545/371472 [5:05:23<2:24:46, 3.33it/s] 92%|█████████▏| 342546/371472 [5:05:23<2:20:56, 3.42it/s] 92%|█████████▏| 342547/371472 [5:05:24<2:19:03, 3.47it/s] 92%|█████████▏| 342548/371472 [5:05:24<2:13:23, 3.61it/s] 92%|█████████▏| 342549/371472 [5:05:24<2:12:15, 3.64it/s] 92%|█████████▏| 342550/371472 [5:05:24<2:09:56, 3.71it/s] 92%|█████████▏| 342551/371472 [5:05:25<2:20:56, 3.42it/s] 92%|█████████▏| 342552/371472 [5:05:25<2:13:31, 3.61it/s] 92%|█████████▏| 342553/371472 [5:05:25<2:11:41, 3.66it/s] 92%|█████████▏| 342554/371472 [5:05:26<2:11:18, 3.67it/s] 92%|█████████▏| 342555/371472 [5:05:26<2:09:25, 3.72it/s] 92%|█████████▏| 342556/371472 [5:05:26<2:12:45, 3.63it/s] 92%|█████████▏| 342557/371472 [5:05:26<2:08:51, 3.74it/s] 92%|█████████▏| 342558/371472 [5:05:27<2:07:10, 3.79it/s] 92%|█████████▏| 342559/371472 [5:05:27<2:04:58, 3.86it/s] 92%|█████████▏| 342560/371472 [5:05:27<2:06:35, 3.81it/s] {'loss': 2.5643, 'learning_rate': 1.700855437522894e-07, 'epoch': 14.75} + 92%|█████████▏| 342560/371472 [5:05:27<2:06:35, 3.81it/s] 92%|█████████▏| 342561/371472 [5:05:27<2:06:54, 3.80it/s] 92%|█████████▏| 342562/371472 [5:05:28<2:10:50, 3.68it/s] 92%|█████████▏| 342563/371472 [5:05:28<2:17:34, 3.50it/s] 92%|█████████▏| 342564/371472 [5:05:28<2:19:02, 3.47it/s] 92%|█████████▏| 342565/371472 [5:05:29<2:23:51, 3.35it/s] 92%|█████████▏| 342566/371472 [5:05:29<2:35:51, 3.09it/s] 92%|█████████▏| 342567/371472 [5:05:29<2:29:59, 3.21it/s] 92%|█████████▏| 342568/371472 [5:05:30<2:31:17, 3.18it/s] 92%|█████████▏| 342569/371472 [5:05:30<2:35:40, 3.09it/s] 92%|█████████▏| 342570/371472 [5:05:30<2:26:59, 3.28it/s] 92%|█████████▏| 342571/371472 [5:05:30<2:18:31, 3.48it/s] 92%|█████████▏| 342572/371472 [5:05:31<2:20:22, 3.43it/s] 92%|█████████▏| 342573/371472 [5:05:31<2:20:39, 3.42it/s] 92%|█████████▏| 342574/371472 [5:05:31<2:31:34, 3.18it/s] 92%|█████████▏| 342575/371472 [5:05:32<2:26:10, 3.29it/s] 92%|█████████▏| 342576/371472 [5:05:32<2:27:56, 3.26it/s] 92%|█████████▏| 342577/371472 [5:05:32<2:17:51, 3.49it/s] 92%|█████████▏| 342578/371472 [5:05:33<2:11:17, 3.67it/s] 92%|█████████▏| 342579/371472 [5:05:33<2:10:45, 3.68it/s] 92%|█████████▏| 342580/371472 [5:05:33<2:09:12, 3.73it/s] {'loss': 2.537, 'learning_rate': 1.7003706177681057e-07, 'epoch': 14.76} + 92%|█████████▏| 342580/371472 [5:05:33<2:09:12, 3.73it/s] 92%|█████████▏| 342581/371472 [5:05:33<2:16:09, 3.54it/s] 92%|█████████▏| 342582/371472 [5:05:34<2:13:36, 3.60it/s] 92%|█████████▏| 342583/371472 [5:05:34<2:10:12, 3.70it/s] 92%|█████████▏| 342584/371472 [5:05:34<2:08:26, 3.75it/s] 92%|█████████▏| 342585/371472 [5:05:34<2:14:37, 3.58it/s] 92%|█████████▏| 342586/371472 [5:05:35<2:13:11, 3.61it/s] 92%|█████████▏| 342587/371472 [5:05:35<2:13:54, 3.60it/s] 92%|█████████▏| 342588/371472 [5:05:35<2:12:05, 3.64it/s] 92%|█████████▏| 342589/371472 [5:05:36<2:13:49, 3.60it/s] 92%|█████████▏| 342590/371472 [5:05:36<2:20:12, 3.43it/s] 92%|█████████▏| 342591/371472 [5:05:36<2:20:55, 3.42it/s] 92%|█████████▏| 342592/371472 [5:05:36<2:14:25, 3.58it/s] 92%|█████████▏| 342593/371472 [5:05:37<2:11:49, 3.65it/s] 92%|█████████▏| 342594/371472 [5:05:37<2:11:51, 3.65it/s] 92%|█████████▏| 342595/371472 [5:05:37<2:14:09, 3.59it/s] 92%|█████████▏| 342596/371472 [5:05:38<2:23:39, 3.35it/s] 92%|█████████▏| 342597/371472 [5:05:38<2:22:32, 3.38it/s] 92%|█████████▏| 342598/371472 [5:05:38<2:23:11, 3.36it/s] 92%|█████████▏| 342599/371472 [5:05:38<2:17:32, 3.50it/s] 92%|█████████▏| 342600/371472 [5:05:39<2:17:59, 3.49it/s] {'loss': 2.6809, 'learning_rate': 1.699885798013316e-07, 'epoch': 14.76} + 92%|█████████▏| 342600/371472 [5:05:39<2:17:59, 3.49it/s] 92%|█████████▏| 342601/371472 [5:05:39<2:14:57, 3.57it/s] 92%|█████████▏| 342602/371472 [5:05:39<2:09:38, 3.71it/s] 92%|█████████▏| 342603/371472 [5:05:39<2:07:13, 3.78it/s] 92%|█████████▏| 342604/371472 [5:05:40<2:03:50, 3.89it/s] 92%|█████████▏| 342605/371472 [5:05:40<2:03:06, 3.91it/s] 92%|█████████▏| 342606/371472 [5:05:40<2:07:18, 3.78it/s] 92%|█████████▏| 342607/371472 [5:05:41<2:08:51, 3.73it/s] 92%|█████████▏| 342608/371472 [5:05:41<2:21:59, 3.39it/s] 92%|█████████▏| 342609/371472 [5:05:41<2:34:40, 3.11it/s] 92%|█████████▏| 342610/371472 [5:05:42<2:24:09, 3.34it/s] 92%|█████████▏| 342611/371472 [5:05:42<2:17:39, 3.49it/s] 92%|█████████▏| 342612/371472 [5:05:42<2:13:40, 3.60it/s] 92%|█████████▏| 342613/371472 [5:05:42<2:14:37, 3.57it/s] 92%|█████████▏| 342614/371472 [5:05:43<2:09:31, 3.71it/s] 92%|█████████▏| 342615/371472 [5:05:43<2:08:32, 3.74it/s] 92%|████████���▏| 342616/371472 [5:05:43<2:18:42, 3.47it/s] 92%|█████████▏| 342617/371472 [5:05:43<2:22:03, 3.39it/s] 92%|█████████▏| 342618/371472 [5:05:44<2:22:09, 3.38it/s] 92%|█████████▏| 342619/371472 [5:05:44<2:16:22, 3.53it/s] 92%|█████████▏| 342620/371472 [5:05:44<2:14:57, 3.56it/s] {'loss': 2.7381, 'learning_rate': 1.6994009782585276e-07, 'epoch': 14.76} + 92%|█████████▏| 342620/371472 [5:05:44<2:14:57, 3.56it/s] 92%|█████████▏| 342621/371472 [5:05:45<2:14:12, 3.58it/s] 92%|█████████▏| 342622/371472 [5:05:45<2:16:16, 3.53it/s] 92%|█████████▏| 342623/371472 [5:05:45<2:20:22, 3.43it/s] 92%|█████████▏| 342624/371472 [5:05:45<2:15:42, 3.54it/s] 92%|█████████▏| 342625/371472 [5:05:46<2:18:11, 3.48it/s] 92%|█████████▏| 342626/371472 [5:05:46<2:20:34, 3.42it/s] 92%|█████████▏| 342627/371472 [5:05:46<2:28:42, 3.23it/s] 92%|█████████▏| 342628/371472 [5:05:47<2:33:17, 3.14it/s] 92%|█████████▏| 342629/371472 [5:05:47<2:24:23, 3.33it/s] 92%|█████████▏| 342630/371472 [5:05:47<2:14:21, 3.58it/s] 92%|█████████▏| 342631/371472 [5:05:47<2:13:20, 3.60it/s] 92%|█████████▏| 342632/371472 [5:05:48<2:11:09, 3.66it/s] 92%|█████████▏| 342633/371472 [5:05:48<2:15:05, 3.56it/s] 92%|█████████▏| 342634/371472 [5:05:48<2:10:43, 3.68it/s] 92%|█████████▏| 342635/371472 [5:05:49<2:09:53, 3.70it/s] 92%|█████████▏| 342636/371472 [5:05:49<2:06:54, 3.79it/s] 92%|█████████▏| 342637/371472 [5:05:49<2:05:21, 3.83it/s] 92%|█████████▏| 342638/371472 [5:05:49<2:18:19, 3.47it/s] 92%|█████████▏| 342639/371472 [5:05:50<2:12:23, 3.63it/s] 92%|█████████▏| 342640/371472 [5:05:50<2:22:27, 3.37it/s] {'loss': 2.5811, 'learning_rate': 1.6989161585037383e-07, 'epoch': 14.76} + 92%|█████████▏| 342640/371472 [5:05:50<2:22:27, 3.37it/s] 92%|█████████▏| 342641/371472 [5:05:50<2:24:19, 3.33it/s] 92%|█████████▏| 342642/371472 [5:05:51<2:47:45, 2.86it/s] 92%|█████████▏| 342643/371472 [5:05:51<2:35:51, 3.08it/s] 92%|█████████▏| 342644/371472 [5:05:51<2:36:05, 3.08it/s] 92%|█████████▏| 342645/371472 [5:05:52<2:32:47, 3.14it/s] 92%|█████████▏| 342646/371472 [5:05:52<2:23:24, 3.35it/s] 92%|█████████▏| 342647/371472 [5:05:52<2:24:51, 3.32it/s] 92%|█████████▏| 342648/371472 [5:05:53<2:19:11, 3.45it/s] 92%|█████████▏| 342649/371472 [5:05:53<2:15:10, 3.55it/s] 92%|█████████▏| 342650/371472 [5:05:53<2:12:52, 3.62it/s] 92%|█████████▏| 342651/371472 [5:05:53<2:16:06, 3.53it/s] 92%|█████████▏| 342652/371472 [5:05:54<2:18:04, 3.48it/s] 92%|█████████▏| 342653/371472 [5:05:54<2:14:51, 3.56it/s] 92%|█████████▏| 342654/371472 [5:05:54<2:17:45, 3.49it/s] 92%|█████████▏| 342655/371472 [5:05:54<2:18:34, 3.47it/s] 92%|█████████▏| 342656/371472 [5:05:55<2:13:21, 3.60it/s] 92%|█████████▏| 342657/371472 [5:05:55<2:11:12, 3.66it/s] 92%|█████████▏| 342658/371472 [5:05:55<2:09:57, 3.70it/s] 92%|█████████▏| 342659/371472 [5:05:56<2:11:12, 3.66it/s] 92%|█████████▏| 342660/371472 [5:05:56<2:13:15, 3.60it/s] {'loss': 2.5676, 'learning_rate': 1.6984313387489498e-07, 'epoch': 14.76} + 92%|█████████▏| 342660/371472 [5:05:56<2:13:15, 3.60it/s] 92%|█████████▏| 342661/371472 [5:05:56<2:12:44, 3.62it/s] 92%|█████████▏| 342662/371472 [5:05:56<2:12:27, 3.63it/s] 92%|█████████▏| 342663/371472 [5:05:57<2:15:40, 3.54it/s] 92%|█████████▏| 342664/371472 [5:05:57<2:18:23, 3.47it/s] 92%|█████████▏| 342665/371472 [5:05:57<2:13:43, 3.59it/s] 92%|█████████▏| 342666/371472 [5:05:58<2:12:12, 3.63it/s] 92%|█████████▏| 342667/371472 [5:05:58<2:08:30, 3.74it/s] 92%|█████████▏| 342668/371472 [5:05:58<2:06:59, 3.78it/s] 92%|█████████▏| 342669/371472 [5:05:58<2:08:57, 3.72it/s] 92%|█████████▏| 342670/371472 [5:05:59<2:12:49, 3.61it/s] 92%|█████████▏| 342671/371472 [5:05:59<2:10:57, 3.67it/s] 92%|█████��███▏| 342672/371472 [5:05:59<2:19:29, 3.44it/s] 92%|█████████▏| 342673/371472 [5:05:59<2:17:22, 3.49it/s] 92%|█████████▏| 342674/371472 [5:06:00<2:23:31, 3.34it/s] 92%|█████████▏| 342675/371472 [5:06:00<2:21:05, 3.40it/s] 92%|█████████▏| 342676/371472 [5:06:00<2:18:29, 3.47it/s] 92%|█████████▏| 342677/371472 [5:06:01<2:22:26, 3.37it/s] 92%|█████████▏| 342678/371472 [5:06:01<2:20:30, 3.42it/s] 92%|█████████▏| 342679/371472 [5:06:01<2:18:57, 3.45it/s] 92%|█████████▏| 342680/371472 [5:06:01<2:12:51, 3.61it/s] {'loss': 2.6058, 'learning_rate': 1.6979465189941603e-07, 'epoch': 14.76} + 92%|█████████▏| 342680/371472 [5:06:01<2:12:51, 3.61it/s] 92%|█████████▏| 342681/371472 [5:06:02<2:18:06, 3.47it/s] 92%|█████████▏| 342682/371472 [5:06:02<2:25:35, 3.30it/s] 92%|█████████▏| 342683/371472 [5:06:02<2:16:32, 3.51it/s] 92%|█████████▏| 342684/371472 [5:06:03<2:15:18, 3.55it/s] 92%|█████████▏| 342685/371472 [5:06:03<2:11:47, 3.64it/s] 92%|█████████▏| 342686/371472 [5:06:03<2:06:37, 3.79it/s] 92%|█████████▏| 342687/371472 [5:06:03<2:05:05, 3.84it/s] 92%|█████████▏| 342688/371472 [5:06:04<2:30:46, 3.18it/s] 92%|█████████▏| 342689/371472 [5:06:04<2:26:24, 3.28it/s] 92%|█████████▏| 342690/371472 [5:06:04<2:23:28, 3.34it/s] 92%|█████████▏| 342691/371472 [5:06:05<2:25:34, 3.29it/s] 92%|█████████▏| 342692/371472 [5:06:05<2:19:32, 3.44it/s] 92%|█████████▏| 342693/371472 [5:06:05<2:22:01, 3.38it/s] 92%|█████████▏| 342694/371472 [5:06:06<2:14:25, 3.57it/s] 92%|█████████▏| 342695/371472 [5:06:06<2:16:38, 3.51it/s] 92%|█████████▏| 342696/371472 [5:06:06<2:17:48, 3.48it/s] 92%|█████████▏| 342697/371472 [5:06:06<2:14:18, 3.57it/s] 92%|█████████▏| 342698/371472 [5:06:07<2:11:19, 3.65it/s] 92%|█████████▏| 342699/371472 [5:06:07<2:11:12, 3.65it/s] 92%|█████████▏| 342700/371472 [5:06:07<2:12:24, 3.62it/s] {'loss': 2.7432, 'learning_rate': 1.697461699239372e-07, 'epoch': 14.76} + 92%|█████████▏| 342700/371472 [5:06:07<2:12:24, 3.62it/s] 92%|█████████▏| 342701/371472 [5:06:07<2:14:12, 3.57it/s] 92%|█████████▏| 342702/371472 [5:06:08<2:11:51, 3.64it/s] 92%|█████████▏| 342703/371472 [5:06:08<2:08:19, 3.74it/s] 92%|█████████▏| 342704/371472 [5:06:08<2:07:04, 3.77it/s] 92%|█████████▏| 342705/371472 [5:06:09<2:10:13, 3.68it/s] 92%|█████████▏| 342706/371472 [5:06:09<2:39:07, 3.01it/s] 92%|█████████▏| 342707/371472 [5:06:09<2:34:41, 3.10it/s] 92%|█████████▏| 342708/371472 [5:06:10<2:29:27, 3.21it/s] 92%|█████████▏| 342709/371472 [5:06:10<2:22:42, 3.36it/s] 92%|█████████▏| 342710/371472 [5:06:10<2:25:42, 3.29it/s] 92%|█████████▏| 342711/371472 [5:06:10<2:22:22, 3.37it/s] 92%|█████████▏| 342712/371472 [5:06:11<2:19:07, 3.45it/s] 92%|█████████▏| 342713/371472 [5:06:11<2:13:56, 3.58it/s] 92%|█████████▏| 342714/371472 [5:06:11<2:27:09, 3.26it/s] 92%|█████████▏| 342715/371472 [5:06:12<2:22:21, 3.37it/s] 92%|█████████▏| 342716/371472 [5:06:12<2:25:16, 3.30it/s] 92%|█████████▏| 342717/371472 [5:06:12<2:23:54, 3.33it/s] 92%|█████████▏| 342718/371472 [5:06:13<2:15:52, 3.53it/s] 92%|█████████▏| 342719/371472 [5:06:13<2:20:27, 3.41it/s] 92%|█████████▏| 342720/371472 [5:06:13<2:19:01, 3.45it/s] {'loss': 2.7373, 'learning_rate': 1.6969768794845825e-07, 'epoch': 14.76} + 92%|█████████▏| 342720/371472 [5:06:13<2:19:01, 3.45it/s] 92%|█████████▏| 342721/371472 [5:06:13<2:14:40, 3.56it/s] 92%|█████████▏| 342722/371472 [5:06:14<2:13:38, 3.59it/s] 92%|█████████▏| 342723/371472 [5:06:14<2:12:21, 3.62it/s] 92%|█████████▏| 342724/371472 [5:06:14<2:13:18, 3.59it/s] 92%|█████████▏| 342725/371472 [5:06:14<2:16:38, 3.51it/s] 92%|█████████▏| 342726/371472 [5:06:15<2:13:13, 3.60it/s] 92%|█████████▏| 342727/371472 [5:06:15<2:14:41, 3.56it/s] 92%|███���█████▏| 342728/371472 [5:06:15<2:13:07, 3.60it/s] 92%|█████████▏| 342729/371472 [5:06:16<2:14:33, 3.56it/s] 92%|█████████▏| 342730/371472 [5:06:16<2:23:42, 3.33it/s] 92%|█████████▏| 342731/371472 [5:06:16<2:26:48, 3.26it/s] 92%|█████████▏| 342732/371472 [5:06:17<2:23:18, 3.34it/s] 92%|█████████▏| 342733/371472 [5:06:17<2:23:31, 3.34it/s] 92%|█████████▏| 342734/371472 [5:06:17<2:25:22, 3.29it/s] 92%|█████████▏| 342735/371472 [5:06:17<2:16:19, 3.51it/s] 92%|█████████▏| 342736/371472 [5:06:18<2:17:49, 3.48it/s] 92%|█████████▏| 342737/371472 [5:06:18<2:20:31, 3.41it/s] 92%|█████████▏| 342738/371472 [5:06:18<2:13:06, 3.60it/s] 92%|█████████▏| 342739/371472 [5:06:18<2:07:43, 3.75it/s] 92%|█████████▏| 342740/371472 [5:06:19<2:10:33, 3.67it/s] {'loss': 2.6206, 'learning_rate': 1.696492059729794e-07, 'epoch': 14.76} + 92%|█████████▏| 342740/371472 [5:06:19<2:10:33, 3.67it/s] 92%|█████████▏| 342741/371472 [5:06:19<2:17:12, 3.49it/s] 92%|█████████▏| 342742/371472 [5:06:19<2:18:27, 3.46it/s] 92%|█████████▏| 342743/371472 [5:06:20<2:15:28, 3.53it/s] 92%|█████████▏| 342744/371472 [5:06:20<2:10:44, 3.66it/s] 92%|█████████▏| 342745/371472 [5:06:20<2:09:22, 3.70it/s] 92%|█████████▏| 342746/371472 [5:06:20<2:13:31, 3.59it/s] 92%|█████████▏| 342747/371472 [5:06:21<2:17:58, 3.47it/s] 92%|█████████▏| 342748/371472 [5:06:21<2:22:22, 3.36it/s] 92%|█████████▏| 342749/371472 [5:06:21<2:20:06, 3.42it/s] 92%|█████████▏| 342750/371472 [5:06:22<2:16:41, 3.50it/s] 92%|█████████▏| 342751/371472 [5:06:22<2:12:24, 3.62it/s] 92%|█████████▏| 342752/371472 [5:06:22<2:11:31, 3.64it/s] 92%|█████████▏| 342753/371472 [5:06:22<2:13:10, 3.59it/s] 92%|█████████▏| 342754/371472 [5:06:23<2:15:34, 3.53it/s] 92%|█████████▏| 342755/371472 [5:06:23<2:17:29, 3.48it/s] 92%|█████████▏| 342756/371472 [5:06:23<2:12:52, 3.60it/s] 92%|█████████▏| 342757/371472 [5:06:24<2:12:04, 3.62it/s] 92%|█████████▏| 342758/371472 [5:06:24<2:22:04, 3.37it/s] 92%|█████████▏| 342759/371472 [5:06:24<2:29:04, 3.21it/s] 92%|█████████▏| 342760/371472 [5:06:25<2:25:02, 3.30it/s] {'loss': 2.6122, 'learning_rate': 1.6960072399750047e-07, 'epoch': 14.76} + 92%|█████████▏| 342760/371472 [5:06:25<2:25:02, 3.30it/s] 92%|█████████▏| 342761/371472 [5:06:25<2:27:20, 3.25it/s] 92%|█████████▏| 342762/371472 [5:06:25<2:24:15, 3.32it/s] 92%|█████████▏| 342763/371472 [5:06:25<2:20:07, 3.41it/s] 92%|█████████▏| 342764/371472 [5:06:26<2:13:53, 3.57it/s] 92%|█████████▏| 342765/371472 [5:06:26<2:10:54, 3.65it/s] 92%|█████████▏| 342766/371472 [5:06:26<2:14:10, 3.57it/s] 92%|█████████▏| 342767/371472 [5:06:27<2:19:01, 3.44it/s] 92%|█████████▏| 342768/371472 [5:06:27<2:42:19, 2.95it/s] 92%|█████████▏| 342769/371472 [5:06:27<2:31:12, 3.16it/s] 92%|█████████▏| 342770/371472 [5:06:28<2:27:07, 3.25it/s] 92%|█████████▏| 342771/371472 [5:06:28<2:25:29, 3.29it/s] 92%|█████████▏| 342772/371472 [5:06:28<2:22:25, 3.36it/s] 92%|█████████▏| 342773/371472 [5:06:28<2:27:34, 3.24it/s] 92%|█████████▏| 342774/371472 [5:06:29<2:21:55, 3.37it/s] 92%|█████████▏| 342775/371472 [5:06:29<2:22:02, 3.37it/s] 92%|█████████▏| 342776/371472 [5:06:29<2:20:16, 3.41it/s] 92%|█████████▏| 342777/371472 [5:06:30<2:22:36, 3.35it/s] 92%|█████████▏| 342778/371472 [5:06:30<2:20:34, 3.40it/s] 92%|█████████▏| 342779/371472 [5:06:30<2:19:06, 3.44it/s] 92%|█████████▏| 342780/371472 [5:06:31<2:24:20, 3.31it/s] {'loss': 2.6639, 'learning_rate': 1.6955224202202163e-07, 'epoch': 14.76} + 92%|█████████▏| 342780/371472 [5:06:31<2:24:20, 3.31it/s] 92%|█████████▏| 342781/371472 [5:06:31<2:18:14, 3.46it/s] 92%|█████████▏| 342782/371472 [5:06:31<2:13:50, 3.57it/s] 92%|█████████▏| 342783/371472 [5:06:31<2:08:35, 3.72it/s] 92%|█████████▏| 342784/371472 [5:06:32<2:10:28, 3.66it/s] 92%|█████████▏| 342785/371472 [5:06:32<2:19:38, 3.42it/s] 92%|█████████▏| 342786/371472 [5:06:32<2:15:26, 3.53it/s] 92%|█████████▏| 342787/371472 [5:06:32<2:12:56, 3.60it/s] 92%|█████████▏| 342788/371472 [5:06:33<2:20:55, 3.39it/s] 92%|█████████▏| 342789/371472 [5:06:33<2:15:55, 3.52it/s] 92%|█████████▏| 342790/371472 [5:06:33<2:13:55, 3.57it/s] 92%|█████████▏| 342791/371472 [5:06:34<2:12:50, 3.60it/s] 92%|█████████▏| 342792/371472 [5:06:34<2:09:08, 3.70it/s] 92%|█████████▏| 342793/371472 [5:06:34<2:13:04, 3.59it/s] 92%|█████████▏| 342794/371472 [5:06:34<2:13:41, 3.57it/s] 92%|█████████▏| 342795/371472 [5:06:35<2:17:07, 3.49it/s] 92%|█████████▏| 342796/371472 [5:06:35<2:11:58, 3.62it/s] 92%|█████████▏| 342797/371472 [5:06:35<2:09:48, 3.68it/s] 92%|█████████▏| 342798/371472 [5:06:35<2:05:26, 3.81it/s] 92%|█████████▏| 342799/371472 [5:06:36<2:08:58, 3.71it/s] 92%|█████████▏| 342800/371472 [5:06:36<2:09:38, 3.69it/s] {'loss': 2.715, 'learning_rate': 1.6950376004654267e-07, 'epoch': 14.77} + 92%|█████████▏| 342800/371472 [5:06:36<2:09:38, 3.69it/s] 92%|█████████▏| 342801/371472 [5:06:36<2:18:55, 3.44it/s] 92%|█████████▏| 342802/371472 [5:06:37<2:14:50, 3.54it/s] 92%|█████████▏| 342803/371472 [5:06:37<2:20:27, 3.40it/s] 92%|█████████▏| 342804/371472 [5:06:37<2:20:01, 3.41it/s] 92%|█████████▏| 342805/371472 [5:06:37<2:15:30, 3.53it/s] 92%|█████████▏| 342806/371472 [5:06:38<2:14:22, 3.56it/s] 92%|█████████▏| 342807/371472 [5:06:38<2:09:11, 3.70it/s] 92%|█████████▏| 342808/371472 [5:06:38<2:08:10, 3.73it/s] 92%|█████████▏| 342809/371472 [5:06:39<2:09:50, 3.68it/s] 92%|█████████▏| 342810/371472 [5:06:39<2:09:34, 3.69it/s] 92%|█████████▏| 342811/371472 [5:06:39<2:11:16, 3.64it/s] 92%|█████████▏| 342812/371472 [5:06:39<2:06:58, 3.76it/s] 92%|█████████▏| 342813/371472 [5:06:40<2:18:05, 3.46it/s] 92%|█████████▏| 342814/371472 [5:06:40<2:13:56, 3.57it/s] 92%|█████████▏| 342815/371472 [5:06:40<2:13:18, 3.58it/s] 92%|█████████▏| 342816/371472 [5:06:40<2:08:47, 3.71it/s] 92%|█████████▏| 342817/371472 [5:06:41<2:12:20, 3.61it/s] 92%|█████████▏| 342818/371472 [5:06:41<2:09:35, 3.69it/s] 92%|█████████▏| 342819/371472 [5:06:41<2:09:53, 3.68it/s] 92%|█████████▏| 342820/371472 [5:06:42<2:09:17, 3.69it/s] {'loss': 2.5814, 'learning_rate': 1.6945527807106374e-07, 'epoch': 14.77} + 92%|█████████▏| 342820/371472 [5:06:42<2:09:17, 3.69it/s] 92%|█████████▏| 342821/371472 [5:06:42<2:18:49, 3.44it/s] 92%|█████████▏| 342822/371472 [5:06:42<2:12:16, 3.61it/s] 92%|█████████▏| 342823/371472 [5:06:42<2:17:51, 3.46it/s] 92%|█████████▏| 342824/371472 [5:06:43<2:11:58, 3.62it/s] 92%|█████████▏| 342825/371472 [5:06:43<2:09:07, 3.70it/s] 92%|█████████▏| 342826/371472 [5:06:43<2:06:52, 3.76it/s] 92%|█████████▏| 342827/371472 [5:06:44<2:33:17, 3.11it/s] 92%|█████████▏| 342828/371472 [5:06:44<2:27:45, 3.23it/s] 92%|█████████▏| 342829/371472 [5:06:44<2:25:53, 3.27it/s] 92%|█████████▏| 342830/371472 [5:06:45<2:22:06, 3.36it/s] 92%|█████████▏| 342831/371472 [5:06:45<2:21:52, 3.36it/s] 92%|█████████▏| 342832/371472 [5:06:45<2:35:44, 3.06it/s] 92%|█████████▏| 342833/371472 [5:06:46<2:27:51, 3.23it/s] 92%|█████████▏| 342834/371472 [5:06:46<2:19:28, 3.42it/s] 92%|█████████▏| 342835/371472 [5:06:46<2:15:52, 3.51it/s] 92%|█████████▏| 342836/371472 [5:06:46<2:14:03, 3.56it/s] 92%|█████████▏| 342837/371472 [5:06:47<2:13:34, 3.57it/s] 92%|█████████▏| 342838/371472 [5:06:47<2:11:44, 3.62it/s] 92%|█████████▏| 342839/371472 [5:06:47<2:08:08, 3.72it/s] 92%|█████████▏| 342840/371472 [5:06:47<2:24:18, 3.31it/s] {'loss': 2.6579, 'learning_rate': 1.6940679609558492e-07, 'epoch': 14.77} + 92%|█████████▏| 342840/371472 [5:06:47<2:24:18, 3.31it/s] 92%|█████████▏| 342841/371472 [5:06:48<2:19:51, 3.41it/s] 92%|█████████▏| 342842/371472 [5:06:48<2:19:42, 3.42it/s] 92%|█████████▏| 342843/371472 [5:06:48<2:15:42, 3.52it/s] 92%|█████████▏| 342844/371472 [5:06:49<2:12:47, 3.59it/s] 92%|█████████▏| 342845/371472 [5:06:49<2:11:52, 3.62it/s] 92%|█████████▏| 342846/371472 [5:06:49<2:11:46, 3.62it/s] 92%|█████████▏| 342847/371472 [5:06:49<2:09:05, 3.70it/s] 92%|█████████▏| 342848/371472 [5:06:50<2:13:30, 3.57it/s] 92%|█████████▏| 342849/371472 [5:06:50<2:11:17, 3.63it/s] 92%|█████████▏| 342850/371472 [5:06:50<2:16:35, 3.49it/s] 92%|█████████▏| 342851/371472 [5:06:50<2:09:38, 3.68it/s] 92%|█████████▏| 342852/371472 [5:06:51<2:09:50, 3.67it/s] 92%|█████████▏| 342853/371472 [5:06:51<2:11:14, 3.63it/s] 92%|█████████▏| 342854/371472 [5:06:51<2:11:16, 3.63it/s] 92%|█████████▏| 342855/371472 [5:06:52<2:13:20, 3.58it/s] 92%|█████████▏| 342856/371472 [5:06:52<2:12:27, 3.60it/s] 92%|█████████▏| 342857/371472 [5:06:52<2:13:00, 3.59it/s] 92%|█████████▏| 342858/371472 [5:06:52<2:14:21, 3.55it/s] 92%|█████████▏| 342859/371472 [5:06:53<2:16:19, 3.50it/s] 92%|█████████▏| 342860/371472 [5:06:53<2:31:30, 3.15it/s] {'loss': 2.5533, 'learning_rate': 1.6935831412010596e-07, 'epoch': 14.77} + 92%|█████████▏| 342860/371472 [5:06:53<2:31:30, 3.15it/s] 92%|█████████▏| 342861/371472 [5:06:53<2:26:13, 3.26it/s] 92%|█████████▏| 342862/371472 [5:06:54<2:21:24, 3.37it/s] 92%|█████████▏| 342863/371472 [5:06:54<2:23:44, 3.32it/s] 92%|█████████▏| 342864/371472 [5:06:54<2:29:23, 3.19it/s] 92%|█████████▏| 342865/371472 [5:06:55<2:31:40, 3.14it/s] 92%|█████████▏| 342866/371472 [5:06:55<2:24:28, 3.30it/s] 92%|█████████▏| 342867/371472 [5:06:55<2:26:52, 3.25it/s] 92%|█████████▏| 342868/371472 [5:06:56<2:26:48, 3.25it/s] 92%|█████████▏| 342869/371472 [5:06:56<2:25:43, 3.27it/s] 92%|█████████▏| 342870/371472 [5:06:56<2:28:06, 3.22it/s] 92%|█████████▏| 342871/371472 [5:06:56<2:20:36, 3.39it/s] 92%|█████████▏| 342872/371472 [5:06:57<2:24:55, 3.29it/s] 92%|█████████▏| 342873/371472 [5:06:57<2:20:51, 3.38it/s] 92%|█████████▏| 342874/371472 [5:06:57<2:39:02, 3.00it/s] 92%|█████████▏| 342875/371472 [5:06:58<2:33:20, 3.11it/s] 92%|█████████▏| 342876/371472 [5:06:58<2:27:57, 3.22it/s] 92%|█████████▏| 342877/371472 [5:06:58<2:22:40, 3.34it/s] 92%|█████████▏| 342878/371472 [5:06:59<2:16:14, 3.50it/s] 92%|█████████▏| 342879/371472 [5:06:59<2:13:07, 3.58it/s] 92%|█████████▏| 342880/371472 [5:06:59<2:12:26, 3.60it/s] {'loss': 2.4367, 'learning_rate': 1.693098321446271e-07, 'epoch': 14.77} + 92%|█████████▏| 342880/371472 [5:06:59<2:12:26, 3.60it/s] 92%|█████████▏| 342881/371472 [5:06:59<2:11:56, 3.61it/s] 92%|█████████▏| 342882/371472 [5:07:00<2:09:39, 3.68it/s] 92%|█████████▏| 342883/371472 [5:07:00<2:12:46, 3.59it/s] 92%|█████████▏| 342884/371472 [5:07:00<2:11:02, 3.64it/s] 92%|█████████▏| 342885/371472 [5:07:00<2:06:04, 3.78it/s] 92%|█████████▏| 342886/371472 [5:07:01<2:08:06, 3.72it/s] 92%|█████████▏| 342887/371472 [5:07:01<2:11:45, 3.62it/s] 92%|█████████▏| 342888/371472 [5:07:01<2:10:53, 3.64it/s] 92%|█████████▏| 342889/371472 [5:07:02<2:12:42, 3.59it/s] 92%|█████████▏| 342890/371472 [5:07:02<2:12:20, 3.60it/s] 92%|█████████▏| 342891/371472 [5:07:02<2:09:58, 3.66it/s] 92%|█████████▏| 342892/371472 [5:07:02<2:09:17, 3.68it/s] 92%|█████████▏| 342893/371472 [5:07:03<2:13:17, 3.57it/s] 92%|█████████▏| 342894/371472 [5:07:03<2:25:45, 3.27it/s] 92%|█████████▏| 342895/371472 [5:07:03<2:23:24, 3.32it/s] 92%|█████████▏| 342896/371472 [5:07:04<2:18:20, 3.44it/s] 92%|█████████▏| 342897/371472 [5:07:04<2:15:04, 3.53it/s] 92%|█████████▏| 342898/371472 [5:07:04<2:10:37, 3.65it/s] 92%|█████████▏| 342899/371472 [5:07:04<2:10:26, 3.65it/s] 92%|█████████▏| 342900/371472 [5:07:05<2:13:37, 3.56it/s] {'loss': 2.5589, 'learning_rate': 1.6926135016914819e-07, 'epoch': 14.77} + 92%|█████████▏| 342900/371472 [5:07:05<2:13:37, 3.56it/s] 92%|█████████▏| 342901/371472 [5:07:05<2:09:45, 3.67it/s] 92%|█████████▏| 342902/371472 [5:07:05<2:12:12, 3.60it/s] 92%|█████████▏| 342903/371472 [5:07:06<2:13:27, 3.57it/s] 92%|█████████▏| 342904/371472 [5:07:06<2:12:28, 3.59it/s] 92%|█████████▏| 342905/371472 [5:07:06<2:12:19, 3.60it/s] 92%|█████████▏| 342906/371472 [5:07:06<2:10:16, 3.65it/s] 92%|█████████▏| 342907/371472 [5:07:07<2:05:22, 3.80it/s] 92%|█████████▏| 342908/371472 [5:07:07<2:09:45, 3.67it/s] 92%|█████████▏| 342909/371472 [5:07:07<2:15:03, 3.52it/s] 92%|█████████▏| 342910/371472 [5:07:07<2:11:27, 3.62it/s] 92%|█████████▏| 342911/371472 [5:07:08<2:09:32, 3.67it/s] 92%|█████████▏| 342912/371472 [5:07:08<2:07:40, 3.73it/s] 92%|█████████▏| 342913/371472 [5:07:08<2:12:46, 3.58it/s] 92%|█████████▏| 342914/371472 [5:07:09<2:14:45, 3.53it/s] 92%|█████████▏| 342915/371472 [5:07:09<2:12:21, 3.60it/s] 92%|█████████▏| 342916/371472 [5:07:09<2:06:59, 3.75it/s] 92%|█████████▏| 342917/371472 [5:07:09<2:14:43, 3.53it/s] 92%|█████████▏| 342918/371472 [5:07:10<2:11:01, 3.63it/s] 92%|█████████▏| 342919/371472 [5:07:10<2:13:07, 3.57it/s] 92%|█████████▏| 342920/371472 [5:07:10<2:08:02, 3.72it/s] {'loss': 2.6291, 'learning_rate': 1.6921286819366934e-07, 'epoch': 14.77} + 92%|█████████▏| 342920/371472 [5:07:10<2:08:02, 3.72it/s] 92%|█████████▏| 342921/371472 [5:07:10<2:06:47, 3.75it/s] 92%|█████████▏| 342922/371472 [5:07:11<2:06:02, 3.78it/s] 92%|█████████▏| 342923/371472 [5:07:11<2:07:14, 3.74it/s] 92%|█████████▏| 342924/371472 [5:07:11<2:11:32, 3.62it/s] 92%|█████████▏| 342925/371472 [5:07:12<2:13:51, 3.55it/s] 92%|█████████▏| 342926/371472 [5:07:12<2:19:07, 3.42it/s] 92%|█████████▏| 342927/371472 [5:07:12<2:21:02, 3.37it/s] 92%|█████████▏| 342928/371472 [5:07:13<2:45:24, 2.88it/s] 92%|█████████▏| 342929/371472 [5:07:13<2:32:35, 3.12it/s] 92%|█████████▏| 342930/371472 [5:07:13<2:21:22, 3.36it/s] 92%|█████████▏| 342931/371472 [5:07:13<2:14:49, 3.53it/s] 92%|█████████▏| 342932/371472 [5:07:14<2:16:48, 3.48it/s] 92%|█████████▏| 342933/371472 [5:07:14<2:15:58, 3.50it/s] 92%|█████████▏| 342934/371472 [5:07:14<2:14:04, 3.55it/s] 92%|█████████▏| 342935/371472 [5:07:15<2:12:20, 3.59it/s] 92%|█████████▏| 342936/371472 [5:07:15<2:10:45, 3.64it/s] 92%|█████████▏| 342937/371472 [5:07:15<2:14:52, 3.53it/s] 92%|█████████▏| 342938/371472 [5:07:15<2:15:26, 3.51it/s] 92%|█████████▏| 342939/371472 [5:07:16<2:18:02, 3.44it/s] 92%|█████████▏| 342940/371472 [5:07:16<2:18:14, 3.44it/s] {'loss': 2.56, 'learning_rate': 1.6916438621819038e-07, 'epoch': 14.77} + 92%|█████████▏| 342940/371472 [5:07:16<2:18:14, 3.44it/s] 92%|█████████▏| 342941/371472 [5:07:16<2:16:13, 3.49it/s] 92%|█████████▏| 342942/371472 [5:07:17<2:10:49, 3.63it/s] 92%|█████████▏| 342943/371472 [5:07:17<2:14:19, 3.54it/s] 92%|█████████▏| 342944/371472 [5:07:17<2:13:19, 3.57it/s] 92%|█████████▏| 342945/371472 [5:07:17<2:14:15, 3.54it/s] 92%|█████████▏| 342946/371472 [5:07:18<2:12:48, 3.58it/s] 92%|█████████▏| 342947/371472 [5:07:18<2:10:07, 3.65it/s] 92%|█████████▏| 342948/371472 [5:07:18<2:10:21, 3.65it/s] 92%|█████████▏| 342949/371472 [5:07:18<2:08:43, 3.69it/s] 92%|█████████▏| 342950/371472 [5:07:19<2:12:05, 3.60it/s] 92%|█████████▏| 342951/371472 [5:07:19<2:35:17, 3.06it/s] 92%|█████████▏| 342952/371472 [5:07:20<2:36:12, 3.04it/s] 92%|█████████▏| 342953/371472 [5:07:20<2:30:33, 3.16it/s] 92%|█████████▏| 342954/371472 [5:07:20<2:24:28, 3.29it/s] 92%|█████████▏| 342955/371472 [5:07:20<2:21:12, 3.37it/s] 92%|█████████▏| 342956/371472 [5:07:21<2:14:41, 3.53it/s] 92%|█████████▏| 342957/371472 [5:07:21<2:16:09, 3.49it/s] 92%|█████████▏| 342958/371472 [5:07:21<2:13:59, 3.55it/s] 92%|█████████▏| 342959/371472 [5:07:21<2:10:37, 3.64it/s] 92%|█████████▏| 342960/371472 [5:07:22<2:10:53, 3.63it/s] {'loss': 2.5937, 'learning_rate': 1.6911590424271156e-07, 'epoch': 14.77} + 92%|█████████▏| 342960/371472 [5:07:22<2:10:53, 3.63it/s] 92%|█████████▏| 342961/371472 [5:07:22<2:06:54, 3.74it/s] 92%|█████████▏| 342962/371472 [5:07:22<2:08:26, 3.70it/s] 92%|█████████▏| 342963/371472 [5:07:22<2:06:36, 3.75it/s] 92%|█████████▏| 342964/371472 [5:07:23<2:13:23, 3.56it/s] 92%|█████████▏| 342965/371472 [5:07:23<2:16:07, 3.49it/s] 92%|█████████▏| 342966/371472 [5:07:23<2:14:57, 3.52it/s] 92%|█████████▏| 342967/371472 [5:07:24<2:14:02, 3.54it/s] 92%|█████████▏| 342968/371472 [5:07:24<2:12:53, 3.57it/s] 92%|█████████▏| 342969/371472 [5:07:24<2:13:54, 3.55it/s] 92%|█████████▏| 342970/371472 [5:07:25<2:20:01, 3.39it/s] 92%|█████████▏| 342971/371472 [5:07:25<2:13:53, 3.55it/s] 92%|█████████▏| 342972/371472 [5:07:25<2:09:23, 3.67it/s] 92%|█████████▏| 342973/371472 [5:07:25<2:13:45, 3.55it/s] 92%|█████████▏| 342974/371472 [5:07:26<2:18:27, 3.43it/s] 92%|█████████▏| 342975/371472 [5:07:26<2:19:59, 3.39it/s] 92%|█████████▏| 342976/371472 [5:07:26<2:19:05, 3.41it/s] 92%|█████████▏| 342977/371472 [5:07:27<2:17:11, 3.46it/s] 92%|█████████▏| 342978/371472 [5:07:27<2:11:40, 3.61it/s] 92%|█████████▏| 342979/371472 [5:07:27<2:10:32, 3.64it/s] 92%|█████████▏| 342980/371472 [5:07:27<2:07:45, 3.72it/s] {'loss': 2.4951, 'learning_rate': 1.690674222672326e-07, 'epoch': 14.77} + 92%|█████████▏| 342980/371472 [5:07:27<2:07:45, 3.72it/s] 92%|█████████▏| 342981/371472 [5:07:28<2:09:05, 3.68it/s] 92%|█████████▏| 342982/371472 [5:07:28<2:08:00, 3.71it/s] 92%|█████████▏| 342983/371472 [5:07:28<2:06:30, 3.75it/s] 92%|█████████▏| 342984/371472 [5:07:28<2:05:15, 3.79it/s] 92%|█████████▏| 342985/371472 [5:07:29<2:05:44, 3.78it/s] 92%|█████████▏| 342986/371472 [5:07:29<2:04:53, 3.80it/s] 92%|█████████▏| 342987/371472 [5:07:29<2:08:03, 3.71it/s] 92%|█████████▏| 342988/371472 [5:07:29<2:04:03, 3.83it/s] 92%|█████████▏| 342989/371472 [5:07:30<2:05:45, 3.77it/s] 92%|█████████▏| 342990/371472 [5:07:30<2:12:30, 3.58it/s] 92%|█████████▏| 342991/371472 [5:07:30<2:13:25, 3.56it/s] 92%|█████████▏| 342992/371472 [5:07:31<2:26:37, 3.24it/s] 92%|█████████▏| 342993/371472 [5:07:31<2:22:56, 3.32it/s] 92%|█████████▏| 342994/371472 [5:07:31<2:24:33, 3.28it/s] 92%|█████████▏| 342995/371472 [5:07:32<2:21:53, 3.34it/s] 92%|█████████▏| 342996/371472 [5:07:32<2:18:27, 3.43it/s] 92%|█████████▏| 342997/371472 [5:07:32<2:15:32, 3.50it/s] 92%|█████████▏| 342998/371472 [5:07:32<2:16:50, 3.47it/s] 92%|█████████▏| 342999/371472 [5:07:33<2:22:32, 3.33it/s] 92%|█████████▏| 343000/371472 [5:07:33<2:19:00, 3.41it/s] {'loss': 2.5811, 'learning_rate': 1.6901894029175376e-07, 'epoch': 14.77} + 92%|█████████▏| 343000/371472 [5:07:33<2:19:00, 3.41it/s] 92%|█████████▏| 343001/371472 [5:07:33<2:15:51, 3.49it/s] 92%|█████████▏| 343002/371472 [5:07:34<2:24:18, 3.29it/s] 92%|█████████▏| 343003/371472 [5:07:34<2:17:55, 3.44it/s] 92%|█████████▏| 343004/371472 [5:07:34<2:27:26, 3.22it/s] 92%|█████████▏| 343005/371472 [5:07:35<2:22:53, 3.32it/s] 92%|█████████▏| 343006/371472 [5:07:35<2:19:49, 3.39it/s] 92%|█████████▏| 343007/371472 [5:07:35<2:15:11, 3.51it/s] 92%|█████████▏| 343008/371472 [5:07:35<2:17:39, 3.45it/s] 92%|█████████▏| 343009/371472 [5:07:36<2:17:35, 3.45it/s] 92%|█████████▏| 343010/371472 [5:07:36<2:13:38, 3.55it/s] 92%|█████████▏| 343011/371472 [5:07:36<2:17:35, 3.45it/s] 92%|█████████▏| 343012/371472 [5:07:36<2:14:04, 3.54it/s] 92%|█████████▏| 343013/371472 [5:07:37<2:22:38, 3.33it/s] 92%|█████████▏| 343014/371472 [5:07:37<2:21:43, 3.35it/s] 92%|█████████▏| 343015/371472 [5:07:37<2:19:00, 3.41it/s] 92%|█████████▏| 343016/371472 [5:07:38<2:14:12, 3.53it/s] 92%|█████████▏| 343017/371472 [5:07:38<2:16:09, 3.48it/s] 92%|█████████▏| 343018/371472 [5:07:38<2:23:25, 3.31it/s] 92%|█████████▏| 343019/371472 [5:07:39<2:17:16, 3.45it/s] 92%|█████████▏| 343020/371472 [5:07:39<2:19:09, 3.41it/s] {'loss': 2.611, 'learning_rate': 1.6897045831627483e-07, 'epoch': 14.77} + 92%|█████████▏| 343020/371472 [5:07:39<2:19:09, 3.41it/s] 92%|█████████▏| 343021/371472 [5:07:39<2:14:30, 3.53it/s] 92%|█████████▏| 343022/371472 [5:07:39<2:16:24, 3.48it/s] 92%|█████████▏| 343023/371472 [5:07:40<2:11:13, 3.61it/s] 92%|█████████▏| 343024/371472 [5:07:40<2:06:09, 3.76it/s] 92%|█████████▏| 343025/371472 [5:07:40<2:05:28, 3.78it/s] 92%|█████████▏| 343026/371472 [5:07:40<2:06:06, 3.76it/s] 92%|█████████▏| 343027/371472 [5:07:41<2:08:23, 3.69it/s] 92%|█████████▏| 343028/371472 [5:07:41<2:05:24, 3.78it/s] 92%|█████████▏| 343029/371472 [5:07:41<2:05:15, 3.78it/s] 92%|█████████▏| 343030/371472 [5:07:41<2:04:24, 3.81it/s] 92%|█████████▏| 343031/371472 [5:07:42<2:00:47, 3.92it/s] 92%|█████████▏| 343032/371472 [5:07:42<2:08:41, 3.68it/s] 92%|█████████▏| 343033/371472 [5:07:42<2:10:06, 3.64it/s] 92%|█████████▏| 343034/371472 [5:07:43<2:10:51, 3.62it/s] 92%|█████████▏| 343035/371472 [5:07:43<2:11:17, 3.61it/s] 92%|█████████▏| 343036/371472 [5:07:43<2:10:20, 3.64it/s] 92%|█████████▏| 343037/371472 [5:07:43<2:08:08, 3.70it/s] 92%|█████████▏| 343038/371472 [5:07:44<2:07:25, 3.72it/s] 92%|█████████▏| 343039/371472 [5:07:44<2:08:13, 3.70it/s] 92%|█████████▏| 343040/371472 [5:07:44<2:14:46, 3.52it/s] {'loss': 2.6426, 'learning_rate': 1.6892197634079598e-07, 'epoch': 14.78} + 92%|█████████▏| 343040/371472 [5:07:44<2:14:46, 3.52it/s] 92%|█████████▏| 343041/371472 [5:07:45<2:13:14, 3.56it/s] 92%|█████████▏| 343042/371472 [5:07:45<2:22:04, 3.34it/s] 92%|█████████▏| 343043/371472 [5:07:45<2:23:34, 3.30it/s] 92%|█████████▏| 343044/371472 [5:07:45<2:21:10, 3.36it/s] 92%|█████████▏| 343045/371472 [5:07:46<2:13:34, 3.55it/s] 92%|█████████▏| 343046/371472 [5:07:46<2:18:53, 3.41it/s] 92%|█████████▏| 343047/371472 [5:07:46<2:26:15, 3.24it/s] 92%|█████████▏| 343048/371472 [5:07:47<2:16:45, 3.46it/s] 92%|█████████▏| 343049/371472 [5:07:47<2:10:26, 3.63it/s] 92%|█████████▏| 343050/371472 [5:07:47<2:14:19, 3.53it/s] 92%|█████████▏| 343051/371472 [5:07:47<2:11:32, 3.60it/s] 92%|█████████▏| 343052/371472 [5:07:48<2:06:49, 3.73it/s] 92%|█████████▏| 343053/371472 [5:07:48<2:07:12, 3.72it/s] 92%|█████████▏| 343054/371472 [5:07:48<2:15:31, 3.49it/s] 92%|█████████▏| 343055/371472 [5:07:49<2:21:54, 3.34it/s] 92%|█████████▏| 343056/371472 [5:07:49<2:16:40, 3.47it/s] 92%|█████████▏| 343057/371472 [5:07:49<2:25:53, 3.25it/s] 92%|█████████▏| 343058/371472 [5:07:49<2:18:42, 3.41it/s] 92%|█████████▏| 343059/371472 [5:07:50<2:13:30, 3.55it/s] 92%|█████████▏| 343060/371472 [5:07:50<2:16:42, 3.46it/s] {'loss': 2.5223, 'learning_rate': 1.6887349436531702e-07, 'epoch': 14.78} + 92%|█████████▏| 343060/371472 [5:07:50<2:16:42, 3.46it/s] 92%|█████████▏| 343061/371472 [5:07:50<2:17:22, 3.45it/s] 92%|█████████▏| 343062/371472 [5:07:51<2:13:39, 3.54it/s] 92%|█████████▏| 343063/371472 [5:07:51<2:11:39, 3.60it/s] 92%|█████████▏| 343064/371472 [5:07:51<2:10:20, 3.63it/s] 92%|█████████▏| 343065/371472 [5:07:51<2:13:26, 3.55it/s] 92%|████████���▏| 343066/371472 [5:07:52<2:09:51, 3.65it/s] 92%|█████████▏| 343067/371472 [5:07:52<2:09:13, 3.66it/s] 92%|█████████▏| 343068/371472 [5:07:52<2:07:23, 3.72it/s] 92%|█████████▏| 343069/371472 [5:07:53<2:12:52, 3.56it/s] 92%|█████████▏| 343070/371472 [5:07:53<2:08:50, 3.67it/s] 92%|█████████▏| 343071/371472 [5:07:53<2:18:14, 3.42it/s] 92%|█████████▏| 343072/371472 [5:07:53<2:16:03, 3.48it/s] 92%|█████████▏| 343073/371472 [5:07:54<2:15:54, 3.48it/s] 92%|█████████▏| 343074/371472 [5:07:54<2:16:02, 3.48it/s] 92%|█████████▏| 343075/371472 [5:07:54<2:13:39, 3.54it/s] 92%|█████████▏| 343076/371472 [5:07:55<2:12:12, 3.58it/s] 92%|█████████▏| 343077/371472 [5:07:55<2:08:48, 3.67it/s] 92%|█████████▏| 343078/371472 [5:07:55<2:07:32, 3.71it/s] 92%|█████████▏| 343079/371472 [5:07:55<2:13:39, 3.54it/s] 92%|█████████▏| 343080/371472 [5:07:56<2:17:53, 3.43it/s] {'loss': 2.7211, 'learning_rate': 1.688250123898382e-07, 'epoch': 14.78} + 92%|█████████▏| 343080/371472 [5:07:56<2:17:53, 3.43it/s] 92%|█████████▏| 343081/371472 [5:07:56<2:19:07, 3.40it/s] 92%|█████████▏| 343082/371472 [5:07:56<2:19:10, 3.40it/s] 92%|█████████▏| 343083/371472 [5:07:57<2:17:10, 3.45it/s] 92%|█████████▏| 343084/371472 [5:07:57<2:16:05, 3.48it/s] 92%|█████████▏| 343085/371472 [5:07:57<2:11:16, 3.60it/s] 92%|█████████▏| 343086/371472 [5:07:57<2:06:57, 3.73it/s] 92%|█████████▏| 343087/371472 [5:07:58<2:08:49, 3.67it/s] 92%|█████████▏| 343088/371472 [5:07:58<2:04:39, 3.79it/s] 92%|█████████▏| 343089/371472 [5:07:58<2:08:58, 3.67it/s] 92%|█████████▏| 343090/371472 [5:07:58<2:07:22, 3.71it/s] 92%|█████████▏| 343091/371472 [5:07:59<2:09:54, 3.64it/s] 92%|█████████▏| 343092/371472 [5:07:59<2:08:07, 3.69it/s] 92%|█████████▏| 343093/371472 [5:07:59<2:06:40, 3.73it/s] 92%|█████████▏| 343094/371472 [5:07:59<2:03:24, 3.83it/s] 92%|█████████▏| 343095/371472 [5:08:00<2:08:52, 3.67it/s] 92%|█████████▏| 343096/371472 [5:08:00<2:09:31, 3.65it/s] 92%|█████████▏| 343097/371472 [5:08:00<2:23:04, 3.31it/s] 92%|█████████▏| 343098/371472 [5:08:01<2:20:41, 3.36it/s] 92%|█████████▏| 343099/371472 [5:08:01<2:13:53, 3.53it/s] 92%|█████████▏| 343100/371472 [5:08:01<2:08:08, 3.69it/s] {'loss': 2.5406, 'learning_rate': 1.6877653041435927e-07, 'epoch': 14.78} + 92%|█████████▏| 343100/371472 [5:08:01<2:08:08, 3.69it/s] 92%|█████████▏| 343101/371472 [5:08:01<2:05:51, 3.76it/s] 92%|█████████▏| 343102/371472 [5:08:02<2:08:35, 3.68it/s] 92%|█████████▏| 343103/371472 [5:08:02<2:06:09, 3.75it/s] 92%|█████████▏| 343104/371472 [5:08:02<2:05:40, 3.76it/s] 92%|█████████▏| 343105/371472 [5:08:03<2:09:03, 3.66it/s] 92%|█████████▏| 343106/371472 [5:08:03<2:21:49, 3.33it/s] 92%|█████████▏| 343107/371472 [5:08:03<2:23:03, 3.30it/s] 92%|█████████▏| 343108/371472 [5:08:03<2:18:57, 3.40it/s] 92%|█████████▏| 343109/371472 [5:08:04<2:12:19, 3.57it/s] 92%|█████████▏| 343110/371472 [5:08:04<2:18:15, 3.42it/s] 92%|█████████▏| 343111/371472 [5:08:04<2:12:41, 3.56it/s] 92%|█████████▏| 343112/371472 [5:08:05<2:12:09, 3.58it/s] 92%|█████████▏| 343113/371472 [5:08:05<2:12:53, 3.56it/s] 92%|█████████▏| 343114/371472 [5:08:05<2:19:40, 3.38it/s] 92%|█████████▏| 343115/371472 [5:08:06<2:24:47, 3.26it/s] 92%|█████████▏| 343116/371472 [5:08:06<2:21:01, 3.35it/s] 92%|█████████▏| 343117/371472 [5:08:06<2:19:36, 3.39it/s] 92%|█████████▏| 343118/371472 [5:08:06<2:11:31, 3.59it/s] 92%|█████████▏| 343119/371472 [5:08:07<2:08:11, 3.69it/s] 92%|█████████▏| 343120/371472 [5:08:07<2:09:08, 3.66it/s] {'loss': 2.5753, 'learning_rate': 1.687280484388804e-07, 'epoch': 14.78} + 92%|█████████▏| 343120/371472 [5:08:07<2:09:08, 3.66it/s] 92%|█████████▏| 343121/371472 [5:08:07<2:09:01, 3.66it/s] 92%|██████���██▏| 343122/371472 [5:08:07<2:11:10, 3.60it/s] 92%|█████████▏| 343123/371472 [5:08:08<2:12:42, 3.56it/s] 92%|█████████▏| 343124/371472 [5:08:08<2:09:21, 3.65it/s] 92%|█████████▏| 343125/371472 [5:08:08<2:14:11, 3.52it/s] 92%|█████████▏| 343126/371472 [5:08:09<2:13:57, 3.53it/s] 92%|█████████▏| 343127/371472 [5:08:09<2:10:34, 3.62it/s] 92%|█████████▏| 343128/371472 [5:08:09<2:17:45, 3.43it/s] 92%|█████████▏| 343129/371472 [5:08:09<2:13:32, 3.54it/s] 92%|█████████▏| 343130/371472 [5:08:10<2:13:07, 3.55it/s] 92%|█████████▏| 343131/371472 [5:08:10<2:15:58, 3.47it/s] 92%|█████████▏| 343132/371472 [5:08:10<2:11:20, 3.60it/s] 92%|█████████▏| 343133/371472 [5:08:11<2:11:59, 3.58it/s] 92%|█████████▏| 343134/371472 [5:08:11<2:13:02, 3.55it/s] 92%|█████████▏| 343135/371472 [5:08:11<2:08:42, 3.67it/s] 92%|█████████▏| 343136/371472 [5:08:11<2:04:36, 3.79it/s] 92%|█████████▏| 343137/371472 [5:08:12<2:05:06, 3.77it/s] 92%|█████████▏| 343138/371472 [5:08:12<2:09:06, 3.66it/s] 92%|█████████▏| 343139/371472 [5:08:12<2:21:12, 3.34it/s] 92%|█████████▏| 343140/371472 [5:08:12<2:11:06, 3.60it/s] {'loss': 2.7322, 'learning_rate': 1.6867956646340147e-07, 'epoch': 14.78} + 92%|█████████▏| 343140/371472 [5:08:12<2:11:06, 3.60it/s] 92%|█████████▏| 343141/371472 [5:08:13<2:06:22, 3.74it/s] 92%|█████████▏| 343142/371472 [5:08:13<2:02:14, 3.86it/s] 92%|█████████▏| 343143/371472 [5:08:13<2:14:30, 3.51it/s] 92%|█████████▏| 343144/371472 [5:08:14<2:11:47, 3.58it/s] 92%|█████████▏| 343145/371472 [5:08:14<2:09:41, 3.64it/s] 92%|█████████▏| 343146/371472 [5:08:14<2:10:00, 3.63it/s] 92%|█████████▏| 343147/371472 [5:08:14<2:12:16, 3.57it/s] 92%|█████████▏| 343148/371472 [5:08:15<2:11:01, 3.60it/s] 92%|█████████▏| 343149/371472 [5:08:15<2:09:41, 3.64it/s] 92%|█████████▏| 343150/371472 [5:08:15<2:14:07, 3.52it/s] 92%|█████████▏| 343151/371472 [5:08:16<2:13:29, 3.54it/s] 92%|█████████▏| 343152/371472 [5:08:16<2:14:34, 3.51it/s] 92%|█████████▏| 343153/371472 [5:08:16<2:14:07, 3.52it/s] 92%|█████████▏| 343154/371472 [5:08:16<2:12:32, 3.56it/s] 92%|█████████▏| 343155/371472 [5:08:17<2:23:28, 3.29it/s] 92%|█████████▏| 343156/371472 [5:08:17<2:16:55, 3.45it/s] 92%|█████████▏| 343157/371472 [5:08:17<2:15:34, 3.48it/s] 92%|█████████▏| 343158/371472 [5:08:17<2:08:55, 3.66it/s] 92%|█████████▏| 343159/371472 [5:08:18<2:14:19, 3.51it/s] 92%|█████████▏| 343160/371472 [5:08:18<2:10:45, 3.61it/s] {'loss': 2.7668, 'learning_rate': 1.6863108448792265e-07, 'epoch': 14.78} + 92%|█████████▏| 343160/371472 [5:08:18<2:10:45, 3.61it/s] 92%|█████████▏| 343161/371472 [5:08:18<2:17:55, 3.42it/s] 92%|█████████▏| 343162/371472 [5:08:19<2:25:01, 3.25it/s] 92%|█████████▏| 343163/371472 [5:08:19<2:15:30, 3.48it/s] 92%|█████████▏| 343164/371472 [5:08:19<2:10:16, 3.62it/s] 92%|█████████▏| 343165/371472 [5:08:20<2:12:20, 3.57it/s] 92%|█████████▏| 343166/371472 [5:08:20<2:10:06, 3.63it/s] 92%|█████████▏| 343167/371472 [5:08:20<2:12:50, 3.55it/s] 92%|█████████▏| 343168/371472 [5:08:20<2:08:59, 3.66it/s] 92%|█████████▏| 343169/371472 [5:08:21<2:14:00, 3.52it/s] 92%|█████████▏| 343170/371472 [5:08:21<2:13:38, 3.53it/s] 92%|█████████▏| 343171/371472 [5:08:21<2:13:39, 3.53it/s] 92%|█████████▏| 343172/371472 [5:08:21<2:12:57, 3.55it/s] 92%|█████████▏| 343173/371472 [5:08:22<2:19:33, 3.38it/s] 92%|█████████▏| 343174/371472 [5:08:22<2:18:31, 3.40it/s] 92%|█████████▏| 343175/371472 [5:08:22<2:25:35, 3.24it/s] 92%|█████████▏| 343176/371472 [5:08:23<2:28:07, 3.18it/s] 92%|█████████▏| 343177/371472 [5:08:23<2:19:52, 3.37it/s] 92%|█████████▏| 343178/371472 [5:08:23<2:19:30, 3.38it/s] 92%|█████████▏| 343179/371472 [5:08:24<2:12:37, 3.56it/s] 92%|█████████▏| 343180/371472 [5:08:24<2:09:39, 3.64it/s] {'loss': 2.5182, 'learning_rate': 1.6858260251244366e-07, 'epoch': 14.78} + 92%|█████████▏| 343180/371472 [5:08:24<2:09:39, 3.64it/s] 92%|█████████▏| 343181/371472 [5:08:24<2:16:51, 3.45it/s] 92%|█████████▏| 343182/371472 [5:08:24<2:12:14, 3.57it/s] 92%|█████████▏| 343183/371472 [5:08:25<2:19:15, 3.39it/s] 92%|█████████▏| 343184/371472 [5:08:25<2:13:48, 3.52it/s] 92%|█████████▏| 343185/371472 [5:08:25<2:06:00, 3.74it/s] 92%|█████████▏| 343186/371472 [5:08:26<2:10:02, 3.63it/s] 92%|█████████▏| 343187/371472 [5:08:26<2:16:34, 3.45it/s] 92%|█████████▏| 343188/371472 [5:08:26<2:27:24, 3.20it/s] 92%|█████████▏| 343189/371472 [5:08:26<2:23:21, 3.29it/s] 92%|█████████▏| 343190/371472 [5:08:27<2:18:43, 3.40it/s] 92%|█████████▏| 343191/371472 [5:08:27<2:15:40, 3.47it/s] 92%|█████████▏| 343192/371472 [5:08:27<2:29:21, 3.16it/s] 92%|█████████▏| 343193/371472 [5:08:28<2:23:54, 3.28it/s] 92%|█████████▏| 343194/371472 [5:08:28<2:17:28, 3.43it/s] 92%|█████████▏| 343195/371472 [5:08:28<2:10:58, 3.60it/s] 92%|█████████▏| 343196/371472 [5:08:28<2:06:40, 3.72it/s] 92%|█████████▏| 343197/371472 [5:08:29<2:05:31, 3.75it/s] 92%|█████████▏| 343198/371472 [5:08:29<2:03:41, 3.81it/s] 92%|█████████▏| 343199/371472 [5:08:29<1:59:55, 3.93it/s] 92%|█████████▏| 343200/371472 [5:08:30<2:16:06, 3.46it/s] {'loss': 2.6251, 'learning_rate': 1.6853412053696484e-07, 'epoch': 14.78} + 92%|█████████▏| 343200/371472 [5:08:30<2:16:06, 3.46it/s] 92%|█████████▏| 343201/371472 [5:08:30<2:10:10, 3.62it/s] 92%|█████████▏| 343202/371472 [5:08:30<2:13:04, 3.54it/s] 92%|█████████▏| 343203/371472 [5:08:30<2:11:04, 3.59it/s] 92%|█████████▏| 343204/371472 [5:08:31<2:13:54, 3.52it/s] 92%|█████████▏| 343205/371472 [5:08:31<2:07:40, 3.69it/s] 92%|█████████▏| 343206/371472 [5:08:31<2:11:59, 3.57it/s] 92%|█████████▏| 343207/371472 [5:08:32<2:13:07, 3.54it/s] 92%|█████████▏| 343208/371472 [5:08:32<2:14:55, 3.49it/s] 92%|█████████▏| 343209/371472 [5:08:32<2:14:44, 3.50it/s] 92%|█████████▏| 343210/371472 [5:08:32<2:09:02, 3.65it/s] 92%|█████████▏| 343211/371472 [5:08:33<2:11:38, 3.58it/s] 92%|█████████▏| 343212/371472 [5:08:33<2:09:58, 3.62it/s] 92%|█████████▏| 343213/371472 [5:08:33<2:09:46, 3.63it/s] 92%|█████████▏| 343214/371472 [5:08:33<2:08:48, 3.66it/s] 92%|█████████▏| 343215/371472 [5:08:34<2:14:12, 3.51it/s] 92%|█████████▏| 343216/371472 [5:08:34<2:13:37, 3.52it/s] 92%|█████████▏| 343217/371472 [5:08:34<2:20:32, 3.35it/s] 92%|█████████▏| 343218/371472 [5:08:35<2:17:07, 3.43it/s] 92%|█████████▏| 343219/371472 [5:08:35<2:11:24, 3.58it/s] 92%|█████████▏| 343220/371472 [5:08:35<2:20:54, 3.34it/s] {'loss': 2.6019, 'learning_rate': 1.6848563856148594e-07, 'epoch': 14.78} + 92%|█████████▏| 343220/371472 [5:08:35<2:20:54, 3.34it/s] 92%|█████████▏| 343221/371472 [5:08:35<2:15:49, 3.47it/s] 92%|█████████▏| 343222/371472 [5:08:36<2:14:30, 3.50it/s] 92%|█████████▏| 343223/371472 [5:08:36<2:17:35, 3.42it/s] 92%|█████████▏| 343224/371472 [5:08:36<2:15:45, 3.47it/s] 92%|█████████▏| 343225/371472 [5:08:37<2:10:41, 3.60it/s] 92%|█████████▏| 343226/371472 [5:08:37<2:10:18, 3.61it/s] 92%|█████████▏| 343227/371472 [5:08:37<2:06:41, 3.72it/s] 92%|█████████▏| 343228/371472 [5:08:37<2:04:46, 3.77it/s] 92%|█████████▏| 343229/371472 [5:08:38<2:11:03, 3.59it/s] 92%|█████████▏| 343230/371472 [5:08:38<2:15:42, 3.47it/s] 92%|█████████▏| 343231/371472 [5:08:38<2:14:53, 3.49it/s] 92%|█████████▏| 343232/371472 [5:08:39<2:13:37, 3.52it/s] 92%|█████████▏| 343233/371472 [5:08:39<2:23:04, 3.29it/s] 92%|█████████▏| 343234/371472 [5:08:39<2:18:20, 3.40it/s] 92%|█████████▏| 343235/371472 [5:08:40<2:30:57, 3.12it/s] 92%|█████████▏| 343236/371472 [5:08:40<2:22:34, 3.30it/s] 92%|█████████▏| 343237/371472 [5:08:40<2:16:30, 3.45it/s] 92%|█████████▏| 343238/371472 [5:08:40<2:18:04, 3.41it/s] 92%|█████████▏| 343239/371472 [5:08:41<2:15:43, 3.47it/s] 92%|█████████▏| 343240/371472 [5:08:41<2:13:43, 3.52it/s] {'loss': 2.7366, 'learning_rate': 1.6843715658600704e-07, 'epoch': 14.78} + 92%|█████████▏| 343240/371472 [5:08:41<2:13:43, 3.52it/s] 92%|█████████▏| 343241/371472 [5:08:41<2:13:49, 3.52it/s] 92%|█████████▏| 343242/371472 [5:08:42<2:10:25, 3.61it/s] 92%|█████████▏| 343243/371472 [5:08:42<2:10:54, 3.59it/s] 92%|█████████▏| 343244/371472 [5:08:42<2:07:36, 3.69it/s] 92%|█████████▏| 343245/371472 [5:08:42<2:14:19, 3.50it/s] 92%|█████████▏| 343246/371472 [5:08:43<2:10:48, 3.60it/s] 92%|█████████▏| 343247/371472 [5:08:43<2:06:02, 3.73it/s] 92%|█████████▏| 343248/371472 [5:08:43<2:08:27, 3.66it/s] 92%|█████████▏| 343249/371472 [5:08:43<2:07:19, 3.69it/s] 92%|█████████▏| 343250/371472 [5:08:44<2:08:27, 3.66it/s] 92%|█████████▏| 343251/371472 [5:08:44<2:05:22, 3.75it/s] 92%|█████████▏| 343252/371472 [5:08:44<2:08:04, 3.67it/s] 92%|█████████▏| 343253/371472 [5:08:45<2:08:06, 3.67it/s] 92%|█████████▏| 343254/371472 [5:08:45<2:06:16, 3.72it/s] 92%|█████████▏| 343255/371472 [5:08:45<2:05:36, 3.74it/s] 92%|█████████▏| 343256/371472 [5:08:45<2:15:49, 3.46it/s] 92%|█████████▏| 343257/371472 [5:08:46<2:13:12, 3.53it/s] 92%|█████████▏| 343258/371472 [5:08:46<2:12:55, 3.54it/s] 92%|█████████▏| 343259/371472 [5:08:46<2:14:24, 3.50it/s] 92%|█████████▏| 343260/371472 [5:08:47<2:15:18, 3.47it/s] {'loss': 2.5965, 'learning_rate': 1.683886746105281e-07, 'epoch': 14.78} + 92%|█████████▏| 343260/371472 [5:08:47<2:15:18, 3.47it/s] 92%|█████████▏| 343261/371472 [5:08:47<2:24:33, 3.25it/s] 92%|█████████▏| 343262/371472 [5:08:47<2:18:06, 3.40it/s] 92%|█████████▏| 343263/371472 [5:08:47<2:13:36, 3.52it/s] 92%|█████████▏| 343264/371472 [5:08:48<2:21:50, 3.31it/s] 92%|█████████▏| 343265/371472 [5:08:48<2:16:55, 3.43it/s] 92%|█████████▏| 343266/371472 [5:08:48<2:11:19, 3.58it/s] 92%|█████████▏| 343267/371472 [5:08:49<2:14:06, 3.51it/s] 92%|█████████▏| 343268/371472 [5:08:49<2:15:05, 3.48it/s] 92%|█████████▏| 343269/371472 [5:08:49<2:11:22, 3.58it/s] 92%|█████████▏| 343270/371472 [5:08:49<2:10:08, 3.61it/s] 92%|█████████▏| 343271/371472 [5:08:50<2:10:04, 3.61it/s] 92%|█████████▏| 343272/371472 [5:08:50<2:07:40, 3.68it/s] 92%|█████████▏| 343273/371472 [5:08:50<2:04:46, 3.77it/s] 92%|█████████▏| 343274/371472 [5:08:50<2:06:23, 3.72it/s] 92%|█████████▏| 343275/371472 [5:08:51<2:06:09, 3.72it/s] 92%|█████████▏| 343276/371472 [5:08:51<2:09:24, 3.63it/s] 92%|█████████▏| 343277/371472 [5:08:51<2:21:18, 3.33it/s] 92%|█████████▏| 343278/371472 [5:08:52<2:19:31, 3.37it/s] 92%|█████████▏| 343279/371472 [5:08:52<2:15:53, 3.46it/s] 92%|█████████▏| 343280/371472 [5:08:52<2:14:24, 3.50it/s] {'loss': 2.5121, 'learning_rate': 1.6834019263504929e-07, 'epoch': 14.79} + 92%|█████████▏| 343280/371472 [5:08:52<2:14:24, 3.50it/s] 92%|█████████▏| 343281/371472 [5:08:53<2:20:17, 3.35it/s] 92%|█████████▏| 343282/371472 [5:08:53<2:24:48, 3.24it/s] 92%|█████████▏| 343283/371472 [5:08:53<2:37:34, 2.98it/s] 92%|█████████▏| 343284/371472 [5:08:53<2:23:38, 3.27it/s] 92%|█████████▏| 343285/371472 [5:08:54<2:28:31, 3.16it/s] 92%|█████████▏| 343286/371472 [5:08:54<2:18:27, 3.39it/s] 92%|█████████▏| 343287/371472 [5:08:54<2:16:56, 3.43it/s] 92%|█████████▏| 343288/371472 [5:08:55<2:22:41, 3.29it/s] 92%|█████████▏| 343289/371472 [5:08:55<2:13:15, 3.52it/s] 92%|█████████▏| 343290/371472 [5:08:55<2:13:18, 3.52it/s] 92%|█████████▏| 343291/371472 [5:08:55<2:11:33, 3.57it/s] 92%|█████████▏| 343292/371472 [5:08:56<2:08:39, 3.65it/s] 92%|█████████▏| 343293/371472 [5:08:56<2:11:18, 3.58it/s] 92%|█████████▏| 343294/371472 [5:08:56<2:10:38, 3.60it/s] 92%|█████████▏| 343295/371472 [5:08:57<2:11:22, 3.57it/s] 92%|█████████▏| 343296/371472 [5:08:57<2:09:33, 3.62it/s] 92%|█████████▏| 343297/371472 [5:08:57<2:13:57, 3.51it/s] 92%|█████████▏| 343298/371472 [5:08:57<2:19:24, 3.37it/s] 92%|█████████▏| 343299/371472 [5:08:58<2:17:21, 3.42it/s] 92%|█████████▏| 343300/371472 [5:08:58<2:14:00, 3.50it/s] {'loss': 2.5029, 'learning_rate': 1.6829171065957033e-07, 'epoch': 14.79} + 92%|█████████▏| 343300/371472 [5:08:58<2:14:00, 3.50it/s] 92%|█████████▏| 343301/371472 [5:08:58<2:16:11, 3.45it/s] 92%|█████████▏| 343302/371472 [5:08:59<2:19:29, 3.37it/s] 92%|█████████▏| 343303/371472 [5:08:59<2:21:28, 3.32it/s] 92%|█████████▏| 343304/371472 [5:08:59<2:15:32, 3.46it/s] 92%|█████████▏| 343305/371472 [5:09:00<2:19:14, 3.37it/s] 92%|█████████▏| 343306/371472 [5:09:00<2:14:32, 3.49it/s] 92%|█████████▏| 343307/371472 [5:09:00<2:11:40, 3.57it/s] 92%|█████████▏| 343308/371472 [5:09:00<2:12:53, 3.53it/s] 92%|█████████▏| 343309/371472 [5:09:01<2:11:58, 3.56it/s] 92%|█████████▏| 343310/371472 [5:09:01<2:11:50, 3.56it/s] 92%|█████████▏| 343311/371472 [5:09:01<2:07:02, 3.69it/s] 92%|█████████▏| 343312/371472 [5:09:01<2:05:24, 3.74it/s] 92%|█████████▏| 343313/371472 [5:09:02<2:13:19, 3.52it/s] 92%|█████████▏| 343314/371472 [5:09:02<2:12:25, 3.54it/s] 92%|█████████▏| 343315/371472 [5:09:02<2:08:28, 3.65it/s] 92%|█████████▏| 343316/371472 [5:09:03<2:09:29, 3.62it/s] 92%|█████████▏| 343317/371472 [5:09:03<2:08:52, 3.64it/s] 92%|█████████▏| 343318/371472 [5:09:03<2:22:27, 3.29it/s] 92%|█████████▏| 343319/371472 [5:09:03<2:19:27, 3.36it/s] 92%|█████████▏| 343320/371472 [5:09:04<2:19:00, 3.38it/s] {'loss': 2.5728, 'learning_rate': 1.6824322868409148e-07, 'epoch': 14.79} + 92%|█████████▏| 343320/371472 [5:09:04<2:19:00, 3.38it/s] 92%|█████████▏| 343321/371472 [5:09:04<2:20:35, 3.34it/s] 92%|█████████▏| 343322/371472 [5:09:04<2:15:52, 3.45it/s] 92%|█████████▏| 343323/371472 [5:09:05<2:20:40, 3.33it/s] 92%|█████████▏| 343324/371472 [5:09:05<2:19:27, 3.36it/s] 92%|█████████▏| 343325/371472 [5:09:05<2:14:20, 3.49it/s] 92%|█████████▏| 343326/371472 [5:09:06<2:15:15, 3.47it/s] 92%|█████████▏| 343327/371472 [5:09:06<2:13:21, 3.52it/s] 92%|█████████▏| 343328/371472 [5:09:06<2:13:15, 3.52it/s] 92%|█████████▏| 343329/371472 [5:09:06<2:08:37, 3.65it/s] 92%|█████████▏| 343330/371472 [5:09:07<2:10:24, 3.60it/s] 92%|█████████▏| 343331/371472 [5:09:07<2:09:42, 3.62it/s] 92%|█████████▏| 343332/371472 [5:09:07<2:11:57, 3.55it/s] 92%|█████████▏| 343333/371472 [5:09:07<2:14:37, 3.48it/s] 92%|█████████▏| 343334/371472 [5:09:08<2:09:20, 3.63it/s] 92%|█████████▏| 343335/371472 [5:09:08<2:09:41, 3.62it/s] 92%|█████████▏| 343336/371472 [5:09:08<2:10:41, 3.59it/s] 92%|█████████▏| 343337/371472 [5:09:09<2:21:27, 3.31it/s] 92%|█████████▏| 343338/371472 [5:09:09<2:16:36, 3.43it/s] 92%|█████████▏| 343339/371472 [5:09:09<2:10:20, 3.60it/s] 92%|█████████▏| 343340/371472 [5:09:09<2:17:36, 3.41it/s] {'loss': 2.4926, 'learning_rate': 1.6819474670861255e-07, 'epoch': 14.79} + 92%|█████████▏| 343340/371472 [5:09:09<2:17:36, 3.41it/s] 92%|█████████▏| 343341/371472 [5:09:10<2:15:16, 3.47it/s] 92%|█████████▏| 343342/371472 [5:09:10<2:12:12, 3.55it/s] 92%|█████████▏| 343343/371472 [5:09:10<2:10:20, 3.60it/s] 92%|█████████▏| 343344/371472 [5:09:11<2:08:24, 3.65it/s] 92%|█████████▏| 343345/371472 [5:09:11<2:07:29, 3.68it/s] 92%|█████████▏| 343346/371472 [5:09:11<2:06:28, 3.71it/s] 92%|█████████▏| 343347/371472 [5:09:11<2:08:56, 3.64it/s] 92%|█████████▏| 343348/371472 [5:09:12<2:09:11, 3.63it/s] 92%|█████████▏| 343349/371472 [5:09:12<2:11:04, 3.58it/s] 92%|█████████▏| 343350/371472 [5:09:12<2:14:25, 3.49it/s] 92%|█████████▏| 343351/371472 [5:09:13<2:09:41, 3.61it/s] 92%|█████████▏| 343352/371472 [5:09:13<2:07:41, 3.67it/s] 92%|█████████▏| 343353/371472 [5:09:13<2:05:50, 3.72it/s] 92%|█████████▏| 343354/371472 [5:09:13<2:01:27, 3.86it/s] 92%|█████████▏| 343355/371472 [5:09:14<2:04:20, 3.77it/s] 92%|█████████▏| 343356/371472 [5:09:14<2:12:59, 3.52it/s] 92%|█████████▏| 343357/371472 [5:09:14<2:09:37, 3.61it/s] 92%|█████████▏| 343358/371472 [5:09:14<2:14:05, 3.49it/s] 92%|█████████▏| 343359/371472 [5:09:15<2:20:28, 3.34it/s] 92%|█████████▏| 343360/371472 [5:09:15<2:31:22, 3.10it/s] {'loss': 2.6545, 'learning_rate': 1.681462647331336e-07, 'epoch': 14.79} + 92%|█████████▏| 343360/371472 [5:09:15<2:31:22, 3.10it/s] 92%|█████████▏| 343361/371472 [5:09:15<2:29:37, 3.13it/s] 92%|█████████▏| 343362/371472 [5:09:16<2:20:53, 3.33it/s] 92%|█████████▏| 343363/371472 [5:09:16<2:15:40, 3.45it/s] 92%|█████████▏| 343364/371472 [5:09:16<2:21:18, 3.32it/s] 92%|█████████▏| 343365/371472 [5:09:17<2:21:06, 3.32it/s] 92%|█████████▏| 343366/371472 [5:09:17<2:21:06, 3.32it/s] 92%|█████████▏| 343367/371472 [5:09:17<2:16:57, 3.42it/s] 92%|█████████▏| 343368/371472 [5:09:17<2:16:53, 3.42it/s] 92%|█████████▏| 343369/371472 [5:09:18<2:13:53, 3.50it/s] 92%|█████████▏| 343370/371472 [5:09:18<2:20:55, 3.32it/s] 92%|█████████▏| 343371/371472 [5:09:18<2:13:58, 3.50it/s] 92%|█████████▏| 343372/371472 [5:09:19<2:10:06, 3.60it/s] 92%|█████████▏| 343373/371472 [5:09:19<2:19:19, 3.36it/s] 92%|█████████▏| 343374/371472 [5:09:19<2:14:41, 3.48it/s] 92%|█████████▏| 343375/371472 [5:09:19<2:10:44, 3.58it/s] 92%|█████████▏| 343376/371472 [5:09:20<2:06:38, 3.70it/s] 92%|█████████▏| 343377/371472 [5:09:20<2:06:54, 3.69it/s] 92%|█████████▏| 343378/371472 [5:09:20<2:06:41, 3.70it/s] 92%|█████████▏| 343379/371472 [5:09:21<2:07:34, 3.67it/s] 92%|█████████▏| 343380/371472 [5:09:21<2:15:58, 3.44it/s] {'loss': 2.663, 'learning_rate': 1.6809778275765475e-07, 'epoch': 14.79} + 92%|█████████▏| 343380/371472 [5:09:21<2:15:58, 3.44it/s] 92%|█████████▏| 343381/371472 [5:09:21<2:15:16, 3.46it/s] 92%|█████████▏| 343382/371472 [5:09:21<2:10:33, 3.59it/s] 92%|█████████▏| 343383/371472 [5:09:22<2:12:25, 3.54it/s] 92%|█████████▏| 343384/371472 [5:09:22<2:16:28, 3.43it/s] 92%|█████████▏| 343385/371472 [5:09:22<2:13:15, 3.51it/s] 92%|█████████▏| 343386/371472 [5:09:23<2:11:47, 3.55it/s] 92%|█████████▏| 343387/371472 [5:09:23<2:17:15, 3.41it/s] 92%|█████████▏| 343388/371472 [5:09:23<2:15:10, 3.46it/s] 92%|█████████▏| 343389/371472 [5:09:23<2:14:01, 3.49it/s] 92%|█████████▏| 343390/371472 [5:09:24<2:08:12, 3.65it/s] 92%|█████████▏| 343391/371472 [5:09:24<2:06:34, 3.70it/s] 92%|█████████▏| 343392/371472 [5:09:24<2:04:58, 3.74it/s] 92%|█████████▏| 343393/371472 [5:09:24<2:06:03, 3.71it/s] 92%|█████████▏| 343394/371472 [5:09:25<2:04:48, 3.75it/s] 92%|█████████▏| 343395/371472 [5:09:25<2:06:53, 3.69it/s] 92%|█████████▏| 343396/371472 [5:09:25<2:03:19, 3.79it/s] 92%|█████████▏| 343397/371472 [5:09:26<2:10:38, 3.58it/s] 92%|█████████▏| 343398/371472 [5:09:26<2:22:04, 3.29it/s] 92%|█████████▏| 343399/371472 [5:09:26<2:21:37, 3.30it/s] 92%|█████████▏| 343400/371472 [5:09:26<2:15:49, 3.44it/s] {'loss': 2.7356, 'learning_rate': 1.6804930078217582e-07, 'epoch': 14.79} + 92%|█████████▏| 343400/371472 [5:09:26<2:15:49, 3.44it/s] 92%|█████████▏| 343401/371472 [5:09:27<2:12:49, 3.52it/s] 92%|█████████▏| 343402/371472 [5:09:27<2:13:43, 3.50it/s] 92%|█████████▏| 343403/371472 [5:09:27<2:14:05, 3.49it/s] 92%|█████████▏| 343404/371472 [5:09:28<2:10:35, 3.58it/s] 92%|█████████▏| 343405/371472 [5:09:28<2:07:19, 3.67it/s] 92%|█████████▏| 343406/371472 [5:09:28<2:05:48, 3.72it/s] 92%|█████████▏| 343407/371472 [5:09:28<2:04:30, 3.76it/s] 92%|█████████▏| 343408/371472 [5:09:29<2:03:38, 3.78it/s] 92%|█████████▏| 343409/371472 [5:09:29<2:10:15, 3.59it/s] 92%|█████████▏| 343410/371472 [5:09:29<2:10:45, 3.58it/s] 92%|█████████▏| 343411/371472 [5:09:30<2:24:20, 3.24it/s] 92%|█████████▏| 343412/371472 [5:09:30<2:21:00, 3.32it/s] 92%|█████████▏| 343413/371472 [5:09:30<2:18:40, 3.37it/s] 92%|█████████▏| 343414/371472 [5:09:30<2:10:42, 3.58it/s] 92%|█████████▏| 343415/371472 [5:09:31<2:15:04, 3.46it/s] 92%|█████████▏| 343416/371472 [5:09:31<2:13:05, 3.51it/s] 92%|█████████▏| 343417/371472 [5:09:31<2:18:39, 3.37it/s] 92%|█████████▏| 343418/371472 [5:09:32<2:27:24, 3.17it/s] 92%|█████████▏| 343419/371472 [5:09:32<2:19:00, 3.36it/s] 92%|█████████▏| 343420/371472 [5:09:32<2:12:59, 3.52it/s] {'loss': 2.679, 'learning_rate': 1.6800081880669697e-07, 'epoch': 14.79} + 92%|█████████▏| 343420/371472 [5:09:32<2:12:59, 3.52it/s] 92%|█████████▏| 343421/371472 [5:09:33<2:24:28, 3.24it/s] 92%|█████████▏| 343422/371472 [5:09:33<2:17:53, 3.39it/s] 92%|█████████▏| 343423/371472 [5:09:33<2:14:14, 3.48it/s] 92%|█████████▏| 343424/371472 [5:09:33<2:09:19, 3.61it/s] 92%|█████████▏| 343425/371472 [5:09:34<2:18:58, 3.36it/s] 92%|█████████▏| 343426/371472 [5:09:34<2:11:31, 3.55it/s] 92%|█████████▏| 343427/371472 [5:09:34<2:09:15, 3.62it/s] 92%|█████████▏| 343428/371472 [5:09:35<2:17:24, 3.40it/s] 92%|█████████▏| 343429/371472 [5:09:35<2:15:17, 3.45it/s] 92%|█████████▏| 343430/371472 [5:09:35<2:21:46, 3.30it/s] 92%|█████████▏| 343431/371472 [5:09:35<2:28:11, 3.15it/s] 92%|█████████▏| 343432/371472 [5:09:36<2:19:16, 3.36it/s] 92%|█████████▏| 343433/371472 [5:09:36<2:19:16, 3.36it/s] 92%|█████████▏| 343434/371472 [5:09:36<2:19:12, 3.36it/s] 92%|█████████▏| 343435/371472 [5:09:37<2:13:50, 3.49it/s] 92%|█████████▏| 343436/371472 [5:09:37<2:06:23, 3.70it/s] 92%|█████████▏| 343437/371472 [5:09:37<2:20:51, 3.32it/s] 92%|█████████▏| 343438/371472 [5:09:37<2:11:24, 3.56it/s] 92%|█████████▏| 343439/371472 [5:09:38<2:15:00, 3.46it/s] 92%|█████████▏| 343440/371472 [5:09:38<2:08:58, 3.62it/s] {'loss': 2.6834, 'learning_rate': 1.6795233683121802e-07, 'epoch': 14.79} + 92%|█████████▏| 343440/371472 [5:09:38<2:08:58, 3.62it/s] 92%|█████████▏| 343441/371472 [5:09:38<2:24:45, 3.23it/s] 92%|█████████▏| 343442/371472 [5:09:39<2:28:30, 3.15it/s] 92%|█████████▏| 343443/371472 [5:09:39<2:32:01, 3.07it/s] 92%|█████████▏| 343444/371472 [5:09:39<2:32:17, 3.07it/s] 92%|█████████▏| 343445/371472 [5:09:40<2:27:42, 3.16it/s] 92%|█████████▏| 343446/371472 [5:09:40<2:17:05, 3.41it/s] 92%|█████████▏| 343447/371472 [5:09:40<2:18:03, 3.38it/s] 92%|█████████▏| 343448/371472 [5:09:40<2:09:58, 3.59it/s] 92%|█████████▏| 343449/371472 [5:09:41<2:12:06, 3.54it/s] 92%|█████████▏| 343450/371472 [5:09:41<2:17:04, 3.41it/s] 92%|█████████▏| 343451/371472 [5:09:41<2:17:56, 3.39it/s] 92%|█████████▏| 343452/371472 [5:09:42<2:15:08, 3.46it/s] 92%|█████████▏| 343453/371472 [5:09:42<2:20:05, 3.33it/s] 92%|█████████▏| 343454/371472 [5:09:42<2:18:32, 3.37it/s] 92%|█████████▏| 343455/371472 [5:09:43<2:15:33, 3.44it/s] 92%|█████████▏| 343456/371472 [5:09:43<2:14:55, 3.46it/s] 92%|█████████▏| 343457/371472 [5:09:43<2:10:15, 3.58it/s] 92%|█████████▏| 343458/371472 [5:09:43<2:09:06, 3.62it/s] 92%|█████████▏| 343459/371472 [5:09:44<2:07:36, 3.66it/s] 92%|█████████▏| 343460/371472 [5:09:44<2:05:24, 3.72it/s] {'loss': 2.5731, 'learning_rate': 1.679038548557392e-07, 'epoch': 14.79} + 92%|█████████▏| 343460/371472 [5:09:44<2:05:24, 3.72it/s] 92%|█████████▏| 343461/371472 [5:09:44<2:10:02, 3.59it/s] 92%|█████████▏| 343462/371472 [5:09:44<2:10:14, 3.58it/s] 92%|█████████▏| 343463/371472 [5:09:45<2:22:07, 3.28it/s] 92%|█████████▏| 343464/371472 [5:09:45<2:25:20, 3.21it/s] 92%|█████████▏| 343465/371472 [5:09:46<2:30:37, 3.10it/s] 92%|█████████▏| 343466/371472 [5:09:46<2:22:21, 3.28it/s] 92%|█████████▏| 343467/371472 [5:09:46<2:20:36, 3.32it/s] 92%|█████████▏| 343468/371472 [5:09:46<2:18:38, 3.37it/s] 92%|█████████▏| 343469/371472 [5:09:47<2:22:47, 3.27it/s] 92%|█████████▏| 343470/371472 [5:09:47<2:16:11, 3.43it/s] 92%|█████████▏| 343471/371472 [5:09:47<2:15:42, 3.44it/s] 92%|█████████▏| 343472/371472 [5:09:47<2:12:01, 3.53it/s] 92%|█████████▏| 343473/371472 [5:09:48<2:07:50, 3.65it/s] 92%|█████████▏| 343474/371472 [5:09:48<2:10:09, 3.59it/s] 92%|█████████▏| 343475/371472 [5:09:48<2:09:42, 3.60it/s] 92%|█████████▏| 343476/371472 [5:09:49<2:07:23, 3.66it/s] 92%|█████████▏| 343477/371472 [5:09:49<2:13:11, 3.50it/s] 92%|█████████▏| 343478/371472 [5:09:49<2:09:52, 3.59it/s] 92%|█████████▏| 343479/371472 [5:09:49<2:08:32, 3.63it/s] 92%|█████████▏| 343480/371472 [5:09:50<2:08:24, 3.63it/s] {'loss': 2.635, 'learning_rate': 1.6785537288026027e-07, 'epoch': 14.79} + 92%|█████████▏| 343480/371472 [5:09:50<2:08:24, 3.63it/s] 92%|█████████▏| 343481/371472 [5:09:50<2:08:13, 3.64it/s] 92%|█████████▏| 343482/371472 [5:09:50<2:04:05, 3.76it/s] 92%|█████████▏| 343483/371472 [5:09:50<2:05:31, 3.72it/s] 92%|█████████▏| 343484/371472 [5:09:51<2:04:35, 3.74it/s] 92%|█████████▏| 343485/371472 [5:09:51<2:02:24, 3.81it/s] 92%|█████████▏| 343486/371472 [5:09:51<2:00:29, 3.87it/s] 92%|█████████▏| 343487/371472 [5:09:52<2:06:33, 3.69it/s] 92%|█████████▏| 343488/371472 [5:09:52<2:05:09, 3.73it/s] 92%|█████████▏| 343489/371472 [5:09:52<2:09:14, 3.61it/s] 92%|█████████▏| 343490/371472 [5:09:52<2:12:57, 3.51it/s] 92%|█████████▏| 343491/371472 [5:09:53<2:10:24, 3.58it/s] 92%|█████████▏| 343492/371472 [5:09:53<2:08:13, 3.64it/s] 92%|█████████▏| 343493/371472 [5:09:53<2:06:49, 3.68it/s] 92%|█████████▏| 343494/371472 [5:09:54<2:10:34, 3.57it/s] 92%|█████████▏| 343495/371472 [5:09:54<2:17:45, 3.38it/s] 92%|█████████▏| 343496/371472 [5:09:54<2:11:41, 3.54it/s] 92%|█████████▏| 343497/371472 [5:09:54<2:12:00, 3.53it/s] 92%|█████████▏| 343498/371472 [5:09:55<2:12:53, 3.51it/s] 92%|█████████▏| 343499/371472 [5:09:55<2:10:42, 3.57it/s] 92%|█████████▏| 343500/371472 [5:09:55<2:22:30, 3.27it/s] {'loss': 2.6227, 'learning_rate': 1.678068909047814e-07, 'epoch': 14.8} + 92%|█████████▏| 343500/371472 [5:09:55<2:22:30, 3.27it/s] 92%|█████████▏| 343501/371472 [5:09:56<2:14:27, 3.47it/s] 92%|█████████▏| 343502/371472 [5:09:56<2:14:16, 3.47it/s] 92%|█████████▏| 343503/371472 [5:09:56<2:13:56, 3.48it/s] 92%|█████████▏| 343504/371472 [5:09:56<2:18:46, 3.36it/s] 92%|█████████▏| 343505/371472 [5:09:57<2:11:04, 3.56it/s] 92%|█████████▏| 343506/371472 [5:09:57<2:12:13, 3.52it/s] 92%|█████████▏| 343507/371472 [5:09:57<2:09:18, 3.60it/s] 92%|█████████▏| 343508/371472 [5:09:58<2:08:14, 3.63it/s] 92%|█████████▏| 343509/371472 [5:09:58<2:04:31, 3.74it/s] 92%|█████████▏| 343510/371472 [5:09:58<2:03:45, 3.77it/s] 92%|█████████▏| 343511/371472 [5:09:58<2:03:41, 3.77it/s] 92%|█████████▏| 343512/371472 [5:09:59<2:03:45, 3.77it/s] 92%|█████████▏| 343513/371472 [5:09:59<2:11:38, 3.54it/s] 92%|█████████▏| 343514/371472 [5:09:59<2:06:29, 3.68it/s] 92%|█████████▏| 343515/371472 [5:09:59<2:09:46, 3.59it/s] 92%|█████████▏| 343516/371472 [5:10:00<2:10:30, 3.57it/s] 92%|█████████▏| 343517/371472 [5:10:00<2:13:01, 3.50it/s] 92%|█████████▏| 343518/371472 [5:10:00<2:11:34, 3.54it/s] 92%|█████████▏| 343519/371472 [5:10:01<2:10:16, 3.58it/s] 92%|█████████▏| 343520/371472 [5:10:01<2:04:41, 3.74it/s] {'loss': 2.594, 'learning_rate': 1.6775840892930246e-07, 'epoch': 14.8} + 92%|█████████▏| 343520/371472 [5:10:01<2:04:41, 3.74it/s] 92%|█████████▏| 343521/371472 [5:10:01<2:01:31, 3.83it/s] 92%|█████████▏| 343522/371472 [5:10:01<2:06:25, 3.68it/s] 92%|█████████▏| 343523/371472 [5:10:02<2:08:33, 3.62it/s] 92%|█████████▏| 343524/371472 [5:10:02<2:16:14, 3.42it/s] 92%|█████████▏| 343525/371472 [5:10:02<2:11:49, 3.53it/s] 92%|█████████▏| 343526/371472 [5:10:02<2:09:38, 3.59it/s] 92%|█████████▏| 343527/371472 [5:10:03<2:07:45, 3.65it/s] 92%|█████████▏| 343528/371472 [5:10:03<2:08:49, 3.62it/s] 92%|█████████▏| 343529/371472 [5:10:03<2:10:10, 3.58it/s] 92%|█████████▏| 343530/371472 [5:10:04<2:20:44, 3.31it/s] 92%|█████████▏| 343531/371472 [5:10:04<2:22:54, 3.26it/s] 92%|█████████▏| 343532/371472 [5:10:04<2:16:12, 3.42it/s] 92%|█████████▏| 343533/371472 [5:10:05<2:21:08, 3.30it/s] 92%|█████████▏| 343534/371472 [5:10:05<2:19:52, 3.33it/s] 92%|█████████▏| 343535/371472 [5:10:05<2:15:30, 3.44it/s] 92%|█████████▏| 343536/371472 [5:10:05<2:12:10, 3.52it/s] 92%|█████████▏| 343537/371472 [5:10:06<2:08:37, 3.62it/s] 92%|█████████▏| 343538/371472 [5:10:06<2:09:35, 3.59it/s] 92%|█████████▏| 343539/371472 [5:10:06<2:13:47, 3.48it/s] 92%|█████████▏| 343540/371472 [5:10:07<2:14:04, 3.47it/s] {'loss': 2.6105, 'learning_rate': 1.6770992695382364e-07, 'epoch': 14.8} + 92%|█████████▏| 343540/371472 [5:10:07<2:14:04, 3.47it/s] 92%|█████████▏| 343541/371472 [5:10:07<2:07:57, 3.64it/s] 92%|█████████▏| 343542/371472 [5:10:07<2:23:55, 3.23it/s] 92%|█████████▏| 343543/371472 [5:10:07<2:20:23, 3.32it/s] 92%|█████████▏| 343544/371472 [5:10:08<2:17:10, 3.39it/s] 92%|█████████▏| 343545/371472 [5:10:08<2:21:25, 3.29it/s] 92%|█████████▏| 343546/371472 [5:10:08<2:15:59, 3.42it/s] 92%|█████████▏| 343547/371472 [5:10:09<2:10:38, 3.56it/s] 92%|█████████▏| 343548/371472 [5:10:09<2:09:25, 3.60it/s] 92%|█████████▏| 343549/371472 [5:10:09<2:22:50, 3.26it/s] 92%|█████████▏| 343550/371472 [5:10:10<2:32:50, 3.04it/s] 92%|█████████▏| 343551/371472 [5:10:10<2:25:36, 3.20it/s] 92%|█████████▏| 343552/371472 [5:10:10<2:29:06, 3.12it/s] 92%|█████████▏| 343553/371472 [5:10:11<2:27:05, 3.16it/s] 92%|█████████▏| 343554/371472 [5:10:11<2:24:22, 3.22it/s] 92%|█████████▏| 343555/371472 [5:10:11<2:23:31, 3.24it/s] 92%|█████████▏| 343556/371472 [5:10:11<2:17:23, 3.39it/s] 92%|█████████▏| 343557/371472 [5:10:12<2:15:40, 3.43it/s] 92%|█████████▏| 343558/371472 [5:10:12<2:13:13, 3.49it/s] 92%|█████████▏| 343559/371472 [5:10:12<2:08:46, 3.61it/s] 92%|█████████▏| 343560/371472 [5:10:12<2:07:46, 3.64it/s] {'loss': 2.5951, 'learning_rate': 1.6766144497834468e-07, 'epoch': 14.8} + 92%|█████████▏| 343560/371472 [5:10:12<2:07:46, 3.64it/s] 92%|█████████▏| 343561/371472 [5:10:13<2:05:39, 3.70it/s] 92%|█████████▏| 343562/371472 [5:10:13<2:04:59, 3.72it/s] 92%|█████████▏| 343563/371472 [5:10:13<2:01:35, 3.83it/s] 92%|█████████▏| 343564/371472 [5:10:14<2:08:07, 3.63it/s] 92%|█████████▏| 343565/371472 [5:10:14<2:08:36, 3.62it/s] 92%|█████████▏| 343566/371472 [5:10:14<2:05:34, 3.70it/s] 92%|█████████▏| 343567/371472 [5:10:14<2:05:28, 3.71it/s] 92%|█████████▏| 343568/371472 [5:10:15<2:03:45, 3.76it/s] 92%|█████████▏| 343569/371472 [5:10:15<2:13:14, 3.49it/s] 92%|█████████▏| 343570/371472 [5:10:15<2:07:31, 3.65it/s] 92%|█████████▏| 343571/371472 [5:10:16<2:17:28, 3.38it/s] 92%|█████████▏| 343572/371472 [5:10:16<2:27:11, 3.16it/s] 92%|█████████▏| 343573/371472 [5:10:16<2:23:14, 3.25it/s] 92%|█████████▏| 343574/371472 [5:10:16<2:18:54, 3.35it/s] 92%|█████████▏| 343575/371472 [5:10:17<2:14:31, 3.46it/s] 92%|█████████▏| 343576/371472 [5:10:17<2:17:21, 3.38it/s] 92%|█████████▏| 343577/371472 [5:10:17<2:10:13, 3.57it/s] 92%|█████████▏| 343578/371472 [5:10:18<2:08:29, 3.62it/s] 92%|█████████▏| 343579/371472 [5:10:18<2:03:22, 3.77it/s] 92%|█████████▏| 343580/371472 [5:10:18<2:08:58, 3.60it/s] {'loss': 2.5114, 'learning_rate': 1.6761296300286583e-07, 'epoch': 14.8} + 92%|█████████▏| 343580/371472 [5:10:18<2:08:58, 3.60it/s] 92%|█████████▏| 343581/371472 [5:10:18<2:05:47, 3.70it/s] 92%|█████████▏| 343582/371472 [5:10:19<2:08:36, 3.61it/s] 92%|█████████▏| 343583/371472 [5:10:19<2:04:30, 3.73it/s] 92%|█████████▏| 343584/371472 [5:10:19<2:02:46, 3.79it/s] 92%|█████████▏| 343585/371472 [5:10:19<2:07:43, 3.64it/s] 92%|█████████▏| 343586/371472 [5:10:20<2:06:45, 3.67it/s] 92%|█████████▏| 343587/371472 [5:10:20<2:05:43, 3.70it/s] 92%|█████████▏| 343588/371472 [5:10:20<2:07:51, 3.63it/s] 92%|█████████▏| 343589/371472 [5:10:21<2:05:12, 3.71it/s] 92%|█████████▏| 343590/371472 [5:10:21<2:08:10, 3.63it/s] 92%|█████████▏| 343591/371472 [5:10:21<2:13:04, 3.49it/s] 92%|█████████▏| 343592/371472 [5:10:21<2:11:13, 3.54it/s] 92%|█████████▏| 343593/371472 [5:10:22<2:06:50, 3.66it/s] 92%|█████████▏| 343594/371472 [5:10:22<2:15:31, 3.43it/s] 92%|█████████▏| 343595/371472 [5:10:22<2:11:02, 3.55it/s] 92%|█████████▏| 343596/371472 [5:10:23<2:08:57, 3.60it/s] 92%|█████████▏| 343597/371472 [5:10:23<2:11:10, 3.54it/s] 92%|█████████▏| 343598/371472 [5:10:23<2:09:00, 3.60it/s] 92%|█████████▏| 343599/371472 [5:10:23<2:13:07, 3.49it/s] 92%|█████████▏| 343600/371472 [5:10:24<2:12:55, 3.49it/s] {'loss': 2.617, 'learning_rate': 1.675644810273869e-07, 'epoch': 14.8} + 92%|█████████▏| 343600/371472 [5:10:24<2:12:55, 3.49it/s] 92%|█████████▏| 343601/371472 [5:10:24<2:08:37, 3.61it/s] 92%|█████████▏| 343602/371472 [5:10:24<2:14:36, 3.45it/s] 92%|█████████▏| 343603/371472 [5:10:24<2:10:21, 3.56it/s] 92%|█████████▏| 343604/371472 [5:10:25<2:09:07, 3.60it/s] 92%|█████████▏| 343605/371472 [5:10:25<2:14:56, 3.44it/s] 92%|█████████▏| 343606/371472 [5:10:25<2:23:42, 3.23it/s] 92%|█████████▏| 343607/371472 [5:10:26<2:29:28, 3.11it/s] 92%|█████████▏| 343608/371472 [5:10:26<2:24:57, 3.20it/s] 92%|█████████▏| 343609/371472 [5:10:26<2:20:47, 3.30it/s] 92%|█████████▏| 343610/371472 [5:10:27<2:16:09, 3.41it/s] 92%|█████████▏| 343611/371472 [5:10:27<2:14:19, 3.46it/s] 93%|█████████▎| 343612/371472 [5:10:27<2:10:23, 3.56it/s] 93%|█████████▎| 343613/371472 [5:10:27<2:04:27, 3.73it/s] 93%|█████████▎| 343614/371472 [5:10:28<2:03:49, 3.75it/s] 93%|█████████▎| 343615/371472 [5:10:28<2:13:54, 3.47it/s] 93%|█████████▎| 343616/371472 [5:10:28<2:10:08, 3.57it/s] 93%|█████████▎| 343617/371472 [5:10:29<2:17:32, 3.38it/s] 93%|█████████▎| 343618/371472 [5:10:29<2:22:36, 3.26it/s] 93%|█████████▎| 343619/371472 [5:10:29<2:18:36, 3.35it/s] 93%|█████████▎| 343620/371472 [5:10:29<2:11:21, 3.53it/s] {'loss': 2.6254, 'learning_rate': 1.6751599905190806e-07, 'epoch': 14.8} + 93%|█████████▎| 343620/371472 [5:10:29<2:11:21, 3.53it/s] 93%|█████████▎| 343621/371472 [5:10:30<2:08:44, 3.61it/s] 93%|█████████▎| 343622/371472 [5:10:30<2:23:38, 3.23it/s] 93%|█████████▎| 343623/371472 [5:10:30<2:14:44, 3.44it/s] 93%|█████████▎| 343624/371472 [5:10:31<2:09:18, 3.59it/s] 93%|█████████▎| 343625/371472 [5:10:31<2:07:48, 3.63it/s] 93%|█████████▎| 343626/371472 [5:10:31<2:06:14, 3.68it/s] 93%|█████████▎| 343627/371472 [5:10:31<2:09:44, 3.58it/s] 93%|█████████▎| 343628/371472 [5:10:32<2:19:37, 3.32it/s] 93%|█████████▎| 343629/371472 [5:10:32<2:17:40, 3.37it/s] 93%|█████████▎| 343630/371472 [5:10:32<2:14:31, 3.45it/s] 93%|█████████▎| 343631/371472 [5:10:33<2:15:40, 3.42it/s] 93%|█████████▎| 343632/371472 [5:10:33<2:19:02, 3.34it/s] 93%|█████████▎| 343633/371472 [5:10:33<2:18:59, 3.34it/s] 93%|█████████▎| 343634/371472 [5:10:34<2:12:24, 3.50it/s] 93%|█████████▎| 343635/371472 [5:10:34<2:18:50, 3.34it/s] 93%|█████████▎| 343636/371472 [5:10:34<2:23:59, 3.22it/s] 93%|█████████▎| 343637/371472 [5:10:34<2:20:02, 3.31it/s] 93%|█████████▎| 343638/371472 [5:10:35<2:19:01, 3.34it/s] 93%|█████████▎| 343639/371472 [5:10:35<2:14:22, 3.45it/s] 93%|█████████▎| 343640/371472 [5:10:35<2:09:43, 3.58it/s] {'loss': 2.5578, 'learning_rate': 1.6746751707642913e-07, 'epoch': 14.8} + 93%|█████████▎| 343640/371472 [5:10:35<2:09:43, 3.58it/s] 93%|█████████▎| 343641/371472 [5:10:36<2:10:11, 3.56it/s] 93%|█████████▎| 343642/371472 [5:10:36<2:14:41, 3.44it/s] 93%|█████████▎| 343643/371472 [5:10:36<2:17:56, 3.36it/s] 93%|█████████▎| 343644/371472 [5:10:36<2:16:02, 3.41it/s] 93%|█████████▎| 343645/371472 [5:10:37<2:10:53, 3.54it/s] 93%|█████████▎| 343646/371472 [5:10:37<2:14:52, 3.44it/s] 93%|█████████▎| 343647/371472 [5:10:37<2:12:51, 3.49it/s] 93%|█████████▎| 343648/371472 [5:10:38<2:12:52, 3.49it/s] 93%|█████████▎| 343649/371472 [5:10:38<2:09:42, 3.57it/s] 93%|█████████▎| 343650/371472 [5:10:38<2:13:41, 3.47it/s] 93%|█████████▎| 343651/371472 [5:10:38<2:06:48, 3.66it/s] 93%|█████████▎| 343652/371472 [5:10:39<2:17:02, 3.38it/s] 93%|█████████▎| 343653/371472 [5:10:39<2:11:27, 3.53it/s] 93%|█████████▎| 343654/371472 [5:10:39<2:14:32, 3.45it/s] 93%|█████████▎| 343655/371472 [5:10:40<2:11:28, 3.53it/s] 93%|█████████▎| 343656/371472 [5:10:40<2:15:08, 3.43it/s] 93%|█████████▎| 343657/371472 [5:10:40<2:18:57, 3.34it/s] 93%|█████████▎| 343658/371472 [5:10:40<2:10:40, 3.55it/s] 93%|█████████▎| 343659/371472 [5:10:41<2:22:40, 3.25it/s] 93%|█████████▎| 343660/371472 [5:10:41<2:19:58, 3.31it/s] {'loss': 2.4715, 'learning_rate': 1.6741903510095028e-07, 'epoch': 14.8} + 93%|█████████▎| 343660/371472 [5:10:41<2:19:58, 3.31it/s] 93%|█████████▎| 343661/371472 [5:10:41<2:16:48, 3.39it/s] 93%|█████████▎| 343662/371472 [5:10:42<2:08:57, 3.59it/s] 93%|█████████▎| 343663/371472 [5:10:42<2:08:30, 3.61it/s] 93%|█████████▎| 343664/371472 [5:10:42<2:07:01, 3.65it/s] 93%|█████████▎| 343665/371472 [5:10:42<2:04:13, 3.73it/s] 93%|█████████▎| 343666/371472 [5:10:43<2:06:46, 3.66it/s] 93%|█████████▎| 343667/371472 [5:10:43<2:24:53, 3.20it/s] 93%|█████████▎| 343668/371472 [5:10:43<2:20:20, 3.30it/s] 93%|█████████▎| 343669/371472 [5:10:44<2:16:06, 3.40it/s] 93%|█████████▎| 343670/371472 [5:10:44<2:16:06, 3.40it/s] 93%|█████████▎| 343671/371472 [5:10:44<2:15:43, 3.41it/s] 93%|█████████▎| 343672/371472 [5:10:45<2:11:13, 3.53it/s] 93%|█████████▎| 343673/371472 [5:10:45<2:12:11, 3.50it/s] 93%|█████████▎| 343674/371472 [5:10:45<2:10:06, 3.56it/s] 93%|█████████▎| 343675/371472 [5:10:45<2:13:18, 3.48it/s] 93%|█████████▎| 343676/371472 [5:10:46<2:09:21, 3.58it/s] 93%|█████████▎| 343677/371472 [5:10:46<2:07:19, 3.64it/s] 93%|█████████▎| 343678/371472 [5:10:46<2:12:11, 3.50it/s] 93%|█████████▎| 343679/371472 [5:10:46<2:06:45, 3.65it/s] 93%|█████████▎| 343680/371472 [5:10:47<2:05:04, 3.70it/s] {'loss': 2.718, 'learning_rate': 1.6737055312547132e-07, 'epoch': 14.8} + 93%|█████████▎| 343680/371472 [5:10:47<2:05:04, 3.70it/s] 93%|█████████▎| 343681/371472 [5:10:47<2:06:23, 3.66it/s] 93%|█████████▎| 343682/371472 [5:10:47<2:06:59, 3.65it/s] 93%|█████████▎| 343683/371472 [5:10:48<2:05:48, 3.68it/s] 93%|█████████▎| 343684/371472 [5:10:48<2:07:33, 3.63it/s] 93%|█████████▎| 343685/371472 [5:10:48<2:06:02, 3.67it/s] 93%|█████████▎| 343686/371472 [5:10:48<2:08:01, 3.62it/s] 93%|█████████▎| 343687/371472 [5:10:49<2:10:16, 3.55it/s] 93%|█████████▎| 343688/371472 [5:10:49<2:09:48, 3.57it/s] 93%|█████████▎| 343689/371472 [5:10:49<2:08:26, 3.61it/s] 93%|█████████▎| 343690/371472 [5:10:50<2:08:33, 3.60it/s] 93%|█████████▎| 343691/371472 [5:10:50<2:13:47, 3.46it/s] 93%|█████████▎| 343692/371472 [5:10:50<2:18:15, 3.35it/s] 93%|█████████▎| 343693/371472 [5:10:50<2:18:24, 3.35it/s] 93%|█████████▎| 343694/371472 [5:10:51<2:13:05, 3.48it/s] 93%|█████████▎| 343695/371472 [5:10:51<2:14:17, 3.45it/s] 93%|█████████▎| 343696/371472 [5:10:51<2:24:37, 3.20it/s] 93%|█████████▎| 343697/371472 [5:10:52<2:26:08, 3.17it/s] 93%|█████████▎| 343698/371472 [5:10:52<2:24:08, 3.21it/s] 93%|█████████▎| 343699/371472 [5:10:52<2:27:12, 3.14it/s] 93%|█████████▎| 343700/371472 [5:10:53<2:21:18, 3.28it/s] {'loss': 2.7059, 'learning_rate': 1.6732207114999248e-07, 'epoch': 14.8} + 93%|█████████▎| 343700/371472 [5:10:53<2:21:18, 3.28it/s] 93%|█████████▎| 343701/371472 [5:10:53<2:13:56, 3.46it/s] 93%|█████████▎| 343702/371472 [5:10:53<2:10:53, 3.54it/s] 93%|█████████▎| 343703/371472 [5:10:53<2:10:12, 3.55it/s] 93%|█████████▎| 343704/371472 [5:10:54<2:11:24, 3.52it/s] 93%|█████████▎| 343705/371472 [5:10:54<2:07:21, 3.63it/s] 93%|█████████▎| 343706/371472 [5:10:54<2:05:45, 3.68it/s] 93%|█████████▎| 343707/371472 [5:10:54<2:05:18, 3.69it/s] 93%|█████████▎| 343708/371472 [5:10:55<2:08:37, 3.60it/s] 93%|█████████▎| 343709/371472 [5:10:55<2:03:59, 3.73it/s] 93%|█████████▎| 343710/371472 [5:10:55<2:19:34, 3.31it/s] 93%|█████████▎| 343711/371472 [5:10:56<2:22:33, 3.25it/s] 93%|█████████▎| 343712/371472 [5:10:56<2:15:18, 3.42it/s] 93%|█████████▎| 343713/371472 [5:10:56<2:24:48, 3.19it/s] 93%|█████████▎| 343714/371472 [5:10:57<2:23:17, 3.23it/s] 93%|█████████▎| 343715/371472 [5:10:57<2:16:14, 3.40it/s] 93%|█████████▎| 343716/371472 [5:10:57<2:13:13, 3.47it/s] 93%|█████████▎| 343717/371472 [5:10:57<2:09:25, 3.57it/s] 93%|█████████▎| 343718/371472 [5:10:58<2:07:36, 3.62it/s] 93%|█████████▎| 343719/371472 [5:10:58<2:05:52, 3.67it/s] 93%|█████████▎| 343720/371472 [5:10:58<2:24:12, 3.21it/s] {'loss': 2.5951, 'learning_rate': 1.6727358917451355e-07, 'epoch': 14.8} + 93%|█████████▎| 343720/371472 [5:10:58<2:24:12, 3.21it/s] 93%|█████████▎| 343721/371472 [5:10:59<2:19:43, 3.31it/s] 93%|█████████▎| 343722/371472 [5:10:59<2:19:25, 3.32it/s] 93%|█████████▎| 343723/371472 [5:10:59<2:17:21, 3.37it/s] 93%|█████████▎| 343724/371472 [5:10:59<2:10:45, 3.54it/s] 93%|█████████▎| 343725/371472 [5:11:00<2:11:11, 3.53it/s] 93%|█████████▎| 343726/371472 [5:11:00<2:06:44, 3.65it/s] 93%|█████████▎| 343727/371472 [5:11:00<2:09:02, 3.58it/s] 93%|█████████▎| 343728/371472 [5:11:01<2:06:53, 3.64it/s] 93%|█████████▎| 343729/371472 [5:11:01<2:05:21, 3.69it/s] 93%|█████████▎| 343730/371472 [5:11:01<2:05:18, 3.69it/s] 93%|█████████▎| 343731/371472 [5:11:01<2:04:17, 3.72it/s] 93%|█████████▎| 343732/371472 [5:11:02<2:06:25, 3.66it/s] 93%|█████████▎| 343733/371472 [5:11:02<2:15:03, 3.42it/s] 93%|█████████▎| 343734/371472 [5:11:02<2:16:04, 3.40it/s] 93%|█████████▎| 343735/371472 [5:11:03<2:14:28, 3.44it/s] 93%|█████████▎| 343736/371472 [5:11:03<2:11:53, 3.50it/s] 93%|█████████▎| 343737/371472 [5:11:03<2:19:23, 3.32it/s] 93%|█████████▎| 343738/371472 [5:11:04<2:28:37, 3.11it/s] 93%|█████████▎| 343739/371472 [5:11:04<2:21:16, 3.27it/s] 93%|█████████▎| 343740/371472 [5:11:04<2:17:15, 3.37it/s] {'loss': 2.6414, 'learning_rate': 1.672251071990347e-07, 'epoch': 14.81} + 93%|█████████▎| 343740/371472 [5:11:04<2:17:15, 3.37it/s] 93%|█████████▎| 343741/371472 [5:11:04<2:10:16, 3.55it/s] 93%|█████████▎| 343742/371472 [5:11:05<2:10:40, 3.54it/s] 93%|█████████▎| 343743/371472 [5:11:05<2:11:14, 3.52it/s] 93%|█████████▎| 343744/371472 [5:11:05<2:07:57, 3.61it/s] 93%|█████████▎| 343745/371472 [5:11:06<2:17:42, 3.36it/s] 93%|█████████▎| 343746/371472 [5:11:06<2:09:21, 3.57it/s] 93%|█████████▎| 343747/371472 [5:11:06<2:20:22, 3.29it/s] 93%|█████████▎| 343748/371472 [5:11:06<2:15:26, 3.41it/s] 93%|█████████▎| 343749/371472 [5:11:07<2:21:42, 3.26it/s] 93%|█████████▎| 343750/371472 [5:11:07<2:14:22, 3.44it/s] 93%|█████████▎| 343751/371472 [5:11:07<2:20:24, 3.29it/s] 93%|█████████▎| 343752/371472 [5:11:08<2:18:20, 3.34it/s] 93%|█████████▎| 343753/371472 [5:11:08<2:11:49, 3.50it/s] 93%|█████████▎| 343754/371472 [5:11:08<2:06:55, 3.64it/s] 93%|█████████▎| 343755/371472 [5:11:08<2:05:28, 3.68it/s] 93%|█████████▎| 343756/371472 [5:11:09<2:03:46, 3.73it/s] 93%|█████████▎| 343757/371472 [5:11:09<2:10:31, 3.54it/s] 93%|█████████▎| 343758/371472 [5:11:09<2:16:36, 3.38it/s] 93%|█████████▎| 343759/371472 [5:11:10<2:10:54, 3.53it/s] 93%|█████████▎| 343760/371472 [5:11:10<2:08:32, 3.59it/s] {'loss': 2.679, 'learning_rate': 1.6717662522355574e-07, 'epoch': 14.81} + 93%|█████████▎| 343760/371472 [5:11:10<2:08:32, 3.59it/s] 93%|█████████▎| 343761/371472 [5:11:10<2:04:00, 3.72it/s] 93%|█████████▎| 343762/371472 [5:11:10<2:04:21, 3.71it/s] 93%|█████████▎| 343763/371472 [5:11:11<2:07:01, 3.64it/s] 93%|█████████▎| 343764/371472 [5:11:11<2:13:11, 3.47it/s] 93%|█████████▎| 343765/371472 [5:11:11<2:20:47, 3.28it/s] 93%|█████████▎| 343766/371472 [5:11:12<2:13:55, 3.45it/s] 93%|█████████▎| 343767/371472 [5:11:12<2:23:05, 3.23it/s] 93%|█████████▎| 343768/371472 [5:11:12<2:17:08, 3.37it/s] 93%|█████████▎| 343769/371472 [5:11:12<2:25:11, 3.18it/s] 93%|█████████▎| 343770/371472 [5:11:13<2:17:33, 3.36it/s] 93%|█████████▎| 343771/371472 [5:11:13<2:11:44, 3.50it/s] 93%|█████████▎| 343772/371472 [5:11:13<2:13:56, 3.45it/s] 93%|█████████▎| 343773/371472 [5:11:14<2:08:19, 3.60it/s] 93%|█████████▎| 343774/371472 [5:11:14<2:14:24, 3.43it/s] 93%|█████████▎| 343775/371472 [5:11:14<2:10:53, 3.53it/s] 93%|█████████▎| 343776/371472 [5:11:14<2:17:47, 3.35it/s] 93%|█████████▎| 343777/371472 [5:11:15<2:11:47, 3.50it/s] 93%|█████████▎| 343778/371472 [5:11:15<2:15:20, 3.41it/s] 93%|█████████▎| 343779/371472 [5:11:15<2:25:20, 3.18it/s] 93%|█████████▎| 343780/371472 [5:11:16<2:14:19, 3.44it/s] {'loss': 2.5788, 'learning_rate': 1.6712814324807692e-07, 'epoch': 14.81} + 93%|█████████▎| 343780/371472 [5:11:16<2:14:19, 3.44it/s] 93%|█████████▎| 343781/371472 [5:11:16<2:08:23, 3.59it/s] 93%|█████████▎| 343782/371472 [5:11:16<2:13:04, 3.47it/s] 93%|█████████▎| 343783/371472 [5:11:16<2:08:59, 3.58it/s] 93%|█████████▎| 343784/371472 [5:11:17<2:16:17, 3.39it/s] 93%|█████████▎| 343785/371472 [5:11:17<2:09:54, 3.55it/s] 93%|█████████▎| 343786/371472 [5:11:17<2:07:59, 3.61it/s] 93%|█████████▎| 343787/371472 [5:11:18<2:12:51, 3.47it/s] 93%|█████████▎| 343788/371472 [5:11:18<2:06:10, 3.66it/s] 93%|█████████▎| 343789/371472 [5:11:18<2:06:49, 3.64it/s] 93%|█████████▎| 343790/371472 [5:11:18<2:12:33, 3.48it/s] 93%|█████████▎| 343791/371472 [5:11:19<2:13:00, 3.47it/s] 93%|█████████▎| 343792/371472 [5:11:19<2:05:41, 3.67it/s] 93%|█████████▎| 343793/371472 [5:11:19<2:10:51, 3.53it/s] 93%|█████████▎| 343794/371472 [5:11:20<2:12:24, 3.48it/s] 93%|█████████▎| 343795/371472 [5:11:20<2:15:16, 3.41it/s] 93%|█████████▎| 343796/371472 [5:11:20<2:16:32, 3.38it/s] 93%|█████████▎| 343797/371472 [5:11:21<2:18:58, 3.32it/s] 93%|█████████▎| 343798/371472 [5:11:21<2:15:11, 3.41it/s] 93%|█████████▎| 343799/371472 [5:11:21<2:09:37, 3.56it/s] 93%|█████████▎| 343800/371472 [5:11:21<2:11:27, 3.51it/s] {'loss': 2.5702, 'learning_rate': 1.6707966127259797e-07, 'epoch': 14.81} + 93%|█████████▎| 343800/371472 [5:11:21<2:11:27, 3.51it/s] 93%|█████████▎| 343801/371472 [5:11:22<2:10:25, 3.54it/s] 93%|█████████▎| 343802/371472 [5:11:22<2:09:38, 3.56it/s] 93%|█████████▎| 343803/371472 [5:11:22<2:08:48, 3.58it/s] 93%|█████████▎| 343804/371472 [5:11:22<2:15:28, 3.40it/s] 93%|█████████▎| 343805/371472 [5:11:23<2:15:44, 3.40it/s] 93%|█████████▎| 343806/371472 [5:11:23<2:14:57, 3.42it/s] 93%|█████████▎| 343807/371472 [5:11:23<2:21:45, 3.25it/s] 93%|█████████▎| 343808/371472 [5:11:24<2:30:56, 3.05it/s] 93%|█████████▎| 343809/371472 [5:11:24<2:22:54, 3.23it/s] 93%|█████████▎| 343810/371472 [5:11:24<2:14:40, 3.42it/s] 93%|█████████▎| 343811/371472 [5:11:25<2:11:09, 3.52it/s] 93%|█████████▎| 343812/371472 [5:11:25<2:16:46, 3.37it/s] 93%|█████████▎| 343813/371472 [5:11:25<2:14:58, 3.42it/s] 93%|█████████▎| 343814/371472 [5:11:25<2:11:23, 3.51it/s] 93%|█████████▎| 343815/371472 [5:11:26<2:26:17, 3.15it/s] 93%|█████████▎| 343816/371472 [5:11:26<2:27:24, 3.13it/s] 93%|█████████▎| 343817/371472 [5:11:26<2:20:40, 3.28it/s] 93%|█████████▎| 343818/371472 [5:11:27<2:15:11, 3.41it/s] 93%|█████████▎| 343819/371472 [5:11:27<2:11:02, 3.52it/s] 93%|█████████▎| 343820/371472 [5:11:27<2:19:21, 3.31it/s] {'loss': 2.479, 'learning_rate': 1.6703117929711912e-07, 'epoch': 14.81} + 93%|█████████▎| 343820/371472 [5:11:27<2:19:21, 3.31it/s] 93%|█████████▎| 343821/371472 [5:11:28<2:16:26, 3.38it/s] 93%|█████████▎| 343822/371472 [5:11:28<2:16:13, 3.38it/s] 93%|█████████▎| 343823/371472 [5:11:28<2:10:56, 3.52it/s] 93%|█████████▎| 343824/371472 [5:11:28<2:16:07, 3.39it/s] 93%|█████████▎| 343825/371472 [5:11:29<2:22:18, 3.24it/s] 93%|█████████▎| 343826/371472 [5:11:29<2:16:37, 3.37it/s] 93%|█████████▎| 343827/371472 [5:11:29<2:14:03, 3.44it/s] 93%|█████████▎| 343828/371472 [5:11:30<2:12:28, 3.48it/s] 93%|█████████▎| 343829/371472 [5:11:30<2:15:32, 3.40it/s] 93%|█████████▎| 343830/371472 [5:11:30<2:13:57, 3.44it/s] 93%|█████████▎| 343831/371472 [5:11:31<2:11:55, 3.49it/s] 93%|█████████▎| 343832/371472 [5:11:31<2:10:17, 3.54it/s] 93%|█████████▎| 343833/371472 [5:11:31<2:06:46, 3.63it/s] 93%|█████████▎| 343834/371472 [5:11:31<2:15:32, 3.40it/s] 93%|█████████▎| 343835/371472 [5:11:32<2:11:49, 3.49it/s] 93%|█████████▎| 343836/371472 [5:11:32<2:15:16, 3.40it/s] 93%|█████████▎| 343837/371472 [5:11:32<2:15:20, 3.40it/s] 93%|█████████▎| 343838/371472 [5:11:33<2:19:32, 3.30it/s] 93%|█████████▎| 343839/371472 [5:11:33<2:16:15, 3.38it/s] 93%|█████████▎| 343840/371472 [5:11:33<2:14:13, 3.43it/s] {'loss': 2.6102, 'learning_rate': 1.669826973216402e-07, 'epoch': 14.81} + 93%|█████████▎| 343840/371472 [5:11:33<2:14:13, 3.43it/s] 93%|█████████▎| 343841/371472 [5:11:33<2:12:42, 3.47it/s] 93%|█████████▎| 343842/371472 [5:11:34<2:10:13, 3.54it/s] 93%|█████████▎| 343843/371472 [5:11:34<2:15:30, 3.40it/s] 93%|█████████▎| 343844/371472 [5:11:34<2:16:31, 3.37it/s] 93%|█████████▎| 343845/371472 [5:11:35<2:11:43, 3.50it/s] 93%|█████████▎| 343846/371472 [5:11:35<2:09:31, 3.55it/s] 93%|█████████▎| 343847/371472 [5:11:35<2:17:08, 3.36it/s] 93%|█████████▎| 343848/371472 [5:11:35<2:11:34, 3.50it/s] 93%|█████████▎| 343849/371472 [5:11:36<2:11:03, 3.51it/s] 93%|█████████▎| 343850/371472 [5:11:36<2:09:51, 3.54it/s] 93%|█████████▎| 343851/371472 [5:11:36<2:14:49, 3.41it/s] 93%|█████████▎| 343852/371472 [5:11:37<2:20:50, 3.27it/s] 93%|█████████▎| 343853/371472 [5:11:37<2:15:45, 3.39it/s] 93%|█████████▎| 343854/371472 [5:11:37<2:12:10, 3.48it/s] 93%|█████████▎| 343855/371472 [5:11:37<2:05:50, 3.66it/s] 93%|█████████▎| 343856/371472 [5:11:38<2:08:26, 3.58it/s] 93%|█████████▎| 343857/371472 [5:11:38<2:18:08, 3.33it/s] 93%|█████████▎| 343858/371472 [5:11:38<2:12:14, 3.48it/s] 93%|█████████▎| 343859/371472 [5:11:39<2:17:15, 3.35it/s] 93%|█████████▎| 343860/371472 [5:11:39<2:17:26, 3.35it/s] {'loss': 2.4668, 'learning_rate': 1.6693421534616134e-07, 'epoch': 14.81} + 93%|█████████▎| 343860/371472 [5:11:39<2:17:26, 3.35it/s] 93%|█████████▎| 343861/371472 [5:11:39<2:14:44, 3.42it/s] 93%|█████████▎| 343862/371472 [5:11:39<2:07:56, 3.60it/s] 93%|█████████▎| 343863/371472 [5:11:40<2:05:56, 3.65it/s] 93%|█████████▎| 343864/371472 [5:11:40<2:10:34, 3.52it/s] 93%|█████████▎| 343865/371472 [5:11:40<2:06:20, 3.64it/s] 93%|█████████▎| 343866/371472 [5:11:41<2:03:39, 3.72it/s] 93%|█████████▎| 343867/371472 [5:11:41<2:07:15, 3.62it/s] 93%|█████████▎| 343868/371472 [5:11:41<2:06:14, 3.64it/s] 93%|█████████▎| 343869/371472 [5:11:41<2:05:09, 3.68it/s] 93%|█████████▎| 343870/371472 [5:11:42<2:06:04, 3.65it/s] 93%|█████████▎| 343871/371472 [5:11:42<2:06:35, 3.63it/s] 93%|█████████▎| 343872/371472 [5:11:42<2:13:48, 3.44it/s] 93%|█████████▎| 343873/371472 [5:11:43<2:16:01, 3.38it/s] 93%|█████████▎| 343874/371472 [5:11:43<2:16:38, 3.37it/s] 93%|█████████▎| 343875/371472 [5:11:43<2:20:22, 3.28it/s] 93%|█████████▎| 343876/371472 [5:11:43<2:17:39, 3.34it/s] 93%|█████████▎| 343877/371472 [5:11:44<2:11:04, 3.51it/s] 93%|█████████▎| 343878/371472 [5:11:44<2:10:03, 3.54it/s] 93%|█████████▎| 343879/371472 [5:11:44<2:13:01, 3.46it/s] 93%|█████████▎| 343880/371472 [5:11:45<2:17:22, 3.35it/s] {'loss': 2.6407, 'learning_rate': 1.6688573337068238e-07, 'epoch': 14.81} + 93%|█████████▎| 343880/371472 [5:11:45<2:17:22, 3.35it/s] 93%|█████████▎| 343881/371472 [5:11:45<2:11:52, 3.49it/s] 93%|█████████▎| 343882/371472 [5:11:45<2:15:07, 3.40it/s] 93%|█████████▎| 343883/371472 [5:11:45<2:11:31, 3.50it/s] 93%|█████████▎| 343884/371472 [5:11:46<2:12:01, 3.48it/s] 93%|█████████▎| 343885/371472 [5:11:46<2:10:45, 3.52it/s] 93%|█████████▎| 343886/371472 [5:11:46<2:08:30, 3.58it/s] 93%|█████████▎| 343887/371472 [5:11:47<2:15:14, 3.40it/s] 93%|█████████▎| 343888/371472 [5:11:47<2:11:26, 3.50it/s] 93%|█████████▎| 343889/371472 [5:11:47<2:07:28, 3.61it/s] 93%|█████████▎| 343890/371472 [5:11:47<2:15:35, 3.39it/s] 93%|█████████▎| 343891/371472 [5:11:48<2:20:36, 3.27it/s] 93%|█████████▎| 343892/371472 [5:11:48<2:12:22, 3.47it/s] 93%|█████████▎| 343893/371472 [5:11:48<2:08:36, 3.57it/s] 93%|█████████▎| 343894/371472 [5:11:49<2:10:51, 3.51it/s] 93%|█████████▎| 343895/371472 [5:11:49<2:09:29, 3.55it/s] 93%|█████████▎| 343896/371472 [5:11:49<2:12:33, 3.47it/s] 93%|█████████▎| 343897/371472 [5:11:49<2:08:15, 3.58it/s] 93%|█████████▎| 343898/371472 [5:11:50<2:12:11, 3.48it/s] 93%|█████████▎| 343899/371472 [5:11:50<2:16:58, 3.35it/s] 93%|█████████▎| 343900/371472 [5:11:50<2:20:35, 3.27it/s] {'loss': 2.413, 'learning_rate': 1.6683725139520346e-07, 'epoch': 14.81} + 93%|█████████▎| 343900/371472 [5:11:50<2:20:35, 3.27it/s] 93%|█████████▎| 343901/371472 [5:11:51<2:17:20, 3.35it/s] 93%|█████████▎| 343902/371472 [5:11:51<2:17:46, 3.34it/s] 93%|█████████▎| 343903/371472 [5:11:51<2:14:41, 3.41it/s] 93%|█████████▎| 343904/371472 [5:11:52<2:18:08, 3.33it/s] 93%|█████████▎| 343905/371472 [5:11:52<2:17:15, 3.35it/s] 93%|█████████▎| 343906/371472 [5:11:52<2:12:19, 3.47it/s] 93%|█████████▎| 343907/371472 [5:11:52<2:10:37, 3.52it/s] 93%|█████████▎| 343908/371472 [5:11:53<2:08:37, 3.57it/s] 93%|█████████▎| 343909/371472 [5:11:53<2:21:35, 3.24it/s] 93%|█████████▎| 343910/371472 [5:11:53<2:16:09, 3.37it/s] 93%|█████████▎| 343911/371472 [5:11:54<2:12:07, 3.48it/s] 93%|█████████▎| 343912/371472 [5:11:54<2:20:16, 3.27it/s] 93%|███████��█▎| 343913/371472 [5:11:54<2:22:52, 3.21it/s] 93%|█████████▎| 343914/371472 [5:11:55<2:24:44, 3.17it/s] 93%|█████████▎| 343915/371472 [5:11:55<2:21:44, 3.24it/s] 93%|█████████▎| 343916/371472 [5:11:55<2:20:39, 3.27it/s] 93%|█████████▎| 343917/371472 [5:11:56<2:22:17, 3.23it/s] 93%|█████████▎| 343918/371472 [5:11:56<2:27:13, 3.12it/s] 93%|█████████▎| 343919/371472 [5:11:56<2:23:47, 3.19it/s] 93%|█████████▎| 343920/371472 [5:11:56<2:20:11, 3.28it/s] {'loss': 2.5972, 'learning_rate': 1.6678876941972463e-07, 'epoch': 14.81} + 93%|█████████▎| 343920/371472 [5:11:56<2:20:11, 3.28it/s] 93%|█████████▎| 343921/371472 [5:11:57<2:16:06, 3.37it/s] 93%|█████████▎| 343922/371472 [5:11:57<2:10:07, 3.53it/s] 93%|█████████▎| 343923/371472 [5:11:57<2:09:58, 3.53it/s] 93%|█████████▎| 343924/371472 [5:11:58<2:07:59, 3.59it/s] 93%|█████████▎| 343925/371472 [5:11:58<2:09:51, 3.54it/s] 93%|█████████▎| 343926/371472 [5:11:58<2:07:49, 3.59it/s] 93%|█████████▎| 343927/371472 [5:11:58<2:13:13, 3.45it/s] 93%|█████████▎| 343928/371472 [5:11:59<2:10:05, 3.53it/s] 93%|█████████▎| 343929/371472 [5:11:59<2:09:56, 3.53it/s] 93%|█████████▎| 343930/371472 [5:11:59<2:10:08, 3.53it/s] 93%|█████████▎| 343931/371472 [5:12:00<2:15:32, 3.39it/s] 93%|█████████▎| 343932/371472 [5:12:00<2:10:42, 3.51it/s] 93%|█████████▎| 343933/371472 [5:12:00<2:14:44, 3.41it/s] 93%|█████████▎| 343934/371472 [5:12:00<2:11:00, 3.50it/s] 93%|█████████▎| 343935/371472 [5:12:01<2:14:38, 3.41it/s] 93%|█████████▎| 343936/371472 [5:12:01<2:12:23, 3.47it/s] 93%|█████████▎| 343937/371472 [5:12:01<2:07:29, 3.60it/s] 93%|█████████▎| 343938/371472 [5:12:02<2:13:20, 3.44it/s] 93%|█████████▎| 343939/371472 [5:12:02<2:10:12, 3.52it/s] 93%|█████████▎| 343940/371472 [5:12:02<2:09:37, 3.54it/s] {'loss': 2.5663, 'learning_rate': 1.6674028744424568e-07, 'epoch': 14.81} + 93%|█████████▎| 343940/371472 [5:12:02<2:09:37, 3.54it/s] 93%|█████████▎| 343941/371472 [5:12:02<2:14:34, 3.41it/s] 93%|█████████▎| 343942/371472 [5:12:03<2:17:45, 3.33it/s] 93%|█████████▎| 343943/371472 [5:12:03<2:14:06, 3.42it/s] 93%|█████████▎| 343944/371472 [5:12:03<2:10:51, 3.51it/s] 93%|█████████▎| 343945/371472 [5:12:04<2:26:27, 3.13it/s] 93%|█████████▎| 343946/371472 [5:12:04<2:20:52, 3.26it/s] 93%|█████████▎| 343947/371472 [5:12:04<2:11:59, 3.48it/s] 93%|█████████▎| 343948/371472 [5:12:05<2:12:57, 3.45it/s] 93%|█████████▎| 343949/371472 [5:12:05<2:21:21, 3.24it/s] 93%|█████████▎| 343950/371472 [5:12:05<2:18:49, 3.30it/s] 93%|█████████▎| 343951/371472 [5:12:05<2:13:49, 3.43it/s] 93%|█████████▎| 343952/371472 [5:12:06<2:13:59, 3.42it/s] 93%|█████████▎| 343953/371472 [5:12:06<2:11:11, 3.50it/s] 93%|█████████▎| 343954/371472 [5:12:06<2:12:55, 3.45it/s] 93%|█████████▎| 343955/371472 [5:12:07<2:09:53, 3.53it/s] 93%|█████████▎| 343956/371472 [5:12:07<2:09:34, 3.54it/s] 93%|█████████▎| 343957/371472 [5:12:07<2:12:20, 3.47it/s] 93%|█████████▎| 343958/371472 [5:12:07<2:09:48, 3.53it/s] 93%|█████████▎| 343959/371472 [5:12:08<2:15:29, 3.38it/s] 93%|█████████▎| 343960/371472 [5:12:08<2:14:46, 3.40it/s] {'loss': 2.605, 'learning_rate': 1.6669180546876683e-07, 'epoch': 14.82} + 93%|█████████▎| 343960/371472 [5:12:08<2:14:46, 3.40it/s] 93%|█████████▎| 343961/371472 [5:12:08<2:10:06, 3.52it/s] 93%|█████████▎| 343962/371472 [5:12:09<2:06:59, 3.61it/s] 93%|█████████▎| 343963/371472 [5:12:09<2:03:35, 3.71it/s] 93%|█████████▎| 343964/371472 [5:12:09<2:04:53, 3.67it/s] 93%|█████████▎| 343965/371472 [5:12:09<1:59:30, 3.84it/s] 93%|█████████▎| 343966/371472 [5:12:10<1:59:35, 3.83it/s] 93%|█████████▎| 343967/371472 [5:12:10<1:58:03, 3.88it/s] 93%|█████████▎| 343968/371472 [5:12:10<1:57:10, 3.91it/s] 93%|█████���███▎| 343969/371472 [5:12:10<1:57:17, 3.91it/s] 93%|█████████▎| 343970/371472 [5:12:11<1:58:42, 3.86it/s] 93%|█████████▎| 343971/371472 [5:12:11<1:58:46, 3.86it/s] 93%|█████████▎| 343972/371472 [5:12:11<1:57:35, 3.90it/s] 93%|█████████▎| 343973/371472 [5:12:11<2:05:55, 3.64it/s] 93%|█████████▎| 343974/371472 [5:12:12<2:08:19, 3.57it/s] 93%|█████████▎| 343975/371472 [5:12:12<2:04:16, 3.69it/s] 93%|█████████▎| 343976/371472 [5:12:12<2:20:00, 3.27it/s] 93%|█████████▎| 343977/371472 [5:12:13<2:18:50, 3.30it/s] 93%|█████████▎| 343978/371472 [5:12:13<2:15:34, 3.38it/s] 93%|█████████▎| 343979/371472 [5:12:13<2:13:10, 3.44it/s] 93%|█████████▎| 343980/371472 [5:12:13<2:10:08, 3.52it/s] {'loss': 2.5784, 'learning_rate': 1.666433234932879e-07, 'epoch': 14.82} + 93%|█████████▎| 343980/371472 [5:12:13<2:10:08, 3.52it/s] 93%|█████████▎| 343981/371472 [5:12:14<2:06:56, 3.61it/s] 93%|█████████▎| 343982/371472 [5:12:14<2:05:47, 3.64it/s] 93%|█████████▎| 343983/371472 [5:12:14<2:04:07, 3.69it/s] 93%|█████████▎| 343984/371472 [5:12:15<2:06:26, 3.62it/s] 93%|█████████▎| 343985/371472 [5:12:15<2:02:48, 3.73it/s] 93%|█████████▎| 343986/371472 [5:12:15<2:01:12, 3.78it/s] 93%|█████████▎| 343987/371472 [5:12:15<2:04:10, 3.69it/s] 93%|█████████▎| 343988/371472 [5:12:16<2:12:03, 3.47it/s] 93%|█████████▎| 343989/371472 [5:12:16<2:09:04, 3.55it/s] 93%|█████████▎| 343990/371472 [5:12:16<2:08:43, 3.56it/s] 93%|█████████▎| 343991/371472 [5:12:16<2:08:57, 3.55it/s] 93%|█████████▎| 343992/371472 [5:12:17<2:09:52, 3.53it/s] 93%|█████████▎| 343993/371472 [5:12:17<2:09:39, 3.53it/s] 93%|█████████▎| 343994/371472 [5:12:17<2:03:56, 3.70it/s] 93%|█████████▎| 343995/371472 [5:12:18<2:04:18, 3.68it/s] 93%|█████████▎| 343996/371472 [5:12:18<2:06:12, 3.63it/s] 93%|█████████▎| 343997/371472 [5:12:18<2:03:06, 3.72it/s] 93%|█████████▎| 343998/371472 [5:12:18<2:03:25, 3.71it/s] 93%|█████████▎| 343999/371472 [5:12:19<2:04:05, 3.69it/s] 93%|█████████▎| 344000/371472 [5:12:19<2:02:14, 3.75it/s] {'loss': 2.6577, 'learning_rate': 1.6659484151780905e-07, 'epoch': 14.82} + 93%|█████████▎| 344000/371472 [5:12:19<2:02:14, 3.75it/s] 93%|█████████▎| 344001/371472 [5:12:19<2:17:02, 3.34it/s] 93%|█████████▎| 344002/371472 [5:12:20<2:16:04, 3.36it/s] 93%|█████████▎| 344003/371472 [5:12:20<2:14:48, 3.40it/s] 93%|█████████▎| 344004/371472 [5:12:20<2:12:18, 3.46it/s] 93%|█████████▎| 344005/371472 [5:12:20<2:04:54, 3.67it/s] 93%|█████████▎| 344006/371472 [5:12:21<2:11:47, 3.47it/s] 93%|█████████▎| 344007/371472 [5:12:21<2:12:13, 3.46it/s] 93%|█████████▎| 344008/371472 [5:12:21<2:09:18, 3.54it/s] 93%|█████████▎| 344009/371472 [5:12:22<2:04:38, 3.67it/s] 93%|█████████▎| 344010/371472 [5:12:22<2:01:31, 3.77it/s] 93%|█████████▎| 344011/371472 [5:12:22<2:09:23, 3.54it/s] 93%|█████████▎| 344012/371472 [5:12:22<2:10:53, 3.50it/s] 93%|█████████▎| 344013/371472 [5:12:23<2:13:15, 3.43it/s] 93%|█████████▎| 344014/371472 [5:12:23<2:15:19, 3.38it/s] 93%|█████████▎| 344015/371472 [5:12:23<2:10:25, 3.51it/s] 93%|█████████▎| 344016/371472 [5:12:24<2:13:58, 3.42it/s] 93%|█████████▎| 344017/371472 [5:12:24<2:06:50, 3.61it/s] 93%|█████████▎| 344018/371472 [5:12:24<2:10:42, 3.50it/s] 93%|█████████▎| 344019/371472 [5:12:24<2:10:03, 3.52it/s] 93%|█████████▎| 344020/371472 [5:12:25<2:04:58, 3.66it/s] {'loss': 2.5853, 'learning_rate': 1.665463595423301e-07, 'epoch': 14.82} + 93%|█████████▎| 344020/371472 [5:12:25<2:04:58, 3.66it/s] 93%|█████████▎| 344021/371472 [5:12:25<2:07:34, 3.59it/s] 93%|█████████▎| 344022/371472 [5:12:25<2:10:58, 3.49it/s] 93%|█████████▎| 344023/371472 [5:12:26<2:17:49, 3.32it/s] 93%|█████████▎| 344024/371472 [5:12:26<2:16:48, 3.34it/s] 93%|███���█████▎| 344025/371472 [5:12:26<2:16:11, 3.36it/s] 93%|█████████▎| 344026/371472 [5:12:26<2:17:27, 3.33it/s] 93%|█████████▎| 344027/371472 [5:12:27<2:15:40, 3.37it/s] 93%|█████████▎| 344028/371472 [5:12:27<2:12:52, 3.44it/s] 93%|█████████▎| 344029/371472 [5:12:27<2:14:50, 3.39it/s] 93%|█████████▎| 344030/371472 [5:12:28<2:13:04, 3.44it/s] 93%|█████████▎| 344031/371472 [5:12:28<2:16:03, 3.36it/s] 93%|█████████▎| 344032/371472 [5:12:28<2:09:55, 3.52it/s] 93%|█████████▎| 344033/371472 [5:12:29<2:18:32, 3.30it/s] 93%|█████████▎| 344034/371472 [5:12:29<2:21:07, 3.24it/s] 93%|█████████▎| 344035/371472 [5:12:29<2:12:28, 3.45it/s] 93%|█████████▎| 344036/371472 [5:12:29<2:09:13, 3.54it/s] 93%|█████████▎| 344037/371472 [5:12:30<2:06:53, 3.60it/s] 93%|█████████▎| 344038/371472 [5:12:30<2:05:27, 3.64it/s] 93%|█████████▎| 344039/371472 [5:12:30<2:03:58, 3.69it/s] 93%|█████████▎| 344040/371472 [5:12:30<2:00:11, 3.80it/s] {'loss': 2.6172, 'learning_rate': 1.6649787756685127e-07, 'epoch': 14.82} + 93%|█████████▎| 344040/371472 [5:12:30<2:00:11, 3.80it/s] 93%|█████████▎| 344041/371472 [5:12:31<1:56:40, 3.92it/s] 93%|█████████▎| 344042/371472 [5:12:31<2:01:29, 3.76it/s] 93%|█████████▎| 344043/371472 [5:12:31<2:07:34, 3.58it/s] 93%|█████████▎| 344044/371472 [5:12:32<2:06:16, 3.62it/s] 93%|█████████▎| 344045/371472 [5:12:32<2:05:07, 3.65it/s] 93%|█████████▎| 344046/371472 [5:12:32<2:08:49, 3.55it/s] 93%|█████████▎| 344047/371472 [5:12:32<2:09:02, 3.54it/s] 93%|█████████▎| 344048/371472 [5:12:33<2:06:32, 3.61it/s] 93%|█████████▎| 344049/371472 [5:12:33<2:09:34, 3.53it/s] 93%|█████████▎| 344050/371472 [5:12:33<2:09:23, 3.53it/s] 93%|█████████▎| 344051/371472 [5:12:33<2:08:16, 3.56it/s] 93%|█████████▎| 344052/371472 [5:12:34<2:05:32, 3.64it/s] 93%|█████████▎| 344053/371472 [5:12:34<2:14:42, 3.39it/s] 93%|█████████▎| 344054/371472 [5:12:34<2:11:58, 3.46it/s] 93%|█████████▎| 344055/371472 [5:12:35<2:08:47, 3.55it/s] 93%|█████████▎| 344056/371472 [5:12:35<2:06:46, 3.60it/s] 93%|█████████▎| 344057/371472 [5:12:35<2:08:29, 3.56it/s] 93%|█████████▎| 344058/371472 [5:12:36<2:14:17, 3.40it/s] 93%|█████████▎| 344059/371472 [5:12:36<2:08:30, 3.56it/s] 93%|█████████▎| 344060/371472 [5:12:36<2:13:14, 3.43it/s] {'loss': 2.4886, 'learning_rate': 1.664493955913723e-07, 'epoch': 14.82} + 93%|█████████▎| 344060/371472 [5:12:36<2:13:14, 3.43it/s] 93%|█████████▎| 344061/371472 [5:12:36<2:09:06, 3.54it/s] 93%|█████████▎| 344062/371472 [5:12:37<2:12:06, 3.46it/s] 93%|█████████▎| 344063/371472 [5:12:37<2:09:56, 3.52it/s] 93%|█████████▎| 344064/371472 [5:12:37<2:09:49, 3.52it/s] 93%|█████████▎| 344065/371472 [5:12:38<2:16:53, 3.34it/s] 93%|█████████▎| 344066/371472 [5:12:38<2:08:38, 3.55it/s] 93%|█████████▎| 344067/371472 [5:12:38<2:11:49, 3.46it/s] 93%|█████████▎| 344068/371472 [5:12:38<2:06:22, 3.61it/s] 93%|█████████▎| 344069/371472 [5:12:39<2:04:15, 3.68it/s] 93%|█████████▎| 344070/371472 [5:12:39<2:07:57, 3.57it/s] 93%|█████████▎| 344071/371472 [5:12:39<2:06:08, 3.62it/s] 93%|█████████▎| 344072/371472 [5:12:39<2:07:06, 3.59it/s] 93%|█████████▎| 344073/371472 [5:12:40<2:08:45, 3.55it/s] 93%|█████████▎| 344074/371472 [5:12:40<2:12:07, 3.46it/s] 93%|█████████▎| 344075/371472 [5:12:40<2:10:02, 3.51it/s] 93%|█████████▎| 344076/371472 [5:12:41<2:12:02, 3.46it/s] 93%|█████████▎| 344077/371472 [5:12:41<2:04:43, 3.66it/s] 93%|█████████▎| 344078/371472 [5:12:41<2:09:39, 3.52it/s] 93%|█████████▎| 344079/371472 [5:12:41<2:05:20, 3.64it/s] 93%|█████████▎| 344080/371472 [5:12:42<2:04:45, 3.66it/s] {'loss': 2.623, 'learning_rate': 1.6640091361589347e-07, 'epoch': 14.82} + 93%|█████████▎| 344080/371472 [5:12:42<2:04:45, 3.66it/s] 93%|█���███████▎| 344081/371472 [5:12:42<2:03:24, 3.70it/s] 93%|█████████▎| 344082/371472 [5:12:42<2:01:23, 3.76it/s] 93%|█████████▎| 344083/371472 [5:12:42<2:00:08, 3.80it/s] 93%|█████████▎| 344084/371472 [5:12:43<2:03:47, 3.69it/s] 93%|█████████▎| 344085/371472 [5:12:43<2:00:45, 3.78it/s] 93%|█████████▎| 344086/371472 [5:12:43<2:03:52, 3.68it/s] 93%|█████████▎| 344087/371472 [5:12:44<2:06:42, 3.60it/s] 93%|█████████▎| 344088/371472 [5:12:44<2:03:50, 3.69it/s] 93%|█████████▎| 344089/371472 [5:12:44<2:13:59, 3.41it/s] 93%|█████████▎| 344090/371472 [5:12:44<2:16:30, 3.34it/s] 93%|█████████▎| 344091/371472 [5:12:45<2:13:02, 3.43it/s] 93%|█████████▎| 344092/371472 [5:12:45<2:20:59, 3.24it/s] 93%|█████████▎| 344093/371472 [5:12:45<2:15:01, 3.38it/s] 93%|█████████▎| 344094/371472 [5:12:46<2:10:48, 3.49it/s] 93%|█████████▎| 344095/371472 [5:12:46<2:27:39, 3.09it/s] 93%|█████████▎| 344096/371472 [5:12:46<2:29:34, 3.05it/s] 93%|█████████▎| 344097/371472 [5:12:47<2:19:06, 3.28it/s] 93%|█████████▎| 344098/371472 [5:12:47<2:22:58, 3.19it/s] 93%|█████████▎| 344099/371472 [5:12:47<2:19:13, 3.28it/s] 93%|█████████▎| 344100/371472 [5:12:48<2:13:39, 3.41it/s] {'loss': 2.594, 'learning_rate': 1.6635243164041454e-07, 'epoch': 14.82} + 93%|█████████▎| 344100/371472 [5:12:48<2:13:39, 3.41it/s] 93%|█████████▎| 344101/371472 [5:12:48<2:11:38, 3.47it/s] 93%|█████████▎| 344102/371472 [5:12:48<2:12:11, 3.45it/s] 93%|█████████▎| 344103/371472 [5:12:48<2:06:27, 3.61it/s] 93%|█████████▎| 344104/371472 [5:12:49<2:05:12, 3.64it/s] 93%|█████████▎| 344105/371472 [5:12:49<2:00:50, 3.77it/s] 93%|█████████▎| 344106/371472 [5:12:49<2:09:32, 3.52it/s] 93%|█████████▎| 344107/371472 [5:12:49<2:09:51, 3.51it/s] 93%|█████████▎| 344108/371472 [5:12:50<2:07:01, 3.59it/s] 93%|█████████▎| 344109/371472 [5:12:50<2:05:16, 3.64it/s] 93%|█████████▎| 344110/371472 [5:12:50<2:05:13, 3.64it/s] 93%|█████████▎| 344111/371472 [5:12:51<2:06:00, 3.62it/s] 93%|█████████▎| 344112/371472 [5:12:51<2:04:30, 3.66it/s] 93%|█████████▎| 344113/371472 [5:12:51<2:09:43, 3.52it/s] 93%|█████████▎| 344114/371472 [5:12:51<2:16:34, 3.34it/s] 93%|█████████▎| 344115/371472 [5:12:52<2:21:40, 3.22it/s] 93%|█████████▎| 344116/371472 [5:12:52<2:20:57, 3.23it/s] 93%|█████████▎| 344117/371472 [5:12:52<2:21:40, 3.22it/s] 93%|█████████▎| 344118/371472 [5:12:53<2:17:35, 3.31it/s] 93%|█████████▎| 344119/371472 [5:12:53<2:16:35, 3.34it/s] 93%|█████████▎| 344120/371472 [5:12:53<2:15:20, 3.37it/s] {'loss': 2.568, 'learning_rate': 1.663039496649357e-07, 'epoch': 14.82} + 93%|█████████▎| 344120/371472 [5:12:53<2:15:20, 3.37it/s] 93%|█████████▎| 344121/371472 [5:12:54<2:12:48, 3.43it/s] 93%|█████████▎| 344122/371472 [5:12:54<2:13:50, 3.41it/s] 93%|█████████▎| 344123/371472 [5:12:54<2:20:24, 3.25it/s] 93%|█████████▎| 344124/371472 [5:12:54<2:17:25, 3.32it/s] 93%|█████████▎| 344125/371472 [5:12:55<2:15:31, 3.36it/s] 93%|█████████▎| 344126/371472 [5:12:55<2:09:34, 3.52it/s] 93%|█████████▎| 344127/371472 [5:12:55<2:11:36, 3.46it/s] 93%|█████████▎| 344128/371472 [5:12:56<2:13:49, 3.41it/s] 93%|█████████▎| 344129/371472 [5:12:56<2:16:39, 3.33it/s] 93%|█████████▎| 344130/371472 [5:12:56<2:13:51, 3.40it/s] 93%|█████████▎| 344131/371472 [5:12:57<2:11:31, 3.46it/s] 93%|█████████▎| 344132/371472 [5:12:57<2:04:28, 3.66it/s] 93%|█████████▎| 344133/371472 [5:12:57<2:27:26, 3.09it/s] 93%|█████████▎| 344134/371472 [5:12:57<2:23:19, 3.18it/s] 93%|█████████▎| 344135/371472 [5:12:58<2:28:36, 3.07it/s] 93%|█████████▎| 344136/371472 [5:12:58<2:26:37, 3.11it/s] 93%|█████████▎| 344137/371472 [5:12:58<2:23:04, 3.18it/s] 93%|█████████▎| 344138/371472 [5:12:59<2:15:06, 3.37it/s] 93%|█████████▎| 344139/371472 [5:12:59<2:08:47, 3.54it/s] 93%|█████████▎| 344140/371472 [5:12:59<2:14:27, 3.39it/s] {'loss': 2.6965, 'learning_rate': 1.6625546768945674e-07, 'epoch': 14.82} + 93%|█████████▎| 344140/371472 [5:12:59<2:14:27, 3.39it/s] 93%|█████████▎| 344141/371472 [5:13:00<2:10:37, 3.49it/s] 93%|█████████▎| 344142/371472 [5:13:00<2:22:18, 3.20it/s] 93%|█████████▎| 344143/371472 [5:13:00<2:15:16, 3.37it/s] 93%|█████████▎| 344144/371472 [5:13:00<2:09:59, 3.50it/s] 93%|█████████▎| 344145/371472 [5:13:01<2:12:31, 3.44it/s] 93%|█████████▎| 344146/371472 [5:13:01<2:11:46, 3.46it/s] 93%|█████████▎| 344147/371472 [5:13:01<2:12:57, 3.43it/s] 93%|█████████▎| 344148/371472 [5:13:02<2:10:25, 3.49it/s] 93%|█████████▎| 344149/371472 [5:13:02<2:05:11, 3.64it/s] 93%|█████████▎| 344150/371472 [5:13:02<2:05:15, 3.64it/s] 93%|█████████▎| 344151/371472 [5:13:02<2:08:28, 3.54it/s] 93%|█████████▎| 344152/371472 [5:13:03<2:09:31, 3.52it/s] 93%|█████████▎| 344153/371472 [5:13:03<2:12:42, 3.43it/s] 93%|█████████▎| 344154/371472 [5:13:03<2:12:08, 3.45it/s] 93%|█████████▎| 344155/371472 [5:13:04<2:10:00, 3.50it/s] 93%|█████████▎| 344156/371472 [5:13:04<2:09:48, 3.51it/s] 93%|█████████▎| 344157/371472 [5:13:04<2:11:35, 3.46it/s] 93%|█████████▎| 344158/371472 [5:13:05<2:19:20, 3.27it/s] 93%|█████████▎| 344159/371472 [5:13:05<2:14:59, 3.37it/s] 93%|█████████▎| 344160/371472 [5:13:05<2:10:48, 3.48it/s] {'loss': 2.4886, 'learning_rate': 1.6620698571397791e-07, 'epoch': 14.82} + 93%|█████████▎| 344160/371472 [5:13:05<2:10:48, 3.48it/s] 93%|█████████▎| 344161/371472 [5:13:05<2:06:42, 3.59it/s] 93%|█████████▎| 344162/371472 [5:13:06<2:05:59, 3.61it/s] 93%|█████████▎| 344163/371472 [5:13:06<2:09:51, 3.50it/s] 93%|█████████▎| 344164/371472 [5:13:06<2:09:21, 3.52it/s] 93%|█████████▎| 344165/371472 [5:13:06<2:07:33, 3.57it/s] 93%|█████████▎| 344166/371472 [5:13:07<2:11:50, 3.45it/s] 93%|█████████▎| 344167/371472 [5:13:07<2:06:14, 3.60it/s] 93%|█████████▎| 344168/371472 [5:13:07<2:04:20, 3.66it/s] 93%|█████████▎| 344169/371472 [5:13:08<2:03:43, 3.68it/s] 93%|█████████▎| 344170/371472 [5:13:08<2:01:18, 3.75it/s] 93%|█████████▎| 344171/371472 [5:13:08<2:10:52, 3.48it/s] 93%|█████████▎| 344172/371472 [5:13:08<2:10:17, 3.49it/s] 93%|█████████▎| 344173/371472 [5:13:09<2:07:43, 3.56it/s] 93%|█████████▎| 344174/371472 [5:13:09<2:02:56, 3.70it/s] 93%|█████████▎| 344175/371472 [5:13:09<2:04:40, 3.65it/s] 93%|█████████▎| 344176/371472 [5:13:09<2:03:55, 3.67it/s] 93%|█████████▎| 344177/371472 [5:13:10<1:59:16, 3.81it/s] 93%|█████████▎| 344178/371472 [5:13:10<2:02:08, 3.72it/s] 93%|█████████▎| 344179/371472 [5:13:10<2:04:38, 3.65it/s] 93%|█████████▎| 344180/371472 [5:13:11<2:06:25, 3.60it/s] {'loss': 2.8566, 'learning_rate': 1.6615850373849899e-07, 'epoch': 14.82} + 93%|█████████▎| 344180/371472 [5:13:11<2:06:25, 3.60it/s] 93%|█████████▎| 344181/371472 [5:13:11<2:06:29, 3.60it/s] 93%|█████████▎| 344182/371472 [5:13:11<2:03:18, 3.69it/s] 93%|█████████▎| 344183/371472 [5:13:11<2:04:25, 3.66it/s] 93%|█████████▎| 344184/371472 [5:13:12<2:03:16, 3.69it/s] 93%|█████████▎| 344185/371472 [5:13:12<2:04:04, 3.67it/s] 93%|█████████▎| 344186/371472 [5:13:12<2:04:43, 3.65it/s] 93%|█████████▎| 344187/371472 [5:13:12<2:04:13, 3.66it/s] 93%|█████████▎| 344188/371472 [5:13:13<2:01:16, 3.75it/s] 93%|█████████▎| 344189/371472 [5:13:13<2:00:40, 3.77it/s] 93%|█████████▎| 344190/371472 [5:13:13<2:04:22, 3.66it/s] 93%|█████████▎| 344191/371472 [5:13:14<2:01:19, 3.75it/s] 93%|█████████▎| 344192/371472 [5:13:14<2:00:02, 3.79it/s] 93%|█████████▎| 344193/371472 [5:13:14<2:00:24, 3.78it/s] 93%|█████████▎| 344194/371472 [5:13:14<1:59:46, 3.80it/s] 93%|█████████▎| 344195/371472 [5:13:15<2:07:36, 3.56it/s] 93%|█████████▎| 344196/371472 [5:13:15<2:08:32, 3.54it/s] 93%|█████████▎| 344197/371472 [5:13:15<2:03:41, 3.68it/s] 93%|█████████▎| 344198/371472 [5:13:15<2:02:06, 3.72it/s] 93%|█████████▎| 344199/371472 [5:13:16<2:09:21, 3.51it/s] 93%|█████████▎| 344200/371472 [5:13:16<2:08:10, 3.55it/s] {'loss': 2.5231, 'learning_rate': 1.661100217630201e-07, 'epoch': 14.83} + 93%|█████████▎| 344200/371472 [5:13:16<2:08:10, 3.55it/s] 93%|█████████▎| 344201/371472 [5:13:16<2:07:15, 3.57it/s] 93%|█████████▎| 344202/371472 [5:13:17<2:11:24, 3.46it/s] 93%|█████████▎| 344203/371472 [5:13:17<2:10:46, 3.48it/s] 93%|█████████▎| 344204/371472 [5:13:17<2:06:11, 3.60it/s] 93%|█████████▎| 344205/371472 [5:13:17<2:04:41, 3.64it/s] 93%|█████████▎| 344206/371472 [5:13:18<2:02:25, 3.71it/s] 93%|█████████▎| 344207/371472 [5:13:18<1:59:52, 3.79it/s] 93%|█████████▎| 344208/371472 [5:13:18<1:57:24, 3.87it/s] 93%|█████████▎| 344209/371472 [5:13:18<1:58:55, 3.82it/s] 93%|█████████▎| 344210/371472 [5:13:19<1:59:07, 3.81it/s] 93%|█████████▎| 344211/371472 [5:13:19<2:02:51, 3.70it/s] 93%|█████████▎| 344212/371472 [5:13:19<1:59:56, 3.79it/s] 93%|█████████▎| 344213/371472 [5:13:20<2:12:25, 3.43it/s] 93%|█████████▎| 344214/371472 [5:13:20<2:10:48, 3.47it/s] 93%|█████████▎| 344215/371472 [5:13:20<2:04:41, 3.64it/s] 93%|█████████▎| 344216/371472 [5:13:20<2:05:18, 3.63it/s] 93%|█████████▎| 344217/371472 [5:13:21<2:06:07, 3.60it/s] 93%|█████████▎| 344218/371472 [5:13:21<2:01:29, 3.74it/s] 93%|█████████▎| 344219/371472 [5:13:21<2:05:14, 3.63it/s] 93%|█████████▎| 344220/371472 [5:13:21<2:02:37, 3.70it/s] {'loss': 2.581, 'learning_rate': 1.6606153978754118e-07, 'epoch': 14.83} + 93%|█████████▎| 344220/371472 [5:13:21<2:02:37, 3.70it/s] 93%|█████████▎| 344221/371472 [5:13:22<2:06:25, 3.59it/s] 93%|█████████▎| 344222/371472 [5:13:22<2:03:13, 3.69it/s] 93%|█████████▎| 344223/371472 [5:13:22<2:02:39, 3.70it/s] 93%|█████████▎| 344224/371472 [5:13:23<2:04:00, 3.66it/s] 93%|█████████▎| 344225/371472 [5:13:23<2:05:52, 3.61it/s] 93%|█████████▎| 344226/371472 [5:13:23<2:01:08, 3.75it/s] 93%|█████████▎| 344227/371472 [5:13:23<2:02:53, 3.69it/s] 93%|█████████▎| 344228/371472 [5:13:24<2:15:37, 3.35it/s] 93%|█████████▎| 344229/371472 [5:13:24<2:10:01, 3.49it/s] 93%|█████████▎| 344230/371472 [5:13:24<2:17:51, 3.29it/s] 93%|█████████▎| 344231/371472 [5:13:25<2:11:13, 3.46it/s] 93%|█████████▎| 344232/371472 [5:13:25<2:16:02, 3.34it/s] 93%|█████████▎| 344233/371472 [5:13:25<2:20:47, 3.22it/s] 93%|█████████▎| 344234/371472 [5:13:26<2:14:53, 3.37it/s] 93%|█████████▎| 344235/371472 [5:13:26<2:15:27, 3.35it/s] 93%|█████████▎| 344236/371472 [5:13:26<2:17:14, 3.31it/s] 93%|█████████▎| 344237/371472 [5:13:26<2:11:45, 3.45it/s] 93%|█████████▎| 344238/371472 [5:13:27<2:07:47, 3.55it/s] 93%|█████████▎| 344239/371472 [5:13:27<2:03:41, 3.67it/s] 93%|█████████▎| 344240/371472 [5:13:27<2:12:16, 3.43it/s] {'loss': 2.794, 'learning_rate': 1.6601305781206236e-07, 'epoch': 14.83} + 93%|█████████▎| 344240/371472 [5:13:27<2:12:16, 3.43it/s] 93%|█████████▎| 344241/371472 [5:13:28<2:08:21, 3.54it/s] 93%|█████████▎| 344242/371472 [5:13:28<2:13:48, 3.39it/s] 93%|█████████▎| 344243/371472 [5:13:28<2:14:02, 3.39it/s] 93%|█████████▎| 344244/371472 [5:13:28<2:24:01, 3.15it/s] 93%|█████████▎| 344245/371472 [5:13:29<2:22:05, 3.19it/s] 93%|█████████▎| 344246/371472 [5:13:29<2:19:53, 3.24it/s] 93%|█████████▎| 344247/371472 [5:13:29<2:13:58, 3.39it/s] 93%|█████████▎| 344248/371472 [5:13:30<2:16:21, 3.33it/s] 93%|█████████▎| 344249/371472 [5:13:30<2:12:50, 3.42it/s] 93%|█████████▎| 344250/371472 [5:13:30<2:10:37, 3.47it/s] 93%|█████████▎| 344251/371472 [5:13:30<2:05:11, 3.62it/s] 93%|█████████▎| 344252/371472 [5:13:31<2:02:37, 3.70it/s] 93%|█████████▎| 344253/371472 [5:13:31<2:01:26, 3.74it/s] 93%|█████████▎| 344254/371472 [5:13:31<2:01:32, 3.73it/s] 93%|█████████▎| 344255/371472 [5:13:32<2:10:15, 3.48it/s] 93%|█████████▎| 344256/371472 [5:13:32<2:07:17, 3.56it/s] 93%|█████████▎| 344257/371472 [5:13:32<2:13:52, 3.39it/s] 93%|█████████▎| 344258/371472 [5:13:32<2:09:09, 3.51it/s] 93%|█████████▎| 344259/371472 [5:13:33<2:04:14, 3.65it/s] 93%|█████████▎| 344260/371472 [5:13:33<2:13:32, 3.40it/s] {'loss': 2.4604, 'learning_rate': 1.659645758365834e-07, 'epoch': 14.83} + 93%|█████████▎| 344260/371472 [5:13:33<2:13:32, 3.40it/s] 93%|█████████▎| 344261/371472 [5:13:33<2:11:51, 3.44it/s] 93%|█████████▎| 344262/371472 [5:13:34<2:08:19, 3.53it/s] 93%|█████████▎| 344263/371472 [5:13:34<2:06:54, 3.57it/s] 93%|█████████▎| 344264/371472 [5:13:34<2:05:08, 3.62it/s] 93%|█████████▎| 344265/371472 [5:13:34<2:04:29, 3.64it/s] 93%|█████████▎| 344266/371472 [5:13:35<2:04:43, 3.64it/s] 93%|█████████▎| 344267/371472 [5:13:35<2:04:37, 3.64it/s] 93%|█████████▎| 344268/371472 [5:13:35<2:06:28, 3.59it/s] 93%|█████████▎| 344269/371472 [5:13:36<2:06:50, 3.57it/s] 93%|█████████▎| 344270/371472 [5:13:36<2:03:04, 3.68it/s] 93%|█████████▎| 344271/371472 [5:13:36<2:03:13, 3.68it/s] 93%|█████████▎| 344272/371472 [5:13:36<2:03:29, 3.67it/s] 93%|█████████▎| 344273/371472 [5:13:37<2:09:40, 3.50it/s] 93%|█████████▎| 344274/371472 [5:13:37<2:06:31, 3.58it/s] 93%|█████████▎| 344275/371472 [5:13:37<2:02:53, 3.69it/s] 93%|█████████▎| 344276/371472 [5:13:37<2:00:30, 3.76it/s] 93%|█████████▎| 344277/371472 [5:13:38<2:02:57, 3.69it/s] 93%|█████████▎| 344278/371472 [5:13:38<2:03:24, 3.67it/s] 93%|█████████▎| 344279/371472 [5:13:38<2:03:11, 3.68it/s] 93%|█████████▎| 344280/371472 [5:13:38<2:02:23, 3.70it/s] {'loss': 2.663, 'learning_rate': 1.6591609386110455e-07, 'epoch': 14.83} + 93%|█████████▎| 344280/371472 [5:13:39<2:02:23, 3.70it/s] 93%|█████████▎| 344281/371472 [5:13:39<2:06:14, 3.59it/s] 93%|█████████▎| 344282/371472 [5:13:39<2:06:09, 3.59it/s] 93%|█████████▎| 344283/371472 [5:13:39<2:06:52, 3.57it/s] 93%|█████████▎| 344284/371472 [5:13:40<2:02:56, 3.69it/s] 93%|█████████▎| 344285/371472 [5:13:40<2:15:22, 3.35it/s] 93%|█████████▎| 344286/371472 [5:13:40<2:10:28, 3.47it/s] 93%|█████████▎| 344287/371472 [5:13:41<2:16:14, 3.33it/s] 93%|█████████▎| 344288/371472 [5:13:41<2:14:10, 3.38it/s] 93%|█████████▎| 344289/371472 [5:13:41<2:13:10, 3.40it/s] 93%|█████████▎| 344290/371472 [5:13:41<2:08:51, 3.52it/s] 93%|█████████▎| 344291/371472 [5:13:42<2:10:38, 3.47it/s] 93%|█████████▎| 344292/371472 [5:13:42<2:13:06, 3.40it/s] 93%|█████████▎| 344293/371472 [5:13:42<2:09:31, 3.50it/s] 93%|█████████▎| 344294/371472 [5:13:43<2:22:18, 3.18it/s] 93%|█████████▎| 344295/371472 [5:13:43<2:23:57, 3.15it/s] 93%|█████████▎| 344296/371472 [5:13:43<2:21:28, 3.20it/s] 93%|█████████▎| 344297/371472 [5:13:44<2:24:42, 3.13it/s] 93%|█████████▎| 344298/371472 [5:13:44<2:22:53, 3.17it/s] 93%|█████████▎| 344299/371472 [5:13:44<2:22:56, 3.17it/s] 93%|█████████▎| 344300/371472 [5:13:44<2:14:08, 3.38it/s] {'loss': 2.6029, 'learning_rate': 1.6586761188562563e-07, 'epoch': 14.83} + 93%|█████████▎| 344300/371472 [5:13:44<2:14:08, 3.38it/s] 93%|█████████▎| 344301/371472 [5:13:45<2:09:10, 3.51it/s] 93%|█████████▎| 344302/371472 [5:13:45<2:07:00, 3.57it/s] 93%|█████████▎| 344303/371472 [5:13:45<2:05:52, 3.60it/s] 93%|█████████▎| 344304/371472 [5:13:46<2:28:25, 3.05it/s] 93%|█████████▎| 344305/371472 [5:13:46<2:23:00, 3.17it/s] 93%|█████████▎| 344306/371472 [5:13:46<2:15:59, 3.33it/s] 93%|█████████▎| 344307/371472 [5:13:47<2:09:54, 3.49it/s] 93%|█████████▎| 344308/371472 [5:13:47<2:06:47, 3.57it/s] 93%|█████████▎| 344309/371472 [5:13:47<2:22:03, 3.19it/s] 93%|█████████▎| 344310/371472 [5:13:47<2:14:39, 3.36it/s] 93%|█████████▎| 344311/371472 [5:13:48<2:10:19, 3.47it/s] 93%|█████████▎| 344312/371472 [5:13:48<2:05:34, 3.60it/s] 93%|█████████▎| 344313/371472 [5:13:48<2:04:17, 3.64it/s] 93%|█████████▎| 344314/371472 [5:13:49<2:03:04, 3.68it/s] 93%|█████████▎| 344315/371472 [5:13:49<2:08:41, 3.52it/s] 93%|█████████▎| 344316/371472 [5:13:49<2:03:00, 3.68it/s] 93%|█████████▎| 344317/371472 [5:13:49<2:01:20, 3.73it/s] 93%|█████████▎| 344318/371472 [5:13:50<2:03:38, 3.66it/s] 93%|█████████▎| 344319/371472 [5:13:50<2:07:38, 3.55it/s] 93%|█████████▎| 344320/371472 [5:13:50<2:04:04, 3.65it/s] {'loss': 2.603, 'learning_rate': 1.6581912991014678e-07, 'epoch': 14.83} + 93%|█████████▎| 344320/371472 [5:13:50<2:04:04, 3.65it/s] 93%|█████████▎| 344321/371472 [5:13:50<2:02:03, 3.71it/s] 93%|█████████▎| 344322/371472 [5:13:51<2:01:00, 3.74it/s] 93%|█████████▎| 344323/371472 [5:13:51<2:10:48, 3.46it/s] 93%|█████████▎| 344324/371472 [5:13:51<2:06:01, 3.59it/s] 93%|█████████▎| 344325/371472 [5:13:52<2:11:32, 3.44it/s] 93%|█████████▎| 344326/371472 [5:13:52<2:07:22, 3.55it/s] 93%|█████████▎| 344327/371472 [5:13:52<2:02:05, 3.71it/s] 93%|█████████▎| 344328/371472 [5:13:52<1:58:44, 3.81it/s] 93%|█████████▎| 344329/371472 [5:13:53<2:01:33, 3.72it/s] 93%|█████████▎| 344330/371472 [5:13:53<2:04:11, 3.64it/s] 93%|█████████▎| 344331/371472 [5:13:53<2:01:25, 3.73it/s] 93%|█████████▎| 344332/371472 [5:13:53<2:01:18, 3.73it/s] 93%|█████████▎| 344333/371472 [5:13:54<2:00:45, 3.75it/s] 93%|█████████▎| 344334/371472 [5:13:54<1:59:47, 3.78it/s] 93%|█████████▎| 344335/371472 [5:13:54<2:03:28, 3.66it/s] 93%|█████████▎| 344336/371472 [5:13:55<2:03:08, 3.67it/s] 93%|█████████▎| 344337/371472 [5:13:55<2:19:06, 3.25it/s] 93%|█████████▎| 344338/371472 [5:13:55<2:10:30, 3.47it/s] 93%|█████████▎| 344339/371472 [5:13:55<2:12:20, 3.42it/s] 93%|█████████▎| 344340/371472 [5:13:56<2:08:21, 3.52it/s] {'loss': 2.5551, 'learning_rate': 1.6577064793466782e-07, 'epoch': 14.83} + 93%|█████████▎| 344340/371472 [5:13:56<2:08:21, 3.52it/s] 93%|█████████▎| 344341/371472 [5:13:56<2:05:01, 3.62it/s] 93%|█████████▎| 344342/371472 [5:13:56<2:03:02, 3.67it/s] 93%|█████████▎| 344343/371472 [5:13:57<2:04:05, 3.64it/s] 93%|█████████▎| 344344/371472 [5:13:57<2:07:44, 3.54it/s] 93%|█████████▎| 344345/371472 [5:13:57<2:06:12, 3.58it/s] 93%|█████████▎| 344346/371472 [5:13:57<2:02:06, 3.70it/s] 93%|█████████▎| 344347/371472 [5:13:58<2:01:12, 3.73it/s] 93%|█████████▎| 344348/371472 [5:13:58<2:00:32, 3.75it/s] 93%|█████████▎| 344349/371472 [5:13:58<2:05:16, 3.61it/s] 93%|█████████▎| 344350/371472 [5:13:59<2:25:05, 3.12it/s] 93%|█████████▎| 344351/371472 [5:13:59<2:19:24, 3.24it/s] 93%|█████████▎| 344352/371472 [5:13:59<2:11:37, 3.43it/s] 93%|█████████▎| 344353/371472 [5:13:59<2:08:53, 3.51it/s] 93%|█████████▎| 344354/371472 [5:14:00<2:10:54, 3.45it/s] 93%|█████████▎| 344355/371472 [5:14:00<2:09:43, 3.48it/s] 93%|█████████▎| 344356/371472 [5:14:00<2:12:14, 3.42it/s] 93%|█████████▎| 344357/371472 [5:14:01<2:11:54, 3.43it/s] 93%|█████████▎| 344358/371472 [5:14:01<2:13:04, 3.40it/s] 93%|█████████▎| 344359/371472 [5:14:01<2:14:51, 3.35it/s] 93%|█████████▎| 344360/371472 [5:14:02<2:27:38, 3.06it/s] {'loss': 2.7415, 'learning_rate': 1.65722165959189e-07, 'epoch': 14.83} + 93%|█████████▎| 344360/371472 [5:14:02<2:27:38, 3.06it/s] 93%|█████████▎| 344361/371472 [5:14:02<2:23:38, 3.15it/s] 93%|█████████▎| 344362/371472 [5:14:02<2:15:36, 3.33it/s] 93%|█████████▎| 344363/371472 [5:14:02<2:14:03, 3.37it/s] 93%|█████████▎| 344364/371472 [5:14:03<2:11:35, 3.43it/s] 93%|█████████▎| 344365/371472 [5:14:03<2:07:32, 3.54it/s] 93%|█████████▎| 344366/371472 [5:14:03<2:13:22, 3.39it/s] 93%|█████████▎| 344367/371472 [5:14:04<2:14:11, 3.37it/s] 93%|█████████▎| 344368/371472 [5:14:04<2:13:07, 3.39it/s] 93%|█████████▎| 344369/371472 [5:14:04<2:13:05, 3.39it/s] 93%|█████████▎| 344370/371472 [5:14:04<2:07:58, 3.53it/s] 93%|█████████▎| 344371/371472 [5:14:05<2:07:36, 3.54it/s] 93%|█████████▎| 344372/371472 [5:14:05<2:21:06, 3.20it/s] 93%|█████████▎| 344373/371472 [5:14:05<2:10:57, 3.45it/s] 93%|█████████▎| 344374/371472 [5:14:06<2:10:49, 3.45it/s] 93%|█████████▎| 344375/371472 [5:14:06<2:07:35, 3.54it/s] 93%|█████████▎| 344376/371472 [5:14:06<2:10:32, 3.46it/s] 93%|█████████▎| 344377/371472 [5:14:06<2:11:17, 3.44it/s] 93%|█████████▎| 344378/371472 [5:14:07<2:09:44, 3.48it/s] 93%|█████████▎| 344379/371472 [5:14:07<2:08:57, 3.50it/s] 93%|█████████▎| 344380/371472 [5:14:07<2:05:04, 3.61it/s] {'loss': 2.5495, 'learning_rate': 1.6567368398371004e-07, 'epoch': 14.83} + 93%|█████████▎| 344380/371472 [5:14:07<2:05:04, 3.61it/s] 93%|█████████▎| 344381/371472 [5:14:08<2:08:00, 3.53it/s] 93%|█████████▎| 344382/371472 [5:14:08<2:07:34, 3.54it/s] 93%|█████████▎| 344383/371472 [5:14:08<2:12:32, 3.41it/s] 93%|█████████▎| 344384/371472 [5:14:09<2:12:59, 3.39it/s] 93%|█████████▎| 344385/371472 [5:14:09<2:09:35, 3.48it/s] 93%|█████████▎| 344386/371472 [5:14:09<2:07:56, 3.53it/s] 93%|█████████▎| 344387/371472 [5:14:10<2:53:14, 2.61it/s] 93%|█████████▎| 344388/371472 [5:14:10<2:36:19, 2.89it/s] 93%|█████████▎| 344389/371472 [5:14:10<2:34:46, 2.92it/s] 93%|█████████▎| 344390/371472 [5:14:11<2:28:14, 3.04it/s] 93%|█████████▎| 344391/371472 [5:14:11<2:30:53, 2.99it/s] 93%|█████████▎| 344392/371472 [5:14:11<2:20:52, 3.20it/s] 93%|█████████▎| 344393/371472 [5:14:11<2:16:15, 3.31it/s] 93%|█████████▎| 344394/371472 [5:14:12<2:18:27, 3.26it/s] 93%|█████████▎| 344395/371472 [5:14:12<2:11:54, 3.42it/s] 93%|█████████▎| 344396/371472 [5:14:12<2:07:18, 3.54it/s] 93%|█████████▎| 344397/371472 [5:14:13<2:17:08, 3.29it/s] 93%|█████████▎| 344398/371472 [5:14:13<2:15:13, 3.34it/s] 93%|█████████▎| 344399/371472 [5:14:13<2:08:02, 3.52it/s] 93%|█████████▎| 344400/371472 [5:14:13<2:11:54, 3.42it/s] {'loss': 2.5892, 'learning_rate': 1.656252020082312e-07, 'epoch': 14.83} + 93%|█████████▎| 344400/371472 [5:14:13<2:11:54, 3.42it/s] 93%|█████████▎| 344401/371472 [5:14:14<2:07:35, 3.54it/s] 93%|█████████▎| 344402/371472 [5:14:14<2:11:55, 3.42it/s] 93%|█████████▎| 344403/371472 [5:14:14<2:09:37, 3.48it/s] 93%|█████████▎| 344404/371472 [5:14:15<2:14:14, 3.36it/s] 93%|█████████▎| 344405/371472 [5:14:15<2:10:21, 3.46it/s] 93%|█████████▎| 344406/371472 [5:14:15<2:10:33, 3.46it/s] 93%|█████████▎| 344407/371472 [5:14:16<2:17:26, 3.28it/s] 93%|█████████▎| 344408/371472 [5:14:16<2:09:43, 3.48it/s] 93%|█████████▎| 344409/371472 [5:14:16<2:05:59, 3.58it/s] 93%|█████████▎| 344410/371472 [5:14:16<2:03:24, 3.65it/s] 93%|█████████▎| 344411/371472 [5:14:17<2:05:19, 3.60it/s] 93%|█████████▎| 344412/371472 [5:14:17<2:10:54, 3.45it/s] 93%|█████████▎| 344413/371472 [5:14:17<2:09:24, 3.48it/s] 93%|█████████▎| 344414/371472 [5:14:17<2:05:38, 3.59it/s] 93%|█████████▎| 344415/371472 [5:14:18<2:02:36, 3.68it/s] 93%|█████████▎| 344416/371472 [5:14:18<2:07:09, 3.55it/s] 93%|█████████▎| 344417/371472 [5:14:18<2:08:18, 3.51it/s] 93%|█████████▎| 344418/371472 [5:14:19<2:11:25, 3.43it/s] 93%|█████████▎| 344419/371472 [5:14:19<2:16:36, 3.30it/s] 93%|█████████▎| 344420/371472 [5:14:19<2:21:13, 3.19it/s] {'loss': 2.6541, 'learning_rate': 1.6557672003275227e-07, 'epoch': 14.83} + 93%|█████████▎| 344420/371472 [5:14:19<2:21:13, 3.19it/s] 93%|█████████▎| 344421/371472 [5:14:20<2:16:17, 3.31it/s] 93%|█████████▎| 344422/371472 [5:14:20<2:21:35, 3.18it/s] 93%|█████████▎| 344423/371472 [5:14:20<2:14:48, 3.34it/s] 93%|█████████▎| 344424/371472 [5:14:20<2:05:49, 3.58it/s] 93%|█████████▎| 344425/371472 [5:14:21<2:17:03, 3.29it/s] 93%|█████████▎| 344426/371472 [5:14:21<2:10:18, 3.46it/s] 93%|█████████▎| 344427/371472 [5:14:21<2:06:33, 3.56it/s] 93%|█████████▎| 344428/371472 [5:14:22<2:05:40, 3.59it/s] 93%|█████████▎| 344429/371472 [5:14:22<2:00:00, 3.76it/s] 93%|█████████▎| 344430/371472 [5:14:22<2:04:35, 3.62it/s] 93%|█████████▎| 344431/371472 [5:14:22<2:06:39, 3.56it/s] 93%|█████████▎| 344432/371472 [5:14:23<2:02:59, 3.66it/s] 93%|█████████▎| 344433/371472 [5:14:23<2:02:30, 3.68it/s] 93%|█████████▎| 344434/371472 [5:14:23<2:01:10, 3.72it/s] 93%|█████████▎| 344435/371472 [5:14:24<2:10:49, 3.44it/s] 93%|█████████▎| 344436/371472 [5:14:24<2:19:18, 3.23it/s] 93%|█████████▎| 344437/371472 [5:14:24<2:12:23, 3.40it/s] 93%|█████████▎| 344438/371472 [5:14:24<2:11:34, 3.42it/s] 93%|█████████▎| 344439/371472 [5:14:25<2:08:29, 3.51it/s] 93%|█████████▎| 344440/371472 [5:14:25<2:03:32, 3.65it/s] {'loss': 2.5401, 'learning_rate': 1.655282380572733e-07, 'epoch': 14.84} + 93%|█████████▎| 344440/371472 [5:14:25<2:03:32, 3.65it/s] 93%|█████████▎| 344441/371472 [5:14:25<2:10:35, 3.45it/s] 93%|█████████▎| 344442/371472 [5:14:26<2:14:00, 3.36it/s] 93%|█████████▎| 344443/371472 [5:14:26<2:07:50, 3.52it/s] 93%|█████████▎| 344444/371472 [5:14:26<2:04:01, 3.63it/s] 93%|█████████▎| 344445/371472 [5:14:26<2:04:21, 3.62it/s] 93%|█████████▎| 344446/371472 [5:14:27<2:01:33, 3.71it/s] 93%|█████████▎| 344447/371472 [5:14:27<2:03:02, 3.66it/s] 93%|█████████▎| 344448/371472 [5:14:27<2:01:28, 3.71it/s] 93%|█████████▎| 344449/371472 [5:14:27<2:01:55, 3.69it/s] 93%|█████████▎| 344450/371472 [5:14:28<2:04:45, 3.61it/s] 93%|█████████▎| 344451/371472 [5:14:28<2:02:48, 3.67it/s] 93%|█████████▎| 344452/371472 [5:14:28<2:07:09, 3.54it/s] 93%|█████████▎| 344453/371472 [5:14:29<2:01:56, 3.69it/s] 93%|█████████▎| 344454/371472 [5:14:29<2:04:37, 3.61it/s] 93%|█████████▎| 344455/371472 [5:14:29<2:04:48, 3.61it/s] 93%|█████████▎| 344456/371472 [5:14:29<2:09:58, 3.46it/s] 93%|█████████▎| 344457/371472 [5:14:30<2:07:33, 3.53it/s] 93%|█████████▎| 344458/371472 [5:14:30<2:16:11, 3.31it/s] 93%|█████████▎| 344459/371472 [5:14:30<2:09:26, 3.48it/s] 93%|█████████▎| 344460/371472 [5:14:31<2:15:50, 3.31it/s] {'loss': 2.6194, 'learning_rate': 1.6547975608179446e-07, 'epoch': 14.84} + 93%|█████████▎| 344460/371472 [5:14:31<2:15:50, 3.31it/s] 93%|█████████▎| 344461/371472 [5:14:31<2:10:16, 3.46it/s] 93%|█████████▎| 344462/371472 [5:14:31<2:10:27, 3.45it/s] 93%|█████████▎| 344463/371472 [5:14:31<2:09:16, 3.48it/s] 93%|█████████▎| 344464/371472 [5:14:32<2:08:45, 3.50it/s] 93%|█████████▎| 344465/371472 [5:14:32<2:08:40, 3.50it/s] 93%|█████████▎| 344466/371472 [5:14:32<2:04:55, 3.60it/s] 93%|█████████▎| 344467/371472 [5:14:33<2:06:32, 3.56it/s] 93%|█████████▎| 344468/371472 [5:14:33<2:06:53, 3.55it/s] 93%|█████████▎| 344469/371472 [5:14:33<2:02:29, 3.67it/s] 93%|█████████▎| 344470/371472 [5:14:33<2:00:30, 3.73it/s] 93%|█████████▎| 344471/371472 [5:14:34<2:06:22, 3.56it/s] 93%|█████████▎| 344472/371472 [5:14:34<2:02:59, 3.66it/s] 93%|█████████▎| 344473/371472 [5:14:34<2:17:40, 3.27it/s] 93%|█████████▎| 344474/371472 [5:14:35<2:14:23, 3.35it/s] 93%|█████████▎| 344475/371472 [5:14:35<2:09:51, 3.47it/s] 93%|█████████▎| 344476/371472 [5:14:35<2:07:36, 3.53it/s] 93%|█████████▎| 344477/371472 [5:14:35<2:08:43, 3.49it/s] 93%|█████████▎| 344478/371472 [5:14:36<2:07:47, 3.52it/s] 93%|█████████▎| 344479/371472 [5:14:36<2:05:21, 3.59it/s] 93%|█████████▎| 344480/371472 [5:14:36<2:09:57, 3.46it/s] {'loss': 2.6601, 'learning_rate': 1.6543127410631553e-07, 'epoch': 14.84} + 93%|█████████▎| 344480/371472 [5:14:36<2:09:57, 3.46it/s] 93%|█████████▎| 344481/371472 [5:14:37<2:07:32, 3.53it/s] 93%|█████████▎| 344482/371472 [5:14:37<2:10:16, 3.45it/s] 93%|█████████▎| 344483/371472 [5:14:37<2:05:05, 3.60it/s] 93%|█████████▎| 344484/371472 [5:14:37<2:05:01, 3.60it/s] 93%|█████████▎| 344485/371472 [5:14:38<1:59:41, 3.76it/s] 93%|█████████▎| 344486/371472 [5:14:38<2:00:16, 3.74it/s] 93%|█████████▎| 344487/371472 [5:14:38<2:00:58, 3.72it/s] 93%|█████████▎| 344488/371472 [5:14:38<2:01:39, 3.70it/s] 93%|█████████▎| 344489/371472 [5:14:39<2:04:43, 3.61it/s] 93%|█████████▎| 344490/371472 [5:14:39<2:03:23, 3.64it/s] 93%|█████████▎| 344491/371472 [5:14:39<2:02:28, 3.67it/s] 93%|█████████▎| 344492/371472 [5:14:40<2:08:24, 3.50it/s] 93%|█████████▎| 344493/371472 [5:14:40<2:04:52, 3.60it/s] 93%|█████████▎| 344494/371472 [5:14:40<2:06:33, 3.55it/s] 93%|█████████▎| 344495/371472 [5:14:40<2:12:39, 3.39it/s] 93%|█████████▎| 344496/371472 [5:14:41<2:16:16, 3.30it/s] 93%|█████████▎| 344497/371472 [5:14:41<2:14:45, 3.34it/s] 93%|█████████▎| 344498/371472 [5:14:41<2:07:48, 3.52it/s] 93%|█████████▎| 344499/371472 [5:14:42<2:03:09, 3.65it/s] 93%|█████████▎| 344500/371472 [5:14:42<2:10:31, 3.44it/s] {'loss': 2.6342, 'learning_rate': 1.6538279213083669e-07, 'epoch': 14.84} + 93%|█████████▎| 344500/371472 [5:14:42<2:10:31, 3.44it/s] 93%|█████████▎| 344501/371472 [5:14:42<2:11:58, 3.41it/s] 93%|█████████▎| 344502/371472 [5:14:42<2:09:20, 3.48it/s] 93%|█████████▎| 344503/371472 [5:14:43<2:06:24, 3.56it/s] 93%|█████████▎| 344504/371472 [5:14:43<2:02:59, 3.65it/s] 93%|█████████▎| 344505/371472 [5:14:43<2:07:32, 3.52it/s] 93%|█████████▎| 344506/371472 [5:14:44<2:11:12, 3.43it/s] 93%|█████████▎| 344507/371472 [5:14:44<2:09:27, 3.47it/s] 93%|█████████▎| 344508/371472 [5:14:44<2:24:50, 3.10it/s] 93%|█████████▎| 344509/371472 [5:14:45<2:23:38, 3.13it/s] 93%|█████████▎| 344510/371472 [5:14:45<2:16:26, 3.29it/s] 93%|█████████▎| 344511/371472 [5:14:45<2:09:24, 3.47it/s] 93%|█████████▎| 344512/371472 [5:14:45<2:05:56, 3.57it/s] 93%|█████████▎| 344513/371472 [5:14:46<2:05:17, 3.59it/s] 93%|█████████▎| 344514/371472 [5:14:46<2:06:10, 3.56it/s] 93%|█████████▎| 344515/371472 [5:14:46<2:05:14, 3.59it/s] 93%|█████████▎| 344516/371472 [5:14:47<2:22:49, 3.15it/s] 93%|█████████▎| 344517/371472 [5:14:47<2:16:38, 3.29it/s] 93%|█████████▎| 344518/371472 [5:14:47<2:13:14, 3.37it/s] 93%|█████████▎| 344519/371472 [5:14:47<2:08:50, 3.49it/s] 93%|█████████▎| 344520/371472 [5:14:48<2:39:09, 2.82it/s] {'loss': 2.587, 'learning_rate': 1.6533431015535773e-07, 'epoch': 14.84} + 93%|█████████▎| 344520/371472 [5:14:48<2:39:09, 2.82it/s] 93%|█████████▎| 344521/371472 [5:14:48<2:34:34, 2.91it/s] 93%|█████████▎| 344522/371472 [5:14:49<2:23:24, 3.13it/s] 93%|█████████▎| 344523/371472 [5:14:49<2:21:45, 3.17it/s] 93%|█████████▎| 344524/371472 [5:14:49<2:15:15, 3.32it/s] 93%|█████████▎| 344525/371472 [5:14:49<2:15:11, 3.32it/s] 93%|█████████▎| 344526/371472 [5:14:50<2:07:28, 3.52it/s] 93%|█████████▎| 344527/371472 [5:14:50<2:09:41, 3.46it/s] 93%|█████████▎| 344528/371472 [5:14:50<2:14:39, 3.33it/s] 93%|█████████▎| 344529/371472 [5:14:51<2:07:01, 3.53it/s] 93%|█████████▎| 344530/371472 [5:14:51<2:08:04, 3.51it/s] 93%|█████████▎| 344531/371472 [5:14:51<2:14:33, 3.34it/s] 93%|█████████▎| 344532/371472 [5:14:51<2:06:16, 3.56it/s] 93%|█████████▎| 344533/371472 [5:14:52<2:07:47, 3.51it/s] 93%|█████████▎| 344534/371472 [5:14:52<2:05:43, 3.57it/s] 93%|█████████▎| 344535/371472 [5:14:52<2:03:19, 3.64it/s] 93%|█████████▎| 344536/371472 [5:14:52<2:03:06, 3.65it/s] 93%|█████████▎| 344537/371472 [5:14:53<2:02:18, 3.67it/s] 93%|█████████▎| 344538/371472 [5:14:53<2:01:42, 3.69it/s] 93%|█████████▎| 344539/371472 [5:14:53<2:03:37, 3.63it/s] 93%|█████████▎| 344540/371472 [5:14:54<2:05:22, 3.58it/s] {'loss': 2.5741, 'learning_rate': 1.652858281798789e-07, 'epoch': 14.84} + 93%|█████████▎| 344540/371472 [5:14:54<2:05:22, 3.58it/s] 93%|█████████▎| 344541/371472 [5:14:54<2:01:54, 3.68it/s] 93%|█████████▎| 344542/371472 [5:14:54<2:03:31, 3.63it/s] 93%|█████████▎| 344543/371472 [5:14:55<2:18:35, 3.24it/s] 93%|█████████▎| 344544/371472 [5:14:55<2:24:01, 3.12it/s] 93%|█████████▎| 344545/371472 [5:14:55<2:20:40, 3.19it/s] 93%|█████████▎| 344546/371472 [5:14:55<2:10:43, 3.43it/s] 93%|█████████▎| 344547/371472 [5:14:56<2:09:31, 3.46it/s] 93%|█████████▎| 344548/371472 [5:14:56<2:11:20, 3.42it/s] 93%|█████████▎| 344549/371472 [5:14:56<2:19:43, 3.21it/s] 93%|█████████▎| 344550/371472 [5:14:57<2:13:45, 3.35it/s] 93%|█████████▎| 344551/371472 [5:14:57<2:09:59, 3.45it/s] 93%|█████████▎| 344552/371472 [5:14:57<2:07:01, 3.53it/s] 93%|█████████▎| 344553/371472 [5:14:57<2:10:08, 3.45it/s] 93%|█████████▎| 344554/371472 [5:14:58<2:10:11, 3.45it/s] 93%|█████████▎| 344555/371472 [5:14:58<2:10:16, 3.44it/s] 93%|█████████▎| 344556/371472 [5:14:58<2:08:40, 3.49it/s] 93%|█████████▎| 344557/371472 [5:14:59<2:08:54, 3.48it/s] 93%|█████████▎| 344558/371472 [5:14:59<2:10:33, 3.44it/s] 93%|█████████▎| 344559/371472 [5:14:59<2:06:44, 3.54it/s] 93%|█████████▎| 344560/371472 [5:14:59<2:04:19, 3.61it/s] {'loss': 2.6947, 'learning_rate': 1.6523734620439998e-07, 'epoch': 14.84} + 93%|█████████▎| 344560/371472 [5:14:59<2:04:19, 3.61it/s] 93%|█████████▎| 344561/371472 [5:15:00<2:07:30, 3.52it/s] 93%|█████████▎| 344562/371472 [5:15:00<2:12:16, 3.39it/s] 93%|█████████▎| 344563/371472 [5:15:00<2:08:05, 3.50it/s] 93%|█████████▎| 344564/371472 [5:15:01<2:06:48, 3.54it/s] 93%|█████████▎| 344565/371472 [5:15:01<2:06:32, 3.54it/s] 93%|█████████▎| 344566/371472 [5:15:01<2:06:53, 3.53it/s] 93%|█████████▎| 344567/371472 [5:15:01<2:05:46, 3.57it/s] 93%|█████████▎| 344568/371472 [5:15:02<2:02:29, 3.66it/s] 93%|█████████▎| 344569/371472 [5:15:02<2:07:25, 3.52it/s] 93%|█████████▎| 344570/371472 [5:15:02<2:07:20, 3.52it/s] 93%|█████████▎| 344571/371472 [5:15:03<2:06:59, 3.53it/s] 93%|█████████▎| 344572/371472 [5:15:03<2:15:11, 3.32it/s] 93%|█████████▎| 344573/371472 [5:15:03<2:14:18, 3.34it/s] 93%|█████████▎| 344574/371472 [5:15:04<2:25:26, 3.08it/s] 93%|█████████▎| 344575/371472 [5:15:04<2:22:02, 3.16it/s] 93%|█████████▎| 344576/371472 [5:15:04<2:24:05, 3.11it/s] 93%|█████████▎| 344577/371472 [5:15:04<2:14:14, 3.34it/s] 93%|█████████▎| 344578/371472 [5:15:05<2:10:28, 3.44it/s] 93%|█████████▎| 344579/371472 [5:15:05<2:10:31, 3.43it/s] 93%|█████████▎| 344580/371472 [5:15:05<2:14:24, 3.33it/s] {'loss': 2.4015, 'learning_rate': 1.651888642289211e-07, 'epoch': 14.84} + 93%|█████████▎| 344580/371472 [5:15:05<2:14:24, 3.33it/s] 93%|█████████▎| 344581/371472 [5:15:06<2:09:53, 3.45it/s] 93%|█████████▎| 344582/371472 [5:15:06<2:06:20, 3.55it/s] 93%|█████████▎| 344583/371472 [5:15:06<2:09:43, 3.45it/s] 93%|█████████▎| 344584/371472 [5:15:06<2:09:20, 3.46it/s] 93%|█████████▎| 344585/371472 [5:15:07<2:05:22, 3.57it/s] 93%|█████████▎| 344586/371472 [5:15:07<2:07:09, 3.52it/s] 93%|█████████▎| 344587/371472 [5:15:07<2:13:07, 3.37it/s] 93%|█████████▎| 344588/371472 [5:15:08<2:07:40, 3.51it/s] 93%|█████████▎| 344589/371472 [5:15:08<2:05:56, 3.56it/s] 93%|█████████▎| 344590/371472 [5:15:08<2:03:04, 3.64it/s] 93%|█████████▎| 344591/371472 [5:15:08<2:00:53, 3.71it/s] 93%|█████████▎| 344592/371472 [5:15:09<2:01:29, 3.69it/s] 93%|█████████▎| 344593/371472 [5:15:09<2:10:26, 3.43it/s] 93%|█████████▎| 344594/371472 [5:15:09<2:11:45, 3.40it/s] 93%|█████████▎| 344595/371472 [5:15:10<2:08:09, 3.50it/s] 93%|█████████▎| 344596/371472 [5:15:10<2:11:31, 3.41it/s] 93%|█████████▎| 344597/371472 [5:15:10<2:07:08, 3.52it/s] 93%|█████████▎| 344598/371472 [5:15:10<2:02:47, 3.65it/s] 93%|█████████▎| 344599/371472 [5:15:11<1:58:24, 3.78it/s] 93%|█████████▎| 344600/371472 [5:15:11<2:08:56, 3.47it/s] {'loss': 2.6114, 'learning_rate': 1.6514038225344218e-07, 'epoch': 14.84} + 93%|█████████▎| 344600/371472 [5:15:11<2:08:56, 3.47it/s] 93%|█████████▎| 344601/371472 [5:15:11<2:05:22, 3.57it/s] 93%|█████████▎| 344602/371472 [5:15:12<2:05:14, 3.58it/s] 93%|█████████▎| 344603/371472 [5:15:12<2:05:27, 3.57it/s] 93%|█████████▎| 344604/371472 [5:15:12<2:03:10, 3.64it/s] 93%|█████████▎| 344605/371472 [5:15:12<2:03:20, 3.63it/s] 93%|█████████▎| 344606/371472 [5:15:13<2:04:39, 3.59it/s] 93%|█████████▎| 344607/371472 [5:15:13<2:05:25, 3.57it/s] 93%|█████████▎| 344608/371472 [5:15:13<2:00:53, 3.70it/s] 93%|█████████▎| 344609/371472 [5:15:13<2:02:22, 3.66it/s] 93%|█████████▎| 344610/371472 [5:15:14<1:59:11, 3.76it/s] 93%|█████████▎| 344611/371472 [5:15:14<2:01:40, 3.68it/s] 93%|█████████▎| 344612/371472 [5:15:14<2:04:03, 3.61it/s] 93%|█████████▎| 344613/371472 [5:15:15<2:01:04, 3.70it/s] 93%|█████████▎| 344614/371472 [5:15:15<2:10:48, 3.42it/s] 93%|█████████▎| 344615/371472 [5:15:15<2:06:48, 3.53it/s] 93%|█████████▎| 344616/371472 [5:15:15<2:01:55, 3.67it/s] 93%|█████████▎| 344617/371472 [5:15:16<1:57:29, 3.81it/s] 93%|█████████▎| 344618/371472 [5:15:16<2:04:30, 3.59it/s] 93%|█████████▎| 344619/371472 [5:15:16<2:07:41, 3.50it/s] 93%|█████████▎| 344620/371472 [5:15:17<2:05:24, 3.57it/s] {'loss': 2.734, 'learning_rate': 1.6509190027796335e-07, 'epoch': 14.84} + 93%|█████████▎| 344620/371472 [5:15:17<2:05:24, 3.57it/s] 93%|█████████▎| 344621/371472 [5:15:17<2:04:50, 3.58it/s] 93%|█████████▎| 344622/371472 [5:15:17<2:03:16, 3.63it/s] 93%|█████████▎| 344623/371472 [5:15:17<2:01:37, 3.68it/s] 93%|█████████▎| 344624/371472 [5:15:18<2:03:29, 3.62it/s] 93%|█████████▎| 344625/371472 [5:15:18<2:01:53, 3.67it/s] 93%|█████████▎| 344626/371472 [5:15:18<2:04:41, 3.59it/s] 93%|█████████▎| 344627/371472 [5:15:18<2:08:57, 3.47it/s] 93%|█████████▎| 344628/371472 [5:15:19<2:07:46, 3.50it/s] 93%|█████████▎| 344629/371472 [5:15:19<2:05:18, 3.57it/s] 93%|█████████▎| 344630/371472 [5:15:19<2:11:45, 3.40it/s] 93%|█████████▎| 344631/371472 [5:15:20<2:07:20, 3.51it/s] 93%|█████████▎| 344632/371472 [5:15:20<2:15:37, 3.30it/s] 93%|█████████▎| 344633/371472 [5:15:20<2:08:41, 3.48it/s] 93%|█████████▎| 344634/371472 [5:15:20<2:08:15, 3.49it/s] 93%|█████████▎| 344635/371472 [5:15:21<2:08:19, 3.49it/s] 93%|█████████▎| 344636/371472 [5:15:21<2:07:59, 3.49it/s] 93%|█████████▎| 344637/371472 [5:15:21<2:04:52, 3.58it/s] 93%|█████████▎| 344638/371472 [5:15:22<2:00:22, 3.72it/s] 93%|█████████▎| 344639/371472 [5:15:22<2:00:44, 3.70it/s] 93%|█████████▎| 344640/371472 [5:15:22<2:00:39, 3.71it/s] {'loss': 2.5489, 'learning_rate': 1.650434183024844e-07, 'epoch': 14.84} + 93%|█████████▎| 344640/371472 [5:15:22<2:00:39, 3.71it/s] 93%|█████████▎| 344641/371472 [5:15:22<2:05:07, 3.57it/s] 93%|█████████▎| 344642/371472 [5:15:23<2:06:40, 3.53it/s] 93%|█████████▎| 344643/371472 [5:15:23<2:03:03, 3.63it/s] 93%|█████████▎| 344644/371472 [5:15:23<2:04:05, 3.60it/s] 93%|█████████▎| 344645/371472 [5:15:24<2:06:13, 3.54it/s] 93%|█████████▎| 344646/371472 [5:15:24<2:04:12, 3.60it/s] 93%|█████████▎| 344647/371472 [5:15:24<2:04:47, 3.58it/s] 93%|█████████▎| 344648/371472 [5:15:24<2:00:12, 3.72it/s] 93%|█████████▎| 344649/371472 [5:15:25<2:02:11, 3.66it/s] 93%|█████████▎| 344650/371472 [5:15:25<2:03:31, 3.62it/s] 93%|█████████▎| 344651/371472 [5:15:25<2:09:52, 3.44it/s] 93%|█████████▎| 344652/371472 [5:15:26<2:18:01, 3.24it/s] 93%|█████████▎| 344653/371472 [5:15:26<2:13:05, 3.36it/s] 93%|█████████▎| 344654/371472 [5:15:26<2:12:30, 3.37it/s] 93%|█████████▎| 344655/371472 [5:15:26<2:06:58, 3.52it/s] 93%|█████████▎| 344656/371472 [5:15:27<2:14:14, 3.33it/s] 93%|█████████▎| 344657/371472 [5:15:27<2:15:24, 3.30it/s] 93%|█████████▎| 344658/371472 [5:15:27<2:19:16, 3.21it/s] 93%|█████████▎| 344659/371472 [5:15:28<2:16:07, 3.28it/s] 93%|█████████▎| 344660/371472 [5:15:28<2:10:44, 3.42it/s] {'loss': 2.6928, 'learning_rate': 1.6499493632700552e-07, 'epoch': 14.85} + 93%|█████████▎| 344660/371472 [5:15:28<2:10:44, 3.42it/s] 93%|█████████▎| 344661/371472 [5:15:28<2:12:37, 3.37it/s] 93%|█████████▎| 344662/371472 [5:15:29<2:17:04, 3.26it/s] 93%|█████████▎| 344663/371472 [5:15:29<2:14:36, 3.32it/s] 93%|█████████▎| 344664/371472 [5:15:29<2:14:26, 3.32it/s] 93%|█████████▎| 344665/371472 [5:15:29<2:06:55, 3.52it/s] 93%|█████████▎| 344666/371472 [5:15:30<2:02:21, 3.65it/s] 93%|█████████▎| 344667/371472 [5:15:30<2:08:23, 3.48it/s] 93%|█████████▎| 344668/371472 [5:15:30<2:10:23, 3.43it/s] 93%|█████████▎| 344669/371472 [5:15:31<2:06:01, 3.54it/s] 93%|█████████▎| 344670/371472 [5:15:31<2:04:57, 3.57it/s] 93%|█████████▎| 344671/371472 [5:15:31<2:05:01, 3.57it/s] 93%|█████████▎| 344672/371472 [5:15:31<2:02:16, 3.65it/s] 93%|█████████▎| 344673/371472 [5:15:32<2:02:46, 3.64it/s] 93%|█████████▎| 344674/371472 [5:15:32<2:01:19, 3.68it/s] 93%|█████████▎| 344675/371472 [5:15:32<2:03:37, 3.61it/s] 93%|█████████▎| 344676/371472 [5:15:33<2:13:31, 3.34it/s] 93%|█████████▎| 344677/371472 [5:15:33<2:06:51, 3.52it/s] 93%|█████████▎| 344678/371472 [5:15:33<2:08:51, 3.47it/s] 93%|█████████▎| 344679/371472 [5:15:33<2:08:04, 3.49it/s] 93%|█████████▎| 344680/371472 [5:15:34<2:11:29, 3.40it/s] {'loss': 2.5303, 'learning_rate': 1.6494645435152662e-07, 'epoch': 14.85} + 93%|█████████▎| 344680/371472 [5:15:34<2:11:29, 3.40it/s] 93%|█████████▎| 344681/371472 [5:15:34<2:18:49, 3.22it/s] 93%|█████████▎| 344682/371472 [5:15:34<2:17:56, 3.24it/s] 93%|█████████▎| 344683/371472 [5:15:35<2:13:45, 3.34it/s] 93%|█████████▎| 344684/371472 [5:15:35<2:13:21, 3.35it/s] 93%|█████████▎| 344685/371472 [5:15:35<2:06:47, 3.52it/s] 93%|█████████▎| 344686/371472 [5:15:35<2:02:37, 3.64it/s] 93%|█████████▎| 344687/371472 [5:15:36<1:57:56, 3.78it/s] 93%|█████████▎| 344688/371472 [5:15:36<1:58:01, 3.78it/s] 93%|█████████▎| 344689/371472 [5:15:36<2:02:56, 3.63it/s] 93%|█████████▎| 344690/371472 [5:15:36<1:59:53, 3.72it/s] 93%|█████████▎| 344691/371472 [5:15:37<2:00:09, 3.71it/s] 93%|█████████▎| 344692/371472 [5:15:37<2:08:36, 3.47it/s] 93%|█████████▎| 344693/371472 [5:15:37<2:08:34, 3.47it/s] 93%|█████████▎| 344694/371472 [5:15:38<2:07:11, 3.51it/s] 93%|█████████▎| 344695/371472 [5:15:38<2:04:02, 3.60it/s] 93%|█████████▎| 344696/371472 [5:15:38<2:07:04, 3.51it/s] 93%|█████████▎| 344697/371472 [5:15:39<2:14:26, 3.32it/s] 93%|█████████▎| 344698/371472 [5:15:39<2:24:41, 3.08it/s] 93%|█████████▎| 344699/371472 [5:15:39<2:14:05, 3.33it/s] 93%|█████████▎| 344700/371472 [5:15:39<2:14:15, 3.32it/s] {'loss': 2.6019, 'learning_rate': 1.6489797237604777e-07, 'epoch': 14.85} + 93%|█████████▎| 344700/371472 [5:15:39<2:14:15, 3.32it/s] 93%|█████████▎| 344701/371472 [5:15:40<2:09:26, 3.45it/s] 93%|█████████▎| 344702/371472 [5:15:40<2:05:45, 3.55it/s] 93%|█████████▎| 344703/371472 [5:15:40<2:01:52, 3.66it/s] 93%|█████████▎| 344704/371472 [5:15:41<2:16:17, 3.27it/s] 93%|█████████▎| 344705/371472 [5:15:41<2:14:02, 3.33it/s] 93%|█████████▎| 344706/371472 [5:15:41<2:04:59, 3.57it/s] 93%|█████████▎| 344707/371472 [5:15:41<2:07:56, 3.49it/s] 93%|█████████▎| 344708/371472 [5:15:42<2:11:10, 3.40it/s] 93%|█████████▎| 344709/371472 [5:15:42<2:06:19, 3.53it/s] 93%|█████████▎| 344710/371472 [5:15:42<2:09:12, 3.45it/s] 93%|█████████▎| 344711/371472 [5:15:43<2:06:37, 3.52it/s] 93%|█████████▎| 344712/371472 [5:15:43<2:09:42, 3.44it/s] 93%|█████████▎| 344713/371472 [5:15:43<2:04:44, 3.58it/s] 93%|█████████▎| 344714/371472 [5:15:43<2:03:10, 3.62it/s] 93%|█████████▎| 344715/371472 [5:15:44<2:00:10, 3.71it/s] 93%|█████████▎| 344716/371472 [5:15:44<2:04:12, 3.59it/s] 93%|█████████▎| 344717/371472 [5:15:44<2:05:40, 3.55it/s] 93%|█████████▎| 344718/371472 [5:15:44<2:00:22, 3.70it/s] 93%|█████████▎| 344719/371472 [5:15:45<2:06:35, 3.52it/s] 93%|█████████▎| 344720/371472 [5:15:45<2:02:32, 3.64it/s] {'loss': 2.6274, 'learning_rate': 1.6484949040056882e-07, 'epoch': 14.85} + 93%|█████████▎| 344720/371472 [5:15:45<2:02:32, 3.64it/s] 93%|█████████▎| 344721/371472 [5:15:45<2:02:04, 3.65it/s] 93%|█████████▎| 344722/371472 [5:15:46<2:00:02, 3.71it/s] 93%|█████████▎| 344723/371472 [5:15:46<2:01:18, 3.68it/s] 93%|█████████▎| 344724/371472 [5:15:46<2:02:07, 3.65it/s] 93%|█████████▎| 344725/371472 [5:15:46<1:58:05, 3.77it/s] 93%|█████████▎| 344726/371472 [5:15:47<2:03:00, 3.62it/s] 93%|█████████▎| 344727/371472 [5:15:47<2:04:03, 3.59it/s] 93%|█████████▎| 344728/371472 [5:15:47<2:09:47, 3.43it/s] 93%|█████████▎| 344729/371472 [5:15:48<2:06:13, 3.53it/s] 93%|█████████▎| 344730/371472 [5:15:48<2:00:55, 3.69it/s] 93%|█████████▎| 344731/371472 [5:15:48<1:57:20, 3.80it/s] 93%|█████████▎| 344732/371472 [5:15:48<1:56:24, 3.83it/s] 93%|█████████▎| 344733/371472 [5:15:49<1:57:15, 3.80it/s] 93%|█████████▎| 344734/371472 [5:15:49<2:17:33, 3.24it/s] 93%|█████████▎| 344735/371472 [5:15:49<2:12:27, 3.36it/s] 93%|█████████▎| 344736/371472 [5:15:50<2:14:09, 3.32it/s] 93%|█████████▎| 344737/371472 [5:15:50<2:09:18, 3.45it/s] 93%|█████████▎| 344738/371472 [5:15:50<2:04:33, 3.58it/s] 93%|█████████▎| 344739/371472 [5:15:50<2:08:18, 3.47it/s] 93%|█████████▎| 344740/371472 [5:15:51<2:07:49, 3.49it/s] {'loss': 2.6353, 'learning_rate': 1.6480100842509e-07, 'epoch': 14.85} + 93%|█████████▎| 344740/371472 [5:15:51<2:07:49, 3.49it/s] 93%|█████████▎| 344741/371472 [5:15:51<2:05:11, 3.56it/s] 93%|█████████▎| 344742/371472 [5:15:51<2:08:08, 3.48it/s] 93%|█████████▎| 344743/371472 [5:15:52<2:03:12, 3.62it/s] 93%|█████████▎| 344744/371472 [5:15:52<2:06:32, 3.52it/s] 93%|█████████▎| 344745/371472 [5:15:52<2:04:07, 3.59it/s] 93%|█████████▎| 344746/371472 [5:15:52<2:02:40, 3.63it/s] 93%|█████████▎| 344747/371472 [5:15:53<1:57:56, 3.78it/s] 93%|█████████▎| 344748/371472 [5:15:53<1:57:24, 3.79it/s] 93%|█████████▎| 344749/371472 [5:15:53<1:56:56, 3.81it/s] 93%|█████████▎| 344750/371472 [5:15:53<2:01:46, 3.66it/s] 93%|█████████▎| 344751/371472 [5:15:54<2:05:45, 3.54it/s] 93%|█████████▎| 344752/371472 [5:15:54<2:04:37, 3.57it/s] 93%|█████████▎| 344753/371472 [5:15:54<2:01:27, 3.67it/s] 93%|█████████▎| 344754/371472 [5:15:54<1:58:53, 3.75it/s] 93%|█████████▎| 344755/371472 [5:15:55<2:02:27, 3.64it/s] 93%|█████████▎| 344756/371472 [5:15:55<2:08:50, 3.46it/s] 93%|█████████▎| 344757/371472 [5:15:55<2:03:14, 3.61it/s] 93%|█████████▎| 344758/371472 [5:15:56<2:08:42, 3.46it/s] 93%|█████████▎| 344759/371472 [5:15:56<2:02:45, 3.63it/s] 93%|█████��███▎| 344760/371472 [5:15:56<2:02:43, 3.63it/s] {'loss': 2.7827, 'learning_rate': 1.6475252644961104e-07, 'epoch': 14.85} + 93%|█████████▎| 344760/371472 [5:15:56<2:02:43, 3.63it/s] 93%|█████████▎| 344761/371472 [5:15:56<2:00:21, 3.70it/s] 93%|█████████▎| 344762/371472 [5:15:57<1:56:20, 3.83it/s] 93%|█████████▎| 344763/371472 [5:15:57<1:53:54, 3.91it/s] 93%|█████████▎| 344764/371472 [5:15:57<2:03:44, 3.60it/s] 93%|█████████▎| 344765/371472 [5:15:58<2:06:33, 3.52it/s] 93%|█████████▎| 344766/371472 [5:15:58<2:02:42, 3.63it/s] 93%|█████████▎| 344767/371472 [5:15:58<2:06:02, 3.53it/s] 93%|█████████▎| 344768/371472 [5:15:58<2:03:54, 3.59it/s] 93%|█████████▎| 344769/371472 [5:15:59<2:04:41, 3.57it/s] 93%|█████████▎| 344770/371472 [5:15:59<2:01:49, 3.65it/s] 93%|█████████▎| 344771/371472 [5:15:59<2:01:41, 3.66it/s] 93%|█████████▎| 344772/371472 [5:15:59<2:00:46, 3.68it/s] 93%|█████████▎| 344773/371472 [5:16:00<2:02:36, 3.63it/s] 93%|█████████▎| 344774/371472 [5:16:00<2:04:24, 3.58it/s] 93%|█████████▎| 344775/371472 [5:16:00<2:01:58, 3.65it/s] 93%|█████████▎| 344776/371472 [5:16:01<2:06:23, 3.52it/s] 93%|█████████▎| 344777/371472 [5:16:01<2:07:10, 3.50it/s] 93%|█████████▎| 344778/371472 [5:16:01<2:01:22, 3.67it/s] 93%|█████████▎| 344779/371472 [5:16:01<2:01:32, 3.66it/s] 93%|█████████▎| 344780/371472 [5:16:02<2:02:34, 3.63it/s] {'loss': 2.6469, 'learning_rate': 1.647040444741322e-07, 'epoch': 14.85} + 93%|█████████▎| 344780/371472 [5:16:02<2:02:34, 3.63it/s] 93%|█████████▎| 344781/371472 [5:16:02<1:59:26, 3.72it/s] 93%|█████████▎| 344782/371472 [5:16:02<2:02:27, 3.63it/s] 93%|█████████▎| 344783/371472 [5:16:03<2:02:05, 3.64it/s] 93%|█████████▎| 344784/371472 [5:16:03<2:03:54, 3.59it/s] 93%|█████████▎| 344785/371472 [5:16:03<2:03:51, 3.59it/s] 93%|█████████▎| 344786/371472 [5:16:03<1:59:49, 3.71it/s] 93%|█████████▎| 344787/371472 [5:16:04<1:58:25, 3.76it/s] 93%|█████████▎| 344788/371472 [5:16:04<1:58:10, 3.76it/s] 93%|█████████▎| 344789/371472 [5:16:04<2:23:32, 3.10it/s] 93%|█████████▎| 344790/371472 [5:16:05<2:16:01, 3.27it/s] 93%|█████████▎| 344791/371472 [5:16:05<2:10:48, 3.40it/s] 93%|█████████▎| 344792/371472 [5:16:05<2:07:34, 3.49it/s] 93%|█████████▎| 344793/371472 [5:16:05<2:07:57, 3.47it/s] 93%|█████████▎| 344794/371472 [5:16:06<2:07:51, 3.48it/s] 93%|█████████▎| 344795/371472 [5:16:06<2:10:36, 3.40it/s] 93%|█████████▎| 344796/371472 [5:16:06<2:12:22, 3.36it/s] 93%|█████████▎| 344797/371472 [5:16:07<2:15:50, 3.27it/s] 93%|█████████▎| 344798/371472 [5:16:07<2:13:04, 3.34it/s] 93%|█████████▎| 344799/371472 [5:16:07<2:08:08, 3.47it/s] 93%|█████████▎| 344800/371472 [5:16:07<2:01:57, 3.65it/s] {'loss': 2.5557, 'learning_rate': 1.6465556249865326e-07, 'epoch': 14.85} + 93%|█████████▎| 344800/371472 [5:16:07<2:01:57, 3.65it/s] 93%|█████████▎| 344801/371472 [5:16:08<1:59:39, 3.72it/s] 93%|█████████▎| 344802/371472 [5:16:08<1:58:06, 3.76it/s] 93%|█████████▎| 344803/371472 [5:16:08<1:57:51, 3.77it/s] 93%|█████████▎| 344804/371472 [5:16:09<2:06:25, 3.52it/s] 93%|█████████▎| 344805/371472 [5:16:09<2:02:42, 3.62it/s] 93%|█████████▎| 344806/371472 [5:16:09<2:00:19, 3.69it/s] 93%|█████████▎| 344807/371472 [5:16:09<1:59:15, 3.73it/s] 93%|█████████▎| 344808/371472 [5:16:10<1:56:48, 3.80it/s] 93%|█████████▎| 344809/371472 [5:16:10<1:58:59, 3.73it/s] 93%|█████████▎| 344810/371472 [5:16:10<2:00:33, 3.69it/s] 93%|█████████▎| 344811/371472 [5:16:10<1:59:57, 3.70it/s] 93%|█████████▎| 344812/371472 [5:16:11<1:56:55, 3.80it/s] 93%|█████████▎| 344813/371472 [5:16:11<1:56:45, 3.81it/s] 93%|█████████▎| 344814/371472 [5:16:11<2:08:10, 3.47it/s] 93%|█████████▎| 344815/371472 [5:16:12<2:11:48, 3.37it/s] 93%|███���█████▎| 344816/371472 [5:16:12<2:07:25, 3.49it/s] 93%|█████████▎| 344817/371472 [5:16:12<2:03:06, 3.61it/s] 93%|█████████▎| 344818/371472 [5:16:12<2:03:39, 3.59it/s] 93%|█████████▎| 344819/371472 [5:16:13<2:03:24, 3.60it/s] 93%|█████████▎| 344820/371472 [5:16:13<2:02:23, 3.63it/s] {'loss': 2.7461, 'learning_rate': 1.646070805231744e-07, 'epoch': 14.85} + 93%|█████████▎| 344820/371472 [5:16:13<2:02:23, 3.63it/s] 93%|█████████▎| 344821/371472 [5:16:13<2:01:42, 3.65it/s] 93%|█████████▎| 344822/371472 [5:16:13<2:01:07, 3.67it/s] 93%|█████████▎| 344823/371472 [5:16:14<2:03:39, 3.59it/s] 93%|█████████▎| 344824/371472 [5:16:14<2:03:40, 3.59it/s] 93%|█████████▎| 344825/371472 [5:16:14<2:04:58, 3.55it/s] 93%|█████████▎| 344826/371472 [5:16:15<2:06:50, 3.50it/s] 93%|█████████▎| 344827/371472 [5:16:15<2:03:19, 3.60it/s] 93%|█████████▎| 344828/371472 [5:16:15<2:05:30, 3.54it/s] 93%|█████████▎| 344829/371472 [5:16:15<2:10:11, 3.41it/s] 93%|█████████▎| 344830/371472 [5:16:16<2:13:12, 3.33it/s] 93%|█████████▎| 344831/371472 [5:16:16<2:08:34, 3.45it/s] 93%|█████████▎| 344832/371472 [5:16:16<2:07:10, 3.49it/s] 93%|█████████▎| 344833/371472 [5:16:17<2:06:08, 3.52it/s] 93%|█████████▎| 344834/371472 [5:16:17<2:06:44, 3.50it/s] 93%|█████████▎| 344835/371472 [5:16:17<2:07:55, 3.47it/s] 93%|█████████▎| 344836/371472 [5:16:17<2:08:12, 3.46it/s] 93%|█████████▎| 344837/371472 [5:16:18<2:03:27, 3.60it/s] 93%|█████████▎| 344838/371472 [5:16:18<2:02:48, 3.61it/s] 93%|█████████▎| 344839/371472 [5:16:18<2:08:54, 3.44it/s] 93%|█████████▎| 344840/371472 [5:16:19<2:03:22, 3.60it/s] {'loss': 2.5251, 'learning_rate': 1.6455859854769546e-07, 'epoch': 14.85} + 93%|█████████▎| 344840/371472 [5:16:19<2:03:22, 3.60it/s] 93%|█████████▎| 344841/371472 [5:16:19<2:00:52, 3.67it/s] 93%|█████████▎| 344842/371472 [5:16:19<1:58:14, 3.75it/s] 93%|█████████▎| 344843/371472 [5:16:20<2:21:30, 3.14it/s] 93%|█████████▎| 344844/371472 [5:16:20<2:14:43, 3.29it/s] 93%|█████████▎| 344845/371472 [5:16:20<2:12:35, 3.35it/s] 93%|█████████▎| 344846/371472 [5:16:20<2:07:05, 3.49it/s] 93%|█████████▎| 344847/371472 [5:16:21<2:04:32, 3.56it/s] 93%|█████████▎| 344848/371472 [5:16:21<2:31:27, 2.93it/s] 93%|█████████▎| 344849/371472 [5:16:21<2:20:50, 3.15it/s] 93%|█████████▎| 344850/371472 [5:16:22<2:10:26, 3.40it/s] 93%|█████████▎| 344851/371472 [5:16:22<2:25:13, 3.06it/s] 93%|█████████▎| 344852/371472 [5:16:22<2:18:26, 3.20it/s] 93%|█████████▎| 344853/371472 [5:16:23<2:14:01, 3.31it/s] 93%|█████████▎| 344854/371472 [5:16:23<2:12:07, 3.36it/s] 93%|█████████▎| 344855/371472 [5:16:23<2:08:03, 3.46it/s] 93%|█████████▎| 344856/371472 [5:16:23<2:04:40, 3.56it/s] 93%|█████████▎| 344857/371472 [5:16:24<2:10:10, 3.41it/s] 93%|█████████▎| 344858/371472 [5:16:24<2:08:19, 3.46it/s] 93%|█████████▎| 344859/371472 [5:16:24<2:07:27, 3.48it/s] 93%|█████████▎| 344860/371472 [5:16:25<2:06:25, 3.51it/s] {'loss': 2.6465, 'learning_rate': 1.6451011657221663e-07, 'epoch': 14.85} + 93%|█████████▎| 344860/371472 [5:16:25<2:06:25, 3.51it/s] 93%|█████████▎| 344861/371472 [5:16:25<2:04:22, 3.57it/s] 93%|█████████▎| 344862/371472 [5:16:25<2:03:50, 3.58it/s] 93%|█████████▎| 344863/371472 [5:16:25<2:01:16, 3.66it/s] 93%|█████████▎| 344864/371472 [5:16:26<1:59:43, 3.70it/s] 93%|█████████▎| 344865/371472 [5:16:26<2:00:15, 3.69it/s] 93%|█████████▎| 344866/371472 [5:16:26<2:00:36, 3.68it/s] 93%|█████████▎| 344867/371472 [5:16:26<2:00:53, 3.67it/s] 93%|█████████▎| 344868/371472 [5:16:27<2:06:40, 3.50it/s] 93%|█████████▎| 344869/371472 [5:16:27<2:03:58, 3.58it/s] 93%|█████████▎| 344870/371472 [5:16:27<2:10:04, 3.41it/s] 93%|█████████▎| 344871/371472 [5:16:28<2:04:23, 3.56it/s] 93%|█████████▎| 344872/371472 [5:16:28<2:03:30, 3.59it/s] 93%|█████████▎| 344873/371472 [5:16:28<2:02:44, 3.61it/s] 93%|█████████▎| 344874/371472 [5:16:28<1:59:15, 3.72it/s] 93%|█████████▎| 344875/371472 [5:16:29<1:59:37, 3.71it/s] 93%|█████████▎| 344876/371472 [5:16:29<1:59:55, 3.70it/s] 93%|█████████▎| 344877/371472 [5:16:29<1:57:03, 3.79it/s] 93%|█████████▎| 344878/371472 [5:16:29<2:01:43, 3.64it/s] 93%|█████████▎| 344879/371472 [5:16:30<2:00:49, 3.67it/s] 93%|█████████▎| 344880/371472 [5:16:30<2:00:30, 3.68it/s] {'loss': 2.7498, 'learning_rate': 1.644616345967377e-07, 'epoch': 14.85} + 93%|█████████▎| 344880/371472 [5:16:30<2:00:30, 3.68it/s] 93%|█████████▎| 344881/371472 [5:16:30<2:07:39, 3.47it/s] 93%|█████████▎| 344882/371472 [5:16:31<2:03:17, 3.59it/s] 93%|█████████▎| 344883/371472 [5:16:31<2:05:39, 3.53it/s] 93%|█████████▎| 344884/371472 [5:16:31<2:03:06, 3.60it/s] 93%|█████████▎| 344885/371472 [5:16:32<2:16:31, 3.25it/s] 93%|█████████▎| 344886/371472 [5:16:32<2:07:47, 3.47it/s] 93%|█████████▎| 344887/371472 [5:16:32<2:16:42, 3.24it/s] 93%|█████████▎| 344888/371472 [5:16:32<2:09:49, 3.41it/s] 93%|█████████▎| 344889/371472 [5:16:33<2:03:14, 3.60it/s] 93%|█████████▎| 344890/371472 [5:16:33<2:07:11, 3.48it/s] 93%|█████████▎| 344891/371472 [5:16:33<2:07:39, 3.47it/s] 93%|█████████▎| 344892/371472 [5:16:34<2:07:03, 3.49it/s] 93%|█████████▎| 344893/371472 [5:16:34<2:09:54, 3.41it/s] 93%|█████████▎| 344894/371472 [5:16:34<2:10:14, 3.40it/s] 93%|█████████▎| 344895/371472 [5:16:34<2:08:56, 3.44it/s] 93%|█████████▎| 344896/371472 [5:16:35<2:08:45, 3.44it/s] 93%|█████████▎| 344897/371472 [5:16:35<2:14:57, 3.28it/s] 93%|█████████▎| 344898/371472 [5:16:35<2:09:08, 3.43it/s] 93%|█████████▎| 344899/371472 [5:16:36<2:02:47, 3.61it/s] 93%|█████████▎| 344900/371472 [5:16:36<2:06:29, 3.50it/s] {'loss': 2.6287, 'learning_rate': 1.6441315262125883e-07, 'epoch': 14.86} + 93%|█████████▎| 344900/371472 [5:16:36<2:06:29, 3.50it/s] 93%|█████████▎| 344901/371472 [5:16:36<2:12:17, 3.35it/s] 93%|█████████▎| 344902/371472 [5:16:36<2:11:38, 3.36it/s] 93%|█████████▎| 344903/371472 [5:16:37<2:13:39, 3.31it/s] 93%|█████████▎| 344904/371472 [5:16:37<2:09:17, 3.42it/s] 93%|█████████▎| 344905/371472 [5:16:37<2:12:44, 3.34it/s] 93%|█████████▎| 344906/371472 [5:16:38<2:08:53, 3.44it/s] 93%|█████████▎| 344907/371472 [5:16:38<2:08:30, 3.45it/s] 93%|█████████▎| 344908/371472 [5:16:38<2:06:42, 3.49it/s] 93%|█████████▎| 344909/371472 [5:16:38<2:09:38, 3.41it/s] 93%|█████████▎| 344910/371472 [5:16:39<2:09:30, 3.42it/s] 93%|█████████▎| 344911/371472 [5:16:39<2:01:47, 3.63it/s] 93%|█████████▎| 344912/371472 [5:16:39<2:04:16, 3.56it/s] 93%|█████████▎| 344913/371472 [5:16:40<2:03:00, 3.60it/s] 93%|█████████▎| 344914/371472 [5:16:40<2:04:13, 3.56it/s] 93%|█████████▎| 344915/371472 [5:16:40<1:59:51, 3.69it/s] 93%|█████████▎| 344916/371472 [5:16:40<1:58:49, 3.72it/s] 93%|█████████▎| 344917/371472 [5:16:41<1:58:21, 3.74it/s] 93%|█████████▎| 344918/371472 [5:16:41<2:07:01, 3.48it/s] 93%|█████████▎| 344919/371472 [5:16:41<2:04:16, 3.56it/s] 93%|█████████▎| 344920/371472 [5:16:42<2:01:41, 3.64it/s] {'loss': 2.6287, 'learning_rate': 1.643646706457799e-07, 'epoch': 14.86} + 93%|█████████▎| 344920/371472 [5:16:42<2:01:41, 3.64it/s] 93%|█████████▎| 344921/371472 [5:16:42<2:00:00, 3.69it/s] 93%|█████████▎| 344922/371472 [5:16:42<2:12:54, 3.33it/s] 93%|█████████▎| 344923/371472 [5:16:42<2:12:57, 3.33it/s] 93%|█████████▎| 344924/371472 [5:16:43<2:12:13, 3.35it/s] 93%|█████████▎| 344925/371472 [5:16:43<2:07:02, 3.48it/s] 93%|█████████▎| 344926/371472 [5:16:43<2:13:15, 3.32it/s] 93%|█████████▎| 344927/371472 [5:16:44<2:07:50, 3.46it/s] 93%|█████████▎| 344928/371472 [5:16:44<2:09:32, 3.41it/s] 93%|█████████▎| 344929/371472 [5:16:44<2:09:00, 3.43it/s] 93%|█████████▎| 344930/371472 [5:16:45<2:15:30, 3.26it/s] 93%|█████████▎| 344931/371472 [5:16:45<2:09:43, 3.41it/s] 93%|█████████▎| 344932/371472 [5:16:45<2:10:57, 3.38it/s] 93%|█████████▎| 344933/371472 [5:16:45<2:19:10, 3.18it/s] 93%|█████████▎| 344934/371472 [5:16:46<2:27:16, 3.00it/s] 93%|█████████▎| 344935/371472 [5:16:46<2:18:29, 3.19it/s] 93%|█████████▎| 344936/371472 [5:16:46<2:19:40, 3.17it/s] 93%|█████████▎| 344937/371472 [5:16:47<2:19:47, 3.16it/s] 93%|█████████▎| 344938/371472 [5:16:47<2:16:11, 3.25it/s] 93%|█████████▎| 344939/371472 [5:16:47<2:09:52, 3.40it/s] 93%|█████████▎| 344940/371472 [5:16:48<2:10:25, 3.39it/s] {'loss': 2.5876, 'learning_rate': 1.6431618867030108e-07, 'epoch': 14.86} + 93%|█████████▎| 344940/371472 [5:16:48<2:10:25, 3.39it/s] 93%|█████████▎| 344941/371472 [5:16:48<2:09:33, 3.41it/s] 93%|█████████▎| 344942/371472 [5:16:48<2:03:57, 3.57it/s] 93%|█████████▎| 344943/371472 [5:16:49<2:18:06, 3.20it/s] 93%|█████████▎| 344944/371472 [5:16:49<2:13:16, 3.32it/s] 93%|█████████▎| 344945/371472 [5:16:49<2:12:28, 3.34it/s] 93%|█████████▎| 344946/371472 [5:16:49<2:10:59, 3.38it/s] 93%|█████████▎| 344947/371472 [5:16:50<2:10:32, 3.39it/s] 93%|█████████▎| 344948/371472 [5:16:50<2:13:31, 3.31it/s] 93%|█████████▎| 344949/371472 [5:16:50<2:11:43, 3.36it/s] 93%|█████████▎| 344950/371472 [5:16:51<2:08:22, 3.44it/s] 93%|█████████▎| 344951/371472 [5:16:51<2:16:29, 3.24it/s] 93%|█████████▎| 344952/371472 [5:16:51<2:12:28, 3.34it/s] 93%|█████████▎| 344953/371472 [5:16:51<2:11:19, 3.37it/s] 93%|█████████▎| 344954/371472 [5:16:52<2:15:27, 3.26it/s] 93%|█████████▎| 344955/371472 [5:16:52<2:15:09, 3.27it/s] 93%|█████████▎| 344956/371472 [5:16:52<2:08:15, 3.45it/s] 93%|█████████▎| 344957/371472 [5:16:53<2:02:09, 3.62it/s] 93%|█████████▎| 344958/371472 [5:16:53<1:58:41, 3.72it/s] 93%|█████████▎| 344959/371472 [5:16:53<2:01:03, 3.65it/s] 93%|█████████▎| 344960/371472 [5:16:53<2:08:19, 3.44it/s] {'loss': 2.5708, 'learning_rate': 1.642677066948221e-07, 'epoch': 14.86} + 93%|█████████▎| 344960/371472 [5:16:53<2:08:19, 3.44it/s] 93%|█████████▎| 344961/371472 [5:16:54<2:14:39, 3.28it/s] 93%|█████████▎| 344962/371472 [5:16:54<2:06:36, 3.49it/s] 93%|█████████▎| 344963/371472 [5:16:54<2:03:33, 3.58it/s] 93%|█████████▎| 344964/371472 [5:16:55<2:03:24, 3.58it/s] 93%|█████████▎| 344965/371472 [5:16:55<2:01:57, 3.62it/s] 93%|█████████▎| 344966/371472 [5:16:55<1:59:50, 3.69it/s] 93%|█████████▎| 344967/371472 [5:16:55<1:57:28, 3.76it/s] 93%|█████████▎| 344968/371472 [5:16:56<2:09:52, 3.40it/s] 93%|█████████▎| 344969/371472 [5:16:56<2:09:29, 3.41it/s] 93%|█████████▎| 344970/371472 [5:16:56<2:10:00, 3.40it/s] 93%|█████████▎| 344971/371472 [5:16:57<2:04:53, 3.54it/s] 93%|█████████▎| 344972/371472 [5:16:57<2:06:16, 3.50it/s] 93%|█████████▎| 344973/371472 [5:16:57<2:07:30, 3.46it/s] 93%|█████████▎| 344974/371472 [5:16:57<2:09:29, 3.41it/s] 93%|█████████▎| 344975/371472 [5:16:58<2:13:13, 3.31it/s] 93%|█████████▎| 344976/371472 [5:16:58<2:08:09, 3.45it/s] 93%|█████████▎| 344977/371472 [5:16:58<2:01:55, 3.62it/s] 93%|█████████▎| 344978/371472 [5:16:59<2:01:11, 3.64it/s] 93%|█████████▎| 344979/371472 [5:16:59<2:01:48, 3.62it/s] 93%|█████████▎| 344980/371472 [5:16:59<2:05:41, 3.51it/s] {'loss': 2.6257, 'learning_rate': 1.6421922471934317e-07, 'epoch': 14.86} + 93%|█████████▎| 344980/371472 [5:16:59<2:05:41, 3.51it/s] 93%|█████████▎| 344981/371472 [5:16:59<2:00:19, 3.67it/s] 93%|█████████▎| 344982/371472 [5:17:00<1:57:55, 3.74it/s] 93%|█████████▎| 344983/371472 [5:17:00<1:55:33, 3.82it/s] 93%|█████████▎| 344984/371472 [5:17:00<1:55:14, 3.83it/s] 93%|█████████▎| 344985/371472 [5:17:00<2:01:49, 3.62it/s] 93%|█████████▎| 344986/371472 [5:17:01<1:59:08, 3.71it/s] 93%|█████████▎| 344987/371472 [5:17:01<2:00:47, 3.65it/s] 93%|█████████▎| 344988/371472 [5:17:01<2:04:04, 3.56it/s] 93%|█████████▎| 344989/371472 [5:17:02<2:04:02, 3.56it/s] 93%|█████████▎| 344990/371472 [5:17:02<2:04:27, 3.55it/s] 93%|█████████▎| 344991/371472 [5:17:02<2:02:02, 3.62it/s] 93%|█████████▎| 344992/371472 [5:17:02<2:00:39, 3.66it/s] 93%|█████████▎| 344993/371472 [5:17:03<2:06:34, 3.49it/s] 93%|█████████▎| 344994/371472 [5:17:03<2:05:24, 3.52it/s] 93%|█████████▎| 344995/371472 [5:17:03<2:00:33, 3.66it/s] 93%|█████████▎| 344996/371472 [5:17:03<1:58:16, 3.73it/s] 93%|█████████▎| 344997/371472 [5:17:04<1:58:17, 3.73it/s] 93%|█████████▎| 344998/371472 [5:17:04<2:00:15, 3.67it/s] 93%|█████████▎| 344999/371472 [5:17:04<1:58:00, 3.74it/s] 93%|█████████▎| 345000/371472 [5:17:05<2:05:11, 3.52it/s] {'loss': 2.6275, 'learning_rate': 1.6417074274386437e-07, 'epoch': 14.86} + 93%|█████████▎| 345000/371472 [5:17:05<2:05:11, 3.52it/s] 93%|█████████▎| 345001/371472 [5:17:05<2:03:09, 3.58it/s] 93%|█████████▎| 345002/371472 [5:17:05<2:00:50, 3.65it/s] 93%|█████████▎| 345003/371472 [5:17:05<2:05:06, 3.53it/s] 93%|█████████▎| 345004/371472 [5:17:06<2:07:23, 3.46it/s] 93%|█████████▎| 345005/371472 [5:17:06<2:09:20, 3.41it/s] 93%|█████████▎| 345006/371472 [5:17:06<2:07:37, 3.46it/s] 93%|█████████▎| 345007/371472 [5:17:07<2:04:31, 3.54it/s] 93%|█████████▎| 345008/371472 [5:17:07<2:01:27, 3.63it/s] 93%|█████████▎| 345009/371472 [5:17:07<2:20:19, 3.14it/s] 93%|█████████▎| 345010/371472 [5:17:08<2:13:26, 3.30it/s] 93%|█████████▎| 345011/371472 [5:17:08<2:07:57, 3.45it/s] 93%|█████████▎| 345012/371472 [5:17:08<2:18:31, 3.18it/s] 93%|█████████▎| 345013/371472 [5:17:08<2:19:36, 3.16it/s] 93%|█████████▎| 345014/371472 [5:17:09<2:12:29, 3.33it/s] 93%|█████████▎| 345015/371472 [5:17:09<2:24:27, 3.05it/s] 93%|█████████▎| 345016/371472 [5:17:09<2:14:59, 3.27it/s] 93%|█████████▎| 345017/371472 [5:17:10<2:12:50, 3.32it/s] 93%|█████████▎| 345018/371472 [5:17:10<2:14:43, 3.27it/s] 93%|█████████▎| 345019/371472 [5:17:10<2:09:14, 3.41it/s] 93%|█████████▎| 345020/371472 [5:17:11<2:10:04, 3.39it/s] {'loss': 2.6077, 'learning_rate': 1.641222607683854e-07, 'epoch': 14.86} + 93%|█████████▎| 345020/371472 [5:17:11<2:10:04, 3.39it/s] 93%|█████████▎| 345021/371472 [5:17:11<2:18:54, 3.17it/s] 93%|█████████▎| 345022/371472 [5:17:11<2:12:12, 3.33it/s] 93%|█████████▎| 345023/371472 [5:17:11<2:09:42, 3.40it/s] 93%|█████████▎| 345024/371472 [5:17:12<2:04:27, 3.54it/s] 93%|█████████▎| 345025/371472 [5:17:12<1:58:33, 3.72it/s] 93%|█████████▎| 345026/371472 [5:17:12<1:56:18, 3.79it/s] 93%|█████████▎| 345027/371472 [5:17:13<1:58:39, 3.71it/s] 93%|█████████▎| 345028/371472 [5:17:13<1:57:35, 3.75it/s] 93%|█████████▎| 345029/371472 [5:17:13<1:58:56, 3.71it/s] 93%|█████████▎| 345030/371472 [5:17:13<2:03:54, 3.56it/s] 93%|█████████▎| 345031/371472 [5:17:14<1:59:49, 3.68it/s] 93%|█████████▎| 345032/371472 [5:17:14<2:07:07, 3.47it/s] 93%|█████████▎| 345033/371472 [5:17:14<2:13:12, 3.31it/s] 93%|█████████▎| 345034/371472 [5:17:15<2:06:54, 3.47it/s] 93%|█████████▎| 345035/371472 [5:17:15<2:01:19, 3.63it/s] 93%|█████████▎| 345036/371472 [5:17:15<1:59:25, 3.69it/s] 93%|█████████▎| 345037/371472 [5:17:15<1:59:12, 3.70it/s] 93%|█████████▎| 345038/371472 [5:17:16<1:59:21, 3.69it/s] 93%|█████████▎| 345039/371472 [5:17:16<2:03:04, 3.58it/s] 93%|█████████▎| 345040/371472 [5:17:16<2:04:58, 3.53it/s] {'loss': 2.4898, 'learning_rate': 1.6407377879290654e-07, 'epoch': 14.86} + 93%|█████████▎| 345040/371472 [5:17:16<2:04:58, 3.53it/s] 93%|█████████▎| 345041/371472 [5:17:16<2:02:34, 3.59it/s] 93%|█████████▎| 345042/371472 [5:17:17<2:01:57, 3.61it/s] 93%|█████████▎| 345043/371472 [5:17:17<2:02:56, 3.58it/s] 93%|█████████▎| 345044/371472 [5:17:17<2:00:42, 3.65it/s] 93%|█████████▎| 345045/371472 [5:17:18<2:00:54, 3.64it/s] 93%|█████████▎| 345046/371472 [5:17:18<1:58:24, 3.72it/s] 93%|█████████▎| 345047/371472 [5:17:18<1:58:07, 3.73it/s] 93%|█████████▎| 345048/371472 [5:17:18<1:59:52, 3.67it/s] 93%|█████████▎| 345049/371472 [5:17:19<2:03:33, 3.56it/s] 93%|█████████▎| 345050/371472 [5:17:19<2:00:59, 3.64it/s] 93%|█████████▎| 345051/371472 [5:17:19<2:05:13, 3.52it/s] 93%|█████████▎| 345052/371472 [5:17:19<2:06:36, 3.48it/s] 93%|█████████▎| 345053/371472 [5:17:20<2:07:48, 3.45it/s] 93%|█████████▎| 345054/371472 [5:17:20<2:10:13, 3.38it/s] 93%|█████████▎| 345055/371472 [5:17:20<2:09:41, 3.40it/s] 93%|█████████▎| 345056/371472 [5:17:21<2:08:13, 3.43it/s] 93%|█████████▎| 345057/371472 [5:17:21<2:04:32, 3.54it/s] 93%|█████████▎| 345058/371472 [5:17:21<2:07:11, 3.46it/s] 93%|█████████▎| 345059/371472 [5:17:21<2:02:27, 3.59it/s] 93%|█████████▎| 345060/371472 [5:17:22<2:04:19, 3.54it/s] {'loss': 2.525, 'learning_rate': 1.6402529681742761e-07, 'epoch': 14.86} + 93%|█████████▎| 345060/371472 [5:17:22<2:04:19, 3.54it/s] 93%|█████████▎| 345061/371472 [5:17:22<2:03:36, 3.56it/s] 93%|█████████▎| 345062/371472 [5:17:22<2:05:04, 3.52it/s] 93%|█████████▎| 345063/371472 [5:17:23<2:01:48, 3.61it/s] 93%|█████████▎| 345064/371472 [5:17:23<2:03:20, 3.57it/s] 93%|█████████▎| 345065/371472 [5:17:23<2:07:38, 3.45it/s] 93%|█████████▎| 345066/371472 [5:17:24<2:21:23, 3.11it/s] 93%|█████████▎| 345067/371472 [5:17:24<2:14:11, 3.28it/s] 93%|█████████▎| 345068/371472 [5:17:24<2:09:00, 3.41it/s] 93%|█████████▎| 345069/371472 [5:17:24<2:07:46, 3.44it/s] 93%|█████████▎| 345070/371472 [5:17:25<2:07:53, 3.44it/s] 93%|█████████▎| 345071/371472 [5:17:25<2:02:27, 3.59it/s] 93%|█████████▎| 345072/371472 [5:17:25<1:58:46, 3.70it/s] 93%|█████████▎| 345073/371472 [5:17:26<2:15:06, 3.26it/s] 93%|█████████▎| 345074/371472 [5:17:26<2:16:09, 3.23it/s] 93%|█████████▎| 345075/371472 [5:17:26<2:15:55, 3.24it/s] 93%|█████████▎| 345076/371472 [5:17:27<2:11:50, 3.34it/s] 93%|█████████▎| 345077/371472 [5:17:27<2:08:48, 3.42it/s] 93%|█████████▎| 345078/371472 [5:17:27<2:07:10, 3.46it/s] 93%|█████████▎| 345079/371472 [5:17:27<2:06:24, 3.48it/s] 93%|█████████▎| 345080/371472 [5:17:28<2:07:35, 3.45it/s] {'loss': 2.6247, 'learning_rate': 1.6397681484194876e-07, 'epoch': 14.86} + 93%|█████████▎| 345080/371472 [5:17:28<2:07:35, 3.45it/s] 93%|█████████▎| 345081/371472 [5:17:28<2:04:56, 3.52it/s] 93%|█████████▎| 345082/371472 [5:17:28<2:08:51, 3.41it/s] 93%|█████████▎| 345083/371472 [5:17:29<2:09:13, 3.40it/s] 93%|█████████▎| 345084/371472 [5:17:29<2:07:07, 3.46it/s] 93%|█████████▎| 345085/371472 [5:17:29<2:05:42, 3.50it/s] 93%|█████████▎| 345086/371472 [5:17:29<2:06:26, 3.48it/s] 93%|█████████▎| 345087/371472 [5:17:30<2:08:16, 3.43it/s] 93%|█████████▎| 345088/371472 [5:17:30<2:05:35, 3.50it/s] 93%|█████████▎| 345089/371472 [5:17:30<2:03:36, 3.56it/s] 93%|█████████▎| 345090/371472 [5:17:30<2:00:04, 3.66it/s] 93%|█████████▎| 345091/371472 [5:17:31<1:58:54, 3.70it/s] 93%|█████████▎| 345092/371472 [5:17:31<1:58:30, 3.71it/s] 93%|█████████▎| 345093/371472 [5:17:31<1:56:32, 3.77it/s] 93%|█████████▎| 345094/371472 [5:17:32<1:59:44, 3.67it/s] 93%|█████████▎| 345095/371472 [5:17:32<1:59:07, 3.69it/s] 93%|█████████▎| 345096/371472 [5:17:32<2:04:26, 3.53it/s] 93%|█████████▎| 345097/371472 [5:17:32<2:03:36, 3.56it/s] 93%|█████████▎| 345098/371472 [5:17:33<2:02:00, 3.60it/s] 93%|█████████▎| 345099/371472 [5:17:33<2:00:57, 3.63it/s] 93%|█████████▎| 345100/371472 [5:17:33<1:56:51, 3.76it/s] {'loss': 2.7243, 'learning_rate': 1.639283328664698e-07, 'epoch': 14.86} + 93%|█████████▎| 345100/371472 [5:17:33<1:56:51, 3.76it/s] 93%|█████████▎| 345101/371472 [5:17:34<2:02:58, 3.57it/s] 93%|█████████▎| 345102/371472 [5:17:34<2:08:19, 3.42it/s] 93%|█████████▎| 345103/371472 [5:17:34<2:05:40, 3.50it/s] 93%|█████████▎| 345104/371472 [5:17:34<2:11:02, 3.35it/s] 93%|█████████▎| 345105/371472 [5:17:35<2:10:36, 3.36it/s] 93%|█████████▎| 345106/371472 [5:17:35<2:06:39, 3.47it/s] 93%|█████████▎| 345107/371472 [5:17:35<1:59:55, 3.66it/s] 93%|█████████▎| 345108/371472 [5:17:35<2:00:03, 3.66it/s] 93%|█████████▎| 345109/371472 [5:17:36<1:58:45, 3.70it/s] 93%|█████████▎| 345110/371472 [5:17:36<2:05:07, 3.51it/s] 93%|█████████▎| 345111/371472 [5:17:36<2:15:30, 3.24it/s] 93%|█████████▎| 345112/371472 [5:17:37<2:08:33, 3.42it/s] 93%|█████████▎| 345113/371472 [5:17:37<2:08:42, 3.41it/s] 93%|█████████▎| 345114/371472 [5:17:37<2:14:13, 3.27it/s] 93%|█████████▎| 345115/371472 [5:17:38<2:08:07, 3.43it/s] 93%|█████████▎| 345116/371472 [5:17:38<2:16:37, 3.21it/s] 93%|█████████▎| 345117/371472 [5:17:38<2:18:04, 3.18it/s] 93%|█████████▎| 345118/371472 [5:17:39<2:17:40, 3.19it/s] 93%|█████████▎| 345119/371472 [5:17:39<2:15:20, 3.25it/s] 93%|█████████▎| 345120/371472 [5:17:39<2:11:22, 3.34it/s] {'loss': 2.6765, 'learning_rate': 1.63879850890991e-07, 'epoch': 14.86} + 93%|█████████▎| 345120/371472 [5:17:39<2:11:22, 3.34it/s] 93%|█████████▎| 345121/371472 [5:17:39<2:14:34, 3.26it/s] 93%|█████████▎| 345122/371472 [5:17:40<2:13:29, 3.29it/s] 93%|█████████▎| 345123/371472 [5:17:40<2:06:21, 3.48it/s] 93%|█████████▎| 345124/371472 [5:17:40<2:03:19, 3.56it/s] 93%|█████████▎| 345125/371472 [5:17:41<2:13:37, 3.29it/s] 93%|█████████▎| 345126/371472 [5:17:41<2:07:01, 3.46it/s] 93%|█████████▎| 345127/371472 [5:17:41<2:09:58, 3.38it/s] 93%|█████████▎| 345128/371472 [5:17:42<2:09:39, 3.39it/s] 93%|█████████▎| 345129/371472 [5:17:42<2:09:01, 3.40it/s] 93%|█████████▎| 345130/371472 [5:17:42<2:09:53, 3.38it/s] 93%|█████████▎| 345131/371472 [5:17:42<2:07:28, 3.44it/s] 93%|█████████▎| 345132/371472 [5:17:43<2:03:47, 3.55it/s] 93%|█████████▎| 345133/371472 [5:17:43<1:58:01, 3.72it/s] 93%|█████████▎| 345134/371472 [5:17:43<2:00:56, 3.63it/s] 93%|█████████▎| 345135/371472 [5:17:43<2:01:20, 3.62it/s] 93%|█████████▎| 345136/371472 [5:17:44<2:00:23, 3.65it/s] 93%|█████████▎| 345137/371472 [5:17:44<2:11:17, 3.34it/s] 93%|█████████▎| 345138/371472 [5:17:44<2:06:02, 3.48it/s] 93%|█████████▎| 345139/371472 [5:17:45<2:02:02, 3.60it/s] 93%|█████████▎| 345140/371472 [5:17:45<2:01:05, 3.62it/s] {'loss': 2.5928, 'learning_rate': 1.6383136891551203e-07, 'epoch': 14.87} + 93%|█████████▎| 345140/371472 [5:17:45<2:01:05, 3.62it/s] 93%|█████████▎| 345141/371472 [5:17:45<1:59:02, 3.69it/s] 93%|█████████▎| 345142/371472 [5:17:45<1:59:04, 3.69it/s] 93%|█████████▎| 345143/371472 [5:17:46<2:05:33, 3.49it/s] 93%|█████████▎| 345144/371472 [5:17:46<2:03:28, 3.55it/s] 93%|█████████▎| 345145/371472 [5:17:46<2:03:58, 3.54it/s] 93%|█████████▎| 345146/371472 [5:17:47<2:01:54, 3.60it/s] 93%|█████████▎| 345147/371472 [5:17:47<2:02:22, 3.59it/s] 93%|█████████▎| 345148/371472 [5:17:47<1:58:52, 3.69it/s] 93%|█████████▎| 345149/371472 [5:17:47<1:59:15, 3.68it/s] 93%|█████████▎| 345150/371472 [5:17:48<2:00:41, 3.63it/s] 93%|█████████▎| 345151/371472 [5:17:48<1:58:48, 3.69it/s] 93%|█████████▎| 345152/371472 [5:17:48<2:03:42, 3.55it/s] 93%|█████████▎| 345153/371472 [5:17:48<2:05:31, 3.49it/s] 93%|█████████▎| 345154/371472 [5:17:49<2:01:17, 3.62it/s] 93%|█████████▎| 345155/371472 [5:17:49<2:00:17, 3.65it/s] 93%|█████████▎| 345156/371472 [5:17:49<1:59:23, 3.67it/s] 93%|█████████▎| 345157/371472 [5:17:50<1:59:49, 3.66it/s] 93%|█████████▎| 345158/371472 [5:17:50<1:58:28, 3.70it/s] 93%|█████████▎| 345159/371472 [5:17:50<2:02:36, 3.58it/s] 93%|█████████▎| 345160/371472 [5:17:50<2:04:09, 3.53it/s] {'loss': 2.6438, 'learning_rate': 1.6378288694003318e-07, 'epoch': 14.87} + 93%|█████████▎| 345160/371472 [5:17:50<2:04:09, 3.53it/s] 93%|█████████▎| 345161/371472 [5:17:51<2:03:14, 3.56it/s] 93%|█████████▎| 345162/371472 [5:17:51<2:00:28, 3.64it/s] 93%|█████████▎| 345163/371472 [5:17:51<2:01:49, 3.60it/s] 93%|█████████▎| 345164/371472 [5:17:52<2:06:51, 3.46it/s] 93%|█████████▎| 345165/371472 [5:17:52<2:01:13, 3.62it/s] 93%|█████████▎| 345166/371472 [5:17:52<2:01:42, 3.60it/s] 93%|█████████▎| 345167/371472 [5:17:52<2:10:21, 3.36it/s] 93%|█████████▎| 345168/371472 [5:17:53<2:05:41, 3.49it/s] 93%|█████████▎| 345169/371472 [5:17:53<2:00:54, 3.63it/s] 93%|█████████▎| 345170/371472 [5:17:53<2:01:04, 3.62it/s] 93%|█████████▎| 345171/371472 [5:17:53<1:59:18, 3.67it/s] 93%|█████████▎| 345172/371472 [5:17:54<2:00:48, 3.63it/s] 93%|█████████▎| 345173/371472 [5:17:54<2:04:44, 3.51it/s] 93%|█████████▎| 345174/371472 [5:17:54<2:08:16, 3.42it/s] 93%|█████████▎| 345175/371472 [5:17:55<2:10:21, 3.36it/s] 93%|█████████▎| 345176/371472 [5:17:55<2:13:41, 3.28it/s] 93%|█████████▎| 345177/371472 [5:17:55<2:21:45, 3.09it/s] 93%|█████████▎| 345178/371472 [5:17:56<2:13:25, 3.28it/s] 93%|█████████▎| 345179/371472 [5:17:56<2:09:51, 3.37it/s] 93%|█████████▎| 345180/371472 [5:17:56<2:11:03, 3.34it/s] {'loss': 2.6963, 'learning_rate': 1.6373440496455425e-07, 'epoch': 14.87} + 93%|█████████▎| 345180/371472 [5:17:56<2:11:03, 3.34it/s] 93%|█████████▎| 345181/371472 [5:17:56<2:10:20, 3.36it/s] 93%|█████████▎| 345182/371472 [5:17:57<2:05:56, 3.48it/s] 93%|█████████▎| 345183/371472 [5:17:57<2:00:47, 3.63it/s] 93%|█████████▎| 345184/371472 [5:17:57<1:59:01, 3.68it/s] 93%|█████████▎| 345185/371472 [5:17:58<2:01:59, 3.59it/s] 93%|█████████▎| 345186/371472 [5:17:58<2:02:21, 3.58it/s] 93%|█████████▎| 345187/371472 [5:17:58<2:00:03, 3.65it/s] 93%|█████████▎| 345188/371472 [5:17:58<2:07:37, 3.43it/s] 93%|█████████▎| 345189/371472 [5:17:59<2:02:46, 3.57it/s] 93%|█████████▎| 345190/371472 [5:17:59<2:02:57, 3.56it/s] 93%|█████████▎| 345191/371472 [5:17:59<2:07:42, 3.43it/s] 93%|█████████▎| 345192/371472 [5:18:00<2:07:28, 3.44it/s] 93%|█████████▎| 345193/371472 [5:18:00<2:00:26, 3.64it/s] 93%|█████████▎| 345194/371472 [5:18:00<1:57:45, 3.72it/s] 93%|█████████▎| 345195/371472 [5:18:00<1:53:24, 3.86it/s] 93%|█████████▎| 345196/371472 [5:18:01<1:55:35, 3.79it/s] 93%|█████████▎| 345197/371472 [5:18:01<2:00:43, 3.63it/s] 93%|█████████▎| 345198/371472 [5:18:01<1:58:58, 3.68it/s] 93%|█████████▎| 345199/371472 [5:18:01<1:57:47, 3.72it/s] 93%|█████████▎| 345200/371472 [5:18:02<2:03:52, 3.53it/s] {'loss': 2.6933, 'learning_rate': 1.636859229890754e-07, 'epoch': 14.87} + 93%|█████████▎| 345200/371472 [5:18:02<2:03:52, 3.53it/s] 93%|█████████▎| 345201/371472 [5:18:02<2:10:10, 3.36it/s] 93%|█████████▎| 345202/371472 [5:18:02<2:04:47, 3.51it/s] 93%|█████████▎| 345203/371472 [5:18:03<2:04:44, 3.51it/s] 93%|█████████▎| 345204/371472 [5:18:03<2:04:30, 3.52it/s] 93%|█████████▎| 345205/371472 [5:18:03<2:00:57, 3.62it/s] 93%|█████████▎| 345206/371472 [5:18:03<2:02:16, 3.58it/s] 93%|█████████▎| 345207/371472 [5:18:04<2:09:04, 3.39it/s] 93%|█████████▎| 345208/371472 [5:18:04<2:07:18, 3.44it/s] 93%|█████████▎| 345209/371472 [5:18:04<2:11:16, 3.33it/s] 93%|██████��██▎| 345210/371472 [5:18:05<2:05:35, 3.49it/s] 93%|█████████▎| 345211/371472 [5:18:05<2:05:57, 3.47it/s] 93%|█████████▎| 345212/371472 [5:18:05<2:10:24, 3.36it/s] 93%|█████████▎| 345213/371472 [5:18:06<2:08:02, 3.42it/s] 93%|█████████▎| 345214/371472 [5:18:06<2:08:26, 3.41it/s] 93%|█████████▎| 345215/371472 [5:18:06<2:10:01, 3.37it/s] 93%|█████████▎| 345216/371472 [5:18:06<2:09:28, 3.38it/s] 93%|█████████▎| 345217/371472 [5:18:07<2:09:22, 3.38it/s] 93%|█████████▎| 345218/371472 [5:18:07<2:21:41, 3.09it/s] 93%|█████████▎| 345219/371472 [5:18:07<2:21:28, 3.09it/s] 93%|█████████▎| 345220/371472 [5:18:08<2:13:20, 3.28it/s] {'loss': 2.5985, 'learning_rate': 1.6363744101359645e-07, 'epoch': 14.87} + 93%|█████████▎| 345220/371472 [5:18:08<2:13:20, 3.28it/s] 93%|█████████▎| 345221/371472 [5:18:08<2:07:44, 3.42it/s] 93%|█████████▎| 345222/371472 [5:18:08<2:07:05, 3.44it/s] 93%|█████████▎| 345223/371472 [5:18:08<2:02:31, 3.57it/s] 93%|█████████▎| 345224/371472 [5:18:09<2:03:48, 3.53it/s] 93%|█████████▎| 345225/371472 [5:18:09<2:00:35, 3.63it/s] 93%|█████████▎| 345226/371472 [5:18:09<2:01:22, 3.60it/s] 93%|█████████▎| 345227/371472 [5:18:10<1:59:19, 3.67it/s] 93%|█████████▎| 345228/371472 [5:18:10<1:57:14, 3.73it/s] 93%|█████████▎| 345229/371472 [5:18:10<1:53:43, 3.85it/s] 93%|█████████▎| 345230/371472 [5:18:10<1:53:16, 3.86it/s] 93%|█████████▎| 345231/371472 [5:18:11<1:53:04, 3.87it/s] 93%|█████████▎| 345232/371472 [5:18:11<2:06:24, 3.46it/s] 93%|█████████▎| 345233/371472 [5:18:11<2:06:14, 3.46it/s] 93%|█████████▎| 345234/371472 [5:18:12<2:08:28, 3.40it/s] 93%|█████████▎| 345235/371472 [5:18:12<2:05:58, 3.47it/s] 93%|█████████▎| 345236/371472 [5:18:12<2:01:22, 3.60it/s] 93%|█████████▎| 345237/371472 [5:18:12<1:56:27, 3.75it/s] 93%|█████████▎| 345238/371472 [5:18:13<1:57:24, 3.72it/s] 93%|█████████▎| 345239/371472 [5:18:13<1:55:16, 3.79it/s] 93%|█████████▎| 345240/371472 [5:18:13<1:57:08, 3.73it/s] {'loss': 2.4641, 'learning_rate': 1.6358895903811763e-07, 'epoch': 14.87} + 93%|█████████▎| 345240/371472 [5:18:13<1:57:08, 3.73it/s] 93%|█████████▎| 345241/371472 [5:18:13<1:53:10, 3.86it/s] 93%|█████████▎| 345242/371472 [5:18:14<1:53:24, 3.85it/s] 93%|█████████▎| 345243/371472 [5:18:14<1:54:32, 3.82it/s] 93%|█████████▎| 345244/371472 [5:18:14<1:52:29, 3.89it/s] 93%|█████████▎| 345245/371472 [5:18:14<1:52:15, 3.89it/s] 93%|█████████▎| 345246/371472 [5:18:15<2:02:47, 3.56it/s] 93%|█████████▎| 345247/371472 [5:18:15<1:57:49, 3.71it/s] 93%|█████████▎| 345248/371472 [5:18:15<1:59:23, 3.66it/s] 93%|█████████▎| 345249/371472 [5:18:16<2:03:42, 3.53it/s] 93%|█████████▎| 345250/371472 [5:18:16<2:01:20, 3.60it/s] 93%|█████████▎| 345251/371472 [5:18:16<2:00:37, 3.62it/s] 93%|█████████▎| 345252/371472 [5:18:16<1:56:28, 3.75it/s] 93%|█████████▎| 345253/371472 [5:18:17<1:54:04, 3.83it/s] 93%|█████████▎| 345254/371472 [5:18:17<1:59:31, 3.66it/s] 93%|█████████▎| 345255/371472 [5:18:17<2:01:23, 3.60it/s] 93%|█████████▎| 345256/371472 [5:18:17<1:57:56, 3.70it/s] 93%|█████████▎| 345257/371472 [5:18:18<1:55:48, 3.77it/s] 93%|█████████▎| 345258/371472 [5:18:18<1:56:11, 3.76it/s] 93%|█████████▎| 345259/371472 [5:18:18<1:55:53, 3.77it/s] 93%|█████████▎| 345260/371472 [5:18:19<2:10:13, 3.35it/s] {'loss': 2.5855, 'learning_rate': 1.635404770626387e-07, 'epoch': 14.87} + 93%|█████████▎| 345260/371472 [5:18:19<2:10:13, 3.35it/s] 93%|█████████▎| 345261/371472 [5:18:19<2:09:28, 3.37it/s] 93%|█████████▎| 345262/371472 [5:18:19<2:05:09, 3.49it/s] 93%|█████████▎| 345263/371472 [5:18:19<2:04:08, 3.52it/s] 93%|█████████▎| 345264/371472 [5:18:20<1:59:36, 3.65it/s] 93%|█████████▎| 345265/371472 [5:18:20<1:58:53, 3.67it/s] 93%|████���████▎| 345266/371472 [5:18:20<1:59:00, 3.67it/s] 93%|█████████▎| 345267/371472 [5:18:21<2:04:34, 3.51it/s] 93%|█████████▎| 345268/371472 [5:18:21<2:04:43, 3.50it/s] 93%|█████████▎| 345269/371472 [5:18:21<1:58:53, 3.67it/s] 93%|█████████▎| 345270/371472 [5:18:21<1:55:44, 3.77it/s] 93%|█████████▎| 345271/371472 [5:18:22<1:54:50, 3.80it/s] 93%|█████████▎| 345272/371472 [5:18:22<2:03:54, 3.52it/s] 93%|█████████▎| 345273/371472 [5:18:22<2:36:16, 2.79it/s] 93%|█████████▎| 345274/371472 [5:18:23<2:22:55, 3.06it/s] 93%|█████████▎| 345275/371472 [5:18:23<2:25:45, 3.00it/s] 93%|█████████▎| 345276/371472 [5:18:23<2:18:35, 3.15it/s] 93%|█████████▎| 345277/371472 [5:18:24<2:10:49, 3.34it/s] 93%|█████████▎| 345278/371472 [5:18:24<2:09:06, 3.38it/s] 93%|█████████▎| 345279/371472 [5:18:24<2:02:46, 3.56it/s] 93%|█████████▎| 345280/371472 [5:18:24<1:59:26, 3.65it/s] {'loss': 2.5802, 'learning_rate': 1.6349199508715982e-07, 'epoch': 14.87} + 93%|█████████▎| 345280/371472 [5:18:24<1:59:26, 3.65it/s] 93%|█████████▎| 345281/371472 [5:18:25<2:00:28, 3.62it/s] 93%|█████████▎| 345282/371472 [5:18:25<1:56:33, 3.75it/s] 93%|█████████▎| 345283/371472 [5:18:25<2:04:02, 3.52it/s] 93%|█████████▎| 345284/371472 [5:18:25<1:59:49, 3.64it/s] 93%|█████████▎| 345285/371472 [5:18:26<2:02:16, 3.57it/s] 93%|█████████▎| 345286/371472 [5:18:26<2:08:13, 3.40it/s] 93%|█████████▎| 345287/371472 [5:18:26<2:02:19, 3.57it/s] 93%|█████████▎| 345288/371472 [5:18:27<1:59:33, 3.65it/s] 93%|█████████▎| 345289/371472 [5:18:27<1:57:21, 3.72it/s] 93%|█████████▎| 345290/371472 [5:18:27<1:56:30, 3.75it/s] 93%|█████████▎| 345291/371472 [5:18:27<2:03:37, 3.53it/s] 93%|█████████▎| 345292/371472 [5:18:28<2:02:58, 3.55it/s] 93%|█████████▎| 345293/371472 [5:18:28<2:01:22, 3.59it/s] 93%|█████████▎| 345294/371472 [5:18:28<1:59:53, 3.64it/s] 93%|█████████▎| 345295/371472 [5:18:29<2:09:22, 3.37it/s] 93%|█████████▎| 345296/371472 [5:18:29<2:04:00, 3.52it/s] 93%|█████████▎| 345297/371472 [5:18:29<2:07:02, 3.43it/s] 93%|█████████▎| 345298/371472 [5:18:30<2:16:36, 3.19it/s] 93%|█████████▎| 345299/371472 [5:18:30<2:10:48, 3.33it/s] 93%|█████████▎| 345300/371472 [5:18:30<2:11:19, 3.32it/s] {'loss': 2.5267, 'learning_rate': 1.634435131116809e-07, 'epoch': 14.87} + 93%|█████████▎| 345300/371472 [5:18:30<2:11:19, 3.32it/s] 93%|█████████▎| 345301/371472 [5:18:30<2:10:15, 3.35it/s] 93%|█████████▎| 345302/371472 [5:18:31<2:07:15, 3.43it/s] 93%|█████████▎| 345303/371472 [5:18:31<2:03:47, 3.52it/s] 93%|█████████▎| 345304/371472 [5:18:31<2:00:04, 3.63it/s] 93%|█████████▎| 345305/371472 [5:18:31<1:55:44, 3.77it/s] 93%|█████████▎| 345306/371472 [5:18:32<1:56:34, 3.74it/s] 93%|█████████▎| 345307/371472 [5:18:32<1:57:31, 3.71it/s] 93%|█████████▎| 345308/371472 [5:18:32<2:02:53, 3.55it/s] 93%|█████████▎| 345309/371472 [5:18:33<1:59:04, 3.66it/s] 93%|█████████▎| 345310/371472 [5:18:33<1:55:42, 3.77it/s] 93%|█████████▎| 345311/371472 [5:18:33<2:05:42, 3.47it/s] 93%|█████████▎| 345312/371472 [5:18:33<2:05:08, 3.48it/s] 93%|█████████▎| 345313/371472 [5:18:34<2:06:55, 3.43it/s] 93%|█████████▎| 345314/371472 [5:18:34<2:08:01, 3.41it/s] 93%|█████████▎| 345315/371472 [5:18:34<2:03:30, 3.53it/s] 93%|█████████▎| 345316/371472 [5:18:34<1:57:58, 3.70it/s] 93%|█████████▎| 345317/371472 [5:18:35<2:06:20, 3.45it/s] 93%|█████████▎| 345318/371472 [5:18:35<2:04:32, 3.50it/s] 93%|█████████▎| 345319/371472 [5:18:35<2:00:42, 3.61it/s] 93%|█████████▎| 345320/371472 [5:18:36<1:56:56, 3.73it/s] {'loss': 2.5051, 'learning_rate': 1.6339503113620207e-07, 'epoch': 14.87} + 93%|█████████▎| 345320/371472 [5:18:36<1:56:56, 3.73it/s] 93%|█████████▎| 345321/371472 [5:18:36<1:55:06, 3.79it/s] 93%|█████████▎| 345322/371472 [5:18:36<1:54:20, 3.81it/s] 93%|█████████▎| 345323/371472 [5:18:36<1:54:32, 3.81it/s] 93%|█████████▎| 345324/371472 [5:18:37<1:52:59, 3.86it/s] 93%|█████████▎| 345325/371472 [5:18:37<1:51:51, 3.90it/s] 93%|█████████▎| 345326/371472 [5:18:37<2:09:06, 3.38it/s] 93%|█████████▎| 345327/371472 [5:18:38<2:05:22, 3.48it/s] 93%|█████████▎| 345328/371472 [5:18:38<2:06:16, 3.45it/s] 93%|█████████▎| 345329/371472 [5:18:38<2:03:45, 3.52it/s] 93%|█████████▎| 345330/371472 [5:18:38<1:58:49, 3.67it/s] 93%|█████████▎| 345331/371472 [5:18:39<2:05:20, 3.48it/s] 93%|█████████▎| 345332/371472 [5:18:39<2:00:58, 3.60it/s] 93%|█████████▎| 345333/371472 [5:18:39<1:56:43, 3.73it/s] 93%|█████████▎| 345334/371472 [5:18:39<1:56:34, 3.74it/s] 93%|█████████▎| 345335/371472 [5:18:40<2:00:24, 3.62it/s] 93%|█████████▎| 345336/371472 [5:18:40<2:03:48, 3.52it/s] 93%|█████████▎| 345337/371472 [5:18:40<1:58:38, 3.67it/s] 93%|█████████▎| 345338/371472 [5:18:41<1:55:19, 3.78it/s] 93%|█████████▎| 345339/371472 [5:18:41<1:54:10, 3.81it/s] 93%|█████████▎| 345340/371472 [5:18:41<1:56:08, 3.75it/s] {'loss': 2.5974, 'learning_rate': 1.6334654916072312e-07, 'epoch': 14.87} + 93%|█████████▎| 345340/371472 [5:18:41<1:56:08, 3.75it/s] 93%|█████████▎| 345341/371472 [5:18:41<1:57:49, 3.70it/s] 93%|█████████▎| 345342/371472 [5:18:42<1:59:06, 3.66it/s] 93%|█████████▎| 345343/371472 [5:18:42<2:10:43, 3.33it/s] 93%|█████████▎| 345344/371472 [5:18:42<2:10:34, 3.34it/s] 93%|█████████▎| 345345/371472 [5:18:43<2:07:44, 3.41it/s] 93%|█████████▎| 345346/371472 [5:18:43<2:03:44, 3.52it/s] 93%|█████████▎| 345347/371472 [5:18:43<2:02:03, 3.57it/s] 93%|█████████▎| 345348/371472 [5:18:43<2:09:32, 3.36it/s] 93%|█████████▎| 345349/371472 [5:18:44<2:20:10, 3.11it/s] 93%|█████████▎| 345350/371472 [5:18:44<2:12:39, 3.28it/s] 93%|█████████▎| 345351/371472 [5:18:44<2:09:11, 3.37it/s] 93%|█████████▎| 345352/371472 [5:18:45<2:08:57, 3.38it/s] 93%|█████████▎| 345353/371472 [5:18:45<2:10:10, 3.34it/s] 93%|█████████▎| 345354/371472 [5:18:45<2:06:28, 3.44it/s] 93%|█████████▎| 345355/371472 [5:18:46<2:14:20, 3.24it/s] 93%|█████████▎| 345356/371472 [5:18:46<2:29:05, 2.92it/s] 93%|█████████▎| 345357/371472 [5:18:46<2:19:51, 3.11it/s] 93%|█████████▎| 345358/371472 [5:18:47<2:12:40, 3.28it/s] 93%|█████████▎| 345359/371472 [5:18:47<2:04:03, 3.51it/s] 93%|█████████▎| 345360/371472 [5:18:47<1:58:05, 3.69it/s] {'loss': 2.5985, 'learning_rate': 1.6329806718524427e-07, 'epoch': 14.88} + 93%|█████████▎| 345360/371472 [5:18:47<1:58:05, 3.69it/s] 93%|█████████▎| 345361/371472 [5:18:47<2:01:23, 3.58it/s] 93%|█████████▎| 345362/371472 [5:18:48<2:08:57, 3.37it/s] 93%|█████████▎| 345363/371472 [5:18:48<2:02:40, 3.55it/s] 93%|█████████▎| 345364/371472 [5:18:48<1:59:47, 3.63it/s] 93%|█████████▎| 345365/371472 [5:18:48<2:04:08, 3.51it/s] 93%|█████████▎| 345366/371472 [5:18:49<2:01:49, 3.57it/s] 93%|█████████▎| 345367/371472 [5:18:49<1:58:49, 3.66it/s] 93%|█████████▎| 345368/371472 [5:18:49<1:59:48, 3.63it/s] 93%|█████████▎| 345369/371472 [5:18:50<2:02:16, 3.56it/s] 93%|█████████▎| 345370/371472 [5:18:50<2:03:19, 3.53it/s] 93%|█████████▎| 345371/371472 [5:18:50<1:58:38, 3.67it/s] 93%|█████████▎| 345372/371472 [5:18:50<2:00:38, 3.61it/s] 93%|█████████▎| 345373/371472 [5:18:51<2:00:45, 3.60it/s] 93%|█████████▎| 345374/371472 [5:18:51<2:02:34, 3.55it/s] 93%|█████████▎| 345375/371472 [5:18:51<2:01:15, 3.59it/s] 93%|█████████▎| 345376/371472 [5:18:52<1:59:20, 3.64it/s] 93%|█████████▎| 345377/371472 [5:18:52<2:06:44, 3.43it/s] 93%|█████████▎| 345378/371472 [5:18:52<2:03:41, 3.52it/s] 93%|█████████▎| 345379/371472 [5:18:52<2:00:40, 3.60it/s] 93%|█████████▎| 345380/371472 [5:18:53<2:02:58, 3.54it/s] {'loss': 2.6631, 'learning_rate': 1.6324958520976534e-07, 'epoch': 14.88} + 93%|█████████▎| 345380/371472 [5:18:53<2:02:58, 3.54it/s] 93%|█████████▎| 345381/371472 [5:18:53<2:03:19, 3.53it/s] 93%|█████████▎| 345382/371472 [5:18:53<2:04:52, 3.48it/s] 93%|█████████▎| 345383/371472 [5:18:54<2:02:14, 3.56it/s] 93%|█████████▎| 345384/371472 [5:18:54<2:08:09, 3.39it/s] 93%|█████████▎| 345385/371472 [5:18:54<2:09:44, 3.35it/s] 93%|█████████▎| 345386/371472 [5:18:54<2:12:01, 3.29it/s] 93%|█████████▎| 345387/371472 [5:18:55<2:07:30, 3.41it/s] 93%|█████████▎| 345388/371472 [5:18:55<2:04:14, 3.50it/s] 93%|█████████▎| 345389/371472 [5:18:55<2:08:54, 3.37it/s] 93%|█████████▎| 345390/371472 [5:18:56<2:11:22, 3.31it/s] 93%|█████████▎| 345391/371472 [5:18:56<2:10:05, 3.34it/s] 93%|█████████▎| 345392/371472 [5:18:56<2:07:33, 3.41it/s] 93%|█████████▎| 345393/371472 [5:18:56<2:04:14, 3.50it/s] 93%|█████████▎| 345394/371472 [5:18:57<2:04:02, 3.50it/s] 93%|█████████▎| 345395/371472 [5:18:57<2:02:53, 3.54it/s] 93%|█████████▎| 345396/371472 [5:18:57<2:00:41, 3.60it/s] 93%|█████████▎| 345397/371472 [5:18:58<2:00:39, 3.60it/s] 93%|█████████▎| 345398/371472 [5:18:58<2:03:23, 3.52it/s] 93%|█████████▎| 345399/371472 [5:18:58<1:57:32, 3.70it/s] 93%|█████████▎| 345400/371472 [5:18:58<1:57:20, 3.70it/s] {'loss': 2.5874, 'learning_rate': 1.632011032342865e-07, 'epoch': 14.88} + 93%|█████████▎| 345400/371472 [5:18:58<1:57:20, 3.70it/s] 93%|█████████▎| 345401/371472 [5:18:59<1:58:12, 3.68it/s] 93%|█████████▎| 345402/371472 [5:18:59<1:57:32, 3.70it/s] 93%|█████████▎| 345403/371472 [5:18:59<1:53:04, 3.84it/s] 93%|█████████▎| 345404/371472 [5:18:59<1:53:45, 3.82it/s] 93%|█████████▎| 345405/371472 [5:19:00<1:59:12, 3.64it/s] 93%|█████████▎| 345406/371472 [5:19:00<2:02:33, 3.54it/s] 93%|█████████▎| 345407/371472 [5:19:00<2:01:00, 3.59it/s] 93%|█████████▎| 345408/371472 [5:19:01<2:00:52, 3.59it/s] 93%|█████████▎| 345409/371472 [5:19:01<1:58:00, 3.68it/s] 93%|█████████▎| 345410/371472 [5:19:01<2:09:42, 3.35it/s] 93%|█████████▎| 345411/371472 [5:19:01<2:05:37, 3.46it/s] 93%|█████████▎| 345412/371472 [5:19:02<1:58:23, 3.67it/s] 93%|█████████▎| 345413/371472 [5:19:02<1:57:23, 3.70it/s] 93%|█████████▎| 345414/371472 [5:19:02<1:57:41, 3.69it/s] 93%|█████████▎| 345415/371472 [5:19:03<2:02:49, 3.54it/s] 93%|█████████▎| 345416/371472 [5:19:03<1:58:37, 3.66it/s] 93%|█████████▎| 345417/371472 [5:19:03<2:11:08, 3.31it/s] 93%|█████████▎| 345418/371472 [5:19:03<2:08:03, 3.39it/s] 93%|█████████▎| 345419/371472 [5:19:04<2:06:06, 3.44it/s] 93%|█████████▎| 345420/371472 [5:19:04<2:25:52, 2.98it/s] {'loss': 2.7137, 'learning_rate': 1.6315262125880756e-07, 'epoch': 14.88} + 93%|█████████▎| 345420/371472 [5:19:04<2:25:52, 2.98it/s] 93%|█████████▎| 345421/371472 [5:19:04<2:16:02, 3.19it/s] 93%|█████████▎| 345422/371472 [5:19:05<2:10:10, 3.34it/s] 93%|█████████▎| 345423/371472 [5:19:05<2:06:39, 3.43it/s] 93%|█████████▎| 345424/371472 [5:19:05<2:12:41, 3.27it/s] 93%|█████████▎| 345425/371472 [5:19:06<2:11:31, 3.30it/s] 93%|█████████▎| 345426/371472 [5:19:06<2:06:03, 3.44it/s] 93%|█████████▎| 345427/371472 [5:19:06<2:03:59, 3.50it/s] 93%|█████████▎| 345428/371472 [5:19:06<2:00:55, 3.59it/s] 93%|█████████▎| 345429/371472 [5:19:07<1:58:08, 3.67it/s] 93%|█████████▎| 345430/371472 [5:19:07<1:57:09, 3.70it/s] 93%|█████████▎| 345431/371472 [5:19:07<1:55:19, 3.76it/s] 93%|█████████▎| 345432/371472 [5:19:07<2:00:21, 3.61it/s] 93%|█████████▎| 345433/371472 [5:19:08<1:57:54, 3.68it/s] 93%|█████████▎| 345434/371472 [5:19:08<2:02:51, 3.53it/s] 93%|█████████▎| 345435/371472 [5:19:08<2:01:27, 3.57it/s] 93%|█████████▎| 345436/371472 [5:19:09<2:00:30, 3.60it/s] 93%|█████████▎| 345437/371472 [5:19:09<1:57:52, 3.68it/s] 93%|█████████▎| 345438/371472 [5:19:09<1:57:27, 3.69it/s] 93%|█████████▎| 345439/371472 [5:19:09<1:55:49, 3.75it/s] 93%|█████████▎| 345440/371472 [5:19:10<1:54:22, 3.79it/s] {'loss': 2.7381, 'learning_rate': 1.631041392833287e-07, 'epoch': 14.88} + 93%|█████████▎| 345440/371472 [5:19:10<1:54:22, 3.79it/s] 93%|█████████▎| 345441/371472 [5:19:10<2:06:56, 3.42it/s] 93%|█████████▎| 345442/371472 [5:19:10<2:02:11, 3.55it/s] 93%|█████████▎| 345443/371472 [5:19:11<2:05:02, 3.47it/s] 93%|█████████▎| 345444/371472 [5:19:11<2:00:10, 3.61it/s] 93%|█████████▎| 345445/371472 [5:19:11<1:58:23, 3.66it/s] 93%|█████████▎| 345446/371472 [5:19:11<1:54:00, 3.80it/s] 93%|█████████▎| 345447/371472 [5:19:12<1:53:24, 3.82it/s] 93%|█████████▎| 345448/371472 [5:19:12<1:53:23, 3.82it/s] 93%|█████████▎| 345449/371472 [5:19:12<1:59:30, 3.63it/s] 93%|█████████▎| 345450/371472 [5:19:12<1:56:18, 3.73it/s] 93%|█████████▎| 345451/371472 [5:19:13<2:01:18, 3.57it/s] 93%|█████████▎| 345452/371472 [5:19:13<2:06:09, 3.44it/s] 93%|█████████▎| 345453/371472 [5:19:13<2:03:35, 3.51it/s] 93%|█████████▎| 345454/371472 [5:19:14<2:09:40, 3.34it/s] 93%|█████████▎| 345455/371472 [5:19:14<2:12:23, 3.28it/s] 93%|█████████▎| 345456/371472 [5:19:14<2:07:40, 3.40it/s] 93%|█████████▎| 345457/371472 [5:19:14<2:02:55, 3.53it/s] 93%|█████████▎| 345458/371472 [5:19:15<2:04:09, 3.49it/s] 93%|█████████▎| 345459/371472 [5:19:15<2:06:21, 3.43it/s] 93%|█████████▎| 345460/371472 [5:19:15<1:59:34, 3.63it/s] {'loss': 2.6696, 'learning_rate': 1.6305565730784976e-07, 'epoch': 14.88} + 93%|█████████▎| 345460/371472 [5:19:15<1:59:34, 3.63it/s] 93%|█████████▎| 345461/371472 [5:19:16<2:00:51, 3.59it/s] 93%|█████████▎| 345462/371472 [5:19:16<2:01:40, 3.56it/s] 93%|█████████▎| 345463/371472 [5:19:16<2:00:19, 3.60it/s] 93%|█████████▎| 345464/371472 [5:19:16<2:03:07, 3.52it/s] 93%|█████████▎| 345465/371472 [5:19:17<1:57:50, 3.68it/s] 93%|█████████▎| 345466/371472 [5:19:17<2:04:06, 3.49it/s] 93%|█████████▎| 345467/371472 [5:19:17<2:05:07, 3.46it/s] 93%|█████████▎| 345468/371472 [5:19:18<2:06:54, 3.42it/s] 93%|█████████▎| 345469/371472 [5:19:18<2:04:46, 3.47it/s] 93%|█████████▎| 345470/371472 [5:19:18<2:01:39, 3.56it/s] 93%|█████████▎| 345471/371472 [5:19:18<2:06:57, 3.41it/s] 93%|█████████▎| 345472/371472 [5:19:19<2:04:25, 3.48it/s] 93%|█████████▎| 345473/371472 [5:19:19<2:01:37, 3.56it/s] 93%|█████████▎| 345474/371472 [5:19:19<2:08:50, 3.36it/s] 93%|█████████▎| 345475/371472 [5:19:20<2:06:59, 3.41it/s] 93%|█████████▎| 345476/371472 [5:19:20<2:07:00, 3.41it/s] 93%|█████████▎| 345477/371472 [5:19:20<2:10:06, 3.33it/s] 93%|█████████▎| 345478/371472 [5:19:21<2:08:45, 3.36it/s] 93%|█████████▎| 345479/371472 [5:19:21<2:06:04, 3.44it/s] 93%|█████████▎| 345480/371472 [5:19:21<2:01:26, 3.57it/s] {'loss': 2.6719, 'learning_rate': 1.630071753323709e-07, 'epoch': 14.88} + 93%|█████████▎| 345480/371472 [5:19:21<2:01:26, 3.57it/s] 93%|█████████▎| 345481/371472 [5:19:21<2:01:22, 3.57it/s] 93%|█████████▎| 345482/371472 [5:19:22<1:57:48, 3.68it/s] 93%|█████████▎| 345483/371472 [5:19:22<1:54:51, 3.77it/s] 93%|█████████▎| 345484/371472 [5:19:22<1:54:29, 3.78it/s] 93%|█████████▎| 345485/371472 [5:19:22<1:58:28, 3.66it/s] 93%|█████████▎| 345486/371472 [5:19:23<1:57:44, 3.68it/s] 93%|█████████▎| 345487/371472 [5:19:23<2:01:09, 3.57it/s] 93%|█████████▎| 345488/371472 [5:19:23<2:00:35, 3.59it/s] 93%|█████████▎| 345489/371472 [5:19:23<1:56:24, 3.72it/s] 93%|█████████▎| 345490/371472 [5:19:24<1:55:17, 3.76it/s] 93%|█████████▎| 345491/371472 [5:19:24<1:52:17, 3.86it/s] 93%|█████████▎| 345492/371472 [5:19:24<1:49:40, 3.95it/s] 93%|█████████▎| 345493/371472 [5:19:25<1:55:40, 3.74it/s] 93%|█████████▎| 345494/371472 [5:19:25<1:55:58, 3.73it/s] 93%|█████████▎| 345495/371472 [5:19:25<2:09:11, 3.35it/s] 93%|█████████▎| 345496/371472 [5:19:25<2:05:47, 3.44it/s] 93%|█████████▎| 345497/371472 [5:19:26<2:07:37, 3.39it/s] 93%|█████████▎| 345498/371472 [5:19:26<2:04:39, 3.47it/s] 93%|█████████▎| 345499/371472 [5:19:26<2:03:16, 3.51it/s] 93%|█████████▎| 345500/371472 [5:19:27<2:01:31, 3.56it/s] {'loss': 2.4692, 'learning_rate': 1.6295869335689198e-07, 'epoch': 14.88} + 93%|█████████▎| 345500/371472 [5:19:27<2:01:31, 3.56it/s] 93%|█████████▎| 345501/371472 [5:19:27<2:00:02, 3.61it/s] 93%|█████████▎| 345502/371472 [5:19:27<1:59:50, 3.61it/s] 93%|█████████▎| 345503/371472 [5:19:27<1:55:12, 3.76it/s] 93%|█████████▎| 345504/371472 [5:19:28<1:54:55, 3.77it/s] 93%|█████████▎| 345505/371472 [5:19:28<1:58:10, 3.66it/s] 93%|█████████▎| 345506/371472 [5:19:28<1:55:57, 3.73it/s] 93%|█████████▎| 345507/371472 [5:19:29<2:07:20, 3.40it/s] 93%|█████████▎| 345508/371472 [5:19:29<2:02:40, 3.53it/s] 93%|█████████▎| 345509/371472 [5:19:29<2:04:36, 3.47it/s] 93%|█████████▎| 345510/371472 [5:19:29<2:00:57, 3.58it/s] 93%|█████████▎| 345511/371472 [5:19:30<2:11:07, 3.30it/s] 93%|█████████▎| 345512/371472 [5:19:30<2:11:21, 3.29it/s] 93%|█████████▎| 345513/371472 [5:19:30<2:06:13, 3.43it/s] 93%|█████████▎| 345514/371472 [5:19:31<2:04:08, 3.48it/s] 93%|█████████▎| 345515/371472 [5:19:31<2:15:29, 3.19it/s] 93%|█████████▎| 345516/371472 [5:19:31<2:09:27, 3.34it/s] 93%|█████████▎| 345517/371472 [5:19:31<2:07:06, 3.40it/s] 93%|█████████▎| 345518/371472 [5:19:32<2:06:35, 3.42it/s] 93%|█████████▎| 345519/371472 [5:19:32<2:02:24, 3.53it/s] 93%|█████████▎| 345520/371472 [5:19:32<1:57:44, 3.67it/s] {'loss': 2.6071, 'learning_rate': 1.6291021138141303e-07, 'epoch': 14.88} + 93%|█████████▎| 345520/371472 [5:19:32<1:57:44, 3.67it/s] 93%|█████████▎| 345521/371472 [5:19:33<1:55:47, 3.74it/s] 93%|█████████▎| 345522/371472 [5:19:33<2:03:13, 3.51it/s] 93%|█████████▎| 345523/371472 [5:19:33<2:11:10, 3.30it/s] 93%|█████████▎| 345524/371472 [5:19:33<2:11:03, 3.30it/s] 93%|█████████▎| 345525/371472 [5:19:34<2:10:11, 3.32it/s] 93%|█████████▎| 345526/371472 [5:19:34<2:12:42, 3.26it/s] 93%|█████████▎| 345527/371472 [5:19:34<2:10:33, 3.31it/s] 93%|█████████▎| 345528/371472 [5:19:35<2:09:48, 3.33it/s] 93%|█████████▎| 345529/371472 [5:19:35<2:02:21, 3.53it/s] 93%|█████████▎| 345530/371472 [5:19:35<2:08:46, 3.36it/s] 93%|█████████▎| 345531/371472 [5:19:35<2:00:37, 3.58it/s] 93%|█████████▎| 345532/371472 [5:19:36<1:56:19, 3.72it/s] 93%|█████████▎| 345533/371472 [5:19:36<1:57:06, 3.69it/s] 93%|█████████▎| 345534/371472 [5:19:36<2:06:24, 3.42it/s] 93%|█████████▎| 345535/371472 [5:19:37<2:04:50, 3.46it/s] 93%|█████████▎| 345536/371472 [5:19:37<1:59:12, 3.63it/s] 93%|█████████▎| 345537/371472 [5:19:37<1:56:45, 3.70it/s] 93%|█████████▎| 345538/371472 [5:19:37<2:00:13, 3.60it/s] 93%|█████████▎| 345539/371472 [5:19:38<2:02:19, 3.53it/s] 93%|█████████▎| 345540/371472 [5:19:38<1:58:48, 3.64it/s] {'loss': 2.5103, 'learning_rate': 1.6286172940593418e-07, 'epoch': 14.88} + 93%|█████████▎| 345540/371472 [5:19:38<1:58:48, 3.64it/s] 93%|█████████▎| 345541/371472 [5:19:38<1:57:46, 3.67it/s] 93%|█████████▎| 345542/371472 [5:19:39<2:02:34, 3.53it/s] 93%|█████████▎| 345543/371472 [5:19:39<2:01:40, 3.55it/s] 93%|█████████▎| 345544/371472 [5:19:39<1:58:55, 3.63it/s] 93%|█████████▎| 345545/371472 [5:19:39<2:06:14, 3.42it/s] 93%|█████████▎| 345546/371472 [5:19:40<2:03:11, 3.51it/s] 93%|█████████▎| 345547/371472 [5:19:40<2:07:23, 3.39it/s] 93%|█████████▎| 345548/371472 [5:19:40<2:07:44, 3.38it/s] 93%|█████████▎| 345549/371472 [5:19:41<2:06:20, 3.42it/s] 93%|█████████▎| 345550/371472 [5:19:41<2:03:08, 3.51it/s] 93%|█████████▎| 345551/371472 [5:19:41<2:10:30, 3.31it/s] 93%|█████████▎| 345552/371472 [5:19:41<2:06:15, 3.42it/s] 93%|█████████▎| 345553/371472 [5:19:42<2:10:24, 3.31it/s] 93%|█████████▎| 345554/371472 [5:19:42<2:05:33, 3.44it/s] 93%|█████████▎| 345555/371472 [5:19:42<2:01:39, 3.55it/s] 93%|█████████▎| 345556/371472 [5:19:43<1:57:38, 3.67it/s] 93%|█████████▎| 345557/371472 [5:19:43<2:05:13, 3.45it/s] 93%|█████████▎| 345558/371472 [5:19:43<2:03:27, 3.50it/s] 93%|█████████▎| 345559/371472 [5:19:43<1:58:04, 3.66it/s] 93%|█████████▎| 345560/371472 [5:19:44<2:07:41, 3.38it/s] {'loss': 2.6904, 'learning_rate': 1.6281324743045525e-07, 'epoch': 14.88} + 93%|█████████▎| 345560/371472 [5:19:44<2:07:41, 3.38it/s] 93%|█████████▎| 345561/371472 [5:19:44<2:06:38, 3.41it/s] 93%|█████████▎| 345562/371472 [5:19:44<2:00:56, 3.57it/s] 93%|█████████▎| 345563/371472 [5:19:45<2:00:40, 3.58it/s] 93%|█████████▎| 345564/371472 [5:19:45<2:01:30, 3.55it/s] 93%|█████████▎| 345565/371472 [5:19:45<2:01:48, 3.55it/s] 93%|█████████▎| 345566/371472 [5:19:45<2:00:14, 3.59it/s] 93%|█████████▎| 345567/371472 [5:19:46<2:02:01, 3.54it/s] 93%|█████████▎| 345568/371472 [5:19:46<2:03:37, 3.49it/s] 93%|█████████▎| 345569/371472 [5:19:46<2:05:43, 3.43it/s] 93%|█████████▎| 345570/371472 [5:19:47<2:03:22, 3.50it/s] 93%|█████████▎| 345571/371472 [5:19:47<2:06:33, 3.41it/s] 93%|█████████▎| 345572/371472 [5:19:47<2:15:46, 3.18it/s] 93%|█████████▎| 345573/371472 [5:19:48<2:11:48, 3.27it/s] 93%|█████████▎| 345574/371472 [5:19:48<2:11:23, 3.29it/s] 93%|█████████▎| 345575/371472 [5:19:48<2:08:43, 3.35it/s] 93%|█████████▎| 345576/371472 [5:19:48<2:04:40, 3.46it/s] 93%|█████████▎| 345577/371472 [5:19:49<2:09:02, 3.34it/s] 93%|█████████▎| 345578/371472 [5:19:49<2:06:00, 3.43it/s] 93%|█████████▎| 345579/371472 [5:19:49<2:02:48, 3.51it/s] 93%|█████████▎| 345580/371472 [5:19:50<2:03:50, 3.48it/s] {'loss': 2.4942, 'learning_rate': 1.627647654549764e-07, 'epoch': 14.88} + 93%|█████████▎| 345580/371472 [5:19:50<2:03:50, 3.48it/s] 93%|█████████▎| 345581/371472 [5:19:50<2:13:17, 3.24it/s] 93%|█████████▎| 345582/371472 [5:19:50<2:09:49, 3.32it/s] 93%|█████████▎| 345583/371472 [5:19:50<2:06:09, 3.42it/s] 93%|█████████▎| 345584/371472 [5:19:51<2:08:03, 3.37it/s] 93%|█████████▎| 345585/371472 [5:19:51<2:05:38, 3.43it/s] 93%|█████████▎| 345586/371472 [5:19:51<2:01:58, 3.54it/s] 93%|█████████▎| 345587/371472 [5:19:52<1:58:05, 3.65it/s] 93%|█████████▎| 345588/371472 [5:19:52<2:03:07, 3.50it/s] 93%|█████████▎| 345589/371472 [5:19:52<2:07:05, 3.39it/s] 93%|█████████▎| 345590/371472 [5:19:52<2:01:14, 3.56it/s] 93%|█████████▎| 345591/371472 [5:19:53<2:06:16, 3.42it/s] 93%|█████████▎| 345592/371472 [5:19:53<2:02:29, 3.52it/s] 93%|█████████▎| 345593/371472 [5:19:53<1:59:55, 3.60it/s] 93%|█████████▎| 345594/371472 [5:19:54<2:05:56, 3.42it/s] 93%|█████████▎| 345595/371472 [5:19:54<2:01:19, 3.55it/s] 93%|█████████▎| 345596/371472 [5:19:54<1:58:35, 3.64it/s] 93%|█████████▎| 345597/371472 [5:19:54<1:57:33, 3.67it/s] 93%|█████████▎| 345598/371472 [5:19:55<1:56:21, 3.71it/s] 93%|█████████▎| 345599/371472 [5:19:55<1:54:46, 3.76it/s] 93%|█████████▎| 345600/371472 [5:19:55<1:55:06, 3.75it/s] {'loss': 2.4276, 'learning_rate': 1.6271628347949744e-07, 'epoch': 14.89} + 93%|█████████▎| 345600/371472 [5:19:55<1:55:06, 3.75it/s] 93%|█████████▎| 345601/371472 [5:19:55<1:55:04, 3.75it/s] 93%|█████████▎| 345602/371472 [5:19:56<1:54:34, 3.76it/s] 93%|█████████▎| 345603/371472 [5:19:56<1:53:46, 3.79it/s] 93%|████████��▎| 345604/371472 [5:19:56<2:06:51, 3.40it/s] 93%|█████████▎| 345605/371472 [5:19:57<2:03:38, 3.49it/s] 93%|█████████▎| 345606/371472 [5:19:57<2:00:01, 3.59it/s] 93%|█████████▎| 345607/371472 [5:19:57<1:58:07, 3.65it/s] 93%|█████████▎| 345608/371472 [5:19:57<1:55:13, 3.74it/s] 93%|█████████▎| 345609/371472 [5:19:58<2:02:37, 3.52it/s] 93%|█████████▎| 345610/371472 [5:19:58<2:01:07, 3.56it/s] 93%|█████████▎| 345611/371472 [5:19:58<2:14:39, 3.20it/s] 93%|█████████▎| 345612/371472 [5:19:59<2:16:38, 3.15it/s] 93%|█████████▎| 345613/371472 [5:19:59<2:07:48, 3.37it/s] 93%|█████████▎| 345614/371472 [5:19:59<2:06:28, 3.41it/s] 93%|█████████▎| 345615/371472 [5:20:00<2:08:05, 3.36it/s] 93%|█████████▎| 345616/371472 [5:20:00<2:04:18, 3.47it/s] 93%|█████████▎| 345617/371472 [5:20:00<2:06:03, 3.42it/s] 93%|█████████▎| 345618/371472 [5:20:00<2:01:56, 3.53it/s] 93%|█████████▎| 345619/371472 [5:20:01<2:00:33, 3.57it/s] 93%|█████████▎| 345620/371472 [5:20:01<2:01:08, 3.56it/s] {'loss': 2.5221, 'learning_rate': 1.6266780150401862e-07, 'epoch': 14.89} + 93%|█████████▎| 345620/371472 [5:20:01<2:01:08, 3.56it/s] 93%|█████████▎| 345621/371472 [5:20:01<2:01:54, 3.53it/s] 93%|█████████▎| 345622/371472 [5:20:02<2:03:29, 3.49it/s] 93%|█████████▎| 345623/371472 [5:20:02<2:11:33, 3.27it/s] 93%|█████████▎| 345624/371472 [5:20:02<2:03:02, 3.50it/s] 93%|█████████▎| 345625/371472 [5:20:02<1:59:21, 3.61it/s] 93%|█████████▎| 345626/371472 [5:20:03<2:01:55, 3.53it/s] 93%|█████████▎| 345627/371472 [5:20:03<1:57:04, 3.68it/s] 93%|█████████▎| 345628/371472 [5:20:03<1:55:43, 3.72it/s] 93%|█████████▎| 345629/371472 [5:20:03<1:56:50, 3.69it/s] 93%|█████████▎| 345630/371472 [5:20:04<1:59:40, 3.60it/s] 93%|█████████▎| 345631/371472 [5:20:04<1:59:03, 3.62it/s] 93%|█████████▎| 345632/371472 [5:20:04<2:01:23, 3.55it/s] 93%|█████████▎| 345633/371472 [5:20:05<2:00:23, 3.58it/s] 93%|█████████▎| 345634/371472 [5:20:05<1:58:32, 3.63it/s] 93%|█████████▎| 345635/371472 [5:20:05<1:58:26, 3.64it/s] 93%|█████████▎| 345636/371472 [5:20:05<2:07:41, 3.37it/s] 93%|█████████▎| 345637/371472 [5:20:06<2:05:49, 3.42it/s] 93%|█████████▎| 345638/371472 [5:20:06<2:04:48, 3.45it/s] 93%|█████████▎| 345639/371472 [5:20:06<2:16:45, 3.15it/s] 93%|█████████▎| 345640/371472 [5:20:07<2:09:31, 3.32it/s] {'loss': 2.6055, 'learning_rate': 1.626193195285397e-07, 'epoch': 14.89} + 93%|█████████▎| 345640/371472 [5:20:07<2:09:31, 3.32it/s] 93%|█████████▎| 345641/371472 [5:20:07<2:05:48, 3.42it/s] 93%|█████████▎| 345642/371472 [5:20:07<2:02:32, 3.51it/s] 93%|█████████▎| 345643/371472 [5:20:08<2:08:43, 3.34it/s] 93%|█████████▎| 345644/371472 [5:20:08<2:12:02, 3.26it/s] 93%|█████████▎| 345645/371472 [5:20:08<2:17:45, 3.12it/s] 93%|█████████▎| 345646/371472 [5:20:09<2:27:31, 2.92it/s] 93%|█████████▎| 345647/371472 [5:20:09<2:16:28, 3.15it/s] 93%|█████████▎| 345648/371472 [5:20:09<2:10:54, 3.29it/s] 93%|█████████▎| 345649/371472 [5:20:09<2:02:10, 3.52it/s] 93%|█████████▎| 345650/371472 [5:20:10<2:00:56, 3.56it/s] 93%|█████████▎| 345651/371472 [5:20:10<2:11:23, 3.28it/s] 93%|█████████▎| 345652/371472 [5:20:10<2:17:23, 3.13it/s] 93%|█████████▎| 345653/371472 [5:20:11<2:18:40, 3.10it/s] 93%|█████████▎| 345654/371472 [5:20:11<2:14:42, 3.19it/s] 93%|█████████▎| 345655/371472 [5:20:11<2:11:30, 3.27it/s] 93%|█████████▎| 345656/371472 [5:20:12<2:07:22, 3.38it/s] 93%|█████████▎| 345657/371472 [5:20:12<2:03:38, 3.48it/s] 93%|█████████▎| 345658/371472 [5:20:12<2:02:09, 3.52it/s] 93%|█████████▎| 345659/371472 [5:20:12<2:01:08, 3.55it/s] 93%|█████████▎| 345660/371472 [5:20:13<2:01:54, 3.53it/s] {'loss': 2.516, 'learning_rate': 1.6257083755306082e-07, 'epoch': 14.89} + 93%|██████��██▎| 345660/371472 [5:20:13<2:01:54, 3.53it/s] 93%|█████████▎| 345661/371472 [5:20:13<2:08:40, 3.34it/s] 93%|█████████▎| 345662/371472 [5:20:13<2:05:49, 3.42it/s] 93%|█████████▎| 345663/371472 [5:20:14<2:05:39, 3.42it/s] 93%|█████████▎| 345664/371472 [5:20:14<2:04:47, 3.45it/s] 93%|█████████▎| 345665/371472 [5:20:14<2:04:52, 3.44it/s] 93%|█████████▎| 345666/371472 [5:20:14<2:01:50, 3.53it/s] 93%|█████████▎| 345667/371472 [5:20:15<2:00:44, 3.56it/s] 93%|█████████▎| 345668/371472 [5:20:15<2:03:19, 3.49it/s] 93%|█████████▎| 345669/371472 [5:20:15<2:07:41, 3.37it/s] 93%|█████████▎| 345670/371472 [5:20:16<2:13:34, 3.22it/s] 93%|█████████▎| 345671/371472 [5:20:16<2:09:58, 3.31it/s] 93%|█████████▎| 345672/371472 [5:20:16<2:12:35, 3.24it/s] 93%|█████████▎| 345673/371472 [5:20:17<2:13:38, 3.22it/s] 93%|█████████▎| 345674/371472 [5:20:17<2:14:11, 3.20it/s] 93%|█████████▎| 345675/371472 [5:20:17<2:19:46, 3.08it/s] 93%|█████████▎| 345676/371472 [5:20:18<2:12:59, 3.23it/s] 93%|█████████▎| 345677/371472 [5:20:18<2:07:04, 3.38it/s] 93%|█████████▎| 345678/371472 [5:20:18<2:05:11, 3.43it/s] 93%|█████████▎| 345679/371472 [5:20:18<2:00:14, 3.58it/s] 93%|█████████▎| 345680/371472 [5:20:19<1:57:00, 3.67it/s] {'loss': 2.7503, 'learning_rate': 1.625223555775819e-07, 'epoch': 14.89} + 93%|█████████▎| 345680/371472 [5:20:19<1:57:00, 3.67it/s] 93%|█████████▎| 345681/371472 [5:20:19<2:00:32, 3.57it/s] 93%|█████████▎| 345682/371472 [5:20:19<2:04:38, 3.45it/s] 93%|█████████▎| 345683/371472 [5:20:20<2:06:49, 3.39it/s] 93%|█████████▎| 345684/371472 [5:20:20<2:08:28, 3.35it/s] 93%|█████████▎| 345685/371472 [5:20:20<2:01:50, 3.53it/s] 93%|█████████▎| 345686/371472 [5:20:20<1:56:45, 3.68it/s] 93%|█████████▎| 345687/371472 [5:20:21<1:52:38, 3.81it/s] 93%|█████████▎| 345688/371472 [5:20:21<1:55:10, 3.73it/s] 93%|█████████▎| 345689/371472 [5:20:21<1:56:20, 3.69it/s] 93%|█████████▎| 345690/371472 [5:20:21<1:58:57, 3.61it/s] 93%|█████████▎| 345691/371472 [5:20:22<1:57:12, 3.67it/s] 93%|█████████▎| 345692/371472 [5:20:22<1:59:34, 3.59it/s] 93%|█████████▎| 345693/371472 [5:20:22<1:56:43, 3.68it/s] 93%|█████████▎| 345694/371472 [5:20:22<1:55:36, 3.72it/s] 93%|█████████▎| 345695/371472 [5:20:23<2:03:44, 3.47it/s] 93%|█████████▎| 345696/371472 [5:20:23<2:04:15, 3.46it/s] 93%|█████████▎| 345697/371472 [5:20:23<2:06:27, 3.40it/s] 93%|█████████▎| 345698/371472 [5:20:24<2:02:57, 3.49it/s] 93%|█████████▎| 345699/371472 [5:20:24<2:11:04, 3.28it/s] 93%|█████████▎| 345700/371472 [5:20:24<2:04:12, 3.46it/s] {'loss': 2.5931, 'learning_rate': 1.6247387360210307e-07, 'epoch': 14.89} + 93%|█████████▎| 345700/371472 [5:20:24<2:04:12, 3.46it/s] 93%|█████████▎| 345701/371472 [5:20:25<2:23:20, 3.00it/s] 93%|█████████▎| 345702/371472 [5:20:25<2:21:38, 3.03it/s] 93%|█████████▎| 345703/371472 [5:20:25<2:17:29, 3.12it/s] 93%|█████████▎| 345704/371472 [5:20:26<2:11:08, 3.27it/s] 93%|█████████▎| 345705/371472 [5:20:26<2:06:18, 3.40it/s] 93%|█████████▎| 345706/371472 [5:20:26<2:09:42, 3.31it/s] 93%|█████████▎| 345707/371472 [5:20:26<2:03:29, 3.48it/s] 93%|█████████▎| 345708/371472 [5:20:27<2:00:49, 3.55it/s] 93%|█████████▎| 345709/371472 [5:20:27<2:06:21, 3.40it/s] 93%|█████████▎| 345710/371472 [5:20:27<2:07:56, 3.36it/s] 93%|█████████▎| 345711/371472 [5:20:28<2:02:01, 3.52it/s] 93%|█████████▎| 345712/371472 [5:20:28<2:06:14, 3.40it/s] 93%|█████████▎| 345713/371472 [5:20:28<2:07:34, 3.36it/s] 93%|█████████▎| 345714/371472 [5:20:28<2:00:09, 3.57it/s] 93%|█████████▎| 345715/371472 [5:20:29<1:55:37, 3.71it/s] 93%|█████████▎| 345716/371472 [5:20:29<1:56:24, 3.69it/s] 93%|█████████▎| 345717/371472 [5:20:29<1:58:29, 3.62it/s] 93%|█████████▎| 345718/371472 [5:20:30<1:58:44, 3.61it/s] 93%|█████████▎| 345719/371472 [5:20:30<1:59:00, 3.61it/s] 93%|█████████▎| 345720/371472 [5:20:30<2:01:29, 3.53it/s] {'loss': 2.7622, 'learning_rate': 1.624253916266241e-07, 'epoch': 14.89} + 93%|█████████▎| 345720/371472 [5:20:30<2:01:29, 3.53it/s] 93%|█████████▎| 345721/371472 [5:20:30<2:07:12, 3.37it/s] 93%|█████████▎| 345722/371472 [5:20:31<2:04:02, 3.46it/s] 93%|█████████▎| 345723/371472 [5:20:31<2:02:46, 3.50it/s] 93%|█████████▎| 345724/371472 [5:20:31<2:00:05, 3.57it/s] 93%|█████████▎| 345725/371472 [5:20:32<2:07:57, 3.35it/s] 93%|█████████▎| 345726/371472 [5:20:32<2:01:49, 3.52it/s] 93%|█████████▎| 345727/371472 [5:20:32<2:04:53, 3.44it/s] 93%|█████████▎| 345728/371472 [5:20:32<2:10:08, 3.30it/s] 93%|█████████▎| 345729/371472 [5:20:33<2:18:24, 3.10it/s] 93%|█████████▎| 345730/371472 [5:20:33<2:14:28, 3.19it/s] 93%|█████████▎| 345731/371472 [5:20:33<2:08:17, 3.34it/s] 93%|█████████▎| 345732/371472 [5:20:34<2:15:54, 3.16it/s] 93%|█████████▎| 345733/371472 [5:20:34<2:08:56, 3.33it/s] 93%|█████████▎| 345734/371472 [5:20:34<2:04:36, 3.44it/s] 93%|█████████▎| 345735/371472 [5:20:35<2:01:40, 3.53it/s] 93%|█████████▎| 345736/371472 [5:20:35<2:01:26, 3.53it/s] 93%|█████████▎| 345737/371472 [5:20:35<2:00:28, 3.56it/s] 93%|█████████▎| 345738/371472 [5:20:35<1:56:55, 3.67it/s] 93%|█████████▎| 345739/371472 [5:20:36<2:00:09, 3.57it/s] 93%|█████████▎| 345740/371472 [5:20:36<2:05:01, 3.43it/s] {'loss': 2.5193, 'learning_rate': 1.6237690965114526e-07, 'epoch': 14.89} + 93%|█████████▎| 345740/371472 [5:20:36<2:05:01, 3.43it/s] 93%|█████████▎| 345741/371472 [5:20:36<2:11:18, 3.27it/s] 93%|█████████▎| 345742/371472 [5:20:37<2:10:16, 3.29it/s] 93%|█████████▎| 345743/371472 [5:20:37<2:06:23, 3.39it/s] 93%|█████████▎| 345744/371472 [5:20:37<2:10:54, 3.28it/s] 93%|█████████▎| 345745/371472 [5:20:37<2:04:59, 3.43it/s] 93%|█████████▎| 345746/371472 [5:20:38<2:01:54, 3.52it/s] 93%|█████████▎| 345747/371472 [5:20:38<1:57:50, 3.64it/s] 93%|█████████▎| 345748/371472 [5:20:38<1:57:36, 3.65it/s] 93%|█████████▎| 345749/371472 [5:20:39<1:54:22, 3.75it/s] 93%|█████████▎| 345750/371472 [5:20:39<1:58:42, 3.61it/s] 93%|█████████▎| 345751/371472 [5:20:39<1:56:50, 3.67it/s] 93%|█████████▎| 345752/371472 [5:20:39<1:56:19, 3.68it/s] 93%|█████████▎| 345753/371472 [5:20:40<1:54:39, 3.74it/s] 93%|█████████▎| 345754/371472 [5:20:40<2:03:26, 3.47it/s] 93%|█████████▎| 345755/371472 [5:20:40<1:58:16, 3.62it/s] 93%|█████████▎| 345756/371472 [5:20:41<2:06:26, 3.39it/s] 93%|█████████▎| 345757/371472 [5:20:41<2:03:18, 3.48it/s] 93%|█████████▎| 345758/371472 [5:20:41<2:02:19, 3.50it/s] 93%|█████████▎| 345759/371472 [5:20:41<2:03:49, 3.46it/s] 93%|█████████▎| 345760/371472 [5:20:42<1:58:32, 3.62it/s] {'loss': 2.612, 'learning_rate': 1.6232842767566633e-07, 'epoch': 14.89} + 93%|█████████▎| 345760/371472 [5:20:42<1:58:32, 3.62it/s] 93%|█████████▎| 345761/371472 [5:20:42<2:00:28, 3.56it/s] 93%|█████████▎| 345762/371472 [5:20:42<1:59:04, 3.60it/s] 93%|█████████▎| 345763/371472 [5:20:43<2:03:56, 3.46it/s] 93%|█████████▎| 345764/371472 [5:20:43<2:01:14, 3.53it/s] 93%|█████████▎| 345765/371472 [5:20:43<1:58:03, 3.63it/s] 93%|█████████▎| 345766/371472 [5:20:43<2:06:13, 3.39it/s] 93%|█████████▎| 345767/371472 [5:20:44<2:10:12, 3.29it/s] 93%|█████████▎| 345768/371472 [5:20:44<2:03:55, 3.46it/s] 93%|█████████▎| 345769/371472 [5:20:44<2:20:35, 3.05it/s] 93%|█████████▎| 345770/371472 [5:20:45<2:11:43, 3.25it/s] 93%|█████████▎| 345771/371472 [5:20:45<2:10:26, 3.28it/s] 93%|█████████▎| 345772/371472 [5:20:45<2:08:38, 3.33it/s] 93%|█████████▎| 345773/371472 [5:20:45<2:03:43, 3.46it/s] 93%|█████████▎| 345774/371472 [5:20:46<2:02:13, 3.50it/s] 93%|█████████▎| 345775/371472 [5:20:46<2:02:23, 3.50it/s] 93%|█████████▎| 345776/371472 [5:20:46<2:12:25, 3.23it/s] 93%|█████████▎| 345777/371472 [5:20:47<2:12:58, 3.22it/s] 93%|█████████▎| 345778/371472 [5:20:47<2:08:23, 3.34it/s] 93%|█████████▎| 345779/371472 [5:20:47<2:09:51, 3.30it/s] 93%|█████████▎| 345780/371472 [5:20:48<2:11:29, 3.26it/s] {'loss': 2.4931, 'learning_rate': 1.6227994570018748e-07, 'epoch': 14.89} + 93%|█████████▎| 345780/371472 [5:20:48<2:11:29, 3.26it/s] 93%|█████████▎| 345781/371472 [5:20:48<2:14:43, 3.18it/s] 93%|█████████▎| 345782/371472 [5:20:48<2:22:16, 3.01it/s] 93%|█████████▎| 345783/371472 [5:20:49<2:13:13, 3.21it/s] 93%|█████████▎| 345784/371472 [5:20:49<2:07:27, 3.36it/s] 93%|█████████▎| 345785/371472 [5:20:49<2:09:22, 3.31it/s] 93%|█████████▎| 345786/371472 [5:20:50<2:11:27, 3.26it/s] 93%|█████████▎| 345787/371472 [5:20:50<2:08:20, 3.34it/s] 93%|█████████▎| 345788/371472 [5:20:50<2:05:37, 3.41it/s] 93%|█████████▎| 345789/371472 [5:20:50<2:20:50, 3.04it/s] 93%|█████████▎| 345790/371472 [5:20:51<2:27:45, 2.90it/s] 93%|█████████▎| 345791/371472 [5:20:51<2:17:19, 3.12it/s] 93%|█████████▎| 345792/371472 [5:20:51<2:19:19, 3.07it/s] 93%|█████████▎| 345793/371472 [5:20:52<2:14:47, 3.18it/s] 93%|█████████▎| 345794/371472 [5:20:52<2:09:27, 3.31it/s] 93%|█████████▎| 345795/371472 [5:20:52<2:04:20, 3.44it/s] 93%|█████████▎| 345796/371472 [5:20:53<2:09:12, 3.31it/s] 93%|█████████▎| 345797/371472 [5:20:53<2:04:28, 3.44it/s] 93%|█████████▎| 345798/371472 [5:20:53<2:00:11, 3.56it/s] 93%|█████████▎| 345799/371472 [5:20:54<2:13:06, 3.21it/s] 93%|█████████▎| 345800/371472 [5:20:54<2:07:31, 3.36it/s] {'loss': 2.4328, 'learning_rate': 1.6223146372470853e-07, 'epoch': 14.89} + 93%|█████████▎| 345800/371472 [5:20:54<2:07:31, 3.36it/s] 93%|█████████▎| 345801/371472 [5:20:54<2:01:26, 3.52it/s] 93%|█████████▎| 345802/371472 [5:20:54<1:59:18, 3.59it/s] 93%|█████████▎| 345803/371472 [5:20:55<2:08:26, 3.33it/s] 93%|█████████▎| 345804/371472 [5:20:55<2:03:06, 3.47it/s] 93%|█████████▎| 345805/371472 [5:20:55<2:02:46, 3.48it/s] 93%|█████████▎| 345806/371472 [5:20:55<2:04:36, 3.43it/s] 93%|█████████▎| 345807/371472 [5:20:56<2:02:11, 3.50it/s] 93%|█████████▎| 345808/371472 [5:20:56<2:01:03, 3.53it/s] 93%|█████████▎| 345809/371472 [5:20:56<2:02:49, 3.48it/s] 93%|█████████▎| 345810/371472 [5:20:57<2:01:21, 3.52it/s] 93%|█████████▎| 345811/371472 [5:20:57<2:03:39, 3.46it/s] 93%|█████████▎| 345812/371472 [5:20:57<2:06:38, 3.38it/s] 93%|█████████▎| 345813/371472 [5:20:57<2:00:40, 3.54it/s] 93%|█████████▎| 345814/371472 [5:20:58<2:04:29, 3.43it/s] 93%|█████████▎| 345815/371472 [5:20:58<2:03:00, 3.48it/s] 93%|█████████▎| 345816/371472 [5:20:58<2:04:16, 3.44it/s] 93%|█████████▎| 345817/371472 [5:20:59<2:04:15, 3.44it/s] 93%|█████████▎| 345818/371472 [5:20:59<2:06:42, 3.37it/s] 93%|█████████▎| 345819/371472 [5:20:59<2:02:52, 3.48it/s] 93%|█████████▎| 345820/371472 [5:21:00<2:02:54, 3.48it/s] {'loss': 2.5436, 'learning_rate': 1.621829817492297e-07, 'epoch': 14.9} + 93%|█████████▎| 345820/371472 [5:21:00<2:02:54, 3.48it/s] 93%|█████████▎| 345821/371472 [5:21:00<2:03:33, 3.46it/s] 93%|█████████▎| 345822/371472 [5:21:00<2:05:49, 3.40it/s] 93%|█████████▎| 345823/371472 [5:21:00<2:03:56, 3.45it/s] 93%|█████████▎| 345824/371472 [5:21:01<2:05:34, 3.40it/s] 93%|█████████▎| 345825/371472 [5:21:01<2:07:27, 3.35it/s] 93%|█████████▎| 345826/371472 [5:21:01<2:02:28, 3.49it/s] 93%|█████████▎| 345827/371472 [5:21:02<2:03:53, 3.45it/s] 93%|█████████▎| 345828/371472 [5:21:02<1:59:17, 3.58it/s] 93%|█████████▎| 345829/371472 [5:21:02<2:13:18, 3.21it/s] 93%|█████████▎| 345830/371472 [5:21:02<2:08:49, 3.32it/s] 93%|█████████▎| 345831/371472 [5:21:03<2:07:26, 3.35it/s] 93%|█████████▎| 345832/371472 [5:21:03<2:06:20, 3.38it/s] 93%|█████████▎| 345833/371472 [5:21:03<2:06:31, 3.38it/s] 93%|█████████▎| 345834/371472 [5:21:04<2:09:47, 3.29it/s] 93%|█████████▎| 345835/371472 [5:21:04<2:05:11, 3.41it/s] 93%|█████████▎| 345836/371472 [5:21:04<2:00:20, 3.55it/s] 93%|█████████▎| 345837/371472 [5:21:04<1:58:50, 3.60it/s] 93%|█████████▎| 345838/371472 [5:21:05<1:57:48, 3.63it/s] 93%|█████████▎| 345839/371472 [5:21:05<2:00:41, 3.54it/s] 93%|█████████▎| 345840/371472 [5:21:05<1:59:21, 3.58it/s] {'loss': 2.5189, 'learning_rate': 1.6213449977375073e-07, 'epoch': 14.9} + 93%|█████████▎| 345840/371472 [5:21:05<1:59:21, 3.58it/s] 93%|█████████▎| 345841/371472 [5:21:06<2:00:43, 3.54it/s] 93%|█████████▎| 345842/371472 [5:21:06<1:59:34, 3.57it/s] 93%|█████████▎| 345843/371472 [5:21:06<2:02:46, 3.48it/s] 93%|█████████▎| 345844/371472 [5:21:06<2:00:33, 3.54it/s] 93%|█████████▎| 345845/371472 [5:21:07<2:05:06, 3.41it/s] 93%|█████████▎| 345846/371472 [5:21:07<2:14:46, 3.17it/s] 93%|█████████▎| 345847/371472 [5:21:07<2:12:20, 3.23it/s] 93%|█████████▎| 345848/371472 [5:21:08<2:03:48, 3.45it/s] 93%|█████████▎| 345849/371472 [5:21:08<2:02:46, 3.48it/s] 93%|█████████▎| 345850/371472 [5:21:08<2:00:52, 3.53it/s] 93%|█████████▎| 345851/371472 [5:21:09<1:57:24, 3.64it/s] 93%|█████████▎| 345852/371472 [5:21:09<1:58:55, 3.59it/s] 93%|█████████▎| 345853/371472 [5:21:09<2:00:50, 3.53it/s] 93%|█████████▎| 345854/371472 [5:21:09<2:16:14, 3.13it/s] 93%|█████████▎| 345855/371472 [5:21:10<2:10:20, 3.28it/s] 93%|█████████▎| 345856/371472 [5:21:10<2:03:38, 3.45it/s] 93%|█████████▎| 345857/371472 [5:21:10<2:08:58, 3.31it/s] 93%|█████████▎| 345858/371472 [5:21:11<2:12:32, 3.22it/s] 93%|█████████▎| 345859/371472 [5:21:11<2:07:41, 3.34it/s] 93%|█████████▎| 345860/371472 [5:21:11<2:01:56, 3.50it/s] {'loss': 2.5495, 'learning_rate': 1.620860177982719e-07, 'epoch': 14.9} + 93%|█████████▎| 345860/371472 [5:21:11<2:01:56, 3.50it/s] 93%|█████████▎| 345861/371472 [5:21:11<1:58:40, 3.60it/s] 93%|█████████▎| 345862/371472 [5:21:12<2:03:18, 3.46it/s] 93%|█████████▎| 345863/371472 [5:21:12<1:58:56, 3.59it/s] 93%|█████████▎| 345864/371472 [5:21:12<2:03:38, 3.45it/s] 93%|█████████▎| 345865/371472 [5:21:13<1:59:19, 3.58it/s] 93%|█████████▎| 345866/371472 [5:21:13<1:55:53, 3.68it/s] 93%|█████████▎| 345867/371472 [5:21:13<2:00:43, 3.53it/s] 93%|█████████▎| 345868/371472 [5:21:13<2:04:39, 3.42it/s] 93%|█████████▎| 345869/371472 [5:21:14<2:05:18, 3.41it/s] 93%|█████████▎| 345870/371472 [5:21:14<1:58:24, 3.60it/s] 93%|█████████▎| 345871/371472 [5:21:14<2:00:17, 3.55it/s] 93%|█████████▎| 345872/371472 [5:21:15<1:59:01, 3.58it/s] 93%|█████████▎| 345873/371472 [5:21:15<2:03:48, 3.45it/s] 93%|█████████▎| 345874/371472 [5:21:15<2:04:52, 3.42it/s] 93%|█████████▎| 345875/371472 [5:21:15<2:04:56, 3.41it/s] 93%|█████████▎| 345876/371472 [5:21:16<2:07:48, 3.34it/s] 93%|█████████▎| 345877/371472 [5:21:16<2:02:29, 3.48it/s] 93%|█████████▎| 345878/371472 [5:21:16<2:04:28, 3.43it/s] 93%|█████████▎| 345879/371472 [5:21:17<2:00:57, 3.53it/s] 93%|█████████▎| 345880/371472 [5:21:17<2:01:31, 3.51it/s] {'loss': 2.6103, 'learning_rate': 1.6203753582279297e-07, 'epoch': 14.9} + 93%|█████████▎| 345880/371472 [5:21:17<2:01:31, 3.51it/s] 93%|█████████▎| 345881/371472 [5:21:17<2:04:40, 3.42it/s] 93%|█████████▎| 345882/371472 [5:21:18<2:05:02, 3.41it/s] 93%|█████████▎| 345883/371472 [5:21:18<2:00:06, 3.55it/s] 93%|█████████▎| 345884/371472 [5:21:18<2:05:34, 3.40it/s] 93%|█████████▎| 345885/371472 [5:21:18<2:07:16, 3.35it/s] 93%|█████████▎| 345886/371472 [5:21:19<2:00:08, 3.55it/s] 93%|█████████▎| 345887/371472 [5:21:19<1:59:13, 3.58it/s] 93%|█████████▎| 345888/371472 [5:21:19<2:00:33, 3.54it/s] 93%|█████████▎| 345889/371472 [5:21:20<2:24:55, 2.94it/s] 93%|█████████▎| 345890/371472 [5:21:20<2:15:53, 3.14it/s] 93%|█████████▎| 345891/371472 [5:21:20<2:07:41, 3.34it/s] 93%|█████████▎| 345892/371472 [5:21:20<2:03:50, 3.44it/s] 93%|█████████▎| 345893/371472 [5:21:21<2:07:41, 3.34it/s] 93%|█████████▎| 345894/371472 [5:21:21<2:03:35, 3.45it/s] 93%|█████████▎| 345895/371472 [5:21:21<2:04:14, 3.43it/s] 93%|█████████▎| 345896/371472 [5:21:22<2:05:43, 3.39it/s] 93%|█████████▎| 345897/371472 [5:21:22<1:59:49, 3.56it/s] 93%|█████████▎| 345898/371472 [5:21:22<2:00:16, 3.54it/s] 93%|█████████▎| 345899/371472 [5:21:23<2:09:22, 3.29it/s] 93%|█████████▎| 345900/371472 [5:21:23<2:03:27, 3.45it/s] {'loss': 2.5796, 'learning_rate': 1.6198905384731413e-07, 'epoch': 14.9} + 93%|█████████▎| 345900/371472 [5:21:23<2:03:27, 3.45it/s] 93%|█████████▎| 345901/371472 [5:21:23<1:59:38, 3.56it/s] 93%|█████████▎| 345902/371472 [5:21:23<2:05:01, 3.41it/s] 93%|█████████▎| 345903/371472 [5:21:24<2:00:30, 3.54it/s] 93%|█████████▎| 345904/371472 [5:21:24<1:58:42, 3.59it/s] 93%|█████████▎| 345905/371472 [5:21:24<2:05:01, 3.41it/s] 93%|█████████▎| 345906/371472 [5:21:25<2:00:46, 3.53it/s] 93%|█████████▎| 345907/371472 [5:21:25<2:01:09, 3.52it/s] 93%|█████████▎| 345908/371472 [5:21:25<2:09:32, 3.29it/s] 93%|█████████▎| 345909/371472 [5:21:25<2:06:16, 3.37it/s] 93%|█████████▎| 345910/371472 [5:21:26<2:03:00, 3.46it/s] 93%|█████████▎| 345911/371472 [5:21:26<2:02:41, 3.47it/s] 93%|█████████▎| 345912/371472 [5:21:26<2:03:57, 3.44it/s] 93%|█████████▎| 345913/371472 [5:21:27<2:02:03, 3.49it/s] 93%|█████████▎| 345914/371472 [5:21:27<2:11:27, 3.24it/s] 93%|█████████▎| 345915/371472 [5:21:27<2:05:49, 3.39it/s] 93%|█████████▎| 345916/371472 [5:21:27<2:02:22, 3.48it/s] 93%|█████████▎| 345917/371472 [5:21:28<1:59:07, 3.58it/s] 93%|█████████▎| 345918/371472 [5:21:28<1:56:42, 3.65it/s] 93%|█████████▎| 345919/371472 [5:21:28<1:55:38, 3.68it/s] 93%|█████████▎| 345920/371472 [5:21:29<2:02:38, 3.47it/s] {'loss': 2.6702, 'learning_rate': 1.6194057187183517e-07, 'epoch': 14.9} + 93%|█████████▎| 345920/371472 [5:21:29<2:02:38, 3.47it/s] 93%|█████████▎| 345921/371472 [5:21:29<1:59:24, 3.57it/s] 93%|█████████▎| 345922/371472 [5:21:29<1:59:30, 3.56it/s] 93%|█████████▎| 345923/371472 [5:21:29<2:01:53, 3.49it/s] 93%|█████████▎| 345924/371472 [5:21:30<2:05:12, 3.40it/s] 93%|█████████▎| 345925/371472 [5:21:30<1:59:28, 3.56it/s] 93%|█████████▎| 345926/371472 [5:21:30<2:00:04, 3.55it/s] 93%|█████████▎| 345927/371472 [5:21:31<1:58:43, 3.59it/s] 93%|█████████▎| 345928/371472 [5:21:31<1:58:18, 3.60it/s] 93%|█████████▎| 345929/371472 [5:21:31<2:09:05, 3.30it/s] 93%|█████████▎| 345930/371472 [5:21:32<2:20:48, 3.02it/s] 93%|█████████▎| 345931/371472 [5:21:32<2:11:12, 3.24it/s] 93%|█████████▎| 345932/371472 [5:21:32<2:10:07, 3.27it/s] 93%|█████████▎| 345933/371472 [5:21:32<2:07:48, 3.33it/s] 93%|█████████▎| 345934/371472 [5:21:33<2:21:42, 3.00it/s] 93%|█████████▎| 345935/371472 [5:21:33<2:11:35, 3.23it/s] 93%|█████████▎| 345936/371472 [5:21:33<2:06:47, 3.36it/s] 93%|█████████▎| 345937/371472 [5:21:34<2:09:31, 3.29it/s] 93%|█████████▎| 345938/371472 [5:21:34<2:07:02, 3.35it/s] 93%|█████████▎| 345939/371472 [5:21:34<2:02:28, 3.47it/s] 93%|█████████▎| 345940/371472 [5:21:34<2:03:34, 3.44it/s] {'loss': 2.4787, 'learning_rate': 1.6189208989635635e-07, 'epoch': 14.9} + 93%|█████████▎| 345940/371472 [5:21:35<2:03:34, 3.44it/s] 93%|█████████▎| 345941/371472 [5:21:35<1:59:03, 3.57it/s] 93%|█████████▎| 345942/371472 [5:21:35<2:01:43, 3.50it/s] 93%|█████████▎| 345943/371472 [5:21:35<1:56:07, 3.66it/s] 93%|█████████▎| 345944/371472 [5:21:36<2:02:06, 3.48it/s] 93%|█████████▎| 345945/371472 [5:21:36<2:10:19, 3.26it/s] 93%|█████████▎| 345946/371472 [5:21:36<2:12:48, 3.20it/s] 93%|█████████▎| 345947/371472 [5:21:37<2:11:15, 3.24it/s] 93%|█████████▎| 345948/371472 [5:21:37<2:11:12, 3.24it/s] 93%|█████████▎| 345949/371472 [5:21:37<2:13:06, 3.20it/s] 93%|█████████▎| 345950/371472 [5:21:37<2:05:17, 3.40it/s] 93%|█████████▎| 345951/371472 [5:21:38<2:06:19, 3.37it/s] 93%|█████████▎| 345952/371472 [5:21:38<2:03:00, 3.46it/s] 93%|█████████▎| 345953/371472 [5:21:38<1:59:25, 3.56it/s] 93%|█████████▎| 345954/371472 [5:21:39<1:55:01, 3.70it/s] 93%|█████████▎| 345955/371472 [5:21:39<1:52:18, 3.79it/s] 93%|█████████▎| 345956/371472 [5:21:39<1:54:55, 3.70it/s] 93%|█████████▎| 345957/371472 [5:21:39<1:52:49, 3.77it/s] 93%|█████████▎| 345958/371472 [5:21:40<1:54:04, 3.73it/s] 93%|█████████▎| 345959/371472 [5:21:40<1:52:09, 3.79it/s] 93%|█████████▎| 345960/371472 [5:21:40<1:51:57, 3.80it/s] {'loss': 2.6578, 'learning_rate': 1.6184360792087742e-07, 'epoch': 14.9} + 93%|█████████▎| 345960/371472 [5:21:40<1:51:57, 3.80it/s] 93%|█████████▎| 345961/371472 [5:21:40<1:53:14, 3.75it/s] 93%|█████████▎| 345962/371472 [5:21:41<2:02:56, 3.46it/s] 93%|█████████▎| 345963/371472 [5:21:41<1:59:00, 3.57it/s] 93%|█████████▎| 345964/371472 [5:21:41<1:55:00, 3.70it/s] 93%|█████████▎| 345965/371472 [5:21:42<2:05:00, 3.40it/s] 93%|█████████▎| 345966/371472 [5:21:42<1:59:53, 3.55it/s] 93%|█████████▎| 345967/371472 [5:21:42<1:56:41, 3.64it/s] 93%|█████████▎| 345968/371472 [5:21:42<1:54:19, 3.72it/s] 93%|█████████▎| 345969/371472 [5:21:43<2:00:32, 3.53it/s] 93%|█████████▎| 345970/371472 [5:21:43<2:01:06, 3.51it/s] 93%|█████████▎| 345971/371472 [5:21:43<1:57:18, 3.62it/s] 93%|█████████▎| 345972/371472 [5:21:44<2:03:04, 3.45it/s] 93%|█████████▎| 345973/371472 [5:21:44<2:00:05, 3.54it/s] 93%|█████████▎| 345974/371472 [5:21:44<1:56:16, 3.66it/s] 93%|█████████▎| 345975/371472 [5:21:44<1:54:41, 3.70it/s] 93%|█████████▎| 345976/371472 [5:21:45<1:53:15, 3.75it/s] 93%|█████████▎| 345977/371472 [5:21:45<1:52:27, 3.78it/s] 93%|█████████▎| 345978/371472 [5:21:45<2:01:06, 3.51it/s] 93%|█████████▎| 345979/371472 [5:21:45<2:04:00, 3.43it/s] 93%|█████████▎| 345980/371472 [5:21:46<2:03:22, 3.44it/s] {'loss': 2.4406, 'learning_rate': 1.6179512594539854e-07, 'epoch': 14.9} + 93%|█████████▎| 345980/371472 [5:21:46<2:03:22, 3.44it/s] 93%|█████████▎| 345981/371472 [5:21:46<2:13:20, 3.19it/s] 93%|█████████▎| 345982/371472 [5:21:46<2:07:42, 3.33it/s] 93%|█████████▎| 345983/371472 [5:21:47<2:08:23, 3.31it/s] 93%|█████████▎| 345984/371472 [5:21:47<2:01:02, 3.51it/s] 93%|█████████▎| 345985/371472 [5:21:47<1:58:09, 3.60it/s] 93%|█████████▎| 345986/371472 [5:21:48<1:57:38, 3.61it/s] 93%|█████████▎| 345987/371472 [5:21:48<1:53:54, 3.73it/s] 93%|█████████▎| 345988/371472 [5:21:48<1:56:49, 3.64it/s] 93%|█████████▎| 345989/371472 [5:21:48<2:00:20, 3.53it/s] 93%|█████████▎| 345990/371472 [5:21:49<2:03:25, 3.44it/s] 93%|█████████▎| 345991/371472 [5:21:49<2:21:19, 3.01it/s] 93%|█████████▎| 345992/371472 [5:21:49<2:16:48, 3.10it/s] 93%|█████████▎| 345993/371472 [5:21:50<2:10:41, 3.25it/s] 93%|█████████▎| 345994/371472 [5:21:50<2:03:37, 3.43it/s] 93%|█████████▎| 345995/371472 [5:21:50<2:01:08, 3.51it/s] 93%|█████████▎| 345996/371472 [5:21:50<1:59:41, 3.55it/s] 93%|█████████▎| 345997/371472 [5:21:51<2:03:25, 3.44it/s] 93%|█████████▎| 345998/371472 [5:21:51<2:12:36, 3.20it/s] 93%|█████████▎| 345999/371472 [5:21:51<2:19:10, 3.05it/s] 93%|█████████▎| 346000/371472 [5:21:52<2:15:19, 3.14it/s] {'loss': 2.5371, 'learning_rate': 1.6174664396991961e-07, 'epoch': 14.9} + 93%|█████████▎| 346000/371472 [5:21:52<2:15:19, 3.14it/s] 93%|█████████▎| 346001/371472 [5:21:52<2:14:54, 3.15it/s] 93%|█████████▎| 346002/371472 [5:21:52<2:13:02, 3.19it/s] 93%|█████████▎| 346003/371472 [5:21:53<2:11:32, 3.23it/s] 93%|█████████▎| 346004/371472 [5:21:53<2:13:54, 3.17it/s] 93%|█████████▎| 346005/371472 [5:21:53<2:12:42, 3.20it/s] 93%|█████████▎| 346006/371472 [5:21:54<2:06:06, 3.37it/s] 93%|█████████▎| 346007/371472 [5:21:54<2:14:16, 3.16it/s] 93%|█████████▎| 346008/371472 [5:21:54<2:07:42, 3.32it/s] 93%|█████████▎| 346009/371472 [5:21:55<2:08:54, 3.29it/s] 93%|█████████▎| 346010/371472 [5:21:55<2:03:55, 3.42it/s] 93%|█████████▎| 346011/371472 [5:21:55<2:01:30, 3.49it/s] 93%|█████████▎| 346012/371472 [5:21:55<1:58:55, 3.57it/s] 93%|█████████▎| 346013/371472 [5:21:56<1:58:48, 3.57it/s] 93%|█████████▎| 346014/371472 [5:21:56<2:00:13, 3.53it/s] 93%|█████████▎| 346015/371472 [5:21:56<1:59:50, 3.54it/s] 93%|█████████▎| 346016/371472 [5:21:56<1:57:41, 3.61it/s] 93%|█████████▎| 346017/371472 [5:21:57<1:55:28, 3.67it/s] 93%|█████████▎| 346018/371472 [5:21:57<1:56:21, 3.65it/s] 93%|█████████▎| 346019/371472 [5:21:57<2:12:25, 3.20it/s] 93%|█████████▎| 346020/371472 [5:21:58<2:11:54, 3.22it/s] {'loss': 2.6566, 'learning_rate': 1.616981619944408e-07, 'epoch': 14.9} + 93%|█████████▎| 346020/371472 [5:21:58<2:11:54, 3.22it/s] 93%|█████████▎| 346021/371472 [5:21:58<2:11:08, 3.23it/s] 93%|█████████▎| 346022/371472 [5:21:58<2:04:07, 3.42it/s] 93%|█████████▎| 346023/371472 [5:21:59<2:00:36, 3.52it/s] 93%|█████████▎| 346024/371472 [5:21:59<1:58:37, 3.58it/s] 93%|█████████▎| 346025/371472 [5:21:59<1:56:51, 3.63it/s] 93%|█████████▎| 346026/371472 [5:21:59<1:53:36, 3.73it/s] 93%|█████████▎| 346027/371472 [5:22:00<1:53:07, 3.75it/s] 93%|█████████▎| 346028/371472 [5:22:00<1:49:25, 3.88it/s] 93%|█████████▎| 346029/371472 [5:22:00<1:57:45, 3.60it/s] 93%|█████████▎| 346030/371472 [5:22:00<2:01:18, 3.50it/s] 93%|█████████▎| 346031/371472 [5:22:01<2:00:40, 3.51it/s] 93%|█████████▎| 346032/371472 [5:22:01<1:56:51, 3.63it/s] 93%|█████████▎| 346033/371472 [5:22:01<1:56:25, 3.64it/s] 93%|█████████▎| 346034/371472 [5:22:02<2:01:31, 3.49it/s] 93%|█████████▎| 346035/371472 [5:22:02<1:59:12, 3.56it/s] 93%|█████████▎| 346036/371472 [5:22:02<2:02:45, 3.45it/s] 93%|█████████▎| 346037/371472 [5:22:02<2:00:30, 3.52it/s] 93%|█████████▎| 346038/371472 [5:22:03<2:04:27, 3.41it/s] 93%|█████████▎| 346039/371472 [5:22:03<2:10:01, 3.26it/s] 93%|█████████▎| 346040/371472 [5:22:03<2:05:42, 3.37it/s] {'loss': 2.7222, 'learning_rate': 1.6164968001896184e-07, 'epoch': 14.9} + 93%|█████████▎| 346040/371472 [5:22:03<2:05:42, 3.37it/s] 93%|█████████▎| 346041/371472 [5:22:04<2:13:43, 3.17it/s] 93%|█████████▎| 346042/371472 [5:22:04<2:28:50, 2.85it/s] 93%|█████████▎| 346043/371472 [5:22:04<2:18:07, 3.07it/s] 93%|█████████▎| 346044/371472 [5:22:05<2:09:09, 3.28it/s] 93%|█████████▎| 346045/371472 [5:22:05<2:20:02, 3.03it/s] 93%|█████████▎| 346046/371472 [5:22:05<2:13:26, 3.18it/s] 93%|█████████▎| 346047/371472 [5:22:06<2:04:25, 3.41it/s] 93%|█████████▎| 346048/371472 [5:22:06<2:00:53, 3.50it/s] 93%|█████████▎| 346049/371472 [5:22:06<2:08:07, 3.31it/s] 93%|█████████▎| 346050/371472 [5:22:06<2:07:26, 3.32it/s] 93%|█████████▎| 346051/371472 [5:22:07<2:12:33, 3.20it/s] 93%|█████████▎| 346052/371472 [5:22:07<2:09:03, 3.28it/s] 93%|█████████▎| 346053/371472 [5:22:08<2:24:28, 2.93it/s] 93%|█████████▎| 346054/371472 [5:22:08<2:13:41, 3.17it/s] 93%|█████████▎| 346055/371472 [5:22:08<2:11:47, 3.21it/s] 93%|█████████▎| 346056/371472 [5:22:08<2:10:14, 3.25it/s] 93%|████��████▎| 346057/371472 [5:22:09<2:12:24, 3.20it/s] 93%|█████████▎| 346058/371472 [5:22:09<2:17:02, 3.09it/s] 93%|█████████▎| 346059/371472 [5:22:09<2:13:18, 3.18it/s] 93%|█████████▎| 346060/371472 [5:22:10<2:05:34, 3.37it/s] {'loss': 2.4852, 'learning_rate': 1.6160119804348288e-07, 'epoch': 14.91} + 93%|█████████▎| 346060/371472 [5:22:10<2:05:34, 3.37it/s] 93%|█████████▎| 346061/371472 [5:22:10<2:01:35, 3.48it/s] 93%|█████████▎| 346062/371472 [5:22:10<2:04:00, 3.41it/s] 93%|█████████▎| 346063/371472 [5:22:10<2:03:00, 3.44it/s] 93%|█████████▎| 346064/371472 [5:22:11<2:05:47, 3.37it/s] 93%|█████████▎| 346065/371472 [5:22:11<2:03:25, 3.43it/s] 93%|█████████▎| 346066/371472 [5:22:11<2:05:49, 3.37it/s] 93%|█████████▎| 346067/371472 [5:22:12<2:01:48, 3.48it/s] 93%|█████████▎| 346068/371472 [5:22:12<2:05:03, 3.39it/s] 93%|█████████▎| 346069/371472 [5:22:12<2:01:13, 3.49it/s] 93%|█████████▎| 346070/371472 [5:22:13<2:01:59, 3.47it/s] 93%|█████████▎| 346071/371472 [5:22:13<2:03:09, 3.44it/s] 93%|█████████▎| 346072/371472 [5:22:13<2:02:44, 3.45it/s] 93%|█████████▎| 346073/371472 [5:22:13<2:06:06, 3.36it/s] 93%|█████████▎| 346074/371472 [5:22:14<2:11:40, 3.21it/s] 93%|█████████▎| 346075/371472 [5:22:14<2:03:19, 3.43it/s] 93%|█████████▎| 346076/371472 [5:22:14<1:57:09, 3.61it/s] 93%|█████████▎| 346077/371472 [5:22:15<1:57:11, 3.61it/s] 93%|█████████▎| 346078/371472 [5:22:15<1:54:36, 3.69it/s] 93%|█████████▎| 346079/371472 [5:22:15<1:57:02, 3.62it/s] 93%|█████████▎| 346080/371472 [5:22:15<1:56:17, 3.64it/s] {'loss': 2.5385, 'learning_rate': 1.6155271606800406e-07, 'epoch': 14.91} + 93%|█████████▎| 346080/371472 [5:22:15<1:56:17, 3.64it/s] 93%|█████████▎| 346081/371472 [5:22:16<1:57:40, 3.60it/s] 93%|█████████▎| 346082/371472 [5:22:16<1:59:10, 3.55it/s] 93%|█████████▎| 346083/371472 [5:22:16<1:59:55, 3.53it/s] 93%|█████████▎| 346084/371472 [5:22:16<1:58:27, 3.57it/s] 93%|█████████▎| 346085/371472 [5:22:17<2:01:57, 3.47it/s] 93%|█████████▎| 346086/371472 [5:22:17<2:05:12, 3.38it/s] 93%|█████████▎| 346087/371472 [5:22:17<2:09:22, 3.27it/s] 93%|█████████▎| 346088/371472 [5:22:18<2:05:39, 3.37it/s] 93%|█████████▎| 346089/371472 [5:22:18<2:04:58, 3.39it/s] 93%|█████████▎| 346090/371472 [5:22:18<2:00:47, 3.50it/s] 93%|█████████▎| 346091/371472 [5:22:19<1:56:25, 3.63it/s] 93%|█████████▎| 346092/371472 [5:22:19<1:50:59, 3.81it/s] 93%|█████████▎| 346093/371472 [5:22:19<2:00:53, 3.50it/s] 93%|█████████▎| 346094/371472 [5:22:19<2:01:49, 3.47it/s] 93%|█████████▎| 346095/371472 [5:22:20<2:02:42, 3.45it/s] 93%|█████████▎| 346096/371472 [5:22:20<1:56:04, 3.64it/s] 93%|█████████▎| 346097/371472 [5:22:20<1:55:47, 3.65it/s] 93%|█████████▎| 346098/371472 [5:22:20<1:56:29, 3.63it/s] 93%|█████████▎| 346099/371472 [5:22:21<2:01:08, 3.49it/s] 93%|█████████▎| 346100/371472 [5:22:21<1:56:14, 3.64it/s] {'loss': 2.6379, 'learning_rate': 1.615042340925251e-07, 'epoch': 14.91} + 93%|█████████▎| 346100/371472 [5:22:21<1:56:14, 3.64it/s] 93%|█████████▎| 346101/371472 [5:22:21<1:57:45, 3.59it/s] 93%|█████████▎| 346102/371472 [5:22:22<2:01:52, 3.47it/s] 93%|█████████▎| 346103/371472 [5:22:22<2:02:23, 3.45it/s] 93%|█████████▎| 346104/371472 [5:22:22<2:03:48, 3.41it/s] 93%|█████████▎| 346105/371472 [5:22:23<2:12:29, 3.19it/s] 93%|█████████▎| 346106/371472 [5:22:23<2:04:29, 3.40it/s] 93%|█████████▎| 346107/371472 [5:22:23<2:04:54, 3.38it/s] 93%|█████████▎| 346108/371472 [5:22:23<2:03:54, 3.41it/s] 93%|█████████▎| 346109/371472 [5:22:24<2:00:58, 3.49it/s] 93%|█████████▎| 346110/371472 [5:22:24<1:57:32, 3.60it/s] 93%|█████████▎| 346111/371472 [5:22:24<1:54:59, 3.68it/s] 93%|█████████▎| 346112/371472 [5:22:24<1:55:55, 3.65it/s] 93%|██���██████▎| 346113/371472 [5:22:25<1:54:58, 3.68it/s] 93%|█████████▎| 346114/371472 [5:22:25<1:56:07, 3.64it/s] 93%|█████████▎| 346115/371472 [5:22:25<1:54:17, 3.70it/s] 93%|█████████▎| 346116/371472 [5:22:26<1:59:11, 3.55it/s] 93%|█████████▎| 346117/371472 [5:22:26<1:57:37, 3.59it/s] 93%|█████████▎| 346118/371472 [5:22:26<1:57:40, 3.59it/s] 93%|█████████▎| 346119/371472 [5:22:26<1:55:04, 3.67it/s] 93%|█████████▎| 346120/371472 [5:22:27<1:51:39, 3.78it/s] {'loss': 2.5776, 'learning_rate': 1.6145575211704626e-07, 'epoch': 14.91} + 93%|█████████▎| 346120/371472 [5:22:27<1:51:39, 3.78it/s] 93%|█████████▎| 346121/371472 [5:22:27<1:59:38, 3.53it/s] 93%|█████████▎| 346122/371472 [5:22:27<1:57:12, 3.60it/s] 93%|█████████▎| 346123/371472 [5:22:28<1:59:05, 3.55it/s] 93%|█████████▎| 346124/371472 [5:22:28<2:06:17, 3.35it/s] 93%|█████████▎| 346125/371472 [5:22:28<2:05:18, 3.37it/s] 93%|█████████▎| 346126/371472 [5:22:28<2:03:45, 3.41it/s] 93%|█████████▎| 346127/371472 [5:22:29<2:00:04, 3.52it/s] 93%|█████████▎| 346128/371472 [5:22:29<1:58:46, 3.56it/s] 93%|█████████▎| 346129/371472 [5:22:29<1:56:13, 3.63it/s] 93%|█████████▎| 346130/371472 [5:22:30<2:05:32, 3.36it/s] 93%|█████████▎| 346131/371472 [5:22:30<2:07:53, 3.30it/s] 93%|█████████▎| 346132/371472 [5:22:30<2:07:42, 3.31it/s] 93%|█████████▎| 346133/371472 [5:22:31<2:08:04, 3.30it/s] 93%|█████████▎| 346134/371472 [5:22:31<2:02:21, 3.45it/s] 93%|█████████▎| 346135/371472 [5:22:31<1:58:10, 3.57it/s] 93%|█████████▎| 346136/371472 [5:22:31<2:04:58, 3.38it/s] 93%|█████████▎| 346137/371472 [5:22:32<2:05:14, 3.37it/s] 93%|█████████▎| 346138/371472 [5:22:32<2:06:03, 3.35it/s] 93%|█████████▎| 346139/371472 [5:22:32<2:04:02, 3.40it/s] 93%|█████████▎| 346140/371472 [5:22:33<2:03:43, 3.41it/s] {'loss': 2.7166, 'learning_rate': 1.6140727014156733e-07, 'epoch': 14.91} + 93%|█████████▎| 346140/371472 [5:22:33<2:03:43, 3.41it/s] 93%|█████████▎| 346141/371472 [5:22:33<2:04:51, 3.38it/s] 93%|█████████▎| 346142/371472 [5:22:33<2:10:30, 3.23it/s] 93%|█████████▎| 346143/371472 [5:22:33<2:03:25, 3.42it/s] 93%|█████████▎| 346144/371472 [5:22:34<2:01:50, 3.46it/s] 93%|█████████▎| 346145/371472 [5:22:34<1:57:21, 3.60it/s] 93%|█████████▎| 346146/371472 [5:22:34<1:56:02, 3.64it/s] 93%|█████████▎| 346147/371472 [5:22:35<2:06:07, 3.35it/s] 93%|█████████▎| 346148/371472 [5:22:35<1:58:57, 3.55it/s] 93%|█████████▎| 346149/371472 [5:22:35<1:59:56, 3.52it/s] 93%|█████████▎| 346150/371472 [5:22:35<1:57:35, 3.59it/s] 93%|█████████▎| 346151/371472 [5:22:36<2:07:29, 3.31it/s] 93%|█████████▎| 346152/371472 [5:22:36<2:03:49, 3.41it/s] 93%|█████████▎| 346153/371472 [5:22:36<2:03:19, 3.42it/s] 93%|█████████▎| 346154/371472 [5:22:37<1:56:56, 3.61it/s] 93%|█████████▎| 346155/371472 [5:22:37<1:54:55, 3.67it/s] 93%|█████████▎| 346156/371472 [5:22:37<1:53:31, 3.72it/s] 93%|█████████▎| 346157/371472 [5:22:37<1:55:00, 3.67it/s] 93%|█████████▎| 346158/371472 [5:22:38<1:56:43, 3.61it/s] 93%|█████████▎| 346159/371472 [5:22:38<1:54:03, 3.70it/s] 93%|█████████▎| 346160/371472 [5:22:38<1:59:32, 3.53it/s] {'loss': 2.5306, 'learning_rate': 1.6135878816608848e-07, 'epoch': 14.91} + 93%|█████████▎| 346160/371472 [5:22:38<1:59:32, 3.53it/s] 93%|█████████▎| 346161/371472 [5:22:38<2:01:09, 3.48it/s] 93%|█████████▎| 346162/371472 [5:22:39<1:57:40, 3.58it/s] 93%|█████████▎| 346163/371472 [5:22:39<1:54:09, 3.69it/s] 93%|█████████▎| 346164/371472 [5:22:39<1:58:43, 3.55it/s] 93%|█████████▎| 346165/371472 [5:22:40<1:57:00, 3.60it/s] 93%|█████████▎| 346166/371472 [5:22:40<1:51:32, 3.78it/s] 93%|█████████▎| 346167/371472 [5:22:40<1:55:23, 3.66it/s] 93%|█████████▎| 346168/371472 [5:22:40<1:53:30, 3.72it/s] 93%|█████████▎| 346169/371472 [5:22:41<1:54:08, 3.69it/s] 93%|█████████▎| 346170/371472 [5:22:41<1:52:22, 3.75it/s] 93%|█████████▎| 346171/371472 [5:22:41<1:51:36, 3.78it/s] 93%|█████████▎| 346172/371472 [5:22:41<1:49:55, 3.84it/s] 93%|█████████▎| 346173/371472 [5:22:42<1:52:04, 3.76it/s] 93%|█████████▎| 346174/371472 [5:22:42<1:51:23, 3.78it/s] 93%|█████████▎| 346175/371472 [5:22:42<1:51:55, 3.77it/s] 93%|█████████▎| 346176/371472 [5:22:43<1:54:38, 3.68it/s] 93%|█████████▎| 346177/371472 [5:22:43<1:53:31, 3.71it/s] 93%|█████████▎| 346178/371472 [5:22:43<1:51:50, 3.77it/s] 93%|█████████▎| 346179/371472 [5:22:43<1:51:42, 3.77it/s] 93%|█████████▎| 346180/371472 [5:22:44<1:52:48, 3.74it/s] {'loss': 2.6065, 'learning_rate': 1.6131030619060952e-07, 'epoch': 14.91} + 93%|█████████▎| 346180/371472 [5:22:44<1:52:48, 3.74it/s] 93%|█████████▎| 346181/371472 [5:22:44<1:52:58, 3.73it/s] 93%|█████████▎| 346182/371472 [5:22:44<1:55:03, 3.66it/s] 93%|█████████▎| 346183/371472 [5:22:44<1:59:51, 3.52it/s] 93%|█████████▎| 346184/371472 [5:22:45<2:02:14, 3.45it/s] 93%|█████████▎| 346185/371472 [5:22:45<1:56:06, 3.63it/s] 93%|█████████▎| 346186/371472 [5:22:45<1:59:07, 3.54it/s] 93%|█████████▎| 346187/371472 [5:22:46<2:02:44, 3.43it/s] 93%|█████████▎| 346188/371472 [5:22:46<1:59:17, 3.53it/s] 93%|█████████▎| 346189/371472 [5:22:46<2:04:52, 3.37it/s] 93%|█████████▎| 346190/371472 [5:22:47<2:10:27, 3.23it/s] 93%|█████████▎| 346191/371472 [5:22:47<2:10:27, 3.23it/s] 93%|█████████▎| 346192/371472 [5:22:47<2:13:51, 3.15it/s] 93%|█████████▎| 346193/371472 [5:22:47<2:05:24, 3.36it/s] 93%|█████████▎| 346194/371472 [5:22:48<2:00:40, 3.49it/s] 93%|█████████▎| 346195/371472 [5:22:48<1:59:05, 3.54it/s] 93%|█████████▎| 346196/371472 [5:22:48<1:54:48, 3.67it/s] 93%|█████████▎| 346197/371472 [5:22:48<1:51:05, 3.79it/s] 93%|█████████▎| 346198/371472 [5:22:49<1:50:45, 3.80it/s] 93%|█████████▎| 346199/371472 [5:22:49<1:51:08, 3.79it/s] 93%|█████████▎| 346200/371472 [5:22:49<1:49:18, 3.85it/s] {'loss': 2.5109, 'learning_rate': 1.612618242151307e-07, 'epoch': 14.91} + 93%|█████████▎| 346200/371472 [5:22:49<1:49:18, 3.85it/s] 93%|█████████▎| 346201/371472 [5:22:49<1:49:09, 3.86it/s] 93%|█████████▎| 346202/371472 [5:22:50<1:55:21, 3.65it/s] 93%|█████████▎| 346203/371472 [5:22:50<1:52:17, 3.75it/s] 93%|█████████▎| 346204/371472 [5:22:50<1:52:50, 3.73it/s] 93%|█████████▎| 346205/371472 [5:22:51<1:53:01, 3.73it/s] 93%|█████████▎| 346206/371472 [5:22:51<2:11:19, 3.21it/s] 93%|█████████▎| 346207/371472 [5:22:51<2:16:24, 3.09it/s] 93%|█████████▎| 346208/371472 [5:22:52<2:13:12, 3.16it/s] 93%|█████████▎| 346209/371472 [5:22:52<2:05:27, 3.36it/s] 93%|█████████▎| 346210/371472 [5:22:52<1:58:42, 3.55it/s] 93%|█████████▎| 346211/371472 [5:22:52<2:01:47, 3.46it/s] 93%|█████████▎| 346212/371472 [5:22:53<2:00:04, 3.51it/s] 93%|█████████▎| 346213/371472 [5:22:53<1:58:13, 3.56it/s] 93%|█████████▎| 346214/371472 [5:22:53<1:58:41, 3.55it/s] 93%|█████████▎| 346215/371472 [5:22:54<2:06:57, 3.32it/s] 93%|█████████▎| 346216/371472 [5:22:54<2:03:41, 3.40it/s] 93%|█████████▎| 346217/371472 [5:22:54<1:57:58, 3.57it/s] 93%|█████████▎| 346218/371472 [5:22:54<1:52:44, 3.73it/s] 93%|█████████▎| 346219/371472 [5:22:55<1:50:53, 3.80it/s] 93%|█████████▎| 346220/371472 [5:22:55<1:55:30, 3.64it/s] {'loss': 2.554, 'learning_rate': 1.6121334223965175e-07, 'epoch': 14.91} + 93%|█████████▎| 346220/371472 [5:22:55<1:55:30, 3.64it/s] 93%|█████████▎| 346221/371472 [5:22:55<2:03:51, 3.40it/s] 93%|█████████▎| 346222/371472 [5:22:56<2:01:27, 3.46it/s] 93%|█████████▎| 346223/371472 [5:22:56<1:56:13, 3.62it/s] 93%|█████████▎| 346224/371472 [5:22:56<1:59:16, 3.53it/s] 93%|█████████▎| 346225/371472 [5:22:56<1:58:51, 3.54it/s] 93%|█████████▎| 346226/371472 [5:22:57<2:01:45, 3.46it/s] 93%|█████████▎| 346227/371472 [5:22:57<1:59:41, 3.52it/s] 93%|█████████▎| 346228/371472 [5:22:57<1:58:59, 3.54it/s] 93%|█████████▎| 346229/371472 [5:22:58<1:59:13, 3.53it/s] 93%|█████████▎| 346230/371472 [5:22:58<1:56:20, 3.62it/s] 93%|█████████▎| 346231/371472 [5:22:58<1:54:42, 3.67it/s] 93%|█████████▎| 346232/371472 [5:22:58<1:53:19, 3.71it/s] 93%|█████████▎| 346233/371472 [5:22:59<1:55:59, 3.63it/s] 93%|█████████▎| 346234/371472 [5:22:59<1:54:15, 3.68it/s] 93%|█████████▎| 346235/371472 [5:22:59<1:54:43, 3.67it/s] 93%|█████████▎| 346236/371472 [5:22:59<1:55:15, 3.65it/s] 93%|█████████▎| 346237/371472 [5:23:00<1:52:41, 3.73it/s] 93%|█████████▎| 346238/371472 [5:23:00<2:04:23, 3.38it/s] 93%|█████████▎| 346239/371472 [5:23:00<2:17:38, 3.06it/s] 93%|█████████▎| 346240/371472 [5:23:01<2:09:35, 3.24it/s] {'loss': 2.5597, 'learning_rate': 1.611648602641729e-07, 'epoch': 14.91} + 93%|█████████▎| 346240/371472 [5:23:01<2:09:35, 3.24it/s] 93%|█████████▎| 346241/371472 [5:23:01<2:04:28, 3.38it/s] 93%|█████████▎| 346242/371472 [5:23:01<2:03:12, 3.41it/s] 93%|█████████▎| 346243/371472 [5:23:02<2:05:49, 3.34it/s] 93%|█████████▎| 346244/371472 [5:23:02<2:06:01, 3.34it/s] 93%|█████████▎| 346245/371472 [5:23:02<2:06:01, 3.34it/s] 93%|█████████▎| 346246/371472 [5:23:03<2:11:01, 3.21it/s] 93%|█████████▎| 346247/371472 [5:23:03<2:04:12, 3.38it/s] 93%|█████████▎| 346248/371472 [5:23:03<2:04:14, 3.38it/s] 93%|█████████▎| 346249/371472 [5:23:03<2:02:49, 3.42it/s] 93%|█████████▎| 346250/371472 [5:23:04<1:57:22, 3.58it/s] 93%|█████████▎| 346251/371472 [5:23:04<1:54:50, 3.66it/s] 93%|█████████▎| 346252/371472 [5:23:04<2:07:20, 3.30it/s] 93%|█████████▎| 346253/371472 [5:23:05<2:11:33, 3.19it/s] 93%|█████████▎| 346254/371472 [5:23:05<2:12:00, 3.18it/s] 93%|█████████▎| 346255/371472 [5:23:05<2:09:59, 3.23it/s] 93%|█████████▎| 346256/371472 [5:23:05<2:03:50, 3.39it/s] 93%|█████████▎| 346257/371472 [5:23:06<2:18:48, 3.03it/s] 93%|█████████▎| 346258/371472 [5:23:06<2:11:04, 3.21it/s] 93%|█████████▎| 346259/371472 [5:23:06<2:02:23, 3.43it/s] 93%|█████████▎| 346260/371472 [5:23:07<1:58:54, 3.53it/s] {'loss': 2.5992, 'learning_rate': 1.6111637828869397e-07, 'epoch': 14.91} + 93%|█████████▎| 346260/371472 [5:23:07<1:58:54, 3.53it/s] 93%|█████████▎| 346261/371472 [5:23:07<1:57:16, 3.58it/s] 93%|█████████▎| 346262/371472 [5:23:07<1:57:09, 3.59it/s] 93%|█████████▎| 346263/371472 [5:23:07<2:02:00, 3.44it/s] 93%|█████████▎| 346264/371472 [5:23:08<2:23:33, 2.93it/s] 93%|█████████▎| 346265/371472 [5:23:08<2:24:51, 2.90it/s] 93%|█████████▎| 346266/371472 [5:23:09<2:29:27, 2.81it/s] 93%|█████████▎| 346267/371472 [5:23:09<2:18:55, 3.02it/s] 93%|█████████▎| 346268/371472 [5:23:09<2:10:26, 3.22it/s] 93%|█████████▎| 346269/371472 [5:23:09<2:05:01, 3.36it/s] 93%|█████████▎| 346270/371472 [5:23:10<1:59:57, 3.50it/s] 93%|█████████▎| 346271/371472 [5:23:10<1:57:46, 3.57it/s] 93%|█████████▎| 346272/371472 [5:23:10<1:56:40, 3.60it/s] 93%|█████████▎| 346273/371472 [5:23:11<1:57:12, 3.58it/s] 93%|█████████▎| 346274/371472 [5:23:11<1:51:31, 3.77it/s] 93%|█████████▎| 346275/371472 [5:23:11<1:53:02, 3.71it/s] 93%|█████████▎| 346276/371472 [5:23:11<1:57:27, 3.58it/s] 93%|█████████▎| 346277/371472 [5:23:12<1:56:20, 3.61it/s] 93%|█████████▎| 346278/371472 [5:23:12<1:53:08, 3.71it/s] 93%|█████████▎| 346279/371472 [5:23:12<2:06:19, 3.32it/s] 93%|█████████▎| 346280/371472 [5:23:13<2:02:17, 3.43it/s] {'loss': 2.595, 'learning_rate': 1.6106789631321512e-07, 'epoch': 14.91} + 93%|█████████▎| 346280/371472 [5:23:13<2:02:17, 3.43it/s] 93%|█████████▎| 346281/371472 [5:23:13<2:06:21, 3.32it/s] 93%|█████████▎| 346282/371472 [5:23:13<2:00:01, 3.50it/s] 93%|█████████▎| 346283/371472 [5:23:13<2:02:39, 3.42it/s] 93%|█████████▎| 346284/371472 [5:23:14<2:04:19, 3.38it/s] 93%|█████████▎| 346285/371472 [5:23:14<1:57:31, 3.57it/s] 93%|█████████▎| 346286/371472 [5:23:14<1:56:44, 3.60it/s] 93%|█████████▎| 346287/371472 [5:23:14<1:51:22, 3.77it/s] 93%|█████████▎| 346288/371472 [5:23:15<1:58:19, 3.55it/s] 93%|█████████▎| 346289/371472 [5:23:15<1:52:29, 3.73it/s] 93%|█████████▎| 346290/371472 [5:23:15<1:54:50, 3.65it/s] 93%|█████████▎| 346291/371472 [5:23:16<1:50:31, 3.80it/s] 93%|█████████▎| 346292/371472 [5:23:16<1:52:38, 3.73it/s] 93%|█████████▎| 346293/371472 [5:23:16<2:02:03, 3.44it/s] 93%|█████████▎| 346294/371472 [5:23:17<2:05:23, 3.35it/s] 93%|█████████▎| 346295/371472 [5:23:17<1:57:55, 3.56it/s] 93%|█████████▎| 346296/371472 [5:23:17<2:00:07, 3.49it/s] 93%|█████████▎| 346297/371472 [5:23:17<1:55:58, 3.62it/s] 93%|█████████▎| 346298/371472 [5:23:18<2:02:44, 3.42it/s] 93%|█████████▎| 346299/371472 [5:23:18<2:02:03, 3.44it/s] 93%|█████████▎| 346300/371472 [5:23:18<1:59:18, 3.52it/s] {'loss': 2.5084, 'learning_rate': 1.6101941433773616e-07, 'epoch': 14.92} + 93%|█████████▎| 346300/371472 [5:23:18<1:59:18, 3.52it/s] 93%|█████████▎| 346301/371472 [5:23:18<1:53:41, 3.69it/s] 93%|█████████▎| 346302/371472 [5:23:19<1:53:02, 3.71it/s] 93%|█████████▎| 346303/371472 [5:23:19<1:52:39, 3.72it/s] 93%|█████████▎| 346304/371472 [5:23:19<1:54:08, 3.67it/s] 93%|█████████▎| 346305/371472 [5:23:20<1:56:09, 3.61it/s] 93%|█████████▎| 346306/371472 [5:23:20<1:57:07, 3.58it/s] 93%|█████████▎| 346307/371472 [5:23:20<1:54:12, 3.67it/s] 93%|█████████▎| 346308/371472 [5:23:20<1:58:38, 3.53it/s] 93%|█████████▎| 346309/371472 [5:23:21<2:03:21, 3.40it/s] 93%|█████████▎| 346310/371472 [5:23:21<2:00:36, 3.48it/s] 93%|█████████▎| 346311/371472 [5:23:21<1:54:18, 3.67it/s] 93%|█████████▎| 346312/371472 [5:23:22<2:05:13, 3.35it/s] 93%|█████████▎| 346313/371472 [5:23:22<2:01:53, 3.44it/s] 93%|█████████▎| 346314/371472 [5:23:22<1:57:25, 3.57it/s] 93%|█████████▎| 346315/371472 [5:23:22<1:59:51, 3.50it/s] 93%|█████████▎| 346316/371472 [5:23:23<2:09:14, 3.24it/s] 93%|█████████▎| 346317/371472 [5:23:23<2:06:31, 3.31it/s] 93%|█████████▎| 346318/371472 [5:23:23<2:01:00, 3.46it/s] 93%|█████████▎| 346319/371472 [5:23:24<2:10:16, 3.22it/s] 93%|█████████▎| 346320/371472 [5:23:24<2:10:49, 3.20it/s] {'loss': 2.6988, 'learning_rate': 1.6097093236225734e-07, 'epoch': 14.92} + 93%|█████████▎| 346320/371472 [5:23:24<2:10:49, 3.20it/s] 93%|█████████▎| 346321/371472 [5:23:24<2:08:34, 3.26it/s] 93%|█████████▎| 346322/371472 [5:23:25<2:08:19, 3.27it/s] 93%|█████████▎| 346323/371472 [5:23:25<1:59:40, 3.50it/s] 93%|█████████▎| 346324/371472 [5:23:25<1:57:42, 3.56it/s] 93%|█████████▎| 346325/371472 [5:23:25<1:59:35, 3.50it/s] 93%|█████████▎| 346326/371472 [5:23:26<1:57:45, 3.56it/s] 93%|█████████▎| 346327/371472 [5:23:26<1:52:51, 3.71it/s] 93%|█████████▎| 346328/371472 [5:23:26<1:54:51, 3.65it/s] 93%|█████████▎| 346329/371472 [5:23:26<1:55:54, 3.62it/s] 93%|█████████▎| 346330/371472 [5:23:27<1:54:18, 3.67it/s] 93%|█████████▎| 346331/371472 [5:23:27<1:54:00, 3.68it/s] 93%|█████████▎| 346332/371472 [5:23:27<1:59:06, 3.52it/s] 93%|█████████▎| 346333/371472 [5:23:28<1:54:08, 3.67it/s] 93%|█████████▎| 346334/371472 [5:23:28<2:03:35, 3.39it/s] 93%|█████████▎| 346335/371472 [5:23:28<2:00:23, 3.48it/s] 93%|█████████▎| 346336/371472 [5:23:29<2:06:36, 3.31it/s] 93%|█████████▎| 346337/371472 [5:23:29<2:06:46, 3.30it/s] 93%|█████████▎| 346338/371472 [5:23:29<2:03:44, 3.39it/s] 93%|█████████▎| 346339/371472 [5:23:29<2:11:14, 3.19it/s] 93%|█████████▎| 346340/371472 [5:23:30<2:02:36, 3.42it/s] {'loss': 2.7223, 'learning_rate': 1.609224503867784e-07, 'epoch': 14.92} + 93%|█████████▎| 346340/371472 [5:23:30<2:02:36, 3.42it/s] 93%|█████████▎| 346341/371472 [5:23:30<1:58:03, 3.55it/s] 93%|█████████▎| 346342/371472 [5:23:30<1:54:21, 3.66it/s] 93%|█████████▎| 346343/371472 [5:23:30<1:58:42, 3.53it/s] 93%|█████████▎| 346344/371472 [5:23:31<1:58:27, 3.54it/s] 93%|█████████▎| 346345/371472 [5:23:31<1:59:35, 3.50it/s] 93%|█████████▎| 346346/371472 [5:23:31<2:01:23, 3.45it/s] 93%|█████████▎| 346347/371472 [5:23:32<2:09:29, 3.23it/s] 93%|█████████▎| 346348/371472 [5:23:32<2:07:09, 3.29it/s] 93%|█████████▎| 346349/371472 [5:23:32<1:59:55, 3.49it/s] 93%|█████████▎| 346350/371472 [5:23:33<1:54:52, 3.64it/s] 93%|█████████▎| 346351/371472 [5:23:33<1:55:53, 3.61it/s] 93%|█████████▎| 346352/371472 [5:23:33<1:50:49, 3.78it/s] 93%|█████████▎| 346353/371472 [5:23:33<1:49:21, 3.83it/s] 93%|█████████▎| 346354/371472 [5:23:34<1:51:04, 3.77it/s] 93%|█████████▎| 346355/371472 [5:23:34<2:00:55, 3.46it/s] 93%|█████████▎| 346356/371472 [5:23:34<1:56:56, 3.58it/s] 93%|█████████▎| 346357/371472 [5:23:34<1:58:16, 3.54it/s] 93%|█████████▎| 346358/371472 [5:23:35<1:58:51, 3.52it/s] 93%|█████████▎| 346359/371472 [5:23:35<1:55:59, 3.61it/s] 93%|█████████▎| 346360/371472 [5:23:35<1:55:55, 3.61it/s] {'loss': 2.5473, 'learning_rate': 1.6087396841129954e-07, 'epoch': 14.92} + 93%|█████████▎| 346360/371472 [5:23:35<1:55:55, 3.61it/s] 93%|█████████▎| 346361/371472 [5:23:36<1:54:22, 3.66it/s] 93%|█████████▎| 346362/371472 [5:23:36<1:57:30, 3.56it/s] 93%|█████████▎| 346363/371472 [5:23:36<1:55:13, 3.63it/s] 93%|█████████▎| 346364/371472 [5:23:36<1:56:33, 3.59it/s] 93%|█████████▎| 346365/371472 [5:23:37<1:56:57, 3.58it/s] 93%|█████████▎| 346366/371472 [5:23:37<2:04:16, 3.37it/s] 93%|█████████▎| 346367/371472 [5:23:37<2:00:53, 3.46it/s] 93%|█████████▎| 346368/371472 [5:23:38<1:56:01, 3.61it/s] 93%|█████████▎| 346369/371472 [5:23:38<1:54:13, 3.66it/s] 93%|█████████▎| 346370/371472 [5:23:38<1:52:49, 3.71it/s] 93%|█████████▎| 346371/371472 [5:23:38<1:49:46, 3.81it/s] 93%|█████████▎| 346372/371472 [5:23:39<1:51:57, 3.74it/s] 93%|█████████▎| 346373/371472 [5:23:39<1:48:30, 3.86it/s] 93%|█████████▎| 346374/371472 [5:23:39<1:54:37, 3.65it/s] 93%|█████████▎| 346375/371472 [5:23:39<1:55:49, 3.61it/s] 93%|█████████▎| 346376/371472 [5:23:40<2:02:17, 3.42it/s] 93%|█████████▎| 346377/371472 [5:23:40<1:57:41, 3.55it/s] 93%|█████████▎| 346378/371472 [5:23:40<1:54:35, 3.65it/s] 93%|█████████▎| 346379/371472 [5:23:40<1:48:45, 3.85it/s] 93%|█████████▎| 346380/371472 [5:23:41<1:55:15, 3.63it/s] {'loss': 2.7332, 'learning_rate': 1.608254864358206e-07, 'epoch': 14.92} + 93%|█████████▎| 346380/371472 [5:23:41<1:55:15, 3.63it/s] 93%|█████████▎| 346381/371472 [5:23:41<1:52:12, 3.73it/s] 93%|█████████▎| 346382/371472 [5:23:41<1:50:39, 3.78it/s] 93%|█████████▎| 346383/371472 [5:23:42<1:57:03, 3.57it/s] 93%|█████████▎| 346384/371472 [5:23:42<2:00:46, 3.46it/s] 93%|█████████▎| 346385/371472 [5:23:42<2:19:25, 3.00it/s] 93%|█████████▎| 346386/371472 [5:23:43<2:10:09, 3.21it/s] 93%|█████████▎| 346387/371472 [5:23:43<2:03:57, 3.37it/s] 93%|█████████▎| 346388/371472 [5:23:43<1:57:51, 3.55it/s] 93%|█████████▎| 346389/371472 [5:23:43<1:56:34, 3.59it/s] 93%|█████████▎| 346390/371472 [5:23:44<1:55:46, 3.61it/s] 93%|█████████▎| 346391/371472 [5:23:44<1:54:00, 3.67it/s] 93%|█████████▎| 346392/371472 [5:23:44<1:52:50, 3.70it/s] 93%|█████████▎| 346393/371472 [5:23:45<2:04:10, 3.37it/s] 93%|█████████▎| 346394/371472 [5:23:45<2:00:51, 3.46it/s] 93%|█████████��| 346395/371472 [5:23:45<1:57:32, 3.56it/s] 93%|█████████▎| 346396/371472 [5:23:45<1:55:11, 3.63it/s] 93%|█████████▎| 346397/371472 [5:23:46<2:02:44, 3.40it/s] 93%|█████████▎| 346398/371472 [5:23:46<2:01:51, 3.43it/s] 93%|█████████▎| 346399/371472 [5:23:46<2:15:36, 3.08it/s] 93%|█████████▎| 346400/371472 [5:23:47<2:23:58, 2.90it/s] {'loss': 2.6718, 'learning_rate': 1.6077700446034179e-07, 'epoch': 14.92} + 93%|█████████▎| 346400/371472 [5:23:47<2:23:58, 2.90it/s] 93%|█████████▎| 346401/371472 [5:23:47<2:30:04, 2.78it/s] 93%|█████████▎| 346402/371472 [5:23:47<2:21:34, 2.95it/s] 93%|█████████▎| 346403/371472 [5:23:48<2:16:46, 3.05it/s] 93%|█████████▎| 346404/371472 [5:23:48<2:18:34, 3.01it/s] 93%|█████████▎| 346405/371472 [5:23:48<2:09:18, 3.23it/s] 93%|█████████▎| 346406/371472 [5:23:49<2:05:36, 3.33it/s] 93%|█████████▎| 346407/371472 [5:23:49<2:05:45, 3.32it/s] 93%|█████████▎| 346408/371472 [5:23:49<2:01:05, 3.45it/s] 93%|█████████▎| 346409/371472 [5:23:49<1:59:55, 3.48it/s] 93%|█████████▎| 346410/371472 [5:23:50<1:59:14, 3.50it/s] 93%|█████████▎| 346411/371472 [5:23:50<1:54:20, 3.65it/s] 93%|█████████▎| 346412/371472 [5:23:50<1:55:58, 3.60it/s] 93%|█████████▎| 346413/371472 [5:23:51<2:00:37, 3.46it/s] 93%|█████████▎| 346414/371472 [5:23:51<1:57:31, 3.55it/s] 93%|█████████▎| 346415/371472 [5:23:51<1:53:07, 3.69it/s] 93%|█████████▎| 346416/371472 [5:23:51<1:55:19, 3.62it/s] 93%|█████████▎| 346417/371472 [5:23:52<1:52:59, 3.70it/s] 93%|█████████▎| 346418/371472 [5:23:52<1:57:12, 3.56it/s] 93%|█████████▎| 346419/371472 [5:23:52<1:59:06, 3.51it/s] 93%|█████████▎| 346420/371472 [5:23:53<1:58:26, 3.53it/s] {'loss': 2.5235, 'learning_rate': 1.6072852248486283e-07, 'epoch': 14.92} + 93%|█████████▎| 346420/371472 [5:23:53<1:58:26, 3.53it/s] 93%|█████████▎| 346421/371472 [5:23:53<1:56:16, 3.59it/s] 93%|█████████▎| 346422/371472 [5:23:53<1:52:37, 3.71it/s] 93%|█████████▎| 346423/371472 [5:23:53<1:52:00, 3.73it/s] 93%|█████████▎| 346424/371472 [5:23:54<1:51:55, 3.73it/s] 93%|█████████▎| 346425/371472 [5:23:54<1:52:22, 3.71it/s] 93%|█████████▎| 346426/371472 [5:23:54<1:55:06, 3.63it/s] 93%|█████████▎| 346427/371472 [5:23:54<1:57:37, 3.55it/s] 93%|█████████▎| 346428/371472 [5:23:55<2:06:23, 3.30it/s] 93%|█████████▎| 346429/371472 [5:23:55<2:16:11, 3.06it/s] 93%|█████████▎| 346430/371472 [5:23:55<2:12:44, 3.14it/s] 93%|█████████▎| 346431/371472 [5:23:56<2:11:59, 3.16it/s] 93%|█████████▎| 346432/371472 [5:23:56<2:06:15, 3.31it/s] 93%|█████████▎| 346433/371472 [5:23:56<2:07:03, 3.28it/s] 93%|█████████▎| 346434/371472 [5:23:57<2:05:57, 3.31it/s] 93%|█████████▎| 346435/371472 [5:23:57<2:04:36, 3.35it/s] 93%|█████████▎| 346436/371472 [5:23:57<2:05:01, 3.34it/s] 93%|█████████▎| 346437/371472 [5:23:58<2:03:24, 3.38it/s] 93%|█████████▎| 346438/371472 [5:23:58<2:01:22, 3.44it/s] 93%|█████████▎| 346439/371472 [5:23:58<2:00:32, 3.46it/s] 93%|█████████▎| 346440/371472 [5:23:58<2:05:02, 3.34it/s] {'loss': 2.4456, 'learning_rate': 1.6068004050938396e-07, 'epoch': 14.92} + 93%|█████████▎| 346440/371472 [5:23:58<2:05:02, 3.34it/s] 93%|█████████▎| 346441/371472 [5:23:59<1:59:44, 3.48it/s] 93%|█████████▎| 346442/371472 [5:23:59<2:02:08, 3.42it/s] 93%|█████████▎| 346443/371472 [5:23:59<1:58:15, 3.53it/s] 93%|█████████▎| 346444/371472 [5:24:00<1:56:23, 3.58it/s] 93%|█████████▎| 346445/371472 [5:24:00<1:54:26, 3.65it/s] 93%|█████████▎| 346446/371472 [5:24:00<1:55:14, 3.62it/s] 93%|█████████▎| 346447/371472 [5:24:00<1:55:43, 3.60it/s] 93%|█████████▎| 346448/371472 [5:24:01<1:57:30, 3.55it/s] 93%|█████████▎| 346449/371472 [5:24:01<1:54:43, 3.64it/s] 93%|█████████▎| 346450/371472 [5:24:01<1:52:26, 3.71it/s] 93%|█████████▎| 346451/371472 [5:24:01<1:50:16, 3.78it/s] 93%|█████████▎| 346452/371472 [5:24:02<1:51:14, 3.75it/s] 93%|█████████▎| 346453/371472 [5:24:02<1:50:09, 3.79it/s] 93%|█████████▎| 346454/371472 [5:24:02<1:47:59, 3.86it/s] 93%|█████████▎| 346455/371472 [5:24:03<1:56:53, 3.57it/s] 93%|█████████▎| 346456/371472 [5:24:03<1:57:37, 3.54it/s] 93%|█████████▎| 346457/371472 [5:24:03<2:00:27, 3.46it/s] 93%|█████████▎| 346458/371472 [5:24:03<1:58:19, 3.52it/s] 93%|█████████▎| 346459/371472 [5:24:04<1:56:24, 3.58it/s] 93%|█████████▎| 346460/371472 [5:24:04<2:11:08, 3.18it/s] {'loss': 2.5202, 'learning_rate': 1.6063155853390505e-07, 'epoch': 14.92} + 93%|█████████▎| 346460/371472 [5:24:04<2:11:08, 3.18it/s] 93%|█████████▎| 346461/371472 [5:24:04<2:07:15, 3.28it/s] 93%|█████████▎| 346462/371472 [5:24:05<2:01:07, 3.44it/s] 93%|█████████▎| 346463/371472 [5:24:05<1:57:09, 3.56it/s] 93%|█████████▎| 346464/371472 [5:24:05<1:58:33, 3.52it/s] 93%|█████████▎| 346465/371472 [5:24:06<2:14:55, 3.09it/s] 93%|█████████▎| 346466/371472 [5:24:06<2:14:15, 3.10it/s] 93%|█████████▎| 346467/371472 [5:24:06<2:11:33, 3.17it/s] 93%|█████████▎| 346468/371472 [5:24:07<2:20:38, 2.96it/s] 93%|█████████▎| 346469/371472 [5:24:07<2:10:01, 3.20it/s] 93%|█████████▎| 346470/371472 [5:24:07<2:03:48, 3.37it/s] 93%|█████████▎| 346471/371472 [5:24:07<1:57:49, 3.54it/s] 93%|█████████▎| 346472/371472 [5:24:08<1:55:26, 3.61it/s] 93%|█████████▎| 346473/371472 [5:24:08<2:04:13, 3.35it/s] 93%|█████████▎| 346474/371472 [5:24:08<2:06:02, 3.31it/s] 93%|█████████▎| 346475/371472 [5:24:09<2:04:50, 3.34it/s] 93%|█████████▎| 346476/371472 [5:24:09<2:08:25, 3.24it/s] 93%|█████████▎| 346477/371472 [5:24:09<2:05:53, 3.31it/s] 93%|█████████▎| 346478/371472 [5:24:09<2:04:05, 3.36it/s] 93%|█████████▎| 346479/371472 [5:24:10<1:59:57, 3.47it/s] 93%|█████████▎| 346480/371472 [5:24:10<1:57:29, 3.55it/s] {'loss': 2.6117, 'learning_rate': 1.605830765584262e-07, 'epoch': 14.92} + 93%|█████████▎| 346480/371472 [5:24:10<1:57:29, 3.55it/s] 93%|█████████▎| 346481/371472 [5:24:10<2:03:52, 3.36it/s] 93%|█████████▎| 346482/371472 [5:24:11<1:59:54, 3.47it/s] 93%|█████████▎| 346483/371472 [5:24:11<1:54:40, 3.63it/s] 93%|█████████▎| 346484/371472 [5:24:11<1:51:52, 3.72it/s] 93%|█████████▎| 346485/371472 [5:24:11<1:50:47, 3.76it/s] 93%|█████████▎| 346486/371472 [5:24:12<1:51:40, 3.73it/s] 93%|█████████▎| 346487/371472 [5:24:12<1:58:55, 3.50it/s] 93%|█████████▎| 346488/371472 [5:24:12<1:59:51, 3.47it/s] 93%|█████████▎| 346489/371472 [5:24:13<1:58:04, 3.53it/s] 93%|█████████▎| 346490/371472 [5:24:13<1:53:52, 3.66it/s] 93%|█████████▎| 346491/371472 [5:24:13<1:56:41, 3.57it/s] 93%|█████████▎| 346492/371472 [5:24:13<1:55:34, 3.60it/s] 93%|█████████▎| 346493/371472 [5:24:14<2:03:56, 3.36it/s] 93%|█████████▎| 346494/371472 [5:24:14<2:00:17, 3.46it/s] 93%|█████████▎| 346495/371472 [5:24:14<2:01:51, 3.42it/s] 93%|█████████▎| 346496/371472 [5:24:15<2:01:53, 3.41it/s] 93%|█████████▎| 346497/371472 [5:24:15<2:03:20, 3.37it/s] 93%|█████████▎| 346498/371472 [5:24:15<1:56:16, 3.58it/s] 93%|█████████▎| 346499/371472 [5:24:15<1:52:52, 3.69it/s] 93%|█████████▎| 346500/371472 [5:24:16<1:52:27, 3.70it/s] {'loss': 2.6368, 'learning_rate': 1.6053459458294725e-07, 'epoch': 14.92} + 93%|█████████▎| 346500/371472 [5:24:16<1:52:27, 3.70it/s] 93%|█████████▎| 346501/371472 [5:24:16<1:55:59, 3.59it/s] 93%|█████████▎| 346502/371472 [5:24:16<1:59:25, 3.48it/s] 93%|█████████▎| 346503/371472 [5:24:17<2:00:31, 3.45it/s] 93%|█████████▎| 346504/371472 [5:24:17<1:55:38, 3.60it/s] 93%|█████████▎| 346505/371472 [5:24:17<2:07:39, 3.26it/s] 93%|█████████▎| 346506/371472 [5:24:17<2:02:10, 3.41it/s] 93%|████��████▎| 346507/371472 [5:24:18<1:57:29, 3.54it/s] 93%|█████████▎| 346508/371472 [5:24:18<1:57:23, 3.54it/s] 93%|█████████▎| 346509/371472 [5:24:18<1:56:01, 3.59it/s] 93%|█████████▎| 346510/371472 [5:24:18<1:56:12, 3.58it/s] 93%|█████████▎| 346511/371472 [5:24:19<1:53:50, 3.65it/s] 93%|█████████▎| 346512/371472 [5:24:19<1:56:29, 3.57it/s] 93%|█████████▎| 346513/371472 [5:24:19<1:56:51, 3.56it/s] 93%|█████████▎| 346514/371472 [5:24:20<1:55:26, 3.60it/s] 93%|█████████▎| 346515/371472 [5:24:20<1:52:36, 3.69it/s] 93%|█████████▎| 346516/371472 [5:24:20<1:59:03, 3.49it/s] 93%|█████████▎| 346517/371472 [5:24:20<1:56:47, 3.56it/s] 93%|█████████▎| 346518/371472 [5:24:21<1:53:24, 3.67it/s] 93%|█████████▎| 346519/371472 [5:24:21<1:51:46, 3.72it/s] 93%|█████████▎| 346520/371472 [5:24:21<1:50:16, 3.77it/s] {'loss': 2.6138, 'learning_rate': 1.6048611260746843e-07, 'epoch': 14.93} + 93%|█████████▎| 346520/371472 [5:24:21<1:50:16, 3.77it/s] 93%|█████████▎| 346521/371472 [5:24:21<1:49:29, 3.80it/s] 93%|█████████▎| 346522/371472 [5:24:22<1:52:13, 3.71it/s] 93%|█████████▎| 346523/371472 [5:24:22<1:58:25, 3.51it/s] 93%|█████████▎| 346524/371472 [5:24:22<2:01:56, 3.41it/s] 93%|█████████▎| 346525/371472 [5:24:23<2:02:13, 3.40it/s] 93%|█████████▎| 346526/371472 [5:24:23<1:56:54, 3.56it/s] 93%|█████████▎| 346527/371472 [5:24:23<1:54:27, 3.63it/s] 93%|█████████▎| 346528/371472 [5:24:23<1:56:04, 3.58it/s] 93%|█████████▎| 346529/371472 [5:24:24<2:04:34, 3.34it/s] 93%|█████████▎| 346530/371472 [5:24:24<1:57:14, 3.55it/s] 93%|█████████▎| 346531/371472 [5:24:24<2:00:51, 3.44it/s] 93%|█████████▎| 346532/371472 [5:24:25<1:57:57, 3.52it/s] 93%|█████████▎| 346533/371472 [5:24:25<1:55:14, 3.61it/s] 93%|█████████▎| 346534/371472 [5:24:25<2:00:20, 3.45it/s] 93%|█████████▎| 346535/371472 [5:24:25<1:57:22, 3.54it/s] 93%|█████████▎| 346536/371472 [5:24:26<1:54:07, 3.64it/s] 93%|█████████▎| 346537/371472 [5:24:26<1:55:32, 3.60it/s] 93%|█████████▎| 346538/371472 [5:24:26<1:58:05, 3.52it/s] 93%|█████████▎| 346539/371472 [5:24:27<1:56:40, 3.56it/s] 93%|█████████▎| 346540/371472 [5:24:27<2:01:22, 3.42it/s] {'loss': 2.5027, 'learning_rate': 1.6043763063198947e-07, 'epoch': 14.93} + 93%|█████████▎| 346540/371472 [5:24:27<2:01:22, 3.42it/s] 93%|█████████▎| 346541/371472 [5:24:27<2:01:08, 3.43it/s] 93%|█████████▎| 346542/371472 [5:24:27<1:56:11, 3.58it/s] 93%|█████████▎| 346543/371472 [5:24:28<1:59:08, 3.49it/s] 93%|█████████▎| 346544/371472 [5:24:28<1:59:07, 3.49it/s] 93%|█████████▎| 346545/371472 [5:24:28<2:03:51, 3.35it/s] 93%|█████████▎| 346546/371472 [5:24:29<2:04:18, 3.34it/s] 93%|█████████▎| 346547/371472 [5:24:29<1:59:15, 3.48it/s] 93%|█████████▎| 346548/371472 [5:24:29<1:53:48, 3.65it/s] 93%|█████████▎| 346549/371472 [5:24:30<2:01:26, 3.42it/s] 93%|█████████▎| 346550/371472 [5:24:30<2:01:03, 3.43it/s] 93%|█████████▎| 346551/371472 [5:24:30<1:59:30, 3.48it/s] 93%|█████████▎| 346552/371472 [5:24:30<1:58:35, 3.50it/s] 93%|█████████▎| 346553/371472 [5:24:31<2:13:16, 3.12it/s] 93%|█████████▎| 346554/371472 [5:24:31<2:05:44, 3.30it/s] 93%|█████████▎| 346555/371472 [5:24:31<2:01:15, 3.42it/s] 93%|█████████▎| 346556/371472 [5:24:32<2:07:22, 3.26it/s] 93%|█████████▎| 346557/371472 [5:24:32<2:06:30, 3.28it/s] 93%|█████████▎| 346558/371472 [5:24:32<2:03:46, 3.35it/s] 93%|█████████▎| 346559/371472 [5:24:32<2:01:10, 3.43it/s] 93%|█████████▎| 346560/371472 [5:24:33<1:55:21, 3.60it/s] {'loss': 2.5706, 'learning_rate': 1.6038914865651062e-07, 'epoch': 14.93} + 93%|█████████▎| 346560/371472 [5:24:33<1:55:21, 3.60it/s] 93%|█████████▎| 346561/371472 [5:24:33<1:59:46, 3.47it/s] 93%|█████████▎| 346562/371472 [5:24:33<2:11:08, 3.17it/s] 93%|█████████▎| 346563/371472 [5:24:34<2:09:04, 3.22it/s] 93%|█████████▎| 346564/371472 [5:24:34<2:09:15, 3.21it/s] 93%|█████████▎| 346565/371472 [5:24:34<2:02:26, 3.39it/s] 93%|█████████▎| 346566/371472 [5:24:35<2:08:33, 3.23it/s] 93%|█████████▎| 346567/371472 [5:24:35<2:03:06, 3.37it/s] 93%|█████████▎| 346568/371472 [5:24:35<2:04:33, 3.33it/s] 93%|█████████▎| 346569/371472 [5:24:35<1:59:49, 3.46it/s] 93%|█████████▎| 346570/371472 [5:24:36<2:01:34, 3.41it/s] 93%|█████████▎| 346571/371472 [5:24:36<1:58:53, 3.49it/s] 93%|█████████▎| 346572/371472 [5:24:36<1:53:17, 3.66it/s] 93%|█████████▎| 346573/371472 [5:24:37<1:56:58, 3.55it/s] 93%|█████████▎| 346574/371472 [5:24:37<1:56:50, 3.55it/s] 93%|█████████▎| 346575/371472 [5:24:37<1:59:25, 3.47it/s] 93%|█████████▎| 346576/371472 [5:24:37<1:58:14, 3.51it/s] 93%|█████████▎| 346577/371472 [5:24:38<1:59:00, 3.49it/s] 93%|█████████▎| 346578/371472 [5:24:38<2:02:35, 3.38it/s] 93%|█████████▎| 346579/371472 [5:24:38<1:55:33, 3.59it/s] 93%|█████████▎| 346580/371472 [5:24:39<1:51:13, 3.73it/s] {'loss': 2.7174, 'learning_rate': 1.603406666810317e-07, 'epoch': 14.93} + 93%|█████████▎| 346580/371472 [5:24:39<1:51:13, 3.73it/s] 93%|█████████▎| 346581/371472 [5:24:39<1:54:10, 3.63it/s] 93%|█████████▎| 346582/371472 [5:24:39<1:55:34, 3.59it/s] 93%|█████████▎| 346583/371472 [5:24:39<1:56:19, 3.57it/s] 93%|█████████▎| 346584/371472 [5:24:40<2:01:49, 3.41it/s] 93%|█████████▎| 346585/371472 [5:24:40<1:59:39, 3.47it/s] 93%|█████████▎| 346586/371472 [5:24:40<1:59:46, 3.46it/s] 93%|█████████▎| 346587/371472 [5:24:41<1:55:54, 3.58it/s] 93%|█████████▎| 346588/371472 [5:24:41<1:57:47, 3.52it/s] 93%|█████████▎| 346589/371472 [5:24:41<1:51:58, 3.70it/s] 93%|█████████▎| 346590/371472 [5:24:41<2:03:43, 3.35it/s] 93%|█████████▎| 346591/371472 [5:24:42<1:59:48, 3.46it/s] 93%|█████████▎| 346592/371472 [5:24:42<1:57:15, 3.54it/s] 93%|█████████▎| 346593/371472 [5:24:42<1:56:31, 3.56it/s] 93%|█████████▎| 346594/371472 [5:24:43<1:54:52, 3.61it/s] 93%|█████████▎| 346595/371472 [5:24:43<1:51:56, 3.70it/s] 93%|█████████▎| 346596/371472 [5:24:43<1:52:54, 3.67it/s] 93%|█████████▎| 346597/371472 [5:24:43<1:51:59, 3.70it/s] 93%|█████████▎| 346598/371472 [5:24:44<1:51:45, 3.71it/s] 93%|█████████▎| 346599/371472 [5:24:44<1:51:39, 3.71it/s] 93%|█████████▎| 346600/371472 [5:24:44<1:53:55, 3.64it/s] {'loss': 2.5913, 'learning_rate': 1.6029218470555274e-07, 'epoch': 14.93} + 93%|█████████▎| 346600/371472 [5:24:44<1:53:55, 3.64it/s] 93%|█████████▎| 346601/371472 [5:24:44<1:52:50, 3.67it/s] 93%|█████████▎| 346602/371472 [5:24:45<1:53:34, 3.65it/s] 93%|█████████▎| 346603/371472 [5:24:45<1:55:08, 3.60it/s] 93%|█████████▎| 346604/371472 [5:24:45<1:55:45, 3.58it/s] 93%|█████████▎| 346605/371472 [5:24:46<1:51:15, 3.73it/s] 93%|█████████▎| 346606/371472 [5:24:46<1:54:12, 3.63it/s] 93%|█████████▎| 346607/371472 [5:24:46<1:55:28, 3.59it/s] 93%|█████████▎| 346608/371472 [5:24:46<1:55:17, 3.59it/s] 93%|█████████▎| 346609/371472 [5:24:47<1:51:24, 3.72it/s] 93%|█████████▎| 346610/371472 [5:24:47<2:00:35, 3.44it/s] 93%|█████████▎| 346611/371472 [5:24:47<2:01:01, 3.42it/s] 93%|█████████▎| 346612/371472 [5:24:48<1:59:35, 3.46it/s] 93%|█████████▎| 346613/371472 [5:24:48<2:24:13, 2.87it/s] 93%|█████████▎| 346614/371472 [5:24:48<2:14:40, 3.08it/s] 93%|█████████▎| 346615/371472 [5:24:49<2:16:50, 3.03it/s] 93%|█████████▎| 346616/371472 [5:24:49<2:09:14, 3.21it/s] 93%|█████████▎| 346617/371472 [5:24:49<2:05:45, 3.29it/s] 93%|█████████▎| 346618/371472 [5:24:50<2:09:40, 3.19it/s] 93%|█████████▎| 346619/371472 [5:24:50<2:03:07, 3.36it/s] 93%|█████████▎| 346620/371472 [5:24:50<2:05:27, 3.30it/s] {'loss': 2.631, 'learning_rate': 1.602437027300739e-07, 'epoch': 14.93} + 93%|█████████▎| 346620/371472 [5:24:50<2:05:27, 3.30it/s] 93%|█████████▎| 346621/371472 [5:24:50<2:13:11, 3.11it/s] 93%|█████████▎| 346622/371472 [5:24:51<2:07:09, 3.26it/s] 93%|█████████▎| 346623/371472 [5:24:51<2:02:52, 3.37it/s] 93%|█████████▎| 346624/371472 [5:24:51<1:58:12, 3.50it/s] 93%|█████████▎| 346625/371472 [5:24:52<1:57:52, 3.51it/s] 93%|█████████▎| 346626/371472 [5:24:52<1:55:18, 3.59it/s] 93%|█████████▎| 346627/371472 [5:24:52<1:55:41, 3.58it/s] 93%|█████████▎| 346628/371472 [5:24:52<1:56:28, 3.55it/s] 93%|█████████▎| 346629/371472 [5:24:53<2:02:11, 3.39it/s] 93%|█████████▎| 346630/371472 [5:24:53<1:56:54, 3.54it/s] 93%|█████████▎| 346631/371472 [5:24:53<2:01:28, 3.41it/s] 93%|█████████▎| 346632/371472 [5:24:54<2:03:51, 3.34it/s] 93%|█████████▎| 346633/371472 [5:24:54<1:59:31, 3.46it/s] 93%|█████████▎| 346634/371472 [5:24:54<1:55:20, 3.59it/s] 93%|█████████▎| 346635/371472 [5:24:54<1:54:01, 3.63it/s] 93%|█████████▎| 346636/371472 [5:24:55<1:51:54, 3.70it/s] 93%|█████████▎| 346637/371472 [5:24:55<1:51:29, 3.71it/s] 93%|█████████▎| 346638/371472 [5:24:55<1:52:51, 3.67it/s] 93%|█████████▎| 346639/371472 [5:24:56<2:11:53, 3.14it/s] 93%|█████████▎| 346640/371472 [5:24:56<2:24:28, 2.86it/s] {'loss': 2.4502, 'learning_rate': 1.6019522075459496e-07, 'epoch': 14.93} + 93%|█████████▎| 346640/371472 [5:24:56<2:24:28, 2.86it/s] 93%|█████████▎| 346641/371472 [5:24:56<2:15:51, 3.05it/s] 93%|█████████▎| 346642/371472 [5:24:57<2:27:01, 2.81it/s] 93%|█████████▎| 346643/371472 [5:24:57<2:14:48, 3.07it/s] 93%|█████████▎| 346644/371472 [5:24:57<2:06:50, 3.26it/s] 93%|█████████▎| 346645/371472 [5:24:58<2:12:46, 3.12it/s] 93%|█████████▎| 346646/371472 [5:24:58<2:07:30, 3.24it/s] 93%|█████████▎| 346647/371472 [5:24:58<2:18:45, 2.98it/s] 93%|█████████▎| 346648/371472 [5:24:59<2:12:33, 3.12it/s] 93%|█████████▎| 346649/371472 [5:24:59<2:05:13, 3.30it/s] 93%|█████████▎| 346650/371472 [5:24:59<2:03:39, 3.35it/s] 93%|█████████▎| 346651/371472 [5:24:59<2:02:48, 3.37it/s] 93%|█████████▎| 346652/371472 [5:25:00<2:01:28, 3.41it/s] 93%|█████████▎| 346653/371472 [5:25:00<1:58:51, 3.48it/s] 93%|█████████▎| 346654/371472 [5:25:00<1:54:12, 3.62it/s] 93%|█████████▎| 346655/371472 [5:25:01<1:54:29, 3.61it/s] 93%|█████████▎| 346656/371472 [5:25:01<1:50:30, 3.74it/s] 93%|█████████▎| 346657/371472 [5:25:01<1:55:36, 3.58it/s] 93%|█████████▎| 346658/371472 [5:25:01<1:57:45, 3.51it/s] 93%|█████████▎| 346659/371472 [5:25:02<1:57:13, 3.53it/s] 93%|█████████▎| 346660/371472 [5:25:02<1:52:40, 3.67it/s] {'loss': 2.6879, 'learning_rate': 1.6014673877911614e-07, 'epoch': 14.93} + 93%|█████████▎| 346660/371472 [5:25:02<1:52:40, 3.67it/s] 93%|█████████▎| 346661/371472 [5:25:02<1:55:48, 3.57it/s] 93%|█████████▎| 346662/371472 [5:25:02<1:54:21, 3.62it/s] 93%|█████████▎| 346663/371472 [5:25:03<1:56:45, 3.54it/s] 93%|█████████▎| 346664/371472 [5:25:03<2:00:25, 3.43it/s] 93%|█████████▎| 346665/371472 [5:25:03<2:02:16, 3.38it/s] 93%|█████████▎| 346666/371472 [5:25:04<2:01:08, 3.41it/s] 93%|█████████▎| 346667/371472 [5:25:04<1:57:14, 3.53it/s] 93%|█████████▎| 346668/371472 [5:25:04<1:56:57, 3.53it/s] 93%|█████████▎| 346669/371472 [5:25:04<1:55:07, 3.59it/s] 93%|█████████▎| 346670/371472 [5:25:05<1:59:44, 3.45it/s] 93%|█████████▎| 346671/371472 [5:25:05<1:58:49, 3.48it/s] 93%|█████████▎| 346672/371472 [5:25:05<1:56:42, 3.54it/s] 93%|█████████▎| 346673/371472 [5:25:06<1:55:20, 3.58it/s] 93%|█████████▎| 346674/371472 [5:25:06<1:59:11, 3.47it/s] 93%|█████████▎| 346675/371472 [5:25:06<2:01:48, 3.39it/s] 93%|█████████▎| 346676/371472 [5:25:06<1:59:09, 3.47it/s] 93%|█████████▎| 346677/371472 [5:25:07<1:54:08, 3.62it/s] 93%|█████████▎| 346678/371472 [5:25:07<1:53:52, 3.63it/s] 93%|█████████▎| 346679/371472 [5:25:07<1:59:35, 3.46it/s] 93%|█████████▎| 346680/371472 [5:25:08<2:00:42, 3.42it/s] {'loss': 2.5268, 'learning_rate': 1.6009825680363718e-07, 'epoch': 14.93} + 93%|█████████▎| 346680/371472 [5:25:08<2:00:42, 3.42it/s] 93%|█████████▎| 346681/371472 [5:25:08<1:54:37, 3.60it/s] 93%|█████████▎| 346682/371472 [5:25:08<1:55:49, 3.57it/s] 93%|█████████▎| 346683/371472 [5:25:08<1:56:32, 3.54it/s] 93%|█████████▎| 346684/371472 [5:25:09<1:59:03, 3.47it/s] 93%|█████████▎| 346685/371472 [5:25:09<1:56:37, 3.54it/s] 93%|█████████▎| 346686/371472 [5:25:09<1:54:39, 3.60it/s] 93%|█████████▎| 346687/371472 [5:25:10<1:56:09, 3.56it/s] 93%|█████████▎| 346688/371472 [5:25:10<1:57:28, 3.52it/s] 93%|█████████▎| 346689/371472 [5:25:10<1:54:03, 3.62it/s] 93%|█████████▎| 346690/371472 [5:25:10<1:53:57, 3.62it/s] 93%|█████████▎| 346691/371472 [5:25:11<1:54:12, 3.62it/s] 93%|█████████▎| 346692/371472 [5:25:11<1:54:24, 3.61it/s] 93%|█████████▎| 346693/371472 [5:25:11<1:53:02, 3.65it/s] 93%|█████████▎| 346694/371472 [5:25:12<1:53:09, 3.65it/s] 93%|█████████▎| 346695/371472 [5:25:12<1:52:05, 3.68it/s] 93%|█████████▎| 346696/371472 [5:25:12<1:52:05, 3.68it/s] 93%|█████████▎| 346697/371472 [5:25:12<2:05:32, 3.29it/s] 93%|█████████▎| 346698/371472 [5:25:13<2:01:55, 3.39it/s] 93%|█████████▎| 346699/371472 [5:25:13<2:00:30, 3.43it/s] 93%|█████████▎| 346700/371472 [5:25:13<2:01:34, 3.40it/s] {'loss': 2.6283, 'learning_rate': 1.6004977482815833e-07, 'epoch': 14.93} + 93%|█████████▎| 346700/371472 [5:25:13<2:01:34, 3.40it/s] 93%|█████████▎| 346701/371472 [5:25:14<2:01:36, 3.39it/s] 93%|█████████▎| 346702/371472 [5:25:14<2:00:34, 3.42it/s] 93%|█████████▎| 346703/371472 [5:25:14<2:00:40, 3.42it/s] 93%|█████████▎| 346704/371472 [5:25:14<1:55:37, 3.57it/s] 93%|█████████▎| 346705/371472 [5:25:15<1:57:20, 3.52it/s] 93%|█████████▎| 346706/371472 [5:25:15<2:06:37, 3.26it/s] 93%|█████████▎| 346707/371472 [5:25:15<2:02:07, 3.38it/s] 93%|█████████▎| 346708/371472 [5:25:16<1:57:26, 3.51it/s] 93%|█████████▎| 346709/371472 [5:25:16<1:56:42, 3.54it/s] 93%|█████████▎| 346710/371472 [5:25:16<1:54:24, 3.61it/s] 93%|█████████▎| 346711/371472 [5:25:16<1:54:52, 3.59it/s] 93%|█████████▎| 346712/371472 [5:25:17<1:49:35, 3.77it/s] 93%|█████████▎| 346713/371472 [5:25:17<2:05:21, 3.29it/s] 93%|█████████▎| 346714/371472 [5:25:17<1:59:22, 3.46it/s] 93%|█████████▎| 346715/371472 [5:25:18<1:59:24, 3.46it/s] 93%|█████████▎| 346716/371472 [5:25:18<1:53:55, 3.62it/s] 93%|█████████▎| 346717/371472 [5:25:18<2:07:03, 3.25it/s] 93%|█████████▎| 346718/371472 [5:25:18<2:00:06, 3.43it/s] 93%|█████████▎| 346719/371472 [5:25:19<2:02:22, 3.37it/s] 93%|█████████▎| 346720/371472 [5:25:19<2:08:54, 3.20it/s] {'loss': 2.5965, 'learning_rate': 1.600012928526794e-07, 'epoch': 14.93} + 93%|█████████▎| 346720/371472 [5:25:19<2:08:54, 3.20it/s] 93%|█████████▎| 346721/371472 [5:25:19<2:02:22, 3.37it/s] 93%|█████████▎| 346722/371472 [5:25:20<2:00:21, 3.43it/s] 93%|█████████▎| 346723/371472 [5:25:20<1:54:54, 3.59it/s] 93%|█████████▎| 346724/371472 [5:25:20<1:51:32, 3.70it/s] 93%|█████████▎| 346725/371472 [5:25:20<1:50:48, 3.72it/s] 93%|█████████▎| 346726/371472 [5:25:21<1:54:26, 3.60it/s] 93%|█████████▎| 346727/371472 [5:25:21<1:56:06, 3.55it/s] 93%|█████████▎| 346728/371472 [5:25:21<1:58:45, 3.47it/s] 93%|█████████▎| 346729/371472 [5:25:22<1:55:33, 3.57it/s] 93%|█████████▎| 346730/371472 [5:25:22<2:08:51, 3.20it/s] 93%|█████████▎| 346731/371472 [5:25:22<2:06:54, 3.25it/s] 93%|█████████▎| 346732/371472 [5:25:23<1:59:25, 3.45it/s] 93%|█████████▎| 346733/371472 [5:25:23<2:00:10, 3.43it/s] 93%|█████████▎| 346734/371472 [5:25:23<2:00:19, 3.43it/s] 93%|█████████▎| 346735/371472 [5:25:23<2:02:22, 3.37it/s] 93%|█████████▎| 346736/371472 [5:25:24<1:58:28, 3.48it/s] 93%|█████████▎| 346737/371472 [5:25:24<2:05:24, 3.29it/s] 93%|█████████▎| 346738/371472 [5:25:24<2:02:24, 3.37it/s] 93%|█████████▎| 346739/371472 [5:25:25<2:03:02, 3.35it/s] 93%|█████████▎| 346740/371472 [5:25:25<2:04:21, 3.31it/s] {'loss': 2.5719, 'learning_rate': 1.5995281087720053e-07, 'epoch': 14.93} + 93%|█████████▎| 346740/371472 [5:25:25<2:04:21, 3.31it/s] 93%|█████████▎| 346741/371472 [5:25:25<2:01:09, 3.40it/s] 93%|█████████▎| 346742/371472 [5:25:25<1:56:32, 3.54it/s] 93%|█████████▎| 346743/371472 [5:25:26<1:53:46, 3.62it/s] 93%|█████████▎| 346744/371472 [5:25:26<1:50:14, 3.74it/s] 93%|█████████▎| 346745/371472 [5:25:26<1:52:37, 3.66it/s] 93%|█████████▎| 346746/371472 [5:25:27<1:53:56, 3.62it/s] 93%|█████████▎| 346747/371472 [5:25:27<1:58:25, 3.48it/s] 93%|█████████▎| 346748/371472 [5:25:27<1:57:13, 3.52it/s] 93%|█████████▎| 346749/371472 [5:25:27<1:52:58, 3.65it/s] 93%|█████████▎| 346750/371472 [5:25:28<1:52:14, 3.67it/s] 93%|█████████▎| 346751/371472 [5:25:28<1:53:17, 3.64it/s] 93%|█████████▎| 346752/371472 [5:25:28<1:57:20, 3.51it/s] 93%|█████████▎| 346753/371472 [5:25:29<1:57:59, 3.49it/s] 93%|█████████▎| 346754/371472 [5:25:29<1:53:02, 3.64it/s] 93%|█████████▎| 346755/371472 [5:25:29<1:53:24, 3.63it/s] 93%|█████████▎| 346756/371472 [5:25:29<1:53:43, 3.62it/s] 93%|█████████▎| 346757/371472 [5:25:30<1:54:22, 3.60it/s] 93%|█████████▎| 346758/371472 [5:25:30<1:53:00, 3.65it/s] 93%|█████████▎| 346759/371472 [5:25:30<1:51:05, 3.71it/s] 93%|█████████▎| 346760/371472 [5:25:30<1:52:54, 3.65it/s] {'loss': 2.6764, 'learning_rate': 1.599043289017216e-07, 'epoch': 14.94} + 93%|█████████▎| 346760/371472 [5:25:30<1:52:54, 3.65it/s] 93%|█████████▎| 346761/371472 [5:25:31<1:51:12, 3.70it/s] 93%|█████████▎| 346762/371472 [5:25:31<1:56:43, 3.53it/s] 93%|█████████▎| 346763/371472 [5:25:31<1:59:27, 3.45it/s] 93%|█████████▎| 346764/371472 [5:25:32<1:55:00, 3.58it/s] 93%|█████████▎| 346765/371472 [5:25:32<2:03:46, 3.33it/s] 93%|█████████▎| 346766/371472 [5:25:32<1:58:03, 3.49it/s] 93%|█████████▎| 346767/371472 [5:25:32<2:06:41, 3.25it/s] 93%|█████████▎| 346768/371472 [5:25:33<2:03:55, 3.32it/s] 93%|█████████▎| 346769/371472 [5:25:33<2:04:43, 3.30it/s] 93%|█████████▎| 346770/371472 [5:25:33<1:59:05, 3.46it/s] 93%|█████████▎| 346771/371472 [5:25:34<1:53:48, 3.62it/s] 93%|█████████▎| 346772/371472 [5:25:34<1:50:03, 3.74it/s] 93%|█████████▎| 346773/371472 [5:25:34<1:48:47, 3.78it/s] 93%|█████████▎| 346774/371472 [5:25:34<1:47:38, 3.82it/s] 93%|█████████▎| 346775/371472 [5:25:35<1:49:05, 3.77it/s] 93%|█████████▎| 346776/371472 [5:25:35<1:49:48, 3.75it/s] 93%|█████████▎| 346777/371472 [5:25:35<1:58:51, 3.46it/s] 93%|█████████▎| 346778/371472 [5:25:36<1:58:29, 3.47it/s] 93%|█████████▎| 346779/371472 [5:25:36<1:58:53, 3.46it/s] 93%|█████████▎| 346780/371472 [5:25:36<1:58:10, 3.48it/s] {'loss': 2.5568, 'learning_rate': 1.5985584692624275e-07, 'epoch': 14.94} + 93%|█████████▎| 346780/371472 [5:25:36<1:58:10, 3.48it/s] 93%|█████████▎| 346781/371472 [5:25:36<1:54:15, 3.60it/s] 93%|█████████▎| 346782/371472 [5:25:37<1:58:42, 3.47it/s] 93%|█████████▎| 346783/371472 [5:25:37<2:00:28, 3.42it/s] 93%|█████████▎| 346784/371472 [5:25:37<1:59:25, 3.45it/s] 93%|█████████▎| 346785/371472 [5:25:38<2:00:01, 3.43it/s] 93%|█████████▎| 346786/371472 [5:25:38<2:00:27, 3.42it/s] 93%|█████████▎| 346787/371472 [5:25:38<2:00:43, 3.41it/s] 93%|█████████▎| 346788/371472 [5:25:38<1:57:35, 3.50it/s] 93%|█████████▎| 346789/371472 [5:25:39<1:58:29, 3.47it/s] 93%|█████████▎| 346790/371472 [5:25:39<1:54:59, 3.58it/s] 93%|█████████▎| 346791/371472 [5:25:39<1:52:51, 3.64it/s] 93%|█████████▎| 346792/371472 [5:25:40<1:57:38, 3.50it/s] 93%|█████████▎| 346793/371472 [5:25:40<1:58:24, 3.47it/s] 93%|█████████▎| 346794/371472 [5:25:40<2:04:43, 3.30it/s] 93%|█████████▎| 346795/371472 [5:25:40<2:01:39, 3.38it/s] 93%|█████████▎| 346796/371472 [5:25:41<1:57:55, 3.49it/s] 93%|█████████▎| 346797/371472 [5:25:41<1:54:40, 3.59it/s] 93%|█████████▎| 346798/371472 [5:25:41<2:18:40, 2.97it/s] 93%|█████████▎| 346799/371472 [5:25:42<2:16:28, 3.01it/s] 93%|█████████▎| 346800/371472 [5:25:42<2:15:25, 3.04it/s] {'loss': 2.5498, 'learning_rate': 1.5980736495076382e-07, 'epoch': 14.94} + 93%|█████████▎| 346800/371472 [5:25:42<2:15:25, 3.04it/s] 93%|█████████▎| 346801/371472 [5:25:42<2:06:00, 3.26it/s] 93%|█████████▎| 346802/371472 [5:25:43<2:04:21, 3.31it/s] 93%|█████████▎| 346803/371472 [5:25:43<1:59:45, 3.43it/s] 93%|█████████▎| 346804/371472 [5:25:43<1:53:54, 3.61it/s] 93%|█████████▎| 346805/371472 [5:25:44<2:04:13, 3.31it/s] 93%|█████████▎| 346806/371472 [5:25:44<2:00:02, 3.42it/s] 93%|█████████▎| 346807/371472 [5:25:44<1:56:46, 3.52it/s] 93%|█████████▎| 346808/371472 [5:25:44<1:53:37, 3.62it/s] 93%|█████████▎| 346809/371472 [5:25:45<1:51:21, 3.69it/s] 93%|█████████▎| 346810/371472 [5:25:45<1:50:23, 3.72it/s] 93%|█████████▎| 346811/371472 [5:25:45<1:48:34, 3.79it/s] 93%|█████████▎| 346812/371472 [5:25:45<1:46:54, 3.84it/s] 93%|█████████▎| 346813/371472 [5:25:46<1:44:43, 3.92it/s] 93%|█████████▎| 346814/371472 [5:25:46<1:46:42, 3.85it/s] 93%|█████████▎| 346815/371472 [5:25:46<1:46:12, 3.87it/s] 93%|█████████▎| 346816/371472 [5:25:46<1:46:39, 3.85it/s] 93%|█████████▎| 346817/371472 [5:25:47<1:45:32, 3.89it/s] 93%|█████████▎| 346818/371472 [5:25:47<1:50:03, 3.73it/s] 93%|█████████▎| 346819/371472 [5:25:47<1:48:43, 3.78it/s] 93%|█████████▎| 346820/371472 [5:25:47<1:49:09, 3.76it/s] {'loss': 2.7005, 'learning_rate': 1.5975888297528498e-07, 'epoch': 14.94} + 93%|█████████▎| 346820/371472 [5:25:47<1:49:09, 3.76it/s] 93%|█████████▎| 346821/371472 [5:25:48<1:55:14, 3.57it/s] 93%|█████████▎| 346822/371472 [5:25:48<1:56:13, 3.53it/s] 93%|█████████▎| 346823/371472 [5:25:48<1:54:02, 3.60it/s] 93%|█████████▎| 346824/371472 [5:25:49<1:50:55, 3.70it/s] 93%|█████████▎| 346825/371472 [5:25:49<1:49:26, 3.75it/s] 93%|█████████▎| 346826/371472 [5:25:49<1:50:01, 3.73it/s] 93%|█████████▎| 346827/371472 [5:25:49<1:49:02, 3.77it/s] 93%|█████████▎| 346828/371472 [5:25:50<1:45:30, 3.89it/s] 93%|█████████▎| 346829/371472 [5:25:50<1:47:19, 3.83it/s] 93%|█████████▎| 346830/371472 [5:25:50<1:45:36, 3.89it/s] 93%|█████████▎| 346831/371472 [5:25:50<1:46:39, 3.85it/s] 93%|█████████▎| 346832/371472 [5:25:51<1:43:40, 3.96it/s] 93%|█████████▎| 346833/371472 [5:25:51<1:44:59, 3.91it/s] 93%|█████████▎| 346834/371472 [5:25:51<1:51:16, 3.69it/s] 93%|█████████▎| 346835/371472 [5:25:51<1:46:54, 3.84it/s] 93%|█████████▎| 346836/371472 [5:25:52<1:46:10, 3.87it/s] 93%|█████████▎| 346837/371472 [5:25:52<1:52:39, 3.64it/s] 93%|█████████▎| 346838/371472 [5:25:52<1:48:38, 3.78it/s] 93%|█████████▎| 346839/371472 [5:25:53<1:57:19, 3.50it/s] 93%|█████████▎| 346840/371472 [5:25:53<2:00:15, 3.41it/s] {'loss': 2.6279, 'learning_rate': 1.5971040099980605e-07, 'epoch': 14.94} + 93%|█████████▎| 346840/371472 [5:25:53<2:00:15, 3.41it/s] 93%|█████████▎| 346841/371472 [5:25:53<1:53:56, 3.60it/s] 93%|█████████▎| 346842/371472 [5:25:53<1:51:46, 3.67it/s] 93%|█████████▎| 346843/371472 [5:25:54<1:55:14, 3.56it/s] 93%|█████████▎| 346844/371472 [5:25:54<1:57:25, 3.50it/s] 93%|█████████▎| 346845/371472 [5:25:54<1:54:03, 3.60it/s] 93%|█████████▎| 346846/371472 [5:25:54<1:53:46, 3.61it/s] 93%|█████████▎| 346847/371472 [5:25:55<1:52:14, 3.66it/s] 93%|█████████▎| 346848/371472 [5:25:55<1:47:32, 3.82it/s] 93%|█████████▎| 346849/371472 [5:25:55<1:45:22, 3.89it/s] 93%|█████████▎| 346850/371472 [5:25:56<1:51:54, 3.67it/s] 93%|█████████▎| 346851/371472 [5:25:56<1:53:06, 3.63it/s] 93%|█████████▎| 346852/371472 [5:25:56<1:49:39, 3.74it/s] 93%|█████████▎| 346853/371472 [5:25:56<1:52:40, 3.64it/s] 93%|█████████▎| 346854/371472 [5:25:57<1:53:28, 3.62it/s] 93%|█████████▎| 346855/371472 [5:25:57<1:55:13, 3.56it/s] 93%|█████████▎| 346856/371472 [5:25:57<1:55:58, 3.54it/s] 93%|█████████▎| 346857/371472 [5:25:57<1:54:43, 3.58it/s] 93%|█████████▎| 346858/371472 [5:25:58<1:53:31, 3.61it/s] 93%|█████████▎| 346859/371472 [5:25:58<1:52:17, 3.65it/s] 93%|█████████▎| 346860/371472 [5:25:58<1:55:52, 3.54it/s] {'loss': 2.6251, 'learning_rate': 1.596619190243272e-07, 'epoch': 14.94} + 93%|█████████▎| 346860/371472 [5:25:58<1:55:52, 3.54it/s] 93%|█████████▎| 346861/371472 [5:25:59<2:07:20, 3.22it/s] 93%|█████████▎| 346862/371472 [5:25:59<2:02:56, 3.34it/s] 93%|█████████▎| 346863/371472 [5:25:59<2:05:20, 3.27it/s] 93%|█████████▎| 346864/371472 [5:26:00<2:15:57, 3.02it/s] 93%|█████████▎| 346865/371472 [5:26:00<2:05:59, 3.26it/s] 93%|█████████▎| 346866/371472 [5:26:00<1:58:47, 3.45it/s] 93%|█████████▎| 346867/371472 [5:26:01<2:03:59, 3.31it/s] 93%|█████████▎| 346868/371472 [5:26:01<2:02:54, 3.34it/s] 93%|█████████▎| 346869/371472 [5:26:01<2:06:45, 3.24it/s] 93%|█████████▎| 346870/371472 [5:26:01<2:02:49, 3.34it/s] 93%|█████████▎| 346871/371472 [5:26:02<2:13:41, 3.07it/s] 93%|█████████▎| 346872/371472 [5:26:02<2:08:11, 3.20it/s] 93%|█████████▎| 346873/371472 [5:26:02<2:02:49, 3.34it/s] 93%|█████████▎| 346874/371472 [5:26:03<2:06:12, 3.25it/s] 93%|█████████▎| 346875/371472 [5:26:03<2:05:18, 3.27it/s] 93%|█████████▎| 346876/371472 [5:26:03<1:59:37, 3.43it/s] 93%|█████████▎| 346877/371472 [5:26:04<2:12:31, 3.09it/s] 93%|█████████▎| 346878/371472 [5:26:04<2:06:21, 3.24it/s] 93%|█████████▎| 346879/371472 [5:26:04<2:00:01, 3.42it/s] 93%|█████████▎| 346880/371472 [5:26:04<1:57:53, 3.48it/s] {'loss': 2.61, 'learning_rate': 1.5961343704884824e-07, 'epoch': 14.94} + 93%|█████████▎| 346880/371472 [5:26:04<1:57:53, 3.48it/s] 93%|█████████▎| 346881/371472 [5:26:05<1:58:20, 3.46it/s] 93%|█████████▎| 346882/371472 [5:26:05<1:52:50, 3.63it/s] 93%|█████████▎| 346883/371472 [5:26:05<1:54:44, 3.57it/s] 93%|█████████▎| 346884/371472 [5:26:06<1:56:04, 3.53it/s] 93%|█████████▎| 346885/371472 [5:26:06<1:55:27, 3.55it/s] 93%|█████████▎| 346886/371472 [5:26:06<1:57:24, 3.49it/s] 93%|█████████▎| 346887/371472 [5:26:06<1:55:30, 3.55it/s] 93%|█████████▎| 346888/371472 [5:26:07<1:57:46, 3.48it/s] 93%|█████████▎| 346889/371472 [5:26:07<1:52:39, 3.64it/s] 93%|█████████▎| 346890/371472 [5:26:07<1:50:05, 3.72it/s] 93%|█████████▎| 346891/371472 [5:26:08<1:58:13, 3.47it/s] 93%|█████████▎| 346892/371472 [5:26:08<2:15:59, 3.01it/s] 93%|█████████▎| 346893/371472 [5:26:08<2:15:54, 3.01it/s] 93%|█████████▎| 346894/371472 [5:26:09<2:06:12, 3.25it/s] 93%|█████████▎| 346895/371472 [5:26:09<2:04:52, 3.28it/s] 93%|█████████▎| 346896/371472 [5:26:09<1:59:26, 3.43it/s] 93%|█████████▎| 346897/371472 [5:26:09<1:55:54, 3.53it/s] 93%|█████████▎| 346898/371472 [5:26:10<1:54:12, 3.59it/s] 93%|█████████▎| 346899/371472 [5:26:10<1:53:20, 3.61it/s] 93%|█████████▎| 346900/371472 [5:26:10<1:51:00, 3.69it/s] {'loss': 2.5866, 'learning_rate': 1.5956495507336942e-07, 'epoch': 14.94} + 93%|█████████▎| 346900/371472 [5:26:10<1:51:00, 3.69it/s] 93%|███████���█▎| 346901/371472 [5:26:10<1:49:48, 3.73it/s] 93%|█████████▎| 346902/371472 [5:26:11<1:57:28, 3.49it/s] 93%|█████████▎| 346903/371472 [5:26:11<1:57:28, 3.49it/s] 93%|█████████▎| 346904/371472 [5:26:11<1:52:11, 3.65it/s] 93%|█████████▎| 346905/371472 [5:26:12<1:58:29, 3.46it/s] 93%|█████████▎| 346906/371472 [5:26:12<1:58:54, 3.44it/s] 93%|█████████▎| 346907/371472 [5:26:12<1:54:57, 3.56it/s] 93%|█████████▎| 346908/371472 [5:26:13<2:06:57, 3.22it/s] 93%|█████████▎| 346909/371472 [5:26:13<2:04:47, 3.28it/s] 93%|█████████▎| 346910/371472 [5:26:13<2:00:27, 3.40it/s] 93%|█████████▎| 346911/371472 [5:26:13<1:58:32, 3.45it/s] 93%|█████████▎| 346912/371472 [5:26:14<1:59:42, 3.42it/s] 93%|█████████▎| 346913/371472 [5:26:14<1:57:58, 3.47it/s] 93%|█████████▎| 346914/371472 [5:26:14<2:04:58, 3.28it/s] 93%|█████████▎| 346915/371472 [5:26:15<2:11:56, 3.10it/s] 93%|█████████▎| 346916/371472 [5:26:15<2:03:57, 3.30it/s] 93%|█████████▎| 346917/371472 [5:26:15<1:59:37, 3.42it/s] 93%|█████████▎| 346918/371472 [5:26:16<2:04:00, 3.30it/s] 93%|█████████▎| 346919/371472 [5:26:16<1:56:46, 3.50it/s] 93%|█████████▎| 346920/371472 [5:26:16<1:54:28, 3.57it/s] {'loss': 2.6378, 'learning_rate': 1.5951647309789047e-07, 'epoch': 14.94} + 93%|█████████▎| 346920/371472 [5:26:16<1:54:28, 3.57it/s] 93%|█████████▎| 346921/371472 [5:26:16<1:57:28, 3.48it/s] 93%|█████████▎| 346922/371472 [5:26:17<1:56:17, 3.52it/s] 93%|█████████▎| 346923/371472 [5:26:17<1:56:05, 3.52it/s] 93%|█████████▎| 346924/371472 [5:26:17<1:56:44, 3.50it/s] 93%|█████████▎| 346925/371472 [5:26:17<1:53:43, 3.60it/s] 93%|█████████▎| 346926/371472 [5:26:18<1:50:23, 3.71it/s] 93%|█████████▎| 346927/371472 [5:26:18<1:50:58, 3.69it/s] 93%|█████████▎| 346928/371472 [5:26:18<1:53:26, 3.61it/s] 93%|█████████▎| 346929/371472 [5:26:19<1:54:14, 3.58it/s] 93%|█████████▎| 346930/371472 [5:26:19<1:51:02, 3.68it/s] 93%|█████████▎| 346931/371472 [5:26:19<1:50:40, 3.70it/s] 93%|█████████▎| 346932/371472 [5:26:19<1:47:37, 3.80it/s] 93%|█████████▎| 346933/371472 [5:26:20<1:47:46, 3.79it/s] 93%|█████████▎| 346934/371472 [5:26:20<1:45:40, 3.87it/s] 93%|█████████▎| 346935/371472 [5:26:20<1:50:46, 3.69it/s] 93%|█████████▎| 346936/371472 [5:26:20<1:51:26, 3.67it/s] 93%|█████████▎| 346937/371472 [5:26:21<1:49:13, 3.74it/s] 93%|█████████▎| 346938/371472 [5:26:21<1:50:55, 3.69it/s] 93%|█████████▎| 346939/371472 [5:26:21<1:50:40, 3.69it/s] 93%|█████████▎| 346940/371472 [5:26:22<1:55:10, 3.55it/s] {'loss': 2.8654, 'learning_rate': 1.5946799112241162e-07, 'epoch': 14.94} + 93%|█████████▎| 346940/371472 [5:26:22<1:55:10, 3.55it/s] 93%|█████████▎| 346941/371472 [5:26:22<1:58:00, 3.46it/s] 93%|█████████▎| 346942/371472 [5:26:22<2:04:47, 3.28it/s] 93%|█████████▎| 346943/371472 [5:26:22<2:01:26, 3.37it/s] 93%|█████████▎| 346944/371472 [5:26:23<1:56:00, 3.52it/s] 93%|█████████▎| 346945/371472 [5:26:23<2:05:13, 3.26it/s] 93%|█████████▎| 346946/371472 [5:26:23<2:01:33, 3.36it/s] 93%|█████████▎| 346947/371472 [5:26:24<1:58:52, 3.44it/s] 93%|█████████▎| 346948/371472 [5:26:24<2:03:18, 3.31it/s] 93%|█████████▎| 346949/371472 [5:26:24<2:00:20, 3.40it/s] 93%|█████████▎| 346950/371472 [5:26:25<2:04:04, 3.29it/s] 93%|█████████▎| 346951/371472 [5:26:25<2:02:38, 3.33it/s] 93%|█████████▎| 346952/371472 [5:26:25<1:59:33, 3.42it/s] 93%|█████████▎| 346953/371472 [5:26:25<1:57:50, 3.47it/s] 93%|█████████▎| 346954/371472 [5:26:26<1:56:26, 3.51it/s] 93%|█████████▎| 346955/371472 [5:26:26<1:56:15, 3.51it/s] 93%|█████████▎| 346956/371472 [5:26:26<2:01:40, 3.36it/s] 93%|█████████▎| 346957/371472 [5:26:27<1:56:01, 3.52it/s] 93%|█████████▎| 346958/371472 [5:26:27<1:51:21, 3.67it/s] 93%|█████████▎| 346959/371472 [5:26:27<1:49:15, 3.74it/s] 93%|█████████▎| 346960/371472 [5:26:27<1:46:02, 3.85it/s] {'loss': 2.5892, 'learning_rate': 1.594195091469327e-07, 'epoch': 14.94} + 93%|█████████▎| 346960/371472 [5:26:27<1:46:02, 3.85it/s] 93%|█████████▎| 346961/371472 [5:26:28<1:47:21, 3.81it/s] 93%|█████████▎| 346962/371472 [5:26:28<1:49:54, 3.72it/s] 93%|█████████▎| 346963/371472 [5:26:28<1:49:25, 3.73it/s] 93%|█████████▎| 346964/371472 [5:26:28<1:47:56, 3.78it/s] 93%|█████████▎| 346965/371472 [5:26:29<1:55:13, 3.54it/s] 93%|█████████▎| 346966/371472 [5:26:29<1:54:32, 3.57it/s] 93%|█████████▎| 346967/371472 [5:26:29<1:52:26, 3.63it/s] 93%|█████████▎| 346968/371472 [5:26:30<2:02:29, 3.33it/s] 93%|█████████▎| 346969/371472 [5:26:30<2:03:21, 3.31it/s] 93%|█████████▎| 346970/371472 [5:26:30<1:57:04, 3.49it/s] 93%|█████████▎| 346971/371472 [5:26:30<1:52:48, 3.62it/s] 93%|█████████▎| 346972/371472 [5:26:31<1:50:35, 3.69it/s] 93%|█████████▎| 346973/371472 [5:26:31<1:54:47, 3.56it/s] 93%|█████████▎| 346974/371472 [5:26:31<2:02:22, 3.34it/s] 93%|█████████▎| 346975/371472 [5:26:32<1:55:52, 3.52it/s] 93%|█████████▎| 346976/371472 [5:26:32<1:54:41, 3.56it/s] 93%|█████████▎| 346977/371472 [5:26:32<1:55:33, 3.53it/s] 93%|█████████▎| 346978/371472 [5:26:32<1:50:04, 3.71it/s] 93%|█████████▎| 346979/371472 [5:26:33<1:47:58, 3.78it/s] 93%|█████████▎| 346980/371472 [5:26:33<1:52:23, 3.63it/s] {'loss': 2.6767, 'learning_rate': 1.5937102717145384e-07, 'epoch': 14.95} + 93%|█████████▎| 346980/371472 [5:26:33<1:52:23, 3.63it/s] 93%|█████████▎| 346981/371472 [5:26:33<1:56:33, 3.50it/s] 93%|█████████▎| 346982/371472 [5:26:33<1:54:48, 3.56it/s] 93%|█████████▎| 346983/371472 [5:26:34<2:02:22, 3.34it/s] 93%|█████████▎| 346984/371472 [5:26:34<2:00:19, 3.39it/s] 93%|█████████▎| 346985/371472 [5:26:34<1:58:15, 3.45it/s] 93%|█████████▎| 346986/371472 [5:26:35<1:56:44, 3.50it/s] 93%|█████████▎| 346987/371472 [5:26:35<1:55:31, 3.53it/s] 93%|█████████▎| 346988/371472 [5:26:35<2:00:44, 3.38it/s] 93%|█████████▎| 346989/371472 [5:26:36<2:00:56, 3.37it/s] 93%|█████████▎| 346990/371472 [5:26:36<2:11:23, 3.11it/s] 93%|█████████▎| 346991/371472 [5:26:36<2:07:38, 3.20it/s] 93%|█████████▎| 346992/371472 [5:26:37<2:08:08, 3.18it/s] 93%|█████████▎| 346993/371472 [5:26:37<2:05:07, 3.26it/s] 93%|█████████▎| 346994/371472 [5:26:37<2:03:36, 3.30it/s] 93%|█████████▎| 346995/371472 [5:26:37<1:59:07, 3.42it/s] 93%|█████████▎| 346996/371472 [5:26:38<1:54:09, 3.57it/s] 93%|█████████▎| 346997/371472 [5:26:38<1:53:04, 3.61it/s] 93%|█████████▎| 346998/371472 [5:26:38<1:52:58, 3.61it/s] 93%|█████████▎| 346999/371472 [5:26:39<1:56:00, 3.52it/s] 93%|█████████▎| 347000/371472 [5:26:39<1:59:12, 3.42it/s] {'loss': 2.6672, 'learning_rate': 1.593225451959749e-07, 'epoch': 14.95} + 93%|█████████▎| 347000/371472 [5:26:39<1:59:12, 3.42it/s] 93%|█████████▎| 347001/371472 [5:26:39<1:55:24, 3.53it/s] 93%|█████████▎| 347002/371472 [5:26:39<1:57:46, 3.46it/s] 93%|█████████▎| 347003/371472 [5:26:40<2:01:18, 3.36it/s] 93%|█████████▎| 347004/371472 [5:26:40<2:04:26, 3.28it/s] 93%|█████████▎| 347005/371472 [5:26:40<1:59:00, 3.43it/s] 93%|█████████▎| 347006/371472 [5:26:41<1:56:58, 3.49it/s] 93%|█████████▎| 347007/371472 [5:26:41<1:55:18, 3.54it/s] 93%|█████████▎| 347008/371472 [5:26:41<1:51:06, 3.67it/s] 93%|█████████▎| 347009/371472 [5:26:41<1:53:21, 3.60it/s] 93%|█████████▎| 347010/371472 [5:26:42<2:00:12, 3.39it/s] 93%|█████████▎| 347011/371472 [5:26:42<1:55:03, 3.54it/s] 93%|█████████▎| 347012/371472 [5:26:42<1:57:39, 3.47it/s] 93%|█████████▎| 347013/371472 [5:26:43<1:58:11, 3.45it/s] 93%|█████████▎| 347014/371472 [5:26:43<1:58:24, 3.44it/s] 93%|█████████▎| 347015/371472 [5:26:43<2:00:24, 3.39it/s] 93%|█████████▎| 347016/371472 [5:26:43<1:57:47, 3.46it/s] 93%|█████████▎| 347017/371472 [5:26:44<2:04:36, 3.27it/s] 93%|█████████▎| 347018/371472 [5:26:44<1:59:26, 3.41it/s] 93%|█████████▎| 347019/371472 [5:26:44<2:02:19, 3.33it/s] 93%|█████████▎| 347020/371472 [5:26:45<2:03:07, 3.31it/s] {'loss': 2.6397, 'learning_rate': 1.5927406322049606e-07, 'epoch': 14.95} + 93%|█████████▎| 347020/371472 [5:26:45<2:03:07, 3.31it/s] 93%|█████████▎| 347021/371472 [5:26:45<1:58:13, 3.45it/s] 93%|█████████▎| 347022/371472 [5:26:45<2:01:53, 3.34it/s] 93%|█████████▎| 347023/371472 [5:26:46<2:00:14, 3.39it/s] 93%|█████████▎| 347024/371472 [5:26:46<1:58:42, 3.43it/s] 93%|█████████▎| 347025/371472 [5:26:46<1:54:31, 3.56it/s] 93%|█████████▎| 347026/371472 [5:26:46<1:52:20, 3.63it/s] 93%|█████████▎| 347027/371472 [5:26:47<1:50:48, 3.68it/s] 93%|█████████▎| 347028/371472 [5:26:47<1:49:38, 3.72it/s] 93%|█████████▎| 347029/371472 [5:26:47<1:59:12, 3.42it/s] 93%|█████████▎| 347030/371472 [5:26:47<1:59:11, 3.42it/s] 93%|█████████▎| 347031/371472 [5:26:48<1:59:52, 3.40it/s] 93%|█████████▎| 347032/371472 [5:26:48<1:59:52, 3.40it/s] 93%|█████████▎| 347033/371472 [5:26:48<1:58:00, 3.45it/s] 93%|█████████▎| 347034/371472 [5:26:49<1:52:56, 3.61it/s] 93%|█████████▎| 347035/371472 [5:26:49<1:56:37, 3.49it/s] 93%|█████████▎| 347036/371472 [5:26:49<1:52:53, 3.61it/s] 93%|█████████▎| 347037/371472 [5:26:49<1:54:14, 3.56it/s] 93%|█████████▎| 347038/371472 [5:26:50<1:47:32, 3.79it/s] 93%|█████████▎| 347039/371472 [5:26:50<1:45:50, 3.85it/s] 93%|█████████▎| 347040/371472 [5:26:50<1:51:26, 3.65it/s] {'loss': 2.578, 'learning_rate': 1.592255812450171e-07, 'epoch': 14.95} + 93%|█████████▎| 347040/371472 [5:26:50<1:51:26, 3.65it/s] 93%|█████████▎| 347041/371472 [5:26:51<1:54:06, 3.57it/s] 93%|█████████▎| 347042/371472 [5:26:51<1:51:28, 3.65it/s] 93%|█████████▎| 347043/371472 [5:26:51<1:51:30, 3.65it/s] 93%|█████████▎| 347044/371472 [5:26:51<1:48:21, 3.76it/s] 93%|█████████▎| 347045/371472 [5:26:52<1:58:52, 3.42it/s] 93%|█████████▎| 347046/371472 [5:26:52<2:04:02, 3.28it/s] 93%|█████████▎| 347047/371472 [5:26:52<2:01:53, 3.34it/s] 93%|█████████▎| 347048/371472 [5:26:53<1:59:37, 3.40it/s] 93%|█████████▎| 347049/371472 [5:26:53<1:55:04, 3.54it/s] 93%|█████████▎| 347050/371472 [5:26:53<1:52:47, 3.61it/s] 93%|█████████▎| 347051/371472 [5:26:53<1:52:46, 3.61it/s] 93%|█████████▎| 347052/371472 [5:26:54<1:49:55, 3.70it/s] 93%|█████████▎| 347053/371472 [5:26:54<1:49:33, 3.71it/s] 93%|█████████▎| 347054/371472 [5:26:54<1:58:51, 3.42it/s] 93%|█████████▎| 347055/371472 [5:26:55<1:55:28, 3.52it/s] 93%|█████████▎| 347056/371472 [5:26:55<1:50:47, 3.67it/s] 93%|█████████▎| 347057/371472 [5:26:55<1:51:16, 3.66it/s] 93%|█████████▎| 347058/371472 [5:26:55<1:49:57, 3.70it/s] 93%|█████████▎| 347059/371472 [5:26:56<1:49:10, 3.73it/s] 93%|█████████▎| 347060/371472 [5:26:56<1:53:45, 3.58it/s] {'loss': 2.5094, 'learning_rate': 1.5917709926953826e-07, 'epoch': 14.95} + 93%|█████████▎| 347060/371472 [5:26:56<1:53:45, 3.58it/s] 93%|█████████▎| 347061/371472 [5:26:56<1:50:45, 3.67it/s] 93%|█████████▎| 347062/371472 [5:26:56<1:50:23, 3.69it/s] 93%|█████████▎| 347063/371472 [5:26:57<1:50:32, 3.68it/s] 93%|█████████▎| 347064/371472 [5:26:57<1:48:28, 3.75it/s] 93%|█████████▎| 347065/371472 [5:26:57<1:49:49, 3.70it/s] 93%|█████████▎| 347066/371472 [5:26:57<1:50:01, 3.70it/s] 93%|█████████▎| 347067/371472 [5:26:58<1:52:49, 3.61it/s] 93%|█████████▎| 347068/371472 [5:26:58<1:54:29, 3.55it/s] 93%|█████████▎| 347069/371472 [5:26:58<2:04:11, 3.27it/s] 93%|█████████▎| 347070/371472 [5:26:59<2:06:21, 3.22it/s] 93%|█████████▎| 347071/371472 [5:26:59<2:03:29, 3.29it/s] 93%|█████████▎| 347072/371472 [5:26:59<1:57:42, 3.45it/s] 93%|█████████▎| 347073/371472 [5:27:00<1:51:47, 3.64it/s] 93%|█████████▎| 347074/371472 [5:27:00<1:50:13, 3.69it/s] 93%|█████████▎| 347075/371472 [5:27:00<1:51:56, 3.63it/s] 93%|█████████▎| 347076/371472 [5:27:00<1:53:10, 3.59it/s] 93%|█████████▎| 347077/371472 [5:27:01<1:50:06, 3.69it/s] 93%|█████████▎| 347078/371472 [5:27:01<1:53:03, 3.60it/s] 93%|█████████▎| 347079/371472 [5:27:01<1:56:08, 3.50it/s] 93%|█████████▎| 347080/371472 [5:27:01<1:53:44, 3.57it/s] {'loss': 2.5147, 'learning_rate': 1.5912861729405933e-07, 'epoch': 14.95} + 93%|█████████▎| 347080/371472 [5:27:01<1:53:44, 3.57it/s] 93%|█████████▎| 347081/371472 [5:27:02<1:49:57, 3.70it/s] 93%|█████████▎| 347082/371472 [5:27:02<1:46:03, 3.83it/s] 93%|█████████▎| 347083/371472 [5:27:02<1:44:35, 3.89it/s] 93%|█████████▎| 347084/371472 [5:27:02<1:48:04, 3.76it/s] 93%|█████████▎| 347085/371472 [5:27:03<1:49:31, 3.71it/s] 93%|█████████▎| 347086/371472 [5:27:03<1:52:58, 3.60it/s] 93%|█████████▎| 347087/371472 [5:27:03<1:57:11, 3.47it/s] 93%|█████████▎| 347088/371472 [5:27:04<2:02:54, 3.31it/s] 93%|█████████▎| 347089/371472 [5:27:04<2:03:06, 3.30it/s] 93%|█████████▎| 347090/371472 [5:27:04<2:03:04, 3.30it/s] 93%|█████████▎| 347091/371472 [5:27:05<2:00:28, 3.37it/s] 93%|█████████▎| 347092/371472 [5:27:05<1:59:09, 3.41it/s] 93%|█████████▎| 347093/371472 [5:27:05<1:59:52, 3.39it/s] 93%|█████████▎| 347094/371472 [5:27:05<1:58:36, 3.43it/s] 93%|█████████▎| 347095/371472 [5:27:06<1:53:38, 3.58it/s] 93%|█████████▎| 347096/371472 [5:27:06<1:54:07, 3.56it/s] 93%|█████████▎| 347097/371472 [5:27:06<2:00:18, 3.38it/s] 93%|█████████▎| 347098/371472 [5:27:07<1:53:34, 3.58it/s] 93%|█████████▎| 347099/371472 [5:27:07<1:51:20, 3.65it/s] 93%|█████████▎| 347100/371472 [5:27:07<1:50:59, 3.66it/s] {'loss': 2.6778, 'learning_rate': 1.5908013531858048e-07, 'epoch': 14.95} + 93%|█████████▎| 347100/371472 [5:27:07<1:50:59, 3.66it/s] 93%|█████████▎| 347101/371472 [5:27:07<1:53:16, 3.59it/s] 93%|█████████▎| 347102/371472 [5:27:08<1:52:37, 3.61it/s] 93%|█████████▎| 347103/371472 [5:27:08<1:56:36, 3.48it/s] 93%|█████████▎| 347104/371472 [5:27:08<1:55:05, 3.53it/s] 93%|█████████▎| 347105/371472 [5:27:09<2:06:01, 3.22it/s] 93%|█████████▎| 347106/371472 [5:27:09<2:06:53, 3.20it/s] 93%|█████████▎| 347107/371472 [5:27:09<2:02:08, 3.32it/s] 93%|█████████▎| 347108/371472 [5:27:09<1:58:33, 3.43it/s] 93%|█████████▎| 347109/371472 [5:27:10<1:58:49, 3.42it/s] 93%|█████████▎| 347110/371472 [5:27:10<1:55:23, 3.52it/s] 93%|█████████▎| 347111/371472 [5:27:10<1:51:06, 3.65it/s] 93%|█████████▎| 347112/371472 [5:27:11<1:51:24, 3.64it/s] 93%|█████████▎| 347113/371472 [5:27:11<1:50:05, 3.69it/s] 93%|█████████▎| 347114/371472 [5:27:11<1:51:24, 3.64it/s] 93%|█████████▎| 347115/371472 [5:27:11<1:49:25, 3.71it/s] 93%|█████████▎| 347116/371472 [5:27:12<2:05:01, 3.25it/s] 93%|█████████▎| 347117/371472 [5:27:12<1:57:01, 3.47it/s] 93%|█████████▎| 347118/371472 [5:27:12<1:59:08, 3.41it/s] 93%|█████████▎| 347119/371472 [5:27:13<1:55:24, 3.52it/s] 93%|█████████▎| 347120/371472 [5:27:13<1:55:22, 3.52it/s] {'loss': 2.5951, 'learning_rate': 1.5903165334310155e-07, 'epoch': 14.95} + 93%|█████████▎| 347120/371472 [5:27:13<1:55:22, 3.52it/s] 93%|█████████▎| 347121/371472 [5:27:13<1:51:57, 3.62it/s] 93%|█████████▎| 347122/371472 [5:27:13<1:53:00, 3.59it/s] 93%|█████████▎| 347123/371472 [5:27:14<1:57:44, 3.45it/s] 93%|█████████▎| 347124/371472 [5:27:14<1:54:20, 3.55it/s] 93%|█████████▎| 347125/371472 [5:27:14<1:52:05, 3.62it/s] 93%|█████████▎| 347126/371472 [5:27:15<1:54:57, 3.53it/s] 93%|█████████▎| 347127/371472 [5:27:15<1:52:53, 3.59it/s] 93%|█████████▎| 347128/371472 [5:27:15<1:57:02, 3.47it/s] 93%|█████████▎| 347129/371472 [5:27:15<1:55:16, 3.52it/s] 93%|█████████▎| 347130/371472 [5:27:16<1:52:28, 3.61it/s] 93%|█████████▎| 347131/371472 [5:27:16<1:50:15, 3.68it/s] 93%|█████████▎| 347132/371472 [5:27:16<1:58:07, 3.43it/s] 93%|█████████▎| 347133/371472 [5:27:17<1:59:13, 3.40it/s] 93%|█████████▎| 347134/371472 [5:27:17<1:53:47, 3.56it/s] 93%|█████████▎| 347135/371472 [5:27:17<1:54:19, 3.55it/s] 93%|█████████▎| 347136/371472 [5:27:17<1:55:43, 3.51it/s] 93%|█████████▎| 347137/371472 [5:27:18<2:09:11, 3.14it/s] 93%|█████████▎| 347138/371472 [5:27:18<2:04:20, 3.26it/s] 93%|█████████▎| 347139/371472 [5:27:18<1:59:52, 3.38it/s] 93%|█████████▎| 347140/371472 [5:27:19<2:01:29, 3.34it/s] {'loss': 2.6057, 'learning_rate': 1.589831713676226e-07, 'epoch': 14.95} + 93%|█████████▎| 347140/371472 [5:27:19<2:01:29, 3.34it/s] 93%|█████████▎| 347141/371472 [5:27:19<1:58:43, 3.42it/s] 93%|█████████▎| 347142/371472 [5:27:19<1:57:31, 3.45it/s] 93%|█████████▎| 347143/371472 [5:27:20<2:06:40, 3.20it/s] 93%|█████████▎| 347144/371472 [5:27:20<2:04:30, 3.26it/s] 93%|█████████▎| 347145/371472 [5:27:20<2:00:30, 3.36it/s] 93%|█████████▎| 347146/371472 [5:27:20<1:57:11, 3.46it/s] 93%|█████████▎| 347147/371472 [5:27:21<1:54:30, 3.54it/s] 93%|█████████▎| 347148/371472 [5:27:21<2:23:53, 2.82it/s] 93%|█████████▎| 347149/371472 [5:27:21<2:13:43, 3.03it/s] 93%|█████████▎| 347150/371472 [5:27:22<2:07:29, 3.18it/s] 93%|█████████▎| 347151/371472 [5:27:22<2:02:23, 3.31it/s] 93%|█████████▎| 347152/371472 [5:27:22<2:02:23, 3.31it/s] 93%|█████████▎| 347153/371472 [5:27:23<2:00:44, 3.36it/s] 93%|█████████▎| 347154/371472 [5:27:23<1:56:49, 3.47it/s] 93%|█████████▎| 347155/371472 [5:27:23<1:55:04, 3.52it/s] 93%|█████████▎| 347156/371472 [5:27:23<1:53:31, 3.57it/s] 93%|█████████▎| 347157/371472 [5:27:24<1:56:00, 3.49it/s] 93%|█████████▎| 347158/371472 [5:27:24<1:56:03, 3.49it/s] 93%|█████████▎| 347159/371472 [5:27:24<1:55:19, 3.51it/s] 93%|█████████▎| 347160/371472 [5:27:25<1:56:10, 3.49it/s] {'loss': 2.6166, 'learning_rate': 1.5893468939214377e-07, 'epoch': 14.95} + 93%|█████████▎| 347160/371472 [5:27:25<1:56:10, 3.49it/s] 93%|█████████▎| 347161/371472 [5:27:25<2:08:00, 3.17it/s] 93%|█████████▎| 347162/371472 [5:27:25<2:00:10, 3.37it/s] 93%|█████████▎| 347163/371472 [5:27:26<2:01:17, 3.34it/s] 93%|█████████▎| 347164/371472 [5:27:26<1:59:51, 3.38it/s] 93%|█████████▎| 347165/371472 [5:27:26<1:58:23, 3.42it/s] 93%|█████████▎| 347166/371472 [5:27:26<1:52:49, 3.59it/s] 93%|█████████▎| 347167/371472 [5:27:27<1:53:33, 3.57it/s] 93%|█████████▎| 347168/371472 [5:27:27<1:55:09, 3.52it/s] 93%|█████████▎| 347169/371472 [5:27:27<1:54:01, 3.55it/s] 93%|█████████▎| 347170/371472 [5:27:28<2:03:02, 3.29it/s] 93%|█████████▎| 347171/371472 [5:27:28<1:58:08, 3.43it/s] 93%|█████████▎| 347172/371472 [5:27:28<1:53:35, 3.57it/s] 93%|█████████▎| 347173/371472 [5:27:28<1:55:01, 3.52it/s] 93%|█████████▎| 347174/371472 [5:27:29<1:50:02, 3.68it/s] 93%|█████████▎| 347175/371472 [5:27:29<1:57:37, 3.44it/s] 93%|█████████▎| 347176/371472 [5:27:29<1:54:45, 3.53it/s] 93%|█████████▎| 347177/371472 [5:27:30<2:23:23, 2.82it/s] 93%|█████████▎| 347178/371472 [5:27:30<2:12:50, 3.05it/s] 93%|█████████▎| 347179/371472 [5:27:30<2:08:08, 3.16it/s] 93%|█████████▎| 347180/371472 [5:27:31<2:05:35, 3.22it/s] {'loss': 2.6152, 'learning_rate': 1.5888620741666482e-07, 'epoch': 14.95} + 93%|█████████▎| 347180/371472 [5:27:31<2:05:35, 3.22it/s] 93%|█████████▎| 347181/371472 [5:27:31<2:06:28, 3.20it/s] 93%|█████████▎| 347182/371472 [5:27:31<2:04:15, 3.26it/s] 93%|█████████▎| 347183/371472 [5:27:31<1:58:56, 3.40it/s] 93%|█████████▎| 347184/371472 [5:27:32<2:00:01, 3.37it/s] 93%|█████████▎| 347185/371472 [5:27:32<1:56:39, 3.47it/s] 93%|█████████▎| 347186/371472 [5:27:32<1:55:47, 3.50it/s] 93%|█████████▎| 347187/371472 [5:27:33<1:55:21, 3.51it/s] 93%|█████████▎| 347188/371472 [5:27:33<1:55:33, 3.50it/s] 93%|█████████▎| 347189/371472 [5:27:33<2:03:03, 3.29it/s] 93%|█████████▎| 347190/371472 [5:27:33<1:58:51, 3.40it/s] 93%|█████████▎| 347191/371472 [5:27:34<1:57:49, 3.43it/s] 93%|█████████▎| 347192/371472 [5:27:34<2:04:37, 3.25it/s] 93%|█████████▎| 347193/371472 [5:27:34<2:08:55, 3.14it/s] 93%|█████████▎| 347194/371472 [5:27:35<2:01:07, 3.34it/s] 93%|█████████▎| 347195/371472 [5:27:35<1:58:01, 3.43it/s] 93%|█████████▎| 347196/371472 [5:27:35<1:54:26, 3.54it/s] 93%|█████████▎| 347197/371472 [5:27:36<2:01:58, 3.32it/s] 93%|█████████▎| 347198/371472 [5:27:36<2:00:30, 3.36it/s] 93%|█████████▎| 347199/371472 [5:27:36<2:02:28, 3.30it/s] 93%|█████████▎| 347200/371472 [5:27:36<1:59:58, 3.37it/s] {'loss': 2.4921, 'learning_rate': 1.58837725441186e-07, 'epoch': 14.95} + 93%|█████████▎| 347200/371472 [5:27:36<1:59:58, 3.37it/s] 93%|█████████▎| 347201/371472 [5:27:37<1:57:26, 3.44it/s] 93%|█████████▎| 347202/371472 [5:27:37<1:56:10, 3.48it/s] 93%|█████████▎| 347203/371472 [5:27:37<1:51:57, 3.61it/s] 93%|█████████▎| 347204/371472 [5:27:38<1:48:29, 3.73it/s] 93%|█████████▎| 347205/371472 [5:27:38<1:51:13, 3.64it/s] 93%|█████████▎| 347206/371472 [5:27:38<1:54:15, 3.54it/s] 93%|█████████▎| 347207/371472 [5:27:38<1:49:27, 3.69it/s] 93%|█████████▎| 347208/371472 [5:27:39<1:49:19, 3.70it/s] 93%|█████████▎| 347209/371472 [5:27:39<1:54:47, 3.52it/s] 93%|█████████▎| 347210/371472 [5:27:39<2:07:48, 3.16it/s] 93%|█████████▎| 347211/371472 [5:27:40<2:03:28, 3.27it/s] 93%|█████████▎| 347212/371472 [5:27:40<2:01:21, 3.33it/s] 93%|█████████▎| 347213/371472 [5:27:40<1:54:41, 3.53it/s] 93%|█████████▎| 347214/371472 [5:27:40<1:58:28, 3.41it/s] 93%|█████████▎| 347215/371472 [5:27:41<1:54:41, 3.53it/s] 93%|█████████▎| 347216/371472 [5:27:41<1:59:05, 3.39it/s] 93%|█████████▎| 347217/371472 [5:27:41<1:54:14, 3.54it/s] 93%|█████████▎| 347218/371472 [5:27:42<1:52:24, 3.60it/s] 93%|█████████▎| 347219/371472 [5:27:42<1:51:19, 3.63it/s] 93%|█████████▎| 347220/371472 [5:27:42<1:53:09, 3.57it/s] {'loss': 2.4822, 'learning_rate': 1.5878924346570704e-07, 'epoch': 14.96} + 93%|█████████▎| 347220/371472 [5:27:42<1:53:09, 3.57it/s] 93%|█████████▎| 347221/371472 [5:27:42<1:55:14, 3.51it/s] 93%|█████████▎| 347222/371472 [5:27:43<1:52:19, 3.60it/s] 93%|█████████▎| 347223/371472 [5:27:43<1:58:21, 3.41it/s] 93%|█████████▎| 347224/371472 [5:27:43<1:54:53, 3.52it/s] 93%|█████████▎| 347225/371472 [5:27:44<1:57:22, 3.44it/s] 93%|█████████▎| 347226/371472 [5:27:44<1:56:37, 3.47it/s] 93%|█████████▎| 347227/371472 [5:27:44<1:54:07, 3.54it/s] 93%|█████████▎| 347228/371472 [5:27:44<1:53:43, 3.55it/s] 93%|█████████▎| 347229/371472 [5:27:45<1:50:06, 3.67it/s] 93%|█████████▎| 347230/371472 [5:27:45<1:51:02, 3.64it/s] 93%|█████████▎| 347231/371472 [5:27:45<1:50:30, 3.66it/s] 93%|█████████▎| 347232/371472 [5:27:46<1:54:04, 3.54it/s] 93%|█████████▎| 347233/371472 [5:27:46<1:51:09, 3.63it/s] 93%|█████████▎| 347234/371472 [5:27:46<1:49:51, 3.68it/s] 93%|█████████▎| 347235/371472 [5:27:46<1:49:09, 3.70it/s] 93%|█████████▎| 347236/371472 [5:27:47<1:53:38, 3.55it/s] 93%|█████████▎| 347237/371472 [5:27:47<1:54:54, 3.52it/s] 93%|█████████▎| 347238/371472 [5:27:47<1:56:53, 3.46it/s] 93%|█████████▎| 347239/371472 [5:27:47<1:53:10, 3.57it/s] 93%|█████████▎| 347240/371472 [5:27:48<1:50:59, 3.64it/s] {'loss': 2.6956, 'learning_rate': 1.587407614902282e-07, 'epoch': 14.96} + 93%|█████████▎| 347240/371472 [5:27:48<1:50:59, 3.64it/s] 93%|█████████▎| 347241/371472 [5:27:48<1:50:56, 3.64it/s] 93%|█████████▎| 347242/371472 [5:27:48<1:48:55, 3.71it/s] 93%|█████████▎| 347243/371472 [5:27:49<1:47:11, 3.77it/s] 93%|█████████▎| 347244/371472 [5:27:49<1:54:43, 3.52it/s] 93%|█████████▎| 347245/371472 [5:27:49<1:49:30, 3.69it/s] 93%|█████████▎| 347246/371472 [5:27:49<1:48:18, 3.73it/s] 93%|█████████▎| 347247/371472 [5:27:50<1:59:54, 3.37it/s] 93%|█████████▎| 347248/371472 [5:27:50<1:55:36, 3.49it/s] 93%|█████████▎| 347249/371472 [5:27:50<1:52:51, 3.58it/s] 93%|█████████▎| 347250/371472 [5:27:51<1:50:16, 3.66it/s] 93%|█████████▎| 347251/371472 [5:27:51<1:51:44, 3.61it/s] 93%|█████████▎| 347252/371472 [5:27:51<1:55:54, 3.48it/s] 93%|█████████▎| 347253/371472 [5:27:51<1:57:16, 3.44it/s] 93%|█████████▎| 347254/371472 [5:27:52<2:05:03, 3.23it/s] 93%|█████████▎| 347255/371472 [5:27:52<2:00:42, 3.34it/s] 93%|█████████▎| 347256/371472 [5:27:52<2:00:29, 3.35it/s] 93%|█████████▎| 347257/371472 [5:27:53<2:07:53, 3.16it/s] 93%|█████████▎| 347258/371472 [5:27:53<1:59:10, 3.39it/s] 93%|█████████▎| 347259/371472 [5:27:53<2:02:16, 3.30it/s] 93%|█████████▎| 347260/371472 [5:27:54<2:02:06, 3.30it/s] {'loss': 2.6404, 'learning_rate': 1.5869227951474924e-07, 'epoch': 14.96} + 93%|█████████▎| 347260/371472 [5:27:54<2:02:06, 3.30it/s] 93%|█████████▎| 347261/371472 [5:27:54<1:58:13, 3.41it/s] 93%|█████████▎| 347262/371472 [5:27:54<1:56:36, 3.46it/s] 93%|█████████▎| 347263/371472 [5:27:54<1:58:48, 3.40it/s] 93%|█████████▎| 347264/371472 [5:27:55<2:02:32, 3.29it/s] 93%|█████████▎| 347265/371472 [5:27:55<2:09:55, 3.11it/s] 93%|█████████▎| 347266/371472 [5:27:55<2:01:56, 3.31it/s] 93%|█████████▎| 347267/371472 [5:27:56<1:56:27, 3.46it/s] 93%|█████████▎| 347268/371472 [5:27:56<1:55:55, 3.48it/s] 93%|█████████▎| 347269/371472 [5:27:56<1:58:27, 3.41it/s] 93%|█████████▎| 347270/371472 [5:27:56<1:57:02, 3.45it/s] 93%|█████████▎| 347271/371472 [5:27:57<2:02:07, 3.30it/s] 93%|█████████▎| 347272/371472 [5:27:57<2:04:35, 3.24it/s] 93%|█████████▎| 347273/371472 [5:27:57<2:00:56, 3.33it/s] 93%|█████████▎| 347274/371472 [5:27:58<2:01:59, 3.31it/s] 93%|█████████▎| 347275/371472 [5:27:58<1:57:14, 3.44it/s] 93%|█████████▎| 347276/371472 [5:27:58<1:58:27, 3.40it/s] 93%|█████████▎| 347277/371472 [5:27:59<2:06:50, 3.18it/s] 93%|█████████▎| 347278/371472 [5:27:59<2:00:58, 3.33it/s] 93%|█████████▎| 347279/371472 [5:27:59<1:56:36, 3.46it/s] 93%|█████████▎| 347280/371472 [5:27:59<1:56:39, 3.46it/s] {'loss': 2.6398, 'learning_rate': 1.5864379753927041e-07, 'epoch': 14.96} + 93%|█████████▎| 347280/371472 [5:27:59<1:56:39, 3.46it/s] 93%|█████████▎| 347281/371472 [5:28:00<1:56:26, 3.46it/s] 93%|█████████▎| 347282/371472 [5:28:00<1:55:47, 3.48it/s] 93%|█████████▎| 347283/371472 [5:28:00<2:02:01, 3.30it/s] 93%|█████████▎| 347284/371472 [5:28:01<1:54:42, 3.51it/s] 93%|█████████▎| 347285/371472 [5:28:01<1:53:35, 3.55it/s] 93%|█████████▎| 347286/371472 [5:28:01<1:56:53, 3.45it/s] 93%|█████████▎| 347287/371472 [5:28:01<1:54:45, 3.51it/s] 93%|█████████▎| 347288/371472 [5:28:02<2:09:41, 3.11it/s] 93%|█████████▎| 347289/371472 [5:28:02<2:03:18, 3.27it/s] 93%|█████████▎| 347290/371472 [5:28:02<2:00:38, 3.34it/s] 93%|█████████▎| 347291/371472 [5:28:03<2:01:34, 3.32it/s] 93%|█████████▎| 347292/371472 [5:28:03<2:05:52, 3.20it/s] 93%|█████████▎| 347293/371472 [5:28:03<2:02:50, 3.28it/s] 93%|█████████▎| 347294/371472 [5:28:04<1:59:17, 3.38it/s] 93%|█████████▎| 347295/371472 [5:28:04<1:55:55, 3.48it/s] 93%|█████████▎| 347296/371472 [5:28:04<1:54:19, 3.52it/s] 93%|█████████▎| 347297/371472 [5:28:04<1:49:43, 3.67it/s] 93%|█████████▎| 347298/371472 [5:28:05<1:49:40, 3.67it/s] 93%|█████████▎| 347299/371472 [5:28:05<1:46:39, 3.78it/s] 93%|█████████▎| 347300/371472 [5:28:05<1:43:56, 3.88it/s] {'loss': 2.6804, 'learning_rate': 1.5859531556379146e-07, 'epoch': 14.96} + 93%|█████████▎| 347300/371472 [5:28:05<1:43:56, 3.88it/s] 93%|█████████▎| 347301/371472 [5:28:05<1:44:28, 3.86it/s] 93%|█████████▎| 347302/371472 [5:28:06<1:44:28, 3.86it/s] 93%|█████████▎| 347303/371472 [5:28:06<1:54:40, 3.51it/s] 93%|█████████▎| 347304/371472 [5:28:06<1:58:51, 3.39it/s] 93%|█████████▎| 347305/371472 [5:28:07<1:58:02, 3.41it/s] 93%|█████████▎| 347306/371472 [5:28:07<1:56:10, 3.47it/s] 93%|█████████▎| 347307/371472 [5:28:07<1:54:31, 3.52it/s] 93%|█████████▎| 347308/371472 [5:28:08<1:57:42, 3.42it/s] 93%|█████████▎| 347309/371472 [5:28:08<1:56:03, 3.47it/s] 93%|█████████▎| 347310/371472 [5:28:08<1:55:32, 3.49it/s] 93%|█████████▎| 347311/371472 [5:28:08<2:01:15, 3.32it/s] 93%|█████████▎| 347312/371472 [5:28:09<1:55:53, 3.47it/s] 93%|█████████▎| 347313/371472 [5:28:09<1:49:58, 3.66it/s] 93%|█████████▎| 347314/371472 [5:28:09<1:50:07, 3.66it/s] 93%|█████████▎| 347315/371472 [5:28:10<1:52:11, 3.59it/s] 93%|█████████▎| 347316/371472 [5:28:10<1:56:03, 3.47it/s] 93%|█████████▎| 347317/371472 [5:28:10<1:55:48, 3.48it/s] 93%|█████████▎| 347318/371472 [5:28:11<2:09:08, 3.12it/s] 93%|█████████▎| 347319/371472 [5:28:11<2:05:36, 3.20it/s] 93%|█████████▎| 347320/371472 [5:28:11<2:03:09, 3.27it/s] {'loss': 2.6549, 'learning_rate': 1.585468335883126e-07, 'epoch': 14.96} + 93%|█████████▎| 347320/371472 [5:28:11<2:03:09, 3.27it/s] 93%|█████████▎| 347321/371472 [5:28:11<1:59:24, 3.37it/s] 93%|█████████▎| 347322/371472 [5:28:12<1:56:43, 3.45it/s] 93%|█████████▎| 347323/371472 [5:28:12<1:50:44, 3.63it/s] 93%|█████████▎| 347324/371472 [5:28:12<1:50:52, 3.63it/s] 93%|█████████▎| 347325/371472 [5:28:12<1:47:19, 3.75it/s] 93%|█████████▎| 347326/371472 [5:28:13<1:43:54, 3.87it/s] 94%|█████████▎| 347327/371472 [5:28:13<1:46:39, 3.77it/s] 94%|█████████▎| 347328/371472 [5:28:13<1:45:03, 3.83it/s] 94%|█████████▎| 347329/371472 [5:28:13<1:44:17, 3.86it/s] 94%|█████████▎| 347330/371472 [5:28:14<1:47:16, 3.75it/s] 94%|█████████▎| 347331/371472 [5:28:14<1:47:52, 3.73it/s] 94%|█████████▎| 347332/371472 [5:28:14<1:47:14, 3.75it/s] 94%|█████████▎| 347333/371472 [5:28:15<1:50:17, 3.65it/s] 94%|█████████▎| 347334/371472 [5:28:15<1:52:08, 3.59it/s] 94%|█████████▎| 347335/371472 [5:28:15<1:56:36, 3.45it/s] 94%|█████████▎| 347336/371472 [5:28:15<1:53:39, 3.54it/s] 94%|█████████▎| 347337/371472 [5:28:16<2:04:06, 3.24it/s] 94%|█████████▎| 347338/371472 [5:28:16<2:02:27, 3.28it/s] 94%|█████████▎| 347339/371472 [5:28:16<1:57:08, 3.43it/s] 94%|█████████▎| 347340/371472 [5:28:17<1:54:40, 3.51it/s] {'loss': 2.6542, 'learning_rate': 1.5849835161283368e-07, 'epoch': 14.96} + 94%|█████████▎| 347340/371472 [5:28:17<1:54:40, 3.51it/s] 94%|█████████▎| 347341/371472 [5:28:17<1:49:19, 3.68it/s] 94%|█████████▎| 347342/371472 [5:28:17<1:48:10, 3.72it/s] 94%|█████████▎| 347343/371472 [5:28:17<1:53:19, 3.55it/s] 94%|█████████▎| 347344/371472 [5:28:18<1:55:53, 3.47it/s] 94%|█████████▎| 347345/371472 [5:28:18<1:50:59, 3.62it/s] 94%|█████████▎| 347346/371472 [5:28:18<1:49:49, 3.66it/s] 94%|█████████▎| 347347/371472 [5:28:18<1:47:55, 3.73it/s] 94%|█████████▎| 347348/371472 [5:28:19<1:55:55, 3.47it/s] 94%|█████████▎| 347349/371472 [5:28:19<1:50:45, 3.63it/s] 94%|█████████▎| 347350/371472 [5:28:19<1:48:50, 3.69it/s] 94%|█████████▎| 347351/371472 [5:28:20<1:51:20, 3.61it/s] 94%|█████████▎| 347352/371472 [5:28:20<1:47:28, 3.74it/s] 94%|█████████▎| 347353/371472 [5:28:20<1:47:34, 3.74it/s] 94%|█████████▎| 347354/371472 [5:28:20<1:48:28, 3.71it/s] 94%|█████████▎| 347355/371472 [5:28:21<1:46:53, 3.76it/s] 94%|█████████▎| 347356/371472 [5:28:21<1:46:24, 3.78it/s] 94%|█████████▎| 347357/371472 [5:28:21<1:51:43, 3.60it/s] 94%|█████████▎| 347358/371472 [5:28:22<1:56:52, 3.44it/s] 94%|█████████▎| 347359/371472 [5:28:22<1:53:35, 3.54it/s] 94%|█████████▎| 347360/371472 [5:28:22<1:51:06, 3.62it/s] {'loss': 2.5142, 'learning_rate': 1.5844986963735483e-07, 'epoch': 14.96} + 94%|█████████▎| 347360/371472 [5:28:22<1:51:06, 3.62it/s] 94%|█████████▎| 347361/371472 [5:28:22<1:53:40, 3.54it/s] 94%|█████████▎| 347362/371472 [5:28:23<2:07:14, 3.16it/s] 94%|█████████▎| 347363/371472 [5:28:23<1:59:57, 3.35it/s] 94%|█████████▎| 347364/371472 [5:28:23<1:54:31, 3.51it/s] 94%|█████████▎| 347365/371472 [5:28:24<1:51:21, 3.61it/s] 94%|█████████▎| 347366/371472 [5:28:24<1:47:08, 3.75it/s] 94%|█████████▎| 347367/371472 [5:28:24<1:55:29, 3.48it/s] 94%|█████████▎| 347368/371472 [5:28:24<1:54:21, 3.51it/s] 94%|█████████▎| 347369/371472 [5:28:25<1:51:56, 3.59it/s] 94%|█████████▎| 347370/371472 [5:28:25<1:50:25, 3.64it/s] 94%|█████████▎| 347371/371472 [5:28:25<1:52:56, 3.56it/s] 94%|█████████▎| 347372/371472 [5:28:25<1:50:40, 3.63it/s] 94%|█████████▎| 347373/371472 [5:28:26<1:49:16, 3.68it/s] 94%|█████████▎| 347374/371472 [5:28:26<1:50:51, 3.62it/s] 94%|█████████▎| 347375/371472 [5:28:26<1:49:43, 3.66it/s] 94%|█████████▎| 347376/371472 [5:28:27<1:51:56, 3.59it/s] 94%|█████████▎| 347377/371472 [5:28:27<1:54:04, 3.52it/s] 94%|█████████▎| 347378/371472 [5:28:27<1:54:21, 3.51it/s] 94%|█████████▎| 347379/371472 [5:28:27<1:51:05, 3.61it/s] 94%|█████████▎| 347380/371472 [5:28:28<1:47:30, 3.73it/s] {'loss': 2.617, 'learning_rate': 1.584013876618759e-07, 'epoch': 14.96} + 94%|█████████▎| 347380/371472 [5:28:28<1:47:30, 3.73it/s] 94%|█████████▎| 347381/371472 [5:28:28<2:03:29, 3.25it/s] 94%|█████████▎| 347382/371472 [5:28:28<1:59:03, 3.37it/s] 94%|█████████▎| 347383/371472 [5:28:29<1:56:16, 3.45it/s] 94%|█████████▎| 347384/371472 [5:28:29<1:53:53, 3.52it/s] 94%|█████████▎| 347385/371472 [5:28:29<1:52:42, 3.56it/s] 94%|█████████▎| 347386/371472 [5:28:29<1:48:22, 3.70it/s] 94%|█████████▎| 347387/371472 [5:28:30<1:50:14, 3.64it/s] 94%|█████████▎| 347388/371472 [5:28:30<1:53:42, 3.53it/s] 94%|█████████▎| 347389/371472 [5:28:30<1:53:43, 3.53it/s] 94%|█████████▎| 347390/371472 [5:28:31<1:52:16, 3.58it/s] 94%|█████████▎| 347391/371472 [5:28:31<1:50:48, 3.62it/s] 94%|█████████▎| 347392/371472 [5:28:31<1:52:22, 3.57it/s] 94%|█████████▎| 347393/371472 [5:28:31<2:00:10, 3.34it/s] 94%|█████████▎| 347394/371472 [5:28:32<1:56:32, 3.44it/s] 94%|█████████▎| 347395/371472 [5:28:32<1:51:43, 3.59it/s] 94%|█████████▎| 347396/371472 [5:28:32<2:02:58, 3.26it/s] 94%|█████████▎| 347397/371472 [5:28:33<2:03:31, 3.25it/s] 94%|█████████▎| 347398/371472 [5:28:33<1:57:37, 3.41it/s] 94%|█████████▎| 347399/371472 [5:28:33<1:55:37, 3.47it/s] 94%|█████████▎| 347400/371472 [5:28:33<1:56:05, 3.46it/s] {'loss': 2.552, 'learning_rate': 1.5835290568639705e-07, 'epoch': 14.96} + 94%|█████████▎| 347400/371472 [5:28:33<1:56:05, 3.46it/s] 94%|█████████▎| 347401/371472 [5:28:34<1:52:35, 3.56it/s] 94%|█████████▎| 347402/371472 [5:28:34<2:00:37, 3.33it/s] 94%|█████████▎| 347403/371472 [5:28:34<1:58:01, 3.40it/s] 94%|█████████▎| 347404/371472 [5:28:35<2:04:07, 3.23it/s] 94%|█████████▎| 347405/371472 [5:28:35<1:59:18, 3.36it/s] 94%|█████████▎| 347406/371472 [5:28:35<1:56:29, 3.44it/s] 94%|█████████▎| 347407/371472 [5:28:36<1:58:28, 3.39it/s] 94%|█████████▎| 347408/371472 [5:28:36<1:59:58, 3.34it/s] 94%|█████████▎| 347409/371472 [5:28:36<1:59:20, 3.36it/s] 94%|█████████▎| 347410/371472 [5:28:36<1:55:15, 3.48it/s] 94%|█████████▎| 347411/371472 [5:28:37<1:54:05, 3.51it/s] 94%|█████████▎| 347412/371472 [5:28:37<1:48:50, 3.68it/s] 94%|█████████▎| 347413/371472 [5:28:37<1:49:50, 3.65it/s] 94%|█████████▎| 347414/371472 [5:28:38<1:53:28, 3.53it/s] 94%|█████████▎| 347415/371472 [5:28:38<1:58:10, 3.39it/s] 94%|█████████▎| 347416/371472 [5:28:38<1:57:29, 3.41it/s] 94%|█████████▎| 347417/371472 [5:28:38<1:57:46, 3.40it/s] 94%|█████████▎| 347418/371472 [5:28:39<1:59:53, 3.34it/s] 94%|█████████▎| 347419/371472 [5:28:39<2:04:24, 3.22it/s] 94%|█████████▎| 347420/371472 [5:28:39<1:58:26, 3.38it/s] {'loss': 2.5894, 'learning_rate': 1.5830442371091813e-07, 'epoch': 14.96} + 94%|█████████▎| 347420/371472 [5:28:39<1:58:26, 3.38it/s] 94%|█████████▎| 347421/371472 [5:28:40<1:55:18, 3.48it/s] 94%|█████████▎| 347422/371472 [5:28:40<2:01:04, 3.31it/s] 94%|█████████▎| 347423/371472 [5:28:40<1:58:01, 3.40it/s] 94%|█████████▎| 347424/371472 [5:28:41<1:57:53, 3.40it/s] 94%|█████████▎| 347425/371472 [5:28:41<1:58:29, 3.38it/s] 94%|█████████▎| 347426/371472 [5:28:41<1:57:06, 3.42it/s] 94%|█████████▎| 347427/371472 [5:28:41<1:54:44, 3.49it/s] 94%|█████████▎| 347428/371472 [5:28:42<2:00:45, 3.32it/s] 94%|█████████▎| 347429/371472 [5:28:42<1:58:20, 3.39it/s] 94%|█████████▎| 347430/371472 [5:28:42<1:54:43, 3.49it/s] 94%|█████████▎| 347431/371472 [5:28:43<1:50:47, 3.62it/s] 94%|█████████▎| 347432/371472 [5:28:43<1:51:17, 3.60it/s] 94%|█████████▎| 347433/371472 [5:28:43<1:49:30, 3.66it/s] 94%|█████████▎| 347434/371472 [5:28:43<1:54:27, 3.50it/s] 94%|█████████▎| 347435/371472 [5:28:44<1:50:48, 3.62it/s] 94%|█████████▎| 347436/371472 [5:28:44<1:49:26, 3.66it/s] 94%|█████████▎| 347437/371472 [5:28:44<1:47:35, 3.72it/s] 94%|█████████▎| 347438/371472 [5:28:44<1:48:49, 3.68it/s] 94%|█████████▎| 347439/371472 [5:28:45<2:02:26, 3.27it/s] 94%|█████████▎| 347440/371472 [5:28:45<2:01:19, 3.30it/s] {'loss': 2.5318, 'learning_rate': 1.5825594173543928e-07, 'epoch': 14.96} + 94%|█████████▎| 347440/371472 [5:28:45<2:01:19, 3.30it/s] 94%|█████████▎| 347441/371472 [5:28:45<1:58:52, 3.37it/s] 94%|█████████▎| 347442/371472 [5:28:46<1:54:10, 3.51it/s] 94%|█████████▎| 347443/371472 [5:28:46<1:59:25, 3.35it/s] 94%|█████████▎| 347444/371472 [5:28:46<2:07:53, 3.13it/s] 94%|█████████▎| 347445/371472 [5:28:47<2:10:51, 3.06it/s] 94%|█████████▎| 347446/371472 [5:28:47<2:05:15, 3.20it/s] 94%|█████████▎| 347447/371472 [5:28:47<1:59:51, 3.34it/s] 94%|█████████▎| 347448/371472 [5:28:48<1:56:29, 3.44it/s] 94%|█████████▎| 347449/371472 [5:28:48<1:50:46, 3.61it/s] 94%|█████████▎| 347450/371472 [5:28:48<1:51:59, 3.58it/s] 94%|█████████▎| 347451/371472 [5:28:48<1:49:57, 3.64it/s] 94%|█████████▎| 347452/371472 [5:28:49<1:48:33, 3.69it/s] 94%|█████████▎| 347453/371472 [5:28:49<1:47:06, 3.74it/s] 94%|█████████▎| 347454/371472 [5:28:49<1:46:30, 3.76it/s] 94%|█████████▎| 347455/371472 [5:28:49<1:49:00, 3.67it/s] 94%|█████████▎| 347456/371472 [5:28:50<1:48:10, 3.70it/s] 94%|█████████▎| 347457/371472 [5:28:50<1:52:39, 3.55it/s] 94%|█████████▎| 347458/371472 [5:28:50<1:53:40, 3.52it/s] 94%|█████████▎| 347459/371472 [5:28:51<1:55:27, 3.47it/s] 94%|█████████▎| 347460/371472 [5:28:51<1:52:26, 3.56it/s] {'loss': 2.515, 'learning_rate': 1.5820745975996032e-07, 'epoch': 14.97} + 94%|█████████▎| 347460/371472 [5:28:51<1:52:26, 3.56it/s] 94%|█████████▎| 347461/371472 [5:28:51<1:52:07, 3.57it/s] 94%|█████████▎| 347462/371472 [5:28:51<1:50:58, 3.61it/s] 94%|█████████▎| 347463/371472 [5:28:52<1:49:20, 3.66it/s] 94%|█████████▎| 347464/371472 [5:28:52<1:49:27, 3.66it/s] 94%|█████████▎| 347465/371472 [5:28:52<1:49:55, 3.64it/s] 94%|█████████▎| 347466/371472 [5:28:52<1:49:59, 3.64it/s] 94%|█████████▎| 347467/371472 [5:28:53<1:48:59, 3.67it/s] 94%|█████████▎| 347468/371472 [5:28:53<1:46:43, 3.75it/s] 94%|█████████▎| 347469/371472 [5:28:53<1:46:02, 3.77it/s] 94%|█████████▎| 347470/371472 [5:28:54<1:46:57, 3.74it/s] 94%|█████████▎| 347471/371472 [5:28:54<1:45:10, 3.80it/s] 94%|█████████▎| 347472/371472 [5:28:54<1:51:38, 3.58it/s] 94%|█████████▎| 347473/371472 [5:28:54<1:51:53, 3.57it/s] 94%|█████████▎| 347474/371472 [5:28:55<1:52:26, 3.56it/s] 94%|█████████▎| 347475/371472 [5:28:55<1:54:51, 3.48it/s] 94%|█████████▎| 347476/371472 [5:28:55<1:50:58, 3.60it/s] 94%|█████████▎| 347477/371472 [5:28:56<1:53:51, 3.51it/s] 94%|█████████▎| 347478/371472 [5:28:56<1:50:49, 3.61it/s] 94%|█████████▎| 347479/371472 [5:28:56<1:49:45, 3.64it/s] 94%|█████████▎| 347480/371472 [5:28:56<1:54:17, 3.50it/s] {'loss': 2.5222, 'learning_rate': 1.581589777844815e-07, 'epoch': 14.97} + 94%|█████████▎| 347480/371472 [5:28:56<1:54:17, 3.50it/s] 94%|█████████▎| 347481/371472 [5:28:57<1:52:25, 3.56it/s] 94%|█████████▎| 347482/371472 [5:28:57<1:49:45, 3.64it/s] 94%|█████████▎| 347483/371472 [5:28:57<1:48:53, 3.67it/s] 94%|█████████▎| 347484/371472 [5:28:57<1:46:18, 3.76it/s] 94%|█████████▎| 347485/371472 [5:28:58<1:47:44, 3.71it/s] 94%|█████████▎| 347486/371472 [5:28:58<1:50:29, 3.62it/s] 94%|█████████▎| 347487/371472 [5:28:58<1:48:07, 3.70it/s] 94%|█████████▎| 347488/371472 [5:28:58<1:46:16, 3.76it/s] 94%|█████████▎| 347489/371472 [5:28:59<1:53:22, 3.53it/s] 94%|█████████▎| 347490/371472 [5:28:59<2:08:06, 3.12it/s] 94%|█████████▎| 347491/371472 [5:28:59<2:03:28, 3.24it/s] 94%|█████████▎| 347492/371472 [5:29:00<1:58:47, 3.36it/s] 94%|█████████▎| 347493/371472 [5:29:00<1:58:03, 3.39it/s] 94%|█████████▎| 347494/371472 [5:29:00<1:58:50, 3.36it/s] 94%|█████████▎| 347495/371472 [5:29:01<1:57:56, 3.39it/s] 94%|█████████▎| 347496/371472 [5:29:01<1:58:37, 3.37it/s] 94%|█████████▎| 347497/371472 [5:29:01<1:56:42, 3.42it/s] 94%|█████████▎| 347498/371472 [5:29:02<2:01:29, 3.29it/s] 94%|█████████▎| 347499/371472 [5:29:02<1:57:11, 3.41it/s] 94%|█████████▎| 347500/371472 [5:29:02<1:56:00, 3.44it/s] {'loss': 2.633, 'learning_rate': 1.5811049580900254e-07, 'epoch': 14.97} + 94%|█████████▎| 347500/371472 [5:29:02<1:56:00, 3.44it/s] 94%|█████████▎| 347501/371472 [5:29:02<1:59:13, 3.35it/s] 94%|█████████▎| 347502/371472 [5:29:03<1:56:44, 3.42it/s] 94%|█████████▎| 347503/371472 [5:29:03<1:53:54, 3.51it/s] 94%|█████████▎| 347504/371472 [5:29:03<1:57:03, 3.41it/s] 94%|█████████▎| 347505/371472 [5:29:04<1:52:05, 3.56it/s] 94%|█████████▎| 347506/371472 [5:29:04<1:51:01, 3.60it/s] 94%|█████████▎| 347507/371472 [5:29:04<1:47:08, 3.73it/s] 94%|█████████▎| 347508/371472 [5:29:04<1:47:25, 3.72it/s] 94%|█████████▎| 347509/371472 [5:29:05<1:49:25, 3.65it/s] 94%|█████████▎| 347510/371472 [5:29:05<1:48:19, 3.69it/s] 94%|█████████▎| 347511/371472 [5:29:05<1:49:15, 3.66it/s] 94%|█████████▎| 347512/371472 [5:29:05<1:54:36, 3.48it/s] 94%|█████████▎| 347513/371472 [5:29:06<1:50:39, 3.61it/s] 94%|█████████▎| 347514/371472 [5:29:06<1:51:56, 3.57it/s] 94%|█████████▎| 347515/371472 [5:29:06<1:52:26, 3.55it/s] 94%|█████████▎| 347516/371472 [5:29:07<1:48:15, 3.69it/s] 94%|█████████▎| 347517/371472 [5:29:07<1:51:24, 3.58it/s] 94%|█████████▎| 347518/371472 [5:29:07<1:48:50, 3.67it/s] 94%|█████████▎| 347519/371472 [5:29:07<1:49:16, 3.65it/s] 94%|█████████▎| 347520/371472 [5:29:08<1:51:04, 3.59it/s] {'loss': 2.7124, 'learning_rate': 1.580620138335237e-07, 'epoch': 14.97} + 94%|█████████▎| 347520/371472 [5:29:08<1:51:04, 3.59it/s] 94%|█████████▎| 347521/371472 [5:29:08<1:52:25, 3.55it/s] 94%|█████████▎| 347522/371472 [5:29:08<1:57:56, 3.38it/s] 94%|█████████▎| 347523/371472 [5:29:09<1:55:31, 3.46it/s] 94%|█████████▎| 347524/371472 [5:29:09<1:57:17, 3.40it/s] 94%|█████████▎| 347525/371472 [5:29:09<2:02:35, 3.26it/s] 94%|█████████▎| 347526/371472 [5:29:09<1:57:57, 3.38it/s] 94%|█████████▎| 347527/371472 [5:29:10<1:57:05, 3.41it/s] 94%|█████████▎| 347528/371472 [5:29:10<1:58:02, 3.38it/s] 94%|█████████▎| 347529/371472 [5:29:10<1:58:02, 3.38it/s] 94%|█████████▎| 347530/371472 [5:29:11<1:57:23, 3.40it/s] 94%|█████████▎| 347531/371472 [5:29:11<1:57:03, 3.41it/s] 94%|█████████▎| 347532/371472 [5:29:11<1:56:32, 3.42it/s] 94%|█████████▎| 347533/371472 [5:29:11<1:51:06, 3.59it/s] 94%|█████████▎| 347534/371472 [5:29:12<1:53:06, 3.53it/s] 94%|█████████▎| 347535/371472 [5:29:12<1:55:13, 3.46it/s] 94%|█████████▎| 347536/371472 [5:29:12<1:50:08, 3.62it/s] 94%|█████████▎| 347537/371472 [5:29:13<1:49:37, 3.64it/s] 94%|█████████▎| 347538/371472 [5:29:13<1:57:37, 3.39it/s] 94%|█████████▎| 347539/371472 [5:29:13<2:02:30, 3.26it/s] 94%|█████████▎| 347540/371472 [5:29:14<1:58:38, 3.36it/s] {'loss': 2.5157, 'learning_rate': 1.5801353185804477e-07, 'epoch': 14.97} + 94%|█████████▎| 347540/371472 [5:29:14<1:58:38, 3.36it/s] 94%|█████████▎| 347541/371472 [5:29:14<1:56:22, 3.43it/s] 94%|█████████▎| 347542/371472 [5:29:14<1:53:15, 3.52it/s] 94%|█████████▎| 347543/371472 [5:29:14<1:57:54, 3.38it/s] 94%|█████████▎| 347544/371472 [5:29:15<1:53:35, 3.51it/s] 94%|█████████▎| 347545/371472 [5:29:15<1:50:34, 3.61it/s] 94%|█████████▎| 347546/371472 [5:29:15<1:52:47, 3.54it/s] 94%|█████████▎| 347547/371472 [5:29:15<1:49:14, 3.65it/s] 94%|█████████▎| 347548/371472 [5:29:16<1:46:50, 3.73it/s] 94%|█████████▎| 347549/371472 [5:29:16<1:52:42, 3.54it/s] 94%|█████████▎| 347550/371472 [5:29:17<2:29:11, 2.67it/s] 94%|█████████▎| 347551/371472 [5:29:17<2:12:32, 3.01it/s] 94%|█████████▎| 347552/371472 [5:29:17<2:11:17, 3.04it/s] 94%|█████████▎| 347553/371472 [5:29:17<2:01:58, 3.27it/s] 94%|█████████▎| 347554/371472 [5:29:18<2:07:05, 3.14it/s] 94%|█████████▎| 347555/371472 [5:29:18<2:03:06, 3.24it/s] 94%|█████████▎| 347556/371472 [5:29:18<1:55:49, 3.44it/s] 94%|█████████▎| 347557/371472 [5:29:19<1:53:52, 3.50it/s] 94%|█████████▎| 347558/371472 [5:29:19<1:51:52, 3.56it/s] 94%|█████████▎| 347559/371472 [5:29:19<1:47:59, 3.69it/s] 94%|█████████▎| 347560/371472 [5:29:19<1:56:20, 3.43it/s] {'loss': 2.5336, 'learning_rate': 1.5796504988256592e-07, 'epoch': 14.97} + 94%|█████████▎| 347560/371472 [5:29:19<1:56:20, 3.43it/s] 94%|█████████▎| 347561/371472 [5:29:20<1:56:28, 3.42it/s] 94%|█████████▎| 347562/371472 [5:29:20<1:51:24, 3.58it/s] 94%|█████████▎| 347563/371472 [5:29:20<1:53:54, 3.50it/s] 94%|█████████▎| 347564/371472 [5:29:21<1:52:08, 3.55it/s] 94%|█████████▎| 347565/371472 [5:29:21<1:56:04, 3.43it/s] 94%|█████████▎| 347566/371472 [5:29:21<1:52:04, 3.56it/s] 94%|█████████▎| 347567/371472 [5:29:21<1:53:34, 3.51it/s] 94%|█████████▎| 347568/371472 [5:29:22<1:48:58, 3.66it/s] 94%|█████████▎| 347569/371472 [5:29:22<1:48:34, 3.67it/s] 94%|█████████▎| 347570/371472 [5:29:22<1:45:51, 3.76it/s] 94%|█████████▎| 347571/371472 [5:29:22<1:45:02, 3.79it/s] 94%|█████████▎| 347572/371472 [5:29:23<1:45:37, 3.77it/s] 94%|█████████▎| 347573/371472 [5:29:23<1:49:49, 3.63it/s] 94%|█████████▎| 347574/371472 [5:29:23<1:49:45, 3.63it/s] 94%|█████████▎| 347575/371472 [5:29:24<1:50:05, 3.62it/s] 94%|█████████▎| 347576/371472 [5:29:24<1:55:13, 3.46it/s] 94%|█████████▎| 347577/371472 [5:29:24<1:54:08, 3.49it/s] 94%|█████████▎| 347578/371472 [5:29:24<1:52:20, 3.55it/s] 94%|█████████▎| 347579/371472 [5:29:25<1:51:51, 3.56it/s] 94%|█████████▎| 347580/371472 [5:29:25<1:59:35, 3.33it/s] {'loss': 2.7341, 'learning_rate': 1.5791656790708696e-07, 'epoch': 14.97} + 94%|█████████▎| 347580/371472 [5:29:25<1:59:35, 3.33it/s] 94%|█████████▎| 347581/371472 [5:29:25<1:57:59, 3.37it/s] 94%|█████████▎| 347582/371472 [5:29:26<1:57:04, 3.40it/s] 94%|█████████▎| 347583/371472 [5:29:26<1:55:06, 3.46it/s] 94%|█████████▎| 347584/371472 [5:29:26<1:54:58, 3.46it/s] 94%|█████████▎| 347585/371472 [5:29:26<1:51:29, 3.57it/s] 94%|█████████▎| 347586/371472 [5:29:27<1:48:27, 3.67it/s] 94%|█████████▎| 347587/371472 [5:29:27<1:46:24, 3.74it/s] 94%|█████████▎| 347588/371472 [5:29:27<1:49:05, 3.65it/s] 94%|█████████▎| 347589/371472 [5:29:28<1:49:02, 3.65it/s] 94%|█████████▎| 347590/371472 [5:29:28<1:47:49, 3.69it/s] 94%|█████████▎| 347591/371472 [5:29:28<1:54:04, 3.49it/s] 94%|█████████▎| 347592/371472 [5:29:28<1:50:43, 3.59it/s] 94%|█████████▎| 347593/371472 [5:29:29<1:51:09, 3.58it/s] 94%|█████████▎| 347594/371472 [5:29:29<1:53:14, 3.51it/s] 94%|█████████▎| 347595/371472 [5:29:29<1:51:47, 3.56it/s] 94%|█████████▎| 347596/371472 [5:29:30<1:51:43, 3.56it/s] 94%|█████████▎| 347597/371472 [5:29:30<1:50:13, 3.61it/s] 94%|█████████▎| 347598/371472 [5:29:30<1:56:13, 3.42it/s] 94%|█████████▎| 347599/371472 [5:29:30<1:52:40, 3.53it/s] 94%|█████████▎| 347600/371472 [5:29:31<1:47:52, 3.69it/s] {'loss': 2.5821, 'learning_rate': 1.5786808593160814e-07, 'epoch': 14.97} + 94%|█████████▎| 347600/371472 [5:29:31<1:47:52, 3.69it/s] 94%|█████████▎| 347601/371472 [5:29:31<1:50:39, 3.60it/s] 94%|█████████▎| 347602/371472 [5:29:31<1:50:57, 3.59it/s] 94%|█████████▎| 347603/371472 [5:29:32<2:03:49, 3.21it/s] 94%|█████████▎| 347604/371472 [5:29:32<1:57:21, 3.39it/s] 94%|█████████▎| 347605/371472 [5:29:32<1:53:32, 3.50it/s] 94%|█████████▎| 347606/371472 [5:29:32<1:52:16, 3.54it/s] 94%|█████████▎| 347607/371472 [5:29:33<1:51:32, 3.57it/s] 94%|█████████▎| 347608/371472 [5:29:33<1:50:05, 3.61it/s] 94%|█████████▎| 347609/371472 [5:29:33<1:53:33, 3.50it/s] 94%|█████████▎| 347610/371472 [5:29:34<1:52:11, 3.54it/s] 94%|█████████▎| 347611/371472 [5:29:34<1:55:06, 3.45it/s] 94%|█████████▎| 347612/371472 [5:29:34<1:52:52, 3.52it/s] 94%|█████████▎| 347613/371472 [5:29:34<1:56:10, 3.42it/s] 94%|█████████▎| 347614/371472 [5:29:35<1:52:05, 3.55it/s] 94%|█████████▎| 347615/371472 [5:29:35<1:50:12, 3.61it/s] 94%|█████████▎| 347616/371472 [5:29:35<1:50:20, 3.60it/s] 94%|█████████▎| 347617/371472 [5:29:36<1:53:21, 3.51it/s] 94%|█████████▎| 347618/371472 [5:29:36<1:52:06, 3.55it/s] 94%|█████████▎| 347619/371472 [5:29:36<1:54:57, 3.46it/s] 94%|█████████▎| 347620/371472 [5:29:36<1:50:12, 3.61it/s] {'loss': 2.4875, 'learning_rate': 1.5781960395612916e-07, 'epoch': 14.97} + 94%|█████████▎| 347620/371472 [5:29:36<1:50:12, 3.61it/s] 94%|█████████▎| 347621/371472 [5:29:37<1:49:49, 3.62it/s] 94%|█████████▎| 347622/371472 [5:29:37<1:54:13, 3.48it/s] 94%|█████████▎| 347623/371472 [5:29:37<1:56:38, 3.41it/s] 94%|█████████▎| 347624/371472 [5:29:37<1:53:35, 3.50it/s] 94%|█████████▎| 347625/371472 [5:29:38<1:49:24, 3.63it/s] 94%|█████████▎| 347626/371472 [5:29:38<1:47:20, 3.70it/s] 94%|█████████▎| 347627/371472 [5:29:38<1:45:54, 3.75it/s] 94%|█████████▎| 347628/371472 [5:29:39<1:49:05, 3.64it/s] 94%|█████████▎| 347629/371472 [5:29:39<1:51:29, 3.56it/s] 94%|█████████▎| 347630/371472 [5:29:39<1:54:55, 3.46it/s] 94%|█████████▎| 347631/371472 [5:29:39<1:51:51, 3.55it/s] 94%|█████████▎| 347632/371472 [5:29:40<1:54:23, 3.47it/s] 94%|█████████▎| 347633/371472 [5:29:40<1:56:41, 3.40it/s] 94%|█████████▎| 347634/371472 [5:29:40<1:56:30, 3.41it/s] 94%|█████████▎| 347635/371472 [5:29:41<1:56:34, 3.41it/s] 94%|███████���█▎| 347636/371472 [5:29:41<1:53:25, 3.50it/s] 94%|█████████▎| 347637/371472 [5:29:41<1:58:54, 3.34it/s] 94%|█████████▎| 347638/371472 [5:29:42<2:00:39, 3.29it/s] 94%|█████████▎| 347639/371472 [5:29:42<1:56:18, 3.42it/s] 94%|█████████▎| 347640/371472 [5:29:42<1:54:54, 3.46it/s] {'loss': 2.4558, 'learning_rate': 1.5777112198065034e-07, 'epoch': 14.97} + 94%|█████████▎| 347640/371472 [5:29:42<1:54:54, 3.46it/s] 94%|█████████▎| 347641/371472 [5:29:42<1:55:06, 3.45it/s] 94%|█████████▎| 347642/371472 [5:29:43<1:52:35, 3.53it/s] 94%|█████████▎| 347643/371472 [5:29:43<1:52:48, 3.52it/s] 94%|█████████▎| 347644/371472 [5:29:43<1:55:24, 3.44it/s] 94%|█████████▎| 347645/371472 [5:29:44<2:02:35, 3.24it/s] 94%|█████████▎| 347646/371472 [5:29:44<1:56:43, 3.40it/s] 94%|█████████▎| 347647/371472 [5:29:44<1:53:16, 3.51it/s] 94%|█████████▎| 347648/371472 [5:29:44<1:55:28, 3.44it/s] 94%|█████████▎| 347649/371472 [5:29:45<1:52:35, 3.53it/s] 94%|█████████▎| 347650/371472 [5:29:45<1:51:43, 3.55it/s] 94%|█████████▎| 347651/371472 [5:29:45<2:01:56, 3.26it/s] 94%|█████████▎| 347652/371472 [5:29:46<1:53:56, 3.48it/s] 94%|█████████▎| 347653/371472 [5:29:46<1:50:54, 3.58it/s] 94%|█████████▎| 347654/371472 [5:29:46<1:49:41, 3.62it/s] 94%|█████████▎| 347655/371472 [5:29:46<1:48:51, 3.65it/s] 94%|█████████▎| 347656/371472 [5:29:47<1:46:29, 3.73it/s] 94%|█████████▎| 347657/371472 [5:29:47<1:47:58, 3.68it/s] 94%|█████████▎| 347658/371472 [5:29:47<1:52:09, 3.54it/s] 94%|█████████▎| 347659/371472 [5:29:47<1:50:19, 3.60it/s] 94%|█████████▎| 347660/371472 [5:29:48<1:50:51, 3.58it/s] {'loss': 2.6138, 'learning_rate': 1.577226400051714e-07, 'epoch': 14.97} + 94%|█████████▎| 347660/371472 [5:29:48<1:50:51, 3.58it/s] 94%|█████████▎| 347661/371472 [5:29:48<1:48:43, 3.65it/s] 94%|█████████▎| 347662/371472 [5:29:48<1:53:28, 3.50it/s] 94%|█████████▎| 347663/371472 [5:29:49<1:54:29, 3.47it/s] 94%|█████████▎| 347664/371472 [5:29:49<1:54:49, 3.46it/s] 94%|█████████▎| 347665/371472 [5:29:49<2:08:25, 3.09it/s] 94%|█████████▎| 347666/371472 [5:29:50<2:02:48, 3.23it/s] 94%|█████████▎| 347667/371472 [5:29:50<1:55:21, 3.44it/s] 94%|█████████▎| 347668/371472 [5:29:50<1:52:43, 3.52it/s] 94%|█████████▎| 347669/371472 [5:29:50<1:50:07, 3.60it/s] 94%|█████████▎| 347670/371472 [5:29:51<1:49:32, 3.62it/s] 94%|█████████▎| 347671/371472 [5:29:51<1:57:55, 3.36it/s] 94%|█████████▎| 347672/371472 [5:29:51<2:04:31, 3.19it/s] 94%|█████████▎| 347673/371472 [5:29:52<2:00:11, 3.30it/s] 94%|█████████▎| 347674/371472 [5:29:52<1:57:15, 3.38it/s] 94%|█████████▎| 347675/371472 [5:29:52<1:51:58, 3.54it/s] 94%|█████████▎| 347676/371472 [5:29:52<1:53:53, 3.48it/s] 94%|█████████▎| 347677/371472 [5:29:53<1:51:04, 3.57it/s] 94%|█████████▎| 347678/371472 [5:29:53<1:57:55, 3.36it/s] 94%|█████████▎| 347679/371472 [5:29:53<1:56:21, 3.41it/s] 94%|█████████▎| 347680/371472 [5:29:54<1:57:00, 3.39it/s] {'loss': 2.6925, 'learning_rate': 1.5767415802969245e-07, 'epoch': 14.98} + 94%|█████████▎| 347680/371472 [5:29:54<1:57:00, 3.39it/s] 94%|█████████▎| 347681/371472 [5:29:54<1:53:39, 3.49it/s] 94%|█████████▎| 347682/371472 [5:29:54<2:02:30, 3.24it/s] 94%|█████████▎| 347683/371472 [5:29:55<1:59:32, 3.32it/s] 94%|█████████▎| 347684/371472 [5:29:55<1:53:48, 3.48it/s] 94%|█████████▎| 347685/371472 [5:29:55<1:52:13, 3.53it/s] 94%|█████████▎| 347686/371472 [5:29:55<2:00:25, 3.29it/s] 94%|█████████▎| 347687/371472 [5:29:56<1:55:09, 3.44it/s] 94%|█████████▎| 347688/371472 [5:29:56<1:54:52, 3.45it/s] 94%|█████████▎| 347689/371472 [5:29:56<1:53:59, 3.48it/s] 94%|█████████▎| 347690/371472 [5:29:57<1:54:31, 3.46it/s] 94%|█████████▎| 347691/371472 [5:29:57<2:01:15, 3.27it/s] 94%|█████████▎| 347692/371472 [5:29:57<1:59:06, 3.33it/s] 94%|█████████▎| 347693/371472 [5:29:57<1:53:06, 3.50it/s] 94%|█████████▎| 347694/371472 [5:29:58<1:54:19, 3.47it/s] 94%|█████████▎| 347695/371472 [5:29:58<2:06:03, 3.14it/s] 94%|█████████▎| 347696/371472 [5:29:58<1:58:08, 3.35it/s] 94%|█████████▎| 347697/371472 [5:29:59<1:58:35, 3.34it/s] 94%|█████████▎| 347698/371472 [5:29:59<1:53:27, 3.49it/s] 94%|█████████▎| 347699/371472 [5:29:59<2:02:31, 3.23it/s] 94%|█████████▎| 347700/371472 [5:30:00<1:57:48, 3.36it/s] {'loss': 2.4736, 'learning_rate': 1.576256760542136e-07, 'epoch': 14.98} + 94%|█████████▎| 347700/371472 [5:30:00<1:57:48, 3.36it/s] 94%|█████████▎| 347701/371472 [5:30:00<1:53:03, 3.50it/s] 94%|█████████▎| 347702/371472 [5:30:00<1:49:28, 3.62it/s] 94%|█████████▎| 347703/371472 [5:30:00<1:46:18, 3.73it/s] 94%|█████████▎| 347704/371472 [5:30:01<1:46:11, 3.73it/s] 94%|█████████▎| 347705/371472 [5:30:01<1:47:03, 3.70it/s] 94%|█████████▎| 347706/371472 [5:30:01<1:46:36, 3.72it/s] 94%|█████████▎| 347707/371472 [5:30:01<1:44:47, 3.78it/s] 94%|█████████▎| 347708/371472 [5:30:02<1:49:52, 3.60it/s] 94%|█████████▎| 347709/371472 [5:30:02<1:47:06, 3.70it/s] 94%|█████████▎| 347710/371472 [5:30:02<1:47:37, 3.68it/s] 94%|█████████▎| 347711/371472 [5:30:03<1:56:46, 3.39it/s] 94%|█████████▎| 347712/371472 [5:30:03<1:56:24, 3.40it/s] 94%|█████████▎| 347713/371472 [5:30:03<1:56:30, 3.40it/s] 94%|█████████▎| 347714/371472 [5:30:03<1:50:31, 3.58it/s] 94%|█████████▎| 347715/371472 [5:30:04<1:49:42, 3.61it/s] 94%|█████████▎| 347716/371472 [5:30:04<1:51:01, 3.57it/s] 94%|█████████▎| 347717/371472 [5:30:04<1:54:02, 3.47it/s] 94%|█████████▎| 347718/371472 [5:30:05<1:53:05, 3.50it/s] 94%|█████████▎| 347719/371472 [5:30:05<1:52:37, 3.51it/s] 94%|█████████▎| 347720/371472 [5:30:05<1:52:31, 3.52it/s] {'loss': 2.7135, 'learning_rate': 1.5757719407873467e-07, 'epoch': 14.98} + 94%|█████████▎| 347720/371472 [5:30:05<1:52:31, 3.52it/s] 94%|█████████▎| 347721/371472 [5:30:05<2:03:01, 3.22it/s] 94%|█████████▎| 347722/371472 [5:30:06<1:59:38, 3.31it/s] 94%|█████████▎| 347723/371472 [5:30:06<2:03:09, 3.21it/s] 94%|█████████▎| 347724/371472 [5:30:06<2:06:00, 3.14it/s] 94%|█████████▎| 347725/371472 [5:30:07<2:01:39, 3.25it/s] 94%|█████████▎| 347726/371472 [5:30:07<1:59:11, 3.32it/s] 94%|█████████▎| 347727/371472 [5:30:07<1:57:14, 3.38it/s] 94%|█████████▎| 347728/371472 [5:30:08<1:56:10, 3.41it/s] 94%|█████████▎| 347729/371472 [5:30:08<1:54:37, 3.45it/s] 94%|█████████▎| 347730/371472 [5:30:08<1:52:06, 3.53it/s] 94%|█████████▎| 347731/371472 [5:30:08<1:50:28, 3.58it/s] 94%|█████████▎| 347732/371472 [5:30:09<1:54:08, 3.47it/s] 94%|█████████▎| 347733/371472 [5:30:09<1:52:44, 3.51it/s] 94%|█████████▎| 347734/371472 [5:30:09<1:58:04, 3.35it/s] 94%|█████████▎| 347735/371472 [5:30:10<1:52:33, 3.51it/s] 94%|█████████▎| 347736/371472 [5:30:10<2:01:03, 3.27it/s] 94%|█████████▎| 347737/371472 [5:30:10<1:56:48, 3.39it/s] 94%|█████████▎| 347738/371472 [5:30:10<1:49:37, 3.61it/s] 94%|█████████▎| 347739/371472 [5:30:11<1:48:15, 3.65it/s] 94%|█████████▎| 347740/371472 [5:30:11<1:55:21, 3.43it/s] {'loss': 2.5614, 'learning_rate': 1.5752871210325583e-07, 'epoch': 14.98} + 94%|█████████▎| 347740/371472 [5:30:11<1:55:21, 3.43it/s] 94%|█████████▎| 347741/371472 [5:30:11<2:00:21, 3.29it/s] 94%|█████████▎| 347742/371472 [5:30:12<1:55:53, 3.41it/s] 94%|█████████▎| 347743/371472 [5:30:12<1:55:07, 3.44it/s] 94%|█████████▎| 347744/371472 [5:30:12<1:51:44, 3.54it/s] 94%|█████████▎| 347745/371472 [5:30:12<1:53:10, 3.49it/s] 94%|█████████▎| 347746/371472 [5:30:13<1:51:06, 3.56it/s] 94%|█████████▎| 347747/371472 [5:30:13<1:51:46, 3.54it/s] 94%|██��██████▎| 347748/371472 [5:30:13<1:51:14, 3.55it/s] 94%|█████████▎| 347749/371472 [5:30:14<1:49:55, 3.60it/s] 94%|█████████▎| 347750/371472 [5:30:14<1:46:10, 3.72it/s] 94%|█████████▎| 347751/371472 [5:30:14<1:45:57, 3.73it/s] 94%|█████████▎| 347752/371472 [5:30:14<1:43:33, 3.82it/s] 94%|█████████▎| 347753/371472 [5:30:15<1:44:54, 3.77it/s] 94%|█████████▎| 347754/371472 [5:30:15<1:41:52, 3.88it/s] 94%|█████████▎| 347755/371472 [5:30:15<1:50:33, 3.58it/s] 94%|█████████▎| 347756/371472 [5:30:15<1:48:50, 3.63it/s] 94%|█████████▎| 347757/371472 [5:30:16<1:52:34, 3.51it/s] 94%|█████████▎| 347758/371472 [5:30:16<1:53:30, 3.48it/s] 94%|█████████▎| 347759/371472 [5:30:16<2:06:12, 3.13it/s] 94%|█████████▎| 347760/371472 [5:30:17<1:57:42, 3.36it/s] {'loss': 2.5618, 'learning_rate': 1.574802301277769e-07, 'epoch': 14.98} + 94%|█████████▎| 347760/371472 [5:30:17<1:57:42, 3.36it/s] 94%|█████████▎| 347761/371472 [5:30:17<1:57:42, 3.36it/s] 94%|█████████▎| 347762/371472 [5:30:17<2:03:56, 3.19it/s] 94%|█████████▎| 347763/371472 [5:30:18<2:13:46, 2.95it/s] 94%|█████████▎| 347764/371472 [5:30:18<2:07:56, 3.09it/s] 94%|█████████▎| 347765/371472 [5:30:18<1:58:38, 3.33it/s] 94%|█████████▎| 347766/371472 [5:30:19<2:00:31, 3.28it/s] 94%|█████████▎| 347767/371472 [5:30:19<1:56:25, 3.39it/s] 94%|█████████▎| 347768/371472 [5:30:19<2:00:29, 3.28it/s] 94%|█████████▎| 347769/371472 [5:30:19<1:59:29, 3.31it/s] 94%|█████████▎| 347770/371472 [5:30:20<1:54:45, 3.44it/s] 94%|█████████▎| 347771/371472 [5:30:20<1:51:51, 3.53it/s] 94%|█████████▎| 347772/371472 [5:30:20<1:55:14, 3.43it/s] 94%|█████████▎| 347773/371472 [5:30:21<1:55:19, 3.43it/s] 94%|█████████▎| 347774/371472 [5:30:21<1:56:09, 3.40it/s] 94%|█████████▎| 347775/371472 [5:30:21<1:51:40, 3.54it/s] 94%|█████████▎| 347776/371472 [5:30:21<1:49:57, 3.59it/s] 94%|█████████▎| 347777/371472 [5:30:22<1:51:06, 3.55it/s] 94%|█████████▎| 347778/371472 [5:30:22<1:47:34, 3.67it/s] 94%|█████████▎| 347779/371472 [5:30:22<1:48:10, 3.65it/s] 94%|█████████▎| 347780/371472 [5:30:23<1:49:33, 3.60it/s] {'loss': 2.5195, 'learning_rate': 1.5743174815229805e-07, 'epoch': 14.98} + 94%|█████████▎| 347780/371472 [5:30:23<1:49:33, 3.60it/s] 94%|█████████▎| 347781/371472 [5:30:23<1:47:11, 3.68it/s] 94%|█████████▎| 347782/371472 [5:30:23<1:58:48, 3.32it/s] 94%|█████████▎| 347783/371472 [5:30:23<1:54:10, 3.46it/s] 94%|█████████▎| 347784/371472 [5:30:24<1:52:32, 3.51it/s] 94%|█████████▎| 347785/371472 [5:30:24<1:47:58, 3.66it/s] 94%|█████████▎| 347786/371472 [5:30:24<1:46:27, 3.71it/s] 94%|█████████▎| 347787/371472 [5:30:24<1:46:10, 3.72it/s] 94%|█████████▎| 347788/371472 [5:30:25<1:42:59, 3.83it/s] 94%|█████████▎| 347789/371472 [5:30:25<1:43:08, 3.83it/s] 94%|█████████▎| 347790/371472 [5:30:25<1:41:53, 3.87it/s] 94%|█████████▎| 347791/371472 [5:30:26<1:44:33, 3.77it/s] 94%|█████████▎| 347792/371472 [5:30:26<1:42:07, 3.86it/s] 94%|█████████▎| 347793/371472 [5:30:26<1:42:04, 3.87it/s] 94%|█████████▎| 347794/371472 [5:30:26<1:43:21, 3.82it/s] 94%|█████████▎| 347795/371472 [5:30:27<1:48:09, 3.65it/s] 94%|█████████▎| 347796/371472 [5:30:27<1:46:50, 3.69it/s] 94%|█████████▎| 347797/371472 [5:30:27<1:50:59, 3.55it/s] 94%|█████████▎| 347798/371472 [5:30:27<1:49:31, 3.60it/s] 94%|█████████▎| 347799/371472 [5:30:28<1:47:04, 3.68it/s] 94%|█████████▎| 347800/371472 [5:30:28<1:51:02, 3.55it/s] {'loss': 2.6897, 'learning_rate': 1.5738326617681912e-07, 'epoch': 14.98} + 94%|█████████▎| 347800/371472 [5:30:28<1:51:02, 3.55it/s] 94%|█████████▎| 347801/371472 [5:30:28<1:50:26, 3.57it/s] 94%|█████████▎| 347802/371472 [5:30:29<1:57:02, 3.37it/s] 94%|█████████▎| 347803/371472 [5:30:29<1:55:08, 3.43it/s] 94%|���████████▎| 347804/371472 [5:30:29<1:51:41, 3.53it/s] 94%|█████████▎| 347805/371472 [5:30:29<1:50:15, 3.58it/s] 94%|█████████▎| 347806/371472 [5:30:30<1:52:48, 3.50it/s] 94%|█████████▎| 347807/371472 [5:30:30<1:48:37, 3.63it/s] 94%|█████████▎| 347808/371472 [5:30:30<1:45:21, 3.74it/s] 94%|█████████▎| 347809/371472 [5:30:30<1:46:19, 3.71it/s] 94%|█████████▎| 347810/371472 [5:30:31<1:44:31, 3.77it/s] 94%|█████████▎| 347811/371472 [5:30:31<1:47:00, 3.69it/s] 94%|█████████▎| 347812/371472 [5:30:31<1:45:34, 3.74it/s] 94%|█████████▎| 347813/371472 [5:30:32<1:50:56, 3.55it/s] 94%|█████████▎| 347814/371472 [5:30:32<1:51:02, 3.55it/s] 94%|█████████▎| 347815/371472 [5:30:32<1:58:52, 3.32it/s] 94%|█████████▎| 347816/371472 [5:30:32<1:51:52, 3.52it/s] 94%|█████████▎| 347817/371472 [5:30:33<1:52:16, 3.51it/s] 94%|█████████▎| 347818/371472 [5:30:33<1:51:30, 3.54it/s] 94%|█████████▎| 347819/371472 [5:30:33<1:52:58, 3.49it/s] 94%|█████████▎| 347820/371472 [5:30:34<1:57:44, 3.35it/s] {'loss': 2.6266, 'learning_rate': 1.5733478420134027e-07, 'epoch': 14.98} + 94%|█████████▎| 347820/371472 [5:30:34<1:57:44, 3.35it/s] 94%|█████████▎| 347821/371472 [5:30:34<1:54:31, 3.44it/s] 94%|█████████▎| 347822/371472 [5:30:34<2:04:20, 3.17it/s] 94%|█████████▎| 347823/371472 [5:30:35<1:58:08, 3.34it/s] 94%|█████████▎| 347824/371472 [5:30:35<1:57:38, 3.35it/s] 94%|█████████▎| 347825/371472 [5:30:35<1:52:44, 3.50it/s] 94%|█████████▎| 347826/371472 [5:30:35<1:49:36, 3.60it/s] 94%|█████████▎| 347827/371472 [5:30:36<1:45:08, 3.75it/s] 94%|█████████▎| 347828/371472 [5:30:36<1:46:46, 3.69it/s] 94%|█████████▎| 347829/371472 [5:30:36<1:48:03, 3.65it/s] 94%|█████████▎| 347830/371472 [5:30:37<2:01:47, 3.24it/s] 94%|█████████▎| 347831/371472 [5:30:37<1:57:25, 3.36it/s] 94%|█████████▎| 347832/371472 [5:30:37<1:55:23, 3.41it/s] 94%|█████████▎| 347833/371472 [5:30:37<1:54:05, 3.45it/s] 94%|█████████▎| 347834/371472 [5:30:38<1:51:47, 3.52it/s] 94%|█████████▎| 347835/371472 [5:30:38<1:53:47, 3.46it/s] 94%|█████████▎| 347836/371472 [5:30:38<2:01:01, 3.25it/s] 94%|█████████▎| 347837/371472 [5:30:39<1:59:47, 3.29it/s] 94%|█████████▎| 347838/371472 [5:30:39<2:00:12, 3.28it/s] 94%|█████████▎| 347839/371472 [5:30:39<2:09:32, 3.04it/s] 94%|█████████▎| 347840/371472 [5:30:40<2:05:43, 3.13it/s] {'loss': 2.5351, 'learning_rate': 1.5728630222586132e-07, 'epoch': 14.98} + 94%|█████████▎| 347840/371472 [5:30:40<2:05:43, 3.13it/s] 94%|█████████▎| 347841/371472 [5:30:40<2:03:50, 3.18it/s] 94%|█████████▎| 347842/371472 [5:30:40<1:56:52, 3.37it/s] 94%|█████████▎| 347843/371472 [5:30:41<2:05:02, 3.15it/s] 94%|█████████▎| 347844/371472 [5:30:41<2:00:40, 3.26it/s] 94%|█████████▎| 347845/371472 [5:30:41<1:57:00, 3.37it/s] 94%|█████████▎| 347846/371472 [5:30:41<1:52:23, 3.50it/s] 94%|█████████▎| 347847/371472 [5:30:42<1:48:34, 3.63it/s] 94%|█████████▎| 347848/371472 [5:30:42<1:49:03, 3.61it/s] 94%|█████████▎| 347849/371472 [5:30:42<1:54:10, 3.45it/s] 94%|█████████▎| 347850/371472 [5:30:42<1:50:43, 3.56it/s] 94%|█████████▎| 347851/371472 [5:30:43<1:47:39, 3.66it/s] 94%|█████████▎| 347852/371472 [5:30:43<1:54:50, 3.43it/s] 94%|█████████▎| 347853/371472 [5:30:43<1:52:38, 3.49it/s] 94%|█████████▎| 347854/371472 [5:30:44<1:51:37, 3.53it/s] 94%|█████████▎| 347855/371472 [5:30:44<1:54:16, 3.44it/s] 94%|█████████▎| 347856/371472 [5:30:44<1:55:32, 3.41it/s] 94%|█████████▎| 347857/371472 [5:30:45<2:01:20, 3.24it/s] 94%|█████████▎| 347858/371472 [5:30:45<1:56:58, 3.36it/s] 94%|█████████▎| 347859/371472 [5:30:45<1:57:50, 3.34it/s] 94%|█████████▎| 347860/371472 [5:30:45<1:57:50, 3.34it/s] {'loss': 2.5871, 'learning_rate': 1.572378202503825e-07, 'epoch': 14.98} + 94%|█████████▎| 347860/371472 [5:30:45<1:57:50, 3.34it/s] 94%|█████████▎| 347861/371472 [5:30:46<1:53:23, 3.47it/s] 94%|█████████▎| 347862/371472 [5:30:46<1:52:43, 3.49it/s] 94%|█████████▎| 347863/371472 [5:30:46<1:55:18, 3.41it/s] 94%|█████████▎| 347864/371472 [5:30:47<1:51:59, 3.51it/s] 94%|█████████▎| 347865/371472 [5:30:47<1:53:54, 3.45it/s] 94%|█████████▎| 347866/371472 [5:30:47<1:59:52, 3.28it/s] 94%|█████████▎| 347867/371472 [5:30:47<1:56:05, 3.39it/s] 94%|█████████▎| 347868/371472 [5:30:48<1:52:06, 3.51it/s] 94%|█████████▎| 347869/371472 [5:30:48<1:47:34, 3.66it/s] 94%|█████████▎| 347870/371472 [5:30:48<1:47:52, 3.65it/s] 94%|█████████▎| 347871/371472 [5:30:49<1:52:38, 3.49it/s] 94%|█████████▎| 347872/371472 [5:30:49<1:50:08, 3.57it/s] 94%|█████████▎| 347873/371472 [5:30:49<1:55:39, 3.40it/s] 94%|█████████▎| 347874/371472 [5:30:49<1:57:17, 3.35it/s] 94%|█████████▎| 347875/371472 [5:30:50<1:55:22, 3.41it/s] 94%|█████████▎| 347876/371472 [5:30:50<1:55:02, 3.42it/s] 94%|█████████▎| 347877/371472 [5:30:50<1:52:15, 3.50it/s] 94%|█████████▎| 347878/371472 [5:30:51<1:49:11, 3.60it/s] 94%|█████████▎| 347879/371472 [5:30:51<1:45:41, 3.72it/s] 94%|█████████▎| 347880/371472 [5:30:51<1:50:43, 3.55it/s] {'loss': 2.5432, 'learning_rate': 1.5718933827490354e-07, 'epoch': 14.98} + 94%|█████████▎| 347880/371472 [5:30:51<1:50:43, 3.55it/s] 94%|█████████▎| 347881/371472 [5:30:51<1:45:30, 3.73it/s] 94%|█████████▎| 347882/371472 [5:30:52<1:46:50, 3.68it/s] 94%|█████████▎| 347883/371472 [5:30:52<1:51:13, 3.53it/s] 94%|█████████▎| 347884/371472 [5:30:52<2:00:39, 3.26it/s] 94%|█████████▎| 347885/371472 [5:30:53<1:54:20, 3.44it/s] 94%|█████████▎| 347886/371472 [5:30:53<1:53:27, 3.46it/s] 94%|█████████▎| 347887/371472 [5:30:53<1:54:53, 3.42it/s] 94%|█████████▎| 347888/371472 [5:30:53<1:51:52, 3.51it/s] 94%|█████████▎| 347889/371472 [5:30:54<2:03:59, 3.17it/s] 94%|█████████▎| 347890/371472 [5:30:54<2:00:06, 3.27it/s] 94%|█████████▎| 347891/371472 [5:30:54<1:56:07, 3.38it/s] 94%|█████████▎| 347892/371472 [5:30:55<2:02:31, 3.21it/s] 94%|█████████▎| 347893/371472 [5:30:55<1:55:16, 3.41it/s] 94%|█████████▎| 347894/371472 [5:30:55<1:50:54, 3.54it/s] 94%|█████████▎| 347895/371472 [5:30:55<1:47:58, 3.64it/s] 94%|█████████▎| 347896/371472 [5:30:56<1:45:28, 3.73it/s] 94%|█████████▎| 347897/371472 [5:30:56<1:48:55, 3.61it/s] 94%|█████████▎| 347898/371472 [5:30:56<1:48:39, 3.62it/s] 94%|█████████▎| 347899/371472 [5:30:57<1:49:40, 3.58it/s] 94%|█████████▎| 347900/371472 [5:30:57<1:57:15, 3.35it/s] {'loss': 2.6277, 'learning_rate': 1.571408562994247e-07, 'epoch': 14.98} + 94%|█████████▎| 347900/371472 [5:30:57<1:57:15, 3.35it/s] 94%|█████████▎| 347901/371472 [5:30:57<1:53:39, 3.46it/s] 94%|█████████▎| 347902/371472 [5:30:57<1:53:25, 3.46it/s] 94%|█████████▎| 347903/371472 [5:30:58<1:53:22, 3.46it/s] 94%|█████████▎| 347904/371472 [5:30:58<1:50:05, 3.57it/s] 94%|█████████▎| 347905/371472 [5:30:58<1:50:17, 3.56it/s] 94%|█████████▎| 347906/371472 [5:30:59<1:49:21, 3.59it/s] 94%|█████████▎| 347907/371472 [5:30:59<1:48:46, 3.61it/s] 94%|█████████▎| 347908/371472 [5:30:59<1:48:07, 3.63it/s] 94%|█████████▎| 347909/371472 [5:30:59<1:46:15, 3.70it/s] 94%|█████████▎| 347910/371472 [5:31:00<1:41:52, 3.85it/s] 94%|█████████▎| 347911/371472 [5:31:00<1:46:09, 3.70it/s] 94%|█████████▎| 347912/371472 [5:31:00<1:44:35, 3.75it/s] 94%|█████████▎| 347913/371472 [5:31:00<1:41:52, 3.85it/s] 94%|█████████▎| 347914/371472 [5:31:01<1:42:59, 3.81it/s] 94%|█████████▎| 347915/371472 [5:31:01<1:46:32, 3.69it/s] 94%|█████████▎| 347916/371472 [5:31:01<1:49:09, 3.60it/s] 94%|█████████▎| 347917/371472 [5:31:02<1:49:44, 3.58it/s] 94%|█████████▎| 347918/371472 [5:31:02<1:53:44, 3.45it/s] 94%|█████████▎| 347919/371472 [5:31:02<1:51:13, 3.53it/s] 94%|█████████▎| 347920/371472 [5:31:02<1:52:31, 3.49it/s] {'loss': 2.5263, 'learning_rate': 1.5709237432394576e-07, 'epoch': 14.99} + 94%|█████████▎| 347920/371472 [5:31:02<1:52:31, 3.49it/s] 94%|█████████▎| 347921/371472 [5:31:03<1:53:35, 3.46it/s] 94%|█████████▎| 347922/371472 [5:31:03<1:55:37, 3.39it/s] 94%|█████████▎| 347923/371472 [5:31:03<1:56:59, 3.35it/s] 94%|█████████▎| 347924/371472 [5:31:04<1:54:15, 3.43it/s] 94%|█████████▎| 347925/371472 [5:31:04<1:54:04, 3.44it/s] 94%|█████████▎| 347926/371472 [5:31:04<1:50:21, 3.56it/s] 94%|█████████▎| 347927/371472 [5:31:04<1:50:47, 3.54it/s] 94%|█████████▎| 347928/371472 [5:31:05<2:01:02, 3.24it/s] 94%|█████████▎| 347929/371472 [5:31:05<1:57:40, 3.33it/s] 94%|█████████▎| 347930/371472 [5:31:05<2:01:36, 3.23it/s] 94%|█████████▎| 347931/371472 [5:31:06<1:57:16, 3.35it/s] 94%|█████████▎| 347932/371472 [5:31:06<1:56:13, 3.38it/s] 94%|█████████▎| 347933/371472 [5:31:06<1:54:41, 3.42it/s] 94%|█████████▎| 347934/371472 [5:31:07<1:48:57, 3.60it/s] 94%|█████████▎| 347935/371472 [5:31:07<1:45:49, 3.71it/s] 94%|█████████▎| 347936/371472 [5:31:07<1:44:43, 3.75it/s] 94%|█████████▎| 347937/371472 [5:31:07<1:47:19, 3.65it/s] 94%|█████████▎| 347938/371472 [5:31:08<1:42:56, 3.81it/s] 94%|█████████▎| 347939/371472 [5:31:08<1:46:55, 3.67it/s] 94%|█████████▎| 347940/371472 [5:31:08<1:42:40, 3.82it/s] {'loss': 2.6921, 'learning_rate': 1.570438923484669e-07, 'epoch': 14.99} + 94%|█████████▎| 347940/371472 [5:31:08<1:42:40, 3.82it/s] 94%|█████████▎| 347941/371472 [5:31:08<1:42:18, 3.83it/s] 94%|█████████▎| 347942/371472 [5:31:09<1:44:24, 3.76it/s] 94%|█████████▎| 347943/371472 [5:31:09<1:44:35, 3.75it/s] 94%|█████████▎| 347944/371472 [5:31:09<1:44:20, 3.76it/s] 94%|█████████▎| 347945/371472 [5:31:09<1:46:59, 3.66it/s] 94%|█████████▎| 347946/371472 [5:31:10<1:44:44, 3.74it/s] 94%|█████████▎| 347947/371472 [5:31:10<1:47:02, 3.66it/s] 94%|█████████▎| 347948/371472 [5:31:10<1:45:15, 3.73it/s] 94%|█████████▎| 347949/371472 [5:31:11<1:48:03, 3.63it/s] 94%|█████████▎| 347950/371472 [5:31:11<1:47:01, 3.66it/s] 94%|█████████▎| 347951/371472 [5:31:11<1:45:05, 3.73it/s] 94%|█████████▎| 347952/371472 [5:31:11<1:44:27, 3.75it/s] 94%|█████████▎| 347953/371472 [5:31:12<1:47:41, 3.64it/s] 94%|█████████▎| 347954/371472 [5:31:12<1:49:44, 3.57it/s] 94%|█████████▎| 347955/371472 [5:31:12<1:48:49, 3.60it/s] 94%|█████████▎| 347956/371472 [5:31:12<1:50:39, 3.54it/s] 94%|█████████▎| 347957/371472 [5:31:13<1:49:17, 3.59it/s] 94%|█████████▎| 347958/371472 [5:31:13<1:54:09, 3.43it/s] 94%|█████████▎| 347959/371472 [5:31:13<1:50:53, 3.53it/s] 94%|█████████▎| 347960/371472 [5:31:14<1:48:57, 3.60it/s] {'loss': 2.7375, 'learning_rate': 1.5699541037298796e-07, 'epoch': 14.99} + 94%|█████████▎| 347960/371472 [5:31:14<1:48:57, 3.60it/s] 94%|█████████▎| 347961/371472 [5:31:14<1:47:14, 3.65it/s] 94%|█████████▎| 347962/371472 [5:31:14<1:44:14, 3.76it/s] 94%|█████████▎| 347963/371472 [5:31:14<1:45:09, 3.73it/s] 94%|█████████▎| 347964/371472 [5:31:15<1:44:27, 3.75it/s] 94%|█████████▎| 347965/371472 [5:31:15<1:41:11, 3.87it/s] 94%|█████████▎| 347966/371472 [5:31:15<1:45:08, 3.73it/s] 94%|█████████▎| 347967/371472 [5:31:15<1:43:25, 3.79it/s] 94%|█████████▎| 347968/371472 [5:31:16<1:45:41, 3.71it/s] 94%|█████████▎| 347969/371472 [5:31:16<1:48:10, 3.62it/s] 94%|█████████▎| 347970/371472 [5:31:16<1:48:17, 3.62it/s] 94%|█████████▎| 347971/371472 [5:31:17<1:46:57, 3.66it/s] 94%|█████████▎| 347972/371472 [5:31:17<1:52:22, 3.49it/s] 94%|█████████▎| 347973/371472 [5:31:17<1:55:06, 3.40it/s] 94%|█████████▎| 347974/371472 [5:31:17<1:52:12, 3.49it/s] 94%|█████████▎| 347975/371472 [5:31:18<1:50:00, 3.56it/s] 94%|█████████▎| 347976/371472 [5:31:18<1:52:27, 3.48it/s] 94%|█████████▎| 347977/371472 [5:31:18<1:51:42, 3.51it/s] 94%|█████████▎| 347978/371472 [5:31:19<1:47:25, 3.64it/s] 94%|█████████▎| 347979/371472 [5:31:19<1:45:27, 3.71it/s] 94%|█████████▎| 347980/371472 [5:31:19<1:48:33, 3.61it/s] {'loss': 2.572, 'learning_rate': 1.5694692839750913e-07, 'epoch': 14.99} + 94%|█████████▎| 347980/371472 [5:31:19<1:48:33, 3.61it/s] 94%|█████████▎| 347981/371472 [5:31:19<1:48:08, 3.62it/s] 94%|█████████▎| 347982/371472 [5:31:20<1:47:19, 3.65it/s] 94%|█████████▎| 347983/371472 [5:31:20<1:48:33, 3.61it/s] 94%|█████████▎| 347984/371472 [5:31:20<1:54:31, 3.42it/s] 94%|█████████▎| 347985/371472 [5:31:21<1:56:32, 3.36it/s] 94%|█████████▎| 347986/371472 [5:31:21<1:52:13, 3.49it/s] 94%|█████████▎| 347987/371472 [5:31:21<1:48:03, 3.62it/s] 94%|█████████▎| 347988/371472 [5:31:21<1:47:17, 3.65it/s] 94%|█████████▎| 347989/371472 [5:31:22<1:45:05, 3.72it/s] 94%|█████████▎| 347990/371472 [5:31:22<1:43:39, 3.78it/s] 94%|█████████▎| 347991/371472 [5:31:22<1:41:26, 3.86it/s] 94%|█████████▎| 347992/371472 [5:31:22<1:47:24, 3.64it/s] 94%|█████████▎| 347993/371472 [5:31:23<1:52:22, 3.48it/s] 94%|█████████▎| 347994/371472 [5:31:23<1:46:26, 3.68it/s] 94%|█████████▎| 347995/371472 [5:31:23<1:42:50, 3.80it/s] 94%|█████████▎| 347996/371472 [5:31:24<1:45:15, 3.72it/s] 94%|█████████▎| 347997/371472 [5:31:24<1:46:19, 3.68it/s] 94%|█████████▎| 347998/371472 [5:31:24<1:49:04, 3.59it/s] 94%|█████████▎| 347999/371472 [5:31:24<1:49:54, 3.56it/s] 94%|█████████▎| 348000/371472 [5:31:25<1:55:25, 3.39it/s] {'loss': 2.6143, 'learning_rate': 1.5689844642203018e-07, 'epoch': 14.99} + 94%|█████████▎| 348000/371472 [5:31:25<1:55:25, 3.39it/s] 94%|█████████▎| 348001/371472 [5:31:25<1:50:30, 3.54it/s] 94%|█████████▎| 348002/371472 [5:31:25<1:53:08, 3.46it/s] 94%|█████████▎| 348003/371472 [5:31:26<1:49:32, 3.57it/s] 94%|█████████▎| 348004/371472 [5:31:26<1:50:47, 3.53it/s] 94%|█████████▎| 348005/371472 [5:31:26<1:51:48, 3.50it/s] 94%|█████████▎| 348006/371472 [5:31:26<1:48:07, 3.62it/s] 94%|█████████▎| 348007/371472 [5:31:27<1:50:57, 3.52it/s] 94%|█████████▎| 348008/371472 [5:31:27<1:50:36, 3.54it/s] 94%|█████████▎| 348009/371472 [5:31:27<1:48:09, 3.62it/s] 94%|█████████▎| 348010/371472 [5:31:27<1:47:29, 3.64it/s] 94%|█████████▎| 348011/371472 [5:31:28<1:47:50, 3.63it/s] 94%|█████████▎| 348012/371472 [5:31:28<1:48:02, 3.62it/s] 94%|█████████▎| 348013/371472 [5:31:28<1:45:19, 3.71it/s] 94%|█████████▎| 348014/371472 [5:31:29<1:49:27, 3.57it/s] 94%|█████████▎| 348015/371472 [5:31:29<1:46:10, 3.68it/s] 94%|█████████▎| 348016/371472 [5:31:29<1:42:16, 3.82it/s] 94%|█████████▎| 348017/371472 [5:31:29<1:40:13, 3.90it/s] 94%|█████████▎| 348018/371472 [5:31:30<1:44:53, 3.73it/s] 94%|█████████▎| 348019/371472 [5:31:30<1:43:40, 3.77it/s] 94%|█████████▎| 348020/371472 [5:31:30<1:40:05, 3.91it/s] {'loss': 2.5568, 'learning_rate': 1.5684996444655133e-07, 'epoch': 14.99} + 94%|█████████▎| 348020/371472 [5:31:30<1:40:05, 3.91it/s] 94%|█████████▎| 348021/371472 [5:31:30<1:42:18, 3.82it/s] 94%|█████████▎| 348022/371472 [5:31:31<1:47:32, 3.63it/s] 94%|█████████▎| 348023/371472 [5:31:31<1:44:31, 3.74it/s] 94%|█████████▎| 348024/371472 [5:31:31<1:43:32, 3.77it/s] 94%|█████████▎| 348025/371472 [5:31:31<1:46:09, 3.68it/s] 94%|█████████▎| 348026/371472 [5:31:32<1:45:58, 3.69it/s] 94%|█████████▎| 348027/371472 [5:31:32<1:42:23, 3.82it/s] 94%|█████████▎| 348028/371472 [5:31:32<1:44:13, 3.75it/s] 94%|█████████▎| 348029/371472 [5:31:33<1:47:47, 3.63it/s] 94%|█████████��| 348030/371472 [5:31:33<1:49:45, 3.56it/s] 94%|█████████▎| 348031/371472 [5:31:33<1:54:19, 3.42it/s] 94%|█████████▎| 348032/371472 [5:31:33<1:57:29, 3.33it/s] 94%|█████████▎| 348033/371472 [5:31:34<1:57:09, 3.33it/s] 94%|█████████▎| 348034/371472 [5:31:34<1:54:43, 3.40it/s] 94%|█████████▎| 348035/371472 [5:31:34<1:54:33, 3.41it/s] 94%|█████████▎| 348036/371472 [5:31:35<1:49:21, 3.57it/s] 94%|█████████▎| 348037/371472 [5:31:35<1:56:28, 3.35it/s] 94%|█████████▎| 348038/371472 [5:31:35<1:54:51, 3.40it/s] 94%|█████████▎| 348039/371472 [5:31:35<1:48:58, 3.58it/s] 94%|█████████▎| 348040/371472 [5:31:36<1:47:03, 3.65it/s] {'loss': 2.4816, 'learning_rate': 1.568014824710724e-07, 'epoch': 14.99} + 94%|█████████▎| 348040/371472 [5:31:36<1:47:03, 3.65it/s] 94%|█████████▎| 348041/371472 [5:31:36<2:03:21, 3.17it/s] 94%|█████████▎| 348042/371472 [5:31:36<1:58:04, 3.31it/s] 94%|█████████▎| 348043/371472 [5:31:37<1:57:35, 3.32it/s] 94%|█████████▎| 348044/371472 [5:31:37<1:52:39, 3.47it/s] 94%|█████████▎| 348045/371472 [5:31:37<1:48:42, 3.59it/s] 94%|█████████▎| 348046/371472 [5:31:38<2:00:12, 3.25it/s] 94%|█████████▎| 348047/371472 [5:31:38<1:56:47, 3.34it/s] 94%|█████████▎| 348048/371472 [5:31:38<1:55:11, 3.39it/s] 94%|█████████▎| 348049/371472 [5:31:38<1:51:42, 3.49it/s] 94%|█████████▎| 348050/371472 [5:31:39<1:47:55, 3.62it/s] 94%|█████████▎| 348051/371472 [5:31:39<1:46:58, 3.65it/s] 94%|█████████▎| 348052/371472 [5:31:39<1:47:24, 3.63it/s] 94%|█████████▎| 348053/371472 [5:31:40<1:50:28, 3.53it/s] 94%|█████████▎| 348054/371472 [5:31:40<1:47:55, 3.62it/s] 94%|█████████▎| 348055/371472 [5:31:40<1:46:21, 3.67it/s] 94%|█████████▎| 348056/371472 [5:31:40<1:41:26, 3.85it/s] 94%|█████████▎| 348057/371472 [5:31:41<1:51:16, 3.51it/s] 94%|█████████▎| 348058/371472 [5:31:41<1:47:24, 3.63it/s] 94%|█████████▎| 348059/371472 [5:31:41<1:51:01, 3.51it/s] 94%|█████████▎| 348060/371472 [5:31:41<1:51:22, 3.50it/s] {'loss': 2.6472, 'learning_rate': 1.5675300049559355e-07, 'epoch': 14.99} + 94%|█████████▎| 348060/371472 [5:31:41<1:51:22, 3.50it/s] 94%|█████████▎| 348061/371472 [5:31:42<1:55:06, 3.39it/s] 94%|█████████▎| 348062/371472 [5:31:42<1:54:08, 3.42it/s] 94%|█████████▎| 348063/371472 [5:31:42<1:49:27, 3.56it/s] 94%|█████████▎| 348064/371472 [5:31:43<1:48:22, 3.60it/s] 94%|█████████▎| 348065/371472 [5:31:43<1:56:35, 3.35it/s] 94%|█████████▎| 348066/371472 [5:31:43<1:50:35, 3.53it/s] 94%|█████████▎| 348067/371472 [5:31:44<1:51:38, 3.49it/s] 94%|█████████▎| 348068/371472 [5:31:44<1:49:56, 3.55it/s] 94%|█████████▎| 348069/371472 [5:31:44<1:52:17, 3.47it/s] 94%|█████████▎| 348070/371472 [5:31:44<1:49:13, 3.57it/s] 94%|█████████▎| 348071/371472 [5:31:45<1:50:52, 3.52it/s] 94%|█████████▎| 348072/371472 [5:31:45<1:51:35, 3.50it/s] 94%|█████████▎| 348073/371472 [5:31:45<1:51:44, 3.49it/s] 94%|█████████▎| 348074/371472 [5:31:45<1:48:32, 3.59it/s] 94%|█████████▎| 348075/371472 [5:31:46<1:45:55, 3.68it/s] 94%|█████████▎| 348076/371472 [5:31:46<1:46:23, 3.67it/s] 94%|█████████▎| 348077/371472 [5:31:46<1:50:04, 3.54it/s] 94%|█████████▎| 348078/371472 [5:31:47<1:59:39, 3.26it/s] 94%|█████████▎| 348079/371472 [5:31:47<2:02:25, 3.18it/s] 94%|█████████▎| 348080/371472 [5:31:47<2:07:06, 3.07it/s] {'loss': 2.524, 'learning_rate': 1.5670451852011462e-07, 'epoch': 14.99} + 94%|█████████▎| 348080/371472 [5:31:47<2:07:06, 3.07it/s] 94%|█████████▎| 348081/371472 [5:31:48<2:07:13, 3.06it/s] 94%|█████████▎| 348082/371472 [5:31:48<2:06:12, 3.09it/s] 94%|█████████▎| 348083/371472 [5:31:48<2:00:55, 3.22it/s] 94%|█████████▎| 348084/371472 [5:31:49<1:58:44, 3.28it/s] 94%|█████████▎| 348085/371472 [5:31:49<1:59:22, 3.27it/s] 94%|███████��█▎| 348086/371472 [5:31:49<1:52:39, 3.46it/s] 94%|█████████▎| 348087/371472 [5:31:49<1:52:07, 3.48it/s] 94%|█████████▎| 348088/371472 [5:31:50<1:51:44, 3.49it/s] 94%|█████████▎| 348089/371472 [5:31:50<1:53:08, 3.44it/s] 94%|█████████▎| 348090/371472 [5:31:50<1:52:25, 3.47it/s] 94%|█████████▎| 348091/371472 [5:31:51<1:50:17, 3.53it/s] 94%|█████████▎| 348092/371472 [5:31:51<1:49:00, 3.57it/s] 94%|█████████▎| 348093/371472 [5:31:51<1:49:54, 3.55it/s] 94%|█████████▎| 348094/371472 [5:31:51<1:47:42, 3.62it/s] 94%|█████████▎| 348095/371472 [5:31:52<1:56:33, 3.34it/s] 94%|█████████▎| 348096/371472 [5:31:52<1:55:47, 3.36it/s] 94%|█████████▎| 348097/371472 [5:31:52<1:58:17, 3.29it/s] 94%|█████████▎| 348098/371472 [5:31:53<1:50:25, 3.53it/s] 94%|█████████▎| 348099/371472 [5:31:53<1:54:47, 3.39it/s] 94%|█████████▎| 348100/371472 [5:31:53<1:47:35, 3.62it/s] {'loss': 2.6025, 'learning_rate': 1.5665603654463577e-07, 'epoch': 14.99} + 94%|█████████▎| 348100/371472 [5:31:53<1:47:35, 3.62it/s] 94%|█████████▎| 348101/371472 [5:31:53<1:46:48, 3.65it/s] 94%|█████████▎| 348102/371472 [5:31:54<1:47:37, 3.62it/s] 94%|█████████▎| 348103/371472 [5:31:54<1:50:55, 3.51it/s] 94%|█████████▎| 348104/371472 [5:31:54<1:45:58, 3.68it/s] 94%|█████████▎| 348105/371472 [5:31:55<1:46:46, 3.65it/s] 94%|█████████▎| 348106/371472 [5:31:55<1:51:54, 3.48it/s] 94%|█████████▎| 348107/371472 [5:31:55<1:48:53, 3.58it/s] 94%|█████████▎| 348108/371472 [5:31:55<1:50:17, 3.53it/s] 94%|█████████▎| 348109/371472 [5:31:56<1:46:53, 3.64it/s] 94%|█████████▎| 348110/371472 [5:31:56<1:49:15, 3.56it/s] 94%|█████████▎| 348111/371472 [5:31:56<1:50:59, 3.51it/s] 94%|█████████▎| 348112/371472 [5:31:56<1:46:12, 3.67it/s] 94%|█████████▎| 348113/371472 [5:31:57<1:44:29, 3.73it/s] 94%|█████████▎| 348114/371472 [5:31:57<1:44:00, 3.74it/s] 94%|█████████▎| 348115/371472 [5:31:57<2:04:07, 3.14it/s] 94%|█████████▎| 348116/371472 [5:31:58<1:58:31, 3.28it/s] 94%|█████████▎| 348117/371472 [5:31:58<1:53:44, 3.42it/s] 94%|█████████▎| 348118/371472 [5:31:58<1:50:12, 3.53it/s] 94%|█████████▎| 348119/371472 [5:31:59<1:52:07, 3.47it/s] 94%|█████████▎| 348120/371472 [5:31:59<1:55:49, 3.36it/s] {'loss': 2.5365, 'learning_rate': 1.5660755456915685e-07, 'epoch': 14.99} + 94%|█████████▎| 348120/371472 [5:31:59<1:55:49, 3.36it/s] 94%|█████████▎| 348121/371472 [5:31:59<1:54:42, 3.39it/s] 94%|█████████▎| 348122/371472 [5:31:59<1:50:13, 3.53it/s] 94%|█████████▎| 348123/371472 [5:32:00<1:46:22, 3.66it/s] 94%|█████████▎| 348124/371472 [5:32:00<1:59:23, 3.26it/s] 94%|█████████▎| 348125/371472 [5:32:00<1:51:32, 3.49it/s] 94%|█████████▎| 348126/371472 [5:32:01<1:54:27, 3.40it/s] 94%|█████████▎| 348127/371472 [5:32:01<1:50:35, 3.52it/s] 94%|█████████▎| 348128/371472 [5:32:01<1:47:29, 3.62it/s] 94%|█████████▎| 348129/371472 [5:32:01<1:46:22, 3.66it/s] 94%|█████████▎| 348130/371472 [5:32:02<1:48:17, 3.59it/s] 94%|█████████▎| 348131/371472 [5:32:02<1:45:00, 3.70it/s] 94%|█████████▎| 348132/371472 [5:32:02<1:42:47, 3.78it/s] 94%|█████████▎| 348133/371472 [5:32:03<1:56:29, 3.34it/s] 94%|█████████▎| 348134/371472 [5:32:03<1:58:08, 3.29it/s] 94%|█████████▎| 348135/371472 [5:32:03<1:52:02, 3.47it/s] 94%|█████████▎| 348136/371472 [5:32:03<1:49:54, 3.54it/s] 94%|█████████▎| 348137/371472 [5:32:04<1:49:13, 3.56it/s] 94%|█████████▎| 348138/371472 [5:32:04<1:54:20, 3.40it/s] 94%|█████████▎| 348139/371472 [5:32:04<1:53:40, 3.42it/s] 94%|█████████▎| 348140/371472 [5:32:05<1:50:21, 3.52it/s] {'loss': 2.5331, 'learning_rate': 1.56559072593678e-07, 'epoch': 15.0} + 94%|█████████▎| 348140/371472 [5:32:05<1:50:21, 3.52it/s] 94%|█████████▎| 348141/371472 [5:32:05<1:49:57, 3.54it/s] 94%|█████████▎| 348142/371472 [5:32:05<1:48:35, 3.58it/s] 94%|█████████▎| 348143/371472 [5:32:05<1:48:40, 3.58it/s] 94%|█████████▎| 348144/371472 [5:32:06<1:46:21, 3.66it/s] 94%|█████████▎| 348145/371472 [5:32:06<1:46:52, 3.64it/s] 94%|█████████▎| 348146/371472 [5:32:06<1:52:19, 3.46it/s] 94%|█████████▎| 348147/371472 [5:32:06<1:50:45, 3.51it/s] 94%|█████████▎| 348148/371472 [5:32:07<1:48:59, 3.57it/s] 94%|█████████▎| 348149/371472 [5:32:07<1:50:59, 3.50it/s] 94%|█████████▎| 348150/371472 [5:32:07<1:55:16, 3.37it/s] 94%|█████████▎| 348151/371472 [5:32:08<1:55:41, 3.36it/s] 94%|█████████▎| 348152/371472 [5:32:08<1:51:26, 3.49it/s] 94%|█████████▎| 348153/371472 [5:32:08<1:49:47, 3.54it/s] 94%|█████████▎| 348154/371472 [5:32:09<1:53:49, 3.41it/s] 94%|█████████▎| 348155/371472 [5:32:09<1:50:39, 3.51it/s] 94%|█████████▎| 348156/371472 [5:32:09<1:57:29, 3.31it/s] 94%|█████████▎| 348157/371472 [5:32:09<1:52:55, 3.44it/s] 94%|█████████▎| 348158/371472 [5:32:10<1:54:41, 3.39it/s] 94%|█████████▎| 348159/371472 [5:32:10<1:54:44, 3.39it/s] 94%|█████████▎| 348160/371472 [5:32:10<1:54:54, 3.38it/s] {'loss': 2.5148, 'learning_rate': 1.5651059061819904e-07, 'epoch': 15.0} + 94%|█████████▎| 348160/371472 [5:32:10<1:54:54, 3.38it/s] 94%|█████████▎| 348161/371472 [5:32:11<1:55:33, 3.36it/s] 94%|█████████▎| 348162/371472 [5:32:11<1:59:06, 3.26it/s] 94%|█████████▎| 348163/371472 [5:32:11<1:55:18, 3.37it/s] 94%|█████████▎| 348164/371472 [5:32:11<1:51:00, 3.50it/s] 94%|█████████▎| 348165/371472 [5:32:12<1:48:02, 3.60it/s] 94%|█████████▎| 348166/371472 [5:32:12<1:53:48, 3.41it/s] 94%|█████████▎| 348167/371472 [5:32:12<2:05:57, 3.08it/s] 94%|█████████▎| 348168/371472 [5:32:13<2:02:45, 3.16it/s] 94%|█████████▎| 348169/371472 [5:32:13<1:55:11, 3.37it/s] 94%|█████████▎| 348170/371472 [5:32:13<1:58:18, 3.28it/s] 94%|█████████▎| 348171/371472 [5:32:14<1:55:02, 3.38it/s] 94%|█████████▎| 348172/371472 [5:32:14<1:51:48, 3.47it/s] 94%|█████████▎| 348173/371472 [5:32:14<1:51:47, 3.47it/s] 94%|█████████▎| 348174/371472 [5:32:14<1:46:45, 3.64it/s] 94%|█████████▎| 348175/371472 [5:32:15<2:05:13, 3.10it/s] 94%|█████████▎| 348176/371472 [5:32:15<1:58:15, 3.28it/s] 94%|█████████▎| 348177/371472 [5:32:15<1:53:25, 3.42it/s] 94%|█████████▎| 348178/371472 [5:32:16<1:48:31, 3.58it/s] 94%|█████████▎| 348179/371472 [5:32:16<2:20:51, 2.76it/s] 94%|█████████▎| 348180/371472 [5:32:16<2:08:20, 3.02it/s] {'loss': 2.5245, 'learning_rate': 1.5646210864272022e-07, 'epoch': 15.0} + 94%|█████████▎| 348180/371472 [5:32:16<2:08:20, 3.02it/s] 94%|█████████▎| 348181/371472 [5:32:17<2:01:21, 3.20it/s] 94%|█████████▎| 348182/371472 [5:32:17<1:55:38, 3.36it/s] 94%|█████████▎| 348183/371472 [5:32:17<1:52:28, 3.45it/s] 94%|█████████▎| 348184/371472 [5:32:17<1:48:13, 3.59it/s] 94%|█████████▎| 348185/371472 [5:32:18<1:47:11, 3.62it/s] 94%|█████████▎| 348186/371472 [5:32:18<1:49:57, 3.53it/s] 94%|█████████▎| 348187/371472 [5:32:18<1:55:00, 3.37it/s] 94%|█████████▎| 348188/371472 [5:32:19<1:51:33, 3.48it/s] 94%|█████████▎| 348189/371472 [5:32:19<2:01:46, 3.19it/s] 94%|█████████▎| 348190/371472 [5:32:19<2:03:15, 3.15it/s] 94%|█████████▎| 348191/371472 [5:32:20<2:02:07, 3.18it/s] 94%|█████████▎| 348192/371472 [5:32:20<1:58:47, 3.27it/s] 94%|█████████▎| 348193/371472 [5:32:20<1:56:39, 3.33it/s] 94%|█████████▎| 348194/371472 [5:32:20<1:49:46, 3.53it/s] 94%|█████████▎| 348195/371472 [5:32:21<1:46:44, 3.63it/s] 94%|█████████▎| 348196/371472 [5:32:21<1:52:55, 3.44it/s] 94%|█████████▎| 348197/371472 [5:32:21<1:54:05, 3.40it/s] 94%|█████████▎| 348198/371472 [5:32:22<1:50:53, 3.50it/s] 94%|█████████▎| 348199/371472 [5:32:22<1:48:17, 3.58it/s] 94%|█████████▎| 348200/371472 [5:32:22<1:44:51, 3.70it/s] {'loss': 2.5548, 'learning_rate': 1.5641362666724126e-07, 'epoch': 15.0} + 94%|█████████▎| 348200/371472 [5:32:22<1:44:51, 3.70it/s] 94%|█████████▎| 348201/371472 [5:32:22<1:55:15, 3.37it/s] 94%|█████████▎| 348202/371472 [5:32:23<1:49:25, 3.54it/s] 94%|█████████▎| 348203/371472 [5:32:23<1:50:28, 3.51it/s] 94%|█████████▎| 348204/371472 [5:32:23<1:48:46, 3.56it/s] 94%|█████████▎| 348205/371472 [5:32:24<1:50:31, 3.51it/s] 94%|█████████▎| 348206/371472 [5:32:24<1:54:50, 3.38it/s] 94%|█████████▎| 348207/371472 [5:32:24<1:53:19, 3.42it/s] 94%|█████████▎| 348208/371472 [5:32:25<1:57:26, 3.30it/s] 94%|█████████▎| 348209/371472 [5:32:25<1:57:54, 3.29it/s] 94%|█████████▎| 348210/371472 [5:32:25<1:58:20, 3.28it/s] 94%|█████████▎| 348211/371472 [5:32:26<2:08:38, 3.01it/s] 94%|█████████▎| 348212/371472 [5:32:26<1:57:53, 3.29it/s] 94%|█████████▎| 348213/371472 [5:32:26<1:51:05, 3.49it/s] 94%|█████████▎| 348214/371472 [5:32:26<1:48:02, 3.59it/s] 94%|█████████▎| 348215/371472 [5:32:27<1:46:23, 3.64it/s] 94%|█████████▎| 348216/371472 [5:32:27<1:48:03, 3.59it/s] 94%|█████████▎| 348217/371472 [5:32:27<1:48:39, 3.57it/s] 94%|█████████▎| 348218/371472 [5:32:27<1:47:47, 3.60it/s] 94%|█████████▎| 348219/371472 [5:32:28<1:45:08, 3.69it/s] 94%|█████████▎| 348220/371472 [5:32:28<1:45:37, 3.67it/s] {'loss': 2.5468, 'learning_rate': 1.563651446917623e-07, 'epoch': 15.0} + 94%|█████████▎| 348220/371472 [5:32:28<1:45:37, 3.67it/s] 94%|█████████▎| 348221/371472 [5:32:28<1:43:34, 3.74it/s] 94%|█████████▎| 348222/371472 [5:32:28<1:42:49, 3.77it/s] 94%|█████████▎| 348223/371472 [5:32:29<1:40:24, 3.86it/s] 94%|█████████▎| 348224/371472 [5:32:29<1:42:44, 3.77it/s] 94%|█████████▎| 348225/371472 [5:32:29<1:51:59, 3.46it/s] 94%|█████████▎| 348226/371472 [5:32:30<1:58:34, 3.27it/s] 94%|█████████▎| 348227/371472 [5:32:30<1:54:18, 3.39it/s] 94%|█████████▎| 348228/371472 [5:32:30<1:49:00, 3.55it/s] 94%|█████████▎| 348229/371472 [5:32:30<1:45:01, 3.69it/s] 94%|█████████▎| 348230/371472 [5:32:31<1:42:04, 3.79it/s] 94%|█████████▎| 348231/371472 [5:32:31<1:43:19, 3.75it/s] 94%|█████████▎| 348232/371472 [5:32:31<1:41:24, 3.82it/s] 94%|█████████▎| 348233/371472 [5:32:32<1:47:26, 3.60it/s] 94%|█████████▎| 348234/371472 [5:32:32<1:46:25, 3.64it/s] 94%|█████████▎| 348235/371472 [5:32:32<1:55:02, 3.37it/s] 94%|█████████▎| 348236/371472 [5:32:33<2:05:50, 3.08it/s] 94%|█████████▎| 348237/371472 [5:32:33<1:58:24, 3.27it/s] 94%|█████████▎| 348238/371472 [5:32:33<1:54:12, 3.39it/s] 94%|█████████▎| 348239/371472 [5:32:33<2:03:06, 3.15it/s] 94%|█████████▎| 348240/371472 [5:32:34<2:00:08, 3.22it/s] {'loss': 2.4548, 'learning_rate': 1.5631666271628349e-07, 'epoch': 15.0} + 94%|█████████▎| 348240/371472 [5:32:34<2:00:08, 3.22it/s] 94%|█████████▎| 348241/371472 [5:32:34<1:57:44, 3.29it/s] 94%|█████████▎| 348242/371472 [5:32:34<2:00:06, 3.22it/s] 94%|█████████▎| 348243/371472 [5:32:35<2:03:59, 3.12it/s] 94%|█████████▎| 348244/371472 [5:32:35<1:56:44, 3.32it/s] 94%|█████████▎| 348245/371472 [5:32:35<1:52:26, 3.44it/s] 94%|█████████▎| 348246/371472 [5:32:35<1:48:54, 3.55it/s] 94%|█████████▎| 348247/371472 [5:32:36<1:45:07, 3.68it/s] 94%|█████████▎| 348248/371472 [5:32:36<1:46:34, 3.63it/s] 94%|█████████▎| 348249/371472 [5:32:36<1:59:27, 3.24it/s] 94%|█████████▎| 348250/371472 [5:32:37<1:58:01, 3.28it/s] 94%|█████████▎| 348251/371472 [5:32:37<1:53:48, 3.40it/s] 94%|█████████▎| 348252/371472 [5:32:37<1:51:04, 3.48it/s] 94%|█████████▎| 348253/371472 [5:32:37<1:51:48, 3.46it/s] 94%|█████████▎| 348254/371472 [5:32:38<1:45:39, 3.66it/s] 94%|█████████▍| 348255/371472 [5:32:38<1:52:40, 3.43it/s]Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41. +Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2} +/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock. + self.pid = os.fork() + 94%|█████████▍| 348256/371472 [5:33:04<51:46:25, 8.03s/it] 94%|█████████▍| 348257/371472 [5:33:04<36:52:48, 5.72s/it] 94%|█████████▍| 348258/371472 [5:33:05<26:28:19, 4.11s/it] 94%|█████████▍| 348259/371472 [5:33:05<19:12:29, 2.98s/it] 94%|█████████▍| 348260/371472 [5:33:05<14:03:54, 2.18s/it] {'loss': 2.7289, 'learning_rate': 1.5626818074080453e-07, 'epoch': 15.0} + 94%|█████████▍| 348260/371472 [5:33:05<14:03:54, 2.18s/it] 94%|█████████▍| 348261/371472 [5:33:06<10:36:52, 1.65s/it] 94%|█████████▍| 348262/371472 [5:33:06<8:05:39, 1.26s/it] 94%|█████████▍| 348263/371472 [5:33:07<6:21:04, 1.02it/s] 94%|█████████▍| 348264/371472 [5:33:07<5:08:25, 1.25it/s] 94%|█████████▍| 348265/371472 [5:33:07<4:26:31, 1.45it/s] 94%|█████████▍| 348266/371472 [5:33:08<3:47:12, 1.70it/s] 94%|█████████▍| 348267/371472 [5:33:08<3:15:33, 1.98it/s] 94%|█████████▍| 348268/371472 [5:33:08<2:56:51, 2.19it/s] 94%|█████████▍| 348269/371472 [5:33:09<2:41:38, 2.39it/s] 94%|█████████▍| 348270/371472 [5:33:09<2:31:10, 2.56it/s] 94%|█████████▍| 348271/371472 [5:33:09<2:23:25, 2.70it/s] 94%|█████████▍| 348272/371472 [5:33:10<2:17:29, 2.81it/s] 94%|█████████▍| 348273/371472 [5:33:10<2:09:18, 2.99it/s] 94%|█████████▍| 348274/371472 [5:33:10<2:03:39, 3.13it/s] 94%|█████████▍| 348275/371472 [5:33:11<1:59:24, 3.24it/s] 94%|█████████▍| 348276/371472 [5:33:11<1:51:23, 3.47it/s] 94%|█████████▍| 348277/371472 [5:33:11<1:51:19, 3.47it/s] 94%|█████████▍| 348278/371472 [5:33:11<1:51:21, 3.47it/s] 94%|█████████▍| 348279/371472 [5:33:12<1:56:30, 3.32it/s] 94%|█████████▍| 348280/371472 [5:33:12<1:54:19, 3.38it/s] {'loss': 2.4949, 'learning_rate': 1.5621969876532568e-07, 'epoch': 15.0} + 94%|█████████▍| 348280/371472 [5:33:12<1:54:19, 3.38it/s] 94%|█████████▍| 348281/371472 [5:33:12<1:59:49, 3.23it/s] 94%|█████████▍| 348282/371472 [5:33:13<1:55:58, 3.33it/s] 94%|█████████▍| 348283/371472 [5:33:13<2:00:29, 3.21it/s] 94%|█████████▍| 348284/371472 [5:33:13<1:58:18, 3.27it/s] 94%|█████████▍| 348285/371472 [5:33:13<1:53:30, 3.40it/s] 94%|█████████▍| 348286/371472 [5:33:14<1:54:35, 3.37it/s] 94%|█████████▍| 348287/371472 [5:33:14<1:55:45, 3.34it/s] 94%|█████████▍| 348288/371472 [5:33:14<1:51:34, 3.46it/s] 94%|█████████▍| 348289/371472 [5:33:15<1:53:35, 3.40it/s] 94%|█████████▍| 348290/371472 [5:33:15<1:54:26, 3.38it/s] 94%|█████████▍| 348291/371472 [5:33:15<1:53:09, 3.41it/s] 94%|█████████▍| 348292/371472 [5:33:16<1:50:47, 3.49it/s] 94%|█████████▍| 348293/371472 [5:33:16<1:49:31, 3.53it/s] 94%|█████████▍| 348294/371472 [5:33:16<1:51:56, 3.45it/s] 94%|█████████▍| 348295/371472 [5:33:16<1:51:25, 3.47it/s] 94%|█████████▍| 348296/371472 [5:33:17<1:49:29, 3.53it/s] 94%|█████████▍| 348297/371472 [5:33:17<1:50:19, 3.50it/s] 94%|█████████▍| 348298/371472 [5:33:17<1:50:19, 3.50it/s] 94%|█████████▍| 348299/371472 [5:33:18<1:50:04, 3.51it/s] 94%|█████████▍| 348300/371472 [5:33:18<1:54:06, 3.38it/s] {'loss': 2.537, 'learning_rate': 1.5617121678984675e-07, 'epoch': 15.0} + 94%|█████████▍| 348300/371472 [5:33:18<1:54:06, 3.38it/s] 94%|█████████▍| 348301/371472 [5:33:18<1:55:24, 3.35it/s] 94%|█████████▍| 348302/371472 [5:33:19<2:10:12, 2.97it/s] 94%|█████████▍| 348303/371472 [5:33:19<2:13:13, 2.90it/s] 94%|█████████▍| 348304/371472 [5:33:19<2:08:03, 3.02it/s] 94%|█████████▍| 348305/371472 [5:33:20<2:05:43, 3.07it/s] 94%|█████████▍| 348306/371472 [5:33:20<1:58:27, 3.26it/s] 94%|█████████▍| 348307/371472 [5:33:20<1:57:40, 3.28it/s] 94%|█████████▍| 348308/371472 [5:33:20<1:56:41, 3.31it/s] 94%|█████████▍| 348309/371472 [5:33:21<1:59:08, 3.24it/s] 94%|█████████▍| 348310/371472 [5:33:21<2:04:28, 3.10it/s] 94%|█████████▍| 348311/371472 [5:33:21<2:06:35, 3.05it/s] 94%|█████████▍| 348312/371472 [5:33:22<2:07:52, 3.02it/s] 94%|█████████▍| 348313/371472 [5:33:22<2:03:12, 3.13it/s] 94%|█████████▍| 348314/371472 [5:33:22<2:11:21, 2.94it/s] 94%|█████████▍| 348315/371472 [5:33:23<2:05:46, 3.07it/s] 94%|█████████▍| 348316/371472 [5:33:23<2:05:10, 3.08it/s] 94%|█████████▍| 348317/371472 [5:33:23<2:04:23, 3.10it/s] 94%|█████████▍| 348318/371472 [5:33:24<1:56:18, 3.32it/s] 94%|█████████▍| 348319/371472 [5:33:24<2:02:51, 3.14it/s] 94%|█████████▍| 348320/371472 [5:33:24<2:02:45, 3.14it/s] {'loss': 2.5016, 'learning_rate': 1.561227348143679e-07, 'epoch': 15.0} + 94%|█████████▍| 348320/371472 [5:33:24<2:02:45, 3.14it/s] 94%|█████████▍| 348321/371472 [5:33:25<2:01:23, 3.18it/s] 94%|█████████▍| 348322/371472 [5:33:25<1:53:21, 3.40it/s] 94%|█████████▍| 348323/371472 [5:33:25<1:54:44, 3.36it/s] 94%|█████████▍| 348324/371472 [5:33:25<1:51:47, 3.45it/s] 94%|█████████▍| 348325/371472 [5:33:26<1:50:00, 3.51it/s] 94%|█████████▍| 348326/371472 [5:33:26<1:49:29, 3.52it/s] 94%|█████████▍| 348327/371472 [5:33:26<1:58:55, 3.24it/s] 94%|█████████▍| 348328/371472 [5:33:27<2:00:12, 3.21it/s] 94%|█████████▍| 348329/371472 [5:33:27<1:57:54, 3.27it/s] 94%|█████████▍| 348330/371472 [5:33:27<1:52:35, 3.43it/s] 94%|█████████▍| 348331/371472 [5:33:28<2:00:36, 3.20it/s] 94%|█████████▍| 348332/371472 [5:33:28<1:57:33, 3.28it/s] 94%|█████████▍| 348333/371472 [5:33:28<1:54:47, 3.36it/s] 94%|█████████▍| 348334/371472 [5:33:28<1:51:11, 3.47it/s] 94%|█████████▍| 348335/371472 [5:33:29<1:50:11, 3.50it/s] 94%|█████████▍| 348336/371472 [5:33:29<1:47:10, 3.60it/s] 94%|█████████▍| 348337/371472 [5:33:29<1:48:30, 3.55it/s] 94%|█████████▍| 348338/371472 [5:33:30<1:45:56, 3.64it/s] 94%|█████████▍| 348339/371472 [5:33:30<1:50:06, 3.50it/s] 94%|█████████▍| 348340/371472 [5:33:30<1:57:33, 3.28it/s] {'loss': 2.5126, 'learning_rate': 1.5607425283888895e-07, 'epoch': 15.0} + 94%|█████████▍| 348340/371472 [5:33:30<1:57:33, 3.28it/s] 94%|█████████▍| 348341/371472 [5:33:30<1:55:26, 3.34it/s] 94%|█████████▍| 348342/371472 [5:33:31<2:04:46, 3.09it/s] 94%|█████████▍| 348343/371472 [5:33:31<2:04:33, 3.09it/s] 94%|█████████▍| 348344/371472 [5:33:31<2:05:14, 3.08it/s] 94%|█████████▍| 348345/371472 [5:33:32<2:03:05, 3.13it/s] 94%|█████████▍| 348346/371472 [5:33:32<2:01:19, 3.18it/s] 94%|█████████▍| 348347/371472 [5:33:32<2:03:02, 3.13it/s] 94%|█████████▍| 348348/371472 [5:33:33<1:59:27, 3.23it/s] 94%|█████████▍| 348349/371472 [5:33:33<1:57:55, 3.27it/s] 94%|█████████▍| 348350/371472 [5:33:33<1:53:34, 3.39it/s] 94%|█████████▍| 348351/371472 [5:33:34<1:50:57, 3.47it/s] 94%|█████████▍| 348352/371472 [5:33:34<1:56:07, 3.32it/s] 94%|█████████▍| 348353/371472 [5:33:34<1:51:54, 3.44it/s] 94%|█████████▍| 348354/371472 [5:33:34<1:48:50, 3.54it/s] 94%|█████████▍| 348355/371472 [5:33:35<1:48:52, 3.54it/s] 94%|█████████▍| 348356/371472 [5:33:35<2:04:43, 3.09it/s] 94%|█████████▍| 348357/371472 [5:33:35<2:01:52, 3.16it/s] 94%|█████████▍| 348358/371472 [5:33:36<1:59:10, 3.23it/s] 94%|█████████▍| 348359/371472 [5:33:36<1:56:01, 3.32it/s] 94%|█████████▍| 348360/371472 [5:33:36<1:59:23, 3.23it/s] {'loss': 2.5462, 'learning_rate': 1.5602577086341013e-07, 'epoch': 15.0} + 94%|█████████▍| 348360/371472 [5:33:36<1:59:23, 3.23it/s] 94%|█████████▍| 348361/371472 [5:33:37<1:56:52, 3.30it/s] 94%|█████████▍| 348362/371472 [5:33:37<1:51:45, 3.45it/s] 94%|█████████▍| 348363/371472 [5:33:37<1:59:26, 3.22it/s] 94%|█████████▍| 348364/371472 [5:33:38<1:58:19, 3.25it/s] 94%|█████████▍| 348365/371472 [5:33:38<2:03:09, 3.13it/s] 94%|█████████▍| 348366/371472 [5:33:38<1:59:03, 3.23it/s] 94%|█████████▍| 348367/371472 [5:33:38<1:54:28, 3.36it/s] 94%|█████████▍| 348368/371472 [5:33:39<1:54:11, 3.37it/s] 94%|█████████▍| 348369/371472 [5:33:39<2:03:14, 3.12it/s] 94%|█████████▍| 348370/371472 [5:33:39<1:58:02, 3.26it/s] 94%|█████████▍| 348371/371472 [5:33:40<2:00:51, 3.19it/s] 94%|█████████▍| 348372/371472 [5:33:40<2:04:51, 3.08it/s] 94%|█████████▍| 348373/371472 [5:33:40<1:59:27, 3.22it/s] 94%|█████████▍| 348374/371472 [5:33:41<1:55:19, 3.34it/s] 94%|█████████▍| 348375/371472 [5:33:41<2:06:24, 3.05it/s] 94%|█████████▍| 348376/371472 [5:33:41<2:09:33, 2.97it/s] 94%|█████████▍| 348377/371472 [5:33:42<2:01:21, 3.17it/s] 94%|█████████▍| 348378/371472 [5:33:42<2:01:40, 3.16it/s] 94%|█████████▍| 348379/371472 [5:33:42<1:55:45, 3.32it/s] 94%|█████████▍| 348380/371472 [5:33:43<1:59:22, 3.22it/s] {'loss': 2.632, 'learning_rate': 1.5597728888793117e-07, 'epoch': 15.01} + 94%|█████████▍| 348380/371472 [5:33:43<1:59:22, 3.22it/s] 94%|█████████▍| 348381/371472 [5:33:43<2:09:33, 2.97it/s] 94%|█████████▍| 348382/371472 [5:33:43<2:01:51, 3.16it/s] 94%|█████████▍| 348383/371472 [5:33:43<1:55:53, 3.32it/s] \ No newline at end of file