diff --git "a/run-2024-07-14T08:43:36+00:00.log" "b/run-2024-07-14T08:43:36+00:00.log" --- "a/run-2024-07-14T08:43:36+00:00.log" +++ "b/run-2024-07-14T08:43:36+00:00.log" @@ -2377,4 +2377,1170 @@ Non-default generation parameters: {'max_length': 200, 'early_stopping': True, ' 13%|█▎ | 46560/371472 [3:43:08<27:05:44, 3.33it/s] 13%|█▎ | 46561/371472 [3:43:09<26:39:26, 3.39it/s] 13%|█▎ | 46562/371472 [3:43:09<27:39:14, 3.26it/s] 13%|█▎ | 46563/371472 [3:43:09<27:08:27, 3.33it/s] 13%|█▎ | 46564/371472 [3:43:09<26:12:24, 3.44it/s] 13%|█▎ | 46565/371472 [3:43:10<25:12:13, 3.58it/s] 13%|█▎ | 46566/371472 [3:43:10<25:46:40, 3.50it/s] 13%|█▎ | 46567/371472 [3:43:10<26:13:52, 3.44it/s] 13%|█▎ | 46568/371472 [3:43:11<25:20:59, 3.56it/s] 13%|█▎ | 46569/371472 [3:43:11<24:52:05, 3.63it/s] 13%|█▎ | 46570/371472 [3:43:11<27:31:57, 3.28it/s] 13%|█▎ | 46571/371472 [3:43:12<30:02:16, 3.00it/s] 13%|█▎ | 46572/371472 [3:43:12<29:28:11, 3.06it/s] 13%|█▎ | 46573/371472 [3:43:12<29:32:48, 3.05it/s] 13%|█▎ | 46574/371472 [3:43:13<28:31:09, 3.16it/s] 13%|█▎ | 46575/371472 [3:43:13<27:12:26, 3.32it/s] 13%|█▎ | 46576/371472 [3:43:13<26:49:03, 3.37it/s] 13%|█▎ | 46577/371472 [3:43:13<27:39:49, 3.26it/s] 13%|█▎ | 46578/371472 [3:43:14<26:54:23, 3.35it/s] 13%|█▎ | 46579/371472 [3:43:14<26:36:58, 3.39it/s] 13%|█▎ | 46580/371472 [3:43:14<29:50:52, 3.02it/s] {'loss': 4.444, 'learning_rate': 8.875702988644444e-07, 'epoch': 2.01} 13%|█▎ | 46580/371472 [3:43:14<29:50:52, 3.02it/s] 13%|█▎ | 46581/371472 [3:43:15<28:37:01, 3.15it/s] 13%|█▎ | 46582/371472 [3:43:15<27:56:57, 3.23it/s] 13%|█▎ | 46583/371472 [3:43:15<27:41:44, 3.26it/s] 13%|█▎ | 46584/371472 [3:43:16<26:14:03, 3.44it/s] 13%|█▎ | 46585/371472 [3:43:16<25:40:09, 3.52it/s] 13%|█▎ | 46586/371472 [3:43:16<25:26:15, 3.55it/s] 13%|█▎ | 46587/371472 [3:43:16<25:08:43, 3.59it/s] 13%|█▎ | 46588/371472 [3:43:17<24:48:44, 3.64it/s] 13%|█▎ | 46589/371472 [3:43:17<25:40:17, 3.52it/s] 13%|█▎ | 46590/371472 [3:43:17<24:48:40, 3.64it/s] 13%|█▎ | 46591/371472 [3:43:18<25:54:48, 3.48it/s] 13%|█▎ | 46592/371472 [3:43:18<25:33:10, 3.53it/s] 13%|█▎ | 46593/371472 [3:43:18<24:51:48, 3.63it/s] 13%|█▎ | 46594/371472 [3:43:18<26:27:21, 3.41it/s] 13%|█▎ | 46595/371472 [3:43:19<26:28:47, 3.41it/s] 13%|█▎ | 46596/371472 [3:43:19<26:14:55, 3.44it/s] 13%|█▎ | 46597/371472 [3:43:19<26:39:29, 3.39it/s] 13%|█▎ | 46598/371472 [3:43:19<25:22:39, 3.56it/s] 13%|█▎ | 46599/371472 [3:43:20<25:16:31, 3.57it/s] 13%|█▎ | 46600/371472 [3:43:20<25:28:57, 3.54it/s] {'loss': 4.2953, 'learning_rate': 8.875218168889654e-07, 'epoch': 2.01} 13%|█▎ | 46600/371472 [3:43:20<25:28:57, 3.54it/s] 13%|█▎ | 46601/371472 [3:43:20<24:48:03, 3.64it/s] 13%|█▎ | 46602/371472 [3:43:21<24:12:59, 3.73it/s] 13%|█▎ | 46603/371472 [3:43:21<24:31:57, 3.68it/s] 13%|█▎ | 46604/371472 [3:43:21<24:37:03, 3.67it/s] 13%|█▎ | 46605/371472 [3:43:21<25:30:28, 3.54it/s] 13%|█▎ | 46606/371472 [3:43:22<24:58:16, 3.61it/s] 13%|█▎ | 46607/371472 [3:43:22<25:39:56, 3.52it/s] 13%|█▎ | 46608/371472 [3:43:22<26:10:02, 3.45it/s] 13%|█▎ | 46609/371472 [3:43:23<26:34:20, 3.40it/s] 13%|█▎ | 46610/371472 [3:43:23<25:36:06, 3.52it/s] 13%|█▎ | 46611/371472 [3:43:23<25:36:22, 3.52it/s] 13%|█▎ | 46612/371472 [3:43:23<24:48:17, 3.64it/s] 13%|█▎ | 46613/371472 [3:43:24<25:54:42, 3.48it/s] 13%|█▎ | 46614/371472 [3:43:24<26:20:33, 3.43it/s] 13%|█▎ | 46615/371472 [3:43:24<25:23:16, 3.55it/s] 13%|█▎ | 46616/371472 [3:43:25<26:16:52, 3.43it/s] 13%|█▎ | 46617/371472 [3:43:25<25:54:54, 3.48it/s] 13%|█▎ | 46618/371472 [3:43:25<24:58:35, 3.61it/s] 13%|█▎ | 46619/371472 [3:43:25<25:20:03, 3.56it/s] 13%|█▎ | 46620/371472 [3:43:26<25:07:15, 3.59it/s] {'loss': 4.1916, 'learning_rate': 8.874733349134865e-07, 'epoch': 2.01} - 13%|█▎ | 46620/371472 [3:43:26<25:07:15, 3.59it/s] 13%|█▎ | 46621/371472 [3:43:26<25:34:06, 3.53it/s] 13%|█▎ | 46622/371472 [3:43:26<26:53:27, 3.36it/s] 13%|█▎ | 46623/371472 [3:43:27<27:35:17, 3.27it/s] 13%|█▎ | 46624/371472 [3:43:27<29:32:07, 3.06it/s] 13%|█▎ | 46625/371472 [3:43:27<30:10:17, 2.99it/s] 13%|█▎ | 46626/371472 [3:43:28<29:36:29, 3.05it/s] 13%|█▎ | 46627/371472 [3:43:28<28:55:25, 3.12it/s] 13%|█▎ | 46628/371472 [3:43:28<28:05:03, 3.21it/s] 13%|█▎ | 46629/371472 [3:43:29<28:02:17, 3.22it/s] 13%|█▎ | 46630/371472 [3:43:29<28:43:44, 3.14it/s] 13%|█▎ | 46631/371472 [3:43:29<27:39:43, 3.26it/s] 13%|█▎ | 46632/371472 [3:43:30<28:40:21, 3.15it/s] 13%|█▎ | 46633/371472 [3:43:30<28:51:34, 3.13it/s] 13%|█▎ | 46634/371472 [3:43:30<29:25:26, 3.07it/s] 13%|█▎ | 46635/371472 [3:43:30<28:13:59, 3.20it/s] 13%|█▎ | 46636/371472 [3:43:31<27:29:00, 3.28it/s] 13%|█▎ | 46637/371472 [3:43:31<26:43:44, 3.38it/s] \ No newline at end of file + 13%|█▎ | 46620/371472 [3:43:26<25:07:15, 3.59it/s] 13%|█▎ | 46621/371472 [3:43:26<25:34:06, 3.53it/s] 13%|█▎ | 46622/371472 [3:43:26<26:53:27, 3.36it/s] 13%|█▎ | 46623/371472 [3:43:27<27:35:17, 3.27it/s] 13%|█▎ | 46624/371472 [3:43:27<29:32:07, 3.06it/s] 13%|█▎ | 46625/371472 [3:43:27<30:10:17, 2.99it/s] 13%|█▎ | 46626/371472 [3:43:28<29:36:29, 3.05it/s] 13%|█▎ | 46627/371472 [3:43:28<28:55:25, 3.12it/s] 13%|█▎ | 46628/371472 [3:43:28<28:05:03, 3.21it/s] 13%|█▎ | 46629/371472 [3:43:29<28:02:17, 3.22it/s] 13%|█▎ | 46630/371472 [3:43:29<28:43:44, 3.14it/s] 13%|█▎ | 46631/371472 [3:43:29<27:39:43, 3.26it/s] 13%|█▎ | 46632/371472 [3:43:30<28:40:21, 3.15it/s] 13%|█▎ | 46633/371472 [3:43:30<28:51:34, 3.13it/s] 13%|█▎ | 46634/371472 [3:43:30<29:25:26, 3.07it/s] 13%|█▎ | 46635/371472 [3:43:30<28:13:59, 3.20it/s] 13%|█▎ | 46636/371472 [3:43:31<27:29:00, 3.28it/s] 13%|█▎ | 46637/371472 [3:43:31<26:43:44, 3.38it/s] 13%|█▎ | 46638/371472 [3:43:31<26:32:36, 3.40it/s] 13%|█▎ | 46639/371472 [3:43:32<26:45:09, 3.37it/s] 13%|█▎ | 46640/371472 [3:43:32<26:05:32, 3.46it/s] {'loss': 4.208, 'learning_rate': 8.874248529380077e-07, 'epoch': 2.01} + 13%|█▎ | 46640/371472 [3:43:32<26:05:32, 3.46it/s] 13%|█▎ | 46641/371472 [3:43:32<26:57:21, 3.35it/s] 13%|█▎ | 46642/371472 [3:43:32<25:40:12, 3.51it/s] 13%|█▎ | 46643/371472 [3:43:33<26:23:28, 3.42it/s] 13%|█▎ | 46644/371472 [3:43:33<25:42:56, 3.51it/s] 13%|█▎ | 46645/371472 [3:43:33<25:10:44, 3.58it/s] 13%|█▎ | 46646/371472 [3:43:34<25:00:04, 3.61it/s] 13%|█▎ | 46647/371472 [3:43:34<26:57:47, 3.35it/s] 13%|█▎ | 46648/371472 [3:43:34<25:58:00, 3.47it/s] 13%|█▎ | 46649/371472 [3:43:34<24:59:11, 3.61it/s] 13%|█▎ | 46650/371472 [3:43:35<25:01:22, 3.61it/s] 13%|█▎ | 46651/371472 [3:43:35<26:26:00, 3.41it/s] 13%|█▎ | 46652/371472 [3:43:35<25:41:31, 3.51it/s] 13%|█▎ | 46653/371472 [3:43:36<25:01:39, 3.61it/s] 13%|█▎ | 46654/371472 [3:43:36<25:54:51, 3.48it/s] 13%|█▎ | 46655/371472 [3:43:36<26:10:34, 3.45it/s] 13%|█▎ | 46656/371472 [3:43:37<26:47:48, 3.37it/s] 13%|█▎ | 46657/371472 [3:43:37<26:46:30, 3.37it/s] 13%|█▎ | 46658/371472 [3:43:37<26:41:15, 3.38it/s] 13%|█▎ | 46659/371472 [3:43:37<25:51:37, 3.49it/s] 13%|█▎ | 46660/371472 [3:43:38<25:26:00, 3.55it/s] {'loss': 4.3798, 'learning_rate': 8.873763709625289e-07, 'epoch': 2.01} + 13%|█▎ | 46660/371472 [3:43:38<25:26:00, 3.55it/s] 13%|█▎ | 46661/371472 [3:43:38<24:53:37, 3.62it/s] 13%|█▎ | 46662/371472 [3:43:38<25:05:37, 3.60it/s] 13%|█▎ | 46663/371472 [3:43:38<24:34:07, 3.67it/s] 13%|█▎ | 46664/371472 [3:43:39<24:27:01, 3.69it/s] 13%|█▎ | 46665/371472 [3:43:39<26:05:48, 3.46it/s] 13%|█▎ | 46666/371472 [3:43:39<26:38:33, 3.39it/s] 13%|█▎ | 46667/371472 [3:43:40<28:14:42, 3.19it/s] 13%|█▎ | 46668/371472 [3:43:40<28:34:46, 3.16it/s] 13%|█▎ | 46669/371472 [3:43:40<29:22:05, 3.07it/s] 13%|█▎ | 46670/371472 [3:43:41<30:38:54, 2.94it/s] 13%|█▎ | 46671/371472 [3:43:41<31:20:40, 2.88it/s] 13%|█▎ | 46672/371472 [3:43:42<32:14:19, 2.80it/s] 13%|█▎ | 46673/371472 [3:43:42<35:17:39, 2.56it/s] 13%|█▎ | 46674/371472 [3:43:42<33:56:16, 2.66it/s] 13%|█▎ | 46675/371472 [3:43:43<32:02:03, 2.82it/s] 13%|█▎ | 46676/371472 [3:43:43<30:35:26, 2.95it/s] 13%|█▎ | 46677/371472 [3:43:43<29:39:19, 3.04it/s] 13%|█▎ | 46678/371472 [3:43:43<28:01:04, 3.22it/s] 13%|█▎ | 46679/371472 [3:43:44<29:03:41, 3.10it/s] 13%|█▎ | 46680/371472 [3:43:44<28:14:26, 3.19it/s] {'loss': 4.2406, 'learning_rate': 8.873278889870499e-07, 'epoch': 2.01} + 13%|█▎ | 46680/371472 [3:43:44<28:14:26, 3.19it/s] 13%|█▎ | 46681/371472 [3:43:44<28:42:58, 3.14it/s] 13%|█▎ | 46682/371472 [3:43:45<27:39:26, 3.26it/s] 13%|█▎ | 46683/371472 [3:43:45<27:49:28, 3.24it/s] 13%|█▎ | 46684/371472 [3:43:45<27:33:18, 3.27it/s] 13%|█▎ | 46685/371472 [3:43:46<25:54:06, 3.48it/s] 13%|█▎ | 46686/371472 [3:43:46<26:12:41, 3.44it/s] 13%|█▎ | 46687/371472 [3:43:46<26:02:31, 3.46it/s] 13%|█▎ | 46688/371472 [3:43:46<26:08:53, 3.45it/s] 13%|█▎ | 46689/371472 [3:43:47<25:52:56, 3.49it/s] 13%|█▎ | 46690/371472 [3:43:47<25:12:46, 3.58it/s] 13%|█▎ | 46691/371472 [3:43:47<25:11:48, 3.58it/s] 13%|█▎ | 46692/371472 [3:43:48<25:13:47, 3.58it/s] 13%|█▎ | 46693/371472 [3:43:48<25:12:12, 3.58it/s] 13%|█▎ | 46694/371472 [3:43:48<25:08:35, 3.59it/s] 13%|█▎ | 46695/371472 [3:43:48<25:03:03, 3.60it/s] 13%|█▎ | 46696/371472 [3:43:49<27:18:26, 3.30it/s] 13%|█▎ | 46697/371472 [3:43:49<26:23:32, 3.42it/s] 13%|█▎ | 46698/371472 [3:43:49<25:26:36, 3.55it/s] 13%|█▎ | 46699/371472 [3:43:50<25:49:22, 3.49it/s] 13%|█▎ | 46700/371472 [3:43:50<27:26:06, 3.29it/s] {'loss': 4.2977, 'learning_rate': 8.87279407011571e-07, 'epoch': 2.01} + 13%|█▎ | 46700/371472 [3:43:50<27:26:06, 3.29it/s] 13%|█▎ | 46701/371472 [3:43:50<29:00:54, 3.11it/s] 13%|█▎ | 46702/371472 [3:43:51<27:07:30, 3.33it/s] 13%|█▎ | 46703/371472 [3:43:51<26:36:08, 3.39it/s] 13%|█▎ | 46704/371472 [3:43:51<26:14:30, 3.44it/s] 13%|█▎ | 46705/371472 [3:43:51<25:22:31, 3.56it/s] 13%|█▎ | 46706/371472 [3:43:52<25:02:17, 3.60it/s] 13%|█▎ | 46707/371472 [3:43:52<24:32:37, 3.68it/s] 13%|█▎ | 46708/371472 [3:43:52<23:46:38, 3.79it/s] 13%|█▎ | 46709/371472 [3:43:52<24:57:48, 3.61it/s] 13%|█▎ | 46710/371472 [3:43:53<26:47:04, 3.37it/s] 13%|█▎ | 46711/371472 [3:43:53<27:08:45, 3.32it/s] 13%|█▎ | 46712/371472 [3:43:53<27:23:58, 3.29it/s] 13%|█▎ | 46713/371472 [3:43:54<26:03:08, 3.46it/s] 13%|█▎ | 46714/371472 [3:43:54<27:34:41, 3.27it/s] 13%|█▎ | 46715/371472 [3:43:54<26:15:01, 3.44it/s] 13%|█▎ | 46716/371472 [3:43:55<26:22:04, 3.42it/s] 13%|█▎ | 46717/371472 [3:43:55<26:30:49, 3.40it/s] 13%|█▎ | 46718/371472 [3:43:55<26:13:47, 3.44it/s] 13%|█▎ | 46719/371472 [3:43:55<25:54:12, 3.48it/s] 13%|█▎ | 46720/371472 [3:43:56<25:06:01, 3.59it/s] {'loss': 4.1866, 'learning_rate': 8.872309250360921e-07, 'epoch': 2.01} + 13%|█▎ | 46720/371472 [3:43:56<25:06:01, 3.59it/s] 13%|█▎ | 46721/371472 [3:43:56<25:35:45, 3.52it/s] 13%|█▎ | 46722/371472 [3:43:56<29:08:52, 3.09it/s] 13%|█▎ | 46723/371472 [3:43:57<28:25:33, 3.17it/s] 13%|█▎ | 46724/371472 [3:43:57<27:00:30, 3.34it/s] 13%|█▎ | 46725/371472 [3:43:57<28:08:39, 3.21it/s] 13%|█▎ | 46726/371472 [3:43:58<28:25:51, 3.17it/s] 13%|█▎ | 46727/371472 [3:43:58<28:10:10, 3.20it/s] 13%|█▎ | 46728/371472 [3:43:58<29:10:13, 3.09it/s] 13%|█▎ | 46729/371472 [3:43:59<27:47:06, 3.25it/s] 13%|█▎ | 46730/371472 [3:43:59<27:09:56, 3.32it/s] 13%|█▎ | 46731/371472 [3:43:59<27:10:18, 3.32it/s] 13%|█▎ | 46732/371472 [3:43:59<26:33:17, 3.40it/s] 13%|█▎ | 46733/371472 [3:44:00<26:22:37, 3.42it/s] 13%|█▎ | 46734/371472 [3:44:00<25:37:10, 3.52it/s] 13%|█▎ | 46735/371472 [3:44:00<26:23:07, 3.42it/s] 13%|█▎ | 46736/371472 [3:44:01<26:20:32, 3.42it/s] 13%|█▎ | 46737/371472 [3:44:01<25:05:50, 3.59it/s] 13%|█▎ | 46738/371472 [3:44:01<24:55:38, 3.62it/s] 13%|█▎ | 46739/371472 [3:44:01<24:59:19, 3.61it/s] 13%|█▎ | 46740/371472 [3:44:02<25:46:41, 3.50it/s] {'loss': 3.9985, 'learning_rate': 8.871824430606132e-07, 'epoch': 2.01} + 13%|█▎ | 46740/371472 [3:44:02<25:46:41, 3.50it/s] 13%|█▎ | 46741/371472 [3:44:02<26:32:39, 3.40it/s] 13%|█▎ | 46742/371472 [3:44:02<25:18:28, 3.56it/s] 13%|█▎ | 46743/371472 [3:44:02<24:45:48, 3.64it/s] 13%|█▎ | 46744/371472 [3:44:03<24:22:14, 3.70it/s] 13%|█▎ | 46745/371472 [3:44:03<25:21:05, 3.56it/s] 13%|█▎ | 46746/371472 [3:44:03<25:31:28, 3.53it/s] 13%|█▎ | 46747/371472 [3:44:04<27:53:54, 3.23it/s] 13%|█▎ | 46748/371472 [3:44:04<26:24:53, 3.41it/s] 13%|█▎ | 46749/371472 [3:44:04<26:08:59, 3.45it/s] 13%|█▎ | 46750/371472 [3:44:05<27:02:40, 3.34it/s] 13%|█▎ | 46751/371472 [3:44:05<26:36:22, 3.39it/s] 13%|█▎ | 46752/371472 [3:44:05<26:59:36, 3.34it/s] 13%|█▎ | 46753/371472 [3:44:05<26:02:57, 3.46it/s] 13%|█▎ | 46754/371472 [3:44:06<25:03:31, 3.60it/s] 13%|█▎ | 46755/371472 [3:44:06<24:41:37, 3.65it/s] 13%|█▎ | 46756/371472 [3:44:06<25:00:56, 3.61it/s] 13%|█▎ | 46757/371472 [3:44:07<26:33:45, 3.40it/s] 13%|█▎ | 46758/371472 [3:44:07<26:09:02, 3.45it/s] 13%|█▎ | 46759/371472 [3:44:07<25:38:43, 3.52it/s] 13%|█▎ | 46760/371472 [3:44:07<24:54:26, 3.62it/s] {'loss': 4.3315, 'learning_rate': 8.871339610851343e-07, 'epoch': 2.01} + 13%|█▎ | 46760/371472 [3:44:07<24:54:26, 3.62it/s] 13%|█▎ | 46761/371472 [3:44:08<25:18:31, 3.56it/s] 13%|█▎ | 46762/371472 [3:44:08<25:39:57, 3.51it/s] 13%|█▎ | 46763/371472 [3:44:08<26:15:18, 3.44it/s] 13%|█▎ | 46764/371472 [3:44:09<26:28:38, 3.41it/s] 13%|█▎ | 46765/371472 [3:44:09<26:27:16, 3.41it/s] 13%|█▎ | 46766/371472 [3:44:09<26:17:10, 3.43it/s] 13%|█▎ | 46767/371472 [3:44:09<27:06:01, 3.33it/s] 13%|█▎ | 46768/371472 [3:44:10<25:57:45, 3.47it/s] 13%|█▎ | 46769/371472 [3:44:10<25:59:13, 3.47it/s] 13%|█▎ | 46770/371472 [3:44:10<25:04:12, 3.60it/s] 13%|█▎ | 46771/371472 [3:44:11<27:40:50, 3.26it/s] 13%|█▎ | 46772/371472 [3:44:11<27:12:08, 3.32it/s] 13%|█▎ | 46773/371472 [3:44:11<25:37:25, 3.52it/s] 13%|█▎ | 46774/371472 [3:44:11<26:19:01, 3.43it/s] 13%|█▎ | 46775/371472 [3:44:12<25:49:14, 3.49it/s] 13%|█▎ | 46776/371472 [3:44:12<27:20:18, 3.30it/s] 13%|█▎ | 46777/371472 [3:44:12<26:18:02, 3.43it/s] 13%|█▎ | 46778/371472 [3:44:13<28:00:31, 3.22it/s] 13%|█▎ | 46779/371472 [3:44:13<26:28:16, 3.41it/s] 13%|█▎ | 46780/371472 [3:44:13<25:18:03, 3.56it/s] {'loss': 4.3568, 'learning_rate': 8.870854791096554e-07, 'epoch': 2.01} + 13%|█▎ | 46780/371472 [3:44:13<25:18:03, 3.56it/s] 13%|█▎ | 46781/371472 [3:44:14<25:03:42, 3.60it/s] 13%|█▎ | 46782/371472 [3:44:14<30:11:31, 2.99it/s] 13%|█▎ | 46783/371472 [3:44:14<27:40:07, 3.26it/s] 13%|█▎ | 46784/371472 [3:44:15<27:19:39, 3.30it/s] 13%|█▎ | 46785/371472 [3:44:15<26:50:19, 3.36it/s] 13%|█▎ | 46786/371472 [3:44:15<27:01:33, 3.34it/s] 13%|█▎ | 46787/371472 [3:44:15<26:23:33, 3.42it/s] 13%|█▎ | 46788/371472 [3:44:16<26:55:50, 3.35it/s] 13%|█▎ | 46789/371472 [3:44:16<25:35:55, 3.52it/s] 13%|█▎ | 46790/371472 [3:44:16<26:04:25, 3.46it/s] 13%|█▎ | 46791/371472 [3:44:16<25:06:04, 3.59it/s] 13%|█▎ | 46792/371472 [3:44:17<27:23:52, 3.29it/s] 13%|█▎ | 46793/371472 [3:44:17<26:42:24, 3.38it/s] 13%|█▎ | 46794/371472 [3:44:17<25:34:06, 3.53it/s] 13%|█▎ | 46795/371472 [3:44:18<26:00:45, 3.47it/s] 13%|█▎ | 46796/371472 [3:44:18<25:14:18, 3.57it/s] 13%|█▎ | 46797/371472 [3:44:18<25:26:53, 3.54it/s] 13%|█▎ | 46798/371472 [3:44:19<25:18:32, 3.56it/s] 13%|█▎ | 46799/371472 [3:44:19<24:42:36, 3.65it/s] 13%|█▎ | 46800/371472 [3:44:19<24:54:41, 3.62it/s] {'loss': 4.4983, 'learning_rate': 8.870369971341766e-07, 'epoch': 2.02} + 13%|█▎ | 46800/371472 [3:44:19<24:54:41, 3.62it/s] 13%|█▎ | 46801/371472 [3:44:19<24:53:55, 3.62it/s] 13%|█▎ | 46802/371472 [3:44:20<25:10:43, 3.58it/s] 13%|█▎ | 46803/371472 [3:44:20<26:30:16, 3.40it/s] 13%|█▎ | 46804/371472 [3:44:20<26:32:12, 3.40it/s] 13%|█▎ | 46805/371472 [3:44:21<28:37:00, 3.15it/s] 13%|█▎ | 46806/371472 [3:44:21<27:05:39, 3.33it/s] 13%|█▎ | 46807/371472 [3:44:21<26:21:07, 3.42it/s] 13%|█▎ | 46808/371472 [3:44:21<26:18:04, 3.43it/s] 13%|█▎ | 46809/371472 [3:44:22<27:42:18, 3.26it/s] 13%|█▎ | 46810/371472 [3:44:22<27:21:54, 3.30it/s] 13%|█▎ | 46811/371472 [3:44:22<27:21:33, 3.30it/s] 13%|█▎ | 46812/371472 [3:44:23<27:19:06, 3.30it/s] 13%|█▎ | 46813/371472 [3:44:23<26:19:10, 3.43it/s] 13%|█▎ | 46814/371472 [3:44:23<25:34:28, 3.53it/s] 13%|█▎ | 46815/371472 [3:44:24<26:08:49, 3.45it/s] 13%|█▎ | 46816/371472 [3:44:24<26:13:06, 3.44it/s] 13%|█▎ | 46817/371472 [3:44:24<28:00:29, 3.22it/s] 13%|█▎ | 46818/371472 [3:44:24<26:43:18, 3.37it/s] 13%|█▎ | 46819/371472 [3:44:25<30:15:26, 2.98it/s] 13%|█▎ | 46820/371472 [3:44:25<28:41:04, 3.14it/s] {'loss': 4.3196, 'learning_rate': 8.869885151586976e-07, 'epoch': 2.02} + 13%|█▎ | 46820/371472 [3:44:25<28:41:04, 3.14it/s] 13%|█▎ | 46821/371472 [3:44:25<27:28:53, 3.28it/s] 13%|█▎ | 46822/371472 [3:44:26<27:21:40, 3.30it/s] 13%|█▎ | 46823/371472 [3:44:26<26:07:33, 3.45it/s] 13%|█▎ | 46824/371472 [3:44:26<27:17:57, 3.30it/s] 13%|█▎ | 46825/371472 [3:44:27<28:38:00, 3.15it/s] 13%|█▎ | 46826/371472 [3:44:27<30:53:34, 2.92it/s] 13%|█▎ | 46827/371472 [3:44:27<31:40:57, 2.85it/s] 13%|█▎ | 46828/371472 [3:44:28<30:11:08, 2.99it/s] 13%|█▎ | 46829/371472 [3:44:28<29:52:07, 3.02it/s] 13%|█▎ | 46830/371472 [3:44:28<29:08:12, 3.10it/s] 13%|█▎ | 46831/371472 [3:44:29<28:05:29, 3.21it/s] 13%|█▎ | 46832/371472 [3:44:29<27:35:33, 3.27it/s] 13%|█▎ | 46833/371472 [3:44:29<27:20:32, 3.30it/s] 13%|█▎ | 46834/371472 [3:44:30<26:58:06, 3.34it/s] 13%|█▎ | 46835/371472 [3:44:30<26:12:32, 3.44it/s] 13%|█▎ | 46836/371472 [3:44:30<26:48:21, 3.36it/s] 13%|█▎ | 46837/371472 [3:44:30<27:18:27, 3.30it/s] 13%|█▎ | 46838/371472 [3:44:31<26:40:59, 3.38it/s] 13%|█▎ | 46839/371472 [3:44:31<26:14:33, 3.44it/s] 13%|█▎ | 46840/371472 [3:44:31<26:30:01, 3.40it/s] {'loss': 4.3064, 'learning_rate': 8.869400331832187e-07, 'epoch': 2.02} + 13%|█▎ | 46840/371472 [3:44:31<26:30:01, 3.40it/s] 13%|█▎ | 46841/371472 [3:44:32<26:52:43, 3.35it/s] 13%|█▎ | 46842/371472 [3:44:32<27:09:13, 3.32it/s] 13%|█▎ | 46843/371472 [3:44:32<27:23:44, 3.29it/s] 13%|█▎ | 46844/371472 [3:44:32<26:45:04, 3.37it/s] 13%|█▎ | 46845/371472 [3:44:33<25:32:31, 3.53it/s] 13%|█▎ | 46846/371472 [3:44:33<24:22:03, 3.70it/s] 13%|█▎ | 46847/371472 [3:44:33<26:08:55, 3.45it/s] 13%|█▎ | 46848/371472 [3:44:34<27:07:05, 3.33it/s] 13%|█▎ | 46849/371472 [3:44:34<25:36:00, 3.52it/s] 13%|█▎ | 46850/371472 [3:44:34<25:20:16, 3.56it/s] 13%|█▎ | 46851/371472 [3:44:34<25:04:49, 3.60it/s] 13%|█▎ | 46852/371472 [3:44:35<24:37:23, 3.66it/s] 13%|█▎ | 46853/371472 [3:44:35<24:33:11, 3.67it/s] 13%|█▎ | 46854/371472 [3:44:35<24:10:55, 3.73it/s] 13%|█▎ | 46855/371472 [3:44:35<24:47:06, 3.64it/s] 13%|█▎ | 46856/371472 [3:44:36<25:01:27, 3.60it/s] 13%|█▎ | 46857/371472 [3:44:36<25:52:07, 3.49it/s] 13%|█▎ | 46858/371472 [3:44:36<27:20:11, 3.30it/s] 13%|█▎ | 46859/371472 [3:44:37<27:16:54, 3.31it/s] 13%|█▎ | 46860/371472 [3:44:37<28:14:42, 3.19it/s] {'loss': 4.2872, 'learning_rate': 8.868915512077398e-07, 'epoch': 2.02} + 13%|█▎ | 46860/371472 [3:44:37<28:14:42, 3.19it/s] 13%|█▎ | 46861/371472 [3:44:37<29:00:49, 3.11it/s] 13%|█▎ | 46862/371472 [3:44:38<30:21:54, 2.97it/s] 13%|█▎ | 46863/371472 [3:44:38<28:32:14, 3.16it/s] 13%|█▎ | 46864/371472 [3:44:38<26:54:47, 3.35it/s] 13%|█▎ | 46865/371472 [3:44:39<27:31:33, 3.28it/s] 13%|█▎ | 46866/371472 [3:44:39<27:36:52, 3.27it/s] 13%|█▎ | 46867/371472 [3:44:39<27:10:35, 3.32it/s] 13%|█▎ | 46868/371472 [3:44:40<28:04:37, 3.21it/s] 13%|█▎ | 46869/371472 [3:44:40<27:14:40, 3.31it/s] 13%|█▎ | 46870/371472 [3:44:40<26:01:39, 3.46it/s] 13%|█▎ | 46871/371472 [3:44:40<25:18:12, 3.56it/s] 13%|█▎ | 46872/371472 [3:44:41<25:28:12, 3.54it/s] 13%|█▎ | 46873/371472 [3:44:41<25:41:58, 3.51it/s] 13%|█▎ | 46874/371472 [3:44:41<26:42:23, 3.38it/s] 13%|█▎ | 46875/371472 [3:44:42<26:45:45, 3.37it/s] 13%|█▎ | 46876/371472 [3:44:42<27:37:06, 3.26it/s] 13%|█▎ | 46877/371472 [3:44:42<26:38:28, 3.38it/s] 13%|█▎ | 46878/371472 [3:44:42<26:46:04, 3.37it/s] 13%|█▎ | 46879/371472 [3:44:43<25:49:09, 3.49it/s] 13%|█▎ | 46880/371472 [3:44:43<25:46:13, 3.50it/s] {'loss': 4.3508, 'learning_rate': 8.86843069232261e-07, 'epoch': 2.02} + 13%|█▎ | 46880/371472 [3:44:43<25:46:13, 3.50it/s] 13%|█▎ | 46881/371472 [3:44:43<26:25:06, 3.41it/s] 13%|█▎ | 46882/371472 [3:44:44<26:19:37, 3.42it/s] 13%|█▎ | 46883/371472 [3:44:44<26:19:35, 3.42it/s] 13%|█▎ | 46884/371472 [3:44:44<26:19:00, 3.43it/s] 13%|█▎ | 46885/371472 [3:44:44<25:42:02, 3.51it/s] 13%|█▎ | 46886/371472 [3:44:45<26:04:16, 3.46it/s] 13%|█▎ | 46887/371472 [3:44:45<25:24:58, 3.55it/s] 13%|█▎ | 46888/371472 [3:44:45<26:36:32, 3.39it/s] 13%|█▎ | 46889/371472 [3:44:46<26:48:30, 3.36it/s] 13%|█▎ | 46890/371472 [3:44:46<29:14:45, 3.08it/s] 13%|█▎ | 46891/371472 [3:44:46<27:58:30, 3.22it/s] 13%|█▎ | 46892/371472 [3:44:47<27:52:49, 3.23it/s] 13%|█▎ | 46893/371472 [3:44:47<27:49:41, 3.24it/s] 13%|█▎ | 46894/371472 [3:44:47<27:51:37, 3.24it/s] 13%|█▎ | 46895/371472 [3:44:48<27:30:22, 3.28it/s] 13%|█▎ | 46896/371472 [3:44:48<27:08:50, 3.32it/s] 13%|█▎ | 46897/371472 [3:44:48<26:17:01, 3.43it/s] 13%|█▎ | 46898/371472 [3:44:48<26:03:50, 3.46it/s] 13%|█▎ | 46899/371472 [3:44:49<27:00:05, 3.34it/s] 13%|█▎ | 46900/371472 [3:44:49<27:50:40, 3.24it/s] {'loss': 4.3117, 'learning_rate': 8.86794587256782e-07, 'epoch': 2.02} + 13%|█▎ | 46900/371472 [3:44:49<27:50:40, 3.24it/s] 13%|█▎ | 46901/371472 [3:44:49<26:19:44, 3.42it/s] 13%|█▎ | 46902/371472 [3:44:50<26:09:57, 3.45it/s] 13%|█▎ | 46903/371472 [3:44:50<26:05:14, 3.46it/s] 13%|█▎ | 46904/371472 [3:44:50<26:15:48, 3.43it/s] 13%|█▎ | 46905/371472 [3:44:50<26:29:40, 3.40it/s] 13%|█▎ | 46906/371472 [3:44:51<26:46:03, 3.37it/s] 13%|█▎ | 46907/371472 [3:44:51<26:41:45, 3.38it/s] 13%|█▎ | 46908/371472 [3:44:51<27:02:39, 3.33it/s] 13%|█▎ | 46909/371472 [3:44:52<25:30:32, 3.53it/s] 13%|█▎ | 46910/371472 [3:44:52<25:16:06, 3.57it/s] 13%|█▎ | 46911/371472 [3:44:52<24:41:32, 3.65it/s] 13%|█▎ | 46912/371472 [3:44:52<24:01:38, 3.75it/s] 13%|█▎ | 46913/371472 [3:44:53<23:44:52, 3.80it/s] 13%|█▎ | 46914/371472 [3:44:53<24:25:50, 3.69it/s] 13%|█▎ | 46915/371472 [3:44:53<26:58:01, 3.34it/s] 13%|█▎ | 46916/371472 [3:44:54<27:30:31, 3.28it/s] 13%|█▎ | 46917/371472 [3:44:54<26:08:13, 3.45it/s] 13%|█▎ | 46918/371472 [3:44:54<24:36:19, 3.66it/s] 13%|█▎ | 46919/371472 [3:44:54<26:15:58, 3.43it/s] 13%|█▎ | 46920/371472 [3:44:55<25:51:26, 3.49it/s] {'loss': 4.3204, 'learning_rate': 8.867461052813032e-07, 'epoch': 2.02} + 13%|█▎ | 46920/371472 [3:44:55<25:51:26, 3.49it/s] 13%|█▎ | 46921/371472 [3:44:55<25:50:26, 3.49it/s] 13%|█▎ | 46922/371472 [3:44:55<26:28:00, 3.41it/s] 13%|█▎ | 46923/371472 [3:44:56<26:18:15, 3.43it/s] 13%|█▎ | 46924/371472 [3:44:56<25:19:38, 3.56it/s] 13%|█▎ | 46925/371472 [3:44:56<24:57:25, 3.61it/s] 13%|█▎ | 46926/371472 [3:44:56<24:05:22, 3.74it/s] 13%|█▎ | 46927/371472 [3:44:57<24:14:27, 3.72it/s] 13%|█▎ | 46928/371472 [3:44:57<26:02:17, 3.46it/s] 13%|█▎ | 46929/371472 [3:44:57<25:32:07, 3.53it/s] 13%|█▎ | 46930/371472 [3:44:58<26:30:59, 3.40it/s] 13%|█▎ | 46931/371472 [3:44:58<25:33:53, 3.53it/s] 13%|█▎ | 46932/371472 [3:44:58<24:59:46, 3.61it/s] 13%|█▎ | 46933/371472 [3:44:58<27:23:47, 3.29it/s] 13%|█▎ | 46934/371472 [3:44:59<27:19:46, 3.30it/s] 13%|█▎ | 46935/371472 [3:44:59<26:04:11, 3.46it/s] 13%|█▎ | 46936/371472 [3:44:59<25:43:41, 3.50it/s] 13%|█▎ | 46937/371472 [3:45:00<25:53:39, 3.48it/s] 13%|█▎ | 46938/371472 [3:45:00<25:00:13, 3.61it/s] 13%|█▎ | 46939/371472 [3:45:00<25:15:03, 3.57it/s] 13%|█▎ | 46940/371472 [3:45:00<25:40:51, 3.51it/s] {'loss': 4.2796, 'learning_rate': 8.866976233058243e-07, 'epoch': 2.02} + 13%|█▎ | 46940/371472 [3:45:00<25:40:51, 3.51it/s] 13%|█▎ | 46941/371472 [3:45:01<26:03:56, 3.46it/s] 13%|█▎ | 46942/371472 [3:45:01<25:28:50, 3.54it/s] 13%|█▎ | 46943/371472 [3:45:01<27:49:22, 3.24it/s] 13%|█▎ | 46944/371472 [3:45:02<27:49:40, 3.24it/s] 13%|█▎ | 46945/371472 [3:45:02<27:31:58, 3.27it/s] 13%|█▎ | 46946/371472 [3:45:02<27:08:03, 3.32it/s] 13%|█▎ | 46947/371472 [3:45:03<27:06:58, 3.32it/s] 13%|█▎ | 46948/371472 [3:45:03<27:30:26, 3.28it/s] 13%|█▎ | 46949/371472 [3:45:03<26:21:37, 3.42it/s] 13%|█▎ | 46950/371472 [3:45:03<26:28:27, 3.40it/s] 13%|█▎ | 46951/371472 [3:45:04<25:14:48, 3.57it/s] 13%|█▎ | 46952/371472 [3:45:04<25:05:57, 3.59it/s] 13%|█▎ | 46953/371472 [3:45:04<25:57:52, 3.47it/s] 13%|█▎ | 46954/371472 [3:45:05<26:59:38, 3.34it/s] 13%|█▎ | 46955/371472 [3:45:05<25:48:36, 3.49it/s] 13%|█▎ | 46956/371472 [3:45:05<26:05:59, 3.45it/s] 13%|█▎ | 46957/371472 [3:45:05<26:01:13, 3.46it/s] 13%|█▎ | 46958/371472 [3:45:06<26:45:46, 3.37it/s] 13%|█▎ | 46959/371472 [3:45:06<25:49:15, 3.49it/s] 13%|█▎ | 46960/371472 [3:45:06<25:11:28, 3.58it/s] {'loss': 4.3953, 'learning_rate': 8.866491413303454e-07, 'epoch': 2.02} + 13%|█▎ | 46960/371472 [3:45:06<25:11:28, 3.58it/s] 13%|█▎ | 46961/371472 [3:45:07<25:05:37, 3.59it/s] 13%|█▎ | 46962/371472 [3:45:07<24:44:27, 3.64it/s] 13%|█▎ | 46963/371472 [3:45:07<25:11:53, 3.58it/s] 13%|█▎ | 46964/371472 [3:45:07<24:40:50, 3.65it/s] 13%|█▎ | 46965/371472 [3:45:08<24:55:53, 3.62it/s] 13%|█▎ | 46966/371472 [3:45:08<25:08:18, 3.59it/s] 13%|█▎ | 46967/371472 [3:45:08<25:58:09, 3.47it/s] 13%|█▎ | 46968/371472 [3:45:08<24:38:17, 3.66it/s] 13%|█▎ | 46969/371472 [3:45:09<27:07:17, 3.32it/s] 13%|█▎ | 46970/371472 [3:45:09<27:47:21, 3.24it/s] 13%|█▎ | 46971/371472 [3:45:09<28:30:39, 3.16it/s] 13%|█▎ | 46972/371472 [3:45:10<26:54:56, 3.35it/s] 13%|█▎ | 46973/371472 [3:45:10<27:26:23, 3.28it/s] 13%|█▎ | 46974/371472 [3:45:10<27:36:38, 3.26it/s] 13%|█▎ | 46975/371472 [3:45:11<26:11:43, 3.44it/s] 13%|█▎ | 46976/371472 [3:45:11<25:34:11, 3.53it/s] 13%|█▎ | 46977/371472 [3:45:11<24:50:13, 3.63it/s] 13%|█▎ | 46978/371472 [3:45:11<24:55:01, 3.62it/s] 13%|█▎ | 46979/371472 [3:45:12<25:28:00, 3.54it/s] 13%|█▎ | 46980/371472 [3:45:12<25:09:08, 3.58it/s] {'loss': 4.3401, 'learning_rate': 8.866006593548664e-07, 'epoch': 2.02} + 13%|█▎ | 46980/371472 [3:45:12<25:09:08, 3.58it/s] 13%|█▎ | 46981/371472 [3:45:12<24:39:59, 3.65it/s] 13%|█▎ | 46982/371472 [3:45:13<24:22:39, 3.70it/s] 13%|█▎ | 46983/371472 [3:45:13<23:58:39, 3.76it/s] 13%|█▎ | 46984/371472 [3:45:13<25:03:21, 3.60it/s] 13%|█▎ | 46985/371472 [3:45:13<24:51:21, 3.63it/s] 13%|█▎ | 46986/371472 [3:45:14<25:53:30, 3.48it/s] 13%|█▎ | 46987/371472 [3:45:14<27:02:46, 3.33it/s] 13%|█▎ | 46988/371472 [3:45:14<26:57:27, 3.34it/s] 13%|█▎ | 46989/371472 [3:45:15<30:02:47, 3.00it/s] 13%|█▎ | 46990/371472 [3:45:15<28:18:09, 3.18it/s] 13%|█▎ | 46991/371472 [3:45:15<27:51:51, 3.23it/s] 13%|█▎ | 46992/371472 [3:45:16<26:24:14, 3.41it/s] 13%|█▎ | 46993/371472 [3:45:16<25:41:52, 3.51it/s] 13%|█▎ | 46994/371472 [3:45:16<25:15:33, 3.57it/s] 13%|█▎ | 46995/371472 [3:45:16<24:13:38, 3.72it/s] 13%|█▎ | 46996/371472 [3:45:17<24:18:13, 3.71it/s] 13%|█▎ | 46997/371472 [3:45:17<26:32:13, 3.40it/s] 13%|█▎ | 46998/371472 [3:45:17<25:59:23, 3.47it/s] 13%|█▎ | 46999/371472 [3:45:18<27:34:12, 3.27it/s] 13%|█▎ | 47000/371472 [3:45:18<26:33:02, 3.39it/s] {'loss': 4.4807, 'learning_rate': 8.865521773793875e-07, 'epoch': 2.02} + 13%|█▎ | 47000/371472 [3:45:18<26:33:02, 3.39it/s] 13%|█▎ | 47001/371472 [3:45:18<26:00:26, 3.47it/s] 13%|█▎ | 47002/371472 [3:45:18<26:16:54, 3.43it/s] 13%|█▎ | 47003/371472 [3:45:19<26:18:54, 3.43it/s] 13%|█▎ | 47004/371472 [3:45:19<27:24:06, 3.29it/s] 13%|█▎ | 47005/371472 [3:45:19<25:33:21, 3.53it/s] 13%|█▎ | 47006/371472 [3:45:20<24:27:38, 3.68it/s] 13%|█▎ | 47007/371472 [3:45:20<25:39:53, 3.51it/s] 13%|█▎ | 47008/371472 [3:45:20<26:05:42, 3.45it/s] 13%|█▎ | 47009/371472 [3:45:20<26:13:58, 3.44it/s] 13%|█▎ | 47010/371472 [3:45:21<26:29:40, 3.40it/s] 13%|█▎ | 47011/371472 [3:45:21<26:07:15, 3.45it/s] 13%|█▎ | 47012/371472 [3:45:21<26:46:23, 3.37it/s] 13%|█▎ | 47013/371472 [3:45:22<25:24:36, 3.55it/s] 13%|█▎ | 47014/371472 [3:45:22<25:09:43, 3.58it/s] 13%|█▎ | 47015/371472 [3:45:22<23:56:27, 3.76it/s] 13%|█▎ | 47016/371472 [3:45:22<23:29:19, 3.84it/s] 13%|█▎ | 47017/371472 [3:45:23<24:09:17, 3.73it/s] 13%|█▎ | 47018/371472 [3:45:23<23:41:17, 3.80it/s] 13%|█▎ | 47019/371472 [3:45:23<23:41:02, 3.81it/s] 13%|█▎ | 47020/371472 [3:45:23<24:24:04, 3.69it/s] {'loss': 4.2138, 'learning_rate': 8.865036954039087e-07, 'epoch': 2.03} + 13%|█▎ | 47020/371472 [3:45:23<24:24:04, 3.69it/s] 13%|█▎ | 47021/371472 [3:45:24<25:25:22, 3.55it/s] 13%|█▎ | 47022/371472 [3:45:24<25:10:39, 3.58it/s] 13%|█▎ | 47023/371472 [3:45:24<25:10:34, 3.58it/s] 13%|█▎ | 47024/371472 [3:45:25<24:47:13, 3.64it/s] 13%|█▎ | 47025/371472 [3:45:25<24:44:35, 3.64it/s] 13%|█▎ | 47026/371472 [3:45:25<27:55:30, 3.23it/s] 13%|█▎ | 47027/371472 [3:45:26<29:45:32, 3.03it/s] 13%|█▎ | 47028/371472 [3:45:26<30:21:02, 2.97it/s] 13%|█▎ | 47029/371472 [3:45:26<28:24:51, 3.17it/s] 13%|█▎ | 47030/371472 [3:45:26<27:26:20, 3.28it/s] 13%|█▎ | 47031/371472 [3:45:27<28:51:18, 3.12it/s] 13%|█▎ | 47032/371472 [3:45:27<27:28:16, 3.28it/s] 13%|█▎ | 47033/371472 [3:45:27<26:32:57, 3.39it/s] 13%|█▎ | 47034/371472 [3:45:28<26:18:23, 3.43it/s] 13%|█▎ | 47035/371472 [3:45:28<25:14:30, 3.57it/s] 13%|█▎ | 47036/371472 [3:45:28<25:39:24, 3.51it/s] 13%|█▎ | 47037/371472 [3:45:29<26:22:21, 3.42it/s] 13%|█▎ | 47038/371472 [3:45:29<27:16:42, 3.30it/s] 13%|█▎ | 47039/371472 [3:45:29<26:07:08, 3.45it/s] 13%|█▎ | 47040/371472 [3:45:29<25:31:13, 3.53it/s] {'loss': 4.2883, 'learning_rate': 8.864552134284299e-07, 'epoch': 2.03} + 13%|█▎ | 47040/371472 [3:45:29<25:31:13, 3.53it/s] 13%|█▎ | 47041/371472 [3:45:30<27:05:37, 3.33it/s] 13%|█▎ | 47042/371472 [3:45:30<27:24:28, 3.29it/s] 13%|█▎ | 47043/371472 [3:45:30<26:27:26, 3.41it/s] 13%|█▎ | 47044/371472 [3:45:31<26:22:26, 3.42it/s] 13%|█▎ | 47045/371472 [3:45:31<26:41:13, 3.38it/s] 13%|█▎ | 47046/371472 [3:45:31<25:51:15, 3.49it/s] 13%|█▎ | 47047/371472 [3:45:31<26:06:46, 3.45it/s] 13%|█▎ | 47048/371472 [3:45:32<27:20:04, 3.30it/s] 13%|█▎ | 47049/371472 [3:45:32<28:05:02, 3.21it/s] 13%|█▎ | 47050/371472 [3:45:32<28:06:45, 3.21it/s] 13%|█▎ | 47051/371472 [3:45:33<28:01:32, 3.22it/s] 13%|█▎ | 47052/371472 [3:45:33<29:36:42, 3.04it/s] 13%|█▎ | 47053/371472 [3:45:33<28:34:45, 3.15it/s] 13%|█▎ | 47054/371472 [3:45:34<27:13:25, 3.31it/s] 13%|█▎ | 47055/371472 [3:45:34<25:39:07, 3.51it/s] 13%|█▎ | 47056/371472 [3:45:34<24:56:26, 3.61it/s] 13%|█▎ | 47057/371472 [3:45:34<25:23:33, 3.55it/s] 13%|█▎ | 47058/371472 [3:45:35<25:03:14, 3.60it/s] 13%|█▎ | 47059/371472 [3:45:35<26:25:00, 3.41it/s] 13%|█▎ | 47060/371472 [3:45:35<26:31:04, 3.40it/s] {'loss': 4.353, 'learning_rate': 8.864067314529509e-07, 'epoch': 2.03} + 13%|█▎ | 47060/371472 [3:45:35<26:31:04, 3.40it/s] 13%|█▎ | 47061/371472 [3:45:36<25:38:12, 3.52it/s] 13%|█▎ | 47062/371472 [3:45:36<24:48:21, 3.63it/s] 13%|█▎ | 47063/371472 [3:45:36<23:56:22, 3.76it/s] 13%|█▎ | 47064/371472 [3:45:36<25:49:03, 3.49it/s] 13%|█▎ | 47065/371472 [3:45:37<24:30:42, 3.68it/s] 13%|█▎ | 47066/371472 [3:45:37<25:06:46, 3.59it/s] 13%|█▎ | 47067/371472 [3:45:37<26:59:04, 3.34it/s] 13%|█▎ | 47068/371472 [3:45:38<26:23:51, 3.41it/s] 13%|█▎ | 47069/371472 [3:45:38<25:03:22, 3.60it/s] 13%|█▎ | 47070/371472 [3:45:38<24:30:25, 3.68it/s] 13%|█▎ | 47071/371472 [3:45:38<24:18:10, 3.71it/s] 13%|█▎ | 47072/371472 [3:45:39<23:24:41, 3.85it/s] 13%|█▎ | 47073/371472 [3:45:39<23:49:04, 3.78it/s] 13%|█▎ | 47074/371472 [3:45:39<23:29:55, 3.83it/s] 13%|█▎ | 47075/371472 [3:45:39<25:25:50, 3.54it/s] 13%|█▎ | 47076/371472 [3:45:40<24:25:08, 3.69it/s] 13%|█▎ | 47077/371472 [3:45:40<25:45:08, 3.50it/s] 13%|█▎ | 47078/371472 [3:45:40<24:53:42, 3.62it/s] 13%|█▎ | 47079/371472 [3:45:41<24:14:16, 3.72it/s] 13%|█▎ | 47080/371472 [3:45:41<24:05:20, 3.74it/s] {'loss': 4.291, 'learning_rate': 8.86358249477472e-07, 'epoch': 2.03} + 13%|█▎ | 47080/371472 [3:45:41<24:05:20, 3.74it/s] 13%|█▎ | 47081/371472 [3:45:41<23:35:48, 3.82it/s] 13%|█▎ | 47082/371472 [3:45:41<23:31:11, 3.83it/s] 13%|█▎ | 47083/371472 [3:45:42<22:59:01, 3.92it/s] 13%|█▎ | 47084/371472 [3:45:42<23:21:39, 3.86it/s] 13%|█▎ | 47085/371472 [3:45:42<23:59:56, 3.75it/s] 13%|█▎ | 47086/371472 [3:45:42<22:54:22, 3.93it/s] 13%|█▎ | 47087/371472 [3:45:43<23:33:05, 3.83it/s] 13%|█▎ | 47088/371472 [3:45:43<23:02:52, 3.91it/s] 13%|█▎ | 47089/371472 [3:45:43<24:05:59, 3.74it/s] 13%|█▎ | 47090/371472 [3:45:43<24:15:56, 3.71it/s] 13%|█▎ | 47091/371472 [3:45:44<25:51:07, 3.49it/s] 13%|█▎ | 47092/371472 [3:45:44<25:54:06, 3.48it/s] 13%|█▎ | 47093/371472 [3:45:44<25:06:21, 3.59it/s] 13%|█▎ | 47094/371472 [3:45:45<25:19:44, 3.56it/s] 13%|█▎ | 47095/371472 [3:45:45<27:54:32, 3.23it/s] 13%|█▎ | 47096/371472 [3:45:45<26:57:06, 3.34it/s] 13%|█▎ | 47097/371472 [3:45:45<25:42:17, 3.51it/s] 13%|█▎ | 47098/371472 [3:45:46<25:48:05, 3.49it/s] 13%|█▎ | 47099/371472 [3:45:46<26:38:39, 3.38it/s] 13%|█▎ | 47100/371472 [3:45:46<24:54:15, 3.62it/s] {'loss': 4.2781, 'learning_rate': 8.863097675019931e-07, 'epoch': 2.03} + 13%|█▎ | 47100/371472 [3:45:46<24:54:15, 3.62it/s] 13%|█▎ | 47101/371472 [3:45:47<24:05:19, 3.74it/s] 13%|█▎ | 47102/371472 [3:45:47<25:10:43, 3.58it/s] 13%|█▎ | 47103/371472 [3:45:47<24:59:40, 3.60it/s] 13%|█▎ | 47104/371472 [3:45:47<24:36:41, 3.66it/s] 13%|█▎ | 47105/371472 [3:45:48<24:00:39, 3.75it/s] 13%|█▎ | 47106/371472 [3:45:48<24:08:54, 3.73it/s] 13%|█▎ | 47107/371472 [3:45:48<25:06:25, 3.59it/s] 13%|█▎ | 47108/371472 [3:45:48<24:54:45, 3.62it/s] 13%|█▎ | 47109/371472 [3:45:49<24:20:16, 3.70it/s] 13%|█▎ | 47110/371472 [3:45:49<25:34:52, 3.52it/s] 13%|█▎ | 47111/371472 [3:45:49<24:41:00, 3.65it/s] 13%|█▎ | 47112/371472 [3:45:50<23:57:49, 3.76it/s] 13%|█▎ | 47113/371472 [3:45:50<23:30:08, 3.83it/s] 13%|█▎ | 47114/371472 [3:45:50<23:30:25, 3.83it/s] 13%|█▎ | 47115/371472 [3:45:50<26:11:16, 3.44it/s] 13%|█▎ | 47116/371472 [3:45:51<29:09:42, 3.09it/s] 13%|█▎ | 47117/371472 [3:45:51<28:32:12, 3.16it/s] 13%|█▎ | 47118/371472 [3:45:51<27:01:34, 3.33it/s] 13%|█▎ | 47119/371472 [3:45:52<26:45:07, 3.37it/s] 13%|█▎ | 47120/371472 [3:45:52<25:49:52, 3.49it/s] {'loss': 4.2815, 'learning_rate': 8.862612855265142e-07, 'epoch': 2.03} + 13%|█▎ | 47120/371472 [3:45:52<25:49:52, 3.49it/s] 13%|█▎ | 47121/371472 [3:45:52<25:36:43, 3.52it/s] 13%|█▎ | 47122/371472 [3:45:53<25:25:07, 3.54it/s] 13%|█▎ | 47123/371472 [3:45:53<25:23:55, 3.55it/s] 13%|█▎ | 47124/371472 [3:45:53<25:53:52, 3.48it/s] 13%|█▎ | 47125/371472 [3:45:53<24:54:37, 3.62it/s] 13%|█▎ | 47126/371472 [3:45:54<24:27:25, 3.68it/s] 13%|█▎ | 47127/371472 [3:45:54<24:10:32, 3.73it/s] 13%|█▎ | 47128/371472 [3:45:54<25:04:19, 3.59it/s] 13%|█▎ | 47129/371472 [3:45:54<24:39:02, 3.65it/s] 13%|█▎ | 47130/371472 [3:45:55<24:19:37, 3.70it/s] 13%|█▎ | 47131/371472 [3:45:55<23:45:48, 3.79it/s] 13%|█▎ | 47132/371472 [3:45:55<24:46:21, 3.64it/s] 13%|█▎ | 47133/371472 [3:45:55<24:10:26, 3.73it/s] 13%|█▎ | 47134/371472 [3:45:56<23:34:02, 3.82it/s] 13%|█▎ | 47135/371472 [3:45:56<24:59:52, 3.60it/s] 13%|█▎ | 47136/371472 [3:45:56<26:19:46, 3.42it/s] 13%|█▎ | 47137/371472 [3:45:57<25:30:42, 3.53it/s] 13%|█▎ | 47138/371472 [3:45:57<24:43:19, 3.64it/s] 13%|█▎ | 47139/371472 [3:45:57<24:16:42, 3.71it/s] 13%|█▎ | 47140/371472 [3:45:57<23:47:36, 3.79it/s] {'loss': 4.1737, 'learning_rate': 8.862128035510353e-07, 'epoch': 2.03} + 13%|█▎ | 47140/371472 [3:45:57<23:47:36, 3.79it/s] 13%|█▎ | 47141/371472 [3:45:58<24:28:27, 3.68it/s] 13%|█▎ | 47142/371472 [3:45:58<26:30:16, 3.40it/s] 13%|█▎ | 47143/371472 [3:45:58<27:17:11, 3.30it/s] 13%|█▎ | 47144/371472 [3:45:59<26:23:53, 3.41it/s] 13%|█▎ | 47145/371472 [3:45:59<25:19:53, 3.56it/s] 13%|█▎ | 47146/371472 [3:45:59<28:59:03, 3.11it/s] 13%|█▎ | 47147/371472 [3:46:00<28:54:39, 3.12it/s] 13%|█▎ | 47148/371472 [3:46:00<27:21:53, 3.29it/s] 13%|█▎ | 47149/371472 [3:46:00<29:00:03, 3.11it/s] 13%|█▎ | 47150/371472 [3:46:01<28:27:01, 3.17it/s] 13%|█▎ | 47151/371472 [3:46:01<27:18:46, 3.30it/s] 13%|█▎ | 47152/371472 [3:46:01<27:02:11, 3.33it/s] 13%|█▎ | 47153/371472 [3:46:01<26:20:08, 3.42it/s] 13%|█▎ | 47154/371472 [3:46:02<27:24:01, 3.29it/s] 13%|█▎ | 47155/371472 [3:46:02<29:45:56, 3.03it/s] 13%|█▎ | 47156/371472 [3:46:02<28:26:07, 3.17it/s] 13%|█▎ | 47157/371472 [3:46:03<27:03:54, 3.33it/s] 13%|█▎ | 47158/371472 [3:46:03<26:46:57, 3.36it/s] 13%|█▎ | 47159/371472 [3:46:03<26:29:30, 3.40it/s] 13%|█▎ | 47160/371472 [3:46:03<25:18:07, 3.56it/s] {'loss': 4.1671, 'learning_rate': 8.861643215755564e-07, 'epoch': 2.03} + 13%|█▎ | 47160/371472 [3:46:03<25:18:07, 3.56it/s] 13%|█▎ | 47161/371472 [3:46:04<25:49:46, 3.49it/s] 13%|█▎ | 47162/371472 [3:46:04<25:40:39, 3.51it/s] 13%|█▎ | 47163/371472 [3:46:04<25:28:19, 3.54it/s] 13%|█▎ | 47164/371472 [3:46:05<25:29:05, 3.53it/s] 13%|█▎ | 47165/371472 [3:46:05<24:59:33, 3.60it/s] 13%|█▎ | 47166/371472 [3:46:05<24:38:59, 3.65it/s] 13%|█�� | 47167/371472 [3:46:05<24:04:34, 3.74it/s] 13%|█▎ | 47168/371472 [3:46:06<23:45:07, 3.79it/s] 13%|█▎ | 47169/371472 [3:46:06<23:05:31, 3.90it/s] 13%|█▎ | 47170/371472 [3:46:06<23:55:33, 3.77it/s] 13%|█▎ | 47171/371472 [3:46:06<24:37:30, 3.66it/s] 13%|█▎ | 47172/371472 [3:46:07<25:12:29, 3.57it/s] 13%|█▎ | 47173/371472 [3:46:07<25:13:06, 3.57it/s] 13%|█▎ | 47174/371472 [3:46:07<25:21:17, 3.55it/s] 13%|█▎ | 47175/371472 [3:46:08<25:06:42, 3.59it/s] 13%|█▎ | 47176/371472 [3:46:08<26:22:13, 3.42it/s] 13%|█▎ | 47177/371472 [3:46:08<27:02:52, 3.33it/s] 13%|█▎ | 47178/371472 [3:46:09<25:48:54, 3.49it/s] 13%|█▎ | 47179/371472 [3:46:09<27:34:46, 3.27it/s] 13%|█▎ | 47180/371472 [3:46:09<28:43:23, 3.14it/s] {'loss': 4.4056, 'learning_rate': 8.861158396000776e-07, 'epoch': 2.03} + 13%|█▎ | 47180/371472 [3:46:09<28:43:23, 3.14it/s] 13%|█▎ | 47181/371472 [3:46:10<29:10:18, 3.09it/s] 13%|█▎ | 47182/371472 [3:46:10<27:47:47, 3.24it/s] 13%|█▎ | 47183/371472 [3:46:10<26:42:51, 3.37it/s] 13%|█▎ | 47184/371472 [3:46:10<26:07:14, 3.45it/s] 13%|█▎ | 47185/371472 [3:46:11<27:09:47, 3.32it/s] 13%|█▎ | 47186/371472 [3:46:11<26:10:25, 3.44it/s] 13%|█▎ | 47187/371472 [3:46:11<25:32:40, 3.53it/s] 13%|█▎ | 47188/371472 [3:46:12<25:31:36, 3.53it/s] 13%|█▎ | 47189/371472 [3:46:12<27:15:18, 3.31it/s] 13%|█▎ | 47190/371472 [3:46:12<26:56:02, 3.34it/s] 13%|█▎ | 47191/371472 [3:46:13<29:27:28, 3.06it/s] 13%|█▎ | 47192/371472 [3:46:13<28:13:14, 3.19it/s] 13%|█▎ | 47193/371472 [3:46:13<26:50:36, 3.36it/s] 13%|█▎ | 47194/371472 [3:46:13<27:51:09, 3.23it/s] 13%|█▎ | 47195/371472 [3:46:14<27:11:34, 3.31it/s] 13%|█▎ | 47196/371472 [3:46:14<25:58:09, 3.47it/s] 13%|█▎ | 47197/371472 [3:46:14<27:02:16, 3.33it/s] 13%|█▎ | 47198/371472 [3:46:15<25:46:12, 3.50it/s] 13%|█▎ | 47199/371472 [3:46:15<25:00:52, 3.60it/s] 13%|█▎ | 47200/371472 [3:46:15<24:16:28, 3.71it/s] {'loss': 4.3964, 'learning_rate': 8.860673576245987e-07, 'epoch': 2.03} + 13%|█▎ | 47200/371472 [3:46:15<24:16:28, 3.71it/s] 13%|█▎ | 47201/371472 [3:46:15<27:24:48, 3.29it/s] 13%|█▎ | 47202/371472 [3:46:16<26:16:09, 3.43it/s] 13%|█▎ | 47203/371472 [3:46:16<25:30:35, 3.53it/s] 13%|█▎ | 47204/371472 [3:46:16<24:59:00, 3.61it/s] 13%|█▎ | 47205/371472 [3:46:16<23:56:20, 3.76it/s] 13%|█▎ | 47206/371472 [3:46:17<24:39:17, 3.65it/s] 13%|█▎ | 47207/371472 [3:46:17<24:44:25, 3.64it/s] 13%|█▎ | 47208/371472 [3:46:17<24:46:55, 3.63it/s] 13%|█▎ | 47209/371472 [3:46:18<25:08:53, 3.58it/s] 13%|█▎ | 47210/371472 [3:46:18<25:31:49, 3.53it/s] 13%|█▎ | 47211/371472 [3:46:18<26:19:09, 3.42it/s] 13%|█▎ | 47212/371472 [3:46:19<26:47:55, 3.36it/s] 13%|█▎ | 47213/371472 [3:46:19<25:31:50, 3.53it/s] 13%|█▎ | 47214/371472 [3:46:19<24:42:40, 3.64it/s] 13%|█▎ | 47215/371472 [3:46:19<24:53:51, 3.62it/s] 13%|█▎ | 47216/371472 [3:46:20<24:59:23, 3.60it/s] 13%|█▎ | 47217/371472 [3:46:20<25:59:43, 3.46it/s] 13%|█▎ | 47218/371472 [3:46:20<25:26:44, 3.54it/s] 13%|█▎ | 47219/371472 [3:46:20<25:09:39, 3.58it/s] 13%|█▎ | 47220/371472 [3:46:21<25:49:14, 3.49it/s] {'loss': 4.1294, 'learning_rate': 8.860188756491197e-07, 'epoch': 2.03} + 13%|█▎ | 47220/371472 [3:46:21<25:49:14, 3.49it/s] 13%|█▎ | 47221/371472 [3:46:21<25:03:44, 3.59it/s] 13%|█▎ | 47222/371472 [3:46:21<24:59:06, 3.60it/s] 13%|█▎ | 47223/371472 [3:46:22<24:15:03, 3.71it/s] 13%|█▎ | 47224/371472 [3:46:22<24:15:49, 3.71it/s] 13%|█▎ | 47225/371472 [3:46:22<24:13:32, 3.72it/s] 13%|█▎ | 47226/371472 [3:46:22<24:47:20, 3.63it/s] 13%|█▎ | 47227/371472 [3:46:23<23:51:25, 3.78it/s] 13%|█▎ | 47228/371472 [3:46:23<23:13:19, 3.88it/s] 13%|█▎ | 47229/371472 [3:46:23<24:06:03, 3.74it/s] 13%|█▎ | 47230/371472 [3:46:23<23:29:09, 3.83it/s] 13%|█▎ | 47231/371472 [3:46:24<23:53:56, 3.77it/s] 13%|█▎ | 47232/371472 [3:46:24<24:46:51, 3.63it/s] 13%|█▎ | 47233/371472 [3:46:24<24:00:52, 3.75it/s] 13%|█▎ | 47234/371472 [3:46:24<23:56:12, 3.76it/s] 13%|█▎ | 47235/371472 [3:46:25<25:53:18, 3.48it/s] 13%|█▎ | 47236/371472 [3:46:25<27:08:06, 3.32it/s] 13%|█▎ | 47237/371472 [3:46:25<25:31:07, 3.53it/s] 13%|█▎ | 47238/371472 [3:46:26<25:35:16, 3.52it/s] 13%|█▎ | 47239/371472 [3:46:26<24:16:00, 3.71it/s] 13%|█▎ | 47240/371472 [3:46:26<24:27:15, 3.68it/s] {'loss': 4.2129, 'learning_rate': 8.859703936736408e-07, 'epoch': 2.03} + 13%|█▎ | 47240/371472 [3:46:26<24:27:15, 3.68it/s] 13%|█▎ | 47241/371472 [3:46:26<24:30:21, 3.68it/s] 13%|█▎ | 47242/371472 [3:46:27<24:54:44, 3.62it/s] 13%|█▎ | 47243/371472 [3:46:27<23:50:18, 3.78it/s] 13%|█▎ | 47244/371472 [3:46:27<24:33:26, 3.67it/s] 13%|█▎ | 47245/371472 [3:46:28<24:20:13, 3.70it/s] 13%|█▎ | 47246/371472 [3:46:28<24:55:24, 3.61it/s] 13%|█▎ | 47247/371472 [3:46:28<23:49:33, 3.78it/s] 13%|█▎ | 47248/371472 [3:46:28<27:38:28, 3.26it/s] 13%|█▎ | 47249/371472 [3:46:29<26:18:46, 3.42it/s] 13%|█▎ | 47250/371472 [3:46:29<26:28:03, 3.40it/s] 13%|█▎ | 47251/371472 [3:46:29<27:17:55, 3.30it/s] 13%|█▎ | 47252/371472 [3:46:30<27:28:31, 3.28it/s] 13%|█▎ | 47253/371472 [3:46:30<29:15:11, 3.08it/s] 13%|█▎ | 47254/371472 [3:46:30<27:47:14, 3.24it/s] 13%|█▎ | 47255/371472 [3:46:31<25:57:53, 3.47it/s] 13%|█▎ | 47256/371472 [3:46:31<30:26:09, 2.96it/s] 13%|█▎ | 47257/371472 [3:46:31<29:04:17, 3.10it/s] 13%|█▎ | 47258/371472 [3:46:32<26:45:07, 3.37it/s] 13%|█▎ | 47259/371472 [3:46:32<25:46:01, 3.50it/s] 13%|█▎ | 47260/371472 [3:46:32<25:07:30, 3.58it/s] {'loss': 4.342, 'learning_rate': 8.85921911698162e-07, 'epoch': 2.04} + 13%|█▎ | 47260/371472 [3:46:32<25:07:30, 3.58it/s] 13%|█▎ | 47261/371472 [3:46:32<25:56:45, 3.47it/s] 13%|█▎ | 47262/371472 [3:46:33<25:41:28, 3.51it/s] 13%|█▎ | 47263/371472 [3:46:33<25:51:47, 3.48it/s] 13%|█▎ | 47264/371472 [3:46:33<24:20:28, 3.70it/s] 13%|█▎ | 47265/371472 [3:46:33<25:21:21, 3.55it/s] 13%|█▎ | 47266/371472 [3:46:34<24:08:13, 3.73it/s] 13%|█▎ | 47267/371472 [3:46:34<25:04:21, 3.59it/s] 13%|█▎ | 47268/371472 [3:46:34<26:35:36, 3.39it/s] 13%|█▎ | 47269/371472 [3:46:35<26:14:42, 3.43it/s] 13%|█▎ | 47270/371472 [3:46:35<25:56:17, 3.47it/s] 13%|█▎ | 47271/371472 [3:46:35<26:18:49, 3.42it/s] 13%|█▎ | 47272/371472 [3:46:35<25:32:01, 3.53it/s] 13%|█▎ | 47273/371472 [3:46:36<25:07:07, 3.59it/s] 13%|█▎ | 47274/371472 [3:46:36<24:47:34, 3.63it/s] 13%|█▎ | 47275/371472 [3:46:36<24:24:51, 3.69it/s] 13%|█▎ | 47276/371472 [3:46:36<23:51:11, 3.78it/s] 13%|█▎ | 47277/371472 [3:46:37<24:01:57, 3.75it/s] 13%|█▎ | 47278/371472 [3:46:37<23:13:50, 3.88it/s] 13%|█▎ | 47279/371472 [3:46:37<22:59:45, 3.92it/s] 13%|█▎ | 47280/371472 [3:46:38<23:42:12, 3.80it/s] {'loss': 4.3192, 'learning_rate': 8.85873429722683e-07, 'epoch': 2.04} + 13%|█▎ | 47280/371472 [3:46:38<23:42:12, 3.80it/s] 13%|█▎ | 47281/371472 [3:46:38<24:06:15, 3.74it/s] 13%|█▎ | 47282/371472 [3:46:38<23:33:53, 3.82it/s] 13%|█▎ | 47283/371472 [3:46:38<24:06:27, 3.74it/s] 13%|█▎ | 47284/371472 [3:46:39<24:09:15, 3.73it/s] 13%|█▎ | 47285/371472 [3:46:39<23:21:21, 3.86it/s] 13%|█▎ | 47286/371472 [3:46:39<23:50:37, 3.78it/s] 13%|█▎ | 47287/371472 [3:46:39<24:22:57, 3.69it/s] 13%|█▎ | 47288/371472 [3:46:40<24:06:26, 3.74it/s] 13%|█▎ | 47289/371472 [3:46:40<26:19:24, 3.42it/s] 13%|█▎ | 47290/371472 [3:46:40<26:54:50, 3.35it/s] 13%|█▎ | 47291/371472 [3:46:41<26:00:40, 3.46it/s] 13%|█▎ | 47292/371472 [3:46:41<25:43:14, 3.50it/s] 13%|█▎ | 47293/371472 [3:46:41<26:21:26, 3.42it/s] 13%|█▎ | 47294/371472 [3:46:41<25:51:35, 3.48it/s] 13%|█▎ | 47295/371472 [3:46:42<24:56:08, 3.61it/s] 13%|█▎ | 47296/371472 [3:46:42<25:18:50, 3.56it/s] 13%|█▎ | 47297/371472 [3:46:42<26:58:20, 3.34it/s] 13%|█▎ | 47298/371472 [3:46:43<26:44:36, 3.37it/s] 13%|█▎ | 47299/371472 [3:46:43<25:35:10, 3.52it/s] 13%|█▎ | 47300/371472 [3:46:43<24:44:45, 3.64it/s] {'loss': 4.4086, 'learning_rate': 8.858249477472042e-07, 'epoch': 2.04} + 13%|█▎ | 47300/371472 [3:46:43<24:44:45, 3.64it/s] 13%|█▎ | 47301/371472 [3:46:43<26:33:32, 3.39it/s] 13%|█▎ | 47302/371472 [3:46:44<25:25:46, 3.54it/s] 13%|█▎ | 47303/371472 [3:46:44<24:40:56, 3.65it/s] 13%|█▎ | 47304/371472 [3:46:44<24:09:09, 3.73it/s] 13%|█▎ | 47305/371472 [3:46:45<25:22:15, 3.55it/s] 13%|█▎ | 47306/371472 [3:46:45<25:23:31, 3.55it/s] 13%|█▎ | 47307/371472 [3:46:45<24:32:39, 3.67it/s] 13%|█▎ | 47308/371472 [3:46:45<25:02:23, 3.60it/s] 13%|█▎ | 47309/371472 [3:46:46<25:55:11, 3.47it/s] 13%|█▎ | 47310/371472 [3:46:46<24:41:14, 3.65it/s] 13%|█▎ | 47311/371472 [3:46:46<24:38:36, 3.65it/s] 13%|█▎ | 47312/371472 [3:46:46<24:52:10, 3.62it/s] 13%|█▎ | 47313/371472 [3:46:47<26:45:42, 3.36it/s] 13%|█▎ | 47314/371472 [3:46:47<28:23:39, 3.17it/s] 13%|█▎ | 47315/371472 [3:46:47<27:00:47, 3.33it/s] 13%|█▎ | 47316/371472 [3:46:48<25:04:31, 3.59it/s] 13%|█▎ | 47317/371472 [3:46:48<24:05:45, 3.74it/s] 13%|█▎ | 47318/371472 [3:46:48<23:51:43, 3.77it/s] 13%|█▎ | 47319/371472 [3:46:48<24:54:38, 3.61it/s] 13%|█▎ | 47320/371472 [3:46:49<24:05:08, 3.74it/s] {'loss': 4.2597, 'learning_rate': 8.857764657717253e-07, 'epoch': 2.04} + 13%|█▎ | 47320/371472 [3:46:49<24:05:08, 3.74it/s] 13%|█▎ | 47321/371472 [3:46:49<26:08:54, 3.44it/s] 13%|█▎ | 47322/371472 [3:46:49<26:05:03, 3.45it/s] 13%|█▎ | 47323/371472 [3:46:50<24:35:59, 3.66it/s] 13%|█▎ | 47324/371472 [3:46:50<23:49:48, 3.78it/s] 13%|█▎ | 47325/371472 [3:46:50<24:20:54, 3.70it/s] 13%|█▎ | 47326/371472 [3:46:50<26:01:13, 3.46it/s] 13%|█▎ | 47327/371472 [3:46:51<25:26:30, 3.54it/s] 13%|█▎ | 47328/371472 [3:46:51<25:47:36, 3.49it/s] 13%|█▎ | 47329/371472 [3:46:51<24:44:04, 3.64it/s] 13%|█▎ | 47330/371472 [3:46:52<25:10:12, 3.58it/s] 13%|█▎ | 47331/371472 [3:46:52<24:38:03, 3.66it/s] 13%|█▎ | 47332/371472 [3:46:52<25:16:17, 3.56it/s] 13%|█▎ | 47333/371472 [3:46:52<24:33:16, 3.67it/s] 13%|█▎ | 47334/371472 [3:46:53<27:12:33, 3.31it/s] 13%|█▎ | 47335/371472 [3:46:53<26:27:28, 3.40it/s] 13%|█▎ | 47336/371472 [3:46:53<26:02:08, 3.46it/s] 13%|█▎ | 47337/371472 [3:46:54<25:17:12, 3.56it/s] 13%|█▎ | 47338/371472 [3:46:54<24:43:42, 3.64it/s] 13%|█▎ | 47339/371472 [3:46:54<26:25:19, 3.41it/s] 13%|█▎ | 47340/371472 [3:46:54<25:18:37, 3.56it/s] {'loss': 4.3538, 'learning_rate': 8.857279837962464e-07, 'epoch': 2.04} + 13%|█▎ | 47340/371472 [3:46:54<25:18:37, 3.56it/s] 13%|█▎ | 47341/371472 [3:46:55<27:05:21, 3.32it/s] 13%|█▎ | 47342/371472 [3:46:55<28:26:50, 3.16it/s] 13%|█▎ | 47343/371472 [3:46:55<28:25:32, 3.17it/s] 13%|█▎ | 47344/371472 [3:46:56<27:22:33, 3.29it/s] 13%|█▎ | 47345/371472 [3:46:56<25:51:47, 3.48it/s] 13%|█▎ | 47346/371472 [3:46:56<25:52:58, 3.48it/s] 13%|█▎ | 47347/371472 [3:46:57<26:31:21, 3.39it/s] 13%|█▎ | 47348/371472 [3:46:57<25:41:15, 3.50it/s] 13%|█▎ | 47349/371472 [3:46:57<28:06:36, 3.20it/s] 13%|█▎ | 47350/371472 [3:46:57<26:14:25, 3.43it/s] 13%|█▎ | 47351/371472 [3:46:58<25:02:49, 3.59it/s] 13%|█▎ | 47352/371472 [3:46:58<25:21:55, 3.55it/s] 13%|█▎ | 47353/371472 [3:46:58<25:46:04, 3.49it/s] 13%|█▎ | 47354/371472 [3:46:59<25:30:07, 3.53it/s] 13%|█▎ | 47355/371472 [3:46:59<25:52:43, 3.48it/s] 13%|█▎ | 47356/371472 [3:46:59<25:36:26, 3.52it/s] 13%|█▎ | 47357/371472 [3:46:59<25:44:50, 3.50it/s] 13%|█▎ | 47358/371472 [3:47:00<25:59:34, 3.46it/s] 13%|█▎ | 47359/371472 [3:47:00<25:24:25, 3.54it/s] 13%|█▎ | 47360/371472 [3:47:00<25:17:26, 3.56it/s] {'loss': 4.3276, 'learning_rate': 8.856795018207674e-07, 'epoch': 2.04} + 13%|█▎ | 47360/371472 [3:47:00<25:17:26, 3.56it/s] 13%|█▎ | 47361/371472 [3:47:01<24:28:36, 3.68it/s] 13%|█▎ | 47362/371472 [3:47:01<24:08:46, 3.73it/s] 13%|█▎ | 47363/371472 [3:47:01<25:10:51, 3.58it/s] 13%|█▎ | 47364/371472 [3:47:01<25:14:20, 3.57it/s] 13%|█▎ | 47365/371472 [3:47:02<24:04:22, 3.74it/s] 13%|█▎ | 47366/371472 [3:47:02<26:39:49, 3.38it/s] 13%|█▎ | 47367/371472 [3:47:02<25:26:26, 3.54it/s] 13%|█▎ | 47368/371472 [3:47:02<24:50:25, 3.62it/s] 13%|█▎ | 47369/371472 [3:47:03<26:06:11, 3.45it/s] 13%|█▎ | 47370/371472 [3:47:03<26:20:05, 3.42it/s] 13%|█▎ | 47371/371472 [3:47:03<25:23:51, 3.54it/s] 13%|█▎ | 47372/371472 [3:47:04<25:40:47, 3.51it/s] 13%|█▎ | 47373/371472 [3:47:04<24:54:35, 3.61it/s] 13%|█▎ | 47374/371472 [3:47:04<26:56:53, 3.34it/s] 13%|█▎ | 47375/371472 [3:47:05<26:43:03, 3.37it/s] 13%|█▎ | 47376/371472 [3:47:05<26:52:55, 3.35it/s] 13%|█▎ | 47377/371472 [3:47:05<26:25:46, 3.41it/s] 13%|█▎ | 47378/371472 [3:47:05<25:33:26, 3.52it/s] 13%|█▎ | 47379/371472 [3:47:06<24:27:01, 3.68it/s] 13%|█▎ | 47380/371472 [3:47:06<24:08:37, 3.73it/s] {'loss': 4.3319, 'learning_rate': 8.856310198452886e-07, 'epoch': 2.04} + 13%|█▎ | 47380/371472 [3:47:06<24:08:37, 3.73it/s] 13%|█▎ | 47381/371472 [3:47:06<25:30:54, 3.53it/s] 13%|█▎ | 47382/371472 [3:47:06<25:17:13, 3.56it/s] 13%|█▎ | 47383/371472 [3:47:07<24:01:35, 3.75it/s] 13%|█▎ | 47384/371472 [3:47:07<24:00:52, 3.75it/s] 13%|█▎ | 47385/371472 [3:47:07<24:40:00, 3.65it/s] 13%|█▎ | 47386/371472 [3:47:08<25:09:47, 3.58it/s] 13%|█▎ | 47387/371472 [3:47:08<26:52:24, 3.35it/s] 13%|█▎ | 47388/371472 [3:47:08<26:20:24, 3.42it/s] 13%|█▎ | 47389/371472 [3:47:09<27:18:41, 3.30it/s] 13%|█▎ | 47390/371472 [3:47:09<25:48:48, 3.49it/s] 13%|█▎ | 47391/371472 [3:47:09<24:32:14, 3.67it/s] 13%|█▎ | 47392/371472 [3:47:09<24:11:53, 3.72it/s] 13%|█▎ | 47393/371472 [3:47:10<23:55:20, 3.76it/s] 13%|█▎ | 47394/371472 [3:47:10<24:12:56, 3.72it/s] 13%|█▎ | 47395/371472 [3:47:10<25:35:50, 3.52it/s] 13%|█▎ | 47396/371472 [3:47:10<24:52:37, 3.62it/s] 13%|█▎ | 47397/371472 [3:47:11<24:01:55, 3.75it/s] 13%|█▎ | 47398/371472 [3:47:11<24:44:04, 3.64it/s] 13%|█▎ | 47399/371472 [3:47:11<24:41:42, 3.65it/s] 13%|█▎ | 47400/371472 [3:47:12<26:27:21, 3.40it/s] {'loss': 4.3089, 'learning_rate': 8.855825378698097e-07, 'epoch': 2.04} + 13%|█▎ | 47400/371472 [3:47:12<26:27:21, 3.40it/s] 13%|█▎ | 47401/371472 [3:47:12<27:03:37, 3.33it/s] 13%|█▎ | 47402/371472 [3:47:12<26:34:54, 3.39it/s] 13%|█▎ | 47403/371472 [3:47:12<25:54:22, 3.47it/s] 13%|█▎ | 47404/371472 [3:47:13<24:56:23, 3.61it/s] 13%|█▎ | 47405/371472 [3:47:13<24:05:48, 3.74it/s] 13%|█▎ | 47406/371472 [3:47:13<24:00:50, 3.75it/s] 13%|█▎ | 47407/371472 [3:47:13<24:13:53, 3.71it/s] 13%|█▎ | 47408/371472 [3:47:14<25:30:00, 3.53it/s] 13%|█▎ | 47409/371472 [3:47:14<24:40:12, 3.65it/s] 13%|█▎ | 47410/371472 [3:47:14<26:14:35, 3.43it/s] 13%|█▎ | 47411/371472 [3:47:15<24:31:07, 3.67it/s] 13%|█▎ | 47412/371472 [3:47:15<24:50:54, 3.62it/s] 13%|█▎ | 47413/371472 [3:47:15<24:50:59, 3.62it/s] 13%|█▎ | 47414/371472 [3:47:15<25:00:13, 3.60it/s] 13%|█▎ | 47415/371472 [3:47:16<26:46:30, 3.36it/s] 13%|█▎ | 47416/371472 [3:47:16<27:35:35, 3.26it/s] 13%|█▎ | 47417/371472 [3:47:16<26:30:44, 3.40it/s] 13%|█▎ | 47418/371472 [3:47:17<28:09:32, 3.20it/s] 13%|█▎ | 47419/371472 [3:47:17<28:46:39, 3.13it/s] 13%|█▎ | 47420/371472 [3:47:17<27:32:23, 3.27it/s] {'loss': 4.4973, 'learning_rate': 8.855340558943309e-07, 'epoch': 2.04} + 13%|█▎ | 47420/371472 [3:47:17<27:32:23, 3.27it/s] 13%|█▎ | 47421/371472 [3:47:18<26:01:34, 3.46it/s] 13%|█▎ | 47422/371472 [3:47:18<27:50:02, 3.23it/s] 13%|█▎ | 47423/371472 [3:47:18<28:27:09, 3.16it/s] 13%|█▎ | 47424/371472 [3:47:18<26:29:31, 3.40it/s] 13%|█▎ | 47425/371472 [3:47:19<25:04:19, 3.59it/s] 13%|█▎ | 47426/371472 [3:47:19<24:03:36, 3.74it/s] 13%|█▎ | 47427/371472 [3:47:19<23:34:48, 3.82it/s] 13%|█▎ | 47428/371472 [3:47:20<24:48:49, 3.63it/s] 13%|█▎ | 47429/371472 [3:47:20<24:31:21, 3.67it/s] 13%|█▎ | 47430/371472 [3:47:20<27:28:38, 3.28it/s] 13%|█▎ | 47431/371472 [3:47:20<26:29:56, 3.40it/s] 13%|█▎ | 47432/371472 [3:47:21<25:19:28, 3.55it/s] 13%|█▎ | 47433/371472 [3:47:21<25:12:43, 3.57it/s] 13%|█▎ | 47434/371472 [3:47:21<24:48:41, 3.63it/s] 13%|█▎ | 47435/371472 [3:47:21<24:11:57, 3.72it/s] 13%|█▎ | 47436/371472 [3:47:22<23:43:31, 3.79it/s] 13%|█▎ | 47437/371472 [3:47:22<25:01:48, 3.60it/s] 13%|█▎ | 47438/371472 [3:47:22<24:01:28, 3.75it/s] 13%|█▎ | 47439/371472 [3:47:23<24:24:36, 3.69it/s] 13%|█▎ | 47440/371472 [3:47:23<23:42:45, 3.80it/s] {'loss': 4.4496, 'learning_rate': 8.854855739188519e-07, 'epoch': 2.04} + 13%|█▎ | 47440/371472 [3:47:23<23:42:45, 3.80it/s] 13%|█▎ | 47441/371472 [3:47:23<23:26:55, 3.84it/s] 13%|█▎ | 47442/371472 [3:47:23<23:40:07, 3.80it/s] 13%|█▎ | 47443/371472 [3:47:24<24:23:12, 3.69it/s] 13%|█▎ | 47444/371472 [3:47:24<24:08:40, 3.73it/s] 13%|█▎ | 47445/371472 [3:47:24<25:35:36, 3.52it/s] 13%|█▎ | 47446/371472 [3:47:25<26:26:01, 3.41it/s] 13%|█▎ | 47447/371472 [3:47:25<25:59:00, 3.46it/s] 13%|█▎ | 47448/371472 [3:47:25<24:33:50, 3.66it/s] 13%|█▎ | 47449/371472 [3:47:25<26:14:21, 3.43it/s] 13%|█▎ | 47450/371472 [3:47:26<25:25:03, 3.54it/s] 13%|█▎ | 47451/371472 [3:47:26<25:34:36, 3.52it/s] 13%|█▎ | 47452/371472 [3:47:26<24:30:05, 3.67it/s] 13%|█▎ | 47453/371472 [3:47:26<25:15:57, 3.56it/s] 13%|█▎ | 47454/371472 [3:47:27<24:11:10, 3.72it/s] 13%|█▎ | 47455/371472 [3:47:27<25:06:38, 3.58it/s] 13%|█▎ | 47456/371472 [3:47:27<24:51:24, 3.62it/s] 13%|█▎ | 47457/371472 [3:47:28<25:13:05, 3.57it/s] 13%|█▎ | 47458/371472 [3:47:28<25:43:36, 3.50it/s] 13%|█▎ | 47459/371472 [3:47:28<25:21:40, 3.55it/s] 13%|█▎ | 47460/371472 [3:47:28<24:52:27, 3.62it/s] {'loss': 4.395, 'learning_rate': 8.854370919433731e-07, 'epoch': 2.04} + 13%|█▎ | 47460/371472 [3:47:28<24:52:27, 3.62it/s] 13%|█▎ | 47461/371472 [3:47:29<24:40:20, 3.65it/s] 13%|█▎ | 47462/371472 [3:47:29<25:17:18, 3.56it/s] 13%|█▎ | 47463/371472 [3:47:29<24:21:33, 3.69it/s] 13%|█▎ | 47464/371472 [3:47:30<25:49:29, 3.49it/s] 13%|█▎ | 47465/371472 [3:47:30<24:39:18, 3.65it/s] 13%|█▎ | 47466/371472 [3:47:30<25:24:54, 3.54it/s] 13%|█▎ | 47467/371472 [3:47:30<26:44:28, 3.37it/s] 13%|█▎ | 47468/371472 [3:47:31<27:00:23, 3.33it/s] 13%|█▎ | 47469/371472 [3:47:31<25:07:30, 3.58it/s] 13%|█▎ | 47470/371472 [3:47:31<24:23:50, 3.69it/s] 13%|█▎ | 47471/371472 [3:47:31<23:46:46, 3.78it/s] 13%|█▎ | 47472/371472 [3:47:32<25:01:43, 3.60it/s] 13%|█▎ | 47473/371472 [3:47:32<28:03:15, 3.21it/s] 13%|█▎ | 47474/371472 [3:47:33<29:57:54, 3.00it/s] 13%|█▎ | 47475/371472 [3:47:33<28:24:31, 3.17it/s] 13%|█▎ | 47476/371472 [3:47:33<27:42:54, 3.25it/s] 13%|█▎ | 47477/371472 [3:47:33<27:08:53, 3.32it/s] 13%|█▎ | 47478/371472 [3:47:34<27:04:46, 3.32it/s] 13%|█▎ | 47479/371472 [3:47:34<26:46:32, 3.36it/s] 13%|█▎ | 47480/371472 [3:47:34<26:04:54, 3.45it/s] {'loss': 4.0993, 'learning_rate': 8.853886099678941e-07, 'epoch': 2.05} + 13%|█▎ | 47480/371472 [3:47:34<26:04:54, 3.45it/s] 13%|█▎ | 47481/371472 [3:47:35<25:21:51, 3.55it/s] 13%|█▎ | 47482/371472 [3:47:35<24:14:24, 3.71it/s] 13%|█▎ | 47483/371472 [3:47:35<23:38:40, 3.81it/s] 13%|█▎ | 47484/371472 [3:47:35<24:22:50, 3.69it/s] 13%|█▎ | 47485/371472 [3:47:36<25:49:38, 3.48it/s] 13%|█▎ | 47486/371472 [3:47:36<25:42:05, 3.50it/s] 13%|█▎ | 47487/371472 [3:47:36<26:24:54, 3.41it/s] 13%|█▎ | 47488/371472 [3:47:36<25:05:30, 3.59it/s] 13%|█▎ | 47489/371472 [3:47:37<24:51:12, 3.62it/s] 13%|█▎ | 47490/371472 [3:47:37<24:53:44, 3.61it/s] 13%|█▎ | 47491/371472 [3:47:37<23:52:42, 3.77it/s] 13%|█▎ | 47492/371472 [3:47:38<24:20:01, 3.70it/s] 13%|█▎ | 47493/371472 [3:47:38<23:29:21, 3.83it/s] 13%|█▎ | 47494/371472 [3:47:38<23:18:18, 3.86it/s] 13%|█▎ | 47495/371472 [3:47:38<24:50:10, 3.62it/s] 13%|█▎ | 47496/371472 [3:47:39<25:25:22, 3.54it/s] 13%|█▎ | 47497/371472 [3:47:39<25:28:01, 3.53it/s] 13%|█▎ | 47498/371472 [3:47:39<24:53:31, 3.62it/s] 13%|█▎ | 47499/371472 [3:47:39<24:20:44, 3.70it/s] 13%|█▎ | 47500/371472 [3:47:40<24:35:52, 3.66it/s] {'loss': 4.3092, 'learning_rate': 8.853401279924152e-07, 'epoch': 2.05} + 13%|█▎ | 47500/371472 [3:47:40<24:35:52, 3.66it/s] 13%|█▎ | 47501/371472 [3:47:40<25:17:39, 3.56it/s] 13%|█▎ | 47502/371472 [3:47:40<25:41:57, 3.50it/s] 13%|█▎ | 47503/371472 [3:47:41<25:12:25, 3.57it/s] 13%|█▎ | 47504/371472 [3:47:41<24:46:16, 3.63it/s] 13%|█▎ | 47505/371472 [3:47:41<25:24:43, 3.54it/s] 13%|█▎ | 47506/371472 [3:47:41<26:47:34, 3.36it/s] 13%|█▎ | 47507/371472 [3:47:42<27:47:32, 3.24it/s] 13%|█▎ | 47508/371472 [3:47:42<27:49:19, 3.23it/s] 13%|█▎ | 47509/371472 [3:47:42<26:19:32, 3.42it/s] 13%|█▎ | 47510/371472 [3:47:43<26:28:57, 3.40it/s] 13%|█▎ | 47511/371472 [3:47:43<25:15:05, 3.56it/s] 13%|█▎ | 47512/371472 [3:47:43<25:44:02, 3.50it/s] 13%|█▎ | 47513/371472 [3:47:44<26:10:25, 3.44it/s] 13%|█▎ | 47514/371472 [3:47:44<27:03:27, 3.33it/s] 13%|█▎ | 47515/371472 [3:47:44<25:48:47, 3.49it/s] 13%|█▎ | 47516/371472 [3:47:44<25:50:44, 3.48it/s] 13%|█▎ | 47517/371472 [3:47:45<24:23:55, 3.69it/s] 13%|█▎ | 47518/371472 [3:47:45<23:50:47, 3.77it/s] 13%|█▎ | 47519/371472 [3:47:45<23:15:58, 3.87it/s] 13%|█▎ | 47520/371472 [3:47:45<23:16:03, 3.87it/s] {'loss': 4.1731, 'learning_rate': 8.852916460169363e-07, 'epoch': 2.05} + 13%|█▎ | 47520/371472 [3:47:45<23:16:03, 3.87it/s] 13%|█▎ | 47521/371472 [3:47:46<22:54:12, 3.93it/s] 13%|█▎ | 47522/371472 [3:47:46<23:02:21, 3.91it/s] 13%|█▎ | 47523/371472 [3:47:46<22:35:59, 3.98it/s] 13%|█▎ | 47524/371472 [3:47:46<22:11:06, 4.06it/s] 13%|█▎ | 47525/371472 [3:47:47<24:06:38, 3.73it/s] 13%|█▎ | 47526/371472 [3:47:47<23:37:29, 3.81it/s] 13%|█▎ | 47527/371472 [3:47:47<24:11:40, 3.72it/s] 13%|█▎ | 47528/371472 [3:47:48<26:46:17, 3.36it/s] 13%|█▎ | 47529/371472 [3:47:48<26:00:22, 3.46it/s] 13%|█▎ | 47530/371472 [3:47:48<24:45:29, 3.63it/s] 13%|█▎ | 47531/371472 [3:47:48<24:28:06, 3.68it/s] 13%|█▎ | 47532/371472 [3:47:49<24:51:34, 3.62it/s] 13%|█▎ | 47533/371472 [3:47:49<24:51:36, 3.62it/s] 13%|█▎ | 47534/371472 [3:47:49<24:00:21, 3.75it/s] 13%|█▎ | 47535/371472 [3:47:49<24:41:26, 3.64it/s] 13%|█▎ | 47536/371472 [3:47:50<24:54:57, 3.61it/s] 13%|█▎ | 47537/371472 [3:47:50<24:26:08, 3.68it/s] 13%|█▎ | 47538/371472 [3:47:50<24:13:33, 3.71it/s] 13%|█▎ | 47539/371472 [3:47:51<24:25:47, 3.68it/s] 13%|█▎ | 47540/371472 [3:47:51<23:50:33, 3.77it/s] {'loss': 4.498, 'learning_rate': 8.852431640414574e-07, 'epoch': 2.05} + 13%|█▎ | 47540/371472 [3:47:51<23:50:33, 3.77it/s] 13%|█▎ | 47541/371472 [3:47:51<24:28:03, 3.68it/s] 13%|█▎ | 47542/371472 [3:47:51<26:31:47, 3.39it/s] 13%|█▎ | 47543/371472 [3:47:52<25:16:54, 3.56it/s] 13%|█▎ | 47544/371472 [3:47:52<26:18:25, 3.42it/s] 13%|█▎ | 47545/371472 [3:47:52<25:24:18, 3.54it/s] 13%|█▎ | 47546/371472 [3:47:52<24:08:11, 3.73it/s] 13%|█▎ | 47547/371472 [3:47:53<24:33:07, 3.66it/s] 13%|█▎ | 47548/371472 [3:47:53<24:26:01, 3.68it/s] 13%|█▎ | 47549/371472 [3:47:53<23:43:29, 3.79it/s] 13%|█▎ | 47550/371472 [3:47:54<24:29:59, 3.67it/s] 13%|█▎ | 47551/371472 [3:47:54<23:36:54, 3.81it/s] 13%|█▎ | 47552/371472 [3:47:54<23:33:17, 3.82it/s] 13%|█▎ | 47553/371472 [3:47:54<25:25:22, 3.54it/s] 13%|█▎ | 47554/371472 [3:47:55<25:29:11, 3.53it/s] 13%|█▎ | 47555/371472 [3:47:55<25:19:12, 3.55it/s] 13%|█▎ | 47556/371472 [3:47:55<25:03:58, 3.59it/s] 13%|█▎ | 47557/371472 [3:47:56<25:00:11, 3.60it/s] 13%|█▎ | 47558/371472 [3:47:56<24:50:41, 3.62it/s] 13%|█▎ | 47559/371472 [3:47:56<23:54:18, 3.76it/s] 13%|█▎ | 47560/371472 [3:47:56<23:56:02, 3.76it/s] {'loss': 4.301, 'learning_rate': 8.851946820659786e-07, 'epoch': 2.05} + 13%|█▎ | 47560/371472 [3:47:56<23:56:02, 3.76it/s] 13%|█▎ | 47561/371472 [3:47:57<24:13:58, 3.71it/s] 13%|█▎ | 47562/371472 [3:47:57<24:03:35, 3.74it/s] 13%|█▎ | 47563/371472 [3:47:57<23:40:56, 3.80it/s] 13%|█▎ | 47564/371472 [3:47:57<24:27:51, 3.68it/s] 13%|█▎ | 47565/371472 [3:47:58<24:31:34, 3.67it/s] 13%|█▎ | 47566/371472 [3:47:58<24:12:47, 3.72it/s] 13%|█▎ | 47567/371472 [3:47:58<26:02:35, 3.45it/s] 13%|█▎ | 47568/371472 [3:47:59<25:17:15, 3.56it/s] 13%|█▎ | 47569/371472 [3:47:59<24:27:42, 3.68it/s] 13%|█▎ | 47570/371472 [3:47:59<24:30:07, 3.67it/s] 13%|█▎ | 47571/371472 [3:47:59<25:01:51, 3.59it/s] 13%|█▎ | 47572/371472 [3:48:00<25:28:46, 3.53it/s] 13%|█▎ | 47573/371472 [3:48:00<25:03:54, 3.59it/s] 13%|█▎ | 47574/371472 [3:48:00<24:04:16, 3.74it/s] 13%|█▎ | 47575/371472 [3:48:00<24:09:46, 3.72it/s] 13%|█▎ | 47576/371472 [3:48:01<25:47:23, 3.49it/s] 13%|█▎ | 47577/371472 [3:48:01<24:49:59, 3.62it/s] 13%|█▎ | 47578/371472 [3:48:01<24:19:23, 3.70it/s] 13%|█▎ | 47579/371472 [3:48:02<24:20:44, 3.70it/s] 13%|█▎ | 47580/371472 [3:48:02<24:42:15, 3.64it/s] {'loss': 4.0986, 'learning_rate': 8.851462000904997e-07, 'epoch': 2.05} + 13%|█▎ | 47580/371472 [3:48:02<24:42:15, 3.64it/s] 13%|█▎ | 47581/371472 [3:48:02<24:48:24, 3.63it/s] 13%|█▎ | 47582/371472 [3:48:02<25:50:19, 3.48it/s] 13%|█▎ | 47583/371472 [3:48:03<25:54:32, 3.47it/s] 13%|█▎ | 47584/371472 [3:48:03<25:21:36, 3.55it/s] 13%|█▎ | 47585/371472 [3:48:03<24:54:09, 3.61it/s] 13%|█▎ | 47586/371472 [3:48:03<24:10:04, 3.72it/s] 13%|█▎ | 47587/371472 [3:48:04<25:47:28, 3.49it/s] 13%|█▎ | 47588/371472 [3:48:04<24:58:52, 3.60it/s] 13%|█▎ | 47589/371472 [3:48:04<24:38:21, 3.65it/s] 13%|█▎ | 47590/371472 [3:48:05<24:46:00, 3.63it/s] 13%|█▎ | 47591/371472 [3:48:05<26:30:30, 3.39it/s] 13%|█▎ | 47592/371472 [3:48:05<25:17:31, 3.56it/s] 13%|█▎ | 47593/371472 [3:48:05<26:24:26, 3.41it/s] 13%|█▎ | 47594/371472 [3:48:06<26:15:43, 3.43it/s] 13%|█▎ | 47595/371472 [3:48:06<26:51:35, 3.35it/s] 13%|█▎ | 47596/371472 [3:48:06<27:00:00, 3.33it/s] 13%|█▎ | 47597/371472 [3:48:07<26:02:03, 3.46it/s] 13%|█▎ | 47598/371472 [3:48:07<26:09:11, 3.44it/s] 13%|█▎ | 47599/371472 [3:48:07<25:10:41, 3.57it/s] 13%|█▎ | 47600/371472 [3:48:07<24:58:10, 3.60it/s] {'loss': 4.3623, 'learning_rate': 8.850977181150206e-07, 'epoch': 2.05} + 13%|█▎ | 47600/371472 [3:48:07<24:58:10, 3.60it/s] 13%|█▎ | 47601/371472 [3:48:08<24:47:07, 3.63it/s] 13%|█▎ | 47602/371472 [3:48:08<24:09:30, 3.72it/s] 13%|█▎ | 47603/371472 [3:48:08<25:58:33, 3.46it/s] 13%|█▎ | 47604/371472 [3:48:09<25:18:36, 3.55it/s] 13%|█▎ | 47605/371472 [3:48:09<24:37:09, 3.65it/s] 13%|█▎ | 47606/371472 [3:48:09<24:39:41, 3.65it/s] 13%|█▎ | 47607/371472 [3:48:09<25:49:09, 3.48it/s] 13%|█▎ | 47608/371472 [3:48:10<26:21:37, 3.41it/s] 13%|█▎ | 47609/371472 [3:48:10<26:02:23, 3.45it/s] 13%|█▎ | 47610/371472 [3:48:10<24:50:09, 3.62it/s] 13%|█▎ | 47611/371472 [3:48:11<26:48:08, 3.36it/s] 13%|█▎ | 47612/371472 [3:48:11<27:23:48, 3.28it/s] 13%|█▎ | 47613/371472 [3:48:11<26:32:37, 3.39it/s] 13%|█▎ | 47614/371472 [3:48:12<26:01:48, 3.46it/s] 13%|█▎ | 47615/371472 [3:48:12<25:50:37, 3.48it/s] 13%|█▎ | 47616/371472 [3:48:12<25:34:50, 3.52it/s] 13%|█▎ | 47617/371472 [3:48:12<24:50:11, 3.62it/s] 13%|█▎ | 47618/371472 [3:48:13<23:55:10, 3.76it/s] 13%|█▎ | 47619/371472 [3:48:13<25:04:15, 3.59it/s] 13%|█▎ | 47620/371472 [3:48:13<24:00:32, 3.75it/s] {'loss': 4.186, 'learning_rate': 8.850492361395418e-07, 'epoch': 2.05} + 13%|█▎ | 47620/371472 [3:48:13<24:00:32, 3.75it/s] 13%|█▎ | 47621/371472 [3:48:13<24:02:53, 3.74it/s] 13%|█▎ | 47622/371472 [3:48:14<24:07:53, 3.73it/s] 13%|█▎ | 47623/371472 [3:48:14<25:21:56, 3.55it/s] 13%|█▎ | 47624/371472 [3:48:14<26:42:47, 3.37it/s] 13%|█▎ | 47625/371472 [3:48:15<25:34:56, 3.52it/s] 13%|█▎ | 47626/371472 [3:48:15<26:10:48, 3.44it/s] 13%|█▎ | 47627/371472 [3:48:15<25:43:38, 3.50it/s] 13%|█▎ | 47628/371472 [3:48:15<25:14:54, 3.56it/s] 13%|█▎ | 47629/371472 [3:48:16<24:43:09, 3.64it/s] 13%|█▎ | 47630/371472 [3:48:16<24:30:26, 3.67it/s] 13%|█▎ | 47631/371472 [3:48:16<24:18:35, 3.70it/s] 13%|█▎ | 47632/371472 [3:48:16<24:31:31, 3.67it/s] 13%|█▎ | 47633/371472 [3:48:17<25:34:32, 3.52it/s] 13%|█▎ | 47634/371472 [3:48:17<25:12:52, 3.57it/s] 13%|█▎ | 47635/371472 [3:48:17<25:44:21, 3.49it/s] 13%|█▎ | 47636/371472 [3:48:18<25:49:14, 3.48it/s] 13%|█▎ | 47637/371472 [3:48:18<25:20:55, 3.55it/s] 13%|█▎ | 47638/371472 [3:48:18<25:16:11, 3.56it/s] 13%|█▎ | 47639/371472 [3:48:18<24:48:09, 3.63it/s] 13%|█▎ | 47640/371472 [3:48:19<24:56:47, 3.61it/s] {'loss': 4.2125, 'learning_rate': 8.85000754164063e-07, 'epoch': 2.05} + 13%|█▎ | 47640/371472 [3:48:19<24:56:47, 3.61it/s] 13%|█▎ | 47641/371472 [3:48:19<25:42:59, 3.50it/s] 13%|█▎ | 47642/371472 [3:48:19<25:05:57, 3.58it/s] 13%|█▎ | 47643/371472 [3:48:20<25:00:10, 3.60it/s] 13%|█▎ | 47644/371472 [3:48:20<24:26:25, 3.68it/s] 13%|█▎ | 47645/371472 [3:48:20<23:52:12, 3.77it/s] 13%|█▎ | 47646/371472 [3:48:20<25:17:14, 3.56it/s] 13%|█▎ | 47647/371472 [3:48:21<25:32:39, 3.52it/s] 13%|█▎ | 47648/371472 [3:48:21<25:48:53, 3.48it/s] 13%|█▎ | 47649/371472 [3:48:21<26:05:52, 3.45it/s] 13%|█▎ | 47650/371472 [3:48:22<26:18:19, 3.42it/s] 13%|█▎ | 47651/371472 [3:48:22<25:16:46, 3.56it/s] 13%|█▎ | 47652/371472 [3:48:22<25:18:40, 3.55it/s] 13%|█▎ | 47653/371472 [3:48:22<23:59:34, 3.75it/s] 13%|█▎ | 47654/371472 [3:48:23<24:05:32, 3.73it/s] 13%|█▎ | 47655/371472 [3:48:23<26:28:58, 3.40it/s] 13%|█▎ | 47656/371472 [3:48:23<24:59:39, 3.60it/s] 13%|█▎ | 47657/371472 [3:48:24<27:00:57, 3.33it/s] 13%|█▎ | 47658/371472 [3:48:24<28:05:56, 3.20it/s] 13%|█▎ | 47659/371472 [3:48:24<27:43:09, 3.24it/s] 13%|█▎ | 47660/371472 [3:48:24<26:23:16, 3.41it/s] {'loss': 4.3799, 'learning_rate': 8.849522721885841e-07, 'epoch': 2.05} + 13%|█▎ | 47660/371472 [3:48:24<26:23:16, 3.41it/s] 13%|█▎ | 47661/371472 [3:48:25<25:53:37, 3.47it/s] 13%|█▎ | 47662/371472 [3:48:25<24:41:06, 3.64it/s] 13%|█▎ | 47663/371472 [3:48:25<24:52:59, 3.61it/s] 13%|█▎ | 47664/371472 [3:48:26<24:27:19, 3.68it/s] 13%|█▎ | 47665/371472 [3:48:26<23:58:16, 3.75it/s] 13%|█▎ | 47666/371472 [3:48:26<24:20:33, 3.69it/s] 13%|█▎ | 47667/371472 [3:48:26<23:41:52, 3.80it/s] 13%|█▎ | 47668/371472 [3:48:27<23:34:51, 3.81it/s] 13%|█▎ | 47669/371472 [3:48:27<23:11:50, 3.88it/s] 13%|█▎ | 47670/371472 [3:48:27<23:46:42, 3.78it/s] 13%|█▎ | 47671/371472 [3:48:27<23:41:16, 3.80it/s] 13%|█▎ | 47672/371472 [3:48:28<24:07:55, 3.73it/s] 13%|█▎ | 47673/371472 [3:48:28<24:54:30, 3.61it/s] 13%|█▎ | 47674/371472 [3:48:28<24:39:44, 3.65it/s] 13%|█▎ | 47675/371472 [3:48:28<24:13:52, 3.71it/s] 13%|█▎ | 47676/371472 [3:48:29<24:41:03, 3.64it/s] 13%|█▎ | 47677/371472 [3:48:29<24:50:58, 3.62it/s] 13%|█▎ | 47678/371472 [3:48:29<24:14:52, 3.71it/s] 13%|█▎ | 47679/371472 [3:48:30<25:35:43, 3.51it/s] 13%|█▎ | 47680/371472 [3:48:30<28:10:04, 3.19it/s] {'loss': 4.2715, 'learning_rate': 8.849037902131052e-07, 'epoch': 2.05} + 13%|█▎ | 47680/371472 [3:48:30<28:10:04, 3.19it/s] 13%|█▎ | 47681/371472 [3:48:30<26:38:32, 3.38it/s] 13%|█▎ | 47682/371472 [3:48:31<28:00:22, 3.21it/s] 13%|█▎ | 47683/371472 [3:48:31<27:01:09, 3.33it/s] 13%|█▎ | 47684/371472 [3:48:31<27:57:49, 3.22it/s] 13%|█▎ | 47685/371472 [3:48:31<26:14:29, 3.43it/s] 13%|█▎ | 47686/371472 [3:48:32<25:10:21, 3.57it/s] 13%|█▎ | 47687/371472 [3:48:32<27:13:57, 3.30it/s] 13%|█▎ | 47688/371472 [3:48:32<25:16:17, 3.56it/s] 13%|█▎ | 47689/371472 [3:48:33<26:24:05, 3.41it/s] 13%|█▎ | 47690/371472 [3:48:33<25:08:43, 3.58it/s] 13%|█▎ | 47691/371472 [3:48:33<24:53:58, 3.61it/s] 13%|█▎ | 47692/371472 [3:48:33<24:18:02, 3.70it/s] 13%|█▎ | 47693/371472 [3:48:34<24:36:18, 3.66it/s] 13%|█▎ | 47694/371472 [3:48:34<23:38:55, 3.80it/s] 13%|█▎ | 47695/371472 [3:48:34<23:35:20, 3.81it/s] 13%|█▎ | 47696/371472 [3:48:34<23:28:42, 3.83it/s] 13%|█▎ | 47697/371472 [3:48:35<25:51:20, 3.48it/s] 13%|█▎ | 47698/371472 [3:48:35<27:35:46, 3.26it/s] 13%|█▎ | 47699/371472 [3:48:35<25:55:07, 3.47it/s] 13%|█▎ | 47700/371472 [3:48:36<24:49:19, 3.62it/s] {'loss': 4.3085, 'learning_rate': 8.848553082376264e-07, 'epoch': 2.05} + 13%|█▎ | 47700/371472 [3:48:36<24:49:19, 3.62it/s] 13%|█▎ | 47701/371472 [3:48:36<24:34:00, 3.66it/s] 13%|█▎ | 47702/371472 [3:48:36<23:25:36, 3.84it/s] 13%|█▎ | 47703/371472 [3:48:36<24:47:32, 3.63it/s] 13%|█▎ | 47704/371472 [3:48:37<24:56:08, 3.61it/s] 13%|█▎ | 47705/371472 [3:48:37<27:08:43, 3.31it/s] 13%|█▎ | 47706/371472 [3:48:37<25:57:19, 3.46it/s] 13%|█▎ | 47707/371472 [3:48:38<26:35:13, 3.38it/s] 13%|█▎ | 47708/371472 [3:48:38<26:47:04, 3.36it/s] 13%|█▎ | 47709/371472 [3:48:38<25:50:19, 3.48it/s] 13%|█▎ | 47710/371472 [3:48:38<25:50:36, 3.48it/s] 13%|█▎ | 47711/371472 [3:48:39<25:18:14, 3.55it/s] 13%|█▎ | 47712/371472 [3:48:39<24:24:48, 3.68it/s] 13%|█▎ | 47713/371472 [3:48:39<24:27:21, 3.68it/s] 13%|█▎ | 47714/371472 [3:48:40<24:04:28, 3.74it/s] 13%|█▎ | 47715/371472 [3:48:40<24:54:34, 3.61it/s] 13%|█▎ | 47716/371472 [3:48:40<24:42:08, 3.64it/s] 13%|█▎ | 47717/371472 [3:48:40<24:21:47, 3.69it/s] 13%|█▎ | 47718/371472 [3:48:41<25:37:07, 3.51it/s] 13%|█▎ | 47719/371472 [3:48:41<26:07:52, 3.44it/s] 13%|█▎ | 47720/371472 [3:48:41<25:15:36, 3.56it/s] {'loss': 4.4178, 'learning_rate': 8.848068262621473e-07, 'epoch': 2.06} + 13%|█▎ | 47720/371472 [3:48:41<25:15:36, 3.56it/s] 13%|█▎ | 47721/371472 [3:48:42<24:49:57, 3.62it/s] 13%|█▎ | 47722/371472 [3:48:42<25:02:09, 3.59it/s] 13%|█▎ | 47723/371472 [3:48:42<25:04:55, 3.59it/s] 13%|█▎ | 47724/371472 [3:48:42<25:19:07, 3.55it/s] 13%|█▎ | 47725/371472 [3:48:43<26:58:57, 3.33it/s] 13%|█▎ | 47726/371472 [3:48:43<25:47:30, 3.49it/s] 13%|█▎ | 47727/371472 [3:48:43<24:40:17, 3.65it/s] 13%|█▎ | 47728/371472 [3:48:44<25:39:35, 3.50it/s] 13%|█▎ | 47729/371472 [3:48:44<25:02:28, 3.59it/s] 13%|█▎ | 47730/371472 [3:48:44<24:46:05, 3.63it/s] 13%|█▎ | 47731/371472 [3:48:44<24:30:33, 3.67it/s] 13%|█▎ | 47732/371472 [3:48:45<24:01:11, 3.74it/s] 13%|█▎ | 47733/371472 [3:48:45<26:47:19, 3.36it/s] 13%|█▎ | 47734/371472 [3:48:45<25:49:31, 3.48it/s] 13%|█▎ | 47735/371472 [3:48:45<24:45:04, 3.63it/s] 13%|█▎ | 47736/371472 [3:48:46<24:23:29, 3.69it/s] 13%|█▎ | 47737/371472 [3:48:46<24:20:04, 3.70it/s] 13%|█▎ | 47738/371472 [3:48:46<26:15:26, 3.42it/s] 13%|█▎ | 47739/371472 [3:48:47<25:37:58, 3.51it/s] 13%|█▎ | 47740/371472 [3:48:47<26:47:09, 3.36it/s] {'loss': 4.3978, 'learning_rate': 8.847583442866685e-07, 'epoch': 2.06} + 13%|█▎ | 47740/371472 [3:48:47<26:47:09, 3.36it/s] 13%|█▎ | 47741/371472 [3:48:47<25:48:11, 3.49it/s] 13%|█▎ | 47742/371472 [3:48:47<24:57:09, 3.60it/s] 13%|█▎ | 47743/371472 [3:48:48<25:21:49, 3.55it/s] 13%|█▎ | 47744/371472 [3:48:48<24:17:26, 3.70it/s] 13%|█▎ | 47745/371472 [3:48:48<23:40:02, 3.80it/s] 13%|█▎ | 47746/371472 [3:48:48<23:33:07, 3.82it/s] 13%|█▎ | 47747/371472 [3:48:49<25:47:28, 3.49it/s] 13%|█▎ | 47748/371472 [3:48:49<25:06:29, 3.58it/s] 13%|█▎ | 47749/371472 [3:48:49<25:20:44, 3.55it/s] 13%|█▎ | 47750/371472 [3:48:50<23:53:47, 3.76it/s] 13%|█▎ | 47751/371472 [3:48:50<26:17:26, 3.42it/s] 13%|█▎ | 47752/371472 [3:48:50<26:18:39, 3.42it/s] 13%|█▎ | 47753/371472 [3:48:51<25:53:57, 3.47it/s] 13%|█▎ | 47754/371472 [3:48:51<25:36:42, 3.51it/s] 13%|█▎ | 47755/371472 [3:48:51<25:27:28, 3.53it/s] 13%|█▎ | 47756/371472 [3:48:51<24:26:13, 3.68it/s] 13%|█▎ | 47757/371472 [3:48:52<25:37:40, 3.51it/s] 13%|█▎ | 47758/371472 [3:48:52<26:35:05, 3.38it/s] 13%|█▎ | 47759/371472 [3:48:52<25:46:35, 3.49it/s] 13%|█▎ | 47760/371472 [3:48:52<24:42:03, 3.64it/s] {'loss': 4.2653, 'learning_rate': 8.847098623111896e-07, 'epoch': 2.06} + 13%|█▎ | 47760/371472 [3:48:52<24:42:03, 3.64it/s] 13%|█▎ | 47761/371472 [3:48:53<25:02:44, 3.59it/s] 13%|█▎ | 47762/371472 [3:48:53<25:06:53, 3.58it/s] 13%|█▎ | 47763/371472 [3:48:53<25:40:08, 3.50it/s] 13%|█▎ | 47764/371472 [3:48:54<25:32:32, 3.52it/s] 13%|█▎ | 47765/371472 [3:48:54<25:03:40, 3.59it/s] 13%|█▎ | 47766/371472 [3:48:54<25:04:49, 3.59it/s] 13%|█▎ | 47767/371472 [3:48:55<26:51:55, 3.35it/s] 13%|█▎ | 47768/371472 [3:48:55<25:55:50, 3.47it/s] 13%|█▎ | 47769/371472 [3:48:55<27:49:13, 3.23it/s] 13%|█▎ | 47770/371472 [3:48:55<26:37:24, 3.38it/s] 13%|█▎ | 47771/371472 [3:48:56<26:22:20, 3.41it/s] 13%|█▎ | 47772/371472 [3:48:56<25:07:30, 3.58it/s] 13%|█▎ | 47773/371472 [3:48:56<24:59:31, 3.60it/s] 13%|█▎ | 47774/371472 [3:48:56<24:27:20, 3.68it/s] 13%|█▎ | 47775/371472 [3:48:57<24:04:03, 3.74it/s] 13%|█▎ | 47776/371472 [3:48:57<24:10:09, 3.72it/s] 13%|█▎ | 47777/371472 [3:48:57<23:52:54, 3.77it/s] 13%|█▎ | 47778/371472 [3:48:58<23:49:44, 3.77it/s] 13%|█▎ | 47779/371472 [3:48:58<23:23:56, 3.84it/s] 13%|█▎ | 47780/371472 [3:48:58<23:51:22, 3.77it/s] {'loss': 4.3894, 'learning_rate': 8.846613803357107e-07, 'epoch': 2.06} + 13%|█▎ | 47780/371472 [3:48:58<23:51:22, 3.77it/s] 13%|█▎ | 47781/371472 [3:48:58<24:08:05, 3.73it/s] 13%|█▎ | 47782/371472 [3:48:59<23:34:43, 3.81it/s] 13%|█▎ | 47783/371472 [3:48:59<25:27:16, 3.53it/s] 13%|█▎ | 47784/371472 [3:48:59<25:19:41, 3.55it/s] 13%|█▎ | 47785/371472 [3:48:59<25:38:29, 3.51it/s] 13%|█▎ | 47786/371472 [3:49:00<24:43:57, 3.64it/s] 13%|█▎ | 47787/371472 [3:49:00<24:46:48, 3.63it/s] 13%|█▎ | 47788/371472 [3:49:00<24:18:50, 3.70it/s] 13%|█▎ | 47789/371472 [3:49:01<25:15:25, 3.56it/s] 13%|█▎ | 47790/371472 [3:49:01<25:00:51, 3.59it/s] 13%|█▎ | 47791/371472 [3:49:01<25:45:20, 3.49it/s] 13%|█▎ | 47792/371472 [3:49:02<30:14:13, 2.97it/s] 13%|█▎ | 47793/371472 [3:49:02<28:24:45, 3.16it/s] 13%|█▎ | 47794/371472 [3:49:02<29:29:23, 3.05it/s] 13%|█▎ | 47795/371472 [3:49:02<27:41:18, 3.25it/s] 13%|█▎ | 47796/371472 [3:49:03<27:19:03, 3.29it/s] 13%|█▎ | 47797/371472 [3:49:03<27:13:55, 3.30it/s] 13%|█▎ | 47798/371472 [3:49:03<26:51:13, 3.35it/s] 13%|█▎ | 47799/371472 [3:49:04<26:34:09, 3.38it/s] 13%|█▎ | 47800/371472 [3:49:04<27:10:11, 3.31it/s] {'loss': 4.1731, 'learning_rate': 8.846128983602318e-07, 'epoch': 2.06} + 13%|█▎ | 47800/371472 [3:49:04<27:10:11, 3.31it/s] 13%|█▎ | 47801/371472 [3:49:04<27:40:44, 3.25it/s] 13%|█▎ | 47802/371472 [3:49:05<26:18:10, 3.42it/s] 13%|█▎ | 47803/371472 [3:49:05<25:43:19, 3.50it/s] 13%|█▎ | 47804/371472 [3:49:05<26:01:59, 3.45it/s] 13%|█▎ | 47805/371472 [3:49:05<25:21:50, 3.54it/s] 13%|█▎ | 47806/371472 [3:49:06<24:39:32, 3.65it/s] 13%|█▎ | 47807/371472 [3:49:06<24:48:59, 3.62it/s] 13%|█▎ | 47808/371472 [3:49:06<23:53:08, 3.76it/s] 13%|█▎ | 47809/371472 [3:49:06<23:07:51, 3.89it/s] 13%|█▎ | 47810/371472 [3:49:07<24:32:36, 3.66it/s] 13%|█▎ | 47811/371472 [3:49:07<24:05:50, 3.73it/s] 13%|█▎ | 47812/371472 [3:49:07<25:06:20, 3.58it/s] 13%|█▎ | 47813/371472 [3:49:08<25:07:20, 3.58it/s] 13%|█▎ | 47814/371472 [3:49:08<25:04:32, 3.59it/s] 13%|█▎ | 47815/371472 [3:49:08<24:59:18, 3.60it/s] 13%|█▎ | 47816/371472 [3:49:08<25:45:06, 3.49it/s] 13%|█▎ | 47817/371472 [3:49:09<24:53:10, 3.61it/s] 13%|█▎ | 47818/371472 [3:49:09<23:42:59, 3.79it/s] 13%|█▎ | 47819/371472 [3:49:09<23:55:08, 3.76it/s] 13%|█▎ | 47820/371472 [3:49:09<25:07:19, 3.58it/s] {'loss': 4.2996, 'learning_rate': 8.845644163847529e-07, 'epoch': 2.06} + 13%|█▎ | 47820/371472 [3:49:09<25:07:19, 3.58it/s] 13%|█▎ | 47821/371472 [3:49:10<25:50:05, 3.48it/s] 13%|█▎ | 47822/371472 [3:49:10<25:19:05, 3.55it/s] 13%|█▎ | 47823/371472 [3:49:10<27:37:50, 3.25it/s] 13%|█▎ | 47824/371472 [3:49:11<26:18:09, 3.42it/s] 13%|█▎ | 47825/371472 [3:49:11<26:02:19, 3.45it/s] 13%|█▎ | 47826/371472 [3:49:11<24:46:45, 3.63it/s] 13%|█▎ | 47827/371472 [3:49:11<24:44:31, 3.63it/s] 13%|█▎ | 47828/371472 [3:49:12<24:36:07, 3.65it/s] 13%|█▎ | 47829/371472 [3:49:12<25:14:20, 3.56it/s] 13%|█▎ | 47830/371472 [3:49:12<24:53:08, 3.61it/s] 13%|█▎ | 47831/371472 [3:49:13<25:15:13, 3.56it/s] 13%|█▎ | 47832/371472 [3:49:13<26:38:46, 3.37it/s] 13%|█▎ | 47833/371472 [3:49:13<25:49:59, 3.48it/s] 13%|█▎ | 47834/371472 [3:49:13<24:50:11, 3.62it/s] 13%|█▎ | 47835/371472 [3:49:14<25:50:48, 3.48it/s] 13%|█▎ | 47836/371472 [3:49:14<25:55:49, 3.47it/s] 13%|█▎ | 47837/371472 [3:49:14<26:27:19, 3.40it/s] 13%|█▎ | 47838/371472 [3:49:15<25:43:36, 3.49it/s] 13%|█▎ | 47839/371472 [3:49:15<24:22:36, 3.69it/s] 13%|█▎ | 47840/371472 [3:49:15<25:33:27, 3.52it/s] {'loss': 4.2233, 'learning_rate': 8.845159344092741e-07, 'epoch': 2.06} + 13%|█▎ | 47840/371472 [3:49:15<25:33:27, 3.52it/s] 13%|█▎ | 47841/371472 [3:49:15<24:31:53, 3.66it/s] 13%|█▎ | 47842/371472 [3:49:16<23:47:50, 3.78it/s] 13%|█▎ | 47843/371472 [3:49:16<23:26:04, 3.84it/s] 13%|█▎ | 47844/371472 [3:49:16<23:32:44, 3.82it/s] 13%|█▎ | 47845/371472 [3:49:16<24:41:08, 3.64it/s] 13%|█▎ | 47846/371472 [3:49:17<24:54:48, 3.61it/s] 13%|█▎ | 47847/371472 [3:49:17<24:43:34, 3.64it/s] 13%|█▎ | 47848/371472 [3:49:18<32:44:52, 2.75it/s] 13%|█▎ | 47849/371472 [3:49:18<30:06:36, 2.99it/s] 13%|█▎ | 47850/371472 [3:49:18<29:15:13, 3.07it/s] 13%|█▎ | 47851/371472 [3:49:18<28:40:08, 3.14it/s] 13%|█▎ | 47852/371472 [3:49:19<29:15:57, 3.07it/s] 13%|█▎ | 47853/371472 [3:49:19<27:15:53, 3.30it/s] 13%|█▎ | 47854/371472 [3:49:19<26:12:40, 3.43it/s] 13%|█▎ | 47855/371472 [3:49:20<25:26:36, 3.53it/s] 13%|█▎ | 47856/371472 [3:49:20<24:22:02, 3.69it/s] 13%|█▎ | 47857/371472 [3:49:20<23:15:29, 3.86it/s] 13%|█▎ | 47858/371472 [3:49:20<23:17:04, 3.86it/s] 13%|█▎ | 47859/371472 [3:49:21<22:43:45, 3.95it/s] 13%|█▎ | 47860/371472 [3:49:21<23:14:17, 3.87it/s] {'loss': 4.3654, 'learning_rate': 8.844674524337951e-07, 'epoch': 2.06} + 13%|█▎ | 47860/371472 [3:49:21<23:14:17, 3.87it/s] 13%|█▎ | 47861/371472 [3:49:21<23:12:49, 3.87it/s] 13%|█▎ | 47862/371472 [3:49:21<23:42:02, 3.79it/s] 13%|█▎ | 47863/371472 [3:49:22<25:28:28, 3.53it/s] 13%|█▎ | 47864/371472 [3:49:22<25:18:00, 3.55it/s] 13%|█▎ | 47865/371472 [3:49:22<25:38:26, 3.51it/s] 13%|█▎ | 47866/371472 [3:49:23<25:03:23, 3.59it/s] 13%|█▎ | 47867/371472 [3:49:23<25:10:06, 3.57it/s] 13%|█▎ | 47868/371472 [3:49:23<24:24:36, 3.68it/s] 13%|█▎ | 47869/371472 [3:49:23<24:20:11, 3.69it/s] 13%|█▎ | 47870/371472 [3:49:24<23:52:35, 3.76it/s] 13%|█▎ | 47871/371472 [3:49:24<23:31:01, 3.82it/s] 13%|█▎ | 47872/371472 [3:49:24<24:18:02, 3.70it/s] 13%|█▎ | 47873/371472 [3:49:24<24:16:37, 3.70it/s] 13%|█▎ | 47874/371472 [3:49:25<23:55:21, 3.76it/s] 13%|█▎ | 47875/371472 [3:49:25<24:24:35, 3.68it/s] 13%|█▎ | 47876/371472 [3:49:25<23:48:07, 3.78it/s] 13%|█▎ | 47877/371472 [3:49:25<23:29:29, 3.83it/s] 13%|█▎ | 47878/371472 [3:49:26<22:53:05, 3.93it/s] 13%|█▎ | 47879/371472 [3:49:26<23:16:12, 3.86it/s] 13%|█▎ | 47880/371472 [3:49:26<23:26:57, 3.83it/s] {'loss': 4.1437, 'learning_rate': 8.844189704583162e-07, 'epoch': 2.06} + 13%|█▎ | 47880/371472 [3:49:26<23:26:57, 3.83it/s] 13%|█▎ | 47881/371472 [3:49:27<24:10:32, 3.72it/s] 13%|█▎ | 47882/371472 [3:49:27<24:06:32, 3.73it/s] 13%|█▎ | 47883/371472 [3:49:27<23:59:41, 3.75it/s] 13%|█▎ | 47884/371472 [3:49:27<24:33:32, 3.66it/s] 13%|█▎ | 47885/371472 [3:49:28<25:19:41, 3.55it/s] 13%|█▎ | 47886/371472 [3:49:28<25:09:17, 3.57it/s] 13%|█▎ | 47887/371472 [3:49:28<25:39:01, 3.50it/s] 13%|█▎ | 47888/371472 [3:49:29<26:18:28, 3.42it/s] 13%|█▎ | 47889/371472 [3:49:29<26:15:22, 3.42it/s] 13%|█▎ | 47890/371472 [3:49:29<25:31:42, 3.52it/s] 13%|█▎ | 47891/371472 [3:49:29<25:42:27, 3.50it/s] 13%|█▎ | 47892/371472 [3:49:30<25:27:43, 3.53it/s] 13%|█▎ | 47893/371472 [3:49:30<26:28:18, 3.40it/s] 13%|█▎ | 47894/371472 [3:49:30<26:40:32, 3.37it/s] 13%|█▎ | 47895/371472 [3:49:31<32:38:54, 2.75it/s] 13%|█▎ | 47896/371472 [3:49:31<30:31:06, 2.95it/s] 13%|█▎ | 47897/371472 [3:49:31<30:22:04, 2.96it/s] 13%|█▎ | 47898/371472 [3:49:32<28:05:37, 3.20it/s] 13%|█▎ | 47899/371472 [3:49:32<26:36:37, 3.38it/s] 13%|█▎ | 47900/371472 [3:49:32<28:03:36, 3.20it/s] {'loss': 4.3644, 'learning_rate': 8.843704884828373e-07, 'epoch': 2.06} + 13%|█▎ | 47900/371472 [3:49:32<28:03:36, 3.20it/s] 13%|█▎ | 47901/371472 [3:49:33<28:17:34, 3.18it/s] 13%|█▎ | 47902/371472 [3:49:33<26:55:10, 3.34it/s] 13%|█▎ | 47903/371472 [3:49:33<25:47:49, 3.48it/s] 13%|█▎ | 47904/371472 [3:49:33<25:48:45, 3.48it/s] 13%|█▎ | 47905/371472 [3:49:34<25:47:31, 3.48it/s] 13%|█▎ | 47906/371472 [3:49:34<26:24:58, 3.40it/s] 13%|█▎ | 47907/371472 [3:49:34<24:51:16, 3.62it/s] 13%|█▎ | 47908/371472 [3:49:35<25:22:33, 3.54it/s] 13%|█▎ | 47909/371472 [3:49:35<24:15:57, 3.70it/s] 13%|█▎ | 47910/371472 [3:49:35<24:23:35, 3.68it/s] 13%|█▎ | 47911/371472 [3:49:35<24:22:06, 3.69it/s] 13%|█▎ | 47912/371472 [3:49:36<24:34:55, 3.66it/s] 13%|█▎ | 47913/371472 [3:49:36<24:11:35, 3.71it/s] 13%|█▎ | 47914/371472 [3:49:36<24:17:41, 3.70it/s] 13%|█▎ | 47915/371472 [3:49:36<24:28:03, 3.67it/s] 13%|█▎ | 47916/371472 [3:49:37<27:57:47, 3.21it/s] 13%|█▎ | 47917/371472 [3:49:37<27:13:48, 3.30it/s] 13%|█▎ | 47918/371472 [3:49:37<28:43:01, 3.13it/s] 13%|█▎ | 47919/371472 [3:49:38<27:32:37, 3.26it/s] 13%|█▎ | 47920/371472 [3:49:38<27:16:27, 3.30it/s] {'loss': 4.2227, 'learning_rate': 8.843220065073585e-07, 'epoch': 2.06} + 13%|█▎ | 47920/371472 [3:49:38<27:16:27, 3.30it/s] 13%|█▎ | 47921/371472 [3:49:38<27:32:45, 3.26it/s] 13%|█▎ | 47922/371472 [3:49:39<26:50:06, 3.35it/s] 13%|█▎ | 47923/371472 [3:49:39<25:52:57, 3.47it/s] 13%|█▎ | 47924/371472 [3:49:39<25:16:41, 3.56it/s] 13%|█▎ | 47925/371472 [3:49:39<24:38:32, 3.65it/s] 13%|█▎ | 47926/371472 [3:49:40<24:59:32, 3.60it/s] 13%|█▎ | 47927/371472 [3:49:40<24:04:45, 3.73it/s] 13%|█▎ | 47928/371472 [3:49:40<25:21:45, 3.54it/s] 13%|█▎ | 47929/371472 [3:49:41<26:29:19, 3.39it/s] 13%|█▎ | 47930/371472 [3:49:41<25:34:06, 3.51it/s] 13%|█▎ | 47931/371472 [3:49:41<24:34:43, 3.66it/s] 13%|█▎ | 47932/371472 [3:49:41<23:51:24, 3.77it/s] 13%|█▎ | 47933/371472 [3:49:42<23:58:00, 3.75it/s] 13%|█▎ | 47934/371472 [3:49:42<23:39:19, 3.80it/s] 13%|█▎ | 47935/371472 [3:49:42<23:56:31, 3.75it/s] 13%|█▎ | 47936/371472 [3:49:42<23:41:14, 3.79it/s] 13%|█▎ | 47937/371472 [3:49:43<23:53:59, 3.76it/s] 13%|█▎ | 47938/371472 [3:49:43<24:39:22, 3.64it/s] 13%|█▎ | 47939/371472 [3:49:43<25:28:43, 3.53it/s] 13%|█▎ | 47940/371472 [3:49:44<26:27:12, 3.40it/s] {'loss': 4.0454, 'learning_rate': 8.842735245318796e-07, 'epoch': 2.06} + 13%|█▎ | 47940/371472 [3:49:44<26:27:12, 3.40it/s] 13%|█▎ | 47941/371472 [3:49:44<26:44:19, 3.36it/s] 13%|█▎ | 47942/371472 [3:49:44<25:38:35, 3.50it/s] 13%|█▎ | 47943/371472 [3:49:44<25:28:29, 3.53it/s] 13%|█▎ | 47944/371472 [3:49:45<24:22:11, 3.69it/s] 13%|█▎ | 47945/371472 [3:49:45<24:13:40, 3.71it/s] 13%|█▎ | 47946/371472 [3:49:45<25:44:35, 3.49it/s] 13%|█▎ | 47947/371472 [3:49:46<28:43:18, 3.13it/s] 13%|█▎ | 47948/371472 [3:49:46<28:11:02, 3.19it/s] 13%|█▎ | 47949/371472 [3:49:46<29:10:12, 3.08it/s] 13%|█▎ | 47950/371472 [3:49:47<27:23:50, 3.28it/s] 13%|█▎ | 47951/371472 [3:49:47<25:29:08, 3.53it/s] 13%|█▎ | 47952/371472 [3:49:47<24:14:38, 3.71it/s] 13%|█▎ | 47953/371472 [3:49:47<23:37:39, 3.80it/s] 13%|█▎ | 47954/371472 [3:49:48<23:22:39, 3.84it/s] 13%|█▎ | 47955/371472 [3:49:48<23:48:33, 3.77it/s] 13%|█▎ | 47956/371472 [3:49:48<24:55:15, 3.61it/s] 13%|█▎ | 47957/371472 [3:49:48<25:00:53, 3.59it/s] 13%|█▎ | 47958/371472 [3:49:49<24:12:44, 3.71it/s] 13%|█▎ | 47959/371472 [3:49:49<24:31:08, 3.67it/s] 13%|█▎ | 47960/371472 [3:49:49<23:48:48, 3.77it/s] {'loss': 4.4391, 'learning_rate': 8.842250425564007e-07, 'epoch': 2.07} + 13%|█▎ | 47960/371472 [3:49:49<23:48:48, 3.77it/s] 13%|█▎ | 47961/371472 [3:49:49<25:08:30, 3.57it/s] 13%|█▎ | 47962/371472 [3:49:50<26:15:55, 3.42it/s] 13%|█▎ | 47963/371472 [3:49:50<30:32:58, 2.94it/s] 13%|█▎ | 47964/371472 [3:49:51<29:08:13, 3.08it/s] 13%|█▎ | 47965/371472 [3:49:51<27:41:56, 3.24it/s] 13%|█▎ | 47966/371472 [3:49:51<26:34:40, 3.38it/s] 13%|█▎ | 47967/371472 [3:49:51<28:20:39, 3.17it/s] 13%|█▎ | 47968/371472 [3:49:52<26:07:35, 3.44it/s] 13%|█▎ | 47969/371472 [3:49:52<24:44:11, 3.63it/s] 13%|█▎ | 47970/371472 [3:49:52<23:43:41, 3.79it/s] 13%|█▎ | 47971/371472 [3:49:52<24:55:44, 3.60it/s] 13%|█▎ | 47972/371472 [3:49:53<25:34:39, 3.51it/s] 13%|█▎ | 47973/371472 [3:49:53<25:19:34, 3.55it/s] 13%|█▎ | 47974/371472 [3:49:53<25:38:25, 3.50it/s] 13%|█▎ | 47975/371472 [3:49:54<24:50:47, 3.62it/s] 13%|█▎ | 47976/371472 [3:49:54<24:20:26, 3.69it/s] 13%|█▎ | 47977/371472 [3:49:54<24:10:24, 3.72it/s] 13%|█▎ | 47978/371472 [3:49:54<23:18:58, 3.85it/s] 13%|█▎ | 47979/371472 [3:49:55<23:29:23, 3.83it/s] 13%|█▎ | 47980/371472 [3:49:55<25:13:13, 3.56it/s] {'loss': 4.3234, 'learning_rate': 8.841765605809217e-07, 'epoch': 2.07} + 13%|█▎ | 47980/371472 [3:49:55<25:13:13, 3.56it/s] 13%|█▎ | 47981/371472 [3:49:55<24:10:14, 3.72it/s] 13%|█▎ | 47982/371472 [3:49:55<24:28:58, 3.67it/s] 13%|█▎ | 47983/371472 [3:49:56<23:35:43, 3.81it/s] 13%|█▎ | 47984/371472 [3:49:56<23:24:56, 3.84it/s] 13%|█▎ | 47985/371472 [3:49:56<24:19:53, 3.69it/s] 13%|█▎ | 47986/371472 [3:49:56<23:43:33, 3.79it/s] 13%|█▎ | 47987/371472 [3:49:57<26:35:53, 3.38it/s] 13%|█▎ | 47988/371472 [3:49:57<25:24:27, 3.54it/s] 13%|█▎ | 47989/371472 [3:49:57<24:43:55, 3.63it/s] 13%|█▎ | 47990/371472 [3:49:58<25:57:24, 3.46it/s] 13%|█▎ | 47991/371472 [3:49:58<25:48:50, 3.48it/s] 13%|█▎ | 47992/371472 [3:49:58<26:14:58, 3.42it/s] 13%|█▎ | 47993/371472 [3:49:59<25:51:31, 3.47it/s] 13%|█▎ | 47994/371472 [3:49:59<28:24:24, 3.16it/s] 13%|█▎ | 47995/371472 [3:49:59<26:33:31, 3.38it/s] 13%|█▎ | 47996/371472 [3:49:59<26:18:45, 3.41it/s] 13%|█▎ | 47997/371472 [3:50:00<28:30:05, 3.15it/s] 13%|█▎ | 47998/371472 [3:50:00<27:29:36, 3.27it/s] 13%|█▎ | 47999/371472 [3:50:00<25:58:10, 3.46it/s] 13%|█▎ | 48000/371472 [3:50:01<26:18:14, 3.42it/s] {'loss': 4.2496, 'learning_rate': 8.841280786054429e-07, 'epoch': 2.07} + 13%|█▎ | 48000/371472 [3:50:01<26:18:14, 3.42it/s] 13%|█▎ | 48001/371472 [3:50:01<25:12:56, 3.56it/s] 13%|█▎ | 48002/371472 [3:50:01<25:27:51, 3.53it/s] 13%|█▎ | 48003/371472 [3:50:02<25:50:25, 3.48it/s] 13%|█▎ | 48004/371472 [3:50:02<27:40:15, 3.25it/s] 13%|█▎ | 48005/371472 [3:50:02<26:07:39, 3.44it/s] 13%|█▎ | 48006/371472 [3:50:02<28:20:47, 3.17it/s] 13%|█▎ | 48007/371472 [3:50:03<29:10:41, 3.08it/s] 13%|█▎ | 48008/371472 [3:50:03<27:53:05, 3.22it/s] 13%|█▎ | 48009/371472 [3:50:03<26:14:04, 3.42it/s] 13%|█▎ | 48010/371472 [3:50:04<25:34:23, 3.51it/s] 13%|█▎ | 48011/371472 [3:50:04<24:36:55, 3.65it/s] 13%|█▎ | 48012/371472 [3:50:04<23:58:25, 3.75it/s] 13%|█▎ | 48013/371472 [3:50:04<23:11:21, 3.87it/s] 13%|█▎ | 48014/371472 [3:50:05<22:37:46, 3.97it/s] 13%|█▎ | 48015/371472 [3:50:05<22:49:30, 3.94it/s] 13%|█▎ | 48016/371472 [3:50:05<23:54:19, 3.76it/s] 13%|█▎ | 48017/371472 [3:50:05<24:15:32, 3.70it/s] 13%|█▎ | 48018/371472 [3:50:06<26:35:56, 3.38it/s] 13%|█▎ | 48019/371472 [3:50:06<25:25:40, 3.53it/s] 13%|█▎ | 48020/371472 [3:50:06<24:09:19, 3.72it/s] {'loss': 4.166, 'learning_rate': 8.84079596629964e-07, 'epoch': 2.07} + 13%|█▎ | 48020/371472 [3:50:06<24:09:19, 3.72it/s] 13%|█▎ | 48021/371472 [3:50:07<24:15:49, 3.70it/s] 13%|█▎ | 48022/371472 [3:50:07<23:41:17, 3.79it/s] 13%|█▎ | 48023/371472 [3:50:07<23:30:57, 3.82it/s] 13%|█▎ | 48024/371472 [3:50:07<23:52:43, 3.76it/s] 13%|█▎ | 48025/371472 [3:50:08<26:54:37, 3.34it/s] 13%|█▎ | 48026/371472 [3:50:08<25:35:19, 3.51it/s] 13%|█▎ | 48027/371472 [3:50:08<27:20:58, 3.29it/s] 13%|█▎ | 48028/371472 [3:50:09<26:03:07, 3.45it/s] 13%|█▎ | 48029/371472 [3:50:09<26:49:01, 3.35it/s] 13%|█▎ | 48030/371472 [3:50:09<27:27:36, 3.27it/s] 13%|█▎ | 48031/371472 [3:50:09<26:25:10, 3.40it/s] 13%|█▎ | 48032/371472 [3:50:10<27:08:52, 3.31it/s] 13%|█▎ | 48033/371472 [3:50:10<26:48:52, 3.35it/s] 13%|█▎ | 48034/371472 [3:50:10<25:52:23, 3.47it/s] 13%|█▎ | 48035/371472 [3:50:11<25:08:47, 3.57it/s] 13%|█▎ | 48036/371472 [3:50:11<25:12:53, 3.56it/s] 13%|█▎ | 48037/371472 [3:50:11<24:19:49, 3.69it/s] 13%|█▎ | 48038/371472 [3:50:11<24:44:23, 3.63it/s] 13%|█▎ | 48039/371472 [3:50:12<24:44:48, 3.63it/s] 13%|█▎ | 48040/371472 [3:50:12<25:44:25, 3.49it/s] {'loss': 4.2374, 'learning_rate': 8.840311146544851e-07, 'epoch': 2.07} + 13%|█▎ | 48040/371472 [3:50:12<25:44:25, 3.49it/s] 13%|█▎ | 48041/371472 [3:50:12<25:07:57, 3.57it/s] 13%|█▎ | 48042/371472 [3:50:13<24:28:37, 3.67it/s] 13%|█▎ | 48043/371472 [3:50:13<24:23:31, 3.68it/s] 13%|█▎ | 48044/371472 [3:50:13<24:35:27, 3.65it/s] 13%|█▎ | 48045/371472 [3:50:13<24:49:06, 3.62it/s] 13%|█▎ | 48046/371472 [3:50:14<25:58:38, 3.46it/s] 13%|█▎ | 48047/371472 [3:50:14<26:13:06, 3.43it/s] 13%|█▎ | 48048/371472 [3:50:14<26:30:13, 3.39it/s] 13%|█▎ | 48049/371472 [3:50:15<25:11:34, 3.57it/s] 13%|█▎ | 48050/371472 [3:50:15<24:36:33, 3.65it/s] 13%|█▎ | 48051/371472 [3:50:15<24:15:41, 3.70it/s] 13%|█▎ | 48052/371472 [3:50:15<26:25:13, 3.40it/s] 13%|█▎ | 48053/371472 [3:50:16<25:36:37, 3.51it/s] 13%|█▎ | 48054/371472 [3:50:16<25:39:14, 3.50it/s] 13%|█▎ | 48055/371472 [3:50:16<26:47:23, 3.35it/s] 13%|█▎ | 48056/371472 [3:50:17<25:58:15, 3.46it/s] 13%|█▎ | 48057/371472 [3:50:17<25:48:22, 3.48it/s] 13%|█▎ | 48058/371472 [3:50:17<25:33:31, 3.51it/s] 13%|█▎ | 48059/371472 [3:50:17<25:05:20, 3.58it/s] 13%|█▎ | 48060/371472 [3:50:18<25:29:37, 3.52it/s] {'loss': 4.2214, 'learning_rate': 8.839826326790062e-07, 'epoch': 2.07} + 13%|█▎ | 48060/371472 [3:50:18<25:29:37, 3.52it/s] 13%|█▎ | 48061/371472 [3:50:18<25:19:34, 3.55it/s] 13%|█▎ | 48062/371472 [3:50:18<25:24:33, 3.54it/s] 13%|█▎ | 48063/371472 [3:50:19<26:35:23, 3.38it/s] 13%|█▎ | 48064/371472 [3:50:19<26:06:11, 3.44it/s] 13%|█▎ | 48065/371472 [3:50:19<26:16:48, 3.42it/s] 13%|█▎ | 48066/371472 [3:50:19<26:22:02, 3.41it/s] 13%|█▎ | 48067/371472 [3:50:20<24:59:43, 3.59it/s] 13%|���▎ | 48068/371472 [3:50:20<24:36:29, 3.65it/s] 13%|█▎ | 48069/371472 [3:50:20<23:59:26, 3.74it/s] 13%|█▎ | 48070/371472 [3:50:20<24:05:15, 3.73it/s] 13%|█▎ | 48071/371472 [3:50:21<24:09:24, 3.72it/s] 13%|█▎ | 48072/371472 [3:50:21<23:48:15, 3.77it/s] 13%|█▎ | 48073/371472 [3:50:21<27:13:47, 3.30it/s] 13%|█▎ | 48074/371472 [3:50:22<26:32:45, 3.38it/s] 13%|█▎ | 48075/371472 [3:50:22<25:59:49, 3.46it/s] 13%|█▎ | 48076/371472 [3:50:22<25:04:25, 3.58it/s] 13%|█▎ | 48077/371472 [3:50:22<25:23:01, 3.54it/s] 13%|█▎ | 48078/371472 [3:50:23<25:03:51, 3.58it/s] 13%|█▎ | 48079/371472 [3:50:23<26:56:47, 3.33it/s] 13%|█▎ | 48080/371472 [3:50:23<26:34:23, 3.38it/s] {'loss': 4.177, 'learning_rate': 8.839341507035273e-07, 'epoch': 2.07} + 13%|█▎ | 48080/371472 [3:50:23<26:34:23, 3.38it/s] 13%|█▎ | 48081/371472 [3:50:24<26:12:02, 3.43it/s] 13%|█▎ | 48082/371472 [3:50:24<25:33:12, 3.52it/s] 13%|█▎ | 48083/371472 [3:50:24<28:31:42, 3.15it/s] 13%|█▎ | 48084/371472 [3:50:25<26:45:15, 3.36it/s] 13%|█▎ | 48085/371472 [3:50:25<25:24:40, 3.54it/s] 13%|█▎ | 48086/371472 [3:50:25<25:58:45, 3.46it/s] 13%|█▎ | 48087/371472 [3:50:25<27:50:07, 3.23it/s] 13%|█▎ | 48088/371472 [3:50:26<25:57:43, 3.46it/s] 13%|█▎ | 48089/371472 [3:50:26<25:00:06, 3.59it/s] 13%|█▎ | 48090/371472 [3:50:26<25:56:17, 3.46it/s] 13%|█▎ | 48091/371472 [3:50:27<25:52:11, 3.47it/s] 13%|█▎ | 48092/371472 [3:50:27<28:10:21, 3.19it/s] 13%|█▎ | 48093/371472 [3:50:27<28:28:24, 3.15it/s] 13%|█▎ | 48094/371472 [3:50:28<26:46:03, 3.36it/s] 13%|█▎ | 48095/371472 [3:50:28<25:16:52, 3.55it/s] 13%|█▎ | 48096/371472 [3:50:28<27:20:23, 3.29it/s] 13%|█▎ | 48097/371472 [3:50:28<25:46:25, 3.49it/s] 13%|█▎ | 48098/371472 [3:50:29<25:41:09, 3.50it/s] 13%|█▎ | 48099/371472 [3:50:29<25:06:51, 3.58it/s] 13%|█▎ | 48100/371472 [3:50:29<24:28:34, 3.67it/s] {'loss': 4.1296, 'learning_rate': 8.838856687280484e-07, 'epoch': 2.07} + 13%|█▎ | 48100/371472 [3:50:29<24:28:34, 3.67it/s] 13%|█▎ | 48101/371472 [3:50:29<23:39:18, 3.80it/s] 13%|█▎ | 48102/371472 [3:50:30<23:59:04, 3.75it/s] 13%|█▎ | 48103/371472 [3:50:30<23:37:21, 3.80it/s] 13%|█▎ | 48104/371472 [3:50:30<24:27:58, 3.67it/s] 13%|█▎ | 48105/371472 [3:50:31<24:19:25, 3.69it/s] 13%|█▎ | 48106/371472 [3:50:31<23:36:32, 3.80it/s] 13%|█▎ | 48107/371472 [3:50:31<23:55:52, 3.75it/s] 13%|█▎ | 48108/371472 [3:50:31<23:45:42, 3.78it/s] 13%|█▎ | 48109/371472 [3:50:32<24:32:36, 3.66it/s] 13%|█▎ | 48110/371472 [3:50:32<23:32:40, 3.81it/s] 13%|█▎ | 48111/371472 [3:50:32<23:44:32, 3.78it/s] 13%|█▎ | 48112/371472 [3:50:32<24:15:52, 3.70it/s] 13%|█▎ | 48113/371472 [3:50:33<24:17:22, 3.70it/s] 13%|█▎ | 48114/371472 [3:50:33<24:37:48, 3.65it/s] 13%|█▎ | 48115/371472 [3:50:33<25:34:45, 3.51it/s] 13%|█▎ | 48116/371472 [3:50:34<25:54:13, 3.47it/s] 13%|█▎ | 48117/371472 [3:50:34<24:28:06, 3.67it/s] 13%|█▎ | 48118/371472 [3:50:34<23:45:25, 3.78it/s] 13%|█▎ | 48119/371472 [3:50:34<23:38:55, 3.80it/s] 13%|█▎ | 48120/371472 [3:50:35<22:57:56, 3.91it/s] {'loss': 4.1983, 'learning_rate': 8.838371867525694e-07, 'epoch': 2.07} + 13%|█▎ | 48120/371472 [3:50:35<22:57:56, 3.91it/s] 13%|█▎ | 48121/371472 [3:50:35<23:47:07, 3.78it/s] 13%|█▎ | 48122/371472 [3:50:35<23:29:16, 3.82it/s] 13%|█▎ | 48123/371472 [3:50:35<24:39:21, 3.64it/s] 13%|█▎ | 48124/371472 [3:50:36<26:09:32, 3.43it/s] 13%|█▎ | 48125/371472 [3:50:36<26:37:27, 3.37it/s] 13%|█▎ | 48126/371472 [3:50:36<29:22:13, 3.06it/s] 13%|█▎ | 48127/371472 [3:50:37<27:05:03, 3.32it/s] 13%|█▎ | 48128/371472 [3:50:37<26:57:47, 3.33it/s] 13%|█▎ | 48129/371472 [3:50:37<25:18:52, 3.55it/s] 13%|█▎ | 48130/371472 [3:50:37<24:35:00, 3.65it/s] 13%|█▎ | 48131/371472 [3:50:38<24:44:06, 3.63it/s] 13%|█▎ | 48132/371472 [3:50:38<23:55:59, 3.75it/s] 13%|█▎ | 48133/371472 [3:50:38<26:21:04, 3.41it/s] 13%|█▎ | 48134/371472 [3:50:39<25:40:23, 3.50it/s] 13%|█▎ | 48135/371472 [3:50:39<25:44:56, 3.49it/s] 13%|█▎ | 48136/371472 [3:50:39<26:40:00, 3.37it/s] 13%|█▎ | 48137/371472 [3:50:39<24:50:46, 3.61it/s] 13%|█▎ | 48138/371472 [3:50:40<25:44:56, 3.49it/s] 13%|█▎ | 48139/371472 [3:50:40<25:31:32, 3.52it/s] 13%|█▎ | 48140/371472 [3:50:40<26:36:25, 3.38it/s] {'loss': 4.2373, 'learning_rate': 8.837887047770906e-07, 'epoch': 2.07} + 13%|█▎ | 48140/371472 [3:50:40<26:36:25, 3.38it/s] 13%|█▎ | 48141/371472 [3:50:41<27:25:39, 3.27it/s] 13%|█▎ | 48142/371472 [3:50:41<25:53:38, 3.47it/s] 13%|█▎ | 48143/371472 [3:50:41<25:33:59, 3.51it/s] 13%|█▎ | 48144/371472 [3:50:42<26:07:05, 3.44it/s] 13%|█▎ | 48145/371472 [3:50:42<25:49:23, 3.48it/s] 13%|█▎ | 48146/371472 [3:50:42<25:03:29, 3.58it/s] 13%|█▎ | 48147/371472 [3:50:42<24:50:47, 3.61it/s] 13%|█▎ | 48148/371472 [3:50:43<24:32:48, 3.66it/s] 13%|█▎ | 48149/371472 [3:50:43<24:41:54, 3.64it/s] 13%|█▎ | 48150/371472 [3:50:43<24:41:29, 3.64it/s] 13%|█▎ | 48151/371472 [3:50:43<24:15:32, 3.70it/s] 13%|█▎ | 48152/371472 [3:50:44<24:34:23, 3.65it/s] 13%|█▎ | 48153/371472 [3:50:44<23:58:41, 3.75it/s] 13%|█▎ | 48154/371472 [3:50:44<24:29:44, 3.67it/s] 13%|█▎ | 48155/371472 [3:50:45<25:05:26, 3.58it/s] 13%|█▎ | 48156/371472 [3:50:45<24:40:37, 3.64it/s] 13%|█▎ | 48157/371472 [3:50:45<25:14:10, 3.56it/s] 13%|█▎ | 48158/371472 [3:50:45<25:17:36, 3.55it/s] 13%|█▎ | 48159/371472 [3:50:46<28:01:35, 3.20it/s] 13%|█▎ | 48160/371472 [3:50:46<27:16:20, 3.29it/s] {'loss': 4.1931, 'learning_rate': 8.837402228016117e-07, 'epoch': 2.07} + 13%|█▎ | 48160/371472 [3:50:46<27:16:20, 3.29it/s] 13%|█▎ | 48161/371472 [3:50:46<26:27:34, 3.39it/s] 13%|█▎ | 48162/371472 [3:50:47<29:29:03, 3.05it/s] 13%|█▎ | 48163/371472 [3:50:47<29:26:12, 3.05it/s] 13%|█▎ | 48164/371472 [3:50:47<27:27:18, 3.27it/s] 13%|█▎ | 48165/371472 [3:50:48<26:26:27, 3.40it/s] 13%|█▎ | 48166/371472 [3:50:48<26:29:50, 3.39it/s] 13%|█▎ | 48167/371472 [3:50:48<27:06:47, 3.31it/s] 13%|█▎ | 48168/371472 [3:50:48<26:32:34, 3.38it/s] 13%|█▎ | 48169/371472 [3:50:49<26:29:40, 3.39it/s] 13%|█▎ | 48170/371472 [3:50:49<25:28:07, 3.53it/s] 13%|█▎ | 48171/371472 [3:50:49<24:35:22, 3.65it/s] 13%|█▎ | 48172/371472 [3:50:49<23:51:25, 3.76it/s] 13%|█▎ | 48173/371472 [3:50:50<24:51:36, 3.61it/s] 13%|█▎ | 48174/371472 [3:50:50<25:00:33, 3.59it/s] 13%|█▎ | 48175/371472 [3:50:50<24:25:19, 3.68it/s] 13%|█▎ | 48176/371472 [3:50:51<26:05:34, 3.44it/s] 13%|█▎ | 48177/371472 [3:50:51<25:59:07, 3.46it/s] 13%|█▎ | 48178/371472 [3:50:51<25:20:03, 3.54it/s] 13%|█▎ | 48179/371472 [3:50:51<25:17:24, 3.55it/s] 13%|█▎ | 48180/371472 [3:50:52<25:42:01, 3.49it/s] {'loss': 4.0753, 'learning_rate': 8.836917408261328e-07, 'epoch': 2.08} + 13%|█▎ | 48180/371472 [3:50:52<25:42:01, 3.49it/s] 13%|█▎ | 48181/371472 [3:50:52<25:58:19, 3.46it/s] 13%|█▎ | 48182/371472 [3:50:52<25:07:32, 3.57it/s] 13%|█▎ | 48183/371472 [3:50:53<24:50:38, 3.61it/s] 13%|█▎ | 48184/371472 [3:50:53<24:00:26, 3.74it/s] 13%|█▎ | 48185/371472 [3:50:53<23:01:22, 3.90it/s] 13%|█▎ | 48186/371472 [3:50:53<23:08:41, 3.88it/s] 13%|█▎ | 48187/371472 [3:50:54<23:19:12, 3.85it/s] 13%|█▎ | 48188/371472 [3:50:54<25:56:21, 3.46it/s] 13%|█▎ | 48189/371472 [3:50:54<25:25:51, 3.53it/s] 13%|█▎ | 48190/371472 [3:50:55<24:53:10, 3.61it/s] 13%|█▎ | 48191/371472 [3:50:55<24:50:18, 3.62it/s] 13%|█▎ | 48192/371472 [3:50:55<24:26:05, 3.68it/s] 13%|█▎ | 48193/371472 [3:50:55<23:15:59, 3.86it/s] 13%|█▎ | 48194/371472 [3:50:56<22:55:54, 3.92it/s] 13%|█▎ | 48195/371472 [3:50:56<23:10:20, 3.88it/s] 13%|█▎ | 48196/371472 [3:50:56<23:55:55, 3.75it/s] 13%|█▎ | 48197/371472 [3:50:56<23:43:28, 3.79it/s] 13%|█▎ | 48198/371472 [3:50:57<24:23:36, 3.68it/s] 13%|█▎ | 48199/371472 [3:50:57<24:20:48, 3.69it/s] 13%|█▎ | 48200/371472 [3:50:57<25:30:51, 3.52it/s] {'loss': 4.4324, 'learning_rate': 8.83643258850654e-07, 'epoch': 2.08} + 13%|█▎ | 48200/371472 [3:50:57<25:30:51, 3.52it/s] 13%|█▎ | 48201/371472 [3:50:58<26:00:08, 3.45it/s] 13%|█▎ | 48202/371472 [3:50:58<27:10:05, 3.31it/s] 13%|█▎ | 48203/371472 [3:50:58<27:03:59, 3.32it/s] 13%|█▎ | 48204/371472 [3:50:58<27:14:26, 3.30it/s] 13%|█▎ | 48205/371472 [3:50:59<26:52:24, 3.34it/s] 13%|█▎ | 48206/371472 [3:50:59<26:19:46, 3.41it/s] 13%|█▎ | 48207/371472 [3:50:59<26:00:58, 3.45it/s] 13%|█▎ | 48208/371472 [3:51:00<27:58:29, 3.21it/s] 13%|█▎ | 48209/371472 [3:51:00<26:22:58, 3.40it/s] 13%|█▎ | 48210/371472 [3:51:00<25:35:26, 3.51it/s] 13%|█▎ | 48211/371472 [3:51:00<24:20:09, 3.69it/s] 13%|█▎ | 48212/371472 [3:51:01<24:14:27, 3.70it/s] 13%|█▎ | 48213/371472 [3:51:01<25:10:43, 3.57it/s] 13%|█▎ | 48214/371472 [3:51:01<24:24:51, 3.68it/s] 13%|█▎ | 48215/371472 [3:51:02<24:41:15, 3.64it/s] 13%|█▎ | 48216/371472 [3:51:02<24:03:40, 3.73it/s] 13%|█▎ | 48217/371472 [3:51:02<25:02:02, 3.59it/s] 13%|█▎ | 48218/371472 [3:51:02<24:53:44, 3.61it/s] 13%|█▎ | 48219/371472 [3:51:03<23:54:29, 3.76it/s] 13%|█▎ | 48220/371472 [3:51:03<23:02:23, 3.90it/s] {'loss': 4.2271, 'learning_rate': 8.835947768751752e-07, 'epoch': 2.08} + 13%|█▎ | 48220/371472 [3:51:03<23:02:23, 3.90it/s] 13%|█▎ | 48221/371472 [3:51:03<23:28:30, 3.82it/s] 13%|█▎ | 48222/371472 [3:51:03<22:51:32, 3.93it/s] 13%|█▎ | 48223/371472 [3:51:04<27:05:26, 3.31it/s] 13%|█▎ | 48224/371472 [3:51:04<27:43:38, 3.24it/s] 13%|█▎ | 48225/371472 [3:51:04<26:38:13, 3.37it/s] 13%|█▎ | 48226/371472 [3:51:05<25:34:39, 3.51it/s] 13%|█▎ | 48227/371472 [3:51:05<25:33:00, 3.51it/s] 13%|█▎ | 48228/371472 [3:51:05<24:27:31, 3.67it/s] 13%|█▎ | 48229/371472 [3:51:05<24:58:37, 3.59it/s] 13%|█▎ | 48230/371472 [3:51:06<25:23:18, 3.54it/s] 13%|█▎ | 48231/371472 [3:51:06<24:51:48, 3.61it/s] 13%|█▎ | 48232/371472 [3:51:06<23:59:03, 3.74it/s] 13%|█▎ | 48233/371472 [3:51:06<23:42:17, 3.79it/s] 13%|█▎ | 48234/371472 [3:51:07<22:50:02, 3.93it/s] 13%|█▎ | 48235/371472 [3:51:07<22:50:21, 3.93it/s] 13%|█▎ | 48236/371472 [3:51:07<25:37:48, 3.50it/s] 13%|█▎ | 48237/371472 [3:51:08<25:59:45, 3.45it/s] 13%|█▎ | 48238/371472 [3:51:08<25:37:36, 3.50it/s] 13%|█▎ | 48239/371472 [3:51:08<26:17:43, 3.41it/s] 13%|█▎ | 48240/371472 [3:51:09<26:46:10, 3.35it/s] {'loss': 4.1287, 'learning_rate': 8.835462948996961e-07, 'epoch': 2.08} + 13%|█▎ | 48240/371472 [3:51:09<26:46:10, 3.35it/s] 13%|█▎ | 48241/371472 [3:51:09<26:07:51, 3.44it/s] 13%|█▎ | 48242/371472 [3:51:09<25:12:11, 3.56it/s] 13%|█▎ | 48243/371472 [3:51:09<24:52:49, 3.61it/s] 13%|█▎ | 48244/371472 [3:51:10<24:10:21, 3.71it/s] 13%|█▎ | 48245/371472 [3:51:10<24:04:50, 3.73it/s] 13%|█▎ | 48246/371472 [3:51:10<25:26:49, 3.53it/s] 13%|█▎ | 48247/371472 [3:51:10<26:01:16, 3.45it/s] 13%|█▎ | 48248/371472 [3:51:11<26:32:00, 3.38it/s] 13%|█▎ | 48249/371472 [3:51:11<25:52:25, 3.47it/s] 13%|█▎ | 48250/371472 [3:51:11<25:26:59, 3.53it/s] 13%|█▎ | 48251/371472 [3:51:12<24:54:46, 3.60it/s] 13%|█▎ | 48252/371472 [3:51:12<25:07:49, 3.57it/s] 13%|█▎ | 48253/371472 [3:51:12<25:33:05, 3.51it/s] 13%|█▎ | 48254/371472 [3:51:12<25:06:23, 3.58it/s] 13%|█▎ | 48255/371472 [3:51:13<24:44:07, 3.63it/s] 13%|█▎ | 48256/371472 [3:51:13<24:54:32, 3.60it/s] 13%|█▎ | 48257/371472 [3:51:13<24:29:18, 3.67it/s] 13%|█▎ | 48258/371472 [3:51:13<23:42:24, 3.79it/s] 13%|█▎ | 48259/371472 [3:51:14<25:27:01, 3.53it/s] 13%|█▎ | 48260/371472 [3:51:14<24:43:08, 3.63it/s] {'loss': 4.3173, 'learning_rate': 8.834978129242172e-07, 'epoch': 2.08} + 13%|█▎ | 48260/371472 [3:51:14<24:43:08, 3.63it/s] 13%|█▎ | 48261/371472 [3:51:14<24:25:10, 3.68it/s] 13%|█▎ | 48262/371472 [3:51:15<25:44:36, 3.49it/s] 13%|█▎ | 48263/371472 [3:51:15<24:59:19, 3.59it/s] 13%|█▎ | 48264/371472 [3:51:15<24:21:03, 3.69it/s] 13%|█▎ | 48265/371472 [3:51:15<24:05:02, 3.73it/s] 13%|█▎ | 48266/371472 [3:51:16<24:17:30, 3.70it/s] 13%|█▎ | 48267/371472 [3:51:16<24:07:45, 3.72it/s] 13%|█▎ | 48268/371472 [3:51:16<25:44:00, 3.49it/s] 13%|█▎ | 48269/371472 [3:51:17<25:21:12, 3.54it/s] 13%|█▎ | 48270/371472 [3:51:17<25:52:09, 3.47it/s] 13%|█▎ | 48271/371472 [3:51:17<27:20:03, 3.28it/s] 13%|█▎ | 48272/371472 [3:51:17<26:03:27, 3.45it/s] 13%|█▎ | 48273/371472 [3:51:18<25:24:46, 3.53it/s] 13%|█▎ | 48274/371472 [3:51:18<27:18:38, 3.29it/s] 13%|█▎ | 48275/371472 [3:51:18<28:50:22, 3.11it/s] 13%|█▎ | 48276/371472 [3:51:19<27:10:23, 3.30it/s] 13%|█▎ | 48277/371472 [3:51:19<25:34:45, 3.51it/s] 13%|█▎ | 48278/371472 [3:51:19<27:01:04, 3.32it/s] 13%|█▎ | 48279/371472 [3:51:20<26:50:38, 3.34it/s] 13%|█▎ | 48280/371472 [3:51:20<27:20:08, 3.28it/s] {'loss': 4.1593, 'learning_rate': 8.834493309487383e-07, 'epoch': 2.08} + 13%|█▎ | 48280/371472 [3:51:20<27:20:08, 3.28it/s] 13%|█▎ | 48281/371472 [3:51:20<27:08:17, 3.31it/s] 13%|█▎ | 48282/371472 [3:51:20<26:02:40, 3.45it/s] 13%|█▎ | 48283/371472 [3:51:21<24:53:04, 3.61it/s] 13%|█▎ | 48284/371472 [3:51:21<25:28:45, 3.52it/s] 13%|█▎ | 48285/371472 [3:51:21<24:26:47, 3.67it/s] 13%|█▎ | 48286/371472 [3:51:22<26:46:21, 3.35it/s] 13%|█▎ | 48287/371472 [3:51:22<25:36:35, 3.51it/s] 13%|█▎ | 48288/371472 [3:51:22<25:18:46, 3.55it/s] 13%|█▎ | 48289/371472 [3:51:22<25:21:53, 3.54it/s] 13%|█▎ | 48290/371472 [3:51:23<24:22:33, 3.68it/s] 13%|█▎ | 48291/371472 [3:51:23<24:07:44, 3.72it/s] 13%|█▎ | 48292/371472 [3:51:23<25:15:13, 3.55it/s] 13%|█▎ | 48293/371472 [3:51:24<28:38:42, 3.13it/s] 13%|█▎ | 48294/371472 [3:51:24<27:25:36, 3.27it/s] 13%|█▎ | 48295/371472 [3:51:24<26:45:34, 3.35it/s] 13%|█▎ | 48296/371472 [3:51:24<26:07:37, 3.44it/s] 13%|█▎ | 48297/371472 [3:51:25<27:23:26, 3.28it/s] 13%|█▎ | 48298/371472 [3:51:25<26:17:54, 3.41it/s] 13%|█▎ | 48299/371472 [3:51:25<26:00:15, 3.45it/s] 13%|█▎ | 48300/371472 [3:51:26<26:27:17, 3.39it/s] {'loss': 4.265, 'learning_rate': 8.834008489732595e-07, 'epoch': 2.08} + 13%|█▎ | 48300/371472 [3:51:26<26:27:17, 3.39it/s] 13%|█▎ | 48301/371472 [3:51:26<26:34:00, 3.38it/s] 13%|█▎ | 48302/371472 [3:51:26<28:35:18, 3.14it/s] 13%|█▎ | 48303/371472 [3:51:27<27:10:57, 3.30it/s] 13%|█▎ | 48304/371472 [3:51:27<29:37:30, 3.03it/s] 13%|█▎ | 48305/371472 [3:51:27<29:28:50, 3.05it/s] 13%|█▎ | 48306/371472 [3:51:28<27:29:25, 3.27it/s] 13%|█▎ | 48307/371472 [3:51:28<27:06:52, 3.31it/s] 13%|█▎ | 48308/371472 [3:51:28<28:06:19, 3.19it/s] 13%|█▎ | 48309/371472 [3:51:28<26:41:36, 3.36it/s] 13%|█▎ | 48310/371472 [3:51:29<25:47:07, 3.48it/s] 13%|█▎ | 48311/371472 [3:51:29<24:53:48, 3.61it/s] 13%|█▎ | 48312/371472 [3:51:29<27:37:17, 3.25it/s] 13%|█▎ | 48313/371472 [3:51:30<25:52:04, 3.47it/s] 13%|█▎ | 48314/371472 [3:51:30<25:32:49, 3.51it/s] 13%|█▎ | 48315/371472 [3:51:30<25:16:46, 3.55it/s] 13%|█▎ | 48316/371472 [3:51:30<24:16:45, 3.70it/s] 13%|█▎ | 48317/371472 [3:51:31<24:28:42, 3.67it/s] 13%|█▎ | 48318/371472 [3:51:31<27:34:40, 3.25it/s] 13%|█▎ | 48319/371472 [3:51:31<26:33:49, 3.38it/s] 13%|█▎ | 48320/371472 [3:51:32<30:27:53, 2.95it/s] {'loss': 4.2934, 'learning_rate': 8.833523669977806e-07, 'epoch': 2.08} + 13%|█▎ | 48320/371472 [3:51:32<30:27:53, 2.95it/s] 13%|█▎ | 48321/371472 [3:51:32<28:20:47, 3.17it/s] 13%|█▎ | 48322/371472 [3:51:32<29:01:07, 3.09it/s] 13%|█▎ | 48323/371472 [3:51:33<28:37:19, 3.14it/s] 13%|█▎ | 48324/371472 [3:51:33<28:48:55, 3.12it/s] 13%|█▎ | 48325/371472 [3:51:33<27:18:55, 3.29it/s] 13%|█▎ | 48326/371472 [3:51:34<28:56:02, 3.10it/s] 13%|█▎ | 48327/371472 [3:51:34<27:48:12, 3.23it/s] 13%|█▎ | 48328/371472 [3:51:34<27:28:53, 3.27it/s] 13%|█▎ | 48329/371472 [3:51:35<28:07:45, 3.19it/s] 13%|█▎ | 48330/371472 [3:51:35<26:27:46, 3.39it/s] 13%|█▎ | 48331/371472 [3:51:35<25:36:05, 3.51it/s] 13%|█▎ | 48332/371472 [3:51:35<26:32:48, 3.38it/s] 13%|█▎ | 48333/371472 [3:51:36<26:07:59, 3.43it/s] 13%|█▎ | 48334/371472 [3:51:36<24:50:24, 3.61it/s] 13%|█▎ | 48335/371472 [3:51:36<24:49:16, 3.62it/s] 13%|█▎ | 48336/371472 [3:51:36<24:38:34, 3.64it/s] 13%|█▎ | 48337/371472 [3:51:37<25:04:58, 3.58it/s] 13%|█▎ | 48338/371472 [3:51:37<25:06:42, 3.57it/s] 13%|█▎ | 48339/371472 [3:51:37<25:44:09, 3.49it/s] 13%|█▎ | 48340/371472 [3:51:38<25:49:32, 3.48it/s] {'loss': 4.3757, 'learning_rate': 8.833038850223017e-07, 'epoch': 2.08} + 13%|█▎ | 48340/371472 [3:51:38<25:49:32, 3.48it/s] 13%|█▎ | 48341/371472 [3:51:38<25:18:38, 3.55it/s] 13%|█▎ | 48342/371472 [3:51:38<27:32:30, 3.26it/s] 13%|█▎ | 48343/371472 [3:51:38<25:59:55, 3.45it/s] 13%|█▎ | 48344/371472 [3:51:39<25:14:29, 3.56it/s] 13%|█▎ | 48345/371472 [3:51:39<24:12:15, 3.71it/s] 13%|█▎ | 48346/371472 [3:51:39<24:25:20, 3.68it/s] 13%|█▎ | 48347/371472 [3:51:40<23:53:41, 3.76it/s] 13%|█▎ | 48348/371472 [3:51:40<23:47:14, 3.77it/s] 13%|█▎ | 48349/371472 [3:51:40<24:01:07, 3.74it/s] 13%|█▎ | 48350/371472 [3:51:40<23:26:29, 3.83it/s] 13%|█▎ | 48351/371472 [3:51:41<24:06:14, 3.72it/s] 13%|█▎ | 48352/371472 [3:51:41<25:00:51, 3.59it/s] 13%|█▎ | 48353/371472 [3:51:41<24:08:25, 3.72it/s] 13%|█▎ | 48354/371472 [3:51:41<23:28:15, 3.82it/s] 13%|█▎ | 48355/371472 [3:51:42<24:05:36, 3.73it/s] 13%|█▎ | 48356/371472 [3:51:42<25:34:32, 3.51it/s] 13%|█▎ | 48357/371472 [3:51:42<25:14:36, 3.56it/s] 13%|█▎ | 48358/371472 [3:51:43<24:45:47, 3.62it/s] 13%|█▎ | 48359/371472 [3:51:43<25:50:43, 3.47it/s] 13%|█▎ | 48360/371472 [3:51:43<25:53:45, 3.47it/s] {'loss': 4.3117, 'learning_rate': 8.832554030468227e-07, 'epoch': 2.08} + 13%|█▎ | 48360/371472 [3:51:43<25:53:45, 3.47it/s] 13%|█▎ | 48361/371472 [3:51:43<24:54:23, 3.60it/s] 13%|█▎ | 48362/371472 [3:51:44<25:36:35, 3.50it/s] 13%|█▎ | 48363/371472 [3:51:44<25:02:47, 3.58it/s] 13%|█▎ | 48364/371472 [3:51:44<24:11:42, 3.71it/s] 13%|█▎ | 48365/371472 [3:51:44<23:26:05, 3.83it/s] 13%|█▎ | 48366/371472 [3:51:45<25:24:13, 3.53it/s] 13%|█▎ | 48367/371472 [3:51:45<24:22:34, 3.68it/s] 13%|█▎ | 48368/371472 [3:51:45<25:50:15, 3.47it/s] 13%|█▎ | 48369/371472 [3:51:46<26:15:44, 3.42it/s] 13%|█▎ | 48370/371472 [3:51:46<24:50:33, 3.61it/s] 13%|█▎ | 48371/371472 [3:51:46<24:58:26, 3.59it/s] 13%|█▎ | 48372/371472 [3:51:46<24:46:29, 3.62it/s] 13%|█▎ | 48373/371472 [3:51:47<26:18:26, 3.41it/s] 13%|█▎ | 48374/371472 [3:51:47<25:40:26, 3.50it/s] 13%|█▎ | 48375/371472 [3:51:47<25:44:50, 3.49it/s] 13%|█▎ | 48376/371472 [3:51:48<24:37:10, 3.65it/s] 13%|█▎ | 48377/371472 [3:51:48<25:20:47, 3.54it/s] 13%|█▎ | 48378/371472 [3:51:48<24:48:07, 3.62it/s] 13%|█▎ | 48379/371472 [3:51:48<25:34:01, 3.51it/s] 13%|█▎ | 48380/371472 [3:51:49<24:07:52, 3.72it/s] {'loss': 4.3394, 'learning_rate': 8.832069210713439e-07, 'epoch': 2.08} + 13%|█▎ | 48380/371472 [3:51:49<24:07:52, 3.72it/s] 13%|█▎ | 48381/371472 [3:51:49<23:39:07, 3.79it/s] 13%|█▎ | 48382/371472 [3:51:49<23:38:44, 3.80it/s] 13%|█▎ | 48383/371472 [3:51:50<25:39:03, 3.50it/s] 13%|█▎ | 48384/371472 [3:51:50<24:55:34, 3.60it/s] 13%|█▎ | 48385/371472 [3:51:50<25:05:24, 3.58it/s] 13%|█▎ | 48386/371472 [3:51:50<24:50:32, 3.61it/s] 13%|█▎ | 48387/371472 [3:51:51<24:24:34, 3.68it/s] 13%|█▎ | 48388/371472 [3:51:51<24:07:43, 3.72it/s] 13%|█▎ | 48389/371472 [3:51:51<25:06:20, 3.57it/s] 13%|█▎ | 48390/371472 [3:51:51<25:05:23, 3.58it/s] 13%|█▎ | 48391/371472 [3:51:52<25:48:35, 3.48it/s] 13%|█▎ | 48392/371472 [3:51:52<26:08:32, 3.43it/s] 13%|█▎ | 48393/371472 [3:51:52<27:45:58, 3.23it/s] 13%|█▎ | 48394/371472 [3:51:53<26:12:59, 3.42it/s] 13%|█▎ | 48395/371472 [3:51:53<25:10:35, 3.56it/s] 13%|█▎ | 48396/371472 [3:51:53<24:12:40, 3.71it/s] 13%|█▎ | 48397/371472 [3:51:53<25:00:00, 3.59it/s] 13%|█▎ | 48398/371472 [3:51:54<24:12:44, 3.71it/s] 13%|█▎ | 48399/371472 [3:51:54<25:17:45, 3.55it/s] 13%|█▎ | 48400/371472 [3:51:54<25:21:01, 3.54it/s] {'loss': 4.2903, 'learning_rate': 8.831584390958649e-07, 'epoch': 2.08} + 13%|█▎ | 48400/371472 [3:51:54<25:21:01, 3.54it/s] 13%|█▎ | 48401/371472 [3:51:55<25:07:19, 3.57it/s] 13%|█▎ | 48402/371472 [3:51:55<25:10:05, 3.57it/s] 13%|█▎ | 48403/371472 [3:51:55<24:27:10, 3.67it/s] 13%|█▎ | 48404/371472 [3:51:55<23:33:42, 3.81it/s] 13%|█▎ | 48405/371472 [3:51:56<24:33:27, 3.65it/s] 13%|█▎ | 48406/371472 [3:51:56<23:42:46, 3.78it/s] 13%|█▎ | 48407/371472 [3:51:56<24:21:33, 3.68it/s] 13%|█▎ | 48408/371472 [3:51:56<24:49:29, 3.61it/s] 13%|█▎ | 48409/371472 [3:51:57<23:51:36, 3.76it/s] 13%|█▎ | 48410/371472 [3:51:57<24:27:16, 3.67it/s] 13%|█▎ | 48411/371472 [3:51:57<24:01:57, 3.73it/s] 13%|█▎ | 48412/371472 [3:51:58<25:00:59, 3.59it/s] 13%|█▎ | 48413/371472 [3:51:58<25:25:31, 3.53it/s] 13%|█▎ | 48414/371472 [3:51:58<27:13:50, 3.30it/s] 13%|█▎ | 48415/371472 [3:51:59<29:35:51, 3.03it/s] 13%|█▎ | 48416/371472 [3:51:59<27:42:00, 3.24it/s] 13%|█▎ | 48417/371472 [3:51:59<27:53:05, 3.22it/s] 13%|█▎ | 48418/371472 [3:52:00<28:44:36, 3.12it/s] 13%|█▎ | 48419/371472 [3:52:00<28:05:06, 3.20it/s] 13%|█▎ | 48420/371472 [3:52:00<27:56:07, 3.21it/s] {'loss': 4.4071, 'learning_rate': 8.831099571203861e-07, 'epoch': 2.09} + 13%|█▎ | 48420/371472 [3:52:00<27:56:07, 3.21it/s] 13%|█▎ | 48421/371472 [3:52:00<27:17:12, 3.29it/s] 13%|█▎ | 48422/371472 [3:52:01<29:14:29, 3.07it/s] 13%|█▎ | 48423/371472 [3:52:01<27:40:58, 3.24it/s] 13%|█▎ | 48424/371472 [3:52:01<26:38:29, 3.37it/s] 13%|█▎ | 48425/371472 [3:52:02<25:21:54, 3.54it/s] 13%|█▎ | 48426/371472 [3:52:02<25:52:11, 3.47it/s] 13%|█▎ | 48427/371472 [3:52:02<26:27:59, 3.39it/s] 13%|█▎ | 48428/371472 [3:52:02<26:02:56, 3.44it/s] 13%|█▎ | 48429/371472 [3:52:03<24:39:17, 3.64it/s] 13%|█▎ | 48430/371472 [3:52:03<25:11:08, 3.56it/s] 13%|█▎ | 48431/371472 [3:52:03<26:41:36, 3.36it/s] 13%|█▎ | 48432/371472 [3:52:04<26:11:34, 3.43it/s] 13%|█▎ | 48433/371472 [3:52:04<25:32:45, 3.51it/s] 13%|█▎ | 48434/371472 [3:52:04<26:02:49, 3.45it/s] 13%|█▎ | 48435/371472 [3:52:04<25:52:51, 3.47it/s] 13%|█▎ | 48436/371472 [3:52:05<25:36:49, 3.50it/s] 13%|█▎ | 48437/371472 [3:52:05<26:42:49, 3.36it/s] 13%|█▎ | 48438/371472 [3:52:05<27:49:06, 3.23it/s] 13%|█▎ | 48439/371472 [3:52:06<26:55:53, 3.33it/s] 13%|█▎ | 48440/371472 [3:52:06<25:55:18, 3.46it/s] {'loss': 4.0438, 'learning_rate': 8.830614751449072e-07, 'epoch': 2.09} + 13%|█▎ | 48440/371472 [3:52:06<25:55:18, 3.46it/s] 13%|█▎ | 48441/371472 [3:52:06<25:22:47, 3.54it/s] 13%|█▎ | 48442/371472 [3:52:06<24:43:16, 3.63it/s] 13%|█▎ | 48443/371472 [3:52:07<25:15:50, 3.55it/s] 13%|█▎ | 48444/371472 [3:52:07<25:23:13, 3.53it/s] 13%|█▎ | 48445/371472 [3:52:07<25:18:40, 3.55it/s] 13%|█▎ | 48446/371472 [3:52:08<28:17:48, 3.17it/s] 13%|█▎ | 48447/371472 [3:52:08<28:05:03, 3.19it/s] 13%|█▎ | 48448/371472 [3:52:08<27:46:08, 3.23it/s] 13%|█▎ | 48449/371472 [3:52:09<28:06:03, 3.19it/s] 13%|█▎ | 48450/371472 [3:52:09<27:52:06, 3.22it/s] 13%|█▎ | 48451/371472 [3:52:09<26:44:14, 3.36it/s] 13%|█▎ | 48452/371472 [3:52:10<26:31:08, 3.38it/s] 13%|█▎ | 48453/371472 [3:52:10<25:35:01, 3.51it/s] 13%|█▎ | 48454/371472 [3:52:10<24:15:15, 3.70it/s] 13%|█▎ | 48455/371472 [3:52:10<23:47:58, 3.77it/s] 13%|█▎ | 48456/371472 [3:52:11<24:32:18, 3.66it/s] 13%|█▎ | 48457/371472 [3:52:11<24:11:20, 3.71it/s] 13%|█▎ | 48458/371472 [3:52:11<23:58:56, 3.74it/s] 13%|█▎ | 48459/371472 [3:52:11<26:07:45, 3.43it/s] 13%|█▎ | 48460/371472 [3:52:12<25:13:40, 3.56it/s] {'loss': 4.3352, 'learning_rate': 8.830129931694284e-07, 'epoch': 2.09} + 13%|█▎ | 48460/371472 [3:52:12<25:13:40, 3.56it/s] 13%|█▎ | 48461/371472 [3:52:12<25:20:43, 3.54it/s] 13%|█▎ | 48462/371472 [3:52:12<28:26:34, 3.15it/s] 13%|█▎ | 48463/371472 [3:52:13<27:19:19, 3.28it/s] 13%|█▎ | 48464/371472 [3:52:13<26:33:12, 3.38it/s] 13%|█▎ | 48465/371472 [3:52:13<26:24:35, 3.40it/s] 13%|█▎ | 48466/371472 [3:52:13<25:22:38, 3.54it/s] 13%|█▎ | 48467/371472 [3:52:14<25:14:13, 3.56it/s] 13%|█▎ | 48468/371472 [3:52:14<24:42:55, 3.63it/s] 13%|█▎ | 48469/371472 [3:52:14<23:50:26, 3.76it/s] 13%|█▎ | 48470/371472 [3:52:15<23:16:48, 3.85it/s] 13%|█▎ | 48471/371472 [3:52:15<24:15:07, 3.70it/s] 13%|█▎ | 48472/371472 [3:52:15<24:43:26, 3.63it/s] 13%|█▎ | 48473/371472 [3:52:15<24:08:11, 3.72it/s] 13%|█▎ | 48474/371472 [3:52:16<23:43:58, 3.78it/s] 13%|█▎ | 48475/371472 [3:52:16<23:19:36, 3.85it/s] 13%|█▎ | 48476/371472 [3:52:16<23:57:48, 3.74it/s] 13%|█▎ | 48477/371472 [3:52:16<23:15:25, 3.86it/s] 13%|█▎ | 48478/371472 [3:52:17<23:30:55, 3.82it/s] 13%|█▎ | 48479/371472 [3:52:17<23:43:56, 3.78it/s] 13%|█▎ | 48480/371472 [3:52:17<24:20:13, 3.69it/s] {'loss': 4.2519, 'learning_rate': 8.829645111939494e-07, 'epoch': 2.09} + 13%|█▎ | 48480/371472 [3:52:17<24:20:13, 3.69it/s] 13%|█▎ | 48481/371472 [3:52:18<25:34:10, 3.51it/s] 13%|█▎ | 48482/371472 [3:52:18<25:46:39, 3.48it/s] 13%|█▎ | 48483/371472 [3:52:18<25:21:45, 3.54it/s] 13%|█▎ | 48484/371472 [3:52:18<25:27:32, 3.52it/s] 13%|█▎ | 48485/371472 [3:52:19<25:19:00, 3.54it/s] 13%|█▎ | 48486/371472 [3:52:19<24:59:59, 3.59it/s] 13%|█▎ | 48487/371472 [3:52:19<26:17:58, 3.41it/s] 13%|█▎ | 48488/371472 [3:52:20<26:57:08, 3.33it/s] 13%|█▎ | 48489/371472 [3:52:20<28:59:15, 3.10it/s] 13%|█▎ | 48490/371472 [3:52:20<27:20:14, 3.28it/s] 13%|█▎ | 48491/371472 [3:52:20<26:26:15, 3.39it/s] 13%|█▎ | 48492/371472 [3:52:21<25:50:12, 3.47it/s] 13%|█▎ | 48493/371472 [3:52:21<25:40:56, 3.49it/s] 13%|█▎ | 48494/371472 [3:52:21<25:20:28, 3.54it/s] 13%|█▎ | 48495/371472 [3:52:22<25:00:42, 3.59it/s] 13%|█▎ | 48496/371472 [3:52:22<25:57:28, 3.46it/s] 13%|█▎ | 48497/371472 [3:52:22<26:07:54, 3.43it/s] 13%|█▎ | 48498/371472 [3:52:22<26:07:20, 3.43it/s] 13%|█▎ | 48499/371472 [3:52:23<26:31:42, 3.38it/s] 13%|█▎ | 48500/371472 [3:52:23<26:58:28, 3.33it/s] {'loss': 4.3348, 'learning_rate': 8.829160292184705e-07, 'epoch': 2.09} + 13%|█▎ | 48500/371472 [3:52:23<26:58:28, 3.33it/s] 13%|█▎ | 48501/371472 [3:52:23<26:30:00, 3.39it/s] 13%|█▎ | 48502/371472 [3:52:24<27:15:28, 3.29it/s] 13%|█▎ | 48503/371472 [3:52:24<27:45:29, 3.23it/s] 13%|█▎ | 48504/371472 [3:52:24<27:02:59, 3.32it/s] 13%|█▎ | 48505/371472 [3:52:25<25:57:29, 3.46it/s] 13%|█▎ | 48506/371472 [3:52:25<24:50:00, 3.61it/s] 13%|█▎ | 48507/371472 [3:52:25<24:01:57, 3.73it/s] 13%|█▎ | 48508/371472 [3:52:25<24:27:55, 3.67it/s] 13%|█▎ | 48509/371472 [3:52:26<23:40:11, 3.79it/s] 13%|█▎ | 48510/371472 [3:52:26<24:03:25, 3.73it/s] 13%|█▎ | 48511/371472 [3:52:26<25:01:46, 3.58it/s] 13%|█▎ | 48512/371472 [3:52:26<25:32:02, 3.51it/s] 13%|█▎ | 48513/371472 [3:52:27<24:47:03, 3.62it/s] 13%|█▎ | 48514/371472 [3:52:27<25:22:43, 3.53it/s] 13%|█▎ | 48515/371472 [3:52:27<25:30:04, 3.52it/s] 13%|█▎ | 48516/371472 [3:52:28<24:53:59, 3.60it/s] 13%|█▎ | 48517/371472 [3:52:28<25:30:15, 3.52it/s] 13%|█▎ | 48518/371472 [3:52:28<25:55:28, 3.46it/s] 13%|█▎ | 48519/371472 [3:52:28<25:59:10, 3.45it/s] 13%|█▎ | 48520/371472 [3:52:29<27:57:31, 3.21it/s] {'loss': 4.0967, 'learning_rate': 8.828675472429916e-07, 'epoch': 2.09} + 13%|█▎ | 48520/371472 [3:52:29<27:57:31, 3.21it/s] 13%|█▎ | 48521/371472 [3:52:29<26:22:56, 3.40it/s] 13%|█▎ | 48522/371472 [3:52:29<26:27:44, 3.39it/s] 13%|█▎ | 48523/371472 [3:52:30<25:36:49, 3.50it/s] 13%|█▎ | 48524/371472 [3:52:30<24:00:08, 3.74it/s] 13%|█▎ | 48525/371472 [3:52:30<24:49:31, 3.61it/s] 13%|█▎ | 48526/371472 [3:52:30<24:21:00, 3.68it/s] 13%|█▎ | 48527/371472 [3:52:31<23:48:24, 3.77it/s] 13%|█▎ | 48528/371472 [3:52:31<24:14:00, 3.70it/s] 13%|█▎ | 48529/371472 [3:52:31<24:11:56, 3.71it/s] 13%|█▎ | 48530/371472 [3:52:31<24:19:43, 3.69it/s] 13%|█▎ | 48531/371472 [3:52:32<25:55:04, 3.46it/s] 13%|█▎ | 48532/371472 [3:52:32<27:27:17, 3.27it/s] 13%|█▎ | 48533/371472 [3:52:32<25:31:46, 3.51it/s] 13%|█▎ | 48534/371472 [3:52:33<26:59:22, 3.32it/s] 13%|█▎ | 48535/371472 [3:52:33<25:38:15, 3.50it/s] 13%|█▎ | 48536/371472 [3:52:33<24:29:46, 3.66it/s] 13%|█▎ | 48537/371472 [3:52:34<24:26:06, 3.67it/s] 13%|█▎ | 48538/371472 [3:52:34<24:29:02, 3.66it/s] 13%|█▎ | 48539/371472 [3:52:34<25:42:36, 3.49it/s] 13%|█▎ | 48540/371472 [3:52:34<25:56:55, 3.46it/s] {'loss': 4.4048, 'learning_rate': 8.828190652675128e-07, 'epoch': 2.09} + 13%|█▎ | 48540/371472 [3:52:34<25:56:55, 3.46it/s] 13%|█▎ | 48541/371472 [3:52:35<26:05:32, 3.44it/s] 13%|█▎ | 48542/371472 [3:52:35<26:21:57, 3.40it/s] 13%|█▎ | 48543/371472 [3:52:35<24:48:26, 3.62it/s] 13%|█▎ | 48544/371472 [3:52:35<23:51:17, 3.76it/s] 13%|█▎ | 48545/371472 [3:52:36<24:27:04, 3.67it/s] 13%|█▎ | 48546/371472 [3:52:36<24:40:48, 3.63it/s] 13%|█▎ | 48547/371472 [3:52:36<24:22:32, 3.68it/s] 13%|█▎ | 48548/371472 [3:52:37<25:20:20, 3.54it/s] 13%|█▎ | 48549/371472 [3:52:37<25:00:41, 3.59it/s] 13%|█▎ | 48550/371472 [3:52:37<23:47:38, 3.77it/s] 13%|█▎ | 48551/371472 [3:52:37<23:50:38, 3.76it/s] 13%|█▎ | 48552/371472 [3:52:38<23:41:54, 3.79it/s] 13%|█▎ | 48553/371472 [3:52:38<23:39:47, 3.79it/s] 13%|█▎ | 48554/371472 [3:52:38<24:32:59, 3.65it/s] 13%|█▎ | 48555/371472 [3:52:39<25:25:40, 3.53it/s] 13%|█▎ | 48556/371472 [3:52:39<26:55:38, 3.33it/s] 13%|█▎ | 48557/371472 [3:52:39<25:59:55, 3.45it/s] 13%|█▎ | 48558/371472 [3:52:39<25:55:59, 3.46it/s] 13%|█▎ | 48559/371472 [3:52:40<26:55:31, 3.33it/s] 13%|█▎ | 48560/371472 [3:52:40<26:08:23, 3.43it/s] {'loss': 4.379, 'learning_rate': 8.827705832920338e-07, 'epoch': 2.09} + 13%|█▎ | 48560/371472 [3:52:40<26:08:23, 3.43it/s] 13%|█▎ | 48561/371472 [3:52:40<25:14:52, 3.55it/s] 13%|█▎ | 48562/371472 [3:52:41<24:38:11, 3.64it/s] 13%|█▎ | 48563/371472 [3:52:41<23:48:19, 3.77it/s] 13%|█▎ | 48564/371472 [3:52:41<25:40:10, 3.49it/s] 13%|█▎ | 48565/371472 [3:52:41<25:49:57, 3.47it/s] 13%|█▎ | 48566/371472 [3:52:42<24:33:08, 3.65it/s] 13%|█▎ | 48567/371472 [3:52:42<24:24:54, 3.67it/s] 13%|█▎ | 48568/371472 [3:52:42<24:42:33, 3.63it/s] 13%|█▎ | 48569/371472 [3:52:42<24:57:18, 3.59it/s] 13%|█▎ | 48570/371472 [3:52:43<24:38:19, 3.64it/s] 13%|█▎ | 48571/371472 [3:52:43<24:36:58, 3.64it/s] 13%|█▎ | 48572/371472 [3:52:43<25:30:25, 3.52it/s] 13%|█▎ | 48573/371472 [3:52:44<25:07:08, 3.57it/s] 13%|█▎ | 48574/371472 [3:52:44<24:06:31, 3.72it/s] 13%|█▎ | 48575/371472 [3:52:44<26:30:58, 3.38it/s] 13%|█▎ | 48576/371472 [3:52:44<26:45:40, 3.35it/s] 13%|█▎ | 48577/371472 [3:52:45<25:55:59, 3.46it/s] 13%|█▎ | 48578/371472 [3:52:45<26:18:43, 3.41it/s] 13%|█▎ | 48579/371472 [3:52:45<24:57:25, 3.59it/s] 13%|█▎ | 48580/371472 [3:52:46<24:47:48, 3.62it/s] {'loss': 4.3112, 'learning_rate': 8.82722101316555e-07, 'epoch': 2.09} + 13%|█▎ | 48580/371472 [3:52:46<24:47:48, 3.62it/s] 13%|█▎ | 48581/371472 [3:52:46<24:40:13, 3.64it/s] 13%|█▎ | 48582/371472 [3:52:46<26:49:48, 3.34it/s] 13%|█▎ | 48583/371472 [3:52:46<26:48:14, 3.35it/s] 13%|█▎ | 48584/371472 [3:52:47<26:31:23, 3.38it/s] 13%|█▎ | 48585/371472 [3:52:47<25:10:15, 3.56it/s] 13%|█▎ | 48586/371472 [3:52:47<25:31:10, 3.51it/s] 13%|█▎ | 48587/371472 [3:52:48<26:14:05, 3.42it/s] 13%|█▎ | 48588/371472 [3:52:48<25:02:34, 3.58it/s] 13%|█▎ | 48589/371472 [3:52:48<24:38:12, 3.64it/s] 13%|█▎ | 48590/371472 [3:52:48<25:36:29, 3.50it/s] 13%|█▎ | 48591/371472 [3:52:49<24:50:38, 3.61it/s] 13%|█▎ | 48592/371472 [3:52:49<24:26:41, 3.67it/s] 13%|█▎ | 48593/371472 [3:52:49<23:41:21, 3.79it/s] 13%|█▎ | 48594/371472 [3:52:49<23:04:18, 3.89it/s] 13%|█▎ | 48595/371472 [3:52:50<22:58:50, 3.90it/s] 13%|█▎ | 48596/371472 [3:52:50<23:15:13, 3.86it/s] 13%|█▎ | 48597/371472 [3:52:50<23:13:55, 3.86it/s] 13%|█▎ | 48598/371472 [3:52:51<24:30:01, 3.66it/s] 13%|█▎ | 48599/371472 [3:52:51<23:50:54, 3.76it/s] 13%|█▎ | 48600/371472 [3:52:51<23:34:36, 3.80it/s] {'loss': 4.2812, 'learning_rate': 8.826736193410761e-07, 'epoch': 2.09} + 13%|█▎ | 48600/371472 [3:52:51<23:34:36, 3.80it/s] 13%|█▎ | 48601/371472 [3:52:51<24:07:55, 3.72it/s] 13%|█▎ | 48602/371472 [3:52:52<23:48:14, 3.77it/s] 13%|█▎ | 48603/371472 [3:52:52<23:13:03, 3.86it/s] 13%|█▎ | 48604/371472 [3:52:52<23:00:01, 3.90it/s] 13%|█▎ | 48605/371472 [3:52:52<22:45:07, 3.94it/s] 13%|█▎ | 48606/371472 [3:52:53<23:56:32, 3.75it/s] 13%|█▎ | 48607/371472 [3:52:53<23:28:38, 3.82it/s] 13%|█▎ | 48608/371472 [3:52:53<23:19:21, 3.85it/s] 13%|█▎ | 48609/371472 [3:52:53<24:45:12, 3.62it/s] 13%|█▎ | 48610/371472 [3:52:54<25:04:07, 3.58it/s] 13%|█▎ | 48611/371472 [3:52:54<26:13:50, 3.42it/s] 13%|█▎ | 48612/371472 [3:52:54<24:49:27, 3.61it/s] 13%|█▎ | 48613/371472 [3:52:55<25:20:20, 3.54it/s] 13%|█▎ | 48614/371472 [3:52:55<25:42:09, 3.49it/s] 13%|█▎ | 48615/371472 [3:52:55<25:30:34, 3.52it/s] 13%|█▎ | 48616/371472 [3:52:55<25:06:10, 3.57it/s] 13%|█▎ | 48617/371472 [3:52:56<25:45:42, 3.48it/s] 13%|█▎ | 48618/371472 [3:52:56<24:51:18, 3.61it/s] 13%|█▎ | 48619/371472 [3:52:56<24:10:07, 3.71it/s] 13%|█▎ | 48620/371472 [3:52:57<23:35:14, 3.80it/s] {'loss': 4.2841, 'learning_rate': 8.82625137365597e-07, 'epoch': 2.09} + 13%|█▎ | 48620/371472 [3:52:57<23:35:14, 3.80it/s] 13%|█▎ | 48621/371472 [3:52:57<24:06:43, 3.72it/s] 13%|█▎ | 48622/371472 [3:52:57<24:41:04, 3.63it/s] 13%|█▎ | 48623/371472 [3:52:57<24:10:28, 3.71it/s] 13%|█▎ | 48624/371472 [3:52:58<25:34:23, 3.51it/s] 13%|█▎ | 48625/371472 [3:52:58<24:30:14, 3.66it/s] 13%|█▎ | 48626/371472 [3:52:58<24:45:13, 3.62it/s] 13%|█▎ | 48627/371472 [3:52:58<25:37:00, 3.50it/s] 13%|█▎ | 48628/371472 [3:52:59<24:34:54, 3.65it/s] 13%|█▎ | 48629/371472 [3:52:59<25:40:00, 3.49it/s] 13%|█▎ | 48630/371472 [3:52:59<27:42:33, 3.24it/s] 13%|█▎ | 48631/371472 [3:53:00<27:55:30, 3.21it/s] 13%|█▎ | 48632/371472 [3:53:00<27:18:46, 3.28it/s] 13%|█▎ | 48633/371472 [3:53:00<26:35:28, 3.37it/s] 13%|█▎ | 48634/371472 [3:53:01<26:06:46, 3.43it/s] 13%|█▎ | 48635/371472 [3:53:01<25:39:41, 3.49it/s] 13%|█▎ | 48636/371472 [3:53:01<24:39:41, 3.64it/s] 13%|█▎ | 48637/371472 [3:53:01<24:08:27, 3.71it/s] 13%|█▎ | 48638/371472 [3:53:02<23:19:11, 3.85it/s] 13%|█▎ | 48639/371472 [3:53:02<24:33:15, 3.65it/s] 13%|█▎ | 48640/371472 [3:53:02<24:58:58, 3.59it/s] {'loss': 4.4263, 'learning_rate': 8.825766553901182e-07, 'epoch': 2.1} + 13%|█▎ | 48640/371472 [3:53:02<24:58:58, 3.59it/s] 13%|█▎ | 48641/371472 [3:53:02<25:02:47, 3.58it/s] 13%|█▎ | 48642/371472 [3:53:03<24:54:38, 3.60it/s] 13%|█▎ | 48643/371472 [3:53:03<24:28:43, 3.66it/s] 13%|█▎ | 48644/371472 [3:53:03<24:42:49, 3.63it/s] 13%|█▎ | 48645/371472 [3:53:04<23:33:47, 3.81it/s] 13%|█▎ | 48646/371472 [3:53:04<23:16:59, 3.85it/s] 13%|█▎ | 48647/371472 [3:53:04<24:54:40, 3.60it/s] 13%|█▎ | 48648/371472 [3:53:04<24:44:50, 3.62it/s] 13%|█▎ | 48649/371472 [3:53:05<25:58:23, 3.45it/s] 13%|█▎ | 48650/371472 [3:53:05<25:09:11, 3.57it/s] 13%|█▎ | 48651/371472 [3:53:05<25:01:18, 3.58it/s] 13%|█▎ | 48652/371472 [3:53:06<25:11:20, 3.56it/s] 13%|█▎ | 48653/371472 [3:53:06<24:25:14, 3.67it/s] 13%|█▎ | 48654/371472 [3:53:06<24:11:48, 3.71it/s] 13%|█▎ | 48655/371472 [3:53:06<23:32:45, 3.81it/s] 13%|█▎ | 48656/371472 [3:53:07<23:37:24, 3.80it/s] 13%|█▎ | 48657/371472 [3:53:07<23:33:20, 3.81it/s] 13%|█▎ | 48658/371472 [3:53:07<23:39:13, 3.79it/s] 13%|█▎ | 48659/371472 [3:53:07<23:09:35, 3.87it/s] 13%|█▎ | 48660/371472 [3:53:08<23:20:17, 3.84it/s] {'loss': 4.4555, 'learning_rate': 8.825281734146394e-07, 'epoch': 2.1} + 13%|█▎ | 48660/371472 [3:53:08<23:20:17, 3.84it/s] 13%|█▎ | 48661/371472 [3:53:08<23:39:12, 3.79it/s] 13%|█▎ | 48662/371472 [3:53:08<25:48:08, 3.48it/s] 13%|█▎ | 48663/371472 [3:53:08<25:31:10, 3.51it/s] 13%|█▎ | 48664/371472 [3:53:09<26:33:11, 3.38it/s] 13%|█▎ | 48665/371472 [3:53:09<25:38:25, 3.50it/s] 13%|█▎ | 48666/371472 [3:53:09<25:15:39, 3.55it/s] 13%|█▎ | 48667/371472 [3:53:10<25:05:11, 3.57it/s] 13%|█▎ | 48668/371472 [3:53:10<24:25:24, 3.67it/s] 13%|█▎ | 48669/371472 [3:53:10<23:43:23, 3.78it/s] 13%|█▎ | 48670/371472 [3:53:10<24:30:23, 3.66it/s] 13%|█▎ | 48671/371472 [3:53:11<24:45:55, 3.62it/s] 13%|█▎ | 48672/371472 [3:53:11<25:18:34, 3.54it/s] 13%|█▎ | 48673/371472 [3:53:11<26:21:02, 3.40it/s] 13%|█▎ | 48674/371472 [3:53:12<25:34:59, 3.50it/s] 13%|█▎ | 48675/371472 [3:53:12<27:10:37, 3.30it/s] 13%|█▎ | 48676/371472 [3:53:12<26:41:04, 3.36it/s] 13%|█▎ | 48677/371472 [3:53:12<26:20:53, 3.40it/s] 13%|█▎ | 48678/371472 [3:53:13<25:31:35, 3.51it/s] 13%|█▎ | 48679/371472 [3:53:13<24:45:47, 3.62it/s] 13%|█▎ | 48680/371472 [3:53:13<26:00:42, 3.45it/s] {'loss': 4.2405, 'learning_rate': 8.824796914391605e-07, 'epoch': 2.1} + 13%|█▎ | 48680/371472 [3:53:13<26:00:42, 3.45it/s] 13%|█▎ | 48681/371472 [3:53:14<26:03:41, 3.44it/s] 13%|█▎ | 48682/371472 [3:53:14<26:10:14, 3.43it/s] 13%|█▎ | 48683/371472 [3:53:14<25:35:09, 3.50it/s] 13%|█▎ | 48684/371472 [3:53:14<25:00:57, 3.58it/s] 13%|█▎ | 48685/371472 [3:53:15<25:30:39, 3.51it/s] 13%|█▎ | 48686/371472 [3:53:15<25:47:59, 3.48it/s] 13%|█▎ | 48687/371472 [3:53:15<26:23:37, 3.40it/s] 13%|█▎ | 48688/371472 [3:53:16<27:29:26, 3.26it/s] 13%|█▎ | 48689/371472 [3:53:16<26:01:47, 3.44it/s] 13%|█▎ | 48690/371472 [3:53:16<26:30:45, 3.38it/s] 13%|█▎ | 48691/371472 [3:53:16<25:07:14, 3.57it/s] 13%|█▎ | 48692/371472 [3:53:17<24:22:34, 3.68it/s] 13%|█▎ | 48693/371472 [3:53:17<23:46:05, 3.77it/s] 13%|█▎ | 48694/371472 [3:53:17<23:17:36, 3.85it/s] 13%|█▎ | 48695/371472 [3:53:17<23:30:26, 3.81it/s] 13%|█▎ | 48696/371472 [3:53:18<23:32:57, 3.81it/s] 13%|█▎ | 48697/371472 [3:53:18<23:23:28, 3.83it/s] 13%|█▎ | 48698/371472 [3:53:18<24:33:04, 3.65it/s] 13%|█▎ | 48699/371472 [3:53:19<25:01:00, 3.58it/s] 13%|█▎ | 48700/371472 [3:53:19<25:42:56, 3.49it/s] {'loss': 4.1133, 'learning_rate': 8.824312094636815e-07, 'epoch': 2.1} + 13%|█▎ | 48700/371472 [3:53:19<25:42:56, 3.49it/s] 13%|█▎ | 48701/371472 [3:53:19<25:20:42, 3.54it/s] 13%|█▎ | 48702/371472 [3:53:19<24:19:32, 3.69it/s] 13%|█▎ | 48703/371472 [3:53:20<24:15:38, 3.70it/s] 13%|█▎ | 48704/371472 [3:53:20<24:35:12, 3.65it/s] 13%|█▎ | 48705/371472 [3:53:20<24:34:17, 3.65it/s] 13%|█▎ | 48706/371472 [3:53:21<24:36:47, 3.64it/s] 13%|█▎ | 48707/371472 [3:53:21<25:04:53, 3.57it/s] 13%|█▎ | 48708/371472 [3:53:21<23:55:20, 3.75it/s] 13%|█▎ | 48709/371472 [3:53:21<23:44:44, 3.78it/s] 13%|█▎ | 48710/371472 [3:53:22<24:37:21, 3.64it/s] 13%|█▎ | 48711/371472 [3:53:22<27:22:35, 3.27it/s] 13%|█▎ | 48712/371472 [3:53:22<26:36:15, 3.37it/s] 13%|█▎ | 48713/371472 [3:53:23<25:16:51, 3.55it/s] 13%|█▎ | 48714/371472 [3:53:23<24:43:54, 3.63it/s] 13%|█▎ | 48715/371472 [3:53:23<25:28:26, 3.52it/s] 13%|█▎ | 48716/371472 [3:53:23<24:49:14, 3.61it/s] 13%|█▎ | 48717/371472 [3:53:24<24:30:40, 3.66it/s] 13%|█▎ | 48718/371472 [3:53:24<24:38:44, 3.64it/s] 13%|█▎ | 48719/371472 [3:53:24<24:33:44, 3.65it/s] 13%|█▎ | 48720/371472 [3:53:24<26:15:52, 3.41it/s] {'loss': 4.2703, 'learning_rate': 8.823827274882027e-07, 'epoch': 2.1} + 13%|█▎ | 48720/371472 [3:53:24<26:15:52, 3.41it/s] 13%|█▎ | 48721/371472 [3:53:25<25:48:49, 3.47it/s] 13%|█▎ | 48722/371472 [3:53:25<24:54:48, 3.60it/s] 13%|█▎ | 48723/371472 [3:53:25<24:22:15, 3.68it/s] 13%|█▎ | 48724/371472 [3:53:26<25:08:45, 3.57it/s] 13%|█▎ | 48725/371472 [3:53:26<25:29:06, 3.52it/s] 13%|█▎ | 48726/371472 [3:53:26<25:54:33, 3.46it/s] 13%|█▎ | 48727/371472 [3:53:27<27:22:49, 3.27it/s] 13%|█▎ | 48728/371472 [3:53:27<26:04:41, 3.44it/s] 13%|█▎ | 48729/371472 [3:53:27<25:05:45, 3.57it/s] 13%|█▎ | 48730/371472 [3:53:27<24:30:25, 3.66it/s] 13%|█▎ | 48731/371472 [3:53:28<24:21:17, 3.68it/s] 13%|█▎ | 48732/371472 [3:53:28<24:07:44, 3.72it/s] 13%|█▎ | 48733/371472 [3:53:28<25:09:02, 3.56it/s] 13%|█▎ | 48734/371472 [3:53:28<25:56:15, 3.46it/s] 13%|█▎ | 48735/371472 [3:53:29<25:29:59, 3.52it/s] 13%|█▎ | 48736/371472 [3:53:29<25:24:57, 3.53it/s] 13%|█▎ | 48737/371472 [3:53:29<24:24:02, 3.67it/s] 13%|█▎ | 48738/371472 [3:53:30<24:19:08, 3.69it/s] 13%|█▎ | 48739/371472 [3:53:30<23:52:53, 3.75it/s] 13%|█▎ | 48740/371472 [3:53:30<23:18:57, 3.84it/s] {'loss': 4.5088, 'learning_rate': 8.823342455127238e-07, 'epoch': 2.1} + 13%|█▎ | 48740/371472 [3:53:30<23:18:57, 3.84it/s] 13%|█▎ | 48741/371472 [3:53:30<22:36:34, 3.97it/s] 13%|█▎ | 48742/371472 [3:53:31<24:14:56, 3.70it/s] 13%|█▎ | 48743/371472 [3:53:31<24:48:57, 3.61it/s] 13%|█▎ | 48744/371472 [3:53:31<24:20:31, 3.68it/s] 13%|█▎ | 48745/371472 [3:53:31<23:35:29, 3.80it/s] 13%|█▎ | 48746/371472 [3:53:32<24:42:37, 3.63it/s] 13%|█▎ | 48747/371472 [3:53:32<24:47:22, 3.62it/s] 13%|█▎ | 48748/371472 [3:53:32<24:11:20, 3.71it/s] 13%|█▎ | 48749/371472 [3:53:32<25:01:06, 3.58it/s] 13%|█▎ | 48750/371472 [3:53:33<25:03:41, 3.58it/s] 13%|█▎ | 48751/371472 [3:53:33<26:31:31, 3.38it/s] 13%|█▎ | 48752/371472 [3:53:33<27:49:13, 3.22it/s] 13%|█▎ | 48753/371472 [3:53:34<26:32:55, 3.38it/s] 13%|█▎ | 48754/371472 [3:53:34<25:04:05, 3.58it/s] 13%|█▎ | 48755/371472 [3:53:34<25:04:00, 3.58it/s] 13%|█▎ | 48756/371472 [3:53:35<24:59:30, 3.59it/s] 13%|█▎ | 48757/371472 [3:53:35<24:01:30, 3.73it/s] 13%|█▎ | 48758/371472 [3:53:35<24:59:55, 3.59it/s] 13%|█▎ | 48759/371472 [3:53:35<24:41:37, 3.63it/s] 13%|█▎ | 48760/371472 [3:53:36<24:25:46, 3.67it/s] {'loss': 4.1103, 'learning_rate': 8.822857635372449e-07, 'epoch': 2.1} + 13%|█▎ | 48760/371472 [3:53:36<24:25:46, 3.67it/s] 13%|█▎ | 48761/371472 [3:53:36<23:57:10, 3.74it/s] 13%|█▎ | 48762/371472 [3:53:36<23:34:38, 3.80it/s] 13%|█▎ | 48763/371472 [3:53:36<24:12:31, 3.70it/s] 13%|█▎ | 48764/371472 [3:53:37<23:57:31, 3.74it/s] 13%|█▎ | 48765/371472 [3:53:37<23:34:13, 3.80it/s] 13%|█▎ | 48766/371472 [3:53:37<22:57:13, 3.91it/s] 13%|█▎ | 48767/371472 [3:53:37<22:51:03, 3.92it/s] 13%|█▎ | 48768/371472 [3:53:38<24:53:47, 3.60it/s] 13%|█▎ | 48769/371472 [3:53:38<24:22:24, 3.68it/s] 13%|█▎ | 48770/371472 [3:53:38<24:41:20, 3.63it/s] 13%|█▎ | 48771/371472 [3:53:39<24:02:38, 3.73it/s] 13%|█▎ | 48772/371472 [3:53:39<25:24:53, 3.53it/s] 13%|█▎ | 48773/371472 [3:53:39<24:17:51, 3.69it/s] 13%|█▎ | 48774/371472 [3:53:39<24:57:16, 3.59it/s] 13%|█▎ | 48775/371472 [3:53:40<24:29:19, 3.66it/s] 13%|█▎ | 48776/371472 [3:53:40<25:33:25, 3.51it/s] 13%|█▎ | 48777/371472 [3:53:40<24:44:08, 3.62it/s] 13%|█▎ | 48778/371472 [3:53:40<24:27:18, 3.67it/s] 13%|█▎ | 48779/371472 [3:53:41<24:57:49, 3.59it/s] 13%|█▎ | 48780/371472 [3:53:41<23:51:51, 3.76it/s] {'loss': 4.4763, 'learning_rate': 8.822372815617659e-07, 'epoch': 2.1} + 13%|█▎ | 48780/371472 [3:53:41<23:51:51, 3.76it/s] 13%|█▎ | 48781/371472 [3:53:41<23:09:20, 3.87it/s] 13%|█▎ | 48782/371472 [3:53:42<23:35:46, 3.80it/s] 13%|█▎ | 48783/371472 [3:53:42<26:41:19, 3.36it/s] 13%|█▎ | 48784/371472 [3:53:42<27:09:35, 3.30it/s] 13%|█▎ | 48785/371472 [3:53:42<25:56:35, 3.46it/s] 13%|█▎ | 48786/371472 [3:53:43<24:22:50, 3.68it/s] 13%|█▎ | 48787/371472 [3:53:43<23:26:43, 3.82it/s] 13%|█▎ | 48788/371472 [3:53:43<22:58:15, 3.90it/s] 13%|█▎ | 48789/371472 [3:53:43<23:54:01, 3.75it/s] 13%|█▎ | 48790/371472 [3:53:44<23:35:51, 3.80it/s] 13%|█▎ | 48791/371472 [3:53:44<23:38:46, 3.79it/s] 13%|█▎ | 48792/371472 [3:53:44<23:50:41, 3.76it/s] 13%|█▎ | 48793/371472 [3:53:45<24:36:31, 3.64it/s] 13%|█▎ | 48794/371472 [3:53:45<24:04:09, 3.72it/s] 13%|█▎ | 48795/371472 [3:53:45<23:48:12, 3.77it/s] 13%|█▎ | 48796/371472 [3:53:45<24:38:17, 3.64it/s] 13%|█▎ | 48797/371472 [3:53:46<25:14:52, 3.55it/s] 13%|█▎ | 48798/371472 [3:53:46<25:14:34, 3.55it/s] 13%|█▎ | 48799/371472 [3:53:46<25:24:34, 3.53it/s] 13%|█▎ | 48800/371472 [3:53:46<24:13:27, 3.70it/s] {'loss': 4.2769, 'learning_rate': 8.821887995862871e-07, 'epoch': 2.1} + 13%|█▎ | 48800/371472 [3:53:46<24:13:27, 3.70it/s] 13%|█▎ | 48801/371472 [3:53:47<27:44:10, 3.23it/s] 13%|█▎ | 48802/371472 [3:53:47<25:54:10, 3.46it/s] 13%|█▎ | 48803/371472 [3:53:47<26:43:49, 3.35it/s] 13%|█▎ | 48804/371472 [3:53:48<25:20:35, 3.54it/s] 13%|█▎ | 48805/371472 [3:53:48<23:59:18, 3.74it/s] 13%|█▎ | 48806/371472 [3:53:48<24:19:45, 3.68it/s] 13%|█▎ | 48807/371472 [3:53:48<24:01:18, 3.73it/s] 13%|█▎ | 48808/371472 [3:53:49<23:22:50, 3.83it/s] 13%|█▎ | 48809/371472 [3:53:49<23:15:01, 3.85it/s] 13%|█▎ | 48810/371472 [3:53:49<24:02:34, 3.73it/s] 13%|█▎ | 48811/371472 [3:53:50<24:08:41, 3.71it/s] 13%|█▎ | 48812/371472 [3:53:50<24:24:05, 3.67it/s] 13%|█▎ | 48813/371472 [3:53:50<23:41:34, 3.78it/s] 13%|█▎ | 48814/371472 [3:53:50<23:52:05, 3.76it/s] 13%|█▎ | 48815/371472 [3:53:51<27:27:35, 3.26it/s] 13%|█▎ | 48816/371472 [3:53:51<26:12:07, 3.42it/s] 13%|█▎ | 48817/371472 [3:53:51<25:20:19, 3.54it/s] 13%|█▎ | 48818/371472 [3:53:52<25:52:15, 3.46it/s] 13%|█▎ | 48819/371472 [3:53:52<24:27:03, 3.67it/s] 13%|█▎ | 48820/371472 [3:53:52<26:17:54, 3.41it/s] {'loss': 4.4507, 'learning_rate': 8.821403176108082e-07, 'epoch': 2.1} + 13%|█▎ | 48820/371472 [3:53:52<26:17:54, 3.41it/s] 13%|█▎ | 48821/371472 [3:53:52<25:44:53, 3.48it/s] 13%|█▎ | 48822/371472 [3:53:53<26:11:18, 3.42it/s] 13%|█▎ | 48823/371472 [3:53:53<26:04:49, 3.44it/s] 13%|█▎ | 48824/371472 [3:53:53<25:11:34, 3.56it/s] 13%|█▎ | 48825/371472 [3:53:54<26:39:20, 3.36it/s] 13%|█▎ | 48826/371472 [3:53:54<26:14:16, 3.42it/s] 13%|█▎ | 48827/371472 [3:53:54<25:37:15, 3.50it/s] 13%|█▎ | 48828/371472 [3:53:54<25:20:35, 3.54it/s] 13%|█▎ | 48829/371472 [3:53:55<25:06:25, 3.57it/s] 13%|█▎ | 48830/371472 [3:53:55<28:09:06, 3.18it/s] 13%|█▎ | 48831/371472 [3:53:55<26:36:59, 3.37it/s] 13%|█▎ | 48832/371472 [3:53:56<26:25:08, 3.39it/s] 13%|█▎ | 48833/371472 [3:53:56<27:04:18, 3.31it/s] 13%|█▎ | 48834/371472 [3:53:56<28:30:57, 3.14it/s] 13%|█▎ | 48835/371472 [3:53:57<27:52:36, 3.21it/s] 13%|█▎ | 48836/371472 [3:53:57<26:55:33, 3.33it/s] 13%|█▎ | 48837/371472 [3:53:57<25:28:36, 3.52it/s] 13%|█▎ | 48838/371472 [3:53:57<25:12:26, 3.56it/s] 13%|█▎ | 48839/371472 [3:53:58<29:23:55, 3.05it/s] 13%|█▎ | 48840/371472 [3:53:58<28:46:09, 3.12it/s] {'loss': 4.1722, 'learning_rate': 8.820918356353294e-07, 'epoch': 2.1} + 13%|█▎ | 48840/371472 [3:53:58<28:46:09, 3.12it/s] 13%|█▎ | 48841/371472 [3:53:58<29:00:42, 3.09it/s] 13%|█▎ | 48842/371472 [3:53:59<28:05:22, 3.19it/s] 13%|█▎ | 48843/371472 [3:53:59<26:05:16, 3.44it/s] 13%|█▎ | 48844/371472 [3:53:59<26:06:32, 3.43it/s] 13%|█▎ | 48845/371472 [3:54:00<25:52:15, 3.46it/s] 13%|█▎ | 48846/371472 [3:54:00<26:41:48, 3.36it/s] 13%|█▎ | 48847/371472 [3:54:00<25:37:49, 3.50it/s] 13%|█▎ | 48848/371472 [3:54:00<24:59:58, 3.58it/s] 13%|█▎ | 48849/371472 [3:54:01<25:43:24, 3.48it/s] 13%|█▎ | 48850/371472 [3:54:01<25:58:53, 3.45it/s] 13%|█▎ | 48851/371472 [3:54:01<24:54:21, 3.60it/s] 13%|█▎ | 48852/371472 [3:54:02<24:51:10, 3.61it/s] 13%|█▎ | 48853/371472 [3:54:02<24:50:57, 3.61it/s] 13%|█▎ | 48854/371472 [3:54:02<24:01:22, 3.73it/s] 13%|█▎ | 48855/371472 [3:54:02<24:24:04, 3.67it/s] 13%|█▎ | 48856/371472 [3:54:03<24:29:47, 3.66it/s] 13%|█▎ | 48857/371472 [3:54:03<24:15:05, 3.70it/s] 13%|█▎ | 48858/371472 [3:54:03<25:08:19, 3.56it/s] 13%|█▎ | 48859/371472 [3:54:03<24:19:22, 3.68it/s] 13%|█▎ | 48860/371472 [3:54:04<26:11:56, 3.42it/s] {'loss': 4.1197, 'learning_rate': 8.820433536598504e-07, 'epoch': 2.1} + 13%|█▎ | 48860/371472 [3:54:04<26:11:56, 3.42it/s] 13%|█▎ | 48861/371472 [3:54:04<24:49:51, 3.61it/s] 13%|█▎ | 48862/371472 [3:54:04<25:18:42, 3.54it/s] 13%|█▎ | 48863/371472 [3:54:05<26:24:38, 3.39it/s] 13%|█▎ | 48864/371472 [3:54:05<29:37:38, 3.02it/s] 13%|█▎ | 48865/371472 [3:54:05<28:39:36, 3.13it/s] 13%|█▎ | 48866/371472 [3:54:06<27:46:21, 3.23it/s] 13%|█▎ | 48867/371472 [3:54:06<27:56:53, 3.21it/s] 13%|█▎ | 48868/371472 [3:54:06<27:26:31, 3.27it/s] 13%|█▎ | 48869/371472 [3:54:06<26:17:07, 3.41it/s] 13%|█▎ | 48870/371472 [3:54:07<25:37:05, 3.50it/s] 13%|█▎ | 48871/371472 [3:54:07<24:35:43, 3.64it/s] 13%|█▎ | 48872/371472 [3:54:07<23:53:39, 3.75it/s] 13%|█▎ | 48873/371472 [3:54:08<23:40:13, 3.79it/s] 13%|█▎ | 48874/371472 [3:54:08<23:44:45, 3.77it/s] 13%|█▎ | 48875/371472 [3:54:08<24:02:21, 3.73it/s] 13%|█▎ | 48876/371472 [3:54:08<26:46:26, 3.35it/s] 13%|█▎ | 48877/371472 [3:54:09<26:14:45, 3.41it/s] 13%|█▎ | 48878/371472 [3:54:09<26:05:45, 3.43it/s] 13%|█▎ | 48879/371472 [3:54:09<24:42:01, 3.63it/s] 13%|█▎ | 48880/371472 [3:54:09<24:01:18, 3.73it/s] {'loss': 4.0937, 'learning_rate': 8.819948716843715e-07, 'epoch': 2.11} + 13%|█▎ | 48880/371472 [3:54:09<24:01:18, 3.73it/s] 13%|█▎ | 48881/371472 [3:54:10<23:19:39, 3.84it/s] 13%|█▎ | 48882/371472 [3:54:10<24:14:48, 3.70it/s] 13%|█▎ | 48883/371472 [3:54:10<24:19:33, 3.68it/s] 13%|█▎ | 48884/371472 [3:54:11<24:03:11, 3.73it/s] 13%|█▎ | 48885/371472 [3:54:11<24:26:22, 3.67it/s] 13%|█▎ | 48886/371472 [3:54:11<24:05:06, 3.72it/s] 13%|█▎ | 48887/371472 [3:54:11<24:02:52, 3.73it/s] 13%|█▎ | 48888/371472 [3:54:12<24:17:22, 3.69it/s] 13%|█▎ | 48889/371472 [3:54:12<24:09:25, 3.71it/s] 13%|█▎ | 48890/371472 [3:54:12<23:35:13, 3.80it/s] 13%|█▎ | 48891/371472 [3:54:12<23:16:28, 3.85it/s] 13%|█▎ | 48892/371472 [3:54:13<23:44:34, 3.77it/s] 13%|█▎ | 48893/371472 [3:54:13<26:23:54, 3.39it/s] 13%|█▎ | 48894/371472 [3:54:13<28:58:27, 3.09it/s] 13%|█▎ | 48895/371472 [3:54:14<27:41:09, 3.24it/s] 13%|█▎ | 48896/371472 [3:54:14<26:22:52, 3.40it/s] 13%|█▎ | 48897/371472 [3:54:14<25:45:12, 3.48it/s] 13%|█▎ | 48898/371472 [3:54:14<24:46:39, 3.62it/s] 13%|█▎ | 48899/371472 [3:54:15<24:12:16, 3.70it/s] 13%|█▎ | 48900/371472 [3:54:15<29:17:04, 3.06it/s] {'loss': 4.2682, 'learning_rate': 8.819463897088926e-07, 'epoch': 2.11} + 13%|█▎ | 48900/371472 [3:54:15<29:17:04, 3.06it/s] 13%|█▎ | 48901/371472 [3:54:15<28:22:07, 3.16it/s] 13%|█▎ | 48902/371472 [3:54:16<26:28:26, 3.38it/s] 13%|█▎ | 48903/371472 [3:54:16<25:21:46, 3.53it/s] 13%|█▎ | 48904/371472 [3:54:16<25:35:07, 3.50it/s] 13%|█▎ | 48905/371472 [3:54:17<24:10:57, 3.71it/s] 13%|█▎ | 48906/371472 [3:54:17<23:44:59, 3.77it/s] 13%|█▎ | 48907/371472 [3:54:17<23:54:42, 3.75it/s] 13%|█▎ | 48908/371472 [3:54:17<26:13:28, 3.42it/s] 13%|█▎ | 48909/371472 [3:54:18<27:04:04, 3.31it/s] 13%|█▎ | 48910/371472 [3:54:18<26:21:19, 3.40it/s] 13%|█▎ | 48911/371472 [3:54:18<26:35:32, 3.37it/s] 13%|█▎ | 48912/371472 [3:54:19<25:53:47, 3.46it/s] 13%|█▎ | 48913/371472 [3:54:19<24:47:10, 3.61it/s] 13%|█▎ | 48914/371472 [3:54:19<24:26:24, 3.67it/s] 13%|█▎ | 48915/371472 [3:54:19<24:39:29, 3.63it/s] 13%|█▎ | 48916/371472 [3:54:20<27:31:00, 3.26it/s] 13%|█▎ | 48917/371472 [3:54:20<26:30:38, 3.38it/s] 13%|█▎ | 48918/371472 [3:54:20<26:11:32, 3.42it/s] 13%|█▎ | 48919/371472 [3:54:21<26:47:23, 3.34it/s] 13%|█▎ | 48920/371472 [3:54:21<29:49:32, 3.00it/s] {'loss': 4.2429, 'learning_rate': 8.818979077334138e-07, 'epoch': 2.11} + 13%|█▎ | 48920/371472 [3:54:21<29:49:32, 3.00it/s] 13%|█▎ | 48921/371472 [3:54:21<30:07:50, 2.97it/s] 13%|█▎ | 48922/371472 [3:54:22<28:14:47, 3.17it/s] 13%|█▎ | 48923/371472 [3:54:22<26:32:17, 3.38it/s] 13%|█▎ | 48924/371472 [3:54:22<26:23:16, 3.40it/s] 13%|█▎ | 48925/371472 [3:54:22<25:48:23, 3.47it/s] 13%|█▎ | 48926/371472 [3:54:23<24:52:30, 3.60it/s] 13%|█▎ | 48927/371472 [3:54:23<24:25:11, 3.67it/s] 13%|█▎ | 48928/371472 [3:54:23<26:41:43, 3.36it/s] 13%|█▎ | 48929/371472 [3:54:24<26:44:34, 3.35it/s] 13%|█▎ | 48930/371472 [3:54:24<28:46:25, 3.11it/s] 13%|█▎ | 48931/371472 [3:54:24<27:30:36, 3.26it/s] 13%|█▎ | 48932/371472 [3:54:25<27:13:41, 3.29it/s] 13%|█▎ | 48933/371472 [3:54:25<29:20:01, 3.05it/s] 13%|█▎ | 48934/371472 [3:54:25<29:00:09, 3.09it/s] 13%|█▎ | 48935/371472 [3:54:26<28:16:04, 3.17it/s] 13%|█▎ | 48936/371472 [3:54:26<27:45:59, 3.23it/s] 13%|█▎ | 48937/371472 [3:54:26<27:44:33, 3.23it/s] 13%|█▎ | 48938/371472 [3:54:26<26:33:30, 3.37it/s] 13%|█▎ | 48939/371472 [3:54:27<25:52:07, 3.46it/s] 13%|█▎ | 48940/371472 [3:54:27<25:20:24, 3.54it/s] {'loss': 4.2731, 'learning_rate': 8.818494257579348e-07, 'epoch': 2.11} + 13%|█▎ | 48940/371472 [3:54:27<25:20:24, 3.54it/s] 13%|█▎ | 48941/371472 [3:54:27<25:11:44, 3.56it/s] 13%|█▎ | 48942/371472 [3:54:28<24:21:34, 3.68it/s] 13%|█▎ | 48943/371472 [3:54:28<23:47:17, 3.77it/s] 13%|█▎ | 48944/371472 [3:54:28<23:39:37, 3.79it/s] 13%|█▎ | 48945/371472 [3:54:28<25:23:54, 3.53it/s] 13%|█▎ | 48946/371472 [3:54:29<25:20:53, 3.53it/s] 13%|█▎ | 48947/371472 [3:54:29<26:30:45, 3.38it/s] 13%|█▎ | 48948/371472 [3:54:29<25:37:53, 3.50it/s] 13%|█▎ | 48949/371472 [3:54:30<26:45:33, 3.35it/s] 13%|█▎ | 48950/371472 [3:54:30<26:15:37, 3.41it/s] 13%|█▎ | 48951/371472 [3:54:30<25:53:48, 3.46it/s] 13%|█▎ | 48952/371472 [3:54:30<26:12:23, 3.42it/s] 13%|█▎ | 48953/371472 [3:54:31<25:24:18, 3.53it/s] 13%|█▎ | 48954/371472 [3:54:31<23:56:29, 3.74it/s] 13%|█▎ | 48955/371472 [3:54:31<23:17:40, 3.85it/s] 13%|█▎ | 48956/371472 [3:54:31<23:15:52, 3.85it/s] 13%|█▎ | 48957/371472 [3:54:32<23:17:58, 3.85it/s] 13%|█▎ | 48958/371472 [3:54:32<23:07:35, 3.87it/s] 13%|█▎ | 48959/371472 [3:54:32<22:59:56, 3.90it/s] 13%|█▎ | 48960/371472 [3:54:32<23:57:17, 3.74it/s] {'loss': 4.2936, 'learning_rate': 8.81800943782456e-07, 'epoch': 2.11} + 13%|█▎ | 48960/371472 [3:54:32<23:57:17, 3.74it/s] 13%|█▎ | 48961/371472 [3:54:33<25:32:53, 3.51it/s] 13%|█▎ | 48962/371472 [3:54:33<24:41:16, 3.63it/s] 13%|█▎ | 48963/371472 [3:54:33<24:04:13, 3.72it/s] 13%|█▎ | 48964/371472 [3:54:34<23:38:18, 3.79it/s] 13%|█▎ | 48965/371472 [3:54:34<23:54:42, 3.75it/s] 13%|█▎ | 48966/371472 [3:54:34<23:23:07, 3.83it/s] 13%|█▎ | 48967/371472 [3:54:34<23:24:06, 3.83it/s] 13%|█▎ | 48968/371472 [3:54:35<23:33:10, 3.80it/s] 13%|█▎ | 48969/371472 [3:54:35<23:40:03, 3.79it/s] 13%|█▎ | 48970/371472 [3:54:35<24:33:28, 3.65it/s] 13%|█▎ | 48971/371472 [3:54:35<25:05:01, 3.57it/s] 13%|█▎ | 48972/371472 [3:54:36<24:36:51, 3.64it/s] 13%|█▎ | 48973/371472 [3:54:36<24:18:40, 3.68it/s] 13%|█▎ | 48974/371472 [3:54:36<24:56:04, 3.59it/s] 13%|█▎ | 48975/371472 [3:54:37<26:06:08, 3.43it/s] 13%|█▎ | 48976/371472 [3:54:37<26:19:12, 3.40it/s] 13%|█▎ | 48977/371472 [3:54:37<26:05:16, 3.43it/s] 13%|█▎ | 48978/371472 [3:54:37<24:54:52, 3.60it/s] 13%|█▎ | 48979/371472 [3:54:38<25:15:03, 3.55it/s] 13%|█▎ | 48980/371472 [3:54:38<25:26:59, 3.52it/s] {'loss': 4.3315, 'learning_rate': 8.817524618069771e-07, 'epoch': 2.11} + 13%|█▎ | 48980/371472 [3:54:38<25:26:59, 3.52it/s] 13%|█▎ | 48981/371472 [3:54:38<26:03:30, 3.44it/s] 13%|█▎ | 48982/371472 [3:54:39<27:42:59, 3.23it/s] 13%|█▎ | 48983/371472 [3:54:39<26:16:23, 3.41it/s] 13%|█▎ | 48984/371472 [3:54:39<26:32:29, 3.38it/s] 13%|█▎ | 48985/371472 [3:54:40<26:16:04, 3.41it/s] 13%|█▎ | 48986/371472 [3:54:40<26:59:43, 3.32it/s] 13%|█▎ | 48987/371472 [3:54:40<25:54:34, 3.46it/s] 13%|█▎ | 48988/371472 [3:54:40<25:45:00, 3.48it/s] 13%|█▎ | 48989/371472 [3:54:41<26:17:01, 3.41it/s] 13%|█▎ | 48990/371472 [3:54:41<25:29:15, 3.51it/s] 13%|█▎ | 48991/371472 [3:54:41<24:23:15, 3.67it/s] 13%|█▎ | 48992/371472 [3:54:41<24:23:11, 3.67it/s] 13%|█▎ | 48993/371472 [3:54:42<24:04:29, 3.72it/s] 13%|█▎ | 48994/371472 [3:54:42<24:16:40, 3.69it/s] 13%|█▎ | 48995/371472 [3:54:42<23:42:50, 3.78it/s] 13%|█▎ | 48996/371472 [3:54:43<26:08:53, 3.43it/s] 13%|█▎ | 48997/371472 [3:54:43<25:00:48, 3.58it/s] 13%|█▎ | 48998/371472 [3:54:43<24:00:47, 3.73it/s] 13%|█▎ | 48999/371472 [3:54:43<23:46:39, 3.77it/s] 13%|█▎ | 49000/371472 [3:54:44<23:55:04, 3.75it/s] {'loss': 4.3196, 'learning_rate': 8.817039798314981e-07, 'epoch': 2.11} + 13%|█▎ | 49000/371472 [3:54:44<23:55:04, 3.75it/s] 13%|█▎ | 49001/371472 [3:54:44<23:34:27, 3.80it/s] 13%|█▎ | 49002/371472 [3:54:44<23:12:55, 3.86it/s] 13%|█▎ | 49003/371472 [3:54:44<23:12:25, 3.86it/s] 13%|█▎ | 49004/371472 [3:54:45<23:26:05, 3.82it/s] 13%|█▎ | 49005/371472 [3:54:45<23:37:50, 3.79it/s] 13%|█▎ | 49006/371472 [3:54:45<23:10:57, 3.86it/s] 13%|█▎ | 49007/371472 [3:54:45<23:20:08, 3.84it/s] 13%|█▎ | 49008/371472 [3:54:46<23:57:52, 3.74it/s] 13%|█▎ | 49009/371472 [3:54:46<23:30:54, 3.81it/s] 13%|█▎ | 49010/371472 [3:54:46<22:51:55, 3.92it/s] 13%|█▎ | 49011/371472 [3:54:46<23:02:42, 3.89it/s] 13%|█▎ | 49012/371472 [3:54:47<24:16:24, 3.69it/s] 13%|█▎ | 49013/371472 [3:54:47<25:55:11, 3.46it/s] 13%|█▎ | 49014/371472 [3:54:47<25:14:49, 3.55it/s] 13%|█▎ | 49015/371472 [3:54:48<24:32:19, 3.65it/s] 13%|█▎ | 49016/371472 [3:54:48<24:06:41, 3.71it/s] 13%|█▎ | 49017/371472 [3:54:48<25:29:37, 3.51it/s] 13%|█▎ | 49018/371472 [3:54:48<25:02:18, 3.58it/s] 13%|█▎ | 49019/371472 [3:54:49<24:25:31, 3.67it/s] 13%|█▎ | 49020/371472 [3:54:49<23:29:58, 3.81it/s] {'loss': 4.192, 'learning_rate': 8.816554978560192e-07, 'epoch': 2.11} + 13%|█▎ | 49020/371472 [3:54:49<23:29:58, 3.81it/s] 13%|█▎ | 49021/371472 [3:54:49<24:37:31, 3.64it/s] 13%|█▎ | 49022/371472 [3:54:50<24:15:47, 3.69it/s] 13%|█▎ | 49023/371472 [3:54:50<24:40:02, 3.63it/s] 13%|█▎ | 49024/371472 [3:54:50<24:51:21, 3.60it/s] 13%|█▎ | 49025/371472 [3:54:50<24:46:32, 3.62it/s] 13%|█▎ | 49026/371472 [3:54:51<23:51:56, 3.75it/s] 13%|█▎ | 49027/371472 [3:54:51<23:50:44, 3.76it/s] 13%|█▎ | 49028/371472 [3:54:51<23:03:05, 3.89it/s] 13%|█▎ | 49029/371472 [3:54:51<22:18:10, 4.02it/s] 13%|█▎ | 49030/371472 [3:54:52<22:56:42, 3.90it/s] 13%|█▎ | 49031/371472 [3:54:52<24:46:51, 3.61it/s] 13%|█▎ | 49032/371472 [3:54:52<25:47:36, 3.47it/s] 13%|█▎ | 49033/371472 [3:54:53<26:38:59, 3.36it/s] 13%|█▎ | 49034/371472 [3:54:53<27:43:00, 3.23it/s] 13%|█▎ | 49035/371472 [3:54:53<26:39:10, 3.36it/s] 13%|█▎ | 49036/371472 [3:54:53<26:57:34, 3.32it/s] 13%|█▎ | 49037/371472 [3:54:54<25:40:01, 3.49it/s] 13%|█▎ | 49038/371472 [3:54:54<25:05:24, 3.57it/s] 13%|█▎ | 49039/371472 [3:54:54<25:10:49, 3.56it/s] 13%|█▎ | 49040/371472 [3:54:55<24:01:12, 3.73it/s] {'loss': 4.261, 'learning_rate': 8.816070158805404e-07, 'epoch': 2.11} + 13%|█▎ | 49040/371472 [3:54:55<24:01:12, 3.73it/s] 13%|█▎ | 49041/371472 [3:54:55<25:01:13, 3.58it/s] 13%|█▎ | 49042/371472 [3:54:55<24:38:56, 3.63it/s] 13%|█▎ | 49043/371472 [3:54:55<24:38:57, 3.63it/s] 13%|█▎ | 49044/371472 [3:54:56<25:56:33, 3.45it/s] 13%|█▎ | 49045/371472 [3:54:56<26:14:35, 3.41it/s] 13%|█▎ | 49046/371472 [3:54:56<26:18:35, 3.40it/s] 13%|█▎ | 49047/371472 [3:54:57<25:17:51, 3.54it/s] 13%|█▎ | 49048/371472 [3:54:57<25:07:51, 3.56it/s] 13%|█▎ | 49049/371472 [3:54:57<24:19:57, 3.68it/s] 13%|█▎ | 49050/371472 [3:54:57<24:25:53, 3.67it/s] 13%|█▎ | 49051/371472 [3:54:58<23:51:32, 3.75it/s] 13%|█▎ | 49052/371472 [3:54:58<24:06:03, 3.72it/s] 13%|█▎ | 49053/371472 [3:54:58<24:39:10, 3.63it/s] 13%|█▎ | 49054/371472 [3:54:58<24:07:11, 3.71it/s] 13%|█▎ | 49055/371472 [3:54:59<24:27:25, 3.66it/s] 13%|█▎ | 49056/371472 [3:54:59<24:08:45, 3.71it/s] 13%|█▎ | 49057/371472 [3:54:59<24:56:27, 3.59it/s] 13%|█▎ | 49058/371472 [3:55:00<24:32:50, 3.65it/s] 13%|█▎ | 49059/371472 [3:55:00<24:20:26, 3.68it/s] 13%|█▎ | 49060/371472 [3:55:00<24:19:16, 3.68it/s] {'loss': 4.3711, 'learning_rate': 8.815585339050615e-07, 'epoch': 2.11} + 13%|█▎ | 49060/371472 [3:55:00<24:19:16, 3.68it/s] 13%|█▎ | 49061/371472 [3:55:00<26:02:55, 3.44it/s] 13%|█▎ | 49062/371472 [3:55:01<25:02:32, 3.58it/s] 13%|█▎ | 49063/371472 [3:55:01<25:17:49, 3.54it/s] 13%|█▎ | 49064/371472 [3:55:01<25:07:37, 3.56it/s] 13%|█▎ | 49065/371472 [3:55:02<25:28:21, 3.52it/s] 13%|█▎ | 49066/371472 [3:55:02<25:03:12, 3.57it/s] 13%|█▎ | 49067/371472 [3:55:02<27:51:36, 3.21it/s] 13%|█▎ | 49068/371472 [3:55:02<27:09:30, 3.30it/s] 13%|█▎ | 49069/371472 [3:55:03<28:04:12, 3.19it/s] 13%|█▎ | 49070/371472 [3:55:03<30:35:50, 2.93it/s] 13%|█▎ | 49071/371472 [3:55:03<29:07:54, 3.07it/s] 13%|█▎ | 49072/371472 [3:55:04<27:00:49, 3.32it/s] 13%|█▎ | 49073/371472 [3:55:04<27:24:23, 3.27it/s] 13%|█▎ | 49074/371472 [3:55:04<26:57:10, 3.32it/s] 13%|█▎ | 49075/371472 [3:55:05<25:58:43, 3.45it/s] 13%|█▎ | 49076/371472 [3:55:05<24:34:46, 3.64it/s] 13%|█▎ | 49077/371472 [3:55:05<23:40:02, 3.78it/s] 13%|█▎ | 49078/371472 [3:55:05<22:59:01, 3.90it/s] 13%|█▎ | 49079/371472 [3:55:06<24:40:22, 3.63it/s] 13%|█▎ | 49080/371472 [3:55:06<23:50:43, 3.76it/s] {'loss': 4.4269, 'learning_rate': 8.815100519295826e-07, 'epoch': 2.11} + 13%|█▎ | 49080/371472 [3:55:06<23:50:43, 3.76it/s] 13%|█▎ | 49081/371472 [3:55:06<23:09:53, 3.87it/s] 13%|█▎ | 49082/371472 [3:55:06<23:00:13, 3.89it/s] 13%|█▎ | 49083/371472 [3:55:07<24:30:22, 3.65it/s] 13%|█▎ | 49084/371472 [3:55:07<24:47:50, 3.61it/s] 13%|█▎ | 49085/371472 [3:55:07<25:03:55, 3.57it/s] 13%|█▎ | 49086/371472 [3:55:08<24:58:00, 3.59it/s] 13%|█▎ | 49087/371472 [3:55:08<25:24:00, 3.53it/s] 13%|█▎ | 49088/371472 [3:55:08<27:31:51, 3.25it/s] 13%|█▎ | 49089/371472 [3:55:08<25:53:05, 3.46it/s] 13%|█▎ | 49090/371472 [3:55:09<26:05:13, 3.43it/s] 13%|█▎ | 49091/371472 [3:55:09<26:53:39, 3.33it/s] 13%|█▎ | 49092/371472 [3:55:09<25:55:02, 3.46it/s] 13%|█▎ | 49093/371472 [3:55:10<26:13:46, 3.41it/s] 13%|█▎ | 49094/371472 [3:55:10<25:11:36, 3.55it/s] 13%|█▎ | 49095/371472 [3:55:10<24:31:36, 3.65it/s] 13%|█▎ | 49096/371472 [3:55:10<24:05:40, 3.72it/s] 13%|█▎ | 49097/371472 [3:55:11<25:37:16, 3.50it/s] 13%|█▎ | 49098/371472 [3:55:11<25:35:21, 3.50it/s] 13%|█▎ | 49099/371472 [3:55:11<24:23:26, 3.67it/s] 13%|█▎ | 49100/371472 [3:55:11<23:27:38, 3.82it/s] {'loss': 4.4123, 'learning_rate': 8.814615699541037e-07, 'epoch': 2.11} + 13%|█▎ | 49100/371472 [3:55:11<23:27:38, 3.82it/s] 13%|█▎ | 49101/371472 [3:55:12<22:58:38, 3.90it/s] 13%|█▎ | 49102/371472 [3:55:12<24:24:45, 3.67it/s] 13%|█▎ | 49103/371472 [3:55:12<25:14:42, 3.55it/s] 13%|█▎ | 49104/371472 [3:55:13<24:36:01, 3.64it/s] 13%|█▎ | 49105/371472 [3:55:13<25:42:05, 3.48it/s] 13%|█▎ | 49106/371472 [3:55:13<27:01:12, 3.31it/s] 13%|█▎ | 49107/371472 [3:55:14<27:40:58, 3.23it/s] 13%|█▎ | 49108/371472 [3:55:14<26:13:31, 3.41it/s] 13%|█▎ | 49109/371472 [3:55:14<27:12:27, 3.29it/s] 13%|█▎ | 49110/371472 [3:55:14<26:14:24, 3.41it/s] 13%|█▎ | 49111/371472 [3:55:15<26:27:59, 3.38it/s] 13%|█▎ | 49112/371472 [3:55:15<25:59:37, 3.44it/s] 13%|█▎ | 49113/371472 [3:55:15<28:07:50, 3.18it/s] 13%|█▎ | 49114/371472 [3:55:16<26:44:06, 3.35it/s] 13%|█▎ | 49115/371472 [3:55:16<25:42:24, 3.48it/s] 13%|█▎ | 49116/371472 [3:55:16<24:55:43, 3.59it/s] 13%|█▎ | 49117/371472 [3:55:16<24:28:47, 3.66it/s] 13%|█▎ | 49118/371472 [3:55:17<23:38:34, 3.79it/s] 13%|█▎ | 49119/371472 [3:55:17<24:04:52, 3.72it/s] 13%|█▎ | 49120/371472 [3:55:17<23:42:38, 3.78it/s] {'loss': 4.1912, 'learning_rate': 8.814130879786248e-07, 'epoch': 2.12} + 13%|█▎ | 49120/371472 [3:55:17<23:42:38, 3.78it/s] 13%|█▎ | 49121/371472 [3:55:17<23:01:54, 3.89it/s] 13%|█▎ | 49122/371472 [3:55:18<22:41:03, 3.95it/s] 13%|█▎ | 49123/371472 [3:55:18<22:21:12, 4.01it/s] 13%|█▎ | 49124/371472 [3:55:18<22:26:57, 3.99it/s] 13%|█▎ | 49125/371472 [3:55:19<25:29:25, 3.51it/s] 13%|█▎ | 49126/371472 [3:55:19<24:29:37, 3.66it/s] 13%|█▎ | 49127/371472 [3:55:19<23:55:46, 3.74it/s] 13%|█▎ | 49128/371472 [3:55:19<24:18:54, 3.68it/s] 13%|█▎ | 49129/371472 [3:55:20<24:46:14, 3.61it/s] 13%|█▎ | 49130/371472 [3:55:20<24:35:46, 3.64it/s] 13%|█▎ | 49131/371472 [3:55:20<24:06:18, 3.71it/s] 13%|█▎ | 49132/371472 [3:55:20<26:23:45, 3.39it/s] 13%|█▎ | 49133/371472 [3:55:21<25:33:20, 3.50it/s] 13%|█▎ | 49134/371472 [3:55:21<25:28:27, 3.51it/s] 13%|█▎ | 49135/371472 [3:55:21<27:22:00, 3.27it/s] 13%|█▎ | 49136/371472 [3:55:22<29:51:49, 3.00it/s] 13%|█▎ | 49137/371472 [3:55:22<28:07:21, 3.18it/s] 13%|█▎ | 49138/371472 [3:55:22<27:29:20, 3.26it/s] 13%|█▎ | 49139/371472 [3:55:23<26:05:10, 3.43it/s] 13%|█▎ | 49140/371472 [3:55:23<26:47:23, 3.34it/s] {'loss': 4.2693, 'learning_rate': 8.813646060031459e-07, 'epoch': 2.12} + 13%|█▎ | 49140/371472 [3:55:23<26:47:23, 3.34it/s] 13%|█▎ | 49141/371472 [3:55:23<26:02:10, 3.44it/s] 13%|█▎ | 49142/371472 [3:55:24<30:30:20, 2.94it/s] 13%|█▎ | 49143/371472 [3:55:24<28:09:55, 3.18it/s] 13%|█▎ | 49144/371472 [3:55:24<27:44:47, 3.23it/s] 13%|█▎ | 49145/371472 [3:55:24<25:54:40, 3.46it/s] 13%|█▎ | 49146/371472 [3:55:25<25:52:42, 3.46it/s] 13%|█▎ | 49147/371472 [3:55:25<25:24:26, 3.52it/s] 13%|█▎ | 49148/371472 [3:55:25<24:40:46, 3.63it/s] 13%|█▎ | 49149/371472 [3:55:26<25:11:17, 3.55it/s] 13%|█▎ | 49150/371472 [3:55:26<25:53:51, 3.46it/s] 13%|█▎ | 49151/371472 [3:55:26<25:09:29, 3.56it/s] 13%|█▎ | 49152/371472 [3:55:26<24:39:49, 3.63it/s] 13%|█▎ | 49153/371472 [3:55:27<24:00:16, 3.73it/s] 13%|█▎ | 49154/371472 [3:55:27<24:35:01, 3.64it/s] 13%|█▎ | 49155/371472 [3:55:27<24:18:32, 3.68it/s] 13%|█▎ | 49156/371472 [3:55:27<23:45:05, 3.77it/s] 13%|█▎ | 49157/371472 [3:55:28<24:49:34, 3.61it/s] 13%|█▎ | 49158/371472 [3:55:28<24:35:35, 3.64it/s] 13%|█▎ | 49159/371472 [3:55:28<25:00:50, 3.58it/s] 13%|█▎ | 49160/371472 [3:55:29<24:48:19, 3.61it/s] {'loss': 4.4526, 'learning_rate': 8.813161240276669e-07, 'epoch': 2.12} + 13%|█▎ | 49160/371472 [3:55:29<24:48:19, 3.61it/s] 13%|█▎ | 49161/371472 [3:55:29<24:11:38, 3.70it/s] 13%|█▎ | 49162/371472 [3:55:29<23:45:55, 3.77it/s] 13%|█▎ | 49163/371472 [3:55:29<24:39:02, 3.63it/s] 13%|█▎ | 49164/371472 [3:55:30<24:14:21, 3.69it/s] 13%|█▎ | 49165/371472 [3:55:30<23:31:24, 3.81it/s] 13%|█▎ | 49166/371472 [3:55:30<24:22:09, 3.67it/s] 13%|█▎ | 49167/371472 [3:55:30<24:10:44, 3.70it/s] 13%|█▎ | 49168/371472 [3:55:31<25:04:57, 3.57it/s] 13%|█▎ | 49169/371472 [3:55:31<25:09:01, 3.56it/s] 13%|█▎ | 49170/371472 [3:55:31<24:22:21, 3.67it/s] 13%|█▎ | 49171/371472 [3:55:32<24:28:28, 3.66it/s] 13%|█▎ | 49172/371472 [3:55:32<26:31:09, 3.38it/s] 13%|█▎ | 49173/371472 [3:55:32<25:52:04, 3.46it/s] 13%|█▎ | 49174/371472 [3:55:32<25:53:26, 3.46it/s] 13%|█▎ | 49175/371472 [3:55:33<25:32:23, 3.51it/s] 13%|█▎ | 49176/371472 [3:55:33<26:20:35, 3.40it/s] 13%|█▎ | 49177/371472 [3:55:33<27:47:58, 3.22it/s] 13%|█▎ | 49178/371472 [3:55:34<26:43:38, 3.35it/s] 13%|█▎ | 49179/371472 [3:55:34<26:31:19, 3.38it/s] 13%|█▎ | 49180/371472 [3:55:34<25:06:31, 3.57it/s] {'loss': 4.4448, 'learning_rate': 8.812676420521881e-07, 'epoch': 2.12} + 13%|█▎ | 49180/371472 [3:55:34<25:06:31, 3.57it/s] 13%|█▎ | 49181/371472 [3:55:34<24:12:25, 3.70it/s] 13%|█▎ | 49182/371472 [3:55:35<25:10:12, 3.56it/s] 13%|█▎ | 49183/371472 [3:55:35<25:06:49, 3.56it/s] 13%|█▎ | 49184/371472 [3:55:35<25:40:13, 3.49it/s] 13%|█▎ | 49185/371472 [3:55:36<24:35:10, 3.64it/s] 13%|█▎ | 49186/371472 [3:55:36<25:53:47, 3.46it/s] 13%|█▎ | 49187/371472 [3:55:36<25:43:16, 3.48it/s] 13%|█▎ | 49188/371472 [3:55:36<25:20:08, 3.53it/s] 13%|█▎ | 49189/371472 [3:55:37<24:51:47, 3.60it/s] 13%|█▎ | 49190/371472 [3:55:37<23:56:17, 3.74it/s] 13%|█▎ | 49191/371472 [3:55:37<25:55:24, 3.45it/s] 13%|█▎ | 49192/371472 [3:55:38<25:35:48, 3.50it/s] 13%|█▎ | 49193/371472 [3:55:38<24:24:54, 3.67it/s] 13%|█▎ | 49194/371472 [3:55:38<24:48:56, 3.61it/s] 13%|█▎ | 49195/371472 [3:55:39<27:38:47, 3.24it/s] 13%|█▎ | 49196/371472 [3:55:39<26:46:44, 3.34it/s] 13%|█▎ | 49197/371472 [3:55:39<25:31:04, 3.51it/s] 13%|█▎ | 49198/371472 [3:55:39<26:15:08, 3.41it/s] 13%|█▎ | 49199/371472 [3:55:40<26:07:49, 3.43it/s] 13%|█▎ | 49200/371472 [3:55:40<25:30:20, 3.51it/s] {'loss': 4.1797, 'learning_rate': 8.812191600767093e-07, 'epoch': 2.12} + 13%|█▎ | 49200/371472 [3:55:40<25:30:20, 3.51it/s] 13%|█▎ | 49201/371472 [3:55:40<25:31:07, 3.51it/s] 13%|█▎ | 49202/371472 [3:55:40<24:50:06, 3.60it/s] 13%|█▎ | 49203/371472 [3:55:41<24:36:28, 3.64it/s] 13%|█▎ | 49204/371472 [3:55:41<27:34:46, 3.25it/s] 13%|█▎ | 49205/371472 [3:55:41<26:14:09, 3.41it/s] 13%|█▎ | 49206/371472 [3:55:42<25:54:38, 3.45it/s] 13%|█▎ | 49207/371472 [3:55:42<28:07:29, 3.18it/s] 13%|█▎ | 49208/371472 [3:55:42<26:04:39, 3.43it/s] 13%|█▎ | 49209/371472 [3:55:43<25:36:07, 3.50it/s] 13%|█▎ | 49210/371472 [3:55:43<25:31:43, 3.51it/s] 13%|█▎ | 49211/371472 [3:55:43<25:19:13, 3.54it/s] 13%|█▎ | 49212/371472 [3:55:44<28:31:02, 3.14it/s] 13%|█▎ | 49213/371472 [3:55:44<26:38:22, 3.36it/s] 13%|█▎ | 49214/371472 [3:55:44<26:16:27, 3.41it/s] 13%|█▎ | 49215/371472 [3:55:44<25:53:50, 3.46it/s] 13%|█▎ | 49216/371472 [3:55:45<24:31:37, 3.65it/s] 13%|█▎ | 49217/371472 [3:55:45<24:40:22, 3.63it/s] 13%|█▎ | 49218/371472 [3:55:45<23:58:08, 3.73it/s] 13%|█▎ | 49219/371472 [3:55:45<24:15:32, 3.69it/s] 13%|█▎ | 49220/371472 [3:55:46<25:07:45, 3.56it/s] {'loss': 4.0822, 'learning_rate': 8.811706781012304e-07, 'epoch': 2.12} + 13%|█▎ | 49220/371472 [3:55:46<25:07:45, 3.56it/s] 13%|█▎ | 49221/371472 [3:55:46<24:40:57, 3.63it/s] 13%|█▎ | 49222/371472 [3:55:46<25:03:10, 3.57it/s] 13%|█▎ | 49223/371472 [3:55:47<26:02:30, 3.44it/s] 13%|█▎ | 49224/371472 [3:55:47<26:29:55, 3.38it/s] 13%|█▎ | 49225/371472 [3:55:47<25:02:19, 3.57it/s] 13%|█▎ | 49226/371472 [3:55:47<26:05:58, 3.43it/s] 13%|█▎ | 49227/371472 [3:55:48<26:45:22, 3.35it/s] 13%|█▎ | 49228/371472 [3:55:48<25:52:52, 3.46it/s] 13%|█▎ | 49229/371472 [3:55:48<25:57:21, 3.45it/s] 13%|█▎ | 49230/371472 [3:55:49<25:47:33, 3.47it/s] 13%|█▎ | 49231/371472 [3:55:49<24:47:58, 3.61it/s] 13%|█▎ | 49232/371472 [3:55:49<24:59:35, 3.58it/s] 13%|█▎ | 49233/371472 [3:55:49<24:25:19, 3.67it/s] 13%|█▎ | 49234/371472 [3:55:50<26:15:50, 3.41it/s] 13%|█▎ | 49235/371472 [3:55:50<24:54:51, 3.59it/s] 13%|█▎ | 49236/371472 [3:55:50<24:29:25, 3.65it/s] 13%|█▎ | 49237/371472 [3:55:50<23:55:00, 3.74it/s] 13%|█▎ | 49238/371472 [3:55:51<24:01:07, 3.73it/s] 13%|█▎ | 49239/371472 [3:55:51<24:04:07, 3.72it/s] 13%|█▎ | 49240/371472 [3:55:51<23:35:47, 3.79it/s] {'loss': 4.3429, 'learning_rate': 8.811221961257514e-07, 'epoch': 2.12} + 13%|█▎ | 49240/371472 [3:55:51<23:35:47, 3.79it/s] 13%|█▎ | 49241/371472 [3:55:52<24:17:09, 3.69it/s] 13%|█▎ | 49242/371472 [3:55:52<26:00:47, 3.44it/s] 13%|█▎ | 49243/371472 [3:55:52<25:24:14, 3.52it/s] 13%|█▎ | 49244/371472 [3:55:52<24:28:31, 3.66it/s] 13%|█▎ | 49245/371472 [3:55:53<23:37:53, 3.79it/s] 13%|█▎ | 49246/371472 [3:55:53<24:41:08, 3.63it/s] 13%|█▎ | 49247/371472 [3:55:53<24:08:27, 3.71it/s] 13%|█▎ | 49248/371472 [3:55:53<23:52:07, 3.75it/s] 13%|█▎ | 49249/371472 [3:55:54<24:01:02, 3.73it/s] 13%|█▎ | 49250/371472 [3:55:54<24:03:49, 3.72it/s] 13%|█▎ | 49251/371472 [3:55:54<24:00:04, 3.73it/s] 13%|█▎ | 49252/371472 [3:55:55<23:49:24, 3.76it/s] 13%|█▎ | 49253/371472 [3:55:55<25:34:56, 3.50it/s] 13%|█▎ | 49254/371472 [3:55:55<25:21:40, 3.53it/s] 13%|█▎ | 49255/371472 [3:55:55<24:58:54, 3.58it/s] 13%|█▎ | 49256/371472 [3:55:56<25:15:01, 3.54it/s] 13%|█▎ | 49257/371472 [3:55:56<26:15:03, 3.41it/s] 13%|█▎ | 49258/371472 [3:55:56<26:47:05, 3.34it/s] 13%|█▎ | 49259/371472 [3:55:57<29:05:45, 3.08it/s] 13%|█▎ | 49260/371472 [3:55:57<27:47:58, 3.22it/s] {'loss': 4.2108, 'learning_rate': 8.810737141502725e-07, 'epoch': 2.12} + 13%|█▎ | 49260/371472 [3:55:57<27:47:58, 3.22it/s] 13%|█▎ | 49261/371472 [3:55:57<26:31:02, 3.38it/s] 13%|█▎ | 49262/371472 [3:55:58<25:19:53, 3.53it/s] 13%|█▎ | 49263/371472 [3:55:58<24:44:39, 3.62it/s] 13%|█▎ | 49264/371472 [3:55:58<25:02:09, 3.57it/s] 13%|█▎ | 49265/371472 [3:55:58<26:35:59, 3.36it/s] 13%|█▎ | 49266/371472 [3:55:59<26:15:41, 3.41it/s] 13%|█▎ | 49267/371472 [3:55:59<25:31:20, 3.51it/s] 13%|█▎ | 49268/371472 [3:55:59<24:39:52, 3.63it/s] 13%|█▎ | 49269/371472 [3:56:00<26:48:40, 3.34it/s] 13%|█▎ | 49270/371472 [3:56:00<25:23:18, 3.53it/s] 13%|█▎ | 49271/371472 [3:56:00<25:44:30, 3.48it/s] 13%|█▎ | 49272/371472 [3:56:00<24:37:52, 3.63it/s] 13%|█▎ | 49273/371472 [3:56:01<25:39:11, 3.49it/s] 13%|█▎ | 49274/371472 [3:56:01<26:28:12, 3.38it/s] 13%|█▎ | 49275/371472 [3:56:01<25:40:27, 3.49it/s] 13%|█▎ | 49276/371472 [3:56:01<24:49:31, 3.61it/s] 13%|█▎ | 49277/371472 [3:56:02<24:07:40, 3.71it/s] 13%|█▎ | 49278/371472 [3:56:02<25:13:18, 3.55it/s] 13%|█▎ | 49279/371472 [3:56:02<24:35:19, 3.64it/s] 13%|█▎ | 49280/371472 [3:56:03<24:11:35, 3.70it/s] {'loss': 4.3856, 'learning_rate': 8.810252321747937e-07, 'epoch': 2.12} + 13%|█▎ | 49280/371472 [3:56:03<24:11:35, 3.70it/s] 13%|█▎ | 49281/371472 [3:56:03<24:01:21, 3.73it/s] 13%|█▎ | 49282/371472 [3:56:03<27:30:48, 3.25it/s] 13%|█▎ | 49283/371472 [3:56:03<26:20:23, 3.40it/s] 13%|█▎ | 49284/371472 [3:56:04<26:45:00, 3.35it/s] 13%|█▎ | 49285/371472 [3:56:04<26:05:25, 3.43it/s] 13%|█▎ | 49286/371472 [3:56:04<26:18:39, 3.40it/s] 13%|█▎ | 49287/371472 [3:56:05<27:56:13, 3.20it/s] 13%|█▎ | 49288/371472 [3:56:05<27:23:41, 3.27it/s] 13%|█▎ | 49289/371472 [3:56:05<27:44:38, 3.23it/s] 13%|█▎ | 49290/371472 [3:56:06<29:02:47, 3.08it/s] 13%|█▎ | 49291/371472 [3:56:06<26:49:12, 3.34it/s] 13%|█▎ | 49292/371472 [3:56:06<25:49:29, 3.47it/s] 13%|█▎ | 49293/371472 [3:56:06<25:04:25, 3.57it/s] 13%|█▎ | 49294/371472 [3:56:07<26:45:57, 3.34it/s] 13%|█▎ | 49295/371472 [3:56:07<26:46:22, 3.34it/s] 13%|█▎ | 49296/371472 [3:56:07<25:59:00, 3.44it/s] 13%|█▎ | 49297/371472 [3:56:08<27:24:34, 3.27it/s] 13%|█▎ | 49298/371472 [3:56:08<26:31:46, 3.37it/s] 13%|█▎ | 49299/371472 [3:56:08<25:58:14, 3.45it/s] 13%|█▎ | 49300/371472 [3:56:09<25:50:32, 3.46it/s] {'loss': 4.3599, 'learning_rate': 8.809767501993147e-07, 'epoch': 2.12} + 13%|█▎ | 49300/371472 [3:56:09<25:50:32, 3.46it/s] 13%|█▎ | 49301/371472 [3:56:09<25:36:20, 3.50it/s] 13%|█▎ | 49302/371472 [3:56:09<25:13:18, 3.55it/s] 13%|█▎ | 49303/371472 [3:56:09<24:42:10, 3.62it/s] 13%|█▎ | 49304/371472 [3:56:10<25:03:35, 3.57it/s] 13%|█▎ | 49305/371472 [3:56:10<25:56:01, 3.45it/s] 13%|█▎ | 49306/371472 [3:56:10<25:50:39, 3.46it/s] 13%|█▎ | 49307/371472 [3:56:11<26:31:01, 3.37it/s] 13%|█▎ | 49308/371472 [3:56:11<26:24:07, 3.39it/s] 13%|█▎ | 49309/371472 [3:56:11<25:06:31, 3.56it/s] 13%|█▎ | 49310/371472 [3:56:11<25:18:37, 3.54it/s] 13%|█▎ | 49311/371472 [3:56:12<24:06:51, 3.71it/s] 13%|█▎ | 49312/371472 [3:56:12<24:54:43, 3.59it/s] 13%|█▎ | 49313/371472 [3:56:12<25:36:59, 3.49it/s] 13%|█▎ | 49314/371472 [3:56:12<24:43:54, 3.62it/s] 13%|█▎ | 49315/371472 [3:56:13<25:17:32, 3.54it/s] 13%|█▎ | 49316/371472 [3:56:13<24:36:04, 3.64it/s] 13%|█▎ | 49317/371472 [3:56:13<24:27:21, 3.66it/s] 13%|█▎ | 49318/371472 [3:56:14<23:48:47, 3.76it/s] 13%|█▎ | 49319/371472 [3:56:14<23:38:41, 3.78it/s] 13%|█▎ | 49320/371472 [3:56:14<23:39:08, 3.78it/s] {'loss': 4.1637, 'learning_rate': 8.809282682238358e-07, 'epoch': 2.12} + 13%|█▎ | 49320/371472 [3:56:14<23:39:08, 3.78it/s] 13%|█▎ | 49321/371472 [3:56:14<24:44:23, 3.62it/s] 13%|█▎ | 49322/371472 [3:56:15<25:35:14, 3.50it/s] 13%|█▎ | 49323/371472 [3:56:15<24:23:47, 3.67it/s] 13%|█▎ | 49324/371472 [3:56:15<23:57:15, 3.74it/s] 13%|█▎ | 49325/371472 [3:56:15<24:16:46, 3.69it/s] 13%|█▎ | 49326/371472 [3:56:16<25:02:08, 3.57it/s] 13%|█▎ | 49327/371472 [3:56:16<25:38:06, 3.49it/s] 13%|█▎ | 49328/371472 [3:56:16<24:53:44, 3.59it/s] 13%|█▎ | 49329/371472 [3:56:17<26:24:28, 3.39it/s] 13%|█▎ | 49330/371472 [3:56:17<25:27:17, 3.52it/s] 13%|█▎ | 49331/371472 [3:56:17<26:42:44, 3.35it/s] 13%|█▎ | 49332/371472 [3:56:18<25:22:14, 3.53it/s] 13%|█▎ | 49333/371472 [3:56:18<24:40:32, 3.63it/s] 13%|█▎ | 49334/371472 [3:56:18<25:03:36, 3.57it/s] 13%|█▎ | 49335/371472 [3:56:18<25:17:09, 3.54it/s] 13%|█▎ | 49336/371472 [3:56:19<24:57:19, 3.59it/s] 13%|█▎ | 49337/371472 [3:56:19<24:42:20, 3.62it/s] 13%|█▎ | 49338/371472 [3:56:19<24:09:48, 3.70it/s] 13%|█▎ | 49339/371472 [3:56:19<23:18:41, 3.84it/s] 13%|█▎ | 49340/371472 [3:56:20<23:27:01, 3.82it/s] {'loss': 4.3569, 'learning_rate': 8.80879786248357e-07, 'epoch': 2.13} + 13%|█▎ | 49340/371472 [3:56:20<23:27:01, 3.82it/s] 13%|█▎ | 49341/371472 [3:56:20<24:00:50, 3.73it/s] 13%|█▎ | 49342/371472 [3:56:20<24:23:18, 3.67it/s] 13%|█▎ | 49343/371472 [3:56:21<26:40:00, 3.36it/s] 13%|█▎ | 49344/371472 [3:56:21<25:51:46, 3.46it/s] 13%|█▎ | 49345/371472 [3:56:21<24:47:17, 3.61it/s] 13%|█▎ | 49346/371472 [3:56:21<24:12:20, 3.70it/s] 13%|█▎ | 49347/371472 [3:56:22<24:34:37, 3.64it/s] 13%|█▎ | 49348/371472 [3:56:22<27:14:53, 3.28it/s] 13%|█▎ | 49349/371472 [3:56:22<27:09:38, 3.29it/s] 13%|█▎ | 49350/371472 [3:56:23<28:31:47, 3.14it/s] 13%|█▎ | 49351/371472 [3:56:23<28:47:11, 3.11it/s] 13%|█▎ | 49352/371472 [3:56:23<27:30:53, 3.25it/s] 13%|█▎ | 49353/371472 [3:56:24<27:30:11, 3.25it/s] 13%|█▎ | 49354/371472 [3:56:24<26:07:23, 3.43it/s] 13%|█▎ | 49355/371472 [3:56:24<25:16:51, 3.54it/s] 13%|█▎ | 49356/371472 [3:56:24<25:07:31, 3.56it/s] 13%|█▎ | 49357/371472 [3:56:25<24:06:37, 3.71it/s] 13%|█▎ | 49358/371472 [3:56:25<26:06:41, 3.43it/s] 13%|█▎ | 49359/371472 [3:56:25<24:45:11, 3.61it/s] 13%|█▎ | 49360/371472 [3:56:25<24:18:41, 3.68it/s] {'loss': 4.312, 'learning_rate': 8.808313042728781e-07, 'epoch': 2.13} + 13%|█▎ | 49360/371472 [3:56:25<24:18:41, 3.68it/s] 13%|█▎ | 49361/371472 [3:56:26<24:20:06, 3.68it/s] 13%|█▎ | 49362/371472 [3:56:26<25:54:51, 3.45it/s] 13%|█▎ | 49363/371472 [3:56:26<26:13:14, 3.41it/s] 13%|█▎ | 49364/371472 [3:56:27<26:10:43, 3.42it/s] 13%|█▎ | 49365/371472 [3:56:27<25:11:47, 3.55it/s] 13%|█▎ | 49366/371472 [3:56:27<25:09:16, 3.56it/s] 13%|█▎ | 49367/371472 [3:56:27<24:25:59, 3.66it/s] 13%|█▎ | 49368/371472 [3:56:28<24:24:14, 3.67it/s] 13%|█▎ | 49369/371472 [3:56:28<24:10:10, 3.70it/s] 13%|█▎ | 49370/371472 [3:56:28<26:19:38, 3.40it/s] 13%|█▎ | 49371/371472 [3:56:29<27:07:47, 3.30it/s] 13%|█▎ | 49372/371472 [3:56:29<27:32:12, 3.25it/s] 13%|█▎ | 49373/371472 [3:56:29<27:05:00, 3.30it/s] 13%|█▎ | 49374/371472 [3:56:30<27:05:31, 3.30it/s] 13%|█▎ | 49375/371472 [3:56:30<25:42:20, 3.48it/s] 13%|█▎ | 49376/371472 [3:56:30<27:40:38, 3.23it/s] 13%|█▎ | 49377/371472 [3:56:30<27:55:43, 3.20it/s] 13%|█▎ | 49378/371472 [3:56:31<26:51:54, 3.33it/s] 13%|█▎ | 49379/371472 [3:56:31<25:46:40, 3.47it/s] 13%|█▎ | 49380/371472 [3:56:31<25:33:42, 3.50it/s] {'loss': 4.2211, 'learning_rate': 8.80782822297399e-07, 'epoch': 2.13} + 13%|█▎ | 49380/371472 [3:56:31<25:33:42, 3.50it/s] 13%|█▎ | 49381/371472 [3:56:32<25:01:51, 3.57it/s] 13%|█▎ | 49382/371472 [3:56:32<24:30:31, 3.65it/s] 13%|█▎ | 49383/371472 [3:56:32<24:44:40, 3.62it/s] 13%|█▎ | 49384/371472 [3:56:32<24:03:30, 3.72it/s] 13%|█▎ | 49385/371472 [3:56:33<26:48:24, 3.34it/s] 13%|█▎ | 49386/371472 [3:56:33<26:10:24, 3.42it/s] 13%|█▎ | 49387/371472 [3:56:33<26:02:20, 3.44it/s] 13%|█▎ | 49388/371472 [3:56:34<24:54:28, 3.59it/s] 13%|█▎ | 49389/371472 [3:56:34<25:25:41, 3.52it/s] 13%|█▎ | 49390/371472 [3:56:34<24:33:42, 3.64it/s] 13%|█▎ | 49391/371472 [3:56:34<27:43:23, 3.23it/s] 13%|█▎ | 49392/371472 [3:56:35<27:40:42, 3.23it/s] 13%|█▎ | 49393/371472 [3:56:35<25:40:34, 3.48it/s] 13%|█▎ | 49394/371472 [3:56:35<25:58:16, 3.44it/s] 13%|█▎ | 49395/371472 [3:56:36<26:58:40, 3.32it/s] 13%|█▎ | 49396/371472 [3:56:36<26:33:37, 3.37it/s] 13%|█▎ | 49397/371472 [3:56:36<25:54:24, 3.45it/s] 13%|█▎ | 49398/371472 [3:56:37<25:56:17, 3.45it/s] 13%|█▎ | 49399/371472 [3:56:37<24:44:11, 3.62it/s] 13%|█▎ | 49400/371472 [3:56:37<25:38:22, 3.49it/s] {'loss': 4.3182, 'learning_rate': 8.807343403219202e-07, 'epoch': 2.13} + 13%|█▎ | 49400/371472 [3:56:37<25:38:22, 3.49it/s] 13%|█▎ | 49401/371472 [3:56:37<24:28:57, 3.65it/s] 13%|█▎ | 49402/371472 [3:56:38<23:28:38, 3.81it/s] 13%|█▎ | 49403/371472 [3:56:38<24:19:49, 3.68it/s] 13%|█▎ | 49404/371472 [3:56:38<25:43:28, 3.48it/s] 13%|█▎ | 49405/371472 [3:56:38<26:26:33, 3.38it/s] 13%|█▎ | 49406/371472 [3:56:39<24:48:19, 3.61it/s] 13%|█▎ | 49407/371472 [3:56:39<24:16:43, 3.68it/s] 13%|█▎ | 49408/371472 [3:56:39<24:38:54, 3.63it/s] 13%|█▎ | 49409/371472 [3:56:40<25:47:26, 3.47it/s] 13%|█▎ | 49410/371472 [3:56:40<25:31:44, 3.50it/s] 13%|█▎ | 49411/371472 [3:56:40<25:11:57, 3.55it/s] 13%|█▎ | 49412/371472 [3:56:40<24:02:51, 3.72it/s] 13%|█▎ | 49413/371472 [3:56:41<24:07:05, 3.71it/s] 13%|█▎ | 49414/371472 [3:56:41<23:51:53, 3.75it/s] 13%|█▎ | 49415/371472 [3:56:41<23:32:54, 3.80it/s] 13%|█▎ | 49416/371472 [3:56:41<25:36:27, 3.49it/s] 13%|█▎ | 49417/371472 [3:56:42<26:52:11, 3.33it/s] 13%|█▎ | 49418/371472 [3:56:42<26:14:24, 3.41it/s] 13%|█▎ | 49419/371472 [3:56:42<25:18:14, 3.54it/s] 13%|█▎ | 49420/371472 [3:56:43<26:36:49, 3.36it/s] {'loss': 4.3647, 'learning_rate': 8.806858583464414e-07, 'epoch': 2.13} + 13%|█▎ | 49420/371472 [3:56:43<26:36:49, 3.36it/s] 13%|█▎ | 49421/371472 [3:56:43<25:31:01, 3.51it/s] 13%|█▎ | 49422/371472 [3:56:43<25:00:36, 3.58it/s] 13%|█▎ | 49423/371472 [3:56:43<24:32:43, 3.64it/s] 13%|█▎ | 49424/371472 [3:56:44<25:33:33, 3.50it/s] 13%|█▎ | 49425/371472 [3:56:44<26:17:58, 3.40it/s] 13%|█▎ | 49426/371472 [3:56:44<26:39:24, 3.36it/s] 13%|█▎ | 49427/371472 [3:56:45<27:29:07, 3.25it/s] 13%|█▎ | 49428/371472 [3:56:45<25:58:48, 3.44it/s] 13%|█▎ | 49429/371472 [3:56:45<26:21:48, 3.39it/s] 13%|█▎ | 49430/371472 [3:56:46<25:20:33, 3.53it/s] 13%|█▎ | 49431/371472 [3:56:46<25:33:36, 3.50it/s] 13%|█▎ | 49432/371472 [3:56:46<25:42:19, 3.48it/s] 13%|█▎ | 49433/371472 [3:56:46<24:40:44, 3.62it/s] 13%|█▎ | 49434/371472 [3:56:47<23:40:33, 3.78it/s] 13%|█▎ | 49435/371472 [3:56:47<24:38:39, 3.63it/s] 13%|█▎ | 49436/371472 [3:56:47<24:21:13, 3.67it/s] 13%|█▎ | 49437/371472 [3:56:48<26:44:46, 3.34it/s] 13%|█▎ | 49438/371472 [3:56:48<26:16:13, 3.41it/s] 13%|█▎ | 49439/371472 [3:56:48<26:09:30, 3.42it/s] 13%|█▎ | 49440/371472 [3:56:48<25:56:00, 3.45it/s] {'loss': 4.1665, 'learning_rate': 8.806373763709625e-07, 'epoch': 2.13} + 13%|█▎ | 49440/371472 [3:56:48<25:56:00, 3.45it/s] 13%|█▎ | 49441/371472 [3:56:49<26:08:55, 3.42it/s] 13%|█▎ | 49442/371472 [3:56:49<27:22:15, 3.27it/s] 13%|█▎ | 49443/371472 [3:56:49<26:18:50, 3.40it/s] 13%|█▎ | 49444/371472 [3:56:50<26:08:41, 3.42it/s] 13%|█▎ | 49445/371472 [3:56:50<27:19:04, 3.27it/s] 13%|█▎ | 49446/371472 [3:56:50<27:27:07, 3.26it/s] 13%|█▎ | 49447/371472 [3:56:51<27:16:15, 3.28it/s] 13%|█▎ | 49448/371472 [3:56:51<26:36:56, 3.36it/s] 13%|█▎ | 49449/371472 [3:56:51<26:47:25, 3.34it/s] 13%|█▎ | 49450/371472 [3:56:51<26:25:35, 3.38it/s] 13%|█▎ | 49451/371472 [3:56:52<26:10:24, 3.42it/s] 13%|█▎ | 49452/371472 [3:56:52<24:51:46, 3.60it/s] 13%|█▎ | 49453/371472 [3:56:52<24:15:31, 3.69it/s] 13%|█▎ | 49454/371472 [3:56:52<23:49:18, 3.75it/s] 13%|█▎ | 49455/371472 [3:56:53<24:39:57, 3.63it/s] 13%|█▎ | 49456/371472 [3:56:53<25:08:09, 3.56it/s] 13%|█▎ | 49457/371472 [3:56:53<23:49:24, 3.75it/s] 13%|█▎ | 49458/371472 [3:56:54<22:59:23, 3.89it/s] 13%|█▎ | 49459/371472 [3:56:54<23:30:27, 3.81it/s] 13%|█▎ | 49460/371472 [3:56:54<24:24:52, 3.66it/s] {'loss': 4.3388, 'learning_rate': 8.805888943954836e-07, 'epoch': 2.13} + 13%|█▎ | 49460/371472 [3:56:54<24:24:52, 3.66it/s] 13%|█▎ | 49461/371472 [3:56:54<24:40:01, 3.63it/s] 13%|█▎ | 49462/371472 [3:56:55<24:15:38, 3.69it/s] 13%|█▎ | 49463/371472 [3:56:55<24:07:26, 3.71it/s] 13%|█▎ | 49464/371472 [3:56:55<23:26:36, 3.82it/s] 13%|█▎ | 49465/371472 [3:56:55<23:40:38, 3.78it/s] 13%|█▎ | 49466/371472 [3:56:56<25:09:39, 3.55it/s] 13%|█▎ | 49467/371472 [3:56:56<24:38:32, 3.63it/s] 13%|█▎ | 49468/371472 [3:56:56<24:01:23, 3.72it/s] 13%|█▎ | 49469/371472 [3:56:57<24:27:44, 3.66it/s] 13%|█▎ | 49470/371472 [3:56:57<24:27:58, 3.66it/s] 13%|█▎ | 49471/371472 [3:56:57<23:52:23, 3.75it/s] 13%|█▎ | 49472/371472 [3:56:57<23:46:25, 3.76it/s] 13%|█▎ | 49473/371472 [3:56:58<25:32:47, 3.50it/s] 13%|█▎ | 49474/371472 [3:56:58<24:42:03, 3.62it/s] 13%|█▎ | 49475/371472 [3:56:58<25:45:24, 3.47it/s] 13%|█▎ | 49476/371472 [3:56:59<29:18:23, 3.05it/s] 13%|█▎ | 49477/371472 [3:56:59<29:33:08, 3.03it/s] 13%|█▎ | 49478/371472 [3:56:59<27:25:47, 3.26it/s] 13%|█▎ | 49479/371472 [3:57:00<28:31:03, 3.14it/s] 13%|█▎ | 49480/371472 [3:57:00<27:04:35, 3.30it/s] {'loss': 4.3251, 'learning_rate': 8.805404124200048e-07, 'epoch': 2.13} + 13%|█▎ | 49480/371472 [3:57:00<27:04:35, 3.30it/s] 13%|█▎ | 49481/371472 [3:57:00<25:55:33, 3.45it/s] 13%|█▎ | 49482/371472 [3:57:00<25:15:05, 3.54it/s] 13%|█▎ | 49483/371472 [3:57:01<25:28:20, 3.51it/s] 13%|█▎ | 49484/371472 [3:57:01<27:35:40, 3.24it/s] 13%|█▎ | 49485/371472 [3:57:01<28:24:56, 3.15it/s] 13%|█▎ | 49486/371472 [3:57:02<27:39:23, 3.23it/s] 13%|█▎ | 49487/371472 [3:57:02<25:57:44, 3.45it/s] 13%|█▎ | 49488/371472 [3:57:02<25:26:50, 3.51it/s] 13%|█▎ | 49489/371472 [3:57:02<24:59:08, 3.58it/s] 13%|█▎ | 49490/371472 [3:57:03<27:44:17, 3.22it/s] 13%|█▎ | 49491/371472 [3:57:03<26:04:59, 3.43it/s] 13%|█▎ | 49492/371472 [3:57:03<25:06:11, 3.56it/s] 13%|█▎ | 49493/371472 [3:57:04<26:30:04, 3.37it/s] 13%|█▎ | 49494/371472 [3:57:04<27:12:20, 3.29it/s] 13%|█▎ | 49495/371472 [3:57:04<25:33:32, 3.50it/s] 13%|█▎ | 49496/371472 [3:57:04<24:27:09, 3.66it/s] 13%|█▎ | 49497/371472 [3:57:05<25:22:51, 3.52it/s] 13%|█▎ | 49498/371472 [3:57:05<25:04:22, 3.57it/s] 13%|█▎ | 49499/371472 [3:57:05<24:22:03, 3.67it/s] 13%|█▎ | 49500/371472 [3:57:06<24:18:54, 3.68it/s] {'loss': 4.3496, 'learning_rate': 8.804919304445258e-07, 'epoch': 2.13} + 13%|█▎ | 49500/371472 [3:57:06<24:18:54, 3.68it/s] 13%|█▎ | 49501/371472 [3:57:06<26:56:36, 3.32it/s] 13%|█▎ | 49502/371472 [3:57:06<25:27:42, 3.51it/s] 13%|█▎ | 49503/371472 [3:57:06<25:20:17, 3.53it/s] 13%|█▎ | 49504/371472 [3:57:07<25:20:37, 3.53it/s] 13%|█▎ | 49505/371472 [3:57:07<24:05:11, 3.71it/s] 13%|█▎ | 49506/371472 [3:57:07<29:02:12, 3.08it/s] 13%|█▎ | 49507/371472 [3:57:08<27:47:53, 3.22it/s] 13%|█▎ | 49508/371472 [3:57:08<26:12:53, 3.41it/s] 13%|█▎ | 49509/371472 [3:57:08<26:27:36, 3.38it/s] 13%|█▎ | 49510/371472 [3:57:09<26:14:42, 3.41it/s] 13%|█▎ | 49511/371472 [3:57:09<25:25:18, 3.52it/s] 13%|█▎ | 49512/371472 [3:57:09<24:33:48, 3.64it/s] 13%|█▎ | 49513/371472 [3:57:09<24:48:37, 3.60it/s] 13%|█▎ | 49514/371472 [3:57:10<23:30:35, 3.80it/s] 13%|█▎ | 49515/371472 [3:57:10<25:03:41, 3.57it/s] 13%|█▎ | 49516/371472 [3:57:10<24:09:41, 3.70it/s] 13%|█▎ | 49517/371472 [3:57:10<25:44:28, 3.47it/s] 13%|█▎ | 49518/371472 [3:57:11<27:03:05, 3.31it/s] 13%|█▎ | 49519/371472 [3:57:11<27:02:02, 3.31it/s] 13%|█▎ | 49520/371472 [3:57:11<26:13:53, 3.41it/s] {'loss': 4.0173, 'learning_rate': 8.804434484690469e-07, 'epoch': 2.13} + 13%|█▎ | 49520/371472 [3:57:11<26:13:53, 3.41it/s] 13%|█▎ | 49521/371472 [3:57:12<25:36:30, 3.49it/s] 13%|█▎ | 49522/371472 [3:57:12<24:24:39, 3.66it/s] 13%|█▎ | 49523/371472 [3:57:12<24:56:26, 3.59it/s] 13%|█▎ | 49524/371472 [3:57:12<24:11:36, 3.70it/s] 13%|█▎ | 49525/371472 [3:57:13<25:18:37, 3.53it/s] 13%|█▎ | 49526/371472 [3:57:13<26:28:43, 3.38it/s] 13%|█▎ | 49527/371472 [3:57:13<27:16:06, 3.28it/s] 13%|█▎ | 49528/371472 [3:57:14<27:31:58, 3.25it/s] 13%|█▎ | 49529/371472 [3:57:14<26:26:30, 3.38it/s] 13%|█▎ | 49530/371472 [3:57:14<26:11:52, 3.41it/s] 13%|█▎ | 49531/371472 [3:57:15<25:50:19, 3.46it/s] 13%|█▎ | 49532/371472 [3:57:15<24:52:15, 3.60it/s] 13%|█▎ | 49533/371472 [3:57:15<25:44:32, 3.47it/s] 13%|█▎ | 49534/371472 [3:57:15<26:10:02, 3.42it/s] 13%|█▎ | 49535/371472 [3:57:16<26:29:21, 3.38it/s] 13%|█▎ | 49536/371472 [3:57:16<25:19:02, 3.53it/s] 13%|█▎ | 49537/371472 [3:57:16<25:40:38, 3.48it/s] 13%|█▎ | 49538/371472 [3:57:17<25:08:04, 3.56it/s] 13%|█▎ | 49539/371472 [3:57:17<24:24:35, 3.66it/s] 13%|█▎ | 49540/371472 [3:57:17<23:31:19, 3.80it/s] {'loss': 4.291, 'learning_rate': 8.80394966493568e-07, 'epoch': 2.13} + 13%|█▎ | 49540/371472 [3:57:17<23:31:19, 3.80it/s] 13%|█▎ | 49541/371472 [3:57:17<23:15:51, 3.84it/s] 13%|█▎ | 49542/371472 [3:57:18<23:35:19, 3.79it/s] 13%|█▎ | 49543/371472 [3:57:18<25:20:20, 3.53it/s] 13%|█▎ | 49544/371472 [3:57:18<25:10:28, 3.55it/s] 13%|█▎ | 49545/371472 [3:57:19<26:41:53, 3.35it/s] 13%|█▎ | 49546/371472 [3:57:19<25:24:55, 3.52it/s] 13%|█▎ | 49547/371472 [3:57:19<25:09:10, 3.56it/s] 13%|█▎ | 49548/371472 [3:57:19<24:34:34, 3.64it/s] 13%|█▎ | 49549/371472 [3:57:20<23:44:02, 3.77it/s] 13%|█▎ | 49550/371472 [3:57:20<24:00:58, 3.72it/s] 13%|█▎ | 49551/371472 [3:57:20<23:51:29, 3.75it/s] 13%|█▎ | 49552/371472 [3:57:20<24:44:16, 3.61it/s] 13%|█▎ | 49553/371472 [3:57:21<25:44:28, 3.47it/s] 13%|█▎ | 49554/371472 [3:57:21<26:06:08, 3.43it/s] 13%|█▎ | 49555/371472 [3:57:21<26:23:24, 3.39it/s] 13%|█▎ | 49556/371472 [3:57:22<27:15:34, 3.28it/s] 13%|█▎ | 49557/371472 [3:57:22<28:20:25, 3.16it/s] 13%|█▎ | 49558/371472 [3:57:22<26:59:16, 3.31it/s] 13%|█▎ | 49559/371472 [3:57:23<27:51:37, 3.21it/s] 13%|█▎ | 49560/371472 [3:57:23<26:16:04, 3.40it/s] {'loss': 4.271, 'learning_rate': 8.803464845180891e-07, 'epoch': 2.13} + 13%|█▎ | 49560/371472 [3:57:23<26:16:04, 3.40it/s] 13%|█▎ | 49561/371472 [3:57:23<26:43:24, 3.35it/s] 13%|█▎ | 49562/371472 [3:57:23<25:22:19, 3.52it/s] 13%|█▎ | 49563/371472 [3:57:24<24:31:48, 3.65it/s] 13%|█▎ | 49564/371472 [3:57:24<24:43:53, 3.62it/s] 13%|█▎ | 49565/371472 [3:57:24<24:40:16, 3.62it/s] 13%|█▎ | 49566/371472 [3:57:24<24:08:44, 3.70it/s] 13%|█▎ | 49567/371472 [3:57:25<24:49:36, 3.60it/s] 13%|█▎ | 49568/371472 [3:57:25<24:31:29, 3.65it/s] 13%|█▎ | 49569/371472 [3:57:25<26:44:38, 3.34it/s] 13%|█▎ | 49570/371472 [3:57:26<29:43:22, 3.01it/s] 13%|█▎ | 49571/371472 [3:57:26<27:40:13, 3.23it/s] 13%|█▎ | 49572/371472 [3:57:26<27:11:19, 3.29it/s] 13%|█▎ | 49573/371472 [3:57:27<25:43:57, 3.47it/s] 13%|█▎ | 49574/371472 [3:57:27<25:02:48, 3.57it/s] 13%|█▎ | 49575/371472 [3:57:27<25:47:26, 3.47it/s] 13%|█▎ | 49576/371472 [3:57:27<25:18:46, 3.53it/s] 13%|█▎ | 49577/371472 [3:57:28<24:08:52, 3.70it/s] 13%|█▎ | 49578/371472 [3:57:28<25:00:13, 3.58it/s] 13%|█▎ | 49579/371472 [3:57:28<25:33:47, 3.50it/s] 13%|█▎ | 49580/371472 [3:57:29<25:32:40, 3.50it/s] {'loss': 4.1767, 'learning_rate': 8.802980025426103e-07, 'epoch': 2.14} + 13%|█▎ | 49580/371472 [3:57:29<25:32:40, 3.50it/s] 13%|█▎ | 49581/371472 [3:57:29<25:41:33, 3.48it/s] 13%|█▎ | 49582/371472 [3:57:29<25:08:03, 3.56it/s] 13%|█▎ | 49583/371472 [3:57:29<24:23:31, 3.67it/s] 13%|█▎ | 49584/371472 [3:57:30<24:43:05, 3.62it/s] 13%|█▎ | 49585/371472 [3:57:30<24:45:52, 3.61it/s] 13%|█▎ | 49586/371472 [3:57:30<24:25:30, 3.66it/s] 13%|█▎ | 49587/371472 [3:57:30<25:04:57, 3.56it/s] 13%|█▎ | 49588/371472 [3:57:31<25:03:30, 3.57it/s] 13%|█▎ | 49589/371472 [3:57:31<25:00:21, 3.58it/s] 13%|█▎ | 49590/371472 [3:57:31<25:32:07, 3.50it/s] 13%|█▎ | 49591/371472 [3:57:32<26:57:13, 3.32it/s] 13%|█▎ | 49592/371472 [3:57:32<26:46:11, 3.34it/s] 13%|█▎ | 49593/371472 [3:57:32<25:45:48, 3.47it/s] 13%|█▎ | 49594/371472 [3:57:33<26:31:39, 3.37it/s] 13%|█▎ | 49595/371472 [3:57:33<27:24:20, 3.26it/s] 13%|█▎ | 49596/371472 [3:57:33<26:54:58, 3.32it/s] 13%|█▎ | 49597/371472 [3:57:34<28:31:22, 3.13it/s] 13%|█▎ | 49598/371472 [3:57:34<27:37:19, 3.24it/s] 13%|█▎ | 49599/371472 [3:57:34<27:05:17, 3.30it/s] 13%|█▎ | 49600/371472 [3:57:34<25:46:40, 3.47it/s] {'loss': 4.3604, 'learning_rate': 8.802495205671313e-07, 'epoch': 2.14} + 13%|█▎ | 49600/371472 [3:57:34<25:46:40, 3.47it/s] 13%|█▎ | 49601/371472 [3:57:35<24:46:10, 3.61it/s] 13%|█▎ | 49602/371472 [3:57:35<24:56:03, 3.59it/s] 13%|█▎ | 49603/371472 [3:57:35<24:21:02, 3.67it/s] 13%|█▎ | 49604/371472 [3:57:36<27:57:43, 3.20it/s] 13%|█▎ | 49605/371472 [3:57:36<31:47:09, 2.81it/s] 13%|█▎ | 49606/371472 [3:57:36<29:46:29, 3.00it/s] 13%|█▎ | 49607/371472 [3:57:37<28:00:29, 3.19it/s] 13%|█▎ | 49608/371472 [3:57:37<27:16:18, 3.28it/s] 13%|█▎ | 49609/371472 [3:57:37<26:12:03, 3.41it/s] 13%|█▎ | 49610/371472 [3:57:37<26:28:26, 3.38it/s] 13%|█▎ | 49611/371472 [3:57:38<27:47:05, 3.22it/s] 13%|█▎ | 49612/371472 [3:57:38<27:53:53, 3.20it/s] 13%|█▎ | 49613/371472 [3:57:38<27:26:13, 3.26it/s] 13%|█▎ | 49614/371472 [3:57:39<26:19:55, 3.40it/s] 13%|█▎ | 49615/371472 [3:57:39<26:42:44, 3.35it/s] 13%|█▎ | 49616/371472 [3:57:39<26:36:28, 3.36it/s] 13%|█▎ | 49617/371472 [3:57:40<26:51:56, 3.33it/s] 13%|█▎ | 49618/371472 [3:57:40<27:22:20, 3.27it/s] 13%|█▎ | 49619/371472 [3:57:40<25:54:38, 3.45it/s] 13%|█▎ | 49620/371472 [3:57:40<27:13:31, 3.28it/s] {'loss': 4.2652, 'learning_rate': 8.802010385916525e-07, 'epoch': 2.14} + 13%|█▎ | 49620/371472 [3:57:40<27:13:31, 3.28it/s] 13%|█▎ | 49621/371472 [3:57:41<26:49:08, 3.33it/s] 13%|█▎ | 49622/371472 [3:57:41<27:27:13, 3.26it/s] 13%|█▎ | 49623/371472 [3:57:41<25:43:53, 3.47it/s] 13%|█▎ | 49624/371472 [3:57:42<27:58:55, 3.19it/s] 13%|█▎ | 49625/371472 [3:57:42<26:12:41, 3.41it/s] 13%|█▎ | 49626/371472 [3:57:42<26:32:19, 3.37it/s] 13%|█▎ | 49627/371472 [3:57:42<24:50:18, 3.60it/s] 13%|█▎ | 49628/371472 [3:57:43<26:01:33, 3.44it/s] 13%|█▎ | 49629/371472 [3:57:43<25:46:58, 3.47it/s] 13%|█▎ | 49630/371472 [3:57:43<25:16:00, 3.54it/s] 13%|█▎ | 49631/371472 [3:57:44<24:32:17, 3.64it/s] 13%|█▎ | 49632/371472 [3:57:44<26:20:46, 3.39it/s] 13%|█▎ | 49633/371472 [3:57:44<26:51:52, 3.33it/s] 13%|█▎ | 49634/371472 [3:57:45<26:13:25, 3.41it/s] 13%|█▎ | 49635/371472 [3:57:45<25:18:05, 3.53it/s] 13%|█▎ | 49636/371472 [3:57:45<24:39:13, 3.63it/s] 13%|█▎ | 49637/371472 [3:57:45<25:14:29, 3.54it/s] 13%|█▎ | 49638/371472 [3:57:46<24:24:04, 3.66it/s] 13%|█▎ | 49639/371472 [3:57:46<26:11:34, 3.41it/s] 13%|█▎ | 49640/371472 [3:57:46<26:28:44, 3.38it/s] {'loss': 4.296, 'learning_rate': 8.801525566161735e-07, 'epoch': 2.14} + 13%|█▎ | 49640/371472 [3:57:46<26:28:44, 3.38it/s] 13%|█▎ | 49641/371472 [3:57:46<25:36:55, 3.49it/s] 13%|█▎ | 49642/371472 [3:57:47<26:11:33, 3.41it/s] 13%|█▎ | 49643/371472 [3:57:47<29:41:28, 3.01it/s] 13%|█▎ | 49644/371472 [3:57:48<28:20:24, 3.15it/s] 13%|█▎ | 49645/371472 [3:57:48<26:16:31, 3.40it/s] 13%|█▎ | 49646/371472 [3:57:48<25:56:44, 3.45it/s] 13%|█▎ | 49647/371472 [3:57:48<28:02:08, 3.19it/s] 13%|█▎ | 49648/371472 [3:57:49<25:55:35, 3.45it/s] 13%|█▎ | 49649/371472 [3:57:49<25:26:58, 3.51it/s] 13%|█▎ | 49650/371472 [3:57:49<26:00:15, 3.44it/s] 13%|█▎ | 49651/371472 [3:57:49<25:07:41, 3.56it/s] 13%|█▎ | 49652/371472 [3:57:50<24:18:07, 3.68it/s] 13%|█▎ | 49653/371472 [3:57:50<23:36:10, 3.79it/s] 13%|█▎ | 49654/371472 [3:57:50<27:19:54, 3.27it/s] 13%|█▎ | 49655/371472 [3:57:51<27:38:09, 3.23it/s] 13%|█▎ | 49656/371472 [3:57:51<26:12:22, 3.41it/s] 13%|█▎ | 49657/371472 [3:57:51<25:49:48, 3.46it/s] 13%|█▎ | 49658/371472 [3:57:51<25:25:16, 3.52it/s] 13%|█▎ | 49659/371472 [3:57:52<26:43:01, 3.35it/s] 13%|█▎ | 49660/371472 [3:57:52<25:48:04, 3.46it/s] {'loss': 4.3255, 'learning_rate': 8.801040746406947e-07, 'epoch': 2.14} + 13%|█▎ | 49660/371472 [3:57:52<25:48:04, 3.46it/s] 13%|█▎ | 49661/371472 [3:57:52<26:19:09, 3.40it/s] 13%|█▎ | 49662/371472 [3:57:53<25:18:40, 3.53it/s] 13%|█▎ | 49663/371472 [3:57:53<25:45:43, 3.47it/s] 13%|█▎ | 49664/371472 [3:57:53<25:59:12, 3.44it/s] 13%|█▎ | 49665/371472 [3:57:54<24:52:10, 3.59it/s] 13%|█▎ | 49666/371472 [3:57:54<24:28:00, 3.65it/s] 13%|█▎ | 49667/371472 [3:57:54<24:11:19, 3.70it/s] 13%|█▎ | 49668/371472 [3:57:54<24:56:18, 3.58it/s] 13%|█▎ | 49669/371472 [3:57:55<23:56:01, 3.73it/s] 13%|█▎ | 49670/371472 [3:57:55<23:58:25, 3.73it/s] 13%|█▎ | 49671/371472 [3:57:55<24:04:26, 3.71it/s] 13%|█▎ | 49672/371472 [3:57:55<23:55:59, 3.73it/s] 13%|█▎ | 49673/371472 [3:57:56<25:54:22, 3.45it/s] 13%|█▎ | 49674/371472 [3:57:56<24:53:02, 3.59it/s] 13%|█▎ | 49675/371472 [3:57:56<28:24:28, 3.15it/s] 13%|█▎ | 49676/371472 [3:57:57<27:00:54, 3.31it/s] 13%|█▎ | 49677/371472 [3:57:57<25:22:53, 3.52it/s] 13%|█▎ | 49678/371472 [3:57:57<24:33:15, 3.64it/s] 13%|█▎ | 49679/371472 [3:57:57<24:40:14, 3.62it/s] 13%|█▎ | 49680/371472 [3:57:58<23:52:12, 3.74it/s] {'loss': 4.2263, 'learning_rate': 8.800555926652157e-07, 'epoch': 2.14} + 13%|█▎ | 49680/371472 [3:57:58<23:52:12, 3.74it/s] 13%|█▎ | 49681/371472 [3:57:58<23:24:25, 3.82it/s] 13%|█▎ | 49682/371472 [3:57:58<24:39:24, 3.63it/s] 13%|█▎ | 49683/371472 [3:57:58<24:15:53, 3.68it/s] 13%|█▎ | 49684/371472 [3:57:59<25:27:42, 3.51it/s] 13%|█▎ | 49685/371472 [3:57:59<24:29:54, 3.65it/s] 13%|█▎ | 49686/371472 [3:57:59<24:29:12, 3.65it/s] 13%|█▎ | 49687/371472 [3:58:00<25:21:13, 3.53it/s] 13%|█▎ | 49688/371472 [3:58:00<24:57:34, 3.58it/s] 13%|█▎ | 49689/371472 [3:58:00<24:57:19, 3.58it/s] 13%|█▎ | 49690/371472 [3:58:00<25:04:06, 3.57it/s] 13%|█▎ | 49691/371472 [3:58:01<25:55:05, 3.45it/s] 13%|█▎ | 49692/371472 [3:58:01<25:39:28, 3.48it/s] 13%|█▎ | 49693/371472 [3:58:01<25:47:59, 3.46it/s] 13%|█▎ | 49694/371472 [3:58:02<25:03:09, 3.57it/s] 13%|█▎ | 49695/371472 [3:58:02<27:01:12, 3.31it/s] 13%|█▎ | 49696/371472 [3:58:02<26:53:45, 3.32it/s] 13%|█▎ | 49697/371472 [3:58:03<25:47:02, 3.47it/s] 13%|█▎ | 49698/371472 [3:58:03<26:29:06, 3.37it/s] 13%|█▎ | 49699/371472 [3:58:03<26:11:42, 3.41it/s] 13%|█▎ | 49700/371472 [3:58:03<25:39:12, 3.48it/s] {'loss': 4.3562, 'learning_rate': 8.800071106897368e-07, 'epoch': 2.14} + 13%|█▎ | 49700/371472 [3:58:03<25:39:12, 3.48it/s] 13%|█▎ | 49701/371472 [3:58:04<24:33:46, 3.64it/s] 13%|█▎ | 49702/371472 [3:58:04<23:44:52, 3.76it/s] 13%|█▎ | 49703/371472 [3:58:04<25:37:06, 3.49it/s] 13%|█▎ | 49704/371472 [3:58:04<25:16:36, 3.54it/s] 13%|█▎ | 49705/371472 [3:58:05<25:24:34, 3.52it/s] 13%|█▎ | 49706/371472 [3:58:05<25:32:12, 3.50it/s] 13%|█▎ | 49707/371472 [3:58:05<25:50:41, 3.46it/s] 13%|█▎ | 49708/371472 [3:58:06<25:58:32, 3.44it/s] 13%|█▎ | 49709/371472 [3:58:06<24:19:14, 3.67it/s] 13%|█▎ | 49710/371472 [3:58:06<25:21:30, 3.52it/s] 13%|█▎ | 49711/371472 [3:58:06<24:32:23, 3.64it/s] 13%|█▎ | 49712/371472 [3:58:07<24:24:13, 3.66it/s] 13%|█▎ | 49713/371472 [3:58:07<23:54:26, 3.74it/s] 13%|█▎ | 49714/371472 [3:58:07<23:52:56, 3.74it/s] 13%|█▎ | 49715/371472 [3:58:08<24:21:49, 3.67it/s] 13%|█▎ | 49716/371472 [3:58:08<23:33:56, 3.79it/s] 13%|█▎ | 49717/371472 [3:58:08<23:19:21, 3.83it/s] 13%|█▎ | 49718/371472 [3:58:08<23:59:32, 3.73it/s] 13%|█▎ | 49719/371472 [3:58:09<24:29:05, 3.65it/s] 13%|█▎ | 49720/371472 [3:58:09<24:18:31, 3.68it/s] {'loss': 4.1724, 'learning_rate': 8.79958628714258e-07, 'epoch': 2.14} + 13%|█▎ | 49720/371472 [3:58:09<24:18:31, 3.68it/s] 13%|█▎ | 49721/371472 [3:58:09<23:48:00, 3.76it/s] 13%|█▎ | 49722/371472 [3:58:09<24:10:25, 3.70it/s] 13%|█▎ | 49723/371472 [3:58:10<23:38:00, 3.78it/s] 13%|█▎ | 49724/371472 [3:58:10<24:59:01, 3.58it/s] 13%|█▎ | 49725/371472 [3:58:10<26:22:11, 3.39it/s] 13%|█▎ | 49726/371472 [3:58:11<25:05:55, 3.56it/s] 13%|█▎ | 49727/371472 [3:58:11<24:44:41, 3.61it/s] 13%|█▎ | 49728/371472 [3:58:11<24:36:05, 3.63it/s] 13%|█▎ | 49729/371472 [3:58:11<25:27:08, 3.51it/s] 13%|█▎ | 49730/371472 [3:58:12<25:50:08, 3.46it/s] 13%|█▎ | 49731/371472 [3:58:12<25:10:08, 3.55it/s] 13%|█▎ | 49732/371472 [3:58:12<25:49:25, 3.46it/s] 13%|█▎ | 49733/371472 [3:58:13<25:46:23, 3.47it/s] 13%|█▎ | 49734/371472 [3:58:13<25:19:11, 3.53it/s] 13%|█▎ | 49735/371472 [3:58:13<24:30:24, 3.65it/s] 13%|█▎ | 49736/371472 [3:58:13<24:55:31, 3.59it/s] 13%|█▎ | 49737/371472 [3:58:14<24:31:19, 3.64it/s] 13%|█▎ | 49738/371472 [3:58:14<23:56:48, 3.73it/s] 13%|█▎ | 49739/371472 [3:58:14<24:12:06, 3.69it/s] 13%|█▎ | 49740/371472 [3:58:14<24:40:26, 3.62it/s] {'loss': 4.3046, 'learning_rate': 8.799101467387792e-07, 'epoch': 2.14} + 13%|█▎ | 49740/371472 [3:58:14<24:40:26, 3.62it/s] 13%|█▎ | 49741/371472 [3:58:15<23:32:59, 3.79it/s] 13%|█▎ | 49742/371472 [3:58:15<23:02:24, 3.88it/s] 13%|█▎ | 49743/371472 [3:58:15<23:08:12, 3.86it/s] 13%|█▎ | 49744/371472 [3:58:16<24:48:09, 3.60it/s] 13%|█▎ | 49745/371472 [3:58:16<24:27:16, 3.65it/s] 13%|█▎ | 49746/371472 [3:58:16<24:35:23, 3.63it/s] 13%|█▎ | 49747/371472 [3:58:16<25:39:42, 3.48it/s] 13%|█▎ | 49748/371472 [3:58:17<25:23:50, 3.52it/s] 13%|█▎ | 49749/371472 [3:58:17<25:07:30, 3.56it/s] 13%|█▎ | 49750/371472 [3:58:17<24:39:16, 3.62it/s] 13%|█▎ | 49751/371472 [3:58:17<24:35:51, 3.63it/s] 13%|█▎ | 49752/371472 [3:58:18<25:53:59, 3.45it/s] 13%|█▎ | 49753/371472 [3:58:18<26:55:17, 3.32it/s] 13%|█▎ | 49754/371472 [3:58:18<26:04:36, 3.43it/s] 13%|█▎ | 49755/371472 [3:58:19<24:52:52, 3.59it/s] 13%|█▎ | 49756/371472 [3:58:19<23:32:59, 3.79it/s] 13%|█▎ | 49757/371472 [3:58:19<24:52:00, 3.59it/s] 13%|█▎ | 49758/371472 [3:58:20<27:11:59, 3.29it/s] 13%|█▎ | 49759/371472 [3:58:20<27:20:31, 3.27it/s] 13%|█▎ | 49760/371472 [3:58:20<26:40:02, 3.35it/s] {'loss': 4.3324, 'learning_rate': 8.798616647633001e-07, 'epoch': 2.14} + 13%|█▎ | 49760/371472 [3:58:20<26:40:02, 3.35it/s] 13%|█▎ | 49761/371472 [3:58:20<26:26:17, 3.38it/s] 13%|█▎ | 49762/371472 [3:58:21<27:47:04, 3.22it/s] 13%|█▎ | 49763/371472 [3:58:21<26:51:10, 3.33it/s] 13%|█▎ | 49764/371472 [3:58:21<25:46:10, 3.47it/s] 13%|█▎ | 49765/371472 [3:58:22<24:47:10, 3.61it/s] 13%|█▎ | 49766/371472 [3:58:22<24:28:17, 3.65it/s] 13%|█▎ | 49767/371472 [3:58:22<24:23:46, 3.66it/s] 13%|█▎ | 49768/371472 [3:58:22<23:27:10, 3.81it/s] 13%|█▎ | 49769/371472 [3:58:23<23:52:14, 3.74it/s] 13%|█▎ | 49770/371472 [3:58:23<24:46:05, 3.61it/s] 13%|█▎ | 49771/371472 [3:58:23<26:40:32, 3.35it/s] 13%|█▎ | 49772/371472 [3:58:23<25:25:05, 3.52it/s] 13%|█▎ | 49773/371472 [3:58:24<24:25:45, 3.66it/s] 13%|█▎ | 49774/371472 [3:58:24<25:57:12, 3.44it/s] 13%|█▎ | 49775/371472 [3:58:24<25:43:18, 3.47it/s] 13%|█▎ | 49776/371472 [3:58:25<25:22:54, 3.52it/s] 13%|█▎ | 49777/371472 [3:58:25<24:43:40, 3.61it/s] 13%|█▎ | 49778/371472 [3:58:25<24:35:32, 3.63it/s] 13%|█▎ | 49779/371472 [3:58:25<25:13:25, 3.54it/s] 13%|█▎ | 49780/371472 [3:58:26<25:17:41, 3.53it/s] {'loss': 4.106, 'learning_rate': 8.798131827878212e-07, 'epoch': 2.14} + 13%|█▎ | 49780/371472 [3:58:26<25:17:41, 3.53it/s] 13%|█▎ | 49781/371472 [3:58:26<25:49:03, 3.46it/s] 13%|█▎ | 49782/371472 [3:58:26<24:47:21, 3.60it/s] 13%|█▎ | 49783/371472 [3:58:27<24:53:02, 3.59it/s] 13%|█▎ | 49784/371472 [3:58:27<24:53:02, 3.59it/s] 13%|█▎ | 49785/371472 [3:58:27<26:21:14, 3.39it/s] 13%|█▎ | 49786/371472 [3:58:28<27:13:24, 3.28it/s] 13%|█▎ | 49787/371472 [3:58:28<28:05:20, 3.18it/s] 13%|█▎ | 49788/371472 [3:58:28<28:12:24, 3.17it/s] 13%|█▎ | 49789/371472 [3:58:29<29:42:42, 3.01it/s] 13%|█▎ | 49790/371472 [3:58:29<28:03:54, 3.18it/s] 13%|█▎ | 49791/371472 [3:58:29<27:08:31, 3.29it/s] 13%|█▎ | 49792/371472 [3:58:29<27:02:52, 3.30it/s] 13%|█▎ | 49793/371472 [3:58:30<27:41:04, 3.23it/s] 13%|█▎ | 49794/371472 [3:58:30<26:24:06, 3.38it/s] 13%|█▎ | 49795/371472 [3:58:30<26:03:16, 3.43it/s] 13%|█▎ | 49796/371472 [3:58:31<25:37:22, 3.49it/s] 13%|█▎ | 49797/371472 [3:58:31<26:19:40, 3.39it/s] 13%|█▎ | 49798/371472 [3:58:31<25:19:01, 3.53it/s] 13%|█▎ | 49799/371472 [3:58:31<24:40:42, 3.62it/s] 13%|█▎ | 49800/371472 [3:58:32<23:48:48, 3.75it/s] {'loss': 4.2116, 'learning_rate': 8.797647008123424e-07, 'epoch': 2.14} + 13%|█▎ | 49800/371472 [3:58:32<23:48:48, 3.75it/s] 13%|█▎ | 49801/371472 [3:58:32<25:09:05, 3.55it/s] 13%|█▎ | 49802/371472 [3:58:32<24:27:39, 3.65it/s] 13%|█▎ | 49803/371472 [3:58:32<24:15:34, 3.68it/s] 13%|█▎ | 49804/371472 [3:58:33<27:07:14, 3.29it/s] 13%|█▎ | 49805/371472 [3:58:33<26:16:17, 3.40it/s] 13%|█▎ | 49806/371472 [3:58:33<25:42:41, 3.48it/s] 13%|█▎ | 49807/371472 [3:58:34<24:40:22, 3.62it/s] 13%|█▎ | 49808/371472 [3:58:34<24:32:29, 3.64it/s] 13%|█▎ | 49809/371472 [3:58:34<23:47:16, 3.76it/s] 13%|█▎ | 49810/371472 [3:58:34<24:24:18, 3.66it/s] 13%|█▎ | 49811/371472 [3:58:35<25:47:40, 3.46it/s] 13%|█▎ | 49812/371472 [3:58:35<26:08:31, 3.42it/s] 13%|█▎ | 49813/371472 [3:58:35<26:11:42, 3.41it/s] 13%|█▎ | 49814/371472 [3:58:36<26:19:13, 3.39it/s] 13%|█▎ | 49815/371472 [3:58:36<25:50:21, 3.46it/s] 13%|█▎ | 49816/371472 [3:58:36<25:14:21, 3.54it/s] 13%|█▎ | 49817/371472 [3:58:36<24:28:45, 3.65it/s] 13%|█▎ | 49818/371472 [3:58:37<23:52:45, 3.74it/s] 13%|█▎ | 49819/371472 [3:58:37<23:16:53, 3.84it/s] 13%|█▎ | 49820/371472 [3:58:37<27:54:12, 3.20it/s] {'loss': 4.3622, 'learning_rate': 8.797162188368636e-07, 'epoch': 2.15} + 13%|█▎ | 49820/371472 [3:58:37<27:54:12, 3.20it/s] 13%|█▎ | 49821/371472 [3:58:38<26:06:27, 3.42it/s] 13%|█▎ | 49822/371472 [3:58:38<25:46:42, 3.47it/s] 13%|█▎ | 49823/371472 [3:58:38<26:18:05, 3.40it/s] 13%|█▎ | 49824/371472 [3:58:39<27:40:46, 3.23it/s] 13%|█▎ | 49825/371472 [3:58:39<27:38:00, 3.23it/s] 13%|█▎ | 49826/371472 [3:58:39<26:41:48, 3.35it/s] 13%|█▎ | 49827/371472 [3:58:39<25:47:58, 3.46it/s] 13%|█▎ | 49828/371472 [3:58:40<27:11:54, 3.28it/s] 13%|█▎ | 49829/371472 [3:58:40<26:01:43, 3.43it/s] 13%|█▎ | 49830/371472 [3:58:40<25:54:09, 3.45it/s] 13%|█▎ | 49831/371472 [3:58:41<25:29:23, 3.51it/s] 13%|█▎ | 49832/371472 [3:58:41<25:31:32, 3.50it/s] 13%|█▎ | 49833/371472 [3:58:41<24:59:16, 3.58it/s] 13%|█▎ | 49834/371472 [3:58:41<25:39:02, 3.48it/s] 13%|█▎ | 49835/371472 [3:58:42<26:00:04, 3.44it/s] 13%|█▎ | 49836/371472 [3:58:42<25:47:20, 3.46it/s] 13%|█▎ | 49837/371472 [3:58:42<26:51:25, 3.33it/s] 13%|█▎ | 49838/371472 [3:58:43<29:41:25, 3.01it/s] 13%|█▎ | 49839/371472 [3:58:43<29:27:28, 3.03it/s] 13%|█▎ | 49840/371472 [3:58:43<30:18:46, 2.95it/s] {'loss': 4.2366, 'learning_rate': 8.796677368613846e-07, 'epoch': 2.15} + 13%|█▎ | 49840/371472 [3:58:43<30:18:46, 2.95it/s] 13%|█▎ | 49841/371472 [3:58:44<29:16:34, 3.05it/s] 13%|█▎ | 49842/371472 [3:58:44<27:57:01, 3.20it/s] 13%|█▎ | 49843/371472 [3:58:44<26:25:11, 3.38it/s] 13%|█▎ | 49844/371472 [3:58:44<24:54:42, 3.59it/s] 13%|█▎ | 49845/371472 [3:58:45<24:05:15, 3.71it/s] 13%|█▎ | 49846/371472 [3:58:45<24:23:25, 3.66it/s] 13%|█▎ | 49847/371472 [3:58:45<23:55:38, 3.73it/s] 13%|█▎ | 49848/371472 [3:58:46<23:28:42, 3.81it/s] 13%|█▎ | 49849/371472 [3:58:46<24:59:59, 3.57it/s] 13%|█▎ | 49850/371472 [3:58:46<25:32:09, 3.50it/s] 13%|█▎ | 49851/371472 [3:58:46<24:17:27, 3.68it/s] 13%|█▎ | 49852/371472 [3:58:47<24:46:18, 3.61it/s] 13%|█▎ | 49853/371472 [3:58:47<25:11:32, 3.55it/s] 13%|█▎ | 49854/371472 [3:58:47<28:47:25, 3.10it/s] 13%|█▎ | 49855/371472 [3:58:48<26:38:49, 3.35it/s] 13%|█▎ | 49856/371472 [3:58:48<31:08:32, 2.87it/s] 13%|█▎ | 49857/371472 [3:58:48<30:05:09, 2.97it/s] 13%|█▎ | 49858/371472 [3:58:49<27:58:50, 3.19it/s] 13%|█▎ | 49859/371472 [3:58:49<26:05:31, 3.42it/s] 13%|█▎ | 49860/371472 [3:58:49<25:42:11, 3.48it/s] {'loss': 4.34, 'learning_rate': 8.796192548859057e-07, 'epoch': 2.15} + 13%|█▎ | 49860/371472 [3:58:49<25:42:11, 3.48it/s] 13%|█▎ | 49861/371472 [3:58:49<26:18:55, 3.39it/s] 13%|█▎ | 49862/371472 [3:58:50<26:56:41, 3.32it/s] 13%|█▎ | 49863/371472 [3:58:50<26:15:51, 3.40it/s] 13%|█▎ | 49864/371472 [3:58:50<25:28:05, 3.51it/s] 13%|█▎ | 49865/371472 [3:58:51<26:02:37, 3.43it/s] 13%|█▎ | 49866/371472 [3:58:51<25:10:11, 3.55it/s] 13%|█▎ | 49867/371472 [3:58:51<24:03:22, 3.71it/s] 13%|█▎ | 49868/371472 [3:58:51<25:05:44, 3.56it/s] 13%|█▎ | 49869/371472 [3:58:52<25:21:40, 3.52it/s] 13%|█�� | 49870/371472 [3:58:52<25:49:51, 3.46it/s] 13%|█▎ | 49871/371472 [3:58:52<24:31:24, 3.64it/s] 13%|█▎ | 49872/371472 [3:58:53<23:54:03, 3.74it/s] 13%|█▎ | 49873/371472 [3:58:53<25:23:23, 3.52it/s] 13%|█▎ | 49874/371472 [3:58:53<24:26:13, 3.66it/s] 13%|█▎ | 49875/371472 [3:58:53<24:42:05, 3.62it/s] 13%|█▎ | 49876/371472 [3:58:54<25:56:17, 3.44it/s] 13%|█▎ | 49877/371472 [3:58:54<25:35:17, 3.49it/s] 13%|█▎ | 49878/371472 [3:58:54<24:56:26, 3.58it/s] 13%|█▎ | 49879/371472 [3:58:55<25:15:45, 3.54it/s] 13%|█▎ | 49880/371472 [3:58:55<25:12:33, 3.54it/s] {'loss': 4.1654, 'learning_rate': 8.795707729104269e-07, 'epoch': 2.15} + 13%|█▎ | 49880/371472 [3:58:55<25:12:33, 3.54it/s] 13%|█▎ | 49881/371472 [3:58:55<25:39:53, 3.48it/s] 13%|█▎ | 49882/371472 [3:58:55<24:56:37, 3.58it/s] 13%|█▎ | 49883/371472 [3:58:56<24:03:38, 3.71it/s] 13%|█▎ | 49884/371472 [3:58:56<24:04:37, 3.71it/s] 13%|█▎ | 49885/371472 [3:58:56<23:38:17, 3.78it/s] 13%|█▎ | 49886/371472 [3:58:56<24:27:34, 3.65it/s] 13%|█▎ | 49887/371472 [3:58:57<24:21:04, 3.67it/s] 13%|█▎ | 49888/371472 [3:58:57<24:13:50, 3.69it/s] 13%|█▎ | 49889/371472 [3:58:57<23:27:37, 3.81it/s] 13%|█▎ | 49890/371472 [3:58:58<24:07:29, 3.70it/s] 13%|█▎ | 49891/371472 [3:58:58<25:16:32, 3.53it/s] 13%|█▎ | 49892/371472 [3:58:58<24:41:30, 3.62it/s] 13%|█▎ | 49893/371472 [3:58:58<25:35:47, 3.49it/s] 13%|█▎ | 49894/371472 [3:58:59<26:59:14, 3.31it/s] 13%|█▎ | 49895/371472 [3:58:59<25:58:59, 3.44it/s] 13%|█▎ | 49896/371472 [3:58:59<25:07:26, 3.56it/s] 13%|█▎ | 49897/371472 [3:59:00<26:47:41, 3.33it/s] 13%|█▎ | 49898/371472 [3:59:00<25:50:07, 3.46it/s] 13%|█▎ | 49899/371472 [3:59:00<26:17:22, 3.40it/s] 13%|█▎ | 49900/371472 [3:59:00<25:12:52, 3.54it/s] {'loss': 4.3001, 'learning_rate': 8.795222909349478e-07, 'epoch': 2.15} + 13%|█▎ | 49900/371472 [3:59:00<25:12:52, 3.54it/s] 13%|█▎ | 49901/371472 [3:59:01<24:32:48, 3.64it/s] 13%|█▎ | 49902/371472 [3:59:01<24:41:39, 3.62it/s] 13%|█▎ | 49903/371472 [3:59:01<24:49:45, 3.60it/s] 13%|█▎ | 49904/371472 [3:59:02<25:01:12, 3.57it/s] 13%|█▎ | 49905/371472 [3:59:02<25:57:34, 3.44it/s] 13%|█▎ | 49906/371472 [3:59:02<26:30:28, 3.37it/s] 13%|█▎ | 49907/371472 [3:59:02<25:11:24, 3.55it/s] 13%|█▎ | 49908/371472 [3:59:03<24:17:28, 3.68it/s] 13%|█▎ | 49909/371472 [3:59:03<24:16:25, 3.68it/s] 13%|█▎ | 49910/371472 [3:59:03<24:29:53, 3.65it/s] 13%|█▎ | 49911/371472 [3:59:03<23:50:43, 3.75it/s] 13%|█▎ | 49912/371472 [3:59:04<23:07:20, 3.86it/s] 13%|█▎ | 49913/371472 [3:59:04<23:34:08, 3.79it/s] 13%|█▎ | 49914/371472 [3:59:04<22:58:07, 3.89it/s] 13%|█▎ | 49915/371472 [3:59:04<22:37:25, 3.95it/s] 13%|█▎ | 49916/371472 [3:59:05<24:48:31, 3.60it/s] 13%|█▎ | 49917/371472 [3:59:05<25:58:59, 3.44it/s] 13%|█▎ | 49918/371472 [3:59:05<24:43:59, 3.61it/s] 13%|█▎ | 49919/371472 [3:59:06<24:44:29, 3.61it/s] 13%|█▎ | 49920/371472 [3:59:06<24:32:07, 3.64it/s] {'loss': 4.2751, 'learning_rate': 8.79473808959469e-07, 'epoch': 2.15} + 13%|█▎ | 49920/371472 [3:59:06<24:32:07, 3.64it/s] 13%|█▎ | 49921/371472 [3:59:06<23:41:44, 3.77it/s] 13%|█▎ | 49922/371472 [3:59:06<23:11:48, 3.85it/s] 13%|█▎ | 49923/371472 [3:59:07<22:26:36, 3.98it/s] 13%|█▎ | 49924/371472 [3:59:07<25:01:08, 3.57it/s] 13%|█▎ | 49925/371472 [3:59:07<25:26:46, 3.51it/s] 13%|█▎ | 49926/371472 [3:59:08<24:54:08, 3.59it/s] 13%|█▎ | 49927/371472 [3:59:08<24:20:28, 3.67it/s] 13%|█▎ | 49928/371472 [3:59:08<25:47:28, 3.46it/s] 13%|█▎ | 49929/371472 [3:59:08<25:13:35, 3.54it/s] 13%|█▎ | 49930/371472 [3:59:09<25:12:51, 3.54it/s] 13%|█▎ | 49931/371472 [3:59:09<24:53:30, 3.59it/s] 13%|█▎ | 49932/371472 [3:59:09<25:28:10, 3.51it/s] 13%|█▎ | 49933/371472 [3:59:10<24:38:41, 3.62it/s] 13%|█▎ | 49934/371472 [3:59:10<25:21:33, 3.52it/s] 13%|█▎ | 49935/371472 [3:59:10<27:14:16, 3.28it/s] 13%|█▎ | 49936/371472 [3:59:10<25:55:23, 3.45it/s] 13%|█▎ | 49937/371472 [3:59:11<25:49:02, 3.46it/s] 13%|█▎ | 49938/371472 [3:59:11<26:47:24, 3.33it/s] 13%|█▎ | 49939/371472 [3:59:11<26:10:25, 3.41it/s] 13%|█▎ | 49940/371472 [3:59:12<27:05:57, 3.30it/s] {'loss': 4.2452, 'learning_rate': 8.794253269839901e-07, 'epoch': 2.15} + 13%|█▎ | 49940/371472 [3:59:12<27:05:57, 3.30it/s] 13%|█▎ | 49941/371472 [3:59:12<26:38:14, 3.35it/s] 13%|█▎ | 49942/371472 [3:59:12<27:03:11, 3.30it/s] 13%|█▎ | 49943/371472 [3:59:12<25:29:53, 3.50it/s] 13%|█▎ | 49944/371472 [3:59:13<26:24:02, 3.38it/s] 13%|█▎ | 49945/371472 [3:59:13<27:31:41, 3.24it/s] 13%|█▎ | 49946/371472 [3:59:13<27:55:59, 3.20it/s] 13%|█▎ | 49947/371472 [3:59:14<26:31:28, 3.37it/s] 13%|█▎ | 49948/371472 [3:59:14<25:05:58, 3.56it/s] 13%|█▎ | 49949/371472 [3:59:14<25:27:00, 3.51it/s] 13%|█▎ | 49950/371472 [3:59:15<26:36:42, 3.36it/s] 13%|█▎ | 49951/371472 [3:59:15<26:20:56, 3.39it/s] 13%|█▎ | 49952/371472 [3:59:15<25:44:53, 3.47it/s] 13%|█▎ | 49953/371472 [3:59:15<25:31:15, 3.50it/s] 13%|█▎ | 49954/371472 [3:59:16<25:07:26, 3.55it/s] 13%|█▎ | 49955/371472 [3:59:16<25:26:56, 3.51it/s] 13%|█▎ | 49956/371472 [3:59:16<26:06:42, 3.42it/s] 13%|█▎ | 49957/371472 [3:59:17<24:49:09, 3.60it/s] 13%|█▎ | 49958/371472 [3:59:17<28:31:32, 3.13it/s] 13%|█▎ | 49959/371472 [3:59:17<28:10:33, 3.17it/s] 13%|█▎ | 49960/371472 [3:59:18<26:39:56, 3.35it/s] {'loss': 4.2826, 'learning_rate': 8.793768450085113e-07, 'epoch': 2.15} + 13%|█▎ | 49960/371472 [3:59:18<26:39:56, 3.35it/s] 13%|█▎ | 49961/371472 [3:59:18<26:37:55, 3.35it/s] 13%|█▎ | 49962/371472 [3:59:18<25:59:39, 3.44it/s] 13%|█▎ | 49963/371472 [3:59:18<25:44:03, 3.47it/s] 13%|█▎ | 49964/371472 [3:59:19<24:38:00, 3.63it/s] 13%|█▎ | 49965/371472 [3:59:19<24:27:52, 3.65it/s] 13%|█▎ | 49966/371472 [3:59:19<26:54:43, 3.32it/s] 13%|█▎ | 49967/371472 [3:59:20<26:44:54, 3.34it/s] 13%|█▎ | 49968/371472 [3:59:20<27:23:33, 3.26it/s] 13%|█▎ | 49969/371472 [3:59:20<27:16:08, 3.28it/s] 13%|█▎ | 49970/371472 [3:59:20<26:32:04, 3.37it/s] 13%|█▎ | 49971/371472 [3:59:21<26:07:23, 3.42it/s] 13%|█▎ | 49972/371472 [3:59:21<25:09:08, 3.55it/s] 13%|█▎ | 49973/371472 [3:59:21<25:37:07, 3.49it/s] 13%|█▎ | 49974/371472 [3:59:22<25:16:20, 3.53it/s] 13%|█▎ | 49975/371472 [3:59:22<24:31:36, 3.64it/s] 13%|█▎ | 49976/371472 [3:59:22<24:07:01, 3.70it/s] 13%|█▎ | 49977/371472 [3:59:22<23:49:43, 3.75it/s] 13%|█▎ | 49978/371472 [3:59:23<23:50:56, 3.74it/s] 13%|█▎ | 49979/371472 [3:59:23<23:35:14, 3.79it/s] 13%|█▎ | 49980/371472 [3:59:23<23:48:58, 3.75it/s] {'loss': 4.0487, 'learning_rate': 8.793283630330323e-07, 'epoch': 2.15} + 13%|█▎ | 49980/371472 [3:59:23<23:48:58, 3.75it/s] 13%|█▎ | 49981/371472 [3:59:23<24:04:55, 3.71it/s] 13%|█▎ | 49982/371472 [3:59:24<23:59:29, 3.72it/s] 13%|█▎ | 49983/371472 [3:59:24<24:35:46, 3.63it/s] 13%|█▎ | 49984/371472 [3:59:24<24:07:06, 3.70it/s] 13%|█▎ | 49985/371472 [3:59:25<24:41:35, 3.62it/s] 13%|█▎ | 49986/371472 [3:59:25<23:55:26, 3.73it/s] 13%|█▎ | 49987/371472 [3:59:25<25:39:38, 3.48it/s] 13%|█▎ | 49988/371472 [3:59:25<24:31:51, 3.64it/s] 13%|█▎ | 49989/371472 [3:59:26<23:39:49, 3.77it/s] 13%|█▎ | 49990/371472 [3:59:26<26:32:26, 3.36it/s] 13%|█▎ | 49991/371472 [3:59:26<25:38:03, 3.48it/s] 13%|█▎ | 49992/371472 [3:59:26<24:43:00, 3.61it/s] 13%|█▎ | 49993/371472 [3:59:27<24:37:44, 3.63it/s] 13%|█▎ | 49994/371472 [3:59:27<27:49:16, 3.21it/s] 13%|█▎ | 49995/371472 [3:59:27<27:34:13, 3.24it/s] 13%|█▎ | 49996/371472 [3:59:28<26:01:17, 3.43it/s] 13%|█▎ | 49997/371472 [3:59:28<24:39:26, 3.62it/s] 13%|█▎ | 49998/371472 [3:59:28<24:54:26, 3.59it/s] 13%|█▎ | 49999/371472 [3:59:28<24:04:16, 3.71it/s] 13%|█▎ | 50000/371472 [3:59:29<24:25:57, 3.65it/s] {'loss': 4.0342, 'learning_rate': 8.792798810575536e-07, 'epoch': 2.15} + 13%|█▎ | 50000/371472 [3:59:29<24:25:57, 3.65it/s] 13%|█▎ | 50001/371472 [3:59:29<23:51:52, 3.74it/s] 13%|█▎ | 50002/371472 [3:59:29<23:20:18, 3.83it/s] 13%|█▎ | 50003/371472 [3:59:30<23:04:59, 3.87it/s] 13%|█▎ | 50004/371472 [3:59:30<23:11:55, 3.85it/s] 13%|█▎ | 50005/371472 [3:59:30<22:57:54, 3.89it/s] 13%|█▎ | 50006/371472 [3:59:30<22:39:22, 3.94it/s] 13%|█▎ | 50007/371472 [3:59:31<23:52:41, 3.74it/s] 13%|█▎ | 50008/371472 [3:59:31<23:41:43, 3.77it/s] 13%|█▎ | 50009/371472 [3:59:31<23:06:38, 3.86it/s] 13%|█▎ | 50010/371472 [3:59:31<23:07:46, 3.86it/s] 13%|█▎ | 50011/371472 [3:59:32<23:50:09, 3.75it/s] 13%|█▎ | 50012/371472 [3:59:32<25:00:46, 3.57it/s] 13%|█▎ | 50013/371472 [3:59:32<24:46:16, 3.60it/s] 13%|█▎ | 50014/371472 [3:59:33<25:20:39, 3.52it/s] 13%|█▎ | 50015/371472 [3:59:33<25:04:48, 3.56it/s] 13%|█▎ | 50016/371472 [3:59:33<25:27:39, 3.51it/s] 13%|█▎ | 50017/371472 [3:59:33<25:58:29, 3.44it/s] 13%|█▎ | 50018/371472 [3:59:34<24:41:11, 3.62it/s] 13%|█▎ | 50019/371472 [3:59:34<23:50:58, 3.74it/s] 13%|█▎ | 50020/371472 [3:59:34<23:14:01, 3.84it/s] {'loss': 4.3066, 'learning_rate': 8.792313990820745e-07, 'epoch': 2.15} + 13%|█▎ | 50020/371472 [3:59:34<23:14:01, 3.84it/s] 13%|█▎ | 50021/371472 [3:59:34<23:56:08, 3.73it/s] 13%|█▎ | 50022/371472 [3:59:35<24:44:12, 3.61it/s] 13%|█▎ | 50023/371472 [3:59:35<24:31:16, 3.64it/s] 13%|█▎ | 50024/371472 [3:59:35<23:33:03, 3.79it/s] 13%|█▎ | 50025/371472 [3:59:35<23:27:06, 3.81it/s] 13%|█▎ | 50026/371472 [3:59:36<24:33:36, 3.64it/s] 13%|█▎ | 50027/371472 [3:59:36<24:18:44, 3.67it/s] 13%|█▎ | 50028/371472 [3:59:36<24:50:15, 3.59it/s] 13%|█▎ | 50029/371472 [3:59:37<26:21:35, 3.39it/s] 13%|█▎ | 50030/371472 [3:59:37<25:42:14, 3.47it/s] 13%|█▎ | 50031/371472 [3:59:37<26:08:51, 3.41it/s] 13%|█▎ | 50032/371472 [3:59:38<25:38:59, 3.48it/s] 13%|█▎ | 50033/371472 [3:59:38<25:15:19, 3.54it/s] 13%|█▎ | 50034/371472 [3:59:38<25:01:57, 3.57it/s] 13%|█▎ | 50035/371472 [3:59:38<24:58:54, 3.57it/s] 13%|█▎ | 50036/371472 [3:59:39<25:28:04, 3.51it/s] 13%|█▎ | 50037/371472 [3:59:39<24:42:45, 3.61it/s] 13%|█▎ | 50038/371472 [3:59:39<24:34:19, 3.63it/s] 13%|█▎ | 50039/371472 [3:59:39<24:53:02, 3.59it/s] 13%|█▎ | 50040/371472 [3:59:40<23:56:29, 3.73it/s] {'loss': 4.2279, 'learning_rate': 8.791829171065957e-07, 'epoch': 2.16} + 13%|█▎ | 50040/371472 [3:59:40<23:56:29, 3.73it/s] 13%|█▎ | 50041/371472 [3:59:40<23:29:12, 3.80it/s] 13%|█▎ | 50042/371472 [3:59:40<23:20:55, 3.82it/s] 13%|█▎ | 50043/371472 [3:59:40<23:05:03, 3.87it/s] 13%|█▎ | 50044/371472 [3:59:41<23:04:18, 3.87it/s] 13%|█▎ | 50045/371472 [3:59:41<24:23:29, 3.66it/s] 13%|█▎ | 50046/371472 [3:59:41<24:34:22, 3.63it/s] 13%|█▎ | 50047/371472 [3:59:42<24:45:58, 3.61it/s] 13%|█▎ | 50048/371472 [3:59:42<26:15:29, 3.40it/s] 13%|█▎ | 50049/371472 [3:59:43<36:00:44, 2.48it/s] 13%|█▎ | 50050/371472 [3:59:43<31:58:32, 2.79it/s] 13%|█▎ | 50051/371472 [3:59:43<31:47:37, 2.81it/s] 13%|█▎ | 50052/371472 [3:59:43<29:25:44, 3.03it/s] 13%|█▎ | 50053/371472 [3:59:44<32:00:00, 2.79it/s] 13%|█▎ | 50054/371472 [3:59:44<29:37:16, 3.01it/s] 13%|█▎ | 50055/371472 [3:59:44<27:45:14, 3.22it/s] 13%|█▎ | 50056/371472 [3:59:45<27:34:29, 3.24it/s] 13%|█▎ | 50057/371472 [3:59:45<26:42:21, 3.34it/s] 13%|█▎ | 50058/371472 [3:59:45<28:03:53, 3.18it/s] 13%|█▎ | 50059/371472 [3:59:46<26:57:55, 3.31it/s] 13%|█▎ | 50060/371472 [3:59:46<26:54:41, 3.32it/s] {'loss': 4.2122, 'learning_rate': 8.791344351311167e-07, 'epoch': 2.16} + 13%|█▎ | 50060/371472 [3:59:46<26:54:41, 3.32it/s] 13%|█▎ | 50061/371472 [3:59:46<28:00:39, 3.19it/s] 13%|█▎ | 50062/371472 [3:59:47<28:12:45, 3.16it/s] 13%|█▎ | 50063/371472 [3:59:47<28:30:01, 3.13it/s] 13%|█▎ | 50064/371472 [3:59:47<28:06:40, 3.18it/s] 13%|█▎ | 50065/371472 [3:59:48<28:05:57, 3.18it/s] 13%|█▎ | 50066/371472 [3:59:48<26:58:31, 3.31it/s] 13%|█▎ | 50067/371472 [3:59:48<27:47:42, 3.21it/s] 13%|█▎ | 50068/371472 [3:59:48<27:58:36, 3.19it/s] 13%|█▎ | 50069/371472 [3:59:49<27:21:44, 3.26it/s] 13%|█▎ | 50070/371472 [3:59:49<27:00:21, 3.31it/s] 13%|█▎ | 50071/371472 [3:59:49<26:23:03, 3.38it/s] 13%|█▎ | 50072/371472 [3:59:50<27:18:07, 3.27it/s] 13%|█▎ | 50073/371472 [3:59:50<26:08:06, 3.42it/s] 13%|█▎ | 50074/371472 [3:59:50<27:12:21, 3.28it/s] 13%|█▎ | 50075/371472 [3:59:51<26:48:04, 3.33it/s] 13%|█▎ | 50076/371472 [3:59:51<25:46:11, 3.46it/s] 13%|█▎ | 50077/371472 [3:59:51<25:49:30, 3.46it/s] 13%|█▎ | 50078/371472 [3:59:51<25:42:51, 3.47it/s] 13%|█▎ | 50079/371472 [3:59:52<25:04:46, 3.56it/s] 13%|█▎ | 50080/371472 [3:59:52<24:14:16, 3.68it/s] {'loss': 4.2016, 'learning_rate': 8.790859531556379e-07, 'epoch': 2.16} + 13%|█▎ | 50080/371472 [3:59:52<24:14:16, 3.68it/s] 13%|█▎ | 50081/371472 [3:59:52<25:24:18, 3.51it/s] 13%|█▎ | 50082/371472 [3:59:52<24:53:09, 3.59it/s] 13%|█▎ | 50083/371472 [3:59:53<24:20:39, 3.67it/s] 13%|█▎ | 50084/371472 [3:59:53<24:11:20, 3.69it/s] 13%|█▎ | 50085/371472 [3:59:53<24:01:18, 3.72it/s] 13%|█▎ | 50086/371472 [3:59:53<23:28:03, 3.80it/s] 13%|█▎ | 50087/371472 [3:59:54<23:04:40, 3.87it/s] 13%|█▎ | 50088/371472 [3:59:54<22:44:05, 3.93it/s] 13%|█▎ | 50089/371472 [3:59:54<27:47:28, 3.21it/s] 13%|█▎ | 50090/371472 [3:59:55<26:16:47, 3.40it/s] 13%|█▎ | 50091/371472 [3:59:55<27:23:52, 3.26it/s] 13%|█▎ | 50092/371472 [3:59:55<27:50:03, 3.21it/s] 13%|█▎ | 50093/371472 [3:59:56<26:42:53, 3.34it/s] 13%|█▎ | 50094/371472 [3:59:56<25:15:05, 3.54it/s] 13%|█▎ | 50095/371472 [3:59:56<24:29:18, 3.65it/s] 13%|█▎ | 50096/371472 [3:59:56<25:04:54, 3.56it/s] 13%|█▎ | 50097/371472 [3:59:57<25:02:20, 3.57it/s] 13%|█▎ | 50098/371472 [3:59:57<25:11:51, 3.54it/s] 13%|█▎ | 50099/371472 [3:59:57<27:01:24, 3.30it/s] 13%|█▎ | 50100/371472 [3:59:58<26:15:40, 3.40it/s] {'loss': 4.3734, 'learning_rate': 8.79037471180159e-07, 'epoch': 2.16} + 13%|█▎ | 50100/371472 [3:59:58<26:15:40, 3.40it/s] 13%|█▎ | 50101/371472 [3:59:58<26:55:44, 3.31it/s] 13%|█▎ | 50102/371472 [3:59:58<26:09:09, 3.41it/s] 13%|█▎ | 50103/371472 [3:59:59<29:12:55, 3.06it/s] 13%|█▎ | 50104/371472 [3:59:59<27:36:27, 3.23it/s] 13%|█▎ | 50105/371472 [3:59:59<29:09:47, 3.06it/s] 13%|█▎ | 50106/371472 [3:59:59<27:06:48, 3.29it/s] 13%|█▎ | 50107/371472 [4:00:00<25:34:40, 3.49it/s] 13%|█▎ | 50108/371472 [4:00:00<24:45:50, 3.60it/s] 13%|█▎ | 50109/371472 [4:00:00<23:59:53, 3.72it/s] 13%|█▎ | 50110/371472 [4:00:01<28:08:59, 3.17it/s] 13%|█▎ | 50111/371472 [4:00:01<26:06:42, 3.42it/s] 13%|█▎ | 50112/371472 [4:00:01<25:54:12, 3.45it/s] 13%|█▎ | 50113/371472 [4:00:01<25:11:53, 3.54it/s] 13%|█▎ | 50114/371472 [4:00:02<24:21:22, 3.67it/s] 13%|█▎ | 50115/371472 [4:00:02<25:02:51, 3.56it/s] 13%|█▎ | 50116/371472 [4:00:02<26:52:22, 3.32it/s] 13%|█▎ | 50117/371472 [4:00:03<28:10:50, 3.17it/s] 13%|█▎ | 50118/371472 [4:00:03<26:59:01, 3.31it/s] 13%|█▎ | 50119/371472 [4:00:03<25:44:56, 3.47it/s] 13%|█▎ | 50120/371472 [4:00:03<24:46:40, 3.60it/s] {'loss': 4.3605, 'learning_rate': 8.789889892046802e-07, 'epoch': 2.16} + 13%|█▎ | 50120/371472 [4:00:03<24:46:40, 3.60it/s] 13%|█▎ | 50121/371472 [4:00:04<26:52:16, 3.32it/s] 13%|█▎ | 50122/371472 [4:00:04<25:20:13, 3.52it/s] 13%|█▎ | 50123/371472 [4:00:04<25:55:45, 3.44it/s] 13%|█▎ | 50124/371472 [4:00:05<25:29:21, 3.50it/s] 13%|█▎ | 50125/371472 [4:00:05<24:26:54, 3.65it/s] 13%|█▎ | 50126/371472 [4:00:05<23:22:00, 3.82it/s] 13%|█▎ | 50127/371472 [4:00:05<24:01:08, 3.72it/s] 13%|█▎ | 50128/371472 [4:00:06<25:47:43, 3.46it/s] 13%|█▎ | 50129/371472 [4:00:06<26:05:44, 3.42it/s] 13%|█▎ | 50130/371472 [4:00:06<25:56:39, 3.44it/s] 13%|█▎ | 50131/371472 [4:00:07<24:55:25, 3.58it/s] 13%|█▎ | 50132/371472 [4:00:07<25:01:57, 3.57it/s] 13%|█▎ | 50133/371472 [4:00:07<25:14:46, 3.54it/s] 13%|█▎ | 50134/371472 [4:00:07<25:34:04, 3.49it/s] 13%|█▎ | 50135/371472 [4:00:08<24:16:39, 3.68it/s] 13%|█▎ | 50136/371472 [4:00:08<23:08:35, 3.86it/s] 13%|█▎ | 50137/371472 [4:00:08<24:17:06, 3.68it/s] 13%|█▎ | 50138/371472 [4:00:09<24:57:26, 3.58it/s] 13%|█▎ | 50139/371472 [4:00:09<24:04:19, 3.71it/s] 13%|█▎ | 50140/371472 [4:00:09<23:38:59, 3.77it/s] {'loss': 4.125, 'learning_rate': 8.789405072292011e-07, 'epoch': 2.16} + 13%|█▎ | 50140/371472 [4:00:09<23:38:59, 3.77it/s] 13%|█▎ | 50141/371472 [4:00:09<23:50:59, 3.74it/s] 13%|█▎ | 50142/371472 [4:00:10<24:00:16, 3.72it/s] 13%|█▎ | 50143/371472 [4:00:10<24:57:16, 3.58it/s] 13%|█▎ | 50144/371472 [4:00:10<24:11:42, 3.69it/s] 13%|█▎ | 50145/371472 [4:00:10<23:51:48, 3.74it/s] 13%|█▎ | 50146/371472 [4:00:11<24:23:39, 3.66it/s] 13%|█▎ | 50147/371472 [4:00:11<23:46:48, 3.75it/s] 13%|█▎ | 50148/371472 [4:00:11<23:51:27, 3.74it/s] 14%|█▎ | 50149/371472 [4:00:12<25:11:00, 3.54it/s] 14%|█▎ | 50150/371472 [4:00:12<24:10:36, 3.69it/s] 14%|█▎ | 50151/371472 [4:00:12<24:23:09, 3.66it/s] 14%|█▎ | 50152/371472 [4:00:12<23:43:43, 3.76it/s] 14%|█▎ | 50153/371472 [4:00:13<23:21:10, 3.82it/s] 14%|█▎ | 50154/371472 [4:00:13<24:16:26, 3.68it/s] 14%|█▎ | 50155/371472 [4:00:13<23:22:09, 3.82it/s] 14%|█▎ | 50156/371472 [4:00:13<24:07:28, 3.70it/s] 14%|█▎ | 50157/371472 [4:00:14<23:32:52, 3.79it/s] 14%|█▎ | 50158/371472 [4:00:14<23:18:08, 3.83it/s] 14%|█▎ | 50159/371472 [4:00:14<23:17:12, 3.83it/s] 14%|█▎ | 50160/371472 [4:00:14<22:38:10, 3.94it/s] {'loss': 4.3302, 'learning_rate': 8.788920252537223e-07, 'epoch': 2.16} + 14%|█▎ | 50160/371472 [4:00:14<22:38:10, 3.94it/s] 14%|█▎ | 50161/371472 [4:00:15<23:56:46, 3.73it/s] 14%|█▎ | 50162/371472 [4:00:15<24:32:55, 3.64it/s] 14%|█▎ | 50163/371472 [4:00:15<23:41:00, 3.77it/s] 14%|█▎ | 50164/371472 [4:00:16<25:28:06, 3.50it/s] 14%|█▎ | 50165/371472 [4:00:16<24:31:29, 3.64it/s] 14%|█▎ | 50166/371472 [4:00:16<26:54:57, 3.32it/s] 14%|█▎ | 50167/371472 [4:00:16<25:02:02, 3.57it/s] 14%|█▎ | 50168/371472 [4:00:17<24:38:23, 3.62it/s] 14%|█▎ | 50169/371472 [4:00:17<24:11:30, 3.69it/s] 14%|█▎ | 50170/371472 [4:00:17<24:07:03, 3.70it/s] 14%|█▎ | 50171/371472 [4:00:17<24:32:35, 3.64it/s] 14%|█▎ | 50172/371472 [4:00:18<29:22:34, 3.04it/s] 14%|█▎ | 50173/371472 [4:00:18<27:56:25, 3.19it/s] 14%|█▎ | 50174/371472 [4:00:18<26:48:52, 3.33it/s] 14%|█▎ | 50175/371472 [4:00:19<25:53:06, 3.45it/s] 14%|█▎ | 50176/371472 [4:00:19<25:24:25, 3.51it/s] 14%|█▎ | 50177/371472 [4:00:19<25:16:44, 3.53it/s] 14%|█▎ | 50178/371472 [4:00:20<24:28:49, 3.65it/s] 14%|█▎ | 50179/371472 [4:00:20<25:51:18, 3.45it/s] 14%|█▎ | 50180/371472 [4:00:20<24:55:09, 3.58it/s] {'loss': 4.142, 'learning_rate': 8.788435432782434e-07, 'epoch': 2.16} + 14%|█▎ | 50180/371472 [4:00:20<24:55:09, 3.58it/s] 14%|█▎ | 50181/371472 [4:00:20<26:21:14, 3.39it/s] 14%|█▎ | 50182/371472 [4:00:21<25:49:24, 3.46it/s] 14%|█▎ | 50183/371472 [4:00:21<26:29:39, 3.37it/s] 14%|█▎ | 50184/371472 [4:00:21<25:56:27, 3.44it/s] 14%|█▎ | 50185/371472 [4:00:22<27:27:25, 3.25it/s] 14%|█▎ | 50186/371472 [4:00:22<26:55:47, 3.31it/s] 14%|█▎ | 50187/371472 [4:00:22<25:49:09, 3.46it/s] 14%|█▎ | 50188/371472 [4:00:22<24:44:37, 3.61it/s] 14%|█▎ | 50189/371472 [4:00:23<25:18:48, 3.53it/s] 14%|█▎ | 50190/371472 [4:00:23<25:29:00, 3.50it/s] 14%|█▎ | 50191/371472 [4:00:23<27:28:34, 3.25it/s] 14%|█▎ | 50192/371472 [4:00:24<27:42:40, 3.22it/s] 14%|█▎ | 50193/371472 [4:00:24<27:34:08, 3.24it/s] 14%|█▎ | 50194/371472 [4:00:24<28:19:30, 3.15it/s] 14%|█▎ | 50195/371472 [4:00:25<27:34:11, 3.24it/s] 14%|█▎ | 50196/371472 [4:00:25<26:02:07, 3.43it/s] 14%|█▎ | 50197/371472 [4:00:25<24:57:50, 3.57it/s] 14%|█▎ | 50198/371472 [4:00:25<25:10:42, 3.54it/s] 14%|█▎ | 50199/371472 [4:00:26<24:52:58, 3.59it/s] 14%|█▎ | 50200/371472 [4:00:26<24:46:33, 3.60it/s] {'loss': 4.2163, 'learning_rate': 8.787950613027646e-07, 'epoch': 2.16} + 14%|█▎ | 50200/371472 [4:00:26<24:46:33, 3.60it/s] 14%|█▎ | 50201/371472 [4:00:26<27:19:47, 3.27it/s] 14%|█▎ | 50202/371472 [4:00:27<26:32:52, 3.36it/s] 14%|█▎ | 50203/371472 [4:00:27<26:21:48, 3.39it/s] 14%|█▎ | 50204/371472 [4:00:27<25:35:32, 3.49it/s] 14%|█▎ | 50205/371472 [4:00:27<24:22:36, 3.66it/s] 14%|█▎ | 50206/371472 [4:00:28<23:46:11, 3.75it/s] 14%|█▎ | 50207/371472 [4:00:28<26:03:33, 3.42it/s] 14%|█▎ | 50208/371472 [4:00:28<26:10:00, 3.41it/s] 14%|█▎ | 50209/371472 [4:00:29<25:24:36, 3.51it/s] 14%|█▎ | 50210/371472 [4:00:29<24:33:58, 3.63it/s] 14%|█▎ | 50211/371472 [4:00:29<24:18:20, 3.67it/s] 14%|█▎ | 50212/371472 [4:00:29<23:48:53, 3.75it/s] 14%|█▎ | 50213/371472 [4:00:30<27:26:13, 3.25it/s] 14%|█▎ | 50214/371472 [4:00:30<26:19:38, 3.39it/s] 14%|█▎ | 50215/371472 [4:00:30<25:06:55, 3.55it/s] 14%|█▎ | 50216/371472 [4:00:31<25:01:31, 3.57it/s] 14%|█▎ | 50217/371472 [4:00:31<24:43:23, 3.61it/s] 14%|█▎ | 50218/371472 [4:00:31<23:47:55, 3.75it/s] 14%|█▎ | 50219/371472 [4:00:31<25:08:11, 3.55it/s] 14%|█▎ | 50220/371472 [4:00:32<24:06:27, 3.70it/s] {'loss': 4.226, 'learning_rate': 8.787465793272856e-07, 'epoch': 2.16} + 14%|█▎ | 50220/371472 [4:00:32<24:06:27, 3.70it/s] 14%|█▎ | 50221/371472 [4:00:32<23:15:49, 3.84it/s] 14%|█▎ | 50222/371472 [4:00:32<23:29:57, 3.80it/s] 14%|█▎ | 50223/371472 [4:00:32<23:20:45, 3.82it/s] 14%|█▎ | 50224/371472 [4:00:33<22:43:36, 3.93it/s] 14%|█▎ | 50225/371472 [4:00:33<23:10:59, 3.85it/s] 14%|█▎ | 50226/371472 [4:00:33<24:02:25, 3.71it/s] 14%|█▎ | 50227/371472 [4:00:33<23:25:18, 3.81it/s] 14%|█▎ | 50228/371472 [4:00:34<23:30:15, 3.80it/s] 14%|█▎ | 50229/371472 [4:00:34<24:52:58, 3.59it/s] 14%|█▎ | 50230/371472 [4:00:34<24:33:50, 3.63it/s] 14%|█▎ | 50231/371472 [4:00:35<24:59:56, 3.57it/s] 14%|█▎ | 50232/371472 [4:00:35<31:45:17, 2.81it/s] 14%|█▎ | 50233/371472 [4:00:35<29:47:52, 2.99it/s] 14%|█▎ | 50234/371472 [4:00:36<27:22:27, 3.26it/s] 14%|█▎ | 50235/371472 [4:00:36<26:18:52, 3.39it/s] 14%|█▎ | 50236/371472 [4:00:36<25:35:11, 3.49it/s] 14%|█▎ | 50237/371472 [4:00:36<24:19:08, 3.67it/s] 14%|█▎ | 50238/371472 [4:00:37<23:14:50, 3.84it/s] 14%|█▎ | 50239/371472 [4:00:37<23:13:43, 3.84it/s] 14%|█▎ | 50240/371472 [4:00:37<22:40:59, 3.93it/s] {'loss': 4.2411, 'learning_rate': 8.786980973518067e-07, 'epoch': 2.16} + 14%|█▎ | 50240/371472 [4:00:37<22:40:59, 3.93it/s] 14%|█▎ | 50241/371472 [4:00:37<22:49:39, 3.91it/s] 14%|█▎ | 50242/371472 [4:00:38<23:23:38, 3.81it/s] 14%|█▎ | 50243/371472 [4:00:38<23:40:44, 3.77it/s] 14%|█▎ | 50244/371472 [4:00:38<23:27:58, 3.80it/s] 14%|█▎ | 50245/371472 [4:00:38<23:38:53, 3.77it/s] 14%|█▎ | 50246/371472 [4:00:39<26:05:54, 3.42it/s] 14%|█▎ | 50247/371472 [4:00:39<25:29:39, 3.50it/s] 14%|█▎ | 50248/371472 [4:00:39<27:10:05, 3.28it/s] 14%|█▎ | 50249/371472 [4:00:40<25:46:48, 3.46it/s] 14%|█▎ | 50250/371472 [4:00:40<25:49:20, 3.46it/s] 14%|█▎ | 50251/371472 [4:00:40<25:52:18, 3.45it/s] 14%|█▎ | 50252/371472 [4:00:41<26:02:47, 3.43it/s] 14%|█▎ | 50253/371472 [4:00:41<25:29:40, 3.50it/s] 14%|█▎ | 50254/371472 [4:00:41<25:07:24, 3.55it/s] 14%|█▎ | 50255/371472 [4:00:41<24:47:46, 3.60it/s] 14%|█▎ | 50256/371472 [4:00:42<24:02:13, 3.71it/s] 14%|█▎ | 50257/371472 [4:00:42<25:42:18, 3.47it/s] 14%|█▎ | 50258/371472 [4:00:42<26:22:25, 3.38it/s] 14%|█▎ | 50259/371472 [4:00:43<24:52:29, 3.59it/s] 14%|█▎ | 50260/371472 [4:00:43<25:54:50, 3.44it/s] {'loss': 4.4795, 'learning_rate': 8.786496153763279e-07, 'epoch': 2.16} + 14%|█▎ | 50260/371472 [4:00:43<25:54:50, 3.44it/s] 14%|█▎ | 50261/371472 [4:00:43<25:54:26, 3.44it/s] 14%|█▎ | 50262/371472 [4:00:43<25:43:41, 3.47it/s] 14%|█▎ | 50263/371472 [4:00:44<26:23:37, 3.38it/s] 14%|█▎ | 50264/371472 [4:00:44<27:49:00, 3.21it/s] 14%|█▎ | 50265/371472 [4:00:44<25:48:57, 3.46it/s] 14%|█▎ | 50266/371472 [4:00:45<25:05:35, 3.56it/s] 14%|█▎ | 50267/371472 [4:00:45<25:27:58, 3.50it/s] 14%|█▎ | 50268/371472 [4:00:45<26:07:27, 3.42it/s] 14%|█▎ | 50269/371472 [4:00:45<25:07:31, 3.55it/s] 14%|█▎ | 50270/371472 [4:00:46<26:00:07, 3.43it/s] 14%|█▎ | 50271/371472 [4:00:46<31:10:24, 2.86it/s] 14%|█▎ | 50272/371472 [4:00:47<30:49:26, 2.89it/s] 14%|█▎ | 50273/371472 [4:00:47<29:57:44, 2.98it/s] 14%|█▎ | 50274/371472 [4:00:47<27:25:25, 3.25it/s] 14%|█▎ | 50275/371472 [4:00:47<27:02:35, 3.30it/s] 14%|█▎ | 50276/371472 [4:00:48<26:48:25, 3.33it/s] 14%|█▎ | 50277/371472 [4:00:48<27:31:22, 3.24it/s] 14%|█▎ | 50278/371472 [4:00:48<27:48:07, 3.21it/s] 14%|█▎ | 50279/371472 [4:00:49<26:27:15, 3.37it/s] 14%|█▎ | 50280/371472 [4:00:49<27:07:17, 3.29it/s] {'loss': 4.0183, 'learning_rate': 8.786011334008489e-07, 'epoch': 2.17} + 14%|█▎ | 50280/371472 [4:00:49<27:07:17, 3.29it/s] 14%|█▎ | 50281/371472 [4:00:49<25:47:56, 3.46it/s] 14%|█▎ | 50282/371472 [4:00:50<26:25:03, 3.38it/s] 14%|█▎ | 50283/371472 [4:00:50<26:30:55, 3.36it/s] 14%|█▎ | 50284/371472 [4:00:50<26:01:14, 3.43it/s] 14%|█▎ | 50285/371472 [4:00:50<26:59:02, 3.31it/s] 14%|█▎ | 50286/371472 [4:00:51<26:26:57, 3.37it/s] 14%|█▎ | 50287/371472 [4:00:51<26:53:21, 3.32it/s] 14%|█▎ | 50288/371472 [4:00:51<26:01:53, 3.43it/s] 14%|█▎ | 50289/371472 [4:00:52<25:26:56, 3.51it/s] 14%|█▎ | 50290/371472 [4:00:52<25:13:37, 3.54it/s] 14%|█▎ | 50291/371472 [4:00:52<25:31:03, 3.50it/s] 14%|█▎ | 50292/371472 [4:00:52<25:16:07, 3.53it/s] 14%|█▎ | 50293/371472 [4:00:53<24:53:47, 3.58it/s] 14%|█▎ | 50294/371472 [4:00:53<24:48:25, 3.60it/s] 14%|█▎ | 50295/371472 [4:00:53<24:37:33, 3.62it/s] 14%|█▎ | 50296/371472 [4:00:54<25:09:41, 3.55it/s] 14%|█▎ | 50297/371472 [4:00:54<25:00:56, 3.57it/s] 14%|█▎ | 50298/371472 [4:00:54<25:51:42, 3.45it/s] 14%|█▎ | 50299/371472 [4:00:54<26:09:02, 3.41it/s] 14%|█▎ | 50300/371472 [4:00:55<25:14:05, 3.54it/s] {'loss': 4.1343, 'learning_rate': 8.7855265142537e-07, 'epoch': 2.17} + 14%|█▎ | 50300/371472 [4:00:55<25:14:05, 3.54it/s] 14%|█▎ | 50301/371472 [4:00:55<25:02:09, 3.56it/s] 14%|█▎ | 50302/371472 [4:00:55<24:21:24, 3.66it/s] 14%|█▎ | 50303/371472 [4:00:55<23:55:05, 3.73it/s] 14%|█▎ | 50304/371472 [4:00:56<24:16:54, 3.67it/s] 14%|█▎ | 50305/371472 [4:00:56<24:57:55, 3.57it/s] 14%|█▎ | 50306/371472 [4:00:56<24:09:50, 3.69it/s] 14%|█▎ | 50307/371472 [4:00:57<23:21:33, 3.82it/s] 14%|█▎ | 50308/371472 [4:00:57<24:51:24, 3.59it/s] 14%|█▎ | 50309/371472 [4:00:57<26:05:08, 3.42it/s] 14%|█▎ | 50310/371472 [4:00:57<25:22:44, 3.52it/s] 14%|█▎ | 50311/371472 [4:00:58<26:09:43, 3.41it/s] 14%|█▎ | 50312/371472 [4:00:58<26:09:22, 3.41it/s] 14%|█▎ | 50313/371472 [4:00:58<26:01:54, 3.43it/s] 14%|█▎ | 50314/371472 [4:00:59<24:51:37, 3.59it/s] 14%|█▎ | 50315/371472 [4:00:59<24:03:10, 3.71it/s] 14%|█▎ | 50316/371472 [4:00:59<24:09:19, 3.69it/s] 14%|█▎ | 50317/371472 [4:00:59<24:23:01, 3.66it/s] 14%|█▎ | 50318/371472 [4:01:00<24:01:26, 3.71it/s] 14%|█▎ | 50319/371472 [4:01:00<24:34:29, 3.63it/s] 14%|█▎ | 50320/371472 [4:01:00<24:25:35, 3.65it/s] {'loss': 4.1272, 'learning_rate': 8.785041694498912e-07, 'epoch': 2.17} + 14%|█▎ | 50320/371472 [4:01:00<24:25:35, 3.65it/s] 14%|█▎ | 50321/371472 [4:01:00<23:44:16, 3.76it/s] 14%|█▎ | 50322/371472 [4:01:01<26:06:48, 3.42it/s] 14%|█▎ | 50323/371472 [4:01:01<28:07:33, 3.17it/s] 14%|█▎ | 50324/371472 [4:01:02<28:58:29, 3.08it/s] 14%|█▎ | 50325/371472 [4:01:02<27:03:13, 3.30it/s] 14%|█▎ | 50326/371472 [4:01:02<26:45:33, 3.33it/s] 14%|█▎ | 50327/371472 [4:01:02<25:48:08, 3.46it/s] 14%|█▎ | 50328/371472 [4:01:03<25:18:51, 3.52it/s] 14%|█▎ | 50329/371472 [4:01:03<24:50:48, 3.59it/s] 14%|█▎ | 50330/371472 [4:01:03<25:51:02, 3.45it/s] 14%|█▎ | 50331/371472 [4:01:03<25:43:42, 3.47it/s] 14%|█▎ | 50332/371472 [4:01:04<24:34:23, 3.63it/s] 14%|█▎ | 50333/371472 [4:01:04<23:34:51, 3.78it/s] 14%|█▎ | 50334/371472 [4:01:04<24:20:59, 3.66it/s] 14%|█▎ | 50335/371472 [4:01:05<23:40:14, 3.77it/s] 14%|█▎ | 50336/371472 [4:01:05<23:53:46, 3.73it/s] 14%|█▎ | 50337/371472 [4:01:05<23:54:45, 3.73it/s] 14%|█▎ | 50338/371472 [4:01:05<23:49:07, 3.75it/s] 14%|█▎ | 50339/371472 [4:01:06<23:51:08, 3.74it/s] 14%|█▎ | 50340/371472 [4:01:06<23:55:26, 3.73it/s] {'loss': 4.4295, 'learning_rate': 8.784556874744123e-07, 'epoch': 2.17} + 14%|█▎ | 50340/371472 [4:01:06<23:55:26, 3.73it/s] 14%|█▎ | 50341/371472 [4:01:06<25:33:53, 3.49it/s] 14%|█▎ | 50342/371472 [4:01:06<24:33:02, 3.63it/s] 14%|█▎ | 50343/371472 [4:01:07<24:21:37, 3.66it/s] 14%|█▎ | 50344/371472 [4:01:07<24:38:52, 3.62it/s] 14%|█▎ | 50345/371472 [4:01:07<24:02:05, 3.71it/s] 14%|█▎ | 50346/371472 [4:01:07<23:47:11, 3.75it/s] 14%|█▎ | 50347/371472 [4:01:08<23:13:14, 3.84it/s] 14%|█▎ | 50348/371472 [4:01:08<23:48:51, 3.75it/s] 14%|█▎ | 50349/371472 [4:01:08<25:18:22, 3.52it/s] 14%|█▎ | 50350/371472 [4:01:09<25:44:03, 3.47it/s] 14%|█▎ | 50351/371472 [4:01:09<24:26:48, 3.65it/s] 14%|█▎ | 50352/371472 [4:01:09<23:59:39, 3.72it/s] 14%|█▎ | 50353/371472 [4:01:09<23:54:32, 3.73it/s] 14%|█▎ | 50354/371472 [4:01:10<22:52:17, 3.90it/s] 14%|█▎ | 50355/371472 [4:01:10<23:54:09, 3.73it/s] 14%|█▎ | 50356/371472 [4:01:10<23:52:29, 3.74it/s] 14%|█▎ | 50357/371472 [4:01:10<23:04:11, 3.87it/s] 14%|█▎ | 50358/371472 [4:01:11<24:04:20, 3.71it/s] 14%|█▎ | 50359/371472 [4:01:11<24:29:26, 3.64it/s] 14%|█▎ | 50360/371472 [4:01:11<24:45:11, 3.60it/s] {'loss': 4.179, 'learning_rate': 8.784072054989334e-07, 'epoch': 2.17} + 14%|█▎ | 50360/371472 [4:01:11<24:45:11, 3.60it/s] 14%|█▎ | 50361/371472 [4:01:12<24:06:18, 3.70it/s] 14%|█▎ | 50362/371472 [4:01:12<27:08:24, 3.29it/s] 14%|█▎ | 50363/371472 [4:01:12<26:08:13, 3.41it/s] 14%|█▎ | 50364/371472 [4:01:13<26:42:50, 3.34it/s] 14%|█▎ | 50365/371472 [4:01:13<25:25:37, 3.51it/s] 14%|█▎ | 50366/371472 [4:01:13<25:22:11, 3.52it/s] 14%|█▎ | 50367/371472 [4:01:13<25:34:12, 3.49it/s] 14%|█▎ | 50368/371472 [4:01:14<24:39:42, 3.62it/s] 14%|█▎ | 50369/371472 [4:01:14<26:44:02, 3.34it/s] 14%|█▎ | 50370/371472 [4:01:14<25:41:17, 3.47it/s] 14%|█▎ | 50371/371472 [4:01:14<25:43:06, 3.47it/s] 14%|█▎ | 50372/371472 [4:01:15<24:40:09, 3.62it/s] 14%|█▎ | 50373/371472 [4:01:15<24:24:20, 3.65it/s] 14%|█▎ | 50374/371472 [4:01:15<25:23:37, 3.51it/s] 14%|█▎ | 50375/371472 [4:01:16<24:42:06, 3.61it/s] 14%|█▎ | 50376/371472 [4:01:16<23:58:00, 3.72it/s] 14%|█▎ | 50377/371472 [4:01:16<24:27:40, 3.65it/s] 14%|█▎ | 50378/371472 [4:01:16<24:01:37, 3.71it/s] 14%|█▎ | 50379/371472 [4:01:17<23:25:19, 3.81it/s] 14%|█▎ | 50380/371472 [4:01:17<23:46:09, 3.75it/s] {'loss': 4.4046, 'learning_rate': 8.783587235234545e-07, 'epoch': 2.17} + 14%|█▎ | 50380/371472 [4:01:17<23:46:09, 3.75it/s] 14%|█▎ | 50381/371472 [4:01:17<23:23:10, 3.81it/s] 14%|█▎ | 50382/371472 [4:01:17<24:47:37, 3.60it/s] 14%|█▎ | 50383/371472 [4:01:18<25:23:14, 3.51it/s] 14%|█▎ | 50384/371472 [4:01:18<32:25:39, 2.75it/s] 14%|█▎ | 50385/371472 [4:01:19<29:40:29, 3.01it/s] 14%|█▎ | 50386/371472 [4:01:19<29:29:47, 3.02it/s] 14%|█▎ | 50387/371472 [4:01:19<27:24:08, 3.25it/s] 14%|█▎ | 50388/371472 [4:01:19<25:59:12, 3.43it/s] 14%|█▎ | 50389/371472 [4:01:20<24:37:30, 3.62it/s] 14%|█▎ | 50390/371472 [4:01:20<25:36:15, 3.48it/s] 14%|█▎ | 50391/371472 [4:01:20<24:38:29, 3.62it/s] 14%|█▎ | 50392/371472 [4:01:21<25:53:18, 3.45it/s] 14%|█▎ | 50393/371472 [4:01:21<26:57:30, 3.31it/s] 14%|█▎ | 50394/371472 [4:01:21<29:18:03, 3.04it/s] 14%|█▎ | 50395/371472 [4:01:22<27:28:18, 3.25it/s] 14%|█▎ | 50396/371472 [4:01:22<26:44:21, 3.34it/s] 14%|█▎ | 50397/371472 [4:01:22<25:06:29, 3.55it/s] 14%|█▎ | 50398/371472 [4:01:22<24:59:07, 3.57it/s] 14%|█▎ | 50399/371472 [4:01:23<23:55:03, 3.73it/s] 14%|█▎ | 50400/371472 [4:01:23<23:02:21, 3.87it/s] {'loss': 4.3233, 'learning_rate': 8.783102415479755e-07, 'epoch': 2.17} + 14%|█▎ | 50400/371472 [4:01:23<23:02:21, 3.87it/s] 14%|█▎ | 50401/371472 [4:01:23<23:18:47, 3.83it/s] 14%|█▎ | 50402/371472 [4:01:23<23:37:17, 3.78it/s] 14%|█▎ | 50403/371472 [4:01:24<22:56:04, 3.89it/s] 14%|█▎ | 50404/371472 [4:01:24<23:24:12, 3.81it/s] 14%|█▎ | 50405/371472 [4:01:24<23:17:03, 3.83it/s] 14%|█▎ | 50406/371472 [4:01:24<23:52:18, 3.74it/s] 14%|█▎ | 50407/371472 [4:01:25<25:34:33, 3.49it/s] 14%|█▎ | 50408/371472 [4:01:25<24:08:44, 3.69it/s] 14%|█▎ | 50409/371472 [4:01:25<24:05:05, 3.70it/s] 14%|█▎ | 50410/371472 [4:01:25<23:42:00, 3.76it/s] 14%|█▎ | 50411/371472 [4:01:26<24:06:07, 3.70it/s] 14%|█▎ | 50412/371472 [4:01:26<23:35:17, 3.78it/s] 14%|█▎ | 50413/371472 [4:01:26<24:13:45, 3.68it/s] 14%|█▎ | 50414/371472 [4:01:27<23:59:48, 3.72it/s] 14%|█▎ | 50415/371472 [4:01:27<23:21:32, 3.82it/s] 14%|█▎ | 50416/371472 [4:01:27<23:38:23, 3.77it/s] 14%|█▎ | 50417/371472 [4:01:27<23:17:31, 3.83it/s] 14%|█▎ | 50418/371472 [4:01:28<23:37:14, 3.78it/s] 14%|█▎ | 50419/371472 [4:01:28<23:05:37, 3.86it/s] 14%|█▎ | 50420/371472 [4:01:28<24:15:37, 3.68it/s] {'loss': 4.0627, 'learning_rate': 8.782617595724967e-07, 'epoch': 2.17} + 14%|█▎ | 50420/371472 [4:01:28<24:15:37, 3.68it/s] 14%|█▎ | 50421/371472 [4:01:28<23:52:04, 3.74it/s] 14%|█▎ | 50422/371472 [4:01:29<24:45:23, 3.60it/s] 14%|█▎ | 50423/371472 [4:01:29<24:41:22, 3.61it/s] 14%|█▎ | 50424/371472 [4:01:29<24:09:49, 3.69it/s] 14%|█▎ | 50425/371472 [4:01:29<23:36:57, 3.78it/s] 14%|█▎ | 50426/371472 [4:01:30<25:23:39, 3.51it/s] 14%|█▎ | 50427/371472 [4:01:30<26:39:36, 3.35it/s] 14%|█▎ | 50428/371472 [4:01:30<25:31:04, 3.49it/s] 14%|█▎ | 50429/371472 [4:01:31<25:05:44, 3.55it/s] 14%|█▎ | 50430/371472 [4:01:31<24:12:19, 3.68it/s] 14%|█▎ | 50431/371472 [4:01:31<23:46:41, 3.75it/s] 14%|█▎ | 50432/371472 [4:01:31<24:32:41, 3.63it/s] 14%|█▎ | 50433/371472 [4:01:32<24:48:03, 3.60it/s] 14%|█▎ | 50434/371472 [4:01:32<24:27:57, 3.64it/s] 14%|█▎ | 50435/371472 [4:01:32<23:16:38, 3.83it/s] 14%|█▎ | 50436/371472 [4:01:33<22:50:15, 3.90it/s] 14%|█▎ | 50437/371472 [4:01:33<25:50:00, 3.45it/s] 14%|█▎ | 50438/371472 [4:01:33<27:35:52, 3.23it/s] 14%|█▎ | 50439/371472 [4:01:34<26:48:28, 3.33it/s] 14%|█▎ | 50440/371472 [4:01:34<27:12:28, 3.28it/s] {'loss': 4.2086, 'learning_rate': 8.782132775970177e-07, 'epoch': 2.17} + 14%|█▎ | 50440/371472 [4:01:34<27:12:28, 3.28it/s] 14%|█▎ | 50441/371472 [4:01:34<27:04:35, 3.29it/s] 14%|█▎ | 50442/371472 [4:01:34<25:30:07, 3.50it/s] 14%|█▎ | 50443/371472 [4:01:35<25:34:02, 3.49it/s] 14%|█▎ | 50444/371472 [4:01:35<24:35:28, 3.63it/s] 14%|█▎ | 50445/371472 [4:01:35<23:27:47, 3.80it/s] 14%|█▎ | 50446/371472 [4:01:35<25:36:19, 3.48it/s] 14%|█▎ | 50447/371472 [4:01:36<25:00:46, 3.57it/s] 14%|█▎ | 50448/371472 [4:01:36<25:23:20, 3.51it/s] 14%|█▎ | 50449/371472 [4:01:36<24:54:17, 3.58it/s] 14%|█▎ | 50450/371472 [4:01:37<24:35:25, 3.63it/s] 14%|█▎ | 50451/371472 [4:01:37<23:46:25, 3.75it/s] 14%|█▎ | 50452/371472 [4:01:37<24:03:45, 3.71it/s] 14%|█▎ | 50453/371472 [4:01:37<24:22:33, 3.66it/s] 14%|█▎ | 50454/371472 [4:01:38<24:34:15, 3.63it/s] 14%|█▎ | 50455/371472 [4:01:38<25:34:21, 3.49it/s] 14%|█▎ | 50456/371472 [4:01:38<25:06:23, 3.55it/s] 14%|█▎ | 50457/371472 [4:01:38<24:05:43, 3.70it/s] 14%|█▎ | 50458/371472 [4:01:39<23:40:50, 3.77it/s] 14%|█▎ | 50459/371472 [4:01:39<24:55:56, 3.58it/s] 14%|█▎ | 50460/371472 [4:01:39<24:32:18, 3.63it/s] {'loss': 4.2934, 'learning_rate': 8.781647956215389e-07, 'epoch': 2.17} + 14%|█▎ | 50460/371472 [4:01:39<24:32:18, 3.63it/s] 14%|█▎ | 50461/371472 [4:01:40<23:41:29, 3.76it/s] 14%|█▎ | 50462/371472 [4:01:40<24:07:31, 3.70it/s] 14%|█▎ | 50463/371472 [4:01:40<25:17:22, 3.53it/s] 14%|█▎ | 50464/371472 [4:01:40<23:52:19, 3.74it/s] 14%|█▎ | 50465/371472 [4:01:41<26:28:01, 3.37it/s] 14%|█▎ | 50466/371472 [4:01:41<26:43:10, 3.34it/s] 14%|█▎ | 50467/371472 [4:01:41<27:51:31, 3.20it/s] 14%|█▎ | 50468/371472 [4:01:42<26:01:20, 3.43it/s] 14%|█▎ | 50469/371472 [4:01:42<24:57:12, 3.57it/s] 14%|█▎ | 50470/371472 [4:01:42<26:51:14, 3.32it/s] 14%|█▎ | 50471/371472 [4:01:42<25:16:38, 3.53it/s] 14%|█▎ | 50472/371472 [4:01:43<25:52:22, 3.45it/s] 14%|█▎ | 50473/371472 [4:01:43<24:46:05, 3.60it/s] 14%|█▎ | 50474/371472 [4:01:43<25:03:02, 3.56it/s] 14%|█▎ | 50475/371472 [4:01:44<24:36:30, 3.62it/s] 14%|█▎ | 50476/371472 [4:01:44<25:15:05, 3.53it/s] 14%|█▎ | 50477/371472 [4:01:44<26:06:50, 3.41it/s] 14%|█▎ | 50478/371472 [4:01:44<25:26:47, 3.50it/s] 14%|█▎ | 50479/371472 [4:01:45<26:01:24, 3.43it/s] 14%|█▎ | 50480/371472 [4:01:45<25:07:36, 3.55it/s] {'loss': 4.3135, 'learning_rate': 8.7811631364606e-07, 'epoch': 2.17} + 14%|█▎ | 50480/371472 [4:01:45<25:07:36, 3.55it/s] 14%|█▎ | 50481/371472 [4:01:45<24:22:50, 3.66it/s] 14%|█▎ | 50482/371472 [4:01:46<23:57:54, 3.72it/s] 14%|█▎ | 50483/371472 [4:01:46<23:24:42, 3.81it/s] 14%|█▎ | 50484/371472 [4:01:46<24:41:04, 3.61it/s] 14%|█▎ | 50485/371472 [4:01:46<25:03:43, 3.56it/s] 14%|█▎ | 50486/371472 [4:01:47<25:29:45, 3.50it/s] 14%|█▎ | 50487/371472 [4:01:47<25:29:26, 3.50it/s] 14%|█▎ | 50488/371472 [4:01:47<24:57:40, 3.57it/s] 14%|█▎ | 50489/371472 [4:01:48<25:33:36, 3.49it/s] 14%|█▎ | 50490/371472 [4:01:48<26:58:14, 3.31it/s] 14%|█▎ | 50491/371472 [4:01:48<27:21:16, 3.26it/s] 14%|█▎ | 50492/371472 [4:01:48<26:39:27, 3.34it/s] 14%|█▎ | 50493/371472 [4:01:49<25:13:21, 3.53it/s] 14%|█▎ | 50494/371472 [4:01:49<26:35:57, 3.35it/s] 14%|█▎ | 50495/371472 [4:01:49<25:38:03, 3.48it/s] 14%|█▎ | 50496/371472 [4:01:50<25:50:00, 3.45it/s] 14%|█▎ | 50497/371472 [4:01:50<25:55:20, 3.44it/s] 14%|█▎ | 50498/371472 [4:01:50<24:57:03, 3.57it/s] 14%|█▎ | 50499/371472 [4:01:50<24:03:01, 3.71it/s] 14%|█▎ | 50500/371472 [4:01:51<23:57:43, 3.72it/s] {'loss': 4.0578, 'learning_rate': 8.780678316705812e-07, 'epoch': 2.18} + 14%|█▎ | 50500/371472 [4:01:51<23:57:43, 3.72it/s] 14%|█▎ | 50501/371472 [4:01:51<23:11:07, 3.85it/s] 14%|█▎ | 50502/371472 [4:01:51<23:23:31, 3.81it/s] 14%|█▎ | 50503/371472 [4:01:51<22:46:11, 3.92it/s] 14%|█▎ | 50504/371472 [4:01:52<22:53:45, 3.89it/s] 14%|█▎ | 50505/371472 [4:01:52<22:52:10, 3.90it/s] 14%|█▎ | 50506/371472 [4:01:52<23:17:19, 3.83it/s] 14%|█▎ | 50507/371472 [4:01:53<24:04:32, 3.70it/s] 14%|█▎ | 50508/371472 [4:01:53<24:32:47, 3.63it/s] 14%|█▎ | 50509/371472 [4:01:53<25:22:13, 3.51it/s] 14%|█▎ | 50510/371472 [4:01:53<27:45:19, 3.21it/s] 14%|█▎ | 50511/371472 [4:01:54<26:36:46, 3.35it/s] 14%|█▎ | 50512/371472 [4:01:54<27:35:29, 3.23it/s] 14%|█▎ | 50513/371472 [4:01:54<26:38:40, 3.35it/s] 14%|█▎ | 50514/371472 [4:01:55<27:23:02, 3.26it/s] 14%|█▎ | 50515/371472 [4:01:55<26:46:24, 3.33it/s] 14%|█▎ | 50516/371472 [4:01:55<26:08:13, 3.41it/s] 14%|█▎ | 50517/371472 [4:01:56<27:02:30, 3.30it/s] 14%|█▎ | 50518/371472 [4:01:56<25:39:05, 3.48it/s] 14%|█▎ | 50519/371472 [4:01:56<26:22:39, 3.38it/s] 14%|█▎ | 50520/371472 [4:01:56<26:10:22, 3.41it/s] {'loss': 4.1289, 'learning_rate': 8.780193496951022e-07, 'epoch': 2.18} + 14%|█▎ | 50520/371472 [4:01:56<26:10:22, 3.41it/s] 14%|█▎ | 50521/371472 [4:01:57<25:20:38, 3.52it/s] 14%|█▎ | 50522/371472 [4:01:57<26:26:08, 3.37it/s] 14%|█▎ | 50523/371472 [4:01:57<25:44:38, 3.46it/s] 14%|█▎ | 50524/371472 [4:01:58<26:34:27, 3.35it/s] 14%|█▎ | 50525/371472 [4:01:58<27:27:55, 3.25it/s] 14%|█▎ | 50526/371472 [4:01:58<27:15:58, 3.27it/s] 14%|█▎ | 50527/371472 [4:01:58<25:42:47, 3.47it/s] 14%|█▎ | 50528/371472 [4:01:59<26:22:14, 3.38it/s] 14%|█▎ | 50529/371472 [4:01:59<25:43:56, 3.46it/s] 14%|█▎ | 50530/371472 [4:01:59<25:45:13, 3.46it/s] 14%|█▎ | 50531/371472 [4:02:00<24:22:32, 3.66it/s] 14%|█▎ | 50532/371472 [4:02:00<24:12:57, 3.68it/s] 14%|█▎ | 50533/371472 [4:02:00<25:28:11, 3.50it/s] 14%|█▎ | 50534/371472 [4:02:00<24:20:28, 3.66it/s] 14%|█▎ | 50535/371472 [4:02:01<25:41:46, 3.47it/s] 14%|█▎ | 50536/371472 [4:02:01<26:02:24, 3.42it/s] 14%|█▎ | 50537/371472 [4:02:01<26:08:25, 3.41it/s] 14%|█▎ | 50538/371472 [4:02:02<25:15:28, 3.53it/s] 14%|█▎ | 50539/371472 [4:02:02<24:22:35, 3.66it/s] 14%|█▎ | 50540/371472 [4:02:02<25:17:06, 3.53it/s] {'loss': 4.2016, 'learning_rate': 8.779708677196233e-07, 'epoch': 2.18} + 14%|█▎ | 50540/371472 [4:02:02<25:17:06, 3.53it/s] 14%|█▎ | 50541/371472 [4:02:02<24:49:46, 3.59it/s] 14%|█▎ | 50542/371472 [4:02:03<24:00:07, 3.71it/s] 14%|█▎ | 50543/371472 [4:02:03<23:49:13, 3.74it/s] 14%|█▎ | 50544/371472 [4:02:03<23:31:56, 3.79it/s] 14%|█▎ | 50545/371472 [4:02:03<23:15:03, 3.83it/s] 14%|█▎ | 50546/371472 [4:02:04<25:12:28, 3.54it/s] 14%|█▎ | 50547/371472 [4:02:04<24:26:14, 3.65it/s] 14%|█▎ | 50548/371472 [4:02:04<26:49:29, 3.32it/s] 14%|█▎ | 50549/371472 [4:02:05<27:34:51, 3.23it/s] 14%|█▎ | 50550/371472 [4:02:05<27:12:06, 3.28it/s] 14%|█▎ | 50551/371472 [4:02:05<26:38:03, 3.35it/s] 14%|█▎ | 50552/371472 [4:02:06<25:40:53, 3.47it/s] 14%|█▎ | 50553/371472 [4:02:06<25:08:32, 3.55it/s] 14%|█▎ | 50554/371472 [4:02:06<24:26:28, 3.65it/s] 14%|█▎ | 50555/371472 [4:02:06<23:27:58, 3.80it/s] 14%|█▎ | 50556/371472 [4:02:07<24:16:35, 3.67it/s] 14%|█▎ | 50557/371472 [4:02:07<24:19:04, 3.67it/s] 14%|█▎ | 50558/371472 [4:02:07<24:29:10, 3.64it/s] 14%|█▎ | 50559/371472 [4:02:08<25:38:31, 3.48it/s] 14%|█▎ | 50560/371472 [4:02:08<28:07:42, 3.17it/s] {'loss': 4.0938, 'learning_rate': 8.779223857441444e-07, 'epoch': 2.18} + 14%|█▎ | 50560/371472 [4:02:08<28:07:42, 3.17it/s] 14%|█▎ | 50561/371472 [4:02:08<28:22:38, 3.14it/s] 14%|█▎ | 50562/371472 [4:02:08<26:54:40, 3.31it/s] 14%|█▎ | 50563/371472 [4:02:09<26:04:19, 3.42it/s] 14%|█▎ | 50564/371472 [4:02:09<24:58:59, 3.57it/s] 14%|█▎ | 50565/371472 [4:02:09<24:26:50, 3.65it/s] 14%|█▎ | 50566/371472 [4:02:10<23:39:28, 3.77it/s] 14%|█▎ | 50567/371472 [4:02:10<24:27:43, 3.64it/s] 14%|█▎ | 50568/371472 [4:02:10<23:32:25, 3.79it/s] 14%|█▎ | 50569/371472 [4:02:10<23:30:57, 3.79it/s] 14%|█▎ | 50570/371472 [4:02:11<23:23:31, 3.81it/s] 14%|█▎ | 50571/371472 [4:02:11<23:06:39, 3.86it/s] 14%|█▎ | 50572/371472 [4:02:11<24:45:32, 3.60it/s] 14%|█▎ | 50573/371472 [4:02:11<24:13:03, 3.68it/s] 14%|█▎ | 50574/371472 [4:02:12<23:31:07, 3.79it/s] 14%|█▎ | 50575/371472 [4:02:12<25:11:06, 3.54it/s] 14%|█▎ | 50576/371472 [4:02:12<25:50:15, 3.45it/s] 14%|█▎ | 50577/371472 [4:02:13<26:15:53, 3.39it/s] 14%|█▎ | 50578/371472 [4:02:13<26:25:09, 3.37it/s] 14%|█▎ | 50579/371472 [4:02:13<25:08:35, 3.55it/s] 14%|█▎ | 50580/371472 [4:02:14<29:09:03, 3.06it/s] {'loss': 4.345, 'learning_rate': 8.778739037686655e-07, 'epoch': 2.18} + 14%|█▎ | 50580/371472 [4:02:14<29:09:03, 3.06it/s] 14%|█▎ | 50581/371472 [4:02:14<27:10:02, 3.28it/s] 14%|█▎ | 50582/371472 [4:02:14<26:53:49, 3.31it/s] 14%|█▎ | 50583/371472 [4:02:14<27:39:30, 3.22it/s] 14%|█▎ | 50584/371472 [4:02:15<26:49:07, 3.32it/s] 14%|█▎ | 50585/371472 [4:02:15<26:03:57, 3.42it/s] 14%|█▎ | 50586/371472 [4:02:15<24:24:23, 3.65it/s] 14%|█▎ | 50587/371472 [4:02:16<25:22:23, 3.51it/s] 14%|█▎ | 50588/371472 [4:02:16<24:45:35, 3.60it/s] 14%|█▎ | 50589/371472 [4:02:16<23:55:04, 3.73it/s] 14%|█▎ | 50590/371472 [4:02:16<23:07:16, 3.86it/s] 14%|█▎ | 50591/371472 [4:02:17<23:43:21, 3.76it/s] 14%|█▎ | 50592/371472 [4:02:17<23:30:28, 3.79it/s] 14%|█▎ | 50593/371472 [4:02:17<22:59:06, 3.88it/s] 14%|█▎ | 50594/371472 [4:02:17<25:57:50, 3.43it/s] 14%|█▎ | 50595/371472 [4:02:18<26:23:54, 3.38it/s] 14%|█▎ | 50596/371472 [4:02:18<26:05:47, 3.42it/s] 14%|█▎ | 50597/371472 [4:02:18<26:10:24, 3.41it/s] 14%|█▎ | 50598/371472 [4:02:19<25:44:43, 3.46it/s] 14%|█▎ | 50599/371472 [4:02:19<25:24:50, 3.51it/s] 14%|█▎ | 50600/371472 [4:02:19<24:45:22, 3.60it/s] {'loss': 4.3269, 'learning_rate': 8.778254217931866e-07, 'epoch': 2.18} + 14%|█▎ | 50600/371472 [4:02:19<24:45:22, 3.60it/s] 14%|█▎ | 50601/371472 [4:02:19<24:33:32, 3.63it/s] 14%|█▎ | 50602/371472 [4:02:20<26:12:50, 3.40it/s] 14%|█▎ | 50603/371472 [4:02:20<28:23:52, 3.14it/s] 14%|█▎ | 50604/371472 [4:02:20<26:16:16, 3.39it/s] 14%|█▎ | 50605/371472 [4:02:21<26:31:15, 3.36it/s] 14%|█▎ | 50606/371472 [4:02:21<25:09:06, 3.54it/s] 14%|█▎ | 50607/371472 [4:02:21<26:50:41, 3.32it/s] 14%|█▎ | 50608/371472 [4:02:22<26:50:37, 3.32it/s] 14%|█▎ | 50609/371472 [4:02:22<28:01:23, 3.18it/s] 14%|█▎ | 50610/371472 [4:02:22<28:22:09, 3.14it/s] 14%|█▎ | 50611/371472 [4:02:23<27:44:31, 3.21it/s] 14%|█▎ | 50612/371472 [4:02:23<26:05:02, 3.42it/s] 14%|█▎ | 50613/371472 [4:02:23<26:22:22, 3.38it/s] 14%|█▎ | 50614/371472 [4:02:23<25:48:28, 3.45it/s] 14%|█▎ | 50615/371472 [4:02:24<25:58:52, 3.43it/s] 14%|█▎ | 50616/371472 [4:02:24<25:58:49, 3.43it/s] 14%|█▎ | 50617/371472 [4:02:24<26:02:36, 3.42it/s] 14%|█▎ | 50618/371472 [4:02:24<25:06:43, 3.55it/s] 14%|█▎ | 50619/371472 [4:02:25<24:35:02, 3.63it/s] 14%|█▎ | 50620/371472 [4:02:25<24:40:20, 3.61it/s] {'loss': 4.2461, 'learning_rate': 8.777769398177078e-07, 'epoch': 2.18} + 14%|█▎ | 50620/371472 [4:02:25<24:40:20, 3.61it/s] 14%|█▎ | 50621/371472 [4:02:25<23:38:18, 3.77it/s] 14%|█▎ | 50622/371472 [4:02:26<23:29:54, 3.79it/s] 14%|█▎ | 50623/371472 [4:02:26<25:19:04, 3.52it/s] 14%|█▎ | 50624/371472 [4:02:26<26:14:47, 3.40it/s] 14%|█▎ | 50625/371472 [4:02:26<24:47:38, 3.59it/s] 14%|█▎ | 50626/371472 [4:02:27<24:49:09, 3.59it/s] 14%|█▎ | 50627/371472 [4:02:27<26:29:05, 3.37it/s] 14%|█▎ | 50628/371472 [4:02:27<25:10:51, 3.54it/s] 14%|█▎ | 50629/371472 [4:02:28<26:06:29, 3.41it/s] 14%|█▎ | 50630/371472 [4:02:28<25:09:29, 3.54it/s] 14%|█▎ | 50631/371472 [4:02:28<25:37:45, 3.48it/s] 14%|█▎ | 50632/371472 [4:02:28<25:06:04, 3.55it/s] 14%|█▎ | 50633/371472 [4:02:29<24:22:44, 3.66it/s] 14%|█▎ | 50634/371472 [4:02:29<25:10:42, 3.54it/s] 14%|█▎ | 50635/371472 [4:02:29<25:14:59, 3.53it/s] 14%|█▎ | 50636/371472 [4:02:30<24:54:43, 3.58it/s] 14%|█▎ | 50637/371472 [4:02:30<24:39:05, 3.62it/s] 14%|█▎ | 50638/371472 [4:02:30<24:15:20, 3.67it/s] 14%|█▎ | 50639/371472 [4:02:30<23:14:43, 3.83it/s] 14%|█▎ | 50640/371472 [4:02:31<23:20:15, 3.82it/s] {'loss': 4.1303, 'learning_rate': 8.777284578422289e-07, 'epoch': 2.18} + 14%|█▎ | 50640/371472 [4:02:31<23:20:15, 3.82it/s] 14%|█▎ | 50641/371472 [4:02:31<26:05:12, 3.42it/s] 14%|█▎ | 50642/371472 [4:02:31<25:56:34, 3.44it/s] 14%|█▎ | 50643/371472 [4:02:32<27:34:11, 3.23it/s] 14%|█▎ | 50644/371472 [4:02:32<27:03:30, 3.29it/s] 14%|█▎ | 50645/371472 [4:02:32<25:49:22, 3.45it/s] 14%|█▎ | 50646/371472 [4:02:32<24:39:22, 3.61it/s] 14%|█▎ | 50647/371472 [4:02:33<24:13:54, 3.68it/s] 14%|█▎ | 50648/371472 [4:02:33<25:35:10, 3.48it/s] 14%|█▎ | 50649/371472 [4:02:33<24:52:58, 3.58it/s] 14%|█▎ | 50650/371472 [4:02:33<25:04:40, 3.55it/s] 14%|█▎ | 50651/371472 [4:02:34<24:40:10, 3.61it/s] 14%|█▎ | 50652/371472 [4:02:34<25:10:05, 3.54it/s] 14%|█▎ | 50653/371472 [4:02:34<24:26:56, 3.65it/s] 14%|█▎ | 50654/371472 [4:02:35<24:18:13, 3.67it/s] 14%|█▎ | 50655/371472 [4:02:35<25:19:56, 3.52it/s] 14%|█▎ | 50656/371472 [4:02:35<24:57:32, 3.57it/s] 14%|█▎ | 50657/371472 [4:02:35<26:01:33, 3.42it/s] 14%|█▎ | 50658/371472 [4:02:36<25:36:37, 3.48it/s] 14%|█▎ | 50659/371472 [4:02:36<24:32:10, 3.63it/s] 14%|█▎ | 50660/371472 [4:02:36<23:44:33, 3.75it/s] {'loss': 4.2371, 'learning_rate': 8.776799758667499e-07, 'epoch': 2.18} + 14%|█▎ | 50660/371472 [4:02:36<23:44:33, 3.75it/s] 14%|█▎ | 50661/371472 [4:02:37<25:54:03, 3.44it/s] 14%|█▎ | 50662/371472 [4:02:37<25:27:21, 3.50it/s] 14%|█▎ | 50663/371472 [4:02:37<24:46:37, 3.60it/s] 14%|█▎ | 50664/371472 [4:02:37<24:37:42, 3.62it/s] 14%|█▎ | 50665/371472 [4:02:38<25:02:29, 3.56it/s] 14%|█▎ | 50666/371472 [4:02:38<24:45:18, 3.60it/s] 14%|█▎ | 50667/371472 [4:02:38<24:55:11, 3.58it/s] 14%|█▎ | 50668/371472 [4:02:39<24:17:19, 3.67it/s] 14%|█▎ | 50669/371472 [4:02:39<24:14:23, 3.68it/s] 14%|█▎ | 50670/371472 [4:02:39<25:19:44, 3.52it/s] 14%|█▎ | 50671/371472 [4:02:39<25:47:40, 3.45it/s] 14%|█▎ | 50672/371472 [4:02:40<24:18:23, 3.67it/s] 14%|█▎ | 50673/371472 [4:02:40<23:47:07, 3.75it/s] 14%|█▎ | 50674/371472 [4:02:40<24:02:43, 3.71it/s] 14%|█▎ | 50675/371472 [4:02:40<24:50:08, 3.59it/s] 14%|█▎ | 50676/371472 [4:02:41<24:35:08, 3.62it/s] 14%|█▎ | 50677/371472 [4:02:41<24:37:12, 3.62it/s] 14%|█▎ | 50678/371472 [4:02:41<24:08:41, 3.69it/s] 14%|█▎ | 50679/371472 [4:02:42<23:25:28, 3.80it/s] 14%|█▎ | 50680/371472 [4:02:42<23:32:10, 3.79it/s] {'loss': 4.2818, 'learning_rate': 8.77631493891271e-07, 'epoch': 2.18} + 14%|█▎ | 50680/371472 [4:02:42<23:32:10, 3.79it/s] 14%|█▎ | 50681/371472 [4:02:42<23:05:56, 3.86it/s] 14%|█▎ | 50682/371472 [4:02:42<22:56:12, 3.88it/s] 14%|█▎ | 50683/371472 [4:02:43<23:07:58, 3.85it/s] 14%|█▎ | 50684/371472 [4:02:43<23:16:35, 3.83it/s] 14%|█▎ | 50685/371472 [4:02:43<22:55:07, 3.89it/s] 14%|█▎ | 50686/371472 [4:02:43<23:25:11, 3.80it/s] 14%|█▎ | 50687/371472 [4:02:44<24:18:16, 3.67it/s] 14%|█▎ | 50688/371472 [4:02:44<24:07:56, 3.69it/s] 14%|█▎ | 50689/371472 [4:02:44<23:53:34, 3.73it/s] 14%|█▎ | 50690/371472 [4:02:44<24:55:26, 3.58it/s] 14%|█▎ | 50691/371472 [4:02:45<24:07:18, 3.69it/s] 14%|█▎ | 50692/371472 [4:02:45<23:13:40, 3.84it/s] 14%|█▎ | 50693/371472 [4:02:45<23:06:24, 3.86it/s] 14%|█▎ | 50694/371472 [4:02:45<23:32:49, 3.78it/s] 14%|█▎ | 50695/371472 [4:02:46<24:09:02, 3.69it/s] 14%|█▎ | 50696/371472 [4:02:46<23:57:48, 3.72it/s] 14%|█▎ | 50697/371472 [4:02:46<24:29:39, 3.64it/s] 14%|█▎ | 50698/371472 [4:02:47<24:02:54, 3.71it/s] 14%|█▎ | 50699/371472 [4:02:47<25:48:06, 3.45it/s] 14%|█▎ | 50700/371472 [4:02:47<24:26:35, 3.65it/s] {'loss': 4.5456, 'learning_rate': 8.775830119157922e-07, 'epoch': 2.18} + 14%|█▎ | 50700/371472 [4:02:47<24:26:35, 3.65it/s] 14%|█▎ | 50701/371472 [4:02:47<23:32:23, 3.79it/s] 14%|█▎ | 50702/371472 [4:02:48<24:21:56, 3.66it/s] 14%|█▎ | 50703/371472 [4:02:48<24:22:23, 3.66it/s] 14%|█▎ | 50704/371472 [4:02:48<24:28:11, 3.64it/s] 14%|█▎ | 50705/371472 [4:02:49<25:26:59, 3.50it/s] 14%|█▎ | 50706/371472 [4:02:49<25:37:04, 3.48it/s] 14%|█▎ | 50707/371472 [4:02:49<24:46:12, 3.60it/s] 14%|█▎ | 50708/371472 [4:02:49<24:53:55, 3.58it/s] 14%|█▎ | 50709/371472 [4:02:50<24:07:40, 3.69it/s] 14%|█▎ | 50710/371472 [4:02:50<24:23:02, 3.65it/s] 14%|█▎ | 50711/371472 [4:02:50<25:37:40, 3.48it/s] 14%|█▎ | 50712/371472 [4:02:51<25:09:52, 3.54it/s] 14%|█▎ | 50713/371472 [4:02:51<25:25:43, 3.50it/s] 14%|█▎ | 50714/371472 [4:02:51<24:14:06, 3.68it/s] 14%|█▎ | 50715/371472 [4:02:51<24:55:58, 3.57it/s] 14%|█▎ | 50716/371472 [4:02:52<26:45:39, 3.33it/s] 14%|█▎ | 50717/371472 [4:02:52<29:17:27, 3.04it/s] 14%|█▎ | 50718/371472 [4:02:52<27:25:49, 3.25it/s] 14%|█▎ | 50719/371472 [4:02:53<27:01:19, 3.30it/s] 14%|█▎ | 50720/371472 [4:02:53<25:21:20, 3.51it/s] {'loss': 4.3468, 'learning_rate': 8.775345299403133e-07, 'epoch': 2.18} + 14%|█▎ | 50720/371472 [4:02:53<25:21:20, 3.51it/s] 14%|█▎ | 50721/371472 [4:02:53<26:45:24, 3.33it/s] 14%|█▎ | 50722/371472 [4:02:53<25:26:25, 3.50it/s] 14%|█▎ | 50723/371472 [4:02:54<25:50:04, 3.45it/s] 14%|█▎ | 50724/371472 [4:02:54<29:32:20, 3.02it/s] 14%|█▎ | 50725/371472 [4:02:54<28:12:28, 3.16it/s] 14%|█▎ | 50726/371472 [4:02:55<26:07:03, 3.41it/s] 14%|█▎ | 50727/371472 [4:02:55<26:13:48, 3.40it/s] 14%|█▎ | 50728/371472 [4:02:55<25:45:32, 3.46it/s] 14%|█▎ | 50729/371472 [4:02:56<25:33:53, 3.49it/s] 14%|█▎ | 50730/371472 [4:02:56<25:23:49, 3.51it/s] 14%|█▎ | 50731/371472 [4:02:56<25:31:36, 3.49it/s] 14%|█▎ | 50732/371472 [4:02:56<26:02:39, 3.42it/s] 14%|█▎ | 50733/371472 [4:02:57<27:03:29, 3.29it/s] 14%|█▎ | 50734/371472 [4:02:57<25:59:23, 3.43it/s] 14%|█▎ | 50735/371472 [4:02:57<24:37:33, 3.62it/s] 14%|█▎ | 50736/371472 [4:02:58<24:42:19, 3.61it/s] 14%|█▎ | 50737/371472 [4:02:58<23:21:16, 3.81it/s] 14%|█▎ | 50738/371472 [4:02:58<23:33:38, 3.78it/s] 14%|█▎ | 50739/371472 [4:02:58<23:42:00, 3.76it/s] 14%|█▎ | 50740/371472 [4:02:59<23:27:25, 3.80it/s] {'loss': 4.2847, 'learning_rate': 8.774860479648344e-07, 'epoch': 2.19} + 14%|█▎ | 50740/371472 [4:02:59<23:27:25, 3.80it/s] 14%|█▎ | 50741/371472 [4:02:59<23:28:18, 3.80it/s] 14%|█▎ | 50742/371472 [4:02:59<23:37:24, 3.77it/s] 14%|█▎ | 50743/371472 [4:02:59<23:07:40, 3.85it/s] 14%|█▎ | 50744/371472 [4:03:00<25:33:37, 3.49it/s] 14%|█▎ | 50745/371472 [4:03:00<24:50:23, 3.59it/s] 14%|█▎ | 50746/371472 [4:03:00<24:25:33, 3.65it/s] 14%|█▎ | 50747/371472 [4:03:01<26:13:03, 3.40it/s] 14%|█▎ | 50748/371472 [4:03:01<25:45:01, 3.46it/s] 14%|█▎ | 50749/371472 [4:03:01<26:13:46, 3.40it/s] 14%|█▎ | 50750/371472 [4:03:01<25:18:23, 3.52it/s] 14%|█▎ | 50751/371472 [4:03:02<25:01:21, 3.56it/s] 14%|█▎ | 50752/371472 [4:03:02<26:10:05, 3.40it/s] 14%|█▎ | 50753/371472 [4:03:02<25:43:54, 3.46it/s] 14%|█▎ | 50754/371472 [4:03:03<25:39:04, 3.47it/s] 14%|█▎ | 50755/371472 [4:03:03<25:42:27, 3.47it/s] 14%|█▎ | 50756/371472 [4:03:03<25:14:46, 3.53it/s] 14%|█▎ | 50757/371472 [4:03:03<25:48:03, 3.45it/s] 14%|█▎ | 50758/371472 [4:03:04<25:24:39, 3.51it/s] 14%|█▎ | 50759/371472 [4:03:04<24:47:10, 3.59it/s] 14%|█▎ | 50760/371472 [4:03:04<26:10:06, 3.40it/s] {'loss': 4.203, 'learning_rate': 8.774375659893555e-07, 'epoch': 2.19} + 14%|█▎ | 50760/371472 [4:03:04<26:10:06, 3.40it/s] 14%|█▎ | 50761/371472 [4:03:05<25:54:08, 3.44it/s] 14%|█▎ | 50762/371472 [4:03:05<25:22:02, 3.51it/s] 14%|█▎ | 50763/371472 [4:03:05<25:01:26, 3.56it/s] 14%|█▎ | 50764/371472 [4:03:05<23:49:42, 3.74it/s] 14%|█▎ | 50765/371472 [4:03:06<24:31:13, 3.63it/s] 14%|█▎ | 50766/371472 [4:03:06<24:00:52, 3.71it/s] 14%|█▎ | 50767/371472 [4:03:06<23:29:44, 3.79it/s] 14%|█▎ | 50768/371472 [4:03:06<23:18:17, 3.82it/s] 14%|█▎ | 50769/371472 [4:03:07<23:24:53, 3.80it/s] 14%|█▎ | 50770/371472 [4:03:07<23:21:52, 3.81it/s] 14%|█��� | 50771/371472 [4:03:07<24:25:02, 3.65it/s] 14%|█▎ | 50772/371472 [4:03:08<24:01:49, 3.71it/s] 14%|█▎ | 50773/371472 [4:03:08<27:42:06, 3.22it/s] 14%|█▎ | 50774/371472 [4:03:08<27:58:15, 3.18it/s] 14%|█▎ | 50775/371472 [4:03:08<26:05:50, 3.41it/s] 14%|█▎ | 50776/371472 [4:03:09<25:38:17, 3.47it/s] 14%|█▎ | 50777/371472 [4:03:09<25:34:37, 3.48it/s] 14%|█▎ | 50778/371472 [4:03:09<24:45:05, 3.60it/s] 14%|█▎ | 50779/371472 [4:03:10<24:31:10, 3.63it/s] 14%|█▎ | 50780/371472 [4:03:10<24:06:09, 3.70it/s] {'loss': 4.0493, 'learning_rate': 8.773890840138765e-07, 'epoch': 2.19} + 14%|█▎ | 50780/371472 [4:03:10<24:06:09, 3.70it/s] 14%|█▎ | 50781/371472 [4:03:10<24:45:29, 3.60it/s] 14%|█▎ | 50782/371472 [4:03:10<23:43:55, 3.75it/s] 14%|█▎ | 50783/371472 [4:03:11<23:39:40, 3.76it/s] 14%|█▎ | 50784/371472 [4:03:11<23:00:35, 3.87it/s] 14%|█▎ | 50785/371472 [4:03:11<22:59:27, 3.87it/s] 14%|█▎ | 50786/371472 [4:03:11<23:23:03, 3.81it/s] 14%|█▎ | 50787/371472 [4:03:12<23:31:03, 3.79it/s] 14%|█▎ | 50788/371472 [4:03:12<23:22:11, 3.81it/s] 14%|█▎ | 50789/371472 [4:03:12<24:44:02, 3.60it/s] 14%|█▎ | 50790/371472 [4:03:13<26:41:53, 3.34it/s] 14%|█▎ | 50791/371472 [4:03:13<25:18:43, 3.52it/s] 14%|█▎ | 50792/371472 [4:03:13<25:16:53, 3.52it/s] 14%|█▎ | 50793/371472 [4:03:13<27:16:10, 3.27it/s] 14%|█▎ | 50794/371472 [4:03:14<27:20:43, 3.26it/s] 14%|█▎ | 50795/371472 [4:03:14<26:10:44, 3.40it/s] 14%|█▎ | 50796/371472 [4:03:14<25:59:44, 3.43it/s] 14%|█▎ | 50797/371472 [4:03:15<25:37:37, 3.48it/s] 14%|█▎ | 50798/371472 [4:03:15<26:17:41, 3.39it/s] 14%|█▎ | 50799/371472 [4:03:15<25:52:08, 3.44it/s] 14%|█▎ | 50800/371472 [4:03:16<26:04:42, 3.42it/s] {'loss': 4.2606, 'learning_rate': 8.773406020383977e-07, 'epoch': 2.19} + 14%|█▎ | 50800/371472 [4:03:16<26:04:42, 3.42it/s] 14%|█▎ | 50801/371472 [4:03:16<25:07:34, 3.55it/s] 14%|█▎ | 50802/371472 [4:03:16<24:53:52, 3.58it/s] 14%|█▎ | 50803/371472 [4:03:16<23:39:56, 3.76it/s] 14%|█▎ | 50804/371472 [4:03:17<23:09:37, 3.85it/s] 14%|█▎ | 50805/371472 [4:03:17<24:19:36, 3.66it/s] 14%|█▎ | 50806/371472 [4:03:17<24:00:56, 3.71it/s] 14%|█▎ | 50807/371472 [4:03:17<24:26:02, 3.65it/s] 14%|█▎ | 50808/371472 [4:03:18<24:46:21, 3.60it/s] 14%|█▎ | 50809/371472 [4:03:18<24:15:08, 3.67it/s] 14%|█▎ | 50810/371472 [4:03:18<24:29:26, 3.64it/s] 14%|█▎ | 50811/371472 [4:03:18<23:59:12, 3.71it/s] 14%|█▎ | 50812/371472 [4:03:19<25:04:31, 3.55it/s] 14%|█▎ | 50813/371472 [4:03:19<26:51:11, 3.32it/s] 14%|█▎ | 50814/371472 [4:03:19<25:46:44, 3.46it/s] 14%|█▎ | 50815/371472 [4:03:20<25:05:23, 3.55it/s] 14%|█▎ | 50816/371472 [4:03:20<26:24:49, 3.37it/s] 14%|█▎ | 50817/371472 [4:03:20<25:03:44, 3.55it/s] 14%|█▎ | 50818/371472 [4:03:20<25:05:51, 3.55it/s] 14%|█▎ | 50819/371472 [4:03:21<24:22:59, 3.65it/s] 14%|█▎ | 50820/371472 [4:03:21<23:41:24, 3.76it/s] {'loss': 4.1683, 'learning_rate': 8.772921200629188e-07, 'epoch': 2.19} + 14%|█▎ | 50820/371472 [4:03:21<23:41:24, 3.76it/s] 14%|█▎ | 50821/371472 [4:03:21<23:40:35, 3.76it/s] 14%|█▎ | 50822/371472 [4:03:22<24:17:53, 3.67it/s] 14%|█▎ | 50823/371472 [4:03:22<24:37:05, 3.62it/s] 14%|█▎ | 50824/371472 [4:03:22<28:30:05, 3.13it/s] 14%|█▎ | 50825/371472 [4:03:23<27:43:51, 3.21it/s] 14%|█▎ | 50826/371472 [4:03:23<28:35:24, 3.12it/s] 14%|█▎ | 50827/371472 [4:03:23<29:51:08, 2.98it/s] 14%|█▎ | 50828/371472 [4:03:24<28:56:25, 3.08it/s] 14%|█▎ | 50829/371472 [4:03:24<29:04:33, 3.06it/s] 14%|█▎ | 50830/371472 [4:03:24<27:27:18, 3.24it/s] 14%|█▎ | 50831/371472 [4:03:24<25:39:23, 3.47it/s] 14%|█▎ | 50832/371472 [4:03:25<25:16:43, 3.52it/s] 14%|█▎ | 50833/371472 [4:03:25<26:02:56, 3.42it/s] 14%|█▎ | 50834/371472 [4:03:25<24:23:38, 3.65it/s] 14%|█▎ | 50835/371472 [4:03:25<24:00:42, 3.71it/s] 14%|█▎ | 50836/371472 [4:03:26<23:55:10, 3.72it/s] 14%|█▎ | 50837/371472 [4:03:26<23:23:39, 3.81it/s] 14%|█▎ | 50838/371472 [4:03:26<22:40:42, 3.93it/s] 14%|█▎ | 50839/371472 [4:03:26<22:09:22, 4.02it/s] 14%|█▎ | 50840/371472 [4:03:27<23:14:36, 3.83it/s] {'loss': 4.3027, 'learning_rate': 8.772436380874399e-07, 'epoch': 2.19} + 14%|█▎ | 50840/371472 [4:03:27<23:14:36, 3.83it/s] 14%|█▎ | 50841/371472 [4:03:27<22:52:19, 3.89it/s] 14%|█▎ | 50842/371472 [4:03:27<23:43:49, 3.75it/s] 14%|█▎ | 50843/371472 [4:03:28<24:11:06, 3.68it/s] 14%|█▎ | 50844/371472 [4:03:28<25:45:22, 3.46it/s] 14%|█▎ | 50845/371472 [4:03:28<26:52:00, 3.31it/s] 14%|█▎ | 50846/371472 [4:03:29<27:51:48, 3.20it/s] 14%|█▎ | 50847/371472 [4:03:29<25:55:05, 3.44it/s] 14%|█▎ | 50848/371472 [4:03:29<24:48:05, 3.59it/s] 14%|█▎ | 50849/371472 [4:03:29<23:56:20, 3.72it/s] 14%|█▎ | 50850/371472 [4:03:30<25:39:20, 3.47it/s] 14%|█▎ | 50851/371472 [4:03:30<26:07:49, 3.41it/s] 14%|█▎ | 50852/371472 [4:03:30<27:02:26, 3.29it/s] 14%|█▎ | 50853/371472 [4:03:31<26:33:55, 3.35it/s] 14%|█▎ | 50854/371472 [4:03:31<25:21:20, 3.51it/s] 14%|█▎ | 50855/371472 [4:03:31<24:19:34, 3.66it/s] 14%|█▎ | 50856/371472 [4:03:31<23:49:41, 3.74it/s] 14%|█▎ | 50857/371472 [4:03:32<24:18:12, 3.66it/s] 14%|█▎ | 50858/371472 [4:03:32<24:03:43, 3.70it/s] 14%|█▎ | 50859/371472 [4:03:32<23:23:09, 3.81it/s] 14%|█▎ | 50860/371472 [4:03:32<23:42:08, 3.76it/s] {'loss': 4.2621, 'learning_rate': 8.77195156111961e-07, 'epoch': 2.19} + 14%|█▎ | 50860/371472 [4:03:32<23:42:08, 3.76it/s] 14%|█▎ | 50861/371472 [4:03:33<24:19:50, 3.66it/s] 14%|█▎ | 50862/371472 [4:03:33<24:48:58, 3.59it/s] 14%|█▎ | 50863/371472 [4:03:33<25:09:18, 3.54it/s] 14%|█▎ | 50864/371472 [4:03:34<24:56:01, 3.57it/s] 14%|█▎ | 50865/371472 [4:03:34<26:02:30, 3.42it/s] 14%|█▎ | 50866/371472 [4:03:34<25:44:08, 3.46it/s] 14%|█▎ | 50867/371472 [4:03:34<24:48:13, 3.59it/s] 14%|█▎ | 50868/371472 [4:03:35<23:51:25, 3.73it/s] 14%|█▎ | 50869/371472 [4:03:35<23:41:52, 3.76it/s] 14%|█▎ | 50870/371472 [4:03:35<24:42:24, 3.60it/s] 14%|█▎ | 50871/371472 [4:03:35<24:18:18, 3.66it/s] 14%|█▎ | 50872/371472 [4:03:36<25:08:37, 3.54it/s] 14%|█▎ | 50873/371472 [4:03:36<24:49:48, 3.59it/s] 14%|█▎ | 50874/371472 [4:03:36<24:31:48, 3.63it/s] 14%|█▎ | 50875/371472 [4:03:37<24:26:10, 3.64it/s] 14%|█▎ | 50876/371472 [4:03:37<24:39:48, 3.61it/s] 14%|█▎ | 50877/371472 [4:03:37<24:05:51, 3.70it/s] 14%|█▎ | 50878/371472 [4:03:37<23:52:19, 3.73it/s] 14%|█▎ | 50879/371472 [4:03:38<24:27:22, 3.64it/s] 14%|█▎ | 50880/371472 [4:03:38<24:40:00, 3.61it/s] {'loss': 4.4363, 'learning_rate': 8.771466741364821e-07, 'epoch': 2.19} + 14%|█▎ | 50880/371472 [4:03:38<24:40:00, 3.61it/s] 14%|█▎ | 50881/371472 [4:03:38<25:55:16, 3.44it/s] 14%|█▎ | 50882/371472 [4:03:39<25:08:56, 3.54it/s] 14%|█▎ | 50883/371472 [4:03:39<24:10:32, 3.68it/s] 14%|█▎ | 50884/371472 [4:03:39<29:32:26, 3.01it/s] 14%|█▎ | 50885/371472 [4:03:40<29:14:26, 3.05it/s] 14%|█▎ | 50886/371472 [4:03:40<27:17:33, 3.26it/s] 14%|█▎ | 50887/371472 [4:03:40<28:00:23, 3.18it/s] 14%|█▎ | 50888/371472 [4:03:40<27:56:42, 3.19it/s] 14%|█▎ | 50889/371472 [4:03:41<26:51:21, 3.32it/s] 14%|█▎ | 50890/371472 [4:03:41<25:25:49, 3.50it/s] 14%|█▎ | 50891/371472 [4:03:41<25:43:02, 3.46it/s] 14%|█▎ | 50892/371472 [4:03:42<24:55:25, 3.57it/s] 14%|█▎ | 50893/371472 [4:03:42<24:14:56, 3.67it/s] 14%|█▎ | 50894/371472 [4:03:42<24:10:11, 3.68it/s] 14%|█▎ | 50895/371472 [4:03:42<24:59:30, 3.56it/s] 14%|█▎ | 50896/371472 [4:03:43<24:57:37, 3.57it/s] 14%|█▎ | 50897/371472 [4:03:43<24:57:47, 3.57it/s] 14%|█▎ | 50898/371472 [4:03:43<25:25:07, 3.50it/s] 14%|█▎ | 50899/371472 [4:03:43<24:31:43, 3.63it/s] 14%|█▎ | 50900/371472 [4:03:44<23:33:43, 3.78it/s] {'loss': 4.4562, 'learning_rate': 8.770981921610032e-07, 'epoch': 2.19} + 14%|█▎ | 50900/371472 [4:03:44<23:33:43, 3.78it/s] 14%|█▎ | 50901/371472 [4:03:44<23:29:12, 3.79it/s] 14%|█▎ | 50902/371472 [4:03:44<24:00:53, 3.71it/s] 14%|█▎ | 50903/371472 [4:03:45<23:46:11, 3.75it/s] 14%|█▎ | 50904/371472 [4:03:45<23:27:56, 3.79it/s] 14%|█▎ | 50905/371472 [4:03:45<23:13:01, 3.84it/s] 14%|█▎ | 50906/371472 [4:03:45<23:36:52, 3.77it/s] 14%|█▎ | 50907/371472 [4:03:46<24:34:25, 3.62it/s] 14%|█▎ | 50908/371472 [4:03:46<23:39:49, 3.76it/s] 14%|█▎ | 50909/371472 [4:03:46<23:19:33, 3.82it/s] 14%|█▎ | 50910/371472 [4:03:46<22:50:11, 3.90it/s] 14%|█▎ | 50911/371472 [4:03:47<22:49:48, 3.90it/s] 14%|█▎ | 50912/371472 [4:03:47<23:58:28, 3.71it/s] 14%|█▎ | 50913/371472 [4:03:47<23:09:23, 3.85it/s] 14%|█▎ | 50914/371472 [4:03:47<23:02:31, 3.86it/s] 14%|█▎ | 50915/371472 [4:03:48<23:48:37, 3.74it/s] 14%|█▎ | 50916/371472 [4:03:48<23:09:38, 3.84it/s] 14%|█▎ | 50917/371472 [4:03:48<24:13:11, 3.68it/s] 14%|█▎ | 50918/371472 [4:03:48<23:57:17, 3.72it/s] 14%|█▎ | 50919/371472 [4:03:49<24:59:12, 3.56it/s] 14%|█▎ | 50920/371472 [4:03:49<25:08:04, 3.54it/s] {'loss': 4.2564, 'learning_rate': 8.770497101855243e-07, 'epoch': 2.19} + 14%|█▎ | 50920/371472 [4:03:49<25:08:04, 3.54it/s] 14%|█▎ | 50921/371472 [4:03:49<25:26:10, 3.50it/s] 14%|█▎ | 50922/371472 [4:03:50<26:32:11, 3.36it/s] 14%|█▎ | 50923/371472 [4:03:50<25:38:52, 3.47it/s] 14%|█▎ | 50924/371472 [4:03:50<24:47:59, 3.59it/s] 14%|█▎ | 50925/371472 [4:03:50<23:36:41, 3.77it/s] 14%|█▎ | 50926/371472 [4:03:51<23:08:04, 3.85it/s] 14%|█▎ | 50927/371472 [4:03:51<22:49:31, 3.90it/s] 14%|█▎ | 50928/371472 [4:03:51<24:16:45, 3.67it/s] 14%|█▎ | 50929/371472 [4:03:52<25:48:23, 3.45it/s] 14%|█▎ | 50930/371472 [4:03:52<25:18:40, 3.52it/s] 14%|█▎ | 50931/371472 [4:03:52<25:09:50, 3.54it/s] 14%|█▎ | 50932/371472 [4:03:52<24:41:57, 3.60it/s] 14%|█▎ | 50933/371472 [4:03:53<24:53:36, 3.58it/s] 14%|█▎ | 50934/371472 [4:03:53<24:41:21, 3.61it/s] 14%|█▎ | 50935/371472 [4:03:53<23:54:36, 3.72it/s] 14%|█▎ | 50936/371472 [4:03:54<24:16:56, 3.67it/s] 14%|█▎ | 50937/371472 [4:03:54<24:34:56, 3.62it/s] 14%|█▎ | 50938/371472 [4:03:54<25:28:36, 3.49it/s] 14%|█▎ | 50939/371472 [4:03:54<24:49:17, 3.59it/s] 14%|█▎ | 50940/371472 [4:03:55<24:14:20, 3.67it/s] {'loss': 4.2429, 'learning_rate': 8.770012282100454e-07, 'epoch': 2.19} + 14%|█▎ | 50940/371472 [4:03:55<24:14:20, 3.67it/s] 14%|█▎ | 50941/371472 [4:03:55<24:33:30, 3.63it/s] 14%|█▎ | 50942/371472 [4:03:55<24:02:57, 3.70it/s] 14%|█▎ | 50943/371472 [4:03:55<25:15:16, 3.53it/s] 14%|█▎ | 50944/371472 [4:03:56<24:30:28, 3.63it/s] 14%|█▎ | 50945/371472 [4:03:56<24:09:29, 3.69it/s] 14%|█▎ | 50946/371472 [4:03:56<25:51:57, 3.44it/s] 14%|█▎ | 50947/371472 [4:03:57<25:21:22, 3.51it/s] 14%|█▎ | 50948/371472 [4:03:57<24:54:51, 3.57it/s] 14%|█▎ | 50949/371472 [4:03:57<24:06:39, 3.69it/s] 14%|█▎ | 50950/371472 [4:03:57<24:50:48, 3.58it/s] 14%|█▎ | 50951/371472 [4:03:58<23:52:53, 3.73it/s] 14%|█▎ | 50952/371472 [4:03:58<23:42:22, 3.76it/s] 14%|█▎ | 50953/371472 [4:03:58<24:41:53, 3.60it/s] 14%|█▎ | 50954/371472 [4:03:58<24:31:49, 3.63it/s] 14%|█▎ | 50955/371472 [4:03:59<25:14:31, 3.53it/s] 14%|█▎ | 50956/371472 [4:03:59<25:39:10, 3.47it/s] 14%|█▎ | 50957/371472 [4:03:59<26:19:37, 3.38it/s] 14%|█▎ | 50958/371472 [4:04:00<27:25:31, 3.25it/s] 14%|█▎ | 50959/371472 [4:04:00<27:06:22, 3.28it/s] 14%|█▎ | 50960/371472 [4:04:00<26:47:06, 3.32it/s] {'loss': 4.3255, 'learning_rate': 8.769527462345665e-07, 'epoch': 2.19} + 14%|█▎ | 50960/371472 [4:04:00<26:47:06, 3.32it/s] 14%|█▎ | 50961/371472 [4:04:01<28:46:50, 3.09it/s] 14%|█▎ | 50962/371472 [4:04:01<27:39:35, 3.22it/s] 14%|█▎ | 50963/371472 [4:04:01<26:15:35, 3.39it/s] 14%|█▎ | 50964/371472 [4:04:01<25:08:46, 3.54it/s] 14%|█▎ | 50965/371472 [4:04:02<26:09:41, 3.40it/s] 14%|█▎ | 50966/371472 [4:04:02<25:12:59, 3.53it/s] 14%|█▎ | 50967/371472 [4:04:02<24:28:49, 3.64it/s] 14%|█▎ | 50968/371472 [4:04:03<23:54:54, 3.72it/s] 14%|█▎ | 50969/371472 [4:04:03<25:22:10, 3.51it/s] 14%|█▎ | 50970/371472 [4:04:03<25:00:54, 3.56it/s] 14%|█▎ | 50971/371472 [4:04:03<24:46:26, 3.59it/s] 14%|█▎ | 50972/371472 [4:04:04<24:53:36, 3.58it/s] 14%|█▎ | 50973/371472 [4:04:04<27:05:17, 3.29it/s] 14%|█▎ | 50974/371472 [4:04:04<29:01:55, 3.07it/s] 14%|█▎ | 50975/371472 [4:04:05<27:48:26, 3.20it/s] 14%|█▎ | 50976/371472 [4:04:05<27:13:09, 3.27it/s] 14%|█▎ | 50977/371472 [4:04:05<26:48:16, 3.32it/s] 14%|█▎ | 50978/371472 [4:04:06<25:20:17, 3.51it/s] 14%|█▎ | 50979/371472 [4:04:06<24:53:10, 3.58it/s] 14%|█▎ | 50980/371472 [4:04:06<25:08:32, 3.54it/s] {'loss': 4.2201, 'learning_rate': 8.769042642590876e-07, 'epoch': 2.2} + 14%|█▎ | 50980/371472 [4:04:06<25:08:32, 3.54it/s] 14%|█▎ | 50981/371472 [4:04:06<25:44:52, 3.46it/s] 14%|█▎ | 50982/371472 [4:04:07<25:13:00, 3.53it/s] 14%|█▎ | 50983/371472 [4:04:07<24:53:15, 3.58it/s] 14%|█▎ | 50984/371472 [4:04:07<24:03:16, 3.70it/s] 14%|█▎ | 50985/371472 [4:04:08<24:15:41, 3.67it/s] 14%|█▎ | 50986/371472 [4:04:08<24:10:38, 3.68it/s] 14%|█▎ | 50987/371472 [4:04:08<27:06:34, 3.28it/s] 14%|█▎ | 50988/371472 [4:04:09<28:20:31, 3.14it/s] 14%|█▎ | 50989/371472 [4:04:09<27:22:22, 3.25it/s] 14%|█▎ | 50990/371472 [4:04:09<27:22:10, 3.25it/s] 14%|█▎ | 50991/371472 [4:04:09<27:41:07, 3.22it/s] 14%|█▎ | 50992/371472 [4:04:10<27:36:28, 3.22it/s] 14%|█▎ | 50993/371472 [4:04:10<28:18:23, 3.14it/s] 14%|█▎ | 50994/371472 [4:04:10<27:09:17, 3.28it/s] 14%|█▎ | 50995/371472 [4:04:11<26:44:25, 3.33it/s] 14%|█▎ | 50996/371472 [4:04:11<26:06:18, 3.41it/s] 14%|█▎ | 50997/371472 [4:04:11<25:33:11, 3.48it/s] 14%|█▎ | 50998/371472 [4:04:11<24:38:53, 3.61it/s] 14%|█▎ | 50999/371472 [4:04:12<24:22:21, 3.65it/s] 14%|█▎ | 51000/371472 [4:04:12<23:25:27, 3.80it/s] {'loss': 4.3356, 'learning_rate': 8.768557822836088e-07, 'epoch': 2.2} + 14%|█▎ | 51000/371472 [4:04:12<23:25:27, 3.80it/s] 14%|█▎ | 51001/371472 [4:04:12<22:50:09, 3.90it/s] 14%|█▎ | 51002/371472 [4:04:12<22:28:10, 3.96it/s] 14%|█▎ | 51003/371472 [4:04:13<23:46:38, 3.74it/s] 14%|█▎ | 51004/371472 [4:04:13<22:59:45, 3.87it/s] 14%|█▎ | 51005/371472 [4:04:13<22:44:06, 3.92it/s] 14%|█▎ | 51006/371472 [4:04:13<22:37:16, 3.94it/s] 14%|█▎ | 51007/371472 [4:04:14<22:52:35, 3.89it/s] 14%|█▎ | 51008/371472 [4:04:14<23:10:18, 3.84it/s] 14%|█▎ | 51009/371472 [4:04:14<23:17:45, 3.82it/s] 14%|█▎ | 51010/371472 [4:04:15<23:47:29, 3.74it/s] 14%|█▎ | 51011/371472 [4:04:15<23:55:06, 3.72it/s] 14%|█▎ | 51012/371472 [4:04:15<25:11:38, 3.53it/s] 14%|█▎ | 51013/371472 [4:04:16<28:17:57, 3.15it/s] 14%|█▎ | 51014/371472 [4:04:16<28:37:28, 3.11it/s] 14%|█▎ | 51015/371472 [4:04:16<27:46:16, 3.21it/s] 14%|█▎ | 51016/371472 [4:04:16<26:49:46, 3.32it/s] 14%|█▎ | 51017/371472 [4:04:17<25:44:07, 3.46it/s] 14%|█▎ | 51018/371472 [4:04:17<24:14:49, 3.67it/s] 14%|█▎ | 51019/371472 [4:04:17<24:06:01, 3.69it/s] 14%|█▎ | 51020/371472 [4:04:17<23:28:31, 3.79it/s] {'loss': 4.019, 'learning_rate': 8.768073003081299e-07, 'epoch': 2.2} + 14%|█▎ | 51020/371472 [4:04:17<23:28:31, 3.79it/s] 14%|█▎ | 51021/371472 [4:04:18<23:48:01, 3.74it/s] 14%|█▎ | 51022/371472 [4:04:18<23:44:56, 3.75it/s] 14%|█▎ | 51023/371472 [4:04:18<24:14:41, 3.67it/s] 14%|█▎ | 51024/371472 [4:04:19<25:18:28, 3.52it/s] 14%|█▎ | 51025/371472 [4:04:19<24:49:08, 3.59it/s] 14%|█▎ | 51026/371472 [4:04:19<24:10:13, 3.68it/s] 14%|█▎ | 51027/371472 [4:04:19<23:55:09, 3.72it/s] 14%|█▎ | 51028/371472 [4:04:20<25:07:37, 3.54it/s] 14%|█▎ | 51029/371472 [4:04:20<25:21:53, 3.51it/s] 14%|█▎ | 51030/371472 [4:04:20<25:21:41, 3.51it/s] 14%|█▎ | 51031/371472 [4:04:21<24:51:35, 3.58it/s] 14%|█▎ | 51032/371472 [4:04:21<24:13:02, 3.68it/s] 14%|█▎ | 51033/371472 [4:04:21<26:10:19, 3.40it/s] 14%|█▎ | 51034/371472 [4:04:21<26:06:41, 3.41it/s] 14%|█▎ | 51035/371472 [4:04:22<25:00:21, 3.56it/s] 14%|█▎ | 51036/371472 [4:04:22<25:45:19, 3.46it/s] 14%|█▎ | 51037/371472 [4:04:22<26:50:23, 3.32it/s] 14%|█▎ | 51038/371472 [4:04:23<25:14:22, 3.53it/s] 14%|█▎ | 51039/371472 [4:04:23<23:58:58, 3.71it/s] 14%|█▎ | 51040/371472 [4:04:23<23:48:10, 3.74it/s] {'loss': 4.3707, 'learning_rate': 8.767588183326509e-07, 'epoch': 2.2} + 14%|█▎ | 51040/371472 [4:04:23<23:48:10, 3.74it/s] 14%|█▎ | 51041/371472 [4:04:23<24:03:02, 3.70it/s] 14%|█▎ | 51042/371472 [4:04:24<23:32:33, 3.78it/s] 14%|█▎ | 51043/371472 [4:04:24<24:13:43, 3.67it/s] 14%|█▎ | 51044/371472 [4:04:24<25:05:42, 3.55it/s] 14%|█▎ | 51045/371472 [4:04:24<24:32:57, 3.63it/s] 14%|█▎ | 51046/371472 [4:04:25<23:37:33, 3.77it/s] 14%|█▎ | 51047/371472 [4:04:25<24:06:37, 3.69it/s] 14%|█▎ | 51048/371472 [4:04:25<23:33:43, 3.78it/s] 14%|█▎ | 51049/371472 [4:04:25<23:32:08, 3.78it/s] 14%|█▎ | 51050/371472 [4:04:26<23:06:17, 3.85it/s] 14%|█▎ | 51051/371472 [4:04:26<24:50:53, 3.58it/s] 14%|█▎ | 51052/371472 [4:04:26<24:41:45, 3.60it/s] 14%|█▎ | 51053/371472 [4:04:27<24:07:31, 3.69it/s] 14%|█▎ | 51054/371472 [4:04:27<23:36:16, 3.77it/s] 14%|█▎ | 51055/371472 [4:04:27<23:52:59, 3.73it/s] 14%|█▎ | 51056/371472 [4:04:27<24:06:41, 3.69it/s] 14%|█▎ | 51057/371472 [4:04:28<23:14:17, 3.83it/s] 14%|█▎ | 51058/371472 [4:04:28<24:25:14, 3.64it/s] 14%|█▎ | 51059/371472 [4:04:28<26:00:30, 3.42it/s] 14%|█▎ | 51060/371472 [4:04:29<26:26:53, 3.37it/s] {'loss': 4.2873, 'learning_rate': 8.76710336357172e-07, 'epoch': 2.2} + 14%|█▎ | 51060/371472 [4:04:29<26:26:53, 3.37it/s] 14%|█▎ | 51061/371472 [4:04:29<25:59:51, 3.42it/s] 14%|█▎ | 51062/371472 [4:04:29<24:48:42, 3.59it/s] 14%|█▎ | 51063/371472 [4:04:29<24:20:01, 3.66it/s] 14%|█▎ | 51064/371472 [4:04:30<24:13:18, 3.67it/s] 14%|█▎ | 51065/371472 [4:04:30<25:48:46, 3.45it/s] 14%|█▎ | 51066/371472 [4:04:30<25:21:56, 3.51it/s] 14%|█▎ | 51067/371472 [4:04:31<25:44:29, 3.46it/s] 14%|█▎ | 51068/371472 [4:04:31<25:10:52, 3.53it/s] 14%|█▎ | 51069/371472 [4:04:31<24:23:52, 3.65it/s] 14%|█▎ | 51070/371472 [4:04:31<24:05:13, 3.69it/s] 14%|█▎ | 51071/371472 [4:04:32<24:07:34, 3.69it/s] 14%|█▎ | 51072/371472 [4:04:32<23:24:27, 3.80it/s] 14%|█▎ | 51073/371472 [4:04:32<23:56:27, 3.72it/s] 14%|█▎ | 51074/371472 [4:04:32<23:45:59, 3.74it/s] 14%|█▎ | 51075/371472 [4:04:33<24:13:09, 3.67it/s] 14%|█▎ | 51076/371472 [4:04:33<24:16:20, 3.67it/s] 14%|█▎ | 51077/371472 [4:04:33<24:04:11, 3.70it/s] 14%|█▍ | 51078/371472 [4:04:33<23:19:49, 3.81it/s] 14%|█▍ | 51079/371472 [4:04:34<24:34:44, 3.62it/s] 14%|█▍ | 51080/371472 [4:04:34<24:38:27, 3.61it/s] {'loss': 4.4412, 'learning_rate': 8.766618543816932e-07, 'epoch': 2.2} + 14%|█▍ | 51080/371472 [4:04:34<24:38:27, 3.61it/s] 14%|█▍ | 51081/371472 [4:04:34<24:32:09, 3.63it/s] 14%|█▍ | 51082/371472 [4:04:35<24:11:50, 3.68it/s] 14%|█▍ | 51083/371472 [4:04:35<24:24:15, 3.65it/s] 14%|█▍ | 51084/371472 [4:04:35<24:08:14, 3.69it/s] 14%|█▍ | 51085/371472 [4:04:35<24:55:36, 3.57it/s] 14%|█▍ | 51086/371472 [4:04:36<27:15:15, 3.27it/s] 14%|█▍ | 51087/371472 [4:04:36<25:32:51, 3.48it/s] 14%|█▍ | 51088/371472 [4:04:36<24:13:11, 3.67it/s] 14%|█▍ | 51089/371472 [4:04:37<24:17:30, 3.66it/s] 14%|█▍ | 51090/371472 [4:04:37<23:38:27, 3.76it/s] 14%|█▍ | 51091/371472 [4:04:37<23:42:01, 3.75it/s] 14%|█▍ | 51092/371472 [4:04:37<23:03:00, 3.86it/s] 14%|█▍ | 51093/371472 [4:04:38<23:22:12, 3.81it/s] 14%|█▍ | 51094/371472 [4:04:38<24:31:51, 3.63it/s] 14%|█▍ | 51095/371472 [4:04:38<24:41:07, 3.61it/s] 14%|█▍ | 51096/371472 [4:04:38<25:25:57, 3.50it/s] 14%|█▍ | 51097/371472 [4:04:39<24:41:36, 3.60it/s] 14%|█▍ | 51098/371472 [4:04:39<24:11:47, 3.68it/s] 14%|█▍ | 51099/371472 [4:04:39<24:26:16, 3.64it/s] 14%|█▍ | 51100/371472 [4:04:40<25:58:14, 3.43it/s] {'loss': 4.2582, 'learning_rate': 8.766133724062143e-07, 'epoch': 2.2} + 14%|█▍ | 51100/371472 [4:04:40<25:58:14, 3.43it/s] 14%|█▍ | 51101/371472 [4:04:40<27:22:43, 3.25it/s] 14%|█▍ | 51102/371472 [4:04:40<27:33:24, 3.23it/s] 14%|█▍ | 51103/371472 [4:04:40<25:52:43, 3.44it/s] 14%|█▍ | 51104/371472 [4:04:41<25:44:37, 3.46it/s] 14%|█▍ | 51105/371472 [4:04:41<24:52:00, 3.58it/s] 14%|█▍ | 51106/371472 [4:04:41<24:42:55, 3.60it/s] 14%|█▍ | 51107/371472 [4:04:42<25:41:31, 3.46it/s] 14%|█▍ | 51108/371472 [4:04:42<27:35:26, 3.23it/s] 14%|█▍ | 51109/371472 [4:04:42<27:41:50, 3.21it/s] 14%|█▍ | 51110/371472 [4:04:43<26:33:03, 3.35it/s] 14%|█▍ | 51111/371472 [4:04:43<24:54:24, 3.57it/s] 14%|█▍ | 51112/371472 [4:04:43<23:45:11, 3.75it/s] 14%|█▍ | 51113/371472 [4:04:43<23:19:06, 3.82it/s] 14%|█▍ | 51114/371472 [4:04:44<23:51:34, 3.73it/s] 14%|█▍ | 51115/371472 [4:04:44<24:09:25, 3.68it/s] 14%|█▍ | 51116/371472 [4:04:44<23:47:55, 3.74it/s] 14%|█▍ | 51117/371472 [4:04:44<23:49:09, 3.74it/s] 14%|█▍ | 51118/371472 [4:04:45<23:12:56, 3.83it/s] 14%|█▍ | 51119/371472 [4:04:45<25:26:44, 3.50it/s] 14%|█▍ | 51120/371472 [4:04:45<24:26:54, 3.64it/s] {'loss': 4.1803, 'learning_rate': 8.765648904307354e-07, 'epoch': 2.2} + 14%|█▍ | 51120/371472 [4:04:45<24:26:54, 3.64it/s] 14%|█▍ | 51121/371472 [4:04:46<25:29:54, 3.49it/s] 14%|█▍ | 51122/371472 [4:04:46<24:52:25, 3.58it/s] 14%|█▍ | 51123/371472 [4:04:46<25:37:00, 3.47it/s] 14%|█▍ | 51124/371472 [4:04:46<26:58:43, 3.30it/s] 14%|█▍ | 51125/371472 [4:04:47<26:45:57, 3.32it/s] 14%|█▍ | 51126/371472 [4:04:47<26:11:13, 3.40it/s] 14%|█▍ | 51127/371472 [4:04:47<25:00:50, 3.56it/s] 14%|█▍ | 51128/371472 [4:04:48<24:44:46, 3.60it/s] 14%|█▍ | 51129/371472 [4:04:48<24:27:24, 3.64it/s] 14%|█▍ | 51130/371472 [4:04:48<24:56:03, 3.57it/s] 14%|█▍ | 51131/371472 [4:04:48<25:15:09, 3.52it/s] 14%|█▍ | 51132/371472 [4:04:49<24:11:00, 3.68it/s] 14%|█▍ | 51133/371472 [4:04:49<24:08:12, 3.69it/s] 14%|█▍ | 51134/371472 [4:04:49<23:50:19, 3.73it/s] 14%|█▍ | 51135/371472 [4:04:49<24:48:13, 3.59it/s] 14%|█▍ | 51136/371472 [4:04:50<24:03:51, 3.70it/s] 14%|█▍ | 51137/371472 [4:04:50<24:35:42, 3.62it/s] 14%|█▍ | 51138/371472 [4:04:50<25:02:42, 3.55it/s] 14%|█▍ | 51139/371472 [4:04:51<25:04:56, 3.55it/s] 14%|█▍ | 51140/371472 [4:04:51<24:27:43, 3.64it/s] {'loss': 4.3579, 'learning_rate': 8.765164084552565e-07, 'epoch': 2.2} + 14%|█▍ | 51140/371472 [4:04:51<24:27:43, 3.64it/s] 14%|█▍ | 51141/371472 [4:04:51<23:37:49, 3.77it/s] 14%|█▍ | 51142/371472 [4:04:51<25:55:06, 3.43it/s] 14%|█▍ | 51143/371472 [4:04:52<25:28:12, 3.49it/s] 14%|█▍ | 51144/371472 [4:04:52<24:41:00, 3.60it/s] 14%|█▍ | 51145/371472 [4:04:52<23:50:57, 3.73it/s] 14%|█▍ | 51146/371472 [4:04:52<23:18:48, 3.82it/s] 14%|█▍ | 51147/371472 [4:04:53<23:31:19, 3.78it/s] 14%|█▍ | 51148/371472 [4:04:53<23:16:46, 3.82it/s] 14%|█▍ | 51149/371472 [4:04:53<22:51:44, 3.89it/s] 14%|█▍ | 51150/371472 [4:04:53<22:26:55, 3.96it/s] 14%|█▍ | 51151/371472 [4:04:54<22:40:14, 3.92it/s] 14%|█▍ | 51152/371472 [4:04:54<23:17:48, 3.82it/s] 14%|█▍ | 51153/371472 [4:04:54<23:52:59, 3.73it/s] 14%|█▍ | 51154/371472 [4:04:55<23:43:15, 3.75it/s] 14%|█▍ | 51155/371472 [4:04:55<23:05:39, 3.85it/s] 14%|█▍ | 51156/371472 [4:04:55<23:54:25, 3.72it/s] 14%|█▍ | 51157/371472 [4:04:55<23:53:06, 3.73it/s] 14%|█▍ | 51158/371472 [4:04:56<23:55:58, 3.72it/s] 14%|█▍ | 51159/371472 [4:04:56<24:52:41, 3.58it/s] 14%|█▍ | 51160/371472 [4:04:56<27:33:48, 3.23it/s] {'loss': 4.3487, 'learning_rate': 8.764679264797775e-07, 'epoch': 2.2} + 14%|█▍ | 51160/371472 [4:04:56<27:33:48, 3.23it/s] 14%|█▍ | 51161/371472 [4:04:57<26:09:34, 3.40it/s] 14%|█▍ | 51162/371472 [4:04:57<25:22:03, 3.51it/s] 14%|█▍ | 51163/371472 [4:04:57<24:30:28, 3.63it/s] 14%|█▍ | 51164/371472 [4:04:57<23:41:54, 3.75it/s] 14%|█▍ | 51165/371472 [4:04:58<24:30:59, 3.63it/s] 14%|█▍ | 51166/371472 [4:04:58<23:52:23, 3.73it/s] 14%|█▍ | 51167/371472 [4:04:58<24:41:34, 3.60it/s] 14%|█▍ | 51168/371472 [4:04:58<25:08:35, 3.54it/s] 14%|█▍ | 51169/371472 [4:04:59<25:51:13, 3.44it/s] 14%|█▍ | 51170/371472 [4:04:59<24:51:35, 3.58it/s] 14%|█▍ | 51171/371472 [4:04:59<25:33:42, 3.48it/s] 14%|█▍ | 51172/371472 [4:05:00<24:43:54, 3.60it/s] 14%|█▍ | 51173/371472 [4:05:00<25:26:56, 3.50it/s] 14%|█▍ | 51174/371472 [4:05:00<24:20:28, 3.66it/s] 14%|█▍ | 51175/371472 [4:05:00<23:46:20, 3.74it/s] 14%|█▍ | 51176/371472 [4:05:01<24:18:14, 3.66it/s] 14%|█▍ | 51177/371472 [4:05:01<25:19:35, 3.51it/s] 14%|█▍ | 51178/371472 [4:05:01<25:54:48, 3.43it/s] 14%|█▍ | 51179/371472 [4:05:02<24:21:41, 3.65it/s] 14%|█▍ | 51180/371472 [4:05:02<24:08:14, 3.69it/s] {'loss': 4.2576, 'learning_rate': 8.764194445042986e-07, 'epoch': 2.2} + 14%|█▍ | 51180/371472 [4:05:02<24:08:14, 3.69it/s] 14%|█▍ | 51181/371472 [4:05:02<24:26:07, 3.64it/s] 14%|█▍ | 51182/371472 [4:05:02<27:14:01, 3.27it/s] 14%|█▍ | 51183/371472 [4:05:03<27:11:09, 3.27it/s] 14%|█▍ | 51184/371472 [4:05:03<28:36:13, 3.11it/s] 14%|█▍ | 51185/371472 [4:05:03<27:39:55, 3.22it/s] 14%|█▍ | 51186/371472 [4:05:04<26:40:49, 3.33it/s] 14%|█▍ | 51187/371472 [4:05:04<26:28:20, 3.36it/s] 14%|█▍ | 51188/371472 [4:05:04<26:19:37, 3.38it/s] 14%|█▍ | 51189/371472 [4:05:05<25:36:16, 3.47it/s] 14%|█▍ | 51190/371472 [4:05:05<25:47:02, 3.45it/s] 14%|█▍ | 51191/371472 [4:05:05<24:51:38, 3.58it/s] 14%|█▍ | 51192/371472 [4:05:05<25:02:53, 3.55it/s] 14%|█▍ | 51193/371472 [4:05:06<25:18:31, 3.52it/s] 14%|█▍ | 51194/371472 [4:05:06<26:23:08, 3.37it/s] 14%|█▍ | 51195/371472 [4:05:06<26:05:12, 3.41it/s] 14%|█▍ | 51196/371472 [4:05:07<25:20:53, 3.51it/s] 14%|█▍ | 51197/371472 [4:05:07<24:36:09, 3.62it/s] 14%|█▍ | 51198/371472 [4:05:07<23:55:49, 3.72it/s] 14%|█▍ | 51199/371472 [4:05:07<25:32:50, 3.48it/s] 14%|█▍ | 51200/371472 [4:05:08<24:19:43, 3.66it/s] {'loss': 4.1051, 'learning_rate': 8.763709625288198e-07, 'epoch': 2.21} + 14%|█▍ | 51200/371472 [4:05:08<24:19:43, 3.66it/s] 14%|█▍ | 51201/371472 [4:05:08<23:39:13, 3.76it/s] 14%|█▍ | 51202/371472 [4:05:08<24:50:06, 3.58it/s] 14%|█▍ | 51203/371472 [4:05:08<24:46:43, 3.59it/s] 14%|█▍ | 51204/371472 [4:05:09<24:34:44, 3.62it/s] 14%|█▍ | 51205/371472 [4:05:09<24:59:36, 3.56it/s] 14%|█▍ | 51206/371472 [4:05:09<24:41:32, 3.60it/s] 14%|█▍ | 51207/371472 [4:05:10<24:14:27, 3.67it/s] 14%|█▍ | 51208/371472 [4:05:10<26:01:34, 3.42it/s] 14%|█▍ | 51209/371472 [4:05:10<25:24:26, 3.50it/s] 14%|█▍ | 51210/371472 [4:05:10<25:08:22, 3.54it/s] 14%|█▍ | 51211/371472 [4:05:11<24:54:24, 3.57it/s] 14%|█▍ | 51212/371472 [4:05:11<25:19:47, 3.51it/s] 14%|█▍ | 51213/371472 [4:05:11<26:35:26, 3.35it/s] 14%|█▍ | 51214/371472 [4:05:12<26:10:26, 3.40it/s] 14%|█▍ | 51215/371472 [4:05:12<25:34:45, 3.48it/s] 14%|█▍ | 51216/371472 [4:05:12<25:16:01, 3.52it/s] 14%|█▍ | 51217/371472 [4:05:12<25:04:12, 3.55it/s] 14%|█▍ | 51218/371472 [4:05:13<24:58:45, 3.56it/s] 14%|█▍ | 51219/371472 [4:05:13<25:10:46, 3.53it/s] 14%|█▍ | 51220/371472 [4:05:13<25:01:19, 3.56it/s] {'loss': 4.1606, 'learning_rate': 8.763224805533409e-07, 'epoch': 2.21} + 14%|█▍ | 51220/371472 [4:05:13<25:01:19, 3.56it/s] 14%|█▍ | 51221/371472 [4:05:14<25:22:37, 3.51it/s] 14%|█▍ | 51222/371472 [4:05:14<25:02:14, 3.55it/s] 14%|█▍ | 51223/371472 [4:05:14<25:06:23, 3.54it/s] 14%|█▍ | 51224/371472 [4:05:14<23:51:29, 3.73it/s] 14%|█▍ | 51225/371472 [4:05:15<23:37:17, 3.77it/s] 14%|█▍ | 51226/371472 [4:05:15<23:54:24, 3.72it/s] 14%|█▍ | 51227/371472 [4:05:15<23:40:59, 3.76it/s] 14%|█▍ | 51228/371472 [4:05:15<24:45:36, 3.59it/s] 14%|█▍ | 51229/371472 [4:05:16<24:00:57, 3.70it/s] 14%|█▍ | 51230/371472 [4:05:16<24:11:11, 3.68it/s] 14%|█▍ | 51231/371472 [4:05:16<24:49:50, 3.58it/s] 14%|█▍ | 51232/371472 [4:05:17<24:14:43, 3.67it/s] 14%|█▍ | 51233/371472 [4:05:17<23:44:26, 3.75it/s] 14%|█▍ | 51234/371472 [4:05:17<23:11:07, 3.84it/s] 14%|█▍ | 51235/371472 [4:05:17<24:26:22, 3.64it/s] 14%|█▍ | 51236/371472 [4:05:18<24:01:00, 3.70it/s] 14%|█▍ | 51237/371472 [4:05:18<24:16:57, 3.66it/s] 14%|█▍ | 51238/371472 [4:05:18<23:59:22, 3.71it/s] 14%|█▍ | 51239/371472 [4:05:18<24:01:14, 3.70it/s] 14%|█▍ | 51240/371472 [4:05:19<23:24:04, 3.80it/s] {'loss': 4.594, 'learning_rate': 8.762739985778621e-07, 'epoch': 2.21} + 14%|█▍ | 51240/371472 [4:05:19<23:24:04, 3.80it/s] 14%|█▍ | 51241/371472 [4:05:19<24:28:49, 3.63it/s] 14%|█▍ | 51242/371472 [4:05:19<24:18:34, 3.66it/s] 14%|█▍ | 51243/371472 [4:05:20<25:24:36, 3.50it/s] 14%|█▍ | 51244/371472 [4:05:20<25:00:05, 3.56it/s] 14%|█▍ | 51245/371472 [4:05:20<25:33:58, 3.48it/s] 14%|█▍ | 51246/371472 [4:05:20<24:56:28, 3.57it/s] 14%|█▍ | 51247/371472 [4:05:21<26:43:29, 3.33it/s] 14%|█▍ | 51248/371472 [4:05:21<26:20:04, 3.38it/s] 14%|█▍ | 51249/371472 [4:05:21<26:01:22, 3.42it/s] 14%|█▍ | 51250/371472 [4:05:22<26:53:11, 3.31it/s] 14%|█▍ | 51251/371472 [4:05:22<27:24:22, 3.25it/s] 14%|█▍ | 51252/371472 [4:05:22<26:18:42, 3.38it/s] 14%|█▍ | 51253/371472 [4:05:22<25:18:21, 3.51it/s] 14%|█▍ | 51254/371472 [4:05:23<26:17:16, 3.38it/s] 14%|█▍ | 51255/371472 [4:05:23<26:08:19, 3.40it/s] 14%|█▍ | 51256/371472 [4:05:23<27:10:09, 3.27it/s] 14%|█▍ | 51257/371472 [4:05:24<27:04:24, 3.29it/s] 14%|█▍ | 51258/371472 [4:05:24<28:59:52, 3.07it/s] 14%|█▍ | 51259/371472 [4:05:24<27:09:02, 3.28it/s] 14%|█▍ | 51260/371472 [4:05:25<26:13:50, 3.39it/s] {'loss': 4.1487, 'learning_rate': 8.762255166023832e-07, 'epoch': 2.21} + 14%|█▍ | 51260/371472 [4:05:25<26:13:50, 3.39it/s] 14%|█▍ | 51261/371472 [4:05:25<27:01:15, 3.29it/s] 14%|█▍ | 51262/371472 [4:05:25<25:43:57, 3.46it/s] 14%|█▍ | 51263/371472 [4:05:25<24:33:45, 3.62it/s] 14%|█▍ | 51264/371472 [4:05:26<26:06:13, 3.41it/s] 14%|█▍ | 51265/371472 [4:05:26<26:30:58, 3.35it/s] 14%|█▍ | 51266/371472 [4:05:26<26:36:42, 3.34it/s] 14%|█▍ | 51267/371472 [4:05:27<27:16:44, 3.26it/s] 14%|█▍ | 51268/371472 [4:05:27<25:34:15, 3.48it/s] 14%|█▍ | 51269/371472 [4:05:27<24:55:10, 3.57it/s] 14%|█▍ | 51270/371472 [4:05:28<25:09:48, 3.53it/s] 14%|█▍ | 51271/371472 [4:05:28<24:42:55, 3.60it/s] 14%|█▍ | 51272/371472 [4:05:28<26:53:32, 3.31it/s] 14%|█▍ | 51273/371472 [4:05:28<25:54:38, 3.43it/s] 14%|█▍ | 51274/371472 [4:05:29<27:24:54, 3.24it/s] 14%|█▍ | 51275/371472 [4:05:29<26:47:08, 3.32it/s] 14%|█▍ | 51276/371472 [4:05:29<25:44:00, 3.46it/s] 14%|█▍ | 51277/371472 [4:05:30<25:13:27, 3.53it/s] 14%|█▍ | 51278/371472 [4:05:30<26:38:11, 3.34it/s] 14%|█▍ | 51279/371472 [4:05:30<25:44:12, 3.46it/s] 14%|█▍ | 51280/371472 [4:05:30<24:39:16, 3.61it/s] {'loss': 4.1725, 'learning_rate': 8.761770346269042e-07, 'epoch': 2.21} + 14%|█▍ | 51280/371472 [4:05:30<24:39:16, 3.61it/s] 14%|█▍ | 51281/371472 [4:05:31<26:03:21, 3.41it/s] 14%|█▍ | 51282/371472 [4:05:31<27:01:09, 3.29it/s] 14%|█▍ | 51283/371472 [4:05:31<25:29:43, 3.49it/s] 14%|█▍ | 51284/371472 [4:05:32<24:15:45, 3.67it/s] 14%|█▍ | 51285/371472 [4:05:32<24:09:35, 3.68it/s] 14%|█▍ | 51286/371472 [4:05:32<23:55:40, 3.72it/s] 14%|█▍ | 51287/371472 [4:05:33<28:36:14, 3.11it/s] 14%|█▍ | 51288/371472 [4:05:33<27:08:33, 3.28it/s] 14%|█▍ | 51289/371472 [4:05:33<25:26:46, 3.50it/s] 14%|█▍ | 51290/371472 [4:05:33<25:34:53, 3.48it/s] 14%|█▍ | 51291/371472 [4:05:34<26:26:13, 3.36it/s] 14%|█▍ | 51292/371472 [4:05:34<35:58:16, 2.47it/s] 14%|█▍ | 51293/371472 [4:05:35<33:20:33, 2.67it/s] 14%|█▍ | 51294/371472 [4:05:35<29:46:28, 2.99it/s] 14%|█▍ | 51295/371472 [4:05:35<27:36:00, 3.22it/s] 14%|█▍ | 51296/371472 [4:05:35<26:22:48, 3.37it/s] 14%|█▍ | 51297/371472 [4:05:36<26:09:05, 3.40it/s] 14%|█▍ | 51298/371472 [4:05:36<25:37:29, 3.47it/s] 14%|█▍ | 51299/371472 [4:05:36<25:04:30, 3.55it/s] 14%|█▍ | 51300/371472 [4:05:36<24:41:46, 3.60it/s] {'loss': 4.1257, 'learning_rate': 8.761285526514253e-07, 'epoch': 2.21} + 14%|█▍ | 51300/371472 [4:05:36<24:41:46, 3.60it/s] 14%|█▍ | 51301/371472 [4:05:37<24:10:53, 3.68it/s] 14%|█▍ | 51302/371472 [4:05:37<24:04:05, 3.70it/s] 14%|█▍ | 51303/371472 [4:05:37<25:06:49, 3.54it/s] 14%|█▍ | 51304/371472 [4:05:38<24:11:44, 3.68it/s] 14%|█▍ | 51305/371472 [4:05:38<23:28:52, 3.79it/s] 14%|█▍ | 51306/371472 [4:05:38<22:59:19, 3.87it/s] 14%|█▍ | 51307/371472 [4:05:38<24:05:07, 3.69it/s] 14%|█▍ | 51308/371472 [4:05:39<23:59:47, 3.71it/s] 14%|█▍ | 51309/371472 [4:05:39<23:28:09, 3.79it/s] 14%|█▍ | 51310/371472 [4:05:39<22:53:46, 3.88it/s] 14%|█▍ | 51311/371472 [4:05:39<22:29:43, 3.95it/s] 14%|█▍ | 51312/371472 [4:05:40<22:56:49, 3.88it/s] 14%|█▍ | 51313/371472 [4:05:40<23:38:24, 3.76it/s] 14%|█▍ | 51314/371472 [4:05:40<23:23:12, 3.80it/s] 14%|█▍ | 51315/371472 [4:05:40<23:36:28, 3.77it/s] 14%|█▍ | 51316/371472 [4:05:41<25:40:22, 3.46it/s] 14%|█▍ | 51317/371472 [4:05:41<25:26:11, 3.50it/s] 14%|█▍ | 51318/371472 [4:05:41<26:46:53, 3.32it/s] 14%|█▍ | 51319/371472 [4:05:42<25:34:32, 3.48it/s] 14%|█▍ | 51320/371472 [4:05:42<24:28:22, 3.63it/s] {'loss': 4.2533, 'learning_rate': 8.760800706759464e-07, 'epoch': 2.21} + 14%|█▍ | 51320/371472 [4:05:42<24:28:22, 3.63it/s] 14%|█▍ | 51321/371472 [4:05:42<26:32:27, 3.35it/s] 14%|█▍ | 51322/371472 [4:05:43<25:24:56, 3.50it/s] 14%|█▍ | 51323/371472 [4:05:43<25:04:15, 3.55it/s] 14%|█▍ | 51324/371472 [4:05:43<25:02:41, 3.55it/s] 14%|█▍ | 51325/371472 [4:05:43<24:17:04, 3.66it/s] 14%|█▍ | 51326/371472 [4:05:44<23:50:32, 3.73it/s] 14%|█▍ | 51327/371472 [4:05:44<24:34:19, 3.62it/s] 14%|█▍ | 51328/371472 [4:05:44<24:09:07, 3.68it/s] 14%|█▍ | 51329/371472 [4:05:45<27:59:01, 3.18it/s] 14%|█▍ | 51330/371472 [4:05:45<26:36:02, 3.34it/s] 14%|█▍ | 51331/371472 [4:05:45<30:41:15, 2.90it/s] 14%|█▍ | 51332/371472 [4:05:46<29:34:05, 3.01it/s] 14%|█▍ | 51333/371472 [4:05:46<29:00:12, 3.07it/s] 14%|█▍ | 51334/371472 [4:05:46<27:39:08, 3.22it/s] 14%|█▍ | 51335/371472 [4:05:46<25:59:29, 3.42it/s] 14%|█▍ | 51336/371472 [4:05:47<24:43:36, 3.60it/s] 14%|█▍ | 51337/371472 [4:05:47<24:09:25, 3.68it/s] 14%|█▍ | 51338/371472 [4:05:47<23:24:11, 3.80it/s] 14%|█▍ | 51339/371472 [4:05:47<23:07:32, 3.85it/s] 14%|█▍ | 51340/371472 [4:05:48<24:04:02, 3.69it/s] {'loss': 4.1158, 'learning_rate': 8.760315887004675e-07, 'epoch': 2.21} + 14%|█▍ | 51340/371472 [4:05:48<24:04:02, 3.69it/s] 14%|█▍ | 51341/371472 [4:05:48<23:50:54, 3.73it/s] 14%|█▍ | 51342/371472 [4:05:48<23:59:34, 3.71it/s] 14%|█▍ | 51343/371472 [4:05:48<23:26:47, 3.79it/s] 14%|█▍ | 51344/371472 [4:05:49<23:36:38, 3.77it/s] 14%|█▍ | 51345/371472 [4:05:49<24:03:31, 3.70it/s] 14%|█▍ | 51346/371472 [4:05:49<23:46:50, 3.74it/s] 14%|█▍ | 51347/371472 [4:05:50<23:11:04, 3.84it/s] 14%|█▍ | 51348/371472 [4:05:50<25:51:35, 3.44it/s] 14%|█▍ | 51349/371472 [4:05:50<24:48:25, 3.58it/s] 14%|█▍ | 51350/371472 [4:05:50<25:42:56, 3.46it/s] 14%|█▍ | 51351/371472 [4:05:51<26:02:53, 3.41it/s] 14%|█▍ | 51352/371472 [4:05:51<24:54:29, 3.57it/s] 14%|█▍ | 51353/371472 [4:05:51<24:06:57, 3.69it/s] 14%|█▍ | 51354/371472 [4:05:52<23:58:36, 3.71it/s] 14%|█▍ | 51355/371472 [4:05:52<24:25:03, 3.64it/s] 14%|█▍ | 51356/371472 [4:05:52<23:33:22, 3.77it/s] 14%|█▍ | 51357/371472 [4:05:52<23:05:58, 3.85it/s] 14%|█▍ | 51358/371472 [4:05:53<23:39:35, 3.76it/s] 14%|█▍ | 51359/371472 [4:05:53<22:57:35, 3.87it/s] 14%|█▍ | 51360/371472 [4:05:53<22:56:54, 3.87it/s] {'loss': 4.4128, 'learning_rate': 8.759831067249887e-07, 'epoch': 2.21} + 14%|█▍ | 51360/371472 [4:05:53<22:56:54, 3.87it/s] 14%|█▍ | 51361/371472 [4:05:53<22:40:15, 3.92it/s] 14%|█▍ | 51362/371472 [4:05:54<22:32:45, 3.94it/s] 14%|█▍ | 51363/371472 [4:05:54<23:44:40, 3.74it/s] 14%|█▍ | 51364/371472 [4:05:54<23:32:27, 3.78it/s] 14%|█▍ | 51365/371472 [4:05:54<25:21:42, 3.51it/s] 14%|█▍ | 51366/371472 [4:05:55<26:37:11, 3.34it/s] 14%|█▍ | 51367/371472 [4:05:55<25:14:19, 3.52it/s] 14%|█▍ | 51368/371472 [4:05:55<24:16:14, 3.66it/s] 14%|█▍ | 51369/371472 [4:05:56<24:40:27, 3.60it/s] 14%|█▍ | 51370/371472 [4:05:56<24:00:22, 3.70it/s] 14%|█▍ | 51371/371472 [4:05:56<24:17:08, 3.66it/s] 14%|█▍ | 51372/371472 [4:05:56<25:29:48, 3.49it/s] 14%|█▍ | 51373/371472 [4:05:57<24:52:15, 3.58it/s] 14%|█▍ | 51374/371472 [4:05:57<24:38:09, 3.61it/s] 14%|█▍ | 51375/371472 [4:05:57<25:46:51, 3.45it/s] 14%|█▍ | 51376/371472 [4:05:58<25:14:38, 3.52it/s] 14%|█▍ | 51377/371472 [4:05:58<26:03:15, 3.41it/s] 14%|█▍ | 51378/371472 [4:05:58<26:07:29, 3.40it/s] 14%|█▍ | 51379/371472 [4:05:58<24:57:17, 3.56it/s] 14%|█▍ | 51380/371472 [4:05:59<25:36:44, 3.47it/s] {'loss': 4.2981, 'learning_rate': 8.759346247495098e-07, 'epoch': 2.21} + 14%|█▍ | 51380/371472 [4:05:59<25:36:44, 3.47it/s] 14%|█▍ | 51381/371472 [4:05:59<26:21:15, 3.37it/s] 14%|█▍ | 51382/371472 [4:05:59<25:54:00, 3.43it/s] 14%|█▍ | 51383/371472 [4:06:00<25:01:31, 3.55it/s] 14%|█▍ | 51384/371472 [4:06:00<24:32:59, 3.62it/s] 14%|█▍ | 51385/371472 [4:06:00<25:13:38, 3.52it/s] 14%|█▍ | 51386/371472 [4:06:00<24:28:57, 3.63it/s] 14%|█▍ | 51387/371472 [4:06:01<24:10:11, 3.68it/s] 14%|█▍ | 51388/371472 [4:06:01<24:39:15, 3.61it/s] 14%|█▍ | 51389/371472 [4:06:01<25:33:08, 3.48it/s] 14%|█▍ | 51390/371472 [4:06:02<24:07:40, 3.69it/s] 14%|█▍ | 51391/371472 [4:06:02<23:40:50, 3.75it/s] 14%|█▍ | 51392/371472 [4:06:02<23:16:33, 3.82it/s] 14%|█▍ | 51393/371472 [4:06:02<23:28:23, 3.79it/s] 14%|█▍ | 51394/371472 [4:06:03<23:21:05, 3.81it/s] 14%|█▍ | 51395/371472 [4:06:03<24:01:53, 3.70it/s] 14%|█▍ | 51396/371472 [4:06:03<25:48:29, 3.45it/s] 14%|█▍ | 51397/371472 [4:06:03<25:08:28, 3.54it/s] 14%|█▍ | 51398/371472 [4:06:04<23:47:31, 3.74it/s] 14%|█▍ | 51399/371472 [4:06:04<23:25:02, 3.80it/s] 14%|█▍ | 51400/371472 [4:06:04<25:19:55, 3.51it/s] {'loss': 4.2351, 'learning_rate': 8.758861427740309e-07, 'epoch': 2.21} + 14%|█▍ | 51400/371472 [4:06:04<25:19:55, 3.51it/s] 14%|█▍ | 51401/371472 [4:06:05<25:28:10, 3.49it/s] 14%|█▍ | 51402/371472 [4:06:05<26:34:15, 3.35it/s] 14%|█▍ | 51403/371472 [4:06:05<26:07:17, 3.40it/s] 14%|█▍ | 51404/371472 [4:06:05<26:52:18, 3.31it/s] 14%|█▍ | 51405/371472 [4:06:06<26:07:14, 3.40it/s] 14%|█▍ | 51406/371472 [4:06:06<26:15:18, 3.39it/s] 14%|█▍ | 51407/371472 [4:06:06<24:42:30, 3.60it/s] 14%|█▍ | 51408/371472 [4:06:07<25:25:45, 3.50it/s] 14%|█▍ | 51409/371472 [4:06:07<24:18:36, 3.66it/s] 14%|█▍ | 51410/371472 [4:06:07<23:40:12, 3.76it/s] 14%|█▍ | 51411/371472 [4:06:07<23:43:27, 3.75it/s] 14%|█▍ | 51412/371472 [4:06:08<24:18:10, 3.66it/s] 14%|█▍ | 51413/371472 [4:06:08<24:57:45, 3.56it/s] 14%|█▍ | 51414/371472 [4:06:08<25:06:55, 3.54it/s] 14%|█▍ | 51415/371472 [4:06:08<24:06:18, 3.69it/s] 14%|█▍ | 51416/371472 [4:06:09<23:16:05, 3.82it/s] 14%|█▍ | 51417/371472 [4:06:09<22:52:29, 3.89it/s] 14%|█▍ | 51418/371472 [4:06:09<23:14:53, 3.82it/s] 14%|█▍ | 51419/371472 [4:06:10<26:20:11, 3.38it/s] 14%|█▍ | 51420/371472 [4:06:10<26:33:06, 3.35it/s] {'loss': 4.1967, 'learning_rate': 8.758376607985519e-07, 'epoch': 2.21} + 14%|█▍ | 51420/371472 [4:06:10<26:33:06, 3.35it/s] 14%|█▍ | 51421/371472 [4:06:10<25:37:59, 3.47it/s] 14%|█▍ | 51422/371472 [4:06:10<25:16:43, 3.52it/s] 14%|█▍ | 51423/371472 [4:06:11<27:03:11, 3.29it/s] 14%|█▍ | 51424/371472 [4:06:11<25:01:01, 3.55it/s] 14%|█▍ | 51425/371472 [4:06:11<24:31:25, 3.63it/s] 14%|█▍ | 51426/371472 [4:06:12<25:56:25, 3.43it/s] 14%|█▍ | 51427/371472 [4:06:12<27:45:28, 3.20it/s] 14%|█▍ | 51428/371472 [4:06:12<28:37:15, 3.11it/s] 14%|█▍ | 51429/371472 [4:06:13<26:23:00, 3.37it/s] 14%|█▍ | 51430/371472 [4:06:13<27:48:04, 3.20it/s] 14%|█▍ | 51431/371472 [4:06:13<26:28:34, 3.36it/s] 14%|█▍ | 51432/371472 [4:06:13<24:54:40, 3.57it/s] 14%|█▍ | 51433/371472 [4:06:14<25:34:33, 3.48it/s] 14%|█▍ | 51434/371472 [4:06:14<24:06:40, 3.69it/s] 14%|█▍ | 51435/371472 [4:06:14<24:47:15, 3.59it/s] 14%|█▍ | 51436/371472 [4:06:15<24:22:25, 3.65it/s] 14%|█▍ | 51437/371472 [4:06:15<24:46:50, 3.59it/s] 14%|█▍ | 51438/371472 [4:06:15<24:19:24, 3.65it/s] 14%|█▍ | 51439/371472 [4:06:15<24:59:51, 3.56it/s] 14%|█▍ | 51440/371472 [4:06:16<25:26:49, 3.49it/s] {'loss': 4.1482, 'learning_rate': 8.757891788230731e-07, 'epoch': 2.22} + 14%|█▍ | 51440/371472 [4:06:16<25:26:49, 3.49it/s] 14%|█▍ | 51441/371472 [4:06:16<26:12:55, 3.39it/s] 14%|█▍ | 51442/371472 [4:06:16<25:54:11, 3.43it/s] 14%|█▍ | 51443/371472 [4:06:17<25:22:54, 3.50it/s] 14%|█▍ | 51444/371472 [4:06:17<25:34:06, 3.48it/s] 14%|█▍ | 51445/371472 [4:06:17<25:15:44, 3.52it/s] 14%|█▍ | 51446/371472 [4:06:17<25:06:41, 3.54it/s] 14%|█▍ | 51447/371472 [4:06:18<24:40:53, 3.60it/s] 14%|█▍ | 51448/371472 [4:06:18<24:52:22, 3.57it/s] 14%|█▍ | 51449/371472 [4:06:18<25:28:15, 3.49it/s] 14%|█▍ | 51450/371472 [4:06:19<25:20:28, 3.51it/s] 14%|█▍ | 51451/371472 [4:06:19<25:06:08, 3.54it/s] 14%|█▍ | 51452/371472 [4:06:19<25:11:51, 3.53it/s] 14%|█▍ | 51453/371472 [4:06:19<24:14:29, 3.67it/s] 14%|█▍ | 51454/371472 [4:06:20<25:22:31, 3.50it/s] 14%|█▍ | 51455/371472 [4:06:20<25:24:30, 3.50it/s] 14%|█▍ | 51456/371472 [4:06:20<24:36:17, 3.61it/s] 14%|█▍ | 51457/371472 [4:06:20<24:10:55, 3.68it/s] 14%|█▍ | 51458/371472 [4:06:21<25:35:33, 3.47it/s] 14%|█▍ | 51459/371472 [4:06:21<27:18:21, 3.26it/s] 14%|█▍ | 51460/371472 [4:06:21<25:43:47, 3.45it/s] {'loss': 4.2317, 'learning_rate': 8.757406968475942e-07, 'epoch': 2.22} + 14%|█▍ | 51460/371472 [4:06:21<25:43:47, 3.45it/s] 14%|█▍ | 51461/371472 [4:06:22<26:00:45, 3.42it/s] 14%|█▍ | 51462/371472 [4:06:22<26:07:06, 3.40it/s] 14%|���▍ | 51463/371472 [4:06:22<27:05:43, 3.28it/s] 14%|█▍ | 51464/371472 [4:06:23<25:28:08, 3.49it/s] 14%|█▍ | 51465/371472 [4:06:23<25:36:43, 3.47it/s] 14%|█▍ | 51466/371472 [4:06:23<24:53:25, 3.57it/s] 14%|█▍ | 51467/371472 [4:06:23<24:04:44, 3.69it/s] 14%|█▍ | 51468/371472 [4:06:24<24:35:17, 3.62it/s] 14%|█▍ | 51469/371472 [4:06:24<23:47:06, 3.74it/s] 14%|█▍ | 51470/371472 [4:06:24<23:24:17, 3.80it/s] 14%|█▍ | 51471/371472 [4:06:24<24:09:01, 3.68it/s] 14%|█▍ | 51472/371472 [4:06:25<24:38:39, 3.61it/s] 14%|█▍ | 51473/371472 [4:06:25<29:34:47, 3.01it/s] 14%|█▍ | 51474/371472 [4:06:25<29:11:15, 3.05it/s] 14%|█▍ | 51475/371472 [4:06:26<27:57:54, 3.18it/s] 14%|█▍ | 51476/371472 [4:06:26<28:47:49, 3.09it/s] 14%|█▍ | 51477/371472 [4:06:26<27:44:29, 3.20it/s] 14%|█▍ | 51478/371472 [4:06:27<26:11:15, 3.39it/s] 14%|█▍ | 51479/371472 [4:06:27<26:28:30, 3.36it/s] 14%|█▍ | 51480/371472 [4:06:27<25:51:40, 3.44it/s] {'loss': 4.2784, 'learning_rate': 8.756922148721152e-07, 'epoch': 2.22} + 14%|█▍ | 51480/371472 [4:06:27<25:51:40, 3.44it/s] 14%|█▍ | 51481/371472 [4:06:28<26:19:55, 3.38it/s] 14%|█▍ | 51482/371472 [4:06:28<29:23:51, 3.02it/s] 14%|█▍ | 51483/371472 [4:06:28<28:43:05, 3.10it/s] 14%|█▍ | 51484/371472 [4:06:29<26:55:22, 3.30it/s] 14%|█▍ | 51485/371472 [4:06:29<28:45:15, 3.09it/s] 14%|█▍ | 51486/371472 [4:06:29<28:11:37, 3.15it/s] 14%|█▍ | 51487/371472 [4:06:29<26:38:55, 3.34it/s] 14%|█▍ | 51488/371472 [4:06:30<26:27:46, 3.36it/s] 14%|█▍ | 51489/371472 [4:06:30<25:35:01, 3.47it/s] 14%|█▍ | 51490/371472 [4:06:30<25:53:28, 3.43it/s] 14%|█▍ | 51491/371472 [4:06:31<27:33:56, 3.22it/s] 14%|█▍ | 51492/371472 [4:06:31<26:09:37, 3.40it/s] 14%|█▍ | 51493/371472 [4:06:31<25:04:01, 3.55it/s] 14%|█▍ | 51494/371472 [4:06:31<25:04:07, 3.55it/s] 14%|█▍ | 51495/371472 [4:06:32<24:23:56, 3.64it/s] 14%|█▍ | 51496/371472 [4:06:32<24:06:28, 3.69it/s] 14%|█▍ | 51497/371472 [4:06:32<24:14:22, 3.67it/s] 14%|█▍ | 51498/371472 [4:06:33<26:55:51, 3.30it/s] 14%|█▍ | 51499/371472 [4:06:33<27:17:16, 3.26it/s] 14%|█▍ | 51500/371472 [4:06:33<28:01:11, 3.17it/s] {'loss': 4.3519, 'learning_rate': 8.756437328966364e-07, 'epoch': 2.22} + 14%|█▍ | 51500/371472 [4:06:33<28:01:11, 3.17it/s] 14%|█▍ | 51501/371472 [4:06:34<26:03:19, 3.41it/s] 14%|█▍ | 51502/371472 [4:06:34<25:06:09, 3.54it/s] 14%|█▍ | 51503/371472 [4:06:34<25:12:03, 3.53it/s] 14%|█▍ | 51504/371472 [4:06:34<24:17:24, 3.66it/s] 14%|█▍ | 51505/371472 [4:06:35<24:59:43, 3.56it/s] 14%|█▍ | 51506/371472 [4:06:35<25:24:55, 3.50it/s] 14%|█▍ | 51507/371472 [4:06:35<26:16:04, 3.38it/s] 14%|█▍ | 51508/371472 [4:06:35<25:01:13, 3.55it/s] 14%|█▍ | 51509/371472 [4:06:36<24:27:53, 3.63it/s] 14%|█▍ | 51510/371472 [4:06:36<23:34:47, 3.77it/s] 14%|█▍ | 51511/371472 [4:06:36<23:40:31, 3.75it/s] 14%|█▍ | 51512/371472 [4:06:37<25:22:09, 3.50it/s] 14%|█▍ | 51513/371472 [4:06:37<25:50:37, 3.44it/s] 14%|█▍ | 51514/371472 [4:06:37<24:38:55, 3.61it/s] 14%|█▍ | 51515/371472 [4:06:37<25:01:22, 3.55it/s] 14%|█▍ | 51516/371472 [4:06:38<24:58:49, 3.56it/s] 14%|█▍ | 51517/371472 [4:06:38<26:00:21, 3.42it/s] 14%|█▍ | 51518/371472 [4:06:38<25:35:40, 3.47it/s] 14%|█▍ | 51519/371472 [4:06:39<24:31:47, 3.62it/s] 14%|█▍ | 51520/371472 [4:06:39<25:08:05, 3.54it/s] {'loss': 4.3672, 'learning_rate': 8.755952509211575e-07, 'epoch': 2.22} + 14%|█▍ | 51520/371472 [4:06:39<25:08:05, 3.54it/s] 14%|█▍ | 51521/371472 [4:06:39<25:43:45, 3.45it/s] 14%|█▍ | 51522/371472 [4:06:39<25:37:31, 3.47it/s] 14%|█▍ | 51523/371472 [4:06:40<25:06:40, 3.54it/s] 14%|█▍ | 51524/371472 [4:06:40<25:01:34, 3.55it/s] 14%|█▍ | 51525/371472 [4:06:40<25:00:07, 3.55it/s] 14%|█▍ | 51526/371472 [4:06:41<26:57:34, 3.30it/s] 14%|█▍ | 51527/371472 [4:06:41<25:21:05, 3.51it/s] 14%|█▍ | 51528/371472 [4:06:41<25:45:59, 3.45it/s] 14%|█▍ | 51529/371472 [4:06:41<25:56:31, 3.43it/s] 14%|█▍ | 51530/371472 [4:06:42<25:04:12, 3.54it/s] 14%|█▍ | 51531/371472 [4:06:42<24:38:38, 3.61it/s] 14%|█▍ | 51532/371472 [4:06:42<24:54:56, 3.57it/s] 14%|█▍ | 51533/371472 [4:06:43<24:38:32, 3.61it/s] 14%|█▍ | 51534/371472 [4:06:43<23:58:43, 3.71it/s] 14%|█▍ | 51535/371472 [4:06:43<23:21:48, 3.80it/s] 14%|█▍ | 51536/371472 [4:06:43<25:02:04, 3.55it/s] 14%|█▍ | 51537/371472 [4:06:44<25:25:22, 3.50it/s] 14%|█▍ | 51538/371472 [4:06:44<25:08:30, 3.53it/s] 14%|█▍ | 51539/371472 [4:06:44<25:27:11, 3.49it/s] 14%|█▍ | 51540/371472 [4:06:45<25:23:45, 3.50it/s] {'loss': 4.1288, 'learning_rate': 8.755467689456786e-07, 'epoch': 2.22} + 14%|█▍ | 51540/371472 [4:06:45<25:23:45, 3.50it/s] 14%|█▍ | 51541/371472 [4:06:45<25:23:16, 3.50it/s] 14%|█▍ | 51542/371472 [4:06:45<27:48:31, 3.20it/s] 14%|█▍ | 51543/371472 [4:06:45<27:25:12, 3.24it/s] 14%|█▍ | 51544/371472 [4:06:46<26:14:13, 3.39it/s] 14%|█▍ | 51545/371472 [4:06:46<25:48:18, 3.44it/s] 14%|█▍ | 51546/371472 [4:06:46<25:51:32, 3.44it/s] 14%|█▍ | 51547/371472 [4:06:47<26:06:38, 3.40it/s] 14%|█▍ | 51548/371472 [4:06:47<26:26:09, 3.36it/s] 14%|█▍ | 51549/371472 [4:06:47<25:44:43, 3.45it/s] 14%|█▍ | 51550/371472 [4:06:47<25:13:47, 3.52it/s] 14%|█▍ | 51551/371472 [4:06:48<24:42:10, 3.60it/s] 14%|█▍ | 51552/371472 [4:06:48<24:24:31, 3.64it/s] 14%|█▍ | 51553/371472 [4:06:48<24:55:55, 3.56it/s] 14%|█▍ | 51554/371472 [4:06:49<24:44:57, 3.59it/s] 14%|█▍ | 51555/371472 [4:06:49<24:23:18, 3.64it/s] 14%|█▍ | 51556/371472 [4:06:49<24:04:56, 3.69it/s] 14%|█▍ | 51557/371472 [4:06:49<24:10:26, 3.68it/s] 14%|█▍ | 51558/371472 [4:06:50<25:08:16, 3.54it/s] 14%|█▍ | 51559/371472 [4:06:50<25:13:58, 3.52it/s] 14%|█▍ | 51560/371472 [4:06:50<24:40:12, 3.60it/s] {'loss': 4.4207, 'learning_rate': 8.754982869701996e-07, 'epoch': 2.22} + 14%|█▍ | 51560/371472 [4:06:50<24:40:12, 3.60it/s] 14%|█▍ | 51561/371472 [4:06:50<24:17:46, 3.66it/s] 14%|█▍ | 51562/371472 [4:06:51<24:25:08, 3.64it/s] 14%|█▍ | 51563/371472 [4:06:51<24:30:28, 3.63it/s] 14%|█▍ | 51564/371472 [4:06:51<24:18:10, 3.66it/s] 14%|█▍ | 51565/371472 [4:06:52<23:31:17, 3.78it/s] 14%|█▍ | 51566/371472 [4:06:52<24:17:59, 3.66it/s] 14%|█▍ | 51567/371472 [4:06:52<23:52:40, 3.72it/s] 14%|█▍ | 51568/371472 [4:06:52<23:19:57, 3.81it/s] 14%|█▍ | 51569/371472 [4:06:53<24:21:38, 3.65it/s] 14%|█▍ | 51570/371472 [4:06:53<23:48:38, 3.73it/s] 14%|█▍ | 51571/371472 [4:06:53<23:25:11, 3.79it/s] 14%|█▍ | 51572/371472 [4:06:54<27:36:04, 3.22it/s] 14%|█▍ | 51573/371472 [4:06:54<26:27:20, 3.36it/s] 14%|█▍ | 51574/371472 [4:06:54<27:03:06, 3.28it/s] 14%|█▍ | 51575/371472 [4:06:54<26:10:09, 3.40it/s] 14%|█▍ | 51576/371472 [4:06:55<25:57:18, 3.42it/s] 14%|█▍ | 51577/371472 [4:06:55<25:54:03, 3.43it/s] 14%|█▍ | 51578/371472 [4:06:55<26:04:39, 3.41it/s] 14%|█▍ | 51579/371472 [4:06:56<24:38:05, 3.61it/s] 14%|█▍ | 51580/371472 [4:06:56<23:23:06, 3.80it/s] {'loss': 4.5234, 'learning_rate': 8.754498049947208e-07, 'epoch': 2.22} + 14%|█▍ | 51580/371472 [4:06:56<23:23:06, 3.80it/s] 14%|█▍ | 51581/371472 [4:06:56<24:47:06, 3.59it/s] 14%|█▍ | 51582/371472 [4:06:56<23:43:04, 3.75it/s] 14%|█▍ | 51583/371472 [4:06:57<23:57:24, 3.71it/s] 14%|█▍ | 51584/371472 [4:06:57<25:50:30, 3.44it/s] 14%|█▍ | 51585/371472 [4:06:57<24:40:13, 3.60it/s] 14%|█▍ | 51586/371472 [4:06:58<26:37:35, 3.34it/s] 14%|█▍ | 51587/371472 [4:06:58<25:35:42, 3.47it/s] 14%|█▍ | 51588/371472 [4:06:58<24:28:37, 3.63it/s] 14%|█▍ | 51589/371472 [4:06:58<24:27:05, 3.63it/s] 14%|█▍ | 51590/371472 [4:06:59<23:31:46, 3.78it/s] 14%|█▍ | 51591/371472 [4:06:59<24:24:25, 3.64it/s] 14%|█▍ | 51592/371472 [4:06:59<26:26:08, 3.36it/s] 14%|█▍ | 51593/371472 [4:07:00<30:53:11, 2.88it/s] 14%|█▍ | 51594/371472 [4:07:00<30:07:43, 2.95it/s] 14%|█▍ | 51595/371472 [4:07:00<28:00:22, 3.17it/s] 14%|█▍ | 51596/371472 [4:07:01<26:24:17, 3.37it/s] 14%|█▍ | 51597/371472 [4:07:01<25:54:21, 3.43it/s] 14%|█▍ | 51598/371472 [4:07:01<24:55:33, 3.56it/s] 14%|█▍ | 51599/371472 [4:07:01<24:30:40, 3.63it/s] 14%|█▍ | 51600/371472 [4:07:02<24:19:51, 3.65it/s] {'loss': 4.2745, 'learning_rate': 8.754013230192419e-07, 'epoch': 2.22} + 14%|█▍ | 51600/371472 [4:07:02<24:19:51, 3.65it/s] 14%|█▍ | 51601/371472 [4:07:02<23:33:20, 3.77it/s] 14%|█▍ | 51602/371472 [4:07:02<24:05:49, 3.69it/s] 14%|█▍ | 51603/371472 [4:07:02<24:26:49, 3.63it/s] 14%|█▍ | 51604/371472 [4:07:03<24:38:52, 3.60it/s] 14%|█▍ | 51605/371472 [4:07:03<24:30:38, 3.63it/s] 14%|█▍ | 51606/371472 [4:07:03<24:41:02, 3.60it/s] 14%|█▍ | 51607/371472 [4:07:04<24:51:27, 3.57it/s] 14%|█▍ | 51608/371472 [4:07:04<25:03:55, 3.54it/s] 14%|█▍ | 51609/371472 [4:07:04<25:58:25, 3.42it/s] 14%|█▍ | 51610/371472 [4:07:04<27:41:15, 3.21it/s] 14%|█▍ | 51611/371472 [4:07:05<25:56:11, 3.43it/s] 14%|█▍ | 51612/371472 [4:07:05<24:48:51, 3.58it/s] 14%|█▍ | 51613/371472 [4:07:05<25:19:36, 3.51it/s] 14%|█▍ | 51614/371472 [4:07:06<23:50:22, 3.73it/s] 14%|█▍ | 51615/371472 [4:07:06<24:11:45, 3.67it/s] 14%|█▍ | 51616/371472 [4:07:06<25:04:48, 3.54it/s] 14%|█▍ | 51617/371472 [4:07:06<25:07:31, 3.54it/s] 14%|█▍ | 51618/371472 [4:07:07<25:26:20, 3.49it/s] 14%|█▍ | 51619/371472 [4:07:07<24:38:46, 3.60it/s] 14%|█▍ | 51620/371472 [4:07:07<23:25:25, 3.79it/s] {'loss': 4.2628, 'learning_rate': 8.753528410437631e-07, 'epoch': 2.22} + 14%|█▍ | 51620/371472 [4:07:07<23:25:25, 3.79it/s] 14%|█▍ | 51621/371472 [4:07:07<23:56:15, 3.71it/s] 14%|█▍ | 51622/371472 [4:07:08<23:09:37, 3.84it/s] 14%|█▍ | 51623/371472 [4:07:08<23:33:59, 3.77it/s] 14%|█▍ | 51624/371472 [4:07:08<24:03:47, 3.69it/s] 14%|█▍ | 51625/371472 [4:07:09<24:42:49, 3.60it/s] 14%|█▍ | 51626/371472 [4:07:09<23:56:52, 3.71it/s] 14%|█▍ | 51627/371472 [4:07:09<27:08:53, 3.27it/s] 14%|█▍ | 51628/371472 [4:07:09<25:52:40, 3.43it/s] 14%|█▍ | 51629/371472 [4:07:10<25:02:08, 3.55it/s] 14%|█▍ | 51630/371472 [4:07:10<23:59:27, 3.70it/s] 14%|█▍ | 51631/371472 [4:07:10<24:41:23, 3.60it/s] 14%|█▍ | 51632/371472 [4:07:10<24:01:25, 3.70it/s] 14%|█▍ | 51633/371472 [4:07:11<23:25:25, 3.79it/s] 14%|█▍ | 51634/371472 [4:07:11<23:56:18, 3.71it/s] 14%|█▍ | 51635/371472 [4:07:11<23:55:51, 3.71it/s] 14%|█▍ | 51636/371472 [4:07:12<23:47:43, 3.73it/s] 14%|█▍ | 51637/371472 [4:07:12<25:41:28, 3.46it/s] 14%|█▍ | 51638/371472 [4:07:12<25:20:33, 3.51it/s] 14%|█▍ | 51639/371472 [4:07:12<24:55:11, 3.57it/s] 14%|█▍ | 51640/371472 [4:07:13<25:12:11, 3.53it/s] {'loss': 4.2123, 'learning_rate': 8.753043590682841e-07, 'epoch': 2.22} + 14%|█▍ | 51640/371472 [4:07:13<25:12:11, 3.53it/s] 14%|█▍ | 51641/371472 [4:07:13<26:30:21, 3.35it/s] 14%|█▍ | 51642/371472 [4:07:13<26:21:58, 3.37it/s] 14%|█▍ | 51643/371472 [4:07:14<25:32:05, 3.48it/s] 14%|█▍ | 51644/371472 [4:07:14<24:23:26, 3.64it/s] 14%|█▍ | 51645/371472 [4:07:14<23:52:15, 3.72it/s] 14%|█▍ | 51646/371472 [4:07:14<24:22:42, 3.64it/s] 14%|█▍ | 51647/371472 [4:07:15<25:11:50, 3.53it/s] 14%|█▍ | 51648/371472 [4:07:15<25:22:13, 3.50it/s] 14%|█▍ | 51649/371472 [4:07:15<24:56:42, 3.56it/s] 14%|█▍ | 51650/371472 [4:07:16<24:27:22, 3.63it/s] 14%|█▍ | 51651/371472 [4:07:16<24:35:47, 3.61it/s] 14%|█▍ | 51652/371472 [4:07:16<25:27:39, 3.49it/s] 14%|█▍ | 51653/371472 [4:07:16<24:43:35, 3.59it/s] 14%|█▍ | 51654/371472 [4:07:17<23:53:42, 3.72it/s] 14%|█▍ | 51655/371472 [4:07:17<24:26:31, 3.63it/s] 14%|█▍ | 51656/371472 [4:07:17<25:22:24, 3.50it/s] 14%|█▍ | 51657/371472 [4:07:17<24:15:04, 3.66it/s] 14%|█▍ | 51658/371472 [4:07:18<24:57:54, 3.56it/s] 14%|█▍ | 51659/371472 [4:07:18<24:07:32, 3.68it/s] 14%|█▍ | 51660/371472 [4:07:18<25:50:02, 3.44it/s] {'loss': 4.3077, 'learning_rate': 8.752558770928053e-07, 'epoch': 2.23} + 14%|█▍ | 51660/371472 [4:07:18<25:50:02, 3.44it/s] 14%|█▍ | 51661/371472 [4:07:19<25:23:11, 3.50it/s] 14%|█▍ | 51662/371472 [4:07:19<24:12:10, 3.67it/s] 14%|█▍ | 51663/371472 [4:07:19<24:23:54, 3.64it/s] 14%|█▍ | 51664/371472 [4:07:19<24:03:32, 3.69it/s] 14%|█▍ | 51665/371472 [4:07:20<24:02:18, 3.70it/s] 14%|█▍ | 51666/371472 [4:07:20<24:05:59, 3.69it/s] 14%|█▍ | 51667/371472 [4:07:20<24:02:18, 3.70it/s] 14%|█▍ | 51668/371472 [4:07:20<23:27:43, 3.79it/s] 14%|█▍ | 51669/371472 [4:07:21<24:05:47, 3.69it/s] 14%|█▍ | 51670/371472 [4:07:21<24:00:40, 3.70it/s] 14%|█▍ | 51671/371472 [4:07:21<24:56:04, 3.56it/s] 14%|█�� | 51672/371472 [4:07:22<24:20:18, 3.65it/s] 14%|█▍ | 51673/371472 [4:07:22<25:22:55, 3.50it/s] 14%|█▍ | 51674/371472 [4:07:22<24:07:59, 3.68it/s] 14%|█▍ | 51675/371472 [4:07:22<23:58:53, 3.70it/s] 14%|█▍ | 51676/371472 [4:07:23<23:40:33, 3.75it/s] 14%|█▍ | 51677/371472 [4:07:23<23:50:39, 3.73it/s] 14%|█▍ | 51678/371472 [4:07:23<25:06:11, 3.54it/s] 14%|█▍ | 51679/371472 [4:07:24<24:49:11, 3.58it/s] 14%|█▍ | 51680/371472 [4:07:24<25:23:52, 3.50it/s] {'loss': 4.245, 'learning_rate': 8.752073951173263e-07, 'epoch': 2.23} + 14%|█▍ | 51680/371472 [4:07:24<25:23:52, 3.50it/s] 14%|█▍ | 51681/371472 [4:07:24<26:37:49, 3.34it/s] 14%|█▍ | 51682/371472 [4:07:24<27:07:03, 3.28it/s] 14%|█▍ | 51683/371472 [4:07:25<27:15:39, 3.26it/s] 14%|█▍ | 51684/371472 [4:07:25<25:50:14, 3.44it/s] 14%|█▍ | 51685/371472 [4:07:25<25:03:38, 3.54it/s] 14%|█▍ | 51686/371472 [4:07:26<25:30:01, 3.48it/s] 14%|█▍ | 51687/371472 [4:07:26<24:39:54, 3.60it/s] 14%|█▍ | 51688/371472 [4:07:26<24:38:28, 3.60it/s] 14%|█▍ | 51689/371472 [4:07:26<23:39:58, 3.75it/s] 14%|█▍ | 51690/371472 [4:07:27<24:18:14, 3.65it/s] 14%|█▍ | 51691/371472 [4:07:27<23:47:46, 3.73it/s] 14%|█▍ | 51692/371472 [4:07:27<23:44:34, 3.74it/s] 14%|█▍ | 51693/371472 [4:07:27<23:11:59, 3.83it/s] 14%|█▍ | 51694/371472 [4:07:28<24:13:14, 3.67it/s] 14%|█▍ | 51695/371472 [4:07:28<23:43:23, 3.74it/s] 14%|█▍ | 51696/371472 [4:07:28<23:47:57, 3.73it/s] 14%|█▍ | 51697/371472 [4:07:29<24:42:28, 3.60it/s] 14%|█▍ | 51698/371472 [4:07:29<24:02:23, 3.69it/s] 14%|█▍ | 51699/371472 [4:07:29<23:06:24, 3.84it/s] 14%|█▍ | 51700/371472 [4:07:29<22:47:30, 3.90it/s] {'loss': 4.2597, 'learning_rate': 8.751589131418475e-07, 'epoch': 2.23} + 14%|█▍ | 51700/371472 [4:07:29<22:47:30, 3.90it/s] 14%|█▍ | 51701/371472 [4:07:30<22:59:30, 3.86it/s] 14%|█▍ | 51702/371472 [4:07:30<23:16:22, 3.82it/s] 14%|█▍ | 51703/371472 [4:07:30<23:05:36, 3.85it/s] 14%|█▍ | 51704/371472 [4:07:30<25:26:49, 3.49it/s] 14%|█▍ | 51705/371472 [4:07:31<26:30:13, 3.35it/s] 14%|█▍ | 51706/371472 [4:07:31<28:55:22, 3.07it/s] 14%|█▍ | 51707/371472 [4:07:31<27:03:11, 3.28it/s] 14%|█▍ | 51708/371472 [4:07:32<25:18:16, 3.51it/s] 14%|█▍ | 51709/371472 [4:07:32<24:01:13, 3.70it/s] 14%|█▍ | 51710/371472 [4:07:32<23:21:56, 3.80it/s] 14%|█▍ | 51711/371472 [4:07:32<23:36:35, 3.76it/s] 14%|█▍ | 51712/371472 [4:07:33<23:48:39, 3.73it/s] 14%|█▍ | 51713/371472 [4:07:33<24:42:48, 3.59it/s] 14%|█▍ | 51714/371472 [4:07:33<24:22:29, 3.64it/s] 14%|█▍ | 51715/371472 [4:07:33<23:32:59, 3.77it/s] 14%|█▍ | 51716/371472 [4:07:34<23:10:57, 3.83it/s] 14%|█▍ | 51717/371472 [4:07:34<23:08:11, 3.84it/s] 14%|█▍ | 51718/371472 [4:07:34<24:16:55, 3.66it/s] 14%|█▍ | 51719/371472 [4:07:35<23:51:56, 3.72it/s] 14%|█▍ | 51720/371472 [4:07:35<24:09:46, 3.68it/s] {'loss': 4.4867, 'learning_rate': 8.751104311663685e-07, 'epoch': 2.23} + 14%|█▍ | 51720/371472 [4:07:35<24:09:46, 3.68it/s] 14%|█▍ | 51721/371472 [4:07:35<24:46:10, 3.59it/s] 14%|█▍ | 51722/371472 [4:07:35<24:08:47, 3.68it/s] 14%|█▍ | 51723/371472 [4:07:36<23:52:31, 3.72it/s] 14%|█▍ | 51724/371472 [4:07:36<23:54:44, 3.71it/s] 14%|█▍ | 51725/371472 [4:07:36<23:15:06, 3.82it/s] 14%|█▍ | 51726/371472 [4:07:36<23:02:16, 3.86it/s] 14%|█▍ | 51727/371472 [4:07:37<23:27:10, 3.79it/s] 14%|█▍ | 51728/371472 [4:07:37<23:23:31, 3.80it/s] 14%|█▍ | 51729/371472 [4:07:37<23:04:00, 3.85it/s] 14%|█▍ | 51730/371472 [4:07:37<23:26:14, 3.79it/s] 14%|█▍ | 51731/371472 [4:07:38<24:35:24, 3.61it/s] 14%|█▍ | 51732/371472 [4:07:38<25:51:36, 3.43it/s] 14%|█▍ | 51733/371472 [4:07:38<25:25:52, 3.49it/s] 14%|█▍ | 51734/371472 [4:07:39<25:41:33, 3.46it/s] 14%|█▍ | 51735/371472 [4:07:39<25:02:32, 3.55it/s] 14%|█▍ | 51736/371472 [4:07:39<25:09:17, 3.53it/s] 14%|█▍ | 51737/371472 [4:07:39<24:33:19, 3.62it/s] 14%|█▍ | 51738/371472 [4:07:40<25:01:31, 3.55it/s] 14%|█▍ | 51739/371472 [4:07:40<26:18:25, 3.38it/s] 14%|█▍ | 51740/371472 [4:07:40<25:52:32, 3.43it/s] {'loss': 4.5026, 'learning_rate': 8.750619491908897e-07, 'epoch': 2.23} + 14%|█▍ | 51740/371472 [4:07:40<25:52:32, 3.43it/s] 14%|█▍ | 51741/371472 [4:07:41<26:12:23, 3.39it/s] 14%|█▍ | 51742/371472 [4:07:41<26:32:37, 3.35it/s] 14%|█▍ | 51743/371472 [4:07:41<25:28:36, 3.49it/s] 14%|█▍ | 51744/371472 [4:07:42<24:49:10, 3.58it/s] 14%|█▍ | 51745/371472 [4:07:42<24:48:26, 3.58it/s] 14%|█▍ | 51746/371472 [4:07:42<24:07:35, 3.68it/s] 14%|█▍ | 51747/371472 [4:07:42<23:45:53, 3.74it/s] 14%|█▍ | 51748/371472 [4:07:43<23:11:59, 3.83it/s] 14%|█▍ | 51749/371472 [4:07:43<25:34:21, 3.47it/s] 14%|█▍ | 51750/371472 [4:07:43<24:35:33, 3.61it/s] 14%|█▍ | 51751/371472 [4:07:43<24:37:29, 3.61it/s] 14%|█▍ | 51752/371472 [4:07:44<24:40:19, 3.60it/s] 14%|█▍ | 51753/371472 [4:07:44<23:36:38, 3.76it/s] 14%|█▍ | 51754/371472 [4:07:44<25:49:52, 3.44it/s] 14%|█▍ | 51755/371472 [4:07:45<25:08:31, 3.53it/s] 14%|█▍ | 51756/371472 [4:07:45<24:10:49, 3.67it/s] 14%|█▍ | 51757/371472 [4:07:45<24:54:51, 3.56it/s] 14%|█▍ | 51758/371472 [4:07:45<24:45:38, 3.59it/s] 14%|█▍ | 51759/371472 [4:07:46<25:54:35, 3.43it/s] 14%|█▍ | 51760/371472 [4:07:46<25:04:38, 3.54it/s] {'loss': 4.2651, 'learning_rate': 8.750134672154108e-07, 'epoch': 2.23} + 14%|█▍ | 51760/371472 [4:07:46<25:04:38, 3.54it/s] 14%|█▍ | 51761/371472 [4:07:46<23:52:08, 3.72it/s] 14%|█▍ | 51762/371472 [4:07:46<23:28:23, 3.78it/s] 14%|█▍ | 51763/371472 [4:07:47<23:33:59, 3.77it/s] 14%|█▍ | 51764/371472 [4:07:47<23:56:58, 3.71it/s] 14%|█▍ | 51765/371472 [4:07:47<23:34:17, 3.77it/s] 14%|█▍ | 51766/371472 [4:07:48<23:18:46, 3.81it/s] 14%|█▍ | 51767/371472 [4:07:48<26:28:40, 3.35it/s] 14%|█▍ | 51768/371472 [4:07:48<26:07:40, 3.40it/s] 14%|█▍ | 51769/371472 [4:07:48<25:14:00, 3.52it/s] 14%|█▍ | 51770/371472 [4:07:49<26:10:54, 3.39it/s] 14%|█▍ | 51771/371472 [4:07:49<25:40:20, 3.46it/s] 14%|█▍ | 51772/371472 [4:07:49<25:54:22, 3.43it/s] 14%|█▍ | 51773/371472 [4:07:50<25:07:04, 3.54it/s] 14%|█▍ | 51774/371472 [4:07:50<26:11:52, 3.39it/s] 14%|█▍ | 51775/371472 [4:07:50<25:32:26, 3.48it/s] 14%|█▍ | 51776/371472 [4:07:50<24:46:20, 3.58it/s] 14%|█▍ | 51777/371472 [4:07:51<24:08:33, 3.68it/s] 14%|█▍ | 51778/371472 [4:07:51<24:02:02, 3.69it/s] 14%|█▍ | 51779/371472 [4:07:51<24:31:38, 3.62it/s] 14%|█▍ | 51780/371472 [4:07:52<23:56:11, 3.71it/s] {'loss': 4.3774, 'learning_rate': 8.749649852399321e-07, 'epoch': 2.23} + 14%|█▍ | 51780/371472 [4:07:52<23:56:11, 3.71it/s] 14%|█▍ | 51781/371472 [4:07:52<23:29:53, 3.78it/s] 14%|█▍ | 51782/371472 [4:07:52<23:49:51, 3.73it/s] 14%|█▍ | 51783/371472 [4:07:52<24:15:42, 3.66it/s] 14%|█▍ | 51784/371472 [4:07:53<24:38:36, 3.60it/s] 14%|█▍ | 51785/371472 [4:07:53<23:59:09, 3.70it/s] 14%|█▍ | 51786/371472 [4:07:53<23:35:57, 3.76it/s] 14%|█▍ | 51787/371472 [4:07:53<24:03:49, 3.69it/s] 14%|█▍ | 51788/371472 [4:07:54<24:39:20, 3.60it/s] 14%|█▍ | 51789/371472 [4:07:54<23:55:07, 3.71it/s] 14%|█▍ | 51790/371472 [4:07:54<22:59:35, 3.86it/s] 14%|█▍ | 51791/371472 [4:07:54<23:00:42, 3.86it/s] 14%|█▍ | 51792/371472 [4:07:55<22:46:03, 3.90it/s] 14%|█▍ | 51793/371472 [4:07:55<22:24:38, 3.96it/s] 14%|█▍ | 51794/371472 [4:07:55<24:50:29, 3.57it/s] 14%|█▍ | 51795/371472 [4:07:56<25:02:53, 3.55it/s] 14%|█▍ | 51796/371472 [4:07:56<25:00:35, 3.55it/s] 14%|█▍ | 51797/371472 [4:07:56<24:32:06, 3.62it/s] 14%|█▍ | 51798/371472 [4:07:56<26:57:31, 3.29it/s] 14%|█▍ | 51799/371472 [4:07:57<27:33:19, 3.22it/s] 14%|█▍ | 51800/371472 [4:07:57<27:27:07, 3.23it/s] {'loss': 4.2054, 'learning_rate': 8.749165032644529e-07, 'epoch': 2.23} + 14%|█▍ | 51800/371472 [4:07:57<27:27:07, 3.23it/s] 14%|█▍ | 51801/371472 [4:07:57<26:35:54, 3.34it/s] 14%|█▍ | 51802/371472 [4:07:58<25:07:13, 3.53it/s] 14%|█▍ | 51803/371472 [4:07:58<24:50:00, 3.58it/s] 14%|█▍ | 51804/371472 [4:07:58<28:40:37, 3.10it/s] 14%|█▍ | 51805/371472 [4:07:59<27:44:20, 3.20it/s] 14%|█▍ | 51806/371472 [4:07:59<26:35:58, 3.34it/s] 14%|█▍ | 51807/371472 [4:07:59<25:54:39, 3.43it/s] 14%|█▍ | 51808/371472 [4:08:00<27:07:36, 3.27it/s] 14%|█▍ | 51809/371472 [4:08:00<25:38:47, 3.46it/s] 14%|█▍ | 51810/371472 [4:08:00<24:54:57, 3.56it/s] 14%|█▍ | 51811/371472 [4:08:00<24:40:20, 3.60it/s] 14%|█▍ | 51812/371472 [4:08:01<24:42:04, 3.59it/s] 14%|█▍ | 51813/371472 [4:08:01<25:42:56, 3.45it/s] 14%|█▍ | 51814/371472 [4:08:01<24:12:56, 3.67it/s] 14%|█▍ | 51815/371472 [4:08:01<25:08:50, 3.53it/s] 14%|█▍ | 51816/371472 [4:08:02<24:08:26, 3.68it/s] 14%|█▍ | 51817/371472 [4:08:02<24:35:37, 3.61it/s] 14%|█▍ | 51818/371472 [4:08:02<24:09:26, 3.68it/s] 14%|█▍ | 51819/371472 [4:08:03<25:29:03, 3.48it/s] 14%|█▍ | 51820/371472 [4:08:03<25:16:14, 3.51it/s] {'loss': 4.3723, 'learning_rate': 8.748680212889741e-07, 'epoch': 2.23} + 14%|█▍ | 51820/371472 [4:08:03<25:16:14, 3.51it/s] 14%|█▍ | 51821/371472 [4:08:03<25:29:14, 3.48it/s] 14%|█▍ | 51822/371472 [4:08:03<25:47:34, 3.44it/s] 14%|█▍ | 51823/371472 [4:08:04<24:51:57, 3.57it/s] 14%|█▍ | 51824/371472 [4:08:04<26:01:41, 3.41it/s] 14%|█▍ | 51825/371472 [4:08:04<25:15:11, 3.52it/s] 14%|█▍ | 51826/371472 [4:08:05<28:45:21, 3.09it/s] 14%|█▍ | 51827/371472 [4:08:05<27:31:57, 3.22it/s] 14%|█▍ | 51828/371472 [4:08:05<26:37:27, 3.33it/s] 14%|█▍ | 51829/371472 [4:08:06<27:33:31, 3.22it/s] 14%|█▍ | 51830/371472 [4:08:06<27:21:30, 3.25it/s] 14%|█▍ | 51831/371472 [4:08:06<25:44:10, 3.45it/s] 14%|█▍ | 51832/371472 [4:08:06<25:26:33, 3.49it/s] 14%|█▍ | 51833/371472 [4:08:07<26:08:06, 3.40it/s] 14%|█▍ | 51834/371472 [4:08:07<25:07:48, 3.53it/s] 14%|█▍ | 51835/371472 [4:08:07<24:23:51, 3.64it/s] 14%|█▍ | 51836/371472 [4:08:08<26:53:52, 3.30it/s] 14%|█▍ | 51837/371472 [4:08:08<26:58:39, 3.29it/s] 14%|█▍ | 51838/371472 [4:08:08<27:38:42, 3.21it/s] 14%|█▍ | 51839/371472 [4:08:08<26:35:31, 3.34it/s] 14%|█▍ | 51840/371472 [4:08:09<25:35:04, 3.47it/s] {'loss': 4.2458, 'learning_rate': 8.748195393134952e-07, 'epoch': 2.23} + 14%|█▍ | 51840/371472 [4:08:09<25:35:04, 3.47it/s] 14%|█▍ | 51841/371472 [4:08:09<26:33:55, 3.34it/s] 14%|█▍ | 51842/371472 [4:08:09<25:28:16, 3.49it/s] 14%|█▍ | 51843/371472 [4:08:10<24:35:45, 3.61it/s] 14%|█▍ | 51844/371472 [4:08:10<23:50:04, 3.73it/s] 14%|█▍ | 51845/371472 [4:08:10<25:05:00, 3.54it/s] 14%|█▍ | 51846/371472 [4:08:10<25:09:18, 3.53it/s] 14%|█▍ | 51847/371472 [4:08:11<24:40:53, 3.60it/s] 14%|█▍ | 51848/371472 [4:08:11<25:24:18, 3.49it/s] 14%|█▍ | 51849/371472 [4:08:11<25:16:13, 3.51it/s] 14%|█▍ | 51850/371472 [4:08:12<25:23:18, 3.50it/s] 14%|█▍ | 51851/371472 [4:08:12<24:32:12, 3.62it/s] 14%|█▍ | 51852/371472 [4:08:12<25:25:54, 3.49it/s] 14%|█▍ | 51853/371472 [4:08:13<27:41:43, 3.21it/s] 14%|█▍ | 51854/371472 [4:08:13<26:05:11, 3.40it/s] 14%|█▍ | 51855/371472 [4:08:13<25:53:29, 3.43it/s] 14%|█▍ | 51856/371472 [4:08:13<24:49:59, 3.58it/s] 14%|█▍ | 51857/371472 [4:08:14<23:35:01, 3.76it/s] 14%|█▍ | 51858/371472 [4:08:14<23:57:45, 3.71it/s] 14%|█▍ | 51859/371472 [4:08:14<23:12:37, 3.83it/s] 14%|█▍ | 51860/371472 [4:08:14<24:16:28, 3.66it/s] {'loss': 4.1491, 'learning_rate': 8.747710573380162e-07, 'epoch': 2.23} + 14%|█▍ | 51860/371472 [4:08:14<24:16:28, 3.66it/s] 14%|█▍ | 51861/371472 [4:08:15<24:18:54, 3.65it/s] 14%|█▍ | 51862/371472 [4:08:15<24:22:46, 3.64it/s] 14%|█▍ | 51863/371472 [4:08:15<24:27:02, 3.63it/s] 14%|█▍ | 51864/371472 [4:08:15<23:51:48, 3.72it/s] 14%|█▍ | 51865/371472 [4:08:16<24:50:10, 3.57it/s] 14%|█▍ | 51866/371472 [4:08:16<24:58:47, 3.55it/s] 14%|█▍ | 51867/371472 [4:08:16<24:18:24, 3.65it/s] 14%|█▍ | 51868/371472 [4:08:17<24:19:12, 3.65it/s] 14%|█▍ | 51869/371472 [4:08:17<26:18:16, 3.38it/s] 14%|█▍ | 51870/371472 [4:08:17<24:43:51, 3.59it/s] 14%|█▍ | 51871/371472 [4:08:17<24:19:33, 3.65it/s] 14%|█▍ | 51872/371472 [4:08:18<23:07:02, 3.84it/s] 14%|█▍ | 51873/371472 [4:08:18<23:51:17, 3.72it/s] 14%|█▍ | 51874/371472 [4:08:18<26:33:10, 3.34it/s] 14%|█▍ | 51875/371472 [4:08:19<27:12:11, 3.26it/s] 14%|█▍ | 51876/371472 [4:08:19<26:37:09, 3.34it/s] 14%|█▍ | 51877/371472 [4:08:19<26:10:55, 3.39it/s] 14%|█▍ | 51878/371472 [4:08:19<26:06:22, 3.40it/s] 14%|█▍ | 51879/371472 [4:08:20<26:27:44, 3.35it/s] 14%|█▍ | 51880/371472 [4:08:20<26:48:27, 3.31it/s] {'loss': 4.0953, 'learning_rate': 8.747225753625374e-07, 'epoch': 2.23} + 14%|█▍ | 51880/371472 [4:08:20<26:48:27, 3.31it/s] 14%|█▍ | 51881/371472 [4:08:20<25:58:56, 3.42it/s] 14%|█▍ | 51882/371472 [4:08:21<28:56:07, 3.07it/s] 14%|█▍ | 51883/371472 [4:08:21<26:55:21, 3.30it/s] 14%|█▍ | 51884/371472 [4:08:21<25:04:09, 3.54it/s] 14%|█▍ | 51885/371472 [4:08:22<24:42:28, 3.59it/s] 14%|█▍ | 51886/371472 [4:08:22<25:41:37, 3.46it/s] 14%|█▍ | 51887/371472 [4:08:22<24:55:04, 3.56it/s] 14%|█▍ | 51888/371472 [4:08:22<24:02:32, 3.69it/s] 14%|█▍ | 51889/371472 [4:08:23<23:32:17, 3.77it/s] 14%|█▍ | 51890/371472 [4:08:23<23:11:08, 3.83it/s] 14%|█▍ | 51891/371472 [4:08:23<23:29:59, 3.78it/s] 14%|█▍ | 51892/371472 [4:08:23<23:15:43, 3.82it/s] 14%|█▍ | 51893/371472 [4:08:24<23:36:03, 3.76it/s] 14%|█▍ | 51894/371472 [4:08:24<23:33:30, 3.77it/s] 14%|█▍ | 51895/371472 [4:08:24<23:13:19, 3.82it/s] 14%|█▍ | 51896/371472 [4:08:24<22:57:36, 3.87it/s] 14%|█▍ | 51897/371472 [4:08:25<24:50:00, 3.57it/s] 14%|█▍ | 51898/371472 [4:08:25<24:44:33, 3.59it/s] 14%|█▍ | 51899/371472 [4:08:25<24:05:04, 3.69it/s] 14%|█▍ | 51900/371472 [4:08:26<24:19:50, 3.65it/s] {'loss': 4.2957, 'learning_rate': 8.746740933870586e-07, 'epoch': 2.24} + 14%|█▍ | 51900/371472 [4:08:26<24:19:50, 3.65it/s] 14%|█▍ | 51901/371472 [4:08:26<24:17:04, 3.66it/s] 14%|█▍ | 51902/371472 [4:08:26<24:07:09, 3.68it/s] 14%|█▍ | 51903/371472 [4:08:26<24:19:28, 3.65it/s] 14%|█▍ | 51904/371472 [4:08:27<25:25:15, 3.49it/s] 14%|█▍ | 51905/371472 [4:08:27<27:45:58, 3.20it/s] 14%|█▍ | 51906/371472 [4:08:27<26:41:32, 3.33it/s] 14%|█▍ | 51907/371472 [4:08:28<26:57:39, 3.29it/s] 14%|█▍ | 51908/371472 [4:08:28<25:50:03, 3.44it/s] 14%|█▍ | 51909/371472 [4:08:28<25:20:16, 3.50it/s] 14%|█▍ | 51910/371472 [4:08:29<26:03:05, 3.41it/s] 14%|█▍ | 51911/371472 [4:08:29<25:04:10, 3.54it/s] 14%|█▍ | 51912/371472 [4:08:29<24:55:28, 3.56it/s] 14%|█▍ | 51913/371472 [4:08:29<25:33:26, 3.47it/s] 14%|█▍ | 51914/371472 [4:08:30<24:53:51, 3.57it/s] 14%|█▍ | 51915/371472 [4:08:30<24:39:34, 3.60it/s] 14%|█▍ | 51916/371472 [4:08:30<24:17:58, 3.65it/s] 14%|█▍ | 51917/371472 [4:08:30<24:00:38, 3.70it/s] 14%|█▍ | 51918/371472 [4:08:31<23:45:46, 3.74it/s] 14%|█▍ | 51919/371472 [4:08:31<26:06:44, 3.40it/s] 14%|█▍ | 51920/371472 [4:08:31<24:49:53, 3.57it/s] {'loss': 4.0886, 'learning_rate': 8.746256114115796e-07, 'epoch': 2.24} + 14%|█▍ | 51920/371472 [4:08:31<24:49:53, 3.57it/s] 14%|█▍ | 51921/371472 [4:08:32<24:18:27, 3.65it/s] 14%|█▍ | 51922/371472 [4:08:32<24:19:48, 3.65it/s] 14%|█▍ | 51923/371472 [4:08:32<23:31:23, 3.77it/s] 14%|█▍ | 51924/371472 [4:08:32<23:34:17, 3.77it/s] 14%|█▍ | 51925/371472 [4:08:33<23:17:29, 3.81it/s] 14%|█▍ | 51926/371472 [4:08:33<23:56:06, 3.71it/s] 14%|█▍ | 51927/371472 [4:08:33<23:50:39, 3.72it/s] 14%|█▍ | 51928/371472 [4:08:33<24:11:46, 3.67it/s] 14%|█▍ | 51929/371472 [4:08:34<24:43:02, 3.59it/s] 14%|█▍ | 51930/371472 [4:08:34<23:37:04, 3.76it/s] 14%|█▍ | 51931/371472 [4:08:34<22:49:24, 3.89it/s] 14%|█▍ | 51932/371472 [4:08:34<23:01:25, 3.86it/s] 14%|█▍ | 51933/371472 [4:08:35<22:45:46, 3.90it/s] 14%|█▍ | 51934/371472 [4:08:35<23:04:23, 3.85it/s] 14%|█▍ | 51935/371472 [4:08:35<22:50:06, 3.89it/s] 14%|█▍ | 51936/371472 [4:08:35<22:19:49, 3.97it/s] 14%|█▍ | 51937/371472 [4:08:36<22:09:13, 4.01it/s] 14%|█▍ | 51938/371472 [4:08:36<23:10:03, 3.83it/s] 14%|█▍ | 51939/371472 [4:08:36<22:53:13, 3.88it/s] 14%|█▍ | 51940/371472 [4:08:37<24:32:20, 3.62it/s] {'loss': 4.2769, 'learning_rate': 8.745771294361006e-07, 'epoch': 2.24} + 14%|█▍ | 51940/371472 [4:08:37<24:32:20, 3.62it/s] 14%|█▍ | 51941/371472 [4:08:37<24:13:16, 3.66it/s] 14%|█▍ | 51942/371472 [4:08:37<23:43:38, 3.74it/s] 14%|█▍ | 51943/371472 [4:08:37<23:24:42, 3.79it/s] 14%|█▍ | 51944/371472 [4:08:38<23:32:01, 3.77it/s] 14%|█▍ | 51945/371472 [4:08:38<22:55:36, 3.87it/s] 14%|█▍ | 51946/371472 [4:08:38<23:43:16, 3.74it/s] 14%|█▍ | 51947/371472 [4:08:38<24:13:01, 3.67it/s] 14%|█▍ | 51948/371472 [4:08:39<25:18:57, 3.51it/s] 14%|█▍ | 51949/371472 [4:08:39<26:12:45, 3.39it/s] 14%|█▍ | 51950/371472 [4:08:39<26:56:26, 3.29it/s] 14%|█▍ | 51951/371472 [4:08:40<25:27:33, 3.49it/s] 14%|█▍ | 51952/371472 [4:08:40<25:04:10, 3.54it/s] 14%|█▍ | 51953/371472 [4:08:40<24:04:09, 3.69it/s] 14%|█▍ | 51954/371472 [4:08:40<24:03:32, 3.69it/s] 14%|█▍ | 51955/371472 [4:08:41<23:54:27, 3.71it/s] 14%|█▍ | 51956/371472 [4:08:41<23:30:00, 3.78it/s] 14%|█▍ | 51957/371472 [4:08:41<24:31:09, 3.62it/s] 14%|█▍ | 51958/371472 [4:08:42<25:40:06, 3.46it/s] 14%|█▍ | 51959/371472 [4:08:42<25:31:44, 3.48it/s] 14%|█▍ | 51960/371472 [4:08:42<24:10:44, 3.67it/s] {'loss': 4.2706, 'learning_rate': 8.745286474606218e-07, 'epoch': 2.24} + 14%|█▍ | 51960/371472 [4:08:42<24:10:44, 3.67it/s] 14%|█▍ | 51961/371472 [4:08:42<24:37:38, 3.60it/s] 14%|█▍ | 51962/371472 [4:08:43<25:20:58, 3.50it/s] 14%|█▍ | 51963/371472 [4:08:43<25:15:42, 3.51it/s] 14%|█▍ | 51964/371472 [4:08:43<25:30:40, 3.48it/s] 14%|█▍ | 51965/371472 [4:08:43<24:29:26, 3.62it/s] 14%|█▍ | 51966/371472 [4:08:44<24:21:00, 3.64it/s] 14%|█▍ | 51967/371472 [4:08:44<35:45:22, 2.48it/s] 14%|█▍ | 51968/371472 [4:08:45<31:47:52, 2.79it/s] 14%|█▍ | 51969/371472 [4:08:45<29:33:47, 3.00it/s] 14%|█▍ | 51970/371472 [4:08:45<27:10:54, 3.27it/s] 14%|█▍ | 51971/371472 [4:08:45<26:04:47, 3.40it/s] 14%|█▍ | 51972/371472 [4:08:46<24:46:13, 3.58it/s] 14%|█▍ | 51973/371472 [4:08:46<25:10:40, 3.52it/s] 14%|█▍ | 51974/371472 [4:08:46<25:09:01, 3.53it/s] 14%|█▍ | 51975/371472 [4:08:47<26:04:27, 3.40it/s] 14%|█▍ | 51976/371472 [4:08:47<25:34:28, 3.47it/s] 14%|█▍ | 51977/371472 [4:08:47<26:10:29, 3.39it/s] 14%|█▍ | 51978/371472 [4:08:48<26:08:59, 3.39it/s] 14%|█▍ | 51979/371472 [4:08:48<24:43:12, 3.59it/s] 14%|█▍ | 51980/371472 [4:08:48<24:28:43, 3.63it/s] {'loss': 4.2817, 'learning_rate': 8.74480165485143e-07, 'epoch': 2.24} + 14%|█▍ | 51980/371472 [4:08:48<24:28:43, 3.63it/s] 14%|█▍ | 51981/371472 [4:08:48<24:46:55, 3.58it/s] 14%|█▍ | 51982/371472 [4:08:49<24:43:32, 3.59it/s] 14%|█▍ | 51983/371472 [4:08:49<24:38:15, 3.60it/s] 14%|█▍ | 51984/371472 [4:08:49<23:49:35, 3.72it/s] 14%|█▍ | 51985/371472 [4:08:49<24:36:01, 3.61it/s] 14%|█▍ | 51986/371472 [4:08:50<23:36:58, 3.76it/s] 14%|█▍ | 51987/371472 [4:08:50<24:15:28, 3.66it/s] 14%|█▍ | 51988/371472 [4:08:50<24:55:30, 3.56it/s] 14%|█▍ | 51989/371472 [4:08:51<27:25:37, 3.24it/s] 14%|█▍ | 51990/371472 [4:08:51<27:14:24, 3.26it/s] 14%|█▍ | 51991/371472 [4:08:51<26:45:11, 3.32it/s] 14%|█▍ | 51992/371472 [4:08:51<26:12:00, 3.39it/s] 14%|█▍ | 51993/371472 [4:08:52<25:23:01, 3.50it/s] 14%|█▍ | 51994/371472 [4:08:52<24:25:31, 3.63it/s] 14%|█▍ | 51995/371472 [4:08:52<23:58:58, 3.70it/s] 14%|█▍ | 51996/371472 [4:08:53<23:19:52, 3.80it/s] 14%|█▍ | 51997/371472 [4:08:53<23:29:40, 3.78it/s] 14%|█▍ | 51998/371472 [4:08:53<22:58:02, 3.86it/s] 14%|█▍ | 51999/371472 [4:08:53<23:23:22, 3.79it/s] 14%|█▍ | 52000/371472 [4:08:54<23:43:55, 3.74it/s] {'loss': 4.1763, 'learning_rate': 8.744316835096641e-07, 'epoch': 2.24} + 14%|█▍ | 52000/371472 [4:08:54<23:43:55, 3.74it/s] 14%|█▍ | 52001/371472 [4:08:54<24:43:19, 3.59it/s] 14%|█▍ | 52002/371472 [4:08:54<25:13:22, 3.52it/s] 14%|█▍ | 52003/371472 [4:08:54<24:54:19, 3.56it/s] 14%|█▍ | 52004/371472 [4:08:55<27:24:07, 3.24it/s] 14%|█▍ | 52005/371472 [4:08:55<26:55:00, 3.30it/s] 14%|█▍ | 52006/371472 [4:08:55<25:58:16, 3.42it/s] 14%|█▍ | 52007/371472 [4:08:56<24:43:35, 3.59it/s] 14%|█▍ | 52008/371472 [4:08:56<24:20:12, 3.65it/s] 14%|█▍ | 52009/371472 [4:08:56<25:11:15, 3.52it/s] 14%|█▍ | 52010/371472 [4:08:56<24:15:15, 3.66it/s] 14%|█▍ | 52011/371472 [4:08:57<25:19:37, 3.50it/s] 14%|█▍ | 52012/371472 [4:08:57<25:21:14, 3.50it/s] 14%|█▍ | 52013/371472 [4:08:57<24:09:50, 3.67it/s] 14%|█▍ | 52014/371472 [4:08:58<24:27:42, 3.63it/s] 14%|█▍ | 52015/371472 [4:08:58<25:38:58, 3.46it/s] 14%|█▍ | 52016/371472 [4:08:58<25:45:07, 3.45it/s] 14%|█▍ | 52017/371472 [4:08:58<26:04:56, 3.40it/s] 14%|█▍ | 52018/371472 [4:08:59<24:25:31, 3.63it/s] 14%|█▍ | 52019/371472 [4:08:59<23:32:10, 3.77it/s] 14%|█▍ | 52020/371472 [4:08:59<24:55:07, 3.56it/s] {'loss': 4.3284, 'learning_rate': 8.743832015341851e-07, 'epoch': 2.24} + 14%|█▍ | 52020/371472 [4:08:59<24:55:07, 3.56it/s] 14%|█▍ | 52021/371472 [4:09:00<25:28:08, 3.48it/s] 14%|█▍ | 52022/371472 [4:09:00<24:24:22, 3.64it/s] 14%|█▍ | 52023/371472 [4:09:00<24:13:37, 3.66it/s] 14%|█▍ | 52024/371472 [4:09:00<24:55:59, 3.56it/s] 14%|█▍ | 52025/371472 [4:09:01<25:13:46, 3.52it/s] 14%|█▍ | 52026/371472 [4:09:01<24:47:15, 3.58it/s] 14%|█▍ | 52027/371472 [4:09:01<24:11:46, 3.67it/s] 14%|█▍ | 52028/371472 [4:09:01<23:53:56, 3.71it/s] 14%|█▍ | 52029/371472 [4:09:02<24:19:17, 3.65it/s] 14%|█▍ | 52030/371472 [4:09:02<23:38:52, 3.75it/s] 14%|█▍ | 52031/371472 [4:09:02<24:26:13, 3.63it/s] 14%|█▍ | 52032/371472 [4:09:03<24:30:41, 3.62it/s] 14%|█▍ | 52033/371472 [4:09:03<25:06:32, 3.53it/s] 14%|█▍ | 52034/371472 [4:09:03<25:09:41, 3.53it/s] 14%|█▍ | 52035/371472 [4:09:04<27:26:39, 3.23it/s] 14%|█▍ | 52036/371472 [4:09:04<26:10:44, 3.39it/s] 14%|█▍ | 52037/371472 [4:09:04<25:44:34, 3.45it/s] 14%|█▍ | 52038/371472 [4:09:04<24:50:32, 3.57it/s] 14%|█▍ | 52039/371472 [4:09:05<25:21:48, 3.50it/s] 14%|█▍ | 52040/371472 [4:09:05<25:15:45, 3.51it/s] {'loss': 4.3196, 'learning_rate': 8.743347195587063e-07, 'epoch': 2.24} + 14%|█▍ | 52040/371472 [4:09:05<25:15:45, 3.51it/s] 14%|█▍ | 52041/371472 [4:09:05<25:10:01, 3.53it/s] 14%|█▍ | 52042/371472 [4:09:05<24:22:33, 3.64it/s] 14%|█▍ | 52043/371472 [4:09:06<24:19:52, 3.65it/s] 14%|█▍ | 52044/371472 [4:09:06<23:44:41, 3.74it/s] 14%|█▍ | 52045/371472 [4:09:06<26:10:40, 3.39it/s] 14%|█▍ | 52046/371472 [4:09:07<26:10:59, 3.39it/s] 14%|█▍ | 52047/371472 [4:09:07<26:16:53, 3.38it/s] 14%|█▍ | 52048/371472 [4:09:07<25:22:09, 3.50it/s] 14%|█▍ | 52049/371472 [4:09:07<24:50:05, 3.57it/s] 14%|█▍ | 52050/371472 [4:09:08<24:04:43, 3.68it/s] 14%|█▍ | 52051/371472 [4:09:08<23:36:52, 3.76it/s] 14%|█▍ | 52052/371472 [4:09:08<24:51:48, 3.57it/s] 14%|█▍ | 52053/371472 [4:09:09<24:38:07, 3.60it/s] 14%|█▍ | 52054/371472 [4:09:09<24:11:19, 3.67it/s] 14%|█▍ | 52055/371472 [4:09:09<23:53:03, 3.71it/s] 14%|█▍ | 52056/371472 [4:09:09<26:01:18, 3.41it/s] 14%|█▍ | 52057/371472 [4:09:10<27:41:43, 3.20it/s] 14%|█▍ | 52058/371472 [4:09:10<27:51:16, 3.19it/s] 14%|█▍ | 52059/371472 [4:09:10<26:20:37, 3.37it/s] 14%|█▍ | 52060/371472 [4:09:11<25:17:16, 3.51it/s] {'loss': 4.4508, 'learning_rate': 8.742862375832273e-07, 'epoch': 2.24} + 14%|█▍ | 52060/371472 [4:09:11<25:17:16, 3.51it/s] 14%|█▍ | 52061/371472 [4:09:11<24:11:18, 3.67it/s] 14%|█▍ | 52062/371472 [4:09:11<23:32:05, 3.77it/s] 14%|█▍ | 52063/371472 [4:09:11<23:03:40, 3.85it/s] 14%|█▍ | 52064/371472 [4:09:12<23:02:31, 3.85it/s] 14%|█▍ | 52065/371472 [4:09:12<23:18:17, 3.81it/s] 14%|█▍ | 52066/371472 [4:09:12<23:15:08, 3.82it/s] 14%|█▍ | 52067/371472 [4:09:12<25:25:57, 3.49it/s] 14%|█▍ | 52068/371472 [4:09:13<24:35:16, 3.61it/s] 14%|█▍ | 52069/371472 [4:09:13<26:15:36, 3.38it/s] 14%|█▍ | 52070/371472 [4:09:13<24:44:24, 3.59it/s] 14%|█▍ | 52071/371472 [4:09:14<24:58:07, 3.55it/s] 14%|█▍ | 52072/371472 [4:09:14<25:39:08, 3.46it/s] 14%|█▍ | 52073/371472 [4:09:14<24:39:49, 3.60it/s] 14%|█▍ | 52074/371472 [4:09:15<28:18:37, 3.13it/s] 14%|█▍ | 52075/371472 [4:09:15<27:33:35, 3.22it/s] 14%|█▍ | 52076/371472 [4:09:15<26:40:57, 3.33it/s] 14%|█▍ | 52077/371472 [4:09:15<25:59:59, 3.41it/s] 14%|█▍ | 52078/371472 [4:09:16<25:12:52, 3.52it/s] 14%|█▍ | 52079/371472 [4:09:16<26:33:45, 3.34it/s] 14%|█▍ | 52080/371472 [4:09:16<26:16:42, 3.38it/s] {'loss': 4.0581, 'learning_rate': 8.742377556077485e-07, 'epoch': 2.24} + 14%|█▍ | 52080/371472 [4:09:16<26:16:42, 3.38it/s] 14%|█▍ | 52081/371472 [4:09:17<25:13:42, 3.52it/s] 14%|█▍ | 52082/371472 [4:09:17<25:39:00, 3.46it/s] 14%|█▍ | 52083/371472 [4:09:17<25:05:01, 3.54it/s] 14%|█▍ | 52084/371472 [4:09:17<25:35:48, 3.47it/s] 14%|█▍ | 52085/371472 [4:09:18<24:57:38, 3.55it/s] 14%|█▍ | 52086/371472 [4:09:18<23:54:07, 3.71it/s] 14%|█▍ | 52087/371472 [4:09:18<23:07:00, 3.84it/s] 14%|█▍ | 52088/371472 [4:09:18<23:02:56, 3.85it/s] 14%|█▍ | 52089/371472 [4:09:19<22:47:05, 3.89it/s] 14%|█▍ | 52090/371472 [4:09:19<23:25:55, 3.79it/s] 14%|█▍ | 52091/371472 [4:09:19<25:05:16, 3.54it/s] 14%|█▍ | 52092/371472 [4:09:20<24:13:12, 3.66it/s] 14%|█▍ | 52093/371472 [4:09:20<24:06:30, 3.68it/s] 14%|█▍ | 52094/371472 [4:09:20<25:14:03, 3.52it/s] 14%|█▍ | 52095/371472 [4:09:20<26:01:12, 3.41it/s] 14%|█▍ | 52096/371472 [4:09:21<24:50:23, 3.57it/s] 14%|█▍ | 52097/371472 [4:09:21<25:16:01, 3.51it/s] 14%|█▍ | 52098/371472 [4:09:21<25:22:48, 3.50it/s] 14%|█▍ | 52099/371472 [4:09:22<25:03:39, 3.54it/s] 14%|█▍ | 52100/371472 [4:09:22<24:42:23, 3.59it/s] {'loss': 4.3669, 'learning_rate': 8.741892736322696e-07, 'epoch': 2.24} + 14%|█▍ | 52100/371472 [4:09:22<24:42:23, 3.59it/s] 14%|█▍ | 52101/371472 [4:09:22<23:51:08, 3.72it/s] 14%|█▍ | 52102/371472 [4:09:22<23:43:12, 3.74it/s] 14%|█▍ | 52103/371472 [4:09:23<23:04:14, 3.85it/s] 14%|█▍ | 52104/371472 [4:09:23<24:43:17, 3.59it/s] 14%|█▍ | 52105/371472 [4:09:23<23:30:08, 3.77it/s] 14%|█▍ | 52106/371472 [4:09:23<22:48:49, 3.89it/s] 14%|█▍ | 52107/371472 [4:09:24<22:30:43, 3.94it/s] 14%|█▍ | 52108/371472 [4:09:24<22:31:45, 3.94it/s] 14%|█▍ | 52109/371472 [4:09:24<24:01:27, 3.69it/s] 14%|█▍ | 52110/371472 [4:09:24<25:17:03, 3.51it/s] 14%|█▍ | 52111/371472 [4:09:25<24:26:27, 3.63it/s] 14%|█▍ | 52112/371472 [4:09:25<24:17:46, 3.65it/s] 14%|█▍ | 52113/371472 [4:09:25<25:46:45, 3.44it/s] 14%|█▍ | 52114/371472 [4:09:26<25:46:01, 3.44it/s] 14%|█▍ | 52115/371472 [4:09:26<24:28:58, 3.62it/s] 14%|█▍ | 52116/371472 [4:09:26<24:34:48, 3.61it/s] 14%|█▍ | 52117/371472 [4:09:26<25:13:47, 3.52it/s] 14%|█▍ | 52118/371472 [4:09:27<24:55:05, 3.56it/s] 14%|█▍ | 52119/371472 [4:09:27<23:51:42, 3.72it/s] 14%|█▍ | 52120/371472 [4:09:27<23:06:48, 3.84it/s] {'loss': 4.1813, 'learning_rate': 8.741407916567907e-07, 'epoch': 2.24} + 14%|█▍ | 52120/371472 [4:09:27<23:06:48, 3.84it/s] 14%|█▍ | 52121/371472 [4:09:27<23:29:58, 3.77it/s] 14%|█▍ | 52122/371472 [4:09:28<23:59:28, 3.70it/s] 14%|█▍ | 52123/371472 [4:09:28<25:38:29, 3.46it/s] 14%|█▍ | 52124/371472 [4:09:28<27:16:35, 3.25it/s] 14%|█▍ | 52125/371472 [4:09:29<26:59:37, 3.29it/s] 14%|█▍ | 52126/371472 [4:09:29<26:01:46, 3.41it/s] 14%|█▍ | 52127/371472 [4:09:29<28:46:41, 3.08it/s] 14%|█▍ | 52128/371472 [4:09:30<27:48:05, 3.19it/s] 14%|█▍ | 52129/371472 [4:09:30<26:01:09, 3.41it/s] 14%|█▍ | 52130/371472 [4:09:30<25:28:55, 3.48it/s] 14%|█▍ | 52131/371472 [4:09:30<25:08:24, 3.53it/s] 14%|█▍ | 52132/371472 [4:09:31<27:08:50, 3.27it/s] 14%|█▍ | 52133/371472 [4:09:31<25:47:29, 3.44it/s] 14%|█▍ | 52134/371472 [4:09:31<24:52:53, 3.57it/s] 14%|█▍ | 52135/371472 [4:09:32<24:34:33, 3.61it/s] 14%|█▍ | 52136/371472 [4:09:32<24:50:19, 3.57it/s] 14%|█▍ | 52137/371472 [4:09:32<24:36:43, 3.60it/s] 14%|█▍ | 52138/371472 [4:09:32<25:08:46, 3.53it/s] 14%|█▍ | 52139/371472 [4:09:33<24:53:17, 3.56it/s] 14%|█▍ | 52140/371472 [4:09:33<25:22:02, 3.50it/s] {'loss': 4.2587, 'learning_rate': 8.740923096813118e-07, 'epoch': 2.25} + 14%|█▍ | 52140/371472 [4:09:33<25:22:02, 3.50it/s] 14%|█▍ | 52141/371472 [4:09:33<25:52:56, 3.43it/s] 14%|█▍ | 52142/371472 [4:09:34<24:17:46, 3.65it/s] 14%|█▍ | 52143/371472 [4:09:34<24:18:30, 3.65it/s] 14%|█▍ | 52144/371472 [4:09:34<24:13:01, 3.66it/s] 14%|█▍ | 52145/371472 [4:09:34<24:07:42, 3.68it/s] 14%|█▍ | 52146/371472 [4:09:35<23:31:35, 3.77it/s] 14%|█▍ | 52147/371472 [4:09:35<23:43:43, 3.74it/s] 14%|█▍ | 52148/371472 [4:09:35<23:40:10, 3.75it/s] 14%|█▍ | 52149/371472 [4:09:35<23:25:21, 3.79it/s] 14%|█▍ | 52150/371472 [4:09:36<23:37:18, 3.76it/s] 14%|█▍ | 52151/371472 [4:09:36<23:18:34, 3.81it/s] 14%|█▍ | 52152/371472 [4:09:36<23:55:04, 3.71it/s] 14%|█▍ | 52153/371472 [4:09:37<24:17:54, 3.65it/s] 14%|█▍ | 52154/371472 [4:09:37<23:27:53, 3.78it/s] 14%|█▍ | 52155/371472 [4:09:37<24:03:46, 3.69it/s] 14%|█▍ | 52156/371472 [4:09:37<23:47:32, 3.73it/s] 14%|█▍ | 52157/371472 [4:09:38<23:43:33, 3.74it/s] 14%|█▍ | 52158/371472 [4:09:38<25:21:51, 3.50it/s] 14%|█▍ | 52159/371472 [4:09:38<25:03:13, 3.54it/s] 14%|█▍ | 52160/371472 [4:09:39<26:35:08, 3.34it/s] {'loss': 4.1264, 'learning_rate': 8.74043827705833e-07, 'epoch': 2.25} + 14%|█▍ | 52160/371472 [4:09:39<26:35:08, 3.34it/s] 14%|█▍ | 52161/371472 [4:09:39<25:26:09, 3.49it/s] 14%|█▍ | 52162/371472 [4:09:39<25:56:11, 3.42it/s] 14%|█▍ | 52163/371472 [4:09:39<26:23:08, 3.36it/s] 14%|█▍ | 52164/371472 [4:09:40<26:00:40, 3.41it/s] 14%|█▍ | 52165/371472 [4:09:40<25:57:23, 3.42it/s] 14%|█▍ | 52166/371472 [4:09:40<25:57:04, 3.42it/s] 14%|█▍ | 52167/371472 [4:09:41<25:05:11, 3.54it/s] 14%|█▍ | 52168/371472 [4:09:41<26:19:40, 3.37it/s] 14%|█▍ | 52169/371472 [4:09:41<25:46:00, 3.44it/s] 14%|█▍ | 52170/371472 [4:09:41<25:58:10, 3.42it/s] 14%|█▍ | 52171/371472 [4:09:42<24:55:38, 3.56it/s] 14%|█▍ | 52172/371472 [4:09:42<24:57:02, 3.55it/s] 14%|█▍ | 52173/371472 [4:09:42<24:43:29, 3.59it/s] 14%|█▍ | 52174/371472 [4:09:43<24:19:28, 3.65it/s] 14%|█▍ | 52175/371472 [4:09:43<25:09:52, 3.52it/s] 14%|█▍ | 52176/371472 [4:09:43<24:15:05, 3.66it/s] 14%|█▍ | 52177/371472 [4:09:43<24:21:15, 3.64it/s] 14%|█▍ | 52178/371472 [4:09:44<24:03:53, 3.69it/s] 14%|█▍ | 52179/371472 [4:09:44<23:39:24, 3.75it/s] 14%|█▍ | 52180/371472 [4:09:44<23:01:09, 3.85it/s] {'loss': 4.2917, 'learning_rate': 8.739953457303539e-07, 'epoch': 2.25} + 14%|█▍ | 52180/371472 [4:09:44<23:01:09, 3.85it/s] 14%|█▍ | 52181/371472 [4:09:44<23:35:31, 3.76it/s] 14%|█▍ | 52182/371472 [4:09:45<23:15:41, 3.81it/s] 14%|█▍ | 52183/371472 [4:09:45<23:56:45, 3.70it/s] 14%|█▍ | 52184/371472 [4:09:45<24:19:03, 3.65it/s] 14%|█▍ | 52185/371472 [4:09:46<25:20:43, 3.50it/s] 14%|█▍ | 52186/371472 [4:09:46<24:42:32, 3.59it/s] 14%|█▍ | 52187/371472 [4:09:46<24:49:29, 3.57it/s] 14%|█▍ | 52188/371472 [4:09:46<24:45:33, 3.58it/s] 14%|█▍ | 52189/371472 [4:09:47<25:57:31, 3.42it/s] 14%|█▍ | 52190/371472 [4:09:47<25:52:41, 3.43it/s] 14%|█▍ | 52191/371472 [4:09:47<26:42:37, 3.32it/s] 14%|█▍ | 52192/371472 [4:09:48<26:44:32, 3.32it/s] 14%|█▍ | 52193/371472 [4:09:48<26:13:58, 3.38it/s] 14%|█▍ | 52194/371472 [4:09:48<25:00:08, 3.55it/s] 14%|█▍ | 52195/371472 [4:09:48<24:22:27, 3.64it/s] 14%|█▍ | 52196/371472 [4:09:49<25:07:01, 3.53it/s] 14%|█▍ | 52197/371472 [4:09:49<24:21:49, 3.64it/s] 14%|█▍ | 52198/371472 [4:09:49<29:03:35, 3.05it/s] 14%|█▍ | 52199/371472 [4:09:50<29:20:37, 3.02it/s] 14%|█▍ | 52200/371472 [4:09:50<26:47:06, 3.31it/s] {'loss': 4.3568, 'learning_rate': 8.739468637548751e-07, 'epoch': 2.25} + 14%|█▍ | 52200/371472 [4:09:50<26:47:06, 3.31it/s] 14%|█▍ | 52201/371472 [4:09:50<27:04:02, 3.28it/s] 14%|█▍ | 52202/371472 [4:09:51<26:37:01, 3.33it/s] 14%|█▍ | 52203/371472 [4:09:51<26:01:10, 3.41it/s] 14%|█▍ | 52204/371472 [4:09:51<24:26:40, 3.63it/s] 14%|█▍ | 52205/371472 [4:09:51<25:31:55, 3.47it/s] 14%|█▍ | 52206/371472 [4:09:52<24:02:42, 3.69it/s] 14%|█▍ | 52207/371472 [4:09:52<23:33:23, 3.76it/s] 14%|█▍ | 52208/371472 [4:09:52<23:02:55, 3.85it/s] 14%|█▍ | 52209/371472 [4:09:52<23:37:19, 3.75it/s] 14%|█▍ | 52210/371472 [4:09:53<23:02:56, 3.85it/s] 14%|█▍ | 52211/371472 [4:09:53<22:26:50, 3.95it/s] 14%|█▍ | 52212/371472 [4:09:53<23:48:08, 3.73it/s] 14%|█▍ | 52213/371472 [4:09:53<23:22:58, 3.79it/s] 14%|█▍ | 52214/371472 [4:09:54<23:29:51, 3.77it/s] 14%|█▍ | 52215/371472 [4:09:54<23:15:19, 3.81it/s] 14%|█▍ | 52216/371472 [4:09:54<23:43:58, 3.74it/s] 14%|█▍ | 52217/371472 [4:09:55<24:41:38, 3.59it/s] 14%|█▍ | 52218/371472 [4:09:55<23:53:56, 3.71it/s] 14%|█▍ | 52219/371472 [4:09:55<25:15:17, 3.51it/s] 14%|█▍ | 52220/371472 [4:09:56<27:30:20, 3.22it/s] {'loss': 4.3348, 'learning_rate': 8.738983817793962e-07, 'epoch': 2.25} + 14%|█▍ | 52220/371472 [4:09:56<27:30:20, 3.22it/s] 14%|█▍ | 52221/371472 [4:09:56<26:03:52, 3.40it/s] 14%|█▍ | 52222/371472 [4:09:56<26:20:30, 3.37it/s] 14%|█▍ | 52223/371472 [4:09:56<26:18:11, 3.37it/s] 14%|█▍ | 52224/371472 [4:09:57<25:40:43, 3.45it/s] 14%|█▍ | 52225/371472 [4:09:57<25:11:59, 3.52it/s] 14%|█▍ | 52226/371472 [4:09:57<24:05:57, 3.68it/s] 14%|█▍ | 52227/371472 [4:09:57<24:19:12, 3.65it/s] 14%|█▍ | 52228/371472 [4:09:58<26:19:06, 3.37it/s] 14%|█▍ | 52229/371472 [4:09:58<25:47:21, 3.44it/s] 14%|█▍ | 52230/371472 [4:09:58<25:03:43, 3.54it/s] 14%|█▍ | 52231/371472 [4:09:59<24:23:06, 3.64it/s] 14%|█▍ | 52232/371472 [4:09:59<24:25:11, 3.63it/s] 14%|█▍ | 52233/371472 [4:09:59<24:25:04, 3.63it/s] 14%|█▍ | 52234/371472 [4:09:59<25:13:21, 3.52it/s] 14%|█▍ | 52235/371472 [4:10:00<26:09:46, 3.39it/s] 14%|█▍ | 52236/371472 [4:10:00<25:51:23, 3.43it/s] 14%|█▍ | 52237/371472 [4:10:00<24:52:19, 3.57it/s] 14%|█▍ | 52238/371472 [4:10:01<25:31:50, 3.47it/s] 14%|█▍ | 52239/371472 [4:10:01<25:04:55, 3.54it/s] 14%|█▍ | 52240/371472 [4:10:01<24:41:02, 3.59it/s] {'loss': 4.3902, 'learning_rate': 8.738498998039173e-07, 'epoch': 2.25} + 14%|█▍ | 52240/371472 [4:10:01<24:41:02, 3.59it/s] 14%|█▍ | 52241/371472 [4:10:01<24:18:05, 3.65it/s] 14%|█▍ | 52242/371472 [4:10:02<24:09:00, 3.67it/s] 14%|█▍ | 52243/371472 [4:10:02<24:58:11, 3.55it/s] 14%|█▍ | 52244/371472 [4:10:02<24:56:55, 3.55it/s] 14%|█▍ | 52245/371472 [4:10:03<25:12:32, 3.52it/s] 14%|█▍ | 52246/371472 [4:10:03<26:08:34, 3.39it/s] 14%|█▍ | 52247/371472 [4:10:03<25:12:11, 3.52it/s] 14%|█▍ | 52248/371472 [4:10:03<24:03:25, 3.69it/s] 14%|█▍ | 52249/371472 [4:10:04<24:29:43, 3.62it/s] 14%|█▍ | 52250/371472 [4:10:04<24:05:33, 3.68it/s] 14%|█▍ | 52251/371472 [4:10:04<24:17:06, 3.65it/s] 14%|█▍ | 52252/371472 [4:10:04<23:47:35, 3.73it/s] 14%|█▍ | 52253/371472 [4:10:05<23:09:18, 3.83it/s] 14%|█▍ | 52254/371472 [4:10:05<23:22:21, 3.79it/s] 14%|█▍ | 52255/371472 [4:10:05<25:06:08, 3.53it/s] 14%|█▍ | 52256/371472 [4:10:06<24:45:39, 3.58it/s] 14%|█▍ | 52257/371472 [4:10:06<25:41:40, 3.45it/s] 14%|█▍ | 52258/371472 [4:10:06<25:24:29, 3.49it/s] 14%|█▍ | 52259/371472 [4:10:06<24:46:14, 3.58it/s] 14%|█▍ | 52260/371472 [4:10:07<25:03:08, 3.54it/s] {'loss': 4.1351, 'learning_rate': 8.738014178284384e-07, 'epoch': 2.25} + 14%|█▍ | 52260/371472 [4:10:07<25:03:08, 3.54it/s] 14%|█▍ | 52261/371472 [4:10:07<24:28:48, 3.62it/s] 14%|█▍ | 52262/371472 [4:10:07<24:15:37, 3.65it/s] 14%|█▍ | 52263/371472 [4:10:08<26:06:19, 3.40it/s] 14%|█▍ | 52264/371472 [4:10:08<25:39:00, 3.46it/s] 14%|█▍ | 52265/371472 [4:10:08<25:53:30, 3.42it/s] 14%|█▍ | 52266/371472 [4:10:09<27:37:36, 3.21it/s] 14%|█▍ | 52267/371472 [4:10:09<27:38:13, 3.21it/s] 14%|█▍ | 52268/371472 [4:10:09<28:54:25, 3.07it/s] 14%|█▍ | 52269/371472 [4:10:09<27:11:44, 3.26it/s] 14%|█▍ | 52270/371472 [4:10:10<26:02:25, 3.40it/s] 14%|█▍ | 52271/371472 [4:10:10<24:46:38, 3.58it/s] 14%|█▍ | 52272/371472 [4:10:10<23:54:34, 3.71it/s] 14%|█▍ | 52273/371472 [4:10:10<23:18:18, 3.80it/s] 14%|█▍ | 52274/371472 [4:10:11<23:19:31, 3.80it/s] 14%|█▍ | 52275/371472 [4:10:11<25:05:43, 3.53it/s] 14%|█▍ | 52276/371472 [4:10:11<23:58:47, 3.70it/s] 14%|█▍ | 52277/371472 [4:10:12<22:58:41, 3.86it/s] 14%|█▍ | 52278/371472 [4:10:12<22:42:21, 3.90it/s] 14%|█▍ | 52279/371472 [4:10:12<23:47:58, 3.73it/s] 14%|█▍ | 52280/371472 [4:10:12<23:24:15, 3.79it/s] {'loss': 4.3549, 'learning_rate': 8.737529358529596e-07, 'epoch': 2.25} + 14%|█▍ | 52280/371472 [4:10:12<23:24:15, 3.79it/s] 14%|█▍ | 52281/371472 [4:10:13<26:04:53, 3.40it/s] 14%|█▍ | 52282/371472 [4:10:13<25:28:21, 3.48it/s] 14%|█▍ | 52283/371472 [4:10:13<26:35:24, 3.33it/s] 14%|█▍ | 52284/371472 [4:10:14<27:05:37, 3.27it/s] 14%|█▍ | 52285/371472 [4:10:14<26:01:27, 3.41it/s] 14%|█▍ | 52286/371472 [4:10:14<25:42:47, 3.45it/s] 14%|█▍ | 52287/371472 [4:10:14<24:48:00, 3.58it/s] 14%|█▍ | 52288/371472 [4:10:15<24:34:48, 3.61it/s] 14%|█▍ | 52289/371472 [4:10:15<23:33:35, 3.76it/s] 14%|█▍ | 52290/371472 [4:10:15<23:06:01, 3.84it/s] 14%|█▍ | 52291/371472 [4:10:15<24:19:25, 3.65it/s] 14%|█▍ | 52292/371472 [4:10:16<23:40:04, 3.75it/s] 14%|█▍ | 52293/371472 [4:10:16<22:57:51, 3.86it/s] 14%|█▍ | 52294/371472 [4:10:16<22:59:54, 3.86it/s] 14%|█▍ | 52295/371472 [4:10:16<23:45:32, 3.73it/s] 14%|█▍ | 52296/371472 [4:10:17<23:29:24, 3.77it/s] 14%|█▍ | 52297/371472 [4:10:17<23:13:16, 3.82it/s] 14%|█▍ | 52298/371472 [4:10:17<23:42:44, 3.74it/s] 14%|█▍ | 52299/371472 [4:10:18<24:47:19, 3.58it/s] 14%|█▍ | 52300/371472 [4:10:18<24:53:57, 3.56it/s] {'loss': 4.2185, 'learning_rate': 8.737044538774807e-07, 'epoch': 2.25} + 14%|█▍ | 52300/371472 [4:10:18<24:53:57, 3.56it/s] 14%|█▍ | 52301/371472 [4:10:18<25:25:11, 3.49it/s] 14%|█▍ | 52302/371472 [4:10:18<25:47:37, 3.44it/s] 14%|█▍ | 52303/371472 [4:10:19<25:03:28, 3.54it/s] 14%|█▍ | 52304/371472 [4:10:19<25:02:04, 3.54it/s] 14%|█▍ | 52305/371472 [4:10:19<26:17:50, 3.37it/s] 14%|█▍ | 52306/371472 [4:10:20<27:23:11, 3.24it/s] 14%|█▍ | 52307/371472 [4:10:20<27:31:46, 3.22it/s] 14%|█▍ | 52308/371472 [4:10:20<26:00:44, 3.41it/s] 14%|█▍ | 52309/371472 [4:10:21<26:58:14, 3.29it/s] 14%|█▍ | 52310/371472 [4:10:21<25:55:13, 3.42it/s] 14%|█▍ | 52311/371472 [4:10:21<26:05:42, 3.40it/s] 14%|█▍ | 52312/371472 [4:10:21<25:15:48, 3.51it/s] 14%|█▍ | 52313/371472 [4:10:22<24:52:42, 3.56it/s] 14%|█▍ | 52314/371472 [4:10:22<25:44:13, 3.44it/s] 14%|█▍ | 52315/371472 [4:10:22<25:44:21, 3.44it/s] 14%|█▍ | 52316/371472 [4:10:23<25:05:04, 3.53it/s] 14%|█▍ | 52317/371472 [4:10:23<26:16:36, 3.37it/s] 14%|█▍ | 52318/371472 [4:10:23<25:50:11, 3.43it/s] 14%|█▍ | 52319/371472 [4:10:23<26:49:21, 3.31it/s] 14%|█▍ | 52320/371472 [4:10:24<25:14:50, 3.51it/s] {'loss': 4.0571, 'learning_rate': 8.736559719020016e-07, 'epoch': 2.25} + 14%|█▍ | 52320/371472 [4:10:24<25:14:50, 3.51it/s] 14%|█▍ | 52321/371472 [4:10:24<25:33:48, 3.47it/s] 14%|█▍ | 52322/371472 [4:10:24<25:51:35, 3.43it/s] 14%|█▍ | 52323/371472 [4:10:25<26:01:56, 3.41it/s] 14%|█▍ | 52324/371472 [4:10:25<24:47:18, 3.58it/s] 14%|█▍ | 52325/371472 [4:10:25<24:42:46, 3.59it/s] 14%|█▍ | 52326/371472 [4:10:25<23:20:46, 3.80it/s] 14%|█▍ | 52327/371472 [4:10:26<23:21:51, 3.79it/s] 14%|█▍ | 52328/371472 [4:10:26<23:34:50, 3.76it/s] 14%|█▍ | 52329/371472 [4:10:26<23:46:05, 3.73it/s] 14%|█▍ | 52330/371472 [4:10:26<23:42:35, 3.74it/s] 14%|█▍ | 52331/371472 [4:10:27<23:33:09, 3.76it/s] 14%|█▍ | 52332/371472 [4:10:27<23:37:49, 3.75it/s] 14%|█▍ | 52333/371472 [4:10:27<23:23:02, 3.79it/s] 14%|█▍ | 52334/371472 [4:10:28<23:43:17, 3.74it/s] 14%|█▍ | 52335/371472 [4:10:28<24:30:03, 3.62it/s] 14%|█▍ | 52336/371472 [4:10:28<24:40:36, 3.59it/s] 14%|█▍ | 52337/371472 [4:10:28<24:23:29, 3.63it/s] 14%|█▍ | 52338/371472 [4:10:29<23:27:48, 3.78it/s] 14%|█▍ | 52339/371472 [4:10:29<23:00:25, 3.85it/s] 14%|█▍ | 52340/371472 [4:10:29<23:22:11, 3.79it/s] {'loss': 4.1322, 'learning_rate': 8.736074899265228e-07, 'epoch': 2.25} + 14%|█▍ | 52340/371472 [4:10:29<23:22:11, 3.79it/s] 14%|█▍ | 52341/371472 [4:10:29<23:07:54, 3.83it/s] 14%|█▍ | 52342/371472 [4:10:30<22:53:37, 3.87it/s] 14%|█▍ | 52343/371472 [4:10:30<26:42:34, 3.32it/s] 14%|█▍ | 52344/371472 [4:10:30<26:10:01, 3.39it/s] 14%|█▍ | 52345/371472 [4:10:31<27:17:03, 3.25it/s] 14%|█▍ | 52346/371472 [4:10:31<25:52:05, 3.43it/s] 14%|█▍ | 52347/371472 [4:10:31<25:18:12, 3.50it/s] 14%|█▍ | 52348/371472 [4:10:31<25:41:26, 3.45it/s] 14%|█▍ | 52349/371472 [4:10:32<25:54:33, 3.42it/s] 14%|█▍ | 52350/371472 [4:10:32<25:56:52, 3.42it/s] 14%|█▍ | 52351/371472 [4:10:32<25:35:32, 3.46it/s] 14%|█▍ | 52352/371472 [4:10:33<24:19:54, 3.64it/s] 14%|█▍ | 52353/371472 [4:10:33<24:32:44, 3.61it/s] 14%|█▍ | 52354/371472 [4:10:33<26:08:17, 3.39it/s] 14%|█▍ | 52355/371472 [4:10:34<26:08:40, 3.39it/s] 14%|█▍ | 52356/371472 [4:10:34<25:28:28, 3.48it/s] 14%|█▍ | 52357/371472 [4:10:34<25:41:02, 3.45it/s] 14%|█▍ | 52358/371472 [4:10:34<25:23:21, 3.49it/s] 14%|█▍ | 52359/371472 [4:10:35<24:02:00, 3.69it/s] 14%|█▍ | 52360/371472 [4:10:35<24:59:34, 3.55it/s] {'loss': 4.1068, 'learning_rate': 8.73559007951044e-07, 'epoch': 2.26} + 14%|█▍ | 52360/371472 [4:10:35<24:59:34, 3.55it/s] 14%|█▍ | 52361/371472 [4:10:35<25:44:24, 3.44it/s] 14%|█▍ | 52362/371472 [4:10:36<26:55:34, 3.29it/s] 14%|█▍ | 52363/371472 [4:10:36<25:43:21, 3.45it/s] 14%|█▍ | 52364/371472 [4:10:36<26:19:36, 3.37it/s] 14%|█▍ | 52365/371472 [4:10:36<26:06:11, 3.40it/s] 14%|█▍ | 52366/371472 [4:10:37<24:56:08, 3.55it/s] 14%|█▍ | 52367/371472 [4:10:37<25:16:05, 3.51it/s] 14%|█▍ | 52368/371472 [4:10:37<24:38:10, 3.60it/s] 14%|█▍ | 52369/371472 [4:10:37<24:24:46, 3.63it/s] 14%|█▍ | 52370/371472 [4:10:38<23:56:14, 3.70it/s] 14%|█▍ | 52371/371472 [4:10:38<24:17:45, 3.65it/s] 14%|█▍ | 52372/371472 [4:10:38<24:31:49, 3.61it/s] 14%|█▍ | 52373/371472 [4:10:39<24:02:48, 3.69it/s] 14%|█▍ | 52374/371472 [4:10:39<23:23:22, 3.79it/s] 14%|█▍ | 52375/371472 [4:10:39<22:43:16, 3.90it/s] 14%|█▍ | 52376/371472 [4:10:39<22:58:31, 3.86it/s] 14%|█▍ | 52377/371472 [4:10:40<24:02:57, 3.69it/s] 14%|█▍ | 52378/371472 [4:10:40<23:50:17, 3.72it/s] 14%|█▍ | 52379/371472 [4:10:40<22:54:35, 3.87it/s] 14%|█▍ | 52380/371472 [4:10:40<23:38:27, 3.75it/s] {'loss': 4.4032, 'learning_rate': 8.73510525975565e-07, 'epoch': 2.26} + 14%|█▍ | 52380/371472 [4:10:40<23:38:27, 3.75it/s] 14%|█▍ | 52381/371472 [4:10:41<24:10:55, 3.67it/s] 14%|█▍ | 52382/371472 [4:10:41<24:14:15, 3.66it/s] 14%|█▍ | 52383/371472 [4:10:41<24:07:15, 3.67it/s] 14%|█▍ | 52384/371472 [4:10:41<24:05:19, 3.68it/s] 14%|█▍ | 52385/371472 [4:10:42<24:37:41, 3.60it/s] 14%|█▍ | 52386/371472 [4:10:42<23:27:17, 3.78it/s] 14%|█▍ | 52387/371472 [4:10:42<23:58:03, 3.70it/s] 14%|█▍ | 52388/371472 [4:10:43<24:45:30, 3.58it/s] 14%|█▍ | 52389/371472 [4:10:43<24:51:28, 3.57it/s] 14%|█▍ | 52390/371472 [4:10:43<24:29:52, 3.62it/s] 14%|█▍ | 52391/371472 [4:10:43<24:28:31, 3.62it/s] 14%|█▍ | 52392/371472 [4:10:44<24:05:51, 3.68it/s] 14%|█▍ | 52393/371472 [4:10:44<24:28:18, 3.62it/s] 14%|█▍ | 52394/371472 [4:10:44<24:27:37, 3.62it/s] 14%|█▍ | 52395/371472 [4:10:45<24:13:36, 3.66it/s] 14%|█▍ | 52396/371472 [4:10:45<24:49:24, 3.57it/s] 14%|█▍ | 52397/371472 [4:10:45<23:50:48, 3.72it/s] 14%|█▍ | 52398/371472 [4:10:45<24:41:13, 3.59it/s] 14%|█▍ | 52399/371472 [4:10:46<24:05:40, 3.68it/s] 14%|█▍ | 52400/371472 [4:10:46<23:51:00, 3.72it/s] {'loss': 4.2477, 'learning_rate': 8.734620440000861e-07, 'epoch': 2.26} + 14%|█▍ | 52400/371472 [4:10:46<23:51:00, 3.72it/s] 14%|█▍ | 52401/371472 [4:10:46<23:35:14, 3.76it/s] 14%|█▍ | 52402/371472 [4:10:46<24:30:20, 3.62it/s] 14%|█▍ | 52403/371472 [4:10:47<23:47:19, 3.73it/s] 14%|█▍ | 52404/371472 [4:10:47<24:20:09, 3.64it/s] 14%|█▍ | 52405/371472 [4:10:47<24:29:54, 3.62it/s] 14%|█▍ | 52406/371472 [4:10:48<24:34:47, 3.61it/s] 14%|█▍ | 52407/371472 [4:10:48<25:03:05, 3.54it/s] 14%|█▍ | 52408/371472 [4:10:48<26:39:44, 3.32it/s] 14%|█▍ | 52409/371472 [4:10:48<25:23:35, 3.49it/s] 14%|█▍ | 52410/371472 [4:10:49<28:01:54, 3.16it/s] 14%|█▍ | 52411/371472 [4:10:49<28:18:06, 3.13it/s] 14%|█▍ | 52412/371472 [4:10:49<27:35:59, 3.21it/s] 14%|█▍ | 52413/371472 [4:10:50<28:10:59, 3.14it/s] 14%|█▍ | 52414/371472 [4:10:50<27:29:38, 3.22it/s] 14%|█▍ | 52415/371472 [4:10:50<25:54:33, 3.42it/s] 14%|█▍ | 52416/371472 [4:10:51<24:31:48, 3.61it/s] 14%|█▍ | 52417/371472 [4:10:51<24:05:21, 3.68it/s] 14%|█▍ | 52418/371472 [4:10:51<24:13:27, 3.66it/s] 14%|█▍ | 52419/371472 [4:10:51<24:37:50, 3.60it/s] 14%|█▍ | 52420/371472 [4:10:52<23:28:45, 3.77it/s] {'loss': 4.3231, 'learning_rate': 8.734135620246073e-07, 'epoch': 2.26} + 14%|█▍ | 52420/371472 [4:10:52<23:28:45, 3.77it/s] 14%|█▍ | 52421/371472 [4:10:52<25:10:28, 3.52it/s] 14%|█▍ | 52422/371472 [4:10:52<24:48:56, 3.57it/s] 14%|█▍ | 52423/371472 [4:10:53<27:21:04, 3.24it/s] 14%|█▍ | 52424/371472 [4:10:53<26:09:45, 3.39it/s] 14%|█▍ | 52425/371472 [4:10:53<25:17:45, 3.50it/s] 14%|█▍ | 52426/371472 [4:10:53<24:41:57, 3.59it/s] 14%|█▍ | 52427/371472 [4:10:54<24:44:39, 3.58it/s] 14%|█▍ | 52428/371472 [4:10:54<23:50:58, 3.72it/s] 14%|█▍ | 52429/371472 [4:10:54<23:35:47, 3.76it/s] 14%|█▍ | 52430/371472 [4:10:54<22:57:11, 3.86it/s] 14%|█▍ | 52431/371472 [4:10:55<22:39:29, 3.91it/s] 14%|█▍ | 52432/371472 [4:10:55<24:12:57, 3.66it/s] 14%|█▍ | 52433/371472 [4:10:55<23:34:57, 3.76it/s] 14%|█▍ | 52434/371472 [4:10:55<23:15:01, 3.81it/s] 14%|█▍ | 52435/371472 [4:10:56<23:52:32, 3.71it/s] 14%|█▍ | 52436/371472 [4:10:56<23:54:43, 3.71it/s] 14%|█▍ | 52437/371472 [4:10:56<25:29:32, 3.48it/s] 14%|█▍ | 52438/371472 [4:10:57<26:24:02, 3.36it/s] 14%|█▍ | 52439/371472 [4:10:57<26:02:00, 3.40it/s] 14%|█▍ | 52440/371472 [4:10:57<25:23:07, 3.49it/s] {'loss': 4.1685, 'learning_rate': 8.733650800491284e-07, 'epoch': 2.26} + 14%|█▍ | 52440/371472 [4:10:57<25:23:07, 3.49it/s] 14%|█▍ | 52441/371472 [4:10:57<24:11:44, 3.66it/s] 14%|█▍ | 52442/371472 [4:10:58<25:34:17, 3.47it/s] 14%|█▍ | 52443/371472 [4:10:58<24:28:59, 3.62it/s] 14%|█▍ | 52444/371472 [4:10:58<24:36:09, 3.60it/s] 14%|█▍ | 52445/371472 [4:10:59<25:36:03, 3.46it/s] 14%|█▍ | 52446/371472 [4:10:59<24:49:19, 3.57it/s] 14%|█▍ | 52447/371472 [4:10:59<24:09:15, 3.67it/s] 14%|█▍ | 52448/371472 [4:10:59<23:21:15, 3.79it/s] 14%|█▍ | 52449/371472 [4:11:00<22:30:15, 3.94it/s] 14%|█▍ | 52450/371472 [4:11:00<24:14:36, 3.66it/s] 14%|█▍ | 52451/371472 [4:11:00<25:45:44, 3.44it/s] 14%|█▍ | 52452/371472 [4:11:01<26:19:07, 3.37it/s] 14%|█▍ | 52453/371472 [4:11:01<25:48:39, 3.43it/s] 14%|█▍ | 52454/371472 [4:11:01<24:30:28, 3.62it/s] 14%|█▍ | 52455/371472 [4:11:01<25:06:52, 3.53it/s] 14%|█▍ | 52456/371472 [4:11:02<26:30:51, 3.34it/s] 14%|█▍ | 52457/371472 [4:11:02<25:46:05, 3.44it/s] 14%|█▍ | 52458/371472 [4:11:02<24:20:40, 3.64it/s] 14%|█▍ | 52459/371472 [4:11:03<24:31:01, 3.61it/s] 14%|█▍ | 52460/371472 [4:11:03<23:42:55, 3.74it/s] {'loss': 4.3216, 'learning_rate': 8.733165980736494e-07, 'epoch': 2.26} + 14%|█▍ | 52460/371472 [4:11:03<23:42:55, 3.74it/s] 14%|█▍ | 52461/371472 [4:11:03<23:39:51, 3.74it/s] 14%|█▍ | 52462/371472 [4:11:03<26:30:50, 3.34it/s] 14%|█▍ | 52463/371472 [4:11:04<25:17:54, 3.50it/s] 14%|█▍ | 52464/371472 [4:11:04<24:56:02, 3.55it/s] 14%|█▍ | 52465/371472 [4:11:04<24:16:36, 3.65it/s] 14%|█▍ | 52466/371472 [4:11:04<24:20:43, 3.64it/s] 14%|█▍ | 52467/371472 [4:11:05<23:26:04, 3.78it/s] 14%|█▍ | 52468/371472 [4:11:05<24:17:30, 3.65it/s] 14%|█▍ | 52469/371472 [4:11:05<24:11:11, 3.66it/s] 14%|█▍ | 52470/371472 [4:11:06<23:32:39, 3.76it/s] 14%|█▍ | 52471/371472 [4:11:06<24:41:18, 3.59it/s] 14%|█▍ | 52472/371472 [4:11:06<25:34:03, 3.47it/s] 14%|█▍ | 52473/371472 [4:11:06<25:09:37, 3.52it/s] 14%|█▍ | 52474/371472 [4:11:07<24:32:43, 3.61it/s] 14%|█▍ | 52475/371472 [4:11:07<24:32:18, 3.61it/s] 14%|█▍ | 52476/371472 [4:11:07<23:20:09, 3.80it/s] 14%|█▍ | 52477/371472 [4:11:07<23:45:09, 3.73it/s] 14%|█▍ | 52478/371472 [4:11:08<23:50:31, 3.72it/s] 14%|█▍ | 52479/371472 [4:11:08<24:47:22, 3.57it/s] 14%|█▍ | 52480/371472 [4:11:08<24:51:06, 3.57it/s] {'loss': 4.227, 'learning_rate': 8.732681160981705e-07, 'epoch': 2.26} + 14%|█▍ | 52480/371472 [4:11:08<24:51:06, 3.57it/s] 14%|█▍ | 52481/371472 [4:11:09<24:38:41, 3.60it/s] 14%|█▍ | 52482/371472 [4:11:09<25:04:13, 3.53it/s] 14%|█▍ | 52483/371472 [4:11:09<24:50:27, 3.57it/s] 14%|█▍ | 52484/371472 [4:11:09<24:13:30, 3.66it/s] 14%|█▍ | 52485/371472 [4:11:10<24:24:13, 3.63it/s] 14%|█▍ | 52486/371472 [4:11:10<23:42:46, 3.74it/s] 14%|█▍ | 52487/371472 [4:11:10<23:54:23, 3.71it/s] 14%|█▍ | 52488/371472 [4:11:11<24:11:32, 3.66it/s] 14%|█▍ | 52489/371472 [4:11:11<23:46:54, 3.73it/s] 14%|█▍ | 52490/371472 [4:11:11<28:53:05, 3.07it/s] 14%|█▍ | 52491/371472 [4:11:12<27:43:16, 3.20it/s] 14%|█▍ | 52492/371472 [4:11:12<28:05:55, 3.15it/s] 14%|█▍ | 52493/371472 [4:11:12<27:22:26, 3.24it/s] 14%|█▍ | 52494/371472 [4:11:12<27:37:48, 3.21it/s] 14%|█▍ | 52495/371472 [4:11:13<26:59:31, 3.28it/s] 14%|█▍ | 52496/371472 [4:11:13<26:01:15, 3.41it/s] 14%|█▍ | 52497/371472 [4:11:13<25:47:42, 3.43it/s] 14%|█▍ | 52498/371472 [4:11:14<24:37:30, 3.60it/s] 14%|█▍ | 52499/371472 [4:11:14<26:26:36, 3.35it/s] 14%|█▍ | 52500/371472 [4:11:14<26:05:58, 3.39it/s] {'loss': 4.244, 'learning_rate': 8.732196341226917e-07, 'epoch': 2.26} + 14%|█▍ | 52500/371472 [4:11:14<26:05:58, 3.39it/s] 14%|█▍ | 52501/371472 [4:11:14<25:31:11, 3.47it/s] 14%|█▍ | 52502/371472 [4:11:15<26:24:11, 3.36it/s] 14%|█▍ | 52503/371472 [4:11:15<25:25:54, 3.48it/s] 14%|█▍ | 52504/371472 [4:11:15<25:04:09, 3.53it/s] 14%|█▍ | 52505/371472 [4:11:16<24:42:29, 3.59it/s] 14%|█▍ | 52506/371472 [4:11:16<24:33:32, 3.61it/s] 14%|█▍ | 52507/371472 [4:11:16<24:06:24, 3.68it/s] 14%|█▍ | 52508/371472 [4:11:16<25:33:18, 3.47it/s] 14%|█▍ | 52509/371472 [4:11:17<25:08:40, 3.52it/s] 14%|█▍ | 52510/371472 [4:11:17<24:33:53, 3.61it/s] 14%|█▍ | 52511/371472 [4:11:17<25:01:11, 3.54it/s] 14%|█▍ | 52512/371472 [4:11:18<25:16:32, 3.51it/s] 14%|█▍ | 52513/371472 [4:11:18<25:20:46, 3.50it/s] 14%|█▍ | 52514/371472 [4:11:18<27:39:02, 3.20it/s] 14%|█▍ | 52515/371472 [4:11:18<26:38:22, 3.33it/s] 14%|█▍ | 52516/371472 [4:11:19<27:16:14, 3.25it/s] 14%|█▍ | 52517/371472 [4:11:19<27:48:09, 3.19it/s] 14%|█▍ | 52518/371472 [4:11:19<27:20:45, 3.24it/s] 14%|█▍ | 52519/371472 [4:11:20<25:36:43, 3.46it/s] 14%|█▍ | 52520/371472 [4:11:20<32:46:17, 2.70it/s] {'loss': 4.3651, 'learning_rate': 8.731711521472129e-07, 'epoch': 2.26} + 14%|█▍ | 52520/371472 [4:11:20<32:46:17, 2.70it/s] 14%|█▍ | 52521/371472 [4:11:21<30:12:37, 2.93it/s] 14%|█▍ | 52522/371472 [4:11:21<29:49:28, 2.97it/s] 14%|█▍ | 52523/371472 [4:11:21<28:01:45, 3.16it/s] 14%|█▍ | 52524/371472 [4:11:21<27:03:44, 3.27it/s] 14%|█▍ | 52525/371472 [4:11:22<26:03:02, 3.40it/s] 14%|█▍ | 52526/371472 [4:11:22<25:23:07, 3.49it/s] 14%|█▍ | 52527/371472 [4:11:22<25:16:45, 3.50it/s] 14%|█▍ | 52528/371472 [4:11:22<24:22:53, 3.63it/s] 14%|█▍ | 52529/371472 [4:11:23<24:20:48, 3.64it/s] 14%|█▍ | 52530/371472 [4:11:23<23:54:51, 3.70it/s] 14%|█▍ | 52531/371472 [4:11:23<23:35:02, 3.76it/s] 14%|█▍ | 52532/371472 [4:11:24<23:20:07, 3.80it/s] 14%|█▍ | 52533/371472 [4:11:24<23:56:41, 3.70it/s] 14%|█▍ | 52534/371472 [4:11:24<24:02:04, 3.69it/s] 14%|█▍ | 52535/371472 [4:11:24<23:34:34, 3.76it/s] 14%|█▍ | 52536/371472 [4:11:25<23:02:07, 3.85it/s] 14%|█▍ | 52537/371472 [4:11:25<24:15:58, 3.65it/s] 14%|█▍ | 52538/371472 [4:11:25<24:03:11, 3.68it/s] 14%|█▍ | 52539/371472 [4:11:25<26:04:08, 3.40it/s] 14%|█▍ | 52540/371472 [4:11:26<27:11:38, 3.26it/s] {'loss': 4.4109, 'learning_rate': 8.731226701717339e-07, 'epoch': 2.26} + 14%|█▍ | 52540/371472 [4:11:26<27:11:38, 3.26it/s] 14%|█▍ | 52541/371472 [4:11:26<26:38:18, 3.33it/s] 14%|█▍ | 52542/371472 [4:11:26<25:51:48, 3.43it/s] 14%|█▍ | 52543/371472 [4:11:27<24:57:30, 3.55it/s] 14%|█▍ | 52544/371472 [4:11:27<23:57:11, 3.70it/s] 14%|█▍ | 52545/371472 [4:11:27<25:10:56, 3.52it/s] 14%|█▍ | 52546/371472 [4:11:27<24:16:23, 3.65it/s] 14%|█▍ | 52547/371472 [4:11:28<23:30:35, 3.77it/s] 14%|█▍ | 52548/371472 [4:11:28<22:45:13, 3.89it/s] 14%|█▍ | 52549/371472 [4:11:28<25:29:06, 3.48it/s] 14%|█▍ | 52550/371472 [4:11:29<24:50:38, 3.57it/s] 14%|█▍ | 52551/371472 [4:11:29<26:23:05, 3.36it/s] 14%|█▍ | 52552/371472 [4:11:29<24:59:27, 3.54it/s] 14%|█▍ | 52553/371472 [4:11:29<24:00:52, 3.69it/s] 14%|█▍ | 52554/371472 [4:11:30<24:41:49, 3.59it/s] 14%|█▍ | 52555/371472 [4:11:30<24:04:04, 3.68it/s] 14%|█▍ | 52556/371472 [4:11:30<23:11:09, 3.82it/s] 14%|█▍ | 52557/371472 [4:11:30<24:17:36, 3.65it/s] 14%|█▍ | 52558/371472 [4:11:31<24:20:15, 3.64it/s] 14%|█▍ | 52559/371472 [4:11:31<25:35:03, 3.46it/s] 14%|█▍ | 52560/371472 [4:11:31<26:46:31, 3.31it/s] {'loss': 4.2073, 'learning_rate': 8.730741881962549e-07, 'epoch': 2.26} + 14%|█▍ | 52560/371472 [4:11:31<26:46:31, 3.31it/s] 14%|█▍ | 52561/371472 [4:11:32<25:27:24, 3.48it/s] 14%|█▍ | 52562/371472 [4:11:32<25:33:19, 3.47it/s] 14%|█▍ | 52563/371472 [4:11:32<24:59:35, 3.54it/s] 14%|█▍ | 52564/371472 [4:11:33<25:14:42, 3.51it/s] 14%|█▍ | 52565/371472 [4:11:33<24:06:06, 3.68it/s] 14%|█▍ | 52566/371472 [4:11:33<24:44:37, 3.58it/s] 14%|█▍ | 52567/371472 [4:11:33<24:41:26, 3.59it/s] 14%|█▍ | 52568/371472 [4:11:34<24:45:21, 3.58it/s] 14%|█▍ | 52569/371472 [4:11:34<23:53:16, 3.71it/s] 14%|█▍ | 52570/371472 [4:11:34<24:40:59, 3.59it/s] 14%|█▍ | 52571/371472 [4:11:34<24:21:38, 3.64it/s] 14%|█▍ | 52572/371472 [4:11:35<23:55:28, 3.70it/s] 14%|█▍ | 52573/371472 [4:11:35<23:16:02, 3.81it/s] 14%|█▍ | 52574/371472 [4:11:35<22:51:31, 3.88it/s] 14%|█▍ | 52575/371472 [4:11:35<23:52:08, 3.71it/s] 14%|█▍ | 52576/371472 [4:11:36<23:35:04, 3.76it/s] 14%|█▍ | 52577/371472 [4:11:36<24:26:12, 3.62it/s] 14%|█▍ | 52578/371472 [4:11:36<26:58:30, 3.28it/s] 14%|█▍ | 52579/371472 [4:11:37<25:36:22, 3.46it/s] 14%|█▍ | 52580/371472 [4:11:37<24:29:52, 3.62it/s] {'loss': 4.3845, 'learning_rate': 8.730257062207761e-07, 'epoch': 2.26} + 14%|█▍ | 52580/371472 [4:11:37<24:29:52, 3.62it/s] 14%|█▍ | 52581/371472 [4:11:37<23:33:19, 3.76it/s] 14%|█▍ | 52582/371472 [4:11:37<25:10:57, 3.52it/s] 14%|█▍ | 52583/371472 [4:11:38<25:03:32, 3.53it/s] 14%|█▍ | 52584/371472 [4:11:38<24:16:04, 3.65it/s] 14%|█▍ | 52585/371472 [4:11:38<24:44:05, 3.58it/s] 14%|█▍ | 52586/371472 [4:11:39<24:00:35, 3.69it/s] 14%|█▍ | 52587/371472 [4:11:39<23:43:03, 3.73it/s] 14%|█▍ | 52588/371472 [4:11:39<24:01:24, 3.69it/s] 14%|█▍ | 52589/371472 [4:11:39<24:08:08, 3.67it/s] 14%|█▍ | 52590/371472 [4:11:40<23:48:16, 3.72it/s] 14%|█▍ | 52591/371472 [4:11:40<23:58:09, 3.70it/s] 14%|█▍ | 52592/371472 [4:11:40<24:09:41, 3.67it/s] 14%|█▍ | 52593/371472 [4:11:40<23:58:44, 3.69it/s] 14%|█▍ | 52594/371472 [4:11:41<23:31:50, 3.76it/s] 14%|█▍ | 52595/371472 [4:11:41<25:06:43, 3.53it/s] 14%|█▍ | 52596/371472 [4:11:41<25:24:30, 3.49it/s] 14%|█▍ | 52597/371472 [4:11:42<24:22:26, 3.63it/s] 14%|█▍ | 52598/371472 [4:11:42<23:36:11, 3.75it/s] 14%|█▍ | 52599/371472 [4:11:42<23:07:04, 3.83it/s] 14%|█▍ | 52600/371472 [4:11:42<24:28:56, 3.62it/s] {'loss': 4.3746, 'learning_rate': 8.729772242452972e-07, 'epoch': 2.27} + 14%|█▍ | 52600/371472 [4:11:42<24:28:56, 3.62it/s] 14%|█▍ | 52601/371472 [4:11:43<26:05:12, 3.40it/s] 14%|█▍ | 52602/371472 [4:11:43<26:37:40, 3.33it/s] 14%|█▍ | 52603/371472 [4:11:43<25:28:08, 3.48it/s] 14%|█▍ | 52604/371472 [4:11:44<25:21:09, 3.49it/s] 14%|█▍ | 52605/371472 [4:11:44<26:38:58, 3.32it/s] 14%|█▍ | 52606/371472 [4:11:44<25:43:26, 3.44it/s] 14%|█▍ | 52607/371472 [4:11:44<24:29:27, 3.62it/s] 14%|█▍ | 52608/371472 [4:11:45<24:26:32, 3.62it/s] 14%|█▍ | 52609/371472 [4:11:45<24:57:13, 3.55it/s] 14%|█▍ | 52610/371472 [4:11:45<25:06:05, 3.53it/s] 14%|█▍ | 52611/371472 [4:11:46<28:09:02, 3.15it/s] 14%|█▍ | 52612/371472 [4:11:46<27:51:00, 3.18it/s] 14%|█▍ | 52613/371472 [4:11:46<26:44:11, 3.31it/s] 14%|█▍ | 52614/371472 [4:11:47<26:14:46, 3.37it/s] 14%|█▍ | 52615/371472 [4:11:47<26:28:19, 3.35it/s] 14%|█▍ | 52616/371472 [4:11:47<25:49:35, 3.43it/s] 14%|█▍ | 52617/371472 [4:11:47<26:17:53, 3.37it/s] 14%|█▍ | 52618/371472 [4:11:48<25:01:10, 3.54it/s] 14%|█▍ | 52619/371472 [4:11:48<24:45:16, 3.58it/s] 14%|█▍ | 52620/371472 [4:11:48<24:04:43, 3.68it/s] {'loss': 4.0557, 'learning_rate': 8.729287422698183e-07, 'epoch': 2.27} + 14%|█▍ | 52620/371472 [4:11:48<24:04:43, 3.68it/s] 14%|█▍ | 52621/371472 [4:11:48<23:44:26, 3.73it/s] 14%|█▍ | 52622/371472 [4:11:49<22:49:24, 3.88it/s] 14%|█▍ | 52623/371472 [4:11:49<22:41:58, 3.90it/s] 14%|█▍ | 52624/371472 [4:11:49<22:21:05, 3.96it/s] 14%|█▍ | 52625/371472 [4:11:50<24:59:29, 3.54it/s] 14%|█▍ | 52626/371472 [4:11:50<25:52:38, 3.42it/s] 14%|█▍ | 52627/371472 [4:11:50<25:37:35, 3.46it/s] 14%|█▍ | 52628/371472 [4:11:50<25:22:11, 3.49it/s] 14%|█▍ | 52629/371472 [4:11:51<25:47:03, 3.43it/s] 14%|█▍ | 52630/371472 [4:11:51<24:59:07, 3.54it/s] 14%|█▍ | 52631/371472 [4:11:51<24:28:04, 3.62it/s] 14%|█▍ | 52632/371472 [4:11:51<24:01:35, 3.69it/s] 14%|█▍ | 52633/371472 [4:11:52<23:18:29, 3.80it/s] 14%|█▍ | 52634/371472 [4:11:52<23:44:41, 3.73it/s] 14%|█▍ | 52635/371472 [4:11:52<23:43:06, 3.73it/s] 14%|█▍ | 52636/371472 [4:11:53<25:56:23, 3.41it/s] 14%|█▍ | 52637/371472 [4:11:53<26:37:37, 3.33it/s] 14%|█▍ | 52638/371472 [4:11:53<25:57:20, 3.41it/s] 14%|█▍ | 52639/371472 [4:11:54<25:58:25, 3.41it/s] 14%|█▍ | 52640/371472 [4:11:54<25:55:51, 3.42it/s] {'loss': 4.3143, 'learning_rate': 8.728802602943394e-07, 'epoch': 2.27} + 14%|█▍ | 52640/371472 [4:11:54<25:55:51, 3.42it/s] 14%|█▍ | 52641/371472 [4:11:54<26:07:39, 3.39it/s] 14%|█▍ | 52642/371472 [4:11:54<25:45:45, 3.44it/s] 14%|█▍ | 52643/371472 [4:11:55<27:18:40, 3.24it/s] 14%|█▍ | 52644/371472 [4:11:55<30:43:31, 2.88it/s] 14%|█▍ | 52645/371472 [4:11:56<30:12:23, 2.93it/s] 14%|█▍ | 52646/371472 [4:11:56<28:00:46, 3.16it/s] 14%|█▍ | 52647/371472 [4:11:56<26:34:41, 3.33it/s] 14%|█▍ | 52648/371472 [4:11:56<26:54:47, 3.29it/s] 14%|█▍ | 52649/371472 [4:11:57<26:25:05, 3.35it/s] 14%|█▍ | 52650/371472 [4:11:57<25:49:16, 3.43it/s] 14%|█▍ | 52651/371472 [4:11:57<28:07:05, 3.15it/s] 14%|█▍ | 52652/371472 [4:11:58<27:34:34, 3.21it/s] 14%|█▍ | 52653/371472 [4:11:58<27:34:49, 3.21it/s] 14%|█▍ | 52654/371472 [4:11:58<27:14:10, 3.25it/s] 14%|█▍ | 52655/371472 [4:11:58<27:00:09, 3.28it/s] 14%|█▍ | 52656/371472 [4:11:59<25:09:28, 3.52it/s] 14%|█▍ | 52657/371472 [4:11:59<25:01:23, 3.54it/s] 14%|█▍ | 52658/371472 [4:11:59<24:56:34, 3.55it/s] 14%|█▍ | 52659/371472 [4:12:00<25:13:25, 3.51it/s] 14%|█▍ | 52660/371472 [4:12:00<25:00:15, 3.54it/s] {'loss': 3.9698, 'learning_rate': 8.728317783188606e-07, 'epoch': 2.27} + 14%|█▍ | 52660/371472 [4:12:00<25:00:15, 3.54it/s] 14%|█▍ | 52661/371472 [4:12:00<24:48:09, 3.57it/s] 14%|█▍ | 52662/371472 [4:12:00<24:51:43, 3.56it/s] 14%|█▍ | 52663/371472 [4:12:01<24:23:57, 3.63it/s] 14%|█▍ | 52664/371472 [4:12:01<25:06:17, 3.53it/s] 14%|█▍ | 52665/371472 [4:12:01<25:07:11, 3.53it/s] 14%|█▍ | 52666/371472 [4:12:02<24:20:14, 3.64it/s] 14%|█▍ | 52667/371472 [4:12:02<24:16:13, 3.65it/s] 14%|█▍ | 52668/371472 [4:12:02<23:49:15, 3.72it/s] 14%|█▍ | 52669/371472 [4:12:02<23:08:19, 3.83it/s] 14%|█▍ | 52670/371472 [4:12:03<22:48:14, 3.88it/s] 14%|█▍ | 52671/371472 [4:12:03<23:05:10, 3.84it/s] 14%|█▍ | 52672/371472 [4:12:03<23:42:03, 3.74it/s] 14%|█▍ | 52673/371472 [4:12:03<23:03:04, 3.84it/s] 14%|█▍ | 52674/371472 [4:12:04<23:33:00, 3.76it/s] 14%|█▍ | 52675/371472 [4:12:04<24:11:15, 3.66it/s] 14%|█▍ | 52676/371472 [4:12:04<23:51:53, 3.71it/s] 14%|█▍ | 52677/371472 [4:12:04<25:24:55, 3.48it/s] 14%|█▍ | 52678/371472 [4:12:05<26:21:19, 3.36it/s] 14%|█▍ | 52679/371472 [4:12:05<25:12:15, 3.51it/s] 14%|█▍ | 52680/371472 [4:12:05<25:53:49, 3.42it/s] {'loss': 4.1787, 'learning_rate': 8.727832963433817e-07, 'epoch': 2.27} + 14%|█▍ | 52680/371472 [4:12:05<25:53:49, 3.42it/s] 14%|█▍ | 52681/371472 [4:12:06<25:19:22, 3.50it/s] 14%|█▍ | 52682/371472 [4:12:06<25:12:03, 3.51it/s] 14%|█▍ | 52683/371472 [4:12:06<24:50:30, 3.56it/s] 14%|█▍ | 52684/371472 [4:12:06<23:50:22, 3.71it/s] 14%|█▍ | 52685/371472 [4:12:07<24:16:15, 3.65it/s] 14%|█▍ | 52686/371472 [4:12:07<25:10:14, 3.52it/s] 14%|█▍ | 52687/371472 [4:12:07<24:20:17, 3.64it/s] 14%|█▍ | 52688/371472 [4:12:08<24:01:44, 3.69it/s] 14%|█▍ | 52689/371472 [4:12:08<24:48:30, 3.57it/s] 14%|█▍ | 52690/371472 [4:12:08<24:40:20, 3.59it/s] 14%|█▍ | 52691/371472 [4:12:08<25:17:52, 3.50it/s] 14%|█▍ | 52692/371472 [4:12:09<26:05:16, 3.39it/s] 14%|█▍ | 52693/371472 [4:12:09<25:13:26, 3.51it/s] 14%|█▍ | 52694/371472 [4:12:09<24:51:32, 3.56it/s] 14%|█▍ | 52695/371472 [4:12:10<24:03:12, 3.68it/s] 14%|█▍ | 52696/371472 [4:12:10<24:03:25, 3.68it/s] 14%|█▍ | 52697/371472 [4:12:10<25:06:55, 3.53it/s] 14%|█▍ | 52698/371472 [4:12:10<25:10:30, 3.52it/s] 14%|█▍ | 52699/371472 [4:12:11<25:42:31, 3.44it/s] 14%|█▍ | 52700/371472 [4:12:11<25:20:39, 3.49it/s] {'loss': 4.294, 'learning_rate': 8.727348143679027e-07, 'epoch': 2.27} + 14%|█▍ | 52700/371472 [4:12:11<25:20:39, 3.49it/s] 14%|█▍ | 52701/371472 [4:12:11<25:46:05, 3.44it/s] 14%|█▍ | 52702/371472 [4:12:12<25:06:58, 3.53it/s] 14%|█▍ | 52703/371472 [4:12:12<24:38:36, 3.59it/s] 14%|█▍ | 52704/371472 [4:12:12<24:42:34, 3.58it/s] 14%|█▍ | 52705/371472 [4:12:12<25:13:37, 3.51it/s] 14%|█▍ | 52706/371472 [4:12:13<25:11:11, 3.52it/s] 14%|█▍ | 52707/371472 [4:12:13<24:00:54, 3.69it/s] 14%|█▍ | 52708/371472 [4:12:13<23:33:37, 3.76it/s] 14%|█▍ | 52709/371472 [4:12:13<23:27:19, 3.78it/s] 14%|█▍ | 52710/371472 [4:12:14<23:57:41, 3.70it/s] 14%|█▍ | 52711/371472 [4:12:14<23:37:35, 3.75it/s] 14%|█▍ | 52712/371472 [4:12:14<23:57:00, 3.70it/s] 14%|█▍ | 52713/371472 [4:12:15<25:30:58, 3.47it/s] 14%|█▍ | 52714/371472 [4:12:15<27:34:19, 3.21it/s] 14%|█▍ | 52715/371472 [4:12:15<27:22:46, 3.23it/s] 14%|█▍ | 52716/371472 [4:12:16<26:54:33, 3.29it/s] 14%|█▍ | 52717/371472 [4:12:16<26:29:30, 3.34it/s] 14%|█▍ | 52718/371472 [4:12:16<27:19:48, 3.24it/s] 14%|█▍ | 52719/371472 [4:12:16<26:01:46, 3.40it/s] 14%|█▍ | 52720/371472 [4:12:17<25:01:07, 3.54it/s] {'loss': 4.1294, 'learning_rate': 8.726863323924238e-07, 'epoch': 2.27} + 14%|█▍ | 52720/371472 [4:12:17<25:01:07, 3.54it/s] 14%|█▍ | 52721/371472 [4:12:17<24:45:51, 3.58it/s] 14%|█▍ | 52722/371472 [4:12:17<24:07:15, 3.67it/s] 14%|█▍ | 52723/371472 [4:12:17<23:39:05, 3.74it/s] 14%|█▍ | 52724/371472 [4:12:18<23:25:33, 3.78it/s] 14%|█▍ | 52725/371472 [4:12:18<24:06:53, 3.67it/s] 14%|█▍ | 52726/371472 [4:12:18<25:11:51, 3.51it/s] 14%|█▍ | 52727/371472 [4:12:19<24:53:26, 3.56it/s] 14%|█▍ | 52728/371472 [4:12:19<26:17:25, 3.37it/s] 14%|█▍ | 52729/371472 [4:12:19<26:35:46, 3.33it/s] 14%|█▍ | 52730/371472 [4:12:20<27:38:25, 3.20it/s] 14%|█▍ | 52731/371472 [4:12:20<26:19:32, 3.36it/s] 14%|█▍ | 52732/371472 [4:12:20<25:43:42, 3.44it/s] 14%|█▍ | 52733/371472 [4:12:20<25:16:57, 3.50it/s] 14%|█▍ | 52734/371472 [4:12:21<25:23:53, 3.49it/s] 14%|█▍ | 52735/371472 [4:12:21<24:31:28, 3.61it/s] 14%|█▍ | 52736/371472 [4:12:21<23:44:49, 3.73it/s] 14%|█▍ | 52737/371472 [4:12:21<22:56:20, 3.86it/s] 14%|█▍ | 52738/371472 [4:12:22<22:58:18, 3.85it/s] 14%|█▍ | 52739/371472 [4:12:22<22:20:42, 3.96it/s] 14%|█▍ | 52740/371472 [4:12:22<24:13:08, 3.66it/s] {'loss': 4.3712, 'learning_rate': 8.72637850416945e-07, 'epoch': 2.27} + 14%|█▍ | 52740/371472 [4:12:22<24:13:08, 3.66it/s] 14%|█▍ | 52741/371472 [4:12:23<25:27:06, 3.48it/s] 14%|█▍ | 52742/371472 [4:12:23<25:05:47, 3.53it/s] 14%|█▍ | 52743/371472 [4:12:23<24:17:48, 3.64it/s] 14%|█▍ | 52744/371472 [4:12:23<24:31:12, 3.61it/s] 14%|█▍ | 52745/371472 [4:12:24<23:46:45, 3.72it/s] 14%|█▍ | 52746/371472 [4:12:24<24:40:35, 3.59it/s] 14%|█▍ | 52747/371472 [4:12:24<24:48:14, 3.57it/s] 14%|█▍ | 52748/371472 [4:12:24<24:37:15, 3.60it/s] 14%|█▍ | 52749/371472 [4:12:25<24:35:46, 3.60it/s] 14%|█▍ | 52750/371472 [4:12:25<25:02:01, 3.54it/s] 14%|█▍ | 52751/371472 [4:12:25<25:03:53, 3.53it/s] 14%|█▍ | 52752/371472 [4:12:26<27:31:38, 3.22it/s] 14%|█▍ | 52753/371472 [4:12:26<26:16:00, 3.37it/s] 14%|█▍ | 52754/371472 [4:12:26<25:04:20, 3.53it/s] 14%|█▍ | 52755/371472 [4:12:27<25:06:30, 3.53it/s] 14%|█▍ | 52756/371472 [4:12:27<24:14:19, 3.65it/s] 14%|█▍ | 52757/371472 [4:12:27<24:27:18, 3.62it/s] 14%|█▍ | 52758/371472 [4:12:27<23:36:16, 3.75it/s] 14%|█▍ | 52759/371472 [4:12:28<25:05:16, 3.53it/s] 14%|█▍ | 52760/371472 [4:12:28<25:24:52, 3.48it/s] {'loss': 4.4626, 'learning_rate': 8.72589368441466e-07, 'epoch': 2.27} + 14%|█▍ | 52760/371472 [4:12:28<25:24:52, 3.48it/s] 14%|█▍ | 52761/371472 [4:12:28<28:39:10, 3.09it/s] 14%|█▍ | 52762/371472 [4:12:29<27:07:17, 3.26it/s] 14%|█▍ | 52763/371472 [4:12:29<26:31:15, 3.34it/s] 14%|█▍ | 52764/371472 [4:12:29<25:35:32, 3.46it/s] 14%|█▍ | 52765/371472 [4:12:29<26:08:14, 3.39it/s] 14%|█▍ | 52766/371472 [4:12:30<25:35:54, 3.46it/s] 14%|█▍ | 52767/371472 [4:12:30<26:46:43, 3.31it/s] 14%|█▍ | 52768/371472 [4:12:30<26:34:47, 3.33it/s] 14%|█▍ | 52769/371472 [4:12:31<25:02:02, 3.54it/s] 14%|█▍ | 52770/371472 [4:12:31<24:21:09, 3.64it/s] 14%|█▍ | 52771/371472 [4:12:31<23:33:11, 3.76it/s] 14%|█▍ | 52772/371472 [4:12:31<27:02:49, 3.27it/s] 14%|█▍ | 52773/371472 [4:12:32<26:17:01, 3.37it/s] 14%|█▍ | 52774/371472 [4:12:32<25:28:13, 3.48it/s] 14%|█▍ | 52775/371472 [4:12:32<24:30:50, 3.61it/s] 14%|█▍ | 52776/371472 [4:12:33<23:25:15, 3.78it/s] 14%|█▍ | 52777/371472 [4:12:33<25:48:01, 3.43it/s] 14%|█▍ | 52778/371472 [4:12:33<25:22:02, 3.49it/s] 14%|█▍ | 52779/371472 [4:12:33<25:01:53, 3.54it/s] 14%|█▍ | 52780/371472 [4:12:34<24:30:21, 3.61it/s] {'loss': 4.1864, 'learning_rate': 8.725408864659872e-07, 'epoch': 2.27} + 14%|█▍ | 52780/371472 [4:12:34<24:30:21, 3.61it/s] 14%|█▍ | 52781/371472 [4:12:34<24:35:50, 3.60it/s] 14%|█▍ | 52782/371472 [4:12:34<24:53:58, 3.56it/s] 14%|█▍ | 52783/371472 [4:12:35<24:18:17, 3.64it/s] 14%|█▍ | 52784/371472 [4:12:35<27:11:05, 3.26it/s] 14%|█▍ | 52785/371472 [4:12:35<25:18:59, 3.50it/s] 14%|█▍ | 52786/371472 [4:12:35<25:35:19, 3.46it/s] 14%|█▍ | 52787/371472 [4:12:36<25:10:47, 3.52it/s] 14%|█▍ | 52788/371472 [4:12:36<24:49:03, 3.57it/s] 14%|█▍ | 52789/371472 [4:12:36<24:03:30, 3.68it/s] 14%|█▍ | 52790/371472 [4:12:36<24:25:44, 3.62it/s] 14%|█▍ | 52791/371472 [4:12:37<27:20:01, 3.24it/s] 14%|█▍ | 52792/371472 [4:12:37<28:31:09, 3.10it/s] 14%|█▍ | 52793/371472 [4:12:37<26:38:03, 3.32it/s] 14%|█▍ | 52794/371472 [4:12:38<25:52:50, 3.42it/s] 14%|█▍ | 52795/371472 [4:12:38<28:09:22, 3.14it/s] 14%|█▍ | 52796/371472 [4:12:38<28:11:10, 3.14it/s] 14%|█▍ | 52797/371472 [4:12:39<27:57:25, 3.17it/s] 14%|█▍ | 52798/371472 [4:12:39<27:27:33, 3.22it/s] 14%|█▍ | 52799/371472 [4:12:39<26:25:08, 3.35it/s] 14%|█▍ | 52800/371472 [4:12:40<24:49:00, 3.57it/s] {'loss': 4.0382, 'learning_rate': 8.724924044905083e-07, 'epoch': 2.27} + 14%|█▍ | 52800/371472 [4:12:40<24:49:00, 3.57it/s] 14%|█▍ | 52801/371472 [4:12:40<23:52:47, 3.71it/s] 14%|█▍ | 52802/371472 [4:12:40<23:59:56, 3.69it/s] 14%|█▍ | 52803/371472 [4:12:40<25:37:23, 3.45it/s] 14%|█▍ | 52804/371472 [4:12:41<24:48:15, 3.57it/s] 14%|█▍ | 52805/371472 [4:12:41<24:05:58, 3.67it/s] 14%|█▍ | 52806/371472 [4:12:41<23:56:57, 3.70it/s] 14%|█▍ | 52807/371472 [4:12:42<25:04:25, 3.53it/s] 14%|█▍ | 52808/371472 [4:12:42<26:05:13, 3.39it/s] 14%|█▍ | 52809/371472 [4:12:42<26:36:49, 3.33it/s] 14%|█▍ | 52810/371472 [4:12:42<27:06:54, 3.26it/s] 14%|█▍ | 52811/371472 [4:12:43<25:45:08, 3.44it/s] 14%|█▍ | 52812/371472 [4:12:43<26:25:54, 3.35it/s] 14%|█▍ | 52813/371472 [4:12:43<25:51:10, 3.42it/s] 14%|█▍ | 52814/371472 [4:12:44<25:03:04, 3.53it/s] 14%|█▍ | 52815/371472 [4:12:44<24:41:41, 3.58it/s] 14%|█▍ | 52816/371472 [4:12:44<24:12:54, 3.66it/s] 14%|█▍ | 52817/371472 [4:12:44<24:21:34, 3.63it/s] 14%|█▍ | 52818/371472 [4:12:45<24:32:29, 3.61it/s] 14%|█▍ | 52819/371472 [4:12:45<24:46:13, 3.57it/s] 14%|█▍ | 52820/371472 [4:12:45<24:51:47, 3.56it/s] {'loss': 4.1505, 'learning_rate': 8.724439225150294e-07, 'epoch': 2.28} + 14%|█▍ | 52820/371472 [4:12:45<24:51:47, 3.56it/s] 14%|█▍ | 52821/371472 [4:12:46<24:44:27, 3.58it/s] 14%|█▍ | 52822/371472 [4:12:46<24:37:50, 3.59it/s] 14%|█▍ | 52823/371472 [4:12:46<24:30:00, 3.61it/s] 14%|█▍ | 52824/371472 [4:12:46<24:23:41, 3.63it/s] 14%|█▍ | 52825/371472 [4:12:47<24:35:16, 3.60it/s] 14%|█▍ | 52826/371472 [4:12:47<25:34:40, 3.46it/s] 14%|█▍ | 52827/371472 [4:12:47<25:53:59, 3.42it/s] 14%|█▍ | 52828/371472 [4:12:48<25:10:35, 3.52it/s] 14%|█▍ | 52829/371472 [4:12:48<27:59:20, 3.16it/s] 14%|█▍ | 52830/371472 [4:12:48<26:29:13, 3.34it/s] 14%|█▍ | 52831/371472 [4:12:48<25:27:24, 3.48it/s] 14%|█▍ | 52832/371472 [4:12:49<25:15:48, 3.50it/s] 14%|█▍ | 52833/371472 [4:12:49<24:18:18, 3.64it/s] 14%|█▍ | 52834/371472 [4:12:49<24:17:51, 3.64it/s] 14%|█▍ | 52835/371472 [4:12:50<24:42:14, 3.58it/s] 14%|█▍ | 52836/371472 [4:12:50<24:49:49, 3.56it/s] 14%|█▍ | 52837/371472 [4:12:50<24:42:44, 3.58it/s] 14%|█▍ | 52838/371472 [4:12:50<25:16:56, 3.50it/s] 14%|█▍ | 52839/371472 [4:12:51<25:12:36, 3.51it/s] 14%|█▍ | 52840/371472 [4:12:51<26:20:08, 3.36it/s] {'loss': 4.3047, 'learning_rate': 8.723954405395504e-07, 'epoch': 2.28} + 14%|█▍ | 52840/371472 [4:12:51<26:20:08, 3.36it/s] 14%|█▍ | 52841/371472 [4:12:51<25:44:50, 3.44it/s] 14%|█▍ | 52842/371472 [4:12:52<26:56:19, 3.29it/s] 14%|█▍ | 52843/371472 [4:12:52<25:31:50, 3.47it/s] 14%|█▍ | 52844/371472 [4:12:52<24:50:33, 3.56it/s] 14%|█▍ | 52845/371472 [4:12:52<25:15:09, 3.50it/s] 14%|█▍ | 52846/371472 [4:12:53<24:56:50, 3.55it/s] 14%|█▍ | 52847/371472 [4:12:53<24:27:17, 3.62it/s] 14%|█▍ | 52848/371472 [4:12:53<24:53:02, 3.56it/s] 14%|█▍ | 52849/371472 [4:12:54<25:24:31, 3.48it/s] 14%|█▍ | 52850/371472 [4:12:54<24:07:10, 3.67it/s] 14%|█▍ | 52851/371472 [4:12:54<26:05:23, 3.39it/s] 14%|█▍ | 52852/371472 [4:12:54<26:46:20, 3.31it/s] 14%|█▍ | 52853/371472 [4:12:55<26:14:42, 3.37it/s] 14%|█▍ | 52854/371472 [4:12:55<25:41:51, 3.44it/s] 14%|█▍ | 52855/371472 [4:12:55<25:05:08, 3.53it/s] 14%|█▍ | 52856/371472 [4:12:56<25:10:05, 3.52it/s] 14%|█▍ | 52857/371472 [4:12:56<25:05:16, 3.53it/s] 14%|█▍ | 52858/371472 [4:12:56<25:38:17, 3.45it/s] 14%|█▍ | 52859/371472 [4:12:56<25:08:04, 3.52it/s] 14%|█▍ | 52860/371472 [4:12:57<24:57:11, 3.55it/s] {'loss': 4.4541, 'learning_rate': 8.723469585640715e-07, 'epoch': 2.28} + 14%|█▍ | 52860/371472 [4:12:57<24:57:11, 3.55it/s] 14%|█▍ | 52861/371472 [4:12:57<26:57:00, 3.28it/s] 14%|█▍ | 52862/371472 [4:12:57<25:25:44, 3.48it/s] 14%|█▍ | 52863/371472 [4:12:58<24:22:35, 3.63it/s] 14%|█▍ | 52864/371472 [4:12:58<26:44:48, 3.31it/s] 14%|█▍ | 52865/371472 [4:12:58<25:43:31, 3.44it/s] 14%|█▍ | 52866/371472 [4:12:59<27:44:14, 3.19it/s] 14%|█▍ | 52867/371472 [4:12:59<26:36:17, 3.33it/s] 14%|█▍ | 52868/371472 [4:12:59<25:50:50, 3.42it/s] 14%|█▍ | 52869/371472 [4:12:59<24:48:26, 3.57it/s] 14%|█▍ | 52870/371472 [4:13:00<24:14:07, 3.65it/s] 14%|█▍ | 52871/371472 [4:13:00<23:58:35, 3.69it/s] 14%|█▍ | 52872/371472 [4:13:00<24:21:22, 3.63it/s] 14%|█▍ | 52873/371472 [4:13:00<24:30:38, 3.61it/s] 14%|█▍ | 52874/371472 [4:13:01<23:25:04, 3.78it/s] 14%|█▍ | 52875/371472 [4:13:01<23:31:20, 3.76it/s] 14%|█▍ | 52876/371472 [4:13:01<23:26:54, 3.77it/s] 14%|█▍ | 52877/371472 [4:13:01<23:07:32, 3.83it/s] 14%|█▍ | 52878/371472 [4:13:02<24:17:40, 3.64it/s] 14%|█▍ | 52879/371472 [4:13:02<26:25:14, 3.35it/s] 14%|█▍ | 52880/371472 [4:13:02<25:08:30, 3.52it/s] {'loss': 4.1365, 'learning_rate': 8.722984765885927e-07, 'epoch': 2.28} + 14%|█▍ | 52880/371472 [4:13:02<25:08:30, 3.52it/s] 14%|█▍ | 52881/371472 [4:13:03<24:43:17, 3.58it/s] 14%|█▍ | 52882/371472 [4:13:03<26:58:37, 3.28it/s] 14%|█▍ | 52883/371472 [4:13:03<25:28:52, 3.47it/s] 14%|█▍ | 52884/371472 [4:13:04<25:18:59, 3.50it/s] 14%|█▍ | 52885/371472 [4:13:04<24:39:35, 3.59it/s] 14%|█▍ | 52886/371472 [4:13:04<25:19:44, 3.49it/s] 14%|█▍ | 52887/371472 [4:13:04<26:35:58, 3.33it/s] 14%|█▍ | 52888/371472 [4:13:05<25:07:36, 3.52it/s] 14%|█▍ | 52889/371472 [4:13:05<24:35:01, 3.60it/s] 14%|█▍ | 52890/371472 [4:13:05<24:28:20, 3.62it/s] 14%|█▍ | 52891/371472 [4:13:05<24:48:10, 3.57it/s] 14%|█▍ | 52892/371472 [4:13:06<24:32:44, 3.61it/s] 14%|█▍ | 52893/371472 [4:13:06<23:34:30, 3.75it/s] 14%|█▍ | 52894/371472 [4:13:06<24:00:04, 3.69it/s] 14%|█▍ | 52895/371472 [4:13:07<24:43:54, 3.58it/s] 14%|█▍ | 52896/371472 [4:13:07<24:58:28, 3.54it/s] 14%|█▍ | 52897/371472 [4:13:07<24:13:11, 3.65it/s] 14%|█▍ | 52898/371472 [4:13:07<23:58:21, 3.69it/s] 14%|█▍ | 52899/371472 [4:13:08<24:22:29, 3.63it/s] 14%|█▍ | 52900/371472 [4:13:08<23:45:25, 3.72it/s] {'loss': 4.2079, 'learning_rate': 8.722499946131139e-07, 'epoch': 2.28} + 14%|█▍ | 52900/371472 [4:13:08<23:45:25, 3.72it/s] 14%|█▍ | 52901/371472 [4:13:08<23:09:49, 3.82it/s] 14%|█▍ | 52902/371472 [4:13:08<23:16:03, 3.80it/s] 14%|█▍ | 52903/371472 [4:13:09<23:05:16, 3.83it/s] 14%|█▍ | 52904/371472 [4:13:09<23:40:02, 3.74it/s] 14%|█▍ | 52905/371472 [4:13:09<23:20:19, 3.79it/s] 14%|█▍ | 52906/371472 [4:13:10<23:51:01, 3.71it/s] 14%|█▍ | 52907/371472 [4:13:10<25:30:32, 3.47it/s] 14%|█▍ | 52908/371472 [4:13:10<28:04:12, 3.15it/s] 14%|█▍ | 52909/371472 [4:13:11<28:04:43, 3.15it/s] 14%|█▍ | 52910/371472 [4:13:11<27:02:33, 3.27it/s] 14%|█▍ | 52911/371472 [4:13:11<26:16:25, 3.37it/s] 14%|█▍ | 52912/371472 [4:13:11<26:22:30, 3.36it/s] 14%|█▍ | 52913/371472 [4:13:12<27:39:18, 3.20it/s] 14%|█▍ | 52914/371472 [4:13:12<25:37:54, 3.45it/s] 14%|█▍ | 52915/371472 [4:13:12<24:48:43, 3.57it/s] 14%|█▍ | 52916/371472 [4:13:13<24:25:47, 3.62it/s] 14%|█▍ | 52917/371472 [4:13:13<23:41:57, 3.73it/s] 14%|█▍ | 52918/371472 [4:13:13<24:12:48, 3.65it/s] 14%|█▍ | 52919/371472 [4:13:13<24:19:02, 3.64it/s] 14%|█▍ | 52920/371472 [4:13:14<24:08:56, 3.66it/s] {'loss': 4.2184, 'learning_rate': 8.722015126376349e-07, 'epoch': 2.28} + 14%|█▍ | 52920/371472 [4:13:14<24:08:56, 3.66it/s] 14%|█▍ | 52921/371472 [4:13:14<26:39:06, 3.32it/s] 14%|█▍ | 52922/371472 [4:13:14<27:46:29, 3.19it/s] 14%|█▍ | 52923/371472 [4:13:15<26:52:37, 3.29it/s] 14%|█▍ | 52924/371472 [4:13:15<25:13:20, 3.51it/s] 14%|█▍ | 52925/371472 [4:13:15<31:41:14, 2.79it/s] 14%|█▍ | 52926/371472 [4:13:16<28:47:42, 3.07it/s] 14%|█▍ | 52927/371472 [4:13:16<26:47:16, 3.30it/s] 14%|█▍ | 52928/371472 [4:13:16<25:33:02, 3.46it/s] 14%|█▍ | 52929/371472 [4:13:16<25:07:50, 3.52it/s] 14%|█▍ | 52930/371472 [4:13:17<24:38:14, 3.59it/s] 14%|█▍ | 52931/371472 [4:13:17<24:42:51, 3.58it/s] 14%|█▍ | 52932/371472 [4:13:17<23:58:11, 3.69it/s] 14%|█▍ | 52933/371472 [4:13:17<25:08:56, 3.52it/s] 14%|█▍ | 52934/371472 [4:13:18<25:09:49, 3.52it/s] 14%|█▍ | 52935/371472 [4:13:18<24:18:41, 3.64it/s] 14%|█▍ | 52936/371472 [4:13:18<25:25:06, 3.48it/s] 14%|█▍ | 52937/371472 [4:13:19<24:26:34, 3.62it/s] 14%|█▍ | 52938/371472 [4:13:19<24:21:34, 3.63it/s] 14%|█▍ | 52939/371472 [4:13:19<24:29:39, 3.61it/s] 14%|█▍ | 52940/371472 [4:13:19<25:50:08, 3.42it/s] {'loss': 4.298, 'learning_rate': 8.721530306621559e-07, 'epoch': 2.28} + 14%|█▍ | 52940/371472 [4:13:19<25:50:08, 3.42it/s] 14%|█▍ | 52941/371472 [4:13:20<24:46:15, 3.57it/s] 14%|█▍ | 52942/371472 [4:13:20<25:39:21, 3.45it/s] 14%|█▍ | 52943/371472 [4:13:20<24:40:28, 3.59it/s] 14%|█▍ | 52944/371472 [4:13:21<24:05:38, 3.67it/s] 14%|█▍ | 52945/371472 [4:13:21<24:07:18, 3.67it/s] 14%|█▍ | 52946/371472 [4:13:21<25:14:11, 3.51it/s] 14%|█▍ | 52947/371472 [4:13:21<26:28:01, 3.34it/s] 14%|█▍ | 52948/371472 [4:13:22<25:16:43, 3.50it/s] 14%|█▍ | 52949/371472 [4:13:22<25:53:09, 3.42it/s] 14%|█▍ | 52950/371472 [4:13:22<25:45:43, 3.43it/s] 14%|█▍ | 52951/371472 [4:13:23<26:40:54, 3.32it/s] 14%|█▍ | 52952/371472 [4:13:23<26:15:04, 3.37it/s] 14%|█▍ | 52953/371472 [4:13:23<25:09:17, 3.52it/s] 14%|█▍ | 52954/371472 [4:13:23<24:11:07, 3.66it/s] 14%|█▍ | 52955/371472 [4:13:24<23:41:23, 3.73it/s] 14%|█▍ | 52956/371472 [4:13:24<24:10:28, 3.66it/s] 14%|█▍ | 52957/371472 [4:13:24<25:02:47, 3.53it/s] 14%|█▍ | 52958/371472 [4:13:25<27:17:16, 3.24it/s] 14%|█▍ | 52959/371472 [4:13:25<26:55:33, 3.29it/s] 14%|█▍ | 52960/371472 [4:13:25<26:09:37, 3.38it/s] {'loss': 4.2712, 'learning_rate': 8.721045486866771e-07, 'epoch': 2.28} + 14%|█▍ | 52960/371472 [4:13:25<26:09:37, 3.38it/s] 14%|█▍ | 52961/371472 [4:13:26<25:49:34, 3.43it/s] 14%|█▍ | 52962/371472 [4:13:26<25:19:58, 3.49it/s] 14%|█▍ | 52963/371472 [4:13:26<25:34:03, 3.46it/s] 14%|█▍ | 52964/371472 [4:13:26<24:37:03, 3.59it/s] 14%|█▍ | 52965/371472 [4:13:27<23:51:18, 3.71it/s] 14%|█▍ | 52966/371472 [4:13:27<23:22:57, 3.78it/s] 14%|█▍ | 52967/371472 [4:13:27<23:21:51, 3.79it/s] 14%|█▍ | 52968/371472 [4:13:27<23:37:24, 3.75it/s] 14%|█▍ | 52969/371472 [4:13:28<23:02:12, 3.84it/s] 14%|█▍ | 52970/371472 [4:13:28<24:06:17, 3.67it/s] 14%|█▍ | 52971/371472 [4:13:28<24:59:16, 3.54it/s] 14%|█▍ | 52972/371472 [4:13:29<26:31:35, 3.34it/s] 14%|█▍ | 52973/371472 [4:13:29<25:54:17, 3.42it/s] 14%|█▍ | 52974/371472 [4:13:29<25:22:48, 3.49it/s] 14%|█▍ | 52975/371472 [4:13:29<25:48:15, 3.43it/s] 14%|█▍ | 52976/371472 [4:13:30<24:15:28, 3.65it/s] 14%|█▍ | 52977/371472 [4:13:30<25:05:03, 3.53it/s] 14%|█▍ | 52978/371472 [4:13:30<25:09:14, 3.52it/s] 14%|█▍ | 52979/371472 [4:13:31<25:11:00, 3.51it/s] 14%|█▍ | 52980/371472 [4:13:31<27:09:21, 3.26it/s] {'loss': 4.2324, 'learning_rate': 8.720560667111983e-07, 'epoch': 2.28} + 14%|█▍ | 52980/371472 [4:13:31<27:09:21, 3.26it/s] 14%|█▍ | 52981/371472 [4:13:31<26:37:55, 3.32it/s] 14%|█▍ | 52982/371472 [4:13:31<25:10:45, 3.51it/s] 14%|█▍ | 52983/371472 [4:13:32<25:00:44, 3.54it/s] 14%|█▍ | 52984/371472 [4:13:32<27:58:14, 3.16it/s] 14%|█▍ | 52985/371472 [4:13:32<27:41:52, 3.19it/s] 14%|█▍ | 52986/371472 [4:13:33<25:46:08, 3.43it/s] 14%|█▍ | 52987/371472 [4:13:33<25:44:29, 3.44it/s] 14%|█��� | 52988/371472 [4:13:33<25:33:54, 3.46it/s] 14%|█▍ | 52989/371472 [4:13:33<24:51:23, 3.56it/s] 14%|█▍ | 52990/371472 [4:13:34<24:38:33, 3.59it/s] 14%|█▍ | 52991/371472 [4:13:34<23:41:33, 3.73it/s] 14%|█▍ | 52992/371472 [4:13:34<25:33:14, 3.46it/s] 14%|█▍ | 52993/371472 [4:13:35<25:10:24, 3.51it/s] 14%|█▍ | 52994/371472 [4:13:35<24:25:06, 3.62it/s] 14%|█▍ | 52995/371472 [4:13:35<23:59:59, 3.69it/s] 14%|█▍ | 52996/371472 [4:13:35<24:14:36, 3.65it/s] 14%|█▍ | 52997/371472 [4:13:36<26:01:50, 3.40it/s] 14%|█▍ | 52998/371472 [4:13:36<25:10:09, 3.51it/s] 14%|█▍ | 52999/371472 [4:13:36<23:55:28, 3.70it/s] 14%|█▍ | 53000/371472 [4:13:36<23:25:46, 3.78it/s] {'loss': 4.2026, 'learning_rate': 8.720075847357193e-07, 'epoch': 2.28} + 14%|█▍ | 53000/371472 [4:13:36<23:25:46, 3.78it/s] 14%|█▍ | 53001/371472 [4:13:37<23:15:06, 3.80it/s] 14%|█▍ | 53002/371472 [4:13:37<23:41:09, 3.73it/s] 14%|█▍ | 53003/371472 [4:13:37<25:24:59, 3.48it/s] 14%|█▍ | 53004/371472 [4:13:38<25:13:03, 3.51it/s] 14%|█▍ | 53005/371472 [4:13:38<25:17:30, 3.50it/s] 14%|█▍ | 53006/371472 [4:13:38<25:05:10, 3.53it/s] 14%|█▍ | 53007/371472 [4:13:38<24:23:03, 3.63it/s] 14%|█▍ | 53008/371472 [4:13:39<23:45:47, 3.72it/s] 14%|█▍ | 53009/371472 [4:13:39<25:04:09, 3.53it/s] 14%|█▍ | 53010/371472 [4:13:39<24:53:20, 3.55it/s] 14%|█▍ | 53011/371472 [4:13:40<25:05:01, 3.53it/s] 14%|█▍ | 53012/371472 [4:13:40<24:55:57, 3.55it/s] 14%|█▍ | 53013/371472 [4:13:40<24:04:25, 3.67it/s] 14%|█▍ | 53014/371472 [4:13:40<23:37:34, 3.74it/s] 14%|█▍ | 53015/371472 [4:13:41<24:34:24, 3.60it/s] 14%|█▍ | 53016/371472 [4:13:41<23:55:35, 3.70it/s] 14%|█▍ | 53017/371472 [4:13:41<23:38:39, 3.74it/s] 14%|█▍ | 53018/371472 [4:13:41<23:50:35, 3.71it/s] 14%|█▍ | 53019/371472 [4:13:42<25:52:01, 3.42it/s] 14%|█▍ | 53020/371472 [4:13:42<27:13:24, 3.25it/s] {'loss': 4.2496, 'learning_rate': 8.719591027602404e-07, 'epoch': 2.28} + 14%|█▍ | 53020/371472 [4:13:42<27:13:24, 3.25it/s] 14%|█▍ | 53021/371472 [4:13:42<25:51:06, 3.42it/s] 14%|█▍ | 53022/371472 [4:13:43<24:55:00, 3.55it/s] 14%|█▍ | 53023/371472 [4:13:43<24:25:16, 3.62it/s] 14%|█▍ | 53024/371472 [4:13:43<24:22:00, 3.63it/s] 14%|█▍ | 53025/371472 [4:13:43<23:25:45, 3.78it/s] 14%|█▍ | 53026/371472 [4:13:44<22:32:13, 3.92it/s] 14%|█▍ | 53027/371472 [4:13:44<23:22:17, 3.78it/s] 14%|█▍ | 53028/371472 [4:13:44<22:46:52, 3.88it/s] 14%|█▍ | 53029/371472 [4:13:44<23:05:01, 3.83it/s] 14%|█▍ | 53030/371472 [4:13:45<23:33:16, 3.76it/s] 14%|█▍ | 53031/371472 [4:13:45<24:40:06, 3.59it/s] 14%|█▍ | 53032/371472 [4:13:45<24:29:16, 3.61it/s] 14%|█▍ | 53033/371472 [4:13:46<27:24:32, 3.23it/s] 14%|█▍ | 53034/371472 [4:13:46<27:24:29, 3.23it/s] 14%|█▍ | 53035/371472 [4:13:46<26:53:31, 3.29it/s] 14%|█▍ | 53036/371472 [4:13:47<25:10:41, 3.51it/s] 14%|█▍ | 53037/371472 [4:13:47<26:09:02, 3.38it/s] 14%|█▍ | 53038/371472 [4:13:47<26:17:11, 3.36it/s] 14%|█▍ | 53039/371472 [4:13:47<25:15:57, 3.50it/s] 14%|█▍ | 53040/371472 [4:13:48<25:11:43, 3.51it/s] {'loss': 4.337, 'learning_rate': 8.719106207847617e-07, 'epoch': 2.28} + 14%|█▍ | 53040/371472 [4:13:48<25:11:43, 3.51it/s] 14%|█▍ | 53041/371472 [4:13:48<27:04:03, 3.27it/s] 14%|█▍ | 53042/371472 [4:13:48<26:31:20, 3.34it/s] 14%|█▍ | 53043/371472 [4:13:49<25:53:30, 3.42it/s] 14%|█▍ | 53044/371472 [4:13:49<25:12:48, 3.51it/s] 14%|█▍ | 53045/371472 [4:13:49<24:14:25, 3.65it/s] 14%|█▍ | 53046/371472 [4:13:49<23:55:41, 3.70it/s] 14%|█▍ | 53047/371472 [4:13:50<24:29:31, 3.61it/s] 14%|█▍ | 53048/371472 [4:13:50<23:41:59, 3.73it/s] 14%|█▍ | 53049/371472 [4:13:50<24:00:59, 3.68it/s] 14%|█▍ | 53050/371472 [4:13:50<23:50:07, 3.71it/s] 14%|█▍ | 53051/371472 [4:13:51<23:42:15, 3.73it/s] 14%|█▍ | 53052/371472 [4:13:51<24:11:09, 3.66it/s] 14%|█▍ | 53053/371472 [4:13:51<23:31:56, 3.76it/s] 14%|█▍ | 53054/371472 [4:13:52<25:47:22, 3.43it/s] 14%|█▍ | 53055/371472 [4:13:52<25:04:57, 3.53it/s] 14%|█▍ | 53056/371472 [4:13:52<26:28:13, 3.34it/s] 14%|█▍ | 53057/371472 [4:13:53<27:29:46, 3.22it/s] 14%|█▍ | 53058/371472 [4:13:53<27:24:07, 3.23it/s] 14%|█▍ | 53059/371472 [4:13:53<27:34:20, 3.21it/s] 14%|█▍ | 53060/371472 [4:13:54<27:15:04, 3.25it/s] {'loss': 4.2557, 'learning_rate': 8.718621388092826e-07, 'epoch': 2.29} + 14%|█▍ | 53060/371472 [4:13:54<27:15:04, 3.25it/s] 14%|█▍ | 53061/371472 [4:13:54<27:35:52, 3.20it/s] 14%|█▍ | 53062/371472 [4:13:54<25:42:08, 3.44it/s] 14%|█▍ | 53063/371472 [4:13:54<25:34:56, 3.46it/s] 14%|█▍ | 53064/371472 [4:13:55<24:29:43, 3.61it/s] 14%|█▍ | 53065/371472 [4:13:55<26:28:09, 3.34it/s] 14%|█▍ | 53066/371472 [4:13:55<26:10:47, 3.38it/s] 14%|█▍ | 53067/371472 [4:13:56<26:39:48, 3.32it/s] 14%|█▍ | 53068/371472 [4:13:56<25:27:40, 3.47it/s] 14%|█▍ | 53069/371472 [4:13:56<25:33:23, 3.46it/s] 14%|█▍ | 53070/371472 [4:13:56<25:41:17, 3.44it/s] 14%|█▍ | 53071/371472 [4:13:57<25:50:42, 3.42it/s] 14%|█▍ | 53072/371472 [4:13:57<24:57:38, 3.54it/s] 14%|█▍ | 53073/371472 [4:13:57<24:20:26, 3.63it/s] 14%|█▍ | 53074/371472 [4:13:58<26:12:05, 3.38it/s] 14%|█▍ | 53075/371472 [4:13:58<25:11:30, 3.51it/s] 14%|█▍ | 53076/371472 [4:13:58<25:04:45, 3.53it/s] 14%|█▍ | 53077/371472 [4:13:58<25:14:01, 3.50it/s] 14%|█▍ | 53078/371472 [4:13:59<24:52:45, 3.55it/s] 14%|█▍ | 53079/371472 [4:13:59<25:17:32, 3.50it/s] 14%|█▍ | 53080/371472 [4:13:59<25:12:33, 3.51it/s] {'loss': 4.2969, 'learning_rate': 8.718136568338037e-07, 'epoch': 2.29} + 14%|█▍ | 53080/371472 [4:13:59<25:12:33, 3.51it/s] 14%|█▍ | 53081/371472 [4:13:59<24:27:48, 3.62it/s] 14%|█▍ | 53082/371472 [4:14:00<24:17:35, 3.64it/s] 14%|█▍ | 53083/371472 [4:14:00<24:23:48, 3.63it/s] 14%|█▍ | 53084/371472 [4:14:00<23:26:10, 3.77it/s] 14%|█▍ | 53085/371472 [4:14:01<23:07:58, 3.82it/s] 14%|█▍ | 53086/371472 [4:14:01<24:34:37, 3.60it/s] 14%|█▍ | 53087/371472 [4:14:01<24:40:11, 3.58it/s] 14%|█▍ | 53088/371472 [4:14:01<23:36:23, 3.75it/s] 14%|█▍ | 53089/371472 [4:14:02<23:57:14, 3.69it/s] 14%|█▍ | 53090/371472 [4:14:02<23:07:19, 3.82it/s] 14%|█▍ | 53091/371472 [4:14:02<25:07:37, 3.52it/s] 14%|█▍ | 53092/371472 [4:14:03<24:45:31, 3.57it/s] 14%|█▍ | 53093/371472 [4:14:03<25:09:45, 3.51it/s] 14%|█▍ | 53094/371472 [4:14:03<25:02:24, 3.53it/s] 14%|█▍ | 53095/371472 [4:14:03<24:51:57, 3.56it/s] 14%|█▍ | 53096/371472 [4:14:04<24:43:06, 3.58it/s] 14%|█▍ | 53097/371472 [4:14:04<25:20:42, 3.49it/s] 14%|█▍ | 53098/371472 [4:14:04<25:25:48, 3.48it/s] 14%|█▍ | 53099/371472 [4:14:05<25:14:19, 3.50it/s] 14%|█▍ | 53100/371472 [4:14:05<25:16:45, 3.50it/s] {'loss': 4.4096, 'learning_rate': 8.717651748583248e-07, 'epoch': 2.29} + 14%|█▍ | 53100/371472 [4:14:05<25:16:45, 3.50it/s] 14%|█▍ | 53101/371472 [4:14:05<25:36:13, 3.45it/s] 14%|█▍ | 53102/371472 [4:14:05<25:12:12, 3.51it/s] 14%|█▍ | 53103/371472 [4:14:06<24:19:28, 3.64it/s] 14%|█▍ | 53104/371472 [4:14:06<24:20:12, 3.63it/s] 14%|█▍ | 53105/371472 [4:14:06<24:25:00, 3.62it/s] 14%|█▍ | 53106/371472 [4:14:06<24:59:26, 3.54it/s] 14%|█▍ | 53107/371472 [4:14:07<23:57:38, 3.69it/s] 14%|█▍ | 53108/371472 [4:14:07<24:02:30, 3.68it/s] 14%|█▍ | 53109/371472 [4:14:07<23:25:19, 3.78it/s] 14%|█▍ | 53110/371472 [4:14:08<23:30:19, 3.76it/s] 14%|█▍ | 53111/371472 [4:14:08<24:37:14, 3.59it/s] 14%|█▍ | 53112/371472 [4:14:08<23:50:46, 3.71it/s] 14%|█▍ | 53113/371472 [4:14:08<23:47:05, 3.72it/s] 14%|█▍ | 53114/371472 [4:14:09<26:17:54, 3.36it/s] 14%|█▍ | 53115/371472 [4:14:09<26:36:51, 3.32it/s] 14%|█▍ | 53116/371472 [4:14:09<25:20:09, 3.49it/s] 14%|█▍ | 53117/371472 [4:14:10<24:53:06, 3.55it/s] 14%|█▍ | 53118/371472 [4:14:10<24:58:16, 3.54it/s] 14%|█▍ | 53119/371472 [4:14:10<23:46:33, 3.72it/s] 14%|█▍ | 53120/371472 [4:14:10<23:22:58, 3.78it/s] {'loss': 4.312, 'learning_rate': 8.71716692882846e-07, 'epoch': 2.29} + 14%|█▍ | 53120/371472 [4:14:10<23:22:58, 3.78it/s] 14%|█▍ | 53121/371472 [4:14:11<23:21:55, 3.78it/s] 14%|█▍ | 53122/371472 [4:14:11<24:29:35, 3.61it/s] 14%|█▍ | 53123/371472 [4:14:11<23:41:14, 3.73it/s] 14%|█▍ | 53124/371472 [4:14:11<24:21:58, 3.63it/s] 14%|█▍ | 53125/371472 [4:14:12<23:41:18, 3.73it/s] 14%|█▍ | 53126/371472 [4:14:12<24:13:23, 3.65it/s] 14%|█▍ | 53127/371472 [4:14:12<24:49:20, 3.56it/s] 14%|█▍ | 53128/371472 [4:14:13<24:16:00, 3.64it/s] 14%|█▍ | 53129/371472 [4:14:13<25:19:11, 3.49it/s] 14%|█▍ | 53130/371472 [4:14:13<26:20:42, 3.36it/s] 14%|█▍ | 53131/371472 [4:14:13<25:19:23, 3.49it/s] 14%|█▍ | 53132/371472 [4:14:14<25:43:54, 3.44it/s] 14%|█▍ | 53133/371472 [4:14:14<24:36:31, 3.59it/s] 14%|█▍ | 53134/371472 [4:14:14<24:30:27, 3.61it/s] 14%|█▍ | 53135/371472 [4:14:15<25:14:17, 3.50it/s] 14%|█▍ | 53136/371472 [4:14:15<24:43:15, 3.58it/s] 14%|█▍ | 53137/371472 [4:14:15<25:16:45, 3.50it/s] 14%|█▍ | 53138/371472 [4:14:15<26:39:10, 3.32it/s] 14%|█▍ | 53139/371472 [4:14:16<25:42:18, 3.44it/s] 14%|█▍ | 53140/371472 [4:14:16<25:14:49, 3.50it/s] {'loss': 4.2076, 'learning_rate': 8.71668210907367e-07, 'epoch': 2.29} + 14%|█▍ | 53140/371472 [4:14:16<25:14:49, 3.50it/s] 14%|█▍ | 53141/371472 [4:14:16<24:38:56, 3.59it/s] 14%|█▍ | 53142/371472 [4:14:17<24:53:33, 3.55it/s] 14%|█▍ | 53143/371472 [4:14:17<23:54:56, 3.70it/s] 14%|█▍ | 53144/371472 [4:14:17<24:01:10, 3.68it/s] 14%|█▍ | 53145/371472 [4:14:17<23:36:05, 3.75it/s] 14%|█▍ | 53146/371472 [4:14:18<24:31:25, 3.61it/s] 14%|█▍ | 53147/371472 [4:14:18<24:39:50, 3.59it/s] 14%|█▍ | 53148/371472 [4:14:18<24:52:24, 3.55it/s] 14%|█▍ | 53149/371472 [4:14:18<24:18:31, 3.64it/s] 14%|█▍ | 53150/371472 [4:14:19<24:25:45, 3.62it/s] 14%|█▍ | 53151/371472 [4:14:19<23:44:13, 3.73it/s] 14%|█▍ | 53152/371472 [4:14:19<23:59:05, 3.69it/s] 14%|█▍ | 53153/371472 [4:14:19<23:34:15, 3.75it/s] 14%|█▍ | 53154/371472 [4:14:20<24:14:42, 3.65it/s] 14%|█▍ | 53155/371472 [4:14:20<24:18:46, 3.64it/s] 14%|█▍ | 53156/371472 [4:14:20<23:46:46, 3.72it/s] 14%|█▍ | 53157/371472 [4:14:21<25:01:01, 3.53it/s] 14%|█▍ | 53158/371472 [4:14:21<25:05:13, 3.52it/s] 14%|█▍ | 53159/371472 [4:14:21<23:50:31, 3.71it/s] 14%|█▍ | 53160/371472 [4:14:21<23:49:09, 3.71it/s] {'loss': 4.1274, 'learning_rate': 8.716197289318882e-07, 'epoch': 2.29} + 14%|█▍ | 53160/371472 [4:14:21<23:49:09, 3.71it/s] 14%|█▍ | 53161/371472 [4:14:22<23:29:39, 3.76it/s] 14%|█▍ | 53162/371472 [4:14:22<23:36:20, 3.75it/s] 14%|█▍ | 53163/371472 [4:14:22<24:05:45, 3.67it/s] 14%|█▍ | 53164/371472 [4:14:23<24:24:26, 3.62it/s] 14%|█▍ | 53165/371472 [4:14:23<24:57:27, 3.54it/s] 14%|█▍ | 53166/371472 [4:14:23<23:40:55, 3.73it/s] 14%|█▍ | 53167/371472 [4:14:23<24:07:19, 3.67it/s] 14%|█▍ | 53168/371472 [4:14:24<25:46:38, 3.43it/s] 14%|█▍ | 53169/371472 [4:14:24<24:46:48, 3.57it/s] 14%|█▍ | 53170/371472 [4:14:24<24:01:15, 3.68it/s] 14%|█▍ | 53171/371472 [4:14:25<26:13:15, 3.37it/s] 14%|█▍ | 53172/371472 [4:14:25<24:58:53, 3.54it/s] 14%|█▍ | 53173/371472 [4:14:25<25:18:51, 3.49it/s] 14%|█▍ | 53174/371472 [4:14:25<24:05:49, 3.67it/s] 14%|█▍ | 53175/371472 [4:14:26<24:01:23, 3.68it/s] 14%|█▍ | 53176/371472 [4:14:26<23:50:20, 3.71it/s] 14%|█▍ | 53177/371472 [4:14:26<24:56:13, 3.55it/s] 14%|█▍ | 53178/371472 [4:14:26<24:02:03, 3.68it/s] 14%|█▍ | 53179/371472 [4:14:27<23:29:52, 3.76it/s] 14%|█▍ | 53180/371472 [4:14:27<27:30:29, 3.21it/s] {'loss': 4.2475, 'learning_rate': 8.715712469564093e-07, 'epoch': 2.29} + 14%|█▍ | 53180/371472 [4:14:27<27:30:29, 3.21it/s] 14%|█▍ | 53181/371472 [4:14:27<27:16:30, 3.24it/s] 14%|█▍ | 53182/371472 [4:14:28<25:39:49, 3.45it/s] 14%|█▍ | 53183/371472 [4:14:28<27:53:44, 3.17it/s] 14%|█▍ | 53184/371472 [4:14:28<28:34:07, 3.09it/s] 14%|█▍ | 53185/371472 [4:14:29<28:23:27, 3.11it/s] 14%|█▍ | 53186/371472 [4:14:29<27:19:18, 3.24it/s] 14%|█▍ | 53187/371472 [4:14:29<26:27:47, 3.34it/s] 14%|█▍ | 53188/371472 [4:14:29<25:37:50, 3.45it/s] 14%|█▍ | 53189/371472 [4:14:30<25:48:12, 3.43it/s] 14%|█▍ | 53190/371472 [4:14:30<25:55:19, 3.41it/s] 14%|█▍ | 53191/371472 [4:14:30<25:25:26, 3.48it/s] 14%|█▍ | 53192/371472 [4:14:31<25:27:25, 3.47it/s] 14%|█▍ | 53193/371472 [4:14:31<24:19:10, 3.64it/s] 14%|█▍ | 53194/371472 [4:14:31<23:53:05, 3.70it/s] 14%|█▍ | 53195/371472 [4:14:31<24:31:02, 3.61it/s] 14%|█▍ | 53196/371472 [4:14:32<24:17:42, 3.64it/s] 14%|█▍ | 53197/371472 [4:14:32<24:56:45, 3.54it/s] 14%|█▍ | 53198/371472 [4:14:32<24:00:20, 3.68it/s] 14%|█▍ | 53199/371472 [4:14:33<23:19:07, 3.79it/s] 14%|█▍ | 53200/371472 [4:14:33<22:59:34, 3.85it/s] {'loss': 4.1663, 'learning_rate': 8.715227649809304e-07, 'epoch': 2.29} + 14%|█▍ | 53200/371472 [4:14:33<22:59:34, 3.85it/s] 14%|█▍ | 53201/371472 [4:14:33<24:59:17, 3.54it/s] 14%|█▍ | 53202/371472 [4:14:33<23:57:01, 3.69it/s] 14%|█▍ | 53203/371472 [4:14:34<25:30:47, 3.47it/s] 14%|█▍ | 53204/371472 [4:14:34<24:31:03, 3.61it/s] 14%|█▍ | 53205/371472 [4:14:34<25:27:43, 3.47it/s] 14%|█▍ | 53206/371472 [4:14:35<27:41:55, 3.19it/s] 14%|█▍ | 53207/371472 [4:14:35<25:59:59, 3.40it/s] 14%|█▍ | 53208/371472 [4:14:35<26:00:19, 3.40it/s] 14%|█▍ | 53209/371472 [4:14:35<25:18:58, 3.49it/s] 14%|█▍ | 53210/371472 [4:14:36<24:00:48, 3.68it/s] 14%|█▍ | 53211/371472 [4:14:36<23:52:02, 3.70it/s] 14%|█▍ | 53212/371472 [4:14:36<24:22:35, 3.63it/s] 14%|█▍ | 53213/371472 [4:14:37<25:32:27, 3.46it/s] 14%|█▍ | 53214/371472 [4:14:37<24:35:50, 3.59it/s] 14%|█▍ | 53215/371472 [4:14:37<23:54:28, 3.70it/s] 14%|█▍ | 53216/371472 [4:14:37<23:39:16, 3.74it/s] 14%|█▍ | 53217/371472 [4:14:38<23:38:28, 3.74it/s] 14%|█▍ | 53218/371472 [4:14:38<23:18:13, 3.79it/s] 14%|█▍ | 53219/371472 [4:14:38<22:35:54, 3.91it/s] 14%|█▍ | 53220/371472 [4:14:38<23:26:59, 3.77it/s] {'loss': 4.2687, 'learning_rate': 8.714742830054514e-07, 'epoch': 2.29} + 14%|█▍ | 53220/371472 [4:14:38<23:26:59, 3.77it/s] 14%|█▍ | 53221/371472 [4:14:39<23:26:05, 3.77it/s] 14%|█▍ | 53222/371472 [4:14:39<23:09:13, 3.82it/s] 14%|█▍ | 53223/371472 [4:14:39<23:57:27, 3.69it/s] 14%|█▍ | 53224/371472 [4:14:39<23:44:58, 3.72it/s] 14%|█▍ | 53225/371472 [4:14:40<23:47:28, 3.72it/s] 14%|█▍ | 53226/371472 [4:14:40<25:24:58, 3.48it/s] 14%|█▍ | 53227/371472 [4:14:40<25:16:14, 3.50it/s] 14%|█▍ | 53228/371472 [4:14:41<28:30:58, 3.10it/s] 14%|█▍ | 53229/371472 [4:14:41<26:24:46, 3.35it/s] 14%|█▍ | 53230/371472 [4:14:41<24:53:00, 3.55it/s] 14%|█▍ | 53231/371472 [4:14:41<25:35:58, 3.45it/s] 14%|█▍ | 53232/371472 [4:14:42<25:38:36, 3.45it/s] 14%|█▍ | 53233/371472 [4:14:42<26:04:06, 3.39it/s] 14%|█▍ | 53234/371472 [4:14:42<25:29:46, 3.47it/s] 14%|█▍ | 53235/371472 [4:14:43<25:26:41, 3.47it/s] 14%|█▍ | 53236/371472 [4:14:43<25:19:47, 3.49it/s] 14%|█▍ | 53237/371472 [4:14:43<26:01:50, 3.40it/s] 14%|█▍ | 53238/371472 [4:14:43<24:45:37, 3.57it/s] 14%|█▍ | 53239/371472 [4:14:44<25:30:37, 3.47it/s] 14%|█▍ | 53240/371472 [4:14:44<25:15:56, 3.50it/s] {'loss': 4.1586, 'learning_rate': 8.714258010299726e-07, 'epoch': 2.29} + 14%|█▍ | 53240/371472 [4:14:44<25:15:56, 3.50it/s] 14%|█▍ | 53241/371472 [4:14:44<24:08:23, 3.66it/s] 14%|█▍ | 53242/371472 [4:14:45<24:19:37, 3.63it/s] 14%|█▍ | 53243/371472 [4:14:45<25:44:40, 3.43it/s] 14%|█▍ | 53244/371472 [4:14:45<27:17:47, 3.24it/s] 14%|█▍ | 53245/371472 [4:14:46<25:55:26, 3.41it/s] 14%|█▍ | 53246/371472 [4:14:46<25:37:06, 3.45it/s] 14%|█▍ | 53247/371472 [4:14:46<25:33:28, 3.46it/s] 14%|█▍ | 53248/371472 [4:14:46<24:38:56, 3.59it/s] 14%|█▍ | 53249/371472 [4:14:47<24:02:18, 3.68it/s] 14%|█▍ | 53250/371472 [4:14:47<23:34:16, 3.75it/s] 14%|█▍ | 53251/371472 [4:14:47<25:16:19, 3.50it/s] 14%|█▍ | 53252/371472 [4:14:48<26:08:11, 3.38it/s] 14%|█▍ | 53253/371472 [4:14:48<25:15:33, 3.50it/s] 14%|█▍ | 53254/371472 [4:14:48<29:59:09, 2.95it/s] 14%|█▍ | 53255/371472 [4:14:49<28:49:26, 3.07it/s] 14%|█▍ | 53256/371472 [4:14:49<27:13:51, 3.25it/s] 14%|█▍ | 53257/371472 [4:14:49<26:42:31, 3.31it/s] 14%|█▍ | 53258/371472 [4:14:49<25:55:37, 3.41it/s] 14%|█▍ | 53259/371472 [4:14:50<25:14:30, 3.50it/s] 14%|█▍ | 53260/371472 [4:14:50<25:55:51, 3.41it/s] {'loss': 4.2607, 'learning_rate': 8.713773190544937e-07, 'epoch': 2.29} + 14%|█▍ | 53260/371472 [4:14:50<25:55:51, 3.41it/s] 14%|█▍ | 53261/371472 [4:14:50<24:57:00, 3.54it/s] 14%|█▍ | 53262/371472 [4:14:50<23:38:08, 3.74it/s] 14%|█▍ | 53263/371472 [4:14:51<22:57:17, 3.85it/s] 14%|█▍ | 53264/371472 [4:14:51<22:52:52, 3.86it/s] 14%|█▍ | 53265/371472 [4:14:51<23:03:50, 3.83it/s] 14%|█▍ | 53266/371472 [4:14:52<24:27:01, 3.62it/s] 14%|█▍ | 53267/371472 [4:14:52<24:24:44, 3.62it/s] 14%|█▍ | 53268/371472 [4:14:52<26:02:25, 3.39it/s] 14%|█▍ | 53269/371472 [4:14:52<24:47:21, 3.57it/s] 14%|█▍ | 53270/371472 [4:14:53<23:46:55, 3.72it/s] 14%|█▍ | 53271/371472 [4:14:53<23:02:15, 3.84it/s] 14%|█▍ | 53272/371472 [4:14:53<23:07:02, 3.82it/s] 14%|█▍ | 53273/371472 [4:14:53<23:05:28, 3.83it/s] 14%|█▍ | 53274/371472 [4:14:54<23:25:02, 3.77it/s] 14%|█▍ | 53275/371472 [4:14:54<24:34:06, 3.60it/s] 14%|█▍ | 53276/371472 [4:14:54<25:39:30, 3.44it/s] 14%|█▍ | 53277/371472 [4:14:55<25:43:00, 3.44it/s] 14%|█▍ | 53278/371472 [4:14:55<25:56:21, 3.41it/s] 14%|█▍ | 53279/371472 [4:14:55<25:16:17, 3.50it/s] 14%|█▍ | 53280/371472 [4:14:55<25:37:15, 3.45it/s] {'loss': 4.2607, 'learning_rate': 8.713288370790149e-07, 'epoch': 2.29} + 14%|█▍ | 53280/371472 [4:14:55<25:37:15, 3.45it/s] 14%|█▍ | 53281/371472 [4:14:56<25:36:39, 3.45it/s] 14%|█▍ | 53282/371472 [4:14:56<24:46:25, 3.57it/s] 14%|█▍ | 53283/371472 [4:14:56<24:10:22, 3.66it/s] 14%|█▍ | 53284/371472 [4:14:57<24:53:26, 3.55it/s] 14%|█▍ | 53285/371472 [4:14:57<25:57:24, 3.41it/s] 14%|█▍ | 53286/371472 [4:14:57<26:38:28, 3.32it/s] 14%|█▍ | 53287/371472 [4:14:57<26:09:13, 3.38it/s] 14%|█▍ | 53288/371472 [4:14:58<25:05:56, 3.52it/s] 14%|█▍ | 53289/371472 [4:14:58<24:44:45, 3.57it/s] 14%|█▍ | 53290/371472 [4:14:58<26:24:47, 3.35it/s] 14%|█▍ | 53291/371472 [4:14:59<26:22:13, 3.35it/s] 14%|█▍ | 53292/371472 [4:14:59<25:10:00, 3.51it/s] 14%|█▍ | 53293/371472 [4:14:59<24:38:56, 3.59it/s] 14%|█▍ | 53294/371472 [4:14:59<24:17:09, 3.64it/s] 14%|█▍ | 53295/371472 [4:15:00<24:24:12, 3.62it/s] 14%|█▍ | 53296/371472 [4:15:00<27:11:57, 3.25it/s] 14%|█▍ | 53297/371472 [4:15:00<26:01:56, 3.40it/s] 14%|█▍ | 53298/371472 [4:15:01<26:31:52, 3.33it/s] 14%|█▍ | 53299/371472 [4:15:01<25:16:34, 3.50it/s] 14%|█▍ | 53300/371472 [4:15:01<26:07:10, 3.38it/s] {'loss': 4.1282, 'learning_rate': 8.712803551035359e-07, 'epoch': 2.3} + 14%|█▍ | 53300/371472 [4:15:01<26:07:10, 3.38it/s] 14%|█▍ | 53301/371472 [4:15:02<26:35:56, 3.32it/s] 14%|█▍ | 53302/371472 [4:15:02<26:49:10, 3.30it/s] 14%|█▍ | 53303/371472 [4:15:02<28:04:14, 3.15it/s] 14%|█▍ | 53304/371472 [4:15:03<29:06:27, 3.04it/s] 14%|█▍ | 53305/371472 [4:15:03<29:53:33, 2.96it/s] 14%|█▍ | 53306/371472 [4:15:03<28:47:50, 3.07it/s] 14%|█▍ | 53307/371472 [4:15:03<26:38:31, 3.32it/s] 14%|█▍ | 53308/371472 [4:15:04<25:13:56, 3.50it/s] 14%|█▍ | 53309/371472 [4:15:04<25:16:23, 3.50it/s] 14%|█▍ | 53310/371472 [4:15:04<25:05:58, 3.52it/s] 14%|█▍ | 53311/371472 [4:15:05<24:57:33, 3.54it/s] 14%|█▍ | 53312/371472 [4:15:05<24:26:32, 3.62it/s] 14%|█▍ | 53313/371472 [4:15:05<25:29:07, 3.47it/s] 14%|█▍ | 53314/371472 [4:15:05<26:22:07, 3.35it/s] 14%|█▍ | 53315/371472 [4:15:06<26:19:58, 3.36it/s] 14%|█▍ | 53316/371472 [4:15:06<27:44:23, 3.19it/s] 14%|█▍ | 53317/371472 [4:15:06<27:34:36, 3.20it/s] 14%|█▍ | 53318/371472 [4:15:07<25:47:07, 3.43it/s] 14%|█▍ | 53319/371472 [4:15:07<25:22:47, 3.48it/s] 14%|█▍ | 53320/371472 [4:15:07<29:16:34, 3.02it/s] {'loss': 4.0689, 'learning_rate': 8.71231873128057e-07, 'epoch': 2.3} + 14%|█▍ | 53320/371472 [4:15:07<29:16:34, 3.02it/s] 14%|█▍ | 53321/371472 [4:15:08<27:10:39, 3.25it/s] 14%|█▍ | 53322/371472 [4:15:08<25:33:51, 3.46it/s] 14%|█▍ | 53323/371472 [4:15:08<26:18:15, 3.36it/s] 14%|█▍ | 53324/371472 [4:15:08<26:06:10, 3.39it/s] 14%|█▍ | 53325/371472 [4:15:09<26:30:49, 3.33it/s] 14%|█▍ | 53326/371472 [4:15:09<25:04:25, 3.52it/s] 14%|█▍ | 53327/371472 [4:15:09<27:03:43, 3.27it/s] 14%|█▍ | 53328/371472 [4:15:10<27:31:57, 3.21it/s] 14%|█▍ | 53329/371472 [4:15:10<25:46:03, 3.43it/s] 14%|█▍ | 53330/371472 [4:15:10<27:17:34, 3.24it/s] 14%|█▍ | 53331/371472 [4:15:11<26:16:58, 3.36it/s] 14%|█▍ | 53332/371472 [4:15:11<25:03:40, 3.53it/s] 14%|█▍ | 53333/371472 [4:15:11<24:29:54, 3.61it/s] 14%|█▍ | 53334/371472 [4:15:11<23:46:44, 3.72it/s] 14%|█▍ | 53335/371472 [4:15:12<22:55:55, 3.85it/s] 14%|█▍ | 53336/371472 [4:15:12<23:05:42, 3.83it/s] 14%|█▍ | 53337/371472 [4:15:12<22:44:05, 3.89it/s] 14%|█▍ | 53338/371472 [4:15:12<23:08:36, 3.82it/s] 14%|█▍ | 53339/371472 [4:15:13<23:21:42, 3.78it/s] 14%|█▍ | 53340/371472 [4:15:13<22:58:03, 3.85it/s] {'loss': 4.3091, 'learning_rate': 8.711833911525781e-07, 'epoch': 2.3} + 14%|█▍ | 53340/371472 [4:15:13<22:58:03, 3.85it/s] 14%|█▍ | 53341/371472 [4:15:13<23:41:47, 3.73it/s] 14%|█▍ | 53342/371472 [4:15:13<24:03:03, 3.67it/s] 14%|█▍ | 53343/371472 [4:15:14<23:58:03, 3.69it/s] 14%|█▍ | 53344/371472 [4:15:14<24:13:10, 3.65it/s] 14%|█▍ | 53345/371472 [4:15:14<24:03:53, 3.67it/s] 14%|█▍ | 53346/371472 [4:15:15<24:17:35, 3.64it/s] 14%|█▍ | 53347/371472 [4:15:15<24:02:24, 3.68it/s] 14%|█▍ | 53348/371472 [4:15:15<23:51:14, 3.70it/s] 14%|█▍ | 53349/371472 [4:15:15<25:07:02, 3.52it/s] 14%|█▍ | 53350/371472 [4:15:16<24:55:39, 3.54it/s] 14%|█▍ | 53351/371472 [4:15:16<24:37:56, 3.59it/s] 14%|█▍ | 53352/371472 [4:15:16<24:26:31, 3.62it/s] 14%|█▍ | 53353/371472 [4:15:16<24:06:58, 3.66it/s] 14%|█▍ | 53354/371472 [4:15:17<24:17:40, 3.64it/s] 14%|█▍ | 53355/371472 [4:15:17<24:20:11, 3.63it/s] 14%|█▍ | 53356/371472 [4:15:17<24:03:21, 3.67it/s] 14%|█▍ | 53357/371472 [4:15:18<24:24:53, 3.62it/s] 14%|█▍ | 53358/371472 [4:15:18<25:26:56, 3.47it/s] 14%|█▍ | 53359/371472 [4:15:18<24:55:03, 3.55it/s] 14%|█▍ | 53360/371472 [4:15:18<25:41:29, 3.44it/s] {'loss': 4.1475, 'learning_rate': 8.711349091770992e-07, 'epoch': 2.3} + 14%|█▍ | 53360/371472 [4:15:18<25:41:29, 3.44it/s] 14%|█▍ | 53361/371472 [4:15:19<27:54:52, 3.17it/s] 14%|█▍ | 53362/371472 [4:15:19<26:11:36, 3.37it/s] 14%|█▍ | 53363/371472 [4:15:19<27:13:18, 3.25it/s] 14%|█▍ | 53364/371472 [4:15:20<27:14:34, 3.24it/s] 14%|█▍ | 53365/371472 [4:15:20<26:57:42, 3.28it/s] 14%|█▍ | 53366/371472 [4:15:20<26:10:40, 3.38it/s] 14%|█▍ | 53367/371472 [4:15:21<26:02:20, 3.39it/s] 14%|█▍ | 53368/371472 [4:15:21<25:48:12, 3.42it/s] 14%|█▍ | 53369/371472 [4:15:21<24:49:01, 3.56it/s] 14%|█▍ | 53370/371472 [4:15:21<24:51:40, 3.55it/s] 14%|█▍ | 53371/371472 [4:15:22<24:36:04, 3.59it/s] 14%|█▍ | 53372/371472 [4:15:22<24:28:26, 3.61it/s] 14%|█▍ | 53373/371472 [4:15:22<23:47:20, 3.71it/s] 14%|█▍ | 53374/371472 [4:15:23<23:34:46, 3.75it/s] 14%|█▍ | 53375/371472 [4:15:23<23:31:39, 3.76it/s] 14%|█▍ | 53376/371472 [4:15:23<26:50:21, 3.29it/s] 14%|█▍ | 53377/371472 [4:15:23<25:41:24, 3.44it/s] 14%|█▍ | 53378/371472 [4:15:24<25:22:04, 3.48it/s] 14%|█▍ | 53379/371472 [4:15:24<26:10:51, 3.37it/s] 14%|█▍ | 53380/371472 [4:15:24<26:18:46, 3.36it/s] {'loss': 4.1784, 'learning_rate': 8.710864272016203e-07, 'epoch': 2.3} + 14%|█▍ | 53380/371472 [4:15:24<26:18:46, 3.36it/s] 14%|█▍ | 53381/371472 [4:15:25<25:07:22, 3.52it/s] 14%|█▍ | 53382/371472 [4:15:25<24:33:04, 3.60it/s] 14%|█▍ | 53383/371472 [4:15:25<23:33:31, 3.75it/s] 14%|█▍ | 53384/371472 [4:15:25<25:10:18, 3.51it/s] 14%|█▍ | 53385/371472 [4:15:26<24:41:06, 3.58it/s] 14%|█▍ | 53386/371472 [4:15:26<24:05:27, 3.67it/s] 14%|█▍ | 53387/371472 [4:15:26<24:29:25, 3.61it/s] 14%|█▍ | 53388/371472 [4:15:26<24:05:45, 3.67it/s] 14%|█▍ | 53389/371472 [4:15:27<24:15:33, 3.64it/s] 14%|█▍ | 53390/371472 [4:15:27<24:03:23, 3.67it/s] 14%|█▍ | 53391/371472 [4:15:27<23:22:17, 3.78it/s] 14%|█▍ | 53392/371472 [4:15:28<24:23:52, 3.62it/s] 14%|█▍ | 53393/371472 [4:15:28<24:58:19, 3.54it/s] 14%|█▍ | 53394/371472 [4:15:28<24:35:32, 3.59it/s] 14%|█▍ | 53395/371472 [4:15:28<24:02:52, 3.67it/s] 14%|█▍ | 53396/371472 [4:15:29<26:42:41, 3.31it/s] 14%|█▍ | 53397/371472 [4:15:29<25:39:57, 3.44it/s] 14%|█▍ | 53398/371472 [4:15:29<24:35:32, 3.59it/s] 14%|█▍ | 53399/371472 [4:15:30<24:18:56, 3.63it/s] 14%|█▍ | 53400/371472 [4:15:30<24:52:16, 3.55it/s] {'loss': 4.1835, 'learning_rate': 8.710379452261414e-07, 'epoch': 2.3} + 14%|█▍ | 53400/371472 [4:15:30<24:52:16, 3.55it/s] 14%|█▍ | 53401/371472 [4:15:30<24:24:16, 3.62it/s] 14%|█▍ | 53402/371472 [4:15:30<23:49:43, 3.71it/s] 14%|█▍ | 53403/371472 [4:15:31<23:25:48, 3.77it/s] 14%|█▍ | 53404/371472 [4:15:31<23:07:39, 3.82it/s] 14%|█▍ | 53405/371472 [4:15:31<23:00:59, 3.84it/s] 14%|█▍ | 53406/371472 [4:15:31<22:54:19, 3.86it/s] 14%|█▍ | 53407/371472 [4:15:32<22:32:28, 3.92it/s] 14%|█▍ | 53408/371472 [4:15:32<22:28:09, 3.93it/s] 14%|█▍ | 53409/371472 [4:15:32<21:55:47, 4.03it/s] 14%|█▍ | 53410/371472 [4:15:32<22:46:38, 3.88it/s] 14%|█▍ | 53411/371472 [4:15:33<24:21:55, 3.63it/s] 14%|█▍ | 53412/371472 [4:15:33<26:35:41, 3.32it/s] 14%|█▍ | 53413/371472 [4:15:33<25:55:01, 3.41it/s] 14%|█▍ | 53414/371472 [4:15:34<25:59:17, 3.40it/s] 14%|█▍ | 53415/371472 [4:15:34<26:04:32, 3.39it/s] 14%|█▍ | 53416/371472 [4:15:34<25:24:40, 3.48it/s] 14%|█▍ | 53417/371472 [4:15:34<25:02:21, 3.53it/s] 14%|█▍ | 53418/371472 [4:15:35<24:36:25, 3.59it/s] 14%|█▍ | 53419/371472 [4:15:35<24:55:57, 3.54it/s] 14%|█▍ | 53420/371472 [4:15:35<25:24:58, 3.48it/s] {'loss': 4.1533, 'learning_rate': 8.709894632506626e-07, 'epoch': 2.3} + 14%|█▍ | 53420/371472 [4:15:35<25:24:58, 3.48it/s] 14%|█▍ | 53421/371472 [4:15:36<25:10:08, 3.51it/s] 14%|█▍ | 53422/371472 [4:15:36<25:06:45, 3.52it/s] 14%|█▍ | 53423/371472 [4:15:36<25:18:51, 3.49it/s] 14%|█▍ | 53424/371472 [4:15:36<24:58:03, 3.54it/s] 14%|█▍ | 53425/371472 [4:15:37<25:05:24, 3.52it/s] 14%|█▍ | 53426/371472 [4:15:37<25:40:58, 3.44it/s] 14%|█▍ | 53427/371472 [4:15:37<25:16:05, 3.50it/s] 14%|█▍ | 53428/371472 [4:15:38<24:52:22, 3.55it/s] 14%|█▍ | 53429/371472 [4:15:38<25:02:56, 3.53it/s] 14%|█▍ | 53430/371472 [4:15:38<24:32:43, 3.60it/s] 14%|█▍ | 53431/371472 [4:15:38<25:24:39, 3.48it/s] 14%|█▍ | 53432/371472 [4:15:39<25:04:27, 3.52it/s] 14%|█▍ | 53433/371472 [4:15:39<24:01:48, 3.68it/s] 14%|█▍ | 53434/371472 [4:15:39<23:18:45, 3.79it/s] 14%|█▍ | 53435/371472 [4:15:40<24:36:02, 3.59it/s] 14%|█▍ | 53436/371472 [4:15:40<23:42:42, 3.73it/s] 14%|█▍ | 53437/371472 [4:15:40<24:54:40, 3.55it/s] 14%|█▍ | 53438/371472 [4:15:40<23:43:24, 3.72it/s] 14%|█▍ | 53439/371472 [4:15:41<23:51:24, 3.70it/s] 14%|█▍ | 53440/371472 [4:15:41<23:40:46, 3.73it/s] {'loss': 4.2843, 'learning_rate': 8.709409812751837e-07, 'epoch': 2.3} + 14%|█▍ | 53440/371472 [4:15:41<23:40:46, 3.73it/s] 14%|█▍ | 53441/371472 [4:15:41<23:11:27, 3.81it/s] 14%|█▍ | 53442/371472 [4:15:41<23:08:34, 3.82it/s] 14%|█▍ | 53443/371472 [4:15:42<22:44:29, 3.88it/s] 14%|█▍ | 53444/371472 [4:15:42<22:37:33, 3.90it/s] 14%|█▍ | 53445/371472 [4:15:42<22:58:15, 3.85it/s] 14%|█▍ | 53446/371472 [4:15:42<22:47:08, 3.88it/s] 14%|█▍ | 53447/371472 [4:15:43<22:42:52, 3.89it/s] 14%|█▍ | 53448/371472 [4:15:43<22:42:38, 3.89it/s] 14%|█▍ | 53449/371472 [4:15:43<23:37:18, 3.74it/s] 14%|█▍ | 53450/371472 [4:15:43<23:22:30, 3.78it/s] 14%|█▍ | 53451/371472 [4:15:44<23:43:57, 3.72it/s] 14%|█▍ | 53452/371472 [4:15:44<23:32:53, 3.75it/s] 14%|█▍ | 53453/371472 [4:15:44<24:27:48, 3.61it/s] 14%|█▍ | 53454/371472 [4:15:45<26:00:38, 3.40it/s] 14%|█▍ | 53455/371472 [4:15:45<24:50:11, 3.56it/s] 14%|█▍ | 53456/371472 [4:15:45<23:37:14, 3.74it/s] 14%|█▍ | 53457/371472 [4:15:45<25:35:58, 3.45it/s] 14%|█▍ | 53458/371472 [4:15:46<24:13:57, 3.65it/s] 14%|█▍ | 53459/371472 [4:15:46<23:15:26, 3.80it/s] 14%|█▍ | 53460/371472 [4:15:46<23:08:11, 3.82it/s] {'loss': 4.1374, 'learning_rate': 8.708924992997047e-07, 'epoch': 2.3} + 14%|█▍ | 53460/371472 [4:15:46<23:08:11, 3.82it/s] 14%|█▍ | 53461/371472 [4:15:46<23:19:41, 3.79it/s] 14%|█▍ | 53462/371472 [4:15:47<25:07:16, 3.52it/s] 14%|█▍ | 53463/371472 [4:15:47<25:24:32, 3.48it/s] 14%|█▍ | 53464/371472 [4:15:47<24:47:00, 3.56it/s] 14%|█▍ | 53465/371472 [4:15:48<23:54:21, 3.70it/s] 14%|█▍ | 53466/371472 [4:15:48<23:21:20, 3.78it/s] 14%|█▍ | 53467/371472 [4:15:48<23:04:00, 3.83it/s] 14%|█▍ | 53468/371472 [4:15:48<24:13:45, 3.65it/s] 14%|█▍ | 53469/371472 [4:15:49<25:36:49, 3.45it/s] 14%|█▍ | 53470/371472 [4:15:49<25:37:29, 3.45it/s] 14%|█▍ | 53471/371472 [4:15:49<24:30:44, 3.60it/s] 14%|█▍ | 53472/371472 [4:15:50<24:40:41, 3.58it/s] 14%|█▍ | 53473/371472 [4:15:50<24:03:09, 3.67it/s] 14%|��▍ | 53474/371472 [4:15:50<23:58:32, 3.68it/s] 14%|█▍ | 53475/371472 [4:15:50<24:13:47, 3.65it/s] 14%|█▍ | 53476/371472 [4:15:51<23:45:21, 3.72it/s] 14%|█▍ | 53477/371472 [4:15:51<27:09:00, 3.25it/s] 14%|█▍ | 53478/371472 [4:15:51<27:37:05, 3.20it/s] 14%|█▍ | 53479/371472 [4:15:52<26:21:46, 3.35it/s] 14%|█▍ | 53480/371472 [4:15:52<26:02:09, 3.39it/s] {'loss': 4.1059, 'learning_rate': 8.708440173242258e-07, 'epoch': 2.3} + 14%|█▍ | 53480/371472 [4:15:52<26:02:09, 3.39it/s] 14%|█▍ | 53481/371472 [4:15:52<25:05:43, 3.52it/s] 14%|█▍ | 53482/371472 [4:15:52<24:32:04, 3.60it/s] 14%|█▍ | 53483/371472 [4:15:53<24:45:16, 3.57it/s] 14%|█▍ | 53484/371472 [4:15:53<24:35:12, 3.59it/s] 14%|█▍ | 53485/371472 [4:15:53<24:23:53, 3.62it/s] 14%|█▍ | 53486/371472 [4:15:54<23:56:57, 3.69it/s] 14%|█▍ | 53487/371472 [4:15:54<23:50:12, 3.71it/s] 14%|█▍ | 53488/371472 [4:15:54<23:23:38, 3.78it/s] 14%|█▍ | 53489/371472 [4:15:54<24:53:29, 3.55it/s] 14%|█▍ | 53490/371472 [4:15:55<24:51:16, 3.55it/s] 14%|█▍ | 53491/371472 [4:15:55<24:22:31, 3.62it/s] 14%|█▍ | 53492/371472 [4:15:55<24:13:45, 3.65it/s] 14%|█▍ | 53493/371472 [4:15:55<23:45:18, 3.72it/s] 14%|█▍ | 53494/371472 [4:15:56<25:31:17, 3.46it/s] 14%|█▍ | 53495/371472 [4:15:56<25:35:23, 3.45it/s] 14%|█▍ | 53496/371472 [4:15:56<24:19:09, 3.63it/s] 14%|█▍ | 53497/371472 [4:15:57<23:50:40, 3.70it/s] 14%|█▍ | 53498/371472 [4:15:57<28:18:37, 3.12it/s] 14%|█▍ | 53499/371472 [4:15:57<26:33:49, 3.33it/s] 14%|█▍ | 53500/371472 [4:15:58<25:34:31, 3.45it/s] {'loss': 4.1689, 'learning_rate': 8.70795535348747e-07, 'epoch': 2.3} + 14%|█▍ | 53500/371472 [4:15:58<25:34:31, 3.45it/s] 14%|█▍ | 53501/371472 [4:15:58<26:02:16, 3.39it/s] 14%|█▍ | 53502/371472 [4:15:58<26:04:56, 3.39it/s] 14%|█▍ | 53503/371472 [4:15:58<26:15:47, 3.36it/s] 14%|█▍ | 53504/371472 [4:15:59<25:58:37, 3.40it/s] 14%|█▍ | 53505/371472 [4:15:59<24:44:00, 3.57it/s] 14%|█▍ | 53506/371472 [4:15:59<24:18:19, 3.63it/s] 14%|█▍ | 53507/371472 [4:16:00<25:08:06, 3.51it/s] 14%|█▍ | 53508/371472 [4:16:00<25:57:52, 3.40it/s] 14%|█▍ | 53509/371472 [4:16:00<24:40:00, 3.58it/s] 14%|█▍ | 53510/371472 [4:16:00<25:45:38, 3.43it/s] 14%|█▍ | 53511/371472 [4:16:01<26:14:29, 3.37it/s] 14%|█▍ | 53512/371472 [4:16:01<25:12:04, 3.50it/s] 14%|█▍ | 53513/371472 [4:16:01<24:26:43, 3.61it/s] 14%|█▍ | 53514/371472 [4:16:01<23:18:01, 3.79it/s] 14%|█▍ | 53515/371472 [4:16:02<22:31:31, 3.92it/s] 14%|█▍ | 53516/371472 [4:16:02<24:42:10, 3.58it/s] 14%|█▍ | 53517/371472 [4:16:02<24:24:56, 3.62it/s] 14%|█▍ | 53518/371472 [4:16:03<24:05:01, 3.67it/s] 14%|█▍ | 53519/371472 [4:16:03<23:35:29, 3.74it/s] 14%|█▍ | 53520/371472 [4:16:03<22:42:56, 3.89it/s] {'loss': 4.3381, 'learning_rate': 8.707470533732681e-07, 'epoch': 2.31} + 14%|█▍ | 53520/371472 [4:16:03<22:42:56, 3.89it/s] 14%|█▍ | 53521/371472 [4:16:03<23:00:29, 3.84it/s] 14%|█▍ | 53522/371472 [4:16:04<22:25:22, 3.94it/s] 14%|█▍ | 53523/371472 [4:16:04<23:10:23, 3.81it/s] 14%|█▍ | 53524/371472 [4:16:04<23:21:10, 3.78it/s] 14%|█▍ | 53525/371472 [4:16:04<25:08:05, 3.51it/s] 14%|█▍ | 53526/371472 [4:16:05<24:52:04, 3.55it/s] 14%|█▍ | 53527/371472 [4:16:05<24:38:28, 3.58it/s] 14%|█▍ | 53528/371472 [4:16:05<24:01:21, 3.68it/s] 14%|█▍ | 53529/371472 [4:16:06<24:04:13, 3.67it/s] 14%|█▍ | 53530/371472 [4:16:06<23:48:43, 3.71it/s] 14%|█▍ | 53531/371472 [4:16:06<24:40:12, 3.58it/s] 14%|█▍ | 53532/371472 [4:16:06<24:26:14, 3.61it/s] 14%|█▍ | 53533/371472 [4:16:07<25:41:15, 3.44it/s] 14%|█▍ | 53534/371472 [4:16:07<24:51:39, 3.55it/s] 14%|█▍ | 53535/371472 [4:16:07<31:06:30, 2.84it/s] 14%|█▍ | 53536/371472 [4:16:08<28:49:04, 3.06it/s] 14%|█▍ | 53537/371472 [4:16:08<31:33:36, 2.80it/s] 14%|█▍ | 53538/371472 [4:16:08<30:18:40, 2.91it/s] 14%|█▍ | 53539/371472 [4:16:09<28:33:31, 3.09it/s] 14%|█▍ | 53540/371472 [4:16:09<28:05:12, 3.14it/s] {'loss': 4.0695, 'learning_rate': 8.706985713977892e-07, 'epoch': 2.31} + 14%|█▍ | 53540/371472 [4:16:09<28:05:12, 3.14it/s] 14%|█▍ | 53541/371472 [4:16:09<25:59:22, 3.40it/s] 14%|█▍ | 53542/371472 [4:16:10<25:55:02, 3.41it/s] 14%|█▍ | 53543/371472 [4:16:10<26:45:16, 3.30it/s] 14%|█▍ | 53544/371472 [4:16:10<26:34:10, 3.32it/s] 14%|█▍ | 53545/371472 [4:16:10<26:02:14, 3.39it/s] 14%|█▍ | 53546/371472 [4:16:11<27:14:11, 3.24it/s] 14%|█▍ | 53547/371472 [4:16:11<26:36:18, 3.32it/s] 14%|█▍ | 53548/371472 [4:16:11<27:13:34, 3.24it/s] 14%|█▍ | 53549/371472 [4:16:12<25:53:24, 3.41it/s] 14%|█▍ | 53550/371472 [4:16:12<25:20:24, 3.49it/s] 14%|█▍ | 53551/371472 [4:16:12<27:05:03, 3.26it/s] 14%|█▍ | 53552/371472 [4:16:13<26:02:52, 3.39it/s] 14%|█▍ | 53553/371472 [4:16:13<25:45:14, 3.43it/s] 14%|█▍ | 53554/371472 [4:16:13<26:07:25, 3.38it/s] 14%|█▍ | 53555/371472 [4:16:13<25:41:09, 3.44it/s] 14%|█▍ | 53556/371472 [4:16:14<25:13:07, 3.50it/s] 14%|█▍ | 53557/371472 [4:16:14<24:52:44, 3.55it/s] 14%|█▍ | 53558/371472 [4:16:14<24:34:40, 3.59it/s] 14%|█▍ | 53559/371472 [4:16:15<25:56:13, 3.40it/s] 14%|█▍ | 53560/371472 [4:16:15<24:36:20, 3.59it/s] {'loss': 4.3566, 'learning_rate': 8.706500894223103e-07, 'epoch': 2.31} + 14%|█▍ | 53560/371472 [4:16:15<24:36:20, 3.59it/s] 14%|█▍ | 53561/371472 [4:16:15<24:59:22, 3.53it/s] 14%|█▍ | 53562/371472 [4:16:15<24:20:39, 3.63it/s] 14%|█▍ | 53563/371472 [4:16:16<23:28:48, 3.76it/s] 14%|█▍ | 53564/371472 [4:16:16<24:55:36, 3.54it/s] 14%|█▍ | 53565/371472 [4:16:16<26:09:40, 3.38it/s] 14%|█▍ | 53566/371472 [4:16:17<25:07:23, 3.51it/s] 14%|█▍ | 53567/371472 [4:16:17<25:16:05, 3.49it/s] 14%|█▍ | 53568/371472 [4:16:17<25:12:16, 3.50it/s] 14%|█▍ | 53569/371472 [4:16:17<25:07:26, 3.51it/s] 14%|█▍ | 53570/371472 [4:16:18<26:15:31, 3.36it/s] 14%|█▍ | 53571/371472 [4:16:18<26:38:26, 3.31it/s] 14%|█▍ | 53572/371472 [4:16:18<26:09:38, 3.38it/s] 14%|█▍ | 53573/371472 [4:16:19<25:16:03, 3.49it/s] 14%|█▍ | 53574/371472 [4:16:19<25:08:02, 3.51it/s] 14%|█▍ | 53575/371472 [4:16:19<24:43:04, 3.57it/s] 14%|█▍ | 53576/371472 [4:16:19<26:36:34, 3.32it/s] 14%|█▍ | 53577/371472 [4:16:20<25:19:11, 3.49it/s] 14%|█▍ | 53578/371472 [4:16:20<24:14:36, 3.64it/s] 14%|█▍ | 53579/371472 [4:16:20<24:05:07, 3.67it/s] 14%|█▍ | 53580/371472 [4:16:21<24:08:20, 3.66it/s] {'loss': 4.1611, 'learning_rate': 8.706016074468314e-07, 'epoch': 2.31} + 14%|█▍ | 53580/371472 [4:16:21<24:08:20, 3.66it/s] 14%|█▍ | 53581/371472 [4:16:21<25:08:01, 3.51it/s] 14%|█▍ | 53582/371472 [4:16:21<24:22:14, 3.62it/s] 14%|█▍ | 53583/371472 [4:16:21<24:04:42, 3.67it/s] 14%|█▍ | 53584/371472 [4:16:22<24:21:52, 3.62it/s] 14%|█▍ | 53585/371472 [4:16:22<24:57:27, 3.54it/s] 14%|█▍ | 53586/371472 [4:16:22<24:39:25, 3.58it/s] 14%|█▍ | 53587/371472 [4:16:22<23:31:57, 3.75it/s] 14%|█▍ | 53588/371472 [4:16:23<25:40:54, 3.44it/s] 14%|█▍ | 53589/371472 [4:16:23<24:38:24, 3.58it/s] 14%|█▍ | 53590/371472 [4:16:23<24:18:17, 3.63it/s] 14%|█▍ | 53591/371472 [4:16:24<23:16:52, 3.79it/s] 14%|█▍ | 53592/371472 [4:16:24<23:28:39, 3.76it/s] 14%|█▍ | 53593/371472 [4:16:24<23:06:51, 3.82it/s] 14%|█▍ | 53594/371472 [4:16:24<24:36:06, 3.59it/s] 14%|█▍ | 53595/371472 [4:16:25<24:31:54, 3.60it/s] 14%|█▍ | 53596/371472 [4:16:25<24:34:39, 3.59it/s] 14%|█▍ | 53597/371472 [4:16:25<24:45:00, 3.57it/s] 14%|█▍ | 53598/371472 [4:16:26<24:11:51, 3.65it/s] 14%|█▍ | 53599/371472 [4:16:26<23:17:32, 3.79it/s] 14%|█▍ | 53600/371472 [4:16:26<23:42:52, 3.72it/s] {'loss': 4.2708, 'learning_rate': 8.705531254713525e-07, 'epoch': 2.31} + 14%|█▍ | 53600/371472 [4:16:26<23:42:52, 3.72it/s] 14%|█▍ | 53601/371472 [4:16:26<23:25:55, 3.77it/s] 14%|█▍ | 53602/371472 [4:16:27<24:24:03, 3.62it/s] 14%|█▍ | 53603/371472 [4:16:27<23:46:33, 3.71it/s] 14%|█▍ | 53604/371472 [4:16:27<22:54:10, 3.86it/s] 14%|█▍ | 53605/371472 [4:16:27<26:16:22, 3.36it/s] 14%|█▍ | 53606/371472 [4:16:28<25:17:51, 3.49it/s] 14%|█▍ | 53607/371472 [4:16:28<27:53:39, 3.17it/s] 14%|█▍ | 53608/371472 [4:16:28<27:22:53, 3.22it/s] 14%|█▍ | 53609/371472 [4:16:29<25:40:48, 3.44it/s] 14%|█▍ | 53610/371472 [4:16:29<24:18:13, 3.63it/s] 14%|█▍ | 53611/371472 [4:16:29<24:12:35, 3.65it/s] 14%|█▍ | 53612/371472 [4:16:29<23:16:12, 3.79it/s] 14%|█▍ | 53613/371472 [4:16:30<22:54:59, 3.85it/s] 14%|█▍ | 53614/371472 [4:16:30<22:27:37, 3.93it/s] 14%|█▍ | 53615/371472 [4:16:30<23:00:50, 3.84it/s] 14%|█▍ | 53616/371472 [4:16:31<30:39:31, 2.88it/s] 14%|█▍ | 53617/371472 [4:16:31<27:59:05, 3.16it/s] 14%|█▍ | 53618/371472 [4:16:31<26:54:48, 3.28it/s] 14%|█▍ | 53619/371472 [4:16:32<26:45:49, 3.30it/s] 14%|█▍ | 53620/371472 [4:16:32<26:09:22, 3.38it/s] {'loss': 4.2761, 'learning_rate': 8.705046434958736e-07, 'epoch': 2.31} + 14%|█▍ | 53620/371472 [4:16:32<26:09:22, 3.38it/s] 14%|█▍ | 53621/371472 [4:16:32<26:21:51, 3.35it/s] 14%|█▍ | 53622/371472 [4:16:32<25:34:44, 3.45it/s] 14%|█▍ | 53623/371472 [4:16:33<24:51:17, 3.55it/s] 14%|█▍ | 53624/371472 [4:16:33<24:11:55, 3.65it/s] 14%|█▍ | 53625/371472 [4:16:33<24:16:45, 3.64it/s] 14%|█▍ | 53626/371472 [4:16:34<26:28:50, 3.33it/s] 14%|█▍ | 53627/371472 [4:16:34<26:07:32, 3.38it/s] 14%|█▍ | 53628/371472 [4:16:34<24:39:57, 3.58it/s] 14%|█▍ | 53629/371472 [4:16:34<24:24:07, 3.62it/s] 14%|█▍ | 53630/371472 [4:16:35<25:01:46, 3.53it/s] 14%|█▍ | 53631/371472 [4:16:35<24:02:18, 3.67it/s] 14%|█▍ | 53632/371472 [4:16:35<26:12:31, 3.37it/s] 14%|█▍ | 53633/371472 [4:16:35<25:02:16, 3.53it/s] 14%|█▍ | 53634/371472 [4:16:36<25:59:02, 3.40it/s] 14%|█▍ | 53635/371472 [4:16:36<26:14:33, 3.36it/s] 14%|█▍ | 53636/371472 [4:16:36<25:29:43, 3.46it/s] 14%|█▍ | 53637/371472 [4:16:37<25:56:23, 3.40it/s] 14%|█▍ | 53638/371472 [4:16:37<25:18:06, 3.49it/s] 14%|█▍ | 53639/371472 [4:16:37<25:28:22, 3.47it/s] 14%|█▍ | 53640/371472 [4:16:38<25:25:57, 3.47it/s] {'loss': 4.2535, 'learning_rate': 8.704561615203947e-07, 'epoch': 2.31} + 14%|█▍ | 53640/371472 [4:16:38<25:25:57, 3.47it/s] 14%|█▍ | 53641/371472 [4:16:38<25:38:22, 3.44it/s] 14%|█▍ | 53642/371472 [4:16:38<24:50:12, 3.55it/s] 14%|█▍ | 53643/371472 [4:16:38<25:56:16, 3.40it/s] 14%|█▍ | 53644/371472 [4:16:39<25:19:13, 3.49it/s] 14%|█▍ | 53645/371472 [4:16:39<25:20:58, 3.48it/s] 14%|█▍ | 53646/371472 [4:16:39<24:42:40, 3.57it/s] 14%|█▍ | 53647/371472 [4:16:40<25:07:32, 3.51it/s] 14%|█▍ | 53648/371472 [4:16:40<24:20:29, 3.63it/s] 14%|█▍ | 53649/371472 [4:16:40<24:50:35, 3.55it/s] 14%|█▍ | 53650/371472 [4:16:40<24:06:35, 3.66it/s] 14%|█▍ | 53651/371472 [4:16:41<24:39:02, 3.58it/s] 14%|█▍ | 53652/371472 [4:16:41<25:42:17, 3.43it/s] 14%|█▍ | 53653/371472 [4:16:41<26:22:21, 3.35it/s] 14%|█▍ | 53654/371472 [4:16:42<25:16:14, 3.49it/s] 14%|█▍ | 53655/371472 [4:16:42<24:58:52, 3.53it/s] 14%|█▍ | 53656/371472 [4:16:42<25:03:13, 3.52it/s] 14%|█▍ | 53657/371472 [4:16:42<25:22:30, 3.48it/s] 14%|█▍ | 53658/371472 [4:16:43<24:38:22, 3.58it/s] 14%|█▍ | 53659/371472 [4:16:43<26:11:47, 3.37it/s] 14%|█▍ | 53660/371472 [4:16:43<25:01:35, 3.53it/s] {'loss': 4.331, 'learning_rate': 8.704076795449159e-07, 'epoch': 2.31} + 14%|█▍ | 53660/371472 [4:16:43<25:01:35, 3.53it/s] 14%|█▍ | 53661/371472 [4:16:43<24:18:39, 3.63it/s] 14%|█▍ | 53662/371472 [4:16:44<23:30:14, 3.76it/s] 14%|█▍ | 53663/371472 [4:16:44<23:19:41, 3.78it/s] 14%|█▍ | 53664/371472 [4:16:44<23:42:48, 3.72it/s] 14%|█▍ | 53665/371472 [4:16:45<23:10:07, 3.81it/s] 14%|█▍ | 53666/371472 [4:16:45<26:25:27, 3.34it/s] 14%|█▍ | 53667/371472 [4:16:45<25:40:01, 3.44it/s] 14%|█▍ | 53668/371472 [4:16:45<25:32:00, 3.46it/s] 14%|█▍ | 53669/371472 [4:16:46<24:57:23, 3.54it/s] 14%|█▍ | 53670/371472 [4:16:46<24:19:29, 3.63it/s] 14%|█▍ | 53671/371472 [4:16:46<23:47:42, 3.71it/s] 14%|█▍ | 53672/371472 [4:16:46<23:30:03, 3.76it/s] 14%|█▍ | 53673/371472 [4:16:47<25:22:03, 3.48it/s] 14%|█▍ | 53674/371472 [4:16:47<25:04:05, 3.52it/s] 14%|█▍ | 53675/371472 [4:16:47<23:45:27, 3.72it/s] 14%|█▍ | 53676/371472 [4:16:48<23:39:54, 3.73it/s] 14%|█▍ | 53677/371472 [4:16:48<24:09:55, 3.65it/s] 14%|█▍ | 53678/371472 [4:16:48<23:58:52, 3.68it/s] 14%|█▍ | 53679/371472 [4:16:48<24:10:12, 3.65it/s] 14%|█▍ | 53680/371472 [4:16:49<24:26:24, 3.61it/s] {'loss': 4.2145, 'learning_rate': 8.703591975694369e-07, 'epoch': 2.31} + 14%|█��� | 53680/371472 [4:16:49<24:26:24, 3.61it/s] 14%|█▍ | 53681/371472 [4:16:49<24:33:35, 3.59it/s] 14%|█▍ | 53682/371472 [4:16:49<24:42:43, 3.57it/s] 14%|█▍ | 53683/371472 [4:16:50<24:54:14, 3.54it/s] 14%|█▍ | 53684/371472 [4:16:50<25:04:08, 3.52it/s] 14%|█▍ | 53685/371472 [4:16:50<25:00:27, 3.53it/s] 14%|█▍ | 53686/371472 [4:16:50<24:05:26, 3.66it/s] 14%|█▍ | 53687/371472 [4:16:51<24:34:32, 3.59it/s] 14%|█▍ | 53688/371472 [4:16:51<25:27:29, 3.47it/s] 14%|█▍ | 53689/371472 [4:16:51<26:08:05, 3.38it/s] 14%|█▍ | 53690/371472 [4:16:52<25:08:01, 3.51it/s] 14%|█▍ | 53691/371472 [4:16:52<24:37:37, 3.58it/s] 14%|█▍ | 53692/371472 [4:16:52<24:26:46, 3.61it/s] 14%|█▍ | 53693/371472 [4:16:53<28:33:14, 3.09it/s] 14%|█▍ | 53694/371472 [4:16:53<27:44:18, 3.18it/s] 14%|█▍ | 53695/371472 [4:16:53<26:14:05, 3.36it/s] 14%|█▍ | 53696/371472 [4:16:53<25:01:50, 3.53it/s] 14%|█▍ | 53697/371472 [4:16:54<24:57:50, 3.54it/s] 14%|█▍ | 53698/371472 [4:16:54<23:59:42, 3.68it/s] 14%|█▍ | 53699/371472 [4:16:54<23:05:43, 3.82it/s] 14%|█▍ | 53700/371472 [4:16:54<25:23:09, 3.48it/s] {'loss': 4.162, 'learning_rate': 8.70310715593958e-07, 'epoch': 2.31} + 14%|█▍ | 53700/371472 [4:16:54<25:23:09, 3.48it/s] 14%|█▍ | 53701/371472 [4:16:55<25:03:15, 3.52it/s] 14%|█▍ | 53702/371472 [4:16:55<25:14:56, 3.50it/s] 14%|█▍ | 53703/371472 [4:16:55<25:13:49, 3.50it/s] 14%|█▍ | 53704/371472 [4:16:56<24:16:39, 3.64it/s] 14%|█▍ | 53705/371472 [4:16:56<23:57:15, 3.68it/s] 14%|█▍ | 53706/371472 [4:16:56<24:04:42, 3.67it/s] 14%|█▍ | 53707/371472 [4:16:56<23:49:06, 3.71it/s] 14%|█▍ | 53708/371472 [4:16:57<23:20:30, 3.78it/s] 14%|█▍ | 53709/371472 [4:16:57<24:09:06, 3.65it/s] 14%|█▍ | 53710/371472 [4:16:57<25:32:10, 3.46it/s] 14%|█▍ | 53711/371472 [4:16:58<25:03:36, 3.52it/s] 14%|█▍ | 53712/371472 [4:16:58<24:50:37, 3.55it/s] 14%|█▍ | 53713/371472 [4:16:58<24:06:35, 3.66it/s] 14%|█▍ | 53714/371472 [4:16:58<25:08:07, 3.51it/s] 14%|█▍ | 53715/371472 [4:16:59<26:12:30, 3.37it/s] 14%|█▍ | 53716/371472 [4:16:59<25:24:56, 3.47it/s] 14%|█▍ | 53717/371472 [4:16:59<25:11:43, 3.50it/s] 14%|█▍ | 53718/371472 [4:16:59<24:26:56, 3.61it/s] 14%|█▍ | 53719/371472 [4:17:00<23:56:43, 3.69it/s] 14%|█▍ | 53720/371472 [4:17:00<24:16:24, 3.64it/s] {'loss': 4.1911, 'learning_rate': 8.702622336184791e-07, 'epoch': 2.31} + 14%|█▍ | 53720/371472 [4:17:00<24:16:24, 3.64it/s] 14%|█▍ | 53721/371472 [4:17:00<24:59:17, 3.53it/s] 14%|█▍ | 53722/371472 [4:17:01<24:42:43, 3.57it/s] 14%|█▍ | 53723/371472 [4:17:01<23:29:16, 3.76it/s] 14%|█▍ | 53724/371472 [4:17:01<23:35:40, 3.74it/s] 14%|█▍ | 53725/371472 [4:17:01<24:25:23, 3.61it/s] 14%|█▍ | 53726/371472 [4:17:02<24:05:07, 3.66it/s] 14%|█▍ | 53727/371472 [4:17:02<23:38:48, 3.73it/s] 14%|█▍ | 53728/371472 [4:17:02<25:04:30, 3.52it/s] 14%|█▍ | 53729/371472 [4:17:03<25:26:54, 3.47it/s] 14%|█▍ | 53730/371472 [4:17:03<25:04:54, 3.52it/s] 14%|█▍ | 53731/371472 [4:17:03<24:33:27, 3.59it/s] 14%|█▍ | 53732/371472 [4:17:03<24:47:06, 3.56it/s] 14%|█▍ | 53733/371472 [4:17:04<25:38:18, 3.44it/s] 14%|█▍ | 53734/371472 [4:17:04<25:04:01, 3.52it/s] 14%|█▍ | 53735/371472 [4:17:04<24:48:50, 3.56it/s] 14%|█▍ | 53736/371472 [4:17:05<24:52:38, 3.55it/s] 14%|█▍ | 53737/371472 [4:17:05<24:33:35, 3.59it/s] 14%|█▍ | 53738/371472 [4:17:05<25:06:07, 3.52it/s] 14%|█▍ | 53739/371472 [4:17:05<24:11:45, 3.65it/s] 14%|█▍ | 53740/371472 [4:17:06<23:40:35, 3.73it/s] {'loss': 4.4483, 'learning_rate': 8.702137516430002e-07, 'epoch': 2.31} + 14%|█▍ | 53740/371472 [4:17:06<23:40:35, 3.73it/s] 14%|█▍ | 53741/371472 [4:17:06<23:30:54, 3.75it/s] 14%|█▍ | 53742/371472 [4:17:06<23:10:29, 3.81it/s] 14%|█▍ | 53743/371472 [4:17:06<24:09:58, 3.65it/s] 14%|█▍ | 53744/371472 [4:17:07<23:24:26, 3.77it/s] 14%|█▍ | 53745/371472 [4:17:07<22:50:45, 3.86it/s] 14%|█▍ | 53746/371472 [4:17:07<22:36:31, 3.90it/s] 14%|█▍ | 53747/371472 [4:17:07<23:31:54, 3.75it/s] 14%|█▍ | 53748/371472 [4:17:08<22:58:16, 3.84it/s] 14%|█▍ | 53749/371472 [4:17:08<24:45:33, 3.56it/s] 14%|█▍ | 53750/371472 [4:17:08<24:58:56, 3.53it/s] 14%|█▍ | 53751/371472 [4:17:09<24:33:41, 3.59it/s] 14%|█▍ | 53752/371472 [4:17:09<24:50:10, 3.55it/s] 14%|█▍ | 53753/371472 [4:17:09<24:58:19, 3.53it/s] 14%|█▍ | 53754/371472 [4:17:09<25:40:19, 3.44it/s] 14%|█▍ | 53755/371472 [4:17:10<24:23:14, 3.62it/s] 14%|█▍ | 53756/371472 [4:17:10<24:11:32, 3.65it/s] 14%|█▍ | 53757/371472 [4:17:10<24:50:50, 3.55it/s] 14%|█▍ | 53758/371472 [4:17:11<24:59:24, 3.53it/s] 14%|█▍ | 53759/371472 [4:17:11<25:46:34, 3.42it/s] 14%|█▍ | 53760/371472 [4:17:11<26:23:30, 3.34it/s] {'loss': 4.1958, 'learning_rate': 8.701652696675213e-07, 'epoch': 2.32} + 14%|█▍ | 53760/371472 [4:17:11<26:23:30, 3.34it/s] 14%|█▍ | 53761/371472 [4:17:11<26:00:38, 3.39it/s] 14%|█▍ | 53762/371472 [4:17:12<25:52:32, 3.41it/s] 14%|█▍ | 53763/371472 [4:17:12<27:18:31, 3.23it/s] 14%|█▍ | 53764/371472 [4:17:12<26:14:47, 3.36it/s] 14%|█▍ | 53765/371472 [4:17:13<25:23:13, 3.48it/s] 14%|█▍ | 53766/371472 [4:17:13<25:28:51, 3.46it/s] 14%|█▍ | 53767/371472 [4:17:13<24:45:41, 3.56it/s] 14%|█▍ | 53768/371472 [4:17:13<23:53:22, 3.69it/s] 14%|█▍ | 53769/371472 [4:17:14<24:01:51, 3.67it/s] 14%|█▍ | 53770/371472 [4:17:14<24:07:52, 3.66it/s] 14%|█▍ | 53771/371472 [4:17:14<23:27:15, 3.76it/s] 14%|█▍ | 53772/371472 [4:17:15<24:51:58, 3.55it/s] 14%|█▍ | 53773/371472 [4:17:15<23:52:09, 3.70it/s] 14%|█▍ | 53774/371472 [4:17:15<23:56:17, 3.69it/s] 14%|█▍ | 53775/371472 [4:17:15<24:26:55, 3.61it/s] 14%|█▍ | 53776/371472 [4:17:16<23:27:29, 3.76it/s] 14%|█▍ | 53777/371472 [4:17:16<24:09:11, 3.65it/s] 14%|█▍ | 53778/371472 [4:17:16<23:31:46, 3.75it/s] 14%|█▍ | 53779/371472 [4:17:16<24:12:33, 3.65it/s] 14%|█▍ | 53780/371472 [4:17:17<23:42:44, 3.72it/s] {'loss': 4.3123, 'learning_rate': 8.701167876920425e-07, 'epoch': 2.32} + 14%|█▍ | 53780/371472 [4:17:17<23:42:44, 3.72it/s] 14%|█▍ | 53781/371472 [4:17:17<24:37:16, 3.58it/s] 14%|█▍ | 53782/371472 [4:17:17<24:12:58, 3.64it/s] 14%|█▍ | 53783/371472 [4:17:18<24:14:24, 3.64it/s] 14%|█▍ | 53784/371472 [4:17:18<24:23:01, 3.62it/s] 14%|█▍ | 53785/371472 [4:17:18<24:41:52, 3.57it/s] 14%|█▍ | 53786/371472 [4:17:18<24:35:43, 3.59it/s] 14%|█▍ | 53787/371472 [4:17:19<23:55:58, 3.69it/s] 14%|█▍ | 53788/371472 [4:17:19<23:10:29, 3.81it/s] 14%|█▍ | 53789/371472 [4:17:19<23:27:53, 3.76it/s] 14%|█▍ | 53790/371472 [4:17:19<23:14:38, 3.80it/s] 14%|█▍ | 53791/371472 [4:17:20<23:49:01, 3.71it/s] 14%|█▍ | 53792/371472 [4:17:20<23:48:23, 3.71it/s] 14%|█▍ | 53793/371472 [4:17:20<24:31:44, 3.60it/s] 14%|█▍ | 53794/371472 [4:17:20<23:31:46, 3.75it/s] 14%|█▍ | 53795/371472 [4:17:21<24:46:05, 3.56it/s] 14%|█▍ | 53796/371472 [4:17:21<26:16:06, 3.36it/s] 14%|█▍ | 53797/371472 [4:17:21<25:41:16, 3.44it/s] 14%|█▍ | 53798/371472 [4:17:22<24:52:15, 3.55it/s] 14%|█▍ | 53799/371472 [4:17:22<25:19:44, 3.48it/s] 14%|█▍ | 53800/371472 [4:17:22<24:40:38, 3.58it/s] {'loss': 4.251, 'learning_rate': 8.700683057165636e-07, 'epoch': 2.32} + 14%|█▍ | 53800/371472 [4:17:22<24:40:38, 3.58it/s] 14%|█▍ | 53801/371472 [4:17:23<25:05:54, 3.52it/s] 14%|█▍ | 53802/371472 [4:17:23<24:31:13, 3.60it/s] 14%|█▍ | 53803/371472 [4:17:23<25:23:31, 3.48it/s] 14%|█▍ | 53804/371472 [4:17:23<25:32:02, 3.46it/s] 14%|█▍ | 53805/371472 [4:17:24<24:40:40, 3.58it/s] 14%|█▍ | 53806/371472 [4:17:24<24:07:47, 3.66it/s] 14%|█▍ | 53807/371472 [4:17:24<24:21:14, 3.62it/s] 14%|█▍ | 53808/371472 [4:17:25<28:09:14, 3.13it/s] 14%|█▍ | 53809/371472 [4:17:25<26:07:57, 3.38it/s] 14%|█▍ | 53810/371472 [4:17:25<25:14:01, 3.50it/s] 14%|█▍ | 53811/371472 [4:17:25<24:27:30, 3.61it/s] 14%|█▍ | 53812/371472 [4:17:26<23:54:46, 3.69it/s] 14%|█▍ | 53813/371472 [4:17:26<23:03:13, 3.83it/s] 14%|█▍ | 53814/371472 [4:17:26<22:54:03, 3.85it/s] 14%|█▍ | 53815/371472 [4:17:26<23:06:29, 3.82it/s] 14%|█▍ | 53816/371472 [4:17:27<23:15:31, 3.79it/s] 14%|█▍ | 53817/371472 [4:17:27<23:22:09, 3.78it/s] 14%|█▍ | 53818/371472 [4:17:27<24:02:20, 3.67it/s] 14%|█▍ | 53819/371472 [4:17:28<24:33:25, 3.59it/s] 14%|█▍ | 53820/371472 [4:17:28<24:13:51, 3.64it/s] {'loss': 4.3, 'learning_rate': 8.700198237410847e-07, 'epoch': 2.32} + 14%|█▍ | 53820/371472 [4:17:28<24:13:51, 3.64it/s] 14%|█▍ | 53821/371472 [4:17:28<24:05:36, 3.66it/s] 14%|█▍ | 53822/371472 [4:17:28<24:15:55, 3.64it/s] 14%|█▍ | 53823/371472 [4:17:29<23:08:29, 3.81it/s] 14%|█▍ | 53824/371472 [4:17:29<23:22:56, 3.77it/s] 14%|█▍ | 53825/371472 [4:17:29<23:33:24, 3.75it/s] 14%|█▍ | 53826/371472 [4:17:29<24:09:14, 3.65it/s] 14%|█▍ | 53827/371472 [4:17:30<24:12:30, 3.64it/s] 14%|█▍ | 53828/371472 [4:17:30<23:53:24, 3.69it/s] 14%|█▍ | 53829/371472 [4:17:30<25:07:04, 3.51it/s] 14%|█▍ | 53830/371472 [4:17:30<23:54:08, 3.69it/s] 14%|█▍ | 53831/371472 [4:17:31<24:06:19, 3.66it/s] 14%|█▍ | 53832/371472 [4:17:31<23:15:50, 3.79it/s] 14%|█▍ | 53833/371472 [4:17:31<23:04:30, 3.82it/s] 14%|█▍ | 53834/371472 [4:17:32<22:51:13, 3.86it/s] 14%|█▍ | 53835/371472 [4:17:32<22:27:40, 3.93it/s] 14%|█▍ | 53836/371472 [4:17:32<22:51:06, 3.86it/s] 14%|█▍ | 53837/371472 [4:17:32<23:25:49, 3.77it/s] 14%|█▍ | 53838/371472 [4:17:33<22:58:56, 3.84it/s] 14%|█▍ | 53839/371472 [4:17:33<26:04:04, 3.38it/s] 14%|█▍ | 53840/371472 [4:17:33<25:30:20, 3.46it/s] {'loss': 4.327, 'learning_rate': 8.699713417656057e-07, 'epoch': 2.32} + 14%|█▍ | 53840/371472 [4:17:33<25:30:20, 3.46it/s] 14%|█▍ | 53841/371472 [4:17:33<24:55:01, 3.54it/s] 14%|█▍ | 53842/371472 [4:17:34<24:10:41, 3.65it/s] 14%|█▍ | 53843/371472 [4:17:34<25:55:28, 3.40it/s] 14%|█▍ | 53844/371472 [4:17:34<24:29:27, 3.60it/s] 14%|█▍ | 53845/371472 [4:17:35<23:54:27, 3.69it/s] 14%|█▍ | 53846/371472 [4:17:35<23:43:43, 3.72it/s] 14%|█▍ | 53847/371472 [4:17:35<23:46:08, 3.71it/s] 14%|█▍ | 53848/371472 [4:17:35<23:32:35, 3.75it/s] 14%|█▍ | 53849/371472 [4:17:36<24:24:15, 3.62it/s] 14%|█▍ | 53850/371472 [4:17:36<23:53:38, 3.69it/s] 14%|█▍ | 53851/371472 [4:17:36<24:15:09, 3.64it/s] 14%|█▍ | 53852/371472 [4:17:37<25:18:55, 3.49it/s] 14%|█▍ | 53853/371472 [4:17:37<25:20:22, 3.48it/s] 14%|█▍ | 53854/371472 [4:17:37<25:28:48, 3.46it/s] 14%|█▍ | 53855/371472 [4:17:37<25:20:39, 3.48it/s] 14%|█▍ | 53856/371472 [4:17:38<26:09:18, 3.37it/s] 14%|█▍ | 53857/371472 [4:17:38<27:15:14, 3.24it/s] 14%|█▍ | 53858/371472 [4:17:38<26:40:14, 3.31it/s] 14%|█▍ | 53859/371472 [4:17:39<27:08:09, 3.25it/s] 14%|█▍ | 53860/371472 [4:17:39<28:28:22, 3.10it/s] {'loss': 4.5191, 'learning_rate': 8.699228597901269e-07, 'epoch': 2.32} + 14%|█▍ | 53860/371472 [4:17:39<28:28:22, 3.10it/s] 14%|█▍ | 53861/371472 [4:17:39<28:04:52, 3.14it/s] 14%|█▍ | 53862/371472 [4:17:40<29:52:20, 2.95it/s] 14%|█▍ | 53863/371472 [4:17:40<27:52:18, 3.17it/s] 15%|█▍ | 53864/371472 [4:17:40<26:49:07, 3.29it/s] 15%|█▍ | 53865/371472 [4:17:40<25:51:18, 3.41it/s] 15%|█▍ | 53866/371472 [4:17:41<25:12:49, 3.50it/s] 15%|█▍ | 53867/371472 [4:17:41<28:20:08, 3.11it/s] 15%|█▍ | 53868/371472 [4:17:41<28:02:28, 3.15it/s] 15%|█▍ | 53869/371472 [4:17:42<28:19:31, 3.11it/s] 15%|█▍ | 53870/371472 [4:17:42<28:14:51, 3.12it/s] 15%|█▍ | 53871/371472 [4:17:42<27:27:18, 3.21it/s] 15%|█▍ | 53872/371472 [4:17:43<27:21:32, 3.22it/s] 15%|█▍ | 53873/371472 [4:17:43<25:39:38, 3.44it/s] 15%|█▍ | 53874/371472 [4:17:43<24:23:09, 3.62it/s] 15%|█▍ | 53875/371472 [4:17:43<24:21:29, 3.62it/s] 15%|█▍ | 53876/371472 [4:17:44<24:05:42, 3.66it/s] 15%|█▍ | 53877/371472 [4:17:44<25:59:07, 3.40it/s] 15%|█▍ | 53878/371472 [4:17:44<26:10:05, 3.37it/s] 15%|█▍ | 53879/371472 [4:17:45<25:16:28, 3.49it/s] 15%|█▍ | 53880/371472 [4:17:45<24:18:10, 3.63it/s] {'loss': 4.0079, 'learning_rate': 8.698743778146481e-07, 'epoch': 2.32} + 15%|█▍ | 53880/371472 [4:17:45<24:18:10, 3.63it/s] 15%|█▍ | 53881/371472 [4:17:45<24:47:03, 3.56it/s] 15%|█▍ | 53882/371472 [4:17:45<24:48:01, 3.56it/s] 15%|█▍ | 53883/371472 [4:17:46<24:30:50, 3.60it/s] 15%|█▍ | 53884/371472 [4:17:46<23:36:34, 3.74it/s] 15%|█▍ | 53885/371472 [4:17:46<25:30:26, 3.46it/s] 15%|█▍ | 53886/371472 [4:17:47<25:21:26, 3.48it/s] 15%|█▍ | 53887/371472 [4:17:47<26:02:34, 3.39it/s] 15%|█▍ | 53888/371472 [4:17:47<24:53:32, 3.54it/s] 15%|█▍ | 53889/371472 [4:17:48<26:13:03, 3.36it/s] 15%|█▍ | 53890/371472 [4:17:48<24:26:19, 3.61it/s] 15%|█▍ | 53891/371472 [4:17:48<23:10:43, 3.81it/s] 15%|█▍ | 53892/371472 [4:17:48<24:13:17, 3.64it/s] 15%|█▍ | 53893/371472 [4:17:49<24:09:37, 3.65it/s] 15%|█▍ | 53894/371472 [4:17:49<25:04:04, 3.52it/s] 15%|█▍ | 53895/371472 [4:17:49<25:24:48, 3.47it/s] 15%|█▍ | 53896/371472 [4:17:49<25:32:42, 3.45it/s] 15%|█▍ | 53897/371472 [4:17:50<25:13:10, 3.50it/s] 15%|█▍ | 53898/371472 [4:17:50<26:04:51, 3.38it/s] 15%|█▍ | 53899/371472 [4:17:50<25:19:48, 3.48it/s] 15%|█▍ | 53900/371472 [4:17:51<24:36:57, 3.58it/s] {'loss': 4.1226, 'learning_rate': 8.698258958391691e-07, 'epoch': 2.32} + 15%|█▍ | 53900/371472 [4:17:51<24:36:57, 3.58it/s] 15%|█▍ | 53901/371472 [4:17:51<24:19:50, 3.63it/s] 15%|█▍ | 53902/371472 [4:17:51<25:53:43, 3.41it/s] 15%|█▍ | 53903/371472 [4:17:51<24:44:55, 3.56it/s] 15%|█▍ | 53904/371472 [4:17:52<24:13:05, 3.64it/s] 15%|█▍ | 53905/371472 [4:17:52<23:31:47, 3.75it/s] 15%|█▍ | 53906/371472 [4:17:52<24:26:43, 3.61it/s] 15%|█▍ | 53907/371472 [4:17:52<23:39:20, 3.73it/s] 15%|█▍ | 53908/371472 [4:17:53<24:11:27, 3.65it/s] 15%|█▍ | 53909/371472 [4:17:53<24:12:00, 3.65it/s] 15%|█▍ | 53910/371472 [4:17:53<23:46:05, 3.71it/s] 15%|█▍ | 53911/371472 [4:17:54<24:32:07, 3.60it/s] 15%|█▍ | 53912/371472 [4:17:54<23:45:31, 3.71it/s] 15%|█▍ | 53913/371472 [4:17:54<23:46:31, 3.71it/s] 15%|█▍ | 53914/371472 [4:17:54<24:44:55, 3.56it/s] 15%|█▍ | 53915/371472 [4:17:55<24:51:10, 3.55it/s] 15%|█▍ | 53916/371472 [4:17:55<24:09:10, 3.65it/s] 15%|█▍ | 53917/371472 [4:17:55<23:14:38, 3.79it/s] 15%|█▍ | 53918/371472 [4:17:56<23:57:40, 3.68it/s] 15%|█▍ | 53919/371472 [4:17:56<23:40:56, 3.72it/s] 15%|█▍ | 53920/371472 [4:17:56<24:38:09, 3.58it/s] {'loss': 4.2492, 'learning_rate': 8.697774138636902e-07, 'epoch': 2.32} + 15%|█▍ | 53920/371472 [4:17:56<24:38:09, 3.58it/s] 15%|█▍ | 53921/371472 [4:17:56<23:35:08, 3.74it/s] 15%|█▍ | 53922/371472 [4:17:57<24:32:56, 3.59it/s] 15%|█▍ | 53923/371472 [4:17:57<23:49:39, 3.70it/s] 15%|█▍ | 53924/371472 [4:17:57<24:42:29, 3.57it/s] 15%|█▍ | 53925/371472 [4:17:57<24:34:41, 3.59it/s] 15%|█▍ | 53926/371472 [4:17:58<24:20:41, 3.62it/s] 15%|█▍ | 53927/371472 [4:17:58<24:49:59, 3.55it/s] 15%|█▍ | 53928/371472 [4:17:58<25:34:43, 3.45it/s] 15%|█▍ | 53929/371472 [4:17:59<24:56:31, 3.54it/s] 15%|█▍ | 53930/371472 [4:17:59<24:24:36, 3.61it/s] 15%|█▍ | 53931/371472 [4:17:59<25:52:08, 3.41it/s] 15%|█▍ | 53932/371472 [4:17:59<25:15:52, 3.49it/s] 15%|█▍ | 53933/371472 [4:18:00<24:00:55, 3.67it/s] 15%|█▍ | 53934/371472 [4:18:00<23:25:37, 3.77it/s] 15%|█▍ | 53935/371472 [4:18:00<27:06:40, 3.25it/s] 15%|█▍ | 53936/371472 [4:18:01<26:34:01, 3.32it/s] 15%|█▍ | 53937/371472 [4:18:01<25:06:25, 3.51it/s] 15%|█▍ | 53938/371472 [4:18:01<24:13:10, 3.64it/s] 15%|█▍ | 53939/371472 [4:18:01<23:49:48, 3.70it/s] 15%|█▍ | 53940/371472 [4:18:02<23:29:49, 3.75it/s] {'loss': 4.2787, 'learning_rate': 8.697289318882113e-07, 'epoch': 2.32} + 15%|█▍ | 53940/371472 [4:18:02<23:29:49, 3.75it/s] 15%|█▍ | 53941/371472 [4:18:02<23:22:16, 3.77it/s] 15%|█▍ | 53942/371472 [4:18:02<23:03:41, 3.82it/s] 15%|█▍ | 53943/371472 [4:18:02<24:06:56, 3.66it/s] 15%|█▍ | 53944/371472 [4:18:03<23:18:40, 3.78it/s] 15%|█▍ | 53945/371472 [4:18:03<24:56:30, 3.54it/s] 15%|█▍ | 53946/371472 [4:18:03<24:19:29, 3.63it/s] 15%|█▍ | 53947/371472 [4:18:04<25:19:05, 3.48it/s] 15%|█▍ | 53948/371472 [4:18:04<25:04:38, 3.52it/s] 15%|█▍ | 53949/371472 [4:18:04<24:59:06, 3.53it/s] 15%|█▍ | 53950/371472 [4:18:04<24:44:02, 3.57it/s] 15%|█▍ | 53951/371472 [4:18:05<25:03:07, 3.52it/s] 15%|█▍ | 53952/371472 [4:18:05<25:49:03, 3.42it/s] 15%|█▍ | 53953/371472 [4:18:05<25:50:07, 3.41it/s] 15%|█▍ | 53954/371472 [4:18:06<24:35:26, 3.59it/s] 15%|█▍ | 53955/371472 [4:18:06<24:22:04, 3.62it/s] 15%|█▍ | 53956/371472 [4:18:06<24:01:34, 3.67it/s] 15%|█▍ | 53957/371472 [4:18:06<25:20:59, 3.48it/s] 15%|█▍ | 53958/371472 [4:18:07<24:24:16, 3.61it/s] 15%|█▍ | 53959/371472 [4:18:07<23:58:35, 3.68it/s] 15%|█▍ | 53960/371472 [4:18:07<24:25:47, 3.61it/s] {'loss': 4.1966, 'learning_rate': 8.696804499127323e-07, 'epoch': 2.32} + 15%|█▍ | 53960/371472 [4:18:07<24:25:47, 3.61it/s] 15%|█▍ | 53961/371472 [4:18:08<24:29:52, 3.60it/s] 15%|█▍ | 53962/371472 [4:18:08<24:53:32, 3.54it/s] 15%|█▍ | 53963/371472 [4:18:08<27:17:51, 3.23it/s] 15%|█▍ | 53964/371472 [4:18:08<26:33:57, 3.32it/s] 15%|█▍ | 53965/371472 [4:18:09<24:55:25, 3.54it/s] 15%|█▍ | 53966/371472 [4:18:09<25:53:45, 3.41it/s] 15%|█▍ | 53967/371472 [4:18:09<24:57:33, 3.53it/s] 15%|█▍ | 53968/371472 [4:18:10<25:03:12, 3.52it/s] 15%|█▍ | 53969/371472 [4:18:10<25:06:06, 3.51it/s] 15%|█▍ | 53970/371472 [4:18:10<25:21:38, 3.48it/s] 15%|█▍ | 53971/371472 [4:18:10<24:48:13, 3.56it/s] 15%|█▍ | 53972/371472 [4:18:11<24:51:26, 3.55it/s] 15%|█▍ | 53973/371472 [4:18:11<25:49:05, 3.42it/s] 15%|█▍ | 53974/371472 [4:18:11<26:12:03, 3.37it/s] 15%|█▍ | 53975/371472 [4:18:12<27:19:52, 3.23it/s] 15%|█▍ | 53976/371472 [4:18:12<27:18:35, 3.23it/s] 15%|█▍ | 53977/371472 [4:18:12<27:54:50, 3.16it/s] 15%|█▍ | 53978/371472 [4:18:13<26:03:12, 3.39it/s] 15%|█▍ | 53979/371472 [4:18:13<26:59:32, 3.27it/s] 15%|█▍ | 53980/371472 [4:18:13<25:36:48, 3.44it/s] {'loss': 4.2201, 'learning_rate': 8.696319679372535e-07, 'epoch': 2.33} + 15%|█▍ | 53980/371472 [4:18:13<25:36:48, 3.44it/s] 15%|█▍ | 53981/371472 [4:18:13<26:00:07, 3.39it/s] 15%|█▍ | 53982/371472 [4:18:14<24:40:22, 3.57it/s] 15%|█▍ | 53983/371472 [4:18:14<25:41:13, 3.43it/s] 15%|█▍ | 53984/371472 [4:18:14<25:37:21, 3.44it/s] 15%|█▍ | 53985/371472 [4:18:15<26:31:09, 3.33it/s] 15%|█▍ | 53986/371472 [4:18:15<25:50:53, 3.41it/s] 15%|█▍ | 53987/371472 [4:18:15<25:54:54, 3.40it/s] 15%|█▍ | 53988/371472 [4:18:15<24:47:26, 3.56it/s] 15%|█▍ | 53989/371472 [4:18:16<24:42:34, 3.57it/s] 15%|█▍ | 53990/371472 [4:18:16<25:28:11, 3.46it/s] 15%|█▍ | 53991/371472 [4:18:16<24:56:30, 3.54it/s] 15%|█▍ | 53992/371472 [4:18:17<24:41:49, 3.57it/s] 15%|█▍ | 53993/371472 [4:18:17<24:10:58, 3.65it/s] 15%|█▍ | 53994/371472 [4:18:17<24:10:29, 3.65it/s] 15%|█▍ | 53995/371472 [4:18:17<25:41:42, 3.43it/s] 15%|█▍ | 53996/371472 [4:18:18<27:21:52, 3.22it/s] 15%|█▍ | 53997/371472 [4:18:18<26:36:00, 3.32it/s] 15%|█▍ | 53998/371472 [4:18:18<25:38:25, 3.44it/s] 15%|█▍ | 53999/371472 [4:18:19<25:35:00, 3.45it/s] 15%|█▍ | 54000/371472 [4:18:19<25:49:52, 3.41it/s] {'loss': 4.2533, 'learning_rate': 8.695834859617746e-07, 'epoch': 2.33} + 15%|█▍ | 54000/371472 [4:18:19<25:49:52, 3.41it/s] 15%|█▍ | 54001/371472 [4:18:19<26:33:01, 3.32it/s] 15%|█▍ | 54002/371472 [4:18:20<25:48:06, 3.42it/s] 15%|█▍ | 54003/371472 [4:18:20<24:43:58, 3.57it/s] 15%|█▍ | 54004/371472 [4:18:20<24:36:32, 3.58it/s] 15%|█▍ | 54005/371472 [4:18:20<24:59:17, 3.53it/s] 15%|█▍ | 54006/371472 [4:18:21<25:17:34, 3.49it/s] 15%|█▍ | 54007/371472 [4:18:21<25:09:09, 3.51it/s] 15%|█▍ | 54008/371472 [4:18:21<23:54:56, 3.69it/s] 15%|█▍ | 54009/371472 [4:18:21<24:05:07, 3.66it/s] 15%|█▍ | 54010/371472 [4:18:22<24:48:57, 3.55it/s] 15%|█▍ | 54011/371472 [4:18:22<24:25:15, 3.61it/s] 15%|█▍ | 54012/371472 [4:18:22<24:12:07, 3.64it/s] 15%|█▍ | 54013/371472 [4:18:22<23:02:59, 3.83it/s] 15%|█▍ | 54014/371472 [4:18:23<25:09:43, 3.50it/s] 15%|█▍ | 54015/371472 [4:18:23<24:50:54, 3.55it/s] 15%|█▍ | 54016/371472 [4:18:23<25:20:17, 3.48it/s] 15%|█▍ | 54017/371472 [4:18:24<25:12:12, 3.50it/s] 15%|█▍ | 54018/371472 [4:18:24<26:05:56, 3.38it/s] 15%|█▍ | 54019/371472 [4:18:24<25:09:01, 3.51it/s] 15%|█▍ | 54020/371472 [4:18:25<24:32:55, 3.59it/s] {'loss': 4.2268, 'learning_rate': 8.695350039862957e-07, 'epoch': 2.33} + 15%|█▍ | 54020/371472 [4:18:25<24:32:55, 3.59it/s] 15%|█▍ | 54021/371472 [4:18:25<24:05:58, 3.66it/s] 15%|█▍ | 54022/371472 [4:18:25<23:25:19, 3.76it/s] 15%|█▍ | 54023/371472 [4:18:25<23:11:22, 3.80it/s] 15%|█▍ | 54024/371472 [4:18:26<25:37:12, 3.44it/s] 15%|█▍ | 54025/371472 [4:18:26<24:33:56, 3.59it/s] 15%|█▍ | 54026/371472 [4:18:26<24:43:38, 3.57it/s] 15%|█▍ | 54027/371472 [4:18:27<25:53:32, 3.41it/s] 15%|█▍ | 54028/371472 [4:18:27<25:42:29, 3.43it/s] 15%|█▍ | 54029/371472 [4:18:27<25:52:34, 3.41it/s] 15%|█▍ | 54030/371472 [4:18:27<24:46:21, 3.56it/s] 15%|█▍ | 54031/371472 [4:18:28<24:58:26, 3.53it/s] 15%|█▍ | 54032/371472 [4:18:28<23:50:33, 3.70it/s] 15%|█▍ | 54033/371472 [4:18:28<23:35:04, 3.74it/s] 15%|█▍ | 54034/371472 [4:18:28<24:05:40, 3.66it/s] 15%|█▍ | 54035/371472 [4:18:29<23:49:23, 3.70it/s] 15%|█▍ | 54036/371472 [4:18:29<23:07:02, 3.81it/s] 15%|█▍ | 54037/371472 [4:18:29<23:23:42, 3.77it/s] 15%|█▍ | 54038/371472 [4:18:29<23:26:22, 3.76it/s] 15%|█▍ | 54039/371472 [4:18:30<23:06:37, 3.82it/s] 15%|█▍ | 54040/371472 [4:18:30<24:19:17, 3.63it/s] {'loss': 4.2416, 'learning_rate': 8.694865220108168e-07, 'epoch': 2.33} + 15%|█▍ | 54040/371472 [4:18:30<24:19:17, 3.63it/s] 15%|█▍ | 54041/371472 [4:18:30<24:15:47, 3.63it/s] 15%|█▍ | 54042/371472 [4:18:31<23:56:13, 3.68it/s] 15%|█▍ | 54043/371472 [4:18:31<24:15:57, 3.63it/s] 15%|█▍ | 54044/371472 [4:18:31<26:13:05, 3.36it/s] 15%|█▍ | 54045/371472 [4:18:31<24:40:18, 3.57it/s] 15%|█▍ | 54046/371472 [4:18:32<24:46:25, 3.56it/s] 15%|█▍ | 54047/371472 [4:18:32<24:19:07, 3.63it/s] 15%|█▍ | 54048/371472 [4:18:32<26:08:02, 3.37it/s] 15%|█▍ | 54049/371472 [4:18:33<24:57:05, 3.53it/s] 15%|█▍ | 54050/371472 [4:18:33<24:46:12, 3.56it/s] 15%|█▍ | 54051/371472 [4:18:33<26:33:54, 3.32it/s] 15%|█▍ | 54052/371472 [4:18:33<25:28:29, 3.46it/s] 15%|█▍ | 54053/371472 [4:18:34<24:56:50, 3.53it/s] 15%|█▍ | 54054/371472 [4:18:34<24:46:20, 3.56it/s] 15%|█▍ | 54055/371472 [4:18:34<23:56:46, 3.68it/s] 15%|█▍ | 54056/371472 [4:18:35<24:58:49, 3.53it/s] 15%|█▍ | 54057/371472 [4:18:35<26:13:04, 3.36it/s] 15%|█▍ | 54058/371472 [4:18:35<25:24:42, 3.47it/s] 15%|█▍ | 54059/371472 [4:18:36<26:31:03, 3.32it/s] 15%|█▍ | 54060/371472 [4:18:36<25:15:33, 3.49it/s] {'loss': 4.1719, 'learning_rate': 8.69438040035338e-07, 'epoch': 2.33} + 15%|█▍ | 54060/371472 [4:18:36<25:15:33, 3.49it/s] 15%|█▍ | 54061/371472 [4:18:36<26:13:14, 3.36it/s] 15%|█▍ | 54062/371472 [4:18:36<25:50:28, 3.41it/s] 15%|█▍ | 54063/371472 [4:18:37<25:09:37, 3.50it/s] 15%|█▍ | 54064/371472 [4:18:37<25:59:53, 3.39it/s] 15%|█▍ | 54065/371472 [4:18:37<26:13:47, 3.36it/s] 15%|█▍ | 54066/371472 [4:18:38<26:06:32, 3.38it/s] 15%|█▍ | 54067/371472 [4:18:38<25:08:42, 3.51it/s] 15%|█▍ | 54068/371472 [4:18:38<24:54:18, 3.54it/s] 15%|█▍ | 54069/371472 [4:18:38<24:25:11, 3.61it/s] 15%|█▍ | 54070/371472 [4:18:39<24:37:47, 3.58it/s] 15%|█▍ | 54071/371472 [4:18:39<24:30:16, 3.60it/s] 15%|█▍ | 54072/371472 [4:18:39<24:24:39, 3.61it/s] 15%|█▍ | 54073/371472 [4:18:39<24:19:34, 3.62it/s] 15%|█▍ | 54074/371472 [4:18:40<23:02:44, 3.83it/s] 15%|█▍ | 54075/371472 [4:18:40<22:57:05, 3.84it/s] 15%|█▍ | 54076/371472 [4:18:40<24:54:10, 3.54it/s] 15%|█▍ | 54077/371472 [4:18:41<25:42:41, 3.43it/s] 15%|█▍ | 54078/371472 [4:18:41<25:24:19, 3.47it/s] 15%|█▍ | 54079/371472 [4:18:41<24:23:59, 3.61it/s] 15%|█▍ | 54080/371472 [4:18:41<24:53:56, 3.54it/s] {'loss': 4.4017, 'learning_rate': 8.693895580598591e-07, 'epoch': 2.33} + 15%|█▍ | 54080/371472 [4:18:41<24:53:56, 3.54it/s] 15%|█▍ | 54081/371472 [4:18:42<24:22:05, 3.62it/s] 15%|█▍ | 54082/371472 [4:18:42<24:18:07, 3.63it/s] 15%|█▍ | 54083/371472 [4:18:42<23:36:06, 3.74it/s] 15%|█▍ | 54084/371472 [4:18:42<23:26:17, 3.76it/s] 15%|█▍ | 54085/371472 [4:18:43<26:24:26, 3.34it/s] 15%|█▍ | 54086/371472 [4:18:43<26:17:40, 3.35it/s] 15%|█▍ | 54087/371472 [4:18:43<26:21:23, 3.34it/s] 15%|█▍ | 54088/371472 [4:18:44<26:39:16, 3.31it/s] 15%|█▍ | 54089/371472 [4:18:44<26:35:50, 3.31it/s] 15%|█▍ | 54090/371472 [4:18:44<25:31:38, 3.45it/s] 15%|█▍ | 54091/371472 [4:18:45<25:36:24, 3.44it/s] 15%|█▍ | 54092/371472 [4:18:45<24:56:28, 3.53it/s] 15%|█▍ | 54093/371472 [4:18:45<24:18:55, 3.63it/s] 15%|█▍ | 54094/371472 [4:18:45<23:53:27, 3.69it/s] 15%|█▍ | 54095/371472 [4:18:46<23:19:41, 3.78it/s] 15%|█▍ | 54096/371472 [4:18:46<24:22:45, 3.62it/s] 15%|█▍ | 54097/371472 [4:18:46<24:53:54, 3.54it/s] 15%|█▍ | 54098/371472 [4:18:46<24:19:18, 3.62it/s] 15%|█▍ | 54099/371472 [4:18:47<25:31:52, 3.45it/s] 15%|█▍ | 54100/371472 [4:18:47<25:22:47, 3.47it/s] {'loss': 4.3052, 'learning_rate': 8.693410760843801e-07, 'epoch': 2.33} + 15%|█▍ | 54100/371472 [4:18:47<25:22:47, 3.47it/s] 15%|█▍ | 54101/371472 [4:18:47<24:14:14, 3.64it/s] 15%|█▍ | 54102/371472 [4:18:48<23:20:40, 3.78it/s] 15%|█▍ | 54103/371472 [4:18:48<24:16:24, 3.63it/s] 15%|█▍ | 54104/371472 [4:18:48<23:36:13, 3.73it/s] 15%|█▍ | 54105/371472 [4:18:48<24:34:24, 3.59it/s] 15%|█▍ | 54106/371472 [4:18:49<26:20:17, 3.35it/s] 15%|█▍ | 54107/371472 [4:18:49<25:36:56, 3.44it/s] 15%|█▍ | 54108/371472 [4:18:49<25:25:13, 3.47it/s] 15%|█▍ | 54109/371472 [4:18:50<25:29:09, 3.46it/s] 15%|█▍ | 54110/371472 [4:18:50<26:35:50, 3.31it/s] 15%|█▍ | 54111/371472 [4:18:50<25:43:03, 3.43it/s] 15%|█▍ | 54112/371472 [4:18:51<25:24:34, 3.47it/s] 15%|█▍ | 54113/371472 [4:18:51<24:25:28, 3.61it/s] 15%|█▍ | 54114/371472 [4:18:51<26:39:25, 3.31it/s] 15%|█▍ | 54115/371472 [4:18:51<26:52:09, 3.28it/s] 15%|█▍ | 54116/371472 [4:18:52<25:25:14, 3.47it/s] 15%|█▍ | 54117/371472 [4:18:52<26:09:16, 3.37it/s] 15%|█▍ | 54118/371472 [4:18:52<25:32:22, 3.45it/s] 15%|█▍ | 54119/371472 [4:18:53<24:47:04, 3.56it/s] 15%|█▍ | 54120/371472 [4:18:53<24:01:11, 3.67it/s] {'loss': 4.1228, 'learning_rate': 8.692925941089012e-07, 'epoch': 2.33} + 15%|█▍ | 54120/371472 [4:18:53<24:01:11, 3.67it/s] 15%|█▍ | 54121/371472 [4:18:53<23:34:59, 3.74it/s] 15%|█▍ | 54122/371472 [4:18:53<23:07:52, 3.81it/s] 15%|█▍ | 54123/371472 [4:18:54<25:00:10, 3.53it/s] 15%|█▍ | 54124/371472 [4:18:54<24:26:51, 3.61it/s] 15%|█▍ | 54125/371472 [4:18:54<23:36:53, 3.73it/s] 15%|█▍ | 54126/371472 [4:18:54<23:40:01, 3.72it/s] 15%|█▍ | 54127/371472 [4:18:55<23:45:25, 3.71it/s] 15%|█▍ | 54128/371472 [4:18:55<22:52:46, 3.85it/s] 15%|█▍ | 54129/371472 [4:18:55<22:47:28, 3.87it/s] 15%|█▍ | 54130/371472 [4:18:55<24:03:41, 3.66it/s] 15%|█▍ | 54131/371472 [4:18:56<23:14:02, 3.79it/s] 15%|█▍ | 54132/371472 [4:18:56<24:07:40, 3.65it/s] 15%|█▍ | 54133/371472 [4:18:56<25:00:20, 3.53it/s] 15%|█▍ | 54134/371472 [4:18:57<25:01:36, 3.52it/s] 15%|█▍ | 54135/371472 [4:18:57<24:51:54, 3.55it/s] 15%|█▍ | 54136/371472 [4:18:57<24:41:20, 3.57it/s] 15%|█▍ | 54137/371472 [4:18:57<25:23:14, 3.47it/s] 15%|█▍ | 54138/371472 [4:18:58<24:59:02, 3.53it/s] 15%|█▍ | 54139/371472 [4:18:58<25:18:38, 3.48it/s] 15%|█▍ | 54140/371472 [4:18:58<25:26:03, 3.47it/s] {'loss': 4.2331, 'learning_rate': 8.692441121334224e-07, 'epoch': 2.33} + 15%|█▍ | 54140/371472 [4:18:58<25:26:03, 3.47it/s] 15%|█▍ | 54141/371472 [4:18:59<25:09:37, 3.50it/s] 15%|█▍ | 54142/371472 [4:18:59<24:09:03, 3.65it/s] 15%|█▍ | 54143/371472 [4:18:59<22:58:22, 3.84it/s] 15%|█▍ | 54144/371472 [4:18:59<22:28:11, 3.92it/s] 15%|█▍ | 54145/371472 [4:19:00<22:27:04, 3.93it/s] 15%|█▍ | 54146/371472 [4:19:00<24:23:58, 3.61it/s] 15%|█▍ | 54147/371472 [4:19:00<26:23:04, 3.34it/s] 15%|█▍ | 54148/371472 [4:19:01<25:03:07, 3.52it/s] 15%|█▍ | 54149/371472 [4:19:01<26:27:57, 3.33it/s] 15%|█▍ | 54150/371472 [4:19:01<28:02:02, 3.14it/s] 15%|█▍ | 54151/371472 [4:19:01<26:46:27, 3.29it/s] 15%|█▍ | 54152/371472 [4:19:02<25:50:00, 3.41it/s] 15%|█▍ | 54153/371472 [4:19:02<26:44:54, 3.30it/s] 15%|█▍ | 54154/371472 [4:19:02<26:38:15, 3.31it/s] 15%|█▍ | 54155/371472 [4:19:03<25:16:48, 3.49it/s] 15%|█▍ | 54156/371472 [4:19:03<26:47:19, 3.29it/s] 15%|█▍ | 54157/371472 [4:19:03<26:27:16, 3.33it/s] 15%|█▍ | 54158/371472 [4:19:04<25:41:31, 3.43it/s] 15%|█▍ | 54159/371472 [4:19:04<25:04:56, 3.51it/s] 15%|█▍ | 54160/371472 [4:19:04<26:29:27, 3.33it/s] {'loss': 4.055, 'learning_rate': 8.691956301579435e-07, 'epoch': 2.33} + 15%|█▍ | 54160/371472 [4:19:04<26:29:27, 3.33it/s] 15%|█▍ | 54161/371472 [4:19:05<28:35:17, 3.08it/s] 15%|█▍ | 54162/371472 [4:19:05<27:37:09, 3.19it/s] 15%|█▍ | 54163/371472 [4:19:05<28:02:02, 3.14it/s] 15%|█▍ | 54164/371472 [4:19:05<28:37:24, 3.08it/s] 15%|█▍ | 54165/371472 [4:19:06<27:06:00, 3.25it/s] 15%|█▍ | 54166/371472 [4:19:06<27:18:31, 3.23it/s] 15%|█▍ | 54167/371472 [4:19:06<26:59:43, 3.27it/s] 15%|█▍ | 54168/371472 [4:19:07<26:06:28, 3.38it/s] 15%|█▍ | 54169/371472 [4:19:07<25:19:16, 3.48it/s] 15%|█▍ | 54170/371472 [4:19:07<24:08:51, 3.65it/s] 15%|█▍ | 54171/371472 [4:19:07<24:47:01, 3.56it/s] 15%|█▍ | 54172/371472 [4:19:08<24:12:13, 3.64it/s] 15%|█▍ | 54173/371472 [4:19:08<22:59:56, 3.83it/s] 15%|█▍ | 54174/371472 [4:19:08<23:10:14, 3.80it/s] 15%|█▍ | 54175/371472 [4:19:08<23:55:33, 3.68it/s] 15%|█▍ | 54176/371472 [4:19:09<23:16:00, 3.79it/s] 15%|█▍ | 54177/371472 [4:19:09<25:30:30, 3.46it/s] 15%|█▍ | 54178/371472 [4:19:09<24:50:36, 3.55it/s] 15%|█▍ | 54179/371472 [4:19:10<24:24:49, 3.61it/s] 15%|█▍ | 54180/371472 [4:19:10<24:16:09, 3.63it/s] {'loss': 4.3784, 'learning_rate': 8.691471481824646e-07, 'epoch': 2.33} + 15%|█▍ | 54180/371472 [4:19:10<24:16:09, 3.63it/s] 15%|█▍ | 54181/371472 [4:19:10<24:16:13, 3.63it/s] 15%|█▍ | 54182/371472 [4:19:10<24:22:44, 3.62it/s] 15%|█▍ | 54183/371472 [4:19:11<24:34:06, 3.59it/s] 15%|█▍ | 54184/371472 [4:19:11<23:56:06, 3.68it/s] 15%|█▍ | 54185/371472 [4:19:11<24:40:26, 3.57it/s] 15%|█▍ | 54186/371472 [4:19:12<24:19:02, 3.62it/s] 15%|█▍ | 54187/371472 [4:19:12<23:42:07, 3.72it/s] 15%|█▍ | 54188/371472 [4:19:12<23:12:43, 3.80it/s] 15%|█▍ | 54189/371472 [4:19:12<22:32:05, 3.91it/s] 15%|█▍ | 54190/371472 [4:19:13<24:55:29, 3.54it/s] 15%|█▍ | 54191/371472 [4:19:13<24:13:47, 3.64it/s] 15%|█▍ | 54192/371472 [4:19:13<25:51:22, 3.41it/s] 15%|█▍ | 54193/371472 [4:19:13<25:10:47, 3.50it/s] 15%|█▍ | 54194/371472 [4:19:14<26:33:12, 3.32it/s] 15%|█▍ | 54195/371472 [4:19:14<25:11:58, 3.50it/s] 15%|█▍ | 54196/371472 [4:19:14<26:24:24, 3.34it/s] 15%|█▍ | 54197/371472 [4:19:15<27:37:01, 3.19it/s] 15%|█▍ | 54198/371472 [4:19:15<28:33:29, 3.09it/s] 15%|█▍ | 54199/371472 [4:19:15<28:41:02, 3.07it/s] 15%|█▍ | 54200/371472 [4:19:16<28:41:48, 3.07it/s] {'loss': 4.362, 'learning_rate': 8.690986662069857e-07, 'epoch': 2.33} + 15%|█▍ | 54200/371472 [4:19:16<28:41:48, 3.07it/s] 15%|█▍ | 54201/371472 [4:19:16<28:04:12, 3.14it/s] 15%|█▍ | 54202/371472 [4:19:16<28:41:58, 3.07it/s] 15%|█▍ | 54203/371472 [4:19:17<26:57:43, 3.27it/s] 15%|█▍ | 54204/371472 [4:19:17<27:10:07, 3.24it/s] 15%|█▍ | 54205/371472 [4:19:17<26:28:09, 3.33it/s] 15%|█▍ | 54206/371472 [4:19:18<25:31:45, 3.45it/s] 15%|█▍ | 54207/371472 [4:19:18<28:25:10, 3.10it/s] 15%|█▍ | 54208/371472 [4:19:18<26:51:20, 3.28it/s] 15%|█▍ | 54209/371472 [4:19:18<25:37:44, 3.44it/s] 15%|█▍ | 54210/371472 [4:19:19<25:17:43, 3.48it/s] 15%|█▍ | 54211/371472 [4:19:19<26:13:03, 3.36it/s] 15%|█▍ | 54212/371472 [4:19:19<25:13:11, 3.49it/s] 15%|█▍ | 54213/371472 [4:19:20<26:48:48, 3.29it/s] 15%|█▍ | 54214/371472 [4:19:20<27:09:27, 3.25it/s] 15%|█▍ | 54215/371472 [4:19:20<25:49:48, 3.41it/s] 15%|█▍ | 54216/371472 [4:19:20<24:43:20, 3.56it/s] 15%|█▍ | 54217/371472 [4:19:21<24:29:21, 3.60it/s] 15%|█▍ | 54218/371472 [4:19:21<23:56:40, 3.68it/s] 15%|█▍ | 54219/371472 [4:19:21<24:12:45, 3.64it/s] 15%|█▍ | 54220/371472 [4:19:22<25:11:41, 3.50it/s] {'loss': 4.2607, 'learning_rate': 8.690501842315067e-07, 'epoch': 2.34} + 15%|█▍ | 54220/371472 [4:19:22<25:11:41, 3.50it/s] 15%|█▍ | 54221/371472 [4:19:22<25:11:02, 3.50it/s] 15%|█▍ | 54222/371472 [4:19:22<25:41:38, 3.43it/s] 15%|█▍ | 54223/371472 [4:19:23<26:41:34, 3.30it/s] 15%|█▍ | 54224/371472 [4:19:23<25:09:22, 3.50it/s] 15%|█▍ | 54225/371472 [4:19:23<27:22:04, 3.22it/s] 15%|█▍ | 54226/371472 [4:19:23<25:48:02, 3.42it/s] 15%|█▍ | 54227/371472 [4:19:24<24:51:20, 3.55it/s] 15%|█▍ | 54228/371472 [4:19:24<24:02:31, 3.67it/s] 15%|█▍ | 54229/371472 [4:19:24<23:35:59, 3.73it/s] 15%|█▍ | 54230/371472 [4:19:24<23:42:27, 3.72it/s] 15%|█▍ | 54231/371472 [4:19:25<23:58:31, 3.68it/s] 15%|█▍ | 54232/371472 [4:19:25<24:07:44, 3.65it/s] 15%|█▍ | 54233/371472 [4:19:25<24:26:55, 3.60it/s] 15%|█▍ | 54234/371472 [4:19:25<23:23:33, 3.77it/s] 15%|█▍ | 54235/371472 [4:19:26<24:23:41, 3.61it/s] 15%|█▍ | 54236/371472 [4:19:26<24:06:42, 3.65it/s] 15%|█▍ | 54237/371472 [4:19:26<25:00:16, 3.52it/s] 15%|█▍ | 54238/371472 [4:19:27<25:25:07, 3.47it/s] 15%|█▍ | 54239/371472 [4:19:27<25:14:07, 3.49it/s] 15%|█▍ | 54240/371472 [4:19:27<24:25:27, 3.61it/s] {'loss': 4.4963, 'learning_rate': 8.690017022560279e-07, 'epoch': 2.34} + 15%|█▍ | 54240/371472 [4:19:27<24:25:27, 3.61it/s] 15%|█▍ | 54241/371472 [4:19:27<24:56:37, 3.53it/s] 15%|█▍ | 54242/371472 [4:19:28<24:35:16, 3.58it/s] 15%|█▍ | 54243/371472 [4:19:28<25:11:20, 3.50it/s] 15%|█▍ | 54244/371472 [4:19:28<25:53:23, 3.40it/s] 15%|█▍ | 54245/371472 [4:19:29<25:19:40, 3.48it/s] 15%|█▍ | 54246/371472 [4:19:29<24:30:02, 3.60it/s] 15%|█▍ | 54247/371472 [4:19:29<23:50:29, 3.70it/s] 15%|█▍ | 54248/371472 [4:19:29<23:57:28, 3.68it/s] 15%|█▍ | 54249/371472 [4:19:30<23:55:02, 3.68it/s] 15%|█▍ | 54250/371472 [4:19:30<24:02:47, 3.66it/s] 15%|█▍ | 54251/371472 [4:19:30<24:14:46, 3.63it/s] 15%|█▍ | 54252/371472 [4:19:31<24:14:05, 3.64it/s] 15%|█▍ | 54253/371472 [4:19:31<24:02:53, 3.66it/s] 15%|█▍ | 54254/371472 [4:19:31<23:44:29, 3.71it/s] 15%|█▍ | 54255/371472 [4:19:31<23:19:34, 3.78it/s] 15%|█▍ | 54256/371472 [4:19:32<23:21:37, 3.77it/s] 15%|█▍ | 54257/371472 [4:19:32<22:39:21, 3.89it/s] 15%|█▍ | 54258/371472 [4:19:32<24:51:22, 3.54it/s] 15%|█▍ | 54259/371472 [4:19:32<25:48:45, 3.41it/s] 15%|█▍ | 54260/371472 [4:19:33<24:41:09, 3.57it/s] {'loss': 4.2517, 'learning_rate': 8.68953220280549e-07, 'epoch': 2.34} + 15%|█▍ | 54260/371472 [4:19:33<24:41:09, 3.57it/s] 15%|█▍ | 54261/371472 [4:19:33<24:37:17, 3.58it/s] 15%|█▍ | 54262/371472 [4:19:33<25:01:45, 3.52it/s] 15%|█▍ | 54263/371472 [4:19:34<26:32:21, 3.32it/s] 15%|█▍ | 54264/371472 [4:19:34<24:53:04, 3.54it/s] 15%|█▍ | 54265/371472 [4:19:34<25:29:00, 3.46it/s] 15%|█▍ | 54266/371472 [4:19:34<24:20:11, 3.62it/s] 15%|█▍ | 54267/371472 [4:19:35<24:52:01, 3.54it/s] 15%|█▍ | 54268/371472 [4:19:35<24:56:51, 3.53it/s] 15%|█▍ | 54269/371472 [4:19:35<24:47:07, 3.55it/s] 15%|█▍ | 54270/371472 [4:19:36<23:50:24, 3.70it/s] 15%|█▍ | 54271/371472 [4:19:36<23:57:47, 3.68it/s] 15%|█▍ | 54272/371472 [4:19:36<24:58:42, 3.53it/s] 15%|█▍ | 54273/371472 [4:19:36<24:31:50, 3.59it/s] 15%|█▍ | 54274/371472 [4:19:37<23:37:47, 3.73it/s] 15%|█▍ | 54275/371472 [4:19:37<23:01:13, 3.83it/s] 15%|█▍ | 54276/371472 [4:19:37<23:49:56, 3.70it/s] 15%|█▍ | 54277/371472 [4:19:37<23:35:01, 3.74it/s] 15%|█▍ | 54278/371472 [4:19:38<24:31:53, 3.59it/s] 15%|█▍ | 54279/371472 [4:19:38<24:41:50, 3.57it/s] 15%|█▍ | 54280/371472 [4:19:38<24:45:57, 3.56it/s] {'loss': 4.3148, 'learning_rate': 8.689047383050701e-07, 'epoch': 2.34} + 15%|█▍ | 54280/371472 [4:19:38<24:45:57, 3.56it/s] 15%|█▍ | 54281/371472 [4:19:39<23:58:32, 3.67it/s] 15%|█▍ | 54282/371472 [4:19:39<23:45:19, 3.71it/s] 15%|█▍ | 54283/371472 [4:19:39<22:57:23, 3.84it/s] 15%|█▍ | 54284/371472 [4:19:39<22:50:30, 3.86it/s] 15%|█▍ | 54285/371472 [4:19:40<23:25:23, 3.76it/s] 15%|█▍ | 54286/371472 [4:19:40<24:30:45, 3.59it/s] 15%|█▍ | 54287/371472 [4:19:40<24:52:04, 3.54it/s] 15%|█▍ | 54288/371472 [4:19:41<25:44:08, 3.42it/s] 15%|█▍ | 54289/371472 [4:19:41<24:51:17, 3.54it/s] 15%|█▍ | 54290/371472 [4:19:41<23:53:11, 3.69it/s] 15%|█▍ | 54291/371472 [4:19:42<33:22:15, 2.64it/s] 15%|█▍ | 54292/371472 [4:19:42<31:33:03, 2.79it/s] 15%|█▍ | 54293/371472 [4:19:42<29:09:05, 3.02it/s] 15%|█▍ | 54294/371472 [4:19:42<27:06:38, 3.25it/s] 15%|█▍ | 54295/371472 [4:19:43<26:56:13, 3.27it/s] 15%|█▍ | 54296/371472 [4:19:43<26:12:40, 3.36it/s] 15%|█▍ | 54297/371472 [4:19:43<25:33:59, 3.45it/s] 15%|█▍ | 54298/371472 [4:19:44<25:09:49, 3.50it/s] 15%|█▍ | 54299/371472 [4:19:44<25:13:32, 3.49it/s] 15%|█▍ | 54300/371472 [4:19:44<24:51:11, 3.54it/s] {'loss': 4.2369, 'learning_rate': 8.688562563295913e-07, 'epoch': 2.34} + 15%|█▍ | 54300/371472 [4:19:44<24:51:11, 3.54it/s] 15%|█▍ | 54301/371472 [4:19:44<25:08:43, 3.50it/s] 15%|█▍ | 54302/371472 [4:19:45<24:24:44, 3.61it/s] 15%|█▍ | 54303/371472 [4:19:45<24:06:19, 3.65it/s] 15%|█▍ | 54304/371472 [4:19:45<23:43:34, 3.71it/s] 15%|█▍ | 54305/371472 [4:19:46<23:39:48, 3.72it/s] 15%|█▍ | 54306/371472 [4:19:46<23:56:16, 3.68it/s] 15%|█▍ | 54307/371472 [4:19:46<24:19:31, 3.62it/s] 15%|█▍ | 54308/371472 [4:19:46<24:17:17, 3.63it/s] 15%|█▍ | 54309/371472 [4:19:47<23:56:11, 3.68it/s] 15%|█▍ | 54310/371472 [4:19:47<24:04:40, 3.66it/s] 15%|█▍ | 54311/371472 [4:19:47<24:34:19, 3.59it/s] 15%|█▍ | 54312/371472 [4:19:47<23:52:46, 3.69it/s] 15%|█▍ | 54313/371472 [4:19:48<23:16:00, 3.79it/s] 15%|█▍ | 54314/371472 [4:19:48<23:12:38, 3.80it/s] 15%|█▍ | 54315/371472 [4:19:48<25:37:58, 3.44it/s] 15%|█▍ | 54316/371472 [4:19:49<27:05:27, 3.25it/s] 15%|█▍ | 54317/371472 [4:19:49<26:17:26, 3.35it/s] 15%|█▍ | 54318/371472 [4:19:49<26:18:35, 3.35it/s] 15%|█▍ | 54319/371472 [4:19:50<26:12:55, 3.36it/s] 15%|█▍ | 54320/371472 [4:19:50<25:05:46, 3.51it/s] {'loss': 4.2441, 'learning_rate': 8.688077743541124e-07, 'epoch': 2.34} + 15%|█▍ | 54320/371472 [4:19:50<25:05:46, 3.51it/s] 15%|█▍ | 54321/371472 [4:19:50<23:53:14, 3.69it/s] 15%|█▍ | 54322/371472 [4:19:50<24:57:44, 3.53it/s] 15%|█▍ | 54323/371472 [4:19:51<25:38:28, 3.44it/s] 15%|█▍ | 54324/371472 [4:19:51<24:11:16, 3.64it/s] 15%|█▍ | 54325/371472 [4:19:51<23:44:21, 3.71it/s] 15%|█▍ | 54326/371472 [4:19:51<23:24:25, 3.76it/s] 15%|█▍ | 54327/371472 [4:19:52<22:46:45, 3.87it/s] 15%|█▍ | 54328/371472 [4:19:52<23:03:08, 3.82it/s] 15%|█▍ | 54329/371472 [4:19:52<25:05:47, 3.51it/s] 15%|█▍ | 54330/371472 [4:19:52<24:23:55, 3.61it/s] 15%|█▍ | 54331/371472 [4:19:53<25:27:35, 3.46it/s] 15%|█▍ | 54332/371472 [4:19:53<26:31:13, 3.32it/s] 15%|█▍ | 54333/371472 [4:19:53<25:56:40, 3.40it/s] 15%|█▍ | 54334/371472 [4:19:54<24:37:58, 3.58it/s] 15%|█▍ | 54335/371472 [4:19:54<24:20:05, 3.62it/s] 15%|█▍ | 54336/371472 [4:19:54<24:22:02, 3.62it/s] 15%|█▍ | 54337/371472 [4:19:54<24:00:06, 3.67it/s] 15%|█▍ | 54338/371472 [4:19:55<25:41:26, 3.43it/s] 15%|█▍ | 54339/371472 [4:19:55<26:52:42, 3.28it/s] 15%|█▍ | 54340/371472 [4:19:55<25:55:21, 3.40it/s] {'loss': 4.0943, 'learning_rate': 8.687592923786333e-07, 'epoch': 2.34} + 15%|█▍ | 54340/371472 [4:19:55<25:55:21, 3.40it/s] 15%|█▍ | 54341/371472 [4:19:56<25:04:44, 3.51it/s] 15%|█▍ | 54342/371472 [4:19:56<25:02:02, 3.52it/s] 15%|█▍ | 54343/371472 [4:19:56<24:56:20, 3.53it/s] 15%|█▍ | 54344/371472 [4:19:57<27:52:28, 3.16it/s] 15%|█▍ | 54345/371472 [4:19:57<26:16:59, 3.35it/s] 15%|█▍ | 54346/371472 [4:19:57<24:57:08, 3.53it/s] 15%|█▍ | 54347/371472 [4:19:57<25:03:18, 3.52it/s] 15%|█▍ | 54348/371472 [4:19:58<24:05:02, 3.66it/s] 15%|█▍ | 54349/371472 [4:19:58<26:19:24, 3.35it/s] 15%|█▍ | 54350/371472 [4:19:58<25:45:15, 3.42it/s] 15%|█▍ | 54351/371472 [4:19:59<25:05:33, 3.51it/s] 15%|█▍ | 54352/371472 [4:19:59<24:09:14, 3.65it/s] 15%|█▍ | 54353/371472 [4:19:59<25:50:25, 3.41it/s] 15%|█▍ | 54354/371472 [4:19:59<25:08:35, 3.50it/s] 15%|█▍ | 54355/371472 [4:20:00<24:17:29, 3.63it/s] 15%|█▍ | 54356/371472 [4:20:00<23:31:29, 3.74it/s] 15%|█▍ | 54357/371472 [4:20:00<26:09:30, 3.37it/s] 15%|█▍ | 54358/371472 [4:20:01<26:15:48, 3.35it/s] 15%|█▍ | 54359/371472 [4:20:01<25:15:32, 3.49it/s] 15%|█▍ | 54360/371472 [4:20:01<24:10:32, 3.64it/s] {'loss': 4.3473, 'learning_rate': 8.687108104031545e-07, 'epoch': 2.34} + 15%|█▍ | 54360/371472 [4:20:01<24:10:32, 3.64it/s] 15%|█▍ | 54361/371472 [4:20:01<23:51:11, 3.69it/s] 15%|█▍ | 54362/371472 [4:20:02<23:14:04, 3.79it/s] 15%|█▍ | 54363/371472 [4:20:02<23:01:32, 3.83it/s] 15%|█▍ | 54364/371472 [4:20:02<23:08:49, 3.81it/s] 15%|█▍ | 54365/371472 [4:20:02<24:08:07, 3.65it/s] 15%|█▍ | 54366/371472 [4:20:03<23:06:08, 3.81it/s] 15%|█▍ | 54367/371472 [4:20:03<23:20:47, 3.77it/s] 15%|█▍ | 54368/371472 [4:20:03<23:35:58, 3.73it/s] 15%|█▍ | 54369/371472 [4:20:03<23:02:35, 3.82it/s] 15%|█▍ | 54370/371472 [4:20:04<24:49:19, 3.55it/s] 15%|█▍ | 54371/371472 [4:20:04<23:36:55, 3.73it/s] 15%|█▍ | 54372/371472 [4:20:04<23:24:58, 3.76it/s] 15%|█▍ | 54373/371472 [4:20:05<23:03:52, 3.82it/s] 15%|█▍ | 54374/371472 [4:20:05<23:18:32, 3.78it/s] 15%|█▍ | 54375/371472 [4:20:05<23:35:09, 3.73it/s] 15%|█▍ | 54376/371472 [4:20:05<23:26:37, 3.76it/s] 15%|█▍ | 54377/371472 [4:20:06<24:08:15, 3.65it/s] 15%|█▍ | 54378/371472 [4:20:06<24:32:04, 3.59it/s] 15%|█▍ | 54379/371472 [4:20:06<24:40:16, 3.57it/s] 15%|█▍ | 54380/371472 [4:20:07<25:26:50, 3.46it/s] {'loss': 4.2992, 'learning_rate': 8.686623284276756e-07, 'epoch': 2.34} + 15%|█▍ | 54380/371472 [4:20:07<25:26:50, 3.46it/s] 15%|█▍ | 54381/371472 [4:20:07<25:51:42, 3.41it/s] 15%|█▍ | 54382/371472 [4:20:07<25:18:44, 3.48it/s] 15%|█▍ | 54383/371472 [4:20:07<24:52:22, 3.54it/s] 15%|█▍ | 54384/371472 [4:20:08<24:14:22, 3.63it/s] 15%|█▍ | 54385/371472 [4:20:08<24:07:10, 3.65it/s] 15%|█▍ | 54386/371472 [4:20:08<23:46:00, 3.71it/s] 15%|█▍ | 54387/371472 [4:20:08<24:15:59, 3.63it/s] 15%|█▍ | 54388/371472 [4:20:09<24:54:21, 3.54it/s] 15%|█▍ | 54389/371472 [4:20:09<24:57:47, 3.53it/s] 15%|█▍ | 54390/371472 [4:20:09<25:29:11, 3.46it/s] 15%|█▍ | 54391/371472 [4:20:10<24:46:29, 3.56it/s] 15%|█▍ | 54392/371472 [4:20:10<23:31:30, 3.74it/s] 15%|█▍ | 54393/371472 [4:20:10<23:21:11, 3.77it/s] 15%|█▍ | 54394/371472 [4:20:10<24:50:52, 3.54it/s] 15%|█▍ | 54395/371472 [4:20:11<25:21:58, 3.47it/s] 15%|█▍ | 54396/371472 [4:20:11<24:43:44, 3.56it/s] 15%|█▍ | 54397/371472 [4:20:11<23:54:31, 3.68it/s] 15%|█▍ | 54398/371472 [4:20:12<26:37:23, 3.31it/s] 15%|█▍ | 54399/371472 [4:20:12<25:28:08, 3.46it/s] 15%|█▍ | 54400/371472 [4:20:12<26:13:26, 3.36it/s] {'loss': 4.2246, 'learning_rate': 8.686138464521968e-07, 'epoch': 2.34} + 15%|█▍ | 54400/371472 [4:20:12<26:13:26, 3.36it/s] 15%|█▍ | 54401/371472 [4:20:12<25:10:26, 3.50it/s] 15%|█▍ | 54402/371472 [4:20:13<23:39:41, 3.72it/s] 15%|█▍ | 54403/371472 [4:20:13<24:15:06, 3.63it/s] 15%|█▍ | 54404/371472 [4:20:13<23:50:34, 3.69it/s] 15%|█▍ | 54405/371472 [4:20:13<23:24:53, 3.76it/s] 15%|█▍ | 54406/371472 [4:20:14<23:13:35, 3.79it/s] 15%|█▍ | 54407/371472 [4:20:14<24:09:25, 3.65it/s] 15%|█▍ | 54408/371472 [4:20:14<23:11:15, 3.80it/s] 15%|█▍ | 54409/371472 [4:20:15<25:40:54, 3.43it/s] 15%|█▍ | 54410/371472 [4:20:15<24:25:29, 3.61it/s] 15%|█▍ | 54411/371472 [4:20:15<24:37:25, 3.58it/s] 15%|█▍ | 54412/371472 [4:20:15<25:43:22, 3.42it/s] 15%|█▍ | 54413/371472 [4:20:16<24:24:55, 3.61it/s] 15%|█▍ | 54414/371472 [4:20:16<24:15:36, 3.63it/s] 15%|█▍ | 54415/371472 [4:20:16<23:12:37, 3.79it/s] 15%|█▍ | 54416/371472 [4:20:16<22:41:29, 3.88it/s] 15%|█▍ | 54417/371472 [4:20:17<25:40:33, 3.43it/s] 15%|█▍ | 54418/371472 [4:20:17<27:52:10, 3.16it/s] 15%|█▍ | 54419/371472 [4:20:17<26:24:17, 3.34it/s] 15%|█▍ | 54420/371472 [4:20:18<25:07:01, 3.51it/s] {'loss': 4.4738, 'learning_rate': 8.685653644767178e-07, 'epoch': 2.34} + 15%|█▍ | 54420/371472 [4:20:18<25:07:01, 3.51it/s] 15%|█▍ | 54421/371472 [4:20:18<27:48:41, 3.17it/s] 15%|█▍ | 54422/371472 [4:20:18<29:07:09, 3.02it/s] 15%|█▍ | 54423/371472 [4:20:19<29:23:39, 3.00it/s] 15%|█▍ | 54424/371472 [4:20:19<29:21:31, 3.00it/s] 15%|█▍ | 54425/371472 [4:20:19<28:31:05, 3.09it/s] 15%|█▍ | 54426/371472 [4:20:20<28:03:40, 3.14it/s] 15%|█▍ | 54427/371472 [4:20:20<26:14:54, 3.36it/s] 15%|█▍ | 54428/371472 [4:20:20<25:04:15, 3.51it/s] 15%|█▍ | 54429/371472 [4:20:21<26:03:41, 3.38it/s] 15%|█▍ | 54430/371472 [4:20:21<24:45:26, 3.56it/s] 15%|█▍ | 54431/371472 [4:20:21<24:06:14, 3.65it/s] 15%|█▍ | 54432/371472 [4:20:21<23:32:01, 3.74it/s] 15%|█▍ | 54433/371472 [4:20:22<23:21:52, 3.77it/s] 15%|█▍ | 54434/371472 [4:20:22<23:54:36, 3.68it/s] 15%|█▍ | 54435/371472 [4:20:22<23:56:50, 3.68it/s] 15%|█▍ | 54436/371472 [4:20:22<24:35:23, 3.58it/s] 15%|█▍ | 54437/371472 [4:20:23<24:04:37, 3.66it/s] 15%|█▍ | 54438/371472 [4:20:23<24:07:35, 3.65it/s] 15%|█▍ | 54439/371472 [4:20:23<23:28:51, 3.75it/s] 15%|█▍ | 54440/371472 [4:20:24<23:41:26, 3.72it/s] {'loss': 4.1206, 'learning_rate': 8.68516882501239e-07, 'epoch': 2.34} + 15%|█▍ | 54440/371472 [4:20:24<23:41:26, 3.72it/s] 15%|█▍ | 54441/371472 [4:20:24<23:10:28, 3.80it/s] 15%|█▍ | 54442/371472 [4:20:24<24:07:50, 3.65it/s] 15%|█▍ | 54443/371472 [4:20:24<25:35:10, 3.44it/s] 15%|█▍ | 54444/371472 [4:20:25<25:17:33, 3.48it/s] 15%|█▍ | 54445/371472 [4:20:25<24:22:28, 3.61it/s] 15%|█▍ | 54446/371472 [4:20:25<24:03:36, 3.66it/s] 15%|█▍ | 54447/371472 [4:20:25<24:16:50, 3.63it/s] 15%|█▍ | 54448/371472 [4:20:26<25:26:10, 3.46it/s] 15%|█▍ | 54449/371472 [4:20:26<26:37:58, 3.31it/s] 15%|█▍ | 54450/371472 [4:20:26<24:58:02, 3.53it/s] 15%|█▍ | 54451/371472 [4:20:27<24:36:06, 3.58it/s] 15%|█▍ | 54452/371472 [4:20:27<24:06:09, 3.65it/s] 15%|█▍ | 54453/371472 [4:20:27<24:17:31, 3.63it/s] 15%|█▍ | 54454/371472 [4:20:28<25:34:15, 3.44it/s] 15%|█▍ | 54455/371472 [4:20:28<24:34:54, 3.58it/s] 15%|█▍ | 54456/371472 [4:20:28<23:37:03, 3.73it/s] 15%|█▍ | 54457/371472 [4:20:28<24:42:40, 3.56it/s] 15%|█▍ | 54458/371472 [4:20:29<23:43:26, 3.71it/s] 15%|█▍ | 54459/371472 [4:20:29<23:07:20, 3.81it/s] 15%|█▍ | 54460/371472 [4:20:29<24:24:07, 3.61it/s] {'loss': 4.3221, 'learning_rate': 8.684684005257601e-07, 'epoch': 2.35} + 15%|█▍ | 54460/371472 [4:20:29<24:24:07, 3.61it/s] 15%|█▍ | 54461/371472 [4:20:29<23:36:44, 3.73it/s] 15%|█▍ | 54462/371472 [4:20:30<24:04:36, 3.66it/s] 15%|█▍ | 54463/371472 [4:20:30<23:40:13, 3.72it/s] 15%|█▍ | 54464/371472 [4:20:30<25:18:39, 3.48it/s] 15%|█▍ | 54465/371472 [4:20:31<28:46:22, 3.06it/s] 15%|█▍ | 54466/371472 [4:20:31<27:22:50, 3.22it/s] 15%|█▍ | 54467/371472 [4:20:31<26:01:54, 3.38it/s] 15%|█▍ | 54468/371472 [4:20:31<24:44:35, 3.56it/s] 15%|█▍ | 54469/371472 [4:20:32<26:14:43, 3.36it/s] 15%|█▍ | 54470/371472 [4:20:32<24:41:06, 3.57it/s] 15%|█▍ | 54471/371472 [4:20:32<24:46:53, 3.55it/s] 15%|█▍ | 54472/371472 [4:20:33<24:35:05, 3.58it/s] 15%|█▍ | 54473/371472 [4:20:33<23:37:31, 3.73it/s] 15%|█▍ | 54474/371472 [4:20:33<23:48:01, 3.70it/s] 15%|█▍ | 54475/371472 [4:20:33<27:19:46, 3.22it/s] 15%|█▍ | 54476/371472 [4:20:34<26:27:27, 3.33it/s] 15%|█▍ | 54477/371472 [4:20:34<25:28:15, 3.46it/s] 15%|█▍ | 54478/371472 [4:20:34<24:35:55, 3.58it/s] 15%|█▍ | 54479/371472 [4:20:35<23:43:43, 3.71it/s] 15%|█▍ | 54480/371472 [4:20:35<24:10:19, 3.64it/s] {'loss': 4.277, 'learning_rate': 8.68419918550281e-07, 'epoch': 2.35} + 15%|█▍ | 54480/371472 [4:20:35<24:10:19, 3.64it/s] 15%|█▍ | 54481/371472 [4:20:35<24:37:56, 3.57it/s] 15%|█▍ | 54482/371472 [4:20:35<24:41:37, 3.57it/s] 15%|█▍ | 54483/371472 [4:20:36<23:30:14, 3.75it/s] 15%|█▍ | 54484/371472 [4:20:36<24:57:59, 3.53it/s] 15%|█▍ | 54485/371472 [4:20:36<28:38:11, 3.07it/s] 15%|█▍ | 54486/371472 [4:20:37<26:58:34, 3.26it/s] 15%|█▍ | 54487/371472 [4:20:37<27:56:49, 3.15it/s] 15%|█▍ | 54488/371472 [4:20:37<29:23:24, 3.00it/s] 15%|█▍ | 54489/371472 [4:20:38<28:12:10, 3.12it/s] 15%|█▍ | 54490/371472 [4:20:38<26:35:41, 3.31it/s] 15%|█▍ | 54491/371472 [4:20:38<26:35:33, 3.31it/s] 15%|█▍ | 54492/371472 [4:20:38<25:51:40, 3.40it/s] 15%|█▍ | 54493/371472 [4:20:39<25:05:30, 3.51it/s] 15%|█▍ | 54494/371472 [4:20:39<24:15:25, 3.63it/s] 15%|█▍ | 54495/371472 [4:20:39<24:02:42, 3.66it/s] 15%|█▍ | 54496/371472 [4:20:40<24:24:14, 3.61it/s] 15%|█▍ | 54497/371472 [4:20:40<24:16:54, 3.63it/s] 15%|█▍ | 54498/371472 [4:20:40<25:10:11, 3.50it/s] 15%|█▍ | 54499/371472 [4:20:40<25:13:57, 3.49it/s] 15%|█▍ | 54500/371472 [4:20:41<26:45:56, 3.29it/s] {'loss': 4.3785, 'learning_rate': 8.683714365748022e-07, 'epoch': 2.35} + 15%|█▍ | 54500/371472 [4:20:41<26:45:56, 3.29it/s] 15%|█▍ | 54501/371472 [4:20:41<26:14:02, 3.36it/s] 15%|█▍ | 54502/371472 [4:20:41<25:16:46, 3.48it/s] 15%|█▍ | 54503/371472 [4:20:42<24:53:13, 3.54it/s] 15%|█▍ | 54504/371472 [4:20:42<25:15:21, 3.49it/s] 15%|█▍ | 54505/371472 [4:20:42<25:24:00, 3.47it/s] 15%|█▍ | 54506/371472 [4:20:42<25:29:57, 3.45it/s] 15%|█▍ | 54507/371472 [4:20:43<25:38:54, 3.43it/s] 15%|█▍ | 54508/371472 [4:20:43<26:31:33, 3.32it/s] 15%|█▍ | 54509/371472 [4:20:43<26:14:58, 3.35it/s] 15%|█▍ | 54510/371472 [4:20:44<25:42:39, 3.42it/s] 15%|█▍ | 54511/371472 [4:20:44<25:09:19, 3.50it/s] 15%|█▍ | 54512/371472 [4:20:44<23:55:25, 3.68it/s] 15%|█▍ | 54513/371472 [4:20:44<25:04:15, 3.51it/s] 15%|█▍ | 54514/371472 [4:20:45<24:50:45, 3.54it/s] 15%|█▍ | 54515/371472 [4:20:45<24:40:14, 3.57it/s] 15%|█▍ | 54516/371472 [4:20:45<23:49:24, 3.70it/s] 15%|█▍ | 54517/371472 [4:20:46<23:12:38, 3.79it/s] 15%|█▍ | 54518/371472 [4:20:46<23:16:44, 3.78it/s] 15%|█▍ | 54519/371472 [4:20:46<23:01:31, 3.82it/s] 15%|█▍ | 54520/371472 [4:20:46<22:28:19, 3.92it/s] {'loss': 4.381, 'learning_rate': 8.683229545993234e-07, 'epoch': 2.35} + 15%|█▍ | 54520/371472 [4:20:46<22:28:19, 3.92it/s] 15%|█▍ | 54521/371472 [4:20:47<22:40:21, 3.88it/s] 15%|█▍ | 54522/371472 [4:20:47<22:25:40, 3.93it/s] 15%|█▍ | 54523/371472 [4:20:47<22:15:11, 3.96it/s] 15%|█▍ | 54524/371472 [4:20:47<22:10:14, 3.97it/s] 15%|█▍ | 54525/371472 [4:20:48<24:57:24, 3.53it/s] 15%|█▍ | 54526/371472 [4:20:48<25:03:34, 3.51it/s] 15%|█▍ | 54527/371472 [4:20:48<25:25:14, 3.46it/s] 15%|█▍ | 54528/371472 [4:20:49<25:21:25, 3.47it/s] 15%|█▍ | 54529/371472 [4:20:49<24:21:01, 3.62it/s] 15%|█▍ | 54530/371472 [4:20:49<23:48:00, 3.70it/s] 15%|█▍ | 54531/371472 [4:20:49<23:36:32, 3.73it/s] 15%|█▍ | 54532/371472 [4:20:50<24:09:37, 3.64it/s] 15%|█▍ | 54533/371472 [4:20:50<25:01:44, 3.52it/s] 15%|█▍ | 54534/371472 [4:20:50<25:08:39, 3.50it/s] 15%|█▍ | 54535/371472 [4:20:50<25:18:40, 3.48it/s] 15%|█▍ | 54536/371472 [4:20:51<24:45:25, 3.56it/s] 15%|█▍ | 54537/371472 [4:20:51<24:08:13, 3.65it/s] 15%|█▍ | 54538/371472 [4:20:51<23:51:01, 3.69it/s] 15%|█▍ | 54539/371472 [4:20:52<23:43:53, 3.71it/s] 15%|█▍ | 54540/371472 [4:20:52<23:53:48, 3.68it/s] {'loss': 4.4255, 'learning_rate': 8.682744726238445e-07, 'epoch': 2.35} + 15%|█▍ | 54540/371472 [4:20:52<23:53:48, 3.68it/s] 15%|█▍ | 54541/371472 [4:20:52<23:35:39, 3.73it/s] 15%|█▍ | 54542/371472 [4:20:52<23:28:57, 3.75it/s] 15%|█▍ | 54543/371472 [4:20:53<23:38:12, 3.72it/s] 15%|█▍ | 54544/371472 [4:20:53<23:19:43, 3.77it/s] 15%|█▍ | 54545/371472 [4:20:53<24:11:01, 3.64it/s] 15%|█▍ | 54546/371472 [4:20:53<24:11:44, 3.64it/s] 15%|█▍ | 54547/371472 [4:20:54<24:24:30, 3.61it/s] 15%|█▍ | 54548/371472 [4:20:54<24:46:40, 3.55it/s] 15%|█▍ | 54549/371472 [4:20:54<23:47:21, 3.70it/s] 15%|█▍ | 54550/371472 [4:20:55<23:50:56, 3.69it/s] 15%|█▍ | 54551/371472 [4:20:55<24:00:04, 3.67it/s] 15%|█▍ | 54552/371472 [4:20:55<24:10:52, 3.64it/s] 15%|█▍ | 54553/371472 [4:20:55<24:04:06, 3.66it/s] 15%|█▍ | 54554/371472 [4:20:56<23:53:05, 3.69it/s] 15%|█▍ | 54555/371472 [4:20:56<24:13:34, 3.63it/s] 15%|█▍ | 54556/371472 [4:20:56<24:58:18, 3.53it/s] 15%|█▍ | 54557/371472 [4:20:56<24:22:18, 3.61it/s] 15%|█▍ | 54558/371472 [4:20:57<23:26:30, 3.76it/s] 15%|█▍ | 54559/371472 [4:20:57<25:10:14, 3.50it/s] 15%|█▍ | 54560/371472 [4:20:57<25:16:11, 3.48it/s] {'loss': 4.1823, 'learning_rate': 8.682259906483656e-07, 'epoch': 2.35} + 15%|█▍ | 54560/371472 [4:20:57<25:16:11, 3.48it/s] 15%|█▍ | 54561/371472 [4:20:58<24:30:59, 3.59it/s] 15%|█▍ | 54562/371472 [4:20:58<24:18:31, 3.62it/s] 15%|█▍ | 54563/371472 [4:20:58<27:03:59, 3.25it/s] 15%|█▍ | 54564/371472 [4:20:59<26:19:25, 3.34it/s] 15%|█▍ | 54565/371472 [4:20:59<27:34:58, 3.19it/s] 15%|█▍ | 54566/371472 [4:20:59<25:52:27, 3.40it/s] 15%|█▍ | 54567/371472 [4:20:59<26:30:28, 3.32it/s] 15%|█▍ | 54568/371472 [4:21:00<25:21:30, 3.47it/s] 15%|█▍ | 54569/371472 [4:21:00<24:26:42, 3.60it/s] 15%|█▍ | 54570/371472 [4:21:00<23:39:11, 3.72it/s] 15%|█▍ | 54571/371472 [4:21:00<23:00:50, 3.82it/s] 15%|█▍ | 54572/371472 [4:21:01<24:07:35, 3.65it/s] 15%|█▍ | 54573/371472 [4:21:01<23:08:49, 3.80it/s] 15%|█▍ | 54574/371472 [4:21:01<23:20:39, 3.77it/s] 15%|█▍ | 54575/371472 [4:21:01<23:02:00, 3.82it/s] 15%|█▍ | 54576/371472 [4:21:02<23:33:13, 3.74it/s] 15%|█▍ | 54577/371472 [4:21:02<26:15:31, 3.35it/s] 15%|█▍ | 54578/371472 [4:21:02<26:06:17, 3.37it/s] 15%|█▍ | 54579/371472 [4:21:03<25:47:22, 3.41it/s] 15%|█▍ | 54580/371472 [4:21:03<25:17:28, 3.48it/s] {'loss': 4.3709, 'learning_rate': 8.681775086728867e-07, 'epoch': 2.35} + 15%|█▍ | 54580/371472 [4:21:03<25:17:28, 3.48it/s] 15%|█�� | 54581/371472 [4:21:03<25:18:23, 3.48it/s] 15%|█▍ | 54582/371472 [4:21:04<27:10:20, 3.24it/s] 15%|█▍ | 54583/371472 [4:21:04<27:51:21, 3.16it/s] 15%|█▍ | 54584/371472 [4:21:04<26:32:37, 3.32it/s] 15%|█▍ | 54585/371472 [4:21:04<25:16:22, 3.48it/s] 15%|█▍ | 54586/371472 [4:21:05<24:19:14, 3.62it/s] 15%|█▍ | 54587/371472 [4:21:05<25:39:44, 3.43it/s] 15%|█▍ | 54588/371472 [4:21:05<24:51:23, 3.54it/s] 15%|█▍ | 54589/371472 [4:21:06<24:36:07, 3.58it/s] 15%|█▍ | 54590/371472 [4:21:06<24:36:14, 3.58it/s] 15%|█▍ | 54591/371472 [4:21:06<24:48:18, 3.55it/s] 15%|█▍ | 54592/371472 [4:21:06<24:23:06, 3.61it/s] 15%|█▍ | 54593/371472 [4:21:07<23:53:04, 3.69it/s] 15%|█▍ | 54594/371472 [4:21:07<23:01:52, 3.82it/s] 15%|█▍ | 54595/371472 [4:21:07<23:33:20, 3.74it/s] 15%|█▍ | 54596/371472 [4:21:07<23:10:49, 3.80it/s] 15%|█▍ | 54597/371472 [4:21:08<23:29:00, 3.75it/s] 15%|█▍ | 54598/371472 [4:21:08<23:11:51, 3.79it/s] 15%|█▍ | 54599/371472 [4:21:08<23:40:56, 3.72it/s] 15%|█▍ | 54600/371472 [4:21:09<24:24:33, 3.61it/s] {'loss': 4.1439, 'learning_rate': 8.681290266974078e-07, 'epoch': 2.35} + 15%|█▍ | 54600/371472 [4:21:09<24:24:33, 3.61it/s] 15%|█▍ | 54601/371472 [4:21:09<23:34:18, 3.73it/s] 15%|█▍ | 54602/371472 [4:21:09<23:56:10, 3.68it/s] 15%|█▍ | 54603/371472 [4:21:09<23:44:01, 3.71it/s] 15%|█▍ | 54604/371472 [4:21:10<26:02:49, 3.38it/s] 15%|█▍ | 54605/371472 [4:21:10<24:48:04, 3.55it/s] 15%|█▍ | 54606/371472 [4:21:10<25:10:26, 3.50it/s] 15%|█▍ | 54607/371472 [4:21:11<24:23:10, 3.61it/s] 15%|█▍ | 54608/371472 [4:21:11<24:35:08, 3.58it/s] 15%|█▍ | 54609/371472 [4:21:11<23:56:33, 3.68it/s] 15%|█▍ | 54610/371472 [4:21:11<23:03:59, 3.82it/s] 15%|█▍ | 54611/371472 [4:21:12<22:56:18, 3.84it/s] 15%|█▍ | 54612/371472 [4:21:12<23:02:28, 3.82it/s] 15%|█▍ | 54613/371472 [4:21:12<25:09:26, 3.50it/s] 15%|█▍ | 54614/371472 [4:21:12<24:42:00, 3.56it/s] 15%|█▍ | 54615/371472 [4:21:13<25:15:59, 3.48it/s] 15%|█▍ | 54616/371472 [4:21:13<23:52:11, 3.69it/s] 15%|█▍ | 54617/371472 [4:21:13<25:48:44, 3.41it/s] 15%|█▍ | 54618/371472 [4:21:14<25:55:21, 3.40it/s] 15%|█▍ | 54619/371472 [4:21:14<25:44:24, 3.42it/s] 15%|█▍ | 54620/371472 [4:21:14<24:34:55, 3.58it/s] {'loss': 4.4158, 'learning_rate': 8.680805447219289e-07, 'epoch': 2.35} + 15%|█▍ | 54620/371472 [4:21:14<24:34:55, 3.58it/s] 15%|█▍ | 54621/371472 [4:21:14<25:31:26, 3.45it/s] 15%|█▍ | 54622/371472 [4:21:15<25:22:08, 3.47it/s] 15%|█▍ | 54623/371472 [4:21:15<25:56:04, 3.39it/s] 15%|█▍ | 54624/371472 [4:21:15<26:11:59, 3.36it/s] 15%|█▍ | 54625/371472 [4:21:16<25:24:28, 3.46it/s] 15%|█▍ | 54626/371472 [4:21:16<24:51:42, 3.54it/s] 15%|█▍ | 54627/371472 [4:21:16<24:07:36, 3.65it/s] 15%|█▍ | 54628/371472 [4:21:16<23:35:37, 3.73it/s] 15%|█▍ | 54629/371472 [4:21:17<25:10:24, 3.50it/s] 15%|█▍ | 54630/371472 [4:21:17<24:30:03, 3.59it/s] 15%|█▍ | 54631/371472 [4:21:17<23:57:19, 3.67it/s] 15%|█▍ | 54632/371472 [4:21:18<23:55:33, 3.68it/s] 15%|█▍ | 54633/371472 [4:21:18<23:06:28, 3.81it/s] 15%|█▍ | 54634/371472 [4:21:18<23:30:36, 3.74it/s] 15%|█▍ | 54635/371472 [4:21:18<26:21:59, 3.34it/s] 15%|█▍ | 54636/371472 [4:21:19<26:23:30, 3.33it/s] 15%|█▍ | 54637/371472 [4:21:19<25:30:55, 3.45it/s] 15%|█▍ | 54638/371472 [4:21:19<26:20:24, 3.34it/s] 15%|█▍ | 54639/371472 [4:21:20<25:27:41, 3.46it/s] 15%|█▍ | 54640/371472 [4:21:20<24:41:14, 3.56it/s] {'loss': 4.2438, 'learning_rate': 8.680320627464499e-07, 'epoch': 2.35} + 15%|█▍ | 54640/371472 [4:21:20<24:41:14, 3.56it/s] 15%|█▍ | 54641/371472 [4:21:20<24:42:11, 3.56it/s] 15%|█▍ | 54642/371472 [4:21:20<24:18:59, 3.62it/s] 15%|█▍ | 54643/371472 [4:21:21<23:51:00, 3.69it/s] 15%|█▍ | 54644/371472 [4:21:21<23:41:45, 3.71it/s] 15%|█▍ | 54645/371472 [4:21:21<23:01:07, 3.82it/s] 15%|█▍ | 54646/371472 [4:21:21<23:16:03, 3.78it/s] 15%|█▍ | 54647/371472 [4:21:22<26:02:12, 3.38it/s] 15%|█▍ | 54648/371472 [4:21:22<25:15:16, 3.48it/s] 15%|█▍ | 54649/371472 [4:21:22<24:19:52, 3.62it/s] 15%|█▍ | 54650/371472 [4:21:23<24:16:29, 3.63it/s] 15%|█▍ | 54651/371472 [4:21:23<24:08:09, 3.65it/s] 15%|█▍ | 54652/371472 [4:21:23<24:22:32, 3.61it/s] 15%|█▍ | 54653/371472 [4:21:23<24:37:22, 3.57it/s] 15%|█▍ | 54654/371472 [4:21:24<24:21:30, 3.61it/s] 15%|█▍ | 54655/371472 [4:21:24<25:37:36, 3.43it/s] 15%|█▍ | 54656/371472 [4:21:24<28:13:00, 3.12it/s] 15%|█▍ | 54657/371472 [4:21:25<26:14:41, 3.35it/s] 15%|█▍ | 54658/371472 [4:21:25<26:21:08, 3.34it/s] 15%|█▍ | 54659/371472 [4:21:25<27:12:54, 3.23it/s] 15%|█▍ | 54660/371472 [4:21:26<26:54:57, 3.27it/s] {'loss': 4.1635, 'learning_rate': 8.679835807709711e-07, 'epoch': 2.35} + 15%|█▍ | 54660/371472 [4:21:26<26:54:57, 3.27it/s] 15%|█▍ | 54661/371472 [4:21:26<26:50:43, 3.28it/s] 15%|█▍ | 54662/371472 [4:21:26<27:22:48, 3.21it/s] 15%|█▍ | 54663/371472 [4:21:26<26:18:51, 3.34it/s] 15%|█▍ | 54664/371472 [4:21:27<26:03:22, 3.38it/s] 15%|█▍ | 54665/371472 [4:21:27<25:38:55, 3.43it/s] 15%|█▍ | 54666/371472 [4:21:27<25:10:32, 3.50it/s] 15%|█▍ | 54667/371472 [4:21:28<24:44:15, 3.56it/s] 15%|█▍ | 54668/371472 [4:21:28<25:25:07, 3.46it/s] 15%|█▍ | 54669/371472 [4:21:28<26:02:57, 3.38it/s] 15%|█▍ | 54670/371472 [4:21:29<26:01:46, 3.38it/s] 15%|█▍ | 54671/371472 [4:21:29<26:13:05, 3.36it/s] 15%|█▍ | 54672/371472 [4:21:29<27:42:20, 3.18it/s] 15%|█▍ | 54673/371472 [4:21:29<26:38:07, 3.30it/s] 15%|█▍ | 54674/371472 [4:21:30<25:42:57, 3.42it/s] 15%|█▍ | 54675/371472 [4:21:30<24:48:23, 3.55it/s] 15%|█▍ | 54676/371472 [4:21:30<24:09:12, 3.64it/s] 15%|█▍ | 54677/371472 [4:21:31<26:05:35, 3.37it/s] 15%|█▍ | 54678/371472 [4:21:31<25:58:41, 3.39it/s] 15%|█▍ | 54679/371472 [4:21:31<25:13:53, 3.49it/s] 15%|█▍ | 54680/371472 [4:21:31<26:26:33, 3.33it/s] {'loss': 4.1807, 'learning_rate': 8.679350987954923e-07, 'epoch': 2.36} + 15%|█▍ | 54680/371472 [4:21:31<26:26:33, 3.33it/s] 15%|█▍ | 54681/371472 [4:21:32<25:03:29, 3.51it/s] 15%|█▍ | 54682/371472 [4:21:32<23:47:59, 3.70it/s] 15%|█▍ | 54683/371472 [4:21:32<24:11:12, 3.64it/s] 15%|█▍ | 54684/371472 [4:21:32<23:46:39, 3.70it/s] 15%|█▍ | 54685/371472 [4:21:33<23:33:17, 3.74it/s] 15%|█▍ | 54686/371472 [4:21:33<22:57:17, 3.83it/s] 15%|█▍ | 54687/371472 [4:21:33<23:51:03, 3.69it/s] 15%|█▍ | 54688/371472 [4:21:34<26:36:24, 3.31it/s] 15%|█▍ | 54689/371472 [4:21:34<25:50:27, 3.41it/s] 15%|█▍ | 54690/371472 [4:21:34<26:17:16, 3.35it/s] 15%|█▍ | 54691/371472 [4:21:35<27:49:41, 3.16it/s] 15%|█▍ | 54692/371472 [4:21:35<26:01:40, 3.38it/s] 15%|█▍ | 54693/371472 [4:21:35<25:22:30, 3.47it/s] 15%|█▍ | 54694/371472 [4:21:35<26:34:14, 3.31it/s] 15%|█▍ | 54695/371472 [4:21:36<26:47:26, 3.28it/s] 15%|█▍ | 54696/371472 [4:21:36<27:59:12, 3.14it/s] 15%|█▍ | 54697/371472 [4:21:36<26:59:14, 3.26it/s] 15%|█▍ | 54698/371472 [4:21:37<26:14:57, 3.35it/s] 15%|█▍ | 54699/371472 [4:21:37<25:21:51, 3.47it/s] 15%|█▍ | 54700/371472 [4:21:37<25:39:11, 3.43it/s] {'loss': 4.311, 'learning_rate': 8.678866168200134e-07, 'epoch': 2.36} + 15%|█▍ | 54700/371472 [4:21:37<25:39:11, 3.43it/s] 15%|█▍ | 54701/371472 [4:21:38<26:04:08, 3.38it/s] 15%|█▍ | 54702/371472 [4:21:38<25:47:03, 3.41it/s] 15%|█▍ | 54703/371472 [4:21:38<27:31:56, 3.20it/s] 15%|█▍ | 54704/371472 [4:21:38<26:13:43, 3.35it/s] 15%|█▍ | 54705/371472 [4:21:39<25:35:00, 3.44it/s] 15%|█▍ | 54706/371472 [4:21:39<26:08:51, 3.37it/s] 15%|█▍ | 54707/371472 [4:21:39<25:48:13, 3.41it/s] 15%|█▍ | 54708/371472 [4:21:40<26:01:23, 3.38it/s] 15%|█▍ | 54709/371472 [4:21:40<24:53:17, 3.54it/s] 15%|█▍ | 54710/371472 [4:21:40<24:19:23, 3.62it/s] 15%|█▍ | 54711/371472 [4:21:40<23:48:14, 3.70it/s] 15%|█▍ | 54712/371472 [4:21:41<23:25:32, 3.76it/s] 15%|█▍ | 54713/371472 [4:21:41<22:56:18, 3.84it/s] 15%|█▍ | 54714/371472 [4:21:41<24:03:50, 3.66it/s] 15%|█▍ | 54715/371472 [4:21:42<24:53:39, 3.53it/s] 15%|█▍ | 54716/371472 [4:21:42<23:50:22, 3.69it/s] 15%|█▍ | 54717/371472 [4:21:42<22:43:36, 3.87it/s] 15%|█▍ | 54718/371472 [4:21:42<23:15:00, 3.78it/s] 15%|█▍ | 54719/371472 [4:21:43<23:56:49, 3.67it/s] 15%|█▍ | 54720/371472 [4:21:43<24:33:19, 3.58it/s] {'loss': 4.1099, 'learning_rate': 8.678381348445343e-07, 'epoch': 2.36} + 15%|█▍ | 54720/371472 [4:21:43<24:33:19, 3.58it/s] 15%|█▍ | 54721/371472 [4:21:43<24:42:42, 3.56it/s] 15%|█▍ | 54722/371472 [4:21:43<24:34:58, 3.58it/s] 15%|█▍ | 54723/371472 [4:21:44<23:53:18, 3.68it/s] 15%|█▍ | 54724/371472 [4:21:44<27:11:07, 3.24it/s] 15%|█▍ | 54725/371472 [4:21:44<26:16:58, 3.35it/s] 15%|█▍ | 54726/371472 [4:21:45<26:55:14, 3.27it/s] 15%|█▍ | 54727/371472 [4:21:45<26:09:05, 3.36it/s] 15%|█▍ | 54728/371472 [4:21:45<25:12:00, 3.49it/s] 15%|█▍ | 54729/371472 [4:21:45<23:45:12, 3.70it/s] 15%|█▍ | 54730/371472 [4:21:46<23:21:38, 3.77it/s] 15%|█▍ | 54731/371472 [4:21:46<25:31:02, 3.45it/s] 15%|█▍ | 54732/371472 [4:21:46<25:51:52, 3.40it/s] 15%|█▍ | 54733/371472 [4:21:47<25:54:27, 3.40it/s] 15%|█▍ | 54734/371472 [4:21:47<25:03:01, 3.51it/s] 15%|█▍ | 54735/371472 [4:21:47<24:04:55, 3.65it/s] 15%|█▍ | 54736/371472 [4:21:47<24:09:48, 3.64it/s] 15%|█▍ | 54737/371472 [4:21:48<24:34:29, 3.58it/s] 15%|█▍ | 54738/371472 [4:21:48<24:54:00, 3.53it/s] 15%|█▍ | 54739/371472 [4:21:48<25:44:18, 3.42it/s] 15%|█▍ | 54740/371472 [4:21:49<25:37:59, 3.43it/s] {'loss': 4.1695, 'learning_rate': 8.677896528690555e-07, 'epoch': 2.36} + 15%|█▍ | 54740/371472 [4:21:49<25:37:59, 3.43it/s] 15%|█▍ | 54741/371472 [4:21:49<24:45:23, 3.55it/s] 15%|█▍ | 54742/371472 [4:21:49<25:33:43, 3.44it/s] 15%|█▍ | 54743/371472 [4:21:50<28:46:18, 3.06it/s] 15%|█▍ | 54744/371472 [4:21:50<27:41:11, 3.18it/s] 15%|█▍ | 54745/371472 [4:21:50<26:25:32, 3.33it/s] 15%|█▍ | 54746/371472 [4:21:50<25:38:11, 3.43it/s] 15%|█▍ | 54747/371472 [4:21:51<24:41:06, 3.56it/s] 15%|█▍ | 54748/371472 [4:21:51<25:03:09, 3.51it/s] 15%|█▍ | 54749/371472 [4:21:51<24:06:21, 3.65it/s] 15%|█▍ | 54750/371472 [4:21:52<24:32:31, 3.58it/s] 15%|█▍ | 54751/371472 [4:21:52<25:02:27, 3.51it/s] 15%|█▍ | 54752/371472 [4:21:52<24:55:32, 3.53it/s] 15%|█▍ | 54753/371472 [4:21:52<24:13:56, 3.63it/s] 15%|█▍ | 54754/371472 [4:21:53<23:35:07, 3.73it/s] 15%|█▍ | 54755/371472 [4:21:53<23:34:01, 3.73it/s] 15%|█▍ | 54756/371472 [4:21:53<23:29:46, 3.74it/s] 15%|█▍ | 54757/371472 [4:21:53<24:29:26, 3.59it/s] 15%|█▍ | 54758/371472 [4:21:54<24:05:11, 3.65it/s] 15%|█▍ | 54759/371472 [4:21:54<25:16:11, 3.48it/s] 15%|█▍ | 54760/371472 [4:21:54<23:58:05, 3.67it/s] {'loss': 4.2379, 'learning_rate': 8.677411708935766e-07, 'epoch': 2.36} + 15%|█▍ | 54760/371472 [4:21:54<23:58:05, 3.67it/s] 15%|█▍ | 54761/371472 [4:21:55<23:36:04, 3.73it/s] 15%|█▍ | 54762/371472 [4:21:55<23:22:22, 3.76it/s] 15%|█▍ | 54763/371472 [4:21:55<22:45:20, 3.87it/s] 15%|█▍ | 54764/371472 [4:21:55<24:01:27, 3.66it/s] 15%|█▍ | 54765/371472 [4:21:56<23:20:56, 3.77it/s] 15%|█▍ | 54766/371472 [4:21:56<24:22:28, 3.61it/s] 15%|█▍ | 54767/371472 [4:21:56<24:57:21, 3.53it/s] 15%|█▍ | 54768/371472 [4:21:56<26:00:00, 3.38it/s] 15%|█▍ | 54769/371472 [4:21:57<25:27:36, 3.46it/s] 15%|█▍ | 54770/371472 [4:21:57<24:07:11, 3.65it/s] 15%|█▍ | 54771/371472 [4:21:57<23:39:58, 3.72it/s] 15%|█▍ | 54772/371472 [4:21:58<24:06:09, 3.65it/s] 15%|█▍ | 54773/371472 [4:21:58<23:30:19, 3.74it/s] 15%|█▍ | 54774/371472 [4:21:58<23:57:06, 3.67it/s] 15%|█▍ | 54775/371472 [4:21:58<24:43:16, 3.56it/s] 15%|█▍ | 54776/371472 [4:21:59<24:17:20, 3.62it/s] 15%|█▍ | 54777/371472 [4:21:59<23:24:33, 3.76it/s] 15%|█▍ | 54778/371472 [4:21:59<25:01:44, 3.51it/s] 15%|█▍ | 54779/371472 [4:22:00<25:30:55, 3.45it/s] 15%|█▍ | 54780/371472 [4:22:00<24:22:00, 3.61it/s] {'loss': 4.2587, 'learning_rate': 8.676926889180978e-07, 'epoch': 2.36} + 15%|█▍ | 54780/371472 [4:22:00<24:22:00, 3.61it/s] 15%|█▍ | 54781/371472 [4:22:00<25:23:21, 3.46it/s] 15%|█▍ | 54782/371472 [4:22:00<24:01:49, 3.66it/s] 15%|█▍ | 54783/371472 [4:22:01<24:26:25, 3.60it/s] 15%|█▍ | 54784/371472 [4:22:01<23:28:31, 3.75it/s] 15%|█▍ | 54785/371472 [4:22:01<25:23:13, 3.47it/s] 15%|█▍ | 54786/371472 [4:22:01<24:39:30, 3.57it/s] 15%|█▍ | 54787/371472 [4:22:02<24:08:31, 3.64it/s] 15%|█▍ | 54788/371472 [4:22:02<23:02:20, 3.82it/s] 15%|█▍ | 54789/371472 [4:22:02<24:07:58, 3.65it/s] 15%|█▍ | 54790/371472 [4:22:02<23:39:17, 3.72it/s] 15%|█▍ | 54791/371472 [4:22:03<23:12:22, 3.79it/s] 15%|█▍ | 54792/371472 [4:22:03<24:01:12, 3.66it/s] 15%|█▍ | 54793/371472 [4:22:03<24:05:45, 3.65it/s] 15%|█▍ | 54794/371472 [4:22:04<26:11:18, 3.36it/s] 15%|█▍ | 54795/371472 [4:22:04<25:32:43, 3.44it/s] 15%|█▍ | 54796/371472 [4:22:04<26:02:23, 3.38it/s] 15%|█▍ | 54797/371472 [4:22:05<25:59:54, 3.38it/s] 15%|█▍ | 54798/371472 [4:22:05<25:12:41, 3.49it/s] 15%|█▍ | 54799/371472 [4:22:05<27:00:31, 3.26it/s] 15%|█▍ | 54800/371472 [4:22:05<25:20:31, 3.47it/s] {'loss': 4.1083, 'learning_rate': 8.676442069426188e-07, 'epoch': 2.36} + 15%|█▍ | 54800/371472 [4:22:05<25:20:31, 3.47it/s] 15%|█▍ | 54801/371472 [4:22:06<24:39:02, 3.57it/s] 15%|█▍ | 54802/371472 [4:22:06<24:03:56, 3.66it/s] 15%|█▍ | 54803/371472 [4:22:06<24:45:03, 3.55it/s] 15%|█▍ | 54804/371472 [4:22:07<25:56:28, 3.39it/s] 15%|█▍ | 54805/371472 [4:22:07<25:50:28, 3.40it/s] 15%|█▍ | 54806/371472 [4:22:07<26:23:34, 3.33it/s] 15%|█▍ | 54807/371472 [4:22:07<25:08:00, 3.50it/s] 15%|█▍ | 54808/371472 [4:22:08<24:37:48, 3.57it/s] 15%|█▍ | 54809/371472 [4:22:08<24:56:39, 3.53it/s] 15%|█▍ | 54810/371472 [4:22:08<24:40:22, 3.57it/s] 15%|█▍ | 54811/371472 [4:22:09<25:30:04, 3.45it/s] 15%|█▍ | 54812/371472 [4:22:09<26:22:15, 3.34it/s] 15%|█▍ | 54813/371472 [4:22:09<25:01:40, 3.51it/s] 15%|█▍ | 54814/371472 [4:22:09<25:59:53, 3.38it/s] 15%|█▍ | 54815/371472 [4:22:10<25:04:37, 3.51it/s] 15%|█▍ | 54816/371472 [4:22:10<25:03:32, 3.51it/s] 15%|█▍ | 54817/371472 [4:22:10<26:14:52, 3.35it/s] 15%|█▍ | 54818/371472 [4:22:11<25:38:07, 3.43it/s] 15%|█▍ | 54819/371472 [4:22:11<24:40:13, 3.57it/s] 15%|█▍ | 54820/371472 [4:22:11<24:09:35, 3.64it/s] {'loss': 4.0596, 'learning_rate': 8.675957249671401e-07, 'epoch': 2.36} + 15%|█▍ | 54820/371472 [4:22:11<24:09:35, 3.64it/s] 15%|█▍ | 54821/371472 [4:22:11<23:40:21, 3.72it/s] 15%|█▍ | 54822/371472 [4:22:12<23:23:25, 3.76it/s] 15%|█▍ | 54823/371472 [4:22:12<23:29:38, 3.74it/s] 15%|█▍ | 54824/371472 [4:22:12<22:56:37, 3.83it/s] 15%|█▍ | 54825/371472 [4:22:13<25:21:10, 3.47it/s] 15%|█▍ | 54826/371472 [4:22:13<24:16:46, 3.62it/s] 15%|█▍ | 54827/371472 [4:22:13<23:10:41, 3.79it/s] 15%|█▍ | 54828/371472 [4:22:13<23:44:19, 3.71it/s] 15%|█▍ | 54829/371472 [4:22:14<23:51:36, 3.69it/s] 15%|█▍ | 54830/371472 [4:22:14<24:16:24, 3.62it/s] 15%|█▍ | 54831/371472 [4:22:14<23:58:52, 3.67it/s] 15%|█▍ | 54832/371472 [4:22:14<24:17:17, 3.62it/s] 15%|█▍ | 54833/371472 [4:22:15<24:41:12, 3.56it/s] 15%|█▍ | 54834/371472 [4:22:15<24:27:00, 3.60it/s] 15%|█▍ | 54835/371472 [4:22:15<25:42:29, 3.42it/s] 15%|█▍ | 54836/371472 [4:22:16<26:29:45, 3.32it/s] 15%|█▍ | 54837/371472 [4:22:16<28:05:25, 3.13it/s] 15%|█▍ | 54838/371472 [4:22:16<27:30:50, 3.20it/s] 15%|█▍ | 54839/371472 [4:22:17<26:04:47, 3.37it/s] 15%|█▍ | 54840/371472 [4:22:17<25:19:53, 3.47it/s] {'loss': 3.8836, 'learning_rate': 8.675472429916611e-07, 'epoch': 2.36} + 15%|█▍ | 54840/371472 [4:22:17<25:19:53, 3.47it/s] 15%|█▍ | 54841/371472 [4:22:17<25:39:22, 3.43it/s] 15%|█▍ | 54842/371472 [4:22:17<25:50:57, 3.40it/s] 15%|█▍ | 54843/371472 [4:22:18<25:10:40, 3.49it/s] 15%|█▍ | 54844/371472 [4:22:18<26:02:16, 3.38it/s] 15%|█▍ | 54845/371472 [4:22:18<26:02:44, 3.38it/s] 15%|█▍ | 54846/371472 [4:22:19<25:43:22, 3.42it/s] 15%|█▍ | 54847/371472 [4:22:19<24:24:17, 3.60it/s] 15%|█▍ | 54848/371472 [4:22:19<25:14:39, 3.48it/s] 15%|█▍ | 54849/371472 [4:22:19<26:36:39, 3.31it/s] 15%|█▍ | 54850/371472 [4:22:20<26:31:45, 3.32it/s] 15%|█▍ | 54851/371472 [4:22:20<27:29:42, 3.20it/s] 15%|█▍ | 54852/371472 [4:22:20<26:28:39, 3.32it/s] 15%|█▍ | 54853/371472 [4:22:21<25:44:51, 3.42it/s] 15%|█▍ | 54854/371472 [4:22:21<27:40:55, 3.18it/s] 15%|█▍ | 54855/371472 [4:22:21<26:14:27, 3.35it/s] 15%|█▍ | 54856/371472 [4:22:22<25:19:58, 3.47it/s] 15%|█▍ | 54857/371472 [4:22:22<24:26:56, 3.60it/s] 15%|█▍ | 54858/371472 [4:22:22<24:04:19, 3.65it/s] 15%|█▍ | 54859/371472 [4:22:22<24:06:39, 3.65it/s] 15%|█▍ | 54860/371472 [4:22:23<23:29:57, 3.74it/s] {'loss': 4.306, 'learning_rate': 8.674987610161822e-07, 'epoch': 2.36} + 15%|█▍ | 54860/371472 [4:22:23<23:29:57, 3.74it/s] 15%|█▍ | 54861/371472 [4:22:23<23:53:47, 3.68it/s] 15%|█▍ | 54862/371472 [4:22:23<24:07:18, 3.65it/s] 15%|█▍ | 54863/371472 [4:22:23<24:15:57, 3.62it/s] 15%|█▍ | 54864/371472 [4:22:24<23:13:00, 3.79it/s] 15%|█▍ | 54865/371472 [4:22:24<24:08:35, 3.64it/s] 15%|█▍ | 54866/371472 [4:22:24<23:24:57, 3.76it/s] 15%|█▍ | 54867/371472 [4:22:24<23:32:14, 3.74it/s] 15%|█▍ | 54868/371472 [4:22:25<24:27:15, 3.60it/s] 15%|█▍ | 54869/371472 [4:22:25<24:44:18, 3.56it/s] 15%|█▍ | 54870/371472 [4:22:25<25:34:34, 3.44it/s] 15%|█▍ | 54871/371472 [4:22:26<24:56:02, 3.53it/s] 15%|█▍ | 54872/371472 [4:22:26<25:01:38, 3.51it/s] 15%|█▍ | 54873/371472 [4:22:26<25:07:49, 3.50it/s] 15%|█▍ | 54874/371472 [4:22:27<25:59:23, 3.38it/s] 15%|█▍ | 54875/371472 [4:22:27<24:34:56, 3.58it/s] 15%|█▍ | 54876/371472 [4:22:27<24:20:13, 3.61it/s] 15%|█▍ | 54877/371472 [4:22:27<26:30:20, 3.32it/s] 15%|█▍ | 54878/371472 [4:22:28<25:20:33, 3.47it/s] 15%|█▍ | 54879/371472 [4:22:28<27:03:21, 3.25it/s] 15%|█▍ | 54880/371472 [4:22:28<26:46:10, 3.29it/s] {'loss': 4.215, 'learning_rate': 8.674502790407032e-07, 'epoch': 2.36} + 15%|█▍ | 54880/371472 [4:22:28<26:46:10, 3.29it/s] 15%|█▍ | 54881/371472 [4:22:29<26:08:38, 3.36it/s] 15%|█▍ | 54882/371472 [4:22:29<28:09:53, 3.12it/s] 15%|█▍ | 54883/371472 [4:22:29<27:08:27, 3.24it/s] 15%|█▍ | 54884/371472 [4:22:29<25:45:47, 3.41it/s] 15%|█▍ | 54885/371472 [4:22:30<26:02:40, 3.38it/s] 15%|█▍ | 54886/371472 [4:22:30<24:54:36, 3.53it/s] 15%|█▍ | 54887/371472 [4:22:30<24:47:09, 3.55it/s] 15%|█▍ | 54888/371472 [4:22:31<24:20:53, 3.61it/s] 15%|█▍ | 54889/371472 [4:22:31<25:56:06, 3.39it/s] 15%|█▍ | 54890/371472 [4:22:31<25:06:20, 3.50it/s] 15%|█▍ | 54891/371472 [4:22:31<25:00:24, 3.52it/s] 15%|█▍ | 54892/371472 [4:22:32<24:10:19, 3.64it/s] 15%|█▍ | 54893/371472 [4:22:32<23:45:43, 3.70it/s] 15%|█▍ | 54894/371472 [4:22:32<23:39:33, 3.72it/s] 15%|█▍ | 54895/371472 [4:22:33<26:49:36, 3.28it/s] 15%|█▍ | 54896/371472 [4:22:33<25:35:45, 3.44it/s] 15%|█▍ | 54897/371472 [4:22:33<24:26:24, 3.60it/s] 15%|█▍ | 54898/371472 [4:22:33<23:39:03, 3.72it/s] 15%|█▍ | 54899/371472 [4:22:34<23:54:07, 3.68it/s] 15%|█▍ | 54900/371472 [4:22:34<23:24:58, 3.76it/s] {'loss': 4.0287, 'learning_rate': 8.674017970652244e-07, 'epoch': 2.36} + 15%|█▍ | 54900/371472 [4:22:34<23:24:58, 3.76it/s] 15%|█▍ | 54901/371472 [4:22:34<24:00:20, 3.66it/s] 15%|█▍ | 54902/371472 [4:22:34<23:28:32, 3.75it/s] 15%|█▍ | 54903/371472 [4:22:35<24:08:03, 3.64it/s] 15%|█▍ | 54904/371472 [4:22:35<27:38:00, 3.18it/s] 15%|█▍ | 54905/371472 [4:22:35<26:40:00, 3.30it/s] 15%|█▍ | 54906/371472 [4:22:36<26:52:29, 3.27it/s] 15%|█▍ | 54907/371472 [4:22:36<25:13:52, 3.49it/s] 15%|█▍ | 54908/371472 [4:22:36<24:54:33, 3.53it/s] 15%|█▍ | 54909/371472 [4:22:37<26:35:56, 3.31it/s] 15%|█▍ | 54910/371472 [4:22:37<26:03:21, 3.37it/s] 15%|█▍ | 54911/371472 [4:22:37<25:19:23, 3.47it/s] 15%|█▍ | 54912/371472 [4:22:37<24:42:24, 3.56it/s] 15%|█▍ | 54913/371472 [4:22:38<24:15:15, 3.63it/s] 15%|█▍ | 54914/371472 [4:22:38<26:07:20, 3.37it/s] 15%|█▍ | 54915/371472 [4:22:38<26:18:30, 3.34it/s] 15%|█▍ | 54916/371472 [4:22:39<26:01:18, 3.38it/s] 15%|█▍ | 54917/371472 [4:22:39<24:35:37, 3.58it/s] 15%|█▍ | 54918/371472 [4:22:39<24:22:54, 3.61it/s] 15%|█▍ | 54919/371472 [4:22:39<24:12:39, 3.63it/s] 15%|█▍ | 54920/371472 [4:22:40<23:08:14, 3.80it/s] {'loss': 4.4671, 'learning_rate': 8.673533150897455e-07, 'epoch': 2.37} + 15%|█▍ | 54920/371472 [4:22:40<23:08:14, 3.80it/s] 15%|█▍ | 54921/371472 [4:22:40<24:42:43, 3.56it/s] 15%|█▍ | 54922/371472 [4:22:40<24:00:12, 3.66it/s] 15%|█▍ | 54923/371472 [4:22:41<23:59:46, 3.66it/s] 15%|█▍ | 54924/371472 [4:22:41<23:31:37, 3.74it/s] 15%|█▍ | 54925/371472 [4:22:41<23:56:54, 3.67it/s] 15%|█▍ | 54926/371472 [4:22:41<24:31:32, 3.59it/s] 15%|█▍ | 54927/371472 [4:22:42<24:29:42, 3.59it/s] 15%|█▍ | 54928/371472 [4:22:42<24:16:53, 3.62it/s] 15%|█▍ | 54929/371472 [4:22:42<23:43:29, 3.71it/s] 15%|█▍ | 54930/371472 [4:22:42<23:31:38, 3.74it/s] 15%|█▍ | 54931/371472 [4:22:43<24:17:20, 3.62it/s] 15%|█▍ | 54932/371472 [4:22:43<25:45:21, 3.41it/s] 15%|█▍ | 54933/371472 [4:22:43<24:58:10, 3.52it/s] 15%|█▍ | 54934/371472 [4:22:44<24:47:15, 3.55it/s] 15%|█▍ | 54935/371472 [4:22:44<24:43:30, 3.56it/s] 15%|█▍ | 54936/371472 [4:22:44<23:51:45, 3.68it/s] 15%|█▍ | 54937/371472 [4:22:44<23:26:31, 3.75it/s] 15%|█▍ | 54938/371472 [4:22:45<23:31:52, 3.74it/s] 15%|█▍ | 54939/371472 [4:22:45<23:27:16, 3.75it/s] 15%|█▍ | 54940/371472 [4:22:45<22:39:59, 3.88it/s] {'loss': 4.3428, 'learning_rate': 8.673048331142666e-07, 'epoch': 2.37} + 15%|█▍ | 54940/371472 [4:22:45<22:39:59, 3.88it/s] 15%|█▍ | 54941/371472 [4:22:45<24:11:41, 3.63it/s] 15%|█▍ | 54942/371472 [4:22:46<24:01:26, 3.66it/s] 15%|█▍ | 54943/371472 [4:22:46<24:16:22, 3.62it/s] 15%|█▍ | 54944/371472 [4:22:46<24:19:39, 3.61it/s] 15%|█▍ | 54945/371472 [4:22:47<23:50:01, 3.69it/s] 15%|█▍ | 54946/371472 [4:22:47<23:56:26, 3.67it/s] 15%|█▍ | 54947/371472 [4:22:47<23:22:44, 3.76it/s] 15%|█▍ | 54948/371472 [4:22:47<24:00:19, 3.66it/s] 15%|█▍ | 54949/371472 [4:22:48<24:16:42, 3.62it/s] 15%|█▍ | 54950/371472 [4:22:48<24:48:43, 3.54it/s] 15%|█▍ | 54951/371472 [4:22:48<25:40:55, 3.42it/s] 15%|█▍ | 54952/371472 [4:22:49<25:24:07, 3.46it/s] 15%|█▍ | 54953/371472 [4:22:49<25:03:16, 3.51it/s] 15%|█▍ | 54954/371472 [4:22:49<24:30:03, 3.59it/s] 15%|█▍ | 54955/371472 [4:22:49<28:24:04, 3.10it/s] 15%|█▍ | 54956/371472 [4:22:50<30:28:14, 2.89it/s] 15%|█▍ | 54957/371472 [4:22:50<29:29:02, 2.98it/s] 15%|█▍ | 54958/371472 [4:22:51<28:45:03, 3.06it/s] 15%|█▍ | 54959/371472 [4:22:51<27:29:18, 3.20it/s] 15%|█▍ | 54960/371472 [4:22:51<26:37:48, 3.30it/s] {'loss': 4.0316, 'learning_rate': 8.672563511387877e-07, 'epoch': 2.37} + 15%|█▍ | 54960/371472 [4:22:51<26:37:48, 3.30it/s] 15%|█▍ | 54961/371472 [4:22:51<25:36:32, 3.43it/s] 15%|█▍ | 54962/371472 [4:22:52<26:18:31, 3.34it/s] 15%|█▍ | 54963/371472 [4:22:52<25:58:35, 3.38it/s] 15%|█▍ | 54964/371472 [4:22:52<25:25:16, 3.46it/s] 15%|█▍ | 54965/371472 [4:22:52<24:09:52, 3.64it/s] 15%|█▍ | 54966/371472 [4:22:53<24:02:13, 3.66it/s] 15%|█▍ | 54967/371472 [4:22:53<23:10:43, 3.79it/s] 15%|█▍ | 54968/371472 [4:22:53<24:28:26, 3.59it/s] 15%|█▍ | 54969/371472 [4:22:54<25:08:45, 3.50it/s] 15%|█▍ | 54970/371472 [4:22:54<27:02:58, 3.25it/s] 15%|█▍ | 54971/371472 [4:22:54<25:05:24, 3.50it/s] 15%|█▍ | 54972/371472 [4:22:54<25:05:31, 3.50it/s] 15%|█▍ | 54973/371472 [4:22:55<24:13:54, 3.63it/s] 15%|█▍ | 54974/371472 [4:22:55<23:03:21, 3.81it/s] 15%|█▍ | 54975/371472 [4:22:55<23:18:01, 3.77it/s] 15%|█▍ | 54976/371472 [4:22:55<22:59:34, 3.82it/s] 15%|█▍ | 54977/371472 [4:22:56<23:28:23, 3.75it/s] 15%|█▍ | 54978/371472 [4:22:56<26:40:14, 3.30it/s] 15%|█▍ | 54979/371472 [4:22:56<26:06:50, 3.37it/s] 15%|█▍ | 54980/371472 [4:22:57<24:48:17, 3.54it/s] {'loss': 4.2033, 'learning_rate': 8.672078691633088e-07, 'epoch': 2.37} + 15%|█▍ | 54980/371472 [4:22:57<24:48:17, 3.54it/s] 15%|█▍ | 54981/371472 [4:22:57<24:31:35, 3.58it/s] 15%|█▍ | 54982/371472 [4:22:57<23:21:59, 3.76it/s] 15%|█▍ | 54983/371472 [4:22:57<23:25:57, 3.75it/s] 15%|█▍ | 54984/371472 [4:22:58<25:22:49, 3.46it/s] 15%|█▍ | 54985/371472 [4:22:58<24:59:31, 3.52it/s] 15%|█▍ | 54986/371472 [4:22:58<25:06:58, 3.50it/s] 15%|█▍ | 54987/371472 [4:22:59<24:03:58, 3.65it/s] 15%|█▍ | 54988/371472 [4:22:59<23:54:23, 3.68it/s] 15%|█▍ | 54989/371472 [4:22:59<24:28:04, 3.59it/s] 15%|█▍ | 54990/371472 [4:22:59<25:35:20, 3.44it/s] 15%|█▍ | 54991/371472 [4:23:00<27:02:41, 3.25it/s] 15%|█▍ | 54992/371472 [4:23:00<25:47:44, 3.41it/s] 15%|█▍ | 54993/371472 [4:23:00<24:37:22, 3.57it/s] 15%|█▍ | 54994/371472 [4:23:01<23:41:56, 3.71it/s] 15%|█▍ | 54995/371472 [4:23:01<25:22:46, 3.46it/s] 15%|█▍ | 54996/371472 [4:23:01<24:40:21, 3.56it/s] 15%|█▍ | 54997/371472 [4:23:01<23:53:01, 3.68it/s] 15%|█▍ | 54998/371472 [4:23:02<24:48:34, 3.54it/s] 15%|█▍ | 54999/371472 [4:23:02<24:34:40, 3.58it/s] 15%|█▍ | 55000/371472 [4:23:02<25:21:33, 3.47it/s] {'loss': 4.3097, 'learning_rate': 8.671593871878299e-07, 'epoch': 2.37} + 15%|█▍ | 55000/371472 [4:23:02<25:21:33, 3.47it/s] 15%|█▍ | 55001/371472 [4:23:03<24:47:53, 3.54it/s] 15%|█▍ | 55002/371472 [4:23:03<23:43:10, 3.71it/s] 15%|█▍ | 55003/371472 [4:23:03<24:17:26, 3.62it/s] 15%|█▍ | 55004/371472 [4:23:03<24:19:28, 3.61it/s] 15%|█▍ | 55005/371472 [4:23:04<25:12:29, 3.49it/s] 15%|█▍ | 55006/371472 [4:23:04<24:43:45, 3.55it/s] 15%|█▍ | 55007/371472 [4:23:04<24:03:42, 3.65it/s] 15%|█▍ | 55008/371472 [4:23:05<24:58:21, 3.52it/s] 15%|█▍ | 55009/371472 [4:23:05<24:15:08, 3.62it/s] 15%|█▍ | 55010/371472 [4:23:05<24:06:05, 3.65it/s] 15%|█▍ | 55011/371472 [4:23:05<23:46:54, 3.70it/s] 15%|█▍ | 55012/371472 [4:23:06<23:50:12, 3.69it/s] 15%|█▍ | 55013/371472 [4:23:06<25:34:33, 3.44it/s] 15%|█▍ | 55014/371472 [4:23:06<24:29:37, 3.59it/s] 15%|█▍ | 55015/371472 [4:23:06<24:27:17, 3.59it/s] 15%|█▍ | 55016/371472 [4:23:07<24:34:47, 3.58it/s] 15%|█▍ | 55017/371472 [4:23:07<24:07:31, 3.64it/s] 15%|█▍ | 55018/371472 [4:23:07<24:26:31, 3.60it/s] 15%|█▍ | 55019/371472 [4:23:08<24:36:38, 3.57it/s] 15%|█▍ | 55020/371472 [4:23:08<26:36:37, 3.30it/s] {'loss': 4.1758, 'learning_rate': 8.671109052123509e-07, 'epoch': 2.37} + 15%|█▍ | 55020/371472 [4:23:08<26:36:37, 3.30it/s] 15%|█▍ | 55021/371472 [4:23:08<25:45:43, 3.41it/s] 15%|█▍ | 55022/371472 [4:23:08<24:42:17, 3.56it/s] 15%|█▍ | 55023/371472 [4:23:09<23:35:51, 3.73it/s] 15%|█▍ | 55024/371472 [4:23:09<24:23:56, 3.60it/s] 15%|█▍ | 55025/371472 [4:23:09<24:13:18, 3.63it/s] 15%|█▍ | 55026/371472 [4:23:10<24:24:43, 3.60it/s] 15%|█▍ | 55027/371472 [4:23:10<25:29:04, 3.45it/s] 15%|█▍ | 55028/371472 [4:23:10<27:18:09, 3.22it/s] 15%|█▍ | 55029/371472 [4:23:10<26:05:20, 3.37it/s] 15%|█▍ | 55030/371472 [4:23:11<25:08:58, 3.50it/s] 15%|█▍ | 55031/371472 [4:23:11<24:37:03, 3.57it/s] 15%|█▍ | 55032/371472 [4:23:11<24:08:09, 3.64it/s] 15%|█▍ | 55033/371472 [4:23:12<24:46:36, 3.55it/s] 15%|█▍ | 55034/371472 [4:23:12<24:54:05, 3.53it/s] 15%|█▍ | 55035/371472 [4:23:12<25:28:45, 3.45it/s] 15%|█▍ | 55036/371472 [4:23:12<25:24:30, 3.46it/s] 15%|█▍ | 55037/371472 [4:23:13<25:35:36, 3.43it/s] 15%|█▍ | 55038/371472 [4:23:13<25:19:04, 3.47it/s] 15%|█▍ | 55039/371472 [4:23:13<25:38:58, 3.43it/s] 15%|█▍ | 55040/371472 [4:23:14<25:04:52, 3.50it/s] {'loss': 4.5582, 'learning_rate': 8.670624232368721e-07, 'epoch': 2.37} + 15%|█▍ | 55040/371472 [4:23:14<25:04:52, 3.50it/s] 15%|█▍ | 55041/371472 [4:23:14<24:11:45, 3.63it/s] 15%|█▍ | 55042/371472 [4:23:14<22:55:25, 3.83it/s] 15%|█▍ | 55043/371472 [4:23:14<22:38:25, 3.88it/s] 15%|█▍ | 55044/371472 [4:23:15<23:10:49, 3.79it/s] 15%|█▍ | 55045/371472 [4:23:15<23:47:34, 3.69it/s] 15%|█▍ | 55046/371472 [4:23:15<24:10:20, 3.64it/s] 15%|█▍ | 55047/371472 [4:23:15<24:02:44, 3.66it/s] 15%|█▍ | 55048/371472 [4:23:16<26:21:52, 3.33it/s] 15%|█▍ | 55049/371472 [4:23:16<24:37:13, 3.57it/s] 15%|█▍ | 55050/371472 [4:23:16<24:15:42, 3.62it/s] 15%|█▍ | 55051/371472 [4:23:17<24:18:58, 3.61it/s] 15%|█▍ | 55052/371472 [4:23:17<23:59:57, 3.66it/s] 15%|█▍ | 55053/371472 [4:23:17<25:04:53, 3.50it/s] 15%|█▍ | 55054/371472 [4:23:17<25:21:24, 3.47it/s] 15%|█▍ | 55055/371472 [4:23:18<25:18:49, 3.47it/s] 15%|█▍ | 55056/371472 [4:23:18<24:23:10, 3.60it/s] 15%|█▍ | 55057/371472 [4:23:18<24:28:19, 3.59it/s] 15%|█▍ | 55058/371472 [4:23:19<23:57:36, 3.67it/s] 15%|█▍ | 55059/371472 [4:23:19<25:11:38, 3.49it/s] 15%|█▍ | 55060/371472 [4:23:19<25:11:59, 3.49it/s] {'loss': 4.3608, 'learning_rate': 8.670139412613933e-07, 'epoch': 2.37} + 15%|█▍ | 55060/371472 [4:23:19<25:11:59, 3.49it/s] 15%|█▍ | 55061/371472 [4:23:19<26:54:01, 3.27it/s] 15%|█▍ | 55062/371472 [4:23:20<25:41:16, 3.42it/s] 15%|█▍ | 55063/371472 [4:23:20<30:26:56, 2.89it/s] 15%|█▍ | 55064/371472 [4:23:21<29:12:19, 3.01it/s] 15%|█▍ | 55065/371472 [4:23:21<27:57:18, 3.14it/s] 15%|█▍ | 55066/371472 [4:23:21<27:15:42, 3.22it/s] 15%|█▍ | 55067/371472 [4:23:21<25:54:41, 3.39it/s] 15%|█▍ | 55068/371472 [4:23:22<26:33:44, 3.31it/s] 15%|█▍ | 55069/371472 [4:23:22<26:18:15, 3.34it/s] 15%|█▍ | 55070/371472 [4:23:22<25:45:19, 3.41it/s] 15%|█▍ | 55071/371472 [4:23:23<26:44:43, 3.29it/s] 15%|█▍ | 55072/371472 [4:23:23<26:34:08, 3.31it/s] 15%|█▍ | 55073/371472 [4:23:23<26:21:52, 3.33it/s] 15%|█▍ | 55074/371472 [4:23:23<26:21:11, 3.34it/s] 15%|█▍ | 55075/371472 [4:23:24<25:24:38, 3.46it/s] 15%|█▍ | 55076/371472 [4:23:24<24:24:46, 3.60it/s] 15%|█▍ | 55077/371472 [4:23:24<24:01:42, 3.66it/s] 15%|█▍ | 55078/371472 [4:23:25<24:38:04, 3.57it/s] 15%|█▍ | 55079/371472 [4:23:25<25:05:23, 3.50it/s] 15%|█▍ | 55080/371472 [4:23:25<24:25:26, 3.60it/s] {'loss': 4.3383, 'learning_rate': 8.669654592859144e-07, 'epoch': 2.37} + 15%|█▍ | 55080/371472 [4:23:25<24:25:26, 3.60it/s] 15%|█▍ | 55081/371472 [4:23:25<23:49:01, 3.69it/s] 15%|█▍ | 55082/371472 [4:23:26<24:08:21, 3.64it/s] 15%|█▍ | 55083/371472 [4:23:26<24:54:48, 3.53it/s] 15%|█▍ | 55084/371472 [4:23:26<27:24:53, 3.21it/s] 15%|█▍ | 55085/371472 [4:23:27<27:32:43, 3.19it/s] 15%|█▍ | 55086/371472 [4:23:27<26:08:15, 3.36it/s] 15%|█▍ | 55087/371472 [4:23:27<27:05:39, 3.24it/s] 15%|█▍ | 55088/371472 [4:23:28<26:40:30, 3.29it/s] 15%|█▍ | 55089/371472 [4:23:28<26:19:53, 3.34it/s] 15%|█▍ | 55090/371472 [4:23:28<25:34:36, 3.44it/s] 15%|█▍ | 55091/371472 [4:23:28<25:32:17, 3.44it/s] 15%|█▍ | 55092/371472 [4:23:29<25:02:32, 3.51it/s] 15%|█▍ | 55093/371472 [4:23:29<25:06:14, 3.50it/s] 15%|█▍ | 55094/371472 [4:23:29<24:29:56, 3.59it/s] 15%|█▍ | 55095/371472 [4:23:29<23:55:20, 3.67it/s] 15%|█▍ | 55096/371472 [4:23:30<24:20:14, 3.61it/s] 15%|█▍ | 55097/371472 [4:23:30<24:24:35, 3.60it/s] 15%|█▍ | 55098/371472 [4:23:30<24:15:37, 3.62it/s] 15%|█▍ | 55099/371472 [4:23:31<23:21:55, 3.76it/s] 15%|█▍ | 55100/371472 [4:23:31<22:39:07, 3.88it/s] {'loss': 4.1525, 'learning_rate': 8.669169773104354e-07, 'epoch': 2.37} + 15%|█▍ | 55100/371472 [4:23:31<22:39:07, 3.88it/s] 15%|█▍ | 55101/371472 [4:23:31<24:44:11, 3.55it/s] 15%|█▍ | 55102/371472 [4:23:31<26:00:01, 3.38it/s] 15%|█▍ | 55103/371472 [4:23:32<24:40:06, 3.56it/s] 15%|█▍ | 55104/371472 [4:23:32<24:06:56, 3.64it/s] 15%|█▍ | 55105/371472 [4:23:32<23:51:45, 3.68it/s] 15%|█▍ | 55106/371472 [4:23:33<24:24:15, 3.60it/s] 15%|█▍ | 55107/371472 [4:23:33<24:29:52, 3.59it/s] 15%|█▍ | 55108/371472 [4:23:33<24:50:55, 3.54it/s] 15%|█▍ | 55109/371472 [4:23:33<26:11:45, 3.35it/s] 15%|█▍ | 55110/371472 [4:23:34<24:58:47, 3.52it/s] 15%|█▍ | 55111/371472 [4:23:34<25:06:22, 3.50it/s] 15%|█▍ | 55112/371472 [4:23:34<24:13:22, 3.63it/s] 15%|█▍ | 55113/371472 [4:23:35<25:34:57, 3.44it/s] 15%|█▍ | 55114/371472 [4:23:35<25:16:29, 3.48it/s] 15%|█▍ | 55115/371472 [4:23:35<25:25:59, 3.46it/s] 15%|█▍ | 55116/371472 [4:23:35<24:58:35, 3.52it/s] 15%|█▍ | 55117/371472 [4:23:36<25:18:41, 3.47it/s] 15%|█▍ | 55118/371472 [4:23:36<24:58:46, 3.52it/s] 15%|█▍ | 55119/371472 [4:23:36<25:13:00, 3.48it/s] 15%|█▍ | 55120/371472 [4:23:36<24:11:29, 3.63it/s] {'loss': 4.1372, 'learning_rate': 8.668684953349565e-07, 'epoch': 2.37} + 15%|█▍ | 55120/371472 [4:23:36<24:11:29, 3.63it/s] 15%|█▍ | 55121/371472 [4:23:37<23:56:34, 3.67it/s] 15%|█▍ | 55122/371472 [4:23:37<24:18:02, 3.62it/s] 15%|█▍ | 55123/371472 [4:23:37<23:33:37, 3.73it/s] 15%|█▍ | 55124/371472 [4:23:38<24:15:24, 3.62it/s] 15%|█▍ | 55125/371472 [4:23:38<23:29:49, 3.74it/s] 15%|█▍ | 55126/371472 [4:23:38<24:50:10, 3.54it/s] 15%|█▍ | 55127/371472 [4:23:38<24:48:32, 3.54it/s] 15%|█▍ | 55128/371472 [4:23:39<24:31:06, 3.58it/s] 15%|█▍ | 55129/371472 [4:23:39<24:25:29, 3.60it/s] 15%|█▍ | 55130/371472 [4:23:39<23:47:06, 3.69it/s] 15%|█▍ | 55131/371472 [4:23:40<24:17:38, 3.62it/s] 15%|█▍ | 55132/371472 [4:23:40<25:13:05, 3.48it/s] 15%|█▍ | 55133/371472 [4:23:40<24:24:35, 3.60it/s] 15%|█▍ | 55134/371472 [4:23:40<24:01:08, 3.66it/s] 15%|█▍ | 55135/371472 [4:23:41<24:29:40, 3.59it/s] 15%|█▍ | 55136/371472 [4:23:41<24:05:16, 3.65it/s] 15%|█▍ | 55137/371472 [4:23:41<25:01:10, 3.51it/s] 15%|█▍ | 55138/371472 [4:23:42<25:36:50, 3.43it/s] 15%|█▍ | 55139/371472 [4:23:42<25:01:10, 3.51it/s] 15%|█▍ | 55140/371472 [4:23:42<23:38:13, 3.72it/s] {'loss': 4.3853, 'learning_rate': 8.668200133594777e-07, 'epoch': 2.37} + 15%|█▍ | 55140/371472 [4:23:42<23:38:13, 3.72it/s] 15%|█▍ | 55141/371472 [4:23:42<24:15:09, 3.62it/s] 15%|█▍ | 55142/371472 [4:23:43<23:46:12, 3.70it/s] 15%|█▍ | 55143/371472 [4:23:43<22:45:16, 3.86it/s] 15%|█▍ | 55144/371472 [4:23:43<23:19:03, 3.77it/s] 15%|█▍ | 55145/371472 [4:23:43<25:16:03, 3.48it/s] 15%|█▍ | 55146/371472 [4:23:44<25:46:54, 3.41it/s] 15%|█▍ | 55147/371472 [4:23:44<24:50:36, 3.54it/s] 15%|█▍ | 55148/371472 [4:23:44<25:53:09, 3.39it/s] 15%|█▍ | 55149/371472 [4:23:45<25:21:27, 3.47it/s] 15%|█▍ | 55150/371472 [4:23:45<25:46:50, 3.41it/s] 15%|█▍ | 55151/371472 [4:23:45<25:43:51, 3.41it/s] 15%|█▍ | 55152/371472 [4:23:46<27:29:26, 3.20it/s] 15%|█▍ | 55153/371472 [4:23:46<25:46:55, 3.41it/s] 15%|█▍ | 55154/371472 [4:23:46<24:07:45, 3.64it/s] 15%|█▍ | 55155/371472 [4:23:46<25:00:57, 3.51it/s] 15%|█▍ | 55156/371472 [4:23:47<23:50:33, 3.69it/s] 15%|█▍ | 55157/371472 [4:23:47<24:23:03, 3.60it/s] 15%|█▍ | 55158/371472 [4:23:47<24:14:11, 3.63it/s] 15%|█▍ | 55159/371472 [4:23:47<23:45:42, 3.70it/s] 15%|█▍ | 55160/371472 [4:23:48<24:35:49, 3.57it/s] {'loss': 4.1832, 'learning_rate': 8.667715313839988e-07, 'epoch': 2.38} + 15%|█▍ | 55160/371472 [4:23:48<24:35:49, 3.57it/s] 15%|█▍ | 55161/371472 [4:23:48<24:56:52, 3.52it/s] 15%|█▍ | 55162/371472 [4:23:48<25:56:15, 3.39it/s] 15%|█▍ | 55163/371472 [4:23:49<24:54:41, 3.53it/s] 15%|█▍ | 55164/371472 [4:23:49<25:06:26, 3.50it/s] 15%|█▍ | 55165/371472 [4:23:49<24:24:28, 3.60it/s] 15%|█▍ | 55166/371472 [4:23:49<23:15:42, 3.78it/s] 15%|█▍ | 55167/371472 [4:23:50<24:07:33, 3.64it/s] 15%|█▍ | 55168/371472 [4:23:50<24:02:18, 3.66it/s] 15%|█▍ | 55169/371472 [4:23:50<23:26:29, 3.75it/s] 15%|█▍ | 55170/371472 [4:23:50<23:19:34, 3.77it/s] 15%|█▍ | 55171/371472 [4:23:51<22:34:13, 3.89it/s] 15%|█▍ | 55172/371472 [4:23:51<22:55:16, 3.83it/s] 15%|█▍ | 55173/371472 [4:23:51<23:01:19, 3.82it/s] 15%|█▍ | 55174/371472 [4:23:51<23:00:27, 3.82it/s] 15%|█▍ | 55175/371472 [4:23:52<22:57:39, 3.83it/s] 15%|█▍ | 55176/371472 [4:23:52<23:02:54, 3.81it/s] 15%|█▍ | 55177/371472 [4:23:52<22:25:32, 3.92it/s] 15%|█▍ | 55178/371472 [4:23:53<23:56:38, 3.67it/s] 15%|█▍ | 55179/371472 [4:23:53<24:40:49, 3.56it/s] 15%|█▍ | 55180/371472 [4:23:53<23:48:25, 3.69it/s] {'loss': 4.3249, 'learning_rate': 8.667230494085198e-07, 'epoch': 2.38} + 15%|█▍ | 55180/371472 [4:23:53<23:48:25, 3.69it/s] 15%|█▍ | 55181/371472 [4:23:53<23:33:53, 3.73it/s] 15%|█▍ | 55182/371472 [4:23:54<24:04:51, 3.65it/s] 15%|█▍ | 55183/371472 [4:23:54<24:09:02, 3.64it/s] 15%|█▍ | 55184/371472 [4:23:54<24:01:40, 3.66it/s] 15%|█▍ | 55185/371472 [4:23:54<23:24:43, 3.75it/s] 15%|█▍ | 55186/371472 [4:23:55<23:08:00, 3.80it/s] 15%|█▍ | 55187/371472 [4:23:55<23:29:27, 3.74it/s] 15%|█▍ | 55188/371472 [4:23:55<25:24:34, 3.46it/s] 15%|█▍ | 55189/371472 [4:23:56<24:37:53, 3.57it/s] 15%|█▍ | 55190/371472 [4:23:56<23:51:54, 3.68it/s] 15%|█▍ | 55191/371472 [4:23:56<23:45:58, 3.70it/s] 15%|█▍ | 55192/371472 [4:23:56<24:56:43, 3.52it/s] 15%|█▍ | 55193/371472 [4:23:57<25:45:08, 3.41it/s] 15%|█▍ | 55194/371472 [4:23:57<24:35:35, 3.57it/s] 15%|█▍ | 55195/371472 [4:23:57<25:36:33, 3.43it/s] 15%|█▍ | 55196/371472 [4:23:58<24:14:58, 3.62it/s] 15%|█▍ | 55197/371472 [4:23:58<25:41:29, 3.42it/s] 15%|█▍ | 55198/371472 [4:23:58<26:50:16, 3.27it/s] 15%|█▍ | 55199/371472 [4:23:58<25:37:28, 3.43it/s] 15%|█▍ | 55200/371472 [4:23:59<24:39:13, 3.56it/s] {'loss': 4.231, 'learning_rate': 8.66674567433041e-07, 'epoch': 2.38} + 15%|█▍ | 55200/371472 [4:23:59<24:39:13, 3.56it/s] 15%|█▍ | 55201/371472 [4:23:59<24:07:03, 3.64it/s] 15%|█▍ | 55202/371472 [4:23:59<24:32:28, 3.58it/s] 15%|█▍ | 55203/371472 [4:24:00<25:13:27, 3.48it/s] 15%|█▍ | 55204/371472 [4:24:00<25:15:29, 3.48it/s] 15%|█▍ | 55205/371472 [4:24:00<26:22:57, 3.33it/s] 15%|█▍ | 55206/371472 [4:24:00<26:03:06, 3.37it/s] 15%|█▍ | 55207/371472 [4:24:01<26:43:30, 3.29it/s] 15%|█▍ | 55208/371472 [4:24:01<26:16:09, 3.34it/s] 15%|█▍ | 55209/371472 [4:24:01<28:22:31, 3.10it/s] 15%|█▍ | 55210/371472 [4:24:02<26:11:59, 3.35it/s] 15%|█▍ | 55211/371472 [4:24:02<25:23:19, 3.46it/s] 15%|█▍ | 55212/371472 [4:24:02<25:24:53, 3.46it/s] 15%|█▍ | 55213/371472 [4:24:03<24:02:32, 3.65it/s] 15%|█▍ | 55214/371472 [4:24:03<24:33:41, 3.58it/s] 15%|█▍ | 55215/371472 [4:24:03<24:09:33, 3.64it/s] 15%|█▍ | 55216/371472 [4:24:03<24:11:14, 3.63it/s] 15%|█▍ | 55217/371472 [4:24:04<23:29:41, 3.74it/s] 15%|█▍ | 55218/371472 [4:24:04<23:47:08, 3.69it/s] 15%|█▍ | 55219/371472 [4:24:04<23:28:19, 3.74it/s] 15%|█▍ | 55220/371472 [4:24:04<22:35:50, 3.89it/s] {'loss': 4.3996, 'learning_rate': 8.666260854575622e-07, 'epoch': 2.38} + 15%|█▍ | 55220/371472 [4:24:04<22:35:50, 3.89it/s] 15%|█▍ | 55221/371472 [4:24:05<22:02:12, 3.99it/s] 15%|█▍ | 55222/371472 [4:24:05<24:02:34, 3.65it/s] 15%|█▍ | 55223/371472 [4:24:05<23:56:45, 3.67it/s] 15%|█▍ | 55224/371472 [4:24:05<23:57:20, 3.67it/s] 15%|█▍ | 55225/371472 [4:24:06<24:21:55, 3.61it/s] 15%|█▍ | 55226/371472 [4:24:06<24:05:58, 3.65it/s] 15%|█▍ | 55227/371472 [4:24:06<24:59:16, 3.52it/s] 15%|█▍ | 55228/371472 [4:24:07<24:02:21, 3.65it/s] 15%|█▍ | 55229/371472 [4:24:07<24:35:00, 3.57it/s] 15%|█▍ | 55230/371472 [4:24:07<26:12:28, 3.35it/s] 15%|█▍ | 55231/371472 [4:24:07<25:39:11, 3.42it/s] 15%|█▍ | 55232/371472 [4:24:08<25:31:09, 3.44it/s] 15%|█▍ | 55233/371472 [4:24:08<24:55:35, 3.52it/s] 15%|█▍ | 55234/371472 [4:24:08<24:29:30, 3.59it/s] 15%|█▍ | 55235/371472 [4:24:09<26:10:15, 3.36it/s] 15%|█▍ | 55236/371472 [4:24:09<25:34:34, 3.43it/s] 15%|█▍ | 55237/371472 [4:24:09<24:50:26, 3.54it/s] 15%|█▍ | 55238/371472 [4:24:09<24:23:03, 3.60it/s] 15%|█▍ | 55239/371472 [4:24:10<24:54:33, 3.53it/s] 15%|█▍ | 55240/371472 [4:24:10<24:15:02, 3.62it/s] {'loss': 4.3011, 'learning_rate': 8.665776034820831e-07, 'epoch': 2.38} + 15%|█▍ | 55240/371472 [4:24:10<24:15:02, 3.62it/s] 15%|█▍ | 55241/371472 [4:24:10<24:18:24, 3.61it/s] 15%|█▍ | 55242/371472 [4:24:11<26:31:38, 3.31it/s] 15%|█▍ | 55243/371472 [4:24:11<26:31:01, 3.31it/s] 15%|█▍ | 55244/371472 [4:24:11<26:46:13, 3.28it/s] 15%|█▍ | 55245/371472 [4:24:12<25:49:51, 3.40it/s] 15%|█▍ | 55246/371472 [4:24:12<25:04:32, 3.50it/s] 15%|█▍ | 55247/371472 [4:24:12<24:27:51, 3.59it/s] 15%|█▍ | 55248/371472 [4:24:12<23:44:25, 3.70it/s] 15%|█▍ | 55249/371472 [4:24:13<25:12:57, 3.48it/s] 15%|█▍ | 55250/371472 [4:24:13<24:19:50, 3.61it/s] 15%|█▍ | 55251/371472 [4:24:13<25:07:51, 3.50it/s] 15%|█▍ | 55252/371472 [4:24:14<26:05:12, 3.37it/s] 15%|█▍ | 55253/371472 [4:24:14<25:33:05, 3.44it/s] 15%|█▍ | 55254/371472 [4:24:14<26:54:22, 3.26it/s] 15%|█▍ | 55255/371472 [4:24:14<26:10:45, 3.36it/s] 15%|█▍ | 55256/371472 [4:24:15<25:05:39, 3.50it/s] 15%|█▍ | 55257/371472 [4:24:15<25:57:19, 3.38it/s] 15%|█▍ | 55258/371472 [4:24:15<25:04:00, 3.50it/s] 15%|█▍ | 55259/371472 [4:24:15<23:43:15, 3.70it/s] 15%|█▍ | 55260/371472 [4:24:16<23:34:41, 3.73it/s] {'loss': 4.1059, 'learning_rate': 8.665291215066042e-07, 'epoch': 2.38} + 15%|█▍ | 55260/371472 [4:24:16<23:34:41, 3.73it/s] 15%|█▍ | 55261/371472 [4:24:16<23:03:51, 3.81it/s] 15%|█▍ | 55262/371472 [4:24:16<23:25:47, 3.75it/s] 15%|█▍ | 55263/371472 [4:24:17<23:11:39, 3.79it/s] 15%|█▍ | 55264/371472 [4:24:17<24:11:45, 3.63it/s] 15%|█▍ | 55265/371472 [4:24:17<25:43:18, 3.41it/s] 15%|█▍ | 55266/371472 [4:24:18<28:18:43, 3.10it/s] 15%|█▍ | 55267/371472 [4:24:18<26:26:34, 3.32it/s] 15%|█▍ | 55268/371472 [4:24:18<26:10:02, 3.36it/s] 15%|█▍ | 55269/371472 [4:24:18<25:59:53, 3.38it/s] 15%|█▍ | 55270/371472 [4:24:19<25:09:07, 3.49it/s] 15%|█▍ | 55271/371472 [4:24:19<24:35:16, 3.57it/s] 15%|█▍ | 55272/371472 [4:24:19<24:46:33, 3.55it/s] 15%|█▍ | 55273/371472 [4:24:19<24:26:17, 3.59it/s] 15%|█▍ | 55274/371472 [4:24:20<24:06:06, 3.64it/s] 15%|█▍ | 55275/371472 [4:24:20<24:08:16, 3.64it/s] 15%|█▍ | 55276/371472 [4:24:20<26:33:09, 3.31it/s] 15%|█▍ | 55277/371472 [4:24:21<27:12:55, 3.23it/s] 15%|█▍ | 55278/371472 [4:24:21<26:26:46, 3.32it/s] 15%|█▍ | 55279/371472 [4:24:21<25:34:49, 3.43it/s] 15%|█▍ | 55280/371472 [4:24:22<24:22:40, 3.60it/s] {'loss': 4.1404, 'learning_rate': 8.664806395311254e-07, 'epoch': 2.38} + 15%|█▍ | 55280/371472 [4:24:22<24:22:40, 3.60it/s] 15%|█▍ | 55281/371472 [4:24:22<23:52:23, 3.68it/s] 15%|█▍ | 55282/371472 [4:24:22<24:14:50, 3.62it/s] 15%|█▍ | 55283/371472 [4:24:22<23:54:00, 3.67it/s] 15%|█▍ | 55284/371472 [4:24:23<25:12:01, 3.49it/s] 15%|█▍ | 55285/371472 [4:24:23<26:13:03, 3.35it/s] 15%|█▍ | 55286/371472 [4:24:23<24:46:49, 3.54it/s] 15%|█▍ | 55287/371472 [4:24:24<25:22:19, 3.46it/s] 15%|█▍ | 55288/371472 [4:24:24<25:39:43, 3.42it/s] 15%|█▍ | 55289/371472 [4:24:24<26:18:08, 3.34it/s] 15%|█▍ | 55290/371472 [4:24:24<26:12:44, 3.35it/s] 15%|█▍ | 55291/371472 [4:24:25<26:34:49, 3.30it/s] 15%|█▍ | 55292/371472 [4:24:25<25:30:02, 3.44it/s] 15%|█▍ | 55293/371472 [4:24:25<24:29:11, 3.59it/s] 15%|█▍ | 55294/371472 [4:24:26<23:42:25, 3.70it/s] 15%|█▍ | 55295/371472 [4:24:26<23:10:31, 3.79it/s] 15%|█▍ | 55296/371472 [4:24:26<23:16:39, 3.77it/s] 15%|█▍ | 55297/371472 [4:24:26<23:42:51, 3.70it/s] 15%|█▍ | 55298/371472 [4:24:27<23:17:47, 3.77it/s] 15%|█▍ | 55299/371472 [4:24:27<23:05:06, 3.80it/s] 15%|█▍ | 55300/371472 [4:24:27<23:16:47, 3.77it/s] {'loss': 4.2836, 'learning_rate': 8.664321575556465e-07, 'epoch': 2.38} + 15%|█▍ | 55300/371472 [4:24:27<23:16:47, 3.77it/s] 15%|█▍ | 55301/371472 [4:24:27<24:29:37, 3.59it/s] 15%|█▍ | 55302/371472 [4:24:28<25:31:57, 3.44it/s] 15%|█▍ | 55303/371472 [4:24:28<24:09:51, 3.63it/s] 15%|█▍ | 55304/371472 [4:24:28<24:29:28, 3.59it/s] 15%|█▍ | 55305/371472 [4:24:28<23:50:47, 3.68it/s] 15%|█▍ | 55306/371472 [4:24:29<23:32:38, 3.73it/s] 15%|█▍ | 55307/371472 [4:24:29<23:11:23, 3.79it/s] 15%|█▍ | 55308/371472 [4:24:29<23:44:07, 3.70it/s] 15%|█▍ | 55309/371472 [4:24:30<24:09:53, 3.63it/s] 15%|█▍ | 55310/371472 [4:24:30<23:38:25, 3.71it/s] 15%|█▍ | 55311/371472 [4:24:30<23:19:40, 3.76it/s] 15%|█▍ | 55312/371472 [4:24:30<23:33:27, 3.73it/s] 15%|█▍ | 55313/371472 [4:24:31<24:03:24, 3.65it/s] 15%|█▍ | 55314/371472 [4:24:31<24:10:08, 3.63it/s] 15%|█▍ | 55315/371472 [4:24:31<23:43:32, 3.70it/s] 15%|█▍ | 55316/371472 [4:24:31<23:32:39, 3.73it/s] 15%|█▍ | 55317/371472 [4:24:32<23:06:40, 3.80it/s] 15%|█▍ | 55318/371472 [4:24:32<25:13:27, 3.48it/s] 15%|█▍ | 55319/371472 [4:24:32<25:52:25, 3.39it/s] 15%|█▍ | 55320/371472 [4:24:33<27:29:32, 3.19it/s] {'loss': 4.2736, 'learning_rate': 8.663836755801676e-07, 'epoch': 2.38} + 15%|█▍ | 55320/371472 [4:24:33<27:29:32, 3.19it/s] 15%|█▍ | 55321/371472 [4:24:33<26:12:02, 3.35it/s] 15%|█▍ | 55322/371472 [4:24:33<25:23:46, 3.46it/s] 15%|█▍ | 55323/371472 [4:24:34<26:19:57, 3.33it/s] 15%|█▍ | 55324/371472 [4:24:34<24:53:47, 3.53it/s] 15%|█▍ | 55325/371472 [4:24:34<23:27:38, 3.74it/s] 15%|█▍ | 55326/371472 [4:24:34<23:46:32, 3.69it/s] 15%|█▍ | 55327/371472 [4:24:35<23:44:51, 3.70it/s] 15%|█▍ | 55328/371472 [4:24:35<23:13:11, 3.78it/s] 15%|█▍ | 55329/371472 [4:24:35<23:34:02, 3.73it/s] 15%|█▍ | 55330/371472 [4:24:35<23:57:01, 3.67it/s] 15%|█▍ | 55331/371472 [4:24:36<24:00:05, 3.66it/s] 15%|█▍ | 55332/371472 [4:24:36<24:47:50, 3.54it/s] 15%|█▍ | 55333/371472 [4:24:36<24:50:02, 3.54it/s] 15%|█▍ | 55334/371472 [4:24:37<24:22:53, 3.60it/s] 15%|█▍ | 55335/371472 [4:24:37<23:31:55, 3.73it/s] 15%|█▍ | 55336/371472 [4:24:37<22:51:12, 3.84it/s] 15%|█▍ | 55337/371472 [4:24:37<22:57:17, 3.83it/s] 15%|█▍ | 55338/371472 [4:24:38<26:52:21, 3.27it/s] 15%|█▍ | 55339/371472 [4:24:38<25:50:23, 3.40it/s] 15%|█▍ | 55340/371472 [4:24:38<24:58:53, 3.52it/s] {'loss': 4.0531, 'learning_rate': 8.663351936046887e-07, 'epoch': 2.38} + 15%|█▍ | 55340/371472 [4:24:38<24:58:53, 3.52it/s] 15%|█▍ | 55341/371472 [4:24:39<25:54:02, 3.39it/s] 15%|█▍ | 55342/371472 [4:24:39<26:31:07, 3.31it/s] 15%|█▍ | 55343/371472 [4:24:39<25:13:57, 3.48it/s] 15%|█▍ | 55344/371472 [4:24:39<25:30:13, 3.44it/s] 15%|█▍ | 55345/371472 [4:24:40<24:44:55, 3.55it/s] 15%|█▍ | 55346/371472 [4:24:40<24:26:47, 3.59it/s] 15%|█▍ | 55347/371472 [4:24:40<23:53:17, 3.68it/s] 15%|█▍ | 55348/371472 [4:24:41<27:22:06, 3.21it/s] 15%|█▍ | 55349/371472 [4:24:41<25:48:05, 3.40it/s] 15%|█▍ | 55350/371472 [4:24:41<25:06:21, 3.50it/s] 15%|█▍ | 55351/371472 [4:24:41<25:21:37, 3.46it/s] 15%|█▍ | 55352/371472 [4:24:42<25:15:45, 3.48it/s] 15%|█▍ | 55353/371472 [4:24:42<24:28:19, 3.59it/s] 15%|█▍ | 55354/371472 [4:24:42<24:39:31, 3.56it/s] 15%|█▍ | 55355/371472 [4:24:43<23:53:59, 3.67it/s] 15%|█▍ | 55356/371472 [4:24:43<23:20:40, 3.76it/s] 15%|█▍ | 55357/371472 [4:24:43<23:11:43, 3.79it/s] 15%|█▍ | 55358/371472 [4:24:43<23:37:33, 3.72it/s] 15%|█▍ | 55359/371472 [4:24:44<24:41:37, 3.56it/s] 15%|█▍ | 55360/371472 [4:24:44<23:55:53, 3.67it/s] {'loss': 4.2524, 'learning_rate': 8.662867116292098e-07, 'epoch': 2.38} + 15%|█▍ | 55360/371472 [4:24:44<23:55:53, 3.67it/s] 15%|█▍ | 55361/371472 [4:24:44<23:30:53, 3.73it/s] 15%|█▍ | 55362/371472 [4:24:44<23:23:53, 3.75it/s] 15%|█▍ | 55363/371472 [4:24:45<23:16:41, 3.77it/s] 15%|█▍ | 55364/371472 [4:24:45<24:36:56, 3.57it/s] 15%|█▍ | 55365/371472 [4:24:45<23:54:49, 3.67it/s] 15%|█▍ | 55366/371472 [4:24:45<24:01:47, 3.65it/s] 15%|█▍ | 55367/371472 [4:24:46<23:36:05, 3.72it/s] 15%|█▍ | 55368/371472 [4:24:46<24:43:18, 3.55it/s] 15%|█▍ | 55369/371472 [4:24:46<24:08:25, 3.64it/s] 15%|█▍ | 55370/371472 [4:24:47<25:57:27, 3.38it/s] 15%|█▍ | 55371/371472 [4:24:47<25:25:07, 3.45it/s] 15%|█▍ | 55372/371472 [4:24:47<24:41:03, 3.56it/s] 15%|█▍ | 55373/371472 [4:24:47<23:59:11, 3.66it/s] 15%|█▍ | 55374/371472 [4:24:48<24:14:45, 3.62it/s] 15%|█▍ | 55375/371472 [4:24:48<23:39:34, 3.71it/s] 15%|█▍ | 55376/371472 [4:24:48<23:58:08, 3.66it/s] 15%|█▍ | 55377/371472 [4:24:49<24:12:15, 3.63it/s] 15%|█▍ | 55378/371472 [4:24:49<24:00:52, 3.66it/s] 15%|█▍ | 55379/371472 [4:24:49<24:28:19, 3.59it/s] 15%|█▍ | 55380/371472 [4:24:49<24:12:25, 3.63it/s] {'loss': 4.307, 'learning_rate': 8.662382296537309e-07, 'epoch': 2.39} + 15%|█▍ | 55380/371472 [4:24:49<24:12:25, 3.63it/s] 15%|█▍ | 55381/371472 [4:24:50<23:49:26, 3.69it/s] 15%|█▍ | 55382/371472 [4:24:50<25:16:10, 3.47it/s] 15%|█▍ | 55383/371472 [4:24:50<24:32:57, 3.58it/s] 15%|█▍ | 55384/371472 [4:24:50<23:42:21, 3.70it/s] 15%|█▍ | 55385/371472 [4:24:51<22:59:01, 3.82it/s] 15%|█▍ | 55386/371472 [4:24:51<22:31:22, 3.90it/s] 15%|█▍ | 55387/371472 [4:24:51<22:50:57, 3.84it/s] 15%|█▍ | 55388/371472 [4:24:51<22:55:01, 3.83it/s] 15%|█▍ | 55389/371472 [4:24:52<23:16:06, 3.77it/s] 15%|█▍ | 55390/371472 [4:24:52<23:33:15, 3.73it/s] 15%|█▍ | 55391/371472 [4:24:52<24:34:28, 3.57it/s] 15%|█▍ | 55392/371472 [4:24:53<23:43:17, 3.70it/s] 15%|█▍ | 55393/371472 [4:24:53<25:28:55, 3.45it/s] 15%|█▍ | 55394/371472 [4:24:53<29:28:21, 2.98it/s] 15%|█▍ | 55395/371472 [4:24:54<28:32:01, 3.08it/s] 15%|█▍ | 55396/371472 [4:24:54<26:31:47, 3.31it/s] 15%|█▍ | 55397/371472 [4:24:54<26:05:13, 3.37it/s] 15%|█▍ | 55398/371472 [4:24:54<25:18:08, 3.47it/s] 15%|█▍ | 55399/371472 [4:24:55<25:47:04, 3.41it/s] 15%|█▍ | 55400/371472 [4:24:55<24:59:53, 3.51it/s] {'loss': 4.2786, 'learning_rate': 8.66189747678252e-07, 'epoch': 2.39} + 15%|█▍ | 55400/371472 [4:24:55<24:59:53, 3.51it/s] 15%|█▍ | 55401/371472 [4:24:55<25:36:16, 3.43it/s] 15%|█▍ | 55402/371472 [4:24:56<26:21:32, 3.33it/s] 15%|█▍ | 55403/371472 [4:24:56<25:13:22, 3.48it/s] 15%|█▍ | 55404/371472 [4:24:56<26:17:53, 3.34it/s] 15%|█▍ | 55405/371472 [4:24:57<25:54:45, 3.39it/s] 15%|█▍ | 55406/371472 [4:24:57<25:05:58, 3.50it/s] 15%|█▍ | 55407/371472 [4:24:57<25:08:23, 3.49it/s] 15%|█▍ | 55408/371472 [4:24:57<24:42:16, 3.55it/s] 15%|█▍ | 55409/371472 [4:24:58<25:01:24, 3.51it/s] 15%|█▍ | 55410/371472 [4:24:58<25:39:58, 3.42it/s] 15%|█▍ | 55411/371472 [4:24:58<25:22:27, 3.46it/s] 15%|█▍ | 55412/371472 [4:24:59<26:48:46, 3.27it/s] 15%|█▍ | 55413/371472 [4:24:59<24:58:32, 3.52it/s] 15%|█▍ | 55414/371472 [4:24:59<24:24:11, 3.60it/s] 15%|█▍ | 55415/371472 [4:24:59<23:32:35, 3.73it/s] 15%|█▍ | 55416/371472 [4:25:00<25:37:03, 3.43it/s] 15%|█▍ | 55417/371472 [4:25:00<24:39:39, 3.56it/s] 15%|█▍ | 55418/371472 [4:25:00<24:00:34, 3.66it/s] 15%|█▍ | 55419/371472 [4:25:01<24:47:56, 3.54it/s] 15%|█▍ | 55420/371472 [4:25:01<24:45:53, 3.55it/s] {'loss': 3.9889, 'learning_rate': 8.661412657027731e-07, 'epoch': 2.39} + 15%|█▍ | 55420/371472 [4:25:01<24:45:53, 3.55it/s] 15%|█▍ | 55421/371472 [4:25:01<25:18:11, 3.47it/s] 15%|█▍ | 55422/371472 [4:25:01<24:13:44, 3.62it/s] 15%|█▍ | 55423/371472 [4:25:02<24:34:11, 3.57it/s] 15%|█▍ | 55424/371472 [4:25:02<25:12:51, 3.48it/s] 15%|█▍ | 55425/371472 [4:25:02<26:53:15, 3.27it/s] 15%|█▍ | 55426/371472 [4:25:03<26:19:50, 3.33it/s] 15%|█▍ | 55427/371472 [4:25:03<25:23:39, 3.46it/s] 15%|█▍ | 55428/371472 [4:25:03<28:56:18, 3.03it/s] 15%|█▍ | 55429/371472 [4:25:03<26:31:39, 3.31it/s] 15%|█▍ | 55430/371472 [4:25:04<25:10:41, 3.49it/s] 15%|█▍ | 55431/371472 [4:25:04<25:42:18, 3.42it/s] 15%|█▍ | 55432/371472 [4:25:04<24:07:32, 3.64it/s] 15%|█▍ | 55433/371472 [4:25:05<24:58:51, 3.51it/s] 15%|█▍ | 55434/371472 [4:25:05<24:00:35, 3.66it/s] 15%|█▍ | 55435/371472 [4:25:05<26:14:22, 3.35it/s] 15%|█▍ | 55436/371472 [4:25:05<25:34:03, 3.43it/s] 15%|█▍ | 55437/371472 [4:25:06<24:38:34, 3.56it/s] 15%|█▍ | 55438/371472 [4:25:06<24:24:35, 3.60it/s] 15%|█▍ | 55439/371472 [4:25:06<24:33:40, 3.57it/s] 15%|█▍ | 55440/371472 [4:25:07<25:54:26, 3.39it/s] {'loss': 4.381, 'learning_rate': 8.660927837272943e-07, 'epoch': 2.39} + 15%|█▍ | 55440/371472 [4:25:07<25:54:26, 3.39it/s] 15%|█▍ | 55441/371472 [4:25:07<25:36:32, 3.43it/s] 15%|█▍ | 55442/371472 [4:25:07<23:51:59, 3.68it/s] 15%|█▍ | 55443/371472 [4:25:07<23:10:37, 3.79it/s] 15%|█▍ | 55444/371472 [4:25:08<24:02:08, 3.65it/s] 15%|█▍ | 55445/371472 [4:25:08<29:57:09, 2.93it/s] 15%|█▍ | 55446/371472 [4:25:08<27:18:10, 3.22it/s] 15%|█▍ | 55447/371472 [4:25:09<26:39:11, 3.29it/s] 15%|█▍ | 55448/371472 [4:25:09<25:38:09, 3.42it/s] 15%|█▍ | 55449/371472 [4:25:09<25:09:54, 3.49it/s] 15%|█▍ | 55450/371472 [4:25:09<24:19:00, 3.61it/s] 15%|█▍ | 55451/371472 [4:25:10<24:07:26, 3.64it/s] 15%|█▍ | 55452/371472 [4:25:10<24:24:35, 3.60it/s] 15%|█▍ | 55453/371472 [4:25:10<24:26:45, 3.59it/s] 15%|█▍ | 55454/371472 [4:25:11<25:41:47, 3.42it/s] 15%|█▍ | 55455/371472 [4:25:11<24:26:39, 3.59it/s] 15%|█▍ | 55456/371472 [4:25:11<24:43:51, 3.55it/s] 15%|█▍ | 55457/371472 [4:25:11<24:07:26, 3.64it/s] 15%|█▍ | 55458/371472 [4:25:12<23:55:33, 3.67it/s] 15%|█▍ | 55459/371472 [4:25:12<25:17:00, 3.47it/s] 15%|█▍ | 55460/371472 [4:25:12<24:33:42, 3.57it/s] {'loss': 4.3136, 'learning_rate': 8.660443017518154e-07, 'epoch': 2.39} + 15%|█▍ | 55460/371472 [4:25:12<24:33:42, 3.57it/s] 15%|█▍ | 55461/371472 [4:25:13<24:43:59, 3.55it/s] 15%|█▍ | 55462/371472 [4:25:13<24:22:54, 3.60it/s] 15%|█▍ | 55463/371472 [4:25:13<23:24:27, 3.75it/s] 15%|█▍ | 55464/371472 [4:25:13<22:36:39, 3.88it/s] 15%|█▍ | 55465/371472 [4:25:14<21:55:22, 4.00it/s] 15%|█▍ | 55466/371472 [4:25:14<22:10:29, 3.96it/s] 15%|█▍ | 55467/371472 [4:25:14<22:37:23, 3.88it/s] 15%|█▍ | 55468/371472 [4:25:14<26:03:09, 3.37it/s] 15%|█▍ | 55469/371472 [4:25:15<25:20:31, 3.46it/s] 15%|█▍ | 55470/371472 [4:25:15<25:57:36, 3.38it/s] 15%|█▍ | 55471/371472 [4:25:15<24:36:57, 3.57it/s] 15%|█▍ | 55472/371472 [4:25:16<25:31:29, 3.44it/s] 15%|█▍ | 55473/371472 [4:25:16<28:43:42, 3.06it/s] 15%|█▍ | 55474/371472 [4:25:16<28:17:22, 3.10it/s] 15%|█▍ | 55475/371472 [4:25:17<26:32:27, 3.31it/s] 15%|█▍ | 55476/371472 [4:25:17<24:55:58, 3.52it/s] 15%|█▍ | 55477/371472 [4:25:17<24:54:55, 3.52it/s] 15%|█▍ | 55478/371472 [4:25:17<23:52:25, 3.68it/s] 15%|█▍ | 55479/371472 [4:25:18<24:18:09, 3.61it/s] 15%|█▍ | 55480/371472 [4:25:18<23:49:34, 3.68it/s] {'loss': 4.1068, 'learning_rate': 8.659958197763364e-07, 'epoch': 2.39} + 15%|█▍ | 55480/371472 [4:25:18<23:49:34, 3.68it/s] 15%|█▍ | 55481/371472 [4:25:18<23:29:50, 3.74it/s] 15%|█▍ | 55482/371472 [4:25:18<23:41:16, 3.71it/s] 15%|█▍ | 55483/371472 [4:25:19<23:57:23, 3.66it/s] 15%|█▍ | 55484/371472 [4:25:19<23:14:08, 3.78it/s] 15%|█▍ | 55485/371472 [4:25:19<23:06:41, 3.80it/s] 15%|█▍ | 55486/371472 [4:25:19<22:27:04, 3.91it/s] 15%|█▍ | 55487/371472 [4:25:20<22:18:03, 3.94it/s] 15%|█▍ | 55488/371472 [4:25:20<25:14:59, 3.48it/s] 15%|█▍ | 55489/371472 [4:25:20<26:18:04, 3.34it/s] 15%|█▍ | 55490/371472 [4:25:21<26:25:49, 3.32it/s] 15%|█▍ | 55491/371472 [4:25:21<25:56:10, 3.38it/s] 15%|█▍ | 55492/371472 [4:25:21<27:04:06, 3.24it/s] 15%|█▍ | 55493/371472 [4:25:22<25:31:59, 3.44it/s] 15%|█▍ | 55494/371472 [4:25:22<24:59:54, 3.51it/s] 15%|█▍ | 55495/371472 [4:25:22<24:17:26, 3.61it/s] 15%|█▍ | 55496/371472 [4:25:22<24:06:37, 3.64it/s] 15%|█▍ | 55497/371472 [4:25:23<23:25:33, 3.75it/s] 15%|█▍ | 55498/371472 [4:25:23<23:39:10, 3.71it/s] 15%|█▍ | 55499/371472 [4:25:23<23:49:45, 3.68it/s] 15%|█▍ | 55500/371472 [4:25:23<23:59:09, 3.66it/s] {'loss': 4.2248, 'learning_rate': 8.659473378008575e-07, 'epoch': 2.39} + 15%|█▍ | 55500/371472 [4:25:23<23:59:09, 3.66it/s] 15%|█▍ | 55501/371472 [4:25:24<24:20:13, 3.61it/s] 15%|█▍ | 55502/371472 [4:25:24<23:39:59, 3.71it/s] 15%|█▍ | 55503/371472 [4:25:24<24:02:35, 3.65it/s] 15%|█▍ | 55504/371472 [4:25:25<24:11:08, 3.63it/s] 15%|█▍ | 55505/371472 [4:25:25<24:10:58, 3.63it/s] 15%|█▍ | 55506/371472 [4:25:25<23:59:17, 3.66it/s] 15%|█▍ | 55507/371472 [4:25:25<25:28:21, 3.45it/s] 15%|█▍ | 55508/371472 [4:25:26<25:24:59, 3.45it/s] 15%|█▍ | 55509/371472 [4:25:26<25:55:20, 3.39it/s] 15%|█▍ | 55510/371472 [4:25:26<26:08:56, 3.36it/s] 15%|█▍ | 55511/371472 [4:25:27<25:29:38, 3.44it/s] 15%|█▍ | 55512/371472 [4:25:27<24:40:46, 3.56it/s] 15%|█▍ | 55513/371472 [4:25:27<23:31:22, 3.73it/s] 15%|█▍ | 55514/371472 [4:25:27<24:11:14, 3.63it/s] 15%|█▍ | 55515/371472 [4:25:28<24:32:13, 3.58it/s] 15%|█▍ | 55516/371472 [4:25:28<25:47:55, 3.40it/s] 15%|█▍ | 55517/371472 [4:25:28<25:48:15, 3.40it/s] 15%|█▍ | 55518/371472 [4:25:29<25:05:41, 3.50it/s] 15%|█▍ | 55519/371472 [4:25:29<25:20:51, 3.46it/s] 15%|█▍ | 55520/371472 [4:25:29<24:00:58, 3.65it/s] {'loss': 3.9636, 'learning_rate': 8.658988558253787e-07, 'epoch': 2.39} + 15%|█▍ | 55520/371472 [4:25:29<24:00:58, 3.65it/s] 15%|█▍ | 55521/371472 [4:25:29<23:21:28, 3.76it/s] 15%|█▍ | 55522/371472 [4:25:30<22:45:48, 3.86it/s] 15%|█▍ | 55523/371472 [4:25:30<23:05:52, 3.80it/s] 15%|█▍ | 55524/371472 [4:25:30<24:08:24, 3.64it/s] 15%|█▍ | 55525/371472 [4:25:30<24:16:11, 3.62it/s] 15%|█▍ | 55526/371472 [4:25:31<25:21:42, 3.46it/s] 15%|█▍ | 55527/371472 [4:25:31<23:56:40, 3.67it/s] 15%|█▍ | 55528/371472 [4:25:31<24:23:16, 3.60it/s] 15%|█▍ | 55529/371472 [4:25:32<25:05:56, 3.50it/s] 15%|█▍ | 55530/371472 [4:25:32<24:33:16, 3.57it/s] 15%|█▍ | 55531/371472 [4:25:32<23:53:20, 3.67it/s] 15%|█▍ | 55532/371472 [4:25:32<24:08:25, 3.64it/s] 15%|█▍ | 55533/371472 [4:25:33<23:36:11, 3.72it/s] 15%|█▍ | 55534/371472 [4:25:33<24:00:58, 3.65it/s] 15%|█▍ | 55535/371472 [4:25:33<25:54:38, 3.39it/s] 15%|█▍ | 55536/371472 [4:25:34<25:36:28, 3.43it/s] 15%|█▍ | 55537/371472 [4:25:34<25:40:49, 3.42it/s] 15%|█▍ | 55538/371472 [4:25:34<25:38:17, 3.42it/s] 15%|█▍ | 55539/371472 [4:25:34<26:47:07, 3.28it/s] 15%|█▍ | 55540/371472 [4:25:35<25:26:46, 3.45it/s] {'loss': 4.3415, 'learning_rate': 8.658503738498997e-07, 'epoch': 2.39} + 15%|█▍ | 55540/371472 [4:25:35<25:26:46, 3.45it/s] 15%|█▍ | 55541/371472 [4:25:35<26:41:50, 3.29it/s] 15%|█▍ | 55542/371472 [4:25:35<26:37:27, 3.30it/s] 15%|█▍ | 55543/371472 [4:25:36<25:17:30, 3.47it/s] 15%|█▍ | 55544/371472 [4:25:36<24:43:56, 3.55it/s] 15%|█▍ | 55545/371472 [4:25:36<23:57:27, 3.66it/s] 15%|█▍ | 55546/371472 [4:25:36<25:29:29, 3.44it/s] 15%|█▍ | 55547/371472 [4:25:37<24:19:25, 3.61it/s] 15%|█▍ | 55548/371472 [4:25:37<24:58:35, 3.51it/s] 15%|█▍ | 55549/371472 [4:25:37<23:35:31, 3.72it/s] 15%|█▍ | 55550/371472 [4:25:38<25:07:38, 3.49it/s] 15%|█▍ | 55551/371472 [4:25:38<23:54:32, 3.67it/s] 15%|█▍ | 55552/371472 [4:25:38<24:32:47, 3.58it/s] 15%|█▍ | 55553/371472 [4:25:38<24:22:24, 3.60it/s] 15%|█▍ | 55554/371472 [4:25:39<23:23:20, 3.75it/s] 15%|█▍ | 55555/371472 [4:25:39<24:19:09, 3.61it/s] 15%|█▍ | 55556/371472 [4:25:39<23:58:39, 3.66it/s] 15%|█▍ | 55557/371472 [4:25:39<24:11:40, 3.63it/s] 15%|█▍ | 55558/371472 [4:25:40<24:14:48, 3.62it/s] 15%|█▍ | 55559/371472 [4:25:40<23:26:29, 3.74it/s] 15%|█▍ | 55560/371472 [4:25:40<23:20:20, 3.76it/s] {'loss': 4.2074, 'learning_rate': 8.658018918744207e-07, 'epoch': 2.39} + 15%|█▍ | 55560/371472 [4:25:40<23:20:20, 3.76it/s] 15%|█▍ | 55561/371472 [4:25:41<25:10:27, 3.49it/s] 15%|█▍ | 55562/371472 [4:25:41<25:05:44, 3.50it/s] 15%|█▍ | 55563/371472 [4:25:41<24:39:03, 3.56it/s] 15%|█▍ | 55564/371472 [4:25:41<24:51:47, 3.53it/s] 15%|█▍ | 55565/371472 [4:25:42<26:08:38, 3.36it/s] 15%|█▍ | 55566/371472 [4:25:42<26:04:20, 3.37it/s] 15%|█▍ | 55567/371472 [4:25:42<25:00:19, 3.51it/s] 15%|█▍ | 55568/371472 [4:25:43<24:04:21, 3.65it/s] 15%|█▍ | 55569/371472 [4:25:43<23:53:24, 3.67it/s] 15%|█▍ | 55570/371472 [4:25:43<24:10:01, 3.63it/s] 15%|█▍ | 55571/371472 [4:25:43<23:25:20, 3.75it/s] 15%|█▍ | 55572/371472 [4:25:44<23:19:21, 3.76it/s] 15%|█▍ | 55573/371472 [4:25:44<24:22:57, 3.60it/s] 15%|█▍ | 55574/371472 [4:25:44<23:55:49, 3.67it/s] 15%|█▍ | 55575/371472 [4:25:44<23:30:35, 3.73it/s] 15%|█▍ | 55576/371472 [4:25:45<23:05:02, 3.80it/s] 15%|█▍ | 55577/371472 [4:25:45<23:27:02, 3.74it/s] 15%|█▍ | 55578/371472 [4:25:45<22:59:09, 3.82it/s] 15%|█▍ | 55579/371472 [4:25:46<23:35:34, 3.72it/s] 15%|█▍ | 55580/371472 [4:25:46<23:37:16, 3.71it/s] {'loss': 4.472, 'learning_rate': 8.65753409898942e-07, 'epoch': 2.39} + 15%|█▍ | 55580/371472 [4:25:46<23:37:16, 3.71it/s] 15%|█▍ | 55581/371472 [4:25:46<24:22:00, 3.60it/s] 15%|█▍ | 55582/371472 [4:25:46<24:12:01, 3.63it/s] 15%|█▍ | 55583/371472 [4:25:47<23:48:40, 3.69it/s] 15%|█▍ | 55584/371472 [4:25:47<27:24:00, 3.20it/s] 15%|█▍ | 55585/371472 [4:25:47<25:28:23, 3.44it/s] 15%|█▍ | 55586/371472 [4:25:48<25:13:04, 3.48it/s] 15%|█▍ | 55587/371472 [4:25:48<24:21:50, 3.60it/s] 15%|█▍ | 55588/371472 [4:25:48<24:13:06, 3.62it/s] 15%|█▍ | 55589/371472 [4:25:48<24:58:16, 3.51it/s] 15%|█▍ | 55590/371472 [4:25:49<24:01:12, 3.65it/s] 15%|█▍ | 55591/371472 [4:25:49<25:30:13, 3.44it/s] 15%|█▍ | 55592/371472 [4:25:49<25:28:00, 3.45it/s] 15%|█▍ | 55593/371472 [4:25:50<26:08:45, 3.36it/s] 15%|█▍ | 55594/371472 [4:25:50<25:15:36, 3.47it/s] 15%|█▍ | 55595/371472 [4:25:50<25:00:49, 3.51it/s] 15%|█▍ | 55596/371472 [4:25:50<24:33:09, 3.57it/s] 15%|█▍ | 55597/371472 [4:25:51<29:12:55, 3.00it/s] 15%|█▍ | 55598/371472 [4:25:51<28:12:33, 3.11it/s] 15%|█▍ | 55599/371472 [4:25:51<27:01:21, 3.25it/s] 15%|█▍ | 55600/371472 [4:25:52<26:42:27, 3.29it/s] {'loss': 4.221, 'learning_rate': 8.657049279234632e-07, 'epoch': 2.39} + 15%|█▍ | 55600/371472 [4:25:52<26:42:27, 3.29it/s] 15%|█▍ | 55601/371472 [4:25:52<25:12:54, 3.48it/s] 15%|█▍ | 55602/371472 [4:25:52<24:08:48, 3.63it/s] 15%|█▍ | 55603/371472 [4:25:52<23:41:36, 3.70it/s] 15%|█▍ | 55604/371472 [4:25:53<23:30:18, 3.73it/s] 15%|█▍ | 55605/371472 [4:25:53<23:33:34, 3.72it/s] 15%|█▍ | 55606/371472 [4:25:53<23:36:01, 3.72it/s] 15%|█▍ | 55607/371472 [4:25:54<23:46:14, 3.69it/s] 15%|█▍ | 55608/371472 [4:25:54<24:59:12, 3.51it/s] 15%|█▍ | 55609/371472 [4:25:54<24:33:30, 3.57it/s] 15%|█▍ | 55610/371472 [4:25:54<24:02:43, 3.65it/s] 15%|█▍ | 55611/371472 [4:25:55<24:56:28, 3.52it/s] 15%|█▍ | 55612/371472 [4:25:55<25:02:17, 3.50it/s] 15%|█▍ | 55613/371472 [4:25:55<24:19:58, 3.61it/s] 15%|█▍ | 55614/371472 [4:25:56<25:17:10, 3.47it/s] 15%|█▍ | 55615/371472 [4:25:56<25:00:11, 3.51it/s] 15%|█▍ | 55616/371472 [4:25:56<26:20:55, 3.33it/s] 15%|█▍ | 55617/371472 [4:25:56<25:06:33, 3.49it/s] 15%|█▍ | 55618/371472 [4:25:57<25:21:55, 3.46it/s] 15%|█▍ | 55619/371472 [4:25:57<24:51:29, 3.53it/s] 15%|█▍ | 55620/371472 [4:25:57<25:01:16, 3.51it/s] {'loss': 4.027, 'learning_rate': 8.656564459479841e-07, 'epoch': 2.4} + 15%|█▍ | 55620/371472 [4:25:57<25:01:16, 3.51it/s] 15%|█▍ | 55621/371472 [4:25:58<24:18:34, 3.61it/s] 15%|█▍ | 55622/371472 [4:25:58<24:04:58, 3.64it/s] 15%|█▍ | 55623/371472 [4:25:58<23:23:57, 3.75it/s] 15%|█▍ | 55624/371472 [4:25:58<24:33:30, 3.57it/s] 15%|█▍ | 55625/371472 [4:25:59<24:02:26, 3.65it/s] 15%|█▍ | 55626/371472 [4:25:59<24:07:46, 3.64it/s] 15%|█▍ | 55627/371472 [4:25:59<23:44:23, 3.70it/s] 15%|█▍ | 55628/371472 [4:25:59<23:16:31, 3.77it/s] 15%|█▍ | 55629/371472 [4:26:00<27:02:05, 3.25it/s] 15%|█▍ | 55630/371472 [4:26:00<25:45:47, 3.41it/s] 15%|█▍ | 55631/371472 [4:26:00<26:36:52, 3.30it/s] 15%|█▍ | 55632/371472 [4:26:01<25:31:12, 3.44it/s] 15%|█▍ | 55633/371472 [4:26:01<24:20:02, 3.61it/s] 15%|█▍ | 55634/371472 [4:26:01<25:15:57, 3.47it/s] 15%|█▍ | 55635/371472 [4:26:02<24:48:20, 3.54it/s] 15%|█▍ | 55636/371472 [4:26:02<26:05:15, 3.36it/s] 15%|█▍ | 55637/371472 [4:26:02<24:40:17, 3.56it/s] 15%|█▍ | 55638/371472 [4:26:02<25:07:30, 3.49it/s] 15%|█▍ | 55639/371472 [4:26:03<24:24:52, 3.59it/s] 15%|█▍ | 55640/371472 [4:26:03<24:41:06, 3.55it/s] {'loss': 4.1817, 'learning_rate': 8.656079639725052e-07, 'epoch': 2.4} + 15%|█▍ | 55640/371472 [4:26:03<24:41:06, 3.55it/s] 15%|█▍ | 55641/371472 [4:26:03<24:17:20, 3.61it/s] 15%|█▍ | 55642/371472 [4:26:03<23:57:51, 3.66it/s] 15%|█▍ | 55643/371472 [4:26:04<26:06:09, 3.36it/s] 15%|█▍ | 55644/371472 [4:26:04<25:44:28, 3.41it/s] 15%|█▍ | 55645/371472 [4:26:04<24:51:09, 3.53it/s] 15%|█▍ | 55646/371472 [4:26:05<24:59:31, 3.51it/s] 15%|█▍ | 55647/371472 [4:26:05<24:35:09, 3.57it/s] 15%|█▍ | 55648/371472 [4:26:05<25:48:06, 3.40it/s] 15%|█▍ | 55649/371472 [4:26:06<25:21:27, 3.46it/s] 15%|█▍ | 55650/371472 [4:26:06<25:35:38, 3.43it/s] 15%|█▍ | 55651/371472 [4:26:06<25:32:19, 3.44it/s] 15%|█▍ | 55652/371472 [4:26:06<25:40:31, 3.42it/s] 15%|█▍ | 55653/371472 [4:26:07<26:20:00, 3.33it/s] 15%|█▍ | 55654/371472 [4:26:07<26:12:57, 3.35it/s] 15%|█▍ | 55655/371472 [4:26:07<24:41:53, 3.55it/s] 15%|█▍ | 55656/371472 [4:26:08<23:54:29, 3.67it/s] 15%|█▍ | 55657/371472 [4:26:08<23:58:43, 3.66it/s] 15%|█▍ | 55658/371472 [4:26:08<24:46:20, 3.54it/s] 15%|█▍ | 55659/371472 [4:26:08<24:32:02, 3.58it/s] 15%|█▍ | 55660/371472 [4:26:09<24:14:41, 3.62it/s] {'loss': 3.85, 'learning_rate': 8.655594819970265e-07, 'epoch': 2.4} + 15%|█▍ | 55660/371472 [4:26:09<24:14:41, 3.62it/s] 15%|█▍ | 55661/371472 [4:26:09<25:27:16, 3.45it/s] 15%|█▍ | 55662/371472 [4:26:09<25:59:21, 3.38it/s] 15%|█▍ | 55663/371472 [4:26:10<25:06:44, 3.49it/s] 15%|█▍ | 55664/371472 [4:26:10<29:16:04, 3.00it/s] 15%|█▍ | 55665/371472 [4:26:10<27:45:37, 3.16it/s] 15%|█▍ | 55666/371472 [4:26:11<27:19:39, 3.21it/s] 15%|█▍ | 55667/371472 [4:26:11<27:24:13, 3.20it/s] 15%|█▍ | 55668/371472 [4:26:11<26:31:25, 3.31it/s] 15%|█▍ | 55669/371472 [4:26:11<26:07:25, 3.36it/s] 15%|█▍ | 55670/371472 [4:26:12<25:29:55, 3.44it/s] 15%|█▍ | 55671/371472 [4:26:12<24:43:08, 3.55it/s] 15%|█▍ | 55672/371472 [4:26:12<24:13:12, 3.62it/s] 15%|█▍ | 55673/371472 [4:26:12<23:42:43, 3.70it/s] 15%|█▍ | 55674/371472 [4:26:13<24:34:51, 3.57it/s] 15%|█▍ | 55675/371472 [4:26:13<28:20:13, 3.10it/s] 15%|█▍ | 55676/371472 [4:26:13<26:21:02, 3.33it/s] 15%|█▍ | 55677/371472 [4:26:14<25:05:26, 3.50it/s] 15%|█▍ | 55678/371472 [4:26:14<26:05:42, 3.36it/s] 15%|█▍ | 55679/371472 [4:26:14<24:52:35, 3.53it/s] 15%|█▍ | 55680/371472 [4:26:15<25:00:46, 3.51it/s] {'loss': 4.1537, 'learning_rate': 8.655110000215476e-07, 'epoch': 2.4} + 15%|█▍ | 55680/371472 [4:26:15<25:00:46, 3.51it/s] 15%|█▍ | 55681/371472 [4:26:15<24:18:06, 3.61it/s] 15%|█▍ | 55682/371472 [4:26:15<24:27:34, 3.59it/s] 15%|█▍ | 55683/371472 [4:26:15<24:50:23, 3.53it/s] 15%|█▍ | 55684/371472 [4:26:16<25:02:08, 3.50it/s] 15%|█▍ | 55685/371472 [4:26:16<24:29:05, 3.58it/s] 15%|█▍ | 55686/371472 [4:26:16<24:24:20, 3.59it/s] 15%|█▍ | 55687/371472 [4:26:17<24:26:09, 3.59it/s] 15%|█▍ | 55688/371472 [4:26:17<23:32:14, 3.73it/s] 15%|█▍ | 55689/371472 [4:26:17<26:05:24, 3.36it/s] 15%|█▍ | 55690/371472 [4:26:17<27:01:55, 3.24it/s] 15%|█▍ | 55691/371472 [4:26:18<25:28:18, 3.44it/s] 15%|█▍ | 55692/371472 [4:26:18<24:58:59, 3.51it/s] 15%|█▍ | 55693/371472 [4:26:18<24:57:47, 3.51it/s] 15%|█▍ | 55694/371472 [4:26:19<25:04:35, 3.50it/s] 15%|█▍ | 55695/371472 [4:26:19<24:04:56, 3.64it/s] 15%|█▍ | 55696/371472 [4:26:19<25:32:00, 3.44it/s] 15%|█▍ | 55697/371472 [4:26:19<26:12:17, 3.35it/s] 15%|█▍ | 55698/371472 [4:26:20<24:47:28, 3.54it/s] 15%|█▍ | 55699/371472 [4:26:20<24:54:35, 3.52it/s] 15%|█▍ | 55700/371472 [4:26:20<24:38:51, 3.56it/s] {'loss': 4.1673, 'learning_rate': 8.654625180460686e-07, 'epoch': 2.4} + 15%|█▍ | 55700/371472 [4:26:20<24:38:51, 3.56it/s] 15%|█▍ | 55701/371472 [4:26:21<24:13:15, 3.62it/s] 15%|█▍ | 55702/371472 [4:26:21<25:19:22, 3.46it/s] 15%|█▍ | 55703/371472 [4:26:21<24:19:50, 3.61it/s] 15%|█▍ | 55704/371472 [4:26:21<23:46:05, 3.69it/s] 15%|█▍ | 55705/371472 [4:26:22<26:22:58, 3.32it/s] 15%|█▍ | 55706/371472 [4:26:22<25:16:23, 3.47it/s] 15%|█▍ | 55707/371472 [4:26:22<23:58:30, 3.66it/s] 15%|█▍ | 55708/371472 [4:26:23<24:28:43, 3.58it/s] 15%|█▍ | 55709/371472 [4:26:23<25:33:03, 3.43it/s] 15%|█▍ | 55710/371472 [4:26:23<24:40:31, 3.55it/s] 15%|█▍ | 55711/371472 [4:26:23<25:14:08, 3.48it/s] 15%|█▍ | 55712/371472 [4:26:24<25:55:09, 3.38it/s] 15%|█▍ | 55713/371472 [4:26:24<24:55:32, 3.52it/s] 15%|█▍ | 55714/371472 [4:26:24<23:38:12, 3.71it/s] 15%|█▍ | 55715/371472 [4:26:24<24:04:15, 3.64it/s] 15%|█▍ | 55716/371472 [4:26:25<24:34:09, 3.57it/s] 15%|█▍ | 55717/371472 [4:26:25<23:55:44, 3.67it/s] 15%|█▍ | 55718/371472 [4:26:25<23:58:00, 3.66it/s] 15%|█▍ | 55719/371472 [4:26:26<23:46:30, 3.69it/s] 15%|█▍ | 55720/371472 [4:26:26<26:59:28, 3.25it/s] {'loss': 4.3035, 'learning_rate': 8.654140360705897e-07, 'epoch': 2.4} + 15%|█▍ | 55720/371472 [4:26:26<26:59:28, 3.25it/s] 15%|█▌ | 55721/371472 [4:26:26<27:08:39, 3.23it/s] 15%|█▌ | 55722/371472 [4:26:27<27:58:06, 3.14it/s] 15%|█▌ | 55723/371472 [4:26:27<26:18:14, 3.33it/s] 15%|█▌ | 55724/371472 [4:26:27<25:56:27, 3.38it/s] 15%|█▌ | 55725/371472 [4:26:27<25:19:25, 3.46it/s] 15%|█▌ | 55726/371472 [4:26:28<26:22:29, 3.33it/s] 15%|█▌ | 55727/371472 [4:26:28<25:35:32, 3.43it/s] 15%|█▌ | 55728/371472 [4:26:28<26:22:09, 3.33it/s] 15%|█▌ | 55729/371472 [4:26:29<26:11:43, 3.35it/s] 15%|█▌ | 55730/371472 [4:26:29<25:22:20, 3.46it/s] 15%|█▌ | 55731/371472 [4:26:29<25:36:29, 3.42it/s] 15%|█▌ | 55732/371472 [4:26:29<24:47:02, 3.54it/s] 15%|█▌ | 55733/371472 [4:26:30<27:12:42, 3.22it/s] 15%|█▌ | 55734/371472 [4:26:30<25:35:08, 3.43it/s] 15%|█▌ | 55735/371472 [4:26:30<25:12:52, 3.48it/s] 15%|█▌ | 55736/371472 [4:26:31<25:53:32, 3.39it/s] 15%|█▌ | 55737/371472 [4:26:31<24:11:55, 3.62it/s] 15%|█▌ | 55738/371472 [4:26:31<24:16:27, 3.61it/s] 15%|█▌ | 55739/371472 [4:26:31<24:38:51, 3.56it/s] 15%|█▌ | 55740/371472 [4:26:32<24:44:20, 3.55it/s] {'loss': 4.3662, 'learning_rate': 8.653655540951108e-07, 'epoch': 2.4} + 15%|█▌ | 55740/371472 [4:26:32<24:44:20, 3.55it/s] 15%|█▌ | 55741/371472 [4:26:32<27:29:34, 3.19it/s] 15%|█▌ | 55742/371472 [4:26:32<26:33:07, 3.30it/s] 15%|█▌ | 55743/371472 [4:26:33<24:54:13, 3.52it/s] 15%|█▌ | 55744/371472 [4:26:33<24:10:15, 3.63it/s] 15%|█▌ | 55745/371472 [4:26:33<25:31:37, 3.44it/s] 15%|█▌ | 55746/371472 [4:26:34<24:59:18, 3.51it/s] 15%|█▌ | 55747/371472 [4:26:34<24:15:43, 3.61it/s] 15%|█▌ | 55748/371472 [4:26:34<23:58:38, 3.66it/s] 15%|█▌ | 55749/371472 [4:26:34<25:52:30, 3.39it/s] 15%|█▌ | 55750/371472 [4:26:35<25:12:24, 3.48it/s] 15%|█▌ | 55751/371472 [4:26:35<24:25:43, 3.59it/s] 15%|█▌ | 55752/371472 [4:26:35<23:53:24, 3.67it/s] 15%|█▌ | 55753/371472 [4:26:35<23:49:39, 3.68it/s] 15%|█▌ | 55754/371472 [4:26:36<23:14:56, 3.77it/s] 15%|█▌ | 55755/371472 [4:26:36<25:22:09, 3.46it/s] 15%|█▌ | 55756/371472 [4:26:36<24:17:38, 3.61it/s] 15%|█▌ | 55757/371472 [4:26:37<23:43:58, 3.70it/s] 15%|█▌ | 55758/371472 [4:26:37<23:15:03, 3.77it/s] 15%|█▌ | 55759/371472 [4:26:37<23:56:54, 3.66it/s] 15%|█▌ | 55760/371472 [4:26:37<23:23:45, 3.75it/s] {'loss': 4.3216, 'learning_rate': 8.65317072119632e-07, 'epoch': 2.4} + 15%|█▌ | 55760/371472 [4:26:37<23:23:45, 3.75it/s] 15%|█▌ | 55761/371472 [4:26:38<23:08:43, 3.79it/s] 15%|█▌ | 55762/371472 [4:26:38<23:39:26, 3.71it/s] 15%|█▌ | 55763/371472 [4:26:38<23:26:46, 3.74it/s] 15%|█▌ | 55764/371472 [4:26:38<24:28:38, 3.58it/s] 15%|█▌ | 55765/371472 [4:26:39<24:49:09, 3.53it/s] 15%|█▌ | 55766/371472 [4:26:39<24:19:42, 3.60it/s] 15%|█▌ | 55767/371472 [4:26:39<24:02:13, 3.65it/s] 15%|█▌ | 55768/371472 [4:26:40<23:00:14, 3.81it/s] 15%|█▌ | 55769/371472 [4:26:40<23:48:20, 3.68it/s] 15%|█▌ | 55770/371472 [4:26:40<23:44:22, 3.69it/s] 15%|█▌ | 55771/371472 [4:26:40<23:12:06, 3.78it/s] 15%|█▌ | 55772/371472 [4:26:41<22:48:29, 3.84it/s] 15%|█▌ | 55773/371472 [4:26:41<23:24:18, 3.75it/s] 15%|█▌ | 55774/371472 [4:26:41<23:58:54, 3.66it/s] 15%|█▌ | 55775/371472 [4:26:41<24:01:58, 3.65it/s] 15%|█▌ | 55776/371472 [4:26:42<23:18:17, 3.76it/s] 15%|█▌ | 55777/371472 [4:26:42<23:27:25, 3.74it/s] 15%|█▌ | 55778/371472 [4:26:42<25:47:37, 3.40it/s] 15%|█▌ | 55779/371472 [4:26:43<24:35:47, 3.57it/s] 15%|█▌ | 55780/371472 [4:26:43<24:18:58, 3.61it/s] {'loss': 4.1805, 'learning_rate': 8.65268590144153e-07, 'epoch': 2.4} + 15%|█▌ | 55780/371472 [4:26:43<24:18:58, 3.61it/s] 15%|█▌ | 55781/371472 [4:26:43<25:14:44, 3.47it/s] 15%|█▌ | 55782/371472 [4:26:43<24:20:19, 3.60it/s] 15%|█▌ | 55783/371472 [4:26:44<23:33:55, 3.72it/s] 15%|█▌ | 55784/371472 [4:26:44<23:35:51, 3.72it/s] 15%|█▌ | 55785/371472 [4:26:44<25:12:20, 3.48it/s] 15%|█▌ | 55786/371472 [4:26:45<26:18:23, 3.33it/s] 15%|█▌ | 55787/371472 [4:26:45<26:51:53, 3.26it/s] 15%|█▌ | 55788/371472 [4:26:45<26:09:49, 3.35it/s] 15%|█▌ | 55789/371472 [4:26:45<25:03:54, 3.50it/s] 15%|█▌ | 55790/371472 [4:26:46<25:48:08, 3.40it/s] 15%|█▌ | 55791/371472 [4:26:46<25:36:54, 3.42it/s] 15%|█▌ | 55792/371472 [4:26:46<24:31:25, 3.58it/s] 15%|█▌ | 55793/371472 [4:26:47<23:59:26, 3.66it/s] 15%|█▌ | 55794/371472 [4:26:47<25:02:25, 3.50it/s] 15%|█▌ | 55795/371472 [4:26:47<24:05:09, 3.64it/s] 15%|█▌ | 55796/371472 [4:26:47<24:21:30, 3.60it/s] 15%|█▌ | 55797/371472 [4:26:48<24:10:54, 3.63it/s] 15%|█▌ | 55798/371472 [4:26:48<24:31:33, 3.58it/s] 15%|█▌ | 55799/371472 [4:26:48<24:21:38, 3.60it/s] 15%|█▌ | 55800/371472 [4:26:48<24:00:42, 3.65it/s] {'loss': 4.1321, 'learning_rate': 8.652201081686741e-07, 'epoch': 2.4} + 15%|█▌ | 55800/371472 [4:26:48<24:00:42, 3.65it/s] 15%|█▌ | 55801/371472 [4:26:49<23:51:52, 3.67it/s] 15%|█▌ | 55802/371472 [4:26:49<23:50:52, 3.68it/s] 15%|█▌ | 55803/371472 [4:26:49<24:11:08, 3.63it/s] 15%|█▌ | 55804/371472 [4:26:50<23:37:54, 3.71it/s] 15%|█▌ | 55805/371472 [4:26:50<23:31:08, 3.73it/s] 15%|█▌ | 55806/371472 [4:26:50<23:43:32, 3.70it/s] 15%|█▌ | 55807/371472 [4:26:50<23:29:34, 3.73it/s] 15%|█▌ | 55808/371472 [4:26:51<23:36:06, 3.72it/s] 15%|█▌ | 55809/371472 [4:26:51<23:56:54, 3.66it/s] 15%|█▌ | 55810/371472 [4:26:51<23:05:22, 3.80it/s] 15%|█▌ | 55811/371472 [4:26:51<24:08:27, 3.63it/s] 15%|█▌ | 55812/371472 [4:26:52<25:05:57, 3.49it/s] 15%|█▌ | 55813/371472 [4:26:52<24:40:16, 3.55it/s] 15%|█▌ | 55814/371472 [4:26:52<24:06:59, 3.64it/s] 15%|█▌ | 55815/371472 [4:26:53<23:38:51, 3.71it/s] 15%|█▌ | 55816/371472 [4:26:53<23:12:14, 3.78it/s] 15%|█▌ | 55817/371472 [4:26:53<23:09:59, 3.78it/s] 15%|█▌ | 55818/371472 [4:26:53<23:44:23, 3.69it/s] 15%|█▌ | 55819/371472 [4:26:54<23:15:28, 3.77it/s] 15%|█▌ | 55820/371472 [4:26:54<24:16:39, 3.61it/s] {'loss': 4.3409, 'learning_rate': 8.651716261931953e-07, 'epoch': 2.4} + 15%|█▌ | 55820/371472 [4:26:54<24:16:39, 3.61it/s] 15%|█▌ | 55821/371472 [4:26:54<24:27:16, 3.59it/s] 15%|█▌ | 55822/371472 [4:26:54<24:02:26, 3.65it/s] 15%|█▌ | 55823/371472 [4:26:55<23:48:15, 3.68it/s] 15%|█▌ | 55824/371472 [4:26:55<24:07:41, 3.63it/s] 15%|█▌ | 55825/371472 [4:26:55<26:27:15, 3.31it/s] 15%|█▌ | 55826/371472 [4:26:56<25:54:45, 3.38it/s] 15%|█▌ | 55827/371472 [4:26:56<26:41:29, 3.28it/s] 15%|█▌ | 55828/371472 [4:26:56<28:37:59, 3.06it/s] 15%|█▌ | 55829/371472 [4:26:57<26:38:25, 3.29it/s] 15%|█▌ | 55830/371472 [4:26:57<26:10:35, 3.35it/s] 15%|█▌ | 55831/371472 [4:26:57<26:24:28, 3.32it/s] 15%|█▌ | 55832/371472 [4:26:57<25:37:33, 3.42it/s] 15%|█▌ | 55833/371472 [4:26:58<25:45:51, 3.40it/s] 15%|█▌ | 55834/371472 [4:26:58<25:51:49, 3.39it/s] 15%|█▌ | 55835/371472 [4:26:58<25:30:20, 3.44it/s] 15%|█▌ | 55836/371472 [4:26:59<24:08:54, 3.63it/s] 15%|█▌ | 55837/371472 [4:26:59<24:14:04, 3.62it/s] 15%|█▌ | 55838/371472 [4:26:59<23:50:33, 3.68it/s] 15%|█▌ | 55839/371472 [4:26:59<23:04:15, 3.80it/s] 15%|█▌ | 55840/371472 [4:27:00<24:50:20, 3.53it/s] {'loss': 4.2178, 'learning_rate': 8.651231442177163e-07, 'epoch': 2.41} + 15%|█▌ | 55840/371472 [4:27:00<24:50:20, 3.53it/s] 15%|█▌ | 55841/371472 [4:27:00<25:10:42, 3.48it/s] 15%|█▌ | 55842/371472 [4:27:00<25:28:27, 3.44it/s] 15%|█▌ | 55843/371472 [4:27:01<24:40:55, 3.55it/s] 15%|█▌ | 55844/371472 [4:27:01<25:16:53, 3.47it/s] 15%|█▌ | 55845/371472 [4:27:01<24:52:41, 3.52it/s] 15%|█▌ | 55846/371472 [4:27:01<24:12:20, 3.62it/s] 15%|█▌ | 55847/371472 [4:27:02<24:15:01, 3.62it/s] 15%|█▌ | 55848/371472 [4:27:02<24:00:32, 3.65it/s] 15%|█▌ | 55849/371472 [4:27:02<23:36:58, 3.71it/s] 15%|█▌ | 55850/371472 [4:27:02<22:55:31, 3.82it/s] 15%|█▌ | 55851/371472 [4:27:03<23:12:34, 3.78it/s] 15%|█▌ | 55852/371472 [4:27:03<22:51:15, 3.84it/s] 15%|█▌ | 55853/371472 [4:27:03<23:28:54, 3.73it/s] 15%|█▌ | 55854/371472 [4:27:04<24:06:21, 3.64it/s] 15%|█▌ | 55855/371472 [4:27:04<23:35:51, 3.72it/s] 15%|█▌ | 55856/371472 [4:27:04<24:18:10, 3.61it/s] 15%|█▌ | 55857/371472 [4:27:04<23:54:19, 3.67it/s] 15%|█▌ | 55858/371472 [4:27:05<23:54:57, 3.67it/s] 15%|█▌ | 55859/371472 [4:27:05<23:20:12, 3.76it/s] 15%|█▌ | 55860/371472 [4:27:05<24:37:37, 3.56it/s] {'loss': 4.1659, 'learning_rate': 8.650746622422375e-07, 'epoch': 2.41} + 15%|█▌ | 55860/371472 [4:27:05<24:37:37, 3.56it/s] 15%|█▌ | 55861/371472 [4:27:06<25:49:27, 3.39it/s] 15%|█▌ | 55862/371472 [4:27:06<25:25:15, 3.45it/s] 15%|█▌ | 55863/371472 [4:27:06<24:26:32, 3.59it/s] 15%|█▌ | 55864/371472 [4:27:06<24:50:33, 3.53it/s] 15%|█▌ | 55865/371472 [4:27:07<25:36:35, 3.42it/s] 15%|█▌ | 55866/371472 [4:27:07<25:29:37, 3.44it/s] 15%|█▌ | 55867/371472 [4:27:07<27:22:11, 3.20it/s] 15%|█▌ | 55868/371472 [4:27:08<25:38:59, 3.42it/s] 15%|█▌ | 55869/371472 [4:27:08<26:08:01, 3.35it/s] 15%|█▌ | 55870/371472 [4:27:08<26:12:21, 3.35it/s] 15%|█▌ | 55871/371472 [4:27:08<25:10:00, 3.48it/s] 15%|█▌ | 55872/371472 [4:27:09<24:59:46, 3.51it/s] 15%|█▌ | 55873/371472 [4:27:09<24:57:02, 3.51it/s] 15%|█▌ | 55874/371472 [4:27:09<24:59:58, 3.51it/s] 15%|█▌ | 55875/371472 [4:27:10<24:40:48, 3.55it/s] 15%|█▌ | 55876/371472 [4:27:10<23:33:44, 3.72it/s] 15%|█▌ | 55877/371472 [4:27:10<23:49:30, 3.68it/s] 15%|█▌ | 55878/371472 [4:27:10<23:32:53, 3.72it/s] 15%|█▌ | 55879/371472 [4:27:11<23:31:22, 3.73it/s] 15%|█▌ | 55880/371472 [4:27:11<23:38:13, 3.71it/s] {'loss': 3.9914, 'learning_rate': 8.650261802667585e-07, 'epoch': 2.41} + 15%|█▌ | 55880/371472 [4:27:11<23:38:13, 3.71it/s] 15%|█▌ | 55881/371472 [4:27:11<26:55:43, 3.26it/s] 15%|█▌ | 55882/371472 [4:27:12<25:57:22, 3.38it/s] 15%|█▌ | 55883/371472 [4:27:12<26:43:04, 3.28it/s] 15%|█▌ | 55884/371472 [4:27:12<27:13:30, 3.22it/s] 15%|█▌ | 55885/371472 [4:27:12<26:22:56, 3.32it/s] 15%|█▌ | 55886/371472 [4:27:13<26:37:18, 3.29it/s] 15%|█▌ | 55887/371472 [4:27:13<27:40:19, 3.17it/s] 15%|█▌ | 55888/371472 [4:27:13<25:44:46, 3.40it/s] 15%|█▌ | 55889/371472 [4:27:14<27:17:43, 3.21it/s] 15%|█▌ | 55890/371472 [4:27:14<27:12:26, 3.22it/s] 15%|█▌ | 55891/371472 [4:27:14<26:33:46, 3.30it/s] 15%|█▌ | 55892/371472 [4:27:15<25:20:39, 3.46it/s] 15%|█▌ | 55893/371472 [4:27:15<23:59:24, 3.65it/s] 15%|█▌ | 55894/371472 [4:27:15<23:31:42, 3.73it/s] 15%|█▌ | 55895/371472 [4:27:15<25:05:06, 3.49it/s] 15%|█▌ | 55896/371472 [4:27:16<25:15:17, 3.47it/s] 15%|█▌ | 55897/371472 [4:27:16<25:49:43, 3.39it/s] 15%|█▌ | 55898/371472 [4:27:16<24:53:47, 3.52it/s] 15%|█▌ | 55899/371472 [4:27:17<25:03:22, 3.50it/s] 15%|█▌ | 55900/371472 [4:27:17<23:55:18, 3.66it/s] {'loss': 4.1824, 'learning_rate': 8.649776982912797e-07, 'epoch': 2.41} + 15%|█▌ | 55900/371472 [4:27:17<23:55:18, 3.66it/s] 15%|█▌ | 55901/371472 [4:27:17<24:45:04, 3.54it/s] 15%|█▌ | 55902/371472 [4:27:17<25:20:53, 3.46it/s] 15%|█▌ | 55903/371472 [4:27:18<27:16:47, 3.21it/s] 15%|█▌ | 55904/371472 [4:27:18<25:56:40, 3.38it/s] 15%|█▌ | 55905/371472 [4:27:18<25:12:16, 3.48it/s] 15%|█▌ | 55906/371472 [4:27:19<24:52:39, 3.52it/s] 15%|█▌ | 55907/371472 [4:27:19<25:25:49, 3.45it/s] 15%|█▌ | 55908/371472 [4:27:19<24:41:32, 3.55it/s] 15%|█▌ | 55909/371472 [4:27:19<23:58:46, 3.66it/s] 15%|█▌ | 55910/371472 [4:27:20<23:36:09, 3.71it/s] 15%|█▌ | 55911/371472 [4:27:20<23:52:43, 3.67it/s] 15%|█▌ | 55912/371472 [4:27:20<23:21:18, 3.75it/s] 15%|█▌ | 55913/371472 [4:27:20<22:35:55, 3.88it/s] 15%|█▌ | 55914/371472 [4:27:21<24:58:59, 3.51it/s] 15%|█▌ | 55915/371472 [4:27:21<23:46:58, 3.69it/s] 15%|█▌ | 55916/371472 [4:27:21<23:29:47, 3.73it/s] 15%|█▌ | 55917/371472 [4:27:21<22:41:04, 3.86it/s] 15%|█▌ | 55918/371472 [4:27:22<23:00:53, 3.81it/s] 15%|█▌ | 55919/371472 [4:27:22<22:58:07, 3.82it/s] 15%|█▌ | 55920/371472 [4:27:22<23:19:18, 3.76it/s] {'loss': 4.2067, 'learning_rate': 8.649292163158007e-07, 'epoch': 2.41} + 15%|█▌ | 55920/371472 [4:27:22<23:19:18, 3.76it/s] 15%|█▌ | 55921/371472 [4:27:23<23:17:08, 3.76it/s] 15%|█▌ | 55922/371472 [4:27:23<23:19:29, 3.76it/s] 15%|█▌ | 55923/371472 [4:27:23<24:30:58, 3.58it/s] 15%|█▌ | 55924/371472 [4:27:23<23:48:06, 3.68it/s] 15%|█▌ | 55925/371472 [4:27:24<23:17:59, 3.76it/s] 15%|█▌ | 55926/371472 [4:27:24<23:55:04, 3.66it/s] 15%|█▌ | 55927/371472 [4:27:24<23:53:55, 3.67it/s] 15%|█▌ | 55928/371472 [4:27:24<23:41:21, 3.70it/s] 15%|█▌ | 55929/371472 [4:27:25<24:13:47, 3.62it/s] 15%|█▌ | 55930/371472 [4:27:25<25:59:41, 3.37it/s] 15%|█▌ | 55931/371472 [4:27:25<24:56:00, 3.52it/s] 15%|█▌ | 55932/371472 [4:27:26<24:26:12, 3.59it/s] 15%|█▌ | 55933/371472 [4:27:26<24:18:45, 3.61it/s] 15%|█▌ | 55934/371472 [4:27:26<23:54:00, 3.67it/s] 15%|█▌ | 55935/371472 [4:27:26<23:27:03, 3.74it/s] 15%|█▌ | 55936/371472 [4:27:27<23:09:48, 3.78it/s] 15%|█▌ | 55937/371472 [4:27:27<23:48:03, 3.68it/s] 15%|█▌ | 55938/371472 [4:27:27<23:22:38, 3.75it/s] 15%|█▌ | 55939/371472 [4:27:27<23:09:15, 3.79it/s] 15%|█▌ | 55940/371472 [4:27:28<23:18:59, 3.76it/s] {'loss': 4.3716, 'learning_rate': 8.648807343403219e-07, 'epoch': 2.41} + 15%|█▌ | 55940/371472 [4:27:28<23:18:59, 3.76it/s] 15%|█▌ | 55941/371472 [4:27:28<23:59:59, 3.65it/s] 15%|█▌ | 55942/371472 [4:27:28<24:14:23, 3.62it/s] 15%|█▌ | 55943/371472 [4:27:29<27:54:33, 3.14it/s] 15%|█▌ | 55944/371472 [4:27:29<29:56:38, 2.93it/s] 15%|█▌ | 55945/371472 [4:27:29<28:46:58, 3.05it/s] 15%|█▌ | 55946/371472 [4:27:30<28:04:24, 3.12it/s] 15%|█▌ | 55947/371472 [4:27:30<26:41:09, 3.28it/s] 15%|█▌ | 55948/371472 [4:27:30<27:34:52, 3.18it/s] 15%|█▌ | 55949/371472 [4:27:31<26:08:23, 3.35it/s] 15%|█▌ | 55950/371472 [4:27:31<25:46:14, 3.40it/s] 15%|█▌ | 55951/371472 [4:27:31<25:02:32, 3.50it/s] 15%|█▌ | 55952/371472 [4:27:31<24:33:56, 3.57it/s] 15%|█▌ | 55953/371472 [4:27:32<24:03:42, 3.64it/s] 15%|█▌ | 55954/371472 [4:27:32<23:19:48, 3.76it/s] 15%|█▌ | 55955/371472 [4:27:32<23:40:46, 3.70it/s] 15%|█▌ | 55956/371472 [4:27:32<23:13:37, 3.77it/s] 15%|█▌ | 55957/371472 [4:27:33<22:42:02, 3.86it/s] 15%|█▌ | 55958/371472 [4:27:33<22:57:18, 3.82it/s] 15%|█▌ | 55959/371472 [4:27:33<23:27:06, 3.74it/s] 15%|█▌ | 55960/371472 [4:27:34<24:39:23, 3.55it/s] {'loss': 4.183, 'learning_rate': 8.64832252364843e-07, 'epoch': 2.41} + 15%|█▌ | 55960/371472 [4:27:34<24:39:23, 3.55it/s] 15%|█▌ | 55961/371472 [4:27:34<24:55:26, 3.52it/s] 15%|█▌ | 55962/371472 [4:27:34<24:12:22, 3.62it/s] 15%|█▌ | 55963/371472 [4:27:34<25:15:09, 3.47it/s] 15%|█▌ | 55964/371472 [4:27:35<23:57:15, 3.66it/s] 15%|█▌ | 55965/371472 [4:27:35<24:12:06, 3.62it/s] 15%|█▌ | 55966/371472 [4:27:35<23:47:42, 3.68it/s] 15%|█▌ | 55967/371472 [4:27:35<23:45:02, 3.69it/s] 15%|█��� | 55968/371472 [4:27:36<24:14:04, 3.62it/s] 15%|█▌ | 55969/371472 [4:27:36<23:41:40, 3.70it/s] 15%|█▌ | 55970/371472 [4:27:36<23:28:27, 3.73it/s] 15%|█▌ | 55971/371472 [4:27:37<23:52:21, 3.67it/s] 15%|█▌ | 55972/371472 [4:27:37<23:58:39, 3.66it/s] 15%|█▌ | 55973/371472 [4:27:37<23:34:05, 3.72it/s] 15%|█▌ | 55974/371472 [4:27:37<24:32:41, 3.57it/s] 15%|█▌ | 55975/371472 [4:27:38<26:40:19, 3.29it/s] 15%|█▌ | 55976/371472 [4:27:38<25:50:32, 3.39it/s] 15%|█▌ | 55977/371472 [4:27:38<25:00:57, 3.50it/s] 15%|█▌ | 55978/371472 [4:27:39<24:06:38, 3.63it/s] 15%|█▌ | 55979/371472 [4:27:39<24:19:23, 3.60it/s] 15%|█▌ | 55980/371472 [4:27:39<29:57:49, 2.92it/s] {'loss': 4.3275, 'learning_rate': 8.647837703893642e-07, 'epoch': 2.41} + 15%|█▌ | 55980/371472 [4:27:39<29:57:49, 2.92it/s] 15%|█▌ | 55981/371472 [4:27:40<28:21:38, 3.09it/s] 15%|█▌ | 55982/371472 [4:27:40<28:25:24, 3.08it/s] 15%|█▌ | 55983/371472 [4:27:40<28:02:37, 3.12it/s] 15%|█▌ | 55984/371472 [4:27:41<26:57:33, 3.25it/s] 15%|█▌ | 55985/371472 [4:27:41<29:18:45, 2.99it/s] 15%|█▌ | 55986/371472 [4:27:41<28:08:59, 3.11it/s] 15%|█▌ | 55987/371472 [4:27:41<25:43:20, 3.41it/s] 15%|█▌ | 55988/371472 [4:27:42<26:08:07, 3.35it/s] 15%|█▌ | 55989/371472 [4:27:42<25:10:44, 3.48it/s] 15%|█▌ | 55990/371472 [4:27:42<24:04:08, 3.64it/s] 15%|█▌ | 55991/371472 [4:27:43<24:52:41, 3.52it/s] 15%|█▌ | 55992/371472 [4:27:43<24:53:54, 3.52it/s] 15%|█▌ | 55993/371472 [4:27:43<27:39:36, 3.17it/s] 15%|█▌ | 55994/371472 [4:27:44<27:20:45, 3.20it/s] 15%|█▌ | 55995/371472 [4:27:44<27:44:30, 3.16it/s] 15%|█▌ | 55996/371472 [4:27:44<26:02:55, 3.36it/s] 15%|█▌ | 55997/371472 [4:27:45<29:34:57, 2.96it/s] 15%|█▌ | 55998/371472 [4:27:45<27:20:05, 3.21it/s] 15%|█▌ | 55999/371472 [4:27:45<25:33:35, 3.43it/s] 15%|█▌ | 56000/371472 [4:27:45<24:40:42, 3.55it/s] {'loss': 4.0832, 'learning_rate': 8.647352884138851e-07, 'epoch': 2.41} + 15%|█▌ | 56000/371472 [4:27:45<24:40:42, 3.55it/s] 15%|█▌ | 56001/371472 [4:27:46<24:32:07, 3.57it/s] 15%|█▌ | 56002/371472 [4:27:46<23:44:36, 3.69it/s] 15%|█▌ | 56003/371472 [4:27:46<23:16:42, 3.76it/s] 15%|█▌ | 56004/371472 [4:27:46<22:18:25, 3.93it/s] 15%|█▌ | 56005/371472 [4:27:47<22:45:10, 3.85it/s] 15%|█▌ | 56006/371472 [4:27:47<24:20:18, 3.60it/s] 15%|█▌ | 56007/371472 [4:27:47<24:04:47, 3.64it/s] 15%|█▌ | 56008/371472 [4:27:47<24:35:56, 3.56it/s] 15%|█▌ | 56009/371472 [4:27:48<26:02:56, 3.36it/s] 15%|█▌ | 56010/371472 [4:27:48<25:54:29, 3.38it/s] 15%|█▌ | 56011/371472 [4:27:48<25:53:09, 3.39it/s] 15%|█▌ | 56012/371472 [4:27:49<24:32:11, 3.57it/s] 15%|█▌ | 56013/371472 [4:27:49<27:15:29, 3.21it/s] 15%|█▌ | 56014/371472 [4:27:49<26:15:03, 3.34it/s] 15%|█▌ | 56015/371472 [4:27:50<24:54:21, 3.52it/s] 15%|█▌ | 56016/371472 [4:27:50<24:40:41, 3.55it/s] 15%|█▌ | 56017/371472 [4:27:50<23:57:34, 3.66it/s] 15%|█▌ | 56018/371472 [4:27:50<23:44:43, 3.69it/s] 15%|█▌ | 56019/371472 [4:27:51<23:42:35, 3.70it/s] 15%|█▌ | 56020/371472 [4:27:51<23:07:47, 3.79it/s] {'loss': 4.0189, 'learning_rate': 8.646868064384063e-07, 'epoch': 2.41} + 15%|█▌ | 56020/371472 [4:27:51<23:07:47, 3.79it/s] 15%|█▌ | 56021/371472 [4:27:51<22:50:03, 3.84it/s] 15%|█▌ | 56022/371472 [4:27:51<23:53:53, 3.67it/s] 15%|█▌ | 56023/371472 [4:27:52<23:59:24, 3.65it/s] 15%|█▌ | 56024/371472 [4:27:52<23:31:20, 3.73it/s] 15%|█▌ | 56025/371472 [4:27:52<25:15:58, 3.47it/s] 15%|█▌ | 56026/371472 [4:27:53<24:11:31, 3.62it/s] 15%|█▌ | 56027/371472 [4:27:53<23:31:38, 3.72it/s] 15%|█▌ | 56028/371472 [4:27:53<24:48:32, 3.53it/s] 15%|█▌ | 56029/371472 [4:27:53<24:12:13, 3.62it/s] 15%|█▌ | 56030/371472 [4:27:54<24:07:02, 3.63it/s] 15%|█▌ | 56031/371472 [4:27:54<24:04:23, 3.64it/s] 15%|█▌ | 56032/371472 [4:27:54<23:26:37, 3.74it/s] 15%|█▌ | 56033/371472 [4:27:54<24:56:30, 3.51it/s] 15%|█▌ | 56034/371472 [4:27:55<23:45:46, 3.69it/s] 15%|█▌ | 56035/371472 [4:27:55<24:04:48, 3.64it/s] 15%|█▌ | 56036/371472 [4:27:55<23:43:17, 3.69it/s] 15%|█▌ | 56037/371472 [4:27:56<23:23:18, 3.75it/s] 15%|█▌ | 56038/371472 [4:27:56<23:09:40, 3.78it/s] 15%|█▌ | 56039/371472 [4:27:56<22:54:15, 3.83it/s] 15%|█▌ | 56040/371472 [4:27:56<23:59:34, 3.65it/s] {'loss': 4.3236, 'learning_rate': 8.646383244629274e-07, 'epoch': 2.41} + 15%|█▌ | 56040/371472 [4:27:56<23:59:34, 3.65it/s] 15%|█▌ | 56041/371472 [4:27:57<24:00:09, 3.65it/s] 15%|█▌ | 56042/371472 [4:27:57<23:14:36, 3.77it/s] 15%|█▌ | 56043/371472 [4:27:57<24:44:29, 3.54it/s] 15%|█▌ | 56044/371472 [4:27:57<25:09:28, 3.48it/s] 15%|█▌ | 56045/371472 [4:27:58<25:25:00, 3.45it/s] 15%|█▌ | 56046/371472 [4:27:58<24:50:53, 3.53it/s] 15%|█▌ | 56047/371472 [4:27:58<25:04:53, 3.49it/s] 15%|█▌ | 56048/371472 [4:27:59<24:15:49, 3.61it/s] 15%|█▌ | 56049/371472 [4:27:59<23:54:44, 3.66it/s] 15%|█▌ | 56050/371472 [4:27:59<24:08:00, 3.63it/s] 15%|█▌ | 56051/371472 [4:27:59<23:56:03, 3.66it/s] 15%|█▌ | 56052/371472 [4:28:00<27:04:23, 3.24it/s] 15%|█▌ | 56053/371472 [4:28:00<26:47:23, 3.27it/s] 15%|█▌ | 56054/371472 [4:28:00<26:34:28, 3.30it/s] 15%|█▌ | 56055/371472 [4:28:01<25:06:52, 3.49it/s] 15%|█▌ | 56056/371472 [4:28:01<25:14:07, 3.47it/s] 15%|█▌ | 56057/371472 [4:28:01<24:35:12, 3.56it/s] 15%|█▌ | 56058/371472 [4:28:01<24:31:13, 3.57it/s] 15%|█▌ | 56059/371472 [4:28:02<24:06:29, 3.63it/s] 15%|█▌ | 56060/371472 [4:28:02<24:31:58, 3.57it/s] {'loss': 4.2682, 'learning_rate': 8.645898424874486e-07, 'epoch': 2.41} + 15%|█▌ | 56060/371472 [4:28:02<24:31:58, 3.57it/s] 15%|█▌ | 56061/371472 [4:28:02<25:08:05, 3.49it/s] 15%|█▌ | 56062/371472 [4:28:03<24:00:17, 3.65it/s] 15%|█▌ | 56063/371472 [4:28:03<23:11:28, 3.78it/s] 15%|█▌ | 56064/371472 [4:28:03<27:04:39, 3.24it/s] 15%|█▌ | 56065/371472 [4:28:03<25:38:31, 3.42it/s] 15%|█▌ | 56066/371472 [4:28:04<24:32:28, 3.57it/s] 15%|█▌ | 56067/371472 [4:28:04<24:51:20, 3.52it/s] 15%|█▌ | 56068/371472 [4:28:04<24:35:05, 3.56it/s] 15%|█▌ | 56069/371472 [4:28:05<24:38:08, 3.56it/s] 15%|█▌ | 56070/371472 [4:28:05<24:09:14, 3.63it/s] 15%|█▌ | 56071/371472 [4:28:05<25:44:01, 3.40it/s] 15%|█▌ | 56072/371472 [4:28:05<24:40:17, 3.55it/s] 15%|█▌ | 56073/371472 [4:28:06<24:04:09, 3.64it/s] 15%|█▌ | 56074/371472 [4:28:06<25:06:50, 3.49it/s] 15%|█▌ | 56075/371472 [4:28:06<24:11:53, 3.62it/s] 15%|█▌ | 56076/371472 [4:28:07<23:32:01, 3.72it/s] 15%|█▌ | 56077/371472 [4:28:07<24:42:53, 3.54it/s] 15%|█▌ | 56078/371472 [4:28:07<23:58:27, 3.65it/s] 15%|█▌ | 56079/371472 [4:28:07<24:19:00, 3.60it/s] 15%|█▌ | 56080/371472 [4:28:08<24:17:50, 3.61it/s] {'loss': 4.2826, 'learning_rate': 8.645413605119695e-07, 'epoch': 2.42} + 15%|█▌ | 56080/371472 [4:28:08<24:17:50, 3.61it/s] 15%|█▌ | 56081/371472 [4:28:08<26:03:02, 3.36it/s] 15%|█▌ | 56082/371472 [4:28:08<25:20:11, 3.46it/s] 15%|█▌ | 56083/371472 [4:28:09<24:52:35, 3.52it/s] 15%|█▌ | 56084/371472 [4:28:09<26:12:09, 3.34it/s] 15%|█▌ | 56085/371472 [4:28:09<26:34:45, 3.30it/s] 15%|█▌ | 56086/371472 [4:28:09<25:19:26, 3.46it/s] 15%|█▌ | 56087/371472 [4:28:10<24:37:14, 3.56it/s] 15%|█▌ | 56088/371472 [4:28:10<23:58:44, 3.65it/s] 15%|█▌ | 56089/371472 [4:28:10<23:23:13, 3.75it/s] 15%|█▌ | 56090/371472 [4:28:10<22:59:22, 3.81it/s] 15%|█▌ | 56091/371472 [4:28:11<23:41:29, 3.70it/s] 15%|█▌ | 56092/371472 [4:28:11<24:01:47, 3.65it/s] 15%|█▌ | 56093/371472 [4:28:11<23:41:12, 3.70it/s] 15%|█▌ | 56094/371472 [4:28:12<24:15:23, 3.61it/s] 15%|█▌ | 56095/371472 [4:28:12<25:10:54, 3.48it/s] 15%|█▌ | 56096/371472 [4:28:12<27:11:58, 3.22it/s] 15%|█▌ | 56097/371472 [4:28:13<25:45:03, 3.40it/s] 15%|█▌ | 56098/371472 [4:28:13<28:14:46, 3.10it/s] 15%|█▌ | 56099/371472 [4:28:13<27:16:24, 3.21it/s] 15%|█▌ | 56100/371472 [4:28:13<26:24:09, 3.32it/s] {'loss': 4.0921, 'learning_rate': 8.644928785364907e-07, 'epoch': 2.42} + 15%|█▌ | 56100/371472 [4:28:13<26:24:09, 3.32it/s] 15%|█▌ | 56101/371472 [4:28:14<26:09:49, 3.35it/s] 15%|█▌ | 56102/371472 [4:28:14<25:55:35, 3.38it/s] 15%|█▌ | 56103/371472 [4:28:14<24:52:14, 3.52it/s] 15%|█▌ | 56104/371472 [4:28:15<26:10:34, 3.35it/s] 15%|█▌ | 56105/371472 [4:28:15<25:20:20, 3.46it/s] 15%|█▌ | 56106/371472 [4:28:15<26:04:25, 3.36it/s] 15%|█▌ | 56107/371472 [4:28:15<25:38:18, 3.42it/s] 15%|█▌ | 56108/371472 [4:28:16<25:28:37, 3.44it/s] 15%|█▌ | 56109/371472 [4:28:16<24:40:47, 3.55it/s] 15%|█▌ | 56110/371472 [4:28:16<24:27:06, 3.58it/s] 15%|█▌ | 56111/371472 [4:28:17<24:09:07, 3.63it/s] 15%|█▌ | 56112/371472 [4:28:17<23:56:23, 3.66it/s] 15%|█▌ | 56113/371472 [4:28:17<23:04:16, 3.80it/s] 15%|█▌ | 56114/371472 [4:28:17<22:53:26, 3.83it/s] 15%|█▌ | 56115/371472 [4:28:18<23:49:17, 3.68it/s] 15%|█▌ | 56116/371472 [4:28:18<24:20:33, 3.60it/s] 15%|█▌ | 56117/371472 [4:28:18<28:15:10, 3.10it/s] 15%|█▌ | 56118/371472 [4:28:19<26:33:22, 3.30it/s] 15%|█▌ | 56119/371472 [4:28:19<26:13:13, 3.34it/s] 15%|█▌ | 56120/371472 [4:28:19<25:59:45, 3.37it/s] {'loss': 4.0919, 'learning_rate': 8.644443965610118e-07, 'epoch': 2.42} + 15%|█▌ | 56120/371472 [4:28:19<25:59:45, 3.37it/s] 15%|█▌ | 56121/371472 [4:28:19<25:19:45, 3.46it/s] 15%|█▌ | 56122/371472 [4:28:20<24:56:51, 3.51it/s] 15%|█▌ | 56123/371472 [4:28:20<24:25:57, 3.59it/s] 15%|█▌ | 56124/371472 [4:28:20<24:44:56, 3.54it/s] 15%|█▌ | 56125/371472 [4:28:21<25:16:40, 3.47it/s] 15%|█▌ | 56126/371472 [4:28:21<24:52:21, 3.52it/s] 15%|█▌ | 56127/371472 [4:28:21<24:04:00, 3.64it/s] 15%|█▌ | 56128/371472 [4:28:21<24:04:35, 3.64it/s] 15%|█▌ | 56129/371472 [4:28:22<23:58:10, 3.65it/s] 15%|█▌ | 56130/371472 [4:28:22<23:33:47, 3.72it/s] 15%|█▌ | 56131/371472 [4:28:22<23:36:11, 3.71it/s] 15%|█▌ | 56132/371472 [4:28:22<23:46:40, 3.68it/s] 15%|█▌ | 56133/371472 [4:28:23<28:10:40, 3.11it/s] 15%|█▌ | 56134/371472 [4:28:23<26:15:44, 3.34it/s] 15%|█▌ | 56135/371472 [4:28:23<26:20:51, 3.32it/s] 15%|█▌ | 56136/371472 [4:28:24<27:52:55, 3.14it/s] 15%|█▌ | 56137/371472 [4:28:24<27:11:46, 3.22it/s] 15%|█▌ | 56138/371472 [4:28:24<25:32:03, 3.43it/s] 15%|█▌ | 56139/371472 [4:28:25<24:23:59, 3.59it/s] 15%|█▌ | 56140/371472 [4:28:25<28:39:54, 3.06it/s] {'loss': 3.8716, 'learning_rate': 8.64395914585533e-07, 'epoch': 2.42} + 15%|█▌ | 56140/371472 [4:28:25<28:39:54, 3.06it/s] 15%|█▌ | 56141/371472 [4:28:25<28:28:30, 3.08it/s] 15%|█▌ | 56142/371472 [4:28:26<26:12:57, 3.34it/s] 15%|█▌ | 56143/371472 [4:28:26<24:55:54, 3.51it/s] 15%|█▌ | 56144/371472 [4:28:26<24:34:58, 3.56it/s] 15%|█▌ | 56145/371472 [4:28:26<24:01:52, 3.64it/s] 15%|█▌ | 56146/371472 [4:28:27<24:46:44, 3.53it/s] 15%|█▌ | 56147/371472 [4:28:27<24:29:50, 3.58it/s] 15%|█▌ | 56148/371472 [4:28:27<24:58:00, 3.51it/s] 15%|█▌ | 56149/371472 [4:28:28<25:16:57, 3.46it/s] 15%|█▌ | 56150/371472 [4:28:28<25:08:03, 3.48it/s] 15%|█▌ | 56151/371472 [4:28:28<24:11:10, 3.62it/s] 15%|█▌ | 56152/371472 [4:28:28<24:43:58, 3.54it/s] 15%|█▌ | 56153/371472 [4:28:29<23:41:57, 3.70it/s] 15%|█▌ | 56154/371472 [4:28:29<23:25:26, 3.74it/s] 15%|█▌ | 56155/371472 [4:28:29<23:24:36, 3.74it/s] 15%|█▌ | 56156/371472 [4:28:30<25:01:16, 3.50it/s] 15%|█▌ | 56157/371472 [4:28:30<24:51:04, 3.52it/s] 15%|█▌ | 56158/371472 [4:28:30<24:04:05, 3.64it/s] 15%|█▌ | 56159/371472 [4:28:30<23:43:31, 3.69it/s] 15%|█▌ | 56160/371472 [4:28:31<23:53:17, 3.67it/s] {'loss': 4.2843, 'learning_rate': 8.64347432610054e-07, 'epoch': 2.42} + 15%|█▌ | 56160/371472 [4:28:31<23:53:17, 3.67it/s] 15%|█▌ | 56161/371472 [4:28:31<24:14:19, 3.61it/s] 15%|█▌ | 56162/371472 [4:28:31<23:38:23, 3.71it/s] 15%|█▌ | 56163/371472 [4:28:31<26:09:03, 3.35it/s] 15%|█▌ | 56164/371472 [4:28:32<24:36:22, 3.56it/s] 15%|█▌ | 56165/371472 [4:28:32<24:14:47, 3.61it/s] 15%|█▌ | 56166/371472 [4:28:32<23:04:40, 3.80it/s] 15%|█▌ | 56167/371472 [4:28:33<24:52:07, 3.52it/s] 15%|█▌ | 56168/371472 [4:28:33<25:06:21, 3.49it/s] 15%|█▌ | 56169/371472 [4:28:33<26:35:00, 3.29it/s] 15%|█▌ | 56170/371472 [4:28:33<26:01:06, 3.37it/s] 15%|█▌ | 56171/371472 [4:28:34<24:32:07, 3.57it/s] 15%|█▌ | 56172/371472 [4:28:34<24:59:45, 3.50it/s] 15%|█▌ | 56173/371472 [4:28:34<25:14:50, 3.47it/s] 15%|█▌ | 56174/371472 [4:28:35<25:03:16, 3.50it/s] 15%|█▌ | 56175/371472 [4:28:35<24:51:27, 3.52it/s] 15%|█▌ | 56176/371472 [4:28:35<23:41:30, 3.70it/s] 15%|█▌ | 56177/371472 [4:28:35<23:04:40, 3.80it/s] 15%|█▌ | 56178/371472 [4:28:36<23:19:27, 3.75it/s] 15%|█▌ | 56179/371472 [4:28:36<24:08:20, 3.63it/s] 15%|█▌ | 56180/371472 [4:28:36<24:13:17, 3.62it/s] {'loss': 4.3531, 'learning_rate': 8.642989506345751e-07, 'epoch': 2.42} + 15%|█▌ | 56180/371472 [4:28:36<24:13:17, 3.62it/s] 15%|█▌ | 56181/371472 [4:28:36<24:16:10, 3.61it/s] 15%|█▌ | 56182/371472 [4:28:37<24:59:10, 3.51it/s] 15%|█▌ | 56183/371472 [4:28:37<25:07:53, 3.48it/s] 15%|█▌ | 56184/371472 [4:28:37<24:41:34, 3.55it/s] 15%|█▌ | 56185/371472 [4:28:38<25:02:25, 3.50it/s] 15%|█▌ | 56186/371472 [4:28:38<24:20:26, 3.60it/s] 15%|█▌ | 56187/371472 [4:28:38<24:10:30, 3.62it/s] 15%|█▌ | 56188/371472 [4:28:38<24:47:30, 3.53it/s] 15%|█▌ | 56189/371472 [4:28:39<24:06:01, 3.63it/s] 15%|█▌ | 56190/371472 [4:28:39<23:35:14, 3.71it/s] 15%|█▌ | 56191/371472 [4:28:39<23:24:45, 3.74it/s] 15%|█▌ | 56192/371472 [4:28:40<24:15:13, 3.61it/s] 15%|█▌ | 56193/371472 [4:28:40<22:53:40, 3.83it/s] 15%|█▌ | 56194/371472 [4:28:40<24:08:04, 3.63it/s] 15%|█▌ | 56195/371472 [4:28:40<26:31:36, 3.30it/s] 15%|█▌ | 56196/371472 [4:28:41<24:49:06, 3.53it/s] 15%|█▌ | 56197/371472 [4:28:41<25:13:04, 3.47it/s] 15%|█▌ | 56198/371472 [4:28:41<25:22:46, 3.45it/s] 15%|█▌ | 56199/371472 [4:28:42<24:33:06, 3.57it/s] 15%|█▌ | 56200/371472 [4:28:42<24:40:16, 3.55it/s] {'loss': 3.9195, 'learning_rate': 8.642504686590963e-07, 'epoch': 2.42} + 15%|█▌ | 56200/371472 [4:28:42<24:40:16, 3.55it/s] 15%|█▌ | 56201/371472 [4:28:42<25:09:13, 3.48it/s] 15%|█▌ | 56202/371472 [4:28:42<25:02:17, 3.50it/s] 15%|█▌ | 56203/371472 [4:28:43<25:21:41, 3.45it/s] 15%|█▌ | 56204/371472 [4:28:43<25:25:37, 3.44it/s] 15%|█▌ | 56205/371472 [4:28:43<25:19:18, 3.46it/s] 15%|█▌ | 56206/371472 [4:28:44<24:29:33, 3.58it/s] 15%|█▌ | 56207/371472 [4:28:44<24:51:51, 3.52it/s] 15%|█▌ | 56208/371472 [4:28:44<24:50:34, 3.53it/s] 15%|█▌ | 56209/371472 [4:28:44<25:30:39, 3.43it/s] 15%|█▌ | 56210/371472 [4:28:45<24:44:26, 3.54it/s] 15%|█▌ | 56211/371472 [4:28:45<23:29:59, 3.73it/s] 15%|█▌ | 56212/371472 [4:28:45<24:23:14, 3.59it/s] 15%|█▌ | 56213/371472 [4:28:45<23:24:26, 3.74it/s] 15%|█▌ | 56214/371472 [4:28:46<23:56:22, 3.66it/s] 15%|█▌ | 56215/371472 [4:28:46<23:30:15, 3.73it/s] 15%|█▌ | 56216/371472 [4:28:46<23:08:52, 3.78it/s] 15%|█▌ | 56217/371472 [4:28:47<22:38:41, 3.87it/s] 15%|█▌ | 56218/371472 [4:28:47<24:48:26, 3.53it/s] 15%|█▌ | 56219/371472 [4:28:47<28:41:33, 3.05it/s] 15%|█▌ | 56220/371472 [4:28:48<27:14:28, 3.21it/s] {'loss': 4.4277, 'learning_rate': 8.642019866836174e-07, 'epoch': 2.42} + 15%|█▌ | 56220/371472 [4:28:48<27:14:28, 3.21it/s] 15%|█▌ | 56221/371472 [4:28:48<26:27:18, 3.31it/s] 15%|█▌ | 56222/371472 [4:28:48<24:55:03, 3.51it/s] 15%|█▌ | 56223/371472 [4:28:48<24:18:05, 3.60it/s] 15%|█▌ | 56224/371472 [4:28:49<23:25:13, 3.74it/s] 15%|█▌ | 56225/371472 [4:28:49<23:23:50, 3.74it/s] 15%|█▌ | 56226/371472 [4:28:49<22:48:32, 3.84it/s] 15%|█▌ | 56227/371472 [4:28:49<22:08:56, 3.95it/s] 15%|█▌ | 56228/371472 [4:28:50<22:46:33, 3.84it/s] 15%|█▌ | 56229/371472 [4:28:50<22:55:15, 3.82it/s] 15%|█▌ | 56230/371472 [4:28:50<23:12:21, 3.77it/s] 15%|█▌ | 56231/371472 [4:28:50<23:29:06, 3.73it/s] 15%|█▌ | 56232/371472 [4:28:51<23:02:05, 3.80it/s] 15%|█▌ | 56233/371472 [4:28:51<24:16:51, 3.61it/s] 15%|█▌ | 56234/371472 [4:28:51<24:16:38, 3.61it/s] 15%|█▌ | 56235/371472 [4:28:52<24:44:21, 3.54it/s] 15%|█▌ | 56236/371472 [4:28:52<24:58:50, 3.51it/s] 15%|█▌ | 56237/371472 [4:28:52<24:36:18, 3.56it/s] 15%|█▌ | 56238/371472 [4:28:52<24:22:23, 3.59it/s] 15%|█▌ | 56239/371472 [4:28:53<24:53:29, 3.52it/s] 15%|█▌ | 56240/371472 [4:28:53<24:45:32, 3.54it/s] {'loss': 4.2536, 'learning_rate': 8.641535047081385e-07, 'epoch': 2.42} + 15%|█▌ | 56240/371472 [4:28:53<24:45:32, 3.54it/s] 15%|█▌ | 56241/371472 [4:28:53<24:37:17, 3.56it/s] 15%|█▌ | 56242/371472 [4:28:54<24:11:07, 3.62it/s] 15%|█▌ | 56243/371472 [4:28:54<23:44:47, 3.69it/s] 15%|█▌ | 56244/371472 [4:28:54<26:00:27, 3.37it/s] 15%|█▌ | 56245/371472 [4:28:55<28:19:36, 3.09it/s] 15%|█▌ | 56246/371472 [4:28:55<28:21:26, 3.09it/s] 15%|█▌ | 56247/371472 [4:28:55<27:40:47, 3.16it/s] 15%|█▌ | 56248/371472 [4:28:55<26:26:11, 3.31it/s] 15%|█▌ | 56249/371472 [4:28:56<26:48:47, 3.27it/s] 15%|█▌ | 56250/371472 [4:28:56<26:27:24, 3.31it/s] 15%|█▌ | 56251/371472 [4:28:56<25:09:02, 3.48it/s] 15%|█▌ | 56252/371472 [4:28:57<25:56:56, 3.37it/s] 15%|█▌ | 56253/371472 [4:28:57<25:58:42, 3.37it/s] 15%|█▌ | 56254/371472 [4:28:57<25:03:44, 3.49it/s] 15%|█▌ | 56255/371472 [4:28:57<24:03:09, 3.64it/s] 15%|█▌ | 56256/371472 [4:28:58<24:18:59, 3.60it/s] 15%|█▌ | 56257/371472 [4:28:58<29:52:00, 2.93it/s] 15%|█▌ | 56258/371472 [4:28:58<29:11:07, 3.00it/s] 15%|█▌ | 56259/371472 [4:28:59<27:56:54, 3.13it/s] 15%|█▌ | 56260/371472 [4:28:59<27:54:04, 3.14it/s] {'loss': 4.2423, 'learning_rate': 8.641050227326595e-07, 'epoch': 2.42} + 15%|█▌ | 56260/371472 [4:28:59<27:54:04, 3.14it/s] 15%|█▌ | 56261/371472 [4:28:59<26:43:41, 3.28it/s] 15%|█▌ | 56262/371472 [4:29:00<25:58:49, 3.37it/s] 15%|█▌ | 56263/371472 [4:29:00<25:47:46, 3.39it/s] 15%|█▌ | 56264/371472 [4:29:00<25:41:58, 3.41it/s] 15%|█▌ | 56265/371472 [4:29:01<26:00:35, 3.37it/s] 15%|█▌ | 56266/371472 [4:29:01<25:19:39, 3.46it/s] 15%|█▌ | 56267/371472 [4:29:01<27:11:51, 3.22it/s] 15%|█▌ | 56268/371472 [4:29:01<25:55:15, 3.38it/s] 15%|█▌ | 56269/371472 [4:29:02<26:53:41, 3.26it/s] 15%|█▌ | 56270/371472 [4:29:02<26:16:19, 3.33it/s] 15%|█▌ | 56271/371472 [4:29:02<25:11:42, 3.48it/s] 15%|█▌ | 56272/371472 [4:29:03<24:34:24, 3.56it/s] 15%|█▌ | 56273/371472 [4:29:03<24:02:56, 3.64it/s] 15%|█▌ | 56274/371472 [4:29:03<24:20:40, 3.60it/s] 15%|█▌ | 56275/371472 [4:29:03<24:17:40, 3.60it/s] 15%|█▌ | 56276/371472 [4:29:04<23:53:16, 3.67it/s] 15%|█▌ | 56277/371472 [4:29:04<24:55:50, 3.51it/s] 15%|█▌ | 56278/371472 [4:29:04<24:01:24, 3.64it/s] 15%|█▌ | 56279/371472 [4:29:05<25:27:35, 3.44it/s] 15%|█▌ | 56280/371472 [4:29:05<24:49:06, 3.53it/s] {'loss': 4.4274, 'learning_rate': 8.640565407571807e-07, 'epoch': 2.42} + 15%|█▌ | 56280/371472 [4:29:05<24:49:06, 3.53it/s] 15%|█▌ | 56281/371472 [4:29:05<25:32:33, 3.43it/s] 15%|█▌ | 56282/371472 [4:29:05<26:58:32, 3.25it/s] 15%|█▌ | 56283/371472 [4:29:06<28:25:21, 3.08it/s] 15%|█▌ | 56284/371472 [4:29:06<27:10:45, 3.22it/s] 15%|█▌ | 56285/371472 [4:29:06<26:12:56, 3.34it/s] 15%|█▌ | 56286/371472 [4:29:07<25:02:47, 3.50it/s] 15%|█▌ | 56287/371472 [4:29:07<24:50:14, 3.52it/s] 15%|█▌ | 56288/371472 [4:29:07<24:42:48, 3.54it/s] 15%|█▌ | 56289/371472 [4:29:07<24:36:25, 3.56it/s] 15%|█▌ | 56290/371472 [4:29:08<24:17:24, 3.60it/s] 15%|█▌ | 56291/371472 [4:29:08<23:45:57, 3.68it/s] 15%|█▌ | 56292/371472 [4:29:08<23:59:55, 3.65it/s] 15%|█▌ | 56293/371472 [4:29:09<23:56:40, 3.66it/s] 15%|█▌ | 56294/371472 [4:29:09<23:26:50, 3.73it/s] 15%|█▌ | 56295/371472 [4:29:09<22:29:15, 3.89it/s] 15%|█▌ | 56296/371472 [4:29:09<21:51:05, 4.01it/s] 15%|█▌ | 56297/371472 [4:29:10<22:11:36, 3.94it/s] 15%|█▌ | 56298/371472 [4:29:10<22:13:07, 3.94it/s] 15%|█▌ | 56299/371472 [4:29:10<23:03:28, 3.80it/s] 15%|█▌ | 56300/371472 [4:29:10<24:56:51, 3.51it/s] {'loss': 4.3172, 'learning_rate': 8.640080587817018e-07, 'epoch': 2.42} + 15%|█▌ | 56300/371472 [4:29:10<24:56:51, 3.51it/s] 15%|█▌ | 56301/371472 [4:29:11<24:30:39, 3.57it/s] 15%|█▌ | 56302/371472 [4:29:11<24:26:48, 3.58it/s] 15%|█▌ | 56303/371472 [4:29:11<25:10:03, 3.48it/s] 15%|█▌ | 56304/371472 [4:29:12<25:53:56, 3.38it/s] 15%|█▌ | 56305/371472 [4:29:12<24:09:49, 3.62it/s] 15%|█▌ | 56306/371472 [4:29:12<25:26:23, 3.44it/s] 15%|█▌ | 56307/371472 [4:29:12<24:35:53, 3.56it/s] 15%|█▌ | 56308/371472 [4:29:13<24:25:43, 3.58it/s] 15%|█▌ | 56309/371472 [4:29:13<25:16:20, 3.46it/s] 15%|█▌ | 56310/371472 [4:29:13<25:22:53, 3.45it/s] 15%|█▌ | 56311/371472 [4:29:14<29:24:08, 2.98it/s] 15%|█▌ | 56312/371472 [4:29:14<27:20:56, 3.20it/s] 15%|█▌ | 56313/371472 [4:29:14<27:59:56, 3.13it/s] 15%|█▌ | 56314/371472 [4:29:15<26:58:51, 3.24it/s] 15%|█▌ | 56315/371472 [4:29:15<27:29:21, 3.18it/s] 15%|█▌ | 56316/371472 [4:29:15<25:58:00, 3.37it/s] 15%|█▌ | 56317/371472 [4:29:15<26:00:05, 3.37it/s] 15%|█▌ | 56318/371472 [4:29:16<25:40:28, 3.41it/s] 15%|█▌ | 56319/371472 [4:29:16<25:58:40, 3.37it/s] 15%|█▌ | 56320/371472 [4:29:16<25:29:49, 3.43it/s] {'loss': 4.1643, 'learning_rate': 8.639595768062229e-07, 'epoch': 2.43} + 15%|█▌ | 56320/371472 [4:29:16<25:29:49, 3.43it/s] 15%|█▌ | 56321/371472 [4:29:17<25:51:06, 3.39it/s] 15%|█▌ | 56322/371472 [4:29:17<25:24:04, 3.45it/s] 15%|█▌ | 56323/371472 [4:29:17<24:23:11, 3.59it/s] 15%|█▌ | 56324/371472 [4:29:17<25:25:28, 3.44it/s] 15%|█▌ | 56325/371472 [4:29:18<24:57:14, 3.51it/s] 15%|█▌ | 56326/371472 [4:29:18<25:43:48, 3.40it/s] 15%|█▌ | 56327/371472 [4:29:18<25:17:40, 3.46it/s] 15%|█▌ | 56328/371472 [4:29:19<23:54:00, 3.66it/s] 15%|█▌ | 56329/371472 [4:29:19<23:51:00, 3.67it/s] 15%|█▌ | 56330/371472 [4:29:19<23:50:20, 3.67it/s] 15%|█▌ | 56331/371472 [4:29:19<23:44:37, 3.69it/s] 15%|█▌ | 56332/371472 [4:29:20<23:28:06, 3.73it/s] 15%|█▌ | 56333/371472 [4:29:20<24:00:39, 3.65it/s] 15%|█▌ | 56334/371472 [4:29:20<24:38:48, 3.55it/s] 15%|█▌ | 56335/371472 [4:29:20<24:07:50, 3.63it/s] 15%|█▌ | 56336/371472 [4:29:21<24:37:18, 3.56it/s] 15%|█▌ | 56337/371472 [4:29:21<23:41:52, 3.69it/s] 15%|█▌ | 56338/371472 [4:29:21<23:24:43, 3.74it/s] 15%|█▌ | 56339/371472 [4:29:22<23:40:27, 3.70it/s] 15%|█▌ | 56340/371472 [4:29:22<23:33:22, 3.72it/s] {'loss': 4.1716, 'learning_rate': 8.63911094830744e-07, 'epoch': 2.43} + 15%|█▌ | 56340/371472 [4:29:22<23:33:22, 3.72it/s] 15%|█▌ | 56341/371472 [4:29:22<24:22:12, 3.59it/s] 15%|█▌ | 56342/371472 [4:29:22<24:33:50, 3.56it/s] 15%|█▌ | 56343/371472 [4:29:23<23:29:34, 3.73it/s] 15%|█▌ | 56344/371472 [4:29:23<23:14:25, 3.77it/s] 15%|█▌ | 56345/371472 [4:29:23<22:34:10, 3.88it/s] 15%|█▌ | 56346/371472 [4:29:23<22:53:41, 3.82it/s] 15%|█▌ | 56347/371472 [4:29:24<23:03:41, 3.80it/s] 15%|█▌ | 56348/371472 [4:29:24<23:10:09, 3.78it/s] 15%|█▌ | 56349/371472 [4:29:24<23:30:12, 3.72it/s] 15%|█▌ | 56350/371472 [4:29:25<23:42:39, 3.69it/s] 15%|█▌ | 56351/371472 [4:29:25<23:24:17, 3.74it/s] 15%|█▌ | 56352/371472 [4:29:25<24:36:26, 3.56it/s] 15%|█▌ | 56353/371472 [4:29:25<24:05:48, 3.63it/s] 15%|█▌ | 56354/371472 [4:29:26<23:21:07, 3.75it/s] 15%|█▌ | 56355/371472 [4:29:26<23:31:15, 3.72it/s] 15%|█▌ | 56356/371472 [4:29:26<22:53:42, 3.82it/s] 15%|█▌ | 56357/371472 [4:29:26<23:38:45, 3.70it/s] 15%|█▌ | 56358/371472 [4:29:27<25:17:36, 3.46it/s] 15%|█▌ | 56359/371472 [4:29:27<26:27:08, 3.31it/s] 15%|█▌ | 56360/371472 [4:29:27<25:30:18, 3.43it/s] {'loss': 4.2735, 'learning_rate': 8.638626128552652e-07, 'epoch': 2.43} + 15%|█▌ | 56360/371472 [4:29:27<25:30:18, 3.43it/s] 15%|█▌ | 56361/371472 [4:29:28<24:52:56, 3.52it/s] 15%|█▌ | 56362/371472 [4:29:28<26:27:01, 3.31it/s] 15%|█▌ | 56363/371472 [4:29:28<25:44:12, 3.40it/s] 15%|█▌ | 56364/371472 [4:29:29<27:42:03, 3.16it/s] 15%|█▌ | 56365/371472 [4:29:29<26:34:56, 3.29it/s] 15%|█▌ | 56366/371472 [4:29:29<26:33:35, 3.30it/s] 15%|█▌ | 56367/371472 [4:29:29<25:02:47, 3.49it/s] 15%|█▌ | 56368/371472 [4:29:30<24:49:00, 3.53it/s] 15%|█▌ | 56369/371472 [4:29:30<24:21:46, 3.59it/s] 15%|█▌ | 56370/371472 [4:29:30<24:17:01, 3.60it/s] 15%|█▌ | 56371/371472 [4:29:31<26:22:30, 3.32it/s] 15%|█▌ | 56372/371472 [4:29:31<25:18:01, 3.46it/s] 15%|█▌ | 56373/371472 [4:29:31<26:11:47, 3.34it/s] 15%|█▌ | 56374/371472 [4:29:31<25:07:51, 3.48it/s] 15%|█▌ | 56375/371472 [4:29:32<25:11:15, 3.47it/s] 15%|█▌ | 56376/371472 [4:29:32<25:02:34, 3.50it/s] 15%|█▌ | 56377/371472 [4:29:32<24:01:17, 3.64it/s] 15%|█▌ | 56378/371472 [4:29:33<23:28:06, 3.73it/s] 15%|█▌ | 56379/371472 [4:29:33<24:50:48, 3.52it/s] 15%|█▌ | 56380/371472 [4:29:33<24:03:01, 3.64it/s] {'loss': 4.2517, 'learning_rate': 8.638141308797861e-07, 'epoch': 2.43} + 15%|█▌ | 56380/371472 [4:29:33<24:03:01, 3.64it/s] 15%|█▌ | 56381/371472 [4:29:33<24:06:30, 3.63it/s] 15%|█▌ | 56382/371472 [4:29:34<25:39:54, 3.41it/s] 15%|█▌ | 56383/371472 [4:29:34<25:06:24, 3.49it/s] 15%|█▌ | 56384/371472 [4:29:34<25:53:24, 3.38it/s] 15%|█▌ | 56385/371472 [4:29:35<26:26:21, 3.31it/s] 15%|█▌ | 56386/371472 [4:29:35<28:45:23, 3.04it/s] 15%|█▌ | 56387/371472 [4:29:35<26:53:46, 3.25it/s] 15%|█▌ | 56388/371472 [4:29:36<26:05:41, 3.35it/s] 15%|█▌ | 56389/371472 [4:29:36<26:13:38, 3.34it/s] 15%|█▌ | 56390/371472 [4:29:36<25:07:32, 3.48it/s] 15%|█▌ | 56391/371472 [4:29:36<24:25:45, 3.58it/s] 15%|█▌ | 56392/371472 [4:29:37<24:23:46, 3.59it/s] 15%|█▌ | 56393/371472 [4:29:37<24:11:31, 3.62it/s] 15%|█▌ | 56394/371472 [4:29:37<23:44:31, 3.69it/s] 15%|█▌ | 56395/371472 [4:29:37<24:18:35, 3.60it/s] 15%|█▌ | 56396/371472 [4:29:38<23:30:21, 3.72it/s] 15%|█▌ | 56397/371472 [4:29:38<26:08:26, 3.35it/s] 15%|█▌ | 56398/371472 [4:29:38<25:48:59, 3.39it/s] 15%|█▌ | 56399/371472 [4:29:39<25:58:37, 3.37it/s] 15%|█▌ | 56400/371472 [4:29:39<24:51:50, 3.52it/s] {'loss': 4.2051, 'learning_rate': 8.637656489043073e-07, 'epoch': 2.43} + 15%|█▌ | 56400/371472 [4:29:39<24:51:50, 3.52it/s] 15%|█▌ | 56401/371472 [4:29:39<24:39:55, 3.55it/s] 15%|█▌ | 56402/371472 [4:29:39<24:21:57, 3.59it/s] 15%|█▌ | 56403/371472 [4:29:40<23:45:42, 3.68it/s] 15%|█▌ | 56404/371472 [4:29:40<23:37:50, 3.70it/s] 15%|█▌ | 56405/371472 [4:29:40<23:29:39, 3.73it/s] 15%|█▌ | 56406/371472 [4:29:41<24:31:09, 3.57it/s] 15%|█▌ | 56407/371472 [4:29:41<24:32:02, 3.57it/s] 15%|█▌ | 56408/371472 [4:29:41<24:25:26, 3.58it/s] 15%|█▌ | 56409/371472 [4:29:41<24:10:32, 3.62it/s] 15%|█▌ | 56410/371472 [4:29:42<23:28:38, 3.73it/s] 15%|█▌ | 56411/371472 [4:29:42<24:36:28, 3.56it/s] 15%|█▌ | 56412/371472 [4:29:42<23:33:03, 3.72it/s] 15%|█▌ | 56413/371472 [4:29:42<23:31:11, 3.72it/s] 15%|█▌ | 56414/371472 [4:29:43<22:55:49, 3.82it/s] 15%|█▌ | 56415/371472 [4:29:43<23:26:56, 3.73it/s] 15%|█▌ | 56416/371472 [4:29:43<23:32:25, 3.72it/s] 15%|█▌ | 56417/371472 [4:29:44<24:26:12, 3.58it/s] 15%|█▌ | 56418/371472 [4:29:44<23:36:40, 3.71it/s] 15%|█▌ | 56419/371472 [4:29:44<23:13:53, 3.77it/s] 15%|█▌ | 56420/371472 [4:29:44<22:32:32, 3.88it/s] {'loss': 4.1685, 'learning_rate': 8.637171669288284e-07, 'epoch': 2.43} + 15%|█▌ | 56420/371472 [4:29:44<22:32:32, 3.88it/s] 15%|█▌ | 56421/371472 [4:29:45<22:08:47, 3.95it/s] 15%|█▌ | 56422/371472 [4:29:45<24:00:50, 3.64it/s] 15%|█▌ | 56423/371472 [4:29:45<24:10:55, 3.62it/s] 15%|█▌ | 56424/371472 [4:29:45<23:35:14, 3.71it/s] 15%|█▌ | 56425/371472 [4:29:46<23:30:23, 3.72it/s] 15%|█▌ | 56426/371472 [4:29:46<23:13:06, 3.77it/s] 15%|█▌ | 56427/371472 [4:29:46<23:23:14, 3.74it/s] 15%|█▌ | 56428/371472 [4:29:46<23:46:12, 3.68it/s] 15%|█▌ | 56429/371472 [4:29:47<24:15:24, 3.61it/s] 15%|█▌ | 56430/371472 [4:29:47<25:08:33, 3.48it/s] 15%|█▌ | 56431/371472 [4:29:47<23:57:17, 3.65it/s] 15%|█▌ | 56432/371472 [4:29:48<24:03:39, 3.64it/s] 15%|█▌ | 56433/371472 [4:29:48<23:13:35, 3.77it/s] 15%|█▌ | 56434/371472 [4:29:48<23:18:48, 3.75it/s] 15%|█▌ | 56435/371472 [4:29:48<25:59:14, 3.37it/s] 15%|█▌ | 56436/371472 [4:29:49<25:26:40, 3.44it/s] 15%|█▌ | 56437/371472 [4:29:49<24:52:07, 3.52it/s] 15%|█▌ | 56438/371472 [4:29:49<24:12:36, 3.61it/s] 15%|█▌ | 56439/371472 [4:29:50<24:45:21, 3.53it/s] 15%|█▌ | 56440/371472 [4:29:50<24:34:55, 3.56it/s] {'loss': 4.0869, 'learning_rate': 8.636686849533496e-07, 'epoch': 2.43} + 15%|█▌ | 56440/371472 [4:29:50<24:34:55, 3.56it/s] 15%|█▌ | 56441/371472 [4:29:50<23:28:03, 3.73it/s] 15%|█▌ | 56442/371472 [4:29:50<23:54:03, 3.66it/s] 15%|█▌ | 56443/371472 [4:29:51<24:02:36, 3.64it/s] 15%|█▌ | 56444/371472 [4:29:51<23:12:47, 3.77it/s] 15%|█▌ | 56445/371472 [4:29:51<23:47:16, 3.68it/s] 15%|█▌ | 56446/371472 [4:29:51<23:18:49, 3.75it/s] 15%|█▌ | 56447/371472 [4:29:52<22:22:30, 3.91it/s] 15%|█▌ | 56448/371472 [4:29:52<22:53:44, 3.82it/s] 15%|█▌ | 56449/371472 [4:29:52<23:43:56, 3.69it/s] 15%|█▌ | 56450/371472 [4:29:52<23:01:25, 3.80it/s] 15%|█▌ | 56451/371472 [4:29:53<25:32:49, 3.43it/s] 15%|█▌ | 56452/371472 [4:29:53<24:24:45, 3.58it/s] 15%|█▌ | 56453/371472 [4:29:53<26:59:21, 3.24it/s] 15%|█▌ | 56454/371472 [4:29:54<25:11:16, 3.47it/s] 15%|█▌ | 56455/371472 [4:29:54<23:56:06, 3.66it/s] 15%|█▌ | 56456/371472 [4:29:54<23:32:16, 3.72it/s] 15%|█▌ | 56457/371472 [4:29:55<24:34:59, 3.56it/s] 15%|█▌ | 56458/371472 [4:29:55<26:56:17, 3.25it/s] 15%|█▌ | 56459/371472 [4:29:55<26:14:58, 3.33it/s] 15%|█▌ | 56460/371472 [4:29:55<25:19:26, 3.46it/s] {'loss': 4.0233, 'learning_rate': 8.636202029778706e-07, 'epoch': 2.43} + 15%|█▌ | 56460/371472 [4:29:55<25:19:26, 3.46it/s] 15%|█▌ | 56461/371472 [4:29:56<24:01:12, 3.64it/s] 15%|█▌ | 56462/371472 [4:29:56<24:05:21, 3.63it/s] 15%|█▌ | 56463/371472 [4:29:56<24:59:42, 3.50it/s] 15%|█▌ | 56464/371472 [4:29:57<24:28:13, 3.58it/s] 15%|█▌ | 56465/371472 [4:29:57<23:36:16, 3.71it/s] 15%|█▌ | 56466/371472 [4:29:57<23:53:57, 3.66it/s] 15%|█▌ | 56467/371472 [4:29:57<23:13:17, 3.77it/s] 15%|█▌ | 56468/371472 [4:29:58<25:08:23, 3.48it/s] 15%|█▌ | 56469/371472 [4:29:58<24:37:34, 3.55it/s] 15%|█▌ | 56470/371472 [4:29:58<24:28:49, 3.57it/s] 15%|█▌ | 56471/371472 [4:29:59<25:57:30, 3.37it/s] 15%|█▌ | 56472/371472 [4:29:59<26:21:45, 3.32it/s] 15%|█▌ | 56473/371472 [4:29:59<25:31:37, 3.43it/s] 15%|█▌ | 56474/371472 [4:29:59<25:08:03, 3.48it/s] 15%|█▌ | 56475/371472 [4:30:00<24:39:54, 3.55it/s] 15%|█▌ | 56476/371472 [4:30:00<24:28:28, 3.58it/s] 15%|█▌ | 56477/371472 [4:30:00<24:12:59, 3.61it/s] 15%|█▌ | 56478/371472 [4:30:00<23:54:41, 3.66it/s] 15%|█▌ | 56479/371472 [4:30:01<23:16:13, 3.76it/s] 15%|█▌ | 56480/371472 [4:30:01<23:26:21, 3.73it/s] {'loss': 4.2422, 'learning_rate': 8.635717210023918e-07, 'epoch': 2.43} + 15%|█▌ | 56480/371472 [4:30:01<23:26:21, 3.73it/s] 15%|█▌ | 56481/371472 [4:30:01<23:00:44, 3.80it/s] 15%|█▌ | 56482/371472 [4:30:01<23:33:22, 3.71it/s] 15%|█▌ | 56483/371472 [4:30:02<23:36:11, 3.71it/s] 15%|█▌ | 56484/371472 [4:30:02<23:48:35, 3.67it/s] 15%|█▌ | 56485/371472 [4:30:02<22:57:52, 3.81it/s] 15%|█▌ | 56486/371472 [4:30:03<22:35:05, 3.87it/s] 15%|█▌ | 56487/371472 [4:30:03<22:40:50, 3.86it/s] 15%|█▌ | 56488/371472 [4:30:03<22:20:05, 3.92it/s] 15%|█▌ | 56489/371472 [4:30:03<26:21:11, 3.32it/s] 15%|█▌ | 56490/371472 [4:30:04<25:54:48, 3.38it/s] 15%|█▌ | 56491/371472 [4:30:04<25:20:06, 3.45it/s] 15%|█▌ | 56492/371472 [4:30:04<24:29:55, 3.57it/s] 15%|█▌ | 56493/371472 [4:30:05<24:18:39, 3.60it/s] 15%|█▌ | 56494/371472 [4:30:05<23:26:54, 3.73it/s] 15%|█▌ | 56495/371472 [4:30:05<24:02:41, 3.64it/s] 15%|█▌ | 56496/371472 [4:30:05<23:50:11, 3.67it/s] 15%|█▌ | 56497/371472 [4:30:06<26:42:17, 3.28it/s] 15%|█▌ | 56498/371472 [4:30:06<25:36:22, 3.42it/s] 15%|█▌ | 56499/371472 [4:30:06<24:20:53, 3.59it/s] 15%|█▌ | 56500/371472 [4:30:07<25:55:09, 3.38it/s] {'loss': 4.2122, 'learning_rate': 8.635232390269128e-07, 'epoch': 2.43} + 15%|█▌ | 56500/371472 [4:30:07<25:55:09, 3.38it/s] 15%|█▌ | 56501/371472 [4:30:07<25:15:56, 3.46it/s] 15%|█▌ | 56502/371472 [4:30:07<25:03:14, 3.49it/s] 15%|█▌ | 56503/371472 [4:30:07<24:32:31, 3.56it/s] 15%|█▌ | 56504/371472 [4:30:08<24:18:39, 3.60it/s] 15%|█▌ | 56505/371472 [4:30:08<24:53:34, 3.51it/s] 15%|█▌ | 56506/371472 [4:30:08<25:25:44, 3.44it/s] 15%|█▌ | 56507/371472 [4:30:09<24:51:23, 3.52it/s] 15%|█▌ | 56508/371472 [4:30:09<24:57:53, 3.50it/s] 15%|█▌ | 56509/371472 [4:30:09<24:02:36, 3.64it/s] 15%|█▌ | 56510/371472 [4:30:09<25:39:35, 3.41it/s] 15%|█▌ | 56511/371472 [4:30:10<27:59:31, 3.13it/s] 15%|█▌ | 56512/371472 [4:30:10<28:13:35, 3.10it/s] 15%|█▌ | 56513/371472 [4:30:10<26:48:01, 3.26it/s] 15%|█▌ | 56514/371472 [4:30:11<27:22:04, 3.20it/s] 15%|█▌ | 56515/371472 [4:30:11<27:42:12, 3.16it/s] 15%|█▌ | 56516/371472 [4:30:11<26:56:38, 3.25it/s] 15%|█▌ | 56517/371472 [4:30:12<25:27:22, 3.44it/s] 15%|█▌ | 56518/371472 [4:30:12<26:24:18, 3.31it/s] 15%|█▌ | 56519/371472 [4:30:12<25:17:38, 3.46it/s] 15%|█▌ | 56520/371472 [4:30:12<25:07:39, 3.48it/s] {'loss': 4.0499, 'learning_rate': 8.634747570514339e-07, 'epoch': 2.43} + 15%|█▌ | 56520/371472 [4:30:12<25:07:39, 3.48it/s] 15%|█▌ | 56521/371472 [4:30:13<24:27:30, 3.58it/s] 15%|█▌ | 56522/371472 [4:30:13<25:45:44, 3.40it/s] 15%|█▌ | 56523/371472 [4:30:13<28:31:47, 3.07it/s] 15%|█▌ | 56524/371472 [4:30:14<27:04:03, 3.23it/s] 15%|█▌ | 56525/371472 [4:30:14<25:46:34, 3.39it/s] 15%|█▌ | 56526/371472 [4:30:14<24:48:18, 3.53it/s] 15%|█▌ | 56527/371472 [4:30:14<24:11:39, 3.62it/s] 15%|█▌ | 56528/371472 [4:30:15<24:17:30, 3.60it/s] 15%|█▌ | 56529/371472 [4:30:15<25:29:09, 3.43it/s] 15%|█▌ | 56530/371472 [4:30:15<24:59:15, 3.50it/s] 15%|█▌ | 56531/371472 [4:30:16<25:29:13, 3.43it/s] 15%|█▌ | 56532/371472 [4:30:16<26:08:08, 3.35it/s] 15%|█▌ | 56533/371472 [4:30:16<24:58:08, 3.50it/s] 15%|█▌ | 56534/371472 [4:30:17<24:57:42, 3.50it/s] 15%|█▌ | 56535/371472 [4:30:17<24:31:06, 3.57it/s] 15%|█▌ | 56536/371472 [4:30:17<23:39:50, 3.70it/s] 15%|█▌ | 56537/371472 [4:30:17<23:00:49, 3.80it/s] 15%|█▌ | 56538/371472 [4:30:18<23:58:13, 3.65it/s] 15%|█▌ | 56539/371472 [4:30:18<24:48:21, 3.53it/s] 15%|█▌ | 56540/371472 [4:30:18<25:33:34, 3.42it/s] {'loss': 4.3853, 'learning_rate': 8.63426275075955e-07, 'epoch': 2.44} + 15%|█▌ | 56540/371472 [4:30:18<25:33:34, 3.42it/s] 15%|█▌ | 56541/371472 [4:30:19<26:36:09, 3.29it/s] 15%|█▌ | 56542/371472 [4:30:19<25:27:22, 3.44it/s] 15%|█▌ | 56543/371472 [4:30:19<25:43:20, 3.40it/s] 15%|█▌ | 56544/371472 [4:30:19<24:51:26, 3.52it/s] 15%|█▌ | 56545/371472 [4:30:20<23:54:41, 3.66it/s] 15%|█▌ | 56546/371472 [4:30:20<24:13:30, 3.61it/s] 15%|█▌ | 56547/371472 [4:30:20<23:50:46, 3.67it/s] 15%|█▌ | 56548/371472 [4:30:21<27:42:47, 3.16it/s] 15%|█▌ | 56549/371472 [4:30:21<28:45:10, 3.04it/s] 15%|█▌ | 56550/371472 [4:30:21<27:07:26, 3.23it/s] 15%|█▌ | 56551/371472 [4:30:21<25:39:53, 3.41it/s] 15%|█▌ | 56552/371472 [4:30:22<24:48:42, 3.53it/s] 15%|█▌ | 56553/371472 [4:30:22<25:18:28, 3.46it/s] 15%|█▌ | 56554/371472 [4:30:22<24:51:45, 3.52it/s] 15%|█▌ | 56555/371472 [4:30:23<26:45:31, 3.27it/s] 15%|█▌ | 56556/371472 [4:30:23<25:04:15, 3.49it/s] 15%|█▌ | 56557/371472 [4:30:23<24:22:52, 3.59it/s] 15%|█▌ | 56558/371472 [4:30:23<24:47:53, 3.53it/s] 15%|█▌ | 56559/371472 [4:30:24<25:02:54, 3.49it/s] 15%|█▌ | 56560/371472 [4:30:24<27:18:03, 3.20it/s] {'loss': 4.4201, 'learning_rate': 8.633777931004762e-07, 'epoch': 2.44} + 15%|█▌ | 56560/371472 [4:30:24<27:18:03, 3.20it/s] 15%|█▌ | 56561/371472 [4:30:24<27:52:09, 3.14it/s] 15%|█▌ | 56562/371472 [4:30:25<28:01:32, 3.12it/s] 15%|█▌ | 56563/371472 [4:30:25<28:08:31, 3.11it/s] 15%|█▌ | 56564/371472 [4:30:25<27:25:48, 3.19it/s] 15%|█▌ | 56565/371472 [4:30:26<26:09:46, 3.34it/s] 15%|█▌ | 56566/371472 [4:30:26<25:30:42, 3.43it/s] 15%|█▌ | 56567/371472 [4:30:26<24:53:44, 3.51it/s] 15%|█▌ | 56568/371472 [4:30:27<26:28:49, 3.30it/s] 15%|█▌ | 56569/371472 [4:30:27<24:49:11, 3.52it/s] 15%|█▌ | 56570/371472 [4:30:27<24:15:18, 3.61it/s] 15%|█▌ | 56571/371472 [4:30:27<25:16:59, 3.46it/s] 15%|█▌ | 56572/371472 [4:30:28<24:12:45, 3.61it/s] 15%|█▌ | 56573/371472 [4:30:28<26:25:47, 3.31it/s] 15%|█▌ | 56574/371472 [4:30:28<27:44:40, 3.15it/s] 15%|█▌ | 56575/371472 [4:30:29<26:07:43, 3.35it/s] 15%|█▌ | 56576/371472 [4:30:29<25:37:07, 3.41it/s] 15%|█▌ | 56577/371472 [4:30:29<24:35:30, 3.56it/s] 15%|█▌ | 56578/371472 [4:30:29<24:17:21, 3.60it/s] 15%|█▌ | 56579/371472 [4:30:30<23:44:14, 3.68it/s] 15%|█▌ | 56580/371472 [4:30:30<23:50:13, 3.67it/s] {'loss': 4.1895, 'learning_rate': 8.633293111249973e-07, 'epoch': 2.44} + 15%|█▌ | 56580/371472 [4:30:30<23:50:13, 3.67it/s] 15%|█▌ | 56581/371472 [4:30:30<25:55:16, 3.37it/s] 15%|█▌ | 56582/371472 [4:30:31<25:14:33, 3.47it/s] 15%|█▌ | 56583/371472 [4:30:31<24:05:35, 3.63it/s] 15%|█▌ | 56584/371472 [4:30:31<23:37:31, 3.70it/s] 15%|█▌ | 56585/371472 [4:30:31<23:58:34, 3.65it/s] 15%|█▌ | 56586/371472 [4:30:32<23:33:52, 3.71it/s] 15%|█▌ | 56587/371472 [4:30:32<24:09:16, 3.62it/s] 15%|█▌ | 56588/371472 [4:30:32<24:00:31, 3.64it/s] 15%|█▌ | 56589/371472 [4:30:32<24:13:28, 3.61it/s] 15%|█▌ | 56590/371472 [4:30:33<23:13:34, 3.77it/s] 15%|█▌ | 56591/371472 [4:30:33<24:18:04, 3.60it/s] 15%|█▌ | 56592/371472 [4:30:33<23:58:47, 3.65it/s] 15%|█▌ | 56593/371472 [4:30:34<25:29:02, 3.43it/s] 15%|█▌ | 56594/371472 [4:30:34<24:46:33, 3.53it/s] 15%|█▌ | 56595/371472 [4:30:34<24:33:41, 3.56it/s] 15%|█▌ | 56596/371472 [4:30:34<27:28:47, 3.18it/s] 15%|█▌ | 56597/371472 [4:30:35<27:48:17, 3.15it/s] 15%|█▌ | 56598/371472 [4:30:35<26:13:47, 3.33it/s] 15%|█▌ | 56599/371472 [4:30:36<29:28:27, 2.97it/s] 15%|█▌ | 56600/371472 [4:30:36<27:08:17, 3.22it/s] {'loss': 4.3688, 'learning_rate': 8.632808291495185e-07, 'epoch': 2.44} + 15%|█▌ | 56600/371472 [4:30:36<27:08:17, 3.22it/s] 15%|█▌ | 56601/371472 [4:30:36<25:54:27, 3.38it/s] 15%|█▌ | 56602/371472 [4:30:36<24:50:11, 3.52it/s] 15%|█▌ | 56603/371472 [4:30:37<23:52:53, 3.66it/s] 15%|█▌ | 56604/371472 [4:30:37<22:52:53, 3.82it/s] 15%|█▌ | 56605/371472 [4:30:37<22:26:07, 3.90it/s] 15%|█▌ | 56606/371472 [4:30:37<22:05:00, 3.96it/s] 15%|█▌ | 56607/371472 [4:30:38<23:57:13, 3.65it/s] 15%|█▌ | 56608/371472 [4:30:38<24:15:59, 3.60it/s] 15%|█▌ | 56609/371472 [4:30:38<24:18:53, 3.60it/s] 15%|█▌ | 56610/371472 [4:30:38<24:03:28, 3.64it/s] 15%|█▌ | 56611/371472 [4:30:39<25:21:51, 3.45it/s] 15%|█▌ | 56612/371472 [4:30:39<24:48:54, 3.52it/s] 15%|█▌ | 56613/371472 [4:30:39<25:12:11, 3.47it/s] 15%|█▌ | 56614/371472 [4:30:40<25:55:24, 3.37it/s] 15%|█▌ | 56615/371472 [4:30:40<24:57:38, 3.50it/s] 15%|█▌ | 56616/371472 [4:30:40<24:24:42, 3.58it/s] 15%|█▌ | 56617/371472 [4:30:40<24:48:17, 3.53it/s] 15%|█▌ | 56618/371472 [4:30:41<23:53:11, 3.66it/s] 15%|█▌ | 56619/371472 [4:30:41<23:55:48, 3.65it/s] 15%|█▌ | 56620/371472 [4:30:41<23:32:17, 3.72it/s] {'loss': 4.3445, 'learning_rate': 8.632323471740395e-07, 'epoch': 2.44} + 15%|█▌ | 56620/371472 [4:30:41<23:32:17, 3.72it/s] 15%|█▌ | 56621/371472 [4:30:41<23:54:49, 3.66it/s] 15%|█▌ | 56622/371472 [4:30:42<23:54:38, 3.66it/s] 15%|█▌ | 56623/371472 [4:30:42<24:45:31, 3.53it/s] 15%|█▌ | 56624/371472 [4:30:42<26:12:25, 3.34it/s] 15%|█▌ | 56625/371472 [4:30:43<26:00:10, 3.36it/s] 15%|█▌ | 56626/371472 [4:30:43<24:19:57, 3.59it/s] 15%|█▌ | 56627/371472 [4:30:43<24:55:35, 3.51it/s] 15%|█▌ | 56628/371472 [4:30:44<24:45:43, 3.53it/s] 15%|█▌ | 56629/371472 [4:30:44<24:15:48, 3.60it/s] 15%|█▌ | 56630/371472 [4:30:44<23:54:16, 3.66it/s] 15%|█▌ | 56631/371472 [4:30:44<24:08:37, 3.62it/s] 15%|█▌ | 56632/371472 [4:30:45<23:29:26, 3.72it/s] 15%|█▌ | 56633/371472 [4:30:45<25:16:41, 3.46it/s] 15%|█▌ | 56634/371472 [4:30:45<24:04:29, 3.63it/s] 15%|█▌ | 56635/371472 [4:30:45<23:40:03, 3.70it/s] 15%|█▌ | 56636/371472 [4:30:46<23:29:56, 3.72it/s] 15%|█▌ | 56637/371472 [4:30:46<24:36:48, 3.55it/s] 15%|█▌ | 56638/371472 [4:30:46<26:09:08, 3.34it/s] 15%|█▌ | 56639/371472 [4:30:47<27:13:02, 3.21it/s] 15%|█▌ | 56640/371472 [4:30:47<25:31:21, 3.43it/s] {'loss': 4.0405, 'learning_rate': 8.631838651985605e-07, 'epoch': 2.44} + 15%|█▌ | 56640/371472 [4:30:47<25:31:21, 3.43it/s] 15%|█▌ | 56641/371472 [4:30:47<25:45:04, 3.40it/s] 15%|█▌ | 56642/371472 [4:30:47<24:25:50, 3.58it/s] 15%|█▌ | 56643/371472 [4:30:48<25:50:16, 3.38it/s] 15%|█▌ | 56644/371472 [4:30:48<26:01:08, 3.36it/s] 15%|█▌ | 56645/371472 [4:30:48<28:32:09, 3.06it/s] 15%|█▌ | 56646/371472 [4:30:49<26:17:25, 3.33it/s] 15%|█▌ | 56647/371472 [4:30:49<26:09:27, 3.34it/s] 15%|█▌ | 56648/371472 [4:30:49<25:19:43, 3.45it/s] 15%|█▌ | 56649/371472 [4:30:50<25:38:15, 3.41it/s] 15%|█▌ | 56650/371472 [4:30:50<25:03:17, 3.49it/s] 15%|█▌ | 56651/371472 [4:30:50<24:35:47, 3.56it/s] 15%|█▌ | 56652/371472 [4:30:50<25:48:03, 3.39it/s] 15%|█▌ | 56653/371472 [4:30:51<25:28:54, 3.43it/s] 15%|█▌ | 56654/371472 [4:30:51<25:19:54, 3.45it/s] 15%|█▌ | 56655/371472 [4:30:51<26:23:43, 3.31it/s] 15%|█▌ | 56656/371472 [4:30:52<26:28:08, 3.30it/s] 15%|█▌ | 56657/371472 [4:30:52<25:18:09, 3.46it/s] 15%|█▌ | 56658/371472 [4:30:52<24:42:29, 3.54it/s] 15%|█▌ | 56659/371472 [4:30:52<25:03:17, 3.49it/s] 15%|█▌ | 56660/371472 [4:30:53<24:40:20, 3.54it/s] {'loss': 4.1405, 'learning_rate': 8.631353832230817e-07, 'epoch': 2.44} + 15%|█▌ | 56660/371472 [4:30:53<24:40:20, 3.54it/s] 15%|█▌ | 56661/371472 [4:30:53<24:35:04, 3.56it/s] 15%|█▌ | 56662/371472 [4:30:53<23:26:43, 3.73it/s] 15%|█▌ | 56663/371472 [4:30:54<23:23:12, 3.74it/s] 15%|█▌ | 56664/371472 [4:30:54<22:59:51, 3.80it/s] 15%|█▌ | 56665/371472 [4:30:54<22:24:44, 3.90it/s] 15%|█▌ | 56666/371472 [4:30:54<23:00:19, 3.80it/s] 15%|█▌ | 56667/371472 [4:30:55<24:21:17, 3.59it/s] 15%|█▌ | 56668/371472 [4:30:55<24:06:39, 3.63it/s] 15%|█▌ | 56669/371472 [4:30:55<24:11:11, 3.62it/s] 15%|█▌ | 56670/371472 [4:30:55<24:34:28, 3.56it/s] 15%|█▌ | 56671/371472 [4:30:56<25:00:59, 3.50it/s] 15%|█▌ | 56672/371472 [4:30:56<24:31:26, 3.57it/s] 15%|█▌ | 56673/371472 [4:30:56<26:50:36, 3.26it/s] 15%|█▌ | 56674/371472 [4:30:57<25:48:54, 3.39it/s] 15%|█▌ | 56675/371472 [4:30:57<24:18:15, 3.60it/s] 15%|█▌ | 56676/371472 [4:30:57<25:10:44, 3.47it/s] 15%|█▌ | 56677/371472 [4:30:58<26:02:50, 3.36it/s] 15%|█▌ | 56678/371472 [4:30:58<25:18:08, 3.46it/s] 15%|█▌ | 56679/371472 [4:30:58<26:39:11, 3.28it/s] 15%|█▌ | 56680/371472 [4:30:58<26:31:05, 3.30it/s] {'loss': 4.1484, 'learning_rate': 8.630869012476028e-07, 'epoch': 2.44} + 15%|█▌ | 56680/371472 [4:30:58<26:31:05, 3.30it/s] 15%|█▌ | 56681/371472 [4:30:59<25:03:31, 3.49it/s] 15%|█▌ | 56682/371472 [4:30:59<24:43:04, 3.54it/s] 15%|█▌ | 56683/371472 [4:30:59<24:12:47, 3.61it/s] 15%|█▌ | 56684/371472 [4:30:59<23:47:25, 3.68it/s] 15%|█▌ | 56685/371472 [4:31:00<23:05:01, 3.79it/s] 15%|█▌ | 56686/371472 [4:31:00<23:13:20, 3.77it/s] 15%|█▌ | 56687/371472 [4:31:00<23:21:02, 3.74it/s] 15%|█▌ | 56688/371472 [4:31:01<22:36:05, 3.87it/s] 15%|█▌ | 56689/371472 [4:31:01<22:47:40, 3.84it/s] 15%|█▌ | 56690/371472 [4:31:01<23:22:14, 3.74it/s] 15%|█▌ | 56691/371472 [4:31:01<24:14:29, 3.61it/s] 15%|█▌ | 56692/371472 [4:31:02<26:26:02, 3.31it/s] 15%|█▌ | 56693/371472 [4:31:02<25:14:55, 3.46it/s] 15%|█▌ | 56694/371472 [4:31:02<29:51:57, 2.93it/s] 15%|█▌ | 56695/371472 [4:31:03<28:15:03, 3.10it/s] 15%|█▌ | 56696/371472 [4:31:03<27:41:39, 3.16it/s] 15%|█▌ | 56697/371472 [4:31:03<26:01:59, 3.36it/s] 15%|█▌ | 56698/371472 [4:31:04<26:17:29, 3.33it/s] 15%|█▌ | 56699/371472 [4:31:04<27:04:01, 3.23it/s] 15%|█▌ | 56700/371472 [4:31:04<25:14:09, 3.46it/s] {'loss': 4.2479, 'learning_rate': 8.630384192721239e-07, 'epoch': 2.44} + 15%|█▌ | 56700/371472 [4:31:04<25:14:09, 3.46it/s] 15%|█▌ | 56701/371472 [4:31:04<25:38:33, 3.41it/s] 15%|█▌ | 56702/371472 [4:31:05<24:46:06, 3.53it/s] 15%|█▌ | 56703/371472 [4:31:05<26:15:51, 3.33it/s] 15%|█▌ | 56704/371472 [4:31:05<25:58:50, 3.37it/s] 15%|█▌ | 56705/371472 [4:31:06<25:54:34, 3.37it/s] 15%|█▌ | 56706/371472 [4:31:06<26:11:35, 3.34it/s] 15%|█▌ | 56707/371472 [4:31:06<25:04:58, 3.49it/s] 15%|█▌ | 56708/371472 [4:31:06<24:55:44, 3.51it/s] 15%|█▌ | 56709/371472 [4:31:07<23:55:54, 3.65it/s] 15%|█▌ | 56710/371472 [4:31:07<25:15:21, 3.46it/s] 15%|█▌ | 56711/371472 [4:31:07<25:37:29, 3.41it/s] 15%|█▌ | 56712/371472 [4:31:08<25:54:00, 3.38it/s] 15%|█▌ | 56713/371472 [4:31:08<24:44:47, 3.53it/s] 15%|█▌ | 56714/371472 [4:31:08<24:46:09, 3.53it/s] 15%|█▌ | 56715/371472 [4:31:08<24:10:00, 3.62it/s] 15%|█▌ | 56716/371472 [4:31:09<26:35:36, 3.29it/s] 15%|█▌ | 56717/371472 [4:31:09<26:01:59, 3.36it/s] 15%|█▌ | 56718/371472 [4:31:09<26:57:53, 3.24it/s] 15%|█▌ | 56719/371472 [4:31:10<25:58:59, 3.36it/s] 15%|█▌ | 56720/371472 [4:31:10<25:08:17, 3.48it/s] {'loss': 4.1576, 'learning_rate': 8.62989937296645e-07, 'epoch': 2.44} + 15%|█▌ | 56720/371472 [4:31:10<25:08:17, 3.48it/s] 15%|█▌ | 56721/371472 [4:31:10<24:08:11, 3.62it/s] 15%|█▌ | 56722/371472 [4:31:11<23:45:52, 3.68it/s] 15%|█▌ | 56723/371472 [4:31:11<23:09:47, 3.77it/s] 15%|█▌ | 56724/371472 [4:31:11<23:08:03, 3.78it/s] 15%|█▌ | 56725/371472 [4:31:11<22:40:39, 3.86it/s] 15%|█▌ | 56726/371472 [4:31:12<23:02:41, 3.79it/s] 15%|█▌ | 56727/371472 [4:31:12<22:20:28, 3.91it/s] 15%|█▌ | 56728/371472 [4:31:12<23:10:04, 3.77it/s] 15%|█▌ | 56729/371472 [4:31:12<23:10:37, 3.77it/s] 15%|█▌ | 56730/371472 [4:31:13<22:42:48, 3.85it/s] 15%|█▌ | 56731/371472 [4:31:13<22:35:20, 3.87it/s] 15%|█▌ | 56732/371472 [4:31:13<22:31:52, 3.88it/s] 15%|█▌ | 56733/371472 [4:31:13<22:18:05, 3.92it/s] 15%|█▌ | 56734/371472 [4:31:14<21:36:27, 4.05it/s] 15%|█▌ | 56735/371472 [4:31:14<22:20:57, 3.91it/s] 15%|█▌ | 56736/371472 [4:31:14<21:50:38, 4.00it/s] 15%|█▌ | 56737/371472 [4:31:14<22:05:32, 3.96it/s] 15%|█▌ | 56738/371472 [4:31:15<24:14:43, 3.61it/s] 15%|█▌ | 56739/371472 [4:31:15<26:06:22, 3.35it/s] 15%|█▌ | 56740/371472 [4:31:15<26:20:11, 3.32it/s] {'loss': 4.3753, 'learning_rate': 8.629414553211662e-07, 'epoch': 2.44} + 15%|█▌ | 56740/371472 [4:31:15<26:20:11, 3.32it/s] 15%|█▌ | 56741/371472 [4:31:16<25:08:23, 3.48it/s] 15%|█▌ | 56742/371472 [4:31:16<23:56:44, 3.65it/s] 15%|█▌ | 56743/371472 [4:31:16<23:45:30, 3.68it/s] 15%|█▌ | 56744/371472 [4:31:16<25:22:02, 3.45it/s] 15%|█▌ | 56745/371472 [4:31:17<24:44:44, 3.53it/s] 15%|█▌ | 56746/371472 [4:31:17<25:00:02, 3.50it/s] 15%|█▌ | 56747/371472 [4:31:17<24:49:42, 3.52it/s] 15%|█▌ | 56748/371472 [4:31:18<24:07:57, 3.62it/s] 15%|█▌ | 56749/371472 [4:31:18<25:26:36, 3.44it/s] 15%|█▌ | 56750/371472 [4:31:18<24:00:00, 3.64it/s] 15%|█▌ | 56751/371472 [4:31:18<23:44:56, 3.68it/s] 15%|█▌ | 56752/371472 [4:31:19<25:42:33, 3.40it/s] 15%|█▌ | 56753/371472 [4:31:19<24:35:19, 3.56it/s] 15%|█▌ | 56754/371472 [4:31:19<23:40:09, 3.69it/s] 15%|█▌ | 56755/371472 [4:31:19<23:32:45, 3.71it/s] 15%|█▌ | 56756/371472 [4:31:20<23:11:59, 3.77it/s] 15%|█▌ | 56757/371472 [4:31:20<24:32:16, 3.56it/s] 15%|█▌ | 56758/371472 [4:31:20<23:27:17, 3.73it/s] 15%|█▌ | 56759/371472 [4:31:21<25:55:50, 3.37it/s] 15%|█▌ | 56760/371472 [4:31:21<25:24:02, 3.44it/s] {'loss': 4.4392, 'learning_rate': 8.628929733456872e-07, 'epoch': 2.44} + 15%|█▌ | 56760/371472 [4:31:21<25:24:02, 3.44it/s] 15%|█▌ | 56761/371472 [4:31:21<26:26:39, 3.31it/s] 15%|█▌ | 56762/371472 [4:31:22<26:13:11, 3.33it/s] 15%|█▌ | 56763/371472 [4:31:22<26:07:05, 3.35it/s] 15%|█▌ | 56764/371472 [4:31:22<26:21:35, 3.32it/s] 15%|█▌ | 56765/371472 [4:31:22<25:19:06, 3.45it/s] 15%|█▌ | 56766/371472 [4:31:23<25:27:57, 3.43it/s] 15%|█▌ | 56767/371472 [4:31:23<24:55:32, 3.51it/s] 15%|█▌ | 56768/371472 [4:31:23<24:13:31, 3.61it/s] 15%|█▌ | 56769/371472 [4:31:23<24:06:01, 3.63it/s] 15%|█▌ | 56770/371472 [4:31:24<23:44:40, 3.68it/s] 15%|█▌ | 56771/371472 [4:31:24<23:25:27, 3.73it/s] 15%|█▌ | 56772/371472 [4:31:24<23:52:33, 3.66it/s] 15%|█▌ | 56773/371472 [4:31:25<23:43:40, 3.68it/s] 15%|█▌ | 56774/371472 [4:31:25<23:49:45, 3.67it/s] 15%|█▌ | 56775/371472 [4:31:25<25:29:28, 3.43it/s] 15%|█▌ | 56776/371472 [4:31:25<24:47:43, 3.53it/s] 15%|█▌ | 56777/371472 [4:31:26<24:26:53, 3.58it/s] 15%|█▌ | 56778/371472 [4:31:26<24:40:15, 3.54it/s] 15%|█▌ | 56779/371472 [4:31:26<24:16:46, 3.60it/s] 15%|█▌ | 56780/371472 [4:31:27<23:29:12, 3.72it/s] {'loss': 4.1372, 'learning_rate': 8.628444913702083e-07, 'epoch': 2.45} + 15%|█▌ | 56780/371472 [4:31:27<23:29:12, 3.72it/s] 15%|█▌ | 56781/371472 [4:31:27<23:10:41, 3.77it/s] 15%|█▌ | 56782/371472 [4:31:27<23:13:32, 3.76it/s] 15%|█▌ | 56783/371472 [4:31:27<24:13:36, 3.61it/s] 15%|█▌ | 56784/371472 [4:31:28<23:20:38, 3.74it/s] 15%|█▌ | 56785/371472 [4:31:28<22:54:44, 3.82it/s] 15%|█▌ | 56786/371472 [4:31:28<22:09:04, 3.95it/s] 15%|█▌ | 56787/371472 [4:31:28<24:06:49, 3.63it/s] 15%|█▌ | 56788/371472 [4:31:29<24:06:46, 3.63it/s] 15%|█▌ | 56789/371472 [4:31:29<23:43:48, 3.68it/s] 15%|█▌ | 56790/371472 [4:31:29<24:42:59, 3.54it/s] 15%|█▌ | 56791/371472 [4:31:30<24:37:24, 3.55it/s] 15%|█▌ | 56792/371472 [4:31:30<24:55:12, 3.51it/s] 15%|█▌ | 56793/371472 [4:31:30<24:12:26, 3.61it/s] 15%|█▌ | 56794/371472 [4:31:30<23:28:32, 3.72it/s] 15%|█▌ | 56795/371472 [4:31:31<22:49:10, 3.83it/s] 15%|█▌ | 56796/371472 [4:31:31<22:46:14, 3.84it/s] 15%|█▌ | 56797/371472 [4:31:31<23:38:31, 3.70it/s] 15%|█▌ | 56798/371472 [4:31:31<22:54:12, 3.82it/s] 15%|█▌ | 56799/371472 [4:31:32<22:53:51, 3.82it/s] 15%|█▌ | 56800/371472 [4:31:32<22:11:29, 3.94it/s] {'loss': 4.1564, 'learning_rate': 8.627960093947294e-07, 'epoch': 2.45} + 15%|█▌ | 56800/371472 [4:31:32<22:11:29, 3.94it/s] 15%|█▌ | 56801/371472 [4:31:32<22:43:06, 3.85it/s] 15%|█▌ | 56802/371472 [4:31:32<23:28:31, 3.72it/s] 15%|█▌ | 56803/371472 [4:31:33<23:34:22, 3.71it/s] 15%|█▌ | 56804/371472 [4:31:33<23:10:22, 3.77it/s] 15%|█▌ | 56805/371472 [4:31:33<25:58:23, 3.37it/s] 15%|█▌ | 56806/371472 [4:31:34<24:47:04, 3.53it/s] 15%|█▌ | 56807/371472 [4:31:34<25:06:21, 3.48it/s] 15%|█▌ | 56808/371472 [4:31:34<25:06:46, 3.48it/s] 15%|█▌ | 56809/371472 [4:31:34<24:42:06, 3.54it/s] 15%|█▌ | 56810/371472 [4:31:35<25:11:06, 3.47it/s] 15%|█▌ | 56811/371472 [4:31:35<24:11:48, 3.61it/s] 15%|█▌ | 56812/371472 [4:31:35<23:57:51, 3.65it/s] 15%|█▌ | 56813/371472 [4:31:36<24:16:07, 3.60it/s] 15%|█▌ | 56814/371472 [4:31:36<25:29:59, 3.43it/s] 15%|█▌ | 56815/371472 [4:31:36<24:59:48, 3.50it/s] 15%|█▌ | 56816/371472 [4:31:36<23:50:32, 3.67it/s] 15%|█▌ | 56817/371472 [4:31:37<23:55:15, 3.65it/s] 15%|█▌ | 56818/371472 [4:31:37<23:44:35, 3.68it/s] 15%|█▌ | 56819/371472 [4:31:37<23:04:38, 3.79it/s] 15%|█▌ | 56820/371472 [4:31:37<22:26:18, 3.90it/s] {'loss': 4.1573, 'learning_rate': 8.627475274192505e-07, 'epoch': 2.45} + 15%|█▌ | 56820/371472 [4:31:37<22:26:18, 3.90it/s] 15%|█▌ | 56821/371472 [4:31:38<22:57:30, 3.81it/s] 15%|█▌ | 56822/371472 [4:31:38<22:23:54, 3.90it/s] 15%|█▌ | 56823/371472 [4:31:38<25:48:09, 3.39it/s] 15%|█▌ | 56824/371472 [4:31:39<25:20:11, 3.45it/s] 15%|█▌ | 56825/371472 [4:31:39<24:55:03, 3.51it/s] 15%|█▌ | 56826/371472 [4:31:39<25:29:43, 3.43it/s] 15%|█▌ | 56827/371472 [4:31:39<25:21:58, 3.45it/s] 15%|█▌ | 56828/371472 [4:31:40<24:39:59, 3.54it/s] 15%|█▌ | 56829/371472 [4:31:40<24:34:09, 3.56it/s] 15%|█▌ | 56830/371472 [4:31:40<24:44:10, 3.53it/s] 15%|█▌ | 56831/371472 [4:31:41<23:33:09, 3.71it/s] 15%|█▌ | 56832/371472 [4:31:41<23:37:34, 3.70it/s] 15%|█▌ | 56833/371472 [4:31:41<23:39:50, 3.69it/s] 15%|█▌ | 56834/371472 [4:31:41<24:25:09, 3.58it/s] 15%|█▌ | 56835/371472 [4:31:42<23:59:56, 3.64it/s] 15%|█▌ | 56836/371472 [4:31:42<24:06:58, 3.62it/s] 15%|█▌ | 56837/371472 [4:31:42<25:38:04, 3.41it/s] 15%|█▌ | 56838/371472 [4:31:43<25:33:51, 3.42it/s] 15%|█▌ | 56839/371472 [4:31:43<24:19:18, 3.59it/s] 15%|█▌ | 56840/371472 [4:31:43<24:54:55, 3.51it/s] {'loss': 4.0543, 'learning_rate': 8.626990454437717e-07, 'epoch': 2.45} + 15%|█▌ | 56840/371472 [4:31:43<24:54:55, 3.51it/s] 15%|█▌ | 56841/371472 [4:31:43<25:05:50, 3.48it/s] 15%|█▌ | 56842/371472 [4:31:44<24:51:01, 3.52it/s] 15%|█▌ | 56843/371472 [4:31:44<24:47:15, 3.53it/s] 15%|█▌ | 56844/371472 [4:31:44<25:11:09, 3.47it/s] 15%|█▌ | 56845/371472 [4:31:44<24:46:50, 3.53it/s] 15%|█▌ | 56846/371472 [4:31:45<24:30:58, 3.56it/s] 15%|█▌ | 56847/371472 [4:31:45<24:07:42, 3.62it/s] 15%|█▌ | 56848/371472 [4:31:45<23:31:30, 3.71it/s] 15%|█▌ | 56849/371472 [4:31:46<25:19:56, 3.45it/s] 15%|█▌ | 56850/371472 [4:31:46<25:19:29, 3.45it/s] 15%|█▌ | 56851/371472 [4:31:46<24:31:23, 3.56it/s] 15%|█▌ | 56852/371472 [4:31:46<24:27:30, 3.57it/s] 15%|█▌ | 56853/371472 [4:31:47<30:03:11, 2.91it/s] 15%|█▌ | 56854/371472 [4:31:47<28:34:50, 3.06it/s] 15%|█▌ | 56855/371472 [4:31:48<27:18:34, 3.20it/s] 15%|█▌ | 56856/371472 [4:31:48<27:16:35, 3.20it/s] 15%|█▌ | 56857/371472 [4:31:48<25:07:05, 3.48it/s] 15%|█▌ | 56858/371472 [4:31:48<24:55:23, 3.51it/s] 15%|█▌ | 56859/371472 [4:31:49<24:11:20, 3.61it/s] 15%|█▌ | 56860/371472 [4:31:49<23:59:13, 3.64it/s] {'loss': 4.077, 'learning_rate': 8.626505634682928e-07, 'epoch': 2.45} + 15%|█▌ | 56860/371472 [4:31:49<23:59:13, 3.64it/s] 15%|█▌ | 56861/371472 [4:31:49<24:37:22, 3.55it/s] 15%|█▌ | 56862/371472 [4:31:49<24:14:19, 3.61it/s] 15%|█▌ | 56863/371472 [4:31:50<23:17:47, 3.75it/s] 15%|█▌ | 56864/371472 [4:31:50<25:15:15, 3.46it/s] 15%|█▌ | 56865/371472 [4:31:50<24:31:29, 3.56it/s] 15%|█▌ | 56866/371472 [4:31:51<24:03:03, 3.63it/s] 15%|█▌ | 56867/371472 [4:31:51<24:47:33, 3.52it/s] 15%|█▌ | 56868/371472 [4:31:51<25:41:15, 3.40it/s] 15%|█▌ | 56869/371472 [4:31:51<24:57:17, 3.50it/s] 15%|█▌ | 56870/371472 [4:31:52<25:12:05, 3.47it/s] 15%|█▌ | 56871/371472 [4:31:52<27:18:36, 3.20it/s] 15%|█▌ | 56872/371472 [4:31:52<26:42:31, 3.27it/s] 15%|█▌ | 56873/371472 [4:31:53<26:06:42, 3.35it/s] 15%|█▌ | 56874/371472 [4:31:53<25:41:51, 3.40it/s] 15%|█▌ | 56875/371472 [4:31:53<26:14:04, 3.33it/s] 15%|█▌ | 56876/371472 [4:31:54<24:54:14, 3.51it/s] 15%|█▌ | 56877/371472 [4:31:54<24:19:50, 3.59it/s] 15%|█▌ | 56878/371472 [4:31:54<25:06:22, 3.48it/s] 15%|█▌ | 56879/371472 [4:31:54<24:19:37, 3.59it/s] 15%|█▌ | 56880/371472 [4:31:55<23:10:25, 3.77it/s] {'loss': 4.2958, 'learning_rate': 8.626020814928139e-07, 'epoch': 2.45} + 15%|█▌ | 56880/371472 [4:31:55<23:10:25, 3.77it/s] 15%|█▌ | 56881/371472 [4:31:55<25:00:34, 3.49it/s] 15%|█▌ | 56882/371472 [4:31:55<24:50:47, 3.52it/s] 15%|█▌ | 56883/371472 [4:31:55<25:07:27, 3.48it/s] 15%|█▌ | 56884/371472 [4:31:56<23:53:53, 3.66it/s] 15%|█▌ | 56885/371472 [4:31:56<23:43:35, 3.68it/s] 15%|█▌ | 56886/371472 [4:31:56<25:04:49, 3.48it/s] 15%|█▌ | 56887/371472 [4:31:57<25:52:24, 3.38it/s] 15%|█▌ | 56888/371472 [4:31:57<25:34:51, 3.42it/s] 15%|█▌ | 56889/371472 [4:31:57<25:13:30, 3.46it/s] 15%|█▌ | 56890/371472 [4:31:58<26:07:37, 3.34it/s] 15%|█▌ | 56891/371472 [4:31:58<25:23:57, 3.44it/s] 15%|█▌ | 56892/371472 [4:31:58<24:59:11, 3.50it/s] 15%|█▌ | 56893/371472 [4:31:58<24:58:46, 3.50it/s] 15%|█▌ | 56894/371472 [4:31:59<24:09:27, 3.62it/s] 15%|█▌ | 56895/371472 [4:31:59<23:41:26, 3.69it/s] 15%|█▌ | 56896/371472 [4:31:59<22:50:29, 3.83it/s] 15%|█▌ | 56897/371472 [4:31:59<23:39:51, 3.69it/s] 15%|█▌ | 56898/371472 [4:32:00<22:57:33, 3.81it/s] 15%|█▌ | 56899/371472 [4:32:00<22:10:18, 3.94it/s] 15%|█▌ | 56900/371472 [4:32:00<22:01:18, 3.97it/s] {'loss': 3.9361, 'learning_rate': 8.625535995173349e-07, 'epoch': 2.45} + 15%|█▌ | 56900/371472 [4:32:00<22:01:18, 3.97it/s] 15%|█▌ | 56901/371472 [4:32:00<22:34:59, 3.87it/s] 15%|█▌ | 56902/371472 [4:32:01<24:10:49, 3.61it/s] 15%|█▌ | 56903/371472 [4:32:01<24:16:56, 3.60it/s] 15%|█▌ | 56904/371472 [4:32:01<26:51:52, 3.25it/s] 15%|█▌ | 56905/371472 [4:32:02<26:34:37, 3.29it/s] 15%|█▌ | 56906/371472 [4:32:02<25:03:50, 3.49it/s] 15%|█▌ | 56907/371472 [4:32:02<23:41:01, 3.69it/s] 15%|█▌ | 56908/371472 [4:32:02<23:46:10, 3.68it/s] 15%|█▌ | 56909/371472 [4:32:03<24:41:09, 3.54it/s] 15%|█▌ | 56910/371472 [4:32:03<23:21:56, 3.74it/s] 15%|█▌ | 56911/371472 [4:32:03<22:41:04, 3.85it/s] 15%|█▌ | 56912/371472 [4:32:03<23:20:10, 3.74it/s] 15%|█▌ | 56913/371472 [4:32:04<23:50:19, 3.67it/s] 15%|█▌ | 56914/371472 [4:32:04<23:38:20, 3.70it/s] 15%|█▌ | 56915/371472 [4:32:04<22:54:29, 3.81it/s] 15%|█▌ | 56916/371472 [4:32:05<23:03:18, 3.79it/s] 15%|█▌ | 56917/371472 [4:32:05<24:45:32, 3.53it/s] 15%|█▌ | 56918/371472 [4:32:05<24:22:27, 3.58it/s] 15%|█▌ | 56919/371472 [4:32:05<24:18:02, 3.60it/s] 15%|█▌ | 56920/371472 [4:32:06<23:32:45, 3.71it/s] {'loss': 4.3521, 'learning_rate': 8.625051175418561e-07, 'epoch': 2.45} + 15%|█▌ | 56920/371472 [4:32:06<23:32:45, 3.71it/s] 15%|█▌ | 56921/371472 [4:32:06<25:00:34, 3.49it/s] 15%|█▌ | 56922/371472 [4:32:06<24:25:32, 3.58it/s] 15%|█▌ | 56923/371472 [4:32:07<25:04:29, 3.48it/s] 15%|█▌ | 56924/371472 [4:32:07<26:21:16, 3.32it/s] 15%|█▌ | 56925/371472 [4:32:07<25:56:46, 3.37it/s] 15%|█▌ | 56926/371472 [4:32:08<28:01:11, 3.12it/s] 15%|█▌ | 56927/371472 [4:32:08<26:32:32, 3.29it/s] 15%|█▌ | 56928/371472 [4:32:08<25:47:37, 3.39it/s] 15%|█▌ | 56929/371472 [4:32:08<25:59:26, 3.36it/s] 15%|█▌ | 56930/371472 [4:32:09<24:44:22, 3.53it/s] 15%|█▌ | 56931/371472 [4:32:09<24:50:06, 3.52it/s] 15%|█▌ | 56932/371472 [4:32:09<25:01:52, 3.49it/s] 15%|█▌ | 56933/371472 [4:32:09<24:36:55, 3.55it/s] 15%|█▌ | 56934/371472 [4:32:10<23:25:14, 3.73it/s] 15%|█▌ | 56935/371472 [4:32:10<22:56:51, 3.81it/s] 15%|█▌ | 56936/371472 [4:32:10<22:25:48, 3.90it/s] 15%|█▌ | 56937/371472 [4:32:10<22:02:20, 3.96it/s] 15%|█▌ | 56938/371472 [4:32:11<23:33:16, 3.71it/s] 15%|█▌ | 56939/371472 [4:32:11<23:08:50, 3.77it/s] 15%|█▌ | 56940/371472 [4:32:11<22:45:29, 3.84it/s] {'loss': 4.432, 'learning_rate': 8.624566355663772e-07, 'epoch': 2.45} + 15%|█▌ | 56940/371472 [4:32:11<22:45:29, 3.84it/s] 15%|█▌ | 56941/371472 [4:32:12<22:58:55, 3.80it/s] 15%|█▌ | 56942/371472 [4:32:12<22:54:18, 3.81it/s] 15%|█▌ | 56943/371472 [4:32:12<22:06:05, 3.95it/s] 15%|█▌ | 56944/371472 [4:32:12<21:41:26, 4.03it/s] 15%|█▌ | 56945/371472 [4:32:13<22:43:17, 3.85it/s] 15%|█▌ | 56946/371472 [4:32:13<22:22:42, 3.90it/s] 15%|█▌ | 56947/371472 [4:32:13<22:15:07, 3.93it/s] 15%|█▌ | 56948/371472 [4:32:13<22:46:54, 3.83it/s] 15%|█▌ | 56949/371472 [4:32:14<22:15:08, 3.93it/s] 15%|█▌ | 56950/371472 [4:32:14<21:59:11, 3.97it/s] 15%|█▌ | 56951/371472 [4:32:14<22:25:41, 3.90it/s] 15%|█▌ | 56952/371472 [4:32:14<24:54:36, 3.51it/s] 15%|█▌ | 56953/371472 [4:32:15<24:30:49, 3.56it/s] 15%|█▌ | 56954/371472 [4:32:15<23:33:11, 3.71it/s] 15%|█▌ | 56955/371472 [4:32:15<23:44:55, 3.68it/s] 15%|█▌ | 56956/371472 [4:32:16<23:35:34, 3.70it/s] 15%|█▌ | 56957/371472 [4:32:16<23:46:43, 3.67it/s] 15%|█▌ | 56958/371472 [4:32:16<24:11:26, 3.61it/s] 15%|█▌ | 56959/371472 [4:32:16<23:21:52, 3.74it/s] 15%|█▌ | 56960/371472 [4:32:17<23:52:25, 3.66it/s] {'loss': 3.9227, 'learning_rate': 8.624081535908983e-07, 'epoch': 2.45} + 15%|█▌ | 56960/371472 [4:32:17<23:52:25, 3.66it/s] 15%|█▌ | 56961/371472 [4:32:17<23:50:43, 3.66it/s] 15%|█▌ | 56962/371472 [4:32:17<23:40:49, 3.69it/s] 15%|█▌ | 56963/371472 [4:32:17<24:30:16, 3.57it/s] 15%|█▌ | 56964/371472 [4:32:18<24:24:06, 3.58it/s] 15%|█▌ | 56965/371472 [4:32:18<24:14:24, 3.60it/s] 15%|█▌ | 56966/371472 [4:32:18<24:35:04, 3.55it/s] 15%|█▌ | 56967/371472 [4:32:19<23:36:51, 3.70it/s] 15%|█▌ | 56968/371472 [4:32:19<23:33:20, 3.71it/s] 15%|█▌ | 56969/371472 [4:32:19<23:42:54, 3.68it/s] 15%|█▌ | 56970/371472 [4:32:19<26:49:32, 3.26it/s] 15%|█▌ | 56971/371472 [4:32:20<25:00:20, 3.49it/s] 15%|█▌ | 56972/371472 [4:32:20<23:58:54, 3.64it/s] 15%|█▌ | 56973/371472 [4:32:20<25:32:27, 3.42it/s] 15%|█▌ | 56974/371472 [4:32:21<26:01:49, 3.36it/s] 15%|█▌ | 56975/371472 [4:32:21<24:51:40, 3.51it/s] 15%|█▌ | 56976/371472 [4:32:21<24:15:59, 3.60it/s] 15%|█▌ | 56977/371472 [4:32:21<23:27:42, 3.72it/s] 15%|█▌ | 56978/371472 [4:32:22<23:31:12, 3.71it/s] 15%|█▌ | 56979/371472 [4:32:22<26:16:16, 3.33it/s] 15%|█▌ | 56980/371472 [4:32:22<25:30:33, 3.42it/s] {'loss': 4.1412, 'learning_rate': 8.623596716154194e-07, 'epoch': 2.45} + 15%|█▌ | 56980/371472 [4:32:22<25:30:33, 3.42it/s] 15%|█▌ | 56981/371472 [4:32:23<25:19:10, 3.45it/s] 15%|█▌ | 56982/371472 [4:32:23<25:36:05, 3.41it/s] 15%|█▌ | 56983/371472 [4:32:23<24:58:47, 3.50it/s] 15%|█▌ | 56984/371472 [4:32:23<25:13:43, 3.46it/s] 15%|█▌ | 56985/371472 [4:32:24<24:36:23, 3.55it/s] 15%|█▌ | 56986/371472 [4:32:24<25:07:22, 3.48it/s] 15%|█▌ | 56987/371472 [4:32:24<25:36:10, 3.41it/s] 15%|█▌ | 56988/371472 [4:32:25<25:23:14, 3.44it/s] 15%|█▌ | 56989/371472 [4:32:25<25:18:31, 3.45it/s] 15%|█▌ | 56990/371472 [4:32:25<24:26:40, 3.57it/s] 15%|█▌ | 56991/371472 [4:32:25<24:07:05, 3.62it/s] 15%|█▌ | 56992/371472 [4:32:26<23:40:50, 3.69it/s] 15%|█▌ | 56993/371472 [4:32:26<27:02:23, 3.23it/s] 15%|█▌ | 56994/371472 [4:32:26<26:27:13, 3.30it/s] 15%|█▌ | 56995/371472 [4:32:27<28:04:11, 3.11it/s] 15%|█▌ | 56996/371472 [4:32:27<25:57:34, 3.37it/s] 15%|█▌ | 56997/371472 [4:32:27<25:19:06, 3.45it/s] 15%|█▌ | 56998/371472 [4:32:27<23:56:38, 3.65it/s] 15%|█▌ | 56999/371472 [4:32:28<23:41:06, 3.69it/s] 15%|█▌ | 57000/371472 [4:32:28<23:16:52, 3.75it/s] {'loss': 4.073, 'learning_rate': 8.623111896399405e-07, 'epoch': 2.46} + 15%|█▌ | 57000/371472 [4:32:28<23:16:52, 3.75it/s] 15%|█▌ | 57001/371472 [4:32:28<23:44:53, 3.68it/s] 15%|█▌ | 57002/371472 [4:32:29<23:54:46, 3.65it/s] 15%|█▌ | 57003/371472 [4:32:29<23:34:10, 3.71it/s] 15%|█▌ | 57004/371472 [4:32:29<23:50:55, 3.66it/s] 15%|█▌ | 57005/371472 [4:32:29<23:27:52, 3.72it/s] 15%|█▌ | 57006/371472 [4:32:30<23:12:08, 3.76it/s] 15%|█▌ | 57007/371472 [4:32:30<24:39:54, 3.54it/s] 15%|█▌ | 57008/371472 [4:32:30<24:30:35, 3.56it/s] 15%|█▌ | 57009/371472 [4:32:30<24:37:52, 3.55it/s] 15%|█▌ | 57010/371472 [4:32:31<24:59:44, 3.49it/s] 15%|█▌ | 57011/371472 [4:32:31<24:13:16, 3.61it/s] 15%|█▌ | 57012/371472 [4:32:31<24:28:04, 3.57it/s] 15%|█▌ | 57013/371472 [4:32:32<25:04:37, 3.48it/s] 15%|█▌ | 57014/371472 [4:32:32<25:23:46, 3.44it/s] 15%|█▌ | 57015/371472 [4:32:32<25:28:16, 3.43it/s] 15%|█▌ | 57016/371472 [4:32:32<24:11:36, 3.61it/s] 15%|█▌ | 57017/371472 [4:32:33<23:33:12, 3.71it/s] 15%|█▌ | 57018/371472 [4:32:33<23:05:28, 3.78it/s] 15%|█▌ | 57019/371472 [4:32:33<24:33:01, 3.56it/s] 15%|█▌ | 57020/371472 [4:32:34<23:17:54, 3.75it/s] {'loss': 4.2698, 'learning_rate': 8.622627076644616e-07, 'epoch': 2.46} + 15%|█▌ | 57020/371472 [4:32:34<23:17:54, 3.75it/s] 15%|█▌ | 57021/371472 [4:32:34<24:04:37, 3.63it/s] 15%|█▌ | 57022/371472 [4:32:34<24:48:06, 3.52it/s] 15%|█▌ | 57023/371472 [4:32:34<27:05:40, 3.22it/s] 15%|█▌ | 57024/371472 [4:32:35<25:10:48, 3.47it/s] 15%|█▌ | 57025/371472 [4:32:35<23:50:49, 3.66it/s] 15%|█▌ | 57026/371472 [4:32:35<24:28:17, 3.57it/s] 15%|█▌ | 57027/371472 [4:32:36<23:47:35, 3.67it/s] 15%|█▌ | 57028/371472 [4:32:36<24:06:10, 3.62it/s] 15%|█▌ | 57029/371472 [4:32:36<24:59:49, 3.49it/s] 15%|█▌ | 57030/371472 [4:32:36<24:30:34, 3.56it/s] 15%|█▌ | 57031/371472 [4:32:37<27:27:20, 3.18it/s] 15%|█▌ | 57032/371472 [4:32:37<27:07:50, 3.22it/s] 15%|█▌ | 57033/371472 [4:32:37<26:07:36, 3.34it/s] 15%|█▌ | 57034/371472 [4:32:38<25:11:26, 3.47it/s] 15%|█▌ | 57035/371472 [4:32:38<25:15:43, 3.46it/s] 15%|█▌ | 57036/371472 [4:32:38<24:58:40, 3.50it/s] 15%|█▌ | 57037/371472 [4:32:38<23:55:46, 3.65it/s] 15%|█▌ | 57038/371472 [4:32:39<23:31:22, 3.71it/s] 15%|█▌ | 57039/371472 [4:32:39<24:09:05, 3.62it/s] 15%|█▌ | 57040/371472 [4:32:39<25:11:40, 3.47it/s] {'loss': 4.2295, 'learning_rate': 8.622142256889827e-07, 'epoch': 2.46} + 15%|█▌ | 57040/371472 [4:32:39<25:11:40, 3.47it/s] 15%|█▌ | 57041/371472 [4:32:40<24:46:23, 3.53it/s] 15%|█▌ | 57042/371472 [4:32:40<24:26:27, 3.57it/s] 15%|█▌ | 57043/371472 [4:32:40<24:21:05, 3.59it/s] 15%|█▌ | 57044/371472 [4:32:40<23:27:54, 3.72it/s] 15%|█▌ | 57045/371472 [4:32:41<23:51:47, 3.66it/s] 15%|█▌ | 57046/371472 [4:32:41<25:07:02, 3.48it/s] 15%|█▌ | 57047/371472 [4:32:41<25:15:59, 3.46it/s] 15%|█▌ | 57048/371472 [4:32:41<24:17:08, 3.60it/s] 15%|█▌ | 57049/371472 [4:32:42<24:54:23, 3.51it/s] 15%|█▌ | 57050/371472 [4:32:42<26:03:18, 3.35it/s] 15%|█▌ | 57051/371472 [4:32:42<25:49:34, 3.38it/s] 15%|█▌ | 57052/371472 [4:32:43<27:30:16, 3.18it/s] 15%|█▌ | 57053/371472 [4:32:43<25:39:57, 3.40it/s] 15%|█▌ | 57054/371472 [4:32:43<25:31:16, 3.42it/s] 15%|█▌ | 57055/371472 [4:32:44<24:49:03, 3.52it/s] 15%|█▌ | 57056/371472 [4:32:44<24:20:04, 3.59it/s] 15%|█▌ | 57057/371472 [4:32:44<23:41:45, 3.69it/s] 15%|█▌ | 57058/371472 [4:32:44<23:43:31, 3.68it/s] 15%|█▌ | 57059/371472 [4:32:45<24:07:59, 3.62it/s] 15%|█▌ | 57060/371472 [4:32:45<24:06:50, 3.62it/s] {'loss': 4.3758, 'learning_rate': 8.621657437135038e-07, 'epoch': 2.46} + 15%|█▌ | 57060/371472 [4:32:45<24:06:50, 3.62it/s] 15%|█▌ | 57061/371472 [4:32:45<24:27:02, 3.57it/s] 15%|█▌ | 57062/371472 [4:32:46<25:08:23, 3.47it/s] 15%|█▌ | 57063/371472 [4:32:46<25:43:30, 3.39it/s] 15%|█▌ | 57064/371472 [4:32:46<26:35:34, 3.28it/s] 15%|█▌ | 57065/371472 [4:32:46<25:27:52, 3.43it/s] 15%|█▌ | 57066/371472 [4:32:47<25:03:16, 3.49it/s] 15%|█▌ | 57067/371472 [4:32:47<24:45:04, 3.53it/s] 15%|█▌ | 57068/371472 [4:32:47<23:44:52, 3.68it/s] 15%|█▌ | 57069/371472 [4:32:48<24:18:39, 3.59it/s] 15%|█▌ | 57070/371472 [4:32:48<24:01:24, 3.64it/s] 15%|█▌ | 57071/371472 [4:32:48<25:26:19, 3.43it/s] 15%|█▌ | 57072/371472 [4:32:48<26:03:21, 3.35it/s] 15%|█▌ | 57073/371472 [4:32:49<26:34:51, 3.29it/s] 15%|█▌ | 57074/371472 [4:32:49<24:45:06, 3.53it/s] 15%|█▌ | 57075/371472 [4:32:49<24:09:42, 3.61it/s] 15%|█▌ | 57076/371472 [4:32:49<23:02:58, 3.79it/s] 15%|█▌ | 57077/371472 [4:32:50<22:21:59, 3.90it/s] 15%|��▌ | 57078/371472 [4:32:50<24:06:47, 3.62it/s] 15%|█▌ | 57079/371472 [4:32:50<23:41:43, 3.69it/s] 15%|█▌ | 57080/371472 [4:32:51<23:30:11, 3.72it/s] {'loss': 4.4118, 'learning_rate': 8.621172617380249e-07, 'epoch': 2.46} + 15%|█▌ | 57080/371472 [4:32:51<23:30:11, 3.72it/s] 15%|█▌ | 57081/371472 [4:32:51<23:32:03, 3.71it/s] 15%|█▌ | 57082/371472 [4:32:51<23:11:48, 3.76it/s] 15%|█▌ | 57083/371472 [4:32:51<23:25:07, 3.73it/s] 15%|█▌ | 57084/371472 [4:32:52<23:32:37, 3.71it/s] 15%|█▌ | 57085/371472 [4:32:52<22:52:51, 3.82it/s] 15%|█▌ | 57086/371472 [4:32:52<24:18:32, 3.59it/s] 15%|█▌ | 57087/371472 [4:32:52<24:14:02, 3.60it/s] 15%|█▌ | 57088/371472 [4:32:53<27:40:08, 3.16it/s] 15%|█▌ | 57089/371472 [4:32:53<25:51:47, 3.38it/s] 15%|█▌ | 57090/371472 [4:32:53<24:35:30, 3.55it/s] 15%|█▌ | 57091/371472 [4:32:54<23:46:43, 3.67it/s] 15%|█▌ | 57092/371472 [4:32:54<24:37:24, 3.55it/s] 15%|█▌ | 57093/371472 [4:32:54<23:17:15, 3.75it/s] 15%|█▌ | 57094/371472 [4:32:54<23:48:01, 3.67it/s] 15%|█▌ | 57095/371472 [4:32:55<26:03:17, 3.35it/s] 15%|█▌ | 57096/371472 [4:32:55<24:50:29, 3.52it/s] 15%|█▌ | 57097/371472 [4:32:55<24:42:57, 3.53it/s] 15%|█▌ | 57098/371472 [4:32:56<23:33:57, 3.71it/s] 15%|█▌ | 57099/371472 [4:32:56<22:46:21, 3.83it/s] 15%|█▌ | 57100/371472 [4:32:56<25:23:09, 3.44it/s] {'loss': 4.3054, 'learning_rate': 8.620687797625461e-07, 'epoch': 2.46} + 15%|█▌ | 57100/371472 [4:32:56<25:23:09, 3.44it/s] 15%|█▌ | 57101/371472 [4:32:56<25:14:15, 3.46it/s] 15%|█▌ | 57102/371472 [4:32:57<24:45:20, 3.53it/s] 15%|█▌ | 57103/371472 [4:32:57<23:59:46, 3.64it/s] 15%|█▌ | 57104/371472 [4:32:57<24:50:45, 3.51it/s] 15%|█▌ | 57105/371472 [4:32:58<26:21:14, 3.31it/s] 15%|█▌ | 57106/371472 [4:32:58<25:05:01, 3.48it/s] 15%|█▌ | 57107/371472 [4:32:58<25:15:01, 3.46it/s] 15%|█▌ | 57108/371472 [4:32:58<25:06:33, 3.48it/s] 15%|█▌ | 57109/371472 [4:32:59<26:01:31, 3.36it/s] 15%|█▌ | 57110/371472 [4:32:59<25:31:09, 3.42it/s] 15%|█▌ | 57111/371472 [4:32:59<25:07:28, 3.48it/s] 15%|█▌ | 57112/371472 [4:33:00<25:58:28, 3.36it/s] 15%|█▌ | 57113/371472 [4:33:00<25:33:29, 3.42it/s] 15%|█▌ | 57114/371472 [4:33:00<25:02:09, 3.49it/s] 15%|█▌ | 57115/371472 [4:33:00<24:35:08, 3.55it/s] 15%|█▌ | 57116/371472 [4:33:01<27:13:43, 3.21it/s] 15%|█▌ | 57117/371472 [4:33:01<26:20:35, 3.31it/s] 15%|█▌ | 57118/371472 [4:33:01<25:31:24, 3.42it/s] 15%|█▌ | 57119/371472 [4:33:02<25:15:34, 3.46it/s] 15%|█▌ | 57120/371472 [4:33:02<28:02:55, 3.11it/s] {'loss': 4.1352, 'learning_rate': 8.620202977870671e-07, 'epoch': 2.46} + 15%|█▌ | 57120/371472 [4:33:02<28:02:55, 3.11it/s] 15%|█▌ | 57121/371472 [4:33:02<26:12:42, 3.33it/s] 15%|█▌ | 57122/371472 [4:33:03<25:34:59, 3.41it/s] 15%|█▌ | 57123/371472 [4:33:03<25:27:42, 3.43it/s] 15%|█▌ | 57124/371472 [4:33:03<24:54:48, 3.50it/s] 15%|█▌ | 57125/371472 [4:33:03<25:34:43, 3.41it/s] 15%|█▌ | 57126/371472 [4:33:04<25:03:19, 3.49it/s] 15%|█▌ | 57127/371472 [4:33:04<24:55:33, 3.50it/s] 15%|█▌ | 57128/371472 [4:33:04<24:53:37, 3.51it/s] 15%|█▌ | 57129/371472 [4:33:05<27:08:18, 3.22it/s] 15%|█▌ | 57130/371472 [4:33:05<25:47:02, 3.39it/s] 15%|█▌ | 57131/371472 [4:33:05<26:46:23, 3.26it/s] 15%|█▌ | 57132/371472 [4:33:06<25:23:07, 3.44it/s] 15%|█▌ | 57133/371472 [4:33:06<25:12:50, 3.46it/s] 15%|█▌ | 57134/371472 [4:33:06<25:49:23, 3.38it/s] 15%|█▌ | 57135/371472 [4:33:06<24:49:54, 3.52it/s] 15%|█▌ | 57136/371472 [4:33:07<25:43:49, 3.39it/s] 15%|█▌ | 57137/371472 [4:33:07<25:18:00, 3.45it/s] 15%|█▌ | 57138/371472 [4:33:07<26:03:27, 3.35it/s] 15%|█▌ | 57139/371472 [4:33:08<24:44:38, 3.53it/s] 15%|█▌ | 57140/371472 [4:33:08<24:50:58, 3.51it/s] {'loss': 4.2934, 'learning_rate': 8.619718158115882e-07, 'epoch': 2.46} + 15%|█▌ | 57140/371472 [4:33:08<24:50:58, 3.51it/s] 15%|█▌ | 57141/371472 [4:33:08<23:55:35, 3.65it/s] 15%|█▌ | 57142/371472 [4:33:08<24:02:36, 3.63it/s] 15%|█▌ | 57143/371472 [4:33:09<24:24:17, 3.58it/s] 15%|█▌ | 57144/371472 [4:33:09<23:33:27, 3.71it/s] 15%|█▌ | 57145/371472 [4:33:09<25:03:22, 3.48it/s] 15%|█▌ | 57146/371472 [4:33:10<24:53:46, 3.51it/s] 15%|█▌ | 57147/371472 [4:33:10<25:53:54, 3.37it/s] 15%|█▌ | 57148/371472 [4:33:10<24:21:51, 3.58it/s] 15%|█▌ | 57149/371472 [4:33:10<23:43:23, 3.68it/s] 15%|█▌ | 57150/371472 [4:33:11<23:02:52, 3.79it/s] 15%|█▌ | 57151/371472 [4:33:11<23:09:35, 3.77it/s] 15%|█▌ | 57152/371472 [4:33:11<25:04:48, 3.48it/s] 15%|█▌ | 57153/371472 [4:33:11<24:39:56, 3.54it/s] 15%|█▌ | 57154/371472 [4:33:12<25:15:57, 3.46it/s] 15%|█▌ | 57155/371472 [4:33:12<25:43:49, 3.39it/s] 15%|█▌ | 57156/371472 [4:33:12<28:20:55, 3.08it/s] 15%|█▌ | 57157/371472 [4:33:13<29:12:04, 2.99it/s] 15%|█▌ | 57158/371472 [4:33:13<28:22:54, 3.08it/s] 15%|█▌ | 57159/371472 [4:33:13<26:30:27, 3.29it/s] 15%|█▌ | 57160/371472 [4:33:14<27:24:26, 3.19it/s] {'loss': 4.1653, 'learning_rate': 8.619233338361093e-07, 'epoch': 2.46} + 15%|█▌ | 57160/371472 [4:33:14<27:24:26, 3.19it/s] 15%|█▌ | 57161/371472 [4:33:14<26:49:22, 3.25it/s] 15%|█▌ | 57162/371472 [4:33:14<26:47:27, 3.26it/s] 15%|█▌ | 57163/371472 [4:33:15<26:32:03, 3.29it/s] 15%|█▌ | 57164/371472 [4:33:15<26:25:39, 3.30it/s] 15%|█▌ | 57165/371472 [4:33:15<25:31:55, 3.42it/s] 15%|█▌ | 57166/371472 [4:33:15<24:29:16, 3.57it/s] 15%|█▌ | 57167/371472 [4:33:16<26:10:56, 3.33it/s] 15%|█▌ | 57168/371472 [4:33:16<25:52:25, 3.37it/s] 15%|█▌ | 57169/371472 [4:33:16<25:00:12, 3.49it/s] 15%|█▌ | 57170/371472 [4:33:17<25:10:23, 3.47it/s] 15%|█▌ | 57171/371472 [4:33:17<24:40:36, 3.54it/s] 15%|█▌ | 57172/371472 [4:33:17<25:54:03, 3.37it/s] 15%|█▌ | 57173/371472 [4:33:18<26:20:21, 3.31it/s] 15%|█▌ | 57174/371472 [4:33:18<26:21:40, 3.31it/s] 15%|█▌ | 57175/371472 [4:33:18<27:15:55, 3.20it/s] 15%|█▌ | 57176/371472 [4:33:18<26:20:12, 3.31it/s] 15%|█▌ | 57177/371472 [4:33:19<24:50:47, 3.51it/s] 15%|█▌ | 57178/371472 [4:33:19<25:02:10, 3.49it/s] 15%|█▌ | 57179/371472 [4:33:19<24:02:44, 3.63it/s] 15%|█▌ | 57180/371472 [4:33:19<22:52:58, 3.82it/s] {'loss': 4.2276, 'learning_rate': 8.618748518606304e-07, 'epoch': 2.46} + 15%|█▌ | 57180/371472 [4:33:19<22:52:58, 3.82it/s] 15%|█▌ | 57181/371472 [4:33:20<22:53:33, 3.81it/s] 15%|█▌ | 57182/371472 [4:33:20<22:27:02, 3.89it/s] 15%|█▌ | 57183/371472 [4:33:20<23:15:36, 3.75it/s] 15%|█▌ | 57184/371472 [4:33:21<22:55:04, 3.81it/s] 15%|█▌ | 57185/371472 [4:33:21<25:17:31, 3.45it/s] 15%|█▌ | 57186/371472 [4:33:21<25:36:14, 3.41it/s] 15%|█▌ | 57187/371472 [4:33:22<27:17:29, 3.20it/s] 15%|█▌ | 57188/371472 [4:33:22<29:46:25, 2.93it/s] 15%|█▌ | 57189/371472 [4:33:22<27:55:11, 3.13it/s] 15%|█▌ | 57190/371472 [4:33:23<27:34:18, 3.17it/s] 15%|█▌ | 57191/371472 [4:33:23<25:58:05, 3.36it/s] 15%|█▌ | 57192/371472 [4:33:23<25:06:30, 3.48it/s] 15%|█▌ | 57193/371472 [4:33:23<24:56:01, 3.50it/s] 15%|█▌ | 57194/371472 [4:33:24<25:58:34, 3.36it/s] 15%|█▌ | 57195/371472 [4:33:24<25:28:19, 3.43it/s] 15%|█▌ | 57196/371472 [4:33:24<27:00:13, 3.23it/s] 15%|█▌ | 57197/371472 [4:33:25<26:00:45, 3.36it/s] 15%|█▌ | 57198/371472 [4:33:25<24:25:01, 3.58it/s] 15%|█▌ | 57199/371472 [4:33:25<24:41:10, 3.54it/s] 15%|█▌ | 57200/371472 [4:33:25<24:16:58, 3.60it/s] {'loss': 4.1756, 'learning_rate': 8.618263698851515e-07, 'epoch': 2.46} + 15%|█▌ | 57200/371472 [4:33:25<24:16:58, 3.60it/s] 15%|█▌ | 57201/371472 [4:33:26<23:41:20, 3.69it/s] 15%|█▌ | 57202/371472 [4:33:26<23:04:59, 3.78it/s] 15%|█▌ | 57203/371472 [4:33:26<22:15:42, 3.92it/s] 15%|█▌ | 57204/371472 [4:33:26<22:14:54, 3.92it/s] 15%|█▌ | 57205/371472 [4:33:27<23:58:49, 3.64it/s] 15%|█▌ | 57206/371472 [4:33:27<23:28:43, 3.72it/s] 15%|█▌ | 57207/371472 [4:33:27<23:07:46, 3.77it/s] 15%|█▌ | 57208/371472 [4:33:27<22:53:03, 3.81it/s] 15%|█▌ | 57209/371472 [4:33:28<23:16:50, 3.75it/s] 15%|█▌ | 57210/371472 [4:33:28<23:42:52, 3.68it/s] 15%|█▌ | 57211/371472 [4:33:28<24:37:01, 3.55it/s] 15%|█▌ | 57212/371472 [4:33:29<24:09:52, 3.61it/s] 15%|█▌ | 57213/371472 [4:33:29<23:40:03, 3.69it/s] 15%|█▌ | 57214/371472 [4:33:29<25:25:19, 3.43it/s] 15%|█▌ | 57215/371472 [4:33:29<24:31:44, 3.56it/s] 15%|█▌ | 57216/371472 [4:33:30<25:54:28, 3.37it/s] 15%|█▌ | 57217/371472 [4:33:30<26:12:53, 3.33it/s] 15%|█▌ | 57218/371472 [4:33:30<25:02:29, 3.49it/s] 15%|█▌ | 57219/371472 [4:33:31<24:56:36, 3.50it/s] 15%|█▌ | 57220/371472 [4:33:31<24:14:52, 3.60it/s] {'loss': 4.2888, 'learning_rate': 8.617778879096727e-07, 'epoch': 2.46} + 15%|█▌ | 57220/371472 [4:33:31<24:14:52, 3.60it/s] 15%|█▌ | 57221/371472 [4:33:31<24:06:14, 3.62it/s] 15%|█▌ | 57222/371472 [4:33:31<23:18:02, 3.75it/s] 15%|█▌ | 57223/371472 [4:33:32<23:37:16, 3.70it/s] 15%|█▌ | 57224/371472 [4:33:32<23:36:14, 3.70it/s] 15%|█▌ | 57225/371472 [4:33:32<23:17:56, 3.75it/s] 15%|█▌ | 57226/371472 [4:33:32<24:05:11, 3.62it/s] 15%|█▌ | 57227/371472 [4:33:33<24:34:42, 3.55it/s] 15%|█▌ | 57228/371472 [4:33:33<25:02:00, 3.49it/s] 15%|█▌ | 57229/371472 [4:33:33<24:33:22, 3.55it/s] 15%|█▌ | 57230/371472 [4:33:34<24:09:31, 3.61it/s] 15%|█▌ | 57231/371472 [4:33:34<24:15:17, 3.60it/s] 15%|█▌ | 57232/371472 [4:33:34<24:34:01, 3.55it/s] 15%|█▌ | 57233/371472 [4:33:34<23:27:07, 3.72it/s] 15%|█▌ | 57234/371472 [4:33:35<22:52:38, 3.82it/s] 15%|█▌ | 57235/371472 [4:33:35<22:34:31, 3.87it/s] 15%|█▌ | 57236/371472 [4:33:35<24:09:24, 3.61it/s] 15%|█▌ | 57237/371472 [4:33:35<23:47:09, 3.67it/s] 15%|█▌ | 57238/371472 [4:33:36<25:52:06, 3.37it/s] 15%|█▌ | 57239/371472 [4:33:36<25:36:05, 3.41it/s] 15%|█▌ | 57240/371472 [4:33:36<26:05:24, 3.35it/s] {'loss': 4.1995, 'learning_rate': 8.617294059341938e-07, 'epoch': 2.47} + 15%|█▌ | 57240/371472 [4:33:36<26:05:24, 3.35it/s] 15%|█▌ | 57241/371472 [4:33:37<24:32:56, 3.56it/s] 15%|█▌ | 57242/371472 [4:33:37<25:33:36, 3.41it/s] 15%|█▌ | 57243/371472 [4:33:37<25:08:22, 3.47it/s] 15%|█▌ | 57244/371472 [4:33:38<24:30:58, 3.56it/s] 15%|█▌ | 57245/371472 [4:33:38<24:27:14, 3.57it/s] 15%|█▌ | 57246/371472 [4:33:38<23:23:01, 3.73it/s] 15%|█▌ | 57247/371472 [4:33:38<23:27:51, 3.72it/s] 15%|█▌ | 57248/371472 [4:33:39<22:35:04, 3.86it/s] 15%|█▌ | 57249/371472 [4:33:39<23:24:49, 3.73it/s] 15%|█▌ | 57250/371472 [4:33:39<24:01:55, 3.63it/s] 15%|█▌ | 57251/371472 [4:33:39<23:44:14, 3.68it/s] 15%|█▌ | 57252/371472 [4:33:40<23:37:37, 3.69it/s] 15%|█▌ | 57253/371472 [4:33:40<23:34:11, 3.70it/s] 15%|█▌ | 57254/371472 [4:33:40<23:12:58, 3.76it/s] 15%|█▌ | 57255/371472 [4:33:41<24:16:24, 3.60it/s] 15%|█▌ | 57256/371472 [4:33:41<24:57:58, 3.50it/s] 15%|█▌ | 57257/371472 [4:33:41<24:32:35, 3.56it/s] 15%|█▌ | 57258/371472 [4:33:41<24:41:43, 3.53it/s] 15%|█▌ | 57259/371472 [4:33:42<23:42:20, 3.68it/s] 15%|█▌ | 57260/371472 [4:33:42<23:57:45, 3.64it/s] {'loss': 4.2819, 'learning_rate': 8.616809239587149e-07, 'epoch': 2.47} + 15%|█▌ | 57260/371472 [4:33:42<23:57:45, 3.64it/s] 15%|█▌ | 57261/371472 [4:33:42<25:13:48, 3.46it/s] 15%|█▌ | 57262/371472 [4:33:42<24:17:37, 3.59it/s] 15%|█▌ | 57263/371472 [4:33:43<23:19:51, 3.74it/s] 15%|█▌ | 57264/371472 [4:33:43<24:08:33, 3.62it/s] 15%|█▌ | 57265/371472 [4:33:43<23:38:24, 3.69it/s] 15%|█▌ | 57266/371472 [4:33:44<24:07:06, 3.62it/s] 15%|█▌ | 57267/371472 [4:33:44<26:02:17, 3.35it/s] 15%|█▌ | 57268/371472 [4:33:44<25:26:05, 3.43it/s] 15%|█▌ | 57269/371472 [4:33:44<25:06:17, 3.48it/s] 15%|█▌ | 57270/371472 [4:33:45<26:41:09, 3.27it/s] 15%|█▌ | 57271/371472 [4:33:45<25:29:04, 3.42it/s] 15%|█▌ | 57272/371472 [4:33:45<26:56:43, 3.24it/s] 15%|█▌ | 57273/371472 [4:33:46<25:14:01, 3.46it/s] 15%|█▌ | 57274/371472 [4:33:46<25:19:26, 3.45it/s] 15%|█▌ | 57275/371472 [4:33:46<24:34:03, 3.55it/s] 15%|█▌ | 57276/371472 [4:33:46<23:35:47, 3.70it/s] 15%|█▌ | 57277/371472 [4:33:47<24:02:04, 3.63it/s] 15%|█▌ | 57278/371472 [4:33:47<24:14:52, 3.60it/s] 15%|█▌ | 57279/371472 [4:33:47<24:07:26, 3.62it/s] 15%|█▌ | 57280/371472 [4:33:48<24:03:22, 3.63it/s] {'loss': 4.3963, 'learning_rate': 8.616324419832359e-07, 'epoch': 2.47} + 15%|█▌ | 57280/371472 [4:33:48<24:03:22, 3.63it/s] 15%|█▌ | 57281/371472 [4:33:48<24:12:43, 3.60it/s] 15%|█▌ | 57282/371472 [4:33:48<24:19:09, 3.59it/s] 15%|█▌ | 57283/371472 [4:33:48<24:14:55, 3.60it/s] 15%|█▌ | 57284/371472 [4:33:49<25:42:00, 3.40it/s] 15%|█▌ | 57285/371472 [4:33:49<24:13:16, 3.60it/s] 15%|█▌ | 57286/371472 [4:33:49<24:07:37, 3.62it/s] 15%|█▌ | 57287/371472 [4:33:50<23:43:13, 3.68it/s] 15%|█▌ | 57288/371472 [4:33:50<24:05:15, 3.62it/s] 15%|█▌ | 57289/371472 [4:33:50<24:20:00, 3.59it/s] 15%|█▌ | 57290/371472 [4:33:50<26:03:40, 3.35it/s] 15%|█▌ | 57291/371472 [4:33:51<24:25:40, 3.57it/s] 15%|█▌ | 57292/371472 [4:33:51<25:28:53, 3.42it/s] 15%|█▌ | 57293/371472 [4:33:51<26:04:25, 3.35it/s] 15%|█▌ | 57294/371472 [4:33:52<24:58:03, 3.50it/s] 15%|█▌ | 57295/371472 [4:33:52<25:30:00, 3.42it/s] 15%|█▌ | 57296/371472 [4:33:52<24:44:39, 3.53it/s] 15%|█▌ | 57297/371472 [4:33:53<31:51:27, 2.74it/s] 15%|█▌ | 57298/371472 [4:33:53<29:25:55, 2.97it/s] 15%|█▌ | 57299/371472 [4:33:53<26:42:14, 3.27it/s] 15%|█▌ | 57300/371472 [4:33:54<26:48:41, 3.25it/s] {'loss': 4.3, 'learning_rate': 8.615839600077571e-07, 'epoch': 2.47} + 15%|█▌ | 57300/371472 [4:33:54<26:48:41, 3.25it/s] 15%|█▌ | 57301/371472 [4:33:54<25:54:38, 3.37it/s] 15%|█▌ | 57302/371472 [4:33:54<24:29:59, 3.56it/s] 15%|█▌ | 57303/371472 [4:33:54<24:47:53, 3.52it/s] 15%|█▌ | 57304/371472 [4:33:55<23:49:44, 3.66it/s] 15%|█▌ | 57305/371472 [4:33:55<25:10:28, 3.47it/s] 15%|█▌ | 57306/371472 [4:33:55<26:02:43, 3.35it/s] 15%|█▌ | 57307/371472 [4:33:55<24:33:10, 3.55it/s] 15%|█▌ | 57308/371472 [4:33:56<24:32:55, 3.55it/s] 15%|█▌ | 57309/371472 [4:33:56<23:39:44, 3.69it/s] 15%|█▌ | 57310/371472 [4:33:56<24:01:24, 3.63it/s] 15%|█▌ | 57311/371472 [4:33:57<24:35:16, 3.55it/s] 15%|█▌ | 57312/371472 [4:33:57<25:54:36, 3.37it/s] 15%|█▌ | 57313/371472 [4:33:57<25:07:13, 3.47it/s] 15%|█▌ | 57314/371472 [4:33:57<25:42:06, 3.40it/s] 15%|█▌ | 57315/371472 [4:33:58<24:44:00, 3.53it/s] 15%|█▌ | 57316/371472 [4:33:58<24:13:11, 3.60it/s] 15%|█▌ | 57317/371472 [4:33:58<24:46:03, 3.52it/s] 15%|█▌ | 57318/371472 [4:33:59<25:25:23, 3.43it/s] 15%|█▌ | 57319/371472 [4:33:59<25:56:51, 3.36it/s] 15%|█▌ | 57320/371472 [4:33:59<26:34:23, 3.28it/s] {'loss': 4.0509, 'learning_rate': 8.615354780322782e-07, 'epoch': 2.47} + 15%|█▌ | 57320/371472 [4:33:59<26:34:23, 3.28it/s] 15%|█▌ | 57321/371472 [4:34:00<26:11:03, 3.33it/s] 15%|█▌ | 57322/371472 [4:34:00<25:08:56, 3.47it/s] 15%|█▌ | 57323/371472 [4:34:00<25:23:58, 3.44it/s] 15%|█▌ | 57324/371472 [4:34:00<24:30:58, 3.56it/s] 15%|█▌ | 57325/371472 [4:34:01<24:32:04, 3.56it/s] 15%|█▌ | 57326/371472 [4:34:01<24:49:56, 3.51it/s] 15%|█▌ | 57327/371472 [4:34:01<24:40:26, 3.54it/s] 15%|█▌ | 57328/371472 [4:34:01<24:16:27, 3.59it/s] 15%|█▌ | 57329/371472 [4:34:02<24:19:25, 3.59it/s] 15%|█▌ | 57330/371472 [4:34:02<24:20:55, 3.58it/s] 15%|█▌ | 57331/371472 [4:34:02<23:33:51, 3.70it/s] 15%|█▌ | 57332/371472 [4:34:03<24:13:29, 3.60it/s] 15%|█▌ | 57333/371472 [4:34:03<24:57:35, 3.50it/s] 15%|█▌ | 57334/371472 [4:34:03<26:44:43, 3.26it/s] 15%|█▌ | 57335/371472 [4:34:04<26:14:38, 3.32it/s] 15%|█▌ | 57336/371472 [4:34:04<26:31:51, 3.29it/s] 15%|█▌ | 57337/371472 [4:34:04<25:36:19, 3.41it/s] 15%|█▌ | 57338/371472 [4:34:04<26:26:11, 3.30it/s] 15%|█▌ | 57339/371472 [4:34:05<25:18:29, 3.45it/s] 15%|█▌ | 57340/371472 [4:34:05<25:13:35, 3.46it/s] {'loss': 4.3817, 'learning_rate': 8.614869960567991e-07, 'epoch': 2.47} + 15%|█▌ | 57340/371472 [4:34:05<25:13:35, 3.46it/s] 15%|█▌ | 57341/371472 [4:34:05<24:36:13, 3.55it/s] 15%|█▌ | 57342/371472 [4:34:05<24:09:05, 3.61it/s] 15%|█▌ | 57343/371472 [4:34:06<23:05:12, 3.78it/s] 15%|█▌ | 57344/371472 [4:34:06<22:52:33, 3.81it/s] 15%|█▌ | 57345/371472 [4:34:06<22:44:45, 3.84it/s] 15%|█▌ | 57346/371472 [4:34:07<22:51:50, 3.82it/s] 15%|█▌ | 57347/371472 [4:34:07<22:21:08, 3.90it/s] 15%|█▌ | 57348/371472 [4:34:07<22:32:28, 3.87it/s] 15%|█▌ | 57349/371472 [4:34:07<22:58:07, 3.80it/s] 15%|█▌ | 57350/371472 [4:34:08<23:16:26, 3.75it/s] 15%|█▌ | 57351/371472 [4:34:08<26:02:47, 3.35it/s] 15%|█▌ | 57352/371472 [4:34:08<26:14:05, 3.33it/s] 15%|█▌ | 57353/371472 [4:34:09<26:04:50, 3.35it/s] 15%|█▌ | 57354/371472 [4:34:09<25:04:01, 3.48it/s] 15%|█▌ | 57355/371472 [4:34:09<24:28:04, 3.57it/s] 15%|█▌ | 57356/371472 [4:34:09<23:28:14, 3.72it/s] 15%|█▌ | 57357/371472 [4:34:10<23:21:27, 3.74it/s] 15%|█▌ | 57358/371472 [4:34:10<25:23:52, 3.44it/s] 15%|█▌ | 57359/371472 [4:34:10<24:07:11, 3.62it/s] 15%|█▌ | 57360/371472 [4:34:10<23:40:29, 3.69it/s] {'loss': 4.2557, 'learning_rate': 8.614385140813204e-07, 'epoch': 2.47} + 15%|█▌ | 57360/371472 [4:34:10<23:40:29, 3.69it/s] 15%|█▌ | 57361/371472 [4:34:11<23:37:11, 3.69it/s] 15%|█▌ | 57362/371472 [4:34:11<25:23:36, 3.44it/s] 15%|█▌ | 57363/371472 [4:34:11<26:52:36, 3.25it/s] 15%|█▌ | 57364/371472 [4:34:12<26:49:58, 3.25it/s] 15%|█▌ | 57365/371472 [4:34:12<25:06:12, 3.48it/s] 15%|█▌ | 57366/371472 [4:34:12<24:12:16, 3.60it/s] 15%|█▌ | 57367/371472 [4:34:12<24:37:07, 3.54it/s] 15%|█▌ | 57368/371472 [4:34:13<24:25:19, 3.57it/s] 15%|█▌ | 57369/371472 [4:34:13<24:46:54, 3.52it/s] 15%|█▌ | 57370/371472 [4:34:13<23:54:31, 3.65it/s] 15%|█▌ | 57371/371472 [4:34:14<23:28:27, 3.72it/s] 15%|█▌ | 57372/371472 [4:34:14<23:48:36, 3.66it/s] 15%|█▌ | 57373/371472 [4:34:14<24:08:04, 3.62it/s] 15%|█▌ | 57374/371472 [4:34:14<23:33:07, 3.70it/s] 15%|█▌ | 57375/371472 [4:34:15<23:17:23, 3.75it/s] 15%|█▌ | 57376/371472 [4:34:15<22:54:45, 3.81it/s] 15%|█▌ | 57377/371472 [4:34:15<24:25:30, 3.57it/s] 15%|█▌ | 57378/371472 [4:34:16<26:55:39, 3.24it/s] 15%|█▌ | 57379/371472 [4:34:16<25:50:52, 3.38it/s] 15%|█▌ | 57380/371472 [4:34:16<25:11:05, 3.46it/s] {'loss': 4.3308, 'learning_rate': 8.613900321058416e-07, 'epoch': 2.47} + 15%|█▌ | 57380/371472 [4:34:16<25:11:05, 3.46it/s] 15%|█▌ | 57381/371472 [4:34:16<24:48:42, 3.52it/s] 15%|█▌ | 57382/371472 [4:34:17<23:52:57, 3.65it/s] 15%|█▌ | 57383/371472 [4:34:17<23:31:43, 3.71it/s] 15%|█▌ | 57384/371472 [4:34:17<23:34:43, 3.70it/s] 15%|█▌ | 57385/371472 [4:34:17<22:56:31, 3.80it/s] 15%|█▌ | 57386/371472 [4:34:18<23:56:19, 3.64it/s] 15%|█▌ | 57387/371472 [4:34:18<23:11:30, 3.76it/s] 15%|█▌ | 57388/371472 [4:34:18<23:08:04, 3.77it/s] 15%|█▌ | 57389/371472 [4:34:18<22:59:12, 3.80it/s] 15%|█▌ | 57390/371472 [4:34:19<22:41:53, 3.84it/s] 15%|█▌ | 57391/371472 [4:34:19<23:18:47, 3.74it/s] 15%|█▌ | 57392/371472 [4:34:19<23:04:24, 3.78it/s] 15%|█▌ | 57393/371472 [4:34:20<24:38:11, 3.54it/s] 15%|█▌ | 57394/371472 [4:34:20<24:11:38, 3.61it/s] 15%|█▌ | 57395/371472 [4:34:20<24:46:54, 3.52it/s] 15%|█▌ | 57396/371472 [4:34:20<24:32:25, 3.56it/s] 15%|█▌ | 57397/371472 [4:34:21<24:15:02, 3.60it/s] 15%|█▌ | 57398/371472 [4:34:21<24:21:22, 3.58it/s] 15%|█▌ | 57399/371472 [4:34:21<24:37:16, 3.54it/s] 15%|█▌ | 57400/371472 [4:34:22<24:23:43, 3.58it/s] {'loss': 4.3975, 'learning_rate': 8.613415501303626e-07, 'epoch': 2.47} + 15%|█▌ | 57400/371472 [4:34:22<24:23:43, 3.58it/s] 15%|█▌ | 57401/371472 [4:34:22<24:36:08, 3.55it/s] 15%|█▌ | 57402/371472 [4:34:22<23:30:25, 3.71it/s] 15%|█▌ | 57403/371472 [4:34:22<25:30:50, 3.42it/s] 15%|█▌ | 57404/371472 [4:34:23<24:10:02, 3.61it/s] 15%|█▌ | 57405/371472 [4:34:23<25:00:09, 3.49it/s] 15%|█▌ | 57406/371472 [4:34:23<24:29:48, 3.56it/s] 15%|█▌ | 57407/371472 [4:34:24<26:10:19, 3.33it/s] 15%|█▌ | 57408/371472 [4:34:24<26:28:35, 3.29it/s] 15%|█▌ | 57409/371472 [4:34:24<24:47:30, 3.52it/s] 15%|█▌ | 57410/371472 [4:34:24<25:02:21, 3.48it/s] 15%|█▌ | 57411/371472 [4:34:25<24:38:54, 3.54it/s] 15%|█▌ | 57412/371472 [4:34:25<23:54:13, 3.65it/s] 15%|█▌ | 57413/371472 [4:34:25<24:40:10, 3.54it/s] 15%|█▌ | 57414/371472 [4:34:26<23:44:57, 3.67it/s] 15%|█▌ | 57415/371472 [4:34:26<24:47:29, 3.52it/s] 15%|█▌ | 57416/371472 [4:34:26<24:11:19, 3.61it/s] 15%|█▌ | 57417/371472 [4:34:26<25:43:43, 3.39it/s] 15%|█▌ | 57418/371472 [4:34:27<25:00:26, 3.49it/s] 15%|█▌ | 57419/371472 [4:34:27<24:10:56, 3.61it/s] 15%|█▌ | 57420/371472 [4:34:27<24:22:01, 3.58it/s] {'loss': 4.1042, 'learning_rate': 8.612930681548836e-07, 'epoch': 2.47} + 15%|█▌ | 57420/371472 [4:34:27<24:22:01, 3.58it/s] 15%|█▌ | 57421/371472 [4:34:28<26:21:03, 3.31it/s] 15%|█▌ | 57422/371472 [4:34:28<24:47:55, 3.52it/s] 15%|█▌ | 57423/371472 [4:34:28<23:43:08, 3.68it/s] 15%|█▌ | 57424/371472 [4:34:28<24:11:35, 3.61it/s] 15%|█▌ | 57425/371472 [4:34:29<23:28:01, 3.72it/s] 15%|█▌ | 57426/371472 [4:34:29<23:46:04, 3.67it/s] 15%|█▌ | 57427/371472 [4:34:29<23:37:29, 3.69it/s] 15%|█▌ | 57428/371472 [4:34:29<23:11:04, 3.76it/s] 15%|█▌ | 57429/371472 [4:34:30<23:09:44, 3.77it/s] 15%|█▌ | 57430/371472 [4:34:30<24:13:49, 3.60it/s] 15%|█▌ | 57431/371472 [4:34:30<23:56:30, 3.64it/s] 15%|█▌ | 57432/371472 [4:34:30<23:29:52, 3.71it/s] 15%|█▌ | 57433/371472 [4:34:31<23:53:00, 3.65it/s] 15%|█▌ | 57434/371472 [4:34:31<24:25:39, 3.57it/s] 15%|█▌ | 57435/371472 [4:34:31<24:51:49, 3.51it/s] 15%|█▌ | 57436/371472 [4:34:32<24:27:20, 3.57it/s] 15%|█▌ | 57437/371472 [4:34:32<23:48:34, 3.66it/s] 15%|█▌ | 57438/371472 [4:34:32<23:17:04, 3.75it/s] 15%|█▌ | 57439/371472 [4:34:32<23:06:37, 3.77it/s] 15%|█▌ | 57440/371472 [4:34:33<22:44:59, 3.83it/s] {'loss': 4.2089, 'learning_rate': 8.612445861794049e-07, 'epoch': 2.47} + 15%|█▌ | 57440/371472 [4:34:33<22:44:59, 3.83it/s] 15%|█▌ | 57441/371472 [4:34:33<22:36:45, 3.86it/s] 15%|█▌ | 57442/371472 [4:34:33<24:07:46, 3.62it/s] 15%|█▌ | 57443/371472 [4:34:34<24:32:30, 3.55it/s] 15%|█▌ | 57444/371472 [4:34:34<25:27:49, 3.43it/s] 15%|█▌ | 57445/371472 [4:34:34<26:46:15, 3.26it/s] 15%|█▌ | 57446/371472 [4:34:34<25:15:41, 3.45it/s] 15%|█▌ | 57447/371472 [4:34:35<25:50:16, 3.38it/s] 15%|█▌ | 57448/371472 [4:34:35<26:04:13, 3.35it/s] 15%|█▌ | 57449/371472 [4:34:35<24:35:02, 3.55it/s] 15%|█▌ | 57450/371472 [4:34:36<24:15:16, 3.60it/s] 15%|█▌ | 57451/371472 [4:34:36<26:02:54, 3.35it/s] 15%|█▌ | 57452/371472 [4:34:36<25:14:39, 3.46it/s] 15%|█▌ | 57453/371472 [4:34:36<24:18:00, 3.59it/s] 15%|█▌ | 57454/371472 [4:34:37<23:40:09, 3.69it/s] 15%|█▌ | 57455/371472 [4:34:37<24:03:12, 3.63it/s] 15%|█▌ | 57456/371472 [4:34:37<23:36:26, 3.69it/s] 15%|█▌ | 57457/371472 [4:34:38<23:50:07, 3.66it/s] 15%|█▌ | 57458/371472 [4:34:38<25:27:09, 3.43it/s] 15%|█▌ | 57459/371472 [4:34:38<25:12:17, 3.46it/s] 15%|█▌ | 57460/371472 [4:34:39<30:13:11, 2.89it/s] {'loss': 4.1088, 'learning_rate': 8.611961042039259e-07, 'epoch': 2.47} + 15%|█▌ | 57460/371472 [4:34:39<30:13:11, 2.89it/s] 15%|█▌ | 57461/371472 [4:34:39<28:35:04, 3.05it/s] 15%|█▌ | 57462/371472 [4:34:39<28:11:56, 3.09it/s] 15%|█▌ | 57463/371472 [4:34:39<26:10:35, 3.33it/s] 15%|█▌ | 57464/371472 [4:34:40<25:19:18, 3.44it/s] 15%|█▌ | 57465/371472 [4:34:40<24:28:18, 3.56it/s] 15%|█▌ | 57466/371472 [4:34:40<27:18:35, 3.19it/s] 15%|█▌ | 57467/371472 [4:34:41<27:27:07, 3.18it/s] 15%|█▌ | 57468/371472 [4:34:41<26:04:54, 3.34it/s] 15%|█▌ | 57469/371472 [4:34:41<24:37:50, 3.54it/s] 15%|█▌ | 57470/371472 [4:34:41<24:00:00, 3.63it/s] 15%|█▌ | 57471/371472 [4:34:42<27:39:11, 3.15it/s] 15%|█▌ | 57472/371472 [4:34:42<26:05:47, 3.34it/s] 15%|█▌ | 57473/371472 [4:34:42<25:46:02, 3.38it/s] 15%|█▌ | 57474/371472 [4:34:43<26:20:43, 3.31it/s] 15%|█▌ | 57475/371472 [4:34:43<25:25:02, 3.43it/s] 15%|█▌ | 57476/371472 [4:34:43<25:16:09, 3.45it/s] 15%|█▌ | 57477/371472 [4:34:44<24:43:13, 3.53it/s] 15%|█▌ | 57478/371472 [4:34:44<25:18:16, 3.45it/s] 15%|█▌ | 57479/371472 [4:34:44<25:11:58, 3.46it/s] 15%|█▌ | 57480/371472 [4:34:44<25:48:28, 3.38it/s] {'loss': 4.3419, 'learning_rate': 8.611476222284471e-07, 'epoch': 2.48} + 15%|█▌ | 57480/371472 [4:34:44<25:48:28, 3.38it/s] 15%|█▌ | 57481/371472 [4:34:45<26:15:00, 3.32it/s] 15%|█▌ | 57482/371472 [4:34:45<25:58:11, 3.36it/s] 15%|█▌ | 57483/371472 [4:34:45<25:13:42, 3.46it/s] 15%|█▌ | 57484/371472 [4:34:46<24:48:50, 3.51it/s] 15%|█▌ | 57485/371472 [4:34:46<24:58:45, 3.49it/s] 15%|█▌ | 57486/371472 [4:34:46<23:37:48, 3.69it/s] 15%|█▌ | 57487/371472 [4:34:46<25:21:12, 3.44it/s] 15%|█▌ | 57488/371472 [4:34:47<24:06:42, 3.62it/s] 15%|█▌ | 57489/371472 [4:34:47<23:46:33, 3.67it/s] 15%|█▌ | 57490/371472 [4:34:47<23:51:57, 3.65it/s] 15%|█▌ | 57491/371472 [4:34:48<24:20:55, 3.58it/s] 15%|█▌ | 57492/371472 [4:34:48<23:57:59, 3.64it/s] 15%|█▌ | 57493/371472 [4:34:48<23:35:43, 3.70it/s] 15%|█▌ | 57494/371472 [4:34:48<23:48:43, 3.66it/s] 15%|█▌ | 57495/371472 [4:34:49<23:04:27, 3.78it/s] 15%|█▌ | 57496/371472 [4:34:49<22:26:13, 3.89it/s] 15%|█▌ | 57497/371472 [4:34:49<22:49:21, 3.82it/s] 15%|█▌ | 57498/371472 [4:34:49<23:53:25, 3.65it/s] 15%|█▌ | 57499/371472 [4:34:50<25:10:14, 3.46it/s] 15%|█▌ | 57500/371472 [4:34:50<24:26:30, 3.57it/s] {'loss': 4.2666, 'learning_rate': 8.610991402529681e-07, 'epoch': 2.48} + 15%|█▌ | 57500/371472 [4:34:50<24:26:30, 3.57it/s] 15%|█▌ | 57501/371472 [4:34:50<23:57:03, 3.64it/s] 15%|█▌ | 57502/371472 [4:34:51<24:11:52, 3.60it/s] 15%|█▌ | 57503/371472 [4:34:51<25:51:08, 3.37it/s] 15%|█▌ | 57504/371472 [4:34:51<25:39:58, 3.40it/s] 15%|█▌ | 57505/371472 [4:34:51<25:20:07, 3.44it/s] 15%|█▌ | 57506/371472 [4:34:52<24:30:11, 3.56it/s] 15%|█▌ | 57507/371472 [4:34:52<24:20:43, 3.58it/s] 15%|█▌ | 57508/371472 [4:34:52<23:15:00, 3.75it/s] 15%|█▌ | 57509/371472 [4:34:52<23:07:06, 3.77it/s] 15%|█▌ | 57510/371472 [4:34:53<23:35:42, 3.70it/s] 15%|█▌ | 57511/371472 [4:34:53<23:25:17, 3.72it/s] 15%|█▌ | 57512/371472 [4:34:53<24:33:15, 3.55it/s] 15%|█▌ | 57513/371472 [4:34:54<23:52:18, 3.65it/s] 15%|█▌ | 57514/371472 [4:34:54<23:16:49, 3.75it/s] 15%|█▌ | 57515/371472 [4:34:54<23:58:31, 3.64it/s] 15%|█▌ | 57516/371472 [4:34:54<25:04:50, 3.48it/s] 15%|█▌ | 57517/371472 [4:34:55<24:45:55, 3.52it/s] 15%|█▌ | 57518/371472 [4:34:55<25:47:57, 3.38it/s] 15%|█▌ | 57519/371472 [4:34:55<24:57:18, 3.49it/s] 15%|█▌ | 57520/371472 [4:34:56<24:19:54, 3.58it/s] {'loss': 4.252, 'learning_rate': 8.610506582774892e-07, 'epoch': 2.48} + 15%|█▌ | 57520/371472 [4:34:56<24:19:54, 3.58it/s] 15%|█▌ | 57521/371472 [4:34:56<23:16:48, 3.75it/s] 15%|█▌ | 57522/371472 [4:34:56<22:59:36, 3.79it/s] 15%|█▌ | 57523/371472 [4:34:56<23:00:38, 3.79it/s] 15%|█▌ | 57524/371472 [4:34:57<22:48:25, 3.82it/s] 15%|█▌ | 57525/371472 [4:34:57<22:05:39, 3.95it/s] 15%|█▌ | 57526/371472 [4:34:57<24:04:53, 3.62it/s] 15%|█▌ | 57527/371472 [4:34:57<24:58:10, 3.49it/s] 15%|█▌ | 57528/371472 [4:34:58<28:51:28, 3.02it/s] 15%|█▌ | 57529/371472 [4:34:58<28:53:17, 3.02it/s] 15%|█▌ | 57530/371472 [4:34:59<27:23:24, 3.18it/s] 15%|█▌ | 57531/371472 [4:34:59<26:45:41, 3.26it/s] 15%|█▌ | 57532/371472 [4:34:59<26:07:39, 3.34it/s] 15%|█▌ | 57533/371472 [4:34:59<24:22:32, 3.58it/s] 15%|█▌ | 57534/371472 [4:35:00<23:53:45, 3.65it/s] 15%|█▌ | 57535/371472 [4:35:00<24:24:03, 3.57it/s] 15%|█▌ | 57536/371472 [4:35:00<24:09:23, 3.61it/s] 15%|█▌ | 57537/371472 [4:35:00<23:23:25, 3.73it/s] 15%|█▌ | 57538/371472 [4:35:01<24:43:14, 3.53it/s] 15%|█▌ | 57539/371472 [4:35:01<23:58:26, 3.64it/s] 15%|█▌ | 57540/371472 [4:35:01<23:21:04, 3.73it/s] {'loss': 4.0077, 'learning_rate': 8.610021763020103e-07, 'epoch': 2.48} + 15%|█▌ | 57540/371472 [4:35:01<23:21:04, 3.73it/s] 15%|█▌ | 57541/371472 [4:35:01<23:25:36, 3.72it/s] 15%|█▌ | 57542/371472 [4:35:02<23:57:28, 3.64it/s] 15%|█▌ | 57543/371472 [4:35:02<23:35:09, 3.70it/s] 15%|█▌ | 57544/371472 [4:35:02<24:30:22, 3.56it/s] 15%|█▌ | 57545/371472 [4:35:03<24:44:24, 3.52it/s] 15%|█▌ | 57546/371472 [4:35:03<24:37:44, 3.54it/s] 15%|█▌ | 57547/371472 [4:35:03<23:53:31, 3.65it/s] 15%|█▌ | 57548/371472 [4:35:03<23:20:47, 3.74it/s] 15%|█▌ | 57549/371472 [4:35:04<26:19:05, 3.31it/s] 15%|█▌ | 57550/371472 [4:35:04<28:05:40, 3.10it/s] 15%|█▌ | 57551/371472 [4:35:04<27:37:54, 3.16it/s] 15%|█▌ | 57552/371472 [4:35:05<27:44:36, 3.14it/s] 15%|█▌ | 57553/371472 [4:35:05<27:04:01, 3.22it/s] 15%|█▌ | 57554/371472 [4:35:05<26:08:54, 3.33it/s] 15%|█▌ | 57555/371472 [4:35:06<25:17:21, 3.45it/s] 15%|█▌ | 57556/371472 [4:35:06<25:01:35, 3.48it/s] 15%|█▌ | 57557/371472 [4:35:06<24:12:10, 3.60it/s] 15%|█▌ | 57558/371472 [4:35:06<23:41:33, 3.68it/s] 15%|█▌ | 57559/371472 [4:35:07<24:10:35, 3.61it/s] 15%|█▌ | 57560/371472 [4:35:07<23:20:00, 3.74it/s] {'loss': 3.9771, 'learning_rate': 8.609536943265315e-07, 'epoch': 2.48} + 15%|█▌ | 57560/371472 [4:35:07<23:20:00, 3.74it/s] 15%|█▌ | 57561/371472 [4:35:07<24:48:37, 3.51it/s] 15%|█▌ | 57562/371472 [4:35:08<24:01:50, 3.63it/s] 15%|█▌ | 57563/371472 [4:35:08<23:43:04, 3.68it/s] 15%|█▌ | 57564/371472 [4:35:08<24:28:34, 3.56it/s] 15%|█▌ | 57565/371472 [4:35:08<24:30:27, 3.56it/s] 15%|█▌ | 57566/371472 [4:35:09<24:18:52, 3.59it/s] 15%|█▌ | 57567/371472 [4:35:09<23:40:06, 3.68it/s] 15%|█▌ | 57568/371472 [4:35:09<23:46:50, 3.67it/s] 15%|█▌ | 57569/371472 [4:35:09<23:17:24, 3.74it/s] 15%|█▌ | 57570/371472 [4:35:10<23:04:11, 3.78it/s] 15%|█▌ | 57571/371472 [4:35:10<23:42:25, 3.68it/s] 15%|█▌ | 57572/371472 [4:35:10<24:03:32, 3.62it/s] 15%|█▌ | 57573/371472 [4:35:11<23:58:02, 3.64it/s] 15%|█▌ | 57574/371472 [4:35:11<23:22:56, 3.73it/s] 15%|█▌ | 57575/371472 [4:35:11<23:22:52, 3.73it/s] 15%|█▌ | 57576/371472 [4:35:11<23:40:52, 3.68it/s] 15%|█▌ | 57577/371472 [4:35:12<23:58:17, 3.64it/s] 15%|█▌ | 57578/371472 [4:35:12<22:55:48, 3.80it/s] 16%|█▌ | 57579/371472 [4:35:12<24:49:32, 3.51it/s] 16%|█▌ | 57580/371472 [4:35:12<24:33:46, 3.55it/s] {'loss': 4.2792, 'learning_rate': 8.609052123510525e-07, 'epoch': 2.48} + 16%|█▌ | 57580/371472 [4:35:12<24:33:46, 3.55it/s] 16%|█▌ | 57581/371472 [4:35:13<24:31:00, 3.56it/s] 16%|█▌ | 57582/371472 [4:35:13<23:27:11, 3.72it/s] 16%|█▌ | 57583/371472 [4:35:13<24:01:16, 3.63it/s] 16%|█▌ | 57584/371472 [4:35:14<24:06:15, 3.62it/s] 16%|█▌ | 57585/371472 [4:35:14<23:35:05, 3.70it/s] 16%|█▌ | 57586/371472 [4:35:14<23:20:26, 3.74it/s] 16%|█▌ | 57587/371472 [4:35:14<23:14:00, 3.75it/s] 16%|█▌ | 57588/371472 [4:35:15<22:39:09, 3.85it/s] 16%|█▌ | 57589/371472 [4:35:15<25:57:21, 3.36it/s] 16%|█▌ | 57590/371472 [4:35:15<25:14:08, 3.45it/s] 16%|█▌ | 57591/371472 [4:35:15<24:04:11, 3.62it/s] 16%|█▌ | 57592/371472 [4:35:16<23:32:01, 3.70it/s] 16%|█▌ | 57593/371472 [4:35:16<22:45:41, 3.83it/s] 16%|█▌ | 57594/371472 [4:35:16<22:36:11, 3.86it/s] 16%|█▌ | 57595/371472 [4:35:17<23:06:29, 3.77it/s] 16%|█▌ | 57596/371472 [4:35:17<23:05:09, 3.78it/s] 16%|█▌ | 57597/371472 [4:35:17<22:56:09, 3.80it/s] 16%|█▌ | 57598/371472 [4:35:17<23:46:20, 3.67it/s] 16%|█▌ | 57599/371472 [4:35:18<23:17:25, 3.74it/s] 16%|█▌ | 57600/371472 [4:35:18<22:35:31, 3.86it/s] {'loss': 4.3579, 'learning_rate': 8.608567303755737e-07, 'epoch': 2.48} + 16%|█▌ | 57600/371472 [4:35:18<22:35:31, 3.86it/s] 16%|█▌ | 57601/371472 [4:35:18<22:33:57, 3.86it/s] 16%|█▌ | 57602/371472 [4:35:18<23:51:51, 3.65it/s] 16%|█▌ | 57603/371472 [4:35:19<24:09:38, 3.61it/s] 16%|█▌ | 57604/371472 [4:35:19<24:48:43, 3.51it/s] 16%|█▌ | 57605/371472 [4:35:19<25:30:57, 3.42it/s] 16%|█▌ | 57606/371472 [4:35:20<24:35:31, 3.55it/s] 16%|█▌ | 57607/371472 [4:35:20<26:41:36, 3.27it/s] 16%|█▌ | 57608/371472 [4:35:20<25:20:01, 3.44it/s] 16%|█▌ | 57609/371472 [4:35:20<24:20:24, 3.58it/s] 16%|█▌ | 57610/371472 [4:35:21<25:07:25, 3.47it/s] 16%|█▌ | 57611/371472 [4:35:21<24:07:50, 3.61it/s] 16%|█▌ | 57612/371472 [4:35:21<23:55:57, 3.64it/s] 16%|█▌ | 57613/371472 [4:35:22<24:43:09, 3.53it/s] 16%|█▌ | 57614/371472 [4:35:22<25:16:29, 3.45it/s] 16%|█▌ | 57615/371472 [4:35:22<24:37:36, 3.54it/s] 16%|█▌ | 57616/371472 [4:35:22<24:19:55, 3.58it/s] 16%|█▌ | 57617/371472 [4:35:23<23:49:56, 3.66it/s] 16%|█▌ | 57618/371472 [4:35:23<23:56:07, 3.64it/s] 16%|█▌ | 57619/371472 [4:35:23<22:48:59, 3.82it/s] 16%|█▌ | 57620/371472 [4:35:23<22:11:38, 3.93it/s] {'loss': 4.29, 'learning_rate': 8.608082484000948e-07, 'epoch': 2.48} + 16%|█▌ | 57620/371472 [4:35:23<22:11:38, 3.93it/s] 16%|█▌ | 57621/371472 [4:35:24<22:05:53, 3.95it/s] 16%|█▌ | 57622/371472 [4:35:24<22:14:14, 3.92it/s] 16%|█▌ | 57623/371472 [4:35:24<22:38:04, 3.85it/s] 16%|█▌ | 57624/371472 [4:35:24<22:20:42, 3.90it/s] 16%|█▌ | 57625/371472 [4:35:25<22:24:11, 3.89it/s] 16%|█▌ | 57626/371472 [4:35:25<23:44:30, 3.67it/s] 16%|█▌ | 57627/371472 [4:35:25<23:50:22, 3.66it/s] 16%|█▌ | 57628/371472 [4:35:26<23:22:00, 3.73it/s] 16%|█▌ | 57629/371472 [4:35:26<24:26:04, 3.57it/s] 16%|█▌ | 57630/371472 [4:35:26<23:41:06, 3.68it/s] 16%|█▌ | 57631/371472 [4:35:26<25:15:42, 3.45it/s] 16%|█▌ | 57632/371472 [4:35:27<25:51:14, 3.37it/s] 16%|█▌ | 57633/371472 [4:35:27<24:45:23, 3.52it/s] 16%|█▌ | 57634/371472 [4:35:27<25:40:46, 3.39it/s] 16%|█▌ | 57635/371472 [4:35:28<24:35:30, 3.54it/s] 16%|█▌ | 57636/371472 [4:35:28<25:12:25, 3.46it/s] 16%|█▌ | 57637/371472 [4:35:28<26:16:29, 3.32it/s] 16%|█▌ | 57638/371472 [4:35:28<25:27:04, 3.43it/s] 16%|█▌ | 57639/371472 [4:35:29<26:17:09, 3.32it/s] 16%|█▌ | 57640/371472 [4:35:29<24:46:51, 3.52it/s] {'loss': 4.3093, 'learning_rate': 8.60759766424616e-07, 'epoch': 2.48} + 16%|█▌ | 57640/371472 [4:35:29<24:46:51, 3.52it/s] 16%|█▌ | 57641/371472 [4:35:29<24:45:03, 3.52it/s] 16%|█▌ | 57642/371472 [4:35:30<23:34:02, 3.70it/s] 16%|█▌ | 57643/371472 [4:35:30<22:58:51, 3.79it/s] 16%|█▌ | 57644/371472 [4:35:30<22:37:15, 3.85it/s] 16%|█▌ | 57645/371472 [4:35:30<23:39:06, 3.69it/s] 16%|█▌ | 57646/371472 [4:35:31<23:04:07, 3.78it/s] 16%|█▌ | 57647/371472 [4:35:31<23:00:33, 3.79it/s] 16%|█▌ | 57648/371472 [4:35:31<24:27:35, 3.56it/s] 16%|█▌ | 57649/371472 [4:35:31<23:32:49, 3.70it/s] 16%|█▌ | 57650/371472 [4:35:32<23:30:00, 3.71it/s] 16%|█▌ | 57651/371472 [4:35:32<22:52:57, 3.81it/s] 16%|█▌ | 57652/371472 [4:35:32<23:56:57, 3.64it/s] 16%|█▌ | 57653/371472 [4:35:33<25:26:19, 3.43it/s] 16%|█▌ | 57654/371472 [4:35:33<26:52:28, 3.24it/s] 16%|█▌ | 57655/371472 [4:35:33<26:59:14, 3.23it/s] 16%|█▌ | 57656/371472 [4:35:34<26:46:35, 3.26it/s] 16%|█▌ | 57657/371472 [4:35:34<29:36:00, 2.94it/s] 16%|█▌ | 57658/371472 [4:35:34<28:07:08, 3.10it/s] 16%|█▌ | 57659/371472 [4:35:35<27:17:32, 3.19it/s] 16%|█▌ | 57660/371472 [4:35:35<26:45:14, 3.26it/s] {'loss': 4.3589, 'learning_rate': 8.607112844491369e-07, 'epoch': 2.48} + 16%|█▌ | 57660/371472 [4:35:35<26:45:14, 3.26it/s] 16%|█▌ | 57661/371472 [4:35:35<26:06:20, 3.34it/s] 16%|█▌ | 57662/371472 [4:35:35<24:45:05, 3.52it/s] 16%|█▌ | 57663/371472 [4:35:36<24:37:53, 3.54it/s] 16%|█▌ | 57664/371472 [4:35:36<23:20:00, 3.74it/s] 16%|█▌ | 57665/371472 [4:35:36<23:49:01, 3.66it/s] 16%|█▌ | 57666/371472 [4:35:36<22:44:32, 3.83it/s] 16%|█▌ | 57667/371472 [4:35:37<23:33:11, 3.70it/s] 16%|█▌ | 57668/371472 [4:35:37<23:43:19, 3.67it/s] 16%|█▌ | 57669/371472 [4:35:37<23:22:47, 3.73it/s] 16%|█▌ | 57670/371472 [4:35:37<23:50:50, 3.66it/s] 16%|█▌ | 57671/371472 [4:35:38<22:56:16, 3.80it/s] 16%|█▌ | 57672/371472 [4:35:38<22:16:34, 3.91it/s] 16%|█▌ | 57673/371472 [4:35:38<23:09:23, 3.76it/s] 16%|█▌ | 57674/371472 [4:35:39<23:19:24, 3.74it/s] 16%|█▌ | 57675/371472 [4:35:39<23:29:25, 3.71it/s] 16%|█▌ | 57676/371472 [4:35:39<22:44:34, 3.83it/s] 16%|█▌ | 57677/371472 [4:35:39<22:00:40, 3.96it/s] 16%|█▌ | 57678/371472 [4:35:40<23:20:49, 3.73it/s] 16%|█▌ | 57679/371472 [4:35:40<25:02:28, 3.48it/s] 16%|█▌ | 57680/371472 [4:35:40<23:50:22, 3.66it/s] {'loss': 4.4272, 'learning_rate': 8.606628024736581e-07, 'epoch': 2.48} + 16%|█▌ | 57680/371472 [4:35:40<23:50:22, 3.66it/s] 16%|█▌ | 57681/371472 [4:35:40<25:11:26, 3.46it/s] 16%|█▌ | 57682/371472 [4:35:41<25:39:10, 3.40it/s] 16%|█▌ | 57683/371472 [4:35:41<27:30:50, 3.17it/s] 16%|█▌ | 57684/371472 [4:35:41<25:52:54, 3.37it/s] 16%|█▌ | 57685/371472 [4:35:42<25:03:49, 3.48it/s] 16%|█▌ | 57686/371472 [4:35:42<25:49:17, 3.38it/s] 16%|█▌ | 57687/371472 [4:35:42<24:24:21, 3.57it/s] 16%|█▌ | 57688/371472 [4:35:42<23:49:17, 3.66it/s] 16%|█▌ | 57689/371472 [4:35:43<24:04:15, 3.62it/s] 16%|█▌ | 57690/371472 [4:35:43<26:20:23, 3.31it/s] 16%|█▌ | 57691/371472 [4:35:43<27:44:16, 3.14it/s] 16%|█▌ | 57692/371472 [4:35:44<30:19:06, 2.87it/s] 16%|█▌ | 57693/371472 [4:35:44<28:28:29, 3.06it/s] 16%|█▌ | 57694/371472 [4:35:44<26:38:35, 3.27it/s] 16%|█▌ | 57695/371472 [4:35:45<25:57:49, 3.36it/s] 16%|█▌ | 57696/371472 [4:35:45<25:17:09, 3.45it/s] 16%|█▌ | 57697/371472 [4:35:45<24:21:49, 3.58it/s] 16%|█▌ | 57698/371472 [4:35:46<24:50:54, 3.51it/s] 16%|█▌ | 57699/371472 [4:35:46<23:49:17, 3.66it/s] 16%|█▌ | 57700/371472 [4:35:46<24:33:15, 3.55it/s] {'loss': 4.0898, 'learning_rate': 8.606143204981792e-07, 'epoch': 2.49} + 16%|█▌ | 57700/371472 [4:35:46<24:33:15, 3.55it/s] 16%|█▌ | 57701/371472 [4:35:46<27:16:24, 3.20it/s] 16%|█▌ | 57702/371472 [4:35:47<25:45:49, 3.38it/s] 16%|█▌ | 57703/371472 [4:35:47<24:58:20, 3.49it/s] 16%|█▌ | 57704/371472 [4:35:47<24:14:10, 3.60it/s] 16%|█▌ | 57705/371472 [4:35:48<23:47:00, 3.66it/s] 16%|█▌ | 57706/371472 [4:35:48<24:01:27, 3.63it/s] 16%|█▌ | 57707/371472 [4:35:48<24:58:00, 3.49it/s] 16%|█▌ | 57708/371472 [4:35:48<24:17:53, 3.59it/s] 16%|█▌ | 57709/371472 [4:35:49<25:24:13, 3.43it/s] 16%|█▌ | 57710/371472 [4:35:49<24:17:57, 3.59it/s] 16%|█▌ | 57711/371472 [4:35:49<24:07:04, 3.61it/s] 16%|█▌ | 57712/371472 [4:35:50<24:30:54, 3.56it/s] 16%|█▌ | 57713/371472 [4:35:50<25:29:19, 3.42it/s] 16%|█▌ | 57714/371472 [4:35:50<25:09:14, 3.46it/s] 16%|█▌ | 57715/371472 [4:35:50<24:43:39, 3.52it/s] 16%|█▌ | 57716/371472 [4:35:51<24:32:26, 3.55it/s] 16%|█▌ | 57717/371472 [4:35:51<24:16:11, 3.59it/s] 16%|█▌ | 57718/371472 [4:35:51<23:45:48, 3.67it/s] 16%|█▌ | 57719/371472 [4:35:51<24:17:26, 3.59it/s] 16%|█▌ | 57720/371472 [4:35:52<24:01:24, 3.63it/s] {'loss': 4.2245, 'learning_rate': 8.605658385227002e-07, 'epoch': 2.49} + 16%|█▌ | 57720/371472 [4:35:52<24:01:24, 3.63it/s] 16%|█▌ | 57721/371472 [4:35:52<26:37:16, 3.27it/s] 16%|█▌ | 57722/371472 [4:35:52<26:15:44, 3.32it/s] 16%|█▌ | 57723/371472 [4:35:53<24:30:45, 3.56it/s] 16%|█▌ | 57724/371472 [4:35:53<25:02:26, 3.48it/s] 16%|█▌ | 57725/371472 [4:35:53<23:46:50, 3.66it/s] 16%|█▌ | 57726/371472 [4:35:53<23:38:33, 3.69it/s] 16%|█▌ | 57727/371472 [4:35:54<24:46:06, 3.52it/s] 16%|█▌ | 57728/371472 [4:35:54<25:05:59, 3.47it/s] 16%|█▌ | 57729/371472 [4:35:54<23:51:32, 3.65it/s] 16%|█▌ | 57730/371472 [4:35:55<27:07:56, 3.21it/s] 16%|█▌ | 57731/371472 [4:35:55<26:50:46, 3.25it/s] 16%|█▌ | 57732/371472 [4:35:55<28:07:51, 3.10it/s] 16%|█▌ | 57733/371472 [4:35:56<29:00:10, 3.00it/s] 16%|█▌ | 57734/371472 [4:35:56<28:47:02, 3.03it/s] 16%|█▌ | 57735/371472 [4:35:56<27:20:55, 3.19it/s] 16%|█▌ | 57736/371472 [4:35:57<25:32:36, 3.41it/s] 16%|█▌ | 57737/371472 [4:35:57<25:56:38, 3.36it/s] 16%|█▌ | 57738/371472 [4:35:57<24:51:18, 3.51it/s] 16%|█▌ | 57739/371472 [4:35:57<25:21:36, 3.44it/s] 16%|█▌ | 57740/371472 [4:35:58<25:54:16, 3.36it/s] {'loss': 4.4094, 'learning_rate': 8.605173565472214e-07, 'epoch': 2.49} + 16%|█▌ | 57740/371472 [4:35:58<25:54:16, 3.36it/s] 16%|█▌ | 57741/371472 [4:35:58<24:15:22, 3.59it/s] 16%|█▌ | 57742/371472 [4:35:58<24:21:33, 3.58it/s] 16%|█▌ | 57743/371472 [4:35:59<24:48:48, 3.51it/s] 16%|█▌ | 57744/371472 [4:35:59<23:33:57, 3.70it/s] 16%|█▌ | 57745/371472 [4:35:59<24:56:59, 3.49it/s] 16%|█▌ | 57746/371472 [4:35:59<24:29:16, 3.56it/s] 16%|█▌ | 57747/371472 [4:36:00<23:36:36, 3.69it/s] 16%|█▌ | 57748/371472 [4:36:00<25:15:53, 3.45it/s] 16%|█▌ | 57749/371472 [4:36:00<25:05:50, 3.47it/s] 16%|█▌ | 57750/371472 [4:36:01<24:12:24, 3.60it/s] 16%|█▌ | 57751/371472 [4:36:01<24:39:13, 3.53it/s] 16%|█▌ | 57752/371472 [4:36:01<25:40:36, 3.39it/s] 16%|█▌ | 57753/371472 [4:36:01<25:14:48, 3.45it/s] 16%|█▌ | 57754/371472 [4:36:02<24:25:17, 3.57it/s] 16%|█▌ | 57755/371472 [4:36:02<23:50:04, 3.66it/s] 16%|█▌ | 57756/371472 [4:36:02<23:37:14, 3.69it/s] 16%|█▌ | 57757/371472 [4:36:02<23:19:52, 3.74it/s] 16%|█▌ | 57758/371472 [4:36:03<23:18:27, 3.74it/s] 16%|█▌ | 57759/371472 [4:36:03<23:25:37, 3.72it/s] 16%|█▌ | 57760/371472 [4:36:03<22:39:53, 3.84it/s] {'loss': 4.1925, 'learning_rate': 8.604688745717426e-07, 'epoch': 2.49} + 16%|█▌ | 57760/371472 [4:36:03<22:39:53, 3.84it/s] 16%|█▌ | 57761/371472 [4:36:03<23:08:25, 3.77it/s] 16%|█▌ | 57762/371472 [4:36:04<22:59:08, 3.79it/s] 16%|█▌ | 57763/371472 [4:36:04<25:05:50, 3.47it/s] 16%|█▌ | 57764/371472 [4:36:04<24:26:31, 3.57it/s] 16%|█▌ | 57765/371472 [4:36:05<23:33:40, 3.70it/s] 16%|█▌ | 57766/371472 [4:36:05<25:08:17, 3.47it/s] 16%|█▌ | 57767/371472 [4:36:05<25:18:57, 3.44it/s] 16%|█▌ | 57768/371472 [4:36:06<25:18:12, 3.44it/s] 16%|█▌ | 57769/371472 [4:36:06<25:28:20, 3.42it/s] 16%|█▌ | 57770/371472 [4:36:06<25:17:14, 3.45it/s] 16%|█▌ | 57771/371472 [4:36:06<24:12:15, 3.60it/s] 16%|█▌ | 57772/371472 [4:36:07<24:03:24, 3.62it/s] 16%|█▌ | 57773/371472 [4:36:07<24:54:17, 3.50it/s] 16%|█▌ | 57774/371472 [4:36:07<25:31:50, 3.41it/s] 16%|█▌ | 57775/371472 [4:36:08<25:09:01, 3.46it/s] 16%|█▌ | 57776/371472 [4:36:08<24:33:55, 3.55it/s] 16%|█▌ | 57777/371472 [4:36:08<24:31:06, 3.55it/s] 16%|█▌ | 57778/371472 [4:36:08<24:13:28, 3.60it/s] 16%|█▌ | 57779/371472 [4:36:09<23:51:58, 3.65it/s] 16%|█▌ | 57780/371472 [4:36:09<24:31:01, 3.55it/s] {'loss': 4.2971, 'learning_rate': 8.604203925962636e-07, 'epoch': 2.49} + 16%|█▌ | 57780/371472 [4:36:09<24:31:01, 3.55it/s] 16%|█▌ | 57781/371472 [4:36:09<28:21:14, 3.07it/s] 16%|█▌ | 57782/371472 [4:36:10<28:58:20, 3.01it/s] 16%|█▌ | 57783/371472 [4:36:10<26:33:21, 3.28it/s] 16%|█▌ | 57784/371472 [4:36:10<25:22:36, 3.43it/s] 16%|█▌ | 57785/371472 [4:36:10<24:35:50, 3.54it/s] 16%|█▌ | 57786/371472 [4:36:11<25:47:39, 3.38it/s] 16%|█▌ | 57787/371472 [4:36:11<25:17:13, 3.45it/s] 16%|█▌ | 57788/371472 [4:36:11<25:10:19, 3.46it/s] 16%|█▌ | 57789/371472 [4:36:12<25:23:39, 3.43it/s] 16%|█▌ | 57790/371472 [4:36:12<24:23:20, 3.57it/s] 16%|█▌ | 57791/371472 [4:36:12<25:37:43, 3.40it/s] 16%|█▌ | 57792/371472 [4:36:12<24:18:46, 3.58it/s] 16%|█▌ | 57793/371472 [4:36:13<25:11:47, 3.46it/s] 16%|█▌ | 57794/371472 [4:36:13<25:13:49, 3.45it/s] 16%|█▌ | 57795/371472 [4:36:13<24:20:19, 3.58it/s] 16%|█▌ | 57796/371472 [4:36:14<24:52:15, 3.50it/s] 16%|█▌ | 57797/371472 [4:36:14<24:43:47, 3.52it/s] 16%|█▌ | 57798/371472 [4:36:14<23:26:32, 3.72it/s] 16%|█▌ | 57799/371472 [4:36:14<24:17:35, 3.59it/s] 16%|█▌ | 57800/371472 [4:36:15<24:18:56, 3.58it/s] {'loss': 4.3134, 'learning_rate': 8.603719106207846e-07, 'epoch': 2.49} + 16%|█▌ | 57800/371472 [4:36:15<24:18:56, 3.58it/s] 16%|█▌ | 57801/371472 [4:36:15<25:05:58, 3.47it/s] 16%|█▌ | 57802/371472 [4:36:15<24:29:42, 3.56it/s] 16%|█▌ | 57803/371472 [4:36:16<26:19:21, 3.31it/s] 16%|█▌ | 57804/371472 [4:36:16<25:03:15, 3.48it/s] 16%|█▌ | 57805/371472 [4:36:16<25:52:37, 3.37it/s] 16%|█▌ | 57806/371472 [4:36:16<24:43:32, 3.52it/s] 16%|█▌ | 57807/371472 [4:36:17<24:48:12, 3.51it/s] 16%|█▌ | 57808/371472 [4:36:17<25:36:37, 3.40it/s] 16%|█▌ | 57809/371472 [4:36:17<25:01:17, 3.48it/s] 16%|█▌ | 57810/371472 [4:36:18<26:27:20, 3.29it/s] 16%|█▌ | 57811/371472 [4:36:18<26:34:46, 3.28it/s] 16%|█▌ | 57812/371472 [4:36:18<25:13:15, 3.45it/s] 16%|█▌ | 57813/371472 [4:36:19<24:39:29, 3.53it/s] 16%|█▌ | 57814/371472 [4:36:19<25:13:46, 3.45it/s] 16%|█▌ | 57815/371472 [4:36:19<24:20:52, 3.58it/s] 16%|█▌ | 57816/371472 [4:36:19<23:59:25, 3.63it/s] 16%|█▌ | 57817/371472 [4:36:20<22:57:00, 3.80it/s] 16%|█▌ | 57818/371472 [4:36:20<22:51:17, 3.81it/s] 16%|█▌ | 57819/371472 [4:36:20<24:11:13, 3.60it/s] 16%|█▌ | 57820/371472 [4:36:20<23:16:39, 3.74it/s] {'loss': 4.0966, 'learning_rate': 8.603234286453058e-07, 'epoch': 2.49} + 16%|█▌ | 57820/371472 [4:36:20<23:16:39, 3.74it/s] 16%|█▌ | 57821/371472 [4:36:21<22:48:01, 3.82it/s] 16%|█▌ | 57822/371472 [4:36:21<22:48:26, 3.82it/s] 16%|█▌ | 57823/371472 [4:36:21<23:40:36, 3.68it/s] 16%|█▌ | 57824/371472 [4:36:21<24:02:35, 3.62it/s] 16%|█▌ | 57825/371472 [4:36:22<23:58:04, 3.64it/s] 16%|█▌ | 57826/371472 [4:36:22<26:12:06, 3.33it/s] 16%|█▌ | 57827/371472 [4:36:22<24:42:16, 3.53it/s] 16%|█▌ | 57828/371472 [4:36:23<23:43:01, 3.67it/s] 16%|█▌ | 57829/371472 [4:36:23<23:14:52, 3.75it/s] 16%|█▌ | 57830/371472 [4:36:23<25:14:15, 3.45it/s] 16%|█▌ | 57831/371472 [4:36:23<24:33:09, 3.55it/s] 16%|█▌ | 57832/371472 [4:36:24<24:54:08, 3.50it/s] 16%|█▌ | 57833/371472 [4:36:24<26:05:34, 3.34it/s] 16%|█▌ | 57834/371472 [4:36:24<25:48:05, 3.38it/s] 16%|█▌ | 57835/371472 [4:36:25<27:19:19, 3.19it/s] 16%|█▌ | 57836/371472 [4:36:25<25:43:04, 3.39it/s] 16%|█▌ | 57837/371472 [4:36:25<28:37:36, 3.04it/s] 16%|█▌ | 57838/371472 [4:36:26<28:17:21, 3.08it/s] 16%|█▌ | 57839/371472 [4:36:26<28:49:52, 3.02it/s] 16%|█▌ | 57840/371472 [4:36:26<27:58:37, 3.11it/s] {'loss': 4.1384, 'learning_rate': 8.60274946669827e-07, 'epoch': 2.49} + 16%|█▌ | 57840/371472 [4:36:26<27:58:37, 3.11it/s] 16%|█▌ | 57841/371472 [4:36:27<27:16:56, 3.19it/s] 16%|█▌ | 57842/371472 [4:36:27<25:49:25, 3.37it/s] 16%|█▌ | 57843/371472 [4:36:27<24:16:53, 3.59it/s] 16%|█▌ | 57844/371472 [4:36:27<23:25:04, 3.72it/s] 16%|█▌ | 57845/371472 [4:36:28<22:38:45, 3.85it/s] 16%|█▌ | 57846/371472 [4:36:28<22:10:16, 3.93it/s] 16%|█▌ | 57847/371472 [4:36:28<22:01:31, 3.96it/s] 16%|█▌ | 57848/371472 [4:36:28<23:28:29, 3.71it/s] 16%|█▌ | 57849/371472 [4:36:29<23:38:37, 3.68it/s] 16%|█▌ | 57850/371472 [4:36:29<24:11:52, 3.60it/s] 16%|█▌ | 57851/371472 [4:36:29<23:51:09, 3.65it/s] 16%|█▌ | 57852/371472 [4:36:30<23:47:07, 3.66it/s] 16%|█▌ | 57853/371472 [4:36:30<23:17:14, 3.74it/s] 16%|█▌ | 57854/371472 [4:36:30<22:48:19, 3.82it/s] 16%|█▌ | 57855/371472 [4:36:30<22:32:54, 3.86it/s] 16%|█▌ | 57856/371472 [4:36:31<23:18:42, 3.74it/s] 16%|█▌ | 57857/371472 [4:36:31<23:25:20, 3.72it/s] 16%|█▌ | 57858/371472 [4:36:31<23:23:50, 3.72it/s] 16%|█▌ | 57859/371472 [4:36:31<24:11:03, 3.60it/s] 16%|█▌ | 57860/371472 [4:36:32<24:33:54, 3.55it/s] {'loss': 4.3345, 'learning_rate': 8.60226464694348e-07, 'epoch': 2.49} + 16%|█▌ | 57860/371472 [4:36:32<24:33:54, 3.55it/s] 16%|█▌ | 57861/371472 [4:36:32<23:56:07, 3.64it/s] 16%|█▌ | 57862/371472 [4:36:32<22:44:19, 3.83it/s] 16%|█▌ | 57863/371472 [4:36:32<22:38:07, 3.85it/s] 16%|█▌ | 57864/371472 [4:36:33<24:07:53, 3.61it/s] 16%|█▌ | 57865/371472 [4:36:33<23:32:33, 3.70it/s] 16%|█▌ | 57866/371472 [4:36:33<23:42:40, 3.67it/s] 16%|█▌ | 57867/371472 [4:36:34<23:13:24, 3.75it/s] 16%|█▌ | 57868/371472 [4:36:34<24:48:01, 3.51it/s] 16%|█▌ | 57869/371472 [4:36:34<25:21:33, 3.44it/s] 16%|█▌ | 57870/371472 [4:36:34<25:06:42, 3.47it/s] 16%|█▌ | 57871/371472 [4:36:35<24:39:27, 3.53it/s] 16%|█▌ | 57872/371472 [4:36:35<24:05:54, 3.61it/s] 16%|█▌ | 57873/371472 [4:36:35<23:43:33, 3.67it/s] 16%|█▌ | 57874/371472 [4:36:36<23:36:01, 3.69it/s] 16%|█▌ | 57875/371472 [4:36:36<24:47:50, 3.51it/s] 16%|█▌ | 57876/371472 [4:36:36<25:11:16, 3.46it/s] 16%|█▌ | 57877/371472 [4:36:36<25:02:24, 3.48it/s] 16%|█▌ | 57878/371472 [4:36:37<23:52:44, 3.65it/s] 16%|█▌ | 57879/371472 [4:36:37<25:15:20, 3.45it/s] 16%|█▌ | 57880/371472 [4:36:37<25:49:10, 3.37it/s] {'loss': 4.0805, 'learning_rate': 8.601779827188691e-07, 'epoch': 2.49} + 16%|█▌ | 57880/371472 [4:36:37<25:49:10, 3.37it/s] 16%|█▌ | 57881/371472 [4:36:38<26:15:49, 3.32it/s] 16%|█▌ | 57882/371472 [4:36:38<26:42:24, 3.26it/s] 16%|█▌ | 57883/371472 [4:36:38<26:34:55, 3.28it/s] 16%|█▌ | 57884/371472 [4:36:39<26:35:40, 3.28it/s] 16%|█▌ | 57885/371472 [4:36:39<26:22:01, 3.30it/s] 16%|█▌ | 57886/371472 [4:36:39<25:09:18, 3.46it/s] 16%|█▌ | 57887/371472 [4:36:39<24:51:16, 3.50it/s] 16%|█▌ | 57888/371472 [4:36:40<23:43:54, 3.67it/s] 16%|█▌ | 57889/371472 [4:36:40<22:57:25, 3.79it/s] 16%|█▌ | 57890/371472 [4:36:40<24:07:22, 3.61it/s] 16%|█▌ | 57891/371472 [4:36:41<26:39:38, 3.27it/s] 16%|█▌ | 57892/371472 [4:36:41<25:00:48, 3.48it/s] 16%|█▌ | 57893/371472 [4:36:41<24:12:11, 3.60it/s] 16%|█▌ | 57894/371472 [4:36:41<24:39:10, 3.53it/s] 16%|█▌ | 57895/371472 [4:36:42<24:10:52, 3.60it/s] 16%|█▌ | 57896/371472 [4:36:42<23:48:39, 3.66it/s] 16%|█▌ | 57897/371472 [4:36:42<23:06:39, 3.77it/s] 16%|█▌ | 57898/371472 [4:36:42<25:19:14, 3.44it/s] 16%|█▌ | 57899/371472 [4:36:43<25:57:44, 3.36it/s] 16%|█▌ | 57900/371472 [4:36:43<25:12:21, 3.46it/s] {'loss': 4.3075, 'learning_rate': 8.601295007433902e-07, 'epoch': 2.49} + 16%|█▌ | 57900/371472 [4:36:43<25:12:21, 3.46it/s] 16%|█▌ | 57901/371472 [4:36:43<24:58:34, 3.49it/s] 16%|█▌ | 57902/371472 [4:36:44<24:21:52, 3.57it/s] 16%|█▌ | 57903/371472 [4:36:44<24:15:59, 3.59it/s] 16%|█▌ | 57904/371472 [4:36:44<26:09:42, 3.33it/s] 16%|█▌ | 57905/371472 [4:36:44<24:24:14, 3.57it/s] 16%|█▌ | 57906/371472 [4:36:45<24:56:28, 3.49it/s] 16%|█▌ | 57907/371472 [4:36:45<24:12:52, 3.60it/s] 16%|█▌ | 57908/371472 [4:36:45<23:48:00, 3.66it/s] 16%|█▌ | 57909/371472 [4:36:46<23:10:15, 3.76it/s] 16%|█▌ | 57910/371472 [4:36:46<22:36:25, 3.85it/s] 16%|█▌ | 57911/371472 [4:36:46<21:58:59, 3.96it/s] 16%|█▌ | 57912/371472 [4:36:46<22:25:59, 3.88it/s] 16%|█▌ | 57913/371472 [4:36:47<24:10:19, 3.60it/s] 16%|█▌ | 57914/371472 [4:36:47<23:53:18, 3.65it/s] 16%|█▌ | 57915/371472 [4:36:47<25:46:13, 3.38it/s] 16%|█▌ | 57916/371472 [4:36:48<26:00:12, 3.35it/s] 16%|█▌ | 57917/371472 [4:36:48<24:25:49, 3.57it/s] 16%|█▌ | 57918/371472 [4:36:48<24:23:38, 3.57it/s] 16%|█▌ | 57919/371472 [4:36:48<24:56:59, 3.49it/s] 16%|█▌ | 57920/371472 [4:36:49<25:05:35, 3.47it/s] {'loss': 3.9731, 'learning_rate': 8.600810187679114e-07, 'epoch': 2.49} + 16%|█▌ | 57920/371472 [4:36:49<25:05:35, 3.47it/s] 16%|█▌ | 57921/371472 [4:36:49<25:39:36, 3.39it/s] 16%|█▌ | 57922/371472 [4:36:49<25:30:29, 3.41it/s] 16%|█▌ | 57923/371472 [4:36:49<24:54:24, 3.50it/s] 16%|█▌ | 57924/371472 [4:36:50<23:54:18, 3.64it/s] 16%|█▌ | 57925/371472 [4:36:50<23:49:13, 3.66it/s] 16%|█▌ | 57926/371472 [4:36:50<24:23:23, 3.57it/s] 16%|█▌ | 57927/371472 [4:36:51<23:22:54, 3.72it/s] 16%|█▌ | 57928/371472 [4:36:51<24:04:17, 3.62it/s] 16%|█▌ | 57929/371472 [4:36:51<23:26:59, 3.71it/s] 16%|█▌ | 57930/371472 [4:36:51<24:18:38, 3.58it/s] 16%|█▌ | 57931/371472 [4:36:52<24:53:20, 3.50it/s] 16%|█▌ | 57932/371472 [4:36:52<25:31:14, 3.41it/s] 16%|█▌ | 57933/371472 [4:36:52<25:34:28, 3.41it/s] 16%|█▌ | 57934/371472 [4:36:53<24:44:01, 3.52it/s] 16%|█▌ | 57935/371472 [4:36:53<25:07:30, 3.47it/s] 16%|█▌ | 57936/371472 [4:36:53<25:17:46, 3.44it/s] 16%|█▌ | 57937/371472 [4:36:53<25:42:23, 3.39it/s] 16%|█▌ | 57938/371472 [4:36:54<25:17:21, 3.44it/s] 16%|█▌ | 57939/371472 [4:36:54<24:39:57, 3.53it/s] 16%|█▌ | 57940/371472 [4:36:54<24:21:02, 3.58it/s] {'loss': 4.2603, 'learning_rate': 8.600325367924325e-07, 'epoch': 2.5} + 16%|█▌ | 57940/371472 [4:36:54<24:21:02, 3.58it/s] 16%|█▌ | 57941/371472 [4:36:55<24:57:43, 3.49it/s] 16%|█▌ | 57942/371472 [4:36:55<24:01:57, 3.62it/s] 16%|█▌ | 57943/371472 [4:36:55<24:06:40, 3.61it/s] 16%|█▌ | 57944/371472 [4:36:55<23:50:29, 3.65it/s] 16%|█▌ | 57945/371472 [4:36:56<23:49:45, 3.65it/s] 16%|█▌ | 57946/371472 [4:36:56<24:09:30, 3.60it/s] 16%|█▌ | 57947/371472 [4:36:56<23:48:43, 3.66it/s] 16%|█▌ | 57948/371472 [4:36:56<23:20:54, 3.73it/s] 16%|█▌ | 57949/371472 [4:36:57<23:15:55, 3.74it/s] 16%|█▌ | 57950/371472 [4:36:57<23:47:34, 3.66it/s] 16%|█▌ | 57951/371472 [4:36:57<23:16:42, 3.74it/s] 16%|█▌ | 57952/371472 [4:36:58<22:40:56, 3.84it/s] 16%|█▌ | 57953/371472 [4:36:58<23:57:00, 3.64it/s] 16%|█▌ | 57954/371472 [4:36:58<24:35:20, 3.54it/s] 16%|█▌ | 57955/371472 [4:36:58<24:35:42, 3.54it/s] 16%|█▌ | 57956/371472 [4:36:59<25:11:05, 3.46it/s] 16%|█▌ | 57957/371472 [4:36:59<24:33:02, 3.55it/s] 16%|█▌ | 57958/371472 [4:36:59<25:02:54, 3.48it/s] 16%|█▌ | 57959/371472 [4:37:00<26:13:54, 3.32it/s] 16%|█▌ | 57960/371472 [4:37:00<25:35:39, 3.40it/s] {'loss': 4.26, 'learning_rate': 8.599840548169535e-07, 'epoch': 2.5} + 16%|█▌ | 57960/371472 [4:37:00<25:35:39, 3.40it/s] 16%|█▌ | 57961/371472 [4:37:00<24:07:15, 3.61it/s] 16%|█▌ | 57962/371472 [4:37:00<22:59:40, 3.79it/s] 16%|█▌ | 57963/371472 [4:37:01<24:50:59, 3.50it/s] 16%|█▌ | 57964/371472 [4:37:01<24:38:12, 3.53it/s] 16%|█▌ | 57965/371472 [4:37:01<24:36:45, 3.54it/s] 16%|█▌ | 57966/371472 [4:37:02<24:25:07, 3.57it/s] 16%|█▌ | 57967/371472 [4:37:02<24:24:22, 3.57it/s] 16%|█▌ | 57968/371472 [4:37:02<23:34:20, 3.69it/s] 16%|█▌ | 57969/371472 [4:37:02<23:16:22, 3.74it/s] 16%|█▌ | 57970/371472 [4:37:03<23:03:27, 3.78it/s] 16%|█▌ | 57971/371472 [4:37:03<23:17:25, 3.74it/s] 16%|█▌ | 57972/371472 [4:37:03<22:53:40, 3.80it/s] 16%|█▌ | 57973/371472 [4:37:03<22:39:33, 3.84it/s] 16%|█▌ | 57974/371472 [4:37:04<23:51:51, 3.65it/s] 16%|█▌ | 57975/371472 [4:37:04<24:00:38, 3.63it/s] 16%|█▌ | 57976/371472 [4:37:04<26:02:16, 3.34it/s] 16%|█▌ | 57977/371472 [4:37:05<26:02:02, 3.34it/s] 16%|█▌ | 57978/371472 [4:37:05<24:25:44, 3.56it/s] 16%|���▌ | 57979/371472 [4:37:05<24:09:43, 3.60it/s] 16%|█▌ | 57980/371472 [4:37:05<23:46:52, 3.66it/s] {'loss': 3.9662, 'learning_rate': 8.599355728414747e-07, 'epoch': 2.5} + 16%|█▌ | 57980/371472 [4:37:05<23:46:52, 3.66it/s] 16%|█▌ | 57981/371472 [4:37:06<24:31:11, 3.55it/s] 16%|█▌ | 57982/371472 [4:37:06<23:32:49, 3.70it/s] 16%|█▌ | 57983/371472 [4:37:06<23:42:19, 3.67it/s] 16%|█▌ | 57984/371472 [4:37:06<22:54:55, 3.80it/s] 16%|█▌ | 57985/371472 [4:37:07<24:25:17, 3.57it/s] 16%|█▌ | 57986/371472 [4:37:07<24:47:16, 3.51it/s] 16%|█▌ | 57987/371472 [4:37:07<26:12:48, 3.32it/s] 16%|█▌ | 57988/371472 [4:37:08<26:26:51, 3.29it/s] 16%|█▌ | 57989/371472 [4:37:08<26:37:35, 3.27it/s] 16%|█▌ | 57990/371472 [4:37:08<27:08:59, 3.21it/s] 16%|█▌ | 57991/371472 [4:37:09<26:12:38, 3.32it/s] 16%|█▌ | 57992/371472 [4:37:09<25:25:43, 3.42it/s] 16%|█▌ | 57993/371472 [4:37:09<25:13:03, 3.45it/s] 16%|█▌ | 57994/371472 [4:37:10<26:59:58, 3.23it/s] 16%|█▌ | 57995/371472 [4:37:10<25:52:08, 3.37it/s] 16%|█▌ | 57996/371472 [4:37:10<27:49:17, 3.13it/s] 16%|█▌ | 57997/371472 [4:37:10<27:11:04, 3.20it/s] 16%|█▌ | 57998/371472 [4:37:11<25:10:48, 3.46it/s] 16%|█▌ | 57999/371472 [4:37:11<24:12:49, 3.60it/s] 16%|█▌ | 58000/371472 [4:37:11<25:27:23, 3.42it/s] {'loss': 4.4137, 'learning_rate': 8.598870908659958e-07, 'epoch': 2.5} + 16%|█▌ | 58000/371472 [4:37:11<25:27:23, 3.42it/s] 16%|█▌ | 58001/371472 [4:37:12<24:56:04, 3.49it/s] 16%|█▌ | 58002/371472 [4:37:12<24:19:58, 3.58it/s] 16%|█▌ | 58003/371472 [4:37:12<25:50:39, 3.37it/s] 16%|█▌ | 58004/371472 [4:37:12<25:40:35, 3.39it/s] 16%|█▌ | 58005/371472 [4:37:13<24:44:36, 3.52it/s] 16%|█▌ | 58006/371472 [4:37:13<23:31:34, 3.70it/s] 16%|█▌ | 58007/371472 [4:37:13<24:16:58, 3.59it/s] 16%|█▌ | 58008/371472 [4:37:13<23:20:42, 3.73it/s] 16%|█▌ | 58009/371472 [4:37:14<23:08:56, 3.76it/s] 16%|█▌ | 58010/371472 [4:37:14<23:09:48, 3.76it/s] 16%|█▌ | 58011/371472 [4:37:14<22:20:46, 3.90it/s] 16%|█▌ | 58012/371472 [4:37:14<22:00:08, 3.96it/s] 16%|█▌ | 58013/371472 [4:37:15<23:04:23, 3.77it/s] 16%|█▌ | 58014/371472 [4:37:15<23:45:10, 3.67it/s] 16%|█▌ | 58015/371472 [4:37:15<23:46:28, 3.66it/s] 16%|█▌ | 58016/371472 [4:37:16<22:54:08, 3.80it/s] 16%|█▌ | 58017/371472 [4:37:16<23:30:34, 3.70it/s] 16%|█▌ | 58018/371472 [4:37:16<25:35:32, 3.40it/s] 16%|█▌ | 58019/371472 [4:37:16<25:26:50, 3.42it/s] 16%|█▌ | 58020/371472 [4:37:17<26:00:41, 3.35it/s] {'loss': 4.3187, 'learning_rate': 8.59838608890517e-07, 'epoch': 2.5} + 16%|█▌ | 58020/371472 [4:37:17<26:00:41, 3.35it/s] 16%|█▌ | 58021/371472 [4:37:17<25:44:45, 3.38it/s] 16%|█▌ | 58022/371472 [4:37:17<24:07:34, 3.61it/s] 16%|█▌ | 58023/371472 [4:37:18<23:45:40, 3.66it/s] 16%|█▌ | 58024/371472 [4:37:18<23:52:57, 3.65it/s] 16%|█▌ | 58025/371472 [4:37:18<25:54:03, 3.36it/s] 16%|█▌ | 58026/371472 [4:37:18<25:21:18, 3.43it/s] 16%|█▌ | 58027/371472 [4:37:19<24:36:06, 3.54it/s] 16%|█▌ | 58028/371472 [4:37:19<26:04:01, 3.34it/s] 16%|█▌ | 58029/371472 [4:37:19<24:52:59, 3.50it/s] 16%|█▌ | 58030/371472 [4:37:20<25:39:48, 3.39it/s] 16%|█▌ | 58031/371472 [4:37:20<25:10:45, 3.46it/s] 16%|█▌ | 58032/371472 [4:37:20<24:21:29, 3.57it/s] 16%|█▌ | 58033/371472 [4:37:21<25:48:52, 3.37it/s] 16%|█▌ | 58034/371472 [4:37:21<25:36:04, 3.40it/s] 16%|█▌ | 58035/371472 [4:37:21<25:33:45, 3.41it/s] 16%|█▌ | 58036/371472 [4:37:21<25:59:47, 3.35it/s] 16%|█▌ | 58037/371472 [4:37:22<25:06:09, 3.47it/s] 16%|█▌ | 58038/371472 [4:37:22<24:44:40, 3.52it/s] 16%|█▌ | 58039/371472 [4:37:22<25:10:13, 3.46it/s] 16%|█▌ | 58040/371472 [4:37:23<24:10:42, 3.60it/s] {'loss': 4.3463, 'learning_rate': 8.597901269150379e-07, 'epoch': 2.5} + 16%|█▌ | 58040/371472 [4:37:23<24:10:42, 3.60it/s] 16%|█▌ | 58041/371472 [4:37:23<25:26:36, 3.42it/s] 16%|█▌ | 58042/371472 [4:37:23<24:29:21, 3.56it/s] 16%|█▌ | 58043/371472 [4:37:23<23:57:06, 3.63it/s] 16%|█▌ | 58044/371472 [4:37:24<24:51:21, 3.50it/s] 16%|█▌ | 58045/371472 [4:37:24<24:11:45, 3.60it/s] 16%|█▌ | 58046/371472 [4:37:24<24:48:14, 3.51it/s] 16%|█▌ | 58047/371472 [4:37:25<25:16:22, 3.44it/s] 16%|█▌ | 58048/371472 [4:37:25<27:58:07, 3.11it/s] 16%|█▌ | 58049/371472 [4:37:25<26:36:46, 3.27it/s] 16%|█▌ | 58050/371472 [4:37:25<25:51:44, 3.37it/s] 16%|█▌ | 58051/371472 [4:37:26<26:56:14, 3.23it/s] 16%|█▌ | 58052/371472 [4:37:26<25:52:29, 3.36it/s] 16%|█▌ | 58053/371472 [4:37:26<24:19:11, 3.58it/s] 16%|█▌ | 58054/371472 [4:37:27<23:26:33, 3.71it/s] 16%|█▌ | 58055/371472 [4:37:27<23:39:28, 3.68it/s] 16%|█▌ | 58056/371472 [4:37:27<24:45:29, 3.52it/s] 16%|█▌ | 58057/371472 [4:37:28<26:31:21, 3.28it/s] 16%|█▌ | 58058/371472 [4:37:28<25:11:48, 3.46it/s] 16%|█▌ | 58059/371472 [4:37:28<24:11:44, 3.60it/s] 16%|█▌ | 58060/371472 [4:37:28<24:46:01, 3.52it/s] {'loss': 4.1218, 'learning_rate': 8.597416449395591e-07, 'epoch': 2.5} + 16%|█▌ | 58060/371472 [4:37:28<24:46:01, 3.52it/s] 16%|█▌ | 58061/371472 [4:37:29<24:47:56, 3.51it/s] 16%|█▌ | 58062/371472 [4:37:29<24:26:33, 3.56it/s] 16%|█▌ | 58063/371472 [4:37:29<23:44:37, 3.67it/s] 16%|█▌ | 58064/371472 [4:37:29<23:13:31, 3.75it/s] 16%|█▌ | 58065/371472 [4:37:30<23:07:50, 3.76it/s] 16%|█▌ | 58066/371472 [4:37:30<24:10:09, 3.60it/s] 16%|█▌ | 58067/371472 [4:37:30<25:39:57, 3.39it/s] 16%|█▌ | 58068/371472 [4:37:31<24:31:57, 3.55it/s] 16%|█▌ | 58069/371472 [4:37:31<24:04:09, 3.62it/s] 16%|█▌ | 58070/371472 [4:37:31<22:58:50, 3.79it/s] 16%|█▌ | 58071/371472 [4:37:31<22:52:47, 3.80it/s] 16%|█▌ | 58072/371472 [4:37:32<22:50:40, 3.81it/s] 16%|█▌ | 58073/371472 [4:37:32<23:53:59, 3.64it/s] 16%|█▌ | 58074/371472 [4:37:32<24:38:51, 3.53it/s] 16%|█▌ | 58075/371472 [4:37:33<26:24:10, 3.30it/s] 16%|█▌ | 58076/371472 [4:37:33<24:30:06, 3.55it/s] 16%|█▌ | 58077/371472 [4:37:33<23:45:09, 3.67it/s] 16%|█▌ | 58078/371472 [4:37:33<26:23:00, 3.30it/s] 16%|█▌ | 58079/371472 [4:37:34<26:11:27, 3.32it/s] 16%|█▌ | 58080/371472 [4:37:34<24:36:09, 3.54it/s] {'loss': 4.2526, 'learning_rate': 8.596931629640802e-07, 'epoch': 2.5} + 16%|█▌ | 58080/371472 [4:37:34<24:36:09, 3.54it/s] 16%|█▌ | 58081/371472 [4:37:34<24:21:51, 3.57it/s] 16%|█▌ | 58082/371472 [4:37:34<25:09:30, 3.46it/s] 16%|█▌ | 58083/371472 [4:37:35<23:50:20, 3.65it/s] 16%|█▌ | 58084/371472 [4:37:35<26:38:53, 3.27it/s] 16%|█▌ | 58085/371472 [4:37:35<25:30:41, 3.41it/s] 16%|█▌ | 58086/371472 [4:37:36<24:50:31, 3.50it/s] 16%|█▌ | 58087/371472 [4:37:36<24:22:20, 3.57it/s] 16%|█▌ | 58088/371472 [4:37:36<24:18:24, 3.58it/s] 16%|█▌ | 58089/371472 [4:37:36<23:42:28, 3.67it/s] 16%|█▌ | 58090/371472 [4:37:37<24:24:33, 3.57it/s] 16%|█▌ | 58091/371472 [4:37:37<23:24:16, 3.72it/s] 16%|█▌ | 58092/371472 [4:37:37<23:27:41, 3.71it/s] 16%|█▌ | 58093/371472 [4:37:38<24:30:55, 3.55it/s] 16%|█▌ | 58094/371472 [4:37:38<23:54:41, 3.64it/s] 16%|█▌ | 58095/371472 [4:37:38<23:06:34, 3.77it/s] 16%|█▌ | 58096/371472 [4:37:38<24:35:07, 3.54it/s] 16%|█▌ | 58097/371472 [4:37:39<24:45:03, 3.52it/s] 16%|█▌ | 58098/371472 [4:37:39<24:18:18, 3.58it/s] 16%|█▌ | 58099/371472 [4:37:39<23:32:10, 3.70it/s] 16%|█▌ | 58100/371472 [4:37:39<24:37:34, 3.53it/s] {'loss': 4.4045, 'learning_rate': 8.596446809886013e-07, 'epoch': 2.5} + 16%|█▌ | 58100/371472 [4:37:39<24:37:34, 3.53it/s] 16%|█▌ | 58101/371472 [4:37:40<24:31:17, 3.55it/s] 16%|█▌ | 58102/371472 [4:37:40<23:18:07, 3.74it/s] 16%|█▌ | 58103/371472 [4:37:40<24:53:54, 3.50it/s] 16%|█▌ | 58104/371472 [4:37:41<26:06:00, 3.34it/s] 16%|█▌ | 58105/371472 [4:37:41<26:46:16, 3.25it/s] 16%|█▌ | 58106/371472 [4:37:41<26:29:08, 3.29it/s] 16%|█▌ | 58107/371472 [4:37:42<28:54:51, 3.01it/s] 16%|█▌ | 58108/371472 [4:37:42<26:52:57, 3.24it/s] 16%|█▌ | 58109/371472 [4:37:42<25:37:27, 3.40it/s] 16%|█▌ | 58110/371472 [4:37:43<25:58:11, 3.35it/s] 16%|█▌ | 58111/371472 [4:37:43<24:31:54, 3.55it/s] 16%|█▌ | 58112/371472 [4:37:43<23:28:23, 3.71it/s] 16%|█▌ | 58113/371472 [4:37:43<23:27:45, 3.71it/s] 16%|█▌ | 58114/371472 [4:37:44<22:47:31, 3.82it/s] 16%|█▌ | 58115/371472 [4:37:44<23:05:10, 3.77it/s] 16%|█▌ | 58116/371472 [4:37:44<22:54:14, 3.80it/s] 16%|█▌ | 58117/371472 [4:37:44<23:19:25, 3.73it/s] 16%|█▌ | 58118/371472 [4:37:45<23:16:20, 3.74it/s] 16%|█▌ | 58119/371472 [4:37:45<22:21:09, 3.89it/s] 16%|█▌ | 58120/371472 [4:37:45<22:44:18, 3.83it/s] {'loss': 4.1303, 'learning_rate': 8.595961990131224e-07, 'epoch': 2.5} + 16%|█▌ | 58120/371472 [4:37:45<22:44:18, 3.83it/s] 16%|█▌ | 58121/371472 [4:37:45<22:57:43, 3.79it/s] 16%|█▌ | 58122/371472 [4:37:46<24:05:01, 3.61it/s] 16%|█▌ | 58123/371472 [4:37:46<23:59:15, 3.63it/s] 16%|█▌ | 58124/371472 [4:37:46<25:51:45, 3.37it/s] 16%|█▌ | 58125/371472 [4:37:47<24:28:03, 3.56it/s] 16%|█▌ | 58126/371472 [4:37:47<24:56:59, 3.49it/s] 16%|█▌ | 58127/371472 [4:37:47<25:57:35, 3.35it/s] 16%|█▌ | 58128/371472 [4:37:47<26:02:17, 3.34it/s] 16%|█▌ | 58129/371472 [4:37:48<25:09:03, 3.46it/s] 16%|█▌ | 58130/371472 [4:37:48<24:01:38, 3.62it/s] 16%|█▌ | 58131/371472 [4:37:48<25:20:59, 3.43it/s] 16%|█▌ | 58132/371472 [4:37:49<24:29:42, 3.55it/s] 16%|█▌ | 58133/371472 [4:37:49<25:40:52, 3.39it/s] 16%|█▌ | 58134/371472 [4:37:49<24:52:14, 3.50it/s] 16%|█▌ | 58135/371472 [4:37:49<24:28:42, 3.56it/s] 16%|█▌ | 58136/371472 [4:37:50<25:11:53, 3.45it/s] 16%|█▌ | 58137/371472 [4:37:50<25:02:36, 3.48it/s] 16%|█▌ | 58138/371472 [4:37:50<23:54:16, 3.64it/s] 16%|█▌ | 58139/371472 [4:37:51<23:34:38, 3.69it/s] 16%|█▌ | 58140/371472 [4:37:51<23:42:48, 3.67it/s] {'loss': 4.0786, 'learning_rate': 8.595477170376436e-07, 'epoch': 2.5} + 16%|█▌ | 58140/371472 [4:37:51<23:42:48, 3.67it/s] 16%|█▌ | 58141/371472 [4:37:51<25:11:24, 3.46it/s] 16%|█▌ | 58142/371472 [4:37:51<24:09:00, 3.60it/s] 16%|█▌ | 58143/371472 [4:37:52<24:13:00, 3.59it/s] 16%|█▌ | 58144/371472 [4:37:52<23:31:51, 3.70it/s] 16%|█▌ | 58145/371472 [4:37:52<22:50:35, 3.81it/s] 16%|█▌ | 58146/371472 [4:37:52<23:29:12, 3.71it/s] 16%|█▌ | 58147/371472 [4:37:53<23:20:38, 3.73it/s] 16%|█▌ | 58148/371472 [4:37:53<22:44:24, 3.83it/s] 16%|█▌ | 58149/371472 [4:37:53<22:42:01, 3.83it/s] 16%|█▌ | 58150/371472 [4:37:54<24:13:14, 3.59it/s] 16%|█▌ | 58151/371472 [4:37:54<24:39:25, 3.53it/s] 16%|█▌ | 58152/371472 [4:37:54<24:15:10, 3.59it/s] 16%|█▌ | 58153/371472 [4:37:54<23:19:24, 3.73it/s] 16%|█▌ | 58154/371472 [4:37:55<22:45:44, 3.82it/s] 16%|█▌ | 58155/371472 [4:37:55<22:48:35, 3.82it/s] 16%|█▌ | 58156/371472 [4:37:55<22:17:00, 3.91it/s] 16%|█▌ | 58157/371472 [4:37:55<23:45:50, 3.66it/s] 16%|█▌ | 58158/371472 [4:37:56<22:58:06, 3.79it/s] 16%|█▌ | 58159/371472 [4:37:56<24:05:53, 3.61it/s] 16%|█▌ | 58160/371472 [4:37:56<23:33:45, 3.69it/s] {'loss': 4.2704, 'learning_rate': 8.594992350621646e-07, 'epoch': 2.51} + 16%|█▌ | 58160/371472 [4:37:56<23:33:45, 3.69it/s] 16%|█▌ | 58161/371472 [4:37:56<22:51:16, 3.81it/s] 16%|█▌ | 58162/371472 [4:37:57<22:12:15, 3.92it/s] 16%|█▌ | 58163/371472 [4:37:57<22:34:05, 3.86it/s] 16%|█▌ | 58164/371472 [4:37:57<24:30:06, 3.55it/s] 16%|█▌ | 58165/371472 [4:37:58<23:57:44, 3.63it/s] 16%|█▌ | 58166/371472 [4:37:58<24:42:03, 3.52it/s] 16%|█▌ | 58167/371472 [4:37:58<24:54:03, 3.49it/s] 16%|█▌ | 58168/371472 [4:37:58<25:44:14, 3.38it/s] 16%|█▌ | 58169/371472 [4:37:59<24:36:58, 3.54it/s] 16%|█▌ | 58170/371472 [4:37:59<23:39:27, 3.68it/s] 16%|█▌ | 58171/371472 [4:37:59<23:28:36, 3.71it/s] 16%|█▌ | 58172/371472 [4:37:59<22:24:21, 3.88it/s] 16%|█▌ | 58173/371472 [4:38:00<22:36:12, 3.85it/s] 16%|█▌ | 58174/371472 [4:38:00<22:51:12, 3.81it/s] 16%|█▌ | 58175/371472 [4:38:00<22:34:45, 3.85it/s] 16%|█▌ | 58176/371472 [4:38:01<23:08:10, 3.76it/s] 16%|█▌ | 58177/371472 [4:38:01<22:43:30, 3.83it/s] 16%|█▌ | 58178/371472 [4:38:01<22:32:07, 3.86it/s] 16%|█▌ | 58179/371472 [4:38:01<22:08:58, 3.93it/s] 16%|█▌ | 58180/371472 [4:38:02<22:08:50, 3.93it/s] {'loss': 4.2644, 'learning_rate': 8.594507530866857e-07, 'epoch': 2.51} + 16%|█▌ | 58180/371472 [4:38:02<22:08:50, 3.93it/s] 16%|█▌ | 58181/371472 [4:38:02<23:56:47, 3.63it/s] 16%|█▌ | 58182/371472 [4:38:02<23:49:08, 3.65it/s] 16%|█▌ | 58183/371472 [4:38:02<23:31:26, 3.70it/s] 16%|█▌ | 58184/371472 [4:38:03<25:31:35, 3.41it/s] 16%|█▌ | 58185/371472 [4:38:03<25:31:59, 3.41it/s] 16%|█▌ | 58186/371472 [4:38:03<26:00:46, 3.35it/s] 16%|█▌ | 58187/371472 [4:38:04<24:23:53, 3.57it/s] 16%|█▌ | 58188/371472 [4:38:04<24:36:57, 3.54it/s] 16%|█▌ | 58189/371472 [4:38:04<24:34:57, 3.54it/s] 16%|█▌ | 58190/371472 [4:38:04<23:41:57, 3.67it/s] 16%|█▌ | 58191/371472 [4:38:05<23:15:13, 3.74it/s] 16%|█▌ | 58192/371472 [4:38:05<24:05:39, 3.61it/s] 16%|█▌ | 58193/371472 [4:38:05<25:16:47, 3.44it/s] 16%|█▌ | 58194/371472 [4:38:06<24:27:55, 3.56it/s] 16%|█▌ | 58195/371472 [4:38:06<23:28:45, 3.71it/s] 16%|█▌ | 58196/371472 [4:38:06<23:14:41, 3.74it/s] 16%|█▌ | 58197/371472 [4:38:06<22:43:22, 3.83it/s] 16%|█▌ | 58198/371472 [4:38:07<22:50:00, 3.81it/s] 16%|█▌ | 58199/371472 [4:38:07<22:38:33, 3.84it/s] 16%|█▌ | 58200/371472 [4:38:07<24:46:37, 3.51it/s] {'loss': 4.4116, 'learning_rate': 8.594022711112068e-07, 'epoch': 2.51} + 16%|█▌ | 58200/371472 [4:38:07<24:46:37, 3.51it/s] 16%|█▌ | 58201/371472 [4:38:07<24:16:42, 3.58it/s] 16%|█▌ | 58202/371472 [4:38:08<24:42:59, 3.52it/s] 16%|█▌ | 58203/371472 [4:38:08<24:06:50, 3.61it/s] 16%|█▌ | 58204/371472 [4:38:08<24:35:31, 3.54it/s] 16%|█▌ | 58205/371472 [4:38:09<25:13:42, 3.45it/s] 16%|█▌ | 58206/371472 [4:38:09<24:01:29, 3.62it/s] 16%|█▌ | 58207/371472 [4:38:09<23:58:58, 3.63it/s] 16%|█▌ | 58208/371472 [4:38:09<23:38:47, 3.68it/s] 16%|█▌ | 58209/371472 [4:38:10<23:18:29, 3.73it/s] 16%|█▌ | 58210/371472 [4:38:10<22:54:51, 3.80it/s] 16%|█▌ | 58211/371472 [4:38:10<23:31:41, 3.70it/s] 16%|█▌ | 58212/371472 [4:38:10<24:22:06, 3.57it/s] 16%|█▌ | 58213/371472 [4:38:11<24:29:23, 3.55it/s] 16%|█▌ | 58214/371472 [4:38:11<24:40:42, 3.53it/s] 16%|█▌ | 58215/371472 [4:38:11<24:31:20, 3.55it/s] 16%|█▌ | 58216/371472 [4:38:12<23:42:07, 3.67it/s] 16%|█▌ | 58217/371472 [4:38:12<23:01:07, 3.78it/s] 16%|█▌ | 58218/371472 [4:38:12<24:04:38, 3.61it/s] 16%|█▌ | 58219/371472 [4:38:12<24:17:51, 3.58it/s] 16%|█▌ | 58220/371472 [4:38:13<25:09:15, 3.46it/s] {'loss': 4.0617, 'learning_rate': 8.59353789135728e-07, 'epoch': 2.51} + 16%|█▌ | 58220/371472 [4:38:13<25:09:15, 3.46it/s] 16%|█▌ | 58221/371472 [4:38:13<24:23:06, 3.57it/s] 16%|█▌ | 58222/371472 [4:38:13<24:13:18, 3.59it/s] 16%|█▌ | 58223/371472 [4:38:13<23:32:39, 3.70it/s] 16%|█▌ | 58224/371472 [4:38:14<23:30:54, 3.70it/s] 16%|█▌ | 58225/371472 [4:38:14<24:18:08, 3.58it/s] 16%|█▌ | 58226/371472 [4:38:14<25:44:31, 3.38it/s] 16%|█▌ | 58227/371472 [4:38:15<24:38:04, 3.53it/s] 16%|█▌ | 58228/371472 [4:38:15<25:18:32, 3.44it/s] 16%|█▌ | 58229/371472 [4:38:15<24:28:04, 3.56it/s] 16%|█▌ | 58230/371472 [4:38:15<23:53:20, 3.64it/s] 16%|█▌ | 58231/371472 [4:38:16<24:04:56, 3.61it/s] 16%|█▌ | 58232/371472 [4:38:16<23:57:23, 3.63it/s] 16%|█▌ | 58233/371472 [4:38:16<24:07:49, 3.61it/s] 16%|█▌ | 58234/371472 [4:38:17<24:44:44, 3.52it/s] 16%|█▌ | 58235/371472 [4:38:17<24:49:14, 3.51it/s] 16%|█▌ | 58236/371472 [4:38:17<24:17:44, 3.58it/s] 16%|█▌ | 58237/371472 [4:38:17<24:56:19, 3.49it/s] 16%|█▌ | 58238/371472 [4:38:18<26:21:50, 3.30it/s] 16%|█▌ | 58239/371472 [4:38:18<25:28:02, 3.42it/s] 16%|█▌ | 58240/371472 [4:38:18<27:47:10, 3.13it/s] {'loss': 4.3488, 'learning_rate': 8.593053071602491e-07, 'epoch': 2.51} + 16%|█▌ | 58240/371472 [4:38:18<27:47:10, 3.13it/s] 16%|█▌ | 58241/371472 [4:38:19<25:37:43, 3.39it/s] 16%|█▌ | 58242/371472 [4:38:19<25:05:18, 3.47it/s] 16%|█▌ | 58243/371472 [4:38:19<24:33:59, 3.54it/s] 16%|█▌ | 58244/371472 [4:38:20<24:46:14, 3.51it/s] 16%|█▌ | 58245/371472 [4:38:20<24:18:04, 3.58it/s] 16%|█▌ | 58246/371472 [4:38:20<24:50:29, 3.50it/s] 16%|█▌ | 58247/371472 [4:38:20<23:57:17, 3.63it/s] 16%|█▌ | 58248/371472 [4:38:21<23:19:22, 3.73it/s] 16%|█▌ | 58249/371472 [4:38:21<23:46:28, 3.66it/s] 16%|█▌ | 58250/371472 [4:38:21<23:04:46, 3.77it/s] 16%|█▌ | 58251/371472 [4:38:21<23:02:23, 3.78it/s] 16%|█▌ | 58252/371472 [4:38:22<23:16:45, 3.74it/s] 16%|█▌ | 58253/371472 [4:38:22<23:49:31, 3.65it/s] 16%|█▌ | 58254/371472 [4:38:22<23:03:37, 3.77it/s] 16%|█▌ | 58255/371472 [4:38:22<23:54:07, 3.64it/s] 16%|█▌ | 58256/371472 [4:38:23<23:41:30, 3.67it/s] 16%|█▌ | 58257/371472 [4:38:23<23:06:52, 3.76it/s] 16%|█▌ | 58258/371472 [4:38:23<22:20:01, 3.90it/s] 16%|█▌ | 58259/371472 [4:38:24<24:07:47, 3.61it/s] 16%|█▌ | 58260/371472 [4:38:24<24:08:08, 3.60it/s] {'loss': 4.213, 'learning_rate': 8.592568251847701e-07, 'epoch': 2.51} + 16%|█▌ | 58260/371472 [4:38:24<24:08:08, 3.60it/s] 16%|█▌ | 58261/371472 [4:38:24<23:33:01, 3.69it/s] 16%|█▌ | 58262/371472 [4:38:24<22:59:21, 3.78it/s] 16%|█▌ | 58263/371472 [4:38:25<23:16:44, 3.74it/s] 16%|█▌ | 58264/371472 [4:38:25<24:07:14, 3.61it/s] 16%|█▌ | 58265/371472 [4:38:25<23:59:40, 3.63it/s] 16%|█▌ | 58266/371472 [4:38:25<23:11:48, 3.75it/s] 16%|█▌ | 58267/371472 [4:38:26<24:08:04, 3.60it/s] 16%|█▌ | 58268/371472 [4:38:26<22:54:28, 3.80it/s] 16%|█▌ | 58269/371472 [4:38:26<23:20:09, 3.73it/s] 16%|█▌ | 58270/371472 [4:38:26<22:52:31, 3.80it/s] 16%|█▌ | 58271/371472 [4:38:27<22:39:11, 3.84it/s] 16%|█▌ | 58272/371472 [4:38:27<23:09:31, 3.76it/s] 16%|█▌ | 58273/371472 [4:38:27<23:42:50, 3.67it/s] 16%|█▌ | 58274/371472 [4:38:28<22:53:35, 3.80it/s] 16%|█▌ | 58275/371472 [4:38:28<22:20:52, 3.89it/s] 16%|█▌ | 58276/371472 [4:38:28<21:59:13, 3.96it/s] 16%|█▌ | 58277/371472 [4:38:28<22:55:27, 3.80it/s] 16%|█▌ | 58278/371472 [4:38:29<22:52:51, 3.80it/s] 16%|█▌ | 58279/371472 [4:38:29<22:55:13, 3.80it/s] 16%|█▌ | 58280/371472 [4:38:29<23:33:48, 3.69it/s] {'loss': 4.5169, 'learning_rate': 8.592083432092912e-07, 'epoch': 2.51} + 16%|█▌ | 58280/371472 [4:38:29<23:33:48, 3.69it/s] 16%|█▌ | 58281/371472 [4:38:29<22:51:53, 3.80it/s] 16%|█▌ | 58282/371472 [4:38:30<25:23:07, 3.43it/s] 16%|█▌ | 58283/371472 [4:38:30<26:34:08, 3.27it/s] 16%|█▌ | 58284/371472 [4:38:30<25:14:41, 3.45it/s] 16%|█▌ | 58285/371472 [4:38:31<23:57:21, 3.63it/s] 16%|█▌ | 58286/371472 [4:38:31<23:32:15, 3.70it/s] 16%|█▌ | 58287/371472 [4:38:31<24:04:17, 3.61it/s] 16%|█▌ | 58288/371472 [4:38:31<23:52:18, 3.64it/s] 16%|█▌ | 58289/371472 [4:38:32<23:50:21, 3.65it/s] 16%|█▌ | 58290/371472 [4:38:32<24:09:15, 3.60it/s] 16%|█▌ | 58291/371472 [4:38:32<23:40:45, 3.67it/s] 16%|█▌ | 58292/371472 [4:38:32<23:26:12, 3.71it/s] 16%|█▌ | 58293/371472 [4:38:33<23:24:59, 3.72it/s] 16%|█▌ | 58294/371472 [4:38:33<23:17:07, 3.74it/s] 16%|█▌ | 58295/371472 [4:38:33<23:43:24, 3.67it/s] 16%|█▌ | 58296/371472 [4:38:34<25:18:01, 3.44it/s] 16%|█▌ | 58297/371472 [4:38:34<23:46:16, 3.66it/s] 16%|█▌ | 58298/371472 [4:38:34<23:25:00, 3.71it/s] 16%|█▌ | 58299/371472 [4:38:34<23:37:13, 3.68it/s] 16%|█▌ | 58300/371472 [4:38:35<24:43:49, 3.52it/s] {'loss': 4.0503, 'learning_rate': 8.591598612338124e-07, 'epoch': 2.51} + 16%|█▌ | 58300/371472 [4:38:35<24:43:49, 3.52it/s] 16%|█▌ | 58301/371472 [4:38:35<27:04:00, 3.21it/s] 16%|█▌ | 58302/371472 [4:38:35<27:30:05, 3.16it/s] 16%|█▌ | 58303/371472 [4:38:36<26:50:57, 3.24it/s] 16%|█▌ | 58304/371472 [4:38:36<25:05:17, 3.47it/s] 16%|█▌ | 58305/371472 [4:38:36<23:51:39, 3.65it/s] 16%|█▌ | 58306/371472 [4:38:36<24:10:33, 3.60it/s] 16%|█▌ | 58307/371472 [4:38:37<23:55:23, 3.64it/s] 16%|█▌ | 58308/371472 [4:38:37<24:27:53, 3.56it/s] 16%|█▌ | 58309/371472 [4:38:37<25:10:21, 3.46it/s] 16%|█▌ | 58310/371472 [4:38:38<26:15:11, 3.31it/s] 16%|█▌ | 58311/371472 [4:38:38<29:43:41, 2.93it/s] 16%|█▌ | 58312/371472 [4:38:38<27:54:27, 3.12it/s] 16%|█▌ | 58313/371472 [4:38:39<26:42:25, 3.26it/s] 16%|█▌ | 58314/371472 [4:38:39<28:40:01, 3.03it/s] 16%|█▌ | 58315/371472 [4:38:39<28:12:31, 3.08it/s] 16%|█▌ | 58316/371472 [4:38:40<27:59:50, 3.11it/s] 16%|█▌ | 58317/371472 [4:38:40<29:27:00, 2.95it/s] 16%|█▌ | 58318/371472 [4:38:40<29:18:12, 2.97it/s] 16%|█▌ | 58319/371472 [4:38:41<27:47:26, 3.13it/s] 16%|█▌ | 58320/371472 [4:38:41<28:30:41, 3.05it/s] {'loss': 3.8825, 'learning_rate': 8.591113792583334e-07, 'epoch': 2.51} + 16%|█▌ | 58320/371472 [4:38:41<28:30:41, 3.05it/s] 16%|█▌ | 58321/371472 [4:38:41<26:42:08, 3.26it/s] 16%|█▌ | 58322/371472 [4:38:42<25:48:10, 3.37it/s] 16%|█▌ | 58323/371472 [4:38:42<24:59:06, 3.48it/s] 16%|█▌ | 58324/371472 [4:38:42<24:16:23, 3.58it/s] 16%|█▌ | 58325/371472 [4:38:42<23:45:29, 3.66it/s] 16%|█▌ | 58326/371472 [4:38:43<23:45:00, 3.66it/s] 16%|█▌ | 58327/371472 [4:38:43<23:14:19, 3.74it/s] 16%|█▌ | 58328/371472 [4:38:43<22:31:29, 3.86it/s] 16%|█▌ | 58329/371472 [4:38:43<24:57:18, 3.49it/s] 16%|█▌ | 58330/371472 [4:38:44<24:36:10, 3.54it/s] 16%|█▌ | 58331/371472 [4:38:44<23:49:23, 3.65it/s] 16%|█▌ | 58332/371472 [4:38:44<24:12:54, 3.59it/s] 16%|█▌ | 58333/371472 [4:38:45<23:52:21, 3.64it/s] 16%|█▌ | 58334/371472 [4:38:45<27:11:25, 3.20it/s] 16%|█▌ | 58335/371472 [4:38:45<26:18:41, 3.31it/s] 16%|█▌ | 58336/371472 [4:38:46<28:40:09, 3.03it/s] 16%|█▌ | 58337/371472 [4:38:46<27:30:57, 3.16it/s] 16%|█▌ | 58338/371472 [4:38:46<25:35:36, 3.40it/s] 16%|█▌ | 58339/371472 [4:38:46<25:07:06, 3.46it/s] 16%|█▌ | 58340/371472 [4:38:47<24:48:47, 3.51it/s] {'loss': 4.281, 'learning_rate': 8.590628972828545e-07, 'epoch': 2.51} + 16%|█▌ | 58340/371472 [4:38:47<24:48:47, 3.51it/s] 16%|█▌ | 58341/371472 [4:38:47<25:56:40, 3.35it/s] 16%|█▌ | 58342/371472 [4:38:47<24:47:47, 3.51it/s] 16%|█▌ | 58343/371472 [4:38:48<24:06:31, 3.61it/s] 16%|█▌ | 58344/371472 [4:38:48<23:14:08, 3.74it/s] 16%|█▌ | 58345/371472 [4:38:48<25:01:53, 3.47it/s] 16%|█▌ | 58346/371472 [4:38:48<27:11:26, 3.20it/s] 16%|█▌ | 58347/371472 [4:38:49<25:50:18, 3.37it/s] 16%|█▌ | 58348/371472 [4:38:49<24:54:48, 3.49it/s] 16%|█▌ | 58349/371472 [4:38:49<24:59:11, 3.48it/s] 16%|█▌ | 58350/371472 [4:38:50<23:42:23, 3.67it/s] 16%|█▌ | 58351/371472 [4:38:50<25:29:59, 3.41it/s] 16%|█▌ | 58352/371472 [4:38:50<24:14:42, 3.59it/s] 16%|█▌ | 58353/371472 [4:38:50<23:45:01, 3.66it/s] 16%|█▌ | 58354/371472 [4:38:51<23:26:33, 3.71it/s] 16%|█▌ | 58355/371472 [4:38:51<25:04:04, 3.47it/s] 16%|█▌ | 58356/371472 [4:38:51<27:13:58, 3.19it/s] 16%|█▌ | 58357/371472 [4:38:52<26:52:52, 3.24it/s] 16%|█▌ | 58358/371472 [4:38:52<26:29:06, 3.28it/s] 16%|█▌ | 58359/371472 [4:38:52<25:20:03, 3.43it/s] 16%|█▌ | 58360/371472 [4:38:52<25:18:33, 3.44it/s] {'loss': 4.2192, 'learning_rate': 8.590144153073757e-07, 'epoch': 2.51} + 16%|█▌ | 58360/371472 [4:38:52<25:18:33, 3.44it/s] 16%|█▌ | 58361/371472 [4:38:53<25:39:24, 3.39it/s] 16%|█▌ | 58362/371472 [4:38:53<24:46:31, 3.51it/s] 16%|█▌ | 58363/371472 [4:38:53<24:21:59, 3.57it/s] 16%|█▌ | 58364/371472 [4:38:54<24:16:46, 3.58it/s] 16%|█▌ | 58365/371472 [4:38:54<24:58:42, 3.48it/s] 16%|█▌ | 58366/371472 [4:38:54<24:32:46, 3.54it/s] 16%|█▌ | 58367/371472 [4:38:54<23:54:52, 3.64it/s] 16%|█▌ | 58368/371472 [4:38:55<24:56:44, 3.49it/s] 16%|█▌ | 58369/371472 [4:38:55<24:26:13, 3.56it/s] 16%|█▌ | 58370/371472 [4:38:55<25:43:33, 3.38it/s] 16%|█▌ | 58371/371472 [4:38:56<25:49:52, 3.37it/s] 16%|█▌ | 58372/371472 [4:38:56<25:47:34, 3.37it/s] 16%|█▌ | 58373/371472 [4:38:56<25:51:44, 3.36it/s] 16%|█▌ | 58374/371472 [4:38:57<25:31:39, 3.41it/s] 16%|█▌ | 58375/371472 [4:38:57<25:02:20, 3.47it/s] 16%|█▌ | 58376/371472 [4:38:57<24:50:57, 3.50it/s] 16%|█▌ | 58377/371472 [4:38:57<24:20:53, 3.57it/s] 16%|█▌ | 58378/371472 [4:38:58<23:42:09, 3.67it/s] 16%|█▌ | 58379/371472 [4:38:58<23:22:48, 3.72it/s] 16%|█▌ | 58380/371472 [4:38:58<24:46:13, 3.51it/s] {'loss': 4.2605, 'learning_rate': 8.58965933331897e-07, 'epoch': 2.51} + 16%|█▌ | 58380/371472 [4:38:58<24:46:13, 3.51it/s] 16%|█▌ | 58381/371472 [4:38:58<24:35:21, 3.54it/s] 16%|█▌ | 58382/371472 [4:38:59<24:41:27, 3.52it/s] 16%|█▌ | 58383/371472 [4:38:59<24:06:02, 3.61it/s] 16%|█▌ | 58384/371472 [4:38:59<23:44:49, 3.66it/s] 16%|█▌ | 58385/371472 [4:39:00<23:42:26, 3.67it/s] 16%|█▌ | 58386/371472 [4:39:00<24:39:47, 3.53it/s] 16%|█▌ | 58387/371472 [4:39:00<24:23:57, 3.56it/s] 16%|█▌ | 58388/371472 [4:39:00<24:01:08, 3.62it/s] 16%|█▌ | 58389/371472 [4:39:01<24:00:31, 3.62it/s] 16%|█▌ | 58390/371472 [4:39:01<23:21:11, 3.72it/s] 16%|█▌ | 58391/371472 [4:39:01<24:26:11, 3.56it/s] 16%|█▌ | 58392/371472 [4:39:02<24:29:18, 3.55it/s] 16%|█▌ | 58393/371472 [4:39:02<24:33:03, 3.54it/s] 16%|█▌ | 58394/371472 [4:39:02<24:02:59, 3.62it/s] 16%|█▌ | 58395/371472 [4:39:02<26:59:40, 3.22it/s] 16%|█▌ | 58396/371472 [4:39:03<28:43:42, 3.03it/s] 16%|█▌ | 58397/371472 [4:39:03<28:30:29, 3.05it/s] 16%|█▌ | 58398/371472 [4:39:03<28:12:03, 3.08it/s] 16%|█▌ | 58399/371472 [4:39:04<27:04:59, 3.21it/s] 16%|█▌ | 58400/371472 [4:39:04<25:50:52, 3.36it/s] {'loss': 4.209, 'learning_rate': 8.58917451356418e-07, 'epoch': 2.52} + 16%|█▌ | 58400/371472 [4:39:04<25:50:52, 3.36it/s] 16%|█▌ | 58401/371472 [4:39:04<24:52:54, 3.50it/s] 16%|█▌ | 58402/371472 [4:39:05<25:44:52, 3.38it/s] 16%|█▌ | 58403/371472 [4:39:05<24:52:59, 3.49it/s] 16%|█▌ | 58404/371472 [4:39:05<26:45:48, 3.25it/s] 16%|█▌ | 58405/371472 [4:39:06<27:02:28, 3.22it/s] 16%|█▌ | 58406/371472 [4:39:06<25:41:44, 3.38it/s] 16%|█▌ | 58407/371472 [4:39:06<26:04:43, 3.33it/s] 16%|█▌ | 58408/371472 [4:39:06<24:39:25, 3.53it/s] 16%|█▌ | 58409/371472 [4:39:07<24:00:16, 3.62it/s] 16%|█▌ | 58410/371472 [4:39:07<24:31:57, 3.54it/s] 16%|█▌ | 58411/371472 [4:39:07<23:53:02, 3.64it/s] 16%|█▌ | 58412/371472 [4:39:07<23:18:27, 3.73it/s] 16%|█▌ | 58413/371472 [4:39:08<22:36:56, 3.85it/s] 16%|█▌ | 58414/371472 [4:39:08<23:21:45, 3.72it/s] 16%|█▌ | 58415/371472 [4:39:08<24:06:18, 3.61it/s] 16%|█▌ | 58416/371472 [4:39:09<24:47:08, 3.51it/s] 16%|█▌ | 58417/371472 [4:39:09<27:16:31, 3.19it/s] 16%|█▌ | 58418/371472 [4:39:09<26:05:19, 3.33it/s] 16%|█▌ | 58419/371472 [4:39:09<25:53:52, 3.36it/s] 16%|█▌ | 58420/371472 [4:39:10<27:03:40, 3.21it/s] {'loss': 4.2857, 'learning_rate': 8.588689693809389e-07, 'epoch': 2.52} + 16%|█▌ | 58420/371472 [4:39:10<27:03:40, 3.21it/s] 16%|█▌ | 58421/371472 [4:39:10<27:35:27, 3.15it/s] 16%|█▌ | 58422/371472 [4:39:10<25:54:44, 3.36it/s] 16%|█▌ | 58423/371472 [4:39:11<25:07:00, 3.46it/s] 16%|█▌ | 58424/371472 [4:39:11<24:01:17, 3.62it/s] 16%|█▌ | 58425/371472 [4:39:11<23:26:55, 3.71it/s] 16%|█▌ | 58426/371472 [4:39:11<22:46:38, 3.82it/s] 16%|█▌ | 58427/371472 [4:39:12<24:10:30, 3.60it/s] 16%|█▌ | 58428/371472 [4:39:12<23:58:47, 3.63it/s] 16%|█▌ | 58429/371472 [4:39:12<24:18:12, 3.58it/s] 16%|█▌ | 58430/371472 [4:39:13<24:12:26, 3.59it/s] 16%|█▌ | 58431/371472 [4:39:13<23:10:06, 3.75it/s] 16%|█▌ | 58432/371472 [4:39:13<23:04:45, 3.77it/s] 16%|█▌ | 58433/371472 [4:39:13<22:25:28, 3.88it/s] 16%|█▌ | 58434/371472 [4:39:14<22:55:45, 3.79it/s] 16%|█▌ | 58435/371472 [4:39:14<28:10:29, 3.09it/s] 16%|█▌ | 58436/371472 [4:39:14<25:44:44, 3.38it/s] 16%|█▌ | 58437/371472 [4:39:15<25:10:58, 3.45it/s] 16%|█▌ | 58438/371472 [4:39:15<25:07:47, 3.46it/s] 16%|█▌ | 58439/371472 [4:39:15<24:38:21, 3.53it/s] 16%|█▌ | 58440/371472 [4:39:15<25:47:54, 3.37it/s] {'loss': 4.1435, 'learning_rate': 8.588204874054601e-07, 'epoch': 2.52} + 16%|█▌ | 58440/371472 [4:39:15<25:47:54, 3.37it/s] 16%|█▌ | 58441/371472 [4:39:16<25:00:12, 3.48it/s] 16%|█▌ | 58442/371472 [4:39:16<24:20:02, 3.57it/s] 16%|█▌ | 58443/371472 [4:39:16<24:39:42, 3.53it/s] 16%|█▌ | 58444/371472 [4:39:17<24:56:29, 3.49it/s] 16%|█▌ | 58445/371472 [4:39:17<24:24:20, 3.56it/s] 16%|█▌ | 58446/371472 [4:39:17<23:49:05, 3.65it/s] 16%|█▌ | 58447/371472 [4:39:17<24:44:58, 3.51it/s] 16%|█▌ | 58448/371472 [4:39:18<24:08:51, 3.60it/s] 16%|█▌ | 58449/371472 [4:39:18<23:31:27, 3.70it/s] 16%|█▌ | 58450/371472 [4:39:18<24:45:43, 3.51it/s] 16%|█▌ | 58451/371472 [4:39:18<23:18:59, 3.73it/s] 16%|█▌ | 58452/371472 [4:39:19<23:24:01, 3.72it/s] 16%|█▌ | 58453/371472 [4:39:19<23:49:36, 3.65it/s] 16%|█▌ | 58454/371472 [4:39:19<23:01:01, 3.78it/s] 16%|█▌ | 58455/371472 [4:39:19<22:17:36, 3.90it/s] 16%|█▌ | 58456/371472 [4:39:20<22:15:07, 3.91it/s] 16%|█▌ | 58457/371472 [4:39:20<22:06:07, 3.93it/s] 16%|█▌ | 58458/371472 [4:39:20<22:08:00, 3.93it/s] 16%|█▌ | 58459/371472 [4:39:21<24:26:56, 3.56it/s] 16%|█▌ | 58460/371472 [4:39:21<23:30:15, 3.70it/s] {'loss': 4.3966, 'learning_rate': 8.587720054299813e-07, 'epoch': 2.52} + 16%|█▌ | 58460/371472 [4:39:21<23:30:15, 3.70it/s] 16%|█▌ | 58461/371472 [4:39:21<23:30:07, 3.70it/s] 16%|█▌ | 58462/371472 [4:39:21<22:47:45, 3.81it/s] 16%|█▌ | 58463/371472 [4:39:22<22:18:32, 3.90it/s] 16%|█▌ | 58464/371472 [4:39:22<22:06:57, 3.93it/s] 16%|█▌ | 58465/371472 [4:39:22<24:15:53, 3.58it/s] 16%|█▌ | 58466/371472 [4:39:22<25:22:51, 3.43it/s] 16%|█▌ | 58467/371472 [4:39:23<24:31:27, 3.55it/s] 16%|█▌ | 58468/371472 [4:39:23<24:18:14, 3.58it/s] 16%|█▌ | 58469/371472 [4:39:23<24:51:25, 3.50it/s] 16%|█▌ | 58470/371472 [4:39:24<23:21:27, 3.72it/s] 16%|█▌ | 58471/371472 [4:39:24<23:20:11, 3.73it/s] 16%|█▌ | 58472/371472 [4:39:24<23:41:54, 3.67it/s] 16%|█▌ | 58473/371472 [4:39:24<24:34:03, 3.54it/s] 16%|█▌ | 58474/371472 [4:39:25<23:51:18, 3.64it/s] 16%|█▌ | 58475/371472 [4:39:25<23:13:41, 3.74it/s] 16%|█▌ | 58476/371472 [4:39:25<26:13:32, 3.32it/s] 16%|█▌ | 58477/371472 [4:39:26<25:13:26, 3.45it/s] 16%|█▌ | 58478/371472 [4:39:26<24:35:14, 3.54it/s] 16%|█▌ | 58479/371472 [4:39:26<24:20:15, 3.57it/s] 16%|█▌ | 58480/371472 [4:39:26<24:41:16, 3.52it/s] {'loss': 4.1411, 'learning_rate': 8.587235234545023e-07, 'epoch': 2.52} + 16%|█▌ | 58480/371472 [4:39:26<24:41:16, 3.52it/s] 16%|█▌ | 58481/371472 [4:39:27<24:53:10, 3.49it/s] 16%|█▌ | 58482/371472 [4:39:27<24:17:08, 3.58it/s] 16%|█▌ | 58483/371472 [4:39:27<23:29:13, 3.70it/s] 16%|█▌ | 58484/371472 [4:39:28<25:58:59, 3.35it/s] 16%|█▌ | 58485/371472 [4:39:28<26:10:21, 3.32it/s] 16%|█▌ | 58486/371472 [4:39:28<26:14:03, 3.31it/s] 16%|█▌ | 58487/371472 [4:39:28<25:16:13, 3.44it/s] 16%|█▌ | 58488/371472 [4:39:29<24:03:54, 3.61it/s] 16%|█▌ | 58489/371472 [4:39:29<26:03:19, 3.34it/s] 16%|█▌ | 58490/371472 [4:39:29<25:51:05, 3.36it/s] 16%|█▌ | 58491/371472 [4:39:30<25:38:40, 3.39it/s] 16%|█▌ | 58492/371472 [4:39:30<27:03:39, 3.21it/s] 16%|█▌ | 58493/371472 [4:39:30<25:23:44, 3.42it/s] 16%|█▌ | 58494/371472 [4:39:30<25:06:18, 3.46it/s] 16%|█▌ | 58495/371472 [4:39:31<24:28:20, 3.55it/s] 16%|█▌ | 58496/371472 [4:39:31<25:21:02, 3.43it/s] 16%|█▌ | 58497/371472 [4:39:31<24:32:53, 3.54it/s] 16%|█▌ | 58498/371472 [4:39:32<23:58:20, 3.63it/s] 16%|█▌ | 58499/371472 [4:39:32<24:03:33, 3.61it/s] 16%|█▌ | 58500/371472 [4:39:32<23:54:46, 3.64it/s] {'loss': 4.2864, 'learning_rate': 8.586750414790234e-07, 'epoch': 2.52} + 16%|█▌ | 58500/371472 [4:39:32<23:54:46, 3.64it/s] 16%|█▌ | 58501/371472 [4:39:32<23:55:18, 3.63it/s] 16%|█▌ | 58502/371472 [4:39:33<22:59:52, 3.78it/s] 16%|█▌ | 58503/371472 [4:39:33<24:13:07, 3.59it/s] 16%|█▌ | 58504/371472 [4:39:33<23:58:34, 3.63it/s] 16%|█▌ | 58505/371472 [4:39:34<25:11:22, 3.45it/s] 16%|█▌ | 58506/371472 [4:39:34<24:45:08, 3.51it/s] 16%|█▌ | 58507/371472 [4:39:34<24:09:29, 3.60it/s] 16%|█▌ | 58508/371472 [4:39:34<24:07:59, 3.60it/s] 16%|█▌ | 58509/371472 [4:39:35<25:05:00, 3.47it/s] 16%|█▌ | 58510/371472 [4:39:35<24:34:10, 3.54it/s] 16%|█▌ | 58511/371472 [4:39:35<24:20:27, 3.57it/s] 16%|█▌ | 58512/371472 [4:39:36<25:55:55, 3.35it/s] 16%|█▌ | 58513/371472 [4:39:36<26:24:14, 3.29it/s] 16%|█▌ | 58514/371472 [4:39:36<25:06:27, 3.46it/s] 16%|█▌ | 58515/371472 [4:39:36<24:20:21, 3.57it/s] 16%|█▌ | 58516/371472 [4:39:37<24:03:10, 3.61it/s] 16%|█▌ | 58517/371472 [4:39:37<24:27:50, 3.55it/s] 16%|█▌ | 58518/371472 [4:39:37<24:48:24, 3.50it/s] 16%|█▌ | 58519/371472 [4:39:38<24:43:34, 3.52it/s] 16%|█▌ | 58520/371472 [4:39:38<25:29:27, 3.41it/s] {'loss': 4.035, 'learning_rate': 8.586265595035446e-07, 'epoch': 2.52} + 16%|█▌ | 58520/371472 [4:39:38<25:29:27, 3.41it/s] 16%|█▌ | 58521/371472 [4:39:38<24:51:32, 3.50it/s] 16%|█▌ | 58522/371472 [4:39:38<25:30:19, 3.41it/s] 16%|█▌ | 58523/371472 [4:39:39<24:50:41, 3.50it/s] 16%|█▌ | 58524/371472 [4:39:39<23:36:30, 3.68it/s] 16%|█▌ | 58525/371472 [4:39:39<24:02:35, 3.62it/s] 16%|█▌ | 58526/371472 [4:39:40<24:48:46, 3.50it/s] 16%|█▌ | 58527/371472 [4:39:40<23:51:12, 3.64it/s] 16%|█▌ | 58528/371472 [4:39:40<22:56:23, 3.79it/s] 16%|█▌ | 58529/371472 [4:39:40<24:18:26, 3.58it/s] 16%|█▌ | 58530/371472 [4:39:41<24:23:28, 3.56it/s] 16%|█▌ | 58531/371472 [4:39:41<24:12:40, 3.59it/s] 16%|█▌ | 58532/371472 [4:39:41<23:55:27, 3.63it/s] 16%|█▌ | 58533/371472 [4:39:41<23:36:55, 3.68it/s] 16%|█▌ | 58534/371472 [4:39:42<23:37:54, 3.68it/s] 16%|█▌ | 58535/371472 [4:39:42<22:53:56, 3.80it/s] 16%|█▌ | 58536/371472 [4:39:42<24:24:26, 3.56it/s] 16%|█▌ | 58537/371472 [4:39:43<23:18:25, 3.73it/s] 16%|█▌ | 58538/371472 [4:39:43<23:12:04, 3.75it/s] 16%|█▌ | 58539/371472 [4:39:43<23:41:27, 3.67it/s] 16%|█▌ | 58540/371472 [4:39:43<23:21:55, 3.72it/s] {'loss': 4.0988, 'learning_rate': 8.585780775280656e-07, 'epoch': 2.52} + 16%|█▌ | 58540/371472 [4:39:43<23:21:55, 3.72it/s] 16%|█▌ | 58541/371472 [4:39:44<23:49:03, 3.65it/s] 16%|█▌ | 58542/371472 [4:39:44<23:31:28, 3.70it/s] 16%|█▌ | 58543/371472 [4:39:44<24:38:01, 3.53it/s] 16%|█▌ | 58544/371472 [4:39:44<24:33:11, 3.54it/s] 16%|█▌ | 58545/371472 [4:39:45<24:37:01, 3.53it/s] 16%|█▌ | 58546/371472 [4:39:45<25:27:39, 3.41it/s] 16%|█▌ | 58547/371472 [4:39:45<25:19:28, 3.43it/s] 16%|█▌ | 58548/371472 [4:39:46<24:49:46, 3.50it/s] 16%|█▌ | 58549/371472 [4:39:46<25:22:17, 3.43it/s] 16%|█▌ | 58550/371472 [4:39:46<24:14:52, 3.58it/s] 16%|█▌ | 58551/371472 [4:39:46<23:51:45, 3.64it/s] 16%|█▌ | 58552/371472 [4:39:47<25:05:42, 3.46it/s] 16%|█▌ | 58553/371472 [4:39:47<24:44:36, 3.51it/s] 16%|█▌ | 58554/371472 [4:39:47<24:23:20, 3.56it/s] 16%|█▌ | 58555/371472 [4:39:48<24:06:30, 3.61it/s] 16%|█▌ | 58556/371472 [4:39:48<23:24:35, 3.71it/s] 16%|█▌ | 58557/371472 [4:39:48<22:44:25, 3.82it/s] 16%|█▌ | 58558/371472 [4:39:48<23:49:26, 3.65it/s] 16%|█▌ | 58559/371472 [4:39:49<23:24:27, 3.71it/s] 16%|█▌ | 58560/371472 [4:39:49<22:53:34, 3.80it/s] {'loss': 4.2534, 'learning_rate': 8.585295955525867e-07, 'epoch': 2.52} + 16%|█▌ | 58560/371472 [4:39:49<22:53:34, 3.80it/s] 16%|█▌ | 58561/371472 [4:39:49<22:40:28, 3.83it/s] 16%|█▌ | 58562/371472 [4:39:49<22:42:16, 3.83it/s] 16%|█▌ | 58563/371472 [4:39:50<23:28:33, 3.70it/s] 16%|█▌ | 58564/371472 [4:39:50<24:22:27, 3.57it/s] 16%|█▌ | 58565/371472 [4:39:50<24:06:08, 3.61it/s] 16%|█▌ | 58566/371472 [4:39:51<24:50:16, 3.50it/s] 16%|█▌ | 58567/371472 [4:39:51<24:10:28, 3.60it/s] 16%|█▌ | 58568/371472 [4:39:51<24:10:23, 3.60it/s] 16%|█▌ | 58569/371472 [4:39:51<24:26:11, 3.56it/s] 16%|█▌ | 58570/371472 [4:39:52<24:36:09, 3.53it/s] 16%|█▌ | 58571/371472 [4:39:52<24:38:00, 3.53it/s] 16%|█▌ | 58572/371472 [4:39:52<24:25:51, 3.56it/s] 16%|█▌ | 58573/371472 [4:39:53<24:33:33, 3.54it/s] 16%|█▌ | 58574/371472 [4:39:53<25:51:41, 3.36it/s] 16%|█▌ | 58575/371472 [4:39:53<24:55:26, 3.49it/s] 16%|█▌ | 58576/371472 [4:39:53<24:50:44, 3.50it/s] 16%|█▌ | 58577/371472 [4:39:54<23:54:31, 3.64it/s] 16%|█▌ | 58578/371472 [4:39:54<24:18:57, 3.57it/s] 16%|█▌ | 58579/371472 [4:39:54<23:18:36, 3.73it/s] 16%|█▌ | 58580/371472 [4:39:54<23:12:25, 3.75it/s] {'loss': 4.1708, 'learning_rate': 8.584811135771078e-07, 'epoch': 2.52} + 16%|█▌ | 58580/371472 [4:39:54<23:12:25, 3.75it/s] 16%|█▌ | 58581/371472 [4:39:55<23:46:42, 3.66it/s] 16%|█▌ | 58582/371472 [4:39:55<23:22:49, 3.72it/s] 16%|█▌ | 58583/371472 [4:39:55<23:38:15, 3.68it/s] 16%|█▌ | 58584/371472 [4:39:56<24:01:32, 3.62it/s] 16%|█▌ | 58585/371472 [4:39:56<25:35:11, 3.40it/s] 16%|█▌ | 58586/371472 [4:39:56<25:07:28, 3.46it/s] 16%|█▌ | 58587/371472 [4:39:56<24:47:19, 3.51it/s] 16%|█▌ | 58588/371472 [4:39:57<25:57:24, 3.35it/s] 16%|█▌ | 58589/371472 [4:39:57<26:32:41, 3.27it/s] 16%|█▌ | 58590/371472 [4:39:57<24:33:08, 3.54it/s] 16%|█▌ | 58591/371472 [4:39:58<25:45:22, 3.37it/s] 16%|█▌ | 58592/371472 [4:39:58<25:03:22, 3.47it/s] 16%|█▌ | 58593/371472 [4:39:58<24:50:33, 3.50it/s] 16%|█▌ | 58594/371472 [4:39:58<23:41:54, 3.67it/s] 16%|█▌ | 58595/371472 [4:39:59<22:52:27, 3.80it/s] 16%|█▌ | 58596/371472 [4:39:59<22:58:12, 3.78it/s] 16%|█▌ | 58597/371472 [4:39:59<23:44:13, 3.66it/s] 16%|█▌ | 58598/371472 [4:40:00<24:29:26, 3.55it/s] 16%|█▌ | 58599/371472 [4:40:00<24:14:06, 3.59it/s] 16%|█▌ | 58600/371472 [4:40:00<24:05:11, 3.61it/s] {'loss': 4.3406, 'learning_rate': 8.58432631601629e-07, 'epoch': 2.52} + 16%|█▌ | 58600/371472 [4:40:00<24:05:11, 3.61it/s] 16%|█▌ | 58601/371472 [4:40:00<23:20:52, 3.72it/s] 16%|█▌ | 58602/371472 [4:40:01<25:11:52, 3.45it/s] 16%|█▌ | 58603/371472 [4:40:01<24:55:46, 3.49it/s] 16%|█▌ | 58604/371472 [4:40:01<28:18:04, 3.07it/s] 16%|█▌ | 58605/371472 [4:40:02<26:31:57, 3.28it/s] 16%|█▌ | 58606/371472 [4:40:02<24:53:14, 3.49it/s] 16%|█▌ | 58607/371472 [4:40:02<24:01:12, 3.62it/s] 16%|█▌ | 58608/371472 [4:40:02<24:43:17, 3.52it/s] 16%|█▌ | 58609/371472 [4:40:03<24:28:17, 3.55it/s] 16%|█▌ | 58610/371472 [4:40:03<24:03:36, 3.61it/s] 16%|█▌ | 58611/371472 [4:40:03<23:59:26, 3.62it/s] 16%|█▌ | 58612/371472 [4:40:04<26:52:41, 3.23it/s] 16%|█▌ | 58613/371472 [4:40:04<25:52:42, 3.36it/s] 16%|█▌ | 58614/371472 [4:40:04<25:00:05, 3.48it/s] 16%|█▌ | 58615/371472 [4:40:04<25:29:39, 3.41it/s] 16%|█▌ | 58616/371472 [4:40:05<25:54:46, 3.35it/s] 16%|█▌ | 58617/371472 [4:40:05<24:49:10, 3.50it/s] 16%|█▌ | 58618/371472 [4:40:05<23:51:00, 3.64it/s] 16%|█▌ | 58619/371472 [4:40:06<24:17:41, 3.58it/s] 16%|█▌ | 58620/371472 [4:40:06<24:19:57, 3.57it/s] {'loss': 4.4411, 'learning_rate': 8.5838414962615e-07, 'epoch': 2.52} + 16%|█▌ | 58620/371472 [4:40:06<24:19:57, 3.57it/s] 16%|█▌ | 58621/371472 [4:40:06<25:56:55, 3.35it/s] 16%|█▌ | 58622/371472 [4:40:06<24:15:50, 3.58it/s] 16%|█▌ | 58623/371472 [4:40:07<25:36:58, 3.39it/s] 16%|█▌ | 58624/371472 [4:40:07<24:56:12, 3.48it/s] 16%|█▌ | 58625/371472 [4:40:07<25:20:07, 3.43it/s] 16%|█▌ | 58626/371472 [4:40:08<24:27:05, 3.55it/s] 16%|█▌ | 58627/371472 [4:40:08<24:17:12, 3.58it/s] 16%|█▌ | 58628/371472 [4:40:08<24:14:18, 3.59it/s] 16%|█▌ | 58629/371472 [4:40:08<23:51:37, 3.64it/s] 16%|█▌ | 58630/371472 [4:40:09<24:45:54, 3.51it/s] 16%|█▌ | 58631/371472 [4:40:09<24:55:43, 3.49it/s] 16%|█▌ | 58632/371472 [4:40:09<25:06:13, 3.46it/s] 16%|█▌ | 58633/371472 [4:40:10<24:38:58, 3.53it/s] 16%|█▌ | 58634/371472 [4:40:10<24:24:15, 3.56it/s] 16%|█▌ | 58635/371472 [4:40:10<24:09:51, 3.60it/s] 16%|█▌ | 58636/371472 [4:40:10<24:25:46, 3.56it/s] 16%|█▌ | 58637/371472 [4:40:11<26:06:11, 3.33it/s] 16%|█▌ | 58638/371472 [4:40:11<25:50:18, 3.36it/s] 16%|█▌ | 58639/371472 [4:40:11<25:52:37, 3.36it/s] 16%|█▌ | 58640/371472 [4:40:12<25:35:09, 3.40it/s] {'loss': 4.1217, 'learning_rate': 8.583356676506712e-07, 'epoch': 2.53} + 16%|█▌ | 58640/371472 [4:40:12<25:35:09, 3.40it/s] 16%|█▌ | 58641/371472 [4:40:12<25:22:48, 3.42it/s] 16%|█▌ | 58642/371472 [4:40:12<24:47:06, 3.51it/s] 16%|█▌ | 58643/371472 [4:40:12<24:13:02, 3.59it/s] 16%|█▌ | 58644/371472 [4:40:13<24:06:38, 3.60it/s] 16%|█▌ | 58645/371472 [4:40:13<24:02:01, 3.62it/s] 16%|█▌ | 58646/371472 [4:40:13<23:34:37, 3.69it/s] 16%|█▌ | 58647/371472 [4:40:14<23:47:31, 3.65it/s] 16%|█▌ | 58648/371472 [4:40:14<23:17:01, 3.73it/s] 16%|█▌ | 58649/371472 [4:40:14<22:53:50, 3.79it/s] 16%|█▌ | 58650/371472 [4:40:14<23:42:12, 3.67it/s] 16%|█▌ | 58651/371472 [4:40:15<23:37:13, 3.68it/s] 16%|█▌ | 58652/371472 [4:40:15<23:34:47, 3.69it/s] 16%|█▌ | 58653/371472 [4:40:15<23:01:17, 3.77it/s] 16%|█▌ | 58654/371472 [4:40:15<23:10:00, 3.75it/s] 16%|█▌ | 58655/371472 [4:40:16<23:06:35, 3.76it/s] 16%|█▌ | 58656/371472 [4:40:16<23:43:55, 3.66it/s] 16%|█▌ | 58657/371472 [4:40:16<23:36:00, 3.68it/s] 16%|█▌ | 58658/371472 [4:40:17<25:53:08, 3.36it/s] 16%|█▌ | 58659/371472 [4:40:17<26:23:22, 3.29it/s] 16%|█▌ | 58660/371472 [4:40:17<25:41:15, 3.38it/s] {'loss': 4.4278, 'learning_rate': 8.582871856751923e-07, 'epoch': 2.53} + 16%|█▌ | 58660/371472 [4:40:17<25:41:15, 3.38it/s] 16%|█▌ | 58661/371472 [4:40:17<24:45:18, 3.51it/s] 16%|█▌ | 58662/371472 [4:40:18<23:47:35, 3.65it/s] 16%|█▌ | 58663/371472 [4:40:18<24:10:18, 3.59it/s] 16%|█▌ | 58664/371472 [4:40:18<25:24:31, 3.42it/s] 16%|█▌ | 58665/371472 [4:40:19<26:43:45, 3.25it/s] 16%|█▌ | 58666/371472 [4:40:19<26:17:48, 3.30it/s] 16%|█▌ | 58667/371472 [4:40:19<27:31:07, 3.16it/s] 16%|█▌ | 58668/371472 [4:40:20<25:48:06, 3.37it/s] 16%|█▌ | 58669/371472 [4:40:20<24:48:10, 3.50it/s] 16%|█▌ | 58670/371472 [4:40:20<25:07:01, 3.46it/s] 16%|█▌ | 58671/371472 [4:40:20<25:06:22, 3.46it/s] 16%|█▌ | 58672/371472 [4:40:21<26:04:29, 3.33it/s] 16%|█▌ | 58673/371472 [4:40:21<26:08:17, 3.32it/s] 16%|█▌ | 58674/371472 [4:40:21<25:32:45, 3.40it/s] 16%|█▌ | 58675/371472 [4:40:22<25:18:43, 3.43it/s] 16%|█▌ | 58676/371472 [4:40:22<24:08:30, 3.60it/s] 16%|█▌ | 58677/371472 [4:40:22<24:17:16, 3.58it/s] 16%|█▌ | 58678/371472 [4:40:22<23:35:09, 3.68it/s] 16%|█▌ | 58679/371472 [4:40:23<24:11:29, 3.59it/s] 16%|█▌ | 58680/371472 [4:40:23<24:55:59, 3.48it/s] {'loss': 4.3738, 'learning_rate': 8.582387036997134e-07, 'epoch': 2.53} + 16%|█▌ | 58680/371472 [4:40:23<24:55:59, 3.48it/s] 16%|█▌ | 58681/371472 [4:40:23<24:48:19, 3.50it/s] 16%|█▌ | 58682/371472 [4:40:24<24:41:45, 3.52it/s] 16%|█▌ | 58683/371472 [4:40:24<26:21:11, 3.30it/s] 16%|█▌ | 58684/371472 [4:40:24<25:31:46, 3.40it/s] 16%|█▌ | 58685/371472 [4:40:24<24:38:20, 3.53it/s] 16%|█▌ | 58686/371472 [4:40:25<23:36:39, 3.68it/s] 16%|█▌ | 58687/371472 [4:40:25<23:37:02, 3.68it/s] 16%|█▌ | 58688/371472 [4:40:25<23:29:02, 3.70it/s] 16%|█▌ | 58689/371472 [4:40:26<25:33:15, 3.40it/s] 16%|█▌ | 58690/371472 [4:40:26<24:01:13, 3.62it/s] 16%|█▌ | 58691/371472 [4:40:26<24:13:54, 3.59it/s] 16%|█▌ | 58692/371472 [4:40:26<23:56:48, 3.63it/s] 16%|█▌ | 58693/371472 [4:40:27<23:41:11, 3.67it/s] 16%|█▌ | 58694/371472 [4:40:27<23:09:25, 3.75it/s] 16%|█▌ | 58695/371472 [4:40:27<22:55:16, 3.79it/s] 16%|█▌ | 58696/371472 [4:40:27<22:28:42, 3.87it/s] 16%|█▌ | 58697/371472 [4:40:28<22:30:31, 3.86it/s] 16%|█▌ | 58698/371472 [4:40:28<22:56:53, 3.79it/s] 16%|█▌ | 58699/371472 [4:40:28<26:45:18, 3.25it/s] 16%|█▌ | 58700/371472 [4:40:29<28:18:19, 3.07it/s] {'loss': 4.1657, 'learning_rate': 8.581902217242345e-07, 'epoch': 2.53} + 16%|█▌ | 58700/371472 [4:40:29<28:18:19, 3.07it/s] 16%|█▌ | 58701/371472 [4:40:29<27:40:58, 3.14it/s] 16%|█▌ | 58702/371472 [4:40:29<26:54:51, 3.23it/s] 16%|█▌ | 58703/371472 [4:40:30<25:25:52, 3.42it/s] 16%|█▌ | 58704/371472 [4:40:30<23:48:52, 3.65it/s] 16%|█▌ | 58705/371472 [4:40:30<24:31:09, 3.54it/s] 16%|█▌ | 58706/371472 [4:40:30<24:41:10, 3.52it/s] 16%|█▌ | 58707/371472 [4:40:31<24:13:29, 3.59it/s] 16%|█▌ | 58708/371472 [4:40:31<24:18:50, 3.57it/s] 16%|█▌ | 58709/371472 [4:40:31<23:50:56, 3.64it/s] 16%|█▌ | 58710/371472 [4:40:31<23:10:06, 3.75it/s] 16%|█▌ | 58711/371472 [4:40:32<23:13:08, 3.74it/s] 16%|█▌ | 58712/371472 [4:40:32<23:49:13, 3.65it/s] 16%|█▌ | 58713/371472 [4:40:32<23:42:25, 3.66it/s] 16%|█▌ | 58714/371472 [4:40:33<24:39:25, 3.52it/s] 16%|█▌ | 58715/371472 [4:40:33<25:14:22, 3.44it/s] 16%|█▌ | 58716/371472 [4:40:33<23:54:58, 3.63it/s] 16%|█▌ | 58717/371472 [4:40:33<24:15:48, 3.58it/s] 16%|█▌ | 58718/371472 [4:40:34<25:22:28, 3.42it/s] 16%|█▌ | 58719/371472 [4:40:34<24:46:06, 3.51it/s] 16%|█▌ | 58720/371472 [4:40:34<26:16:33, 3.31it/s] {'loss': 4.0269, 'learning_rate': 8.581417397487556e-07, 'epoch': 2.53} + 16%|█▌ | 58720/371472 [4:40:34<26:16:33, 3.31it/s] 16%|█▌ | 58721/371472 [4:40:35<25:54:35, 3.35it/s] 16%|█▌ | 58722/371472 [4:40:35<24:41:14, 3.52it/s] 16%|█▌ | 58723/371472 [4:40:35<24:14:40, 3.58it/s] 16%|█▌ | 58724/371472 [4:40:35<23:29:50, 3.70it/s] 16%|█▌ | 58725/371472 [4:40:36<23:38:45, 3.67it/s] 16%|█▌ | 58726/371472 [4:40:36<23:59:44, 3.62it/s] 16%|█▌ | 58727/371472 [4:40:36<23:17:31, 3.73it/s] 16%|█▌ | 58728/371472 [4:40:36<23:25:37, 3.71it/s] 16%|█▌ | 58729/371472 [4:40:37<23:18:17, 3.73it/s] 16%|█▌ | 58730/371472 [4:40:37<22:55:12, 3.79it/s] 16%|█▌ | 58731/371472 [4:40:37<23:18:52, 3.73it/s] 16%|█▌ | 58732/371472 [4:40:38<23:06:19, 3.76it/s] 16%|█▌ | 58733/371472 [4:40:38<23:50:55, 3.64it/s] 16%|█▌ | 58734/371472 [4:40:38<25:26:24, 3.41it/s] 16%|█▌ | 58735/371472 [4:40:38<24:51:37, 3.49it/s] 16%|█▌ | 58736/371472 [4:40:39<24:00:46, 3.62it/s] 16%|█▌ | 58737/371472 [4:40:39<23:03:23, 3.77it/s] 16%|█▌ | 58738/371472 [4:40:39<23:06:03, 3.76it/s] 16%|█▌ | 58739/371472 [4:40:39<22:56:34, 3.79it/s] 16%|█▌ | 58740/371472 [4:40:40<24:38:00, 3.53it/s] {'loss': 4.2034, 'learning_rate': 8.580932577732767e-07, 'epoch': 2.53} + 16%|█▌ | 58740/371472 [4:40:40<24:38:00, 3.53it/s] 16%|█▌ | 58741/371472 [4:40:40<24:02:49, 3.61it/s] 16%|█▌ | 58742/371472 [4:40:40<25:56:47, 3.35it/s] 16%|█▌ | 58743/371472 [4:40:41<26:34:47, 3.27it/s] 16%|█▌ | 58744/371472 [4:40:41<25:12:56, 3.45it/s] 16%|█▌ | 58745/371472 [4:40:41<24:36:53, 3.53it/s] 16%|█▌ | 58746/371472 [4:40:41<23:41:35, 3.67it/s] 16%|█▌ | 58747/371472 [4:40:42<23:46:39, 3.65it/s] 16%|█▌ | 58748/371472 [4:40:42<22:57:27, 3.78it/s] 16%|█▌ | 58749/371472 [4:40:42<22:27:44, 3.87it/s] 16%|█▌ | 58750/371472 [4:40:42<22:51:15, 3.80it/s] 16%|█▌ | 58751/371472 [4:40:43<22:59:51, 3.78it/s] 16%|█▌ | 58752/371472 [4:40:43<23:18:22, 3.73it/s] 16%|█▌ | 58753/371472 [4:40:43<22:57:35, 3.78it/s] 16%|█▌ | 58754/371472 [4:40:44<23:49:27, 3.65it/s] 16%|█▌ | 58755/371472 [4:40:44<24:01:25, 3.62it/s] 16%|█▌ | 58756/371472 [4:40:44<23:26:13, 3.71it/s] 16%|█▌ | 58757/371472 [4:40:45<26:15:38, 3.31it/s] 16%|█▌ | 58758/371472 [4:40:45<25:36:25, 3.39it/s] 16%|█▌ | 58759/371472 [4:40:45<25:27:52, 3.41it/s] 16%|█▌ | 58760/371472 [4:40:45<26:25:14, 3.29it/s] {'loss': 4.26, 'learning_rate': 8.580447757977979e-07, 'epoch': 2.53} + 16%|█▌ | 58760/371472 [4:40:45<26:25:14, 3.29it/s] 16%|█▌ | 58761/371472 [4:40:46<28:02:58, 3.10it/s] 16%|█▌ | 58762/371472 [4:40:46<26:02:23, 3.34it/s] 16%|█▌ | 58763/371472 [4:40:46<25:46:20, 3.37it/s] 16%|█▌ | 58764/371472 [4:40:47<25:46:36, 3.37it/s] 16%|█▌ | 58765/371472 [4:40:47<24:36:13, 3.53it/s] 16%|█▌ | 58766/371472 [4:40:47<24:30:50, 3.54it/s] 16%|█▌ | 58767/371472 [4:40:47<23:48:37, 3.65it/s] 16%|█▌ | 58768/371472 [4:40:48<23:22:18, 3.72it/s] 16%|█▌ | 58769/371472 [4:40:48<22:56:36, 3.79it/s] 16%|█▌ | 58770/371472 [4:40:48<23:17:25, 3.73it/s] 16%|█▌ | 58771/371472 [4:40:48<23:09:19, 3.75it/s] 16%|█▌ | 58772/371472 [4:40:49<23:16:53, 3.73it/s] 16%|█▌ | 58773/371472 [4:40:49<24:08:23, 3.60it/s] 16%|█▌ | 58774/371472 [4:40:49<25:04:51, 3.46it/s] 16%|█▌ | 58775/371472 [4:40:50<25:00:02, 3.47it/s] 16%|█▌ | 58776/371472 [4:40:50<27:34:07, 3.15it/s] 16%|█▌ | 58777/371472 [4:40:50<26:38:24, 3.26it/s] 16%|█▌ | 58778/371472 [4:40:51<27:28:22, 3.16it/s] 16%|█▌ | 58779/371472 [4:40:51<26:16:02, 3.31it/s] 16%|█▌ | 58780/371472 [4:40:51<24:39:46, 3.52it/s] {'loss': 4.0627, 'learning_rate': 8.579962938223189e-07, 'epoch': 2.53} + 16%|█▌ | 58780/371472 [4:40:51<24:39:46, 3.52it/s] 16%|█▌ | 58781/371472 [4:40:51<23:56:42, 3.63it/s] 16%|█▌ | 58782/371472 [4:40:52<23:48:30, 3.65it/s] 16%|█▌ | 58783/371472 [4:40:52<24:53:58, 3.49it/s] 16%|█▌ | 58784/371472 [4:40:52<24:51:23, 3.49it/s] 16%|█▌ | 58785/371472 [4:40:53<27:36:42, 3.15it/s] 16%|█▌ | 58786/371472 [4:40:53<25:53:09, 3.36it/s] 16%|█▌ | 58787/371472 [4:40:53<24:43:16, 3.51it/s] 16%|█▌ | 58788/371472 [4:40:53<24:09:39, 3.59it/s] 16%|█▌ | 58789/371472 [4:40:54<23:22:42, 3.72it/s] 16%|█▌ | 58790/371472 [4:40:54<23:11:53, 3.74it/s] 16%|█▌ | 58791/371472 [4:40:54<23:29:39, 3.70it/s] 16%|█▌ | 58792/371472 [4:40:55<24:35:08, 3.53it/s] 16%|█▌ | 58793/371472 [4:40:55<24:46:12, 3.51it/s] 16%|█▌ | 58794/371472 [4:40:55<23:32:54, 3.69it/s] 16%|█▌ | 58795/371472 [4:40:55<23:27:05, 3.70it/s] 16%|█▌ | 58796/371472 [4:40:56<24:33:43, 3.54it/s] 16%|█▌ | 58797/371472 [4:40:56<24:12:13, 3.59it/s] 16%|█▌ | 58798/371472 [4:40:56<25:05:45, 3.46it/s] 16%|█▌ | 58799/371472 [4:40:56<24:47:58, 3.50it/s] 16%|█▌ | 58800/371472 [4:40:57<24:54:53, 3.49it/s] {'loss': 4.4826, 'learning_rate': 8.579478118468399e-07, 'epoch': 2.53} + 16%|█▌ | 58800/371472 [4:40:57<24:54:53, 3.49it/s] 16%|█▌ | 58801/371472 [4:40:57<24:52:47, 3.49it/s] 16%|█▌ | 58802/371472 [4:40:57<24:18:40, 3.57it/s] 16%|█▌ | 58803/371472 [4:40:58<23:39:01, 3.67it/s] 16%|█▌ | 58804/371472 [4:40:58<25:26:54, 3.41it/s] 16%|█▌ | 58805/371472 [4:40:58<25:45:50, 3.37it/s] 16%|█▌ | 58806/371472 [4:40:59<26:00:39, 3.34it/s] 16%|█▌ | 58807/371472 [4:40:59<25:57:51, 3.35it/s] 16%|█▌ | 58808/371472 [4:40:59<25:03:19, 3.47it/s] 16%|█▌ | 58809/371472 [4:40:59<24:21:13, 3.57it/s] 16%|█▌ | 58810/371472 [4:41:00<24:47:00, 3.50it/s] 16%|█▌ | 58811/371472 [4:41:00<26:40:15, 3.26it/s] 16%|█▌ | 58812/371472 [4:41:00<26:56:06, 3.22it/s] 16%|█▌ | 58813/371472 [4:41:01<25:19:03, 3.43it/s] 16%|█▌ | 58814/371472 [4:41:01<24:50:17, 3.50it/s] 16%|█▌ | 58815/371472 [4:41:01<26:10:39, 3.32it/s] 16%|█▌ | 58816/371472 [4:41:01<25:54:19, 3.35it/s] 16%|█▌ | 58817/371472 [4:41:02<25:12:58, 3.44it/s] 16%|█▌ | 58818/371472 [4:41:02<24:09:20, 3.60it/s] 16%|█▌ | 58819/371472 [4:41:02<23:00:42, 3.77it/s] 16%|█▌ | 58820/371472 [4:41:03<23:55:47, 3.63it/s] {'loss': 4.2246, 'learning_rate': 8.578993298713611e-07, 'epoch': 2.53} + 16%|█▌ | 58820/371472 [4:41:03<23:55:47, 3.63it/s] 16%|█▌ | 58821/371472 [4:41:03<26:13:45, 3.31it/s] 16%|█▌ | 58822/371472 [4:41:03<24:41:09, 3.52it/s] 16%|█▌ | 58823/371472 [4:41:03<25:26:03, 3.41it/s] 16%|█▌ | 58824/371472 [4:41:04<25:28:47, 3.41it/s] 16%|█▌ | 58825/371472 [4:41:04<24:19:58, 3.57it/s] 16%|█▌ | 58826/371472 [4:41:04<24:07:14, 3.60it/s] 16%|█▌ | 58827/371472 [4:41:05<23:58:01, 3.62it/s] 16%|█▌ | 58828/371472 [4:41:05<24:37:35, 3.53it/s] 16%|█▌ | 58829/371472 [4:41:05<23:37:04, 3.68it/s] 16%|█▌ | 58830/371472 [4:41:05<23:30:02, 3.70it/s] 16%|█▌ | 58831/371472 [4:41:06<25:40:52, 3.38it/s] 16%|█▌ | 58832/371472 [4:41:06<27:34:08, 3.15it/s] 16%|█▌ | 58833/371472 [4:41:06<27:36:12, 3.15it/s] 16%|█▌ | 58834/371472 [4:41:07<27:20:59, 3.18it/s] 16%|█▌ | 58835/371472 [4:41:07<27:16:31, 3.18it/s] 16%|█▌ | 58836/371472 [4:41:07<28:16:20, 3.07it/s] 16%|█▌ | 58837/371472 [4:41:08<28:05:32, 3.09it/s] 16%|█▌ | 58838/371472 [4:41:08<26:22:06, 3.29it/s] 16%|█▌ | 58839/371472 [4:41:08<25:17:38, 3.43it/s] 16%|█▌ | 58840/371472 [4:41:09<26:08:48, 3.32it/s] {'loss': 4.0814, 'learning_rate': 8.578508478958823e-07, 'epoch': 2.53} + 16%|█▌ | 58840/371472 [4:41:09<26:08:48, 3.32it/s] 16%|█▌ | 58841/371472 [4:41:09<26:18:02, 3.30it/s] 16%|█▌ | 58842/371472 [4:41:09<26:57:51, 3.22it/s] 16%|█▌ | 58843/371472 [4:41:09<25:40:56, 3.38it/s] 16%|█▌ | 58844/371472 [4:41:10<29:16:43, 2.97it/s] 16%|█▌ | 58845/371472 [4:41:10<26:54:38, 3.23it/s] 16%|█▌ | 58846/371472 [4:41:10<26:40:57, 3.25it/s] 16%|█▌ | 58847/371472 [4:41:11<25:25:10, 3.42it/s] 16%|█▌ | 58848/371472 [4:41:11<24:20:57, 3.57it/s] 16%|█▌ | 58849/371472 [4:41:11<24:59:52, 3.47it/s] 16%|█▌ | 58850/371472 [4:41:11<24:08:04, 3.60it/s] 16%|█▌ | 58851/371472 [4:41:12<23:33:57, 3.68it/s] 16%|█▌ | 58852/371472 [4:41:12<23:29:39, 3.70it/s] 16%|█▌ | 58853/371472 [4:41:12<23:30:45, 3.69it/s] 16%|█▌ | 58854/371472 [4:41:13<23:43:19, 3.66it/s] 16%|█▌ | 58855/371472 [4:41:13<23:30:13, 3.69it/s] 16%|█▌ | 58856/371472 [4:41:13<23:27:14, 3.70it/s] 16%|█▌ | 58857/371472 [4:41:13<23:02:08, 3.77it/s] 16%|█▌ | 58858/371472 [4:41:14<22:58:08, 3.78it/s] 16%|█▌ | 58859/371472 [4:41:14<22:34:55, 3.85it/s] 16%|█▌ | 58860/371472 [4:41:14<22:06:50, 3.93it/s] {'loss': 4.4911, 'learning_rate': 8.578023659204033e-07, 'epoch': 2.54} + 16%|█▌ | 58860/371472 [4:41:14<22:06:50, 3.93it/s] 16%|█▌ | 58861/371472 [4:41:14<24:26:57, 3.55it/s] 16%|█▌ | 58862/371472 [4:41:15<23:22:15, 3.72it/s] 16%|█▌ | 58863/371472 [4:41:15<23:25:54, 3.71it/s] 16%|█▌ | 58864/371472 [4:41:15<23:27:05, 3.70it/s] 16%|█▌ | 58865/371472 [4:41:16<24:26:29, 3.55it/s] 16%|█▌ | 58866/371472 [4:41:16<24:17:46, 3.57it/s] 16%|█▌ | 58867/371472 [4:41:16<26:47:53, 3.24it/s] 16%|█▌ | 58868/371472 [4:41:17<27:15:13, 3.19it/s] 16%|█▌ | 58869/371472 [4:41:17<27:11:49, 3.19it/s] 16%|█▌ | 58870/371472 [4:41:17<31:01:15, 2.80it/s] 16%|█▌ | 58871/371472 [4:41:18<29:42:27, 2.92it/s] 16%|█▌ | 58872/371472 [4:41:18<27:40:40, 3.14it/s] 16%|█▌ | 58873/371472 [4:41:18<27:38:27, 3.14it/s] 16%|█▌ | 58874/371472 [4:41:18<25:58:32, 3.34it/s] 16%|█▌ | 58875/371472 [4:41:19<24:28:07, 3.55it/s] 16%|█▌ | 58876/371472 [4:41:19<24:10:51, 3.59it/s] 16%|█▌ | 58877/371472 [4:41:19<24:10:34, 3.59it/s] 16%|█▌ | 58878/371472 [4:41:19<23:19:26, 3.72it/s] 16%|█▌ | 58879/371472 [4:41:20<22:37:23, 3.84it/s] 16%|█▌ | 58880/371472 [4:41:20<22:56:46, 3.78it/s] {'loss': 4.3088, 'learning_rate': 8.577538839449244e-07, 'epoch': 2.54} + 16%|█▌ | 58880/371472 [4:41:20<22:56:46, 3.78it/s] 16%|█▌ | 58881/371472 [4:41:20<22:42:04, 3.82it/s] 16%|█▌ | 58882/371472 [4:41:20<22:32:52, 3.85it/s] 16%|█▌ | 58883/371472 [4:41:21<23:39:46, 3.67it/s] 16%|█▌ | 58884/371472 [4:41:21<23:51:18, 3.64it/s] 16%|█▌ | 58885/371472 [4:41:21<25:54:45, 3.35it/s] 16%|█▌ | 58886/371472 [4:41:22<25:22:23, 3.42it/s] 16%|█▌ | 58887/371472 [4:41:22<25:14:37, 3.44it/s] 16%|█▌ | 58888/371472 [4:41:22<24:45:32, 3.51it/s] 16%|█▌ | 58889/371472 [4:41:23<23:53:22, 3.63it/s] 16%|█▌ | 58890/371472 [4:41:23<23:28:01, 3.70it/s] 16%|█▌ | 58891/371472 [4:41:23<24:06:24, 3.60it/s] 16%|█▌ | 58892/371472 [4:41:23<24:24:44, 3.56it/s] 16%|█▌ | 58893/371472 [4:41:24<23:26:15, 3.70it/s] 16%|█▌ | 58894/371472 [4:41:24<23:14:46, 3.74it/s] 16%|█▌ | 58895/371472 [4:41:24<25:42:45, 3.38it/s] 16%|█▌ | 58896/371472 [4:41:25<27:26:58, 3.16it/s] 16%|█▌ | 58897/371472 [4:41:25<26:12:43, 3.31it/s] 16%|█▌ | 58898/371472 [4:41:25<26:32:36, 3.27it/s] 16%|█▌ | 58899/371472 [4:41:25<25:15:41, 3.44it/s] 16%|█▌ | 58900/371472 [4:41:26<24:17:56, 3.57it/s] {'loss': 4.1783, 'learning_rate': 8.577054019694456e-07, 'epoch': 2.54} + 16%|█▌ | 58900/371472 [4:41:26<24:17:56, 3.57it/s] 16%|█▌ | 58901/371472 [4:41:26<24:19:51, 3.57it/s] 16%|█▌ | 58902/371472 [4:41:26<24:55:31, 3.48it/s] 16%|█▌ | 58903/371472 [4:41:27<24:21:42, 3.56it/s] 16%|█▌ | 58904/371472 [4:41:27<23:54:25, 3.63it/s] 16%|█▌ | 58905/371472 [4:41:27<23:05:13, 3.76it/s] 16%|█▌ | 58906/371472 [4:41:27<23:36:08, 3.68it/s] 16%|█▌ | 58907/371472 [4:41:28<22:32:26, 3.85it/s] 16%|█▌ | 58908/371472 [4:41:28<24:44:06, 3.51it/s] 16%|█▌ | 58909/371472 [4:41:28<24:51:14, 3.49it/s] 16%|█▌ | 58910/371472 [4:41:29<26:40:25, 3.25it/s] 16%|█▌ | 58911/371472 [4:41:29<25:56:46, 3.35it/s] 16%|█▌ | 58912/371472 [4:41:29<26:49:07, 3.24it/s] 16%|█▌ | 58913/371472 [4:41:29<26:37:26, 3.26it/s] 16%|█▌ | 58914/371472 [4:41:30<26:31:13, 3.27it/s] 16%|█▌ | 58915/371472 [4:41:30<25:56:02, 3.35it/s] 16%|█▌ | 58916/371472 [4:41:30<25:21:17, 3.42it/s] 16%|█▌ | 58917/371472 [4:41:31<30:12:20, 2.87it/s] 16%|█▌ | 58918/371472 [4:41:31<28:49:06, 3.01it/s] 16%|█▌ | 58919/371472 [4:41:31<27:13:45, 3.19it/s] 16%|█▌ | 58920/371472 [4:41:32<25:20:08, 3.43it/s] {'loss': 4.0773, 'learning_rate': 8.576569199939667e-07, 'epoch': 2.54} + 16%|█▌ | 58920/371472 [4:41:32<25:20:08, 3.43it/s] 16%|█▌ | 58921/371472 [4:41:32<24:47:02, 3.50it/s] 16%|█▌ | 58922/371472 [4:41:32<23:40:38, 3.67it/s] 16%|█▌ | 58923/371472 [4:41:32<23:38:08, 3.67it/s] 16%|█▌ | 58924/371472 [4:41:33<23:20:22, 3.72it/s] 16%|█▌ | 58925/371472 [4:41:33<24:30:53, 3.54it/s] 16%|█▌ | 58926/371472 [4:41:33<25:23:16, 3.42it/s] 16%|█▌ | 58927/371472 [4:41:34<24:58:00, 3.48it/s] 16%|█▌ | 58928/371472 [4:41:34<24:41:34, 3.52it/s] 16%|█▌ | 58929/371472 [4:41:34<25:18:50, 3.43it/s] 16%|█▌ | 58930/371472 [4:41:34<25:01:24, 3.47it/s] 16%|█▌ | 58931/371472 [4:41:35<24:59:15, 3.47it/s] 16%|█▌ | 58932/371472 [4:41:35<26:07:57, 3.32it/s] 16%|█▌ | 58933/371472 [4:41:35<25:00:41, 3.47it/s] 16%|█▌ | 58934/371472 [4:41:36<24:54:38, 3.49it/s] 16%|█▌ | 58935/371472 [4:41:36<24:51:22, 3.49it/s] 16%|█▌ | 58936/371472 [4:41:36<26:43:03, 3.25it/s] 16%|█▌ | 58937/371472 [4:41:36<25:03:51, 3.46it/s] 16%|█▌ | 58938/371472 [4:41:37<24:32:25, 3.54it/s] 16%|█▌ | 58939/371472 [4:41:37<24:56:56, 3.48it/s] 16%|█▌ | 58940/371472 [4:41:37<24:01:17, 3.61it/s] {'loss': 4.2445, 'learning_rate': 8.576084380184877e-07, 'epoch': 2.54} + 16%|█▌ | 58940/371472 [4:41:37<24:01:17, 3.61it/s] 16%|█▌ | 58941/371472 [4:41:38<23:08:37, 3.75it/s] 16%|█▌ | 58942/371472 [4:41:38<24:47:35, 3.50it/s] 16%|█▌ | 58943/371472 [4:41:38<24:49:05, 3.50it/s] 16%|█▌ | 58944/371472 [4:41:38<24:22:04, 3.56it/s] 16%|█▌ | 58945/371472 [4:41:39<24:00:07, 3.62it/s] 16%|█▌ | 58946/371472 [4:41:39<23:01:39, 3.77it/s] 16%|█▌ | 58947/371472 [4:41:39<22:52:03, 3.80it/s] 16%|█▌ | 58948/371472 [4:41:39<24:29:28, 3.54it/s] 16%|█▌ | 58949/371472 [4:41:40<26:17:33, 3.30it/s] 16%|█▌ | 58950/371472 [4:41:40<24:35:56, 3.53it/s] 16%|█▌ | 58951/371472 [4:41:40<24:39:05, 3.52it/s] 16%|█▌ | 58952/371472 [4:41:41<23:59:17, 3.62it/s] 16%|█▌ | 58953/371472 [4:41:41<24:00:12, 3.62it/s] 16%|█▌ | 58954/371472 [4:41:41<23:21:05, 3.72it/s] 16%|█▌ | 58955/371472 [4:41:41<24:14:41, 3.58it/s] 16%|█▌ | 58956/371472 [4:41:42<24:03:31, 3.61it/s] 16%|█▌ | 58957/371472 [4:41:42<25:14:38, 3.44it/s] 16%|█▌ | 58958/371472 [4:41:42<25:25:26, 3.41it/s] 16%|█▌ | 58959/371472 [4:41:43<24:18:24, 3.57it/s] 16%|█▌ | 58960/371472 [4:41:43<23:12:26, 3.74it/s] {'loss': 4.4122, 'learning_rate': 8.575599560430088e-07, 'epoch': 2.54} + 16%|█▌ | 58960/371472 [4:41:43<23:12:26, 3.74it/s] 16%|█▌ | 58961/371472 [4:41:43<22:31:07, 3.85it/s] 16%|█▌ | 58962/371472 [4:41:43<23:33:58, 3.68it/s] 16%|█▌ | 58963/371472 [4:41:44<23:35:21, 3.68it/s] 16%|█▌ | 58964/371472 [4:41:44<23:50:19, 3.64it/s] 16%|█▌ | 58965/371472 [4:41:44<25:07:15, 3.46it/s] 16%|█▌ | 58966/371472 [4:41:45<26:40:42, 3.25it/s] 16%|█▌ | 58967/371472 [4:41:45<25:40:36, 3.38it/s] 16%|█▌ | 58968/371472 [4:41:45<24:27:54, 3.55it/s] 16%|█▌ | 58969/371472 [4:41:45<23:50:20, 3.64it/s] 16%|█▌ | 58970/371472 [4:41:46<23:07:31, 3.75it/s] 16%|█▌ | 58971/371472 [4:41:46<23:23:54, 3.71it/s] 16%|█▌ | 58972/371472 [4:41:46<23:19:12, 3.72it/s] 16%|█▌ | 58973/371472 [4:41:47<24:50:26, 3.49it/s] 16%|█▌ | 58974/371472 [4:41:47<24:00:23, 3.62it/s] 16%|█▌ | 58975/371472 [4:41:47<24:19:21, 3.57it/s] 16%|█▌ | 58976/371472 [4:41:47<23:48:36, 3.65it/s] 16%|█▌ | 58977/371472 [4:41:48<23:45:24, 3.65it/s] 16%|█▌ | 58978/371472 [4:41:48<26:02:26, 3.33it/s] 16%|█▌ | 58979/371472 [4:41:48<24:35:17, 3.53it/s] 16%|█▌ | 58980/371472 [4:41:48<24:43:17, 3.51it/s] {'loss': 4.0924, 'learning_rate': 8.5751147406753e-07, 'epoch': 2.54} + 16%|█▌ | 58980/371472 [4:41:48<24:43:17, 3.51it/s] 16%|█▌ | 58981/371472 [4:41:49<24:57:05, 3.48it/s] 16%|█▌ | 58982/371472 [4:41:49<24:51:56, 3.49it/s] 16%|█▌ | 58983/371472 [4:41:49<23:35:57, 3.68it/s] 16%|█▌ | 58984/371472 [4:41:50<25:47:01, 3.37it/s] 16%|█▌ | 58985/371472 [4:41:50<24:27:34, 3.55it/s] 16%|█▌ | 58986/371472 [4:41:50<23:47:53, 3.65it/s] 16%|█▌ | 58987/371472 [4:41:51<25:57:55, 3.34it/s] 16%|█▌ | 58988/371472 [4:41:51<25:44:54, 3.37it/s] 16%|█▌ | 58989/371472 [4:41:51<27:14:43, 3.19it/s] 16%|█▌ | 58990/371472 [4:41:51<25:21:13, 3.42it/s] 16%|█▌ | 58991/371472 [4:41:52<24:05:50, 3.60it/s] 16%|█▌ | 58992/371472 [4:41:52<23:26:40, 3.70it/s] 16%|█▌ | 58993/371472 [4:41:52<26:06:31, 3.32it/s] 16%|█▌ | 58994/371472 [4:41:53<25:08:03, 3.45it/s] 16%|█▌ | 58995/371472 [4:41:53<24:42:22, 3.51it/s] 16%|█▌ | 58996/371472 [4:41:53<24:23:01, 3.56it/s] 16%|█▌ | 58997/371472 [4:41:53<27:12:25, 3.19it/s] 16%|█▌ | 58998/371472 [4:41:54<28:11:07, 3.08it/s] 16%|█▌ | 58999/371472 [4:41:54<26:20:59, 3.29it/s] 16%|█▌ | 59000/371472 [4:41:54<24:30:00, 3.54it/s] {'loss': 4.169, 'learning_rate': 8.574629920920512e-07, 'epoch': 2.54} + 16%|█▌ | 59000/371472 [4:41:54<24:30:00, 3.54it/s] 16%|█▌ | 59001/371472 [4:41:55<25:03:32, 3.46it/s] 16%|█▌ | 59002/371472 [4:41:55<25:00:26, 3.47it/s] 16%|█▌ | 59003/371472 [4:41:55<23:59:53, 3.62it/s] 16%|█▌ | 59004/371472 [4:41:55<25:08:17, 3.45it/s] 16%|█▌ | 59005/371472 [4:41:56<24:35:19, 3.53it/s] 16%|█▌ | 59006/371472 [4:41:56<24:15:56, 3.58it/s] 16%|█▌ | 59007/371472 [4:41:56<24:13:19, 3.58it/s] 16%|█▌ | 59008/371472 [4:41:57<23:24:28, 3.71it/s] 16%|█▌ | 59009/371472 [4:41:57<24:43:57, 3.51it/s] 16%|█▌ | 59010/371472 [4:41:57<23:47:54, 3.65it/s] 16%|█▌ | 59011/371472 [4:41:57<26:10:26, 3.32it/s] 16%|█▌ | 59012/371472 [4:41:58<24:46:04, 3.50it/s] 16%|█▌ | 59013/371472 [4:41:58<24:56:10, 3.48it/s] 16%|█▌ | 59014/371472 [4:41:58<25:48:40, 3.36it/s] 16%|█▌ | 59015/371472 [4:41:59<28:59:48, 2.99it/s] 16%|█▌ | 59016/371472 [4:41:59<27:11:18, 3.19it/s] 16%|█▌ | 59017/371472 [4:41:59<25:46:38, 3.37it/s] 16%|█▌ | 59018/371472 [4:42:00<24:46:49, 3.50it/s] 16%|█▌ | 59019/371472 [4:42:00<23:47:09, 3.65it/s] 16%|█▌ | 59020/371472 [4:42:00<22:55:17, 3.79it/s] {'loss': 4.268, 'learning_rate': 8.574145101165722e-07, 'epoch': 2.54} + 16%|█▌ | 59020/371472 [4:42:00<22:55:17, 3.79it/s] 16%|█▌ | 59021/371472 [4:42:00<23:54:43, 3.63it/s] 16%|█▌ | 59022/371472 [4:42:01<23:23:33, 3.71it/s] 16%|█▌ | 59023/371472 [4:42:01<27:10:18, 3.19it/s] 16%|█▌ | 59024/371472 [4:42:01<26:34:31, 3.27it/s] 16%|█▌ | 59025/371472 [4:42:02<25:43:42, 3.37it/s] 16%|█▌ | 59026/371472 [4:42:02<24:49:33, 3.50it/s] 16%|█▌ | 59027/371472 [4:42:02<24:22:47, 3.56it/s] 16%|█▌ | 59028/371472 [4:42:02<24:06:56, 3.60it/s] 16%|█▌ | 59029/371472 [4:42:03<24:17:32, 3.57it/s] 16%|█▌ | 59030/371472 [4:42:03<23:52:00, 3.64it/s] 16%|█▌ | 59031/371472 [4:42:03<23:30:44, 3.69it/s] 16%|█▌ | 59032/371472 [4:42:03<25:11:47, 3.44it/s] 16%|█▌ | 59033/371472 [4:42:04<25:51:13, 3.36it/s] 16%|█▌ | 59034/371472 [4:42:04<25:17:28, 3.43it/s] 16%|█▌ | 59035/371472 [4:42:04<24:00:28, 3.61it/s] 16%|█▌ | 59036/371472 [4:42:05<23:17:51, 3.73it/s] 16%|█▌ | 59037/371472 [4:42:05<23:56:06, 3.63it/s] 16%|█▌ | 59038/371472 [4:42:05<23:50:03, 3.64it/s] 16%|█▌ | 59039/371472 [4:42:05<23:26:02, 3.70it/s] 16%|█▌ | 59040/371472 [4:42:06<23:11:41, 3.74it/s] {'loss': 4.3032, 'learning_rate': 8.573660281410933e-07, 'epoch': 2.54} + 16%|█▌ | 59040/371472 [4:42:06<23:11:41, 3.74it/s] 16%|█▌ | 59041/371472 [4:42:06<23:02:11, 3.77it/s] 16%|█▌ | 59042/371472 [4:42:06<23:35:46, 3.68it/s] 16%|█▌ | 59043/371472 [4:42:06<22:51:31, 3.80it/s] 16%|█▌ | 59044/371472 [4:42:07<22:22:07, 3.88it/s] 16%|█▌ | 59045/371472 [4:42:07<22:16:28, 3.90it/s] 16%|█▌ | 59046/371472 [4:42:07<22:10:11, 3.91it/s] 16%|█▌ | 59047/371472 [4:42:08<24:31:19, 3.54it/s] 16%|█▌ | 59048/371472 [4:42:08<24:33:09, 3.53it/s] 16%|█▌ | 59049/371472 [4:42:08<24:33:03, 3.53it/s] 16%|█▌ | 59050/371472 [4:42:08<24:20:34, 3.57it/s] 16%|█▌ | 59051/371472 [4:42:09<23:28:34, 3.70it/s] 16%|█▌ | 59052/371472 [4:42:09<22:54:26, 3.79it/s] 16%|█▌ | 59053/371472 [4:42:09<22:16:50, 3.89it/s] 16%|█▌ | 59054/371472 [4:42:09<22:55:40, 3.79it/s] 16%|█▌ | 59055/371472 [4:42:10<24:19:59, 3.57it/s] 16%|█▌ | 59056/371472 [4:42:10<23:44:50, 3.65it/s] 16%|█▌ | 59057/371472 [4:42:10<24:12:06, 3.59it/s] 16%|█▌ | 59058/371472 [4:42:11<24:39:59, 3.52it/s] 16%|█▌ | 59059/371472 [4:42:11<24:19:50, 3.57it/s] 16%|█▌ | 59060/371472 [4:42:11<24:47:10, 3.50it/s] {'loss': 4.2648, 'learning_rate': 8.573175461656144e-07, 'epoch': 2.54} + 16%|█▌ | 59060/371472 [4:42:11<24:47:10, 3.50it/s] 16%|█▌ | 59061/371472 [4:42:11<23:23:34, 3.71it/s] 16%|█▌ | 59062/371472 [4:42:12<22:55:22, 3.79it/s] 16%|█▌ | 59063/371472 [4:42:12<22:28:05, 3.86it/s] 16%|█▌ | 59064/371472 [4:42:12<23:20:40, 3.72it/s] 16%|█▌ | 59065/371472 [4:42:12<23:57:52, 3.62it/s] 16%|█▌ | 59066/371472 [4:42:13<26:11:57, 3.31it/s] 16%|█▌ | 59067/371472 [4:42:13<25:03:18, 3.46it/s] 16%|█▌ | 59068/371472 [4:42:13<27:20:32, 3.17it/s] 16%|█▌ | 59069/371472 [4:42:14<27:15:39, 3.18it/s] 16%|█▌ | 59070/371472 [4:42:14<30:59:12, 2.80it/s] 16%|█▌ | 59071/371472 [4:42:14<28:17:57, 3.07it/s] 16%|█▌ | 59072/371472 [4:42:15<26:33:52, 3.27it/s] 16%|█▌ | 59073/371472 [4:42:15<25:17:08, 3.43it/s] 16%|█▌ | 59074/371472 [4:42:15<25:10:18, 3.45it/s] 16%|█▌ | 59075/371472 [4:42:16<25:04:36, 3.46it/s] 16%|█▌ | 59076/371472 [4:42:16<24:37:22, 3.52it/s] 16%|█▌ | 59077/371472 [4:42:16<24:09:21, 3.59it/s] 16%|█▌ | 59078/371472 [4:42:16<25:15:29, 3.44it/s] 16%|█▌ | 59079/371472 [4:42:17<24:39:33, 3.52it/s] 16%|█▌ | 59080/371472 [4:42:17<23:36:50, 3.67it/s] {'loss': 4.4249, 'learning_rate': 8.572690641901354e-07, 'epoch': 2.54} + 16%|█▌ | 59080/371472 [4:42:17<23:36:50, 3.67it/s] 16%|█▌ | 59081/371472 [4:42:17<23:09:35, 3.75it/s] 16%|█▌ | 59082/371472 [4:42:17<22:52:55, 3.79it/s] 16%|█▌ | 59083/371472 [4:42:18<22:34:49, 3.84it/s] 16%|█▌ | 59084/371472 [4:42:18<22:43:46, 3.82it/s] 16%|█▌ | 59085/371472 [4:42:18<25:30:28, 3.40it/s] 16%|█▌ | 59086/371472 [4:42:19<25:46:10, 3.37it/s] 16%|█▌ | 59087/371472 [4:42:19<25:19:22, 3.43it/s] 16%|█▌ | 59088/371472 [4:42:19<25:11:12, 3.45it/s] 16%|█▌ | 59089/371472 [4:42:19<24:01:06, 3.61it/s] 16%|█▌ | 59090/371472 [4:42:20<22:57:12, 3.78it/s] 16%|█▌ | 59091/371472 [4:42:20<24:08:41, 3.59it/s] 16%|█▌ | 59092/371472 [4:42:20<23:17:32, 3.73it/s] 16%|█▌ | 59093/371472 [4:42:21<24:11:40, 3.59it/s] 16%|█▌ | 59094/371472 [4:42:21<24:49:56, 3.49it/s] 16%|█▌ | 59095/371472 [4:42:21<24:16:31, 3.57it/s] 16%|█▌ | 59096/371472 [4:42:21<23:55:59, 3.63it/s] 16%|█▌ | 59097/371472 [4:42:22<25:23:38, 3.42it/s] 16%|█▌ | 59098/371472 [4:42:22<24:45:18, 3.51it/s] 16%|█▌ | 59099/371472 [4:42:22<24:18:54, 3.57it/s] 16%|█▌ | 59100/371472 [4:42:23<24:30:15, 3.54it/s] {'loss': 4.3171, 'learning_rate': 8.572205822146566e-07, 'epoch': 2.55} + 16%|█▌ | 59100/371472 [4:42:23<24:30:15, 3.54it/s] 16%|█▌ | 59101/371472 [4:42:23<25:06:47, 3.46it/s] 16%|█▌ | 59102/371472 [4:42:23<24:39:48, 3.52it/s] 16%|█▌ | 59103/371472 [4:42:23<24:08:10, 3.59it/s] 16%|█▌ | 59104/371472 [4:42:24<25:32:08, 3.40it/s] 16%|█▌ | 59105/371472 [4:42:24<24:10:46, 3.59it/s] 16%|█▌ | 59106/371472 [4:42:24<24:36:21, 3.53it/s] 16%|█▌ | 59107/371472 [4:42:25<25:15:58, 3.43it/s] 16%|█▌ | 59108/371472 [4:42:25<24:11:32, 3.59it/s] 16%|█▌ | 59109/371472 [4:42:25<23:22:50, 3.71it/s] 16%|█▌ | 59110/371472 [4:42:25<23:10:15, 3.74it/s] 16%|█▌ | 59111/371472 [4:42:26<23:54:33, 3.63it/s] 16%|█▌ | 59112/371472 [4:42:26<23:56:03, 3.63it/s] 16%|█▌ | 59113/371472 [4:42:26<23:51:46, 3.64it/s] 16%|█▌ | 59114/371472 [4:42:26<24:11:05, 3.59it/s] 16%|█▌ | 59115/371472 [4:42:27<24:08:39, 3.59it/s] 16%|█▌ | 59116/371472 [4:42:27<23:45:07, 3.65it/s] 16%|█▌ | 59117/371472 [4:42:27<23:18:52, 3.72it/s] 16%|█▌ | 59118/371472 [4:42:28<24:19:18, 3.57it/s] 16%|█▌ | 59119/371472 [4:42:28<23:43:56, 3.66it/s] 16%|█▌ | 59120/371472 [4:42:28<25:15:53, 3.43it/s] {'loss': 4.2584, 'learning_rate': 8.571721002391776e-07, 'epoch': 2.55} + 16%|█▌ | 59120/371472 [4:42:28<25:15:53, 3.43it/s] 16%|█▌ | 59121/371472 [4:42:28<24:43:22, 3.51it/s] 16%|█▌ | 59122/371472 [4:42:29<24:16:43, 3.57it/s] 16%|█▌ | 59123/371472 [4:42:29<24:00:18, 3.61it/s] 16%|█▌ | 59124/371472 [4:42:29<24:19:01, 3.57it/s] 16%|█▌ | 59125/371472 [4:42:30<24:15:31, 3.58it/s] 16%|█▌ | 59126/371472 [4:42:30<23:33:06, 3.68it/s] 16%|█▌ | 59127/371472 [4:42:30<23:32:28, 3.69it/s] 16%|█▌ | 59128/371472 [4:42:30<23:38:12, 3.67it/s] 16%|█▌ | 59129/371472 [4:42:31<23:27:21, 3.70it/s] 16%|█▌ | 59130/371472 [4:42:31<22:52:34, 3.79it/s] 16%|█▌ | 59131/371472 [4:42:31<23:39:16, 3.67it/s] 16%|█▌ | 59132/371472 [4:42:31<23:55:35, 3.63it/s] 16%|█▌ | 59133/371472 [4:42:32<23:26:51, 3.70it/s] 16%|█▌ | 59134/371472 [4:42:32<23:25:08, 3.70it/s] 16%|█▌ | 59135/371472 [4:42:32<23:05:38, 3.76it/s] 16%|█▌ | 59136/371472 [4:42:32<23:24:44, 3.71it/s] 16%|█▌ | 59137/371472 [4:42:33<23:48:48, 3.64it/s] 16%|█▌ | 59138/371472 [4:42:33<23:44:49, 3.65it/s] 16%|█▌ | 59139/371472 [4:42:33<24:20:53, 3.56it/s] 16%|█▌ | 59140/371472 [4:42:34<24:47:52, 3.50it/s] {'loss': 4.3728, 'learning_rate': 8.571236182636989e-07, 'epoch': 2.55} + 16%|█▌ | 59140/371472 [4:42:34<24:47:52, 3.50it/s] 16%|█▌ | 59141/371472 [4:42:34<25:39:51, 3.38it/s] 16%|█▌ | 59142/371472 [4:42:34<24:37:04, 3.52it/s] 16%|█▌ | 59143/371472 [4:42:34<23:36:15, 3.68it/s] 16%|█▌ | 59144/371472 [4:42:35<23:20:58, 3.72it/s] 16%|█▌ | 59145/371472 [4:42:35<23:05:25, 3.76it/s] 16%|█▌ | 59146/371472 [4:42:35<24:06:20, 3.60it/s] 16%|█▌ | 59147/371472 [4:42:36<24:33:32, 3.53it/s] 16%|█▌ | 59148/371472 [4:42:36<23:49:07, 3.64it/s] 16%|█▌ | 59149/371472 [4:42:36<25:05:07, 3.46it/s] 16%|█▌ | 59150/371472 [4:42:36<24:35:04, 3.53it/s] 16%|█▌ | 59151/371472 [4:42:37<24:38:44, 3.52it/s] 16%|█▌ | 59152/371472 [4:42:37<25:28:54, 3.40it/s] 16%|█▌ | 59153/371472 [4:42:37<25:17:02, 3.43it/s] 16%|█▌ | 59154/371472 [4:42:38<25:22:21, 3.42it/s] 16%|█▌ | 59155/371472 [4:42:38<25:13:14, 3.44it/s] 16%|█▌ | 59156/371472 [4:42:38<24:11:50, 3.59it/s] 16%|█▌ | 59157/371472 [4:42:38<24:26:41, 3.55it/s] 16%|█▌ | 59158/371472 [4:42:39<24:16:49, 3.57it/s] 16%|█▌ | 59159/371472 [4:42:39<23:57:36, 3.62it/s] 16%|█▌ | 59160/371472 [4:42:39<24:16:02, 3.57it/s] {'loss': 4.3094, 'learning_rate': 8.570751362882199e-07, 'epoch': 2.55} + 16%|█▌ | 59160/371472 [4:42:39<24:16:02, 3.57it/s] 16%|█▌ | 59161/371472 [4:42:39<23:13:35, 3.74it/s] 16%|█▌ | 59162/371472 [4:42:40<23:27:48, 3.70it/s] 16%|█▌ | 59163/371472 [4:42:40<23:21:01, 3.72it/s] 16%|█▌ | 59164/371472 [4:42:40<22:36:39, 3.84it/s] 16%|█▌ | 59165/371472 [4:42:41<23:40:54, 3.66it/s] 16%|█▌ | 59166/371472 [4:42:41<24:49:50, 3.49it/s] 16%|█▌ | 59167/371472 [4:42:41<23:28:09, 3.70it/s] 16%|█▌ | 59168/371472 [4:42:41<24:17:59, 3.57it/s] 16%|█▌ | 59169/371472 [4:42:42<22:57:01, 3.78it/s] 16%|█▌ | 59170/371472 [4:42:42<23:32:10, 3.69it/s] 16%|█▌ | 59171/371472 [4:42:42<23:19:04, 3.72it/s] 16%|█▌ | 59172/371472 [4:42:42<23:00:30, 3.77it/s] 16%|█▌ | 59173/371472 [4:42:43<23:50:47, 3.64it/s] 16%|█▌ | 59174/371472 [4:42:43<23:08:21, 3.75it/s] 16%|█▌ | 59175/371472 [4:42:43<22:45:18, 3.81it/s] 16%|█▌ | 59176/371472 [4:42:44<22:21:34, 3.88it/s] 16%|█▌ | 59177/371472 [4:42:44<22:09:50, 3.91it/s] 16%|█▌ | 59178/371472 [4:42:44<22:15:07, 3.90it/s] 16%|█▌ | 59179/371472 [4:42:44<22:26:10, 3.87it/s] 16%|█▌ | 59180/371472 [4:42:45<21:54:24, 3.96it/s] {'loss': 3.9504, 'learning_rate': 8.57026654312741e-07, 'epoch': 2.55} + 16%|█▌ | 59180/371472 [4:42:45<21:54:24, 3.96it/s] 16%|█▌ | 59181/371472 [4:42:45<24:28:52, 3.54it/s] 16%|█▌ | 59182/371472 [4:42:45<25:32:53, 3.40it/s] 16%|█▌ | 59183/371472 [4:42:45<24:41:05, 3.51it/s] 16%|█▌ | 59184/371472 [4:42:46<24:31:04, 3.54it/s] 16%|█▌ | 59185/371472 [4:42:46<23:37:19, 3.67it/s] 16%|█▌ | 59186/371472 [4:42:46<26:38:19, 3.26it/s] 16%|█▌ | 59187/371472 [4:42:47<26:58:18, 3.22it/s] 16%|█▌ | 59188/371472 [4:42:47<25:31:00, 3.40it/s] 16%|█▌ | 59189/371472 [4:42:47<24:27:05, 3.55it/s] 16%|█▌ | 59190/371472 [4:42:47<24:04:46, 3.60it/s] 16%|█▌ | 59191/371472 [4:42:48<23:12:32, 3.74it/s] 16%|█▌ | 59192/371472 [4:42:48<22:49:38, 3.80it/s] 16%|█▌ | 59193/371472 [4:42:48<22:24:14, 3.87it/s] 16%|█▌ | 59194/371472 [4:42:48<21:52:27, 3.97it/s] 16%|█▌ | 59195/371472 [4:42:49<21:42:52, 3.99it/s] 16%|█▌ | 59196/371472 [4:42:49<22:13:59, 3.90it/s] 16%|█▌ | 59197/371472 [4:42:49<22:35:50, 3.84it/s] 16%|█▌ | 59198/371472 [4:42:49<22:10:18, 3.91it/s] 16%|█▌ | 59199/371472 [4:42:50<23:40:08, 3.66it/s] 16%|█▌ | 59200/371472 [4:42:50<23:12:14, 3.74it/s] {'loss': 4.1956, 'learning_rate': 8.569781723372621e-07, 'epoch': 2.55} + 16%|█▌ | 59200/371472 [4:42:50<23:12:14, 3.74it/s] 16%|█▌ | 59201/371472 [4:42:50<25:01:20, 3.47it/s] 16%|█▌ | 59202/371472 [4:42:51<27:51:38, 3.11it/s] 16%|█▌ | 59203/371472 [4:42:51<25:55:59, 3.34it/s] 16%|█▌ | 59204/371472 [4:42:51<24:33:16, 3.53it/s] 16%|█▌ | 59205/371472 [4:42:52<27:33:52, 3.15it/s] 16%|█▌ | 59206/371472 [4:42:52<26:20:42, 3.29it/s] 16%|█▌ | 59207/371472 [4:42:52<26:10:32, 3.31it/s] 16%|█▌ | 59208/371472 [4:42:53<25:23:16, 3.42it/s] 16%|█▌ | 59209/371472 [4:42:53<24:41:33, 3.51it/s] 16%|█▌ | 59210/371472 [4:42:53<27:44:38, 3.13it/s] 16%|█▌ | 59211/371472 [4:42:53<25:41:57, 3.38it/s] 16%|█▌ | 59212/371472 [4:42:54<24:20:59, 3.56it/s] 16%|█▌ | 59213/371472 [4:42:54<24:23:45, 3.56it/s] 16%|█▌ | 59214/371472 [4:42:54<24:01:16, 3.61it/s] 16%|█▌ | 59215/371472 [4:42:54<23:22:03, 3.71it/s] 16%|█▌ | 59216/371472 [4:42:55<27:48:33, 3.12it/s] 16%|█▌ | 59217/371472 [4:42:55<26:02:31, 3.33it/s] 16%|█▌ | 59218/371472 [4:42:55<25:28:46, 3.40it/s] 16%|█▌ | 59219/371472 [4:42:56<25:42:57, 3.37it/s] 16%|█▌ | 59220/371472 [4:42:56<26:23:50, 3.29it/s] {'loss': 4.2037, 'learning_rate': 8.569296903617834e-07, 'epoch': 2.55} + 16%|█▌ | 59220/371472 [4:42:56<26:23:50, 3.29it/s] 16%|█▌ | 59221/371472 [4:42:56<25:50:39, 3.36it/s] 16%|█▌ | 59222/371472 [4:42:57<25:36:01, 3.39it/s] 16%|█▌ | 59223/371472 [4:42:57<24:33:37, 3.53it/s] 16%|█▌ | 59224/371472 [4:42:57<23:42:05, 3.66it/s] 16%|█▌ | 59225/371472 [4:42:57<22:44:36, 3.81it/s] 16%|█▌ | 59226/371472 [4:42:58<24:22:27, 3.56it/s] 16%|█▌ | 59227/371472 [4:42:58<23:32:48, 3.68it/s] 16%|█▌ | 59228/371472 [4:42:58<24:00:04, 3.61it/s] 16%|█▌ | 59229/371472 [4:42:58<23:34:36, 3.68it/s] 16%|█▌ | 59230/371472 [4:42:59<23:10:45, 3.74it/s] 16%|█▌ | 59231/371472 [4:42:59<23:08:01, 3.75it/s] 16%|█▌ | 59232/371472 [4:42:59<23:47:25, 3.65it/s] 16%|█▌ | 59233/371472 [4:43:00<26:23:03, 3.29it/s] 16%|█▌ | 59234/371472 [4:43:00<25:13:22, 3.44it/s] 16%|█▌ | 59235/371472 [4:43:00<24:58:04, 3.47it/s] 16%|█▌ | 59236/371472 [4:43:01<25:03:35, 3.46it/s] 16%|█▌ | 59237/371472 [4:43:01<25:39:22, 3.38it/s] 16%|█▌ | 59238/371472 [4:43:01<26:31:40, 3.27it/s] 16%|█▌ | 59239/371472 [4:43:01<26:46:32, 3.24it/s] 16%|█▌ | 59240/371472 [4:43:02<26:02:35, 3.33it/s] {'loss': 4.1132, 'learning_rate': 8.568812083863043e-07, 'epoch': 2.55} + 16%|█▌ | 59240/371472 [4:43:02<26:02:35, 3.33it/s] 16%|█▌ | 59241/371472 [4:43:02<25:08:58, 3.45it/s] 16%|█▌ | 59242/371472 [4:43:02<24:17:40, 3.57it/s] 16%|█▌ | 59243/371472 [4:43:03<25:11:10, 3.44it/s] 16%|█▌ | 59244/371472 [4:43:03<24:08:31, 3.59it/s] 16%|█▌ | 59245/371472 [4:43:03<23:39:32, 3.67it/s] 16%|█▌ | 59246/371472 [4:43:03<22:40:03, 3.83it/s] 16%|█▌ | 59247/371472 [4:43:04<22:41:06, 3.82it/s] 16%|█▌ | 59248/371472 [4:43:04<24:49:15, 3.49it/s] 16%|█▌ | 59249/371472 [4:43:04<23:53:33, 3.63it/s] 16%|█▌ | 59250/371472 [4:43:05<26:08:43, 3.32it/s] 16%|█▌ | 59251/371472 [4:43:05<25:13:14, 3.44it/s] 16%|█▌ | 59252/371472 [4:43:05<26:08:43, 3.32it/s] 16%|█▌ | 59253/371472 [4:43:05<24:43:09, 3.51it/s] 16%|█▌ | 59254/371472 [4:43:06<24:43:07, 3.51it/s] 16%|█▌ | 59255/371472 [4:43:06<24:10:10, 3.59it/s] 16%|█▌ | 59256/371472 [4:43:06<24:20:24, 3.56it/s] 16%|█▌ | 59257/371472 [4:43:07<25:09:18, 3.45it/s] 16%|█▌ | 59258/371472 [4:43:07<24:40:06, 3.52it/s] 16%|█▌ | 59259/371472 [4:43:07<24:54:33, 3.48it/s] 16%|█▌ | 59260/371472 [4:43:07<24:27:32, 3.55it/s] {'loss': 4.3493, 'learning_rate': 8.568327264108255e-07, 'epoch': 2.55} + 16%|█▌ | 59260/371472 [4:43:07<24:27:32, 3.55it/s] 16%|█▌ | 59261/371472 [4:43:08<27:26:17, 3.16it/s] 16%|█▌ | 59262/371472 [4:43:08<25:50:02, 3.36it/s] 16%|█▌ | 59263/371472 [4:43:08<25:39:27, 3.38it/s] 16%|█▌ | 59264/371472 [4:43:09<24:41:27, 3.51it/s] 16%|█▌ | 59265/371472 [4:43:09<25:27:02, 3.41it/s] 16%|█▌ | 59266/371472 [4:43:09<25:11:22, 3.44it/s] 16%|█▌ | 59267/371472 [4:43:10<27:07:30, 3.20it/s] 16%|█▌ | 59268/371472 [4:43:10<26:29:44, 3.27it/s] 16%|█▌ | 59269/371472 [4:43:10<25:17:34, 3.43it/s] 16%|█▌ | 59270/371472 [4:43:10<23:51:34, 3.63it/s] 16%|█▌ | 59271/371472 [4:43:11<24:02:47, 3.61it/s] 16%|█▌ | 59272/371472 [4:43:11<23:54:00, 3.63it/s] 16%|█▌ | 59273/371472 [4:43:11<23:33:24, 3.68it/s] 16%|█▌ | 59274/371472 [4:43:11<23:04:43, 3.76it/s] 16%|█▌ | 59275/371472 [4:43:12<22:29:36, 3.86it/s] 16%|█▌ | 59276/371472 [4:43:12<22:36:54, 3.83it/s] 16%|█▌ | 59277/371472 [4:43:12<23:24:42, 3.70it/s] 16%|█▌ | 59278/371472 [4:43:12<24:14:01, 3.58it/s] 16%|█▌ | 59279/371472 [4:43:13<23:44:57, 3.65it/s] 16%|█▌ | 59280/371472 [4:43:13<22:44:59, 3.81it/s] {'loss': 4.361, 'learning_rate': 8.567842444353466e-07, 'epoch': 2.55} + 16%|█▌ | 59280/371472 [4:43:13<22:44:59, 3.81it/s] 16%|█▌ | 59281/371472 [4:43:13<22:51:16, 3.79it/s] 16%|█▌ | 59282/371472 [4:43:14<23:57:09, 3.62it/s] 16%|█▌ | 59283/371472 [4:43:14<22:34:26, 3.84it/s] 16%|█▌ | 59284/371472 [4:43:14<22:59:48, 3.77it/s] 16%|█▌ | 59285/371472 [4:43:14<23:01:19, 3.77it/s] 16%|█▌ | 59286/371472 [4:43:15<22:53:13, 3.79it/s] 16%|█▌ | 59287/371472 [4:43:15<21:57:56, 3.95it/s] 16%|█▌ | 59288/371472 [4:43:15<23:05:22, 3.76it/s] 16%|█▌ | 59289/371472 [4:43:15<23:33:48, 3.68it/s] 16%|█▌ | 59290/371472 [4:43:16<24:04:03, 3.60it/s] 16%|█▌ | 59291/371472 [4:43:16<23:21:09, 3.71it/s] 16%|█▌ | 59292/371472 [4:43:16<22:58:45, 3.77it/s] 16%|█▌ | 59293/371472 [4:43:16<23:18:43, 3.72it/s] 16%|█▌ | 59294/371472 [4:43:17<24:03:54, 3.60it/s] 16%|█▌ | 59295/371472 [4:43:17<24:10:28, 3.59it/s] 16%|█▌ | 59296/371472 [4:43:17<24:35:13, 3.53it/s] 16%|█▌ | 59297/371472 [4:43:18<23:59:59, 3.61it/s] 16%|█▌ | 59298/371472 [4:43:18<23:48:48, 3.64it/s] 16%|█▌ | 59299/371472 [4:43:18<22:52:24, 3.79it/s] 16%|█▌ | 59300/371472 [4:43:18<23:23:22, 3.71it/s] {'loss': 4.1195, 'learning_rate': 8.567357624598676e-07, 'epoch': 2.55} + 16%|█▌ | 59300/371472 [4:43:18<23:23:22, 3.71it/s] 16%|█▌ | 59301/371472 [4:43:19<27:39:41, 3.13it/s] 16%|█▌ | 59302/371472 [4:43:19<26:19:35, 3.29it/s] 16%|█▌ | 59303/371472 [4:43:19<25:46:57, 3.36it/s] 16%|█▌ | 59304/371472 [4:43:20<26:15:01, 3.30it/s] 16%|█▌ | 59305/371472 [4:43:20<26:05:43, 3.32it/s] 16%|█▌ | 59306/371472 [4:43:20<26:22:36, 3.29it/s] 16%|█▌ | 59307/371472 [4:43:21<27:12:08, 3.19it/s] 16%|█▌ | 59308/371472 [4:43:21<25:53:35, 3.35it/s] 16%|█▌ | 59309/371472 [4:43:21<24:47:16, 3.50it/s] 16%|█▌ | 59310/371472 [4:43:21<23:52:42, 3.63it/s] 16%|█▌ | 59311/371472 [4:43:22<23:21:52, 3.71it/s] 16%|█▌ | 59312/371472 [4:43:22<23:00:57, 3.77it/s] 16%|█▌ | 59313/371472 [4:43:22<22:26:43, 3.86it/s] 16%|█▌ | 59314/371472 [4:43:22<22:29:52, 3.85it/s] 16%|█▌ | 59315/371472 [4:43:23<22:20:35, 3.88it/s] 16%|█▌ | 59316/371472 [4:43:23<21:59:25, 3.94it/s] 16%|█▌ | 59317/371472 [4:43:23<23:33:06, 3.68it/s] 16%|█▌ | 59318/371472 [4:43:24<24:05:39, 3.60it/s] 16%|█▌ | 59319/371472 [4:43:24<24:55:32, 3.48it/s] 16%|█▌ | 59320/371472 [4:43:24<23:35:29, 3.68it/s] {'loss': 4.1201, 'learning_rate': 8.566872804843887e-07, 'epoch': 2.56} + 16%|█▌ | 59320/371472 [4:43:24<23:35:29, 3.68it/s] 16%|█▌ | 59321/371472 [4:43:24<24:53:50, 3.48it/s] 16%|█▌ | 59322/371472 [4:43:25<24:03:11, 3.60it/s] 16%|█▌ | 59323/371472 [4:43:25<26:03:12, 3.33it/s] 16%|█▌ | 59324/371472 [4:43:25<24:38:03, 3.52it/s] 16%|█▌ | 59325/371472 [4:43:26<23:56:00, 3.62it/s] 16%|█▌ | 59326/371472 [4:43:26<24:10:24, 3.59it/s] 16%|█▌ | 59327/371472 [4:43:26<24:48:10, 3.50it/s] 16%|█▌ | 59328/371472 [4:43:26<25:30:54, 3.40it/s] 16%|█▌ | 59329/371472 [4:43:27<23:54:40, 3.63it/s] 16%|█▌ | 59330/371472 [4:43:27<23:06:05, 3.75it/s] 16%|█▌ | 59331/371472 [4:43:27<23:14:33, 3.73it/s] 16%|█▌ | 59332/371472 [4:43:27<24:05:24, 3.60it/s] 16%|█▌ | 59333/371472 [4:43:28<24:23:12, 3.56it/s] 16%|█▌ | 59334/371472 [4:43:28<24:09:41, 3.59it/s] 16%|█▌ | 59335/371472 [4:43:28<25:57:00, 3.34it/s] 16%|█▌ | 59336/371472 [4:43:29<24:44:42, 3.50it/s] 16%|█▌ | 59337/371472 [4:43:29<24:23:00, 3.56it/s] 16%|█▌ | 59338/371472 [4:43:29<23:29:37, 3.69it/s] 16%|█▌ | 59339/371472 [4:43:29<23:10:45, 3.74it/s] 16%|█▌ | 59340/371472 [4:43:30<23:32:17, 3.68it/s] {'loss': 4.3441, 'learning_rate': 8.566387985089098e-07, 'epoch': 2.56} + 16%|█▌ | 59340/371472 [4:43:30<23:32:17, 3.68it/s] 16%|█▌ | 59341/371472 [4:43:30<24:39:11, 3.52it/s] 16%|█▌ | 59342/371472 [4:43:30<24:28:25, 3.54it/s] 16%|█▌ | 59343/371472 [4:43:31<23:52:02, 3.63it/s] 16%|█▌ | 59344/371472 [4:43:31<24:04:01, 3.60it/s] 16%|█▌ | 59345/371472 [4:43:31<24:08:29, 3.59it/s] 16%|█▌ | 59346/371472 [4:43:31<24:40:43, 3.51it/s] 16%|█▌ | 59347/371472 [4:43:32<23:48:44, 3.64it/s] 16%|█▌ | 59348/371472 [4:43:32<26:03:20, 3.33it/s] 16%|█▌ | 59349/371472 [4:43:32<25:53:35, 3.35it/s] 16%|█▌ | 59350/371472 [4:43:33<26:03:56, 3.33it/s] 16%|█▌ | 59351/371472 [4:43:33<26:30:43, 3.27it/s] 16%|█▌ | 59352/371472 [4:43:33<25:37:16, 3.38it/s] 16%|█▌ | 59353/371472 [4:43:34<25:43:39, 3.37it/s] 16%|█▌ | 59354/371472 [4:43:34<25:02:52, 3.46it/s] 16%|█▌ | 59355/371472 [4:43:34<26:09:44, 3.31it/s] 16%|█▌ | 59356/371472 [4:43:34<26:10:55, 3.31it/s] 16%|█▌ | 59357/371472 [4:43:35<25:13:17, 3.44it/s] 16%|█▌ | 59358/371472 [4:43:35<24:14:33, 3.58it/s] 16%|█▌ | 59359/371472 [4:43:35<24:54:40, 3.48it/s] 16%|█▌ | 59360/371472 [4:43:35<24:07:46, 3.59it/s] {'loss': 4.1668, 'learning_rate': 8.56590316533431e-07, 'epoch': 2.56} + 16%|█▌ | 59360/371472 [4:43:35<24:07:46, 3.59it/s] 16%|█▌ | 59361/371472 [4:43:36<23:49:07, 3.64it/s] 16%|█▌ | 59362/371472 [4:43:36<23:10:36, 3.74it/s] 16%|█▌ | 59363/371472 [4:43:36<22:21:13, 3.88it/s] 16%|█▌ | 59364/371472 [4:43:37<24:58:09, 3.47it/s] 16%|█▌ | 59365/371472 [4:43:37<23:40:01, 3.66it/s] 16%|█▌ | 59366/371472 [4:43:37<23:33:12, 3.68it/s] 16%|█▌ | 59367/371472 [4:43:37<22:27:40, 3.86it/s] 16%|█▌ | 59368/371472 [4:43:38<22:34:39, 3.84it/s] 16%|█▌ | 59369/371472 [4:43:38<24:16:46, 3.57it/s] 16%|█▌ | 59370/371472 [4:43:38<24:12:59, 3.58it/s] 16%|█▌ | 59371/371472 [4:43:38<24:03:03, 3.60it/s] 16%|█▌ | 59372/371472 [4:43:39<24:12:48, 3.58it/s] 16%|█▌ | 59373/371472 [4:43:39<23:26:59, 3.70it/s] 16%|█▌ | 59374/371472 [4:43:39<25:59:15, 3.34it/s] 16%|█▌ | 59375/371472 [4:43:40<24:15:32, 3.57it/s] 16%|█▌ | 59376/371472 [4:43:40<23:22:12, 3.71it/s] 16%|█▌ | 59377/371472 [4:43:40<23:56:49, 3.62it/s] 16%|█▌ | 59378/371472 [4:43:40<24:15:52, 3.57it/s] 16%|█▌ | 59379/371472 [4:43:41<23:57:46, 3.62it/s] 16%|█▌ | 59380/371472 [4:43:41<24:15:30, 3.57it/s] {'loss': 4.391, 'learning_rate': 8.565418345579521e-07, 'epoch': 2.56} + 16%|█▌ | 59380/371472 [4:43:41<24:15:30, 3.57it/s] 16%|█▌ | 59381/371472 [4:43:41<24:22:46, 3.56it/s] 16%|█▌ | 59382/371472 [4:43:42<23:42:13, 3.66it/s] 16%|█▌ | 59383/371472 [4:43:42<23:14:06, 3.73it/s] 16%|█▌ | 59384/371472 [4:43:42<24:08:23, 3.59it/s] 16%|█▌ | 59385/371472 [4:43:42<26:56:52, 3.22it/s] 16%|█▌ | 59386/371472 [4:43:43<25:55:26, 3.34it/s] 16%|█▌ | 59387/371472 [4:43:43<25:14:18, 3.43it/s] 16%|█▌ | 59388/371472 [4:43:43<24:26:18, 3.55it/s] 16%|█▌ | 59389/371472 [4:43:44<23:37:43, 3.67it/s] 16%|█▌ | 59390/371472 [4:43:44<24:00:43, 3.61it/s] 16%|█▌ | 59391/371472 [4:43:44<23:26:30, 3.70it/s] 16%|█▌ | 59392/371472 [4:43:44<23:26:14, 3.70it/s] 16%|█▌ | 59393/371472 [4:43:45<25:01:45, 3.46it/s] 16%|█▌ | 59394/371472 [4:43:45<24:34:46, 3.53it/s] 16%|█▌ | 59395/371472 [4:43:45<25:01:26, 3.46it/s] 16%|█▌ | 59396/371472 [4:43:46<24:59:53, 3.47it/s] 16%|█▌ | 59397/371472 [4:43:46<23:47:58, 3.64it/s] 16%|█▌ | 59398/371472 [4:43:46<23:51:05, 3.63it/s] 16%|█▌ | 59399/371472 [4:43:46<23:58:17, 3.62it/s] 16%|█▌ | 59400/371472 [4:43:47<23:17:01, 3.72it/s] {'loss': 4.1836, 'learning_rate': 8.564933525824732e-07, 'epoch': 2.56} + 16%|█▌ | 59400/371472 [4:43:47<23:17:01, 3.72it/s] 16%|█▌ | 59401/371472 [4:43:47<24:05:41, 3.60it/s] 16%|█▌ | 59402/371472 [4:43:47<22:52:02, 3.79it/s] 16%|█▌ | 59403/371472 [4:43:47<22:29:07, 3.86it/s] 16%|█▌ | 59404/371472 [4:43:48<22:30:43, 3.85it/s] 16%|█▌ | 59405/371472 [4:43:48<22:30:10, 3.85it/s] 16%|█▌ | 59406/371472 [4:43:48<22:25:30, 3.87it/s] 16%|█▌ | 59407/371472 [4:43:48<22:53:03, 3.79it/s] 16%|█▌ | 59408/371472 [4:43:49<25:17:14, 3.43it/s] 16%|█▌ | 59409/371472 [4:43:49<24:55:14, 3.48it/s] 16%|█▌ | 59410/371472 [4:43:49<27:06:21, 3.20it/s] 16%|█▌ | 59411/371472 [4:43:50<25:40:15, 3.38it/s] 16%|█▌ | 59412/371472 [4:43:50<24:12:54, 3.58it/s] 16%|█▌ | 59413/371472 [4:43:50<23:56:55, 3.62it/s] 16%|█▌ | 59414/371472 [4:43:50<23:52:21, 3.63it/s] 16%|█▌ | 59415/371472 [4:43:51<23:31:59, 3.68it/s] 16%|█▌ | 59416/371472 [4:43:51<23:00:15, 3.77it/s] 16%|█▌ | 59417/371472 [4:43:51<22:24:48, 3.87it/s] 16%|█▌ | 59418/371472 [4:43:52<24:12:09, 3.58it/s] 16%|█▌ | 59419/371472 [4:43:52<24:33:16, 3.53it/s] 16%|█▌ | 59420/371472 [4:43:52<24:55:36, 3.48it/s] {'loss': 4.0879, 'learning_rate': 8.564448706069943e-07, 'epoch': 2.56} + 16%|█▌ | 59420/371472 [4:43:52<24:55:36, 3.48it/s] 16%|█▌ | 59421/371472 [4:43:52<24:26:24, 3.55it/s] 16%|█▌ | 59422/371472 [4:43:53<24:03:10, 3.60it/s] 16%|█▌ | 59423/371472 [4:43:53<24:08:30, 3.59it/s] 16%|█▌ | 59424/371472 [4:43:53<23:40:16, 3.66it/s] 16%|█▌ | 59425/371472 [4:43:54<24:31:33, 3.53it/s] 16%|█▌ | 59426/371472 [4:43:54<24:24:48, 3.55it/s] 16%|█▌ | 59427/371472 [4:43:54<23:39:01, 3.67it/s] 16%|█▌ | 59428/371472 [4:43:54<23:29:36, 3.69it/s] 16%|█▌ | 59429/371472 [4:43:55<23:15:05, 3.73it/s] 16%|█▌ | 59430/371472 [4:43:55<24:15:26, 3.57it/s] 16%|█▌ | 59431/371472 [4:43:55<24:16:20, 3.57it/s] 16%|█▌ | 59432/371472 [4:43:55<24:28:49, 3.54it/s] 16%|█▌ | 59433/371472 [4:43:56<23:51:11, 3.63it/s] 16%|█▌ | 59434/371472 [4:43:56<23:27:12, 3.70it/s] 16%|█▌ | 59435/371472 [4:43:56<22:55:48, 3.78it/s] 16%|█▌ | 59436/371472 [4:43:56<22:56:21, 3.78it/s] 16%|█▌ | 59437/371472 [4:43:57<22:29:49, 3.85it/s] 16%|█▌ | 59438/371472 [4:43:57<22:34:10, 3.84it/s] 16%|█▌ | 59439/371472 [4:43:57<21:52:11, 3.96it/s] 16%|█▌ | 59440/371472 [4:43:57<22:07:06, 3.92it/s] {'loss': 4.187, 'learning_rate': 8.563963886315154e-07, 'epoch': 2.56} + 16%|█▌ | 59440/371472 [4:43:57<22:07:06, 3.92it/s] 16%|█▌ | 59441/371472 [4:43:58<24:38:01, 3.52it/s] 16%|█▌ | 59442/371472 [4:43:58<24:00:57, 3.61it/s] 16%|█▌ | 59443/371472 [4:43:58<24:35:24, 3.52it/s] 16%|█▌ | 59444/371472 [4:43:59<24:16:58, 3.57it/s] 16%|█▌ | 59445/371472 [4:43:59<25:28:44, 3.40it/s] 16%|█▌ | 59446/371472 [4:43:59<25:07:40, 3.45it/s] 16%|█▌ | 59447/371472 [4:44:00<24:56:40, 3.47it/s] 16%|█▌ | 59448/371472 [4:44:00<24:33:17, 3.53it/s] 16%|█▌ | 59449/371472 [4:44:00<24:03:45, 3.60it/s] 16%|█▌ | 59450/371472 [4:44:00<25:23:02, 3.41it/s] 16%|█▌ | 59451/371472 [4:44:01<24:13:04, 3.58it/s] 16%|█▌ | 59452/371472 [4:44:01<24:34:21, 3.53it/s] 16%|█▌ | 59453/371472 [4:44:01<25:43:17, 3.37it/s] 16%|█▌ | 59454/371472 [4:44:02<25:08:17, 3.45it/s] 16%|█▌ | 59455/371472 [4:44:02<27:01:51, 3.21it/s] 16%|█▌ | 59456/371472 [4:44:02<27:41:09, 3.13it/s] 16%|█▌ | 59457/371472 [4:44:03<25:54:15, 3.35it/s] 16%|█▌ | 59458/371472 [4:44:03<26:37:11, 3.26it/s] 16%|█▌ | 59459/371472 [4:44:03<26:06:49, 3.32it/s] 16%|█▌ | 59460/371472 [4:44:03<24:47:19, 3.50it/s] {'loss': 4.1479, 'learning_rate': 8.563479066560365e-07, 'epoch': 2.56} + 16%|█▌ | 59460/371472 [4:44:03<24:47:19, 3.50it/s] 16%|█▌ | 59461/371472 [4:44:04<25:05:25, 3.45it/s] 16%|█▌ | 59462/371472 [4:44:04<26:58:48, 3.21it/s] 16%|█▌ | 59463/371472 [4:44:04<26:54:17, 3.22it/s] 16%|█▌ | 59464/371472 [4:44:05<25:09:30, 3.44it/s] 16%|█▌ | 59465/371472 [4:44:05<23:44:58, 3.65it/s] 16%|█▌ | 59466/371472 [4:44:05<22:50:29, 3.79it/s] 16%|█▌ | 59467/371472 [4:44:05<22:29:12, 3.85it/s] 16%|█▌ | 59468/371472 [4:44:06<22:20:14, 3.88it/s] 16%|█▌ | 59469/371472 [4:44:06<23:58:27, 3.62it/s] 16%|█▌ | 59470/371472 [4:44:06<23:06:51, 3.75it/s] 16%|█▌ | 59471/371472 [4:44:06<25:18:34, 3.42it/s] 16%|█▌ | 59472/371472 [4:44:07<24:02:37, 3.60it/s] 16%|█▌ | 59473/371472 [4:44:07<23:03:25, 3.76it/s] 16%|█▌ | 59474/371472 [4:44:07<22:53:16, 3.79it/s] 16%|█▌ | 59475/371472 [4:44:07<22:36:04, 3.83it/s] 16%|█▌ | 59476/371472 [4:44:08<22:11:38, 3.90it/s] 16%|█▌ | 59477/371472 [4:44:08<23:05:39, 3.75it/s] 16%|█▌ | 59478/371472 [4:44:08<23:34:18, 3.68it/s] 16%|█▌ | 59479/371472 [4:44:09<23:42:13, 3.66it/s] 16%|█▌ | 59480/371472 [4:44:09<23:04:54, 3.75it/s] {'loss': 4.1809, 'learning_rate': 8.562994246805576e-07, 'epoch': 2.56} + 16%|█▌ | 59480/371472 [4:44:09<23:04:54, 3.75it/s] 16%|█▌ | 59481/371472 [4:44:09<22:49:38, 3.80it/s] 16%|█▌ | 59482/371472 [4:44:09<23:39:46, 3.66it/s] 16%|█▌ | 59483/371472 [4:44:10<23:38:59, 3.66it/s] 16%|█▌ | 59484/371472 [4:44:10<23:49:00, 3.64it/s] 16%|█▌ | 59485/371472 [4:44:10<24:09:10, 3.59it/s] 16%|█▌ | 59486/371472 [4:44:11<25:46:45, 3.36it/s] 16%|█▌ | 59487/371472 [4:44:11<25:13:24, 3.44it/s] 16%|█▌ | 59488/371472 [4:44:11<24:07:46, 3.59it/s] 16%|█▌ | 59489/371472 [4:44:11<23:51:45, 3.63it/s] 16%|█▌ | 59490/371472 [4:44:12<23:00:23, 3.77it/s] 16%|█▌ | 59491/371472 [4:44:12<22:03:48, 3.93it/s] 16%|█▌ | 59492/371472 [4:44:12<23:02:06, 3.76it/s] 16%|█▌ | 59493/371472 [4:44:12<23:50:03, 3.64it/s] 16%|█▌ | 59494/371472 [4:44:13<24:10:32, 3.58it/s] 16%|█▌ | 59495/371472 [4:44:13<23:47:28, 3.64it/s] 16%|█▌ | 59496/371472 [4:44:13<23:29:19, 3.69it/s] 16%|█▌ | 59497/371472 [4:44:14<23:29:33, 3.69it/s] 16%|█▌ | 59498/371472 [4:44:14<24:16:32, 3.57it/s] 16%|█▌ | 59499/371472 [4:44:14<24:18:24, 3.57it/s] 16%|█▌ | 59500/371472 [4:44:14<24:28:27, 3.54it/s] {'loss': 4.191, 'learning_rate': 8.562509427050787e-07, 'epoch': 2.56} + 16%|█▌ | 59500/371472 [4:44:14<24:28:27, 3.54it/s] 16%|█▌ | 59501/371472 [4:44:15<24:59:33, 3.47it/s] 16%|█▌ | 59502/371472 [4:44:15<27:25:43, 3.16it/s] 16%|█▌ | 59503/371472 [4:44:15<26:17:11, 3.30it/s] 16%|█▌ | 59504/371472 [4:44:16<28:32:40, 3.04it/s] 16%|█▌ | 59505/371472 [4:44:16<27:03:57, 3.20it/s] 16%|█▌ | 59506/371472 [4:44:16<27:09:42, 3.19it/s] 16%|█▌ | 59507/371472 [4:44:17<26:34:19, 3.26it/s] 16%|█▌ | 59508/371472 [4:44:17<25:53:15, 3.35it/s] 16%|█▌ | 59509/371472 [4:44:17<25:02:31, 3.46it/s] 16%|█▌ | 59510/371472 [4:44:17<24:13:30, 3.58it/s] 16%|█▌ | 59511/371472 [4:44:18<24:13:45, 3.58it/s] 16%|█▌ | 59512/371472 [4:44:18<23:03:32, 3.76it/s] 16%|█▌ | 59513/371472 [4:44:18<23:56:56, 3.62it/s] 16%|█▌ | 59514/371472 [4:44:18<23:47:13, 3.64it/s] 16%|█▌ | 59515/371472 [4:44:19<23:16:03, 3.72it/s] 16%|█▌ | 59516/371472 [4:44:19<24:03:42, 3.60it/s] 16%|█▌ | 59517/371472 [4:44:19<23:56:47, 3.62it/s] 16%|█▌ | 59518/371472 [4:44:20<22:47:37, 3.80it/s] 16%|█▌ | 59519/371472 [4:44:20<22:29:42, 3.85it/s] 16%|█▌ | 59520/371472 [4:44:20<24:07:14, 3.59it/s] {'loss': 4.4552, 'learning_rate': 8.562024607295999e-07, 'epoch': 2.56} + 16%|█▌ | 59520/371472 [4:44:20<24:07:14, 3.59it/s] 16%|█▌ | 59521/371472 [4:44:20<23:12:57, 3.73it/s] 16%|█▌ | 59522/371472 [4:44:21<22:50:28, 3.79it/s] 16%|█▌ | 59523/371472 [4:44:21<22:11:10, 3.91it/s] 16%|█▌ | 59524/371472 [4:44:21<21:38:11, 4.00it/s] 16%|█▌ | 59525/371472 [4:44:21<21:48:26, 3.97it/s] 16%|█▌ | 59526/371472 [4:44:22<22:06:52, 3.92it/s] 16%|█▌ | 59527/371472 [4:44:22<22:06:17, 3.92it/s] 16%|█▌ | 59528/371472 [4:44:22<22:11:08, 3.91it/s] 16%|█▌ | 59529/371472 [4:44:22<22:12:44, 3.90it/s] 16%|█▌ | 59530/371472 [4:44:23<23:22:18, 3.71it/s] 16%|█▌ | 59531/371472 [4:44:23<25:28:13, 3.40it/s] 16%|█▌ | 59532/371472 [4:44:23<25:21:19, 3.42it/s] 16%|█▌ | 59533/371472 [4:44:24<25:16:53, 3.43it/s] 16%|█▌ | 59534/371472 [4:44:24<25:44:21, 3.37it/s] 16%|█▌ | 59535/371472 [4:44:24<25:01:43, 3.46it/s] 16%|█▌ | 59536/371472 [4:44:24<24:22:45, 3.55it/s] 16%|█▌ | 59537/371472 [4:44:25<23:59:15, 3.61it/s] 16%|█▌ | 59538/371472 [4:44:25<24:25:58, 3.55it/s] 16%|█▌ | 59539/371472 [4:44:25<24:12:08, 3.58it/s] 16%|█▌ | 59540/371472 [4:44:26<24:42:59, 3.51it/s] {'loss': 4.3285, 'learning_rate': 8.56153978754121e-07, 'epoch': 2.56} + 16%|█▌ | 59540/371472 [4:44:26<24:42:59, 3.51it/s] 16%|█▌ | 59541/371472 [4:44:26<23:46:26, 3.64it/s] 16%|█▌ | 59542/371472 [4:44:26<23:50:32, 3.63it/s] 16%|█▌ | 59543/371472 [4:44:26<23:09:50, 3.74it/s] 16%|█▌ | 59544/371472 [4:44:27<22:32:46, 3.84it/s] 16%|█▌ | 59545/371472 [4:44:27<21:52:49, 3.96it/s] 16%|█▌ | 59546/371472 [4:44:27<21:59:32, 3.94it/s] 16%|█▌ | 59547/371472 [4:44:27<22:04:08, 3.93it/s] 16%|█▌ | 59548/371472 [4:44:28<22:26:42, 3.86it/s] 16%|█▌ | 59549/371472 [4:44:28<23:45:25, 3.65it/s] 16%|█▌ | 59550/371472 [4:44:28<25:10:54, 3.44it/s] 16%|█▌ | 59551/371472 [4:44:29<24:40:58, 3.51it/s] 16%|█▌ | 59552/371472 [4:44:29<23:53:29, 3.63it/s] 16%|█▌ | 59553/371472 [4:44:29<23:33:47, 3.68it/s] 16%|█▌ | 59554/371472 [4:44:29<24:25:53, 3.55it/s] 16%|█▌ | 59555/371472 [4:44:30<24:25:07, 3.55it/s] 16%|█▌ | 59556/371472 [4:44:30<23:46:49, 3.64it/s] 16%|█▌ | 59557/371472 [4:44:30<22:56:09, 3.78it/s] 16%|█▌ | 59558/371472 [4:44:30<23:15:49, 3.72it/s] 16%|█▌ | 59559/371472 [4:44:31<24:01:18, 3.61it/s] 16%|█▌ | 59560/371472 [4:44:31<23:58:42, 3.61it/s] {'loss': 4.2826, 'learning_rate': 8.56105496778642e-07, 'epoch': 2.57} + 16%|█▌ | 59560/371472 [4:44:31<23:58:42, 3.61it/s] 16%|█▌ | 59561/371472 [4:44:31<25:41:29, 3.37it/s] 16%|█▌ | 59562/371472 [4:44:32<24:21:32, 3.56it/s] 16%|█▌ | 59563/371472 [4:44:32<23:43:35, 3.65it/s] 16%|█▌ | 59564/371472 [4:44:32<23:30:12, 3.69it/s] 16%|█▌ | 59565/371472 [4:44:32<23:39:41, 3.66it/s] 16%|█▌ | 59566/371472 [4:44:33<23:50:41, 3.63it/s] 16%|█▌ | 59567/371472 [4:44:33<23:39:48, 3.66it/s] 16%|█▌ | 59568/371472 [4:44:33<23:56:42, 3.62it/s] 16%|█▌ | 59569/371472 [4:44:34<25:21:10, 3.42it/s] 16%|█▌ | 59570/371472 [4:44:34<25:24:54, 3.41it/s] 16%|█▌ | 59571/371472 [4:44:34<24:50:55, 3.49it/s] 16%|█▌ | 59572/371472 [4:44:34<24:59:44, 3.47it/s] 16%|█▌ | 59573/371472 [4:44:35<23:30:33, 3.69it/s] 16%|█▌ | 59574/371472 [4:44:35<22:37:59, 3.83it/s] 16%|█▌ | 59575/371472 [4:44:35<23:58:38, 3.61it/s] 16%|█▌ | 59576/371472 [4:44:36<24:58:03, 3.47it/s] 16%|█▌ | 59577/371472 [4:44:36<25:09:58, 3.44it/s] 16%|█▌ | 59578/371472 [4:44:36<24:15:26, 3.57it/s] 16%|█▌ | 59579/371472 [4:44:36<24:01:32, 3.61it/s] 16%|█▌ | 59580/371472 [4:44:37<23:25:14, 3.70it/s] {'loss': 4.0214, 'learning_rate': 8.560570148031631e-07, 'epoch': 2.57} + 16%|█▌ | 59580/371472 [4:44:37<23:25:14, 3.70it/s] 16%|█▌ | 59581/371472 [4:44:37<22:57:40, 3.77it/s] 16%|█▌ | 59582/371472 [4:44:37<22:37:01, 3.83it/s] 16%|█▌ | 59583/371472 [4:44:37<24:03:22, 3.60it/s] 16%|█▌ | 59584/371472 [4:44:38<24:00:59, 3.61it/s] 16%|█▌ | 59585/371472 [4:44:38<23:36:40, 3.67it/s] 16%|█▌ | 59586/371472 [4:44:38<23:53:30, 3.63it/s] 16%|█▌ | 59587/371472 [4:44:39<24:04:51, 3.60it/s] 16%|█▌ | 59588/371472 [4:44:39<23:18:17, 3.72it/s] 16%|█▌ | 59589/371472 [4:44:39<22:41:44, 3.82it/s] 16%|█▌ | 59590/371472 [4:44:39<24:41:37, 3.51it/s] 16%|█▌ | 59591/371472 [4:44:40<24:46:10, 3.50it/s] 16%|█▌ | 59592/371472 [4:44:40<25:21:33, 3.42it/s] 16%|█▌ | 59593/371472 [4:44:40<26:29:24, 3.27it/s] 16%|█▌ | 59594/371472 [4:44:41<25:20:34, 3.42it/s] 16%|█▌ | 59595/371472 [4:44:41<25:26:02, 3.41it/s] 16%|█▌ | 59596/371472 [4:44:41<25:11:01, 3.44it/s] 16%|█▌ | 59597/371472 [4:44:41<24:27:24, 3.54it/s] 16%|█▌ | 59598/371472 [4:44:42<23:48:10, 3.64it/s] 16%|█▌ | 59599/371472 [4:44:42<24:39:36, 3.51it/s] 16%|█▌ | 59600/371472 [4:44:42<25:12:38, 3.44it/s] {'loss': 4.0768, 'learning_rate': 8.560085328276842e-07, 'epoch': 2.57} + 16%|█▌ | 59600/371472 [4:44:42<25:12:38, 3.44it/s] 16%|█▌ | 59601/371472 [4:44:43<27:39:30, 3.13it/s] 16%|█▌ | 59602/371472 [4:44:43<26:22:46, 3.28it/s] 16%|█▌ | 59603/371472 [4:44:43<27:44:42, 3.12it/s] 16%|█▌ | 59604/371472 [4:44:44<26:22:11, 3.29it/s] 16%|█▌ | 59605/371472 [4:44:44<24:44:55, 3.50it/s] 16%|█▌ | 59606/371472 [4:44:44<23:38:35, 3.66it/s] 16%|█▌ | 59607/371472 [4:44:44<25:14:20, 3.43it/s] 16%|█▌ | 59608/371472 [4:44:45<24:50:01, 3.49it/s] 16%|█▌ | 59609/371472 [4:44:45<26:41:03, 3.25it/s] 16%|█▌ | 59610/371472 [4:44:45<25:29:42, 3.40it/s] 16%|█▌ | 59611/371472 [4:44:45<24:15:42, 3.57it/s] 16%|█▌ | 59612/371472 [4:44:46<23:27:12, 3.69it/s] 16%|█▌ | 59613/371472 [4:44:46<24:15:29, 3.57it/s] 16%|█▌ | 59614/371472 [4:44:46<24:37:29, 3.52it/s] 16%|█▌ | 59615/371472 [4:44:47<24:27:33, 3.54it/s] 16%|█▌ | 59616/371472 [4:44:47<23:43:37, 3.65it/s] 16%|█▌ | 59617/371472 [4:44:47<23:09:02, 3.74it/s] 16%|█▌ | 59618/371472 [4:44:47<23:21:45, 3.71it/s] 16%|█▌ | 59619/371472 [4:44:48<24:17:34, 3.57it/s] 16%|█▌ | 59620/371472 [4:44:48<27:35:33, 3.14it/s] {'loss': 4.1235, 'learning_rate': 8.559600508522053e-07, 'epoch': 2.57} + 16%|█▌ | 59620/371472 [4:44:48<27:35:33, 3.14it/s] 16%|█▌ | 59621/371472 [4:44:48<27:11:48, 3.19it/s] 16%|█▌ | 59622/371472 [4:44:49<25:20:20, 3.42it/s] 16%|█▌ | 59623/371472 [4:44:49<24:27:37, 3.54it/s] 16%|█▌ | 59624/371472 [4:44:49<24:16:21, 3.57it/s] 16%|█▌ | 59625/371472 [4:44:49<24:33:23, 3.53it/s] 16%|█▌ | 59626/371472 [4:44:50<23:19:54, 3.71it/s] 16%|█▌ | 59627/371472 [4:44:50<23:11:54, 3.73it/s] 16%|█▌ | 59628/371472 [4:44:50<23:57:17, 3.62it/s] 16%|█▌ | 59629/371472 [4:44:51<25:07:01, 3.45it/s] 16%|█▌ | 59630/371472 [4:44:51<24:50:04, 3.49it/s] 16%|█▌ | 59631/371472 [4:44:51<25:57:54, 3.34it/s] 16%|█▌ | 59632/371472 [4:44:52<26:00:28, 3.33it/s] 16%|█▌ | 59633/371472 [4:44:52<24:53:32, 3.48it/s] 16%|█▌ | 59634/371472 [4:44:52<25:52:51, 3.35it/s] 16%|█▌ | 59635/371472 [4:44:52<24:41:54, 3.51it/s] 16%|█▌ | 59636/371472 [4:44:53<24:16:35, 3.57it/s] 16%|█▌ | 59637/371472 [4:44:53<23:59:49, 3.61it/s] 16%|█▌ | 59638/371472 [4:44:53<24:05:00, 3.60it/s] 16%|█▌ | 59639/371472 [4:44:53<25:09:57, 3.44it/s] 16%|█▌ | 59640/371472 [4:44:54<24:17:24, 3.57it/s] {'loss': 4.2986, 'learning_rate': 8.559115688767264e-07, 'epoch': 2.57} + 16%|█▌ | 59640/371472 [4:44:54<24:17:24, 3.57it/s] 16%|█▌ | 59641/371472 [4:44:54<25:03:10, 3.46it/s] 16%|█▌ | 59642/371472 [4:44:54<23:54:09, 3.62it/s] 16%|█▌ | 59643/371472 [4:44:55<23:41:11, 3.66it/s] 16%|█▌ | 59644/371472 [4:44:55<25:57:57, 3.34it/s] 16%|█▌ | 59645/371472 [4:44:55<24:59:09, 3.47it/s] 16%|█▌ | 59646/371472 [4:44:55<25:01:32, 3.46it/s] 16%|█▌ | 59647/371472 [4:44:56<25:31:23, 3.39it/s] 16%|█▌ | 59648/371472 [4:44:56<24:36:49, 3.52it/s] 16%|█▌ | 59649/371472 [4:44:56<24:23:31, 3.55it/s] 16%|█▌ | 59650/371472 [4:44:57<25:18:25, 3.42it/s] 16%|█▌ | 59651/371472 [4:44:57<24:38:14, 3.52it/s] 16%|█▌ | 59652/371472 [4:44:57<24:16:41, 3.57it/s] 16%|█▌ | 59653/371472 [4:44:57<22:53:34, 3.78it/s] 16%|█▌ | 59654/371472 [4:44:58<23:01:31, 3.76it/s] 16%|█▌ | 59655/371472 [4:44:58<23:01:35, 3.76it/s] 16%|█▌ | 59656/371472 [4:44:58<23:13:01, 3.73it/s] 16%|█▌ | 59657/371472 [4:44:59<24:31:23, 3.53it/s] 16%|█▌ | 59658/371472 [4:44:59<24:25:14, 3.55it/s] 16%|█▌ | 59659/371472 [4:44:59<25:41:31, 3.37it/s] 16%|█▌ | 59660/371472 [4:44:59<24:28:27, 3.54it/s] {'loss': 3.9553, 'learning_rate': 8.558630869012476e-07, 'epoch': 2.57} + 16%|█▌ | 59660/371472 [4:44:59<24:28:27, 3.54it/s] 16%|█▌ | 59661/371472 [4:45:00<23:26:39, 3.69it/s] 16%|█▌ | 59662/371472 [4:45:00<22:37:03, 3.83it/s] 16%|█▌ | 59663/371472 [4:45:00<22:58:17, 3.77it/s] 16%|█▌ | 59664/371472 [4:45:00<23:14:26, 3.73it/s] 16%|█▌ | 59665/371472 [4:45:01<24:01:48, 3.60it/s] 16%|█▌ | 59666/371472 [4:45:01<23:26:58, 3.69it/s] 16%|█▌ | 59667/371472 [4:45:01<23:08:37, 3.74it/s] 16%|█▌ | 59668/371472 [4:45:01<22:47:11, 3.80it/s] 16%|█▌ | 59669/371472 [4:45:02<25:21:12, 3.42it/s] 16%|█▌ | 59670/371472 [4:45:02<26:06:43, 3.32it/s] 16%|█▌ | 59671/371472 [4:45:02<26:34:32, 3.26it/s] 16%|█▌ | 59672/371472 [4:45:03<26:44:50, 3.24it/s] 16%|█▌ | 59673/371472 [4:45:03<26:16:31, 3.30it/s] 16%|█▌ | 59674/371472 [4:45:03<26:38:14, 3.25it/s] 16%|█▌ | 59675/371472 [4:45:04<25:25:51, 3.41it/s] 16%|█▌ | 59676/371472 [4:45:04<24:16:58, 3.57it/s] 16%|█▌ | 59677/371472 [4:45:04<24:43:56, 3.50it/s] 16%|█▌ | 59678/371472 [4:45:04<23:57:12, 3.62it/s] 16%|█▌ | 59679/371472 [4:45:05<23:25:09, 3.70it/s] 16%|█▌ | 59680/371472 [4:45:05<25:10:12, 3.44it/s] {'loss': 4.144, 'learning_rate': 8.558146049257686e-07, 'epoch': 2.57} + 16%|█▌ | 59680/371472 [4:45:05<25:10:12, 3.44it/s] 16%|█▌ | 59681/371472 [4:45:05<24:31:14, 3.53it/s] 16%|█▌ | 59682/371472 [4:45:06<25:14:45, 3.43it/s] 16%|█▌ | 59683/371472 [4:45:06<24:55:29, 3.47it/s] 16%|█▌ | 59684/371472 [4:45:06<24:06:46, 3.59it/s] 16%|█▌ | 59685/371472 [4:45:06<23:49:07, 3.64it/s] 16%|█▌ | 59686/371472 [4:45:07<24:15:39, 3.57it/s] 16%|█▌ | 59687/371472 [4:45:07<24:47:41, 3.49it/s] 16%|█▌ | 59688/371472 [4:45:07<24:14:10, 3.57it/s] 16%|█▌ | 59689/371472 [4:45:08<23:33:50, 3.68it/s] 16%|█▌ | 59690/371472 [4:45:08<23:26:29, 3.69it/s] 16%|█▌ | 59691/371472 [4:45:08<23:14:04, 3.73it/s] 16%|█▌ | 59692/371472 [4:45:08<24:01:33, 3.60it/s] 16%|█▌ | 59693/371472 [4:45:09<23:18:05, 3.72it/s] 16%|█▌ | 59694/371472 [4:45:09<23:05:26, 3.75it/s] 16%|█▌ | 59695/371472 [4:45:09<23:50:20, 3.63it/s] 16%|█▌ | 59696/371472 [4:45:09<23:27:43, 3.69it/s] 16%|█▌ | 59697/371472 [4:45:10<24:48:23, 3.49it/s] 16%|█▌ | 59698/371472 [4:45:10<23:32:03, 3.68it/s] 16%|█▌ | 59699/371472 [4:45:10<25:16:47, 3.43it/s] 16%|█▌ | 59700/371472 [4:45:11<25:08:18, 3.45it/s] {'loss': 4.1962, 'learning_rate': 8.557661229502897e-07, 'epoch': 2.57} + 16%|█▌ | 59700/371472 [4:45:11<25:08:18, 3.45it/s] 16%|█▌ | 59701/371472 [4:45:11<24:08:59, 3.59it/s] 16%|█▌ | 59702/371472 [4:45:11<23:22:01, 3.71it/s] 16%|█▌ | 59703/371472 [4:45:11<24:43:05, 3.50it/s] 16%|█▌ | 59704/371472 [4:45:12<24:30:47, 3.53it/s] 16%|█▌ | 59705/371472 [4:45:12<23:36:23, 3.67it/s] 16%|█▌ | 59706/371472 [4:45:12<23:18:22, 3.72it/s] 16%|█▌ | 59707/371472 [4:45:13<24:30:42, 3.53it/s] 16%|█▌ | 59708/371472 [4:45:13<23:54:54, 3.62it/s] 16%|█▌ | 59709/371472 [4:45:13<23:09:39, 3.74it/s] 16%|█▌ | 59710/371472 [4:45:13<24:10:45, 3.58it/s] 16%|█▌ | 59711/371472 [4:45:14<23:06:54, 3.75it/s] 16%|█▌ | 59712/371472 [4:45:14<25:07:16, 3.45it/s] 16%|█▌ | 59713/371472 [4:45:14<24:11:18, 3.58it/s] 16%|█▌ | 59714/371472 [4:45:14<24:14:38, 3.57it/s] 16%|█▌ | 59715/371472 [4:45:15<26:03:21, 3.32it/s] 16%|█▌ | 59716/371472 [4:45:15<24:44:28, 3.50it/s] 16%|█▌ | 59717/371472 [4:45:15<23:29:16, 3.69it/s] 16%|█▌ | 59718/371472 [4:45:16<23:33:53, 3.67it/s] 16%|█▌ | 59719/371472 [4:45:16<23:54:34, 3.62it/s] 16%|█▌ | 59720/371472 [4:45:16<23:31:04, 3.68it/s] {'loss': 4.3642, 'learning_rate': 8.557176409748109e-07, 'epoch': 2.57} + 16%|█▌ | 59720/371472 [4:45:16<23:31:04, 3.68it/s] 16%|█▌ | 59721/371472 [4:45:16<24:42:40, 3.50it/s] 16%|█▌ | 59722/371472 [4:45:17<23:35:31, 3.67it/s] 16%|█▌ | 59723/371472 [4:45:17<23:37:52, 3.66it/s] 16%|█▌ | 59724/371472 [4:45:17<23:51:14, 3.63it/s] 16%|█▌ | 59725/371472 [4:45:18<23:27:44, 3.69it/s] 16%|█▌ | 59726/371472 [4:45:18<22:54:53, 3.78it/s] 16%|█▌ | 59727/371472 [4:45:18<23:19:08, 3.71it/s] 16%|█▌ | 59728/371472 [4:45:18<24:02:32, 3.60it/s] 16%|█▌ | 59729/371472 [4:45:19<23:50:32, 3.63it/s] 16%|█▌ | 59730/371472 [4:45:19<25:36:52, 3.38it/s] 16%|█▌ | 59731/371472 [4:45:19<24:20:25, 3.56it/s] 16%|█▌ | 59732/371472 [4:45:20<25:01:21, 3.46it/s] 16%|█▌ | 59733/371472 [4:45:20<25:29:31, 3.40it/s] 16%|█▌ | 59734/371472 [4:45:20<24:56:22, 3.47it/s] 16%|█▌ | 59735/371472 [4:45:20<24:53:04, 3.48it/s] 16%|█▌ | 59736/371472 [4:45:21<24:28:06, 3.54it/s] 16%|█▌ | 59737/371472 [4:45:21<26:04:04, 3.32it/s] 16%|█▌ | 59738/371472 [4:45:21<25:41:28, 3.37it/s] 16%|█▌ | 59739/371472 [4:45:22<26:05:14, 3.32it/s] 16%|█▌ | 59740/371472 [4:45:22<26:52:53, 3.22it/s] {'loss': 4.0517, 'learning_rate': 8.55669158999332e-07, 'epoch': 2.57} + 16%|█▌ | 59740/371472 [4:45:22<26:52:53, 3.22it/s] 16%|█▌ | 59741/371472 [4:45:22<25:31:02, 3.39it/s] 16%|█▌ | 59742/371472 [4:45:22<24:50:53, 3.48it/s] 16%|█▌ | 59743/371472 [4:45:23<24:56:05, 3.47it/s] 16%|█▌ | 59744/371472 [4:45:23<25:45:47, 3.36it/s] 16%|█▌ | 59745/371472 [4:45:23<24:17:02, 3.57it/s] 16%|█▌ | 59746/371472 [4:45:24<23:42:18, 3.65it/s] 16%|█▌ | 59747/371472 [4:45:24<23:16:10, 3.72it/s] 16%|█▌ | 59748/371472 [4:45:24<23:01:14, 3.76it/s] 16%|█▌ | 59749/371472 [4:45:24<22:27:47, 3.85it/s] 16%|█▌ | 59750/371472 [4:45:25<23:58:59, 3.61it/s] 16%|█▌ | 59751/371472 [4:45:25<23:41:48, 3.65it/s] 16%|█▌ | 59752/371472 [4:45:25<23:19:27, 3.71it/s] 16%|█▌ | 59753/371472 [4:45:25<23:38:02, 3.66it/s] 16%|█▌ | 59754/371472 [4:45:26<23:08:56, 3.74it/s] 16%|█▌ | 59755/371472 [4:45:26<23:19:01, 3.71it/s] 16%|█▌ | 59756/371472 [4:45:26<22:56:24, 3.77it/s] 16%|█▌ | 59757/371472 [4:45:26<22:53:06, 3.78it/s] 16%|█▌ | 59758/371472 [4:45:27<24:09:22, 3.58it/s] 16%|█▌ | 59759/371472 [4:45:27<26:55:45, 3.22it/s] 16%|█▌ | 59760/371472 [4:45:27<25:57:31, 3.34it/s] {'loss': 4.0824, 'learning_rate': 8.556206770238531e-07, 'epoch': 2.57} + 16%|█▌ | 59760/371472 [4:45:27<25:57:31, 3.34it/s] 16%|█▌ | 59761/371472 [4:45:28<24:27:52, 3.54it/s] 16%|█▌ | 59762/371472 [4:45:28<24:34:56, 3.52it/s] 16%|█▌ | 59763/371472 [4:45:28<25:46:14, 3.36it/s] 16%|█▌ | 59764/371472 [4:45:29<24:46:36, 3.49it/s] 16%|█▌ | 59765/371472 [4:45:29<24:54:21, 3.48it/s] 16%|█▌ | 59766/371472 [4:45:29<24:18:10, 3.56it/s] 16%|█▌ | 59767/371472 [4:45:29<23:32:37, 3.68it/s] 16%|█▌ | 59768/371472 [4:45:30<23:06:13, 3.75it/s] 16%|█▌ | 59769/371472 [4:45:30<23:27:28, 3.69it/s] 16%|█▌ | 59770/371472 [4:45:30<23:09:42, 3.74it/s] 16%|█▌ | 59771/371472 [4:45:30<23:00:35, 3.76it/s] 16%|█▌ | 59772/371472 [4:45:31<25:48:44, 3.35it/s] 16%|█▌ | 59773/371472 [4:45:31<25:56:24, 3.34it/s] 16%|█▌ | 59774/371472 [4:45:31<25:33:42, 3.39it/s] 16%|█▌ | 59775/371472 [4:45:32<24:34:05, 3.52it/s] 16%|█▌ | 59776/371472 [4:45:32<24:05:20, 3.59it/s] 16%|█▌ | 59777/371472 [4:45:32<24:24:47, 3.55it/s] 16%|█▌ | 59778/371472 [4:45:32<23:53:21, 3.62it/s] 16%|█▌ | 59779/371472 [4:45:33<23:39:44, 3.66it/s] 16%|█▌ | 59780/371472 [4:45:33<24:12:28, 3.58it/s] {'loss': 4.178, 'learning_rate': 8.555721950483742e-07, 'epoch': 2.57} + 16%|█▌ | 59780/371472 [4:45:33<24:12:28, 3.58it/s] 16%|█▌ | 59781/371472 [4:45:33<24:32:27, 3.53it/s] 16%|█▌ | 59782/371472 [4:45:34<23:58:06, 3.61it/s] 16%|█▌ | 59783/371472 [4:45:34<24:00:50, 3.61it/s] 16%|█▌ | 59784/371472 [4:45:34<23:52:14, 3.63it/s] 16%|█▌ | 59785/371472 [4:45:34<23:34:15, 3.67it/s] 16%|█▌ | 59786/371472 [4:45:35<27:18:52, 3.17it/s] 16%|█▌ | 59787/371472 [4:45:35<25:28:02, 3.40it/s] 16%|█▌ | 59788/371472 [4:45:35<25:02:29, 3.46it/s] 16%|█▌ | 59789/371472 [4:45:36<28:50:30, 3.00it/s] 16%|█▌ | 59790/371472 [4:45:36<26:42:13, 3.24it/s] 16%|█▌ | 59791/371472 [4:45:36<28:02:21, 3.09it/s] 16%|█▌ | 59792/371472 [4:45:37<26:35:58, 3.25it/s] 16%|█▌ | 59793/371472 [4:45:37<27:37:40, 3.13it/s] 16%|█▌ | 59794/371472 [4:45:37<26:52:33, 3.22it/s] 16%|█▌ | 59795/371472 [4:45:38<25:21:00, 3.42it/s] 16%|█▌ | 59796/371472 [4:45:38<24:32:34, 3.53it/s] 16%|█▌ | 59797/371472 [4:45:38<25:11:48, 3.44it/s] 16%|█▌ | 59798/371472 [4:45:38<24:23:05, 3.55it/s] 16%|█▌ | 59799/371472 [4:45:39<24:43:55, 3.50it/s] 16%|█▌ | 59800/371472 [4:45:39<25:15:09, 3.43it/s] {'loss': 4.347, 'learning_rate': 8.555237130728954e-07, 'epoch': 2.58} + 16%|█▌ | 59800/371472 [4:45:39<25:15:09, 3.43it/s] 16%|█▌ | 59801/371472 [4:45:39<26:18:29, 3.29it/s] 16%|█▌ | 59802/371472 [4:45:40<24:44:36, 3.50it/s] 16%|█▌ | 59803/371472 [4:45:40<23:54:03, 3.62it/s] 16%|█▌ | 59804/371472 [4:45:40<23:41:47, 3.65it/s] 16%|█▌ | 59805/371472 [4:45:40<23:15:02, 3.72it/s] 16%|█▌ | 59806/371472 [4:45:41<22:39:32, 3.82it/s] 16%|█▌ | 59807/371472 [4:45:41<24:33:31, 3.53it/s] 16%|█▌ | 59808/371472 [4:45:41<23:53:40, 3.62it/s] 16%|█▌ | 59809/371472 [4:45:41<23:56:11, 3.62it/s] 16%|█▌ | 59810/371472 [4:45:42<23:31:14, 3.68it/s] 16%|█▌ | 59811/371472 [4:45:42<24:50:44, 3.48it/s] 16%|█▌ | 59812/371472 [4:45:42<24:12:26, 3.58it/s] 16%|█▌ | 59813/371472 [4:45:43<23:53:33, 3.62it/s] 16%|█▌ | 59814/371472 [4:45:43<24:50:55, 3.48it/s] 16%|█▌ | 59815/371472 [4:45:43<24:08:05, 3.59it/s] 16%|█▌ | 59816/371472 [4:45:43<24:35:14, 3.52it/s] 16%|█▌ | 59817/371472 [4:45:44<23:47:30, 3.64it/s] 16%|█▌ | 59818/371472 [4:45:44<23:35:22, 3.67it/s] 16%|█▌ | 59819/371472 [4:45:44<22:54:21, 3.78it/s] 16%|█▌ | 59820/371472 [4:45:44<22:49:41, 3.79it/s] {'loss': 4.3382, 'learning_rate': 8.554752310974164e-07, 'epoch': 2.58} + 16%|█▌ | 59820/371472 [4:45:44<22:49:41, 3.79it/s] 16%|█▌ | 59821/371472 [4:45:45<22:55:24, 3.78it/s] 16%|█▌ | 59822/371472 [4:45:45<22:46:27, 3.80it/s] 16%|█▌ | 59823/371472 [4:45:45<22:56:55, 3.77it/s] 16%|█▌ | 59824/371472 [4:45:46<23:26:02, 3.69it/s] 16%|█▌ | 59825/371472 [4:45:46<22:54:18, 3.78it/s] 16%|█▌ | 59826/371472 [4:45:46<22:50:42, 3.79it/s] 16%|█▌ | 59827/371472 [4:45:46<23:26:07, 3.69it/s] 16%|█▌ | 59828/371472 [4:45:47<23:17:49, 3.72it/s] 16%|█▌ | 59829/371472 [4:45:47<22:56:42, 3.77it/s] 16%|█▌ | 59830/371472 [4:45:47<23:52:26, 3.63it/s] 16%|█▌ | 59831/371472 [4:45:47<23:58:28, 3.61it/s] 16%|█▌ | 59832/371472 [4:45:48<23:33:52, 3.67it/s] 16%|█▌ | 59833/371472 [4:45:48<23:55:31, 3.62it/s] 16%|█▌ | 59834/371472 [4:45:48<24:47:24, 3.49it/s] 16%|█▌ | 59835/371472 [4:45:49<24:34:11, 3.52it/s] 16%|█▌ | 59836/371472 [4:45:49<25:58:47, 3.33it/s] 16%|█▌ | 59837/371472 [4:45:49<27:33:31, 3.14it/s] 16%|█▌ | 59838/371472 [4:45:50<28:16:21, 3.06it/s] 16%|█▌ | 59839/371472 [4:45:50<26:52:13, 3.22it/s] 16%|█▌ | 59840/371472 [4:45:50<26:35:15, 3.26it/s] {'loss': 4.1127, 'learning_rate': 8.554267491219375e-07, 'epoch': 2.58} + 16%|█▌ | 59840/371472 [4:45:50<26:35:15, 3.26it/s] 16%|█▌ | 59841/371472 [4:45:50<25:37:12, 3.38it/s] 16%|█▌ | 59842/371472 [4:45:51<24:43:40, 3.50it/s] 16%|█▌ | 59843/371472 [4:45:51<24:19:53, 3.56it/s] 16%|█▌ | 59844/371472 [4:45:51<23:56:37, 3.62it/s] 16%|█▌ | 59845/371472 [4:45:52<24:04:11, 3.60it/s] 16%|█▌ | 59846/371472 [4:45:52<23:16:50, 3.72it/s] 16%|█▌ | 59847/371472 [4:45:52<23:54:05, 3.62it/s] 16%|█▌ | 59848/371472 [4:45:52<25:05:15, 3.45it/s] 16%|█▌ | 59849/371472 [4:45:53<24:29:48, 3.53it/s] 16%|█▌ | 59850/371472 [4:45:53<23:49:28, 3.63it/s] 16%|█▌ | 59851/371472 [4:45:53<23:27:54, 3.69it/s] 16%|█▌ | 59852/371472 [4:45:53<22:40:54, 3.82it/s] 16%|█▌ | 59853/371472 [4:45:54<22:50:06, 3.79it/s] 16%|█▌ | 59854/371472 [4:45:54<23:04:39, 3.75it/s] 16%|█▌ | 59855/371472 [4:45:54<22:33:48, 3.84it/s] 16%|█▌ | 59856/371472 [4:45:55<25:28:08, 3.40it/s] 16%|█▌ | 59857/371472 [4:45:55<24:47:16, 3.49it/s] 16%|█▌ | 59858/371472 [4:45:55<24:01:14, 3.60it/s] 16%|█▌ | 59859/371472 [4:45:55<24:40:41, 3.51it/s] 16%|█▌ | 59860/371472 [4:45:56<23:50:51, 3.63it/s] {'loss': 4.0289, 'learning_rate': 8.553782671464586e-07, 'epoch': 2.58} + 16%|█▌ | 59860/371472 [4:45:56<23:50:51, 3.63it/s] 16%|█▌ | 59861/371472 [4:45:56<24:33:53, 3.52it/s] 16%|█▌ | 59862/371472 [4:45:56<24:15:57, 3.57it/s] 16%|█▌ | 59863/371472 [4:45:57<23:31:12, 3.68it/s] 16%|█▌ | 59864/371472 [4:45:57<23:34:52, 3.67it/s] 16%|█▌ | 59865/371472 [4:45:57<24:55:27, 3.47it/s] 16%|█▌ | 59866/371472 [4:45:57<24:05:19, 3.59it/s] 16%|█▌ | 59867/371472 [4:45:58<24:42:55, 3.50it/s] 16%|█▌ | 59868/371472 [4:45:58<26:21:50, 3.28it/s] 16%|█▌ | 59869/371472 [4:45:58<26:10:35, 3.31it/s] 16%|█▌ | 59870/371472 [4:45:59<25:49:27, 3.35it/s] 16%|█▌ | 59871/371472 [4:45:59<25:53:55, 3.34it/s] 16%|█▌ | 59872/371472 [4:45:59<26:24:22, 3.28it/s] 16%|█▌ | 59873/371472 [4:46:00<27:02:05, 3.20it/s] 16%|█▌ | 59874/371472 [4:46:00<26:48:57, 3.23it/s] 16%|█▌ | 59875/371472 [4:46:00<26:23:39, 3.28it/s] 16%|█▌ | 59876/371472 [4:46:01<28:27:56, 3.04it/s] 16%|█▌ | 59877/371472 [4:46:01<26:18:56, 3.29it/s] 16%|█▌ | 59878/371472 [4:46:01<27:00:43, 3.20it/s] 16%|█▌ | 59879/371472 [4:46:01<27:14:54, 3.18it/s] 16%|█▌ | 59880/371472 [4:46:02<28:16:14, 3.06it/s] {'loss': 4.1871, 'learning_rate': 8.553297851709797e-07, 'epoch': 2.58} + 16%|█▌ | 59880/371472 [4:46:02<28:16:14, 3.06it/s] 16%|█▌ | 59881/371472 [4:46:02<27:27:36, 3.15it/s] 16%|█▌ | 59882/371472 [4:46:02<26:03:33, 3.32it/s] 16%|█▌ | 59883/371472 [4:46:03<24:43:57, 3.50it/s] 16%|█▌ | 59884/371472 [4:46:03<24:22:35, 3.55it/s] 16%|█▌ | 59885/371472 [4:46:03<26:16:40, 3.29it/s] 16%|█▌ | 59886/371472 [4:46:03<25:01:09, 3.46it/s] 16%|█▌ | 59887/371472 [4:46:04<25:04:06, 3.45it/s] 16%|█▌ | 59888/371472 [4:46:04<25:36:26, 3.38it/s] 16%|█▌ | 59889/371472 [4:46:04<24:46:18, 3.49it/s] 16%|█▌ | 59890/371472 [4:46:05<29:59:01, 2.89it/s] 16%|█▌ | 59891/371472 [4:46:05<28:09:39, 3.07it/s] 16%|█▌ | 59892/371472 [4:46:05<27:00:48, 3.20it/s] 16%|█▌ | 59893/371472 [4:46:06<26:35:36, 3.25it/s] 16%|█▌ | 59894/371472 [4:46:06<25:08:26, 3.44it/s] 16%|█▌ | 59895/371472 [4:46:06<23:43:51, 3.65it/s] 16%|█▌ | 59896/371472 [4:46:06<23:10:50, 3.73it/s] 16%|█▌ | 59897/371472 [4:46:07<22:43:49, 3.81it/s] 16%|█▌ | 59898/371472 [4:46:07<22:42:10, 3.81it/s] 16%|█▌ | 59899/371472 [4:46:07<25:55:13, 3.34it/s] 16%|█▌ | 59900/371472 [4:46:08<24:41:38, 3.50it/s] {'loss': 4.2729, 'learning_rate': 8.552813031955009e-07, 'epoch': 2.58} + 16%|█▌ | 59900/371472 [4:46:08<24:41:38, 3.50it/s] 16%|█▌ | 59901/371472 [4:46:08<23:39:28, 3.66it/s] 16%|█▌ | 59902/371472 [4:46:08<24:05:08, 3.59it/s] 16%|█▌ | 59903/371472 [4:46:08<25:01:01, 3.46it/s] 16%|█▌ | 59904/371472 [4:46:09<24:00:20, 3.61it/s] 16%|█▌ | 59905/371472 [4:46:09<25:57:19, 3.33it/s] 16%|█▌ | 59906/371472 [4:46:09<25:03:09, 3.45it/s] 16%|█▌ | 59907/371472 [4:46:10<24:42:55, 3.50it/s] 16%|█▌ | 59908/371472 [4:46:10<24:06:22, 3.59it/s] 16%|█▌ | 59909/371472 [4:46:10<24:25:27, 3.54it/s] 16%|█▌ | 59910/371472 [4:46:10<24:25:18, 3.54it/s] 16%|█▌ | 59911/371472 [4:46:11<24:28:26, 3.54it/s] 16%|█▌ | 59912/371472 [4:46:11<25:31:00, 3.39it/s] 16%|█▌ | 59913/371472 [4:46:11<24:49:07, 3.49it/s] 16%|█▌ | 59914/371472 [4:46:12<24:15:38, 3.57it/s] 16%|█▌ | 59915/371472 [4:46:12<25:02:11, 3.46it/s] 16%|█▌ | 59916/371472 [4:46:12<24:33:02, 3.53it/s] 16%|█▌ | 59917/371472 [4:46:12<24:05:33, 3.59it/s] 16%|█▌ | 59918/371472 [4:46:13<25:46:48, 3.36it/s] 16%|█▌ | 59919/371472 [4:46:13<25:38:55, 3.37it/s] 16%|█▌ | 59920/371472 [4:46:13<25:53:35, 3.34it/s] {'loss': 4.1083, 'learning_rate': 8.55232821220022e-07, 'epoch': 2.58} + 16%|█▌ | 59920/371472 [4:46:13<25:53:35, 3.34it/s] 16%|█▌ | 59921/371472 [4:46:14<24:20:22, 3.56it/s] 16%|█▌ | 59922/371472 [4:46:14<24:00:56, 3.60it/s] 16%|█▌ | 59923/371472 [4:46:14<23:26:16, 3.69it/s] 16%|█▌ | 59924/371472 [4:46:14<23:49:33, 3.63it/s] 16%|█▌ | 59925/371472 [4:46:15<24:49:50, 3.49it/s] 16%|█▌ | 59926/371472 [4:46:15<24:02:22, 3.60it/s] 16%|█▌ | 59927/371472 [4:46:15<23:15:35, 3.72it/s] 16%|█▌ | 59928/371472 [4:46:16<24:24:10, 3.55it/s] 16%|█▌ | 59929/371472 [4:46:16<24:27:42, 3.54it/s] 16%|█▌ | 59930/371472 [4:46:16<28:00:10, 3.09it/s] 16%|█▌ | 59931/371472 [4:46:16<26:58:33, 3.21it/s] 16%|█▌ | 59932/371472 [4:46:17<26:47:04, 3.23it/s] 16%|█▌ | 59933/371472 [4:46:17<24:52:47, 3.48it/s] 16%|█▌ | 59934/371472 [4:46:17<24:04:59, 3.59it/s] 16%|█▌ | 59935/371472 [4:46:18<23:25:01, 3.70it/s] 16%|█▌ | 59936/371472 [4:46:18<23:05:30, 3.75it/s] 16%|█▌ | 59937/371472 [4:46:18<24:24:15, 3.55it/s] 16%|█▌ | 59938/371472 [4:46:18<24:12:39, 3.57it/s] 16%|█▌ | 59939/371472 [4:46:19<24:08:29, 3.58it/s] 16%|█▌ | 59940/371472 [4:46:19<23:18:24, 3.71it/s] {'loss': 4.0924, 'learning_rate': 8.55184339244543e-07, 'epoch': 2.58} + 16%|█▌ | 59940/371472 [4:46:19<23:18:24, 3.71it/s] 16%|█▌ | 59941/371472 [4:46:19<23:08:37, 3.74it/s] 16%|█▌ | 59942/371472 [4:46:19<22:47:28, 3.80it/s] 16%|█▌ | 59943/371472 [4:46:20<24:05:48, 3.59it/s] 16%|█▌ | 59944/371472 [4:46:20<23:58:11, 3.61it/s] 16%|█▌ | 59945/371472 [4:46:20<25:08:30, 3.44it/s] 16%|█▌ | 59946/371472 [4:46:21<25:06:02, 3.45it/s] 16%|█▌ | 59947/371472 [4:46:21<24:40:01, 3.51it/s] 16%|█▌ | 59948/371472 [4:46:21<24:55:15, 3.47it/s] 16%|█▌ | 59949/371472 [4:46:22<26:01:38, 3.32it/s] 16%|█▌ | 59950/371472 [4:46:22<25:47:23, 3.36it/s] 16%|█▌ | 59951/371472 [4:46:22<25:33:18, 3.39it/s] 16%|█▌ | 59952/371472 [4:46:22<24:52:03, 3.48it/s] 16%|█▌ | 59953/371472 [4:46:23<24:31:46, 3.53it/s] 16%|█▌ | 59954/371472 [4:46:23<24:22:55, 3.55it/s] 16%|█▌ | 59955/371472 [4:46:23<23:53:52, 3.62it/s] 16%|█▌ | 59956/371472 [4:46:24<25:19:52, 3.42it/s] 16%|█▌ | 59957/371472 [4:46:24<24:47:52, 3.49it/s] 16%|█▌ | 59958/371472 [4:46:24<26:54:47, 3.22it/s] 16%|█▌ | 59959/371472 [4:46:24<25:50:26, 3.35it/s] 16%|█▌ | 59960/371472 [4:46:25<25:41:12, 3.37it/s] {'loss': 4.134, 'learning_rate': 8.55135857269064e-07, 'epoch': 2.58} + 16%|█▌ | 59960/371472 [4:46:25<25:41:12, 3.37it/s] 16%|█▌ | 59961/371472 [4:46:25<25:04:19, 3.45it/s] 16%|█▌ | 59962/371472 [4:46:25<24:09:41, 3.58it/s] 16%|█▌ | 59963/371472 [4:46:26<26:00:04, 3.33it/s] 16%|█▌ | 59964/371472 [4:46:26<24:35:43, 3.52it/s] 16%|█▌ | 59965/371472 [4:46:26<24:18:43, 3.56it/s] 16%|█▌ | 59966/371472 [4:46:27<26:41:49, 3.24it/s] 16%|█▌ | 59967/371472 [4:46:27<25:38:25, 3.37it/s] 16%|█▌ | 59968/371472 [4:46:27<26:02:36, 3.32it/s] 16%|█▌ | 59969/371472 [4:46:27<26:13:21, 3.30it/s] 16%|█▌ | 59970/371472 [4:46:28<26:04:40, 3.32it/s] 16%|█▌ | 59971/371472 [4:46:28<27:02:00, 3.20it/s] 16%|█▌ | 59972/371472 [4:46:28<26:04:40, 3.32it/s] 16%|█▌ | 59973/371472 [4:46:29<26:00:59, 3.33it/s] 16%|█▌ | 59974/371472 [4:46:29<26:34:45, 3.26it/s] 16%|█▌ | 59975/371472 [4:46:29<24:59:02, 3.46it/s] 16%|█▌ | 59976/371472 [4:46:29<24:13:22, 3.57it/s] 16%|█▌ | 59977/371472 [4:46:30<23:38:44, 3.66it/s] 16%|█▌ | 59978/371472 [4:46:30<23:54:06, 3.62it/s] 16%|█▌ | 59979/371472 [4:46:30<27:27:22, 3.15it/s] 16%|█▌ | 59980/371472 [4:46:31<29:12:36, 2.96it/s] {'loss': 4.1005, 'learning_rate': 8.550873752935852e-07, 'epoch': 2.58} + 16%|█▌ | 59980/371472 [4:46:31<29:12:36, 2.96it/s] 16%|█▌ | 59981/371472 [4:46:31<27:09:49, 3.19it/s] 16%|█▌ | 59982/371472 [4:46:31<26:31:04, 3.26it/s] 16%|█▌ | 59983/371472 [4:46:32<25:35:50, 3.38it/s] 16%|█▌ | 59984/371472 [4:46:32<24:21:59, 3.55it/s] 16%|█▌ | 59985/371472 [4:46:32<23:50:44, 3.63it/s] 16%|█▌ | 59986/371472 [4:46:32<23:45:42, 3.64it/s] 16%|█▌ | 59987/371472 [4:46:33<24:03:26, 3.60it/s] 16%|█▌ | 59988/371472 [4:46:33<24:26:21, 3.54it/s] 16%|█▌ | 59989/371472 [4:46:33<23:25:28, 3.69it/s] 16%|█▌ | 59990/371472 [4:46:33<23:52:54, 3.62it/s] 16%|█▌ | 59991/371472 [4:46:34<23:25:50, 3.69it/s] 16%|█▌ | 59992/371472 [4:46:34<24:21:46, 3.55it/s] 16%|█▌ | 59993/371472 [4:46:34<25:28:38, 3.40it/s] 16%|█▌ | 59994/371472 [4:46:35<25:11:47, 3.43it/s] 16%|█▌ | 59995/371472 [4:46:35<23:46:42, 3.64it/s] 16%|█▌ | 59996/371472 [4:46:35<24:31:47, 3.53it/s] 16%|█▌ | 59997/371472 [4:46:36<25:30:51, 3.39it/s] 16%|█▌ | 59998/371472 [4:46:36<25:54:07, 3.34it/s] 16%|█▌ | 59999/371472 [4:46:36<25:00:01, 3.46it/s] 16%|█▌ | 60000/371472 [4:46:36<24:03:37, 3.60it/s] {'loss': 4.0589, 'learning_rate': 8.550388933181064e-07, 'epoch': 2.58} + 16%|█▌ | 60000/371472 [4:46:36<24:03:37, 3.60it/s] 16%|█▌ | 60001/371472 [4:46:37<23:30:49, 3.68it/s] 16%|█▌ | 60002/371472 [4:46:37<25:30:40, 3.39it/s] 16%|█▌ | 60003/371472 [4:46:37<25:15:21, 3.43it/s] 16%|█▌ | 60004/371472 [4:46:37<23:47:59, 3.64it/s] 16%|█▌ | 60005/371472 [4:46:38<23:11:53, 3.73it/s] 16%|█▌ | 60006/371472 [4:46:38<24:50:22, 3.48it/s] 16%|█▌ | 60007/371472 [4:46:38<25:51:36, 3.35it/s] 16%|█▌ | 60008/371472 [4:46:39<24:41:15, 3.50it/s] 16%|█▌ | 60009/371472 [4:46:39<23:30:44, 3.68it/s] 16%|█▌ | 60010/371472 [4:46:39<25:05:49, 3.45it/s] 16%|█▌ | 60011/371472 [4:46:39<24:45:40, 3.49it/s] 16%|█▌ | 60012/371472 [4:46:40<23:51:54, 3.63it/s] 16%|█▌ | 60013/371472 [4:46:40<23:32:15, 3.68it/s] 16%|█▌ | 60014/371472 [4:46:40<22:54:11, 3.78it/s] 16%|█▌ | 60015/371472 [4:46:41<23:17:42, 3.71it/s] 16%|█▌ | 60016/371472 [4:46:41<22:29:41, 3.85it/s] 16%|█▌ | 60017/371472 [4:46:41<22:18:36, 3.88it/s] 16%|█▌ | 60018/371472 [4:46:41<21:39:39, 3.99it/s] 16%|█▌ | 60019/371472 [4:46:42<22:19:39, 3.87it/s] 16%|█▌ | 60020/371472 [4:46:42<21:51:55, 3.96it/s] {'loss': 4.3462, 'learning_rate': 8.549904113426275e-07, 'epoch': 2.59} + 16%|█▌ | 60020/371472 [4:46:42<21:51:55, 3.96it/s] 16%|█▌ | 60021/371472 [4:46:42<23:36:01, 3.67it/s] 16%|█▌ | 60022/371472 [4:46:42<23:54:04, 3.62it/s] 16%|█▌ | 60023/371472 [4:46:43<23:10:34, 3.73it/s] 16%|█▌ | 60024/371472 [4:46:43<22:26:53, 3.85it/s] 16%|█▌ | 60025/371472 [4:46:43<23:17:37, 3.71it/s] 16%|█▌ | 60026/371472 [4:46:43<23:43:04, 3.65it/s] 16%|█▌ | 60027/371472 [4:46:44<25:43:58, 3.36it/s] 16%|█▌ | 60028/371472 [4:46:44<24:53:21, 3.48it/s] 16%|█▌ | 60029/371472 [4:46:44<25:50:36, 3.35it/s] 16%|█▌ | 60030/371472 [4:46:45<25:10:25, 3.44it/s] 16%|█▌ | 60031/371472 [4:46:45<23:54:44, 3.62it/s] 16%|█▌ | 60032/371472 [4:46:45<26:27:18, 3.27it/s] 16%|█▌ | 60033/371472 [4:46:46<25:07:29, 3.44it/s] 16%|█▌ | 60034/371472 [4:46:46<24:31:12, 3.53it/s] 16%|█▌ | 60035/371472 [4:46:46<24:42:42, 3.50it/s] 16%|█▌ | 60036/371472 [4:46:46<23:47:36, 3.64it/s] 16%|█▌ | 60037/371472 [4:46:47<22:50:17, 3.79it/s] 16%|█▌ | 60038/371472 [4:46:47<24:44:15, 3.50it/s] 16%|█▌ | 60039/371472 [4:46:47<28:06:49, 3.08it/s] 16%|█▌ | 60040/371472 [4:46:48<27:29:18, 3.15it/s] {'loss': 4.1581, 'learning_rate': 8.549419293671486e-07, 'epoch': 2.59} + 16%|█▌ | 60040/371472 [4:46:48<27:29:18, 3.15it/s] 16%|█▌ | 60041/371472 [4:46:48<25:40:48, 3.37it/s] 16%|█▌ | 60042/371472 [4:46:48<24:39:41, 3.51it/s] 16%|█▌ | 60043/371472 [4:46:48<26:13:49, 3.30it/s] 16%|█▌ | 60044/371472 [4:46:49<26:29:50, 3.26it/s] 16%|█▌ | 60045/371472 [4:46:49<26:22:51, 3.28it/s] 16%|█▌ | 60046/371472 [4:46:49<25:09:24, 3.44it/s] 16%|█▌ | 60047/371472 [4:46:50<24:11:51, 3.57it/s] 16%|█▌ | 60048/371472 [4:46:50<23:13:34, 3.72it/s] 16%|█▌ | 60049/371472 [4:46:50<24:16:19, 3.56it/s] 16%|█▌ | 60050/371472 [4:46:50<24:12:44, 3.57it/s] 16%|█▌ | 60051/371472 [4:46:51<24:35:11, 3.52it/s] 16%|█▌ | 60052/371472 [4:46:51<24:50:17, 3.48it/s] 16%|█▌ | 60053/371472 [4:46:51<28:18:28, 3.06it/s] 16%|█▌ | 60054/371472 [4:46:52<27:07:57, 3.19it/s] 16%|█▌ | 60055/371472 [4:46:52<27:51:03, 3.11it/s] 16%|█▌ | 60056/371472 [4:46:52<26:23:15, 3.28it/s] 16%|█▌ | 60057/371472 [4:46:53<24:39:22, 3.51it/s] 16%|█▌ | 60058/371472 [4:46:53<23:59:35, 3.61it/s] 16%|█▌ | 60059/371472 [4:46:53<24:33:27, 3.52it/s] 16%|█▌ | 60060/371472 [4:46:53<24:06:25, 3.59it/s] {'loss': 4.3692, 'learning_rate': 8.548934473916696e-07, 'epoch': 2.59} + 16%|█▌ | 60060/371472 [4:46:53<24:06:25, 3.59it/s] 16%|█▌ | 60061/371472 [4:46:54<23:29:49, 3.68it/s] 16%|█▌ | 60062/371472 [4:46:54<24:13:02, 3.57it/s] 16%|█▌ | 60063/371472 [4:46:54<24:18:54, 3.56it/s] 16%|█▌ | 60064/371472 [4:46:55<25:24:01, 3.41it/s] 16%|█▌ | 60065/371472 [4:46:55<25:33:31, 3.38it/s] 16%|█▌ | 60066/371472 [4:46:55<25:02:45, 3.45it/s] 16%|█▌ | 60067/371472 [4:46:55<25:50:01, 3.35it/s] 16%|█▌ | 60068/371472 [4:46:56<28:55:30, 2.99it/s] 16%|█▌ | 60069/371472 [4:46:56<27:09:04, 3.19it/s] 16%|█▌ | 60070/371472 [4:46:56<25:47:41, 3.35it/s] 16%|█▌ | 60071/371472 [4:46:57<24:36:23, 3.52it/s] 16%|█▌ | 60072/371472 [4:46:57<24:34:53, 3.52it/s] 16%|█▌ | 60073/371472 [4:46:57<24:02:03, 3.60it/s] 16%|█▌ | 60074/371472 [4:46:57<23:12:40, 3.73it/s] 16%|█▌ | 60075/371472 [4:46:58<22:46:18, 3.80it/s] 16%|█▌ | 60076/371472 [4:46:58<22:48:44, 3.79it/s] 16%|█▌ | 60077/371472 [4:46:58<24:26:06, 3.54it/s] 16%|█▌ | 60078/371472 [4:46:59<25:48:31, 3.35it/s] 16%|█▌ | 60079/371472 [4:46:59<26:21:07, 3.28it/s] 16%|█▌ | 60080/371472 [4:46:59<26:16:58, 3.29it/s] {'loss': 4.0618, 'learning_rate': 8.548449654161908e-07, 'epoch': 2.59} + 16%|█▌ | 60080/371472 [4:46:59<26:16:58, 3.29it/s] 16%|█▌ | 60081/371472 [4:46:59<25:05:44, 3.45it/s] 16%|█▌ | 60082/371472 [4:47:00<24:46:50, 3.49it/s] 16%|█▌ | 60083/371472 [4:47:00<24:06:18, 3.59it/s] 16%|█▌ | 60084/371472 [4:47:00<23:09:56, 3.73it/s] 16%|█▌ | 60085/371472 [4:47:01<23:40:59, 3.65it/s] 16%|█▌ | 60086/371472 [4:47:01<24:10:26, 3.58it/s] 16%|█▌ | 60087/371472 [4:47:01<24:31:26, 3.53it/s] 16%|█▌ | 60088/371472 [4:47:01<25:39:42, 3.37it/s] 16%|█▌ | 60089/371472 [4:47:02<27:32:14, 3.14it/s] 16%|█▌ | 60090/371472 [4:47:02<25:58:48, 3.33it/s] 16%|█▌ | 60091/371472 [4:47:02<25:30:23, 3.39it/s] 16%|█▌ | 60092/371472 [4:47:03<25:23:10, 3.41it/s] 16%|█▌ | 60093/371472 [4:47:03<25:24:43, 3.40it/s] 16%|█▌ | 60094/371472 [4:47:03<27:05:50, 3.19it/s] 16%|█▌ | 60095/371472 [4:47:04<26:29:29, 3.26it/s] 16%|█▌ | 60096/371472 [4:47:04<25:48:49, 3.35it/s] 16%|█▌ | 60097/371472 [4:47:04<24:27:31, 3.54it/s] 16%|█▌ | 60098/371472 [4:47:04<24:03:49, 3.59it/s] 16%|█▌ | 60099/371472 [4:47:05<23:12:28, 3.73it/s] 16%|█▌ | 60100/371472 [4:47:05<23:31:22, 3.68it/s] {'loss': 4.3576, 'learning_rate': 8.547964834407119e-07, 'epoch': 2.59} + 16%|█▌ | 60100/371472 [4:47:05<23:31:22, 3.68it/s] 16%|█▌ | 60101/371472 [4:47:05<22:44:50, 3.80it/s] 16%|█▌ | 60102/371472 [4:47:05<22:57:24, 3.77it/s] 16%|█▌ | 60103/371472 [4:47:06<22:37:29, 3.82it/s] 16%|█▌ | 60104/371472 [4:47:06<22:49:30, 3.79it/s] 16%|█▌ | 60105/371472 [4:47:06<23:40:12, 3.65it/s] 16%|█▌ | 60106/371472 [4:47:07<23:11:56, 3.73it/s] 16%|█▌ | 60107/371472 [4:47:07<24:59:01, 3.46it/s] 16%|█▌ | 60108/371472 [4:47:07<24:11:13, 3.58it/s] 16%|█▌ | 60109/371472 [4:47:07<23:46:58, 3.64it/s] 16%|█▌ | 60110/371472 [4:47:08<25:18:01, 3.42it/s] 16%|█▌ | 60111/371472 [4:47:08<24:21:22, 3.55it/s] 16%|█▌ | 60112/371472 [4:47:08<25:28:30, 3.40it/s] 16%|█▌ | 60113/371472 [4:47:09<25:13:49, 3.43it/s] 16%|█▌ | 60114/371472 [4:47:09<26:57:30, 3.21it/s] 16%|█▌ | 60115/371472 [4:47:09<25:53:05, 3.34it/s] 16%|█▌ | 60116/371472 [4:47:10<25:51:28, 3.34it/s] 16%|█▌ | 60117/371472 [4:47:10<24:47:24, 3.49it/s] 16%|█▌ | 60118/371472 [4:47:10<25:07:52, 3.44it/s] 16%|█▌ | 60119/371472 [4:47:10<24:38:00, 3.51it/s] 16%|█▌ | 60120/371472 [4:47:11<24:29:10, 3.53it/s] {'loss': 4.3794, 'learning_rate': 8.54748001465233e-07, 'epoch': 2.59} + 16%|█▌ | 60120/371472 [4:47:11<24:29:10, 3.53it/s] 16%|█▌ | 60121/371472 [4:47:11<25:21:12, 3.41it/s] 16%|█▌ | 60122/371472 [4:47:11<27:19:50, 3.16it/s] 16%|█▌ | 60123/371472 [4:47:12<25:30:07, 3.39it/s] 16%|█▌ | 60124/371472 [4:47:12<25:34:46, 3.38it/s] 16%|█▌ | 60125/371472 [4:47:12<25:15:28, 3.42it/s] 16%|█▌ | 60126/371472 [4:47:12<23:43:38, 3.64it/s] 16%|█▌ | 60127/371472 [4:47:13<25:34:32, 3.38it/s] 16%|█▌ | 60128/371472 [4:47:13<25:25:36, 3.40it/s] 16%|█▌ | 60129/371472 [4:47:13<25:49:28, 3.35it/s] 16%|█▌ | 60130/371472 [4:47:14<24:30:19, 3.53it/s] 16%|█▌ | 60131/371472 [4:47:14<24:01:25, 3.60it/s] 16%|█▌ | 60132/371472 [4:47:14<23:19:00, 3.71it/s] 16%|█▌ | 60133/371472 [4:47:14<22:48:23, 3.79it/s] 16%|█▌ | 60134/371472 [4:47:15<23:23:42, 3.70it/s] 16%|█▌ | 60135/371472 [4:47:15<23:43:29, 3.65it/s] 16%|█▌ | 60136/371472 [4:47:15<23:38:22, 3.66it/s] 16%|█▌ | 60137/371472 [4:47:15<23:25:29, 3.69it/s] 16%|█▌ | 60138/371472 [4:47:16<24:35:30, 3.52it/s] 16%|█▌ | 60139/371472 [4:47:16<24:59:20, 3.46it/s] 16%|█▌ | 60140/371472 [4:47:16<26:29:19, 3.26it/s] {'loss': 4.1958, 'learning_rate': 8.546995194897541e-07, 'epoch': 2.59} + 16%|█▌ | 60140/371472 [4:47:16<26:29:19, 3.26it/s] 16%|█▌ | 60141/371472 [4:47:17<25:32:29, 3.39it/s] 16%|█▌ | 60142/371472 [4:47:17<24:49:59, 3.48it/s] 16%|█▌ | 60143/371472 [4:47:17<26:00:48, 3.32it/s] 16%|█▌ | 60144/371472 [4:47:18<24:31:01, 3.53it/s] 16%|█▌ | 60145/371472 [4:47:18<23:36:43, 3.66it/s] 16%|█▌ | 60146/371472 [4:47:18<23:34:51, 3.67it/s] 16%|█▌ | 60147/371472 [4:47:18<23:48:46, 3.63it/s] 16%|█▌ | 60148/371472 [4:47:19<24:52:52, 3.48it/s] 16%|█▌ | 60149/371472 [4:47:19<24:56:00, 3.47it/s] 16%|█▌ | 60150/371472 [4:47:19<24:57:08, 3.47it/s] 16%|█▌ | 60151/371472 [4:47:19<24:20:23, 3.55it/s] 16%|█▌ | 60152/371472 [4:47:20<24:26:56, 3.54it/s] 16%|█▌ | 60153/371472 [4:47:20<24:30:56, 3.53it/s] 16%|█▌ | 60154/371472 [4:47:20<24:45:11, 3.49it/s] 16%|█▌ | 60155/371472 [4:47:21<23:41:54, 3.65it/s] 16%|█▌ | 60156/371472 [4:47:21<23:14:05, 3.72it/s] 16%|█▌ | 60157/371472 [4:47:21<23:41:17, 3.65it/s] 16%|█▌ | 60158/371472 [4:47:21<23:22:45, 3.70it/s] 16%|█▌ | 60159/371472 [4:47:22<22:41:48, 3.81it/s] 16%|█▌ | 60160/371472 [4:47:22<22:32:15, 3.84it/s] {'loss': 4.2342, 'learning_rate': 8.546510375142752e-07, 'epoch': 2.59} + 16%|█▌ | 60160/371472 [4:47:22<22:32:15, 3.84it/s] 16%|█▌ | 60161/371472 [4:47:22<22:23:48, 3.86it/s] 16%|█▌ | 60162/371472 [4:47:22<22:46:56, 3.80it/s] 16%|█▌ | 60163/371472 [4:47:23<22:58:27, 3.76it/s] 16%|█▌ | 60164/371472 [4:47:23<23:52:00, 3.62it/s] 16%|█▌ | 60165/371472 [4:47:23<24:59:33, 3.46it/s] 16%|█▌ | 60166/371472 [4:47:24<25:18:38, 3.42it/s] 16%|█▌ | 60167/371472 [4:47:24<25:10:17, 3.44it/s] 16%|█▌ | 60168/371472 [4:47:24<25:30:10, 3.39it/s] 16%|█▌ | 60169/371472 [4:47:24<24:08:59, 3.58it/s] 16%|█▌ | 60170/371472 [4:47:25<24:38:20, 3.51it/s] 16%|█▌ | 60171/371472 [4:47:25<24:23:59, 3.54it/s] 16%|█▌ | 60172/371472 [4:47:25<24:25:02, 3.54it/s] 16%|█▌ | 60173/371472 [4:47:26<25:01:27, 3.46it/s] 16%|█▌ | 60174/371472 [4:47:26<23:58:26, 3.61it/s] 16%|█▌ | 60175/371472 [4:47:26<24:27:25, 3.54it/s] 16%|█▌ | 60176/371472 [4:47:26<25:15:00, 3.42it/s] 16%|█▌ | 60177/371472 [4:47:27<24:10:19, 3.58it/s] 16%|█▌ | 60178/371472 [4:47:27<23:50:32, 3.63it/s] 16%|█▌ | 60179/371472 [4:47:27<23:26:24, 3.69it/s] 16%|█▌ | 60180/371472 [4:47:28<23:25:17, 3.69it/s] {'loss': 4.2702, 'learning_rate': 8.546025555387964e-07, 'epoch': 2.59} + 16%|█▌ | 60180/371472 [4:47:28<23:25:17, 3.69it/s] 16%|█▌ | 60181/371472 [4:47:28<24:32:16, 3.52it/s] 16%|█▌ | 60182/371472 [4:47:28<23:35:28, 3.67it/s] 16%|█▌ | 60183/371472 [4:47:28<23:47:11, 3.64it/s] 16%|█▌ | 60184/371472 [4:47:29<23:38:36, 3.66it/s] 16%|█▌ | 60185/371472 [4:47:29<23:30:59, 3.68it/s] 16%|█▌ | 60186/371472 [4:47:29<24:52:14, 3.48it/s] 16%|█▌ | 60187/371472 [4:47:30<25:03:03, 3.45it/s] 16%|█▌ | 60188/371472 [4:47:30<24:45:11, 3.49it/s] 16%|█▌ | 60189/371472 [4:47:30<24:29:53, 3.53it/s] 16%|█▌ | 60190/371472 [4:47:30<25:25:17, 3.40it/s] 16%|█▌ | 60191/371472 [4:47:31<23:59:57, 3.60it/s] 16%|█▌ | 60192/371472 [4:47:31<23:11:27, 3.73it/s] 16%|█▌ | 60193/371472 [4:47:31<24:37:17, 3.51it/s] 16%|█▌ | 60194/371472 [4:47:32<25:50:37, 3.35it/s] 16%|█▌ | 60195/371472 [4:47:32<26:56:54, 3.21it/s] 16%|█▌ | 60196/371472 [4:47:32<27:03:39, 3.20it/s] 16%|█▌ | 60197/371472 [4:47:32<25:34:46, 3.38it/s] 16%|█▌ | 60198/371472 [4:47:33<25:24:41, 3.40it/s] 16%|█▌ | 60199/371472 [4:47:33<25:46:54, 3.35it/s] 16%|█▌ | 60200/371472 [4:47:33<25:12:06, 3.43it/s] {'loss': 4.0517, 'learning_rate': 8.545540735633173e-07, 'epoch': 2.59} + 16%|█▌ | 60200/371472 [4:47:33<25:12:06, 3.43it/s] 16%|█▌ | 60201/371472 [4:47:34<24:09:23, 3.58it/s] 16%|█▌ | 60202/371472 [4:47:34<25:55:56, 3.33it/s] 16%|█▌ | 60203/371472 [4:47:34<24:53:05, 3.47it/s] 16%|█▌ | 60204/371472 [4:47:34<24:10:01, 3.58it/s] 16%|█▌ | 60205/371472 [4:47:35<23:15:23, 3.72it/s] 16%|█▌ | 60206/371472 [4:47:35<23:32:58, 3.67it/s] 16%|█▌ | 60207/371472 [4:47:35<23:36:25, 3.66it/s] 16%|█▌ | 60208/371472 [4:47:35<23:27:59, 3.68it/s] 16%|█▌ | 60209/371472 [4:47:36<23:45:29, 3.64it/s] 16%|█▌ | 60210/371472 [4:47:36<22:51:03, 3.78it/s] 16%|█▌ | 60211/371472 [4:47:36<23:04:58, 3.75it/s] 16%|█▌ | 60212/371472 [4:47:37<22:38:54, 3.82it/s] 16%|█▌ | 60213/371472 [4:47:37<22:18:20, 3.88it/s] 16%|█▌ | 60214/371472 [4:47:37<23:00:07, 3.76it/s] 16%|█▌ | 60215/371472 [4:47:37<22:41:01, 3.81it/s] 16%|█▌ | 60216/371472 [4:47:38<25:16:17, 3.42it/s] 16%|█▌ | 60217/371472 [4:47:38<24:03:27, 3.59it/s] 16%|█▌ | 60218/371472 [4:47:38<24:17:40, 3.56it/s] 16%|█▌ | 60219/371472 [4:47:38<24:07:38, 3.58it/s] 16%|█▌ | 60220/371472 [4:47:39<23:52:14, 3.62it/s] {'loss': 4.1766, 'learning_rate': 8.545055915878385e-07, 'epoch': 2.59} + 16%|█▌ | 60220/371472 [4:47:39<23:52:14, 3.62it/s] 16%|█▌ | 60221/371472 [4:47:39<24:16:28, 3.56it/s] 16%|█▌ | 60222/371472 [4:47:39<24:54:19, 3.47it/s] 16%|█▌ | 60223/371472 [4:47:40<25:12:25, 3.43it/s] 16%|█▌ | 60224/371472 [4:47:40<23:59:59, 3.60it/s] 16%|█▌ | 60225/371472 [4:47:40<24:14:54, 3.57it/s] 16%|█▌ | 60226/371472 [4:47:40<24:01:17, 3.60it/s] 16%|█▌ | 60227/371472 [4:47:41<23:21:59, 3.70it/s] 16%|█▌ | 60228/371472 [4:47:41<23:58:22, 3.61it/s] 16%|█▌ | 60229/371472 [4:47:41<26:12:11, 3.30it/s] 16%|█▌ | 60230/371472 [4:47:42<25:20:21, 3.41it/s] 16%|█▌ | 60231/371472 [4:47:42<25:21:46, 3.41it/s] 16%|█▌ | 60232/371472 [4:47:42<25:15:12, 3.42it/s] 16%|█▌ | 60233/371472 [4:47:42<24:21:27, 3.55it/s] 16%|█▌ | 60234/371472 [4:47:43<24:01:34, 3.60it/s] 16%|█▌ | 60235/371472 [4:47:43<24:13:59, 3.57it/s] 16%|█▌ | 60236/371472 [4:47:43<25:52:43, 3.34it/s] 16%|█▌ | 60237/371472 [4:47:44<24:49:28, 3.48it/s] 16%|█▌ | 60238/371472 [4:47:44<26:39:55, 3.24it/s] 16%|█▌ | 60239/371472 [4:47:44<25:30:39, 3.39it/s] 16%|█▌ | 60240/371472 [4:47:45<25:03:34, 3.45it/s] {'loss': 4.0639, 'learning_rate': 8.544571096123596e-07, 'epoch': 2.59} + 16%|█▌ | 60240/371472 [4:47:45<25:03:34, 3.45it/s] 16%|█▌ | 60241/371472 [4:47:45<24:41:42, 3.50it/s] 16%|█▌ | 60242/371472 [4:47:45<24:57:47, 3.46it/s] 16%|█▌ | 60243/371472 [4:47:45<25:16:22, 3.42it/s] 16%|█▌ | 60244/371472 [4:47:46<25:35:39, 3.38it/s] 16%|█▌ | 60245/371472 [4:47:46<24:57:14, 3.46it/s] 16%|█▌ | 60246/371472 [4:47:46<27:56:54, 3.09it/s] 16%|█▌ | 60247/371472 [4:47:47<26:04:36, 3.32it/s] 16%|█▌ | 60248/371472 [4:47:47<25:43:59, 3.36it/s] 16%|█▌ | 60249/371472 [4:47:47<24:21:02, 3.55it/s] 16%|█▌ | 60250/371472 [4:47:47<24:04:09, 3.59it/s] 16%|█▌ | 60251/371472 [4:47:48<25:28:27, 3.39it/s] 16%|█▌ | 60252/371472 [4:47:48<26:02:11, 3.32it/s] 16%|█▌ | 60253/371472 [4:47:48<25:20:50, 3.41it/s] 16%|█▌ | 60254/371472 [4:47:49<25:34:18, 3.38it/s] 16%|█▌ | 60255/371472 [4:47:49<25:19:30, 3.41it/s] 16%|█▌ | 60256/371472 [4:47:49<24:46:59, 3.49it/s] 16%|█▌ | 60257/371472 [4:47:50<25:49:33, 3.35it/s] 16%|█▌ | 60258/371472 [4:47:50<26:01:27, 3.32it/s] 16%|█▌ | 60259/371472 [4:47:50<25:16:53, 3.42it/s] 16%|█▌ | 60260/371472 [4:47:50<24:54:33, 3.47it/s] {'loss': 4.3518, 'learning_rate': 8.544086276368808e-07, 'epoch': 2.6} + 16%|█▌ | 60260/371472 [4:47:50<24:54:33, 3.47it/s] 16%|█▌ | 60261/371472 [4:47:51<23:54:00, 3.62it/s] 16%|█▌ | 60262/371472 [4:47:51<24:19:30, 3.55it/s] 16%|█▌ | 60263/371472 [4:47:51<24:17:46, 3.56it/s] 16%|█▌ | 60264/371472 [4:47:51<23:42:01, 3.65it/s] 16%|█▌ | 60265/371472 [4:47:52<23:18:01, 3.71it/s] 16%|█▌ | 60266/371472 [4:47:52<25:46:25, 3.35it/s] 16%|█▌ | 60267/371472 [4:47:52<26:14:29, 3.29it/s] 16%|█▌ | 60268/371472 [4:47:53<25:14:16, 3.43it/s] 16%|█▌ | 60269/371472 [4:47:53<24:35:08, 3.52it/s] 16%|█▌ | 60270/371472 [4:47:53<24:44:01, 3.50it/s] 16%|█▌ | 60271/371472 [4:47:54<24:14:04, 3.57it/s] 16%|█▌ | 60272/371472 [4:47:54<23:49:50, 3.63it/s] 16%|█▌ | 60273/371472 [4:47:54<22:54:02, 3.77it/s] 16%|█▌ | 60274/371472 [4:47:54<22:16:25, 3.88it/s] 16%|█▌ | 60275/371472 [4:47:55<24:07:59, 3.58it/s] 16%|█▌ | 60276/371472 [4:47:55<23:41:03, 3.65it/s] 16%|█▌ | 60277/371472 [4:47:55<22:39:27, 3.82it/s] 16%|█▌ | 60278/371472 [4:47:55<22:05:37, 3.91it/s] 16%|█▌ | 60279/371472 [4:47:56<22:23:25, 3.86it/s] 16%|█▌ | 60280/371472 [4:47:56<22:39:08, 3.82it/s] {'loss': 4.4055, 'learning_rate': 8.543601456614018e-07, 'epoch': 2.6} + 16%|█▌ | 60280/371472 [4:47:56<22:39:08, 3.82it/s] 16%|█▌ | 60281/371472 [4:47:56<22:30:05, 3.84it/s] 16%|█▌ | 60282/371472 [4:47:56<22:14:46, 3.89it/s] 16%|█▌ | 60283/371472 [4:47:57<23:36:14, 3.66it/s] 16%|█▌ | 60284/371472 [4:47:57<24:05:53, 3.59it/s] 16%|█▌ | 60285/371472 [4:47:57<24:09:04, 3.58it/s] 16%|█▌ | 60286/371472 [4:47:58<24:23:51, 3.54it/s] 16%|█▌ | 60287/371472 [4:47:58<24:23:48, 3.54it/s] 16%|█▌ | 60288/371472 [4:47:58<24:20:18, 3.55it/s] 16%|█▌ | 60289/371472 [4:47:58<24:24:58, 3.54it/s] 16%|█▌ | 60290/371472 [4:47:59<25:18:02, 3.42it/s] 16%|█▌ | 60291/371472 [4:47:59<24:21:32, 3.55it/s] 16%|█▌ | 60292/371472 [4:47:59<25:16:28, 3.42it/s] 16%|█▌ | 60293/371472 [4:48:00<24:36:34, 3.51it/s] 16%|█▌ | 60294/371472 [4:48:00<23:32:27, 3.67it/s] 16%|█▌ | 60295/371472 [4:48:00<23:32:06, 3.67it/s] 16%|█▌ | 60296/371472 [4:48:00<23:57:43, 3.61it/s] 16%|█▌ | 60297/371472 [4:48:01<25:19:51, 3.41it/s] 16%|█▌ | 60298/371472 [4:48:01<24:43:48, 3.50it/s] 16%|█▌ | 60299/371472 [4:48:01<23:40:42, 3.65it/s] 16%|█▌ | 60300/371472 [4:48:01<22:52:34, 3.78it/s] {'loss': 4.3399, 'learning_rate': 8.54311663685923e-07, 'epoch': 2.6} + 16%|█▌ | 60300/371472 [4:48:01<22:52:34, 3.78it/s] 16%|█▌ | 60301/371472 [4:48:02<23:36:17, 3.66it/s] 16%|█▌ | 60302/371472 [4:48:02<26:24:21, 3.27it/s] 16%|█▌ | 60303/371472 [4:48:02<26:01:57, 3.32it/s] 16%|█▌ | 60304/371472 [4:48:03<25:01:27, 3.45it/s] 16%|█▌ | 60305/371472 [4:48:03<25:43:20, 3.36it/s] 16%|█▌ | 60306/371472 [4:48:03<24:37:35, 3.51it/s] 16%|█▌ | 60307/371472 [4:48:04<24:49:03, 3.48it/s] 16%|█▌ | 60308/371472 [4:48:04<25:05:38, 3.44it/s] 16%|█▌ | 60309/371472 [4:48:04<24:10:07, 3.58it/s] 16%|█▌ | 60310/371472 [4:48:04<23:15:31, 3.72it/s] 16%|█▌ | 60311/371472 [4:48:05<24:22:59, 3.54it/s] 16%|█▌ | 60312/371472 [4:48:05<24:11:16, 3.57it/s] 16%|█▌ | 60313/371472 [4:48:05<24:54:38, 3.47it/s] 16%|█▌ | 60314/371472 [4:48:06<24:33:23, 3.52it/s] 16%|█▌ | 60315/371472 [4:48:06<24:19:15, 3.55it/s] 16%|█▌ | 60316/371472 [4:48:06<23:28:19, 3.68it/s] 16%|█▌ | 60317/371472 [4:48:06<23:01:43, 3.75it/s] 16%|█▌ | 60318/371472 [4:48:07<23:59:48, 3.60it/s] 16%|█▌ | 60319/371472 [4:48:07<24:55:27, 3.47it/s] 16%|█▌ | 60320/371472 [4:48:07<26:08:11, 3.31it/s] {'loss': 4.1884, 'learning_rate': 8.54263181710444e-07, 'epoch': 2.6} + 16%|█▌ | 60320/371472 [4:48:07<26:08:11, 3.31it/s] 16%|█▌ | 60321/371472 [4:48:07<24:57:07, 3.46it/s] 16%|█▌ | 60322/371472 [4:48:08<24:18:35, 3.56it/s] 16%|█▌ | 60323/371472 [4:48:08<23:56:23, 3.61it/s] 16%|█▌ | 60324/371472 [4:48:08<24:25:39, 3.54it/s] 16%|█▌ | 60325/371472 [4:48:09<25:18:07, 3.42it/s] 16%|█▌ | 60326/371472 [4:48:09<25:12:30, 3.43it/s] 16%|█▌ | 60327/371472 [4:48:09<25:11:03, 3.43it/s] 16%|█▌ | 60328/371472 [4:48:09<24:01:14, 3.60it/s] 16%|█▌ | 60329/371472 [4:48:10<24:06:51, 3.58it/s] 16%|█▌ | 60330/371472 [4:48:10<26:28:37, 3.26it/s] 16%|█▌ | 60331/371472 [4:48:10<25:33:42, 3.38it/s] 16%|█▌ | 60332/371472 [4:48:11<24:27:30, 3.53it/s] 16%|█▌ | 60333/371472 [4:48:11<23:23:32, 3.69it/s] 16%|█▌ | 60334/371472 [4:48:11<24:00:15, 3.60it/s] 16%|█▌ | 60335/371472 [4:48:11<23:40:53, 3.65it/s] 16%|█▌ | 60336/371472 [4:48:12<25:06:24, 3.44it/s] 16%|█▌ | 60337/371472 [4:48:12<24:55:56, 3.47it/s] 16%|█▌ | 60338/371472 [4:48:12<24:57:48, 3.46it/s] 16%|█▌ | 60339/371472 [4:48:13<24:53:03, 3.47it/s] 16%|█▌ | 60340/371472 [4:48:13<24:19:29, 3.55it/s] {'loss': 4.3731, 'learning_rate': 8.542146997349652e-07, 'epoch': 2.6} + 16%|█▌ | 60340/371472 [4:48:13<24:19:29, 3.55it/s] 16%|█▌ | 60341/371472 [4:48:13<23:40:31, 3.65it/s] 16%|█▌ | 60342/371472 [4:48:13<23:28:14, 3.68it/s] 16%|█▌ | 60343/371472 [4:48:14<24:43:06, 3.50it/s] 16%|█▌ | 60344/371472 [4:48:14<24:41:06, 3.50it/s] 16%|█▌ | 60345/371472 [4:48:14<23:32:21, 3.67it/s] 16%|█▌ | 60346/371472 [4:48:15<23:12:01, 3.73it/s] 16%|█▌ | 60347/371472 [4:48:15<23:48:13, 3.63it/s] 16%|█▌ | 60348/371472 [4:48:15<24:28:23, 3.53it/s] 16%|█▌ | 60349/371472 [4:48:15<24:12:03, 3.57it/s] 16%|█▌ | 60350/371472 [4:48:16<24:14:32, 3.56it/s] 16%|█▌ | 60351/371472 [4:48:16<25:10:57, 3.43it/s] 16%|█▌ | 60352/371472 [4:48:16<24:24:55, 3.54it/s] 16%|█▌ | 60353/371472 [4:48:17<25:44:25, 3.36it/s] 16%|█▌ | 60354/371472 [4:48:17<24:30:35, 3.53it/s] 16%|█▌ | 60355/371472 [4:48:17<23:20:49, 3.70it/s] 16%|█▌ | 60356/371472 [4:48:17<23:19:38, 3.70it/s] 16%|█▌ | 60357/371472 [4:48:18<23:22:47, 3.70it/s] 16%|█▌ | 60358/371472 [4:48:18<23:26:59, 3.69it/s] 16%|█▌ | 60359/371472 [4:48:18<23:57:45, 3.61it/s] 16%|█▌ | 60360/371472 [4:48:18<23:24:41, 3.69it/s] {'loss': 4.5374, 'learning_rate': 8.541662177594862e-07, 'epoch': 2.6} + 16%|█▌ | 60360/371472 [4:48:18<23:24:41, 3.69it/s] 16%|█▌ | 60361/371472 [4:48:19<23:35:56, 3.66it/s] 16%|█▌ | 60362/371472 [4:48:19<23:13:21, 3.72it/s] 16%|█▌ | 60363/371472 [4:48:19<23:25:01, 3.69it/s] 16%|█▌ | 60364/371472 [4:48:20<23:01:42, 3.75it/s] 16%|█▋ | 60365/371472 [4:48:20<23:16:24, 3.71it/s] 16%|█▋ | 60366/371472 [4:48:20<23:40:38, 3.65it/s] 16%|█▋ | 60367/371472 [4:48:20<23:52:42, 3.62it/s] 16%|█▋ | 60368/371472 [4:48:21<23:41:13, 3.65it/s] 16%|█▋ | 60369/371472 [4:48:21<22:53:41, 3.77it/s] 16%|█▋ | 60370/371472 [4:48:21<23:47:53, 3.63it/s] 16%|█▋ | 60371/371472 [4:48:21<23:46:06, 3.64it/s] 16%|█▋ | 60372/371472 [4:48:22<24:25:26, 3.54it/s] 16%|█▋ | 60373/371472 [4:48:22<24:22:59, 3.54it/s] 16%|█▋ | 60374/371472 [4:48:22<27:05:29, 3.19it/s] 16%|█▋ | 60375/371472 [4:48:23<25:35:30, 3.38it/s] 16%|█▋ | 60376/371472 [4:48:23<26:51:09, 3.22it/s] 16%|█▋ | 60377/371472 [4:48:23<25:45:42, 3.35it/s] 16%|█▋ | 60378/371472 [4:48:23<24:01:01, 3.60it/s] 16%|█▋ | 60379/371472 [4:48:24<23:31:11, 3.67it/s] 16%|█▋ | 60380/371472 [4:48:24<22:56:44, 3.77it/s] {'loss': 4.1441, 'learning_rate': 8.541177357840074e-07, 'epoch': 2.6} + 16%|█▋ | 60380/371472 [4:48:24<22:56:44, 3.77it/s] 16%|█▋ | 60381/371472 [4:48:24<22:58:50, 3.76it/s] 16%|█▋ | 60382/371472 [4:48:25<22:43:41, 3.80it/s] 16%|█▋ | 60383/371472 [4:48:25<22:34:18, 3.83it/s] 16%|█▋ | 60384/371472 [4:48:25<25:25:26, 3.40it/s] 16%|█▋ | 60385/371472 [4:48:25<25:10:29, 3.43it/s] 16%|█▋ | 60386/371472 [4:48:26<23:53:16, 3.62it/s] 16%|█▋ | 60387/371472 [4:48:26<23:52:27, 3.62it/s] 16%|█▋ | 60388/371472 [4:48:26<23:54:38, 3.61it/s] 16%|█▋ | 60389/371472 [4:48:26<23:20:32, 3.70it/s] 16%|█▋ | 60390/371472 [4:48:27<24:02:32, 3.59it/s] 16%|█▋ | 60391/371472 [4:48:27<23:03:05, 3.75it/s] 16%|█▋ | 60392/371472 [4:48:27<24:54:33, 3.47it/s] 16%|█▋ | 60393/371472 [4:48:28<23:53:49, 3.62it/s] 16%|█▋ | 60394/371472 [4:48:28<25:21:56, 3.41it/s] 16%|█▋ | 60395/371472 [4:48:28<26:27:54, 3.27it/s] 16%|█▋ | 60396/371472 [4:48:29<27:31:22, 3.14it/s] 16%|█▋ | 60397/371472 [4:48:29<27:26:50, 3.15it/s] 16%|█▋ | 60398/371472 [4:48:29<26:58:17, 3.20it/s] 16%|█▋ | 60399/371472 [4:48:30<28:34:08, 3.02it/s] 16%|█▋ | 60400/371472 [4:48:30<27:32:58, 3.14it/s] {'loss': 4.2451, 'learning_rate': 8.540692538085285e-07, 'epoch': 2.6} + 16%|█▋ | 60400/371472 [4:48:30<27:32:58, 3.14it/s] 16%|█▋ | 60401/371472 [4:48:30<26:32:55, 3.25it/s] 16%|█▋ | 60402/371472 [4:48:30<25:54:16, 3.34it/s] 16%|█▋ | 60403/371472 [4:48:31<25:44:18, 3.36it/s] 16%|█▋ | 60404/371472 [4:48:31<25:07:23, 3.44it/s] 16%|█▋ | 60405/371472 [4:48:31<23:48:04, 3.63it/s] 16%|█▋ | 60406/371472 [4:48:32<23:40:34, 3.65it/s] 16%|█▋ | 60407/371472 [4:48:32<24:40:25, 3.50it/s] 16%|█▋ | 60408/371472 [4:48:32<25:49:32, 3.35it/s] 16%|█▋ | 60409/371472 [4:48:32<24:56:16, 3.46it/s] 16%|█▋ | 60410/371472 [4:48:33<25:07:37, 3.44it/s] 16%|█▋ | 60411/371472 [4:48:33<24:14:07, 3.57it/s] 16%|█▋ | 60412/371472 [4:48:33<23:55:58, 3.61it/s] 16%|█▋ | 60413/371472 [4:48:34<23:32:28, 3.67it/s] 16%|█▋ | 60414/371472 [4:48:34<25:07:01, 3.44it/s] 16%|█▋ | 60415/371472 [4:48:34<23:44:31, 3.64it/s] 16%|█▋ | 60416/371472 [4:48:34<24:38:52, 3.51it/s] 16%|█▋ | 60417/371472 [4:48:35<24:02:21, 3.59it/s] 16%|█▋ | 60418/371472 [4:48:35<23:59:35, 3.60it/s] 16%|█▋ | 60419/371472 [4:48:35<26:00:25, 3.32it/s] 16%|█▋ | 60420/371472 [4:48:36<25:12:59, 3.43it/s] {'loss': 4.172, 'learning_rate': 8.540207718330496e-07, 'epoch': 2.6} + 16%|█▋ | 60420/371472 [4:48:36<25:12:59, 3.43it/s] 16%|█▋ | 60421/371472 [4:48:36<24:41:13, 3.50it/s] 16%|█▋ | 60422/371472 [4:48:36<25:44:50, 3.36it/s] 16%|█▋ | 60423/371472 [4:48:36<25:36:20, 3.37it/s] 16%|█▋ | 60424/371472 [4:48:37<26:01:04, 3.32it/s] 16%|█▋ | 60425/371472 [4:48:37<26:18:57, 3.28it/s] 16%|█▋ | 60426/371472 [4:48:37<26:32:27, 3.26it/s] 16%|█▋ | 60427/371472 [4:48:38<25:14:09, 3.42it/s] 16%|█▋ | 60428/371472 [4:48:38<24:34:44, 3.52it/s] 16%|█▋ | 60429/371472 [4:48:38<24:32:34, 3.52it/s] 16%|█▋ | 60430/371472 [4:48:38<23:39:12, 3.65it/s] 16%|█▋ | 60431/371472 [4:48:39<22:54:47, 3.77it/s] 16%|█▋ | 60432/371472 [4:48:39<23:30:07, 3.68it/s] 16%|█▋ | 60433/371472 [4:48:39<23:24:23, 3.69it/s] 16%|█▋ | 60434/371472 [4:48:40<23:44:48, 3.64it/s] 16%|█▋ | 60435/371472 [4:48:40<24:12:43, 3.57it/s] 16%|█▋ | 60436/371472 [4:48:40<24:06:22, 3.58it/s] 16%|█▋ | 60437/371472 [4:48:40<23:31:34, 3.67it/s] 16%|█▋ | 60438/371472 [4:48:41<24:18:13, 3.55it/s] 16%|█▋ | 60439/371472 [4:48:41<24:15:57, 3.56it/s] 16%|█▋ | 60440/371472 [4:48:41<23:29:26, 3.68it/s] {'loss': 4.1974, 'learning_rate': 8.539722898575707e-07, 'epoch': 2.6} + 16%|█▋ | 60440/371472 [4:48:41<23:29:26, 3.68it/s] 16%|█▋ | 60441/371472 [4:48:42<25:29:20, 3.39it/s] 16%|█▋ | 60442/371472 [4:48:42<24:40:10, 3.50it/s] 16%|█▋ | 60443/371472 [4:48:42<24:58:35, 3.46it/s] 16%|█▋ | 60444/371472 [4:48:42<24:37:05, 3.51it/s] 16%|█▋ | 60445/371472 [4:48:43<24:24:26, 3.54it/s] 16%|█▋ | 60446/371472 [4:48:43<24:08:13, 3.58it/s] 16%|█▋ | 60447/371472 [4:48:43<24:19:33, 3.55it/s] 16%|█▋ | 60448/371472 [4:48:44<24:14:44, 3.56it/s] 16%|█▋ | 60449/371472 [4:48:44<23:35:51, 3.66it/s] 16%|█▋ | 60450/371472 [4:48:44<23:49:18, 3.63it/s] 16%|█▋ | 60451/371472 [4:48:44<24:42:55, 3.50it/s] 16%|█▋ | 60452/371472 [4:48:45<24:58:46, 3.46it/s] 16%|█▋ | 60453/371472 [4:48:45<24:36:37, 3.51it/s] 16%|█▋ | 60454/371472 [4:48:45<23:31:26, 3.67it/s] 16%|█▋ | 60455/371472 [4:48:45<23:38:47, 3.65it/s] 16%|█▋ | 60456/371472 [4:48:46<23:27:19, 3.68it/s] 16%|█▋ | 60457/371472 [4:48:46<23:28:03, 3.68it/s] 16%|█▋ | 60458/371472 [4:48:46<24:33:52, 3.52it/s] 16%|█▋ | 60459/371472 [4:48:47<24:13:24, 3.57it/s] 16%|█▋ | 60460/371472 [4:48:47<24:39:11, 3.50it/s] {'loss': 4.2203, 'learning_rate': 8.539238078820918e-07, 'epoch': 2.6} + 16%|█▋ | 60460/371472 [4:48:47<24:39:11, 3.50it/s] 16%|█▋ | 60461/371472 [4:48:47<24:12:04, 3.57it/s] 16%|█▋ | 60462/371472 [4:48:47<24:15:59, 3.56it/s] 16%|█▋ | 60463/371472 [4:48:48<23:54:43, 3.61it/s] 16%|█▋ | 60464/371472 [4:48:48<24:20:23, 3.55it/s] 16%|█▋ | 60465/371472 [4:48:48<24:32:54, 3.52it/s] 16%|█▋ | 60466/371472 [4:48:49<24:02:41, 3.59it/s] 16%|█▋ | 60467/371472 [4:48:49<23:24:03, 3.69it/s] 16%|█▋ | 60468/371472 [4:48:49<24:14:48, 3.56it/s] 16%|█▋ | 60469/371472 [4:48:49<24:26:22, 3.53it/s] 16%|█▋ | 60470/371472 [4:48:50<24:34:03, 3.52it/s] 16%|█▋ | 60471/371472 [4:48:50<24:26:29, 3.53it/s] 16%|█▋ | 60472/371472 [4:48:50<24:12:05, 3.57it/s] 16%|█▋ | 60473/371472 [4:48:51<24:29:11, 3.53it/s] 16%|█▋ | 60474/371472 [4:48:51<24:44:59, 3.49it/s] 16%|█▋ | 60475/371472 [4:48:51<26:09:26, 3.30it/s] 16%|█▋ | 60476/371472 [4:48:52<36:24:33, 2.37it/s] 16%|█▋ | 60477/371472 [4:48:52<32:30:11, 2.66it/s] 16%|█▋ | 60478/371472 [4:48:52<30:49:57, 2.80it/s] 16%|█▋ | 60479/371472 [4:48:53<28:29:06, 3.03it/s] 16%|█▋ | 60480/371472 [4:48:53<26:56:39, 3.21it/s] {'loss': 4.241, 'learning_rate': 8.53875325906613e-07, 'epoch': 2.6} + 16%|█▋ | 60480/371472 [4:48:53<26:56:39, 3.21it/s] 16%|█▋ | 60481/371472 [4:48:53<26:03:40, 3.31it/s] 16%|█▋ | 60482/371472 [4:48:54<25:57:11, 3.33it/s] 16%|█▋ | 60483/371472 [4:48:54<25:22:51, 3.40it/s] 16%|█▋ | 60484/371472 [4:48:54<23:54:49, 3.61it/s] 16%|█▋ | 60485/371472 [4:48:54<23:21:39, 3.70it/s] 16%|█▋ | 60486/371472 [4:48:55<22:39:05, 3.81it/s] 16%|█▋ | 60487/371472 [4:48:55<22:18:48, 3.87it/s] 16%|█▋ | 60488/371472 [4:48:55<24:45:37, 3.49it/s] 16%|█▋ | 60489/371472 [4:48:55<24:06:49, 3.58it/s] 16%|█▋ | 60490/371472 [4:48:56<24:05:55, 3.58it/s] 16%|█▋ | 60491/371472 [4:48:56<23:33:38, 3.67it/s] 16%|█▋ | 60492/371472 [4:48:56<24:03:14, 3.59it/s] 16%|█▋ | 60493/371472 [4:48:57<23:50:14, 3.62it/s] 16%|█▋ | 60494/371472 [4:48:57<25:23:40, 3.40it/s] 16%|█▋ | 60495/371472 [4:48:57<25:22:14, 3.40it/s] 16%|█▋ | 60496/371472 [4:48:57<26:27:24, 3.27it/s] 16%|█▋ | 60497/371472 [4:48:58<25:20:05, 3.41it/s] 16%|█▋ | 60498/371472 [4:48:58<24:20:32, 3.55it/s] 16%|█▋ | 60499/371472 [4:48:58<24:28:46, 3.53it/s] 16%|█▋ | 60500/371472 [4:48:59<23:29:37, 3.68it/s] {'loss': 4.2997, 'learning_rate': 8.53826843931134e-07, 'epoch': 2.61} + 16%|█▋ | 60500/371472 [4:48:59<23:29:37, 3.68it/s] 16%|█▋ | 60501/371472 [4:48:59<23:55:54, 3.61it/s] 16%|█▋ | 60502/371472 [4:48:59<26:55:17, 3.21it/s] 16%|█▋ | 60503/371472 [4:48:59<25:30:04, 3.39it/s] 16%|█▋ | 60504/371472 [4:49:00<25:44:44, 3.36it/s] 16%|█▋ | 60505/371472 [4:49:00<25:04:56, 3.44it/s] 16%|█▋ | 60506/371472 [4:49:00<25:04:01, 3.45it/s] 16%|█▋ | 60507/371472 [4:49:01<24:26:23, 3.53it/s] 16%|█▋ | 60508/371472 [4:49:01<23:20:23, 3.70it/s] 16%|█▋ | 60509/371472 [4:49:01<23:24:28, 3.69it/s] 16%|█▋ | 60510/371472 [4:49:01<24:25:01, 3.54it/s] 16%|█▋ | 60511/371472 [4:49:02<24:59:41, 3.46it/s] 16%|█▋ | 60512/371472 [4:49:02<24:21:45, 3.55it/s] 16%|█▋ | 60513/371472 [4:49:02<23:27:01, 3.68it/s] 16%|█▋ | 60514/371472 [4:49:03<24:29:35, 3.53it/s] 16%|█▋ | 60515/371472 [4:49:03<24:18:43, 3.55it/s] 16%|█▋ | 60516/371472 [4:49:03<23:48:50, 3.63it/s] 16%|█▋ | 60517/371472 [4:49:03<24:40:08, 3.50it/s] 16%|█▋ | 60518/371472 [4:49:04<24:02:23, 3.59it/s] 16%|█▋ | 60519/371472 [4:49:04<24:08:55, 3.58it/s] 16%|█▋ | 60520/371472 [4:49:04<24:17:33, 3.56it/s] {'loss': 4.0746, 'learning_rate': 8.537783619556551e-07, 'epoch': 2.61} + 16%|█▋ | 60520/371472 [4:49:04<24:17:33, 3.56it/s] 16%|█▋ | 60521/371472 [4:49:04<23:19:24, 3.70it/s] 16%|█▋ | 60522/371472 [4:49:05<25:01:57, 3.45it/s] 16%|█▋ | 60523/371472 [4:49:05<25:07:08, 3.44it/s] 16%|█▋ | 60524/371472 [4:49:05<26:32:27, 3.25it/s] 16%|█▋ | 60525/371472 [4:49:06<25:28:13, 3.39it/s] 16%|█▋ | 60526/371472 [4:49:06<24:31:26, 3.52it/s] 16%|█▋ | 60527/371472 [4:49:06<24:21:53, 3.54it/s] 16%|█▋ | 60528/371472 [4:49:07<24:26:39, 3.53it/s] 16%|█▋ | 60529/371472 [4:49:07<25:02:03, 3.45it/s] 16%|█▋ | 60530/371472 [4:49:07<26:11:14, 3.30it/s] 16%|█▋ | 60531/371472 [4:49:07<25:26:19, 3.40it/s] 16%|█▋ | 60532/371472 [4:49:08<25:12:03, 3.43it/s] 16%|█▋ | 60533/371472 [4:49:08<25:33:06, 3.38it/s] 16%|█▋ | 60534/371472 [4:49:08<25:44:16, 3.36it/s] 16%|█▋ | 60535/371472 [4:49:09<24:39:53, 3.50it/s] 16%|█▋ | 60536/371472 [4:49:09<25:20:33, 3.41it/s] 16%|█▋ | 60537/371472 [4:49:09<27:02:45, 3.19it/s] 16%|█▋ | 60538/371472 [4:49:10<25:32:29, 3.38it/s] 16%|█▋ | 60539/371472 [4:49:10<24:49:05, 3.48it/s] 16%|█▋ | 60540/371472 [4:49:10<23:41:20, 3.65it/s] {'loss': 4.1195, 'learning_rate': 8.537298799801763e-07, 'epoch': 2.61} + 16%|█▋ | 60540/371472 [4:49:10<23:41:20, 3.65it/s] 16%|█▋ | 60541/371472 [4:49:10<24:57:22, 3.46it/s] 16%|█▋ | 60542/371472 [4:49:11<29:05:41, 2.97it/s] 16%|█▋ | 60543/371472 [4:49:11<29:18:44, 2.95it/s] 16%|█▋ | 60544/371472 [4:49:11<26:55:37, 3.21it/s] 16%|█▋ | 60545/371472 [4:49:12<25:37:40, 3.37it/s] 16%|█▋ | 60546/371472 [4:49:12<24:45:01, 3.49it/s] 16%|█▋ | 60547/371472 [4:49:12<23:29:05, 3.68it/s] 16%|█▋ | 60548/371472 [4:49:12<23:24:09, 3.69it/s] 16%|█▋ | 60549/371472 [4:49:13<24:12:05, 3.57it/s] 16%|█▋ | 60550/371472 [4:49:13<23:18:06, 3.71it/s] 16%|█▋ | 60551/371472 [4:49:13<25:04:15, 3.44it/s] 16%|█▋ | 60552/371472 [4:49:14<25:41:06, 3.36it/s] 16%|█▋ | 60553/371472 [4:49:14<24:24:42, 3.54it/s] 16%|█▋ | 60554/371472 [4:49:14<24:20:17, 3.55it/s] 16%|█▋ | 60555/371472 [4:49:14<24:28:53, 3.53it/s] 16%|█▋ | 60556/371472 [4:49:15<26:00:54, 3.32it/s] 16%|█▋ | 60557/371472 [4:49:15<25:39:50, 3.37it/s] 16%|█▋ | 60558/371472 [4:49:15<24:22:26, 3.54it/s] 16%|█▋ | 60559/371472 [4:49:16<24:32:20, 3.52it/s] 16%|█▋ | 60560/371472 [4:49:16<23:44:26, 3.64it/s] {'loss': 4.2102, 'learning_rate': 8.536813980046974e-07, 'epoch': 2.61} + 16%|█▋ | 60560/371472 [4:49:16<23:44:26, 3.64it/s] 16%|█▋ | 60561/371472 [4:49:16<26:16:45, 3.29it/s] 16%|█▋ | 60562/371472 [4:49:17<26:30:25, 3.26it/s] 16%|█▋ | 60563/371472 [4:49:17<27:28:21, 3.14it/s] 16%|█▋ | 60564/371472 [4:49:17<25:28:31, 3.39it/s] 16%|█▋ | 60565/371472 [4:49:17<26:58:00, 3.20it/s] 16%|█▋ | 60566/371472 [4:49:18<25:39:44, 3.37it/s] 16%|█▋ | 60567/371472 [4:49:18<24:50:57, 3.48it/s] 16%|█▋ | 60568/371472 [4:49:18<24:37:59, 3.51it/s] 16%|█▋ | 60569/371472 [4:49:19<23:24:31, 3.69it/s] 16%|█▋ | 60570/371472 [4:49:19<23:57:47, 3.60it/s] 16%|█▋ | 60571/371472 [4:49:19<25:09:42, 3.43it/s] 16%|█▋ | 60572/371472 [4:49:19<24:54:20, 3.47it/s] 16%|█▋ | 60573/371472 [4:49:20<24:56:08, 3.46it/s] 16%|█▋ | 60574/371472 [4:49:20<24:32:49, 3.52it/s] 16%|█▋ | 60575/371472 [4:49:20<23:56:42, 3.61it/s] 16%|█▋ | 60576/371472 [4:49:21<24:49:39, 3.48it/s] 16%|█▋ | 60577/371472 [4:49:21<25:07:09, 3.44it/s] 16%|█▋ | 60578/371472 [4:49:21<26:08:23, 3.30it/s] 16%|█▋ | 60579/371472 [4:49:21<25:15:44, 3.42it/s] 16%|█▋ | 60580/371472 [4:49:22<24:49:33, 3.48it/s] {'loss': 4.1293, 'learning_rate': 8.536329160292183e-07, 'epoch': 2.61} + 16%|█▋ | 60580/371472 [4:49:22<24:49:33, 3.48it/s] 16%|█▋ | 60581/371472 [4:49:22<24:19:33, 3.55it/s] 16%|█▋ | 60582/371472 [4:49:22<23:31:15, 3.67it/s] 16%|█▋ | 60583/371472 [4:49:23<22:52:16, 3.78it/s] 16%|█▋ | 60584/371472 [4:49:23<26:12:15, 3.30it/s] 16%|█▋ | 60585/371472 [4:49:23<25:33:59, 3.38it/s] 16%|█▋ | 60586/371472 [4:49:23<25:31:11, 3.38it/s] 16%|█▋ | 60587/371472 [4:49:24<24:21:12, 3.55it/s] 16%|█▋ | 60588/371472 [4:49:24<24:10:35, 3.57it/s] 16%|█▋ | 60589/371472 [4:49:24<25:54:46, 3.33it/s] 16%|█▋ | 60590/371472 [4:49:25<25:25:41, 3.40it/s] 16%|█▋ | 60591/371472 [4:49:25<24:13:46, 3.56it/s] 16%|█▋ | 60592/371472 [4:49:25<24:50:47, 3.48it/s] 16%|█▋ | 60593/371472 [4:49:25<25:24:56, 3.40it/s] 16%|█▋ | 60594/371472 [4:49:26<25:08:05, 3.44it/s] 16%|█▋ | 60595/371472 [4:49:26<26:35:25, 3.25it/s] 16%|█▋ | 60596/371472 [4:49:26<26:11:38, 3.30it/s] 16%|█▋ | 60597/371472 [4:49:27<24:54:07, 3.47it/s] 16%|█▋ | 60598/371472 [4:49:27<24:16:32, 3.56it/s] 16%|█▋ | 60599/371472 [4:49:27<23:14:18, 3.72it/s] 16%|█▋ | 60600/371472 [4:49:27<23:10:26, 3.73it/s] {'loss': 4.1909, 'learning_rate': 8.535844340537395e-07, 'epoch': 2.61} + 16%|█▋ | 60600/371472 [4:49:27<23:10:26, 3.73it/s] 16%|█▋ | 60601/371472 [4:49:28<22:47:20, 3.79it/s] 16%|█▋ | 60602/371472 [4:49:28<24:28:27, 3.53it/s] 16%|█▋ | 60603/371472 [4:49:28<27:09:47, 3.18it/s] 16%|█▋ | 60604/371472 [4:49:29<26:50:18, 3.22it/s] 16%|█▋ | 60605/371472 [4:49:29<27:15:23, 3.17it/s] 16%|█▋ | 60606/371472 [4:49:29<25:42:45, 3.36it/s] 16%|█▋ | 60607/371472 [4:49:30<24:27:48, 3.53it/s] 16%|█▋ | 60608/371472 [4:49:30<25:10:28, 3.43it/s] 16%|█▋ | 60609/371472 [4:49:30<26:59:09, 3.20it/s] 16%|█▋ | 60610/371472 [4:49:31<26:13:14, 3.29it/s] 16%|█▋ | 60611/371472 [4:49:31<25:26:37, 3.39it/s] 16%|█▋ | 60612/371472 [4:49:31<27:11:23, 3.18it/s] 16%|█▋ | 60613/371472 [4:49:31<25:31:34, 3.38it/s] 16%|█▋ | 60614/371472 [4:49:32<24:33:38, 3.52it/s] 16%|█▋ | 60615/371472 [4:49:32<26:21:04, 3.28it/s] 16%|█▋ | 60616/371472 [4:49:32<25:57:33, 3.33it/s] 16%|█▋ | 60617/371472 [4:49:33<24:52:29, 3.47it/s] 16%|█▋ | 60618/371472 [4:49:33<25:34:39, 3.38it/s] 16%|█▋ | 60619/371472 [4:49:33<26:45:50, 3.23it/s] 16%|█▋ | 60620/371472 [4:49:34<26:49:54, 3.22it/s] {'loss': 4.0423, 'learning_rate': 8.535359520782607e-07, 'epoch': 2.61} + 16%|█▋ | 60620/371472 [4:49:34<26:49:54, 3.22it/s] 16%|█▋ | 60621/371472 [4:49:34<26:39:06, 3.24it/s] 16%|█▋ | 60622/371472 [4:49:34<24:58:21, 3.46it/s] 16%|█▋ | 60623/371472 [4:49:34<26:54:28, 3.21it/s] 16%|█▋ | 60624/371472 [4:49:35<27:49:08, 3.10it/s] 16%|█▋ | 60625/371472 [4:49:35<26:47:16, 3.22it/s] 16%|█▋ | 60626/371472 [4:49:35<27:19:15, 3.16it/s] 16%|█▋ | 60627/371472 [4:49:36<26:51:47, 3.21it/s] 16%|█▋ | 60628/371472 [4:49:36<27:19:54, 3.16it/s] 16%|█▋ | 60629/371472 [4:49:36<27:38:00, 3.12it/s] 16%|█▋ | 60630/371472 [4:49:37<26:46:30, 3.22it/s] 16%|█▋ | 60631/371472 [4:49:37<26:31:56, 3.25it/s] 16%|█▋ | 60632/371472 [4:49:37<25:23:01, 3.40it/s] 16%|█▋ | 60633/371472 [4:49:38<26:50:41, 3.22it/s] 16%|█▋ | 60634/371472 [4:49:38<26:32:17, 3.25it/s] 16%|█▋ | 60635/371472 [4:49:38<26:06:27, 3.31it/s] 16%|█▋ | 60636/371472 [4:49:38<25:42:15, 3.36it/s] 16%|█▋ | 60637/371472 [4:49:39<24:52:04, 3.47it/s] 16%|█▋ | 60638/371472 [4:49:39<24:53:44, 3.47it/s] 16%|█▋ | 60639/371472 [4:49:39<24:19:59, 3.55it/s] 16%|█▋ | 60640/371472 [4:49:40<23:43:17, 3.64it/s] {'loss': 4.2171, 'learning_rate': 8.534874701027818e-07, 'epoch': 2.61} + 16%|█▋ | 60640/371472 [4:49:40<23:43:17, 3.64it/s] 16%|█▋ | 60641/371472 [4:49:40<23:05:08, 3.74it/s] 16%|█▋ | 60642/371472 [4:49:40<23:59:55, 3.60it/s] 16%|█▋ | 60643/371472 [4:49:40<23:50:24, 3.62it/s] 16%|█▋ | 60644/371472 [4:49:41<24:43:59, 3.49it/s] 16%|█▋ | 60645/371472 [4:49:41<24:29:07, 3.53it/s] 16%|█▋ | 60646/371472 [4:49:41<24:41:39, 3.50it/s] 16%|█▋ | 60647/371472 [4:49:41<24:07:41, 3.58it/s] 16%|█▋ | 60648/371472 [4:49:42<23:59:18, 3.60it/s] 16%|█▋ | 60649/371472 [4:49:42<25:09:48, 3.43it/s] 16%|█▋ | 60650/371472 [4:49:42<26:16:49, 3.29it/s] 16%|█▋ | 60651/371472 [4:49:43<25:19:28, 3.41it/s] 16%|█▋ | 60652/371472 [4:49:43<24:43:02, 3.49it/s] 16%|█▋ | 60653/371472 [4:49:43<26:19:53, 3.28it/s] 16%|█▋ | 60654/371472 [4:49:44<25:47:07, 3.35it/s] 16%|█▋ | 60655/371472 [4:49:44<26:04:56, 3.31it/s] 16%|█▋ | 60656/371472 [4:49:44<24:35:01, 3.51it/s] 16%|█▋ | 60657/371472 [4:49:44<24:24:53, 3.54it/s] 16%|█▋ | 60658/371472 [4:49:45<25:04:17, 3.44it/s] 16%|█▋ | 60659/371472 [4:49:45<24:02:33, 3.59it/s] 16%|█▋ | 60660/371472 [4:49:45<23:49:19, 3.62it/s] {'loss': 3.9872, 'learning_rate': 8.534389881273028e-07, 'epoch': 2.61} + 16%|█▋ | 60660/371472 [4:49:45<23:49:19, 3.62it/s] 16%|█▋ | 60661/371472 [4:49:46<23:49:15, 3.62it/s] 16%|█▋ | 60662/371472 [4:49:46<23:52:54, 3.62it/s] 16%|█▋ | 60663/371472 [4:49:46<23:30:06, 3.67it/s] 16%|█▋ | 60664/371472 [4:49:46<25:08:20, 3.43it/s] 16%|█▋ | 60665/371472 [4:49:47<24:21:30, 3.54it/s] 16%|█▋ | 60666/371472 [4:49:47<23:58:37, 3.60it/s] 16%|█▋ | 60667/371472 [4:49:47<23:58:21, 3.60it/s] 16%|█▋ | 60668/371472 [4:49:48<25:03:44, 3.44it/s] 16%|█▋ | 60669/371472 [4:49:48<23:54:58, 3.61it/s] 16%|█▋ | 60670/371472 [4:49:48<23:46:03, 3.63it/s] 16%|█▋ | 60671/371472 [4:49:48<23:28:45, 3.68it/s] 16%|█▋ | 60672/371472 [4:49:49<24:41:01, 3.50it/s] 16%|█▋ | 60673/371472 [4:49:49<23:35:12, 3.66it/s] 16%|█▋ | 60674/371472 [4:49:49<24:12:16, 3.57it/s] 16%|█▋ | 60675/371472 [4:49:49<24:16:46, 3.56it/s] 16%|█▋ | 60676/371472 [4:49:50<24:05:24, 3.58it/s] 16%|█▋ | 60677/371472 [4:49:50<23:10:09, 3.73it/s] 16%|█▋ | 60678/371472 [4:49:50<23:20:20, 3.70it/s] 16%|█▋ | 60679/371472 [4:49:51<26:54:09, 3.21it/s] 16%|█▋ | 60680/371472 [4:49:51<25:07:13, 3.44it/s] {'loss': 4.1679, 'learning_rate': 8.53390506151824e-07, 'epoch': 2.61} + 16%|█▋ | 60680/371472 [4:49:51<25:07:13, 3.44it/s] 16%|█▋ | 60681/371472 [4:49:51<24:16:48, 3.56it/s] 16%|█▋ | 60682/371472 [4:49:51<23:54:02, 3.61it/s] 16%|█▋ | 60683/371472 [4:49:52<22:56:11, 3.76it/s] 16%|█▋ | 60684/371472 [4:49:52<23:02:58, 3.75it/s] 16%|█▋ | 60685/371472 [4:49:52<23:03:51, 3.74it/s] 16%|█▋ | 60686/371472 [4:49:53<24:36:50, 3.51it/s] 16%|█▋ | 60687/371472 [4:49:53<24:44:53, 3.49it/s] 16%|█▋ | 60688/371472 [4:49:53<23:35:55, 3.66it/s] 16%|█▋ | 60689/371472 [4:49:53<23:19:09, 3.70it/s] 16%|█▋ | 60690/371472 [4:49:54<23:35:29, 3.66it/s] 16%|█▋ | 60691/371472 [4:49:54<24:05:51, 3.58it/s] 16%|█▋ | 60692/371472 [4:49:54<23:29:27, 3.67it/s] 16%|█▋ | 60693/371472 [4:49:54<23:06:35, 3.74it/s] 16%|█▋ | 60694/371472 [4:49:55<24:08:17, 3.58it/s] 16%|█▋ | 60695/371472 [4:49:55<24:17:28, 3.55it/s] 16%|█▋ | 60696/371472 [4:49:55<23:44:03, 3.64it/s] 16%|█▋ | 60697/371472 [4:49:55<23:02:18, 3.75it/s] 16%|█▋ | 60698/371472 [4:49:56<22:56:03, 3.76it/s] 16%|█▋ | 60699/371472 [4:49:56<22:13:24, 3.88it/s] 16%|█▋ | 60700/371472 [4:49:56<22:45:03, 3.79it/s] {'loss': 4.2011, 'learning_rate': 8.53342024176345e-07, 'epoch': 2.61} + 16%|█▋ | 60700/371472 [4:49:56<22:45:03, 3.79it/s] 16%|█▋ | 60701/371472 [4:49:57<22:48:17, 3.79it/s] 16%|█▋ | 60702/371472 [4:49:57<22:35:40, 3.82it/s] 16%|█▋ | 60703/371472 [4:49:57<24:00:54, 3.59it/s] 16%|█▋ | 60704/371472 [4:49:57<23:55:22, 3.61it/s] 16%|█▋ | 60705/371472 [4:49:58<24:15:25, 3.56it/s] 16%|█▋ | 60706/371472 [4:49:58<23:51:23, 3.62it/s] 16%|█▋ | 60707/371472 [4:49:58<24:24:01, 3.54it/s] 16%|█▋ | 60708/371472 [4:49:58<23:24:55, 3.69it/s] 16%|█▋ | 60709/371472 [4:49:59<25:06:20, 3.44it/s] 16%|█▋ | 60710/371472 [4:49:59<27:24:29, 3.15it/s] 16%|█▋ | 60711/371472 [4:49:59<26:08:09, 3.30it/s] 16%|█▋ | 60712/371472 [4:50:00<26:44:44, 3.23it/s] 16%|█▋ | 60713/371472 [4:50:00<26:06:47, 3.31it/s] 16%|█▋ | 60714/371472 [4:50:00<25:45:22, 3.35it/s] 16%|█▋ | 60715/371472 [4:50:01<25:00:31, 3.45it/s] 16%|█▋ | 60716/371472 [4:50:01<25:29:29, 3.39it/s] 16%|█▋ | 60717/371472 [4:50:01<24:15:55, 3.56it/s] 16%|█▋ | 60718/371472 [4:50:01<24:17:00, 3.55it/s] 16%|█▋ | 60719/371472 [4:50:02<23:34:38, 3.66it/s] 16%|█▋ | 60720/371472 [4:50:02<23:38:55, 3.65it/s] {'loss': 4.1579, 'learning_rate': 8.532935422008662e-07, 'epoch': 2.62} + 16%|█▋ | 60720/371472 [4:50:02<23:38:55, 3.65it/s] 16%|█▋ | 60721/371472 [4:50:02<22:58:34, 3.76it/s] 16%|█▋ | 60722/371472 [4:50:03<23:13:16, 3.72it/s] 16%|█▋ | 60723/371472 [4:50:03<23:59:51, 3.60it/s] 16%|█▋ | 60724/371472 [4:50:03<23:12:30, 3.72it/s] 16%|█▋ | 60725/371472 [4:50:03<23:16:47, 3.71it/s] 16%|█▋ | 60726/371472 [4:50:04<22:52:33, 3.77it/s] 16%|█▋ | 60727/371472 [4:50:04<23:41:40, 3.64it/s] 16%|█▋ | 60728/371472 [4:50:04<23:33:30, 3.66it/s] 16%|█▋ | 60729/371472 [4:50:04<23:20:27, 3.70it/s] 16%|█▋ | 60730/371472 [4:50:05<26:05:27, 3.31it/s] 16%|█▋ | 60731/371472 [4:50:05<24:53:57, 3.47it/s] 16%|█▋ | 60732/371472 [4:50:05<23:47:38, 3.63it/s] 16%|█▋ | 60733/371472 [4:50:06<23:56:29, 3.61it/s] 16%|█▋ | 60734/371472 [4:50:06<23:08:39, 3.73it/s] 16%|█▋ | 60735/371472 [4:50:06<22:52:06, 3.77it/s] 16%|█▋ | 60736/371472 [4:50:06<22:43:36, 3.80it/s] 16%|█▋ | 60737/371472 [4:50:07<22:08:41, 3.90it/s] 16%|█▋ | 60738/371472 [4:50:07<21:45:20, 3.97it/s] 16%|█▋ | 60739/371472 [4:50:07<23:03:02, 3.74it/s] 16%|█▋ | 60740/371472 [4:50:07<22:35:32, 3.82it/s] {'loss': 4.3445, 'learning_rate': 8.532450602253872e-07, 'epoch': 2.62} + 16%|█▋ | 60740/371472 [4:50:07<22:35:32, 3.82it/s] 16%|█▋ | 60741/371472 [4:50:08<23:07:37, 3.73it/s] 16%|█▋ | 60742/371472 [4:50:08<23:36:58, 3.65it/s] 16%|█▋ | 60743/371472 [4:50:08<26:28:18, 3.26it/s] 16%|█▋ | 60744/371472 [4:50:09<25:46:40, 3.35it/s] 16%|█▋ | 60745/371472 [4:50:09<25:37:46, 3.37it/s] 16%|█▋ | 60746/371472 [4:50:09<26:01:16, 3.32it/s] 16%|█▋ | 60747/371472 [4:50:10<26:47:49, 3.22it/s] 16%|█▋ | 60748/371472 [4:50:10<25:39:52, 3.36it/s] 16%|█▋ | 60749/371472 [4:50:10<30:49:23, 2.80it/s] 16%|█▋ | 60750/371472 [4:50:11<28:26:16, 3.04it/s] 16%|█▋ | 60751/371472 [4:50:11<27:08:19, 3.18it/s] 16%|█▋ | 60752/371472 [4:50:11<26:07:24, 3.30it/s] 16%|█▋ | 60753/371472 [4:50:11<26:05:34, 3.31it/s] 16%|█▋ | 60754/371472 [4:50:12<26:21:57, 3.27it/s] 16%|█▋ | 60755/371472 [4:50:12<25:05:14, 3.44it/s] 16%|█▋ | 60756/371472 [4:50:12<24:48:38, 3.48it/s] 16%|█▋ | 60757/371472 [4:50:13<25:16:56, 3.41it/s] 16%|█▋ | 60758/371472 [4:50:13<24:57:25, 3.46it/s] 16%|█▋ | 60759/371472 [4:50:13<24:51:48, 3.47it/s] 16%|█▋ | 60760/371472 [4:50:13<25:08:51, 3.43it/s] {'loss': 4.2757, 'learning_rate': 8.531965782499084e-07, 'epoch': 2.62} + 16%|█▋ | 60760/371472 [4:50:13<25:08:51, 3.43it/s] 16%|█▋ | 60761/371472 [4:50:14<25:31:25, 3.38it/s] 16%|█▋ | 60762/371472 [4:50:14<24:13:53, 3.56it/s] 16%|█▋ | 60763/371472 [4:50:14<23:03:17, 3.74it/s] 16%|█▋ | 60764/371472 [4:50:15<22:43:44, 3.80it/s] 16%|█▋ | 60765/371472 [4:50:15<22:22:45, 3.86it/s] 16%|█▋ | 60766/371472 [4:50:15<22:51:20, 3.78it/s] 16%|█▋ | 60767/371472 [4:50:15<22:50:00, 3.78it/s] 16%|█▋ | 60768/371472 [4:50:16<22:51:31, 3.78it/s] 16%|█▋ | 60769/371472 [4:50:16<24:05:19, 3.58it/s] 16%|█▋ | 60770/371472 [4:50:16<25:21:36, 3.40it/s] 16%|█▋ | 60771/371472 [4:50:17<26:45:10, 3.23it/s] 16%|█▋ | 60772/371472 [4:50:17<25:29:40, 3.39it/s] 16%|█▋ | 60773/371472 [4:50:17<24:41:50, 3.49it/s] 16%|█▋ | 60774/371472 [4:50:17<25:17:27, 3.41it/s] 16%|█▋ | 60775/371472 [4:50:18<23:52:26, 3.61it/s] 16%|█▋ | 60776/371472 [4:50:18<23:59:10, 3.60it/s] 16%|█▋ | 60777/371472 [4:50:18<23:43:38, 3.64it/s] 16%|█▋ | 60778/371472 [4:50:18<24:15:44, 3.56it/s] 16%|█▋ | 60779/371472 [4:50:19<23:27:51, 3.68it/s] 16%|█▋ | 60780/371472 [4:50:19<23:52:19, 3.62it/s] {'loss': 4.1861, 'learning_rate': 8.531480962744295e-07, 'epoch': 2.62} + 16%|█▋ | 60780/371472 [4:50:19<23:52:19, 3.62it/s] 16%|█▋ | 60781/371472 [4:50:19<22:53:38, 3.77it/s] 16%|█▋ | 60782/371472 [4:50:20<24:41:22, 3.50it/s] 16%|█▋ | 60783/371472 [4:50:20<26:29:51, 3.26it/s] 16%|█▋ | 60784/371472 [4:50:20<25:54:59, 3.33it/s] 16%|█▋ | 60785/371472 [4:50:21<25:53:12, 3.33it/s] 16%|█▋ | 60786/371472 [4:50:21<24:14:14, 3.56it/s] 16%|█▋ | 60787/371472 [4:50:21<25:17:38, 3.41it/s] 16%|█▋ | 60788/371472 [4:50:21<25:03:28, 3.44it/s] 16%|█▋ | 60789/371472 [4:50:22<23:54:24, 3.61it/s] 16%|█▋ | 60790/371472 [4:50:22<24:33:15, 3.51it/s] 16%|█▋ | 60791/371472 [4:50:22<24:30:49, 3.52it/s] 16%|█▋ | 60792/371472 [4:50:22<24:54:20, 3.47it/s] 16%|█▋ | 60793/371472 [4:50:23<23:59:06, 3.60it/s] 16%|█▋ | 60794/371472 [4:50:23<22:55:45, 3.76it/s] 16%|█▋ | 60795/371472 [4:50:23<23:57:15, 3.60it/s] 16%|█▋ | 60796/371472 [4:50:24<23:03:08, 3.74it/s] 16%|█▋ | 60797/371472 [4:50:24<25:36:50, 3.37it/s] 16%|█▋ | 60798/371472 [4:50:24<24:59:19, 3.45it/s] 16%|█▋ | 60799/371472 [4:50:24<24:12:07, 3.57it/s] 16%|█▋ | 60800/371472 [4:50:25<24:39:20, 3.50it/s] {'loss': 4.3216, 'learning_rate': 8.530996142989507e-07, 'epoch': 2.62} + 16%|█▋ | 60800/371472 [4:50:25<24:39:20, 3.50it/s] 16%|█▋ | 60801/371472 [4:50:25<24:09:17, 3.57it/s] 16%|█▋ | 60802/371472 [4:50:25<24:13:10, 3.56it/s] 16%|█▋ | 60803/371472 [4:50:26<25:55:13, 3.33it/s] 16%|█▋ | 60804/371472 [4:50:26<24:09:25, 3.57it/s] 16%|█▋ | 60805/371472 [4:50:26<23:41:26, 3.64it/s] 16%|█▋ | 60806/371472 [4:50:26<23:11:48, 3.72it/s] 16%|█▋ | 60807/371472 [4:50:27<22:59:10, 3.75it/s] 16%|█▋ | 60808/371472 [4:50:27<23:07:26, 3.73it/s] 16%|█▋ | 60809/371472 [4:50:27<23:02:44, 3.74it/s] 16%|█▋ | 60810/371472 [4:50:27<22:59:13, 3.75it/s] 16%|█▋ | 60811/371472 [4:50:28<22:42:13, 3.80it/s] 16%|█▋ | 60812/371472 [4:50:28<22:22:22, 3.86it/s] 16%|█▋ | 60813/371472 [4:50:28<22:41:26, 3.80it/s] 16%|█▋ | 60814/371472 [4:50:28<22:34:18, 3.82it/s] 16%|█▋ | 60815/371472 [4:50:29<22:13:31, 3.88it/s] 16%|█▋ | 60816/371472 [4:50:29<25:25:14, 3.39it/s] 16%|█▋ | 60817/371472 [4:50:29<24:26:37, 3.53it/s] 16%|█▋ | 60818/371472 [4:50:30<24:09:27, 3.57it/s] 16%|█▋ | 60819/371472 [4:50:30<23:13:52, 3.71it/s] 16%|█▋ | 60820/371472 [4:50:30<23:09:33, 3.73it/s] {'loss': 4.3104, 'learning_rate': 8.530511323234717e-07, 'epoch': 2.62} + 16%|█▋ | 60820/371472 [4:50:30<23:09:33, 3.73it/s] 16%|█▋ | 60821/371472 [4:50:30<22:56:00, 3.76it/s] 16%|█▋ | 60822/371472 [4:50:31<22:59:36, 3.75it/s] 16%|█▋ | 60823/371472 [4:50:31<22:36:11, 3.82it/s] 16%|█▋ | 60824/371472 [4:50:31<23:43:08, 3.64it/s] 16%|█▋ | 60825/371472 [4:50:31<23:11:37, 3.72it/s] 16%|█▋ | 60826/371472 [4:50:32<23:20:21, 3.70it/s] 16%|█▋ | 60827/371472 [4:50:32<24:25:21, 3.53it/s] 16%|█▋ | 60828/371472 [4:50:32<23:18:56, 3.70it/s] 16%|█▋ | 60829/371472 [4:50:33<25:40:52, 3.36it/s] 16%|█▋ | 60830/371472 [4:50:33<26:17:20, 3.28it/s] 16%|█▋ | 60831/371472 [4:50:33<25:01:37, 3.45it/s] 16%|█▋ | 60832/371472 [4:50:34<24:23:20, 3.54it/s] 16%|█▋ | 60833/371472 [4:50:34<23:59:32, 3.60it/s] 16%|█▋ | 60834/371472 [4:50:34<24:26:55, 3.53it/s] 16%|█▋ | 60835/371472 [4:50:34<25:07:58, 3.43it/s] 16%|█▋ | 60836/371472 [4:50:35<24:07:31, 3.58it/s] 16%|█▋ | 60837/371472 [4:50:35<24:32:26, 3.52it/s] 16%|█▋ | 60838/371472 [4:50:35<24:38:46, 3.50it/s] 16%|█▋ | 60839/371472 [4:50:35<23:31:35, 3.67it/s] 16%|█▋ | 60840/371472 [4:50:36<22:55:08, 3.76it/s] {'loss': 4.31, 'learning_rate': 8.530026503479928e-07, 'epoch': 2.62} + 16%|█▋ | 60840/371472 [4:50:36<22:55:08, 3.76it/s] 16%|█▋ | 60841/371472 [4:50:36<22:57:47, 3.76it/s] 16%|█▋ | 60842/371472 [4:50:36<23:10:59, 3.72it/s] 16%|█▋ | 60843/371472 [4:50:36<22:33:01, 3.83it/s] 16%|█▋ | 60844/371472 [4:50:37<22:48:36, 3.78it/s] 16%|█▋ | 60845/371472 [4:50:37<22:33:43, 3.82it/s] 16%|█▋ | 60846/371472 [4:50:37<22:28:42, 3.84it/s] 16%|█▋ | 60847/371472 [4:50:38<22:08:45, 3.90it/s] 16%|█▋ | 60848/371472 [4:50:38<22:57:51, 3.76it/s] 16%|█▋ | 60849/371472 [4:50:38<23:29:22, 3.67it/s] 16%|█▋ | 60850/371472 [4:50:38<23:20:31, 3.70it/s] 16%|█▋ | 60851/371472 [4:50:39<24:17:35, 3.55it/s] 16%|█▋ | 60852/371472 [4:50:39<23:37:44, 3.65it/s] 16%|█▋ | 60853/371472 [4:50:39<24:53:08, 3.47it/s] 16%|█▋ | 60854/371472 [4:50:40<23:53:06, 3.61it/s] 16%|█▋ | 60855/371472 [4:50:40<23:20:13, 3.70it/s] 16%|█▋ | 60856/371472 [4:50:40<23:16:33, 3.71it/s] 16%|█▋ | 60857/371472 [4:50:40<24:19:58, 3.55it/s] 16%|█▋ | 60858/371472 [4:50:41<23:41:08, 3.64it/s] 16%|█▋ | 60859/371472 [4:50:41<23:08:28, 3.73it/s] 16%|█▋ | 60860/371472 [4:50:41<24:29:13, 3.52it/s] {'loss': 4.2184, 'learning_rate': 8.529541683725139e-07, 'epoch': 2.62} + 16%|█▋ | 60860/371472 [4:50:41<24:29:13, 3.52it/s] 16%|█▋ | 60861/371472 [4:50:41<24:20:22, 3.54it/s] 16%|█▋ | 60862/371472 [4:50:42<24:14:15, 3.56it/s] 16%|█▋ | 60863/371472 [4:50:42<24:23:00, 3.54it/s] 16%|█▋ | 60864/371472 [4:50:42<24:51:05, 3.47it/s] 16%|█▋ | 60865/371472 [4:50:43<25:39:26, 3.36it/s] 16%|█▋ | 60866/371472 [4:50:43<24:43:58, 3.49it/s] 16%|█▋ | 60867/371472 [4:50:43<24:45:14, 3.49it/s] 16%|█▋ | 60868/371472 [4:50:43<23:30:57, 3.67it/s] 16%|█▋ | 60869/371472 [4:50:44<23:20:03, 3.70it/s] 16%|█▋ | 60870/371472 [4:50:44<23:58:15, 3.60it/s] 16%|█▋ | 60871/371472 [4:50:44<23:37:55, 3.65it/s] 16%|█▋ | 60872/371472 [4:50:44<22:41:17, 3.80it/s] 16%|█▋ | 60873/371472 [4:50:45<26:05:28, 3.31it/s] 16%|█▋ | 60874/371472 [4:50:45<25:08:04, 3.43it/s] 16%|█▋ | 60875/371472 [4:50:45<24:25:58, 3.53it/s] 16%|█▋ | 60876/371472 [4:50:46<24:01:15, 3.59it/s] 16%|█▋ | 60877/371472 [4:50:46<24:51:13, 3.47it/s] 16%|█▋ | 60878/371472 [4:50:46<23:52:30, 3.61it/s] 16%|█▋ | 60879/371472 [4:50:46<23:11:31, 3.72it/s] 16%|█▋ | 60880/371472 [4:50:47<23:25:51, 3.68it/s] {'loss': 4.0942, 'learning_rate': 8.52905686397035e-07, 'epoch': 2.62} + 16%|█▋ | 60880/371472 [4:50:47<23:25:51, 3.68it/s] 16%|█▋ | 60881/371472 [4:50:47<22:32:57, 3.83it/s] 16%|█▋ | 60882/371472 [4:50:47<23:21:24, 3.69it/s] 16%|█▋ | 60883/371472 [4:50:48<23:45:20, 3.63it/s] 16%|█▋ | 60884/371472 [4:50:48<23:10:14, 3.72it/s] 16%|█▋ | 60885/371472 [4:50:48<24:42:21, 3.49it/s] 16%|█▋ | 60886/371472 [4:50:48<24:31:19, 3.52it/s] 16%|█▋ | 60887/371472 [4:50:49<25:02:06, 3.45it/s] 16%|█▋ | 60888/371472 [4:50:49<24:26:28, 3.53it/s] 16%|█▋ | 60889/371472 [4:50:49<23:38:29, 3.65it/s] 16%|█▋ | 60890/371472 [4:50:50<23:41:30, 3.64it/s] 16%|█▋ | 60891/371472 [4:50:50<24:17:01, 3.55it/s] 16%|█▋ | 60892/371472 [4:50:50<24:09:08, 3.57it/s] 16%|█▋ | 60893/371472 [4:50:50<23:10:30, 3.72it/s] 16%|█▋ | 60894/371472 [4:50:51<23:12:25, 3.72it/s] 16%|█▋ | 60895/371472 [4:50:51<25:16:59, 3.41it/s] 16%|█▋ | 60896/371472 [4:50:51<23:58:19, 3.60it/s] 16%|█▋ | 60897/371472 [4:50:51<23:12:12, 3.72it/s] 16%|█▋ | 60898/371472 [4:50:52<23:23:09, 3.69it/s] 16%|█▋ | 60899/371472 [4:50:52<23:00:38, 3.75it/s] 16%|█▋ | 60900/371472 [4:50:52<23:29:40, 3.67it/s] {'loss': 4.3643, 'learning_rate': 8.52857204421556e-07, 'epoch': 2.62} + 16%|█▋ | 60900/371472 [4:50:52<23:29:40, 3.67it/s] 16%|█▋ | 60901/371472 [4:50:53<23:08:25, 3.73it/s] 16%|█▋ | 60902/371472 [4:50:53<23:13:30, 3.71it/s] 16%|█▋ | 60903/371472 [4:50:53<23:49:05, 3.62it/s] 16%|█▋ | 60904/371472 [4:50:53<23:15:01, 3.71it/s] 16%|█▋ | 60905/371472 [4:50:54<23:23:46, 3.69it/s] 16%|█▋ | 60906/371472 [4:50:54<26:16:01, 3.28it/s] 16%|█▋ | 60907/371472 [4:50:54<27:13:37, 3.17it/s] 16%|█▋ | 60908/371472 [4:50:55<26:16:50, 3.28it/s] 16%|█▋ | 60909/371472 [4:50:55<24:39:57, 3.50it/s] 16%|█▋ | 60910/371472 [4:50:55<24:49:03, 3.48it/s] 16%|█▋ | 60911/371472 [4:50:55<23:57:53, 3.60it/s] 16%|█▋ | 60912/371472 [4:50:56<22:59:21, 3.75it/s] 16%|█▋ | 60913/371472 [4:50:56<23:26:51, 3.68it/s] 16%|█▋ | 60914/371472 [4:50:56<25:04:15, 3.44it/s] 16%|█▋ | 60915/371472 [4:50:57<24:15:54, 3.56it/s] 16%|█▋ | 60916/371472 [4:50:57<23:45:57, 3.63it/s] 16%|█▋ | 60917/371472 [4:50:57<24:19:49, 3.55it/s] 16%|█▋ | 60918/371472 [4:50:57<25:54:51, 3.33it/s] 16%|█▋ | 60919/371472 [4:50:58<24:24:24, 3.53it/s] 16%|█▋ | 60920/371472 [4:50:58<23:20:36, 3.70it/s] {'loss': 3.9566, 'learning_rate': 8.528087224460773e-07, 'epoch': 2.62} + 16%|█▋ | 60920/371472 [4:50:58<23:20:36, 3.70it/s] 16%|█▋ | 60921/371472 [4:50:58<23:47:08, 3.63it/s] 16%|█▋ | 60922/371472 [4:50:59<24:25:44, 3.53it/s] 16%|█▋ | 60923/371472 [4:50:59<24:27:47, 3.53it/s] 16%|█▋ | 60924/371472 [4:50:59<23:48:18, 3.62it/s] 16%|█▋ | 60925/371472 [4:50:59<23:00:09, 3.75it/s] 16%|█▋ | 60926/371472 [4:51:00<22:47:10, 3.79it/s] 16%|█▋ | 60927/371472 [4:51:00<24:36:21, 3.51it/s] 16%|█▋ | 60928/371472 [4:51:00<24:45:20, 3.48it/s] 16%|█▋ | 60929/371472 [4:51:01<25:24:29, 3.40it/s] 16%|█▋ | 60930/371472 [4:51:01<27:19:20, 3.16it/s] 16%|█▋ | 60931/371472 [4:51:01<26:12:02, 3.29it/s] 16%|█▋ | 60932/371472 [4:51:01<27:07:37, 3.18it/s] 16%|█▋ | 60933/371472 [4:51:02<25:51:41, 3.34it/s] 16%|█▋ | 60934/371472 [4:51:02<26:14:10, 3.29it/s] 16%|█▋ | 60935/371472 [4:51:02<26:27:30, 3.26it/s] 16%|█▋ | 60936/371472 [4:51:03<25:13:31, 3.42it/s] 16%|█▋ | 60937/371472 [4:51:03<25:06:38, 3.44it/s] 16%|█▋ | 60938/371472 [4:51:03<24:28:17, 3.52it/s] 16%|█▋ | 60939/371472 [4:51:03<24:06:14, 3.58it/s] 16%|█▋ | 60940/371472 [4:51:04<23:00:12, 3.75it/s] {'loss': 4.2772, 'learning_rate': 8.527602404705984e-07, 'epoch': 2.62} + 16%|█▋ | 60940/371472 [4:51:04<23:00:12, 3.75it/s] 16%|█▋ | 60941/371472 [4:51:04<23:06:29, 3.73it/s] 16%|█▋ | 60942/371472 [4:51:04<22:47:12, 3.79it/s] 16%|█▋ | 60943/371472 [4:51:04<22:23:55, 3.85it/s] 16%|█▋ | 60944/371472 [4:51:05<21:58:01, 3.93it/s] 16%|█▋ | 60945/371472 [4:51:05<22:58:08, 3.76it/s] 16%|█▋ | 60946/371472 [4:51:05<23:51:35, 3.62it/s] 16%|█▋ | 60947/371472 [4:51:06<24:07:24, 3.58it/s] 16%|█▋ | 60948/371472 [4:51:06<23:00:17, 3.75it/s] 16%|█▋ | 60949/371472 [4:51:06<23:06:59, 3.73it/s] 16%|█▋ | 60950/371472 [4:51:06<22:57:17, 3.76it/s] 16%|█▋ | 60951/371472 [4:51:07<22:51:06, 3.77it/s] 16%|█▋ | 60952/371472 [4:51:07<23:50:28, 3.62it/s] 16%|█▋ | 60953/371472 [4:51:07<23:54:01, 3.61it/s] 16%|█▋ | 60954/371472 [4:51:07<23:45:13, 3.63it/s] 16%|█▋ | 60955/371472 [4:51:08<23:16:58, 3.70it/s] 16%|█▋ | 60956/371472 [4:51:08<24:32:17, 3.52it/s] 16%|█▋ | 60957/371472 [4:51:08<26:16:04, 3.28it/s] 16%|█▋ | 60958/371472 [4:51:09<24:38:26, 3.50it/s] 16%|█▋ | 60959/371472 [4:51:09<25:07:16, 3.43it/s] 16%|█▋ | 60960/371472 [4:51:09<27:55:06, 3.09it/s] {'loss': 4.1774, 'learning_rate': 8.527117584951193e-07, 'epoch': 2.63} + 16%|█▋ | 60960/371472 [4:51:09<27:55:06, 3.09it/s] 16%|█▋ | 60961/371472 [4:51:10<26:28:14, 3.26it/s] 16%|█▋ | 60962/371472 [4:51:10<26:11:49, 3.29it/s] 16%|█▋ | 60963/371472 [4:51:10<25:28:22, 3.39it/s] 16%|█▋ | 60964/371472 [4:51:10<24:50:30, 3.47it/s] 16%|█▋ | 60965/371472 [4:51:11<24:29:29, 3.52it/s] 16%|█▋ | 60966/371472 [4:51:11<23:15:34, 3.71it/s] 16%|█▋ | 60967/371472 [4:51:11<22:24:52, 3.85it/s] 16%|█▋ | 60968/371472 [4:51:11<22:06:39, 3.90it/s] 16%|█▋ | 60969/371472 [4:51:12<22:44:38, 3.79it/s] 16%|█▋ | 60970/371472 [4:51:12<23:13:13, 3.71it/s] 16%|█▋ | 60971/371472 [4:51:12<23:05:30, 3.74it/s] 16%|█▋ | 60972/371472 [4:51:13<22:45:05, 3.79it/s] 16%|█▋ | 60973/371472 [4:51:13<23:30:54, 3.67it/s] 16%|█▋ | 60974/371472 [4:51:13<22:49:33, 3.78it/s] 16%|█▋ | 60975/371472 [4:51:13<22:15:20, 3.88it/s] 16%|█▋ | 60976/371472 [4:51:14<23:18:18, 3.70it/s] 16%|█▋ | 60977/371472 [4:51:14<24:39:52, 3.50it/s] 16%|█▋ | 60978/371472 [4:51:14<24:42:29, 3.49it/s] 16%|█▋ | 60979/371472 [4:51:14<24:03:00, 3.59it/s] 16%|█▋ | 60980/371472 [4:51:15<23:09:12, 3.73it/s] {'loss': 4.1943, 'learning_rate': 8.526632765196405e-07, 'epoch': 2.63} + 16%|█▋ | 60980/371472 [4:51:15<23:09:12, 3.73it/s] 16%|█▋ | 60981/371472 [4:51:15<23:10:02, 3.72it/s] 16%|█▋ | 60982/371472 [4:51:15<24:24:43, 3.53it/s] 16%|█▋ | 60983/371472 [4:51:16<23:54:56, 3.61it/s] 16%|█▋ | 60984/371472 [4:51:16<22:59:38, 3.75it/s] 16%|█▋ | 60985/371472 [4:51:16<22:52:09, 3.77it/s] 16%|█▋ | 60986/371472 [4:51:16<24:38:13, 3.50it/s] 16%|█▋ | 60987/371472 [4:51:17<24:00:56, 3.59it/s] 16%|█▋ | 60988/371472 [4:51:17<23:39:47, 3.64it/s] 16%|█▋ | 60989/371472 [4:51:17<24:39:28, 3.50it/s] 16%|█▋ | 60990/371472 [4:51:18<24:13:38, 3.56it/s] 16%|█▋ | 60991/371472 [4:51:18<24:04:31, 3.58it/s] 16%|█▋ | 60992/371472 [4:51:18<23:56:21, 3.60it/s] 16%|█▋ | 60993/371472 [4:51:18<26:39:01, 3.24it/s] 16%|█▋ | 60994/371472 [4:51:19<24:42:17, 3.49it/s] 16%|█▋ | 60995/371472 [4:51:19<24:53:03, 3.47it/s] 16%|█▋ | 60996/371472 [4:51:19<26:12:18, 3.29it/s] 16%|█▋ | 60997/371472 [4:51:20<26:01:27, 3.31it/s] 16%|█▋ | 60998/371472 [4:51:20<24:55:08, 3.46it/s] 16%|█▋ | 60999/371472 [4:51:20<24:03:23, 3.59it/s] 16%|█▋ | 61000/371472 [4:51:20<23:27:28, 3.68it/s] {'loss': 4.1812, 'learning_rate': 8.526147945441618e-07, 'epoch': 2.63} + 16%|█▋ | 61000/371472 [4:51:20<23:27:28, 3.68it/s] 16%|█▋ | 61001/371472 [4:51:21<22:54:12, 3.77it/s] 16%|█▋ | 61002/371472 [4:51:21<24:21:46, 3.54it/s] 16%|█▋ | 61003/371472 [4:51:21<24:42:38, 3.49it/s] 16%|█▋ | 61004/371472 [4:51:22<25:06:36, 3.43it/s] 16%|█▋ | 61005/371472 [4:51:22<24:09:21, 3.57it/s] 16%|█▋ | 61006/371472 [4:51:22<24:42:34, 3.49it/s] 16%|█▋ | 61007/371472 [4:51:22<24:19:17, 3.55it/s] 16%|█▋ | 61008/371472 [4:51:23<23:48:14, 3.62it/s] 16%|█▋ | 61009/371472 [4:51:23<23:44:13, 3.63it/s] 16%|█▋ | 61010/371472 [4:51:23<23:07:16, 3.73it/s] 16%|█▋ | 61011/371472 [4:51:23<22:32:27, 3.83it/s] 16%|█▋ | 61012/371472 [4:51:24<22:17:39, 3.87it/s] 16%|█▋ | 61013/371472 [4:51:24<23:02:55, 3.74it/s] 16%|█▋ | 61014/371472 [4:51:24<23:11:55, 3.72it/s] 16%|█▋ | 61015/371472 [4:51:24<22:32:14, 3.83it/s] 16%|█▋ | 61016/371472 [4:51:25<22:57:26, 3.76it/s] 16%|█▋ | 61017/371472 [4:51:25<22:43:23, 3.80it/s] 16%|█▋ | 61018/371472 [4:51:25<22:15:39, 3.87it/s] 16%|█▋ | 61019/371472 [4:51:26<21:53:06, 3.94it/s] 16%|█▋ | 61020/371472 [4:51:26<23:39:54, 3.64it/s] {'loss': 4.3691, 'learning_rate': 8.525663125686828e-07, 'epoch': 2.63} + 16%|█▋ | 61020/371472 [4:51:26<23:39:54, 3.64it/s] 16%|█▋ | 61021/371472 [4:51:26<23:57:29, 3.60it/s] 16%|█▋ | 61022/371472 [4:51:26<23:25:47, 3.68it/s] 16%|█▋ | 61023/371472 [4:51:27<23:24:49, 3.68it/s] 16%|█▋ | 61024/371472 [4:51:27<23:17:17, 3.70it/s] 16%|█▋ | 61025/371472 [4:51:27<23:26:16, 3.68it/s] 16%|█▋ | 61026/371472 [4:51:28<26:09:28, 3.30it/s] 16%|█▋ | 61027/371472 [4:51:28<24:48:21, 3.48it/s] 16%|█▋ | 61028/371472 [4:51:28<24:12:09, 3.56it/s] 16%|█▋ | 61029/371472 [4:51:28<24:44:15, 3.49it/s] 16%|█▋ | 61030/371472 [4:51:29<24:21:05, 3.54it/s] 16%|█▋ | 61031/371472 [4:51:29<24:40:56, 3.49it/s] 16%|█▋ | 61032/371472 [4:51:29<24:22:37, 3.54it/s] 16%|█▋ | 61033/371472 [4:51:29<23:37:37, 3.65it/s] 16%|█▋ | 61034/371472 [4:51:30<22:55:45, 3.76it/s] 16%|█▋ | 61035/371472 [4:51:30<23:02:25, 3.74it/s] 16%|█▋ | 61036/371472 [4:51:30<23:12:25, 3.72it/s] 16%|█▋ | 61037/371472 [4:51:31<22:51:13, 3.77it/s] 16%|█▋ | 61038/371472 [4:51:31<22:43:11, 3.80it/s] 16%|█▋ | 61039/371472 [4:51:31<23:25:49, 3.68it/s] 16%|█▋ | 61040/371472 [4:51:31<22:44:22, 3.79it/s] {'loss': 4.3042, 'learning_rate': 8.525178305932038e-07, 'epoch': 2.63} + 16%|█▋ | 61040/371472 [4:51:31<22:44:22, 3.79it/s] 16%|█▋ | 61041/371472 [4:51:32<23:02:52, 3.74it/s] 16%|█▋ | 61042/371472 [4:51:32<22:40:50, 3.80it/s] 16%|█▋ | 61043/371472 [4:51:32<22:58:32, 3.75it/s] 16%|█▋ | 61044/371472 [4:51:32<24:59:10, 3.45it/s] 16%|█▋ | 61045/371472 [4:51:33<24:52:23, 3.47it/s] 16%|█▋ | 61046/371472 [4:51:33<24:57:43, 3.45it/s] 16%|█▋ | 61047/371472 [4:51:33<26:02:45, 3.31it/s] 16%|█▋ | 61048/371472 [4:51:34<24:48:20, 3.48it/s] 16%|█▋ | 61049/371472 [4:51:34<25:07:32, 3.43it/s] 16%|█▋ | 61050/371472 [4:51:34<23:43:46, 3.63it/s] 16%|█▋ | 61051/371472 [4:51:34<23:38:39, 3.65it/s] 16%|█▋ | 61052/371472 [4:51:35<23:08:28, 3.73it/s] 16%|█▋ | 61053/371472 [4:51:35<22:46:18, 3.79it/s] 16%|█▋ | 61054/371472 [4:51:35<22:36:54, 3.81it/s] 16%|█▋ | 61055/371472 [4:51:35<22:08:03, 3.90it/s] 16%|█▋ | 61056/371472 [4:51:36<21:55:12, 3.93it/s] 16%|█▋ | 61057/371472 [4:51:36<21:35:15, 3.99it/s] 16%|█▋ | 61058/371472 [4:51:36<22:53:33, 3.77it/s] 16%|█▋ | 61059/371472 [4:51:37<23:21:07, 3.69it/s] 16%|█▋ | 61060/371472 [4:51:37<23:13:50, 3.71it/s] {'loss': 4.2198, 'learning_rate': 8.52469348617725e-07, 'epoch': 2.63} + 16%|█▋ | 61060/371472 [4:51:37<23:13:50, 3.71it/s] 16%|█▋ | 61061/371472 [4:51:37<33:47:29, 2.55it/s] 16%|█▋ | 61062/371472 [4:51:38<30:22:25, 2.84it/s] 16%|█▋ | 61063/371472 [4:51:38<27:41:02, 3.11it/s] 16%|█▋ | 61064/371472 [4:51:38<25:56:25, 3.32it/s] 16%|█▋ | 61065/371472 [4:51:38<24:26:56, 3.53it/s] 16%|█▋ | 61066/371472 [4:51:39<23:58:22, 3.60it/s] 16%|█▋ | 61067/371472 [4:51:39<24:49:37, 3.47it/s] 16%|█▋ | 61068/371472 [4:51:39<23:42:17, 3.64it/s] 16%|█▋ | 61069/371472 [4:51:40<26:12:13, 3.29it/s] 16%|█▋ | 61070/371472 [4:51:40<26:10:01, 3.30it/s] 16%|█▋ | 61071/371472 [4:51:40<25:22:50, 3.40it/s] 16%|█▋ | 61072/371472 [4:51:40<24:08:03, 3.57it/s] 16%|█▋ | 61073/371472 [4:51:41<23:30:59, 3.67it/s] 16%|█▋ | 61074/371472 [4:51:41<24:14:03, 3.56it/s] 16%|█▋ | 61075/371472 [4:51:41<24:15:07, 3.56it/s] 16%|█▋ | 61076/371472 [4:51:42<25:02:44, 3.44it/s] 16%|█▋ | 61077/371472 [4:51:42<23:55:22, 3.60it/s] 16%|█▋ | 61078/371472 [4:51:42<23:53:54, 3.61it/s] 16%|█▋ | 61079/371472 [4:51:42<23:36:25, 3.65it/s] 16%|█▋ | 61080/371472 [4:51:43<23:19:30, 3.70it/s] {'loss': 4.1068, 'learning_rate': 8.524208666422461e-07, 'epoch': 2.63} + 16%|█▋ | 61080/371472 [4:51:43<23:19:30, 3.70it/s] 16%|█▋ | 61081/371472 [4:51:43<22:47:00, 3.78it/s] 16%|█▋ | 61082/371472 [4:51:43<23:07:01, 3.73it/s] 16%|█▋ | 61083/371472 [4:51:43<23:14:59, 3.71it/s] 16%|█▋ | 61084/371472 [4:51:44<22:57:34, 3.76it/s] 16%|█▋ | 61085/371472 [4:51:44<22:59:16, 3.75it/s] 16%|█▋ | 61086/371472 [4:51:44<22:45:37, 3.79it/s] 16%|█▋ | 61087/371472 [4:51:45<22:43:14, 3.79it/s] 16%|█▋ | 61088/371472 [4:51:45<23:15:44, 3.71it/s] 16%|█▋ | 61089/371472 [4:51:45<23:18:47, 3.70it/s] 16%|█▋ | 61090/371472 [4:51:45<25:26:14, 3.39it/s] 16%|█▋ | 61091/371472 [4:51:46<24:15:22, 3.55it/s] 16%|█▋ | 61092/371472 [4:51:46<24:43:00, 3.49it/s] 16%|█▋ | 61093/371472 [4:51:46<25:52:31, 3.33it/s] 16%|█▋ | 61094/371472 [4:51:47<25:50:18, 3.34it/s] 16%|█▋ | 61095/371472 [4:51:47<24:22:27, 3.54it/s] 16%|█▋ | 61096/371472 [4:51:47<24:55:49, 3.46it/s] 16%|█▋ | 61097/371472 [4:51:47<23:54:39, 3.61it/s] 16%|█▋ | 61098/371472 [4:51:48<24:11:02, 3.56it/s] 16%|█▋ | 61099/371472 [4:51:48<27:05:06, 3.18it/s] 16%|█▋ | 61100/371472 [4:51:48<27:43:50, 3.11it/s] {'loss': 4.1193, 'learning_rate': 8.523723846667672e-07, 'epoch': 2.63} + 16%|█▋ | 61100/371472 [4:51:48<27:43:50, 3.11it/s] 16%|█▋ | 61101/371472 [4:51:49<26:28:36, 3.26it/s] 16%|█▋ | 61102/371472 [4:51:49<25:30:52, 3.38it/s] 16%|█▋ | 61103/371472 [4:51:49<26:23:26, 3.27it/s] 16%|█▋ | 61104/371472 [4:51:50<25:05:59, 3.43it/s] 16%|█▋ | 61105/371472 [4:51:50<25:53:30, 3.33it/s] 16%|█▋ | 61106/371472 [4:51:50<26:31:11, 3.25it/s] 16%|█▋ | 61107/371472 [4:51:50<25:10:29, 3.42it/s] 16%|█▋ | 61108/371472 [4:51:51<24:07:35, 3.57it/s] 16%|█▋ | 61109/371472 [4:51:51<24:40:07, 3.49it/s] 16%|█▋ | 61110/371472 [4:51:51<24:08:07, 3.57it/s] 16%|█▋ | 61111/371472 [4:51:52<23:53:44, 3.61it/s] 16%|█▋ | 61112/371472 [4:51:52<23:42:34, 3.64it/s] 16%|█▋ | 61113/371472 [4:51:52<25:02:06, 3.44it/s] 16%|█▋ | 61114/371472 [4:51:52<24:18:20, 3.55it/s] 16%|█▋ | 61115/371472 [4:51:53<24:58:01, 3.45it/s] 16%|█▋ | 61116/371472 [4:51:53<24:45:42, 3.48it/s] 16%|█▋ | 61117/371472 [4:51:53<25:07:29, 3.43it/s] 16%|█▋ | 61118/371472 [4:51:54<25:20:17, 3.40it/s] 16%|█▋ | 61119/371472 [4:51:54<25:26:11, 3.39it/s] 16%|█▋ | 61120/371472 [4:51:54<24:33:00, 3.51it/s] {'loss': 4.4358, 'learning_rate': 8.523239026912882e-07, 'epoch': 2.63} + 16%|█▋ | 61120/371472 [4:51:54<24:33:00, 3.51it/s] 16%|█▋ | 61121/371472 [4:51:54<23:41:40, 3.64it/s] 16%|█▋ | 61122/371472 [4:51:55<23:44:28, 3.63it/s] 16%|█▋ | 61123/371472 [4:51:55<23:05:58, 3.73it/s] 16%|█▋ | 61124/371472 [4:51:55<24:34:16, 3.51it/s] 16%|█▋ | 61125/371472 [4:51:56<23:34:33, 3.66it/s] 16%|█▋ | 61126/371472 [4:51:56<25:29:40, 3.38it/s] 16%|█▋ | 61127/371472 [4:51:56<24:17:28, 3.55it/s] 16%|█▋ | 61128/371472 [4:51:56<24:45:13, 3.48it/s] 16%|█▋ | 61129/371472 [4:51:57<25:10:14, 3.42it/s] 16%|█▋ | 61130/371472 [4:51:57<25:30:17, 3.38it/s] 16%|█▋ | 61131/371472 [4:51:57<24:55:02, 3.46it/s] 16%|█▋ | 61132/371472 [4:51:58<23:59:37, 3.59it/s] 16%|█▋ | 61133/371472 [4:51:58<23:41:15, 3.64it/s] 16%|█▋ | 61134/371472 [4:51:58<23:33:06, 3.66it/s] 16%|█▋ | 61135/371472 [4:51:58<24:25:24, 3.53it/s] 16%|█▋ | 61136/371472 [4:51:59<23:30:54, 3.67it/s] 16%|█▋ | 61137/371472 [4:51:59<23:50:50, 3.61it/s] 16%|█▋ | 61138/371472 [4:51:59<22:44:17, 3.79it/s] 16%|█▋ | 61139/371472 [4:51:59<23:40:57, 3.64it/s] 16%|█▋ | 61140/371472 [4:52:00<24:03:45, 3.58it/s] {'loss': 4.2313, 'learning_rate': 8.522754207158094e-07, 'epoch': 2.63} + 16%|█▋ | 61140/371472 [4:52:00<24:03:45, 3.58it/s] 16%|█▋ | 61141/371472 [4:52:00<23:48:56, 3.62it/s] 16%|█▋ | 61142/371472 [4:52:00<27:51:05, 3.10it/s] 16%|█▋ | 61143/371472 [4:52:01<27:45:44, 3.11it/s] 16%|█▋ | 61144/371472 [4:52:01<26:21:12, 3.27it/s] 16%|█▋ | 61145/371472 [4:52:01<25:05:40, 3.44it/s] 16%|█▋ | 61146/371472 [4:52:02<25:50:50, 3.34it/s] 16%|█▋ | 61147/371472 [4:52:02<24:56:37, 3.46it/s] 16%|█▋ | 61148/371472 [4:52:02<24:34:16, 3.51it/s] 16%|█▋ | 61149/371472 [4:52:02<23:52:33, 3.61it/s] 16%|█▋ | 61150/371472 [4:52:03<23:19:18, 3.70it/s] 16%|█▋ | 61151/371472 [4:52:03<24:03:23, 3.58it/s] 16%|█▋ | 61152/371472 [4:52:03<23:41:38, 3.64it/s] 16%|█▋ | 61153/371472 [4:52:03<23:07:25, 3.73it/s] 16%|█▋ | 61154/371472 [4:52:04<23:19:07, 3.70it/s] 16%|█▋ | 61155/371472 [4:52:04<22:41:37, 3.80it/s] 16%|█▋ | 61156/371472 [4:52:04<22:02:16, 3.91it/s] 16%|█▋ | 61157/371472 [4:52:05<22:05:44, 3.90it/s] 16%|█▋ | 61158/371472 [4:52:05<21:59:43, 3.92it/s] 16%|█▋ | 61159/371472 [4:52:05<22:07:18, 3.90it/s] 16%|█▋ | 61160/371472 [4:52:05<22:45:11, 3.79it/s] {'loss': 4.2971, 'learning_rate': 8.522269387403306e-07, 'epoch': 2.63} + 16%|█▋ | 61160/371472 [4:52:05<22:45:11, 3.79it/s] 16%|█▋ | 61161/371472 [4:52:06<22:45:19, 3.79it/s] 16%|█▋ | 61162/371472 [4:52:06<23:49:37, 3.62it/s] 16%|█▋ | 61163/371472 [4:52:06<23:12:00, 3.72it/s] 16%|█▋ | 61164/371472 [4:52:06<22:56:27, 3.76it/s] 16%|█�� | 61165/371472 [4:52:07<24:27:53, 3.52it/s] 16%|█▋ | 61166/371472 [4:52:07<23:27:54, 3.67it/s] 16%|█▋ | 61167/371472 [4:52:07<23:05:15, 3.73it/s] 16%|█▋ | 61168/371472 [4:52:07<22:53:07, 3.77it/s] 16%|█▋ | 61169/371472 [4:52:08<22:43:33, 3.79it/s] 16%|█▋ | 61170/371472 [4:52:08<24:00:49, 3.59it/s] 16%|█▋ | 61171/371472 [4:52:08<24:37:28, 3.50it/s] 16%|█▋ | 61172/371472 [4:52:09<26:50:39, 3.21it/s] 16%|█▋ | 61173/371472 [4:52:09<24:58:34, 3.45it/s] 16%|█▋ | 61174/371472 [4:52:09<24:58:24, 3.45it/s] 16%|█▋ | 61175/371472 [4:52:10<29:48:17, 2.89it/s] 16%|█▋ | 61176/371472 [4:52:10<28:01:05, 3.08it/s] 16%|█▋ | 61177/371472 [4:52:10<28:31:45, 3.02it/s] 16%|█▋ | 61178/371472 [4:52:11<27:28:59, 3.14it/s] 16%|█▋ | 61179/371472 [4:52:11<26:13:10, 3.29it/s] 16%|█▋ | 61180/371472 [4:52:11<25:07:43, 3.43it/s] {'loss': 4.2572, 'learning_rate': 8.521784567648516e-07, 'epoch': 2.64} + 16%|█▋ | 61180/371472 [4:52:11<25:07:43, 3.43it/s] 16%|█▋ | 61181/371472 [4:52:11<24:29:36, 3.52it/s] 16%|█▋ | 61182/371472 [4:52:12<25:07:29, 3.43it/s] 16%|█▋ | 61183/371472 [4:52:12<25:10:25, 3.42it/s] 16%|█▋ | 61184/371472 [4:52:12<25:41:46, 3.35it/s] 16%|█▋ | 61185/371472 [4:52:13<24:12:57, 3.56it/s] 16%|█▋ | 61186/371472 [4:52:13<23:28:21, 3.67it/s] 16%|█▋ | 61187/371472 [4:52:13<25:10:57, 3.42it/s] 16%|█▋ | 61188/371472 [4:52:13<23:55:27, 3.60it/s] 16%|█▋ | 61189/371472 [4:52:14<23:34:44, 3.66it/s] 16%|█▋ | 61190/371472 [4:52:14<23:00:43, 3.75it/s] 16%|█▋ | 61191/371472 [4:52:14<23:07:30, 3.73it/s] 16%|█▋ | 61192/371472 [4:52:14<23:45:43, 3.63it/s] 16%|█▋ | 61193/371472 [4:52:15<24:43:33, 3.49it/s] 16%|█▋ | 61194/371472 [4:52:15<24:47:37, 3.48it/s] 16%|█▋ | 61195/371472 [4:52:15<24:15:10, 3.55it/s] 16%|█▋ | 61196/371472 [4:52:16<24:05:27, 3.58it/s] 16%|█▋ | 61197/371472 [4:52:16<24:50:46, 3.47it/s] 16%|█▋ | 61198/371472 [4:52:16<24:20:11, 3.54it/s] 16%|█▋ | 61199/371472 [4:52:16<24:15:43, 3.55it/s] 16%|█▋ | 61200/371472 [4:52:17<23:14:09, 3.71it/s] {'loss': 4.2868, 'learning_rate': 8.521299747893727e-07, 'epoch': 2.64} + 16%|█▋ | 61200/371472 [4:52:17<23:14:09, 3.71it/s] 16%|█▋ | 61201/371472 [4:52:17<25:18:41, 3.41it/s] 16%|█▋ | 61202/371472 [4:52:17<24:24:26, 3.53it/s] 16%|█▋ | 61203/371472 [4:52:18<24:02:20, 3.59it/s] 16%|█▋ | 61204/371472 [4:52:18<24:24:53, 3.53it/s] 16%|█▋ | 61205/371472 [4:52:18<24:04:51, 3.58it/s] 16%|█▋ | 61206/371472 [4:52:18<24:29:59, 3.52it/s] 16%|█▋ | 61207/371472 [4:52:19<23:33:50, 3.66it/s] 16%|█▋ | 61208/371472 [4:52:19<24:44:14, 3.48it/s] 16%|█▋ | 61209/371472 [4:52:19<23:57:52, 3.60it/s] 16%|█▋ | 61210/371472 [4:52:20<25:57:04, 3.32it/s] 16%|█▋ | 61211/371472 [4:52:20<25:57:04, 3.32it/s] 16%|█▋ | 61212/371472 [4:52:20<25:16:53, 3.41it/s] 16%|█▋ | 61213/371472 [4:52:21<25:46:12, 3.34it/s] 16%|█▋ | 61214/371472 [4:52:21<24:27:23, 3.52it/s] 16%|█▋ | 61215/371472 [4:52:21<23:32:19, 3.66it/s] 16%|█▋ | 61216/371472 [4:52:21<23:02:50, 3.74it/s] 16%|█▋ | 61217/371472 [4:52:22<24:00:52, 3.59it/s] 16%|█▋ | 61218/371472 [4:52:22<24:55:29, 3.46it/s] 16%|█▋ | 61219/371472 [4:52:22<24:02:19, 3.59it/s] 16%|█▋ | 61220/371472 [4:52:22<24:35:12, 3.51it/s] {'loss': 4.1524, 'learning_rate': 8.520814928138938e-07, 'epoch': 2.64} + 16%|█▋ | 61220/371472 [4:52:22<24:35:12, 3.51it/s] 16%|█▋ | 61221/371472 [4:52:23<24:18:53, 3.54it/s] 16%|█▋ | 61222/371472 [4:52:23<24:20:57, 3.54it/s] 16%|█▋ | 61223/371472 [4:52:23<24:34:29, 3.51it/s] 16%|█▋ | 61224/371472 [4:52:24<27:13:46, 3.16it/s] 16%|█▋ | 61225/371472 [4:52:24<26:17:30, 3.28it/s] 16%|█▋ | 61226/371472 [4:52:24<25:35:23, 3.37it/s] 16%|█▋ | 61227/371472 [4:52:25<24:46:10, 3.48it/s] 16%|█▋ | 61228/371472 [4:52:25<24:40:01, 3.49it/s] 16%|█▋ | 61229/371472 [4:52:25<24:11:59, 3.56it/s] 16%|█▋ | 61230/371472 [4:52:25<23:31:23, 3.66it/s] 16%|█▋ | 61231/371472 [4:52:26<23:07:15, 3.73it/s] 16%|█▋ | 61232/371472 [4:52:26<24:25:50, 3.53it/s] 16%|█▋ | 61233/371472 [4:52:26<25:53:09, 3.33it/s] 16%|█▋ | 61234/371472 [4:52:27<24:34:33, 3.51it/s] 16%|█▋ | 61235/371472 [4:52:27<23:35:04, 3.65it/s] 16%|█▋ | 61236/371472 [4:52:27<23:12:41, 3.71it/s] 16%|█▋ | 61237/371472 [4:52:27<22:54:29, 3.76it/s] 16%|█▋ | 61238/371472 [4:52:28<24:39:31, 3.49it/s] 16%|█▋ | 61239/371472 [4:52:28<24:26:22, 3.53it/s] 16%|█▋ | 61240/371472 [4:52:28<24:34:31, 3.51it/s] {'loss': 4.267, 'learning_rate': 8.520330108384149e-07, 'epoch': 2.64} + 16%|█▋ | 61240/371472 [4:52:28<24:34:31, 3.51it/s] 16%|█▋ | 61241/371472 [4:52:28<25:02:01, 3.44it/s] 16%|█▋ | 61242/371472 [4:52:29<25:15:53, 3.41it/s] 16%|█▋ | 61243/371472 [4:52:29<24:08:26, 3.57it/s] 16%|█▋ | 61244/371472 [4:52:29<23:26:23, 3.68it/s] 16%|█▋ | 61245/371472 [4:52:30<22:49:56, 3.77it/s] 16%|█▋ | 61246/371472 [4:52:30<22:32:14, 3.82it/s] 16%|█▋ | 61247/371472 [4:52:30<22:29:23, 3.83it/s] 16%|█▋ | 61248/371472 [4:52:30<23:11:56, 3.71it/s] 16%|█▋ | 61249/371472 [4:52:31<23:46:45, 3.62it/s] 16%|█▋ | 61250/371472 [4:52:31<24:07:49, 3.57it/s] 16%|█▋ | 61251/371472 [4:52:31<24:16:59, 3.55it/s] 16%|█▋ | 61252/371472 [4:52:31<23:49:56, 3.62it/s] 16%|█▋ | 61253/371472 [4:52:32<22:53:01, 3.77it/s] 16%|█▋ | 61254/371472 [4:52:32<22:24:56, 3.84it/s] 16%|█▋ | 61255/371472 [4:52:32<23:21:46, 3.69it/s] 16%|█▋ | 61256/371472 [4:52:33<23:27:18, 3.67it/s] 16%|█▋ | 61257/371472 [4:52:33<22:52:51, 3.77it/s] 16%|█▋ | 61258/371472 [4:52:33<22:24:06, 3.85it/s] 16%|█▋ | 61259/371472 [4:52:33<22:43:06, 3.79it/s] 16%|█▋ | 61260/371472 [4:52:34<22:38:13, 3.81it/s] {'loss': 4.1479, 'learning_rate': 8.51984528862936e-07, 'epoch': 2.64} + 16%|█▋ | 61260/371472 [4:52:34<22:38:13, 3.81it/s] 16%|█▋ | 61261/371472 [4:52:34<22:06:49, 3.90it/s] 16%|█▋ | 61262/371472 [4:52:34<22:38:35, 3.81it/s] 16%|█▋ | 61263/371472 [4:52:34<23:25:17, 3.68it/s] 16%|█▋ | 61264/371472 [4:52:35<23:45:17, 3.63it/s] 16%|█▋ | 61265/371472 [4:52:35<23:00:52, 3.74it/s] 16%|█▋ | 61266/371472 [4:52:35<22:45:52, 3.79it/s] 16%|█▋ | 61267/371472 [4:52:35<22:07:26, 3.89it/s] 16%|█▋ | 61268/371472 [4:52:36<22:32:28, 3.82it/s] 16%|█▋ | 61269/371472 [4:52:36<23:44:24, 3.63it/s] 16%|█▋ | 61270/371472 [4:52:36<24:34:14, 3.51it/s] 16%|█▋ | 61271/371472 [4:52:37<24:53:46, 3.46it/s] 16%|█▋ | 61272/371472 [4:52:37<23:42:32, 3.63it/s] 16%|█▋ | 61273/371472 [4:52:37<24:11:13, 3.56it/s] 16%|█▋ | 61274/371472 [4:52:37<24:34:13, 3.51it/s] 16%|█▋ | 61275/371472 [4:52:38<24:29:39, 3.52it/s] 16%|█▋ | 61276/371472 [4:52:38<23:52:14, 3.61it/s] 16%|█▋ | 61277/371472 [4:52:38<26:53:03, 3.21it/s] 16%|█▋ | 61278/371472 [4:52:39<25:26:10, 3.39it/s] 16%|█▋ | 61279/371472 [4:52:39<24:52:05, 3.46it/s] 16%|█▋ | 61280/371472 [4:52:39<24:40:33, 3.49it/s] {'loss': 4.1027, 'learning_rate': 8.519360468874571e-07, 'epoch': 2.64} + 16%|█▋ | 61280/371472 [4:52:39<24:40:33, 3.49it/s] 16%|█▋ | 61281/371472 [4:52:39<24:57:45, 3.45it/s] 16%|█▋ | 61282/371472 [4:52:40<25:09:48, 3.42it/s] 16%|█▋ | 61283/371472 [4:52:40<24:18:21, 3.54it/s] 16%|█▋ | 61284/371472 [4:52:40<23:52:23, 3.61it/s] 16%|█▋ | 61285/371472 [4:52:41<24:55:25, 3.46it/s] 16%|█▋ | 61286/371472 [4:52:41<23:46:28, 3.62it/s] 16%|█▋ | 61287/371472 [4:52:41<23:39:48, 3.64it/s] 16%|█▋ | 61288/371472 [4:52:41<23:43:34, 3.63it/s] 16%|█▋ | 61289/371472 [4:52:42<23:22:20, 3.69it/s] 16%|█▋ | 61290/371472 [4:52:42<23:25:11, 3.68it/s] 16%|█▋ | 61291/371472 [4:52:42<22:55:14, 3.76it/s] 16%|█▋ | 61292/371472 [4:52:42<23:39:42, 3.64it/s] 17%|█▋ | 61293/371472 [4:52:43<24:44:08, 3.48it/s] 17%|█▋ | 61294/371472 [4:52:43<24:14:58, 3.55it/s] 17%|█▋ | 61295/371472 [4:52:43<24:00:19, 3.59it/s] 17%|█▋ | 61296/371472 [4:52:44<24:18:27, 3.54it/s] 17%|█▋ | 61297/371472 [4:52:44<23:27:33, 3.67it/s] 17%|█▋ | 61298/371472 [4:52:44<22:40:52, 3.80it/s] 17%|█▋ | 61299/371472 [4:52:44<23:31:09, 3.66it/s] 17%|█▋ | 61300/371472 [4:52:45<24:59:20, 3.45it/s] {'loss': 4.0376, 'learning_rate': 8.518875649119783e-07, 'epoch': 2.64} + 17%|█▋ | 61300/371472 [4:52:45<24:59:20, 3.45it/s] 17%|█▋ | 61301/371472 [4:52:45<24:35:26, 3.50it/s] 17%|█▋ | 61302/371472 [4:52:45<23:27:31, 3.67it/s] 17%|█▋ | 61303/371472 [4:52:45<22:30:35, 3.83it/s] 17%|█▋ | 61304/371472 [4:52:46<23:14:58, 3.71it/s] 17%|█▋ | 61305/371472 [4:52:46<24:28:30, 3.52it/s] 17%|█▋ | 61306/371472 [4:52:46<23:23:27, 3.68it/s] 17%|█▋ | 61307/371472 [4:52:47<23:15:05, 3.71it/s] 17%|█▋ | 61308/371472 [4:52:47<23:45:32, 3.63it/s] 17%|█▋ | 61309/371472 [4:52:47<23:19:02, 3.69it/s] 17%|█▋ | 61310/371472 [4:52:47<23:52:11, 3.61it/s] 17%|█▋ | 61311/371472 [4:52:48<24:58:20, 3.45it/s] 17%|█▋ | 61312/371472 [4:52:48<27:57:45, 3.08it/s] 17%|█▋ | 61313/371472 [4:52:48<27:51:54, 3.09it/s] 17%|█▋ | 61314/371472 [4:52:49<27:12:34, 3.17it/s] 17%|█▋ | 61315/371472 [4:52:49<29:44:06, 2.90it/s] 17%|█▋ | 61316/371472 [4:52:49<27:13:26, 3.16it/s] 17%|█▋ | 61317/371472 [4:52:50<25:13:56, 3.41it/s] 17%|█▋ | 61318/371472 [4:52:50<24:03:11, 3.58it/s] 17%|█▋ | 61319/371472 [4:52:50<23:49:31, 3.62it/s] 17%|█▋ | 61320/371472 [4:52:50<23:17:17, 3.70it/s] {'loss': 4.268, 'learning_rate': 8.518390829364994e-07, 'epoch': 2.64} + 17%|█▋ | 61320/371472 [4:52:50<23:17:17, 3.70it/s] 17%|█▋ | 61321/371472 [4:52:51<22:25:43, 3.84it/s] 17%|█▋ | 61322/371472 [4:52:51<22:42:48, 3.79it/s] 17%|█▋ | 61323/371472 [4:52:51<22:32:40, 3.82it/s] 17%|█▋ | 61324/371472 [4:52:52<23:43:21, 3.63it/s] 17%|█▋ | 61325/371472 [4:52:52<23:04:25, 3.73it/s] 17%|█▋ | 61326/371472 [4:52:52<22:23:59, 3.85it/s] 17%|█▋ | 61327/371472 [4:52:52<24:20:59, 3.54it/s] 17%|█▋ | 61328/371472 [4:52:53<23:56:31, 3.60it/s] 17%|█▋ | 61329/371472 [4:52:53<24:09:08, 3.57it/s] 17%|█▋ | 61330/371472 [4:52:53<23:40:44, 3.64it/s] 17%|█▋ | 61331/371472 [4:52:53<22:35:52, 3.81it/s] 17%|█▋ | 61332/371472 [4:52:54<24:47:56, 3.47it/s] 17%|█▋ | 61333/371472 [4:52:54<27:09:16, 3.17it/s] 17%|█▋ | 61334/371472 [4:52:54<25:40:32, 3.36it/s] 17%|█▋ | 61335/371472 [4:52:55<24:12:25, 3.56it/s] 17%|█▋ | 61336/371472 [4:52:55<25:14:26, 3.41it/s] 17%|█▋ | 61337/371472 [4:52:55<24:09:07, 3.57it/s] 17%|█▋ | 61338/371472 [4:52:55<23:38:16, 3.64it/s] 17%|█▋ | 61339/371472 [4:52:56<24:43:02, 3.49it/s] 17%|█▋ | 61340/371472 [4:52:56<27:26:31, 3.14it/s] {'loss': 4.4516, 'learning_rate': 8.517906009610204e-07, 'epoch': 2.64} + 17%|█▋ | 61340/371472 [4:52:56<27:26:31, 3.14it/s] 17%|█▋ | 61341/371472 [4:52:57<27:55:30, 3.08it/s] 17%|█▋ | 61342/371472 [4:52:57<27:43:25, 3.11it/s] 17%|█▋ | 61343/371472 [4:52:57<29:19:44, 2.94it/s] 17%|█▋ | 61344/371472 [4:52:58<28:51:40, 2.98it/s] 17%|█▋ | 61345/371472 [4:52:58<27:44:02, 3.11it/s] 17%|█▋ | 61346/371472 [4:52:58<28:45:17, 3.00it/s] 17%|█▋ | 61347/371472 [4:52:58<27:15:00, 3.16it/s] 17%|█▋ | 61348/371472 [4:52:59<27:09:27, 3.17it/s] 17%|█▋ | 61349/371472 [4:52:59<26:26:32, 3.26it/s] 17%|█▋ | 61350/371472 [4:52:59<26:36:10, 3.24it/s] 17%|█▋ | 61351/371472 [4:53:00<25:23:54, 3.39it/s] 17%|█▋ | 61352/371472 [4:53:00<24:08:55, 3.57it/s] 17%|█▋ | 61353/371472 [4:53:00<26:39:43, 3.23it/s] 17%|█▋ | 61354/371472 [4:53:01<25:52:45, 3.33it/s] 17%|█▋ | 61355/371472 [4:53:01<24:50:34, 3.47it/s] 17%|█▋ | 61356/371472 [4:53:01<23:33:56, 3.66it/s] 17%|█▋ | 61357/371472 [4:53:01<23:11:09, 3.72it/s] 17%|█▋ | 61358/371472 [4:53:02<24:20:13, 3.54it/s] 17%|█▋ | 61359/371472 [4:53:02<23:56:37, 3.60it/s] 17%|█▋ | 61360/371472 [4:53:02<24:45:55, 3.48it/s] {'loss': 4.144, 'learning_rate': 8.517421189855415e-07, 'epoch': 2.64} + 17%|█▋ | 61360/371472 [4:53:02<24:45:55, 3.48it/s] 17%|█▋ | 61361/371472 [4:53:02<23:56:18, 3.60it/s] 17%|█▋ | 61362/371472 [4:53:03<24:50:18, 3.47it/s] 17%|█▋ | 61363/371472 [4:53:03<24:28:03, 3.52it/s] 17%|█▋ | 61364/371472 [4:53:03<23:47:23, 3.62it/s] 17%|█▋ | 61365/371472 [4:53:04<23:58:30, 3.59it/s] 17%|█▋ | 61366/371472 [4:53:04<24:57:57, 3.45it/s] 17%|█▋ | 61367/371472 [4:53:04<24:01:53, 3.58it/s] 17%|█▋ | 61368/371472 [4:53:04<23:27:19, 3.67it/s] 17%|█▋ | 61369/371472 [4:53:05<23:25:31, 3.68it/s] 17%|█▋ | 61370/371472 [4:53:05<23:31:55, 3.66it/s] 17%|█▋ | 61371/371472 [4:53:05<22:42:05, 3.79it/s] 17%|█▋ | 61372/371472 [4:53:05<22:48:54, 3.78it/s] 17%|█▋ | 61373/371472 [4:53:06<24:36:28, 3.50it/s] 17%|█▋ | 61374/371472 [4:53:06<23:36:56, 3.65it/s] 17%|█▋ | 61375/371472 [4:53:06<23:23:07, 3.68it/s] 17%|█▋ | 61376/371472 [4:53:07<23:17:53, 3.70it/s] 17%|█▋ | 61377/371472 [4:53:07<24:07:03, 3.57it/s] 17%|█▋ | 61378/371472 [4:53:07<23:28:04, 3.67it/s] 17%|█▋ | 61379/371472 [4:53:07<23:42:19, 3.63it/s] 17%|█▋ | 61380/371472 [4:53:08<23:01:49, 3.74it/s] {'loss': 4.213, 'learning_rate': 8.516936370100627e-07, 'epoch': 2.64} + 17%|█▋ | 61380/371472 [4:53:08<23:01:49, 3.74it/s] 17%|█▋ | 61381/371472 [4:53:08<22:29:27, 3.83it/s] 17%|█▋ | 61382/371472 [4:53:08<23:07:04, 3.73it/s] 17%|█▋ | 61383/371472 [4:53:08<24:14:02, 3.55it/s] 17%|█▋ | 61384/371472 [4:53:09<24:55:41, 3.46it/s] 17%|█▋ | 61385/371472 [4:53:09<23:45:36, 3.63it/s] 17%|█▋ | 61386/371472 [4:53:09<24:52:33, 3.46it/s] 17%|█▋ | 61387/371472 [4:53:10<25:57:21, 3.32it/s] 17%|█▋ | 61388/371472 [4:53:10<25:35:19, 3.37it/s] 17%|█▋ | 61389/371472 [4:53:10<24:43:29, 3.48it/s] 17%|█▋ | 61390/371472 [4:53:10<23:51:07, 3.61it/s] 17%|█▋ | 61391/371472 [4:53:11<24:52:16, 3.46it/s] 17%|█▋ | 61392/371472 [4:53:11<25:56:39, 3.32it/s] 17%|█▋ | 61393/371472 [4:53:11<27:16:05, 3.16it/s] 17%|█▋ | 61394/371472 [4:53:12<26:12:45, 3.29it/s] 17%|█▋ | 61395/371472 [4:53:12<25:04:44, 3.43it/s] 17%|█▋ | 61396/371472 [4:53:12<23:45:43, 3.62it/s] 17%|█▋ | 61397/371472 [4:53:13<24:11:00, 3.56it/s] 17%|█▋ | 61398/371472 [4:53:13<23:34:42, 3.65it/s] 17%|█▋ | 61399/371472 [4:53:13<24:01:48, 3.58it/s] 17%|█▋ | 61400/371472 [4:53:13<23:37:21, 3.65it/s] {'loss': 4.2411, 'learning_rate': 8.516451550345838e-07, 'epoch': 2.64} + 17%|█▋ | 61400/371472 [4:53:13<23:37:21, 3.65it/s] 17%|█▋ | 61401/371472 [4:53:14<23:23:45, 3.68it/s] 17%|█▋ | 61402/371472 [4:53:14<23:10:53, 3.72it/s] 17%|█▋ | 61403/371472 [4:53:14<23:08:15, 3.72it/s] 17%|█▋ | 61404/371472 [4:53:14<22:47:57, 3.78it/s] 17%|█▋ | 61405/371472 [4:53:15<22:41:40, 3.80it/s] 17%|█▋ | 61406/371472 [4:53:15<23:19:01, 3.69it/s] 17%|█▋ | 61407/371472 [4:53:15<22:46:14, 3.78it/s] 17%|█▋ | 61408/371472 [4:53:15<22:16:13, 3.87it/s] 17%|█▋ | 61409/371472 [4:53:16<25:50:33, 3.33it/s] 17%|█▋ | 61410/371472 [4:53:16<24:53:48, 3.46it/s] 17%|█▋ | 61411/371472 [4:53:16<25:37:47, 3.36it/s] 17%|█▋ | 61412/371472 [4:53:17<25:10:16, 3.42it/s] 17%|█▋ | 61413/371472 [4:53:17<24:01:33, 3.58it/s] 17%|█▋ | 61414/371472 [4:53:17<23:49:03, 3.62it/s] 17%|█▋ | 61415/371472 [4:53:18<23:56:06, 3.60it/s] 17%|█▋ | 61416/371472 [4:53:18<22:54:43, 3.76it/s] 17%|█▋ | 61417/371472 [4:53:18<24:16:27, 3.55it/s] 17%|█▋ | 61418/371472 [4:53:18<23:57:10, 3.60it/s] 17%|█▋ | 61419/371472 [4:53:19<24:09:34, 3.56it/s] 17%|█▋ | 61420/371472 [4:53:19<25:19:50, 3.40it/s] {'loss': 4.125, 'learning_rate': 8.515966730591048e-07, 'epoch': 2.65} + 17%|█▋ | 61420/371472 [4:53:19<25:19:50, 3.40it/s] 17%|█▋ | 61421/371472 [4:53:19<26:20:05, 3.27it/s] 17%|█▋ | 61422/371472 [4:53:20<25:51:21, 3.33it/s] 17%|█▋ | 61423/371472 [4:53:20<26:12:20, 3.29it/s] 17%|█▋ | 61424/371472 [4:53:20<24:23:06, 3.53it/s] 17%|█▋ | 61425/371472 [4:53:20<23:38:20, 3.64it/s] 17%|█▋ | 61426/371472 [4:53:21<26:38:55, 3.23it/s] 17%|█▋ | 61427/371472 [4:53:21<26:14:15, 3.28it/s] 17%|█▋ | 61428/371472 [4:53:21<26:35:11, 3.24it/s] 17%|█▋ | 61429/371472 [4:53:22<24:59:46, 3.45it/s] 17%|█▋ | 61430/371472 [4:53:22<23:54:32, 3.60it/s] 17%|█▋ | 61431/371472 [4:53:22<24:17:32, 3.55it/s] 17%|█▋ | 61432/371472 [4:53:22<24:24:56, 3.53it/s] 17%|█▋ | 61433/371472 [4:53:23<25:34:50, 3.37it/s] 17%|█▋ | 61434/371472 [4:53:23<24:35:39, 3.50it/s] 17%|█▋ | 61435/371472 [4:53:23<24:31:01, 3.51it/s] 17%|█▋ | 61436/371472 [4:53:24<25:48:35, 3.34it/s] 17%|█▋ | 61437/371472 [4:53:24<25:45:20, 3.34it/s] 17%|█▋ | 61438/371472 [4:53:24<25:28:39, 3.38it/s] 17%|█▋ | 61439/371472 [4:53:25<24:46:34, 3.48it/s] 17%|█▋ | 61440/371472 [4:53:25<24:03:52, 3.58it/s] {'loss': 4.2793, 'learning_rate': 8.51548191083626e-07, 'epoch': 2.65} + 17%|█▋ | 61440/371472 [4:53:25<24:03:52, 3.58it/s] 17%|█▋ | 61441/371472 [4:53:25<22:55:29, 3.76it/s] 17%|█▋ | 61442/371472 [4:53:25<22:29:33, 3.83it/s] 17%|█▋ | 61443/371472 [4:53:26<22:55:32, 3.76it/s] 17%|█▋ | 61444/371472 [4:53:26<22:37:14, 3.81it/s] 17%|█▋ | 61445/371472 [4:53:26<24:42:25, 3.49it/s] 17%|█▋ | 61446/371472 [4:53:26<26:15:56, 3.28it/s] 17%|█▋ | 61447/371472 [4:53:27<25:31:46, 3.37it/s] 17%|█▋ | 61448/371472 [4:53:27<25:17:35, 3.40it/s] 17%|█▋ | 61449/371472 [4:53:27<25:23:22, 3.39it/s] 17%|█▋ | 61450/371472 [4:53:28<24:17:36, 3.54it/s] 17%|█▋ | 61451/371472 [4:53:28<24:24:29, 3.53it/s] 17%|█▋ | 61452/371472 [4:53:28<24:56:14, 3.45it/s] 17%|█▋ | 61453/371472 [4:53:28<24:47:24, 3.47it/s] 17%|█▋ | 61454/371472 [4:53:29<24:20:37, 3.54it/s] 17%|█▋ | 61455/371472 [4:53:29<24:13:13, 3.56it/s] 17%|█▋ | 61456/371472 [4:53:29<24:36:43, 3.50it/s] 17%|█▋ | 61457/371472 [4:53:30<24:08:46, 3.57it/s] 17%|█▋ | 61458/371472 [4:53:30<24:38:22, 3.49it/s] 17%|█▋ | 61459/371472 [4:53:30<23:42:53, 3.63it/s] 17%|█▋ | 61460/371472 [4:53:30<24:33:28, 3.51it/s] {'loss': 4.1936, 'learning_rate': 8.514997091081471e-07, 'epoch': 2.65} + 17%|█▋ | 61460/371472 [4:53:30<24:33:28, 3.51it/s] 17%|█▋ | 61461/371472 [4:53:31<24:26:18, 3.52it/s] 17%|█▋ | 61462/371472 [4:53:31<24:05:14, 3.58it/s] 17%|█▋ | 61463/371472 [4:53:31<24:28:57, 3.52it/s] 17%|█▋ | 61464/371472 [4:53:32<24:31:52, 3.51it/s] 17%|█▋ | 61465/371472 [4:53:32<24:13:35, 3.55it/s] 17%|█▋ | 61466/371472 [4:53:32<24:23:53, 3.53it/s] 17%|█▋ | 61467/371472 [4:53:32<24:12:31, 3.56it/s] 17%|█▋ | 61468/371472 [4:53:33<23:49:05, 3.62it/s] 17%|█▋ | 61469/371472 [4:53:33<25:07:10, 3.43it/s] 17%|█▋ | 61470/371472 [4:53:33<26:04:53, 3.30it/s] 17%|█▋ | 61471/371472 [4:53:34<24:44:28, 3.48it/s] 17%|█▋ | 61472/371472 [4:53:34<24:40:50, 3.49it/s] 17%|█▋ | 61473/371472 [4:53:34<24:29:13, 3.52it/s] 17%|█▋ | 61474/371472 [4:53:34<23:54:30, 3.60it/s] 17%|█▋ | 61475/371472 [4:53:35<24:09:57, 3.56it/s] 17%|█▋ | 61476/371472 [4:53:35<23:49:27, 3.61it/s] 17%|█▋ | 61477/371472 [4:53:35<27:09:57, 3.17it/s] 17%|█▋ | 61478/371472 [4:53:36<25:46:23, 3.34it/s] 17%|█▋ | 61479/371472 [4:53:36<26:16:57, 3.28it/s] 17%|█▋ | 61480/371472 [4:53:36<25:51:33, 3.33it/s] {'loss': 4.3829, 'learning_rate': 8.514512271326681e-07, 'epoch': 2.65} + 17%|█▋ | 61480/371472 [4:53:36<25:51:33, 3.33it/s] 17%|█▋ | 61481/371472 [4:53:37<25:18:48, 3.40it/s] 17%|█▋ | 61482/371472 [4:53:37<24:01:33, 3.58it/s] 17%|█▋ | 61483/371472 [4:53:37<23:59:29, 3.59it/s] 17%|█▋ | 61484/371472 [4:53:37<23:35:39, 3.65it/s] 17%|█▋ | 61485/371472 [4:53:38<23:10:08, 3.72it/s] 17%|█▋ | 61486/371472 [4:53:38<22:47:12, 3.78it/s] 17%|█▋ | 61487/371472 [4:53:38<23:01:59, 3.74it/s] 17%|█▋ | 61488/371472 [4:53:38<24:57:29, 3.45it/s] 17%|█▋ | 61489/371472 [4:53:39<25:10:14, 3.42it/s] 17%|█▋ | 61490/371472 [4:53:39<24:33:36, 3.51it/s] 17%|█▋ | 61491/371472 [4:53:39<24:33:05, 3.51it/s] 17%|█▋ | 61492/371472 [4:53:40<24:51:25, 3.46it/s] 17%|█▋ | 61493/371472 [4:53:40<23:59:27, 3.59it/s] 17%|█▋ | 61494/371472 [4:53:40<24:32:31, 3.51it/s] 17%|█▋ | 61495/371472 [4:53:40<24:51:04, 3.46it/s] 17%|█▋ | 61496/371472 [4:53:41<25:07:37, 3.43it/s] 17%|█▋ | 61497/371472 [4:53:41<24:18:13, 3.54it/s] 17%|█▋ | 61498/371472 [4:53:41<23:58:55, 3.59it/s] 17%|█▋ | 61499/371472 [4:53:42<23:45:35, 3.62it/s] 17%|█▋ | 61500/371472 [4:53:42<22:56:41, 3.75it/s] {'loss': 4.0414, 'learning_rate': 8.514027451571892e-07, 'epoch': 2.65} + 17%|█▋ | 61500/371472 [4:53:42<22:56:41, 3.75it/s] 17%|█▋ | 61501/371472 [4:53:42<22:24:52, 3.84it/s] 17%|█▋ | 61502/371472 [4:53:42<23:29:38, 3.66it/s] 17%|█▋ | 61503/371472 [4:53:43<23:18:03, 3.70it/s] 17%|█▋ | 61504/371472 [4:53:43<23:02:36, 3.74it/s] 17%|█▋ | 61505/371472 [4:53:43<23:36:34, 3.65it/s] 17%|█▋ | 61506/371472 [4:53:43<24:01:50, 3.58it/s] 17%|█▋ | 61507/371472 [4:53:44<24:49:27, 3.47it/s] 17%|█▋ | 61508/371472 [4:53:44<23:58:52, 3.59it/s] 17%|█▋ | 61509/371472 [4:53:44<25:23:39, 3.39it/s] 17%|█▋ | 61510/371472 [4:53:45<24:39:20, 3.49it/s] 17%|█▋ | 61511/371472 [4:53:45<24:32:56, 3.51it/s] 17%|█▋ | 61512/371472 [4:53:45<24:01:31, 3.58it/s] 17%|█▋ | 61513/371472 [4:53:45<23:10:24, 3.72it/s] 17%|█▋ | 61514/371472 [4:53:46<22:55:14, 3.76it/s] 17%|█▋ | 61515/371472 [4:53:46<22:09:12, 3.89it/s] 17%|█▋ | 61516/371472 [4:53:46<22:54:43, 3.76it/s] 17%|█▋ | 61517/371472 [4:53:46<23:32:19, 3.66it/s] 17%|█▋ | 61518/371472 [4:53:47<23:09:19, 3.72it/s] 17%|█▋ | 61519/371472 [4:53:47<22:50:55, 3.77it/s] 17%|█▋ | 61520/371472 [4:53:47<24:34:57, 3.50it/s] {'loss': 4.3418, 'learning_rate': 8.513542631817104e-07, 'epoch': 2.65} + 17%|█▋ | 61520/371472 [4:53:47<24:34:57, 3.50it/s] 17%|█▋ | 61521/371472 [4:53:48<25:31:33, 3.37it/s] 17%|█▋ | 61522/371472 [4:53:48<24:29:46, 3.51it/s] 17%|█▋ | 61523/371472 [4:53:48<24:23:55, 3.53it/s] 17%|█▋ | 61524/371472 [4:53:48<24:36:50, 3.50it/s] 17%|█▋ | 61525/371472 [4:53:49<25:38:16, 3.36it/s] 17%|█▋ | 61526/371472 [4:53:49<24:35:50, 3.50it/s] 17%|█▋ | 61527/371472 [4:53:49<23:21:39, 3.69it/s] 17%|█▋ | 61528/371472 [4:53:50<25:57:27, 3.32it/s] 17%|█▋ | 61529/371472 [4:53:50<24:42:56, 3.48it/s] 17%|█▋ | 61530/371472 [4:53:50<24:21:16, 3.54it/s] 17%|█▋ | 61531/371472 [4:53:50<24:12:23, 3.56it/s] 17%|█▋ | 61532/371472 [4:53:51<23:23:59, 3.68it/s] 17%|█▋ | 61533/371472 [4:53:51<23:04:06, 3.73it/s] 17%|█▋ | 61534/371472 [4:53:51<22:34:41, 3.81it/s] 17%|█▋ | 61535/371472 [4:53:52<23:42:16, 3.63it/s] 17%|█▋ | 61536/371472 [4:53:52<26:24:42, 3.26it/s] 17%|█▋ | 61537/371472 [4:53:52<26:13:28, 3.28it/s] 17%|█▋ | 61538/371472 [4:53:52<25:04:02, 3.43it/s] 17%|█▋ | 61539/371472 [4:53:53<24:39:43, 3.49it/s] 17%|█▋ | 61540/371472 [4:53:53<24:25:07, 3.53it/s] {'loss': 4.2777, 'learning_rate': 8.513057812062316e-07, 'epoch': 2.65} + 17%|█▋ | 61540/371472 [4:53:53<24:25:07, 3.53it/s] 17%|█▋ | 61541/371472 [4:53:53<23:46:13, 3.62it/s] 17%|█▋ | 61542/371472 [4:53:54<23:29:54, 3.66it/s] 17%|█▋ | 61543/371472 [4:53:54<24:26:03, 3.52it/s] 17%|█▋ | 61544/371472 [4:53:54<24:24:56, 3.53it/s] 17%|█▋ | 61545/371472 [4:53:54<25:40:13, 3.35it/s] 17%|█▋ | 61546/371472 [4:53:55<24:53:55, 3.46it/s] 17%|█▋ | 61547/371472 [4:53:55<24:22:23, 3.53it/s] 17%|█▋ | 61548/371472 [4:53:55<23:24:04, 3.68it/s] 17%|█▋ | 61549/371472 [4:53:56<26:00:18, 3.31it/s] 17%|█▋ | 61550/371472 [4:53:56<25:55:23, 3.32it/s] 17%|█▋ | 61551/371472 [4:53:56<26:23:33, 3.26it/s] 17%|█▋ | 61552/371472 [4:53:57<25:40:44, 3.35it/s] 17%|█▋ | 61553/371472 [4:53:57<24:01:43, 3.58it/s] 17%|█▋ | 61554/371472 [4:53:57<23:37:12, 3.64it/s] 17%|█▋ | 61555/371472 [4:53:57<22:50:10, 3.77it/s] 17%|█▋ | 61556/371472 [4:53:58<22:44:50, 3.78it/s] 17%|█▋ | 61557/371472 [4:53:58<23:52:44, 3.61it/s] 17%|█▋ | 61558/371472 [4:53:58<23:37:47, 3.64it/s] 17%|█▋ | 61559/371472 [4:53:58<23:45:02, 3.62it/s] 17%|█▋ | 61560/371472 [4:53:59<24:57:47, 3.45it/s] {'loss': 4.075, 'learning_rate': 8.512572992307526e-07, 'epoch': 2.65} + 17%|█▋ | 61560/371472 [4:53:59<24:57:47, 3.45it/s] 17%|█▋ | 61561/371472 [4:53:59<25:17:57, 3.40it/s] 17%|█▋ | 61562/371472 [4:53:59<25:17:07, 3.40it/s] 17%|█▋ | 61563/371472 [4:54:00<25:30:31, 3.37it/s] 17%|█▋ | 61564/371472 [4:54:00<25:24:56, 3.39it/s] 17%|█▋ | 61565/371472 [4:54:00<25:31:00, 3.37it/s] 17%|█▋ | 61566/371472 [4:54:00<24:23:09, 3.53it/s] 17%|█▋ | 61567/371472 [4:54:01<23:16:53, 3.70it/s] 17%|█▋ | 61568/371472 [4:54:01<23:26:17, 3.67it/s] 17%|█▋ | 61569/371472 [4:54:01<22:56:30, 3.75it/s] 17%|█▋ | 61570/371472 [4:54:01<23:02:26, 3.74it/s] 17%|█▋ | 61571/371472 [4:54:02<23:13:01, 3.71it/s] 17%|█▋ | 61572/371472 [4:54:02<22:58:54, 3.75it/s] 17%|█▋ | 61573/371472 [4:54:02<24:21:01, 3.54it/s] 17%|█▋ | 61574/371472 [4:54:03<23:34:23, 3.65it/s] 17%|█▋ | 61575/371472 [4:54:03<27:07:06, 3.17it/s] 17%|█▋ | 61576/371472 [4:54:03<25:52:46, 3.33it/s] 17%|█▋ | 61577/371472 [4:54:04<24:29:52, 3.51it/s] 17%|█▋ | 61578/371472 [4:54:04<26:15:20, 3.28it/s] 17%|█▋ | 61579/371472 [4:54:04<27:14:49, 3.16it/s] 17%|█▋ | 61580/371472 [4:54:05<26:32:59, 3.24it/s] {'loss': 4.2111, 'learning_rate': 8.512088172552737e-07, 'epoch': 2.65} + 17%|█▋ | 61580/371472 [4:54:05<26:32:59, 3.24it/s] 17%|█▋ | 61581/371472 [4:54:05<26:12:19, 3.28it/s] 17%|█▋ | 61582/371472 [4:54:05<24:52:15, 3.46it/s] 17%|█▋ | 61583/371472 [4:54:05<24:46:04, 3.48it/s] 17%|█▋ | 61584/371472 [4:54:06<24:05:52, 3.57it/s] 17%|█▋ | 61585/371472 [4:54:06<23:29:58, 3.66it/s] 17%|█▋ | 61586/371472 [4:54:06<23:37:39, 3.64it/s] 17%|█▋ | 61587/371472 [4:54:06<25:22:20, 3.39it/s] 17%|█▋ | 61588/371472 [4:54:07<25:03:37, 3.43it/s] 17%|█▋ | 61589/371472 [4:54:07<25:11:50, 3.42it/s] 17%|█▋ | 61590/371472 [4:54:07<24:11:23, 3.56it/s] 17%|█▋ | 61591/371472 [4:54:08<24:29:24, 3.51it/s] 17%|█▋ | 61592/371472 [4:54:08<25:00:45, 3.44it/s] 17%|█▋ | 61593/371472 [4:54:08<27:01:43, 3.18it/s] 17%|█▋ | 61594/371472 [4:54:09<26:20:02, 3.27it/s] 17%|█▋ | 61595/371472 [4:54:09<27:25:53, 3.14it/s] 17%|█▋ | 61596/371472 [4:54:09<25:44:20, 3.34it/s] 17%|█▋ | 61597/371472 [4:54:09<25:35:35, 3.36it/s] 17%|█▋ | 61598/371472 [4:54:10<24:53:54, 3.46it/s] 17%|█▋ | 61599/371472 [4:54:10<23:52:37, 3.60it/s] 17%|█▋ | 61600/371472 [4:54:10<24:04:22, 3.58it/s] {'loss': 4.0313, 'learning_rate': 8.511603352797948e-07, 'epoch': 2.65} + 17%|█▋ | 61600/371472 [4:54:10<24:04:22, 3.58it/s] 17%|█▋ | 61601/371472 [4:54:11<23:34:45, 3.65it/s] 17%|█▋ | 61602/371472 [4:54:11<24:22:22, 3.53it/s] 17%|█▋ | 61603/371472 [4:54:11<23:50:07, 3.61it/s] 17%|█▋ | 61604/371472 [4:54:11<23:33:47, 3.65it/s] 17%|█▋ | 61605/371472 [4:54:12<24:14:19, 3.55it/s] 17%|█▋ | 61606/371472 [4:54:12<24:06:01, 3.57it/s] 17%|█▋ | 61607/371472 [4:54:12<24:13:22, 3.55it/s] 17%|█▋ | 61608/371472 [4:54:13<25:43:56, 3.34it/s] 17%|█▋ | 61609/371472 [4:54:13<24:32:33, 3.51it/s] 17%|█▋ | 61610/371472 [4:54:13<23:56:25, 3.60it/s] 17%|█▋ | 61611/371472 [4:54:13<24:01:35, 3.58it/s] 17%|█▋ | 61612/371472 [4:54:14<23:51:05, 3.61it/s] 17%|█▋ | 61613/371472 [4:54:14<23:05:34, 3.73it/s] 17%|█▋ | 61614/371472 [4:54:14<22:58:20, 3.75it/s] 17%|█▋ | 61615/371472 [4:54:14<23:49:15, 3.61it/s] 17%|█▋ | 61616/371472 [4:54:15<24:02:00, 3.58it/s] 17%|█▋ | 61617/371472 [4:54:15<22:53:18, 3.76it/s] 17%|█▋ | 61618/371472 [4:54:15<22:17:24, 3.86it/s] 17%|█▋ | 61619/371472 [4:54:15<22:06:35, 3.89it/s] 17%|█▋ | 61620/371472 [4:54:16<24:32:22, 3.51it/s] {'loss': 4.1522, 'learning_rate': 8.51111853304316e-07, 'epoch': 2.65} + 17%|█▋ | 61620/371472 [4:54:16<24:32:22, 3.51it/s] 17%|█▋ | 61621/371472 [4:54:16<24:29:55, 3.51it/s] 17%|█▋ | 61622/371472 [4:54:16<23:31:09, 3.66it/s] 17%|█▋ | 61623/371472 [4:54:17<24:21:45, 3.53it/s] 17%|█▋ | 61624/371472 [4:54:17<24:41:28, 3.49it/s] 17%|█▋ | 61625/371472 [4:54:17<23:48:37, 3.61it/s] 17%|█▋ | 61626/371472 [4:54:18<24:41:29, 3.49it/s] 17%|█▋ | 61627/371472 [4:54:18<24:05:01, 3.57it/s] 17%|█▋ | 61628/371472 [4:54:18<25:27:11, 3.38it/s] 17%|█▋ | 61629/371472 [4:54:18<24:53:51, 3.46it/s] 17%|█▋ | 61630/371472 [4:54:19<25:07:14, 3.43it/s] 17%|█▋ | 61631/371472 [4:54:19<25:26:29, 3.38it/s] 17%|█▋ | 61632/371472 [4:54:19<25:40:39, 3.35it/s] 17%|█▋ | 61633/371472 [4:54:20<25:08:16, 3.42it/s] 17%|█▋ | 61634/371472 [4:54:20<26:10:29, 3.29it/s] 17%|█▋ | 61635/371472 [4:54:20<25:03:30, 3.43it/s] 17%|█▋ | 61636/371472 [4:54:20<25:20:48, 3.40it/s] 17%|█▋ | 61637/371472 [4:54:21<24:40:41, 3.49it/s] 17%|█▋ | 61638/371472 [4:54:21<24:03:51, 3.58it/s] 17%|█▋ | 61639/371472 [4:54:21<24:08:26, 3.57it/s] 17%|█▋ | 61640/371472 [4:54:22<26:20:29, 3.27it/s] {'loss': 4.1889, 'learning_rate': 8.51063371328837e-07, 'epoch': 2.65} + 17%|█▋ | 61640/371472 [4:54:22<26:20:29, 3.27it/s] 17%|█▋ | 61641/371472 [4:54:22<25:04:11, 3.43it/s] 17%|█▋ | 61642/371472 [4:54:22<23:51:21, 3.61it/s] 17%|█▋ | 61643/371472 [4:54:23<26:30:03, 3.25it/s] 17%|█▋ | 61644/371472 [4:54:23<25:34:50, 3.36it/s] 17%|█▋ | 61645/371472 [4:54:23<24:28:00, 3.52it/s] 17%|█▋ | 61646/371472 [4:54:23<23:48:13, 3.62it/s] 17%|█▋ | 61647/371472 [4:54:24<23:35:24, 3.65it/s] 17%|█▋ | 61648/371472 [4:54:24<24:25:11, 3.52it/s] 17%|█▋ | 61649/371472 [4:54:24<25:37:22, 3.36it/s] 17%|█▋ | 61650/371472 [4:54:25<25:42:45, 3.35it/s] 17%|█▋ | 61651/371472 [4:54:25<24:53:41, 3.46it/s] 17%|█▋ | 61652/371472 [4:54:25<24:18:25, 3.54it/s] 17%|█▋ | 61653/371472 [4:54:25<23:49:08, 3.61it/s] 17%|█▋ | 61654/371472 [4:54:26<25:19:20, 3.40it/s] 17%|█▋ | 61655/371472 [4:54:26<24:00:08, 3.59it/s] 17%|█▋ | 61656/371472 [4:54:26<25:26:38, 3.38it/s] 17%|█▋ | 61657/371472 [4:54:26<24:59:30, 3.44it/s] 17%|█▋ | 61658/371472 [4:54:27<25:18:27, 3.40it/s] 17%|█▋ | 61659/371472 [4:54:27<24:42:51, 3.48it/s] 17%|█▋ | 61660/371472 [4:54:27<24:43:49, 3.48it/s] {'loss': 4.0174, 'learning_rate': 8.510148893533581e-07, 'epoch': 2.66} + 17%|█▋ | 61660/371472 [4:54:27<24:43:49, 3.48it/s] 17%|█▋ | 61661/371472 [4:54:28<25:07:16, 3.43it/s] 17%|█▋ | 61662/371472 [4:54:28<23:42:52, 3.63it/s] 17%|█▋ | 61663/371472 [4:54:28<23:32:18, 3.66it/s] 17%|█▋ | 61664/371472 [4:54:28<23:30:39, 3.66it/s] 17%|█▋ | 61665/371472 [4:54:29<23:08:04, 3.72it/s] 17%|█▋ | 61666/371472 [4:54:29<23:53:03, 3.60it/s] 17%|█▋ | 61667/371472 [4:54:29<23:47:22, 3.62it/s] 17%|█▋ | 61668/371472 [4:54:30<24:21:27, 3.53it/s] 17%|█▋ | 61669/371472 [4:54:30<24:04:33, 3.57it/s] 17%|█▋ | 61670/371472 [4:54:30<24:01:29, 3.58it/s] 17%|█▋ | 61671/371472 [4:54:30<24:06:04, 3.57it/s] 17%|█▋ | 61672/371472 [4:54:31<23:41:50, 3.63it/s] 17%|█▋ | 61673/371472 [4:54:31<24:14:48, 3.55it/s] 17%|█▋ | 61674/371472 [4:54:31<24:49:47, 3.47it/s] 17%|█▋ | 61675/371472 [4:54:32<23:57:11, 3.59it/s] 17%|█▋ | 61676/371472 [4:54:32<23:14:25, 3.70it/s] 17%|█▋ | 61677/371472 [4:54:32<23:38:00, 3.64it/s] 17%|█▋ | 61678/371472 [4:54:32<23:07:42, 3.72it/s] 17%|█▋ | 61679/371472 [4:54:33<23:35:00, 3.65it/s] 17%|█▋ | 61680/371472 [4:54:33<24:04:53, 3.57it/s] {'loss': 4.1852, 'learning_rate': 8.509664073778793e-07, 'epoch': 2.66} + 17%|█▋ | 61680/371472 [4:54:33<24:04:53, 3.57it/s] 17%|█▋ | 61681/371472 [4:54:33<25:37:41, 3.36it/s] 17%|█▋ | 61682/371472 [4:54:33<24:30:14, 3.51it/s] 17%|█▋ | 61683/371472 [4:54:34<24:15:51, 3.55it/s] 17%|█▋ | 61684/371472 [4:54:34<25:34:11, 3.37it/s] 17%|█▋ | 61685/371472 [4:54:34<25:31:16, 3.37it/s] 17%|█▋ | 61686/371472 [4:54:35<26:03:42, 3.30it/s] 17%|█▋ | 61687/371472 [4:54:35<24:51:37, 3.46it/s] 17%|█▋ | 61688/371472 [4:54:35<24:21:01, 3.53it/s] 17%|█▋ | 61689/371472 [4:54:35<23:35:08, 3.65it/s] 17%|█▋ | 61690/371472 [4:54:36<22:39:25, 3.80it/s] 17%|█▋ | 61691/371472 [4:54:36<23:01:19, 3.74it/s] 17%|█▋ | 61692/371472 [4:54:36<23:08:05, 3.72it/s] 17%|█▋ | 61693/371472 [4:54:37<24:11:55, 3.56it/s] 17%|█▋ | 61694/371472 [4:54:37<25:02:41, 3.44it/s] 17%|█▋ | 61695/371472 [4:54:37<23:50:24, 3.61it/s] 17%|█▋ | 61696/371472 [4:54:37<22:55:48, 3.75it/s] 17%|█▋ | 61697/371472 [4:54:38<23:28:31, 3.67it/s] 17%|█▋ | 61698/371472 [4:54:38<22:59:42, 3.74it/s] 17%|█▋ | 61699/371472 [4:54:38<22:52:18, 3.76it/s] 17%|█▋ | 61700/371472 [4:54:38<22:57:09, 3.75it/s] {'loss': 4.2706, 'learning_rate': 8.509179254024005e-07, 'epoch': 2.66} + 17%|█▋ | 61700/371472 [4:54:38<22:57:09, 3.75it/s] 17%|█▋ | 61701/371472 [4:54:39<31:19:37, 2.75it/s] 17%|█▋ | 61702/371472 [4:54:39<29:39:21, 2.90it/s] 17%|█▋ | 61703/371472 [4:54:40<27:44:32, 3.10it/s] 17%|█▋ | 61704/371472 [4:54:40<25:41:33, 3.35it/s] 17%|█▋ | 61705/371472 [4:54:40<26:32:51, 3.24it/s] 17%|█▋ | 61706/371472 [4:54:40<25:15:27, 3.41it/s] 17%|█▋ | 61707/371472 [4:54:41<24:17:57, 3.54it/s] 17%|█▋ | 61708/371472 [4:54:41<23:42:46, 3.63it/s] 17%|█▋ | 61709/371472 [4:54:41<23:07:12, 3.72it/s] 17%|█▋ | 61710/371472 [4:54:41<22:41:39, 3.79it/s] 17%|█▋ | 61711/371472 [4:54:42<23:01:59, 3.74it/s] 17%|█▋ | 61712/371472 [4:54:42<23:23:07, 3.68it/s] 17%|█▋ | 61713/371472 [4:54:42<23:43:14, 3.63it/s] 17%|█▋ | 61714/371472 [4:54:43<23:32:35, 3.65it/s] 17%|█▋ | 61715/371472 [4:54:43<23:18:57, 3.69it/s] 17%|█▋ | 61716/371472 [4:54:43<22:35:02, 3.81it/s] 17%|█▋ | 61717/371472 [4:54:43<22:49:01, 3.77it/s] 17%|█▋ | 61718/371472 [4:54:44<22:42:03, 3.79it/s] 17%|█▋ | 61719/371472 [4:54:44<21:58:10, 3.92it/s] 17%|█▋ | 61720/371472 [4:54:44<23:01:52, 3.74it/s] {'loss': 4.218, 'learning_rate': 8.508694434269214e-07, 'epoch': 2.66} + 17%|█▋ | 61720/371472 [4:54:44<23:01:52, 3.74it/s] 17%|█▋ | 61721/371472 [4:54:44<22:38:28, 3.80it/s] 17%|█▋ | 61722/371472 [4:54:45<22:38:25, 3.80it/s] 17%|█▋ | 61723/371472 [4:54:45<22:06:07, 3.89it/s] 17%|█▋ | 61724/371472 [4:54:45<23:13:42, 3.70it/s] 17%|█▋ | 61725/371472 [4:54:45<23:34:08, 3.65it/s] 17%|█▋ | 61726/371472 [4:54:46<24:49:15, 3.47it/s] 17%|█▋ | 61727/371472 [4:54:46<24:01:03, 3.58it/s] 17%|█▋ | 61728/371472 [4:54:46<23:21:21, 3.68it/s] 17%|█▋ | 61729/371472 [4:54:47<23:31:08, 3.66it/s] 17%|█▋ | 61730/371472 [4:54:47<24:35:32, 3.50it/s] 17%|█▋ | 61731/371472 [4:54:47<27:31:08, 3.13it/s] 17%|█▋ | 61732/371472 [4:54:48<26:46:06, 3.21it/s] 17%|█▋ | 61733/371472 [4:54:48<25:13:59, 3.41it/s] 17%|█▋ | 61734/371472 [4:54:48<24:21:27, 3.53it/s] 17%|█▋ | 61735/371472 [4:54:48<24:24:10, 3.53it/s] 17%|█▋ | 61736/371472 [4:54:49<23:50:23, 3.61it/s] 17%|█▋ | 61737/371472 [4:54:49<23:48:04, 3.61it/s] 17%|█▋ | 61738/371472 [4:54:49<23:07:00, 3.72it/s] 17%|█▋ | 61739/371472 [4:54:49<23:36:39, 3.64it/s] 17%|█▋ | 61740/371472 [4:54:50<23:05:07, 3.73it/s] {'loss': 4.1876, 'learning_rate': 8.508209614514424e-07, 'epoch': 2.66} + 17%|█▋ | 61740/371472 [4:54:50<23:05:07, 3.73it/s] 17%|█▋ | 61741/371472 [4:54:50<23:43:25, 3.63it/s] 17%|█▋ | 61742/371472 [4:54:50<23:44:47, 3.62it/s] 17%|█▋ | 61743/371472 [4:54:51<25:15:17, 3.41it/s] 17%|█▋ | 61744/371472 [4:54:51<24:28:02, 3.52it/s] 17%|█▋ | 61745/371472 [4:54:51<23:27:18, 3.67it/s] 17%|█▋ | 61746/371472 [4:54:51<23:53:26, 3.60it/s] 17%|█▋ | 61747/371472 [4:54:52<23:19:06, 3.69it/s] 17%|█▋ | 61748/371472 [4:54:52<23:10:31, 3.71it/s] 17%|█▋ | 61749/371472 [4:54:52<23:35:26, 3.65it/s] 17%|█▋ | 61750/371472 [4:54:52<22:49:00, 3.77it/s] 17%|█▋ | 61751/371472 [4:54:53<23:16:14, 3.70it/s] 17%|█▋ | 61752/371472 [4:54:53<23:00:49, 3.74it/s] 17%|█▋ | 61753/371472 [4:54:53<22:40:52, 3.79it/s] 17%|█▋ | 61754/371472 [4:54:54<24:32:28, 3.51it/s] 17%|█▋ | 61755/371472 [4:54:54<24:45:53, 3.47it/s] 17%|█▋ | 61756/371472 [4:54:54<23:57:29, 3.59it/s] 17%|█▋ | 61757/371472 [4:54:54<23:56:21, 3.59it/s] 17%|█▋ | 61758/371472 [4:54:55<23:14:14, 3.70it/s] 17%|█▋ | 61759/371472 [4:54:55<24:26:44, 3.52it/s] 17%|█▋ | 61760/371472 [4:54:55<24:34:41, 3.50it/s] {'loss': 4.1555, 'learning_rate': 8.507724794759637e-07, 'epoch': 2.66} + 17%|█▋ | 61760/371472 [4:54:55<24:34:41, 3.50it/s] 17%|█▋ | 61761/371472 [4:54:56<25:42:45, 3.35it/s] 17%|█▋ | 61762/371472 [4:54:56<25:53:11, 3.32it/s] 17%|█▋ | 61763/371472 [4:54:56<26:22:38, 3.26it/s] 17%|█▋ | 61764/371472 [4:54:57<27:18:35, 3.15it/s] 17%|█▋ | 61765/371472 [4:54:57<25:44:29, 3.34it/s] 17%|█▋ | 61766/371472 [4:54:57<25:42:38, 3.35it/s] 17%|█▋ | 61767/371472 [4:54:58<27:40:45, 3.11it/s] 17%|█▋ | 61768/371472 [4:54:58<26:18:22, 3.27it/s] 17%|█▋ | 61769/371472 [4:54:58<25:02:30, 3.44it/s] 17%|█▋ | 61770/371472 [4:54:58<27:16:16, 3.15it/s] 17%|█▋ | 61771/371472 [4:54:59<26:09:40, 3.29it/s] 17%|█▋ | 61772/371472 [4:54:59<26:35:37, 3.23it/s] 17%|█▋ | 61773/371472 [4:54:59<27:09:30, 3.17it/s] 17%|█▋ | 61774/371472 [4:55:00<27:14:39, 3.16it/s] 17%|█▋ | 61775/371472 [4:55:00<25:09:31, 3.42it/s] 17%|█▋ | 61776/371472 [4:55:00<24:47:10, 3.47it/s] 17%|█▋ | 61777/371472 [4:55:01<27:03:38, 3.18it/s] 17%|█▋ | 61778/371472 [4:55:01<27:29:32, 3.13it/s] 17%|█▋ | 61779/371472 [4:55:01<25:43:23, 3.34it/s] 17%|█▋ | 61780/371472 [4:55:01<24:00:29, 3.58it/s] {'loss': 3.943, 'learning_rate': 8.507239975004847e-07, 'epoch': 2.66} + 17%|█▋ | 61780/371472 [4:55:01<24:00:29, 3.58it/s] 17%|█▋ | 61781/371472 [4:55:02<24:19:21, 3.54it/s] 17%|█▋ | 61782/371472 [4:55:02<24:12:16, 3.55it/s] 17%|█▋ | 61783/371472 [4:55:02<23:50:41, 3.61it/s] 17%|█▋ | 61784/371472 [4:55:03<24:15:29, 3.55it/s] 17%|█▋ | 61785/371472 [4:55:03<24:39:38, 3.49it/s] 17%|█▋ | 61786/371472 [4:55:03<24:08:21, 3.56it/s] 17%|█▋ | 61787/371472 [4:55:03<23:38:54, 3.64it/s] 17%|█▋ | 61788/371472 [4:55:04<23:26:27, 3.67it/s] 17%|█▋ | 61789/371472 [4:55:04<22:43:41, 3.78it/s] 17%|█▋ | 61790/371472 [4:55:04<24:24:10, 3.53it/s] 17%|█▋ | 61791/371472 [4:55:04<24:21:25, 3.53it/s] 17%|█▋ | 61792/371472 [4:55:05<23:27:58, 3.67it/s] 17%|█▋ | 61793/371472 [4:55:05<23:17:48, 3.69it/s] 17%|█▋ | 61794/371472 [4:55:05<23:11:57, 3.71it/s] 17%|█▋ | 61795/371472 [4:55:05<22:34:39, 3.81it/s] 17%|█▋ | 61796/371472 [4:55:06<22:58:41, 3.74it/s] 17%|█▋ | 61797/371472 [4:55:06<24:05:30, 3.57it/s] 17%|█▋ | 61798/371472 [4:55:06<23:57:18, 3.59it/s] 17%|█▋ | 61799/371472 [4:55:07<23:34:30, 3.65it/s] 17%|█▋ | 61800/371472 [4:55:07<23:16:15, 3.70it/s] {'loss': 4.1855, 'learning_rate': 8.506755155250059e-07, 'epoch': 2.66} + 17%|█▋ | 61800/371472 [4:55:07<23:16:15, 3.70it/s] 17%|█▋ | 61801/371472 [4:55:07<23:34:11, 3.65it/s] 17%|█▋ | 61802/371472 [4:55:07<24:21:13, 3.53it/s] 17%|█▋ | 61803/371472 [4:55:08<23:53:50, 3.60it/s] 17%|█▋ | 61804/371472 [4:55:08<23:24:00, 3.68it/s] 17%|█▋ | 61805/371472 [4:55:08<23:42:06, 3.63it/s] 17%|█▋ | 61806/371472 [4:55:09<23:24:31, 3.67it/s] 17%|█▋ | 61807/371472 [4:55:09<23:39:09, 3.64it/s] 17%|█▋ | 61808/371472 [4:55:09<23:42:49, 3.63it/s] 17%|█▋ | 61809/371472 [4:55:09<24:20:14, 3.53it/s] 17%|█▋ | 61810/371472 [4:55:10<25:33:36, 3.37it/s] 17%|█▋ | 61811/371472 [4:55:10<24:22:06, 3.53it/s] 17%|█▋ | 61812/371472 [4:55:10<24:54:41, 3.45it/s] 17%|█▋ | 61813/371472 [4:55:11<25:50:29, 3.33it/s] 17%|█▋ | 61814/371472 [4:55:11<26:51:27, 3.20it/s] 17%|█▋ | 61815/371472 [4:55:11<27:18:44, 3.15it/s] 17%|█▋ | 61816/371472 [4:55:12<27:26:57, 3.13it/s] 17%|█▋ | 61817/371472 [4:55:12<26:23:46, 3.26it/s] 17%|█▋ | 61818/371472 [4:55:12<25:53:54, 3.32it/s] 17%|█▋ | 61819/371472 [4:55:12<25:25:42, 3.38it/s] 17%|█▋ | 61820/371472 [4:55:13<24:38:26, 3.49it/s] {'loss': 4.0555, 'learning_rate': 8.50627033549527e-07, 'epoch': 2.66} + 17%|█▋ | 61820/371472 [4:55:13<24:38:26, 3.49it/s] 17%|█▋ | 61821/371472 [4:55:13<23:25:52, 3.67it/s] 17%|█▋ | 61822/371472 [4:55:13<25:34:00, 3.36it/s] 17%|█▋ | 61823/371472 [4:55:14<25:13:19, 3.41it/s] 17%|█▋ | 61824/371472 [4:55:14<25:53:15, 3.32it/s] 17%|█▋ | 61825/371472 [4:55:14<25:32:16, 3.37it/s] 17%|█▋ | 61826/371472 [4:55:15<25:48:28, 3.33it/s] 17%|█▋ | 61827/371472 [4:55:15<25:35:45, 3.36it/s] 17%|█▋ | 61828/371472 [4:55:15<27:42:50, 3.10it/s] 17%|█▋ | 61829/371472 [4:55:15<25:54:14, 3.32it/s] 17%|█▋ | 61830/371472 [4:55:16<25:01:22, 3.44it/s] 17%|█▋ | 61831/371472 [4:55:16<25:59:01, 3.31it/s] 17%|█▋ | 61832/371472 [4:55:16<24:33:06, 3.50it/s] 17%|█▋ | 61833/371472 [4:55:17<23:53:59, 3.60it/s] 17%|█▋ | 61834/371472 [4:55:17<23:17:07, 3.69it/s] 17%|█▋ | 61835/371472 [4:55:17<24:01:25, 3.58it/s] 17%|█▋ | 61836/371472 [4:55:17<24:03:57, 3.57it/s] 17%|█▋ | 61837/371472 [4:55:18<24:45:44, 3.47it/s] 17%|█▋ | 61838/371472 [4:55:18<24:22:23, 3.53it/s] 17%|█▋ | 61839/371472 [4:55:18<24:31:29, 3.51it/s] 17%|█▋ | 61840/371472 [4:55:18<23:41:50, 3.63it/s] {'loss': 4.0709, 'learning_rate': 8.505785515740481e-07, 'epoch': 2.66} + 17%|█▋ | 61840/371472 [4:55:18<23:41:50, 3.63it/s] 17%|█▋ | 61841/371472 [4:55:19<23:01:04, 3.74it/s] 17%|█▋ | 61842/371472 [4:55:19<23:19:28, 3.69it/s] 17%|█▋ | 61843/371472 [4:55:19<22:52:39, 3.76it/s] 17%|█▋ | 61844/371472 [4:55:20<24:08:04, 3.56it/s] 17%|█▋ | 61845/371472 [4:55:20<23:52:45, 3.60it/s] 17%|█▋ | 61846/371472 [4:55:20<23:20:12, 3.69it/s] 17%|█▋ | 61847/371472 [4:55:20<25:03:21, 3.43it/s] 17%|█▋ | 61848/371472 [4:55:21<26:00:45, 3.31it/s] 17%|█▋ | 61849/371472 [4:55:21<25:51:27, 3.33it/s] 17%|█▋ | 61850/371472 [4:55:21<27:38:48, 3.11it/s] 17%|█▋ | 61851/371472 [4:55:22<27:35:25, 3.12it/s] 17%|█▋ | 61852/371472 [4:55:22<26:07:49, 3.29it/s] 17%|█▋ | 61853/371472 [4:55:22<27:56:50, 3.08it/s] 17%|█▋ | 61854/371472 [4:55:23<25:59:17, 3.31it/s] 17%|█▋ | 61855/371472 [4:55:23<25:05:13, 3.43it/s] 17%|█▋ | 61856/371472 [4:55:23<25:05:09, 3.43it/s] 17%|█▋ | 61857/371472 [4:55:23<24:04:05, 3.57it/s] 17%|█▋ | 61858/371472 [4:55:24<23:11:34, 3.71it/s] 17%|█▋ | 61859/371472 [4:55:24<24:06:58, 3.57it/s] 17%|█▋ | 61860/371472 [4:55:24<23:18:17, 3.69it/s] {'loss': 4.2282, 'learning_rate': 8.505300695985691e-07, 'epoch': 2.66} + 17%|█▋ | 61860/371472 [4:55:24<23:18:17, 3.69it/s] 17%|█▋ | 61861/371472 [4:55:25<25:38:15, 3.35it/s] 17%|█▋ | 61862/371472 [4:55:25<25:32:44, 3.37it/s] 17%|█▋ | 61863/371472 [4:55:25<25:18:57, 3.40it/s] 17%|█▋ | 61864/371472 [4:55:25<24:22:32, 3.53it/s] 17%|█▋ | 61865/371472 [4:55:26<24:01:21, 3.58it/s] 17%|█▋ | 61866/371472 [4:55:26<24:45:19, 3.47it/s] 17%|█▋ | 61867/371472 [4:55:26<25:01:23, 3.44it/s] 17%|█▋ | 61868/371472 [4:55:27<24:43:34, 3.48it/s] 17%|█▋ | 61869/371472 [4:55:27<24:39:40, 3.49it/s] 17%|█▋ | 61870/371472 [4:55:27<25:57:54, 3.31it/s] 17%|█▋ | 61871/371472 [4:55:28<24:46:44, 3.47it/s] 17%|█▋ | 61872/371472 [4:55:28<26:06:31, 3.29it/s] 17%|█▋ | 61873/371472 [4:55:28<25:47:08, 3.34it/s] 17%|█▋ | 61874/371472 [4:55:28<25:12:00, 3.41it/s] 17%|█▋ | 61875/371472 [4:55:29<25:28:39, 3.38it/s] 17%|█▋ | 61876/371472 [4:55:29<28:06:31, 3.06it/s] 17%|█▋ | 61877/371472 [4:55:29<26:30:52, 3.24it/s] 17%|█▋ | 61878/371472 [4:55:30<25:55:49, 3.32it/s] 17%|█▋ | 61879/371472 [4:55:30<24:57:40, 3.45it/s] 17%|█▋ | 61880/371472 [4:55:30<25:06:14, 3.43it/s] {'loss': 4.0509, 'learning_rate': 8.504815876230903e-07, 'epoch': 2.67} + 17%|█▋ | 61880/371472 [4:55:30<25:06:14, 3.43it/s] 17%|█▋ | 61881/371472 [4:55:31<25:16:03, 3.40it/s] 17%|█▋ | 61882/371472 [4:55:31<24:30:01, 3.51it/s] 17%|█▋ | 61883/371472 [4:55:31<26:02:59, 3.30it/s] 17%|█▋ | 61884/371472 [4:55:31<25:13:04, 3.41it/s] 17%|█▋ | 61885/371472 [4:55:32<24:40:24, 3.49it/s] 17%|█▋ | 61886/371472 [4:55:32<24:09:16, 3.56it/s] 17%|█▋ | 61887/371472 [4:55:32<23:51:04, 3.61it/s] 17%|█▋ | 61888/371472 [4:55:32<23:17:04, 3.69it/s] 17%|█▋ | 61889/371472 [4:55:33<22:51:26, 3.76it/s] 17%|█▋ | 61890/371472 [4:55:33<22:47:15, 3.77it/s] 17%|█▋ | 61891/371472 [4:55:33<23:08:21, 3.72it/s] 17%|█▋ | 61892/371472 [4:55:34<23:18:56, 3.69it/s] 17%|█▋ | 61893/371472 [4:55:34<23:22:26, 3.68it/s] 17%|█▋ | 61894/371472 [4:55:34<23:49:07, 3.61it/s] 17%|█▋ | 61895/371472 [4:55:34<24:31:07, 3.51it/s] 17%|█▋ | 61896/371472 [4:55:35<25:23:42, 3.39it/s] 17%|█▋ | 61897/371472 [4:55:35<26:26:34, 3.25it/s] 17%|█▋ | 61898/371472 [4:55:35<27:28:22, 3.13it/s] 17%|█▋ | 61899/371472 [4:55:36<26:31:05, 3.24it/s] 17%|█▋ | 61900/371472 [4:55:36<26:27:00, 3.25it/s] {'loss': 4.0596, 'learning_rate': 8.504331056476114e-07, 'epoch': 2.67} + 17%|█▋ | 61900/371472 [4:55:36<26:27:00, 3.25it/s] 17%|█▋ | 61901/371472 [4:55:36<24:51:28, 3.46it/s] 17%|█▋ | 61902/371472 [4:55:37<25:17:08, 3.40it/s] 17%|█▋ | 61903/371472 [4:55:37<24:01:23, 3.58it/s] 17%|█▋ | 61904/371472 [4:55:37<23:22:34, 3.68it/s] 17%|█▋ | 61905/371472 [4:55:37<24:03:13, 3.57it/s] 17%|█▋ | 61906/371472 [4:55:38<23:00:20, 3.74it/s] 17%|█▋ | 61907/371472 [4:55:38<23:56:48, 3.59it/s] 17%|█▋ | 61908/371472 [4:55:38<24:05:43, 3.57it/s] 17%|█▋ | 61909/371472 [4:55:39<26:23:49, 3.26it/s] 17%|█▋ | 61910/371472 [4:55:39<25:06:25, 3.42it/s] 17%|█▋ | 61911/371472 [4:55:39<23:42:28, 3.63it/s] 17%|█▋ | 61912/371472 [4:55:39<23:28:33, 3.66it/s] 17%|█▋ | 61913/371472 [4:55:40<22:44:26, 3.78it/s] 17%|█▋ | 61914/371472 [4:55:40<23:13:11, 3.70it/s] 17%|█▋ | 61915/371472 [4:55:40<24:20:16, 3.53it/s] 17%|█▋ | 61916/371472 [4:55:40<24:02:23, 3.58it/s] 17%|█▋ | 61917/371472 [4:55:41<23:06:17, 3.72it/s] 17%|█▋ | 61918/371472 [4:55:41<23:03:39, 3.73it/s] 17%|█▋ | 61919/371472 [4:55:41<23:52:59, 3.60it/s] 17%|█▋ | 61920/371472 [4:55:41<23:30:58, 3.66it/s] {'loss': 4.1527, 'learning_rate': 8.503846236721326e-07, 'epoch': 2.67} + 17%|█▋ | 61920/371472 [4:55:41<23:30:58, 3.66it/s] 17%|█▋ | 61921/371472 [4:55:42<25:03:59, 3.43it/s] 17%|█▋ | 61922/371472 [4:55:42<25:51:43, 3.32it/s] 17%|█▋ | 61923/371472 [4:55:42<24:10:45, 3.56it/s] 17%|█▋ | 61924/371472 [4:55:43<24:02:09, 3.58it/s] 17%|█▋ | 61925/371472 [4:55:43<23:22:53, 3.68it/s] 17%|█▋ | 61926/371472 [4:55:43<22:41:37, 3.79it/s] 17%|█▋ | 61927/371472 [4:55:43<22:30:25, 3.82it/s] 17%|█▋ | 61928/371472 [4:55:44<22:19:34, 3.85it/s] 17%|█▋ | 61929/371472 [4:55:44<22:39:00, 3.80it/s] 17%|█▋ | 61930/371472 [4:55:44<22:10:31, 3.88it/s] 17%|█▋ | 61931/371472 [4:55:44<22:56:38, 3.75it/s] 17%|█▋ | 61932/371472 [4:55:45<22:25:53, 3.83it/s] 17%|█▋ | 61933/371472 [4:55:45<24:18:25, 3.54it/s] 17%|█▋ | 61934/371472 [4:55:45<23:31:50, 3.65it/s] 17%|█▋ | 61935/371472 [4:55:46<25:56:08, 3.32it/s] 17%|█▋ | 61936/371472 [4:55:46<25:14:00, 3.41it/s] 17%|█▋ | 61937/371472 [4:55:46<24:39:30, 3.49it/s] 17%|█▋ | 61938/371472 [4:55:46<23:14:45, 3.70it/s] 17%|█▋ | 61939/371472 [4:55:47<24:05:28, 3.57it/s] 17%|█▋ | 61940/371472 [4:55:47<26:28:13, 3.25it/s] {'loss': 4.3536, 'learning_rate': 8.503361416966536e-07, 'epoch': 2.67} + 17%|█▋ | 61940/371472 [4:55:47<26:28:13, 3.25it/s] 17%|█▋ | 61941/371472 [4:55:47<25:13:46, 3.41it/s] 17%|█▋ | 61942/371472 [4:55:48<24:52:55, 3.46it/s] 17%|█▋ | 61943/371472 [4:55:48<24:39:08, 3.49it/s] 17%|█▋ | 61944/371472 [4:55:48<25:11:08, 3.41it/s] 17%|█▋ | 61945/371472 [4:55:49<25:21:14, 3.39it/s] 17%|█▋ | 61946/371472 [4:55:49<24:32:28, 3.50it/s] 17%|█▋ | 61947/371472 [4:55:49<23:45:47, 3.62it/s] 17%|█▋ | 61948/371472 [4:55:49<23:45:43, 3.62it/s] 17%|█▋ | 61949/371472 [4:55:50<24:49:28, 3.46it/s] 17%|█▋ | 61950/371472 [4:55:50<23:36:02, 3.64it/s] 17%|█▋ | 61951/371472 [4:55:50<23:36:53, 3.64it/s] 17%|█▋ | 61952/371472 [4:55:50<23:22:20, 3.68it/s] 17%|█▋ | 61953/371472 [4:55:51<24:31:02, 3.51it/s] 17%|█▋ | 61954/371472 [4:55:51<23:50:06, 3.61it/s] 17%|█▋ | 61955/371472 [4:55:51<24:30:52, 3.51it/s] 17%|█▋ | 61956/371472 [4:55:52<25:39:37, 3.35it/s] 17%|█▋ | 61957/371472 [4:55:52<24:40:54, 3.48it/s] 17%|█▋ | 61958/371472 [4:55:52<26:33:08, 3.24it/s] 17%|█▋ | 61959/371472 [4:55:53<26:17:09, 3.27it/s] 17%|█▋ | 61960/371472 [4:55:53<26:35:07, 3.23it/s] {'loss': 4.0764, 'learning_rate': 8.502876597211748e-07, 'epoch': 2.67} + 17%|█▋ | 61960/371472 [4:55:53<26:35:07, 3.23it/s] 17%|█▋ | 61961/371472 [4:55:53<25:19:32, 3.39it/s] 17%|█▋ | 61962/371472 [4:55:53<24:25:13, 3.52it/s] 17%|█▋ | 61963/371472 [4:55:54<25:33:57, 3.36it/s] 17%|█▋ | 61964/371472 [4:55:54<26:21:49, 3.26it/s] 17%|█▋ | 61965/371472 [4:55:54<25:34:01, 3.36it/s] 17%|█▋ | 61966/371472 [4:55:55<25:45:38, 3.34it/s] 17%|█▋ | 61967/371472 [4:55:55<25:23:19, 3.39it/s] 17%|█▋ | 61968/371472 [4:55:55<24:31:10, 3.51it/s] 17%|█▋ | 61969/371472 [4:55:55<24:09:48, 3.56it/s] 17%|█▋ | 61970/371472 [4:55:56<25:14:44, 3.41it/s] 17%|█▋ | 61971/371472 [4:55:56<24:19:00, 3.54it/s] 17%|█▋ | 61972/371472 [4:55:56<24:03:51, 3.57it/s] 17%|█▋ | 61973/371472 [4:55:57<23:05:24, 3.72it/s] 17%|█▋ | 61974/371472 [4:55:57<24:50:31, 3.46it/s] 17%|█▋ | 61975/371472 [4:55:57<26:20:31, 3.26it/s] 17%|█▋ | 61976/371472 [4:55:58<25:22:11, 3.39it/s] 17%|█▋ | 61977/371472 [4:55:58<26:10:56, 3.28it/s] 17%|█▋ | 61978/371472 [4:55:58<24:47:14, 3.47it/s] 17%|█▋ | 61979/371472 [4:55:58<24:40:15, 3.48it/s] 17%|█▋ | 61980/371472 [4:55:59<23:53:41, 3.60it/s] {'loss': 4.1755, 'learning_rate': 8.502391777456958e-07, 'epoch': 2.67} + 17%|█▋ | 61980/371472 [4:55:59<23:53:41, 3.60it/s] 17%|█▋ | 61981/371472 [4:55:59<24:24:10, 3.52it/s] 17%|█▋ | 61982/371472 [4:55:59<24:27:12, 3.52it/s] 17%|█▋ | 61983/371472 [4:56:00<27:24:05, 3.14it/s] 17%|█▋ | 61984/371472 [4:56:00<25:48:42, 3.33it/s] 17%|█▋ | 61985/371472 [4:56:00<25:40:58, 3.35it/s] 17%|█▋ | 61986/371472 [4:56:00<24:41:01, 3.48it/s] 17%|█▋ | 61987/371472 [4:56:01<23:43:17, 3.62it/s] 17%|█▋ | 61988/371472 [4:56:01<24:13:55, 3.55it/s] 17%|█▋ | 61989/371472 [4:56:01<25:18:08, 3.40it/s] 17%|█▋ | 61990/371472 [4:56:02<25:26:59, 3.38it/s] 17%|█▋ | 61991/371472 [4:56:02<26:25:47, 3.25it/s] 17%|█▋ | 61992/371472 [4:56:02<26:58:29, 3.19it/s] 17%|█▋ | 61993/371472 [4:56:03<27:31:46, 3.12it/s] 17%|█▋ | 61994/371472 [4:56:03<25:58:44, 3.31it/s] 17%|█▋ | 61995/371472 [4:56:03<25:14:08, 3.41it/s] 17%|█▋ | 61996/371472 [4:56:03<24:27:56, 3.51it/s] 17%|█▋ | 61997/371472 [4:56:04<25:38:48, 3.35it/s] 17%|█▋ | 61998/371472 [4:56:04<25:11:57, 3.41it/s] 17%|█▋ | 61999/371472 [4:56:04<25:04:06, 3.43it/s] 17%|█▋ | 62000/371472 [4:56:05<24:39:27, 3.49it/s] {'loss': 4.2831, 'learning_rate': 8.50190695770217e-07, 'epoch': 2.67} + 17%|█▋ | 62000/371472 [4:56:05<24:39:27, 3.49it/s] 17%|█▋ | 62001/371472 [4:56:05<24:09:48, 3.56it/s] 17%|█▋ | 62002/371472 [4:56:05<24:09:46, 3.56it/s] 17%|█▋ | 62003/371472 [4:56:05<24:17:11, 3.54it/s] 17%|█▋ | 62004/371472 [4:56:06<24:05:52, 3.57it/s] 17%|█▋ | 62005/371472 [4:56:06<23:40:52, 3.63it/s] 17%|█▋ | 62006/371472 [4:56:06<23:52:28, 3.60it/s] 17%|█▋ | 62007/371472 [4:56:07<24:04:43, 3.57it/s] 17%|█▋ | 62008/371472 [4:56:07<24:10:08, 3.56it/s] 17%|█▋ | 62009/371472 [4:56:07<23:04:41, 3.72it/s] 17%|█▋ | 62010/371472 [4:56:07<23:40:16, 3.63it/s] 17%|█▋ | 62011/371472 [4:56:08<23:06:10, 3.72it/s] 17%|█▋ | 62012/371472 [4:56:08<23:10:18, 3.71it/s] 17%|█▋ | 62013/371472 [4:56:08<24:25:24, 3.52it/s] 17%|█▋ | 62014/371472 [4:56:08<24:33:35, 3.50it/s] 17%|█▋ | 62015/371472 [4:56:09<24:24:21, 3.52it/s] 17%|█▋ | 62016/371472 [4:56:09<23:57:20, 3.59it/s] 17%|█▋ | 62017/371472 [4:56:09<23:27:02, 3.67it/s] 17%|█▋ | 62018/371472 [4:56:10<22:52:23, 3.76it/s] 17%|█▋ | 62019/371472 [4:56:10<23:22:10, 3.68it/s] 17%|█▋ | 62020/371472 [4:56:10<23:14:36, 3.70it/s] {'loss': 4.3237, 'learning_rate': 8.50142213794738e-07, 'epoch': 2.67} + 17%|█▋ | 62020/371472 [4:56:10<23:14:36, 3.70it/s] 17%|█▋ | 62021/371472 [4:56:10<25:06:43, 3.42it/s] 17%|█▋ | 62022/371472 [4:56:11<24:16:26, 3.54it/s] 17%|█▋ | 62023/371472 [4:56:11<28:16:18, 3.04it/s] 17%|█▋ | 62024/371472 [4:56:11<26:04:15, 3.30it/s] 17%|█▋ | 62025/371472 [4:56:12<25:37:30, 3.35it/s] 17%|█▋ | 62026/371472 [4:56:12<27:03:00, 3.18it/s] 17%|█▋ | 62027/371472 [4:56:12<25:55:57, 3.31it/s] 17%|█▋ | 62028/371472 [4:56:13<25:16:17, 3.40it/s] 17%|█▋ | 62029/371472 [4:56:13<24:45:58, 3.47it/s] 17%|█▋ | 62030/371472 [4:56:13<24:46:44, 3.47it/s] 17%|█▋ | 62031/371472 [4:56:13<26:20:15, 3.26it/s] 17%|█▋ | 62032/371472 [4:56:14<25:15:42, 3.40it/s] 17%|█▋ | 62033/371472 [4:56:14<26:59:50, 3.18it/s] 17%|█▋ | 62034/371472 [4:56:14<25:56:59, 3.31it/s] 17%|█▋ | 62035/371472 [4:56:15<24:49:36, 3.46it/s] 17%|█▋ | 62036/371472 [4:56:15<23:56:34, 3.59it/s] 17%|█▋ | 62037/371472 [4:56:15<25:12:10, 3.41it/s] 17%|█▋ | 62038/371472 [4:56:15<25:01:07, 3.44it/s] 17%|█▋ | 62039/371472 [4:56:16<25:00:13, 3.44it/s] 17%|█▋ | 62040/371472 [4:56:16<24:32:49, 3.50it/s] {'loss': 4.186, 'learning_rate': 8.500937318192591e-07, 'epoch': 2.67} + 17%|█▋ | 62040/371472 [4:56:16<24:32:49, 3.50it/s] 17%|█▋ | 62041/371472 [4:56:16<25:22:07, 3.39it/s] 17%|█▋ | 62042/371472 [4:56:17<26:25:27, 3.25it/s] 17%|█▋ | 62043/371472 [4:56:17<25:51:39, 3.32it/s] 17%|█▋ | 62044/371472 [4:56:17<26:31:55, 3.24it/s] 17%|█▋ | 62045/371472 [4:56:18<25:19:20, 3.39it/s] 17%|█▋ | 62046/371472 [4:56:18<25:20:55, 3.39it/s] 17%|█▋ | 62047/371472 [4:56:18<24:17:59, 3.54it/s] 17%|█▋ | 62048/371472 [4:56:18<24:22:08, 3.53it/s] 17%|█▋ | 62049/371472 [4:56:19<23:29:40, 3.66it/s] 17%|█▋ | 62050/371472 [4:56:19<23:55:32, 3.59it/s] 17%|█▋ | 62051/371472 [4:56:19<23:16:11, 3.69it/s] 17%|█▋ | 62052/371472 [4:56:20<27:03:08, 3.18it/s] 17%|█▋ | 62053/371472 [4:56:20<26:37:10, 3.23it/s] 17%|█▋ | 62054/371472 [4:56:20<24:55:29, 3.45it/s] 17%|█▋ | 62055/371472 [4:56:20<24:54:22, 3.45it/s] 17%|█▋ | 62056/371472 [4:56:21<24:27:35, 3.51it/s] 17%|█▋ | 62057/371472 [4:56:21<24:42:09, 3.48it/s] 17%|█▋ | 62058/371472 [4:56:21<24:35:18, 3.50it/s] 17%|█▋ | 62059/371472 [4:56:22<25:18:06, 3.40it/s] 17%|█▋ | 62060/371472 [4:56:22<24:22:56, 3.52it/s] {'loss': 4.3383, 'learning_rate': 8.500452498437803e-07, 'epoch': 2.67} + 17%|█▋ | 62060/371472 [4:56:22<24:22:56, 3.52it/s] 17%|█▋ | 62061/371472 [4:56:22<24:27:23, 3.51it/s] 17%|█▋ | 62062/371472 [4:56:22<24:20:10, 3.53it/s] 17%|█▋ | 62063/371472 [4:56:23<25:04:45, 3.43it/s] 17%|█▋ | 62064/371472 [4:56:23<23:55:10, 3.59it/s] 17%|█▋ | 62065/371472 [4:56:23<23:30:20, 3.66it/s] 17%|█▋ | 62066/371472 [4:56:24<23:46:10, 3.62it/s] 17%|█▋ | 62067/371472 [4:56:24<22:45:51, 3.78it/s] 17%|█▋ | 62068/371472 [4:56:24<23:59:27, 3.58it/s] 17%|█▋ | 62069/371472 [4:56:24<26:07:43, 3.29it/s] 17%|█▋ | 62070/371472 [4:56:25<25:25:42, 3.38it/s] 17%|█▋ | 62071/371472 [4:56:25<26:38:59, 3.22it/s] 17%|█▋ | 62072/371472 [4:56:25<28:03:15, 3.06it/s] 17%|█▋ | 62073/371472 [4:56:26<26:23:55, 3.26it/s] 17%|█▋ | 62074/371472 [4:56:26<24:59:32, 3.44it/s] 17%|█▋ | 62075/371472 [4:56:26<24:51:11, 3.46it/s] 17%|█▋ | 62076/371472 [4:56:27<24:31:40, 3.50it/s] 17%|█▋ | 62077/371472 [4:56:27<25:05:40, 3.42it/s] 17%|█▋ | 62078/371472 [4:56:27<24:48:17, 3.46it/s] 17%|█▋ | 62079/371472 [4:56:27<26:33:27, 3.24it/s] 17%|█▋ | 62080/371472 [4:56:28<25:29:18, 3.37it/s] {'loss': 3.7935, 'learning_rate': 8.499967678683015e-07, 'epoch': 2.67} + 17%|█▋ | 62080/371472 [4:56:28<25:29:18, 3.37it/s] 17%|█▋ | 62081/371472 [4:56:28<24:43:51, 3.48it/s] 17%|█▋ | 62082/371472 [4:56:28<25:43:46, 3.34it/s] 17%|█▋ | 62083/371472 [4:56:29<24:57:35, 3.44it/s] 17%|█▋ | 62084/371472 [4:56:29<23:58:11, 3.59it/s] 17%|█▋ | 62085/371472 [4:56:29<23:03:07, 3.73it/s] 17%|█▋ | 62086/371472 [4:56:29<26:01:27, 3.30it/s] 17%|█▋ | 62087/371472 [4:56:30<25:32:25, 3.36it/s] 17%|█▋ | 62088/371472 [4:56:30<25:18:36, 3.40it/s] 17%|█▋ | 62089/371472 [4:56:30<23:51:21, 3.60it/s] 17%|█▋ | 62090/371472 [4:56:31<24:40:44, 3.48it/s] 17%|█▋ | 62091/371472 [4:56:31<25:22:00, 3.39it/s] 17%|█▋ | 62092/371472 [4:56:31<25:02:21, 3.43it/s] 17%|█▋ | 62093/371472 [4:56:31<24:26:15, 3.52it/s] 17%|█▋ | 62094/371472 [4:56:32<24:38:28, 3.49it/s] 17%|█▋ | 62095/371472 [4:56:32<23:44:51, 3.62it/s] 17%|█▋ | 62096/371472 [4:56:32<22:47:36, 3.77it/s] 17%|█▋ | 62097/371472 [4:56:32<22:19:18, 3.85it/s] 17%|█▋ | 62098/371472 [4:56:33<22:54:56, 3.75it/s] 17%|█▋ | 62099/371472 [4:56:33<24:39:50, 3.48it/s] 17%|█▋ | 62100/371472 [4:56:33<24:31:30, 3.50it/s] {'loss': 4.1606, 'learning_rate': 8.499482858928224e-07, 'epoch': 2.67} + 17%|█▋ | 62100/371472 [4:56:33<24:31:30, 3.50it/s] 17%|█▋ | 62101/371472 [4:56:34<24:05:20, 3.57it/s] 17%|█▋ | 62102/371472 [4:56:34<23:53:46, 3.60it/s] 17%|█▋ | 62103/371472 [4:56:34<24:24:26, 3.52it/s] 17%|█▋ | 62104/371472 [4:56:34<23:52:30, 3.60it/s] 17%|█▋ | 62105/371472 [4:56:35<23:25:42, 3.67it/s] 17%|█▋ | 62106/371472 [4:56:35<23:22:06, 3.68it/s] 17%|█▋ | 62107/371472 [4:56:35<23:13:41, 3.70it/s] 17%|█▋ | 62108/371472 [4:56:36<23:09:07, 3.71it/s] 17%|█▋ | 62109/371472 [4:56:36<23:41:49, 3.63it/s] 17%|█▋ | 62110/371472 [4:56:36<24:55:28, 3.45it/s] 17%|█▋ | 62111/371472 [4:56:36<23:34:27, 3.65it/s] 17%|█▋ | 62112/371472 [4:56:37<23:49:37, 3.61it/s] 17%|█▋ | 62113/371472 [4:56:37<23:09:19, 3.71it/s] 17%|█▋ | 62114/371472 [4:56:37<25:47:25, 3.33it/s] 17%|█▋ | 62115/371472 [4:56:38<25:37:26, 3.35it/s] 17%|█▋ | 62116/371472 [4:56:38<25:11:32, 3.41it/s] 17%|█▋ | 62117/371472 [4:56:38<25:41:58, 3.34it/s] 17%|█▋ | 62118/371472 [4:56:38<25:03:00, 3.43it/s] 17%|█▋ | 62119/371472 [4:56:39<25:36:16, 3.36it/s] 17%|█▋ | 62120/371472 [4:56:39<24:11:39, 3.55it/s] {'loss': 4.2875, 'learning_rate': 8.498998039173435e-07, 'epoch': 2.68} + 17%|█▋ | 62120/371472 [4:56:39<24:11:39, 3.55it/s] 17%|█▋ | 62121/371472 [4:56:39<23:44:56, 3.62it/s] 17%|█▋ | 62122/371472 [4:56:40<23:26:32, 3.67it/s] 17%|█▋ | 62123/371472 [4:56:40<22:54:02, 3.75it/s] 17%|█▋ | 62124/371472 [4:56:40<24:04:08, 3.57it/s] 17%|█▋ | 62125/371472 [4:56:40<23:39:04, 3.63it/s] 17%|█▋ | 62126/371472 [4:56:41<22:52:56, 3.76it/s] 17%|█▋ | 62127/371472 [4:56:41<23:58:41, 3.58it/s] 17%|█▋ | 62128/371472 [4:56:41<23:37:15, 3.64it/s] 17%|█▋ | 62129/371472 [4:56:42<25:58:15, 3.31it/s] 17%|█▋ | 62130/371472 [4:56:42<25:40:55, 3.35it/s] 17%|█▋ | 62131/371472 [4:56:42<25:02:57, 3.43it/s] 17%|█▋ | 62132/371472 [4:56:42<24:11:50, 3.55it/s] 17%|█▋ | 62133/371472 [4:56:43<23:57:10, 3.59it/s] 17%|█▋ | 62134/371472 [4:56:43<23:14:23, 3.70it/s] 17%|█▋ | 62135/371472 [4:56:43<23:24:14, 3.67it/s] 17%|█▋ | 62136/371472 [4:56:43<23:57:57, 3.59it/s] 17%|█▋ | 62137/371472 [4:56:44<24:44:31, 3.47it/s] 17%|█▋ | 62138/371472 [4:56:44<24:22:20, 3.53it/s] 17%|█▋ | 62139/371472 [4:56:44<25:33:47, 3.36it/s] 17%|█▋ | 62140/371472 [4:56:45<25:32:44, 3.36it/s] {'loss': 4.3184, 'learning_rate': 8.498513219418647e-07, 'epoch': 2.68} + 17%|█▋ | 62140/371472 [4:56:45<25:32:44, 3.36it/s] 17%|█▋ | 62141/371472 [4:56:45<27:05:39, 3.17it/s] 17%|█▋ | 62142/371472 [4:56:45<25:45:53, 3.33it/s] 17%|█▋ | 62143/371472 [4:56:46<26:40:14, 3.22it/s] 17%|█▋ | 62144/371472 [4:56:46<25:48:47, 3.33it/s] 17%|█▋ | 62145/371472 [4:56:46<26:02:56, 3.30it/s] 17%|█▋ | 62146/371472 [4:56:47<26:57:41, 3.19it/s] 17%|█▋ | 62147/371472 [4:56:47<26:32:10, 3.24it/s] 17%|█▋ | 62148/371472 [4:56:47<26:29:42, 3.24it/s] 17%|█▋ | 62149/371472 [4:56:47<25:44:21, 3.34it/s] 17%|█▋ | 62150/371472 [4:56:48<26:08:28, 3.29it/s] 17%|█▋ | 62151/371472 [4:56:48<25:34:47, 3.36it/s] 17%|█▋ | 62152/371472 [4:56:48<25:07:42, 3.42it/s] 17%|█▋ | 62153/371472 [4:56:49<24:00:19, 3.58it/s] 17%|█▋ | 62154/371472 [4:56:49<23:33:21, 3.65it/s] 17%|█▋ | 62155/371472 [4:56:49<23:10:47, 3.71it/s] 17%|█▋ | 62156/371472 [4:56:49<22:36:42, 3.80it/s] 17%|█▋ | 62157/371472 [4:56:50<22:37:52, 3.80it/s] 17%|█▋ | 62158/371472 [4:56:50<22:55:48, 3.75it/s] 17%|█▋ | 62159/371472 [4:56:50<22:20:52, 3.84it/s] 17%|█▋ | 62160/371472 [4:56:50<23:01:52, 3.73it/s] {'loss': 4.2195, 'learning_rate': 8.498028399663858e-07, 'epoch': 2.68} + 17%|█▋ | 62160/371472 [4:56:50<23:01:52, 3.73it/s] 17%|█▋ | 62161/371472 [4:56:51<22:32:26, 3.81it/s] 17%|█▋ | 62162/371472 [4:56:51<23:23:47, 3.67it/s] 17%|█▋ | 62163/371472 [4:56:51<23:08:46, 3.71it/s] 17%|█▋ | 62164/371472 [4:56:52<26:44:52, 3.21it/s] 17%|█▋ | 62165/371472 [4:56:52<25:06:45, 3.42it/s] 17%|█▋ | 62166/371472 [4:56:52<23:55:56, 3.59it/s] 17%|█▋ | 62167/371472 [4:56:52<25:27:51, 3.37it/s] 17%|█▋ | 62168/371472 [4:56:53<25:56:37, 3.31it/s] 17%|█▋ | 62169/371472 [4:56:53<25:46:47, 3.33it/s] 17%|█▋ | 62170/371472 [4:56:53<25:46:01, 3.33it/s] 17%|█▋ | 62171/371472 [4:56:54<24:38:31, 3.49it/s] 17%|█▋ | 62172/371472 [4:56:54<23:57:02, 3.59it/s] 17%|█▋ | 62173/371472 [4:56:54<23:30:44, 3.65it/s] 17%|█▋ | 62174/371472 [4:56:54<23:19:26, 3.68it/s] 17%|█▋ | 62175/371472 [4:56:55<23:29:24, 3.66it/s] 17%|█▋ | 62176/371472 [4:56:55<23:18:17, 3.69it/s] 17%|█▋ | 62177/371472 [4:56:55<23:24:23, 3.67it/s] 17%|█▋ | 62178/371472 [4:56:56<25:03:50, 3.43it/s] 17%|█▋ | 62179/371472 [4:56:56<24:36:01, 3.49it/s] 17%|█▋ | 62180/371472 [4:56:56<24:17:37, 3.54it/s] {'loss': 4.3262, 'learning_rate': 8.497543579909069e-07, 'epoch': 2.68} + 17%|█▋ | 62180/371472 [4:56:56<24:17:37, 3.54it/s] 17%|█▋ | 62181/371472 [4:56:56<26:25:08, 3.25it/s] 17%|█▋ | 62182/371472 [4:56:57<25:11:03, 3.41it/s] 17%|█▋ | 62183/371472 [4:56:57<24:25:03, 3.52it/s] 17%|█▋ | 62184/371472 [4:56:57<23:34:06, 3.65it/s] 17%|█▋ | 62185/371472 [4:56:58<23:16:39, 3.69it/s] 17%|█▋ | 62186/371472 [4:56:58<23:00:21, 3.73it/s] 17%|█▋ | 62187/371472 [4:56:58<23:04:05, 3.72it/s] 17%|█▋ | 62188/371472 [4:56:58<23:19:26, 3.68it/s] 17%|█▋ | 62189/371472 [4:56:59<24:18:55, 3.53it/s] 17%|█▋ | 62190/371472 [4:56:59<24:51:20, 3.46it/s] 17%|█▋ | 62191/371472 [4:56:59<24:44:06, 3.47it/s] 17%|█▋ | 62192/371472 [4:56:59<23:56:25, 3.59it/s] 17%|█▋ | 62193/371472 [4:57:00<23:18:11, 3.69it/s] 17%|█▋ | 62194/371472 [4:57:00<22:46:05, 3.77it/s] 17%|█▋ | 62195/371472 [4:57:00<22:52:20, 3.76it/s] 17%|█▋ | 62196/371472 [4:57:01<23:43:03, 3.62it/s] 17%|█▋ | 62197/371472 [4:57:01<22:56:42, 3.74it/s] 17%|█▋ | 62198/371472 [4:57:01<25:04:32, 3.43it/s] 17%|█▋ | 62199/371472 [4:57:01<26:03:51, 3.30it/s] 17%|█▋ | 62200/371472 [4:57:02<25:02:21, 3.43it/s] {'loss': 4.2036, 'learning_rate': 8.49705876015428e-07, 'epoch': 2.68} + 17%|█▋ | 62200/371472 [4:57:02<25:02:21, 3.43it/s] 17%|█▋ | 62201/371472 [4:57:02<24:18:57, 3.53it/s] 17%|█▋ | 62202/371472 [4:57:02<24:38:40, 3.49it/s] 17%|█▋ | 62203/371472 [4:57:03<24:37:57, 3.49it/s] 17%|█▋ | 62204/371472 [4:57:03<24:11:57, 3.55it/s] 17%|█▋ | 62205/371472 [4:57:03<23:06:44, 3.72it/s] 17%|█▋ | 62206/371472 [4:57:03<23:28:37, 3.66it/s] 17%|█▋ | 62207/371472 [4:57:04<23:50:52, 3.60it/s] 17%|█▋ | 62208/371472 [4:57:04<23:51:55, 3.60it/s] 17%|█▋ | 62209/371472 [4:57:04<23:53:32, 3.60it/s] 17%|█▋ | 62210/371472 [4:57:05<24:56:59, 3.44it/s] 17%|█▋ | 62211/371472 [4:57:05<23:52:42, 3.60it/s] 17%|█▋ | 62212/371472 [4:57:05<22:45:24, 3.77it/s] 17%|█▋ | 62213/371472 [4:57:05<23:26:50, 3.66it/s] 17%|█▋ | 62214/371472 [4:57:06<23:43:23, 3.62it/s] 17%|█▋ | 62215/371472 [4:57:06<23:57:39, 3.59it/s] 17%|█▋ | 62216/371472 [4:57:06<23:25:19, 3.67it/s] 17%|█▋ | 62217/371472 [4:57:06<23:38:13, 3.63it/s] 17%|█▋ | 62218/371472 [4:57:07<24:34:55, 3.49it/s] 17%|█▋ | 62219/371472 [4:57:07<24:06:36, 3.56it/s] 17%|█▋ | 62220/371472 [4:57:07<24:24:05, 3.52it/s] {'loss': 4.1811, 'learning_rate': 8.496573940399492e-07, 'epoch': 2.68} + 17%|█▋ | 62220/371472 [4:57:07<24:24:05, 3.52it/s] 17%|█▋ | 62221/371472 [4:57:08<24:41:09, 3.48it/s] 17%|█▋ | 62222/371472 [4:57:08<23:49:33, 3.61it/s] 17%|█▋ | 62223/371472 [4:57:08<24:35:19, 3.49it/s] 17%|█▋ | 62224/371472 [4:57:08<25:06:22, 3.42it/s] 17%|█▋ | 62225/371472 [4:57:09<24:22:24, 3.52it/s] 17%|█▋ | 62226/371472 [4:57:09<24:28:46, 3.51it/s] 17%|█▋ | 62227/371472 [4:57:09<23:45:17, 3.62it/s] 17%|█▋ | 62228/371472 [4:57:10<23:22:20, 3.68it/s] 17%|█▋ | 62229/371472 [4:57:10<24:25:34, 3.52it/s] 17%|█▋ | 62230/371472 [4:57:10<23:43:29, 3.62it/s] 17%|█▋ | 62231/371472 [4:57:10<23:20:44, 3.68it/s] 17%|█▋ | 62232/371472 [4:57:11<23:36:17, 3.64it/s] 17%|█▋ | 62233/371472 [4:57:11<25:21:27, 3.39it/s] 17%|█▋ | 62234/371472 [4:57:11<24:37:12, 3.49it/s] 17%|█▋ | 62235/371472 [4:57:12<25:19:15, 3.39it/s] 17%|█▋ | 62236/371472 [4:57:12<24:18:35, 3.53it/s] 17%|█▋ | 62237/371472 [4:57:12<23:36:01, 3.64it/s] 17%|█▋ | 62238/371472 [4:57:12<23:26:25, 3.66it/s] 17%|█▋ | 62239/371472 [4:57:13<22:51:34, 3.76it/s] 17%|█▋ | 62240/371472 [4:57:13<22:31:03, 3.81it/s] {'loss': 4.2556, 'learning_rate': 8.496089120644702e-07, 'epoch': 2.68} + 17%|█▋ | 62240/371472 [4:57:13<22:31:03, 3.81it/s] 17%|█▋ | 62241/371472 [4:57:13<23:15:03, 3.69it/s] 17%|█▋ | 62242/371472 [4:57:13<23:33:08, 3.65it/s] 17%|█▋ | 62243/371472 [4:57:14<23:24:44, 3.67it/s] 17%|█▋ | 62244/371472 [4:57:14<22:58:48, 3.74it/s] 17%|█▋ | 62245/371472 [4:57:14<22:50:28, 3.76it/s] 17%|█▋ | 62246/371472 [4:57:15<25:17:56, 3.40it/s] 17%|█▋ | 62247/371472 [4:57:15<25:05:29, 3.42it/s] 17%|█▋ | 62248/371472 [4:57:15<24:35:07, 3.49it/s] 17%|█▋ | 62249/371472 [4:57:15<23:59:51, 3.58it/s] 17%|█▋ | 62250/371472 [4:57:16<24:14:01, 3.54it/s] 17%|█▋ | 62251/371472 [4:57:16<23:15:05, 3.69it/s] 17%|█▋ | 62252/371472 [4:57:16<23:15:03, 3.69it/s] 17%|█▋ | 62253/371472 [4:57:17<24:56:23, 3.44it/s] 17%|█▋ | 62254/371472 [4:57:17<25:02:18, 3.43it/s] 17%|█▋ | 62255/371472 [4:57:17<23:55:59, 3.59it/s] 17%|█▋ | 62256/371472 [4:57:17<24:05:14, 3.57it/s] 17%|█▋ | 62257/371472 [4:57:18<27:47:59, 3.09it/s] 17%|█▋ | 62258/371472 [4:57:18<25:47:36, 3.33it/s] 17%|█▋ | 62259/371472 [4:57:18<25:08:37, 3.42it/s] 17%|█▋ | 62260/371472 [4:57:19<25:24:47, 3.38it/s] {'loss': 4.1667, 'learning_rate': 8.495604300889914e-07, 'epoch': 2.68} + 17%|█▋ | 62260/371472 [4:57:19<25:24:47, 3.38it/s] 17%|█▋ | 62261/371472 [4:57:19<30:51:41, 2.78it/s] 17%|█▋ | 62262/371472 [4:57:19<28:20:00, 3.03it/s] 17%|█▋ | 62263/371472 [4:57:20<26:55:47, 3.19it/s] 17%|█▋ | 62264/371472 [4:57:20<27:14:17, 3.15it/s] 17%|█▋ | 62265/371472 [4:57:20<26:51:13, 3.20it/s] 17%|█▋ | 62266/371472 [4:57:21<26:06:20, 3.29it/s] 17%|█▋ | 62267/371472 [4:57:21<26:45:28, 3.21it/s] 17%|█▋ | 62268/371472 [4:57:21<25:40:32, 3.35it/s] 17%|█▋ | 62269/371472 [4:57:21<24:47:56, 3.46it/s] 17%|█▋ | 62270/371472 [4:57:22<25:45:11, 3.34it/s] 17%|█▋ | 62271/371472 [4:57:22<27:03:55, 3.17it/s] 17%|█▋ | 62272/371472 [4:57:22<26:21:39, 3.26it/s] 17%|█▋ | 62273/371472 [4:57:23<25:18:17, 3.39it/s] 17%|█▋ | 62274/371472 [4:57:23<24:35:44, 3.49it/s] 17%|█▋ | 62275/371472 [4:57:23<24:32:29, 3.50it/s] 17%|█▋ | 62276/371472 [4:57:23<23:51:36, 3.60it/s] 17%|█▋ | 62277/371472 [4:57:24<22:54:17, 3.75it/s] 17%|█▋ | 62278/371472 [4:57:24<23:11:24, 3.70it/s] 17%|█▋ | 62279/371472 [4:57:24<22:30:55, 3.81it/s] 17%|█▋ | 62280/371472 [4:57:25<22:40:53, 3.79it/s] {'loss': 4.0655, 'learning_rate': 8.495119481135124e-07, 'epoch': 2.68} + 17%|█▋ | 62280/371472 [4:57:25<22:40:53, 3.79it/s] 17%|█▋ | 62281/371472 [4:57:25<23:43:31, 3.62it/s] 17%|█▋ | 62282/371472 [4:57:25<23:02:25, 3.73it/s] 17%|█▋ | 62283/371472 [4:57:25<23:16:58, 3.69it/s] 17%|█▋ | 62284/371472 [4:57:26<24:34:16, 3.50it/s] 17%|█▋ | 62285/371472 [4:57:26<24:41:35, 3.48it/s] 17%|█▋ | 62286/371472 [4:57:26<24:34:05, 3.50it/s] 17%|█▋ | 62287/371472 [4:57:27<24:19:27, 3.53it/s] 17%|█▋ | 62288/371472 [4:57:27<23:10:36, 3.71it/s] 17%|█▋ | 62289/371472 [4:57:27<23:07:20, 3.71it/s] 17%|█▋ | 62290/371472 [4:57:27<23:26:42, 3.66it/s] 17%|█▋ | 62291/371472 [4:57:28<23:41:29, 3.63it/s] 17%|█▋ | 62292/371472 [4:57:28<24:13:54, 3.54it/s] 17%|█▋ | 62293/371472 [4:57:28<24:20:41, 3.53it/s] 17%|█▋ | 62294/371472 [4:57:29<26:31:31, 3.24it/s] 17%|█▋ | 62295/371472 [4:57:29<25:17:56, 3.39it/s] 17%|█▋ | 62296/371472 [4:57:29<23:57:39, 3.58it/s] 17%|█▋ | 62297/371472 [4:57:29<22:53:03, 3.75it/s] 17%|█▋ | 62298/371472 [4:57:30<24:02:28, 3.57it/s] 17%|█▋ | 62299/371472 [4:57:30<23:45:47, 3.61it/s] 17%|█▋ | 62300/371472 [4:57:30<22:48:19, 3.77it/s] {'loss': 4.0451, 'learning_rate': 8.494634661380336e-07, 'epoch': 2.68} + 17%|█▋ | 62300/371472 [4:57:30<22:48:19, 3.77it/s] 17%|█▋ | 62301/371472 [4:57:30<23:12:11, 3.70it/s] 17%|█▋ | 62302/371472 [4:57:31<23:11:22, 3.70it/s] 17%|█▋ | 62303/371472 [4:57:31<23:26:34, 3.66it/s] 17%|█▋ | 62304/371472 [4:57:31<23:36:16, 3.64it/s] 17%|█▋ | 62305/371472 [4:57:31<23:04:49, 3.72it/s] 17%|█▋ | 62306/371472 [4:57:32<23:40:36, 3.63it/s] 17%|█▋ | 62307/371472 [4:57:32<22:55:48, 3.75it/s] 17%|█▋ | 62308/371472 [4:57:32<24:57:36, 3.44it/s] 17%|█▋ | 62309/371472 [4:57:33<26:07:32, 3.29it/s] 17%|█▋ | 62310/371472 [4:57:33<25:23:06, 3.38it/s] 17%|█▋ | 62311/371472 [4:57:33<24:48:37, 3.46it/s] 17%|█▋ | 62312/371472 [4:57:34<25:55:56, 3.31it/s] 17%|█▋ | 62313/371472 [4:57:34<26:36:58, 3.23it/s] 17%|█▋ | 62314/371472 [4:57:34<26:24:15, 3.25it/s] 17%|█▋ | 62315/371472 [4:57:34<25:22:02, 3.39it/s] 17%|█▋ | 62316/371472 [4:57:35<25:48:45, 3.33it/s] 17%|█▋ | 62317/371472 [4:57:35<25:05:45, 3.42it/s] 17%|█▋ | 62318/371472 [4:57:35<24:59:39, 3.44it/s] 17%|█▋ | 62319/371472 [4:57:36<27:17:23, 3.15it/s] 17%|█▋ | 62320/371472 [4:57:36<26:39:07, 3.22it/s] {'loss': 4.3132, 'learning_rate': 8.494149841625546e-07, 'epoch': 2.68} + 17%|█▋ | 62320/371472 [4:57:36<26:39:07, 3.22it/s] 17%|█▋ | 62321/371472 [4:57:36<25:51:57, 3.32it/s] 17%|█▋ | 62322/371472 [4:57:37<24:13:58, 3.54it/s] 17%|█▋ | 62323/371472 [4:57:37<23:33:47, 3.64it/s] 17%|█▋ | 62324/371472 [4:57:37<23:42:58, 3.62it/s] 17%|█▋ | 62325/371472 [4:57:37<23:59:03, 3.58it/s] 17%|█▋ | 62326/371472 [4:57:38<23:35:20, 3.64it/s] 17%|█▋ | 62327/371472 [4:57:38<23:53:04, 3.60it/s] 17%|█▋ | 62328/371472 [4:57:38<23:52:13, 3.60it/s] 17%|█▋ | 62329/371472 [4:57:38<23:02:34, 3.73it/s] 17%|█▋ | 62330/371472 [4:57:39<25:59:19, 3.30it/s] 17%|█▋ | 62331/371472 [4:57:39<25:27:42, 3.37it/s] 17%|█▋ | 62332/371472 [4:57:39<26:54:58, 3.19it/s] 17%|█▋ | 62333/371472 [4:57:40<26:48:23, 3.20it/s] 17%|█▋ | 62334/371472 [4:57:40<25:44:35, 3.34it/s] 17%|█▋ | 62335/371472 [4:57:40<25:23:46, 3.38it/s] 17%|█▋ | 62336/371472 [4:57:41<25:25:06, 3.38it/s] 17%|█▋ | 62337/371472 [4:57:41<26:10:52, 3.28it/s] 17%|█▋ | 62338/371472 [4:57:41<31:03:49, 2.76it/s] 17%|█▋ | 62339/371472 [4:57:42<27:52:16, 3.08it/s] 17%|█▋ | 62340/371472 [4:57:42<26:31:52, 3.24it/s] {'loss': 4.3183, 'learning_rate': 8.493665021870758e-07, 'epoch': 2.69} + 17%|█▋ | 62340/371472 [4:57:42<26:31:52, 3.24it/s] 17%|█▋ | 62341/371472 [4:57:42<25:13:24, 3.40it/s] 17%|█▋ | 62342/371472 [4:57:42<25:25:25, 3.38it/s] 17%|█▋ | 62343/371472 [4:57:43<25:09:05, 3.41it/s] 17%|█▋ | 62344/371472 [4:57:43<24:56:39, 3.44it/s] 17%|█▋ | 62345/371472 [4:57:43<23:39:28, 3.63it/s] 17%|█▋ | 62346/371472 [4:57:44<23:00:40, 3.73it/s] 17%|█▋ | 62347/371472 [4:57:44<22:44:28, 3.78it/s] 17%|█▋ | 62348/371472 [4:57:44<24:09:54, 3.55it/s] 17%|█▋ | 62349/371472 [4:57:44<24:50:44, 3.46it/s] 17%|█▋ | 62350/371472 [4:57:45<23:53:41, 3.59it/s] 17%|█▋ | 62351/371472 [4:57:45<23:39:40, 3.63it/s] 17%|█▋ | 62352/371472 [4:57:45<23:33:11, 3.65it/s] 17%|█▋ | 62353/371472 [4:57:46<24:14:02, 3.54it/s] 17%|█▋ | 62354/371472 [4:57:46<23:20:14, 3.68it/s] 17%|█▋ | 62355/371472 [4:57:46<23:01:17, 3.73it/s] 17%|█▋ | 62356/371472 [4:57:46<25:19:43, 3.39it/s] 17%|█▋ | 62357/371472 [4:57:47<24:53:27, 3.45it/s] 17%|█▋ | 62358/371472 [4:57:47<23:44:15, 3.62it/s] 17%|█▋ | 62359/371472 [4:57:47<23:44:13, 3.62it/s] 17%|█▋ | 62360/371472 [4:57:47<23:31:17, 3.65it/s] {'loss': 4.0798, 'learning_rate': 8.493180202115968e-07, 'epoch': 2.69} + 17%|█▋ | 62360/371472 [4:57:47<23:31:17, 3.65it/s] 17%|█▋ | 62361/371472 [4:57:48<25:35:16, 3.36it/s] 17%|█▋ | 62362/371472 [4:57:48<25:11:30, 3.41it/s] 17%|█▋ | 62363/371472 [4:57:48<24:46:53, 3.46it/s] 17%|█▋ | 62364/371472 [4:57:49<23:59:50, 3.58it/s] 17%|█▋ | 62365/371472 [4:57:49<24:45:37, 3.47it/s] 17%|█▋ | 62366/371472 [4:57:49<25:11:56, 3.41it/s] 17%|█▋ | 62367/371472 [4:57:50<24:25:11, 3.52it/s] 17%|█▋ | 62368/371472 [4:57:50<24:45:42, 3.47it/s] 17%|█▋ | 62369/371472 [4:57:50<24:07:42, 3.56it/s] 17%|█▋ | 62370/371472 [4:57:50<23:48:06, 3.61it/s] 17%|█▋ | 62371/371472 [4:57:51<23:55:01, 3.59it/s] 17%|█▋ | 62372/371472 [4:57:51<24:27:09, 3.51it/s] 17%|█▋ | 62373/371472 [4:57:51<24:33:13, 3.50it/s] 17%|█▋ | 62374/371472 [4:57:51<23:55:08, 3.59it/s] 17%|█▋ | 62375/371472 [4:57:52<23:35:55, 3.64it/s] 17%|█▋ | 62376/371472 [4:57:52<23:05:48, 3.72it/s] 17%|█▋ | 62377/371472 [4:57:52<23:01:11, 3.73it/s] 17%|█▋ | 62378/371472 [4:57:53<22:47:43, 3.77it/s] 17%|█▋ | 62379/371472 [4:57:53<22:31:59, 3.81it/s] 17%|█▋ | 62380/371472 [4:57:53<22:50:49, 3.76it/s] {'loss': 4.362, 'learning_rate': 8.49269538236118e-07, 'epoch': 2.69} + 17%|█▋ | 62380/371472 [4:57:53<22:50:49, 3.76it/s] 17%|█▋ | 62381/371472 [4:57:53<25:16:27, 3.40it/s] 17%|█▋ | 62382/371472 [4:57:54<25:07:20, 3.42it/s] 17%|█▋ | 62383/371472 [4:57:54<24:24:33, 3.52it/s] 17%|█▋ | 62384/371472 [4:57:54<24:29:37, 3.51it/s] 17%|█▋ | 62385/371472 [4:57:55<25:21:36, 3.39it/s] 17%|█▋ | 62386/371472 [4:57:55<25:25:19, 3.38it/s] 17%|█▋ | 62387/371472 [4:57:55<24:28:54, 3.51it/s] 17%|█▋ | 62388/371472 [4:57:55<23:35:13, 3.64it/s] 17%|█▋ | 62389/371472 [4:57:56<24:19:01, 3.53it/s] 17%|█▋ | 62390/371472 [4:57:56<24:03:36, 3.57it/s] 17%|█▋ | 62391/371472 [4:57:56<25:21:33, 3.39it/s] 17%|█▋ | 62392/371472 [4:57:57<24:44:52, 3.47it/s] 17%|█▋ | 62393/371472 [4:57:57<23:48:37, 3.61it/s] 17%|█▋ | 62394/371472 [4:57:57<23:08:38, 3.71it/s] 17%|█▋ | 62395/371472 [4:57:57<22:31:33, 3.81it/s] 17%|█▋ | 62396/371472 [4:57:58<25:28:15, 3.37it/s] 17%|█▋ | 62397/371472 [4:57:58<25:19:16, 3.39it/s] 17%|█▋ | 62398/371472 [4:57:58<24:47:38, 3.46it/s] 17%|█▋ | 62399/371472 [4:57:59<25:08:47, 3.41it/s] 17%|█▋ | 62400/371472 [4:57:59<26:00:55, 3.30it/s] {'loss': 4.1945, 'learning_rate': 8.49221056260639e-07, 'epoch': 2.69} + 17%|█▋ | 62400/371472 [4:57:59<26:00:55, 3.30it/s] 17%|█▋ | 62401/371472 [4:57:59<24:33:16, 3.50it/s] 17%|█▋ | 62402/371472 [4:57:59<23:42:09, 3.62it/s] 17%|█▋ | 62403/371472 [4:58:00<23:30:40, 3.65it/s] 17%|█▋ | 62404/371472 [4:58:00<23:11:04, 3.70it/s] 17%|█▋ | 62405/371472 [4:58:00<23:01:28, 3.73it/s] 17%|█▋ | 62406/371472 [4:58:00<23:37:10, 3.63it/s] 17%|█▋ | 62407/371472 [4:58:01<23:32:36, 3.65it/s] 17%|█▋ | 62408/371472 [4:58:01<23:32:06, 3.65it/s] 17%|█▋ | 62409/371472 [4:58:01<23:41:27, 3.62it/s] 17%|█▋ | 62410/371472 [4:58:02<23:20:31, 3.68it/s] 17%|█▋ | 62411/371472 [4:58:02<22:39:55, 3.79it/s] 17%|█▋ | 62412/371472 [4:58:02<23:11:28, 3.70it/s] 17%|█▋ | 62413/371472 [4:58:02<23:19:16, 3.68it/s] 17%|█▋ | 62414/371472 [4:58:03<23:38:56, 3.63it/s] 17%|█▋ | 62415/371472 [4:58:03<23:26:27, 3.66it/s] 17%|█▋ | 62416/371472 [4:58:03<23:43:14, 3.62it/s] 17%|█▋ | 62417/371472 [4:58:03<23:21:33, 3.68it/s] 17%|█▋ | 62418/371472 [4:58:04<23:00:40, 3.73it/s] 17%|█▋ | 62419/371472 [4:58:04<25:20:45, 3.39it/s] 17%|█▋ | 62420/371472 [4:58:04<24:09:00, 3.55it/s] {'loss': 4.2235, 'learning_rate': 8.491725742851602e-07, 'epoch': 2.69} + 17%|█▋ | 62420/371472 [4:58:04<24:09:00, 3.55it/s] 17%|█▋ | 62421/371472 [4:58:05<24:40:59, 3.48it/s] 17%|█▋ | 62422/371472 [4:58:05<23:46:41, 3.61it/s] 17%|█▋ | 62423/371472 [4:58:05<23:22:47, 3.67it/s] 17%|█▋ | 62424/371472 [4:58:05<23:45:02, 3.61it/s] 17%|█▋ | 62425/371472 [4:58:06<23:50:28, 3.60it/s] 17%|█▋ | 62426/371472 [4:58:06<26:17:00, 3.27it/s] 17%|█▋ | 62427/371472 [4:58:06<25:27:14, 3.37it/s] 17%|█▋ | 62428/371472 [4:58:07<26:51:04, 3.20it/s] 17%|█▋ | 62429/371472 [4:58:07<25:59:06, 3.30it/s] 17%|█▋ | 62430/371472 [4:58:07<25:18:37, 3.39it/s] 17%|█▋ | 62431/371472 [4:58:08<25:18:28, 3.39it/s] 17%|█▋ | 62432/371472 [4:58:08<24:29:06, 3.51it/s] 17%|█▋ | 62433/371472 [4:58:08<26:40:56, 3.22it/s] 17%|█▋ | 62434/371472 [4:58:08<25:09:00, 3.41it/s] 17%|█▋ | 62435/371472 [4:58:09<25:21:46, 3.38it/s] 17%|█▋ | 62436/371472 [4:58:09<26:20:29, 3.26it/s] 17%|█▋ | 62437/371472 [4:58:09<24:28:04, 3.51it/s] 17%|█▋ | 62438/371472 [4:58:10<23:57:02, 3.58it/s] 17%|█▋ | 62439/371472 [4:58:10<23:05:53, 3.72it/s] 17%|█▋ | 62440/371472 [4:58:10<24:12:42, 3.55it/s] {'loss': 4.1094, 'learning_rate': 8.491240923096813e-07, 'epoch': 2.69} + 17%|█▋ | 62440/371472 [4:58:10<24:12:42, 3.55it/s] 17%|█▋ | 62441/371472 [4:58:11<27:18:42, 3.14it/s] 17%|█▋ | 62442/371472 [4:58:11<26:17:57, 3.26it/s] 17%|█▋ | 62443/371472 [4:58:11<26:24:47, 3.25it/s] 17%|█▋ | 62444/371472 [4:58:11<24:53:59, 3.45it/s] 17%|█▋ | 62445/371472 [4:58:12<23:44:04, 3.62it/s] 17%|█▋ | 62446/371472 [4:58:12<24:32:27, 3.50it/s] 17%|█▋ | 62447/371472 [4:58:12<24:29:11, 3.51it/s] 17%|█▋ | 62448/371472 [4:58:12<24:10:02, 3.55it/s] 17%|█▋ | 62449/371472 [4:58:13<23:45:30, 3.61it/s] 17%|█▋ | 62450/371472 [4:58:13<24:05:12, 3.56it/s] 17%|█▋ | 62451/371472 [4:58:13<26:12:44, 3.27it/s] 17%|█▋ | 62452/371472 [4:58:14<25:14:46, 3.40it/s] 17%|█▋ | 62453/371472 [4:58:14<24:18:42, 3.53it/s] 17%|█▋ | 62454/371472 [4:58:14<23:43:02, 3.62it/s] 17%|█▋ | 62455/371472 [4:58:14<23:16:38, 3.69it/s] 17%|█▋ | 62456/371472 [4:58:15<26:07:42, 3.29it/s] 17%|█▋ | 62457/371472 [4:58:15<25:25:08, 3.38it/s] 17%|█▋ | 62458/371472 [4:58:15<24:34:11, 3.49it/s] 17%|█▋ | 62459/371472 [4:58:16<24:35:58, 3.49it/s] 17%|█▋ | 62460/371472 [4:58:16<24:12:42, 3.55it/s] {'loss': 4.0557, 'learning_rate': 8.490756103342024e-07, 'epoch': 2.69} + 17%|█▋ | 62460/371472 [4:58:16<24:12:42, 3.55it/s] 17%|█▋ | 62461/371472 [4:58:16<23:58:57, 3.58it/s] 17%|█▋ | 62462/371472 [4:58:16<23:33:45, 3.64it/s] 17%|█▋ | 62463/371472 [4:58:17<23:27:12, 3.66it/s] 17%|█▋ | 62464/371472 [4:58:17<23:06:31, 3.71it/s] 17%|█▋ | 62465/371472 [4:58:17<23:12:08, 3.70it/s] 17%|█▋ | 62466/371472 [4:58:18<22:40:15, 3.79it/s] 17%|█▋ | 62467/371472 [4:58:18<22:17:28, 3.85it/s] 17%|█▋ | 62468/371472 [4:58:18<24:11:46, 3.55it/s] 17%|█▋ | 62469/371472 [4:58:18<24:35:25, 3.49it/s] 17%|█▋ | 62470/371472 [4:58:19<23:30:36, 3.65it/s] 17%|█▋ | 62471/371472 [4:58:19<23:49:22, 3.60it/s] 17%|█▋ | 62472/371472 [4:58:19<24:37:06, 3.49it/s] 17%|█▋ | 62473/371472 [4:58:19<23:56:53, 3.58it/s] 17%|█▋ | 62474/371472 [4:58:20<23:46:37, 3.61it/s] 17%|█▋ | 62475/371472 [4:58:20<28:48:18, 2.98it/s] 17%|█▋ | 62476/371472 [4:58:21<26:59:19, 3.18it/s] 17%|█▋ | 62477/371472 [4:58:21<26:02:56, 3.30it/s] 17%|█▋ | 62478/371472 [4:58:21<24:58:22, 3.44it/s] 17%|█▋ | 62479/371472 [4:58:21<24:48:58, 3.46it/s] 17%|█▋ | 62480/371472 [4:58:22<24:18:22, 3.53it/s] {'loss': 4.2701, 'learning_rate': 8.490271283587234e-07, 'epoch': 2.69} + 17%|█▋ | 62480/371472 [4:58:22<24:18:22, 3.53it/s] 17%|█▋ | 62481/371472 [4:58:22<24:32:29, 3.50it/s] 17%|█▋ | 62482/371472 [4:58:22<23:50:43, 3.60it/s] 17%|█▋ | 62483/371472 [4:58:22<24:27:18, 3.51it/s] 17%|█▋ | 62484/371472 [4:58:23<23:54:54, 3.59it/s] 17%|█▋ | 62485/371472 [4:58:23<24:12:54, 3.54it/s] 17%|█▋ | 62486/371472 [4:58:23<24:12:35, 3.55it/s] 17%|█▋ | 62487/371472 [4:58:24<23:44:24, 3.62it/s] 17%|█▋ | 62488/371472 [4:58:24<25:25:56, 3.37it/s] 17%|█▋ | 62489/371472 [4:58:24<28:44:46, 2.99it/s] 17%|█▋ | 62490/371472 [4:58:25<28:32:52, 3.01it/s] 17%|█▋ | 62491/371472 [4:58:25<27:33:45, 3.11it/s] 17%|█▋ | 62492/371472 [4:58:25<25:44:59, 3.33it/s] 17%|█▋ | 62493/371472 [4:58:25<24:50:21, 3.46it/s] 17%|█▋ | 62494/371472 [4:58:26<24:16:22, 3.54it/s] 17%|█▋ | 62495/371472 [4:58:26<24:03:29, 3.57it/s] 17%|█▋ | 62496/371472 [4:58:26<23:31:48, 3.65it/s] 17%|█▋ | 62497/371472 [4:58:27<23:15:01, 3.69it/s] 17%|█▋ | 62498/371472 [4:58:27<23:27:54, 3.66it/s] 17%|█▋ | 62499/371472 [4:58:27<23:25:55, 3.66it/s] 17%|█▋ | 62500/371472 [4:58:27<22:57:41, 3.74it/s] {'loss': 4.1211, 'learning_rate': 8.489786463832446e-07, 'epoch': 2.69} + 17%|█▋ | 62500/371472 [4:58:27<22:57:41, 3.74it/s] 17%|█▋ | 62501/371472 [4:58:28<23:11:31, 3.70it/s] 17%|█▋ | 62502/371472 [4:58:28<22:37:55, 3.79it/s] 17%|█▋ | 62503/371472 [4:58:28<22:42:48, 3.78it/s] 17%|█▋ | 62504/371472 [4:58:28<24:14:25, 3.54it/s] 17%|█▋ | 62505/371472 [4:58:29<24:01:33, 3.57it/s] 17%|█▋ | 62506/371472 [4:58:29<24:17:15, 3.53it/s] 17%|█▋ | 62507/371472 [4:58:29<23:18:38, 3.68it/s] 17%|█▋ | 62508/371472 [4:58:30<22:58:40, 3.74it/s] 17%|█▋ | 62509/371472 [4:58:30<23:23:01, 3.67it/s] 17%|█▋ | 62510/371472 [4:58:30<23:58:22, 3.58it/s] 17%|█▋ | 62511/371472 [4:58:30<23:39:21, 3.63it/s] 17%|█▋ | 62512/371472 [4:58:31<24:32:49, 3.50it/s] 17%|█▋ | 62513/371472 [4:58:31<23:39:40, 3.63it/s] 17%|█▋ | 62514/371472 [4:58:31<23:03:32, 3.72it/s] 17%|█▋ | 62515/371472 [4:58:31<22:46:43, 3.77it/s] 17%|█▋ | 62516/371472 [4:58:32<25:05:25, 3.42it/s] 17%|█▋ | 62517/371472 [4:58:32<24:57:52, 3.44it/s] 17%|█▋ | 62518/371472 [4:58:32<24:42:31, 3.47it/s] 17%|█▋ | 62519/371472 [4:58:33<24:13:11, 3.54it/s] 17%|█▋ | 62520/371472 [4:58:33<24:17:12, 3.53it/s] {'loss': 4.2872, 'learning_rate': 8.489301644077657e-07, 'epoch': 2.69} + 17%|█▋ | 62520/371472 [4:58:33<24:17:12, 3.53it/s] 17%|█▋ | 62521/371472 [4:58:33<24:41:42, 3.48it/s] 17%|█▋ | 62522/371472 [4:58:33<23:56:00, 3.59it/s] 17%|█▋ | 62523/371472 [4:58:34<23:03:07, 3.72it/s] 17%|█▋ | 62524/371472 [4:58:34<23:01:03, 3.73it/s] 17%|█▋ | 62525/371472 [4:58:34<22:23:25, 3.83it/s] 17%|█▋ | 62526/371472 [4:58:34<22:19:58, 3.84it/s] 17%|█▋ | 62527/371472 [4:58:35<22:11:19, 3.87it/s] 17%|█▋ | 62528/371472 [4:58:35<23:01:56, 3.73it/s] 17%|█▋ | 62529/371472 [4:58:35<22:44:59, 3.77it/s] 17%|█▋ | 62530/371472 [4:58:36<23:34:31, 3.64it/s] 17%|█▋ | 62531/371472 [4:58:36<25:02:31, 3.43it/s] 17%|█▋ | 62532/371472 [4:58:36<24:35:22, 3.49it/s] 17%|█▋ | 62533/371472 [4:58:37<25:26:32, 3.37it/s] 17%|█▋ | 62534/371472 [4:58:37<24:25:43, 3.51it/s] 17%|█▋ | 62535/371472 [4:58:37<25:11:40, 3.41it/s] 17%|█▋ | 62536/371472 [4:58:37<23:58:47, 3.58it/s] 17%|█▋ | 62537/371472 [4:58:38<24:57:58, 3.44it/s] 17%|█▋ | 62538/371472 [4:58:38<24:26:58, 3.51it/s] 17%|█▋ | 62539/371472 [4:58:38<25:25:04, 3.38it/s] 17%|█▋ | 62540/371472 [4:58:39<25:21:46, 3.38it/s] {'loss': 4.1406, 'learning_rate': 8.488816824322868e-07, 'epoch': 2.69} + 17%|█▋ | 62540/371472 [4:58:39<25:21:46, 3.38it/s] 17%|█▋ | 62541/371472 [4:58:39<24:59:25, 3.43it/s] 17%|█▋ | 62542/371472 [4:58:39<24:05:39, 3.56it/s] 17%|█▋ | 62543/371472 [4:58:39<25:56:48, 3.31it/s] 17%|█▋ | 62544/371472 [4:58:40<24:30:19, 3.50it/s] 17%|█▋ | 62545/371472 [4:58:40<24:23:44, 3.52it/s] 17%|█▋ | 62546/371472 [4:58:40<25:24:24, 3.38it/s] 17%|█▋ | 62547/371472 [4:58:41<23:53:33, 3.59it/s] 17%|█▋ | 62548/371472 [4:58:41<23:23:02, 3.67it/s] 17%|█▋ | 62549/371472 [4:58:41<23:34:27, 3.64it/s] 17%|█▋ | 62550/371472 [4:58:41<23:56:17, 3.58it/s] 17%|█▋ | 62551/371472 [4:58:42<23:22:48, 3.67it/s] 17%|█▋ | 62552/371472 [4:58:42<24:25:43, 3.51it/s] 17%|█▋ | 62553/371472 [4:58:42<24:06:48, 3.56it/s] 17%|█▋ | 62554/371472 [4:58:42<24:40:32, 3.48it/s] 17%|█▋ | 62555/371472 [4:58:43<24:36:06, 3.49it/s] 17%|█▋ | 62556/371472 [4:58:43<24:34:50, 3.49it/s] 17%|█▋ | 62557/371472 [4:58:43<25:43:00, 3.34it/s] 17%|█▋ | 62558/371472 [4:58:44<25:06:52, 3.42it/s] 17%|█▋ | 62559/371472 [4:58:44<24:48:35, 3.46it/s] 17%|█▋ | 62560/371472 [4:58:44<25:25:48, 3.37it/s] {'loss': 4.2378, 'learning_rate': 8.488332004568079e-07, 'epoch': 2.69} + 17%|█▋ | 62560/371472 [4:58:44<25:25:48, 3.37it/s] 17%|█▋ | 62561/371472 [4:58:45<25:10:58, 3.41it/s] 17%|█▋ | 62562/371472 [4:58:45<23:49:42, 3.60it/s] 17%|█▋ | 62563/371472 [4:58:45<23:12:26, 3.70it/s] 17%|█▋ | 62564/371472 [4:58:45<24:32:51, 3.50it/s] 17%|█▋ | 62565/371472 [4:58:46<24:23:51, 3.52it/s] 17%|█▋ | 62566/371472 [4:58:46<23:42:15, 3.62it/s] 17%|█▋ | 62567/371472 [4:58:46<22:55:20, 3.74it/s] 17%|█▋ | 62568/371472 [4:58:46<23:28:46, 3.65it/s] 17%|█▋ | 62569/371472 [4:58:47<22:58:57, 3.73it/s] 17%|█▋ | 62570/371472 [4:58:47<24:11:58, 3.55it/s] 17%|█▋ | 62571/371472 [4:58:47<23:50:20, 3.60it/s] 17%|█▋ | 62572/371472 [4:58:48<22:57:54, 3.74it/s] 17%|█▋ | 62573/371472 [4:58:48<23:11:09, 3.70it/s] 17%|█▋ | 62574/371472 [4:58:48<23:42:40, 3.62it/s] 17%|█▋ | 62575/371472 [4:58:48<26:26:05, 3.25it/s] 17%|█▋ | 62576/371472 [4:58:49<24:47:20, 3.46it/s] 17%|█▋ | 62577/371472 [4:58:49<23:53:08, 3.59it/s] 17%|█▋ | 62578/371472 [4:58:49<25:22:49, 3.38it/s] 17%|█▋ | 62579/371472 [4:58:50<24:58:53, 3.43it/s] 17%|█▋ | 62580/371472 [4:58:50<24:36:32, 3.49it/s] {'loss': 4.2389, 'learning_rate': 8.48784718481329e-07, 'epoch': 2.7} + 17%|█▋ | 62580/371472 [4:58:50<24:36:32, 3.49it/s] 17%|█▋ | 62581/371472 [4:58:50<24:12:19, 3.54it/s] 17%|█▋ | 62582/371472 [4:58:50<23:36:52, 3.63it/s] 17%|█▋ | 62583/371472 [4:58:51<24:09:23, 3.55it/s] 17%|█▋ | 62584/371472 [4:58:51<24:01:32, 3.57it/s] 17%|█▋ | 62585/371472 [4:58:51<24:11:54, 3.55it/s] 17%|█▋ | 62586/371472 [4:58:52<23:51:58, 3.60it/s] 17%|█▋ | 62587/371472 [4:58:52<23:16:01, 3.69it/s] 17%|█▋ | 62588/371472 [4:58:52<23:08:25, 3.71it/s] 17%|█▋ | 62589/371472 [4:58:52<22:33:31, 3.80it/s] 17%|█▋ | 62590/371472 [4:58:53<24:09:02, 3.55it/s] 17%|█▋ | 62591/371472 [4:58:53<23:49:11, 3.60it/s] 17%|█▋ | 62592/371472 [4:58:53<24:33:55, 3.49it/s] 17%|█▋ | 62593/371472 [4:58:53<23:23:42, 3.67it/s] 17%|█▋ | 62594/371472 [4:58:54<24:00:01, 3.57it/s] 17%|█▋ | 62595/371472 [4:58:54<24:01:47, 3.57it/s] 17%|█▋ | 62596/371472 [4:58:54<25:05:45, 3.42it/s] 17%|█▋ | 62597/371472 [4:58:55<24:02:24, 3.57it/s] 17%|█▋ | 62598/371472 [4:58:55<23:11:25, 3.70it/s] 17%|█▋ | 62599/371472 [4:58:55<22:48:40, 3.76it/s] 17%|█▋ | 62600/371472 [4:58:55<23:16:23, 3.69it/s] {'loss': 4.1908, 'learning_rate': 8.487362365058502e-07, 'epoch': 2.7} + 17%|█▋ | 62600/371472 [4:58:55<23:16:23, 3.69it/s] 17%|█▋ | 62601/371472 [4:58:56<23:56:44, 3.58it/s] 17%|█▋ | 62602/371472 [4:58:56<23:11:05, 3.70it/s] 17%|█▋ | 62603/371472 [4:58:56<23:53:18, 3.59it/s] 17%|█▋ | 62604/371472 [4:58:56<23:14:09, 3.69it/s] 17%|█▋ | 62605/371472 [4:58:57<23:26:25, 3.66it/s] 17%|█▋ | 62606/371472 [4:58:57<24:08:20, 3.55it/s] 17%|█▋ | 62607/371472 [4:58:57<25:09:51, 3.41it/s] 17%|█▋ | 62608/371472 [4:58:58<24:04:59, 3.56it/s] 17%|█▋ | 62609/371472 [4:58:58<23:35:56, 3.64it/s] 17%|█▋ | 62610/371472 [4:58:58<23:39:58, 3.63it/s] 17%|█▋ | 62611/371472 [4:58:58<23:29:36, 3.65it/s] 17%|█▋ | 62612/371472 [4:58:59<23:02:11, 3.72it/s] 17%|█▋ | 62613/371472 [4:58:59<23:13:17, 3.69it/s] 17%|█▋ | 62614/371472 [4:58:59<22:38:25, 3.79it/s] 17%|█▋ | 62615/371472 [4:59:00<25:43:51, 3.33it/s] 17%|█▋ | 62616/371472 [4:59:00<24:45:03, 3.47it/s] 17%|█▋ | 62617/371472 [4:59:00<24:01:48, 3.57it/s] 17%|█▋ | 62618/371472 [4:59:00<22:56:51, 3.74it/s] 17%|█▋ | 62619/371472 [4:59:01<22:34:30, 3.80it/s] 17%|█▋ | 62620/371472 [4:59:01<22:38:01, 3.79it/s] {'loss': 4.269, 'learning_rate': 8.486877545303712e-07, 'epoch': 2.7} + 17%|█▋ | 62620/371472 [4:59:01<22:38:01, 3.79it/s] 17%|█▋ | 62621/371472 [4:59:01<22:40:04, 3.78it/s] 17%|█▋ | 62622/371472 [4:59:01<22:31:59, 3.81it/s] 17%|█▋ | 62623/371472 [4:59:02<24:31:43, 3.50it/s] 17%|█▋ | 62624/371472 [4:59:02<25:02:41, 3.43it/s] 17%|█▋ | 62625/371472 [4:59:02<25:22:07, 3.38it/s] 17%|█▋ | 62626/371472 [4:59:03<25:56:14, 3.31it/s] 17%|█▋ | 62627/371472 [4:59:03<24:42:17, 3.47it/s] 17%|█▋ | 62628/371472 [4:59:03<24:59:21, 3.43it/s] 17%|█▋ | 62629/371472 [4:59:03<24:44:21, 3.47it/s] 17%|█▋ | 62630/371472 [4:59:04<25:51:34, 3.32it/s] 17%|█▋ | 62631/371472 [4:59:04<25:40:05, 3.34it/s] 17%|█▋ | 62632/371472 [4:59:04<24:44:21, 3.47it/s] 17%|█▋ | 62633/371472 [4:59:05<24:15:03, 3.54it/s] 17%|█▋ | 62634/371472 [4:59:05<24:04:41, 3.56it/s] 17%|█▋ | 62635/371472 [4:59:05<24:22:03, 3.52it/s] 17%|█▋ | 62636/371472 [4:59:05<24:06:11, 3.56it/s] 17%|█▋ | 62637/371472 [4:59:06<25:25:25, 3.37it/s] 17%|█▋ | 62638/371472 [4:59:06<24:48:22, 3.46it/s] 17%|█▋ | 62639/371472 [4:59:06<25:47:54, 3.33it/s] 17%|█▋ | 62640/371472 [4:59:07<25:30:53, 3.36it/s] {'loss': 4.0371, 'learning_rate': 8.486392725548923e-07, 'epoch': 2.7} + 17%|█▋ | 62640/371472 [4:59:07<25:30:53, 3.36it/s] 17%|█▋ | 62641/371472 [4:59:07<26:33:37, 3.23it/s] 17%|█▋ | 62642/371472 [4:59:07<25:23:36, 3.38it/s] 17%|█▋ | 62643/371472 [4:59:08<25:55:12, 3.31it/s] 17%|█▋ | 62644/371472 [4:59:08<24:15:23, 3.54it/s] 17%|█▋ | 62645/371472 [4:59:08<24:59:11, 3.43it/s] 17%|█▋ | 62646/371472 [4:59:08<25:35:30, 3.35it/s] 17%|█▋ | 62647/371472 [4:59:09<24:27:18, 3.51it/s] 17%|█▋ | 62648/371472 [4:59:09<23:50:15, 3.60it/s] 17%|█▋ | 62649/371472 [4:59:09<25:18:15, 3.39it/s] 17%|█▋ | 62650/371472 [4:59:10<23:57:08, 3.58it/s] 17%|█▋ | 62651/371472 [4:59:10<24:03:23, 3.57it/s] 17%|█▋ | 62652/371472 [4:59:10<24:00:58, 3.57it/s] 17%|█▋ | 62653/371472 [4:59:10<24:50:27, 3.45it/s] 17%|█▋ | 62654/371472 [4:59:11<24:14:41, 3.54it/s] 17%|█▋ | 62655/371472 [4:59:11<24:26:41, 3.51it/s] 17%|█▋ | 62656/371472 [4:59:11<23:48:03, 3.60it/s] 17%|█▋ | 62657/371472 [4:59:12<23:14:08, 3.69it/s] 17%|█▋ | 62658/371472 [4:59:12<25:11:50, 3.40it/s] 17%|█▋ | 62659/371472 [4:59:12<23:50:24, 3.60it/s] 17%|█▋ | 62660/371472 [4:59:12<24:39:14, 3.48it/s] {'loss': 4.2435, 'learning_rate': 8.485907905794134e-07, 'epoch': 2.7} + 17%|█▋ | 62660/371472 [4:59:12<24:39:14, 3.48it/s] 17%|█▋ | 62661/371472 [4:59:13<23:25:29, 3.66it/s] 17%|█▋ | 62662/371472 [4:59:13<24:16:42, 3.53it/s] 17%|█▋ | 62663/371472 [4:59:13<23:37:15, 3.63it/s] 17%|█▋ | 62664/371472 [4:59:14<24:48:13, 3.46it/s] 17%|█▋ | 62665/371472 [4:59:14<23:44:19, 3.61it/s] 17%|█▋ | 62666/371472 [4:59:14<24:17:42, 3.53it/s] 17%|█▋ | 62667/371472 [4:59:14<24:00:46, 3.57it/s] 17%|█▋ | 62668/371472 [4:59:15<24:24:03, 3.52it/s] 17%|█▋ | 62669/371472 [4:59:15<23:47:19, 3.61it/s] 17%|█▋ | 62670/371472 [4:59:15<24:19:30, 3.53it/s] 17%|█▋ | 62671/371472 [4:59:15<23:58:14, 3.58it/s] 17%|█▋ | 62672/371472 [4:59:16<23:38:18, 3.63it/s] 17%|█▋ | 62673/371472 [4:59:16<23:50:33, 3.60it/s] 17%|█▋ | 62674/371472 [4:59:16<24:09:52, 3.55it/s] 17%|█▋ | 62675/371472 [4:59:17<24:13:21, 3.54it/s] 17%|█▋ | 62676/371472 [4:59:17<23:53:33, 3.59it/s] 17%|█▋ | 62677/371472 [4:59:17<25:20:30, 3.38it/s] 17%|█▋ | 62678/371472 [4:59:17<24:52:46, 3.45it/s] 17%|█▋ | 62679/371472 [4:59:18<24:37:43, 3.48it/s] 17%|█▋ | 62680/371472 [4:59:18<24:34:44, 3.49it/s] {'loss': 4.1919, 'learning_rate': 8.485423086039344e-07, 'epoch': 2.7} + 17%|█▋ | 62680/371472 [4:59:18<24:34:44, 3.49it/s] 17%|█▋ | 62681/371472 [4:59:18<25:24:19, 3.38it/s] 17%|█▋ | 62682/371472 [4:59:19<24:40:21, 3.48it/s] 17%|█▋ | 62683/371472 [4:59:19<23:24:14, 3.66it/s] 17%|█▋ | 62684/371472 [4:59:19<24:00:07, 3.57it/s] 17%|█▋ | 62685/371472 [4:59:20<25:05:23, 3.42it/s] 17%|█▋ | 62686/371472 [4:59:20<25:40:31, 3.34it/s] 17%|█▋ | 62687/371472 [4:59:20<24:42:37, 3.47it/s] 17%|█▋ | 62688/371472 [4:59:20<25:08:56, 3.41it/s] 17%|█▋ | 62689/371472 [4:59:21<24:28:27, 3.50it/s] 17%|█▋ | 62690/371472 [4:59:21<24:47:21, 3.46it/s] 17%|█▋ | 62691/371472 [4:59:21<24:48:46, 3.46it/s] 17%|█▋ | 62692/371472 [4:59:21<23:47:09, 3.61it/s] 17%|█▋ | 62693/371472 [4:59:22<23:00:39, 3.73it/s] 17%|█▋ | 62694/371472 [4:59:22<22:56:42, 3.74it/s] 17%|█▋ | 62695/371472 [4:59:22<24:48:03, 3.46it/s] 17%|█▋ | 62696/371472 [4:59:23<23:50:24, 3.60it/s] 17%|█▋ | 62697/371472 [4:59:23<24:34:13, 3.49it/s] 17%|█▋ | 62698/371472 [4:59:23<24:27:43, 3.51it/s] 17%|█▋ | 62699/371472 [4:59:24<25:42:26, 3.34it/s] 17%|█▋ | 62700/371472 [4:59:24<24:22:09, 3.52it/s] {'loss': 4.0822, 'learning_rate': 8.484938266284557e-07, 'epoch': 2.7} + 17%|█▋ | 62700/371472 [4:59:24<24:22:09, 3.52it/s] 17%|█▋ | 62701/371472 [4:59:24<23:46:27, 3.61it/s] 17%|█▋ | 62702/371472 [4:59:24<24:43:00, 3.47it/s] 17%|█▋ | 62703/371472 [4:59:25<29:46:51, 2.88it/s] 17%|█▋ | 62704/371472 [4:59:25<27:43:15, 3.09it/s] 17%|█▋ | 62705/371472 [4:59:25<26:43:47, 3.21it/s] 17%|█▋ | 62706/371472 [4:59:26<25:22:54, 3.38it/s] 17%|█▋ | 62707/371472 [4:59:26<25:22:49, 3.38it/s] 17%|█▋ | 62708/371472 [4:59:26<23:56:59, 3.58it/s] 17%|█▋ | 62709/371472 [4:59:26<23:04:18, 3.72it/s] 17%|█▋ | 62710/371472 [4:59:27<24:27:21, 3.51it/s] 17%|█▋ | 62711/371472 [4:59:27<25:31:20, 3.36it/s] 17%|█▋ | 62712/371472 [4:59:27<24:14:14, 3.54it/s] 17%|█▋ | 62713/371472 [4:59:28<23:44:59, 3.61it/s] 17%|█▋ | 62714/371472 [4:59:28<24:07:46, 3.55it/s] 17%|█▋ | 62715/371472 [4:59:28<23:24:32, 3.66it/s] 17%|█▋ | 62716/371472 [4:59:28<25:17:18, 3.39it/s] 17%|█▋ | 62717/371472 [4:59:29<24:28:46, 3.50it/s] 17%|█▋ | 62718/371472 [4:59:29<23:59:13, 3.58it/s] 17%|█▋ | 62719/371472 [4:59:29<23:29:26, 3.65it/s] 17%|█▋ | 62720/371472 [4:59:30<22:47:28, 3.76it/s] {'loss': 4.2258, 'learning_rate': 8.484453446529768e-07, 'epoch': 2.7} + 17%|█▋ | 62720/371472 [4:59:30<22:47:28, 3.76it/s] 17%|█▋ | 62721/371472 [4:59:30<22:58:14, 3.73it/s] 17%|█▋ | 62722/371472 [4:59:30<22:32:52, 3.80it/s] 17%|█▋ | 62723/371472 [4:59:30<22:14:37, 3.86it/s] 17%|█▋ | 62724/371472 [4:59:31<22:40:22, 3.78it/s] 17%|█▋ | 62725/371472 [4:59:31<23:39:08, 3.63it/s] 17%|█▋ | 62726/371472 [4:59:31<23:22:38, 3.67it/s] 17%|█▋ | 62727/371472 [4:59:31<22:49:17, 3.76it/s] 17%|█▋ | 62728/371472 [4:59:32<22:43:56, 3.77it/s] 17%|█▋ | 62729/371472 [4:59:32<22:08:52, 3.87it/s] 17%|█▋ | 62730/371472 [4:59:32<22:24:04, 3.83it/s] 17%|█▋ | 62731/371472 [4:59:32<24:10:15, 3.55it/s] 17%|█▋ | 62732/371472 [4:59:33<24:19:49, 3.52it/s] 17%|█▋ | 62733/371472 [4:59:33<23:47:24, 3.60it/s] 17%|█▋ | 62734/371472 [4:59:33<23:51:34, 3.59it/s] 17%|█▋ | 62735/371472 [4:59:34<23:40:28, 3.62it/s] 17%|█▋ | 62736/371472 [4:59:34<23:27:09, 3.66it/s] 17%|█▋ | 62737/371472 [4:59:34<22:59:00, 3.73it/s] 17%|█▋ | 62738/371472 [4:59:34<23:25:12, 3.66it/s] 17%|█▋ | 62739/371472 [4:59:35<23:32:40, 3.64it/s] 17%|█▋ | 62740/371472 [4:59:35<23:59:59, 3.57it/s] {'loss': 4.0884, 'learning_rate': 8.483968626774978e-07, 'epoch': 2.7} + 17%|█▋ | 62740/371472 [4:59:35<23:59:59, 3.57it/s] 17%|█▋ | 62741/371472 [4:59:35<23:32:48, 3.64it/s] 17%|█▋ | 62742/371472 [4:59:36<24:05:28, 3.56it/s] 17%|█▋ | 62743/371472 [4:59:36<23:13:20, 3.69it/s] 17%|█▋ | 62744/371472 [4:59:36<22:43:48, 3.77it/s] 17%|█▋ | 62745/371472 [4:59:36<21:59:46, 3.90it/s] 17%|█▋ | 62746/371472 [4:59:37<21:51:04, 3.92it/s] 17%|█▋ | 62747/371472 [4:59:37<21:57:18, 3.91it/s] 17%|█▋ | 62748/371472 [4:59:37<23:07:11, 3.71it/s] 17%|█▋ | 62749/371472 [4:59:37<22:47:50, 3.76it/s] 17%|█▋ | 62750/371472 [4:59:38<22:45:15, 3.77it/s] 17%|█▋ | 62751/371472 [4:59:38<22:48:22, 3.76it/s] 17%|█▋ | 62752/371472 [4:59:38<23:01:06, 3.73it/s] 17%|█▋ | 62753/371472 [4:59:38<24:28:30, 3.50it/s] 17%|█▋ | 62754/371472 [4:59:39<24:05:08, 3.56it/s] 17%|█▋ | 62755/371472 [4:59:39<23:08:22, 3.71it/s] 17%|█▋ | 62756/371472 [4:59:39<25:18:05, 3.39it/s] 17%|█▋ | 62757/371472 [4:59:40<24:13:49, 3.54it/s] 17%|█▋ | 62758/371472 [4:59:40<23:40:16, 3.62it/s] 17%|█▋ | 62759/371472 [4:59:40<24:51:28, 3.45it/s] 17%|█▋ | 62760/371472 [4:59:41<26:58:38, 3.18it/s] {'loss': 4.2268, 'learning_rate': 8.483483807020189e-07, 'epoch': 2.7} + 17%|█▋ | 62760/371472 [4:59:41<26:58:38, 3.18it/s] 17%|█▋ | 62761/371472 [4:59:41<25:50:16, 3.32it/s] 17%|█▋ | 62762/371472 [4:59:41<24:43:44, 3.47it/s] 17%|█▋ | 62763/371472 [4:59:41<24:14:34, 3.54it/s] 17%|█▋ | 62764/371472 [4:59:42<24:20:33, 3.52it/s] 17%|█▋ | 62765/371472 [4:59:42<23:55:05, 3.59it/s] 17%|█▋ | 62766/371472 [4:59:42<25:15:05, 3.40it/s] 17%|█▋ | 62767/371472 [4:59:43<25:14:13, 3.40it/s] 17%|█▋ | 62768/371472 [4:59:43<24:48:56, 3.46it/s] 17%|█▋ | 62769/371472 [4:59:43<24:01:18, 3.57it/s] 17%|█▋ | 62770/371472 [4:59:43<27:48:23, 3.08it/s] 17%|█▋ | 62771/371472 [4:59:44<26:15:50, 3.26it/s] 17%|█▋ | 62772/371472 [4:59:44<24:47:52, 3.46it/s] 17%|█▋ | 62773/371472 [4:59:44<25:13:52, 3.40it/s] 17%|█▋ | 62774/371472 [4:59:45<24:33:38, 3.49it/s] 17%|█▋ | 62775/371472 [4:59:45<23:35:11, 3.64it/s] 17%|█▋ | 62776/371472 [4:59:45<23:15:57, 3.69it/s] 17%|█▋ | 62777/371472 [4:59:45<24:15:19, 3.54it/s] 17%|█▋ | 62778/371472 [4:59:46<25:06:58, 3.41it/s] 17%|█▋ | 62779/371472 [4:59:46<25:04:11, 3.42it/s] 17%|█▋ | 62780/371472 [4:59:46<24:21:45, 3.52it/s] {'loss': 4.0974, 'learning_rate': 8.482998987265402e-07, 'epoch': 2.7} + 17%|█▋ | 62780/371472 [4:59:46<24:21:45, 3.52it/s] 17%|█▋ | 62781/371472 [4:59:47<26:31:43, 3.23it/s] 17%|█▋ | 62782/371472 [4:59:47<26:23:02, 3.25it/s] 17%|█▋ | 62783/371472 [4:59:47<24:58:26, 3.43it/s] 17%|█▋ | 62784/371472 [4:59:47<24:58:34, 3.43it/s] 17%|█▋ | 62785/371472 [4:59:48<25:35:32, 3.35it/s] 17%|█▋ | 62786/371472 [4:59:48<24:14:45, 3.54it/s] 17%|█▋ | 62787/371472 [4:59:48<24:37:06, 3.48it/s] 17%|█▋ | 62788/371472 [4:59:49<24:23:02, 3.52it/s] 17%|█▋ | 62789/371472 [4:59:49<24:00:45, 3.57it/s] 17%|█▋ | 62790/371472 [4:59:49<23:51:35, 3.59it/s] 17%|█▋ | 62791/371472 [4:59:49<22:37:49, 3.79it/s] 17%|█▋ | 62792/371472 [4:59:50<22:15:49, 3.85it/s] 17%|█▋ | 62793/371472 [4:59:50<23:18:15, 3.68it/s] 17%|█▋ | 62794/371472 [4:59:50<25:12:26, 3.40it/s] 17%|█▋ | 62795/371472 [4:59:51<25:02:11, 3.42it/s] 17%|█▋ | 62796/371472 [4:59:51<30:16:10, 2.83it/s] 17%|█▋ | 62797/371472 [4:59:51<29:47:38, 2.88it/s] 17%|█▋ | 62798/371472 [4:59:52<27:05:44, 3.16it/s] 17%|█▋ | 62799/371472 [4:59:52<25:33:13, 3.36it/s] 17%|█▋ | 62800/371472 [4:59:52<25:21:50, 3.38it/s] {'loss': 4.2737, 'learning_rate': 8.482514167510612e-07, 'epoch': 2.7} + 17%|█▋ | 62800/371472 [4:59:52<25:21:50, 3.38it/s] 17%|█▋ | 62801/371472 [4:59:52<25:01:09, 3.43it/s] 17%|█▋ | 62802/371472 [4:59:53<25:47:45, 3.32it/s] 17%|█▋ | 62803/371472 [4:59:53<24:59:02, 3.43it/s] 17%|█▋ | 62804/371472 [4:59:53<26:10:00, 3.28it/s] 17%|█▋ | 62805/371472 [4:59:54<25:08:44, 3.41it/s] 17%|█▋ | 62806/371472 [4:59:54<24:11:04, 3.55it/s] 17%|█▋ | 62807/371472 [4:59:54<23:42:12, 3.62it/s] 17%|█▋ | 62808/371472 [4:59:54<24:02:00, 3.57it/s] 17%|█▋ | 62809/371472 [4:59:55<24:17:52, 3.53it/s] 17%|█▋ | 62810/371472 [4:59:55<24:20:20, 3.52it/s] 17%|█▋ | 62811/371472 [4:59:55<24:52:06, 3.45it/s] 17%|█▋ | 62812/371472 [4:59:56<28:11:09, 3.04it/s] 17%|█▋ | 62813/371472 [4:59:56<25:43:52, 3.33it/s] 17%|█▋ | 62814/371472 [4:59:56<26:45:20, 3.20it/s] 17%|█▋ | 62815/371472 [4:59:57<26:34:44, 3.23it/s] 17%|█▋ | 62816/371472 [4:59:57<24:48:54, 3.46it/s] 17%|█▋ | 62817/371472 [4:59:57<24:30:08, 3.50it/s] 17%|█▋ | 62818/371472 [4:59:57<25:47:29, 3.32it/s] 17%|█▋ | 62819/371472 [4:59:58<25:28:46, 3.36it/s] 17%|█▋ | 62820/371472 [4:59:58<25:41:31, 3.34it/s] {'loss': 4.3173, 'learning_rate': 8.482029347755823e-07, 'epoch': 2.71} + 17%|█▋ | 62820/371472 [4:59:58<25:41:31, 3.34it/s] 17%|█▋ | 62821/371472 [4:59:58<25:31:36, 3.36it/s] 17%|█▋ | 62822/371472 [4:59:59<24:24:01, 3.51it/s] 17%|█▋ | 62823/371472 [4:59:59<23:57:58, 3.58it/s] 17%|█▋ | 62824/371472 [4:59:59<23:54:18, 3.59it/s] 17%|█▋ | 62825/371472 [4:59:59<23:49:51, 3.60it/s] 17%|█▋ | 62826/371472 [5:00:00<24:08:54, 3.55it/s] 17%|█▋ | 62827/371472 [5:00:00<25:15:02, 3.40it/s] 17%|█▋ | 62828/371472 [5:00:00<24:07:00, 3.55it/s] 17%|█▋ | 62829/371472 [5:00:01<23:53:24, 3.59it/s] 17%|█▋ | 62830/371472 [5:00:01<23:59:24, 3.57it/s] 17%|█▋ | 62831/371472 [5:00:01<23:41:11, 3.62it/s] 17%|█▋ | 62832/371472 [5:00:01<23:35:50, 3.63it/s] 17%|█▋ | 62833/371472 [5:00:02<26:01:19, 3.29it/s] 17%|█▋ | 62834/371472 [5:00:02<24:44:24, 3.47it/s] 17%|█▋ | 62835/371472 [5:00:02<25:19:48, 3.38it/s] 17%|█▋ | 62836/371472 [5:00:03<24:16:57, 3.53it/s] 17%|█▋ | 62837/371472 [5:00:03<23:36:15, 3.63it/s] 17%|█▋ | 62838/371472 [5:00:03<25:41:51, 3.34it/s] 17%|█▋ | 62839/371472 [5:00:03<24:55:47, 3.44it/s] 17%|█▋ | 62840/371472 [5:00:04<24:18:17, 3.53it/s] {'loss': 4.0712, 'learning_rate': 8.481544528001034e-07, 'epoch': 2.71} + 17%|█▋ | 62840/371472 [5:00:04<24:18:17, 3.53it/s] 17%|█▋ | 62841/371472 [5:00:04<24:10:54, 3.55it/s] 17%|█▋ | 62842/371472 [5:00:04<26:05:14, 3.29it/s] 17%|█▋ | 62843/371472 [5:00:05<24:54:02, 3.44it/s] 17%|█▋ | 62844/371472 [5:00:05<23:55:46, 3.58it/s] 17%|█▋ | 62845/371472 [5:00:05<23:09:58, 3.70it/s] 17%|█▋ | 62846/371472 [5:00:05<22:51:37, 3.75it/s] 17%|█▋ | 62847/371472 [5:00:06<24:30:02, 3.50it/s] 17%|█▋ | 62848/371472 [5:00:06<24:02:26, 3.57it/s] 17%|█▋ | 62849/371472 [5:00:06<23:09:21, 3.70it/s] 17%|█▋ | 62850/371472 [5:00:07<22:49:17, 3.76it/s] 17%|█▋ | 62851/371472 [5:00:07<22:34:09, 3.80it/s] 17%|█▋ | 62852/371472 [5:00:07<22:50:46, 3.75it/s] 17%|█▋ | 62853/371472 [5:00:07<23:09:22, 3.70it/s] 17%|█▋ | 62854/371472 [5:00:08<22:53:10, 3.75it/s] 17%|█▋ | 62855/371472 [5:00:08<22:33:22, 3.80it/s] 17%|█▋ | 62856/371472 [5:00:08<23:14:35, 3.69it/s] 17%|█▋ | 62857/371472 [5:00:08<24:30:28, 3.50it/s] 17%|█▋ | 62858/371472 [5:00:09<24:38:15, 3.48it/s] 17%|█▋ | 62859/371472 [5:00:09<25:10:23, 3.41it/s] 17%|█▋ | 62860/371472 [5:00:09<26:14:55, 3.27it/s] {'loss': 4.4412, 'learning_rate': 8.481059708246245e-07, 'epoch': 2.71} + 17%|█▋ | 62860/371472 [5:00:09<26:14:55, 3.27it/s] 17%|█▋ | 62861/371472 [5:00:10<25:10:31, 3.41it/s] 17%|█▋ | 62862/371472 [5:00:10<24:38:09, 3.48it/s] 17%|█▋ | 62863/371472 [5:00:10<24:06:05, 3.56it/s] 17%|█▋ | 62864/371472 [5:00:11<26:08:00, 3.28it/s] 17%|█▋ | 62865/371472 [5:00:11<24:46:03, 3.46it/s] 17%|█▋ | 62866/371472 [5:00:11<24:21:08, 3.52it/s] 17%|█▋ | 62867/371472 [5:00:11<23:34:46, 3.64it/s] 17%|█▋ | 62868/371472 [5:00:12<24:11:37, 3.54it/s] 17%|█▋ | 62869/371472 [5:00:12<24:07:41, 3.55it/s] 17%|█▋ | 62870/371472 [5:00:12<25:45:46, 3.33it/s] 17%|█▋ | 62871/371472 [5:00:13<25:21:32, 3.38it/s] 17%|█▋ | 62872/371472 [5:00:13<24:15:29, 3.53it/s] 17%|█▋ | 62873/371472 [5:00:13<27:05:31, 3.16it/s] 17%|█▋ | 62874/371472 [5:00:13<25:05:40, 3.42it/s] 17%|█▋ | 62875/371472 [5:00:14<24:24:54, 3.51it/s] 17%|█▋ | 62876/371472 [5:00:14<24:06:42, 3.56it/s] 17%|█▋ | 62877/371472 [5:00:14<25:40:01, 3.34it/s] 17%|█▋ | 62878/371472 [5:00:15<24:44:56, 3.46it/s] 17%|█▋ | 62879/371472 [5:00:15<23:49:03, 3.60it/s] 17%|█▋ | 62880/371472 [5:00:15<25:24:16, 3.37it/s] {'loss': 4.271, 'learning_rate': 8.480574888491456e-07, 'epoch': 2.71} + 17%|█▋ | 62880/371472 [5:00:15<25:24:16, 3.37it/s] 17%|█▋ | 62881/371472 [5:00:15<24:52:26, 3.45it/s] 17%|█▋ | 62882/371472 [5:00:16<23:51:44, 3.59it/s] 17%|█▋ | 62883/371472 [5:00:16<24:00:48, 3.57it/s] 17%|█▋ | 62884/371472 [5:00:16<25:37:01, 3.35it/s] 17%|█▋ | 62885/371472 [5:00:17<25:50:38, 3.32it/s] 17%|█▋ | 62886/371472 [5:00:17<26:26:03, 3.24it/s] 17%|█▋ | 62887/371472 [5:00:17<28:18:39, 3.03it/s] 17%|█▋ | 62888/371472 [5:00:18<27:13:16, 3.15it/s] 17%|█▋ | 62889/371472 [5:00:18<26:18:39, 3.26it/s] 17%|█▋ | 62890/371472 [5:00:18<25:54:22, 3.31it/s] 17%|█▋ | 62891/371472 [5:00:18<26:05:32, 3.29it/s] 17%|█▋ | 62892/371472 [5:00:19<25:14:38, 3.40it/s] 17%|█▋ | 62893/371472 [5:00:19<24:39:54, 3.48it/s] 17%|█▋ | 62894/371472 [5:00:19<23:40:26, 3.62it/s] 17%|█▋ | 62895/371472 [5:00:20<23:35:58, 3.63it/s] 17%|█▋ | 62896/371472 [5:00:20<23:30:02, 3.65it/s] 17%|█▋ | 62897/371472 [5:00:20<23:13:32, 3.69it/s] 17%|█▋ | 62898/371472 [5:00:20<22:58:18, 3.73it/s] 17%|█▋ | 62899/371472 [5:00:21<22:48:13, 3.76it/s] 17%|█▋ | 62900/371472 [5:00:21<22:57:28, 3.73it/s] {'loss': 4.1806, 'learning_rate': 8.480090068736667e-07, 'epoch': 2.71} + 17%|█▋ | 62900/371472 [5:00:21<22:57:28, 3.73it/s] 17%|█▋ | 62901/371472 [5:00:21<24:45:58, 3.46it/s] 17%|█▋ | 62902/371472 [5:00:21<23:45:33, 3.61it/s] 17%|█▋ | 62903/371472 [5:00:22<23:30:17, 3.65it/s] 17%|█▋ | 62904/371472 [5:00:22<23:05:21, 3.71it/s] 17%|█▋ | 62905/371472 [5:00:22<23:08:06, 3.70it/s] 17%|█▋ | 62906/371472 [5:00:23<23:11:41, 3.70it/s] 17%|█▋ | 62907/371472 [5:00:23<23:28:00, 3.65it/s] 17%|█▋ | 62908/371472 [5:00:23<23:31:14, 3.64it/s] 17%|█▋ | 62909/371472 [5:00:23<22:49:35, 3.75it/s] 17%|█▋ | 62910/371472 [5:00:24<23:31:04, 3.64it/s] 17%|█▋ | 62911/371472 [5:00:24<23:12:51, 3.69it/s] 17%|█▋ | 62912/371472 [5:00:24<23:07:30, 3.71it/s] 17%|█▋ | 62913/371472 [5:00:24<24:26:57, 3.51it/s] 17%|█▋ | 62914/371472 [5:00:25<25:12:26, 3.40it/s] 17%|█▋ | 62915/371472 [5:00:25<26:22:47, 3.25it/s] 17%|█▋ | 62916/371472 [5:00:25<26:21:32, 3.25it/s] 17%|█▋ | 62917/371472 [5:00:26<25:00:39, 3.43it/s] 17%|█▋ | 62918/371472 [5:00:26<24:17:32, 3.53it/s] 17%|█▋ | 62919/371472 [5:00:26<26:09:46, 3.28it/s] 17%|█▋ | 62920/371472 [5:00:27<25:13:03, 3.40it/s] {'loss': 4.0364, 'learning_rate': 8.479605248981878e-07, 'epoch': 2.71} + 17%|█▋ | 62920/371472 [5:00:27<25:13:03, 3.40it/s] 17%|█▋ | 62921/371472 [5:00:27<24:31:16, 3.50it/s] 17%|█▋ | 62922/371472 [5:00:27<24:36:43, 3.48it/s] 17%|█▋ | 62923/371472 [5:00:27<25:11:44, 3.40it/s] 17%|█▋ | 62924/371472 [5:00:28<24:21:11, 3.52it/s] 17%|█▋ | 62925/371472 [5:00:28<24:27:33, 3.50it/s] 17%|█▋ | 62926/371472 [5:00:28<24:40:55, 3.47it/s] 17%|█▋ | 62927/371472 [5:00:29<24:06:14, 3.56it/s] 17%|█▋ | 62928/371472 [5:00:29<24:01:59, 3.57it/s] 17%|█▋ | 62929/371472 [5:00:29<26:02:02, 3.29it/s] 17%|█▋ | 62930/371472 [5:00:30<26:11:55, 3.27it/s] 17%|█▋ | 62931/371472 [5:00:30<24:45:18, 3.46it/s] 17%|█▋ | 62932/371472 [5:00:30<24:23:42, 3.51it/s] 17%|█▋ | 62933/371472 [5:00:30<23:47:40, 3.60it/s] 17%|█▋ | 62934/371472 [5:00:31<23:09:44, 3.70it/s] 17%|█▋ | 62935/371472 [5:00:31<22:57:11, 3.73it/s] 17%|█▋ | 62936/371472 [5:00:31<23:10:21, 3.70it/s] 17%|█▋ | 62937/371472 [5:00:31<24:56:41, 3.44it/s] 17%|█▋ | 62938/371472 [5:00:32<24:23:19, 3.51it/s] 17%|█▋ | 62939/371472 [5:00:32<23:56:27, 3.58it/s] 17%|█▋ | 62940/371472 [5:00:32<23:58:00, 3.58it/s] {'loss': 4.1013, 'learning_rate': 8.479120429227089e-07, 'epoch': 2.71} + 17%|█▋ | 62940/371472 [5:00:32<23:58:00, 3.58it/s] 17%|█▋ | 62941/371472 [5:00:33<24:39:31, 3.48it/s] 17%|█▋ | 62942/371472 [5:00:33<24:27:42, 3.50it/s] 17%|█▋ | 62943/371472 [5:00:33<23:43:24, 3.61it/s] 17%|█▋ | 62944/371472 [5:00:33<23:36:53, 3.63it/s] 17%|█▋ | 62945/371472 [5:00:34<23:13:06, 3.69it/s] 17%|█▋ | 62946/371472 [5:00:34<23:13:23, 3.69it/s] 17%|█▋ | 62947/371472 [5:00:34<23:02:17, 3.72it/s] 17%|█▋ | 62948/371472 [5:00:34<24:42:53, 3.47it/s] 17%|█▋ | 62949/371472 [5:00:35<25:33:58, 3.35it/s] 17%|█▋ | 62950/371472 [5:00:35<24:34:44, 3.49it/s] 17%|█▋ | 62951/371472 [5:00:35<23:30:08, 3.65it/s] 17%|█▋ | 62952/371472 [5:00:36<22:50:30, 3.75it/s] 17%|█▋ | 62953/371472 [5:00:36<30:25:11, 2.82it/s] 17%|█▋ | 62954/371472 [5:00:36<27:54:31, 3.07it/s] 17%|█▋ | 62955/371472 [5:00:37<26:08:09, 3.28it/s] 17%|█▋ | 62956/371472 [5:00:37<24:46:53, 3.46it/s] 17%|█▋ | 62957/371472 [5:00:37<24:02:29, 3.56it/s] 17%|█▋ | 62958/371472 [5:00:37<24:07:43, 3.55it/s] 17%|█▋ | 62959/371472 [5:00:38<24:15:46, 3.53it/s] 17%|█▋ | 62960/371472 [5:00:38<23:54:55, 3.58it/s] {'loss': 4.0226, 'learning_rate': 8.478635609472301e-07, 'epoch': 2.71} + 17%|█▋ | 62960/371472 [5:00:38<23:54:55, 3.58it/s] 17%|█▋ | 62961/371472 [5:00:38<24:21:53, 3.52it/s] 17%|█▋ | 62962/371472 [5:00:39<24:07:39, 3.55it/s] 17%|█▋ | 62963/371472 [5:00:39<23:33:48, 3.64it/s] 17%|█▋ | 62964/371472 [5:00:39<23:05:57, 3.71it/s] 17%|█▋ | 62965/371472 [5:00:39<23:31:24, 3.64it/s] 17%|█▋ | 62966/371472 [5:00:40<26:03:01, 3.29it/s] 17%|█▋ | 62967/371472 [5:00:40<25:26:02, 3.37it/s] 17%|█▋ | 62968/371472 [5:00:40<25:04:57, 3.42it/s] 17%|█▋ | 62969/371472 [5:00:41<26:16:42, 3.26it/s] 17%|█▋ | 62970/371472 [5:00:41<25:13:47, 3.40it/s] 17%|█▋ | 62971/371472 [5:00:41<24:59:58, 3.43it/s] 17%|█▋ | 62972/371472 [5:00:42<25:16:13, 3.39it/s] 17%|█▋ | 62973/371472 [5:00:42<26:19:37, 3.25it/s] 17%|█▋ | 62974/371472 [5:00:42<25:56:43, 3.30it/s] 17%|█▋ | 62975/371472 [5:00:42<24:22:21, 3.52it/s] 17%|█▋ | 62976/371472 [5:00:43<26:36:03, 3.22it/s] 17%|█▋ | 62977/371472 [5:00:43<27:02:26, 3.17it/s] 17%|█▋ | 62978/371472 [5:00:43<26:50:17, 3.19it/s] 17%|█▋ | 62979/371472 [5:00:44<25:14:00, 3.40it/s] 17%|█▋ | 62980/371472 [5:00:44<24:57:52, 3.43it/s] {'loss': 4.1229, 'learning_rate': 8.478150789717512e-07, 'epoch': 2.71} + 17%|█▋ | 62980/371472 [5:00:44<24:57:52, 3.43it/s] 17%|█▋ | 62981/371472 [5:00:44<25:13:19, 3.40it/s] 17%|█▋ | 62982/371472 [5:00:44<23:57:27, 3.58it/s] 17%|█▋ | 62983/371472 [5:00:45<24:41:22, 3.47it/s] 17%|█▋ | 62984/371472 [5:00:45<24:52:21, 3.45it/s] 17%|█▋ | 62985/371472 [5:00:45<24:10:06, 3.55it/s] 17%|█▋ | 62986/371472 [5:00:46<24:34:48, 3.49it/s] 17%|█▋ | 62987/371472 [5:00:46<24:15:21, 3.53it/s] 17%|█▋ | 62988/371472 [5:00:46<23:32:52, 3.64it/s] 17%|█▋ | 62989/371472 [5:00:46<23:53:57, 3.59it/s] 17%|█▋ | 62990/371472 [5:00:47<23:31:48, 3.64it/s] 17%|█▋ | 62991/371472 [5:00:47<23:58:03, 3.58it/s] 17%|█▋ | 62992/371472 [5:00:47<22:41:57, 3.77it/s] 17%|█▋ | 62993/371472 [5:00:47<22:39:22, 3.78it/s] 17%|█▋ | 62994/371472 [5:00:48<22:07:22, 3.87it/s] 17%|█▋ | 62995/371472 [5:00:48<24:25:42, 3.51it/s] 17%|█▋ | 62996/371472 [5:00:48<25:26:41, 3.37it/s] 17%|█▋ | 62997/371472 [5:00:49<26:25:48, 3.24it/s] 17%|█▋ | 62998/371472 [5:00:49<25:13:58, 3.40it/s] 17%|█▋ | 62999/371472 [5:00:49<23:51:09, 3.59it/s] 17%|█▋ | 63000/371472 [5:00:50<23:48:22, 3.60it/s] {'loss': 4.1475, 'learning_rate': 8.477665969962722e-07, 'epoch': 2.71} + 17%|█▋ | 63000/371472 [5:00:50<23:48:22, 3.60it/s] 17%|█▋ | 63001/371472 [5:00:50<24:34:00, 3.49it/s] 17%|█▋ | 63002/371472 [5:00:50<25:24:57, 3.37it/s] 17%|█▋ | 63003/371472 [5:00:50<24:33:34, 3.49it/s] 17%|█▋ | 63004/371472 [5:00:51<23:51:58, 3.59it/s] 17%|█▋ | 63005/371472 [5:00:51<25:57:31, 3.30it/s] 17%|█▋ | 63006/371472 [5:00:51<28:17:46, 3.03it/s] 17%|█▋ | 63007/371472 [5:00:52<27:24:03, 3.13it/s] 17%|█▋ | 63008/371472 [5:00:52<26:36:16, 3.22it/s] 17%|█▋ | 63009/371472 [5:00:52<25:22:59, 3.38it/s] 17%|█▋ | 63010/371472 [5:00:53<25:46:13, 3.32it/s] 17%|█▋ | 63011/371472 [5:00:53<25:04:36, 3.42it/s] 17%|█▋ | 63012/371472 [5:00:53<25:12:42, 3.40it/s] 17%|█▋ | 63013/371472 [5:00:53<25:49:33, 3.32it/s] 17%|█▋ | 63014/371472 [5:00:54<25:38:12, 3.34it/s] 17%|█▋ | 63015/371472 [5:00:54<25:35:37, 3.35it/s] 17%|█▋ | 63016/371472 [5:00:54<24:32:19, 3.49it/s] 17%|█▋ | 63017/371472 [5:00:55<25:11:41, 3.40it/s] 17%|█▋ | 63018/371472 [5:00:55<28:38:01, 2.99it/s] 17%|█▋ | 63019/371472 [5:00:55<27:18:13, 3.14it/s] 17%|█▋ | 63020/371472 [5:00:56<25:59:10, 3.30it/s] {'loss': 4.1741, 'learning_rate': 8.477181150207933e-07, 'epoch': 2.71} + 17%|█▋ | 63020/371472 [5:00:56<25:59:10, 3.30it/s] 17%|█▋ | 63021/371472 [5:00:56<25:35:00, 3.35it/s] 17%|█▋ | 63022/371472 [5:00:56<24:59:50, 3.43it/s] 17%|█▋ | 63023/371472 [5:00:56<25:35:03, 3.35it/s] 17%|█▋ | 63024/371472 [5:00:57<27:44:04, 3.09it/s] 17%|█▋ | 63025/371472 [5:00:57<25:42:53, 3.33it/s] 17%|█▋ | 63026/371472 [5:00:57<26:31:04, 3.23it/s] 17%|█▋ | 63027/371472 [5:00:58<27:05:45, 3.16it/s] 17%|█▋ | 63028/371472 [5:00:58<25:14:41, 3.39it/s] 17%|█▋ | 63029/371472 [5:00:58<25:10:52, 3.40it/s] 17%|█▋ | 63030/371472 [5:00:59<24:52:36, 3.44it/s] 17%|█▋ | 63031/371472 [5:00:59<25:06:13, 3.41it/s] 17%|█▋ | 63032/371472 [5:00:59<24:16:55, 3.53it/s] 17%|█▋ | 63033/371472 [5:00:59<24:49:12, 3.45it/s] 17%|█▋ | 63034/371472 [5:01:00<24:04:42, 3.56it/s] 17%|█▋ | 63035/371472 [5:01:00<23:53:51, 3.59it/s] 17%|█▋ | 63036/371472 [5:01:00<23:08:05, 3.70it/s] 17%|█▋ | 63037/371472 [5:01:01<25:43:44, 3.33it/s] 17%|█▋ | 63038/371472 [5:01:01<25:13:27, 3.40it/s] 17%|█▋ | 63039/371472 [5:01:01<24:37:34, 3.48it/s] 17%|█▋ | 63040/371472 [5:01:01<24:36:59, 3.48it/s] {'loss': 4.3025, 'learning_rate': 8.476696330453145e-07, 'epoch': 2.72} + 17%|█▋ | 63040/371472 [5:01:01<24:36:59, 3.48it/s] 17%|█▋ | 63041/371472 [5:01:02<24:05:08, 3.56it/s] 17%|█▋ | 63042/371472 [5:01:02<23:17:13, 3.68it/s] 17%|█▋ | 63043/371472 [5:01:02<22:50:47, 3.75it/s] 17%|█▋ | 63044/371472 [5:01:03<23:04:09, 3.71it/s] 17%|█▋ | 63045/371472 [5:01:03<23:10:23, 3.70it/s] 17%|█▋ | 63046/371472 [5:01:03<23:30:54, 3.64it/s] 17%|█▋ | 63047/371472 [5:01:03<23:17:09, 3.68it/s] 17%|█▋ | 63048/371472 [5:01:04<22:53:43, 3.74it/s] 17%|█▋ | 63049/371472 [5:01:04<22:49:55, 3.75it/s] 17%|█▋ | 63050/371472 [5:01:04<27:12:40, 3.15it/s] 17%|█▋ | 63051/371472 [5:01:05<27:18:56, 3.14it/s] 17%|█▋ | 63052/371472 [5:01:05<25:33:26, 3.35it/s] 17%|█▋ | 63053/371472 [5:01:05<25:22:01, 3.38it/s] 17%|█▋ | 63054/371472 [5:01:05<24:49:58, 3.45it/s] 17%|█▋ | 63055/371472 [5:01:06<24:10:06, 3.54it/s] 17%|█▋ | 63056/371472 [5:01:06<25:20:35, 3.38it/s] 17%|█▋ | 63057/371472 [5:01:06<25:42:06, 3.33it/s] 17%|█▋ | 63058/371472 [5:01:07<24:48:20, 3.45it/s] 17%|█▋ | 63059/371472 [5:01:07<24:12:07, 3.54it/s] 17%|█▋ | 63060/371472 [5:01:07<23:41:37, 3.62it/s] {'loss': 4.2863, 'learning_rate': 8.476211510698355e-07, 'epoch': 2.72} + 17%|█▋ | 63060/371472 [5:01:07<23:41:37, 3.62it/s] 17%|█▋ | 63061/371472 [5:01:07<23:51:44, 3.59it/s] 17%|█▋ | 63062/371472 [5:01:08<24:01:28, 3.57it/s] 17%|█▋ | 63063/371472 [5:01:08<25:10:04, 3.40it/s] 17%|█▋ | 63064/371472 [5:01:08<24:19:21, 3.52it/s] 17%|█▋ | 63065/371472 [5:01:09<24:16:32, 3.53it/s] 17%|█▋ | 63066/371472 [5:01:09<23:46:08, 3.60it/s] 17%|█▋ | 63067/371472 [5:01:09<22:47:59, 3.76it/s] 17%|█▋ | 63068/371472 [5:01:09<24:54:04, 3.44it/s] 17%|█▋ | 63069/371472 [5:01:10<25:38:03, 3.34it/s] 17%|█▋ | 63070/371472 [5:01:10<24:42:18, 3.47it/s] 17%|█▋ | 63071/371472 [5:01:10<28:16:53, 3.03it/s] 17%|█▋ | 63072/371472 [5:01:11<26:00:43, 3.29it/s] 17%|█▋ | 63073/371472 [5:01:11<24:45:39, 3.46it/s] 17%|█▋ | 63074/371472 [5:01:11<24:09:30, 3.55it/s] 17%|█▋ | 63075/371472 [5:01:11<24:47:37, 3.46it/s] 17%|█▋ | 63076/371472 [5:01:12<24:17:18, 3.53it/s] 17%|█▋ | 63077/371472 [5:01:12<23:29:20, 3.65it/s] 17%|█▋ | 63078/371472 [5:01:12<25:23:52, 3.37it/s] 17%|█▋ | 63079/371472 [5:01:13<24:23:38, 3.51it/s] 17%|█▋ | 63080/371472 [5:01:13<23:50:01, 3.59it/s] {'loss': 4.1415, 'learning_rate': 8.475726690943567e-07, 'epoch': 2.72} + 17%|█▋ | 63080/371472 [5:01:13<23:50:01, 3.59it/s] 17%|█▋ | 63081/371472 [5:01:13<24:20:12, 3.52it/s] 17%|█▋ | 63082/371472 [5:01:14<25:22:52, 3.38it/s] 17%|█▋ | 63083/371472 [5:01:14<24:21:19, 3.52it/s] 17%|█▋ | 63084/371472 [5:01:14<24:16:20, 3.53it/s] 17%|█▋ | 63085/371472 [5:01:14<24:35:00, 3.48it/s] 17%|█▋ | 63086/371472 [5:01:15<27:03:02, 3.17it/s] 17%|█▋ | 63087/371472 [5:01:15<25:53:25, 3.31it/s] 17%|█▋ | 63088/371472 [5:01:15<25:16:12, 3.39it/s] 17%|█▋ | 63089/371472 [5:01:16<24:47:42, 3.45it/s] 17%|█▋ | 63090/371472 [5:01:16<24:41:25, 3.47it/s] 17%|█▋ | 63091/371472 [5:01:16<24:07:00, 3.55it/s] 17%|█▋ | 63092/371472 [5:01:16<24:45:06, 3.46it/s] 17%|█▋ | 63093/371472 [5:01:17<25:13:51, 3.40it/s] 17%|█▋ | 63094/371472 [5:01:17<25:38:50, 3.34it/s] 17%|█▋ | 63095/371472 [5:01:17<25:53:17, 3.31it/s] 17%|█▋ | 63096/371472 [5:01:18<24:50:04, 3.45it/s] 17%|█▋ | 63097/371472 [5:01:18<25:47:08, 3.32it/s] 17%|█▋ | 63098/371472 [5:01:18<25:56:22, 3.30it/s] 17%|█▋ | 63099/371472 [5:01:19<26:07:35, 3.28it/s] 17%|█▋ | 63100/371472 [5:01:19<24:56:28, 3.43it/s] {'loss': 4.1979, 'learning_rate': 8.475241871188778e-07, 'epoch': 2.72} + 17%|█▋ | 63100/371472 [5:01:19<24:56:28, 3.43it/s] 17%|█▋ | 63101/371472 [5:01:19<24:14:12, 3.53it/s] 17%|█▋ | 63102/371472 [5:01:19<23:58:22, 3.57it/s] 17%|█▋ | 63103/371472 [5:01:20<23:38:58, 3.62it/s] 17%|█▋ | 63104/371472 [5:01:20<23:04:13, 3.71it/s] 17%|█▋ | 63105/371472 [5:01:20<22:14:52, 3.85it/s] 17%|█▋ | 63106/371472 [5:01:20<22:14:48, 3.85it/s] 17%|█▋ | 63107/371472 [5:01:21<22:51:47, 3.75it/s] 17%|█▋ | 63108/371472 [5:01:21<21:56:53, 3.90it/s] 17%|█▋ | 63109/371472 [5:01:21<23:08:35, 3.70it/s] 17%|█▋ | 63110/371472 [5:01:21<23:02:36, 3.72it/s] 17%|█▋ | 63111/371472 [5:01:22<23:37:23, 3.63it/s] 17%|█▋ | 63112/371472 [5:01:22<24:06:04, 3.55it/s] 17%|█▋ | 63113/371472 [5:01:22<23:25:21, 3.66it/s] 17%|█▋ | 63114/371472 [5:01:23<23:16:16, 3.68it/s] 17%|█▋ | 63115/371472 [5:01:23<23:34:51, 3.63it/s] 17%|█▋ | 63116/371472 [5:01:23<24:57:50, 3.43it/s] 17%|█▋ | 63117/371472 [5:01:23<24:42:15, 3.47it/s] 17%|█▋ | 63118/371472 [5:01:24<25:42:26, 3.33it/s] 17%|█▋ | 63119/371472 [5:01:24<26:36:53, 3.22it/s] 17%|█▋ | 63120/371472 [5:01:24<26:19:51, 3.25it/s] {'loss': 4.2951, 'learning_rate': 8.474757051433988e-07, 'epoch': 2.72} + 17%|█▋ | 63120/371472 [5:01:24<26:19:51, 3.25it/s] 17%|█▋ | 63121/371472 [5:01:25<24:56:02, 3.44it/s] 17%|█▋ | 63122/371472 [5:01:25<24:25:48, 3.51it/s] 17%|█▋ | 63123/371472 [5:01:25<23:54:27, 3.58it/s] 17%|█▋ | 63124/371472 [5:01:25<23:47:25, 3.60it/s] 17%|█▋ | 63125/371472 [5:01:26<25:15:01, 3.39it/s] 17%|█▋ | 63126/371472 [5:01:26<25:55:11, 3.30it/s] 17%|█▋ | 63127/371472 [5:01:26<25:10:58, 3.40it/s] 17%|█▋ | 63128/371472 [5:01:27<24:27:37, 3.50it/s] 17%|█▋ | 63129/371472 [5:01:27<25:34:14, 3.35it/s] 17%|█▋ | 63130/371472 [5:01:27<26:03:52, 3.29it/s] 17%|█▋ | 63131/371472 [5:01:28<25:31:44, 3.36it/s] 17%|█▋ | 63132/371472 [5:01:28<26:18:04, 3.26it/s] 17%|█▋ | 63133/371472 [5:01:28<27:00:31, 3.17it/s] 17%|█▋ | 63134/371472 [5:01:29<26:16:49, 3.26it/s] 17%|█▋ | 63135/371472 [5:01:29<25:14:36, 3.39it/s] 17%|█▋ | 63136/371472 [5:01:29<24:05:26, 3.56it/s] 17%|█▋ | 63137/371472 [5:01:29<23:21:29, 3.67it/s] 17%|█▋ | 63138/371472 [5:01:30<25:06:20, 3.41it/s] 17%|█▋ | 63139/371472 [5:01:30<24:57:03, 3.43it/s] 17%|█▋ | 63140/371472 [5:01:30<24:14:52, 3.53it/s] {'loss': 4.0424, 'learning_rate': 8.474272231679199e-07, 'epoch': 2.72} + 17%|█▋ | 63140/371472 [5:01:30<24:14:52, 3.53it/s] 17%|█▋ | 63141/371472 [5:01:30<24:10:29, 3.54it/s] 17%|█▋ | 63142/371472 [5:01:31<24:28:59, 3.50it/s] 17%|█▋ | 63143/371472 [5:01:31<23:48:45, 3.60it/s] 17%|█▋ | 63144/371472 [5:01:31<23:12:45, 3.69it/s] 17%|█▋ | 63145/371472 [5:01:32<24:15:17, 3.53it/s] 17%|█▋ | 63146/371472 [5:01:32<23:59:16, 3.57it/s] 17%|█▋ | 63147/371472 [5:01:32<23:06:13, 3.71it/s] 17%|█▋ | 63148/371472 [5:01:32<22:43:37, 3.77it/s] 17%|█▋ | 63149/371472 [5:01:33<21:52:50, 3.91it/s] 17%|█▋ | 63150/371472 [5:01:33<22:14:07, 3.85it/s] 17%|█▋ | 63151/371472 [5:01:33<23:44:08, 3.61it/s] 17%|█▋ | 63152/371472 [5:01:33<23:11:10, 3.69it/s] 17%|█▋ | 63153/371472 [5:01:34<23:00:14, 3.72it/s] 17%|█▋ | 63154/371472 [5:01:34<24:34:01, 3.49it/s] 17%|█▋ | 63155/371472 [5:01:34<24:08:24, 3.55it/s] 17%|█▋ | 63156/371472 [5:01:35<24:29:21, 3.50it/s] 17%|█▋ | 63157/371472 [5:01:35<23:55:10, 3.58it/s] 17%|█▋ | 63158/371472 [5:01:35<23:09:01, 3.70it/s] 17%|█▋ | 63159/371472 [5:01:35<23:41:18, 3.62it/s] 17%|█▋ | 63160/371472 [5:01:36<23:25:59, 3.65it/s] {'loss': 4.2505, 'learning_rate': 8.473787411924411e-07, 'epoch': 2.72} + 17%|█▋ | 63160/371472 [5:01:36<23:25:59, 3.65it/s] 17%|█▋ | 63161/371472 [5:01:36<22:52:06, 3.74it/s] 17%|█▋ | 63162/371472 [5:01:36<22:45:02, 3.76it/s] 17%|█▋ | 63163/371472 [5:01:36<23:26:46, 3.65it/s] 17%|█▋ | 63164/371472 [5:01:37<25:20:39, 3.38it/s] 17%|█▋ | 63165/371472 [5:01:37<24:22:10, 3.51it/s] 17%|█▋ | 63166/371472 [5:01:37<25:30:42, 3.36it/s] 17%|█▋ | 63167/371472 [5:01:38<25:28:58, 3.36it/s] 17%|█▋ | 63168/371472 [5:01:38<25:01:30, 3.42it/s] 17%|█▋ | 63169/371472 [5:01:38<25:09:13, 3.40it/s] 17%|█▋ | 63170/371472 [5:01:39<24:03:11, 3.56it/s] 17%|█▋ | 63171/371472 [5:01:39<22:59:32, 3.72it/s] 17%|█▋ | 63172/371472 [5:01:39<23:11:12, 3.69it/s] 17%|█▋ | 63173/371472 [5:01:39<23:04:47, 3.71it/s] 17%|█▋ | 63174/371472 [5:01:40<23:58:00, 3.57it/s] 17%|█▋ | 63175/371472 [5:01:40<23:51:45, 3.59it/s] 17%|█▋ | 63176/371472 [5:01:40<22:55:08, 3.74it/s] 17%|█▋ | 63177/371472 [5:01:40<23:04:32, 3.71it/s] 17%|█▋ | 63178/371472 [5:01:41<23:38:17, 3.62it/s] 17%|█▋ | 63179/371472 [5:01:41<25:50:36, 3.31it/s] 17%|█▋ | 63180/371472 [5:01:41<25:52:17, 3.31it/s] {'loss': 4.4026, 'learning_rate': 8.473302592169622e-07, 'epoch': 2.72} + 17%|█▋ | 63180/371472 [5:01:41<25:52:17, 3.31it/s] 17%|█▋ | 63181/371472 [5:01:42<25:42:56, 3.33it/s] 17%|█▋ | 63182/371472 [5:01:42<26:28:24, 3.23it/s] 17%|█▋ | 63183/371472 [5:01:42<26:56:30, 3.18it/s] 17%|█▋ | 63184/371472 [5:01:43<26:43:30, 3.20it/s] 17%|█▋ | 63185/371472 [5:01:43<26:48:03, 3.20it/s] 17%|█▋ | 63186/371472 [5:01:43<26:04:28, 3.28it/s] 17%|█▋ | 63187/371472 [5:01:44<25:25:25, 3.37it/s] 17%|█▋ | 63188/371472 [5:01:44<24:51:54, 3.44it/s] 17%|█▋ | 63189/371472 [5:01:44<25:32:45, 3.35it/s] 17%|█▋ | 63190/371472 [5:01:44<25:42:01, 3.33it/s] 17%|█▋ | 63191/371472 [5:01:45<25:05:09, 3.41it/s] 17%|█▋ | 63192/371472 [5:01:45<24:37:27, 3.48it/s] 17%|█▋ | 63193/371472 [5:01:45<24:09:45, 3.54it/s] 17%|█▋ | 63194/371472 [5:01:46<25:16:14, 3.39it/s] 17%|█▋ | 63195/371472 [5:01:46<24:36:12, 3.48it/s] 17%|█▋ | 63196/371472 [5:01:46<23:30:47, 3.64it/s] 17%|█▋ | 63197/371472 [5:01:46<26:11:13, 3.27it/s] 17%|█▋ | 63198/371472 [5:01:47<26:12:12, 3.27it/s] 17%|█▋ | 63199/371472 [5:01:47<25:17:12, 3.39it/s] 17%|█▋ | 63200/371472 [5:01:47<24:38:07, 3.48it/s] {'loss': 4.0589, 'learning_rate': 8.472817772414833e-07, 'epoch': 2.72} + 17%|█▋ | 63200/371472 [5:01:47<24:38:07, 3.48it/s] 17%|█▋ | 63201/371472 [5:01:48<24:27:56, 3.50it/s] 17%|█▋ | 63202/371472 [5:01:48<23:29:16, 3.65it/s] 17%|█▋ | 63203/371472 [5:01:48<24:19:17, 3.52it/s] 17%|█▋ | 63204/371472 [5:01:48<24:24:30, 3.51it/s] 17%|█▋ | 63205/371472 [5:01:49<26:41:47, 3.21it/s] 17%|█▋ | 63206/371472 [5:01:49<25:01:59, 3.42it/s] 17%|█▋ | 63207/371472 [5:01:49<25:25:42, 3.37it/s] 17%|█▋ | 63208/371472 [5:01:50<24:35:10, 3.48it/s] 17%|█▋ | 63209/371472 [5:01:50<23:24:52, 3.66it/s] 17%|█▋ | 63210/371472 [5:01:50<23:05:02, 3.71it/s] 17%|█▋ | 63211/371472 [5:01:50<22:50:46, 3.75it/s] 17%|█▋ | 63212/371472 [5:01:51<22:40:32, 3.78it/s] 17%|█▋ | 63213/371472 [5:01:51<22:30:26, 3.80it/s] 17%|█▋ | 63214/371472 [5:01:51<23:26:11, 3.65it/s] 17%|█▋ | 63215/371472 [5:01:52<24:32:45, 3.49it/s] 17%|█▋ | 63216/371472 [5:01:52<25:37:44, 3.34it/s] 17%|█▋ | 63217/371472 [5:01:52<24:19:28, 3.52it/s] 17%|█▋ | 63218/371472 [5:01:52<25:09:26, 3.40it/s] 17%|█▋ | 63219/371472 [5:01:53<24:24:09, 3.51it/s] 17%|█▋ | 63220/371472 [5:01:53<25:24:39, 3.37it/s] {'loss': 4.1424, 'learning_rate': 8.472332952660044e-07, 'epoch': 2.72} + 17%|█▋ | 63220/371472 [5:01:53<25:24:39, 3.37it/s] 17%|█▋ | 63221/371472 [5:01:53<25:06:30, 3.41it/s] 17%|█▋ | 63222/371472 [5:01:54<24:18:46, 3.52it/s] 17%|█▋ | 63223/371472 [5:01:54<23:44:28, 3.61it/s] 17%|█▋ | 63224/371472 [5:01:54<23:30:25, 3.64it/s] 17%|█▋ | 63225/371472 [5:01:54<24:07:15, 3.55it/s] 17%|█▋ | 63226/371472 [5:01:55<23:14:57, 3.68it/s] 17%|█▋ | 63227/371472 [5:01:55<23:40:27, 3.62it/s] 17%|█▋ | 63228/371472 [5:01:55<24:36:09, 3.48it/s] 17%|█▋ | 63229/371472 [5:01:55<24:19:37, 3.52it/s] 17%|█▋ | 63230/371472 [5:01:56<24:04:16, 3.56it/s] 17%|█▋ | 63231/371472 [5:01:56<24:07:13, 3.55it/s] 17%|█▋ | 63232/371472 [5:01:56<23:14:26, 3.68it/s] 17%|█▋ | 63233/371472 [5:01:57<22:51:07, 3.75it/s] 17%|█▋ | 63234/371472 [5:01:57<25:21:48, 3.38it/s] 17%|█▋ | 63235/371472 [5:01:57<24:42:34, 3.47it/s] 17%|█▋ | 63236/371472 [5:01:57<24:04:51, 3.56it/s] 17%|█▋ | 63237/371472 [5:01:58<24:45:56, 3.46it/s] 17%|█▋ | 63238/371472 [5:01:58<24:44:01, 3.46it/s] 17%|█▋ | 63239/371472 [5:01:58<25:47:05, 3.32it/s] 17%|█▋ | 63240/371472 [5:01:59<24:43:16, 3.46it/s] {'loss': 4.256, 'learning_rate': 8.471848132905255e-07, 'epoch': 2.72} + 17%|█▋ | 63240/371472 [5:01:59<24:43:16, 3.46it/s] 17%|█▋ | 63241/371472 [5:01:59<23:40:48, 3.62it/s] 17%|█▋ | 63242/371472 [5:01:59<23:54:36, 3.58it/s] 17%|█▋ | 63243/371472 [5:01:59<23:54:47, 3.58it/s] 17%|█▋ | 63244/371472 [5:02:00<23:50:20, 3.59it/s] 17%|█▋ | 63245/371472 [5:02:00<28:12:43, 3.03it/s] 17%|█▋ | 63246/371472 [5:02:00<25:53:30, 3.31it/s] 17%|█▋ | 63247/371472 [5:02:01<25:52:18, 3.31it/s] 17%|█▋ | 63248/371472 [5:02:01<24:55:20, 3.44it/s] 17%|█▋ | 63249/371472 [5:02:01<24:20:14, 3.52it/s] 17%|█▋ | 63250/371472 [5:02:02<24:02:18, 3.56it/s] 17%|█▋ | 63251/371472 [5:02:02<23:22:30, 3.66it/s] 17%|█▋ | 63252/371472 [5:02:02<22:47:23, 3.76it/s] 17%|█▋ | 63253/371472 [5:02:02<22:45:16, 3.76it/s] 17%|█▋ | 63254/371472 [5:02:03<25:01:28, 3.42it/s] 17%|█▋ | 63255/371472 [5:02:03<24:37:49, 3.48it/s] 17%|█▋ | 63256/371472 [5:02:03<24:13:15, 3.53it/s] 17%|█▋ | 63257/371472 [5:02:03<23:14:29, 3.68it/s] 17%|█▋ | 63258/371472 [5:02:04<24:25:45, 3.50it/s] 17%|█▋ | 63259/371472 [5:02:04<23:49:39, 3.59it/s] 17%|█▋ | 63260/371472 [5:02:04<23:49:12, 3.59it/s] {'loss': 4.1204, 'learning_rate': 8.471363313150466e-07, 'epoch': 2.72} + 17%|█▋ | 63260/371472 [5:02:04<23:49:12, 3.59it/s] 17%|█▋ | 63261/371472 [5:02:05<23:24:51, 3.66it/s] 17%|█▋ | 63262/371472 [5:02:05<23:27:18, 3.65it/s] 17%|█▋ | 63263/371472 [5:02:05<23:13:26, 3.69it/s] 17%|█▋ | 63264/371472 [5:02:05<24:03:32, 3.56it/s] 17%|█▋ | 63265/371472 [5:02:06<23:32:38, 3.64it/s] 17%|█▋ | 63266/371472 [5:02:06<22:41:38, 3.77it/s] 17%|█▋ | 63267/371472 [5:02:06<24:05:34, 3.55it/s] 17%|█▋ | 63268/371472 [5:02:06<23:22:08, 3.66it/s] 17%|█▋ | 63269/371472 [5:02:07<23:16:28, 3.68it/s] 17%|█▋ | 63270/371472 [5:02:07<22:47:11, 3.76it/s] 17%|█▋ | 63271/371472 [5:02:07<23:43:40, 3.61it/s] 17%|█▋ | 63272/371472 [5:02:08<23:16:27, 3.68it/s] 17%|█▋ | 63273/371472 [5:02:08<24:08:37, 3.55it/s] 17%|█▋ | 63274/371472 [5:02:08<26:34:43, 3.22it/s] 17%|█▋ | 63275/371472 [5:02:08<24:47:46, 3.45it/s] 17%|█▋ | 63276/371472 [5:02:09<23:48:54, 3.59it/s] 17%|█▋ | 63277/371472 [5:02:09<24:45:31, 3.46it/s] 17%|█▋ | 63278/371472 [5:02:09<23:58:11, 3.57it/s] 17%|█▋ | 63279/371472 [5:02:10<23:31:43, 3.64it/s] 17%|█▋ | 63280/371472 [5:02:10<23:48:27, 3.60it/s] {'loss': 4.1596, 'learning_rate': 8.470878493395676e-07, 'epoch': 2.73} + 17%|█▋ | 63280/371472 [5:02:10<23:48:27, 3.60it/s] 17%|█▋ | 63281/371472 [5:02:10<23:22:00, 3.66it/s] 17%|█▋ | 63282/371472 [5:02:10<22:48:57, 3.75it/s] 17%|█▋ | 63283/371472 [5:02:11<23:09:45, 3.70it/s] 17%|█▋ | 63284/371472 [5:02:11<22:33:22, 3.80it/s] 17%|█▋ | 63285/371472 [5:02:11<23:19:13, 3.67it/s] 17%|█▋ | 63286/371472 [5:02:12<26:25:54, 3.24it/s] 17%|█▋ | 63287/371472 [5:02:12<25:20:36, 3.38it/s] 17%|█▋ | 63288/371472 [5:02:12<23:59:05, 3.57it/s] 17%|█▋ | 63289/371472 [5:02:12<23:39:55, 3.62it/s] 17%|█▋ | 63290/371472 [5:02:13<22:45:14, 3.76it/s] 17%|█▋ | 63291/371472 [5:02:13<23:35:40, 3.63it/s] 17%|█▋ | 63292/371472 [5:02:13<24:34:07, 3.48it/s] 17%|█▋ | 63293/371472 [5:02:13<23:42:22, 3.61it/s] 17%|█▋ | 63294/371472 [5:02:14<24:32:48, 3.49it/s] 17%|█▋ | 63295/371472 [5:02:14<24:29:40, 3.49it/s] 17%|█▋ | 63296/371472 [5:02:14<23:50:36, 3.59it/s] 17%|█▋ | 63297/371472 [5:02:15<24:41:26, 3.47it/s] 17%|█▋ | 63298/371472 [5:02:15<23:41:06, 3.61it/s] 17%|█▋ | 63299/371472 [5:02:15<23:02:15, 3.72it/s] 17%|█▋ | 63300/371472 [5:02:15<23:05:31, 3.71it/s] {'loss': 4.0859, 'learning_rate': 8.470393673640888e-07, 'epoch': 2.73} + 17%|█▋ | 63300/371472 [5:02:15<23:05:31, 3.71it/s] 17%|█▋ | 63301/371472 [5:02:16<24:11:13, 3.54it/s] 17%|█▋ | 63302/371472 [5:02:16<25:58:46, 3.30it/s] 17%|█▋ | 63303/371472 [5:02:16<24:49:31, 3.45it/s] 17%|█▋ | 63304/371472 [5:02:17<23:23:01, 3.66it/s] 17%|█▋ | 63305/371472 [5:02:17<22:27:24, 3.81it/s] 17%|█▋ | 63306/371472 [5:02:17<22:51:49, 3.74it/s] 17%|█▋ | 63307/371472 [5:02:17<22:22:38, 3.83it/s] 17%|█▋ | 63308/371472 [5:02:18<23:40:59, 3.61it/s] 17%|█▋ | 63309/371472 [5:02:18<23:31:49, 3.64it/s] 17%|█▋ | 63310/371472 [5:02:18<23:53:40, 3.58it/s] 17%|█▋ | 63311/371472 [5:02:18<23:13:08, 3.69it/s] 17%|█▋ | 63312/371472 [5:02:19<22:37:24, 3.78it/s] 17%|█▋ | 63313/371472 [5:02:19<22:40:50, 3.77it/s] 17%|█▋ | 63314/371472 [5:02:19<23:23:17, 3.66it/s] 17%|█▋ | 63315/371472 [5:02:20<23:58:13, 3.57it/s] 17%|█▋ | 63316/371472 [5:02:20<24:50:42, 3.45it/s] 17%|█▋ | 63317/371472 [5:02:20<23:56:39, 3.57it/s] 17%|█▋ | 63318/371472 [5:02:20<24:25:17, 3.51it/s] 17%|█▋ | 63319/371472 [5:02:21<23:29:48, 3.64it/s] 17%|█▋ | 63320/371472 [5:02:21<23:37:48, 3.62it/s] {'loss': 4.032, 'learning_rate': 8.4699088538861e-07, 'epoch': 2.73} + 17%|█▋ | 63320/371472 [5:02:21<23:37:48, 3.62it/s] 17%|█▋ | 63321/371472 [5:02:21<30:01:03, 2.85it/s] 17%|█▋ | 63322/371472 [5:02:22<27:48:34, 3.08it/s] 17%|█▋ | 63323/371472 [5:02:22<27:29:29, 3.11it/s] 17%|█▋ | 63324/371472 [5:02:22<26:56:07, 3.18it/s] 17%|█▋ | 63325/371472 [5:02:23<27:27:30, 3.12it/s] 17%|█▋ | 63326/371472 [5:02:23<26:55:56, 3.18it/s] 17%|█▋ | 63327/371472 [5:02:23<27:38:20, 3.10it/s] 17%|█▋ | 63328/371472 [5:02:24<26:35:20, 3.22it/s] 17%|█▋ | 63329/371472 [5:02:24<26:01:43, 3.29it/s] 17%|█▋ | 63330/371472 [5:02:24<26:19:46, 3.25it/s] 17%|█▋ | 63331/371472 [5:02:25<25:57:00, 3.30it/s] 17%|█▋ | 63332/371472 [5:02:25<26:21:03, 3.25it/s] 17%|█▋ | 63333/371472 [5:02:25<25:06:04, 3.41it/s] 17%|█▋ | 63334/371472 [5:02:25<24:15:14, 3.53it/s] 17%|█▋ | 63335/371472 [5:02:26<23:39:14, 3.62it/s] 17%|█▋ | 63336/371472 [5:02:26<23:47:55, 3.60it/s] 17%|█▋ | 63337/371472 [5:02:26<23:42:14, 3.61it/s] 17%|█▋ | 63338/371472 [5:02:26<23:03:07, 3.71it/s] 17%|█▋ | 63339/371472 [5:02:27<23:23:31, 3.66it/s] 17%|█▋ | 63340/371472 [5:02:27<24:48:00, 3.45it/s] {'loss': 4.3392, 'learning_rate': 8.469424034131311e-07, 'epoch': 2.73} + 17%|█▋ | 63340/371472 [5:02:27<24:48:00, 3.45it/s] 17%|█▋ | 63341/371472 [5:02:27<25:07:03, 3.41it/s] 17%|█▋ | 63342/371472 [5:02:28<27:36:59, 3.10it/s] 17%|█▋ | 63343/371472 [5:02:28<25:47:38, 3.32it/s] 17%|█▋ | 63344/371472 [5:02:28<24:57:10, 3.43it/s] 17%|█▋ | 63345/371472 [5:02:28<23:56:54, 3.57it/s] 17%|█▋ | 63346/371472 [5:02:29<23:16:24, 3.68it/s] 17%|█▋ | 63347/371472 [5:02:29<23:13:34, 3.69it/s] 17%|█▋ | 63348/371472 [5:02:29<24:33:31, 3.49it/s] 17%|█▋ | 63349/371472 [5:02:30<24:37:53, 3.47it/s] 17%|█▋ | 63350/371472 [5:02:30<24:18:09, 3.52it/s] 17%|█▋ | 63351/371472 [5:02:30<25:11:32, 3.40it/s] 17%|█▋ | 63352/371472 [5:02:30<24:37:10, 3.48it/s] 17%|█▋ | 63353/371472 [5:02:31<23:39:57, 3.62it/s] 17%|█▋ | 63354/371472 [5:02:31<22:47:11, 3.76it/s] 17%|█▋ | 63355/371472 [5:02:31<22:08:05, 3.87it/s] 17%|█▋ | 63356/371472 [5:02:31<22:21:49, 3.83it/s] 17%|█▋ | 63357/371472 [5:02:32<23:21:28, 3.66it/s] 17%|█▋ | 63358/371472 [5:02:32<24:50:10, 3.45it/s] 17%|█▋ | 63359/371472 [5:02:32<24:09:24, 3.54it/s] 17%|█▋ | 63360/371472 [5:02:33<26:11:26, 3.27it/s] {'loss': 4.18, 'learning_rate': 8.468939214376521e-07, 'epoch': 2.73} + 17%|█▋ | 63360/371472 [5:02:33<26:11:26, 3.27it/s] 17%|█▋ | 63361/371472 [5:02:33<25:16:09, 3.39it/s] 17%|█▋ | 63362/371472 [5:02:33<24:51:54, 3.44it/s] 17%|█▋ | 63363/371472 [5:02:34<24:23:58, 3.51it/s] 17%|█▋ | 63364/371472 [5:02:34<23:33:15, 3.63it/s] 17%|█▋ | 63365/371472 [5:02:34<23:09:06, 3.70it/s] 17%|█▋ | 63366/371472 [5:02:34<23:55:41, 3.58it/s] 17%|█▋ | 63367/371472 [5:02:35<23:18:10, 3.67it/s] 17%|█▋ | 63368/371472 [5:02:35<22:50:53, 3.75it/s] 17%|█▋ | 63369/371472 [5:02:35<22:48:37, 3.75it/s] 17%|█▋ | 63370/371472 [5:02:35<22:29:41, 3.80it/s] 17%|█▋ | 63371/371472 [5:02:36<22:32:26, 3.80it/s] 17%|█▋ | 63372/371472 [5:02:36<22:33:48, 3.79it/s] 17%|█▋ | 63373/371472 [5:02:36<24:36:22, 3.48it/s] 17%|█▋ | 63374/371472 [5:02:37<24:15:16, 3.53it/s] 17%|█▋ | 63375/371472 [5:02:37<23:44:23, 3.61it/s] 17%|█▋ | 63376/371472 [5:02:37<23:17:48, 3.67it/s] 17%|█▋ | 63377/371472 [5:02:37<22:49:01, 3.75it/s] 17%|█▋ | 63378/371472 [5:02:38<22:25:11, 3.82it/s] 17%|█▋ | 63379/371472 [5:02:38<23:41:39, 3.61it/s] 17%|█▋ | 63380/371472 [5:02:38<23:38:40, 3.62it/s] {'loss': 4.2426, 'learning_rate': 8.468454394621732e-07, 'epoch': 2.73} + 17%|█▋ | 63380/371472 [5:02:38<23:38:40, 3.62it/s] 17%|█▋ | 63381/371472 [5:02:38<23:15:55, 3.68it/s] 17%|█▋ | 63382/371472 [5:02:39<23:40:27, 3.61it/s] 17%|█▋ | 63383/371472 [5:02:39<25:41:31, 3.33it/s] 17%|█▋ | 63384/371472 [5:02:39<25:49:54, 3.31it/s] 17%|█▋ | 63385/371472 [5:02:40<24:06:22, 3.55it/s] 17%|█▋ | 63386/371472 [5:02:40<23:17:15, 3.67it/s] 17%|█▋ | 63387/371472 [5:02:40<22:28:09, 3.81it/s] 17%|█▋ | 63388/371472 [5:02:40<22:00:53, 3.89it/s] 17%|█▋ | 63389/371472 [5:02:41<25:10:12, 3.40it/s] 17%|█▋ | 63390/371472 [5:02:41<24:44:45, 3.46it/s] 17%|█▋ | 63391/371472 [5:02:41<24:13:59, 3.53it/s] 17%|█▋ | 63392/371472 [5:02:42<24:23:11, 3.51it/s] 17%|█▋ | 63393/371472 [5:02:42<24:10:01, 3.54it/s] 17%|█▋ | 63394/371472 [5:02:42<23:25:14, 3.65it/s] 17%|█▋ | 63395/371472 [5:02:42<24:02:58, 3.56it/s] 17%|█▋ | 63396/371472 [5:02:43<25:45:52, 3.32it/s] 17%|█▋ | 63397/371472 [5:02:43<27:16:16, 3.14it/s] 17%|█▋ | 63398/371472 [5:02:43<26:40:29, 3.21it/s] 17%|█▋ | 63399/371472 [5:02:44<25:29:56, 3.36it/s] 17%|█▋ | 63400/371472 [5:02:44<24:52:39, 3.44it/s] {'loss': 4.1187, 'learning_rate': 8.467969574866943e-07, 'epoch': 2.73} + 17%|█▋ | 63400/371472 [5:02:44<24:52:39, 3.44it/s] 17%|█▋ | 63401/371472 [5:02:44<23:54:18, 3.58it/s] 17%|█▋ | 63402/371472 [5:02:44<24:00:20, 3.56it/s] 17%|█▋ | 63403/371472 [5:02:45<27:00:57, 3.17it/s] 17%|█▋ | 63404/371472 [5:02:45<25:32:34, 3.35it/s] 17%|█▋ | 63405/371472 [5:02:45<25:30:20, 3.36it/s] 17%|█▋ | 63406/371472 [5:02:46<25:24:41, 3.37it/s] 17%|█▋ | 63407/371472 [5:02:46<24:27:25, 3.50it/s] 17%|█▋ | 63408/371472 [5:02:46<24:42:42, 3.46it/s] 17%|█▋ | 63409/371472 [5:02:47<23:26:32, 3.65it/s] 17%|█▋ | 63410/371472 [5:02:47<23:30:48, 3.64it/s] 17%|█▋ | 63411/371472 [5:02:47<25:19:47, 3.38it/s] 17%|█▋ | 63412/371472 [5:02:47<24:32:58, 3.49it/s] 17%|█▋ | 63413/371472 [5:02:48<25:03:55, 3.41it/s] 17%|█▋ | 63414/371472 [5:02:48<25:16:16, 3.39it/s] 17%|█▋ | 63415/371472 [5:02:48<25:31:48, 3.35it/s] 17%|█▋ | 63416/371472 [5:02:49<24:27:18, 3.50it/s] 17%|█▋ | 63417/371472 [5:02:49<23:57:33, 3.57it/s] 17%|█▋ | 63418/371472 [5:02:49<22:54:48, 3.73it/s] 17%|█▋ | 63419/371472 [5:02:49<25:31:48, 3.35it/s] 17%|█▋ | 63420/371472 [5:02:50<24:11:04, 3.54it/s] {'loss': 4.1211, 'learning_rate': 8.467484755112155e-07, 'epoch': 2.73} + 17%|█▋ | 63420/371472 [5:02:50<24:11:04, 3.54it/s] 17%|█▋ | 63421/371472 [5:02:50<23:56:31, 3.57it/s] 17%|█▋ | 63422/371472 [5:02:50<25:05:14, 3.41it/s] 17%|█▋ | 63423/371472 [5:02:51<24:59:12, 3.42it/s] 17%|█▋ | 63424/371472 [5:02:51<24:09:41, 3.54it/s] 17%|█▋ | 63425/371472 [5:02:51<23:22:31, 3.66it/s] 17%|█▋ | 63426/371472 [5:02:51<23:14:18, 3.68it/s] 17%|█▋ | 63427/371472 [5:02:52<23:38:12, 3.62it/s] 17%|█▋ | 63428/371472 [5:02:52<25:03:34, 3.41it/s] 17%|█▋ | 63429/371472 [5:02:52<25:05:44, 3.41it/s] 17%|█▋ | 63430/371472 [5:02:53<24:43:32, 3.46it/s] 17%|█▋ | 63431/371472 [5:02:53<24:28:52, 3.50it/s] 17%|█▋ | 63432/371472 [5:02:53<24:17:50, 3.52it/s] 17%|█▋ | 63433/371472 [5:02:53<23:53:57, 3.58it/s] 17%|█▋ | 63434/371472 [5:02:54<23:08:08, 3.70it/s] 17%|█▋ | 63435/371472 [5:02:54<24:54:27, 3.44it/s] 17%|█▋ | 63436/371472 [5:02:54<23:46:09, 3.60it/s] 17%|█▋ | 63437/371472 [5:02:55<24:23:39, 3.51it/s] 17%|█▋ | 63438/371472 [5:02:55<23:25:22, 3.65it/s] 17%|█▋ | 63439/371472 [5:02:55<23:19:31, 3.67it/s] 17%|█▋ | 63440/371472 [5:02:55<23:37:40, 3.62it/s] {'loss': 4.1815, 'learning_rate': 8.466999935357365e-07, 'epoch': 2.73} + 17%|█▋ | 63440/371472 [5:02:55<23:37:40, 3.62it/s] 17%|█▋ | 63441/371472 [5:02:56<23:31:52, 3.64it/s] 17%|█▋ | 63442/371472 [5:02:56<22:56:16, 3.73it/s] 17%|█▋ | 63443/371472 [5:02:56<23:28:02, 3.65it/s] 17%|█▋ | 63444/371472 [5:02:56<24:08:31, 3.54it/s] 17%|█▋ | 63445/371472 [5:02:57<24:05:57, 3.55it/s] 17%|█▋ | 63446/371472 [5:02:57<23:41:32, 3.61it/s] 17%|█▋ | 63447/371472 [5:02:57<23:08:41, 3.70it/s] 17%|█▋ | 63448/371472 [5:02:58<22:57:39, 3.73it/s] 17%|█▋ | 63449/371472 [5:02:58<24:23:54, 3.51it/s] 17%|█▋ | 63450/371472 [5:02:58<23:19:40, 3.67it/s] 17%|█▋ | 63451/371472 [5:02:58<23:24:16, 3.66it/s] 17%|█▋ | 63452/371472 [5:02:59<22:30:48, 3.80it/s] 17%|��▋ | 63453/371472 [5:02:59<23:11:06, 3.69it/s] 17%|█▋ | 63454/371472 [5:02:59<23:10:45, 3.69it/s] 17%|█▋ | 63455/371472 [5:02:59<25:07:41, 3.40it/s] 17%|█▋ | 63456/371472 [5:03:00<24:36:03, 3.48it/s] 17%|█▋ | 63457/371472 [5:03:00<24:16:36, 3.52it/s] 17%|█▋ | 63458/371472 [5:03:00<23:52:53, 3.58it/s] 17%|█▋ | 63459/371472 [5:03:01<23:03:48, 3.71it/s] 17%|█▋ | 63460/371472 [5:03:01<23:54:35, 3.58it/s] {'loss': 4.1454, 'learning_rate': 8.466515115602577e-07, 'epoch': 2.73} + 17%|█▋ | 63460/371472 [5:03:01<23:54:35, 3.58it/s] 17%|█▋ | 63461/371472 [5:03:01<25:01:29, 3.42it/s] 17%|█▋ | 63462/371472 [5:03:01<23:51:42, 3.59it/s] 17%|█▋ | 63463/371472 [5:03:02<23:23:10, 3.66it/s] 17%|█▋ | 63464/371472 [5:03:02<26:05:52, 3.28it/s] 17%|█▋ | 63465/371472 [5:03:02<25:07:19, 3.41it/s] 17%|█▋ | 63466/371472 [5:03:03<24:03:07, 3.56it/s] 17%|█▋ | 63467/371472 [5:03:03<25:46:20, 3.32it/s] 17%|█▋ | 63468/371472 [5:03:03<24:37:52, 3.47it/s] 17%|█▋ | 63469/371472 [5:03:03<23:40:39, 3.61it/s] 17%|█▋ | 63470/371472 [5:03:04<23:51:12, 3.59it/s] 17%|█▋ | 63471/371472 [5:03:04<23:54:37, 3.58it/s] 17%|█▋ | 63472/371472 [5:03:04<23:29:31, 3.64it/s] 17%|█▋ | 63473/371472 [5:03:05<23:16:39, 3.68it/s] 17%|█▋ | 63474/371472 [5:03:05<22:44:59, 3.76it/s] 17%|█▋ | 63475/371472 [5:03:05<23:53:29, 3.58it/s] 17%|█▋ | 63476/371472 [5:03:05<23:44:28, 3.60it/s] 17%|█▋ | 63477/371472 [5:03:06<24:57:51, 3.43it/s] 17%|█▋ | 63478/371472 [5:03:06<23:43:52, 3.61it/s] 17%|█▋ | 63479/371472 [5:03:06<24:10:38, 3.54it/s] 17%|█▋ | 63480/371472 [5:03:07<24:40:23, 3.47it/s] {'loss': 4.381, 'learning_rate': 8.466030295847788e-07, 'epoch': 2.73} + 17%|█▋ | 63480/371472 [5:03:07<24:40:23, 3.47it/s] 17%|█▋ | 63481/371472 [5:03:07<23:45:40, 3.60it/s] 17%|█▋ | 63482/371472 [5:03:07<24:56:14, 3.43it/s] 17%|█▋ | 63483/371472 [5:03:07<24:13:21, 3.53it/s] 17%|█▋ | 63484/371472 [5:03:08<24:21:13, 3.51it/s] 17%|█▋ | 63485/371472 [5:03:08<23:46:49, 3.60it/s] 17%|█▋ | 63486/371472 [5:03:08<24:44:33, 3.46it/s] 17%|█▋ | 63487/371472 [5:03:09<24:11:22, 3.54it/s] 17%|█▋ | 63488/371472 [5:03:09<23:53:52, 3.58it/s] 17%|█▋ | 63489/371472 [5:03:09<23:18:03, 3.67it/s] 17%|█▋ | 63490/371472 [5:03:09<22:57:18, 3.73it/s] 17%|█▋ | 63491/371472 [5:03:10<24:59:52, 3.42it/s] 17%|█▋ | 63492/371472 [5:03:10<23:55:13, 3.58it/s] 17%|█▋ | 63493/371472 [5:03:10<22:49:21, 3.75it/s] 17%|█▋ | 63494/371472 [5:03:10<23:12:40, 3.69it/s] 17%|█▋ | 63495/371472 [5:03:11<22:56:08, 3.73it/s] 17%|█▋ | 63496/371472 [5:03:11<22:22:31, 3.82it/s] 17%|█▋ | 63497/371472 [5:03:11<22:26:02, 3.81it/s] 17%|█▋ | 63498/371472 [5:03:11<22:56:28, 3.73it/s] 17%|█▋ | 63499/371472 [5:03:12<22:27:42, 3.81it/s] 17%|█▋ | 63500/371472 [5:03:12<22:57:19, 3.73it/s] {'loss': 4.3725, 'learning_rate': 8.465545476092999e-07, 'epoch': 2.74} + 17%|█▋ | 63500/371472 [5:03:12<22:57:19, 3.73it/s] 17%|█▋ | 63501/371472 [5:03:12<23:08:11, 3.70it/s] 17%|█▋ | 63502/371472 [5:03:13<24:04:53, 3.55it/s] 17%|█▋ | 63503/371472 [5:03:13<25:16:50, 3.38it/s] 17%|█▋ | 63504/371472 [5:03:13<25:14:57, 3.39it/s] 17%|█▋ | 63505/371472 [5:03:13<25:01:43, 3.42it/s] 17%|█▋ | 63506/371472 [5:03:14<23:51:13, 3.59it/s] 17%|█▋ | 63507/371472 [5:03:14<25:31:35, 3.35it/s] 17%|█▋ | 63508/371472 [5:03:14<25:54:24, 3.30it/s] 17%|█▋ | 63509/371472 [5:03:15<25:08:58, 3.40it/s] 17%|█▋ | 63510/371472 [5:03:15<25:31:24, 3.35it/s] 17%|█▋ | 63511/371472 [5:03:15<23:45:51, 3.60it/s] 17%|█▋ | 63512/371472 [5:03:16<24:20:18, 3.51it/s] 17%|█▋ | 63513/371472 [5:03:16<23:20:41, 3.66it/s] 17%|█▋ | 63514/371472 [5:03:16<24:17:48, 3.52it/s] 17%|█▋ | 63515/371472 [5:03:16<23:26:45, 3.65it/s] 17%|█▋ | 63516/371472 [5:03:17<23:06:45, 3.70it/s] 17%|█▋ | 63517/371472 [5:03:17<22:35:27, 3.79it/s] 17%|█▋ | 63518/371472 [5:03:17<24:28:12, 3.50it/s] 17%|█▋ | 63519/371472 [5:03:17<24:18:43, 3.52it/s] 17%|█▋ | 63520/371472 [5:03:18<23:21:36, 3.66it/s] {'loss': 4.0545, 'learning_rate': 8.465060656338208e-07, 'epoch': 2.74} + 17%|█▋ | 63520/371472 [5:03:18<23:21:36, 3.66it/s] 17%|█▋ | 63521/371472 [5:03:18<23:35:53, 3.62it/s] 17%|█▋ | 63522/371472 [5:03:18<25:33:26, 3.35it/s] 17%|█▋ | 63523/371472 [5:03:19<26:51:57, 3.18it/s] 17%|█▋ | 63524/371472 [5:03:19<26:59:22, 3.17it/s] 17%|█▋ | 63525/371472 [5:03:19<25:50:09, 3.31it/s] 17%|█▋ | 63526/371472 [5:03:20<26:10:06, 3.27it/s] 17%|█▋ | 63527/371472 [5:03:20<25:48:41, 3.31it/s] 17%|█▋ | 63528/371472 [5:03:20<24:21:50, 3.51it/s] 17%|█▋ | 63529/371472 [5:03:20<23:47:31, 3.60it/s] 17%|█▋ | 63530/371472 [5:03:21<24:14:06, 3.53it/s] 17%|█▋ | 63531/371472 [5:03:21<24:00:29, 3.56it/s] 17%|█▋ | 63532/371472 [5:03:21<23:46:05, 3.60it/s] 17%|█▋ | 63533/371472 [5:03:21<23:30:25, 3.64it/s] 17%|█▋ | 63534/371472 [5:03:22<23:10:10, 3.69it/s] 17%|█▋ | 63535/371472 [5:03:22<23:21:46, 3.66it/s] 17%|█▋ | 63536/371472 [5:03:22<22:48:25, 3.75it/s] 17%|█▋ | 63537/371472 [5:03:23<23:54:05, 3.58it/s] 17%|█▋ | 63538/371472 [5:03:23<23:20:42, 3.66it/s] 17%|█▋ | 63539/371472 [5:03:23<23:14:44, 3.68it/s] 17%|█▋ | 63540/371472 [5:03:23<25:04:06, 3.41it/s] {'loss': 4.2509, 'learning_rate': 8.464575836583421e-07, 'epoch': 2.74} + 17%|█▋ | 63540/371472 [5:03:23<25:04:06, 3.41it/s] 17%|█▋ | 63541/371472 [5:03:24<23:45:06, 3.60it/s] 17%|█▋ | 63542/371472 [5:03:24<23:46:27, 3.60it/s] 17%|█▋ | 63543/371472 [5:03:24<23:05:11, 3.71it/s] 17%|█▋ | 63544/371472 [5:03:24<22:54:21, 3.73it/s] 17%|█▋ | 63545/371472 [5:03:25<24:27:44, 3.50it/s] 17%|█▋ | 63546/371472 [5:03:25<23:55:39, 3.57it/s] 17%|█▋ | 63547/371472 [5:03:25<24:18:54, 3.52it/s] 17%|█▋ | 63548/371472 [5:03:26<24:40:02, 3.47it/s] 17%|█▋ | 63549/371472 [5:03:26<25:06:23, 3.41it/s] 17%|█▋ | 63550/371472 [5:03:26<25:39:19, 3.33it/s] 17%|█▋ | 63551/371472 [5:03:27<24:58:35, 3.42it/s] 17%|█▋ | 63552/371472 [5:03:27<24:07:06, 3.55it/s] 17%|█▋ | 63553/371472 [5:03:27<24:15:36, 3.53it/s] 17%|█▋ | 63554/371472 [5:03:27<24:04:05, 3.55it/s] 17%|█▋ | 63555/371472 [5:03:28<24:34:24, 3.48it/s] 17%|█▋ | 63556/371472 [5:03:28<24:00:11, 3.56it/s] 17%|█▋ | 63557/371472 [5:03:28<23:45:25, 3.60it/s] 17%|█▋ | 63558/371472 [5:03:29<25:15:07, 3.39it/s] 17%|█▋ | 63559/371472 [5:03:29<24:24:48, 3.50it/s] 17%|█▋ | 63560/371472 [5:03:29<23:31:56, 3.63it/s] {'loss': 4.0918, 'learning_rate': 8.464091016828632e-07, 'epoch': 2.74} + 17%|█▋ | 63560/371472 [5:03:29<23:31:56, 3.63it/s] 17%|█▋ | 63561/371472 [5:03:29<22:53:22, 3.74it/s] 17%|█▋ | 63562/371472 [5:03:30<21:52:24, 3.91it/s] 17%|█▋ | 63563/371472 [5:03:30<23:40:20, 3.61it/s] 17%|█▋ | 63564/371472 [5:03:30<24:47:56, 3.45it/s] 17%|█▋ | 63565/371472 [5:03:30<23:23:36, 3.66it/s] 17%|█▋ | 63566/371472 [5:03:31<24:47:26, 3.45it/s] 17%|█▋ | 63567/371472 [5:03:31<24:00:49, 3.56it/s] 17%|█▋ | 63568/371472 [5:03:31<24:21:13, 3.51it/s] 17%|█▋ | 63569/371472 [5:03:32<25:31:52, 3.35it/s] 17%|█▋ | 63570/371472 [5:03:32<25:04:40, 3.41it/s] 17%|█▋ | 63571/371472 [5:03:32<24:21:06, 3.51it/s] 17%|█▋ | 63572/371472 [5:03:33<26:24:56, 3.24it/s] 17%|█▋ | 63573/371472 [5:03:33<25:17:12, 3.38it/s] 17%|█▋ | 63574/371472 [5:03:33<24:12:02, 3.53it/s] 17%|█▋ | 63575/371472 [5:03:33<24:01:45, 3.56it/s] 17%|█▋ | 63576/371472 [5:03:34<23:24:47, 3.65it/s] 17%|█▋ | 63577/371472 [5:03:34<24:59:28, 3.42it/s] 17%|█▋ | 63578/371472 [5:03:34<25:02:44, 3.41it/s] 17%|█▋ | 63579/371472 [5:03:34<23:40:45, 3.61it/s] 17%|█▋ | 63580/371472 [5:03:35<23:39:02, 3.62it/s] {'loss': 4.2106, 'learning_rate': 8.463606197073844e-07, 'epoch': 2.74} + 17%|█▋ | 63580/371472 [5:03:35<23:39:02, 3.62it/s] 17%|█▋ | 63581/371472 [5:03:35<22:54:53, 3.73it/s] 17%|█▋ | 63582/371472 [5:03:35<23:49:32, 3.59it/s] 17%|█▋ | 63583/371472 [5:03:36<23:59:00, 3.57it/s] 17%|█▋ | 63584/371472 [5:03:36<24:45:38, 3.45it/s] 17%|█▋ | 63585/371472 [5:03:36<24:35:40, 3.48it/s] 17%|█▋ | 63586/371472 [5:03:36<24:49:10, 3.45it/s] 17%|█▋ | 63587/371472 [5:03:37<23:46:26, 3.60it/s] 17%|█▋ | 63588/371472 [5:03:37<24:34:13, 3.48it/s] 17%|█▋ | 63589/371472 [5:03:37<24:48:17, 3.45it/s] 17%|█▋ | 63590/371472 [5:03:38<24:51:45, 3.44it/s] 17%|█▋ | 63591/371472 [5:03:38<24:17:55, 3.52it/s] 17%|█▋ | 63592/371472 [5:03:38<23:22:46, 3.66it/s] 17%|█▋ | 63593/371472 [5:03:38<24:45:23, 3.45it/s] 17%|█▋ | 63594/371472 [5:03:39<23:51:59, 3.58it/s] 17%|█▋ | 63595/371472 [5:03:39<23:51:22, 3.58it/s] 17%|█▋ | 63596/371472 [5:03:39<23:01:54, 3.71it/s] 17%|█▋ | 63597/371472 [5:03:40<22:42:15, 3.77it/s] 17%|█▋ | 63598/371472 [5:03:40<22:49:47, 3.75it/s] 17%|█▋ | 63599/371472 [5:03:40<23:37:12, 3.62it/s] 17%|█▋ | 63600/371472 [5:03:40<24:46:31, 3.45it/s] {'loss': 4.145, 'learning_rate': 8.463121377319054e-07, 'epoch': 2.74} + 17%|█▋ | 63600/371472 [5:03:40<24:46:31, 3.45it/s] 17%|█▋ | 63601/371472 [5:03:41<23:59:53, 3.56it/s] 17%|█▋ | 63602/371472 [5:03:41<23:16:01, 3.68it/s] 17%|█▋ | 63603/371472 [5:03:41<23:36:37, 3.62it/s] 17%|█▋ | 63604/371472 [5:03:41<23:49:56, 3.59it/s] 17%|█▋ | 63605/371472 [5:03:42<23:04:49, 3.71it/s] 17%|█▋ | 63606/371472 [5:03:42<23:05:08, 3.70it/s] 17%|█▋ | 63607/371472 [5:03:42<23:15:40, 3.68it/s] 17%|█▋ | 63608/371472 [5:03:43<22:52:54, 3.74it/s] 17%|█▋ | 63609/371472 [5:03:43<24:02:04, 3.56it/s] 17%|█▋ | 63610/371472 [5:03:43<24:16:07, 3.52it/s] 17%|█▋ | 63611/371472 [5:03:43<23:22:54, 3.66it/s] 17%|█▋ | 63612/371472 [5:03:44<22:50:53, 3.74it/s] 17%|█▋ | 63613/371472 [5:03:44<22:59:46, 3.72it/s] 17%|█▋ | 63614/371472 [5:03:44<22:30:49, 3.80it/s] 17%|█▋ | 63615/371472 [5:03:44<22:58:58, 3.72it/s] 17%|█▋ | 63616/371472 [5:03:45<24:33:23, 3.48it/s] 17%|█▋ | 63617/371472 [5:03:45<24:52:16, 3.44it/s] 17%|█▋ | 63618/371472 [5:03:45<24:23:39, 3.51it/s] 17%|█▋ | 63619/371472 [5:03:46<24:05:30, 3.55it/s] 17%|█▋ | 63620/371472 [5:03:46<23:14:14, 3.68it/s] {'loss': 4.2587, 'learning_rate': 8.462636557564265e-07, 'epoch': 2.74} + 17%|█▋ | 63620/371472 [5:03:46<23:14:14, 3.68it/s] 17%|█▋ | 63621/371472 [5:03:46<23:45:15, 3.60it/s] 17%|█▋ | 63622/371472 [5:03:46<24:08:59, 3.54it/s] 17%|█▋ | 63623/371472 [5:03:47<25:51:03, 3.31it/s] 17%|█▋ | 63624/371472 [5:03:47<24:43:28, 3.46it/s] 17%|█▋ | 63625/371472 [5:03:47<24:32:46, 3.48it/s] 17%|█▋ | 63626/371472 [5:03:48<24:25:33, 3.50it/s] 17%|█▋ | 63627/371472 [5:03:48<23:22:44, 3.66it/s] 17%|█▋ | 63628/371472 [5:03:48<23:20:26, 3.66it/s] 17%|█▋ | 63629/371472 [5:03:48<23:36:13, 3.62it/s] 17%|█▋ | 63630/371472 [5:03:49<23:00:39, 3.72it/s] 17%|█▋ | 63631/371472 [5:03:49<22:30:54, 3.80it/s] 17%|█▋ | 63632/371472 [5:03:49<22:13:11, 3.85it/s] 17%|█▋ | 63633/371472 [5:03:50<24:15:55, 3.52it/s] 17%|█▋ | 63634/371472 [5:03:50<23:54:13, 3.58it/s] 17%|█▋ | 63635/371472 [5:03:50<26:07:38, 3.27it/s] 17%|█▋ | 63636/371472 [5:03:51<27:13:28, 3.14it/s] 17%|█▋ | 63637/371472 [5:03:51<25:29:21, 3.35it/s] 17%|█▋ | 63638/371472 [5:03:51<26:21:18, 3.24it/s] 17%|█▋ | 63639/371472 [5:03:51<26:00:08, 3.29it/s] 17%|█▋ | 63640/371472 [5:03:52<24:23:51, 3.50it/s] {'loss': 4.2361, 'learning_rate': 8.462151737809476e-07, 'epoch': 2.74} + 17%|█▋ | 63640/371472 [5:03:52<24:23:51, 3.50it/s] 17%|█▋ | 63641/371472 [5:03:52<23:28:31, 3.64it/s] 17%|█▋ | 63642/371472 [5:03:52<22:45:37, 3.76it/s] 17%|█▋ | 63643/371472 [5:03:52<22:34:02, 3.79it/s] 17%|█▋ | 63644/371472 [5:03:53<22:24:41, 3.82it/s] 17%|█▋ | 63645/371472 [5:03:53<22:56:01, 3.73it/s] 17%|█▋ | 63646/371472 [5:03:53<22:44:09, 3.76it/s] 17%|█▋ | 63647/371472 [5:03:53<23:30:28, 3.64it/s] 17%|█▋ | 63648/371472 [5:03:54<23:33:55, 3.63it/s] 17%|█▋ | 63649/371472 [5:03:54<23:16:22, 3.67it/s] 17%|█▋ | 63650/371472 [5:03:54<22:48:03, 3.75it/s] 17%|█▋ | 63651/371472 [5:03:55<24:54:02, 3.43it/s] 17%|█▋ | 63652/371472 [5:03:55<24:27:17, 3.50it/s] 17%|█▋ | 63653/371472 [5:03:55<25:38:57, 3.33it/s] 17%|█▋ | 63654/371472 [5:03:56<25:06:23, 3.41it/s] 17%|█▋ | 63655/371472 [5:03:56<23:52:56, 3.58it/s] 17%|█▋ | 63656/371472 [5:03:56<23:25:01, 3.65it/s] 17%|█▋ | 63657/371472 [5:03:56<24:03:08, 3.55it/s] 17%|█▋ | 63658/371472 [5:03:57<23:24:22, 3.65it/s] 17%|█▋ | 63659/371472 [5:03:57<22:42:58, 3.76it/s] 17%|█▋ | 63660/371472 [5:03:57<22:50:41, 3.74it/s] {'loss': 4.3539, 'learning_rate': 8.461666918054686e-07, 'epoch': 2.74} + 17%|█▋ | 63660/371472 [5:03:57<22:50:41, 3.74it/s] 17%|█▋ | 63661/371472 [5:03:57<23:56:09, 3.57it/s] 17%|█▋ | 63662/371472 [5:03:58<23:36:13, 3.62it/s] 17%|█▋ | 63663/371472 [5:03:58<24:58:55, 3.42it/s] 17%|█▋ | 63664/371472 [5:03:58<24:35:23, 3.48it/s] 17%|█▋ | 63665/371472 [5:03:59<23:40:00, 3.61it/s] 17%|█▋ | 63666/371472 [5:03:59<24:19:42, 3.51it/s] 17%|█▋ | 63667/371472 [5:03:59<24:51:12, 3.44it/s] 17%|█▋ | 63668/371472 [5:03:59<24:26:54, 3.50it/s] 17%|█▋ | 63669/371472 [5:04:00<24:39:59, 3.47it/s] 17%|█▋ | 63670/371472 [5:04:00<23:44:47, 3.60it/s] 17%|█▋ | 63671/371472 [5:04:00<25:54:03, 3.30it/s] 17%|█▋ | 63672/371472 [5:04:01<25:19:52, 3.38it/s] 17%|█▋ | 63673/371472 [5:04:01<26:50:37, 3.19it/s] 17%|█▋ | 63674/371472 [5:04:01<26:50:18, 3.19it/s] 17%|█▋ | 63675/371472 [5:04:02<25:41:22, 3.33it/s] 17%|█▋ | 63676/371472 [5:04:02<24:31:45, 3.49it/s] 17%|█▋ | 63677/371472 [5:04:02<23:28:55, 3.64it/s] 17%|█▋ | 63678/371472 [5:04:02<23:45:38, 3.60it/s] 17%|█▋ | 63679/371472 [5:04:03<24:10:18, 3.54it/s] 17%|█▋ | 63680/371472 [5:04:03<23:48:00, 3.59it/s] {'loss': 4.2672, 'learning_rate': 8.461182098299898e-07, 'epoch': 2.74} + 17%|█▋ | 63680/371472 [5:04:03<23:48:00, 3.59it/s] 17%|█▋ | 63681/371472 [5:04:03<23:38:07, 3.62it/s] 17%|█▋ | 63682/371472 [5:04:03<23:14:29, 3.68it/s] 17%|█▋ | 63683/371472 [5:04:04<23:09:21, 3.69it/s] 17%|█▋ | 63684/371472 [5:04:04<22:27:25, 3.81it/s] 17%|█▋ | 63685/371472 [5:04:04<23:50:55, 3.58it/s] 17%|█▋ | 63686/371472 [5:04:05<23:44:04, 3.60it/s] 17%|█▋ | 63687/371472 [5:04:05<24:27:57, 3.49it/s] 17%|█▋ | 63688/371472 [5:04:05<26:42:45, 3.20it/s] 17%|█▋ | 63689/371472 [5:04:05<24:45:39, 3.45it/s] 17%|█▋ | 63690/371472 [5:04:06<23:33:38, 3.63it/s] 17%|█▋ | 63691/371472 [5:04:06<23:21:42, 3.66it/s] 17%|█▋ | 63692/371472 [5:04:06<23:00:21, 3.72it/s] 17%|█▋ | 63693/371472 [5:04:06<22:45:52, 3.76it/s] 17%|█▋ | 63694/371472 [5:04:07<23:38:15, 3.62it/s] 17%|█▋ | 63695/371472 [5:04:07<23:22:35, 3.66it/s] 17%|█▋ | 63696/371472 [5:04:07<24:33:40, 3.48it/s] 17%|█▋ | 63697/371472 [5:04:08<23:59:35, 3.56it/s] 17%|█▋ | 63698/371472 [5:04:08<23:01:44, 3.71it/s] 17%|█▋ | 63699/371472 [5:04:08<22:47:25, 3.75it/s] 17%|█▋ | 63700/371472 [5:04:08<23:51:37, 3.58it/s] {'loss': 4.306, 'learning_rate': 8.46069727854511e-07, 'epoch': 2.74} + 17%|█▋ | 63700/371472 [5:04:08<23:51:37, 3.58it/s] 17%|█▋ | 63701/371472 [5:04:09<23:08:30, 3.69it/s] 17%|█▋ | 63702/371472 [5:04:09<23:37:56, 3.62it/s] 17%|█▋ | 63703/371472 [5:04:09<23:09:37, 3.69it/s] 17%|█▋ | 63704/371472 [5:04:09<22:55:01, 3.73it/s] 17%|█▋ | 63705/371472 [5:04:10<23:55:21, 3.57it/s] 17%|█▋ | 63706/371472 [5:04:10<23:45:56, 3.60it/s] 17%|█▋ | 63707/371472 [5:04:10<23:30:34, 3.64it/s] 17%|█▋ | 63708/371472 [5:04:11<22:52:51, 3.74it/s] 17%|█▋ | 63709/371472 [5:04:11<23:46:00, 3.60it/s] 17%|█▋ | 63710/371472 [5:04:11<23:32:43, 3.63it/s] 17%|█▋ | 63711/371472 [5:04:11<22:54:08, 3.73it/s] 17%|█▋ | 63712/371472 [5:04:12<22:18:43, 3.83it/s] 17%|█▋ | 63713/371472 [5:04:12<23:07:43, 3.70it/s] 17%|█▋ | 63714/371472 [5:04:12<22:47:04, 3.75it/s] 17%|█▋ | 63715/371472 [5:04:13<24:03:11, 3.55it/s] 17%|█▋ | 63716/371472 [5:04:13<22:48:12, 3.75it/s] 17%|█▋ | 63717/371472 [5:04:13<22:14:58, 3.84it/s] 17%|█▋ | 63718/371472 [5:04:13<22:03:31, 3.88it/s] 17%|█▋ | 63719/371472 [5:04:14<22:42:37, 3.76it/s] 17%|█▋ | 63720/371472 [5:04:14<22:16:13, 3.84it/s] {'loss': 4.2876, 'learning_rate': 8.460212458790321e-07, 'epoch': 2.74} + 17%|█▋ | 63720/371472 [5:04:14<22:16:13, 3.84it/s] 17%|█▋ | 63721/371472 [5:04:14<22:58:25, 3.72it/s] 17%|█▋ | 63722/371472 [5:04:14<23:20:46, 3.66it/s] 17%|█▋ | 63723/371472 [5:04:15<24:12:05, 3.53it/s] 17%|█▋ | 63724/371472 [5:04:15<23:36:13, 3.62it/s] 17%|█▋ | 63725/371472 [5:04:15<24:52:59, 3.44it/s] 17%|█▋ | 63726/371472 [5:04:15<23:31:23, 3.63it/s] 17%|█▋ | 63727/371472 [5:04:16<25:26:10, 3.36it/s] 17%|█▋ | 63728/371472 [5:04:16<25:31:27, 3.35it/s] 17%|█▋ | 63729/371472 [5:04:16<24:18:51, 3.52it/s] 17%|█▋ | 63730/371472 [5:04:17<23:25:46, 3.65it/s] 17%|█▋ | 63731/371472 [5:04:17<23:53:19, 3.58it/s] 17%|█▋ | 63732/371472 [5:04:17<24:22:21, 3.51it/s] 17%|█▋ | 63733/371472 [5:04:18<24:49:59, 3.44it/s] 17%|█▋ | 63734/371472 [5:04:18<24:58:17, 3.42it/s] 17%|█▋ | 63735/371472 [5:04:18<24:32:01, 3.48it/s] 17%|█▋ | 63736/371472 [5:04:18<24:10:38, 3.54it/s] 17%|█▋ | 63737/371472 [5:04:19<25:25:13, 3.36it/s] 17%|█▋ | 63738/371472 [5:04:19<24:22:37, 3.51it/s] 17%|█▋ | 63739/371472 [5:04:19<25:12:04, 3.39it/s] 17%|█▋ | 63740/371472 [5:04:20<24:24:46, 3.50it/s] {'loss': 4.1529, 'learning_rate': 8.459727639035531e-07, 'epoch': 2.75} + 17%|█▋ | 63740/371472 [5:04:20<24:24:46, 3.50it/s] 17%|█▋ | 63741/371472 [5:04:20<26:04:14, 3.28it/s] 17%|█▋ | 63742/371472 [5:04:20<25:16:14, 3.38it/s] 17%|█▋ | 63743/371472 [5:04:20<24:36:08, 3.47it/s] 17%|█▋ | 63744/371472 [5:04:21<24:44:52, 3.45it/s] 17%|█▋ | 63745/371472 [5:04:21<23:50:43, 3.58it/s] 17%|█▋ | 63746/371472 [5:04:21<25:02:08, 3.41it/s] 17%|█▋ | 63747/371472 [5:04:22<25:52:12, 3.30it/s] 17%|█▋ | 63748/371472 [5:04:22<26:12:19, 3.26it/s] 17%|█▋ | 63749/371472 [5:04:22<26:16:15, 3.25it/s] 17%|█▋ | 63750/371472 [5:04:23<24:33:32, 3.48it/s] 17%|█▋ | 63751/371472 [5:04:23<24:44:30, 3.45it/s] 17%|█▋ | 63752/371472 [5:04:23<23:44:42, 3.60it/s] 17%|█▋ | 63753/371472 [5:04:23<23:18:29, 3.67it/s] 17%|█▋ | 63754/371472 [5:04:24<23:22:43, 3.66it/s] 17%|█▋ | 63755/371472 [5:04:24<22:52:44, 3.74it/s] 17%|█▋ | 63756/371472 [5:04:24<22:18:34, 3.83it/s] 17%|█▋ | 63757/371472 [5:04:24<21:48:15, 3.92it/s] 17%|█▋ | 63758/371472 [5:04:25<21:43:47, 3.93it/s] 17%|█▋ | 63759/371472 [5:04:25<22:07:27, 3.86it/s] 17%|█▋ | 63760/371472 [5:04:25<22:22:59, 3.82it/s] {'loss': 4.1421, 'learning_rate': 8.459242819280742e-07, 'epoch': 2.75} + 17%|█▋ | 63760/371472 [5:04:25<22:22:59, 3.82it/s] 17%|█▋ | 63761/371472 [5:04:25<24:01:21, 3.56it/s] 17%|█▋ | 63762/371472 [5:04:26<24:56:39, 3.43it/s] 17%|█▋ | 63763/371472 [5:04:26<24:30:41, 3.49it/s] 17%|█▋ | 63764/371472 [5:04:26<23:23:32, 3.65it/s] 17%|█▋ | 63765/371472 [5:04:27<22:21:05, 3.82it/s] 17%|█▋ | 63766/371472 [5:04:27<23:13:49, 3.68it/s] 17%|█▋ | 63767/371472 [5:04:27<22:57:17, 3.72it/s] 17%|█▋ | 63768/371472 [5:04:27<23:50:31, 3.58it/s] 17%|█▋ | 63769/371472 [5:04:28<23:37:58, 3.62it/s] 17%|█▋ | 63770/371472 [5:04:28<22:38:21, 3.78it/s] 17%|█▋ | 63771/371472 [5:04:28<24:00:41, 3.56it/s] 17%|█▋ | 63772/371472 [5:04:29<24:45:16, 3.45it/s] 17%|█▋ | 63773/371472 [5:04:29<24:26:44, 3.50it/s] 17%|█▋ | 63774/371472 [5:04:29<24:54:12, 3.43it/s] 17%|█▋ | 63775/371472 [5:04:29<23:34:14, 3.63it/s] 17%|█▋ | 63776/371472 [5:04:30<23:18:07, 3.67it/s] 17%|█▋ | 63777/371472 [5:04:30<24:34:54, 3.48it/s] 17%|█▋ | 63778/371472 [5:04:30<23:51:56, 3.58it/s] 17%|█▋ | 63779/371472 [5:04:30<24:15:12, 3.52it/s] 17%|█▋ | 63780/371472 [5:04:31<25:29:28, 3.35it/s] {'loss': 4.208, 'learning_rate': 8.458757999525954e-07, 'epoch': 2.75} + 17%|█▋ | 63780/371472 [5:04:31<25:29:28, 3.35it/s] 17%|█▋ | 63781/371472 [5:04:31<24:34:14, 3.48it/s] 17%|█▋ | 63782/371472 [5:04:31<23:56:02, 3.57it/s] 17%|█▋ | 63783/371472 [5:04:32<23:16:03, 3.67it/s] 17%|█▋ | 63784/371472 [5:04:32<22:19:12, 3.83it/s] 17%|█▋ | 63785/371472 [5:04:32<21:46:24, 3.93it/s] 17%|█▋ | 63786/371472 [5:04:32<24:19:40, 3.51it/s] 17%|█▋ | 63787/371472 [5:04:33<25:14:20, 3.39it/s] 17%|█▋ | 63788/371472 [5:04:33<24:09:11, 3.54it/s] 17%|█▋ | 63789/371472 [5:04:33<25:37:32, 3.34it/s] 17%|█▋ | 63790/371472 [5:04:34<24:36:25, 3.47it/s] 17%|█▋ | 63791/371472 [5:04:34<24:06:18, 3.55it/s] 17%|█▋ | 63792/371472 [5:04:34<24:31:14, 3.49it/s] 17%|█▋ | 63793/371472 [5:04:34<23:23:17, 3.65it/s] 17%|█▋ | 63794/371472 [5:04:35<22:57:55, 3.72it/s] 17%|█▋ | 63795/371472 [5:04:35<25:08:05, 3.40it/s] 17%|█▋ | 63796/371472 [5:04:35<24:13:28, 3.53it/s] 17%|█▋ | 63797/371472 [5:04:36<23:25:00, 3.65it/s] 17%|█▋ | 63798/371472 [5:04:36<23:37:56, 3.62it/s] 17%|█▋ | 63799/371472 [5:04:36<23:53:22, 3.58it/s] 17%|█▋ | 63800/371472 [5:04:36<24:27:07, 3.50it/s] {'loss': 4.2952, 'learning_rate': 8.458273179771165e-07, 'epoch': 2.75} + 17%|█▋ | 63800/371472 [5:04:36<24:27:07, 3.50it/s] 17%|█▋ | 63801/371472 [5:04:37<23:54:13, 3.58it/s] 17%|█▋ | 63802/371472 [5:04:37<23:14:23, 3.68it/s] 17%|█▋ | 63803/371472 [5:04:37<22:06:37, 3.87it/s] 17%|█▋ | 63804/371472 [5:04:37<21:53:13, 3.90it/s] 17%|█▋ | 63805/371472 [5:04:38<24:43:31, 3.46it/s] 17%|█▋ | 63806/371472 [5:04:38<25:27:26, 3.36it/s] 17%|█▋ | 63807/371472 [5:04:38<26:20:34, 3.24it/s] 17%|█▋ | 63808/371472 [5:04:39<28:04:57, 3.04it/s] 17%|█▋ | 63809/371472 [5:04:39<26:33:51, 3.22it/s] 17%|█▋ | 63810/371472 [5:04:39<26:09:44, 3.27it/s] 17%|█▋ | 63811/371472 [5:04:40<24:35:52, 3.47it/s] 17%|█▋ | 63812/371472 [5:04:40<24:28:48, 3.49it/s] 17%|█▋ | 63813/371472 [5:04:40<24:58:04, 3.42it/s] 17%|█▋ | 63814/371472 [5:04:40<24:45:59, 3.45it/s] 17%|█▋ | 63815/371472 [5:04:41<26:02:26, 3.28it/s] 17%|█▋ | 63816/371472 [5:04:41<26:10:56, 3.26it/s] 17%|█▋ | 63817/371472 [5:04:41<25:05:53, 3.41it/s] 17%|█▋ | 63818/371472 [5:04:42<26:00:55, 3.28it/s] 17%|█▋ | 63819/371472 [5:04:42<24:17:38, 3.52it/s] 17%|█▋ | 63820/371472 [5:04:42<24:19:20, 3.51it/s] {'loss': 4.0587, 'learning_rate': 8.457788360016375e-07, 'epoch': 2.75} + 17%|█▋ | 63820/371472 [5:04:42<24:19:20, 3.51it/s] 17%|█▋ | 63821/371472 [5:04:42<23:49:22, 3.59it/s] 17%|█▋ | 63822/371472 [5:04:43<23:09:58, 3.69it/s] 17%|█▋ | 63823/371472 [5:04:43<25:42:39, 3.32it/s] 17%|█▋ | 63824/371472 [5:04:43<24:49:28, 3.44it/s] 17%|█▋ | 63825/371472 [5:04:44<24:40:55, 3.46it/s] 17%|█▋ | 63826/371472 [5:04:44<23:50:23, 3.58it/s] 17%|█▋ | 63827/371472 [5:04:44<25:54:20, 3.30it/s] 17%|█▋ | 63828/371472 [5:04:45<26:06:29, 3.27it/s] 17%|█▋ | 63829/371472 [5:04:45<24:41:08, 3.46it/s] 17%|█▋ | 63830/371472 [5:04:45<24:59:50, 3.42it/s] 17%|█▋ | 63831/371472 [5:04:45<25:03:36, 3.41it/s] 17%|█▋ | 63832/371472 [5:04:46<24:35:23, 3.48it/s] 17%|█▋ | 63833/371472 [5:04:46<24:19:18, 3.51it/s] 17%|█▋ | 63834/371472 [5:04:46<23:51:02, 3.58it/s] 17%|█▋ | 63835/371472 [5:04:47<24:23:09, 3.50it/s] 17%|█▋ | 63836/371472 [5:04:47<24:02:06, 3.56it/s] 17%|█▋ | 63837/371472 [5:04:47<23:04:46, 3.70it/s] 17%|█▋ | 63838/371472 [5:04:47<22:37:21, 3.78it/s] 17%|█▋ | 63839/371472 [5:04:48<22:09:09, 3.86it/s] 17%|█▋ | 63840/371472 [5:04:48<22:14:49, 3.84it/s] {'loss': 4.1058, 'learning_rate': 8.457303540261587e-07, 'epoch': 2.75} + 17%|█▋ | 63840/371472 [5:04:48<22:14:49, 3.84it/s] 17%|█▋ | 63841/371472 [5:04:48<21:38:46, 3.95it/s] 17%|█▋ | 63842/371472 [5:04:48<22:38:33, 3.77it/s] 17%|█▋ | 63843/371472 [5:04:49<22:50:55, 3.74it/s] 17%|█▋ | 63844/371472 [5:04:49<22:17:31, 3.83it/s] 17%|█▋ | 63845/371472 [5:04:49<22:53:14, 3.73it/s] 17%|█▋ | 63846/371472 [5:04:49<23:37:40, 3.62it/s] 17%|█▋ | 63847/371472 [5:04:50<24:03:12, 3.55it/s] 17%|█▋ | 63848/371472 [5:04:50<25:49:21, 3.31it/s] 17%|█▋ | 63849/371472 [5:04:50<24:56:47, 3.43it/s] 17%|█▋ | 63850/371472 [5:04:51<24:16:51, 3.52it/s] 17%|█▋ | 63851/371472 [5:04:51<24:13:26, 3.53it/s] 17%|█▋ | 63852/371472 [5:04:51<23:55:18, 3.57it/s] 17%|█▋ | 63853/371472 [5:04:52<28:21:54, 3.01it/s] 17%|█▋ | 63854/371472 [5:04:52<26:20:59, 3.24it/s] 17%|█▋ | 63855/371472 [5:04:52<25:17:44, 3.38it/s] 17%|█▋ | 63856/371472 [5:04:52<24:31:50, 3.48it/s] 17%|█▋ | 63857/371472 [5:04:53<25:28:47, 3.35it/s] 17%|█▋ | 63858/371472 [5:04:53<24:20:48, 3.51it/s] 17%|█▋ | 63859/371472 [5:04:53<23:49:43, 3.59it/s] 17%|█▋ | 63860/371472 [5:04:54<25:30:28, 3.35it/s] {'loss': 4.1121, 'learning_rate': 8.456818720506799e-07, 'epoch': 2.75} + 17%|█▋ | 63860/371472 [5:04:54<25:30:28, 3.35it/s] 17%|█▋ | 63861/371472 [5:04:54<24:20:17, 3.51it/s] 17%|█▋ | 63862/371472 [5:04:54<23:55:48, 3.57it/s] 17%|█▋ | 63863/371472 [5:04:54<23:31:22, 3.63it/s] 17%|█▋ | 63864/371472 [5:04:55<24:36:35, 3.47it/s] 17%|█▋ | 63865/371472 [5:04:55<26:21:03, 3.24it/s] 17%|█▋ | 63866/371472 [5:04:55<28:08:09, 3.04it/s] 17%|█▋ | 63867/371472 [5:04:56<29:01:32, 2.94it/s] 17%|█▋ | 63868/371472 [5:04:56<26:55:41, 3.17it/s] 17%|█▋ | 63869/371472 [5:04:56<26:57:45, 3.17it/s] 17%|█▋ | 63870/371472 [5:04:57<25:18:38, 3.38it/s] 17%|█▋ | 63871/371472 [5:04:57<24:54:25, 3.43it/s] 17%|█▋ | 63872/371472 [5:04:57<24:38:11, 3.47it/s] 17%|█▋ | 63873/371472 [5:04:57<23:44:06, 3.60it/s] 17%|█▋ | 63874/371472 [5:04:58<24:05:02, 3.55it/s] 17%|█▋ | 63875/371472 [5:04:58<24:06:27, 3.54it/s] 17%|█▋ | 63876/371472 [5:04:58<25:10:56, 3.39it/s] 17%|█▋ | 63877/371472 [5:04:59<23:44:38, 3.60it/s] 17%|█▋ | 63878/371472 [5:04:59<24:01:47, 3.56it/s] 17%|█▋ | 63879/371472 [5:04:59<24:13:10, 3.53it/s] 17%|█▋ | 63880/371472 [5:04:59<24:23:08, 3.50it/s] {'loss': 4.0427, 'learning_rate': 8.456333900752009e-07, 'epoch': 2.75} + 17%|█▋ | 63880/371472 [5:04:59<24:23:08, 3.50it/s] 17%|█▋ | 63881/371472 [5:05:00<23:22:25, 3.66it/s] 17%|█▋ | 63882/371472 [5:05:00<24:00:40, 3.56it/s] 17%|█▋ | 63883/371472 [5:05:00<24:55:21, 3.43it/s] 17%|█▋ | 63884/371472 [5:05:01<24:39:38, 3.46it/s] 17%|█▋ | 63885/371472 [5:05:01<26:26:44, 3.23it/s] 17%|█▋ | 63886/371472 [5:05:01<26:34:03, 3.22it/s] 17%|█▋ | 63887/371472 [5:05:02<26:22:07, 3.24it/s] 17%|█▋ | 63888/371472 [5:05:02<25:07:10, 3.40it/s] 17%|█▋ | 63889/371472 [5:05:02<24:19:30, 3.51it/s] 17%|█▋ | 63890/371472 [5:05:02<23:53:20, 3.58it/s] 17%|█▋ | 63891/371472 [5:05:03<25:06:04, 3.40it/s] 17%|█▋ | 63892/371472 [5:05:03<28:22:35, 3.01it/s] 17%|█▋ | 63893/371472 [5:05:03<26:32:10, 3.22it/s] 17%|█▋ | 63894/371472 [5:05:04<25:10:03, 3.39it/s] 17%|█▋ | 63895/371472 [5:05:04<24:58:17, 3.42it/s] 17%|█▋ | 63896/371472 [5:05:04<23:59:44, 3.56it/s] 17%|█▋ | 63897/371472 [5:05:04<23:56:15, 3.57it/s] 17%|█▋ | 63898/371472 [5:05:05<24:58:26, 3.42it/s] 17%|█▋ | 63899/371472 [5:05:05<24:39:13, 3.47it/s] 17%|█▋ | 63900/371472 [5:05:05<26:51:04, 3.18it/s] {'loss': 3.9421, 'learning_rate': 8.455849080997219e-07, 'epoch': 2.75} + 17%|█▋ | 63900/371472 [5:05:05<26:51:04, 3.18it/s] 17%|█▋ | 63901/371472 [5:05:06<25:37:33, 3.33it/s] 17%|█▋ | 63902/371472 [5:05:06<27:00:28, 3.16it/s] 17%|█▋ | 63903/371472 [5:05:06<24:52:55, 3.43it/s] 17%|█▋ | 63904/371472 [5:05:07<24:01:06, 3.56it/s] 17%|█▋ | 63905/371472 [5:05:07<25:10:28, 3.39it/s] 17%|█▋ | 63906/371472 [5:05:07<24:20:15, 3.51it/s] 17%|█▋ | 63907/371472 [5:05:07<23:40:44, 3.61it/s] 17%|█▋ | 63908/371472 [5:05:08<23:22:33, 3.65it/s] 17%|█▋ | 63909/371472 [5:05:08<24:35:35, 3.47it/s] 17%|█▋ | 63910/371472 [5:05:08<24:33:11, 3.48it/s] 17%|█▋ | 63911/371472 [5:05:09<23:44:06, 3.60it/s] 17%|█▋ | 63912/371472 [5:05:09<23:41:43, 3.61it/s] 17%|█▋ | 63913/371472 [5:05:09<23:22:24, 3.66it/s] 17%|█▋ | 63914/371472 [5:05:09<23:30:37, 3.63it/s] 17%|█▋ | 63915/371472 [5:05:10<23:24:57, 3.65it/s] 17%|█▋ | 63916/371472 [5:05:10<23:53:17, 3.58it/s] 17%|█▋ | 63917/371472 [5:05:10<23:59:43, 3.56it/s] 17%|█▋ | 63918/371472 [5:05:10<22:46:59, 3.75it/s] 17%|█▋ | 63919/371472 [5:05:11<22:25:08, 3.81it/s] 17%|█▋ | 63920/371472 [5:05:11<22:15:48, 3.84it/s] {'loss': 4.2178, 'learning_rate': 8.455364261242431e-07, 'epoch': 2.75} + 17%|█▋ | 63920/371472 [5:05:11<22:15:48, 3.84it/s] 17%|█▋ | 63921/371472 [5:05:11<21:54:00, 3.90it/s] 17%|█▋ | 63922/371472 [5:05:11<23:24:00, 3.65it/s] 17%|█▋ | 63923/371472 [5:05:12<22:51:53, 3.74it/s] 17%|█▋ | 63924/371472 [5:05:12<22:34:17, 3.78it/s] 17%|█▋ | 63925/371472 [5:05:12<23:22:51, 3.65it/s] 17%|█▋ | 63926/371472 [5:05:13<22:41:03, 3.77it/s] 17%|█▋ | 63927/371472 [5:05:13<22:50:36, 3.74it/s] 17%|█▋ | 63928/371472 [5:05:13<23:31:12, 3.63it/s] 17%|█▋ | 63929/371472 [5:05:13<23:59:10, 3.56it/s] 17%|█▋ | 63930/371472 [5:05:14<22:48:35, 3.75it/s] 17%|█▋ | 63931/371472 [5:05:14<22:00:14, 3.88it/s] 17%|█▋ | 63932/371472 [5:05:14<21:42:04, 3.94it/s] 17%|█▋ | 63933/371472 [5:05:14<23:37:22, 3.62it/s] 17%|█▋ | 63934/371472 [5:05:15<24:13:58, 3.53it/s] 17%|█▋ | 63935/371472 [5:05:15<23:57:17, 3.57it/s] 17%|█▋ | 63936/371472 [5:05:15<23:37:19, 3.62it/s] 17%|█▋ | 63937/371472 [5:05:16<23:38:45, 3.61it/s] 17%|█▋ | 63938/371472 [5:05:16<23:35:39, 3.62it/s] 17%|█▋ | 63939/371472 [5:05:16<24:49:06, 3.44it/s] 17%|█▋ | 63940/371472 [5:05:16<24:50:41, 3.44it/s] {'loss': 4.3995, 'learning_rate': 8.454879441487641e-07, 'epoch': 2.75} + 17%|█▋ | 63940/371472 [5:05:16<24:50:41, 3.44it/s] 17%|█▋ | 63941/371472 [5:05:17<25:00:32, 3.42it/s] 17%|█▋ | 63942/371472 [5:05:17<24:14:49, 3.52it/s] 17%|█▋ | 63943/371472 [5:05:17<24:30:47, 3.48it/s] 17%|█▋ | 63944/371472 [5:05:18<23:54:55, 3.57it/s] 17%|█▋ | 63945/371472 [5:05:18<23:50:12, 3.58it/s] 17%|█▋ | 63946/371472 [5:05:18<23:36:28, 3.62it/s] 17%|█▋ | 63947/371472 [5:05:18<23:53:31, 3.58it/s] 17%|█▋ | 63948/371472 [5:05:19<26:19:10, 3.25it/s] 17%|█▋ | 63949/371472 [5:05:19<24:46:10, 3.45it/s] 17%|█▋ | 63950/371472 [5:05:19<25:14:25, 3.38it/s] 17%|█▋ | 63951/371472 [5:05:20<24:50:48, 3.44it/s] 17%|█▋ | 63952/371472 [5:05:20<23:43:33, 3.60it/s] 17%|█▋ | 63953/371472 [5:05:20<23:20:56, 3.66it/s] 17%|█▋ | 63954/371472 [5:05:20<23:32:21, 3.63it/s] 17%|█▋ | 63955/371472 [5:05:21<22:36:35, 3.78it/s] 17%|█▋ | 63956/371472 [5:05:21<22:15:01, 3.84it/s] 17%|█▋ | 63957/371472 [5:05:21<22:36:57, 3.78it/s] 17%|█▋ | 63958/371472 [5:05:21<22:08:04, 3.86it/s] 17%|█▋ | 63959/371472 [5:05:22<24:54:12, 3.43it/s] 17%|█▋ | 63960/371472 [5:05:22<24:49:14, 3.44it/s] {'loss': 3.9356, 'learning_rate': 8.454394621732854e-07, 'epoch': 2.75} + 17%|█▋ | 63960/371472 [5:05:22<24:49:14, 3.44it/s] 17%|█▋ | 63961/371472 [5:05:22<24:41:15, 3.46it/s] 17%|█▋ | 63962/371472 [5:05:23<24:55:09, 3.43it/s] 17%|█▋ | 63963/371472 [5:05:23<24:27:04, 3.49it/s] 17%|█▋ | 63964/371472 [5:05:23<23:30:00, 3.63it/s] 17%|█▋ | 63965/371472 [5:05:23<23:07:47, 3.69it/s] 17%|█▋ | 63966/371472 [5:05:24<22:34:11, 3.78it/s] 17%|█▋ | 63967/371472 [5:05:24<21:57:02, 3.89it/s] 17%|█▋ | 63968/371472 [5:05:24<22:59:01, 3.72it/s] 17%|█▋ | 63969/371472 [5:05:25<24:24:11, 3.50it/s] 17%|█▋ | 63970/371472 [5:05:25<23:29:24, 3.64it/s] 17%|█▋ | 63971/371472 [5:05:25<23:31:06, 3.63it/s] 17%|█▋ | 63972/371472 [5:05:25<22:48:13, 3.75it/s] 17%|█▋ | 63973/371472 [5:05:26<24:07:39, 3.54it/s] 17%|█▋ | 63974/371472 [5:05:26<22:52:59, 3.73it/s] 17%|█▋ | 63975/371472 [5:05:26<23:06:35, 3.70it/s] 17%|█▋ | 63976/371472 [5:05:26<22:47:17, 3.75it/s] 17%|█▋ | 63977/371472 [5:05:27<23:11:08, 3.68it/s] 17%|█▋ | 63978/371472 [5:05:27<22:57:15, 3.72it/s] 17%|█▋ | 63979/371472 [5:05:27<23:36:34, 3.62it/s] 17%|█▋ | 63980/371472 [5:05:28<22:59:51, 3.71it/s] {'loss': 4.1646, 'learning_rate': 8.453909801978064e-07, 'epoch': 2.76} + 17%|█▋ | 63980/371472 [5:05:28<22:59:51, 3.71it/s] 17%|█▋ | 63981/371472 [5:05:28<23:06:19, 3.70it/s] 17%|█▋ | 63982/371472 [5:05:28<22:59:37, 3.71it/s] 17%|█▋ | 63983/371472 [5:05:28<24:55:41, 3.43it/s] 17%|█▋ | 63984/371472 [5:05:29<24:29:22, 3.49it/s] 17%|█▋ | 63985/371472 [5:05:29<24:56:16, 3.43it/s] 17%|█▋ | 63986/371472 [5:05:29<24:30:01, 3.49it/s] 17%|█▋ | 63987/371472 [5:05:30<24:08:27, 3.54it/s] 17%|█▋ | 63988/371472 [5:05:30<23:35:50, 3.62it/s] 17%|█▋ | 63989/371472 [5:05:30<23:25:19, 3.65it/s] 17%|█▋ | 63990/371472 [5:05:30<23:40:06, 3.61it/s] 17%|█▋ | 63991/371472 [5:05:31<23:16:56, 3.67it/s] 17%|█▋ | 63992/371472 [5:05:31<22:49:31, 3.74it/s] 17%|█▋ | 63993/371472 [5:05:31<23:15:31, 3.67it/s] 17%|█▋ | 63994/371472 [5:05:31<23:14:01, 3.68it/s] 17%|█▋ | 63995/371472 [5:05:32<23:59:27, 3.56it/s] 17%|█▋ | 63996/371472 [5:05:32<23:40:54, 3.61it/s] 17%|█▋ | 63997/371472 [5:05:32<22:52:50, 3.73it/s] 17%|█▋ | 63998/371472 [5:05:33<23:13:21, 3.68it/s] 17%|█▋ | 63999/371472 [5:05:33<22:51:50, 3.74it/s] 17%|█▋ | 64000/371472 [5:05:33<24:33:32, 3.48it/s] {'loss': 3.9561, 'learning_rate': 8.453424982223276e-07, 'epoch': 2.76} + 17%|█▋ | 64000/371472 [5:05:33<24:33:32, 3.48it/s] 17%|█▋ | 64001/371472 [5:05:33<23:51:01, 3.58it/s] 17%|█▋ | 64002/371472 [5:05:34<23:24:18, 3.65it/s] 17%|█▋ | 64003/371472 [5:05:34<24:00:50, 3.56it/s] 17%|█▋ | 64004/371472 [5:05:34<23:53:14, 3.58it/s] 17%|█▋ | 64005/371472 [5:05:34<24:31:43, 3.48it/s] 17%|█▋ | 64006/371472 [5:05:35<24:18:17, 3.51it/s] 17%|█▋ | 64007/371472 [5:05:35<27:33:32, 3.10it/s] 17%|█▋ | 64008/371472 [5:05:35<25:43:55, 3.32it/s] 17%|█▋ | 64009/371472 [5:05:36<25:16:48, 3.38it/s] 17%|█▋ | 64010/371472 [5:05:36<25:05:11, 3.40it/s] 17%|█▋ | 64011/371472 [5:05:36<24:09:42, 3.53it/s] 17%|█▋ | 64012/371472 [5:05:37<25:13:42, 3.39it/s] 17%|█▋ | 64013/371472 [5:05:37<23:36:22, 3.62it/s] 17%|█▋ | 64014/371472 [5:05:37<23:11:35, 3.68it/s] 17%|█▋ | 64015/371472 [5:05:37<22:41:58, 3.76it/s] 17%|█▋ | 64016/371472 [5:05:38<22:44:27, 3.76it/s] 17%|█▋ | 64017/371472 [5:05:38<22:51:21, 3.74it/s] 17%|█▋ | 64018/371472 [5:05:38<22:31:39, 3.79it/s] 17%|█▋ | 64019/371472 [5:05:38<23:51:45, 3.58it/s] 17%|█▋ | 64020/371472 [5:05:39<24:16:15, 3.52it/s] {'loss': 4.2556, 'learning_rate': 8.452940162468486e-07, 'epoch': 2.76} + 17%|█▋ | 64020/371472 [5:05:39<24:16:15, 3.52it/s] 17%|█▋ | 64021/371472 [5:05:39<24:44:15, 3.45it/s] 17%|█▋ | 64022/371472 [5:05:39<23:26:11, 3.64it/s] 17%|█▋ | 64023/371472 [5:05:40<25:40:21, 3.33it/s] 17%|█▋ | 64024/371472 [5:05:40<24:31:13, 3.48it/s] 17%|█▋ | 64025/371472 [5:05:40<23:40:48, 3.61it/s] 17%|█▋ | 64026/371472 [5:05:40<22:47:08, 3.75it/s] 17%|█▋ | 64027/371472 [5:05:41<23:03:16, 3.70it/s] 17%|█▋ | 64028/371472 [5:05:41<23:49:33, 3.58it/s] 17%|█▋ | 64029/371472 [5:05:41<24:13:14, 3.53it/s] 17%|█▋ | 64030/371472 [5:05:42<24:04:55, 3.55it/s] 17%|█▋ | 64031/371472 [5:05:42<23:11:39, 3.68it/s] 17%|█▋ | 64032/371472 [5:05:42<24:18:24, 3.51it/s] 17%|█▋ | 64033/371472 [5:05:42<24:21:35, 3.51it/s] 17%|█▋ | 64034/371472 [5:05:43<23:59:20, 3.56it/s] 17%|█▋ | 64035/371472 [5:05:43<24:49:09, 3.44it/s] 17%|█▋ | 64036/371472 [5:05:43<24:19:26, 3.51it/s] 17%|█▋ | 64037/371472 [5:05:44<28:10:29, 3.03it/s] 17%|█▋ | 64038/371472 [5:05:44<26:54:47, 3.17it/s] 17%|█▋ | 64039/371472 [5:05:44<25:44:20, 3.32it/s] 17%|█▋ | 64040/371472 [5:05:44<24:03:02, 3.55it/s] {'loss': 4.1118, 'learning_rate': 8.452455342713698e-07, 'epoch': 2.76} + 17%|█▋ | 64040/371472 [5:05:44<24:03:02, 3.55it/s] 17%|█▋ | 64041/371472 [5:05:45<25:09:04, 3.40it/s] 17%|█▋ | 64042/371472 [5:05:45<24:46:40, 3.45it/s] 17%|█▋ | 64043/371472 [5:05:45<25:01:40, 3.41it/s] 17%|█▋ | 64044/371472 [5:05:46<24:04:35, 3.55it/s] 17%|█▋ | 64045/371472 [5:05:46<27:21:04, 3.12it/s] 17%|█▋ | 64046/371472 [5:05:46<26:45:56, 3.19it/s] 17%|█▋ | 64047/371472 [5:05:47<27:41:57, 3.08it/s] 17%|█▋ | 64048/371472 [5:05:47<27:21:05, 3.12it/s] 17%|█▋ | 64049/371472 [5:05:47<25:22:55, 3.36it/s] 17%|█▋ | 64050/371472 [5:05:47<24:13:42, 3.52it/s] 17%|█▋ | 64051/371472 [5:05:48<23:16:58, 3.67it/s] 17%|█▋ | 64052/371472 [5:05:48<22:15:06, 3.84it/s] 17%|█▋ | 64053/371472 [5:05:48<25:07:32, 3.40it/s] 17%|█▋ | 64054/371472 [5:05:49<25:05:37, 3.40it/s] 17%|█▋ | 64055/371472 [5:05:49<24:40:23, 3.46it/s] 17%|█▋ | 64056/371472 [5:05:49<24:05:14, 3.55it/s] 17%|█▋ | 64057/371472 [5:05:49<23:48:57, 3.59it/s] 17%|█▋ | 64058/371472 [5:05:50<23:08:00, 3.69it/s] 17%|█▋ | 64059/371472 [5:05:50<22:37:41, 3.77it/s] 17%|█▋ | 64060/371472 [5:05:50<22:48:49, 3.74it/s] {'loss': 4.0717, 'learning_rate': 8.451970522958908e-07, 'epoch': 2.76} + 17%|█▋ | 64060/371472 [5:05:50<22:48:49, 3.74it/s] 17%|█▋ | 64061/371472 [5:05:51<23:27:10, 3.64it/s] 17%|█▋ | 64062/371472 [5:05:51<23:23:54, 3.65it/s] 17%|█▋ | 64063/371472 [5:05:51<23:50:17, 3.58it/s] 17%|█▋ | 64064/371472 [5:05:51<23:09:34, 3.69it/s] 17%|█▋ | 64065/371472 [5:05:52<24:29:09, 3.49it/s] 17%|█▋ | 64066/371472 [5:05:52<24:04:59, 3.55it/s] 17%|█▋ | 64067/371472 [5:05:52<26:01:04, 3.28it/s] 17%|█▋ | 64068/371472 [5:05:53<24:55:36, 3.43it/s] 17%|█▋ | 64069/371472 [5:05:53<23:45:26, 3.59it/s] 17%|█▋ | 64070/371472 [5:05:53<22:51:13, 3.74it/s] 17%|█▋ | 64071/371472 [5:05:53<23:25:40, 3.64it/s] 17%|█▋ | 64072/371472 [5:05:54<25:11:27, 3.39it/s] 17%|█▋ | 64073/371472 [5:05:54<25:02:00, 3.41it/s] 17%|█▋ | 64074/371472 [5:05:54<24:01:08, 3.56it/s] 17%|█▋ | 64075/371472 [5:05:54<23:35:59, 3.62it/s] 17%|█▋ | 64076/371472 [5:05:55<23:08:40, 3.69it/s] 17%|█▋ | 64077/371472 [5:05:55<22:14:20, 3.84it/s] 17%|█▋ | 64078/371472 [5:05:55<24:09:03, 3.54it/s] 17%|█▋ | 64079/371472 [5:05:56<23:27:39, 3.64it/s] 17%|█▋ | 64080/371472 [5:05:56<23:38:34, 3.61it/s] {'loss': 4.2172, 'learning_rate': 8.45148570320412e-07, 'epoch': 2.76} + 17%|█▋ | 64080/371472 [5:05:56<23:38:34, 3.61it/s] 17%|█▋ | 64081/371472 [5:05:56<24:29:26, 3.49it/s] 17%|█▋ | 64082/371472 [5:05:56<23:57:17, 3.56it/s] 17%|█▋ | 64083/371472 [5:05:57<24:34:33, 3.47it/s] 17%|█▋ | 64084/371472 [5:05:57<23:20:28, 3.66it/s] 17%|█▋ | 64085/371472 [5:05:57<23:03:31, 3.70it/s] 17%|█▋ | 64086/371472 [5:05:58<23:36:33, 3.62it/s] 17%|█▋ | 64087/371472 [5:05:58<23:19:34, 3.66it/s] 17%|█▋ | 64088/371472 [5:05:58<23:21:19, 3.66it/s] 17%|█▋ | 64089/371472 [5:05:58<24:47:47, 3.44it/s] 17%|█▋ | 64090/371472 [5:05:59<23:54:53, 3.57it/s] 17%|█▋ | 64091/371472 [5:05:59<23:43:00, 3.60it/s] 17%|█▋ | 64092/371472 [5:05:59<23:25:10, 3.65it/s] 17%|█▋ | 64093/371472 [5:05:59<22:46:34, 3.75it/s] 17%|█▋ | 64094/371472 [5:06:00<22:08:40, 3.86it/s] 17%|█▋ | 64095/371472 [5:06:00<22:04:55, 3.87it/s] 17%|█▋ | 64096/371472 [5:06:00<21:50:56, 3.91it/s] 17%|█▋ | 64097/371472 [5:06:00<22:22:18, 3.82it/s] 17%|█▋ | 64098/371472 [5:06:01<22:39:02, 3.77it/s] 17%|█▋ | 64099/371472 [5:06:01<23:03:33, 3.70it/s] 17%|█▋ | 64100/371472 [5:06:01<23:26:55, 3.64it/s] {'loss': 4.1552, 'learning_rate': 8.451000883449331e-07, 'epoch': 2.76} + 17%|█▋ | 64100/371472 [5:06:01<23:26:55, 3.64it/s] 17%|█▋ | 64101/371472 [5:06:02<23:22:34, 3.65it/s] 17%|█▋ | 64102/371472 [5:06:02<23:26:20, 3.64it/s] 17%|█▋ | 64103/371472 [5:06:02<26:00:37, 3.28it/s] 17%|█▋ | 64104/371472 [5:06:02<25:10:36, 3.39it/s] 17%|█▋ | 64105/371472 [5:06:03<25:08:51, 3.40it/s] 17%|█▋ | 64106/371472 [5:06:03<24:26:25, 3.49it/s] 17%|█▋ | 64107/371472 [5:06:03<24:00:52, 3.56it/s] 17%|█▋ | 64108/371472 [5:06:04<23:46:06, 3.59it/s] 17%|█▋ | 64109/371472 [5:06:04<23:13:04, 3.68it/s] 17%|█▋ | 64110/371472 [5:06:04<23:18:24, 3.66it/s] 17%|█▋ | 64111/371472 [5:06:04<23:32:44, 3.63it/s] 17%|█▋ | 64112/371472 [5:06:05<23:10:51, 3.68it/s] 17%|█▋ | 64113/371472 [5:06:05<23:09:12, 3.69it/s] 17%|█▋ | 64114/371472 [5:06:05<24:25:03, 3.50it/s] 17%|█▋ | 64115/371472 [5:06:06<26:01:51, 3.28it/s] 17%|█▋ | 64116/371472 [5:06:06<26:17:30, 3.25it/s] 17%|█▋ | 64117/371472 [5:06:06<26:59:51, 3.16it/s] 17%|█▋ | 64118/371472 [5:06:07<26:30:47, 3.22it/s] 17%|█▋ | 64119/371472 [5:06:07<26:57:28, 3.17it/s] 17%|█▋ | 64120/371472 [5:06:07<25:19:03, 3.37it/s] {'loss': 4.2209, 'learning_rate': 8.450516063694542e-07, 'epoch': 2.76} + 17%|█▋ | 64120/371472 [5:06:07<25:19:03, 3.37it/s] 17%|█▋ | 64121/371472 [5:06:07<25:00:49, 3.41it/s] 17%|█▋ | 64122/371472 [5:06:08<23:33:16, 3.62it/s] 17%|█▋ | 64123/371472 [5:06:08<23:49:38, 3.58it/s] 17%|█▋ | 64124/371472 [5:06:08<23:19:41, 3.66it/s] 17%|█▋ | 64125/371472 [5:06:08<23:20:20, 3.66it/s] 17%|█▋ | 64126/371472 [5:06:09<23:19:34, 3.66it/s] 17%|█▋ | 64127/371472 [5:06:09<24:37:16, 3.47it/s] 17%|█▋ | 64128/371472 [5:06:09<23:48:09, 3.59it/s] 17%|█▋ | 64129/371472 [5:06:10<24:32:02, 3.48it/s] 17%|█▋ | 64130/371472 [5:06:10<24:44:06, 3.45it/s] 17%|█▋ | 64131/371472 [5:06:10<25:51:57, 3.30it/s] 17%|█▋ | 64132/371472 [5:06:11<25:31:44, 3.34it/s] 17%|█▋ | 64133/371472 [5:06:11<25:20:50, 3.37it/s] 17%|█▋ | 64134/371472 [5:06:11<25:50:20, 3.30it/s] 17%|█▋ | 64135/371472 [5:06:11<24:39:31, 3.46it/s] 17%|█▋ | 64136/371472 [5:06:12<23:56:15, 3.57it/s] 17%|█▋ | 64137/371472 [5:06:12<24:08:52, 3.54it/s] 17%|█▋ | 64138/371472 [5:06:12<26:46:46, 3.19it/s] 17%|█▋ | 64139/371472 [5:06:13<25:25:31, 3.36it/s] 17%|█▋ | 64140/371472 [5:06:13<25:46:47, 3.31it/s] {'loss': 4.1111, 'learning_rate': 8.450031243939752e-07, 'epoch': 2.76} + 17%|█▋ | 64140/371472 [5:06:13<25:46:47, 3.31it/s] 17%|█▋ | 64141/371472 [5:06:13<26:09:08, 3.26it/s] 17%|█▋ | 64142/371472 [5:06:14<24:52:53, 3.43it/s] 17%|█▋ | 64143/371472 [5:06:14<24:52:38, 3.43it/s] 17%|█▋ | 64144/371472 [5:06:14<24:25:26, 3.50it/s] 17%|█▋ | 64145/371472 [5:06:14<25:30:43, 3.35it/s] 17%|█▋ | 64146/371472 [5:06:15<25:51:54, 3.30it/s] 17%|█▋ | 64147/371472 [5:06:15<26:54:33, 3.17it/s] 17%|█▋ | 64148/371472 [5:06:15<25:21:47, 3.37it/s] 17%|█▋ | 64149/371472 [5:06:16<25:08:44, 3.39it/s] 17%|█▋ | 64150/371472 [5:06:16<24:31:45, 3.48it/s] 17%|█▋ | 64151/371472 [5:06:16<24:43:30, 3.45it/s] 17%|█▋ | 64152/371472 [5:06:16<24:32:05, 3.48it/s] 17%|█▋ | 64153/371472 [5:06:17<24:23:49, 3.50it/s] 17%|█▋ | 64154/371472 [5:06:17<23:14:17, 3.67it/s] 17%|█▋ | 64155/371472 [5:06:17<27:49:07, 3.07it/s] 17%|█▋ | 64156/371472 [5:06:18<26:09:37, 3.26it/s] 17%|█▋ | 64157/371472 [5:06:18<25:18:08, 3.37it/s] 17%|█▋ | 64158/371472 [5:06:18<24:00:46, 3.55it/s] 17%|█▋ | 64159/371472 [5:06:18<23:32:11, 3.63it/s] 17%|█▋ | 64160/371472 [5:06:19<23:00:36, 3.71it/s] {'loss': 4.0866, 'learning_rate': 8.449546424184964e-07, 'epoch': 2.76} + 17%|█▋ | 64160/371472 [5:06:19<23:00:36, 3.71it/s] 17%|█▋ | 64161/371472 [5:06:19<23:40:31, 3.61it/s] 17%|█▋ | 64162/371472 [5:06:19<23:27:49, 3.64it/s] 17%|█▋ | 64163/371472 [5:06:20<23:38:09, 3.61it/s] 17%|█▋ | 64164/371472 [5:06:20<23:49:23, 3.58it/s] 17%|█▋ | 64165/371472 [5:06:20<23:27:26, 3.64it/s] 17%|█▋ | 64166/371472 [5:06:20<24:06:30, 3.54it/s] 17%|█▋ | 64167/371472 [5:06:21<23:33:02, 3.62it/s] 17%|█▋ | 64168/371472 [5:06:21<24:19:41, 3.51it/s] 17%|█▋ | 64169/371472 [5:06:21<23:50:44, 3.58it/s] 17%|█▋ | 64170/371472 [5:06:21<23:00:30, 3.71it/s] 17%|█▋ | 64171/371472 [5:06:22<23:54:00, 3.57it/s] 17%|█▋ | 64172/371472 [5:06:22<22:59:45, 3.71it/s] 17%|█▋ | 64173/371472 [5:06:22<25:00:37, 3.41it/s] 17%|█▋ | 64174/371472 [5:06:23<24:50:11, 3.44it/s] 17%|█▋ | 64175/371472 [5:06:23<23:50:54, 3.58it/s] 17%|█▋ | 64176/371472 [5:06:23<23:00:17, 3.71it/s] 17%|█▋ | 64177/371472 [5:06:23<22:23:48, 3.81it/s] 17%|█▋ | 64178/371472 [5:06:24<23:25:36, 3.64it/s] 17%|█▋ | 64179/371472 [5:06:24<22:41:26, 3.76it/s] 17%|█▋ | 64180/371472 [5:06:24<21:58:25, 3.88it/s] {'loss': 4.1659, 'learning_rate': 8.449061604430175e-07, 'epoch': 2.76} + 17%|█▋ | 64180/371472 [5:06:24<21:58:25, 3.88it/s] 17%|█▋ | 64181/371472 [5:06:24<22:02:02, 3.87it/s] 17%|█▋ | 64182/371472 [5:06:25<22:43:31, 3.76it/s] 17%|█▋ | 64183/371472 [5:06:25<22:01:39, 3.88it/s] 17%|█▋ | 64184/371472 [5:06:25<21:39:29, 3.94it/s] 17%|█▋ | 64185/371472 [5:06:25<21:45:09, 3.92it/s] 17%|█▋ | 64186/371472 [5:06:26<21:46:03, 3.92it/s] 17%|█▋ | 64187/371472 [5:06:26<21:36:05, 3.95it/s] 17%|█▋ | 64188/371472 [5:06:26<22:34:21, 3.78it/s] 17%|█▋ | 64189/371472 [5:06:27<23:32:27, 3.63it/s] 17%|█▋ | 64190/371472 [5:06:27<24:19:36, 3.51it/s] 17%|█▋ | 64191/371472 [5:06:27<27:13:40, 3.13it/s] 17%|█▋ | 64192/371472 [5:06:28<28:18:32, 3.02it/s] 17%|█▋ | 64193/371472 [5:06:28<28:12:27, 3.03it/s] 17%|█▋ | 64194/371472 [5:06:28<28:48:51, 2.96it/s] 17%|█▋ | 64195/371472 [5:06:29<28:37:07, 2.98it/s] 17%|█▋ | 64196/371472 [5:06:29<27:49:01, 3.07it/s] 17%|█▋ | 64197/371472 [5:06:29<26:15:34, 3.25it/s] 17%|█▋ | 64198/371472 [5:06:30<25:14:20, 3.38it/s] 17%|█▋ | 64199/371472 [5:06:30<24:40:09, 3.46it/s] 17%|█▋ | 64200/371472 [5:06:30<24:21:05, 3.51it/s] {'loss': 4.3124, 'learning_rate': 8.448576784675385e-07, 'epoch': 2.77} + 17%|█▋ | 64200/371472 [5:06:30<24:21:05, 3.51it/s] 17%|█▋ | 64201/371472 [5:06:30<23:34:32, 3.62it/s] 17%|█▋ | 64202/371472 [5:06:31<23:02:47, 3.70it/s] 17%|█▋ | 64203/371472 [5:06:31<23:19:08, 3.66it/s] 17%|█▋ | 64204/371472 [5:06:31<23:35:00, 3.62it/s] 17%|█▋ | 64205/371472 [5:06:31<23:39:57, 3.61it/s] 17%|█▋ | 64206/371472 [5:06:32<23:27:50, 3.64it/s] 17%|█▋ | 64207/371472 [5:06:32<23:46:16, 3.59it/s] 17%|█▋ | 64208/371472 [5:06:32<24:06:54, 3.54it/s] 17%|█▋ | 64209/371472 [5:06:33<24:24:42, 3.50it/s] 17%|█▋ | 64210/371472 [5:06:33<23:58:17, 3.56it/s] 17%|█▋ | 64211/371472 [5:06:33<23:12:05, 3.68it/s] 17%|█▋ | 64212/371472 [5:06:33<22:53:32, 3.73it/s] 17%|█▋ | 64213/371472 [5:06:34<23:23:49, 3.65it/s] 17%|█▋ | 64214/371472 [5:06:34<24:06:11, 3.54it/s] 17%|█▋ | 64215/371472 [5:06:34<23:14:43, 3.67it/s] 17%|█▋ | 64216/371472 [5:06:34<22:52:59, 3.73it/s] 17%|█▋ | 64217/371472 [5:06:35<23:21:29, 3.65it/s] 17%|█▋ | 64218/371472 [5:06:35<23:25:36, 3.64it/s] 17%|█▋ | 64219/371472 [5:06:35<24:15:13, 3.52it/s] 17%|█▋ | 64220/371472 [5:06:36<23:38:01, 3.61it/s] {'loss': 4.1531, 'learning_rate': 8.448091964920597e-07, 'epoch': 2.77} + 17%|█▋ | 64220/371472 [5:06:36<23:38:01, 3.61it/s] 17%|█▋ | 64221/371472 [5:06:36<23:29:12, 3.63it/s] 17%|█▋ | 64222/371472 [5:06:36<22:44:02, 3.75it/s] 17%|█▋ | 64223/371472 [5:06:36<22:37:31, 3.77it/s] 17%|█▋ | 64224/371472 [5:06:37<22:27:14, 3.80it/s] 17%|█▋ | 64225/371472 [5:06:37<22:51:14, 3.73it/s] 17%|█▋ | 64226/371472 [5:06:37<23:25:45, 3.64it/s] 17%|█▋ | 64227/371472 [5:06:37<23:19:14, 3.66it/s] 17%|█▋ | 64228/371472 [5:06:38<23:25:26, 3.64it/s] 17%|█▋ | 64229/371472 [5:06:38<23:24:46, 3.65it/s] 17%|█▋ | 64230/371472 [5:06:38<23:58:46, 3.56it/s] 17%|█▋ | 64231/371472 [5:06:39<25:23:08, 3.36it/s] 17%|█▋ | 64232/371472 [5:06:39<26:48:58, 3.18it/s] 17%|█▋ | 64233/371472 [5:06:39<25:57:11, 3.29it/s] 17%|█▋ | 64234/371472 [5:06:40<24:35:57, 3.47it/s] 17%|█▋ | 64235/371472 [5:06:40<23:36:28, 3.62it/s] 17%|█▋ | 64236/371472 [5:06:40<23:25:13, 3.64it/s] 17%|█▋ | 64237/371472 [5:06:40<25:09:55, 3.39it/s] 17%|█▋ | 64238/371472 [5:06:41<26:26:18, 3.23it/s] 17%|█▋ | 64239/371472 [5:06:41<24:42:34, 3.45it/s] 17%|█▋ | 64240/371472 [5:06:41<23:42:04, 3.60it/s] {'loss': 4.2296, 'learning_rate': 8.447607145165809e-07, 'epoch': 2.77} + 17%|█▋ | 64240/371472 [5:06:41<23:42:04, 3.60it/s] 17%|█▋ | 64241/371472 [5:06:41<23:51:28, 3.58it/s] 17%|█▋ | 64242/371472 [5:06:42<24:05:22, 3.54it/s] 17%|█▋ | 64243/371472 [5:06:42<24:24:46, 3.50it/s] 17%|█▋ | 64244/371472 [5:06:42<25:46:34, 3.31it/s] 17%|█▋ | 64245/371472 [5:06:43<26:48:21, 3.18it/s] 17%|█▋ | 64246/371472 [5:06:43<25:31:58, 3.34it/s] 17%|█▋ | 64247/371472 [5:06:43<25:29:54, 3.35it/s] 17%|█▋ | 64248/371472 [5:06:44<24:44:06, 3.45it/s] 17%|█▋ | 64249/371472 [5:06:44<24:13:11, 3.52it/s] 17%|█▋ | 64250/371472 [5:06:44<23:25:47, 3.64it/s] 17%|█▋ | 64251/371472 [5:06:44<23:03:20, 3.70it/s] 17%|█▋ | 64252/371472 [5:06:45<24:36:52, 3.47it/s] 17%|█▋ | 64253/371472 [5:06:45<23:35:41, 3.62it/s] 17%|█▋ | 64254/371472 [5:06:45<25:16:52, 3.38it/s] 17%|█▋ | 64255/371472 [5:06:46<24:29:31, 3.48it/s] 17%|█▋ | 64256/371472 [5:06:46<25:18:09, 3.37it/s] 17%|█▋ | 64257/371472 [5:06:46<24:35:43, 3.47it/s] 17%|█▋ | 64258/371472 [5:06:46<24:14:58, 3.52it/s] 17%|█▋ | 64259/371472 [5:06:47<23:59:01, 3.56it/s] 17%|█▋ | 64260/371472 [5:06:47<23:23:47, 3.65it/s] {'loss': 4.1856, 'learning_rate': 8.447122325411018e-07, 'epoch': 2.77} + 17%|█▋ | 64260/371472 [5:06:47<23:23:47, 3.65it/s] 17%|█▋ | 64261/371472 [5:06:47<23:28:32, 3.64it/s] 17%|█▋ | 64262/371472 [5:06:48<25:48:02, 3.31it/s] 17%|█▋ | 64263/371472 [5:06:48<25:20:47, 3.37it/s] 17%|█▋ | 64264/371472 [5:06:48<25:48:12, 3.31it/s] 17%|█▋ | 64265/371472 [5:06:49<26:22:59, 3.23it/s] 17%|█▋ | 64266/371472 [5:06:49<26:15:14, 3.25it/s] 17%|█▋ | 64267/371472 [5:06:49<25:40:27, 3.32it/s] 17%|█▋ | 64268/371472 [5:06:49<24:41:34, 3.46it/s] 17%|█▋ | 64269/371472 [5:06:50<24:19:24, 3.51it/s] 17%|█▋ | 64270/371472 [5:06:50<23:45:23, 3.59it/s] 17%|█▋ | 64271/371472 [5:06:50<23:32:15, 3.63it/s] 17%|█▋ | 64272/371472 [5:06:50<23:22:31, 3.65it/s] 17%|█▋ | 64273/371472 [5:06:51<23:26:24, 3.64it/s] 17%|█▋ | 64274/371472 [5:06:51<23:50:56, 3.58it/s] 17%|█▋ | 64275/371472 [5:06:51<23:50:31, 3.58it/s] 17%|█▋ | 64276/371472 [5:06:52<23:48:32, 3.58it/s] 17%|█▋ | 64277/371472 [5:06:52<24:46:59, 3.44it/s] 17%|█▋ | 64278/371472 [5:06:52<23:46:37, 3.59it/s] 17%|█▋ | 64279/371472 [5:06:52<24:01:44, 3.55it/s] 17%|█▋ | 64280/371472 [5:06:53<23:12:58, 3.68it/s] {'loss': 4.2284, 'learning_rate': 8.446637505656229e-07, 'epoch': 2.77} + 17%|█▋ | 64280/371472 [5:06:53<23:12:58, 3.68it/s] 17%|█▋ | 64281/371472 [5:06:53<24:28:12, 3.49it/s] 17%|█▋ | 64282/371472 [5:06:53<24:08:19, 3.53it/s] 17%|█▋ | 64283/371472 [5:06:54<23:54:55, 3.57it/s] 17%|█▋ | 64284/371472 [5:06:54<23:37:19, 3.61it/s] 17%|█▋ | 64285/371472 [5:06:54<22:55:28, 3.72it/s] 17%|█▋ | 64286/371472 [5:06:54<23:26:21, 3.64it/s] 17%|█▋ | 64287/371472 [5:06:55<22:41:21, 3.76it/s] 17%|█▋ | 64288/371472 [5:06:55<24:13:55, 3.52it/s] 17%|█▋ | 64289/371472 [5:06:55<24:24:55, 3.49it/s] 17%|█▋ | 64290/371472 [5:06:55<24:17:50, 3.51it/s] 17%|█▋ | 64291/371472 [5:06:56<24:12:12, 3.53it/s] 17%|█▋ | 64292/371472 [5:06:56<24:13:58, 3.52it/s] 17%|█▋ | 64293/371472 [5:06:56<24:06:18, 3.54it/s] 17%|█▋ | 64294/371472 [5:06:57<24:40:54, 3.46it/s] 17%|█▋ | 64295/371472 [5:06:57<24:17:27, 3.51it/s] 17%|█▋ | 64296/371472 [5:06:57<23:35:37, 3.62it/s] 17%|█▋ | 64297/371472 [5:06:57<23:21:21, 3.65it/s] 17%|█▋ | 64298/371472 [5:06:58<22:49:38, 3.74it/s] 17%|█▋ | 64299/371472 [5:06:58<22:25:19, 3.81it/s] 17%|█▋ | 64300/371472 [5:06:58<23:47:04, 3.59it/s] {'loss': 4.2686, 'learning_rate': 8.446152685901441e-07, 'epoch': 2.77} + 17%|█▋ | 64300/371472 [5:06:58<23:47:04, 3.59it/s] 17%|█▋ | 64301/371472 [5:06:59<24:00:34, 3.55it/s] 17%|█▋ | 64302/371472 [5:06:59<24:15:42, 3.52it/s] 17%|█▋ | 64303/371472 [5:06:59<23:20:13, 3.66it/s] 17%|█▋ | 64304/371472 [5:06:59<23:02:45, 3.70it/s] 17%|█▋ | 64305/371472 [5:07:00<22:50:40, 3.73it/s] 17%|█▋ | 64306/371472 [5:07:00<22:30:19, 3.79it/s] 17%|█▋ | 64307/371472 [5:07:00<22:44:19, 3.75it/s] 17%|█▋ | 64308/371472 [5:07:00<23:37:27, 3.61it/s] 17%|█▋ | 64309/371472 [5:07:01<24:55:11, 3.42it/s] 17%|█▋ | 64310/371472 [5:07:01<25:00:15, 3.41it/s] 17%|█▋ | 64311/371472 [5:07:01<23:40:51, 3.60it/s] 17%|█▋ | 64312/371472 [5:07:02<23:28:28, 3.63it/s] 17%|█▋ | 64313/371472 [5:07:02<22:51:04, 3.73it/s] 17%|█▋ | 64314/371472 [5:07:02<22:22:28, 3.81it/s] 17%|█▋ | 64315/371472 [5:07:02<22:40:58, 3.76it/s] 17%|█▋ | 64316/371472 [5:07:03<24:13:11, 3.52it/s] 17%|█▋ | 64317/371472 [5:07:03<24:41:52, 3.45it/s] 17%|█▋ | 64318/371472 [5:07:03<23:54:43, 3.57it/s] 17%|█▋ | 64319/371472 [5:07:04<24:13:47, 3.52it/s] 17%|█▋ | 64320/371472 [5:07:04<23:24:46, 3.64it/s] {'loss': 4.1992, 'learning_rate': 8.445667866146653e-07, 'epoch': 2.77} + 17%|█▋ | 64320/371472 [5:07:04<23:24:46, 3.64it/s] 17%|█▋ | 64321/371472 [5:07:04<23:11:33, 3.68it/s] 17%|█▋ | 64322/371472 [5:07:04<23:58:57, 3.56it/s] 17%|█▋ | 64323/371472 [5:07:05<23:43:35, 3.60it/s] 17%|█▋ | 64324/371472 [5:07:05<23:28:31, 3.63it/s] 17%|█▋ | 64325/371472 [5:07:05<22:42:55, 3.76it/s] 17%|█▋ | 64326/371472 [5:07:05<22:40:39, 3.76it/s] 17%|█▋ | 64327/371472 [5:07:06<22:29:32, 3.79it/s] 17%|█▋ | 64328/371472 [5:07:06<23:09:10, 3.68it/s] 17%|█▋ | 64329/371472 [5:07:06<24:42:44, 3.45it/s] 17%|█▋ | 64330/371472 [5:07:07<23:42:36, 3.60it/s] 17%|█▋ | 64331/371472 [5:07:07<23:51:59, 3.57it/s] 17%|█▋ | 64332/371472 [5:07:07<23:00:43, 3.71it/s] 17%|█▋ | 64333/371472 [5:07:07<22:46:13, 3.75it/s] 17%|█▋ | 64334/371472 [5:07:08<22:41:59, 3.76it/s] 17%|█▋ | 64335/371472 [5:07:08<25:57:24, 3.29it/s] 17%|█▋ | 64336/371472 [5:07:08<24:32:57, 3.48it/s] 17%|█▋ | 64337/371472 [5:07:09<26:32:17, 3.21it/s] 17%|█▋ | 64338/371472 [5:07:09<27:05:33, 3.15it/s] 17%|█▋ | 64339/371472 [5:07:09<25:29:09, 3.35it/s] 17%|█▋ | 64340/371472 [5:07:10<27:19:37, 3.12it/s] {'loss': 4.2428, 'learning_rate': 8.445183046391863e-07, 'epoch': 2.77} + 17%|█▋ | 64340/371472 [5:07:10<27:19:37, 3.12it/s] 17%|█▋ | 64341/371472 [5:07:10<26:04:00, 3.27it/s] 17%|█▋ | 64342/371472 [5:07:10<24:50:51, 3.43it/s] 17%|█▋ | 64343/371472 [5:07:10<23:43:23, 3.60it/s] 17%|█▋ | 64344/371472 [5:07:11<23:02:07, 3.70it/s] 17%|█▋ | 64345/371472 [5:07:11<23:12:41, 3.68it/s] 17%|█▋ | 64346/371472 [5:07:11<22:38:46, 3.77it/s] 17%|█▋ | 64347/371472 [5:07:11<22:04:34, 3.86it/s] 17%|█▋ | 64348/371472 [5:07:12<22:24:54, 3.81it/s] 17%|█▋ | 64349/371472 [5:07:12<23:21:05, 3.65it/s] 17%|█▋ | 64350/371472 [5:07:12<23:59:07, 3.56it/s] 17%|█▋ | 64351/371472 [5:07:13<24:10:07, 3.53it/s] 17%|█▋ | 64352/371472 [5:07:13<24:00:56, 3.55it/s] 17%|█▋ | 64353/371472 [5:07:13<23:26:01, 3.64it/s] 17%|█▋ | 64354/371472 [5:07:13<23:36:55, 3.61it/s] 17%|█▋ | 64355/371472 [5:07:14<24:04:43, 3.54it/s] 17%|█▋ | 64356/371472 [5:07:14<23:54:52, 3.57it/s] 17%|█▋ | 64357/371472 [5:07:14<24:48:06, 3.44it/s] 17%|█▋ | 64358/371472 [5:07:15<27:33:55, 3.09it/s] 17%|█▋ | 64359/371472 [5:07:15<25:48:06, 3.31it/s] 17%|█▋ | 64360/371472 [5:07:15<24:58:57, 3.41it/s] {'loss': 4.3385, 'learning_rate': 8.444698226637074e-07, 'epoch': 2.77} + 17%|█▋ | 64360/371472 [5:07:15<24:58:57, 3.41it/s] 17%|█▋ | 64361/371472 [5:07:15<23:32:05, 3.62it/s] 17%|█▋ | 64362/371472 [5:07:16<22:37:57, 3.77it/s] 17%|█▋ | 64363/371472 [5:07:16<24:39:39, 3.46it/s] 17%|█▋ | 64364/371472 [5:07:16<24:05:57, 3.54it/s] 17%|█▋ | 64365/371472 [5:07:16<23:17:35, 3.66it/s] 17%|█▋ | 64366/371472 [5:07:17<23:17:00, 3.66it/s] 17%|█▋ | 64367/371472 [5:07:17<22:43:35, 3.75it/s] 17%|█▋ | 64368/371472 [5:07:17<21:50:28, 3.91it/s] 17%|█▋ | 64369/371472 [5:07:18<22:28:47, 3.79it/s] 17%|█▋ | 64370/371472 [5:07:18<22:25:50, 3.80it/s] 17%|█▋ | 64371/371472 [5:07:18<24:52:20, 3.43it/s] 17%|█▋ | 64372/371472 [5:07:18<26:42:12, 3.19it/s] 17%|█▋ | 64373/371472 [5:07:19<26:00:09, 3.28it/s] 17%|█▋ | 64374/371472 [5:07:19<25:46:32, 3.31it/s] 17%|█▋ | 64375/371472 [5:07:19<27:19:12, 3.12it/s] 17%|█▋ | 64376/371472 [5:07:20<25:51:14, 3.30it/s] 17%|█▋ | 64377/371472 [5:07:20<24:56:23, 3.42it/s] 17%|█▋ | 64378/371472 [5:07:20<27:29:33, 3.10it/s] 17%|█▋ | 64379/371472 [5:07:21<26:10:19, 3.26it/s] 17%|█▋ | 64380/371472 [5:07:21<24:39:25, 3.46it/s] {'loss': 4.0539, 'learning_rate': 8.444213406882286e-07, 'epoch': 2.77} + 17%|█▋ | 64380/371472 [5:07:21<24:39:25, 3.46it/s] 17%|█▋ | 64381/371472 [5:07:21<25:54:32, 3.29it/s] 17%|█▋ | 64382/371472 [5:07:22<26:06:18, 3.27it/s] 17%|█▋ | 64383/371472 [5:07:22<25:40:30, 3.32it/s] 17%|█▋ | 64384/371472 [5:07:22<25:46:52, 3.31it/s] 17%|█▋ | 64385/371472 [5:07:22<25:51:03, 3.30it/s] 17%|█▋ | 64386/371472 [5:07:23<25:22:27, 3.36it/s] 17%|█▋ | 64387/371472 [5:07:23<24:57:44, 3.42it/s] 17%|█▋ | 64388/371472 [5:07:23<24:35:54, 3.47it/s] 17%|█▋ | 64389/371472 [5:07:24<25:14:03, 3.38it/s] 17%|█▋ | 64390/371472 [5:07:24<24:27:07, 3.49it/s] 17%|█▋ | 64391/371472 [5:07:24<23:37:08, 3.61it/s] 17%|█▋ | 64392/371472 [5:07:24<23:56:56, 3.56it/s] 17%|█▋ | 64393/371472 [5:07:25<24:06:05, 3.54it/s] 17%|█▋ | 64394/371472 [5:07:25<23:37:11, 3.61it/s] 17%|█▋ | 64395/371472 [5:07:25<24:08:00, 3.53it/s] 17%|█▋ | 64396/371472 [5:07:26<24:05:26, 3.54it/s] 17%|█▋ | 64397/371472 [5:07:26<24:07:56, 3.53it/s] 17%|█▋ | 64398/371472 [5:07:26<24:02:59, 3.55it/s] 17%|█▋ | 64399/371472 [5:07:26<24:04:08, 3.54it/s] 17%|█▋ | 64400/371472 [5:07:27<23:02:40, 3.70it/s] {'loss': 4.073, 'learning_rate': 8.443728587127497e-07, 'epoch': 2.77} + 17%|█▋ | 64400/371472 [5:07:27<23:02:40, 3.70it/s] 17%|█▋ | 64401/371472 [5:07:27<24:15:50, 3.52it/s] 17%|█▋ | 64402/371472 [5:07:27<25:03:08, 3.40it/s] 17%|█▋ | 64403/371472 [5:07:28<24:32:40, 3.48it/s] 17%|█▋ | 64404/371472 [5:07:28<23:31:07, 3.63it/s] 17%|█▋ | 64405/371472 [5:07:28<23:06:08, 3.69it/s] 17%|█▋ | 64406/371472 [5:07:28<24:40:28, 3.46it/s] 17%|█▋ | 64407/371472 [5:07:29<23:58:30, 3.56it/s] 17%|█▋ | 64408/371472 [5:07:29<25:39:15, 3.32it/s] 17%|█▋ | 64409/371472 [5:07:29<26:14:33, 3.25it/s] 17%|█▋ | 64410/371472 [5:07:30<26:45:47, 3.19it/s] 17%|█▋ | 64411/371472 [5:07:30<26:04:02, 3.27it/s] 17%|█▋ | 64412/371472 [5:07:30<25:24:51, 3.36it/s] 17%|█▋ | 64413/371472 [5:07:30<25:26:22, 3.35it/s] 17%|█▋ | 64414/371472 [5:07:31<25:24:27, 3.36it/s] 17%|█▋ | 64415/371472 [5:07:31<24:38:41, 3.46it/s] 17%|█▋ | 64416/371472 [5:07:31<24:01:29, 3.55it/s] 17%|█▋ | 64417/371472 [5:07:32<24:44:01, 3.45it/s] 17%|█▋ | 64418/371472 [5:07:32<23:37:57, 3.61it/s] 17%|█▋ | 64419/371472 [5:07:32<22:45:22, 3.75it/s] 17%|█▋ | 64420/371472 [5:07:32<24:35:35, 3.47it/s] {'loss': 4.1707, 'learning_rate': 8.443243767372707e-07, 'epoch': 2.77} + 17%|█▋ | 64420/371472 [5:07:32<24:35:35, 3.47it/s] 17%|█▋ | 64421/371472 [5:07:33<25:41:02, 3.32it/s] 17%|█▋ | 64422/371472 [5:07:33<24:28:24, 3.49it/s] 17%|█▋ | 64423/371472 [5:07:33<23:37:23, 3.61it/s] 17%|█▋ | 64424/371472 [5:07:34<22:59:52, 3.71it/s] 17%|█▋ | 64425/371472 [5:07:34<22:38:05, 3.77it/s] 17%|█▋ | 64426/371472 [5:07:34<24:58:35, 3.41it/s] 17%|█▋ | 64427/371472 [5:07:34<23:44:28, 3.59it/s] 17%|█▋ | 64428/371472 [5:07:35<23:53:05, 3.57it/s] 17%|█▋ | 64429/371472 [5:07:35<22:47:50, 3.74it/s] 17%|█▋ | 64430/371472 [5:07:35<22:27:58, 3.80it/s] 17%|█▋ | 64431/371472 [5:07:36<27:19:16, 3.12it/s] 17%|█▋ | 64432/371472 [5:07:36<28:08:40, 3.03it/s] 17%|█▋ | 64433/371472 [5:07:36<25:52:33, 3.30it/s] 17%|█▋ | 64434/371472 [5:07:36<24:25:54, 3.49it/s] 17%|█▋ | 64435/371472 [5:07:37<24:12:37, 3.52it/s] 17%|█▋ | 64436/371472 [5:07:37<25:29:18, 3.35it/s] 17%|█▋ | 64437/371472 [5:07:37<24:41:03, 3.46it/s] 17%|█▋ | 64438/371472 [5:07:38<24:33:26, 3.47it/s] 17%|█▋ | 64439/371472 [5:07:38<23:21:22, 3.65it/s] 17%|█▋ | 64440/371472 [5:07:38<23:33:08, 3.62it/s] {'loss': 4.1407, 'learning_rate': 8.442758947617918e-07, 'epoch': 2.78} + 17%|█▋ | 64440/371472 [5:07:38<23:33:08, 3.62it/s] 17%|█▋ | 64441/371472 [5:07:38<23:40:08, 3.60it/s] 17%|█▋ | 64442/371472 [5:07:39<23:06:57, 3.69it/s] 17%|█▋ | 64443/371472 [5:07:39<23:40:23, 3.60it/s] 17%|█▋ | 64444/371472 [5:07:39<23:46:11, 3.59it/s] 17%|█▋ | 64445/371472 [5:07:40<23:07:07, 3.69it/s] 17%|█▋ | 64446/371472 [5:07:40<23:04:21, 3.70it/s] 17%|█▋ | 64447/371472 [5:07:40<22:44:47, 3.75it/s] 17%|█▋ | 64448/371472 [5:07:40<22:09:42, 3.85it/s] 17%|█▋ | 64449/371472 [5:07:41<22:02:35, 3.87it/s] 17%|█▋ | 64450/371472 [5:07:41<23:48:39, 3.58it/s] 17%|█▋ | 64451/371472 [5:07:41<24:44:15, 3.45it/s] 17%|█▋ | 64452/371472 [5:07:42<26:45:42, 3.19it/s] 17%|█▋ | 64453/371472 [5:07:42<25:50:01, 3.30it/s] 17%|█▋ | 64454/371472 [5:07:42<24:31:46, 3.48it/s] 17%|█▋ | 64455/371472 [5:07:42<25:18:01, 3.37it/s] 17%|█▋ | 64456/371472 [5:07:43<28:53:56, 2.95it/s] 17%|█▋ | 64457/371472 [5:07:43<32:36:23, 2.62it/s] 17%|█▋ | 64458/371472 [5:07:44<29:16:05, 2.91it/s] 17%|█▋ | 64459/371472 [5:07:44<27:13:49, 3.13it/s] 17%|█▋ | 64460/371472 [5:07:44<26:01:37, 3.28it/s] {'loss': 4.2477, 'learning_rate': 8.442274127863129e-07, 'epoch': 2.78} + 17%|█▋ | 64460/371472 [5:07:44<26:01:37, 3.28it/s] 17%|█▋ | 64461/371472 [5:07:44<24:56:46, 3.42it/s] 17%|█▋ | 64462/371472 [5:07:45<23:59:04, 3.56it/s] 17%|█▋ | 64463/371472 [5:07:45<22:51:44, 3.73it/s] 17%|█▋ | 64464/371472 [5:07:45<22:12:33, 3.84it/s] 17%|█▋ | 64465/371472 [5:07:45<23:15:32, 3.67it/s] 17%|█▋ | 64466/371472 [5:07:46<23:03:54, 3.70it/s] 17%|█▋ | 64467/371472 [5:07:46<26:19:05, 3.24it/s] 17%|█▋ | 64468/371472 [5:07:46<24:39:02, 3.46it/s] 17%|█▋ | 64469/371472 [5:07:47<24:04:00, 3.54it/s] 17%|█▋ | 64470/371472 [5:07:47<27:24:32, 3.11it/s] 17%|█▋ | 64471/371472 [5:07:47<26:15:23, 3.25it/s] 17%|█▋ | 64472/371472 [5:07:48<27:23:18, 3.11it/s] 17%|█▋ | 64473/371472 [5:07:48<26:51:10, 3.18it/s] 17%|█▋ | 64474/371472 [5:07:48<25:21:01, 3.36it/s] 17%|█▋ | 64475/371472 [5:07:48<24:11:10, 3.53it/s] 17%|█▋ | 64476/371472 [5:07:49<23:41:17, 3.60it/s] 17%|█▋ | 64477/371472 [5:07:49<22:37:57, 3.77it/s] 17%|█▋ | 64478/371472 [5:07:49<23:32:44, 3.62it/s] 17%|█▋ | 64479/371472 [5:07:50<24:20:24, 3.50it/s] 17%|█▋ | 64480/371472 [5:07:50<23:36:30, 3.61it/s] {'loss': 4.3696, 'learning_rate': 8.441789308108341e-07, 'epoch': 2.78} + 17%|█▋ | 64480/371472 [5:07:50<23:36:30, 3.61it/s] 17%|█▋ | 64481/371472 [5:07:50<23:12:31, 3.67it/s] 17%|█▋ | 64482/371472 [5:07:50<23:10:53, 3.68it/s] 17%|█▋ | 64483/371472 [5:07:51<23:42:39, 3.60it/s] 17%|█▋ | 64484/371472 [5:07:51<23:47:16, 3.58it/s] 17%|█▋ | 64485/371472 [5:07:51<23:08:42, 3.68it/s] 17%|█▋ | 64486/371472 [5:07:51<23:56:23, 3.56it/s] 17%|█▋ | 64487/371472 [5:07:52<23:19:51, 3.65it/s] 17%|█▋ | 64488/371472 [5:07:52<25:50:04, 3.30it/s] 17%|█▋ | 64489/371472 [5:07:52<24:07:59, 3.53it/s] 17%|█▋ | 64490/371472 [5:07:53<23:33:59, 3.62it/s] 17%|█▋ | 64491/371472 [5:07:53<24:17:08, 3.51it/s] 17%|█▋ | 64492/371472 [5:07:53<23:50:47, 3.58it/s] 17%|█▋ | 64493/371472 [5:07:54<25:46:28, 3.31it/s] 17%|█▋ | 64494/371472 [5:07:54<25:56:48, 3.29it/s] 17%|█▋ | 64495/371472 [5:07:54<25:09:04, 3.39it/s] 17%|█▋ | 64496/371472 [5:07:54<25:17:13, 3.37it/s] 17%|█▋ | 64497/371472 [5:07:55<24:30:40, 3.48it/s] 17%|█▋ | 64498/371472 [5:07:55<23:56:44, 3.56it/s] 17%|█▋ | 64499/371472 [5:07:55<24:38:47, 3.46it/s] 17%|█▋ | 64500/371472 [5:07:56<24:30:08, 3.48it/s] {'loss': 4.2808, 'learning_rate': 8.441304488353552e-07, 'epoch': 2.78} + 17%|█▋ | 64500/371472 [5:07:56<24:30:08, 3.48it/s] 17%|█▋ | 64501/371472 [5:07:56<23:35:40, 3.61it/s] 17%|█▋ | 64502/371472 [5:07:56<23:38:20, 3.61it/s] 17%|█▋ | 64503/371472 [5:07:56<23:27:55, 3.63it/s] 17%|█▋ | 64504/371472 [5:07:57<24:09:12, 3.53it/s] 17%|█▋ | 64505/371472 [5:07:57<25:03:38, 3.40it/s] 17%|█▋ | 64506/371472 [5:07:57<24:05:21, 3.54it/s] 17%|█▋ | 64507/371472 [5:07:57<24:19:07, 3.51it/s] 17%|█▋ | 64508/371472 [5:07:58<24:04:43, 3.54it/s] 17%|█▋ | 64509/371472 [5:07:58<24:18:37, 3.51it/s] 17%|█▋ | 64510/371472 [5:07:58<24:22:52, 3.50it/s] 17%|█▋ | 64511/371472 [5:07:59<24:06:28, 3.54it/s] 17%|█▋ | 64512/371472 [5:07:59<24:28:12, 3.48it/s] 17%|█▋ | 64513/371472 [5:07:59<23:30:14, 3.63it/s] 17%|█▋ | 64514/371472 [5:07:59<23:34:45, 3.62it/s] 17%|█▋ | 64515/371472 [5:08:00<25:20:47, 3.36it/s] 17%|█▋ | 64516/371472 [5:08:00<23:55:24, 3.56it/s] 17%|█▋ | 64517/371472 [5:08:00<23:59:48, 3.55it/s] 17%|█▋ | 64518/371472 [5:08:01<23:59:23, 3.55it/s] 17%|█▋ | 64519/371472 [5:08:01<23:06:50, 3.69it/s] 17%|█▋ | 64520/371472 [5:08:01<22:25:05, 3.80it/s] {'loss': 4.248, 'learning_rate': 8.440819668598762e-07, 'epoch': 2.78} + 17%|█▋ | 64520/371472 [5:08:01<22:25:05, 3.80it/s] 17%|█▋ | 64521/371472 [5:08:01<23:57:44, 3.56it/s] 17%|█▋ | 64522/371472 [5:08:02<24:44:38, 3.45it/s] 17%|█▋ | 64523/371472 [5:08:02<24:54:45, 3.42it/s] 17%|█▋ | 64524/371472 [5:08:02<24:15:15, 3.52it/s] 17%|█▋ | 64525/371472 [5:08:03<23:37:02, 3.61it/s] 17%|█▋ | 64526/371472 [5:08:03<22:32:49, 3.78it/s] 17%|█▋ | 64527/371472 [5:08:03<23:44:20, 3.59it/s] 17%|█▋ | 64528/371472 [5:08:03<23:22:31, 3.65it/s] 17%|█▋ | 64529/371472 [5:08:04<22:56:09, 3.72it/s] 17%|█▋ | 64530/371472 [5:08:04<22:15:42, 3.83it/s] 17%|█▋ | 64531/371472 [5:08:04<22:43:31, 3.75it/s] 17%|█▋ | 64532/371472 [5:08:04<22:27:16, 3.80it/s] 17%|█▋ | 64533/371472 [5:08:05<22:05:36, 3.86it/s] 17%|█▋ | 64534/371472 [5:08:05<22:26:40, 3.80it/s] 17%|█▋ | 64535/371472 [5:08:05<23:39:19, 3.60it/s] 17%|█▋ | 64536/371472 [5:08:05<22:47:03, 3.74it/s] 17%|█▋ | 64537/371472 [5:08:06<23:06:00, 3.69it/s] 17%|█▋ | 64538/371472 [5:08:06<22:58:24, 3.71it/s] 17%|█▋ | 64539/371472 [5:08:06<23:36:50, 3.61it/s] 17%|█▋ | 64540/371472 [5:08:07<23:42:28, 3.60it/s] {'loss': 4.3766, 'learning_rate': 8.440334848843974e-07, 'epoch': 2.78} + 17%|█▋ | 64540/371472 [5:08:07<23:42:28, 3.60it/s] 17%|█▋ | 64541/371472 [5:08:07<22:59:19, 3.71it/s] 17%|█▋ | 64542/371472 [5:08:07<23:19:10, 3.66it/s] 17%|█▋ | 64543/371472 [5:08:07<23:12:13, 3.67it/s] 17%|█▋ | 64544/371472 [5:08:08<23:24:27, 3.64it/s] 17%|█▋ | 64545/371472 [5:08:08<26:20:36, 3.24it/s] 17%|█▋ | 64546/371472 [5:08:08<25:24:35, 3.36it/s] 17%|█▋ | 64547/371472 [5:08:09<25:15:55, 3.37it/s] 17%|█▋ | 64548/371472 [5:08:09<24:52:54, 3.43it/s] 17%|█▋ | 64549/371472 [5:08:09<26:17:19, 3.24it/s] 17%|█▋ | 64550/371472 [5:08:10<28:06:14, 3.03it/s] 17%|█▋ | 64551/371472 [5:08:10<27:16:36, 3.13it/s] 17%|█▋ | 64552/371472 [5:08:10<26:18:44, 3.24it/s] 17%|█▋ | 64553/371472 [5:08:11<33:40:07, 2.53it/s] 17%|█▋ | 64554/371472 [5:08:11<29:57:50, 2.85it/s] 17%|█▋ | 64555/371472 [5:08:11<27:44:23, 3.07it/s] 17%|█▋ | 64556/371472 [5:08:12<25:13:00, 3.38it/s] 17%|█▋ | 64557/371472 [5:08:12<26:22:16, 3.23it/s] 17%|█▋ | 64558/371472 [5:08:12<26:36:20, 3.20it/s] 17%|█▋ | 64559/371472 [5:08:12<25:41:21, 3.32it/s] 17%|█▋ | 64560/371472 [5:08:13<25:15:57, 3.37it/s] {'loss': 4.157, 'learning_rate': 8.439850029089184e-07, 'epoch': 2.78} + 17%|█▋ | 64560/371472 [5:08:13<25:15:57, 3.37it/s] 17%|█▋ | 64561/371472 [5:08:13<24:39:33, 3.46it/s] 17%|█▋ | 64562/371472 [5:08:13<25:39:10, 3.32it/s] 17%|█▋ | 64563/371472 [5:08:14<25:30:56, 3.34it/s] 17%|█▋ | 64564/371472 [5:08:14<25:24:37, 3.35it/s] 17%|█▋ | 64565/371472 [5:08:14<24:34:06, 3.47it/s] 17%|█▋ | 64566/371472 [5:08:15<24:21:18, 3.50it/s] 17%|█▋ | 64567/371472 [5:08:15<26:36:50, 3.20it/s] 17%|█▋ | 64568/371472 [5:08:15<25:22:24, 3.36it/s] 17%|█▋ | 64569/371472 [5:08:15<25:08:20, 3.39it/s] 17%|█▋ | 64570/371472 [5:08:16<24:46:26, 3.44it/s] 17%|█▋ | 64571/371472 [5:08:16<24:50:50, 3.43it/s] 17%|█▋ | 64572/371472 [5:08:16<24:06:15, 3.54it/s] 17%|█▋ | 64573/371472 [5:08:17<24:13:05, 3.52it/s] 17%|█▋ | 64574/371472 [5:08:17<24:49:32, 3.43it/s] 17%|█▋ | 64575/371472 [5:08:17<26:06:08, 3.27it/s] 17%|█▋ | 64576/371472 [5:08:18<27:26:20, 3.11it/s] 17%|█▋ | 64577/371472 [5:08:18<26:55:35, 3.17it/s] 17%|█▋ | 64578/371472 [5:08:18<25:21:45, 3.36it/s] 17%|█▋ | 64579/371472 [5:08:18<25:21:39, 3.36it/s] 17%|█▋ | 64580/371472 [5:08:19<24:53:34, 3.42it/s] {'loss': 4.2217, 'learning_rate': 8.439365209334396e-07, 'epoch': 2.78} + 17%|█▋ | 64580/371472 [5:08:19<24:53:34, 3.42it/s] 17%|█▋ | 64581/371472 [5:08:19<24:10:13, 3.53it/s] 17%|█▋ | 64582/371472 [5:08:19<23:17:41, 3.66it/s] 17%|█▋ | 64583/371472 [5:08:19<23:17:32, 3.66it/s] 17%|█▋ | 64584/371472 [5:08:20<23:39:36, 3.60it/s] 17%|█▋ | 64585/371472 [5:08:20<24:04:40, 3.54it/s] 17%|█▋ | 64586/371472 [5:08:21<28:27:13, 3.00it/s] 17%|█▋ | 64587/371472 [5:08:21<26:12:28, 3.25it/s] 17%|█▋ | 64588/371472 [5:08:21<25:22:23, 3.36it/s] 17%|█▋ | 64589/371472 [5:08:21<24:12:27, 3.52it/s] 17%|█▋ | 64590/371472 [5:08:22<23:37:23, 3.61it/s] 17%|█▋ | 64591/371472 [5:08:22<24:05:45, 3.54it/s] 17%|█▋ | 64592/371472 [5:08:22<25:19:40, 3.37it/s] 17%|█▋ | 64593/371472 [5:08:22<25:07:16, 3.39it/s] 17%|█▋ | 64594/371472 [5:08:23<24:24:55, 3.49it/s] 17%|█▋ | 64595/371472 [5:08:23<23:31:06, 3.62it/s] 17%|█▋ | 64596/371472 [5:08:23<23:41:21, 3.60it/s] 17%|█▋ | 64597/371472 [5:08:24<23:06:48, 3.69it/s] 17%|█▋ | 64598/371472 [5:08:24<23:59:11, 3.55it/s] 17%|█▋ | 64599/371472 [5:08:24<24:08:40, 3.53it/s] 17%|█▋ | 64600/371472 [5:08:24<24:30:02, 3.48it/s] {'loss': 4.1429, 'learning_rate': 8.438880389579607e-07, 'epoch': 2.78} + 17%|█▋ | 64600/371472 [5:08:24<24:30:02, 3.48it/s] 17%|█▋ | 64601/371472 [5:08:25<24:35:01, 3.47it/s] 17%|█▋ | 64602/371472 [5:08:25<24:33:05, 3.47it/s] 17%|█▋ | 64603/371472 [5:08:25<23:31:30, 3.62it/s] 17%|█▋ | 64604/371472 [5:08:26<23:43:08, 3.59it/s] 17%|█▋ | 64605/371472 [5:08:26<23:27:25, 3.63it/s] 17%|█▋ | 64606/371472 [5:08:26<23:28:42, 3.63it/s] 17%|█▋ | 64607/371472 [5:08:26<22:56:57, 3.71it/s] 17%|█▋ | 64608/371472 [5:08:27<22:36:25, 3.77it/s] 17%|█▋ | 64609/371472 [5:08:27<22:52:54, 3.73it/s] 17%|█▋ | 64610/371472 [5:08:27<22:47:49, 3.74it/s] 17%|█▋ | 64611/371472 [5:08:27<22:24:17, 3.80it/s] 17%|█▋ | 64612/371472 [5:08:28<23:06:15, 3.69it/s] 17%|█▋ | 64613/371472 [5:08:28<22:32:31, 3.78it/s] 17%|█▋ | 64614/371472 [5:08:28<22:57:05, 3.71it/s] 17%|█▋ | 64615/371472 [5:08:28<22:45:13, 3.75it/s] 17%|█▋ | 64616/371472 [5:08:29<22:05:25, 3.86it/s] 17%|█▋ | 64617/371472 [5:08:29<21:50:34, 3.90it/s] 17%|█▋ | 64618/371472 [5:08:29<21:57:59, 3.88it/s] 17%|█▋ | 64619/371472 [5:08:29<22:21:35, 3.81it/s] 17%|█▋ | 64620/371472 [5:08:30<22:56:52, 3.71it/s] {'loss': 4.1362, 'learning_rate': 8.438395569824819e-07, 'epoch': 2.78} + 17%|█▋ | 64620/371472 [5:08:30<22:56:52, 3.71it/s] 17%|█▋ | 64621/371472 [5:08:30<23:05:03, 3.69it/s] 17%|█▋ | 64622/371472 [5:08:30<25:44:27, 3.31it/s] 17%|█▋ | 64623/371472 [5:08:31<24:58:39, 3.41it/s] 17%|█▋ | 64624/371472 [5:08:31<26:07:03, 3.26it/s] 17%|█▋ | 64625/371472 [5:08:31<25:29:45, 3.34it/s] 17%|█▋ | 64626/371472 [5:08:32<24:33:55, 3.47it/s] 17%|█▋ | 64627/371472 [5:08:32<23:54:11, 3.57it/s] 17%|█▋ | 64628/371472 [5:08:32<23:15:36, 3.66it/s] 17%|█▋ | 64629/371472 [5:08:32<24:17:35, 3.51it/s] 17%|█▋ | 64630/371472 [5:08:33<24:44:45, 3.44it/s] 17%|█▋ | 64631/371472 [5:08:33<23:58:30, 3.56it/s] 17%|█▋ | 64632/371472 [5:08:33<23:17:45, 3.66it/s] 17%|█▋ | 64633/371472 [5:08:33<23:22:42, 3.65it/s] 17%|█▋ | 64634/371472 [5:08:34<23:12:25, 3.67it/s] 17%|█▋ | 64635/371472 [5:08:34<22:25:58, 3.80it/s] 17%|█▋ | 64636/371472 [5:08:34<22:17:34, 3.82it/s] 17%|█▋ | 64637/371472 [5:08:35<23:40:59, 3.60it/s] 17%|█▋ | 64638/371472 [5:08:35<22:30:17, 3.79it/s] 17%|█▋ | 64639/371472 [5:08:35<22:34:06, 3.78it/s] 17%|█▋ | 64640/371472 [5:08:35<22:12:54, 3.84it/s] {'loss': 4.2166, 'learning_rate': 8.437910750070029e-07, 'epoch': 2.78} + 17%|█▋ | 64640/371472 [5:08:35<22:12:54, 3.84it/s] 17%|█▋ | 64641/371472 [5:08:36<22:19:22, 3.82it/s] 17%|█▋ | 64642/371472 [5:08:36<24:44:24, 3.45it/s] 17%|█▋ | 64643/371472 [5:08:36<24:51:07, 3.43it/s] 17%|█▋ | 64644/371472 [5:08:36<23:24:45, 3.64it/s] 17%|█▋ | 64645/371472 [5:08:37<22:58:54, 3.71it/s] 17%|█▋ | 64646/371472 [5:08:37<22:51:19, 3.73it/s] 17%|█▋ | 64647/371472 [5:08:37<24:21:14, 3.50it/s] 17%|█▋ | 64648/371472 [5:08:38<24:12:16, 3.52it/s] 17%|█▋ | 64649/371472 [5:08:38<25:21:44, 3.36it/s] 17%|█▋ | 64650/371472 [5:08:38<24:48:36, 3.44it/s] 17%|█▋ | 64651/371472 [5:08:38<24:21:46, 3.50it/s] 17%|█▋ | 64652/371472 [5:08:39<23:25:10, 3.64it/s] 17%|█▋ | 64653/371472 [5:08:39<23:59:57, 3.55it/s] 17%|█▋ | 64654/371472 [5:08:39<24:02:53, 3.54it/s] 17%|█▋ | 64655/371472 [5:08:40<24:23:19, 3.49it/s] 17%|█▋ | 64656/371472 [5:08:40<27:04:11, 3.15it/s] 17%|█▋ | 64657/371472 [5:08:40<26:14:15, 3.25it/s] 17%|█▋ | 64658/371472 [5:08:41<24:57:38, 3.41it/s] 17%|█▋ | 64659/371472 [5:08:41<25:11:44, 3.38it/s] 17%|█▋ | 64660/371472 [5:08:41<24:35:39, 3.47it/s] {'loss': 4.1578, 'learning_rate': 8.43742593031524e-07, 'epoch': 2.79} + 17%|█▋ | 64660/371472 [5:08:41<24:35:39, 3.47it/s] 17%|█▋ | 64661/371472 [5:08:41<24:22:28, 3.50it/s] 17%|█▋ | 64662/371472 [5:08:42<25:13:59, 3.38it/s] 17%|█▋ | 64663/371472 [5:08:42<23:45:23, 3.59it/s] 17%|█▋ | 64664/371472 [5:08:42<22:51:02, 3.73it/s] 17%|█▋ | 64665/371472 [5:08:42<23:35:05, 3.61it/s] 17%|█▋ | 64666/371472 [5:08:43<23:45:04, 3.59it/s] 17%|█▋ | 64667/371472 [5:08:43<23:34:28, 3.62it/s] 17%|█▋ | 64668/371472 [5:08:43<23:41:01, 3.60it/s] 17%|█▋ | 64669/371472 [5:08:44<23:20:30, 3.65it/s] 17%|█▋ | 64670/371472 [5:08:44<22:37:36, 3.77it/s] 17%|█▋ | 64671/371472 [5:08:44<23:35:15, 3.61it/s] 17%|█▋ | 64672/371472 [5:08:44<25:05:12, 3.40it/s] 17%|█▋ | 64673/371472 [5:08:45<25:07:15, 3.39it/s] 17%|█▋ | 64674/371472 [5:08:45<24:39:39, 3.46it/s] 17%|█▋ | 64675/371472 [5:08:45<23:38:39, 3.60it/s] 17%|█▋ | 64676/371472 [5:08:46<23:39:39, 3.60it/s] 17%|█▋ | 64677/371472 [5:08:46<24:50:07, 3.43it/s] 17%|█▋ | 64678/371472 [5:08:46<24:33:21, 3.47it/s] 17%|█▋ | 64679/371472 [5:08:47<25:52:14, 3.29it/s] 17%|█▋ | 64680/371472 [5:08:47<25:21:32, 3.36it/s] {'loss': 4.399, 'learning_rate': 8.436941110560451e-07, 'epoch': 2.79} + 17%|█▋ | 64680/371472 [5:08:47<25:21:32, 3.36it/s] 17%|█▋ | 64681/371472 [5:08:47<24:28:29, 3.48it/s] 17%|█▋ | 64682/371472 [5:08:47<24:23:00, 3.49it/s] 17%|█▋ | 64683/371472 [5:08:48<23:08:52, 3.68it/s] 17%|█▋ | 64684/371472 [5:08:48<23:32:28, 3.62it/s] 17%|█▋ | 64685/371472 [5:08:48<23:51:07, 3.57it/s] 17%|█▋ | 64686/371472 [5:08:48<23:20:48, 3.65it/s] 17%|█▋ | 64687/371472 [5:08:49<22:38:42, 3.76it/s] 17%|█▋ | 64688/371472 [5:08:49<22:37:44, 3.77it/s] 17%|█▋ | 64689/371472 [5:08:49<22:38:47, 3.76it/s] 17%|█▋ | 64690/371472 [5:08:49<22:40:41, 3.76it/s] 17%|█▋ | 64691/371472 [5:08:50<22:43:16, 3.75it/s] 17%|█▋ | 64692/371472 [5:08:50<23:28:10, 3.63it/s] 17%|█▋ | 64693/371472 [5:08:50<23:53:35, 3.57it/s] 17%|█▋ | 64694/371472 [5:08:51<24:12:23, 3.52it/s] 17%|█▋ | 64695/371472 [5:08:51<23:46:29, 3.58it/s] 17%|█▋ | 64696/371472 [5:08:51<23:07:33, 3.68it/s] 17%|█▋ | 64697/371472 [5:08:51<22:46:33, 3.74it/s] 17%|█▋ | 64698/371472 [5:08:52<22:06:15, 3.86it/s] 17%|█▋ | 64699/371472 [5:08:52<24:00:34, 3.55it/s] 17%|█▋ | 64700/371472 [5:08:52<24:37:28, 3.46it/s] {'loss': 4.2696, 'learning_rate': 8.436456290805663e-07, 'epoch': 2.79} + 17%|█▋ | 64700/371472 [5:08:52<24:37:28, 3.46it/s] 17%|█▋ | 64701/371472 [5:08:53<24:26:21, 3.49it/s] 17%|█▋ | 64702/371472 [5:08:53<23:36:54, 3.61it/s] 17%|█▋ | 64703/371472 [5:08:53<22:52:05, 3.73it/s] 17%|█▋ | 64704/371472 [5:08:53<22:21:12, 3.81it/s] 17%|█▋ | 64705/371472 [5:08:54<22:43:30, 3.75it/s] 17%|█▋ | 64706/371472 [5:08:54<23:57:42, 3.56it/s] 17%|█▋ | 64707/371472 [5:08:54<22:49:55, 3.73it/s] 17%|█▋ | 64708/371472 [5:08:55<25:30:32, 3.34it/s] 17%|█▋ | 64709/371472 [5:08:55<26:16:27, 3.24it/s] 17%|█▋ | 64710/371472 [5:08:55<25:05:17, 3.40it/s] 17%|█▋ | 64711/371472 [5:08:55<23:20:00, 3.65it/s] 17%|█▋ | 64712/371472 [5:08:56<22:39:47, 3.76it/s] 17%|█▋ | 64713/371472 [5:08:56<24:04:04, 3.54it/s] 17%|█▋ | 64714/371472 [5:08:56<23:09:09, 3.68it/s] 17%|█▋ | 64715/371472 [5:08:56<22:43:15, 3.75it/s] 17%|█▋ | 64716/371472 [5:08:57<22:32:59, 3.78it/s] 17%|█▋ | 64717/371472 [5:08:57<23:23:16, 3.64it/s] 17%|█▋ | 64718/371472 [5:08:57<22:46:09, 3.74it/s] 17%|█▋ | 64719/371472 [5:08:57<22:25:45, 3.80it/s] 17%|█▋ | 64720/371472 [5:08:58<24:00:43, 3.55it/s] {'loss': 4.1585, 'learning_rate': 8.435971471050873e-07, 'epoch': 2.79} + 17%|█▋ | 64720/371472 [5:08:58<24:00:43, 3.55it/s] 17%|█▋ | 64721/371472 [5:08:58<23:20:23, 3.65it/s] 17%|█▋ | 64722/371472 [5:08:58<24:09:32, 3.53it/s] 17%|█▋ | 64723/371472 [5:08:59<24:51:15, 3.43it/s] 17%|█▋ | 64724/371472 [5:08:59<24:51:58, 3.43it/s] 17%|█▋ | 64725/371472 [5:08:59<25:23:52, 3.35it/s] 17%|█▋ | 64726/371472 [5:09:00<25:11:39, 3.38it/s] 17%|█▋ | 64727/371472 [5:09:00<24:02:17, 3.54it/s] 17%|█▋ | 64728/371472 [5:09:00<23:27:49, 3.63it/s] 17%|█▋ | 64729/371472 [5:09:00<24:16:11, 3.51it/s] 17%|█▋ | 64730/371472 [5:09:01<25:27:53, 3.35it/s] 17%|█▋ | 64731/371472 [5:09:01<25:59:48, 3.28it/s] 17%|█▋ | 64732/371472 [5:09:01<24:54:17, 3.42it/s] 17%|█▋ | 64733/371472 [5:09:02<25:36:37, 3.33it/s] 17%|█▋ | 64734/371472 [5:09:02<24:03:25, 3.54it/s] 17%|█▋ | 64735/371472 [5:09:02<24:08:17, 3.53it/s] 17%|█▋ | 64736/371472 [5:09:02<25:01:54, 3.40it/s] 17%|█▋ | 64737/371472 [5:09:03<24:15:06, 3.51it/s] 17%|█▋ | 64738/371472 [5:09:03<23:31:05, 3.62it/s] 17%|█▋ | 64739/371472 [5:09:03<23:14:44, 3.67it/s] 17%|█▋ | 64740/371472 [5:09:04<25:05:17, 3.40it/s] {'loss': 4.026, 'learning_rate': 8.435486651296084e-07, 'epoch': 2.79} + 17%|█▋ | 64740/371472 [5:09:04<25:05:17, 3.40it/s] 17%|█▋ | 64741/371472 [5:09:04<26:52:28, 3.17it/s] 17%|█▋ | 64742/371472 [5:09:04<25:49:06, 3.30it/s] 17%|█▋ | 64743/371472 [5:09:04<24:31:36, 3.47it/s] 17%|█▋ | 64744/371472 [5:09:05<23:55:52, 3.56it/s] 17%|█▋ | 64745/371472 [5:09:05<23:30:21, 3.62it/s] 17%|█▋ | 64746/371472 [5:09:05<23:22:56, 3.64it/s] 17%|█▋ | 64747/371472 [5:09:06<22:52:41, 3.72it/s] 17%|█▋ | 64748/371472 [5:09:06<24:18:43, 3.50it/s] 17%|█▋ | 64749/371472 [5:09:06<23:42:15, 3.59it/s] 17%|█▋ | 64750/371472 [5:09:06<23:54:34, 3.56it/s] 17%|█▋ | 64751/371472 [5:09:07<25:49:41, 3.30it/s] 17%|█▋ | 64752/371472 [5:09:07<25:10:49, 3.38it/s] 17%|█▋ | 64753/371472 [5:09:07<24:31:15, 3.47it/s] 17%|█▋ | 64754/371472 [5:09:08<24:16:52, 3.51it/s] 17%|█▋ | 64755/371472 [5:09:08<25:04:22, 3.40it/s] 17%|█▋ | 64756/371472 [5:09:08<24:31:25, 3.47it/s] 17%|█▋ | 64757/371472 [5:09:08<24:05:05, 3.54it/s] 17%|█▋ | 64758/371472 [5:09:09<24:26:16, 3.49it/s] 17%|█▋ | 64759/371472 [5:09:09<23:54:01, 3.56it/s] 17%|█▋ | 64760/371472 [5:09:09<24:30:54, 3.48it/s] {'loss': 4.2457, 'learning_rate': 8.435001831541296e-07, 'epoch': 2.79} + 17%|█▋ | 64760/371472 [5:09:09<24:30:54, 3.48it/s] 17%|█▋ | 64761/371472 [5:09:10<23:39:01, 3.60it/s] 17%|█▋ | 64762/371472 [5:09:10<23:19:44, 3.65it/s] 17%|█▋ | 64763/371472 [5:09:10<22:32:30, 3.78it/s] 17%|█▋ | 64764/371472 [5:09:10<21:59:52, 3.87it/s] 17%|█▋ | 64765/371472 [5:09:11<22:02:25, 3.87it/s] 17%|█▋ | 64766/371472 [5:09:11<22:46:24, 3.74it/s] 17%|█▋ | 64767/371472 [5:09:11<22:03:00, 3.86it/s] 17%|█▋ | 64768/371472 [5:09:11<21:56:43, 3.88it/s] 17%|��▋ | 64769/371472 [5:09:12<22:41:30, 3.75it/s] 17%|█▋ | 64770/371472 [5:09:12<23:21:48, 3.65it/s] 17%|█▋ | 64771/371472 [5:09:12<22:45:48, 3.74it/s] 17%|█▋ | 64772/371472 [5:09:12<22:44:09, 3.75it/s] 17%|█▋ | 64773/371472 [5:09:13<22:19:35, 3.82it/s] 17%|█▋ | 64774/371472 [5:09:13<22:15:03, 3.83it/s] 17%|█▋ | 64775/371472 [5:09:13<22:26:40, 3.80it/s] 17%|█▋ | 64776/371472 [5:09:14<24:35:23, 3.46it/s] 17%|█▋ | 64777/371472 [5:09:14<24:22:18, 3.50it/s] 17%|█▋ | 64778/371472 [5:09:14<24:55:27, 3.42it/s] 17%|█▋ | 64779/371472 [5:09:14<25:10:13, 3.38it/s] 17%|█▋ | 64780/371472 [5:09:15<26:08:24, 3.26it/s] {'loss': 4.2927, 'learning_rate': 8.434517011786507e-07, 'epoch': 2.79} + 17%|█▋ | 64780/371472 [5:09:15<26:08:24, 3.26it/s] 17%|█▋ | 64781/371472 [5:09:15<24:33:26, 3.47it/s] 17%|█▋ | 64782/371472 [5:09:15<24:17:28, 3.51it/s] 17%|█▋ | 64783/371472 [5:09:16<24:24:35, 3.49it/s] 17%|█▋ | 64784/371472 [5:09:16<24:41:55, 3.45it/s] 17%|█▋ | 64785/371472 [5:09:16<24:26:22, 3.49it/s] 17%|█▋ | 64786/371472 [5:09:16<24:08:43, 3.53it/s] 17%|█▋ | 64787/371472 [5:09:17<23:01:47, 3.70it/s] 17%|█▋ | 64788/371472 [5:09:17<24:01:20, 3.55it/s] 17%|█▋ | 64789/371472 [5:09:17<22:55:04, 3.72it/s] 17%|█▋ | 64790/371472 [5:09:18<23:16:51, 3.66it/s] 17%|█▋ | 64791/371472 [5:09:18<23:01:11, 3.70it/s] 17%|█▋ | 64792/371472 [5:09:18<22:38:32, 3.76it/s] 17%|█▋ | 64793/371472 [5:09:18<23:30:42, 3.62it/s] 17%|█▋ | 64794/371472 [5:09:19<23:15:21, 3.66it/s] 17%|█▋ | 64795/371472 [5:09:19<24:36:00, 3.46it/s] 17%|█▋ | 64796/371472 [5:09:19<24:09:43, 3.53it/s] 17%|█▋ | 64797/371472 [5:09:20<25:02:49, 3.40it/s] 17%|█▋ | 64798/371472 [5:09:20<25:08:18, 3.39it/s] 17%|█▋ | 64799/371472 [5:09:20<27:59:23, 3.04it/s] 17%|█▋ | 64800/371472 [5:09:20<26:17:44, 3.24it/s] {'loss': 4.2799, 'learning_rate': 8.434032192031717e-07, 'epoch': 2.79} + 17%|█▋ | 64800/371472 [5:09:21<26:17:44, 3.24it/s] 17%|█▋ | 64801/371472 [5:09:21<25:01:08, 3.40it/s] 17%|█▋ | 64802/371472 [5:09:21<26:22:35, 3.23it/s] 17%|█▋ | 64803/371472 [5:09:21<25:20:53, 3.36it/s] 17%|█▋ | 64804/371472 [5:09:22<24:45:21, 3.44it/s] 17%|█▋ | 64805/371472 [5:09:22<24:10:27, 3.52it/s] 17%|█▋ | 64806/371472 [5:09:22<25:04:13, 3.40it/s] 17%|█▋ | 64807/371472 [5:09:23<24:51:55, 3.43it/s] 17%|█▋ | 64808/371472 [5:09:23<24:00:17, 3.55it/s] 17%|█▋ | 64809/371472 [5:09:23<23:37:19, 3.61it/s] 17%|█▋ | 64810/371472 [5:09:23<23:50:40, 3.57it/s] 17%|█▋ | 64811/371472 [5:09:24<24:08:43, 3.53it/s] 17%|█▋ | 64812/371472 [5:09:24<23:47:34, 3.58it/s] 17%|█▋ | 64813/371472 [5:09:24<23:45:23, 3.59it/s] 17%|█▋ | 64814/371472 [5:09:24<23:06:03, 3.69it/s] 17%|█▋ | 64815/371472 [5:09:25<23:05:00, 3.69it/s] 17%|█▋ | 64816/371472 [5:09:25<23:48:23, 3.58it/s] 17%|█▋ | 64817/371472 [5:09:25<24:09:43, 3.53it/s] 17%|█▋ | 64818/371472 [5:09:26<24:27:34, 3.48it/s] 17%|█▋ | 64819/371472 [5:09:26<25:13:23, 3.38it/s] 17%|█▋ | 64820/371472 [5:09:26<25:56:49, 3.28it/s] {'loss': 4.1319, 'learning_rate': 8.433547372276928e-07, 'epoch': 2.79} + 17%|█▋ | 64820/371472 [5:09:26<25:56:49, 3.28it/s] 17%|█▋ | 64821/371472 [5:09:27<25:21:53, 3.36it/s] 17%|█▋ | 64822/371472 [5:09:27<24:10:17, 3.52it/s] 17%|█▋ | 64823/371472 [5:09:27<24:37:13, 3.46it/s] 17%|█▋ | 64824/371472 [5:09:27<24:09:02, 3.53it/s] 17%|█▋ | 64825/371472 [5:09:28<23:51:34, 3.57it/s] 17%|█▋ | 64826/371472 [5:09:28<23:01:47, 3.70it/s] 17%|█▋ | 64827/371472 [5:09:28<23:00:55, 3.70it/s] 17%|█▋ | 64828/371472 [5:09:28<22:41:18, 3.75it/s] 17%|█▋ | 64829/371472 [5:09:29<22:22:20, 3.81it/s] 17%|█▋ | 64830/371472 [5:09:29<23:36:49, 3.61it/s] 17%|█▋ | 64831/371472 [5:09:29<23:19:32, 3.65it/s] 17%|█▋ | 64832/371472 [5:09:29<22:23:20, 3.80it/s] 17%|█▋ | 64833/371472 [5:09:30<22:35:48, 3.77it/s] 17%|█▋ | 64834/371472 [5:09:30<22:47:48, 3.74it/s] 17%|█▋ | 64835/371472 [5:09:30<24:45:14, 3.44it/s] 17%|█▋ | 64836/371472 [5:09:31<24:32:24, 3.47it/s] 17%|█▋ | 64837/371472 [5:09:31<23:41:30, 3.60it/s] 17%|█▋ | 64838/371472 [5:09:31<23:25:59, 3.63it/s] 17%|█▋ | 64839/371472 [5:09:31<23:24:23, 3.64it/s] 17%|█▋ | 64840/371472 [5:09:32<27:57:30, 3.05it/s] {'loss': 4.1158, 'learning_rate': 8.43306255252214e-07, 'epoch': 2.79} + 17%|█▋ | 64840/371472 [5:09:32<27:57:30, 3.05it/s] 17%|█▋ | 64841/371472 [5:09:32<27:01:18, 3.15it/s] 17%|█▋ | 64842/371472 [5:09:32<26:59:51, 3.15it/s] 17%|█▋ | 64843/371472 [5:09:33<25:57:09, 3.28it/s] 17%|█▋ | 64844/371472 [5:09:33<24:45:52, 3.44it/s] 17%|█▋ | 64845/371472 [5:09:33<24:50:43, 3.43it/s] 17%|█▋ | 64846/371472 [5:09:34<23:44:33, 3.59it/s] 17%|█▋ | 64847/371472 [5:09:34<23:39:36, 3.60it/s] 17%|█▋ | 64848/371472 [5:09:34<23:10:33, 3.68it/s] 17%|█▋ | 64849/371472 [5:09:34<25:20:54, 3.36it/s] 17%|█▋ | 64850/371472 [5:09:35<24:40:35, 3.45it/s] 17%|█▋ | 64851/371472 [5:09:35<24:12:04, 3.52it/s] 17%|█▋ | 64852/371472 [5:09:35<24:01:33, 3.54it/s] 17%|█▋ | 64853/371472 [5:09:36<25:08:03, 3.39it/s] 17%|█▋ | 64854/371472 [5:09:36<23:52:10, 3.57it/s] 17%|█▋ | 64855/371472 [5:09:36<24:50:40, 3.43it/s] 17%|█▋ | 64856/371472 [5:09:36<24:24:27, 3.49it/s] 17%|█▋ | 64857/371472 [5:09:37<25:39:08, 3.32it/s] 17%|█▋ | 64858/371472 [5:09:37<25:51:26, 3.29it/s] 17%|█▋ | 64859/371472 [5:09:37<24:50:01, 3.43it/s] 17%|█▋ | 64860/371472 [5:09:38<23:13:03, 3.67it/s] {'loss': 4.2568, 'learning_rate': 8.432577732767352e-07, 'epoch': 2.79} + 17%|█▋ | 64860/371472 [5:09:38<23:13:03, 3.67it/s] 17%|█▋ | 64861/371472 [5:09:38<22:41:35, 3.75it/s] 17%|█▋ | 64862/371472 [5:09:38<24:02:13, 3.54it/s] 17%|█▋ | 64863/371472 [5:09:38<24:21:28, 3.50it/s] 17%|█▋ | 64864/371472 [5:09:39<25:30:16, 3.34it/s] 17%|█▋ | 64865/371472 [5:09:39<24:57:48, 3.41it/s] 17%|█▋ | 64866/371472 [5:09:39<24:12:03, 3.52it/s] 17%|█▋ | 64867/371472 [5:09:40<23:33:28, 3.62it/s] 17%|█▋ | 64868/371472 [5:09:40<24:16:23, 3.51it/s] 17%|█▋ | 64869/371472 [5:09:40<23:30:19, 3.62it/s] 17%|█▋ | 64870/371472 [5:09:40<24:12:11, 3.52it/s] 17%|█▋ | 64871/371472 [5:09:41<23:46:16, 3.58it/s] 17%|█▋ | 64872/371472 [5:09:41<23:50:02, 3.57it/s] 17%|█▋ | 64873/371472 [5:09:41<23:01:32, 3.70it/s] 17%|█▋ | 64874/371472 [5:09:41<22:41:34, 3.75it/s] 17%|█▋ | 64875/371472 [5:09:42<23:51:57, 3.57it/s] 17%|█▋ | 64876/371472 [5:09:42<23:26:29, 3.63it/s] 17%|█▋ | 64877/371472 [5:09:42<23:02:03, 3.70it/s] 17%|█▋ | 64878/371472 [5:09:43<22:30:24, 3.78it/s] 17%|█▋ | 64879/371472 [5:09:43<23:16:08, 3.66it/s] 17%|█▋ | 64880/371472 [5:09:43<23:38:28, 3.60it/s] {'loss': 4.2391, 'learning_rate': 8.432092913012562e-07, 'epoch': 2.79} + 17%|█▋ | 64880/371472 [5:09:43<23:38:28, 3.60it/s] 17%|█▋ | 64881/371472 [5:09:43<22:55:50, 3.71it/s] 17%|█▋ | 64882/371472 [5:09:44<24:18:05, 3.50it/s] 17%|█▋ | 64883/371472 [5:09:44<24:07:06, 3.53it/s] 17%|█▋ | 64884/371472 [5:09:44<23:13:39, 3.67it/s] 17%|█▋ | 64885/371472 [5:09:45<23:55:37, 3.56it/s] 17%|█▋ | 64886/371472 [5:09:45<22:40:44, 3.76it/s] 17%|█▋ | 64887/371472 [5:09:45<22:05:24, 3.86it/s] 17%|█▋ | 64888/371472 [5:09:45<22:29:33, 3.79it/s] 17%|█▋ | 64889/371472 [5:09:46<22:38:08, 3.76it/s] 17%|█▋ | 64890/371472 [5:09:46<23:00:02, 3.70it/s] 17%|█▋ | 64891/371472 [5:09:46<22:51:45, 3.72it/s] 17%|█▋ | 64892/371472 [5:09:46<22:34:50, 3.77it/s] 17%|█▋ | 64893/371472 [5:09:47<23:52:27, 3.57it/s] 17%|█▋ | 64894/371472 [5:09:47<23:24:40, 3.64it/s] 17%|█▋ | 64895/371472 [5:09:47<23:51:02, 3.57it/s] 17%|█▋ | 64896/371472 [5:09:48<23:57:22, 3.55it/s] 17%|█▋ | 64897/371472 [5:09:48<23:53:42, 3.56it/s] 17%|█▋ | 64898/371472 [5:09:48<23:41:03, 3.60it/s] 17%|█▋ | 64899/371472 [5:09:48<24:14:15, 3.51it/s] 17%|█▋ | 64900/371472 [5:09:49<24:36:15, 3.46it/s] {'loss': 4.167, 'learning_rate': 8.431608093257772e-07, 'epoch': 2.8} + 17%|█▋ | 64900/371472 [5:09:49<24:36:15, 3.46it/s] 17%|█▋ | 64901/371472 [5:09:49<24:12:15, 3.52it/s] 17%|█▋ | 64902/371472 [5:09:49<24:49:01, 3.43it/s] 17%|█▋ | 64903/371472 [5:09:50<24:41:43, 3.45it/s] 17%|█▋ | 64904/371472 [5:09:50<24:40:29, 3.45it/s] 17%|█▋ | 64905/371472 [5:09:50<24:17:33, 3.51it/s] 17%|█▋ | 64906/371472 [5:09:50<23:41:06, 3.60it/s] 17%|█▋ | 64907/371472 [5:09:51<25:22:39, 3.36it/s] 17%|█▋ | 64908/371472 [5:09:51<24:19:32, 3.50it/s] 17%|█▋ | 64909/371472 [5:09:51<23:11:49, 3.67it/s] 17%|█▋ | 64910/371472 [5:09:51<22:25:02, 3.80it/s] 17%|█▋ | 64911/371472 [5:09:52<22:08:30, 3.85it/s] 17%|█▋ | 64912/371472 [5:09:52<21:54:34, 3.89it/s] 17%|█▋ | 64913/371472 [5:09:52<23:07:48, 3.68it/s] 17%|█▋ | 64914/371472 [5:09:53<23:16:54, 3.66it/s] 17%|█▋ | 64915/371472 [5:09:53<26:56:43, 3.16it/s] 17%|█▋ | 64916/371472 [5:09:53<24:44:42, 3.44it/s] 17%|█▋ | 64917/371472 [5:09:53<24:00:34, 3.55it/s] 17%|█▋ | 64918/371472 [5:09:54<23:05:28, 3.69it/s] 17%|█▋ | 64919/371472 [5:09:54<25:04:24, 3.40it/s] 17%|█▋ | 64920/371472 [5:09:54<25:46:05, 3.30it/s] {'loss': 4.2472, 'learning_rate': 8.431123273502984e-07, 'epoch': 2.8} + 17%|█▋ | 64920/371472 [5:09:54<25:46:05, 3.30it/s] 17%|█▋ | 64921/371472 [5:09:55<28:06:18, 3.03it/s] 17%|█▋ | 64922/371472 [5:09:55<26:01:12, 3.27it/s] 17%|█▋ | 64923/371472 [5:09:55<25:21:38, 3.36it/s] 17%|█▋ | 64924/371472 [5:09:56<24:37:29, 3.46it/s] 17%|█▋ | 64925/371472 [5:09:56<23:33:40, 3.61it/s] 17%|█▋ | 64926/371472 [5:09:56<23:09:36, 3.68it/s] 17%|█▋ | 64927/371472 [5:09:56<22:57:52, 3.71it/s] 17%|█▋ | 64928/371472 [5:09:57<22:52:20, 3.72it/s] 17%|█▋ | 64929/371472 [5:09:57<23:21:12, 3.65it/s] 17%|█▋ | 64930/371472 [5:09:57<22:42:54, 3.75it/s] 17%|█▋ | 64931/371472 [5:09:57<21:59:07, 3.87it/s] 17%|█▋ | 64932/371472 [5:09:58<23:16:24, 3.66it/s] 17%|█▋ | 64933/371472 [5:09:58<22:29:49, 3.78it/s] 17%|█▋ | 64934/371472 [5:09:58<22:18:26, 3.82it/s] 17%|█▋ | 64935/371472 [5:09:58<22:44:01, 3.75it/s] 17%|█▋ | 64936/371472 [5:09:59<22:28:17, 3.79it/s] 17%|█▋ | 64937/371472 [5:09:59<23:03:36, 3.69it/s] 17%|█▋ | 64938/371472 [5:09:59<24:31:26, 3.47it/s] 17%|█▋ | 64939/371472 [5:10:00<25:45:57, 3.30it/s] 17%|█▋ | 64940/371472 [5:10:00<25:54:34, 3.29it/s] {'loss': 4.1453, 'learning_rate': 8.430638453748195e-07, 'epoch': 2.8} + 17%|█▋ | 64940/371472 [5:10:00<25:54:34, 3.29it/s] 17%|█▋ | 64941/371472 [5:10:00<26:11:23, 3.25it/s] 17%|█▋ | 64942/371472 [5:10:01<25:21:02, 3.36it/s] 17%|█▋ | 64943/371472 [5:10:01<25:05:37, 3.39it/s] 17%|█▋ | 64944/371472 [5:10:01<24:14:04, 3.51it/s] 17%|█▋ | 64945/371472 [5:10:02<27:08:31, 3.14it/s] 17%|█▋ | 64946/371472 [5:10:02<25:09:06, 3.39it/s] 17%|█▋ | 64947/371472 [5:10:02<23:58:39, 3.55it/s] 17%|█▋ | 64948/371472 [5:10:02<24:55:03, 3.42it/s] 17%|█▋ | 64949/371472 [5:10:03<24:29:22, 3.48it/s] 17%|█▋ | 64950/371472 [5:10:03<24:57:44, 3.41it/s] 17%|█▋ | 64951/371472 [5:10:03<25:49:28, 3.30it/s] 17%|█▋ | 64952/371472 [5:10:03<24:39:24, 3.45it/s] 17%|█▋ | 64953/371472 [5:10:04<23:41:58, 3.59it/s] 17%|█▋ | 64954/371472 [5:10:04<23:07:54, 3.68it/s] 17%|█▋ | 64955/371472 [5:10:04<22:52:52, 3.72it/s] 17%|█▋ | 64956/371472 [5:10:05<22:29:56, 3.78it/s] 17%|█▋ | 64957/371472 [5:10:05<22:41:34, 3.75it/s] 17%|█▋ | 64958/371472 [5:10:05<22:32:31, 3.78it/s] 17%|█▋ | 64959/371472 [5:10:05<24:45:43, 3.44it/s] 17%|█▋ | 64960/371472 [5:10:06<24:46:32, 3.44it/s] {'loss': 4.0949, 'learning_rate': 8.430153633993406e-07, 'epoch': 2.8} + 17%|█▋ | 64960/371472 [5:10:06<24:46:32, 3.44it/s] 17%|█▋ | 64961/371472 [5:10:06<24:33:58, 3.47it/s] 17%|█▋ | 64962/371472 [5:10:06<23:47:34, 3.58it/s] 17%|█▋ | 64963/371472 [5:10:07<24:38:08, 3.46it/s] 17%|█▋ | 64964/371472 [5:10:07<23:18:32, 3.65it/s] 17%|█▋ | 64965/371472 [5:10:07<22:58:19, 3.71it/s] 17%|█▋ | 64966/371472 [5:10:07<22:37:41, 3.76it/s] 17%|█▋ | 64967/371472 [5:10:08<22:48:56, 3.73it/s] 17%|█▋ | 64968/371472 [5:10:08<25:00:09, 3.41it/s] 17%|█▋ | 64969/371472 [5:10:08<24:13:16, 3.52it/s] 17%|█▋ | 64970/371472 [5:10:09<27:12:26, 3.13it/s] 17%|█▋ | 64971/371472 [5:10:09<26:14:19, 3.24it/s] 17%|█▋ | 64972/371472 [5:10:09<25:39:21, 3.32it/s] 17%|█▋ | 64973/371472 [5:10:09<24:32:10, 3.47it/s] 17%|█▋ | 64974/371472 [5:10:10<26:04:20, 3.27it/s] 17%|█▋ | 64975/371472 [5:10:10<24:16:14, 3.51it/s] 17%|█▋ | 64976/371472 [5:10:10<24:23:20, 3.49it/s] 17%|█▋ | 64977/371472 [5:10:11<24:19:35, 3.50it/s] 17%|█▋ | 64978/371472 [5:10:11<23:49:41, 3.57it/s] 17%|█▋ | 64979/371472 [5:10:11<23:27:04, 3.63it/s] 17%|█▋ | 64980/371472 [5:10:11<23:21:17, 3.65it/s] {'loss': 4.1396, 'learning_rate': 8.429668814238617e-07, 'epoch': 2.8} + 17%|█▋ | 64980/371472 [5:10:11<23:21:17, 3.65it/s] 17%|█▋ | 64981/371472 [5:10:12<23:31:48, 3.62it/s] 17%|█▋ | 64982/371472 [5:10:12<24:39:06, 3.45it/s] 17%|█▋ | 64983/371472 [5:10:12<24:23:35, 3.49it/s] 17%|█▋ | 64984/371472 [5:10:13<25:03:52, 3.40it/s] 17%|█▋ | 64985/371472 [5:10:13<24:33:51, 3.47it/s] 17%|█▋ | 64986/371472 [5:10:13<24:40:18, 3.45it/s] 17%|█▋ | 64987/371472 [5:10:13<24:08:53, 3.53it/s] 17%|█▋ | 64988/371472 [5:10:14<23:05:22, 3.69it/s] 17%|█▋ | 64989/371472 [5:10:14<23:23:11, 3.64it/s] 17%|█▋ | 64990/371472 [5:10:14<25:19:59, 3.36it/s] 17%|█▋ | 64991/371472 [5:10:15<23:55:13, 3.56it/s] 17%|█▋ | 64992/371472 [5:10:15<23:44:05, 3.59it/s] 17%|█▋ | 64993/371472 [5:10:15<22:51:50, 3.72it/s] 17%|█▋ | 64994/371472 [5:10:15<23:24:55, 3.64it/s] 17%|█▋ | 64995/371472 [5:10:16<22:49:58, 3.73it/s] 17%|█▋ | 64996/371472 [5:10:16<29:04:28, 2.93it/s] 17%|█▋ | 64997/371472 [5:10:16<27:17:37, 3.12it/s] 17%|█▋ | 64998/371472 [5:10:17<25:36:31, 3.32it/s] 17%|█▋ | 64999/371472 [5:10:17<24:41:40, 3.45it/s] 17%|█▋ | 65000/371472 [5:10:17<23:23:16, 3.64it/s] {'loss': 4.0352, 'learning_rate': 8.429183994483829e-07, 'epoch': 2.8} + 17%|█▋ | 65000/371472 [5:10:17<23:23:16, 3.64it/s] 17%|█▋ | 65001/371472 [5:10:17<22:34:45, 3.77it/s] 17%|█▋ | 65002/371472 [5:10:18<22:59:00, 3.70it/s] 17%|█▋ | 65003/371472 [5:10:18<23:02:37, 3.69it/s] 17%|█▋ | 65004/371472 [5:10:18<24:01:38, 3.54it/s] 17%|█▋ | 65005/371472 [5:10:18<23:05:56, 3.69it/s] 17%|█▋ | 65006/371472 [5:10:19<23:12:32, 3.67it/s] 17%|█▋ | 65007/371472 [5:10:19<22:58:43, 3.70it/s] 18%|█▊ | 65008/371472 [5:10:19<23:02:12, 3.70it/s] 18%|█▊ | 65009/371472 [5:10:20<23:02:58, 3.69it/s] 18%|█▊ | 65010/371472 [5:10:20<23:25:30, 3.63it/s] 18%|█▊ | 65011/371472 [5:10:20<22:54:43, 3.72it/s] 18%|█▊ | 65012/371472 [5:10:20<23:06:50, 3.68it/s] 18%|█▊ | 65013/371472 [5:10:21<24:00:47, 3.55it/s] 18%|█▊ | 65014/371472 [5:10:21<24:25:46, 3.48it/s] 18%|█▊ | 65015/371472 [5:10:21<23:39:36, 3.60it/s] 18%|█▊ | 65016/371472 [5:10:21<22:26:59, 3.79it/s] 18%|█▊ | 65017/371472 [5:10:22<24:04:03, 3.54it/s] 18%|█▊ | 65018/371472 [5:10:22<24:55:58, 3.41it/s] 18%|█▊ | 65019/371472 [5:10:22<24:38:03, 3.46it/s] 18%|█▊ | 65020/371472 [5:10:23<23:54:27, 3.56it/s] {'loss': 4.0997, 'learning_rate': 8.428699174729039e-07, 'epoch': 2.8} + 18%|█▊ | 65020/371472 [5:10:23<23:54:27, 3.56it/s] 18%|█▊ | 65021/371472 [5:10:23<24:25:56, 3.48it/s] 18%|█▊ | 65022/371472 [5:10:23<25:28:04, 3.34it/s] 18%|█▊ | 65023/371472 [5:10:24<25:26:42, 3.35it/s] 18%|█▊ | 65024/371472 [5:10:24<24:35:24, 3.46it/s] 18%|█▊ | 65025/371472 [5:10:24<23:34:54, 3.61it/s] 18%|█▊ | 65026/371472 [5:10:24<24:15:15, 3.51it/s] 18%|█▊ | 65027/371472 [5:10:25<23:14:30, 3.66it/s] 18%|█▊ | 65028/371472 [5:10:25<23:29:52, 3.62it/s] 18%|█▊ | 65029/371472 [5:10:25<22:55:12, 3.71it/s] 18%|█▊ | 65030/371472 [5:10:25<23:27:27, 3.63it/s] 18%|█▊ | 65031/371472 [5:10:26<22:43:01, 3.75it/s] 18%|█▊ | 65032/371472 [5:10:26<22:11:17, 3.84it/s] 18%|█▊ | 65033/371472 [5:10:26<23:32:26, 3.62it/s] 18%|█▊ | 65034/371472 [5:10:27<22:47:28, 3.73it/s] 18%|█▊ | 65035/371472 [5:10:27<22:38:35, 3.76it/s] 18%|█▊ | 65036/371472 [5:10:27<23:07:44, 3.68it/s] 18%|█▊ | 65037/371472 [5:10:27<22:35:57, 3.77it/s] 18%|█▊ | 65038/371472 [5:10:28<23:48:18, 3.58it/s] 18%|█▊ | 65039/371472 [5:10:28<23:19:27, 3.65it/s] 18%|█▊ | 65040/371472 [5:10:28<23:27:42, 3.63it/s] {'loss': 4.2631, 'learning_rate': 8.42821435497425e-07, 'epoch': 2.8} + 18%|█▊ | 65040/371472 [5:10:28<23:27:42, 3.63it/s] 18%|█▊ | 65041/371472 [5:10:28<23:02:48, 3.69it/s] 18%|█▊ | 65042/371472 [5:10:29<23:46:38, 3.58it/s] 18%|█▊ | 65043/371472 [5:10:29<23:20:20, 3.65it/s] 18%|█▊ | 65044/371472 [5:10:29<24:56:13, 3.41it/s] 18%|█▊ | 65045/371472 [5:10:30<24:13:02, 3.51it/s] 18%|█▊ | 65046/371472 [5:10:30<24:17:58, 3.50it/s] 18%|█▊ | 65047/371472 [5:10:30<23:33:11, 3.61it/s] 18%|█▊ | 65048/371472 [5:10:30<23:53:09, 3.56it/s] 18%|█▊ | 65049/371472 [5:10:31<23:39:23, 3.60it/s] 18%|█▊ | 65050/371472 [5:10:31<23:54:10, 3.56it/s] 18%|█▊ | 65051/371472 [5:10:31<24:12:45, 3.52it/s] 18%|█▊ | 65052/371472 [5:10:32<24:11:09, 3.52it/s] 18%|█▊ | 65053/371472 [5:10:32<23:37:19, 3.60it/s] 18%|█▊ | 65054/371472 [5:10:32<23:50:27, 3.57it/s] 18%|█▊ | 65055/371472 [5:10:32<23:23:55, 3.64it/s] 18%|█▊ | 65056/371472 [5:10:33<22:40:12, 3.75it/s] 18%|█▊ | 65057/371472 [5:10:33<22:07:14, 3.85it/s] 18%|█▊ | 65058/371472 [5:10:33<22:05:08, 3.85it/s] 18%|█▊ | 65059/371472 [5:10:33<24:05:59, 3.53it/s] 18%|█▊ | 65060/371472 [5:10:34<24:07:05, 3.53it/s] {'loss': 4.198, 'learning_rate': 8.427729535219461e-07, 'epoch': 2.8} + 18%|█▊ | 65060/371472 [5:10:34<24:07:05, 3.53it/s] 18%|█▊ | 65061/371472 [5:10:34<24:29:19, 3.48it/s] 18%|█▊ | 65062/371472 [5:10:34<23:50:23, 3.57it/s] 18%|█▊ | 65063/371472 [5:10:35<23:39:32, 3.60it/s] 18%|█▊ | 65064/371472 [5:10:35<24:18:28, 3.50it/s] 18%|█▊ | 65065/371472 [5:10:35<23:57:37, 3.55it/s] 18%|█▊ | 65066/371472 [5:10:35<22:57:37, 3.71it/s] 18%|█▊ | 65067/371472 [5:10:36<22:29:44, 3.78it/s] 18%|█▊ | 65068/371472 [5:10:36<22:43:26, 3.75it/s] 18%|█▊ | 65069/371472 [5:10:36<23:22:58, 3.64it/s] 18%|█▊ | 65070/371472 [5:10:37<23:42:17, 3.59it/s] 18%|█▊ | 65071/371472 [5:10:37<22:41:36, 3.75it/s] 18%|█▊ | 65072/371472 [5:10:37<22:20:39, 3.81it/s] 18%|█▊ | 65073/371472 [5:10:37<22:54:47, 3.71it/s] 18%|█▊ | 65074/371472 [5:10:38<23:23:24, 3.64it/s] 18%|█▊ | 65075/371472 [5:10:38<22:45:39, 3.74it/s] 18%|█▊ | 65076/371472 [5:10:38<23:04:03, 3.69it/s] 18%|█▊ | 65077/371472 [5:10:38<25:58:31, 3.28it/s] 18%|█▊ | 65078/371472 [5:10:39<24:31:53, 3.47it/s] 18%|█▊ | 65079/371472 [5:10:39<23:49:18, 3.57it/s] 18%|█▊ | 65080/371472 [5:10:39<23:51:26, 3.57it/s] {'loss': 4.3659, 'learning_rate': 8.427244715464673e-07, 'epoch': 2.8} + 18%|█▊ | 65080/371472 [5:10:39<23:51:26, 3.57it/s] 18%|█▊ | 65081/371472 [5:10:40<24:55:52, 3.41it/s] 18%|█▊ | 65082/371472 [5:10:40<24:19:41, 3.50it/s] 18%|█▊ | 65083/371472 [5:10:40<24:18:15, 3.50it/s] 18%|█▊ | 65084/371472 [5:10:40<23:20:42, 3.65it/s] 18%|█▊ | 65085/371472 [5:10:41<23:17:13, 3.65it/s] 18%|█▊ | 65086/371472 [5:10:41<23:05:58, 3.68it/s] 18%|█▊ | 65087/371472 [5:10:41<24:32:21, 3.47it/s] 18%|█▊ | 65088/371472 [5:10:42<23:50:30, 3.57it/s] 18%|█▊ | 65089/371472 [5:10:42<24:44:07, 3.44it/s] 18%|█▊ | 65090/371472 [5:10:42<24:10:46, 3.52it/s] 18%|█▊ | 65091/371472 [5:10:42<23:35:42, 3.61it/s] 18%|█▊ | 65092/371472 [5:10:43<23:32:31, 3.62it/s] 18%|█▊ | 65093/371472 [5:10:43<22:42:18, 3.75it/s] 18%|█▊ | 65094/371472 [5:10:43<22:53:54, 3.72it/s] 18%|█▊ | 65095/371472 [5:10:43<23:25:40, 3.63it/s] 18%|█▊ | 65096/371472 [5:10:44<23:18:39, 3.65it/s] 18%|█▊ | 65097/371472 [5:10:44<23:39:07, 3.60it/s] 18%|█▊ | 65098/371472 [5:10:44<23:14:24, 3.66it/s] 18%|█▊ | 65099/371472 [5:10:45<23:31:53, 3.62it/s] 18%|█▊ | 65100/371472 [5:10:45<23:33:59, 3.61it/s] {'loss': 3.993, 'learning_rate': 8.426759895709883e-07, 'epoch': 2.8} + 18%|█▊ | 65100/371472 [5:10:45<23:33:59, 3.61it/s] 18%|█▊ | 65101/371472 [5:10:45<27:16:12, 3.12it/s] 18%|█▊ | 65102/371472 [5:10:46<28:23:06, 3.00it/s] 18%|█▊ | 65103/371472 [5:10:46<26:24:47, 3.22it/s] 18%|█▊ | 65104/371472 [5:10:46<25:16:16, 3.37it/s] 18%|█▊ | 65105/371472 [5:10:46<23:58:34, 3.55it/s] 18%|█▊ | 65106/371472 [5:10:47<23:41:40, 3.59it/s] 18%|█▊ | 65107/371472 [5:10:47<24:41:58, 3.45it/s] 18%|█▊ | 65108/371472 [5:10:47<23:56:48, 3.55it/s] 18%|█▊ | 65109/371472 [5:10:48<23:32:50, 3.61it/s] 18%|█▊ | 65110/371472 [5:10:48<22:46:36, 3.74it/s] 18%|█▊ | 65111/371472 [5:10:48<22:21:49, 3.81it/s] 18%|█▊ | 65112/371472 [5:10:48<23:17:45, 3.65it/s] 18%|█▊ | 65113/371472 [5:10:49<24:03:38, 3.54it/s] 18%|█▊ | 65114/371472 [5:10:49<23:11:09, 3.67it/s] 18%|█▊ | 65115/371472 [5:10:49<23:56:45, 3.55it/s] 18%|█▊ | 65116/371472 [5:10:49<24:31:50, 3.47it/s] 18%|█▊ | 65117/371472 [5:10:50<23:57:28, 3.55it/s] 18%|█▊ | 65118/371472 [5:10:50<26:29:16, 3.21it/s] 18%|█▊ | 65119/371472 [5:10:50<26:26:44, 3.22it/s] 18%|█▊ | 65120/371472 [5:10:51<26:20:56, 3.23it/s] {'loss': 4.1925, 'learning_rate': 8.426275075955095e-07, 'epoch': 2.8} + 18%|█▊ | 65120/371472 [5:10:51<26:20:56, 3.23it/s] 18%|█▊ | 65121/371472 [5:10:51<25:09:32, 3.38it/s] 18%|█▊ | 65122/371472 [5:10:51<24:31:30, 3.47it/s] 18%|█▊ | 65123/371472 [5:10:52<24:21:03, 3.49it/s] 18%|█▊ | 65124/371472 [5:10:52<23:37:38, 3.60it/s] 18%|█▊ | 65125/371472 [5:10:52<22:58:16, 3.70it/s] 18%|█▊ | 65126/371472 [5:10:52<22:41:04, 3.75it/s] 18%|█▊ | 65127/371472 [5:10:53<22:54:42, 3.71it/s] 18%|█▊ | 65128/371472 [5:10:53<22:49:55, 3.73it/s] 18%|█▊ | 65129/371472 [5:10:53<23:23:56, 3.64it/s] 18%|█▊ | 65130/371472 [5:10:53<22:20:40, 3.81it/s] 18%|█▊ | 65131/371472 [5:10:54<22:57:02, 3.71it/s] 18%|█▊ | 65132/371472 [5:10:54<22:57:27, 3.71it/s] 18%|█▊ | 65133/371472 [5:10:54<22:45:14, 3.74it/s] 18%|█▊ | 65134/371472 [5:10:54<21:51:56, 3.89it/s] 18%|█▊ | 65135/371472 [5:10:55<22:37:11, 3.76it/s] 18%|█▊ | 65136/371472 [5:10:55<22:39:00, 3.76it/s] 18%|█▊ | 65137/371472 [5:10:55<22:22:48, 3.80it/s] 18%|█▊ | 65138/371472 [5:10:56<22:48:13, 3.73it/s] 18%|█▊ | 65139/371472 [5:10:56<22:39:00, 3.76it/s] 18%|█▊ | 65140/371472 [5:10:56<23:08:28, 3.68it/s] {'loss': 4.4249, 'learning_rate': 8.425790256200306e-07, 'epoch': 2.81} + 18%|█▊ | 65140/371472 [5:10:56<23:08:28, 3.68it/s] 18%|█▊ | 65141/371472 [5:10:56<22:28:15, 3.79it/s] 18%|█▊ | 65142/371472 [5:10:57<22:48:14, 3.73it/s] 18%|█▊ | 65143/371472 [5:10:57<22:28:55, 3.78it/s] 18%|█▊ | 65144/371472 [5:10:57<22:19:19, 3.81it/s] 18%|█▊ | 65145/371472 [5:10:57<22:36:35, 3.76it/s] 18%|█▊ | 65146/371472 [5:10:58<22:06:41, 3.85it/s] 18%|█▊ | 65147/371472 [5:10:58<21:40:27, 3.93it/s] 18%|█▊ | 65148/371472 [5:10:58<22:40:56, 3.75it/s] 18%|█▊ | 65149/371472 [5:10:58<22:35:09, 3.77it/s] 18%|█▊ | 65150/371472 [5:10:59<23:17:38, 3.65it/s] 18%|█▊ | 65151/371472 [5:10:59<23:23:09, 3.64it/s] 18%|█▊ | 65152/371472 [5:10:59<24:06:14, 3.53it/s] 18%|█▊ | 65153/371472 [5:11:00<23:36:41, 3.60it/s] 18%|█▊ | 65154/371472 [5:11:00<22:55:29, 3.71it/s] 18%|█▊ | 65155/371472 [5:11:00<24:25:18, 3.48it/s] 18%|█▊ | 65156/371472 [5:11:00<23:47:05, 3.58it/s] 18%|█▊ | 65157/371472 [5:11:01<24:21:46, 3.49it/s] 18%|█▊ | 65158/371472 [5:11:01<23:38:01, 3.60it/s] 18%|█▊ | 65159/371472 [5:11:01<24:21:30, 3.49it/s] 18%|█▊ | 65160/371472 [5:11:02<24:34:12, 3.46it/s] {'loss': 4.1671, 'learning_rate': 8.425305436445517e-07, 'epoch': 2.81} + 18%|█▊ | 65160/371472 [5:11:02<24:34:12, 3.46it/s] 18%|█▊ | 65161/371472 [5:11:02<23:47:21, 3.58it/s] 18%|█▊ | 65162/371472 [5:11:02<23:27:00, 3.63it/s] 18%|█▊ | 65163/371472 [5:11:02<23:16:53, 3.65it/s] 18%|█▊ | 65164/371472 [5:11:03<23:43:44, 3.59it/s] 18%|█▊ | 65165/371472 [5:11:03<24:23:47, 3.49it/s] 18%|█▊ | 65166/371472 [5:11:03<23:44:04, 3.58it/s] 18%|█▊ | 65167/371472 [5:11:03<23:35:36, 3.61it/s] 18%|█▊ | 65168/371472 [5:11:04<24:42:47, 3.44it/s] 18%|█▊ | 65169/371472 [5:11:04<23:18:25, 3.65it/s] 18%|█▊ | 65170/371472 [5:11:04<23:08:50, 3.68it/s] 18%|█▊ | 65171/371472 [5:11:05<23:01:40, 3.69it/s] 18%|█▊ | 65172/371472 [5:11:05<25:34:16, 3.33it/s] 18%|█▊ | 65173/371472 [5:11:05<24:49:06, 3.43it/s] 18%|█▊ | 65174/371472 [5:11:06<25:10:20, 3.38it/s] 18%|█▊ | 65175/371472 [5:11:06<23:59:26, 3.55it/s] 18%|█▊ | 65176/371472 [5:11:06<25:59:36, 3.27it/s] 18%|█▊ | 65177/371472 [5:11:06<24:41:33, 3.45it/s] 18%|█▊ | 65178/371472 [5:11:07<24:42:09, 3.44it/s] 18%|█▊ | 65179/371472 [5:11:07<27:54:12, 3.05it/s] 18%|█▊ | 65180/371472 [5:11:07<26:16:53, 3.24it/s] {'loss': 3.9986, 'learning_rate': 8.424820616690727e-07, 'epoch': 2.81} + 18%|█▊ | 65180/371472 [5:11:07<26:16:53, 3.24it/s] 18%|█▊ | 65181/371472 [5:11:08<25:52:02, 3.29it/s] 18%|█▊ | 65182/371472 [5:11:08<26:29:42, 3.21it/s] 18%|█▊ | 65183/371472 [5:11:08<25:27:03, 3.34it/s] 18%|█▊ | 65184/371472 [5:11:09<25:04:09, 3.39it/s] 18%|█▊ | 65185/371472 [5:11:09<25:15:22, 3.37it/s] 18%|█▊ | 65186/371472 [5:11:09<25:12:13, 3.38it/s] 18%|█▊ | 65187/371472 [5:11:09<25:05:47, 3.39it/s] 18%|█▊ | 65188/371472 [5:11:10<24:49:29, 3.43it/s] 18%|█▊ | 65189/371472 [5:11:10<25:12:42, 3.37it/s] 18%|█▊ | 65190/371472 [5:11:10<24:00:34, 3.54it/s] 18%|█▊ | 65191/371472 [5:11:10<22:48:40, 3.73it/s] 18%|█▊ | 65192/371472 [5:11:11<22:20:19, 3.81it/s] 18%|█▊ | 65193/371472 [5:11:11<22:35:41, 3.77it/s] 18%|█▊ | 65194/371472 [5:11:11<23:17:33, 3.65it/s] 18%|█▊ | 65195/371472 [5:11:12<23:47:33, 3.58it/s] 18%|█▊ | 65196/371472 [5:11:12<23:49:31, 3.57it/s] 18%|█▊ | 65197/371472 [5:11:12<23:48:59, 3.57it/s] 18%|█▊ | 65198/371472 [5:11:12<24:06:56, 3.53it/s] 18%|█▊ | 65199/371472 [5:11:13<23:53:59, 3.56it/s] 18%|█▊ | 65200/371472 [5:11:13<23:17:02, 3.65it/s] {'loss': 4.22, 'learning_rate': 8.424335796935939e-07, 'epoch': 2.81} + 18%|█▊ | 65200/371472 [5:11:13<23:17:02, 3.65it/s] 18%|█▊ | 65201/371472 [5:11:13<22:55:56, 3.71it/s] 18%|█▊ | 65202/371472 [5:11:14<22:46:29, 3.74it/s] 18%|█▊ | 65203/371472 [5:11:14<24:06:20, 3.53it/s] 18%|█▊ | 65204/371472 [5:11:14<25:18:40, 3.36it/s] 18%|█▊ | 65205/371472 [5:11:14<23:51:26, 3.57it/s] 18%|█▊ | 65206/371472 [5:11:15<22:47:04, 3.73it/s] 18%|█▊ | 65207/371472 [5:11:15<22:39:06, 3.76it/s] 18%|█▊ | 65208/371472 [5:11:15<23:20:54, 3.64it/s] 18%|█▊ | 65209/371472 [5:11:15<22:23:03, 3.80it/s] 18%|█▊ | 65210/371472 [5:11:16<23:52:36, 3.56it/s] 18%|█▊ | 65211/371472 [5:11:16<23:54:05, 3.56it/s] 18%|█▊ | 65212/371472 [5:11:16<23:58:58, 3.55it/s] 18%|█▊ | 65213/371472 [5:11:17<23:36:51, 3.60it/s] 18%|█▊ | 65214/371472 [5:11:17<25:14:27, 3.37it/s] 18%|█▊ | 65215/371472 [5:11:17<24:26:04, 3.48it/s] 18%|█▊ | 65216/371472 [5:11:17<24:32:25, 3.47it/s] 18%|█▊ | 65217/371472 [5:11:18<23:39:01, 3.60it/s] 18%|█▊ | 65218/371472 [5:11:18<22:40:35, 3.75it/s] 18%|█▊ | 65219/371472 [5:11:18<23:54:40, 3.56it/s] 18%|█▊ | 65220/371472 [5:11:19<25:00:56, 3.40it/s] {'loss': 4.3503, 'learning_rate': 8.42385097718115e-07, 'epoch': 2.81} + 18%|█▊ | 65220/371472 [5:11:19<25:00:56, 3.40it/s] 18%|█▊ | 65221/371472 [5:11:19<25:00:02, 3.40it/s] 18%|█▊ | 65222/371472 [5:11:19<23:43:31, 3.59it/s] 18%|█▊ | 65223/371472 [5:11:19<23:05:16, 3.68it/s] 18%|█▊ | 65224/371472 [5:11:20<22:50:32, 3.72it/s] 18%|█▊ | 65225/371472 [5:11:20<23:02:36, 3.69it/s] 18%|█▊ | 65226/371472 [5:11:20<24:17:35, 3.50it/s] 18%|█▊ | 65227/371472 [5:11:21<23:37:14, 3.60it/s] 18%|█▊ | 65228/371472 [5:11:21<23:52:22, 3.56it/s] 18%|█▊ | 65229/371472 [5:11:21<30:04:22, 2.83it/s] 18%|█▊ | 65230/371472 [5:11:22<28:49:36, 2.95it/s] 18%|█▊ | 65231/371472 [5:11:22<27:03:17, 3.14it/s] 18%|█▊ | 65232/371472 [5:11:22<26:10:44, 3.25it/s] 18%|█▊ | 65233/371472 [5:11:23<27:19:13, 3.11it/s] 18%|█▊ | 65234/371472 [5:11:23<27:40:42, 3.07it/s] 18%|█▊ | 65235/371472 [5:11:23<26:54:48, 3.16it/s] 18%|█▊ | 65236/371472 [5:11:23<25:27:58, 3.34it/s] 18%|█▊ | 65237/371472 [5:11:24<25:24:31, 3.35it/s] 18%|█▊ | 65238/371472 [5:11:24<25:51:24, 3.29it/s] 18%|█▊ | 65239/371472 [5:11:24<25:53:28, 3.29it/s] 18%|█▊ | 65240/371472 [5:11:25<25:39:50, 3.31it/s] {'loss': 4.1263, 'learning_rate': 8.423366157426362e-07, 'epoch': 2.81} + 18%|█▊ | 65240/371472 [5:11:25<25:39:50, 3.31it/s] 18%|█▊ | 65241/371472 [5:11:25<24:54:47, 3.41it/s] 18%|█▊ | 65242/371472 [5:11:25<25:34:20, 3.33it/s] 18%|█▊ | 65243/371472 [5:11:26<25:11:46, 3.38it/s] 18%|█▊ | 65244/371472 [5:11:26<24:36:44, 3.46it/s] 18%|█▊ | 65245/371472 [5:11:26<24:30:34, 3.47it/s] 18%|█▊ | 65246/371472 [5:11:26<23:43:09, 3.59it/s] 18%|█▊ | 65247/371472 [5:11:27<24:06:49, 3.53it/s] 18%|█▊ | 65248/371472 [5:11:27<23:28:06, 3.62it/s] 18%|█▊ | 65249/371472 [5:11:27<23:20:24, 3.64it/s] 18%|█▊ | 65250/371472 [5:11:27<23:36:15, 3.60it/s] 18%|█▊ | 65251/371472 [5:11:28<24:20:07, 3.50it/s] 18%|█▊ | 65252/371472 [5:11:28<23:23:15, 3.64it/s] 18%|█▊ | 65253/371472 [5:11:28<23:13:53, 3.66it/s] 18%|█▊ | 65254/371472 [5:11:29<23:37:42, 3.60it/s] 18%|█▊ | 65255/371472 [5:11:29<24:04:02, 3.53it/s] 18%|█▊ | 65256/371472 [5:11:29<25:02:15, 3.40it/s] 18%|█▊ | 65257/371472 [5:11:29<23:56:44, 3.55it/s] 18%|█▊ | 65258/371472 [5:11:30<23:36:51, 3.60it/s] 18%|█▊ | 65259/371472 [5:11:30<23:38:05, 3.60it/s] 18%|█▊ | 65260/371472 [5:11:30<24:35:12, 3.46it/s] {'loss': 4.4143, 'learning_rate': 8.422881337671572e-07, 'epoch': 2.81} + 18%|█▊ | 65260/371472 [5:11:30<24:35:12, 3.46it/s] 18%|█▊ | 65261/371472 [5:11:31<23:58:22, 3.55it/s] 18%|█▊ | 65262/371472 [5:11:31<24:32:57, 3.46it/s] 18%|█▊ | 65263/371472 [5:11:31<24:27:17, 3.48it/s] 18%|█▊ | 65264/371472 [5:11:31<24:51:03, 3.42it/s] 18%|█▊ | 65265/371472 [5:11:32<23:32:35, 3.61it/s] 18%|█▊ | 65266/371472 [5:11:32<25:18:45, 3.36it/s] 18%|█▊ | 65267/371472 [5:11:32<24:32:12, 3.47it/s] 18%|█▊ | 65268/371472 [5:11:33<24:19:36, 3.50it/s] 18%|█▊ | 65269/371472 [5:11:33<24:31:20, 3.47it/s] 18%|█▊ | 65270/371472 [5:11:33<24:22:48, 3.49it/s] 18%|█▊ | 65271/371472 [5:11:33<24:36:09, 3.46it/s] 18%|█▊ | 65272/371472 [5:11:34<24:54:03, 3.42it/s] 18%|█▊ | 65273/371472 [5:11:34<23:34:21, 3.61it/s] 18%|█▊ | 65274/371472 [5:11:34<22:36:18, 3.76it/s] 18%|█▊ | 65275/371472 [5:11:35<23:31:15, 3.62it/s] 18%|█▊ | 65276/371472 [5:11:35<23:22:37, 3.64it/s] 18%|█▊ | 65277/371472 [5:11:35<23:32:03, 3.61it/s] 18%|█▊ | 65278/371472 [5:11:35<23:35:54, 3.60it/s] 18%|█▊ | 65279/371472 [5:11:36<23:56:34, 3.55it/s] 18%|█▊ | 65280/371472 [5:11:36<23:42:28, 3.59it/s] {'loss': 4.0707, 'learning_rate': 8.422396517916783e-07, 'epoch': 2.81} + 18%|█▊ | 65280/371472 [5:11:36<23:42:28, 3.59it/s] 18%|█▊ | 65281/371472 [5:11:36<23:03:04, 3.69it/s] 18%|█▊ | 65282/371472 [5:11:37<26:25:10, 3.22it/s] 18%|█▊ | 65283/371472 [5:11:37<26:17:23, 3.24it/s] 18%|█▊ | 65284/371472 [5:11:37<26:38:07, 3.19it/s] 18%|█▊ | 65285/371472 [5:11:38<26:51:32, 3.17it/s] 18%|█▊ | 65286/371472 [5:11:38<25:35:14, 3.32it/s] 18%|█▊ | 65287/371472 [5:11:38<24:57:57, 3.41it/s] 18%|█▊ | 65288/371472 [5:11:38<26:25:30, 3.22it/s] 18%|█▊ | 65289/371472 [5:11:39<26:44:21, 3.18it/s] 18%|█▊ | 65290/371472 [5:11:39<25:45:15, 3.30it/s] 18%|█▊ | 65291/371472 [5:11:39<25:05:36, 3.39it/s] 18%|█▊ | 65292/371472 [5:11:40<24:06:55, 3.53it/s] 18%|█▊ | 65293/371472 [5:11:40<23:51:06, 3.57it/s] 18%|█▊ | 65294/371472 [5:11:40<23:52:03, 3.56it/s] 18%|█▊ | 65295/371472 [5:11:40<23:51:54, 3.56it/s] 18%|█▊ | 65296/371472 [5:11:41<24:49:19, 3.43it/s] 18%|█▊ | 65297/371472 [5:11:41<23:55:23, 3.56it/s] 18%|█▊ | 65298/371472 [5:11:41<23:27:15, 3.63it/s] 18%|█▊ | 65299/371472 [5:11:41<22:46:25, 3.73it/s] 18%|█▊ | 65300/371472 [5:11:42<22:53:20, 3.72it/s] {'loss': 4.052, 'learning_rate': 8.421911698161993e-07, 'epoch': 2.81} + 18%|█▊ | 65300/371472 [5:11:42<22:53:20, 3.72it/s] 18%|█▊ | 65301/371472 [5:11:42<22:51:47, 3.72it/s] 18%|█▊ | 65302/371472 [5:11:42<22:34:15, 3.77it/s] 18%|█▊ | 65303/371472 [5:11:43<23:44:24, 3.58it/s] 18%|█▊ | 65304/371472 [5:11:43<23:14:35, 3.66it/s] 18%|█▊ | 65305/371472 [5:11:43<23:51:09, 3.57it/s] 18%|█▊ | 65306/371472 [5:11:43<23:33:05, 3.61it/s] 18%|█▊ | 65307/371472 [5:11:44<23:59:54, 3.54it/s] 18%|█▊ | 65308/371472 [5:11:44<24:29:10, 3.47it/s] 18%|█▊ | 65309/371472 [5:11:44<23:24:02, 3.63it/s] 18%|█▊ | 65310/371472 [5:11:45<23:33:25, 3.61it/s] 18%|█▊ | 65311/371472 [5:11:45<22:26:22, 3.79it/s] 18%|█▊ | 65312/371472 [5:11:45<22:04:52, 3.85it/s] 18%|█▊ | 65313/371472 [5:11:45<22:51:31, 3.72it/s] 18%|█▊ | 65314/371472 [5:11:46<23:30:11, 3.62it/s] 18%|█▊ | 65315/371472 [5:11:46<24:21:14, 3.49it/s] 18%|█▊ | 65316/371472 [5:11:46<23:49:05, 3.57it/s] 18%|█▊ | 65317/371472 [5:11:46<23:43:13, 3.59it/s] 18%|█▊ | 65318/371472 [5:11:47<24:36:09, 3.46it/s] 18%|█▊ | 65319/371472 [5:11:47<24:02:40, 3.54it/s] 18%|█▊ | 65320/371472 [5:11:47<23:49:22, 3.57it/s] {'loss': 4.1191, 'learning_rate': 8.421426878407205e-07, 'epoch': 2.81} + 18%|█▊ | 65320/371472 [5:11:47<23:49:22, 3.57it/s] 18%|█▊ | 65321/371472 [5:11:48<22:51:21, 3.72it/s] 18%|█▊ | 65322/371472 [5:11:48<24:57:24, 3.41it/s] 18%|█▊ | 65323/371472 [5:11:48<24:17:23, 3.50it/s] 18%|█▊ | 65324/371472 [5:11:48<23:38:53, 3.60it/s] 18%|█▊ | 65325/371472 [5:11:49<24:32:32, 3.47it/s] 18%|█▊ | 65326/371472 [5:11:49<23:23:34, 3.64it/s] 18%|█▊ | 65327/371472 [5:11:49<24:37:13, 3.45it/s] 18%|█▊ | 65328/371472 [5:11:50<24:19:20, 3.50it/s] 18%|█▊ | 65329/371472 [5:11:50<23:31:15, 3.62it/s] 18%|█▊ | 65330/371472 [5:11:50<23:00:55, 3.69it/s] 18%|█▊ | 65331/371472 [5:11:50<22:48:59, 3.73it/s] 18%|█▊ | 65332/371472 [5:11:51<22:56:23, 3.71it/s] 18%|█▊ | 65333/371472 [5:11:51<24:37:02, 3.45it/s] 18%|█▊ | 65334/371472 [5:11:51<23:57:42, 3.55it/s] 18%|█▊ | 65335/371472 [5:11:51<23:21:50, 3.64it/s] 18%|█▊ | 65336/371472 [5:11:52<25:12:16, 3.37it/s] 18%|█▊ | 65337/371472 [5:11:52<24:29:10, 3.47it/s] 18%|█▊ | 65338/371472 [5:11:52<24:09:34, 3.52it/s] 18%|█▊ | 65339/371472 [5:11:53<23:24:29, 3.63it/s] 18%|█▊ | 65340/371472 [5:11:53<23:08:06, 3.68it/s] {'loss': 4.0559, 'learning_rate': 8.420942058652416e-07, 'epoch': 2.81} + 18%|█▊ | 65340/371472 [5:11:53<23:08:06, 3.68it/s] 18%|█▊ | 65341/371472 [5:11:53<25:12:43, 3.37it/s] 18%|█▊ | 65342/371472 [5:11:54<24:46:11, 3.43it/s] 18%|█▊ | 65343/371472 [5:11:54<25:09:58, 3.38it/s] 18%|█▊ | 65344/371472 [5:11:54<27:38:47, 3.08it/s] 18%|█▊ | 65345/371472 [5:11:55<28:35:16, 2.97it/s] 18%|█▊ | 65346/371472 [5:11:55<27:40:27, 3.07it/s] 18%|█▊ | 65347/371472 [5:11:55<26:04:26, 3.26it/s] 18%|█▊ | 65348/371472 [5:11:55<25:39:49, 3.31it/s] 18%|█▊ | 65349/371472 [5:11:56<24:25:39, 3.48it/s] 18%|█▊ | 65350/371472 [5:11:56<25:17:11, 3.36it/s] 18%|█▊ | 65351/371472 [5:11:56<24:33:41, 3.46it/s] 18%|█▊ | 65352/371472 [5:11:57<24:33:33, 3.46it/s] 18%|█▊ | 65353/371472 [5:11:57<24:32:24, 3.47it/s] 18%|█▊ | 65354/371472 [5:11:57<23:15:29, 3.66it/s] 18%|█▊ | 65355/371472 [5:11:57<23:25:15, 3.63it/s] 18%|█▊ | 65356/371472 [5:11:58<23:03:51, 3.69it/s] 18%|█▊ | 65357/371472 [5:11:58<23:18:13, 3.65it/s] 18%|█▊ | 65358/371472 [5:11:58<23:01:37, 3.69it/s] 18%|█▊ | 65359/371472 [5:11:58<22:37:36, 3.76it/s] 18%|█▊ | 65360/371472 [5:11:59<22:30:41, 3.78it/s] {'loss': 4.0588, 'learning_rate': 8.420457238897627e-07, 'epoch': 2.82} + 18%|█▊ | 65360/371472 [5:11:59<22:30:41, 3.78it/s] 18%|█▊ | 65361/371472 [5:11:59<22:38:06, 3.76it/s] 18%|█▊ | 65362/371472 [5:11:59<23:32:28, 3.61it/s] 18%|█▊ | 65363/371472 [5:12:00<24:28:21, 3.47it/s] 18%|█▊ | 65364/371472 [5:12:00<24:15:59, 3.50it/s] 18%|█▊ | 65365/371472 [5:12:00<24:10:10, 3.52it/s] 18%|█▊ | 65366/371472 [5:12:01<26:09:39, 3.25it/s] 18%|█▊ | 65367/371472 [5:12:01<25:55:14, 3.28it/s] 18%|█▊ | 65368/371472 [5:12:01<26:17:41, 3.23it/s] 18%|█▊ | 65369/371472 [5:12:01<26:12:52, 3.24it/s] 18%|█▊ | 65370/371472 [5:12:02<25:43:35, 3.31it/s] 18%|█▊ | 65371/371472 [5:12:02<25:02:26, 3.40it/s] 18%|█▊ | 65372/371472 [5:12:02<24:28:30, 3.47it/s] 18%|█▊ | 65373/371472 [5:12:03<23:33:04, 3.61it/s] 18%|█▊ | 65374/371472 [5:12:03<22:33:31, 3.77it/s] 18%|█▊ | 65375/371472 [5:12:03<22:20:09, 3.81it/s] 18%|█▊ | 65376/371472 [5:12:03<23:00:52, 3.69it/s] 18%|█▊ | 65377/371472 [5:12:04<24:36:29, 3.46it/s] 18%|█▊ | 65378/371472 [5:12:04<24:44:15, 3.44it/s] 18%|█▊ | 65379/371472 [5:12:04<25:19:42, 3.36it/s] 18%|█▊ | 65380/371472 [5:12:05<24:07:30, 3.52it/s] {'loss': 4.1563, 'learning_rate': 8.419972419142839e-07, 'epoch': 2.82} + 18%|█▊ | 65380/371472 [5:12:05<24:07:30, 3.52it/s] 18%|█▊ | 65381/371472 [5:12:05<23:24:33, 3.63it/s] 18%|█▊ | 65382/371472 [5:12:05<23:01:58, 3.69it/s] 18%|█▊ | 65383/371472 [5:12:05<23:14:21, 3.66it/s] 18%|█▊ | 65384/371472 [5:12:06<24:40:46, 3.45it/s] 18%|█▊ | 65385/371472 [5:12:06<24:54:40, 3.41it/s] 18%|█▊ | 65386/371472 [5:12:06<24:04:37, 3.53it/s] 18%|█▊ | 65387/371472 [5:12:06<24:27:51, 3.48it/s] 18%|█▊ | 65388/371472 [5:12:07<23:37:36, 3.60it/s] 18%|█▊ | 65389/371472 [5:12:07<23:21:56, 3.64it/s] 18%|█▊ | 65390/371472 [5:12:07<23:33:45, 3.61it/s] 18%|█▊ | 65391/371472 [5:12:08<22:55:45, 3.71it/s] 18%|█▊ | 65392/371472 [5:12:08<23:27:22, 3.62it/s] 18%|█▊ | 65393/371472 [5:12:08<25:09:41, 3.38it/s] 18%|█▊ | 65394/371472 [5:12:08<24:04:53, 3.53it/s] 18%|█▊ | 65395/371472 [5:12:09<24:31:32, 3.47it/s] 18%|█▊ | 65396/371472 [5:12:09<25:52:46, 3.29it/s] 18%|█▊ | 65397/371472 [5:12:09<24:31:30, 3.47it/s] 18%|█▊ | 65398/371472 [5:12:10<23:28:28, 3.62it/s] 18%|█▊ | 65399/371472 [5:12:10<24:14:08, 3.51it/s] 18%|█▊ | 65400/371472 [5:12:10<25:17:10, 3.36it/s] {'loss': 4.1521, 'learning_rate': 8.419487599388049e-07, 'epoch': 2.82} + 18%|█▊ | 65400/371472 [5:12:10<25:17:10, 3.36it/s] 18%|█▊ | 65401/371472 [5:12:10<24:44:56, 3.44it/s] 18%|█▊ | 65402/371472 [5:12:11<23:37:22, 3.60it/s] 18%|█▊ | 65403/371472 [5:12:11<23:32:47, 3.61it/s] 18%|█▊ | 65404/371472 [5:12:11<24:12:20, 3.51it/s] 18%|█▊ | 65405/371472 [5:12:12<24:17:39, 3.50it/s] 18%|█▊ | 65406/371472 [5:12:12<24:19:03, 3.50it/s] 18%|█▊ | 65407/371472 [5:12:12<24:15:28, 3.50it/s] 18%|█▊ | 65408/371472 [5:12:12<23:04:22, 3.68it/s] 18%|█▊ | 65409/371472 [5:12:13<22:25:50, 3.79it/s] 18%|█▊ | 65410/371472 [5:12:13<23:29:42, 3.62it/s] 18%|█▊ | 65411/371472 [5:12:13<23:34:56, 3.61it/s] 18%|█▊ | 65412/371472 [5:12:14<28:30:43, 2.98it/s] 18%|█▊ | 65413/371472 [5:12:14<26:12:47, 3.24it/s] 18%|█▊ | 65414/371472 [5:12:14<26:58:12, 3.15it/s] 18%|█▊ | 65415/371472 [5:12:15<25:26:26, 3.34it/s] 18%|█▊ | 65416/371472 [5:12:15<26:08:31, 3.25it/s] 18%|█▊ | 65417/371472 [5:12:15<24:33:11, 3.46it/s] 18%|█▊ | 65418/371472 [5:12:15<25:15:12, 3.37it/s] 18%|█▊ | 65419/371472 [5:12:16<24:06:30, 3.53it/s] 18%|█▊ | 65420/371472 [5:12:16<25:47:55, 3.30it/s] {'loss': 4.202, 'learning_rate': 8.41900277963326e-07, 'epoch': 2.82} + 18%|█▊ | 65420/371472 [5:12:16<25:47:55, 3.30it/s] 18%|█▊ | 65421/371472 [5:12:16<25:37:02, 3.32it/s] 18%|█▊ | 65422/371472 [5:12:17<24:21:10, 3.49it/s] 18%|█▊ | 65423/371472 [5:12:17<23:35:51, 3.60it/s] 18%|█▊ | 65424/371472 [5:12:17<23:15:19, 3.66it/s] 18%|█▊ | 65425/371472 [5:12:17<24:10:13, 3.52it/s] 18%|█▊ | 65426/371472 [5:12:18<23:58:04, 3.55it/s] 18%|█▊ | 65427/371472 [5:12:18<24:45:38, 3.43it/s] 18%|█▊ | 65428/371472 [5:12:18<24:51:04, 3.42it/s] 18%|█▊ | 65429/371472 [5:12:19<24:15:36, 3.50it/s] 18%|█▊ | 65430/371472 [5:12:19<23:55:15, 3.55it/s] 18%|█▊ | 65431/371472 [5:12:19<24:11:31, 3.51it/s] 18%|█▊ | 65432/371472 [5:12:19<24:06:02, 3.53it/s] 18%|█▊ | 65433/371472 [5:12:20<23:15:54, 3.65it/s] 18%|█▊ | 65434/371472 [5:12:20<23:54:59, 3.55it/s] 18%|█▊ | 65435/371472 [5:12:20<24:22:19, 3.49it/s] 18%|█▊ | 65436/371472 [5:12:21<24:07:22, 3.52it/s] 18%|█▊ | 65437/371472 [5:12:21<25:21:22, 3.35it/s] 18%|█▊ | 65438/371472 [5:12:21<24:44:06, 3.44it/s] 18%|█▊ | 65439/371472 [5:12:21<24:21:39, 3.49it/s] 18%|█▊ | 65440/371472 [5:12:22<27:31:03, 3.09it/s] {'loss': 4.1619, 'learning_rate': 8.418517959878471e-07, 'epoch': 2.82} + 18%|█▊ | 65440/371472 [5:12:22<27:31:03, 3.09it/s] 18%|█▊ | 65441/371472 [5:12:22<25:32:28, 3.33it/s] 18%|█▊ | 65442/371472 [5:12:22<24:20:26, 3.49it/s] 18%|█▊ | 65443/371472 [5:12:23<24:49:36, 3.42it/s] 18%|█▊ | 65444/371472 [5:12:23<23:56:00, 3.55it/s] 18%|█▊ | 65445/371472 [5:12:23<23:18:50, 3.65it/s] 18%|█▊ | 65446/371472 [5:12:23<22:32:44, 3.77it/s] 18%|█▊ | 65447/371472 [5:12:24<24:36:16, 3.45it/s] 18%|█▊ | 65448/371472 [5:12:24<24:05:33, 3.53it/s] 18%|█▊ | 65449/371472 [5:12:24<25:09:04, 3.38it/s] 18%|█▊ | 65450/371472 [5:12:25<24:24:50, 3.48it/s] 18%|█▊ | 65451/371472 [5:12:25<25:45:47, 3.30it/s] 18%|█▊ | 65452/371472 [5:12:25<25:44:07, 3.30it/s] 18%|█▊ | 65453/371472 [5:12:26<25:14:12, 3.37it/s] 18%|█▊ | 65454/371472 [5:12:26<25:42:41, 3.31it/s] 18%|█▊ | 65455/371472 [5:12:26<26:26:59, 3.21it/s] 18%|█▊ | 65456/371472 [5:12:27<27:46:21, 3.06it/s] 18%|█▊ | 65457/371472 [5:12:27<25:48:58, 3.29it/s] 18%|█▊ | 65458/371472 [5:12:27<26:52:36, 3.16it/s] 18%|█▊ | 65459/371472 [5:12:27<25:18:59, 3.36it/s] 18%|█▊ | 65460/371472 [5:12:28<27:52:02, 3.05it/s] {'loss': 4.2922, 'learning_rate': 8.418033140123683e-07, 'epoch': 2.82} + 18%|█▊ | 65460/371472 [5:12:28<27:52:02, 3.05it/s] 18%|█▊ | 65461/371472 [5:12:28<26:03:24, 3.26it/s] 18%|█▊ | 65462/371472 [5:12:28<25:18:57, 3.36it/s] 18%|█▊ | 65463/371472 [5:12:29<24:49:23, 3.42it/s] 18%|█▊ | 65464/371472 [5:12:29<24:22:12, 3.49it/s] 18%|█▊ | 65465/371472 [5:12:29<23:56:16, 3.55it/s] 18%|█▊ | 65466/371472 [5:12:29<23:34:22, 3.61it/s] 18%|█▊ | 65467/371472 [5:12:30<23:05:41, 3.68it/s] 18%|█▊ | 65468/371472 [5:12:30<23:19:32, 3.64it/s] 18%|█▊ | 65469/371472 [5:12:30<25:16:08, 3.36it/s] 18%|█▊ | 65470/371472 [5:12:31<24:15:08, 3.50it/s] 18%|█▊ | 65471/371472 [5:12:31<24:19:28, 3.49it/s] 18%|█▊ | 65472/371472 [5:12:31<23:51:53, 3.56it/s] 18%|█▊ | 65473/371472 [5:12:31<23:57:40, 3.55it/s] 18%|█▊ | 65474/371472 [5:12:32<23:24:50, 3.63it/s] 18%|█▊ | 65475/371472 [5:12:32<23:10:29, 3.67it/s] 18%|█▊ | 65476/371472 [5:12:32<24:47:21, 3.43it/s] 18%|█▊ | 65477/371472 [5:12:33<24:35:35, 3.46it/s] 18%|█▊ | 65478/371472 [5:12:33<25:19:43, 3.36it/s] 18%|█▊ | 65479/371472 [5:12:33<24:29:21, 3.47it/s] 18%|█▊ | 65480/371472 [5:12:33<23:56:58, 3.55it/s] {'loss': 4.2528, 'learning_rate': 8.417548320368894e-07, 'epoch': 2.82} + 18%|█▊ | 65480/371472 [5:12:33<23:56:58, 3.55it/s] 18%|█▊ | 65481/371472 [5:12:34<23:37:07, 3.60it/s] 18%|█▊ | 65482/371472 [5:12:34<23:50:14, 3.57it/s] 18%|█▊ | 65483/371472 [5:12:34<23:08:04, 3.67it/s] 18%|█▊ | 65484/371472 [5:12:34<23:25:36, 3.63it/s] 18%|█▊ | 65485/371472 [5:12:35<22:49:55, 3.72it/s] 18%|█▊ | 65486/371472 [5:12:35<22:08:12, 3.84it/s] 18%|█▊ | 65487/371472 [5:12:35<22:34:48, 3.76it/s] 18%|█▊ | 65488/371472 [5:12:36<22:20:56, 3.80it/s] 18%|█▊ | 65489/371472 [5:12:36<23:43:57, 3.58it/s] 18%|█▊ | 65490/371472 [5:12:36<22:56:31, 3.70it/s] 18%|█▊ | 65491/371472 [5:12:36<24:14:03, 3.51it/s] 18%|█▊ | 65492/371472 [5:12:37<23:14:15, 3.66it/s] 18%|█▊ | 65493/371472 [5:12:37<22:34:35, 3.76it/s] 18%|█▊ | 65494/371472 [5:12:37<23:12:49, 3.66it/s] 18%|█▊ | 65495/371472 [5:12:38<25:18:35, 3.36it/s] 18%|█▊ | 65496/371472 [5:12:38<24:16:29, 3.50it/s] 18%|█▊ | 65497/371472 [5:12:38<23:49:37, 3.57it/s] 18%|█▊ | 65498/371472 [5:12:38<24:51:19, 3.42it/s] 18%|█▊ | 65499/371472 [5:12:39<23:22:40, 3.64it/s] 18%|█▊ | 65500/371472 [5:12:39<24:53:31, 3.41it/s] {'loss': 4.2084, 'learning_rate': 8.417063500614105e-07, 'epoch': 2.82} + 18%|█▊ | 65500/371472 [5:12:39<24:53:31, 3.41it/s] 18%|█▊ | 65501/371472 [5:12:39<26:21:00, 3.23it/s] 18%|█▊ | 65502/371472 [5:12:40<27:06:40, 3.13it/s] 18%|█▊ | 65503/371472 [5:12:40<26:21:24, 3.22it/s] 18%|█▊ | 65504/371472 [5:12:40<24:37:55, 3.45it/s] 18%|█▊ | 65505/371472 [5:12:40<24:52:55, 3.42it/s] 18%|█▊ | 65506/371472 [5:12:41<24:27:07, 3.48it/s] 18%|█▊ | 65507/371472 [5:12:41<23:58:12, 3.55it/s] 18%|█▊ | 65508/371472 [5:12:41<23:27:10, 3.62it/s] 18%|█▊ | 65509/371472 [5:12:42<23:19:36, 3.64it/s] 18%|█▊ | 65510/371472 [5:12:42<24:58:51, 3.40it/s] 18%|█▊ | 65511/371472 [5:12:42<24:07:59, 3.52it/s] 18%|█▊ | 65512/371472 [5:12:43<25:31:32, 3.33it/s] 18%|█▊ | 65513/371472 [5:12:43<23:56:06, 3.55it/s] 18%|█▊ | 65514/371472 [5:12:43<24:26:12, 3.48it/s] 18%|█▊ | 65515/371472 [5:12:43<23:44:02, 3.58it/s] 18%|█▊ | 65516/371472 [5:12:44<23:06:41, 3.68it/s] 18%|█▊ | 65517/371472 [5:12:44<22:37:39, 3.76it/s] 18%|█▊ | 65518/371472 [5:12:44<24:11:48, 3.51it/s] 18%|█▊ | 65519/371472 [5:12:44<23:43:40, 3.58it/s] 18%|█▊ | 65520/371472 [5:12:45<23:25:08, 3.63it/s] {'loss': 4.2309, 'learning_rate': 8.416578680859316e-07, 'epoch': 2.82} + 18%|█▊ | 65520/371472 [5:12:45<23:25:08, 3.63it/s] 18%|█▊ | 65521/371472 [5:12:45<23:26:37, 3.63it/s] 18%|█▊ | 65522/371472 [5:12:45<23:59:18, 3.54it/s] 18%|█▊ | 65523/371472 [5:12:46<23:43:08, 3.58it/s] 18%|█▊ | 65524/371472 [5:12:46<23:44:51, 3.58it/s] 18%|█▊ | 65525/371472 [5:12:46<23:34:04, 3.61it/s] 18%|█▊ | 65526/371472 [5:12:46<22:41:55, 3.74it/s] 18%|█▊ | 65527/371472 [5:12:47<23:09:41, 3.67it/s] 18%|█▊ | 65528/371472 [5:12:47<23:03:37, 3.69it/s] 18%|█▊ | 65529/371472 [5:12:47<23:10:33, 3.67it/s] 18%|█▊ | 65530/371472 [5:12:47<23:08:06, 3.67it/s] 18%|█▊ | 65531/371472 [5:12:48<24:31:04, 3.47it/s] 18%|█▊ | 65532/371472 [5:12:48<25:32:03, 3.33it/s] 18%|█▊ | 65533/371472 [5:12:48<25:22:35, 3.35it/s] 18%|█▊ | 65534/371472 [5:12:49<25:46:51, 3.30it/s] 18%|█▊ | 65535/371472 [5:12:49<25:29:06, 3.33it/s] 18%|█▊ | 65536/371472 [5:12:49<24:31:13, 3.47it/s] 18%|█▊ | 65537/371472 [5:12:50<24:31:23, 3.47it/s] 18%|█▊ | 65538/371472 [5:12:50<23:55:33, 3.55it/s] 18%|█▊ | 65539/371472 [5:12:50<23:22:13, 3.64it/s] 18%|█▊ | 65540/371472 [5:12:50<24:35:16, 3.46it/s] {'loss': 4.2342, 'learning_rate': 8.416093861104526e-07, 'epoch': 2.82} + 18%|█▊ | 65540/371472 [5:12:50<24:35:16, 3.46it/s] 18%|█▊ | 65541/371472 [5:12:51<26:35:41, 3.20it/s] 18%|█▊ | 65542/371472 [5:12:51<24:44:57, 3.43it/s] 18%|█▊ | 65543/371472 [5:12:51<24:08:30, 3.52it/s] 18%|█▊ | 65544/371472 [5:12:52<24:30:52, 3.47it/s] 18%|█▊ | 65545/371472 [5:12:52<23:47:45, 3.57it/s] 18%|█▊ | 65546/371472 [5:12:52<24:38:30, 3.45it/s] 18%|█▊ | 65547/371472 [5:12:52<25:33:36, 3.32it/s] 18%|█▊ | 65548/371472 [5:12:53<25:43:01, 3.30it/s] 18%|█▊ | 65549/371472 [5:12:53<26:01:22, 3.27it/s] 18%|█▊ | 65550/371472 [5:12:53<26:06:04, 3.26it/s] 18%|█▊ | 65551/371472 [5:12:54<24:58:54, 3.40it/s] 18%|█▊ | 65552/371472 [5:12:54<23:47:25, 3.57it/s] 18%|█▊ | 65553/371472 [5:12:54<23:18:41, 3.65it/s] 18%|█▊ | 65554/371472 [5:12:54<24:01:44, 3.54it/s] 18%|█▊ | 65555/371472 [5:12:55<24:26:54, 3.48it/s] 18%|█▊ | 65556/371472 [5:12:55<27:01:49, 3.14it/s] 18%|█▊ | 65557/371472 [5:12:56<28:59:05, 2.93it/s] 18%|█▊ | 65558/371472 [5:12:56<26:24:31, 3.22it/s] 18%|█▊ | 65559/371472 [5:12:56<27:38:23, 3.07it/s] 18%|█▊ | 65560/371472 [5:12:56<26:53:13, 3.16it/s] {'loss': 4.0922, 'learning_rate': 8.415609041349737e-07, 'epoch': 2.82} + 18%|█▊ | 65560/371472 [5:12:56<26:53:13, 3.16it/s] 18%|█▊ | 65561/371472 [5:12:57<28:45:47, 2.95it/s] 18%|█▊ | 65562/371472 [5:12:57<27:05:07, 3.14it/s] 18%|█▊ | 65563/371472 [5:12:57<26:29:16, 3.21it/s] 18%|█▊ | 65564/371472 [5:12:58<25:25:30, 3.34it/s] 18%|█▊ | 65565/371472 [5:12:58<25:47:02, 3.30it/s] 18%|█▊ | 65566/371472 [5:12:58<25:06:10, 3.39it/s] 18%|█▊ | 65567/371472 [5:12:59<25:00:30, 3.40it/s] 18%|█▊ | 65568/371472 [5:12:59<23:42:10, 3.58it/s] 18%|█▊ | 65569/371472 [5:12:59<23:08:27, 3.67it/s] 18%|█▊ | 65570/371472 [5:12:59<24:24:35, 3.48it/s] 18%|█▊ | 65571/371472 [5:13:00<25:51:59, 3.29it/s] 18%|█▊ | 65572/371472 [5:13:00<25:34:39, 3.32it/s] 18%|█▊ | 65573/371472 [5:13:00<24:29:09, 3.47it/s] 18%|█▊ | 65574/371472 [5:13:01<25:00:13, 3.40it/s] 18%|█▊ | 65575/371472 [5:13:01<23:51:15, 3.56it/s] 18%|█▊ | 65576/371472 [5:13:01<22:58:07, 3.70it/s] 18%|█▊ | 65577/371472 [5:13:01<24:08:38, 3.52it/s] 18%|█▊ | 65578/371472 [5:13:02<24:17:37, 3.50it/s] 18%|█▊ | 65579/371472 [5:13:02<23:44:24, 3.58it/s] 18%|█▊ | 65580/371472 [5:13:02<23:09:58, 3.67it/s] {'loss': 3.9988, 'learning_rate': 8.415124221594949e-07, 'epoch': 2.82} + 18%|█▊ | 65580/371472 [5:13:02<23:09:58, 3.67it/s] 18%|█▊ | 65581/371472 [5:13:02<23:32:20, 3.61it/s] 18%|█▊ | 65582/371472 [5:13:03<22:57:27, 3.70it/s] 18%|█▊ | 65583/371472 [5:13:03<24:25:09, 3.48it/s] 18%|█▊ | 65584/371472 [5:13:03<25:08:42, 3.38it/s] 18%|█▊ | 65585/371472 [5:13:04<24:30:00, 3.47it/s] 18%|█▊ | 65586/371472 [5:13:04<24:21:37, 3.49it/s] 18%|█▊ | 65587/371472 [5:13:04<23:42:22, 3.58it/s] 18%|█▊ | 65588/371472 [5:13:04<24:28:06, 3.47it/s] 18%|█▊ | 65589/371472 [5:13:05<23:36:12, 3.60it/s] 18%|█▊ | 65590/371472 [5:13:05<25:24:09, 3.34it/s] 18%|█▊ | 65591/371472 [5:13:05<26:47:31, 3.17it/s] 18%|█▊ | 65592/371472 [5:13:06<26:49:09, 3.17it/s] 18%|█▊ | 65593/371472 [5:13:06<25:01:28, 3.40it/s] 18%|█▊ | 65594/371472 [5:13:06<26:00:47, 3.27it/s] 18%|█▊ | 65595/371472 [5:13:07<24:33:17, 3.46it/s] 18%|█▊ | 65596/371472 [5:13:07<24:22:49, 3.48it/s] 18%|█▊ | 65597/371472 [5:13:07<24:47:30, 3.43it/s] 18%|█▊ | 65598/371472 [5:13:07<23:40:08, 3.59it/s] 18%|█▊ | 65599/371472 [5:13:08<23:39:55, 3.59it/s] 18%|█▊ | 65600/371472 [5:13:08<23:17:59, 3.65it/s] {'loss': 4.2548, 'learning_rate': 8.41463940184016e-07, 'epoch': 2.83} + 18%|█▊ | 65600/371472 [5:13:08<23:17:59, 3.65it/s] 18%|█▊ | 65601/371472 [5:13:08<23:13:31, 3.66it/s] 18%|█▊ | 65602/371472 [5:13:09<23:26:07, 3.63it/s] 18%|█▊ | 65603/371472 [5:13:09<24:44:04, 3.44it/s] 18%|█▊ | 65604/371472 [5:13:09<24:02:57, 3.53it/s] 18%|█▊ | 65605/371472 [5:13:09<24:12:55, 3.51it/s] 18%|█▊ | 65606/371472 [5:13:10<24:06:37, 3.52it/s] 18%|█▊ | 65607/371472 [5:13:10<24:55:17, 3.41it/s] 18%|█▊ | 65608/371472 [5:13:10<24:39:11, 3.45it/s] 18%|█▊ | 65609/371472 [5:13:11<25:04:30, 3.39it/s] 18%|█▊ | 65610/371472 [5:13:11<24:09:14, 3.52it/s] 18%|█▊ | 65611/371472 [5:13:11<24:08:26, 3.52it/s] 18%|█▊ | 65612/371472 [5:13:11<23:22:31, 3.63it/s] 18%|█▊ | 65613/371472 [5:13:12<22:48:06, 3.73it/s] 18%|█▊ | 65614/371472 [5:13:12<22:45:45, 3.73it/s] 18%|█▊ | 65615/371472 [5:13:12<23:03:33, 3.68it/s] 18%|█▊ | 65616/371472 [5:13:12<23:15:26, 3.65it/s] 18%|█▊ | 65617/371472 [5:13:13<22:53:33, 3.71it/s] 18%|█▊ | 65618/371472 [5:13:13<24:45:19, 3.43it/s] 18%|█▊ | 65619/371472 [5:13:13<24:02:53, 3.53it/s] 18%|█▊ | 65620/371472 [5:13:14<23:04:32, 3.68it/s] {'loss': 4.024, 'learning_rate': 8.414154582085371e-07, 'epoch': 2.83} + 18%|█▊ | 65620/371472 [5:13:14<23:04:32, 3.68it/s] 18%|█▊ | 65621/371472 [5:13:14<23:45:29, 3.58it/s] 18%|█▊ | 65622/371472 [5:13:14<23:39:07, 3.59it/s] 18%|█▊ | 65623/371472 [5:13:14<23:28:01, 3.62it/s] 18%|█▊ | 65624/371472 [5:13:15<22:35:49, 3.76it/s] 18%|█▊ | 65625/371472 [5:13:15<22:35:03, 3.76it/s] 18%|█▊ | 65626/371472 [5:13:15<22:00:00, 3.86it/s] 18%|█▊ | 65627/371472 [5:13:15<21:57:30, 3.87it/s] 18%|█▊ | 65628/371472 [5:13:16<22:50:18, 3.72it/s] 18%|█▊ | 65629/371472 [5:13:16<22:33:19, 3.77it/s] 18%|█▊ | 65630/371472 [5:13:16<24:13:14, 3.51it/s] 18%|█▊ | 65631/371472 [5:13:17<25:03:16, 3.39it/s] 18%|█▊ | 65632/371472 [5:13:17<24:22:00, 3.49it/s] 18%|█▊ | 65633/371472 [5:13:17<23:54:46, 3.55it/s] 18%|█▊ | 65634/371472 [5:13:17<23:49:30, 3.57it/s] 18%|█▊ | 65635/371472 [5:13:18<23:50:33, 3.56it/s] 18%|█▊ | 65636/371472 [5:13:18<22:37:38, 3.75it/s] 18%|█▊ | 65637/371472 [5:13:18<23:11:01, 3.66it/s] 18%|█▊ | 65638/371472 [5:13:19<24:18:34, 3.49it/s] 18%|█▊ | 65639/371472 [5:13:19<23:25:29, 3.63it/s] 18%|█▊ | 65640/371472 [5:13:19<23:42:36, 3.58it/s] {'loss': 4.1746, 'learning_rate': 8.413669762330582e-07, 'epoch': 2.83} + 18%|█▊ | 65640/371472 [5:13:19<23:42:36, 3.58it/s] 18%|█▊ | 65641/371472 [5:13:19<23:20:38, 3.64it/s] 18%|█▊ | 65642/371472 [5:13:20<24:04:15, 3.53it/s] 18%|█▊ | 65643/371472 [5:13:20<23:28:19, 3.62it/s] 18%|█▊ | 65644/371472 [5:13:20<23:30:51, 3.61it/s] 18%|█▊ | 65645/371472 [5:13:20<23:50:37, 3.56it/s] 18%|█▊ | 65646/371472 [5:13:21<23:48:44, 3.57it/s] 18%|█▊ | 65647/371472 [5:13:21<23:03:03, 3.69it/s] 18%|█▊ | 65648/371472 [5:13:21<22:54:17, 3.71it/s] 18%|█▊ | 65649/371472 [5:13:22<23:03:48, 3.68it/s] 18%|█▊ | 65650/371472 [5:13:22<23:44:01, 3.58it/s] 18%|█▊ | 65651/371472 [5:13:22<23:18:51, 3.64it/s] 18%|█▊ | 65652/371472 [5:13:22<22:42:32, 3.74it/s] 18%|█▊ | 65653/371472 [5:13:23<23:14:30, 3.66it/s] 18%|█▊ | 65654/371472 [5:13:23<24:15:05, 3.50it/s] 18%|█▊ | 65655/371472 [5:13:23<23:41:25, 3.59it/s] 18%|█▊ | 65656/371472 [5:13:24<24:45:10, 3.43it/s] 18%|█▊ | 65657/371472 [5:13:24<23:43:23, 3.58it/s] 18%|█▊ | 65658/371472 [5:13:24<22:45:34, 3.73it/s] 18%|█▊ | 65659/371472 [5:13:24<22:59:21, 3.70it/s] 18%|█▊ | 65660/371472 [5:13:25<23:44:58, 3.58it/s] {'loss': 4.1492, 'learning_rate': 8.413184942575793e-07, 'epoch': 2.83} + 18%|█▊ | 65660/371472 [5:13:25<23:44:58, 3.58it/s] 18%|█▊ | 65661/371472 [5:13:25<23:29:31, 3.62it/s] 18%|█▊ | 65662/371472 [5:13:25<22:52:06, 3.71it/s] 18%|█▊ | 65663/371472 [5:13:25<22:57:41, 3.70it/s] 18%|█▊ | 65664/371472 [5:13:26<22:14:11, 3.82it/s] 18%|█▊ | 65665/371472 [5:13:26<23:59:49, 3.54it/s] 18%|█▊ | 65666/371472 [5:13:26<24:18:33, 3.49it/s] 18%|█▊ | 65667/371472 [5:13:27<23:55:42, 3.55it/s] 18%|█▊ | 65668/371472 [5:13:27<23:07:29, 3.67it/s] 18%|█▊ | 65669/371472 [5:13:27<22:57:59, 3.70it/s] 18%|█▊ | 65670/371472 [5:13:27<23:39:56, 3.59it/s] 18%|█▊ | 65671/371472 [5:13:28<25:45:48, 3.30it/s] 18%|█▊ | 65672/371472 [5:13:28<25:58:58, 3.27it/s] 18%|█▊ | 65673/371472 [5:13:28<26:28:55, 3.21it/s] 18%|█▊ | 65674/371472 [5:13:29<25:43:32, 3.30it/s] 18%|█▊ | 65675/371472 [5:13:29<26:21:58, 3.22it/s] 18%|█▊ | 65676/371472 [5:13:29<25:42:48, 3.30it/s] 18%|█▊ | 65677/371472 [5:13:30<24:24:20, 3.48it/s] 18%|█▊ | 65678/371472 [5:13:30<24:08:52, 3.52it/s] 18%|█▊ | 65679/371472 [5:13:30<24:01:21, 3.54it/s] 18%|█▊ | 65680/371472 [5:13:30<23:12:30, 3.66it/s] {'loss': 4.3199, 'learning_rate': 8.412700122821004e-07, 'epoch': 2.83} + 18%|█▊ | 65680/371472 [5:13:30<23:12:30, 3.66it/s] 18%|█▊ | 65681/371472 [5:13:31<23:44:48, 3.58it/s] 18%|█▊ | 65682/371472 [5:13:31<24:33:03, 3.46it/s] 18%|█▊ | 65683/371472 [5:13:31<27:27:55, 3.09it/s] 18%|█▊ | 65684/371472 [5:13:32<25:52:27, 3.28it/s] 18%|█▊ | 65685/371472 [5:13:32<24:04:25, 3.53it/s] 18%|█▊ | 65686/371472 [5:13:32<24:12:54, 3.51it/s] 18%|█▊ | 65687/371472 [5:13:32<23:34:08, 3.60it/s] 18%|█▊ | 65688/371472 [5:13:33<24:15:56, 3.50it/s] 18%|█▊ | 65689/371472 [5:13:33<23:48:11, 3.57it/s] 18%|█▊ | 65690/371472 [5:13:33<23:33:32, 3.61it/s] 18%|█▊ | 65691/371472 [5:13:33<23:10:05, 3.67it/s] 18%|█▊ | 65692/371472 [5:13:34<22:55:18, 3.71it/s] 18%|█▊ | 65693/371472 [5:13:34<22:33:25, 3.77it/s] 18%|█▊ | 65694/371472 [5:13:34<24:12:14, 3.51it/s] 18%|█▊ | 65695/371472 [5:13:35<25:00:33, 3.40it/s] 18%|█▊ | 65696/371472 [5:13:35<23:28:22, 3.62it/s] 18%|█▊ | 65697/371472 [5:13:35<24:03:48, 3.53it/s] 18%|█▊ | 65698/371472 [5:13:35<23:36:06, 3.60it/s] 18%|█▊ | 65699/371472 [5:13:36<23:30:51, 3.61it/s] 18%|█▊ | 65700/371472 [5:13:36<23:01:15, 3.69it/s] {'loss': 4.2033, 'learning_rate': 8.412215303066215e-07, 'epoch': 2.83} + 18%|█▊ | 65700/371472 [5:13:36<23:01:15, 3.69it/s] 18%|█▊ | 65701/371472 [5:13:36<22:44:25, 3.74it/s] 18%|█▊ | 65702/371472 [5:13:36<22:17:02, 3.81it/s] 18%|█▊ | 65703/371472 [5:13:37<21:31:57, 3.94it/s] 18%|█▊ | 65704/371472 [5:13:37<22:38:08, 3.75it/s] 18%|█▊ | 65705/371472 [5:13:38<28:47:22, 2.95it/s] 18%|█▊ | 65706/371472 [5:13:38<28:10:08, 3.02it/s] 18%|█▊ | 65707/371472 [5:13:38<26:46:58, 3.17it/s] 18%|█▊ | 65708/371472 [5:13:38<26:25:53, 3.21it/s] 18%|█▊ | 65709/371472 [5:13:39<27:44:45, 3.06it/s] 18%|█▊ | 65710/371472 [5:13:39<26:39:05, 3.19it/s] 18%|█▊ | 65711/371472 [5:13:39<25:44:27, 3.30it/s] 18%|█▊ | 65712/371472 [5:13:40<25:49:46, 3.29it/s] 18%|█▊ | 65713/371472 [5:13:40<24:46:43, 3.43it/s] 18%|█▊ | 65714/371472 [5:13:40<25:49:29, 3.29it/s] 18%|█▊ | 65715/371472 [5:13:40<24:21:26, 3.49it/s] 18%|█▊ | 65716/371472 [5:13:41<23:18:21, 3.64it/s] 18%|█▊ | 65717/371472 [5:13:41<23:30:17, 3.61it/s] 18%|█▊ | 65718/371472 [5:13:41<22:56:51, 3.70it/s] 18%|█▊ | 65719/371472 [5:13:42<22:16:30, 3.81it/s] 18%|█▊ | 65720/371472 [5:13:42<21:45:58, 3.90it/s] {'loss': 4.2661, 'learning_rate': 8.411730483311425e-07, 'epoch': 2.83} + 18%|█▊ | 65720/371472 [5:13:42<21:45:58, 3.90it/s] 18%|█▊ | 65721/371472 [5:13:42<21:36:08, 3.93it/s] 18%|█▊ | 65722/371472 [5:13:42<23:01:41, 3.69it/s] 18%|█▊ | 65723/371472 [5:13:43<23:18:54, 3.64it/s] 18%|█▊ | 65724/371472 [5:13:43<22:42:59, 3.74it/s] 18%|█▊ | 65725/371472 [5:13:43<22:20:37, 3.80it/s] 18%|█▊ | 65726/371472 [5:13:43<23:46:25, 3.57it/s] 18%|█▊ | 65727/371472 [5:13:44<23:29:25, 3.62it/s] 18%|█▊ | 65728/371472 [5:13:44<23:43:44, 3.58it/s] 18%|█▊ | 65729/371472 [5:13:44<24:06:41, 3.52it/s] 18%|█▊ | 65730/371472 [5:13:45<24:03:12, 3.53it/s] 18%|█▊ | 65731/371472 [5:13:45<24:50:08, 3.42it/s] 18%|█▊ | 65732/371472 [5:13:45<24:40:22, 3.44it/s] 18%|█▊ | 65733/371472 [5:13:45<25:43:59, 3.30it/s] 18%|█▊ | 65734/371472 [5:13:46<24:44:52, 3.43it/s] 18%|█▊ | 65735/371472 [5:13:46<23:50:16, 3.56it/s] 18%|█▊ | 65736/371472 [5:13:46<23:08:53, 3.67it/s] 18%|█▊ | 65737/371472 [5:13:47<23:08:50, 3.67it/s] 18%|█▊ | 65738/371472 [5:13:47<22:39:47, 3.75it/s] 18%|█▊ | 65739/371472 [5:13:47<22:47:38, 3.73it/s] 18%|█▊ | 65740/371472 [5:13:47<22:16:54, 3.81it/s] {'loss': 4.1234, 'learning_rate': 8.411245663556638e-07, 'epoch': 2.83} + 18%|█▊ | 65740/371472 [5:13:47<22:16:54, 3.81it/s] 18%|█▊ | 65741/371472 [5:13:48<24:45:13, 3.43it/s] 18%|█▊ | 65742/371472 [5:13:48<24:47:04, 3.43it/s] 18%|█▊ | 65743/371472 [5:13:48<26:36:39, 3.19it/s] 18%|█▊ | 65744/371472 [5:13:49<26:23:35, 3.22it/s] 18%|█▊ | 65745/371472 [5:13:49<24:52:33, 3.41it/s] 18%|█▊ | 65746/371472 [5:13:49<24:27:55, 3.47it/s] 18%|█▊ | 65747/371472 [5:13:49<24:40:24, 3.44it/s] 18%|█▊ | 65748/371472 [5:13:50<23:25:25, 3.63it/s] 18%|█▊ | 65749/371472 [5:13:50<22:48:10, 3.72it/s] 18%|█▊ | 65750/371472 [5:13:50<22:16:11, 3.81it/s] 18%|█▊ | 65751/371472 [5:13:51<23:36:43, 3.60it/s] 18%|█▊ | 65752/371472 [5:13:51<23:15:53, 3.65it/s] 18%|█▊ | 65753/371472 [5:13:51<22:35:33, 3.76it/s] 18%|█▊ | 65754/371472 [5:13:51<22:19:02, 3.81it/s] 18%|█▊ | 65755/371472 [5:13:52<21:56:03, 3.87it/s] 18%|█▊ | 65756/371472 [5:13:52<22:32:46, 3.77it/s] 18%|█▊ | 65757/371472 [5:13:52<22:06:28, 3.84it/s] 18%|█▊ | 65758/371472 [5:13:52<23:26:46, 3.62it/s] 18%|█▊ | 65759/371472 [5:13:53<26:11:05, 3.24it/s] 18%|█▊ | 65760/371472 [5:13:53<25:40:46, 3.31it/s] {'loss': 3.9953, 'learning_rate': 8.410760843801849e-07, 'epoch': 2.83} + 18%|█▊ | 65760/371472 [5:13:53<25:40:46, 3.31it/s] 18%|█▊ | 65761/371472 [5:13:53<27:16:40, 3.11it/s] 18%|█▊ | 65762/371472 [5:13:54<26:21:34, 3.22it/s] 18%|█▊ | 65763/371472 [5:13:54<25:24:11, 3.34it/s] 18%|█▊ | 65764/371472 [5:13:54<24:56:33, 3.40it/s] 18%|█▊ | 65765/371472 [5:13:55<27:30:07, 3.09it/s] 18%|█▊ | 65766/371472 [5:13:55<26:21:32, 3.22it/s] 18%|█▊ | 65767/371472 [5:13:55<27:56:22, 3.04it/s] 18%|█▊ | 65768/371472 [5:13:56<26:35:36, 3.19it/s] 18%|█▊ | 65769/371472 [5:13:56<27:09:00, 3.13it/s] 18%|█▊ | 65770/371472 [5:13:56<25:30:29, 3.33it/s] 18%|█▊ | 65771/371472 [5:13:56<24:19:06, 3.49it/s] 18%|█▊ | 65772/371472 [5:13:57<24:37:10, 3.45it/s] 18%|█▊ | 65773/371472 [5:13:57<25:06:10, 3.38it/s] 18%|█▊ | 65774/371472 [5:13:57<25:09:35, 3.38it/s] 18%|█▊ | 65775/371472 [5:13:58<24:25:45, 3.48it/s] 18%|█▊ | 65776/371472 [5:13:58<23:37:44, 3.59it/s] 18%|█▊ | 65777/371472 [5:13:58<24:29:46, 3.47it/s] 18%|█▊ | 65778/371472 [5:13:58<23:46:53, 3.57it/s] 18%|█▊ | 65779/371472 [5:13:59<23:12:52, 3.66it/s] 18%|█▊ | 65780/371472 [5:13:59<22:26:16, 3.78it/s] {'loss': 3.9306, 'learning_rate': 8.41027602404706e-07, 'epoch': 2.83} + 18%|█▊ | 65780/371472 [5:13:59<22:26:16, 3.78it/s] 18%|█▊ | 65781/371472 [5:13:59<22:34:56, 3.76it/s] 18%|█▊ | 65782/371472 [5:14:00<24:59:37, 3.40it/s] 18%|█▊ | 65783/371472 [5:14:00<27:56:00, 3.04it/s] 18%|█▊ | 65784/371472 [5:14:00<25:53:08, 3.28it/s] 18%|█▊ | 65785/371472 [5:14:00<24:42:21, 3.44it/s] 18%|█▊ | 65786/371472 [5:14:01<24:38:37, 3.45it/s] 18%|█▊ | 65787/371472 [5:14:01<24:09:35, 3.51it/s] 18%|█▊ | 65788/371472 [5:14:01<23:27:59, 3.62it/s] 18%|█▊ | 65789/371472 [5:14:02<23:34:49, 3.60it/s] 18%|█▊ | 65790/371472 [5:14:02<22:48:35, 3.72it/s] 18%|█▊ | 65791/371472 [5:14:02<23:53:08, 3.55it/s] 18%|█▊ | 65792/371472 [5:14:02<22:50:07, 3.72it/s] 18%|█▊ | 65793/371472 [5:14:03<24:53:24, 3.41it/s] 18%|█▊ | 65794/371472 [5:14:03<24:14:49, 3.50it/s] 18%|█▊ | 65795/371472 [5:14:03<24:04:35, 3.53it/s] 18%|█▊ | 65796/371472 [5:14:04<23:32:23, 3.61it/s] 18%|█▊ | 65797/371472 [5:14:04<23:34:50, 3.60it/s] 18%|█▊ | 65798/371472 [5:14:04<23:20:17, 3.64it/s] 18%|█▊ | 65799/371472 [5:14:04<25:10:29, 3.37it/s] 18%|█▊ | 65800/371472 [5:14:05<24:08:04, 3.52it/s] {'loss': 4.3457, 'learning_rate': 8.40979120429227e-07, 'epoch': 2.83} + 18%|█▊ | 65800/371472 [5:14:05<24:08:04, 3.52it/s] 18%|█▊ | 65801/371472 [5:14:05<26:10:02, 3.24it/s] 18%|█▊ | 65802/371472 [5:14:05<25:21:41, 3.35it/s] 18%|█▊ | 65803/371472 [5:14:06<24:06:16, 3.52it/s] 18%|█▊ | 65804/371472 [5:14:06<24:12:42, 3.51it/s] 18%|█▊ | 65805/371472 [5:14:06<22:49:42, 3.72it/s] 18%|█▊ | 65806/371472 [5:14:06<22:50:47, 3.72it/s] 18%|█▊ | 65807/371472 [5:14:07<22:43:21, 3.74it/s] 18%|█▊ | 65808/371472 [5:14:07<22:47:15, 3.73it/s] 18%|█▊ | 65809/371472 [5:14:07<22:58:57, 3.69it/s] 18%|█▊ | 65810/371472 [5:14:07<23:52:58, 3.56it/s] 18%|█▊ | 65811/371472 [5:14:08<23:33:04, 3.61it/s] 18%|█▊ | 65812/371472 [5:14:08<24:23:23, 3.48it/s] 18%|█▊ | 65813/371472 [5:14:08<25:11:51, 3.37it/s] 18%|█▊ | 65814/371472 [5:14:09<24:36:02, 3.45it/s] 18%|█▊ | 65815/371472 [5:14:09<24:51:13, 3.42it/s] 18%|█▊ | 65816/371472 [5:14:09<24:11:51, 3.51it/s] 18%|█▊ | 65817/371472 [5:14:09<23:12:00, 3.66it/s] 18%|█▊ | 65818/371472 [5:14:10<23:18:44, 3.64it/s] 18%|█▊ | 65819/371472 [5:14:10<23:57:52, 3.54it/s] 18%|█▊ | 65820/371472 [5:14:10<23:22:23, 3.63it/s] {'loss': 4.3391, 'learning_rate': 8.409306384537481e-07, 'epoch': 2.83} + 18%|█▊ | 65820/371472 [5:14:10<23:22:23, 3.63it/s] 18%|█▊ | 65821/371472 [5:14:11<23:19:41, 3.64it/s] 18%|█▊ | 65822/371472 [5:14:11<23:51:39, 3.56it/s] 18%|█▊ | 65823/371472 [5:14:11<22:55:31, 3.70it/s] 18%|█▊ | 65824/371472 [5:14:11<23:19:52, 3.64it/s] 18%|█▊ | 65825/371472 [5:14:12<23:33:36, 3.60it/s] 18%|█▊ | 65826/371472 [5:14:12<25:21:23, 3.35it/s] 18%|█▊ | 65827/371472 [5:14:12<25:13:15, 3.37it/s] 18%|█▊ | 65828/371472 [5:14:13<24:58:41, 3.40it/s] 18%|█▊ | 65829/371472 [5:14:13<24:10:42, 3.51it/s] 18%|█▊ | 65830/371472 [5:14:13<22:55:56, 3.70it/s] 18%|█▊ | 65831/371472 [5:14:13<23:56:28, 3.55it/s] 18%|█▊ | 65832/371472 [5:14:14<24:11:03, 3.51it/s] 18%|█▊ | 65833/371472 [5:14:14<25:26:17, 3.34it/s] 18%|█▊ | 65834/371472 [5:14:14<25:32:05, 3.32it/s] 18%|█▊ | 65835/371472 [5:14:15<23:55:37, 3.55it/s] 18%|█▊ | 65836/371472 [5:14:15<23:06:09, 3.67it/s] 18%|█▊ | 65837/371472 [5:14:15<23:21:52, 3.63it/s] 18%|█▊ | 65838/371472 [5:14:15<25:36:41, 3.31it/s] 18%|█▊ | 65839/371472 [5:14:16<23:59:25, 3.54it/s] 18%|█▊ | 65840/371472 [5:14:16<23:23:30, 3.63it/s] {'loss': 4.1781, 'learning_rate': 8.408821564782692e-07, 'epoch': 2.84} + 18%|█▊ | 65840/371472 [5:14:16<23:23:30, 3.63it/s] 18%|█▊ | 65841/371472 [5:14:16<22:47:05, 3.73it/s] 18%|█▊ | 65842/371472 [5:14:16<22:29:30, 3.77it/s] 18%|█▊ | 65843/371472 [5:14:17<22:33:20, 3.76it/s] 18%|█▊ | 65844/371472 [5:14:17<23:58:46, 3.54it/s] 18%|█▊ | 65845/371472 [5:14:17<23:01:16, 3.69it/s] 18%|█▊ | 65846/371472 [5:14:18<22:42:44, 3.74it/s] 18%|█▊ | 65847/371472 [5:14:18<22:31:34, 3.77it/s] 18%|█▊ | 65848/371472 [5:14:18<22:48:16, 3.72it/s] 18%|█▊ | 65849/371472 [5:14:18<23:16:28, 3.65it/s] 18%|█▊ | 65850/371472 [5:14:19<23:57:07, 3.54it/s] 18%|█▊ | 65851/371472 [5:14:19<23:25:38, 3.62it/s] 18%|█▊ | 65852/371472 [5:14:19<23:37:17, 3.59it/s] 18%|█▊ | 65853/371472 [5:14:20<26:43:13, 3.18it/s] 18%|█▊ | 65854/371472 [5:14:20<26:38:26, 3.19it/s] 18%|█▊ | 65855/371472 [5:14:20<25:06:47, 3.38it/s] 18%|█▊ | 65856/371472 [5:14:20<24:47:48, 3.42it/s] 18%|█▊ | 65857/371472 [5:14:21<24:29:07, 3.47it/s] 18%|█▊ | 65858/371472 [5:14:21<25:34:13, 3.32it/s] 18%|█▊ | 65859/371472 [5:14:21<24:19:09, 3.49it/s] 18%|█▊ | 65860/371472 [5:14:22<23:15:45, 3.65it/s] {'loss': 4.1102, 'learning_rate': 8.408336745027904e-07, 'epoch': 2.84} + 18%|█▊ | 65860/371472 [5:14:22<23:15:45, 3.65it/s] 18%|█▊ | 65861/371472 [5:14:22<24:54:13, 3.41it/s] 18%|█▊ | 65862/371472 [5:14:22<25:00:40, 3.39it/s] 18%|█▊ | 65863/371472 [5:14:22<24:17:39, 3.49it/s] 18%|█▊ | 65864/371472 [5:14:23<26:07:35, 3.25it/s] 18%|█▊ | 65865/371472 [5:14:23<25:33:50, 3.32it/s] 18%|█▊ | 65866/371472 [5:14:23<24:42:13, 3.44it/s] 18%|█▊ | 65867/371472 [5:14:24<27:22:37, 3.10it/s] 18%|█▊ | 65868/371472 [5:14:24<26:21:03, 3.22it/s] 18%|█▊ | 65869/371472 [5:14:24<25:55:58, 3.27it/s] 18%|█▊ | 65870/371472 [5:14:25<26:21:03, 3.22it/s] 18%|█▊ | 65871/371472 [5:14:25<26:23:03, 3.22it/s] 18%|█▊ | 65872/371472 [5:14:25<25:05:40, 3.38it/s] 18%|█▊ | 65873/371472 [5:14:26<25:06:10, 3.38it/s] 18%|█▊ | 65874/371472 [5:14:26<24:58:32, 3.40it/s] 18%|█▊ | 65875/371472 [5:14:26<24:07:06, 3.52it/s] 18%|█▊ | 65876/371472 [5:14:26<23:25:31, 3.62it/s] 18%|█▊ | 65877/371472 [5:14:27<22:18:32, 3.81it/s] 18%|█▊ | 65878/371472 [5:14:27<23:05:31, 3.68it/s] 18%|█▊ | 65879/371472 [5:14:27<22:50:32, 3.72it/s] 18%|█▊ | 65880/371472 [5:14:27<21:53:34, 3.88it/s] {'loss': 4.2409, 'learning_rate': 8.407851925273115e-07, 'epoch': 2.84} + 18%|█▊ | 65880/371472 [5:14:27<21:53:34, 3.88it/s] 18%|█▊ | 65881/371472 [5:14:28<21:56:51, 3.87it/s] 18%|█▊ | 65882/371472 [5:14:28<22:39:19, 3.75it/s] 18%|█▊ | 65883/371472 [5:14:28<23:10:02, 3.66it/s] 18%|█▊ | 65884/371472 [5:14:29<24:32:20, 3.46it/s] 18%|█▊ | 65885/371472 [5:14:29<23:23:25, 3.63it/s] 18%|█▊ | 65886/371472 [5:14:29<23:00:40, 3.69it/s] 18%|█▊ | 65887/371472 [5:14:29<22:15:32, 3.81it/s] 18%|█▊ | 65888/371472 [5:14:30<22:08:44, 3.83it/s] 18%|█▊ | 65889/371472 [5:14:30<21:53:25, 3.88it/s] 18%|█▊ | 65890/371472 [5:14:30<23:10:15, 3.66it/s] 18%|█▊ | 65891/371472 [5:14:30<23:11:55, 3.66it/s] 18%|█▊ | 65892/371472 [5:14:31<24:44:07, 3.43it/s] 18%|█▊ | 65893/371472 [5:14:31<26:08:56, 3.25it/s] 18%|█▊ | 65894/371472 [5:14:31<25:31:11, 3.33it/s] 18%|█▊ | 65895/371472 [5:14:32<26:07:12, 3.25it/s] 18%|█▊ | 65896/371472 [5:14:32<26:33:37, 3.20it/s] 18%|█▊ | 65897/371472 [5:14:32<25:16:24, 3.36it/s] 18%|█▊ | 65898/371472 [5:14:33<24:53:40, 3.41it/s] 18%|█▊ | 65899/371472 [5:14:33<23:52:16, 3.56it/s] 18%|█▊ | 65900/371472 [5:14:33<23:04:17, 3.68it/s] {'loss': 4.28, 'learning_rate': 8.407367105518326e-07, 'epoch': 2.84} + 18%|█▊ | 65900/371472 [5:14:33<23:04:17, 3.68it/s] 18%|█▊ | 65901/371472 [5:14:33<24:42:49, 3.43it/s] 18%|█▊ | 65902/371472 [5:14:34<23:34:04, 3.60it/s] 18%|█▊ | 65903/371472 [5:14:34<23:01:10, 3.69it/s] 18%|█▊ | 65904/371472 [5:14:34<24:06:08, 3.52it/s] 18%|█▊ | 65905/371472 [5:14:34<24:03:55, 3.53it/s] 18%|█▊ | 65906/371472 [5:14:35<23:05:21, 3.68it/s] 18%|█▊ | 65907/371472 [5:14:35<22:50:01, 3.72it/s] 18%|█▊ | 65908/371472 [5:14:35<23:17:11, 3.64it/s] 18%|█▊ | 65909/371472 [5:14:36<22:48:20, 3.72it/s] 18%|█▊ | 65910/371472 [5:14:36<22:37:03, 3.75it/s] 18%|█▊ | 65911/371472 [5:14:36<22:25:38, 3.78it/s] 18%|█▊ | 65912/371472 [5:14:36<22:23:17, 3.79it/s] 18%|█▊ | 65913/371472 [5:14:37<22:03:38, 3.85it/s] 18%|█▊ | 65914/371472 [5:14:37<25:04:21, 3.39it/s] 18%|█▊ | 65915/371472 [5:14:37<25:09:51, 3.37it/s] 18%|█▊ | 65916/371472 [5:14:38<24:26:11, 3.47it/s] 18%|█▊ | 65917/371472 [5:14:38<23:37:09, 3.59it/s] 18%|█▊ | 65918/371472 [5:14:38<23:10:21, 3.66it/s] 18%|█▊ | 65919/371472 [5:14:38<23:05:34, 3.68it/s] 18%|█▊ | 65920/371472 [5:14:39<22:41:10, 3.74it/s] {'loss': 4.5097, 'learning_rate': 8.406882285763536e-07, 'epoch': 2.84} + 18%|█▊ | 65920/371472 [5:14:39<22:41:10, 3.74it/s] 18%|█▊ | 65921/371472 [5:14:39<22:12:15, 3.82it/s] 18%|█▊ | 65922/371472 [5:14:39<23:07:39, 3.67it/s] 18%|█▊ | 65923/371472 [5:14:39<22:06:03, 3.84it/s] 18%|█▊ | 65924/371472 [5:14:40<23:44:02, 3.58it/s] 18%|█▊ | 65925/371472 [5:14:40<22:31:11, 3.77it/s] 18%|█▊ | 65926/371472 [5:14:40<24:09:19, 3.51it/s] 18%|█▊ | 65927/371472 [5:14:41<24:07:50, 3.52it/s] 18%|█▊ | 65928/371472 [5:14:41<23:54:11, 3.55it/s] 18%|█▊ | 65929/371472 [5:14:41<23:24:37, 3.63it/s] 18%|█▊ | 65930/371472 [5:14:41<23:18:31, 3.64it/s] 18%|█▊ | 65931/371472 [5:14:42<25:18:58, 3.35it/s] 18%|█▊ | 65932/371472 [5:14:42<24:32:01, 3.46it/s] 18%|█▊ | 65933/371472 [5:14:42<23:24:38, 3.63it/s] 18%|█▊ | 65934/371472 [5:14:42<23:49:56, 3.56it/s] 18%|█▊ | 65935/371472 [5:14:43<24:19:07, 3.49it/s] 18%|█▊ | 65936/371472 [5:14:43<23:58:39, 3.54it/s] 18%|█▊ | 65937/371472 [5:14:43<24:04:28, 3.53it/s] 18%|█▊ | 65938/371472 [5:14:44<23:18:42, 3.64it/s] 18%|█▊ | 65939/371472 [5:14:44<22:31:35, 3.77it/s] 18%|█▊ | 65940/371472 [5:14:44<22:21:44, 3.80it/s] {'loss': 4.4996, 'learning_rate': 8.406397466008748e-07, 'epoch': 2.84} + 18%|█▊ | 65940/371472 [5:14:44<22:21:44, 3.80it/s] 18%|█▊ | 65941/371472 [5:14:44<22:45:02, 3.73it/s] 18%|█▊ | 65942/371472 [5:14:45<22:40:37, 3.74it/s] 18%|█▊ | 65943/371472 [5:14:45<27:31:39, 3.08it/s] 18%|█▊ | 65944/371472 [5:14:45<25:56:32, 3.27it/s] 18%|█▊ | 65945/371472 [5:14:46<25:01:32, 3.39it/s] 18%|█▊ | 65946/371472 [5:14:46<24:15:18, 3.50it/s] 18%|█▊ | 65947/371472 [5:14:46<24:00:43, 3.53it/s] 18%|█▊ | 65948/371472 [5:14:46<23:48:22, 3.56it/s] 18%|█▊ | 65949/371472 [5:14:47<24:54:27, 3.41it/s] 18%|█▊ | 65950/371472 [5:14:47<25:14:18, 3.36it/s] 18%|█▊ | 65951/371472 [5:14:47<24:52:01, 3.41it/s] 18%|█▊ | 65952/371472 [5:14:48<25:26:50, 3.33it/s] 18%|█▊ | 65953/371472 [5:14:48<24:47:05, 3.42it/s] 18%|█▊ | 65954/371472 [5:14:48<23:50:16, 3.56it/s] 18%|█▊ | 65955/371472 [5:14:48<23:17:29, 3.64it/s] 18%|█▊ | 65956/371472 [5:14:49<24:51:34, 3.41it/s] 18%|█▊ | 65957/371472 [5:14:49<23:32:13, 3.61it/s] 18%|█▊ | 65958/371472 [5:14:49<22:59:06, 3.69it/s] 18%|█▊ | 65959/371472 [5:14:50<22:47:51, 3.72it/s] 18%|█▊ | 65960/371472 [5:14:50<22:40:12, 3.74it/s] {'loss': 4.5238, 'learning_rate': 8.405912646253959e-07, 'epoch': 2.84} + 18%|█▊ | 65960/371472 [5:14:50<22:40:12, 3.74it/s] 18%|█▊ | 65961/371472 [5:14:50<22:56:00, 3.70it/s] 18%|█▊ | 65962/371472 [5:14:50<22:34:03, 3.76it/s] 18%|█▊ | 65963/371472 [5:14:51<24:44:45, 3.43it/s] 18%|█▊ | 65964/371472 [5:14:51<24:58:22, 3.40it/s] 18%|█▊ | 65965/371472 [5:14:51<24:39:44, 3.44it/s] 18%|█▊ | 65966/371472 [5:14:52<26:10:05, 3.24it/s] 18%|█▊ | 65967/371472 [5:14:52<25:14:00, 3.36it/s] 18%|█▊ | 65968/371472 [5:14:52<24:56:22, 3.40it/s] 18%|█▊ | 65969/371472 [5:14:53<25:17:37, 3.36it/s] 18%|█▊ | 65970/371472 [5:14:53<24:04:41, 3.52it/s] 18%|█▊ | 65971/371472 [5:14:53<23:54:45, 3.55it/s] 18%|█▊ | 65972/371472 [5:14:53<24:13:21, 3.50it/s] 18%|█▊ | 65973/371472 [5:14:54<24:08:42, 3.51it/s] 18%|█▊ | 65974/371472 [5:14:54<23:31:54, 3.61it/s] 18%|█▊ | 65975/371472 [5:14:54<22:30:50, 3.77it/s] 18%|█▊ | 65976/371472 [5:14:54<22:36:00, 3.75it/s] 18%|█▊ | 65977/371472 [5:14:55<22:35:45, 3.76it/s] 18%|█▊ | 65978/371472 [5:14:55<22:03:22, 3.85it/s] 18%|█▊ | 65979/371472 [5:14:55<22:02:00, 3.85it/s] 18%|█▊ | 65980/371472 [5:14:55<22:33:50, 3.76it/s] {'loss': 4.374, 'learning_rate': 8.40542782649917e-07, 'epoch': 2.84} + 18%|█▊ | 65980/371472 [5:14:55<22:33:50, 3.76it/s] 18%|█▊ | 65981/371472 [5:14:56<23:27:02, 3.62it/s] 18%|█▊ | 65982/371472 [5:14:56<22:46:28, 3.73it/s] 18%|█▊ | 65983/371472 [5:14:56<22:19:36, 3.80it/s] 18%|█▊ | 65984/371472 [5:14:57<23:32:01, 3.61it/s] 18%|█▊ | 65985/371472 [5:14:57<23:04:09, 3.68it/s] 18%|█▊ | 65986/371472 [5:14:57<23:08:26, 3.67it/s] 18%|█▊ | 65987/371472 [5:14:57<22:49:59, 3.72it/s] 18%|█▊ | 65988/371472 [5:14:58<23:35:48, 3.60it/s] 18%|█▊ | 65989/371472 [5:14:58<22:49:45, 3.72it/s] 18%|█▊ | 65990/371472 [5:14:58<23:05:40, 3.67it/s] 18%|█▊ | 65991/371472 [5:14:58<23:14:56, 3.65it/s] 18%|█▊ | 65992/371472 [5:14:59<22:44:04, 3.73it/s] 18%|█▊ | 65993/371472 [5:14:59<23:05:21, 3.68it/s] 18%|█▊ | 65994/371472 [5:14:59<22:51:19, 3.71it/s] 18%|█▊ | 65995/371472 [5:14:59<22:28:10, 3.78it/s] 18%|█▊ | 65996/371472 [5:15:00<24:05:00, 3.52it/s] 18%|█▊ | 65997/371472 [5:15:00<23:30:52, 3.61it/s] 18%|█▊ | 65998/371472 [5:15:00<25:06:13, 3.38it/s] 18%|█▊ | 65999/371472 [5:15:01<25:25:52, 3.34it/s] 18%|█▊ | 66000/371472 [5:15:01<24:22:53, 3.48it/s] {'loss': 4.3341, 'learning_rate': 8.404943006744381e-07, 'epoch': 2.84} + 18%|█▊ | 66000/371472 [5:15:01<24:22:53, 3.48it/s] 18%|█▊ | 66001/371472 [5:15:01<25:01:19, 3.39it/s] 18%|█▊ | 66002/371472 [5:15:02<24:45:08, 3.43it/s] 18%|█▊ | 66003/371472 [5:15:02<23:50:54, 3.56it/s] 18%|█▊ | 66004/371472 [5:15:02<23:29:18, 3.61it/s] 18%|█▊ | 66005/371472 [5:15:02<23:21:24, 3.63it/s] 18%|█▊ | 66006/371472 [5:15:03<23:18:02, 3.64it/s] 18%|█▊ | 66007/371472 [5:15:03<23:52:06, 3.55it/s] 18%|█▊ | 66008/371472 [5:15:03<23:47:29, 3.57it/s] 18%|█▊ | 66009/371472 [5:15:03<22:58:23, 3.69it/s] 18%|█▊ | 66010/371472 [5:15:04<22:24:17, 3.79it/s] 18%|█▊ | 66011/371472 [5:15:04<22:58:27, 3.69it/s] 18%|█▊ | 66012/371472 [5:15:04<24:52:40, 3.41it/s] 18%|█▊ | 66013/371472 [5:15:05<24:03:09, 3.53it/s] 18%|█▊ | 66014/371472 [5:15:05<24:00:14, 3.53it/s] 18%|█▊ | 66015/371472 [5:15:05<22:35:42, 3.76it/s] 18%|█▊ | 66016/371472 [5:15:05<22:22:53, 3.79it/s] 18%|█▊ | 66017/371472 [5:15:06<22:15:30, 3.81it/s] 18%|█▊ | 66018/371472 [5:15:06<24:08:00, 3.52it/s] 18%|█▊ | 66019/371472 [5:15:06<22:53:08, 3.71it/s] 18%|█▊ | 66020/371472 [5:15:06<22:50:08, 3.72it/s] {'loss': 4.2999, 'learning_rate': 8.404458186989593e-07, 'epoch': 2.84} + 18%|█▊ | 66020/371472 [5:15:06<22:50:08, 3.72it/s] 18%|█▊ | 66021/371472 [5:15:07<23:08:10, 3.67it/s] 18%|█▊ | 66022/371472 [5:15:07<23:47:14, 3.57it/s] 18%|█▊ | 66023/371472 [5:15:07<23:24:20, 3.63it/s] 18%|█▊ | 66024/371472 [5:15:08<23:49:45, 3.56it/s] 18%|█▊ | 66025/371472 [5:15:08<24:13:25, 3.50it/s] 18%|█▊ | 66026/371472 [5:15:08<25:16:00, 3.36it/s] 18%|█▊ | 66027/371472 [5:15:08<24:13:54, 3.50it/s] 18%|█▊ | 66028/371472 [5:15:09<24:42:38, 3.43it/s] 18%|█▊ | 66029/371472 [5:15:09<24:26:24, 3.47it/s] 18%|█▊ | 66030/371472 [5:15:09<24:09:08, 3.51it/s] 18%|█▊ | 66031/371472 [5:15:10<23:48:37, 3.56it/s] 18%|█▊ | 66032/371472 [5:15:10<26:48:10, 3.17it/s] 18%|█▊ | 66033/371472 [5:15:10<25:45:57, 3.29it/s] 18%|█▊ | 66034/371472 [5:15:11<24:22:00, 3.48it/s] 18%|█▊ | 66035/371472 [5:15:11<24:46:17, 3.43it/s] 18%|█▊ | 66036/371472 [5:15:11<24:48:26, 3.42it/s] 18%|█▊ | 66037/371472 [5:15:11<24:50:38, 3.42it/s] 18%|█▊ | 66038/371472 [5:15:12<23:25:40, 3.62it/s] 18%|█▊ | 66039/371472 [5:15:12<24:56:07, 3.40it/s] 18%|█▊ | 66040/371472 [5:15:12<25:29:38, 3.33it/s] {'loss': 4.0023, 'learning_rate': 8.403973367234803e-07, 'epoch': 2.84} + 18%|█▊ | 66040/371472 [5:15:12<25:29:38, 3.33it/s] 18%|█▊ | 66041/371472 [5:15:13<24:13:51, 3.50it/s] 18%|█▊ | 66042/371472 [5:15:13<23:16:32, 3.65it/s] 18%|█▊ | 66043/371472 [5:15:13<24:02:58, 3.53it/s] 18%|█▊ | 66044/371472 [5:15:13<23:15:32, 3.65it/s] 18%|█▊ | 66045/371472 [5:15:14<22:54:47, 3.70it/s] 18%|█▊ | 66046/371472 [5:15:14<22:34:12, 3.76it/s] 18%|█▊ | 66047/371472 [5:15:14<22:27:50, 3.78it/s] 18%|█▊ | 66048/371472 [5:15:14<22:24:21, 3.79it/s] 18%|█▊ | 66049/371472 [5:15:15<22:02:29, 3.85it/s] 18%|█▊ | 66050/371472 [5:15:15<21:47:03, 3.89it/s] 18%|█▊ | 66051/371472 [5:15:15<21:48:26, 3.89it/s] 18%|█▊ | 66052/371472 [5:15:15<22:18:41, 3.80it/s] 18%|█▊ | 66053/371472 [5:15:16<27:57:53, 3.03it/s] 18%|█▊ | 66054/371472 [5:15:16<28:51:41, 2.94it/s] 18%|█▊ | 66055/371472 [5:15:17<27:52:59, 3.04it/s] 18%|█▊ | 66056/371472 [5:15:17<26:01:24, 3.26it/s] 18%|█▊ | 66057/371472 [5:15:17<24:47:03, 3.42it/s] 18%|█▊ | 66058/371472 [5:15:17<24:13:01, 3.50it/s] 18%|█▊ | 66059/371472 [5:15:18<24:37:32, 3.45it/s] 18%|█▊ | 66060/371472 [5:15:18<24:30:11, 3.46it/s] {'loss': 4.3215, 'learning_rate': 8.403488547480014e-07, 'epoch': 2.85} + 18%|█▊ | 66060/371472 [5:15:18<24:30:11, 3.46it/s] 18%|█▊ | 66061/371472 [5:15:18<24:38:55, 3.44it/s] 18%|█▊ | 66062/371472 [5:15:19<24:29:08, 3.46it/s] 18%|█▊ | 66063/371472 [5:15:19<24:22:41, 3.48it/s] 18%|█▊ | 66064/371472 [5:15:19<23:34:07, 3.60it/s] 18%|█▊ | 66065/371472 [5:15:19<24:05:58, 3.52it/s] 18%|█▊ | 66066/371472 [5:15:20<24:42:14, 3.43it/s] 18%|█▊ | 66067/371472 [5:15:20<24:04:41, 3.52it/s] 18%|█▊ | 66068/371472 [5:15:20<23:08:37, 3.67it/s] 18%|█▊ | 66069/371472 [5:15:21<23:51:50, 3.55it/s] 18%|█▊ | 66070/371472 [5:15:21<22:29:04, 3.77it/s] 18%|█▊ | 66071/371472 [5:15:21<23:31:20, 3.61it/s] 18%|█▊ | 66072/371472 [5:15:21<23:05:07, 3.67it/s] 18%|█▊ | 66073/371472 [5:15:22<25:13:57, 3.36it/s] 18%|█▊ | 66074/371472 [5:15:22<24:36:31, 3.45it/s] 18%|█▊ | 66075/371472 [5:15:22<24:05:09, 3.52it/s] 18%|█▊ | 66076/371472 [5:15:23<24:09:01, 3.51it/s] 18%|█▊ | 66077/371472 [5:15:23<23:09:17, 3.66it/s] 18%|█▊ | 66078/371472 [5:15:23<25:39:54, 3.31it/s] 18%|█▊ | 66079/371472 [5:15:23<26:36:01, 3.19it/s] 18%|█▊ | 66080/371472 [5:15:24<24:48:43, 3.42it/s] {'loss': 4.4189, 'learning_rate': 8.403003727725225e-07, 'epoch': 2.85} + 18%|█▊ | 66080/371472 [5:15:24<24:48:43, 3.42it/s] 18%|█▊ | 66081/371472 [5:15:24<25:21:00, 3.35it/s] 18%|█▊ | 66082/371472 [5:15:24<26:56:02, 3.15it/s] 18%|█▊ | 66083/371472 [5:15:25<25:11:42, 3.37it/s] 18%|█▊ | 66084/371472 [5:15:25<24:48:48, 3.42it/s] 18%|█▊ | 66085/371472 [5:15:25<24:44:05, 3.43it/s] 18%|█▊ | 66086/371472 [5:15:25<24:09:22, 3.51it/s] 18%|█▊ | 66087/371472 [5:15:26<24:40:15, 3.44it/s] 18%|█▊ | 66088/371472 [5:15:26<23:52:31, 3.55it/s] 18%|█▊ | 66089/371472 [5:15:26<25:37:56, 3.31it/s] 18%|█▊ | 66090/371472 [5:15:27<25:49:10, 3.29it/s] 18%|█▊ | 66091/371472 [5:15:27<24:54:53, 3.40it/s] 18%|█▊ | 66092/371472 [5:15:27<27:45:17, 3.06it/s] 18%|█▊ | 66093/371472 [5:15:28<27:28:42, 3.09it/s] 18%|█▊ | 66094/371472 [5:15:28<26:29:23, 3.20it/s] 18%|█▊ | 66095/371472 [5:15:28<28:03:53, 3.02it/s] 18%|█▊ | 66096/371472 [5:15:29<26:49:57, 3.16it/s] 18%|█▊ | 66097/371472 [5:15:29<26:40:36, 3.18it/s] 18%|█▊ | 66098/371472 [5:15:29<26:23:56, 3.21it/s] 18%|█▊ | 66099/371472 [5:15:29<24:55:39, 3.40it/s] 18%|█▊ | 66100/371472 [5:15:30<24:28:05, 3.47it/s] {'loss': 4.0156, 'learning_rate': 8.402518907970436e-07, 'epoch': 2.85} + 18%|█▊ | 66100/371472 [5:15:30<24:28:05, 3.47it/s] 18%|█▊ | 66101/371472 [5:15:30<24:29:41, 3.46it/s] 18%|█▊ | 66102/371472 [5:15:30<23:38:30, 3.59it/s] 18%|█▊ | 66103/371472 [5:15:31<23:50:43, 3.56it/s] 18%|█▊ | 66104/371472 [5:15:31<23:10:49, 3.66it/s] 18%|█▊ | 66105/371472 [5:15:31<22:50:45, 3.71it/s] 18%|█▊ | 66106/371472 [5:15:31<23:14:10, 3.65it/s] 18%|█▊ | 66107/371472 [5:15:32<22:31:08, 3.77it/s] 18%|█▊ | 66108/371472 [5:15:32<22:16:50, 3.81it/s] 18%|█▊ | 66109/371472 [5:15:32<22:34:57, 3.76it/s] 18%|█▊ | 66110/371472 [5:15:33<24:18:27, 3.49it/s] 18%|█▊ | 66111/371472 [5:15:33<23:34:07, 3.60it/s] 18%|█▊ | 66112/371472 [5:15:33<23:35:08, 3.60it/s] 18%|█▊ | 66113/371472 [5:15:33<23:09:54, 3.66it/s] 18%|█▊ | 66114/371472 [5:15:34<23:38:49, 3.59it/s] 18%|█▊ | 66115/371472 [5:15:34<22:55:17, 3.70it/s] 18%|█▊ | 66116/371472 [5:15:34<23:29:29, 3.61it/s] 18%|█▊ | 66117/371472 [5:15:34<24:51:09, 3.41it/s] 18%|█▊ | 66118/371472 [5:15:35<26:32:37, 3.20it/s] 18%|█▊ | 66119/371472 [5:15:35<27:11:59, 3.12it/s] 18%|█▊ | 66120/371472 [5:15:35<25:37:46, 3.31it/s] {'loss': 3.8661, 'learning_rate': 8.402034088215648e-07, 'epoch': 2.85} + 18%|█▊ | 66120/371472 [5:15:35<25:37:46, 3.31it/s] 18%|█▊ | 66121/371472 [5:15:36<25:25:56, 3.34it/s] 18%|█▊ | 66122/371472 [5:15:36<24:37:59, 3.44it/s] 18%|█▊ | 66123/371472 [5:15:36<24:04:48, 3.52it/s] 18%|█▊ | 66124/371472 [5:15:37<23:35:37, 3.59it/s] 18%|█▊ | 66125/371472 [5:15:37<23:37:23, 3.59it/s] 18%|█▊ | 66126/371472 [5:15:37<23:11:16, 3.66it/s] 18%|█▊ | 66127/371472 [5:15:37<23:49:14, 3.56it/s] 18%|█▊ | 66128/371472 [5:15:38<23:31:21, 3.61it/s] 18%|█▊ | 66129/371472 [5:15:38<22:40:19, 3.74it/s] 18%|█▊ | 66130/371472 [5:15:38<22:47:04, 3.72it/s] 18%|█▊ | 66131/371472 [5:15:38<22:19:40, 3.80it/s] 18%|█▊ | 66132/371472 [5:15:39<21:54:36, 3.87it/s] 18%|█▊ | 66133/371472 [5:15:39<22:06:17, 3.84it/s] 18%|█▊ | 66134/371472 [5:15:39<23:37:54, 3.59it/s] 18%|█▊ | 66135/371472 [5:15:40<24:46:06, 3.42it/s] 18%|█▊ | 66136/371472 [5:15:40<24:15:45, 3.50it/s] 18%|█▊ | 66137/371472 [5:15:40<23:26:59, 3.62it/s] 18%|█▊ | 66138/371472 [5:15:40<23:05:42, 3.67it/s] 18%|█▊ | 66139/371472 [5:15:41<23:03:49, 3.68it/s] 18%|█▊ | 66140/371472 [5:15:41<23:36:36, 3.59it/s] {'loss': 4.1853, 'learning_rate': 8.401549268460858e-07, 'epoch': 2.85} + 18%|█▊ | 66140/371472 [5:15:41<23:36:36, 3.59it/s] 18%|█▊ | 66141/371472 [5:15:41<24:21:18, 3.48it/s] 18%|█▊ | 66142/371472 [5:15:41<23:42:56, 3.58it/s] 18%|█▊ | 66143/371472 [5:15:42<23:54:30, 3.55it/s] 18%|█▊ | 66144/371472 [5:15:42<23:16:24, 3.64it/s] 18%|█▊ | 66145/371472 [5:15:42<22:39:44, 3.74it/s] 18%|█▊ | 66146/371472 [5:15:43<22:07:20, 3.83it/s] 18%|█▊ | 66147/371472 [5:15:43<22:57:39, 3.69it/s] 18%|█▊ | 66148/371472 [5:15:43<22:40:52, 3.74it/s] 18%|█▊ | 66149/371472 [5:15:43<23:03:56, 3.68it/s] 18%|█▊ | 66150/371472 [5:15:44<23:09:43, 3.66it/s] 18%|█▊ | 66151/371472 [5:15:44<24:09:27, 3.51it/s] 18%|█▊ | 66152/371472 [5:15:44<23:59:19, 3.54it/s] 18%|█▊ | 66153/371472 [5:15:45<24:25:15, 3.47it/s] 18%|█▊ | 66154/371472 [5:15:45<24:00:58, 3.53it/s] 18%|█▊ | 66155/371472 [5:15:45<24:41:43, 3.43it/s] 18%|█▊ | 66156/371472 [5:15:45<25:17:50, 3.35it/s] 18%|█▊ | 66157/371472 [5:15:46<23:51:03, 3.56it/s] 18%|█▊ | 66158/371472 [5:15:46<22:49:19, 3.72it/s] 18%|█▊ | 66159/371472 [5:15:46<23:53:32, 3.55it/s] 18%|█▊ | 66160/371472 [5:15:46<22:53:28, 3.70it/s] {'loss': 4.0573, 'learning_rate': 8.40106444870607e-07, 'epoch': 2.85} + 18%|█▊ | 66160/371472 [5:15:46<22:53:28, 3.70it/s] 18%|█▊ | 66161/371472 [5:15:47<23:04:42, 3.67it/s] 18%|█▊ | 66162/371472 [5:15:47<23:20:58, 3.63it/s] 18%|█▊ | 66163/371472 [5:15:47<23:03:54, 3.68it/s] 18%|█▊ | 66164/371472 [5:15:48<23:40:28, 3.58it/s] 18%|█▊ | 66165/371472 [5:15:48<22:57:54, 3.69it/s] 18%|█▊ | 66166/371472 [5:15:48<22:05:47, 3.84it/s] 18%|█▊ | 66167/371472 [5:15:48<22:47:58, 3.72it/s] 18%|█▊ | 66168/371472 [5:15:49<23:33:34, 3.60it/s] 18%|█▊ | 66169/371472 [5:15:49<23:58:59, 3.54it/s] 18%|█▊ | 66170/371472 [5:15:49<23:04:05, 3.68it/s] 18%|█▊ | 66171/371472 [5:15:49<22:49:48, 3.71it/s] 18%|█▊ | 66172/371472 [5:15:50<23:22:05, 3.63it/s] 18%|█▊ | 66173/371472 [5:15:50<22:37:19, 3.75it/s] 18%|█▊ | 66174/371472 [5:15:50<22:23:52, 3.79it/s] 18%|█▊ | 66175/371472 [5:15:51<23:28:09, 3.61it/s] 18%|█▊ | 66176/371472 [5:15:51<23:31:34, 3.60it/s] 18%|█▊ | 66177/371472 [5:15:51<24:07:10, 3.52it/s] 18%|█▊ | 66178/371472 [5:15:51<25:43:54, 3.30it/s] 18%|█▊ | 66179/371472 [5:15:52<24:12:02, 3.50it/s] 18%|█▊ | 66180/371472 [5:15:52<25:09:13, 3.37it/s] {'loss': 4.1377, 'learning_rate': 8.40057962895128e-07, 'epoch': 2.85} + 18%|█▊ | 66180/371472 [5:15:52<25:09:13, 3.37it/s] 18%|█▊ | 66181/371472 [5:15:52<24:21:56, 3.48it/s] 18%|█▊ | 66182/371472 [5:15:53<23:12:28, 3.65it/s] 18%|█▊ | 66183/371472 [5:15:53<22:26:53, 3.78it/s] 18%|█▊ | 66184/371472 [5:15:53<22:35:36, 3.75it/s] 18%|█▊ | 66185/371472 [5:15:53<23:30:16, 3.61it/s] 18%|█▊ | 66186/371472 [5:15:54<23:00:49, 3.68it/s] 18%|█▊ | 66187/371472 [5:15:54<24:18:17, 3.49it/s] 18%|█▊ | 66188/371472 [5:15:54<23:57:32, 3.54it/s] 18%|█▊ | 66189/371472 [5:15:54<23:41:03, 3.58it/s] 18%|█▊ | 66190/371472 [5:15:55<22:32:19, 3.76it/s] 18%|█▊ | 66191/371472 [5:15:55<23:39:48, 3.58it/s] 18%|█▊ | 66192/371472 [5:15:55<23:51:50, 3.55it/s] 18%|█▊ | 66193/371472 [5:15:56<25:59:55, 3.26it/s] 18%|█▊ | 66194/371472 [5:15:56<25:10:43, 3.37it/s] 18%|█▊ | 66195/371472 [5:15:56<24:39:10, 3.44it/s] 18%|█▊ | 66196/371472 [5:15:57<24:19:40, 3.49it/s] 18%|█▊ | 66197/371472 [5:15:57<24:12:33, 3.50it/s] 18%|█▊ | 66198/371472 [5:15:57<24:52:29, 3.41it/s] 18%|█▊ | 66199/371472 [5:15:57<25:11:47, 3.37it/s] 18%|█▊ | 66200/371472 [5:15:58<24:06:49, 3.52it/s] {'loss': 4.17, 'learning_rate': 8.400094809196492e-07, 'epoch': 2.85} + 18%|█▊ | 66200/371472 [5:15:58<24:06:49, 3.52it/s] 18%|█▊ | 66201/371472 [5:15:58<23:45:26, 3.57it/s] 18%|█▊ | 66202/371472 [5:15:58<23:44:14, 3.57it/s] 18%|█▊ | 66203/371472 [5:15:59<24:00:14, 3.53it/s] 18%|█▊ | 66204/371472 [5:15:59<24:12:08, 3.50it/s] 18%|█▊ | 66205/371472 [5:15:59<23:45:36, 3.57it/s] 18%|█▊ | 66206/371472 [5:15:59<24:29:10, 3.46it/s] 18%|█▊ | 66207/371472 [5:16:00<24:04:16, 3.52it/s] 18%|█▊ | 66208/371472 [5:16:00<23:10:33, 3.66it/s] 18%|█▊ | 66209/371472 [5:16:00<22:46:37, 3.72it/s] 18%|█▊ | 66210/371472 [5:16:00<23:19:38, 3.63it/s] 18%|█▊ | 66211/371472 [5:16:01<23:03:34, 3.68it/s] 18%|█▊ | 66212/371472 [5:16:01<24:09:54, 3.51it/s] 18%|█▊ | 66213/371472 [5:16:01<23:29:55, 3.61it/s] 18%|█▊ | 66214/371472 [5:16:02<23:36:46, 3.59it/s] 18%|█▊ | 66215/371472 [5:16:02<23:15:28, 3.65it/s] 18%|█▊ | 66216/371472 [5:16:02<24:33:40, 3.45it/s] 18%|█▊ | 66217/371472 [5:16:02<23:36:47, 3.59it/s] 18%|█▊ | 66218/371472 [5:16:03<23:35:47, 3.59it/s] 18%|█▊ | 66219/371472 [5:16:03<23:29:18, 3.61it/s] 18%|█▊ | 66220/371472 [5:16:03<23:57:27, 3.54it/s] {'loss': 4.1114, 'learning_rate': 8.399609989441702e-07, 'epoch': 2.85} + 18%|█▊ | 66220/371472 [5:16:03<23:57:27, 3.54it/s] 18%|█▊ | 66221/371472 [5:16:04<23:19:27, 3.64it/s] 18%|█▊ | 66222/371472 [5:16:04<24:04:25, 3.52it/s] 18%|█▊ | 66223/371472 [5:16:04<24:32:25, 3.46it/s] 18%|█▊ | 66224/371472 [5:16:04<24:25:24, 3.47it/s] 18%|█▊ | 66225/371472 [5:16:05<24:24:02, 3.47it/s] 18%|█▊ | 66226/371472 [5:16:05<24:28:10, 3.47it/s] 18%|█▊ | 66227/371472 [5:16:05<23:36:28, 3.59it/s] 18%|█▊ | 66228/371472 [5:16:06<25:10:09, 3.37it/s] 18%|█▊ | 66229/371472 [5:16:06<24:06:46, 3.52it/s] 18%|█▊ | 66230/371472 [5:16:06<23:25:19, 3.62it/s] 18%|█▊ | 66231/371472 [5:16:06<23:55:12, 3.54it/s] 18%|█▊ | 66232/371472 [5:16:07<23:30:32, 3.61it/s] 18%|█▊ | 66233/371472 [5:16:07<23:06:10, 3.67it/s] 18%|█▊ | 66234/371472 [5:16:07<23:00:31, 3.69it/s] 18%|█▊ | 66235/371472 [5:16:08<24:23:37, 3.48it/s] 18%|█▊ | 66236/371472 [5:16:08<23:55:00, 3.55it/s] 18%|█▊ | 66237/371472 [5:16:08<23:17:21, 3.64it/s] 18%|█▊ | 66238/371472 [5:16:08<23:54:42, 3.55it/s] 18%|█▊ | 66239/371472 [5:16:09<23:17:04, 3.64it/s] 18%|█▊ | 66240/371472 [5:16:09<23:18:42, 3.64it/s] {'loss': 4.0018, 'learning_rate': 8.399125169686913e-07, 'epoch': 2.85} + 18%|█▊ | 66240/371472 [5:16:09<23:18:42, 3.64it/s] 18%|█▊ | 66241/371472 [5:16:09<25:30:11, 3.32it/s] 18%|█▊ | 66242/371472 [5:16:10<24:54:50, 3.40it/s] 18%|█▊ | 66243/371472 [5:16:10<25:07:52, 3.37it/s] 18%|█▊ | 66244/371472 [5:16:10<25:02:48, 3.39it/s] 18%|█▊ | 66245/371472 [5:16:10<23:44:17, 3.57it/s] 18%|█▊ | 66246/371472 [5:16:11<23:11:29, 3.66it/s] 18%|█▊ | 66247/371472 [5:16:11<23:06:03, 3.67it/s] 18%|█▊ | 66248/371472 [5:16:11<23:04:48, 3.67it/s] 18%|█▊ | 66249/371472 [5:16:11<23:33:08, 3.60it/s] 18%|█▊ | 66250/371472 [5:16:12<25:44:09, 3.29it/s] 18%|█▊ | 66251/371472 [5:16:12<24:40:13, 3.44it/s] 18%|█▊ | 66252/371472 [5:16:12<24:37:51, 3.44it/s] 18%|█▊ | 66253/371472 [5:16:13<23:39:46, 3.58it/s] 18%|█▊ | 66254/371472 [5:16:13<26:06:29, 3.25it/s] 18%|█▊ | 66255/371472 [5:16:13<25:02:29, 3.39it/s] 18%|█▊ | 66256/371472 [5:16:13<23:45:41, 3.57it/s] 18%|█▊ | 66257/371472 [5:16:14<24:19:32, 3.49it/s] 18%|█▊ | 66258/371472 [5:16:14<23:16:29, 3.64it/s] 18%|█▊ | 66259/371472 [5:16:14<22:29:32, 3.77it/s] 18%|█▊ | 66260/371472 [5:16:15<22:50:15, 3.71it/s] {'loss': 4.0871, 'learning_rate': 8.398640349932125e-07, 'epoch': 2.85} + 18%|█▊ | 66260/371472 [5:16:15<22:50:15, 3.71it/s] 18%|█▊ | 66261/371472 [5:16:15<22:42:26, 3.73it/s] 18%|█▊ | 66262/371472 [5:16:15<22:45:06, 3.73it/s] 18%|█▊ | 66263/371472 [5:16:15<24:04:20, 3.52it/s] 18%|█▊ | 66264/371472 [5:16:16<23:40:17, 3.58it/s] 18%|█▊ | 66265/371472 [5:16:16<25:08:58, 3.37it/s] 18%|█▊ | 66266/371472 [5:16:16<24:16:38, 3.49it/s] 18%|█▊ | 66267/371472 [5:16:17<25:41:40, 3.30it/s] 18%|█▊ | 66268/371472 [5:16:17<24:47:39, 3.42it/s] 18%|█▊ | 66269/371472 [5:16:17<23:45:59, 3.57it/s] 18%|█▊ | 66270/371472 [5:16:17<22:52:27, 3.71it/s] 18%|█▊ | 66271/371472 [5:16:18<22:41:17, 3.74it/s] 18%|█▊ | 66272/371472 [5:16:18<23:01:54, 3.68it/s] 18%|█▊ | 66273/371472 [5:16:18<22:50:16, 3.71it/s] 18%|█▊ | 66274/371472 [5:16:18<23:09:10, 3.66it/s] 18%|█▊ | 66275/371472 [5:16:19<23:17:00, 3.64it/s] 18%|█▊ | 66276/371472 [5:16:19<22:52:41, 3.71it/s] 18%|█▊ | 66277/371472 [5:16:19<22:37:36, 3.75it/s] 18%|█▊ | 66278/371472 [5:16:20<22:31:06, 3.76it/s] 18%|█▊ | 66279/371472 [5:16:20<23:01:04, 3.68it/s] 18%|█▊ | 66280/371472 [5:16:20<22:47:35, 3.72it/s] {'loss': 4.1926, 'learning_rate': 8.398155530177337e-07, 'epoch': 2.85} + 18%|█▊ | 66280/371472 [5:16:20<22:47:35, 3.72it/s] 18%|█▊ | 66281/371472 [5:16:20<22:28:01, 3.77it/s] 18%|█▊ | 66282/371472 [5:16:21<22:05:02, 3.84it/s] 18%|█▊ | 66283/371472 [5:16:21<26:05:56, 3.25it/s] 18%|█▊ | 66284/371472 [5:16:21<24:52:53, 3.41it/s] 18%|█▊ | 66285/371472 [5:16:22<25:45:41, 3.29it/s] 18%|█▊ | 66286/371472 [5:16:22<25:09:34, 3.37it/s] 18%|█▊ | 66287/371472 [5:16:22<25:46:21, 3.29it/s] 18%|█▊ | 66288/371472 [5:16:22<24:39:09, 3.44it/s] 18%|█▊ | 66289/371472 [5:16:23<24:52:11, 3.41it/s] 18%|█▊ | 66290/371472 [5:16:23<24:17:14, 3.49it/s] 18%|█▊ | 66291/371472 [5:16:23<23:48:57, 3.56it/s] 18%|█▊ | 66292/371472 [5:16:24<25:27:12, 3.33it/s] 18%|█▊ | 66293/371472 [5:16:24<26:13:11, 3.23it/s] 18%|█▊ | 66294/371472 [5:16:24<26:28:54, 3.20it/s] 18%|█▊ | 66295/371472 [5:16:25<26:36:21, 3.19it/s] 18%|█▊ | 66296/371472 [5:16:25<25:26:35, 3.33it/s] 18%|█▊ | 66297/371472 [5:16:25<24:29:24, 3.46it/s] 18%|█▊ | 66298/371472 [5:16:25<26:07:00, 3.25it/s] 18%|█▊ | 66299/371472 [5:16:26<24:54:35, 3.40it/s] 18%|█▊ | 66300/371472 [5:16:26<23:43:05, 3.57it/s] {'loss': 4.2544, 'learning_rate': 8.397670710422546e-07, 'epoch': 2.86} + 18%|█▊ | 66300/371472 [5:16:26<23:43:05, 3.57it/s] 18%|█▊ | 66301/371472 [5:16:26<23:30:27, 3.61it/s] 18%|█▊ | 66302/371472 [5:16:27<23:19:49, 3.63it/s] 18%|█▊ | 66303/371472 [5:16:27<23:58:12, 3.54it/s] 18%|█▊ | 66304/371472 [5:16:27<26:32:06, 3.19it/s] 18%|█▊ | 66305/371472 [5:16:28<28:22:05, 2.99it/s] 18%|█▊ | 66306/371472 [5:16:28<26:04:28, 3.25it/s] 18%|█▊ | 66307/371472 [5:16:28<25:21:08, 3.34it/s] 18%|█▊ | 66308/371472 [5:16:28<25:56:53, 3.27it/s] 18%|█▊ | 66309/371472 [5:16:29<26:07:56, 3.24it/s] 18%|█▊ | 66310/371472 [5:16:29<25:28:22, 3.33it/s] 18%|█▊ | 66311/371472 [5:16:29<24:45:53, 3.42it/s] 18%|█▊ | 66312/371472 [5:16:30<24:32:39, 3.45it/s] 18%|█▊ | 66313/371472 [5:16:30<24:12:23, 3.50it/s] 18%|█▊ | 66314/371472 [5:16:30<23:37:47, 3.59it/s] 18%|█▊ | 66315/371472 [5:16:30<24:15:04, 3.50it/s] 18%|█▊ | 66316/371472 [5:16:31<23:25:41, 3.62it/s] 18%|█▊ | 66317/371472 [5:16:31<22:30:25, 3.77it/s] 18%|█▊ | 66318/371472 [5:16:31<23:36:10, 3.59it/s] 18%|█▊ | 66319/371472 [5:16:32<25:31:30, 3.32it/s] 18%|█▊ | 66320/371472 [5:16:32<23:47:14, 3.56it/s] {'loss': 4.093, 'learning_rate': 8.397185890667758e-07, 'epoch': 2.86} + 18%|█▊ | 66320/371472 [5:16:32<23:47:14, 3.56it/s] 18%|█▊ | 66321/371472 [5:16:32<22:53:06, 3.70it/s] 18%|█▊ | 66322/371472 [5:16:32<22:03:22, 3.84it/s] 18%|█▊ | 66323/371472 [5:16:33<22:11:31, 3.82it/s] 18%|█▊ | 66324/371472 [5:16:33<22:01:51, 3.85it/s] 18%|█▊ | 66325/371472 [5:16:33<21:41:21, 3.91it/s] 18%|█▊ | 66326/371472 [5:16:33<23:37:36, 3.59it/s] 18%|█▊ | 66327/371472 [5:16:34<24:02:18, 3.53it/s] 18%|█▊ | 66328/371472 [5:16:34<24:17:15, 3.49it/s] 18%|█▊ | 66329/371472 [5:16:34<24:22:23, 3.48it/s] 18%|█▊ | 66330/371472 [5:16:35<23:42:51, 3.57it/s] 18%|█▊ | 66331/371472 [5:16:35<23:30:06, 3.61it/s] 18%|█▊ | 66332/371472 [5:16:35<23:47:29, 3.56it/s] 18%|█▊ | 66333/371472 [5:16:35<25:35:57, 3.31it/s] 18%|█▊ | 66334/371472 [5:16:36<28:04:08, 3.02it/s] 18%|█▊ | 66335/371472 [5:16:36<25:47:42, 3.29it/s] 18%|█▊ | 66336/371472 [5:16:36<24:50:33, 3.41it/s] 18%|█▊ | 66337/371472 [5:16:37<24:16:15, 3.49it/s] 18%|█▊ | 66338/371472 [5:16:37<25:04:17, 3.38it/s] 18%|█▊ | 66339/371472 [5:16:37<24:25:32, 3.47it/s] 18%|█▊ | 66340/371472 [5:16:38<24:55:28, 3.40it/s] {'loss': 4.1454, 'learning_rate': 8.396701070912969e-07, 'epoch': 2.86} + 18%|█▊ | 66340/371472 [5:16:38<24:55:28, 3.40it/s] 18%|█▊ | 66341/371472 [5:16:38<24:28:08, 3.46it/s] 18%|█▊ | 66342/371472 [5:16:38<25:47:10, 3.29it/s] 18%|█▊ | 66343/371472 [5:16:38<24:28:20, 3.46it/s] 18%|█▊ | 66344/371472 [5:16:39<24:22:20, 3.48it/s] 18%|█▊ | 66345/371472 [5:16:39<24:19:27, 3.48it/s] 18%|█▊ | 66346/371472 [5:16:39<23:55:13, 3.54it/s] 18%|█▊ | 66347/371472 [5:16:40<24:34:43, 3.45it/s] 18%|█▊ | 66348/371472 [5:16:40<23:51:36, 3.55it/s] 18%|█▊ | 66349/371472 [5:16:40<24:56:06, 3.40it/s] 18%|█▊ | 66350/371472 [5:16:40<25:13:15, 3.36it/s] 18%|█▊ | 66351/371472 [5:16:41<24:25:34, 3.47it/s] 18%|█▊ | 66352/371472 [5:16:41<25:00:20, 3.39it/s] 18%|█▊ | 66353/371472 [5:16:41<24:51:14, 3.41it/s] 18%|█▊ | 66354/371472 [5:16:42<23:51:45, 3.55it/s] 18%|█▊ | 66355/371472 [5:16:42<24:24:28, 3.47it/s] 18%|█▊ | 66356/371472 [5:16:42<23:16:13, 3.64it/s] 18%|█▊ | 66357/371472 [5:16:42<23:33:31, 3.60it/s] 18%|█▊ | 66358/371472 [5:16:43<24:01:09, 3.53it/s] 18%|█▊ | 66359/371472 [5:16:43<25:03:52, 3.38it/s] 18%|█▊ | 66360/371472 [5:16:43<25:39:53, 3.30it/s] {'loss': 4.237, 'learning_rate': 8.39621625115818e-07, 'epoch': 2.86} + 18%|█▊ | 66360/371472 [5:16:43<25:39:53, 3.30it/s] 18%|█▊ | 66361/371472 [5:16:44<24:10:15, 3.51it/s] 18%|█▊ | 66362/371472 [5:16:44<23:30:24, 3.61it/s] 18%|█▊ | 66363/371472 [5:16:44<24:09:44, 3.51it/s] 18%|█▊ | 66364/371472 [5:16:44<24:02:31, 3.53it/s] 18%|█▊ | 66365/371472 [5:16:45<24:04:02, 3.52it/s] 18%|█▊ | 66366/371472 [5:16:45<24:31:06, 3.46it/s] 18%|█▊ | 66367/371472 [5:16:45<25:15:15, 3.36it/s] 18%|█▊ | 66368/371472 [5:16:46<24:42:59, 3.43it/s] 18%|█▊ | 66369/371472 [5:16:46<23:22:34, 3.63it/s] 18%|█▊ | 66370/371472 [5:16:46<24:54:12, 3.40it/s] 18%|█▊ | 66371/371472 [5:16:46<23:23:58, 3.62it/s] 18%|█▊ | 66372/371472 [5:16:47<23:24:02, 3.62it/s] 18%|█▊ | 66373/371472 [5:16:47<22:47:19, 3.72it/s] 18%|█▊ | 66374/371472 [5:16:47<22:18:22, 3.80it/s] 18%|█▊ | 66375/371472 [5:16:47<22:52:58, 3.70it/s] 18%|█▊ | 66376/371472 [5:16:48<23:39:09, 3.58it/s] 18%|█▊ | 66377/371472 [5:16:48<23:27:17, 3.61it/s] 18%|█▊ | 66378/371472 [5:16:48<23:41:24, 3.58it/s] 18%|█▊ | 66379/371472 [5:16:49<24:01:33, 3.53it/s] 18%|█▊ | 66380/371472 [5:16:49<24:07:05, 3.51it/s] {'loss': 4.1079, 'learning_rate': 8.395731431403391e-07, 'epoch': 2.86} + 18%|█▊ | 66380/371472 [5:16:49<24:07:05, 3.51it/s] 18%|█▊ | 66381/371472 [5:16:49<23:54:26, 3.54it/s] 18%|█▊ | 66382/371472 [5:16:49<23:20:05, 3.63it/s] 18%|█▊ | 66383/371472 [5:16:50<23:16:47, 3.64it/s] 18%|█▊ | 66384/371472 [5:16:50<24:00:51, 3.53it/s] 18%|█▊ | 66385/371472 [5:16:50<23:23:23, 3.62it/s] 18%|█▊ | 66386/371472 [5:16:51<22:30:00, 3.77it/s] 18%|█▊ | 66387/371472 [5:16:51<22:36:29, 3.75it/s] 18%|█▊ | 66388/371472 [5:16:51<22:14:33, 3.81it/s] 18%|█▊ | 66389/371472 [5:16:51<22:43:09, 3.73it/s] 18%|█▊ | 66390/371472 [5:16:52<23:02:18, 3.68it/s] 18%|█▊ | 66391/371472 [5:16:52<25:06:05, 3.38it/s] 18%|█▊ | 66392/371472 [5:16:52<24:27:34, 3.46it/s] 18%|█▊ | 66393/371472 [5:16:53<24:41:26, 3.43it/s] 18%|█▊ | 66394/371472 [5:16:53<26:10:44, 3.24it/s] 18%|█▊ | 66395/371472 [5:16:53<24:29:25, 3.46it/s] 18%|█▊ | 66396/371472 [5:16:53<25:32:04, 3.32it/s] 18%|█▊ | 66397/371472 [5:16:54<24:51:27, 3.41it/s] 18%|█▊ | 66398/371472 [5:16:54<26:05:42, 3.25it/s] 18%|█▊ | 66399/371472 [5:16:54<25:20:03, 3.34it/s] 18%|█▊ | 66400/371472 [5:16:55<27:44:54, 3.05it/s] {'loss': 4.1858, 'learning_rate': 8.395246611648603e-07, 'epoch': 2.86} + 18%|█▊ | 66400/371472 [5:16:55<27:44:54, 3.05it/s] 18%|█▊ | 66401/371472 [5:16:55<26:47:37, 3.16it/s] 18%|█▊ | 66402/371472 [5:16:55<25:40:08, 3.30it/s] 18%|█▊ | 66403/371472 [5:16:56<25:07:12, 3.37it/s] 18%|█▊ | 66404/371472 [5:16:56<24:19:44, 3.48it/s] 18%|█▊ | 66405/371472 [5:16:56<23:49:40, 3.56it/s] 18%|█▊ | 66406/371472 [5:16:56<23:33:54, 3.60it/s] 18%|█▊ | 66407/371472 [5:16:57<22:40:56, 3.74it/s] 18%|█▊ | 66408/371472 [5:16:57<21:47:43, 3.89it/s] 18%|█▊ | 66409/371472 [5:16:57<21:35:30, 3.92it/s] 18%|█▊ | 66410/371472 [5:16:57<23:02:15, 3.68it/s] 18%|█▊ | 66411/371472 [5:16:58<22:56:30, 3.69it/s] 18%|█▊ | 66412/371472 [5:16:58<23:13:03, 3.65it/s] 18%|█▊ | 66413/371472 [5:16:58<23:56:35, 3.54it/s] 18%|█▊ | 66414/371472 [5:16:59<23:48:49, 3.56it/s] 18%|█▊ | 66415/371472 [5:16:59<23:49:04, 3.56it/s] 18%|█▊ | 66416/371472 [5:16:59<24:38:57, 3.44it/s] 18%|█▊ | 66417/371472 [5:16:59<23:36:34, 3.59it/s] 18%|█▊ | 66418/371472 [5:17:00<23:00:31, 3.68it/s] 18%|█▊ | 66419/371472 [5:17:00<22:47:02, 3.72it/s] 18%|█▊ | 66420/371472 [5:17:00<22:51:30, 3.71it/s] {'loss': 4.2199, 'learning_rate': 8.394761791893814e-07, 'epoch': 2.86} + 18%|█▊ | 66420/371472 [5:17:00<22:51:30, 3.71it/s] 18%|█▊ | 66421/371472 [5:17:01<25:10:46, 3.37it/s] 18%|█▊ | 66422/371472 [5:17:01<25:22:56, 3.34it/s] 18%|█▊ | 66423/371472 [5:17:01<24:43:30, 3.43it/s] 18%|█▊ | 66424/371472 [5:17:01<24:45:21, 3.42it/s] 18%|█▊ | 66425/371472 [5:17:02<26:32:46, 3.19it/s] 18%|█▊ | 66426/371472 [5:17:02<25:03:55, 3.38it/s] 18%|█▊ | 66427/371472 [5:17:02<26:22:02, 3.21it/s] 18%|█▊ | 66428/371472 [5:17:03<25:23:23, 3.34it/s] 18%|█▊ | 66429/371472 [5:17:03<24:55:14, 3.40it/s] 18%|█▊ | 66430/371472 [5:17:03<24:18:04, 3.49it/s] 18%|█▊ | 66431/371472 [5:17:04<23:49:45, 3.56it/s] 18%|█▊ | 66432/371472 [5:17:04<23:48:01, 3.56it/s] 18%|█▊ | 66433/371472 [5:17:04<23:37:56, 3.59it/s] 18%|█▊ | 66434/371472 [5:17:04<24:34:15, 3.45it/s] 18%|█▊ | 66435/371472 [5:17:05<25:24:15, 3.34it/s] 18%|█▊ | 66436/371472 [5:17:05<24:38:19, 3.44it/s] 18%|█▊ | 66437/371472 [5:17:05<25:27:19, 3.33it/s] 18%|█▊ | 66438/371472 [5:17:06<25:12:28, 3.36it/s] 18%|█▊ | 66439/371472 [5:17:06<25:05:35, 3.38it/s] 18%|█▊ | 66440/371472 [5:17:06<25:09:02, 3.37it/s] {'loss': 4.1503, 'learning_rate': 8.394276972139023e-07, 'epoch': 2.86} + 18%|█▊ | 66440/371472 [5:17:06<25:09:02, 3.37it/s] 18%|█▊ | 66441/371472 [5:17:06<24:51:26, 3.41it/s] 18%|█▊ | 66442/371472 [5:17:07<24:15:55, 3.49it/s] 18%|█▊ | 66443/371472 [5:17:07<23:48:20, 3.56it/s] 18%|█▊ | 66444/371472 [5:17:07<23:25:51, 3.62it/s] 18%|█▊ | 66445/371472 [5:17:08<23:20:51, 3.63it/s] 18%|█▊ | 66446/371472 [5:17:08<23:02:57, 3.68it/s] 18%|█▊ | 66447/371472 [5:17:08<24:39:53, 3.44it/s] 18%|█▊ | 66448/371472 [5:17:08<24:31:19, 3.46it/s] 18%|█▊ | 66449/371472 [5:17:09<24:12:22, 3.50it/s] 18%|█▊ | 66450/371472 [5:17:09<25:46:23, 3.29it/s] 18%|█▊ | 66451/371472 [5:17:09<24:40:39, 3.43it/s] 18%|█▊ | 66452/371472 [5:17:10<24:17:38, 3.49it/s] 18%|█▊ | 66453/371472 [5:17:10<23:28:39, 3.61it/s] 18%|█▊ | 66454/371472 [5:17:10<22:53:50, 3.70it/s] 18%|█▊ | 66455/371472 [5:17:10<22:36:11, 3.75it/s] 18%|█▊ | 66456/371472 [5:17:11<22:37:36, 3.74it/s] 18%|█▊ | 66457/371472 [5:17:11<22:08:56, 3.83it/s] 18%|█▊ | 66458/371472 [5:17:11<22:31:44, 3.76it/s] 18%|█▊ | 66459/371472 [5:17:11<24:45:39, 3.42it/s] 18%|█▊ | 66460/371472 [5:17:12<23:58:46, 3.53it/s] {'loss': 3.92, 'learning_rate': 8.393792152384235e-07, 'epoch': 2.86} + 18%|█▊ | 66460/371472 [5:17:12<23:58:46, 3.53it/s] 18%|█▊ | 66461/371472 [5:17:12<24:13:35, 3.50it/s] 18%|█▊ | 66462/371472 [5:17:12<24:01:17, 3.53it/s] 18%|█▊ | 66463/371472 [5:17:13<23:48:27, 3.56it/s] 18%|█▊ | 66464/371472 [5:17:13<23:15:13, 3.64it/s] 18%|█▊ | 66465/371472 [5:17:13<22:51:58, 3.71it/s] 18%|█▊ | 66466/371472 [5:17:13<22:22:45, 3.79it/s] 18%|█▊ | 66467/371472 [5:17:14<21:56:04, 3.86it/s] 18%|█▊ | 66468/371472 [5:17:14<23:14:28, 3.65it/s] 18%|█▊ | 66469/371472 [5:17:14<23:00:21, 3.68it/s] 18%|█▊ | 66470/371472 [5:17:14<22:47:17, 3.72it/s] 18%|█▊ | 66471/371472 [5:17:15<23:44:33, 3.57it/s] 18%|█▊ | 66472/371472 [5:17:15<26:00:42, 3.26it/s] 18%|█▊ | 66473/371472 [5:17:15<24:33:44, 3.45it/s] 18%|█▊ | 66474/371472 [5:17:16<23:30:54, 3.60it/s] 18%|█▊ | 66475/371472 [5:17:16<23:13:27, 3.65it/s] 18%|█▊ | 66476/371472 [5:17:16<23:23:18, 3.62it/s] 18%|█▊ | 66477/371472 [5:17:16<23:53:43, 3.55it/s] 18%|█▊ | 66478/371472 [5:17:17<23:24:13, 3.62it/s] 18%|█▊ | 66479/371472 [5:17:17<23:05:06, 3.67it/s] 18%|█▊ | 66480/371472 [5:17:17<22:53:16, 3.70it/s] {'loss': 4.0939, 'learning_rate': 8.393307332629447e-07, 'epoch': 2.86} + 18%|█▊ | 66480/371472 [5:17:17<22:53:16, 3.70it/s] 18%|█▊ | 66481/371472 [5:17:18<23:24:17, 3.62it/s] 18%|█▊ | 66482/371472 [5:17:18<23:06:45, 3.67it/s] 18%|█▊ | 66483/371472 [5:17:18<22:43:10, 3.73it/s] 18%|█▊ | 66484/371472 [5:17:18<24:07:17, 3.51it/s] 18%|█▊ | 66485/371472 [5:17:19<24:21:07, 3.48it/s] 18%|█▊ | 66486/371472 [5:17:19<24:10:01, 3.51it/s] 18%|█▊ | 66487/371472 [5:17:19<24:50:52, 3.41it/s] 18%|█▊ | 66488/371472 [5:17:20<23:57:43, 3.54it/s] 18%|█▊ | 66489/371472 [5:17:20<24:22:38, 3.48it/s] 18%|█▊ | 66490/371472 [5:17:20<23:05:32, 3.67it/s] 18%|█▊ | 66491/371472 [5:17:20<22:33:48, 3.75it/s] 18%|█▊ | 66492/371472 [5:17:21<23:04:04, 3.67it/s] 18%|█▊ | 66493/371472 [5:17:21<22:51:14, 3.71it/s] 18%|█▊ | 66494/371472 [5:17:21<24:09:33, 3.51it/s] 18%|█▊ | 66495/371472 [5:17:21<23:38:38, 3.58it/s] 18%|█▊ | 66496/371472 [5:17:22<24:27:12, 3.46it/s] 18%|█▊ | 66497/371472 [5:17:22<23:18:33, 3.63it/s] 18%|█▊ | 66498/371472 [5:17:22<22:52:06, 3.70it/s] 18%|█▊ | 66499/371472 [5:17:23<24:24:42, 3.47it/s] 18%|█▊ | 66500/371472 [5:17:23<26:06:13, 3.25it/s] {'loss': 4.3083, 'learning_rate': 8.392822512874658e-07, 'epoch': 2.86} + 18%|█▊ | 66500/371472 [5:17:23<26:06:13, 3.25it/s] 18%|█▊ | 66501/371472 [5:17:23<24:48:48, 3.41it/s] 18%|█▊ | 66502/371472 [5:17:23<24:21:53, 3.48it/s] 18%|█▊ | 66503/371472 [5:17:24<23:42:45, 3.57it/s] 18%|█▊ | 66504/371472 [5:17:24<23:17:20, 3.64it/s] 18%|█▊ | 66505/371472 [5:17:24<24:35:38, 3.44it/s] 18%|█▊ | 66506/371472 [5:17:25<23:23:12, 3.62it/s] 18%|█▊ | 66507/371472 [5:17:25<23:25:34, 3.62it/s] 18%|█▊ | 66508/371472 [5:17:25<23:32:52, 3.60it/s] 18%|█▊ | 66509/371472 [5:17:25<23:26:50, 3.61it/s] 18%|█▊ | 66510/371472 [5:17:26<23:43:06, 3.57it/s] 18%|█▊ | 66511/371472 [5:17:26<23:11:39, 3.65it/s] 18%|█▊ | 66512/371472 [5:17:26<24:27:44, 3.46it/s] 18%|█▊ | 66513/371472 [5:17:27<23:36:15, 3.59it/s] 18%|█▊ | 66514/371472 [5:17:27<23:28:57, 3.61it/s] 18%|█▊ | 66515/371472 [5:17:27<22:57:13, 3.69it/s] 18%|█▊ | 66516/371472 [5:17:27<22:19:46, 3.79it/s] 18%|█▊ | 66517/371472 [5:17:28<21:46:18, 3.89it/s] 18%|█▊ | 66518/371472 [5:17:28<21:59:55, 3.85it/s] 18%|█▊ | 66519/371472 [5:17:28<21:58:04, 3.86it/s] 18%|█▊ | 66520/371472 [5:17:28<22:25:40, 3.78it/s] {'loss': 4.5096, 'learning_rate': 8.392337693119868e-07, 'epoch': 2.87} + 18%|█▊ | 66520/371472 [5:17:28<22:25:40, 3.78it/s] 18%|█▊ | 66521/371472 [5:17:29<23:00:37, 3.68it/s] 18%|█▊ | 66522/371472 [5:17:29<23:24:18, 3.62it/s] 18%|█▊ | 66523/371472 [5:17:29<24:57:32, 3.39it/s] 18%|█▊ | 66524/371472 [5:17:30<24:46:13, 3.42it/s] 18%|█▊ | 66525/371472 [5:17:30<24:50:43, 3.41it/s] 18%|█▊ | 66526/371472 [5:17:30<24:22:18, 3.48it/s] 18%|█▊ | 66527/371472 [5:17:30<23:54:46, 3.54it/s] 18%|█▊ | 66528/371472 [5:17:31<22:53:26, 3.70it/s] 18%|█▊ | 66529/371472 [5:17:31<23:50:05, 3.55it/s] 18%|█▊ | 66530/371472 [5:17:31<24:30:22, 3.46it/s] 18%|█▊ | 66531/371472 [5:17:32<23:30:06, 3.60it/s] 18%|█▊ | 66532/371472 [5:17:32<25:57:34, 3.26it/s] 18%|█▊ | 66533/371472 [5:17:32<24:42:18, 3.43it/s] 18%|█▊ | 66534/371472 [5:17:32<23:52:13, 3.55it/s] 18%|█▊ | 66535/371472 [5:17:33<24:48:01, 3.42it/s] 18%|█▊ | 66536/371472 [5:17:33<23:30:02, 3.60it/s] 18%|█▊ | 66537/371472 [5:17:33<23:44:18, 3.57it/s] 18%|█▊ | 66538/371472 [5:17:34<22:51:46, 3.70it/s] 18%|█▊ | 66539/371472 [5:17:34<22:50:40, 3.71it/s] 18%|█▊ | 66540/371472 [5:17:34<22:25:32, 3.78it/s] {'loss': 4.1628, 'learning_rate': 8.39185287336508e-07, 'epoch': 2.87} + 18%|█▊ | 66540/371472 [5:17:34<22:25:32, 3.78it/s] 18%|█▊ | 66541/371472 [5:17:34<23:23:18, 3.62it/s] 18%|█▊ | 66542/371472 [5:17:35<22:43:27, 3.73it/s] 18%|█▊ | 66543/371472 [5:17:35<22:19:37, 3.79it/s] 18%|█▊ | 66544/371472 [5:17:35<21:56:34, 3.86it/s] 18%|█▊ | 66545/371472 [5:17:35<22:48:00, 3.71it/s] 18%|█▊ | 66546/371472 [5:17:36<24:02:46, 3.52it/s] 18%|█▊ | 66547/371472 [5:17:36<23:00:21, 3.68it/s] 18%|█▊ | 66548/371472 [5:17:36<22:44:54, 3.72it/s] 18%|█▊ | 66549/371472 [5:17:36<22:44:11, 3.73it/s] 18%|█▊ | 66550/371472 [5:17:37<23:31:48, 3.60it/s] 18%|█▊ | 66551/371472 [5:17:37<25:29:53, 3.32it/s] 18%|█▊ | 66552/371472 [5:17:37<25:53:50, 3.27it/s] 18%|█▊ | 66553/371472 [5:17:38<25:29:37, 3.32it/s] 18%|█▊ | 66554/371472 [5:17:38<27:21:37, 3.10it/s] 18%|█▊ | 66555/371472 [5:17:38<28:49:52, 2.94it/s] 18%|█▊ | 66556/371472 [5:17:39<29:06:27, 2.91it/s] 18%|█▊ | 66557/371472 [5:17:39<29:41:05, 2.85it/s] 18%|█▊ | 66558/371472 [5:17:39<27:38:25, 3.06it/s] 18%|█▊ | 66559/371472 [5:17:40<26:09:14, 3.24it/s] 18%|█▊ | 66560/371472 [5:17:40<24:28:27, 3.46it/s] {'loss': 4.1477, 'learning_rate': 8.391368053610291e-07, 'epoch': 2.87} + 18%|█▊ | 66560/371472 [5:17:40<24:28:27, 3.46it/s] 18%|█▊ | 66561/371472 [5:17:40<24:37:17, 3.44it/s] 18%|█▊ | 66562/371472 [5:17:41<25:22:41, 3.34it/s] 18%|█▊ | 66563/371472 [5:17:41<25:16:00, 3.35it/s] 18%|█▊ | 66564/371472 [5:17:41<24:47:46, 3.42it/s] 18%|█▊ | 66565/371472 [5:17:42<26:06:11, 3.24it/s] 18%|█▊ | 66566/371472 [5:17:42<25:32:04, 3.32it/s] 18%|█▊ | 66567/371472 [5:17:42<23:56:40, 3.54it/s] 18%|█▊ | 66568/371472 [5:17:42<23:39:41, 3.58it/s] 18%|█▊ | 66569/371472 [5:17:43<23:23:39, 3.62it/s] 18%|█▊ | 66570/371472 [5:17:43<23:19:47, 3.63it/s] 18%|█▊ | 66571/371472 [5:17:43<24:30:20, 3.46it/s] 18%|█▊ | 66572/371472 [5:17:43<23:35:25, 3.59it/s] 18%|█▊ | 66573/371472 [5:17:44<23:34:07, 3.59it/s] 18%|█▊ | 66574/371472 [5:17:44<25:15:25, 3.35it/s] 18%|█▊ | 66575/371472 [5:17:44<23:51:16, 3.55it/s] 18%|█▊ | 66576/371472 [5:17:45<24:01:28, 3.53it/s] 18%|█▊ | 66577/371472 [5:17:45<24:07:55, 3.51it/s] 18%|█▊ | 66578/371472 [5:17:45<23:22:28, 3.62it/s] 18%|█▊ | 66579/371472 [5:17:45<23:57:17, 3.54it/s] 18%|█▊ | 66580/371472 [5:17:46<24:07:58, 3.51it/s] {'loss': 3.9537, 'learning_rate': 8.390883233855502e-07, 'epoch': 2.87} + 18%|█▊ | 66580/371472 [5:17:46<24:07:58, 3.51it/s] 18%|█▊ | 66581/371472 [5:17:46<22:57:20, 3.69it/s] 18%|█▊ | 66582/371472 [5:17:46<23:18:28, 3.63it/s] 18%|█▊ | 66583/371472 [5:17:47<23:20:14, 3.63it/s] 18%|█▊ | 66584/371472 [5:17:47<23:09:10, 3.66it/s] 18%|█▊ | 66585/371472 [5:17:47<22:15:02, 3.81it/s] 18%|█▊ | 66586/371472 [5:17:47<24:26:08, 3.47it/s] 18%|█▊ | 66587/371472 [5:17:48<24:21:14, 3.48it/s] 18%|█▊ | 66588/371472 [5:17:48<24:47:03, 3.42it/s] 18%|█▊ | 66589/371472 [5:17:48<24:57:54, 3.39it/s] 18%|█▊ | 66590/371472 [5:17:49<26:00:57, 3.26it/s] 18%|█▊ | 66591/371472 [5:17:49<25:31:00, 3.32it/s] 18%|█▊ | 66592/371472 [5:17:49<25:36:21, 3.31it/s] 18%|█▊ | 66593/371472 [5:17:49<25:11:11, 3.36it/s] 18%|█▊ | 66594/371472 [5:17:50<24:09:54, 3.50it/s] 18%|█▊ | 66595/371472 [5:17:50<24:26:20, 3.47it/s] 18%|█▊ | 66596/371472 [5:17:50<26:07:41, 3.24it/s] 18%|█▊ | 66597/371472 [5:17:51<26:09:58, 3.24it/s] 18%|█▊ | 66598/371472 [5:17:51<25:01:34, 3.38it/s] 18%|█▊ | 66599/371472 [5:17:51<25:16:40, 3.35it/s] 18%|█▊ | 66600/371472 [5:17:52<25:06:29, 3.37it/s] {'loss': 4.0073, 'learning_rate': 8.390398414100712e-07, 'epoch': 2.87} + 18%|█▊ | 66600/371472 [5:17:52<25:06:29, 3.37it/s] 18%|█▊ | 66601/371472 [5:17:52<25:34:51, 3.31it/s] 18%|█▊ | 66602/371472 [5:17:52<24:24:42, 3.47it/s] 18%|█▊ | 66603/371472 [5:17:52<23:53:39, 3.54it/s] 18%|█▊ | 66604/371472 [5:17:53<23:27:43, 3.61it/s] 18%|█▊ | 66605/371472 [5:17:53<22:34:17, 3.75it/s] 18%|█▊ | 66606/371472 [5:17:53<22:00:06, 3.85it/s] 18%|█▊ | 66607/371472 [5:17:53<24:05:21, 3.52it/s] 18%|█▊ | 66608/371472 [5:17:54<24:15:55, 3.49it/s] 18%|█▊ | 66609/371472 [5:17:54<23:32:42, 3.60it/s] 18%|█▊ | 66610/371472 [5:17:54<23:32:21, 3.60it/s] 18%|█▊ | 66611/371472 [5:17:55<23:08:56, 3.66it/s] 18%|█▊ | 66612/371472 [5:17:55<25:21:59, 3.34it/s] 18%|█▊ | 66613/371472 [5:17:55<25:06:06, 3.37it/s] 18%|█▊ | 66614/371472 [5:17:55<24:13:19, 3.50it/s] 18%|█▊ | 66615/371472 [5:17:56<23:29:52, 3.60it/s] 18%|█▊ | 66616/371472 [5:17:56<23:05:04, 3.67it/s] 18%|█▊ | 66617/371472 [5:17:56<24:20:32, 3.48it/s] 18%|█▊ | 66618/371472 [5:17:57<24:58:24, 3.39it/s] 18%|█▊ | 66619/371472 [5:17:57<24:36:40, 3.44it/s] 18%|█▊ | 66620/371472 [5:17:57<24:07:23, 3.51it/s] {'loss': 4.1379, 'learning_rate': 8.389913594345924e-07, 'epoch': 2.87} + 18%|█▊ | 66620/371472 [5:17:57<24:07:23, 3.51it/s] 18%|█▊ | 66621/371472 [5:17:57<23:22:45, 3.62it/s] 18%|█▊ | 66622/371472 [5:17:58<23:27:46, 3.61it/s] 18%|█▊ | 66623/371472 [5:17:58<24:11:08, 3.50it/s] 18%|█▊ | 66624/371472 [5:17:58<24:40:35, 3.43it/s] 18%|█▊ | 66625/371472 [5:17:59<23:57:08, 3.54it/s] 18%|█▊ | 66626/371472 [5:17:59<24:05:31, 3.51it/s] 18%|█▊ | 66627/371472 [5:17:59<24:14:26, 3.49it/s] 18%|█▊ | 66628/371472 [5:17:59<24:16:26, 3.49it/s] 18%|█▊ | 66629/371472 [5:18:00<23:34:12, 3.59it/s] 18%|█▊ | 66630/371472 [5:18:00<23:14:34, 3.64it/s] 18%|█▊ | 66631/371472 [5:18:00<22:49:54, 3.71it/s] 18%|█▊ | 66632/371472 [5:18:01<23:40:15, 3.58it/s] 18%|█▊ | 66633/371472 [5:18:01<23:03:01, 3.67it/s] 18%|█▊ | 66634/371472 [5:18:01<24:03:44, 3.52it/s] 18%|█▊ | 66635/371472 [5:18:01<23:17:29, 3.64it/s] 18%|█▊ | 66636/371472 [5:18:02<22:38:06, 3.74it/s] 18%|█▊ | 66637/371472 [5:18:02<22:39:49, 3.74it/s] 18%|█▊ | 66638/371472 [5:18:02<22:23:04, 3.78it/s] 18%|█▊ | 66639/371472 [5:18:02<24:09:48, 3.50it/s] 18%|█▊ | 66640/371472 [5:18:03<23:46:17, 3.56it/s] {'loss': 4.5399, 'learning_rate': 8.389428774591135e-07, 'epoch': 2.87} + 18%|█▊ | 66640/371472 [5:18:03<23:46:17, 3.56it/s] 18%|█▊ | 66641/371472 [5:18:03<24:18:32, 3.48it/s] 18%|█▊ | 66642/371472 [5:18:03<24:33:04, 3.45it/s] 18%|█▊ | 66643/371472 [5:18:04<26:14:10, 3.23it/s] 18%|█▊ | 66644/371472 [5:18:04<24:27:02, 3.46it/s] 18%|█▊ | 66645/371472 [5:18:04<23:43:36, 3.57it/s] 18%|█▊ | 66646/371472 [5:18:04<22:56:37, 3.69it/s] 18%|█▊ | 66647/371472 [5:18:05<22:41:32, 3.73it/s] 18%|█▊ | 66648/371472 [5:18:05<22:20:26, 3.79it/s] 18%|█▊ | 66649/371472 [5:18:05<22:14:43, 3.81it/s] 18%|█▊ | 66650/371472 [5:18:06<23:01:11, 3.68it/s] 18%|█▊ | 66651/371472 [5:18:06<22:39:25, 3.74it/s] 18%|█▊ | 66652/371472 [5:18:06<22:02:16, 3.84it/s] 18%|█▊ | 66653/371472 [5:18:06<22:11:38, 3.82it/s] 18%|█▊ | 66654/371472 [5:18:07<22:40:16, 3.73it/s] 18%|█▊ | 66655/371472 [5:18:07<23:25:33, 3.61it/s] 18%|█▊ | 66656/371472 [5:18:07<23:01:41, 3.68it/s] 18%|█▊ | 66657/371472 [5:18:07<23:32:47, 3.60it/s] 18%|█▊ | 66658/371472 [5:18:08<24:43:49, 3.42it/s] 18%|█▊ | 66659/371472 [5:18:08<26:15:02, 3.23it/s] 18%|█▊ | 66660/371472 [5:18:08<26:42:44, 3.17it/s] {'loss': 4.172, 'learning_rate': 8.388943954836347e-07, 'epoch': 2.87} + 18%|█▊ | 66660/371472 [5:18:08<26:42:44, 3.17it/s] 18%|█▊ | 66661/371472 [5:18:09<27:45:19, 3.05it/s] 18%|█▊ | 66662/371472 [5:18:09<25:31:48, 3.32it/s] 18%|█▊ | 66663/371472 [5:18:09<24:17:12, 3.49it/s] 18%|█▊ | 66664/371472 [5:18:10<23:12:50, 3.65it/s] 18%|█▊ | 66665/371472 [5:18:10<23:30:23, 3.60it/s] 18%|█▊ | 66666/371472 [5:18:10<22:46:23, 3.72it/s] 18%|█▊ | 66667/371472 [5:18:10<22:56:02, 3.69it/s] 18%|█▊ | 66668/371472 [5:18:11<23:01:43, 3.68it/s] 18%|█▊ | 66669/371472 [5:18:11<22:51:15, 3.70it/s] 18%|█▊ | 66670/371472 [5:18:11<24:22:21, 3.47it/s] 18%|█▊ | 66671/371472 [5:18:11<23:37:14, 3.58it/s] 18%|█▊ | 66672/371472 [5:18:12<24:22:52, 3.47it/s] 18%|█▊ | 66673/371472 [5:18:12<24:38:53, 3.44it/s] 18%|█▊ | 66674/371472 [5:18:12<26:19:02, 3.22it/s] 18%|█▊ | 66675/371472 [5:18:13<26:17:11, 3.22it/s] 18%|█▊ | 66676/371472 [5:18:13<26:10:21, 3.23it/s] 18%|█▊ | 66677/371472 [5:18:13<25:08:10, 3.37it/s] 18%|█▊ | 66678/371472 [5:18:14<25:00:42, 3.39it/s] 18%|█▊ | 66679/371472 [5:18:14<23:58:59, 3.53it/s] 18%|█▊ | 66680/371472 [5:18:14<24:16:04, 3.49it/s] {'loss': 4.1137, 'learning_rate': 8.388459135081556e-07, 'epoch': 2.87} + 18%|█▊ | 66680/371472 [5:18:14<24:16:04, 3.49it/s] 18%|█▊ | 66681/371472 [5:18:14<24:13:41, 3.49it/s] 18%|█▊ | 66682/371472 [5:18:15<23:42:07, 3.57it/s] 18%|█▊ | 66683/371472 [5:18:15<22:37:34, 3.74it/s] 18%|█▊ | 66684/371472 [5:18:15<23:07:21, 3.66it/s] 18%|█▊ | 66685/371472 [5:18:16<24:03:43, 3.52it/s] 18%|█▊ | 66686/371472 [5:18:16<23:36:20, 3.59it/s] 18%|█▊ | 66687/371472 [5:18:16<24:08:18, 3.51it/s] 18%|█▊ | 66688/371472 [5:18:16<23:53:17, 3.54it/s] 18%|█▊ | 66689/371472 [5:18:17<24:00:57, 3.53it/s] 18%|█▊ | 66690/371472 [5:18:17<23:53:15, 3.54it/s] 18%|█▊ | 66691/371472 [5:18:17<23:39:12, 3.58it/s] 18%|█▊ | 66692/371472 [5:18:18<23:56:26, 3.54it/s] 18%|█▊ | 66693/371472 [5:18:18<24:14:48, 3.49it/s] 18%|█▊ | 66694/371472 [5:18:18<24:01:39, 3.52it/s] 18%|█▊ | 66695/371472 [5:18:18<23:52:04, 3.55it/s] 18%|█▊ | 66696/371472 [5:18:19<23:14:49, 3.64it/s] 18%|█▊ | 66697/371472 [5:18:19<23:06:39, 3.66it/s] 18%|█▊ | 66698/371472 [5:18:19<23:51:09, 3.55it/s] 18%|█▊ | 66699/371472 [5:18:19<23:45:23, 3.56it/s] 18%|█▊ | 66700/371472 [5:18:20<22:50:55, 3.71it/s] {'loss': 4.3093, 'learning_rate': 8.387974315326768e-07, 'epoch': 2.87} + 18%|█▊ | 66700/371472 [5:18:20<22:50:55, 3.71it/s] 18%|█▊ | 66701/371472 [5:18:20<22:34:59, 3.75it/s] 18%|█▊ | 66702/371472 [5:18:20<22:16:10, 3.80it/s] 18%|█▊ | 66703/371472 [5:18:20<22:28:30, 3.77it/s] 18%|█▊ | 66704/371472 [5:18:21<22:54:25, 3.70it/s] 18%|█▊ | 66705/371472 [5:18:21<23:02:02, 3.68it/s] 18%|█▊ | 66706/371472 [5:18:21<22:36:23, 3.74it/s] 18%|█▊ | 66707/371472 [5:18:22<22:06:41, 3.83it/s] 18%|█▊ | 66708/371472 [5:18:22<22:47:59, 3.71it/s] 18%|█▊ | 66709/371472 [5:18:22<22:52:45, 3.70it/s] 18%|█▊ | 66710/371472 [5:18:22<22:40:03, 3.73it/s] 18%|█▊ | 66711/371472 [5:18:23<23:41:19, 3.57it/s] 18%|█▊ | 66712/371472 [5:18:23<23:06:11, 3.66it/s] 18%|█▊ | 66713/371472 [5:18:23<23:16:56, 3.64it/s] 18%|█▊ | 66714/371472 [5:18:23<23:07:48, 3.66it/s] 18%|█▊ | 66715/371472 [5:18:24<23:18:08, 3.63it/s] 18%|█▊ | 66716/371472 [5:18:24<23:00:14, 3.68it/s] 18%|█▊ | 66717/371472 [5:18:24<23:51:08, 3.55it/s] 18%|█▊ | 66718/371472 [5:18:25<22:48:22, 3.71it/s] 18%|█▊ | 66719/371472 [5:18:25<22:51:27, 3.70it/s] 18%|█▊ | 66720/371472 [5:18:25<23:11:49, 3.65it/s] {'loss': 4.2004, 'learning_rate': 8.387489495571979e-07, 'epoch': 2.87} + 18%|█▊ | 66720/371472 [5:18:25<23:11:49, 3.65it/s] 18%|█▊ | 66721/371472 [5:18:25<23:12:17, 3.65it/s] 18%|█▊ | 66722/371472 [5:18:26<22:25:32, 3.77it/s] 18%|█▊ | 66723/371472 [5:18:26<23:28:01, 3.61it/s] 18%|█▊ | 66724/371472 [5:18:26<23:46:04, 3.56it/s] 18%|█▊ | 66725/371472 [5:18:27<24:46:57, 3.42it/s] 18%|█▊ | 66726/371472 [5:18:27<26:11:29, 3.23it/s] 18%|█▊ | 66727/371472 [5:18:27<25:02:27, 3.38it/s] 18%|█▊ | 66728/371472 [5:18:27<24:06:43, 3.51it/s] 18%|█▊ | 66729/371472 [5:18:28<23:00:12, 3.68it/s] 18%|█▊ | 66730/371472 [5:18:28<22:02:57, 3.84it/s] 18%|█▊ | 66731/371472 [5:18:28<22:05:41, 3.83it/s] 18%|█▊ | 66732/371472 [5:18:28<22:22:22, 3.78it/s] 18%|█▊ | 66733/371472 [5:18:29<23:10:46, 3.65it/s] 18%|█▊ | 66734/371472 [5:18:29<24:53:34, 3.40it/s] 18%|█▊ | 66735/371472 [5:18:29<24:58:59, 3.39it/s] 18%|█▊ | 66736/371472 [5:18:30<25:17:44, 3.35it/s] 18%|█▊ | 66737/371472 [5:18:30<24:05:13, 3.51it/s] 18%|█▊ | 66738/371472 [5:18:30<23:57:12, 3.53it/s] 18%|█▊ | 66739/371472 [5:18:31<24:07:31, 3.51it/s] 18%|█▊ | 66740/371472 [5:18:31<23:52:27, 3.55it/s] {'loss': 4.2117, 'learning_rate': 8.387004675817191e-07, 'epoch': 2.87} + 18%|█▊ | 66740/371472 [5:18:31<23:52:27, 3.55it/s] 18%|█▊ | 66741/371472 [5:18:31<23:07:08, 3.66it/s] 18%|█▊ | 66742/371472 [5:18:31<24:11:38, 3.50it/s] 18%|█▊ | 66743/371472 [5:18:32<24:38:11, 3.44it/s] 18%|█▊ | 66744/371472 [5:18:32<23:43:21, 3.57it/s] 18%|█▊ | 66745/371472 [5:18:32<24:15:29, 3.49it/s] 18%|█▊ | 66746/371472 [5:18:32<24:15:24, 3.49it/s] 18%|█▊ | 66747/371472 [5:18:33<24:35:54, 3.44it/s] 18%|█▊ | 66748/371472 [5:18:33<23:22:55, 3.62it/s] 18%|█▊ | 66749/371472 [5:18:33<23:31:11, 3.60it/s] 18%|█▊ | 66750/371472 [5:18:34<23:13:53, 3.64it/s] 18%|█▊ | 66751/371472 [5:18:34<23:53:04, 3.54it/s] 18%|█▊ | 66752/371472 [5:18:34<24:07:35, 3.51it/s] 18%|█▊ | 66753/371472 [5:18:34<24:07:56, 3.51it/s] 18%|█▊ | 66754/371472 [5:18:35<24:10:55, 3.50it/s] 18%|█▊ | 66755/371472 [5:18:35<23:03:55, 3.67it/s] 18%|█▊ | 66756/371472 [5:18:35<23:08:22, 3.66it/s] 18%|█▊ | 66757/371472 [5:18:36<24:08:26, 3.51it/s] 18%|█▊ | 66758/371472 [5:18:36<23:29:00, 3.60it/s] 18%|█▊ | 66759/371472 [5:18:36<23:00:41, 3.68it/s] 18%|█▊ | 66760/371472 [5:18:36<22:02:20, 3.84it/s] {'loss': 3.9785, 'learning_rate': 8.386519856062401e-07, 'epoch': 2.88} + 18%|█▊ | 66760/371472 [5:18:36<22:02:20, 3.84it/s] 18%|█▊ | 66761/371472 [5:18:37<22:13:55, 3.81it/s] 18%|█▊ | 66762/371472 [5:18:37<22:01:00, 3.84it/s] 18%|█▊ | 66763/371472 [5:18:37<21:40:30, 3.90it/s] 18%|█▊ | 66764/371472 [5:18:37<21:51:54, 3.87it/s] 18%|█▊ | 66765/371472 [5:18:38<21:35:57, 3.92it/s] 18%|█▊ | 66766/371472 [5:18:38<21:44:55, 3.89it/s] 18%|█▊ | 66767/371472 [5:18:38<21:42:59, 3.90it/s] 18%|█▊ | 66768/371472 [5:18:38<22:16:39, 3.80it/s] 18%|█▊ | 66769/371472 [5:18:39<22:12:11, 3.81it/s] 18%|█▊ | 66770/371472 [5:18:39<24:20:11, 3.48it/s] 18%|█▊ | 66771/371472 [5:18:39<23:47:29, 3.56it/s] 18%|█▊ | 66772/371472 [5:18:40<23:51:29, 3.55it/s] 18%|█▊ | 66773/371472 [5:18:40<22:42:02, 3.73it/s] 18%|█▊ | 66774/371472 [5:18:40<22:07:59, 3.82it/s] 18%|█▊ | 66775/371472 [5:18:40<21:47:47, 3.88it/s] 18%|█▊ | 66776/371472 [5:18:41<22:41:35, 3.73it/s] 18%|█▊ | 66777/371472 [5:18:41<24:41:47, 3.43it/s] 18%|█▊ | 66778/371472 [5:18:41<25:21:19, 3.34it/s] 18%|█▊ | 66779/371472 [5:18:42<24:58:50, 3.39it/s] 18%|█▊ | 66780/371472 [5:18:42<23:54:18, 3.54it/s] {'loss': 4.2004, 'learning_rate': 8.386035036307613e-07, 'epoch': 2.88} + 18%|█▊ | 66780/371472 [5:18:42<23:54:18, 3.54it/s] 18%|█▊ | 66781/371472 [5:18:42<23:02:48, 3.67it/s] 18%|█▊ | 66782/371472 [5:18:42<22:40:13, 3.73it/s] 18%|█▊ | 66783/371472 [5:18:43<22:47:31, 3.71it/s] 18%|█▊ | 66784/371472 [5:18:43<23:31:09, 3.60it/s] 18%|█▊ | 66785/371472 [5:18:43<23:51:38, 3.55it/s] 18%|█▊ | 66786/371472 [5:18:43<23:11:30, 3.65it/s] 18%|█▊ | 66787/371472 [5:18:44<23:02:34, 3.67it/s] 18%|█▊ | 66788/371472 [5:18:44<22:50:58, 3.70it/s] 18%|█▊ | 66789/371472 [5:18:44<24:56:20, 3.39it/s] 18%|█▊ | 66790/371472 [5:18:45<24:37:06, 3.44it/s] 18%|█▊ | 66791/371472 [5:18:45<23:24:02, 3.62it/s] 18%|█▊ | 66792/371472 [5:18:45<23:24:39, 3.62it/s] 18%|█▊ | 66793/371472 [5:18:45<24:51:02, 3.41it/s] 18%|█▊ | 66794/371472 [5:18:46<26:47:40, 3.16it/s] 18%|█▊ | 66795/371472 [5:18:46<27:00:10, 3.13it/s] 18%|█▊ | 66796/371472 [5:18:46<26:37:25, 3.18it/s] 18%|█▊ | 66797/371472 [5:18:47<25:22:24, 3.34it/s] 18%|█▊ | 66798/371472 [5:18:47<24:08:47, 3.50it/s] 18%|█▊ | 66799/371472 [5:18:47<24:01:05, 3.52it/s] 18%|█▊ | 66800/371472 [5:18:48<24:00:02, 3.53it/s] {'loss': 4.3149, 'learning_rate': 8.385550216552824e-07, 'epoch': 2.88} + 18%|█▊ | 66800/371472 [5:18:48<24:00:02, 3.53it/s] 18%|█▊ | 66801/371472 [5:18:48<24:37:33, 3.44it/s] 18%|█▊ | 66802/371472 [5:18:48<26:10:46, 3.23it/s] 18%|█▊ | 66803/371472 [5:18:48<25:03:35, 3.38it/s] 18%|█▊ | 66804/371472 [5:18:49<25:18:27, 3.34it/s] 18%|█▊ | 66805/371472 [5:18:49<24:34:52, 3.44it/s] 18%|█▊ | 66806/371472 [5:18:49<23:36:08, 3.59it/s] 18%|█▊ | 66807/371472 [5:18:50<24:30:58, 3.45it/s] 18%|█▊ | 66808/371472 [5:18:50<23:21:26, 3.62it/s] 18%|█▊ | 66809/371472 [5:18:50<23:07:20, 3.66it/s] 18%|█▊ | 66810/371472 [5:18:50<22:11:15, 3.81it/s] 18%|█▊ | 66811/371472 [5:18:51<21:57:30, 3.85it/s] 18%|█▊ | 66812/371472 [5:18:51<22:36:00, 3.74it/s] 18%|█▊ | 66813/371472 [5:18:51<22:40:53, 3.73it/s] 18%|█▊ | 66814/371472 [5:18:51<23:03:33, 3.67it/s] 18%|█▊ | 66815/371472 [5:18:52<22:04:45, 3.83it/s] 18%|█▊ | 66816/371472 [5:18:52<22:02:54, 3.84it/s] 18%|█▊ | 66817/371472 [5:18:52<23:35:54, 3.59it/s] 18%|█▊ | 66818/371472 [5:18:52<22:37:41, 3.74it/s] 18%|█▊ | 66819/371472 [5:18:53<22:44:38, 3.72it/s] 18%|█▊ | 66820/371472 [5:18:53<22:28:44, 3.76it/s] {'loss': 4.0467, 'learning_rate': 8.385065396798034e-07, 'epoch': 2.88} + 18%|█▊ | 66820/371472 [5:18:53<22:28:44, 3.76it/s] 18%|█▊ | 66821/371472 [5:18:53<22:50:54, 3.70it/s] 18%|█▊ | 66822/371472 [5:18:54<22:06:07, 3.83it/s] 18%|█▊ | 66823/371472 [5:18:54<22:28:51, 3.76it/s] 18%|█▊ | 66824/371472 [5:18:54<22:15:24, 3.80it/s] 18%|█▊ | 66825/371472 [5:18:54<22:42:11, 3.73it/s] 18%|█▊ | 66826/371472 [5:18:55<22:19:46, 3.79it/s] 18%|█▊ | 66827/371472 [5:18:55<22:00:59, 3.84it/s] 18%|█▊ | 66828/371472 [5:18:55<23:06:23, 3.66it/s] 18%|█▊ | 66829/371472 [5:18:55<22:27:01, 3.77it/s] 18%|█▊ | 66830/371472 [5:18:56<22:20:11, 3.79it/s] 18%|█▊ | 66831/371472 [5:18:56<22:01:11, 3.84it/s] 18%|█▊ | 66832/371472 [5:18:56<22:35:57, 3.74it/s] 18%|█▊ | 66833/371472 [5:18:57<25:12:04, 3.36it/s] 18%|█▊ | 66834/371472 [5:18:57<24:00:16, 3.53it/s] 18%|█▊ | 66835/371472 [5:18:57<23:10:36, 3.65it/s] 18%|█▊ | 66836/371472 [5:18:57<22:42:47, 3.73it/s] 18%|█▊ | 66837/371472 [5:18:58<23:01:45, 3.67it/s] 18%|█▊ | 66838/371472 [5:18:58<22:42:57, 3.73it/s] 18%|█▊ | 66839/371472 [5:18:58<23:29:30, 3.60it/s] 18%|█▊ | 66840/371472 [5:18:58<23:05:13, 3.67it/s] {'loss': 4.2308, 'learning_rate': 8.384580577043245e-07, 'epoch': 2.88} + 18%|█▊ | 66840/371472 [5:18:58<23:05:13, 3.67it/s] 18%|█▊ | 66841/371472 [5:18:59<23:31:50, 3.60it/s] 18%|█▊ | 66842/371472 [5:18:59<24:58:44, 3.39it/s] 18%|█▊ | 66843/371472 [5:18:59<25:15:43, 3.35it/s] 18%|█▊ | 66844/371472 [5:19:00<28:15:47, 2.99it/s] 18%|█▊ | 66845/371472 [5:19:00<26:15:41, 3.22it/s] 18%|█▊ | 66846/371472 [5:19:00<25:18:01, 3.34it/s] 18%|█▊ | 66847/371472 [5:19:01<24:42:32, 3.42it/s] 18%|█▊ | 66848/371472 [5:19:01<24:04:57, 3.51it/s] 18%|█▊ | 66849/371472 [5:19:01<24:02:17, 3.52it/s] 18%|█▊ | 66850/371472 [5:19:01<26:15:57, 3.22it/s] 18%|█▊ | 66851/371472 [5:19:02<25:26:46, 3.33it/s] 18%|█▊ | 66852/371472 [5:19:02<24:25:54, 3.46it/s] 18%|█▊ | 66853/371472 [5:19:02<23:31:43, 3.60it/s] 18%|█▊ | 66854/371472 [5:19:03<23:03:24, 3.67it/s] 18%|█▊ | 66855/371472 [5:19:03<22:36:00, 3.74it/s] 18%|█▊ | 66856/371472 [5:19:03<25:10:05, 3.36it/s] 18%|█▊ | 66857/371472 [5:19:03<23:40:39, 3.57it/s] 18%|█▊ | 66858/371472 [5:19:04<23:06:42, 3.66it/s] 18%|█▊ | 66859/371472 [5:19:04<22:43:35, 3.72it/s] 18%|█▊ | 66860/371472 [5:19:04<22:56:59, 3.69it/s] {'loss': 4.2297, 'learning_rate': 8.384095757288457e-07, 'epoch': 2.88} + 18%|█▊ | 66860/371472 [5:19:04<22:56:59, 3.69it/s] 18%|█▊ | 66861/371472 [5:19:04<23:34:37, 3.59it/s] 18%|█▊ | 66862/371472 [5:19:05<22:48:39, 3.71it/s] 18%|█▊ | 66863/371472 [5:19:05<22:22:28, 3.78it/s] 18%|█▊ | 66864/371472 [5:19:05<21:55:45, 3.86it/s] 18%|█▊ | 66865/371472 [5:19:06<22:29:32, 3.76it/s] 18%|█▊ | 66866/371472 [5:19:06<22:33:36, 3.75it/s] 18%|█▊ | 66867/371472 [5:19:06<21:53:47, 3.86it/s] 18%|█▊ | 66868/371472 [5:19:06<22:20:54, 3.79it/s] 18%|█▊ | 66869/371472 [5:19:07<22:10:57, 3.81it/s] 18%|█▊ | 66870/371472 [5:19:07<21:42:31, 3.90it/s] 18%|█▊ | 66871/371472 [5:19:07<22:44:57, 3.72it/s] 18%|█▊ | 66872/371472 [5:19:07<22:28:53, 3.76it/s] 18%|█▊ | 66873/371472 [5:19:08<22:38:25, 3.74it/s] 18%|█▊ | 66874/371472 [5:19:08<22:59:02, 3.68it/s] 18%|█▊ | 66875/371472 [5:19:08<23:03:15, 3.67it/s] 18%|█▊ | 66876/371472 [5:19:08<22:49:42, 3.71it/s] 18%|█▊ | 66877/371472 [5:19:09<22:35:59, 3.74it/s] 18%|█▊ | 66878/371472 [5:19:09<22:39:34, 3.73it/s] 18%|█▊ | 66879/371472 [5:19:09<23:07:31, 3.66it/s] 18%|█▊ | 66880/371472 [5:19:10<23:43:28, 3.57it/s] {'loss': 4.1531, 'learning_rate': 8.383610937533668e-07, 'epoch': 2.88} + 18%|█▊ | 66880/371472 [5:19:10<23:43:28, 3.57it/s] 18%|█▊ | 66881/371472 [5:19:10<23:11:24, 3.65it/s] 18%|█▊ | 66882/371472 [5:19:10<24:56:56, 3.39it/s] 18%|█▊ | 66883/371472 [5:19:10<24:32:24, 3.45it/s] 18%|█▊ | 66884/371472 [5:19:11<24:20:24, 3.48it/s] 18%|█▊ | 66885/371472 [5:19:11<23:40:41, 3.57it/s] 18%|█▊ | 66886/371472 [5:19:11<22:51:20, 3.70it/s] 18%|█▊ | 66887/371472 [5:19:12<24:13:41, 3.49it/s] 18%|█▊ | 66888/371472 [5:19:12<25:46:18, 3.28it/s] 18%|█▊ | 66889/371472 [5:19:12<25:13:21, 3.35it/s] 18%|█▊ | 66890/371472 [5:19:13<26:05:33, 3.24it/s] 18%|█▊ | 66891/371472 [5:19:13<24:50:17, 3.41it/s] 18%|█▊ | 66892/371472 [5:19:13<25:01:07, 3.38it/s] 18%|█▊ | 66893/371472 [5:19:13<23:24:04, 3.62it/s] 18%|█▊ | 66894/371472 [5:19:14<24:32:56, 3.45it/s] 18%|█▊ | 66895/371472 [5:19:14<25:33:26, 3.31it/s] 18%|█▊ | 66896/371472 [5:19:14<24:59:59, 3.38it/s] 18%|█▊ | 66897/371472 [5:19:15<25:11:34, 3.36it/s] 18%|█▊ | 66898/371472 [5:19:15<24:56:27, 3.39it/s] 18%|█▊ | 66899/371472 [5:19:15<24:48:43, 3.41it/s] 18%|█▊ | 66900/371472 [5:19:15<25:56:35, 3.26it/s] {'loss': 4.127, 'learning_rate': 8.383126117778878e-07, 'epoch': 2.88} + 18%|█▊ | 66900/371472 [5:19:15<25:56:35, 3.26it/s] 18%|█▊ | 66901/371472 [5:19:16<25:16:07, 3.35it/s] 18%|█▊ | 66902/371472 [5:19:16<24:18:11, 3.48it/s] 18%|█▊ | 66903/371472 [5:19:16<25:45:05, 3.29it/s] 18%|█▊ | 66904/371472 [5:19:17<25:39:17, 3.30it/s] 18%|█▊ | 66905/371472 [5:19:17<25:01:32, 3.38it/s] 18%|█▊ | 66906/371472 [5:19:17<24:27:54, 3.46it/s] 18%|█▊ | 66907/371472 [5:19:18<25:57:39, 3.26it/s] 18%|█▊ | 66908/371472 [5:19:18<27:22:14, 3.09it/s] 18%|█▊ | 66909/371472 [5:19:18<26:31:45, 3.19it/s] 18%|█▊ | 66910/371472 [5:19:19<26:25:48, 3.20it/s] 18%|█▊ | 66911/371472 [5:19:19<25:17:30, 3.34it/s] 18%|█▊ | 66912/371472 [5:19:19<24:03:14, 3.52it/s] 18%|█▊ | 66913/371472 [5:19:19<25:29:04, 3.32it/s] 18%|█▊ | 66914/371472 [5:19:20<25:01:22, 3.38it/s] 18%|█▊ | 66915/371472 [5:19:20<23:40:00, 3.57it/s] 18%|█▊ | 66916/371472 [5:19:20<23:23:56, 3.62it/s] 18%|█▊ | 66917/371472 [5:19:20<23:07:31, 3.66it/s] 18%|█▊ | 66918/371472 [5:19:21<23:35:58, 3.58it/s] 18%|█▊ | 66919/371472 [5:19:21<23:32:35, 3.59it/s] 18%|█▊ | 66920/371472 [5:19:21<23:37:46, 3.58it/s] {'loss': 4.2216, 'learning_rate': 8.38264129802409e-07, 'epoch': 2.88} + 18%|█▊ | 66920/371472 [5:19:21<23:37:46, 3.58it/s] 18%|█▊ | 66921/371472 [5:19:22<24:30:53, 3.45it/s] 18%|█▊ | 66922/371472 [5:19:22<23:17:53, 3.63it/s] 18%|█▊ | 66923/371472 [5:19:22<24:08:57, 3.50it/s] 18%|█▊ | 66924/371472 [5:19:22<24:19:28, 3.48it/s] 18%|█▊ | 66925/371472 [5:19:23<23:08:05, 3.66it/s] 18%|█▊ | 66926/371472 [5:19:23<22:34:35, 3.75it/s] 18%|█▊ | 66927/371472 [5:19:23<22:01:04, 3.84it/s] 18%|█▊ | 66928/371472 [5:19:23<22:19:17, 3.79it/s] 18%|█▊ | 66929/371472 [5:19:24<23:17:25, 3.63it/s] 18%|█▊ | 66930/371472 [5:19:24<23:33:01, 3.59it/s] 18%|█▊ | 66931/371472 [5:19:24<23:45:06, 3.56it/s] 18%|█▊ | 66932/371472 [5:19:25<24:44:46, 3.42it/s] 18%|█▊ | 66933/371472 [5:19:25<23:36:19, 3.58it/s] 18%|█▊ | 66934/371472 [5:19:25<23:19:52, 3.63it/s] 18%|█▊ | 66935/371472 [5:19:25<24:14:31, 3.49it/s] 18%|█▊ | 66936/371472 [5:19:26<22:46:53, 3.71it/s] 18%|█▊ | 66937/371472 [5:19:26<22:10:44, 3.81it/s] 18%|█▊ | 66938/371472 [5:19:26<25:37:12, 3.30it/s] 18%|█▊ | 66939/371472 [5:19:27<25:35:16, 3.31it/s] 18%|█▊ | 66940/371472 [5:19:27<24:29:37, 3.45it/s] {'loss': 4.1503, 'learning_rate': 8.382156478269301e-07, 'epoch': 2.88} + 18%|█▊ | 66940/371472 [5:19:27<24:29:37, 3.45it/s] 18%|█▊ | 66941/371472 [5:19:27<24:19:50, 3.48it/s] 18%|█▊ | 66942/371472 [5:19:27<23:18:25, 3.63it/s] 18%|█▊ | 66943/371472 [5:19:28<23:00:59, 3.68it/s] 18%|█▊ | 66944/371472 [5:19:28<22:17:33, 3.79it/s] 18%|█▊ | 66945/371472 [5:19:28<22:18:19, 3.79it/s] 18%|█▊ | 66946/371472 [5:19:29<24:46:23, 3.41it/s] 18%|█▊ | 66947/371472 [5:19:29<24:50:30, 3.41it/s] 18%|█▊ | 66948/371472 [5:19:29<25:13:15, 3.35it/s] 18%|█▊ | 66949/371472 [5:19:29<24:38:37, 3.43it/s] 18%|█▊ | 66950/371472 [5:19:30<23:30:34, 3.60it/s] 18%|█▊ | 66951/371472 [5:19:30<22:42:23, 3.73it/s] 18%|█▊ | 66952/371472 [5:19:30<23:46:59, 3.56it/s] 18%|█▊ | 66953/371472 [5:19:31<24:24:47, 3.46it/s] 18%|█▊ | 66954/371472 [5:19:31<24:00:45, 3.52it/s] 18%|█▊ | 66955/371472 [5:19:31<24:15:29, 3.49it/s] 18%|█▊ | 66956/371472 [5:19:31<23:34:05, 3.59it/s] 18%|█▊ | 66957/371472 [5:19:32<22:47:03, 3.71it/s] 18%|█▊ | 66958/371472 [5:19:32<24:49:07, 3.41it/s] 18%|█▊ | 66959/371472 [5:19:32<23:38:25, 3.58it/s] 18%|█▊ | 66960/371472 [5:19:33<23:57:37, 3.53it/s] {'loss': 4.1333, 'learning_rate': 8.381671658514512e-07, 'epoch': 2.88} + 18%|█▊ | 66960/371472 [5:19:33<23:57:37, 3.53it/s] 18%|█▊ | 66961/371472 [5:19:33<23:12:22, 3.64it/s] 18%|█▊ | 66962/371472 [5:19:33<23:05:17, 3.66it/s] 18%|█▊ | 66963/371472 [5:19:33<24:35:47, 3.44it/s] 18%|█▊ | 66964/371472 [5:19:34<24:10:26, 3.50it/s] 18%|█▊ | 66965/371472 [5:19:34<24:15:05, 3.49it/s] 18%|█▊ | 66966/371472 [5:19:34<23:57:40, 3.53it/s] 18%|█▊ | 66967/371472 [5:19:34<22:59:10, 3.68it/s] 18%|█▊ | 66968/371472 [5:19:35<24:22:17, 3.47it/s] 18%|█▊ | 66969/371472 [5:19:35<24:25:46, 3.46it/s] 18%|█▊ | 66970/371472 [5:19:35<25:14:24, 3.35it/s] 18%|█▊ | 66971/371472 [5:19:36<25:49:35, 3.28it/s] 18%|█▊ | 66972/371472 [5:19:36<24:47:02, 3.41it/s] 18%|█▊ | 66973/371472 [5:19:36<23:24:22, 3.61it/s] 18%|█▊ | 66974/371472 [5:19:37<23:32:37, 3.59it/s] 18%|█▊ | 66975/371472 [5:19:37<23:38:32, 3.58it/s] 18%|█▊ | 66976/371472 [5:19:37<22:47:13, 3.71it/s] 18%|█▊ | 66977/371472 [5:19:37<23:09:26, 3.65it/s] 18%|█▊ | 66978/371472 [5:19:38<23:01:46, 3.67it/s] 18%|█▊ | 66979/371472 [5:19:38<24:37:48, 3.43it/s] 18%|█▊ | 66980/371472 [5:19:38<24:26:00, 3.46it/s] {'loss': 4.2919, 'learning_rate': 8.381186838759722e-07, 'epoch': 2.88} + 18%|█▊ | 66980/371472 [5:19:38<24:26:00, 3.46it/s] 18%|█▊ | 66981/371472 [5:19:38<24:21:31, 3.47it/s] 18%|█▊ | 66982/371472 [5:19:39<24:27:58, 3.46it/s] 18%|█▊ | 66983/371472 [5:19:39<23:43:51, 3.56it/s] 18%|█▊ | 66984/371472 [5:19:39<23:17:04, 3.63it/s] 18%|█▊ | 66985/371472 [5:19:40<23:47:33, 3.55it/s] 18%|█▊ | 66986/371472 [5:19:40<23:12:36, 3.64it/s] 18%|█▊ | 66987/371472 [5:19:40<24:02:39, 3.52it/s] 18%|█▊ | 66988/371472 [5:19:40<24:07:58, 3.50it/s] 18%|█▊ | 66989/371472 [5:19:41<23:36:57, 3.58it/s] 18%|█▊ | 66990/371472 [5:19:41<22:56:24, 3.69it/s] 18%|█▊ | 66991/371472 [5:19:41<24:01:52, 3.52it/s] 18%|█▊ | 66992/371472 [5:19:42<24:09:23, 3.50it/s] 18%|█▊ | 66993/371472 [5:19:42<23:08:41, 3.65it/s] 18%|█▊ | 66994/371472 [5:19:42<25:14:06, 3.35it/s] 18%|█▊ | 66995/371472 [5:19:42<23:53:35, 3.54it/s] 18%|█▊ | 66996/371472 [5:19:43<22:56:37, 3.69it/s] 18%|█▊ | 66997/371472 [5:19:43<23:51:24, 3.55it/s] 18%|█▊ | 66998/371472 [5:19:43<23:14:54, 3.64it/s] 18%|█▊ | 66999/371472 [5:19:44<23:18:24, 3.63it/s] 18%|█▊ | 67000/371472 [5:19:44<25:29:35, 3.32it/s] {'loss': 4.2744, 'learning_rate': 8.380702019004934e-07, 'epoch': 2.89} + 18%|█▊ | 67000/371472 [5:19:44<25:29:35, 3.32it/s] 18%|█▊ | 67001/371472 [5:19:44<25:21:38, 3.33it/s] 18%|█▊ | 67002/371472 [5:19:44<24:49:41, 3.41it/s] 18%|█▊ | 67003/371472 [5:19:45<23:27:13, 3.61it/s] 18%|█▊ | 67004/371472 [5:19:45<23:10:54, 3.65it/s] 18%|█▊ | 67005/371472 [5:19:45<23:42:26, 3.57it/s] 18%|█▊ | 67006/371472 [5:19:46<24:38:18, 3.43it/s] 18%|█▊ | 67007/371472 [5:19:46<24:06:10, 3.51it/s] 18%|█▊ | 67008/371472 [5:19:46<23:50:39, 3.55it/s] 18%|█▊ | 67009/371472 [5:19:46<24:31:54, 3.45it/s] 18%|█▊ | 67010/371472 [5:19:47<25:39:40, 3.30it/s] 18%|█▊ | 67011/371472 [5:19:47<25:18:57, 3.34it/s] 18%|█▊ | 67012/371472 [5:19:47<25:09:32, 3.36it/s] 18%|█▊ | 67013/371472 [5:19:48<25:04:40, 3.37it/s] 18%|█▊ | 67014/371472 [5:19:48<24:42:07, 3.42it/s] 18%|█▊ | 67015/371472 [5:19:48<24:45:09, 3.42it/s] 18%|█▊ | 67016/371472 [5:19:49<31:10:52, 2.71it/s] 18%|█▊ | 67017/371472 [5:19:49<28:37:00, 2.96it/s] 18%|█▊ | 67018/371472 [5:19:49<26:32:20, 3.19it/s] 18%|█▊ | 67019/371472 [5:19:50<25:25:51, 3.33it/s] 18%|█▊ | 67020/371472 [5:19:50<31:13:06, 2.71it/s] {'loss': 4.3332, 'learning_rate': 8.380217199250146e-07, 'epoch': 2.89} + 18%|█▊ | 67020/371472 [5:19:50<31:13:06, 2.71it/s] 18%|█▊ | 67021/371472 [5:19:50<29:17:26, 2.89it/s] 18%|█▊ | 67022/371472 [5:19:51<27:09:15, 3.11it/s] 18%|█▊ | 67023/371472 [5:19:51<26:01:04, 3.25it/s] 18%|█▊ | 67024/371472 [5:19:51<24:27:49, 3.46it/s] 18%|█▊ | 67025/371472 [5:19:51<24:15:31, 3.49it/s] 18%|█▊ | 67026/371472 [5:19:52<24:14:58, 3.49it/s] 18%|█▊ | 67027/371472 [5:19:52<24:02:08, 3.52it/s] 18%|█▊ | 67028/371472 [5:19:52<23:44:54, 3.56it/s] 18%|█▊ | 67029/371472 [5:19:53<23:23:34, 3.62it/s] 18%|█▊ | 67030/371472 [5:19:53<24:06:53, 3.51it/s] 18%|█▊ | 67031/371472 [5:19:53<23:44:56, 3.56it/s] 18%|█▊ | 67032/371472 [5:19:53<25:39:07, 3.30it/s] 18%|█▊ | 67033/371472 [5:19:54<26:18:52, 3.21it/s] 18%|█▊ | 67034/371472 [5:19:54<24:59:21, 3.38it/s] 18%|█▊ | 67035/371472 [5:19:54<24:08:05, 3.50it/s] 18%|█▊ | 67036/371472 [5:19:55<23:29:34, 3.60it/s] 18%|█▊ | 67037/371472 [5:19:55<24:59:12, 3.38it/s] 18%|█▊ | 67038/371472 [5:19:55<24:47:19, 3.41it/s] 18%|█▊ | 67039/371472 [5:19:55<23:42:34, 3.57it/s] 18%|█▊ | 67040/371472 [5:19:56<24:11:10, 3.50it/s] {'loss': 4.237, 'learning_rate': 8.379732379495357e-07, 'epoch': 2.89} + 18%|█▊ | 67040/371472 [5:19:56<24:11:10, 3.50it/s] 18%|█▊ | 67041/371472 [5:19:56<23:30:14, 3.60it/s] 18%|█▊ | 67042/371472 [5:19:56<23:41:22, 3.57it/s] 18%|█▊ | 67043/371472 [5:19:57<24:32:51, 3.44it/s] 18%|█▊ | 67044/371472 [5:19:57<24:02:18, 3.52it/s] 18%|█▊ | 67045/371472 [5:19:57<23:51:59, 3.54it/s] 18%|█▊ | 67046/371472 [5:19:57<23:25:22, 3.61it/s] 18%|█▊ | 67047/371472 [5:19:58<24:55:14, 3.39it/s] 18%|█▊ | 67048/371472 [5:19:58<25:13:37, 3.35it/s] 18%|█▊ | 67049/371472 [5:19:58<24:41:22, 3.43it/s] 18%|█▊ | 67050/371472 [5:19:59<23:15:49, 3.63it/s] 18%|█▊ | 67051/371472 [5:19:59<25:49:38, 3.27it/s] 18%|█▊ | 67052/371472 [5:19:59<25:03:36, 3.37it/s] 18%|█▊ | 67053/371472 [5:20:00<24:24:12, 3.47it/s] 18%|█▊ | 67054/371472 [5:20:00<24:47:56, 3.41it/s] 18%|█▊ | 67055/371472 [5:20:00<25:18:45, 3.34it/s] 18%|█▊ | 67056/371472 [5:20:00<24:41:53, 3.42it/s] 18%|█▊ | 67057/371472 [5:20:01<24:47:13, 3.41it/s] 18%|█▊ | 67058/371472 [5:20:01<24:04:19, 3.51it/s] 18%|█▊ | 67059/371472 [5:20:01<23:51:09, 3.55it/s] 18%|█▊ | 67060/371472 [5:20:02<24:24:26, 3.46it/s] {'loss': 4.0655, 'learning_rate': 8.379247559740567e-07, 'epoch': 2.89} + 18%|█▊ | 67060/371472 [5:20:02<24:24:26, 3.46it/s] 18%|█▊ | 67061/371472 [5:20:02<23:31:01, 3.60it/s] 18%|█▊ | 67062/371472 [5:20:02<23:12:27, 3.64it/s] 18%|█▊ | 67063/371472 [5:20:02<23:43:34, 3.56it/s] 18%|█▊ | 67064/371472 [5:20:03<22:48:36, 3.71it/s] 18%|█▊ | 67065/371472 [5:20:03<22:55:08, 3.69it/s] 18%|█▊ | 67066/371472 [5:20:03<22:54:43, 3.69it/s] 18%|█▊ | 67067/371472 [5:20:03<22:58:01, 3.68it/s] 18%|█▊ | 67068/371472 [5:20:04<24:17:03, 3.48it/s] 18%|█▊ | 67069/371472 [5:20:04<23:14:55, 3.64it/s] 18%|█▊ | 67070/371472 [5:20:04<24:27:24, 3.46it/s] 18%|█▊ | 67071/371472 [5:20:05<24:07:29, 3.50it/s] 18%|█▊ | 67072/371472 [5:20:05<23:13:04, 3.64it/s] 18%|█▊ | 67073/371472 [5:20:05<23:06:13, 3.66it/s] 18%|█▊ | 67074/371472 [5:20:05<23:05:39, 3.66it/s] 18%|█▊ | 67075/371472 [5:20:06<23:34:06, 3.59it/s] 18%|█▊ | 67076/371472 [5:20:06<22:59:27, 3.68it/s] 18%|█▊ | 67077/371472 [5:20:06<22:35:29, 3.74it/s] 18%|█▊ | 67078/371472 [5:20:06<22:28:04, 3.76it/s] 18%|█▊ | 67079/371472 [5:20:07<22:29:40, 3.76it/s] 18%|█▊ | 67080/371472 [5:20:07<22:07:34, 3.82it/s] {'loss': 4.3082, 'learning_rate': 8.378762739985777e-07, 'epoch': 2.89} + 18%|█▊ | 67080/371472 [5:20:07<22:07:34, 3.82it/s] 18%|█▊ | 67081/371472 [5:20:07<22:55:24, 3.69it/s] 18%|█▊ | 67082/371472 [5:20:08<26:11:21, 3.23it/s] 18%|█▊ | 67083/371472 [5:20:08<27:46:28, 3.04it/s] 18%|█▊ | 67084/371472 [5:20:08<26:37:20, 3.18it/s] 18%|█▊ | 67085/371472 [5:20:09<25:11:04, 3.36it/s] 18%|█▊ | 67086/371472 [5:20:09<24:55:08, 3.39it/s] 18%|█▊ | 67087/371472 [5:20:09<25:52:39, 3.27it/s] 18%|█▊ | 67088/371472 [5:20:09<24:55:52, 3.39it/s] 18%|█▊ | 67089/371472 [5:20:10<24:07:18, 3.51it/s] 18%|█▊ | 67090/371472 [5:20:10<23:49:30, 3.55it/s] 18%|█▊ | 67091/371472 [5:20:10<24:33:44, 3.44it/s] 18%|█▊ | 67092/371472 [5:20:11<23:42:06, 3.57it/s] 18%|█▊ | 67093/371472 [5:20:11<22:53:13, 3.69it/s] 18%|█▊ | 67094/371472 [5:20:11<22:55:40, 3.69it/s] 18%|█▊ | 67095/371472 [5:20:11<22:27:51, 3.76it/s] 18%|█▊ | 67096/371472 [5:20:12<21:55:22, 3.86it/s] 18%|█▊ | 67097/371472 [5:20:12<22:43:13, 3.72it/s] 18%|█▊ | 67098/371472 [5:20:12<22:17:46, 3.79it/s] 18%|█▊ | 67099/371472 [5:20:12<21:43:50, 3.89it/s] 18%|█▊ | 67100/371472 [5:20:13<22:46:54, 3.71it/s] {'loss': 4.2381, 'learning_rate': 8.37827792023099e-07, 'epoch': 2.89} + 18%|█▊ | 67100/371472 [5:20:13<22:46:54, 3.71it/s] 18%|█▊ | 67101/371472 [5:20:13<22:50:11, 3.70it/s] 18%|█▊ | 67102/371472 [5:20:13<24:28:15, 3.45it/s] 18%|█▊ | 67103/371472 [5:20:14<26:54:19, 3.14it/s] 18%|█▊ | 67104/371472 [5:20:14<25:27:01, 3.32it/s] 18%|█▊ | 67105/371472 [5:20:14<24:35:34, 3.44it/s] 18%|█▊ | 67106/371472 [5:20:14<23:48:34, 3.55it/s] 18%|█▊ | 67107/371472 [5:20:15<23:27:27, 3.60it/s] 18%|█▊ | 67108/371472 [5:20:15<22:41:32, 3.73it/s] 18%|█▊ | 67109/371472 [5:20:15<25:27:00, 3.32it/s] 18%|█▊ | 67110/371472 [5:20:16<27:37:44, 3.06it/s] 18%|█▊ | 67111/371472 [5:20:16<27:31:39, 3.07it/s] 18%|█▊ | 67112/371472 [5:20:16<26:23:49, 3.20it/s] 18%|█▊ | 67113/371472 [5:20:17<24:29:42, 3.45it/s] 18%|█▊ | 67114/371472 [5:20:17<24:02:41, 3.52it/s] 18%|█▊ | 67115/371472 [5:20:17<23:12:14, 3.64it/s] 18%|█▊ | 67116/371472 [5:20:17<22:51:38, 3.70it/s] 18%|█▊ | 67117/371472 [5:20:18<24:12:40, 3.49it/s] 18%|█▊ | 67118/371472 [5:20:18<23:46:16, 3.56it/s] 18%|█▊ | 67119/371472 [5:20:18<22:47:22, 3.71it/s] 18%|█▊ | 67120/371472 [5:20:18<22:39:51, 3.73it/s] {'loss': 4.0887, 'learning_rate': 8.3777931004762e-07, 'epoch': 2.89} + 18%|█▊ | 67120/371472 [5:20:18<22:39:51, 3.73it/s] 18%|█▊ | 67121/371472 [5:20:19<23:00:46, 3.67it/s] 18%|█▊ | 67122/371472 [5:20:19<23:50:35, 3.55it/s] 18%|█▊ | 67123/371472 [5:20:19<23:30:50, 3.60it/s] 18%|█▊ | 67124/371472 [5:20:20<24:53:43, 3.40it/s] 18%|█▊ | 67125/371472 [5:20:20<25:34:31, 3.31it/s] 18%|█▊ | 67126/371472 [5:20:20<25:08:50, 3.36it/s] 18%|█▊ | 67127/371472 [5:20:21<24:55:08, 3.39it/s] 18%|█▊ | 67128/371472 [5:20:21<25:06:37, 3.37it/s] 18%|█▊ | 67129/371472 [5:20:21<25:15:00, 3.35it/s] 18%|█▊ | 67130/371472 [5:20:21<25:03:17, 3.37it/s] 18%|█▊ | 67131/371472 [5:20:22<24:05:50, 3.51it/s] 18%|█▊ | 67132/371472 [5:20:22<23:18:09, 3.63it/s] 18%|█▊ | 67133/371472 [5:20:22<23:02:33, 3.67it/s] 18%|█▊ | 67134/371472 [5:20:23<23:56:19, 3.53it/s] 18%|█▊ | 67135/371472 [5:20:23<24:25:50, 3.46it/s] 18%|█▊ | 67136/371472 [5:20:23<24:59:41, 3.38it/s] 18%|█▊ | 67137/371472 [5:20:23<24:49:01, 3.41it/s] 18%|█▊ | 67138/371472 [5:20:24<24:40:51, 3.43it/s] 18%|█▊ | 67139/371472 [5:20:24<24:10:34, 3.50it/s] 18%|█▊ | 67140/371472 [5:20:24<24:10:55, 3.50it/s] {'loss': 4.1576, 'learning_rate': 8.377308280721411e-07, 'epoch': 2.89} + 18%|█▊ | 67140/371472 [5:20:24<24:10:55, 3.50it/s] 18%|█▊ | 67141/371472 [5:20:25<23:47:39, 3.55it/s] 18%|█▊ | 67142/371472 [5:20:25<23:56:50, 3.53it/s] 18%|█▊ | 67143/371472 [5:20:25<25:46:57, 3.28it/s] 18%|█▊ | 67144/371472 [5:20:25<24:19:28, 3.48it/s] 18%|█▊ | 67145/371472 [5:20:26<23:42:38, 3.57it/s] 18%|█▊ | 67146/371472 [5:20:26<22:49:34, 3.70it/s] 18%|█▊ | 67147/371472 [5:20:26<24:18:00, 3.48it/s] 18%|█▊ | 67148/371472 [5:20:27<24:27:11, 3.46it/s] 18%|█▊ | 67149/371472 [5:20:27<24:08:35, 3.50it/s] 18%|█▊ | 67150/371472 [5:20:27<23:41:05, 3.57it/s] 18%|█▊ | 67151/371472 [5:20:27<23:22:23, 3.62it/s] 18%|█▊ | 67152/371472 [5:20:28<23:47:04, 3.55it/s] 18%|█▊ | 67153/371472 [5:20:28<22:58:22, 3.68it/s] 18%|█▊ | 67154/371472 [5:20:28<23:06:22, 3.66it/s] 18%|█▊ | 67155/371472 [5:20:29<25:06:21, 3.37it/s] 18%|█▊ | 67156/371472 [5:20:29<26:43:34, 3.16it/s] 18%|█▊ | 67157/371472 [5:20:29<26:20:31, 3.21it/s] 18%|█▊ | 67158/371472 [5:20:30<26:05:26, 3.24it/s] 18%|█▊ | 67159/371472 [5:20:30<25:16:57, 3.34it/s] 18%|█▊ | 67160/371472 [5:20:30<26:06:02, 3.24it/s] {'loss': 4.1743, 'learning_rate': 8.376823460966623e-07, 'epoch': 2.89} + 18%|█▊ | 67160/371472 [5:20:30<26:06:02, 3.24it/s] 18%|█▊ | 67161/371472 [5:20:30<25:33:26, 3.31it/s] 18%|█▊ | 67162/371472 [5:20:31<26:09:01, 3.23it/s] 18%|█▊ | 67163/371472 [5:20:31<24:52:23, 3.40it/s] 18%|█▊ | 67164/371472 [5:20:31<23:45:53, 3.56it/s] 18%|█▊ | 67165/371472 [5:20:31<23:20:19, 3.62it/s] 18%|█▊ | 67166/371472 [5:20:32<22:54:15, 3.69it/s] 18%|█▊ | 67167/371472 [5:20:32<22:36:04, 3.74it/s] 18%|█▊ | 67168/371472 [5:20:32<24:00:18, 3.52it/s] 18%|█▊ | 67169/371472 [5:20:33<23:42:19, 3.57it/s] 18%|█▊ | 67170/371472 [5:20:33<26:06:36, 3.24it/s] 18%|█▊ | 67171/371472 [5:20:33<26:26:00, 3.20it/s] 18%|█▊ | 67172/371472 [5:20:34<25:08:01, 3.36it/s] 18%|█▊ | 67173/371472 [5:20:34<26:44:50, 3.16it/s] 18%|█▊ | 67174/371472 [5:20:34<26:56:53, 3.14it/s] 18%|█▊ | 67175/371472 [5:20:35<25:19:48, 3.34it/s] 18%|█▊ | 67176/371472 [5:20:35<26:43:19, 3.16it/s] 18%|█▊ | 67177/371472 [5:20:35<26:53:20, 3.14it/s] 18%|█▊ | 67178/371472 [5:20:35<25:56:31, 3.26it/s] 18%|█▊ | 67179/371472 [5:20:36<25:18:29, 3.34it/s] 18%|█▊ | 67180/371472 [5:20:36<23:56:21, 3.53it/s] {'loss': 4.2787, 'learning_rate': 8.376338641211834e-07, 'epoch': 2.89} + 18%|█▊ | 67180/371472 [5:20:36<23:56:21, 3.53it/s] 18%|█▊ | 67181/371472 [5:20:36<22:40:10, 3.73it/s] 18%|█▊ | 67182/371472 [5:20:37<23:35:48, 3.58it/s] 18%|█▊ | 67183/371472 [5:20:37<24:50:36, 3.40it/s] 18%|█▊ | 67184/371472 [5:20:37<24:30:32, 3.45it/s] 18%|█▊ | 67185/371472 [5:20:37<24:36:41, 3.43it/s] 18%|█▊ | 67186/371472 [5:20:38<25:22:58, 3.33it/s] 18%|█▊ | 67187/371472 [5:20:38<27:53:52, 3.03it/s] 18%|█▊ | 67188/371472 [5:20:38<26:22:32, 3.20it/s] 18%|█▊ | 67189/371472 [5:20:39<25:58:36, 3.25it/s] 18%|█▊ | 67190/371472 [5:20:39<25:01:41, 3.38it/s] 18%|█▊ | 67191/371472 [5:20:39<26:02:29, 3.25it/s] 18%|█▊ | 67192/371472 [5:20:40<25:16:48, 3.34it/s] 18%|█▊ | 67193/371472 [5:20:40<24:01:29, 3.52it/s] 18%|█▊ | 67194/371472 [5:20:40<24:05:26, 3.51it/s] 18%|█▊ | 67195/371472 [5:20:40<23:40:59, 3.57it/s] 18%|█▊ | 67196/371472 [5:20:41<24:37:07, 3.43it/s] 18%|█▊ | 67197/371472 [5:20:41<23:37:03, 3.58it/s] 18%|█▊ | 67198/371472 [5:20:41<26:28:28, 3.19it/s] 18%|█▊ | 67199/371472 [5:20:42<25:29:31, 3.32it/s] 18%|█▊ | 67200/371472 [5:20:42<24:24:26, 3.46it/s] {'loss': 4.2371, 'learning_rate': 8.375853821457044e-07, 'epoch': 2.89} + 18%|█▊ | 67200/371472 [5:20:42<24:24:26, 3.46it/s] 18%|█▊ | 67201/371472 [5:20:42<24:36:25, 3.43it/s] 18%|█▊ | 67202/371472 [5:20:42<24:20:59, 3.47it/s] 18%|█▊ | 67203/371472 [5:20:43<23:37:34, 3.58it/s] 18%|█▊ | 67204/371472 [5:20:43<22:53:04, 3.69it/s] 18%|█▊ | 67205/371472 [5:20:43<22:38:45, 3.73it/s] 18%|█▊ | 67206/371472 [5:20:43<21:42:19, 3.89it/s] 18%|█▊ | 67207/371472 [5:20:44<21:36:53, 3.91it/s] 18%|█▊ | 67208/371472 [5:20:44<22:40:45, 3.73it/s] 18%|█▊ | 67209/371472 [5:20:44<22:29:04, 3.76it/s] 18%|█▊ | 67210/371472 [5:20:45<22:45:16, 3.71it/s] 18%|█▊ | 67211/371472 [5:20:45<22:45:49, 3.71it/s] 18%|█▊ | 67212/371472 [5:20:45<22:03:37, 3.83it/s] 18%|█▊ | 67213/371472 [5:20:45<21:30:59, 3.93it/s] 18%|█▊ | 67214/371472 [5:20:46<21:15:28, 3.98it/s] 18%|█▊ | 67215/371472 [5:20:46<21:24:05, 3.95it/s] 18%|█▊ | 67216/371472 [5:20:46<21:17:14, 3.97it/s] 18%|█▊ | 67217/371472 [5:20:46<21:41:01, 3.90it/s] 18%|█▊ | 67218/371472 [5:20:47<22:33:46, 3.75it/s] 18%|█▊ | 67219/371472 [5:20:47<22:41:02, 3.73it/s] 18%|█▊ | 67220/371472 [5:20:47<22:57:51, 3.68it/s] {'loss': 4.2381, 'learning_rate': 8.375369001702255e-07, 'epoch': 2.9} + 18%|█▊ | 67220/371472 [5:20:47<22:57:51, 3.68it/s] 18%|█▊ | 67221/371472 [5:20:47<23:05:47, 3.66it/s] 18%|█▊ | 67222/371472 [5:20:48<25:04:57, 3.37it/s] 18%|█▊ | 67223/371472 [5:20:48<24:05:41, 3.51it/s] 18%|█▊ | 67224/371472 [5:20:48<24:21:05, 3.47it/s] 18%|█▊ | 67225/371472 [5:20:49<23:33:25, 3.59it/s] 18%|█▊ | 67226/371472 [5:20:49<23:14:16, 3.64it/s] 18%|█▊ | 67227/371472 [5:20:49<22:12:08, 3.81it/s] 18%|█▊ | 67228/371472 [5:20:49<24:39:08, 3.43it/s] 18%|█▊ | 67229/371472 [5:20:50<23:26:07, 3.61it/s] 18%|█▊ | 67230/371472 [5:20:50<24:19:32, 3.47it/s] 18%|█▊ | 67231/371472 [5:20:50<25:25:47, 3.32it/s] 18%|█▊ | 67232/371472 [5:20:51<26:03:26, 3.24it/s] 18%|█▊ | 67233/371472 [5:20:51<28:32:52, 2.96it/s] 18%|█▊ | 67234/371472 [5:20:51<26:39:57, 3.17it/s] 18%|█▊ | 67235/371472 [5:20:52<26:44:05, 3.16it/s] 18%|█▊ | 67236/371472 [5:20:52<25:28:54, 3.32it/s] 18%|█▊ | 67237/371472 [5:20:52<24:14:48, 3.49it/s] 18%|█▊ | 67238/371472 [5:20:53<27:09:30, 3.11it/s] 18%|█▊ | 67239/371472 [5:20:53<25:06:23, 3.37it/s] 18%|█▊ | 67240/371472 [5:20:53<26:15:01, 3.22it/s] {'loss': 4.3043, 'learning_rate': 8.374884181947467e-07, 'epoch': 2.9} + 18%|█▊ | 67240/371472 [5:20:53<26:15:01, 3.22it/s] 18%|█▊ | 67241/371472 [5:20:54<27:20:05, 3.09it/s] 18%|█▊ | 67242/371472 [5:20:54<25:24:29, 3.33it/s] 18%|█▊ | 67243/371472 [5:20:54<24:41:08, 3.42it/s] 18%|█▊ | 67244/371472 [5:20:54<26:10:45, 3.23it/s] 18%|█▊ | 67245/371472 [5:20:55<27:06:23, 3.12it/s] 18%|█▊ | 67246/371472 [5:20:55<25:29:08, 3.32it/s] 18%|█▊ | 67247/371472 [5:20:55<24:22:42, 3.47it/s] 18%|█▊ | 67248/371472 [5:20:56<24:35:38, 3.44it/s] 18%|█▊ | 67249/371472 [5:20:56<23:22:08, 3.62it/s] 18%|█▊ | 67250/371472 [5:20:56<22:39:38, 3.73it/s] 18%|█▊ | 67251/371472 [5:20:56<21:59:03, 3.84it/s] 18%|█▊ | 67252/371472 [5:20:57<22:09:44, 3.81it/s] 18%|█▊ | 67253/371472 [5:20:57<22:43:14, 3.72it/s] 18%|█▊ | 67254/371472 [5:20:57<23:27:31, 3.60it/s] 18%|█▊ | 67255/371472 [5:20:57<23:05:47, 3.66it/s] 18%|█▊ | 67256/371472 [5:20:58<22:24:48, 3.77it/s] 18%|█▊ | 67257/371472 [5:20:58<24:59:31, 3.38it/s] 18%|█▊ | 67258/371472 [5:20:58<25:21:44, 3.33it/s] 18%|█▊ | 67259/371472 [5:20:59<24:28:39, 3.45it/s] 18%|█▊ | 67260/371472 [5:20:59<23:13:04, 3.64it/s] {'loss': 4.1704, 'learning_rate': 8.374399362192678e-07, 'epoch': 2.9} + 18%|█▊ | 67260/371472 [5:20:59<23:13:04, 3.64it/s] 18%|█▊ | 67261/371472 [5:20:59<23:17:32, 3.63it/s] 18%|█▊ | 67262/371472 [5:20:59<23:39:57, 3.57it/s] 18%|█▊ | 67263/371472 [5:21:00<22:59:48, 3.67it/s] 18%|█▊ | 67264/371472 [5:21:00<22:10:09, 3.81it/s] 18%|█▊ | 67265/371472 [5:21:00<22:56:19, 3.68it/s] 18%|█▊ | 67266/371472 [5:21:00<23:23:01, 3.61it/s] 18%|█▊ | 67267/371472 [5:21:01<23:37:50, 3.58it/s] 18%|█▊ | 67268/371472 [5:21:01<22:48:40, 3.70it/s] 18%|█▊ | 67269/371472 [5:21:01<24:14:50, 3.48it/s] 18%|█▊ | 67270/371472 [5:21:02<23:22:08, 3.62it/s] 18%|█▊ | 67271/371472 [5:21:02<23:44:08, 3.56it/s] 18%|█▊ | 67272/371472 [5:21:02<25:14:23, 3.35it/s] 18%|█▊ | 67273/371472 [5:21:03<24:27:16, 3.46it/s] 18%|█▊ | 67274/371472 [5:21:03<24:25:46, 3.46it/s] 18%|█▊ | 67275/371472 [5:21:03<25:06:43, 3.36it/s] 18%|█▊ | 67276/371472 [5:21:03<25:48:48, 3.27it/s] 18%|█▊ | 67277/371472 [5:21:04<25:36:06, 3.30it/s] 18%|█▊ | 67278/371472 [5:21:04<25:10:19, 3.36it/s] 18%|█▊ | 67279/371472 [5:21:04<24:22:42, 3.47it/s] 18%|█▊ | 67280/371472 [5:21:05<23:37:57, 3.58it/s] {'loss': 4.0632, 'learning_rate': 8.373914542437889e-07, 'epoch': 2.9} + 18%|█▊ | 67280/371472 [5:21:05<23:37:57, 3.58it/s] 18%|█▊ | 67281/371472 [5:21:05<23:34:31, 3.58it/s] 18%|█▊ | 67282/371472 [5:21:05<23:29:23, 3.60it/s] 18%|█▊ | 67283/371472 [5:21:05<22:26:19, 3.77it/s] 18%|█▊ | 67284/371472 [5:21:06<22:24:16, 3.77it/s] 18%|█▊ | 67285/371472 [5:21:06<24:53:04, 3.40it/s] 18%|█▊ | 67286/371472 [5:21:06<24:28:44, 3.45it/s] 18%|█▊ | 67287/371472 [5:21:07<25:14:26, 3.35it/s] 18%|█▊ | 67288/371472 [5:21:07<23:59:36, 3.52it/s] 18%|█▊ | 67289/371472 [5:21:07<23:34:50, 3.58it/s] 18%|█▊ | 67290/371472 [5:21:07<25:22:21, 3.33it/s] 18%|█▊ | 67291/371472 [5:21:08<24:47:31, 3.41it/s] 18%|█▊ | 67292/371472 [5:21:08<24:03:59, 3.51it/s] 18%|█▊ | 67293/371472 [5:21:08<25:05:36, 3.37it/s] 18%|█▊ | 67294/371472 [5:21:09<24:37:15, 3.43it/s] 18%|█▊ | 67295/371472 [5:21:09<24:21:43, 3.47it/s] 18%|█▊ | 67296/371472 [5:21:09<23:03:27, 3.66it/s] 18%|█▊ | 67297/371472 [5:21:09<26:14:45, 3.22it/s] 18%|█▊ | 67298/371472 [5:21:10<25:20:52, 3.33it/s] 18%|█▊ | 67299/371472 [5:21:10<26:31:11, 3.19it/s] 18%|█▊ | 67300/371472 [5:21:10<26:20:55, 3.21it/s] {'loss': 4.035, 'learning_rate': 8.3734297226831e-07, 'epoch': 2.9} + 18%|█▊ | 67300/371472 [5:21:10<26:20:55, 3.21it/s] 18%|█▊ | 67301/371472 [5:21:11<25:23:13, 3.33it/s] 18%|█▊ | 67302/371472 [5:21:11<25:43:18, 3.28it/s] 18%|█▊ | 67303/371472 [5:21:11<24:10:44, 3.49it/s] 18%|█▊ | 67304/371472 [5:21:12<24:45:39, 3.41it/s] 18%|█▊ | 67305/371472 [5:21:12<23:48:12, 3.55it/s] 18%|█▊ | 67306/371472 [5:21:12<24:32:55, 3.44it/s] 18%|█▊ | 67307/371472 [5:21:12<23:18:55, 3.62it/s] 18%|█▊ | 67308/371472 [5:21:13<22:51:01, 3.70it/s] 18%|█▊ | 67309/371472 [5:21:13<23:40:41, 3.57it/s] 18%|█▊ | 67310/371472 [5:21:13<23:14:44, 3.63it/s] 18%|█▊ | 67311/371472 [5:21:13<23:11:10, 3.64it/s] 18%|█▊ | 67312/371472 [5:21:14<22:03:31, 3.83it/s] 18%|█▊ | 67313/371472 [5:21:14<22:32:10, 3.75it/s] 18%|█▊ | 67314/371472 [5:21:14<23:03:15, 3.66it/s] 18%|█▊ | 67315/371472 [5:21:15<23:01:15, 3.67it/s] 18%|█▊ | 67316/371472 [5:21:15<23:29:11, 3.60it/s] 18%|█▊ | 67317/371472 [5:21:15<23:32:46, 3.59it/s] 18%|█▊ | 67318/371472 [5:21:15<23:50:37, 3.54it/s] 18%|█▊ | 67319/371472 [5:21:16<24:44:43, 3.41it/s] 18%|█▊ | 67320/371472 [5:21:16<24:02:00, 3.52it/s] {'loss': 4.2782, 'learning_rate': 8.372944902928311e-07, 'epoch': 2.9} + 18%|█▊ | 67320/371472 [5:21:16<24:02:00, 3.52it/s] 18%|█▊ | 67321/371472 [5:21:16<24:04:58, 3.51it/s] 18%|█▊ | 67322/371472 [5:21:17<24:31:35, 3.44it/s] 18%|█▊ | 67323/371472 [5:21:17<24:08:24, 3.50it/s] 18%|█▊ | 67324/371472 [5:21:17<25:16:16, 3.34it/s] 18%|█▊ | 67325/371472 [5:21:17<25:11:20, 3.35it/s] 18%|█▊ | 67326/371472 [5:21:18<24:28:14, 3.45it/s] 18%|█▊ | 67327/371472 [5:21:18<23:45:07, 3.56it/s] 18%|█▊ | 67328/371472 [5:21:18<23:16:54, 3.63it/s] 18%|█▊ | 67329/371472 [5:21:19<23:03:38, 3.66it/s] 18%|█▊ | 67330/371472 [5:21:19<22:55:07, 3.69it/s] 18%|█▊ | 67331/371472 [5:21:19<23:04:34, 3.66it/s] 18%|█▊ | 67332/371472 [5:21:19<24:48:29, 3.41it/s] 18%|█▊ | 67333/371472 [5:21:20<23:42:18, 3.56it/s] 18%|█▊ | 67334/371472 [5:21:20<23:08:34, 3.65it/s] 18%|█▊ | 67335/371472 [5:21:20<24:26:52, 3.46it/s] 18%|█▊ | 67336/371472 [5:21:21<24:06:20, 3.50it/s] 18%|█▊ | 67337/371472 [5:21:21<27:05:35, 3.12it/s] 18%|█▊ | 67338/371472 [5:21:21<26:41:28, 3.17it/s] 18%|█▊ | 67339/371472 [5:21:21<25:25:04, 3.32it/s] 18%|█▊ | 67340/371472 [5:21:22<25:11:38, 3.35it/s] {'loss': 4.1251, 'learning_rate': 8.372460083173522e-07, 'epoch': 2.9} + 18%|█▊ | 67340/371472 [5:21:22<25:11:38, 3.35it/s] 18%|█▊ | 67341/371472 [5:21:22<25:59:53, 3.25it/s] 18%|█▊ | 67342/371472 [5:21:22<25:04:46, 3.37it/s] 18%|█▊ | 67343/371472 [5:21:23<24:14:48, 3.48it/s] 18%|█▊ | 67344/371472 [5:21:23<24:45:55, 3.41it/s] 18%|█▊ | 67345/371472 [5:21:23<23:44:28, 3.56it/s] 18%|█▊ | 67346/371472 [5:21:23<22:55:54, 3.68it/s] 18%|█▊ | 67347/371472 [5:21:24<26:05:58, 3.24it/s] 18%|█▊ | 67348/371472 [5:21:24<25:02:26, 3.37it/s] 18%|█▊ | 67349/371472 [5:21:24<24:37:11, 3.43it/s] 18%|█▊ | 67350/371472 [5:21:25<24:30:23, 3.45it/s] 18%|█▊ | 67351/371472 [5:21:25<26:12:46, 3.22it/s] 18%|█▊ | 67352/371472 [5:21:25<24:56:23, 3.39it/s] 18%|█▊ | 67353/371472 [5:21:26<26:04:28, 3.24it/s] 18%|█▊ | 67354/371472 [5:21:26<25:34:48, 3.30it/s] 18%|█▊ | 67355/371472 [5:21:26<24:02:37, 3.51it/s] 18%|█▊ | 67356/371472 [5:21:26<23:38:57, 3.57it/s] 18%|█▊ | 67357/371472 [5:21:27<23:23:58, 3.61it/s] 18%|█▊ | 67358/371472 [5:21:27<22:41:33, 3.72it/s] 18%|█▊ | 67359/371472 [5:21:27<23:02:16, 3.67it/s] 18%|█▊ | 67360/371472 [5:21:28<22:46:24, 3.71it/s] {'loss': 4.3405, 'learning_rate': 8.371975263418733e-07, 'epoch': 2.9} + 18%|█▊ | 67360/371472 [5:21:28<22:46:24, 3.71it/s] 18%|█▊ | 67361/371472 [5:21:28<25:25:31, 3.32it/s] 18%|█▊ | 67362/371472 [5:21:28<24:48:07, 3.41it/s] 18%|█▊ | 67363/371472 [5:21:28<24:29:04, 3.45it/s] 18%|█▊ | 67364/371472 [5:21:29<23:46:16, 3.55it/s] 18%|█▊ | 67365/371472 [5:21:29<23:33:13, 3.59it/s] 18%|█▊ | 67366/371472 [5:21:29<22:53:47, 3.69it/s] 18%|█▊ | 67367/371472 [5:21:30<24:52:46, 3.40it/s] 18%|█▊ | 67368/371472 [5:21:30<23:44:45, 3.56it/s] 18%|█▊ | 67369/371472 [5:21:30<23:23:55, 3.61it/s] 18%|█▊ | 67370/371472 [5:21:30<26:14:24, 3.22it/s] 18%|█▊ | 67371/371472 [5:21:31<25:08:47, 3.36it/s] 18%|█▊ | 67372/371472 [5:21:31<24:38:32, 3.43it/s] 18%|█▊ | 67373/371472 [5:21:31<24:16:31, 3.48it/s] 18%|█▊ | 67374/371472 [5:21:32<24:07:56, 3.50it/s] 18%|█▊ | 67375/371472 [5:21:32<24:27:20, 3.45it/s] 18%|█▊ | 67376/371472 [5:21:32<25:11:12, 3.35it/s] 18%|█▊ | 67377/371472 [5:21:32<24:43:14, 3.42it/s] 18%|█▊ | 67378/371472 [5:21:33<27:31:12, 3.07it/s] 18%|█▊ | 67379/371472 [5:21:33<26:39:24, 3.17it/s] 18%|█▊ | 67380/371472 [5:21:33<26:05:02, 3.24it/s] {'loss': 4.1674, 'learning_rate': 8.371490443663944e-07, 'epoch': 2.9} + 18%|█▊ | 67380/371472 [5:21:33<26:05:02, 3.24it/s] 18%|█▊ | 67381/371472 [5:21:34<24:17:10, 3.48it/s] 18%|█▊ | 67382/371472 [5:21:34<23:05:53, 3.66it/s] 18%|█▊ | 67383/371472 [5:21:34<22:39:05, 3.73it/s] 18%|█▊ | 67384/371472 [5:21:34<22:10:52, 3.81it/s] 18%|█▊ | 67385/371472 [5:21:35<23:02:07, 3.67it/s] 18%|█▊ | 67386/371472 [5:21:35<23:29:13, 3.60it/s] 18%|█▊ | 67387/371472 [5:21:35<22:35:25, 3.74it/s] 18%|█▊ | 67388/371472 [5:21:36<22:08:23, 3.82it/s] 18%|█▊ | 67389/371472 [5:21:36<22:43:33, 3.72it/s] 18%|█▊ | 67390/371472 [5:21:36<22:03:47, 3.83it/s] 18%|█▊ | 67391/371472 [5:21:36<22:31:05, 3.75it/s] 18%|█▊ | 67392/371472 [5:21:37<22:19:57, 3.78it/s] 18%|█▊ | 67393/371472 [5:21:37<22:48:49, 3.70it/s] 18%|█▊ | 67394/371472 [5:21:37<22:26:07, 3.76it/s] 18%|█▊ | 67395/371472 [5:21:37<23:25:16, 3.61it/s] 18%|█▊ | 67396/371472 [5:21:38<23:17:27, 3.63it/s] 18%|█▊ | 67397/371472 [5:21:38<22:27:58, 3.76it/s] 18%|█▊ | 67398/371472 [5:21:38<22:10:54, 3.81it/s] 18%|█▊ | 67399/371472 [5:21:38<21:58:34, 3.84it/s] 18%|█▊ | 67400/371472 [5:21:39<22:19:25, 3.78it/s] {'loss': 4.2904, 'learning_rate': 8.371005623909156e-07, 'epoch': 2.9} + 18%|█▊ | 67400/371472 [5:21:39<22:19:25, 3.78it/s] 18%|█▊ | 67401/371472 [5:21:39<22:45:07, 3.71it/s] 18%|█▊ | 67402/371472 [5:21:39<24:24:42, 3.46it/s] 18%|█▊ | 67403/371472 [5:21:40<23:43:54, 3.56it/s] 18%|█▊ | 67404/371472 [5:21:40<22:58:37, 3.68it/s] 18%|█▊ | 67405/371472 [5:21:40<22:20:48, 3.78it/s] 18%|█▊ | 67406/371472 [5:21:40<24:24:34, 3.46it/s] 18%|█▊ | 67407/371472 [5:21:41<24:29:41, 3.45it/s] 18%|█▊ | 67408/371472 [5:21:41<23:38:06, 3.57it/s] 18%|█▊ | 67409/371472 [5:21:41<23:51:44, 3.54it/s] 18%|█▊ | 67410/371472 [5:21:42<23:53:22, 3.54it/s] 18%|█▊ | 67411/371472 [5:21:42<23:03:44, 3.66it/s] 18%|█▊ | 67412/371472 [5:21:42<23:46:47, 3.55it/s] 18%|█▊ | 67413/371472 [5:21:42<23:26:31, 3.60it/s] 18%|█▊ | 67414/371472 [5:21:43<25:36:24, 3.30it/s] 18%|█▊ | 67415/371472 [5:21:43<24:56:31, 3.39it/s] 18%|█▊ | 67416/371472 [5:21:43<25:50:42, 3.27it/s] 18%|█▊ | 67417/371472 [5:21:44<25:34:20, 3.30it/s] 18%|█▊ | 67418/371472 [5:21:44<24:24:58, 3.46it/s] 18%|█▊ | 67419/371472 [5:21:44<24:19:33, 3.47it/s] 18%|█▊ | 67420/371472 [5:21:44<23:12:37, 3.64it/s] {'loss': 4.2848, 'learning_rate': 8.370520804154366e-07, 'epoch': 2.9} + 18%|█▊ | 67420/371472 [5:21:44<23:12:37, 3.64it/s] 18%|█▊ | 67421/371472 [5:21:45<23:23:12, 3.61it/s] 18%|█▊ | 67422/371472 [5:21:45<23:34:23, 3.58it/s] 18%|█▊ | 67423/371472 [5:21:45<24:00:55, 3.52it/s] 18%|█▊ | 67424/371472 [5:21:46<24:02:06, 3.51it/s] 18%|█▊ | 67425/371472 [5:21:46<23:11:22, 3.64it/s] 18%|█▊ | 67426/371472 [5:21:46<24:42:13, 3.42it/s] 18%|█▊ | 67427/371472 [5:21:46<24:17:58, 3.48it/s] 18%|█▊ | 67428/371472 [5:21:47<25:42:04, 3.29it/s] 18%|█▊ | 67429/371472 [5:21:47<24:47:36, 3.41it/s] 18%|█▊ | 67430/371472 [5:21:47<23:36:19, 3.58it/s] 18%|█▊ | 67431/371472 [5:21:48<23:12:35, 3.64it/s] 18%|█▊ | 67432/371472 [5:21:48<23:07:13, 3.65it/s] 18%|█▊ | 67433/371472 [5:21:48<24:33:28, 3.44it/s] 18%|█▊ | 67434/371472 [5:21:48<24:15:15, 3.48it/s] 18%|█▊ | 67435/371472 [5:21:49<23:29:32, 3.59it/s] 18%|█▊ | 67436/371472 [5:21:49<24:04:54, 3.51it/s] 18%|█▊ | 67437/371472 [5:21:49<22:39:02, 3.73it/s] 18%|█▊ | 67438/371472 [5:21:50<22:31:32, 3.75it/s] 18%|█▊ | 67439/371472 [5:21:50<22:20:36, 3.78it/s] 18%|█▊ | 67440/371472 [5:21:50<22:12:17, 3.80it/s] {'loss': 4.369, 'learning_rate': 8.370035984399577e-07, 'epoch': 2.9} + 18%|█▊ | 67440/371472 [5:21:50<22:12:17, 3.80it/s] 18%|█▊ | 67441/371472 [5:21:50<22:02:41, 3.83it/s] 18%|█▊ | 67442/371472 [5:21:51<22:07:58, 3.82it/s] 18%|█▊ | 67443/371472 [5:21:51<22:00:06, 3.84it/s] 18%|█▊ | 67444/371472 [5:21:51<22:55:09, 3.68it/s] 18%|█▊ | 67445/371472 [5:21:51<22:09:26, 3.81it/s] 18%|█▊ | 67446/371472 [5:21:52<22:15:38, 3.79it/s] 18%|█▊ | 67447/371472 [5:21:52<22:18:31, 3.79it/s] 18%|█▊ | 67448/371472 [5:21:52<21:59:14, 3.84it/s] 18%|█▊ | 67449/371472 [5:21:52<21:58:57, 3.84it/s] 18%|█▊ | 67450/371472 [5:21:53<22:25:44, 3.77it/s] 18%|█▊ | 67451/371472 [5:21:53<22:39:00, 3.73it/s] 18%|█▊ | 67452/371472 [5:21:53<23:07:02, 3.65it/s] 18%|█▊ | 67453/371472 [5:21:54<23:27:08, 3.60it/s] 18%|█▊ | 67454/371472 [5:21:54<24:59:33, 3.38it/s] 18%|█▊ | 67455/371472 [5:21:54<24:28:45, 3.45it/s] 18%|█▊ | 67456/371472 [5:21:54<25:57:57, 3.25it/s] 18%|█▊ | 67457/371472 [5:21:55<28:29:57, 2.96it/s] 18%|█▊ | 67458/371472 [5:21:55<27:01:10, 3.13it/s] 18%|█▊ | 67459/371472 [5:21:55<25:21:26, 3.33it/s] 18%|█▊ | 67460/371472 [5:21:56<24:11:48, 3.49it/s] {'loss': 4.2133, 'learning_rate': 8.369551164644788e-07, 'epoch': 2.91} + 18%|█▊ | 67460/371472 [5:21:56<24:11:48, 3.49it/s] 18%|█▊ | 67461/371472 [5:21:56<24:31:56, 3.44it/s] 18%|█▊ | 67462/371472 [5:21:56<24:29:34, 3.45it/s] 18%|█▊ | 67463/371472 [5:21:57<24:41:07, 3.42it/s] 18%|█▊ | 67464/371472 [5:21:57<24:57:46, 3.38it/s] 18%|█▊ | 67465/371472 [5:21:57<23:28:00, 3.60it/s] 18%|█▊ | 67466/371472 [5:21:57<23:46:14, 3.55it/s] 18%|█▊ | 67467/371472 [5:21:58<23:17:45, 3.62it/s] 18%|█▊ | 67468/371472 [5:21:58<22:24:24, 3.77it/s] 18%|█▊ | 67469/371472 [5:21:58<22:37:19, 3.73it/s] 18%|█▊ | 67470/371472 [5:21:58<22:01:48, 3.83it/s] 18%|█▊ | 67471/371472 [5:21:59<23:07:51, 3.65it/s] 18%|█▊ | 67472/371472 [5:21:59<25:31:00, 3.31it/s] 18%|█▊ | 67473/371472 [5:21:59<26:05:41, 3.24it/s] 18%|█▊ | 67474/371472 [5:22:00<25:01:47, 3.37it/s] 18%|█▊ | 67475/371472 [5:22:00<23:42:23, 3.56it/s] 18%|█▊ | 67476/371472 [5:22:00<23:12:08, 3.64it/s] 18%|█▊ | 67477/371472 [5:22:01<24:15:48, 3.48it/s] 18%|█▊ | 67478/371472 [5:22:01<23:43:04, 3.56it/s] 18%|█▊ | 67479/371472 [5:22:01<23:03:59, 3.66it/s] 18%|█▊ | 67480/371472 [5:22:01<22:56:51, 3.68it/s] {'loss': 4.4579, 'learning_rate': 8.36906634489e-07, 'epoch': 2.91} + 18%|█▊ | 67480/371472 [5:22:01<22:56:51, 3.68it/s] 18%|█▊ | 67481/371472 [5:22:02<23:05:16, 3.66it/s] 18%|█▊ | 67482/371472 [5:22:02<23:28:42, 3.60it/s] 18%|█▊ | 67483/371472 [5:22:02<23:56:01, 3.53it/s] 18%|█▊ | 67484/371472 [5:22:02<23:14:46, 3.63it/s] 18%|█▊ | 67485/371472 [5:22:03<23:30:26, 3.59it/s] 18%|█▊ | 67486/371472 [5:22:03<23:40:49, 3.57it/s] 18%|█▊ | 67487/371472 [5:22:03<24:04:55, 3.51it/s] 18%|█▊ | 67488/371472 [5:22:04<24:45:03, 3.41it/s] 18%|█▊ | 67489/371472 [5:22:04<23:36:26, 3.58it/s] 18%|█▊ | 67490/371472 [5:22:04<24:29:33, 3.45it/s] 18%|█▊ | 67491/371472 [5:22:04<23:52:07, 3.54it/s] 18%|█▊ | 67492/371472 [5:22:05<23:54:05, 3.53it/s] 18%|█▊ | 67493/371472 [5:22:05<24:57:30, 3.38it/s] 18%|█▊ | 67494/371472 [5:22:05<25:25:56, 3.32it/s] 18%|█▊ | 67495/371472 [5:22:06<24:26:27, 3.45it/s] 18%|█▊ | 67496/371472 [5:22:06<24:52:24, 3.39it/s] 18%|█▊ | 67497/371472 [5:22:06<23:44:11, 3.56it/s] 18%|█▊ | 67498/371472 [5:22:06<24:39:59, 3.42it/s] 18%|█▊ | 67499/371472 [5:22:07<24:25:34, 3.46it/s] 18%|█▊ | 67500/371472 [5:22:07<24:21:37, 3.47it/s] {'loss': 4.0013, 'learning_rate': 8.368581525135209e-07, 'epoch': 2.91} + 18%|█▊ | 67500/371472 [5:22:07<24:21:37, 3.47it/s] 18%|█▊ | 67501/371472 [5:22:07<26:49:57, 3.15it/s] 18%|█▊ | 67502/371472 [5:22:08<26:13:27, 3.22it/s] 18%|█▊ | 67503/371472 [5:22:08<25:14:14, 3.35it/s] 18%|█▊ | 67504/371472 [5:22:08<25:32:20, 3.31it/s] 18%|█▊ | 67505/371472 [5:22:09<28:02:48, 3.01it/s] 18%|█▊ | 67506/371472 [5:22:09<26:44:23, 3.16it/s] 18%|█▊ | 67507/371472 [5:22:09<25:21:11, 3.33it/s] 18%|█▊ | 67508/371472 [5:22:10<30:02:15, 2.81it/s] 18%|█▊ | 67509/371472 [5:22:10<27:50:30, 3.03it/s] 18%|█▊ | 67510/371472 [5:22:10<28:53:01, 2.92it/s] 18%|█▊ | 67511/371472 [5:22:11<26:57:03, 3.13it/s] 18%|█▊ | 67512/371472 [5:22:11<25:16:54, 3.34it/s] 18%|█▊ | 67513/371472 [5:22:11<26:15:56, 3.21it/s] 18%|█▊ | 67514/371472 [5:22:12<26:36:02, 3.17it/s] 18%|█▊ | 67515/371472 [5:22:12<26:13:09, 3.22it/s] 18%|█▊ | 67516/371472 [5:22:12<24:31:01, 3.44it/s] 18%|█▊ | 67517/371472 [5:22:12<25:35:01, 3.30it/s] 18%|█▊ | 67518/371472 [5:22:13<25:11:15, 3.35it/s] 18%|█▊ | 67519/371472 [5:22:13<23:50:54, 3.54it/s] 18%|█▊ | 67520/371472 [5:22:13<22:51:38, 3.69it/s] {'loss': 4.0972, 'learning_rate': 8.368096705380421e-07, 'epoch': 2.91} + 18%|█▊ | 67520/371472 [5:22:13<22:51:38, 3.69it/s] 18%|█▊ | 67521/371472 [5:22:13<22:33:50, 3.74it/s] 18%|█▊ | 67522/371472 [5:22:14<23:33:17, 3.58it/s] 18%|█▊ | 67523/371472 [5:22:14<22:49:08, 3.70it/s] 18%|█▊ | 67524/371472 [5:22:14<23:36:35, 3.58it/s] 18%|█▊ | 67525/371472 [5:22:15<22:59:39, 3.67it/s] 18%|█▊ | 67526/371472 [5:22:15<22:52:11, 3.69it/s] 18%|█▊ | 67527/371472 [5:22:15<26:14:06, 3.22it/s] 18%|█▊ | 67528/371472 [5:22:16<26:20:34, 3.21it/s] 18%|█▊ | 67529/371472 [5:22:16<25:47:38, 3.27it/s] 18%|█▊ | 67530/371472 [5:22:16<24:21:20, 3.47it/s] 18%|█▊ | 67531/371472 [5:22:16<24:04:56, 3.51it/s] 18%|█▊ | 67532/371472 [5:22:17<22:40:25, 3.72it/s] 18%|█▊ | 67533/371472 [5:22:17<22:51:19, 3.69it/s] 18%|█▊ | 67534/371472 [5:22:17<22:10:17, 3.81it/s] 18%|█▊ | 67535/371472 [5:22:17<21:46:48, 3.88it/s] 18%|█▊ | 67536/371472 [5:22:18<21:49:18, 3.87it/s] 18%|█▊ | 67537/371472 [5:22:18<22:54:18, 3.69it/s] 18%|█▊ | 67538/371472 [5:22:18<22:27:35, 3.76it/s] 18%|█▊ | 67539/371472 [5:22:18<21:52:52, 3.86it/s] 18%|█▊ | 67540/371472 [5:22:19<21:27:50, 3.93it/s] {'loss': 4.4095, 'learning_rate': 8.367611885625633e-07, 'epoch': 2.91} + 18%|█��� | 67540/371472 [5:22:19<21:27:50, 3.93it/s] 18%|█▊ | 67541/371472 [5:22:19<21:47:33, 3.87it/s] 18%|█▊ | 67542/371472 [5:22:19<22:16:42, 3.79it/s] 18%|█▊ | 67543/371472 [5:22:19<21:54:05, 3.85it/s] 18%|█▊ | 67544/371472 [5:22:20<21:16:41, 3.97it/s] 18%|█▊ | 67545/371472 [5:22:20<21:16:13, 3.97it/s] 18%|█▊ | 67546/371472 [5:22:20<21:37:30, 3.90it/s] 18%|█▊ | 67547/371472 [5:22:21<23:01:25, 3.67it/s] 18%|█▊ | 67548/371472 [5:22:21<24:31:59, 3.44it/s] 18%|█▊ | 67549/371472 [5:22:21<24:56:22, 3.39it/s] 18%|█▊ | 67550/371472 [5:22:21<24:02:26, 3.51it/s] 18%|█▊ | 67551/371472 [5:22:22<23:25:57, 3.60it/s] 18%|█▊ | 67552/371472 [5:22:22<23:01:24, 3.67it/s] 18%|█▊ | 67553/371472 [5:22:22<22:47:22, 3.70it/s] 18%|█▊ | 67554/371472 [5:22:23<23:01:20, 3.67it/s] 18%|█▊ | 67555/371472 [5:22:23<22:46:38, 3.71it/s] 18%|█▊ | 67556/371472 [5:22:23<24:09:49, 3.49it/s] 18%|█▊ | 67557/371472 [5:22:23<26:22:20, 3.20it/s] 18%|█▊ | 67558/371472 [5:22:24<24:50:13, 3.40it/s] 18%|█▊ | 67559/371472 [5:22:24<24:32:07, 3.44it/s] 18%|█▊ | 67560/371472 [5:22:24<22:58:28, 3.67it/s] {'loss': 4.2958, 'learning_rate': 8.367127065870845e-07, 'epoch': 2.91} + 18%|█▊ | 67560/371472 [5:22:24<22:58:28, 3.67it/s] 18%|█▊ | 67561/371472 [5:22:25<23:12:29, 3.64it/s] 18%|█▊ | 67562/371472 [5:22:25<22:53:04, 3.69it/s] 18%|█▊ | 67563/371472 [5:22:25<22:25:53, 3.76it/s] 18%|█▊ | 67564/371472 [5:22:25<23:51:35, 3.54it/s] 18%|█▊ | 67565/371472 [5:22:26<26:19:24, 3.21it/s] 18%|█▊ | 67566/371472 [5:22:26<25:06:11, 3.36it/s] 18%|█▊ | 67567/371472 [5:22:26<26:39:41, 3.17it/s] 18%|█▊ | 67568/371472 [5:22:27<24:47:03, 3.41it/s] 18%|█▊ | 67569/371472 [5:22:27<24:36:57, 3.43it/s] 18%|█▊ | 67570/371472 [5:22:27<24:16:25, 3.48it/s] 18%|█▊ | 67571/371472 [5:22:27<24:23:09, 3.46it/s] 18%|█▊ | 67572/371472 [5:22:28<22:50:49, 3.69it/s] 18%|█▊ | 67573/371472 [5:22:28<23:10:59, 3.64it/s] 18%|█▊ | 67574/371472 [5:22:28<23:09:19, 3.65it/s] 18%|█▊ | 67575/371472 [5:22:28<22:11:36, 3.80it/s] 18%|█▊ | 67576/371472 [5:22:29<22:49:03, 3.70it/s] 18%|█▊ | 67577/371472 [5:22:29<24:45:09, 3.41it/s] 18%|█▊ | 67578/371472 [5:22:29<24:01:11, 3.51it/s] 18%|█▊ | 67579/371472 [5:22:30<23:22:59, 3.61it/s] 18%|█▊ | 67580/371472 [5:22:30<23:41:22, 3.56it/s] {'loss': 4.249, 'learning_rate': 8.366642246116054e-07, 'epoch': 2.91} + 18%|█▊ | 67580/371472 [5:22:30<23:41:22, 3.56it/s] 18%|█▊ | 67581/371472 [5:22:30<24:27:42, 3.45it/s] 18%|█▊ | 67582/371472 [5:22:31<25:18:57, 3.33it/s] 18%|█▊ | 67583/371472 [5:22:31<27:15:18, 3.10it/s] 18%|█▊ | 67584/371472 [5:22:31<25:48:19, 3.27it/s] 18%|█▊ | 67585/371472 [5:22:31<24:50:09, 3.40it/s] 18%|█▊ | 67586/371472 [5:22:32<24:08:30, 3.50it/s] 18%|█▊ | 67587/371472 [5:22:32<23:38:15, 3.57it/s] 18%|█▊ | 67588/371472 [5:22:32<23:03:43, 3.66it/s] 18%|█▊ | 67589/371472 [5:22:33<23:10:42, 3.64it/s] 18%|█▊ | 67590/371472 [5:22:33<22:58:59, 3.67it/s] 18%|█▊ | 67591/371472 [5:22:33<22:53:58, 3.69it/s] 18%|█▊ | 67592/371472 [5:22:33<22:52:48, 3.69it/s] 18%|█▊ | 67593/371472 [5:22:34<24:21:05, 3.47it/s] 18%|█▊ | 67594/371472 [5:22:34<25:37:32, 3.29it/s] 18%|█▊ | 67595/371472 [5:22:34<26:02:31, 3.24it/s] 18%|█▊ | 67596/371472 [5:22:35<24:16:28, 3.48it/s] 18%|█▊ | 67597/371472 [5:22:35<24:30:12, 3.44it/s] 18%|█▊ | 67598/371472 [5:22:35<23:41:56, 3.56it/s] 18%|█▊ | 67599/371472 [5:22:35<23:46:17, 3.55it/s] 18%|█▊ | 67600/371472 [5:22:36<23:21:58, 3.61it/s] {'loss': 4.1099, 'learning_rate': 8.366157426361265e-07, 'epoch': 2.91} + 18%|█▊ | 67600/371472 [5:22:36<23:21:58, 3.61it/s] 18%|█▊ | 67601/371472 [5:22:36<23:52:43, 3.53it/s] 18%|█▊ | 67602/371472 [5:22:36<23:36:29, 3.58it/s] 18%|█▊ | 67603/371472 [5:22:37<23:44:09, 3.56it/s] 18%|█▊ | 67604/371472 [5:22:37<23:58:30, 3.52it/s] 18%|█▊ | 67605/371472 [5:22:37<23:13:56, 3.63it/s] 18%|█▊ | 67606/371472 [5:22:37<22:49:49, 3.70it/s] 18%|█▊ | 67607/371472 [5:22:38<22:58:04, 3.68it/s] 18%|█▊ | 67608/371472 [5:22:38<22:52:02, 3.69it/s] 18%|█▊ | 67609/371472 [5:22:38<22:52:46, 3.69it/s] 18%|█▊ | 67610/371472 [5:22:38<23:12:14, 3.64it/s] 18%|█▊ | 67611/371472 [5:22:39<22:21:04, 3.78it/s] 18%|█▊ | 67612/371472 [5:22:39<22:59:21, 3.67it/s] 18%|█▊ | 67613/371472 [5:22:39<22:35:13, 3.74it/s] 18%|█▊ | 67614/371472 [5:22:39<22:18:32, 3.78it/s] 18%|█▊ | 67615/371472 [5:22:40<22:41:18, 3.72it/s] 18%|█▊ | 67616/371472 [5:22:40<23:44:11, 3.56it/s] 18%|█▊ | 67617/371472 [5:22:40<24:04:13, 3.51it/s] 18%|█▊ | 67618/371472 [5:22:41<23:38:13, 3.57it/s] 18%|█▊ | 67619/371472 [5:22:41<22:42:32, 3.72it/s] 18%|█▊ | 67620/371472 [5:22:41<22:39:31, 3.72it/s] {'loss': 4.3728, 'learning_rate': 8.365672606606477e-07, 'epoch': 2.91} + 18%|█▊ | 67620/371472 [5:22:41<22:39:31, 3.72it/s] 18%|█▊ | 67621/371472 [5:22:41<22:20:48, 3.78it/s] 18%|█▊ | 67622/371472 [5:22:42<25:03:24, 3.37it/s] 18%|█▊ | 67623/371472 [5:22:42<24:45:28, 3.41it/s] 18%|█▊ | 67624/371472 [5:22:42<24:09:32, 3.49it/s] 18%|█▊ | 67625/371472 [5:22:43<24:12:29, 3.49it/s] 18%|█▊ | 67626/371472 [5:22:43<25:13:16, 3.35it/s] 18%|█▊ | 67627/371472 [5:22:43<23:43:46, 3.56it/s] 18%|█▊ | 67628/371472 [5:22:43<23:52:02, 3.54it/s] 18%|█▊ | 67629/371472 [5:22:44<23:37:44, 3.57it/s] 18%|█▊ | 67630/371472 [5:22:44<24:33:16, 3.44it/s] 18%|█▊ | 67631/371472 [5:22:44<26:49:10, 3.15it/s] 18%|█▊ | 67632/371472 [5:22:45<27:43:16, 3.04it/s] 18%|█▊ | 67633/371472 [5:22:45<26:56:39, 3.13it/s] 18%|█▊ | 67634/371472 [5:22:45<26:35:59, 3.17it/s] 18%|█▊ | 67635/371472 [5:22:46<25:51:43, 3.26it/s] 18%|█▊ | 67636/371472 [5:22:46<25:00:36, 3.37it/s] 18%|█▊ | 67637/371472 [5:22:46<28:33:14, 2.96it/s] 18%|█▊ | 67638/371472 [5:22:47<26:45:34, 3.15it/s] 18%|█▊ | 67639/371472 [5:22:47<26:04:33, 3.24it/s] 18%|█▊ | 67640/371472 [5:22:47<25:45:09, 3.28it/s] {'loss': 4.1975, 'learning_rate': 8.365187786851689e-07, 'epoch': 2.91} + 18%|█▊ | 67640/371472 [5:22:47<25:45:09, 3.28it/s] 18%|█▊ | 67641/371472 [5:22:48<26:07:50, 3.23it/s] 18%|█▊ | 67642/371472 [5:22:48<27:02:02, 3.12it/s] 18%|█▊ | 67643/371472 [5:22:48<26:44:07, 3.16it/s] 18%|█▊ | 67644/371472 [5:22:48<24:58:29, 3.38it/s] 18%|█▊ | 67645/371472 [5:22:49<25:15:09, 3.34it/s] 18%|█▊ | 67646/371472 [5:22:49<24:20:28, 3.47it/s] 18%|█▊ | 67647/371472 [5:22:49<23:54:09, 3.53it/s] 18%|█▊ | 67648/371472 [5:22:50<22:55:57, 3.68it/s] 18%|█▊ | 67649/371472 [5:22:50<22:57:22, 3.68it/s] 18%|█▊ | 67650/371472 [5:22:50<23:09:56, 3.64it/s] 18%|█▊ | 67651/371472 [5:22:50<24:21:31, 3.46it/s] 18%|█▊ | 67652/371472 [5:22:51<23:07:03, 3.65it/s] 18%|█▊ | 67653/371472 [5:22:51<23:30:30, 3.59it/s] 18%|█▊ | 67654/371472 [5:22:51<23:53:04, 3.53it/s] 18%|█▊ | 67655/371472 [5:22:52<25:20:03, 3.33it/s] 18%|█▊ | 67656/371472 [5:22:52<24:18:33, 3.47it/s] 18%|█▊ | 67657/371472 [5:22:52<24:23:41, 3.46it/s] 18%|█▊ | 67658/371472 [5:22:52<23:33:01, 3.58it/s] 18%|█▊ | 67659/371472 [5:22:53<22:20:10, 3.78it/s] 18%|█▊ | 67660/371472 [5:22:53<22:36:56, 3.73it/s] {'loss': 4.329, 'learning_rate': 8.364702967096899e-07, 'epoch': 2.91} + 18%|█▊ | 67660/371472 [5:22:53<22:36:56, 3.73it/s] 18%|█▊ | 67661/371472 [5:22:53<24:26:34, 3.45it/s] 18%|█▊ | 67662/371472 [5:22:53<23:12:56, 3.64it/s] 18%|█▊ | 67663/371472 [5:22:54<23:02:18, 3.66it/s] 18%|█▊ | 67664/371472 [5:22:54<22:07:53, 3.81it/s] 18%|█▊ | 67665/371472 [5:22:54<23:04:29, 3.66it/s] 18%|█▊ | 67666/371472 [5:22:55<23:21:14, 3.61it/s] 18%|█▊ | 67667/371472 [5:22:55<22:49:56, 3.70it/s] 18%|█▊ | 67668/371472 [5:22:55<23:28:30, 3.59it/s] 18%|█▊ | 67669/371472 [5:22:55<24:02:34, 3.51it/s] 18%|█▊ | 67670/371472 [5:22:56<23:24:20, 3.61it/s] 18%|█▊ | 67671/371472 [5:22:56<22:15:27, 3.79it/s] 18%|█▊ | 67672/371472 [5:22:56<26:04:04, 3.24it/s] 18%|█▊ | 67673/371472 [5:22:57<24:55:18, 3.39it/s] 18%|█▊ | 67674/371472 [5:22:57<25:35:08, 3.30it/s] 18%|█▊ | 67675/371472 [5:22:57<25:22:15, 3.33it/s] 18%|█▊ | 67676/371472 [5:22:58<25:41:49, 3.28it/s] 18%|█▊ | 67677/371472 [5:22:58<24:27:09, 3.45it/s] 18%|█▊ | 67678/371472 [5:22:58<24:40:44, 3.42it/s] 18%|█▊ | 67679/371472 [5:22:58<24:19:35, 3.47it/s] 18%|█▊ | 67680/371472 [5:22:59<23:46:06, 3.55it/s] {'loss': 4.2475, 'learning_rate': 8.36421814734211e-07, 'epoch': 2.92} + 18%|█▊ | 67680/371472 [5:22:59<23:46:06, 3.55it/s] 18%|█▊ | 67681/371472 [5:22:59<23:26:41, 3.60it/s] 18%|█▊ | 67682/371472 [5:22:59<22:47:34, 3.70it/s] 18%|█▊ | 67683/371472 [5:22:59<22:55:27, 3.68it/s] 18%|█▊ | 67684/371472 [5:23:00<24:38:43, 3.42it/s] 18%|█▊ | 67685/371472 [5:23:00<24:29:44, 3.44it/s] 18%|█▊ | 67686/371472 [5:23:00<24:22:10, 3.46it/s] 18%|█▊ | 67687/371472 [5:23:01<23:35:24, 3.58it/s] 18%|█▊ | 67688/371472 [5:23:01<23:41:45, 3.56it/s] 18%|█▊ | 67689/371472 [5:23:01<24:07:59, 3.50it/s] 18%|█▊ | 67690/371472 [5:23:01<22:45:55, 3.71it/s] 18%|█▊ | 67691/371472 [5:23:02<24:06:53, 3.50it/s] 18%|█▊ | 67692/371472 [5:23:02<24:17:03, 3.47it/s] 18%|█▊ | 67693/371472 [5:23:02<23:08:12, 3.65it/s] 18%|█▊ | 67694/371472 [5:23:03<24:56:47, 3.38it/s] 18%|█▊ | 67695/371472 [5:23:03<25:02:26, 3.37it/s] 18%|█▊ | 67696/371472 [5:23:03<25:07:37, 3.36it/s] 18%|█▊ | 67697/371472 [5:23:03<23:52:20, 3.53it/s] 18%|█▊ | 67698/371472 [5:23:04<23:53:26, 3.53it/s] 18%|█▊ | 67699/371472 [5:23:04<23:16:32, 3.63it/s] 18%|█▊ | 67700/371472 [5:23:04<23:13:05, 3.63it/s] {'loss': 4.1461, 'learning_rate': 8.363733327587321e-07, 'epoch': 2.92} + 18%|█▊ | 67700/371472 [5:23:04<23:13:05, 3.63it/s] 18%|█▊ | 67701/371472 [5:23:05<22:40:43, 3.72it/s] 18%|█▊ | 67702/371472 [5:23:05<22:23:36, 3.77it/s] 18%|█▊ | 67703/371472 [5:23:05<22:21:49, 3.77it/s] 18%|█▊ | 67704/371472 [5:23:05<21:41:10, 3.89it/s] 18%|█▊ | 67705/371472 [5:23:06<24:10:22, 3.49it/s] 18%|█▊ | 67706/371472 [5:23:06<23:24:53, 3.60it/s] 18%|█▊ | 67707/371472 [5:23:06<25:06:26, 3.36it/s] 18%|█▊ | 67708/371472 [5:23:07<24:29:14, 3.45it/s] 18%|█▊ | 67709/371472 [5:23:07<26:07:30, 3.23it/s] 18%|█▊ | 67710/371472 [5:23:07<25:10:34, 3.35it/s] 18%|█▊ | 67711/371472 [5:23:07<25:22:24, 3.33it/s] 18%|█▊ | 67712/371472 [5:23:08<24:11:43, 3.49it/s] 18%|█▊ | 67713/371472 [5:23:08<23:59:17, 3.52it/s] 18%|█▊ | 67714/371472 [5:23:08<24:41:20, 3.42it/s] 18%|█▊ | 67715/371472 [5:23:09<23:48:17, 3.54it/s] 18%|█▊ | 67716/371472 [5:23:09<23:25:07, 3.60it/s] 18%|█▊ | 67717/371472 [5:23:09<24:44:22, 3.41it/s] 18%|█▊ | 67718/371472 [5:23:09<23:46:04, 3.55it/s] 18%|█▊ | 67719/371472 [5:23:10<22:29:19, 3.75it/s] 18%|█▊ | 67720/371472 [5:23:10<22:54:45, 3.68it/s] {'loss': 4.0494, 'learning_rate': 8.363248507832531e-07, 'epoch': 2.92} + 18%|█▊ | 67720/371472 [5:23:10<22:54:45, 3.68it/s] 18%|█▊ | 67721/371472 [5:23:10<22:25:33, 3.76it/s] 18%|█▊ | 67722/371472 [5:23:10<22:19:10, 3.78it/s] 18%|█▊ | 67723/371472 [5:23:11<22:58:03, 3.67it/s] 18%|█▊ | 67724/371472 [5:23:11<23:47:40, 3.55it/s] 18%|█▊ | 67725/371472 [5:23:11<24:00:53, 3.51it/s] 18%|█▊ | 67726/371472 [5:23:12<23:28:37, 3.59it/s] 18%|█▊ | 67727/371472 [5:23:12<24:06:57, 3.50it/s] 18%|█▊ | 67728/371472 [5:23:12<24:21:11, 3.46it/s] 18%|█▊ | 67729/371472 [5:23:12<23:12:40, 3.64it/s] 18%|█▊ | 67730/371472 [5:23:13<22:24:32, 3.77it/s] 18%|█▊ | 67731/371472 [5:23:13<22:21:12, 3.77it/s] 18%|█▊ | 67732/371472 [5:23:13<24:15:00, 3.48it/s] 18%|█▊ | 67733/371472 [5:23:14<23:20:32, 3.61it/s] 18%|█▊ | 67734/371472 [5:23:14<25:14:21, 3.34it/s] 18%|█▊ | 67735/371472 [5:23:14<24:24:51, 3.46it/s] 18%|█▊ | 67736/371472 [5:23:14<23:01:47, 3.66it/s] 18%|█▊ | 67737/371472 [5:23:15<23:18:24, 3.62it/s] 18%|█▊ | 67738/371472 [5:23:15<23:23:22, 3.61it/s] 18%|█▊ | 67739/371472 [5:23:15<23:56:49, 3.52it/s] 18%|█▊ | 67740/371472 [5:23:15<23:14:29, 3.63it/s] {'loss': 4.1619, 'learning_rate': 8.362763688077743e-07, 'epoch': 2.92} + 18%|█▊ | 67740/371472 [5:23:15<23:14:29, 3.63it/s] 18%|█▊ | 67741/371472 [5:23:16<23:23:13, 3.61it/s] 18%|█▊ | 67742/371472 [5:23:16<23:22:56, 3.61it/s] 18%|█▊ | 67743/371472 [5:23:16<25:09:00, 3.35it/s] 18%|█▊ | 67744/371472 [5:23:17<24:48:26, 3.40it/s] 18%|█▊ | 67745/371472 [5:23:17<23:52:23, 3.53it/s] 18%|█▊ | 67746/371472 [5:23:17<27:44:57, 3.04it/s] 18%|█▊ | 67747/371472 [5:23:18<25:58:18, 3.25it/s] 18%|█▊ | 67748/371472 [5:23:18<24:43:35, 3.41it/s] 18%|█▊ | 67749/371472 [5:23:18<24:33:43, 3.43it/s] 18%|█▊ | 67750/371472 [5:23:19<25:59:33, 3.25it/s] 18%|█▊ | 67751/371472 [5:23:19<24:55:45, 3.38it/s] 18%|█▊ | 67752/371472 [5:23:19<23:46:58, 3.55it/s] 18%|█▊ | 67753/371472 [5:23:19<23:20:43, 3.61it/s] 18%|█▊ | 67754/371472 [5:23:20<23:18:38, 3.62it/s] 18%|█▊ | 67755/371472 [5:23:20<23:05:54, 3.65it/s] 18%|█▊ | 67756/371472 [5:23:20<24:17:22, 3.47it/s] 18%|█▊ | 67757/371472 [5:23:20<24:24:36, 3.46it/s] 18%|█▊ | 67758/371472 [5:23:21<24:54:49, 3.39it/s] 18%|█▊ | 67759/371472 [5:23:21<25:22:09, 3.33it/s] 18%|█▊ | 67760/371472 [5:23:21<24:14:36, 3.48it/s] {'loss': 4.1775, 'learning_rate': 8.362278868322954e-07, 'epoch': 2.92} + 18%|█▊ | 67760/371472 [5:23:21<24:14:36, 3.48it/s] 18%|█▊ | 67761/371472 [5:23:22<25:01:39, 3.37it/s] 18%|█▊ | 67762/371472 [5:23:22<23:52:45, 3.53it/s] 18%|█▊ | 67763/371472 [5:23:22<22:51:55, 3.69it/s] 18%|█▊ | 67764/371472 [5:23:22<23:26:23, 3.60it/s] 18%|█▊ | 67765/371472 [5:23:23<24:11:33, 3.49it/s] 18%|█▊ | 67766/371472 [5:23:23<24:00:06, 3.51it/s] 18%|█▊ | 67767/371472 [5:23:23<23:44:04, 3.55it/s] 18%|█▊ | 67768/371472 [5:23:24<23:39:55, 3.56it/s] 18%|█▊ | 67769/371472 [5:23:24<22:33:50, 3.74it/s] 18%|█▊ | 67770/371472 [5:23:24<22:27:39, 3.76it/s] 18%|█▊ | 67771/371472 [5:23:24<21:37:34, 3.90it/s] 18%|█▊ | 67772/371472 [5:23:25<22:19:13, 3.78it/s] 18%|█▊ | 67773/371472 [5:23:25<22:03:19, 3.82it/s] 18%|█▊ | 67774/371472 [5:23:25<22:06:52, 3.81it/s] 18%|█▊ | 67775/371472 [5:23:25<24:11:19, 3.49it/s] 18%|█▊ | 67776/371472 [5:23:26<24:15:14, 3.48it/s] 18%|█▊ | 67777/371472 [5:23:26<23:32:26, 3.58it/s] 18%|█▊ | 67778/371472 [5:23:26<23:32:54, 3.58it/s] 18%|█▊ | 67779/371472 [5:23:27<22:43:49, 3.71it/s] 18%|█▊ | 67780/371472 [5:23:27<25:02:33, 3.37it/s] {'loss': 4.0533, 'learning_rate': 8.361794048568166e-07, 'epoch': 2.92} + 18%|█▊ | 67780/371472 [5:23:27<25:02:33, 3.37it/s] 18%|█▊ | 67781/371472 [5:23:27<25:31:59, 3.30it/s] 18%|█▊ | 67782/371472 [5:23:27<24:58:49, 3.38it/s] 18%|█▊ | 67783/371472 [5:23:28<23:33:17, 3.58it/s] 18%|█▊ | 67784/371472 [5:23:28<25:01:57, 3.37it/s] 18%|█▊ | 67785/371472 [5:23:28<25:27:16, 3.31it/s] 18%|█▊ | 67786/371472 [5:23:29<24:40:22, 3.42it/s] 18%|█▊ | 67787/371472 [5:23:29<24:35:16, 3.43it/s] 18%|█▊ | 67788/371472 [5:23:29<24:10:43, 3.49it/s] 18%|█▊ | 67789/371472 [5:23:30<23:58:21, 3.52it/s] 18%|█▊ | 67790/371472 [5:23:30<22:44:00, 3.71it/s] 18%|█▊ | 67791/371472 [5:23:30<23:40:49, 3.56it/s] 18%|█▊ | 67792/371472 [5:23:30<22:37:08, 3.73it/s] 18%|█▊ | 67793/371472 [5:23:31<22:57:27, 3.67it/s] 18%|█▊ | 67794/371472 [5:23:31<22:26:15, 3.76it/s] 18%|█▊ | 67795/371472 [5:23:31<21:43:42, 3.88it/s] 18%|█▊ | 67796/371472 [5:23:31<21:46:32, 3.87it/s] 18%|█▊ | 67797/371472 [5:23:32<24:18:39, 3.47it/s] 18%|█▊ | 67798/371472 [5:23:32<25:06:05, 3.36it/s] 18%|█▊ | 67799/371472 [5:23:32<25:30:09, 3.31it/s] 18%|█▊ | 67800/371472 [5:23:33<24:05:01, 3.50it/s] {'loss': 4.1892, 'learning_rate': 8.361309228813376e-07, 'epoch': 2.92} + 18%|█▊ | 67800/371472 [5:23:33<24:05:01, 3.50it/s] 18%|█▊ | 67801/371472 [5:23:33<23:59:25, 3.52it/s] 18%|█▊ | 67802/371472 [5:23:33<23:36:22, 3.57it/s] 18%|█▊ | 67803/371472 [5:23:33<22:44:20, 3.71it/s] 18%|█▊ | 67804/371472 [5:23:34<23:08:11, 3.65it/s] 18%|█▊ | 67805/371472 [5:23:34<22:37:32, 3.73it/s] 18%|█▊ | 67806/371472 [5:23:34<22:32:04, 3.74it/s] 18%|█▊ | 67807/371472 [5:23:34<23:58:26, 3.52it/s] 18%|█▊ | 67808/371472 [5:23:35<23:24:42, 3.60it/s] 18%|█▊ | 67809/371472 [5:23:35<22:33:39, 3.74it/s] 18%|█▊ | 67810/371472 [5:23:35<22:14:03, 3.79it/s] 18%|█▊ | 67811/371472 [5:23:36<22:31:44, 3.74it/s] 18%|█▊ | 67812/371472 [5:23:36<22:25:36, 3.76it/s] 18%|█▊ | 67813/371472 [5:23:36<25:27:18, 3.31it/s] 18%|█▊ | 67814/371472 [5:23:36<24:36:26, 3.43it/s] 18%|█▊ | 67815/371472 [5:23:37<23:32:25, 3.58it/s] 18%|█▊ | 67816/371472 [5:23:37<23:13:47, 3.63it/s] 18%|█▊ | 67817/371472 [5:23:37<22:38:41, 3.72it/s] 18%|█▊ | 67818/371472 [5:23:37<23:07:16, 3.65it/s] 18%|█▊ | 67819/371472 [5:23:38<23:57:28, 3.52it/s] 18%|█▊ | 67820/371472 [5:23:38<23:49:55, 3.54it/s] {'loss': 4.1913, 'learning_rate': 8.360824409058587e-07, 'epoch': 2.92} + 18%|█▊ | 67820/371472 [5:23:38<23:49:55, 3.54it/s] 18%|█▊ | 67821/371472 [5:23:38<23:23:24, 3.61it/s] 18%|█▊ | 67822/371472 [5:23:39<23:32:22, 3.58it/s] 18%|█▊ | 67823/371472 [5:23:39<22:44:23, 3.71it/s] 18%|█▊ | 67824/371472 [5:23:39<23:46:24, 3.55it/s] 18%|█▊ | 67825/371472 [5:23:39<23:24:33, 3.60it/s] 18%|█▊ | 67826/371472 [5:23:40<24:44:24, 3.41it/s] 18%|█▊ | 67827/371472 [5:23:40<24:17:02, 3.47it/s] 18%|█▊ | 67828/371472 [5:23:40<24:57:54, 3.38it/s] 18%|█▊ | 67829/371472 [5:23:41<24:25:11, 3.45it/s] 18%|█▊ | 67830/371472 [5:23:41<24:20:24, 3.47it/s] 18%|█▊ | 67831/371472 [5:23:41<26:23:21, 3.20it/s] 18%|█▊ | 67832/371472 [5:23:42<27:07:48, 3.11it/s] 18%|█▊ | 67833/371472 [5:23:42<25:38:24, 3.29it/s] 18%|█▊ | 67834/371472 [5:23:42<24:59:24, 3.38it/s] 18%|█▊ | 67835/371472 [5:23:43<26:18:36, 3.21it/s] 18%|█▊ | 67836/371472 [5:23:43<27:25:56, 3.07it/s] 18%|█▊ | 67837/371472 [5:23:43<27:42:05, 3.04it/s] 18%|█▊ | 67838/371472 [5:23:43<26:22:08, 3.20it/s] 18%|█▊ | 67839/371472 [5:23:44<25:36:39, 3.29it/s] 18%|█▊ | 67840/371472 [5:23:44<25:33:54, 3.30it/s] {'loss': 4.0711, 'learning_rate': 8.360339589303798e-07, 'epoch': 2.92} + 18%|█▊ | 67840/371472 [5:23:44<25:33:54, 3.30it/s] 18%|█▊ | 67841/371472 [5:23:44<24:32:46, 3.44it/s] 18%|█▊ | 67842/371472 [5:23:45<23:42:37, 3.56it/s] 18%|█▊ | 67843/371472 [5:23:45<23:22:09, 3.61it/s] 18%|█▊ | 67844/371472 [5:23:45<22:53:10, 3.69it/s] 18%|█▊ | 67845/371472 [5:23:45<23:07:57, 3.65it/s] 18%|█▊ | 67846/371472 [5:23:46<23:55:47, 3.52it/s] 18%|█▊ | 67847/371472 [5:23:46<25:19:11, 3.33it/s] 18%|█▊ | 67848/371472 [5:23:46<24:01:18, 3.51it/s] 18%|█▊ | 67849/371472 [5:23:47<23:16:19, 3.62it/s] 18%|█▊ | 67850/371472 [5:23:47<22:45:08, 3.71it/s] 18%|█▊ | 67851/371472 [5:23:47<23:31:20, 3.59it/s] 18%|█▊ | 67852/371472 [5:23:47<22:40:42, 3.72it/s] 18%|█▊ | 67853/371472 [5:23:48<23:17:27, 3.62it/s] 18%|█▊ | 67854/371472 [5:23:48<23:18:07, 3.62it/s] 18%|█▊ | 67855/371472 [5:23:48<24:38:08, 3.42it/s] 18%|█▊ | 67856/371472 [5:23:49<24:49:28, 3.40it/s] 18%|█▊ | 67857/371472 [5:23:49<24:19:04, 3.47it/s] 18%|█▊ | 67858/371472 [5:23:49<23:13:42, 3.63it/s] 18%|█▊ | 67859/371472 [5:23:49<22:48:43, 3.70it/s] 18%|█▊ | 67860/371472 [5:23:50<26:38:01, 3.17it/s] {'loss': 4.2778, 'learning_rate': 8.35985476954901e-07, 'epoch': 2.92} + 18%|█▊ | 67860/371472 [5:23:50<26:38:01, 3.17it/s] 18%|█▊ | 67861/371472 [5:23:50<26:03:29, 3.24it/s] 18%|█▊ | 67862/371472 [5:23:50<24:28:54, 3.44it/s] 18%|█▊ | 67863/371472 [5:23:51<24:07:51, 3.49it/s] 18%|█▊ | 67864/371472 [5:23:51<24:05:42, 3.50it/s] 18%|█▊ | 67865/371472 [5:23:51<24:31:36, 3.44it/s] 18%|█▊ | 67866/371472 [5:23:51<24:34:41, 3.43it/s] 18%|█▊ | 67867/371472 [5:23:52<24:02:58, 3.51it/s] 18%|█▊ | 67868/371472 [5:23:52<24:41:56, 3.41it/s] 18%|█▊ | 67869/371472 [5:23:52<24:18:59, 3.47it/s] 18%|█▊ | 67870/371472 [5:23:53<23:20:21, 3.61it/s] 18%|█▊ | 67871/371472 [5:23:53<22:13:27, 3.79it/s] 18%|█▊ | 67872/371472 [5:23:53<22:36:10, 3.73it/s] 18%|█▊ | 67873/371472 [5:23:53<21:53:34, 3.85it/s] 18%|█▊ | 67874/371472 [5:23:54<22:08:17, 3.81it/s] 18%|█▊ | 67875/371472 [5:23:54<22:01:09, 3.83it/s] 18%|█▊ | 67876/371472 [5:23:54<22:22:16, 3.77it/s] 18%|█▊ | 67877/371472 [5:23:54<23:15:14, 3.63it/s] 18%|█▊ | 67878/371472 [5:23:55<23:28:01, 3.59it/s] 18%|█▊ | 67879/371472 [5:23:55<23:56:42, 3.52it/s] 18%|█▊ | 67880/371472 [5:23:55<23:19:36, 3.62it/s] {'loss': 4.3031, 'learning_rate': 8.35936994979422e-07, 'epoch': 2.92} + 18%|█▊ | 67880/371472 [5:23:55<23:19:36, 3.62it/s] 18%|█▊ | 67881/371472 [5:23:56<23:15:24, 3.63it/s] 18%|█▊ | 67882/371472 [5:23:56<23:11:54, 3.64it/s] 18%|█▊ | 67883/371472 [5:23:56<22:55:20, 3.68it/s] 18%|█▊ | 67884/371472 [5:23:56<23:24:19, 3.60it/s] 18%|█▊ | 67885/371472 [5:23:57<24:11:26, 3.49it/s] 18%|█▊ | 67886/371472 [5:23:57<24:24:37, 3.45it/s] 18%|█▊ | 67887/371472 [5:23:57<23:45:37, 3.55it/s] 18%|█▊ | 67888/371472 [5:23:58<24:42:54, 3.41it/s] 18%|█▊ | 67889/371472 [5:23:58<24:34:48, 3.43it/s] 18%|█▊ | 67890/371472 [5:23:58<23:59:49, 3.51it/s] 18%|█▊ | 67891/371472 [5:23:58<23:55:22, 3.52it/s] 18%|█▊ | 67892/371472 [5:23:59<24:16:59, 3.47it/s] 18%|█▊ | 67893/371472 [5:23:59<25:23:58, 3.32it/s] 18%|█▊ | 67894/371472 [5:23:59<24:00:15, 3.51it/s] 18%|█▊ | 67895/371472 [5:24:00<24:15:42, 3.48it/s] 18%|█▊ | 67896/371472 [5:24:00<24:22:53, 3.46it/s] 18%|█▊ | 67897/371472 [5:24:00<24:07:50, 3.49it/s] 18%|█▊ | 67898/371472 [5:24:00<24:49:44, 3.40it/s] 18%|█▊ | 67899/371472 [5:24:01<24:14:57, 3.48it/s] 18%|█▊ | 67900/371472 [5:24:01<23:27:47, 3.59it/s] {'loss': 4.032, 'learning_rate': 8.358885130039432e-07, 'epoch': 2.92} + 18%|█▊ | 67900/371472 [5:24:01<23:27:47, 3.59it/s] 18%|█▊ | 67901/371472 [5:24:01<23:33:20, 3.58it/s] 18%|█▊ | 67902/371472 [5:24:02<23:18:53, 3.62it/s] 18%|█▊ | 67903/371472 [5:24:02<23:36:50, 3.57it/s] 18%|█▊ | 67904/371472 [5:24:02<23:40:50, 3.56it/s] 18%|█▊ | 67905/371472 [5:24:02<22:42:47, 3.71it/s] 18%|█▊ | 67906/371472 [5:24:03<23:10:00, 3.64it/s] 18%|█▊ | 67907/371472 [5:24:03<25:03:30, 3.37it/s] 18%|█▊ | 67908/371472 [5:24:03<24:16:30, 3.47it/s] 18%|█▊ | 67909/371472 [5:24:04<27:11:06, 3.10it/s] 18%|█▊ | 67910/371472 [5:24:04<25:12:24, 3.35it/s] 18%|█▊ | 67911/371472 [5:24:04<24:51:40, 3.39it/s] 18%|█▊ | 67912/371472 [5:24:04<24:13:51, 3.48it/s] 18%|█▊ | 67913/371472 [5:24:05<23:19:45, 3.61it/s] 18%|█▊ | 67914/371472 [5:24:05<23:16:53, 3.62it/s] 18%|█▊ | 67915/371472 [5:24:05<22:40:14, 3.72it/s] 18%|█▊ | 67916/371472 [5:24:05<22:21:50, 3.77it/s] 18%|█▊ | 67917/371472 [5:24:06<22:28:37, 3.75it/s] 18%|█▊ | 67918/371472 [5:24:06<21:57:17, 3.84it/s] 18%|█▊ | 67919/371472 [5:24:06<22:16:34, 3.79it/s] 18%|█▊ | 67920/371472 [5:24:07<24:30:05, 3.44it/s] {'loss': 4.1971, 'learning_rate': 8.358400310284643e-07, 'epoch': 2.93} + 18%|█▊ | 67920/371472 [5:24:07<24:30:05, 3.44it/s] 18%|█▊ | 67921/371472 [5:24:07<25:23:31, 3.32it/s] 18%|█▊ | 67922/371472 [5:24:07<24:33:54, 3.43it/s] 18%|█▊ | 67923/371472 [5:24:07<23:36:25, 3.57it/s] 18%|█▊ | 67924/371472 [5:24:08<23:33:09, 3.58it/s] 18%|█▊ | 67925/371472 [5:24:08<23:41:57, 3.56it/s] 18%|█▊ | 67926/371472 [5:24:08<23:37:25, 3.57it/s] 18%|█▊ | 67927/371472 [5:24:09<23:18:31, 3.62it/s] 18%|█▊ | 67928/371472 [5:24:09<23:16:03, 3.62it/s] 18%|█▊ | 67929/371472 [5:24:09<24:28:49, 3.44it/s] 18%|█▊ | 67930/371472 [5:24:09<25:31:59, 3.30it/s] 18%|█▊ | 67931/371472 [5:24:10<24:58:10, 3.38it/s] 18%|█▊ | 67932/371472 [5:24:10<24:09:17, 3.49it/s] 18%|█▊ | 67933/371472 [5:24:10<24:48:49, 3.40it/s] 18%|█▊ | 67934/371472 [5:24:11<24:52:00, 3.39it/s] 18%|█▊ | 67935/371472 [5:24:11<23:58:01, 3.52it/s] 18%|█▊ | 67936/371472 [5:24:11<25:34:27, 3.30it/s] 18%|█▊ | 67937/371472 [5:24:12<24:44:38, 3.41it/s] 18%|█▊ | 67938/371472 [5:24:12<25:05:37, 3.36it/s] 18%|█▊ | 67939/371472 [5:24:12<25:48:01, 3.27it/s] 18%|█▊ | 67940/371472 [5:24:12<25:05:17, 3.36it/s] {'loss': 4.133, 'learning_rate': 8.357915490529855e-07, 'epoch': 2.93} + 18%|█▊ | 67940/371472 [5:24:12<25:05:17, 3.36it/s] 18%|█▊ | 67941/371472 [5:24:13<24:09:26, 3.49it/s] 18%|█▊ | 67942/371472 [5:24:13<22:59:50, 3.67it/s] 18%|█▊ | 67943/371472 [5:24:13<24:36:09, 3.43it/s] 18%|█▊ | 67944/371472 [5:24:14<23:42:49, 3.56it/s] 18%|█▊ | 67945/371472 [5:24:14<22:44:23, 3.71it/s] 18%|█▊ | 67946/371472 [5:24:14<23:20:23, 3.61it/s] 18%|█▊ | 67947/371472 [5:24:14<25:18:10, 3.33it/s] 18%|█▊ | 67948/371472 [5:24:15<26:24:01, 3.19it/s] 18%|█▊ | 67949/371472 [5:24:15<25:48:07, 3.27it/s] 18%|█▊ | 67950/371472 [5:24:15<24:41:52, 3.41it/s] 18%|█▊ | 67951/371472 [5:24:16<23:48:32, 3.54it/s] 18%|█▊ | 67952/371472 [5:24:16<22:47:10, 3.70it/s] 18%|█▊ | 67953/371472 [5:24:16<21:55:50, 3.84it/s] 18%|█▊ | 67954/371472 [5:24:16<22:37:49, 3.73it/s] 18%|█▊ | 67955/371472 [5:24:17<22:40:21, 3.72it/s] 18%|█▊ | 67956/371472 [5:24:17<21:57:45, 3.84it/s] 18%|█▊ | 67957/371472 [5:24:17<22:33:48, 3.74it/s] 18%|█▊ | 67958/371472 [5:24:17<22:20:19, 3.77it/s] 18%|█▊ | 67959/371472 [5:24:18<23:22:56, 3.61it/s] 18%|█▊ | 67960/371472 [5:24:18<23:17:23, 3.62it/s] {'loss': 4.1434, 'learning_rate': 8.357430670775064e-07, 'epoch': 2.93} + 18%|█▊ | 67960/371472 [5:24:18<23:17:23, 3.62it/s] 18%|█▊ | 67961/371472 [5:24:18<24:22:24, 3.46it/s] 18%|█▊ | 67962/371472 [5:24:19<24:30:04, 3.44it/s] 18%|█▊ | 67963/371472 [5:24:19<23:38:50, 3.57it/s] 18%|█▊ | 67964/371472 [5:24:19<23:28:15, 3.59it/s] 18%|█▊ | 67965/371472 [5:24:19<23:05:03, 3.65it/s] 18%|█▊ | 67966/371472 [5:24:20<22:27:34, 3.75it/s] 18%|█▊ | 67967/371472 [5:24:20<22:43:53, 3.71it/s] 18%|█▊ | 67968/371472 [5:24:20<24:36:32, 3.43it/s] 18%|█▊ | 67969/371472 [5:24:21<24:06:53, 3.50it/s] 18%|█▊ | 67970/371472 [5:24:21<24:44:24, 3.41it/s] 18%|█▊ | 67971/371472 [5:24:21<23:51:19, 3.53it/s] 18%|█▊ | 67972/371472 [5:24:21<24:51:31, 3.39it/s] 18%|█▊ | 67973/371472 [5:24:22<24:04:05, 3.50it/s] 18%|█▊ | 67974/371472 [5:24:22<23:57:03, 3.52it/s] 18%|█▊ | 67975/371472 [5:24:22<24:57:25, 3.38it/s] 18%|█▊ | 67976/371472 [5:24:23<24:36:58, 3.42it/s] 18%|█▊ | 67977/371472 [5:24:23<24:18:54, 3.47it/s] 18%|█▊ | 67978/371472 [5:24:23<24:04:56, 3.50it/s] 18%|█▊ | 67979/371472 [5:24:23<24:11:04, 3.49it/s] 18%|█▊ | 67980/371472 [5:24:24<25:29:01, 3.31it/s] {'loss': 4.1361, 'learning_rate': 8.356945851020275e-07, 'epoch': 2.93} + 18%|█▊ | 67980/371472 [5:24:24<25:29:01, 3.31it/s] 18%|█▊ | 67981/371472 [5:24:24<24:30:20, 3.44it/s] 18%|█▊ | 67982/371472 [5:24:24<23:22:55, 3.61it/s] 18%|█▊ | 67983/371472 [5:24:25<23:09:22, 3.64it/s] 18%|█▊ | 67984/371472 [5:24:25<22:23:06, 3.77it/s] 18%|█▊ | 67985/371472 [5:24:25<21:48:45, 3.86it/s] 18%|█▊ | 67986/371472 [5:24:25<23:14:48, 3.63it/s] 18%|█▊ | 67987/371472 [5:24:26<22:29:11, 3.75it/s] 18%|█▊ | 67988/371472 [5:24:26<22:53:38, 3.68it/s] 18%|█▊ | 67989/371472 [5:24:26<25:12:47, 3.34it/s] 18%|█▊ | 67990/371472 [5:24:27<25:43:30, 3.28it/s] 18%|█▊ | 67991/371472 [5:24:27<26:18:49, 3.20it/s] 18%|█▊ | 67992/371472 [5:24:27<24:33:20, 3.43it/s] 18%|█▊ | 67993/371472 [5:24:27<24:18:43, 3.47it/s] 18%|█▊ | 67994/371472 [5:24:28<24:31:01, 3.44it/s] 18%|█▊ | 67995/371472 [5:24:28<24:26:26, 3.45it/s] 18%|█▊ | 67996/371472 [5:24:28<23:54:45, 3.53it/s] 18%|█▊ | 67997/371472 [5:24:29<23:33:59, 3.58it/s] 18%|█▊ | 67998/371472 [5:24:29<23:26:13, 3.60it/s] 18%|█▊ | 67999/371472 [5:24:29<23:04:05, 3.65it/s] 18%|█▊ | 68000/371472 [5:24:29<22:59:55, 3.67it/s] {'loss': 4.1048, 'learning_rate': 8.356461031265487e-07, 'epoch': 2.93} + 18%|█▊ | 68000/371472 [5:24:29<22:59:55, 3.67it/s] 18%|█▊ | 68001/371472 [5:24:30<22:43:35, 3.71it/s] 18%|█▊ | 68002/371472 [5:24:30<23:28:07, 3.59it/s] 18%|█▊ | 68003/371472 [5:24:30<23:40:36, 3.56it/s] 18%|█▊ | 68004/371472 [5:24:30<22:57:56, 3.67it/s] 18%|█▊ | 68005/371472 [5:24:31<22:44:57, 3.71it/s] 18%|█▊ | 68006/371472 [5:24:31<22:36:24, 3.73it/s] 18%|█▊ | 68007/371472 [5:24:31<22:40:05, 3.72it/s] 18%|█▊ | 68008/371472 [5:24:32<23:10:34, 3.64it/s] 18%|█▊ | 68009/371472 [5:24:32<23:20:08, 3.61it/s] 18%|█▊ | 68010/371472 [5:24:32<23:39:49, 3.56it/s] 18%|█▊ | 68011/371472 [5:24:32<23:35:27, 3.57it/s] 18%|█▊ | 68012/371472 [5:24:33<25:32:58, 3.30it/s] 18%|█▊ | 68013/371472 [5:24:33<25:40:33, 3.28it/s] 18%|█▊ | 68014/371472 [5:24:33<25:20:48, 3.33it/s] 18%|█▊ | 68015/371472 [5:24:34<24:47:14, 3.40it/s] 18%|█▊ | 68016/371472 [5:24:34<24:11:23, 3.48it/s] 18%|█▊ | 68017/371472 [5:24:34<24:37:46, 3.42it/s] 18%|█▊ | 68018/371472 [5:24:34<23:48:21, 3.54it/s] 18%|█▊ | 68019/371472 [5:24:35<24:06:01, 3.50it/s] 18%|█▊ | 68020/371472 [5:24:35<28:31:56, 2.95it/s] {'loss': 4.108, 'learning_rate': 8.355976211510697e-07, 'epoch': 2.93} + 18%|█▊ | 68020/371472 [5:24:35<28:31:56, 2.95it/s] 18%|█▊ | 68021/371472 [5:24:35<27:12:19, 3.10it/s] 18%|█▊ | 68022/371472 [5:24:36<25:59:32, 3.24it/s] 18%|█▊ | 68023/371472 [5:24:36<24:51:18, 3.39it/s] 18%|█▊ | 68024/371472 [5:24:36<23:44:20, 3.55it/s] 18%|█▊ | 68025/371472 [5:24:37<23:30:32, 3.59it/s] 18%|█▊ | 68026/371472 [5:24:37<24:01:29, 3.51it/s] 18%|█▊ | 68027/371472 [5:24:37<24:35:28, 3.43it/s] 18%|█▊ | 68028/371472 [5:24:37<24:04:51, 3.50it/s] 18%|█▊ | 68029/371472 [5:24:38<24:59:25, 3.37it/s] 18%|█▊ | 68030/371472 [5:24:38<24:47:01, 3.40it/s] 18%|█▊ | 68031/371472 [5:24:38<24:05:36, 3.50it/s] 18%|█▊ | 68032/371472 [5:24:39<25:42:38, 3.28it/s] 18%|█▊ | 68033/371472 [5:24:39<25:19:15, 3.33it/s] 18%|█▊ | 68034/371472 [5:24:39<24:46:17, 3.40it/s] 18%|█▊ | 68035/371472 [5:24:40<24:43:38, 3.41it/s] 18%|█▊ | 68036/371472 [5:24:40<23:30:04, 3.59it/s] 18%|█▊ | 68037/371472 [5:24:40<24:30:41, 3.44it/s] 18%|█▊ | 68038/371472 [5:24:40<24:25:32, 3.45it/s] 18%|█▊ | 68039/371472 [5:24:41<23:51:12, 3.53it/s] 18%|█▊ | 68040/371472 [5:24:41<22:55:09, 3.68it/s] {'loss': 4.3268, 'learning_rate': 8.355491391755909e-07, 'epoch': 2.93} + 18%|█▊ | 68040/371472 [5:24:41<22:55:09, 3.68it/s] 18%|█▊ | 68041/371472 [5:24:41<22:23:49, 3.76it/s] 18%|█▊ | 68042/371472 [5:24:41<24:24:38, 3.45it/s] 18%|█▊ | 68043/371472 [5:24:42<23:07:55, 3.64it/s] 18%|█▊ | 68044/371472 [5:24:42<23:06:05, 3.65it/s] 18%|█▊ | 68045/371472 [5:24:42<23:33:55, 3.58it/s] 18%|█▊ | 68046/371472 [5:24:43<22:55:04, 3.68it/s] 18%|█▊ | 68047/371472 [5:24:43<22:47:49, 3.70it/s] 18%|█▊ | 68048/371472 [5:24:43<23:44:33, 3.55it/s] 18%|█▊ | 68049/371472 [5:24:43<23:54:45, 3.52it/s] 18%|█▊ | 68050/371472 [5:24:44<23:13:29, 3.63it/s] 18%|█▊ | 68051/371472 [5:24:44<23:08:54, 3.64it/s] 18%|█▊ | 68052/371472 [5:24:44<22:18:56, 3.78it/s] 18%|█▊ | 68053/371472 [5:24:45<25:31:06, 3.30it/s] 18%|█▊ | 68054/371472 [5:24:45<23:45:16, 3.55it/s] 18%|█▊ | 68055/371472 [5:24:45<23:14:54, 3.63it/s] 18%|█▊ | 68056/371472 [5:24:45<22:46:23, 3.70it/s] 18%|█▊ | 68057/371472 [5:24:46<22:31:20, 3.74it/s] 18%|█▊ | 68058/371472 [5:24:46<22:01:30, 3.83it/s] 18%|█▊ | 68059/371472 [5:24:46<22:19:07, 3.78it/s] 18%|█▊ | 68060/371472 [5:24:46<22:00:36, 3.83it/s] {'loss': 4.0361, 'learning_rate': 8.35500657200112e-07, 'epoch': 2.93} + 18%|█▊ | 68060/371472 [5:24:46<22:00:36, 3.83it/s] 18%|█▊ | 68061/371472 [5:24:47<21:44:59, 3.88it/s] 18%|█▊ | 68062/371472 [5:24:47<22:08:29, 3.81it/s] 18%|█▊ | 68063/371472 [5:24:47<23:22:11, 3.61it/s] 18%|█▊ | 68064/371472 [5:24:47<22:48:15, 3.70it/s] 18%|█▊ | 68065/371472 [5:24:48<22:21:44, 3.77it/s] 18%|█▊ | 68066/371472 [5:24:48<22:05:02, 3.82it/s] 18%|█▊ | 68067/371472 [5:24:48<22:41:00, 3.72it/s] 18%|█▊ | 68068/371472 [5:24:48<22:07:20, 3.81it/s] 18%|█▊ | 68069/371472 [5:24:49<23:32:00, 3.58it/s] 18%|█▊ | 68070/371472 [5:24:49<24:11:04, 3.48it/s] 18%|█▊ | 68071/371472 [5:24:49<24:37:32, 3.42it/s] 18%|█▊ | 68072/371472 [5:24:50<24:35:06, 3.43it/s] 18%|█▊ | 68073/371472 [5:24:50<24:34:02, 3.43it/s] 18%|█▊ | 68074/371472 [5:24:50<25:26:43, 3.31it/s] 18%|█▊ | 68075/371472 [5:24:51<25:56:41, 3.25it/s] 18%|█▊ | 68076/371472 [5:24:51<24:28:38, 3.44it/s] 18%|█▊ | 68077/371472 [5:24:51<23:33:29, 3.58it/s] 18%|█▊ | 68078/371472 [5:24:51<22:24:21, 3.76it/s] 18%|█▊ | 68079/371472 [5:24:52<22:54:25, 3.68it/s] 18%|█▊ | 68080/371472 [5:24:52<22:43:25, 3.71it/s] {'loss': 3.9429, 'learning_rate': 8.354521752246331e-07, 'epoch': 2.93} + 18%|█▊ | 68080/371472 [5:24:52<22:43:25, 3.71it/s] 18%|█▊ | 68081/371472 [5:24:52<23:21:08, 3.61it/s] 18%|█▊ | 68082/371472 [5:24:52<23:24:49, 3.60it/s] 18%|█▊ | 68083/371472 [5:24:53<24:29:36, 3.44it/s] 18%|█▊ | 68084/371472 [5:24:53<24:25:57, 3.45it/s] 18%|█▊ | 68085/371472 [5:24:53<25:04:57, 3.36it/s] 18%|█▊ | 68086/371472 [5:24:54<24:14:35, 3.48it/s] 18%|█▊ | 68087/371472 [5:24:54<22:53:00, 3.68it/s] 18%|█▊ | 68088/371472 [5:24:54<22:49:21, 3.69it/s] 18%|█▊ | 68089/371472 [5:24:54<23:32:22, 3.58it/s] 18%|█▊ | 68090/371472 [5:24:55<23:18:33, 3.62it/s] 18%|█▊ | 68091/371472 [5:24:55<22:43:05, 3.71it/s] 18%|█▊ | 68092/371472 [5:24:55<21:59:37, 3.83it/s] 18%|█▊ | 68093/371472 [5:24:56<22:44:52, 3.70it/s] 18%|█▊ | 68094/371472 [5:24:56<22:11:43, 3.80it/s] 18%|█▊ | 68095/371472 [5:24:56<22:12:56, 3.79it/s] 18%|█▊ | 68096/371472 [5:24:56<22:14:33, 3.79it/s] 18%|█▊ | 68097/371472 [5:24:57<22:23:57, 3.76it/s] 18%|█▊ | 68098/371472 [5:24:57<22:17:37, 3.78it/s] 18%|█▊ | 68099/371472 [5:24:57<21:56:53, 3.84it/s] 18%|█▊ | 68100/371472 [5:24:57<21:24:42, 3.94it/s] {'loss': 4.4815, 'learning_rate': 8.354036932491542e-07, 'epoch': 2.93} + 18%|█▊ | 68100/371472 [5:24:57<21:24:42, 3.94it/s] 18%|█▊ | 68101/371472 [5:24:58<21:46:41, 3.87it/s] 18%|█▊ | 68102/371472 [5:24:58<21:28:52, 3.92it/s] 18%|█▊ | 68103/371472 [5:24:58<21:27:11, 3.93it/s] 18%|█▊ | 68104/371472 [5:24:58<22:02:19, 3.82it/s] 18%|█▊ | 68105/371472 [5:24:59<22:11:26, 3.80it/s] 18%|█▊ | 68106/371472 [5:24:59<23:44:45, 3.55it/s] 18%|█▊ | 68107/371472 [5:24:59<23:06:16, 3.65it/s] 18%|█▊ | 68108/371472 [5:25:00<22:57:26, 3.67it/s] 18%|█▊ | 68109/371472 [5:25:00<22:32:57, 3.74it/s] 18%|█▊ | 68110/371472 [5:25:00<25:38:53, 3.29it/s] 18%|█▊ | 68111/371472 [5:25:00<24:30:02, 3.44it/s] 18%|█▊ | 68112/371472 [5:25:01<25:09:41, 3.35it/s] 18%|█▊ | 68113/371472 [5:25:01<24:23:12, 3.46it/s] 18%|█▊ | 68114/371472 [5:25:01<24:15:29, 3.47it/s] 18%|█▊ | 68115/371472 [5:25:02<25:16:53, 3.33it/s] 18%|█▊ | 68116/371472 [5:25:02<23:50:15, 3.53it/s] 18%|█▊ | 68117/371472 [5:25:02<24:31:03, 3.44it/s] 18%|█▊ | 68118/371472 [5:25:02<24:33:11, 3.43it/s] 18%|█▊ | 68119/371472 [5:25:03<24:36:19, 3.42it/s] 18%|█▊ | 68120/371472 [5:25:03<24:28:24, 3.44it/s] {'loss': 3.9123, 'learning_rate': 8.353552112736753e-07, 'epoch': 2.93} + 18%|█▊ | 68120/371472 [5:25:03<24:28:24, 3.44it/s] 18%|█▊ | 68121/371472 [5:25:03<23:58:19, 3.52it/s] 18%|█▊ | 68122/371472 [5:25:04<23:53:07, 3.53it/s] 18%|█▊ | 68123/371472 [5:25:04<23:39:01, 3.56it/s] 18%|█▊ | 68124/371472 [5:25:04<23:16:52, 3.62it/s] 18%|█▊ | 68125/371472 [5:25:04<23:49:12, 3.54it/s] 18%|█▊ | 68126/371472 [5:25:05<26:45:10, 3.15it/s] 18%|█▊ | 68127/371472 [5:25:05<25:25:24, 3.31it/s] 18%|█▊ | 68128/371472 [5:25:05<24:11:46, 3.48it/s] 18%|█▊ | 68129/371472 [5:25:06<22:51:07, 3.69it/s] 18%|█▊ | 68130/371472 [5:25:06<22:04:58, 3.82it/s] 18%|█▊ | 68131/371472 [5:25:06<22:23:55, 3.76it/s] 18%|█▊ | 68132/371472 [5:25:06<22:18:15, 3.78it/s] 18%|█▊ | 68133/371472 [5:25:07<22:00:15, 3.83it/s] 18%|█▊ | 68134/371472 [5:25:07<22:17:45, 3.78it/s] 18%|█▊ | 68135/371472 [5:25:07<23:17:01, 3.62it/s] 18%|█▊ | 68136/371472 [5:25:07<22:50:25, 3.69it/s] 18%|█▊ | 68137/371472 [5:25:08<23:52:24, 3.53it/s] 18%|█▊ | 68138/371472 [5:25:08<25:55:06, 3.25it/s] 18%|█▊ | 68139/371472 [5:25:08<25:49:54, 3.26it/s] 18%|█▊ | 68140/371472 [5:25:09<24:40:49, 3.41it/s] {'loss': 4.309, 'learning_rate': 8.353067292981964e-07, 'epoch': 2.93} + 18%|█▊ | 68140/371472 [5:25:09<24:40:49, 3.41it/s] 18%|█▊ | 68141/371472 [5:25:09<23:56:59, 3.52it/s] 18%|█▊ | 68142/371472 [5:25:09<23:08:56, 3.64it/s] 18%|█▊ | 68143/371472 [5:25:09<22:36:15, 3.73it/s] 18%|█▊ | 68144/371472 [5:25:10<24:20:30, 3.46it/s] 18%|█▊ | 68145/371472 [5:25:10<26:54:22, 3.13it/s] 18%|█▊ | 68146/371472 [5:25:10<25:28:42, 3.31it/s] 18%|█▊ | 68147/371472 [5:25:11<27:05:17, 3.11it/s] 18%|█▊ | 68148/371472 [5:25:11<25:19:33, 3.33it/s] 18%|█▊ | 68149/371472 [5:25:11<24:46:37, 3.40it/s] 18%|█▊ | 68150/371472 [5:25:12<24:27:20, 3.45it/s] 18%|█▊ | 68151/371472 [5:25:12<25:26:04, 3.31it/s] 18%|█▊ | 68152/371472 [5:25:12<24:34:02, 3.43it/s] 18%|█▊ | 68153/371472 [5:25:12<24:08:13, 3.49it/s] 18%|█▊ | 68154/371472 [5:25:13<23:01:01, 3.66it/s] 18%|█▊ | 68155/371472 [5:25:13<22:33:32, 3.73it/s] 18%|█▊ | 68156/371472 [5:25:13<22:48:00, 3.70it/s] 18%|█▊ | 68157/371472 [5:25:14<22:36:49, 3.73it/s] 18%|█▊ | 68158/371472 [5:25:14<23:20:09, 3.61it/s] 18%|█▊ | 68159/371472 [5:25:14<25:25:28, 3.31it/s] 18%|█▊ | 68160/371472 [5:25:15<26:15:27, 3.21it/s] {'loss': 4.1525, 'learning_rate': 8.352582473227176e-07, 'epoch': 2.94} + 18%|█▊ | 68160/371472 [5:25:15<26:15:27, 3.21it/s] 18%|█▊ | 68161/371472 [5:25:15<25:36:17, 3.29it/s] 18%|█▊ | 68162/371472 [5:25:15<24:08:16, 3.49it/s] 18%|█▊ | 68163/371472 [5:25:15<24:30:58, 3.44it/s] 18%|█▊ | 68164/371472 [5:25:16<23:21:59, 3.61it/s] 18%|█▊ | 68165/371472 [5:25:16<23:07:58, 3.64it/s] 18%|█▊ | 68166/371472 [5:25:16<23:16:52, 3.62it/s] 18%|█▊ | 68167/371472 [5:25:16<22:26:27, 3.75it/s] 18%|█▊ | 68168/371472 [5:25:17<23:37:24, 3.57it/s] 18%|█▊ | 68169/371472 [5:25:17<24:17:43, 3.47it/s] 18%|█▊ | 68170/371472 [5:25:17<24:11:41, 3.48it/s] 18%|█▊ | 68171/371472 [5:25:18<25:12:34, 3.34it/s] 18%|█▊ | 68172/371472 [5:25:18<24:08:03, 3.49it/s] 18%|█▊ | 68173/371472 [5:25:18<24:05:07, 3.50it/s] 18%|█▊ | 68174/371472 [5:25:19<27:12:24, 3.10it/s] 18%|█▊ | 68175/371472 [5:25:19<28:22:11, 2.97it/s] 18%|█▊ | 68176/371472 [5:25:19<26:00:33, 3.24it/s] 18%|█▊ | 68177/371472 [5:25:19<26:00:50, 3.24it/s] 18%|█▊ | 68178/371472 [5:25:20<25:07:23, 3.35it/s] 18%|█▊ | 68179/371472 [5:25:20<23:56:43, 3.52it/s] 18%|█▊ | 68180/371472 [5:25:20<22:51:45, 3.68it/s] {'loss': 4.2099, 'learning_rate': 8.352097653472387e-07, 'epoch': 2.94} + 18%|█▊ | 68180/371472 [5:25:20<22:51:45, 3.68it/s] 18%|█▊ | 68181/371472 [5:25:21<23:10:45, 3.63it/s] 18%|█▊ | 68182/371472 [5:25:21<22:17:27, 3.78it/s] 18%|█▊ | 68183/371472 [5:25:21<22:52:15, 3.68it/s] 18%|█▊ | 68184/371472 [5:25:21<22:20:34, 3.77it/s] 18%|█▊ | 68185/371472 [5:25:22<22:34:05, 3.73it/s] 18%|█▊ | 68186/371472 [5:25:22<23:00:02, 3.66it/s] 18%|█▊ | 68187/371472 [5:25:22<24:13:35, 3.48it/s] 18%|█▊ | 68188/371472 [5:25:22<23:50:33, 3.53it/s] 18%|█▊ | 68189/371472 [5:25:23<23:37:41, 3.57it/s] 18%|█▊ | 68190/371472 [5:25:23<22:32:44, 3.74it/s] 18%|█▊ | 68191/371472 [5:25:23<23:22:19, 3.60it/s] 18%|█▊ | 68192/371472 [5:25:24<23:36:55, 3.57it/s] 18%|█▊ | 68193/371472 [5:25:24<24:04:12, 3.50it/s] 18%|█▊ | 68194/371472 [5:25:24<24:16:40, 3.47it/s] 18%|█▊ | 68195/371472 [5:25:24<24:09:18, 3.49it/s] 18%|█▊ | 68196/371472 [5:25:25<23:37:36, 3.57it/s] 18%|█▊ | 68197/371472 [5:25:25<23:44:30, 3.55it/s] 18%|█▊ | 68198/371472 [5:25:25<23:23:31, 3.60it/s] 18%|█▊ | 68199/371472 [5:25:26<22:40:09, 3.72it/s] 18%|█▊ | 68200/371472 [5:25:26<22:03:44, 3.82it/s] {'loss': 4.2891, 'learning_rate': 8.351612833717598e-07, 'epoch': 2.94} + 18%|█▊ | 68200/371472 [5:25:26<22:03:44, 3.82it/s] 18%|█▊ | 68201/371472 [5:25:26<22:34:39, 3.73it/s] 18%|█▊ | 68202/371472 [5:25:26<22:40:22, 3.72it/s] 18%|█▊ | 68203/371472 [5:25:27<24:46:05, 3.40it/s] 18%|█▊ | 68204/371472 [5:25:27<23:40:47, 3.56it/s] 18%|█▊ | 68205/371472 [5:25:27<24:10:21, 3.48it/s] 18%|█▊ | 68206/371472 [5:25:28<24:32:19, 3.43it/s] 18%|█▊ | 68207/371472 [5:25:28<23:53:30, 3.53it/s] 18%|█▊ | 68208/371472 [5:25:28<23:51:51, 3.53it/s] 18%|█▊ | 68209/371472 [5:25:28<23:33:42, 3.58it/s] 18%|█▊ | 68210/371472 [5:25:29<25:07:06, 3.35it/s] 18%|█▊ | 68211/371472 [5:25:29<24:53:14, 3.38it/s] 18%|█▊ | 68212/371472 [5:25:29<23:52:24, 3.53it/s] 18%|█▊ | 68213/371472 [5:25:30<24:04:24, 3.50it/s] 18%|█▊ | 68214/371472 [5:25:30<25:44:38, 3.27it/s] 18%|█▊ | 68215/371472 [5:25:30<25:39:05, 3.28it/s] 18%|█▊ | 68216/371472 [5:25:31<26:50:49, 3.14it/s] 18%|█▊ | 68217/371472 [5:25:31<25:56:29, 3.25it/s] 18%|█▊ | 68218/371472 [5:25:31<26:52:22, 3.13it/s] 18%|█▊ | 68219/371472 [5:25:31<25:36:45, 3.29it/s] 18%|█▊ | 68220/371472 [5:25:32<24:51:31, 3.39it/s] {'loss': 3.9832, 'learning_rate': 8.351128013962808e-07, 'epoch': 2.94} + 18%|█▊ | 68220/371472 [5:25:32<24:51:31, 3.39it/s] 18%|█▊ | 68221/371472 [5:25:32<24:28:09, 3.44it/s] 18%|█▊ | 68222/371472 [5:25:32<23:33:29, 3.58it/s] 18%|█▊ | 68223/371472 [5:25:32<23:03:00, 3.65it/s] 18%|█▊ | 68224/371472 [5:25:33<23:24:18, 3.60it/s] 18%|█▊ | 68225/371472 [5:25:33<24:31:46, 3.43it/s] 18%|█▊ | 68226/371472 [5:25:33<23:33:10, 3.58it/s] 18%|█▊ | 68227/371472 [5:25:34<22:31:54, 3.74it/s] 18%|█▊ | 68228/371472 [5:25:34<22:59:02, 3.66it/s] 18%|█▊ | 68229/371472 [5:25:34<23:45:31, 3.55it/s] 18%|█▊ | 68230/371472 [5:25:34<23:19:06, 3.61it/s] 18%|█▊ | 68231/371472 [5:25:35<23:21:18, 3.61it/s] 18%|█▊ | 68232/371472 [5:25:35<25:24:20, 3.32it/s] 18%|█▊ | 68233/371472 [5:25:35<25:10:26, 3.35it/s] 18%|█▊ | 68234/371472 [5:25:36<24:19:51, 3.46it/s] 18%|��▊ | 68235/371472 [5:25:36<23:44:23, 3.55it/s] 18%|█▊ | 68236/371472 [5:25:36<23:49:04, 3.54it/s] 18%|█▊ | 68237/371472 [5:25:36<24:25:35, 3.45it/s] 18%|█▊ | 68238/371472 [5:25:37<23:30:58, 3.58it/s] 18%|█▊ | 68239/371472 [5:25:37<23:49:05, 3.54it/s] 18%|█▊ | 68240/371472 [5:25:37<23:22:03, 3.60it/s] {'loss': 4.2352, 'learning_rate': 8.35064319420802e-07, 'epoch': 2.94} + 18%|█▊ | 68240/371472 [5:25:37<23:22:03, 3.60it/s] 18%|█▊ | 68241/371472 [5:25:38<23:31:51, 3.58it/s] 18%|█▊ | 68242/371472 [5:25:38<22:42:50, 3.71it/s] 18%|█▊ | 68243/371472 [5:25:38<25:00:24, 3.37it/s] 18%|█▊ | 68244/371472 [5:25:38<23:57:04, 3.52it/s] 18%|█▊ | 68245/371472 [5:25:39<23:25:00, 3.60it/s] 18%|█▊ | 68246/371472 [5:25:39<23:03:55, 3.65it/s] 18%|█▊ | 68247/371472 [5:25:39<22:44:55, 3.70it/s] 18%|█▊ | 68248/371472 [5:25:40<23:08:20, 3.64it/s] 18%|█▊ | 68249/371472 [5:25:40<22:28:45, 3.75it/s] 18%|█▊ | 68250/371472 [5:25:40<23:07:34, 3.64it/s] 18%|█▊ | 68251/371472 [5:25:40<23:30:00, 3.58it/s] 18%|█▊ | 68252/371472 [5:25:41<24:56:45, 3.38it/s] 18%|█▊ | 68253/371472 [5:25:41<23:53:43, 3.52it/s] 18%|█▊ | 68254/371472 [5:25:41<23:01:44, 3.66it/s] 18%|█▊ | 68255/371472 [5:25:41<22:55:29, 3.67it/s] 18%|█▊ | 68256/371472 [5:25:42<24:38:46, 3.42it/s] 18%|█▊ | 68257/371472 [5:25:42<25:17:45, 3.33it/s] 18%|█▊ | 68258/371472 [5:25:42<23:43:02, 3.55it/s] 18%|█▊ | 68259/371472 [5:25:43<23:55:00, 3.52it/s] 18%|█▊ | 68260/371472 [5:25:43<23:44:40, 3.55it/s] {'loss': 4.1817, 'learning_rate': 8.35015837445323e-07, 'epoch': 2.94} + 18%|█▊ | 68260/371472 [5:25:43<23:44:40, 3.55it/s] 18%|█▊ | 68261/371472 [5:25:43<22:47:23, 3.70it/s] 18%|█▊ | 68262/371472 [5:25:43<23:21:50, 3.60it/s] 18%|█▊ | 68263/371472 [5:25:44<23:10:40, 3.63it/s] 18%|█▊ | 68264/371472 [5:25:44<23:18:51, 3.61it/s] 18%|█▊ | 68265/371472 [5:25:44<25:57:52, 3.24it/s] 18%|█▊ | 68266/371472 [5:25:45<25:29:24, 3.30it/s] 18%|█▊ | 68267/371472 [5:25:45<24:55:44, 3.38it/s] 18%|█▊ | 68268/371472 [5:25:45<23:55:24, 3.52it/s] 18%|█▊ | 68269/371472 [5:25:45<23:35:29, 3.57it/s] 18%|█▊ | 68270/371472 [5:25:46<23:58:12, 3.51it/s] 18%|█▊ | 68271/371472 [5:25:46<23:21:22, 3.61it/s] 18%|█▊ | 68272/371472 [5:25:46<22:50:55, 3.69it/s] 18%|█▊ | 68273/371472 [5:25:47<23:38:52, 3.56it/s] 18%|█▊ | 68274/371472 [5:25:47<24:54:37, 3.38it/s] 18%|█▊ | 68275/371472 [5:25:47<24:27:18, 3.44it/s] 18%|█▊ | 68276/371472 [5:25:48<25:40:50, 3.28it/s] 18%|█▊ | 68277/371472 [5:25:48<24:03:08, 3.50it/s] 18%|█▊ | 68278/371472 [5:25:48<22:48:23, 3.69it/s] 18%|█▊ | 68279/371472 [5:25:48<24:03:05, 3.50it/s] 18%|█▊ | 68280/371472 [5:25:49<24:25:49, 3.45it/s] {'loss': 4.0247, 'learning_rate': 8.349673554698442e-07, 'epoch': 2.94} + 18%|█▊ | 68280/371472 [5:25:49<24:25:49, 3.45it/s] 18%|█▊ | 68281/371472 [5:25:49<23:43:08, 3.55it/s] 18%|█▊ | 68282/371472 [5:25:49<23:04:24, 3.65it/s] 18%|█▊ | 68283/371472 [5:25:49<23:11:21, 3.63it/s] 18%|█▊ | 68284/371472 [5:25:50<25:19:42, 3.33it/s] 18%|█▊ | 68285/371472 [5:25:50<23:46:16, 3.54it/s] 18%|█▊ | 68286/371472 [5:25:50<24:33:46, 3.43it/s] 18%|█▊ | 68287/371472 [5:25:51<24:00:26, 3.51it/s] 18%|█▊ | 68288/371472 [5:25:51<23:49:19, 3.54it/s] 18%|█▊ | 68289/371472 [5:25:51<23:22:57, 3.60it/s] 18%|█▊ | 68290/371472 [5:25:51<24:21:31, 3.46it/s] 18%|█▊ | 68291/371472 [5:25:52<24:34:36, 3.43it/s] 18%|█▊ | 68292/371472 [5:25:52<23:35:08, 3.57it/s] 18%|█▊ | 68293/371472 [5:25:52<23:44:52, 3.55it/s] 18%|█▊ | 68294/371472 [5:25:53<22:38:41, 3.72it/s] 18%|█▊ | 68295/371472 [5:25:53<23:35:33, 3.57it/s] 18%|█▊ | 68296/371472 [5:25:53<23:37:34, 3.56it/s] 18%|█▊ | 68297/371472 [5:25:53<23:52:11, 3.53it/s] 18%|█▊ | 68298/371472 [5:25:54<26:31:41, 3.17it/s] 18%|█▊ | 68299/371472 [5:25:54<26:38:22, 3.16it/s] 18%|█▊ | 68300/371472 [5:25:54<25:17:43, 3.33it/s] {'loss': 4.0533, 'learning_rate': 8.349188734943653e-07, 'epoch': 2.94} + 18%|█▊ | 68300/371472 [5:25:54<25:17:43, 3.33it/s] 18%|█▊ | 68301/371472 [5:25:55<25:49:29, 3.26it/s] 18%|█▊ | 68302/371472 [5:25:55<24:24:24, 3.45it/s] 18%|█▊ | 68303/371472 [5:25:55<23:28:24, 3.59it/s] 18%|█▊ | 68304/371472 [5:25:56<24:22:58, 3.45it/s] 18%|█▊ | 68305/371472 [5:25:56<25:00:56, 3.37it/s] 18%|█▊ | 68306/371472 [5:25:56<23:46:01, 3.54it/s] 18%|█▊ | 68307/371472 [5:25:56<23:33:27, 3.57it/s] 18%|█▊ | 68308/371472 [5:25:57<23:29:26, 3.58it/s] 18%|█▊ | 68309/371472 [5:25:57<22:40:36, 3.71it/s] 18%|█▊ | 68310/371472 [5:25:57<22:47:37, 3.69it/s] 18%|█▊ | 68311/371472 [5:25:57<23:02:35, 3.65it/s] 18%|█▊ | 68312/371472 [5:25:58<23:29:46, 3.58it/s] 18%|█▊ | 68313/371472 [5:25:58<23:45:29, 3.54it/s] 18%|█▊ | 68314/371472 [5:25:58<24:57:04, 3.37it/s] 18%|█▊ | 68315/371472 [5:25:59<24:55:10, 3.38it/s] 18%|█▊ | 68316/371472 [5:25:59<24:03:15, 3.50it/s] 18%|█▊ | 68317/371472 [5:25:59<23:27:16, 3.59it/s] 18%|█▊ | 68318/371472 [5:25:59<22:56:54, 3.67it/s] 18%|█▊ | 68319/371472 [5:26:00<22:24:20, 3.76it/s] 18%|█▊ | 68320/371472 [5:26:00<22:35:34, 3.73it/s] {'loss': 4.1722, 'learning_rate': 8.348703915188865e-07, 'epoch': 2.94} + 18%|█▊ | 68320/371472 [5:26:00<22:35:34, 3.73it/s] 18%|█▊ | 68321/371472 [5:26:00<24:55:26, 3.38it/s] 18%|█▊ | 68322/371472 [5:26:01<24:35:56, 3.42it/s] 18%|█▊ | 68323/371472 [5:26:01<25:02:33, 3.36it/s] 18%|█▊ | 68324/371472 [5:26:01<26:03:41, 3.23it/s] 18%|█▊ | 68325/371472 [5:26:02<26:07:03, 3.22it/s] 18%|█▊ | 68326/371472 [5:26:02<24:38:28, 3.42it/s] 18%|█▊ | 68327/371472 [5:26:02<23:24:36, 3.60it/s] 18%|█▊ | 68328/371472 [5:26:02<23:09:20, 3.64it/s] 18%|█▊ | 68329/371472 [5:26:03<25:14:58, 3.33it/s] 18%|█▊ | 68330/371472 [5:26:03<25:34:16, 3.29it/s] 18%|█▊ | 68331/371472 [5:26:03<25:03:13, 3.36it/s] 18%|█▊ | 68332/371472 [5:26:04<24:46:54, 3.40it/s] 18%|█▊ | 68333/371472 [5:26:04<23:48:34, 3.54it/s] 18%|█▊ | 68334/371472 [5:26:04<23:01:40, 3.66it/s] 18%|█▊ | 68335/371472 [5:26:04<23:22:14, 3.60it/s] 18%|█▊ | 68336/371472 [5:26:05<23:14:42, 3.62it/s] 18%|█▊ | 68337/371472 [5:26:05<22:54:48, 3.67it/s] 18%|█▊ | 68338/371472 [5:26:05<23:37:27, 3.56it/s] 18%|█▊ | 68339/371472 [5:26:06<25:44:31, 3.27it/s] 18%|█▊ | 68340/371472 [5:26:06<24:11:20, 3.48it/s] {'loss': 4.1471, 'learning_rate': 8.348219095434073e-07, 'epoch': 2.94} + 18%|█▊ | 68340/371472 [5:26:06<24:11:20, 3.48it/s] 18%|█▊ | 68341/371472 [5:26:06<24:56:32, 3.38it/s] 18%|█▊ | 68342/371472 [5:26:06<25:56:10, 3.25it/s] 18%|█▊ | 68343/371472 [5:26:07<25:35:11, 3.29it/s] 18%|█▊ | 68344/371472 [5:26:07<25:05:58, 3.35it/s] 18%|█▊ | 68345/371472 [5:26:07<24:05:31, 3.49it/s] 18%|█▊ | 68346/371472 [5:26:08<23:36:06, 3.57it/s] 18%|█▊ | 68347/371472 [5:26:08<23:05:26, 3.65it/s] 18%|█▊ | 68348/371472 [5:26:08<24:07:46, 3.49it/s] 18%|█▊ | 68349/371472 [5:26:08<24:54:06, 3.38it/s] 18%|█▊ | 68350/371472 [5:26:09<24:15:04, 3.47it/s] 18%|█▊ | 68351/371472 [5:26:09<26:21:50, 3.19it/s] 18%|█▊ | 68352/371472 [5:26:09<26:23:30, 3.19it/s] 18%|█▊ | 68353/371472 [5:26:10<24:51:35, 3.39it/s] 18%|█▊ | 68354/371472 [5:26:10<24:23:35, 3.45it/s] 18%|█▊ | 68355/371472 [5:26:10<26:33:32, 3.17it/s] 18%|█▊ | 68356/371472 [5:26:11<25:59:28, 3.24it/s] 18%|█▊ | 68357/371472 [5:26:11<25:43:13, 3.27it/s] 18%|█▊ | 68358/371472 [5:26:11<25:42:16, 3.28it/s] 18%|█▊ | 68359/371472 [5:26:11<24:24:34, 3.45it/s] 18%|█▊ | 68360/371472 [5:26:12<24:16:43, 3.47it/s] {'loss': 4.1911, 'learning_rate': 8.347734275679286e-07, 'epoch': 2.94} + 18%|█▊ | 68360/371472 [5:26:12<24:16:43, 3.47it/s] 18%|█▊ | 68361/371472 [5:26:12<24:44:29, 3.40it/s] 18%|█▊ | 68362/371472 [5:26:12<23:28:13, 3.59it/s] 18%|█▊ | 68363/371472 [5:26:13<22:36:50, 3.72it/s] 18%|█▊ | 68364/371472 [5:26:13<22:56:29, 3.67it/s] 18%|█▊ | 68365/371472 [5:26:13<22:35:41, 3.73it/s] 18%|█▊ | 68366/371472 [5:26:13<23:36:42, 3.57it/s] 18%|█▊ | 68367/371472 [5:26:14<23:32:42, 3.58it/s] 18%|█▊ | 68368/371472 [5:26:14<22:37:15, 3.72it/s] 18%|█▊ | 68369/371472 [5:26:14<22:53:03, 3.68it/s] 18%|█▊ | 68370/371472 [5:26:15<23:18:06, 3.61it/s] 18%|█▊ | 68371/371472 [5:26:15<24:27:20, 3.44it/s] 18%|█▊ | 68372/371472 [5:26:15<23:57:00, 3.52it/s] 18%|█▊ | 68373/371472 [5:26:15<23:01:17, 3.66it/s] 18%|█▊ | 68374/371472 [5:26:16<24:42:40, 3.41it/s] 18%|█▊ | 68375/371472 [5:26:16<23:59:02, 3.51it/s] 18%|█▊ | 68376/371472 [5:26:16<23:35:58, 3.57it/s] 18%|█▊ | 68377/371472 [5:26:17<24:20:19, 3.46it/s] 18%|█▊ | 68378/371472 [5:26:17<23:54:19, 3.52it/s] 18%|█▊ | 68379/371472 [5:26:17<23:05:51, 3.65it/s] 18%|█▊ | 68380/371472 [5:26:17<23:22:57, 3.60it/s] {'loss': 4.0995, 'learning_rate': 8.347249455924497e-07, 'epoch': 2.95} + 18%|█▊ | 68380/371472 [5:26:17<23:22:57, 3.60it/s] 18%|█▊ | 68381/371472 [5:26:18<23:18:47, 3.61it/s] 18%|█▊ | 68382/371472 [5:26:18<23:07:47, 3.64it/s] 18%|█▊ | 68383/371472 [5:26:18<23:46:57, 3.54it/s] 18%|█▊ | 68384/371472 [5:26:18<23:27:58, 3.59it/s] 18%|█▊ | 68385/371472 [5:26:19<22:33:13, 3.73it/s] 18%|█▊ | 68386/371472 [5:26:19<24:03:04, 3.50it/s] 18%|█▊ | 68387/371472 [5:26:19<23:51:28, 3.53it/s] 18%|█▊ | 68388/371472 [5:26:20<24:35:06, 3.42it/s] 18%|█▊ | 68389/371472 [5:26:20<23:36:33, 3.57it/s] 18%|█▊ | 68390/371472 [5:26:20<23:05:31, 3.65it/s] 18%|█▊ | 68391/371472 [5:26:20<23:01:53, 3.66it/s] 18%|█▊ | 68392/371472 [5:26:21<22:11:31, 3.79it/s] 18%|█▊ | 68393/371472 [5:26:21<22:37:44, 3.72it/s] 18%|█▊ | 68394/371472 [5:26:21<22:27:21, 3.75it/s] 18%|█▊ | 68395/371472 [5:26:21<22:57:28, 3.67it/s] 18%|█▊ | 68396/371472 [5:26:22<24:48:01, 3.39it/s] 18%|█▊ | 68397/371472 [5:26:22<24:04:34, 3.50it/s] 18%|█▊ | 68398/371472 [5:26:22<23:38:50, 3.56it/s] 18%|█▊ | 68399/371472 [5:26:23<22:41:47, 3.71it/s] 18%|█▊ | 68400/371472 [5:26:23<22:58:14, 3.66it/s] {'loss': 4.1429, 'learning_rate': 8.346764636169708e-07, 'epoch': 2.95} + 18%|█▊ | 68400/371472 [5:26:23<22:58:14, 3.66it/s] 18%|█▊ | 68401/371472 [5:26:23<23:20:29, 3.61it/s] 18%|█▊ | 68402/371472 [5:26:24<26:08:47, 3.22it/s] 18%|█▊ | 68403/371472 [5:26:24<25:12:48, 3.34it/s] 18%|█▊ | 68404/371472 [5:26:24<23:55:16, 3.52it/s] 18%|█▊ | 68405/371472 [5:26:24<23:18:26, 3.61it/s] 18%|█▊ | 68406/371472 [5:26:25<22:39:29, 3.72it/s] 18%|█▊ | 68407/371472 [5:26:25<22:10:30, 3.80it/s] 18%|█▊ | 68408/371472 [5:26:25<23:20:14, 3.61it/s] 18%|█▊ | 68409/371472 [5:26:25<22:37:58, 3.72it/s] 18%|█▊ | 68410/371472 [5:26:26<21:51:53, 3.85it/s] 18%|█▊ | 68411/371472 [5:26:26<23:41:47, 3.55it/s] 18%|█▊ | 68412/371472 [5:26:26<23:17:11, 3.62it/s] 18%|█▊ | 68413/371472 [5:26:27<24:21:33, 3.46it/s] 18%|█▊ | 68414/371472 [5:26:27<24:02:40, 3.50it/s] 18%|█▊ | 68415/371472 [5:26:27<24:19:00, 3.46it/s] 18%|█▊ | 68416/371472 [5:26:27<25:29:40, 3.30it/s] 18%|█▊ | 68417/371472 [5:26:28<26:29:50, 3.18it/s] 18%|█▊ | 68418/371472 [5:26:28<25:31:42, 3.30it/s] 18%|█▊ | 68419/371472 [5:26:28<24:35:13, 3.42it/s] 18%|█▊ | 68420/371472 [5:26:29<23:11:12, 3.63it/s] {'loss': 4.2546, 'learning_rate': 8.346279816414919e-07, 'epoch': 2.95} + 18%|█▊ | 68420/371472 [5:26:29<23:11:12, 3.63it/s] 18%|█▊ | 68421/371472 [5:26:29<22:39:04, 3.72it/s] 18%|█▊ | 68422/371472 [5:26:29<26:07:41, 3.22it/s] 18%|█▊ | 68423/371472 [5:26:29<24:46:47, 3.40it/s] 18%|█▊ | 68424/371472 [5:26:30<25:56:36, 3.24it/s] 18%|█▊ | 68425/371472 [5:26:30<24:46:20, 3.40it/s] 18%|█▊ | 68426/371472 [5:26:30<23:35:56, 3.57it/s] 18%|█▊ | 68427/371472 [5:26:31<23:48:58, 3.53it/s] 18%|█▊ | 68428/371472 [5:26:31<24:45:32, 3.40it/s] 18%|█▊ | 68429/371472 [5:26:31<23:41:55, 3.55it/s] 18%|█▊ | 68430/371472 [5:26:31<23:14:28, 3.62it/s] 18%|█▊ | 68431/371472 [5:26:32<23:53:18, 3.52it/s] 18%|█▊ | 68432/371472 [5:26:32<24:08:01, 3.49it/s] 18%|█▊ | 68433/371472 [5:26:32<23:24:45, 3.60it/s] 18%|█▊ | 68434/371472 [5:26:33<23:19:02, 3.61it/s] 18%|█▊ | 68435/371472 [5:26:33<23:05:42, 3.64it/s] 18%|█▊ | 68436/371472 [5:26:33<22:43:59, 3.70it/s] 18%|█▊ | 68437/371472 [5:26:33<22:13:45, 3.79it/s] 18%|█▊ | 68438/371472 [5:26:34<24:28:21, 3.44it/s] 18%|█▊ | 68439/371472 [5:26:34<23:22:59, 3.60it/s] 18%|█▊ | 68440/371472 [5:26:34<23:59:14, 3.51it/s] {'loss': 4.003, 'learning_rate': 8.345794996660131e-07, 'epoch': 2.95} + 18%|█▊ | 68440/371472 [5:26:34<23:59:14, 3.51it/s] 18%|█▊ | 68441/371472 [5:26:35<23:53:41, 3.52it/s] 18%|█▊ | 68442/371472 [5:26:35<23:23:19, 3.60it/s] 18%|█▊ | 68443/371472 [5:26:35<24:25:06, 3.45it/s] 18%|█▊ | 68444/371472 [5:26:35<22:54:53, 3.67it/s] 18%|█▊ | 68445/371472 [5:26:36<22:36:57, 3.72it/s] 18%|█▊ | 68446/371472 [5:26:36<21:55:21, 3.84it/s] 18%|█▊ | 68447/371472 [5:26:36<24:31:30, 3.43it/s] 18%|█▊ | 68448/371472 [5:26:37<23:55:54, 3.52it/s] 18%|█▊ | 68449/371472 [5:26:37<24:22:30, 3.45it/s] 18%|█▊ | 68450/371472 [5:26:37<24:36:39, 3.42it/s] 18%|█▊ | 68451/371472 [5:26:37<25:50:15, 3.26it/s] 18%|█▊ | 68452/371472 [5:26:38<24:23:10, 3.45it/s] 18%|█▊ | 68453/371472 [5:26:38<24:14:28, 3.47it/s] 18%|█▊ | 68454/371472 [5:26:38<24:51:01, 3.39it/s] 18%|█▊ | 68455/371472 [5:26:39<23:59:03, 3.51it/s] 18%|█▊ | 68456/371472 [5:26:39<23:51:16, 3.53it/s] 18%|█▊ | 68457/371472 [5:26:39<22:52:51, 3.68it/s] 18%|█▊ | 68458/371472 [5:26:39<21:56:25, 3.84it/s] 18%|█▊ | 68459/371472 [5:26:40<21:17:29, 3.95it/s] 18%|█▊ | 68460/371472 [5:26:40<21:11:02, 3.97it/s] {'loss': 4.0601, 'learning_rate': 8.345310176905341e-07, 'epoch': 2.95} + 18%|█▊ | 68460/371472 [5:26:40<21:11:02, 3.97it/s] 18%|█▊ | 68461/371472 [5:26:40<21:31:03, 3.91it/s] 18%|█▊ | 68462/371472 [5:26:40<22:24:07, 3.76it/s] 18%|█▊ | 68463/371472 [5:26:41<22:04:15, 3.81it/s] 18%|█▊ | 68464/371472 [5:26:41<22:25:27, 3.75it/s] 18%|█▊ | 68465/371472 [5:26:41<23:15:59, 3.62it/s] 18%|█▊ | 68466/371472 [5:26:41<22:11:59, 3.79it/s] 18%|█▊ | 68467/371472 [5:26:42<22:57:55, 3.66it/s] 18%|█▊ | 68468/371472 [5:26:42<22:22:24, 3.76it/s] 18%|█▊ | 68469/371472 [5:26:42<22:13:39, 3.79it/s] 18%|█▊ | 68470/371472 [5:26:43<23:46:20, 3.54it/s] 18%|█▊ | 68471/371472 [5:26:43<23:06:34, 3.64it/s] 18%|█▊ | 68472/371472 [5:26:43<24:22:25, 3.45it/s] 18%|█▊ | 68473/371472 [5:26:43<23:30:24, 3.58it/s] 18%|█▊ | 68474/371472 [5:26:44<23:46:48, 3.54it/s] 18%|█▊ | 68475/371472 [5:26:44<24:31:04, 3.43it/s] 18%|█▊ | 68476/371472 [5:26:44<24:44:42, 3.40it/s] 18%|█▊ | 68477/371472 [5:26:45<25:30:50, 3.30it/s] 18%|█▊ | 68478/371472 [5:26:45<24:42:34, 3.41it/s] 18%|█▊ | 68479/371472 [5:26:45<25:07:43, 3.35it/s] 18%|█▊ | 68480/371472 [5:26:45<24:01:29, 3.50it/s] {'loss': 4.2808, 'learning_rate': 8.344825357150552e-07, 'epoch': 2.95} + 18%|█▊ | 68480/371472 [5:26:45<24:01:29, 3.50it/s] 18%|█▊ | 68481/371472 [5:26:46<23:34:36, 3.57it/s] 18%|█▊ | 68482/371472 [5:26:46<23:33:47, 3.57it/s] 18%|█▊ | 68483/371472 [5:26:46<22:25:08, 3.75it/s] 18%|█▊ | 68484/371472 [5:26:47<25:31:47, 3.30it/s] 18%|█▊ | 68485/371472 [5:26:47<25:21:32, 3.32it/s] 18%|█▊ | 68486/371472 [5:26:47<24:56:57, 3.37it/s] 18%|█▊ | 68487/371472 [5:26:48<25:12:48, 3.34it/s] 18%|█▊ | 68488/371472 [5:26:48<24:37:41, 3.42it/s] 18%|█▊ | 68489/371472 [5:26:48<24:02:04, 3.50it/s] 18%|█▊ | 68490/371472 [5:26:48<24:14:50, 3.47it/s] 18%|█▊ | 68491/371472 [5:26:49<24:00:02, 3.51it/s] 18%|█▊ | 68492/371472 [5:26:49<24:42:55, 3.41it/s] 18%|█▊ | 68493/371472 [5:26:49<24:04:41, 3.50it/s] 18%|█▊ | 68494/371472 [5:26:49<23:00:11, 3.66it/s] 18%|█▊ | 68495/371472 [5:26:50<22:39:13, 3.72it/s] 18%|█▊ | 68496/371472 [5:26:50<23:19:00, 3.61it/s] 18%|█▊ | 68497/371472 [5:26:50<22:30:42, 3.74it/s] 18%|█▊ | 68498/371472 [5:26:50<22:01:39, 3.82it/s] 18%|█▊ | 68499/371472 [5:26:51<22:01:52, 3.82it/s] 18%|█▊ | 68500/371472 [5:26:51<21:20:59, 3.94it/s] {'loss': 4.3243, 'learning_rate': 8.344340537395763e-07, 'epoch': 2.95} + 18%|█▊ | 68500/371472 [5:26:51<21:20:59, 3.94it/s] 18%|█▊ | 68501/371472 [5:26:51<22:09:20, 3.80it/s] 18%|█▊ | 68502/371472 [5:26:52<22:08:31, 3.80it/s] 18%|█▊ | 68503/371472 [5:26:52<22:08:53, 3.80it/s] 18%|█▊ | 68504/371472 [5:26:52<23:15:44, 3.62it/s] 18%|█▊ | 68505/371472 [5:26:52<23:05:07, 3.65it/s] 18%|█▊ | 68506/371472 [5:26:53<22:19:16, 3.77it/s] 18%|█▊ | 68507/371472 [5:26:53<24:26:10, 3.44it/s] 18%|█▊ | 68508/371472 [5:26:53<23:20:01, 3.61it/s] 18%|█▊ | 68509/371472 [5:26:54<24:54:30, 3.38it/s] 18%|█▊ | 68510/371472 [5:26:54<23:59:50, 3.51it/s] 18%|█▊ | 68511/371472 [5:26:54<25:58:43, 3.24it/s] 18%|█▊ | 68512/371472 [5:26:55<28:09:17, 2.99it/s] 18%|█▊ | 68513/371472 [5:26:55<26:08:01, 3.22it/s] 18%|█▊ | 68514/371472 [5:26:55<25:57:29, 3.24it/s] 18%|█▊ | 68515/371472 [5:26:55<24:34:48, 3.42it/s] 18%|█▊ | 68516/371472 [5:26:56<23:47:37, 3.54it/s] 18%|█▊ | 68517/371472 [5:26:56<22:49:45, 3.69it/s] 18%|█▊ | 68518/371472 [5:26:56<22:37:53, 3.72it/s] 18%|█▊ | 68519/371472 [5:26:56<22:21:42, 3.76it/s] 18%|█▊ | 68520/371472 [5:26:57<24:09:55, 3.48it/s] {'loss': 4.1212, 'learning_rate': 8.343855717640974e-07, 'epoch': 2.95} + 18%|█▊ | 68520/371472 [5:26:57<24:09:55, 3.48it/s] 18%|█▊ | 68521/371472 [5:26:57<23:40:13, 3.56it/s] 18%|█▊ | 68522/371472 [5:26:57<24:12:29, 3.48it/s] 18%|█▊ | 68523/371472 [5:26:58<24:00:57, 3.50it/s] 18%|█▊ | 68524/371472 [5:26:58<23:20:48, 3.60it/s] 18%|█▊ | 68525/371472 [5:26:58<24:01:37, 3.50it/s] 18%|█▊ | 68526/371472 [5:26:58<23:52:06, 3.53it/s] 18%|█▊ | 68527/371472 [5:26:59<24:58:38, 3.37it/s] 18%|█▊ | 68528/371472 [5:26:59<24:25:31, 3.45it/s] 18%|█▊ | 68529/371472 [5:26:59<23:15:08, 3.62it/s] 18%|█▊ | 68530/371472 [5:27:00<22:18:19, 3.77it/s] 18%|█▊ | 68531/371472 [5:27:00<22:31:06, 3.74it/s] 18%|█▊ | 68532/371472 [5:27:00<22:12:19, 3.79it/s] 18%|█▊ | 68533/371472 [5:27:00<24:10:57, 3.48it/s] 18%|█▊ | 68534/371472 [5:27:01<24:53:14, 3.38it/s] 18%|█▊ | 68535/371472 [5:27:01<24:31:28, 3.43it/s] 18%|█▊ | 68536/371472 [5:27:01<23:57:14, 3.51it/s] 18%|█▊ | 68537/371472 [5:27:02<23:03:32, 3.65it/s] 18%|█▊ | 68538/371472 [5:27:02<23:15:30, 3.62it/s] 18%|█▊ | 68539/371472 [5:27:02<23:24:19, 3.60it/s] 18%|█▊ | 68540/371472 [5:27:02<26:09:26, 3.22it/s] {'loss': 4.0958, 'learning_rate': 8.343370897886186e-07, 'epoch': 2.95} + 18%|█▊ | 68540/371472 [5:27:02<26:09:26, 3.22it/s] 18%|█▊ | 68541/371472 [5:27:03<24:43:05, 3.40it/s] 18%|█▊ | 68542/371472 [5:27:03<25:07:56, 3.35it/s] 18%|█▊ | 68543/371472 [5:27:03<25:11:05, 3.34it/s] 18%|█▊ | 68544/371472 [5:27:04<23:56:12, 3.52it/s] 18%|█▊ | 68545/371472 [5:27:04<23:20:36, 3.60it/s] 18%|█▊ | 68546/371472 [5:27:04<22:34:40, 3.73it/s] 18%|█▊ | 68547/371472 [5:27:04<22:28:47, 3.74it/s] 18%|█▊ | 68548/371472 [5:27:05<23:21:27, 3.60it/s] 18%|█▊ | 68549/371472 [5:27:05<24:18:36, 3.46it/s] 18%|█▊ | 68550/371472 [5:27:05<22:53:38, 3.68it/s] 18%|█▊ | 68551/371472 [5:27:05<22:39:54, 3.71it/s] 18%|█▊ | 68552/371472 [5:27:06<22:06:44, 3.81it/s] 18%|█▊ | 68553/371472 [5:27:06<21:51:48, 3.85it/s] 18%|█▊ | 68554/371472 [5:27:06<22:50:39, 3.68it/s] 18%|█▊ | 68555/371472 [5:27:07<22:14:35, 3.78it/s] 18%|█▊ | 68556/371472 [5:27:07<22:10:12, 3.80it/s] 18%|█▊ | 68557/371472 [5:27:07<21:28:08, 3.92it/s] 18%|█▊ | 68558/371472 [5:27:07<22:04:52, 3.81it/s] 18%|█▊ | 68559/371472 [5:27:08<22:10:13, 3.80it/s] 18%|█▊ | 68560/371472 [5:27:08<22:35:24, 3.72it/s] {'loss': 4.3868, 'learning_rate': 8.342886078131397e-07, 'epoch': 2.95} + 18%|█▊ | 68560/371472 [5:27:08<22:35:24, 3.72it/s] 18%|█▊ | 68561/371472 [5:27:08<24:17:51, 3.46it/s] 18%|█▊ | 68562/371472 [5:27:08<24:46:52, 3.40it/s] 18%|█▊ | 68563/371472 [5:27:09<23:45:51, 3.54it/s] 18%|█▊ | 68564/371472 [5:27:09<23:44:42, 3.54it/s] 18%|█▊ | 68565/371472 [5:27:09<25:29:42, 3.30it/s] 18%|█▊ | 68566/371472 [5:27:10<25:19:41, 3.32it/s] 18%|█▊ | 68567/371472 [5:27:10<26:37:46, 3.16it/s] 18%|█▊ | 68568/371472 [5:27:10<27:13:16, 3.09it/s] 18%|█▊ | 68569/371472 [5:27:11<26:08:24, 3.22it/s] 18%|█▊ | 68570/371472 [5:27:11<25:17:50, 3.33it/s] 18%|█▊ | 68571/371472 [5:27:11<25:17:55, 3.33it/s] 18%|█▊ | 68572/371472 [5:27:11<24:43:11, 3.40it/s] 18%|█▊ | 68573/371472 [5:27:12<24:24:49, 3.45it/s] 18%|█▊ | 68574/371472 [5:27:12<25:04:20, 3.36it/s] 18%|█▊ | 68575/371472 [5:27:12<24:42:23, 3.41it/s] 18%|█▊ | 68576/371472 [5:27:13<26:36:35, 3.16it/s] 18%|█▊ | 68577/371472 [5:27:13<25:32:35, 3.29it/s] 18%|█▊ | 68578/371472 [5:27:13<25:14:38, 3.33it/s] 18%|█▊ | 68579/371472 [5:27:14<24:39:04, 3.41it/s] 18%|█▊ | 68580/371472 [5:27:14<23:55:00, 3.52it/s] {'loss': 4.2354, 'learning_rate': 8.342401258376608e-07, 'epoch': 2.95} + 18%|█▊ | 68580/371472 [5:27:14<23:55:00, 3.52it/s] 18%|█▊ | 68581/371472 [5:27:14<22:47:23, 3.69it/s] 18%|█▊ | 68582/371472 [5:27:14<24:24:40, 3.45it/s] 18%|█▊ | 68583/371472 [5:27:15<23:40:41, 3.55it/s] 18%|█▊ | 68584/371472 [5:27:15<23:01:21, 3.65it/s] 18%|█▊ | 68585/371472 [5:27:15<22:31:45, 3.73it/s] 18%|█▊ | 68586/371472 [5:27:15<22:06:30, 3.81it/s] 18%|█▊ | 68587/371472 [5:27:16<24:24:58, 3.45it/s] 18%|█▊ | 68588/371472 [5:27:16<24:21:24, 3.45it/s] 18%|█▊ | 68589/371472 [5:27:16<23:01:35, 3.65it/s] 18%|█▊ | 68590/371472 [5:27:17<22:18:01, 3.77it/s] 18%|█▊ | 68591/371472 [5:27:17<22:31:08, 3.74it/s] 18%|█▊ | 68592/371472 [5:27:17<22:20:52, 3.76it/s] 18%|█▊ | 68593/371472 [5:27:17<23:45:00, 3.54it/s] 18%|█▊ | 68594/371472 [5:27:18<23:38:37, 3.56it/s] 18%|█▊ | 68595/371472 [5:27:18<23:35:25, 3.57it/s] 18%|█▊ | 68596/371472 [5:27:18<25:09:33, 3.34it/s] 18%|█▊ | 68597/371472 [5:27:19<24:08:37, 3.48it/s] 18%|█▊ | 68598/371472 [5:27:19<24:16:24, 3.47it/s] 18%|█▊ | 68599/371472 [5:27:19<23:31:47, 3.58it/s] 18%|█▊ | 68600/371472 [5:27:19<23:07:34, 3.64it/s] {'loss': 4.3807, 'learning_rate': 8.341916438621818e-07, 'epoch': 2.95} + 18%|█▊ | 68600/371472 [5:27:19<23:07:34, 3.64it/s] 18%|█▊ | 68601/371472 [5:27:20<22:25:03, 3.75it/s] 18%|█▊ | 68602/371472 [5:27:20<22:31:49, 3.73it/s] 18%|█▊ | 68603/371472 [5:27:20<24:25:53, 3.44it/s] 18%|█▊ | 68604/371472 [5:27:21<23:56:19, 3.51it/s] 18%|█▊ | 68605/371472 [5:27:21<23:05:49, 3.64it/s] 18%|█▊ | 68606/371472 [5:27:21<24:16:22, 3.47it/s] 18%|█▊ | 68607/371472 [5:27:21<23:28:07, 3.58it/s] 18%|█▊ | 68608/371472 [5:27:22<23:41:09, 3.55it/s] 18%|█▊ | 68609/371472 [5:27:22<24:22:40, 3.45it/s] 18%|█▊ | 68610/371472 [5:27:22<23:14:57, 3.62it/s] 18%|█▊ | 68611/371472 [5:27:22<22:55:27, 3.67it/s] 18%|█▊ | 68612/371472 [5:27:23<22:51:28, 3.68it/s] 18%|█▊ | 68613/371472 [5:27:23<23:16:32, 3.61it/s] 18%|█▊ | 68614/371472 [5:27:23<23:34:35, 3.57it/s] 18%|█▊ | 68615/371472 [5:27:24<23:20:35, 3.60it/s] 18%|█▊ | 68616/371472 [5:27:24<23:12:35, 3.62it/s] 18%|█▊ | 68617/371472 [5:27:24<24:07:15, 3.49it/s] 18%|█▊ | 68618/371472 [5:27:25<25:22:45, 3.31it/s] 18%|█▊ | 68619/371472 [5:27:25<24:29:44, 3.43it/s] 18%|█▊ | 68620/371472 [5:27:25<23:51:30, 3.53it/s] {'loss': 4.1667, 'learning_rate': 8.34143161886703e-07, 'epoch': 2.96} + 18%|█▊ | 68620/371472 [5:27:25<23:51:30, 3.53it/s] 18%|█▊ | 68621/371472 [5:27:25<22:50:18, 3.68it/s] 18%|█▊ | 68622/371472 [5:27:26<22:49:03, 3.69it/s] 18%|█▊ | 68623/371472 [5:27:26<22:11:12, 3.79it/s] 18%|█▊ | 68624/371472 [5:27:26<23:20:30, 3.60it/s] 18%|█▊ | 68625/371472 [5:27:26<22:25:52, 3.75it/s] 18%|█▊ | 68626/371472 [5:27:27<22:34:23, 3.73it/s] 18%|█▊ | 68627/371472 [5:27:27<23:54:54, 3.52it/s] 18%|█▊ | 68628/371472 [5:27:27<24:52:32, 3.38it/s] 18%|█▊ | 68629/371472 [5:27:28<23:45:31, 3.54it/s] 18%|█▊ | 68630/371472 [5:27:28<28:11:40, 2.98it/s] 18%|█▊ | 68631/371472 [5:27:28<27:00:35, 3.11it/s] 18%|█▊ | 68632/371472 [5:27:29<25:57:43, 3.24it/s] 18%|█▊ | 68633/371472 [5:27:29<25:06:31, 3.35it/s] 18%|█▊ | 68634/371472 [5:27:29<28:08:45, 2.99it/s] 18%|█▊ | 68635/371472 [5:27:30<26:38:19, 3.16it/s] 18%|█▊ | 68636/371472 [5:27:30<27:13:32, 3.09it/s] 18%|█▊ | 68637/371472 [5:27:30<28:25:39, 2.96it/s] 18%|█▊ | 68638/371472 [5:27:30<25:42:09, 3.27it/s] 18%|█▊ | 68639/371472 [5:27:31<24:43:02, 3.40it/s] 18%|█▊ | 68640/371472 [5:27:31<24:04:48, 3.49it/s] {'loss': 4.0863, 'learning_rate': 8.340946799112241e-07, 'epoch': 2.96} + 18%|█▊ | 68640/371472 [5:27:31<24:04:48, 3.49it/s] 18%|█▊ | 68641/371472 [5:27:31<23:36:50, 3.56it/s] 18%|█▊ | 68642/371472 [5:27:32<22:52:06, 3.68it/s] 18%|█▊ | 68643/371472 [5:27:32<24:47:48, 3.39it/s] 18%|█▊ | 68644/371472 [5:27:32<25:44:19, 3.27it/s] 18%|█▊ | 68645/371472 [5:27:32<25:19:43, 3.32it/s] 18%|█▊ | 68646/371472 [5:27:33<26:54:28, 3.13it/s] 18%|█▊ | 68647/371472 [5:27:33<26:32:27, 3.17it/s] 18%|█▊ | 68648/371472 [5:27:33<24:58:29, 3.37it/s] 18%|█▊ | 68649/371472 [5:27:34<23:42:20, 3.55it/s] 18%|█▊ | 68650/371472 [5:27:34<23:10:57, 3.63it/s] 18%|█▊ | 68651/371472 [5:27:34<23:16:49, 3.61it/s] 18%|█▊ | 68652/371472 [5:27:34<22:54:19, 3.67it/s] 18%|█▊ | 68653/371472 [5:27:35<23:04:49, 3.64it/s] 18%|█▊ | 68654/371472 [5:27:35<22:25:51, 3.75it/s] 18%|█▊ | 68655/371472 [5:27:35<22:30:33, 3.74it/s] 18%|█▊ | 68656/371472 [5:27:36<22:47:38, 3.69it/s] 18%|█▊ | 68657/371472 [5:27:36<22:22:35, 3.76it/s] 18%|█▊ | 68658/371472 [5:27:36<23:59:16, 3.51it/s] 18%|█▊ | 68659/371472 [5:27:36<24:12:24, 3.47it/s] 18%|█▊ | 68660/371472 [5:27:37<25:45:14, 3.27it/s] {'loss': 4.1569, 'learning_rate': 8.340461979357452e-07, 'epoch': 2.96} + 18%|█▊ | 68660/371472 [5:27:37<25:45:14, 3.27it/s] 18%|█▊ | 68661/371472 [5:27:37<25:11:57, 3.34it/s] 18%|█▊ | 68662/371472 [5:27:37<24:14:17, 3.47it/s] 18%|█▊ | 68663/371472 [5:27:38<23:28:37, 3.58it/s] 18%|█▊ | 68664/371472 [5:27:38<25:37:10, 3.28it/s] 18%|█▊ | 68665/371472 [5:27:38<25:11:19, 3.34it/s] 18%|█▊ | 68666/371472 [5:27:39<25:37:52, 3.28it/s] 18%|█▊ | 68667/371472 [5:27:39<26:05:43, 3.22it/s] 18%|█▊ | 68668/371472 [5:27:39<25:19:50, 3.32it/s] 18%|█▊ | 68669/371472 [5:27:39<23:32:32, 3.57it/s] 18%|█▊ | 68670/371472 [5:27:40<24:22:41, 3.45it/s] 18%|█▊ | 68671/371472 [5:27:40<26:20:12, 3.19it/s] 18%|█▊ | 68672/371472 [5:27:40<26:17:27, 3.20it/s] 18%|█▊ | 68673/371472 [5:27:41<26:04:32, 3.23it/s] 18%|█▊ | 68674/371472 [5:27:41<26:57:39, 3.12it/s] 18%|█▊ | 68675/371472 [5:27:41<26:03:19, 3.23it/s] 18%|█▊ | 68676/371472 [5:27:42<24:53:47, 3.38it/s] 18%|█▊ | 68677/371472 [5:27:42<25:03:44, 3.36it/s] 18%|█▊ | 68678/371472 [5:27:42<24:01:25, 3.50it/s] 18%|█▊ | 68679/371472 [5:27:42<23:01:57, 3.65it/s] 18%|█▊ | 68680/371472 [5:27:43<23:04:32, 3.64it/s] {'loss': 4.1465, 'learning_rate': 8.339977159602663e-07, 'epoch': 2.96} + 18%|█▊ | 68680/371472 [5:27:43<23:04:32, 3.64it/s] 18%|█▊ | 68681/371472 [5:27:43<23:08:16, 3.64it/s] 18%|█▊ | 68682/371472 [5:27:43<23:21:43, 3.60it/s] 18%|█▊ | 68683/371472 [5:27:43<23:08:28, 3.63it/s] 18%|█▊ | 68684/371472 [5:27:44<23:05:13, 3.64it/s] 18%|█▊ | 68685/371472 [5:27:44<22:44:57, 3.70it/s] 18%|█▊ | 68686/371472 [5:27:44<22:27:34, 3.74it/s] 18%|█▊ | 68687/371472 [5:27:45<23:37:22, 3.56it/s] 18%|█▊ | 68688/371472 [5:27:45<24:32:45, 3.43it/s] 18%|█▊ | 68689/371472 [5:27:45<23:12:11, 3.62it/s] 18%|█▊ | 68690/371472 [5:27:45<22:57:16, 3.66it/s] 18%|█▊ | 68691/371472 [5:27:46<23:00:22, 3.66it/s] 18%|█▊ | 68692/371472 [5:27:46<22:08:01, 3.80it/s] 18%|█▊ | 68693/371472 [5:27:46<22:21:33, 3.76it/s] 18%|█▊ | 68694/371472 [5:27:46<22:05:12, 3.81it/s] 18%|█▊ | 68695/371472 [5:27:47<24:14:58, 3.47it/s] 18%|█▊ | 68696/371472 [5:27:47<26:30:25, 3.17it/s] 18%|█▊ | 68697/371472 [5:27:47<25:18:47, 3.32it/s] 18%|█▊ | 68698/371472 [5:27:48<26:48:33, 3.14it/s] 18%|█▊ | 68699/371472 [5:27:48<25:12:23, 3.34it/s] 18%|█▊ | 68700/371472 [5:27:48<24:21:57, 3.45it/s] {'loss': 4.0999, 'learning_rate': 8.339492339847874e-07, 'epoch': 2.96} + 18%|█▊ | 68700/371472 [5:27:48<24:21:57, 3.45it/s] 18%|█▊ | 68701/371472 [5:27:49<24:22:42, 3.45it/s] 18%|█▊ | 68702/371472 [5:27:49<23:43:28, 3.54it/s] 18%|█▊ | 68703/371472 [5:27:49<24:07:21, 3.49it/s] 18%|█▊ | 68704/371472 [5:27:49<23:37:58, 3.56it/s] 18%|█▊ | 68705/371472 [5:27:50<23:24:50, 3.59it/s] 18%|█▊ | 68706/371472 [5:27:50<22:46:40, 3.69it/s] 18%|█▊ | 68707/371472 [5:27:50<22:27:46, 3.74it/s] 18%|█▊ | 68708/371472 [5:27:51<25:42:33, 3.27it/s] 18%|█▊ | 68709/371472 [5:27:51<24:23:27, 3.45it/s] 18%|█▊ | 68710/371472 [5:27:51<25:04:28, 3.35it/s] 18%|█▊ | 68711/371472 [5:27:51<23:36:44, 3.56it/s] 18%|█▊ | 68712/371472 [5:27:52<23:29:52, 3.58it/s] 18%|█▊ | 68713/371472 [5:27:52<25:11:03, 3.34it/s] 18%|█▊ | 68714/371472 [5:27:52<24:20:45, 3.45it/s] 18%|█▊ | 68715/371472 [5:27:53<23:39:05, 3.56it/s] 18%|█▊ | 68716/371472 [5:27:53<24:50:53, 3.38it/s] 18%|█▊ | 68717/371472 [5:27:53<24:50:31, 3.39it/s] 18%|█▊ | 68718/371472 [5:27:53<23:54:46, 3.52it/s] 18%|█▊ | 68719/371472 [5:27:54<24:27:43, 3.44it/s] 18%|█▊ | 68720/371472 [5:27:54<24:48:15, 3.39it/s] {'loss': 4.0144, 'learning_rate': 8.339007520093085e-07, 'epoch': 2.96} + 18%|█▊ | 68720/371472 [5:27:54<24:48:15, 3.39it/s] 18%|█▊ | 68721/371472 [5:27:54<24:24:59, 3.44it/s] 18%|█▊ | 68722/371472 [5:27:55<27:03:16, 3.11it/s] 19%|█▊ | 68723/371472 [5:27:55<25:51:25, 3.25it/s] 19%|█▊ | 68724/371472 [5:27:55<24:41:18, 3.41it/s] 19%|█▊ | 68725/371472 [5:27:56<24:31:02, 3.43it/s] 19%|█▊ | 68726/371472 [5:27:56<23:48:44, 3.53it/s] 19%|█▊ | 68727/371472 [5:27:56<23:37:45, 3.56it/s] 19%|█▊ | 68728/371472 [5:27:56<23:23:41, 3.59it/s] 19%|█▊ | 68729/371472 [5:27:57<22:57:04, 3.66it/s] 19%|█▊ | 68730/371472 [5:27:57<22:22:57, 3.76it/s] 19%|█▊ | 68731/371472 [5:27:57<23:02:49, 3.65it/s] 19%|█▊ | 68732/371472 [5:27:57<22:59:04, 3.66it/s] 19%|█▊ | 68733/371472 [5:27:58<22:38:40, 3.71it/s] 19%|█▊ | 68734/371472 [5:27:58<22:40:42, 3.71it/s] 19%|█▊ | 68735/371472 [5:27:58<23:01:13, 3.65it/s] 19%|█▊ | 68736/371472 [5:27:59<23:00:14, 3.66it/s] 19%|█▊ | 68737/371472 [5:27:59<22:05:45, 3.81it/s] 19%|█▊ | 68738/371472 [5:27:59<22:22:05, 3.76it/s] 19%|█▊ | 68739/371472 [5:27:59<22:43:39, 3.70it/s] 19%|█▊ | 68740/371472 [5:28:00<22:22:44, 3.76it/s] {'loss': 3.8672, 'learning_rate': 8.338522700338296e-07, 'epoch': 2.96} + 19%|█▊ | 68740/371472 [5:28:00<22:22:44, 3.76it/s] 19%|█▊ | 68741/371472 [5:28:00<21:57:49, 3.83it/s] 19%|█▊ | 68742/371472 [5:28:00<21:49:05, 3.85it/s] 19%|█▊ | 68743/371472 [5:28:00<23:01:27, 3.65it/s] 19%|█▊ | 68744/371472 [5:28:01<24:15:21, 3.47it/s] 19%|█▊ | 68745/371472 [5:28:01<23:14:50, 3.62it/s] 19%|█▊ | 68746/371472 [5:28:01<22:34:53, 3.72it/s] 19%|█▊ | 68747/371472 [5:28:01<23:04:22, 3.64it/s] 19%|█▊ | 68748/371472 [5:28:02<23:07:15, 3.64it/s] 19%|█▊ | 68749/371472 [5:28:02<23:57:16, 3.51it/s] 19%|█▊ | 68750/371472 [5:28:02<24:25:13, 3.44it/s] 19%|█▊ | 68751/371472 [5:28:03<23:35:15, 3.56it/s] 19%|█▊ | 68752/371472 [5:28:03<22:56:53, 3.66it/s] 19%|█▊ | 68753/371472 [5:28:03<22:45:47, 3.69it/s] 19%|█▊ | 68754/371472 [5:28:03<23:46:53, 3.54it/s] 19%|█▊ | 68755/371472 [5:28:04<23:42:33, 3.55it/s] 19%|█▊ | 68756/371472 [5:28:04<24:33:16, 3.42it/s] 19%|█▊ | 68757/371472 [5:28:04<25:49:11, 3.26it/s] 19%|█▊ | 68758/371472 [5:28:05<25:57:54, 3.24it/s] 19%|█▊ | 68759/371472 [5:28:05<27:12:37, 3.09it/s] 19%|█▊ | 68760/371472 [5:28:05<26:11:59, 3.21it/s] {'loss': 4.0771, 'learning_rate': 8.338037880583507e-07, 'epoch': 2.96} + 19%|█▊ | 68760/371472 [5:28:05<26:11:59, 3.21it/s] 19%|█▊ | 68761/371472 [5:28:06<25:13:38, 3.33it/s] 19%|█▊ | 68762/371472 [5:28:06<24:47:17, 3.39it/s] 19%|█▊ | 68763/371472 [5:28:06<24:08:15, 3.48it/s] 19%|█▊ | 68764/371472 [5:28:06<24:04:05, 3.49it/s] 19%|█▊ | 68765/371472 [5:28:07<23:08:13, 3.63it/s] 19%|█▊ | 68766/371472 [5:28:07<23:34:19, 3.57it/s] 19%|█▊ | 68767/371472 [5:28:07<24:06:45, 3.49it/s] 19%|█▊ | 68768/371472 [5:28:08<24:40:44, 3.41it/s] 19%|█▊ | 68769/371472 [5:28:08<23:38:56, 3.56it/s] 19%|█▊ | 68770/371472 [5:28:08<23:16:23, 3.61it/s] 19%|█▊ | 68771/371472 [5:28:08<22:50:46, 3.68it/s] 19%|█▊ | 68772/371472 [5:28:09<24:10:41, 3.48it/s] 19%|█▊ | 68773/371472 [5:28:09<23:34:45, 3.57it/s] 19%|█▊ | 68774/371472 [5:28:09<23:16:45, 3.61it/s] 19%|█▊ | 68775/371472 [5:28:10<23:23:48, 3.59it/s] 19%|█▊ | 68776/371472 [5:28:10<23:14:51, 3.62it/s] 19%|█▊ | 68777/371472 [5:28:10<23:01:25, 3.65it/s] 19%|█▊ | 68778/371472 [5:28:10<22:19:58, 3.76it/s] 19%|█▊ | 68779/371472 [5:28:11<22:54:08, 3.67it/s] 19%|█▊ | 68780/371472 [5:28:11<22:30:59, 3.73it/s] {'loss': 4.144, 'learning_rate': 8.337553060828718e-07, 'epoch': 2.96} + 19%|█▊ | 68780/371472 [5:28:11<22:30:59, 3.73it/s] 19%|█▊ | 68781/371472 [5:28:11<23:02:15, 3.65it/s] 19%|█▊ | 68782/371472 [5:28:11<22:40:20, 3.71it/s] 19%|█▊ | 68783/371472 [5:28:12<22:42:42, 3.70it/s] 19%|█▊ | 68784/371472 [5:28:12<23:09:10, 3.63it/s] 19%|█▊ | 68785/371472 [5:28:12<22:37:53, 3.72it/s] 19%|█▊ | 68786/371472 [5:28:13<23:02:24, 3.65it/s] 19%|█▊ | 68787/371472 [5:28:13<24:00:23, 3.50it/s] 19%|█▊ | 68788/371472 [5:28:13<23:25:28, 3.59it/s] 19%|█▊ | 68789/371472 [5:28:13<23:36:09, 3.56it/s] 19%|█▊ | 68790/371472 [5:28:14<23:18:49, 3.61it/s] 19%|█▊ | 68791/371472 [5:28:14<22:55:47, 3.67it/s] 19%|█▊ | 68792/371472 [5:28:14<23:10:58, 3.63it/s] 19%|█▊ | 68793/371472 [5:28:14<22:27:08, 3.74it/s] 19%|█▊ | 68794/371472 [5:28:15<23:28:41, 3.58it/s] 19%|█▊ | 68795/371472 [5:28:15<24:03:22, 3.50it/s] 19%|█▊ | 68796/371472 [5:28:15<25:52:30, 3.25it/s] 19%|█▊ | 68797/371472 [5:28:16<24:21:19, 3.45it/s] 19%|█▊ | 68798/371472 [5:28:16<23:19:18, 3.61it/s] 19%|█▊ | 68799/371472 [5:28:16<23:09:29, 3.63it/s] 19%|█▊ | 68800/371472 [5:28:16<22:22:56, 3.76it/s] {'loss': 3.9042, 'learning_rate': 8.337068241073929e-07, 'epoch': 2.96} + 19%|█▊ | 68800/371472 [5:28:16<22:22:56, 3.76it/s] 19%|█▊ | 68801/371472 [5:28:17<22:15:13, 3.78it/s] 19%|█▊ | 68802/371472 [5:28:17<25:15:59, 3.33it/s] 19%|█▊ | 68803/371472 [5:28:17<23:36:02, 3.56it/s] 19%|█▊ | 68804/371472 [5:28:18<24:06:53, 3.49it/s] 19%|█▊ | 68805/371472 [5:28:18<22:39:01, 3.71it/s] 19%|█▊ | 68806/371472 [5:28:18<22:21:58, 3.76it/s] 19%|█▊ | 68807/371472 [5:28:18<24:54:51, 3.37it/s] 19%|█▊ | 68808/371472 [5:28:19<26:12:38, 3.21it/s] 19%|█▊ | 68809/371472 [5:28:19<24:13:00, 3.47it/s] 19%|█▊ | 68810/371472 [5:28:19<23:31:36, 3.57it/s] 19%|█▊ | 68811/371472 [5:28:20<23:40:27, 3.55it/s] 19%|█▊ | 68812/371472 [5:28:20<23:18:48, 3.61it/s] 19%|█▊ | 68813/371472 [5:28:20<25:46:27, 3.26it/s] 19%|█▊ | 68814/371472 [5:28:21<25:07:54, 3.35it/s] 19%|█▊ | 68815/371472 [5:28:21<24:33:23, 3.42it/s] 19%|█▊ | 68816/371472 [5:28:21<24:46:51, 3.39it/s] 19%|█▊ | 68817/371472 [5:28:21<23:44:16, 3.54it/s] 19%|█▊ | 68818/371472 [5:28:22<23:53:14, 3.52it/s] 19%|█▊ | 68819/371472 [5:28:22<24:33:40, 3.42it/s] 19%|█▊ | 68820/371472 [5:28:22<24:17:44, 3.46it/s] {'loss': 4.2654, 'learning_rate': 8.336583421319141e-07, 'epoch': 2.96} + 19%|█▊ | 68820/371472 [5:28:22<24:17:44, 3.46it/s] 19%|█▊ | 68821/371472 [5:28:23<24:37:47, 3.41it/s] 19%|█▊ | 68822/371472 [5:28:23<23:57:45, 3.51it/s] 19%|█▊ | 68823/371472 [5:28:23<23:34:09, 3.57it/s] 19%|█▊ | 68824/371472 [5:28:23<22:39:19, 3.71it/s] 19%|█▊ | 68825/371472 [5:28:24<22:04:29, 3.81it/s] 19%|█▊ | 68826/371472 [5:28:24<22:04:37, 3.81it/s] 19%|█▊ | 68827/371472 [5:28:24<24:48:22, 3.39it/s] 19%|█▊ | 68828/371472 [5:28:25<25:58:13, 3.24it/s] 19%|█▊ | 68829/371472 [5:28:25<24:44:04, 3.40it/s] 19%|█▊ | 68830/371472 [5:28:25<23:12:49, 3.62it/s] 19%|█▊ | 68831/371472 [5:28:25<23:55:54, 3.51it/s] 19%|█▊ | 68832/371472 [5:28:26<24:38:10, 3.41it/s] 19%|█▊ | 68833/371472 [5:28:26<26:14:39, 3.20it/s] 19%|█▊ | 68834/371472 [5:28:26<24:46:24, 3.39it/s] 19%|█▊ | 68835/371472 [5:28:26<23:19:48, 3.60it/s] 19%|█▊ | 68836/371472 [5:28:27<23:48:00, 3.53it/s] 19%|█▊ | 68837/371472 [5:28:27<23:10:38, 3.63it/s] 19%|█▊ | 68838/371472 [5:28:27<23:07:09, 3.64it/s] 19%|█▊ | 68839/371472 [5:28:28<23:27:17, 3.58it/s] 19%|█▊ | 68840/371472 [5:28:28<23:19:43, 3.60it/s] {'loss': 4.2632, 'learning_rate': 8.336098601564352e-07, 'epoch': 2.97} + 19%|█▊ | 68840/371472 [5:28:28<23:19:43, 3.60it/s] 19%|█▊ | 68841/371472 [5:28:28<23:31:29, 3.57it/s] 19%|█▊ | 68842/371472 [5:28:28<23:06:01, 3.64it/s] 19%|█▊ | 68843/371472 [5:28:29<24:46:34, 3.39it/s] 19%|█▊ | 68844/371472 [5:28:29<25:57:07, 3.24it/s] 19%|█▊ | 68845/371472 [5:28:29<25:20:06, 3.32it/s] 19%|█▊ | 68846/371472 [5:28:30<25:06:37, 3.35it/s] 19%|█▊ | 68847/371472 [5:28:30<24:02:11, 3.50it/s] 19%|█▊ | 68848/371472 [5:28:30<24:03:11, 3.49it/s] 19%|█▊ | 68849/371472 [5:28:31<24:09:23, 3.48it/s] 19%|█▊ | 68850/371472 [5:28:31<23:44:22, 3.54it/s] 19%|█▊ | 68851/371472 [5:28:31<23:03:57, 3.64it/s] 19%|█▊ | 68852/371472 [5:28:31<23:27:17, 3.58it/s] 19%|█▊ | 68853/371472 [5:28:32<28:22:46, 2.96it/s] 19%|█▊ | 68854/371472 [5:28:32<26:51:24, 3.13it/s] 19%|█▊ | 68855/371472 [5:28:32<25:24:10, 3.31it/s] 19%|█▊ | 68856/371472 [5:28:33<24:52:55, 3.38it/s] 19%|█▊ | 68857/371472 [5:28:33<24:12:25, 3.47it/s] 19%|█▊ | 68858/371472 [5:28:33<25:14:08, 3.33it/s] 19%|█▊ | 68859/371472 [5:28:33<24:22:12, 3.45it/s] 19%|█▊ | 68860/371472 [5:28:34<23:44:47, 3.54it/s] {'loss': 4.1627, 'learning_rate': 8.335613781809561e-07, 'epoch': 2.97} + 19%|█▊ | 68860/371472 [5:28:34<23:44:47, 3.54it/s] 19%|█▊ | 68861/371472 [5:28:34<22:49:34, 3.68it/s] 19%|█▊ | 68862/371472 [5:28:34<22:56:46, 3.66it/s] 19%|█▊ | 68863/371472 [5:28:35<22:43:38, 3.70it/s] 19%|█▊ | 68864/371472 [5:28:35<22:28:25, 3.74it/s] 19%|█▊ | 68865/371472 [5:28:35<22:26:51, 3.74it/s] 19%|█▊ | 68866/371472 [5:28:35<22:05:48, 3.80it/s] 19%|█▊ | 68867/371472 [5:28:36<21:32:03, 3.90it/s] 19%|█▊ | 68868/371472 [5:28:36<21:20:42, 3.94it/s] 19%|█▊ | 68869/371472 [5:28:36<21:13:20, 3.96it/s] 19%|█▊ | 68870/371472 [5:28:36<21:23:02, 3.93it/s] 19%|█▊ | 68871/371472 [5:28:37<22:30:39, 3.73it/s] 19%|█▊ | 68872/371472 [5:28:37<25:35:37, 3.28it/s] 19%|█▊ | 68873/371472 [5:28:37<25:06:01, 3.35it/s] 19%|█▊ | 68874/371472 [5:28:38<23:47:33, 3.53it/s] 19%|█▊ | 68875/371472 [5:28:38<23:04:27, 3.64it/s] 19%|█▊ | 68876/371472 [5:28:38<22:28:18, 3.74it/s] 19%|█▊ | 68877/371472 [5:28:38<23:42:36, 3.55it/s] 19%|█▊ | 68878/371472 [5:28:39<23:19:35, 3.60it/s] 19%|█▊ | 68879/371472 [5:28:39<23:56:17, 3.51it/s] 19%|█▊ | 68880/371472 [5:28:39<24:04:43, 3.49it/s] {'loss': 4.2268, 'learning_rate': 8.335128962054773e-07, 'epoch': 2.97} + 19%|█▊ | 68880/371472 [5:28:39<24:04:43, 3.49it/s] 19%|█▊ | 68881/371472 [5:28:39<23:24:03, 3.59it/s] 19%|█▊ | 68882/371472 [5:28:40<23:19:32, 3.60it/s] 19%|█▊ | 68883/371472 [5:28:40<25:07:05, 3.35it/s] 19%|█▊ | 68884/371472 [5:28:40<25:21:46, 3.31it/s] 19%|█▊ | 68885/371472 [5:28:41<24:09:49, 3.48it/s] 19%|█▊ | 68886/371472 [5:28:41<23:31:11, 3.57it/s] 19%|█▊ | 68887/371472 [5:28:41<22:58:59, 3.66it/s] 19%|█▊ | 68888/371472 [5:28:42<24:29:03, 3.43it/s] 19%|█▊ | 68889/371472 [5:28:42<24:06:45, 3.49it/s] 19%|█▊ | 68890/371472 [5:28:42<23:10:59, 3.63it/s] 19%|█▊ | 68891/371472 [5:28:42<23:25:25, 3.59it/s] 19%|█▊ | 68892/371472 [5:28:43<22:45:58, 3.69it/s] 19%|█▊ | 68893/371472 [5:28:43<22:48:19, 3.69it/s] 19%|█▊ | 68894/371472 [5:28:43<22:07:57, 3.80it/s] 19%|█▊ | 68895/371472 [5:28:43<22:58:22, 3.66it/s] 19%|█▊ | 68896/371472 [5:28:44<25:02:19, 3.36it/s] 19%|█▊ | 68897/371472 [5:28:44<24:54:53, 3.37it/s] 19%|█▊ | 68898/371472 [5:28:44<24:08:59, 3.48it/s] 19%|█▊ | 68899/371472 [5:28:45<23:02:11, 3.65it/s] 19%|█▊ | 68900/371472 [5:28:45<23:06:04, 3.64it/s] {'loss': 4.0903, 'learning_rate': 8.334644142299985e-07, 'epoch': 2.97} + 19%|█▊ | 68900/371472 [5:28:45<23:06:04, 3.64it/s] 19%|█▊ | 68901/371472 [5:28:45<23:14:13, 3.62it/s] 19%|█▊ | 68902/371472 [5:28:45<22:52:14, 3.67it/s] 19%|█▊ | 68903/371472 [5:28:46<23:33:37, 3.57it/s] 19%|█▊ | 68904/371472 [5:28:46<23:01:04, 3.65it/s] 19%|█▊ | 68905/371472 [5:28:46<23:19:39, 3.60it/s] 19%|█▊ | 68906/371472 [5:28:47<23:47:10, 3.53it/s] 19%|█▊ | 68907/371472 [5:28:47<22:42:36, 3.70it/s] 19%|█▊ | 68908/371472 [5:28:47<22:26:54, 3.74it/s] 19%|█▊ | 68909/371472 [5:28:47<21:58:28, 3.82it/s] 19%|█▊ | 68910/371472 [5:28:48<22:17:39, 3.77it/s] 19%|█▊ | 68911/371472 [5:28:48<23:32:21, 3.57it/s] 19%|█▊ | 68912/371472 [5:28:48<22:41:16, 3.70it/s] 19%|█▊ | 68913/371472 [5:28:48<23:52:28, 3.52it/s] 19%|█▊ | 68914/371472 [5:28:49<22:47:17, 3.69it/s] 19%|█▊ | 68915/371472 [5:28:49<24:18:39, 3.46it/s] 19%|█▊ | 68916/371472 [5:28:49<23:33:32, 3.57it/s] 19%|█▊ | 68917/371472 [5:28:50<23:14:12, 3.62it/s] 19%|█▊ | 68918/371472 [5:28:50<22:21:06, 3.76it/s] 19%|█▊ | 68919/371472 [5:28:50<23:29:10, 3.58it/s] 19%|█▊ | 68920/371472 [5:28:50<22:52:33, 3.67it/s] {'loss': 4.4831, 'learning_rate': 8.334159322545196e-07, 'epoch': 2.97} + 19%|█▊ | 68920/371472 [5:28:50<22:52:33, 3.67it/s] 19%|█▊ | 68921/371472 [5:28:51<22:39:20, 3.71it/s] 19%|█▊ | 68922/371472 [5:28:51<23:13:18, 3.62it/s] 19%|█▊ | 68923/371472 [5:28:51<22:51:41, 3.68it/s] 19%|█▊ | 68924/371472 [5:28:51<22:55:44, 3.67it/s] 19%|█▊ | 68925/371472 [5:28:52<23:31:16, 3.57it/s] 19%|█▊ | 68926/371472 [5:28:52<23:41:13, 3.55it/s] 19%|█▊ | 68927/371472 [5:28:52<23:24:31, 3.59it/s] 19%|█▊ | 68928/371472 [5:28:53<23:55:16, 3.51it/s] 19%|█▊ | 68929/371472 [5:28:53<24:51:10, 3.38it/s] 19%|█▊ | 68930/371472 [5:28:53<24:41:31, 3.40it/s] 19%|█▊ | 68931/371472 [5:28:53<24:11:35, 3.47it/s] 19%|█▊ | 68932/371472 [5:28:54<23:26:17, 3.59it/s] 19%|█▊ | 68933/371472 [5:28:54<24:33:17, 3.42it/s] 19%|█▊ | 68934/371472 [5:28:54<23:59:08, 3.50it/s] 19%|█▊ | 68935/371472 [5:28:55<24:05:01, 3.49it/s] 19%|█▊ | 68936/371472 [5:28:55<23:47:49, 3.53it/s] 19%|█▊ | 68937/371472 [5:28:55<22:51:11, 3.68it/s] 19%|█▊ | 68938/371472 [5:28:55<23:16:14, 3.61it/s] 19%|█▊ | 68939/371472 [5:28:56<22:05:55, 3.80it/s] 19%|█▊ | 68940/371472 [5:28:56<22:28:00, 3.74it/s] {'loss': 4.0449, 'learning_rate': 8.333674502790407e-07, 'epoch': 2.97} + 19%|█▊ | 68940/371472 [5:28:56<22:28:00, 3.74it/s] 19%|█▊ | 68941/371472 [5:28:56<22:17:37, 3.77it/s] 19%|█▊ | 68942/371472 [5:28:56<22:23:14, 3.75it/s] 19%|█▊ | 68943/371472 [5:28:57<22:20:48, 3.76it/s] 19%|█▊ | 68944/371472 [5:28:57<22:13:13, 3.78it/s] 19%|█▊ | 68945/371472 [5:28:57<21:51:49, 3.84it/s] 19%|█▊ | 68946/371472 [5:28:58<25:02:45, 3.36it/s] 19%|█▊ | 68947/371472 [5:28:58<23:58:14, 3.51it/s] 19%|█▊ | 68948/371472 [5:28:58<26:08:00, 3.22it/s] 19%|█▊ | 68949/371472 [5:28:58<24:41:48, 3.40it/s] 19%|█▊ | 68950/371472 [5:28:59<23:58:47, 3.50it/s] 19%|█▊ | 68951/371472 [5:28:59<22:53:48, 3.67it/s] 19%|█▊ | 68952/371472 [5:28:59<23:59:31, 3.50it/s] 19%|█▊ | 68953/371472 [5:29:00<23:14:48, 3.61it/s] 19%|█▊ | 68954/371472 [5:29:00<22:49:51, 3.68it/s] 19%|█▊ | 68955/371472 [5:29:00<22:41:15, 3.70it/s] 19%|█▊ | 68956/371472 [5:29:00<22:31:15, 3.73it/s] 19%|█▊ | 68957/371472 [5:29:01<22:47:22, 3.69it/s] 19%|█▊ | 68958/371472 [5:29:01<23:29:25, 3.58it/s] 19%|█▊ | 68959/371472 [5:29:01<23:52:36, 3.52it/s] 19%|█▊ | 68960/371472 [5:29:02<24:01:53, 3.50it/s] {'loss': 4.0883, 'learning_rate': 8.333189683035618e-07, 'epoch': 2.97} + 19%|█▊ | 68960/371472 [5:29:02<24:01:53, 3.50it/s] 19%|█▊ | 68961/371472 [5:29:02<23:50:50, 3.52it/s] 19%|█▊ | 68962/371472 [5:29:02<24:34:56, 3.42it/s] 19%|█▊ | 68963/371472 [5:29:02<24:06:46, 3.48it/s] 19%|█▊ | 68964/371472 [5:29:03<23:35:36, 3.56it/s] 19%|█▊ | 68965/371472 [5:29:03<22:38:45, 3.71it/s] 19%|█▊ | 68966/371472 [5:29:03<22:16:46, 3.77it/s] 19%|█▊ | 68967/371472 [5:29:03<22:19:03, 3.77it/s] 19%|█▊ | 68968/371472 [5:29:04<22:35:25, 3.72it/s] 19%|█▊ | 68969/371472 [5:29:04<23:24:13, 3.59it/s] 19%|█▊ | 68970/371472 [5:29:04<22:52:10, 3.67it/s] 19%|█▊ | 68971/371472 [5:29:05<27:26:03, 3.06it/s] 19%|█▊ | 68972/371472 [5:29:05<26:00:49, 3.23it/s] 19%|█▊ | 68973/371472 [5:29:05<24:24:07, 3.44it/s] 19%|█▊ | 68974/371472 [5:29:05<23:06:41, 3.64it/s] 19%|█▊ | 68975/371472 [5:29:06<22:23:57, 3.75it/s] 19%|█▊ | 68976/371472 [5:29:06<22:39:31, 3.71it/s] 19%|█▊ | 68977/371472 [5:29:06<22:18:08, 3.77it/s] 19%|█▊ | 68978/371472 [5:29:07<22:23:55, 3.75it/s] 19%|█▊ | 68979/371472 [5:29:07<21:47:30, 3.86it/s] 19%|█▊ | 68980/371472 [5:29:07<22:14:13, 3.78it/s] {'loss': 4.1052, 'learning_rate': 8.332704863280829e-07, 'epoch': 2.97} + 19%|█▊ | 68980/371472 [5:29:07<22:14:13, 3.78it/s] 19%|█▊ | 68981/371472 [5:29:07<21:31:06, 3.90it/s] 19%|█▊ | 68982/371472 [5:29:07<21:05:56, 3.98it/s] 19%|█▊ | 68983/371472 [5:29:08<22:53:55, 3.67it/s] 19%|█▊ | 68984/371472 [5:29:08<23:59:11, 3.50it/s] 19%|█▊ | 68985/371472 [5:29:09<28:07:17, 2.99it/s] 19%|█▊ | 68986/371472 [5:29:09<26:39:18, 3.15it/s] 19%|█▊ | 68987/371472 [5:29:09<25:42:19, 3.27it/s] 19%|█▊ | 68988/371472 [5:29:09<24:47:10, 3.39it/s] 19%|█▊ | 68989/371472 [5:29:10<24:34:01, 3.42it/s] 19%|█▊ | 68990/371472 [5:29:10<23:58:31, 3.50it/s] 19%|█▊ | 68991/371472 [5:29:10<24:36:19, 3.41it/s] 19%|█▊ | 68992/371472 [5:29:11<23:54:56, 3.51it/s] 19%|█▊ | 68993/371472 [5:29:11<25:19:35, 3.32it/s] 19%|█▊ | 68994/371472 [5:29:11<25:11:01, 3.34it/s] 19%|█▊ | 68995/371472 [5:29:11<24:42:39, 3.40it/s] 19%|█▊ | 68996/371472 [5:29:12<24:04:14, 3.49it/s] 19%|█▊ | 68997/371472 [5:29:12<23:46:17, 3.53it/s] 19%|█▊ | 68998/371472 [5:29:12<23:55:45, 3.51it/s] 19%|█▊ | 68999/371472 [5:29:13<23:18:08, 3.61it/s] 19%|█▊ | 69000/371472 [5:29:13<27:29:24, 3.06it/s] {'loss': 4.0703, 'learning_rate': 8.332220043526039e-07, 'epoch': 2.97} + 19%|█▊ | 69000/371472 [5:29:13<27:29:24, 3.06it/s] 19%|█▊ | 69001/371472 [5:29:13<28:09:11, 2.98it/s] 19%|█▊ | 69002/371472 [5:29:14<25:59:42, 3.23it/s] 19%|█▊ | 69003/371472 [5:29:14<25:29:24, 3.30it/s] 19%|█▊ | 69004/371472 [5:29:14<25:01:54, 3.36it/s] 19%|█▊ | 69005/371472 [5:29:14<25:19:22, 3.32it/s] 19%|█▊ | 69006/371472 [5:29:15<24:13:42, 3.47it/s] 19%|█▊ | 69007/371472 [5:29:15<24:32:51, 3.42it/s] 19%|█▊ | 69008/371472 [5:29:15<24:05:37, 3.49it/s] 19%|█▊ | 69009/371472 [5:29:16<24:15:50, 3.46it/s] 19%|█▊ | 69010/371472 [5:29:16<23:56:17, 3.51it/s] 19%|█▊ | 69011/371472 [5:29:16<23:39:57, 3.55it/s] 19%|█▊ | 69012/371472 [5:29:17<25:44:58, 3.26it/s] 19%|█▊ | 69013/371472 [5:29:17<24:59:09, 3.36it/s] 19%|█▊ | 69014/371472 [5:29:17<25:25:35, 3.30it/s] 19%|█▊ | 69015/371472 [5:29:18<28:54:27, 2.91it/s] 19%|█▊ | 69016/371472 [5:29:18<27:50:54, 3.02it/s] 19%|█▊ | 69017/371472 [5:29:18<27:05:40, 3.10it/s] 19%|█▊ | 69018/371472 [5:29:18<25:34:12, 3.29it/s] 19%|█▊ | 69019/371472 [5:29:19<24:26:14, 3.44it/s] 19%|█▊ | 69020/371472 [5:29:19<23:19:27, 3.60it/s] {'loss': 4.2334, 'learning_rate': 8.331735223771251e-07, 'epoch': 2.97} + 19%|█▊ | 69020/371472 [5:29:19<23:19:27, 3.60it/s] 19%|█▊ | 69021/371472 [5:29:19<25:10:36, 3.34it/s] 19%|█▊ | 69022/371472 [5:29:20<23:35:05, 3.56it/s] 19%|█▊ | 69023/371472 [5:29:20<23:30:57, 3.57it/s] 19%|█▊ | 69024/371472 [5:29:20<23:39:15, 3.55it/s] 19%|█▊ | 69025/371472 [5:29:20<22:43:30, 3.70it/s] 19%|█▊ | 69026/371472 [5:29:21<22:50:06, 3.68it/s] 19%|█▊ | 69027/371472 [5:29:21<23:49:33, 3.53it/s] 19%|█▊ | 69028/371472 [5:29:21<22:52:32, 3.67it/s] 19%|█▊ | 69029/371472 [5:29:21<23:05:24, 3.64it/s] 19%|█▊ | 69030/371472 [5:29:22<22:22:35, 3.75it/s] 19%|█▊ | 69031/371472 [5:29:22<22:09:06, 3.79it/s] 19%|█▊ | 69032/371472 [5:29:22<21:47:25, 3.86it/s] 19%|█▊ | 69033/371472 [5:29:23<24:21:34, 3.45it/s] 19%|█▊ | 69034/371472 [5:29:23<25:05:28, 3.35it/s] 19%|█▊ | 69035/371472 [5:29:23<23:55:07, 3.51it/s] 19%|█▊ | 69036/371472 [5:29:23<23:24:05, 3.59it/s] 19%|█▊ | 69037/371472 [5:29:24<22:50:55, 3.68it/s] 19%|█▊ | 69038/371472 [5:29:24<22:01:20, 3.81it/s] 19%|█▊ | 69039/371472 [5:29:24<22:31:30, 3.73it/s] 19%|█▊ | 69040/371472 [5:29:24<21:59:39, 3.82it/s] {'loss': 4.1308, 'learning_rate': 8.331250404016462e-07, 'epoch': 2.97} + 19%|█▊ | 69040/371472 [5:29:24<21:59:39, 3.82it/s] 19%|█▊ | 69041/371472 [5:29:25<24:11:27, 3.47it/s] 19%|█▊ | 69042/371472 [5:29:25<25:30:25, 3.29it/s] 19%|█▊ | 69043/371472 [5:29:25<24:18:45, 3.46it/s] 19%|█▊ | 69044/371472 [5:29:26<24:06:09, 3.49it/s] 19%|█▊ | 69045/371472 [5:29:26<23:59:55, 3.50it/s] 19%|█▊ | 69046/371472 [5:29:26<23:10:11, 3.63it/s] 19%|█▊ | 69047/371472 [5:29:26<22:49:06, 3.68it/s] 19%|█▊ | 69048/371472 [5:29:27<22:46:05, 3.69it/s] 19%|█▊ | 69049/371472 [5:29:27<22:27:16, 3.74it/s] 19%|█▊ | 69050/371472 [5:29:27<22:42:38, 3.70it/s] 19%|█▊ | 69051/371472 [5:29:28<23:43:57, 3.54it/s] 19%|█▊ | 69052/371472 [5:29:28<26:28:04, 3.17it/s] 19%|█▊ | 69053/371472 [5:29:28<25:37:58, 3.28it/s] 19%|█▊ | 69054/371472 [5:29:29<25:57:18, 3.24it/s] 19%|█▊ | 69055/371472 [5:29:29<24:57:21, 3.37it/s] 19%|█▊ | 69056/371472 [5:29:29<24:00:43, 3.50it/s] 19%|█▊ | 69057/371472 [5:29:29<23:34:04, 3.56it/s] 19%|█▊ | 69058/371472 [5:29:30<23:45:28, 3.54it/s] 19%|█▊ | 69059/371472 [5:29:30<22:36:28, 3.72it/s] 19%|█▊ | 69060/371472 [5:29:30<21:59:06, 3.82it/s] {'loss': 4.23, 'learning_rate': 8.330765584261673e-07, 'epoch': 2.97} + 19%|█▊ | 69060/371472 [5:29:30<21:59:06, 3.82it/s] 19%|█▊ | 69061/371472 [5:29:30<23:12:53, 3.62it/s] 19%|█▊ | 69062/371472 [5:29:31<23:33:54, 3.56it/s] 19%|█▊ | 69063/371472 [5:29:31<23:44:13, 3.54it/s] 19%|█▊ | 69064/371472 [5:29:31<24:11:32, 3.47it/s] 19%|█▊ | 69065/371472 [5:29:32<23:00:17, 3.65it/s] 19%|█▊ | 69066/371472 [5:29:32<23:03:21, 3.64it/s] 19%|█▊ | 69067/371472 [5:29:32<22:41:38, 3.70it/s] 19%|█▊ | 69068/371472 [5:29:32<22:03:02, 3.81it/s] 19%|█▊ | 69069/371472 [5:29:33<21:48:17, 3.85it/s] 19%|█▊ | 69070/371472 [5:29:33<22:12:03, 3.78it/s] 19%|█▊ | 69071/371472 [5:29:33<22:30:23, 3.73it/s] 19%|█▊ | 69072/371472 [5:29:33<22:51:31, 3.67it/s] 19%|█▊ | 69073/371472 [5:29:34<22:26:20, 3.74it/s] 19%|█▊ | 69074/371472 [5:29:34<23:29:44, 3.58it/s] 19%|█▊ | 69075/371472 [5:29:34<24:00:02, 3.50it/s] 19%|█▊ | 69076/371472 [5:29:35<25:22:52, 3.31it/s] 19%|█▊ | 69077/371472 [5:29:35<26:41:01, 3.15it/s] 19%|█▊ | 69078/371472 [5:29:35<27:17:11, 3.08it/s] 19%|█▊ | 69079/371472 [5:29:36<26:49:49, 3.13it/s] 19%|█▊ | 69080/371472 [5:29:36<25:53:14, 3.24it/s] {'loss': 3.9961, 'learning_rate': 8.330280764506884e-07, 'epoch': 2.98} + 19%|█▊ | 69080/371472 [5:29:36<25:53:14, 3.24it/s] 19%|█▊ | 69081/371472 [5:29:36<24:49:56, 3.38it/s] 19%|█▊ | 69082/371472 [5:29:36<23:46:54, 3.53it/s] 19%|█▊ | 69083/371472 [5:29:37<24:30:37, 3.43it/s] 19%|█▊ | 69084/371472 [5:29:37<23:37:43, 3.55it/s] 19%|█▊ | 69085/371472 [5:29:37<24:21:55, 3.45it/s] 19%|█▊ | 69086/371472 [5:29:38<23:19:39, 3.60it/s] 19%|█▊ | 69087/371472 [5:29:38<22:48:18, 3.68it/s] 19%|█▊ | 69088/371472 [5:29:38<26:14:57, 3.20it/s] 19%|█▊ | 69089/371472 [5:29:38<24:46:05, 3.39it/s] 19%|█▊ | 69090/371472 [5:29:39<23:36:39, 3.56it/s] 19%|█▊ | 69091/371472 [5:29:39<22:33:25, 3.72it/s] 19%|█▊ | 69092/371472 [5:29:39<23:27:16, 3.58it/s] 19%|█▊ | 69093/371472 [5:29:40<24:17:19, 3.46it/s] 19%|█▊ | 69094/371472 [5:29:40<24:16:31, 3.46it/s] 19%|█▊ | 69095/371472 [5:29:40<23:48:05, 3.53it/s] 19%|█▊ | 69096/371472 [5:29:40<23:02:52, 3.64it/s] 19%|█▊ | 69097/371472 [5:29:41<22:27:02, 3.74it/s] 19%|█▊ | 69098/371472 [5:29:41<21:58:06, 3.82it/s] 19%|█▊ | 69099/371472 [5:29:41<22:30:32, 3.73it/s] 19%|█▊ | 69100/371472 [5:29:41<22:13:27, 3.78it/s] {'loss': 4.2471, 'learning_rate': 8.329795944752095e-07, 'epoch': 2.98} + 19%|█▊ | 69100/371472 [5:29:41<22:13:27, 3.78it/s] 19%|█▊ | 69101/371472 [5:29:42<21:21:31, 3.93it/s] 19%|█▊ | 69102/371472 [5:29:42<20:49:34, 4.03it/s] 19%|█▊ | 69103/371472 [5:29:42<22:22:07, 3.75it/s] 19%|█▊ | 69104/371472 [5:29:42<23:12:27, 3.62it/s] 19%|█▊ | 69105/371472 [5:29:43<21:54:37, 3.83it/s] 19%|█▊ | 69106/371472 [5:29:43<22:21:06, 3.76it/s] 19%|█▊ | 69107/371472 [5:29:43<22:09:57, 3.79it/s] 19%|█▊ | 69108/371472 [5:29:44<22:03:49, 3.81it/s] 19%|█▊ | 69109/371472 [5:29:44<22:58:58, 3.65it/s] 19%|█▊ | 69110/371472 [5:29:44<23:09:58, 3.63it/s] 19%|█▊ | 69111/371472 [5:29:44<22:54:39, 3.67it/s] 19%|█▊ | 69112/371472 [5:29:45<23:33:20, 3.57it/s] 19%|█▊ | 69113/371472 [5:29:45<22:52:53, 3.67it/s] 19%|█▊ | 69114/371472 [5:29:45<24:02:52, 3.49it/s] 19%|█▊ | 69115/371472 [5:29:45<23:35:24, 3.56it/s] 19%|█▊ | 69116/371472 [5:29:46<24:10:55, 3.47it/s] 19%|█▊ | 69117/371472 [5:29:46<24:05:56, 3.49it/s] 19%|█▊ | 69118/371472 [5:29:46<23:48:10, 3.53it/s] 19%|█▊ | 69119/371472 [5:29:47<23:58:35, 3.50it/s] 19%|█▊ | 69120/371472 [5:29:47<23:33:43, 3.56it/s] {'loss': 4.2375, 'learning_rate': 8.329311124997306e-07, 'epoch': 2.98} + 19%|█▊ | 69120/371472 [5:29:47<23:33:43, 3.56it/s] 19%|█▊ | 69121/371472 [5:29:47<23:20:30, 3.60it/s] 19%|█▊ | 69122/371472 [5:29:47<23:43:57, 3.54it/s] 19%|█▊ | 69123/371472 [5:29:48<23:57:53, 3.50it/s] 19%|█▊ | 69124/371472 [5:29:48<23:07:46, 3.63it/s] 19%|█▊ | 69125/371472 [5:29:48<25:25:43, 3.30it/s] 19%|█▊ | 69126/371472 [5:29:49<25:01:06, 3.36it/s] 19%|█▊ | 69127/371472 [5:29:49<24:59:50, 3.36it/s] 19%|█▊ | 69128/371472 [5:29:49<24:11:13, 3.47it/s] 19%|█▊ | 69129/371472 [5:29:50<28:13:53, 2.97it/s] 19%|█▊ | 69130/371472 [5:29:50<25:32:48, 3.29it/s] 19%|█▊ | 69131/371472 [5:29:50<24:53:36, 3.37it/s] 19%|█▊ | 69132/371472 [5:29:50<23:51:48, 3.52it/s] 19%|█▊ | 69133/371472 [5:29:51<24:22:31, 3.45it/s] 19%|█▊ | 69134/371472 [5:29:51<23:10:42, 3.62it/s] 19%|█▊ | 69135/371472 [5:29:51<22:56:14, 3.66it/s] 19%|█▊ | 69136/371472 [5:29:52<22:06:48, 3.80it/s] 19%|█▊ | 69137/371472 [5:29:52<22:20:43, 3.76it/s] 19%|█▊ | 69138/371472 [5:29:52<22:58:21, 3.66it/s] 19%|█▊ | 69139/371472 [5:29:52<22:25:42, 3.74it/s] 19%|█▊ | 69140/371472 [5:29:53<22:09:52, 3.79it/s] {'loss': 4.2749, 'learning_rate': 8.328826305242517e-07, 'epoch': 2.98} + 19%|█▊ | 69140/371472 [5:29:53<22:09:52, 3.79it/s] 19%|█▊ | 69141/371472 [5:29:53<21:33:56, 3.89it/s] 19%|█▊ | 69142/371472 [5:29:53<22:18:03, 3.77it/s] 19%|█▊ | 69143/371472 [5:29:53<22:23:48, 3.75it/s] 19%|█▊ | 69144/371472 [5:29:54<22:18:22, 3.76it/s] 19%|█▊ | 69145/371472 [5:29:54<22:01:29, 3.81it/s] 19%|█▊ | 69146/371472 [5:29:54<23:10:58, 3.62it/s] 19%|█▊ | 69147/371472 [5:29:55<24:45:46, 3.39it/s] 19%|█▊ | 69148/371472 [5:29:55<24:42:07, 3.40it/s] 19%|█▊ | 69149/371472 [5:29:55<24:07:14, 3.48it/s] 19%|█▊ | 69150/371472 [5:29:55<26:35:41, 3.16it/s] 19%|█▊ | 69151/371472 [5:29:56<25:56:44, 3.24it/s] 19%|█▊ | 69152/371472 [5:29:56<24:51:25, 3.38it/s] 19%|█▊ | 69153/371472 [5:29:56<23:36:11, 3.56it/s] 19%|█▊ | 69154/371472 [5:29:57<23:25:21, 3.59it/s] 19%|█▊ | 69155/371472 [5:29:57<23:51:20, 3.52it/s] 19%|█▊ | 69156/371472 [5:29:57<23:26:25, 3.58it/s] 19%|█▊ | 69157/371472 [5:29:57<24:15:45, 3.46it/s] 19%|█▊ | 69158/371472 [5:29:58<24:28:37, 3.43it/s] 19%|█▊ | 69159/371472 [5:29:58<24:38:16, 3.41it/s] 19%|█▊ | 69160/371472 [5:29:58<25:38:54, 3.27it/s] {'loss': 4.0304, 'learning_rate': 8.328341485487728e-07, 'epoch': 2.98} + 19%|█▊ | 69160/371472 [5:29:58<25:38:54, 3.27it/s] 19%|█▊ | 69161/371472 [5:29:59<26:10:43, 3.21it/s] 19%|█▊ | 69162/371472 [5:29:59<24:59:19, 3.36it/s] 19%|█▊ | 69163/371472 [5:29:59<24:25:03, 3.44it/s] 19%|█▊ | 69164/371472 [5:30:00<25:42:08, 3.27it/s] 19%|█▊ | 69165/371472 [5:30:00<25:20:03, 3.31it/s] 19%|█▊ | 69166/371472 [5:30:00<24:55:59, 3.37it/s] 19%|█▊ | 69167/371472 [5:30:00<23:57:28, 3.51it/s] 19%|█▊ | 69168/371472 [5:30:01<23:54:33, 3.51it/s] 19%|█▊ | 69169/371472 [5:30:01<25:03:53, 3.35it/s] 19%|█▊ | 69170/371472 [5:30:01<24:03:36, 3.49it/s] 19%|█▊ | 69171/371472 [5:30:02<23:59:35, 3.50it/s] 19%|█▊ | 69172/371472 [5:30:02<23:43:42, 3.54it/s] 19%|█▊ | 69173/371472 [5:30:02<23:14:40, 3.61it/s] 19%|█▊ | 69174/371472 [5:30:02<25:28:29, 3.30it/s] 19%|█▊ | 69175/371472 [5:30:03<25:09:31, 3.34it/s] 19%|█▊ | 69176/371472 [5:30:03<23:48:16, 3.53it/s] 19%|█▊ | 69177/371472 [5:30:03<23:47:33, 3.53it/s] 19%|█▊ | 69178/371472 [5:30:04<23:19:21, 3.60it/s] 19%|█▊ | 69179/371472 [5:30:04<22:54:22, 3.67it/s] 19%|█▊ | 69180/371472 [5:30:04<22:41:37, 3.70it/s] {'loss': 4.1121, 'learning_rate': 8.32785666573294e-07, 'epoch': 2.98} + 19%|█▊ | 69180/371472 [5:30:04<22:41:37, 3.70it/s] 19%|█▊ | 69181/371472 [5:30:04<22:56:57, 3.66it/s] 19%|█▊ | 69182/371472 [5:30:05<22:15:33, 3.77it/s] 19%|█▊ | 69183/371472 [5:30:05<21:58:52, 3.82it/s] 19%|█▊ | 69184/371472 [5:30:05<23:38:56, 3.55it/s] 19%|█▊ | 69185/371472 [5:30:05<23:06:03, 3.63it/s] 19%|█▊ | 69186/371472 [5:30:06<22:52:15, 3.67it/s] 19%|█▊ | 69187/371472 [5:30:06<23:01:08, 3.65it/s] 19%|█▊ | 69188/371472 [5:30:06<23:09:29, 3.63it/s] 19%|█▊ | 69189/371472 [5:30:07<23:53:03, 3.52it/s] 19%|█▊ | 69190/371472 [5:30:07<24:33:20, 3.42it/s] 19%|█▊ | 69191/371472 [5:30:07<23:23:12, 3.59it/s] 19%|█▊ | 69192/371472 [5:30:07<24:30:28, 3.43it/s] 19%|█▊ | 69193/371472 [5:30:08<24:05:02, 3.49it/s] 19%|█▊ | 69194/371472 [5:30:08<23:46:09, 3.53it/s] 19%|█▊ | 69195/371472 [5:30:08<24:39:22, 3.41it/s] 19%|█▊ | 69196/371472 [5:30:09<24:36:14, 3.41it/s] 19%|█▊ | 69197/371472 [5:30:09<23:13:15, 3.62it/s] 19%|█▊ | 69198/371472 [5:30:09<24:19:43, 3.45it/s] 19%|█▊ | 69199/371472 [5:30:09<23:43:11, 3.54it/s] 19%|█▊ | 69200/371472 [5:30:10<23:52:26, 3.52it/s] {'loss': 4.1717, 'learning_rate': 8.327371845978151e-07, 'epoch': 2.98} + 19%|█▊ | 69200/371472 [5:30:10<23:52:26, 3.52it/s] 19%|█▊ | 69201/371472 [5:30:10<23:57:26, 3.50it/s] 19%|█▊ | 69202/371472 [5:30:10<23:23:38, 3.59it/s] 19%|█▊ | 69203/371472 [5:30:11<23:58:14, 3.50it/s] 19%|█▊ | 69204/371472 [5:30:11<24:30:41, 3.43it/s] 19%|█▊ | 69205/371472 [5:30:11<23:22:55, 3.59it/s] 19%|█▊ | 69206/371472 [5:30:11<22:34:09, 3.72it/s] 19%|█▊ | 69207/371472 [5:30:12<22:05:40, 3.80it/s] 19%|█▊ | 69208/371472 [5:30:12<22:40:28, 3.70it/s] 19%|█▊ | 69209/371472 [5:30:12<23:16:09, 3.61it/s] 19%|█▊ | 69210/371472 [5:30:12<22:41:51, 3.70it/s] 19%|█▊ | 69211/371472 [5:30:13<22:32:13, 3.73it/s] 19%|█▊ | 69212/371472 [5:30:13<23:32:42, 3.57it/s] 19%|█▊ | 69213/371472 [5:30:13<24:11:20, 3.47it/s] 19%|█▊ | 69214/371472 [5:30:14<24:50:08, 3.38it/s] 19%|█▊ | 69215/371472 [5:30:14<26:12:39, 3.20it/s] 19%|█▊ | 69216/371472 [5:30:14<25:12:56, 3.33it/s] 19%|█▊ | 69217/371472 [5:30:15<25:33:57, 3.28it/s] 19%|█▊ | 69218/371472 [5:30:15<25:14:21, 3.33it/s] 19%|█▊ | 69219/371472 [5:30:15<24:29:53, 3.43it/s] 19%|█▊ | 69220/371472 [5:30:16<26:22:24, 3.18it/s] {'loss': 3.996, 'learning_rate': 8.326887026223362e-07, 'epoch': 2.98} + 19%|█▊ | 69220/371472 [5:30:16<26:22:24, 3.18it/s] 19%|█▊ | 69221/371472 [5:30:16<25:38:46, 3.27it/s] 19%|█▊ | 69222/371472 [5:30:16<24:37:49, 3.41it/s] 19%|█▊ | 69223/371472 [5:30:16<23:54:50, 3.51it/s] 19%|█▊ | 69224/371472 [5:30:17<23:16:41, 3.61it/s] 19%|█▊ | 69225/371472 [5:30:17<22:30:09, 3.73it/s] 19%|█▊ | 69226/371472 [5:30:17<27:56:09, 3.01it/s] 19%|█▊ | 69227/371472 [5:30:18<26:23:38, 3.18it/s] 19%|█▊ | 69228/371472 [5:30:18<24:48:25, 3.38it/s] 19%|█▊ | 69229/371472 [5:30:18<23:35:26, 3.56it/s] 19%|█▊ | 69230/371472 [5:30:18<24:31:05, 3.42it/s] 19%|█▊ | 69231/371472 [5:30:19<24:52:05, 3.38it/s] 19%|█▊ | 69232/371472 [5:30:19<26:11:07, 3.21it/s] 19%|█▊ | 69233/371472 [5:30:19<26:23:13, 3.18it/s] 19%|█▊ | 69234/371472 [5:30:20<27:05:14, 3.10it/s] 19%|█▊ | 69235/371472 [5:30:20<26:00:05, 3.23it/s] 19%|█▊ | 69236/371472 [5:30:20<25:54:26, 3.24it/s] 19%|█▊ | 69237/371472 [5:30:21<25:45:39, 3.26it/s] 19%|█▊ | 69238/371472 [5:30:21<24:37:10, 3.41it/s] 19%|█▊ | 69239/371472 [5:30:21<24:28:20, 3.43it/s] 19%|█▊ | 69240/371472 [5:30:21<23:26:35, 3.58it/s] {'loss': 4.3253, 'learning_rate': 8.326402206468572e-07, 'epoch': 2.98} + 19%|█▊ | 69240/371472 [5:30:21<23:26:35, 3.58it/s] 19%|█▊ | 69241/371472 [5:30:22<24:56:57, 3.36it/s] 19%|█▊ | 69242/371472 [5:30:22<24:04:40, 3.49it/s] 19%|█▊ | 69243/371472 [5:30:22<23:33:12, 3.56it/s] 19%|█▊ | 69244/371472 [5:30:23<23:17:38, 3.60it/s] 19%|█▊ | 69245/371472 [5:30:23<23:35:51, 3.56it/s] 19%|█▊ | 69246/371472 [5:30:23<23:10:42, 3.62it/s] 19%|█▊ | 69247/371472 [5:30:23<22:38:32, 3.71it/s] 19%|█▊ | 69248/371472 [5:30:24<23:18:47, 3.60it/s] 19%|█▊ | 69249/371472 [5:30:24<22:44:31, 3.69it/s] 19%|█▊ | 69250/371472 [5:30:24<25:05:22, 3.35it/s] 19%|█▊ | 69251/371472 [5:30:25<25:06:33, 3.34it/s] 19%|█▊ | 69252/371472 [5:30:25<28:36:18, 2.93it/s] 19%|█▊ | 69253/371472 [5:30:25<27:14:01, 3.08it/s] 19%|█▊ | 69254/371472 [5:30:26<26:30:50, 3.17it/s] 19%|█▊ | 69255/371472 [5:30:26<25:35:39, 3.28it/s] 19%|█▊ | 69256/371472 [5:30:26<25:46:50, 3.26it/s] 19%|█▊ | 69257/371472 [5:30:26<24:38:12, 3.41it/s] 19%|█▊ | 69258/371472 [5:30:27<23:35:57, 3.56it/s] 19%|█▊ | 69259/371472 [5:30:27<23:43:18, 3.54it/s] 19%|█▊ | 69260/371472 [5:30:27<22:56:41, 3.66it/s] {'loss': 4.0697, 'learning_rate': 8.325917386713784e-07, 'epoch': 2.98} + 19%|█▊ | 69260/371472 [5:30:27<22:56:41, 3.66it/s] 19%|█▊ | 69261/371472 [5:30:28<23:06:37, 3.63it/s] 19%|█▊ | 69262/371472 [5:30:28<23:01:46, 3.65it/s] 19%|█▊ | 69263/371472 [5:30:28<23:07:36, 3.63it/s] 19%|█▊ | 69264/371472 [5:30:28<23:09:52, 3.62it/s] 19%|█▊ | 69265/371472 [5:30:29<23:35:12, 3.56it/s] 19%|█▊ | 69266/371472 [5:30:29<22:48:18, 3.68it/s] 19%|█▊ | 69267/371472 [5:30:29<23:02:51, 3.64it/s] 19%|█▊ | 69268/371472 [5:30:29<23:33:44, 3.56it/s] 19%|█▊ | 69269/371472 [5:30:30<22:49:24, 3.68it/s] 19%|█▊ | 69270/371472 [5:30:30<22:14:43, 3.77it/s] 19%|█▊ | 69271/371472 [5:30:30<22:31:35, 3.73it/s] 19%|█▊ | 69272/371472 [5:30:31<22:16:12, 3.77it/s] 19%|█▊ | 69273/371472 [5:30:31<22:46:00, 3.69it/s] 19%|█▊ | 69274/371472 [5:30:31<22:29:42, 3.73it/s] 19%|█▊ | 69275/371472 [5:30:31<22:06:30, 3.80it/s] 19%|█▊ | 69276/371472 [5:30:32<22:31:11, 3.73it/s] 19%|█▊ | 69277/371472 [5:30:32<22:12:49, 3.78it/s] 19%|█▊ | 69278/371472 [5:30:32<24:18:18, 3.45it/s] 19%|█▊ | 69279/371472 [5:30:32<23:45:08, 3.53it/s] 19%|█▊ | 69280/371472 [5:30:33<24:10:27, 3.47it/s] {'loss': 4.4012, 'learning_rate': 8.325432566958994e-07, 'epoch': 2.98} + 19%|█▊ | 69280/371472 [5:30:33<24:10:27, 3.47it/s] 19%|█▊ | 69281/371472 [5:30:33<23:35:34, 3.56it/s] 19%|█▊ | 69282/371472 [5:30:33<22:54:34, 3.66it/s] 19%|█▊ | 69283/371472 [5:30:34<23:43:37, 3.54it/s] 19%|█▊ | 69284/371472 [5:30:34<24:01:21, 3.49it/s] 19%|█▊ | 69285/371472 [5:30:34<24:08:04, 3.48it/s] 19%|█▊ | 69286/371472 [5:30:34<24:55:11, 3.37it/s] 19%|█▊ | 69287/371472 [5:30:35<24:19:19, 3.45it/s] 19%|█▊ | 69288/371472 [5:30:35<24:10:03, 3.47it/s] 19%|█▊ | 69289/371472 [5:30:35<24:40:23, 3.40it/s] 19%|█▊ | 69290/371472 [5:30:36<24:32:25, 3.42it/s] 19%|█▊ | 69291/371472 [5:30:36<23:50:23, 3.52it/s] 19%|█▊ | 69292/371472 [5:30:36<23:38:19, 3.55it/s] 19%|█▊ | 69293/371472 [5:30:36<22:57:01, 3.66it/s] 19%|█▊ | 69294/371472 [5:30:37<22:50:45, 3.67it/s] 19%|█▊ | 69295/371472 [5:30:37<25:18:42, 3.32it/s] 19%|█▊ | 69296/371472 [5:30:37<24:42:49, 3.40it/s] 19%|█▊ | 69297/371472 [5:30:38<24:09:35, 3.47it/s] 19%|█▊ | 69298/371472 [5:30:38<23:29:23, 3.57it/s] 19%|█▊ | 69299/371472 [5:30:38<24:05:08, 3.48it/s] 19%|█▊ | 69300/371472 [5:30:38<23:17:16, 3.60it/s] {'loss': 3.992, 'learning_rate': 8.324947747204205e-07, 'epoch': 2.98} + 19%|█▊ | 69300/371472 [5:30:38<23:17:16, 3.60it/s] 19%|█▊ | 69301/371472 [5:30:39<23:28:54, 3.57it/s] 19%|█▊ | 69302/371472 [5:30:39<24:43:09, 3.40it/s] 19%|█▊ | 69303/371472 [5:30:39<24:38:49, 3.41it/s] 19%|█▊ | 69304/371472 [5:30:40<24:41:22, 3.40it/s] 19%|█▊ | 69305/371472 [5:30:40<23:30:56, 3.57it/s] 19%|█▊ | 69306/371472 [5:30:40<23:34:37, 3.56it/s] 19%|█▊ | 69307/371472 [5:30:40<22:30:12, 3.73it/s] 19%|█▊ | 69308/371472 [5:30:41<23:48:37, 3.53it/s] 19%|█▊ | 69309/371472 [5:30:41<23:31:29, 3.57it/s] 19%|█▊ | 69310/371472 [5:30:41<24:39:47, 3.40it/s] 19%|█▊ | 69311/371472 [5:30:42<23:36:46, 3.55it/s] 19%|█▊ | 69312/371472 [5:30:42<23:59:32, 3.50it/s] 19%|█▊ | 69313/371472 [5:30:42<23:52:44, 3.51it/s] 19%|█▊ | 69314/371472 [5:30:42<25:07:14, 3.34it/s] 19%|█▊ | 69315/371472 [5:30:43<24:21:54, 3.44it/s] 19%|█▊ | 69316/371472 [5:30:43<24:36:36, 3.41it/s] 19%|█▊ | 69317/371472 [5:30:43<26:36:12, 3.15it/s] 19%|█▊ | 69318/371472 [5:30:44<26:42:51, 3.14it/s] 19%|█▊ | 69319/371472 [5:30:44<25:24:11, 3.30it/s] 19%|█▊ | 69320/371472 [5:30:44<23:36:58, 3.55it/s] {'loss': 4.241, 'learning_rate': 8.324462927449417e-07, 'epoch': 2.99} + 19%|█▊ | 69320/371472 [5:30:44<23:36:58, 3.55it/s] 19%|█▊ | 69321/371472 [5:30:45<23:10:14, 3.62it/s] 19%|█▊ | 69322/371472 [5:30:45<22:28:25, 3.73it/s] 19%|█▊ | 69323/371472 [5:30:45<22:26:23, 3.74it/s] 19%|█▊ | 69324/371472 [5:30:45<21:55:56, 3.83it/s] 19%|█▊ | 69325/371472 [5:30:46<22:52:58, 3.67it/s] 19%|█▊ | 69326/371472 [5:30:46<22:30:55, 3.73it/s] 19%|█▊ | 69327/371472 [5:30:46<23:35:26, 3.56it/s] 19%|█▊ | 69328/371472 [5:30:46<23:26:47, 3.58it/s] 19%|█▊ | 69329/371472 [5:30:47<22:20:07, 3.76it/s] 19%|█▊ | 69330/371472 [5:30:47<23:23:08, 3.59it/s] 19%|█▊ | 69331/371472 [5:30:47<22:24:42, 3.74it/s] 19%|█▊ | 69332/371472 [5:30:48<23:06:28, 3.63it/s] 19%|█▊ | 69333/371472 [5:30:48<24:07:44, 3.48it/s] 19%|█▊ | 69334/371472 [5:30:48<22:57:30, 3.66it/s] 19%|█▊ | 69335/371472 [5:30:48<22:30:37, 3.73it/s] 19%|█▊ | 69336/371472 [5:30:49<21:47:36, 3.85it/s] 19%|█▊ | 69337/371472 [5:30:49<21:32:18, 3.90it/s] 19%|█▊ | 69338/371472 [5:30:49<22:05:23, 3.80it/s] 19%|█▊ | 69339/371472 [5:30:49<22:49:44, 3.68it/s] 19%|█▊ | 69340/371472 [5:30:50<22:12:24, 3.78it/s] {'loss': 4.3471, 'learning_rate': 8.323978107694628e-07, 'epoch': 2.99} + 19%|█▊ | 69340/371472 [5:30:50<22:12:24, 3.78it/s] 19%|█▊ | 69341/371472 [5:30:50<22:51:17, 3.67it/s] 19%|█▊ | 69342/371472 [5:30:50<23:07:43, 3.63it/s] 19%|█▊ | 69343/371472 [5:30:51<23:58:34, 3.50it/s] 19%|█▊ | 69344/371472 [5:30:51<24:52:37, 3.37it/s] 19%|█▊ | 69345/371472 [5:30:51<24:30:05, 3.43it/s] 19%|█▊ | 69346/371472 [5:30:51<23:36:35, 3.55it/s] 19%|█▊ | 69347/371472 [5:30:52<23:36:48, 3.55it/s] 19%|█▊ | 69348/371472 [5:30:52<23:38:58, 3.55it/s] 19%|█▊ | 69349/371472 [5:30:52<25:08:56, 3.34it/s] 19%|█▊ | 69350/371472 [5:30:53<24:55:05, 3.37it/s] 19%|█▊ | 69351/371472 [5:30:53<25:33:56, 3.28it/s] 19%|█▊ | 69352/371472 [5:30:53<24:40:28, 3.40it/s] 19%|█▊ | 69353/371472 [5:30:53<23:54:17, 3.51it/s] 19%|█▊ | 69354/371472 [5:30:54<24:12:43, 3.47it/s] 19%|█▊ | 69355/371472 [5:30:54<23:03:42, 3.64it/s] 19%|█▊ | 69356/371472 [5:30:54<24:12:46, 3.47it/s] 19%|█▊ | 69357/371472 [5:30:55<24:00:11, 3.50it/s] 19%|█▊ | 69358/371472 [5:30:55<23:41:52, 3.54it/s] 19%|█▊ | 69359/371472 [5:30:55<22:36:31, 3.71it/s] 19%|█▊ | 69360/371472 [5:30:55<22:46:35, 3.68it/s] {'loss': 4.1748, 'learning_rate': 8.323493287939839e-07, 'epoch': 2.99} + 19%|█▊ | 69360/371472 [5:30:55<22:46:35, 3.68it/s] 19%|█▊ | 69361/371472 [5:30:56<23:20:24, 3.60it/s] 19%|█▊ | 69362/371472 [5:30:56<23:16:43, 3.60it/s] 19%|█▊ | 69363/371472 [5:30:56<24:28:50, 3.43it/s] 19%|█▊ | 69364/371472 [5:30:57<23:44:28, 3.53it/s] 19%|█▊ | 69365/371472 [5:30:57<24:34:51, 3.41it/s] 19%|█▊ | 69366/371472 [5:30:57<23:51:46, 3.52it/s] 19%|█▊ | 69367/371472 [5:30:57<23:33:45, 3.56it/s] 19%|█▊ | 69368/371472 [5:30:58<23:21:32, 3.59it/s] 19%|█▊ | 69369/371472 [5:30:58<22:41:22, 3.70it/s] 19%|█▊ | 69370/371472 [5:30:58<23:58:20, 3.50it/s] 19%|█▊ | 69371/371472 [5:30:59<24:40:45, 3.40it/s] 19%|█▊ | 69372/371472 [5:30:59<24:40:58, 3.40it/s] 19%|█▊ | 69373/371472 [5:30:59<24:44:49, 3.39it/s] 19%|█▊ | 69374/371472 [5:30:59<26:15:57, 3.19it/s] 19%|█▊ | 69375/371472 [5:31:00<25:34:07, 3.28it/s] 19%|█▊ | 69376/371472 [5:31:00<24:15:00, 3.46it/s] 19%|█▊ | 69377/371472 [5:31:00<23:40:36, 3.54it/s] 19%|█▊ | 69378/371472 [5:31:01<24:24:30, 3.44it/s] 19%|█▊ | 69379/371472 [5:31:01<24:12:51, 3.47it/s] 19%|█▊ | 69380/371472 [5:31:01<26:15:04, 3.20it/s] {'loss': 4.332, 'learning_rate': 8.323008468185049e-07, 'epoch': 2.99} + 19%|█▊ | 69380/371472 [5:31:01<26:15:04, 3.20it/s] 19%|█▊ | 69381/371472 [5:31:02<25:26:56, 3.30it/s] 19%|█▊ | 69382/371472 [5:31:02<27:08:15, 3.09it/s] 19%|█▊ | 69383/371472 [5:31:02<27:01:31, 3.10it/s] 19%|█▊ | 69384/371472 [5:31:02<25:17:37, 3.32it/s] 19%|█▊ | 69385/371472 [5:31:03<25:02:24, 3.35it/s] 19%|█▊ | 69386/371472 [5:31:03<23:59:46, 3.50it/s] 19%|█▊ | 69387/371472 [5:31:03<24:41:26, 3.40it/s] 19%|█▊ | 69388/371472 [5:31:04<25:52:52, 3.24it/s] 19%|█▊ | 69389/371472 [5:31:04<29:56:42, 2.80it/s] 19%|█▊ | 69390/371472 [5:31:04<28:24:35, 2.95it/s] 19%|█▊ | 69391/371472 [5:31:05<27:43:23, 3.03it/s] 19%|█▊ | 69392/371472 [5:31:05<25:46:27, 3.26it/s] 19%|█▊ | 69393/371472 [5:31:05<25:44:30, 3.26it/s] 19%|█▊ | 69394/371472 [5:31:06<24:13:22, 3.46it/s] 19%|█▊ | 69395/371472 [5:31:06<22:57:49, 3.65it/s] 19%|█▊ | 69396/371472 [5:31:06<22:57:46, 3.65it/s] 19%|█▊ | 69397/371472 [5:31:06<23:00:24, 3.65it/s] 19%|█▊ | 69398/371472 [5:31:07<23:37:33, 3.55it/s] 19%|█▊ | 69399/371472 [5:31:07<22:58:19, 3.65it/s] 19%|█▊ | 69400/371472 [5:31:07<23:02:43, 3.64it/s] {'loss': 3.9955, 'learning_rate': 8.322523648430261e-07, 'epoch': 2.99} + 19%|█▊ | 69400/371472 [5:31:07<23:02:43, 3.64it/s] 19%|█▊ | 69401/371472 [5:31:07<23:51:13, 3.52it/s] 19%|█▊ | 69402/371472 [5:31:08<23:39:32, 3.55it/s] 19%|█▊ | 69403/371472 [5:31:08<22:26:05, 3.74it/s] 19%|█▊ | 69404/371472 [5:31:08<22:04:29, 3.80it/s] 19%|█▊ | 69405/371472 [5:31:09<22:35:20, 3.71it/s] 19%|█▊ | 69406/371472 [5:31:09<22:03:52, 3.80it/s] 19%|█▊ | 69407/371472 [5:31:09<23:12:09, 3.62it/s] 19%|█▊ | 69408/371472 [5:31:09<23:19:19, 3.60it/s] 19%|█▊ | 69409/371472 [5:31:10<24:00:47, 3.49it/s] 19%|█▊ | 69410/371472 [5:31:10<25:47:27, 3.25it/s] 19%|█▊ | 69411/371472 [5:31:10<24:51:49, 3.37it/s] 19%|█▊ | 69412/371472 [5:31:11<24:39:40, 3.40it/s] 19%|█▊ | 69413/371472 [5:31:11<25:06:37, 3.34it/s] 19%|█▊ | 69414/371472 [5:31:11<24:24:28, 3.44it/s] 19%|█▊ | 69415/371472 [5:31:11<24:52:00, 3.37it/s] 19%|█▊ | 69416/371472 [5:31:12<23:47:59, 3.53it/s] 19%|█▊ | 69417/371472 [5:31:12<23:12:41, 3.61it/s] 19%|█▊ | 69418/371472 [5:31:12<23:30:43, 3.57it/s] 19%|█▊ | 69419/371472 [5:31:13<22:34:05, 3.72it/s] 19%|█▊ | 69420/371472 [5:31:13<22:49:04, 3.68it/s] {'loss': 4.2962, 'learning_rate': 8.322038828675472e-07, 'epoch': 2.99} + 19%|█▊ | 69420/371472 [5:31:13<22:49:04, 3.68it/s] 19%|█▊ | 69421/371472 [5:31:13<24:42:14, 3.40it/s] 19%|█▊ | 69422/371472 [5:31:13<24:42:26, 3.40it/s] 19%|█▊ | 69423/371472 [5:31:14<24:26:15, 3.43it/s] 19%|█▊ | 69424/371472 [5:31:14<23:14:35, 3.61it/s] 19%|█▊ | 69425/371472 [5:31:14<22:44:03, 3.69it/s] 19%|█▊ | 69426/371472 [5:31:14<21:46:19, 3.85it/s] 19%|█▊ | 69427/371472 [5:31:15<24:02:28, 3.49it/s] 19%|█▊ | 69428/371472 [5:31:15<22:58:09, 3.65it/s] 19%|█▊ | 69429/371472 [5:31:15<22:27:40, 3.74it/s] 19%|█▊ | 69430/371472 [5:31:16<22:41:04, 3.70it/s] 19%|█▊ | 69431/371472 [5:31:16<22:33:29, 3.72it/s] 19%|█▊ | 69432/371472 [5:31:16<22:04:08, 3.80it/s] 19%|█▊ | 69433/371472 [5:31:16<21:31:51, 3.90it/s] 19%|█▊ | 69434/371472 [5:31:17<21:38:53, 3.88it/s] 19%|█▊ | 69435/371472 [5:31:17<21:58:37, 3.82it/s] 19%|█▊ | 69436/371472 [5:31:17<21:45:31, 3.86it/s] 19%|█▊ | 69437/371472 [5:31:17<21:38:21, 3.88it/s] 19%|█▊ | 69438/371472 [5:31:18<22:28:25, 3.73it/s] 19%|█▊ | 69439/371472 [5:31:18<22:55:42, 3.66it/s] 19%|█▊ | 69440/371472 [5:31:18<24:25:06, 3.44it/s] {'loss': 4.0723, 'learning_rate': 8.321554008920684e-07, 'epoch': 2.99} + 19%|█▊ | 69440/371472 [5:31:18<24:25:06, 3.44it/s] 19%|█▊ | 69441/371472 [5:31:19<24:15:58, 3.46it/s] 19%|█▊ | 69442/371472 [5:31:19<24:17:11, 3.45it/s] 19%|█▊ | 69443/371472 [5:31:19<23:16:27, 3.60it/s] 19%|█▊ | 69444/371472 [5:31:19<23:11:43, 3.62it/s] 19%|█▊ | 69445/371472 [5:31:20<23:51:23, 3.52it/s] 19%|█▊ | 69446/371472 [5:31:20<24:12:05, 3.47it/s] 19%|█▊ | 69447/371472 [5:31:20<23:24:28, 3.58it/s] 19%|█▊ | 69448/371472 [5:31:20<22:22:54, 3.75it/s] 19%|█▊ | 69449/371472 [5:31:21<24:30:56, 3.42it/s] 19%|█▊ | 69450/371472 [5:31:21<23:31:41, 3.57it/s] 19%|█▊ | 69451/371472 [5:31:21<23:25:13, 3.58it/s] 19%|█▊ | 69452/371472 [5:31:22<23:08:16, 3.63it/s] 19%|█▊ | 69453/371472 [5:31:22<23:39:08, 3.55it/s] 19%|█▊ | 69454/371472 [5:31:22<24:57:35, 3.36it/s] 19%|█▊ | 69455/371472 [5:31:23<26:59:22, 3.11it/s] 19%|█▊ | 69456/371472 [5:31:23<26:49:46, 3.13it/s] 19%|█▊ | 69457/371472 [5:31:23<26:03:41, 3.22it/s] 19%|█▊ | 69458/371472 [5:31:23<24:46:14, 3.39it/s] 19%|█▊ | 69459/371472 [5:31:24<24:21:46, 3.44it/s] 19%|█▊ | 69460/371472 [5:31:24<23:58:59, 3.50it/s] {'loss': 3.9664, 'learning_rate': 8.321069189165894e-07, 'epoch': 2.99} + 19%|█▊ | 69460/371472 [5:31:24<23:58:59, 3.50it/s] 19%|█▊ | 69461/371472 [5:31:24<24:40:17, 3.40it/s] 19%|█▊ | 69462/371472 [5:31:25<24:02:49, 3.49it/s] 19%|█▊ | 69463/371472 [5:31:25<24:55:09, 3.37it/s] 19%|█▊ | 69464/371472 [5:31:25<23:56:47, 3.50it/s] 19%|█▊ | 69465/371472 [5:31:25<23:43:32, 3.54it/s] 19%|█▊ | 69466/371472 [5:31:26<23:55:56, 3.51it/s] 19%|█▊ | 69467/371472 [5:31:26<23:42:39, 3.54it/s] 19%|█▊ | 69468/371472 [5:31:26<24:11:07, 3.47it/s] 19%|█▊ | 69469/371472 [5:31:27<24:07:53, 3.48it/s] 19%|█▊ | 69470/371472 [5:31:27<23:33:46, 3.56it/s] 19%|█▊ | 69471/371472 [5:31:27<22:45:33, 3.69it/s] 19%|█▊ | 69472/371472 [5:31:27<23:34:03, 3.56it/s] 19%|█▊ | 69473/371472 [5:31:28<24:28:29, 3.43it/s] 19%|█▊ | 69474/371472 [5:31:28<24:16:01, 3.46it/s] 19%|█▊ | 69475/371472 [5:31:28<24:23:59, 3.44it/s] 19%|█▊ | 69476/371472 [5:31:29<23:46:38, 3.53it/s] 19%|█▊ | 69477/371472 [5:31:29<23:06:49, 3.63it/s] 19%|█▊ | 69478/371472 [5:31:29<24:13:37, 3.46it/s] 19%|█▊ | 69479/371472 [5:31:29<24:28:29, 3.43it/s] 19%|█▊ | 69480/371472 [5:31:30<23:29:00, 3.57it/s] {'loss': 4.2113, 'learning_rate': 8.320584369411105e-07, 'epoch': 2.99} + 19%|█▊ | 69480/371472 [5:31:30<23:29:00, 3.57it/s] 19%|█▊ | 69481/371472 [5:31:30<22:44:36, 3.69it/s] 19%|█▊ | 69482/371472 [5:31:30<22:26:10, 3.74it/s] 19%|█▊ | 69483/371472 [5:31:31<22:47:58, 3.68it/s] 19%|█▊ | 69484/371472 [5:31:31<22:21:26, 3.75it/s] 19%|█▊ | 69485/371472 [5:31:31<25:45:21, 3.26it/s] 19%|█▊ | 69486/371472 [5:31:31<24:05:50, 3.48it/s] 19%|█▊ | 69487/371472 [5:31:32<24:29:52, 3.42it/s] 19%|█▊ | 69488/371472 [5:31:32<24:59:44, 3.36it/s] 19%|█▊ | 69489/371472 [5:31:32<24:49:37, 3.38it/s] 19%|█▊ | 69490/371472 [5:31:33<25:23:46, 3.30it/s] 19%|█▊ | 69491/371472 [5:31:33<24:35:40, 3.41it/s] 19%|█▊ | 69492/371472 [5:31:33<25:17:50, 3.32it/s] 19%|█▊ | 69493/371472 [5:31:34<25:04:19, 3.35it/s] 19%|█▊ | 69494/371472 [5:31:34<34:52:43, 2.40it/s] 19%|█▊ | 69495/371472 [5:31:35<33:29:07, 2.51it/s] 19%|█▊ | 69496/371472 [5:31:35<30:09:34, 2.78it/s] 19%|█▊ | 69497/371472 [5:31:35<27:53:42, 3.01it/s] 19%|█▊ | 69498/371472 [5:31:35<28:24:49, 2.95it/s] 19%|█▊ | 69499/371472 [5:31:36<27:08:38, 3.09it/s] 19%|█▊ | 69500/371472 [5:31:36<25:41:10, 3.27it/s] {'loss': 3.9184, 'learning_rate': 8.320099549656316e-07, 'epoch': 2.99} + 19%|█▊ | 69500/371472 [5:31:36<25:41:10, 3.27it/s] 19%|█▊ | 69501/371472 [5:31:36<26:00:11, 3.23it/s] 19%|█▊ | 69502/371472 [5:31:37<24:32:31, 3.42it/s] 19%|█▊ | 69503/371472 [5:31:37<23:38:09, 3.55it/s] 19%|█▊ | 69504/371472 [5:31:37<23:47:41, 3.53it/s] 19%|█▊ | 69505/371472 [5:31:38<26:35:14, 3.15it/s] 19%|█▊ | 69506/371472 [5:31:38<26:19:30, 3.19it/s] 19%|█▊ | 69507/371472 [5:31:38<24:36:49, 3.41it/s] 19%|█▊ | 69508/371472 [5:31:38<25:02:49, 3.35it/s] 19%|█▊ | 69509/371472 [5:31:39<24:55:28, 3.37it/s] 19%|█▊ | 69510/371472 [5:31:39<25:00:18, 3.35it/s] 19%|█▊ | 69511/371472 [5:31:39<23:36:21, 3.55it/s] 19%|█▊ | 69512/371472 [5:31:40<22:52:38, 3.67it/s] 19%|█▊ | 69513/371472 [5:31:40<21:55:24, 3.83it/s] 19%|█▊ | 69514/371472 [5:31:40<23:20:28, 3.59it/s] 19%|█▊ | 69515/371472 [5:31:40<22:27:02, 3.74it/s] 19%|█▊ | 69516/371472 [5:31:41<22:43:29, 3.69it/s] 19%|█▊ | 69517/371472 [5:31:41<22:12:16, 3.78it/s] 19%|█▊ | 69518/371472 [5:31:41<21:38:51, 3.87it/s] 19%|█▊ | 69519/371472 [5:31:41<21:40:28, 3.87it/s] 19%|█▊ | 69520/371472 [5:31:42<22:36:08, 3.71it/s] {'loss': 4.489, 'learning_rate': 8.319614729901528e-07, 'epoch': 2.99} + 19%|█▊ | 69520/371472 [5:31:42<22:36:08, 3.71it/s] 19%|█▊ | 69521/371472 [5:31:42<22:41:34, 3.70it/s] 19%|█▊ | 69522/371472 [5:31:42<22:46:35, 3.68it/s] 19%|█▊ | 69523/371472 [5:31:42<22:51:51, 3.67it/s] 19%|█▊ | 69524/371472 [5:31:43<23:12:32, 3.61it/s] 19%|█▊ | 69525/371472 [5:31:43<22:55:27, 3.66it/s] 19%|█▊ | 69526/371472 [5:31:43<22:18:53, 3.76it/s] 19%|█▊ | 69527/371472 [5:31:44<22:32:11, 3.72it/s] 19%|█▊ | 69528/371472 [5:31:44<22:40:16, 3.70it/s] 19%|█▊ | 69529/371472 [5:31:44<22:51:01, 3.67it/s] 19%|█▊ | 69530/371472 [5:31:44<22:33:59, 3.72it/s] 19%|█▊ | 69531/371472 [5:31:45<22:24:08, 3.74it/s] 19%|█▊ | 69532/371472 [5:31:45<22:37:12, 3.71it/s] 19%|█▊ | 69533/371472 [5:31:45<23:04:22, 3.64it/s] 19%|█▊ | 69534/371472 [5:31:45<22:14:52, 3.77it/s] 19%|█▊ | 69535/371472 [5:31:46<24:29:32, 3.42it/s] 19%|█▊ | 69536/371472 [5:31:46<25:06:53, 3.34it/s] 19%|█▊ | 69537/371472 [5:31:46<23:53:10, 3.51it/s] 19%|█▊ | 69538/371472 [5:31:47<22:28:29, 3.73it/s] 19%|█▊ | 69539/371472 [5:31:47<22:26:16, 3.74it/s] 19%|█▊ | 69540/371472 [5:31:47<22:23:52, 3.74it/s] {'loss': 4.3433, 'learning_rate': 8.319129910146738e-07, 'epoch': 3.0} + 19%|█▊ | 69540/371472 [5:31:47<22:23:52, 3.74it/s] 19%|█▊ | 69541/371472 [5:31:47<22:56:28, 3.66it/s] 19%|█▊ | 69542/371472 [5:31:48<22:21:01, 3.75it/s] 19%|█▊ | 69543/371472 [5:31:48<22:06:41, 3.79it/s] 19%|█▊ | 69544/371472 [5:31:48<23:05:25, 3.63it/s] 19%|█▊ | 69545/371472 [5:31:48<23:38:44, 3.55it/s] 19%|█▊ | 69546/371472 [5:31:49<23:37:05, 3.55it/s] 19%|█▊ | 69547/371472 [5:31:49<30:38:03, 2.74it/s] 19%|█▊ | 69548/371472 [5:31:50<28:44:12, 2.92it/s] 19%|█▊ | 69549/371472 [5:31:50<26:49:27, 3.13it/s] 19%|█▊ | 69550/371472 [5:31:50<25:10:25, 3.33it/s] 19%|█��� | 69551/371472 [5:31:50<25:59:37, 3.23it/s] 19%|█▊ | 69552/371472 [5:31:51<25:11:03, 3.33it/s] 19%|█▊ | 69553/371472 [5:31:51<23:53:04, 3.51it/s] 19%|█▊ | 69554/371472 [5:31:51<23:59:48, 3.49it/s] 19%|█▊ | 69555/371472 [5:31:52<23:52:29, 3.51it/s] 19%|█▊ | 69556/371472 [5:31:52<24:02:14, 3.49it/s] 19%|█▊ | 69557/371472 [5:31:52<23:34:48, 3.56it/s] 19%|█▊ | 69558/371472 [5:31:52<24:53:11, 3.37it/s] 19%|█▊ | 69559/371472 [5:31:53<28:11:11, 2.98it/s] 19%|█▊ | 69560/371472 [5:31:53<26:41:59, 3.14it/s] {'loss': 4.1775, 'learning_rate': 8.31864509039195e-07, 'epoch': 3.0} + 19%|█▊ | 69560/371472 [5:31:53<26:41:59, 3.14it/s] 19%|█▊ | 69561/371472 [5:31:53<25:26:23, 3.30it/s] 19%|█▊ | 69562/371472 [5:31:54<24:43:35, 3.39it/s] 19%|█▊ | 69563/371472 [5:31:54<24:11:15, 3.47it/s] 19%|█▊ | 69564/371472 [5:31:54<25:05:41, 3.34it/s] 19%|█▊ | 69565/371472 [5:31:55<23:47:46, 3.52it/s] 19%|█▊ | 69566/371472 [5:31:55<25:17:46, 3.32it/s] 19%|█▊ | 69567/371472 [5:31:55<23:45:39, 3.53it/s] 19%|█▊ | 69568/371472 [5:31:55<23:30:38, 3.57it/s] 19%|█▊ | 69569/371472 [5:31:56<23:09:18, 3.62it/s] 19%|█▊ | 69570/371472 [5:31:56<22:25:32, 3.74it/s] 19%|█▊ | 69571/371472 [5:31:56<24:10:11, 3.47it/s] 19%|█▊ | 69572/371472 [5:31:57<23:11:48, 3.62it/s] 19%|█▊ | 69573/371472 [5:31:57<22:13:42, 3.77it/s] 19%|█▊ | 69574/371472 [5:31:57<22:34:42, 3.71it/s] 19%|█▊ | 69575/371472 [5:31:57<22:23:17, 3.75it/s] 19%|█▊ | 69576/371472 [5:31:58<22:00:20, 3.81it/s] 19%|█▊ | 69577/371472 [5:31:58<25:19:42, 3.31it/s] 19%|█▊ | 69578/371472 [5:31:58<26:36:33, 3.15it/s] 19%|█▊ | 69579/371472 [5:31:59<25:35:37, 3.28it/s] 19%|█▊ | 69580/371472 [5:31:59<24:59:23, 3.36it/s] {'loss': 4.1645, 'learning_rate': 8.318160270637161e-07, 'epoch': 3.0} + 19%|█▊ | 69580/371472 [5:31:59<24:59:23, 3.36it/s] 19%|█▊ | 69581/371472 [5:31:59<23:59:09, 3.50it/s] 19%|█▊ | 69582/371472 [5:31:59<23:53:02, 3.51it/s] 19%|█▊ | 69583/371472 [5:32:00<23:25:07, 3.58it/s] 19%|█▊ | 69584/371472 [5:32:00<22:55:02, 3.66it/s] 19%|█▊ | 69585/371472 [5:32:00<22:58:16, 3.65it/s] 19%|█▊ | 69586/371472 [5:32:00<23:05:54, 3.63it/s] 19%|█▊ | 69587/371472 [5:32:01<22:04:04, 3.80it/s] 19%|█▊ | 69588/371472 [5:32:01<22:17:33, 3.76it/s] 19%|█▊ | 69589/371472 [5:32:01<21:39:16, 3.87it/s] 19%|█▊ | 69590/371472 [5:32:02<22:44:28, 3.69it/s] 19%|█▊ | 69591/371472 [5:32:02<22:37:02, 3.71it/s] 19%|█▊ | 69592/371472 [5:32:02<22:01:59, 3.81it/s] 19%|█▊ | 69593/371472 [5:32:02<21:37:07, 3.88it/s] 19%|█▊ | 69594/371472 [5:32:03<21:11:05, 3.96it/s] 19%|█▊ | 69595/371472 [5:32:03<22:06:57, 3.79it/s] 19%|█▊ | 69596/371472 [5:32:03<22:40:30, 3.70it/s] 19%|█▊ | 69597/371472 [5:32:03<22:30:46, 3.72it/s] 19%|█▊ | 69598/371472 [5:32:04<22:05:29, 3.80it/s] 19%|█▊ | 69599/371472 [5:32:04<23:00:26, 3.64it/s] 19%|█▊ | 69600/371472 [5:32:04<22:51:30, 3.67it/s] {'loss': 4.0936, 'learning_rate': 8.317675450882371e-07, 'epoch': 3.0} + 19%|█▊ | 69600/371472 [5:32:04<22:51:30, 3.67it/s] 19%|█▊ | 69601/371472 [5:32:04<23:19:37, 3.59it/s] 19%|█▊ | 69602/371472 [5:32:05<22:28:17, 3.73it/s] 19%|█▊ | 69603/371472 [5:32:05<23:25:46, 3.58it/s] 19%|█▊ | 69604/371472 [5:32:05<22:58:31, 3.65it/s] 19%|█▊ | 69605/371472 [5:32:06<22:25:40, 3.74it/s] 19%|█▊ | 69606/371472 [5:32:06<23:11:05, 3.62it/s] 19%|█▊ | 69607/371472 [5:32:06<22:45:07, 3.69it/s] 19%|█▊ | 69608/371472 [5:32:06<23:11:33, 3.62it/s] 19%|█▊ | 69609/371472 [5:32:07<23:36:08, 3.55it/s] 19%|█▊ | 69610/371472 [5:32:07<23:07:03, 3.63it/s] 19%|█▊ | 69611/371472 [5:32:07<23:46:09, 3.53it/s] 19%|█▊ | 69612/371472 [5:32:07<22:50:47, 3.67it/s] 19%|█▊ | 69613/371472 [5:32:08<23:21:42, 3.59it/s] 19%|█▊ | 69614/371472 [5:32:08<22:46:02, 3.68it/s] 19%|█▊ | 69615/371472 [5:32:08<23:46:11, 3.53it/s] 19%|█▊ | 69616/371472 [5:32:09<22:55:49, 3.66it/s] 19%|█▊ | 69617/371472 [5:32:09<22:54:46, 3.66it/s] 19%|█▊ | 69618/371472 [5:32:09<24:11:36, 3.47it/s] 19%|█▊ | 69619/371472 [5:32:09<23:15:12, 3.61it/s] 19%|█▊ | 69620/371472 [5:32:10<22:29:45, 3.73it/s] {'loss': 4.243, 'learning_rate': 8.317190631127582e-07, 'epoch': 3.0} + 19%|█▊ | 69620/371472 [5:32:10<22:29:45, 3.73it/s] 19%|█▊ | 69621/371472 [5:32:10<22:14:46, 3.77it/s] 19%|█▊ | 69622/371472 [5:32:10<22:52:39, 3.67it/s] 19%|█▊ | 69623/371472 [5:32:10<22:39:18, 3.70it/s] 19%|█▊ | 69624/371472 [5:32:11<23:26:12, 3.58it/s] 19%|█▊ | 69625/371472 [5:32:11<24:42:41, 3.39it/s] 19%|█▊ | 69626/371472 [5:32:11<23:51:03, 3.52it/s] 19%|█▊ | 69627/371472 [5:32:12<23:26:36, 3.58it/s] 19%|█▊ | 69628/371472 [5:32:12<23:18:51, 3.60it/s] 19%|█▊ | 69629/371472 [5:32:12<23:48:10, 3.52it/s] 19%|█▊ | 69630/371472 [5:32:12<23:20:26, 3.59it/s] 19%|█▊ | 69631/371472 [5:32:13<23:16:34, 3.60it/s] 19%|█▊ | 69632/371472 [5:32:13<22:41:04, 3.70it/s] 19%|█▊ | 69633/371472 [5:32:13<22:22:39, 3.75it/s] 19%|█▊ | 69634/371472 [5:32:14<22:05:47, 3.79it/s] 19%|█▊ | 69635/371472 [5:32:14<22:30:46, 3.72it/s] 19%|█▊ | 69636/371472 [5:32:14<23:09:15, 3.62it/s] 19%|█▊ | 69637/371472 [5:32:14<23:24:09, 3.58it/s] 19%|█▊ | 69638/371472 [5:32:15<23:29:58, 3.57it/s] 19%|█▊ | 69639/371472 [5:32:15<23:09:35, 3.62it/s] 19%|█▊ | 69640/371472 [5:32:15<23:26:12, 3.58it/s] {'loss': 4.3322, 'learning_rate': 8.316705811372794e-07, 'epoch': 3.0} + 19%|█▊ | 69640/371472 [5:32:15<23:26:12, 3.58it/s] 19%|█▊ | 69641/371472 [5:32:16<23:58:04, 3.50it/s] 19%|█▊ | 69642/371472 [5:32:16<24:15:31, 3.46it/s] 19%|█▊ | 69643/371472 [5:32:16<23:35:58, 3.55it/s] 19%|█▊ | 69644/371472 [5:32:16<23:13:56, 3.61it/s] 19%|█▊ | 69645/371472 [5:32:17<23:41:02, 3.54it/s] 19%|█▊ | 69646/371472 [5:32:17<22:23:40, 3.74it/s] 19%|█▊ | 69647/371472 [5:32:17<22:01:15, 3.81it/s] 19%|█▊ | 69648/371472 [5:32:17<21:27:51, 3.91it/s] 19%|█▊ | 69649/371472 [5:32:18<22:20:27, 3.75it/s] 19%|█▊ | 69650/371472 [5:32:18<23:20:08, 3.59it/s] 19%|█▉ | 69651/371472 [5:32:18<24:42:08, 3.39it/s]Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41. +Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2} +/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock. + self.pid = os.fork() + 19%|█▉ | 69652/371472 [5:32:42<617:58:13, 7.37s/it] 19%|█▉ | 69653/371472 [5:32:43<442:04:09, 5.27s/it] 19%|█▉ | 69654/371472 [5:32:43<316:56:47, 3.78s/it] 19%|█▉ | 69655/371472 [5:32:43<229:25:25, 2.74s/it] 19%|█▉ | 69656/371472 [5:32:43<167:56:41, 2.00s/it] 19%|█▉ | 69657/371472 [5:32:44<128:01:01, 1.53s/it] 19%|█▉ | 69658/371472 [5:32:44<97:18:02, 1.16s/it] 19%|█▉ | 69659/371472 [5:32:44<75:43:35, 1.11it/s] 19%|█▉ | 69660/371472 [5:32:45<60:46:46, 1.38it/s] {'loss': 4.1925, 'learning_rate': 8.316220991618005e-07, 'epoch': 3.0} + 19%|█▉ | 69660/371472 [5:32:45<60:46:46, 1.38it/s] 19%|█▉ | 69661/371472 [5:32:45<50:39:16, 1.66it/s] 19%|█▉ | 69662/371472 [5:32:45<43:22:36, 1.93it/s] 19%|█▉ | 69663/371472 [5:32:46<37:42:02, 2.22it/s] 19%|█▉ | 69664/371472 [5:32:46<32:41:52, 2.56it/s] 19%|█▉ | 69665/371472 [5:32:46<31:42:58, 2.64it/s] 19%|█▉ | 69666/371472 [5:32:47<31:02:58, 2.70it/s] 19%|█▉ | 69667/371472 [5:32:47<29:43:58, 2.82it/s] 19%|█▉ | 69668/371472 [5:32:47<27:18:30, 3.07it/s] 19%|█▉ | 69669/371472 [5:32:48<25:29:28, 3.29it/s] 19%|█▉ | 69670/371472 [5:32:48<24:02:37, 3.49it/s] 19%|█▉ | 69671/371472 [5:32:48<25:13:16, 3.32it/s] 19%|█▉ | 69672/371472 [5:32:48<26:01:42, 3.22it/s] 19%|█▉ | 69673/371472 [5:32:49<25:53:59, 3.24it/s] 19%|█▉ | 69674/371472 [5:32:49<25:20:53, 3.31it/s] 19%|█▉ | 69675/371472 [5:32:49<24:38:32, 3.40it/s] 19%|█▉ | 69676/371472 [5:32:50<25:34:18, 3.28it/s] 19%|█▉ | 69677/371472 [5:32:50<25:14:00, 3.32it/s] 19%|█▉ | 69678/371472 [5:32:50<24:10:48, 3.47it/s] 19%|█▉ | 69679/371472 [5:32:50<24:25:25, 3.43it/s] 19%|█▉ | 69680/371472 [5:32:51<24:34:15, 3.41it/s] {'loss': 4.1922, 'learning_rate': 8.315736171863215e-07, 'epoch': 3.0} + 19%|█▉ | 69680/371472 [5:32:51<24:34:15, 3.41it/s] 19%|█▉ | 69681/371472 [5:32:51<26:39:11, 3.15it/s] 19%|█▉ | 69682/371472 [5:32:51<25:44:31, 3.26it/s] 19%|█▉ | 69683/371472 [5:32:52<25:13:01, 3.32it/s] 19%|█▉ | 69684/371472 [5:32:52<24:12:55, 3.46it/s] 19%|█▉ | 69685/371472 [5:32:52<24:00:11, 3.49it/s] 19%|█▉ | 69686/371472 [5:32:53<23:54:51, 3.51it/s] 19%|█▉ | 69687/371472 [5:32:53<23:08:06, 3.62it/s] 19%|█▉ | 69688/371472 [5:32:53<23:11:54, 3.61it/s] 19%|█▉ | 69689/371472 [5:32:53<23:18:51, 3.60it/s] 19%|█▉ | 69690/371472 [5:32:54<23:22:40, 3.59it/s] 19%|█▉ | 69691/371472 [5:32:54<23:12:29, 3.61it/s] 19%|█▉ | 69692/371472 [5:32:54<23:14:59, 3.61it/s] 19%|█▉ | 69693/371472 [5:32:54<22:27:33, 3.73it/s] 19%|█▉ | 69694/371472 [5:32:55<22:17:11, 3.76it/s] 19%|█▉ | 69695/371472 [5:32:55<22:03:23, 3.80it/s] 19%|█▉ | 69696/371472 [5:32:55<23:24:30, 3.58it/s] 19%|█▉ | 69697/371472 [5:32:56<22:40:36, 3.70it/s] 19%|█▉ | 69698/371472 [5:32:56<22:21:24, 3.75it/s] 19%|█▉ | 69699/371472 [5:32:56<23:07:36, 3.62it/s] 19%|█▉ | 69700/371472 [5:32:56<22:50:53, 3.67it/s] {'loss': 4.2554, 'learning_rate': 8.315251352108427e-07, 'epoch': 3.0} + 19%|█▉ | 69700/371472 [5:32:56<22:50:53, 3.67it/s] 19%|█▉ | 69701/371472 [5:32:57<23:14:08, 3.61it/s] 19%|█▉ | 69702/371472 [5:32:57<24:22:03, 3.44it/s] 19%|█▉ | 69703/371472 [5:32:57<24:13:18, 3.46it/s] 19%|█▉ | 69704/371472 [5:32:57<23:38:45, 3.54it/s] 19%|█▉ | 69705/371472 [5:32:58<23:21:56, 3.59it/s] 19%|█▉ | 69706/371472 [5:32:58<24:02:44, 3.49it/s] 19%|█▉ | 69707/371472 [5:32:58<24:56:19, 3.36it/s] 19%|█▉ | 69708/371472 [5:32:59<24:46:11, 3.38it/s] 19%|█▉ | 69709/371472 [5:32:59<24:17:38, 3.45it/s] 19%|█▉ | 69710/371472 [5:32:59<24:15:07, 3.46it/s] 19%|█▉ | 69711/371472 [5:33:00<24:01:40, 3.49it/s] 19%|█▉ | 69712/371472 [5:33:00<24:22:36, 3.44it/s] 19%|█▉ | 69713/371472 [5:33:00<24:10:46, 3.47it/s] 19%|█▉ | 69714/371472 [5:33:00<23:43:32, 3.53it/s] 19%|█▉ | 69715/371472 [5:33:01<23:07:14, 3.63it/s] 19%|█▉ | 69716/371472 [5:33:01<22:34:26, 3.71it/s] 19%|█▉ | 69717/371472 [5:33:01<23:18:46, 3.60it/s] 19%|█▉ | 69718/371472 [5:33:01<23:19:28, 3.59it/s] 19%|█▉ | 69719/371472 [5:33:02<23:16:58, 3.60it/s] 19%|█▉ | 69720/371472 [5:33:02<23:04:24, 3.63it/s] {'loss': 4.001, 'learning_rate': 8.314766532353639e-07, 'epoch': 3.0} + 19%|█▉ | 69720/371472 [5:33:02<23:04:24, 3.63it/s] 19%|█▉ | 69721/371472 [5:33:02<23:19:08, 3.59it/s] 19%|█▉ | 69722/371472 [5:33:03<23:09:53, 3.62it/s] 19%|█▉ | 69723/371472 [5:33:03<25:29:00, 3.29it/s] 19%|█▉ | 69724/371472 [5:33:03<24:24:38, 3.43it/s] 19%|█▉ | 69725/371472 [5:33:03<23:29:03, 3.57it/s] 19%|█▉ | 69726/371472 [5:33:04<25:00:28, 3.35it/s] 19%|█▉ | 69727/371472 [5:33:04<26:47:19, 3.13it/s] 19%|█▉ | 69728/371472 [5:33:04<26:35:53, 3.15it/s] 19%|█▉ | 69729/371472 [5:33:05<25:54:09, 3.24it/s] 19%|█▉ | 69730/371472 [5:33:05<24:38:01, 3.40it/s] 19%|█▉ | 69731/371472 [5:33:05<26:32:59, 3.16it/s] 19%|█▉ | 69732/371472 [5:33:06<28:05:42, 2.98it/s] 19%|█▉ | 69733/371472 [5:33:06<26:58:06, 3.11it/s] 19%|█▉ | 69734/371472 [5:33:06<28:04:36, 2.99it/s] 19%|█▉ | 69735/371472 [5:33:07<26:41:24, 3.14it/s] 19%|█▉ | 69736/371472 [5:33:07<25:32:43, 3.28it/s] 19%|█▉ | 69737/371472 [5:33:07<24:40:29, 3.40it/s] 19%|█▉ | 69738/371472 [5:33:08<23:53:17, 3.51it/s] 19%|█▉ | 69739/371472 [5:33:08<24:51:06, 3.37it/s] 19%|█▉ | 69740/371472 [5:33:08<23:35:13, 3.55it/s] {'loss': 4.1319, 'learning_rate': 8.314281712598849e-07, 'epoch': 3.0} + 19%|█▉ | 69740/371472 [5:33:08<23:35:13, 3.55it/s] 19%|█▉ | 69741/371472 [5:33:08<23:45:22, 3.53it/s] 19%|█▉ | 69742/371472 [5:33:09<23:47:42, 3.52it/s] 19%|█▉ | 69743/371472 [5:33:09<24:43:30, 3.39it/s] 19%|█▉ | 69744/371472 [5:33:09<24:11:22, 3.46it/s] 19%|█▉ | 69745/371472 [5:33:10<23:32:15, 3.56it/s] 19%|█▉ | 69746/371472 [5:33:10<23:27:58, 3.57it/s] 19%|█▉ | 69747/371472 [5:33:10<24:00:16, 3.49it/s] 19%|█▉ | 69748/371472 [5:33:10<23:00:28, 3.64it/s] 19%|█▉ | 69749/371472 [5:33:11<24:09:08, 3.47it/s] 19%|��▉ | 69750/371472 [5:33:11<23:42:47, 3.53it/s] 19%|█▉ | 69751/371472 [5:33:11<22:56:36, 3.65it/s] 19%|█▉ | 69752/371472 [5:33:12<24:59:14, 3.35it/s] 19%|█▉ | 69753/371472 [5:33:12<23:49:23, 3.52it/s] 19%|█▉ | 69754/371472 [5:33:12<24:10:01, 3.47it/s] 19%|█▉ | 69755/371472 [5:33:12<22:50:14, 3.67it/s] 19%|█▉ | 69756/371472 [5:33:13<23:30:16, 3.57it/s] 19%|█▉ | 69757/371472 [5:33:13<26:13:04, 3.20it/s] 19%|█▉ | 69758/371472 [5:33:13<25:45:25, 3.25it/s] 19%|█▉ | 69759/371472 [5:33:14<27:02:35, 3.10it/s] 19%|█▉ | 69760/371472 [5:33:14<25:15:27, 3.32it/s] {'loss': 4.1174, 'learning_rate': 8.313796892844059e-07, 'epoch': 3.0} + 19%|█▉ | 69760/371472 [5:33:14<25:15:27, 3.32it/s] 19%|█▉ | 69761/371472 [5:33:14<26:34:02, 3.15it/s] 19%|█▉ | 69762/371472 [5:33:15<25:39:08, 3.27it/s] 19%|█▉ | 69763/371472 [5:33:15<24:43:29, 3.39it/s] 19%|█▉ | 69764/371472 [5:33:15<23:48:47, 3.52it/s] 19%|█▉ | 69765/371472 [5:33:15<23:03:08, 3.64it/s] 19%|█▉ | 69766/371472 [5:33:16<23:07:28, 3.62it/s] 19%|█▉ | 69767/371472 [5:33:16<22:45:02, 3.68it/s] 19%|█▉ | 69768/371472 [5:33:16<21:47:26, 3.85it/s] 19%|█▉ | 69769/371472 [5:33:16<22:10:11, 3.78it/s] 19%|█▉ | 69770/371472 [5:33:17<21:53:50, 3.83it/s] 19%|█▉ | 69771/371472 [5:33:17<22:12:56, 3.77it/s] 19%|█▉ | 69772/371472 [5:33:17<22:45:35, 3.68it/s] 19%|█▉ | 69773/371472 [5:33:17<22:27:11, 3.73it/s] 19%|█▉ | 69774/371472 [5:33:18<23:00:58, 3.64it/s] 19%|█▉ | 69775/371472 [5:33:18<25:00:27, 3.35it/s] 19%|█▉ | 69776/371472 [5:33:18<24:57:52, 3.36it/s] 19%|█▉ | 69777/371472 [5:33:19<24:10:37, 3.47it/s] 19%|█▉ | 69778/371472 [5:33:19<25:19:09, 3.31it/s] 19%|█▉ | 69779/371472 [5:33:19<24:42:42, 3.39it/s] 19%|█▉ | 69780/371472 [5:33:20<23:23:00, 3.58it/s] {'loss': 4.0008, 'learning_rate': 8.313312073089271e-07, 'epoch': 3.01} + 19%|█▉ | 69780/371472 [5:33:20<23:23:00, 3.58it/s] 19%|█▉ | 69781/371472 [5:33:20<22:50:39, 3.67it/s] 19%|█▉ | 69782/371472 [5:33:20<23:10:51, 3.62it/s] 19%|█▉ | 69783/371472 [5:33:20<24:23:58, 3.43it/s] 19%|█▉ | 69784/371472 [5:33:21<25:32:45, 3.28it/s] 19%|█▉ | 69785/371472 [5:33:21<25:12:01, 3.33it/s] 19%|█▉ | 69786/371472 [5:33:21<24:31:45, 3.42it/s] 19%|█▉ | 69787/371472 [5:33:22<24:13:27, 3.46it/s] 19%|█▉ | 69788/371472 [5:33:22<23:28:27, 3.57it/s] 19%|█▉ | 69789/371472 [5:33:22<26:54:04, 3.12it/s] 19%|█▉ | 69790/371472 [5:33:23<25:35:50, 3.27it/s] 19%|█▉ | 69791/371472 [5:33:23<25:04:26, 3.34it/s] 19%|█▉ | 69792/371472 [5:33:23<25:42:26, 3.26it/s] 19%|█▉ | 69793/371472 [5:33:23<24:36:24, 3.41it/s] 19%|█▉ | 69794/371472 [5:33:24<27:33:55, 3.04it/s] 19%|█▉ | 69795/371472 [5:33:24<26:27:09, 3.17it/s] 19%|█▉ | 69796/371472 [5:33:24<26:22:02, 3.18it/s] 19%|█▉ | 69797/371472 [5:33:25<26:10:20, 3.20it/s] 19%|█▉ | 69798/371472 [5:33:25<25:04:54, 3.34it/s] 19%|█▉ | 69799/371472 [5:33:25<24:54:58, 3.36it/s] 19%|█▉ | 69800/371472 [5:33:26<24:37:34, 3.40it/s] {'loss': 4.1544, 'learning_rate': 8.312827253334482e-07, 'epoch': 3.01} + 19%|█▉ | 69800/371472 [5:33:26<24:37:34, 3.40it/s] 19%|█▉ | 69801/371472 [5:33:26<24:26:11, 3.43it/s] 19%|█▉ | 69802/371472 [5:33:26<23:49:57, 3.52it/s] 19%|█▉ | 69803/371472 [5:33:26<22:56:41, 3.65it/s] 19%|█▉ | 69804/371472 [5:33:27<22:43:32, 3.69it/s] 19%|█▉ | 69805/371472 [5:33:27<22:21:06, 3.75it/s] 19%|█▉ | 69806/371472 [5:33:27<23:47:39, 3.52it/s] 19%|█▉ | 69807/371472 [5:33:27<24:07:41, 3.47it/s] 19%|█▉ | 69808/371472 [5:33:28<22:44:23, 3.68it/s] 19%|█▉ | 69809/371472 [5:33:28<22:19:13, 3.75it/s] 19%|█▉ | 69810/371472 [5:33:28<22:37:49, 3.70it/s] 19%|█▉ | 69811/371472 [5:33:29<23:53:25, 3.51it/s] 19%|█▉ | 69812/371472 [5:33:29<24:47:53, 3.38it/s] 19%|█▉ | 69813/371472 [5:33:29<24:38:35, 3.40it/s] 19%|█▉ | 69814/371472 [5:33:29<23:12:05, 3.61it/s] 19%|█▉ | 69815/371472 [5:33:30<23:33:03, 3.56it/s] 19%|█▉ | 69816/371472 [5:33:30<23:20:01, 3.59it/s] 19%|█▉ | 69817/371472 [5:33:30<23:36:03, 3.55it/s] 19%|█▉ | 69818/371472 [5:33:31<23:39:15, 3.54it/s] 19%|█▉ | 69819/371472 [5:33:31<23:47:16, 3.52it/s] 19%|█▉ | 69820/371472 [5:33:31<24:51:51, 3.37it/s] {'loss': 4.3267, 'learning_rate': 8.312342433579694e-07, 'epoch': 3.01} + 19%|█▉ | 69820/371472 [5:33:31<24:51:51, 3.37it/s] 19%|█▉ | 69821/371472 [5:33:31<24:37:56, 3.40it/s] 19%|█▉ | 69822/371472 [5:33:32<23:29:52, 3.57it/s] 19%|█▉ | 69823/371472 [5:33:32<22:38:07, 3.70it/s] 19%|█▉ | 69824/371472 [5:33:32<23:04:10, 3.63it/s] 19%|█▉ | 69825/371472 [5:33:33<24:16:44, 3.45it/s] 19%|█▉ | 69826/371472 [5:33:33<23:47:56, 3.52it/s] 19%|█▉ | 69827/371472 [5:33:33<23:32:13, 3.56it/s] 19%|█▉ | 69828/371472 [5:33:33<24:04:02, 3.48it/s] 19%|█▉ | 69829/371472 [5:33:34<24:18:05, 3.45it/s] 19%|█▉ | 69830/371472 [5:33:34<24:31:18, 3.42it/s] 19%|█▉ | 69831/371472 [5:33:34<23:06:44, 3.63it/s] 19%|█▉ | 69832/371472 [5:33:35<26:08:59, 3.20it/s] 19%|█▉ | 69833/371472 [5:33:35<25:31:39, 3.28it/s] 19%|█▉ | 69834/371472 [5:33:35<25:28:22, 3.29it/s] 19%|█▉ | 69835/371472 [5:33:36<24:56:46, 3.36it/s] 19%|█▉ | 69836/371472 [5:33:36<23:39:53, 3.54it/s] 19%|█▉ | 69837/371472 [5:33:36<23:37:56, 3.55it/s] 19%|█▉ | 69838/371472 [5:33:36<23:44:37, 3.53it/s] 19%|█▉ | 69839/371472 [5:33:37<23:35:31, 3.55it/s] 19%|█▉ | 69840/371472 [5:33:37<23:08:21, 3.62it/s] {'loss': 4.0394, 'learning_rate': 8.311857613824904e-07, 'epoch': 3.01} + 19%|█▉ | 69840/371472 [5:33:37<23:08:21, 3.62it/s] 19%|█▉ | 69841/371472 [5:33:37<23:00:58, 3.64it/s] 19%|█▉ | 69842/371472 [5:33:37<24:16:00, 3.45it/s] 19%|█▉ | 69843/371472 [5:33:38<23:38:14, 3.54it/s] 19%|█▉ | 69844/371472 [5:33:38<23:42:30, 3.53it/s] 19%|█▉ | 69845/371472 [5:33:38<24:18:43, 3.45it/s] 19%|█▉ | 69846/371472 [5:33:39<23:39:25, 3.54it/s] 19%|█▉ | 69847/371472 [5:33:39<24:03:39, 3.48it/s] 19%|█▉ | 69848/371472 [5:33:39<23:19:15, 3.59it/s] 19%|█▉ | 69849/371472 [5:33:39<22:18:11, 3.76it/s] 19%|█▉ | 69850/371472 [5:33:40<23:18:12, 3.60it/s] 19%|█▉ | 69851/371472 [5:33:40<22:18:54, 3.75it/s] 19%|█▉ | 69852/371472 [5:33:40<23:47:27, 3.52it/s] 19%|█▉ | 69853/371472 [5:33:41<23:28:02, 3.57it/s] 19%|█▉ | 69854/371472 [5:33:41<22:35:32, 3.71it/s] 19%|█▉ | 69855/371472 [5:33:41<22:47:22, 3.68it/s] 19%|█▉ | 69856/371472 [5:33:41<22:02:47, 3.80it/s] 19%|█▉ | 69857/371472 [5:33:42<23:04:28, 3.63it/s] 19%|█▉ | 69858/371472 [5:33:42<23:57:13, 3.50it/s] 19%|█▉ | 69859/371472 [5:33:42<25:44:06, 3.26it/s] 19%|█▉ | 69860/371472 [5:33:43<24:06:37, 3.47it/s] {'loss': 4.3174, 'learning_rate': 8.311372794070115e-07, 'epoch': 3.01} + 19%|█▉ | 69860/371472 [5:33:43<24:06:37, 3.47it/s] 19%|█▉ | 69861/371472 [5:33:43<23:53:59, 3.51it/s] 19%|█▉ | 69862/371472 [5:33:43<23:10:58, 3.61it/s] 19%|█▉ | 69863/371472 [5:33:43<22:24:08, 3.74it/s] 19%|█▉ | 69864/371472 [5:33:44<22:15:16, 3.76it/s] 19%|█▉ | 69865/371472 [5:33:44<21:51:38, 3.83it/s] 19%|█▉ | 69866/371472 [5:33:44<21:54:58, 3.82it/s] 19%|█▉ | 69867/371472 [5:33:44<21:30:46, 3.89it/s] 19%|█▉ | 69868/371472 [5:33:45<23:32:01, 3.56it/s] 19%|█▉ | 69869/371472 [5:33:45<22:47:44, 3.68it/s] 19%|█▉ | 69870/371472 [5:33:45<23:34:51, 3.55it/s] 19%|█▉ | 69871/371472 [5:33:45<23:52:39, 3.51it/s] 19%|█▉ | 69872/371472 [5:33:46<23:05:23, 3.63it/s] 19%|█▉ | 69873/371472 [5:33:46<22:30:57, 3.72it/s] 19%|█▉ | 69874/371472 [5:33:46<24:47:37, 3.38it/s] 19%|█▉ | 69875/371472 [5:33:47<23:47:16, 3.52it/s] 19%|█▉ | 69876/371472 [5:33:47<24:58:18, 3.35it/s] 19%|█▉ | 69877/371472 [5:33:47<24:54:20, 3.36it/s] 19%|█▉ | 69878/371472 [5:33:48<24:54:41, 3.36it/s] \ No newline at end of file