diff --git "a/run-2024-07-14T08:43:36+00:00.log" "b/run-2024-07-14T08:43:36+00:00.log" --- "a/run-2024-07-14T08:43:36+00:00.log" +++ "b/run-2024-07-14T08:43:36+00:00.log" @@ -4709,4 +4709,1167 @@ Non-default generation parameters: {'max_length': 200, 'early_stopping': True, ' 25%|██▌ | 93040/371472 [7:24:12<21:42:48, 3.56it/s] 25%|██▌ | 93041/371472 [7:24:12<21:32:44, 3.59it/s] 25%|██▌ | 93042/371472 [7:24:12<21:20:26, 3.62it/s] 25%|██▌ | 93043/371472 [7:24:13<21:12:54, 3.65it/s] 25%|██▌ | 93044/371472 [7:24:13<20:32:29, 3.77it/s] 25%|██▌ | 93045/371472 [7:24:13<22:02:43, 3.51it/s] 25%|██▌ | 93046/371472 [7:24:14<21:08:03, 3.66it/s] 25%|██▌ | 93047/371472 [7:24:14<20:44:11, 3.73it/s] 25%|██▌ | 93048/371472 [7:24:14<20:10:26, 3.83it/s] 25%|██▌ | 93049/371472 [7:24:14<21:22:01, 3.62it/s] 25%|██▌ | 93050/371472 [7:24:15<21:07:58, 3.66it/s] 25%|██▌ | 93051/371472 [7:24:15<23:59:39, 3.22it/s] 25%|██▌ | 93052/371472 [7:24:15<22:58:55, 3.37it/s] 25%|██▌ | 93053/371472 [7:24:16<22:44:19, 3.40it/s] 25%|██▌ | 93054/371472 [7:24:16<21:52:38, 3.54it/s] 25%|██▌ | 93055/371472 [7:24:16<21:52:07, 3.54it/s] 25%|██▌ | 93056/371472 [7:24:16<21:35:31, 3.58it/s] 25%|██▌ | 93057/371472 [7:24:17<22:09:13, 3.49it/s] 25%|██▌ | 93058/371472 [7:24:17<21:30:43, 3.60it/s] 25%|██▌ | 93059/371472 [7:24:17<20:31:06, 3.77it/s] 25%|██▌ | 93060/371472 [7:24:17<20:28:17, 3.78it/s] {'loss': 3.6872, 'learning_rate': 7.748981878514942e-07, 'epoch': 4.01} 25%|██▌ | 93060/371472 [7:24:17<20:28:17, 3.78it/s] 25%|██▌ | 93061/371472 [7:24:18<20:48:50, 3.72it/s] 25%|██▌ | 93062/371472 [7:24:18<21:08:39, 3.66it/s] 25%|██▌ | 93063/371472 [7:24:18<21:55:28, 3.53it/s] 25%|██▌ | 93064/371472 [7:24:19<21:09:10, 3.66it/s] 25%|██▌ | 93065/371472 [7:24:19<20:10:31, 3.83it/s] 25%|██▌ | 93066/371472 [7:24:19<21:33:10, 3.59it/s] 25%|██▌ | 93067/371472 [7:24:19<22:38:05, 3.42it/s] 25%|██▌ | 93068/371472 [7:24:20<22:09:53, 3.49it/s] 25%|██▌ | 93069/371472 [7:24:20<21:24:28, 3.61it/s] 25%|██▌ | 93070/371472 [7:24:20<21:05:49, 3.67it/s] 25%|██▌ | 93071/371472 [7:24:21<21:13:35, 3.64it/s] 25%|██▌ | 93072/371472 [7:24:21<21:12:47, 3.65it/s] 25%|██▌ | 93073/371472 [7:24:21<21:49:25, 3.54it/s] 25%|██▌ | 93074/371472 [7:24:21<22:14:07, 3.48it/s] 25%|██▌ | 93075/371472 [7:24:22<21:40:33, 3.57it/s] 25%|██▌ | 93076/371472 [7:24:22<21:57:50, 3.52it/s] 25%|██▌ | 93077/371472 [7:24:22<21:25:02, 3.61it/s] 25%|██▌ | 93078/371472 [7:24:22<21:54:29, 3.53it/s] 25%|██▌ | 93079/371472 [7:24:23<22:51:28, 3.38it/s] 25%|██▌ | 93080/371472 [7:24:23<21:50:32, 3.54it/s] {'loss': 3.7737, 'learning_rate': 7.748497058760154e-07, 'epoch': 4.01} 25%|██▌ | 93080/371472 [7:24:23<21:50:32, 3.54it/s] 25%|██▌ | 93081/371472 [7:24:23<22:58:21, 3.37it/s] 25%|██▌ | 93082/371472 [7:24:24<22:00:53, 3.51it/s] 25%|██▌ | 93083/371472 [7:24:24<21:56:29, 3.52it/s] 25%|██▌ | 93084/371472 [7:24:24<24:09:13, 3.20it/s] 25%|██▌ | 93085/371472 [7:24:25<23:20:14, 3.31it/s] 25%|██▌ | 93086/371472 [7:24:25<23:21:21, 3.31it/s] 25%|██▌ | 93087/371472 [7:24:25<22:24:12, 3.45it/s] 25%|██▌ | 93088/371472 [7:24:25<22:10:16, 3.49it/s] 25%|██▌ | 93089/371472 [7:24:26<21:41:51, 3.56it/s] 25%|██▌ | 93090/371472 [7:24:26<21:28:22, 3.60it/s] 25%|██▌ | 93091/371472 [7:24:26<24:15:58, 3.19it/s] 25%|██▌ | 93092/371472 [7:24:27<23:01:16, 3.36it/s] 25%|██▌ | 93093/371472 [7:24:27<22:29:43, 3.44it/s] 25%|██▌ | 93094/371472 [7:24:27<23:03:26, 3.35it/s] 25%|██▌ | 93095/371472 [7:24:28<22:59:24, 3.36it/s] 25%|██▌ | 93096/371472 [7:24:28<22:23:12, 3.45it/s] 25%|██▌ | 93097/371472 [7:24:28<22:34:02, 3.43it/s] 25%|██▌ | 93098/371472 [7:24:28<22:09:26, 3.49it/s] 25%|██▌ | 93099/371472 [7:24:29<21:00:42, 3.68it/s] 25%|██▌ | 93100/371472 [7:24:29<21:26:26, 3.61it/s] {'loss': 3.6935, 'learning_rate': 7.748012239005366e-07, 'epoch': 4.01} - 25%|██▌ | 93100/371472 [7:24:29<21:26:26, 3.61it/s] 25%|██▌ | 93101/371472 [7:24:29<21:12:56, 3.64it/s] 25%|██▌ | 93102/371472 [7:24:29<21:38:45, 3.57it/s] 25%|██▌ | 93103/371472 [7:24:30<20:54:23, 3.70it/s] 25%|██▌ | 93104/371472 [7:24:30<21:05:59, 3.66it/s] \ No newline at end of file + 25%|██▌ | 93100/371472 [7:24:29<21:26:26, 3.61it/s] 25%|██▌ | 93101/371472 [7:24:29<21:12:56, 3.64it/s] 25%|██▌ | 93102/371472 [7:24:29<21:38:45, 3.57it/s] 25%|██▌ | 93103/371472 [7:24:30<20:54:23, 3.70it/s] 25%|██▌ | 93104/371472 [7:24:30<21:05:59, 3.66it/s] 25%|██▌ | 93105/371472 [7:24:30<22:32:53, 3.43it/s] 25%|██▌ | 93106/371472 [7:24:31<22:43:06, 3.40it/s] 25%|██▌ | 93107/371472 [7:24:31<24:04:33, 3.21it/s] 25%|██▌ | 93108/371472 [7:24:31<23:20:42, 3.31it/s] 25%|██▌ | 93109/371472 [7:24:32<23:52:09, 3.24it/s] 25%|██▌ | 93110/371472 [7:24:32<24:32:52, 3.15it/s] 25%|██▌ | 93111/371472 [7:24:32<23:16:58, 3.32it/s] 25%|██▌ | 93112/371472 [7:24:32<23:06:16, 3.35it/s] 25%|██▌ | 93113/371472 [7:24:33<21:45:42, 3.55it/s] 25%|██▌ | 93114/371472 [7:24:33<21:44:38, 3.56it/s] 25%|██▌ | 93115/371472 [7:24:33<20:55:09, 3.70it/s] 25%|██▌ | 93116/371472 [7:24:34<24:10:14, 3.20it/s] 25%|██▌ | 93117/371472 [7:24:34<25:08:23, 3.08it/s] 25%|██▌ | 93118/371472 [7:24:34<26:25:33, 2.93it/s] 25%|██▌ | 93119/371472 [7:24:35<24:47:17, 3.12it/s] 25%|██▌ | 93120/371472 [7:24:35<23:39:51, 3.27it/s] {'loss': 3.7127, 'learning_rate': 7.747527419250577e-07, 'epoch': 4.01} + 25%|██▌ | 93120/371472 [7:24:35<23:39:51, 3.27it/s] 25%|██▌ | 93121/371472 [7:24:35<22:40:30, 3.41it/s] 25%|██▌ | 93122/371472 [7:24:36<23:36:56, 3.27it/s] 25%|██▌ | 93123/371472 [7:24:36<23:27:06, 3.30it/s] 25%|██▌ | 93124/371472 [7:24:36<22:05:30, 3.50it/s] 25%|██▌ | 93125/371472 [7:24:36<22:22:24, 3.46it/s] 25%|██▌ | 93126/371472 [7:24:37<22:26:54, 3.44it/s] 25%|██▌ | 93127/371472 [7:24:37<22:05:10, 3.50it/s] 25%|██▌ | 93128/371472 [7:24:37<22:05:28, 3.50it/s] 25%|██▌ | 93129/371472 [7:24:38<22:59:47, 3.36it/s] 25%|██▌ | 93130/371472 [7:24:38<23:26:19, 3.30it/s] 25%|██▌ | 93131/371472 [7:24:38<23:51:33, 3.24it/s] 25%|██▌ | 93132/371472 [7:24:38<24:11:24, 3.20it/s] 25%|██▌ | 93133/371472 [7:24:39<24:21:35, 3.17it/s] 25%|██▌ | 93134/371472 [7:24:39<23:26:20, 3.30it/s] 25%|██▌ | 93135/371472 [7:24:39<22:11:45, 3.48it/s] 25%|██▌ | 93136/371472 [7:24:40<21:16:07, 3.64it/s] 25%|██▌ | 93137/371472 [7:24:40<21:46:50, 3.55it/s] 25%|██▌ | 93138/371472 [7:24:40<22:52:55, 3.38it/s] 25%|██▌ | 93139/371472 [7:24:40<22:30:20, 3.44it/s] 25%|██▌ | 93140/371472 [7:24:41<22:20:43, 3.46it/s] {'loss': 3.7855, 'learning_rate': 7.747042599495787e-07, 'epoch': 4.01} + 25%|██▌ | 93140/371472 [7:24:41<22:20:43, 3.46it/s] 25%|██▌ | 93141/371472 [7:24:41<23:07:37, 3.34it/s] 25%|██▌ | 93142/371472 [7:24:41<23:55:00, 3.23it/s] 25%|██▌ | 93143/371472 [7:24:42<22:47:40, 3.39it/s] 25%|██▌ | 93144/371472 [7:24:42<21:51:14, 3.54it/s] 25%|██▌ | 93145/371472 [7:24:42<21:12:00, 3.65it/s] 25%|██▌ | 93146/371472 [7:24:42<20:51:05, 3.71it/s] 25%|██▌ | 93147/371472 [7:24:43<21:16:40, 3.63it/s] 25%|██▌ | 93148/371472 [7:24:43<23:05:33, 3.35it/s] 25%|██▌ | 93149/371472 [7:24:43<22:43:59, 3.40it/s] 25%|██▌ | 93150/371472 [7:24:44<22:33:22, 3.43it/s] 25%|██▌ | 93151/371472 [7:24:44<21:44:41, 3.56it/s] 25%|██▌ | 93152/371472 [7:24:44<23:14:41, 3.33it/s] 25%|██▌ | 93153/371472 [7:24:45<22:14:35, 3.48it/s] 25%|██▌ | 93154/371472 [7:24:45<21:45:39, 3.55it/s] 25%|██▌ | 93155/371472 [7:24:45<22:19:26, 3.46it/s] 25%|██▌ | 93156/371472 [7:24:45<21:42:21, 3.56it/s] 25%|██▌ | 93157/371472 [7:24:46<20:54:20, 3.70it/s] 25%|██▌ | 93158/371472 [7:24:46<21:47:22, 3.55it/s] 25%|██▌ | 93159/371472 [7:24:46<21:54:10, 3.53it/s] 25%|██▌ | 93160/371472 [7:24:46<20:55:55, 3.69it/s] {'loss': 3.6971, 'learning_rate': 7.746557779740997e-07, 'epoch': 4.01} + 25%|██▌ | 93160/371472 [7:24:46<20:55:55, 3.69it/s] 25%|██▌ | 93161/371472 [7:24:47<21:21:46, 3.62it/s] 25%|██▌ | 93162/371472 [7:24:47<21:20:29, 3.62it/s] 25%|██▌ | 93163/371472 [7:24:47<21:07:36, 3.66it/s] 25%|██▌ | 93164/371472 [7:24:48<21:57:43, 3.52it/s] 25%|██▌ | 93165/371472 [7:24:48<21:43:10, 3.56it/s] 25%|██▌ | 93166/371472 [7:24:48<21:53:35, 3.53it/s] 25%|██▌ | 93167/371472 [7:24:48<22:58:18, 3.37it/s] 25%|██▌ | 93168/371472 [7:24:49<22:38:17, 3.41it/s] 25%|██▌ | 93169/371472 [7:24:49<22:23:16, 3.45it/s] 25%|██▌ | 93170/371472 [7:24:49<22:15:11, 3.47it/s] 25%|██▌ | 93171/371472 [7:24:50<22:04:15, 3.50it/s] 25%|██▌ | 93172/371472 [7:24:50<21:16:05, 3.63it/s] 25%|██▌ | 93173/371472 [7:24:50<20:52:03, 3.70it/s] 25%|██▌ | 93174/371472 [7:24:50<21:13:48, 3.64it/s] 25%|██▌ | 93175/371472 [7:24:51<21:16:09, 3.63it/s] 25%|██▌ | 93176/371472 [7:24:51<21:32:56, 3.59it/s] 25%|██▌ | 93177/371472 [7:24:51<21:22:06, 3.62it/s] 25%|██▌ | 93178/371472 [7:24:52<21:01:21, 3.68it/s] 25%|██▌ | 93179/371472 [7:24:52<21:05:28, 3.67it/s] 25%|██▌ | 93180/371472 [7:24:52<20:25:54, 3.78it/s] {'loss': 3.6356, 'learning_rate': 7.746072959986209e-07, 'epoch': 4.01} + 25%|██▌ | 93180/371472 [7:24:52<20:25:54, 3.78it/s] 25%|██▌ | 93181/371472 [7:24:52<20:29:05, 3.77it/s] 25%|██▌ | 93182/371472 [7:24:53<19:44:38, 3.92it/s] 25%|██▌ | 93183/371472 [7:24:53<20:51:18, 3.71it/s] 25%|██▌ | 93184/371472 [7:24:53<20:42:28, 3.73it/s] 25%|██▌ | 93185/371472 [7:24:53<20:21:50, 3.80it/s] 25%|██▌ | 93186/371472 [7:24:54<21:34:21, 3.58it/s] 25%|██▌ | 93187/371472 [7:24:54<20:36:48, 3.75it/s] 25%|██▌ | 93188/371472 [7:24:54<22:17:50, 3.47it/s] 25%|██▌ | 93189/371472 [7:24:55<21:50:53, 3.54it/s] 25%|██▌ | 93190/371472 [7:24:55<20:55:55, 3.69it/s] 25%|██▌ | 93191/371472 [7:24:55<20:21:05, 3.80it/s] 25%|██▌ | 93192/371472 [7:24:55<20:29:42, 3.77it/s] 25%|██▌ | 93193/371472 [7:24:56<20:56:12, 3.69it/s] 25%|██▌ | 93194/371472 [7:24:56<20:34:26, 3.76it/s] 25%|██▌ | 93195/371472 [7:24:56<20:26:13, 3.78it/s] 25%|██▌ | 93196/371472 [7:24:56<20:29:01, 3.77it/s] 25%|██▌ | 93197/371472 [7:24:57<21:50:19, 3.54it/s] 25%|██▌ | 93198/371472 [7:24:57<21:22:12, 3.62it/s] 25%|██▌ | 93199/371472 [7:24:57<20:57:41, 3.69it/s] 25%|██▌ | 93200/371472 [7:24:57<21:04:44, 3.67it/s] {'loss': 3.6845, 'learning_rate': 7.74558814023142e-07, 'epoch': 4.01} + 25%|██▌ | 93200/371472 [7:24:57<21:04:44, 3.67it/s] 25%|██▌ | 93201/371472 [7:24:58<21:21:13, 3.62it/s] 25%|██▌ | 93202/371472 [7:24:58<21:57:14, 3.52it/s] 25%|██▌ | 93203/371472 [7:24:58<22:36:38, 3.42it/s] 25%|██▌ | 93204/371472 [7:24:59<23:05:18, 3.35it/s] 25%|██▌ | 93205/371472 [7:24:59<22:16:47, 3.47it/s] 25%|██▌ | 93206/371472 [7:24:59<22:04:12, 3.50it/s] 25%|██▌ | 93207/371472 [7:24:59<21:39:57, 3.57it/s] 25%|██▌ | 93208/371472 [7:25:00<22:03:59, 3.50it/s] 25%|██▌ | 93209/371472 [7:25:00<21:42:55, 3.56it/s] 25%|██▌ | 93210/371472 [7:25:00<23:18:11, 3.32it/s] 25%|██▌ | 93211/371472 [7:25:01<23:20:27, 3.31it/s] 25%|██▌ | 93212/371472 [7:25:01<23:54:32, 3.23it/s] 25%|██▌ | 93213/371472 [7:25:01<23:10:31, 3.34it/s] 25%|██▌ | 93214/371472 [7:25:02<22:18:39, 3.46it/s] 25%|██▌ | 93215/371472 [7:25:02<22:35:39, 3.42it/s] 25%|██▌ | 93216/371472 [7:25:02<22:12:08, 3.48it/s] 25%|██▌ | 93217/371472 [7:25:02<21:41:06, 3.56it/s] 25%|██▌ | 93218/371472 [7:25:03<21:11:29, 3.65it/s] 25%|██▌ | 93219/371472 [7:25:03<21:30:09, 3.59it/s] 25%|██▌ | 93220/371472 [7:25:03<21:38:29, 3.57it/s] {'loss': 3.727, 'learning_rate': 7.745103320476631e-07, 'epoch': 4.02} + 25%|██▌ | 93220/371472 [7:25:03<21:38:29, 3.57it/s] 25%|██▌ | 93221/371472 [7:25:04<22:06:17, 3.50it/s] 25%|██▌ | 93222/371472 [7:25:04<22:55:31, 3.37it/s] 25%|██▌ | 93223/371472 [7:25:04<22:39:32, 3.41it/s] 25%|██▌ | 93224/371472 [7:25:04<22:15:37, 3.47it/s] 25%|██▌ | 93225/371472 [7:25:05<22:02:04, 3.51it/s] 25%|██▌ | 93226/371472 [7:25:05<21:30:45, 3.59it/s] 25%|██▌ | 93227/371472 [7:25:05<20:52:12, 3.70it/s] 25%|██▌ | 93228/371472 [7:25:05<21:24:05, 3.61it/s] 25%|██▌ | 93229/371472 [7:25:06<21:59:47, 3.51it/s] 25%|██▌ | 93230/371472 [7:25:06<22:22:49, 3.45it/s] 25%|██▌ | 93231/371472 [7:25:06<22:07:19, 3.49it/s] 25%|██▌ | 93232/371472 [7:25:07<22:15:27, 3.47it/s] 25%|██▌ | 93233/371472 [7:25:07<22:43:01, 3.40it/s] 25%|██▌ | 93234/371472 [7:25:07<23:45:16, 3.25it/s] 25%|██▌ | 93235/371472 [7:25:08<23:10:58, 3.33it/s] 25%|██▌ | 93236/371472 [7:25:08<22:15:55, 3.47it/s] 25%|██▌ | 93237/371472 [7:25:08<22:23:58, 3.45it/s] 25%|██▌ | 93238/371472 [7:25:08<22:24:13, 3.45it/s] 25%|██▌ | 93239/371472 [7:25:09<22:34:38, 3.42it/s] 25%|██▌ | 93240/371472 [7:25:09<23:33:01, 3.28it/s] {'loss': 3.6132, 'learning_rate': 7.744618500721843e-07, 'epoch': 4.02} + 25%|██▌ | 93240/371472 [7:25:09<23:33:01, 3.28it/s] 25%|██▌ | 93241/371472 [7:25:09<23:28:18, 3.29it/s] 25%|██▌ | 93242/371472 [7:25:10<25:34:26, 3.02it/s] 25%|██▌ | 93243/371472 [7:25:10<26:46:04, 2.89it/s] 25%|██▌ | 93244/371472 [7:25:10<26:14:37, 2.94it/s] 25%|██▌ | 93245/371472 [7:25:11<26:27:00, 2.92it/s] 25%|██▌ | 93246/371472 [7:25:11<25:41:45, 3.01it/s] 25%|██▌ | 93247/371472 [7:25:12<26:46:13, 2.89it/s] 25%|██▌ | 93248/371472 [7:25:12<28:07:37, 2.75it/s] 25%|██▌ | 93249/371472 [7:25:12<25:35:35, 3.02it/s] 25%|██▌ | 93250/371472 [7:25:12<24:26:22, 3.16it/s] 25%|██▌ | 93251/371472 [7:25:13<23:34:21, 3.28it/s] 25%|██▌ | 93252/371472 [7:25:13<23:01:14, 3.36it/s] 25%|██▌ | 93253/371472 [7:25:13<22:36:35, 3.42it/s] 25%|██▌ | 93254/371472 [7:25:14<23:38:52, 3.27it/s] 25%|██▌ | 93255/371472 [7:25:14<22:57:59, 3.37it/s] 25%|██▌ | 93256/371472 [7:25:14<23:28:27, 3.29it/s] 25%|██▌ | 93257/371472 [7:25:15<23:25:47, 3.30it/s] 25%|██▌ | 93258/371472 [7:25:15<23:13:57, 3.33it/s] 25%|██▌ | 93259/371472 [7:25:15<23:02:18, 3.35it/s] 25%|██▌ | 93260/371472 [7:25:15<22:15:27, 3.47it/s] {'loss': 3.6475, 'learning_rate': 7.744133680967054e-07, 'epoch': 4.02} + 25%|██▌ | 93260/371472 [7:25:15<22:15:27, 3.47it/s] 25%|██▌ | 93261/371472 [7:25:16<21:53:03, 3.53it/s] 25%|██▌ | 93262/371472 [7:25:16<20:52:06, 3.70it/s] 25%|██▌ | 93263/371472 [7:25:16<22:53:17, 3.38it/s] 25%|██▌ | 93264/371472 [7:25:17<23:23:32, 3.30it/s] 25%|██▌ | 93265/371472 [7:25:17<23:47:57, 3.25it/s] 25%|██▌ | 93266/371472 [7:25:17<22:56:45, 3.37it/s] 25%|██▌ | 93267/371472 [7:25:17<22:46:59, 3.39it/s] 25%|██▌ | 93268/371472 [7:25:18<22:45:58, 3.39it/s] 25%|██▌ | 93269/371472 [7:25:18<23:46:02, 3.25it/s] 25%|██▌ | 93270/371472 [7:25:18<23:29:36, 3.29it/s] 25%|██▌ | 93271/371472 [7:25:19<23:37:54, 3.27it/s] 25%|██▌ | 93272/371472 [7:25:19<22:36:35, 3.42it/s] 25%|██▌ | 93273/371472 [7:25:19<22:56:02, 3.37it/s] 25%|██▌ | 93274/371472 [7:25:20<22:12:24, 3.48it/s] 25%|██▌ | 93275/371472 [7:25:20<21:39:59, 3.57it/s] 25%|██▌ | 93276/371472 [7:25:20<21:24:16, 3.61it/s] 25%|██▌ | 93277/371472 [7:25:20<24:16:46, 3.18it/s] 25%|██▌ | 93278/371472 [7:25:21<23:31:56, 3.28it/s] 25%|██▌ | 93279/371472 [7:25:21<23:15:25, 3.32it/s] 25%|██▌ | 93280/371472 [7:25:21<23:25:09, 3.30it/s] {'loss': 3.7494, 'learning_rate': 7.743648861212263e-07, 'epoch': 4.02} + 25%|██▌ | 93280/371472 [7:25:21<23:25:09, 3.30it/s] 25%|██▌ | 93281/371472 [7:25:22<22:44:52, 3.40it/s] 25%|██▌ | 93282/371472 [7:25:22<22:21:22, 3.46it/s] 25%|██▌ | 93283/371472 [7:25:22<21:44:06, 3.56it/s] 25%|██▌ | 93284/371472 [7:25:22<21:10:02, 3.65it/s] 25%|██▌ | 93285/371472 [7:25:23<21:38:23, 3.57it/s] 25%|██▌ | 93286/371472 [7:25:23<22:14:05, 3.48it/s] 25%|██▌ | 93287/371472 [7:25:23<22:00:22, 3.51it/s] 25%|██▌ | 93288/371472 [7:25:24<22:49:49, 3.38it/s] 25%|██▌ | 93289/371472 [7:25:24<22:47:16, 3.39it/s] 25%|██▌ | 93290/371472 [7:25:24<21:36:01, 3.58it/s] 25%|██▌ | 93291/371472 [7:25:24<22:05:40, 3.50it/s] 25%|██▌ | 93292/371472 [7:25:25<21:36:31, 3.58it/s] 25%|██▌ | 93293/371472 [7:25:25<23:53:03, 3.24it/s] 25%|██▌ | 93294/371472 [7:25:25<23:38:30, 3.27it/s] 25%|██▌ | 93295/371472 [7:25:26<22:51:53, 3.38it/s] 25%|██▌ | 93296/371472 [7:25:26<22:03:38, 3.50it/s] 25%|██▌ | 93297/371472 [7:25:26<21:55:29, 3.52it/s] 25%|██▌ | 93298/371472 [7:25:27<22:25:29, 3.45it/s] 25%|██▌ | 93299/371472 [7:25:27<22:40:14, 3.41it/s] 25%|██▌ | 93300/371472 [7:25:27<23:48:47, 3.24it/s] {'loss': 3.8054, 'learning_rate': 7.743164041457475e-07, 'epoch': 4.02} + 25%|██▌ | 93300/371472 [7:25:27<23:48:47, 3.24it/s] 25%|██▌ | 93301/371472 [7:25:27<23:42:32, 3.26it/s] 25%|██▌ | 93302/371472 [7:25:28<23:00:17, 3.36it/s] 25%|██▌ | 93303/371472 [7:25:28<23:51:47, 3.24it/s] 25%|██▌ | 93304/371472 [7:25:28<25:53:32, 2.98it/s] 25%|██▌ | 93305/371472 [7:25:29<24:03:39, 3.21it/s] 25%|██▌ | 93306/371472 [7:25:29<23:24:59, 3.30it/s] 25%|██▌ | 93307/371472 [7:25:29<23:14:23, 3.32it/s] 25%|██▌ | 93308/371472 [7:25:30<22:25:26, 3.45it/s] 25%|██▌ | 93309/371472 [7:25:30<22:08:04, 3.49it/s] 25%|██▌ | 93310/371472 [7:25:30<23:26:07, 3.30it/s] 25%|██▌ | 93311/371472 [7:25:30<23:43:51, 3.26it/s] 25%|██▌ | 93312/371472 [7:25:31<23:18:43, 3.31it/s] 25%|██▌ | 93313/371472 [7:25:31<22:38:40, 3.41it/s] 25%|██▌ | 93314/371472 [7:25:31<23:06:03, 3.34it/s] 25%|██▌ | 93315/371472 [7:25:32<22:37:15, 3.42it/s] 25%|██▌ | 93316/371472 [7:25:32<22:50:50, 3.38it/s] 25%|██▌ | 93317/371472 [7:25:32<22:44:27, 3.40it/s] 25%|██▌ | 93318/371472 [7:25:33<23:04:14, 3.35it/s] 25%|██▌ | 93319/371472 [7:25:33<23:06:29, 3.34it/s] 25%|██▌ | 93320/371472 [7:25:33<22:41:20, 3.41it/s] {'loss': 3.6377, 'learning_rate': 7.742679221702687e-07, 'epoch': 4.02} + 25%|██▌ | 93320/371472 [7:25:33<22:41:20, 3.41it/s] 25%|██▌ | 93321/371472 [7:25:33<22:42:33, 3.40it/s] 25%|██▌ | 93322/371472 [7:25:34<22:48:49, 3.39it/s] 25%|██▌ | 93323/371472 [7:25:34<22:38:40, 3.41it/s] 25%|██▌ | 93324/371472 [7:25:34<22:04:30, 3.50it/s] 25%|██▌ | 93325/371472 [7:25:35<21:28:20, 3.60it/s] 25%|██▌ | 93326/371472 [7:25:35<22:39:58, 3.41it/s] 25%|██▌ | 93327/371472 [7:25:35<22:24:52, 3.45it/s] 25%|██▌ | 93328/371472 [7:25:35<21:45:07, 3.55it/s] 25%|██▌ | 93329/371472 [7:25:36<21:51:35, 3.53it/s] 25%|██▌ | 93330/371472 [7:25:36<21:50:58, 3.54it/s] 25%|██▌ | 93331/371472 [7:25:36<21:50:20, 3.54it/s] 25%|██▌ | 93332/371472 [7:25:37<21:51:38, 3.53it/s] 25%|██▌ | 93333/371472 [7:25:37<21:35:41, 3.58it/s] 25%|██▌ | 93334/371472 [7:25:37<21:18:29, 3.63it/s] 25%|██▌ | 93335/371472 [7:25:37<22:37:03, 3.42it/s] 25%|██▌ | 93336/371472 [7:25:38<22:18:51, 3.46it/s] 25%|██▌ | 93337/371472 [7:25:38<21:14:15, 3.64it/s] 25%|██▌ | 93338/371472 [7:25:38<20:58:56, 3.68it/s] 25%|██▌ | 93339/371472 [7:25:39<21:52:48, 3.53it/s] 25%|██▌ | 93340/371472 [7:25:39<23:16:10, 3.32it/s] {'loss': 3.5, 'learning_rate': 7.742194401947898e-07, 'epoch': 4.02} + 25%|██▌ | 93340/371472 [7:25:39<23:16:10, 3.32it/s] 25%|██▌ | 93341/371472 [7:25:39<22:48:31, 3.39it/s] 25%|██▌ | 93342/371472 [7:25:39<24:00:52, 3.22it/s] 25%|██▌ | 93343/371472 [7:25:40<24:01:29, 3.22it/s] 25%|██▌ | 93344/371472 [7:25:40<23:26:30, 3.30it/s] 25%|██▌ | 93345/371472 [7:25:40<23:49:36, 3.24it/s] 25%|██▌ | 93346/371472 [7:25:41<23:34:45, 3.28it/s] 25%|██▌ | 93347/371472 [7:25:41<23:00:45, 3.36it/s] 25%|██▌ | 93348/371472 [7:25:41<22:56:21, 3.37it/s] 25%|██▌ | 93349/371472 [7:25:42<22:44:29, 3.40it/s] 25%|██▌ | 93350/371472 [7:25:42<24:36:56, 3.14it/s] 25%|██▌ | 93351/371472 [7:25:42<23:33:48, 3.28it/s] 25%|██▌ | 93352/371472 [7:25:43<23:24:49, 3.30it/s] 25%|██▌ | 93353/371472 [7:25:43<22:03:52, 3.50it/s] 25%|██▌ | 93354/371472 [7:25:43<22:58:54, 3.36it/s] 25%|██▌ | 93355/371472 [7:25:43<22:00:22, 3.51it/s] 25%|██▌ | 93356/371472 [7:25:44<21:02:36, 3.67it/s] 25%|██▌ | 93357/371472 [7:25:44<21:22:06, 3.62it/s] 25%|██▌ | 93358/371472 [7:25:44<20:34:12, 3.76it/s] 25%|██▌ | 93359/371472 [7:25:44<20:49:34, 3.71it/s] 25%|██▌ | 93360/371472 [7:25:45<21:07:36, 3.66it/s] {'loss': 3.8877, 'learning_rate': 7.741709582193108e-07, 'epoch': 4.02} + 25%|██▌ | 93360/371472 [7:25:45<21:07:36, 3.66it/s] 25%|██▌ | 93361/371472 [7:25:45<21:43:12, 3.56it/s] 25%|██▌ | 93362/371472 [7:25:45<20:50:32, 3.71it/s] 25%|██▌ | 93363/371472 [7:25:46<21:18:13, 3.63it/s] 25%|██▌ | 93364/371472 [7:25:46<21:56:15, 3.52it/s] 25%|██▌ | 93365/371472 [7:25:46<21:51:07, 3.54it/s] 25%|██▌ | 93366/371472 [7:25:46<22:51:41, 3.38it/s] 25%|██▌ | 93367/371472 [7:25:47<21:50:04, 3.54it/s] 25%|██▌ | 93368/371472 [7:25:47<21:54:21, 3.53it/s] 25%|██▌ | 93369/371472 [7:25:47<21:54:07, 3.53it/s] 25%|██▌ | 93370/371472 [7:25:48<22:08:45, 3.49it/s] 25%|██▌ | 93371/371472 [7:25:48<21:36:38, 3.57it/s] 25%|██▌ | 93372/371472 [7:25:48<21:27:38, 3.60it/s] 25%|██▌ | 93373/371472 [7:25:48<21:13:41, 3.64it/s] 25%|██▌ | 93374/371472 [7:25:49<22:11:01, 3.48it/s] 25%|██▌ | 93375/371472 [7:25:49<23:55:35, 3.23it/s] 25%|██▌ | 93376/371472 [7:25:49<23:07:34, 3.34it/s] 25%|██▌ | 93377/371472 [7:25:50<22:01:07, 3.51it/s] 25%|██▌ | 93378/371472 [7:25:50<21:01:50, 3.67it/s] 25%|██▌ | 93379/371472 [7:25:50<22:01:52, 3.51it/s] 25%|██▌ | 93380/371472 [7:25:50<21:59:54, 3.51it/s] {'loss': 3.8689, 'learning_rate': 7.74122476243832e-07, 'epoch': 4.02} + 25%|██▌ | 93380/371472 [7:25:50<21:59:54, 3.51it/s] 25%|██▌ | 93381/371472 [7:25:51<23:07:35, 3.34it/s] 25%|██▌ | 93382/371472 [7:25:51<23:02:37, 3.35it/s] 25%|██▌ | 93383/371472 [7:25:51<22:59:22, 3.36it/s] 25%|██▌ | 93384/371472 [7:25:52<23:09:28, 3.34it/s] 25%|██▌ | 93385/371472 [7:25:52<22:08:19, 3.49it/s] 25%|██▌ | 93386/371472 [7:25:52<22:29:46, 3.43it/s] 25%|██▌ | 93387/371472 [7:25:52<23:18:04, 3.32it/s] 25%|██▌ | 93388/371472 [7:25:53<22:37:22, 3.41it/s] 25%|██▌ | 93389/371472 [7:25:53<21:36:56, 3.57it/s] 25%|██▌ | 93390/371472 [7:25:53<22:20:02, 3.46it/s] 25%|██▌ | 93391/371472 [7:25:54<21:35:40, 3.58it/s] 25%|██▌ | 93392/371472 [7:25:54<21:20:56, 3.62it/s] 25%|██▌ | 93393/371472 [7:25:54<21:48:16, 3.54it/s] 25%|██▌ | 93394/371472 [7:25:54<21:57:44, 3.52it/s] 25%|██▌ | 93395/371472 [7:25:55<21:09:23, 3.65it/s] 25%|██▌ | 93396/371472 [7:25:55<23:23:16, 3.30it/s] 25%|██▌ | 93397/371472 [7:25:55<22:58:13, 3.36it/s] 25%|██▌ | 93398/371472 [7:25:56<26:41:42, 2.89it/s] 25%|██▌ | 93399/371472 [7:25:56<26:08:08, 2.96it/s] 25%|██▌ | 93400/371472 [7:25:56<26:09:12, 2.95it/s] {'loss': 3.785, 'learning_rate': 7.740739942683531e-07, 'epoch': 4.02} + 25%|██▌ | 93400/371472 [7:25:56<26:09:12, 2.95it/s] 25%|██▌ | 93401/371472 [7:25:57<25:17:11, 3.05it/s] 25%|██▌ | 93402/371472 [7:25:57<24:30:10, 3.15it/s] 25%|██▌ | 93403/371472 [7:25:57<24:33:31, 3.15it/s] 25%|██▌ | 93404/371472 [7:25:58<23:33:45, 3.28it/s] 25%|██▌ | 93405/371472 [7:25:58<23:51:55, 3.24it/s] 25%|██▌ | 93406/371472 [7:25:58<23:14:21, 3.32it/s] 25%|██▌ | 93407/371472 [7:25:59<22:27:36, 3.44it/s] 25%|██▌ | 93408/371472 [7:25:59<22:28:03, 3.44it/s] 25%|██▌ | 93409/371472 [7:25:59<23:15:26, 3.32it/s] 25%|██▌ | 93410/371472 [7:25:59<22:04:09, 3.50it/s] 25%|██▌ | 93411/371472 [7:26:00<22:44:53, 3.40it/s] 25%|██▌ | 93412/371472 [7:26:00<22:15:37, 3.47it/s] 25%|██▌ | 93413/371472 [7:26:00<21:27:41, 3.60it/s] 25%|██▌ | 93414/371472 [7:26:01<23:07:08, 3.34it/s] 25%|██▌ | 93415/371472 [7:26:01<23:46:48, 3.25it/s] 25%|██▌ | 93416/371472 [7:26:01<23:24:02, 3.30it/s] 25%|██▌ | 93417/371472 [7:26:01<22:48:28, 3.39it/s] 25%|██▌ | 93418/371472 [7:26:02<22:31:52, 3.43it/s] 25%|██▌ | 93419/371472 [7:26:02<21:18:25, 3.62it/s] 25%|██▌ | 93420/371472 [7:26:02<22:25:53, 3.44it/s] {'loss': 3.8665, 'learning_rate': 7.740255122928742e-07, 'epoch': 4.02} + 25%|██▌ | 93420/371472 [7:26:02<22:25:53, 3.44it/s] 25%|██▌ | 93421/371472 [7:26:03<23:37:41, 3.27it/s] 25%|██▌ | 93422/371472 [7:26:03<22:13:24, 3.48it/s] 25%|██▌ | 93423/371472 [7:26:03<23:10:12, 3.33it/s] 25%|██▌ | 93424/371472 [7:26:04<22:51:51, 3.38it/s] 25%|██▌ | 93425/371472 [7:26:04<22:32:34, 3.43it/s] 25%|██▌ | 93426/371472 [7:26:04<21:50:56, 3.53it/s] 25%|██▌ | 93427/371472 [7:26:04<23:28:51, 3.29it/s] 25%|██▌ | 93428/371472 [7:26:05<24:57:55, 3.09it/s] 25%|██▌ | 93429/371472 [7:26:05<24:04:57, 3.21it/s] 25%|██▌ | 93430/371472 [7:26:05<24:37:27, 3.14it/s] 25%|██▌ | 93431/371472 [7:26:06<24:03:09, 3.21it/s] 25%|██▌ | 93432/371472 [7:26:06<23:45:03, 3.25it/s] 25%|██▌ | 93433/371472 [7:26:06<22:34:54, 3.42it/s] 25%|██▌ | 93434/371472 [7:26:07<21:48:04, 3.54it/s] 25%|██▌ | 93435/371472 [7:26:07<21:24:04, 3.61it/s] 25%|██▌ | 93436/371472 [7:26:07<21:27:20, 3.60it/s] 25%|██▌ | 93437/371472 [7:26:07<22:00:01, 3.51it/s] 25%|██▌ | 93438/371472 [7:26:08<22:10:40, 3.48it/s] 25%|██▌ | 93439/371472 [7:26:08<21:39:45, 3.57it/s] 25%|██▌ | 93440/371472 [7:26:08<22:51:42, 3.38it/s] {'loss': 3.7676, 'learning_rate': 7.739770303173952e-07, 'epoch': 4.02} + 25%|██▌ | 93440/371472 [7:26:08<22:51:42, 3.38it/s] 25%|██▌ | 93441/371472 [7:26:09<22:11:26, 3.48it/s] 25%|██▌ | 93442/371472 [7:26:09<24:04:36, 3.21it/s] 25%|██▌ | 93443/371472 [7:26:09<23:43:33, 3.26it/s] 25%|██▌ | 93444/371472 [7:26:09<23:29:22, 3.29it/s] 25%|██▌ | 93445/371472 [7:26:10<23:40:03, 3.26it/s] 25%|██▌ | 93446/371472 [7:26:10<22:44:51, 3.40it/s] 25%|██▌ | 93447/371472 [7:26:10<24:02:18, 3.21it/s] 25%|██▌ | 93448/371472 [7:26:11<23:19:11, 3.31it/s] 25%|██▌ | 93449/371472 [7:26:11<23:17:58, 3.31it/s] 25%|██▌ | 93450/371472 [7:26:11<21:39:48, 3.56it/s] 25%|██▌ | 93451/371472 [7:26:12<22:32:57, 3.42it/s] 25%|██▌ | 93452/371472 [7:26:12<22:21:20, 3.45it/s] 25%|██▌ | 93453/371472 [7:26:12<21:58:53, 3.51it/s] 25%|██▌ | 93454/371472 [7:26:12<22:46:26, 3.39it/s] 25%|██▌ | 93455/371472 [7:26:13<22:45:04, 3.39it/s] 25%|██▌ | 93456/371472 [7:26:13<25:39:33, 3.01it/s] 25%|██▌ | 93457/371472 [7:26:14<26:39:26, 2.90it/s] 25%|██▌ | 93458/371472 [7:26:14<25:49:00, 2.99it/s] 25%|██▌ | 93459/371472 [7:26:14<25:35:28, 3.02it/s] 25%|██▌ | 93460/371472 [7:26:14<25:33:08, 3.02it/s] {'loss': 3.7239, 'learning_rate': 7.739285483419164e-07, 'epoch': 4.03} + 25%|██▌ | 93460/371472 [7:26:14<25:33:08, 3.02it/s] 25%|██▌ | 93461/371472 [7:26:15<23:43:26, 3.26it/s] 25%|██▌ | 93462/371472 [7:26:15<23:02:29, 3.35it/s] 25%|██▌ | 93463/371472 [7:26:15<22:24:40, 3.45it/s] 25%|██▌ | 93464/371472 [7:26:16<22:11:14, 3.48it/s] 25%|██▌ | 93465/371472 [7:26:16<21:36:40, 3.57it/s] 25%|██▌ | 93466/371472 [7:26:16<22:13:52, 3.47it/s] 25%|██▌ | 93467/371472 [7:26:16<22:32:17, 3.43it/s] 25%|██▌ | 93468/371472 [7:26:17<23:08:52, 3.34it/s] 25%|██▌ | 93469/371472 [7:26:17<24:30:26, 3.15it/s] 25%|██▌ | 93470/371472 [7:26:17<24:26:30, 3.16it/s] 25%|██▌ | 93471/371472 [7:26:18<25:20:15, 3.05it/s] 25%|██▌ | 93472/371472 [7:26:18<27:12:16, 2.84it/s] 25%|██▌ | 93473/371472 [7:26:19<27:14:00, 2.84it/s] 25%|██▌ | 93474/371472 [7:26:19<26:09:40, 2.95it/s] 25%|██▌ | 93475/371472 [7:26:19<25:29:16, 3.03it/s] 25%|██▌ | 93476/371472 [7:26:19<25:43:47, 3.00it/s] 25%|██▌ | 93477/371472 [7:26:20<24:06:09, 3.20it/s] 25%|██▌ | 93478/371472 [7:26:20<23:35:02, 3.27it/s] 25%|██▌ | 93479/371472 [7:26:20<23:39:19, 3.26it/s] 25%|██▌ | 93480/371472 [7:26:21<23:36:33, 3.27it/s] {'loss': 3.6412, 'learning_rate': 7.738800663664376e-07, 'epoch': 4.03} + 25%|██▌ | 93480/371472 [7:26:21<23:36:33, 3.27it/s] 25%|██▌ | 93481/371472 [7:26:21<22:51:32, 3.38it/s] 25%|██▌ | 93482/371472 [7:26:21<22:53:42, 3.37it/s] 25%|██▌ | 93483/371472 [7:26:22<24:40:52, 3.13it/s] 25%|██▌ | 93484/371472 [7:26:22<22:57:01, 3.36it/s] 25%|██▌ | 93485/371472 [7:26:22<22:47:07, 3.39it/s] 25%|██▌ | 93486/371472 [7:26:22<24:10:17, 3.19it/s] 25%|██▌ | 93487/371472 [7:26:23<24:22:46, 3.17it/s] 25%|██▌ | 93488/371472 [7:26:23<23:20:02, 3.31it/s] 25%|██▌ | 93489/371472 [7:26:23<22:51:23, 3.38it/s] 25%|██▌ | 93490/371472 [7:26:24<22:40:59, 3.40it/s] 25%|██▌ | 93491/371472 [7:26:24<25:12:04, 3.06it/s] 25%|██▌ | 93492/371472 [7:26:24<24:41:27, 3.13it/s] 25%|██▌ | 93493/371472 [7:26:25<25:33:06, 3.02it/s] 25%|██▌ | 93494/371472 [7:26:25<24:32:13, 3.15it/s] 25%|██▌ | 93495/371472 [7:26:25<22:51:46, 3.38it/s] 25%|██▌ | 93496/371472 [7:26:26<24:24:05, 3.16it/s] 25%|██▌ | 93497/371472 [7:26:26<23:50:00, 3.24it/s] 25%|██▌ | 93498/371472 [7:26:26<22:47:21, 3.39it/s] 25%|██▌ | 93499/371472 [7:26:27<25:54:42, 2.98it/s] 25%|██▌ | 93500/371472 [7:26:27<25:29:58, 3.03it/s] {'loss': 3.5538, 'learning_rate': 7.738315843909587e-07, 'epoch': 4.03} + 25%|██▌ | 93500/371472 [7:26:27<25:29:58, 3.03it/s] 25%|██▌ | 93501/371472 [7:26:27<23:54:39, 3.23it/s] 25%|██▌ | 93502/371472 [7:26:27<23:08:48, 3.34it/s] 25%|██▌ | 93503/371472 [7:26:28<22:31:40, 3.43it/s] 25%|██▌ | 93504/371472 [7:26:28<21:48:33, 3.54it/s] 25%|██▌ | 93505/371472 [7:26:28<21:56:50, 3.52it/s] 25%|██▌ | 93506/371472 [7:26:29<23:30:35, 3.28it/s] 25%|██▌ | 93507/371472 [7:26:29<25:31:12, 3.03it/s] 25%|██▌ | 93508/371472 [7:26:29<24:32:42, 3.15it/s] 25%|██▌ | 93509/371472 [7:26:30<24:18:23, 3.18it/s] 25%|██▌ | 93510/371472 [7:26:30<23:13:07, 3.33it/s] 25%|██▌ | 93511/371472 [7:26:30<22:43:06, 3.40it/s] 25%|██▌ | 93512/371472 [7:26:30<21:49:43, 3.54it/s] 25%|██▌ | 93513/371472 [7:26:31<21:13:49, 3.64it/s] 25%|██▌ | 93514/371472 [7:26:31<21:44:42, 3.55it/s] 25%|██▌ | 93515/371472 [7:26:31<22:25:12, 3.44it/s] 25%|██▌ | 93516/371472 [7:26:32<21:32:24, 3.58it/s] 25%|██▌ | 93517/371472 [7:26:32<22:21:06, 3.45it/s] 25%|██▌ | 93518/371472 [7:26:32<22:29:52, 3.43it/s] 25%|██▌ | 93519/371472 [7:26:32<22:18:21, 3.46it/s] 25%|██▌ | 93520/371472 [7:26:33<22:18:44, 3.46it/s] {'loss': 3.8761, 'learning_rate': 7.737831024154797e-07, 'epoch': 4.03} + 25%|██▌ | 93520/371472 [7:26:33<22:18:44, 3.46it/s] 25%|██▌ | 93521/371472 [7:26:33<21:35:56, 3.57it/s] 25%|██▌ | 93522/371472 [7:26:33<21:15:35, 3.63it/s] 25%|██▌ | 93523/371472 [7:26:33<20:37:53, 3.74it/s] 25%|██▌ | 93524/371472 [7:26:34<20:19:11, 3.80it/s] 25%|██▌ | 93525/371472 [7:26:34<19:54:46, 3.88it/s] 25%|██▌ | 93526/371472 [7:26:34<20:07:12, 3.84it/s] 25%|██▌ | 93527/371472 [7:26:35<19:52:59, 3.88it/s] 25%|██▌ | 93528/371472 [7:26:35<21:26:54, 3.60it/s] 25%|██▌ | 93529/371472 [7:26:35<22:27:21, 3.44it/s] 25%|██▌ | 93530/371472 [7:26:35<21:34:53, 3.58it/s] 25%|██▌ | 93531/371472 [7:26:36<21:01:20, 3.67it/s] 25%|██▌ | 93532/371472 [7:26:36<22:04:21, 3.50it/s] 25%|██▌ | 93533/371472 [7:26:36<22:29:12, 3.43it/s] 25%|██▌ | 93534/371472 [7:26:37<22:24:37, 3.45it/s] 25%|██▌ | 93535/371472 [7:26:37<22:28:59, 3.43it/s] 25%|██▌ | 93536/371472 [7:26:37<21:10:57, 3.64it/s] 25%|██▌ | 93537/371472 [7:26:37<20:50:36, 3.70it/s] 25%|██▌ | 93538/371472 [7:26:38<20:27:37, 3.77it/s] 25%|██▌ | 93539/371472 [7:26:38<20:10:48, 3.83it/s] 25%|██▌ | 93540/371472 [7:26:38<21:47:10, 3.54it/s] {'loss': 3.7814, 'learning_rate': 7.737346204400008e-07, 'epoch': 4.03} + 25%|██▌ | 93540/371472 [7:26:38<21:47:10, 3.54it/s] 25%|██▌ | 93541/371472 [7:26:38<21:07:13, 3.66it/s] 25%|██▌ | 93542/371472 [7:26:39<21:13:08, 3.64it/s] 25%|██▌ | 93543/371472 [7:26:39<21:02:50, 3.67it/s] 25%|██▌ | 93544/371472 [7:26:39<22:49:06, 3.38it/s] 25%|██▌ | 93545/371472 [7:26:40<21:34:56, 3.58it/s] 25%|██▌ | 93546/371472 [7:26:40<21:08:46, 3.65it/s] 25%|██▌ | 93547/371472 [7:26:40<20:28:36, 3.77it/s] 25%|██▌ | 93548/371472 [7:26:40<21:10:56, 3.64it/s] 25%|██▌ | 93549/371472 [7:26:41<20:51:18, 3.70it/s] 25%|██▌ | 93550/371472 [7:26:41<20:29:41, 3.77it/s] 25%|██▌ | 93551/371472 [7:26:41<20:14:56, 3.81it/s] 25%|██▌ | 93552/371472 [7:26:41<20:06:27, 3.84it/s] 25%|██▌ | 93553/371472 [7:26:42<19:45:03, 3.91it/s] 25%|██▌ | 93554/371472 [7:26:42<22:24:40, 3.44it/s] 25%|██▌ | 93555/371472 [7:26:42<22:08:14, 3.49it/s] 25%|██▌ | 93556/371472 [7:26:43<20:50:51, 3.70it/s] 25%|██▌ | 93557/371472 [7:26:43<21:12:32, 3.64it/s] 25%|██▌ | 93558/371472 [7:26:43<23:18:21, 3.31it/s] 25%|██▌ | 93559/371472 [7:26:43<22:10:55, 3.48it/s] 25%|██▌ | 93560/371472 [7:26:44<21:26:02, 3.60it/s] {'loss': 3.696, 'learning_rate': 7.73686138464522e-07, 'epoch': 4.03} + 25%|██▌ | 93560/371472 [7:26:44<21:26:02, 3.60it/s] 25%|██▌ | 93561/371472 [7:26:44<22:01:09, 3.51it/s] 25%|██▌ | 93562/371472 [7:26:44<21:22:31, 3.61it/s] 25%|██▌ | 93563/371472 [7:26:45<21:52:14, 3.53it/s] 25%|██▌ | 93564/371472 [7:26:45<21:45:55, 3.55it/s] 25%|██▌ | 93565/371472 [7:26:45<21:28:05, 3.60it/s] 25%|██▌ | 93566/371472 [7:26:45<21:15:14, 3.63it/s] 25%|██▌ | 93567/371472 [7:26:46<21:58:18, 3.51it/s] 25%|██▌ | 93568/371472 [7:26:46<22:41:26, 3.40it/s] 25%|██▌ | 93569/371472 [7:26:46<21:45:44, 3.55it/s] 25%|██▌ | 93570/371472 [7:26:46<21:01:03, 3.67it/s] 25%|██▌ | 93571/371472 [7:26:47<20:44:06, 3.72it/s] 25%|██▌ | 93572/371472 [7:26:47<21:53:09, 3.53it/s] 25%|██▌ | 93573/371472 [7:26:47<21:25:54, 3.60it/s] 25%|██▌ | 93574/371472 [7:26:48<22:44:49, 3.39it/s] 25%|██▌ | 93575/371472 [7:26:48<21:26:03, 3.60it/s] 25%|██▌ | 93576/371472 [7:26:48<24:13:00, 3.19it/s] 25%|██▌ | 93577/371472 [7:26:49<22:58:23, 3.36it/s] 25%|██▌ | 93578/371472 [7:26:49<24:38:07, 3.13it/s] 25%|██▌ | 93579/371472 [7:26:49<22:52:14, 3.38it/s] 25%|██▌ | 93580/371472 [7:26:49<22:22:15, 3.45it/s] {'loss': 3.5002, 'learning_rate': 7.73637656489043e-07, 'epoch': 4.03} + 25%|██▌ | 93580/371472 [7:26:49<22:22:15, 3.45it/s] 25%|██▌ | 93581/371472 [7:26:50<21:25:27, 3.60it/s] 25%|██▌ | 93582/371472 [7:26:50<20:27:19, 3.77it/s] 25%|██▌ | 93583/371472 [7:26:50<19:59:31, 3.86it/s] 25%|██▌ | 93584/371472 [7:26:50<20:08:53, 3.83it/s] 25%|██▌ | 93585/371472 [7:26:51<19:56:01, 3.87it/s] 25%|██▌ | 93586/371472 [7:26:51<20:49:03, 3.71it/s] 25%|██▌ | 93587/371472 [7:26:51<20:20:28, 3.79it/s] 25%|██▌ | 93588/371472 [7:26:52<24:02:19, 3.21it/s] 25%|██▌ | 93589/371472 [7:26:52<24:40:47, 3.13it/s] 25%|██▌ | 93590/371472 [7:26:52<22:49:54, 3.38it/s] 25%|██▌ | 93591/371472 [7:26:53<23:35:22, 3.27it/s] 25%|██▌ | 93592/371472 [7:26:53<22:32:09, 3.43it/s] 25%|██▌ | 93593/371472 [7:26:53<24:01:17, 3.21it/s] 25%|██▌ | 93594/371472 [7:26:53<22:56:13, 3.37it/s] 25%|██▌ | 93595/371472 [7:26:54<21:56:16, 3.52it/s] 25%|██▌ | 93596/371472 [7:26:54<21:37:54, 3.57it/s] 25%|██▌ | 93597/371472 [7:26:54<21:16:19, 3.63it/s] 25%|██▌ | 93598/371472 [7:26:54<20:23:54, 3.78it/s] 25%|██▌ | 93599/371472 [7:26:55<20:04:04, 3.85it/s] 25%|██▌ | 93600/371472 [7:26:55<19:42:32, 3.92it/s] {'loss': 3.7821, 'learning_rate': 7.735891745135641e-07, 'epoch': 4.03} + 25%|██▌ | 93600/371472 [7:26:55<19:42:32, 3.92it/s] 25%|██▌ | 93601/371472 [7:26:55<19:41:18, 3.92it/s] 25%|██▌ | 93602/371472 [7:26:56<20:15:35, 3.81it/s] 25%|██▌ | 93603/371472 [7:26:56<19:46:09, 3.90it/s] 25%|██▌ | 93604/371472 [7:26:56<21:13:05, 3.64it/s] 25%|██▌ | 93605/371472 [7:26:56<22:00:20, 3.51it/s] 25%|██▌ | 93606/371472 [7:26:57<22:05:08, 3.49it/s] 25%|██▌ | 93607/371472 [7:26:57<22:29:17, 3.43it/s] 25%|██▌ | 93608/371472 [7:26:57<22:50:50, 3.38it/s] 25%|██▌ | 93609/371472 [7:26:58<21:39:32, 3.56it/s] 25%|██▌ | 93610/371472 [7:26:58<21:51:53, 3.53it/s] 25%|██▌ | 93611/371472 [7:26:58<23:12:40, 3.33it/s] 25%|██▌ | 93612/371472 [7:26:58<21:58:49, 3.51it/s] 25%|██▌ | 93613/371472 [7:26:59<21:18:18, 3.62it/s] 25%|██▌ | 93614/371472 [7:26:59<20:53:40, 3.69it/s] 25%|██▌ | 93615/371472 [7:26:59<21:46:16, 3.55it/s] 25%|██▌ | 93616/371472 [7:26:59<20:48:16, 3.71it/s] 25%|██▌ | 93617/371472 [7:27:00<20:49:21, 3.71it/s] 25%|██▌ | 93618/371472 [7:27:00<20:33:08, 3.76it/s] 25%|██▌ | 93619/371472 [7:27:00<21:50:05, 3.53it/s] 25%|██▌ | 93620/371472 [7:27:01<21:12:38, 3.64it/s] {'loss': 3.7736, 'learning_rate': 7.735406925380853e-07, 'epoch': 4.03} + 25%|██▌ | 93620/371472 [7:27:01<21:12:38, 3.64it/s] 25%|██▌ | 93621/371472 [7:27:01<21:48:25, 3.54it/s] 25%|██▌ | 93622/371472 [7:27:01<22:09:58, 3.48it/s] 25%|██▌ | 93623/371472 [7:27:01<22:30:08, 3.43it/s] 25%|██▌ | 93624/371472 [7:27:02<21:53:16, 3.53it/s] 25%|██▌ | 93625/371472 [7:27:02<21:33:22, 3.58it/s] 25%|██▌ | 93626/371472 [7:27:02<21:29:03, 3.59it/s] 25%|██▌ | 93627/371472 [7:27:03<21:33:05, 3.58it/s] 25%|██▌ | 93628/371472 [7:27:03<21:33:11, 3.58it/s] 25%|██▌ | 93629/371472 [7:27:03<23:03:52, 3.35it/s] 25%|██▌ | 93630/371472 [7:27:04<23:27:24, 3.29it/s] 25%|██▌ | 93631/371472 [7:27:04<22:24:12, 3.44it/s] 25%|██▌ | 93632/371472 [7:27:04<22:30:38, 3.43it/s] 25%|██▌ | 93633/371472 [7:27:04<21:15:16, 3.63it/s] 25%|██▌ | 93634/371472 [7:27:05<21:29:17, 3.59it/s] 25%|██▌ | 93635/371472 [7:27:05<20:36:19, 3.75it/s] 25%|██▌ | 93636/371472 [7:27:05<20:43:56, 3.72it/s] 25%|██▌ | 93637/371472 [7:27:05<20:32:45, 3.76it/s] 25%|██▌ | 93638/371472 [7:27:06<20:27:51, 3.77it/s] 25%|██▌ | 93639/371472 [7:27:06<20:09:46, 3.83it/s] 25%|██▌ | 93640/371472 [7:27:06<20:27:34, 3.77it/s] {'loss': 3.6331, 'learning_rate': 7.734922105626065e-07, 'epoch': 4.03} + 25%|██▌ | 93640/371472 [7:27:06<20:27:34, 3.77it/s] 25%|██▌ | 93641/371472 [7:27:06<20:31:48, 3.76it/s] 25%|██▌ | 93642/371472 [7:27:07<21:55:26, 3.52it/s] 25%|██▌ | 93643/371472 [7:27:07<23:51:03, 3.24it/s] 25%|██▌ | 93644/371472 [7:27:07<23:09:44, 3.33it/s] 25%|██▌ | 93645/371472 [7:27:08<23:51:18, 3.24it/s] 25%|██▌ | 93646/371472 [7:27:08<23:06:36, 3.34it/s] 25%|██▌ | 93647/371472 [7:27:08<24:00:08, 3.22it/s] 25%|██▌ | 93648/371472 [7:27:09<23:03:16, 3.35it/s] 25%|██▌ | 93649/371472 [7:27:09<22:47:32, 3.39it/s] 25%|██▌ | 93650/371472 [7:27:09<21:55:31, 3.52it/s] 25%|██▌ | 93651/371472 [7:27:09<21:03:23, 3.67it/s] 25%|██▌ | 93652/371472 [7:27:10<21:52:59, 3.53it/s] 25%|██▌ | 93653/371472 [7:27:10<21:28:41, 3.59it/s] 25%|██▌ | 93654/371472 [7:27:10<20:32:55, 3.76it/s] 25%|██▌ | 93655/371472 [7:27:10<20:25:22, 3.78it/s] 25%|██▌ | 93656/371472 [7:27:11<21:12:04, 3.64it/s] 25%|██▌ | 93657/371472 [7:27:11<21:57:56, 3.51it/s] 25%|██▌ | 93658/371472 [7:27:11<22:18:34, 3.46it/s] 25%|██▌ | 93659/371472 [7:27:12<21:56:32, 3.52it/s] 25%|██▌ | 93660/371472 [7:27:12<21:53:57, 3.52it/s] {'loss': 3.7201, 'learning_rate': 7.734437285871274e-07, 'epoch': 4.03} + 25%|██▌ | 93660/371472 [7:27:12<21:53:57, 3.52it/s] 25%|██▌ | 93661/371472 [7:27:12<21:00:33, 3.67it/s] 25%|██▌ | 93662/371472 [7:27:12<21:11:11, 3.64it/s] 25%|██▌ | 93663/371472 [7:27:13<20:28:55, 3.77it/s] 25%|██▌ | 93664/371472 [7:27:13<21:31:58, 3.58it/s] 25%|██▌ | 93665/371472 [7:27:13<23:07:53, 3.34it/s] 25%|██▌ | 93666/371472 [7:27:14<23:10:14, 3.33it/s] 25%|██▌ | 93667/371472 [7:27:14<22:45:45, 3.39it/s] 25%|██▌ | 93668/371472 [7:27:14<22:36:41, 3.41it/s] 25%|██▌ | 93669/371472 [7:27:14<22:01:32, 3.50it/s] 25%|██▌ | 93670/371472 [7:27:15<20:58:25, 3.68it/s] 25%|██▌ | 93671/371472 [7:27:15<20:54:13, 3.69it/s] 25%|██▌ | 93672/371472 [7:27:15<21:53:17, 3.53it/s] 25%|██▌ | 93673/371472 [7:27:16<22:05:40, 3.49it/s] 25%|██▌ | 93674/371472 [7:27:16<23:01:38, 3.35it/s] 25%|██▌ | 93675/371472 [7:27:16<21:56:16, 3.52it/s] 25%|██▌ | 93676/371472 [7:27:17<22:38:24, 3.41it/s] 25%|██▌ | 93677/371472 [7:27:17<22:29:26, 3.43it/s] 25%|██▌ | 93678/371472 [7:27:17<23:57:20, 3.22it/s] 25%|██▌ | 93679/371472 [7:27:17<23:20:23, 3.31it/s] 25%|██▌ | 93680/371472 [7:27:18<22:29:24, 3.43it/s] {'loss': 3.5504, 'learning_rate': 7.733952466116485e-07, 'epoch': 4.03} + 25%|██▌ | 93680/371472 [7:27:18<22:29:24, 3.43it/s] 25%|██▌ | 93681/371472 [7:27:18<22:20:35, 3.45it/s] 25%|██▌ | 93682/371472 [7:27:18<23:35:22, 3.27it/s] 25%|██▌ | 93683/371472 [7:27:19<22:26:34, 3.44it/s] 25%|██▌ | 93684/371472 [7:27:19<21:15:20, 3.63it/s] 25%|██▌ | 93685/371472 [7:27:19<21:33:00, 3.58it/s] 25%|██▌ | 93686/371472 [7:27:19<22:28:40, 3.43it/s] 25%|██▌ | 93687/371472 [7:27:20<21:38:42, 3.56it/s] 25%|██▌ | 93688/371472 [7:27:20<21:12:36, 3.64it/s] 25%|██▌ | 93689/371472 [7:27:20<21:46:59, 3.54it/s] 25%|██▌ | 93690/371472 [7:27:20<21:12:07, 3.64it/s] 25%|██▌ | 93691/371472 [7:27:21<21:21:41, 3.61it/s] 25%|██▌ | 93692/371472 [7:27:21<21:48:01, 3.54it/s] 25%|██▌ | 93693/371472 [7:27:21<23:24:07, 3.30it/s] 25%|██▌ | 93694/371472 [7:27:22<23:12:43, 3.32it/s] 25%|██▌ | 93695/371472 [7:27:22<22:16:21, 3.46it/s] 25%|██▌ | 93696/371472 [7:27:22<21:19:11, 3.62it/s] 25%|██▌ | 93697/371472 [7:27:23<21:10:38, 3.64it/s] 25%|██▌ | 93698/371472 [7:27:23<20:33:20, 3.75it/s] 25%|██▌ | 93699/371472 [7:27:23<20:14:55, 3.81it/s] 25%|██▌ | 93700/371472 [7:27:23<21:08:52, 3.65it/s] {'loss': 3.773, 'learning_rate': 7.733467646361697e-07, 'epoch': 4.04} + 25%|██▌ | 93700/371472 [7:27:23<21:08:52, 3.65it/s] 25%|██▌ | 93701/371472 [7:27:24<22:29:33, 3.43it/s] 25%|██▌ | 93702/371472 [7:27:24<21:55:42, 3.52it/s] 25%|██▌ | 93703/371472 [7:27:24<21:16:30, 3.63it/s] 25%|██▌ | 93704/371472 [7:27:24<20:27:51, 3.77it/s] 25%|██▌ | 93705/371472 [7:27:25<20:38:06, 3.74it/s] 25%|██▌ | 93706/371472 [7:27:25<23:19:23, 3.31it/s] 25%|██▌ | 93707/371472 [7:27:25<22:10:38, 3.48it/s] 25%|██▌ | 93708/371472 [7:27:26<21:46:41, 3.54it/s] 25%|██▌ | 93709/371472 [7:27:26<22:06:42, 3.49it/s] 25%|██▌ | 93710/371472 [7:27:26<21:14:09, 3.63it/s] 25%|██▌ | 93711/371472 [7:27:26<21:27:06, 3.60it/s] 25%|██▌ | 93712/371472 [7:27:27<21:28:26, 3.59it/s] 25%|██▌ | 93713/371472 [7:27:27<20:56:32, 3.68it/s] 25%|██▌ | 93714/371472 [7:27:27<21:28:17, 3.59it/s] 25%|██▌ | 93715/371472 [7:27:28<21:36:49, 3.57it/s] 25%|██▌ | 93716/371472 [7:27:28<21:55:00, 3.52it/s] 25%|██▌ | 93717/371472 [7:27:28<21:51:27, 3.53it/s] 25%|██▌ | 93718/371472 [7:27:28<22:13:13, 3.47it/s] 25%|██▌ | 93719/371472 [7:27:29<21:37:33, 3.57it/s] 25%|██▌ | 93720/371472 [7:27:29<21:15:00, 3.63it/s] {'loss': 3.5198, 'learning_rate': 7.732982826606908e-07, 'epoch': 4.04} + 25%|██▌ | 93720/371472 [7:27:29<21:15:00, 3.63it/s] 25%|██▌ | 93721/371472 [7:27:29<21:33:14, 3.58it/s] 25%|██▌ | 93722/371472 [7:27:29<21:41:32, 3.56it/s] 25%|██▌ | 93723/371472 [7:27:30<21:08:48, 3.65it/s] 25%|██▌ | 93724/371472 [7:27:30<21:58:19, 3.51it/s] 25%|██▌ | 93725/371472 [7:27:30<21:45:34, 3.55it/s] 25%|██▌ | 93726/371472 [7:27:31<21:41:26, 3.56it/s] 25%|██▌ | 93727/371472 [7:27:31<21:29:47, 3.59it/s] 25%|██▌ | 93728/371472 [7:27:31<21:07:05, 3.65it/s] 25%|██▌ | 93729/371472 [7:27:31<20:27:33, 3.77it/s] 25%|██▌ | 93730/371472 [7:27:32<21:16:29, 3.63it/s] 25%|██▌ | 93731/371472 [7:27:32<21:38:53, 3.56it/s] 25%|██▌ | 93732/371472 [7:27:32<21:26:55, 3.60it/s] 25%|██▌ | 93733/371472 [7:27:33<21:02:40, 3.67it/s] 25%|██▌ | 93734/371472 [7:27:33<21:35:26, 3.57it/s] 25%|██▌ | 93735/371472 [7:27:33<21:44:04, 3.55it/s] 25%|██▌ | 93736/371472 [7:27:33<21:17:06, 3.62it/s] 25%|██▌ | 93737/371472 [7:27:34<21:08:40, 3.65it/s] 25%|██▌ | 93738/371472 [7:27:34<21:45:56, 3.54it/s] 25%|██▌ | 93739/371472 [7:27:34<21:24:09, 3.60it/s] 25%|██▌ | 93740/371472 [7:27:34<21:03:54, 3.66it/s] {'loss': 3.6763, 'learning_rate': 7.732498006852118e-07, 'epoch': 4.04} + 25%|██▌ | 93740/371472 [7:27:34<21:03:54, 3.66it/s] 25%|██▌ | 93741/371472 [7:27:35<21:56:26, 3.52it/s] 25%|██▌ | 93742/371472 [7:27:35<23:05:46, 3.34it/s] 25%|██▌ | 93743/371472 [7:27:35<21:58:10, 3.51it/s] 25%|██▌ | 93744/371472 [7:27:36<21:38:31, 3.56it/s] 25%|██▌ | 93745/371472 [7:27:36<22:08:11, 3.49it/s] 25%|██▌ | 93746/371472 [7:27:36<21:22:01, 3.61it/s] 25%|██▌ | 93747/371472 [7:27:36<21:42:38, 3.55it/s] 25%|██▌ | 93748/371472 [7:27:37<21:15:21, 3.63it/s] 25%|██▌ | 93749/371472 [7:27:37<21:07:58, 3.65it/s] 25%|██▌ | 93750/371472 [7:27:37<21:05:36, 3.66it/s] 25%|██▌ | 93751/371472 [7:27:38<21:17:32, 3.62it/s] 25%|██▌ | 93752/371472 [7:27:38<21:29:54, 3.59it/s] 25%|██▌ | 93753/371472 [7:27:38<20:38:43, 3.74it/s] 25%|██▌ | 93754/371472 [7:27:38<20:58:12, 3.68it/s] 25%|██▌ | 93755/371472 [7:27:39<23:08:50, 3.33it/s] 25%|██▌ | 93756/371472 [7:27:39<23:54:49, 3.23it/s] 25%|██▌ | 93757/371472 [7:27:39<22:23:45, 3.44it/s] 25%|██▌ | 93758/371472 [7:27:40<23:02:05, 3.35it/s] 25%|██▌ | 93759/371472 [7:27:40<22:19:19, 3.46it/s] 25%|██▌ | 93760/371472 [7:27:40<21:17:05, 3.62it/s] {'loss': 3.7281, 'learning_rate': 7.73201318709733e-07, 'epoch': 4.04} + 25%|██▌ | 93760/371472 [7:27:40<21:17:05, 3.62it/s] 25%|██▌ | 93761/371472 [7:27:40<21:24:52, 3.60it/s] 25%|██▌ | 93762/371472 [7:27:41<21:04:15, 3.66it/s] 25%|██▌ | 93763/371472 [7:27:41<21:32:50, 3.58it/s] 25%|██▌ | 93764/371472 [7:27:41<23:38:11, 3.26it/s] 25%|██▌ | 93765/371472 [7:27:42<22:14:54, 3.47it/s] 25%|██▌ | 93766/371472 [7:27:42<21:16:40, 3.63it/s] 25%|██▌ | 93767/371472 [7:27:42<20:25:48, 3.78it/s] 25%|██▌ | 93768/371472 [7:27:42<20:53:36, 3.69it/s] 25%|██▌ | 93769/371472 [7:27:43<20:40:26, 3.73it/s] 25%|██▌ | 93770/371472 [7:27:43<21:06:50, 3.65it/s] 25%|██▌ | 93771/371472 [7:27:43<21:21:10, 3.61it/s] 25%|██▌ | 93772/371472 [7:27:43<20:51:14, 3.70it/s] 25%|██▌ | 93773/371472 [7:27:44<20:37:58, 3.74it/s] 25%|██▌ | 93774/371472 [7:27:44<20:48:48, 3.71it/s] 25%|██▌ | 93775/371472 [7:27:44<24:24:09, 3.16it/s] 25%|██▌ | 93776/371472 [7:27:45<23:07:28, 3.34it/s] 25%|██▌ | 93777/371472 [7:27:45<22:42:05, 3.40it/s] 25%|██▌ | 93778/371472 [7:27:45<22:59:44, 3.35it/s] 25%|██▌ | 93779/371472 [7:27:46<21:47:08, 3.54it/s] 25%|██▌ | 93780/371472 [7:27:46<21:46:58, 3.54it/s] {'loss': 3.6656, 'learning_rate': 7.731528367342541e-07, 'epoch': 4.04} + 25%|██▌ | 93780/371472 [7:27:46<21:46:58, 3.54it/s] 25%|██▌ | 93781/371472 [7:27:46<22:08:55, 3.48it/s] 25%|██▌ | 93782/371472 [7:27:46<21:14:06, 3.63it/s] 25%|██▌ | 93783/371472 [7:27:47<21:40:14, 3.56it/s] 25%|██▌ | 93784/371472 [7:27:47<21:13:33, 3.63it/s] 25%|██▌ | 93785/371472 [7:27:47<21:46:52, 3.54it/s] 25%|██▌ | 93786/371472 [7:27:48<22:36:06, 3.41it/s] 25%|██▌ | 93787/371472 [7:27:48<22:22:21, 3.45it/s] 25%|██▌ | 93788/371472 [7:27:48<23:43:00, 3.25it/s] 25%|██▌ | 93789/371472 [7:27:48<22:39:14, 3.40it/s] 25%|██▌ | 93790/371472 [7:27:49<22:02:14, 3.50it/s] 25%|██▌ | 93791/371472 [7:27:49<22:32:36, 3.42it/s] 25%|██▌ | 93792/371472 [7:27:49<22:39:29, 3.40it/s] 25%|██▌ | 93793/371472 [7:27:50<21:40:16, 3.56it/s] 25%|██▌ | 93794/371472 [7:27:50<22:47:39, 3.38it/s] 25%|██▌ | 93795/371472 [7:27:50<22:01:37, 3.50it/s] 25%|██▌ | 93796/371472 [7:27:50<21:42:27, 3.55it/s] 25%|██▌ | 93797/371472 [7:27:51<20:52:12, 3.70it/s] 25%|██▌ | 93798/371472 [7:27:51<20:34:36, 3.75it/s] 25%|██▌ | 93799/371472 [7:27:51<20:41:06, 3.73it/s] 25%|██▌ | 93800/371472 [7:27:51<20:16:47, 3.80it/s] {'loss': 3.961, 'learning_rate': 7.731043547587752e-07, 'epoch': 4.04} + 25%|██▌ | 93800/371472 [7:27:51<20:16:47, 3.80it/s] 25%|██▌ | 93801/371472 [7:27:52<20:26:41, 3.77it/s] 25%|██▌ | 93802/371472 [7:27:52<23:18:19, 3.31it/s] 25%|██▌ | 93803/371472 [7:27:52<21:55:36, 3.52it/s] 25%|██▌ | 93804/371472 [7:27:53<23:34:05, 3.27it/s] 25%|██▌ | 93805/371472 [7:27:53<21:57:06, 3.51it/s] 25%|██▌ | 93806/371472 [7:27:53<22:43:29, 3.39it/s] 25%|██▌ | 93807/371472 [7:27:54<22:21:38, 3.45it/s] 25%|██▌ | 93808/371472 [7:27:54<22:31:30, 3.42it/s] 25%|██▌ | 93809/371472 [7:27:54<22:28:08, 3.43it/s] 25%|██▌ | 93810/371472 [7:27:54<21:14:00, 3.63it/s] 25%|██▌ | 93811/371472 [7:27:55<20:21:35, 3.79it/s] 25%|██▌ | 93812/371472 [7:27:55<20:23:02, 3.78it/s] 25%|██▌ | 93813/371472 [7:27:55<20:13:28, 3.81it/s] 25%|██▌ | 93814/371472 [7:27:55<20:32:48, 3.75it/s] 25%|██▌ | 93815/371472 [7:27:56<19:46:08, 3.90it/s] 25%|██▌ | 93816/371472 [7:27:56<19:45:58, 3.90it/s] 25%|██▌ | 93817/371472 [7:27:56<22:15:06, 3.47it/s] 25%|██▌ | 93818/371472 [7:27:56<21:51:32, 3.53it/s] 25%|██▌ | 93819/371472 [7:27:57<21:54:31, 3.52it/s] 25%|██▌ | 93820/371472 [7:27:57<20:52:56, 3.69it/s] {'loss': 3.6461, 'learning_rate': 7.730558727832962e-07, 'epoch': 4.04} + 25%|██▌ | 93820/371472 [7:27:57<20:52:56, 3.69it/s] 25%|██▌ | 93821/371472 [7:27:57<20:44:16, 3.72it/s] 25%|██▌ | 93822/371472 [7:27:58<20:31:33, 3.76it/s] 25%|██▌ | 93823/371472 [7:27:58<20:26:29, 3.77it/s] 25%|██▌ | 93824/371472 [7:27:58<21:10:06, 3.64it/s] 25%|██▌ | 93825/371472 [7:27:58<21:15:57, 3.63it/s] 25%|██▌ | 93826/371472 [7:27:59<20:52:26, 3.69it/s] 25%|██▌ | 93827/371472 [7:27:59<21:14:51, 3.63it/s] 25%|██▌ | 93828/371472 [7:27:59<21:01:34, 3.67it/s] 25%|██▌ | 93829/371472 [7:28:00<28:05:07, 2.75it/s] 25%|██▌ | 93830/371472 [7:28:00<27:14:14, 2.83it/s] 25%|██▌ | 93831/371472 [7:28:00<25:11:25, 3.06it/s] 25%|██▌ | 93832/371472 [7:28:01<23:32:47, 3.28it/s] 25%|██▌ | 93833/371472 [7:28:01<25:30:01, 3.02it/s] 25%|██▌ | 93834/371472 [7:28:01<24:18:25, 3.17it/s] 25%|██▌ | 93835/371472 [7:28:02<23:03:19, 3.35it/s] 25%|██▌ | 93836/371472 [7:28:02<21:40:32, 3.56it/s] 25%|██▌ | 93837/371472 [7:28:02<21:51:37, 3.53it/s] 25%|██▌ | 93838/371472 [7:28:02<22:17:28, 3.46it/s] 25%|██▌ | 93839/371472 [7:28:03<26:00:19, 2.97it/s] 25%|██▌ | 93840/371472 [7:28:03<24:33:11, 3.14it/s] {'loss': 3.6627, 'learning_rate': 7.730073908078174e-07, 'epoch': 4.04} + 25%|██▌ | 93840/371472 [7:28:03<24:33:11, 3.14it/s] 25%|██▌ | 93841/371472 [7:28:03<22:54:15, 3.37it/s] 25%|██▌ | 93842/371472 [7:28:04<22:03:33, 3.50it/s] 25%|██▌ | 93843/371472 [7:28:04<22:06:07, 3.49it/s] 25%|██▌ | 93844/371472 [7:28:04<21:33:24, 3.58it/s] 25%|██▌ | 93845/371472 [7:28:04<20:33:55, 3.75it/s] 25%|██▌ | 93846/371472 [7:28:05<22:47:38, 3.38it/s] 25%|██▌ | 93847/371472 [7:28:05<23:10:52, 3.33it/s] 25%|██▌ | 93848/371472 [7:28:05<22:14:49, 3.47it/s] 25%|██▌ | 93849/371472 [7:28:06<21:41:29, 3.56it/s] 25%|██▌ | 93850/371472 [7:28:06<20:46:52, 3.71it/s] 25%|██▌ | 93851/371472 [7:28:06<22:34:33, 3.42it/s] 25%|██▌ | 93852/371472 [7:28:06<22:50:56, 3.38it/s] 25%|██▌ | 93853/371472 [7:28:07<22:48:27, 3.38it/s] 25%|██▌ | 93854/371472 [7:28:07<23:27:11, 3.29it/s] 25%|██▌ | 93855/371472 [7:28:07<22:04:55, 3.49it/s] 25%|██▌ | 93856/371472 [7:28:08<21:25:34, 3.60it/s] 25%|██▌ | 93857/371472 [7:28:08<21:08:39, 3.65it/s] 25%|██▌ | 93858/371472 [7:28:08<21:46:39, 3.54it/s] 25%|██▌ | 93859/371472 [7:28:08<21:06:38, 3.65it/s] 25%|██▌ | 93860/371472 [7:28:09<20:20:13, 3.79it/s] {'loss': 3.723, 'learning_rate': 7.729589088323386e-07, 'epoch': 4.04} + 25%|██▌ | 93860/371472 [7:28:09<20:20:13, 3.79it/s] 25%|██▌ | 93861/371472 [7:28:09<20:29:22, 3.76it/s] 25%|██▌ | 93862/371472 [7:28:09<20:40:07, 3.73it/s] 25%|██▌ | 93863/371472 [7:28:09<20:05:46, 3.84it/s] 25%|██▌ | 93864/371472 [7:28:10<20:53:51, 3.69it/s] 25%|██▌ | 93865/371472 [7:28:10<20:44:28, 3.72it/s] 25%|██▌ | 93866/371472 [7:28:10<20:19:49, 3.79it/s] 25%|██▌ | 93867/371472 [7:28:11<20:59:47, 3.67it/s] 25%|██▌ | 93868/371472 [7:28:11<20:43:33, 3.72it/s] 25%|██▌ | 93869/371472 [7:28:11<20:33:32, 3.75it/s] 25%|██▌ | 93870/371472 [7:28:11<19:45:47, 3.90it/s] 25%|██▌ | 93871/371472 [7:28:12<20:06:51, 3.83it/s] 25%|██▌ | 93872/371472 [7:28:12<21:22:58, 3.61it/s] 25%|██▌ | 93873/371472 [7:28:12<23:05:57, 3.34it/s] 25%|██▌ | 93874/371472 [7:28:13<22:53:33, 3.37it/s] 25%|██▌ | 93875/371472 [7:28:13<22:47:32, 3.38it/s] 25%|██▌ | 93876/371472 [7:28:13<22:16:14, 3.46it/s] 25%|██▌ | 93877/371472 [7:28:13<22:12:47, 3.47it/s] 25%|██▌ | 93878/371472 [7:28:14<22:24:32, 3.44it/s] 25%|██▌ | 93879/371472 [7:28:14<25:09:11, 3.07it/s] 25%|██▌ | 93880/371472 [7:28:14<24:43:36, 3.12it/s] {'loss': 3.7918, 'learning_rate': 7.729104268568597e-07, 'epoch': 4.04} + 25%|██▌ | 93880/371472 [7:28:14<24:43:36, 3.12it/s] 25%|██▌ | 93881/371472 [7:28:15<23:24:27, 3.29it/s] 25%|██▌ | 93882/371472 [7:28:15<22:28:20, 3.43it/s] 25%|██▌ | 93883/371472 [7:28:15<22:35:02, 3.41it/s] 25%|██▌ | 93884/371472 [7:28:16<22:41:53, 3.40it/s] 25%|██▌ | 93885/371472 [7:28:16<21:21:11, 3.61it/s] 25%|██▌ | 93886/371472 [7:28:16<21:18:40, 3.62it/s] 25%|██▌ | 93887/371472 [7:28:16<21:13:37, 3.63it/s] 25%|██▌ | 93888/371472 [7:28:17<23:20:44, 3.30it/s] 25%|██▌ | 93889/371472 [7:28:17<23:15:20, 3.32it/s] 25%|██▌ | 93890/371472 [7:28:17<22:35:13, 3.41it/s] 25%|██▌ | 93891/371472 [7:28:18<22:05:26, 3.49it/s] 25%|██▌ | 93892/371472 [7:28:18<21:43:20, 3.55it/s] 25%|██▌ | 93893/371472 [7:28:18<21:37:06, 3.57it/s] 25%|██▌ | 93894/371472 [7:28:18<21:41:48, 3.55it/s] 25%|██▌ | 93895/371472 [7:28:19<21:33:00, 3.58it/s] 25%|██▌ | 93896/371472 [7:28:19<21:06:43, 3.65it/s] 25%|██▌ | 93897/371472 [7:28:19<21:26:37, 3.60it/s] 25%|██▌ | 93898/371472 [7:28:19<21:32:19, 3.58it/s] 25%|██▌ | 93899/371472 [7:28:20<21:16:34, 3.62it/s] 25%|██▌ | 93900/371472 [7:28:20<22:03:17, 3.50it/s] {'loss': 3.7381, 'learning_rate': 7.728619448813807e-07, 'epoch': 4.04} + 25%|██▌ | 93900/371472 [7:28:20<22:03:17, 3.50it/s] 25%|██▌ | 93901/371472 [7:28:20<22:12:20, 3.47it/s] 25%|██▌ | 93902/371472 [7:28:21<22:02:28, 3.50it/s] 25%|██▌ | 93903/371472 [7:28:21<23:15:04, 3.32it/s] 25%|██▌ | 93904/371472 [7:28:21<24:18:19, 3.17it/s] 25%|██▌ | 93905/371472 [7:28:22<22:45:29, 3.39it/s] 25%|██▌ | 93906/371472 [7:28:22<23:01:20, 3.35it/s] 25%|██▌ | 93907/371472 [7:28:22<21:42:27, 3.55it/s] 25%|██▌ | 93908/371472 [7:28:22<21:31:40, 3.58it/s] 25%|██▌ | 93909/371472 [7:28:23<20:50:13, 3.70it/s] 25%|██▌ | 93910/371472 [7:28:23<20:36:02, 3.74it/s] 25%|██▌ | 93911/371472 [7:28:23<20:38:10, 3.74it/s] 25%|██▌ | 93912/371472 [7:28:23<21:21:15, 3.61it/s] 25%|██▌ | 93913/371472 [7:28:24<21:54:05, 3.52it/s] 25%|██▌ | 93914/371472 [7:28:24<20:57:11, 3.68it/s] 25%|██▌ | 93915/371472 [7:28:24<20:47:09, 3.71it/s] 25%|██▌ | 93916/371472 [7:28:25<22:36:46, 3.41it/s] 25%|██▌ | 93917/371472 [7:28:25<21:50:18, 3.53it/s] 25%|██▌ | 93918/371472 [7:28:25<21:08:17, 3.65it/s] 25%|██▌ | 93919/371472 [7:28:25<20:03:54, 3.84it/s] 25%|██▌ | 93920/371472 [7:28:26<20:15:45, 3.80it/s] {'loss': 3.6455, 'learning_rate': 7.728134629059018e-07, 'epoch': 4.05} + 25%|██▌ | 93920/371472 [7:28:26<20:15:45, 3.80it/s] 25%|██▌ | 93921/371472 [7:28:26<21:15:09, 3.63it/s] 25%|██▌ | 93922/371472 [7:28:26<22:21:00, 3.45it/s] 25%|██▌ | 93923/371472 [7:28:27<23:01:24, 3.35it/s] 25%|██▌ | 93924/371472 [7:28:27<22:50:00, 3.38it/s] 25%|██▌ | 93925/371472 [7:28:27<22:33:11, 3.42it/s] 25%|██▌ | 93926/371472 [7:28:27<22:40:36, 3.40it/s] 25%|██▌ | 93927/371472 [7:28:28<21:34:53, 3.57it/s] 25%|██▌ | 93928/371472 [7:28:28<20:59:16, 3.67it/s] 25%|██▌ | 93929/371472 [7:28:28<20:26:58, 3.77it/s] 25%|██▌ | 93930/371472 [7:28:28<20:33:57, 3.75it/s] 25%|██▌ | 93931/371472 [7:28:29<20:33:57, 3.75it/s] 25%|██▌ | 93932/371472 [7:28:29<21:46:37, 3.54it/s] 25%|██▌ | 93933/371472 [7:28:29<21:13:44, 3.63it/s] 25%|██▌ | 93934/371472 [7:28:30<21:25:18, 3.60it/s] 25%|██▌ | 93935/371472 [7:28:30<21:16:15, 3.62it/s] 25%|██▌ | 93936/371472 [7:28:30<20:54:21, 3.69it/s] 25%|██▌ | 93937/371472 [7:28:30<23:23:01, 3.30it/s] 25%|██▌ | 93938/371472 [7:28:31<23:35:14, 3.27it/s] 25%|██▌ | 93939/371472 [7:28:31<22:48:37, 3.38it/s] 25%|██▌ | 93940/371472 [7:28:31<23:27:22, 3.29it/s] {'loss': 3.5353, 'learning_rate': 7.72764980930423e-07, 'epoch': 4.05} + 25%|██▌ | 93940/371472 [7:28:31<23:27:22, 3.29it/s] 25%|██▌ | 93941/371472 [7:28:32<23:07:02, 3.33it/s] 25%|██▌ | 93942/371472 [7:28:32<22:07:20, 3.48it/s] 25%|██▌ | 93943/371472 [7:28:32<22:25:43, 3.44it/s] 25%|██▌ | 93944/371472 [7:28:33<21:44:30, 3.55it/s] 25%|██▌ | 93945/371472 [7:28:33<21:29:37, 3.59it/s] 25%|██▌ | 93946/371472 [7:28:33<21:45:21, 3.54it/s] 25%|██▌ | 93947/371472 [7:28:33<20:54:03, 3.69it/s] 25%|██▌ | 93948/371472 [7:28:34<20:50:51, 3.70it/s] 25%|██▌ | 93949/371472 [7:28:34<20:00:24, 3.85it/s] 25%|██▌ | 93950/371472 [7:28:34<19:56:11, 3.87it/s] 25%|██▌ | 93951/371472 [7:28:34<20:34:26, 3.75it/s] 25%|██▌ | 93952/371472 [7:28:35<20:14:54, 3.81it/s] 25%|██▌ | 93953/371472 [7:28:35<20:29:20, 3.76it/s] 25%|██▌ | 93954/371472 [7:28:35<20:44:37, 3.72it/s] 25%|██▌ | 93955/371472 [7:28:35<20:29:13, 3.76it/s] 25%|██▌ | 93956/371472 [7:28:36<21:19:15, 3.62it/s] 25%|██▌ | 93957/371472 [7:28:36<20:59:46, 3.67it/s] 25%|██▌ | 93958/371472 [7:28:36<21:18:14, 3.62it/s] 25%|██▌ | 93959/371472 [7:28:37<20:56:27, 3.68it/s] 25%|██▌ | 93960/371472 [7:28:37<20:48:58, 3.70it/s] {'loss': 3.6592, 'learning_rate': 7.72716498954944e-07, 'epoch': 4.05} + 25%|██▌ | 93960/371472 [7:28:37<20:48:58, 3.70it/s] 25%|██▌ | 93961/371472 [7:28:37<20:17:02, 3.80it/s] 25%|██▌ | 93962/371472 [7:28:37<21:09:18, 3.64it/s] 25%|██▌ | 93963/371472 [7:28:38<20:21:19, 3.79it/s] 25%|██▌ | 93964/371472 [7:28:38<19:55:37, 3.87it/s] 25%|██▌ | 93965/371472 [7:28:38<20:00:29, 3.85it/s] 25%|██▌ | 93966/371472 [7:28:38<20:40:51, 3.73it/s] 25%|██▌ | 93967/371472 [7:28:39<20:32:34, 3.75it/s] 25%|██▌ | 93968/371472 [7:28:39<20:32:10, 3.75it/s] 25%|██▌ | 93969/371472 [7:28:39<20:13:38, 3.81it/s] 25%|██▌ | 93970/371472 [7:28:40<22:02:30, 3.50it/s] 25%|██▌ | 93971/371472 [7:28:40<21:48:05, 3.54it/s] 25%|██▌ | 93972/371472 [7:28:40<21:01:52, 3.67it/s] 25%|██▌ | 93973/371472 [7:28:40<20:47:22, 3.71it/s] 25%|██▌ | 93974/371472 [7:28:41<20:19:22, 3.79it/s] 25%|██▌ | 93975/371472 [7:28:41<19:51:30, 3.88it/s] 25%|██▌ | 93976/371472 [7:28:41<21:31:16, 3.58it/s] 25%|██▌ | 93977/371472 [7:28:41<22:07:05, 3.48it/s] 25%|██▌ | 93978/371472 [7:28:42<22:56:29, 3.36it/s] 25%|██▌ | 93979/371472 [7:28:42<22:15:01, 3.46it/s] 25%|██▌ | 93980/371472 [7:28:42<22:05:56, 3.49it/s] {'loss': 3.8174, 'learning_rate': 7.726680169794651e-07, 'epoch': 4.05} + 25%|██▌ | 93980/371472 [7:28:42<22:05:56, 3.49it/s] 25%|██▌ | 93981/371472 [7:28:43<21:34:49, 3.57it/s] 25%|██▌ | 93982/371472 [7:28:43<21:35:54, 3.57it/s] 25%|██▌ | 93983/371472 [7:28:43<21:43:35, 3.55it/s] 25%|██▌ | 93984/371472 [7:28:43<21:32:12, 3.58it/s] 25%|██▌ | 93985/371472 [7:28:44<21:16:39, 3.62it/s] 25%|██▌ | 93986/371472 [7:28:44<20:10:34, 3.82it/s] 25%|██▌ | 93987/371472 [7:28:44<22:46:46, 3.38it/s] 25%|██▌ | 93988/371472 [7:28:45<21:25:53, 3.60it/s] 25%|██▌ | 93989/371472 [7:28:45<21:40:43, 3.56it/s] 25%|██▌ | 93990/371472 [7:28:45<21:09:47, 3.64it/s] 25%|██▌ | 93991/371472 [7:28:45<20:58:41, 3.67it/s] 25%|██▌ | 93992/371472 [7:28:46<21:48:16, 3.53it/s] 25%|██▌ | 93993/371472 [7:28:46<20:59:14, 3.67it/s] 25%|██▌ | 93994/371472 [7:28:46<21:16:04, 3.62it/s] 25%|██▌ | 93995/371472 [7:28:46<21:08:34, 3.65it/s] 25%|██▌ | 93996/371472 [7:28:47<21:07:08, 3.65it/s] 25%|██▌ | 93997/371472 [7:28:47<20:22:09, 3.78it/s] 25%|██▌ | 93998/371472 [7:28:47<20:25:12, 3.77it/s] 25%|██▌ | 93999/371472 [7:28:48<23:51:37, 3.23it/s] 25%|██▌ | 94000/371472 [7:28:48<22:19:43, 3.45it/s] {'loss': 3.6744, 'learning_rate': 7.726195350039863e-07, 'epoch': 4.05} + 25%|██▌ | 94000/371472 [7:28:48<22:19:43, 3.45it/s] 25%|██▌ | 94001/371472 [7:28:48<21:37:40, 3.56it/s] 25%|██▌ | 94002/371472 [7:28:48<21:06:36, 3.65it/s] 25%|██▌ | 94003/371472 [7:28:49<21:01:08, 3.67it/s] 25%|██▌ | 94004/371472 [7:28:49<20:22:06, 3.78it/s] 25%|██▌ | 94005/371472 [7:28:49<21:02:15, 3.66it/s] 25%|██▌ | 94006/371472 [7:28:49<20:19:48, 3.79it/s] 25%|██▌ | 94007/371472 [7:28:50<20:01:03, 3.85it/s] 25%|██▌ | 94008/371472 [7:28:50<19:52:14, 3.88it/s] 25%|██▌ | 94009/371472 [7:28:50<19:59:59, 3.85it/s] 25%|██▌ | 94010/371472 [7:28:51<20:43:26, 3.72it/s] 25%|██▌ | 94011/371472 [7:28:51<21:40:34, 3.56it/s] 25%|██▌ | 94012/371472 [7:28:51<21:48:28, 3.53it/s] 25%|██▌ | 94013/371472 [7:28:51<21:05:50, 3.65it/s] 25%|██▌ | 94014/371472 [7:28:52<21:17:43, 3.62it/s] 25%|██▌ | 94015/371472 [7:28:52<22:30:42, 3.42it/s] 25%|██▌ | 94016/371472 [7:28:52<22:21:31, 3.45it/s] 25%|██▌ | 94017/371472 [7:28:53<21:36:14, 3.57it/s] 25%|██▌ | 94018/371472 [7:28:53<21:05:23, 3.65it/s] 25%|██▌ | 94019/371472 [7:28:53<21:12:47, 3.63it/s] 25%|██▌ | 94020/371472 [7:28:53<21:10:40, 3.64it/s] {'loss': 3.6903, 'learning_rate': 7.725710530285075e-07, 'epoch': 4.05} + 25%|██▌ | 94020/371472 [7:28:53<21:10:40, 3.64it/s] 25%|██▌ | 94021/371472 [7:28:54<20:58:23, 3.67it/s] 25%|██▌ | 94022/371472 [7:28:54<20:11:34, 3.82it/s] 25%|██▌ | 94023/371472 [7:28:54<20:35:56, 3.74it/s] 25%|██▌ | 94024/371472 [7:28:54<20:37:27, 3.74it/s] 25%|██▌ | 94025/371472 [7:28:55<20:30:30, 3.76it/s] 25%|██▌ | 94026/371472 [7:28:55<20:02:52, 3.84it/s] 25%|██▌ | 94027/371472 [7:28:55<20:30:53, 3.76it/s] 25%|██▌ | 94028/371472 [7:28:55<20:34:48, 3.74it/s] 25%|██▌ | 94029/371472 [7:28:56<20:14:44, 3.81it/s] 25%|██▌ | 94030/371472 [7:28:56<20:11:47, 3.82it/s] 25%|██▌ | 94031/371472 [7:28:56<20:12:30, 3.81it/s] 25%|██▌ | 94032/371472 [7:28:57<21:12:23, 3.63it/s] 25%|██▌ | 94033/371472 [7:28:57<21:33:00, 3.58it/s] 25%|██▌ | 94034/371472 [7:28:57<20:29:40, 3.76it/s] 25%|██▌ | 94035/371472 [7:28:57<20:20:15, 3.79it/s] 25%|██▌ | 94036/371472 [7:28:58<20:05:00, 3.84it/s] 25%|██▌ | 94037/371472 [7:28:58<21:25:13, 3.60it/s] 25%|██▌ | 94038/371472 [7:28:58<21:52:07, 3.52it/s] 25%|██▌ | 94039/371472 [7:28:58<21:38:40, 3.56it/s] 25%|██▌ | 94040/371472 [7:28:59<21:08:05, 3.65it/s] {'loss': 3.713, 'learning_rate': 7.725225710530284e-07, 'epoch': 4.05} + 25%|██▌ | 94040/371472 [7:28:59<21:08:05, 3.65it/s] 25%|██▌ | 94041/371472 [7:28:59<20:26:04, 3.77it/s] 25%|██▌ | 94042/371472 [7:28:59<20:18:31, 3.79it/s] 25%|██▌ | 94043/371472 [7:28:59<20:11:41, 3.82it/s] 25%|██▌ | 94044/371472 [7:29:00<20:42:41, 3.72it/s] 25%|██▌ | 94045/371472 [7:29:00<20:29:30, 3.76it/s] 25%|██▌ | 94046/371472 [7:29:00<21:34:57, 3.57it/s] 25%|██▌ | 94047/371472 [7:29:01<20:43:29, 3.72it/s] 25%|██▌ | 94048/371472 [7:29:01<21:38:00, 3.56it/s] 25%|██▌ | 94049/371472 [7:29:01<23:16:58, 3.31it/s] 25%|██▌ | 94050/371472 [7:29:02<23:41:38, 3.25it/s] 25%|██▌ | 94051/371472 [7:29:02<22:52:02, 3.37it/s] 25%|██▌ | 94052/371472 [7:29:02<22:10:44, 3.47it/s] 25%|██▌ | 94053/371472 [7:29:02<22:10:02, 3.48it/s] 25%|██▌ | 94054/371472 [7:29:03<21:05:32, 3.65it/s] 25%|██▌ | 94055/371472 [7:29:03<22:36:12, 3.41it/s] 25%|██▌ | 94056/371472 [7:29:03<22:03:41, 3.49it/s] 25%|██▌ | 94057/371472 [7:29:03<21:31:13, 3.58it/s] 25%|██▌ | 94058/371472 [7:29:04<22:39:52, 3.40it/s] 25%|██▌ | 94059/371472 [7:29:04<22:23:50, 3.44it/s] 25%|██▌ | 94060/371472 [7:29:04<24:33:59, 3.14it/s] {'loss': 3.7358, 'learning_rate': 7.724740890775495e-07, 'epoch': 4.05} + 25%|██▌ | 94060/371472 [7:29:04<24:33:59, 3.14it/s] 25%|██▌ | 94061/371472 [7:29:05<22:41:58, 3.39it/s] 25%|██▌ | 94062/371472 [7:29:05<22:41:06, 3.40it/s] 25%|██▌ | 94063/371472 [7:29:05<23:46:25, 3.24it/s] 25%|██▌ | 94064/371472 [7:29:06<23:08:25, 3.33it/s] 25%|██▌ | 94065/371472 [7:29:06<22:55:28, 3.36it/s] 25%|██▌ | 94066/371472 [7:29:06<22:23:58, 3.44it/s] 25%|██▌ | 94067/371472 [7:29:06<21:51:42, 3.52it/s] 25%|██▌ | 94068/371472 [7:29:07<21:18:56, 3.62it/s] 25%|██▌ | 94069/371472 [7:29:07<21:07:11, 3.65it/s] 25%|██▌ | 94070/371472 [7:29:07<21:16:32, 3.62it/s] 25%|██▌ | 94071/371472 [7:29:08<21:07:08, 3.65it/s] 25%|██▌ | 94072/371472 [7:29:08<20:40:05, 3.73it/s] 25%|██▌ | 94073/371472 [7:29:08<20:23:23, 3.78it/s] 25%|██▌ | 94074/371472 [7:29:08<20:52:57, 3.69it/s] 25%|██▌ | 94075/371472 [7:29:09<21:59:46, 3.50it/s] 25%|██▌ | 94076/371472 [7:29:09<21:55:25, 3.51it/s] 25%|██▌ | 94077/371472 [7:29:09<23:24:33, 3.29it/s] 25%|██▌ | 94078/371472 [7:29:10<22:10:43, 3.47it/s] 25%|██▌ | 94079/371472 [7:29:10<22:08:42, 3.48it/s] 25%|██▌ | 94080/371472 [7:29:10<21:55:04, 3.52it/s] {'loss': 3.7242, 'learning_rate': 7.724256071020707e-07, 'epoch': 4.05} + 25%|██▌ | 94080/371472 [7:29:10<21:55:04, 3.52it/s] 25%|██▌ | 94081/371472 [7:29:10<22:35:56, 3.41it/s] 25%|██▌ | 94082/371472 [7:29:11<22:54:42, 3.36it/s] 25%|██▌ | 94083/371472 [7:29:11<23:35:14, 3.27it/s] 25%|██▌ | 94084/371472 [7:29:11<22:09:15, 3.48it/s] 25%|██▌ | 94085/371472 [7:29:12<21:51:14, 3.53it/s] 25%|██▌ | 94086/371472 [7:29:12<22:40:12, 3.40it/s] 25%|██▌ | 94087/371472 [7:29:12<21:25:20, 3.60it/s] 25%|██▌ | 94088/371472 [7:29:12<22:05:22, 3.49it/s] 25%|██▌ | 94089/371472 [7:29:13<21:01:42, 3.66it/s] 25%|██▌ | 94090/371472 [7:29:13<21:00:22, 3.67it/s] 25%|██▌ | 94091/371472 [7:29:13<22:30:36, 3.42it/s] 25%|██▌ | 94092/371472 [7:29:14<21:19:38, 3.61it/s] 25%|██▌ | 94093/371472 [7:29:14<20:46:39, 3.71it/s] 25%|██▌ | 94094/371472 [7:29:14<21:17:23, 3.62it/s] 25%|██▌ | 94095/371472 [7:29:14<21:29:13, 3.59it/s] 25%|██▌ | 94096/371472 [7:29:15<21:00:41, 3.67it/s] 25%|██▌ | 94097/371472 [7:29:15<20:26:09, 3.77it/s] 25%|██▌ | 94098/371472 [7:29:15<21:20:07, 3.61it/s] 25%|██▌ | 94099/371472 [7:29:16<26:26:21, 2.91it/s] 25%|██▌ | 94100/371472 [7:29:16<24:22:40, 3.16it/s] {'loss': 3.6433, 'learning_rate': 7.723771251265918e-07, 'epoch': 4.05} + 25%|██▌ | 94100/371472 [7:29:16<24:22:40, 3.16it/s] 25%|██▌ | 94101/371472 [7:29:16<23:39:47, 3.26it/s] 25%|██▌ | 94102/371472 [7:29:16<23:11:16, 3.32it/s] 25%|██▌ | 94103/371472 [7:29:17<22:52:48, 3.37it/s] 25%|██▌ | 94104/371472 [7:29:17<22:29:37, 3.43it/s] 25%|██▌ | 94105/371472 [7:29:17<21:57:42, 3.51it/s] 25%|██▌ | 94106/371472 [7:29:18<21:43:26, 3.55it/s] 25%|██▌ | 94107/371472 [7:29:18<21:51:17, 3.53it/s] 25%|██▌ | 94108/371472 [7:29:18<21:11:42, 3.64it/s] 25%|██▌ | 94109/371472 [7:29:18<21:48:46, 3.53it/s] 25%|██▌ | 94110/371472 [7:29:19<22:02:43, 3.49it/s] 25%|██▌ | 94111/371472 [7:29:19<21:44:03, 3.54it/s] 25%|██▌ | 94112/371472 [7:29:19<22:18:07, 3.45it/s] 25%|██▌ | 94113/371472 [7:29:20<21:26:17, 3.59it/s] 25%|██▌ | 94114/371472 [7:29:20<21:59:52, 3.50it/s] 25%|██▌ | 94115/371472 [7:29:20<21:11:56, 3.63it/s] 25%|██▌ | 94116/371472 [7:29:20<21:00:04, 3.67it/s] 25%|██▌ | 94117/371472 [7:29:21<20:32:36, 3.75it/s] 25%|██▌ | 94118/371472 [7:29:21<19:58:06, 3.86it/s] 25%|██▌ | 94119/371472 [7:29:21<21:25:49, 3.60it/s] 25%|██▌ | 94120/371472 [7:29:21<21:33:56, 3.57it/s] {'loss': 3.7958, 'learning_rate': 7.723286431511129e-07, 'epoch': 4.05} + 25%|██▌ | 94120/371472 [7:29:21<21:33:56, 3.57it/s] 25%|██▌ | 94121/371472 [7:29:22<22:26:43, 3.43it/s] 25%|██▌ | 94122/371472 [7:29:22<21:21:51, 3.61it/s] 25%|██▌ | 94123/371472 [7:29:22<21:44:35, 3.54it/s] 25%|██▌ | 94124/371472 [7:29:23<21:40:49, 3.55it/s] 25%|██▌ | 94125/371472 [7:29:23<23:16:39, 3.31it/s] 25%|██▌ | 94126/371472 [7:29:23<23:02:38, 3.34it/s] 25%|██▌ | 94127/371472 [7:29:24<22:21:46, 3.45it/s] 25%|██▌ | 94128/371472 [7:29:24<22:23:50, 3.44it/s] 25%|██▌ | 94129/371472 [7:29:24<21:40:53, 3.55it/s] 25%|██▌ | 94130/371472 [7:29:24<20:47:16, 3.71it/s] 25%|██▌ | 94131/371472 [7:29:25<21:32:43, 3.58it/s] 25%|██▌ | 94132/371472 [7:29:25<22:29:48, 3.42it/s] 25%|██▌ | 94133/371472 [7:29:25<23:22:30, 3.30it/s] 25%|██▌ | 94134/371472 [7:29:26<23:23:58, 3.29it/s] 25%|██▌ | 94135/371472 [7:29:26<23:21:15, 3.30it/s] 25%|██▌ | 94136/371472 [7:29:26<22:59:17, 3.35it/s] 25%|██▌ | 94137/371472 [7:29:26<21:44:53, 3.54it/s] 25%|██▌ | 94138/371472 [7:29:27<21:55:55, 3.51it/s] 25%|██▌ | 94139/371472 [7:29:27<21:18:39, 3.61it/s] 25%|██▌ | 94140/371472 [7:29:27<21:46:14, 3.54it/s] {'loss': 3.6197, 'learning_rate': 7.72280161175634e-07, 'epoch': 4.05} + 25%|██▌ | 94140/371472 [7:29:27<21:46:14, 3.54it/s] 25%|██▌ | 94141/371472 [7:29:28<21:55:05, 3.51it/s] 25%|██▌ | 94142/371472 [7:29:28<21:48:21, 3.53it/s] 25%|██▌ | 94143/371472 [7:29:28<22:49:04, 3.38it/s] 25%|██▌ | 94144/371472 [7:29:29<24:05:26, 3.20it/s] 25%|██▌ | 94145/371472 [7:29:29<22:42:50, 3.39it/s] 25%|██▌ | 94146/371472 [7:29:29<22:24:34, 3.44it/s] 25%|██▌ | 94147/371472 [7:29:29<21:38:13, 3.56it/s] 25%|██▌ | 94148/371472 [7:29:30<23:19:24, 3.30it/s] 25%|██▌ | 94149/371472 [7:29:30<22:21:51, 3.44it/s] 25%|██▌ | 94150/371472 [7:29:30<24:02:13, 3.20it/s] 25%|██▌ | 94151/371472 [7:29:31<22:45:22, 3.39it/s] 25%|██▌ | 94152/371472 [7:29:31<22:42:36, 3.39it/s] 25%|██▌ | 94153/371472 [7:29:31<21:29:36, 3.58it/s] 25%|██▌ | 94154/371472 [7:29:31<21:10:19, 3.64it/s] 25%|██▌ | 94155/371472 [7:29:32<21:12:42, 3.63it/s] 25%|██▌ | 94156/371472 [7:29:32<22:42:37, 3.39it/s] 25%|██▌ | 94157/371472 [7:29:32<22:18:43, 3.45it/s] 25%|██▌ | 94158/371472 [7:29:32<21:26:03, 3.59it/s] 25%|██▌ | 94159/371472 [7:29:33<22:52:42, 3.37it/s] 25%|██▌ | 94160/371472 [7:29:33<24:01:09, 3.21it/s] {'loss': 3.4819, 'learning_rate': 7.722316792001551e-07, 'epoch': 4.06} + 25%|██▌ | 94160/371472 [7:29:33<24:01:09, 3.21it/s] 25%|██▌ | 94161/371472 [7:29:33<24:17:55, 3.17it/s] 25%|██▌ | 94162/371472 [7:29:34<26:01:18, 2.96it/s] 25%|██▌ | 94163/371472 [7:29:34<25:21:22, 3.04it/s] 25%|██▌ | 94164/371472 [7:29:34<24:22:34, 3.16it/s] 25%|██▌ | 94165/371472 [7:29:35<23:45:37, 3.24it/s] 25%|██▌ | 94166/371472 [7:29:35<22:49:14, 3.38it/s] 25%|██▌ | 94167/371472 [7:29:35<22:55:49, 3.36it/s] 25%|██▌ | 94168/371472 [7:29:36<22:10:27, 3.47it/s] 25%|██▌ | 94169/371472 [7:29:36<21:42:43, 3.55it/s] 25%|██▌ | 94170/371472 [7:29:36<21:56:03, 3.51it/s] 25%|██▌ | 94171/371472 [7:29:36<21:31:50, 3.58it/s] 25%|██▌ | 94172/371472 [7:29:37<21:32:21, 3.58it/s] 25%|██▌ | 94173/371472 [7:29:37<22:26:15, 3.43it/s] 25%|██▌ | 94174/371472 [7:29:37<22:45:34, 3.38it/s] 25%|██▌ | 94175/371472 [7:29:38<22:57:40, 3.35it/s] 25%|██▌ | 94176/371472 [7:29:38<21:56:56, 3.51it/s] 25%|██▌ | 94177/371472 [7:29:38<21:19:03, 3.61it/s] 25%|██▌ | 94178/371472 [7:29:38<21:03:11, 3.66it/s] 25%|██▌ | 94179/371472 [7:29:39<20:41:30, 3.72it/s] 25%|██▌ | 94180/371472 [7:29:39<20:18:07, 3.79it/s] {'loss': 3.6515, 'learning_rate': 7.721831972246763e-07, 'epoch': 4.06} + 25%|██▌ | 94180/371472 [7:29:39<20:18:07, 3.79it/s] 25%|██▌ | 94181/371472 [7:29:39<19:56:49, 3.86it/s] 25%|██▌ | 94182/371472 [7:29:40<24:25:50, 3.15it/s] 25%|██▌ | 94183/371472 [7:29:40<23:44:54, 3.24it/s] 25%|██▌ | 94184/371472 [7:29:40<24:44:35, 3.11it/s] 25%|██▌ | 94185/371472 [7:29:41<22:55:33, 3.36it/s] 25%|██▌ | 94186/371472 [7:29:41<22:23:35, 3.44it/s] 25%|██▌ | 94187/371472 [7:29:41<22:05:59, 3.49it/s] 25%|██▌ | 94188/371472 [7:29:41<22:37:45, 3.40it/s] 25%|██▌ | 94189/371472 [7:29:42<22:06:58, 3.48it/s] 25%|██▌ | 94190/371472 [7:29:42<22:44:45, 3.39it/s] 25%|██▌ | 94191/371472 [7:29:42<23:22:08, 3.30it/s] 25%|██▌ | 94192/371472 [7:29:43<22:07:15, 3.48it/s] 25%|██▌ | 94193/371472 [7:29:43<21:42:28, 3.55it/s] 25%|██▌ | 94194/371472 [7:29:43<21:29:06, 3.58it/s] 25%|██▌ | 94195/371472 [7:29:43<20:49:11, 3.70it/s] 25%|██▌ | 94196/371472 [7:29:44<20:15:29, 3.80it/s] 25%|██▌ | 94197/371472 [7:29:44<22:17:52, 3.45it/s] 25%|██▌ | 94198/371472 [7:29:44<23:12:15, 3.32it/s] 25%|██▌ | 94199/371472 [7:29:45<22:27:23, 3.43it/s] 25%|██▌ | 94200/371472 [7:29:45<22:11:59, 3.47it/s] {'loss': 3.6916, 'learning_rate': 7.721347152491973e-07, 'epoch': 4.06} + 25%|██▌ | 94200/371472 [7:29:45<22:11:59, 3.47it/s] 25%|██▌ | 94201/371472 [7:29:45<21:34:35, 3.57it/s] 25%|██▌ | 94202/371472 [7:29:45<21:36:03, 3.57it/s] 25%|██▌ | 94203/371472 [7:29:46<20:48:50, 3.70it/s] 25%|██▌ | 94204/371472 [7:29:46<20:24:44, 3.77it/s] 25%|██▌ | 94205/371472 [7:29:46<20:42:47, 3.72it/s] 25%|██▌ | 94206/371472 [7:29:46<20:36:40, 3.74it/s] 25%|██▌ | 94207/371472 [7:29:47<22:29:47, 3.42it/s] 25%|██▌ | 94208/371472 [7:29:47<21:47:24, 3.53it/s] 25%|██▌ | 94209/371472 [7:29:47<22:12:18, 3.47it/s] 25%|██▌ | 94210/371472 [7:29:48<20:57:15, 3.68it/s] 25%|██▌ | 94211/371472 [7:29:48<20:13:56, 3.81it/s] 25%|██▌ | 94212/371472 [7:29:48<20:35:41, 3.74it/s] 25%|██▌ | 94213/371472 [7:29:48<22:03:11, 3.49it/s] 25%|██▌ | 94214/371472 [7:29:49<21:51:11, 3.52it/s] 25%|██▌ | 94215/371472 [7:29:49<21:35:01, 3.57it/s] 25%|██▌ | 94216/371472 [7:29:49<21:57:41, 3.51it/s] 25%|██▌ | 94217/371472 [7:29:49<21:12:27, 3.63it/s] 25%|██▌ | 94218/371472 [7:29:50<20:36:05, 3.74it/s] 25%|██▌ | 94219/371472 [7:29:50<20:07:26, 3.83it/s] 25%|██▌ | 94220/371472 [7:29:50<19:47:31, 3.89it/s] {'loss': 3.6998, 'learning_rate': 7.720862332737184e-07, 'epoch': 4.06} + 25%|██▌ | 94220/371472 [7:29:50<19:47:31, 3.89it/s] 25%|██▌ | 94221/371472 [7:29:50<19:26:10, 3.96it/s] 25%|██▌ | 94222/371472 [7:29:51<19:02:42, 4.04it/s] 25%|██▌ | 94223/371472 [7:29:51<19:00:54, 4.05it/s] 25%|██▌ | 94224/371472 [7:29:51<19:51:28, 3.88it/s] 25%|██▌ | 94225/371472 [7:29:52<20:45:35, 3.71it/s] 25%|██▌ | 94226/371472 [7:29:52<21:15:09, 3.62it/s] 25%|██▌ | 94227/371472 [7:29:52<21:36:40, 3.56it/s] 25%|██▌ | 94228/371472 [7:29:52<21:46:01, 3.54it/s] 25%|██▌ | 94229/371472 [7:29:53<21:14:54, 3.62it/s] 25%|██▌ | 94230/371472 [7:29:53<20:46:40, 3.71it/s] 25%|██▌ | 94231/371472 [7:29:53<20:16:49, 3.80it/s] 25%|██▌ | 94232/371472 [7:29:53<21:50:18, 3.53it/s] 25%|██▌ | 94233/371472 [7:29:54<21:13:07, 3.63it/s] 25%|██▌ | 94234/371472 [7:29:54<21:41:59, 3.55it/s] 25%|██▌ | 94235/371472 [7:29:54<22:39:05, 3.40it/s] 25%|██▌ | 94236/371472 [7:29:55<22:41:05, 3.39it/s] 25%|██▌ | 94237/371472 [7:29:55<22:06:00, 3.48it/s] 25%|██▌ | 94238/371472 [7:29:55<22:01:36, 3.50it/s] 25%|██▌ | 94239/371472 [7:29:56<24:56:04, 3.09it/s] 25%|██▌ | 94240/371472 [7:29:56<23:03:56, 3.34it/s] {'loss': 3.9044, 'learning_rate': 7.720377512982396e-07, 'epoch': 4.06} + 25%|██▌ | 94240/371472 [7:29:56<23:03:56, 3.34it/s] 25%|██▌ | 94241/371472 [7:29:56<23:21:22, 3.30it/s] 25%|██▌ | 94242/371472 [7:29:56<21:44:51, 3.54it/s] 25%|██▌ | 94243/371472 [7:29:57<20:56:50, 3.68it/s] 25%|██▌ | 94244/371472 [7:29:57<21:38:08, 3.56it/s] 25%|██▌ | 94245/371472 [7:29:57<22:24:53, 3.44it/s] 25%|██▌ | 94246/371472 [7:29:58<22:13:18, 3.47it/s] 25%|██▌ | 94247/371472 [7:29:58<22:23:58, 3.44it/s] 25%|██▌ | 94248/371472 [7:29:58<22:18:40, 3.45it/s] 25%|██▌ | 94249/371472 [7:29:58<22:08:09, 3.48it/s] 25%|██▌ | 94250/371472 [7:29:59<22:14:22, 3.46it/s] 25%|██▌ | 94251/371472 [7:29:59<22:39:05, 3.40it/s] 25%|██▌ | 94252/371472 [7:29:59<21:37:21, 3.56it/s] 25%|██▌ | 94253/371472 [7:30:00<21:02:18, 3.66it/s] 25%|██▌ | 94254/371472 [7:30:00<20:44:19, 3.71it/s] 25%|██▌ | 94255/371472 [7:30:00<20:22:47, 3.78it/s] 25%|██▌ | 94256/371472 [7:30:00<19:53:30, 3.87it/s] 25%|██▌ | 94257/371472 [7:30:01<22:03:23, 3.49it/s] 25%|██▌ | 94258/371472 [7:30:01<21:18:35, 3.61it/s] 25%|██▌ | 94259/371472 [7:30:01<21:47:56, 3.53it/s] 25%|██▌ | 94260/371472 [7:30:01<21:50:39, 3.53it/s] {'loss': 3.5698, 'learning_rate': 7.719892693227606e-07, 'epoch': 4.06} + 25%|██▌ | 94260/371472 [7:30:01<21:50:39, 3.53it/s] 25%|██▌ | 94261/371472 [7:30:02<23:00:05, 3.35it/s] 25%|██▌ | 94262/371472 [7:30:02<21:54:33, 3.51it/s] 25%|██▌ | 94263/371472 [7:30:02<21:45:46, 3.54it/s] 25%|██▌ | 94264/371472 [7:30:03<21:46:46, 3.54it/s] 25%|██▌ | 94265/371472 [7:30:03<21:46:54, 3.54it/s] 25%|██▌ | 94266/371472 [7:30:03<21:40:40, 3.55it/s] 25%|██▌ | 94267/371472 [7:30:03<21:19:26, 3.61it/s] 25%|██▌ | 94268/371472 [7:30:04<20:24:35, 3.77it/s] 25%|██▌ | 94269/371472 [7:30:04<20:37:31, 3.73it/s] 25%|██▌ | 94270/371472 [7:30:04<22:32:55, 3.41it/s] 25%|██▌ | 94271/371472 [7:30:05<22:03:39, 3.49it/s] 25%|██▌ | 94272/371472 [7:30:05<22:25:13, 3.43it/s] 25%|██▌ | 94273/371472 [7:30:05<21:47:12, 3.53it/s] 25%|██▌ | 94274/371472 [7:30:05<22:09:17, 3.48it/s] 25%|██▌ | 94275/371472 [7:30:06<21:41:36, 3.55it/s] 25%|██▌ | 94276/371472 [7:30:06<21:22:03, 3.60it/s] 25%|██▌ | 94277/371472 [7:30:06<20:28:29, 3.76it/s] 25%|██▌ | 94278/371472 [7:30:06<20:00:03, 3.85it/s] 25%|██▌ | 94279/371472 [7:30:07<20:29:13, 3.76it/s] 25%|██▌ | 94280/371472 [7:30:07<20:30:27, 3.75it/s] {'loss': 3.5303, 'learning_rate': 7.719407873472817e-07, 'epoch': 4.06} + 25%|██▌ | 94280/371472 [7:30:07<20:30:27, 3.75it/s] 25%|██▌ | 94281/371472 [7:30:07<20:25:46, 3.77it/s] 25%|██▌ | 94282/371472 [7:30:08<20:30:57, 3.75it/s] 25%|██▌ | 94283/371472 [7:30:08<20:16:52, 3.80it/s] 25%|██▌ | 94284/371472 [7:30:08<20:43:11, 3.72it/s] 25%|██▌ | 94285/371472 [7:30:08<21:02:21, 3.66it/s] 25%|██▌ | 94286/371472 [7:30:09<20:59:43, 3.67it/s] 25%|██▌ | 94287/371472 [7:30:09<19:58:37, 3.85it/s] 25%|██▌ | 94288/371472 [7:30:09<19:31:04, 3.94it/s] 25%|██▌ | 94289/371472 [7:30:09<19:15:47, 4.00it/s] 25%|██▌ | 94290/371472 [7:30:10<19:45:07, 3.90it/s] 25%|██▌ | 94291/371472 [7:30:10<19:22:19, 3.97it/s] 25%|██▌ | 94292/371472 [7:30:10<19:33:01, 3.94it/s] 25%|██▌ | 94293/371472 [7:30:10<19:59:56, 3.85it/s] 25%|██▌ | 94294/371472 [7:30:11<20:39:33, 3.73it/s] 25%|██▌ | 94295/371472 [7:30:11<21:12:51, 3.63it/s] 25%|██▌ | 94296/371472 [7:30:11<21:20:20, 3.61it/s] 25%|██▌ | 94297/371472 [7:30:12<20:50:24, 3.69it/s] 25%|██▌ | 94298/371472 [7:30:12<21:14:46, 3.62it/s] 25%|██▌ | 94299/371472 [7:30:12<20:49:12, 3.70it/s] 25%|██▌ | 94300/371472 [7:30:12<20:26:55, 3.77it/s] {'loss': 4.0102, 'learning_rate': 7.718923053718028e-07, 'epoch': 4.06} + 25%|██▌ | 94300/371472 [7:30:12<20:26:55, 3.77it/s] 25%|██▌ | 94301/371472 [7:30:13<20:21:25, 3.78it/s] 25%|██▌ | 94302/371472 [7:30:13<21:36:12, 3.56it/s] 25%|██▌ | 94303/371472 [7:30:13<21:10:10, 3.64it/s] 25%|██▌ | 94304/371472 [7:30:13<20:48:52, 3.70it/s] 25%|██▌ | 94305/371472 [7:30:14<20:40:21, 3.72it/s] 25%|██▌ | 94306/371472 [7:30:14<21:05:00, 3.65it/s] 25%|██▌ | 94307/371472 [7:30:14<21:39:10, 3.56it/s] 25%|██▌ | 94308/371472 [7:30:15<20:51:25, 3.69it/s] 25%|██▌ | 94309/371472 [7:30:15<20:08:47, 3.82it/s] 25%|██▌ | 94310/371472 [7:30:15<20:37:39, 3.73it/s] 25%|██▌ | 94311/371472 [7:30:15<23:12:51, 3.32it/s] 25%|██▌ | 94312/371472 [7:30:16<23:33:01, 3.27it/s] 25%|██▌ | 94313/371472 [7:30:16<24:48:58, 3.10it/s] 25%|██▌ | 94314/371472 [7:30:16<23:56:42, 3.22it/s] 25%|██▌ | 94315/371472 [7:30:17<25:22:30, 3.03it/s] 25%|██▌ | 94316/371472 [7:30:17<23:36:54, 3.26it/s] 25%|██▌ | 94317/371472 [7:30:17<23:39:26, 3.25it/s] 25%|██▌ | 94318/371472 [7:30:18<23:22:54, 3.29it/s] 25%|██▌ | 94319/371472 [7:30:18<23:02:51, 3.34it/s] 25%|██▌ | 94320/371472 [7:30:18<23:37:02, 3.26it/s] {'loss': 3.4653, 'learning_rate': 7.71843823396324e-07, 'epoch': 4.06} + 25%|██▌ | 94320/371472 [7:30:18<23:37:02, 3.26it/s] 25%|██▌ | 94321/371472 [7:30:18<22:43:16, 3.39it/s] 25%|██▌ | 94322/371472 [7:30:19<22:08:57, 3.48it/s] 25%|██▌ | 94323/371472 [7:30:19<23:25:39, 3.29it/s] 25%|██▌ | 94324/371472 [7:30:19<22:21:06, 3.44it/s] 25%|██▌ | 94325/371472 [7:30:20<23:08:43, 3.33it/s] 25%|██▌ | 94326/371472 [7:30:20<22:28:57, 3.42it/s] 25%|██▌ | 94327/371472 [7:30:20<22:57:42, 3.35it/s] 25%|██▌ | 94328/371472 [7:30:21<21:43:08, 3.54it/s] 25%|██▌ | 94329/371472 [7:30:21<21:39:27, 3.55it/s] 25%|██▌ | 94330/371472 [7:30:21<20:45:17, 3.71it/s] 25%|██▌ | 94331/371472 [7:30:21<21:40:32, 3.55it/s] 25%|██▌ | 94332/371472 [7:30:22<21:39:57, 3.55it/s] 25%|██▌ | 94333/371472 [7:30:22<20:51:21, 3.69it/s] 25%|██▌ | 94334/371472 [7:30:22<21:03:43, 3.66it/s] 25%|██▌ | 94335/371472 [7:30:22<22:43:46, 3.39it/s] 25%|██▌ | 94336/371472 [7:30:23<22:09:03, 3.48it/s] 25%|██▌ | 94337/371472 [7:30:23<21:32:14, 3.57it/s] 25%|██▌ | 94338/371472 [7:30:23<21:45:59, 3.54it/s] 25%|██▌ | 94339/371472 [7:30:24<20:48:23, 3.70it/s] 25%|██▌ | 94340/371472 [7:30:24<21:42:08, 3.55it/s] {'loss': 3.581, 'learning_rate': 7.71795341420845e-07, 'epoch': 4.06} + 25%|██▌ | 94340/371472 [7:30:24<21:42:08, 3.55it/s] 25%|██▌ | 94341/371472 [7:30:24<21:29:31, 3.58it/s] 25%|██▌ | 94342/371472 [7:30:24<22:17:07, 3.45it/s] 25%|██▌ | 94343/371472 [7:30:25<21:28:45, 3.58it/s] 25%|██▌ | 94344/371472 [7:30:25<21:03:51, 3.65it/s] 25%|██▌ | 94345/371472 [7:30:25<21:22:02, 3.60it/s] 25%|██▌ | 94346/371472 [7:30:26<20:58:07, 3.67it/s] 25%|██▌ | 94347/371472 [7:30:26<21:04:39, 3.65it/s] 25%|██▌ | 94348/371472 [7:30:26<21:30:54, 3.58it/s] 25%|██▌ | 94349/371472 [7:30:26<22:18:44, 3.45it/s] 25%|██▌ | 94350/371472 [7:30:27<21:46:03, 3.54it/s] 25%|██▌ | 94351/371472 [7:30:27<22:01:10, 3.50it/s] 25%|██▌ | 94352/371472 [7:30:27<21:21:25, 3.60it/s] 25%|██▌ | 94353/371472 [7:30:27<21:24:23, 3.60it/s] 25%|██▌ | 94354/371472 [7:30:28<20:51:21, 3.69it/s] 25%|██▌ | 94355/371472 [7:30:28<23:50:04, 3.23it/s] 25%|██▌ | 94356/371472 [7:30:28<24:33:39, 3.13it/s] 25%|██▌ | 94357/371472 [7:30:29<24:07:59, 3.19it/s] 25%|██▌ | 94358/371472 [7:30:29<22:36:35, 3.40it/s] 25%|██▌ | 94359/371472 [7:30:29<22:29:46, 3.42it/s] 25%|██▌ | 94360/371472 [7:30:30<22:22:11, 3.44it/s] {'loss': 3.6084, 'learning_rate': 7.717468594453661e-07, 'epoch': 4.06} + 25%|██▌ | 94360/371472 [7:30:30<22:22:11, 3.44it/s] 25%|██▌ | 94361/371472 [7:30:30<21:56:06, 3.51it/s] 25%|██▌ | 94362/371472 [7:30:30<21:58:28, 3.50it/s] 25%|██▌ | 94363/371472 [7:30:31<24:05:50, 3.19it/s] 25%|██▌ | 94364/371472 [7:30:31<23:02:17, 3.34it/s] 25%|██▌ | 94365/371472 [7:30:31<22:06:42, 3.48it/s] 25%|██▌ | 94366/371472 [7:30:31<23:11:22, 3.32it/s] 25%|██▌ | 94367/371472 [7:30:32<22:00:08, 3.50it/s] 25%|██▌ | 94368/371472 [7:30:32<22:38:38, 3.40it/s] 25%|██▌ | 94369/371472 [7:30:32<21:54:35, 3.51it/s] 25%|██▌ | 94370/371472 [7:30:32<21:05:45, 3.65it/s] 25%|██▌ | 94371/371472 [7:30:33<21:13:38, 3.63it/s] 25%|██▌ | 94372/371472 [7:30:33<20:33:48, 3.74it/s] 25%|██▌ | 94373/371472 [7:30:33<21:43:37, 3.54it/s] 25%|██▌ | 94374/371472 [7:30:34<24:53:22, 3.09it/s] 25%|██▌ | 94375/371472 [7:30:34<24:19:34, 3.16it/s] 25%|██▌ | 94376/371472 [7:30:34<25:02:06, 3.07it/s] 25%|██▌ | 94377/371472 [7:30:35<25:50:01, 2.98it/s] 25%|██▌ | 94378/371472 [7:30:35<24:30:52, 3.14it/s] 25%|██▌ | 94379/371472 [7:30:35<24:36:48, 3.13it/s] 25%|██▌ | 94380/371472 [7:30:36<24:10:18, 3.18it/s] {'loss': 3.5158, 'learning_rate': 7.716983774698873e-07, 'epoch': 4.07} + 25%|██▌ | 94380/371472 [7:30:36<24:10:18, 3.18it/s] 25%|██▌ | 94381/371472 [7:30:36<23:24:20, 3.29it/s] 25%|██▌ | 94382/371472 [7:30:36<22:01:00, 3.50it/s] 25%|██▌ | 94383/371472 [7:30:36<21:47:16, 3.53it/s] 25%|██▌ | 94384/371472 [7:30:37<21:02:15, 3.66it/s] 25%|██▌ | 94385/371472 [7:30:37<20:26:59, 3.76it/s] 25%|██▌ | 94386/371472 [7:30:37<21:03:23, 3.66it/s] 25%|██▌ | 94387/371472 [7:30:37<20:24:45, 3.77it/s] 25%|██▌ | 94388/371472 [7:30:38<21:46:45, 3.53it/s] 25%|██▌ | 94389/371472 [7:30:38<21:36:20, 3.56it/s] 25%|██▌ | 94390/371472 [7:30:38<22:00:01, 3.50it/s] 25%|██▌ | 94391/371472 [7:30:39<21:24:36, 3.59it/s] 25%|██▌ | 94392/371472 [7:30:39<20:51:27, 3.69it/s] 25%|██▌ | 94393/371472 [7:30:39<20:13:12, 3.81it/s] 25%|██▌ | 94394/371472 [7:30:39<21:25:08, 3.59it/s] 25%|██▌ | 94395/371472 [7:30:40<21:44:25, 3.54it/s] 25%|██▌ | 94396/371472 [7:30:40<21:10:29, 3.63it/s] 25%|██▌ | 94397/371472 [7:30:40<20:55:10, 3.68it/s] 25%|██▌ | 94398/371472 [7:30:41<21:29:29, 3.58it/s] 25%|██▌ | 94399/371472 [7:30:41<21:24:09, 3.60it/s] 25%|██▌ | 94400/371472 [7:30:41<21:30:44, 3.58it/s] {'loss': 3.6894, 'learning_rate': 7.716498954944085e-07, 'epoch': 4.07} + 25%|██▌ | 94400/371472 [7:30:41<21:30:44, 3.58it/s] 25%|██▌ | 94401/371472 [7:30:41<21:16:49, 3.62it/s] 25%|██▌ | 94402/371472 [7:30:42<21:11:48, 3.63it/s] 25%|██▌ | 94403/371472 [7:30:42<21:38:48, 3.56it/s] 25%|██▌ | 94404/371472 [7:30:42<21:32:24, 3.57it/s] 25%|██▌ | 94405/371472 [7:30:43<21:01:01, 3.66it/s] 25%|██▌ | 94406/371472 [7:30:43<21:07:27, 3.64it/s] 25%|██▌ | 94407/371472 [7:30:43<21:35:47, 3.56it/s] 25%|██▌ | 94408/371472 [7:30:43<20:41:30, 3.72it/s] 25%|██▌ | 94409/371472 [7:30:44<20:32:54, 3.75it/s] 25%|██▌ | 94410/371472 [7:30:44<20:43:31, 3.71it/s] 25%|██▌ | 94411/371472 [7:30:44<21:00:38, 3.66it/s] 25%|██▌ | 94412/371472 [7:30:44<21:11:09, 3.63it/s] 25%|██▌ | 94413/371472 [7:30:45<22:38:56, 3.40it/s] 25%|██▌ | 94414/371472 [7:30:45<21:46:08, 3.54it/s] 25%|██▌ | 94415/371472 [7:30:45<20:44:15, 3.71it/s] 25%|██▌ | 94416/371472 [7:30:46<22:06:38, 3.48it/s] 25%|██▌ | 94417/371472 [7:30:46<21:42:39, 3.54it/s] 25%|██▌ | 94418/371472 [7:30:46<21:32:26, 3.57it/s] 25%|██▌ | 94419/371472 [7:30:46<21:20:54, 3.60it/s] 25%|██▌ | 94420/371472 [7:30:47<21:16:12, 3.62it/s] {'loss': 3.6205, 'learning_rate': 7.716014135189293e-07, 'epoch': 4.07} + 25%|██▌ | 94420/371472 [7:30:47<21:16:12, 3.62it/s] 25%|██▌ | 94421/371472 [7:30:47<20:58:20, 3.67it/s] 25%|██▌ | 94422/371472 [7:30:47<21:18:24, 3.61it/s] 25%|██▌ | 94423/371472 [7:30:47<21:06:53, 3.64it/s] 25%|██▌ | 94424/371472 [7:30:48<21:07:06, 3.64it/s] 25%|██▌ | 94425/371472 [7:30:48<21:13:35, 3.63it/s] 25%|██▌ | 94426/371472 [7:30:48<22:05:39, 3.48it/s] 25%|██▌ | 94427/371472 [7:30:49<23:25:42, 3.28it/s] 25%|██▌ | 94428/371472 [7:30:49<22:34:07, 3.41it/s] 25%|██▌ | 94429/371472 [7:30:49<22:32:49, 3.41it/s] 25%|██▌ | 94430/371472 [7:30:50<21:50:17, 3.52it/s] 25%|██▌ | 94431/371472 [7:30:50<21:12:13, 3.63it/s] 25%|██▌ | 94432/371472 [7:30:50<23:02:54, 3.34it/s] 25%|██▌ | 94433/371472 [7:30:50<22:31:57, 3.42it/s] 25%|██▌ | 94434/371472 [7:30:51<23:24:06, 3.29it/s] 25%|██▌ | 94435/371472 [7:30:51<22:00:59, 3.50it/s] 25%|██▌ | 94436/371472 [7:30:51<21:48:19, 3.53it/s] 25%|██▌ | 94437/371472 [7:30:52<20:58:51, 3.67it/s] 25%|██▌ | 94438/371472 [7:30:52<22:41:43, 3.39it/s] 25%|██▌ | 94439/371472 [7:30:52<22:39:08, 3.40it/s] 25%|██▌ | 94440/371472 [7:30:52<21:55:39, 3.51it/s] {'loss': 3.6581, 'learning_rate': 7.715529315434505e-07, 'epoch': 4.07} + 25%|██▌ | 94440/371472 [7:30:52<21:55:39, 3.51it/s] 25%|██▌ | 94441/371472 [7:30:53<21:37:58, 3.56it/s] 25%|██▌ | 94442/371472 [7:30:53<22:44:31, 3.38it/s] 25%|██▌ | 94443/371472 [7:30:53<21:50:40, 3.52it/s] 25%|██▌ | 94444/371472 [7:30:54<26:41:24, 2.88it/s] 25%|██▌ | 94445/371472 [7:30:54<24:09:43, 3.18it/s] 25%|██▌ | 94446/371472 [7:30:54<23:20:10, 3.30it/s] 25%|██▌ | 94447/371472 [7:30:55<22:30:36, 3.42it/s] 25%|██▌ | 94448/371472 [7:30:55<22:45:59, 3.38it/s] 25%|██▌ | 94449/371472 [7:30:55<22:46:42, 3.38it/s] 25%|██�� | 94450/371472 [7:30:55<23:03:53, 3.34it/s] 25%|██▌ | 94451/371472 [7:30:56<23:32:05, 3.27it/s] 25%|██▌ | 94452/371472 [7:30:56<22:49:42, 3.37it/s] 25%|██▌ | 94453/371472 [7:30:56<21:35:36, 3.56it/s] 25%|██▌ | 94454/371472 [7:30:57<22:11:11, 3.47it/s] 25%|██▌ | 94455/371472 [7:30:57<21:37:26, 3.56it/s] 25%|██▌ | 94456/371472 [7:30:57<22:00:48, 3.50it/s] 25%|██▌ | 94457/371472 [7:30:57<21:48:50, 3.53it/s] 25%|██▌ | 94458/371472 [7:30:58<21:34:42, 3.57it/s] 25%|██▌ | 94459/371472 [7:30:58<21:09:36, 3.64it/s] 25%|██▌ | 94460/371472 [7:30:58<21:15:21, 3.62it/s] {'loss': 3.593, 'learning_rate': 7.715044495679717e-07, 'epoch': 4.07} + 25%|██▌ | 94460/371472 [7:30:58<21:15:21, 3.62it/s] 25%|██▌ | 94461/371472 [7:30:59<20:35:29, 3.74it/s] 25%|██▌ | 94462/371472 [7:30:59<19:52:33, 3.87it/s] 25%|██▌ | 94463/371472 [7:30:59<20:49:35, 3.69it/s] 25%|██▌ | 94464/371472 [7:30:59<22:05:40, 3.48it/s] 25%|██▌ | 94465/371472 [7:31:00<24:03:33, 3.20it/s] 25%|██▌ | 94466/371472 [7:31:00<23:32:37, 3.27it/s] 25%|██▌ | 94467/371472 [7:31:00<22:08:54, 3.47it/s] 25%|██▌ | 94468/371472 [7:31:01<21:15:16, 3.62it/s] 25%|██▌ | 94469/371472 [7:31:01<20:24:33, 3.77it/s] 25%|██▌ | 94470/371472 [7:31:01<21:16:51, 3.62it/s] 25%|██▌ | 94471/371472 [7:31:01<20:07:39, 3.82it/s] 25%|██▌ | 94472/371472 [7:31:02<19:40:50, 3.91it/s] 25%|██▌ | 94473/371472 [7:31:02<20:00:57, 3.84it/s] 25%|██▌ | 94474/371472 [7:31:02<19:31:45, 3.94it/s] 25%|██▌ | 94475/371472 [7:31:02<19:36:30, 3.92it/s] 25%|██▌ | 94476/371472 [7:31:03<19:44:00, 3.90it/s] 25%|██▌ | 94477/371472 [7:31:03<20:54:37, 3.68it/s] 25%|██▌ | 94478/371472 [7:31:03<20:26:26, 3.76it/s] 25%|██▌ | 94479/371472 [7:31:04<23:09:00, 3.32it/s] 25%|██▌ | 94480/371472 [7:31:04<23:29:19, 3.28it/s] {'loss': 3.5971, 'learning_rate': 7.714559675924929e-07, 'epoch': 4.07} + 25%|██▌ | 94480/371472 [7:31:04<23:29:19, 3.28it/s] 25%|██▌ | 94481/371472 [7:31:04<22:07:15, 3.48it/s] 25%|██▌ | 94482/371472 [7:31:04<21:13:22, 3.63it/s] 25%|██▌ | 94483/371472 [7:31:05<20:44:37, 3.71it/s] 25%|██▌ | 94484/371472 [7:31:05<21:28:08, 3.58it/s] 25%|██▌ | 94485/371472 [7:31:05<21:08:34, 3.64it/s] 25%|██▌ | 94486/371472 [7:31:05<20:22:42, 3.78it/s] 25%|██▌ | 94487/371472 [7:31:06<19:50:33, 3.88it/s] 25%|██▌ | 94488/371472 [7:31:06<20:15:36, 3.80it/s] 25%|██▌ | 94489/371472 [7:31:06<20:43:21, 3.71it/s] 25%|██▌ | 94490/371472 [7:31:06<21:07:02, 3.64it/s] 25%|██▌ | 94491/371472 [7:31:07<21:46:28, 3.53it/s] 25%|██▌ | 94492/371472 [7:31:07<21:37:40, 3.56it/s] 25%|██▌ | 94493/371472 [7:31:07<21:05:33, 3.65it/s] 25%|██▌ | 94494/371472 [7:31:08<20:35:46, 3.74it/s] 25%|██▌ | 94495/371472 [7:31:08<21:16:46, 3.62it/s] 25%|██▌ | 94496/371472 [7:31:08<22:05:12, 3.48it/s] 25%|██▌ | 94497/371472 [7:31:08<21:38:57, 3.55it/s] 25%|██▌ | 94498/371472 [7:31:09<22:16:16, 3.45it/s] 25%|██▌ | 94499/371472 [7:31:09<22:41:30, 3.39it/s] 25%|██▌ | 94500/371472 [7:31:09<22:42:25, 3.39it/s] {'loss': 3.6637, 'learning_rate': 7.714074856170139e-07, 'epoch': 4.07} + 25%|██▌ | 94500/371472 [7:31:09<22:42:25, 3.39it/s] 25%|██▌ | 94501/371472 [7:31:10<21:48:43, 3.53it/s] 25%|██▌ | 94502/371472 [7:31:10<21:06:37, 3.64it/s] 25%|██▌ | 94503/371472 [7:31:10<20:19:24, 3.79it/s] 25%|██▌ | 94504/371472 [7:31:10<20:42:53, 3.71it/s] 25%|██▌ | 94505/371472 [7:31:11<20:45:32, 3.71it/s] 25%|██▌ | 94506/371472 [7:31:11<21:17:45, 3.61it/s] 25%|██▌ | 94507/371472 [7:31:11<21:26:25, 3.59it/s] 25%|██▌ | 94508/371472 [7:31:12<22:09:25, 3.47it/s] 25%|██▌ | 94509/371472 [7:31:12<21:54:51, 3.51it/s] 25%|██▌ | 94510/371472 [7:31:12<21:09:34, 3.64it/s] 25%|██▌ | 94511/371472 [7:31:12<24:11:54, 3.18it/s] 25%|██▌ | 94512/371472 [7:31:13<23:59:54, 3.21it/s] 25%|██▌ | 94513/371472 [7:31:13<23:34:20, 3.26it/s] 25%|██▌ | 94514/371472 [7:31:13<22:21:16, 3.44it/s] 25%|██▌ | 94515/371472 [7:31:14<21:37:38, 3.56it/s] 25%|██▌ | 94516/371472 [7:31:14<20:54:18, 3.68it/s] 25%|██▌ | 94517/371472 [7:31:14<21:35:05, 3.56it/s] 25%|██▌ | 94518/371472 [7:31:14<22:38:04, 3.40it/s] 25%|██▌ | 94519/371472 [7:31:15<22:09:32, 3.47it/s] 25%|██▌ | 94520/371472 [7:31:15<24:12:29, 3.18it/s] {'loss': 3.4868, 'learning_rate': 7.713590036415351e-07, 'epoch': 4.07} + 25%|██▌ | 94520/371472 [7:31:15<24:12:29, 3.18it/s] 25%|██▌ | 94521/371472 [7:31:15<23:02:14, 3.34it/s] 25%|██▌ | 94522/371472 [7:31:16<22:01:18, 3.49it/s] 25%|██▌ | 94523/371472 [7:31:16<21:22:44, 3.60it/s] 25%|██▌ | 94524/371472 [7:31:16<21:30:10, 3.58it/s] 25%|██▌ | 94525/371472 [7:31:16<21:30:58, 3.58it/s] 25%|██▌ | 94526/371472 [7:31:17<20:58:51, 3.67it/s] 25%|██▌ | 94527/371472 [7:31:17<20:24:20, 3.77it/s] 25%|██▌ | 94528/371472 [7:31:17<21:42:28, 3.54it/s] 25%|██▌ | 94529/371472 [7:31:18<20:44:18, 3.71it/s] 25%|██▌ | 94530/371472 [7:31:18<20:28:33, 3.76it/s] 25%|██▌ | 94531/371472 [7:31:18<21:10:53, 3.63it/s] 25%|██▌ | 94532/371472 [7:31:18<21:50:47, 3.52it/s] 25%|██▌ | 94533/371472 [7:31:19<21:12:07, 3.63it/s] 25%|██▌ | 94534/371472 [7:31:19<21:56:45, 3.51it/s] 25%|██▌ | 94535/371472 [7:31:19<21:45:34, 3.54it/s] 25%|██▌ | 94536/371472 [7:31:19<20:59:47, 3.66it/s] 25%|██▌ | 94537/371472 [7:31:20<21:01:43, 3.66it/s] 25%|██▌ | 94538/371472 [7:31:20<20:25:28, 3.77it/s] 25%|██▌ | 94539/371472 [7:31:20<20:36:22, 3.73it/s] 25%|██▌ | 94540/371472 [7:31:21<21:52:48, 3.52it/s] {'loss': 3.6145, 'learning_rate': 7.713105216660561e-07, 'epoch': 4.07} + 25%|██▌ | 94540/371472 [7:31:21<21:52:48, 3.52it/s] 25%|██▌ | 94541/371472 [7:31:21<23:17:18, 3.30it/s] 25%|██▌ | 94542/371472 [7:31:21<23:49:59, 3.23it/s] 25%|██▌ | 94543/371472 [7:31:22<22:57:10, 3.35it/s] 25%|██▌ | 94544/371472 [7:31:22<22:21:23, 3.44it/s] 25%|██▌ | 94545/371472 [7:31:22<21:23:05, 3.60it/s] 25%|██▌ | 94546/371472 [7:31:22<20:35:01, 3.74it/s] 25%|██▌ | 94547/371472 [7:31:23<20:22:31, 3.78it/s] 25%|██▌ | 94548/371472 [7:31:23<20:52:06, 3.69it/s] 25%|██▌ | 94549/371472 [7:31:23<20:21:44, 3.78it/s] 25%|██▌ | 94550/371472 [7:31:23<20:01:40, 3.84it/s] 25%|██▌ | 94551/371472 [7:31:24<20:19:17, 3.79it/s] 25%|██▌ | 94552/371472 [7:31:24<21:51:08, 3.52it/s] 25%|██▌ | 94553/371472 [7:31:24<21:43:04, 3.54it/s] 25%|██▌ | 94554/371472 [7:31:24<21:13:51, 3.62it/s] 25%|██▌ | 94555/371472 [7:31:25<21:55:50, 3.51it/s] 25%|██▌ | 94556/371472 [7:31:25<20:52:23, 3.69it/s] 25%|██▌ | 94557/371472 [7:31:25<20:46:05, 3.70it/s] 25%|██▌ | 94558/371472 [7:31:26<20:35:19, 3.74it/s] 25%|██▌ | 94559/371472 [7:31:26<21:37:22, 3.56it/s] 25%|██▌ | 94560/371472 [7:31:26<21:00:38, 3.66it/s] {'loss': 3.6797, 'learning_rate': 7.712620396905773e-07, 'epoch': 4.07} + 25%|██▌ | 94560/371472 [7:31:26<21:00:38, 3.66it/s] 25%|██▌ | 94561/371472 [7:31:26<21:27:33, 3.58it/s] 25%|██▌ | 94562/371472 [7:31:27<21:00:27, 3.66it/s] 25%|██▌ | 94563/371472 [7:31:27<20:24:46, 3.77it/s] 25%|██▌ | 94564/371472 [7:31:27<24:38:05, 3.12it/s] 25%|██▌ | 94565/371472 [7:31:28<24:06:19, 3.19it/s] 25%|██▌ | 94566/371472 [7:31:28<22:40:51, 3.39it/s] 25%|██▌ | 94567/371472 [7:31:28<21:55:36, 3.51it/s] 25%|██▌ | 94568/371472 [7:31:28<21:45:39, 3.53it/s] 25%|██▌ | 94569/371472 [7:31:29<21:58:48, 3.50it/s] 25%|██▌ | 94570/371472 [7:31:29<23:00:03, 3.34it/s] 25%|██▌ | 94571/371472 [7:31:29<22:09:13, 3.47it/s] 25%|██▌ | 94572/371472 [7:31:30<22:32:39, 3.41it/s] 25%|██▌ | 94573/371472 [7:31:30<22:06:24, 3.48it/s] 25%|██▌ | 94574/371472 [7:31:30<23:13:59, 3.31it/s] 25%|██▌ | 94575/371472 [7:31:31<22:39:11, 3.40it/s] 25%|██▌ | 94576/371472 [7:31:31<21:22:14, 3.60it/s] 25%|██▌ | 94577/371472 [7:31:31<20:39:39, 3.72it/s] 25%|██▌ | 94578/371472 [7:31:31<20:28:14, 3.76it/s] 25%|██▌ | 94579/371472 [7:31:32<20:20:04, 3.78it/s] 25%|██▌ | 94580/371472 [7:31:32<19:42:00, 3.90it/s] {'loss': 3.6024, 'learning_rate': 7.712135577150983e-07, 'epoch': 4.07} + 25%|██▌ | 94580/371472 [7:31:32<19:42:00, 3.90it/s] 25%|██▌ | 94581/371472 [7:31:32<20:12:50, 3.80it/s] 25%|██▌ | 94582/371472 [7:31:32<20:27:02, 3.76it/s] 25%|██▌ | 94583/371472 [7:31:33<22:23:14, 3.44it/s] 25%|█��▌ | 94584/371472 [7:31:33<21:19:18, 3.61it/s] 25%|██▌ | 94585/371472 [7:31:33<20:51:56, 3.69it/s] 25%|██▌ | 94586/371472 [7:31:33<20:11:49, 3.81it/s] 25%|██▌ | 94587/371472 [7:31:34<19:59:06, 3.85it/s] 25%|██▌ | 94588/371472 [7:31:34<20:18:42, 3.79it/s] 25%|██▌ | 94589/371472 [7:31:34<19:40:22, 3.91it/s] 25%|██▌ | 94590/371472 [7:31:34<19:33:00, 3.93it/s] 25%|██▌ | 94591/371472 [7:31:35<20:19:45, 3.78it/s] 25%|██▌ | 94592/371472 [7:31:35<20:50:21, 3.69it/s] 25%|██▌ | 94593/371472 [7:31:35<20:47:20, 3.70it/s] 25%|██▌ | 94594/371472 [7:31:36<22:01:38, 3.49it/s] 25%|██▌ | 94595/371472 [7:31:36<22:30:17, 3.42it/s] 25%|██▌ | 94596/371472 [7:31:36<22:13:22, 3.46it/s] 25%|██▌ | 94597/371472 [7:31:36<21:12:21, 3.63it/s] 25%|██▌ | 94598/371472 [7:31:37<20:48:15, 3.70it/s] 25%|██▌ | 94599/371472 [7:31:37<21:11:19, 3.63it/s] 25%|██▌ | 94600/371472 [7:31:37<20:27:28, 3.76it/s] {'loss': 3.6174, 'learning_rate': 7.711650757396194e-07, 'epoch': 4.07} + 25%|██▌ | 94600/371472 [7:31:37<20:27:28, 3.76it/s] 25%|██▌ | 94601/371472 [7:31:37<20:34:21, 3.74it/s] 25%|██▌ | 94602/371472 [7:31:38<21:48:07, 3.53it/s] 25%|██▌ | 94603/371472 [7:31:38<21:02:23, 3.66it/s] 25%|██▌ | 94604/371472 [7:31:38<21:19:24, 3.61it/s] 25%|██▌ | 94605/371472 [7:31:39<20:59:43, 3.66it/s] 25%|██▌ | 94606/371472 [7:31:39<20:41:47, 3.72it/s] 25%|██▌ | 94607/371472 [7:31:39<20:42:47, 3.71it/s] 25%|██▌ | 94608/371472 [7:31:39<21:10:48, 3.63it/s] 25%|██▌ | 94609/371472 [7:31:40<21:57:35, 3.50it/s] 25%|██▌ | 94610/371472 [7:31:40<21:14:42, 3.62it/s] 25%|██▌ | 94611/371472 [7:31:40<20:31:26, 3.75it/s] 25%|██▌ | 94612/371472 [7:31:41<20:55:11, 3.68it/s] 25%|██▌ | 94613/371472 [7:31:41<22:15:01, 3.46it/s] 25%|██▌ | 94614/371472 [7:31:41<21:11:59, 3.63it/s] 25%|██▌ | 94615/371472 [7:31:41<21:25:30, 3.59it/s] 25%|██▌ | 94616/371472 [7:31:42<21:27:52, 3.58it/s] 25%|██▌ | 94617/371472 [7:31:42<20:37:07, 3.73it/s] 25%|██▌ | 94618/371472 [7:31:42<19:54:47, 3.86it/s] 25%|██▌ | 94619/371472 [7:31:42<19:40:09, 3.91it/s] 25%|██▌ | 94620/371472 [7:31:43<20:02:47, 3.84it/s] {'loss': 3.6744, 'learning_rate': 7.711165937641406e-07, 'epoch': 4.08} + 25%|██▌ | 94620/371472 [7:31:43<20:02:47, 3.84it/s] 25%|██▌ | 94621/371472 [7:31:43<20:06:49, 3.82it/s] 25%|██▌ | 94622/371472 [7:31:43<19:56:46, 3.86it/s] 25%|██▌ | 94623/371472 [7:31:43<19:24:36, 3.96it/s] 25%|██▌ | 94624/371472 [7:31:44<19:46:48, 3.89it/s] 25%|██▌ | 94625/371472 [7:31:44<20:37:21, 3.73it/s] 25%|██▌ | 94626/371472 [7:31:44<20:32:33, 3.74it/s] 25%|██▌ | 94627/371472 [7:31:44<20:08:43, 3.82it/s] 25%|██▌ | 94628/371472 [7:31:45<20:47:56, 3.70it/s] 25%|██▌ | 94629/371472 [7:31:45<20:39:26, 3.72it/s] 25%|██▌ | 94630/371472 [7:31:45<20:08:15, 3.82it/s] 25%|██▌ | 94631/371472 [7:31:46<20:41:10, 3.72it/s] 25%|██▌ | 94632/371472 [7:31:46<21:22:42, 3.60it/s] 25%|██▌ | 94633/371472 [7:31:46<22:07:43, 3.48it/s] 25%|██▌ | 94634/371472 [7:31:46<21:12:57, 3.62it/s] 25%|██▌ | 94635/371472 [7:31:47<21:42:24, 3.54it/s] 25%|██▌ | 94636/371472 [7:31:47<22:00:56, 3.49it/s] 25%|██▌ | 94637/371472 [7:31:47<21:21:38, 3.60it/s] 25%|██▌ | 94638/371472 [7:31:48<21:00:18, 3.66it/s] 25%|██▌ | 94639/371472 [7:31:48<22:05:38, 3.48it/s] 25%|██▌ | 94640/371472 [7:31:48<21:44:25, 3.54it/s] {'loss': 3.6869, 'learning_rate': 7.710681117886617e-07, 'epoch': 4.08} + 25%|██▌ | 94640/371472 [7:31:48<21:44:25, 3.54it/s] 25%|██▌ | 94641/371472 [7:31:48<21:20:37, 3.60it/s] 25%|██▌ | 94642/371472 [7:31:49<20:46:56, 3.70it/s] 25%|██▌ | 94643/371472 [7:31:49<21:22:23, 3.60it/s] 25%|██▌ | 94644/371472 [7:31:49<21:03:32, 3.65it/s] 25%|██▌ | 94645/371472 [7:31:50<21:39:42, 3.55it/s] 25%|██▌ | 94646/371472 [7:31:50<20:40:32, 3.72it/s] 25%|██▌ | 94647/371472 [7:31:50<20:40:21, 3.72it/s] 25%|██▌ | 94648/371472 [7:31:50<20:29:11, 3.75it/s] 25%|██▌ | 94649/371472 [7:31:51<20:49:59, 3.69it/s] 25%|██▌ | 94650/371472 [7:31:51<21:11:52, 3.63it/s] 25%|██▌ | 94651/371472 [7:31:51<21:07:24, 3.64it/s] 25%|██▌ | 94652/371472 [7:31:51<20:28:20, 3.76it/s] 25%|██▌ | 94653/371472 [7:31:52<19:44:03, 3.90it/s] 25%|██▌ | 94654/371472 [7:31:52<19:36:09, 3.92it/s] 25%|██▌ | 94655/371472 [7:31:52<19:43:58, 3.90it/s] 25%|██▌ | 94656/371472 [7:31:53<22:24:26, 3.43it/s] 25%|██▌ | 94657/371472 [7:31:53<21:45:40, 3.53it/s] 25%|██▌ | 94658/371472 [7:31:53<20:48:19, 3.70it/s] 25%|██▌ | 94659/371472 [7:31:53<21:14:00, 3.62it/s] 25%|██▌ | 94660/371472 [7:31:54<20:54:06, 3.68it/s] {'loss': 3.6593, 'learning_rate': 7.710196298131828e-07, 'epoch': 4.08} + 25%|██▌ | 94660/371472 [7:31:54<20:54:06, 3.68it/s] 25%|██▌ | 94661/371472 [7:31:54<20:17:54, 3.79it/s] 25%|██▌ | 94662/371472 [7:31:54<20:15:23, 3.80it/s] 25%|██▌ | 94663/371472 [7:31:54<19:56:42, 3.86it/s] 25%|██▌ | 94664/371472 [7:31:55<21:40:41, 3.55it/s] 25%|██▌ | 94665/371472 [7:31:55<21:00:38, 3.66it/s] 25%|██▌ | 94666/371472 [7:31:55<20:46:02, 3.70it/s] 25%|██▌ | 94667/371472 [7:31:55<21:57:57, 3.50it/s] 25%|██▌ | 94668/371472 [7:31:56<21:08:34, 3.64it/s] 25%|██▌ | 94669/371472 [7:31:56<21:20:54, 3.60it/s] 25%|██▌ | 94670/371472 [7:31:56<20:38:22, 3.73it/s] 25%|██▌ | 94671/371472 [7:31:57<22:33:43, 3.41it/s] 25%|██▌ | 94672/371472 [7:31:57<21:49:46, 3.52it/s] 25%|██▌ | 94673/371472 [7:31:57<21:43:02, 3.54it/s] 25%|██▌ | 94674/371472 [7:31:57<21:13:45, 3.62it/s] 25%|██▌ | 94675/371472 [7:31:58<20:28:42, 3.75it/s] 25%|██▌ | 94676/371472 [7:31:58<21:19:23, 3.61it/s] 25%|██▌ | 94677/371472 [7:31:58<22:16:00, 3.45it/s] 25%|██▌ | 94678/371472 [7:31:59<21:46:28, 3.53it/s] 25%|██▌ | 94679/371472 [7:31:59<21:33:33, 3.57it/s] 25%|██▌ | 94680/371472 [7:31:59<21:04:27, 3.65it/s] {'loss': 3.5935, 'learning_rate': 7.709711478377038e-07, 'epoch': 4.08} + 25%|██▌ | 94680/371472 [7:31:59<21:04:27, 3.65it/s] 25%|██▌ | 94681/371472 [7:31:59<21:10:56, 3.63it/s] 25%|██▌ | 94682/371472 [7:32:00<20:26:04, 3.76it/s] 25%|██▌ | 94683/371472 [7:32:00<20:34:20, 3.74it/s] 25%|██▌ | 94684/371472 [7:32:00<22:28:30, 3.42it/s] 25%|██▌ | 94685/371472 [7:32:01<22:52:59, 3.36it/s] 25%|██▌ | 94686/371472 [7:32:01<22:21:19, 3.44it/s] 25%|██▌ | 94687/371472 [7:32:01<22:23:14, 3.43it/s] 25%|██▌ | 94688/371472 [7:32:01<23:04:01, 3.33it/s] 25%|██▌ | 94689/371472 [7:32:02<22:04:42, 3.48it/s] 25%|██▌ | 94690/371472 [7:32:02<21:44:51, 3.54it/s] 25%|██▌ | 94691/371472 [7:32:02<21:54:25, 3.51it/s] 25%|██▌ | 94692/371472 [7:32:02<21:02:27, 3.65it/s] 25%|██▌ | 94693/371472 [7:32:03<20:36:07, 3.73it/s] 25%|██▌ | 94694/371472 [7:32:03<21:13:52, 3.62it/s] 25%|██▌ | 94695/371472 [7:32:03<20:57:41, 3.67it/s] 25%|██▌ | 94696/371472 [7:32:04<22:43:50, 3.38it/s] 25%|██▌ | 94697/371472 [7:32:04<22:42:44, 3.39it/s] 25%|██▌ | 94698/371472 [7:32:04<21:36:26, 3.56it/s] 25%|██▌ | 94699/371472 [7:32:04<21:10:33, 3.63it/s] 25%|██▌ | 94700/371472 [7:32:05<20:03:45, 3.83it/s] {'loss': 3.604, 'learning_rate': 7.70922665862225e-07, 'epoch': 4.08} + 25%|██▌ | 94700/371472 [7:32:05<20:03:45, 3.83it/s] 25%|██▌ | 94701/371472 [7:32:05<22:43:57, 3.38it/s] 25%|██▌ | 94702/371472 [7:32:05<22:08:13, 3.47it/s] 25%|██▌ | 94703/371472 [7:32:06<21:50:31, 3.52it/s] 25%|██▌ | 94704/371472 [7:32:06<21:56:48, 3.50it/s] 25%|██▌ | 94705/371472 [7:32:06<21:15:01, 3.62it/s] 25%|██▌ | 94706/371472 [7:32:06<20:28:16, 3.76it/s] 25%|██▌ | 94707/371472 [7:32:07<21:07:00, 3.64it/s] 25%|██▌ | 94708/371472 [7:32:07<20:45:08, 3.70it/s] 25%|██▌ | 94709/371472 [7:32:07<20:04:33, 3.83it/s] 25%|██▌ | 94710/371472 [7:32:07<19:54:34, 3.86it/s] 25%|██▌ | 94711/371472 [7:32:08<20:19:34, 3.78it/s] 25%|██▌ | 94712/371472 [7:32:08<20:56:58, 3.67it/s] 25%|██▌ | 94713/371472 [7:32:08<22:25:35, 3.43it/s] 25%|██▌ | 94714/371472 [7:32:09<21:24:52, 3.59it/s] 25%|██▌ | 94715/371472 [7:32:09<21:27:38, 3.58it/s] 25%|██▌ | 94716/371472 [7:32:09<21:05:44, 3.64it/s] 25%|██▌ | 94717/371472 [7:32:09<20:51:59, 3.68it/s] 25%|██▌ | 94718/371472 [7:32:10<21:16:55, 3.61it/s] 25%|██▌ | 94719/371472 [7:32:10<20:34:18, 3.74it/s] 25%|██▌ | 94720/371472 [7:32:10<20:21:34, 3.78it/s] {'loss': 3.7207, 'learning_rate': 7.708741838867461e-07, 'epoch': 4.08} + 25%|██▌ | 94720/371472 [7:32:10<20:21:34, 3.78it/s] 25%|██▌ | 94721/371472 [7:32:10<20:19:57, 3.78it/s] 25%|██▌ | 94722/371472 [7:32:11<21:45:03, 3.53it/s] 25%|██▌ | 94723/371472 [7:32:11<21:58:36, 3.50it/s] 25%|██▌ | 94724/371472 [7:32:11<22:45:55, 3.38it/s] 25%|██▌ | 94725/371472 [7:32:12<21:43:53, 3.54it/s] 26%|██▌ | 94726/371472 [7:32:12<21:09:20, 3.63it/s] 26%|██▌ | 94727/371472 [7:32:12<25:25:16, 3.02it/s] 26%|██▌ | 94728/371472 [7:32:13<23:44:41, 3.24it/s] 26%|██▌ | 94729/371472 [7:32:13<23:47:52, 3.23it/s] 26%|██▌ | 94730/371472 [7:32:13<23:19:06, 3.30it/s] 26%|██▌ | 94731/371472 [7:32:13<21:58:44, 3.50it/s] 26%|██▌ | 94732/371472 [7:32:14<21:58:26, 3.50it/s] 26%|██▌ | 94733/371472 [7:32:14<21:46:48, 3.53it/s] 26%|██▌ | 94734/371472 [7:32:14<21:38:40, 3.55it/s] 26%|██▌ | 94735/371472 [7:32:15<23:08:40, 3.32it/s] 26%|██▌ | 94736/371472 [7:32:15<22:27:03, 3.42it/s] 26%|██▌ | 94737/371472 [7:32:15<21:50:58, 3.52it/s] 26%|██▌ | 94738/371472 [7:32:16<22:25:57, 3.43it/s] 26%|██▌ | 94739/371472 [7:32:16<21:12:07, 3.63it/s] 26%|██▌ | 94740/371472 [7:32:16<21:01:32, 3.66it/s] {'loss': 3.6882, 'learning_rate': 7.708257019112672e-07, 'epoch': 4.08} + 26%|██▌ | 94740/371472 [7:32:16<21:01:32, 3.66it/s] 26%|██▌ | 94741/371472 [7:32:16<21:22:48, 3.60it/s] 26%|██▌ | 94742/371472 [7:32:17<20:32:11, 3.74it/s] 26%|██▌ | 94743/371472 [7:32:17<20:14:48, 3.80it/s] 26%|██▌ | 94744/371472 [7:32:17<21:30:05, 3.58it/s] 26%|██▌ | 94745/371472 [7:32:17<21:06:00, 3.64it/s] 26%|██▌ | 94746/371472 [7:32:18<21:00:47, 3.66it/s] 26%|██▌ | 94747/371472 [7:32:18<20:40:55, 3.72it/s] 26%|██▌ | 94748/371472 [7:32:18<20:29:22, 3.75it/s] 26%|██▌ | 94749/371472 [7:32:18<21:03:04, 3.65it/s] 26%|██▌ | 94750/371472 [7:32:19<20:22:07, 3.77it/s] 26%|██▌ | 94751/371472 [7:32:19<20:30:51, 3.75it/s] 26%|██▌ | 94752/371472 [7:32:19<21:56:51, 3.50it/s] 26%|██▌ | 94753/371472 [7:32:20<22:10:04, 3.47it/s] 26%|██▌ | 94754/371472 [7:32:20<23:10:55, 3.32it/s] 26%|██▌ | 94755/371472 [7:32:20<23:21:28, 3.29it/s] 26%|██▌ | 94756/371472 [7:32:21<22:18:18, 3.45it/s] 26%|██▌ | 94757/371472 [7:32:21<22:20:01, 3.44it/s] 26%|██▌ | 94758/371472 [7:32:21<23:01:16, 3.34it/s] 26%|██▌ | 94759/371472 [7:32:21<22:15:40, 3.45it/s] 26%|██▌ | 94760/371472 [7:32:22<21:09:08, 3.63it/s] {'loss': 3.7115, 'learning_rate': 7.707772199357883e-07, 'epoch': 4.08} + 26%|██▌ | 94760/371472 [7:32:22<21:09:08, 3.63it/s] 26%|██▌ | 94761/371472 [7:32:22<21:00:33, 3.66it/s] 26%|██▌ | 94762/371472 [7:32:22<20:51:17, 3.69it/s] 26%|██▌ | 94763/371472 [7:32:22<20:32:10, 3.74it/s] 26%|██▌ | 94764/371472 [7:32:23<20:28:53, 3.75it/s] 26%|██▌ | 94765/371472 [7:32:23<20:44:35, 3.71it/s] 26%|██▌ | 94766/371472 [7:32:23<20:08:15, 3.82it/s] 26%|██▌ | 94767/371472 [7:32:23<20:23:27, 3.77it/s] 26%|██▌ | 94768/371472 [7:32:24<22:12:35, 3.46it/s] 26%|██▌ | 94769/371472 [7:32:24<22:53:47, 3.36it/s] 26%|██▌ | 94770/371472 [7:32:24<22:11:24, 3.46it/s] 26%|██▌ | 94771/371472 [7:32:25<21:57:27, 3.50it/s] 26%|██▌ | 94772/371472 [7:32:25<21:16:05, 3.61it/s] 26%|██▌ | 94773/371472 [7:32:25<22:08:02, 3.47it/s] 26%|██▌ | 94774/371472 [7:32:26<21:20:45, 3.60it/s] 26%|██▌ | 94775/371472 [7:32:26<21:13:55, 3.62it/s] 26%|██▌ | 94776/371472 [7:32:26<20:43:31, 3.71it/s] 26%|██▌ | 94777/371472 [7:32:26<20:30:18, 3.75it/s] 26%|██▌ | 94778/371472 [7:32:27<19:56:05, 3.86it/s] 26%|██▌ | 94779/371472 [7:32:27<19:44:19, 3.89it/s] 26%|██▌ | 94780/371472 [7:32:27<19:39:51, 3.91it/s] {'loss': 3.607, 'learning_rate': 7.707287379603095e-07, 'epoch': 4.08} + 26%|██▌ | 94780/371472 [7:32:27<19:39:51, 3.91it/s] 26%|██▌ | 94781/371472 [7:32:27<19:29:29, 3.94it/s] 26%|██▌ | 94782/371472 [7:32:28<19:44:30, 3.89it/s] 26%|██▌ | 94783/371472 [7:32:28<20:35:07, 3.73it/s] 26%|██▌ | 94784/371472 [7:32:28<20:26:36, 3.76it/s] 26%|██▌ | 94785/371472 [7:32:28<21:05:11, 3.64it/s] 26%|██▌ | 94786/371472 [7:32:29<21:41:14, 3.54it/s] 26%|██▌ | 94787/371472 [7:32:29<20:58:52, 3.66it/s] 26%|██▌ | 94788/371472 [7:32:29<21:23:15, 3.59it/s] 26%|██▌ | 94789/371472 [7:32:29<20:31:11, 3.75it/s] 26%|██▌ | 94790/371472 [7:32:30<21:15:43, 3.61it/s] 26%|██▌ | 94791/371472 [7:32:30<20:47:19, 3.70it/s] 26%|██▌ | 94792/371472 [7:32:30<22:06:49, 3.48it/s] 26%|██▌ | 94793/371472 [7:32:31<21:33:59, 3.56it/s] 26%|██▌ | 94794/371472 [7:32:31<21:20:52, 3.60it/s] 26%|██▌ | 94795/371472 [7:32:31<21:22:08, 3.60it/s] 26%|██▌ | 94796/371472 [7:32:31<20:47:50, 3.70it/s] 26%|██▌ | 94797/371472 [7:32:32<20:31:31, 3.74it/s] 26%|██▌ | 94798/371472 [7:32:32<21:14:19, 3.62it/s] 26%|██▌ | 94799/371472 [7:32:32<20:39:00, 3.72it/s] 26%|██▌ | 94800/371472 [7:32:33<22:45:14, 3.38it/s] {'loss': 3.9763, 'learning_rate': 7.706802559848304e-07, 'epoch': 4.08} + 26%|██▌ | 94800/371472 [7:32:33<22:45:14, 3.38it/s] 26%|██▌ | 94801/371472 [7:32:33<22:45:24, 3.38it/s] 26%|██▌ | 94802/371472 [7:32:33<23:27:03, 3.28it/s] 26%|██▌ | 94803/371472 [7:32:34<24:23:20, 3.15it/s] 26%|██▌ | 94804/371472 [7:32:34<23:06:10, 3.33it/s] 26%|██▌ | 94805/371472 [7:32:34<21:36:20, 3.56it/s] 26%|██▌ | 94806/371472 [7:32:34<24:11:46, 3.18it/s] 26%|██▌ | 94807/371472 [7:32:35<23:32:17, 3.26it/s] 26%|██▌ | 94808/371472 [7:32:35<22:17:10, 3.45it/s] 26%|██▌ | 94809/371472 [7:32:35<22:45:07, 3.38it/s] 26%|██▌ | 94810/371472 [7:32:36<21:57:27, 3.50it/s] 26%|██▌ | 94811/371472 [7:32:36<22:18:28, 3.44it/s] 26%|██▌ | 94812/371472 [7:32:36<21:29:11, 3.58it/s] 26%|██▌ | 94813/371472 [7:32:36<21:26:35, 3.58it/s] 26%|██▌ | 94814/371472 [7:32:37<20:28:23, 3.75it/s] 26%|██▌ | 94815/371472 [7:32:37<20:30:52, 3.75it/s] 26%|██▌ | 94816/371472 [7:32:37<20:22:17, 3.77it/s] 26%|██▌ | 94817/371472 [7:32:37<20:51:27, 3.68it/s] 26%|██▌ | 94818/371472 [7:32:38<20:08:33, 3.82it/s] 26%|██▌ | 94819/371472 [7:32:38<20:36:15, 3.73it/s] 26%|██▌ | 94820/371472 [7:32:38<21:24:47, 3.59it/s] {'loss': 3.7004, 'learning_rate': 7.706317740093515e-07, 'epoch': 4.08} + 26%|██▌ | 94820/371472 [7:32:38<21:24:47, 3.59it/s] 26%|██▌ | 94821/371472 [7:32:39<22:26:38, 3.42it/s] 26%|██▌ | 94822/371472 [7:32:39<22:08:06, 3.47it/s] 26%|██▌ | 94823/371472 [7:32:39<21:53:33, 3.51it/s] 26%|██▌ | 94824/371472 [7:32:39<21:26:52, 3.58it/s] 26%|██▌ | 94825/371472 [7:32:40<21:39:03, 3.55it/s] 26%|██▌ | 94826/371472 [7:32:40<21:23:37, 3.59it/s] 26%|██▌ | 94827/371472 [7:32:40<20:56:27, 3.67it/s] 26%|██▌ | 94828/371472 [7:32:41<20:38:31, 3.72it/s] 26%|██▌ | 94829/371472 [7:32:41<20:28:32, 3.75it/s] 26%|██▌ | 94830/371472 [7:32:41<20:52:46, 3.68it/s] 26%|██▌ | 94831/371472 [7:32:41<21:20:58, 3.60it/s] 26%|██▌ | 94832/371472 [7:32:42<22:15:05, 3.45it/s] 26%|██▌ | 94833/371472 [7:32:42<21:32:40, 3.57it/s] 26%|██▌ | 94834/371472 [7:32:42<21:30:51, 3.57it/s] 26%|██▌ | 94835/371472 [7:32:42<21:30:13, 3.57it/s] 26%|██▌ | 94836/371472 [7:32:43<21:19:00, 3.60it/s] 26%|██▌ | 94837/371472 [7:32:43<21:03:31, 3.65it/s] 26%|██▌ | 94838/371472 [7:32:43<21:50:40, 3.52it/s] 26%|██▌ | 94839/371472 [7:32:44<21:39:55, 3.55it/s] 26%|██▌ | 94840/371472 [7:32:44<20:46:07, 3.70it/s] {'loss': 3.6568, 'learning_rate': 7.705832920338727e-07, 'epoch': 4.08} + 26%|██▌ | 94840/371472 [7:32:44<20:46:07, 3.70it/s] 26%|██▌ | 94841/371472 [7:32:44<20:02:51, 3.83it/s] 26%|██▌ | 94842/371472 [7:32:44<20:30:22, 3.75it/s] 26%|██▌ | 94843/371472 [7:32:45<20:29:52, 3.75it/s] 26%|██▌ | 94844/371472 [7:32:45<22:27:12, 3.42it/s] 26%|██▌ | 94845/371472 [7:32:45<22:51:43, 3.36it/s] 26%|██▌ | 94846/371472 [7:32:46<21:57:43, 3.50it/s] 26%|██▌ | 94847/371472 [7:32:46<21:39:47, 3.55it/s] 26%|██▌ | 94848/371472 [7:32:46<21:28:56, 3.58it/s] 26%|██▌ | 94849/371472 [7:32:46<20:54:09, 3.68it/s] 26%|██▌ | 94850/371472 [7:32:47<20:05:52, 3.82it/s] 26%|██▌ | 94851/371472 [7:32:47<21:26:57, 3.58it/s] 26%|██▌ | 94852/371472 [7:32:47<20:53:05, 3.68it/s] 26%|██▌ | 94853/371472 [7:32:47<21:17:49, 3.61it/s] 26%|██▌ | 94854/371472 [7:32:48<21:07:19, 3.64it/s] 26%|��█▌ | 94855/371472 [7:32:48<21:58:20, 3.50it/s] 26%|██▌ | 94856/371472 [7:32:48<22:12:34, 3.46it/s] 26%|██▌ | 94857/371472 [7:32:49<24:34:38, 3.13it/s] 26%|██▌ | 94858/371472 [7:32:49<25:52:57, 2.97it/s] 26%|██▌ | 94859/371472 [7:32:49<25:15:03, 3.04it/s] 26%|██▌ | 94860/371472 [7:32:50<23:50:43, 3.22it/s] {'loss': 3.7294, 'learning_rate': 7.705348100583939e-07, 'epoch': 4.09} + 26%|██▌ | 94860/371472 [7:32:50<23:50:43, 3.22it/s] 26%|██▌ | 94861/371472 [7:32:50<22:02:46, 3.49it/s] 26%|██▌ | 94862/371472 [7:32:50<22:04:05, 3.48it/s] 26%|██▌ | 94863/371472 [7:32:50<21:42:44, 3.54it/s] 26%|██▌ | 94864/371472 [7:32:51<22:47:58, 3.37it/s] 26%|██▌ | 94865/371472 [7:32:51<23:37:07, 3.25it/s] 26%|██▌ | 94866/371472 [7:32:51<22:18:27, 3.44it/s] 26%|██▌ | 94867/371472 [7:32:52<21:37:01, 3.55it/s] 26%|██▌ | 94868/371472 [7:32:52<21:20:18, 3.60it/s] 26%|██▌ | 94869/371472 [7:32:52<20:27:20, 3.76it/s] 26%|██▌ | 94870/371472 [7:32:52<20:52:11, 3.68it/s] 26%|██▌ | 94871/371472 [7:32:53<20:01:02, 3.84it/s] 26%|██▌ | 94872/371472 [7:32:53<19:53:12, 3.86it/s] 26%|██▌ | 94873/371472 [7:32:53<20:04:56, 3.83it/s] 26%|██▌ | 94874/371472 [7:32:53<20:41:25, 3.71it/s] 26%|██▌ | 94875/371472 [7:32:54<21:19:41, 3.60it/s] 26%|██▌ | 94876/371472 [7:32:54<22:08:45, 3.47it/s] 26%|██▌ | 94877/371472 [7:32:54<24:27:50, 3.14it/s] 26%|██▌ | 94878/371472 [7:32:55<22:50:19, 3.36it/s] 26%|██▌ | 94879/371472 [7:32:55<21:47:33, 3.53it/s] 26%|██▌ | 94880/371472 [7:32:55<22:17:00, 3.45it/s] {'loss': 3.6133, 'learning_rate': 7.704863280829149e-07, 'epoch': 4.09} + 26%|██▌ | 94880/371472 [7:32:55<22:17:00, 3.45it/s] 26%|██▌ | 94881/371472 [7:32:56<21:51:47, 3.51it/s] 26%|██▌ | 94882/371472 [7:32:56<21:29:13, 3.58it/s] 26%|██▌ | 94883/371472 [7:32:56<21:41:34, 3.54it/s] 26%|██▌ | 94884/371472 [7:32:56<20:51:17, 3.68it/s] 26%|██▌ | 94885/371472 [7:32:57<21:10:24, 3.63it/s] 26%|██▌ | 94886/371472 [7:32:57<22:34:31, 3.40it/s] 26%|██▌ | 94887/371472 [7:32:57<21:41:36, 3.54it/s] 26%|██▌ | 94888/371472 [7:32:58<21:21:15, 3.60it/s] 26%|██▌ | 94889/371472 [7:32:58<20:30:11, 3.75it/s] 26%|██▌ | 94890/371472 [7:32:58<20:45:37, 3.70it/s] 26%|██▌ | 94891/371472 [7:32:58<21:04:48, 3.64it/s] 26%|██▌ | 94892/371472 [7:32:59<20:43:43, 3.71it/s] 26%|██▌ | 94893/371472 [7:32:59<20:21:36, 3.77it/s] 26%|██▌ | 94894/371472 [7:32:59<20:27:44, 3.75it/s] 26%|██▌ | 94895/371472 [7:32:59<20:53:40, 3.68it/s] 26%|██▌ | 94896/371472 [7:33:00<20:12:27, 3.80it/s] 26%|██▌ | 94897/371472 [7:33:00<19:30:18, 3.94it/s] 26%|██▌ | 94898/371472 [7:33:00<19:29:39, 3.94it/s] 26%|██▌ | 94899/371472 [7:33:00<19:58:00, 3.85it/s] 26%|██▌ | 94900/371472 [7:33:01<20:04:32, 3.83it/s] {'loss': 3.5222, 'learning_rate': 7.70437846107436e-07, 'epoch': 4.09} + 26%|██▌ | 94900/371472 [7:33:01<20:04:32, 3.83it/s] 26%|██▌ | 94901/371472 [7:33:01<19:46:56, 3.88it/s] 26%|██▌ | 94902/371472 [7:33:01<19:48:50, 3.88it/s] 26%|██▌ | 94903/371472 [7:33:01<19:44:33, 3.89it/s] 26%|██▌ | 94904/371472 [7:33:02<20:24:55, 3.76it/s] 26%|██▌ | 94905/371472 [7:33:02<19:48:46, 3.88it/s] 26%|██▌ | 94906/371472 [7:33:02<19:47:11, 3.88it/s] 26%|██▌ | 94907/371472 [7:33:03<21:33:12, 3.56it/s] 26%|██▌ | 94908/371472 [7:33:03<20:57:19, 3.67it/s] 26%|██▌ | 94909/371472 [7:33:03<20:20:27, 3.78it/s] 26%|██▌ | 94910/371472 [7:33:03<20:32:56, 3.74it/s] 26%|██▌ | 94911/371472 [7:33:04<20:21:41, 3.77it/s] 26%|██▌ | 94912/371472 [7:33:04<20:44:10, 3.70it/s] 26%|██▌ | 94913/371472 [7:33:04<21:11:41, 3.62it/s] 26%|██▌ | 94914/371472 [7:33:04<22:36:19, 3.40it/s] 26%|██▌ | 94915/371472 [7:33:05<23:09:35, 3.32it/s] 26%|██▌ | 94916/371472 [7:33:05<24:05:12, 3.19it/s] 26%|██▌ | 94917/371472 [7:33:05<23:52:41, 3.22it/s] 26%|██▌ | 94918/371472 [7:33:06<22:06:45, 3.47it/s] 26%|██▌ | 94919/371472 [7:33:06<22:36:49, 3.40it/s] 26%|██▌ | 94920/371472 [7:33:06<22:07:44, 3.47it/s] {'loss': 3.8665, 'learning_rate': 7.703893641319572e-07, 'epoch': 4.09} + 26%|██▌ | 94920/371472 [7:33:06<22:07:44, 3.47it/s] 26%|██▌ | 94921/371472 [7:33:07<21:33:43, 3.56it/s] 26%|██▌ | 94922/371472 [7:33:07<23:27:26, 3.27it/s] 26%|██▌ | 94923/371472 [7:33:07<22:31:31, 3.41it/s] 26%|██▌ | 94924/371472 [7:33:07<21:46:38, 3.53it/s] 26%|██▌ | 94925/371472 [7:33:08<21:35:32, 3.56it/s] 26%|██▌ | 94926/371472 [7:33:08<20:58:26, 3.66it/s] 26%|██▌ | 94927/371472 [7:33:08<23:24:07, 3.28it/s] 26%|██▌ | 94928/371472 [7:33:09<23:10:22, 3.31it/s] 26%|██▌ | 94929/371472 [7:33:09<21:40:21, 3.54it/s] 26%|██▌ | 94930/371472 [7:33:09<20:48:25, 3.69it/s] 26%|██▌ | 94931/371472 [7:33:09<22:31:44, 3.41it/s] 26%|██▌ | 94932/371472 [7:33:10<21:23:39, 3.59it/s] 26%|██▌ | 94933/371472 [7:33:10<20:37:54, 3.72it/s] 26%|██▌ | 94934/371472 [7:33:10<19:58:26, 3.85it/s] 26%|██▌ | 94935/371472 [7:33:10<20:15:19, 3.79it/s] 26%|██▌ | 94936/371472 [7:33:11<20:04:09, 3.83it/s] 26%|██▌ | 94937/371472 [7:33:11<20:20:15, 3.78it/s] 26%|██▌ | 94938/371472 [7:33:11<21:56:00, 3.50it/s] 26%|██▌ | 94939/371472 [7:33:12<21:39:52, 3.55it/s] 26%|██▌ | 94940/371472 [7:33:12<21:38:40, 3.55it/s] {'loss': 3.7331, 'learning_rate': 7.703408821564781e-07, 'epoch': 4.09} + 26%|██▌ | 94940/371472 [7:33:12<21:38:40, 3.55it/s] 26%|██▌ | 94941/371472 [7:33:12<22:25:44, 3.42it/s] 26%|██▌ | 94942/371472 [7:33:12<21:49:58, 3.52it/s] 26%|██▌ | 94943/371472 [7:33:13<21:30:15, 3.57it/s] 26%|██▌ | 94944/371472 [7:33:13<23:11:04, 3.31it/s] 26%|██▌ | 94945/371472 [7:33:13<21:52:49, 3.51it/s] 26%|██▌ | 94946/371472 [7:33:14<21:07:54, 3.63it/s] 26%|██▌ | 94947/371472 [7:33:14<22:18:50, 3.44it/s] 26%|██▌ | 94948/371472 [7:33:14<21:13:18, 3.62it/s] 26%|██▌ | 94949/371472 [7:33:14<20:26:24, 3.76it/s] 26%|██▌ | 94950/371472 [7:33:15<20:15:09, 3.79it/s] 26%|██▌ | 94951/371472 [7:33:15<19:47:10, 3.88it/s] 26%|██▌ | 94952/371472 [7:33:15<19:49:00, 3.88it/s] 26%|██▌ | 94953/371472 [7:33:15<21:12:47, 3.62it/s] 26%|██▌ | 94954/371472 [7:33:16<20:36:27, 3.73it/s] 26%|██▌ | 94955/371472 [7:33:16<22:16:53, 3.45it/s] 26%|██▌ | 94956/371472 [7:33:16<22:40:36, 3.39it/s] 26%|██▌ | 94957/371472 [7:33:17<23:57:29, 3.21it/s] 26%|██▌ | 94958/371472 [7:33:17<22:55:46, 3.35it/s] 26%|██▌ | 94959/371472 [7:33:17<21:33:39, 3.56it/s] 26%|██▌ | 94960/371472 [7:33:17<20:50:13, 3.69it/s] {'loss': 3.6031, 'learning_rate': 7.702924001809993e-07, 'epoch': 4.09} + 26%|██▌ | 94960/371472 [7:33:17<20:50:13, 3.69it/s] 26%|██▌ | 94961/371472 [7:33:18<23:49:34, 3.22it/s] 26%|██▌ | 94962/371472 [7:33:18<22:56:19, 3.35it/s] 26%|██▌ | 94963/371472 [7:33:18<21:58:04, 3.50it/s] 26%|██▌ | 94964/371472 [7:33:19<21:37:27, 3.55it/s] 26%|██▌ | 94965/371472 [7:33:19<21:56:08, 3.50it/s] 26%|██▌ | 94966/371472 [7:33:19<22:22:06, 3.43it/s] 26%|██▌ | 94967/371472 [7:33:20<22:46:27, 3.37it/s] 26%|██▌ | 94968/371472 [7:33:20<22:50:01, 3.36it/s] 26%|██▌ | 94969/371472 [7:33:20<23:07:15, 3.32it/s] 26%|██▌ | 94970/371472 [7:33:20<21:48:20, 3.52it/s] 26%|██▌ | 94971/371472 [7:33:21<21:57:54, 3.50it/s] 26%|██▌ | 94972/371472 [7:33:21<23:35:44, 3.26it/s] 26%|██▌ | 94973/371472 [7:33:21<22:43:58, 3.38it/s] 26%|██▌ | 94974/371472 [7:33:22<21:51:59, 3.51it/s] 26%|██▌ | 94975/371472 [7:33:22<21:23:07, 3.59it/s] 26%|██▌ | 94976/371472 [7:33:22<21:29:35, 3.57it/s] 26%|██▌ | 94977/371472 [7:33:23<25:06:14, 3.06it/s] 26%|██▌ | 94978/371472 [7:33:23<24:07:30, 3.18it/s] 26%|██▌ | 94979/371472 [7:33:23<23:22:15, 3.29it/s] 26%|██▌ | 94980/371472 [7:33:23<22:59:00, 3.34it/s] {'loss': 3.397, 'learning_rate': 7.702439182055204e-07, 'epoch': 4.09} + 26%|██▌ | 94980/371472 [7:33:23<22:59:00, 3.34it/s] 26%|██▌ | 94981/371472 [7:33:24<21:48:47, 3.52it/s] 26%|██▌ | 94982/371472 [7:33:24<21:16:14, 3.61it/s] 26%|██▌ | 94983/371472 [7:33:24<20:52:39, 3.68it/s] 26%|██▌ | 94984/371472 [7:33:24<20:39:52, 3.72it/s] 26%|██▌ | 94985/371472 [7:33:25<21:34:04, 3.56it/s] 26%|██▌ | 94986/371472 [7:33:25<20:27:14, 3.75it/s] 26%|██▌ | 94987/371472 [7:33:25<20:52:37, 3.68it/s] 26%|██▌ | 94988/371472 [7:33:26<20:55:27, 3.67it/s] 26%|██▌ | 94989/371472 [7:33:26<20:52:04, 3.68it/s] 26%|██▌ | 94990/371472 [7:33:26<20:11:05, 3.80it/s] 26%|██▌ | 94991/371472 [7:33:26<20:08:16, 3.81it/s] 26%|██▌ | 94992/371472 [7:33:27<19:39:00, 3.91it/s] 26%|██▌ | 94993/371472 [7:33:27<19:58:37, 3.84it/s] 26%|██▌ | 94994/371472 [7:33:27<20:53:14, 3.68it/s] 26%|██▌ | 94995/371472 [7:33:27<20:35:39, 3.73it/s] 26%|██▌ | 94996/371472 [7:33:28<20:31:25, 3.74it/s] 26%|██▌ | 94997/371472 [7:33:28<20:15:31, 3.79it/s] 26%|██▌ | 94998/371472 [7:33:28<20:16:43, 3.79it/s] 26%|██▌ | 94999/371472 [7:33:28<21:09:15, 3.63it/s] 26%|██▌ | 95000/371472 [7:33:29<21:07:03, 3.64it/s] {'loss': 3.9264, 'learning_rate': 7.701954362300416e-07, 'epoch': 4.09} + 26%|██▌ | 95000/371472 [7:33:29<21:07:03, 3.64it/s] 26%|██▌ | 95001/371472 [7:33:29<22:16:09, 3.45it/s] 26%|██▌ | 95002/371472 [7:33:29<21:31:08, 3.57it/s] 26%|██▌ | 95003/371472 [7:33:30<20:57:30, 3.66it/s] 26%|██▌ | 95004/371472 [7:33:30<20:24:56, 3.76it/s] 26%|██▌ | 95005/371472 [7:33:30<20:34:14, 3.73it/s] 26%|██▌ | 95006/371472 [7:33:30<20:08:19, 3.81it/s] 26%|██▌ | 95007/371472 [7:33:31<19:54:56, 3.86it/s] 26%|██▌ | 95008/371472 [7:33:31<20:58:24, 3.66it/s] 26%|██▌ | 95009/371472 [7:33:31<20:45:54, 3.70it/s] 26%|██▌ | 95010/371472 [7:33:31<20:21:27, 3.77it/s] 26%|██▌ | 95011/371472 [7:33:32<20:47:01, 3.69it/s] 26%|██▌ | 95012/371472 [7:33:32<20:13:07, 3.80it/s] 26%|██▌ | 95013/371472 [7:33:32<20:14:34, 3.79it/s] 26%|██▌ | 95014/371472 [7:33:32<19:58:24, 3.84it/s] 26%|██▌ | 95015/371472 [7:33:33<23:51:05, 3.22it/s] 26%|██▌ | 95016/371472 [7:33:33<24:17:04, 3.16it/s] 26%|██▌ | 95017/371472 [7:33:33<22:41:59, 3.38it/s] 26%|██▌ | 95018/371472 [7:33:34<22:50:23, 3.36it/s] 26%|██▌ | 95019/371472 [7:33:34<23:49:27, 3.22it/s] 26%|██▌ | 95020/371472 [7:33:34<21:54:46, 3.50it/s] {'loss': 3.569, 'learning_rate': 7.701469542545627e-07, 'epoch': 4.09} + 26%|██▌ | 95020/371472 [7:33:34<21:54:46, 3.50it/s] 26%|██▌ | 95021/371472 [7:33:35<22:47:30, 3.37it/s] 26%|██▌ | 95022/371472 [7:33:35<22:59:31, 3.34it/s] 26%|██▌ | 95023/371472 [7:33:35<22:54:42, 3.35it/s] 26%|██▌ | 95024/371472 [7:33:36<21:30:36, 3.57it/s] 26%|██▌ | 95025/371472 [7:33:36<22:17:38, 3.44it/s] 26%|██▌ | 95026/371472 [7:33:36<23:22:15, 3.29it/s] 26%|██▌ | 95027/371472 [7:33:36<22:29:00, 3.42it/s] 26%|██▌ | 95028/371472 [7:33:37<21:19:36, 3.60it/s] 26%|██▌ | 95029/371472 [7:33:37<22:01:32, 3.49it/s] 26%|██▌ | 95030/371472 [7:33:37<21:50:45, 3.52it/s] 26%|██▌ | 95031/371472 [7:33:37<20:40:44, 3.71it/s] 26%|██▌ | 95032/371472 [7:33:38<20:28:26, 3.75it/s] 26%|██▌ | 95033/371472 [7:33:38<21:12:07, 3.62it/s] 26%|██▌ | 95034/371472 [7:33:38<20:59:38, 3.66it/s] 26%|██▌ | 95035/371472 [7:33:39<20:26:10, 3.76it/s] 26%|██▌ | 95036/371472 [7:33:39<20:16:38, 3.79it/s] 26%|██▌ | 95037/371472 [7:33:39<20:10:27, 3.81it/s] 26%|██▌ | 95038/371472 [7:33:39<20:50:26, 3.68it/s] 26%|██▌ | 95039/371472 [7:33:40<21:09:38, 3.63it/s] 26%|██▌ | 95040/371472 [7:33:40<21:39:59, 3.54it/s] {'loss': 3.586, 'learning_rate': 7.700984722790838e-07, 'epoch': 4.09} + 26%|██▌ | 95040/371472 [7:33:40<21:39:59, 3.54it/s] 26%|██▌ | 95041/371472 [7:33:40<21:40:45, 3.54it/s] 26%|██▌ | 95042/371472 [7:33:41<20:56:22, 3.67it/s] 26%|██▌ | 95043/371472 [7:33:41<21:19:09, 3.60it/s] 26%|██▌ | 95044/371472 [7:33:41<21:09:38, 3.63it/s] 26%|██▌ | 95045/371472 [7:33:41<20:47:20, 3.69it/s] 26%|██▌ | 95046/371472 [7:33:42<21:22:56, 3.59it/s] 26%|██▌ | 95047/371472 [7:33:42<20:51:30, 3.68it/s] 26%|██▌ | 95048/371472 [7:33:42<20:47:29, 3.69it/s] 26%|██▌ | 95049/371472 [7:33:42<20:45:58, 3.70it/s] 26%|██▌ | 95050/371472 [7:33:43<21:07:03, 3.64it/s] 26%|██▌ | 95051/371472 [7:33:43<21:05:10, 3.64it/s] 26%|██▌ | 95052/371472 [7:33:43<20:20:15, 3.78it/s] 26%|██▌ | 95053/371472 [7:33:43<20:26:44, 3.76it/s] 26%|██▌ | 95054/371472 [7:33:44<21:19:20, 3.60it/s] 26%|██▌ | 95055/371472 [7:33:44<21:08:28, 3.63it/s] 26%|██▌ | 95056/371472 [7:33:44<20:25:11, 3.76it/s] 26%|██▌ | 95057/371472 [7:33:45<20:49:54, 3.69it/s] 26%|██▌ | 95058/371472 [7:33:45<20:40:18, 3.71it/s] 26%|██▌ | 95059/371472 [7:33:45<21:05:36, 3.64it/s] 26%|██▌ | 95060/371472 [7:33:45<20:34:47, 3.73it/s] {'loss': 3.6438, 'learning_rate': 7.700499903036048e-07, 'epoch': 4.09} + 26%|██▌ | 95060/371472 [7:33:45<20:34:47, 3.73it/s] 26%|██▌ | 95061/371472 [7:33:46<22:08:18, 3.47it/s] 26%|██▌ | 95062/371472 [7:33:46<21:11:38, 3.62it/s] 26%|██▌ | 95063/371472 [7:33:46<21:05:13, 3.64it/s] 26%|██▌ | 95064/371472 [7:33:47<22:04:35, 3.48it/s] 26%|██▌ | 95065/371472 [7:33:47<21:49:46, 3.52it/s] 26%|██▌ | 95066/371472 [7:33:47<21:59:33, 3.49it/s] 26%|██▌ | 95067/371472 [7:33:47<22:10:17, 3.46it/s] 26%|██▌ | 95068/371472 [7:33:48<22:19:00, 3.44it/s] 26%|██▌ | 95069/371472 [7:33:48<21:27:47, 3.58it/s] 26%|██▌ | 95070/371472 [7:33:48<21:15:21, 3.61it/s] 26%|██▌ | 95071/371472 [7:33:48<20:47:09, 3.69it/s] 26%|██▌ | 95072/371472 [7:33:49<20:34:49, 3.73it/s] 26%|██▌ | 95073/371472 [7:33:49<21:40:15, 3.54it/s] 26%|██▌ | 95074/371472 [7:33:49<21:32:17, 3.56it/s] 26%|██▌ | 95075/371472 [7:33:50<21:24:51, 3.59it/s] 26%|██▌ | 95076/371472 [7:33:50<20:47:57, 3.69it/s] 26%|██▌ | 95077/371472 [7:33:50<20:39:47, 3.72it/s] 26%|██▌ | 95078/371472 [7:33:50<20:11:37, 3.80it/s] 26%|██▌ | 95079/371472 [7:33:51<21:35:51, 3.55it/s] 26%|██▌ | 95080/371472 [7:33:51<22:04:35, 3.48it/s] {'loss': 3.7133, 'learning_rate': 7.70001508328126e-07, 'epoch': 4.1} + 26%|██▌ | 95080/371472 [7:33:51<22:04:35, 3.48it/s] 26%|██▌ | 95081/371472 [7:33:51<21:54:25, 3.50it/s] 26%|██▌ | 95082/371472 [7:33:52<21:39:57, 3.54it/s] 26%|██▌ | 95083/371472 [7:33:52<21:07:28, 3.63it/s] 26%|██▌ | 95084/371472 [7:33:52<21:56:35, 3.50it/s] 26%|██▌ | 95085/371472 [7:33:52<22:25:13, 3.42it/s] 26%|██▌ | 95086/371472 [7:33:53<22:31:23, 3.41it/s] 26%|██▌ | 95087/371472 [7:33:53<21:22:22, 3.59it/s] 26%|██▌ | 95088/371472 [7:33:53<22:16:18, 3.45it/s] 26%|██▌ | 95089/371472 [7:33:54<22:55:32, 3.35it/s] 26%|██▌ | 95090/371472 [7:33:54<23:22:12, 3.29it/s] 26%|██▌ | 95091/371472 [7:33:54<24:02:40, 3.19it/s] 26%|██▌ | 95092/371472 [7:33:55<28:41:38, 2.68it/s] 26%|██▌ | 95093/371472 [7:33:55<25:55:45, 2.96it/s] 26%|██▌ | 95094/371472 [7:33:55<23:54:01, 3.21it/s] 26%|██▌ | 95095/371472 [7:33:56<22:47:33, 3.37it/s] 26%|██▌ | 95096/371472 [7:33:56<22:17:35, 3.44it/s] 26%|██▌ | 95097/371472 [7:33:56<21:29:48, 3.57it/s] 26%|██▌ | 95098/371472 [7:33:56<20:59:16, 3.66it/s] 26%|██▌ | 95099/371472 [7:33:57<26:55:30, 2.85it/s] 26%|██▌ | 95100/371472 [7:33:57<25:11:44, 3.05it/s] {'loss': 3.6876, 'learning_rate': 7.699530263526471e-07, 'epoch': 4.1} + 26%|██▌ | 95100/371472 [7:33:57<25:11:44, 3.05it/s] 26%|██▌ | 95101/371472 [7:33:57<23:59:24, 3.20it/s] 26%|██▌ | 95102/371472 [7:33:58<23:24:05, 3.28it/s] 26%|██▌ | 95103/371472 [7:33:58<22:13:43, 3.45it/s] 26%|██▌ | 95104/371472 [7:33:58<22:07:24, 3.47it/s] 26%|██▌ | 95105/371472 [7:33:59<21:14:00, 3.62it/s] 26%|██▌ | 95106/371472 [7:33:59<21:04:23, 3.64it/s] 26%|██▌ | 95107/371472 [7:33:59<20:29:46, 3.75it/s] 26%|██▌ | 95108/371472 [7:33:59<20:34:57, 3.73it/s] 26%|██▌ | 95109/371472 [7:34:00<20:11:43, 3.80it/s] 26%|██▌ | 95110/371472 [7:34:00<20:25:25, 3.76it/s] 26%|██▌ | 95111/371472 [7:34:00<21:55:13, 3.50it/s] 26%|██▌ | 95112/371472 [7:34:00<21:50:40, 3.51it/s] 26%|██▌ | 95113/371472 [7:34:01<21:31:43, 3.57it/s] 26%|██▌ | 95114/371472 [7:34:01<22:05:40, 3.47it/s] 26%|██▌ | 95115/371472 [7:34:01<21:44:46, 3.53it/s] 26%|██▌ | 95116/371472 [7:34:02<21:39:16, 3.55it/s] 26%|██▌ | 95117/371472 [7:34:02<21:08:16, 3.63it/s] 26%|██▌ | 95118/371472 [7:34:02<21:22:37, 3.59it/s] 26%|██▌ | 95119/371472 [7:34:02<23:46:47, 3.23it/s] 26%|██▌ | 95120/371472 [7:34:03<23:11:16, 3.31it/s] {'loss': 3.5328, 'learning_rate': 7.699045443771682e-07, 'epoch': 4.1} + 26%|██▌ | 95120/371472 [7:34:03<23:11:16, 3.31it/s] 26%|██▌ | 95121/371472 [7:34:03<22:50:01, 3.36it/s] 26%|██▌ | 95122/371472 [7:34:03<24:17:27, 3.16it/s] 26%|��█▌ | 95123/371472 [7:34:04<23:01:45, 3.33it/s] 26%|██▌ | 95124/371472 [7:34:04<22:49:10, 3.36it/s] 26%|██▌ | 95125/371472 [7:34:04<21:55:19, 3.50it/s] 26%|██▌ | 95126/371472 [7:34:05<21:47:37, 3.52it/s] 26%|██▌ | 95127/371472 [7:34:05<21:11:39, 3.62it/s] 26%|██▌ | 95128/371472 [7:34:05<20:49:57, 3.68it/s] 26%|██▌ | 95129/371472 [7:34:05<20:37:44, 3.72it/s] 26%|██▌ | 95130/371472 [7:34:06<21:18:21, 3.60it/s] 26%|██▌ | 95131/371472 [7:34:06<21:49:56, 3.52it/s] 26%|██▌ | 95132/371472 [7:34:06<21:05:43, 3.64it/s] 26%|██▌ | 95133/371472 [7:34:06<20:51:06, 3.68it/s] 26%|██▌ | 95134/371472 [7:34:07<21:51:48, 3.51it/s] 26%|██▌ | 95135/371472 [7:34:07<21:58:47, 3.49it/s] 26%|██▌ | 95136/371472 [7:34:07<21:04:42, 3.64it/s] 26%|██▌ | 95137/371472 [7:34:08<20:50:29, 3.68it/s] 26%|██▌ | 95138/371472 [7:34:08<22:32:16, 3.41it/s] 26%|██▌ | 95139/371472 [7:34:08<21:58:35, 3.49it/s] 26%|██▌ | 95140/371472 [7:34:08<22:22:05, 3.43it/s] {'loss': 3.7075, 'learning_rate': 7.698560624016893e-07, 'epoch': 4.1} + 26%|██▌ | 95140/371472 [7:34:08<22:22:05, 3.43it/s] 26%|██▌ | 95141/371472 [7:34:09<22:35:39, 3.40it/s] 26%|██▌ | 95142/371472 [7:34:09<21:42:30, 3.54it/s] 26%|██▌ | 95143/371472 [7:34:09<20:46:02, 3.70it/s] 26%|██▌ | 95144/371472 [7:34:09<20:21:26, 3.77it/s] 26%|██▌ | 95145/371472 [7:34:10<21:19:31, 3.60it/s] 26%|██▌ | 95146/371472 [7:34:10<21:59:10, 3.49it/s] 26%|██▌ | 95147/371472 [7:34:10<21:26:40, 3.58it/s] 26%|██▌ | 95148/371472 [7:34:11<22:03:50, 3.48it/s] 26%|██▌ | 95149/371472 [7:34:11<21:08:09, 3.63it/s] 26%|██▌ | 95150/371472 [7:34:11<21:16:28, 3.61it/s] 26%|██▌ | 95151/371472 [7:34:12<22:35:57, 3.40it/s] 26%|██▌ | 95152/371472 [7:34:12<22:28:12, 3.42it/s] 26%|██▌ | 95153/371472 [7:34:12<21:24:05, 3.59it/s] 26%|██▌ | 95154/371472 [7:34:12<22:19:58, 3.44it/s] 26%|██▌ | 95155/371472 [7:34:13<21:28:45, 3.57it/s] 26%|██▌ | 95156/371472 [7:34:13<21:26:18, 3.58it/s] 26%|██▌ | 95157/371472 [7:34:13<21:23:35, 3.59it/s] 26%|██▌ | 95158/371472 [7:34:13<21:07:54, 3.63it/s] 26%|██▌ | 95159/371472 [7:34:14<20:56:58, 3.66it/s] 26%|██▌ | 95160/371472 [7:34:14<21:17:41, 3.60it/s] {'loss': 3.7001, 'learning_rate': 7.698075804262105e-07, 'epoch': 4.1} + 26%|██▌ | 95160/371472 [7:34:14<21:17:41, 3.60it/s] 26%|██▌ | 95161/371472 [7:34:14<22:13:09, 3.45it/s] 26%|██▌ | 95162/371472 [7:34:15<24:46:27, 3.10it/s] 26%|██▌ | 95163/371472 [7:34:15<24:03:46, 3.19it/s] 26%|██▌ | 95164/371472 [7:34:15<23:46:00, 3.23it/s] 26%|██▌ | 95165/371472 [7:34:16<23:56:36, 3.21it/s] 26%|██▌ | 95166/371472 [7:34:16<24:07:30, 3.18it/s] 26%|██▌ | 95167/371472 [7:34:16<23:24:11, 3.28it/s] 26%|██▌ | 95168/371472 [7:34:17<23:59:18, 3.20it/s] 26%|██▌ | 95169/371472 [7:34:17<22:46:24, 3.37it/s] 26%|██▌ | 95170/371472 [7:34:17<22:25:33, 3.42it/s] 26%|██▌ | 95171/371472 [7:34:17<22:09:54, 3.46it/s] 26%|██▌ | 95172/371472 [7:34:18<21:12:32, 3.62it/s] 26%|██▌ | 95173/371472 [7:34:18<20:46:23, 3.69it/s] 26%|██▌ | 95174/371472 [7:34:18<22:18:05, 3.44it/s] 26%|██▌ | 95175/371472 [7:34:19<22:40:58, 3.38it/s] 26%|██▌ | 95176/371472 [7:34:19<23:49:25, 3.22it/s] 26%|██▌ | 95177/371472 [7:34:19<22:47:21, 3.37it/s] 26%|██▌ | 95178/371472 [7:34:19<22:19:51, 3.44it/s] 26%|██▌ | 95179/371472 [7:34:20<21:42:23, 3.54it/s] 26%|██▌ | 95180/371472 [7:34:20<20:46:46, 3.69it/s] {'loss': 3.6158, 'learning_rate': 7.697590984507315e-07, 'epoch': 4.1} + 26%|██▌ | 95180/371472 [7:34:20<20:46:46, 3.69it/s] 26%|██▌ | 95181/371472 [7:34:20<21:39:13, 3.54it/s] 26%|██▌ | 95182/371472 [7:34:21<22:17:39, 3.44it/s] 26%|██▌ | 95183/371472 [7:34:21<22:02:15, 3.48it/s] 26%|██▌ | 95184/371472 [7:34:21<21:11:51, 3.62it/s] 26%|██▌ | 95185/371472 [7:34:21<22:33:03, 3.40it/s] 26%|██▌ | 95186/371472 [7:34:22<22:39:45, 3.39it/s] 26%|██▌ | 95187/371472 [7:34:22<22:22:37, 3.43it/s] 26%|██▌ | 95188/371472 [7:34:22<22:14:59, 3.45it/s] 26%|██▌ | 95189/371472 [7:34:23<22:17:25, 3.44it/s] 26%|██▌ | 95190/371472 [7:34:23<23:39:36, 3.24it/s] 26%|██▌ | 95191/371472 [7:34:23<22:11:08, 3.46it/s] 26%|██▌ | 95192/371472 [7:34:23<21:53:48, 3.50it/s] 26%|██▌ | 95193/371472 [7:34:24<21:33:01, 3.56it/s] 26%|██▌ | 95194/371472 [7:34:24<21:16:29, 3.61it/s] 26%|██▌ | 95195/371472 [7:34:24<21:13:38, 3.62it/s] 26%|██▌ | 95196/371472 [7:34:25<22:17:28, 3.44it/s] 26%|██▌ | 95197/371472 [7:34:25<21:52:36, 3.51it/s] 26%|██▌ | 95198/371472 [7:34:25<21:20:32, 3.60it/s] 26%|██▌ | 95199/371472 [7:34:25<23:04:31, 3.33it/s] 26%|██▌ | 95200/371472 [7:34:26<21:53:24, 3.51it/s] {'loss': 3.6288, 'learning_rate': 7.697106164752526e-07, 'epoch': 4.1} + 26%|██▌ | 95200/371472 [7:34:26<21:53:24, 3.51it/s] 26%|██▌ | 95201/371472 [7:34:26<20:58:59, 3.66it/s] 26%|██▌ | 95202/371472 [7:34:26<22:15:45, 3.45it/s] 26%|██▌ | 95203/371472 [7:34:27<21:04:22, 3.64it/s] 26%|██▌ | 95204/371472 [7:34:27<21:04:18, 3.64it/s] 26%|██▌ | 95205/371472 [7:34:27<21:15:35, 3.61it/s] 26%|██▌ | 95206/371472 [7:34:27<20:32:15, 3.74it/s] 26%|██▌ | 95207/371472 [7:34:28<20:48:23, 3.69it/s] 26%|██▌ | 95208/371472 [7:34:28<20:02:27, 3.83it/s] 26%|██▌ | 95209/371472 [7:34:28<20:00:06, 3.84it/s] 26%|██▌ | 95210/371472 [7:34:28<20:13:50, 3.79it/s] 26%|██▌ | 95211/371472 [7:34:29<20:25:39, 3.76it/s] 26%|██▌ | 95212/371472 [7:34:29<20:08:36, 3.81it/s] 26%|██▌ | 95213/371472 [7:34:29<19:30:17, 3.93it/s] 26%|██▌ | 95214/371472 [7:34:29<19:19:12, 3.97it/s] 26%|██▌ | 95215/371472 [7:34:30<19:20:34, 3.97it/s] 26%|██▌ | 95216/371472 [7:34:30<20:16:26, 3.79it/s] 26%|██▌ | 95217/371472 [7:34:30<20:43:06, 3.70it/s] 26%|██▌ | 95218/371472 [7:34:31<20:32:52, 3.73it/s] 26%|██▌ | 95219/371472 [7:34:31<19:59:07, 3.84it/s] 26%|██▌ | 95220/371472 [7:34:31<20:11:18, 3.80it/s] {'loss': 3.6396, 'learning_rate': 7.696621344997737e-07, 'epoch': 4.1} + 26%|██▌ | 95220/371472 [7:34:31<20:11:18, 3.80it/s] 26%|██▌ | 95221/371472 [7:34:31<19:52:33, 3.86it/s] 26%|██▌ | 95222/371472 [7:34:32<20:42:04, 3.71it/s] 26%|██▌ | 95223/371472 [7:34:32<20:19:07, 3.78it/s] 26%|██▌ | 95224/371472 [7:34:32<20:44:56, 3.70it/s] 26%|██▌ | 95225/371472 [7:34:32<20:53:29, 3.67it/s] 26%|██▌ | 95226/371472 [7:34:33<19:54:11, 3.86it/s] 26%|██▌ | 95227/371472 [7:34:33<20:41:41, 3.71it/s] 26%|██▌ | 95228/371472 [7:34:33<20:54:30, 3.67it/s] 26%|██▌ | 95229/371472 [7:34:33<21:17:44, 3.60it/s] 26%|██▌ | 95230/371472 [7:34:34<20:40:49, 3.71it/s] 26%|██▌ | 95231/371472 [7:34:34<21:23:38, 3.59it/s] 26%|██▌ | 95232/371472 [7:34:34<21:43:23, 3.53it/s] 26%|██▌ | 95233/371472 [7:34:35<21:38:01, 3.55it/s] 26%|██▌ | 95234/371472 [7:34:35<20:39:10, 3.72it/s] 26%|██▌ | 95235/371472 [7:34:35<20:42:58, 3.70it/s] 26%|██▌ | 95236/371472 [7:34:35<21:22:27, 3.59it/s] 26%|██▌ | 95237/371472 [7:34:36<24:34:51, 3.12it/s] 26%|██▌ | 95238/371472 [7:34:36<25:01:53, 3.07it/s] 26%|██▌ | 95239/371472 [7:34:36<23:33:40, 3.26it/s] 26%|██▌ | 95240/371472 [7:34:37<22:41:09, 3.38it/s] {'loss': 3.8677, 'learning_rate': 7.696136525242948e-07, 'epoch': 4.1} + 26%|██▌ | 95240/371472 [7:34:37<22:41:09, 3.38it/s] 26%|██▌ | 95241/371472 [7:34:37<23:24:06, 3.28it/s] 26%|██▌ | 95242/371472 [7:34:37<23:05:16, 3.32it/s] 26%|██▌ | 95243/371472 [7:34:38<22:29:59, 3.41it/s] 26%|██▌ | 95244/371472 [7:34:38<21:57:02, 3.50it/s] 26%|██▌ | 95245/371472 [7:34:38<21:08:29, 3.63it/s] 26%|██▌ | 95246/371472 [7:34:38<21:53:33, 3.50it/s] 26%|██▌ | 95247/371472 [7:34:39<22:09:30, 3.46it/s] 26%|██▌ | 95248/371472 [7:34:39<22:03:57, 3.48it/s] 26%|██▌ | 95249/371472 [7:34:39<21:31:36, 3.56it/s] 26%|██▌ | 95250/371472 [7:34:40<21:54:16, 3.50it/s] 26%|██▌ | 95251/371472 [7:34:40<22:25:20, 3.42it/s] 26%|██▌ | 95252/371472 [7:34:40<22:37:53, 3.39it/s] 26%|██▌ | 95253/371472 [7:34:40<21:47:03, 3.52it/s] 26%|██▌ | 95254/371472 [7:34:41<22:02:20, 3.48it/s] 26%|██▌ | 95255/371472 [7:34:41<21:13:28, 3.62it/s] 26%|██▌ | 95256/371472 [7:34:41<22:13:43, 3.45it/s] 26%|██▌ | 95257/371472 [7:34:42<23:14:44, 3.30it/s] 26%|██▌ | 95258/371472 [7:34:42<22:35:47, 3.40it/s] 26%|██▌ | 95259/371472 [7:34:42<24:11:23, 3.17it/s] 26%|██▌ | 95260/371472 [7:34:43<23:50:32, 3.22it/s] {'loss': 3.4926, 'learning_rate': 7.69565170548816e-07, 'epoch': 4.1} + 26%|██▌ | 95260/371472 [7:34:43<23:50:32, 3.22it/s] 26%|██▌ | 95261/371472 [7:34:43<22:09:32, 3.46it/s] 26%|██▌ | 95262/371472 [7:34:43<21:52:39, 3.51it/s] 26%|██▌ | 95263/371472 [7:34:43<21:30:05, 3.57it/s] 26%|██▌ | 95264/371472 [7:34:44<20:39:13, 3.71it/s] 26%|██▌ | 95265/371472 [7:34:44<22:08:02, 3.47it/s] 26%|██▌ | 95266/371472 [7:34:44<22:06:33, 3.47it/s] 26%|██▌ | 95267/371472 [7:34:44<20:59:35, 3.65it/s] 26%|██▌ | 95268/371472 [7:34:45<20:01:11, 3.83it/s] 26%|██▌ | 95269/371472 [7:34:45<20:56:18, 3.66it/s] 26%|██▌ | 95270/371472 [7:34:45<20:25:48, 3.76it/s] 26%|██▌ | 95271/371472 [7:34:46<20:56:45, 3.66it/s] 26%|██▌ | 95272/371472 [7:34:46<20:27:47, 3.75it/s] 26%|██▌ | 95273/371472 [7:34:46<21:34:41, 3.56it/s] 26%|██▌ | 95274/371472 [7:34:46<20:43:52, 3.70it/s] 26%|██▌ | 95275/371472 [7:34:47<20:37:06, 3.72it/s] 26%|██▌ | 95276/371472 [7:34:47<20:35:45, 3.73it/s] 26%|██▌ | 95277/371472 [7:34:47<21:05:13, 3.64it/s] 26%|██▌ | 95278/371472 [7:34:47<22:13:20, 3.45it/s] 26%|██▌ | 95279/371472 [7:34:48<21:25:44, 3.58it/s] 26%|██▌ | 95280/371472 [7:34:48<20:29:02, 3.75it/s] {'loss': 3.6808, 'learning_rate': 7.695166885733371e-07, 'epoch': 4.1} + 26%|██▌ | 95280/371472 [7:34:48<20:29:02, 3.75it/s] 26%|██▌ | 95281/371472 [7:34:48<20:56:15, 3.66it/s] 26%|██▌ | 95282/371472 [7:34:49<21:06:44, 3.63it/s] 26%|██▌ | 95283/371472 [7:34:49<21:56:29, 3.50it/s] 26%|██▌ | 95284/371472 [7:34:49<21:34:07, 3.56it/s] 26%|██▌ | 95285/371472 [7:34:49<22:07:57, 3.47it/s] 26%|██▌ | 95286/371472 [7:34:50<21:10:32, 3.62it/s] 26%|██▌ | 95287/371472 [7:34:50<20:41:53, 3.71it/s] 26%|██▌ | 95288/371472 [7:34:50<20:18:00, 3.78it/s] 26%|██▌ | 95289/371472 [7:34:50<20:19:46, 3.77it/s] 26%|██▌ | 95290/371472 [7:34:51<20:32:52, 3.73it/s] 26%|██▌ | 95291/371472 [7:34:51<20:43:44, 3.70it/s] 26%|██▌ | 95292/371472 [7:34:51<20:05:34, 3.82it/s] 26%|██▌ | 95293/371472 [7:34:52<22:01:45, 3.48it/s] 26%|██▌ | 95294/371472 [7:34:52<21:04:48, 3.64it/s] 26%|██▌ | 95295/371472 [7:34:52<20:33:43, 3.73it/s] 26%|██▌ | 95296/371472 [7:34:52<21:17:24, 3.60it/s] 26%|██▌ | 95297/371472 [7:34:53<22:00:14, 3.49it/s] 26%|██▌ | 95298/371472 [7:34:53<21:46:23, 3.52it/s] 26%|██▌ | 95299/371472 [7:34:53<24:12:58, 3.17it/s] 26%|██▌ | 95300/371472 [7:34:54<22:37:20, 3.39it/s] {'loss': 3.5624, 'learning_rate': 7.694682065978582e-07, 'epoch': 4.1} + 26%|██▌ | 95300/371472 [7:34:54<22:37:20, 3.39it/s] 26%|██▌ | 95301/371472 [7:34:54<22:22:42, 3.43it/s] 26%|██▌ | 95302/371472 [7:34:54<24:07:40, 3.18it/s] 26%|██▌ | 95303/371472 [7:34:55<23:26:48, 3.27it/s] 26%|██▌ | 95304/371472 [7:34:55<23:10:26, 3.31it/s] 26%|██▌ | 95305/371472 [7:34:55<22:45:09, 3.37it/s] 26%|██▌ | 95306/371472 [7:34:55<21:48:41, 3.52it/s] 26%|██▌ | 95307/371472 [7:34:56<23:34:38, 3.25it/s] 26%|██▌ | 95308/371472 [7:34:56<23:33:04, 3.26it/s] 26%|██▌ | 95309/371472 [7:34:56<22:11:33, 3.46it/s] 26%|██▌ | 95310/371472 [7:34:57<22:30:40, 3.41it/s] 26%|██▌ | 95311/371472 [7:34:57<21:36:53, 3.55it/s] 26%|██▌ | 95312/371472 [7:34:57<21:02:42, 3.65it/s] 26%|██▌ | 95313/371472 [7:34:57<22:40:48, 3.38it/s] 26%|██▌ | 95314/371472 [7:34:58<22:00:26, 3.49it/s] 26%|██▌ | 95315/371472 [7:34:58<21:23:29, 3.59it/s] 26%|██▌ | 95316/371472 [7:34:58<21:25:05, 3.58it/s] 26%|██▌ | 95317/371472 [7:34:58<20:36:01, 3.72it/s] 26%|██▌ | 95318/371472 [7:34:59<21:02:24, 3.65it/s] 26%|██▌ | 95319/371472 [7:34:59<21:02:53, 3.64it/s] 26%|██▌ | 95320/371472 [7:34:59<20:36:47, 3.72it/s] {'loss': 3.4057, 'learning_rate': 7.694197246223792e-07, 'epoch': 4.11} + 26%|██▌ | 95320/371472 [7:34:59<20:36:47, 3.72it/s] 26%|██▌ | 95321/371472 [7:35:00<20:28:30, 3.75it/s] 26%|██▌ | 95322/371472 [7:35:00<22:20:26, 3.43it/s] 26%|██▌ | 95323/371472 [7:35:00<21:10:18, 3.62it/s] 26%|██▌ | 95324/371472 [7:35:00<20:12:40, 3.80it/s] 26%|██▌ | 95325/371472 [7:35:01<19:48:06, 3.87it/s] 26%|██▌ | 95326/371472 [7:35:01<20:53:43, 3.67it/s] 26%|██▌ | 95327/371472 [7:35:01<20:31:32, 3.74it/s] 26%|██▌ | 95328/371472 [7:35:01<20:53:22, 3.67it/s] 26%|██▌ | 95329/371472 [7:35:02<19:58:52, 3.84it/s] 26%|██▌ | 95330/371472 [7:35:02<19:56:47, 3.85it/s] 26%|██▌ | 95331/371472 [7:35:02<21:06:52, 3.63it/s] 26%|██▌ | 95332/371472 [7:35:03<20:44:12, 3.70it/s] 26%|██▌ | 95333/371472 [7:35:03<20:31:30, 3.74it/s] 26%|██▌ | 95334/371472 [7:35:03<20:18:37, 3.78it/s] 26%|██▌ | 95335/371472 [7:35:03<20:12:32, 3.80it/s] 26%|██▌ | 95336/371472 [7:35:04<20:02:00, 3.83it/s] 26%|██▌ | 95337/371472 [7:35:04<20:42:24, 3.70it/s] 26%|██▌ | 95338/371472 [7:35:04<20:33:29, 3.73it/s] 26%|██▌ | 95339/371472 [7:35:04<20:31:22, 3.74it/s] 26%|██▌ | 95340/371472 [7:35:05<22:27:52, 3.41it/s] {'loss': 3.5641, 'learning_rate': 7.693712426469003e-07, 'epoch': 4.11} + 26%|██▌ | 95340/371472 [7:35:05<22:27:52, 3.41it/s] 26%|██▌ | 95341/371472 [7:35:05<21:55:28, 3.50it/s] 26%|██▌ | 95342/371472 [7:35:05<21:03:47, 3.64it/s] 26%|██▌ | 95343/371472 [7:35:06<22:17:34, 3.44it/s] 26%|██▌ | 95344/371472 [7:35:06<22:14:57, 3.45it/s] 26%|██▌ | 95345/371472 [7:35:06<22:27:01, 3.42it/s] 26%|██▌ | 95346/371472 [7:35:06<21:22:30, 3.59it/s] 26%|██▌ | 95347/371472 [7:35:07<20:53:06, 3.67it/s] 26%|██▌ | 95348/371472 [7:35:07<20:40:56, 3.71it/s] 26%|██▌ | 95349/371472 [7:35:07<20:14:49, 3.79it/s] 26%|██▌ | 95350/371472 [7:35:08<21:45:07, 3.53it/s] 26%|██▌ | 95351/371472 [7:35:08<20:37:18, 3.72it/s] 26%|██▌ | 95352/371472 [7:35:08<21:23:29, 3.59it/s] 26%|██▌ | 95353/371472 [7:35:08<21:31:34, 3.56it/s] 26%|██▌ | 95354/371472 [7:35:09<20:56:19, 3.66it/s] 26%|██▌ | 95355/371472 [7:35:09<20:35:23, 3.73it/s] 26%|██▌ | 95356/371472 [7:35:09<20:41:52, 3.71it/s] 26%|██▌ | 95357/371472 [7:35:09<20:18:22, 3.78it/s] 26%|██▌ | 95358/371472 [7:35:10<20:09:07, 3.81it/s] 26%|██▌ | 95359/371472 [7:35:10<19:50:24, 3.87it/s] 26%|██▌ | 95360/371472 [7:35:10<19:20:03, 3.97it/s] {'loss': 3.6686, 'learning_rate': 7.693227606714214e-07, 'epoch': 4.11} + 26%|██▌ | 95360/371472 [7:35:10<19:20:03, 3.97it/s] 26%|██▌ | 95361/371472 [7:35:10<19:04:26, 4.02it/s] 26%|██▌ | 95362/371472 [7:35:11<19:49:57, 3.87it/s] 26%|██▌ | 95363/371472 [7:35:11<21:45:15, 3.53it/s] 26%|██▌ | 95364/371472 [7:35:11<21:15:57, 3.61it/s] 26%|██▌ | 95365/371472 [7:35:12<23:10:13, 3.31it/s] 26%|██▌ | 95366/371472 [7:35:12<23:19:44, 3.29it/s] 26%|██▌ | 95367/371472 [7:35:12<23:32:31, 3.26it/s] 26%|██▌ | 95368/371472 [7:35:13<23:31:47, 3.26it/s] 26%|██▌ | 95369/371472 [7:35:13<23:50:30, 3.22it/s] 26%|██▌ | 95370/371472 [7:35:13<23:29:59, 3.26it/s] 26%|██▌ | 95371/371472 [7:35:13<22:47:29, 3.37it/s] 26%|██▌ | 95372/371472 [7:35:14<21:32:54, 3.56it/s] 26%|██▌ | 95373/371472 [7:35:14<21:20:16, 3.59it/s] 26%|██▌ | 95374/371472 [7:35:14<23:37:47, 3.25it/s] 26%|██▌ | 95375/371472 [7:35:15<24:24:48, 3.14it/s] 26%|██▌ | 95376/371472 [7:35:15<22:36:54, 3.39it/s] 26%|██▌ | 95377/371472 [7:35:15<23:14:34, 3.30it/s] 26%|██▌ | 95378/371472 [7:35:16<23:29:22, 3.26it/s] 26%|██▌ | 95379/371472 [7:35:16<23:15:08, 3.30it/s] 26%|██▌ | 95380/371472 [7:35:16<22:21:55, 3.43it/s] {'loss': 3.658, 'learning_rate': 7.692742786959426e-07, 'epoch': 4.11} + 26%|██▌ | 95380/371472 [7:35:16<22:21:55, 3.43it/s] 26%|██▌ | 95381/371472 [7:35:16<23:10:20, 3.31it/s] 26%|██▌ | 95382/371472 [7:35:17<22:13:16, 3.45it/s] 26%|██▌ | 95383/371472 [7:35:17<21:25:13, 3.58it/s] 26%|██▌ | 95384/371472 [7:35:17<20:51:12, 3.68it/s] 26%|██▌ | 95385/371472 [7:35:18<21:16:25, 3.60it/s] 26%|██▌ | 95386/371472 [7:35:18<21:25:51, 3.58it/s] 26%|██▌ | 95387/371472 [7:35:18<20:37:16, 3.72it/s] 26%|██▌ | 95388/371472 [7:35:18<20:36:17, 3.72it/s] 26%|██▌ | 95389/371472 [7:35:19<20:24:37, 3.76it/s] 26%|██▌ | 95390/371472 [7:35:19<20:38:57, 3.71it/s] 26%|██▌ | 95391/371472 [7:35:19<20:43:59, 3.70it/s] 26%|██▌ | 95392/371472 [7:35:19<20:44:26, 3.70it/s] 26%|██▌ | 95393/371472 [7:35:20<20:32:11, 3.73it/s] 26%|██��� | 95394/371472 [7:35:20<20:04:17, 3.82it/s] 26%|██▌ | 95395/371472 [7:35:20<21:23:49, 3.58it/s] 26%|██▌ | 95396/371472 [7:35:21<23:11:59, 3.31it/s] 26%|██▌ | 95397/371472 [7:35:21<22:12:31, 3.45it/s] 26%|██▌ | 95398/371472 [7:35:21<21:44:18, 3.53it/s] 26%|██▌ | 95399/371472 [7:35:22<24:55:18, 3.08it/s] 26%|██▌ | 95400/371472 [7:35:22<23:32:01, 3.26it/s] {'loss': 3.6373, 'learning_rate': 7.692257967204637e-07, 'epoch': 4.11} + 26%|██▌ | 95400/371472 [7:35:22<23:32:01, 3.26it/s] 26%|██▌ | 95401/371472 [7:35:22<23:07:23, 3.32it/s] 26%|██▌ | 95402/371472 [7:35:22<24:06:01, 3.18it/s] 26%|██▌ | 95403/371472 [7:35:23<23:22:13, 3.28it/s] 26%|██▌ | 95404/371472 [7:35:23<22:10:43, 3.46it/s] 26%|██▌ | 95405/371472 [7:35:23<23:03:05, 3.33it/s] 26%|██▌ | 95406/371472 [7:35:24<22:14:42, 3.45it/s] 26%|██▌ | 95407/371472 [7:35:24<22:09:29, 3.46it/s] 26%|██▌ | 95408/371472 [7:35:24<21:56:10, 3.50it/s] 26%|██▌ | 95409/371472 [7:35:24<22:01:23, 3.48it/s] 26%|██▌ | 95410/371472 [7:35:25<21:31:34, 3.56it/s] 26%|██▌ | 95411/371472 [7:35:25<20:58:34, 3.66it/s] 26%|██▌ | 95412/371472 [7:35:25<21:53:46, 3.50it/s] 26%|██▌ | 95413/371472 [7:35:26<21:51:30, 3.51it/s] 26%|██▌ | 95414/371472 [7:35:26<22:27:29, 3.41it/s] 26%|██▌ | 95415/371472 [7:35:26<21:41:21, 3.54it/s] 26%|██▌ | 95416/371472 [7:35:26<22:04:05, 3.47it/s] 26%|██▌ | 95417/371472 [7:35:27<24:21:36, 3.15it/s] 26%|██▌ | 95418/371472 [7:35:27<26:54:55, 2.85it/s] 26%|██▌ | 95419/371472 [7:35:27<24:36:19, 3.12it/s] 26%|██▌ | 95420/371472 [7:35:28<23:21:02, 3.28it/s] {'loss': 3.6582, 'learning_rate': 7.691773147449848e-07, 'epoch': 4.11} + 26%|██▌ | 95420/371472 [7:35:28<23:21:02, 3.28it/s] 26%|██▌ | 95421/371472 [7:35:28<22:25:57, 3.42it/s] 26%|██▌ | 95422/371472 [7:35:28<23:04:35, 3.32it/s] 26%|██▌ | 95423/371472 [7:35:29<22:46:47, 3.37it/s] 26%|██▌ | 95424/371472 [7:35:29<21:43:21, 3.53it/s] 26%|██▌ | 95425/371472 [7:35:29<21:29:34, 3.57it/s] 26%|██▌ | 95426/371472 [7:35:29<21:13:36, 3.61it/s] 26%|██▌ | 95427/371472 [7:35:30<22:26:10, 3.42it/s] 26%|██▌ | 95428/371472 [7:35:30<21:56:21, 3.50it/s] 26%|██▌ | 95429/371472 [7:35:30<22:10:29, 3.46it/s] 26%|██▌ | 95430/371472 [7:35:31<22:07:10, 3.47it/s] 26%|██▌ | 95431/371472 [7:35:31<22:45:38, 3.37it/s] 26%|██▌ | 95432/371472 [7:35:31<23:00:49, 3.33it/s] 26%|██▌ | 95433/371472 [7:35:31<21:55:49, 3.50it/s] 26%|██▌ | 95434/371472 [7:35:32<21:11:49, 3.62it/s] 26%|██▌ | 95435/371472 [7:35:32<21:05:25, 3.64it/s] 26%|██▌ | 95436/371472 [7:35:32<20:55:10, 3.67it/s] 26%|██▌ | 95437/371472 [7:35:32<20:04:14, 3.82it/s] 26%|██▌ | 95438/371472 [7:35:33<19:41:19, 3.89it/s] 26%|██▌ | 95439/371472 [7:35:33<19:37:14, 3.91it/s] 26%|██▌ | 95440/371472 [7:35:33<20:05:59, 3.81it/s] {'loss': 3.6382, 'learning_rate': 7.691288327695058e-07, 'epoch': 4.11} + 26%|██▌ | 95440/371472 [7:35:33<20:05:59, 3.81it/s] 26%|██▌ | 95441/371472 [7:35:34<20:58:54, 3.65it/s] 26%|██▌ | 95442/371472 [7:35:34<20:52:03, 3.67it/s] 26%|██▌ | 95443/371472 [7:35:34<20:32:30, 3.73it/s] 26%|██▌ | 95444/371472 [7:35:34<20:56:57, 3.66it/s] 26%|██▌ | 95445/371472 [7:35:35<20:28:58, 3.74it/s] 26%|██▌ | 95446/371472 [7:35:35<21:03:00, 3.64it/s] 26%|██▌ | 95447/371472 [7:35:35<21:00:51, 3.65it/s] 26%|██▌ | 95448/371472 [7:35:35<20:07:52, 3.81it/s] 26%|██▌ | 95449/371472 [7:35:36<20:33:11, 3.73it/s] 26%|██▌ | 95450/371472 [7:35:36<20:59:44, 3.65it/s] 26%|██▌ | 95451/371472 [7:35:36<21:08:04, 3.63it/s] 26%|██▌ | 95452/371472 [7:35:37<21:08:17, 3.63it/s] 26%|██▌ | 95453/371472 [7:35:37<22:14:27, 3.45it/s] 26%|██▌ | 95454/371472 [7:35:37<23:19:16, 3.29it/s] 26%|██▌ | 95455/371472 [7:35:37<22:09:44, 3.46it/s] 26%|██▌ | 95456/371472 [7:35:38<21:24:01, 3.58it/s] 26%|██▌ | 95457/371472 [7:35:38<22:00:14, 3.48it/s] 26%|██▌ | 95458/371472 [7:35:38<21:45:50, 3.52it/s] 26%|██▌ | 95459/371472 [7:35:39<23:31:45, 3.26it/s] 26%|██▌ | 95460/371472 [7:35:39<23:31:46, 3.26it/s] {'loss': 3.4615, 'learning_rate': 7.69080350794027e-07, 'epoch': 4.11} + 26%|██▌ | 95460/371472 [7:35:39<23:31:46, 3.26it/s] 26%|██▌ | 95461/371472 [7:35:39<23:06:41, 3.32it/s] 26%|██▌ | 95462/371472 [7:35:40<22:42:30, 3.38it/s] 26%|██▌ | 95463/371472 [7:35:40<21:18:16, 3.60it/s] 26%|██▌ | 95464/371472 [7:35:40<21:43:28, 3.53it/s] 26%|██▌ | 95465/371472 [7:35:40<21:42:02, 3.53it/s] 26%|██▌ | 95466/371472 [7:35:41<22:09:19, 3.46it/s] 26%|██▌ | 95467/371472 [7:35:41<21:36:57, 3.55it/s] 26%|██▌ | 95468/371472 [7:35:41<21:04:49, 3.64it/s] 26%|██▌ | 95469/371472 [7:35:41<21:15:15, 3.61it/s] 26%|██▌ | 95470/371472 [7:35:42<21:08:27, 3.63it/s] 26%|██▌ | 95471/371472 [7:35:42<21:03:05, 3.64it/s] 26%|██▌ | 95472/371472 [7:35:42<22:47:15, 3.36it/s] 26%|██▌ | 95473/371472 [7:35:43<22:21:08, 3.43it/s] 26%|██▌ | 95474/371472 [7:35:43<21:52:29, 3.50it/s] 26%|██▌ | 95475/371472 [7:35:43<21:12:29, 3.61it/s] 26%|██▌ | 95476/371472 [7:35:43<21:08:54, 3.63it/s] 26%|██▌ | 95477/371472 [7:35:44<21:20:28, 3.59it/s] 26%|██▌ | 95478/371472 [7:35:44<21:26:59, 3.57it/s] 26%|██▌ | 95479/371472 [7:35:44<20:57:20, 3.66it/s] 26%|██▌ | 95480/371472 [7:35:45<20:38:11, 3.71it/s] {'loss': 3.7458, 'learning_rate': 7.690318688185481e-07, 'epoch': 4.11} + 26%|██▌ | 95480/371472 [7:35:45<20:38:11, 3.71it/s] 26%|██▌ | 95481/371472 [7:35:45<20:39:55, 3.71it/s] 26%|██▌ | 95482/371472 [7:35:45<20:35:26, 3.72it/s] 26%|██▌ | 95483/371472 [7:35:45<20:30:18, 3.74it/s] 26%|██▌ | 95484/371472 [7:35:46<20:45:07, 3.69it/s] 26%|██▌ | 95485/371472 [7:35:46<20:28:37, 3.74it/s] 26%|██▌ | 95486/371472 [7:35:46<21:27:34, 3.57it/s] 26%|██▌ | 95487/371472 [7:35:46<20:47:59, 3.69it/s] 26%|██▌ | 95488/371472 [7:35:47<20:32:09, 3.73it/s] 26%|██▌ | 95489/371472 [7:35:47<20:23:24, 3.76it/s] 26%|██▌ | 95490/371472 [7:35:47<20:12:56, 3.79it/s] 26%|██▌ | 95491/371472 [7:35:47<20:44:27, 3.70it/s] 26%|██▌ | 95492/371472 [7:35:48<21:10:15, 3.62it/s] 26%|██▌ | 95493/371472 [7:35:48<21:46:38, 3.52it/s] 26%|██▌ | 95494/371472 [7:35:48<22:10:38, 3.46it/s] 26%|██▌ | 95495/371472 [7:35:49<21:53:46, 3.50it/s] 26%|██▌ | 95496/371472 [7:35:49<21:33:33, 3.56it/s] 26%|██▌ | 95497/371472 [7:35:49<22:40:45, 3.38it/s] 26%|██▌ | 95498/371472 [7:35:50<21:51:10, 3.51it/s] 26%|██▌ | 95499/371472 [7:35:50<21:54:30, 3.50it/s] 26%|██▌ | 95500/371472 [7:35:50<21:14:08, 3.61it/s] {'loss': 3.6824, 'learning_rate': 7.689833868430692e-07, 'epoch': 4.11} + 26%|██▌ | 95500/371472 [7:35:50<21:14:08, 3.61it/s] 26%|██▌ | 95501/371472 [7:35:50<20:41:03, 3.71it/s] 26%|██▌ | 95502/371472 [7:35:51<21:38:48, 3.54it/s] 26%|██▌ | 95503/371472 [7:35:51<21:39:17, 3.54it/s] 26%|██▌ | 95504/371472 [7:35:51<21:32:07, 3.56it/s] 26%|██▌ | 95505/371472 [7:35:52<24:19:15, 3.15it/s] 26%|██▌ | 95506/371472 [7:35:52<22:37:26, 3.39it/s] 26%|██▌ | 95507/371472 [7:35:52<21:25:05, 3.58it/s] 26%|██▌ | 95508/371472 [7:35:52<20:38:56, 3.71it/s] 26%|██▌ | 95509/371472 [7:35:53<20:36:02, 3.72it/s] 26%|██▌ | 95510/371472 [7:35:53<20:08:09, 3.81it/s] 26%|██▌ | 95511/371472 [7:35:53<20:34:25, 3.73it/s] 26%|██▌ | 95512/371472 [7:35:53<20:17:24, 3.78it/s] 26%|██▌ | 95513/371472 [7:35:54<21:46:39, 3.52it/s] 26%|██▌ | 95514/371472 [7:35:54<21:00:10, 3.65it/s] 26%|██▌ | 95515/371472 [7:35:54<20:23:34, 3.76it/s] 26%|██▌ | 95516/371472 [7:35:54<20:32:30, 3.73it/s] 26%|██▌ | 95517/371472 [7:35:55<20:22:32, 3.76it/s] 26%|██▌ | 95518/371472 [7:35:55<21:24:43, 3.58it/s] 26%|██▌ | 95519/371472 [7:35:55<21:25:46, 3.58it/s] 26%|██▌ | 95520/371472 [7:35:56<22:22:33, 3.43it/s] {'loss': 3.6806, 'learning_rate': 7.689349048675903e-07, 'epoch': 4.11} + 26%|██▌ | 95520/371472 [7:35:56<22:22:33, 3.43it/s] 26%|██▌ | 95521/371472 [7:35:56<22:25:48, 3.42it/s] 26%|██▌ | 95522/371472 [7:35:56<22:09:45, 3.46it/s] 26%|██▌ | 95523/371472 [7:35:57<22:45:40, 3.37it/s] 26%|██▌ | 95524/371472 [7:35:57<22:12:36, 3.45it/s] 26%|██▌ | 95525/371472 [7:35:57<21:57:57, 3.49it/s] 26%|██▌ | 95526/371472 [7:35:57<22:02:06, 3.48it/s] 26%|██▌ | 95527/371472 [7:35:58<21:33:12, 3.56it/s] 26%|█���▌ | 95528/371472 [7:35:58<23:29:22, 3.26it/s] 26%|██▌ | 95529/371472 [7:35:58<23:37:37, 3.24it/s] 26%|██▌ | 95530/371472 [7:35:59<22:33:09, 3.40it/s] 26%|██▌ | 95531/371472 [7:35:59<22:02:11, 3.48it/s] 26%|██▌ | 95532/371472 [7:35:59<21:19:35, 3.59it/s] 26%|██▌ | 95533/371472 [7:35:59<22:44:00, 3.37it/s] 26%|██▌ | 95534/371472 [7:36:00<21:27:33, 3.57it/s] 26%|██▌ | 95535/371472 [7:36:00<20:18:30, 3.77it/s] 26%|██▌ | 95536/371472 [7:36:00<20:26:25, 3.75it/s] 26%|██▌ | 95537/371472 [7:36:00<20:01:48, 3.83it/s] 26%|██▌ | 95538/371472 [7:36:01<19:57:34, 3.84it/s] 26%|██▌ | 95539/371472 [7:36:01<21:33:45, 3.55it/s] 26%|██▌ | 95540/371472 [7:36:01<21:23:40, 3.58it/s] {'loss': 3.6325, 'learning_rate': 7.688864228921114e-07, 'epoch': 4.12} + 26%|██▌ | 95540/371472 [7:36:01<21:23:40, 3.58it/s] 26%|██▌ | 95541/371472 [7:36:02<21:48:47, 3.51it/s] 26%|██▌ | 95542/371472 [7:36:02<22:00:32, 3.48it/s] 26%|██▌ | 95543/371472 [7:36:02<20:59:04, 3.65it/s] 26%|██▌ | 95544/371472 [7:36:02<21:13:50, 3.61it/s] 26%|██▌ | 95545/371472 [7:36:03<21:01:00, 3.65it/s] 26%|██▌ | 95546/371472 [7:36:03<21:10:41, 3.62it/s] 26%|██▌ | 95547/371472 [7:36:03<21:39:38, 3.54it/s] 26%|██▌ | 95548/371472 [7:36:04<21:10:32, 3.62it/s] 26%|██▌ | 95549/371472 [7:36:04<20:28:33, 3.74it/s] 26%|██▌ | 95550/371472 [7:36:04<19:53:22, 3.85it/s] 26%|██▌ | 95551/371472 [7:36:04<19:28:42, 3.93it/s] 26%|██▌ | 95552/371472 [7:36:05<22:39:23, 3.38it/s] 26%|██▌ | 95553/371472 [7:36:05<22:32:24, 3.40it/s] 26%|██▌ | 95554/371472 [7:36:05<21:23:56, 3.58it/s] 26%|██▌ | 95555/371472 [7:36:05<20:55:45, 3.66it/s] 26%|██▌ | 95556/371472 [7:36:06<20:17:55, 3.78it/s] 26%|██▌ | 95557/371472 [7:36:06<19:33:43, 3.92it/s] 26%|██▌ | 95558/371472 [7:36:06<20:09:50, 3.80it/s] 26%|██▌ | 95559/371472 [7:36:07<23:12:32, 3.30it/s] 26%|██▌ | 95560/371472 [7:36:07<22:28:11, 3.41it/s] {'loss': 3.7223, 'learning_rate': 7.688379409166325e-07, 'epoch': 4.12} + 26%|██▌ | 95560/371472 [7:36:07<22:28:11, 3.41it/s] 26%|██▌ | 95561/371472 [7:36:07<22:41:48, 3.38it/s] 26%|██▌ | 95562/371472 [7:36:07<22:02:09, 3.48it/s] 26%|██▌ | 95563/371472 [7:36:08<20:49:29, 3.68it/s] 26%|██▌ | 95564/371472 [7:36:08<20:08:54, 3.80it/s] 26%|██▌ | 95565/371472 [7:36:08<20:56:08, 3.66it/s] 26%|██▌ | 95566/371472 [7:36:08<20:19:14, 3.77it/s] 26%|██▌ | 95567/371472 [7:36:09<19:33:56, 3.92it/s] 26%|██▌ | 95568/371472 [7:36:09<20:36:28, 3.72it/s] 26%|██▌ | 95569/371472 [7:36:09<20:48:22, 3.68it/s] 26%|██▌ | 95570/371472 [7:36:10<20:08:00, 3.81it/s] 26%|██▌ | 95571/371472 [7:36:10<20:07:06, 3.81it/s] 26%|██▌ | 95572/371472 [7:36:10<19:35:57, 3.91it/s] 26%|██▌ | 95573/371472 [7:36:10<19:14:33, 3.98it/s] 26%|██▌ | 95574/371472 [7:36:11<21:05:56, 3.63it/s] 26%|██▌ | 95575/371472 [7:36:11<21:34:18, 3.55it/s] 26%|██▌ | 95576/371472 [7:36:11<21:21:24, 3.59it/s] 26%|██▌ | 95577/371472 [7:36:11<21:00:38, 3.65it/s] 26%|██▌ | 95578/371472 [7:36:12<21:15:47, 3.60it/s] 26%|██▌ | 95579/371472 [7:36:12<22:09:50, 3.46it/s] 26%|██▌ | 95580/371472 [7:36:12<21:32:43, 3.56it/s] {'loss': 3.6405, 'learning_rate': 7.687894589411536e-07, 'epoch': 4.12} + 26%|██▌ | 95580/371472 [7:36:12<21:32:43, 3.56it/s] 26%|██▌ | 95581/371472 [7:36:13<21:49:17, 3.51it/s] 26%|██▌ | 95582/371472 [7:36:13<23:30:50, 3.26it/s] 26%|██▌ | 95583/371472 [7:36:13<22:30:41, 3.40it/s] 26%|██▌ | 95584/371472 [7:36:13<21:16:20, 3.60it/s] 26%|██▌ | 95585/371472 [7:36:14<21:54:27, 3.50it/s] 26%|██▌ | 95586/371472 [7:36:14<22:53:08, 3.35it/s] 26%|██▌ | 95587/371472 [7:36:14<25:22:28, 3.02it/s] 26%|██▌ | 95588/371472 [7:36:15<24:55:22, 3.07it/s] 26%|██▌ | 95589/371472 [7:36:15<23:11:08, 3.31it/s] 26%|██▌ | 95590/371472 [7:36:15<22:38:47, 3.38it/s] 26%|██▌ | 95591/371472 [7:36:16<25:41:46, 2.98it/s] 26%|██▌ | 95592/371472 [7:36:16<24:51:29, 3.08it/s] 26%|██▌ | 95593/371472 [7:36:16<23:10:42, 3.31it/s] 26%|██▌ | 95594/371472 [7:36:17<22:55:53, 3.34it/s] 26%|██▌ | 95595/371472 [7:36:17<22:23:39, 3.42it/s] 26%|██▌ | 95596/371472 [7:36:17<21:51:54, 3.50it/s] 26%|██▌ | 95597/371472 [7:36:17<21:43:06, 3.53it/s] 26%|██▌ | 95598/371472 [7:36:18<22:06:11, 3.47it/s] 26%|██▌ | 95599/371472 [7:36:18<21:57:14, 3.49it/s] 26%|██▌ | 95600/371472 [7:36:18<22:30:52, 3.40it/s] {'loss': 3.4842, 'learning_rate': 7.687409769656747e-07, 'epoch': 4.12} + 26%|██▌ | 95600/371472 [7:36:18<22:30:52, 3.40it/s] 26%|██▌ | 95601/371472 [7:36:19<21:06:08, 3.63it/s] 26%|██▌ | 95602/371472 [7:36:19<20:02:16, 3.82it/s] 26%|██▌ | 95603/371472 [7:36:19<20:42:49, 3.70it/s] 26%|██▌ | 95604/371472 [7:36:19<20:22:40, 3.76it/s] 26%|██▌ | 95605/371472 [7:36:20<20:34:15, 3.73it/s] 26%|██▌ | 95606/371472 [7:36:20<22:40:13, 3.38it/s] 26%|██▌ | 95607/371472 [7:36:20<22:57:13, 3.34it/s] 26%|██▌ | 95608/371472 [7:36:21<22:08:40, 3.46it/s] 26%|██▌ | 95609/371472 [7:36:21<21:21:55, 3.59it/s] 26%|██▌ | 95610/371472 [7:36:21<20:45:47, 3.69it/s] 26%|██▌ | 95611/371472 [7:36:21<20:48:35, 3.68it/s] 26%|██▌ | 95612/371472 [7:36:22<21:02:54, 3.64it/s] 26%|██▌ | 95613/371472 [7:36:22<20:54:52, 3.66it/s] 26%|██▌ | 95614/371472 [7:36:22<21:18:24, 3.60it/s] 26%|██▌ | 95615/371472 [7:36:22<20:48:44, 3.68it/s] 26%|██▌ | 95616/371472 [7:36:23<20:40:45, 3.71it/s] 26%|██▌ | 95617/371472 [7:36:23<20:15:58, 3.78it/s] 26%|██▌ | 95618/371472 [7:36:23<20:32:29, 3.73it/s] 26%|██▌ | 95619/371472 [7:36:24<21:55:56, 3.49it/s] 26%|██▌ | 95620/371472 [7:36:24<21:27:39, 3.57it/s] {'loss': 3.6272, 'learning_rate': 7.686924949901958e-07, 'epoch': 4.12} + 26%|██▌ | 95620/371472 [7:36:24<21:27:39, 3.57it/s] 26%|██▌ | 95621/371472 [7:36:24<21:35:34, 3.55it/s] 26%|██▌ | 95622/371472 [7:36:24<21:01:29, 3.64it/s] 26%|██▌ | 95623/371472 [7:36:25<20:31:27, 3.73it/s] 26%|██▌ | 95624/371472 [7:36:25<20:05:43, 3.81it/s] 26%|██▌ | 95625/371472 [7:36:25<19:56:51, 3.84it/s] 26%|██▌ | 95626/371472 [7:36:25<20:01:03, 3.83it/s] 26%|██▌ | 95627/371472 [7:36:26<19:35:31, 3.91it/s] 26%|██▌ | 95628/371472 [7:36:26<19:34:23, 3.91it/s] 26%|██▌ | 95629/371472 [7:36:26<20:45:57, 3.69it/s] 26%|██▌ | 95630/371472 [7:36:27<22:26:37, 3.41it/s] 26%|██▌ | 95631/371472 [7:36:27<21:33:56, 3.55it/s] 26%|██▌ | 95632/371472 [7:36:27<21:42:38, 3.53it/s] 26%|██▌ | 95633/371472 [7:36:27<20:55:08, 3.66it/s] 26%|██▌ | 95634/371472 [7:36:28<19:59:40, 3.83it/s] 26%|██▌ | 95635/371472 [7:36:28<19:56:52, 3.84it/s] 26%|██▌ | 95636/371472 [7:36:28<19:46:56, 3.87it/s] 26%|██▌ | 95637/371472 [7:36:28<22:27:46, 3.41it/s] 26%|██▌ | 95638/371472 [7:36:29<21:30:23, 3.56it/s] 26%|██▌ | 95639/371472 [7:36:29<22:03:24, 3.47it/s] 26%|██▌ | 95640/371472 [7:36:29<23:44:43, 3.23it/s] {'loss': 3.4975, 'learning_rate': 7.68644013014717e-07, 'epoch': 4.12} + 26%|██▌ | 95640/371472 [7:36:29<23:44:43, 3.23it/s] 26%|██▌ | 95641/371472 [7:36:30<23:03:25, 3.32it/s] 26%|██▌ | 95642/371472 [7:36:30<22:41:10, 3.38it/s] 26%|██▌ | 95643/371472 [7:36:30<23:02:32, 3.33it/s] 26%|██▌ | 95644/371472 [7:36:31<24:45:44, 3.09it/s] 26%|██▌ | 95645/371472 [7:36:31<23:16:35, 3.29it/s] 26%|██▌ | 95646/371472 [7:36:31<22:20:05, 3.43it/s] 26%|██▌ | 95647/371472 [7:36:31<23:48:28, 3.22it/s] 26%|██▌ | 95648/371472 [7:36:32<23:35:10, 3.25it/s] 26%|██▌ | 95649/371472 [7:36:32<22:19:16, 3.43it/s] 26%|██▌ | 95650/371472 [7:36:32<21:40:48, 3.53it/s] 26%|██▌ | 95651/371472 [7:36:33<21:43:42, 3.53it/s] 26%|██▌ | 95652/371472 [7:36:33<21:06:30, 3.63it/s] 26%|██▌ | 95653/371472 [7:36:33<21:56:07, 3.49it/s] 26%|██▌ | 95654/371472 [7:36:33<22:03:21, 3.47it/s] 26%|██▌ | 95655/371472 [7:36:34<21:26:11, 3.57it/s] 26%|██▌ | 95656/371472 [7:36:34<20:44:51, 3.69it/s] 26%|██▌ | 95657/371472 [7:36:34<21:17:07, 3.60it/s] 26%|██▌ | 95658/371472 [7:36:34<20:22:54, 3.76it/s] 26%|██▌ | 95659/371472 [7:36:35<21:05:38, 3.63it/s] 26%|██▌ | 95660/371472 [7:36:35<22:20:39, 3.43it/s] {'loss': 3.5477, 'learning_rate': 7.685955310392381e-07, 'epoch': 4.12} + 26%|██▌ | 95660/371472 [7:36:35<22:20:39, 3.43it/s] 26%|██▌ | 95661/371472 [7:36:35<22:23:34, 3.42it/s] 26%|���█▌ | 95662/371472 [7:36:36<21:17:37, 3.60it/s] 26%|██▌ | 95663/371472 [7:36:36<20:29:27, 3.74it/s] 26%|██▌ | 95664/371472 [7:36:36<20:52:08, 3.67it/s] 26%|██▌ | 95665/371472 [7:36:36<20:42:32, 3.70it/s] 26%|██▌ | 95666/371472 [7:36:37<20:35:56, 3.72it/s] 26%|██▌ | 95667/371472 [7:36:37<20:10:05, 3.80it/s] 26%|██▌ | 95668/371472 [7:36:37<21:31:39, 3.56it/s] 26%|██▌ | 95669/371472 [7:36:38<21:44:14, 3.52it/s] 26%|██▌ | 95670/371472 [7:36:38<22:01:50, 3.48it/s] 26%|██▌ | 95671/371472 [7:36:38<21:38:44, 3.54it/s] 26%|██▌ | 95672/371472 [7:36:38<21:25:29, 3.58it/s] 26%|██▌ | 95673/371472 [7:36:39<20:42:07, 3.70it/s] 26%|██▌ | 95674/371472 [7:36:39<20:32:23, 3.73it/s] 26%|██▌ | 95675/371472 [7:36:39<21:22:58, 3.58it/s] 26%|██▌ | 95676/371472 [7:36:40<23:09:37, 3.31it/s] 26%|██▌ | 95677/371472 [7:36:40<22:15:35, 3.44it/s] 26%|██▌ | 95678/371472 [7:36:40<23:21:10, 3.28it/s] 26%|██▌ | 95679/371472 [7:36:40<22:08:00, 3.46it/s] 26%|██▌ | 95680/371472 [7:36:41<21:14:10, 3.61it/s] {'loss': 3.7781, 'learning_rate': 7.685470490637592e-07, 'epoch': 4.12} + 26%|██▌ | 95680/371472 [7:36:41<21:14:10, 3.61it/s] 26%|██▌ | 95681/371472 [7:36:41<20:55:30, 3.66it/s] 26%|██▌ | 95682/371472 [7:36:41<22:41:34, 3.38it/s] 26%|██▌ | 95683/371472 [7:36:42<22:05:45, 3.47it/s] 26%|██▌ | 95684/371472 [7:36:42<24:52:18, 3.08it/s] 26%|██▌ | 95685/371472 [7:36:42<24:40:50, 3.10it/s] 26%|██▌ | 95686/371472 [7:36:43<23:03:26, 3.32it/s] 26%|██▌ | 95687/371472 [7:36:43<22:01:32, 3.48it/s] 26%|██▌ | 95688/371472 [7:36:43<21:53:39, 3.50it/s] 26%|██▌ | 95689/371472 [7:36:43<21:11:31, 3.61it/s] 26%|██▌ | 95690/371472 [7:36:44<20:35:20, 3.72it/s] 26%|██▌ | 95691/371472 [7:36:44<20:04:00, 3.82it/s] 26%|██▌ | 95692/371472 [7:36:44<20:03:33, 3.82it/s] 26%|██▌ | 95693/371472 [7:36:44<20:28:32, 3.74it/s] 26%|██▌ | 95694/371472 [7:36:45<20:33:27, 3.73it/s] 26%|██▌ | 95695/371472 [7:36:45<20:20:40, 3.77it/s] 26%|██▌ | 95696/371472 [7:36:45<20:42:17, 3.70it/s] 26%|██▌ | 95697/371472 [7:36:45<21:27:23, 3.57it/s] 26%|██▌ | 95698/371472 [7:36:46<21:30:51, 3.56it/s] 26%|██▌ | 95699/371472 [7:36:46<21:03:03, 3.64it/s] 26%|██▌ | 95700/371472 [7:36:46<20:31:31, 3.73it/s] {'loss': 3.6428, 'learning_rate': 7.684985670882802e-07, 'epoch': 4.12} + 26%|██▌ | 95700/371472 [7:36:46<20:31:31, 3.73it/s] 26%|██▌ | 95701/371472 [7:36:47<19:54:08, 3.85it/s] 26%|██▌ | 95702/371472 [7:36:47<20:16:56, 3.78it/s] 26%|██▌ | 95703/371472 [7:36:47<22:02:55, 3.47it/s] 26%|██▌ | 95704/371472 [7:36:47<21:55:19, 3.49it/s] 26%|██▌ | 95705/371472 [7:36:48<20:45:51, 3.69it/s] 26%|██▌ | 95706/371472 [7:36:48<22:27:18, 3.41it/s] 26%|██▌ | 95707/371472 [7:36:48<22:33:47, 3.39it/s] 26%|██▌ | 95708/371472 [7:36:49<22:00:27, 3.48it/s] 26%|██▌ | 95709/371472 [7:36:49<21:10:21, 3.62it/s] 26%|██▌ | 95710/371472 [7:36:49<21:32:14, 3.56it/s] 26%|██▌ | 95711/371472 [7:36:49<21:31:00, 3.56it/s] 26%|██▌ | 95712/371472 [7:36:50<22:39:25, 3.38it/s] 26%|██▌ | 95713/371472 [7:36:50<22:08:53, 3.46it/s] 26%|██▌ | 95714/371472 [7:36:50<21:31:57, 3.56it/s] 26%|██▌ | 95715/371472 [7:36:51<20:59:22, 3.65it/s] 26%|██▌ | 95716/371472 [7:36:51<20:49:25, 3.68it/s] 26%|██▌ | 95717/371472 [7:36:51<20:27:37, 3.74it/s] 26%|██▌ | 95718/371472 [7:36:51<21:30:32, 3.56it/s] 26%|██▌ | 95719/371472 [7:36:52<21:20:35, 3.59it/s] 26%|██▌ | 95720/371472 [7:36:52<20:56:35, 3.66it/s] {'loss': 3.6195, 'learning_rate': 7.684500851128014e-07, 'epoch': 4.12} + 26%|██▌ | 95720/371472 [7:36:52<20:56:35, 3.66it/s] 26%|██▌ | 95721/371472 [7:36:52<20:37:27, 3.71it/s] 26%|██▌ | 95722/371472 [7:36:52<20:21:02, 3.76it/s] 26%|██▌ | 95723/371472 [7:36:53<21:15:47, 3.60it/s] 26%|██▌ | 95724/371472 [7:36:53<20:42:43, 3.70it/s] 26%|██▌ | 95725/371472 [7:36:53<21:13:46, 3.61it/s] 26%|██▌ | 95726/371472 [7:36:54<20:46:26, 3.69it/s] 26%|██▌ | 95727/371472 [7:36:54<20:39:40, 3.71it/s] 26%|██▌ | 95728/371472 [7:36:54<20:10:38, 3.80it/s] 26%|██▌ | 95729/371472 [7:36:54<20:02:41, 3.82it/s] 26%|██▌ | 95730/371472 [7:36:55<20:24:41, 3.75it/s] 26%|██▌ | 95731/371472 [7:36:55<21:52:52, 3.50it/s] 26%|██▌ | 95732/371472 [7:36:55<22:39:16, 3.38it/s] 26%|██▌ | 95733/371472 [7:36:55<22:07:21, 3.46it/s] 26%|██▌ | 95734/371472 [7:36:56<23:22:15, 3.28it/s] 26%|██▌ | 95735/371472 [7:36:56<23:37:56, 3.24it/s] 26%|██▌ | 95736/371472 [7:36:56<22:45:18, 3.37it/s] 26%|██▌ | 95737/371472 [7:36:57<21:20:08, 3.59it/s] 26%|██▌ | 95738/371472 [7:36:57<21:28:29, 3.57it/s] 26%|██▌ | 95739/371472 [7:36:57<22:14:42, 3.44it/s] 26%|██▌ | 95740/371472 [7:36:58<21:42:12, 3.53it/s] {'loss': 3.5773, 'learning_rate': 7.684016031373225e-07, 'epoch': 4.12} + 26%|██▌ | 95740/371472 [7:36:58<21:42:12, 3.53it/s] 26%|██▌ | 95741/371472 [7:36:58<20:58:05, 3.65it/s] 26%|██▌ | 95742/371472 [7:36:58<20:58:05, 3.65it/s] 26%|██▌ | 95743/371472 [7:36:58<21:07:14, 3.63it/s] 26%|██▌ | 95744/371472 [7:36:59<23:19:08, 3.28it/s] 26%|██▌ | 95745/371472 [7:36:59<22:06:19, 3.46it/s] 26%|██▌ | 95746/371472 [7:36:59<21:46:43, 3.52it/s] 26%|██▌ | 95747/371472 [7:37:00<22:30:38, 3.40it/s] 26%|██▌ | 95748/371472 [7:37:00<22:39:57, 3.38it/s] 26%|██▌ | 95749/371472 [7:37:00<23:17:11, 3.29it/s] 26%|██▌ | 95750/371472 [7:37:00<22:43:52, 3.37it/s] 26%|██▌ | 95751/371472 [7:37:01<21:36:07, 3.55it/s] 26%|██▌ | 95752/371472 [7:37:01<21:25:17, 3.58it/s] 26%|██▌ | 95753/371472 [7:37:01<21:11:41, 3.61it/s] 26%|██▌ | 95754/371472 [7:37:02<22:32:38, 3.40it/s] 26%|██▌ | 95755/371472 [7:37:02<21:35:58, 3.55it/s] 26%|██▌ | 95756/371472 [7:37:02<20:54:55, 3.66it/s] 26%|██▌ | 95757/371472 [7:37:03<26:49:46, 2.85it/s] 26%|██▌ | 95758/371472 [7:37:03<25:19:23, 3.02it/s] 26%|██▌ | 95759/371472 [7:37:03<24:30:05, 3.13it/s] 26%|██▌ | 95760/371472 [7:37:03<22:42:46, 3.37it/s] {'loss': 3.7106, 'learning_rate': 7.683531211618436e-07, 'epoch': 4.12} + 26%|██▌ | 95760/371472 [7:37:03<22:42:46, 3.37it/s] 26%|██▌ | 95761/371472 [7:37:04<21:56:40, 3.49it/s] 26%|██▌ | 95762/371472 [7:37:04<23:29:27, 3.26it/s] 26%|██▌ | 95763/371472 [7:37:04<22:13:04, 3.45it/s] 26%|██▌ | 95764/371472 [7:37:05<23:31:35, 3.26it/s] 26%|██▌ | 95765/371472 [7:37:05<22:13:02, 3.45it/s] 26%|██▌ | 95766/371472 [7:37:05<21:24:57, 3.58it/s] 26%|██▌ | 95767/371472 [7:37:05<21:08:02, 3.62it/s] 26%|██▌ | 95768/371472 [7:37:06<21:56:15, 3.49it/s] 26%|██▌ | 95769/371472 [7:37:06<21:40:04, 3.53it/s] 26%|██▌ | 95770/371472 [7:37:06<23:04:09, 3.32it/s] 26%|██▌ | 95771/371472 [7:37:07<22:17:23, 3.44it/s] 26%|██▌ | 95772/371472 [7:37:07<21:24:36, 3.58it/s] 26%|██▌ | 95773/371472 [7:37:07<22:01:18, 3.48it/s] 26%|██▌ | 95774/371472 [7:37:07<22:10:48, 3.45it/s] 26%|██▌ | 95775/371472 [7:37:08<21:30:19, 3.56it/s] 26%|██▌ | 95776/371472 [7:37:08<22:27:41, 3.41it/s] 26%|██▌ | 95777/371472 [7:37:08<22:43:21, 3.37it/s] 26%|██▌ | 95778/371472 [7:37:09<23:04:58, 3.32it/s] 26%|██▌ | 95779/371472 [7:37:09<22:55:35, 3.34it/s] 26%|██▌ | 95780/371472 [7:37:09<21:21:50, 3.58it/s] {'loss': 3.5037, 'learning_rate': 7.683046391863647e-07, 'epoch': 4.13} + 26%|██▌ | 95780/371472 [7:37:09<21:21:50, 3.58it/s] 26%|██▌ | 95781/371472 [7:37:09<20:41:59, 3.70it/s] 26%|██▌ | 95782/371472 [7:37:10<20:19:53, 3.77it/s] 26%|██▌ | 95783/371472 [7:37:10<20:44:19, 3.69it/s] 26%|██▌ | 95784/371472 [7:37:10<20:39:02, 3.71it/s] 26%|██▌ | 95785/371472 [7:37:10<20:17:26, 3.77it/s] 26%|██▌ | 95786/371472 [7:37:11<21:03:09, 3.64it/s] 26%|██▌ | 95787/371472 [7:37:11<20:26:23, 3.75it/s] 26%|██▌ | 95788/371472 [7:37:11<20:09:09, 3.80it/s] 26%|██▌ | 95789/371472 [7:37:12<20:17:41, 3.77it/s] 26%|██▌ | 95790/371472 [7:37:12<20:03:27, 3.82it/s] 26%|██▌ | 95791/371472 [7:37:12<20:53:57, 3.66it/s] 26%|██▌ | 95792/371472 [7:37:12<20:54:00, 3.66it/s] 26%|██▌ | 95793/371472 [7:37:13<22:42:39, 3.37it/s] 26%|██▌ | 95794/371472 [7:37:13<22:51:24, 3.35it/s] 26%|██▌ | 95795/371472 [7:37:13<22:02:01, 3.48it/s] 26%|██▌ | 95796/371472 [7:37:14<21:05:37, 3.63it/s] 26%|██▌ | 95797/371472 [7:37:14<21:25:25, 3.57it/s] 26%|██▌ | 95798/371472 [7:37:14<21:49:25, 3.51it/s] 26%|██▌ | 95799/371472 [7:37:14<23:03:12, 3.32it/s] 26%|██▌ | 95800/371472 [7:37:15<21:35:49, 3.55it/s] {'loss': 3.8348, 'learning_rate': 7.682561572108859e-07, 'epoch': 4.13} + 26%|██▌ | 95800/371472 [7:37:15<21:35:49, 3.55it/s] 26%|██▌ | 95801/371472 [7:37:15<23:44:18, 3.23it/s] 26%|██▌ | 95802/371472 [7:37:15<22:37:39, 3.38it/s] 26%|██▌ | 95803/371472 [7:37:16<22:06:58, 3.46it/s] 26%|██▌ | 95804/371472 [7:37:16<22:33:33, 3.39it/s] 26%|██▌ | 95805/371472 [7:37:16<21:27:53, 3.57it/s] 26%|██▌ | 95806/371472 [7:37:16<21:45:07, 3.52it/s] 26%|██▌ | 95807/371472 [7:37:17<21:53:11, 3.50it/s] 26%|██▌ | 95808/371472 [7:37:17<21:24:57, 3.58it/s] 26%|██▌ | 95809/371472 [7:37:17<22:28:16, 3.41it/s] 26%|██▌ | 95810/371472 [7:37:18<22:11:14, 3.45it/s] 26%|██▌ | 95811/371472 [7:37:18<21:38:17, 3.54it/s] 26%|██▌ | 95812/371472 [7:37:18<21:38:00, 3.54it/s] 26%|██▌ | 95813/371472 [7:37:18<20:57:49, 3.65it/s] 26%|██▌ | 95814/371472 [7:37:19<20:06:39, 3.81it/s] 26%|██▌ | 95815/371472 [7:37:19<20:26:11, 3.75it/s] 26%|██▌ | 95816/371472 [7:37:19<20:03:22, 3.82it/s] 26%|██▌ | 95817/371472 [7:37:19<19:54:22, 3.85it/s] 26%|██▌ | 95818/371472 [7:37:20<19:36:03, 3.91it/s] 26%|██▌ | 95819/371472 [7:37:20<21:52:35, 3.50it/s] 26%|██▌ | 95820/371472 [7:37:20<21:34:10, 3.55it/s] {'loss': 3.5621, 'learning_rate': 7.682076752354068e-07, 'epoch': 4.13} + 26%|██▌ | 95820/371472 [7:37:20<21:34:10, 3.55it/s] 26%|██▌ | 95821/371472 [7:37:21<21:29:43, 3.56it/s] 26%|██▌ | 95822/371472 [7:37:21<20:40:19, 3.70it/s] 26%|██▌ | 95823/371472 [7:37:21<20:06:41, 3.81it/s] 26%|██▌ | 95824/371472 [7:37:21<20:13:59, 3.78it/s] 26%|██▌ | 95825/371472 [7:37:22<20:34:02, 3.72it/s] 26%|██▌ | 95826/371472 [7:37:22<21:26:25, 3.57it/s] 26%|██▌ | 95827/371472 [7:37:22<21:49:28, 3.51it/s] 26%|██▌ | 95828/371472 [7:37:23<21:43:38, 3.52it/s] 26%|██▌ | 95829/371472 [7:37:23<22:52:15, 3.35it/s] 26%|██▌ | 95830/371472 [7:37:23<22:27:14, 3.41it/s] 26%|██▌ | 95831/371472 [7:37:23<21:58:44, 3.48it/s] 26%|██▌ | 95832/371472 [7:37:24<20:35:32, 3.72it/s] 26%|██▌ | 95833/371472 [7:37:24<25:44:08, 2.98it/s] 26%|██▌ | 95834/371472 [7:37:24<23:42:10, 3.23it/s] 26%|██▌ | 95835/371472 [7:37:25<22:45:52, 3.36it/s] 26%|██▌ | 95836/371472 [7:37:25<21:39:47, 3.53it/s] 26%|██▌ | 95837/371472 [7:37:25<21:49:57, 3.51it/s] 26%|██▌ | 95838/371472 [7:37:25<21:51:44, 3.50it/s] 26%|██▌ | 95839/371472 [7:37:26<22:08:53, 3.46it/s] 26%|██▌ | 95840/371472 [7:37:26<22:10:07, 3.45it/s] {'loss': 3.6296, 'learning_rate': 7.681591932599279e-07, 'epoch': 4.13} + 26%|██▌ | 95840/371472 [7:37:26<22:10:07, 3.45it/s] 26%|██▌ | 95841/371472 [7:37:26<23:04:48, 3.32it/s] 26%|██▌ | 95842/371472 [7:37:27<22:22:25, 3.42it/s] 26%|██▌ | 95843/371472 [7:37:27<23:33:10, 3.25it/s] 26%|██▌ | 95844/371472 [7:37:27<23:07:58, 3.31it/s] 26%|██▌ | 95845/371472 [7:37:28<22:29:51, 3.40it/s] 26%|██▌ | 95846/371472 [7:37:28<22:03:54, 3.47it/s] 26%|██▌ | 95847/371472 [7:37:28<22:19:16, 3.43it/s] 26%|██▌ | 95848/371472 [7:37:28<22:02:26, 3.47it/s] 26%|██▌ | 95849/371472 [7:37:29<20:50:35, 3.67it/s] 26%|██▌ | 95850/371472 [7:37:29<21:11:40, 3.61it/s] 26%|██▌ | 95851/371472 [7:37:29<22:47:03, 3.36it/s] 26%|██▌ | 95852/371472 [7:37:30<22:13:13, 3.45it/s] 26%|██▌ | 95853/371472 [7:37:30<21:39:45, 3.53it/s] 26%|██▌ | 95854/371472 [7:37:30<21:40:46, 3.53it/s] 26%|██▌ | 95855/371472 [7:37:30<22:10:33, 3.45it/s] 26%|██▌ | 95856/371472 [7:37:31<21:15:32, 3.60it/s] 26%|██▌ | 95857/371472 [7:37:31<21:01:55, 3.64it/s] 26%|██▌ | 95858/371472 [7:37:31<21:43:07, 3.53it/s] 26%|██▌ | 95859/371472 [7:37:32<22:43:07, 3.37it/s] 26%|██▌ | 95860/371472 [7:37:32<21:57:11, 3.49it/s] {'loss': 3.5148, 'learning_rate': 7.681107112844491e-07, 'epoch': 4.13} + 26%|██▌ | 95860/371472 [7:37:32<21:57:11, 3.49it/s] 26%|██▌ | 95861/371472 [7:37:32<21:37:30, 3.54it/s] 26%|██▌ | 95862/371472 [7:37:32<21:50:44, 3.50it/s] 26%|██▌ | 95863/371472 [7:37:33<21:28:18, 3.57it/s] 26%|██▌ | 95864/371472 [7:37:33<21:39:57, 3.53it/s] 26%|██▌ | 95865/371472 [7:37:33<20:48:08, 3.68it/s] 26%|██▌ | 95866/371472 [7:37:33<20:42:20, 3.70it/s] 26%|██▌ | 95867/371472 [7:37:34<21:07:35, 3.62it/s] 26%|██▌ | 95868/371472 [7:37:34<21:01:16, 3.64it/s] 26%|██▌ | 95869/371472 [7:37:34<20:25:02, 3.75it/s] 26%|██▌ | 95870/371472 [7:37:35<20:46:32, 3.68it/s] 26%|██▌ | 95871/371472 [7:37:35<20:38:29, 3.71it/s] 26%|██▌ | 95872/371472 [7:37:35<24:04:06, 3.18it/s] 26%|██▌ | 95873/371472 [7:37:36<23:27:04, 3.26it/s] 26%|██▌ | 95874/371472 [7:37:36<22:26:50, 3.41it/s] 26%|██▌ | 95875/371472 [7:37:36<22:03:41, 3.47it/s] 26%|██▌ | 95876/371472 [7:37:36<21:03:09, 3.64it/s] 26%|██▌ | 95877/371472 [7:37:37<21:01:38, 3.64it/s] 26%|██▌ | 95878/371472 [7:37:37<20:34:22, 3.72it/s] 26%|██▌ | 95879/371472 [7:37:37<20:55:10, 3.66it/s] 26%|██▌ | 95880/371472 [7:37:37<20:39:23, 3.71it/s] {'loss': 3.4836, 'learning_rate': 7.680622293089702e-07, 'epoch': 4.13} + 26%|██▌ | 95880/371472 [7:37:37<20:39:23, 3.71it/s] 26%|██▌ | 95881/371472 [7:37:38<19:42:03, 3.89it/s] 26%|██▌ | 95882/371472 [7:37:38<20:46:43, 3.68it/s] 26%|██▌ | 95883/371472 [7:37:38<20:58:01, 3.65it/s] 26%|██▌ | 95884/371472 [7:37:38<20:58:50, 3.65it/s] 26%|██▌ | 95885/371472 [7:37:39<20:31:35, 3.73it/s] 26%|██▌ | 95886/371472 [7:37:39<20:04:30, 3.81it/s] 26%|██▌ | 95887/371472 [7:37:39<19:46:40, 3.87it/s] 26%|██▌ | 95888/371472 [7:37:40<20:04:43, 3.81it/s] 26%|██▌ | 95889/371472 [7:37:40<21:51:39, 3.50it/s] 26%|██▌ | 95890/371472 [7:37:40<20:53:07, 3.67it/s] 26%|██▌ | 95891/371472 [7:37:40<20:35:28, 3.72it/s] 26%|██▌ | 95892/371472 [7:37:41<20:08:41, 3.80it/s] 26%|██▌ | 95893/371472 [7:37:41<19:59:26, 3.83it/s] 26%|██▌ | 95894/371472 [7:37:41<20:28:31, 3.74it/s] 26%|██▌ | 95895/371472 [7:37:41<21:03:21, 3.64it/s] 26%|██▌ | 95896/371472 [7:37:42<21:53:06, 3.50it/s] 26%|██▌ | 95897/371472 [7:37:42<21:37:14, 3.54it/s] 26%|██▌ | 95898/371472 [7:37:42<21:19:33, 3.59it/s] 26%|██▌ | 95899/371472 [7:37:43<20:42:34, 3.70it/s] 26%|██▌ | 95900/371472 [7:37:43<20:50:56, 3.67it/s] {'loss': 3.6669, 'learning_rate': 7.680137473334913e-07, 'epoch': 4.13} + 26%|██▌ | 95900/371472 [7:37:43<20:50:56, 3.67it/s] 26%|██▌ | 95901/371472 [7:37:43<20:44:35, 3.69it/s] 26%|██▌ | 95902/371472 [7:37:43<20:11:34, 3.79it/s] 26%|██▌ | 95903/371472 [7:37:44<20:50:40, 3.67it/s] 26%|██▌ | 95904/371472 [7:37:44<22:26:07, 3.41it/s] 26%|██▌ | 95905/371472 [7:37:44<21:58:27, 3.48it/s] 26%|██▌ | 95906/371472 [7:37:45<21:28:28, 3.56it/s] 26%|██▌ | 95907/371472 [7:37:45<22:56:03, 3.34it/s] 26%|██▌ | 95908/371472 [7:37:45<22:04:28, 3.47it/s] 26%|██▌ | 95909/371472 [7:37:45<21:56:52, 3.49it/s] 26%|██▌ | 95910/371472 [7:37:46<21:21:37, 3.58it/s] 26%|██▌ | 95911/371472 [7:37:46<20:56:38, 3.65it/s] 26%|██▌ | 95912/371472 [7:37:46<20:42:10, 3.70it/s] 26%|██▌ | 95913/371472 [7:37:46<20:59:29, 3.65it/s] 26%|██▌ | 95914/371472 [7:37:47<20:43:40, 3.69it/s] 26%|██▌ | 95915/371472 [7:37:47<21:31:05, 3.56it/s] 26%|██▌ | 95916/371472 [7:37:47<21:37:05, 3.54it/s] 26%|██▌ | 95917/371472 [7:37:48<21:17:12, 3.60it/s] 26%|██▌ | 95918/371472 [7:37:48<21:00:37, 3.64it/s] 26%|██▌ | 95919/371472 [7:37:48<23:49:13, 3.21it/s] 26%|██▌ | 95920/371472 [7:37:49<23:16:55, 3.29it/s] {'loss': 3.6571, 'learning_rate': 7.679652653580124e-07, 'epoch': 4.13} + 26%|██▌ | 95920/371472 [7:37:49<23:16:55, 3.29it/s] 26%|██▌ | 95921/371472 [7:37:49<23:47:24, 3.22it/s] 26%|██▌ | 95922/371472 [7:37:49<22:39:31, 3.38it/s] 26%|██▌ | 95923/371472 [7:37:49<22:14:51, 3.44it/s] 26%|██▌ | 95924/371472 [7:37:50<21:21:02, 3.58it/s] 26%|██▌ | 95925/371472 [7:37:50<21:20:38, 3.59it/s] 26%|██▌ | 95926/371472 [7:37:50<20:28:33, 3.74it/s] 26%|██▌ | 95927/371472 [7:37:50<21:02:46, 3.64it/s] 26%|██▌ | 95928/371472 [7:37:51<21:04:04, 3.63it/s] 26%|██▌ | 95929/371472 [7:37:51<20:27:18, 3.74it/s] 26%|██▌ | 95930/371472 [7:37:51<19:46:29, 3.87it/s] 26%|██▌ | 95931/371472 [7:37:52<22:11:14, 3.45it/s] 26%|██▌ | 95932/371472 [7:37:52<21:25:08, 3.57it/s] 26%|██▌ | 95933/371472 [7:37:52<21:11:50, 3.61it/s] 26%|██▌ | 95934/371472 [7:37:52<20:54:39, 3.66it/s] 26%|██▌ | 95935/371472 [7:37:53<21:18:09, 3.59it/s] 26%|██▌ | 95936/371472 [7:37:53<20:56:31, 3.65it/s] 26%|██▌ | 95937/371472 [7:37:53<20:45:32, 3.69it/s] 26%|██▌ | 95938/371472 [7:37:53<20:21:47, 3.76it/s] 26%|██▌ | 95939/371472 [7:37:54<21:59:34, 3.48it/s] 26%|██▌ | 95940/371472 [7:37:54<21:51:32, 3.50it/s] {'loss': 3.7406, 'learning_rate': 7.679167833825335e-07, 'epoch': 4.13} + 26%|██▌ | 95940/371472 [7:37:54<21:51:32, 3.50it/s] 26%|██▌ | 95941/371472 [7:37:54<23:20:05, 3.28it/s] 26%|██▌ | 95942/371472 [7:37:55<22:34:24, 3.39it/s] 26%|██▌ | 95943/371472 [7:37:55<21:34:53, 3.55it/s] 26%|██▌ | 95944/371472 [7:37:55<20:57:36, 3.65it/s] 26%|██▌ | 95945/371472 [7:37:55<20:16:03, 3.78it/s] 26%|██▌ | 95946/371472 [7:37:56<22:43:09, 3.37it/s] 26%|██▌ | 95947/371472 [7:37:56<23:05:19, 3.31it/s] 26%|██▌ | 95948/371472 [7:37:56<23:03:23, 3.32it/s] 26%|██▌ | 95949/371472 [7:37:57<24:15:00, 3.16it/s] 26%|██▌ | 95950/371472 [7:37:57<23:33:11, 3.25it/s] 26%|██▌ | 95951/371472 [7:37:57<22:55:51, 3.34it/s] 26%|██▌ | 95952/371472 [7:37:58<21:57:57, 3.48it/s] 26%|██▌ | 95953/371472 [7:37:58<21:47:39, 3.51it/s] 26%|██▌ | 95954/371472 [7:37:58<21:57:51, 3.48it/s] 26%|██▌ | 95955/371472 [7:37:58<20:42:46, 3.69it/s] 26%|██▌ | 95956/371472 [7:37:59<20:22:24, 3.76it/s] 26%|██▌ | 95957/371472 [7:37:59<20:55:39, 3.66it/s] 26%|██▌ | 95958/371472 [7:37:59<22:06:03, 3.46it/s] 26%|██▌ | 95959/371472 [7:38:00<21:43:08, 3.52it/s] 26%|██▌ | 95960/371472 [7:38:00<23:01:51, 3.32it/s] {'loss': 3.5661, 'learning_rate': 7.678683014070546e-07, 'epoch': 4.13} + 26%|██▌ | 95960/371472 [7:38:00<23:01:51, 3.32it/s] 26%|██▌ | 95961/371472 [7:38:00<21:59:45, 3.48it/s] 26%|██▌ | 95962/371472 [7:38:00<21:25:09, 3.57it/s] 26%|██▌ | 95963/371472 [7:38:01<22:36:50, 3.38it/s] 26%|██▌ | 95964/371472 [7:38:01<21:29:12, 3.56it/s] 26%|██▌ | 95965/371472 [7:38:01<22:37:59, 3.38it/s] 26%|██▌ | 95966/371472 [7:38:02<21:57:16, 3.49it/s] 26%|██▌ | 95967/371472 [7:38:02<22:48:10, 3.36it/s] 26%|██▌ | 95968/371472 [7:38:02<23:56:21, 3.20it/s] 26%|██▌ | 95969/371472 [7:38:02<22:19:02, 3.43it/s] 26%|██▌ | 95970/371472 [7:38:03<23:10:09, 3.30it/s] 26%|██▌ | 95971/371472 [7:38:03<22:38:03, 3.38it/s] 26%|██▌ | 95972/371472 [7:38:03<21:33:52, 3.55it/s] 26%|██▌ | 95973/371472 [7:38:04<21:48:21, 3.51it/s] 26%|██▌ | 95974/371472 [7:38:04<20:52:17, 3.67it/s] 26%|██▌ | 95975/371472 [7:38:04<21:28:13, 3.56it/s] 26%|██▌ | 95976/371472 [7:38:04<21:09:41, 3.62it/s] 26%|██▌ | 95977/371472 [7:38:05<20:28:59, 3.74it/s] 26%|██▌ | 95978/371472 [7:38:05<20:08:09, 3.80it/s] 26%|██▌ | 95979/371472 [7:38:05<22:57:54, 3.33it/s] 26%|██▌ | 95980/371472 [7:38:06<21:59:24, 3.48it/s] {'loss': 3.6479, 'learning_rate': 7.678198194315757e-07, 'epoch': 4.13} + 26%|██▌ | 95980/371472 [7:38:06<21:59:24, 3.48it/s] 26%|██▌ | 95981/371472 [7:38:06<21:19:56, 3.59it/s] 26%|██▌ | 95982/371472 [7:38:06<22:07:27, 3.46it/s] 26%|██▌ | 95983/371472 [7:38:06<21:43:29, 3.52it/s] 26%|██▌ | 95984/371472 [7:38:07<22:06:24, 3.46it/s] 26%|██▌ | 95985/371472 [7:38:07<22:11:51, 3.45it/s] 26%|██▌ | 95986/371472 [7:38:07<21:41:52, 3.53it/s] 26%|██▌ | 95987/371472 [7:38:08<21:28:28, 3.56it/s] 26%|██▌ | 95988/371472 [7:38:08<21:09:22, 3.62it/s] 26%|██▌ | 95989/371472 [7:38:08<22:15:29, 3.44it/s] 26%|██▌ | 95990/371472 [7:38:08<21:36:02, 3.54it/s] 26%|██▌ | 95991/371472 [7:38:09<20:29:47, 3.73it/s] 26%|██▌ | 95992/371472 [7:38:09<21:19:52, 3.59it/s] 26%|██▌ | 95993/371472 [7:38:09<21:46:18, 3.51it/s] 26%|██▌ | 95994/371472 [7:38:10<21:14:37, 3.60it/s] 26%|██▌ | 95995/371472 [7:38:10<20:49:14, 3.68it/s] 26%|██▌ | 95996/371472 [7:38:10<20:04:13, 3.81it/s] 26%|██▌ | 95997/371472 [7:38:10<20:36:52, 3.71it/s] 26%|██▌ | 95998/371472 [7:38:11<20:12:12, 3.79it/s] 26%|██▌ | 95999/371472 [7:38:11<19:48:59, 3.86it/s] 26%|██▌ | 96000/371472 [7:38:11<19:50:16, 3.86it/s] {'loss': 3.7884, 'learning_rate': 7.677713374560968e-07, 'epoch': 4.13} + 26%|██▌ | 96000/371472 [7:38:11<19:50:16, 3.86it/s] 26%|██▌ | 96001/371472 [7:38:11<20:12:00, 3.79it/s] 26%|██▌ | 96002/371472 [7:38:12<19:39:22, 3.89it/s] 26%|██▌ | 96003/371472 [7:38:12<20:20:34, 3.76it/s] 26%|██▌ | 96004/371472 [7:38:12<20:03:43, 3.81it/s] 26%|██▌ | 96005/371472 [7:38:12<19:23:13, 3.95it/s] 26%|██▌ | 96006/371472 [7:38:13<19:48:04, 3.86it/s] 26%|██▌ | 96007/371472 [7:38:13<19:26:55, 3.93it/s] 26%|██▌ | 96008/371472 [7:38:13<19:21:27, 3.95it/s] 26%|██▌ | 96009/371472 [7:38:13<20:06:36, 3.80it/s] 26%|██▌ | 96010/371472 [7:38:14<20:05:02, 3.81it/s] 26%|██▌ | 96011/371472 [7:38:14<19:56:27, 3.84it/s] 26%|██▌ | 96012/371472 [7:38:14<20:18:07, 3.77it/s] 26%|██▌ | 96013/371472 [7:38:14<20:10:14, 3.79it/s] 26%|██▌ | 96014/371472 [7:38:15<20:34:07, 3.72it/s] 26%|██▌ | 96015/371472 [7:38:15<20:10:44, 3.79it/s] 26%|██▌ | 96016/371472 [7:38:15<21:11:11, 3.61it/s] 26%|██▌ | 96017/371472 [7:38:16<20:31:56, 3.73it/s] 26%|██▌ | 96018/371472 [7:38:16<20:24:07, 3.75it/s] 26%|██▌ | 96019/371472 [7:38:16<20:20:42, 3.76it/s] 26%|██▌ | 96020/371472 [7:38:16<21:02:24, 3.64it/s] {'loss': 3.6712, 'learning_rate': 7.67722855480618e-07, 'epoch': 4.14} + 26%|██▌ | 96020/371472 [7:38:16<21:02:24, 3.64it/s] 26%|██▌ | 96021/371472 [7:38:17<20:10:40, 3.79it/s] 26%|██▌ | 96022/371472 [7:38:17<21:36:07, 3.54it/s] 26%|██▌ | 96023/371472 [7:38:17<20:55:54, 3.66it/s] 26%|██▌ | 96024/371472 [7:38:17<20:49:12, 3.67it/s] 26%|██▌ | 96025/371472 [7:38:18<21:10:44, 3.61it/s] 26%|██▌ | 96026/371472 [7:38:18<20:43:35, 3.69it/s] 26%|██▌ | 96027/371472 [7:38:18<22:19:53, 3.43it/s] 26%|██▌ | 96028/371472 [7:38:19<22:07:30, 3.46it/s] 26%|██▌ | 96029/371472 [7:38:19<20:37:43, 3.71it/s] 26%|██▌ | 96030/371472 [7:38:19<21:12:54, 3.61it/s] 26%|██▌ | 96031/371472 [7:38:19<20:31:04, 3.73it/s] 26%|██▌ | 96032/371472 [7:38:20<20:17:13, 3.77it/s] 26%|██▌ | 96033/371472 [7:38:20<20:40:36, 3.70it/s] 26%|██▌ | 96034/371472 [7:38:20<20:32:30, 3.72it/s] 26%|██▌ | 96035/371472 [7:38:20<20:24:30, 3.75it/s] 26%|██▌ | 96036/371472 [7:38:21<20:55:53, 3.66it/s] 26%|██▌ | 96037/371472 [7:38:21<21:05:15, 3.63it/s] 26%|██▌ | 96038/371472 [7:38:21<21:20:20, 3.59it/s] 26%|██▌ | 96039/371472 [7:38:22<22:32:52, 3.39it/s] 26%|██▌ | 96040/371472 [7:38:22<21:28:12, 3.56it/s] {'loss': 3.551, 'learning_rate': 7.676743735051391e-07, 'epoch': 4.14} + 26%|██▌ | 96040/371472 [7:38:22<21:28:12, 3.56it/s] 26%|██▌ | 96041/371472 [7:38:22<22:04:43, 3.47it/s] 26%|██▌ | 96042/371472 [7:38:22<22:03:54, 3.47it/s] 26%|██▌ | 96043/371472 [7:38:23<20:53:50, 3.66it/s] 26%|██▌ | 96044/371472 [7:38:23<20:57:36, 3.65it/s] 26%|██▌ | 96045/371472 [7:38:23<21:04:12, 3.63it/s] 26%|██▌ | 96046/371472 [7:38:24<20:18:19, 3.77it/s] 26%|██▌ | 96047/371472 [7:38:24<21:09:25, 3.62it/s] 26%|██▌ | 96048/371472 [7:38:24<20:46:51, 3.68it/s] 26%|██▌ | 96049/371472 [7:38:24<20:49:58, 3.67it/s] 26%|██▌ | 96050/371472 [7:38:25<20:54:05, 3.66it/s] 26%|██▌ | 96051/371472 [7:38:25<21:45:48, 3.52it/s] 26%|██▌ | 96052/371472 [7:38:25<21:34:56, 3.54it/s] 26%|██▌ | 96053/371472 [7:38:26<22:02:16, 3.47it/s] 26%|██▌ | 96054/371472 [7:38:26<21:46:04, 3.51it/s] 26%|██▌ | 96055/371472 [7:38:26<20:58:21, 3.65it/s] 26%|██▌ | 96056/371472 [7:38:26<21:25:21, 3.57it/s] 26%|██▌ | 96057/371472 [7:38:27<21:13:16, 3.61it/s] 26%|██▌ | 96058/371472 [7:38:27<20:24:08, 3.75it/s] 26%|██▌ | 96059/371472 [7:38:27<20:35:21, 3.72it/s] 26%|██▌ | 96060/371472 [7:38:27<20:56:42, 3.65it/s] {'loss': 3.6324, 'learning_rate': 7.676258915296602e-07, 'epoch': 4.14} + 26%|██▌ | 96060/371472 [7:38:27<20:56:42, 3.65it/s] 26%|██▌ | 96061/371472 [7:38:28<22:04:05, 3.47it/s] 26%|██▌ | 96062/371472 [7:38:28<20:53:48, 3.66it/s] 26%|██▌ | 96063/371472 [7:38:28<21:06:37, 3.62it/s] 26%|██▌ | 96064/371472 [7:38:29<20:43:00, 3.69it/s] 26%|██▌ | 96065/371472 [7:38:29<20:31:03, 3.73it/s] 26%|██▌ | 96066/371472 [7:38:29<20:59:27, 3.64it/s] 26%|██▌ | 96067/371472 [7:38:29<20:56:00, 3.65it/s] 26%|██▌ | 96068/371472 [7:38:30<21:15:16, 3.60it/s] 26%|██▌ | 96069/371472 [7:38:30<21:38:55, 3.53it/s] 26%|██▌ | 96070/371472 [7:38:30<20:30:09, 3.73it/s] 26%|██▌ | 96071/371472 [7:38:30<20:12:03, 3.79it/s] 26%|██▌ | 96072/371472 [7:38:31<19:59:18, 3.83it/s] 26%|██▌ | 96073/371472 [7:38:31<20:40:36, 3.70it/s] 26%|██▌ | 96074/371472 [7:38:31<19:57:23, 3.83it/s] 26%|██▌ | 96075/371472 [7:38:32<20:45:33, 3.69it/s] 26%|██▌ | 96076/371472 [7:38:32<21:00:56, 3.64it/s] 26%|██▌ | 96077/371472 [7:38:32<21:32:44, 3.55it/s] 26%|██▌ | 96078/371472 [7:38:32<20:22:42, 3.75it/s] 26%|██▌ | 96079/371472 [7:38:33<20:02:59, 3.82it/s] 26%|██▌ | 96080/371472 [7:38:33<20:29:43, 3.73it/s] {'loss': 3.829, 'learning_rate': 7.675774095541812e-07, 'epoch': 4.14} + 26%|██▌ | 96080/371472 [7:38:33<20:29:43, 3.73it/s] 26%|██▌ | 96081/371472 [7:38:33<19:59:33, 3.83it/s] 26%|██▌ | 96082/371472 [7:38:33<20:10:05, 3.79it/s] 26%|██▌ | 96083/371472 [7:38:34<20:23:17, 3.75it/s] 26%|██▌ | 96084/371472 [7:38:34<21:06:31, 3.62it/s] 26%|██▌ | 96085/371472 [7:38:34<21:45:51, 3.51it/s] 26%|██▌ | 96086/371472 [7:38:34<21:14:38, 3.60it/s] 26%|██▌ | 96087/371472 [7:38:35<20:25:25, 3.75it/s] 26%|██▌ | 96088/371472 [7:38:35<19:58:48, 3.83it/s] 26%|██▌ | 96089/371472 [7:38:35<19:36:37, 3.90it/s] 26%|██▌ | 96090/371472 [7:38:35<19:10:21, 3.99it/s] 26%|██▌ | 96091/371472 [7:38:36<20:21:08, 3.76it/s] 26%|██▌ | 96092/371472 [7:38:36<20:07:11, 3.80it/s] 26%|██▌ | 96093/371472 [7:38:36<19:41:12, 3.89it/s] 26%|██▌ | 96094/371472 [7:38:37<19:45:35, 3.87it/s] 26%|██▌ | 96095/371472 [7:38:37<20:04:24, 3.81it/s] 26%|██▌ | 96096/371472 [7:38:37<19:41:03, 3.89it/s] 26%|██▌ | 96097/371472 [7:38:37<20:58:43, 3.65it/s] 26%|██▌ | 96098/371472 [7:38:38<20:31:59, 3.73it/s] 26%|██▌ | 96099/371472 [7:38:38<20:07:53, 3.80it/s] 26%|██▌ | 96100/371472 [7:38:38<20:31:02, 3.73it/s] {'loss': 3.48, 'learning_rate': 7.675289275787024e-07, 'epoch': 4.14} + 26%|██▌ | 96100/371472 [7:38:38<20:31:02, 3.73it/s] 26%|██▌ | 96101/371472 [7:38:38<21:08:42, 3.62it/s] 26%|██▌ | 96102/371472 [7:38:39<20:28:32, 3.74it/s] 26%|██▌ | 96103/371472 [7:38:39<20:21:52, 3.76it/s] 26%|██▌ | 96104/371472 [7:38:39<20:14:05, 3.78it/s] 26%|██▌ | 96105/371472 [7:38:39<20:13:10, 3.78it/s] 26%|██▌ | 96106/371472 [7:38:40<21:30:07, 3.56it/s] 26%|██▌ | 96107/371472 [7:38:40<20:55:36, 3.66it/s] 26%|██▌ | 96108/371472 [7:38:40<20:10:54, 3.79it/s] 26%|██▌ | 96109/371472 [7:38:41<19:40:51, 3.89it/s] 26%|██▌ | 96110/371472 [7:38:41<20:03:19, 3.81it/s] 26%|██▌ | 96111/371472 [7:38:41<20:58:01, 3.65it/s] 26%|██▌ | 96112/371472 [7:38:41<21:19:03, 3.59it/s] 26%|██▌ | 96113/371472 [7:38:42<21:24:17, 3.57it/s] 26%|██▌ | 96114/371472 [7:38:42<23:19:04, 3.28it/s] 26%|██▌ | 96115/371472 [7:38:42<25:45:15, 2.97it/s] 26%|██▌ | 96116/371472 [7:38:43<24:15:18, 3.15it/s] 26%|██▌ | 96117/371472 [7:38:43<23:28:38, 3.26it/s] 26%|██▌ | 96118/371472 [7:38:43<22:55:36, 3.34it/s] 26%|██▌ | 96119/371472 [7:38:44<24:02:27, 3.18it/s] 26%|██▌ | 96120/371472 [7:38:44<22:20:01, 3.42it/s] {'loss': 3.5614, 'learning_rate': 7.674804456032235e-07, 'epoch': 4.14} + 26%|██▌ | 96120/371472 [7:38:44<22:20:01, 3.42it/s] 26%|██▌ | 96121/371472 [7:38:44<21:37:54, 3.54it/s] 26%|██▌ | 96122/371472 [7:38:44<21:55:19, 3.49it/s] 26%|██▌ | 96123/371472 [7:38:45<22:00:38, 3.47it/s] 26%|██▌ | 96124/371472 [7:38:45<21:46:57, 3.51it/s] 26%|██▌ | 96125/371472 [7:38:45<21:29:08, 3.56it/s] 26%|██▌ | 96126/371472 [7:38:46<22:49:50, 3.35it/s] 26%|██▌ | 96127/371472 [7:38:46<22:37:55, 3.38it/s] 26%|██▌ | 96128/371472 [7:38:46<21:31:29, 3.55it/s] 26%|██▌ | 96129/371472 [7:38:46<21:21:50, 3.58it/s] 26%|██▌ | 96130/371472 [7:38:47<22:23:36, 3.42it/s] 26%|██▌ | 96131/371472 [7:38:47<21:20:22, 3.58it/s] 26%|██▌ | 96132/371472 [7:38:47<21:18:47, 3.59it/s] 26%|██▌ | 96133/371472 [7:38:48<20:46:24, 3.68it/s] 26%|██▌ | 96134/371472 [7:38:48<20:55:50, 3.65it/s] 26%|██▌ | 96135/371472 [7:38:48<20:56:22, 3.65it/s] 26%|██▌ | 96136/371472 [7:38:48<21:19:17, 3.59it/s] 26%|██▌ | 96137/371472 [7:38:49<20:23:47, 3.75it/s] 26%|██▌ | 96138/371472 [7:38:49<20:39:34, 3.70it/s] 26%|██▌ | 96139/371472 [7:38:49<22:31:58, 3.39it/s] 26%|██▌ | 96140/371472 [7:38:50<21:56:37, 3.49it/s] {'loss': 3.7307, 'learning_rate': 7.674319636277445e-07, 'epoch': 4.14} + 26%|██▌ | 96140/371472 [7:38:50<21:56:37, 3.49it/s] 26%|██▌ | 96141/371472 [7:38:50<21:28:49, 3.56it/s] 26%|██▌ | 96142/371472 [7:38:50<25:18:26, 3.02it/s] 26%|██▌ | 96143/371472 [7:38:51<24:11:00, 3.16it/s] 26%|██▌ | 96144/371472 [7:38:51<22:35:11, 3.39it/s] 26%|██▌ | 96145/371472 [7:38:51<22:06:51, 3.46it/s] 26%|██▌ | 96146/371472 [7:38:51<23:43:05, 3.22it/s] 26%|██▌ | 96147/371472 [7:38:52<25:04:35, 3.05it/s] 26%|██▌ | 96148/371472 [7:38:52<24:26:50, 3.13it/s] 26%|██▌ | 96149/371472 [7:38:52<24:34:11, 3.11it/s] 26%|██▌ | 96150/371472 [7:38:53<24:31:51, 3.12it/s] 26%|██▌ | 96151/371472 [7:38:53<24:05:49, 3.17it/s] 26%|██▌ | 96152/371472 [7:38:53<22:44:59, 3.36it/s] 26%|██▌ | 96153/371472 [7:38:54<21:24:52, 3.57it/s] 26%|██▌ | 96154/371472 [7:38:54<21:37:37, 3.54it/s] 26%|██▌ | 96155/371472 [7:38:54<22:06:12, 3.46it/s] 26%|██▌ | 96156/371472 [7:38:54<23:29:59, 3.25it/s] 26%|██▌ | 96157/371472 [7:38:55<22:51:02, 3.35it/s] 26%|██▌ | 96158/371472 [7:38:55<22:39:15, 3.38it/s] 26%|██▌ | 96159/371472 [7:38:55<22:33:54, 3.39it/s] 26%|██▌ | 96160/371472 [7:38:56<23:18:27, 3.28it/s] {'loss': 3.6099, 'learning_rate': 7.673834816522657e-07, 'epoch': 4.14} + 26%|██▌ | 96160/371472 [7:38:56<23:18:27, 3.28it/s] 26%|██▌ | 96161/371472 [7:38:56<21:55:03, 3.49it/s] 26%|██▌ | 96162/371472 [7:38:56<20:38:54, 3.70it/s] 26%|██▌ | 96163/371472 [7:38:56<19:57:25, 3.83it/s] 26%|██▌ | 96164/371472 [7:38:57<19:43:00, 3.88it/s] 26%|██▌ | 96165/371472 [7:38:57<19:48:12, 3.86it/s] 26%|██▌ | 96166/371472 [7:38:57<19:52:23, 3.85it/s] 26%|██▌ | 96167/371472 [7:38:57<20:05:36, 3.81it/s] 26%|██▌ | 96168/371472 [7:38:58<21:27:10, 3.56it/s] 26%|██▌ | 96169/371472 [7:38:58<21:12:57, 3.60it/s] 26%|██▌ | 96170/371472 [7:38:58<22:16:11, 3.43it/s] 26%|██▌ | 96171/371472 [7:38:59<23:15:11, 3.29it/s] 26%|██▌ | 96172/371472 [7:38:59<22:04:27, 3.46it/s] 26%|██▌ | 96173/371472 [7:38:59<21:47:25, 3.51it/s] 26%|██▌ | 96174/371472 [7:38:59<22:21:32, 3.42it/s] 26%|██▌ | 96175/371472 [7:39:00<21:16:37, 3.59it/s] 26%|██▌ | 96176/371472 [7:39:00<21:02:55, 3.63it/s] 26%|██▌ | 96177/371472 [7:39:00<20:11:57, 3.79it/s] 26%|██▌ | 96178/371472 [7:39:01<20:30:13, 3.73it/s] 26%|██▌ | 96179/371472 [7:39:01<20:08:26, 3.80it/s] 26%|██▌ | 96180/371472 [7:39:01<20:40:38, 3.70it/s] {'loss': 3.7577, 'learning_rate': 7.673349996767869e-07, 'epoch': 4.14} + 26%|██▌ | 96180/371472 [7:39:01<20:40:38, 3.70it/s] 26%|██▌ | 96181/371472 [7:39:01<20:23:36, 3.75it/s] 26%|██▌ | 96182/371472 [7:39:02<20:56:39, 3.65it/s] 26%|██▌ | 96183/371472 [7:39:02<20:56:28, 3.65it/s] 26%|██▌ | 96184/371472 [7:39:02<23:20:50, 3.28it/s] 26%|██▌ | 96185/371472 [7:39:03<22:18:43, 3.43it/s] 26%|██▌ | 96186/371472 [7:39:03<22:02:35, 3.47it/s] 26%|██▌ | 96187/371472 [7:39:03<21:44:22, 3.52it/s] 26%|██▌ | 96188/371472 [7:39:03<20:52:41, 3.66it/s] 26%|██▌ | 96189/371472 [7:39:04<21:12:25, 3.61it/s] 26%|██▌ | 96190/371472 [7:39:04<21:05:09, 3.63it/s] 26%|██▌ | 96191/371472 [7:39:04<22:23:12, 3.42it/s] 26%|██▌ | 96192/371472 [7:39:05<23:04:20, 3.31it/s] 26%|██▌ | 96193/371472 [7:39:05<21:34:22, 3.54it/s] 26%|██▌ | 96194/371472 [7:39:05<20:58:12, 3.65it/s] 26%|██▌ | 96195/371472 [7:39:05<23:28:33, 3.26it/s] 26%|██▌ | 96196/371472 [7:39:06<24:59:15, 3.06it/s] 26%|██▌ | 96197/371472 [7:39:06<23:31:10, 3.25it/s] 26%|██▌ | 96198/371472 [7:39:06<23:15:51, 3.29it/s] 26%|██▌ | 96199/371472 [7:39:07<22:20:49, 3.42it/s] 26%|██▌ | 96200/371472 [7:39:07<21:08:13, 3.62it/s] {'loss': 3.7372, 'learning_rate': 7.672865177013078e-07, 'epoch': 4.14} + 26%|██▌ | 96200/371472 [7:39:07<21:08:13, 3.62it/s] 26%|██▌ | 96201/371472 [7:39:07<20:16:27, 3.77it/s] 26%|██▌ | 96202/371472 [7:39:07<21:08:47, 3.62it/s] 26%|██▌ | 96203/371472 [7:39:08<21:23:58, 3.57it/s] 26%|██▌ | 96204/371472 [7:39:08<22:21:05, 3.42it/s] 26%|██▌ | 96205/371472 [7:39:08<21:59:25, 3.48it/s] 26%|██▌ | 96206/371472 [7:39:09<22:13:17, 3.44it/s] 26%|██▌ | 96207/371472 [7:39:09<22:17:17, 3.43it/s] 26%|██▌ | 96208/371472 [7:39:09<21:08:40, 3.62it/s] 26%|██▌ | 96209/371472 [7:39:09<21:19:08, 3.59it/s] 26%|██▌ | 96210/371472 [7:39:10<21:42:13, 3.52it/s] 26%|██▌ | 96211/371472 [7:39:10<21:04:26, 3.63it/s] 26%|██▌ | 96212/371472 [7:39:10<22:11:19, 3.45it/s] 26%|██▌ | 96213/371472 [7:39:11<24:53:40, 3.07it/s] 26%|██▌ | 96214/371472 [7:39:11<23:35:22, 3.24it/s] 26%|██▌ | 96215/371472 [7:39:11<23:36:33, 3.24it/s] 26%|██▌ | 96216/371472 [7:39:11<22:08:22, 3.45it/s] 26%|██▌ | 96217/371472 [7:39:12<22:45:10, 3.36it/s] 26%|██▌ | 96218/371472 [7:39:12<21:41:59, 3.52it/s] 26%|██▌ | 96219/371472 [7:39:12<21:24:20, 3.57it/s] 26%|██▌ | 96220/371472 [7:39:13<20:32:08, 3.72it/s] {'loss': 3.5859, 'learning_rate': 7.672380357258289e-07, 'epoch': 4.14} + 26%|██▌ | 96220/371472 [7:39:13<20:32:08, 3.72it/s] 26%|██▌ | 96221/371472 [7:39:13<20:36:53, 3.71it/s] 26%|██▌ | 96222/371472 [7:39:13<19:49:40, 3.86it/s] 26%|██▌ | 96223/371472 [7:39:13<20:48:14, 3.68it/s] 26%|██▌ | 96224/371472 [7:39:14<21:35:58, 3.54it/s] 26%|██▌ | 96225/371472 [7:39:14<22:26:00, 3.41it/s] 26%|██▌ | 96226/371472 [7:39:14<23:32:50, 3.25it/s] 26%|██▌ | 96227/371472 [7:39:15<24:04:24, 3.18it/s] 26%|██▌ | 96228/371472 [7:39:15<23:30:47, 3.25it/s] 26%|██▌ | 96229/371472 [7:39:15<22:32:23, 3.39it/s] 26%|██▌ | 96230/371472 [7:39:16<22:01:01, 3.47it/s] 26%|██▌ | 96231/371472 [7:39:16<20:59:06, 3.64it/s] 26%|██▌ | 96232/371472 [7:39:16<22:49:02, 3.35it/s] 26%|██▌ | 96233/371472 [7:39:16<22:04:31, 3.46it/s] 26%|██▌ | 96234/371472 [7:39:17<21:28:32, 3.56it/s] 26%|██▌ | 96235/371472 [7:39:17<21:32:05, 3.55it/s] 26%|██▌ | 96236/371472 [7:39:17<20:37:07, 3.71it/s] 26%|██▌ | 96237/371472 [7:39:18<22:16:22, 3.43it/s] 26%|██▌ | 96238/371472 [7:39:18<21:20:03, 3.58it/s] 26%|██▌ | 96239/371472 [7:39:18<21:00:24, 3.64it/s] 26%|██▌ | 96240/371472 [7:39:18<21:00:37, 3.64it/s] {'loss': 3.821, 'learning_rate': 7.671895537503501e-07, 'epoch': 4.15} + 26%|██▌ | 96240/371472 [7:39:18<21:00:37, 3.64it/s] 26%|██▌ | 96241/371472 [7:39:19<20:36:24, 3.71it/s] 26%|██▌ | 96242/371472 [7:39:19<20:05:02, 3.81it/s] 26%|██▌ | 96243/371472 [7:39:19<19:48:14, 3.86it/s] 26%|██▌ | 96244/371472 [7:39:19<20:21:22, 3.76it/s] 26%|██▌ | 96245/371472 [7:39:20<20:00:56, 3.82it/s] 26%|██▌ | 96246/371472 [7:39:20<20:50:33, 3.67it/s] 26%|██▌ | 96247/371472 [7:39:20<20:37:21, 3.71it/s] 26%|██▌ | 96248/371472 [7:39:20<20:51:10, 3.67it/s] 26%|██▌ | 96249/371472 [7:39:21<22:11:52, 3.44it/s] 26%|██▌ | 96250/371472 [7:39:21<22:51:17, 3.35it/s] 26%|██▌ | 96251/371472 [7:39:21<22:21:00, 3.42it/s] 26%|██▌ | 96252/371472 [7:39:22<22:00:21, 3.47it/s] 26%|██▌ | 96253/371472 [7:39:22<21:23:57, 3.57it/s] 26%|██▌ | 96254/371472 [7:39:22<21:00:43, 3.64it/s] 26%|██▌ | 96255/371472 [7:39:22<20:34:30, 3.72it/s] 26%|██▌ | 96256/371472 [7:39:23<20:23:37, 3.75it/s] 26%|██▌ | 96257/371472 [7:39:23<21:12:01, 3.61it/s] 26%|██▌ | 96258/371472 [7:39:23<20:55:31, 3.65it/s] 26%|██▌ | 96259/371472 [7:39:24<21:24:55, 3.57it/s] 26%|██▌ | 96260/371472 [7:39:24<21:16:41, 3.59it/s] {'loss': 3.6523, 'learning_rate': 7.671410717748713e-07, 'epoch': 4.15} + 26%|██▌ | 96260/371472 [7:39:24<21:16:41, 3.59it/s] 26%|██▌ | 96261/371472 [7:39:24<21:13:43, 3.60it/s] 26%|██▌ | 96262/371472 [7:39:24<22:33:14, 3.39it/s] 26%|██▌ | 96263/371472 [7:39:25<21:15:51, 3.60it/s] 26%|██▌ | 96264/371472 [7:39:25<21:38:19, 3.53it/s] 26%|██▌ | 96265/371472 [7:39:25<21:44:46, 3.52it/s] 26%|██▌ | 96266/371472 [7:39:26<21:19:15, 3.59it/s] 26%|██▌ | 96267/371472 [7:39:26<20:30:58, 3.73it/s] 26%|██▌ | 96268/371472 [7:39:26<21:43:06, 3.52it/s] 26%|██▌ | 96269/371472 [7:39:26<22:44:32, 3.36it/s] 26%|██▌ | 96270/371472 [7:39:27<22:14:31, 3.44it/s] 26%|██▌ | 96271/371472 [7:39:27<22:04:44, 3.46it/s] 26%|██▌ | 96272/371472 [7:39:27<22:21:22, 3.42it/s] 26%|██▌ | 96273/371472 [7:39:28<22:01:56, 3.47it/s] 26%|██▌ | 96274/371472 [7:39:28<21:09:16, 3.61it/s] 26%|██▌ | 96275/371472 [7:39:28<20:36:18, 3.71it/s] 26%|██▌ | 96276/371472 [7:39:28<20:41:21, 3.69it/s] 26%|██▌ | 96277/371472 [7:39:29<21:33:51, 3.54it/s] 26%|██▌ | 96278/371472 [7:39:29<21:15:01, 3.60it/s] 26%|██▌ | 96279/371472 [7:39:29<20:43:09, 3.69it/s] 26%|██▌ | 96280/371472 [7:39:29<20:55:56, 3.65it/s] {'loss': 3.7537, 'learning_rate': 7.670925897993924e-07, 'epoch': 4.15} + 26%|██▌ | 96280/371472 [7:39:29<20:55:56, 3.65it/s] 26%|██▌ | 96281/371472 [7:39:30<22:13:02, 3.44it/s] 26%|██▌ | 96282/371472 [7:39:30<22:37:35, 3.38it/s] 26%|██▌ | 96283/371472 [7:39:30<22:13:39, 3.44it/s] 26%|██▌ | 96284/371472 [7:39:31<20:53:24, 3.66it/s] 26%|██▌ | 96285/371472 [7:39:31<21:31:31, 3.55it/s] 26%|██▌ | 96286/371472 [7:39:31<21:23:22, 3.57it/s] 26%|██▌ | 96287/371472 [7:39:31<21:27:23, 3.56it/s] 26%|██▌ | 96288/371472 [7:39:32<22:09:11, 3.45it/s] 26%|██▌ | 96289/371472 [7:39:32<22:55:29, 3.33it/s] 26%|██▌ | 96290/371472 [7:39:32<21:58:42, 3.48it/s] 26%|██▌ | 96291/371472 [7:39:33<22:52:39, 3.34it/s] 26%|██▌ | 96292/371472 [7:39:33<25:15:21, 3.03it/s] 26%|██▌ | 96293/371472 [7:39:33<23:55:14, 3.20it/s] 26%|██▌ | 96294/371472 [7:39:34<22:09:39, 3.45it/s] 26%|██▌ | 96295/371472 [7:39:34<21:10:26, 3.61it/s] 26%|██▌ | 96296/371472 [7:39:34<21:53:43, 3.49it/s] 26%|██▌ | 96297/371472 [7:39:34<22:27:18, 3.40it/s] 26%|██▌ | 96298/371472 [7:39:35<22:06:54, 3.46it/s] 26%|██▌ | 96299/371472 [7:39:35<23:16:09, 3.28it/s] 26%|██▌ | 96300/371472 [7:39:35<23:53:33, 3.20it/s] {'loss': 3.5594, 'learning_rate': 7.670441078239135e-07, 'epoch': 4.15} + 26%|██▌ | 96300/371472 [7:39:35<23:53:33, 3.20it/s] 26%|██▌ | 96301/371472 [7:39:36<22:08:34, 3.45it/s] 26%|██▌ | 96302/371472 [7:39:36<22:27:15, 3.40it/s] 26%|██▌ | 96303/371472 [7:39:36<21:17:29, 3.59it/s] 26%|██▌ | 96304/371472 [7:39:36<21:19:26, 3.58it/s] 26%|██▌ | 96305/371472 [7:39:37<22:03:16, 3.47it/s] 26%|██▌ | 96306/371472 [7:39:37<21:43:25, 3.52it/s] 26%|██▌ | 96307/371472 [7:39:37<21:11:22, 3.61it/s] 26%|██▌ | 96308/371472 [7:39:38<21:47:26, 3.51it/s] 26%|██▌ | 96309/371472 [7:39:38<21:17:49, 3.59it/s] 26%|██▌ | 96310/371472 [7:39:38<20:34:03, 3.72it/s] 26%|██▌ | 96311/371472 [7:39:38<21:10:49, 3.61it/s] 26%|██▌ | 96312/371472 [7:39:39<20:35:34, 3.71it/s] 26%|██▌ | 96313/371472 [7:39:39<21:06:05, 3.62it/s] 26%|██▌ | 96314/371472 [7:39:39<20:42:06, 3.69it/s] 26%|██▌ | 96315/371472 [7:39:40<22:11:16, 3.44it/s] 26%|██▌ | 96316/371472 [7:39:40<22:33:06, 3.39it/s] 26%|██▌ | 96317/371472 [7:39:40<21:13:49, 3.60it/s] 26%|██▌ | 96318/371472 [7:39:40<20:22:42, 3.75it/s] 26%|██▌ | 96319/371472 [7:39:41<19:57:23, 3.83it/s] 26%|██▌ | 96320/371472 [7:39:41<19:41:00, 3.88it/s] {'loss': 3.6303, 'learning_rate': 7.669956258484345e-07, 'epoch': 4.15} + 26%|██▌ | 96320/371472 [7:39:41<19:41:00, 3.88it/s] 26%|██▌ | 96321/371472 [7:39:41<20:23:01, 3.75it/s] 26%|██▌ | 96322/371472 [7:39:41<19:52:12, 3.85it/s] 26%|██▌ | 96323/371472 [7:39:42<19:53:38, 3.84it/s] 26%|██▌ | 96324/371472 [7:39:42<19:39:03, 3.89it/s] 26%|██▌ | 96325/371472 [7:39:42<21:31:25, 3.55it/s] 26%|██▌ | 96326/371472 [7:39:43<25:41:23, 2.98it/s] 26%|██▌ | 96327/371472 [7:39:43<24:07:33, 3.17it/s] 26%|██▌ | 96328/371472 [7:39:43<23:30:22, 3.25it/s] 26%|██▌ | 96329/371472 [7:39:44<23:35:42, 3.24it/s] 26%|██▌ | 96330/371472 [7:39:44<23:01:44, 3.32it/s] 26%|██▌ | 96331/371472 [7:39:44<22:15:00, 3.43it/s] 26%|██▌ | 96332/371472 [7:39:44<24:09:21, 3.16it/s] 26%|██▌ | 96333/371472 [7:39:45<22:44:52, 3.36it/s] 26%|██▌ | 96334/371472 [7:39:45<22:08:24, 3.45it/s] 26%|██▌ | 96335/371472 [7:39:45<22:11:33, 3.44it/s] 26%|██▌ | 96336/371472 [7:39:46<21:19:51, 3.58it/s] 26%|██▌ | 96337/371472 [7:39:46<21:42:53, 3.52it/s] 26%|██▌ | 96338/371472 [7:39:46<21:10:32, 3.61it/s] 26%|██▌ | 96339/371472 [7:39:46<20:45:08, 3.68it/s] 26%|██▌ | 96340/371472 [7:39:47<21:39:49, 3.53it/s] {'loss': 3.6694, 'learning_rate': 7.669471438729557e-07, 'epoch': 4.15} + 26%|██▌ | 96340/371472 [7:39:47<21:39:49, 3.53it/s] 26%|██▌ | 96341/371472 [7:39:47<20:48:24, 3.67it/s] 26%|██▌ | 96342/371472 [7:39:47<21:20:43, 3.58it/s] 26%|██▌ | 96343/371472 [7:39:48<21:57:57, 3.48it/s] 26%|██▌ | 96344/371472 [7:39:48<21:34:14, 3.54it/s] 26%|██▌ | 96345/371472 [7:39:48<22:16:01, 3.43it/s] 26%|██▌ | 96346/371472 [7:39:48<22:51:21, 3.34it/s] 26%|██▌ | 96347/371472 [7:39:49<21:41:18, 3.52it/s] 26%|██▌ | 96348/371472 [7:39:49<21:12:12, 3.60it/s] 26%|██▌ | 96349/371472 [7:39:49<21:25:47, 3.57it/s] 26%|██▌ | 96350/371472 [7:39:49<20:37:32, 3.71it/s] 26%|██▌ | 96351/371472 [7:39:50<20:15:23, 3.77it/s] 26%|██▌ | 96352/371472 [7:39:50<20:05:22, 3.80it/s] 26%|██▌ | 96353/371472 [7:39:50<20:33:37, 3.72it/s] 26%|██▌ | 96354/371472 [7:39:51<21:46:00, 3.51it/s] 26%|██▌ | 96355/371472 [7:39:51<21:55:30, 3.49it/s] 26%|██▌ | 96356/371472 [7:39:51<20:57:01, 3.65it/s] 26%|██▌ | 96357/371472 [7:39:51<20:47:07, 3.68it/s] 26%|██▌ | 96358/371472 [7:39:52<21:00:20, 3.64it/s] 26%|██▌ | 96359/371472 [7:39:52<21:32:50, 3.55it/s] 26%|██▌ | 96360/371472 [7:39:52<22:03:59, 3.46it/s] {'loss': 3.5924, 'learning_rate': 7.668986618974766e-07, 'epoch': 4.15} + 26%|██▌ | 96360/371472 [7:39:52<22:03:59, 3.46it/s] 26%|██▌ | 96361/371472 [7:39:52<20:46:31, 3.68it/s] 26%|██▌ | 96362/371472 [7:39:53<20:12:56, 3.78it/s] 26%|██▌ | 96363/371472 [7:39:53<19:53:48, 3.84it/s] 26%|██▌ | 96364/371472 [7:39:53<19:50:50, 3.85it/s] 26%|██▌ | 96365/371472 [7:39:54<21:08:33, 3.61it/s] 26%|██▌ | 96366/371472 [7:39:54<20:43:12, 3.69it/s] 26%|██▌ | 96367/371472 [7:39:54<23:32:06, 3.25it/s] 26%|██▌ | 96368/371472 [7:39:55<23:19:06, 3.28it/s] 26%|██▌ | 96369/371472 [7:39:55<22:51:47, 3.34it/s] 26%|██▌ | 96370/371472 [7:39:55<23:30:47, 3.25it/s] 26%|██▌ | 96371/371472 [7:39:55<23:14:29, 3.29it/s] 26%|██▌ | 96372/371472 [7:39:56<25:24:42, 3.01it/s] 26%|██▌ | 96373/371472 [7:39:56<25:06:07, 3.04it/s] 26%|██▌ | 96374/371472 [7:39:56<23:14:14, 3.29it/s] 26%|██▌ | 96375/371472 [7:39:57<23:17:55, 3.28it/s] 26%|██▌ | 96376/371472 [7:39:57<22:33:44, 3.39it/s] 26%|██▌ | 96377/371472 [7:39:57<23:11:13, 3.30it/s] 26%|██▌ | 96378/371472 [7:39:58<23:18:26, 3.28it/s] 26%|██▌ | 96379/371472 [7:39:58<24:07:19, 3.17it/s] 26%|██▌ | 96380/371472 [7:39:58<23:07:23, 3.30it/s] {'loss': 3.6093, 'learning_rate': 7.668501799219978e-07, 'epoch': 4.15} + 26%|██▌ | 96380/371472 [7:39:58<23:07:23, 3.30it/s] 26%|██▌ | 96381/371472 [7:39:59<23:53:27, 3.20it/s] 26%|██▌ | 96382/371472 [7:39:59<23:06:57, 3.31it/s] 26%|██▌ | 96383/371472 [7:39:59<21:54:29, 3.49it/s] 26%|██▌ | 96384/371472 [7:39:59<21:50:22, 3.50it/s] 26%|██▌ | 96385/371472 [7:40:00<21:15:55, 3.59it/s] 26%|██▌ | 96386/371472 [7:40:00<21:02:59, 3.63it/s] 26%|██▌ | 96387/371472 [7:40:00<21:18:50, 3.59it/s] 26%|██▌ | 96388/371472 [7:40:00<20:50:46, 3.67it/s] 26%|██▌ | 96389/371472 [7:40:01<20:45:52, 3.68it/s] 26%|██▌ | 96390/371472 [7:40:01<20:53:31, 3.66it/s] 26%|██▌ | 96391/371472 [7:40:01<22:22:59, 3.41it/s] 26%|██▌ | 96392/371472 [7:40:02<21:34:02, 3.54it/s] 26%|██▌ | 96393/371472 [7:40:02<21:21:28, 3.58it/s] 26%|██▌ | 96394/371472 [7:40:02<21:06:53, 3.62it/s] 26%|██▌ | 96395/371472 [7:40:02<20:59:49, 3.64it/s] 26%|██▌ | 96396/371472 [7:40:03<23:08:14, 3.30it/s] 26%|██▌ | 96397/371472 [7:40:03<22:51:35, 3.34it/s] 26%|██▌ | 96398/371472 [7:40:03<21:55:44, 3.48it/s] 26%|██▌ | 96399/371472 [7:40:04<21:00:03, 3.64it/s] 26%|██▌ | 96400/371472 [7:40:04<20:55:57, 3.65it/s] {'loss': 3.7879, 'learning_rate': 7.66801697946519e-07, 'epoch': 4.15} + 26%|██▌ | 96400/371472 [7:40:04<20:55:57, 3.65it/s] 26%|██▌ | 96401/371472 [7:40:04<23:09:04, 3.30it/s] 26%|██▌ | 96402/371472 [7:40:04<22:03:58, 3.46it/s] 26%|██▌ | 96403/371472 [7:40:05<22:08:21, 3.45it/s] 26%|██▌ | 96404/371472 [7:40:05<22:33:44, 3.39it/s] 26%|██▌ | 96405/371472 [7:40:05<22:10:51, 3.44it/s] 26%|██▌ | 96406/371472 [7:40:06<21:38:48, 3.53it/s] 26%|██▌ | 96407/371472 [7:40:06<21:21:07, 3.58it/s] 26%|██▌ | 96408/371472 [7:40:06<20:40:40, 3.70it/s] 26%|██▌ | 96409/371472 [7:40:06<20:43:57, 3.69it/s] 26%|██▌ | 96410/371472 [7:40:07<20:18:19, 3.76it/s] 26%|██▌ | 96411/371472 [7:40:07<20:27:08, 3.74it/s] 26%|██▌ | 96412/371472 [7:40:07<23:26:31, 3.26it/s] 26%|██▌ | 96413/371472 [7:40:08<22:22:04, 3.42it/s] 26%|██▌ | 96414/371472 [7:40:08<23:40:18, 3.23it/s] 26%|██▌ | 96415/371472 [7:40:08<25:23:10, 3.01it/s] 26%|██▌ | 96416/371472 [7:40:09<23:31:20, 3.25it/s] 26%|██▌ | 96417/371472 [7:40:09<23:04:32, 3.31it/s] 26%|██▌ | 96418/371472 [7:40:09<22:00:45, 3.47it/s] 26%|██▌ | 96419/371472 [7:40:09<21:27:09, 3.56it/s] 26%|██▌ | 96420/371472 [7:40:10<21:10:29, 3.61it/s] {'loss': 3.6788, 'learning_rate': 7.667532159710401e-07, 'epoch': 4.15} + 26%|██▌ | 96420/371472 [7:40:10<21:10:29, 3.61it/s] 26%|██▌ | 96421/371472 [7:40:10<21:18:43, 3.58it/s] 26%|██▌ | 96422/371472 [7:40:10<20:54:46, 3.65it/s] 26%|██▌ | 96423/371472 [7:40:10<21:09:11, 3.61it/s] 26%|██▌ | 96424/371472 [7:40:11<20:53:00, 3.66it/s] 26%|██▌ | 96425/371472 [7:40:11<21:44:55, 3.51it/s] 26%|██▌ | 96426/371472 [7:40:11<22:02:15, 3.47it/s] 26%|██▌ | 96427/371472 [7:40:12<22:48:25, 3.35it/s] 26%|██▌ | 96428/371472 [7:40:12<22:39:18, 3.37it/s] 26%|██▌ | 96429/371472 [7:40:12<22:14:23, 3.44it/s] 26%|██▌ | 96430/371472 [7:40:12<21:40:18, 3.53it/s] 26%|██▌ | 96431/371472 [7:40:13<20:49:13, 3.67it/s] 26%|██▌ | 96432/371472 [7:40:13<20:45:10, 3.68it/s] 26%|██▌ | 96433/371472 [7:40:13<20:00:57, 3.82it/s] 26%|██▌ | 96434/371472 [7:40:14<20:23:54, 3.75it/s] 26%|██▌ | 96435/371472 [7:40:14<21:35:02, 3.54it/s] 26%|██▌ | 96436/371472 [7:40:14<21:29:49, 3.55it/s] 26%|██▌ | 96437/371472 [7:40:14<20:48:37, 3.67it/s] 26%|██▌ | 96438/371472 [7:40:15<23:01:42, 3.32it/s] 26%|██▌ | 96439/371472 [7:40:15<22:38:43, 3.37it/s] 26%|██▌ | 96440/371472 [7:40:15<21:52:28, 3.49it/s] {'loss': 3.6262, 'learning_rate': 7.667047339955611e-07, 'epoch': 4.15} + 26%|██▌ | 96440/371472 [7:40:15<21:52:28, 3.49it/s] 26%|██▌ | 96441/371472 [7:40:16<22:03:45, 3.46it/s] 26%|██▌ | 96442/371472 [7:40:16<21:28:40, 3.56it/s] 26%|██▌ | 96443/371472 [7:40:16<22:55:18, 3.33it/s] 26%|██▌ | 96444/371472 [7:40:16<21:40:18, 3.53it/s] 26%|██▌ | 96445/371472 [7:40:17<20:54:39, 3.65it/s] 26%|██▌ | 96446/371472 [7:40:17<22:06:37, 3.46it/s] 26%|██▌ | 96447/371472 [7:40:17<21:51:08, 3.50it/s] 26%|██▌ | 96448/371472 [7:40:18<22:50:40, 3.34it/s] 26%|██▌ | 96449/371472 [7:40:18<22:25:31, 3.41it/s] 26%|██▌ | 96450/371472 [7:40:18<21:37:53, 3.53it/s] 26%|██▌ | 96451/371472 [7:40:18<21:23:36, 3.57it/s] 26%|██▌ | 96452/371472 [7:40:19<21:16:57, 3.59it/s] 26%|██▌ | 96453/371472 [7:40:19<21:47:29, 3.51it/s] 26%|██▌ | 96454/371472 [7:40:19<21:25:44, 3.56it/s] 26%|██▌ | 96455/371472 [7:40:20<21:50:18, 3.50it/s] 26%|██▌ | 96456/371472 [7:40:20<20:48:44, 3.67it/s] 26%|██▌ | 96457/371472 [7:40:20<20:18:36, 3.76it/s] 26%|██▌ | 96458/371472 [7:40:20<20:35:47, 3.71it/s] 26%|██▌ | 96459/371472 [7:40:21<19:49:46, 3.85it/s] 26%|██▌ | 96460/371472 [7:40:21<20:28:01, 3.73it/s] {'loss': 3.571, 'learning_rate': 7.666562520200822e-07, 'epoch': 4.15} + 26%|██▌ | 96460/371472 [7:40:21<20:28:01, 3.73it/s] 26%|██▌ | 96461/371472 [7:40:21<20:15:47, 3.77it/s] 26%|██▌ | 96462/371472 [7:40:21<19:58:50, 3.82it/s] 26%|██▌ | 96463/371472 [7:40:22<22:05:35, 3.46it/s] 26%|██▌ | 96464/371472 [7:40:22<21:59:29, 3.47it/s] 26%|██▌ | 96465/371472 [7:40:22<21:36:18, 3.54it/s] 26%|██▌ | 96466/371472 [7:40:23<21:21:07, 3.58it/s] 26%|██▌ | 96467/371472 [7:40:23<22:51:50, 3.34it/s] 26%|██▌ | 96468/371472 [7:40:23<22:20:01, 3.42it/s] 26%|██▌ | 96469/371472 [7:40:24<23:17:21, 3.28it/s] 26%|██▌ | 96470/371472 [7:40:24<22:32:11, 3.39it/s] 26%|██▌ | 96471/371472 [7:40:24<22:38:02, 3.37it/s] 26%|██▌ | 96472/371472 [7:40:24<24:07:51, 3.17it/s] 26%|██▌ | 96473/371472 [7:40:25<22:23:14, 3.41it/s] 26%|██▌ | 96474/371472 [7:40:25<21:09:47, 3.61it/s] 26%|██▌ | 96475/371472 [7:40:25<24:06:33, 3.17it/s] 26%|██▌ | 96476/371472 [7:40:26<23:14:46, 3.29it/s] 26%|██▌ | 96477/371472 [7:40:26<21:55:19, 3.48it/s] 26%|██▌ | 96478/371472 [7:40:26<22:02:54, 3.46it/s] 26%|██▌ | 96479/371472 [7:40:26<22:00:00, 3.47it/s] 26%|██▌ | 96480/371472 [7:40:27<21:39:58, 3.53it/s] {'loss': 3.8031, 'learning_rate': 7.666077700446034e-07, 'epoch': 4.16} + 26%|██▌ | 96480/371472 [7:40:27<21:39:58, 3.53it/s] 26%|██▌ | 96481/371472 [7:40:27<22:46:55, 3.35it/s] 26%|██▌ | 96482/371472 [7:40:27<22:21:33, 3.42it/s] 26%|██▌ | 96483/371472 [7:40:28<21:30:09, 3.55it/s] 26%|██▌ | 96484/371472 [7:40:28<21:21:19, 3.58it/s] 26%|██▌ | 96485/371472 [7:40:28<21:51:14, 3.50it/s] 26%|██▌ | 96486/371472 [7:40:28<21:54:32, 3.49it/s] 26%|██▌ | 96487/371472 [7:40:29<21:06:45, 3.62it/s] 26%|██▌ | 96488/371472 [7:40:29<20:39:58, 3.70it/s] 26%|██▌ | 96489/371472 [7:40:29<20:53:58, 3.65it/s] 26%|██▌ | 96490/371472 [7:40:30<20:50:18, 3.67it/s] 26%|██▌ | 96491/371472 [7:40:30<20:06:42, 3.80it/s] 26%|██▌ | 96492/371472 [7:40:30<20:11:56, 3.78it/s] 26%|██▌ | 96493/371472 [7:40:30<19:42:50, 3.87it/s] 26%|██▌ | 96494/371472 [7:40:31<20:06:07, 3.80it/s] 26%|██▌ | 96495/371472 [7:40:31<20:59:06, 3.64it/s] 26%|██▌ | 96496/371472 [7:40:31<22:02:16, 3.47it/s] 26%|██▌ | 96497/371472 [7:40:31<21:02:41, 3.63it/s] 26%|██▌ | 96498/371472 [7:40:32<20:04:52, 3.80it/s] 26%|██▌ | 96499/371472 [7:40:32<19:53:44, 3.84it/s] 26%|██▌ | 96500/371472 [7:40:32<20:11:33, 3.78it/s] {'loss': 3.553, 'learning_rate': 7.665592880691245e-07, 'epoch': 4.16} + 26%|██▌ | 96500/371472 [7:40:32<20:11:33, 3.78it/s] 26%|██▌ | 96501/371472 [7:40:32<20:08:27, 3.79it/s] 26%|██▌ | 96502/371472 [7:40:33<19:52:45, 3.84it/s] 26%|██▌ | 96503/371472 [7:40:33<21:31:33, 3.55it/s] 26%|██▌ | 96504/371472 [7:40:33<21:17:55, 3.59it/s] 26%|██▌ | 96505/371472 [7:40:34<21:13:09, 3.60it/s] 26%|██▌ | 96506/371472 [7:40:34<20:52:31, 3.66it/s] 26%|██▌ | 96507/371472 [7:40:34<20:43:55, 3.68it/s] 26%|██▌ | 96508/371472 [7:40:34<21:21:31, 3.58it/s] 26%|██▌ | 96509/371472 [7:40:35<23:51:07, 3.20it/s] 26%|██▌ | 96510/371472 [7:40:35<23:06:50, 3.30it/s] 26%|██▌ | 96511/371472 [7:40:35<22:55:49, 3.33it/s] 26%|██▌ | 96512/371472 [7:40:36<23:30:38, 3.25it/s] 26%|██▌ | 96513/371472 [7:40:36<24:20:07, 3.14it/s] 26%|██▌ | 96514/371472 [7:40:36<23:09:47, 3.30it/s] 26%|██▌ | 96515/371472 [7:40:37<22:25:14, 3.41it/s] 26%|██▌ | 96516/371472 [7:40:37<24:50:39, 3.07it/s] 26%|██▌ | 96517/371472 [7:40:37<22:59:20, 3.32it/s] 26%|██▌ | 96518/371472 [7:40:37<21:57:47, 3.48it/s] 26%|██▌ | 96519/371472 [7:40:38<21:18:02, 3.59it/s] 26%|██▌ | 96520/371472 [7:40:38<20:41:27, 3.69it/s] {'loss': 3.6871, 'learning_rate': 7.665108060936455e-07, 'epoch': 4.16} + 26%|██▌ | 96520/371472 [7:40:38<20:41:27, 3.69it/s] 26%|██▌ | 96521/371472 [7:40:38<21:30:39, 3.55it/s] 26%|██▌ | 96522/371472 [7:40:39<20:39:38, 3.70it/s] 26%|██▌ | 96523/371472 [7:40:39<21:15:42, 3.59it/s] 26%|██▌ | 96524/371472 [7:40:39<21:07:44, 3.61it/s] 26%|██▌ | 96525/371472 [7:40:39<21:38:29, 3.53it/s] 26%|██▌ | 96526/371472 [7:40:40<20:49:36, 3.67it/s] 26%|██▌ | 96527/371472 [7:40:40<20:46:59, 3.67it/s] 26%|██▌ | 96528/371472 [7:40:40<20:51:30, 3.66it/s] 26%|██▌ | 96529/371472 [7:40:40<20:46:02, 3.68it/s] 26%|██▌ | 96530/371472 [7:40:41<20:01:39, 3.81it/s] 26%|██▌ | 96531/371472 [7:40:41<19:54:56, 3.83it/s] 26%|██▌ | 96532/371472 [7:40:41<20:46:14, 3.68it/s] 26%|██▌ | 96533/371472 [7:40:41<20:08:14, 3.79it/s] 26%|██▌ | 96534/371472 [7:40:42<21:15:13, 3.59it/s] 26%|██▌ | 96535/371472 [7:40:42<21:17:44, 3.59it/s] 26%|██▌ | 96536/371472 [7:40:42<22:17:20, 3.43it/s] 26%|██▌ | 96537/371472 [7:40:43<22:24:17, 3.41it/s] 26%|██▌ | 96538/371472 [7:40:43<21:55:41, 3.48it/s] 26%|██▌ | 96539/371472 [7:40:43<21:36:07, 3.54it/s] 26%|██▌ | 96540/371472 [7:40:43<20:43:15, 3.69it/s] {'loss': 3.568, 'learning_rate': 7.664623241181667e-07, 'epoch': 4.16} + 26%|██▌ | 96540/371472 [7:40:43<20:43:15, 3.69it/s] 26%|██▌ | 96541/371472 [7:40:44<20:27:38, 3.73it/s] 26%|██▌ | 96542/371472 [7:40:44<20:27:09, 3.73it/s] 26%|██▌ | 96543/371472 [7:40:44<20:12:28, 3.78it/s] 26%|██▌ | 96544/371472 [7:40:45<19:55:01, 3.83it/s] 26%|██▌ | 96545/371472 [7:40:45<20:14:27, 3.77it/s] 26%|██▌ | 96546/371472 [7:40:45<20:08:48, 3.79it/s] 26%|██▌ | 96547/371472 [7:40:45<21:35:12, 3.54it/s] 26%|██▌ | 96548/371472 [7:40:46<21:58:22, 3.48it/s] 26%|██▌ | 96549/371472 [7:40:46<20:57:18, 3.64it/s] 26%|██▌ | 96550/371472 [7:40:46<20:06:20, 3.80it/s] 26%|██▌ | 96551/371472 [7:40:47<22:15:56, 3.43it/s] 26%|██▌ | 96552/371472 [7:40:47<21:54:56, 3.48it/s] 26%|██▌ | 96553/371472 [7:40:47<21:56:15, 3.48it/s] 26%|██▌ | 96554/371472 [7:40:47<23:07:15, 3.30it/s] 26%|██▌ | 96555/371472 [7:40:48<22:16:39, 3.43it/s] 26%|██▌ | 96556/371472 [7:40:48<21:43:02, 3.52it/s] 26%|██▌ | 96557/371472 [7:40:48<21:33:33, 3.54it/s] 26%|██▌ | 96558/371472 [7:40:48<20:36:07, 3.71it/s] 26%|██▌ | 96559/371472 [7:40:49<20:41:24, 3.69it/s] 26%|██▌ | 96560/371472 [7:40:49<21:47:08, 3.51it/s] {'loss': 3.6274, 'learning_rate': 7.664138421426879e-07, 'epoch': 4.16} + 26%|██▌ | 96560/371472 [7:40:49<21:47:08, 3.51it/s] 26%|██▌ | 96561/371472 [7:40:49<22:18:26, 3.42it/s] 26%|██▌ | 96562/371472 [7:40:50<21:16:22, 3.59it/s] 26%|██▌ | 96563/371472 [7:40:50<22:08:41, 3.45it/s] 26%|██▌ | 96564/371472 [7:40:50<21:14:07, 3.60it/s] 26%|██▌ | 96565/371472 [7:40:51<22:13:42, 3.44it/s] 26%|██▌ | 96566/371472 [7:40:51<21:06:08, 3.62it/s] 26%|██▌ | 96567/371472 [7:40:51<20:48:09, 3.67it/s] 26%|██▌ | 96568/371472 [7:40:51<20:48:31, 3.67it/s] 26%|██▌ | 96569/371472 [7:40:52<20:14:13, 3.77it/s] 26%|██▌ | 96570/371472 [7:40:52<20:26:47, 3.73it/s] 26%|██▌ | 96571/371472 [7:40:52<20:30:04, 3.72it/s] 26%|██▌ | 96572/371472 [7:40:52<22:37:10, 3.38it/s] 26%|██▌ | 96573/371472 [7:40:53<21:48:49, 3.50it/s] 26%|██▌ | 96574/371472 [7:40:53<22:01:03, 3.47it/s] 26%|██▌ | 96575/371472 [7:40:53<21:37:05, 3.53it/s] 26%|██▌ | 96576/371472 [7:40:54<23:21:58, 3.27it/s] 26%|██▌ | 96577/371472 [7:40:54<22:40:32, 3.37it/s] 26%|██▌ | 96578/371472 [7:40:54<22:53:54, 3.33it/s] 26%|██▌ | 96579/371472 [7:40:54<21:39:29, 3.53it/s] 26%|██▌ | 96580/371472 [7:40:55<20:56:58, 3.64it/s] {'loss': 3.7104, 'learning_rate': 7.663653601672089e-07, 'epoch': 4.16} + 26%|██▌ | 96580/371472 [7:40:55<20:56:58, 3.64it/s] 26%|██▌ | 96581/371472 [7:40:55<21:15:15, 3.59it/s] 26%|██▌ | 96582/371472 [7:40:55<20:21:47, 3.75it/s] 26%|██▌ | 96583/371472 [7:40:56<20:56:02, 3.65it/s] 26%|██▌ | 96584/371472 [7:40:56<20:27:42, 3.73it/s] 26%|██▌ | 96585/371472 [7:40:56<21:57:22, 3.48it/s] 26%|██▌ | 96586/371472 [7:40:56<21:21:56, 3.57it/s] 26%|██▌ | 96587/371472 [7:40:57<21:30:46, 3.55it/s] 26%|██▌ | 96588/371472 [7:40:57<21:40:46, 3.52it/s] 26%|██▌ | 96589/371472 [7:40:57<23:32:32, 3.24it/s] 26%|██▌ | 96590/371472 [7:40:58<22:27:42, 3.40it/s] 26%|██▌ | 96591/371472 [7:40:58<22:12:28, 3.44it/s] 26%|██▌ | 96592/371472 [7:40:58<22:44:32, 3.36it/s] 26%|██▌ | 96593/371472 [7:40:58<22:13:31, 3.44it/s] 26%|██▌ | 96594/371472 [7:40:59<22:53:08, 3.34it/s] 26%|██▌ | 96595/371472 [7:40:59<22:36:34, 3.38it/s] 26%|██▌ | 96596/371472 [7:40:59<23:02:15, 3.31it/s] 26%|██▌ | 96597/371472 [7:41:00<22:05:58, 3.46it/s] 26%|██▌ | 96598/371472 [7:41:00<21:20:29, 3.58it/s] 26%|██▌ | 96599/371472 [7:41:00<20:33:52, 3.71it/s] 26%|██▌ | 96600/371472 [7:41:00<20:31:43, 3.72it/s] {'loss': 3.7302, 'learning_rate': 7.663168781917299e-07, 'epoch': 4.16} + 26%|██▌ | 96600/371472 [7:41:00<20:31:43, 3.72it/s] 26%|██▌ | 96601/371472 [7:41:01<19:54:08, 3.84it/s] 26%|██▌ | 96602/371472 [7:41:01<21:24:53, 3.57it/s] 26%|██▌ | 96603/371472 [7:41:01<21:07:34, 3.61it/s] 26%|██▌ | 96604/371472 [7:41:02<20:35:11, 3.71it/s] 26%|██▌ | 96605/371472 [7:41:02<20:14:55, 3.77it/s] 26%|██▌ | 96606/371472 [7:41:02<21:04:34, 3.62it/s] 26%|██▌ | 96607/371472 [7:41:02<20:24:21, 3.74it/s] 26%|██▌ | 96608/371472 [7:41:03<22:55:03, 3.33it/s] 26%|██▌ | 96609/371472 [7:41:03<22:12:58, 3.44it/s] 26%|██▌ | 96610/371472 [7:41:03<21:18:28, 3.58it/s] 26%|██▌ | 96611/371472 [7:41:03<21:02:04, 3.63it/s] 26%|██▌ | 96612/371472 [7:41:04<21:01:30, 3.63it/s] 26%|██▌ | 96613/371472 [7:41:04<21:15:35, 3.59it/s] 26%|██▌ | 96614/371472 [7:41:04<20:32:19, 3.72it/s] 26%|██▌ | 96615/371472 [7:41:05<20:36:00, 3.71it/s] 26%|██▌ | 96616/371472 [7:41:05<20:59:58, 3.64it/s] 26%|██▌ | 96617/371472 [7:41:05<21:20:20, 3.58it/s] 26%|██▌ | 96618/371472 [7:41:06<24:15:53, 3.15it/s] 26%|██▌ | 96619/371472 [7:41:06<24:34:59, 3.11it/s] 26%|██▌ | 96620/371472 [7:41:06<23:22:54, 3.27it/s] {'loss': 3.6491, 'learning_rate': 7.662683962162511e-07, 'epoch': 4.16} + 26%|██▌ | 96620/371472 [7:41:06<23:22:54, 3.27it/s] 26%|██▌ | 96621/371472 [7:41:06<22:17:49, 3.42it/s] 26%|██▌ | 96622/371472 [7:41:07<22:04:32, 3.46it/s] 26%|██▌ | 96623/371472 [7:41:07<21:51:53, 3.49it/s] 26%|██▌ | 96624/371472 [7:41:07<23:16:30, 3.28it/s] 26%|██▌ | 96625/371472 [7:41:08<22:52:39, 3.34it/s] 26%|██▌ | 96626/371472 [7:41:08<21:22:12, 3.57it/s] 26%|██▌ | 96627/371472 [7:41:08<21:08:01, 3.61it/s] 26%|██▌ | 96628/371472 [7:41:08<23:11:57, 3.29it/s] 26%|██▌ | 96629/371472 [7:41:09<21:53:07, 3.49it/s] 26%|██▌ | 96630/371472 [7:41:09<23:25:39, 3.26it/s] 26%|██▌ | 96631/371472 [7:41:09<22:46:33, 3.35it/s] 26%|██▌ | 96632/371472 [7:41:10<22:26:53, 3.40it/s] 26%|██▌ | 96633/371472 [7:41:10<21:19:58, 3.58it/s] 26%|██▌ | 96634/371472 [7:41:10<21:47:48, 3.50it/s] 26%|██▌ | 96635/371472 [7:41:10<21:47:35, 3.50it/s] 26%|██▌ | 96636/371472 [7:41:11<21:01:35, 3.63it/s] 26%|██▌ | 96637/371472 [7:41:11<22:54:39, 3.33it/s] 26%|██▌ | 96638/371472 [7:41:11<21:57:35, 3.48it/s] 26%|██▌ | 96639/371472 [7:41:12<21:10:46, 3.60it/s] 26%|██▌ | 96640/371472 [7:41:12<21:22:58, 3.57it/s] {'loss': 3.753, 'learning_rate': 7.662199142407723e-07, 'epoch': 4.16} + 26%|██▌ | 96640/371472 [7:41:12<21:22:58, 3.57it/s] 26%|██▌ | 96641/371472 [7:41:12<22:17:16, 3.43it/s] 26%|██▌ | 96642/371472 [7:41:12<21:52:33, 3.49it/s] 26%|██▌ | 96643/371472 [7:41:13<21:29:58, 3.55it/s] 26%|██▌ | 96644/371472 [7:41:13<21:06:05, 3.62it/s] 26%|██▌ | 96645/371472 [7:41:13<21:04:01, 3.62it/s] 26%|██▌ | 96646/371472 [7:41:14<21:05:42, 3.62it/s] 26%|██▌ | 96647/371472 [7:41:14<20:47:59, 3.67it/s] 26%|██▌ | 96648/371472 [7:41:14<20:45:29, 3.68it/s] 26%|██▌ | 96649/371472 [7:41:14<20:24:49, 3.74it/s] 26%|██▌ | 96650/371472 [7:41:15<21:56:33, 3.48it/s] 26%|██▌ | 96651/371472 [7:41:15<22:45:02, 3.36it/s] 26%|██▌ | 96652/371472 [7:41:15<21:18:54, 3.58it/s] 26%|██▌ | 96653/371472 [7:41:15<20:50:55, 3.66it/s] 26%|██▌ | 96654/371472 [7:41:16<20:20:16, 3.75it/s] 26%|██▌ | 96655/371472 [7:41:16<19:49:42, 3.85it/s] 26%|██▌ | 96656/371472 [7:41:16<21:09:12, 3.61it/s] 26%|██▌ | 96657/371472 [7:41:17<20:37:40, 3.70it/s] 26%|██▌ | 96658/371472 [7:41:17<21:53:25, 3.49it/s] 26%|██▌ | 96659/371472 [7:41:17<21:41:00, 3.52it/s] 26%|██▌ | 96660/371472 [7:41:17<21:22:07, 3.57it/s] {'loss': 3.6668, 'learning_rate': 7.661714322652934e-07, 'epoch': 4.16} + 26%|██▌ | 96660/371472 [7:41:17<21:22:07, 3.57it/s] 26%|██▌ | 96661/371472 [7:41:18<21:09:43, 3.61it/s] 26%|██▌ | 96662/371472 [7:41:18<20:23:11, 3.74it/s] 26%|██▌ | 96663/371472 [7:41:18<20:12:29, 3.78it/s] 26%|██▌ | 96664/371472 [7:41:19<21:13:52, 3.60it/s] 26%|██▌ | 96665/371472 [7:41:19<22:36:03, 3.38it/s] 26%|██▌ | 96666/371472 [7:41:19<21:28:17, 3.56it/s] 26%|██▌ | 96667/371472 [7:41:19<21:11:03, 3.60it/s] 26%|██▌ | 96668/371472 [7:41:20<22:23:05, 3.41it/s] 26%|██▌ | 96669/371472 [7:41:20<21:39:29, 3.52it/s] 26%|██▌ | 96670/371472 [7:41:20<21:58:27, 3.47it/s] 26%|██▌ | 96671/371472 [7:41:21<23:29:36, 3.25it/s] 26%|██▌ | 96672/371472 [7:41:21<23:28:28, 3.25it/s] 26%|██▌ | 96673/371472 [7:41:21<22:35:31, 3.38it/s] 26%|██▌ | 96674/371472 [7:41:21<22:07:50, 3.45it/s] 26%|██▌ | 96675/371472 [7:41:22<21:24:44, 3.56it/s] 26%|██▌ | 96676/371472 [7:41:22<21:27:15, 3.56it/s] 26%|██▌ | 96677/371472 [7:41:22<20:53:53, 3.65it/s] 26%|██▌ | 96678/371472 [7:41:23<20:38:09, 3.70it/s] 26%|██▌ | 96679/371472 [7:41:23<21:20:34, 3.58it/s] 26%|██▌ | 96680/371472 [7:41:23<20:50:32, 3.66it/s] {'loss': 3.6697, 'learning_rate': 7.661229502898144e-07, 'epoch': 4.16} + 26%|██▌ | 96680/371472 [7:41:23<20:50:32, 3.66it/s] 26%|██▌ | 96681/371472 [7:41:23<20:37:27, 3.70it/s] 26%|██▌ | 96682/371472 [7:41:24<20:12:12, 3.78it/s] 26%|██▌ | 96683/371472 [7:41:24<20:13:35, 3.77it/s] 26%|██▌ | 96684/371472 [7:41:24<20:51:39, 3.66it/s] 26%|██▌ | 96685/371472 [7:41:24<21:26:55, 3.56it/s] 26%|██▌ | 96686/371472 [7:41:25<20:29:52, 3.72it/s] 26%|██▌ | 96687/371472 [7:41:25<19:51:37, 3.84it/s] 26%|██▌ | 96688/371472 [7:41:25<19:59:33, 3.82it/s] 26%|██▌ | 96689/371472 [7:41:25<19:47:18, 3.86it/s] 26%|██▌ | 96690/371472 [7:41:26<19:46:28, 3.86it/s] 26%|██▌ | 96691/371472 [7:41:26<20:10:16, 3.78it/s] 26%|██▌ | 96692/371472 [7:41:26<20:58:52, 3.64it/s] 26%|██▌ | 96693/371472 [7:41:27<21:39:18, 3.52it/s] 26%|██▌ | 96694/371472 [7:41:27<22:40:38, 3.37it/s] 26%|██▌ | 96695/371472 [7:41:27<21:41:10, 3.52it/s] 26%|██▌ | 96696/371472 [7:41:28<22:50:21, 3.34it/s] 26%|██▌ | 96697/371472 [7:41:28<23:12:18, 3.29it/s] 26%|██▌ | 96698/371472 [7:41:28<21:57:42, 3.48it/s] 26%|██▌ | 96699/371472 [7:41:28<22:19:49, 3.42it/s] 26%|██▌ | 96700/371472 [7:41:29<21:09:03, 3.61it/s] {'loss': 3.6219, 'learning_rate': 7.660744683143356e-07, 'epoch': 4.17} + 26%|██▌ | 96700/371472 [7:41:29<21:09:03, 3.61it/s] 26%|██▌ | 96701/371472 [7:41:29<20:59:11, 3.64it/s] 26%|██▌ | 96702/371472 [7:41:29<21:11:59, 3.60it/s] 26%|██▌ | 96703/371472 [7:41:29<20:21:01, 3.75it/s] 26%|██▌ | 96704/371472 [7:41:30<20:47:13, 3.67it/s] 26%|██▌ | 96705/371472 [7:41:30<21:22:21, 3.57it/s] 26%|██▌ | 96706/371472 [7:41:30<21:41:49, 3.52it/s] 26%|██▌ | 96707/371472 [7:41:31<22:03:25, 3.46it/s] 26%|██▌ | 96708/371472 [7:41:31<21:27:34, 3.56it/s] 26%|██▌ | 96709/371472 [7:41:31<20:44:31, 3.68it/s] 26%|██▌ | 96710/371472 [7:41:31<20:04:37, 3.80it/s] 26%|██▌ | 96711/371472 [7:41:32<20:35:59, 3.71it/s] 26%|██▌ | 96712/371472 [7:41:32<21:19:49, 3.58it/s] 26%|██▌ | 96713/371472 [7:41:32<21:04:03, 3.62it/s] 26%|██▌ | 96714/371472 [7:41:32<20:00:23, 3.81it/s] 26%|██▌ | 96715/371472 [7:41:33<19:48:43, 3.85it/s] 26%|██▌ | 96716/371472 [7:41:33<21:10:56, 3.60it/s] 26%|██▌ | 96717/371472 [7:41:33<20:42:16, 3.69it/s] 26%|██▌ | 96718/371472 [7:41:34<21:56:47, 3.48it/s] 26%|██▌ | 96719/371472 [7:41:34<22:42:30, 3.36it/s] 26%|██▌ | 96720/371472 [7:41:34<21:43:27, 3.51it/s] {'loss': 3.6411, 'learning_rate': 7.660259863388566e-07, 'epoch': 4.17} + 26%|██▌ | 96720/371472 [7:41:34<21:43:27, 3.51it/s] 26%|██▌ | 96721/371472 [7:41:34<21:39:32, 3.52it/s] 26%|██▌ | 96722/371472 [7:41:35<22:18:09, 3.42it/s] 26%|██▌ | 96723/371472 [7:41:35<21:14:47, 3.59it/s] 26%|██▌ | 96724/371472 [7:41:35<21:29:23, 3.55it/s] 26%|██▌ | 96725/371472 [7:41:36<21:23:19, 3.57it/s] 26%|██▌ | 96726/371472 [7:41:36<21:46:53, 3.50it/s] 26%|██▌ | 96727/371472 [7:41:36<21:45:01, 3.51it/s] 26%|██▌ | 96728/371472 [7:41:36<22:05:02, 3.46it/s] 26%|██▌ | 96729/371472 [7:41:37<22:01:15, 3.47it/s] 26%|██▌ | 96730/371472 [7:41:37<22:34:28, 3.38it/s] 26%|██▌ | 96731/371472 [7:41:37<21:46:17, 3.51it/s] 26%|██▌ | 96732/371472 [7:41:38<21:33:46, 3.54it/s] 26%|██▌ | 96733/371472 [7:41:38<21:37:51, 3.53it/s] 26%|██▌ | 96734/371472 [7:41:38<21:09:44, 3.61it/s] 26%|██▌ | 96735/371472 [7:41:38<20:51:42, 3.66it/s] 26%|██▌ | 96736/371472 [7:41:39<21:24:05, 3.57it/s] 26%|██▌ | 96737/371472 [7:41:39<21:29:10, 3.55it/s] 26%|██▌ | 96738/371472 [7:41:39<20:23:41, 3.74it/s] 26%|██▌ | 96739/371472 [7:41:39<19:40:04, 3.88it/s] 26%|██▌ | 96740/371472 [7:41:40<20:37:18, 3.70it/s] {'loss': 3.4607, 'learning_rate': 7.659775043633777e-07, 'epoch': 4.17} + 26%|██▌ | 96740/371472 [7:41:40<20:37:18, 3.70it/s] 26%|██▌ | 96741/371472 [7:41:40<19:56:43, 3.83it/s] 26%|██▌ | 96742/371472 [7:41:40<20:30:22, 3.72it/s] 26%|██▌ | 96743/371472 [7:41:41<20:09:36, 3.79it/s] 26%|██▌ | 96744/371472 [7:41:41<19:52:00, 3.84it/s] 26%|██▌ | 96745/371472 [7:41:41<19:25:40, 3.93it/s] 26%|██▌ | 96746/371472 [7:41:41<19:02:56, 4.01it/s] 26%|██▌ | 96747/371472 [7:41:42<19:40:24, 3.88it/s] 26%|██▌ | 96748/371472 [7:41:42<21:13:03, 3.60it/s] 26%|██▌ | 96749/371472 [7:41:42<20:48:42, 3.67it/s] 26%|██▌ | 96750/371472 [7:41:42<20:05:47, 3.80it/s] 26%|██▌ | 96751/371472 [7:41:43<22:14:06, 3.43it/s] 26%|██▌ | 96752/371472 [7:41:43<21:39:22, 3.52it/s] 26%|██▌ | 96753/371472 [7:41:43<20:59:13, 3.64it/s] 26%|██▌ | 96754/371472 [7:41:43<20:07:30, 3.79it/s] 26%|██▌ | 96755/371472 [7:41:44<19:26:59, 3.92it/s] 26%|██▌ | 96756/371472 [7:41:44<21:03:47, 3.62it/s] 26%|██▌ | 96757/371472 [7:41:44<22:22:30, 3.41it/s] 26%|██▌ | 96758/371472 [7:41:45<22:18:54, 3.42it/s] 26%|██▌ | 96759/371472 [7:41:45<22:33:33, 3.38it/s] 26%|██▌ | 96760/371472 [7:41:45<21:44:53, 3.51it/s] {'loss': 3.7117, 'learning_rate': 7.659290223878988e-07, 'epoch': 4.17} + 26%|██▌ | 96760/371472 [7:41:45<21:44:53, 3.51it/s] 26%|██▌ | 96761/371472 [7:41:45<20:48:05, 3.67it/s] 26%|██▌ | 96762/371472 [7:41:46<20:25:02, 3.74it/s] 26%|██▌ | 96763/371472 [7:41:46<21:03:26, 3.62it/s] 26%|██▌ | 96764/371472 [7:41:46<20:25:26, 3.74it/s] 26%|██▌ | 96765/371472 [7:41:47<20:20:42, 3.75it/s] 26%|██▌ | 96766/371472 [7:41:47<20:37:32, 3.70it/s] 26%|██▌ | 96767/371472 [7:41:47<21:19:49, 3.58it/s] 26%|██▌ | 96768/371472 [7:41:47<22:51:57, 3.34it/s] 26%|██▌ | 96769/371472 [7:41:48<23:18:00, 3.27it/s] 26%|██▌ | 96770/371472 [7:41:48<22:41:09, 3.36it/s] 26%|██▌ | 96771/371472 [7:41:48<22:51:37, 3.34it/s] 26%|██▌ | 96772/371472 [7:41:49<23:40:47, 3.22it/s] 26%|██▌ | 96773/371472 [7:41:49<22:42:22, 3.36it/s] 26%|██▌ | 96774/371472 [7:41:49<21:41:27, 3.52it/s] 26%|██▌ | 96775/371472 [7:41:50<22:57:25, 3.32it/s] 26%|██▌ | 96776/371472 [7:41:50<21:44:09, 3.51it/s] 26%|██▌ | 96777/371472 [7:41:50<22:09:18, 3.44it/s] 26%|██▌ | 96778/371472 [7:41:50<21:55:14, 3.48it/s] 26%|██▌ | 96779/371472 [7:41:51<21:17:21, 3.58it/s] 26%|██▌ | 96780/371472 [7:41:51<21:26:15, 3.56it/s] {'loss': 3.6294, 'learning_rate': 7.6588054041242e-07, 'epoch': 4.17} + 26%|██▌ | 96780/371472 [7:41:51<21:26:15, 3.56it/s] 26%|██▌ | 96781/371472 [7:41:51<21:19:39, 3.58it/s] 26%|██▌ | 96782/371472 [7:41:52<21:56:12, 3.48it/s] 26%|██▌ | 96783/371472 [7:41:52<26:51:29, 2.84it/s] 26%|██▌ | 96784/371472 [7:41:52<24:54:26, 3.06it/s] 26%|██▌ | 96785/371472 [7:41:53<25:08:45, 3.03it/s] 26%|██▌ | 96786/371472 [7:41:53<23:57:00, 3.19it/s] 26%|██▌ | 96787/371472 [7:41:53<22:32:20, 3.39it/s] 26%|██▌ | 96788/371472 [7:41:53<21:28:56, 3.55it/s] 26%|██▌ | 96789/371472 [7:41:54<21:32:18, 3.54it/s] 26%|██▌ | 96790/371472 [7:41:54<21:12:28, 3.60it/s] 26%|██▌ | 96791/371472 [7:41:54<21:25:42, 3.56it/s] 26%|██▌ | 96792/371472 [7:41:54<20:22:53, 3.74it/s] 26%|██▌ | 96793/371472 [7:41:55<20:44:24, 3.68it/s] 26%|██▌ | 96794/371472 [7:41:55<20:33:51, 3.71it/s] 26%|██▌ | 96795/371472 [7:41:55<20:47:53, 3.67it/s] 26%|██▌ | 96796/371472 [7:41:56<21:30:06, 3.55it/s] 26%|██▌ | 96797/371472 [7:41:56<22:47:51, 3.35it/s] 26%|██▌ | 96798/371472 [7:41:56<24:29:08, 3.12it/s] 26%|██▌ | 96799/371472 [7:41:57<23:40:37, 3.22it/s] 26%|██▌ | 96800/371472 [7:41:57<23:29:05, 3.25it/s] {'loss': 3.4917, 'learning_rate': 7.658320584369412e-07, 'epoch': 4.17} + 26%|██▌ | 96800/371472 [7:41:57<23:29:05, 3.25it/s] 26%|██▌ | 96801/371472 [7:41:57<22:45:02, 3.35it/s] 26%|██▌ | 96802/371472 [7:41:57<21:29:37, 3.55it/s] 26%|██▌ | 96803/371472 [7:41:58<22:15:18, 3.43it/s] 26%|██▌ | 96804/371472 [7:41:58<21:28:25, 3.55it/s] 26%|██▌ | 96805/371472 [7:41:58<25:01:45, 3.05it/s] 26%|██▌ | 96806/371472 [7:41:59<25:34:42, 2.98it/s] 26%|██▌ | 96807/371472 [7:41:59<24:06:46, 3.16it/s] 26%|██▌ | 96808/371472 [7:41:59<23:29:00, 3.25it/s] 26%|██▌ | 96809/371472 [7:42:00<23:15:00, 3.28it/s] 26%|██▌ | 96810/371472 [7:42:00<21:37:42, 3.53it/s] 26%|██▌ | 96811/371472 [7:42:00<21:00:30, 3.63it/s] 26%|██▌ | 96812/371472 [7:42:00<21:02:40, 3.63it/s] 26%|██▌ | 96813/371472 [7:42:01<20:52:18, 3.66it/s] 26%|██▌ | 96814/371472 [7:42:01<20:27:16, 3.73it/s] 26%|██▌ | 96815/371472 [7:42:01<20:59:49, 3.63it/s] 26%|██▌ | 96816/371472 [7:42:02<21:05:46, 3.62it/s] 26%|██▌ | 96817/371472 [7:42:02<21:57:03, 3.48it/s] 26%|██▌ | 96818/371472 [7:42:02<21:32:13, 3.54it/s] 26%|██▌ | 96819/371472 [7:42:02<21:43:29, 3.51it/s] 26%|██▌ | 96820/371472 [7:42:03<20:49:07, 3.66it/s] {'loss': 3.6277, 'learning_rate': 7.657835764614622e-07, 'epoch': 4.17} + 26%|██▌ | 96820/371472 [7:42:03<20:49:07, 3.66it/s] 26%|██▌ | 96821/371472 [7:42:03<20:37:16, 3.70it/s] 26%|██▌ | 96822/371472 [7:42:03<21:02:14, 3.63it/s] 26%|██▌ | 96823/371472 [7:42:04<22:14:34, 3.43it/s] 26%|██▌ | 96824/371472 [7:42:04<22:55:48, 3.33it/s] 26%|██▌ | 96825/371472 [7:42:04<25:45:53, 2.96it/s] 26%|██▌ | 96826/371472 [7:42:05<24:30:01, 3.11it/s] 26%|██▌ | 96827/371472 [7:42:05<24:07:35, 3.16it/s] 26%|██▌ | 96828/371472 [7:42:05<25:26:44, 3.00it/s] 26%|██▌ | 96829/371472 [7:42:05<24:01:11, 3.18it/s] 26%|██▌ | 96830/371472 [7:42:06<23:13:46, 3.28it/s] 26%|██▌ | 96831/371472 [7:42:06<22:36:37, 3.37it/s] 26%|██▌ | 96832/371472 [7:42:06<21:57:48, 3.47it/s] 26%|██▌ | 96833/371472 [7:42:07<21:23:04, 3.57it/s] 26%|██▌ | 96834/371472 [7:42:07<22:13:34, 3.43it/s] 26%|██▌ | 96835/371472 [7:42:07<21:13:17, 3.59it/s] 26%|██▌ | 96836/371472 [7:42:07<21:52:37, 3.49it/s] 26%|██▌ | 96837/371472 [7:42:08<22:10:45, 3.44it/s] 26%|██▌ | 96838/371472 [7:42:08<21:36:09, 3.53it/s] 26%|██▌ | 96839/371472 [7:42:08<21:40:52, 3.52it/s] 26%|██▌ | 96840/371472 [7:42:09<24:07:56, 3.16it/s] {'loss': 3.3923, 'learning_rate': 7.657350944859832e-07, 'epoch': 4.17} + 26%|██▌ | 96840/371472 [7:42:09<24:07:56, 3.16it/s] 26%|██▌ | 96841/371472 [7:42:09<23:05:59, 3.30it/s] 26%|██▌ | 96842/371472 [7:42:09<21:37:26, 3.53it/s] 26%|██▌ | 96843/371472 [7:42:09<21:34:03, 3.54it/s] 26%|██▌ | 96844/371472 [7:42:10<20:47:25, 3.67it/s] 26%|██▌ | 96845/371472 [7:42:10<20:59:45, 3.63it/s] 26%|██▌ | 96846/371472 [7:42:10<20:42:15, 3.68it/s] 26%|██▌ | 96847/371472 [7:42:11<20:49:20, 3.66it/s] 26%|██▌ | 96848/371472 [7:42:11<21:29:24, 3.55it/s] 26%|██▌ | 96849/371472 [7:42:11<20:33:33, 3.71it/s] 26%|██▌ | 96850/371472 [7:42:11<20:59:55, 3.63it/s] 26%|██▌ | 96851/371472 [7:42:12<20:51:27, 3.66it/s] 26%|██▌ | 96852/371472 [7:42:12<21:05:39, 3.62it/s] 26%|██▌ | 96853/371472 [7:42:12<20:37:52, 3.70it/s] 26%|██▌ | 96854/371472 [7:42:13<22:49:15, 3.34it/s] 26%|██▌ | 96855/371472 [7:42:13<22:38:45, 3.37it/s] 26%|██▌ | 96856/371472 [7:42:13<23:15:53, 3.28it/s] 26%|██▌ | 96857/371472 [7:42:13<22:34:07, 3.38it/s] 26%|██▌ | 96858/371472 [7:42:14<22:19:04, 3.42it/s] 26%|██▌ | 96859/371472 [7:42:14<21:35:40, 3.53it/s] 26%|██▌ | 96860/371472 [7:42:14<21:42:49, 3.51it/s] {'loss': 3.5579, 'learning_rate': 7.656866125105044e-07, 'epoch': 4.17} + 26%|██▌ | 96860/371472 [7:42:14<21:42:49, 3.51it/s] 26%|██▌ | 96861/371472 [7:42:15<21:33:55, 3.54it/s] 26%|██▌ | 96862/371472 [7:42:15<21:19:11, 3.58it/s] 26%|██▌ | 96863/371472 [7:42:15<21:07:35, 3.61it/s] 26%|██▌ | 96864/371472 [7:42:15<20:36:47, 3.70it/s] 26%|██▌ | 96865/371472 [7:42:16<20:24:17, 3.74it/s] 26%|██▌ | 96866/371472 [7:42:16<20:09:56, 3.78it/s] 26%|██▌ | 96867/371472 [7:42:16<20:02:47, 3.81it/s] 26%|██▌ | 96868/371472 [7:42:16<20:03:15, 3.80it/s] 26%|██▌ | 96869/371472 [7:42:17<20:26:32, 3.73it/s] 26%|██▌ | 96870/371472 [7:42:17<21:26:16, 3.56it/s] 26%|██▌ | 96871/371472 [7:42:17<21:56:41, 3.48it/s] 26%|██▌ | 96872/371472 [7:42:18<21:46:21, 3.50it/s] 26%|██▌ | 96873/371472 [7:42:18<21:26:41, 3.56it/s] 26%|██▌ | 96874/371472 [7:42:18<21:08:17, 3.61it/s] 26%|██▌ | 96875/371472 [7:42:18<21:20:23, 3.57it/s] 26%|██▌ | 96876/371472 [7:42:19<21:28:12, 3.55it/s] 26%|██▌ | 96877/371472 [7:42:19<22:42:11, 3.36it/s] 26%|██▌ | 96878/371472 [7:42:19<22:21:38, 3.41it/s] 26%|██▌ | 96879/371472 [7:42:20<23:09:13, 3.29it/s] 26%|██▌ | 96880/371472 [7:42:20<21:31:18, 3.54it/s] {'loss': 3.5401, 'learning_rate': 7.656381305350256e-07, 'epoch': 4.17} + 26%|██▌ | 96880/371472 [7:42:20<21:31:18, 3.54it/s] 26%|██▌ | 96881/371472 [7:42:20<22:22:20, 3.41it/s] 26%|██▌ | 96882/371472 [7:42:20<22:00:12, 3.47it/s] 26%|██▌ | 96883/371472 [7:42:21<21:33:12, 3.54it/s] 26%|██▌ | 96884/371472 [7:42:21<21:28:52, 3.55it/s] 26%|██▌ | 96885/371472 [7:42:21<21:36:22, 3.53it/s] 26%|██▌ | 96886/371472 [7:42:22<22:06:51, 3.45it/s] 26%|██▌ | 96887/371472 [7:42:22<21:46:58, 3.50it/s] 26%|██▌ | 96888/371472 [7:42:22<20:58:05, 3.64it/s] 26%|██▌ | 96889/371472 [7:42:22<21:44:17, 3.51it/s] 26%|██▌ | 96890/371472 [7:42:23<21:44:05, 3.51it/s] 26%|██▌ | 96891/371472 [7:42:23<20:43:21, 3.68it/s] 26%|██▌ | 96892/371472 [7:42:23<21:52:01, 3.49it/s] 26%|██▌ | 96893/371472 [7:42:24<21:10:24, 3.60it/s] 26%|██▌ | 96894/371472 [7:42:24<20:25:22, 3.73it/s] 26%|██▌ | 96895/371472 [7:42:24<20:52:50, 3.65it/s] 26%|██▌ | 96896/371472 [7:42:24<20:55:09, 3.65it/s] 26%|██▌ | 96897/371472 [7:42:25<21:35:52, 3.53it/s] 26%|██▌ | 96898/371472 [7:42:25<20:45:41, 3.67it/s] 26%|██▌ | 96899/371472 [7:42:25<20:26:31, 3.73it/s] 26%|██▌ | 96900/371472 [7:42:25<20:00:38, 3.81it/s] {'loss': 3.7217, 'learning_rate': 7.655896485595465e-07, 'epoch': 4.17} + 26%|██▌ | 96900/371472 [7:42:25<20:00:38, 3.81it/s] 26%|██▌ | 96901/371472 [7:42:26<21:28:23, 3.55it/s] 26%|██▌ | 96902/371472 [7:42:26<20:24:46, 3.74it/s] 26%|██▌ | 96903/371472 [7:42:26<19:56:22, 3.83it/s] 26%|██▌ | 96904/371472 [7:42:26<19:49:01, 3.85it/s] 26%|██▌ | 96905/371472 [7:42:27<20:14:03, 3.77it/s] 26%|██▌ | 96906/371472 [7:42:27<21:27:00, 3.56it/s] 26%|██▌ | 96907/371472 [7:42:27<21:54:57, 3.48it/s] 26%|██▌ | 96908/371472 [7:42:28<21:06:46, 3.61it/s] 26%|██▌ | 96909/371472 [7:42:28<21:41:45, 3.52it/s] 26%|██▌ | 96910/371472 [7:42:28<21:48:40, 3.50it/s] 26%|██▌ | 96911/371472 [7:42:28<21:19:31, 3.58it/s] 26%|██▌ | 96912/371472 [7:42:29<20:57:25, 3.64it/s] 26%|██▌ | 96913/371472 [7:42:29<21:05:16, 3.62it/s] 26%|██▌ | 96914/371472 [7:42:29<20:28:31, 3.72it/s] 26%|██▌ | 96915/371472 [7:42:30<20:33:15, 3.71it/s] 26%|██▌ | 96916/371472 [7:42:30<20:50:28, 3.66it/s] 26%|██▌ | 96917/371472 [7:42:30<20:43:39, 3.68it/s] 26%|██▌ | 96918/371472 [7:42:30<20:24:47, 3.74it/s] 26%|██▌ | 96919/371472 [7:42:31<19:54:22, 3.83it/s] 26%|██▌ | 96920/371472 [7:42:31<21:14:31, 3.59it/s] {'loss': 3.5982, 'learning_rate': 7.655411665840677e-07, 'epoch': 4.17} + 26%|██▌ | 96920/371472 [7:42:31<21:14:31, 3.59it/s] 26%|██▌ | 96921/371472 [7:42:31<22:09:48, 3.44it/s] 26%|██▌ | 96922/371472 [7:42:32<21:49:47, 3.49it/s] 26%|██▌ | 96923/371472 [7:42:32<21:36:21, 3.53it/s] 26%|██▌ | 96924/371472 [7:42:32<20:34:23, 3.71it/s] 26%|██▌ | 96925/371472 [7:42:32<21:22:49, 3.57it/s] 26%|██▌ | 96926/371472 [7:42:33<21:15:20, 3.59it/s] 26%|██▌ | 96927/371472 [7:42:33<21:16:18, 3.59it/s] 26%|██▌ | 96928/371472 [7:42:33<22:47:44, 3.35it/s] 26%|██▌ | 96929/371472 [7:42:33<21:29:38, 3.55it/s] 26%|██▌ | 96930/371472 [7:42:34<21:44:25, 3.51it/s] 26%|██▌ | 96931/371472 [7:42:34<21:33:18, 3.54it/s] 26%|██▌ | 96932/371472 [7:42:34<22:47:59, 3.34it/s] 26%|██▌ | 96933/371472 [7:42:35<23:05:16, 3.30it/s] 26%|██▌ | 96934/371472 [7:42:35<22:21:34, 3.41it/s] 26%|██▌ | 96935/371472 [7:42:35<24:42:29, 3.09it/s] 26%|██▌ | 96936/371472 [7:42:36<24:58:07, 3.05it/s] 26%|██▌ | 96937/371472 [7:42:36<23:40:26, 3.22it/s] 26%|██▌ | 96938/371472 [7:42:36<22:34:00, 3.38it/s] 26%|██▌ | 96939/371472 [7:42:37<22:37:39, 3.37it/s] 26%|██▌ | 96940/371472 [7:42:37<21:50:17, 3.49it/s] {'loss': 3.6811, 'learning_rate': 7.654926846085889e-07, 'epoch': 4.18} + 26%|██▌ | 96940/371472 [7:42:37<21:50:17, 3.49it/s] 26%|██▌ | 96941/371472 [7:42:37<22:06:42, 3.45it/s] 26%|██▌ | 96942/371472 [7:42:37<22:03:33, 3.46it/s] 26%|██▌ | 96943/371472 [7:42:38<22:06:06, 3.45it/s] 26%|██▌ | 96944/371472 [7:42:38<21:11:18, 3.60it/s] 26%|██▌ | 96945/371472 [7:42:38<21:11:22, 3.60it/s] 26%|██▌ | 96946/371472 [7:42:38<20:41:15, 3.69it/s] 26%|██▌ | 96947/371472 [7:42:39<20:28:48, 3.72it/s] 26%|██▌ | 96948/371472 [7:42:39<20:33:46, 3.71it/s] 26%|██▌ | 96949/371472 [7:42:39<20:17:56, 3.76it/s] 26%|██▌ | 96950/371472 [7:42:40<22:29:31, 3.39it/s] 26%|██▌ | 96951/371472 [7:42:40<22:01:37, 3.46it/s] 26%|██▌ | 96952/371472 [7:42:40<21:40:52, 3.52it/s] 26%|██▌ | 96953/371472 [7:42:41<23:39:26, 3.22it/s] 26%|██▌ | 96954/371472 [7:42:41<23:14:35, 3.28it/s] 26%|██▌ | 96955/371472 [7:42:41<22:25:03, 3.40it/s] 26%|██▌ | 96956/371472 [7:42:41<22:10:00, 3.44it/s] 26%|██▌ | 96957/371472 [7:42:42<21:07:33, 3.61it/s] 26%|██▌ | 96958/371472 [7:42:42<23:24:19, 3.26it/s] 26%|██▌ | 96959/371472 [7:42:42<22:08:23, 3.44it/s] 26%|██▌ | 96960/371472 [7:42:42<21:31:48, 3.54it/s] {'loss': 3.6916, 'learning_rate': 7.654442026331099e-07, 'epoch': 4.18} + 26%|██▌ | 96960/371472 [7:42:42<21:31:48, 3.54it/s] 26%|██▌ | 96961/371472 [7:42:43<22:10:31, 3.44it/s] 26%|██▌ | 96962/371472 [7:42:43<21:30:27, 3.55it/s] 26%|██▌ | 96963/371472 [7:42:43<20:54:10, 3.65it/s] 26%|██▌ | 96964/371472 [7:42:44<21:05:03, 3.62it/s] 26%|██▌ | 96965/371472 [7:42:44<20:18:11, 3.76it/s] 26%|██▌ | 96966/371472 [7:42:44<20:55:38, 3.64it/s] 26%|██▌ | 96967/371472 [7:42:44<20:35:25, 3.70it/s] 26%|██▌ | 96968/371472 [7:42:45<22:09:50, 3.44it/s] 26%|██▌ | 96969/371472 [7:42:45<21:12:55, 3.59it/s] 26%|██▌ | 96970/371472 [7:42:45<20:56:39, 3.64it/s] 26%|██▌ | 96971/371472 [7:42:46<23:40:55, 3.22it/s] 26%|██▌ | 96972/371472 [7:42:46<22:20:31, 3.41it/s] 26%|██▌ | 96973/371472 [7:42:46<21:55:59, 3.48it/s] 26%|██▌ | 96974/371472 [7:42:46<22:33:38, 3.38it/s] 26%|██▌ | 96975/371472 [7:42:47<21:26:44, 3.56it/s] 26%|██▌ | 96976/371472 [7:42:47<21:11:20, 3.60it/s] 26%|██▌ | 96977/371472 [7:42:47<21:00:33, 3.63it/s] 26%|██▌ | 96978/371472 [7:42:48<20:28:03, 3.73it/s] 26%|██▌ | 96979/371472 [7:42:48<20:18:52, 3.75it/s] 26%|██▌ | 96980/371472 [7:42:48<19:59:46, 3.81it/s] {'loss': 3.6281, 'learning_rate': 7.653957206576309e-07, 'epoch': 4.18} + 26%|██▌ | 96980/371472 [7:42:48<19:59:46, 3.81it/s] 26%|██▌ | 96981/371472 [7:42:48<20:18:21, 3.75it/s] 26%|██▌ | 96982/371472 [7:42:49<20:35:21, 3.70it/s] 26%|██▌ | 96983/371472 [7:42:49<22:48:56, 3.34it/s] 26%|██▌ | 96984/371472 [7:42:49<21:32:47, 3.54it/s] 26%|██▌ | 96985/371472 [7:42:50<22:37:57, 3.37it/s] 26%|██▌ | 96986/371472 [7:42:50<21:31:46, 3.54it/s] 26%|██▌ | 96987/371472 [7:42:50<21:52:09, 3.49it/s] 26%|██▌ | 96988/371472 [7:42:50<22:31:28, 3.39it/s] 26%|██▌ | 96989/371472 [7:42:51<22:29:03, 3.39it/s] 26%|██▌ | 96990/371472 [7:42:51<21:43:30, 3.51it/s] 26%|██▌ | 96991/371472 [7:42:51<21:00:30, 3.63it/s] 26%|██▌ | 96992/371472 [7:42:51<20:42:15, 3.68it/s] 26%|██▌ | 96993/371472 [7:42:52<22:55:43, 3.33it/s] 26%|██▌ | 96994/371472 [7:42:52<23:53:57, 3.19it/s] 26%|██▌ | 96995/371472 [7:42:52<22:17:50, 3.42it/s] 26%|██▌ | 96996/371472 [7:42:53<23:04:18, 3.30it/s] 26%|██▌ | 96997/371472 [7:42:53<22:28:28, 3.39it/s] 26%|██▌ | 96998/371472 [7:42:53<22:32:51, 3.38it/s] 26%|██▌ | 96999/371472 [7:42:54<21:42:18, 3.51it/s] 26%|██▌ | 97000/371472 [7:42:54<21:14:14, 3.59it/s] {'loss': 3.4743, 'learning_rate': 7.653472386821521e-07, 'epoch': 4.18} + 26%|██▌ | 97000/371472 [7:42:54<21:14:14, 3.59it/s] 26%|██▌ | 97001/371472 [7:42:54<21:13:33, 3.59it/s] 26%|██▌ | 97002/371472 [7:42:54<20:11:21, 3.78it/s] 26%|██▌ | 97003/371472 [7:42:55<19:49:39, 3.85it/s] 26%|██▌ | 97004/371472 [7:42:55<20:06:36, 3.79it/s] 26%|██▌ | 97005/371472 [7:42:55<20:23:01, 3.74it/s] 26%|██▌ | 97006/371472 [7:42:55<19:43:14, 3.87it/s] 26%|██▌ | 97007/371472 [7:42:56<19:38:32, 3.88it/s] 26%|██▌ | 97008/371472 [7:42:56<19:29:28, 3.91it/s] 26%|██▌ | 97009/371472 [7:42:56<19:33:20, 3.90it/s] 26%|██▌ | 97010/371472 [7:42:56<19:49:26, 3.85it/s] 26%|██▌ | 97011/371472 [7:42:57<21:27:07, 3.55it/s] 26%|██▌ | 97012/371472 [7:42:57<22:41:23, 3.36it/s] 26%|██▌ | 97013/371472 [7:42:57<23:26:40, 3.25it/s] 26%|██▌ | 97014/371472 [7:42:58<22:00:57, 3.46it/s] 26%|██▌ | 97015/371472 [7:42:58<20:56:33, 3.64it/s] 26%|██▌ | 97016/371472 [7:42:58<21:10:25, 3.60it/s] 26%|██▌ | 97017/371472 [7:42:58<20:28:09, 3.72it/s] 26%|██▌ | 97018/371472 [7:42:59<20:45:56, 3.67it/s] 26%|██▌ | 97019/371472 [7:42:59<21:45:34, 3.50it/s] 26%|██▌ | 97020/371472 [7:42:59<23:45:04, 3.21it/s] {'loss': 3.5921, 'learning_rate': 7.652987567066733e-07, 'epoch': 4.18} + 26%|██▌ | 97020/371472 [7:42:59<23:45:04, 3.21it/s] 26%|██▌ | 97021/371472 [7:43:00<22:13:12, 3.43it/s] 26%|██▌ | 97022/371472 [7:43:00<21:39:28, 3.52it/s] 26%|██▌ | 97023/371472 [7:43:00<21:03:51, 3.62it/s] 26%|██▌ | 97024/371472 [7:43:00<21:52:42, 3.48it/s] 26%|██▌ | 97025/371472 [7:43:01<21:33:37, 3.54it/s] 26%|██▌ | 97026/371472 [7:43:01<20:57:56, 3.64it/s] 26%|██▌ | 97027/371472 [7:43:01<20:24:55, 3.73it/s] 26%|██▌ | 97028/371472 [7:43:02<20:01:42, 3.81it/s] 26%|██▌ | 97029/371472 [7:43:02<20:38:26, 3.69it/s] 26%|██▌ | 97030/371472 [7:43:02<20:31:47, 3.71it/s] 26%|██▌ | 97031/371472 [7:43:02<19:57:18, 3.82it/s] 26%|██▌ | 97032/371472 [7:43:03<19:37:58, 3.88it/s] 26%|██▌ | 97033/371472 [7:43:03<20:25:59, 3.73it/s] 26%|██▌ | 97034/371472 [7:43:03<20:32:10, 3.71it/s] 26%|██▌ | 97035/371472 [7:43:03<20:23:19, 3.74it/s] 26%|██▌ | 97036/371472 [7:43:04<20:49:59, 3.66it/s] 26%|██▌ | 97037/371472 [7:43:04<20:50:44, 3.66it/s] 26%|██▌ | 97038/371472 [7:43:04<21:09:01, 3.60it/s] 26%|██▌ | 97039/371472 [7:43:05<22:16:42, 3.42it/s] 26%|██▌ | 97040/371472 [7:43:05<22:15:45, 3.42it/s] {'loss': 3.5673, 'learning_rate': 7.652502747311944e-07, 'epoch': 4.18} + 26%|██▌ | 97040/371472 [7:43:05<22:15:45, 3.42it/s] 26%|██▌ | 97041/371472 [7:43:05<21:48:54, 3.49it/s] 26%|██▌ | 97042/371472 [7:43:05<21:56:38, 3.47it/s] 26%|██▌ | 97043/371472 [7:43:06<21:05:21, 3.61it/s] 26%|██▌ | 97044/371472 [7:43:06<23:41:58, 3.22it/s] 26%|██▌ | 97045/371472 [7:43:06<23:12:45, 3.28it/s] 26%|██▌ | 97046/371472 [7:43:07<23:09:16, 3.29it/s] 26%|██▌ | 97047/371472 [7:43:07<22:22:11, 3.41it/s] 26%|██▌ | 97048/371472 [7:43:07<21:33:26, 3.54it/s] 26%|██▌ | 97049/371472 [7:43:07<21:35:09, 3.53it/s] 26%|██▌ | 97050/371472 [7:43:08<21:08:44, 3.60it/s] 26%|██▌ | 97051/371472 [7:43:08<21:09:19, 3.60it/s] 26%|██▌ | 97052/371472 [7:43:08<21:31:49, 3.54it/s] 26%|██▌ | 97053/371472 [7:43:09<21:53:00, 3.48it/s] 26%|██▌ | 97054/371472 [7:43:09<22:19:51, 3.41it/s] 26%|██▌ | 97055/371472 [7:43:09<21:50:21, 3.49it/s] 26%|██▌ | 97056/371472 [7:43:09<20:59:35, 3.63it/s] 26%|██▌ | 97057/371472 [7:43:10<20:37:03, 3.70it/s] 26%|██▌ | 97058/371472 [7:43:10<20:15:20, 3.76it/s] 26%|██▌ | 97059/371472 [7:43:10<19:52:04, 3.84it/s] 26%|██▌ | 97060/371472 [7:43:11<21:17:25, 3.58it/s] {'loss': 3.6073, 'learning_rate': 7.652017927557154e-07, 'epoch': 4.18} + 26%|██▌ | 97060/371472 [7:43:11<21:17:25, 3.58it/s] 26%|██▌ | 97061/371472 [7:43:11<21:05:30, 3.61it/s] 26%|██▌ | 97062/371472 [7:43:11<20:52:42, 3.65it/s] 26%|██▌ | 97063/371472 [7:43:11<24:35:17, 3.10it/s] 26%|██▌ | 97064/371472 [7:43:12<23:31:34, 3.24it/s] 26%|██▌ | 97065/371472 [7:43:12<23:15:01, 3.28it/s] 26%|██▌ | 97066/371472 [7:43:12<23:12:01, 3.29it/s] 26%|██▌ | 97067/371472 [7:43:13<22:03:23, 3.46it/s] 26%|██▌ | 97068/371472 [7:43:13<22:13:53, 3.43it/s] 26%|██▌ | 97069/371472 [7:43:13<22:12:17, 3.43it/s] 26%|██▌ | 97070/371472 [7:43:13<21:20:29, 3.57it/s] 26%|██▌ | 97071/371472 [7:43:14<21:08:32, 3.61it/s] 26%|██▌ | 97072/371472 [7:43:14<20:19:48, 3.75it/s] 26%|██▌ | 97073/371472 [7:43:14<21:34:29, 3.53it/s] 26%|██▌ | 97074/371472 [7:43:15<20:32:53, 3.71it/s] 26%|██▌ | 97075/371472 [7:43:15<20:20:18, 3.75it/s] 26%|██▌ | 97076/371472 [7:43:15<20:53:16, 3.65it/s] 26%|██▌ | 97077/371472 [7:43:15<23:44:15, 3.21it/s] 26%|██▌ | 97078/371472 [7:43:16<22:51:11, 3.34it/s] 26%|██▌ | 97079/371472 [7:43:16<22:07:45, 3.44it/s] 26%|██▌ | 97080/371472 [7:43:16<23:24:30, 3.26it/s] {'loss': 3.5392, 'learning_rate': 7.651533107802366e-07, 'epoch': 4.18} + 26%|██▌ | 97080/371472 [7:43:16<23:24:30, 3.26it/s] 26%|██▌ | 97081/371472 [7:43:17<22:42:35, 3.36it/s] 26%|██▌ | 97082/371472 [7:43:17<23:55:46, 3.19it/s] 26%|██▌ | 97083/371472 [7:43:17<22:51:27, 3.33it/s] 26%|██▌ | 97084/371472 [7:43:18<24:24:51, 3.12it/s] 26%|██▌ | 97085/371472 [7:43:18<22:42:49, 3.36it/s] 26%|██▌ | 97086/371472 [7:43:18<23:55:12, 3.19it/s] 26%|██▌ | 97087/371472 [7:43:19<22:44:51, 3.35it/s] 26%|██▌ | 97088/371472 [7:43:19<21:43:13, 3.51it/s] 26%|██▌ | 97089/371472 [7:43:19<22:20:32, 3.41it/s] 26%|██▌ | 97090/371472 [7:43:19<22:08:57, 3.44it/s] 26%|██▌ | 97091/371472 [7:43:20<21:10:26, 3.60it/s] 26%|██▌ | 97092/371472 [7:43:20<20:30:56, 3.72it/s] 26%|██▌ | 97093/371472 [7:43:20<20:06:08, 3.79it/s] 26%|██▌ | 97094/371472 [7:43:20<20:19:12, 3.75it/s] 26%|██▌ | 97095/371472 [7:43:21<20:45:06, 3.67it/s] 26%|██▌ | 97096/371472 [7:43:21<20:30:20, 3.72it/s] 26%|██▌ | 97097/371472 [7:43:21<20:04:03, 3.80it/s] 26%|██▌ | 97098/371472 [7:43:21<21:19:02, 3.58it/s] 26%|██▌ | 97099/371472 [7:43:22<23:47:26, 3.20it/s] 26%|██▌ | 97100/371472 [7:43:22<23:07:19, 3.30it/s] {'loss': 3.5971, 'learning_rate': 7.651048288047577e-07, 'epoch': 4.18} + 26%|██▌ | 97100/371472 [7:43:22<23:07:19, 3.30it/s] 26%|██▌ | 97101/371472 [7:43:22<22:38:29, 3.37it/s] 26%|██▌ | 97102/371472 [7:43:23<22:41:47, 3.36it/s] 26%|██▌ | 97103/371472 [7:43:23<21:54:35, 3.48it/s] 26%|██▌ | 97104/371472 [7:43:23<21:59:38, 3.47it/s] 26%|██▌ | 97105/371472 [7:43:24<21:44:57, 3.50it/s] 26%|██▌ | 97106/371472 [7:43:24<21:42:40, 3.51it/s] 26%|██▌ | 97107/371472 [7:43:24<21:05:04, 3.61it/s] 26%|██▌ | 97108/371472 [7:43:24<20:27:22, 3.73it/s] 26%|██▌ | 97109/371472 [7:43:25<20:46:42, 3.67it/s] 26%|██▌ | 97110/371472 [7:43:25<20:29:45, 3.72it/s] 26%|██▌ | 97111/371472 [7:43:25<20:48:30, 3.66it/s] 26%|██▌ | 97112/371472 [7:43:25<20:45:33, 3.67it/s] 26%|██▌ | 97113/371472 [7:43:26<20:35:01, 3.70it/s] 26%|██▌ | 97114/371472 [7:43:26<21:44:17, 3.51it/s] 26%|██▌ | 97115/371472 [7:43:26<21:03:32, 3.62it/s] 26%|██▌ | 97116/371472 [7:43:27<22:05:14, 3.45it/s] 26%|██▌ | 97117/371472 [7:43:27<21:16:16, 3.58it/s] 26%|██▌ | 97118/371472 [7:43:27<20:51:01, 3.66it/s] 26%|██▌ | 97119/371472 [7:43:27<20:23:21, 3.74it/s] 26%|██▌ | 97120/371472 [7:43:28<20:16:33, 3.76it/s] {'loss': 3.557, 'learning_rate': 7.650563468292787e-07, 'epoch': 4.18} + 26%|██▌ | 97120/371472 [7:43:28<20:16:33, 3.76it/s] 26%|██▌ | 97121/371472 [7:43:28<19:34:41, 3.89it/s] 26%|██▌ | 97122/371472 [7:43:28<21:04:25, 3.62it/s] 26%|██▌ | 97123/371472 [7:43:29<22:04:33, 3.45it/s] 26%|██▌ | 97124/371472 [7:43:29<21:04:29, 3.62it/s] 26%|██▌ | 97125/371472 [7:43:29<20:58:43, 3.63it/s] 26%|██▌ | 97126/371472 [7:43:29<21:55:15, 3.48it/s] 26%|██▌ | 97127/371472 [7:43:30<21:58:06, 3.47it/s] 26%|██▌ | 97128/371472 [7:43:30<21:14:46, 3.59it/s] 26%|██▌ | 97129/371472 [7:43:30<20:55:46, 3.64it/s] 26%|██▌ | 97130/371472 [7:43:30<21:16:44, 3.58it/s] 26%|██▌ | 97131/371472 [7:43:31<20:51:04, 3.65it/s] 26%|██▌ | 97132/371472 [7:43:31<20:19:04, 3.75it/s] 26%|██▌ | 97133/371472 [7:43:31<19:44:10, 3.86it/s] 26%|██▌ | 97134/371472 [7:43:31<19:23:46, 3.93it/s] 26%|██▌ | 97135/371472 [7:43:32<19:05:55, 3.99it/s] 26%|██▌ | 97136/371472 [7:43:32<19:26:54, 3.92it/s] 26%|██▌ | 97137/371472 [7:43:32<20:47:14, 3.67it/s] 26%|██▌ | 97138/371472 [7:43:33<20:22:39, 3.74it/s] 26%|██▌ | 97139/371472 [7:43:33<20:42:24, 3.68it/s] 26%|██▌ | 97140/371472 [7:43:33<20:35:24, 3.70it/s] {'loss': 3.6023, 'learning_rate': 7.650078648537998e-07, 'epoch': 4.18} + 26%|██▌ | 97140/371472 [7:43:33<20:35:24, 3.70it/s] 26%|██▌ | 97141/371472 [7:43:33<20:05:06, 3.79it/s] 26%|██▌ | 97142/371472 [7:43:34<21:10:40, 3.60it/s] 26%|██▌ | 97143/371472 [7:43:34<22:05:39, 3.45it/s] 26%|██▌ | 97144/371472 [7:43:34<21:30:30, 3.54it/s] 26%|██▌ | 97145/371472 [7:43:34<20:56:18, 3.64it/s] 26%|██▌ | 97146/371472 [7:43:35<20:53:07, 3.65it/s] 26%|██▌ | 97147/371472 [7:43:35<20:17:08, 3.76it/s] 26%|██▌ | 97148/371472 [7:43:35<21:39:09, 3.52it/s] 26%|██▌ | 97149/371472 [7:43:36<21:02:08, 3.62it/s] 26%|██▌ | 97150/371472 [7:43:36<23:01:34, 3.31it/s] 26%|██▌ | 97151/371472 [7:43:36<22:46:08, 3.35it/s] 26%|██▌ | 97152/371472 [7:43:37<22:05:15, 3.45it/s] 26%|██▌ | 97153/371472 [7:43:37<21:05:09, 3.61it/s] 26%|██▌ | 97154/371472 [7:43:37<20:40:04, 3.69it/s] 26%|██▌ | 97155/371472 [7:43:37<21:11:21, 3.60it/s] 26%|██▌ | 97156/371472 [7:43:38<20:43:08, 3.68it/s] 26%|██▌ | 97157/371472 [7:43:38<20:52:45, 3.65it/s] 26%|██▌ | 97158/371472 [7:43:38<23:20:14, 3.27it/s] 26%|██▌ | 97159/371472 [7:43:39<22:37:28, 3.37it/s] 26%|██▌ | 97160/371472 [7:43:39<25:46:26, 2.96it/s] {'loss': 3.6399, 'learning_rate': 7.64959382878321e-07, 'epoch': 4.18} + 26%|██▌ | 97160/371472 [7:43:39<25:46:26, 2.96it/s] 26%|██▌ | 97161/371472 [7:43:39<24:14:29, 3.14it/s] 26%|██▌ | 97162/371472 [7:43:39<23:03:09, 3.31it/s] 26%|██▌ | 97163/371472 [7:43:40<22:58:13, 3.32it/s] 26%|██▌ | 97164/371472 [7:43:40<21:57:31, 3.47it/s] 26%|██▌ | 97165/371472 [7:43:40<22:22:57, 3.40it/s] 26%|██▌ | 97166/371472 [7:43:41<21:54:17, 3.48it/s] 26%|██▌ | 97167/371472 [7:43:41<22:23:28, 3.40it/s] 26%|██▌ | 97168/371472 [7:43:41<22:12:40, 3.43it/s] 26%|██▌ | 97169/371472 [7:43:41<21:56:28, 3.47it/s] 26%|██▌ | 97170/371472 [7:43:42<22:24:31, 3.40it/s] 26%|██▌ | 97171/371472 [7:43:42<23:50:33, 3.20it/s] 26%|██▌ | 97172/371472 [7:43:42<22:37:33, 3.37it/s] 26%|██▌ | 97173/371472 [7:43:43<21:30:10, 3.54it/s] 26%|██▌ | 97174/371472 [7:43:43<21:27:16, 3.55it/s] 26%|██▌ | 97175/371472 [7:43:43<22:07:14, 3.44it/s] 26%|██▌ | 97176/371472 [7:43:44<21:29:10, 3.55it/s] 26%|██▌ | 97177/371472 [7:43:44<22:09:21, 3.44it/s] 26%|██▌ | 97178/371472 [7:43:44<21:52:37, 3.48it/s] 26%|██▌ | 97179/371472 [7:43:44<20:54:10, 3.65it/s] 26%|██▌ | 97180/371472 [7:43:45<20:46:49, 3.67it/s] {'loss': 3.6157, 'learning_rate': 7.649109009028422e-07, 'epoch': 4.19} + 26%|██▌ | 97180/371472 [7:43:45<20:46:49, 3.67it/s] 26%|██▌ | 97181/371472 [7:43:45<22:52:08, 3.33it/s] 26%|██▌ | 97182/371472 [7:43:45<22:24:18, 3.40it/s] 26%|██▌ | 97183/371472 [7:43:46<23:10:35, 3.29it/s] 26%|██▌ | 97184/371472 [7:43:46<22:33:23, 3.38it/s] 26%|██▌ | 97185/371472 [7:43:46<22:02:04, 3.46it/s] 26%|██▌ | 97186/371472 [7:43:46<21:05:40, 3.61it/s] 26%|██▌ | 97187/371472 [7:43:47<21:31:11, 3.54it/s] 26%|██▌ | 97188/371472 [7:43:47<21:04:43, 3.61it/s] 26%|██▌ | 97189/371472 [7:43:47<22:27:09, 3.39it/s] 26%|██▌ | 97190/371472 [7:43:48<21:50:25, 3.49it/s] 26%|██▌ | 97191/371472 [7:43:48<22:15:37, 3.42it/s] 26%|██▌ | 97192/371472 [7:43:48<22:28:36, 3.39it/s] 26%|██▌ | 97193/371472 [7:43:48<21:27:22, 3.55it/s] 26%|██▌ | 97194/371472 [7:43:49<21:28:07, 3.55it/s] 26%|██▌ | 97195/371472 [7:43:49<24:41:33, 3.09it/s] 26%|██▌ | 97196/371472 [7:43:49<23:22:00, 3.26it/s] 26%|██▌ | 97197/371472 [7:43:50<23:01:07, 3.31it/s] 26%|██▌ | 97198/371472 [7:43:50<21:58:59, 3.47it/s] 26%|██▌ | 97199/371472 [7:43:50<20:59:06, 3.63it/s] 26%|██▌ | 97200/371472 [7:43:50<20:51:54, 3.65it/s] {'loss': 3.5105, 'learning_rate': 7.648624189273632e-07, 'epoch': 4.19} + 26%|██▌ | 97200/371472 [7:43:50<20:51:54, 3.65it/s] 26%|██▌ | 97201/371472 [7:43:51<20:59:11, 3.63it/s] 26%|██▌ | 97202/371472 [7:43:51<21:07:24, 3.61it/s] 26%|██▌ | 97203/371472 [7:43:51<20:42:38, 3.68it/s] 26%|██▌ | 97204/371472 [7:43:52<20:46:52, 3.67it/s] 26%|██▌ | 97205/371472 [7:43:52<20:16:58, 3.76it/s] 26%|██▌ | 97206/371472 [7:43:52<19:52:47, 3.83it/s] 26%|██▌ | 97207/371472 [7:43:52<20:49:21, 3.66it/s] 26%|██▌ | 97208/371472 [7:43:53<20:59:13, 3.63it/s] 26%|██▌ | 97209/371472 [7:43:53<21:59:19, 3.46it/s] 26%|██▌ | 97210/371472 [7:43:53<21:00:13, 3.63it/s] 26%|██▌ | 97211/371472 [7:43:53<21:15:51, 3.58it/s] 26%|██▌ | 97212/371472 [7:43:54<21:49:36, 3.49it/s] 26%|██▌ | 97213/371472 [7:43:54<20:43:38, 3.68it/s] 26%|██▌ | 97214/371472 [7:43:54<20:57:01, 3.64it/s] 26%|██▌ | 97215/371472 [7:43:55<20:14:19, 3.76it/s] 26%|██▌ | 97216/371472 [7:43:55<21:14:36, 3.59it/s] 26%|██▌ | 97217/371472 [7:43:55<23:32:59, 3.23it/s] 26%|██▌ | 97218/371472 [7:43:56<23:20:14, 3.26it/s] 26%|██▌ | 97219/371472 [7:43:56<22:06:39, 3.45it/s] 26%|██▌ | 97220/371472 [7:43:56<21:26:29, 3.55it/s] {'loss': 3.6493, 'learning_rate': 7.648139369518842e-07, 'epoch': 4.19} + 26%|██▌ | 97220/371472 [7:43:56<21:26:29, 3.55it/s] 26%|██▌ | 97221/371472 [7:43:56<20:24:18, 3.73it/s] 26%|██▌ | 97222/371472 [7:43:57<20:32:48, 3.71it/s] 26%|██▌ | 97223/371472 [7:43:57<19:45:56, 3.85it/s] 26%|██▌ | 97224/371472 [7:43:57<20:24:07, 3.73it/s] 26%|██▌ | 97225/371472 [7:43:57<22:35:25, 3.37it/s] 26%|██▌ | 97226/371472 [7:43:58<21:54:45, 3.48it/s] 26%|██▌ | 97227/371472 [7:43:58<21:29:29, 3.54it/s] 26%|██▌ | 97228/371472 [7:43:58<22:47:44, 3.34it/s] 26%|██▌ | 97229/371472 [7:43:59<21:46:32, 3.50it/s] 26%|██▌ | 97230/371472 [7:43:59<20:54:16, 3.64it/s] 26%|██▌ | 97231/371472 [7:43:59<21:11:34, 3.59it/s] 26%|██▌ | 97232/371472 [7:43:59<23:21:12, 3.26it/s] 26%|██▌ | 97233/371472 [7:44:00<22:26:59, 3.39it/s] 26%|██▌ | 97234/371472 [7:44:00<22:06:57, 3.44it/s] 26%|██▌ | 97235/371472 [7:44:00<21:09:26, 3.60it/s] 26%|██▌ | 97236/371472 [7:44:01<21:19:27, 3.57it/s] 26%|██▌ | 97237/371472 [7:44:01<21:02:07, 3.62it/s] 26%|██▌ | 97238/371472 [7:44:01<20:23:35, 3.74it/s] 26%|██▌ | 97239/371472 [7:44:01<21:07:55, 3.60it/s] 26%|██▌ | 97240/371472 [7:44:02<20:23:29, 3.74it/s] {'loss': 3.734, 'learning_rate': 7.647654549764054e-07, 'epoch': 4.19} + 26%|██▌ | 97240/371472 [7:44:02<20:23:29, 3.74it/s] 26%|██▌ | 97241/371472 [7:44:02<20:21:45, 3.74it/s] 26%|██▌ | 97242/371472 [7:44:02<21:03:32, 3.62it/s] 26%|██▌ | 97243/371472 [7:44:02<21:40:51, 3.51it/s] 26%|██▌ | 97244/371472 [7:44:03<20:46:53, 3.67it/s] 26%|██▌ | 97245/371472 [7:44:03<20:28:41, 3.72it/s] 26%|██▌ | 97246/371472 [7:44:03<20:28:42, 3.72it/s] 26%|██▌ | 97247/371472 [7:44:04<21:34:51, 3.53it/s] 26%|██▌ | 97248/371472 [7:44:04<20:49:50, 3.66it/s] 26%|██▌ | 97249/371472 [7:44:04<20:38:07, 3.69it/s] 26%|██▌ | 97250/371472 [7:44:04<22:04:38, 3.45it/s] 26%|██▌ | 97251/371472 [7:44:05<21:45:36, 3.50it/s] 26%|██▌ | 97252/371472 [7:44:05<23:00:41, 3.31it/s] 26%|██▌ | 97253/371472 [7:44:05<21:39:31, 3.52it/s] 26%|██▌ | 97254/371472 [7:44:06<21:00:13, 3.63it/s] 26%|██▌ | 97255/371472 [7:44:06<20:19:47, 3.75it/s] 26%|██▌ | 97256/371472 [7:44:06<19:50:03, 3.84it/s] 26%|██▌ | 97257/371472 [7:44:06<20:02:54, 3.80it/s] 26%|██▌ | 97258/371472 [7:44:07<20:44:27, 3.67it/s] 26%|██▌ | 97259/371472 [7:44:07<20:06:06, 3.79it/s] 26%|██▌ | 97260/371472 [7:44:07<20:34:49, 3.70it/s] {'loss': 3.6497, 'learning_rate': 7.647169730009266e-07, 'epoch': 4.19} + 26%|██▌ | 97260/371472 [7:44:07<20:34:49, 3.70it/s] 26%|██▌ | 97261/371472 [7:44:07<22:13:54, 3.43it/s] 26%|██▌ | 97262/371472 [7:44:08<22:10:08, 3.44it/s] 26%|██▌ | 97263/371472 [7:44:08<21:41:17, 3.51it/s] 26%|██▌ | 97264/371472 [7:44:08<22:40:47, 3.36it/s] 26%|██▌ | 97265/371472 [7:44:09<21:59:29, 3.46it/s] 26%|██▌ | 97266/371472 [7:44:09<25:05:58, 3.03it/s] 26%|██▌ | 97267/371472 [7:44:09<23:55:49, 3.18it/s] 26%|██▌ | 97268/371472 [7:44:10<24:44:40, 3.08it/s] 26%|██▌ | 97269/371472 [7:44:10<23:33:46, 3.23it/s] 26%|██▌ | 97270/371472 [7:44:10<22:12:04, 3.43it/s] 26%|██▌ | 97271/371472 [7:44:10<21:38:34, 3.52it/s] 26%|██▌ | 97272/371472 [7:44:11<21:50:29, 3.49it/s] 26%|██▌ | 97273/371472 [7:44:11<24:03:36, 3.17it/s] 26%|██▌ | 97274/371472 [7:44:11<22:38:43, 3.36it/s] 26%|██▌ | 97275/371472 [7:44:12<22:29:54, 3.39it/s] 26%|██▌ | 97276/371472 [7:44:12<21:19:18, 3.57it/s] 26%|██▌ | 97277/371472 [7:44:12<20:50:55, 3.65it/s] 26%|██▌ | 97278/371472 [7:44:12<20:01:10, 3.80it/s] 26%|██▌ | 97279/371472 [7:44:13<20:26:56, 3.72it/s] 26%|██▌ | 97280/371472 [7:44:13<20:54:20, 3.64it/s] {'loss': 3.5229, 'learning_rate': 7.646684910254476e-07, 'epoch': 4.19} + 26%|██▌ | 97280/371472 [7:44:13<20:54:20, 3.64it/s] 26%|██▌ | 97281/371472 [7:44:13<20:11:18, 3.77it/s] 26%|██▌ | 97282/371472 [7:44:14<20:05:55, 3.79it/s] 26%|██▌ | 97283/371472 [7:44:14<19:56:16, 3.82it/s] 26%|██▌ | 97284/371472 [7:44:14<20:21:32, 3.74it/s] 26%|██▌ | 97285/371472 [7:44:14<22:09:37, 3.44it/s] 26%|██▌ | 97286/371472 [7:44:15<21:14:31, 3.59it/s] 26%|██▌ | 97287/371472 [7:44:15<21:28:55, 3.55it/s] 26%|██▌ | 97288/371472 [7:44:15<22:30:23, 3.38it/s] 26%|██▌ | 97289/371472 [7:44:16<22:45:59, 3.35it/s] 26%|██▌ | 97290/371472 [7:44:16<21:56:19, 3.47it/s] 26%|██▌ | 97291/371472 [7:44:16<23:11:31, 3.28it/s] 26%|██▌ | 97292/371472 [7:44:16<22:10:10, 3.44it/s] 26%|██▌ | 97293/371472 [7:44:17<26:49:54, 2.84it/s] 26%|██▌ | 97294/371472 [7:44:17<24:59:02, 3.05it/s] 26%|██▌ | 97295/371472 [7:44:18<26:11:47, 2.91it/s] 26%|██▌ | 97296/371472 [7:44:18<25:18:24, 3.01it/s] 26%|██▌ | 97297/371472 [7:44:18<24:32:40, 3.10it/s] 26%|██▌ | 97298/371472 [7:44:18<23:24:14, 3.25it/s] 26%|██▌ | 97299/371472 [7:44:19<22:11:49, 3.43it/s] 26%|██▌ | 97300/371472 [7:44:19<22:19:43, 3.41it/s] {'loss': 3.55, 'learning_rate': 7.646200090499687e-07, 'epoch': 4.19} + 26%|██▌ | 97300/371472 [7:44:19<22:19:43, 3.41it/s] 26%|██▌ | 97301/371472 [7:44:19<21:08:45, 3.60it/s] 26%|██▌ | 97302/371472 [7:44:19<20:38:13, 3.69it/s] 26%|██▌ | 97303/371472 [7:44:20<20:22:12, 3.74it/s] 26%|██▌ | 97304/371472 [7:44:20<20:06:58, 3.79it/s] 26%|██▌ | 97305/371472 [7:44:20<20:53:02, 3.65it/s] 26%|██▌ | 97306/371472 [7:44:21<20:57:05, 3.63it/s] 26%|██▌ | 97307/371472 [7:44:21<20:05:36, 3.79it/s] 26%|██▌ | 97308/371472 [7:44:21<19:45:42, 3.85it/s] 26%|██▌ | 97309/371472 [7:44:21<20:13:34, 3.77it/s] 26%|██▌ | 97310/371472 [7:44:22<21:07:04, 3.61it/s] 26%|██▌ | 97311/371472 [7:44:22<21:18:41, 3.57it/s] 26%|██▌ | 97312/371472 [7:44:22<20:39:51, 3.69it/s] 26%|██▌ | 97313/371472 [7:44:22<20:05:10, 3.79it/s] 26%|██▌ | 97314/371472 [7:44:23<20:13:35, 3.77it/s] 26%|██▌ | 97315/371472 [7:44:23<20:41:13, 3.68it/s] 26%|██▌ | 97316/371472 [7:44:23<20:52:35, 3.65it/s] 26%|██▌ | 97317/371472 [7:44:24<20:54:31, 3.64it/s] 26%|██▌ | 97318/371472 [7:44:24<23:15:44, 3.27it/s] 26%|██▌ | 97319/371472 [7:44:24<22:04:11, 3.45it/s] 26%|██▌ | 97320/371472 [7:44:24<21:42:19, 3.51it/s] {'loss': 3.7054, 'learning_rate': 7.645715270744899e-07, 'epoch': 4.19} + 26%|██▌ | 97320/371472 [7:44:24<21:42:19, 3.51it/s] 26%|██▌ | 97321/371472 [7:44:25<21:43:00, 3.51it/s] 26%|██▌ | 97322/371472 [7:44:25<22:00:56, 3.46it/s] 26%|██▌ | 97323/371472 [7:44:25<20:49:38, 3.66it/s] 26%|██▌ | 97324/371472 [7:44:26<20:37:12, 3.69it/s] 26%|██▌ | 97325/371472 [7:44:26<20:42:06, 3.68it/s] 26%|██▌ | 97326/371472 [7:44:26<20:25:41, 3.73it/s] 26%|██▌ | 97327/371472 [7:44:26<20:20:27, 3.74it/s] 26%|██▌ | 97328/371472 [7:44:27<21:05:48, 3.61it/s] 26%|██▌ | 97329/371472 [7:44:27<21:02:33, 3.62it/s] 26%|██▌ | 97330/371472 [7:44:27<20:21:42, 3.74it/s] 26%|██▌ | 97331/371472 [7:44:28<22:23:06, 3.40it/s] 26%|██▌ | 97332/371472 [7:44:28<22:42:42, 3.35it/s] 26%|██▌ | 97333/371472 [7:44:28<21:45:38, 3.50it/s] 26%|██▌ | 97334/371472 [7:44:28<22:04:03, 3.45it/s] 26%|██▌ | 97335/371472 [7:44:29<23:12:01, 3.28it/s] 26%|██▌ | 97336/371472 [7:44:29<22:19:31, 3.41it/s] 26%|██▌ | 97337/371472 [7:44:29<22:46:23, 3.34it/s] 26%|██▌ | 97338/371472 [7:44:30<22:30:03, 3.38it/s] 26%|██▌ | 97339/371472 [7:44:30<22:21:56, 3.40it/s] 26%|██▌ | 97340/371472 [7:44:30<22:23:28, 3.40it/s] {'loss': 3.6473, 'learning_rate': 7.64523045099011e-07, 'epoch': 4.19} + 26%|██▌ | 97340/371472 [7:44:30<22:23:28, 3.40it/s] 26%|██▌ | 97341/371472 [7:44:30<21:50:48, 3.49it/s] 26%|██▌ | 97342/371472 [7:44:31<22:02:46, 3.45it/s] 26%|██▌ | 97343/371472 [7:44:31<21:45:48, 3.50it/s] 26%|██▌ | 97344/371472 [7:44:31<22:54:02, 3.33it/s] 26%|██▌ | 97345/371472 [7:44:32<22:43:02, 3.35it/s] 26%|██▌ | 97346/371472 [7:44:32<22:02:01, 3.46it/s] 26%|██▌ | 97347/371472 [7:44:32<21:39:08, 3.52it/s] 26%|██▌ | 97348/371472 [7:44:32<21:06:02, 3.61it/s] 26%|██▌ | 97349/371472 [7:44:33<20:38:03, 3.69it/s] 26%|██▌ | 97350/371472 [7:44:33<20:21:13, 3.74it/s] 26%|██▌ | 97351/371472 [7:44:33<20:03:24, 3.80it/s] 26%|██▌ | 97352/371472 [7:44:34<20:54:06, 3.64it/s] 26%|██▌ | 97353/371472 [7:44:34<19:54:56, 3.82it/s] 26%|██▌ | 97354/371472 [7:44:34<19:45:28, 3.85it/s] 26%|██▌ | 97355/371472 [7:44:34<20:06:08, 3.79it/s] 26%|██▌ | 97356/371472 [7:44:35<20:56:21, 3.64it/s] 26%|██▌ | 97357/371472 [7:44:35<20:13:40, 3.76it/s] 26%|██▌ | 97358/371472 [7:44:35<20:49:29, 3.66it/s] 26%|██▌ | 97359/371472 [7:44:35<22:19:07, 3.41it/s] 26%|██▌ | 97360/371472 [7:44:36<21:49:52, 3.49it/s] {'loss': 3.7658, 'learning_rate': 7.64474563123532e-07, 'epoch': 4.19} + 26%|██▌ | 97360/371472 [7:44:36<21:49:52, 3.49it/s] 26%|██▌ | 97361/371472 [7:44:36<21:46:49, 3.50it/s] 26%|██▌ | 97362/371472 [7:44:36<21:12:18, 3.59it/s] 26%|██▌ | 97363/371472 [7:44:37<21:57:25, 3.47it/s] 26%|██▌ | 97364/371472 [7:44:37<21:58:53, 3.46it/s] 26%|██▌ | 97365/371472 [7:44:37<22:20:40, 3.41it/s] 26%|██▌ | 97366/371472 [7:44:37<21:41:23, 3.51it/s] 26%|██▌ | 97367/371472 [7:44:38<22:13:41, 3.43it/s] 26%|██▌ | 97368/371472 [7:44:38<21:49:12, 3.49it/s] 26%|██▌ | 97369/371472 [7:44:38<21:41:59, 3.51it/s] 26%|██▌ | 97370/371472 [7:44:39<20:54:22, 3.64it/s] 26%|██▌ | 97371/371472 [7:44:39<20:43:50, 3.67it/s] 26%|██▌ | 97372/371472 [7:44:39<20:43:34, 3.67it/s] 26%|██▌ | 97373/371472 [7:44:39<21:53:43, 3.48it/s] 26%|██▌ | 97374/371472 [7:44:40<21:50:36, 3.49it/s] 26%|██▌ | 97375/371472 [7:44:40<21:38:50, 3.52it/s] 26%|██▌ | 97376/371472 [7:44:40<21:07:44, 3.60it/s] 26%|██▌ | 97377/371472 [7:44:41<20:56:37, 3.64it/s] 26%|██▌ | 97378/371472 [7:44:41<19:51:02, 3.84it/s] 26%|██▌ | 97379/371472 [7:44:41<19:41:11, 3.87it/s] 26%|██▌ | 97380/371472 [7:44:41<19:20:37, 3.94it/s] {'loss': 3.6228, 'learning_rate': 7.644260811480531e-07, 'epoch': 4.19} + 26%|██▌ | 97380/371472 [7:44:41<19:20:37, 3.94it/s] 26%|██▌ | 97381/371472 [7:44:42<20:34:05, 3.70it/s] 26%|██▌ | 97382/371472 [7:44:42<23:08:46, 3.29it/s] 26%|██▌ | 97383/371472 [7:44:42<23:43:07, 3.21it/s] 26%|██▌ | 97384/371472 [7:44:43<22:21:44, 3.40it/s] 26%|██▌ | 97385/371472 [7:44:43<21:31:29, 3.54it/s] 26%|██▌ | 97386/371472 [7:44:43<21:05:03, 3.61it/s] 26%|██▌ | 97387/371472 [7:44:43<21:20:58, 3.57it/s] 26%|██▌ | 97388/371472 [7:44:44<21:36:17, 3.52it/s] 26%|██▌ | 97389/371472 [7:44:44<20:59:21, 3.63it/s] 26%|██▌ | 97390/371472 [7:44:44<22:14:37, 3.42it/s] 26%|██▌ | 97391/371472 [7:44:44<22:17:35, 3.42it/s] 26%|██▌ | 97392/371472 [7:44:45<21:27:03, 3.55it/s] 26%|██▌ | 97393/371472 [7:44:45<20:25:52, 3.73it/s] 26%|██▌ | 97394/371472 [7:44:45<20:08:24, 3.78it/s] 26%|██▌ | 97395/371472 [7:44:45<19:43:29, 3.86it/s] 26%|██▌ | 97396/371472 [7:44:46<19:16:08, 3.95it/s] 26%|██▌ | 97397/371472 [7:44:46<20:24:19, 3.73it/s] 26%|██▌ | 97398/371472 [7:44:46<20:31:59, 3.71it/s] 26%|██▌ | 97399/371472 [7:44:47<23:22:49, 3.26it/s] 26%|██▌ | 97400/371472 [7:44:47<22:13:12, 3.43it/s] {'loss': 3.6086, 'learning_rate': 7.643775991725743e-07, 'epoch': 4.2} + 26%|██▌ | 97400/371472 [7:44:47<22:13:12, 3.43it/s] 26%|██▌ | 97401/371472 [7:44:47<21:56:11, 3.47it/s] 26%|██▌ | 97402/371472 [7:44:47<20:41:19, 3.68it/s] 26%|██▌ | 97403/371472 [7:44:48<20:34:01, 3.70it/s] 26%|██▌ | 97404/371472 [7:44:48<19:39:29, 3.87it/s] 26%|██▌ | 97405/371472 [7:44:48<20:07:02, 3.78it/s] 26%|██▌ | 97406/371472 [7:44:49<21:10:39, 3.59it/s] 26%|██▌ | 97407/371472 [7:44:49<21:32:34, 3.53it/s] 26%|██▌ | 97408/371472 [7:44:49<21:42:37, 3.51it/s] 26%|██▌ | 97409/371472 [7:44:49<22:14:54, 3.42it/s] 26%|██▌ | 97410/371472 [7:44:50<21:46:55, 3.50it/s] 26%|██▌ | 97411/371472 [7:44:50<20:34:32, 3.70it/s] 26%|██▌ | 97412/371472 [7:44:50<21:07:28, 3.60it/s] 26%|██▌ | 97413/371472 [7:44:51<21:04:07, 3.61it/s] 26%|██▌ | 97414/371472 [7:44:51<22:10:42, 3.43it/s] 26%|██▌ | 97415/371472 [7:44:51<21:54:11, 3.48it/s] 26%|██▌ | 97416/371472 [7:44:51<23:03:56, 3.30it/s] 26%|██▌ | 97417/371472 [7:44:52<22:10:53, 3.43it/s] 26%|██▌ | 97418/371472 [7:44:52<21:46:37, 3.50it/s] 26%|██▌ | 97419/371472 [7:44:52<21:36:09, 3.52it/s] 26%|██▌ | 97420/371472 [7:44:53<21:41:36, 3.51it/s] {'loss': 3.6835, 'learning_rate': 7.643291171970955e-07, 'epoch': 4.2} + 26%|██▌ | 97420/371472 [7:44:53<21:41:36, 3.51it/s] 26%|██▌ | 97421/371472 [7:44:53<21:02:07, 3.62it/s] 26%|██▌ | 97422/371472 [7:44:53<20:52:47, 3.65it/s] 26%|██▌ | 97423/371472 [7:44:53<20:08:23, 3.78it/s] 26%|██▌ | 97424/371472 [7:44:54<20:24:10, 3.73it/s] 26%|██▌ | 97425/371472 [7:44:54<19:55:43, 3.82it/s] 26%|██▌ | 97426/371472 [7:44:54<19:39:25, 3.87it/s] 26%|██▌ | 97427/371472 [7:44:54<19:16:31, 3.95it/s] 26%|██▌ | 97428/371472 [7:44:55<19:10:48, 3.97it/s] 26%|██▌ | 97429/371472 [7:44:55<19:56:50, 3.82it/s] 26%|██▌ | 97430/371472 [7:44:55<19:19:54, 3.94it/s] 26%|██▌ | 97431/371472 [7:44:55<19:06:35, 3.98it/s] 26%|██▌ | 97432/371472 [7:44:56<19:34:36, 3.89it/s] 26%|██▌ | 97433/371472 [7:44:56<21:18:03, 3.57it/s] 26%|██▌ | 97434/371472 [7:44:56<21:48:02, 3.49it/s] 26%|██▌ | 97435/371472 [7:44:57<22:05:44, 3.45it/s] 26%|██▌ | 97436/371472 [7:44:57<22:20:08, 3.41it/s] 26%|██▌ | 97437/371472 [7:44:57<23:39:38, 3.22it/s] 26%|██▌ | 97438/371472 [7:44:57<22:11:53, 3.43it/s] 26%|██▌ | 97439/371472 [7:44:58<22:04:15, 3.45it/s] 26%|██▌ | 97440/371472 [7:44:58<21:22:18, 3.56it/s] {'loss': 3.5575, 'learning_rate': 7.642806352216164e-07, 'epoch': 4.2} + 26%|██▌ | 97440/371472 [7:44:58<21:22:18, 3.56it/s] 26%|██▌ | 97441/371472 [7:44:58<21:30:24, 3.54it/s] 26%|██▌ | 97442/371472 [7:44:59<20:50:10, 3.65it/s] 26%|██▌ | 97443/371472 [7:44:59<20:31:31, 3.71it/s] 26%|██▌ | 97444/371472 [7:44:59<20:25:52, 3.73it/s] 26%|██▌ | 97445/371472 [7:44:59<20:32:01, 3.71it/s] 26%|██▌ | 97446/371472 [7:45:00<20:34:51, 3.70it/s] 26%|██▌ | 97447/371472 [7:45:00<20:15:54, 3.76it/s] 26%|██▌ | 97448/371472 [7:45:00<20:12:37, 3.77it/s] 26%|██▌ | 97449/371472 [7:45:00<20:35:43, 3.70it/s] 26%|██▌ | 97450/371472 [7:45:01<21:10:49, 3.59it/s] 26%|██▌ | 97451/371472 [7:45:01<20:53:40, 3.64it/s] 26%|██▌ | 97452/371472 [7:45:01<20:15:52, 3.76it/s] 26%|██▌ | 97453/371472 [7:45:01<19:43:20, 3.86it/s] 26%|██▌ | 97454/371472 [7:45:02<20:16:49, 3.75it/s] 26%|██▌ | 97455/371472 [7:45:02<20:23:08, 3.73it/s] 26%|██▌ | 97456/371472 [7:45:02<20:08:18, 3.78it/s] 26%|██▌ | 97457/371472 [7:45:03<20:39:02, 3.69it/s] 26%|██▌ | 97458/371472 [7:45:03<20:12:52, 3.77it/s] 26%|██▌ | 97459/371472 [7:45:03<20:43:55, 3.67it/s] 26%|██▌ | 97460/371472 [7:45:03<20:40:12, 3.68it/s] {'loss': 3.4224, 'learning_rate': 7.642321532461376e-07, 'epoch': 4.2} + 26%|██▌ | 97460/371472 [7:45:03<20:40:12, 3.68it/s] 26%|██▌ | 97461/371472 [7:45:04<22:12:34, 3.43it/s] 26%|██▌ | 97462/371472 [7:45:04<22:14:47, 3.42it/s] 26%|██▌ | 97463/371472 [7:45:04<22:27:41, 3.39it/s] 26%|██▌ | 97464/371472 [7:45:05<21:43:12, 3.50it/s] 26%|██▌ | 97465/371472 [7:45:05<21:27:06, 3.55it/s] 26%|██▌ | 97466/371472 [7:45:05<20:34:40, 3.70it/s] 26%|██▌ | 97467/371472 [7:45:05<21:54:30, 3.47it/s] 26%|██▌ | 97468/371472 [7:45:06<20:28:04, 3.72it/s] 26%|██▌ | 97469/371472 [7:45:06<20:38:16, 3.69it/s] 26%|██▌ | 97470/371472 [7:45:06<20:49:09, 3.66it/s] 26%|██▌ | 97471/371472 [7:45:06<20:54:40, 3.64it/s] 26%|██▌ | 97472/371472 [7:45:07<20:12:02, 3.77it/s] 26%|██▌ | 97473/371472 [7:45:07<21:10:49, 3.59it/s] 26%|██▌ | 97474/371472 [7:45:07<21:56:05, 3.47it/s] 26%|██▌ | 97475/371472 [7:45:08<20:46:53, 3.66it/s] 26%|██▌ | 97476/371472 [7:45:08<20:26:23, 3.72it/s] 26%|██▌ | 97477/371472 [7:45:08<20:14:16, 3.76it/s] 26%|██▌ | 97478/371472 [7:45:08<21:40:10, 3.51it/s] 26%|██▌ | 97479/371472 [7:45:09<21:12:25, 3.59it/s] 26%|██▌ | 97480/371472 [7:45:09<21:52:20, 3.48it/s] {'loss': 3.6825, 'learning_rate': 7.641836712706587e-07, 'epoch': 4.2} + 26%|██▌ | 97480/371472 [7:45:09<21:52:20, 3.48it/s] 26%|██▌ | 97481/371472 [7:45:09<21:21:23, 3.56it/s] 26%|██▌ | 97482/371472 [7:45:10<20:22:31, 3.74it/s] 26%|██▌ | 97483/371472 [7:45:10<20:18:56, 3.75it/s] 26%|██▌ | 97484/371472 [7:45:10<22:07:57, 3.44it/s] 26%|██▌ | 97485/371472 [7:45:10<21:28:28, 3.54it/s] 26%|██▌ | 97486/371472 [7:45:11<22:03:03, 3.45it/s] 26%|██▌ | 97487/371472 [7:45:11<21:40:39, 3.51it/s] 26%|██▌ | 97488/371472 [7:45:11<21:21:06, 3.56it/s] 26%|██▌ | 97489/371472 [7:45:12<21:11:59, 3.59it/s] 26%|██▌ | 97490/371472 [7:45:12<20:30:07, 3.71it/s] 26%|██▌ | 97491/371472 [7:45:12<21:37:44, 3.52it/s] 26%|██▌ | 97492/371472 [7:45:12<20:59:38, 3.63it/s] 26%|██▌ | 97493/371472 [7:45:13<20:48:05, 3.66it/s] 26%|██▌ | 97494/371472 [7:45:13<23:17:41, 3.27it/s] 26%|██▌ | 97495/371472 [7:45:13<22:50:04, 3.33it/s] 26%|██▌ | 97496/371472 [7:45:14<23:01:19, 3.31it/s] 26%|██▌ | 97497/371472 [7:45:14<23:02:21, 3.30it/s] 26%|██▌ | 97498/371472 [7:45:14<21:38:40, 3.52it/s] 26%|██▌ | 97499/371472 [7:45:14<21:11:03, 3.59it/s] 26%|██▌ | 97500/371472 [7:45:15<20:44:44, 3.67it/s] {'loss': 3.8836, 'learning_rate': 7.641351892951797e-07, 'epoch': 4.2} + 26%|██▌ | 97500/371472 [7:45:15<20:44:44, 3.67it/s] 26%|██▌ | 97501/371472 [7:45:15<20:59:22, 3.63it/s] 26%|██▌ | 97502/371472 [7:45:15<20:52:02, 3.65it/s] 26%|██▌ | 97503/371472 [7:45:15<20:22:27, 3.74it/s] 26%|██▌ | 97504/371472 [7:45:16<20:26:23, 3.72it/s] 26%|██▌ | 97505/371472 [7:45:16<21:44:25, 3.50it/s] 26%|██▌ | 97506/371472 [7:45:16<21:41:40, 3.51it/s] 26%|██▌ | 97507/371472 [7:45:17<22:22:31, 3.40it/s] 26%|██▌ | 97508/371472 [7:45:17<21:54:36, 3.47it/s] 26%|██▌ | 97509/371472 [7:45:17<21:16:23, 3.58it/s] 26%|██▌ | 97510/371472 [7:45:17<20:44:26, 3.67it/s] 26%|██▌ | 97511/371472 [7:45:18<20:26:22, 3.72it/s] 26%|██▋ | 97512/371472 [7:45:18<19:57:09, 3.81it/s] 26%|██▋ | 97513/371472 [7:45:18<20:11:17, 3.77it/s] 26%|██▋ | 97514/371472 [7:45:18<20:22:38, 3.73it/s] 26%|██▋ | 97515/371472 [7:45:19<20:09:40, 3.77it/s] 26%|██▋ | 97516/371472 [7:45:19<20:39:47, 3.68it/s] 26%|██▋ | 97517/371472 [7:45:19<21:12:47, 3.59it/s] 26%|██▋ | 97518/371472 [7:45:20<21:04:13, 3.61it/s] 26%|██▋ | 97519/371472 [7:45:20<21:31:05, 3.54it/s] 26%|██▋ | 97520/371472 [7:45:20<20:50:37, 3.65it/s] {'loss': 3.5121, 'learning_rate': 7.640867073197008e-07, 'epoch': 4.2} + 26%|██▋ | 97520/371472 [7:45:20<20:50:37, 3.65it/s] 26%|██▋ | 97521/371472 [7:45:20<20:14:09, 3.76it/s] 26%|██▋ | 97522/371472 [7:45:21<20:28:35, 3.72it/s] 26%|██▋ | 97523/371472 [7:45:21<20:31:57, 3.71it/s] 26%|██▋ | 97524/371472 [7:45:21<20:06:54, 3.78it/s] 26%|██▋ | 97525/371472 [7:45:21<20:08:04, 3.78it/s] 26%|██▋ | 97526/371472 [7:45:22<20:16:47, 3.75it/s] 26%|██▋ | 97527/371472 [7:45:22<20:20:14, 3.74it/s] 26%|██▋ | 97528/371472 [7:45:22<20:08:04, 3.78it/s] 26%|██▋ | 97529/371472 [7:45:23<20:13:05, 3.76it/s] 26%|██▋ | 97530/371472 [7:45:23<20:21:08, 3.74it/s] 26%|██▋ | 97531/371472 [7:45:23<20:21:17, 3.74it/s] 26%|██▋ | 97532/371472 [7:45:23<19:55:49, 3.82it/s] 26%|██▋ | 97533/371472 [7:45:24<20:18:08, 3.75it/s] 26%|██▋ | 97534/371472 [7:45:24<19:53:37, 3.83it/s] 26%|██▋ | 97535/371472 [7:45:24<19:44:11, 3.86it/s] 26%|██▋ | 97536/371472 [7:45:24<19:34:42, 3.89it/s] 26%|██▋ | 97537/371472 [7:45:25<21:21:18, 3.56it/s] 26%|██▋ | 97538/371472 [7:45:25<21:18:44, 3.57it/s] 26%|██▋ | 97539/371472 [7:45:25<22:33:58, 3.37it/s] 26%|██▋ | 97540/371472 [7:45:26<21:57:53, 3.46it/s] {'loss': 3.4411, 'learning_rate': 7.64038225344222e-07, 'epoch': 4.2} + 26%|██▋ | 97540/371472 [7:45:26<21:57:53, 3.46it/s] 26%|██▋ | 97541/371472 [7:45:26<22:31:43, 3.38it/s] 26%|██▋ | 97542/371472 [7:45:26<21:51:43, 3.48it/s] 26%|██▋ | 97543/371472 [7:45:26<21:24:46, 3.55it/s] 26%|██▋ | 97544/371472 [7:45:27<21:34:17, 3.53it/s] 26%|██▋ | 97545/371472 [7:45:27<21:41:01, 3.51it/s] 26%|██▋ | 97546/371472 [7:45:27<21:27:53, 3.54it/s] 26%|██▋ | 97547/371472 [7:45:28<22:32:21, 3.38it/s] 26%|██▋ | 97548/371472 [7:45:28<21:18:16, 3.57it/s] 26%|██▋ | 97549/371472 [7:45:28<22:02:08, 3.45it/s] 26%|██▋ | 97550/371472 [7:45:28<21:34:20, 3.53it/s] 26%|██▋ | 97551/371472 [7:45:29<21:20:39, 3.56it/s] 26%|██▋ | 97552/371472 [7:45:29<22:03:42, 3.45it/s] 26%|██▋ | 97553/371472 [7:45:29<21:40:16, 3.51it/s] 26%|██▋ | 97554/371472 [7:45:30<21:25:42, 3.55it/s] 26%|██▋ | 97555/371472 [7:45:30<20:38:29, 3.69it/s] 26%|██▋ | 97556/371472 [7:45:30<20:06:52, 3.78it/s] 26%|██▋ | 97557/371472 [7:45:30<20:56:09, 3.63it/s] 26%|██▋ | 97558/371472 [7:45:31<22:08:12, 3.44it/s] 26%|██▋ | 97559/371472 [7:45:31<24:57:17, 3.05it/s] 26%|██▋ | 97560/371472 [7:45:31<23:20:40, 3.26it/s] {'loss': 3.6295, 'learning_rate': 7.639897433687432e-07, 'epoch': 4.2} + 26%|██▋ | 97560/371472 [7:45:31<23:20:40, 3.26it/s] 26%|██▋ | 97561/371472 [7:45:32<22:30:24, 3.38it/s] 26%|██▋ | 97562/371472 [7:45:32<21:49:35, 3.49it/s] 26%|██▋ | 97563/371472 [7:45:32<21:18:23, 3.57it/s] 26%|██▋ | 97564/371472 [7:45:32<21:27:38, 3.55it/s] 26%|██▋ | 97565/371472 [7:45:33<20:46:39, 3.66it/s] 26%|██▋ | 97566/371472 [7:45:33<20:40:02, 3.68it/s] 26%|██▋ | 97567/371472 [7:45:33<20:34:01, 3.70it/s] 26%|██▋ | 97568/371472 [7:45:34<21:50:57, 3.48it/s] 26%|██▋ | 97569/371472 [7:45:34<20:48:11, 3.66it/s] 26%|██▋ | 97570/371472 [7:45:34<20:15:22, 3.76it/s] 26%|██▋ | 97571/371472 [7:45:34<19:22:44, 3.93it/s] 26%|██▋ | 97572/371472 [7:45:35<19:11:04, 3.97it/s] 26%|██▋ | 97573/371472 [7:45:35<20:58:01, 3.63it/s] 26%|██▋ | 97574/371472 [7:45:35<20:53:36, 3.64it/s] 26%|██▋ | 97575/371472 [7:45:35<21:37:07, 3.52it/s] 26%|██▋ | 97576/371472 [7:45:36<20:47:29, 3.66it/s] 26%|██▋ | 97577/371472 [7:45:36<20:14:47, 3.76it/s] 26%|██▋ | 97578/371472 [7:45:36<19:26:20, 3.91it/s] 26%|██▋ | 97579/371472 [7:45:36<20:16:31, 3.75it/s] 26%|██▋ | 97580/371472 [7:45:37<21:08:43, 3.60it/s] {'loss': 3.789, 'learning_rate': 7.639412613932642e-07, 'epoch': 4.2} + 26%|██▋ | 97580/371472 [7:45:37<21:08:43, 3.60it/s] 26%|██▋ | 97581/371472 [7:45:37<21:13:25, 3.58it/s] 26%|██▋ | 97582/371472 [7:45:37<21:52:24, 3.48it/s] 26%|██▋ | 97583/371472 [7:45:38<21:30:47, 3.54it/s] 26%|██▋ | 97584/371472 [7:45:38<20:43:35, 3.67it/s] 26%|██▋ | 97585/371472 [7:45:38<20:08:04, 3.78it/s] 26%|██▋ | 97586/371472 [7:45:38<20:04:00, 3.79it/s] 26%|██▋ | 97587/371472 [7:45:39<21:30:06, 3.54it/s] 26%|██▋ | 97588/371472 [7:45:39<21:08:20, 3.60it/s] 26%|██▋ | 97589/371472 [7:45:39<22:59:32, 3.31it/s] 26%|██▋ | 97590/371472 [7:45:40<22:08:19, 3.44it/s] 26%|██▋ | 97591/371472 [7:45:40<21:45:14, 3.50it/s] 26%|██▋ | 97592/371472 [7:45:40<22:26:22, 3.39it/s] 26%|██▋ | 97593/371472 [7:45:40<21:42:31, 3.50it/s] 26%|██▋ | 97594/371472 [7:45:41<20:35:46, 3.69it/s] 26%|██▋ | 97595/371472 [7:45:41<22:16:22, 3.42it/s] 26%|██▋ | 97596/371472 [7:45:41<21:24:13, 3.55it/s] 26%|██▋ | 97597/371472 [7:45:42<21:53:30, 3.48it/s] 26%|██▋ | 97598/371472 [7:45:42<21:47:33, 3.49it/s] 26%|██▋ | 97599/371472 [7:45:42<21:48:30, 3.49it/s] 26%|██▋ | 97600/371472 [7:45:42<22:41:52, 3.35it/s] {'loss': 3.4085, 'learning_rate': 7.638927794177852e-07, 'epoch': 4.2} + 26%|██▋ | 97600/371472 [7:45:42<22:41:52, 3.35it/s] 26%|██▋ | 97601/371472 [7:45:43<23:43:13, 3.21it/s] 26%|██▋ | 97602/371472 [7:45:43<23:37:55, 3.22it/s] 26%|██▋ | 97603/371472 [7:45:43<23:59:17, 3.17it/s] 26%|██▋ | 97604/371472 [7:45:44<24:22:48, 3.12it/s] 26%|██▋ | 97605/371472 [7:45:44<27:51:47, 2.73it/s] 26%|██▋ | 97606/371472 [7:45:45<26:06:02, 2.91it/s] 26%|██▋ | 97607/371472 [7:45:45<25:25:45, 2.99it/s] 26%|██▋ | 97608/371472 [7:45:45<23:35:13, 3.23it/s] 26%|██▋ | 97609/371472 [7:45:45<24:01:37, 3.17it/s] 26%|██▋ | 97610/371472 [7:45:46<22:58:50, 3.31it/s] 26%|██▋ | 97611/371472 [7:45:46<22:31:17, 3.38it/s] 26%|██▋ | 97612/371472 [7:45:46<21:34:54, 3.52it/s] 26%|██▋ | 97613/371472 [7:45:47<21:26:02, 3.55it/s] 26%|██▋ | 97614/371472 [7:45:47<21:18:35, 3.57it/s] 26%|██▋ | 97615/371472 [7:45:47<23:33:21, 3.23it/s] 26%|██▋ | 97616/371472 [7:45:48<23:56:03, 3.18it/s] 26%|██▋ | 97617/371472 [7:45:48<22:56:54, 3.31it/s] 26%|██▋ | 97618/371472 [7:45:48<22:28:07, 3.39it/s] 26%|██▋ | 97619/371472 [7:45:48<21:39:02, 3.51it/s] 26%|██▋ | 97620/371472 [7:45:49<21:23:53, 3.55it/s] {'loss': 3.5776, 'learning_rate': 7.638442974423064e-07, 'epoch': 4.2} + 26%|██▋ | 97620/371472 [7:45:49<21:23:53, 3.55it/s] 26%|██▋ | 97621/371472 [7:45:49<21:09:51, 3.59it/s] 26%|██▋ | 97622/371472 [7:45:49<20:52:51, 3.64it/s] 26%|██▋ | 97623/371472 [7:45:49<22:30:52, 3.38it/s] 26%|██▋ | 97624/371472 [7:45:50<21:50:38, 3.48it/s] 26%|██▋ | 97625/371472 [7:45:50<21:24:18, 3.55it/s] 26%|██▋ | 97626/371472 [7:45:50<20:43:00, 3.67it/s] 26%|██▋ | 97627/371472 [7:45:51<22:08:47, 3.43it/s] 26%|██▋ | 97628/371472 [7:45:51<20:55:23, 3.64it/s] 26%|██▋ | 97629/371472 [7:45:51<20:22:35, 3.73it/s] 26%|██▋ | 97630/371472 [7:45:51<20:40:49, 3.68it/s] 26%|██▋ | 97631/371472 [7:45:52<20:16:53, 3.75it/s] 26%|██▋ | 97632/371472 [7:45:52<20:49:09, 3.65it/s] 26%|██▋ | 97633/371472 [7:45:52<21:09:18, 3.60it/s] 26%|██▋ | 97634/371472 [7:45:52<21:07:15, 3.60it/s] 26%|██▋ | 97635/371472 [7:45:53<22:49:23, 3.33it/s] 26%|██▋ | 97636/371472 [7:45:53<21:22:08, 3.56it/s] 26%|██▋ | 97637/371472 [7:45:53<21:46:08, 3.49it/s] 26%|██▋ | 97638/371472 [7:45:54<21:46:53, 3.49it/s] 26%|██▋ | 97639/371472 [7:45:54<21:41:56, 3.51it/s] 26%|██▋ | 97640/371472 [7:45:54<21:19:14, 3.57it/s] {'loss': 3.6755, 'learning_rate': 7.637958154668276e-07, 'epoch': 4.21} + 26%|██▋ | 97640/371472 [7:45:54<21:19:14, 3.57it/s] 26%|██▋ | 97641/371472 [7:45:54<21:48:31, 3.49it/s] 26%|██▋ | 97642/371472 [7:45:55<21:07:16, 3.60it/s] 26%|██▋ | 97643/371472 [7:45:55<20:38:12, 3.69it/s] 26%|██▋ | 97644/371472 [7:45:55<21:20:56, 3.56it/s] 26%|██▋ | 97645/371472 [7:45:56<23:08:46, 3.29it/s] 26%|██▋ | 97646/371472 [7:45:56<25:02:43, 3.04it/s] 26%|██▋ | 97647/371472 [7:45:56<22:48:05, 3.34it/s] 26%|██▋ | 97648/371472 [7:45:57<23:35:17, 3.22it/s] 26%|██▋ | 97649/371472 [7:45:57<23:19:12, 3.26it/s] 26%|██▋ | 97650/371472 [7:45:57<23:51:24, 3.19it/s] 26%|██▋ | 97651/371472 [7:45:58<22:45:57, 3.34it/s] 26%|██▋ | 97652/371472 [7:45:58<21:49:20, 3.49it/s] 26%|██▋ | 97653/371472 [7:45:58<21:23:51, 3.55it/s] 26%|██▋ | 97654/371472 [7:45:58<22:22:28, 3.40it/s] 26%|██▋ | 97655/371472 [7:45:59<22:55:54, 3.32it/s] 26%|██▋ | 97656/371472 [7:45:59<25:31:09, 2.98it/s] 26%|██▋ | 97657/371472 [7:45:59<24:12:26, 3.14it/s] 26%|██▋ | 97658/371472 [7:46:00<22:33:31, 3.37it/s] 26%|██▋ | 97659/371472 [7:46:00<21:22:55, 3.56it/s] 26%|██▋ | 97660/371472 [7:46:00<21:50:33, 3.48it/s] {'loss': 3.6116, 'learning_rate': 7.637473334913486e-07, 'epoch': 4.21} + 26%|██▋ | 97660/371472 [7:46:00<21:50:33, 3.48it/s] 26%|██▋ | 97661/371472 [7:46:00<21:58:07, 3.46it/s] 26%|██▋ | 97662/371472 [7:46:01<22:17:00, 3.41it/s] 26%|██▋ | 97663/371472 [7:46:01<21:12:20, 3.59it/s] 26%|██▋ | 97664/371472 [7:46:01<21:15:45, 3.58it/s] 26%|██▋ | 97665/371472 [7:46:02<20:37:43, 3.69it/s] 26%|██▋ | 97666/371472 [7:46:02<20:30:36, 3.71it/s] 26%|██▋ | 97667/371472 [7:46:02<22:39:57, 3.36it/s] 26%|██▋ | 97668/371472 [7:46:03<23:16:41, 3.27it/s] 26%|██▋ | 97669/371472 [7:46:03<24:29:42, 3.10it/s] 26%|██▋ | 97670/371472 [7:46:03<23:00:11, 3.31it/s] 26%|██▋ | 97671/371472 [7:46:03<23:00:04, 3.31it/s] 26%|██▋ | 97672/371472 [7:46:04<22:50:07, 3.33it/s] 26%|██▋ | 97673/371472 [7:46:04<22:12:38, 3.42it/s] 26%|██▋ | 97674/371472 [7:46:04<21:25:06, 3.55it/s] 26%|██▋ | 97675/371472 [7:46:05<21:31:53, 3.53it/s] 26%|██▋ | 97676/371472 [7:46:05<21:05:18, 3.61it/s] 26%|██▋ | 97677/371472 [7:46:05<21:03:27, 3.61it/s] 26%|██▋ | 97678/371472 [7:46:05<20:42:20, 3.67it/s] 26%|██▋ | 97679/371472 [7:46:06<20:18:32, 3.74it/s] 26%|██▋ | 97680/371472 [7:46:06<20:09:03, 3.77it/s] {'loss': 3.608, 'learning_rate': 7.636988515158697e-07, 'epoch': 4.21} + 26%|██▋ | 97680/371472 [7:46:06<20:09:03, 3.77it/s] 26%|██▋ | 97681/371472 [7:46:06<22:12:50, 3.42it/s] 26%|██▋ | 97682/371472 [7:46:06<21:59:14, 3.46it/s] 26%|██▋ | 97683/371472 [7:46:07<21:32:17, 3.53it/s] 26%|██▋ | 97684/371472 [7:46:07<20:55:46, 3.63it/s] 26%|██▋ | 97685/371472 [7:46:07<20:32:08, 3.70it/s] 26%|██▋ | 97686/371472 [7:46:08<19:57:22, 3.81it/s] 26%|██▋ | 97687/371472 [7:46:08<19:40:22, 3.87it/s] 26%|██▋ | 97688/371472 [7:46:08<19:30:02, 3.90it/s] 26%|██▋ | 97689/371472 [7:46:08<19:43:04, 3.86it/s] 26%|██▋ | 97690/371472 [7:46:09<20:46:39, 3.66it/s] 26%|██▋ | 97691/371472 [7:46:09<20:11:09, 3.77it/s] 26%|██▋ | 97692/371472 [7:46:09<19:56:57, 3.81it/s] 26%|██▋ | 97693/371472 [7:46:10<23:45:43, 3.20it/s] 26%|██▋ | 97694/371472 [7:46:10<23:11:45, 3.28it/s] 26%|██▋ | 97695/371472 [7:46:10<22:49:04, 3.33it/s] 26%|██▋ | 97696/371472 [7:46:10<21:53:23, 3.47it/s] 26%|██▋ | 97697/371472 [7:46:11<21:47:25, 3.49it/s] 26%|██▋ | 97698/371472 [7:46:11<23:19:26, 3.26it/s] 26%|██▋ | 97699/371472 [7:46:11<21:58:39, 3.46it/s] 26%|██▋ | 97700/371472 [7:46:12<22:38:31, 3.36it/s] {'loss': 3.5513, 'learning_rate': 7.636503695403909e-07, 'epoch': 4.21} + 26%|██▋ | 97700/371472 [7:46:12<22:38:31, 3.36it/s] 26%|██▋ | 97701/371472 [7:46:12<22:07:58, 3.44it/s] 26%|██▋ | 97702/371472 [7:46:12<21:16:50, 3.57it/s] 26%|██▋ | 97703/371472 [7:46:12<20:21:24, 3.74it/s] 26%|██▋ | 97704/371472 [7:46:13<20:21:46, 3.73it/s] 26%|██▋ | 97705/371472 [7:46:13<21:29:29, 3.54it/s] 26%|██▋ | 97706/371472 [7:46:13<22:23:09, 3.40it/s] 26%|██▋ | 97707/371472 [7:46:14<21:52:48, 3.48it/s] 26%|██▋ | 97708/371472 [7:46:14<23:46:03, 3.20it/s] 26%|██▋ | 97709/371472 [7:46:14<23:35:12, 3.22it/s] 26%|██▋ | 97710/371472 [7:46:15<24:04:31, 3.16it/s] 26%|██▋ | 97711/371472 [7:46:15<23:14:14, 3.27it/s] 26%|██▋ | 97712/371472 [7:46:15<22:51:13, 3.33it/s] 26%|██▋ | 97713/371472 [7:46:15<22:10:43, 3.43it/s] 26%|██▋ | 97714/371472 [7:46:16<21:03:18, 3.61it/s] 26%|██▋ | 97715/371472 [7:46:16<21:14:14, 3.58it/s] 26%|██▋ | 97716/371472 [7:46:16<20:28:34, 3.71it/s] 26%|██▋ | 97717/371472 [7:46:16<21:25:26, 3.55it/s] 26%|██▋ | 97718/371472 [7:46:17<21:22:45, 3.56it/s] 26%|██▋ | 97719/371472 [7:46:17<20:46:58, 3.66it/s] 26%|██▋ | 97720/371472 [7:46:17<21:04:27, 3.61it/s] {'loss': 3.5045, 'learning_rate': 7.636018875649119e-07, 'epoch': 4.21} + 26%|██▋ | 97720/371472 [7:46:17<21:04:27, 3.61it/s] 26%|██▋ | 97721/371472 [7:46:18<20:36:04, 3.69it/s] 26%|██▋ | 97722/371472 [7:46:18<20:46:49, 3.66it/s] 26%|██▋ | 97723/371472 [7:46:18<20:29:32, 3.71it/s] 26%|██▋ | 97724/371472 [7:46:18<21:32:37, 3.53it/s] 26%|██▋ | 97725/371472 [7:46:19<22:58:43, 3.31it/s] 26%|██▋ | 97726/371472 [7:46:19<22:03:13, 3.45it/s] 26%|██▋ | 97727/371472 [7:46:19<21:52:31, 3.48it/s] 26%|██▋ | 97728/371472 [7:46:20<21:37:52, 3.52it/s] 26%|██▋ | 97729/371472 [7:46:20<22:52:02, 3.33it/s] 26%|██▋ | 97730/371472 [7:46:20<22:17:51, 3.41it/s] 26%|██▋ | 97731/371472 [7:46:20<22:41:51, 3.35it/s] 26%|██▋ | 97732/371472 [7:46:21<21:47:44, 3.49it/s] 26%|██▋ | 97733/371472 [7:46:21<21:58:57, 3.46it/s] 26%|██▋ | 97734/371472 [7:46:21<21:43:16, 3.50it/s] 26%|██▋ | 97735/371472 [7:46:22<20:48:42, 3.65it/s] 26%|██▋ | 97736/371472 [7:46:22<20:57:51, 3.63it/s] 26%|██▋ | 97737/371472 [7:46:22<20:32:54, 3.70it/s] 26%|██▋ | 97738/371472 [7:46:22<19:59:01, 3.80it/s] 26%|██▋ | 97739/371472 [7:46:23<19:40:23, 3.86it/s] 26%|██▋ | 97740/371472 [7:46:23<20:19:18, 3.74it/s] {'loss': 3.487, 'learning_rate': 7.63553405589433e-07, 'epoch': 4.21} + 26%|██▋ | 97740/371472 [7:46:23<20:19:18, 3.74it/s] 26%|██▋ | 97741/371472 [7:46:23<20:26:22, 3.72it/s] 26%|██▋ | 97742/371472 [7:46:23<20:37:21, 3.69it/s] 26%|██▋ | 97743/371472 [7:46:24<22:43:35, 3.35it/s] 26%|██▋ | 97744/371472 [7:46:24<22:29:10, 3.38it/s] 26%|██▋ | 97745/371472 [7:46:24<21:14:33, 3.58it/s] 26%|██▋ | 97746/371472 [7:46:25<21:26:58, 3.54it/s] 26%|██▋ | 97747/371472 [7:46:25<20:46:36, 3.66it/s] 26%|██▋ | 97748/371472 [7:46:25<20:12:16, 3.76it/s] 26%|██▋ | 97749/371472 [7:46:25<20:25:16, 3.72it/s] 26%|██▋ | 97750/371472 [7:46:26<22:34:43, 3.37it/s] 26%|██▋ | 97751/371472 [7:46:26<20:57:31, 3.63it/s] 26%|██▋ | 97752/371472 [7:46:26<20:21:48, 3.73it/s] 26%|██▋ | 97753/371472 [7:46:26<19:38:28, 3.87it/s] 26%|██▋ | 97754/371472 [7:46:27<19:19:26, 3.93it/s] 26%|██▋ | 97755/371472 [7:46:27<19:11:47, 3.96it/s] 26%|██▋ | 97756/371472 [7:46:27<19:22:48, 3.92it/s] 26%|██▋ | 97757/371472 [7:46:27<20:11:27, 3.77it/s] 26%|██▋ | 97758/371472 [7:46:28<20:42:14, 3.67it/s] 26%|██▋ | 97759/371472 [7:46:28<23:01:34, 3.30it/s] 26%|██▋ | 97760/371472 [7:46:28<22:32:15, 3.37it/s] {'loss': 3.6189, 'learning_rate': 7.635049236139541e-07, 'epoch': 4.21} + 26%|██▋ | 97760/371472 [7:46:28<22:32:15, 3.37it/s] 26%|██▋ | 97761/371472 [7:46:29<21:47:29, 3.49it/s] 26%|██▋ | 97762/371472 [7:46:29<20:49:57, 3.65it/s] 26%|██▋ | 97763/371472 [7:46:29<21:26:47, 3.55it/s] 26%|██▋ | 97764/371472 [7:46:30<22:23:33, 3.40it/s] 26%|██▋ | 97765/371472 [7:46:30<21:38:05, 3.51it/s] 26%|██▋ | 97766/371472 [7:46:30<21:12:43, 3.58it/s] 26%|██▋ | 97767/371472 [7:46:30<20:35:23, 3.69it/s] 26%|██▋ | 97768/371472 [7:46:31<20:42:43, 3.67it/s] 26%|██▋ | 97769/371472 [7:46:31<21:15:37, 3.58it/s] 26%|██▋ | 97770/371472 [7:46:31<20:52:00, 3.64it/s] 26%|██▋ | 97771/371472 [7:46:32<22:34:31, 3.37it/s] 26%|██▋ | 97772/371472 [7:46:32<21:37:19, 3.52it/s] 26%|██▋ | 97773/371472 [7:46:32<24:06:33, 3.15it/s] 26%|██▋ | 97774/371472 [7:46:32<22:43:10, 3.35it/s] 26%|██▋ | 97775/371472 [7:46:33<23:42:57, 3.21it/s] 26%|██▋ | 97776/371472 [7:46:33<24:56:28, 3.05it/s] 26%|██▋ | 97777/371472 [7:46:33<24:02:02, 3.16it/s] 26%|██▋ | 97778/371472 [7:46:34<22:28:21, 3.38it/s] 26%|██▋ | 97779/371472 [7:46:34<21:25:32, 3.55it/s] 26%|██▋ | 97780/371472 [7:46:34<20:48:58, 3.65it/s] {'loss': 3.6587, 'learning_rate': 7.634564416384753e-07, 'epoch': 4.21} + 26%|██▋ | 97780/371472 [7:46:34<20:48:58, 3.65it/s] 26%|██▋ | 97781/371472 [7:46:34<20:36:47, 3.69it/s] 26%|██▋ | 97782/371472 [7:46:35<22:30:16, 3.38it/s] 26%|██▋ | 97783/371472 [7:46:35<21:42:06, 3.50it/s] 26%|██▋ | 97784/371472 [7:46:35<21:07:17, 3.60it/s] 26%|██▋ | 97785/371472 [7:46:36<22:03:40, 3.45it/s] 26%|██▋ | 97786/371472 [7:46:36<22:28:54, 3.38it/s] 26%|██▋ | 97787/371472 [7:46:36<21:38:47, 3.51it/s] 26%|██▋ | 97788/371472 [7:46:37<22:04:26, 3.44it/s] 26%|██▋ | 97789/371472 [7:46:37<22:43:26, 3.35it/s] 26%|██▋ | 97790/371472 [7:46:37<22:40:44, 3.35it/s] 26%|██▋ | 97791/371472 [7:46:37<22:13:18, 3.42it/s] 26%|██▋ | 97792/371472 [7:46:38<22:25:53, 3.39it/s] 26%|██▋ | 97793/371472 [7:46:38<23:21:41, 3.25it/s] 26%|██▋ | 97794/371472 [7:46:38<22:44:28, 3.34it/s] 26%|██▋ | 97795/371472 [7:46:39<21:53:14, 3.47it/s] 26%|██▋ | 97796/371472 [7:46:39<21:12:32, 3.58it/s] 26%|██▋ | 97797/371472 [7:46:39<20:43:04, 3.67it/s] 26%|██▋ | 97798/371472 [7:46:39<20:49:50, 3.65it/s] 26%|██▋ | 97799/371472 [7:46:40<20:29:11, 3.71it/s] 26%|██▋ | 97800/371472 [7:46:40<20:39:40, 3.68it/s] {'loss': 3.5741, 'learning_rate': 7.634079596629964e-07, 'epoch': 4.21} + 26%|██▋ | 97800/371472 [7:46:40<20:39:40, 3.68it/s] 26%|██▋ | 97801/371472 [7:46:40<19:49:08, 3.84it/s] 26%|██▋ | 97802/371472 [7:46:40<19:53:01, 3.82it/s] 26%|██▋ | 97803/371472 [7:46:41<19:26:06, 3.91it/s] 26%|██▋ | 97804/371472 [7:46:41<19:28:39, 3.90it/s] 26%|██▋ | 97805/371472 [7:46:41<19:07:54, 3.97it/s] 26%|██▋ | 97806/371472 [7:46:41<19:32:33, 3.89it/s] 26%|██▋ | 97807/371472 [7:46:42<21:25:19, 3.55it/s] 26%|██▋ | 97808/371472 [7:46:42<21:17:29, 3.57it/s] 26%|██▋ | 97809/371472 [7:46:42<21:11:02, 3.59it/s] 26%|██▋ | 97810/371472 [7:46:43<20:25:10, 3.72it/s] 26%|██▋ | 97811/371472 [7:46:43<20:26:31, 3.72it/s] 26%|██▋ | 97812/371472 [7:46:43<21:07:45, 3.60it/s] 26%|██▋ | 97813/371472 [7:46:43<21:16:39, 3.57it/s] 26%|██▋ | 97814/371472 [7:46:44<20:12:48, 3.76it/s] 26%|██▋ | 97815/371472 [7:46:44<19:32:40, 3.89it/s] 26%|██▋ | 97816/371472 [7:46:44<20:39:04, 3.68it/s] 26%|██▋ | 97817/371472 [7:46:44<21:08:41, 3.59it/s] 26%|██▋ | 97818/371472 [7:46:45<21:07:36, 3.60it/s] 26%|██▋ | 97819/371472 [7:46:45<20:28:33, 3.71it/s] 26%|██▋ | 97820/371472 [7:46:45<20:48:39, 3.65it/s] {'loss': 3.8757, 'learning_rate': 7.633594776875175e-07, 'epoch': 4.21} + 26%|██▋ | 97820/371472 [7:46:45<20:48:39, 3.65it/s] 26%|██▋ | 97821/371472 [7:46:46<20:30:41, 3.71it/s] 26%|██▋ | 97822/371472 [7:46:46<20:33:10, 3.70it/s] 26%|██▋ | 97823/371472 [7:46:46<20:31:37, 3.70it/s] 26%|██▋ | 97824/371472 [7:46:46<20:24:35, 3.72it/s] 26%|██▋ | 97825/371472 [7:46:47<20:42:06, 3.67it/s] 26%|██▋ | 97826/371472 [7:46:47<20:39:16, 3.68it/s] 26%|██▋ | 97827/371472 [7:46:47<20:12:24, 3.76it/s] 26%|██▋ | 97828/371472 [7:46:47<20:18:37, 3.74it/s] 26%|██▋ | 97829/371472 [7:46:48<19:37:56, 3.87it/s] 26%|██▋ | 97830/371472 [7:46:48<19:33:46, 3.89it/s] 26%|██▋ | 97831/371472 [7:46:48<19:41:50, 3.86it/s] 26%|██▋ | 97832/371472 [7:46:48<20:18:35, 3.74it/s] 26%|██▋ | 97833/371472 [7:46:49<20:08:38, 3.77it/s] 26%|██▋ | 97834/371472 [7:46:49<19:41:34, 3.86it/s] 26%|██▋ | 97835/371472 [7:46:49<20:02:51, 3.79it/s] 26%|██▋ | 97836/371472 [7:46:49<19:29:45, 3.90it/s] 26%|██▋ | 97837/371472 [7:46:50<19:18:50, 3.94it/s] 26%|██▋ | 97838/371472 [7:46:50<19:33:59, 3.88it/s] 26%|██▋ | 97839/371472 [7:46:50<24:28:24, 3.11it/s] 26%|██▋ | 97840/371472 [7:46:51<24:54:26, 3.05it/s] {'loss': 3.4607, 'learning_rate': 7.633109957120386e-07, 'epoch': 4.21} + 26%|██▋ | 97840/371472 [7:46:51<24:54:26, 3.05it/s] 26%|██▋ | 97841/371472 [7:46:51<24:26:29, 3.11it/s] 26%|██▋ | 97842/371472 [7:46:51<23:06:44, 3.29it/s] 26%|██▋ | 97843/371472 [7:46:52<22:20:16, 3.40it/s] 26%|██▋ | 97844/371472 [7:46:52<22:35:46, 3.36it/s] 26%|██▋ | 97845/371472 [7:46:52<21:53:57, 3.47it/s] 26%|██▋ | 97846/371472 [7:46:52<20:45:34, 3.66it/s] 26%|██▋ | 97847/371472 [7:46:53<20:06:17, 3.78it/s] 26%|██▋ | 97848/371472 [7:46:53<22:45:11, 3.34it/s] 26%|██▋ | 97849/371472 [7:46:53<22:24:17, 3.39it/s] 26%|██▋ | 97850/371472 [7:46:54<21:47:24, 3.49it/s] 26%|██▋ | 97851/371472 [7:46:54<21:58:06, 3.46it/s] 26%|██▋ | 97852/371472 [7:46:54<21:19:30, 3.56it/s] 26%|██▋ | 97853/371472 [7:46:54<20:32:01, 3.70it/s] 26%|██▋ | 97854/371472 [7:46:55<20:22:23, 3.73it/s] 26%|██▋ | 97855/371472 [7:46:55<21:59:00, 3.46it/s] 26%|██▋ | 97856/371472 [7:46:55<21:34:29, 3.52it/s] 26%|██▋ | 97857/371472 [7:46:56<20:39:59, 3.68it/s] 26%|██▋ | 97858/371472 [7:46:56<20:31:22, 3.70it/s] 26%|██▋ | 97859/371472 [7:46:56<21:57:46, 3.46it/s] 26%|██▋ | 97860/371472 [7:46:56<21:32:51, 3.53it/s] {'loss': 3.6519, 'learning_rate': 7.632625137365597e-07, 'epoch': 4.22} + 26%|██▋ | 97860/371472 [7:46:56<21:32:51, 3.53it/s] 26%|██▋ | 97861/371472 [7:46:57<21:57:34, 3.46it/s] 26%|██▋ | 97862/371472 [7:46:57<21:11:02, 3.59it/s] 26%|██▋ | 97863/371472 [7:46:57<20:29:15, 3.71it/s] 26%|██▋ | 97864/371472 [7:46:58<20:26:36, 3.72it/s] 26%|██▋ | 97865/371472 [7:46:58<19:36:32, 3.88it/s] 26%|██▋ | 97866/371472 [7:46:58<19:04:54, 3.98it/s] 26%|██▋ | 97867/371472 [7:46:58<20:29:22, 3.71it/s] 26%|██▋ | 97868/371472 [7:46:59<20:32:29, 3.70it/s] 26%|██▋ | 97869/371472 [7:46:59<20:59:52, 3.62it/s] 26%|██▋ | 97870/371472 [7:46:59<21:56:40, 3.46it/s] 26%|██▋ | 97871/371472 [7:46:59<21:40:42, 3.51it/s] 26%|██▋ | 97872/371472 [7:47:00<22:00:50, 3.45it/s] 26%|██▋ | 97873/371472 [7:47:00<21:37:36, 3.51it/s] 26%|██▋ | 97874/371472 [7:47:00<20:45:47, 3.66it/s] 26%|██▋ | 97875/371472 [7:47:01<20:27:46, 3.71it/s] 26%|██▋ | 97876/371472 [7:47:01<21:16:33, 3.57it/s] 26%|██▋ | 97877/371472 [7:47:01<21:01:25, 3.61it/s] 26%|██▋ | 97878/371472 [7:47:01<21:15:31, 3.57it/s] 26%|██▋ | 97879/371472 [7:47:02<22:03:54, 3.44it/s] 26%|██▋ | 97880/371472 [7:47:02<21:00:21, 3.62it/s] {'loss': 3.6615, 'learning_rate': 7.632140317610808e-07, 'epoch': 4.22} + 26%|██▋ | 97880/371472 [7:47:02<21:00:21, 3.62it/s] 26%|██▋ | 97881/371472 [7:47:02<21:29:50, 3.54it/s] 26%|██▋ | 97882/371472 [7:47:02<20:52:03, 3.64it/s] 26%|██▋ | 97883/371472 [7:47:03<20:42:12, 3.67it/s] 26%|██▋ | 97884/371472 [7:47:03<21:29:30, 3.54it/s] 26%|██▋ | 97885/371472 [7:47:03<21:06:58, 3.60it/s] 26%|██▋ | 97886/371472 [7:47:04<20:24:13, 3.72it/s] 26%|██▋ | 97887/371472 [7:47:04<21:30:16, 3.53it/s] 26%|██▋ | 97888/371472 [7:47:04<20:30:39, 3.71it/s] 26%|██▋ | 97889/371472 [7:47:04<20:06:34, 3.78it/s] 26%|██▋ | 97890/371472 [7:47:05<20:00:31, 3.80it/s] 26%|██▋ | 97891/371472 [7:47:05<21:31:03, 3.53it/s] 26%|██▋ | 97892/371472 [7:47:05<21:46:40, 3.49it/s] 26%|██▋ | 97893/371472 [7:47:06<21:24:51, 3.55it/s] 26%|██▋ | 97894/371472 [7:47:06<20:59:10, 3.62it/s] 26%|██▋ | 97895/371472 [7:47:06<23:29:37, 3.23it/s] 26%|██▋ | 97896/371472 [7:47:07<24:46:06, 3.07it/s] 26%|██▋ | 97897/371472 [7:47:07<24:09:19, 3.15it/s] 26%|██▋ | 97898/371472 [7:47:07<23:06:05, 3.29it/s] 26%|██▋ | 97899/371472 [7:47:07<22:45:48, 3.34it/s] 26%|██▋ | 97900/371472 [7:47:08<22:45:05, 3.34it/s] {'loss': 3.6786, 'learning_rate': 7.631655497856019e-07, 'epoch': 4.22} + 26%|██▋ | 97900/371472 [7:47:08<22:45:05, 3.34it/s] 26%|██▋ | 97901/371472 [7:47:08<21:45:31, 3.49it/s] 26%|██▋ | 97902/371472 [7:47:08<21:45:00, 3.49it/s] 26%|██▋ | 97903/371472 [7:47:09<21:42:52, 3.50it/s] 26%|██▋ | 97904/371472 [7:47:09<22:15:16, 3.41it/s] 26%|██▋ | 97905/371472 [7:47:09<23:22:56, 3.25it/s] 26%|██▋ | 97906/371472 [7:47:09<22:07:36, 3.43it/s] 26%|██▋ | 97907/371472 [7:47:10<21:09:51, 3.59it/s] 26%|██▋ | 97908/371472 [7:47:10<20:36:52, 3.69it/s] 26%|██▋ | 97909/371472 [7:47:10<22:53:44, 3.32it/s] 26%|██▋ | 97910/371472 [7:47:11<22:14:39, 3.42it/s] 26%|██▋ | 97911/371472 [7:47:11<21:25:05, 3.55it/s] 26%|██▋ | 97912/371472 [7:47:11<20:34:34, 3.69it/s] 26%|██▋ | 97913/371472 [7:47:11<20:57:40, 3.63it/s] 26%|██▋ | 97914/371472 [7:47:12<20:52:00, 3.64it/s] 26%|██▋ | 97915/371472 [7:47:12<20:28:40, 3.71it/s] 26%|██▋ | 97916/371472 [7:47:12<22:05:15, 3.44it/s] 26%|██▋ | 97917/371472 [7:47:13<21:46:23, 3.49it/s] 26%|██▋ | 97918/371472 [7:47:13<22:54:14, 3.32it/s] 26%|██▋ | 97919/371472 [7:47:13<25:03:27, 3.03it/s] 26%|██▋ | 97920/371472 [7:47:14<23:52:03, 3.18it/s] {'loss': 3.7063, 'learning_rate': 7.63117067810123e-07, 'epoch': 4.22} + 26%|██▋ | 97920/371472 [7:47:14<23:52:03, 3.18it/s] 26%|██▋ | 97921/371472 [7:47:14<22:49:26, 3.33it/s] 26%|██▋ | 97922/371472 [7:47:14<23:46:15, 3.20it/s] 26%|██▋ | 97923/371472 [7:47:14<22:05:45, 3.44it/s] 26%|██▋ | 97924/371472 [7:47:15<22:06:26, 3.44it/s] 26%|██▋ | 97925/371472 [7:47:15<21:30:20, 3.53it/s] 26%|██▋ | 97926/371472 [7:47:15<21:40:05, 3.51it/s] 26%|██▋ | 97927/371472 [7:47:16<21:28:23, 3.54it/s] 26%|██▋ | 97928/371472 [7:47:16<22:00:37, 3.45it/s] 26%|██▋ | 97929/371472 [7:47:16<21:15:32, 3.57it/s] 26%|██▋ | 97930/371472 [7:47:16<21:00:33, 3.62it/s] 26%|██▋ | 97931/371472 [7:47:17<21:12:14, 3.58it/s] 26%|██▋ | 97932/371472 [7:47:17<22:23:14, 3.39it/s] 26%|██▋ | 97933/371472 [7:47:17<24:16:43, 3.13it/s] 26%|██▋ | 97934/371472 [7:47:18<23:05:44, 3.29it/s] 26%|██▋ | 97935/371472 [7:47:18<22:29:01, 3.38it/s] 26%|██▋ | 97936/371472 [7:47:18<21:57:36, 3.46it/s] 26%|██▋ | 97937/371472 [7:47:19<23:46:59, 3.19it/s] 26%|██▋ | 97938/371472 [7:47:19<23:09:36, 3.28it/s] 26%|██▋ | 97939/371472 [7:47:19<23:11:28, 3.28it/s] 26%|██▋ | 97940/371472 [7:47:19<22:36:40, 3.36it/s] {'loss': 3.4403, 'learning_rate': 7.630685858346442e-07, 'epoch': 4.22} + 26%|██▋ | 97940/371472 [7:47:19<22:36:40, 3.36it/s] 26%|██▋ | 97941/371472 [7:47:20<21:32:16, 3.53it/s] 26%|██▋ | 97942/371472 [7:47:20<21:04:18, 3.61it/s] 26%|██▋ | 97943/371472 [7:47:20<20:22:54, 3.73it/s] 26%|██▋ | 97944/371472 [7:47:20<21:31:49, 3.53it/s] 26%|██▋ | 97945/371472 [7:47:21<20:55:09, 3.63it/s] 26%|██▋ | 97946/371472 [7:47:21<20:44:42, 3.66it/s] 26%|██▋ | 97947/371472 [7:47:21<22:21:47, 3.40it/s] 26%|██▋ | 97948/371472 [7:47:22<21:09:52, 3.59it/s] 26%|██▋ | 97949/371472 [7:47:22<21:05:23, 3.60it/s] 26%|██▋ | 97950/371472 [7:47:22<21:13:09, 3.58it/s] 26%|██▋ | 97951/371472 [7:47:22<21:25:12, 3.55it/s] 26%|██▋ | 97952/371472 [7:47:23<21:44:23, 3.49it/s] 26%|██▋ | 97953/371472 [7:47:23<21:37:30, 3.51it/s] 26%|██▋ | 97954/371472 [7:47:23<21:04:20, 3.61it/s] 26%|██▋ | 97955/371472 [7:47:24<23:11:55, 3.28it/s] 26%|██▋ | 97956/371472 [7:47:24<23:25:19, 3.24it/s] 26%|██▋ | 97957/371472 [7:47:24<22:22:53, 3.39it/s] 26%|██▋ | 97958/371472 [7:47:24<21:29:52, 3.53it/s] 26%|██▋ | 97959/371472 [7:47:25<21:02:34, 3.61it/s] 26%|██▋ | 97960/371472 [7:47:25<20:34:17, 3.69it/s] {'loss': 3.5957, 'learning_rate': 7.630201038591653e-07, 'epoch': 4.22} + 26%|██▋ | 97960/371472 [7:47:25<20:34:17, 3.69it/s] 26%|██▋ | 97961/371472 [7:47:25<20:56:53, 3.63it/s] 26%|██▋ | 97962/371472 [7:47:26<22:51:59, 3.32it/s] 26%|██▋ | 97963/371472 [7:47:26<22:34:43, 3.36it/s] 26%|██▋ | 97964/371472 [7:47:26<22:19:07, 3.40it/s] 26%|██▋ | 97965/371472 [7:47:26<21:10:33, 3.59it/s] 26%|██▋ | 97966/371472 [7:47:27<20:43:14, 3.67it/s] 26%|██▋ | 97967/371472 [7:47:27<20:14:31, 3.75it/s] 26%|██▋ | 97968/371472 [7:47:27<20:41:27, 3.67it/s] 26%|██▋ | 97969/371472 [7:47:28<20:38:00, 3.68it/s] 26%|██▋ | 97970/371472 [7:47:28<20:28:19, 3.71it/s] 26%|██▋ | 97971/371472 [7:47:28<21:31:55, 3.53it/s] 26%|██▋ | 97972/371472 [7:47:28<21:57:20, 3.46it/s] 26%|██▋ | 97973/371472 [7:47:29<21:46:31, 3.49it/s] 26%|██▋ | 97974/371472 [7:47:29<21:11:01, 3.59it/s] 26%|██▋ | 97975/371472 [7:47:29<21:06:52, 3.60it/s] 26%|██▋ | 97976/371472 [7:47:30<21:26:46, 3.54it/s] 26%|██▋ | 97977/371472 [7:47:30<20:52:48, 3.64it/s] 26%|██▋ | 97978/371472 [7:47:30<20:52:53, 3.64it/s] 26%|██▋ | 97979/371472 [7:47:30<20:54:07, 3.63it/s] 26%|██▋ | 97980/371472 [7:47:31<21:40:11, 3.51it/s] {'loss': 3.5053, 'learning_rate': 7.629716218836862e-07, 'epoch': 4.22} + 26%|██▋ | 97980/371472 [7:47:31<21:40:11, 3.51it/s] 26%|██▋ | 97981/371472 [7:47:31<22:15:15, 3.41it/s] 26%|██▋ | 97982/371472 [7:47:31<21:01:49, 3.61it/s] 26%|██▋ | 97983/371472 [7:47:31<21:46:34, 3.49it/s] 26%|██▋ | 97984/371472 [7:47:32<22:32:41, 3.37it/s] 26%|██▋ | 97985/371472 [7:47:32<21:16:49, 3.57it/s] 26%|██▋ | 97986/371472 [7:47:32<20:26:28, 3.72it/s] 26%|██▋ | 97987/371472 [7:47:33<22:40:45, 3.35it/s] 26%|██▋ | 97988/371472 [7:47:33<22:22:08, 3.40it/s] 26%|██▋ | 97989/371472 [7:47:33<21:10:50, 3.59it/s] 26%|██▋ | 97990/371472 [7:47:33<20:43:26, 3.67it/s] 26%|██▋ | 97991/371472 [7:47:34<22:02:21, 3.45it/s] 26%|██▋ | 97992/371472 [7:47:34<22:30:06, 3.38it/s] 26%|██▋ | 97993/371472 [7:47:34<23:58:51, 3.17it/s] 26%|██▋ | 97994/371472 [7:47:35<22:37:29, 3.36it/s] 26%|██▋ | 97995/371472 [7:47:35<22:31:11, 3.37it/s] 26%|██▋ | 97996/371472 [7:47:35<24:15:37, 3.13it/s] 26%|██▋ | 97997/371472 [7:47:36<22:59:25, 3.30it/s] 26%|██▋ | 97998/371472 [7:47:36<22:02:45, 3.45it/s] 26%|██▋ | 97999/371472 [7:47:36<22:06:48, 3.44it/s] 26%|██▋ | 98000/371472 [7:47:37<22:48:44, 3.33it/s] {'loss': 3.3752, 'learning_rate': 7.629231399082074e-07, 'epoch': 4.22} + 26%|██▋ | 98000/371472 [7:47:37<22:48:44, 3.33it/s] 26%|██▋ | 98001/371472 [7:47:37<22:36:51, 3.36it/s] 26%|██▋ | 98002/371472 [7:47:37<23:08:38, 3.28it/s] 26%|██▋ | 98003/371472 [7:47:37<21:43:35, 3.50it/s] 26%|██▋ | 98004/371472 [7:47:38<20:52:01, 3.64it/s] 26%|██▋ | 98005/371472 [7:47:38<20:47:15, 3.65it/s] 26%|██▋ | 98006/371472 [7:47:38<22:51:16, 3.32it/s] 26%|██▋ | 98007/371472 [7:47:39<22:28:59, 3.38it/s] 26%|██▋ | 98008/371472 [7:47:39<22:03:54, 3.44it/s] 26%|██▋ | 98009/371472 [7:47:39<21:20:19, 3.56it/s] 26%|██▋ | 98010/371472 [7:47:39<23:15:00, 3.27it/s] 26%|██▋ | 98011/371472 [7:47:40<22:19:20, 3.40it/s] 26%|██▋ | 98012/371472 [7:47:40<21:33:28, 3.52it/s] 26%|██▋ | 98013/371472 [7:47:40<20:43:14, 3.67it/s] 26%|██▋ | 98014/371472 [7:47:41<21:23:58, 3.55it/s] 26%|██▋ | 98015/371472 [7:47:41<21:21:46, 3.56it/s] 26%|██▋ | 98016/371472 [7:47:41<21:09:55, 3.59it/s] 26%|██▋ | 98017/371472 [7:47:41<24:28:09, 3.10it/s] 26%|██▋ | 98018/371472 [7:47:42<24:11:05, 3.14it/s] 26%|██▋ | 98019/371472 [7:47:42<23:32:39, 3.23it/s] 26%|██▋ | 98020/371472 [7:47:42<23:58:08, 3.17it/s] {'loss': 3.8487, 'learning_rate': 7.628746579327285e-07, 'epoch': 4.22} + 26%|██▋ | 98020/371472 [7:47:42<23:58:08, 3.17it/s] 26%|██▋ | 98021/371472 [7:47:43<23:29:54, 3.23it/s] 26%|██▋ | 98022/371472 [7:47:43<22:56:07, 3.31it/s] 26%|██▋ | 98023/371472 [7:47:43<23:01:23, 3.30it/s] 26%|██▋ | 98024/371472 [7:47:44<22:03:39, 3.44it/s] 26%|██▋ | 98025/371472 [7:47:44<21:38:39, 3.51it/s] 26%|██▋ | 98026/371472 [7:47:44<22:05:28, 3.44it/s] 26%|██▋ | 98027/371472 [7:47:44<22:30:58, 3.37it/s] 26%|██▋ | 98028/371472 [7:47:45<22:16:36, 3.41it/s] 26%|██▋ | 98029/371472 [7:47:45<22:08:52, 3.43it/s] 26%|██▋ | 98030/371472 [7:47:45<21:43:38, 3.50it/s] 26%|██▋ | 98031/371472 [7:47:46<21:39:34, 3.51it/s] 26%|██▋ | 98032/371472 [7:47:46<21:03:34, 3.61it/s] 26%|██▋ | 98033/371472 [7:47:46<20:54:11, 3.63it/s] 26%|██▋ | 98034/371472 [7:47:46<20:16:43, 3.75it/s] 26%|██▋ | 98035/371472 [7:47:47<20:12:34, 3.76it/s] 26%|██▋ | 98036/371472 [7:47:47<20:41:02, 3.67it/s] 26%|██▋ | 98037/371472 [7:47:47<21:08:04, 3.59it/s] 26%|██▋ | 98038/371472 [7:47:47<21:01:38, 3.61it/s] 26%|██▋ | 98039/371472 [7:47:48<20:59:25, 3.62it/s] 26%|██▋ | 98040/371472 [7:47:48<21:01:22, 3.61it/s] {'loss': 3.5937, 'learning_rate': 7.628261759572496e-07, 'epoch': 4.22} + 26%|██▋ | 98040/371472 [7:47:48<21:01:22, 3.61it/s] 26%|██▋ | 98041/371472 [7:47:48<20:56:41, 3.63it/s] 26%|██▋ | 98042/371472 [7:47:49<20:08:20, 3.77it/s] 26%|██▋ | 98043/371472 [7:47:49<20:48:35, 3.65it/s] 26%|██▋ | 98044/371472 [7:47:49<21:04:13, 3.60it/s] 26%|██▋ | 98045/371472 [7:47:49<20:43:21, 3.67it/s] 26%|██▋ | 98046/371472 [7:47:50<20:38:32, 3.68it/s] 26%|██▋ | 98047/371472 [7:47:50<22:00:58, 3.45it/s] 26%|██▋ | 98048/371472 [7:47:50<21:37:09, 3.51it/s] 26%|██▋ | 98049/371472 [7:47:51<21:39:48, 3.51it/s] 26%|██▋ | 98050/371472 [7:47:51<20:48:28, 3.65it/s] 26%|██▋ | 98051/371472 [7:47:51<20:49:16, 3.65it/s] 26%|██▋ | 98052/371472 [7:47:51<21:22:20, 3.55it/s] 26%|██▋ | 98053/371472 [7:47:52<22:31:21, 3.37it/s] 26%|██▋ | 98054/371472 [7:47:52<23:35:20, 3.22it/s] 26%|██▋ | 98055/371472 [7:47:52<22:12:51, 3.42it/s] 26%|██▋ | 98056/371472 [7:47:53<21:09:19, 3.59it/s] 26%|██▋ | 98057/371472 [7:47:53<20:29:36, 3.71it/s] 26%|██▋ | 98058/371472 [7:47:53<20:58:39, 3.62it/s] 26%|██▋ | 98059/371472 [7:47:53<20:48:09, 3.65it/s] 26%|██▋ | 98060/371472 [7:47:54<20:51:35, 3.64it/s] {'loss': 3.8835, 'learning_rate': 7.627776939817707e-07, 'epoch': 4.22} + 26%|██▋ | 98060/371472 [7:47:54<20:51:35, 3.64it/s] 26%|██▋ | 98061/371472 [7:47:54<22:00:20, 3.45it/s] 26%|██▋ | 98062/371472 [7:47:54<21:49:18, 3.48it/s] 26%|██▋ | 98063/371472 [7:47:55<21:43:10, 3.50it/s] 26%|██▋ | 98064/371472 [7:47:55<21:01:09, 3.61it/s] 26%|██▋ | 98065/371472 [7:47:55<20:41:44, 3.67it/s] 26%|██▋ | 98066/371472 [7:47:55<20:32:38, 3.70it/s] 26%|██▋ | 98067/371472 [7:47:56<20:28:53, 3.71it/s] 26%|██▋ | 98068/371472 [7:47:56<21:01:20, 3.61it/s] 26%|██▋ | 98069/371472 [7:47:56<21:33:29, 3.52it/s] 26%|██▋ | 98070/371472 [7:47:56<21:11:23, 3.58it/s] 26%|██▋ | 98071/371472 [7:47:57<21:50:37, 3.48it/s] 26%|██▋ | 98072/371472 [7:47:57<21:56:16, 3.46it/s] 26%|██▋ | 98073/371472 [7:47:57<21:24:13, 3.55it/s] 26%|██▋ | 98074/371472 [7:47:58<24:09:30, 3.14it/s] 26%|██▋ | 98075/371472 [7:47:58<23:30:05, 3.23it/s] 26%|██▋ | 98076/371472 [7:47:58<23:56:08, 3.17it/s] 26%|██▋ | 98077/371472 [7:47:59<23:30:35, 3.23it/s] 26%|██▋ | 98078/371472 [7:47:59<23:12:30, 3.27it/s] 26%|██▋ | 98079/371472 [7:47:59<24:00:14, 3.16it/s] 26%|██▋ | 98080/371472 [7:48:00<23:06:07, 3.29it/s] {'loss': 3.4389, 'learning_rate': 7.62729212006292e-07, 'epoch': 4.22} + 26%|██▋ | 98080/371472 [7:48:00<23:06:07, 3.29it/s] 26%|██▋ | 98081/371472 [7:48:00<21:39:54, 3.51it/s] 26%|██▋ | 98082/371472 [7:48:00<21:39:55, 3.51it/s] 26%|██▋ | 98083/371472 [7:48:00<21:34:38, 3.52it/s] 26%|██▋ | 98084/371472 [7:48:01<21:27:38, 3.54it/s] 26%|██▋ | 98085/371472 [7:48:01<22:21:41, 3.40it/s] 26%|██▋ | 98086/371472 [7:48:01<21:57:27, 3.46it/s] 26%|██▋ | 98087/371472 [7:48:01<21:33:45, 3.52it/s] 26%|██▋ | 98088/371472 [7:48:02<20:49:29, 3.65it/s] 26%|██▋ | 98089/371472 [7:48:02<20:14:54, 3.75it/s] 26%|██▋ | 98090/371472 [7:48:02<21:15:49, 3.57it/s] 26%|██▋ | 98091/371472 [7:48:03<22:04:22, 3.44it/s] 26%|██▋ | 98092/371472 [7:48:03<24:17:24, 3.13it/s] 26%|██▋ | 98093/371472 [7:48:03<23:02:51, 3.29it/s] 26%|██▋ | 98094/371472 [7:48:04<22:51:47, 3.32it/s] 26%|██▋ | 98095/371472 [7:48:04<22:31:33, 3.37it/s] 26%|██▋ | 98096/371472 [7:48:04<22:14:30, 3.41it/s] 26%|██▋ | 98097/371472 [7:48:04<22:32:57, 3.37it/s] 26%|██▋ | 98098/371472 [7:48:05<23:11:15, 3.27it/s] 26%|██▋ | 98099/371472 [7:48:05<22:40:19, 3.35it/s] 26%|██▋ | 98100/371472 [7:48:05<22:09:46, 3.43it/s] {'loss': 3.5515, 'learning_rate': 7.626807300308129e-07, 'epoch': 4.23} + 26%|██▋ | 98100/371472 [7:48:05<22:09:46, 3.43it/s] 26%|██▋ | 98101/371472 [7:48:06<21:26:58, 3.54it/s] 26%|██▋ | 98102/371472 [7:48:06<20:49:03, 3.65it/s] 26%|██▋ | 98103/371472 [7:48:06<20:18:15, 3.74it/s] 26%|██▋ | 98104/371472 [7:48:06<20:06:52, 3.78it/s] 26%|██▋ | 98105/371472 [7:48:07<19:53:18, 3.82it/s] 26%|██▋ | 98106/371472 [7:48:07<20:08:24, 3.77it/s] 26%|██▋ | 98107/371472 [7:48:07<20:27:54, 3.71it/s] 26%|██▋ | 98108/371472 [7:48:07<20:27:22, 3.71it/s] 26%|██▋ | 98109/371472 [7:48:08<20:26:05, 3.72it/s] 26%|██▋ | 98110/371472 [7:48:08<20:13:00, 3.76it/s] 26%|██▋ | 98111/371472 [7:48:08<21:03:46, 3.61it/s] 26%|██▋ | 98112/371472 [7:48:09<22:08:36, 3.43it/s] 26%|██▋ | 98113/371472 [7:48:09<23:07:17, 3.28it/s] 26%|██▋ | 98114/371472 [7:48:09<23:00:02, 3.30it/s] 26%|██▋ | 98115/371472 [7:48:09<22:20:38, 3.40it/s] 26%|██▋ | 98116/371472 [7:48:10<23:27:32, 3.24it/s] 26%|██▋ | 98117/371472 [7:48:10<22:49:29, 3.33it/s] 26%|██▋ | 98118/371472 [7:48:10<22:33:41, 3.37it/s] 26%|██▋ | 98119/371472 [7:48:11<22:19:43, 3.40it/s] 26%|██▋ | 98120/371472 [7:48:11<22:02:20, 3.45it/s] {'loss': 3.6198, 'learning_rate': 7.62632248055334e-07, 'epoch': 4.23} + 26%|██▋ | 98120/371472 [7:48:11<22:02:20, 3.45it/s] 26%|██▋ | 98121/371472 [7:48:11<23:08:30, 3.28it/s] 26%|██▋ | 98122/371472 [7:48:12<23:15:24, 3.26it/s] 26%|██▋ | 98123/371472 [7:48:12<22:18:07, 3.40it/s] 26%|██▋ | 98124/371472 [7:48:12<21:23:30, 3.55it/s] 26%|██▋ | 98125/371472 [7:48:12<21:13:04, 3.58it/s] 26%|██▋ | 98126/371472 [7:48:13<21:01:41, 3.61it/s] 26%|██▋ | 98127/371472 [7:48:13<20:41:18, 3.67it/s] 26%|██▋ | 98128/371472 [7:48:13<20:29:53, 3.70it/s] 26%|██▋ | 98129/371472 [7:48:14<21:15:28, 3.57it/s] 26%|██▋ | 98130/371472 [7:48:14<20:33:53, 3.69it/s] 26%|██▋ | 98131/371472 [7:48:14<20:46:23, 3.66it/s] 26%|██▋ | 98132/371472 [7:48:14<20:41:14, 3.67it/s] 26%|██▋ | 98133/371472 [7:48:15<20:20:52, 3.73it/s] 26%|██▋ | 98134/371472 [7:48:15<20:20:43, 3.73it/s] 26%|██▋ | 98135/371472 [7:48:15<20:20:36, 3.73it/s] 26%|██▋ | 98136/371472 [7:48:15<20:26:33, 3.71it/s] 26%|██▋ | 98137/371472 [7:48:16<20:26:32, 3.71it/s] 26%|██▋ | 98138/371472 [7:48:16<20:39:12, 3.68it/s] 26%|██▋ | 98139/371472 [7:48:16<20:51:24, 3.64it/s] 26%|██▋ | 98140/371472 [7:48:16<20:16:41, 3.74it/s] {'loss': 3.5712, 'learning_rate': 7.625837660798551e-07, 'epoch': 4.23} + 26%|██▋ | 98140/371472 [7:48:16<20:16:41, 3.74it/s] 26%|██▋ | 98141/371472 [7:48:17<19:45:11, 3.84it/s] 26%|██▋ | 98142/371472 [7:48:17<20:09:12, 3.77it/s] 26%|██▋ | 98143/371472 [7:48:17<21:28:55, 3.53it/s] 26%|██▋ | 98144/371472 [7:48:18<21:16:18, 3.57it/s] 26%|██▋ | 98145/371472 [7:48:18<21:18:42, 3.56it/s] 26%|██▋ | 98146/371472 [7:48:18<20:37:16, 3.68it/s] 26%|██▋ | 98147/371472 [7:48:18<22:09:42, 3.43it/s] 26%|██▋ | 98148/371472 [7:48:19<21:54:30, 3.47it/s] 26%|██▋ | 98149/371472 [7:48:19<21:50:49, 3.48it/s] 26%|██▋ | 98150/371472 [7:48:19<22:29:56, 3.37it/s] 26%|██▋ | 98151/371472 [7:48:20<22:20:57, 3.40it/s] 26%|██▋ | 98152/371472 [7:48:20<24:18:05, 3.12it/s] 26%|██▋ | 98153/371472 [7:48:20<22:54:46, 3.31it/s] 26%|██▋ | 98154/371472 [7:48:21<22:20:48, 3.40it/s] 26%|██▋ | 98155/371472 [7:48:21<22:38:08, 3.35it/s] 26%|██▋ | 98156/371472 [7:48:21<21:52:20, 3.47it/s] 26%|██▋ | 98157/371472 [7:48:21<21:22:15, 3.55it/s] 26%|██▋ | 98158/371472 [7:48:22<21:43:17, 3.50it/s] 26%|██▋ | 98159/371472 [7:48:22<22:01:31, 3.45it/s] 26%|██▋ | 98160/371472 [7:48:22<22:19:26, 3.40it/s] {'loss': 3.7876, 'learning_rate': 7.625352841043763e-07, 'epoch': 4.23} + 26%|██▋ | 98160/371472 [7:48:22<22:19:26, 3.40it/s] 26%|██▋ | 98161/371472 [7:48:23<21:17:33, 3.57it/s] 26%|██▋ | 98162/371472 [7:48:23<23:23:39, 3.25it/s] 26%|██▋ | 98163/371472 [7:48:23<22:29:03, 3.38it/s] 26%|██▋ | 98164/371472 [7:48:23<22:58:29, 3.30it/s] 26%|██▋ | 98165/371472 [7:48:24<21:58:52, 3.45it/s] 26%|██▋ | 98166/371472 [7:48:24<21:35:33, 3.52it/s] 26%|██▋ | 98167/371472 [7:48:24<20:38:13, 3.68it/s] 26%|██▋ | 98168/371472 [7:48:25<21:32:46, 3.52it/s] 26%|██▋ | 98169/371472 [7:48:25<22:42:58, 3.34it/s] 26%|██▋ | 98170/371472 [7:48:25<23:02:11, 3.30it/s] 26%|██▋ | 98171/371472 [7:48:25<21:43:01, 3.50it/s] 26%|██▋ | 98172/371472 [7:48:26<21:12:21, 3.58it/s] 26%|██▋ | 98173/371472 [7:48:26<21:09:02, 3.59it/s] 26%|██▋ | 98174/371472 [7:48:26<20:31:42, 3.70it/s] 26%|██▋ | 98175/371472 [7:48:27<21:14:38, 3.57it/s] 26%|██▋ | 98176/371472 [7:48:27<21:33:04, 3.52it/s] 26%|██▋ | 98177/371472 [7:48:27<21:11:10, 3.58it/s] 26%|██▋ | 98178/371472 [7:48:27<23:04:37, 3.29it/s] 26%|██▋ | 98179/371472 [7:48:28<22:27:36, 3.38it/s] 26%|██▋ | 98180/371472 [7:48:28<22:26:04, 3.38it/s] {'loss': 3.6569, 'learning_rate': 7.624868021288974e-07, 'epoch': 4.23} + 26%|██▋ | 98180/371472 [7:48:28<22:26:04, 3.38it/s] 26%|██▋ | 98181/371472 [7:48:28<22:22:44, 3.39it/s] 26%|██▋ | 98182/371472 [7:48:29<22:16:35, 3.41it/s] 26%|██▋ | 98183/371472 [7:48:29<23:01:56, 3.30it/s] 26%|██▋ | 98184/371472 [7:48:29<22:53:20, 3.32it/s] 26%|██▋ | 98185/371472 [7:48:30<21:51:51, 3.47it/s] 26%|██▋ | 98186/371472 [7:48:30<23:26:09, 3.24it/s] 26%|██▋ | 98187/371472 [7:48:30<23:32:43, 3.22it/s] 26%|██▋ | 98188/371472 [7:48:30<23:12:10, 3.27it/s] 26%|██▋ | 98189/371472 [7:48:31<22:34:54, 3.36it/s] 26%|██▋ | 98190/371472 [7:48:31<22:31:41, 3.37it/s] 26%|██▋ | 98191/371472 [7:48:31<22:04:45, 3.44it/s] 26%|██▋ | 98192/371472 [7:48:32<21:42:23, 3.50it/s] 26%|██▋ | 98193/371472 [7:48:32<21:08:56, 3.59it/s] 26%|██▋ | 98194/371472 [7:48:32<21:53:19, 3.47it/s] 26%|██▋ | 98195/371472 [7:48:32<21:23:12, 3.55it/s] 26%|██▋ | 98196/371472 [7:48:33<23:32:31, 3.22it/s] 26%|██▋ | 98197/371472 [7:48:33<22:44:11, 3.34it/s] 26%|██▋ | 98198/371472 [7:48:33<22:07:33, 3.43it/s] 26%|██▋ | 98199/371472 [7:48:34<21:37:47, 3.51it/s] 26%|██▋ | 98200/371472 [7:48:34<21:21:33, 3.55it/s] {'loss': 3.6339, 'learning_rate': 7.624383201534185e-07, 'epoch': 4.23} + 26%|██▋ | 98200/371472 [7:48:34<21:21:33, 3.55it/s] 26%|██▋ | 98201/371472 [7:48:34<20:40:32, 3.67it/s] 26%|██▋ | 98202/371472 [7:48:34<21:59:08, 3.45it/s] 26%|██▋ | 98203/371472 [7:48:35<21:32:42, 3.52it/s] 26%|██▋ | 98204/371472 [7:48:35<21:30:21, 3.53it/s] 26%|██▋ | 98205/371472 [7:48:35<20:47:24, 3.65it/s] 26%|██▋ | 98206/371472 [7:48:36<20:54:14, 3.63it/s] 26%|██▋ | 98207/371472 [7:48:36<20:32:56, 3.69it/s] 26%|██▋ | 98208/371472 [7:48:36<20:52:44, 3.64it/s] 26%|██▋ | 98209/371472 [7:48:36<20:35:41, 3.69it/s] 26%|██▋ | 98210/371472 [7:48:37<20:14:36, 3.75it/s] 26%|██▋ | 98211/371472 [7:48:37<22:03:47, 3.44it/s] 26%|██▋ | 98212/371472 [7:48:37<22:36:54, 3.36it/s] 26%|██▋ | 98213/371472 [7:48:38<21:43:46, 3.49it/s] 26%|██▋ | 98214/371472 [7:48:38<21:50:16, 3.48it/s] 26%|██▋ | 98215/371472 [7:48:38<22:47:43, 3.33it/s] 26%|██▋ | 98216/371472 [7:48:38<22:37:56, 3.35it/s] 26%|██▋ | 98217/371472 [7:48:39<23:24:40, 3.24it/s] 26%|██▋ | 98218/371472 [7:48:39<24:28:34, 3.10it/s] 26%|██▋ | 98219/371472 [7:48:39<23:23:45, 3.24it/s] 26%|██▋ | 98220/371472 [7:48:40<22:16:44, 3.41it/s] {'loss': 3.5889, 'learning_rate': 7.623898381779396e-07, 'epoch': 4.23} + 26%|██▋ | 98220/371472 [7:48:40<22:16:44, 3.41it/s] 26%|██▋ | 98221/371472 [7:48:40<21:45:40, 3.49it/s] 26%|██▋ | 98222/371472 [7:48:40<21:15:55, 3.57it/s] 26%|██▋ | 98223/371472 [7:48:41<21:37:08, 3.51it/s] 26%|██▋ | 98224/371472 [7:48:41<20:52:25, 3.64it/s] 26%|██▋ | 98225/371472 [7:48:41<21:28:08, 3.54it/s] 26%|██▋ | 98226/371472 [7:48:41<21:28:35, 3.53it/s] 26%|██▋ | 98227/371472 [7:48:42<21:55:44, 3.46it/s] 26%|██▋ | 98228/371472 [7:48:42<22:02:45, 3.44it/s] 26%|██▋ | 98229/371472 [7:48:42<21:31:00, 3.53it/s] 26%|██▋ | 98230/371472 [7:48:43<23:35:22, 3.22it/s] 26%|██▋ | 98231/371472 [7:48:43<23:42:45, 3.20it/s] 26%|██▋ | 98232/371472 [7:48:43<22:36:33, 3.36it/s] 26%|██▋ | 98233/371472 [7:48:43<21:42:00, 3.50it/s] 26%|██▋ | 98234/371472 [7:48:44<22:25:49, 3.38it/s] 26%|██▋ | 98235/371472 [7:48:44<22:14:40, 3.41it/s] 26%|██▋ | 98236/371472 [7:48:44<21:00:41, 3.61it/s] 26%|██▋ | 98237/371472 [7:48:45<20:49:22, 3.64it/s] 26%|██▋ | 98238/371472 [7:48:45<21:07:21, 3.59it/s] 26%|██▋ | 98239/371472 [7:48:45<21:19:43, 3.56it/s] 26%|██▋ | 98240/371472 [7:48:45<21:39:19, 3.50it/s] {'loss': 3.6677, 'learning_rate': 7.623413562024607e-07, 'epoch': 4.23} + 26%|██▋ | 98240/371472 [7:48:45<21:39:19, 3.50it/s] 26%|██▋ | 98241/371472 [7:48:46<21:34:31, 3.52it/s] 26%|██▋ | 98242/371472 [7:48:46<21:16:03, 3.57it/s] 26%|██▋ | 98243/371472 [7:48:46<21:18:11, 3.56it/s] 26%|██▋ | 98244/371472 [7:48:47<22:12:36, 3.42it/s] 26%|██▋ | 98245/371472 [7:48:47<23:59:19, 3.16it/s] 26%|██▋ | 98246/371472 [7:48:47<23:02:11, 3.29it/s] 26%|██▋ | 98247/371472 [7:48:47<22:17:15, 3.41it/s] 26%|██▋ | 98248/371472 [7:48:48<21:50:45, 3.47it/s] 26%|██▋ | 98249/371472 [7:48:48<22:21:18, 3.39it/s] 26%|██▋ | 98250/371472 [7:48:48<22:34:53, 3.36it/s] 26%|██▋ | 98251/371472 [7:48:49<21:31:09, 3.53it/s] 26%|██▋ | 98252/371472 [7:48:49<21:21:46, 3.55it/s] 26%|██▋ | 98253/371472 [7:48:49<21:36:05, 3.51it/s] 26%|██▋ | 98254/371472 [7:48:49<21:17:11, 3.57it/s] 26%|██▋ | 98255/371472 [7:48:50<21:20:32, 3.56it/s] 26%|██▋ | 98256/371472 [7:48:50<21:28:32, 3.53it/s] 26%|██▋ | 98257/371472 [7:48:50<21:48:22, 3.48it/s] 26%|██▋ | 98258/371472 [7:48:51<21:30:30, 3.53it/s] 26%|██▋ | 98259/371472 [7:48:51<20:48:46, 3.65it/s] 26%|██▋ | 98260/371472 [7:48:51<20:54:23, 3.63it/s] {'loss': 3.7587, 'learning_rate': 7.622928742269818e-07, 'epoch': 4.23} + 26%|██▋ | 98260/371472 [7:48:51<20:54:23, 3.63it/s] 26%|██▋ | 98261/371472 [7:48:51<21:09:09, 3.59it/s] 26%|██▋ | 98262/371472 [7:48:52<22:31:49, 3.37it/s] 26%|██▋ | 98263/371472 [7:48:52<21:57:45, 3.46it/s] 26%|██▋ | 98264/371472 [7:48:52<22:00:45, 3.45it/s] 26%|██▋ | 98265/371472 [7:48:53<22:59:26, 3.30it/s] 26%|██▋ | 98266/371472 [7:48:53<22:12:52, 3.42it/s] 26%|██▋ | 98267/371472 [7:48:53<21:33:06, 3.52it/s] 26%|██▋ | 98268/371472 [7:48:53<21:30:06, 3.53it/s] 26%|██▋ | 98269/371472 [7:48:54<20:50:43, 3.64it/s] 26%|██▋ | 98270/371472 [7:48:54<22:00:29, 3.45it/s] 26%|██▋ | 98271/371472 [7:48:54<23:02:42, 3.29it/s] 26%|██▋ | 98272/371472 [7:48:55<22:26:43, 3.38it/s] 26%|██▋ | 98273/371472 [7:48:55<22:00:16, 3.45it/s] 26%|██▋ | 98274/371472 [7:48:55<22:38:47, 3.35it/s] 26%|██▋ | 98275/371472 [7:48:56<21:49:17, 3.48it/s] 26%|██▋ | 98276/371472 [7:48:56<21:54:08, 3.46it/s] 26%|██▋ | 98277/371472 [7:48:56<23:11:47, 3.27it/s] 26%|██▋ | 98278/371472 [7:48:56<22:16:02, 3.41it/s] 26%|██▋ | 98279/371472 [7:48:57<21:19:42, 3.56it/s] 26%|██▋ | 98280/371472 [7:48:57<21:30:05, 3.53it/s] {'loss': 3.3814, 'learning_rate': 7.622443922515029e-07, 'epoch': 4.23} + 26%|██▋ | 98280/371472 [7:48:57<21:30:05, 3.53it/s] 26%|██▋ | 98281/371472 [7:48:57<20:52:38, 3.63it/s] 26%|██▋ | 98282/371472 [7:48:58<22:02:45, 3.44it/s] 26%|██▋ | 98283/371472 [7:48:58<22:35:57, 3.36it/s] 26%|██▋ | 98284/371472 [7:48:58<21:51:51, 3.47it/s] 26%|██▋ | 98285/371472 [7:48:58<22:49:00, 3.33it/s] 26%|██▋ | 98286/371472 [7:48:59<21:46:02, 3.49it/s] 26%|██▋ | 98287/371472 [7:48:59<21:19:24, 3.56it/s] 26%|██▋ | 98288/371472 [7:48:59<20:50:49, 3.64it/s] 26%|██▋ | 98289/371472 [7:49:00<22:19:28, 3.40it/s] 26%|██▋ | 98290/371472 [7:49:00<21:33:59, 3.52it/s] 26%|██��� | 98291/371472 [7:49:00<20:49:22, 3.64it/s] 26%|██▋ | 98292/371472 [7:49:00<21:07:33, 3.59it/s] 26%|██▋ | 98293/371472 [7:49:01<21:15:30, 3.57it/s] 26%|██▋ | 98294/371472 [7:49:01<22:55:57, 3.31it/s] 26%|██▋ | 98295/371472 [7:49:01<22:22:01, 3.39it/s] 26%|██▋ | 98296/371472 [7:49:02<21:14:12, 3.57it/s] 26%|██▋ | 98297/371472 [7:49:02<22:06:26, 3.43it/s] 26%|██▋ | 98298/371472 [7:49:02<21:15:50, 3.57it/s] 26%|██▋ | 98299/371472 [7:49:02<21:29:06, 3.53it/s] 26%|██▋ | 98300/371472 [7:49:03<21:03:22, 3.60it/s] {'loss': 3.4579, 'learning_rate': 7.62195910276024e-07, 'epoch': 4.23} + 26%|██▋ | 98300/371472 [7:49:03<21:03:22, 3.60it/s] 26%|██▋ | 98301/371472 [7:49:03<21:51:23, 3.47it/s] 26%|██▋ | 98302/371472 [7:49:03<22:00:17, 3.45it/s] 26%|██▋ | 98303/371472 [7:49:04<21:18:52, 3.56it/s] 26%|██▋ | 98304/371472 [7:49:04<21:06:38, 3.59it/s] 26%|██▋ | 98305/371472 [7:49:04<20:38:54, 3.67it/s] 26%|██▋ | 98306/371472 [7:49:04<20:50:26, 3.64it/s] 26%|██▋ | 98307/371472 [7:49:05<20:43:43, 3.66it/s] 26%|██▋ | 98308/371472 [7:49:05<21:34:09, 3.52it/s] 26%|██▋ | 98309/371472 [7:49:05<23:35:42, 3.22it/s] 26%|██▋ | 98310/371472 [7:49:06<23:23:14, 3.24it/s] 26%|██▋ | 98311/371472 [7:49:06<22:29:47, 3.37it/s] 26%|██▋ | 98312/371472 [7:49:06<21:28:26, 3.53it/s] 26%|██▋ | 98313/371472 [7:49:06<20:31:36, 3.70it/s] 26%|██▋ | 98314/371472 [7:49:07<20:34:56, 3.69it/s] 26%|██▋ | 98315/371472 [7:49:07<20:31:21, 3.70it/s] 26%|██▋ | 98316/371472 [7:49:07<21:22:10, 3.55it/s] 26%|██▋ | 98317/371472 [7:49:07<20:39:01, 3.67it/s] 26%|██▋ | 98318/371472 [7:49:08<20:33:41, 3.69it/s] 26%|██▋ | 98319/371472 [7:49:08<20:28:47, 3.70it/s] 26%|██▋ | 98320/371472 [7:49:08<21:17:01, 3.56it/s] {'loss': 3.6606, 'learning_rate': 7.621474283005451e-07, 'epoch': 4.23} + 26%|██▋ | 98320/371472 [7:49:08<21:17:01, 3.56it/s] 26%|██▋ | 98321/371472 [7:49:09<20:56:14, 3.62it/s] 26%|██▋ | 98322/371472 [7:49:09<20:52:55, 3.63it/s] 26%|██▋ | 98323/371472 [7:49:09<20:25:14, 3.72it/s] 26%|██▋ | 98324/371472 [7:49:09<20:06:16, 3.77it/s] 26%|██▋ | 98325/371472 [7:49:10<19:50:54, 3.82it/s] 26%|██▋ | 98326/371472 [7:49:10<20:31:53, 3.70it/s] 26%|██▋ | 98327/371472 [7:49:10<20:27:26, 3.71it/s] 26%|██▋ | 98328/371472 [7:49:10<19:59:26, 3.80it/s] 26%|██▋ | 98329/371472 [7:49:11<20:58:15, 3.62it/s] 26%|██▋ | 98330/371472 [7:49:11<20:52:46, 3.63it/s] 26%|██▋ | 98331/371472 [7:49:11<21:01:44, 3.61it/s] 26%|██▋ | 98332/371472 [7:49:12<20:55:38, 3.63it/s] 26%|██▋ | 98333/371472 [7:49:12<20:51:00, 3.64it/s] 26%|██▋ | 98334/371472 [7:49:12<21:32:35, 3.52it/s] 26%|██▋ | 98335/371472 [7:49:12<20:50:45, 3.64it/s] 26%|██▋ | 98336/371472 [7:49:13<23:53:19, 3.18it/s] 26%|██▋ | 98337/371472 [7:49:13<22:55:34, 3.31it/s] 26%|██▋ | 98338/371472 [7:49:13<22:39:27, 3.35it/s] 26%|██▋ | 98339/371472 [7:49:14<24:11:17, 3.14it/s] 26%|██▋ | 98340/371472 [7:49:14<23:00:32, 3.30it/s] {'loss': 3.5666, 'learning_rate': 7.620989463250663e-07, 'epoch': 4.24} + 26%|██▋ | 98340/371472 [7:49:14<23:00:32, 3.30it/s] 26%|██▋ | 98341/371472 [7:49:14<22:42:34, 3.34it/s] 26%|██▋ | 98342/371472 [7:49:15<23:37:05, 3.21it/s] 26%|██▋ | 98343/371472 [7:49:15<22:50:48, 3.32it/s] 26%|██▋ | 98344/371472 [7:49:15<22:05:22, 3.43it/s] 26%|██▋ | 98345/371472 [7:49:15<23:17:15, 3.26it/s] 26%|██▋ | 98346/371472 [7:49:16<22:32:06, 3.37it/s] 26%|██▋ | 98347/371472 [7:49:16<23:10:18, 3.27it/s] 26%|██▋ | 98348/371472 [7:49:16<22:04:39, 3.44it/s] 26%|██▋ | 98349/371472 [7:49:17<21:29:09, 3.53it/s] 26%|██▋ | 98350/371472 [7:49:17<22:11:28, 3.42it/s] 26%|██▋ | 98351/371472 [7:49:17<21:40:47, 3.50it/s] 26%|██▋ | 98352/371472 [7:49:18<22:49:54, 3.32it/s] 26%|██▋ | 98353/371472 [7:49:18<22:57:04, 3.31it/s] 26%|██▋ | 98354/371472 [7:49:18<22:25:24, 3.38it/s] 26%|██▋ | 98355/371472 [7:49:18<22:03:59, 3.44it/s] 26%|██▋ | 98356/371472 [7:49:19<22:30:47, 3.37it/s] 26%|██▋ | 98357/371472 [7:49:19<22:18:01, 3.40it/s] 26%|██▋ | 98358/371472 [7:49:19<21:38:39, 3.51it/s] 26%|██▋ | 98359/371472 [7:49:20<21:05:48, 3.60it/s] 26%|██▋ | 98360/371472 [7:49:20<20:34:16, 3.69it/s] {'loss': 3.7073, 'learning_rate': 7.620504643495873e-07, 'epoch': 4.24} + 26%|██▋ | 98360/371472 [7:49:20<20:34:16, 3.69it/s] 26%|██▋ | 98361/371472 [7:49:20<20:31:42, 3.70it/s] 26%|██▋ | 98362/371472 [7:49:20<20:33:55, 3.69it/s] 26%|██▋ | 98363/371472 [7:49:21<20:59:59, 3.61it/s] 26%|██▋ | 98364/371472 [7:49:21<20:43:08, 3.66it/s] 26%|██▋ | 98365/371472 [7:49:21<20:50:01, 3.64it/s] 26%|██▋ | 98366/371472 [7:49:21<20:42:56, 3.66it/s] 26%|██▋ | 98367/371472 [7:49:22<21:57:08, 3.46it/s] 26%|██▋ | 98368/371472 [7:49:22<22:26:03, 3.38it/s] 26%|██▋ | 98369/371472 [7:49:22<22:02:52, 3.44it/s] 26%|██▋ | 98370/371472 [7:49:23<21:40:11, 3.50it/s] 26%|██▋ | 98371/371472 [7:49:23<22:39:47, 3.35it/s] 26%|██▋ | 98372/371472 [7:49:23<21:47:33, 3.48it/s] 26%|██▋ | 98373/371472 [7:49:24<22:09:21, 3.42it/s] 26%|██▋ | 98374/371472 [7:49:24<23:15:01, 3.26it/s] 26%|██▋ | 98375/371472 [7:49:24<22:37:17, 3.35it/s] 26%|██▋ | 98376/371472 [7:49:24<21:40:22, 3.50it/s] 26%|██▋ | 98377/371472 [7:49:25<20:56:10, 3.62it/s] 26%|██▋ | 98378/371472 [7:49:25<21:28:20, 3.53it/s] 26%|██▋ | 98379/371472 [7:49:25<21:16:37, 3.57it/s] 26%|██▋ | 98380/371472 [7:49:26<21:40:37, 3.50it/s] {'loss': 3.5744, 'learning_rate': 7.620019823741084e-07, 'epoch': 4.24} + 26%|██▋ | 98380/371472 [7:49:26<21:40:37, 3.50it/s] 26%|██▋ | 98381/371472 [7:49:26<21:29:04, 3.53it/s] 26%|██▋ | 98382/371472 [7:49:26<21:19:04, 3.56it/s] 26%|██▋ | 98383/371472 [7:49:26<21:01:56, 3.61it/s] 26%|██▋ | 98384/371472 [7:49:27<21:59:26, 3.45it/s] 26%|██▋ | 98385/371472 [7:49:27<21:15:27, 3.57it/s] 26%|██▋ | 98386/371472 [7:49:27<21:21:54, 3.55it/s] 26%|██▋ | 98387/371472 [7:49:27<21:13:58, 3.57it/s] 26%|██▋ | 98388/371472 [7:49:28<20:24:24, 3.72it/s] 26%|██▋ | 98389/371472 [7:49:28<20:21:17, 3.73it/s] 26%|██▋ | 98390/371472 [7:49:28<20:30:44, 3.70it/s] 26%|██▋ | 98391/371472 [7:49:29<19:58:09, 3.80it/s] 26%|██▋ | 98392/371472 [7:49:29<19:48:05, 3.83it/s] 26%|██▋ | 98393/371472 [7:49:29<20:39:06, 3.67it/s] 26%|██▋ | 98394/371472 [7:49:29<20:28:18, 3.71it/s] 26%|██▋ | 98395/371472 [7:49:30<20:46:00, 3.65it/s] 26%|██▋ | 98396/371472 [7:49:30<21:38:47, 3.50it/s] 26%|██▋ | 98397/371472 [7:49:30<21:12:58, 3.58it/s] 26%|██▋ | 98398/371472 [7:49:30<21:33:50, 3.52it/s] 26%|██▋ | 98399/371472 [7:49:31<21:50:29, 3.47it/s] 26%|██▋ | 98400/371472 [7:49:31<22:42:08, 3.34it/s] {'loss': 3.7059, 'learning_rate': 7.619535003986295e-07, 'epoch': 4.24} + 26%|██▋ | 98400/371472 [7:49:31<22:42:08, 3.34it/s] 26%|██▋ | 98401/371472 [7:49:31<23:23:50, 3.24it/s] 26%|██▋ | 98402/371472 [7:49:32<22:48:27, 3.33it/s] 26%|██▋ | 98403/371472 [7:49:32<22:05:28, 3.43it/s] 26%|██▋ | 98404/371472 [7:49:32<23:28:37, 3.23it/s] 26%|██▋ | 98405/371472 [7:49:33<23:32:18, 3.22it/s] 26%|██▋ | 98406/371472 [7:49:33<22:07:46, 3.43it/s] 26%|██▋ | 98407/371472 [7:49:33<21:40:37, 3.50it/s] 26%|██▋ | 98408/371472 [7:49:33<20:47:37, 3.65it/s] 26%|██▋ | 98409/371472 [7:49:34<20:24:05, 3.72it/s] 26%|██▋ | 98410/371472 [7:49:34<20:58:52, 3.62it/s] 26%|██▋ | 98411/371472 [7:49:34<20:26:36, 3.71it/s] 26%|██▋ | 98412/371472 [7:49:35<21:20:39, 3.55it/s] 26%|██▋ | 98413/371472 [7:49:35<21:00:31, 3.61it/s] 26%|██▋ | 98414/371472 [7:49:35<21:24:56, 3.54it/s] 26%|██▋ | 98415/371472 [7:49:35<21:56:42, 3.46it/s] 26%|██▋ | 98416/371472 [7:49:36<21:31:05, 3.52it/s] 26%|██▋ | 98417/371472 [7:49:36<21:19:05, 3.56it/s] 26%|██▋ | 98418/371472 [7:49:36<21:16:18, 3.57it/s] 26%|██▋ | 98419/371472 [7:49:37<21:29:18, 3.53it/s] 26%|██▋ | 98420/371472 [7:49:37<22:54:19, 3.31it/s] {'loss': 3.6127, 'learning_rate': 7.619050184231507e-07, 'epoch': 4.24} + 26%|██▋ | 98420/371472 [7:49:37<22:54:19, 3.31it/s] 26%|██▋ | 98421/371472 [7:49:37<22:07:42, 3.43it/s] 26%|██▋ | 98422/371472 [7:49:37<21:47:55, 3.48it/s] 26%|██▋ | 98423/371472 [7:49:38<21:18:26, 3.56it/s] 26%|██▋ | 98424/371472 [7:49:38<21:47:30, 3.48it/s] 26%|█���▋ | 98425/371472 [7:49:38<22:46:39, 3.33it/s] 26%|██▋ | 98426/371472 [7:49:39<21:30:15, 3.53it/s] 26%|██▋ | 98427/371472 [7:49:39<20:47:17, 3.65it/s] 26%|██▋ | 98428/371472 [7:49:39<21:58:04, 3.45it/s] 26%|██▋ | 98429/371472 [7:49:39<21:28:50, 3.53it/s] 26%|██▋ | 98430/371472 [7:49:40<21:03:54, 3.60it/s] 26%|██▋ | 98431/371472 [7:49:40<20:47:23, 3.65it/s] 26%|██▋ | 98432/371472 [7:49:40<20:28:50, 3.70it/s] 26%|██▋ | 98433/371472 [7:49:40<19:43:30, 3.85it/s] 26%|██▋ | 98434/371472 [7:49:41<19:49:41, 3.83it/s] 26%|██▋ | 98435/371472 [7:49:41<22:45:05, 3.33it/s] 26%|██▋ | 98436/371472 [7:49:41<23:48:06, 3.19it/s] 26%|██▋ | 98437/371472 [7:49:42<22:29:27, 3.37it/s] 26%|██▋ | 98438/371472 [7:49:42<22:02:14, 3.44it/s] 26%|██▋ | 98439/371472 [7:49:42<21:57:39, 3.45it/s] 26%|██▋ | 98440/371472 [7:49:43<21:46:48, 3.48it/s] {'loss': 3.564, 'learning_rate': 7.618565364476718e-07, 'epoch': 4.24} + 26%|██▋ | 98440/371472 [7:49:43<21:46:48, 3.48it/s] 27%|██▋ | 98441/371472 [7:49:43<21:37:38, 3.51it/s] 27%|██▋ | 98442/371472 [7:49:43<21:35:50, 3.51it/s] 27%|██▋ | 98443/371472 [7:49:43<22:35:54, 3.36it/s] 27%|██▋ | 98444/371472 [7:49:44<22:44:09, 3.34it/s] 27%|██▋ | 98445/371472 [7:49:44<22:33:10, 3.36it/s] 27%|██▋ | 98446/371472 [7:49:44<22:14:56, 3.41it/s] 27%|██▋ | 98447/371472 [7:49:45<27:08:04, 2.79it/s] 27%|██▋ | 98448/371472 [7:49:45<25:59:33, 2.92it/s] 27%|██▋ | 98449/371472 [7:49:45<24:01:48, 3.16it/s] 27%|██▋ | 98450/371472 [7:49:46<23:13:58, 3.26it/s] 27%|██▋ | 98451/371472 [7:49:46<22:14:10, 3.41it/s] 27%|██▋ | 98452/371472 [7:49:46<21:21:23, 3.55it/s] 27%|██▋ | 98453/371472 [7:49:47<27:32:43, 2.75it/s] 27%|██▋ | 98454/371472 [7:49:47<25:41:42, 2.95it/s] 27%|██▋ | 98455/371472 [7:49:47<24:47:02, 3.06it/s] 27%|██▋ | 98456/371472 [7:49:48<24:22:27, 3.11it/s] 27%|██▋ | 98457/371472 [7:49:48<24:01:44, 3.16it/s] 27%|██▋ | 98458/371472 [7:49:48<23:11:20, 3.27it/s] 27%|██▋ | 98459/371472 [7:49:49<24:08:27, 3.14it/s] 27%|██▋ | 98460/371472 [7:49:49<22:33:39, 3.36it/s] {'loss': 3.3204, 'learning_rate': 7.618080544721929e-07, 'epoch': 4.24} + 27%|██▋ | 98460/371472 [7:49:49<22:33:39, 3.36it/s] 27%|██▋ | 98461/371472 [7:49:49<21:11:25, 3.58it/s] 27%|██▋ | 98462/371472 [7:49:49<22:01:17, 3.44it/s] 27%|██▋ | 98463/371472 [7:49:50<21:39:13, 3.50it/s] 27%|██▋ | 98464/371472 [7:49:50<22:22:51, 3.39it/s] 27%|██▋ | 98465/371472 [7:49:50<22:33:12, 3.36it/s] 27%|██▋ | 98466/371472 [7:49:51<22:30:59, 3.37it/s] 27%|██▋ | 98467/371472 [7:49:51<23:31:18, 3.22it/s] 27%|██▋ | 98468/371472 [7:49:51<22:05:41, 3.43it/s] 27%|██▋ | 98469/371472 [7:49:51<21:49:09, 3.48it/s] 27%|██▋ | 98470/371472 [7:49:52<21:13:06, 3.57it/s] 27%|██▋ | 98471/371472 [7:49:52<20:45:21, 3.65it/s] 27%|██▋ | 98472/371472 [7:49:52<22:05:59, 3.43it/s] 27%|██▋ | 98473/371472 [7:49:53<21:56:39, 3.46it/s] 27%|██▋ | 98474/371472 [7:49:53<23:11:08, 3.27it/s] 27%|██▋ | 98475/371472 [7:49:53<22:29:08, 3.37it/s] 27%|██▋ | 98476/371472 [7:49:53<21:07:21, 3.59it/s] 27%|██▋ | 98477/371472 [7:49:54<20:57:21, 3.62it/s] 27%|██▋ | 98478/371472 [7:49:54<21:20:57, 3.55it/s] 27%|██▋ | 98479/371472 [7:49:54<20:56:00, 3.62it/s] 27%|██▋ | 98480/371472 [7:49:54<20:16:09, 3.74it/s] {'loss': 3.6988, 'learning_rate': 7.61759572496714e-07, 'epoch': 4.24} + 27%|██▋ | 98480/371472 [7:49:54<20:16:09, 3.74it/s] 27%|██▋ | 98481/371472 [7:49:55<20:01:32, 3.79it/s] 27%|██▋ | 98482/371472 [7:49:55<20:27:13, 3.71it/s] 27%|██▋ | 98483/371472 [7:49:55<21:03:14, 3.60it/s] 27%|██▋ | 98484/371472 [7:49:56<21:35:34, 3.51it/s] 27%|██▋ | 98485/371472 [7:49:56<21:51:33, 3.47it/s] 27%|██▋ | 98486/371472 [7:49:56<21:12:35, 3.58it/s] 27%|██▋ | 98487/371472 [7:49:56<22:07:51, 3.43it/s] 27%|██▋ | 98488/371472 [7:49:57<22:34:53, 3.36it/s] 27%|██▋ | 98489/371472 [7:49:57<22:27:09, 3.38it/s] 27%|██▋ | 98490/371472 [7:49:57<21:11:16, 3.58it/s] 27%|██▋ | 98491/371472 [7:49:58<21:09:53, 3.58it/s] 27%|██▋ | 98492/371472 [7:49:58<20:40:53, 3.67it/s] 27%|██▋ | 98493/371472 [7:49:58<22:04:16, 3.44it/s] 27%|██▋ | 98494/371472 [7:49:58<21:43:55, 3.49it/s] 27%|██▋ | 98495/371472 [7:49:59<20:39:56, 3.67it/s] 27%|██▋ | 98496/371472 [7:49:59<21:38:30, 3.50it/s] 27%|██▋ | 98497/371472 [7:49:59<21:21:43, 3.55it/s] 27%|██▋ | 98498/371472 [7:50:00<20:28:10, 3.70it/s] 27%|██▋ | 98499/371472 [7:50:00<19:52:01, 3.82it/s] 27%|██▋ | 98500/371472 [7:50:00<22:47:34, 3.33it/s] {'loss': 3.7666, 'learning_rate': 7.61711090521235e-07, 'epoch': 4.24} + 27%|██▋ | 98500/371472 [7:50:00<22:47:34, 3.33it/s] 27%|██▋ | 98501/371472 [7:50:00<21:30:39, 3.52it/s] 27%|██▋ | 98502/371472 [7:50:01<21:24:56, 3.54it/s] 27%|██▋ | 98503/371472 [7:50:01<21:59:55, 3.45it/s] 27%|██▋ | 98504/371472 [7:50:01<21:48:25, 3.48it/s] 27%|██▋ | 98505/371472 [7:50:02<23:25:28, 3.24it/s] 27%|██▋ | 98506/371472 [7:50:02<22:26:49, 3.38it/s] 27%|██▋ | 98507/371472 [7:50:02<21:51:53, 3.47it/s] 27%|██▋ | 98508/371472 [7:50:02<21:15:33, 3.57it/s] 27%|██▋ | 98509/371472 [7:50:03<20:52:19, 3.63it/s] 27%|██▋ | 98510/371472 [7:50:03<20:12:50, 3.75it/s] 27%|██▋ | 98511/371472 [7:50:03<20:33:33, 3.69it/s] 27%|██▋ | 98512/371472 [7:50:04<20:39:55, 3.67it/s] 27%|██▋ | 98513/371472 [7:50:04<20:30:22, 3.70it/s] 27%|██▋ | 98514/371472 [7:50:04<20:49:03, 3.64it/s] 27%|██▋ | 98515/371472 [7:50:04<20:23:40, 3.72it/s] 27%|██▋ | 98516/371472 [7:50:05<21:40:33, 3.50it/s] 27%|██▋ | 98517/371472 [7:50:05<21:11:07, 3.58it/s] 27%|██▋ | 98518/371472 [7:50:05<20:45:40, 3.65it/s] 27%|██▋ | 98519/371472 [7:50:05<21:04:38, 3.60it/s] 27%|██▋ | 98520/371472 [7:50:06<22:09:26, 3.42it/s] {'loss': 3.5265, 'learning_rate': 7.616626085457561e-07, 'epoch': 4.24} + 27%|██▋ | 98520/371472 [7:50:06<22:09:26, 3.42it/s] 27%|██▋ | 98521/371472 [7:50:06<21:21:44, 3.55it/s] 27%|██▋ | 98522/371472 [7:50:06<21:05:54, 3.59it/s] 27%|██▋ | 98523/371472 [7:50:07<21:29:53, 3.53it/s] 27%|██▋ | 98524/371472 [7:50:07<20:23:22, 3.72it/s] 27%|██▋ | 98525/371472 [7:50:07<23:28:06, 3.23it/s] 27%|██▋ | 98526/371472 [7:50:08<23:29:04, 3.23it/s] 27%|██▋ | 98527/371472 [7:50:08<21:55:23, 3.46it/s] 27%|██▋ | 98528/371472 [7:50:08<21:21:14, 3.55it/s] 27%|██▋ | 98529/371472 [7:50:08<21:06:26, 3.59it/s] 27%|██▋ | 98530/371472 [7:50:09<21:23:17, 3.54it/s] 27%|██▋ | 98531/371472 [7:50:09<21:18:17, 3.56it/s] 27%|██▋ | 98532/371472 [7:50:09<20:34:49, 3.68it/s] 27%|██▋ | 98533/371472 [7:50:09<20:58:47, 3.61it/s] 27%|██▋ | 98534/371472 [7:50:10<20:26:41, 3.71it/s] 27%|██▋ | 98535/371472 [7:50:10<20:41:17, 3.66it/s] 27%|██▋ | 98536/371472 [7:50:10<20:20:00, 3.73it/s] 27%|██▋ | 98537/371472 [7:50:11<20:27:32, 3.71it/s] 27%|██▋ | 98538/371472 [7:50:11<20:59:06, 3.61it/s] 27%|██▋ | 98539/371472 [7:50:11<20:21:48, 3.72it/s] 27%|██▋ | 98540/371472 [7:50:11<20:06:16, 3.77it/s] {'loss': 3.494, 'learning_rate': 7.616141265702773e-07, 'epoch': 4.24} + 27%|██▋ | 98540/371472 [7:50:11<20:06:16, 3.77it/s] 27%|██▋ | 98541/371472 [7:50:12<20:02:21, 3.78it/s] 27%|██▋ | 98542/371472 [7:50:12<20:13:39, 3.75it/s] 27%|██▋ | 98543/371472 [7:50:12<19:48:25, 3.83it/s] 27%|██▋ | 98544/371472 [7:50:12<19:53:02, 3.81it/s] 27%|██▋ | 98545/371472 [7:50:13<20:10:06, 3.76it/s] 27%|██▋ | 98546/371472 [7:50:13<19:54:42, 3.81it/s] 27%|██▋ | 98547/371472 [7:50:13<20:21:59, 3.72it/s] 27%|██▋ | 98548/371472 [7:50:13<20:44:34, 3.65it/s] 27%|██▋ | 98549/371472 [7:50:14<21:23:31, 3.54it/s] 27%|██▋ | 98550/371472 [7:50:14<22:12:16, 3.41it/s] 27%|██▋ | 98551/371472 [7:50:14<22:24:10, 3.38it/s] 27%|██▋ | 98552/371472 [7:50:15<23:13:07, 3.27it/s] 27%|██▋ | 98553/371472 [7:50:15<22:12:12, 3.41it/s] 27%|██▋ | 98554/371472 [7:50:15<21:37:43, 3.51it/s] 27%|██▋ | 98555/371472 [7:50:15<20:50:39, 3.64it/s] 27%|██▋ | 98556/371472 [7:50:16<23:30:08, 3.23it/s] 27%|██▋ | 98557/371472 [7:50:16<22:15:27, 3.41it/s] 27%|██▋ | 98558/371472 [7:50:16<21:02:23, 3.60it/s] 27%|██▋ | 98559/371472 [7:50:17<20:43:56, 3.66it/s] 27%|██▋ | 98560/371472 [7:50:17<20:32:53, 3.69it/s] {'loss': 3.636, 'learning_rate': 7.615656445947984e-07, 'epoch': 4.25} + 27%|██▋ | 98560/371472 [7:50:17<20:32:53, 3.69it/s] 27%|██▋ | 98561/371472 [7:50:17<20:27:13, 3.71it/s] 27%|██▋ | 98562/371472 [7:50:17<20:23:48, 3.72it/s] 27%|██▋ | 98563/371472 [7:50:18<19:50:00, 3.82it/s] 27%|██▋ | 98564/371472 [7:50:18<22:02:36, 3.44it/s] 27%|██▋ | 98565/371472 [7:50:18<21:36:52, 3.51it/s] 27%|██▋ | 98566/371472 [7:50:19<21:05:13, 3.59it/s] 27%|██▋ | 98567/371472 [7:50:19<21:26:37, 3.54it/s] 27%|██▋ | 98568/371472 [7:50:19<22:00:12, 3.45it/s] 27%|██▋ | 98569/371472 [7:50:19<21:19:22, 3.56it/s] 27%|██▋ | 98570/371472 [7:50:20<22:15:11, 3.41it/s] 27%|██▋ | 98571/371472 [7:50:20<22:04:40, 3.43it/s] 27%|██▋ | 98572/371472 [7:50:20<21:30:33, 3.52it/s] 27%|██▋ | 98573/371472 [7:50:21<23:17:46, 3.25it/s] 27%|██▋ | 98574/371472 [7:50:21<22:24:46, 3.38it/s] 27%|██▋ | 98575/371472 [7:50:21<21:39:55, 3.50it/s] 27%|██▋ | 98576/371472 [7:50:22<22:06:22, 3.43it/s] 27%|██▋ | 98577/371472 [7:50:22<22:34:25, 3.36it/s] 27%|██▋ | 98578/371472 [7:50:22<22:51:43, 3.32it/s] 27%|██▋ | 98579/371472 [7:50:22<21:54:14, 3.46it/s] 27%|██▋ | 98580/371472 [7:50:23<21:09:23, 3.58it/s] {'loss': 3.622, 'learning_rate': 7.615171626193195e-07, 'epoch': 4.25} + 27%|██▋ | 98580/371472 [7:50:23<21:09:23, 3.58it/s] 27%|██▋ | 98581/371472 [7:50:23<21:21:49, 3.55it/s] 27%|██▋ | 98582/371472 [7:50:23<20:57:14, 3.62it/s] 27%|██▋ | 98583/371472 [7:50:24<21:39:56, 3.50it/s] 27%|██▋ | 98584/371472 [7:50:24<20:50:19, 3.64it/s] 27%|██▋ | 98585/371472 [7:50:24<20:40:01, 3.67it/s] 27%|██▋ | 98586/371472 [7:50:24<21:55:08, 3.46it/s] 27%|██▋ | 98587/371472 [7:50:25<21:35:48, 3.51it/s] 27%|██▋ | 98588/371472 [7:50:25<21:39:08, 3.50it/s] 27%|██▋ | 98589/371472 [7:50:25<20:30:16, 3.70it/s] 27%|██▋ | 98590/371472 [7:50:25<21:49:17, 3.47it/s] 27%|██▋ | 98591/371472 [7:50:26<21:14:08, 3.57it/s] 27%|██▋ | 98592/371472 [7:50:26<22:31:23, 3.37it/s] 27%|██▋ | 98593/371472 [7:50:26<21:39:07, 3.50it/s] 27%|██▋ | 98594/371472 [7:50:27<20:52:31, 3.63it/s] 27%|██▋ | 98595/371472 [7:50:27<20:54:10, 3.63it/s] 27%|██▋ | 98596/371472 [7:50:27<21:45:08, 3.48it/s] 27%|██▋ | 98597/371472 [7:50:27<21:27:07, 3.53it/s] 27%|██▋ | 98598/371472 [7:50:28<21:25:19, 3.54it/s] 27%|██▋ | 98599/371472 [7:50:28<22:13:02, 3.41it/s] 27%|██▋ | 98600/371472 [7:50:28<22:21:52, 3.39it/s] {'loss': 3.4535, 'learning_rate': 7.614686806438406e-07, 'epoch': 4.25} + 27%|██▋ | 98600/371472 [7:50:28<22:21:52, 3.39it/s] 27%|██▋ | 98601/371472 [7:50:29<21:54:16, 3.46it/s] 27%|██▋ | 98602/371472 [7:50:29<22:10:50, 3.42it/s] 27%|██▋ | 98603/371472 [7:50:29<26:13:42, 2.89it/s] 27%|██▋ | 98604/371472 [7:50:30<25:29:17, 2.97it/s] 27%|██▋ | 98605/371472 [7:50:30<24:19:32, 3.12it/s] 27%|██▋ | 98606/371472 [7:50:30<23:07:06, 3.28it/s] 27%|██▋ | 98607/371472 [7:50:31<22:55:57, 3.31it/s] 27%|██▋ | 98608/371472 [7:50:31<22:58:25, 3.30it/s] 27%|██▋ | 98609/371472 [7:50:31<21:59:07, 3.45it/s] 27%|██▋ | 98610/371472 [7:50:31<21:03:25, 3.60it/s] 27%|██▋ | 98611/371472 [7:50:32<22:20:04, 3.39it/s] 27%|██▋ | 98612/371472 [7:50:32<22:17:34, 3.40it/s] 27%|██▋ | 98613/371472 [7:50:32<21:09:15, 3.58it/s] 27%|██▋ | 98614/371472 [7:50:32<20:37:12, 3.68it/s] 27%|██▋ | 98615/371472 [7:50:33<22:48:00, 3.32it/s] 27%|██▋ | 98616/371472 [7:50:33<22:17:22, 3.40it/s] 27%|██▋ | 98617/371472 [7:50:33<22:07:02, 3.43it/s] 27%|██▋ | 98618/371472 [7:50:34<21:39:11, 3.50it/s] 27%|██▋ | 98619/371472 [7:50:34<21:28:17, 3.53it/s] 27%|██▋ | 98620/371472 [7:50:34<21:18:14, 3.56it/s] {'loss': 3.3266, 'learning_rate': 7.614201986683616e-07, 'epoch': 4.25} + 27%|██▋ | 98620/371472 [7:50:34<21:18:14, 3.56it/s] 27%|██▋ | 98621/371472 [7:50:35<21:12:58, 3.57it/s] 27%|██▋ | 98622/371472 [7:50:35<20:55:21, 3.62it/s] 27%|██▋ | 98623/371472 [7:50:35<20:30:19, 3.70it/s] 27%|██▋ | 98624/371472 [7:50:35<20:40:16, 3.67it/s] 27%|██▋ | 98625/371472 [7:50:36<21:27:03, 3.53it/s] 27%|██▋ | 98626/371472 [7:50:36<21:30:59, 3.52it/s] 27%|██▋ | 98627/371472 [7:50:36<22:07:48, 3.42it/s] 27%|██▋ | 98628/371472 [7:50:36<21:21:44, 3.55it/s] 27%|██▋ | 98629/371472 [7:50:37<21:24:58, 3.54it/s] 27%|██▋ | 98630/371472 [7:50:37<20:27:05, 3.71it/s] 27%|██▋ | 98631/371472 [7:50:37<23:14:50, 3.26it/s] 27%|██▋ | 98632/371472 [7:50:38<22:32:17, 3.36it/s] 27%|██▋ | 98633/371472 [7:50:38<21:33:36, 3.52it/s] 27%|██▋ | 98634/371472 [7:50:38<22:48:09, 3.32it/s] 27%|██▋ | 98635/371472 [7:50:39<21:57:28, 3.45it/s] 27%|██▋ | 98636/371472 [7:50:39<24:55:08, 3.04it/s] 27%|██▋ | 98637/371472 [7:50:39<23:22:32, 3.24it/s] 27%|██▋ | 98638/371472 [7:50:39<22:19:58, 3.39it/s] 27%|██▋ | 98639/371472 [7:50:40<23:00:52, 3.29it/s] 27%|██▋ | 98640/371472 [7:50:40<22:34:46, 3.36it/s] {'loss': 3.5386, 'learning_rate': 7.613717166928828e-07, 'epoch': 4.25} + 27%|██▋ | 98640/371472 [7:50:40<22:34:46, 3.36it/s] 27%|██▋ | 98641/371472 [7:50:40<21:50:12, 3.47it/s] 27%|██▋ | 98642/371472 [7:50:41<21:47:44, 3.48it/s] 27%|██▋ | 98643/371472 [7:50:41<21:08:44, 3.58it/s] 27%|██▋ | 98644/371472 [7:50:41<21:05:38, 3.59it/s] 27%|██▋ | 98645/371472 [7:50:41<20:45:41, 3.65it/s] 27%|██▋ | 98646/371472 [7:50:42<20:32:50, 3.69it/s] 27%|██▋ | 98647/371472 [7:50:42<22:42:13, 3.34it/s] 27%|██▋ | 98648/371472 [7:50:42<22:26:12, 3.38it/s] 27%|██▋ | 98649/371472 [7:50:43<21:09:17, 3.58it/s] 27%|██▋ | 98650/371472 [7:50:43<21:09:22, 3.58it/s] 27%|██▋ | 98651/371472 [7:50:43<22:12:47, 3.41it/s] 27%|██▋ | 98652/371472 [7:50:43<21:10:16, 3.58it/s] 27%|██▋ | 98653/371472 [7:50:44<20:37:11, 3.68it/s] 27%|██▋ | 98654/371472 [7:50:44<21:17:08, 3.56it/s] 27%|██▋ | 98655/371472 [7:50:44<22:56:30, 3.30it/s] 27%|██▋ | 98656/371472 [7:50:45<23:49:20, 3.18it/s] 27%|██▋ | 98657/371472 [7:50:45<22:29:49, 3.37it/s] 27%|██▋ | 98658/371472 [7:50:45<21:40:48, 3.50it/s] 27%|██▋ | 98659/371472 [7:50:46<22:42:20, 3.34it/s] 27%|██▋ | 98660/371472 [7:50:46<22:40:54, 3.34it/s] {'loss': 3.6697, 'learning_rate': 7.613232347174039e-07, 'epoch': 4.25} + 27%|██▋ | 98660/371472 [7:50:46<22:40:54, 3.34it/s] 27%|██▋ | 98661/371472 [7:50:46<21:50:30, 3.47it/s] 27%|██▋ | 98662/371472 [7:50:46<22:11:53, 3.41it/s] 27%|██▋ | 98663/371472 [7:50:47<21:34:55, 3.51it/s] 27%|██▋ | 98664/371472 [7:50:47<21:31:10, 3.52it/s] 27%|██▋ | 98665/371472 [7:50:47<21:08:04, 3.59it/s] 27%|██▋ | 98666/371472 [7:50:48<21:51:36, 3.47it/s] 27%|██▋ | 98667/371472 [7:50:48<21:26:29, 3.53it/s] 27%|██▋ | 98668/371472 [7:50:48<21:51:18, 3.47it/s] 27%|██▋ | 98669/371472 [7:50:48<21:33:40, 3.51it/s] 27%|██▋ | 98670/371472 [7:50:49<22:09:28, 3.42it/s] 27%|██▋ | 98671/371472 [7:50:49<21:51:20, 3.47it/s] 27%|██▋ | 98672/371472 [7:50:49<21:58:04, 3.45it/s] 27%|██▋ | 98673/371472 [7:50:50<21:45:24, 3.48it/s] 27%|██▋ | 98674/371472 [7:50:50<22:33:12, 3.36it/s] 27%|██▋ | 98675/371472 [7:50:50<22:40:05, 3.34it/s] 27%|██▋ | 98676/371472 [7:50:50<22:12:14, 3.41it/s] 27%|██▋ | 98677/371472 [7:50:51<21:59:29, 3.45it/s] 27%|██▋ | 98678/371472 [7:50:51<20:48:38, 3.64it/s] 27%|██▋ | 98679/371472 [7:50:51<20:54:16, 3.62it/s] 27%|██▋ | 98680/371472 [7:50:52<20:45:54, 3.65it/s] {'loss': 3.6676, 'learning_rate': 7.61274752741925e-07, 'epoch': 4.25} + 27%|██▋ | 98680/371472 [7:50:52<20:45:54, 3.65it/s] 27%|██▋ | 98681/371472 [7:50:52<21:24:45, 3.54it/s] 27%|██▋ | 98682/371472 [7:50:52<22:48:17, 3.32it/s] 27%|██▋ | 98683/371472 [7:50:52<21:39:28, 3.50it/s] 27%|██▋ | 98684/371472 [7:50:53<21:10:17, 3.58it/s] 27%|██▋ | 98685/371472 [7:50:53<21:38:49, 3.50it/s] 27%|██▋ | 98686/371472 [7:50:53<21:36:06, 3.51it/s] 27%|██▋ | 98687/371472 [7:50:54<21:18:20, 3.56it/s] 27%|██▋ | 98688/371472 [7:50:54<22:35:13, 3.35it/s] 27%|██▋ | 98689/371472 [7:50:54<22:34:11, 3.36it/s] 27%|██▋ | 98690/371472 [7:50:54<22:16:37, 3.40it/s] 27%|██▋ | 98691/371472 [7:50:55<21:46:10, 3.48it/s] 27%|██▋ | 98692/371472 [7:50:55<22:07:07, 3.43it/s] 27%|██▋ | 98693/371472 [7:50:55<21:32:03, 3.52it/s] 27%|██▋ | 98694/371472 [7:50:56<20:59:55, 3.61it/s] 27%|██▋ | 98695/371472 [7:50:56<21:41:06, 3.49it/s] 27%|█��▋ | 98696/371472 [7:50:56<21:24:35, 3.54it/s] 27%|██▋ | 98697/371472 [7:50:56<21:26:30, 3.53it/s] 27%|██▋ | 98698/371472 [7:50:57<21:57:32, 3.45it/s] 27%|██▋ | 98699/371472 [7:50:57<21:11:46, 3.57it/s] 27%|██▋ | 98700/371472 [7:50:57<20:55:30, 3.62it/s] {'loss': 3.5347, 'learning_rate': 7.612262707664461e-07, 'epoch': 4.25} + 27%|██▋ | 98700/371472 [7:50:57<20:55:30, 3.62it/s] 27%|██▋ | 98701/371472 [7:50:58<20:31:04, 3.69it/s] 27%|██▋ | 98702/371472 [7:50:58<22:11:07, 3.42it/s] 27%|██▋ | 98703/371472 [7:50:58<21:29:37, 3.53it/s] 27%|██▋ | 98704/371472 [7:50:58<22:04:01, 3.43it/s] 27%|██▋ | 98705/371472 [7:50:59<21:55:32, 3.46it/s] 27%|██▋ | 98706/371472 [7:50:59<23:46:53, 3.19it/s] 27%|██▋ | 98707/371472 [7:50:59<23:31:41, 3.22it/s] 27%|██▋ | 98708/371472 [7:51:00<23:12:40, 3.26it/s] 27%|██▋ | 98709/371472 [7:51:00<23:19:15, 3.25it/s] 27%|██▋ | 98710/371472 [7:51:00<22:25:37, 3.38it/s] 27%|██▋ | 98711/371472 [7:51:01<21:32:09, 3.52it/s] 27%|██▋ | 98712/371472 [7:51:01<21:25:55, 3.54it/s] 27%|██▋ | 98713/371472 [7:51:01<22:36:45, 3.35it/s] 27%|██▋ | 98714/371472 [7:51:01<21:53:01, 3.46it/s] 27%|██▋ | 98715/371472 [7:51:02<23:43:53, 3.19it/s] 27%|██▋ | 98716/371472 [7:51:02<23:37:19, 3.21it/s] 27%|██▋ | 98717/371472 [7:51:02<22:05:11, 3.43it/s] 27%|██▋ | 98718/371472 [7:51:03<21:30:03, 3.52it/s] 27%|██▋ | 98719/371472 [7:51:03<20:54:39, 3.62it/s] 27%|██▋ | 98720/371472 [7:51:03<22:11:30, 3.41it/s] {'loss': 3.5547, 'learning_rate': 7.611777887909673e-07, 'epoch': 4.25} + 27%|██▋ | 98720/371472 [7:51:03<22:11:30, 3.41it/s] 27%|██▋ | 98721/371472 [7:51:03<21:53:52, 3.46it/s] 27%|██▋ | 98722/371472 [7:51:04<20:59:03, 3.61it/s] 27%|██▋ | 98723/371472 [7:51:04<20:39:44, 3.67it/s] 27%|██▋ | 98724/371472 [7:51:04<21:11:37, 3.57it/s] 27%|██▋ | 98725/371472 [7:51:05<20:39:11, 3.67it/s] 27%|██▋ | 98726/371472 [7:51:05<20:45:08, 3.65it/s] 27%|██▋ | 98727/371472 [7:51:05<21:31:45, 3.52it/s] 27%|██▋ | 98728/371472 [7:51:05<21:55:28, 3.46it/s] 27%|██▋ | 98729/371472 [7:51:06<22:04:10, 3.43it/s] 27%|██▋ | 98730/371472 [7:51:06<21:34:49, 3.51it/s] 27%|██▋ | 98731/371472 [7:51:06<21:46:05, 3.48it/s] 27%|██▋ | 98732/371472 [7:51:07<21:25:16, 3.54it/s] 27%|██▋ | 98733/371472 [7:51:07<22:05:12, 3.43it/s] 27%|██▋ | 98734/371472 [7:51:07<21:18:22, 3.56it/s] 27%|██▋ | 98735/371472 [7:51:07<22:52:22, 3.31it/s] 27%|██▋ | 98736/371472 [7:51:08<22:34:47, 3.36it/s] 27%|██▋ | 98737/371472 [7:51:08<24:52:43, 3.05it/s] 27%|██▋ | 98738/371472 [7:51:08<23:34:55, 3.21it/s] 27%|██▋ | 98739/371472 [7:51:09<23:19:03, 3.25it/s] 27%|██▋ | 98740/371472 [7:51:09<22:23:16, 3.38it/s] {'loss': 3.6636, 'learning_rate': 7.611293068154883e-07, 'epoch': 4.25} + 27%|██▋ | 98740/371472 [7:51:09<22:23:16, 3.38it/s] 27%|██▋ | 98741/371472 [7:51:09<21:42:16, 3.49it/s] 27%|██▋ | 98742/371472 [7:51:10<20:58:55, 3.61it/s] 27%|██▋ | 98743/371472 [7:51:10<20:31:19, 3.69it/s] 27%|██▋ | 98744/371472 [7:51:10<20:34:59, 3.68it/s] 27%|██▋ | 98745/371472 [7:51:10<20:58:32, 3.61it/s] 27%|██▋ | 98746/371472 [7:51:11<22:21:27, 3.39it/s] 27%|██▋ | 98747/371472 [7:51:11<22:28:23, 3.37it/s] 27%|██▋ | 98748/371472 [7:51:11<22:45:48, 3.33it/s] 27%|██▋ | 98749/371472 [7:51:12<21:31:38, 3.52it/s] 27%|██▋ | 98750/371472 [7:51:12<21:57:47, 3.45it/s] 27%|██▋ | 98751/371472 [7:51:12<21:17:17, 3.56it/s] 27%|██▋ | 98752/371472 [7:51:12<20:35:25, 3.68it/s] 27%|██▋ | 98753/371472 [7:51:13<21:36:47, 3.51it/s] 27%|██▋ | 98754/371472 [7:51:13<22:26:48, 3.37it/s] 27%|██▋ | 98755/371472 [7:51:13<21:24:47, 3.54it/s] 27%|██▋ | 98756/371472 [7:51:13<20:52:53, 3.63it/s] 27%|██▋ | 98757/371472 [7:51:14<20:58:15, 3.61it/s] 27%|██▋ | 98758/371472 [7:51:14<20:35:28, 3.68it/s] 27%|██▋ | 98759/371472 [7:51:14<20:45:13, 3.65it/s] 27%|██▋ | 98760/371472 [7:51:15<20:05:28, 3.77it/s] {'loss': 3.5236, 'learning_rate': 7.610808248400094e-07, 'epoch': 4.25} + 27%|██▋ | 98760/371472 [7:51:15<20:05:28, 3.77it/s] 27%|██▋ | 98761/371472 [7:51:15<21:40:33, 3.49it/s] 27%|██▋ | 98762/371472 [7:51:15<21:17:48, 3.56it/s] 27%|██▋ | 98763/371472 [7:51:16<22:45:32, 3.33it/s] 27%|██▋ | 98764/371472 [7:51:16<22:52:53, 3.31it/s] 27%|██▋ | 98765/371472 [7:51:16<22:55:40, 3.30it/s] 27%|██▋ | 98766/371472 [7:51:16<21:59:27, 3.44it/s] 27%|██▋ | 98767/371472 [7:51:17<22:21:45, 3.39it/s] 27%|██▋ | 98768/371472 [7:51:17<22:59:20, 3.30it/s] 27%|██▋ | 98769/371472 [7:51:17<22:59:13, 3.30it/s] 27%|██▋ | 98770/371472 [7:51:18<24:02:28, 3.15it/s] 27%|██▋ | 98771/371472 [7:51:18<23:01:33, 3.29it/s] 27%|██▋ | 98772/371472 [7:51:18<22:53:31, 3.31it/s] 27%|██▋ | 98773/371472 [7:51:19<23:19:33, 3.25it/s] 27%|██▋ | 98774/371472 [7:51:19<23:19:55, 3.25it/s] 27%|██▋ | 98775/371472 [7:51:19<22:37:14, 3.35it/s] 27%|██▋ | 98776/371472 [7:51:19<21:53:54, 3.46it/s] 27%|██▋ | 98777/371472 [7:51:20<20:52:51, 3.63it/s] 27%|██▋ | 98778/371472 [7:51:20<21:06:43, 3.59it/s] 27%|██▋ | 98779/371472 [7:51:20<21:55:44, 3.45it/s] 27%|██▋ | 98780/371472 [7:51:21<21:18:26, 3.55it/s] {'loss': 3.5066, 'learning_rate': 7.610323428645305e-07, 'epoch': 4.25} + 27%|██▋ | 98780/371472 [7:51:21<21:18:26, 3.55it/s] 27%|██▋ | 98781/371472 [7:51:21<20:35:45, 3.68it/s] 27%|██▋ | 98782/371472 [7:51:21<20:31:40, 3.69it/s] 27%|██▋ | 98783/371472 [7:51:21<20:55:49, 3.62it/s] 27%|██▋ | 98784/371472 [7:51:22<20:34:33, 3.68it/s] 27%|██▋ | 98785/371472 [7:51:22<20:31:16, 3.69it/s] 27%|██▋ | 98786/371472 [7:51:22<19:58:53, 3.79it/s] 27%|██▋ | 98787/371472 [7:51:22<19:49:22, 3.82it/s] 27%|██▋ | 98788/371472 [7:51:23<20:26:28, 3.71it/s] 27%|██▋ | 98789/371472 [7:51:23<21:18:50, 3.55it/s] 27%|██▋ | 98790/371472 [7:51:23<21:12:06, 3.57it/s] 27%|██▋ | 98791/371472 [7:51:23<20:49:03, 3.64it/s] 27%|██▋ | 98792/371472 [7:51:24<21:19:49, 3.55it/s] 27%|██▋ | 98793/371472 [7:51:24<20:44:32, 3.65it/s] 27%|██▋ | 98794/371472 [7:51:24<20:42:08, 3.66it/s] 27%|██▋ | 98795/371472 [7:51:25<22:16:37, 3.40it/s] 27%|██▋ | 98796/371472 [7:51:25<22:09:44, 3.42it/s] 27%|██▋ | 98797/371472 [7:51:25<22:43:14, 3.33it/s] 27%|██▋ | 98798/371472 [7:51:26<22:14:55, 3.40it/s] 27%|██▋ | 98799/371472 [7:51:26<21:59:39, 3.44it/s] 27%|██▋ | 98800/371472 [7:51:26<22:03:58, 3.43it/s] {'loss': 3.7395, 'learning_rate': 7.609838608890517e-07, 'epoch': 4.26} + 27%|██▋ | 98800/371472 [7:51:26<22:03:58, 3.43it/s] 27%|██▋ | 98801/371472 [7:51:26<22:11:54, 3.41it/s] 27%|██▋ | 98802/371472 [7:51:27<22:01:41, 3.44it/s] 27%|██▋ | 98803/371472 [7:51:27<21:50:37, 3.47it/s] 27%|██▋ | 98804/371472 [7:51:27<21:51:05, 3.47it/s] 27%|██▋ | 98805/371472 [7:51:28<21:14:17, 3.57it/s] 27%|██▋ | 98806/371472 [7:51:28<20:45:35, 3.65it/s] 27%|██▋ | 98807/371472 [7:51:28<21:34:16, 3.51it/s] 27%|██▋ | 98808/371472 [7:51:28<21:27:23, 3.53it/s] 27%|██▋ | 98809/371472 [7:51:29<21:14:43, 3.57it/s] 27%|██▋ | 98810/371472 [7:51:29<20:51:22, 3.63it/s] 27%|██▋ | 98811/371472 [7:51:29<20:58:00, 3.61it/s] 27%|██▋ | 98812/371472 [7:51:29<20:26:21, 3.71it/s] 27%|██▋ | 98813/371472 [7:51:30<20:11:57, 3.75it/s] 27%|██▋ | 98814/371472 [7:51:30<21:09:02, 3.58it/s] 27%|██▋ | 98815/371472 [7:51:30<21:12:45, 3.57it/s] 27%|██▋ | 98816/371472 [7:51:31<23:32:09, 3.22it/s] 27%|██▋ | 98817/371472 [7:51:31<24:00:42, 3.15it/s] 27%|██▋ | 98818/371472 [7:51:31<23:43:32, 3.19it/s] 27%|██▋ | 98819/371472 [7:51:32<22:58:21, 3.30it/s] 27%|██▋ | 98820/371472 [7:51:32<22:55:32, 3.30it/s] {'loss': 3.6356, 'learning_rate': 7.609353789135728e-07, 'epoch': 4.26} + 27%|██▋ | 98820/371472 [7:51:32<22:55:32, 3.30it/s] 27%|██▋ | 98821/371472 [7:51:32<22:01:35, 3.44it/s] 27%|██▋ | 98822/371472 [7:51:32<21:01:55, 3.60it/s] 27%|██▋ | 98823/371472 [7:51:33<20:24:28, 3.71it/s] 27%|██▋ | 98824/371472 [7:51:33<19:47:46, 3.83it/s] 27%|██▋ | 98825/371472 [7:51:33<19:49:54, 3.82it/s] 27%|██▋ | 98826/371472 [7:51:33<20:07:04, 3.76it/s] 27%|██▋ | 98827/371472 [7:51:34<21:09:45, 3.58it/s] 27%|██▋ | 98828/371472 [7:51:34<20:23:29, 3.71it/s] 27%|██▋ | 98829/371472 [7:51:34<20:18:21, 3.73it/s] 27%|���█▋ | 98830/371472 [7:51:35<21:29:17, 3.52it/s] 27%|██▋ | 98831/371472 [7:51:35<21:01:35, 3.60it/s] 27%|██▋ | 98832/371472 [7:51:35<20:52:56, 3.63it/s] 27%|██▋ | 98833/371472 [7:51:35<20:26:38, 3.70it/s] 27%|██▋ | 98834/371472 [7:51:36<20:15:28, 3.74it/s] 27%|██▋ | 98835/371472 [7:51:36<21:06:22, 3.59it/s] 27%|██▋ | 98836/371472 [7:51:36<20:52:53, 3.63it/s] 27%|██▋ | 98837/371472 [7:51:36<20:37:55, 3.67it/s] 27%|██▋ | 98838/371472 [7:51:37<21:07:08, 3.59it/s] 27%|██▋ | 98839/371472 [7:51:37<21:56:11, 3.45it/s] 27%|██▋ | 98840/371472 [7:51:37<21:10:05, 3.58it/s] {'loss': 3.539, 'learning_rate': 7.608868969380939e-07, 'epoch': 4.26} + 27%|██▋ | 98840/371472 [7:51:37<21:10:05, 3.58it/s] 27%|██▋ | 98841/371472 [7:51:38<23:52:29, 3.17it/s] 27%|██▋ | 98842/371472 [7:51:38<22:58:21, 3.30it/s] 27%|██▋ | 98843/371472 [7:51:38<22:39:19, 3.34it/s] 27%|██▋ | 98844/371472 [7:51:39<21:36:21, 3.51it/s] 27%|██▋ | 98845/371472 [7:51:39<21:38:01, 3.50it/s] 27%|██▋ | 98846/371472 [7:51:39<21:22:01, 3.54it/s] 27%|██▋ | 98847/371472 [7:51:39<21:35:34, 3.51it/s] 27%|██▋ | 98848/371472 [7:51:40<21:01:08, 3.60it/s] 27%|██▋ | 98849/371472 [7:51:40<20:41:56, 3.66it/s] 27%|██▋ | 98850/371472 [7:51:40<22:24:59, 3.38it/s] 27%|██▋ | 98851/371472 [7:51:41<21:35:17, 3.51it/s] 27%|██▋ | 98852/371472 [7:51:41<22:32:02, 3.36it/s] 27%|██▋ | 98853/371472 [7:51:41<21:45:26, 3.48it/s] 27%|██▋ | 98854/371472 [7:51:41<22:09:47, 3.42it/s] 27%|██▋ | 98855/371472 [7:51:42<22:37:07, 3.35it/s] 27%|██▋ | 98856/371472 [7:51:42<21:49:22, 3.47it/s] 27%|██▋ | 98857/371472 [7:51:42<23:22:11, 3.24it/s] 27%|██▋ | 98858/371472 [7:51:43<23:29:25, 3.22it/s] 27%|██▋ | 98859/371472 [7:51:43<22:50:13, 3.32it/s] 27%|██▋ | 98860/371472 [7:51:43<21:52:30, 3.46it/s] {'loss': 3.5006, 'learning_rate': 7.60838414962615e-07, 'epoch': 4.26} + 27%|██▋ | 98860/371472 [7:51:43<21:52:30, 3.46it/s] 27%|██▋ | 98861/371472 [7:51:43<21:02:44, 3.60it/s] 27%|██▋ | 98862/371472 [7:51:44<20:26:01, 3.71it/s] 27%|██▋ | 98863/371472 [7:51:44<20:06:50, 3.76it/s] 27%|██▋ | 98864/371472 [7:51:44<19:45:02, 3.83it/s] 27%|██▋ | 98865/371472 [7:51:45<19:53:53, 3.81it/s] 27%|██▋ | 98866/371472 [7:51:45<20:44:43, 3.65it/s] 27%|██▋ | 98867/371472 [7:51:45<21:17:42, 3.56it/s] 27%|██▋ | 98868/371472 [7:51:45<21:11:28, 3.57it/s] 27%|██▋ | 98869/371472 [7:51:46<22:26:58, 3.37it/s] 27%|██▋ | 98870/371472 [7:51:46<21:56:45, 3.45it/s] 27%|██▋ | 98871/371472 [7:51:46<22:39:32, 3.34it/s] 27%|██▋ | 98872/371472 [7:51:47<22:05:44, 3.43it/s] 27%|██▋ | 98873/371472 [7:51:47<21:45:29, 3.48it/s] 27%|██▋ | 98874/371472 [7:51:47<23:50:37, 3.18it/s] 27%|██▋ | 98875/371472 [7:51:48<22:32:01, 3.36it/s] 27%|██▋ | 98876/371472 [7:51:48<22:00:57, 3.44it/s] 27%|██▋ | 98877/371472 [7:51:48<23:29:54, 3.22it/s] 27%|██▋ | 98878/371472 [7:51:48<22:50:45, 3.31it/s] 27%|██▋ | 98879/371472 [7:51:49<22:08:16, 3.42it/s] 27%|██▋ | 98880/371472 [7:51:49<23:04:45, 3.28it/s] {'loss': 3.7159, 'learning_rate': 7.607899329871361e-07, 'epoch': 4.26} + 27%|██▋ | 98880/371472 [7:51:49<23:04:45, 3.28it/s] 27%|██▋ | 98881/371472 [7:51:49<22:15:27, 3.40it/s] 27%|██▋ | 98882/371472 [7:51:50<21:32:34, 3.51it/s] 27%|██▋ | 98883/371472 [7:51:50<20:27:55, 3.70it/s] 27%|██▋ | 98884/371472 [7:51:50<20:14:36, 3.74it/s] 27%|██▋ | 98885/371472 [7:51:50<19:45:41, 3.83it/s] 27%|██▋ | 98886/371472 [7:51:51<19:48:34, 3.82it/s] 27%|██▋ | 98887/371472 [7:51:51<20:05:49, 3.77it/s] 27%|██▋ | 98888/371472 [7:51:51<20:20:31, 3.72it/s] 27%|██▋ | 98889/371472 [7:51:51<20:29:41, 3.69it/s] 27%|██▋ | 98890/371472 [7:51:52<20:50:31, 3.63it/s] 27%|██▋ | 98891/371472 [7:51:52<20:44:03, 3.65it/s] 27%|██▋ | 98892/371472 [7:51:52<20:24:51, 3.71it/s] 27%|██▋ | 98893/371472 [7:51:53<22:20:13, 3.39it/s] 27%|██▋ | 98894/371472 [7:51:53<23:20:10, 3.24it/s] 27%|██▋ | 98895/371472 [7:51:53<21:58:50, 3.44it/s] 27%|██▋ | 98896/371472 [7:51:53<21:34:18, 3.51it/s] 27%|██▋ | 98897/371472 [7:51:54<21:23:56, 3.54it/s] 27%|██▋ | 98898/371472 [7:51:54<21:14:43, 3.56it/s] 27%|██▋ | 98899/371472 [7:51:54<21:50:25, 3.47it/s] 27%|██▋ | 98900/371472 [7:51:55<21:13:42, 3.57it/s] {'loss': 3.672, 'learning_rate': 7.607414510116572e-07, 'epoch': 4.26} + 27%|██▋ | 98900/371472 [7:51:55<21:13:42, 3.57it/s] 27%|██▋ | 98901/371472 [7:51:55<20:56:02, 3.62it/s] 27%|██▋ | 98902/371472 [7:51:55<20:14:42, 3.74it/s] 27%|██▋ | 98903/371472 [7:51:55<21:55:22, 3.45it/s] 27%|██▋ | 98904/371472 [7:51:56<22:40:29, 3.34it/s] 27%|██▋ | 98905/371472 [7:51:56<21:35:10, 3.51it/s] 27%|██▋ | 98906/371472 [7:51:56<22:52:54, 3.31it/s] 27%|██▋ | 98907/371472 [7:51:57<22:27:42, 3.37it/s] 27%|██▋ | 98908/371472 [7:51:57<24:09:37, 3.13it/s] 27%|██▋ | 98909/371472 [7:51:57<23:03:16, 3.28it/s] 27%|██▋ | 98910/371472 [7:51:57<22:07:23, 3.42it/s] 27%|██▋ | 98911/371472 [7:51:58<21:34:27, 3.51it/s] 27%|██▋ | 98912/371472 [7:51:58<22:50:51, 3.31it/s] 27%|██▋ | 98913/371472 [7:51:58<22:49:28, 3.32it/s] 27%|██▋ | 98914/371472 [7:51:59<22:26:28, 3.37it/s] 27%|██▋ | 98915/371472 [7:51:59<22:00:21, 3.44it/s] 27%|██▋ | 98916/371472 [7:51:59<21:58:54, 3.44it/s] 27%|██▋ | 98917/371472 [7:52:00<22:51:11, 3.31it/s] 27%|██▋ | 98918/371472 [7:52:00<24:07:11, 3.14it/s] 27%|██▋ | 98919/371472 [7:52:00<22:53:35, 3.31it/s] 27%|██▋ | 98920/371472 [7:52:00<21:29:28, 3.52it/s] {'loss': 3.5287, 'learning_rate': 7.606929690361784e-07, 'epoch': 4.26} + 27%|██▋ | 98920/371472 [7:52:00<21:29:28, 3.52it/s] 27%|██▋ | 98921/371472 [7:52:01<21:13:22, 3.57it/s] 27%|██▋ | 98922/371472 [7:52:01<21:16:36, 3.56it/s] 27%|██▋ | 98923/371472 [7:52:01<20:49:06, 3.64it/s] 27%|██▋ | 98924/371472 [7:52:02<21:38:51, 3.50it/s] 27%|██▋ | 98925/371472 [7:52:02<21:21:34, 3.54it/s] 27%|██▋ | 98926/371472 [7:52:02<21:36:02, 3.50it/s] 27%|██▋ | 98927/371472 [7:52:02<21:40:01, 3.49it/s] 27%|██▋ | 98928/371472 [7:52:03<21:10:52, 3.57it/s] 27%|██▋ | 98929/371472 [7:52:03<21:26:49, 3.53it/s] 27%|██▋ | 98930/371472 [7:52:03<21:16:19, 3.56it/s] 27%|██▋ | 98931/371472 [7:52:04<21:02:46, 3.60it/s] 27%|██▋ | 98932/371472 [7:52:04<21:06:30, 3.59it/s] 27%|██▋ | 98933/371472 [7:52:04<21:02:03, 3.60it/s] 27%|██▋ | 98934/371472 [7:52:04<20:49:15, 3.64it/s] 27%|██▋ | 98935/371472 [7:52:05<21:04:13, 3.59it/s] 27%|██▋ | 98936/371472 [7:52:05<21:14:56, 3.56it/s] 27%|██▋ | 98937/371472 [7:52:05<21:11:13, 3.57it/s] 27%|██▋ | 98938/371472 [7:52:05<20:47:29, 3.64it/s] 27%|██▋ | 98939/371472 [7:52:06<20:28:13, 3.70it/s] 27%|██▋ | 98940/371472 [7:52:06<20:05:58, 3.77it/s] {'loss': 3.5974, 'learning_rate': 7.606444870606994e-07, 'epoch': 4.26} + 27%|██▋ | 98940/371472 [7:52:06<20:05:58, 3.77it/s] 27%|██▋ | 98941/371472 [7:52:06<20:18:45, 3.73it/s] 27%|██▋ | 98942/371472 [7:52:07<22:14:22, 3.40it/s] 27%|██▋ | 98943/371472 [7:52:07<23:17:03, 3.25it/s] 27%|██▋ | 98944/371472 [7:52:07<24:15:41, 3.12it/s] 27%|██▋ | 98945/371472 [7:52:08<23:16:14, 3.25it/s] 27%|██▋ | 98946/371472 [7:52:08<22:33:15, 3.36it/s] 27%|██▋ | 98947/371472 [7:52:08<25:02:48, 3.02it/s] 27%|██▋ | 98948/371472 [7:52:09<23:54:17, 3.17it/s] 27%|██▋ | 98949/371472 [7:52:09<23:21:26, 3.24it/s] 27%|██▋ | 98950/371472 [7:52:09<24:09:11, 3.13it/s] 27%|██▋ | 98951/371472 [7:52:09<23:11:55, 3.26it/s] 27%|██▋ | 98952/371472 [7:52:10<23:05:42, 3.28it/s] 27%|██▋ | 98953/371472 [7:52:10<21:46:51, 3.48it/s] 27%|██▋ | 98954/371472 [7:52:10<21:03:19, 3.60it/s] 27%|██▋ | 98955/371472 [7:52:11<20:14:33, 3.74it/s] 27%|██▋ | 98956/371472 [7:52:11<22:08:46, 3.42it/s] 27%|██▋ | 98957/371472 [7:52:11<21:26:16, 3.53it/s] 27%|██▋ | 98958/371472 [7:52:11<20:48:54, 3.64it/s] 27%|██▋ | 98959/371472 [7:52:12<20:38:18, 3.67it/s] 27%|██▋ | 98960/371472 [7:52:12<20:01:27, 3.78it/s] {'loss': 3.565, 'learning_rate': 7.605960050852206e-07, 'epoch': 4.26} + 27%|██▋ | 98960/371472 [7:52:12<20:01:27, 3.78it/s] 27%|██▋ | 98961/371472 [7:52:12<20:18:11, 3.73it/s] 27%|██▋ | 98962/371472 [7:52:12<20:34:22, 3.68it/s] 27%|██▋ | 98963/371472 [7:52:13<20:55:07, 3.62it/s] 27%|���█▋ | 98964/371472 [7:52:13<20:18:33, 3.73it/s] 27%|██▋ | 98965/371472 [7:52:13<20:25:26, 3.71it/s] 27%|██▋ | 98966/371472 [7:52:14<22:15:09, 3.40it/s] 27%|██▋ | 98967/371472 [7:52:14<21:50:34, 3.47it/s] 27%|██▋ | 98968/371472 [7:52:14<21:03:47, 3.59it/s] 27%|██▋ | 98969/371472 [7:52:15<23:42:45, 3.19it/s] 27%|██▋ | 98970/371472 [7:52:15<23:45:36, 3.19it/s] 27%|██▋ | 98971/371472 [7:52:15<22:47:17, 3.32it/s] 27%|██▋ | 98972/371472 [7:52:15<21:40:55, 3.49it/s] 27%|██▋ | 98973/371472 [7:52:16<21:07:46, 3.58it/s] 27%|██▋ | 98974/371472 [7:52:16<21:15:28, 3.56it/s] 27%|██▋ | 98975/371472 [7:52:16<22:06:40, 3.42it/s] 27%|██▋ | 98976/371472 [7:52:16<21:31:46, 3.52it/s] 27%|██▋ | 98977/371472 [7:52:17<21:21:51, 3.54it/s] 27%|██▋ | 98978/371472 [7:52:17<22:22:58, 3.38it/s] 27%|██▋ | 98979/371472 [7:52:17<22:24:08, 3.38it/s] 27%|██▋ | 98980/371472 [7:52:18<21:53:04, 3.46it/s] {'loss': 3.5943, 'learning_rate': 7.605475231097417e-07, 'epoch': 4.26} + 27%|██▋ | 98980/371472 [7:52:18<21:53:04, 3.46it/s] 27%|██▋ | 98981/371472 [7:52:18<21:33:58, 3.51it/s] 27%|██▋ | 98982/371472 [7:52:18<21:55:23, 3.45it/s] 27%|██▋ | 98983/371472 [7:52:19<21:15:37, 3.56it/s] 27%|██▋ | 98984/371472 [7:52:19<21:02:57, 3.60it/s] 27%|██▋ | 98985/371472 [7:52:19<23:16:20, 3.25it/s] 27%|██▋ | 98986/371472 [7:52:19<21:53:40, 3.46it/s] 27%|██▋ | 98987/371472 [7:52:20<20:59:27, 3.61it/s] 27%|██▋ | 98988/371472 [7:52:20<20:27:52, 3.70it/s] 27%|██▋ | 98989/371472 [7:52:20<20:42:27, 3.66it/s] 27%|██▋ | 98990/371472 [7:52:20<20:45:26, 3.65it/s] 27%|██▋ | 98991/371472 [7:52:21<20:46:15, 3.64it/s] 27%|██▋ | 98992/371472 [7:52:21<19:57:31, 3.79it/s] 27%|██▋ | 98993/371472 [7:52:21<20:59:15, 3.61it/s] 27%|██▋ | 98994/371472 [7:52:22<21:21:29, 3.54it/s] 27%|██▋ | 98995/371472 [7:52:22<21:16:27, 3.56it/s] 27%|██▋ | 98996/371472 [7:52:22<21:42:08, 3.49it/s] 27%|██▋ | 98997/371472 [7:52:22<21:12:46, 3.57it/s] 27%|██▋ | 98998/371472 [7:52:23<20:42:01, 3.66it/s] 27%|██▋ | 98999/371472 [7:52:23<23:00:59, 3.29it/s] 27%|██▋ | 99000/371472 [7:52:23<22:28:36, 3.37it/s] {'loss': 3.6006, 'learning_rate': 7.604990411342626e-07, 'epoch': 4.26} + 27%|██▋ | 99000/371472 [7:52:23<22:28:36, 3.37it/s] 27%|██▋ | 99001/371472 [7:52:24<22:27:47, 3.37it/s] 27%|██▋ | 99002/371472 [7:52:24<21:11:47, 3.57it/s] 27%|██▋ | 99003/371472 [7:52:24<21:56:06, 3.45it/s] 27%|██▋ | 99004/371472 [7:52:24<21:36:07, 3.50it/s] 27%|██▋ | 99005/371472 [7:52:25<21:06:26, 3.59it/s] 27%|██▋ | 99006/371472 [7:52:25<20:47:24, 3.64it/s] 27%|██▋ | 99007/371472 [7:52:25<20:55:36, 3.62it/s] 27%|██▋ | 99008/371472 [7:52:26<20:56:55, 3.61it/s] 27%|██▋ | 99009/371472 [7:52:26<20:47:13, 3.64it/s] 27%|██▋ | 99010/371472 [7:52:26<20:45:52, 3.64it/s] 27%|██▋ | 99011/371472 [7:52:26<20:24:35, 3.71it/s] 27%|██▋ | 99012/371472 [7:52:27<20:58:31, 3.61it/s] 27%|██▋ | 99013/371472 [7:52:27<20:46:09, 3.64it/s] 27%|██▋ | 99014/371472 [7:52:27<21:55:40, 3.45it/s] 27%|██▋ | 99015/371472 [7:52:28<21:31:06, 3.52it/s] 27%|██▋ | 99016/371472 [7:52:28<21:22:44, 3.54it/s] 27%|██▋ | 99017/371472 [7:52:28<21:31:22, 3.52it/s] 27%|██▋ | 99018/371472 [7:52:28<21:36:33, 3.50it/s] 27%|██▋ | 99019/371472 [7:52:29<21:09:24, 3.58it/s] 27%|██▋ | 99020/371472 [7:52:29<21:46:49, 3.47it/s] {'loss': 3.566, 'learning_rate': 7.604505591587838e-07, 'epoch': 4.26} + 27%|██▋ | 99020/371472 [7:52:29<21:46:49, 3.47it/s] 27%|██▋ | 99021/371472 [7:52:29<21:47:36, 3.47it/s] 27%|██▋ | 99022/371472 [7:52:30<22:29:11, 3.37it/s] 27%|██▋ | 99023/371472 [7:52:30<22:18:01, 3.39it/s] 27%|██▋ | 99024/371472 [7:52:30<21:19:10, 3.55it/s] 27%|██▋ | 99025/371472 [7:52:30<20:59:18, 3.61it/s] 27%|██▋ | 99026/371472 [7:52:31<20:31:38, 3.69it/s] 27%|██▋ | 99027/371472 [7:52:31<21:11:55, 3.57it/s] 27%|██▋ | 99028/371472 [7:52:31<20:53:22, 3.62it/s] 27%|██▋ | 99029/371472 [7:52:31<20:22:31, 3.71it/s] 27%|██▋ | 99030/371472 [7:52:32<20:23:42, 3.71it/s] 27%|██▋ | 99031/371472 [7:52:32<20:35:36, 3.67it/s] 27%|██▋ | 99032/371472 [7:52:32<21:05:57, 3.59it/s] 27%|██▋ | 99033/371472 [7:52:33<20:47:13, 3.64it/s] 27%|██▋ | 99034/371472 [7:52:33<21:31:19, 3.52it/s] 27%|██▋ | 99035/371472 [7:52:33<23:24:12, 3.23it/s] 27%|██▋ | 99036/371472 [7:52:34<23:04:37, 3.28it/s] 27%|██▋ | 99037/371472 [7:52:34<24:00:20, 3.15it/s] 27%|██▋ | 99038/371472 [7:52:34<24:57:50, 3.03it/s] 27%|██▋ | 99039/371472 [7:52:35<25:17:33, 2.99it/s] 27%|██▋ | 99040/371472 [7:52:35<23:32:03, 3.22it/s] {'loss': 3.6672, 'learning_rate': 7.60402077183305e-07, 'epoch': 4.27} + 27%|██▋ | 99040/371472 [7:52:35<23:32:03, 3.22it/s] 27%|██▋ | 99041/371472 [7:52:35<23:15:41, 3.25it/s] 27%|██▋ | 99042/371472 [7:52:35<22:14:38, 3.40it/s] 27%|██▋ | 99043/371472 [7:52:36<21:56:25, 3.45it/s] 27%|██▋ | 99044/371472 [7:52:36<22:09:41, 3.41it/s] 27%|██▋ | 99045/371472 [7:52:36<22:14:07, 3.40it/s] 27%|██▋ | 99046/371472 [7:52:37<21:47:02, 3.47it/s] 27%|██▋ | 99047/371472 [7:52:37<21:58:25, 3.44it/s] 27%|██▋ | 99048/371472 [7:52:37<21:29:48, 3.52it/s] 27%|██▋ | 99049/371472 [7:52:37<20:41:32, 3.66it/s] 27%|██▋ | 99050/371472 [7:52:38<22:44:34, 3.33it/s] 27%|██▋ | 99051/371472 [7:52:38<22:12:52, 3.41it/s] 27%|██▋ | 99052/371472 [7:52:38<22:19:25, 3.39it/s] 27%|██▋ | 99053/371472 [7:52:39<21:52:24, 3.46it/s] 27%|██▋ | 99054/371472 [7:52:39<22:17:21, 3.39it/s] 27%|██▋ | 99055/371472 [7:52:39<21:10:11, 3.57it/s] 27%|██▋ | 99056/371472 [7:52:39<21:20:14, 3.55it/s] 27%|██▋ | 99057/371472 [7:52:40<20:51:54, 3.63it/s] 27%|██▋ | 99058/371472 [7:52:40<20:19:27, 3.72it/s] 27%|██▋ | 99059/371472 [7:52:40<20:09:41, 3.75it/s] 27%|██▋ | 99060/371472 [7:52:40<21:00:10, 3.60it/s] {'loss': 3.4539, 'learning_rate': 7.60353595207826e-07, 'epoch': 4.27} + 27%|██▋ | 99060/371472 [7:52:40<21:00:10, 3.60it/s] 27%|██▋ | 99061/371472 [7:52:41<20:24:13, 3.71it/s] 27%|██▋ | 99062/371472 [7:52:41<20:15:51, 3.73it/s] 27%|██▋ | 99063/371472 [7:52:41<20:14:51, 3.74it/s] 27%|██▋ | 99064/371472 [7:52:42<19:49:51, 3.82it/s] 27%|██▋ | 99065/371472 [7:52:42<19:35:54, 3.86it/s] 27%|██▋ | 99066/371472 [7:52:42<21:26:01, 3.53it/s] 27%|██▋ | 99067/371472 [7:52:42<21:07:27, 3.58it/s] 27%|██▋ | 99068/371472 [7:52:43<21:01:37, 3.60it/s] 27%|██▋ | 99069/371472 [7:52:43<21:26:43, 3.53it/s] 27%|██▋ | 99070/371472 [7:52:43<21:10:55, 3.57it/s] 27%|██▋ | 99071/371472 [7:52:43<20:58:38, 3.61it/s] 27%|██▋ | 99072/371472 [7:52:44<21:10:51, 3.57it/s] 27%|██▋ | 99073/371472 [7:52:44<22:02:02, 3.43it/s] 27%|██▋ | 99074/371472 [7:52:44<21:20:54, 3.54it/s] 27%|██▋ | 99075/371472 [7:52:45<20:35:52, 3.67it/s] 27%|██▋ | 99076/371472 [7:52:45<20:25:16, 3.71it/s] 27%|██▋ | 99077/371472 [7:52:45<20:38:05, 3.67it/s] 27%|██▋ | 99078/371472 [7:52:45<20:49:58, 3.63it/s] 27%|██▋ | 99079/371472 [7:52:46<20:52:34, 3.62it/s] 27%|██▋ | 99080/371472 [7:52:46<20:36:01, 3.67it/s] {'loss': 3.8522, 'learning_rate': 7.603051132323471e-07, 'epoch': 4.27} + 27%|██▋ | 99080/371472 [7:52:46<20:36:01, 3.67it/s] 27%|██▋ | 99081/371472 [7:52:46<20:55:15, 3.62it/s] 27%|██▋ | 99082/371472 [7:52:47<23:18:37, 3.25it/s] 27%|██▋ | 99083/371472 [7:52:47<22:46:08, 3.32it/s] 27%|██▋ | 99084/371472 [7:52:47<22:33:20, 3.35it/s] 27%|██▋ | 99085/371472 [7:52:48<23:18:07, 3.25it/s] 27%|██▋ | 99086/371472 [7:52:48<22:42:17, 3.33it/s] 27%|██▋ | 99087/371472 [7:52:48<22:35:07, 3.35it/s] 27%|██▋ | 99088/371472 [7:52:48<23:10:43, 3.26it/s] 27%|██▋ | 99089/371472 [7:52:49<21:47:37, 3.47it/s] 27%|██▋ | 99090/371472 [7:52:49<21:38:25, 3.50it/s] 27%|██▋ | 99091/371472 [7:52:49<20:56:43, 3.61it/s] 27%|██▋ | 99092/371472 [7:52:50<22:07:51, 3.42it/s] 27%|██▋ | 99093/371472 [7:52:50<21:37:42, 3.50it/s] 27%|██▋ | 99094/371472 [7:52:50<21:09:07, 3.58it/s] 27%|██▋ | 99095/371472 [7:52:50<21:24:04, 3.54it/s] 27%|██▋ | 99096/371472 [7:52:51<21:13:13, 3.57it/s] 27%|██▋ | 99097/371472 [7:52:51<20:54:25, 3.62it/s] 27%|██▋ | 99098/371472 [7:52:51<20:47:30, 3.64it/s] 27%|██▋ | 99099/371472 [7:52:51<21:16:30, 3.56it/s] 27%|██▋ | 99100/371472 [7:52:52<20:38:47, 3.66it/s] {'loss': 3.6473, 'learning_rate': 7.602566312568683e-07, 'epoch': 4.27} + 27%|██▋ | 99100/371472 [7:52:52<20:38:47, 3.66it/s] 27%|██▋ | 99101/371472 [7:52:52<20:25:20, 3.70it/s] 27%|██▋ | 99102/371472 [7:52:52<20:18:35, 3.73it/s] 27%|██▋ | 99103/371472 [7:52:53<20:11:29, 3.75it/s] 27%|██▋ | 99104/371472 [7:52:53<20:14:38, 3.74it/s] 27%|██▋ | 99105/371472 [7:52:53<21:07:35, 3.58it/s] 27%|██▋ | 99106/371472 [7:52:53<21:34:02, 3.51it/s] 27%|██▋ | 99107/371472 [7:52:54<21:26:51, 3.53it/s] 27%|██▋ | 99108/371472 [7:52:54<21:29:30, 3.52it/s] 27%|██▋ | 99109/371472 [7:52:54<21:06:33, 3.58it/s] 27%|██▋ | 99110/371472 [7:52:55<21:22:29, 3.54it/s] 27%|██▋ | 99111/371472 [7:52:55<22:24:13, 3.38it/s] 27%|██▋ | 99112/371472 [7:52:55<23:41:16, 3.19it/s] 27%|██▋ | 99113/371472 [7:52:55<22:32:15, 3.36it/s] 27%|██▋ | 99114/371472 [7:52:56<23:08:01, 3.27it/s] 27%|██▋ | 99115/371472 [7:52:56<23:45:09, 3.19it/s] 27%|██▋ | 99116/371472 [7:52:56<22:51:14, 3.31it/s] 27%|██▋ | 99117/371472 [7:52:57<21:27:08, 3.53it/s] 27%|██▋ | 99118/371472 [7:52:57<21:28:23, 3.52it/s] 27%|██▋ | 99119/371472 [7:52:57<21:39:58, 3.49it/s] 27%|██▋ | 99120/371472 [7:52:57<21:28:46, 3.52it/s] {'loss': 3.5483, 'learning_rate': 7.602081492813893e-07, 'epoch': 4.27} + 27%|██▋ | 99120/371472 [7:52:57<21:28:46, 3.52it/s] 27%|██▋ | 99121/371472 [7:52:58<22:28:26, 3.37it/s] 27%|██▋ | 99122/371472 [7:52:58<21:23:00, 3.54it/s] 27%|██▋ | 99123/371472 [7:52:58<21:15:34, 3.56it/s] 27%|██▋ | 99124/371472 [7:52:59<21:12:32, 3.57it/s] 27%|██▋ | 99125/371472 [7:52:59<21:28:46, 3.52it/s] 27%|██▋ | 99126/371472 [7:52:59<20:52:48, 3.62it/s] 27%|██▋ | 99127/371472 [7:52:59<21:01:05, 3.60it/s] 27%|██▋ | 99128/371472 [7:53:00<24:01:24, 3.15it/s] 27%|██▋ | 99129/371472 [7:53:00<23:32:42, 3.21it/s] 27%|██▋ | 99130/371472 [7:53:01<24:13:07, 3.12it/s] 27%|██▋ | 99131/371472 [7:53:01<24:12:33, 3.12it/s] 27%|██▋ | 99132/371472 [7:53:01<22:53:05, 3.31it/s] 27%|██▋ | 99133/371472 [7:53:01<22:23:32, 3.38it/s] 27%|██▋ | 99134/371472 [7:53:02<21:28:12, 3.52it/s] 27%|██▋ | 99135/371472 [7:53:02<21:43:45, 3.48it/s] 27%|██▋ | 99136/371472 [7:53:02<21:10:52, 3.57it/s] 27%|██▋ | 99137/371472 [7:53:02<21:00:30, 3.60it/s] 27%|██▋ | 99138/371472 [7:53:03<21:40:26, 3.49it/s] 27%|██▋ | 99139/371472 [7:53:03<21:51:49, 3.46it/s] 27%|██▋ | 99140/371472 [7:53:03<21:08:35, 3.58it/s] {'loss': 3.4412, 'learning_rate': 7.601596673059104e-07, 'epoch': 4.27} + 27%|██▋ | 99140/371472 [7:53:03<21:08:35, 3.58it/s] 27%|██▋ | 99141/371472 [7:53:04<20:49:42, 3.63it/s] 27%|██▋ | 99142/371472 [7:53:04<20:18:16, 3.73it/s] 27%|██▋ | 99143/371472 [7:53:04<20:25:31, 3.70it/s] 27%|██▋ | 99144/371472 [7:53:04<19:56:31, 3.79it/s] 27%|██▋ | 99145/371472 [7:53:05<20:16:04, 3.73it/s] 27%|██▋ | 99146/371472 [7:53:05<20:12:34, 3.74it/s] 27%|██▋ | 99147/371472 [7:53:05<20:03:24, 3.77it/s] 27%|██▋ | 99148/371472 [7:53:05<21:19:50, 3.55it/s] 27%|██▋ | 99149/371472 [7:53:06<21:41:20, 3.49it/s] 27%|██▋ | 99150/371472 [7:53:06<21:25:44, 3.53it/s] 27%|██▋ | 99151/371472 [7:53:06<21:49:13, 3.47it/s] 27%|██▋ | 99152/371472 [7:53:07<21:43:35, 3.48it/s] 27%|██▋ | 99153/371472 [7:53:07<21:26:12, 3.53it/s] 27%|██▋ | 99154/371472 [7:53:07<22:18:04, 3.39it/s] 27%|██▋ | 99155/371472 [7:53:08<21:46:42, 3.47it/s] 27%|██▋ | 99156/371472 [7:53:08<21:13:06, 3.56it/s] 27%|██▋ | 99157/371472 [7:53:08<20:20:16, 3.72it/s] 27%|██▋ | 99158/371472 [7:53:08<20:24:04, 3.71it/s] 27%|██▋ | 99159/371472 [7:53:09<20:00:36, 3.78it/s] 27%|██▋ | 99160/371472 [7:53:09<20:22:24, 3.71it/s] {'loss': 3.6133, 'learning_rate': 7.601111853304315e-07, 'epoch': 4.27} + 27%|██▋ | 99160/371472 [7:53:09<20:22:24, 3.71it/s] 27%|██▋ | 99161/371472 [7:53:09<19:56:38, 3.79it/s] 27%|██▋ | 99162/371472 [7:53:09<19:37:22, 3.85it/s] 27%|██▋ | 99163/371472 [7:53:10<19:27:10, 3.89it/s] 27%|██▋ | 99164/371472 [7:53:10<19:40:34, 3.84it/s] 27%|██▋ | 99165/371472 [7:53:10<20:09:03, 3.75it/s] 27%|██▋ | 99166/371472 [7:53:10<21:02:58, 3.59it/s] 27%|██▋ | 99167/371472 [7:53:11<20:48:36, 3.63it/s] 27%|██▋ | 99168/371472 [7:53:11<21:35:33, 3.50it/s] 27%|██▋ | 99169/371472 [7:53:11<20:31:03, 3.69it/s] 27%|██▋ | 99170/371472 [7:53:12<21:43:24, 3.48it/s] 27%|██▋ | 99171/371472 [7:53:12<22:22:28, 3.38it/s] 27%|██▋ | 99172/371472 [7:53:12<22:10:21, 3.41it/s] 27%|██▋ | 99173/371472 [7:53:12<23:10:22, 3.26it/s] 27%|██▋ | 99174/371472 [7:53:13<22:57:39, 3.29it/s] 27%|██▋ | 99175/371472 [7:53:13<21:59:53, 3.44it/s] 27%|██▋ | 99176/371472 [7:53:13<22:04:11, 3.43it/s] 27%|██▋ | 99177/371472 [7:53:14<22:56:33, 3.30it/s] 27%|██▋ | 99178/371472 [7:53:14<22:44:12, 3.33it/s] 27%|██▋ | 99179/371472 [7:53:14<21:43:08, 3.48it/s] 27%|██▋ | 99180/371472 [7:53:15<21:47:10, 3.47it/s] {'loss': 3.7525, 'learning_rate': 7.600627033549527e-07, 'epoch': 4.27} + 27%|██▋ | 99180/371472 [7:53:15<21:47:10, 3.47it/s] 27%|██▋ | 99181/371472 [7:53:15<21:32:21, 3.51it/s] 27%|██▋ | 99182/371472 [7:53:15<22:13:16, 3.40it/s] 27%|██▋ | 99183/371472 [7:53:15<21:30:52, 3.52it/s] 27%|██▋ | 99184/371472 [7:53:16<21:45:59, 3.47it/s] 27%|██▋ | 99185/371472 [7:53:16<21:10:43, 3.57it/s] 27%|██▋ | 99186/371472 [7:53:16<21:32:49, 3.51it/s] 27%|██▋ | 99187/371472 [7:53:16<21:03:33, 3.59it/s] 27%|██▋ | 99188/371472 [7:53:17<20:36:48, 3.67it/s] 27%|██▋ | 99189/371472 [7:53:17<20:04:32, 3.77it/s] 27%|██▋ | 99190/371472 [7:53:17<20:23:21, 3.71it/s] 27%|██▋ | 99191/371472 [7:53:18<22:41:50, 3.33it/s] 27%|██▋ | 99192/371472 [7:53:18<21:55:02, 3.45it/s] 27%|██▋ | 99193/371472 [7:53:18<21:25:15, 3.53it/s] 27%|██▋ | 99194/371472 [7:53:19<22:47:53, 3.32it/s] 27%|██▋ | 99195/371472 [7:53:19<23:06:07, 3.27it/s] 27%|██▋ | 99196/371472 [7:53:19<21:53:08, 3.46it/s] 27%|██▋ | 99197/371472 [7:53:19<21:55:43, 3.45it/s] 27%|██▋ | 99198/371472 [7:53:20<22:48:46, 3.32it/s] 27%|██▋ | 99199/371472 [7:53:20<22:56:17, 3.30it/s] 27%|██▋ | 99200/371472 [7:53:20<23:57:24, 3.16it/s] {'loss': 3.7139, 'learning_rate': 7.600142213794738e-07, 'epoch': 4.27} + 27%|██▋ | 99200/371472 [7:53:20<23:57:24, 3.16it/s] 27%|██▋ | 99201/371472 [7:53:21<22:46:30, 3.32it/s] 27%|██▋ | 99202/371472 [7:53:21<22:06:58, 3.42it/s] 27%|██▋ | 99203/371472 [7:53:21<21:58:24, 3.44it/s] 27%|██▋ | 99204/371472 [7:53:21<22:15:34, 3.40it/s] 27%|██▋ | 99205/371472 [7:53:22<22:08:46, 3.42it/s] 27%|██▋ | 99206/371472 [7:53:22<20:47:35, 3.64it/s] 27%|██▋ | 99207/371472 [7:53:22<20:21:04, 3.72it/s] 27%|██▋ | 99208/371472 [7:53:23<20:11:52, 3.74it/s] 27%|██▋ | 99209/371472 [7:53:23<21:29:30, 3.52it/s] 27%|██▋ | 99210/371472 [7:53:23<20:30:49, 3.69it/s] 27%|██▋ | 99211/371472 [7:53:23<20:29:05, 3.69it/s] 27%|██▋ | 99212/371472 [7:53:24<21:01:56, 3.60it/s] 27%|██▋ | 99213/371472 [7:53:24<20:22:24, 3.71it/s] 27%|██▋ | 99214/371472 [7:53:24<20:52:47, 3.62it/s] 27%|██▋ | 99215/371472 [7:53:24<20:43:22, 3.65it/s] 27%|██▋ | 99216/371472 [7:53:25<21:13:49, 3.56it/s] 27%|██▋ | 99217/371472 [7:53:25<21:36:43, 3.50it/s] 27%|██▋ | 99218/371472 [7:53:25<21:07:49, 3.58it/s] 27%|██▋ | 99219/371472 [7:53:26<20:51:20, 3.63it/s] 27%|██▋ | 99220/371472 [7:53:26<21:11:03, 3.57it/s] {'loss': 3.6204, 'learning_rate': 7.599657394039948e-07, 'epoch': 4.27} + 27%|██▋ | 99220/371472 [7:53:26<21:11:03, 3.57it/s] 27%|██▋ | 99221/371472 [7:53:26<20:39:30, 3.66it/s] 27%|██▋ | 99222/371472 [7:53:26<20:57:31, 3.61it/s] 27%|██▋ | 99223/371472 [7:53:27<21:18:51, 3.55it/s] 27%|██▋ | 99224/371472 [7:53:27<23:26:23, 3.23it/s] 27%|██▋ | 99225/371472 [7:53:27<22:40:18, 3.34it/s] 27%|██▋ | 99226/371472 [7:53:28<21:45:20, 3.48it/s] 27%|██▋ | 99227/371472 [7:53:28<22:34:36, 3.35it/s] 27%|██▋ | 99228/371472 [7:53:28<22:15:09, 3.40it/s] 27%|██▋ | 99229/371472 [7:53:29<21:46:00, 3.47it/s] 27%|██▋ | 99230/371472 [7:53:29<23:11:23, 3.26it/s] 27%|██▋ | 99231/371472 [7:53:29<23:13:18, 3.26it/s] 27%|██▋ | 99232/371472 [7:53:29<22:00:19, 3.44it/s] 27%|██▋ | 99233/371472 [7:53:30<21:21:51, 3.54it/s] 27%|██▋ | 99234/371472 [7:53:30<21:20:09, 3.54it/s] 27%|██▋ | 99235/371472 [7:53:30<20:33:19, 3.68it/s] 27%|██▋ | 99236/371472 [7:53:31<21:28:34, 3.52it/s] 27%|██▋ | 99237/371472 [7:53:31<21:21:18, 3.54it/s] 27%|██▋ | 99238/371472 [7:53:31<22:24:11, 3.38it/s] 27%|██▋ | 99239/371472 [7:53:31<22:17:50, 3.39it/s] 27%|██▋ | 99240/371472 [7:53:32<22:28:31, 3.36it/s] {'loss': 3.5158, 'learning_rate': 7.59917257428516e-07, 'epoch': 4.27} + 27%|██▋ | 99240/371472 [7:53:32<22:28:31, 3.36it/s] 27%|██▋ | 99241/371472 [7:53:32<21:39:34, 3.49it/s] 27%|██▋ | 99242/371472 [7:53:32<21:58:01, 3.44it/s] 27%|██▋ | 99243/371472 [7:53:33<22:27:19, 3.37it/s] 27%|██▋ | 99244/371472 [7:53:33<21:18:21, 3.55it/s] 27%|██▋ | 99245/371472 [7:53:33<22:46:38, 3.32it/s] 27%|██▋ | 99246/371472 [7:53:34<22:51:39, 3.31it/s] 27%|██▋ | 99247/371472 [7:53:34<22:44:51, 3.32it/s] 27%|██▋ | 99248/371472 [7:53:34<23:16:01, 3.25it/s] 27%|██▋ | 99249/371472 [7:53:34<22:25:59, 3.37it/s] 27%|██▋ | 99250/371472 [7:53:35<21:46:23, 3.47it/s] 27%|██▋ | 99251/371472 [7:53:35<21:20:15, 3.54it/s] 27%|██▋ | 99252/371472 [7:53:35<21:25:21, 3.53it/s] 27%|██▋ | 99253/371472 [7:53:35<21:11:42, 3.57it/s] 27%|██▋ | 99254/371472 [7:53:36<20:49:39, 3.63it/s] 27%|██▋ | 99255/371472 [7:53:36<21:37:33, 3.50it/s] 27%|██▋ | 99256/371472 [7:53:36<21:15:15, 3.56it/s] 27%|██▋ | 99257/371472 [7:53:37<21:13:30, 3.56it/s] 27%|██▋ | 99258/371472 [7:53:37<21:30:53, 3.51it/s] 27%|██▋ | 99259/371472 [7:53:37<21:55:12, 3.45it/s] 27%|██▋ | 99260/371472 [7:53:37<21:42:22, 3.48it/s] {'loss': 3.6156, 'learning_rate': 7.598687754530371e-07, 'epoch': 4.28} + 27%|██▋ | 99260/371472 [7:53:38<21:42:22, 3.48it/s] 27%|██▋ | 99261/371472 [7:53:38<21:07:52, 3.58it/s] 27%|██▋ | 99262/371472 [7:53:38<20:12:04, 3.74it/s] 27%|██▋ | 99263/371472 [7:53:38<21:09:46, 3.57it/s] 27%|██▋ | 99264/371472 [7:53:39<24:36:05, 3.07it/s] 27%|██▋ | 99265/371472 [7:53:39<23:46:12, 3.18it/s] 27%|██▋ | 99266/371472 [7:53:39<22:52:34, 3.31it/s] 27%|██▋ | 99267/371472 [7:53:40<21:35:17, 3.50it/s] 27%|██▋ | 99268/371472 [7:53:40<21:54:42, 3.45it/s] 27%|██▋ | 99269/371472 [7:53:40<21:35:18, 3.50it/s] 27%|██▋ | 99270/371472 [7:53:40<21:08:31, 3.58it/s] 27%|██▋ | 99271/371472 [7:53:41<21:01:57, 3.59it/s] 27%|██▋ | 99272/371472 [7:53:41<21:13:41, 3.56it/s] 27%|██▋ | 99273/371472 [7:53:41<21:10:54, 3.57it/s] 27%|██▋ | 99274/371472 [7:53:42<21:27:11, 3.52it/s] 27%|██▋ | 99275/371472 [7:53:42<21:51:11, 3.46it/s] 27%|██▋ | 99276/371472 [7:53:42<22:57:50, 3.29it/s] 27%|██▋ | 99277/371472 [7:53:43<23:48:27, 3.18it/s] 27%|██▋ | 99278/371472 [7:53:43<23:04:42, 3.28it/s] 27%|██▋ | 99279/371472 [7:53:43<23:29:23, 3.22it/s] 27%|██▋ | 99280/371472 [7:53:43<22:45:18, 3.32it/s] {'loss': 3.7032, 'learning_rate': 7.598202934775582e-07, 'epoch': 4.28} + 27%|██▋ | 99280/371472 [7:53:43<22:45:18, 3.32it/s] 27%|██▋ | 99281/371472 [7:53:44<22:28:18, 3.36it/s] 27%|██▋ | 99282/371472 [7:53:44<22:07:27, 3.42it/s] 27%|██▋ | 99283/371472 [7:53:44<24:47:16, 3.05it/s] 27%|██▋ | 99284/371472 [7:53:45<26:02:45, 2.90it/s] 27%|██▋ | 99285/371472 [7:53:45<23:52:54, 3.17it/s] 27%|██▋ | 99286/371472 [7:53:45<24:20:07, 3.11it/s] 27%|██▋ | 99287/371472 [7:53:46<23:17:36, 3.25it/s] 27%|██▋ | 99288/371472 [7:53:46<22:08:31, 3.41it/s] 27%|██▋ | 99289/371472 [7:53:46<22:19:57, 3.39it/s] 27%|██▋ | 99290/371472 [7:53:46<21:36:52, 3.50it/s] 27%|██▋ | 99291/371472 [7:53:47<21:20:32, 3.54it/s] 27%|██▋ | 99292/371472 [7:53:47<20:54:53, 3.61it/s] 27%|██▋ | 99293/371472 [7:53:47<20:20:41, 3.72it/s] 27%|██▋ | 99294/371472 [7:53:48<21:28:58, 3.52it/s] 27%|██▋ | 99295/371472 [7:53:48<21:59:38, 3.44it/s] 27%|██▋ | 99296/371472 [7:53:48<21:54:06, 3.45it/s] 27%|██▋ | 99297/371472 [7:53:48<21:28:12, 3.52it/s] 27%|██▋ | 99298/371472 [7:53:49<21:09:14, 3.57it/s] 27%|██▋ | 99299/371472 [7:53:49<22:43:17, 3.33it/s] 27%|██▋ | 99300/371472 [7:53:49<24:13:23, 3.12it/s] {'loss': 3.575, 'learning_rate': 7.597718115020792e-07, 'epoch': 4.28} + 27%|██▋ | 99300/371472 [7:53:49<24:13:23, 3.12it/s] 27%|██▋ | 99301/371472 [7:53:50<23:32:05, 3.21it/s] 27%|██▋ | 99302/371472 [7:53:50<22:40:12, 3.33it/s] 27%|██▋ | 99303/371472 [7:53:50<22:33:16, 3.35it/s] 27%|██▋ | 99304/371472 [7:53:51<21:35:10, 3.50it/s] 27%|██▋ | 99305/371472 [7:53:51<21:28:35, 3.52it/s] 27%|██▋ | 99306/371472 [7:53:51<20:38:25, 3.66it/s] 27%|██▋ | 99307/371472 [7:53:51<20:29:27, 3.69it/s] 27%|██▋ | 99308/371472 [7:53:52<20:37:52, 3.66it/s] 27%|██▋ | 99309/371472 [7:53:52<20:10:33, 3.75it/s] 27%|██▋ | 99310/371472 [7:53:52<22:43:21, 3.33it/s] 27%|██▋ | 99311/371472 [7:53:52<22:27:14, 3.37it/s] 27%|██▋ | 99312/371472 [7:53:53<21:56:09, 3.45it/s] 27%|██▋ | 99313/371472 [7:53:53<22:07:27, 3.42it/s] 27%|██▋ | 99314/371472 [7:53:53<21:19:50, 3.54it/s] 27%|██▋ | 99315/371472 [7:53:54<20:32:22, 3.68it/s] 27%|██▋ | 99316/371472 [7:53:54<20:31:08, 3.68it/s] 27%|██▋ | 99317/371472 [7:53:54<20:23:21, 3.71it/s] 27%|██▋ | 99318/371472 [7:53:54<20:26:05, 3.70it/s] 27%|██▋ | 99319/371472 [7:53:55<21:23:30, 3.53it/s] 27%|██▋ | 99320/371472 [7:53:55<21:54:59, 3.45it/s] {'loss': 3.4864, 'learning_rate': 7.597233295266004e-07, 'epoch': 4.28} + 27%|██▋ | 99320/371472 [7:53:55<21:54:59, 3.45it/s] 27%|██▋ | 99321/371472 [7:53:55<21:50:56, 3.46it/s] 27%|██▋ | 99322/371472 [7:53:56<20:34:03, 3.68it/s] 27%|██▋ | 99323/371472 [7:53:56<20:16:16, 3.73it/s] 27%|██▋ | 99324/371472 [7:53:56<20:13:17, 3.74it/s] 27%|██▋ | 99325/371472 [7:53:56<21:31:19, 3.51it/s] 27%|██▋ | 99326/371472 [7:53:57<20:59:25, 3.60it/s] 27%|██▋ | 99327/371472 [7:53:57<20:43:45, 3.65it/s] 27%|██▋ | 99328/371472 [7:53:57<21:34:32, 3.50it/s] 27%|██▋ | 99329/371472 [7:53:57<21:37:10, 3.50it/s] 27%|██▋ | 99330/371472 [7:53:58<21:27:51, 3.52it/s] 27%|██▋ | 99331/371472 [7:53:58<21:05:04, 3.59it/s] 27%|██▋ | 99332/371472 [7:53:58<21:02:34, 3.59it/s] 27%|██▋ | 99333/371472 [7:53:59<20:08:09, 3.75it/s] 27%|██▋ | 99334/371472 [7:53:59<19:58:13, 3.79it/s] 27%|██▋ | 99335/371472 [7:53:59<19:49:35, 3.81it/s] 27%|██▋ | 99336/371472 [7:53:59<20:18:36, 3.72it/s] 27%|██▋ | 99337/371472 [7:54:00<21:00:16, 3.60it/s] 27%|██▋ | 99338/371472 [7:54:00<21:41:08, 3.49it/s] 27%|██▋ | 99339/371472 [7:54:00<22:18:44, 3.39it/s] 27%|██▋ | 99340/371472 [7:54:01<22:09:39, 3.41it/s] {'loss': 3.4801, 'learning_rate': 7.596748475511216e-07, 'epoch': 4.28} + 27%|██▋ | 99340/371472 [7:54:01<22:09:39, 3.41it/s] 27%|██▋ | 99341/371472 [7:54:01<21:22:01, 3.54it/s] 27%|██▋ | 99342/371472 [7:54:01<21:00:33, 3.60it/s] 27%|██▋ | 99343/371472 [7:54:01<20:59:57, 3.60it/s] 27%|██▋ | 99344/371472 [7:54:02<21:31:37, 3.51it/s] 27%|██▋ | 99345/371472 [7:54:02<23:52:22, 3.17it/s] 27%|██▋ | 99346/371472 [7:54:02<22:53:25, 3.30it/s] 27%|██▋ | 99347/371472 [7:54:03<21:56:03, 3.45it/s] 27%|██▋ | 99348/371472 [7:54:03<21:03:59, 3.59it/s] 27%|██▋ | 99349/371472 [7:54:03<20:41:40, 3.65it/s] 27%|██▋ | 99350/371472 [7:54:03<20:30:25, 3.69it/s] 27%|██▋ | 99351/371472 [7:54:04<20:28:52, 3.69it/s] 27%|██▋ | 99352/371472 [7:54:04<21:36:20, 3.50it/s] 27%|██▋ | 99353/371472 [7:54:04<21:40:25, 3.49it/s] 27%|██▋ | 99354/371472 [7:54:05<21:25:27, 3.53it/s] 27%|██▋ | 99355/371472 [7:54:05<21:42:32, 3.48it/s] 27%|██▋ | 99356/371472 [7:54:05<21:09:53, 3.57it/s] 27%|██▋ | 99357/371472 [7:54:05<20:47:41, 3.63it/s] 27%|██▋ | 99358/371472 [7:54:06<20:56:23, 3.61it/s] 27%|██▋ | 99359/371472 [7:54:06<20:37:53, 3.66it/s] 27%|██▋ | 99360/371472 [7:54:06<21:07:25, 3.58it/s] {'loss': 3.5849, 'learning_rate': 7.596263655756427e-07, 'epoch': 4.28} + 27%|██▋ | 99360/371472 [7:54:06<21:07:25, 3.58it/s] 27%|██▋ | 99361/371472 [7:54:06<20:59:49, 3.60it/s] 27%|██▋ | 99362/371472 [7:54:07<20:47:50, 3.63it/s] 27%|██▋ | 99363/371472 [7:54:07<20:26:21, 3.70it/s] 27%|██▋ | 99364/371472 [7:54:07<20:20:17, 3.72it/s] 27%|██▋ | 99365/371472 [7:54:08<20:03:14, 3.77it/s] 27%|██▋ | 99366/371472 [7:54:08<19:41:06, 3.84it/s] 27%|██▋ | 99367/371472 [7:54:08<21:19:47, 3.54it/s] 27%|██▋ | 99368/371472 [7:54:08<21:51:32, 3.46it/s] 27%|██▋ | 99369/371472 [7:54:09<21:37:29, 3.50it/s] 27%|██▋ | 99370/371472 [7:54:09<20:59:26, 3.60it/s] 27%|██▋ | 99371/371472 [7:54:09<21:30:21, 3.51it/s] 27%|██▋ | 99372/371472 [7:54:10<21:41:41, 3.48it/s] 27%|██▋ | 99373/371472 [7:54:10<20:57:52, 3.61it/s] 27%|██▋ | 99374/371472 [7:54:10<20:23:14, 3.71it/s] 27%|██▋ | 99375/371472 [7:54:10<20:44:21, 3.64it/s] 27%|██▋ | 99376/371472 [7:54:11<21:00:44, 3.60it/s] 27%|██▋ | 99377/371472 [7:54:11<22:20:43, 3.38it/s] 27%|██▋ | 99378/371472 [7:54:11<21:31:16, 3.51it/s] 27%|██▋ | 99379/371472 [7:54:11<20:58:40, 3.60it/s] 27%|██▋ | 99380/371472 [7:54:12<21:56:08, 3.45it/s] {'loss': 3.6636, 'learning_rate': 7.595778836001636e-07, 'epoch': 4.28} + 27%|██▋ | 99380/371472 [7:54:12<21:56:08, 3.45it/s] 27%|██▋ | 99381/371472 [7:54:12<22:30:22, 3.36it/s] 27%|██▋ | 99382/371472 [7:54:12<22:11:42, 3.41it/s] 27%|██▋ | 99383/371472 [7:54:13<21:41:05, 3.49it/s] 27%|██▋ | 99384/371472 [7:54:13<20:57:56, 3.60it/s] 27%|██▋ | 99385/371472 [7:54:13<20:35:24, 3.67it/s] 27%|██▋ | 99386/371472 [7:54:13<20:38:43, 3.66it/s] 27%|██▋ | 99387/371472 [7:54:14<20:28:58, 3.69it/s] 27%|██▋ | 99388/371472 [7:54:14<22:12:37, 3.40it/s] 27%|██▋ | 99389/371472 [7:54:14<22:10:45, 3.41it/s] 27%|██▋ | 99390/371472 [7:54:15<21:00:40, 3.60it/s] 27%|██▋ | 99391/371472 [7:54:15<21:41:09, 3.49it/s] 27%|██▋ | 99392/371472 [7:54:15<21:25:29, 3.53it/s] 27%|██▋ | 99393/371472 [7:54:15<21:31:43, 3.51it/s] 27%|██▋ | 99394/371472 [7:54:16<23:22:18, 3.23it/s] 27%|██▋ | 99395/371472 [7:54:16<24:24:30, 3.10it/s] 27%|██▋ | 99396/371472 [7:54:16<23:24:00, 3.23it/s] 27%|██▋ | 99397/371472 [7:54:17<22:16:11, 3.39it/s] 27%|██▋ | 99398/371472 [7:54:17<21:17:59, 3.55it/s] 27%|██▋ | 99399/371472 [7:54:17<22:01:47, 3.43it/s] 27%|██▋ | 99400/371472 [7:54:18<21:46:41, 3.47it/s] {'loss': 3.5957, 'learning_rate': 7.595294016246848e-07, 'epoch': 4.28} + 27%|██▋ | 99400/371472 [7:54:18<21:46:41, 3.47it/s] 27%|██▋ | 99401/371472 [7:54:18<21:08:35, 3.57it/s] 27%|██▋ | 99402/371472 [7:54:18<22:06:15, 3.42it/s] 27%|██▋ | 99403/371472 [7:54:18<21:57:11, 3.44it/s] 27%|██▋ | 99404/371472 [7:54:19<21:39:16, 3.49it/s] 27%|██▋ | 99405/371472 [7:54:19<22:25:15, 3.37it/s] 27%|██▋ | 99406/371472 [7:54:19<22:41:13, 3.33it/s] 27%|██▋ | 99407/371472 [7:54:20<22:10:02, 3.41it/s] 27%|██▋ | 99408/371472 [7:54:20<21:32:48, 3.51it/s] 27%|██▋ | 99409/371472 [7:54:20<20:53:49, 3.62it/s] 27%|██▋ | 99410/371472 [7:54:20<21:20:18, 3.54it/s] 27%|██▋ | 99411/371472 [7:54:21<22:49:19, 3.31it/s] 27%|██▋ | 99412/371472 [7:54:21<21:45:15, 3.47it/s] 27%|██▋ | 99413/371472 [7:54:21<22:23:34, 3.37it/s] 27%|██▋ | 99414/371472 [7:54:22<22:21:37, 3.38it/s] 27%|██▋ | 99415/371472 [7:54:22<23:43:27, 3.19it/s] 27%|██▋ | 99416/371472 [7:54:22<22:22:07, 3.38it/s] 27%|██▋ | 99417/371472 [7:54:23<22:11:16, 3.41it/s] 27%|██▋ | 99418/371472 [7:54:23<21:48:41, 3.46it/s] 27%|██▋ | 99419/371472 [7:54:23<22:20:15, 3.38it/s] 27%|██▋ | 99420/371472 [7:54:23<22:22:52, 3.38it/s] {'loss': 3.7073, 'learning_rate': 7.59480919649206e-07, 'epoch': 4.28} + 27%|██▋ | 99420/371472 [7:54:23<22:22:52, 3.38it/s] 27%|██▋ | 99421/371472 [7:54:24<22:23:02, 3.38it/s] 27%|██▋ | 99422/371472 [7:54:24<21:45:05, 3.47it/s] 27%|██▋ | 99423/371472 [7:54:24<21:02:11, 3.59it/s] 27%|██▋ | 99424/371472 [7:54:25<21:21:08, 3.54it/s] 27%|██▋ | 99425/371472 [7:54:25<21:04:55, 3.58it/s] 27%|██▋ | 99426/371472 [7:54:25<22:25:14, 3.37it/s] 27%|██▋ | 99427/371472 [7:54:25<21:17:52, 3.55it/s] 27%|██▋ | 99428/371472 [7:54:26<22:25:56, 3.37it/s] 27%|██▋ | 99429/371472 [7:54:26<21:55:39, 3.45it/s] 27%|██▋ | 99430/371472 [7:54:26<22:12:33, 3.40it/s] 27%|██▋ | 99431/371472 [7:54:27<21:55:38, 3.45it/s] 27%|██▋ | 99432/371472 [7:54:27<21:23:01, 3.53it/s] 27%|██▋ | 99433/371472 [7:54:27<22:10:09, 3.41it/s] 27%|██▋ | 99434/371472 [7:54:27<21:40:12, 3.49it/s] 27%|██▋ | 99435/371472 [7:54:28<22:01:50, 3.43it/s] 27%|██▋ | 99436/371472 [7:54:28<22:01:32, 3.43it/s] 27%|██▋ | 99437/371472 [7:54:28<22:53:50, 3.30it/s] 27%|██▋ | 99438/371472 [7:54:29<21:55:52, 3.45it/s] 27%|██▋ | 99439/371472 [7:54:29<22:18:22, 3.39it/s] 27%|██▋ | 99440/371472 [7:54:29<21:11:13, 3.57it/s] {'loss': 3.4552, 'learning_rate': 7.594324376737271e-07, 'epoch': 4.28} + 27%|██▋ | 99440/371472 [7:54:29<21:11:13, 3.57it/s] 27%|██▋ | 99441/371472 [7:54:29<21:02:43, 3.59it/s] 27%|██▋ | 99442/371472 [7:54:30<20:05:46, 3.76it/s] 27%|██▋ | 99443/371472 [7:54:30<21:00:41, 3.60it/s] 27%|██▋ | 99444/371472 [7:54:30<21:13:33, 3.56it/s] 27%|██▋ | 99445/371472 [7:54:31<20:43:06, 3.65it/s] 27%|██▋ | 99446/371472 [7:54:31<20:09:17, 3.75it/s] 27%|██▋ | 99447/371472 [7:54:31<19:50:28, 3.81it/s] 27%|██▋ | 99448/371472 [7:54:31<20:07:12, 3.76it/s] 27%|██▋ | 99449/371472 [7:54:32<19:35:57, 3.86it/s] 27%|██▋ | 99450/371472 [7:54:32<20:43:41, 3.65it/s] 27%|██▋ | 99451/371472 [7:54:32<20:17:18, 3.72it/s] 27%|██▋ | 99452/371472 [7:54:32<20:26:11, 3.70it/s] 27%|██▋ | 99453/371472 [7:54:33<20:32:07, 3.68it/s] 27%|██▋ | 99454/371472 [7:54:33<20:39:38, 3.66it/s] 27%|██▋ | 99455/371472 [7:54:33<20:41:19, 3.65it/s] 27%|██▋ | 99456/371472 [7:54:33<20:02:28, 3.77it/s] 27%|██▋ | 99457/371472 [7:54:34<22:10:22, 3.41it/s] 27%|██▋ | 99458/371472 [7:54:34<21:44:42, 3.47it/s] 27%|██▋ | 99459/371472 [7:54:34<20:51:13, 3.62it/s] 27%|██▋ | 99460/371472 [7:54:35<21:17:16, 3.55it/s] {'loss': 3.6417, 'learning_rate': 7.593839556982481e-07, 'epoch': 4.28} + 27%|██▋ | 99460/371472 [7:54:35<21:17:16, 3.55it/s] 27%|██▋ | 99461/371472 [7:54:35<20:56:00, 3.61it/s] 27%|██▋ | 99462/371472 [7:54:35<20:45:51, 3.64it/s] 27%|██▋ | 99463/371472 [7:54:35<21:01:37, 3.59it/s] 27%|██▋ | 99464/371472 [7:54:36<21:02:01, 3.59it/s] 27%|██▋ | 99465/371472 [7:54:36<21:02:48, 3.59it/s] 27%|██▋ | 99466/371472 [7:54:36<20:49:26, 3.63it/s] 27%|██▋ | 99467/371472 [7:54:37<20:23:40, 3.70it/s] 27%|██▋ | 99468/371472 [7:54:37<21:13:44, 3.56it/s] 27%|██▋ | 99469/371472 [7:54:37<22:42:40, 3.33it/s] 27%|██▋ | 99470/371472 [7:54:38<24:29:31, 3.08it/s] 27%|██▋ | 99471/371472 [7:54:38<23:24:18, 3.23it/s] 27%|██▋ | 99472/371472 [7:54:38<22:16:49, 3.39it/s] 27%|██▋ | 99473/371472 [7:54:38<22:06:41, 3.42it/s] 27%|██▋ | 99474/371472 [7:54:39<20:53:37, 3.62it/s] 27%|██▋ | 99475/371472 [7:54:39<20:37:29, 3.66it/s] 27%|██▋ | 99476/371472 [7:54:39<20:23:27, 3.71it/s] 27%|██▋ | 99477/371472 [7:54:40<22:19:19, 3.38it/s] 27%|██▋ | 99478/371472 [7:54:40<21:46:27, 3.47it/s] 27%|██▋ | 99479/371472 [7:54:40<21:18:46, 3.54it/s] 27%|██▋ | 99480/371472 [7:54:40<21:04:41, 3.58it/s] {'loss': 3.5259, 'learning_rate': 7.593354737227693e-07, 'epoch': 4.28} + 27%|██▋ | 99480/371472 [7:54:40<21:04:41, 3.58it/s] 27%|██▋ | 99481/371472 [7:54:41<20:58:39, 3.60it/s] 27%|██▋ | 99482/371472 [7:54:41<20:47:00, 3.64it/s] 27%|██▋ | 99483/371472 [7:54:41<21:30:45, 3.51it/s] 27%|██▋ | 99484/371472 [7:54:41<20:29:05, 3.69it/s] 27%|██▋ | 99485/371472 [7:54:42<22:16:34, 3.39it/s] 27%|██▋ | 99486/371472 [7:54:42<21:50:42, 3.46it/s] 27%|██▋ | 99487/371472 [7:54:42<21:45:09, 3.47it/s] 27%|██▋ | 99488/371472 [7:54:43<21:56:55, 3.44it/s] 27%|██▋ | 99489/371472 [7:54:43<21:28:34, 3.52it/s] 27%|██▋ | 99490/371472 [7:54:43<20:49:41, 3.63it/s] 27%|██▋ | 99491/371472 [7:54:43<20:29:33, 3.69it/s] 27%|██▋ | 99492/371472 [7:54:44<21:12:54, 3.56it/s] 27%|██▋ | 99493/371472 [7:54:44<21:06:43, 3.58it/s] 27%|██▋ | 99494/371472 [7:54:44<20:51:24, 3.62it/s] 27%|██▋ | 99495/371472 [7:54:45<20:46:18, 3.64it/s] 27%|██▋ | 99496/371472 [7:54:45<20:47:15, 3.63it/s] 27%|██▋ | 99497/371472 [7:54:45<20:41:38, 3.65it/s] 27%|██▋ | 99498/371472 [7:54:45<20:11:05, 3.74it/s] 27%|██▋ | 99499/371472 [7:54:46<20:54:45, 3.61it/s] 27%|██▋ | 99500/371472 [7:54:46<20:19:18, 3.72it/s] {'loss': 3.7289, 'learning_rate': 7.592869917472904e-07, 'epoch': 4.29} + 27%|██▋ | 99500/371472 [7:54:46<20:19:18, 3.72it/s] 27%|██▋ | 99501/371472 [7:54:46<20:22:00, 3.71it/s] 27%|██▋ | 99502/371472 [7:54:46<21:06:48, 3.58it/s] 27%|██▋ | 99503/371472 [7:54:47<21:40:23, 3.49it/s] 27%|██▋ | 99504/371472 [7:54:47<22:24:34, 3.37it/s] 27%|██▋ | 99505/371472 [7:54:47<22:10:17, 3.41it/s] 27%|██▋ | 99506/371472 [7:54:48<21:28:15, 3.52it/s] 27%|██▋ | 99507/371472 [7:54:48<21:27:08, 3.52it/s] 27%|██▋ | 99508/371472 [7:54:48<21:31:27, 3.51it/s] 27%|██▋ | 99509/371472 [7:54:49<22:22:09, 3.38it/s] 27%|██▋ | 99510/371472 [7:54:49<22:20:12, 3.38it/s] 27%|██▋ | 99511/371472 [7:54:49<21:57:35, 3.44it/s] 27%|██▋ | 99512/371472 [7:54:49<21:36:22, 3.50it/s] 27%|██▋ | 99513/371472 [7:54:50<22:44:18, 3.32it/s] 27%|██▋ | 99514/371472 [7:54:50<21:51:15, 3.46it/s] 27%|██▋ | 99515/371472 [7:54:50<21:07:51, 3.58it/s] 27%|██▋ | 99516/371472 [7:54:51<21:18:39, 3.54it/s] 27%|██▋ | 99517/371472 [7:54:51<21:02:02, 3.59it/s] 27%|██▋ | 99518/371472 [7:54:51<21:17:30, 3.55it/s] 27%|██▋ | 99519/371472 [7:54:51<21:55:54, 3.44it/s] 27%|██▋ | 99520/371472 [7:54:52<21:25:11, 3.53it/s] {'loss': 3.606, 'learning_rate': 7.592385097718115e-07, 'epoch': 4.29} + 27%|██▋ | 99520/371472 [7:54:52<21:25:11, 3.53it/s] 27%|██▋ | 99521/371472 [7:54:52<21:28:44, 3.52it/s] 27%|██▋ | 99522/371472 [7:54:52<26:23:53, 2.86it/s] 27%|██▋ | 99523/371472 [7:54:53<26:39:32, 2.83it/s] 27%|██▋ | 99524/371472 [7:54:53<25:44:17, 2.93it/s] 27%|██▋ | 99525/371472 [7:54:53<24:26:38, 3.09it/s] 27%|██▋ | 99526/371472 [7:54:54<23:34:12, 3.20it/s] 27%|██▋ | 99527/371472 [7:54:54<23:09:55, 3.26it/s] 27%|██▋ | 99528/371472 [7:54:54<22:17:54, 3.39it/s] 27%|██▋ | 99529/371472 [7:54:55<23:55:19, 3.16it/s] 27%|██▋ | 99530/371472 [7:54:55<22:48:05, 3.31it/s] 27%|██▋ | 99531/371472 [7:54:55<22:02:43, 3.43it/s] 27%|██▋ | 99532/371472 [7:54:56<23:22:05, 3.23it/s] 27%|██▋ | 99533/371472 [7:54:56<21:50:23, 3.46it/s] 27%|██▋ | 99534/371472 [7:54:56<21:22:12, 3.53it/s] 27%|██▋ | 99535/371472 [7:54:56<21:43:29, 3.48it/s] 27%|██▋ | 99536/371472 [7:54:57<21:38:43, 3.49it/s] 27%|██▋ | 99537/371472 [7:54:57<21:20:56, 3.54it/s] 27%|██▋ | 99538/371472 [7:54:57<22:37:57, 3.34it/s] 27%|██▋ | 99539/371472 [7:54:58<22:40:22, 3.33it/s] 27%|██▋ | 99540/371472 [7:54:58<21:37:35, 3.49it/s] {'loss': 3.6425, 'learning_rate': 7.591900277963325e-07, 'epoch': 4.29} + 27%|██▋ | 99540/371472 [7:54:58<21:37:35, 3.49it/s] 27%|██▋ | 99541/371472 [7:54:58<21:41:19, 3.48it/s] 27%|██▋ | 99542/371472 [7:54:58<22:01:43, 3.43it/s] 27%|██▋ | 99543/371472 [7:54:59<22:12:47, 3.40it/s] 27%|██▋ | 99544/371472 [7:54:59<21:21:47, 3.54it/s] 27%|██▋ | 99545/371472 [7:54:59<20:32:14, 3.68it/s] 27%|██▋ | 99546/371472 [7:54:59<21:12:07, 3.56it/s] 27%|██▋ | 99547/371472 [7:55:00<21:37:30, 3.49it/s] 27%|██▋ | 99548/371472 [7:55:00<20:58:21, 3.60it/s] 27%|██▋ | 99549/371472 [7:55:00<21:10:33, 3.57it/s] 27%|██▋ | 99550/371472 [7:55:01<21:58:27, 3.44it/s] 27%|██▋ | 99551/371472 [7:55:01<21:34:47, 3.50it/s] 27%|██▋ | 99552/371472 [7:55:01<21:22:48, 3.53it/s] 27%|██▋ | 99553/371472 [7:55:01<20:42:06, 3.65it/s] 27%|██▋ | 99554/371472 [7:55:02<20:53:16, 3.62it/s] 27%|██▋ | 99555/371472 [7:55:02<21:07:07, 3.58it/s] 27%|██▋ | 99556/371472 [7:55:02<20:22:56, 3.71it/s] 27%|██▋ | 99557/371472 [7:55:03<20:29:35, 3.69it/s] 27%|██▋ | 99558/371472 [7:55:03<19:57:40, 3.78it/s] 27%|██▋ | 99559/371472 [7:55:03<19:31:55, 3.87it/s] 27%|██▋ | 99560/371472 [7:55:03<19:44:01, 3.83it/s] {'loss': 3.6355, 'learning_rate': 7.591415458208537e-07, 'epoch': 4.29} + 27%|██▋ | 99560/371472 [7:55:03<19:44:01, 3.83it/s] 27%|██▋ | 99561/371472 [7:55:04<19:14:31, 3.93it/s] 27%|██▋ | 99562/371472 [7:55:04<19:25:28, 3.89it/s] 27%|██▋ | 99563/371472 [7:55:04<20:15:01, 3.73it/s] 27%|██▋ | 99564/371472 [7:55:04<21:11:27, 3.56it/s] 27%|██▋ | 99565/371472 [7:55:05<20:31:09, 3.68it/s] 27%|██▋ | 99566/371472 [7:55:05<20:56:45, 3.61it/s] 27%|██▋ | 99567/371472 [7:55:05<20:28:58, 3.69it/s] 27%|██▋ | 99568/371472 [7:55:06<21:15:00, 3.55it/s] 27%|██▋ | 99569/371472 [7:55:06<21:38:11, 3.49it/s] 27%|██▋ | 99570/371472 [7:55:06<22:16:17, 3.39it/s] 27%|██▋ | 99571/371472 [7:55:06<21:03:15, 3.59it/s] 27%|██▋ | 99572/371472 [7:55:07<20:21:08, 3.71it/s] 27%|██▋ | 99573/371472 [7:55:07<20:32:11, 3.68it/s] 27%|██▋ | 99574/371472 [7:55:07<22:06:05, 3.42it/s] 27%|██▋ | 99575/371472 [7:55:08<22:08:52, 3.41it/s] 27%|██▋ | 99576/371472 [7:55:08<23:06:02, 3.27it/s] 27%|██▋ | 99577/371472 [7:55:08<22:33:59, 3.35it/s] 27%|██▋ | 99578/371472 [7:55:08<21:55:38, 3.44it/s] 27%|██▋ | 99579/371472 [7:55:09<21:37:41, 3.49it/s] 27%|██▋ | 99580/371472 [7:55:09<21:55:59, 3.44it/s] {'loss': 3.527, 'learning_rate': 7.590930638453749e-07, 'epoch': 4.29} + 27%|██▋ | 99580/371472 [7:55:09<21:55:59, 3.44it/s] 27%|██▋ | 99581/371472 [7:55:09<23:22:22, 3.23it/s] 27%|██▋ | 99582/371472 [7:55:10<22:49:20, 3.31it/s] 27%|██▋ | 99583/371472 [7:55:10<22:09:58, 3.41it/s] 27%|██▋ | 99584/371472 [7:55:10<23:53:55, 3.16it/s] 27%|██▋ | 99585/371472 [7:55:11<22:06:25, 3.42it/s] 27%|██▋ | 99586/371472 [7:55:11<21:44:03, 3.47it/s] 27%|██▋ | 99587/371472 [7:55:11<23:08:20, 3.26it/s] 27%|██▋ | 99588/371472 [7:55:11<21:50:33, 3.46it/s] 27%|██▋ | 99589/371472 [7:55:12<23:51:45, 3.16it/s] 27%|██▋ | 99590/371472 [7:55:12<22:24:31, 3.37it/s] 27%|██▋ | 99591/371472 [7:55:12<23:12:09, 3.25it/s] 27%|██▋ | 99592/371472 [7:55:13<21:37:03, 3.49it/s] 27%|██▋ | 99593/371472 [7:55:13<22:46:49, 3.32it/s] 27%|██▋ | 99594/371472 [7:55:13<21:34:03, 3.50it/s] 27%|██▋ | 99595/371472 [7:55:13<22:20:05, 3.38it/s] 27%|██▋ | 99596/371472 [7:55:14<21:43:23, 3.48it/s] 27%|██▋ | 99597/371472 [7:55:14<22:32:24, 3.35it/s] 27%|██▋ | 99598/371472 [7:55:14<22:05:55, 3.42it/s] 27%|██▋ | 99599/371472 [7:55:15<22:37:14, 3.34it/s] 27%|██▋ | 99600/371472 [7:55:15<23:55:45, 3.16it/s] {'loss': 3.3932, 'learning_rate': 7.590445818698958e-07, 'epoch': 4.29} + 27%|██▋ | 99600/371472 [7:55:15<23:55:45, 3.16it/s] 27%|██▋ | 99601/371472 [7:55:15<22:34:51, 3.34it/s] 27%|██▋ | 99602/371472 [7:55:16<21:25:48, 3.52it/s] 27%|██▋ | 99603/371472 [7:55:16<21:59:59, 3.43it/s] 27%|██▋ | 99604/371472 [7:55:16<21:12:50, 3.56it/s] 27%|██▋ | 99605/371472 [7:55:16<20:28:01, 3.69it/s] 27%|██▋ | 99606/371472 [7:55:17<21:53:28, 3.45it/s] 27%|██▋ | 99607/371472 [7:55:17<21:27:09, 3.52it/s] 27%|██▋ | 99608/371472 [7:55:17<20:53:54, 3.61it/s] 27%|██▋ | 99609/371472 [7:55:17<20:54:34, 3.61it/s] 27%|██▋ | 99610/371472 [7:55:18<20:46:31, 3.63it/s] 27%|██▋ | 99611/371472 [7:55:18<21:41:43, 3.48it/s] 27%|██▋ | 99612/371472 [7:55:18<21:33:00, 3.50it/s] 27%|██▋ | 99613/371472 [7:55:19<20:30:57, 3.68it/s] 27%|██▋ | 99614/371472 [7:55:19<21:22:56, 3.53it/s] 27%|██▋ | 99615/371472 [7:55:19<21:30:11, 3.51it/s] 27%|██▋ | 99616/371472 [7:55:20<23:03:00, 3.28it/s] 27%|██▋ | 99617/371472 [7:55:20<21:58:20, 3.44it/s] 27%|██▋ | 99618/371472 [7:55:20<22:15:22, 3.39it/s] 27%|██▋ | 99619/371472 [7:55:20<23:30:28, 3.21it/s] 27%|██▋ | 99620/371472 [7:55:21<23:28:52, 3.22it/s] {'loss': 3.5408, 'learning_rate': 7.58996099894417e-07, 'epoch': 4.29} + 27%|██▋ | 99620/371472 [7:55:21<23:28:52, 3.22it/s] 27%|██▋ | 99621/371472 [7:55:21<22:45:03, 3.32it/s] 27%|██▋ | 99622/371472 [7:55:21<21:52:28, 3.45it/s] 27%|██▋ | 99623/371472 [7:55:22<21:58:19, 3.44it/s] 27%|██▋ | 99624/371472 [7:55:22<21:47:10, 3.47it/s] 27%|██▋ | 99625/371472 [7:55:22<21:17:36, 3.55it/s] 27%|██▋ | 99626/371472 [7:55:22<21:42:48, 3.48it/s] 27%|██▋ | 99627/371472 [7:55:23<23:21:26, 3.23it/s] 27%|██▋ | 99628/371472 [7:55:23<22:21:08, 3.38it/s] 27%|██▋ | 99629/371472 [7:55:23<22:26:03, 3.37it/s] 27%|██▋ | 99630/371472 [7:55:24<21:50:48, 3.46it/s] 27%|██▋ | 99631/371472 [7:55:24<21:51:03, 3.46it/s] 27%|██▋ | 99632/371472 [7:55:24<21:49:00, 3.46it/s] 27%|██▋ | 99633/371472 [7:55:25<21:43:49, 3.47it/s] 27%|██▋ | 99634/371472 [7:55:25<20:52:41, 3.62it/s] 27%|██▋ | 99635/371472 [7:55:25<20:14:20, 3.73it/s] 27%|██▋ | 99636/371472 [7:55:25<20:20:15, 3.71it/s] 27%|██▋ | 99637/371472 [7:55:26<19:56:53, 3.79it/s] 27%|██▋ | 99638/371472 [7:55:26<20:20:53, 3.71it/s] 27%|██▋ | 99639/371472 [7:55:26<20:11:11, 3.74it/s] 27%|██▋ | 99640/371472 [7:55:26<19:50:26, 3.81it/s] {'loss': 3.6079, 'learning_rate': 7.589476179189381e-07, 'epoch': 4.29} + 27%|██▋ | 99640/371472 [7:55:26<19:50:26, 3.81it/s] 27%|██▋ | 99641/371472 [7:55:27<20:33:45, 3.67it/s] 27%|██▋ | 99642/371472 [7:55:27<20:51:51, 3.62it/s] 27%|██▋ | 99643/371472 [7:55:27<21:25:50, 3.52it/s] 27%|██▋ | 99644/371472 [7:55:27<21:17:48, 3.55it/s] 27%|██▋ | 99645/371472 [7:55:28<23:44:39, 3.18it/s] 27%|██▋ | 99646/371472 [7:55:28<22:55:41, 3.29it/s] 27%|██▋ | 99647/371472 [7:55:28<22:18:44, 3.38it/s] 27%|██▋ | 99648/371472 [7:55:29<23:10:11, 3.26it/s] 27%|██▋ | 99649/371472 [7:55:29<22:13:18, 3.40it/s] 27%|██▋ | 99650/371472 [7:55:29<21:46:12, 3.47it/s] 27%|██▋ | 99651/371472 [7:55:30<21:31:34, 3.51it/s] 27%|██▋ | 99652/371472 [7:55:30<21:55:09, 3.44it/s] 27%|██▋ | 99653/371472 [7:55:30<22:20:43, 3.38it/s] 27%|██▋ | 99654/371472 [7:55:31<22:28:36, 3.36it/s] 27%|██▋ | 99655/371472 [7:55:31<22:19:55, 3.38it/s] 27%|██▋ | 99656/371472 [7:55:31<22:00:10, 3.43it/s] 27%|██▋ | 99657/371472 [7:55:31<21:19:58, 3.54it/s] 27%|██▋ | 99658/371472 [7:55:32<21:56:04, 3.44it/s] 27%|██▋ | 99659/371472 [7:55:32<21:51:00, 3.46it/s] 27%|██▋ | 99660/371472 [7:55:32<21:12:47, 3.56it/s] {'loss': 3.4367, 'learning_rate': 7.588991359434592e-07, 'epoch': 4.29} + 27%|██▋ | 99660/371472 [7:55:32<21:12:47, 3.56it/s] 27%|██▋ | 99661/371472 [7:55:32<20:18:11, 3.72it/s] 27%|██▋ | 99662/371472 [7:55:33<20:41:46, 3.65it/s] 27%|██▋ | 99663/371472 [7:55:33<21:21:06, 3.54it/s] 27%|██▋ | 99664/371472 [7:55:33<21:04:13, 3.58it/s] 27%|██▋ | 99665/371472 [7:55:34<21:14:36, 3.55it/s] 27%|██▋ | 99666/371472 [7:55:34<22:02:31, 3.43it/s] 27%|██▋ | 99667/371472 [7:55:34<21:02:26, 3.59it/s] 27%|██▋ | 99668/371472 [7:55:34<21:21:40, 3.53it/s] 27%|██▋ | 99669/371472 [7:55:35<20:43:42, 3.64it/s] 27%|██▋ | 99670/371472 [7:55:35<21:05:07, 3.58it/s] 27%|██▋ | 99671/371472 [7:55:35<21:22:57, 3.53it/s] 27%|██▋ | 99672/371472 [7:55:36<21:08:00, 3.57it/s] 27%|██▋ | 99673/371472 [7:55:36<20:50:03, 3.62it/s] 27%|██▋ | 99674/371472 [7:55:36<20:19:24, 3.71it/s] 27%|██▋ | 99675/371472 [7:55:36<19:41:39, 3.83it/s] 27%|██▋ | 99676/371472 [7:55:37<19:39:28, 3.84it/s] 27%|██▋ | 99677/371472 [7:55:37<20:14:53, 3.73it/s] 27%|██▋ | 99678/371472 [7:55:37<20:27:05, 3.69it/s] 27%|██▋ | 99679/371472 [7:55:37<20:25:49, 3.70it/s] 27%|██▋ | 99680/371472 [7:55:38<20:38:47, 3.66it/s] {'loss': 3.4824, 'learning_rate': 7.588506539679802e-07, 'epoch': 4.29} + 27%|██▋ | 99680/371472 [7:55:38<20:38:47, 3.66it/s] 27%|██▋ | 99681/371472 [7:55:38<20:17:15, 3.72it/s] 27%|██▋ | 99682/371472 [7:55:38<19:56:30, 3.79it/s] 27%|██▋ | 99683/371472 [7:55:38<19:14:14, 3.92it/s] 27%|██▋ | 99684/371472 [7:55:39<19:37:16, 3.85it/s] 27%|██▋ | 99685/371472 [7:55:39<19:27:37, 3.88it/s] 27%|██▋ | 99686/371472 [7:55:39<19:38:48, 3.84it/s] 27%|██▋ | 99687/371472 [7:55:39<19:05:47, 3.95it/s] 27%|██▋ | 99688/371472 [7:55:40<19:49:57, 3.81it/s] 27%|██▋ | 99689/371472 [7:55:40<19:22:06, 3.90it/s] 27%|██▋ | 99690/371472 [7:55:40<19:54:23, 3.79it/s] 27%|██▋ | 99691/371472 [7:55:41<20:19:17, 3.71it/s] 27%|██▋ | 99692/371472 [7:55:41<20:57:42, 3.60it/s] 27%|██▋ | 99693/371472 [7:55:41<20:53:12, 3.61it/s] 27%|██▋ | 99694/371472 [7:55:41<22:10:26, 3.40it/s] 27%|██▋ | 99695/371472 [7:55:42<21:45:32, 3.47it/s] 27%|██▋ | 99696/371472 [7:55:42<21:48:28, 3.46it/s] 27%|██▋ | 99697/371472 [7:55:42<22:08:55, 3.41it/s] 27%|██▋ | 99698/371472 [7:55:43<21:36:15, 3.49it/s] 27%|██▋ | 99699/371472 [7:55:43<20:34:53, 3.67it/s] 27%|██▋ | 99700/371472 [7:55:43<21:01:46, 3.59it/s] {'loss': 3.6333, 'learning_rate': 7.588021719925014e-07, 'epoch': 4.29} + 27%|██▋ | 99700/371472 [7:55:43<21:01:46, 3.59it/s] 27%|██▋ | 99701/371472 [7:55:43<22:15:29, 3.39it/s] 27%|██▋ | 99702/371472 [7:55:44<23:00:30, 3.28it/s] 27%|██▋ | 99703/371472 [7:55:44<22:54:44, 3.29it/s] 27%|██▋ | 99704/371472 [7:55:44<21:40:46, 3.48it/s] 27%|██▋ | 99705/371472 [7:55:45<24:33:17, 3.07it/s] 27%|██▋ | 99706/371472 [7:55:45<23:17:49, 3.24it/s] 27%|██▋ | 99707/371472 [7:55:45<22:20:37, 3.38it/s] 27%|██▋ | 99708/371472 [7:55:46<23:14:25, 3.25it/s] 27%|██▋ | 99709/371472 [7:55:46<22:18:38, 3.38it/s] 27%|██▋ | 99710/371472 [7:55:46<21:56:49, 3.44it/s] 27%|██▋ | 99711/371472 [7:55:46<21:09:52, 3.57it/s] 27%|██▋ | 99712/371472 [7:55:47<22:28:33, 3.36it/s] 27%|██▋ | 99713/371472 [7:55:47<22:31:19, 3.35it/s] 27%|██▋ | 99714/371472 [7:55:47<23:28:37, 3.22it/s] 27%|██▋ | 99715/371472 [7:55:48<22:25:38, 3.37it/s] 27%|██▋ | 99716/371472 [7:55:48<22:27:59, 3.36it/s] 27%|██▋ | 99717/371472 [7:55:48<21:45:18, 3.47it/s] 27%|██▋ | 99718/371472 [7:55:48<21:22:01, 3.53it/s] 27%|██▋ | 99719/371472 [7:55:49<21:12:41, 3.56it/s] 27%|██▋ | 99720/371472 [7:55:49<24:16:29, 3.11it/s] {'loss': 3.664, 'learning_rate': 7.587536900170226e-07, 'epoch': 4.3} + 27%|██▋ | 99720/371472 [7:55:49<24:16:29, 3.11it/s] 27%|██▋ | 99721/371472 [7:55:49<23:00:28, 3.28it/s] 27%|██▋ | 99722/371472 [7:55:50<25:12:25, 2.99it/s] 27%|██▋ | 99723/371472 [7:55:50<23:28:25, 3.22it/s] 27%|██▋ | 99724/371472 [7:55:50<21:44:15, 3.47it/s] 27%|██▋ | 99725/371472 [7:55:51<21:05:36, 3.58it/s] 27%|██▋ | 99726/371472 [7:55:51<20:49:15, 3.63it/s] 27%|██▋ | 99727/371472 [7:55:51<21:03:31, 3.58it/s] 27%|██▋ | 99728/371472 [7:55:51<20:58:16, 3.60it/s] 27%|██▋ | 99729/371472 [7:55:52<21:45:48, 3.47it/s] 27%|██▋ | 99730/371472 [7:55:52<22:35:29, 3.34it/s] 27%|██▋ | 99731/371472 [7:55:52<21:49:26, 3.46it/s] 27%|██▋ | 99732/371472 [7:55:53<21:52:50, 3.45it/s] 27%|██▋ | 99733/371472 [7:55:53<21:06:56, 3.57it/s] 27%|██▋ | 99734/371472 [7:55:53<20:51:48, 3.62it/s] 27%|██▋ | 99735/371472 [7:55:53<21:44:42, 3.47it/s] 27%|██▋ | 99736/371472 [7:55:54<20:58:45, 3.60it/s] 27%|██▋ | 99737/371472 [7:55:54<20:24:42, 3.70it/s] 27%|██▋ | 99738/371472 [7:55:54<20:22:00, 3.71it/s] 27%|██▋ | 99739/371472 [7:55:54<19:48:36, 3.81it/s] 27%|██▋ | 99740/371472 [7:55:55<20:10:39, 3.74it/s] {'loss': 3.5497, 'learning_rate': 7.587052080415437e-07, 'epoch': 4.3} + 27%|██▋ | 99740/371472 [7:55:55<20:10:39, 3.74it/s] 27%|██▋ | 99741/371472 [7:55:55<21:04:02, 3.58it/s] 27%|██▋ | 99742/371472 [7:55:55<20:46:07, 3.63it/s] 27%|██▋ | 99743/371472 [7:55:56<20:40:34, 3.65it/s] 27%|██▋ | 99744/371472 [7:55:56<20:13:57, 3.73it/s] 27%|██▋ | 99745/371472 [7:55:56<21:01:57, 3.59it/s] 27%|██▋ | 99746/371472 [7:55:56<20:15:47, 3.72it/s] 27%|██▋ | 99747/371472 [7:55:57<20:38:30, 3.66it/s] 27%|██▋ | 99748/371472 [7:55:57<20:46:02, 3.63it/s] 27%|██▋ | 99749/371472 [7:55:57<21:32:14, 3.50it/s] 27%|██▋ | 99750/371472 [7:55:58<21:27:16, 3.52it/s] 27%|██▋ | 99751/371472 [7:55:58<21:17:34, 3.54it/s] 27%|██▋ | 99752/371472 [7:55:58<21:57:52, 3.44it/s] 27%|██▋ | 99753/371472 [7:55:58<21:36:06, 3.49it/s] 27%|██▋ | 99754/371472 [7:55:59<22:13:33, 3.40it/s] 27%|██▋ | 99755/371472 [7:55:59<21:09:22, 3.57it/s] 27%|██▋ | 99756/371472 [7:55:59<22:01:50, 3.43it/s] 27%|██▋ | 99757/371472 [7:56:00<21:31:18, 3.51it/s] 27%|██▋ | 99758/371472 [7:56:00<21:12:01, 3.56it/s] 27%|██▋ | 99759/371472 [7:56:00<21:27:42, 3.52it/s] 27%|██▋ | 99760/371472 [7:56:01<24:08:17, 3.13it/s] {'loss': 3.5202, 'learning_rate': 7.586567260660646e-07, 'epoch': 4.3} + 27%|██▋ | 99760/371472 [7:56:01<24:08:17, 3.13it/s] 27%|██▋ | 99761/371472 [7:56:01<23:34:51, 3.20it/s] 27%|██▋ | 99762/371472 [7:56:01<24:23:06, 3.10it/s] 27%|██▋ | 99763/371472 [7:56:02<23:59:54, 3.14it/s] 27%|██▋ | 99764/371472 [7:56:02<23:09:33, 3.26it/s] 27%|██▋ | 99765/371472 [7:56:02<23:02:34, 3.28it/s] 27%|██▋ | 99766/371472 [7:56:02<22:00:27, 3.43it/s] 27%|██▋ | 99767/371472 [7:56:03<22:47:01, 3.31it/s] 27%|██▋ | 99768/371472 [7:56:03<22:31:17, 3.35it/s] 27%|██▋ | 99769/371472 [7:56:03<25:12:13, 2.99it/s] 27%|██▋ | 99770/371472 [7:56:04<26:15:30, 2.87it/s] 27%|██▋ | 99771/371472 [7:56:04<26:14:23, 2.88it/s] 27%|██▋ | 99772/371472 [7:56:04<24:49:18, 3.04it/s] 27%|██▋ | 99773/371472 [7:56:05<23:47:01, 3.17it/s] 27%|██▋ | 99774/371472 [7:56:05<23:44:32, 3.18it/s] 27%|██▋ | 99775/371472 [7:56:05<22:50:11, 3.30it/s] 27%|██▋ | 99776/371472 [7:56:06<22:06:08, 3.41it/s] 27%|██▋ | 99777/371472 [7:56:06<21:53:14, 3.45it/s] 27%|██▋ | 99778/371472 [7:56:06<22:01:39, 3.43it/s] 27%|██▋ | 99779/371472 [7:56:06<22:07:46, 3.41it/s] 27%|██▋ | 99780/371472 [7:56:07<23:53:14, 3.16it/s] {'loss': 3.6558, 'learning_rate': 7.586082440905858e-07, 'epoch': 4.3} + 27%|██▋ | 99780/371472 [7:56:07<23:53:14, 3.16it/s] 27%|██▋ | 99781/371472 [7:56:07<22:57:32, 3.29it/s] 27%|██▋ | 99782/371472 [7:56:07<22:27:25, 3.36it/s] 27%|██▋ | 99783/371472 [7:56:08<22:26:15, 3.36it/s] 27%|██▋ | 99784/371472 [7:56:08<22:21:21, 3.38it/s] 27%|██▋ | 99785/371472 [7:56:08<22:00:30, 3.43it/s] 27%|██▋ | 99786/371472 [7:56:09<23:27:06, 3.22it/s] 27%|██▋ | 99787/371472 [7:56:09<22:17:34, 3.39it/s] 27%|██▋ | 99788/371472 [7:56:09<21:16:23, 3.55it/s] 27%|██▋ | 99789/371472 [7:56:09<20:48:50, 3.63it/s] 27%|██▋ | 99790/371472 [7:56:10<20:30:40, 3.68it/s] 27%|██▋ | 99791/371472 [7:56:10<20:34:54, 3.67it/s] 27%|██▋ | 99792/371472 [7:56:10<20:22:12, 3.70it/s] 27%|██▋ | 99793/371472 [7:56:10<20:13:32, 3.73it/s] 27%|██▋ | 99794/371472 [7:56:11<20:25:17, 3.70it/s] 27%|██▋ | 99795/371472 [7:56:11<20:42:30, 3.64it/s] 27%|██▋ | 99796/371472 [7:56:11<20:12:11, 3.74it/s] 27%|██▋ | 99797/371472 [7:56:12<21:12:01, 3.56it/s] 27%|██▋ | 99798/371472 [7:56:12<21:36:28, 3.49it/s] 27%|██▋ | 99799/371472 [7:56:12<21:05:59, 3.58it/s] 27%|██▋ | 99800/371472 [7:56:12<21:12:38, 3.56it/s] {'loss': 3.5368, 'learning_rate': 7.58559762115107e-07, 'epoch': 4.3} + 27%|██▋ | 99800/371472 [7:56:12<21:12:38, 3.56it/s] 27%|██▋ | 99801/371472 [7:56:13<22:02:51, 3.42it/s] 27%|██▋ | 99802/371472 [7:56:13<21:40:01, 3.48it/s] 27%|██▋ | 99803/371472 [7:56:13<20:57:48, 3.60it/s] 27%|██▋ | 99804/371472 [7:56:13<20:12:04, 3.74it/s] 27%|██▋ | 99805/371472 [7:56:14<21:51:42, 3.45it/s] 27%|██▋ | 99806/371472 [7:56:14<21:16:44, 3.55it/s] 27%|██▋ | 99807/371472 [7:56:14<20:45:52, 3.63it/s] 27%|██▋ | 99808/371472 [7:56:15<20:58:02, 3.60it/s] 27%|██▋ | 99809/371472 [7:56:15<20:55:30, 3.61it/s] 27%|██▋ | 99810/371472 [7:56:15<20:36:02, 3.66it/s] 27%|██▋ | 99811/371472 [7:56:15<21:00:55, 3.59it/s] 27%|██▋ | 99812/371472 [7:56:16<20:53:05, 3.61it/s] 27%|██▋ | 99813/371472 [7:56:16<22:22:45, 3.37it/s] 27%|██▋ | 99814/371472 [7:56:16<21:46:05, 3.47it/s] 27%|██▋ | 99815/371472 [7:56:17<22:12:32, 3.40it/s] 27%|██▋ | 99816/371472 [7:56:17<21:52:01, 3.45it/s] 27%|██▋ | 99817/371472 [7:56:17<22:50:07, 3.30it/s] 27%|██▋ | 99818/371472 [7:56:18<22:24:21, 3.37it/s] 27%|██▋ | 99819/371472 [7:56:18<22:21:11, 3.38it/s] 27%|██▋ | 99820/371472 [7:56:18<22:48:38, 3.31it/s] {'loss': 3.5494, 'learning_rate': 7.58511280139628e-07, 'epoch': 4.3} + 27%|██▋ | 99820/371472 [7:56:18<22:48:38, 3.31it/s] 27%|██▋ | 99821/371472 [7:56:18<23:36:47, 3.20it/s] 27%|██▋ | 99822/371472 [7:56:19<23:03:57, 3.27it/s] 27%|██▋ | 99823/371472 [7:56:19<21:42:19, 3.48it/s] 27%|██▋ | 99824/371472 [7:56:19<21:42:08, 3.48it/s] 27%|██▋ | 99825/371472 [7:56:20<21:51:41, 3.45it/s] 27%|██▋ | 99826/371472 [7:56:20<21:16:25, 3.55it/s] 27%|██▋ | 99827/371472 [7:56:20<22:24:10, 3.37it/s] 27%|██▋ | 99828/371472 [7:56:21<23:01:34, 3.28it/s] 27%|██▋ | 99829/371472 [7:56:21<21:54:13, 3.44it/s] 27%|██▋ | 99830/371472 [7:56:21<20:46:22, 3.63it/s] 27%|██▋ | 99831/371472 [7:56:21<20:44:34, 3.64it/s] 27%|██▋ | 99832/371472 [7:56:22<20:32:14, 3.67it/s] 27%|██▋ | 99833/371472 [7:56:22<20:36:34, 3.66it/s] 27%|██▋ | 99834/371472 [7:56:22<20:57:22, 3.60it/s] 27%|██▋ | 99835/371472 [7:56:22<20:14:13, 3.73it/s] 27%|██▋ | 99836/371472 [7:56:23<20:23:33, 3.70it/s] 27%|██▋ | 99837/371472 [7:56:23<21:05:04, 3.58it/s] 27%|██▋ | 99838/371472 [7:56:23<20:31:26, 3.68it/s] 27%|██▋ | 99839/371472 [7:56:24<21:44:59, 3.47it/s] 27%|██▋ | 99840/371472 [7:56:24<21:05:58, 3.58it/s] {'loss': 3.8079, 'learning_rate': 7.584627981641491e-07, 'epoch': 4.3} + 27%|██▋ | 99840/371472 [7:56:24<21:05:58, 3.58it/s] 27%|██▋ | 99841/371472 [7:56:24<21:25:26, 3.52it/s] 27%|██▋ | 99842/371472 [7:56:24<21:29:02, 3.51it/s] 27%|██▋ | 99843/371472 [7:56:25<21:01:30, 3.59it/s] 27%|██▋ | 99844/371472 [7:56:25<20:44:51, 3.64it/s] 27%|██▋ | 99845/371472 [7:56:25<20:09:43, 3.74it/s] 27%|██▋ | 99846/371472 [7:56:25<20:34:42, 3.67it/s] 27%|██▋ | 99847/371472 [7:56:26<22:19:01, 3.38it/s] 27%|██▋ | 99848/371472 [7:56:26<21:24:15, 3.53it/s] 27%|██▋ | 99849/371472 [7:56:26<21:50:06, 3.46it/s] 27%|██▋ | 99850/371472 [7:56:27<21:31:42, 3.50it/s] 27%|██▋ | 99851/371472 [7:56:27<23:30:43, 3.21it/s] 27%|██▋ | 99852/371472 [7:56:27<22:30:57, 3.35it/s] 27%|██▋ | 99853/371472 [7:56:28<21:56:43, 3.44it/s] 27%|██▋ | 99854/371472 [7:56:28<21:18:19, 3.54it/s] 27%|██▋ | 99855/371472 [7:56:28<20:38:19, 3.66it/s] 27%|██▋ | 99856/371472 [7:56:28<20:45:45, 3.63it/s] 27%|██▋ | 99857/371472 [7:56:29<22:40:40, 3.33it/s] 27%|██▋ | 99858/371472 [7:56:29<21:54:38, 3.44it/s] 27%|██▋ | 99859/371472 [7:56:29<21:45:06, 3.47it/s] 27%|██▋ | 99860/371472 [7:56:29<21:18:11, 3.54it/s] {'loss': 3.6958, 'learning_rate': 7.584143161886704e-07, 'epoch': 4.3} + 27%|██▋ | 99860/371472 [7:56:29<21:18:11, 3.54it/s] 27%|██▋ | 99861/371472 [7:56:30<22:40:08, 3.33it/s] 27%|██▋ | 99862/371472 [7:56:30<21:51:23, 3.45it/s] 27%|██▋ | 99863/371472 [7:56:30<22:12:32, 3.40it/s] 27%|██▋ | 99864/371472 [7:56:31<22:52:58, 3.30it/s] 27%|██▋ | 99865/371472 [7:56:31<23:44:48, 3.18it/s] 27%|██▋ | 99866/371472 [7:56:31<23:16:30, 3.24it/s] 27%|██▋ | 99867/371472 [7:56:32<23:15:29, 3.24it/s] 27%|██▋ | 99868/371472 [7:56:32<22:26:47, 3.36it/s] 27%|██▋ | 99869/371472 [7:56:32<21:57:11, 3.44it/s] 27%|██▋ | 99870/371472 [7:56:32<21:01:34, 3.59it/s] 27%|██▋ | 99871/371472 [7:56:33<20:33:45, 3.67it/s] 27%|██▋ | 99872/371472 [7:56:33<22:37:47, 3.33it/s] 27%|██▋ | 99873/371472 [7:56:33<21:36:58, 3.49it/s] 27%|██▋ | 99874/371472 [7:56:34<21:18:10, 3.54it/s] 27%|██▋ | 99875/371472 [7:56:34<22:04:22, 3.42it/s] 27%|██▋ | 99876/371472 [7:56:34<22:00:59, 3.43it/s] 27%|██▋ | 99877/371472 [7:56:35<23:13:10, 3.25it/s] 27%|██▋ | 99878/371472 [7:56:35<23:11:56, 3.25it/s] 27%|██▋ | 99879/371472 [7:56:35<22:12:52, 3.40it/s] 27%|██▋ | 99880/371472 [7:56:35<21:42:45, 3.47it/s] {'loss': 3.5206, 'learning_rate': 7.583658342131914e-07, 'epoch': 4.3} + 27%|██▋ | 99880/371472 [7:56:35<21:42:45, 3.47it/s] 27%|██▋ | 99881/371472 [7:56:36<21:10:07, 3.56it/s] 27%|██▋ | 99882/371472 [7:56:36<20:17:00, 3.72it/s] 27%|██▋ | 99883/371472 [7:56:36<20:21:34, 3.71it/s] 27%|██▋ | 99884/371472 [7:56:37<22:57:03, 3.29it/s] 27%|██▋ | 99885/371472 [7:56:37<24:33:34, 3.07it/s] 27%|██▋ | 99886/371472 [7:56:37<24:17:49, 3.10it/s] 27%|██▋ | 99887/371472 [7:56:38<22:42:49, 3.32it/s] 27%|██▋ | 99888/371472 [7:56:38<22:21:24, 3.37it/s] 27%|██▋ | 99889/371472 [7:56:38<21:27:43, 3.52it/s] 27%|██▋ | 99890/371472 [7:56:38<21:09:30, 3.57it/s] 27%|██▋ | 99891/371472 [7:56:39<20:12:51, 3.73it/s] 27%|██▋ | 99892/371472 [7:56:39<24:48:29, 3.04it/s] 27%|██▋ | 99893/371472 [7:56:39<23:05:32, 3.27it/s] 27%|██▋ | 99894/371472 [7:56:40<22:16:55, 3.39it/s] 27%|██▋ | 99895/371472 [7:56:40<21:55:26, 3.44it/s] 27%|██▋ | 99896/371472 [7:56:40<21:18:36, 3.54it/s] 27%|██▋ | 99897/371472 [7:56:40<21:12:46, 3.56it/s] 27%|██▋ | 99898/371472 [7:56:41<22:13:54, 3.39it/s] 27%|██▋ | 99899/371472 [7:56:41<22:17:13, 3.38it/s] 27%|██▋ | 99900/371472 [7:56:41<22:04:00, 3.42it/s] {'loss': 3.5383, 'learning_rate': 7.583173522377124e-07, 'epoch': 4.3} + 27%|██▋ | 99900/371472 [7:56:41<22:04:00, 3.42it/s] 27%|██▋ | 99901/371472 [7:56:42<21:31:07, 3.51it/s] 27%|██▋ | 99902/371472 [7:56:42<21:27:32, 3.52it/s] 27%|██▋ | 99903/371472 [7:56:42<21:59:24, 3.43it/s] 27%|██▋ | 99904/371472 [7:56:42<21:28:02, 3.51it/s] 27%|██▋ | 99905/371472 [7:56:43<20:44:09, 3.64it/s] 27%|██▋ | 99906/371472 [7:56:43<20:13:52, 3.73it/s] 27%|██▋ | 99907/371472 [7:56:43<20:11:16, 3.74it/s] 27%|██▋ | 99908/371472 [7:56:43<19:55:43, 3.79it/s] 27%|██▋ | 99909/371472 [7:56:44<19:57:12, 3.78it/s] 27%|██▋ | 99910/371472 [7:56:44<19:50:24, 3.80it/s] 27%|██▋ | 99911/371472 [7:56:44<22:27:00, 3.36it/s] 27%|██▋ | 99912/371472 [7:56:45<21:50:14, 3.45it/s] 27%|██▋ | 99913/371472 [7:56:45<22:17:47, 3.38it/s] 27%|██▋ | 99914/371472 [7:56:45<22:45:03, 3.32it/s] 27%|██▋ | 99915/371472 [7:56:46<23:03:53, 3.27it/s] 27%|██▋ | 99916/371472 [7:56:46<23:06:27, 3.26it/s] 27%|██▋ | 99917/371472 [7:56:46<23:04:07, 3.27it/s] 27%|██▋ | 99918/371472 [7:56:47<24:11:51, 3.12it/s] 27%|██▋ | 99919/371472 [7:56:47<22:45:54, 3.31it/s] 27%|██▋ | 99920/371472 [7:56:47<22:06:05, 3.41it/s] {'loss': 3.5775, 'learning_rate': 7.582688702622335e-07, 'epoch': 4.3} + 27%|██▋ | 99920/371472 [7:56:47<22:06:05, 3.41it/s] 27%|██▋ | 99921/371472 [7:56:47<21:35:18, 3.49it/s] 27%|██▋ | 99922/371472 [7:56:48<20:59:49, 3.59it/s] 27%|██▋ | 99923/371472 [7:56:48<20:46:56, 3.63it/s] 27%|██▋ | 99924/371472 [7:56:48<20:35:48, 3.66it/s] 27%|██▋ | 99925/371472 [7:56:48<21:14:53, 3.55it/s] 27%|██▋ | 99926/371472 [7:56:49<21:27:06, 3.52it/s] 27%|██▋ | 99927/371472 [7:56:49<21:37:38, 3.49it/s] 27%|██▋ | 99928/371472 [7:56:49<21:29:22, 3.51it/s] 27%|██▋ | 99929/371472 [7:56:50<21:26:42, 3.52it/s] 27%|██▋ | 99930/371472 [7:56:50<20:32:21, 3.67it/s] 27%|██▋ | 99931/371472 [7:56:50<20:45:32, 3.63it/s] 27%|██▋ | 99932/371472 [7:56:50<21:59:56, 3.43it/s] 27%|██▋ | 99933/371472 [7:56:51<22:14:46, 3.39it/s] 27%|██▋ | 99934/371472 [7:56:51<21:24:20, 3.52it/s] 27%|██▋ | 99935/371472 [7:56:51<21:13:07, 3.55it/s] 27%|██▋ | 99936/371472 [7:56:52<21:46:50, 3.46it/s] 27%|██▋ | 99937/371472 [7:56:52<20:38:35, 3.65it/s] 27%|██▋ | 99938/371472 [7:56:52<20:43:54, 3.64it/s] 27%|██▋ | 99939/371472 [7:56:52<21:36:12, 3.49it/s] 27%|██▋ | 99940/371472 [7:56:53<21:20:28, 3.53it/s] {'loss': 3.6891, 'learning_rate': 7.582203882867547e-07, 'epoch': 4.3} + 27%|██▋ | 99940/371472 [7:56:53<21:20:28, 3.53it/s] 27%|██▋ | 99941/371472 [7:56:53<20:49:32, 3.62it/s] 27%|██▋ | 99942/371472 [7:56:53<20:43:06, 3.64it/s] 27%|██▋ | 99943/371472 [7:56:53<20:54:48, 3.61it/s] 27%|██▋ | 99944/371472 [7:56:54<20:54:00, 3.61it/s] 27%|██▋ | 99945/371472 [7:56:54<20:53:09, 3.61it/s] 27%|██▋ | 99946/371472 [7:56:54<21:02:08, 3.59it/s] 27%|██▋ | 99947/371472 [7:56:55<20:56:47, 3.60it/s] 27%|██▋ | 99948/371472 [7:56:55<21:04:23, 3.58it/s] 27%|██▋ | 99949/371472 [7:56:55<22:43:12, 3.32it/s] 27%|██▋ | 99950/371472 [7:56:55<21:39:34, 3.48it/s] 27%|██▋ | 99951/371472 [7:56:56<21:27:54, 3.51it/s] 27%|██▋ | 99952/371472 [7:56:56<21:03:08, 3.58it/s] 27%|██▋ | 99953/371472 [7:56:56<22:07:50, 3.41it/s] 27%|██▋ | 99954/371472 [7:56:57<21:02:02, 3.59it/s] 27%|██▋ | 99955/371472 [7:56:57<22:30:57, 3.35it/s] 27%|██▋ | 99956/371472 [7:56:57<22:27:54, 3.36it/s] 27%|██▋ | 99957/371472 [7:56:58<23:16:44, 3.24it/s] 27%|██▋ | 99958/371472 [7:56:58<22:51:01, 3.30it/s] 27%|██▋ | 99959/371472 [7:56:58<22:55:02, 3.29it/s] 27%|██▋ | 99960/371472 [7:56:58<22:49:23, 3.30it/s] {'loss': 3.7513, 'learning_rate': 7.581719063112759e-07, 'epoch': 4.31} + 27%|██▋ | 99960/371472 [7:56:58<22:49:23, 3.30it/s] 27%|██▋ | 99961/371472 [7:56:59<21:47:23, 3.46it/s] 27%|██▋ | 99962/371472 [7:56:59<21:13:21, 3.55it/s] 27%|██▋ | 99963/371472 [7:56:59<20:17:36, 3.72it/s] 27%|██▋ | 99964/371472 [7:57:00<20:27:01, 3.69it/s] 27%|██▋ | 99965/371472 [7:57:00<20:23:02, 3.70it/s] 27%|██▋ | 99966/371472 [7:57:00<20:53:57, 3.61it/s] 27%|██▋ | 99967/371472 [7:57:00<21:56:57, 3.44it/s] 27%|██▋ | 99968/371472 [7:57:01<21:07:49, 3.57it/s] 27%|██▋ | 99969/371472 [7:57:01<21:16:42, 3.54it/s] 27%|██▋ | 99970/371472 [7:57:01<20:38:47, 3.65it/s] 27%|██▋ | 99971/371472 [7:57:02<21:18:38, 3.54it/s] 27%|██▋ | 99972/371472 [7:57:02<22:05:36, 3.41it/s] 27%|██▋ | 99973/371472 [7:57:02<22:08:09, 3.41it/s] 27%|██▋ | 99974/371472 [7:57:02<22:38:25, 3.33it/s] 27%|██▋ | 99975/371472 [7:57:03<23:21:32, 3.23it/s] 27%|██▋ | 99976/371472 [7:57:03<22:09:33, 3.40it/s] 27%|██▋ | 99977/371472 [7:57:03<22:07:40, 3.41it/s] 27%|██▋ | 99978/371472 [7:57:04<22:02:21, 3.42it/s] 27%|██▋ | 99979/371472 [7:57:04<21:15:58, 3.55it/s] 27%|██▋ | 99980/371472 [7:57:04<20:47:08, 3.63it/s] {'loss': 3.5897, 'learning_rate': 7.581234243357969e-07, 'epoch': 4.31} + 27%|██▋ | 99980/371472 [7:57:04<20:47:08, 3.63it/s] 27%|██▋ | 99981/371472 [7:57:04<20:39:41, 3.65it/s] 27%|██▋ | 99982/371472 [7:57:05<22:44:40, 3.32it/s] 27%|██▋ | 99983/371472 [7:57:05<22:56:07, 3.29it/s] 27%|██▋ | 99984/371472 [7:57:05<22:35:41, 3.34it/s] 27%|██▋ | 99985/371472 [7:57:06<21:32:32, 3.50it/s] 27%|██▋ | 99986/371472 [7:57:06<20:40:33, 3.65it/s] 27%|██▋ | 99987/371472 [7:57:06<20:26:31, 3.69it/s] 27%|██▋ | 99988/371472 [7:57:07<23:11:58, 3.25it/s] 27%|██▋ | 99989/371472 [7:57:07<22:53:46, 3.29it/s] 27%|██▋ | 99990/371472 [7:57:07<23:03:08, 3.27it/s] 27%|██▋ | 99991/371472 [7:57:07<22:24:07, 3.37it/s] 27%|██▋ | 99992/371472 [7:57:08<21:22:15, 3.53it/s] 27%|██▋ | 99993/371472 [7:57:08<20:51:41, 3.61it/s] 27%|██▋ | 99994/371472 [7:57:08<20:24:35, 3.69it/s] 27%|██▋ | 99995/371472 [7:57:08<21:36:36, 3.49it/s] 27%|██▋ | 99996/371472 [7:57:09<21:08:56, 3.57it/s] 27%|██▋ | 99997/371472 [7:57:09<22:53:31, 3.29it/s] 27%|██▋ | 99998/371472 [7:57:09<21:50:39, 3.45it/s] 27%|██▋ | 99999/371472 [7:57:10<21:21:41, 3.53it/s] 27%|██▋ | 100000/371472 [7:57:10<20:50:08, 3.62it/s] {'loss': 3.5481, 'learning_rate': 7.58074942360318e-07, 'epoch': 4.31} + 27%|██▋ | 100000/371472 [7:57:10<20:50:08, 3.62it/s] 27%|██▋ | 100001/371472 [7:57:10<20:37:34, 3.66it/s] 27%|██▋ | 100002/371472 [7:57:10<21:44:25, 3.47it/s] 27%|██▋ | 100003/371472 [7:57:11<20:46:59, 3.63it/s] 27%|██▋ | 100004/371472 [7:57:11<20:08:29, 3.74it/s] 27%|██▋ | 100005/371472 [7:57:11<20:25:36, 3.69it/s] 27%|██▋ | 100006/371472 [7:57:12<20:14:39, 3.72it/s] 27%|██▋ | 100007/371472 [7:57:12<19:46:09, 3.81it/s] 27%|██▋ | 100008/371472 [7:57:12<21:37:33, 3.49it/s] 27%|██▋ | 100009/371472 [7:57:12<21:57:00, 3.44it/s] 27%|██▋ | 100010/371472 [7:57:13<22:14:24, 3.39it/s] 27%|██▋ | 100011/371472 [7:57:13<21:41:50, 3.48it/s] 27%|██▋ | 100012/371472 [7:57:13<20:53:37, 3.61it/s] 27%|██▋ | 100013/371472 [7:57:14<20:30:36, 3.68it/s] 27%|██▋ | 100014/371472 [7:57:14<21:01:22, 3.59it/s] 27%|██▋ | 100015/371472 [7:57:14<21:04:21, 3.58it/s] 27%|██▋ | 100016/371472 [7:57:14<21:32:15, 3.50it/s] 27%|██▋ | 100017/371472 [7:57:15<21:41:35, 3.48it/s] 27%|██▋ | 100018/371472 [7:57:15<21:01:33, 3.59it/s] 27%|██▋ | 100019/371472 [7:57:15<21:36:01, 3.49it/s] 27%|██▋ | 100020/371472 [7:57:16<22:25:41, 3.36it/s] {'loss': 3.6821, 'learning_rate': 7.580264603848391e-07, 'epoch': 4.31} + 27%|██▋ | 100020/371472 [7:57:16<22:25:41, 3.36it/s] 27%|██▋ | 100021/371472 [7:57:16<22:04:17, 3.42it/s] 27%|██▋ | 100022/371472 [7:57:16<21:31:15, 3.50it/s] 27%|██▋ | 100023/371472 [7:57:16<21:38:38, 3.48it/s] 27%|██▋ | 100024/371472 [7:57:17<23:27:05, 3.22it/s] 27%|██▋ | 100025/371472 [7:57:17<23:07:12, 3.26it/s] 27%|██▋ | 100026/371472 [7:57:17<22:43:49, 3.32it/s] 27%|██▋ | 100027/371472 [7:57:18<22:53:49, 3.29it/s] 27%|██▋ | 100028/371472 [7:57:18<23:11:35, 3.25it/s] 27%|██▋ | 100029/371472 [7:57:18<22:50:30, 3.30it/s] 27%|██▋ | 100030/371472 [7:57:19<22:08:06, 3.41it/s] 27%|██▋ | 100031/371472 [7:57:19<22:42:05, 3.32it/s] 27%|██▋ | 100032/371472 [7:57:19<21:56:34, 3.44it/s] 27%|██▋ | 100033/371472 [7:57:19<21:22:55, 3.53it/s] 27%|██▋ | 100034/371472 [7:57:20<21:04:42, 3.58it/s] 27%|██▋ | 100035/371472 [7:57:20<20:42:40, 3.64it/s] 27%|██▋ | 100036/371472 [7:57:20<21:15:58, 3.55it/s] 27%|██▋ | 100037/371472 [7:57:20<20:51:52, 3.61it/s] 27%|██▋ | 100038/371472 [7:57:21<20:47:58, 3.63it/s] 27%|██▋ | 100039/371472 [7:57:21<20:56:56, 3.60it/s] 27%|██▋ | 100040/371472 [7:57:21<20:36:23, 3.66it/s] {'loss': 3.6613, 'learning_rate': 7.579779784093603e-07, 'epoch': 4.31} + 27%|██▋ | 100040/371472 [7:57:21<20:36:23, 3.66it/s] 27%|██▋ | 100041/371472 [7:57:22<21:24:17, 3.52it/s] 27%|██▋ | 100042/371472 [7:57:22<20:51:00, 3.62it/s] 27%|██▋ | 100043/371472 [7:57:22<21:57:25, 3.43it/s] 27%|██▋ | 100044/371472 [7:57:22<21:55:18, 3.44it/s] 27%|██▋ | 100045/371472 [7:57:23<22:44:02, 3.32it/s] 27%|██▋ | 100046/371472 [7:57:23<24:02:24, 3.14it/s] 27%|██▋ | 100047/371472 [7:57:23<23:00:04, 3.28it/s] 27%|██▋ | 100048/371472 [7:57:24<23:18:06, 3.24it/s] 27%|██▋ | 100049/371472 [7:57:24<23:00:28, 3.28it/s] 27%|██▋ | 100050/371472 [7:57:24<23:18:23, 3.23it/s] 27%|██▋ | 100051/371472 [7:57:25<22:11:45, 3.40it/s] 27%|██▋ | 100052/371472 [7:57:25<21:32:23, 3.50it/s] 27%|██▋ | 100053/371472 [7:57:25<21:58:06, 3.43it/s] 27%|██▋ | 100054/371472 [7:57:25<21:40:11, 3.48it/s] 27%|██▋ | 100055/371472 [7:57:26<20:47:04, 3.63it/s] 27%|██▋ | 100056/371472 [7:57:26<20:58:59, 3.59it/s] 27%|██▋ | 100057/371472 [7:57:26<22:07:36, 3.41it/s] 27%|██▋ | 100058/371472 [7:57:27<23:33:44, 3.20it/s] 27%|██▋ | 100059/371472 [7:57:27<22:32:32, 3.34it/s] 27%|██▋ | 100060/371472 [7:57:27<22:42:18, 3.32it/s] {'loss': 3.4891, 'learning_rate': 7.579294964338813e-07, 'epoch': 4.31} + 27%|██▋ | 100060/371472 [7:57:27<22:42:18, 3.32it/s] 27%|██▋ | 100061/371472 [7:57:28<21:51:54, 3.45it/s] 27%|██▋ | 100062/371472 [7:57:28<21:05:39, 3.57it/s] 27%|██▋ | 100063/371472 [7:57:28<20:37:53, 3.65it/s] 27%|██▋ | 100064/371472 [7:57:28<21:18:50, 3.54it/s] 27%|██▋ | 100065/371472 [7:57:29<21:24:02, 3.52it/s] 27%|██▋ | 100066/371472 [7:57:29<22:08:29, 3.40it/s] 27%|██▋ | 100067/371472 [7:57:29<22:26:12, 3.36it/s] 27%|██▋ | 100068/371472 [7:57:30<22:33:16, 3.34it/s] 27%|██▋ | 100069/371472 [7:57:30<22:39:05, 3.33it/s] 27%|██▋ | 100070/371472 [7:57:30<22:08:25, 3.41it/s] 27%|██▋ | 100071/371472 [7:57:30<22:04:35, 3.41it/s] 27%|██▋ | 100072/371472 [7:57:31<21:23:56, 3.52it/s] 27%|██▋ | 100073/371472 [7:57:31<21:09:12, 3.56it/s] 27%|██▋ | 100074/371472 [7:57:31<20:20:23, 3.71it/s] 27%|██▋ | 100075/371472 [7:57:31<20:10:01, 3.74it/s] 27%|██▋ | 100076/371472 [7:57:32<22:01:08, 3.42it/s] 27%|██▋ | 100077/371472 [7:57:32<23:21:06, 3.23it/s] 27%|██▋ | 100078/371472 [7:57:32<22:57:26, 3.28it/s] 27%|██▋ | 100079/371472 [7:57:33<21:22:09, 3.53it/s] 27%|██▋ | 100080/371472 [7:57:33<21:03:38, 3.58it/s] {'loss': 3.5326, 'learning_rate': 7.578810144584024e-07, 'epoch': 4.31} + 27%|██▋ | 100080/371472 [7:57:33<21:03:38, 3.58it/s] 27%|██▋ | 100081/371472 [7:57:33<22:11:54, 3.40it/s] 27%|██▋ | 100082/371472 [7:57:34<21:39:40, 3.48it/s] 27%|██▋ | 100083/371472 [7:57:34<21:03:00, 3.58it/s] 27%|██▋ | 100084/371472 [7:57:34<20:31:19, 3.67it/s] 27%|██▋ | 100085/371472 [7:57:34<19:51:31, 3.80it/s] 27%|██▋ | 100086/371472 [7:57:35<19:43:39, 3.82it/s] 27%|██▋ | 100087/371472 [7:57:35<19:47:42, 3.81it/s] 27%|██▋ | 100088/371472 [7:57:35<19:18:04, 3.91it/s] 27%|██▋ | 100089/371472 [7:57:35<19:13:15, 3.92it/s] 27%|██▋ | 100090/371472 [7:57:36<19:13:25, 3.92it/s] 27%|██▋ | 100091/371472 [7:57:36<18:48:17, 4.01it/s] 27%|██▋ | 100092/371472 [7:57:36<18:32:16, 4.07it/s] 27%|██▋ | 100093/371472 [7:57:36<18:25:02, 4.09it/s] 27%|██▋ | 100094/371472 [7:57:37<18:21:01, 4.11it/s] 27%|██▋ | 100095/371472 [7:57:37<18:40:37, 4.04it/s] 27%|██▋ | 100096/371472 [7:57:37<22:18:25, 3.38it/s] 27%|██▋ | 100097/371472 [7:57:38<21:34:47, 3.49it/s] 27%|██▋ | 100098/371472 [7:57:38<21:16:29, 3.54it/s] 27%|██▋ | 100099/371472 [7:57:38<22:04:25, 3.41it/s] 27%|██▋ | 100100/371472 [7:57:38<21:34:51, 3.49it/s] {'loss': 3.7199, 'learning_rate': 7.578325324829236e-07, 'epoch': 4.31} + 27%|██▋ | 100100/371472 [7:57:38<21:34:51, 3.49it/s] 27%|██▋ | 100101/371472 [7:57:39<21:04:15, 3.58it/s] 27%|██▋ | 100102/371472 [7:57:39<19:54:24, 3.79it/s] 27%|██▋ | 100103/371472 [7:57:39<19:23:47, 3.89it/s] 27%|██▋ | 100104/371472 [7:57:39<20:59:45, 3.59it/s] 27%|██▋ | 100105/371472 [7:57:40<20:50:50, 3.62it/s] 27%|██▋ | 100106/371472 [7:57:40<20:28:01, 3.68it/s] 27%|██▋ | 100107/371472 [7:57:40<20:32:12, 3.67it/s] 27%|██▋ | 100108/371472 [7:57:41<25:07:00, 3.00it/s] 27%|██▋ | 100109/371472 [7:57:41<24:31:37, 3.07it/s] 27%|██▋ | 100110/371472 [7:57:41<23:21:39, 3.23it/s] 27%|██▋ | 100111/371472 [7:57:42<22:46:14, 3.31it/s] 27%|██▋ | 100112/371472 [7:57:42<21:55:37, 3.44it/s] 27%|██▋ | 100113/371472 [7:57:42<24:23:14, 3.09it/s] 27%|██▋ | 100114/371472 [7:57:43<23:24:48, 3.22it/s] 27%|██▋ | 100115/371472 [7:57:43<22:36:18, 3.33it/s] 27%|██▋ | 100116/371472 [7:57:43<22:57:18, 3.28it/s] 27%|██▋ | 100117/371472 [7:57:43<22:27:57, 3.36it/s] 27%|██▋ | 100118/371472 [7:57:44<21:44:49, 3.47it/s] 27%|██▋ | 100119/371472 [7:57:44<21:47:47, 3.46it/s] 27%|██▋ | 100120/371472 [7:57:44<21:24:49, 3.52it/s] {'loss': 3.4382, 'learning_rate': 7.577840505074448e-07, 'epoch': 4.31} + 27%|██▋ | 100120/371472 [7:57:44<21:24:49, 3.52it/s] 27%|██▋ | 100121/371472 [7:57:44<20:48:28, 3.62it/s] 27%|██▋ | 100122/371472 [7:57:45<21:00:00, 3.59it/s] 27%|██▋ | 100123/371472 [7:57:45<20:03:38, 3.76it/s] 27%|██▋ | 100124/371472 [7:57:45<19:59:53, 3.77it/s] 27%|██▋ | 100125/371472 [7:57:46<19:34:20, 3.85it/s] 27%|██▋ | 100126/371472 [7:57:46<21:36:20, 3.49it/s] 27%|██▋ | 100127/371472 [7:57:46<21:44:42, 3.47it/s] 27%|██▋ | 100128/371472 [7:57:46<22:01:50, 3.42it/s] 27%|██▋ | 100129/371472 [7:57:47<21:37:07, 3.49it/s] 27%|██▋ | 100130/371472 [7:57:47<21:46:52, 3.46it/s] 27%|██▋ | 100131/371472 [7:57:47<20:57:12, 3.60it/s] 27%|██▋ | 100132/371472 [7:57:48<20:11:37, 3.73it/s] 27%|██▋ | 100133/371472 [7:57:48<19:36:35, 3.84it/s] 27%|██▋ | 100134/371472 [7:57:48<19:36:40, 3.84it/s] 27%|██▋ | 100135/371472 [7:57:48<20:12:08, 3.73it/s] 27%|██▋ | 100136/371472 [7:57:49<20:00:40, 3.77it/s] 27%|██▋ | 100137/371472 [7:57:49<19:29:44, 3.87it/s] 27%|██▋ | 100138/371472 [7:57:49<20:29:58, 3.68it/s] 27%|██▋ | 100139/371472 [7:57:49<20:30:26, 3.68it/s] 27%|██▋ | 100140/371472 [7:57:50<20:22:08, 3.70it/s] {'loss': 3.6127, 'learning_rate': 7.577355685319657e-07, 'epoch': 4.31} + 27%|██▋ | 100140/371472 [7:57:50<20:22:08, 3.70it/s] 27%|██▋ | 100141/371472 [7:57:50<20:15:33, 3.72it/s] 27%|██▋ | 100142/371472 [7:57:50<20:18:08, 3.71it/s] 27%|██▋ | 100143/371472 [7:57:50<19:52:57, 3.79it/s] 27%|██▋ | 100144/371472 [7:57:51<21:29:19, 3.51it/s] 27%|██▋ | 100145/371472 [7:57:51<21:38:46, 3.48it/s] 27%|██▋ | 100146/371472 [7:57:51<22:16:54, 3.38it/s] 27%|██▋ | 100147/371472 [7:57:52<23:01:56, 3.27it/s] 27%|██▋ | 100148/371472 [7:57:52<22:19:53, 3.37it/s] 27%|██▋ | 100149/371472 [7:57:52<21:48:05, 3.46it/s] 27%|██▋ | 100150/371472 [7:57:53<22:28:02, 3.35it/s] 27%|██▋ | 100151/371472 [7:57:53<21:58:00, 3.43it/s] 27%|██▋ | 100152/371472 [7:57:53<21:22:38, 3.53it/s] 27%|██▋ | 100153/371472 [7:57:53<21:34:56, 3.49it/s] 27%|██▋ | 100154/371472 [7:57:54<20:30:47, 3.67it/s] 27%|██▋ | 100155/371472 [7:57:54<20:13:49, 3.73it/s] 27%|██▋ | 100156/371472 [7:57:54<19:56:51, 3.78it/s] 27%|██▋ | 100157/371472 [7:57:54<20:33:04, 3.67it/s] 27%|██▋ | 100158/371472 [7:57:55<19:47:35, 3.81it/s] 27%|██▋ | 100159/371472 [7:57:55<20:57:34, 3.60it/s] 27%|██▋ | 100160/371472 [7:57:55<22:17:17, 3.38it/s] {'loss': 3.6243, 'learning_rate': 7.576870865564868e-07, 'epoch': 4.31} + 27%|██▋ | 100160/371472 [7:57:55<22:17:17, 3.38it/s] 27%|██▋ | 100161/371472 [7:57:56<21:58:55, 3.43it/s] 27%|██▋ | 100162/371472 [7:57:56<21:32:56, 3.50it/s] 27%|██▋ | 100163/371472 [7:57:56<21:25:55, 3.52it/s] 27%|██▋ | 100164/371472 [7:57:56<20:57:14, 3.60it/s] 27%|██▋ | 100165/371472 [7:57:57<21:19:26, 3.53it/s] 27%|██▋ | 100166/371472 [7:57:57<20:50:45, 3.62it/s] 27%|██▋ | 100167/371472 [7:57:57<20:27:45, 3.68it/s] 27%|██▋ | 100168/371472 [7:57:58<20:22:06, 3.70it/s] 27%|██▋ | 100169/371472 [7:57:58<21:01:36, 3.58it/s] 27%|██▋ | 100170/371472 [7:57:58<21:15:51, 3.54it/s] 27%|██▋ | 100171/371472 [7:57:58<20:39:44, 3.65it/s] 27%|██▋ | 100172/371472 [7:57:59<20:03:42, 3.76it/s] 27%|██▋ | 100173/371472 [7:57:59<20:09:25, 3.74it/s] 27%|██▋ | 100174/371472 [7:57:59<19:43:02, 3.82it/s] 27%|██▋ | 100175/371472 [7:57:59<21:24:04, 3.52it/s] 27%|██▋ | 100176/371472 [7:58:00<20:50:46, 3.62it/s] 27%|██▋ | 100177/371472 [7:58:00<20:49:48, 3.62it/s] 27%|██▋ | 100178/371472 [7:58:00<22:04:27, 3.41it/s] 27%|██▋ | 100179/371472 [7:58:01<21:06:23, 3.57it/s] 27%|██▋ | 100180/371472 [7:58:01<20:35:48, 3.66it/s] {'loss': 3.7023, 'learning_rate': 7.57638604581008e-07, 'epoch': 4.31} + 27%|██▋ | 100180/371472 [7:58:01<20:35:48, 3.66it/s] 27%|██▋ | 100181/371472 [7:58:01<20:00:04, 3.77it/s] 27%|██▋ | 100182/371472 [7:58:01<20:10:53, 3.73it/s] 27%|██▋ | 100183/371472 [7:58:02<21:48:13, 3.46it/s] 27%|██▋ | 100184/371472 [7:58:02<21:28:09, 3.51it/s] 27%|██▋ | 100185/371472 [7:58:02<20:33:54, 3.66it/s] 27%|██▋ | 100186/371472 [7:58:02<19:58:18, 3.77it/s] 27%|██▋ | 100187/371472 [7:58:03<19:44:24, 3.82it/s] 27%|██▋ | 100188/371472 [7:58:03<19:45:54, 3.81it/s] 27%|██▋ | 100189/371472 [7:58:03<21:03:30, 3.58it/s] 27%|██▋ | 100190/371472 [7:58:04<21:06:25, 3.57it/s] 27%|██▋ | 100191/371472 [7:58:04<21:31:55, 3.50it/s] 27%|██▋ | 100192/371472 [7:58:04<20:19:43, 3.71it/s] 27%|██▋ | 100193/371472 [7:58:04<20:02:26, 3.76it/s] 27%|██▋ | 100194/371472 [7:58:05<20:08:52, 3.74it/s] 27%|██▋ | 100195/371472 [7:58:05<20:03:08, 3.76it/s] 27%|██▋ | 100196/371472 [7:58:05<19:58:00, 3.77it/s] 27%|██▋ | 100197/371472 [7:58:05<19:42:19, 3.82it/s] 27%|██▋ | 100198/371472 [7:58:06<20:07:23, 3.74it/s] 27%|██▋ | 100199/371472 [7:58:06<20:11:07, 3.73it/s] 27%|██▋ | 100200/371472 [7:58:06<19:42:19, 3.82it/s] {'loss': 3.6696, 'learning_rate': 7.57590122605529e-07, 'epoch': 4.32} + 27%|██▋ | 100200/371472 [7:58:06<19:42:19, 3.82it/s] 27%|██▋ | 100201/371472 [7:58:06<19:49:43, 3.80it/s] 27%|██▋ | 100202/371472 [7:58:07<21:52:53, 3.44it/s] 27%|██▋ | 100203/371472 [7:58:07<21:32:12, 3.50it/s] 27%|██▋ | 100204/371472 [7:58:07<22:03:39, 3.42it/s] 27%|██▋ | 100205/371472 [7:58:08<21:03:06, 3.58it/s] 27%|██▋ | 100206/371472 [7:58:08<20:23:41, 3.69it/s] 27%|██▋ | 100207/371472 [7:58:08<20:59:09, 3.59it/s] 27%|██▋ | 100208/371472 [7:58:09<22:37:33, 3.33it/s] 27%|██▋ | 100209/371472 [7:58:09<21:37:54, 3.48it/s] 27%|██▋ | 100210/371472 [7:58:09<21:35:20, 3.49it/s] 27%|██▋ | 100211/371472 [7:58:09<21:17:23, 3.54it/s] 27%|██▋ | 100212/371472 [7:58:10<22:57:49, 3.28it/s] 27%|██▋ | 100213/371472 [7:58:10<22:28:27, 3.35it/s] 27%|██▋ | 100214/371472 [7:58:10<21:36:48, 3.49it/s] 27%|██▋ | 100215/371472 [7:58:11<21:29:55, 3.50it/s] 27%|██▋ | 100216/371472 [7:58:11<21:54:40, 3.44it/s] 27%|██▋ | 100217/371472 [7:58:11<21:40:24, 3.48it/s] 27%|██▋ | 100218/371472 [7:58:11<20:52:15, 3.61it/s] 27%|██▋ | 100219/371472 [7:58:12<21:11:00, 3.56it/s] 27%|██▋ | 100220/371472 [7:58:12<22:14:36, 3.39it/s] {'loss': 3.6333, 'learning_rate': 7.575416406300501e-07, 'epoch': 4.32} + 27%|██▋ | 100220/371472 [7:58:12<22:14:36, 3.39it/s] 27%|██▋ | 100221/371472 [7:58:12<20:57:04, 3.60it/s] 27%|██▋ | 100222/371472 [7:58:13<20:19:08, 3.71it/s] 27%|██▋ | 100223/371472 [7:58:13<19:46:32, 3.81it/s] 27%|██▋ | 100224/371472 [7:58:13<22:11:42, 3.39it/s] 27%|██▋ | 100225/371472 [7:58:13<21:34:24, 3.49it/s] 27%|██▋ | 100226/371472 [7:58:14<23:12:19, 3.25it/s] 27%|██▋ | 100227/371472 [7:58:14<21:47:29, 3.46it/s] 27%|██▋ | 100228/371472 [7:58:14<20:36:19, 3.66it/s] 27%|██▋ | 100229/371472 [7:58:15<20:26:48, 3.68it/s] 27%|██▋ | 100230/371472 [7:58:15<20:19:07, 3.71it/s] 27%|██▋ | 100231/371472 [7:58:15<20:13:29, 3.73it/s] 27%|██▋ | 100232/371472 [7:58:15<20:12:55, 3.73it/s] 27%|██▋ | 100233/371472 [7:58:16<20:24:41, 3.69it/s] 27%|██▋ | 100234/371472 [7:58:16<20:07:21, 3.74it/s] 27%|██▋ | 100235/371472 [7:58:16<19:54:13, 3.79it/s] 27%|██▋ | 100236/371472 [7:58:16<19:40:18, 3.83it/s] 27%|██▋ | 100237/371472 [7:58:17<21:03:38, 3.58it/s] 27%|██▋ | 100238/371472 [7:58:17<20:18:24, 3.71it/s] 27%|██▋ | 100239/371472 [7:58:17<21:07:43, 3.57it/s] 27%|██▋ | 100240/371472 [7:58:18<21:31:41, 3.50it/s] {'loss': 3.6335, 'learning_rate': 7.574931586545713e-07, 'epoch': 4.32} + 27%|██▋ | 100240/371472 [7:58:18<21:31:41, 3.50it/s] 27%|██▋ | 100241/371472 [7:58:18<22:16:55, 3.38it/s] 27%|██▋ | 100242/371472 [7:58:18<23:21:37, 3.23it/s] 27%|██▋ | 100243/371472 [7:58:19<24:02:35, 3.13it/s] 27%|██▋ | 100244/371472 [7:58:19<23:02:42, 3.27it/s] 27%|██▋ | 100245/371472 [7:58:19<22:54:00, 3.29it/s] 27%|██▋ | 100246/371472 [7:58:19<22:37:32, 3.33it/s] 27%|██▋ | 100247/371472 [7:58:20<21:47:45, 3.46it/s] 27%|██▋ | 100248/371472 [7:58:20<21:05:33, 3.57it/s] 27%|██▋ | 100249/371472 [7:58:20<21:58:24, 3.43it/s] 27%|██▋ | 100250/371472 [7:58:20<21:00:25, 3.59it/s] 27%|██▋ | 100251/371472 [7:58:21<21:24:18, 3.52it/s] 27%|██▋ | 100252/371472 [7:58:21<22:26:49, 3.36it/s] 27%|██▋ | 100253/371472 [7:58:22<25:24:47, 2.96it/s] 27%|██▋ | 100254/371472 [7:58:22<25:21:33, 2.97it/s] 27%|██▋ | 100255/371472 [7:58:22<27:01:26, 2.79it/s] 27%|██▋ | 100256/371472 [7:58:23<25:28:22, 2.96it/s] 27%|██▋ | 100257/371472 [7:58:23<25:32:24, 2.95it/s] 27%|██▋ | 100258/371472 [7:58:23<25:07:53, 3.00it/s] 27%|██▋ | 100259/371472 [7:58:24<24:43:03, 3.05it/s] 27%|██▋ | 100260/371472 [7:58:24<23:17:49, 3.23it/s] {'loss': 3.4217, 'learning_rate': 7.574446766790925e-07, 'epoch': 4.32} + 27%|██▋ | 100260/371472 [7:58:24<23:17:49, 3.23it/s] 27%|██▋ | 100261/371472 [7:58:24<22:38:33, 3.33it/s] 27%|██▋ | 100262/371472 [7:58:24<21:36:42, 3.49it/s] 27%|██▋ | 100263/371472 [7:58:25<22:05:36, 3.41it/s] 27%|██▋ | 100264/371472 [7:58:25<23:45:59, 3.17it/s] 27%|██▋ | 100265/371472 [7:58:25<22:25:25, 3.36it/s] 27%|██▋ | 100266/371472 [7:58:26<22:12:02, 3.39it/s] 27%|██▋ | 100267/371472 [7:58:26<21:05:20, 3.57it/s] 27%|██▋ | 100268/371472 [7:58:26<21:53:42, 3.44it/s] 27%|██▋ | 100269/371472 [7:58:26<22:24:21, 3.36it/s] 27%|██▋ | 100270/371472 [7:58:27<22:45:13, 3.31it/s] 27%|██▋ | 100271/371472 [7:58:27<22:42:43, 3.32it/s] 27%|██▋ | 100272/371472 [7:58:27<21:53:54, 3.44it/s] 27%|██▋ | 100273/371472 [7:58:28<22:53:38, 3.29it/s] 27%|██▋ | 100274/371472 [7:58:28<21:18:17, 3.54it/s] 27%|██▋ | 100275/371472 [7:58:28<21:33:23, 3.49it/s] 27%|██▋ | 100276/371472 [7:58:29<28:30:20, 2.64it/s] 27%|██▋ | 100277/371472 [7:58:29<25:34:56, 2.94it/s] 27%|██▋ | 100278/371472 [7:58:29<23:39:36, 3.18it/s] 27%|██▋ | 100279/371472 [7:58:30<24:01:07, 3.14it/s] 27%|██▋ | 100280/371472 [7:58:30<22:39:30, 3.32it/s] {'loss': 3.4638, 'learning_rate': 7.573961947036134e-07, 'epoch': 4.32} + 27%|██▋ | 100280/371472 [7:58:30<22:39:30, 3.32it/s] 27%|██▋ | 100281/371472 [7:58:30<21:45:43, 3.46it/s] 27%|██▋ | 100282/371472 [7:58:30<21:26:04, 3.51it/s] 27%|██▋ | 100283/371472 [7:58:31<20:54:38, 3.60it/s] 27%|██▋ | 100284/371472 [7:58:31<21:24:31, 3.52it/s] 27%|██▋ | 100285/371472 [7:58:31<21:19:33, 3.53it/s] 27%|██▋ | 100286/371472 [7:58:32<22:21:51, 3.37it/s] 27%|██▋ | 100287/371472 [7:58:32<21:10:15, 3.56it/s] 27%|██▋ | 100288/371472 [7:58:32<21:50:26, 3.45it/s] 27%|██▋ | 100289/371472 [7:58:32<21:01:36, 3.58it/s] 27%|██▋ | 100290/371472 [7:58:33<21:39:38, 3.48it/s] 27%|██▋ | 100291/371472 [7:58:33<21:32:12, 3.50it/s] 27%|██▋ | 100292/371472 [7:58:33<25:22:18, 2.97it/s] 27%|██▋ | 100293/371472 [7:58:34<24:07:04, 3.12it/s] 27%|██▋ | 100294/371472 [7:58:34<23:35:59, 3.19it/s] 27%|██▋ | 100295/371472 [7:58:34<23:30:12, 3.20it/s] 27%|██▋ | 100296/371472 [7:58:35<24:34:26, 3.07it/s] 27%|██▋ | 100297/371472 [7:58:35<24:43:55, 3.05it/s] 27%|██▋ | 100298/371472 [7:58:35<23:23:29, 3.22it/s] 27%|██▋ | 100299/371472 [7:58:36<21:54:20, 3.44it/s] 27%|██▋ | 100300/371472 [7:58:36<21:13:43, 3.55it/s] {'loss': 3.6377, 'learning_rate': 7.573477127281345e-07, 'epoch': 4.32} + 27%|██▋ | 100300/371472 [7:58:36<21:13:43, 3.55it/s] 27%|██▋ | 100301/371472 [7:58:36<21:05:36, 3.57it/s] 27%|██▋ | 100302/371472 [7:58:36<20:57:53, 3.59it/s] 27%|██▋ | 100303/371472 [7:58:37<20:46:21, 3.63it/s] 27%|██▋ | 100304/371472 [7:58:37<20:35:00, 3.66it/s] 27%|██▋ | 100305/371472 [7:58:37<21:06:55, 3.57it/s] 27%|██▋ | 100306/371472 [7:58:37<21:17:59, 3.54it/s] 27%|██▋ | 100307/371472 [7:58:38<23:05:08, 3.26it/s] 27%|██▋ | 100308/371472 [7:58:38<22:46:39, 3.31it/s] 27%|██▋ | 100309/371472 [7:58:38<22:03:03, 3.42it/s] 27%|██▋ | 100310/371472 [7:58:39<21:05:48, 3.57it/s] 27%|██▋ | 100311/371472 [7:58:39<20:21:09, 3.70it/s] 27%|██▋ | 100312/371472 [7:58:39<20:29:35, 3.68it/s] 27%|██▋ | 100313/371472 [7:58:39<21:38:21, 3.48it/s] 27%|██▋ | 100314/371472 [7:58:40<22:34:09, 3.34it/s] 27%|██▋ | 100315/371472 [7:58:40<21:31:36, 3.50it/s] 27%|██▋ | 100316/371472 [7:58:40<21:02:30, 3.58it/s] 27%|██▋ | 100317/371472 [7:58:41<20:32:08, 3.67it/s] 27%|██▋ | 100318/371472 [7:58:41<19:36:13, 3.84it/s] 27%|██▋ | 100319/371472 [7:58:41<21:59:24, 3.43it/s] 27%|██▋ | 100320/371472 [7:58:41<22:10:28, 3.40it/s] {'loss': 3.5025, 'learning_rate': 7.572992307526557e-07, 'epoch': 4.32} + 27%|██▋ | 100320/371472 [7:58:41<22:10:28, 3.40it/s] 27%|██▋ | 100321/371472 [7:58:42<21:38:44, 3.48it/s] 27%|██▋ | 100322/371472 [7:58:42<20:30:42, 3.67it/s] 27%|██▋ | 100323/371472 [7:58:42<20:22:16, 3.70it/s] 27%|██▋ | 100324/371472 [7:58:43<20:19:52, 3.70it/s] 27%|██▋ | 100325/371472 [7:58:43<20:33:16, 3.66it/s] 27%|██▋ | 100326/371472 [7:58:43<20:26:33, 3.68it/s] 27%|██▋ | 100327/371472 [7:58:43<22:00:59, 3.42it/s] 27%|██▋ | 100328/371472 [7:58:44<20:52:21, 3.61it/s] 27%|██▋ | 100329/371472 [7:58:44<22:49:27, 3.30it/s] 27%|██▋ | 100330/371472 [7:58:44<22:04:49, 3.41it/s] 27%|██▋ | 100331/371472 [7:58:45<23:39:26, 3.18it/s] 27%|██▋ | 100332/371472 [7:58:45<23:40:42, 3.18it/s] 27%|██▋ | 100333/371472 [7:58:45<24:18:33, 3.10it/s] 27%|██▋ | 100334/371472 [7:58:46<22:42:30, 3.32it/s] 27%|██▋ | 100335/371472 [7:58:46<23:14:07, 3.24it/s] 27%|██▋ | 100336/371472 [7:58:46<22:22:14, 3.37it/s] 27%|██▋ | 100337/371472 [7:58:47<23:21:40, 3.22it/s] 27%|██▋ | 100338/371472 [7:58:47<22:16:11, 3.38it/s] 27%|██▋ | 100339/371472 [7:58:47<22:07:01, 3.41it/s] 27%|██▋ | 100340/371472 [7:58:47<22:38:19, 3.33it/s] {'loss': 3.4211, 'learning_rate': 7.572507487771769e-07, 'epoch': 4.32} + 27%|██▋ | 100340/371472 [7:58:47<22:38:19, 3.33it/s] 27%|██▋ | 100341/371472 [7:58:48<22:50:59, 3.30it/s] 27%|██▋ | 100342/371472 [7:58:48<21:49:45, 3.45it/s] 27%|██▋ | 100343/371472 [7:58:48<21:45:46, 3.46it/s] 27%|██▋ | 100344/371472 [7:58:49<21:34:05, 3.49it/s] 27%|██▋ | 100345/371472 [7:58:49<21:49:19, 3.45it/s] 27%|██▋ | 100346/371472 [7:58:49<21:17:21, 3.54it/s] 27%|██▋ | 100347/371472 [7:58:49<22:14:27, 3.39it/s] 27%|██▋ | 100348/371472 [7:58:50<22:05:14, 3.41it/s] 27%|██▋ | 100349/371472 [7:58:50<21:55:25, 3.44it/s] 27%|██▋ | 100350/371472 [7:58:50<21:55:32, 3.43it/s] 27%|██▋ | 100351/371472 [7:58:51<21:13:33, 3.55it/s] 27%|██▋ | 100352/371472 [7:58:51<21:57:14, 3.43it/s] 27%|██▋ | 100353/371472 [7:58:51<20:56:57, 3.59it/s] 27%|██▋ | 100354/371472 [7:58:51<20:10:39, 3.73it/s] 27%|██▋ | 100355/371472 [7:58:52<21:43:20, 3.47it/s] 27%|██▋ | 100356/371472 [7:58:52<21:03:48, 3.58it/s] 27%|██▋ | 100357/371472 [7:58:52<20:22:19, 3.70it/s] 27%|██▋ | 100358/371472 [7:58:52<20:51:40, 3.61it/s] 27%|██▋ | 100359/371472 [7:58:53<21:40:57, 3.47it/s] 27%|██▋ | 100360/371472 [7:58:53<22:23:51, 3.36it/s] {'loss': 3.4665, 'learning_rate': 7.572022668016979e-07, 'epoch': 4.32} + 27%|██▋ | 100360/371472 [7:58:53<22:23:51, 3.36it/s] 27%|██▋ | 100361/371472 [7:58:53<22:15:27, 3.38it/s] 27%|██▋ | 100362/371472 [7:58:54<21:45:34, 3.46it/s] 27%|██▋ | 100363/371472 [7:58:54<22:06:13, 3.41it/s] 27%|██▋ | 100364/371472 [7:58:54<21:39:51, 3.48it/s] 27%|██▋ | 100365/371472 [7:58:55<21:12:11, 3.55it/s] 27%|██▋ | 100366/371472 [7:58:55<22:32:58, 3.34it/s] 27%|██▋ | 100367/371472 [7:58:55<25:09:07, 2.99it/s] 27%|██▋ | 100368/371472 [7:58:56<23:49:53, 3.16it/s] 27%|██▋ | 100369/371472 [7:58:56<22:51:38, 3.29it/s] 27%|██▋ | 100370/371472 [7:58:56<24:57:43, 3.02it/s] 27%|██▋ | 100371/371472 [7:58:56<23:42:58, 3.18it/s] 27%|██▋ | 100372/371472 [7:58:57<23:22:41, 3.22it/s] 27%|██▋ | 100373/371472 [7:58:57<23:27:24, 3.21it/s] 27%|██▋ | 100374/371472 [7:58:57<24:01:59, 3.13it/s] 27%|██▋ | 100375/371472 [7:58:58<22:40:55, 3.32it/s] 27%|██▋ | 100376/371472 [7:58:58<21:53:37, 3.44it/s] 27%|██▋ | 100377/371472 [7:58:58<22:21:07, 3.37it/s] 27%|██▋ | 100378/371472 [7:58:59<21:41:13, 3.47it/s] 27%|██▋ | 100379/371472 [7:58:59<20:52:42, 3.61it/s] 27%|██▋ | 100380/371472 [7:58:59<21:00:20, 3.58it/s] {'loss': 3.635, 'learning_rate': 7.57153784826219e-07, 'epoch': 4.32} + 27%|██▋ | 100380/371472 [7:58:59<21:00:20, 3.58it/s] 27%|██▋ | 100381/371472 [7:58:59<20:25:27, 3.69it/s] 27%|██▋ | 100382/371472 [7:59:00<19:50:41, 3.79it/s] 27%|██▋ | 100383/371472 [7:59:00<19:40:38, 3.83it/s] 27%|██▋ | 100384/371472 [7:59:00<20:02:25, 3.76it/s] 27%|██▋ | 100385/371472 [7:59:00<21:31:48, 3.50it/s] 27%|██▋ | 100386/371472 [7:59:01<21:18:15, 3.53it/s] 27%|██▋ | 100387/371472 [7:59:01<20:52:48, 3.61it/s] 27%|██▋ | 100388/371472 [7:59:01<20:40:24, 3.64it/s] 27%|██▋ | 100389/371472 [7:59:01<19:51:04, 3.79it/s] 27%|██▋ | 100390/371472 [7:59:02<19:23:48, 3.88it/s] 27%|██▋ | 100391/371472 [7:59:02<18:54:46, 3.98it/s] 27%|██▋ | 100392/371472 [7:59:02<19:02:39, 3.95it/s] 27%|██▋ | 100393/371472 [7:59:03<19:51:33, 3.79it/s] 27%|██▋ | 100394/371472 [7:59:03<20:45:55, 3.63it/s] 27%|██▋ | 100395/371472 [7:59:03<19:48:04, 3.80it/s] 27%|██▋ | 100396/371472 [7:59:03<19:34:57, 3.85it/s] 27%|██▋ | 100397/371472 [7:59:04<19:13:45, 3.92it/s] 27%|██▋ | 100398/371472 [7:59:04<19:23:15, 3.88it/s] 27%|██▋ | 100399/371472 [7:59:04<19:41:08, 3.83it/s] 27%|██▋ | 100400/371472 [7:59:04<19:56:05, 3.78it/s] {'loss': 3.7561, 'learning_rate': 7.571053028507401e-07, 'epoch': 4.32} + 27%|██▋ | 100400/371472 [7:59:04<19:56:05, 3.78it/s] 27%|██▋ | 100401/371472 [7:59:05<20:08:07, 3.74it/s] 27%|██▋ | 100402/371472 [7:59:05<21:14:38, 3.54it/s] 27%|██▋ | 100403/371472 [7:59:05<20:46:55, 3.62it/s] 27%|██▋ | 100404/371472 [7:59:06<22:38:11, 3.33it/s] 27%|██▋ | 100405/371472 [7:59:06<21:45:05, 3.46it/s] 27%|██▋ | 100406/371472 [7:59:06<21:28:23, 3.51it/s] 27%|██▋ | 100407/371472 [7:59:06<21:21:34, 3.53it/s] 27%|██▋ | 100408/371472 [7:59:07<21:03:44, 3.57it/s] 27%|██▋ | 100409/371472 [7:59:07<21:26:35, 3.51it/s] 27%|██▋ | 100410/371472 [7:59:07<20:17:04, 3.71it/s] 27%|██▋ | 100411/371472 [7:59:08<21:54:42, 3.44it/s] 27%|██▋ | 100412/371472 [7:59:08<22:11:15, 3.39it/s] 27%|██▋ | 100413/371472 [7:59:08<21:58:38, 3.43it/s] 27%|██▋ | 100414/371472 [7:59:08<21:29:17, 3.50it/s] 27%|██▋ | 100415/371472 [7:59:09<20:33:43, 3.66it/s] 27%|██▋ | 100416/371472 [7:59:09<20:20:18, 3.70it/s] 27%|██▋ | 100417/371472 [7:59:09<19:37:59, 3.83it/s] 27%|██▋ | 100418/371472 [7:59:09<19:05:57, 3.94it/s] 27%|██▋ | 100419/371472 [7:59:10<19:39:34, 3.83it/s] 27%|██▋ | 100420/371472 [7:59:10<19:46:53, 3.81it/s] {'loss': 3.6254, 'learning_rate': 7.570568208752613e-07, 'epoch': 4.33} + 27%|██▋ | 100420/371472 [7:59:10<19:46:53, 3.81it/s] 27%|██▋ | 100421/371472 [7:59:10<19:43:51, 3.82it/s] 27%|██▋ | 100422/371472 [7:59:10<19:34:27, 3.85it/s] 27%|██▋ | 100423/371472 [7:59:11<19:17:56, 3.90it/s] 27%|██▋ | 100424/371472 [7:59:11<20:44:32, 3.63it/s] 27%|██▋ | 100425/371472 [7:59:11<20:34:28, 3.66it/s] 27%|██▋ | 100426/371472 [7:59:12<20:35:15, 3.66it/s] 27%|██▋ | 100427/371472 [7:59:12<20:46:24, 3.62it/s] 27%|██▋ | 100428/371472 [7:59:12<20:59:33, 3.59it/s] 27%|██▋ | 100429/371472 [7:59:12<21:54:32, 3.44it/s] 27%|██▋ | 100430/371472 [7:59:13<20:47:29, 3.62it/s] 27%|██▋ | 100431/371472 [7:59:13<21:21:19, 3.53it/s] 27%|██▋ | 100432/371472 [7:59:13<20:56:35, 3.59it/s] 27%|██▋ | 100433/371472 [7:59:13<20:08:05, 3.74it/s] 27%|██▋ | 100434/371472 [7:59:14<19:56:11, 3.78it/s] 27%|██▋ | 100435/371472 [7:59:14<21:47:44, 3.45it/s] 27%|██▋ | 100436/371472 [7:59:14<23:16:34, 3.23it/s] 27%|██▋ | 100437/371472 [7:59:15<22:30:04, 3.35it/s] 27%|██▋ | 100438/371472 [7:59:15<22:00:59, 3.42it/s] 27%|██▋ | 100439/371472 [7:59:15<21:36:23, 3.48it/s] 27%|██▋ | 100440/371472 [7:59:16<21:00:15, 3.58it/s] {'loss': 3.3085, 'learning_rate': 7.570083388997823e-07, 'epoch': 4.33} + 27%|██▋ | 100440/371472 [7:59:16<21:00:15, 3.58it/s] 27%|██▋ | 100441/371472 [7:59:16<20:11:36, 3.73it/s] 27%|██▋ | 100442/371472 [7:59:16<20:33:19, 3.66it/s] 27%|██▋ | 100443/371472 [7:59:16<19:59:31, 3.77it/s] 27%|██▋ | 100444/371472 [7:59:17<20:36:29, 3.65it/s] 27%|██▋ | 100445/371472 [7:59:17<20:16:32, 3.71it/s] 27%|██▋ | 100446/371472 [7:59:17<20:17:39, 3.71it/s] 27%|██▋ | 100447/371472 [7:59:17<20:34:39, 3.66it/s] 27%|██▋ | 100448/371472 [7:59:18<20:53:31, 3.60it/s] 27%|██▋ | 100449/371472 [7:59:18<20:56:42, 3.59it/s] 27%|██▋ | 100450/371472 [7:59:18<20:22:49, 3.69it/s] 27%|██▋ | 100451/371472 [7:59:18<20:28:38, 3.68it/s] 27%|██▋ | 100452/371472 [7:59:19<21:00:09, 3.58it/s] 27%|██▋ | 100453/371472 [7:59:19<20:16:29, 3.71it/s] 27%|██▋ | 100454/371472 [7:59:19<19:29:39, 3.86it/s] 27%|██▋ | 100455/371472 [7:59:20<19:31:07, 3.86it/s] 27%|██▋ | 100456/371472 [7:59:20<19:34:43, 3.85it/s] 27%|██▋ | 100457/371472 [7:59:20<19:58:17, 3.77it/s] 27%|██▋ | 100458/371472 [7:59:20<20:11:01, 3.73it/s] 27%|██▋ | 100459/371472 [7:59:21<19:51:09, 3.79it/s] 27%|██▋ | 100460/371472 [7:59:21<21:00:21, 3.58it/s] {'loss': 3.4187, 'learning_rate': 7.569598569243034e-07, 'epoch': 4.33} + 27%|██▋ | 100460/371472 [7:59:21<21:00:21, 3.58it/s] 27%|██▋ | 100461/371472 [7:59:21<20:14:26, 3.72it/s] 27%|██▋ | 100462/371472 [7:59:21<20:18:45, 3.71it/s] 27%|██▋ | 100463/371472 [7:59:22<20:05:50, 3.75it/s] 27%|██▋ | 100464/371472 [7:59:22<20:26:49, 3.68it/s] 27%|██▋ | 100465/371472 [7:59:22<20:31:07, 3.67it/s] 27%|██▋ | 100466/371472 [7:59:23<21:19:47, 3.53it/s] 27%|██▋ | 100467/371472 [7:59:23<20:45:11, 3.63it/s] 27%|██▋ | 100468/371472 [7:59:23<20:48:57, 3.62it/s] 27%|██▋ | 100469/371472 [7:59:23<20:16:24, 3.71it/s] 27%|██▋ | 100470/371472 [7:59:24<20:07:35, 3.74it/s] 27%|██▋ | 100471/371472 [7:59:24<19:59:06, 3.77it/s] 27%|██▋ | 100472/371472 [7:59:24<20:27:29, 3.68it/s] 27%|██▋ | 100473/371472 [7:59:24<20:46:51, 3.62it/s] 27%|██▋ | 100474/371472 [7:59:25<21:06:47, 3.57it/s] 27%|██▋ | 100475/371472 [7:59:25<21:22:32, 3.52it/s] 27%|██▋ | 100476/371472 [7:59:25<20:42:18, 3.64it/s] 27%|██▋ | 100477/371472 [7:59:26<21:52:14, 3.44it/s] 27%|██▋ | 100478/371472 [7:59:26<20:52:43, 3.61it/s] 27%|██▋ | 100479/371472 [7:59:26<21:13:49, 3.55it/s] 27%|██▋ | 100480/371472 [7:59:26<22:26:50, 3.35it/s] {'loss': 3.4405, 'learning_rate': 7.569113749488246e-07, 'epoch': 4.33} + 27%|██▋ | 100480/371472 [7:59:26<22:26:50, 3.35it/s] 27%|██▋ | 100481/371472 [7:59:27<22:11:19, 3.39it/s] 27%|██▋ | 100482/371472 [7:59:27<21:52:55, 3.44it/s] 27%|██▋ | 100483/371472 [7:59:27<21:29:06, 3.50it/s] 27%|██▋ | 100484/371472 [7:59:28<22:47:55, 3.30it/s] 27%|██▋ | 100485/371472 [7:59:28<24:16:36, 3.10it/s] 27%|██▋ | 100486/371472 [7:59:28<23:31:22, 3.20it/s] 27%|██▋ | 100487/371472 [7:59:29<23:43:11, 3.17it/s] 27%|██▋ | 100488/371472 [7:59:29<22:25:59, 3.36it/s] 27%|██▋ | 100489/371472 [7:59:29<21:26:55, 3.51it/s] 27%|██▋ | 100490/371472 [7:59:30<23:03:43, 3.26it/s] 27%|██▋ | 100491/371472 [7:59:30<21:45:30, 3.46it/s] 27%|██▋ | 100492/371472 [7:59:30<22:27:31, 3.35it/s] 27%|██▋ | 100493/371472 [7:59:30<22:21:13, 3.37it/s] 27%|██▋ | 100494/371472 [7:59:31<21:56:43, 3.43it/s] 27%|██▋ | 100495/371472 [7:59:31<21:21:04, 3.53it/s] 27%|██▋ | 100496/371472 [7:59:31<21:29:46, 3.50it/s] 27%|██▋ | 100497/371472 [7:59:31<21:07:09, 3.56it/s] 27%|██▋ | 100498/371472 [7:59:32<20:48:49, 3.62it/s] 27%|██▋ | 100499/371472 [7:59:32<20:51:29, 3.61it/s] 27%|██▋ | 100500/371472 [7:59:32<20:48:34, 3.62it/s] {'loss': 3.619, 'learning_rate': 7.568628929733458e-07, 'epoch': 4.33} + 27%|██▋ | 100500/371472 [7:59:32<20:48:34, 3.62it/s] 27%|██▋ | 100501/371472 [7:59:33<21:52:23, 3.44it/s] 27%|██▋ | 100502/371472 [7:59:33<21:43:50, 3.46it/s] 27%|██▋ | 100503/371472 [7:59:33<21:38:06, 3.48it/s] 27%|██▋ | 100504/371472 [7:59:33<20:50:42, 3.61it/s] 27%|██▋ | 100505/371472 [7:59:34<21:25:30, 3.51it/s] 27%|██▋ | 100506/371472 [7:59:34<21:38:19, 3.48it/s] 27%|██▋ | 100507/371472 [7:59:34<20:57:54, 3.59it/s] 27%|██▋ | 100508/371472 [7:59:35<21:07:18, 3.56it/s] 27%|██▋ | 100509/371472 [7:59:35<20:51:08, 3.61it/s] 27%|██▋ | 100510/371472 [7:59:35<22:42:19, 3.31it/s] 27%|██▋ | 100511/371472 [7:59:36<23:11:37, 3.25it/s] 27%|██▋ | 100512/371472 [7:59:36<23:19:16, 3.23it/s] 27%|██▋ | 100513/371472 [7:59:36<21:58:16, 3.43it/s] 27%|██▋ | 100514/371472 [7:59:36<21:21:08, 3.52it/s] 27%|██▋ | 100515/371472 [7:59:37<23:00:45, 3.27it/s] 27%|██▋ | 100516/371472 [7:59:37<21:55:16, 3.43it/s] 27%|██▋ | 100517/371472 [7:59:37<20:54:11, 3.60it/s] 27%|██▋ | 100518/371472 [7:59:38<22:44:01, 3.31it/s] 27%|██▋ | 100519/371472 [7:59:38<21:15:02, 3.54it/s] 27%|██▋ | 100520/371472 [7:59:38<21:05:05, 3.57it/s] {'loss': 3.6213, 'learning_rate': 7.568144109978667e-07, 'epoch': 4.33} + 27%|██▋ | 100520/371472 [7:59:38<21:05:05, 3.57it/s] 27%|██▋ | 100521/371472 [7:59:38<21:12:06, 3.55it/s] 27%|██▋ | 100522/371472 [7:59:39<21:38:24, 3.48it/s] 27%|██▋ | 100523/371472 [7:59:39<20:24:34, 3.69it/s] 27%|██▋ | 100524/371472 [7:59:39<21:11:48, 3.55it/s] 27%|██▋ | 100525/371472 [7:59:40<21:41:28, 3.47it/s] 27%|██▋ | 100526/371472 [7:59:40<21:40:15, 3.47it/s] 27%|██▋ | 100527/371472 [7:59:40<20:46:59, 3.62it/s] 27%|██▋ | 100528/371472 [7:59:40<20:02:19, 3.76it/s] 27%|██▋ | 100529/371472 [7:59:41<20:25:05, 3.69it/s] 27%|██▋ | 100530/371472 [7:59:41<20:32:11, 3.66it/s] 27%|██▋ | 100531/371472 [7:59:41<20:10:36, 3.73it/s] 27%|██▋ | 100532/371472 [7:59:41<21:49:18, 3.45it/s] 27%|██▋ | 100533/371472 [7:59:42<21:17:04, 3.54it/s] 27%|██▋ | 100534/371472 [7:59:42<20:58:03, 3.59it/s] 27%|██▋ | 100535/371472 [7:59:42<20:28:05, 3.68it/s] 27%|██▋ | 100536/371472 [7:59:42<19:54:36, 3.78it/s] 27%|██▋ | 100537/371472 [7:59:43<19:41:20, 3.82it/s] 27%|██▋ | 100538/371472 [7:59:43<20:22:34, 3.69it/s] 27%|██▋ | 100539/371472 [7:59:43<20:06:30, 3.74it/s] 27%|██▋ | 100540/371472 [7:59:44<21:42:19, 3.47it/s] {'loss': 3.5508, 'learning_rate': 7.567659290223878e-07, 'epoch': 4.33} + 27%|██▋ | 100540/371472 [7:59:44<21:42:19, 3.47it/s] 27%|██▋ | 100541/371472 [7:59:44<21:03:39, 3.57it/s] 27%|██▋ | 100542/371472 [7:59:44<20:50:19, 3.61it/s] 27%|██▋ | 100543/371472 [7:59:44<20:10:34, 3.73it/s] 27%|██▋ | 100544/371472 [7:59:45<20:18:21, 3.71it/s] 27%|██▋ | 100545/371472 [7:59:45<20:35:10, 3.66it/s] 27%|██▋ | 100546/371472 [7:59:45<20:32:48, 3.66it/s] 27%|██▋ | 100547/371472 [7:59:46<21:45:50, 3.46it/s] 27%|██▋ | 100548/371472 [7:59:46<21:11:35, 3.55it/s] 27%|██▋ | 100549/371472 [7:59:46<24:32:52, 3.07it/s] 27%|██▋ | 100550/371472 [7:59:47<22:51:33, 3.29it/s] 27%|██▋ | 100551/371472 [7:59:47<22:16:48, 3.38it/s] 27%|██▋ | 100552/371472 [7:59:47<21:00:33, 3.58it/s] 27%|██▋ | 100553/371472 [7:59:47<20:22:07, 3.69it/s] 27%|██▋ | 100554/371472 [7:59:48<21:05:52, 3.57it/s] 27%|██▋ | 100555/371472 [7:59:48<20:13:39, 3.72it/s] 27%|██▋ | 100556/371472 [7:59:48<20:36:01, 3.65it/s] 27%|██▋ | 100557/371472 [7:59:48<21:11:25, 3.55it/s] 27%|██▋ | 100558/371472 [7:59:49<20:54:40, 3.60it/s] 27%|██▋ | 100559/371472 [7:59:49<20:43:43, 3.63it/s] 27%|██▋ | 100560/371472 [7:59:49<21:10:23, 3.55it/s] {'loss': 3.4848, 'learning_rate': 7.56717447046909e-07, 'epoch': 4.33} + 27%|██▋ | 100560/371472 [7:59:49<21:10:23, 3.55it/s] 27%|██▋ | 100561/371472 [7:59:50<22:14:45, 3.38it/s] 27%|██▋ | 100562/371472 [7:59:50<21:24:40, 3.51it/s] 27%|██▋ | 100563/371472 [7:59:50<22:04:26, 3.41it/s] 27%|██▋ | 100564/371472 [7:59:50<21:42:45, 3.47it/s] 27%|██▋ | 100565/371472 [7:59:51<21:08:52, 3.56it/s] 27%|██▋ | 100566/371472 [7:59:51<20:26:32, 3.68it/s] 27%|██▋ | 100567/371472 [7:59:51<21:28:03, 3.51it/s] 27%|██▋ | 100568/371472 [7:59:52<21:47:44, 3.45it/s] 27%|██▋ | 100569/371472 [7:59:52<20:39:16, 3.64it/s] 27%|██▋ | 100570/371472 [7:59:52<20:14:32, 3.72it/s] 27%|██▋ | 100571/371472 [7:59:52<19:51:22, 3.79it/s] 27%|██▋ | 100572/371472 [7:59:53<21:26:16, 3.51it/s] 27%|██▋ | 100573/371472 [7:59:53<23:41:08, 3.18it/s] 27%|██▋ | 100574/371472 [7:59:53<23:13:45, 3.24it/s] 27%|██▋ | 100575/371472 [7:59:54<22:43:59, 3.31it/s] 27%|██▋ | 100576/371472 [7:59:54<21:55:09, 3.43it/s] 27%|██▋ | 100577/371472 [7:59:54<20:56:07, 3.59it/s] 27%|██▋ | 100578/371472 [7:59:54<22:41:57, 3.32it/s] 27%|██▋ | 100579/371472 [7:59:55<21:21:04, 3.52it/s] 27%|██▋ | 100580/371472 [7:59:55<21:31:35, 3.50it/s] {'loss': 3.4511, 'learning_rate': 7.566689650714301e-07, 'epoch': 4.33} + 27%|██▋ | 100580/371472 [7:59:55<21:31:35, 3.50it/s] 27%|██▋ | 100581/371472 [7:59:55<20:43:26, 3.63it/s] 27%|██▋ | 100582/371472 [7:59:56<20:36:07, 3.65it/s] 27%|██▋ | 100583/371472 [7:59:56<21:03:21, 3.57it/s] 27%|██▋ | 100584/371472 [7:59:56<22:36:02, 3.33it/s] 27%|██▋ | 100585/371472 [7:59:56<23:08:01, 3.25it/s] 27%|██▋ | 100586/371472 [7:59:57<21:45:08, 3.46it/s] 27%|██▋ | 100587/371472 [7:59:57<21:29:35, 3.50it/s] 27%|██▋ | 100588/371472 [7:59:57<21:49:17, 3.45it/s] 27%|██▋ | 100589/371472 [7:59:58<21:31:36, 3.50it/s] 27%|██▋ | 100590/371472 [7:59:58<20:21:46, 3.70it/s] 27%|██▋ | 100591/371472 [7:59:58<20:30:43, 3.67it/s] 27%|██▋ | 100592/371472 [7:59:58<21:01:43, 3.58it/s] 27%|██▋ | 100593/371472 [7:59:59<21:37:13, 3.48it/s] 27%|██▋ | 100594/371472 [7:59:59<21:31:22, 3.50it/s] 27%|██▋ | 100595/371472 [7:59:59<21:19:46, 3.53it/s] 27%|██▋ | 100596/371472 [8:00:00<21:33:43, 3.49it/s] 27%|██▋ | 100597/371472 [8:00:00<21:21:42, 3.52it/s] 27%|██▋ | 100598/371472 [8:00:00<20:50:34, 3.61it/s] 27%|██▋ | 100599/371472 [8:00:00<20:06:09, 3.74it/s] 27%|██▋ | 100600/371472 [8:00:01<20:25:52, 3.68it/s] {'loss': 3.5972, 'learning_rate': 7.566204830959512e-07, 'epoch': 4.33} + 27%|██▋ | 100600/371472 [8:00:01<20:25:52, 3.68it/s] 27%|██▋ | 100601/371472 [8:00:01<19:46:16, 3.81it/s] 27%|██▋ | 100602/371472 [8:00:01<21:44:45, 3.46it/s] 27%|██▋ | 100603/371472 [8:00:01<21:19:43, 3.53it/s] 27%|██▋ | 100604/371472 [8:00:02<23:15:12, 3.24it/s] 27%|██▋ | 100605/371472 [8:00:02<23:58:46, 3.14it/s] 27%|██▋ | 100606/371472 [8:00:02<22:38:47, 3.32it/s] 27%|██▋ | 100607/371472 [8:00:03<22:35:44, 3.33it/s] 27%|██▋ | 100608/371472 [8:00:03<22:32:26, 3.34it/s] 27%|██▋ | 100609/371472 [8:00:03<21:46:34, 3.46it/s] 27%|██▋ | 100610/371472 [8:00:04<20:59:12, 3.59it/s] 27%|██▋ | 100611/371472 [8:00:04<20:19:09, 3.70it/s] 27%|██▋ | 100612/371472 [8:00:04<19:56:17, 3.77it/s] 27%|██▋ | 100613/371472 [8:00:04<20:05:02, 3.75it/s] 27%|██▋ | 100614/371472 [8:00:05<20:13:06, 3.72it/s] 27%|██▋ | 100615/371472 [8:00:05<19:49:50, 3.79it/s] 27%|██▋ | 100616/371472 [8:00:05<21:30:23, 3.50it/s] 27%|██▋ | 100617/371472 [8:00:05<20:57:15, 3.59it/s] 27%|██▋ | 100618/371472 [8:00:06<20:32:31, 3.66it/s] 27%|██▋ | 100619/371472 [8:00:06<20:31:48, 3.66it/s] 27%|██▋ | 100620/371472 [8:00:06<20:53:26, 3.60it/s] {'loss': 3.5877, 'learning_rate': 7.565720011204723e-07, 'epoch': 4.33} + 27%|██▋ | 100620/371472 [8:00:06<20:53:26, 3.60it/s] 27%|██▋ | 100621/371472 [8:00:07<20:24:59, 3.69it/s] 27%|██▋ | 100622/371472 [8:00:07<20:45:40, 3.62it/s] 27%|██▋ | 100623/371472 [8:00:07<22:31:46, 3.34it/s] 27%|██▋ | 100624/371472 [8:00:07<21:31:46, 3.49it/s] 27%|██▋ | 100625/371472 [8:00:08<21:11:06, 3.55it/s] 27%|██▋ | 100626/371472 [8:00:08<23:23:23, 3.22it/s] 27%|██▋ | 100627/371472 [8:00:08<22:49:37, 3.30it/s] 27%|██▋ | 100628/371472 [8:00:09<22:21:16, 3.37it/s] 27%|██▋ | 100629/371472 [8:00:09<21:13:36, 3.54it/s] 27%|██▋ | 100630/371472 [8:00:09<20:23:32, 3.69it/s] 27%|██▋ | 100631/371472 [8:00:09<20:55:18, 3.60it/s] 27%|██▋ | 100632/371472 [8:00:10<20:58:36, 3.59it/s] 27%|██▋ | 100633/371472 [8:00:10<21:03:02, 3.57it/s] 27%|██▋ | 100634/371472 [8:00:10<20:49:14, 3.61it/s] 27%|██▋ | 100635/371472 [8:00:11<20:10:01, 3.73it/s] 27%|██▋ | 100636/371472 [8:00:11<19:16:25, 3.90it/s] 27%|██▋ | 100637/371472 [8:00:11<22:23:00, 3.36it/s] 27%|██▋ | 100638/371472 [8:00:11<22:20:39, 3.37it/s] 27%|██▋ | 100639/371472 [8:00:12<21:27:15, 3.51it/s] 27%|██▋ | 100640/371472 [8:00:12<21:45:14, 3.46it/s] {'loss': 3.7472, 'learning_rate': 7.565235191449935e-07, 'epoch': 4.33} + 27%|██▋ | 100640/371472 [8:00:12<21:45:14, 3.46it/s] 27%|██▋ | 100641/371472 [8:00:12<21:25:10, 3.51it/s] 27%|██▋ | 100642/371472 [8:00:13<22:07:10, 3.40it/s] 27%|██▋ | 100643/371472 [8:00:13<21:01:54, 3.58it/s] 27%|██▋ | 100644/371472 [8:00:13<20:07:36, 3.74it/s] 27%|██▋ | 100645/371472 [8:00:13<22:25:07, 3.36it/s] 27%|██▋ | 100646/371472 [8:00:14<21:19:23, 3.53it/s] 27%|██▋ | 100647/371472 [8:00:14<20:37:17, 3.65it/s] 27%|██▋ | 100648/371472 [8:00:14<20:25:28, 3.68it/s] 27%|██▋ | 100649/371472 [8:00:14<19:57:28, 3.77it/s] 27%|██▋ | 100650/371472 [8:00:15<19:45:48, 3.81it/s] 27%|██▋ | 100651/371472 [8:00:15<20:06:47, 3.74it/s] 27%|██▋ | 100652/371472 [8:00:15<21:46:51, 3.45it/s] 27%|██▋ | 100653/371472 [8:00:16<22:56:01, 3.28it/s] 27%|██▋ | 100654/371472 [8:00:16<23:39:15, 3.18it/s] 27%|██▋ | 100655/371472 [8:00:16<23:22:42, 3.22it/s] 27%|██▋ | 100656/371472 [8:00:17<22:30:38, 3.34it/s] 27%|██▋ | 100657/371472 [8:00:17<21:27:20, 3.51it/s] 27%|██▋ | 100658/371472 [8:00:17<21:01:26, 3.58it/s] 27%|██▋ | 100659/371472 [8:00:17<20:45:19, 3.62it/s] 27%|██▋ | 100660/371472 [8:00:18<20:56:19, 3.59it/s] {'loss': 3.5291, 'learning_rate': 7.564750371695145e-07, 'epoch': 4.34} + 27%|██▋ | 100660/371472 [8:00:18<20:56:19, 3.59it/s] 27%|██▋ | 100661/371472 [8:00:18<21:10:03, 3.55it/s] 27%|██▋ | 100662/371472 [8:00:18<21:14:36, 3.54it/s] 27%|██▋ | 100663/371472 [8:00:19<21:06:38, 3.56it/s] 27%|██▋ | 100664/371472 [8:00:19<21:24:37, 3.51it/s] 27%|██▋ | 100665/371472 [8:00:19<22:32:22, 3.34it/s] 27%|██▋ | 100666/371472 [8:00:19<22:57:35, 3.28it/s] 27%|██▋ | 100667/371472 [8:00:20<22:41:59, 3.31it/s] 27%|██▋ | 100668/371472 [8:00:20<22:01:31, 3.42it/s] 27%|██▋ | 100669/371472 [8:00:20<20:53:18, 3.60it/s] 27%|██▋ | 100670/371472 [8:00:21<19:53:22, 3.78it/s] 27%|██▋ | 100671/371472 [8:00:21<22:12:40, 3.39it/s] 27%|██▋ | 100672/371472 [8:00:21<21:12:15, 3.55it/s] 27%|██▋ | 100673/371472 [8:00:21<21:06:19, 3.56it/s] 27%|██▋ | 100674/371472 [8:00:22<20:50:46, 3.61it/s] 27%|██▋ | 100675/371472 [8:00:22<19:55:30, 3.78it/s] 27%|██▋ | 100676/371472 [8:00:22<20:16:20, 3.71it/s] 27%|██▋ | 100677/371472 [8:00:22<19:34:36, 3.84it/s] 27%|██▋ | 100678/371472 [8:00:23<19:48:08, 3.80it/s] 27%|██▋ | 100679/371472 [8:00:23<20:08:30, 3.73it/s] 27%|██▋ | 100680/371472 [8:00:23<20:05:59, 3.74it/s] {'loss': 3.5092, 'learning_rate': 7.564265551940355e-07, 'epoch': 4.34} + 27%|██▋ | 100680/371472 [8:00:23<20:05:59, 3.74it/s] 27%|██▋ | 100681/371472 [8:00:24<20:46:35, 3.62it/s] 27%|██▋ | 100682/371472 [8:00:24<20:03:56, 3.75it/s] 27%|██▋ | 100683/371472 [8:00:24<19:34:44, 3.84it/s] 27%|██▋ | 100684/371472 [8:00:24<19:32:31, 3.85it/s] 27%|██▋ | 100685/371472 [8:00:25<19:41:11, 3.82it/s] 27%|██▋ | 100686/371472 [8:00:25<20:12:02, 3.72it/s] 27%|██▋ | 100687/371472 [8:00:25<20:21:37, 3.69it/s] 27%|██▋ | 100688/371472 [8:00:25<20:37:57, 3.65it/s] 27%|██▋ | 100689/371472 [8:00:26<20:55:01, 3.60it/s] 27%|██▋ | 100690/371472 [8:00:26<20:17:37, 3.71it/s] 27%|██▋ | 100691/371472 [8:00:26<20:02:29, 3.75it/s] 27%|██▋ | 100692/371472 [8:00:26<20:13:18, 3.72it/s] 27%|██▋ | 100693/371472 [8:00:27<23:21:13, 3.22it/s] 27%|██▋ | 100694/371472 [8:00:27<28:16:53, 2.66it/s] 27%|██▋ | 100695/371472 [8:00:28<27:34:12, 2.73it/s] 27%|██▋ | 100696/371472 [8:00:28<24:42:11, 3.04it/s] 27%|██▋ | 100697/371472 [8:00:28<24:19:39, 3.09it/s] 27%|██▋ | 100698/371472 [8:00:29<23:13:03, 3.24it/s] 27%|██▋ | 100699/371472 [8:00:29<22:24:32, 3.36it/s] 27%|██▋ | 100700/371472 [8:00:29<21:44:57, 3.46it/s] {'loss': 3.2994, 'learning_rate': 7.563780732185568e-07, 'epoch': 4.34} + 27%|██▋ | 100700/371472 [8:00:29<21:44:57, 3.46it/s] 27%|██▋ | 100701/371472 [8:00:29<21:50:17, 3.44it/s] 27%|██▋ | 100702/371472 [8:00:30<21:05:43, 3.57it/s] 27%|██▋ | 100703/371472 [8:00:30<23:16:02, 3.23it/s] 27%|██▋ | 100704/371472 [8:00:30<22:47:18, 3.30it/s] 27%|██▋ | 100705/371472 [8:00:31<21:55:55, 3.43it/s] 27%|��█▋ | 100706/371472 [8:00:31<21:24:42, 3.51it/s] 27%|██▋ | 100707/371472 [8:00:31<20:36:25, 3.65it/s] 27%|██▋ | 100708/371472 [8:00:31<20:53:18, 3.60it/s] 27%|██▋ | 100709/371472 [8:00:32<20:06:00, 3.74it/s] 27%|██▋ | 100710/371472 [8:00:32<20:11:13, 3.73it/s] 27%|██▋ | 100711/371472 [8:00:32<20:41:42, 3.63it/s] 27%|██▋ | 100712/371472 [8:00:32<20:43:01, 3.63it/s] 27%|██▋ | 100713/371472 [8:00:33<21:41:43, 3.47it/s] 27%|██▋ | 100714/371472 [8:00:33<21:01:24, 3.58it/s] 27%|██▋ | 100715/371472 [8:00:33<21:40:47, 3.47it/s] 27%|██▋ | 100716/371472 [8:00:34<21:08:14, 3.56it/s] 27%|██▋ | 100717/371472 [8:00:34<20:53:42, 3.60it/s] 27%|██▋ | 100718/371472 [8:00:34<20:18:22, 3.70it/s] 27%|██▋ | 100719/371472 [8:00:34<21:16:54, 3.53it/s] 27%|██▋ | 100720/371472 [8:00:35<22:38:37, 3.32it/s] {'loss': 3.4567, 'learning_rate': 7.563295912430779e-07, 'epoch': 4.34} + 27%|██▋ | 100720/371472 [8:00:35<22:38:37, 3.32it/s] 27%|██▋ | 100721/371472 [8:00:35<22:22:34, 3.36it/s] 27%|██▋ | 100722/371472 [8:00:35<20:56:37, 3.59it/s] 27%|██▋ | 100723/371472 [8:00:36<21:58:08, 3.42it/s] 27%|██▋ | 100724/371472 [8:00:36<21:05:54, 3.56it/s] 27%|██▋ | 100725/371472 [8:00:36<20:49:57, 3.61it/s] 27%|██▋ | 100726/371472 [8:00:36<21:37:36, 3.48it/s] 27%|██▋ | 100727/371472 [8:00:37<22:25:13, 3.35it/s] 27%|██▋ | 100728/371472 [8:00:37<21:51:55, 3.44it/s] 27%|██▋ | 100729/371472 [8:00:37<22:47:55, 3.30it/s] 27%|██▋ | 100730/371472 [8:00:38<22:21:04, 3.36it/s] 27%|██▋ | 100731/371472 [8:00:38<21:13:20, 3.54it/s] 27%|██▋ | 100732/371472 [8:00:38<21:24:56, 3.51it/s] 27%|██▋ | 100733/371472 [8:00:39<20:52:59, 3.60it/s] 27%|██▋ | 100734/371472 [8:00:39<21:11:10, 3.55it/s] 27%|██▋ | 100735/371472 [8:00:39<20:44:49, 3.62it/s] 27%|██▋ | 100736/371472 [8:00:39<21:43:13, 3.46it/s] 27%|██▋ | 100737/371472 [8:00:40<21:55:33, 3.43it/s] 27%|██▋ | 100738/371472 [8:00:40<21:14:40, 3.54it/s] 27%|██▋ | 100739/371472 [8:00:40<22:28:06, 3.35it/s] 27%|██▋ | 100740/371472 [8:00:41<21:41:58, 3.47it/s] {'loss': 3.6029, 'learning_rate': 7.562811092675989e-07, 'epoch': 4.34} + 27%|██▋ | 100740/371472 [8:00:41<21:41:58, 3.47it/s] 27%|██▋ | 100741/371472 [8:00:41<21:36:16, 3.48it/s] 27%|██▋ | 100742/371472 [8:00:41<21:05:28, 3.57it/s] 27%|██▋ | 100743/371472 [8:00:41<20:57:09, 3.59it/s] 27%|██▋ | 100744/371472 [8:00:42<22:10:16, 3.39it/s] 27%|██▋ | 100745/371472 [8:00:42<22:40:32, 3.32it/s] 27%|██▋ | 100746/371472 [8:00:42<22:53:36, 3.28it/s] 27%|██▋ | 100747/371472 [8:00:43<22:52:55, 3.29it/s] 27%|██▋ | 100748/371472 [8:00:43<22:06:44, 3.40it/s] 27%|██▋ | 100749/371472 [8:00:43<21:46:14, 3.45it/s] 27%|██▋ | 100750/371472 [8:00:43<21:25:47, 3.51it/s] 27%|██▋ | 100751/371472 [8:00:44<21:28:57, 3.50it/s] 27%|██▋ | 100752/371472 [8:00:44<21:07:54, 3.56it/s] 27%|██▋ | 100753/371472 [8:00:44<21:00:21, 3.58it/s] 27%|██▋ | 100754/371472 [8:00:45<20:46:21, 3.62it/s] 27%|██▋ | 100755/371472 [8:00:45<20:47:32, 3.62it/s] 27%|██▋ | 100756/371472 [8:00:45<20:29:47, 3.67it/s] 27%|██▋ | 100757/371472 [8:00:45<20:19:40, 3.70it/s] 27%|██▋ | 100758/371472 [8:00:46<20:50:40, 3.61it/s] 27%|██▋ | 100759/371472 [8:00:46<20:28:56, 3.67it/s] 27%|██▋ | 100760/371472 [8:00:46<22:37:17, 3.32it/s] {'loss': 3.6637, 'learning_rate': 7.5623262729212e-07, 'epoch': 4.34} + 27%|██▋ | 100760/371472 [8:00:46<22:37:17, 3.32it/s] 27%|██▋ | 100761/371472 [8:00:47<21:34:50, 3.48it/s] 27%|██▋ | 100762/371472 [8:00:47<21:39:10, 3.47it/s] 27%|██▋ | 100763/371472 [8:00:47<20:40:10, 3.64it/s] 27%|██▋ | 100764/371472 [8:00:47<20:11:19, 3.72it/s] 27%|██▋ | 100765/371472 [8:00:48<19:58:23, 3.76it/s] 27%|██▋ | 100766/371472 [8:00:48<19:49:39, 3.79it/s] 27%|██▋ | 100767/371472 [8:00:48<20:05:42, 3.74it/s] 27%|██▋ | 100768/371472 [8:00:48<21:33:07, 3.49it/s] 27%|██▋ | 100769/371472 [8:00:49<20:36:27, 3.65it/s] 27%|██▋ | 100770/371472 [8:00:49<20:45:29, 3.62it/s] 27%|██▋ | 100771/371472 [8:00:49<21:21:14, 3.52it/s] 27%|██▋ | 100772/371472 [8:00:50<20:19:17, 3.70it/s] 27%|██▋ | 100773/371472 [8:00:50<20:05:03, 3.74it/s] 27%|██▋ | 100774/371472 [8:00:50<20:06:19, 3.74it/s] 27%|██▋ | 100775/371472 [8:00:50<20:42:54, 3.63it/s] 27%|██▋ | 100776/371472 [8:00:51<21:08:44, 3.56it/s] 27%|██▋ | 100777/371472 [8:00:51<21:01:13, 3.58it/s] 27%|██▋ | 100778/371472 [8:00:51<21:39:51, 3.47it/s] 27%|██▋ | 100779/371472 [8:00:51<20:43:12, 3.63it/s] 27%|██▋ | 100780/371472 [8:00:52<20:32:52, 3.66it/s] {'loss': 3.6901, 'learning_rate': 7.561841453166411e-07, 'epoch': 4.34} + 27%|██▋ | 100780/371472 [8:00:52<20:32:52, 3.66it/s] 27%|██▋ | 100781/371472 [8:00:52<22:44:31, 3.31it/s] 27%|██▋ | 100782/371472 [8:00:52<21:34:03, 3.49it/s] 27%|██▋ | 100783/371472 [8:00:53<22:29:11, 3.34it/s] 27%|██▋ | 100784/371472 [8:00:53<21:50:12, 3.44it/s] 27%|██▋ | 100785/371472 [8:00:53<20:43:41, 3.63it/s] 27%|██▋ | 100786/371472 [8:00:53<20:57:18, 3.59it/s] 27%|██▋ | 100787/371472 [8:00:54<20:29:48, 3.67it/s] 27%|██▋ | 100788/371472 [8:00:54<20:23:35, 3.69it/s] 27%|██▋ | 100789/371472 [8:00:54<20:40:18, 3.64it/s] 27%|██▋ | 100790/371472 [8:00:55<20:01:46, 3.75it/s] 27%|██▋ | 100791/371472 [8:00:55<20:56:49, 3.59it/s] 27%|██▋ | 100792/371472 [8:00:55<20:56:49, 3.59it/s] 27%|██▋ | 100793/371472 [8:00:55<21:11:03, 3.55it/s] 27%|██▋ | 100794/371472 [8:00:56<20:16:43, 3.71it/s] 27%|██▋ | 100795/371472 [8:00:56<20:40:36, 3.64it/s] 27%|██▋ | 100796/371472 [8:00:56<20:10:43, 3.73it/s] 27%|██▋ | 100797/371472 [8:00:56<20:02:15, 3.75it/s] 27%|██▋ | 100798/371472 [8:00:57<19:42:14, 3.82it/s] 27%|██▋ | 100799/371472 [8:00:57<20:13:36, 3.72it/s] 27%|██▋ | 100800/371472 [8:00:57<21:27:38, 3.50it/s] {'loss': 3.7772, 'learning_rate': 7.561356633411622e-07, 'epoch': 4.34} + 27%|██▋ | 100800/371472 [8:00:57<21:27:38, 3.50it/s] 27%|██▋ | 100801/371472 [8:00:58<21:46:28, 3.45it/s] 27%|██▋ | 100802/371472 [8:00:58<21:12:20, 3.55it/s] 27%|██▋ | 100803/371472 [8:00:58<21:08:20, 3.56it/s] 27%|██▋ | 100804/371472 [8:00:58<21:01:22, 3.58it/s] 27%|██▋ | 100805/371472 [8:00:59<21:11:28, 3.55it/s] 27%|██▋ | 100806/371472 [8:00:59<21:24:39, 3.51it/s] 27%|██▋ | 100807/371472 [8:00:59<21:29:14, 3.50it/s] 27%|██▋ | 100808/371472 [8:01:00<21:52:39, 3.44it/s] 27%|██▋ | 100809/371472 [8:01:00<21:03:03, 3.57it/s] 27%|██▋ | 100810/371472 [8:01:00<20:28:53, 3.67it/s] 27%|██▋ | 100811/371472 [8:01:00<20:02:31, 3.75it/s] 27%|██▋ | 100812/371472 [8:01:01<19:38:44, 3.83it/s] 27%|██▋ | 100813/371472 [8:01:01<21:48:16, 3.45it/s] 27%|██▋ | 100814/371472 [8:01:01<21:39:01, 3.47it/s] 27%|██▋ | 100815/371472 [8:01:02<20:56:32, 3.59it/s] 27%|██▋ | 100816/371472 [8:01:02<21:48:20, 3.45it/s] 27%|██▋ | 100817/371472 [8:01:02<21:09:15, 3.55it/s] 27%|██▋ | 100818/371472 [8:01:02<20:50:41, 3.61it/s] 27%|██▋ | 100819/371472 [8:01:03<20:04:11, 3.75it/s] 27%|██▋ | 100820/371472 [8:01:03<19:48:30, 3.80it/s] {'loss': 3.4727, 'learning_rate': 7.560871813656833e-07, 'epoch': 4.34} + 27%|██▋ | 100820/371472 [8:01:03<19:48:30, 3.80it/s] 27%|██▋ | 100821/371472 [8:01:03<19:48:33, 3.80it/s] 27%|██▋ | 100822/371472 [8:01:03<19:41:37, 3.82it/s] 27%|██▋ | 100823/371472 [8:01:04<20:57:50, 3.59it/s] 27%|██▋ | 100824/371472 [8:01:04<21:29:47, 3.50it/s] 27%|██▋ | 100825/371472 [8:01:04<22:43:55, 3.31it/s] 27%|██▋ | 100826/371472 [8:01:05<21:37:49, 3.48it/s] 27%|██▋ | 100827/371472 [8:01:05<21:46:59, 3.45it/s] 27%|██▋ | 100828/371472 [8:01:05<20:58:10, 3.59it/s] 27%|██▋ | 100829/371472 [8:01:05<21:07:04, 3.56it/s] 27%|██▋ | 100830/371472 [8:01:06<20:24:29, 3.68it/s] 27%|██▋ | 100831/371472 [8:01:06<19:26:24, 3.87it/s] 27%|██▋ | 100832/371472 [8:01:06<19:16:44, 3.90it/s] 27%|██▋ | 100833/371472 [8:01:06<19:43:04, 3.81it/s] 27%|██▋ | 100834/371472 [8:01:07<19:55:19, 3.77it/s] 27%|██▋ | 100835/371472 [8:01:07<19:29:42, 3.86it/s] 27%|██▋ | 100836/371472 [8:01:07<19:09:23, 3.92it/s] 27%|██▋ | 100837/371472 [8:01:07<19:29:06, 3.86it/s] 27%|██▋ | 100838/371472 [8:01:08<21:16:46, 3.53it/s] 27%|██▋ | 100839/371472 [8:01:08<20:39:16, 3.64it/s] 27%|██▋ | 100840/371472 [8:01:08<21:16:59, 3.53it/s] {'loss': 3.6293, 'learning_rate': 7.560386993902044e-07, 'epoch': 4.34} + 27%|██▋ | 100840/371472 [8:01:08<21:16:59, 3.53it/s] 27%|██▋ | 100841/371472 [8:01:09<21:26:37, 3.51it/s] 27%|██▋ | 100842/371472 [8:01:09<21:50:37, 3.44it/s] 27%|██▋ | 100843/371472 [8:01:09<20:54:58, 3.59it/s] 27%|██▋ | 100844/371472 [8:01:09<20:17:06, 3.71it/s] 27%|██▋ | 100845/371472 [8:01:10<19:36:53, 3.83it/s] 27%|██▋ | 100846/371472 [8:01:10<19:33:56, 3.84it/s] 27%|██▋ | 100847/371472 [8:01:10<19:07:13, 3.93it/s] 27%|██▋ | 100848/371472 [8:01:10<18:47:40, 4.00it/s] 27%|██▋ | 100849/371472 [8:01:11<19:35:31, 3.84it/s] 27%|██▋ | 100850/371472 [8:01:11<19:32:00, 3.85it/s] 27%|██▋ | 100851/371472 [8:01:11<20:09:13, 3.73it/s] 27%|██▋ | 100852/371472 [8:01:12<20:33:03, 3.66it/s] 27%|██▋ | 100853/371472 [8:01:12<19:48:38, 3.79it/s] 27%|██▋ | 100854/371472 [8:01:12<19:41:50, 3.82it/s] 27%|██▋ | 100855/371472 [8:01:12<20:07:00, 3.74it/s] 27%|██▋ | 100856/371472 [8:01:13<19:52:05, 3.78it/s] 27%|██▋ | 100857/371472 [8:01:13<19:39:41, 3.82it/s] 27%|██▋ | 100858/371472 [8:01:13<20:23:26, 3.69it/s] 27%|██▋ | 100859/371472 [8:01:13<21:12:16, 3.54it/s] 27%|██▋ | 100860/371472 [8:01:14<21:07:35, 3.56it/s] {'loss': 3.566, 'learning_rate': 7.559902174147256e-07, 'epoch': 4.34} + 27%|██▋ | 100860/371472 [8:01:14<21:07:35, 3.56it/s] 27%|██▋ | 100861/371472 [8:01:14<23:15:16, 3.23it/s] 27%|██▋ | 100862/371472 [8:01:14<23:18:40, 3.22it/s] 27%|██▋ | 100863/371472 [8:01:15<22:52:50, 3.29it/s] 27%|██▋ | 100864/371472 [8:01:15<22:17:07, 3.37it/s] 27%|██▋ | 100865/371472 [8:01:15<21:27:41, 3.50it/s] 27%|██▋ | 100866/371472 [8:01:16<23:19:21, 3.22it/s] 27%|██▋ | 100867/371472 [8:01:16<23:20:12, 3.22it/s] 27%|██▋ | 100868/371472 [8:01:16<23:57:24, 3.14it/s] 27%|██▋ | 100869/371472 [8:01:17<22:57:46, 3.27it/s] 27%|██▋ | 100870/371472 [8:01:17<21:51:34, 3.44it/s] 27%|██▋ | 100871/371472 [8:01:17<21:44:32, 3.46it/s] 27%|██▋ | 100872/371472 [8:01:17<21:05:17, 3.56it/s] 27%|██▋ | 100873/371472 [8:01:18<20:45:56, 3.62it/s] 27%|██▋ | 100874/371472 [8:01:18<20:33:50, 3.66it/s] 27%|██▋ | 100875/371472 [8:01:18<20:20:37, 3.69it/s] 27%|██▋ | 100876/371472 [8:01:18<21:08:58, 3.55it/s] 27%|██▋ | 100877/371472 [8:01:19<20:38:35, 3.64it/s] 27%|██▋ | 100878/371472 [8:01:19<20:34:26, 3.65it/s] 27%|██▋ | 100879/371472 [8:01:19<20:24:33, 3.68it/s] 27%|██▋ | 100880/371472 [8:01:19<19:56:42, 3.77it/s] {'loss': 3.612, 'learning_rate': 7.559417354392467e-07, 'epoch': 4.35} + 27%|██▋ | 100880/371472 [8:01:19<19:56:42, 3.77it/s] 27%|██▋ | 100881/371472 [8:01:20<20:28:11, 3.67it/s] 27%|██▋ | 100882/371472 [8:01:20<19:53:52, 3.78it/s] 27%|██▋ | 100883/371472 [8:01:20<19:56:12, 3.77it/s] 27%|██▋ | 100884/371472 [8:01:21<20:23:50, 3.68it/s] 27%|██▋ | 100885/371472 [8:01:21<19:57:59, 3.76it/s] 27%|██▋ | 100886/371472 [8:01:21<20:55:21, 3.59it/s] 27%|██▋ | 100887/371472 [8:01:21<20:14:16, 3.71it/s] 27%|██▋ | 100888/371472 [8:01:22<19:43:51, 3.81it/s] 27%|██▋ | 100889/371472 [8:01:22<20:27:21, 3.67it/s] 27%|██▋ | 100890/371472 [8:01:22<20:55:02, 3.59it/s] 27%|██▋ | 100891/371472 [8:01:23<21:13:47, 3.54it/s] 27%|██▋ | 100892/371472 [8:01:23<20:31:51, 3.66it/s] 27%|██▋ | 100893/371472 [8:01:23<21:12:48, 3.54it/s] 27%|██▋ | 100894/371472 [8:01:23<21:10:48, 3.55it/s] 27%|██▋ | 100895/371472 [8:01:24<20:33:53, 3.65it/s] 27%|██▋ | 100896/371472 [8:01:24<20:37:00, 3.65it/s] 27%|██▋ | 100897/371472 [8:01:24<21:35:05, 3.48it/s] 27%|██▋ | 100898/371472 [8:01:24<21:56:20, 3.43it/s] 27%|██▋ | 100899/371472 [8:01:25<21:02:10, 3.57it/s] 27%|██▋ | 100900/371472 [8:01:25<20:38:15, 3.64it/s] {'loss': 3.5499, 'learning_rate': 7.558932534637677e-07, 'epoch': 4.35} + 27%|██▋ | 100900/371472 [8:01:25<20:38:15, 3.64it/s] 27%|██▋ | 100901/371472 [8:01:25<21:14:09, 3.54it/s] 27%|██▋ | 100902/371472 [8:01:26<22:25:47, 3.35it/s] 27%|██▋ | 100903/371472 [8:01:26<22:04:42, 3.40it/s] 27%|██▋ | 100904/371472 [8:01:26<21:05:25, 3.56it/s] 27%|██▋ | 100905/371472 [8:01:26<21:03:59, 3.57it/s] 27%|██▋ | 100906/371472 [8:01:27<20:26:52, 3.68it/s] 27%|██▋ | 100907/371472 [8:01:27<20:24:16, 3.68it/s] 27%|██▋ | 100908/371472 [8:01:27<20:33:35, 3.66it/s] 27%|██▋ | 100909/371472 [8:01:28<22:24:08, 3.35it/s] 27%|██▋ | 100910/371472 [8:01:28<22:04:55, 3.40it/s] 27%|██▋ | 100911/371472 [8:01:28<22:01:46, 3.41it/s] 27%|██▋ | 100912/371472 [8:01:28<21:08:51, 3.55it/s] 27%|██▋ | 100913/371472 [8:01:29<20:55:23, 3.59it/s] 27%|██▋ | 100914/371472 [8:01:29<21:07:21, 3.56it/s] 27%|██▋ | 100915/371472 [8:01:29<20:31:20, 3.66it/s] 27%|██▋ | 100916/371472 [8:01:30<20:29:29, 3.67it/s] 27%|██▋ | 100917/371472 [8:01:30<20:24:41, 3.68it/s] 27%|██▋ | 100918/371472 [8:01:30<21:29:36, 3.50it/s] 27%|██▋ | 100919/371472 [8:01:30<21:16:51, 3.53it/s] 27%|██▋ | 100920/371472 [8:01:31<20:58:24, 3.58it/s] {'loss': 3.6266, 'learning_rate': 7.558447714882888e-07, 'epoch': 4.35} + 27%|██▋ | 100920/371472 [8:01:31<20:58:24, 3.58it/s] 27%|██▋ | 100921/371472 [8:01:31<21:45:09, 3.45it/s] 27%|██▋ | 100922/371472 [8:01:31<22:37:13, 3.32it/s] 27%|██▋ | 100923/371472 [8:01:32<22:39:12, 3.32it/s] 27%|██▋ | 100924/371472 [8:01:32<21:08:57, 3.55it/s] 27%|██▋ | 100925/371472 [8:01:32<20:47:24, 3.61it/s] 27%|██▋ | 100926/371472 [8:01:32<20:24:36, 3.68it/s] 27%|██▋ | 100927/371472 [8:01:33<20:19:49, 3.70it/s] 27%|██▋ | 100928/371472 [8:01:33<20:19:51, 3.70it/s] 27%|██▋ | 100929/371472 [8:01:33<20:14:42, 3.71it/s] 27%|██▋ | 100930/371472 [8:01:33<20:06:23, 3.74it/s] 27%|██▋ | 100931/371472 [8:01:34<20:01:29, 3.75it/s] 27%|██▋ | 100932/371472 [8:01:34<21:26:53, 3.50it/s] 27%|██▋ | 100933/371472 [8:01:34<23:09:55, 3.24it/s] 27%|██▋ | 100934/371472 [8:01:35<22:21:42, 3.36it/s] 27%|██▋ | 100935/371472 [8:01:35<22:40:35, 3.31it/s] 27%|██▋ | 100936/371472 [8:01:35<23:31:26, 3.19it/s] 27%|██▋ | 100937/371472 [8:01:36<23:27:00, 3.20it/s] 27%|██▋ | 100938/371472 [8:01:36<22:22:46, 3.36it/s] 27%|██▋ | 100939/371472 [8:01:36<21:35:05, 3.48it/s] 27%|██▋ | 100940/371472 [8:01:36<21:23:26, 3.51it/s] {'loss': 3.5335, 'learning_rate': 7.5579628951281e-07, 'epoch': 4.35} + 27%|██▋ | 100940/371472 [8:01:36<21:23:26, 3.51it/s] 27%|██▋ | 100941/371472 [8:01:37<20:45:42, 3.62it/s] 27%|██▋ | 100942/371472 [8:01:37<22:16:37, 3.37it/s] 27%|██▋ | 100943/371472 [8:01:37<21:33:57, 3.48it/s] 27%|██▋ | 100944/371472 [8:01:38<21:20:43, 3.52it/s] 27%|██▋ | 100945/371472 [8:01:38<21:44:45, 3.46it/s] 27%|██▋ | 100946/371472 [8:01:38<21:18:26, 3.53it/s] 27%|██▋ | 100947/371472 [8:01:38<22:14:32, 3.38it/s] 27%|██▋ | 100948/371472 [8:01:39<21:44:51, 3.46it/s] 27%|██▋ | 100949/371472 [8:01:39<20:49:38, 3.61it/s] 27%|██▋ | 100950/371472 [8:01:39<20:24:03, 3.68it/s] 27%|██▋ | 100951/371472 [8:01:39<20:01:28, 3.75it/s] 27%|██▋ | 100952/371472 [8:01:40<20:48:33, 3.61it/s] 27%|██▋ | 100953/371472 [8:01:40<20:16:43, 3.71it/s] 27%|██▋ | 100954/371472 [8:01:40<20:04:08, 3.74it/s] 27%|██▋ | 100955/371472 [8:01:41<20:03:55, 3.74it/s] 27%|██▋ | 100956/371472 [8:01:41<21:51:31, 3.44it/s] 27%|██▋ | 100957/371472 [8:01:41<20:52:12, 3.60it/s] 27%|██▋ | 100958/371472 [8:01:41<20:20:53, 3.69it/s] 27%|██▋ | 100959/371472 [8:01:42<19:54:11, 3.78it/s] 27%|██▋ | 100960/371472 [8:01:42<20:24:09, 3.68it/s] {'loss': 3.545, 'learning_rate': 7.557478075373311e-07, 'epoch': 4.35} + 27%|██▋ | 100960/371472 [8:01:42<20:24:09, 3.68it/s] 27%|██▋ | 100961/371472 [8:01:42<20:40:01, 3.64it/s] 27%|██▋ | 100962/371472 [8:01:43<20:25:31, 3.68it/s] 27%|██▋ | 100963/371472 [8:01:43<20:31:30, 3.66it/s] 27%|██▋ | 100964/371472 [8:01:43<19:50:32, 3.79it/s] 27%|██▋ | 100965/371472 [8:01:43<19:07:10, 3.93it/s] 27%|██▋ | 100966/371472 [8:01:44<19:48:41, 3.79it/s] 27%|██▋ | 100967/371472 [8:01:44<19:51:42, 3.78it/s] 27%|██▋ | 100968/371472 [8:01:44<19:54:40, 3.77it/s] 27%|██▋ | 100969/371472 [8:01:44<20:56:37, 3.59it/s] 27%|██▋ | 100970/371472 [8:01:45<20:41:59, 3.63it/s] 27%|██▋ | 100971/371472 [8:01:45<21:06:59, 3.56it/s] 27%|██▋ | 100972/371472 [8:01:45<20:33:57, 3.65it/s] 27%|██▋ | 100973/371472 [8:01:45<20:44:42, 3.62it/s] 27%|██▋ | 100974/371472 [8:01:46<21:23:45, 3.51it/s] 27%|██▋ | 100975/371472 [8:01:46<22:33:16, 3.33it/s] 27%|██▋ | 100976/371472 [8:01:47<24:34:57, 3.06it/s] 27%|██▋ | 100977/371472 [8:01:47<23:01:59, 3.26it/s] 27%|██▋ | 100978/371472 [8:01:47<23:42:59, 3.17it/s] 27%|██▋ | 100979/371472 [8:01:47<22:49:33, 3.29it/s] 27%|██▋ | 100980/371472 [8:01:48<22:16:38, 3.37it/s] {'loss': 3.6761, 'learning_rate': 7.556993255618522e-07, 'epoch': 4.35} + 27%|██▋ | 100980/371472 [8:01:48<22:16:38, 3.37it/s] 27%|██▋ | 100981/371472 [8:01:48<22:25:38, 3.35it/s] 27%|██▋ | 100982/371472 [8:01:48<23:25:45, 3.21it/s] 27%|██▋ | 100983/371472 [8:01:49<22:56:39, 3.27it/s] 27%|██▋ | 100984/371472 [8:01:49<22:56:01, 3.28it/s] 27%|██▋ | 100985/371472 [8:01:49<21:24:26, 3.51it/s] 27%|██▋ | 100986/371472 [8:01:49<22:47:15, 3.30it/s] 27%|██▋ | 100987/371472 [8:01:50<22:21:03, 3.36it/s] 27%|██▋ | 100988/371472 [8:01:50<22:57:08, 3.27it/s] 27%|██▋ | 100989/371472 [8:01:50<22:02:51, 3.41it/s] 27%|██▋ | 100990/371472 [8:01:51<21:48:14, 3.45it/s] 27%|██▋ | 100991/371472 [8:01:51<21:56:56, 3.42it/s] 27%|██▋ | 100992/371472 [8:01:51<22:07:14, 3.40it/s] 27%|██▋ | 100993/371472 [8:01:52<21:17:19, 3.53it/s] 27%|██▋ | 100994/371472 [8:01:52<22:10:39, 3.39it/s] 27%|██▋ | 100995/371472 [8:01:52<21:21:25, 3.52it/s] 27%|██▋ | 100996/371472 [8:01:52<22:01:19, 3.41it/s] 27%|██▋ | 100997/371472 [8:01:53<22:56:02, 3.28it/s] 27%|██▋ | 100998/371472 [8:01:53<22:37:39, 3.32it/s] 27%|██▋ | 100999/371472 [8:01:53<22:12:37, 3.38it/s] 27%|██▋ | 101000/371472 [8:01:54<21:36:45, 3.48it/s] {'loss': 3.7315, 'learning_rate': 7.556508435863733e-07, 'epoch': 4.35} + 27%|██▋ | 101000/371472 [8:01:54<21:36:45, 3.48it/s] 27%|██▋ | 101001/371472 [8:01:54<22:17:52, 3.37it/s] 27%|██▋ | 101002/371472 [8:01:54<20:59:17, 3.58it/s] 27%|██▋ | 101003/371472 [8:01:54<22:04:50, 3.40it/s] 27%|██▋ | 101004/371472 [8:01:55<22:34:59, 3.33it/s] 27%|██▋ | 101005/371472 [8:01:55<21:31:35, 3.49it/s] 27%|██▋ | 101006/371472 [8:01:55<20:41:38, 3.63it/s] 27%|██▋ | 101007/371472 [8:01:56<20:28:28, 3.67it/s] 27%|██▋ | 101008/371472 [8:01:56<21:16:47, 3.53it/s] 27%|██▋ | 101009/371472 [8:01:56<21:01:36, 3.57it/s] 27%|██▋ | 101010/371472 [8:01:56<20:04:35, 3.74it/s] 27%|██▋ | 101011/371472 [8:01:57<20:06:56, 3.73it/s] 27%|██▋ | 101012/371472 [8:01:57<20:40:04, 3.63it/s] 27%|██▋ | 101013/371472 [8:01:57<20:19:48, 3.70it/s] 27%|██▋ | 101014/371472 [8:01:57<20:27:35, 3.67it/s] 27%|██▋ | 101015/371472 [8:01:58<20:56:16, 3.59it/s] 27%|██▋ | 101016/371472 [8:01:58<21:18:59, 3.52it/s] 27%|██▋ | 101017/371472 [8:01:58<21:26:27, 3.50it/s] 27%|██▋ | 101018/371472 [8:01:59<22:19:22, 3.37it/s] 27%|██▋ | 101019/371472 [8:01:59<23:25:30, 3.21it/s] 27%|██▋ | 101020/371472 [8:01:59<22:53:15, 3.28it/s] {'loss': 3.4357, 'learning_rate': 7.556023616108945e-07, 'epoch': 4.35} + 27%|██▋ | 101020/371472 [8:01:59<22:53:15, 3.28it/s] 27%|██▋ | 101021/371472 [8:02:00<25:59:32, 2.89it/s] 27%|██▋ | 101022/371472 [8:02:00<24:48:41, 3.03it/s] 27%|██▋ | 101023/371472 [8:02:00<23:29:52, 3.20it/s] 27%|██▋ | 101024/371472 [8:02:01<22:13:21, 3.38it/s] 27%|██▋ | 101025/371472 [8:02:01<21:36:11, 3.48it/s] 27%|██▋ | 101026/371472 [8:02:01<21:10:11, 3.55it/s] 27%|██▋ | 101027/371472 [8:02:01<20:34:40, 3.65it/s] 27%|██▋ | 101028/371472 [8:02:02<20:05:14, 3.74it/s] 27%|██▋ | 101029/371472 [8:02:02<20:22:12, 3.69it/s] 27%|██▋ | 101030/371472 [8:02:02<21:07:46, 3.56it/s] 27%|██▋ | 101031/371472 [8:02:03<21:55:01, 3.43it/s] 27%|██▋ | 101032/371472 [8:02:03<22:06:47, 3.40it/s] 27%|██▋ | 101033/371472 [8:02:03<21:25:55, 3.51it/s] 27%|██▋ | 101034/371472 [8:02:03<20:37:25, 3.64it/s] 27%|██▋ | 101035/371472 [8:02:04<20:57:03, 3.59it/s] 27%|██▋ | 101036/371472 [8:02:04<20:28:04, 3.67it/s] 27%|██▋ | 101037/371472 [8:02:04<20:07:23, 3.73it/s] 27%|██▋ | 101038/371472 [8:02:04<20:20:37, 3.69it/s] 27%|██▋ | 101039/371472 [8:02:05<20:57:32, 3.58it/s] 27%|██▋ | 101040/371472 [8:02:05<23:26:55, 3.20it/s] {'loss': 3.5213, 'learning_rate': 7.555538796354155e-07, 'epoch': 4.35} + 27%|██▋ | 101040/371472 [8:02:05<23:26:55, 3.20it/s] 27%|██▋ | 101041/371472 [8:02:05<22:40:18, 3.31it/s] 27%|██▋ | 101042/371472 [8:02:06<22:40:38, 3.31it/s] 27%|██▋ | 101043/371472 [8:02:06<21:30:07, 3.49it/s] 27%|██▋ | 101044/371472 [8:02:06<22:43:56, 3.30it/s] 27%|██▋ | 101045/371472 [8:02:07<22:02:43, 3.41it/s] 27%|██▋ | 101046/371472 [8:02:07<23:16:27, 3.23it/s] 27%|██▋ | 101047/371472 [8:02:07<22:23:15, 3.36it/s] 27%|██▋ | 101048/371472 [8:02:07<21:47:25, 3.45it/s] 27%|██▋ | 101049/371472 [8:02:08<21:43:06, 3.46it/s] 27%|██▋ | 101050/371472 [8:02:08<21:28:55, 3.50it/s] 27%|██▋ | 101051/371472 [8:02:08<21:14:22, 3.54it/s] 27%|██▋ | 101052/371472 [8:02:09<21:00:56, 3.57it/s] 27%|██▋ | 101053/371472 [8:02:09<21:10:17, 3.55it/s] 27%|██▋ | 101054/371472 [8:02:09<21:04:01, 3.57it/s] 27%|██▋ | 101055/371472 [8:02:09<20:29:10, 3.67it/s] 27%|██▋ | 101056/371472 [8:02:10<21:40:11, 3.47it/s] 27%|██▋ | 101057/371472 [8:02:10<21:09:38, 3.55it/s] 27%|██▋ | 101058/371472 [8:02:10<20:46:42, 3.62it/s] 27%|██▋ | 101059/371472 [8:02:10<19:55:14, 3.77it/s] 27%|██▋ | 101060/371472 [8:02:11<20:12:38, 3.72it/s] {'loss': 3.5497, 'learning_rate': 7.555053976599366e-07, 'epoch': 4.35} + 27%|██▋ | 101060/371472 [8:02:11<20:12:38, 3.72it/s] 27%|██▋ | 101061/371472 [8:02:11<21:11:14, 3.55it/s] 27%|██▋ | 101062/371472 [8:02:11<21:06:30, 3.56it/s] 27%|██▋ | 101063/371472 [8:02:12<20:41:17, 3.63it/s] 27%|██▋ | 101064/371472 [8:02:12<20:54:36, 3.59it/s] 27%|██▋ | 101065/371472 [8:02:12<21:41:11, 3.46it/s] 27%|██▋ | 101066/371472 [8:02:12<20:54:05, 3.59it/s] 27%|██▋ | 101067/371472 [8:02:13<21:57:26, 3.42it/s] 27%|██▋ | 101068/371472 [8:02:13<23:08:25, 3.25it/s] 27%|██▋ | 101069/371472 [8:02:13<23:37:04, 3.18it/s] 27%|██▋ | 101070/371472 [8:02:14<22:21:18, 3.36it/s] 27%|██▋ | 101071/371472 [8:02:14<22:31:58, 3.33it/s] 27%|██▋ | 101072/371472 [8:02:14<22:34:52, 3.33it/s] 27%|██▋ | 101073/371472 [8:02:15<21:33:38, 3.48it/s] 27%|██▋ | 101074/371472 [8:02:15<21:07:24, 3.56it/s] 27%|██▋ | 101075/371472 [8:02:15<21:17:13, 3.53it/s] 27%|██▋ | 101076/371472 [8:02:15<22:06:32, 3.40it/s] 27%|██▋ | 101077/371472 [8:02:16<21:20:10, 3.52it/s] 27%|██▋ | 101078/371472 [8:02:16<20:25:28, 3.68it/s] 27%|██▋ | 101079/371472 [8:02:16<19:57:08, 3.76it/s] 27%|██▋ | 101080/371472 [8:02:16<19:15:50, 3.90it/s] {'loss': 3.6045, 'learning_rate': 7.554569156844577e-07, 'epoch': 4.35} + 27%|██▋ | 101080/371472 [8:02:16<19:15:50, 3.90it/s] 27%|██▋ | 101081/371472 [8:02:17<19:26:55, 3.86it/s] 27%|██▋ | 101082/371472 [8:02:17<19:14:46, 3.90it/s] 27%|██▋ | 101083/371472 [8:02:17<19:42:32, 3.81it/s] 27%|██▋ | 101084/371472 [8:02:18<20:53:49, 3.59it/s] 27%|██▋ | 101085/371472 [8:02:18<20:01:11, 3.75it/s] 27%|██▋ | 101086/371472 [8:02:18<19:41:22, 3.81it/s] 27%|██▋ | 101087/371472 [8:02:18<20:08:49, 3.73it/s] 27%|██▋ | 101088/371472 [8:02:19<20:07:35, 3.73it/s] 27%|██▋ | 101089/371472 [8:02:19<20:45:41, 3.62it/s] 27%|██▋ | 101090/371472 [8:02:19<20:39:13, 3.64it/s] 27%|██▋ | 101091/371472 [8:02:19<20:16:38, 3.70it/s] 27%|██▋ | 101092/371472 [8:02:20<20:42:21, 3.63it/s] 27%|██▋ | 101093/371472 [8:02:20<20:26:04, 3.68it/s] 27%|██▋ | 101094/371472 [8:02:20<21:30:50, 3.49it/s] 27%|██▋ | 101095/371472 [8:02:21<21:03:55, 3.57it/s] 27%|██▋ | 101096/371472 [8:02:21<20:21:46, 3.69it/s] 27%|██▋ | 101097/371472 [8:02:21<20:46:57, 3.61it/s] 27%|██▋ | 101098/371472 [8:02:21<20:34:51, 3.65it/s] 27%|██▋ | 101099/371472 [8:02:22<20:21:15, 3.69it/s] 27%|██▋ | 101100/371472 [8:02:22<23:11:24, 3.24it/s] {'loss': 3.7542, 'learning_rate': 7.554084337089789e-07, 'epoch': 4.35} + 27%|██▋ | 101100/371472 [8:02:22<23:11:24, 3.24it/s] 27%|██▋ | 101101/371472 [8:02:22<23:06:26, 3.25it/s] 27%|██▋ | 101102/371472 [8:02:23<21:28:01, 3.50it/s] 27%|██▋ | 101103/371472 [8:02:23<20:30:49, 3.66it/s] 27%|██▋ | 101104/371472 [8:02:23<20:32:32, 3.66it/s] 27%|██▋ | 101105/371472 [8:02:23<20:12:56, 3.72it/s] 27%|██▋ | 101106/371472 [8:02:24<21:36:17, 3.48it/s] 27%|██▋ | 101107/371472 [8:02:24<22:38:49, 3.32it/s] 27%|██▋ | 101108/371472 [8:02:24<22:08:20, 3.39it/s] 27%|██▋ | 101109/371472 [8:02:25<22:19:48, 3.36it/s] 27%|██▋ | 101110/371472 [8:02:25<21:36:31, 3.48it/s] 27%|██▋ | 101111/371472 [8:02:25<21:09:30, 3.55it/s] 27%|██▋ | 101112/371472 [8:02:25<21:03:05, 3.57it/s] 27%|██▋ | 101113/371472 [8:02:26<20:34:54, 3.65it/s] 27%|██▋ | 101114/371472 [8:02:26<20:11:14, 3.72it/s] 27%|██▋ | 101115/371472 [8:02:26<21:26:54, 3.50it/s] 27%|██▋ | 101116/371472 [8:02:26<20:56:56, 3.58it/s] 27%|██▋ | 101117/371472 [8:02:27<21:11:31, 3.54it/s] 27%|██▋ | 101118/371472 [8:02:27<20:29:39, 3.66it/s] 27%|██▋ | 101119/371472 [8:02:27<23:34:27, 3.19it/s] 27%|██▋ | 101120/371472 [8:02:28<22:21:48, 3.36it/s] {'loss': 3.5407, 'learning_rate': 7.553599517335e-07, 'epoch': 4.36} + 27%|██▋ | 101120/371472 [8:02:28<22:21:48, 3.36it/s] 27%|██▋ | 101121/371472 [8:02:28<20:49:09, 3.61it/s] 27%|██▋ | 101122/371472 [8:02:28<21:06:53, 3.56it/s] 27%|██▋ | 101123/371472 [8:02:28<20:02:02, 3.75it/s] 27%|██▋ | 101124/371472 [8:02:29<20:38:17, 3.64it/s] 27%|██▋ | 101125/371472 [8:02:29<20:08:51, 3.73it/s] 27%|██▋ | 101126/371472 [8:02:29<20:45:40, 3.62it/s] 27%|██▋ | 101127/371472 [8:02:30<20:05:54, 3.74it/s] 27%|██▋ | 101128/371472 [8:02:30<19:31:13, 3.85it/s] 27%|██▋ | 101129/371472 [8:02:30<21:01:51, 3.57it/s] 27%|██▋ | 101130/371472 [8:02:30<22:19:32, 3.36it/s] 27%|██▋ | 101131/371472 [8:02:31<21:12:25, 3.54it/s] 27%|██▋ | 101132/371472 [8:02:31<21:35:50, 3.48it/s] 27%|██▋ | 101133/371472 [8:02:31<21:17:33, 3.53it/s] 27%|██▋ | 101134/371472 [8:02:32<20:42:55, 3.63it/s] 27%|██▋ | 101135/371472 [8:02:32<20:13:08, 3.71it/s] 27%|██▋ | 101136/371472 [8:02:32<20:49:26, 3.61it/s] 27%|██▋ | 101137/371472 [8:02:32<21:40:11, 3.47it/s] 27%|██▋ | 101138/371472 [8:02:33<21:12:31, 3.54it/s] 27%|██▋ | 101139/371472 [8:02:33<21:15:15, 3.53it/s] 27%|██▋ | 101140/371472 [8:02:33<22:01:47, 3.41it/s] {'loss': 3.6474, 'learning_rate': 7.553114697580211e-07, 'epoch': 4.36} + 27%|██▋ | 101140/371472 [8:02:33<22:01:47, 3.41it/s] 27%|██▋ | 101141/371472 [8:02:34<21:24:00, 3.51it/s] 27%|██▋ | 101142/371472 [8:02:34<21:16:19, 3.53it/s] 27%|██▋ | 101143/371472 [8:02:34<21:17:45, 3.53it/s] 27%|██▋ | 101144/371472 [8:02:34<20:52:03, 3.60it/s] 27%|██▋ | 101145/371472 [8:02:35<20:06:48, 3.73it/s] 27%|██▋ | 101146/371472 [8:02:35<20:20:57, 3.69it/s] 27%|██▋ | 101147/371472 [8:02:35<21:58:26, 3.42it/s] 27%|██▋ | 101148/371472 [8:02:35<21:24:04, 3.51it/s] 27%|██▋ | 101149/371472 [8:02:36<20:43:41, 3.62it/s] 27%|██▋ | 101150/371472 [8:02:36<20:04:56, 3.74it/s] 27%|██▋ | 101151/371472 [8:02:36<20:20:34, 3.69it/s] 27%|██▋ | 101152/371472 [8:02:37<20:59:31, 3.58it/s] 27%|██▋ | 101153/371472 [8:02:37<20:29:21, 3.66it/s] 27%|██▋ | 101154/371472 [8:02:37<20:42:59, 3.62it/s] 27%|██▋ | 101155/371472 [8:02:37<21:48:04, 3.44it/s] 27%|██▋ | 101156/371472 [8:02:38<22:07:44, 3.39it/s] 27%|██▋ | 101157/371472 [8:02:38<20:59:37, 3.58it/s] 27%|██▋ | 101158/371472 [8:02:38<20:52:26, 3.60it/s] 27%|██▋ | 101159/371472 [8:02:39<23:46:35, 3.16it/s] 27%|██▋ | 101160/371472 [8:02:39<22:28:27, 3.34it/s] {'loss': 3.6087, 'learning_rate': 7.552629877825421e-07, 'epoch': 4.36} + 27%|██▋ | 101160/371472 [8:02:39<22:28:27, 3.34it/s] 27%|██▋ | 101161/371472 [8:02:39<21:27:56, 3.50it/s] 27%|██▋ | 101162/371472 [8:02:39<20:36:56, 3.64it/s] 27%|██▋ | 101163/371472 [8:02:40<23:06:52, 3.25it/s] 27%|██▋ | 101164/371472 [8:02:40<21:24:14, 3.51it/s] 27%|██▋ | 101165/371472 [8:02:40<20:23:39, 3.68it/s] 27%|██▋ | 101166/371472 [8:02:41<21:45:35, 3.45it/s] 27%|██▋ | 101167/371472 [8:02:41<20:56:46, 3.58it/s] 27%|██▋ | 101168/371472 [8:02:41<23:13:56, 3.23it/s] 27%|██▋ | 101169/371472 [8:02:42<23:29:45, 3.20it/s] 27%|██▋ | 101170/371472 [8:02:42<23:31:29, 3.19it/s] 27%|██▋ | 101171/371472 [8:02:42<22:28:21, 3.34it/s] 27%|██▋ | 101172/371472 [8:02:42<21:57:11, 3.42it/s] 27%|██▋ | 101173/371472 [8:02:43<23:37:28, 3.18it/s] 27%|██▋ | 101174/371472 [8:02:43<22:44:01, 3.30it/s] 27%|██▋ | 101175/371472 [8:02:43<21:26:59, 3.50it/s] 27%|██▋ | 101176/371472 [8:02:44<21:23:11, 3.51it/s] 27%|██▋ | 101177/371472 [8:02:44<21:14:07, 3.54it/s] 27%|██▋ | 101178/371472 [8:02:44<20:45:02, 3.62it/s] 27%|██▋ | 101179/371472 [8:02:44<20:19:36, 3.69it/s] 27%|██▋ | 101180/371472 [8:02:45<19:56:15, 3.77it/s] {'loss': 3.2966, 'learning_rate': 7.552145058070632e-07, 'epoch': 4.36} + 27%|██▋ | 101180/371472 [8:02:45<19:56:15, 3.77it/s] 27%|██▋ | 101181/371472 [8:02:45<20:04:21, 3.74it/s] 27%|██▋ | 101182/371472 [8:02:45<21:40:34, 3.46it/s] 27%|██▋ | 101183/371472 [8:02:46<22:07:10, 3.39it/s] 27%|██▋ | 101184/371472 [8:02:46<21:17:11, 3.53it/s] 27%|██▋ | 101185/371472 [8:02:46<20:22:39, 3.68it/s] 27%|██▋ | 101186/371472 [8:02:46<19:33:06, 3.84it/s] 27%|██▋ | 101187/371472 [8:02:47<21:09:10, 3.55it/s] 27%|██▋ | 101188/371472 [8:02:47<20:01:20, 3.75it/s] 27%|██▋ | 101189/371472 [8:02:47<19:26:06, 3.86it/s] 27%|██▋ | 101190/371472 [8:02:47<19:35:43, 3.83it/s] 27%|██▋ | 101191/371472 [8:02:48<20:25:36, 3.68it/s] 27%|██▋ | 101192/371472 [8:02:48<20:27:28, 3.67it/s] 27%|██▋ | 101193/371472 [8:02:48<20:12:24, 3.72it/s] 27%|██▋ | 101194/371472 [8:02:48<19:58:24, 3.76it/s] 27%|██▋ | 101195/371472 [8:02:49<19:33:22, 3.84it/s] 27%|██▋ | 101196/371472 [8:02:49<19:26:58, 3.86it/s] 27%|██▋ | 101197/371472 [8:02:49<20:39:36, 3.63it/s] 27%|██▋ | 101198/371472 [8:02:50<19:54:25, 3.77it/s] 27%|██▋ | 101199/371472 [8:02:50<19:45:30, 3.80it/s] 27%|██▋ | 101200/371472 [8:02:50<21:18:53, 3.52it/s] {'loss': 3.5954, 'learning_rate': 7.551660238315844e-07, 'epoch': 4.36} + 27%|██▋ | 101200/371472 [8:02:50<21:18:53, 3.52it/s] 27%|██▋ | 101201/371472 [8:02:50<20:13:54, 3.71it/s] 27%|██▋ | 101202/371472 [8:02:51<22:03:30, 3.40it/s] 27%|██▋ | 101203/371472 [8:02:51<21:35:00, 3.48it/s] 27%|██▋ | 101204/371472 [8:02:51<21:24:40, 3.51it/s] 27%|██▋ | 101205/371472 [8:02:52<21:14:48, 3.53it/s] 27%|██▋ | 101206/371472 [8:02:52<22:29:02, 3.34it/s] 27%|██▋ | 101207/371472 [8:02:52<21:28:35, 3.50it/s] 27%|██▋ | 101208/371472 [8:02:52<20:47:33, 3.61it/s] 27%|██▋ | 101209/371472 [8:02:53<21:52:07, 3.43it/s] 27%|██▋ | 101210/371472 [8:02:53<20:55:45, 3.59it/s] 27%|██▋ | 101211/371472 [8:02:53<21:15:50, 3.53it/s] 27%|██▋ | 101212/371472 [8:02:54<21:39:54, 3.47it/s] 27%|██▋ | 101213/371472 [8:02:54<21:24:07, 3.51it/s] 27%|██▋ | 101214/371472 [8:02:54<20:40:52, 3.63it/s] 27%|██▋ | 101215/371472 [8:02:54<20:49:34, 3.60it/s] 27%|██▋ | 101216/371472 [8:02:55<20:38:09, 3.64it/s] 27%|██▋ | 101217/371472 [8:02:55<20:28:57, 3.67it/s] 27%|██▋ | 101218/371472 [8:02:55<20:20:08, 3.69it/s] 27%|██▋ | 101219/371472 [8:02:55<19:45:17, 3.80it/s] 27%|██▋ | 101220/371472 [8:02:56<19:39:58, 3.82it/s] {'loss': 3.587, 'learning_rate': 7.551175418561054e-07, 'epoch': 4.36} + 27%|██▋ | 101220/371472 [8:02:56<19:39:58, 3.82it/s] 27%|██▋ | 101221/371472 [8:02:56<19:27:44, 3.86it/s] 27%|██▋ | 101222/371472 [8:02:56<20:21:23, 3.69it/s] 27%|██▋ | 101223/371472 [8:02:57<21:45:28, 3.45it/s] 27%|██▋ | 101224/371472 [8:02:57<20:52:29, 3.60it/s] 27%|██▋ | 101225/371472 [8:02:57<21:04:19, 3.56it/s] 27%|██▋ | 101226/371472 [8:02:57<21:26:37, 3.50it/s] 27%|██▋ | 101227/371472 [8:02:58<20:56:47, 3.58it/s] 27%|██▋ | 101228/371472 [8:02:58<21:19:08, 3.52it/s] 27%|██▋ | 101229/371472 [8:02:58<22:14:18, 3.38it/s] 27%|██▋ | 101230/371472 [8:02:59<22:33:02, 3.33it/s] 27%|██▋ | 101231/371472 [8:02:59<21:36:56, 3.47it/s] 27%|██▋ | 101232/371472 [8:02:59<20:33:25, 3.65it/s] 27%|██▋ | 101233/371472 [8:02:59<20:58:02, 3.58it/s] 27%|██▋ | 101234/371472 [8:03:00<21:12:29, 3.54it/s] 27%|██▋ | 101235/371472 [8:03:00<21:02:26, 3.57it/s] 27%|██▋ | 101236/371472 [8:03:00<20:08:36, 3.73it/s] 27%|██▋ | 101237/371472 [8:03:00<20:28:00, 3.67it/s] 27%|██▋ | 101238/371472 [8:03:01<21:07:02, 3.55it/s] 27%|██▋ | 101239/371472 [8:03:01<20:54:10, 3.59it/s] 27%|██▋ | 101240/371472 [8:03:01<21:01:20, 3.57it/s] {'loss': 3.5307, 'learning_rate': 7.550690598806266e-07, 'epoch': 4.36} + 27%|██▋ | 101240/371472 [8:03:01<21:01:20, 3.57it/s] 27%|██▋ | 101241/371472 [8:03:02<20:53:35, 3.59it/s] 27%|██▋ | 101242/371472 [8:03:02<20:58:00, 3.58it/s] 27%|██▋ | 101243/371472 [8:03:02<21:32:51, 3.48it/s] 27%|██▋ | 101244/371472 [8:03:02<22:07:06, 3.39it/s] 27%|██▋ | 101245/371472 [8:03:03<22:44:40, 3.30it/s] 27%|██▋ | 101246/371472 [8:03:03<21:51:51, 3.43it/s] 27%|██▋ | 101247/371472 [8:03:03<22:49:14, 3.29it/s] 27%|██▋ | 101248/371472 [8:03:04<21:39:12, 3.47it/s] 27%|██▋ | 101249/371472 [8:03:04<20:49:03, 3.61it/s] 27%|██▋ | 101250/371472 [8:03:04<22:22:46, 3.35it/s] 27%|██▋ | 101251/371472 [8:03:05<23:09:35, 3.24it/s] 27%|██▋ | 101252/371472 [8:03:05<21:43:06, 3.46it/s] 27%|██▋ | 101253/371472 [8:03:05<21:18:28, 3.52it/s] 27%|██▋ | 101254/371472 [8:03:05<21:01:55, 3.57it/s] 27%|██▋ | 101255/371472 [8:03:06<21:11:18, 3.54it/s] 27%|██▋ | 101256/371472 [8:03:06<20:57:34, 3.58it/s] 27%|██▋ | 101257/371472 [8:03:06<21:02:12, 3.57it/s] 27%|██▋ | 101258/371472 [8:03:06<20:59:24, 3.58it/s] 27%|██▋ | 101259/371472 [8:03:07<20:53:01, 3.59it/s] 27%|██▋ | 101260/371472 [8:03:07<20:29:06, 3.66it/s] {'loss': 3.5444, 'learning_rate': 7.550205779051477e-07, 'epoch': 4.36} + 27%|██▋ | 101260/371472 [8:03:07<20:29:06, 3.66it/s] 27%|██▋ | 101261/371472 [8:03:07<21:08:50, 3.55it/s] 27%|██▋ | 101262/371472 [8:03:08<22:12:40, 3.38it/s] 27%|██▋ | 101263/371472 [8:03:08<21:54:19, 3.43it/s] 27%|██▋ | 101264/371472 [8:03:08<21:48:13, 3.44it/s] 27%|██▋ | 101265/371472 [8:03:09<21:17:45, 3.52it/s] 27%|██▋ | 101266/371472 [8:03:09<21:56:13, 3.42it/s] 27%|██▋ | 101267/371472 [8:03:09<20:59:56, 3.57it/s] 27%|██▋ | 101268/371472 [8:03:09<22:13:27, 3.38it/s] 27%|██▋ | 101269/371472 [8:03:10<22:13:07, 3.38it/s] 27%|██▋ | 101270/371472 [8:03:10<21:01:57, 3.57it/s] 27%|██▋ | 101271/371472 [8:03:10<20:26:11, 3.67it/s] 27%|██▋ | 101272/371472 [8:03:11<21:14:36, 3.53it/s] 27%|██▋ | 101273/371472 [8:03:11<20:46:28, 3.61it/s] 27%|██▋ | 101274/371472 [8:03:11<20:37:40, 3.64it/s] 27%|██▋ | 101275/371472 [8:03:11<20:03:53, 3.74it/s] 27%|██▋ | 101276/371472 [8:03:12<20:38:19, 3.64it/s] 27%|██▋ | 101277/371472 [8:03:12<20:14:37, 3.71it/s] 27%|██▋ | 101278/371472 [8:03:12<21:46:47, 3.45it/s] 27%|██▋ | 101279/371472 [8:03:12<20:57:16, 3.58it/s] 27%|██▋ | 101280/371472 [8:03:13<20:25:40, 3.67it/s] {'loss': 3.4748, 'learning_rate': 7.549720959296688e-07, 'epoch': 4.36} + 27%|██▋ | 101280/371472 [8:03:13<20:25:40, 3.67it/s] 27%|██▋ | 101281/371472 [8:03:13<20:05:30, 3.74it/s] 27%|██▋ | 101282/371472 [8:03:13<20:25:57, 3.67it/s] 27%|██▋ | 101283/371472 [8:03:13<19:51:25, 3.78it/s] 27%|██▋ | 101284/371472 [8:03:14<20:20:11, 3.69it/s] 27%|██▋ | 101285/371472 [8:03:14<21:25:40, 3.50it/s] 27%|██▋ | 101286/371472 [8:03:14<22:00:28, 3.41it/s] 27%|██▋ | 101287/371472 [8:03:15<21:55:06, 3.42it/s] 27%|██▋ | 101288/371472 [8:03:15<21:12:24, 3.54it/s] 27%|██▋ | 101289/371472 [8:03:15<23:11:23, 3.24it/s] 27%|██▋ | 101290/371472 [8:03:16<22:54:15, 3.28it/s] 27%|██▋ | 101291/371472 [8:03:16<21:46:44, 3.45it/s] 27%|██▋ | 101292/371472 [8:03:16<21:09:43, 3.55it/s] 27%|██▋ | 101293/371472 [8:03:16<20:17:50, 3.70it/s] 27%|██▋ | 101294/371472 [8:03:17<20:32:34, 3.65it/s] 27%|██▋ | 101295/371472 [8:03:17<20:35:59, 3.64it/s] 27%|██▋ | 101296/371472 [8:03:17<20:44:34, 3.62it/s] 27%|██▋ | 101297/371472 [8:03:18<21:44:50, 3.45it/s] 27%|██▋ | 101298/371472 [8:03:18<21:31:29, 3.49it/s] 27%|██▋ | 101299/371472 [8:03:18<21:06:38, 3.55it/s] 27%|██▋ | 101300/371472 [8:03:18<22:05:28, 3.40it/s] {'loss': 3.314, 'learning_rate': 7.549236139541898e-07, 'epoch': 4.36} + 27%|██▋ | 101300/371472 [8:03:18<22:05:28, 3.40it/s] 27%|██▋ | 101301/371472 [8:03:19<21:18:52, 3.52it/s] 27%|██▋ | 101302/371472 [8:03:19<21:24:39, 3.51it/s] 27%|██▋ | 101303/371472 [8:03:19<20:55:52, 3.59it/s] 27%|██▋ | 101304/371472 [8:03:19<21:05:22, 3.56it/s] 27%|██▋ | 101305/371472 [8:03:20<24:00:12, 3.13it/s] 27%|██▋ | 101306/371472 [8:03:20<23:49:50, 3.15it/s] 27%|██▋ | 101307/371472 [8:03:20<22:10:31, 3.38it/s] 27%|██▋ | 101308/371472 [8:03:21<23:12:21, 3.23it/s] 27%|██▋ | 101309/371472 [8:03:21<21:29:26, 3.49it/s] 27%|██▋ | 101310/371472 [8:03:21<20:44:25, 3.62it/s] 27%|██▋ | 101311/371472 [8:03:22<20:26:24, 3.67it/s] 27%|██▋ | 101312/371472 [8:03:22<20:29:12, 3.66it/s] 27%|██▋ | 101313/371472 [8:03:22<20:57:53, 3.58it/s] 27%|██▋ | 101314/371472 [8:03:22<20:43:16, 3.62it/s] 27%|██▋ | 101315/371472 [8:03:23<20:26:05, 3.67it/s] 27%|██▋ | 101316/371472 [8:03:23<19:37:07, 3.83it/s] 27%|██▋ | 101317/371472 [8:03:23<20:20:06, 3.69it/s] 27%|██▋ | 101318/371472 [8:03:23<20:51:22, 3.60it/s] 27%|██▋ | 101319/371472 [8:03:24<21:26:13, 3.50it/s] 27%|██▋ | 101320/371472 [8:03:24<20:43:26, 3.62it/s] {'loss': 3.4928, 'learning_rate': 7.54875131978711e-07, 'epoch': 4.36} + 27%|██▋ | 101320/371472 [8:03:24<20:43:26, 3.62it/s] 27%|██▋ | 101321/371472 [8:03:24<21:43:50, 3.45it/s] 27%|██▋ | 101322/371472 [8:03:25<22:52:46, 3.28it/s] 27%|██▋ | 101323/371472 [8:03:25<22:28:04, 3.34it/s] 27%|██▋ | 101324/371472 [8:03:25<22:00:28, 3.41it/s] 27%|██▋ | 101325/371472 [8:03:26<21:10:45, 3.54it/s] 27%|██▋ | 101326/371472 [8:03:26<22:07:22, 3.39it/s] 27%|██▋ | 101327/371472 [8:03:26<21:14:29, 3.53it/s] 27%|██▋ | 101328/371472 [8:03:26<20:51:28, 3.60it/s] 27%|██▋ | 101329/371472 [8:03:27<19:54:00, 3.77it/s] 27%|██▋ | 101330/371472 [8:03:27<20:21:22, 3.69it/s] 27%|██▋ | 101331/371472 [8:03:27<20:45:57, 3.61it/s] 27%|██▋ | 101332/371472 [8:03:27<20:23:48, 3.68it/s] 27%|██▋ | 101333/371472 [8:03:28<20:46:10, 3.61it/s] 27%|██▋ | 101334/371472 [8:03:28<21:12:08, 3.54it/s] 27%|██▋ | 101335/371472 [8:03:28<21:09:08, 3.55it/s] 27%|██▋ | 101336/371472 [8:03:29<21:08:32, 3.55it/s] 27%|██▋ | 101337/371472 [8:03:29<20:07:05, 3.73it/s] 27%|██▋ | 101338/371472 [8:03:29<20:13:21, 3.71it/s] 27%|██▋ | 101339/371472 [8:03:29<20:29:37, 3.66it/s] 27%|██▋ | 101340/371472 [8:03:30<20:02:45, 3.74it/s] {'loss': 3.5253, 'learning_rate': 7.548266500032321e-07, 'epoch': 4.36} + 27%|██▋ | 101340/371472 [8:03:30<20:02:45, 3.74it/s] 27%|██▋ | 101341/371472 [8:03:30<20:24:41, 3.68it/s] 27%|██▋ | 101342/371472 [8:03:30<20:23:39, 3.68it/s] 27%|██▋ | 101343/371472 [8:03:30<20:16:43, 3.70it/s] 27%|██▋ | 101344/371472 [8:03:31<20:42:19, 3.62it/s] 27%|██▋ | 101345/371472 [8:03:31<20:46:48, 3.61it/s] 27%|██▋ | 101346/371472 [8:03:31<20:24:28, 3.68it/s] 27%|██▋ | 101347/371472 [8:03:32<21:18:10, 3.52it/s] 27%|██▋ | 101348/371472 [8:03:32<21:09:08, 3.55it/s] 27%|██▋ | 101349/371472 [8:03:32<20:32:26, 3.65it/s] 27%|██▋ | 101350/371472 [8:03:32<20:08:58, 3.72it/s] 27%|██▋ | 101351/371472 [8:03:33<19:41:02, 3.81it/s] 27%|██▋ | 101352/371472 [8:03:33<19:26:02, 3.86it/s] 27%|██▋ | 101353/371472 [8:03:33<19:58:25, 3.76it/s] 27%|██▋ | 101354/371472 [8:03:33<19:28:31, 3.85it/s] 27%|██▋ | 101355/371472 [8:03:34<19:18:27, 3.89it/s] 27%|██▋ | 101356/371472 [8:03:34<19:45:12, 3.80it/s] 27%|██▋ | 101357/371472 [8:03:34<19:43:55, 3.80it/s] 27%|██▋ | 101358/371472 [8:03:34<19:55:11, 3.77it/s] 27%|██▋ | 101359/371472 [8:03:35<19:51:06, 3.78it/s] 27%|██▋ | 101360/371472 [8:03:35<19:52:21, 3.78it/s] {'loss': 3.4639, 'learning_rate': 7.547781680277532e-07, 'epoch': 4.37} + 27%|██▋ | 101360/371472 [8:03:35<19:52:21, 3.78it/s] 27%|██▋ | 101361/371472 [8:03:35<19:52:02, 3.78it/s] 27%|██▋ | 101362/371472 [8:03:36<20:52:23, 3.59it/s] 27%|██▋ | 101363/371472 [8:03:36<21:29:37, 3.49it/s] 27%|██▋ | 101364/371472 [8:03:36<23:05:27, 3.25it/s] 27%|██▋ | 101365/371472 [8:03:37<22:24:44, 3.35it/s] 27%|██▋ | 101366/371472 [8:03:37<22:49:07, 3.29it/s] 27%|██▋ | 101367/371472 [8:03:37<23:15:01, 3.23it/s] 27%|██▋ | 101368/371472 [8:03:37<22:09:21, 3.39it/s] 27%|██▋ | 101369/371472 [8:03:38<21:06:45, 3.55it/s] 27%|██▋ | 101370/371472 [8:03:38<22:35:16, 3.32it/s] 27%|██▋ | 101371/371472 [8:03:38<22:12:26, 3.38it/s] 27%|██▋ | 101372/371472 [8:03:39<21:17:42, 3.52it/s] 27%|██▋ | 101373/371472 [8:03:39<21:03:25, 3.56it/s] 27%|██▋ | 101374/371472 [8:03:39<22:00:49, 3.41it/s] 27%|██▋ | 101375/371472 [8:03:39<21:55:20, 3.42it/s] 27%|██▋ | 101376/371472 [8:03:40<21:03:24, 3.56it/s] 27%|██▋ | 101377/371472 [8:03:40<20:22:30, 3.68it/s] 27%|██▋ | 101378/371472 [8:03:40<19:36:59, 3.82it/s] 27%|██▋ | 101379/371472 [8:03:41<21:16:05, 3.53it/s] 27%|██▋ | 101380/371472 [8:03:41<20:48:48, 3.60it/s] {'loss': 3.4132, 'learning_rate': 7.547296860522743e-07, 'epoch': 4.37} + 27%|██▋ | 101380/371472 [8:03:41<20:48:48, 3.60it/s] 27%|██▋ | 101381/371472 [8:03:41<21:15:04, 3.53it/s] 27%|██▋ | 101382/371472 [8:03:41<20:42:04, 3.62it/s] 27%|██▋ | 101383/371472 [8:03:42<20:42:43, 3.62it/s] 27%|██▋ | 101384/371472 [8:03:42<21:15:07, 3.53it/s] 27%|██▋ | 101385/371472 [8:03:43<30:34:35, 2.45it/s] 27%|██▋ | 101386/371472 [8:03:43<27:33:46, 2.72it/s] 27%|██▋ | 101387/371472 [8:03:43<26:07:54, 2.87it/s] 27%|██▋ | 101388/371472 [8:03:43<24:18:34, 3.09it/s] 27%|██▋ | 101389/371472 [8:03:44<22:20:21, 3.36it/s] 27%|██▋ | 101390/371472 [8:03:44<26:29:55, 2.83it/s] 27%|██▋ | 101391/371472 [8:03:45<26:27:13, 2.84it/s] 27%|██▋ | 101392/371472 [8:03:45<24:33:37, 3.05it/s] 27%|██▋ | 101393/371472 [8:03:45<23:24:45, 3.20it/s] 27%|██▋ | 101394/371472 [8:03:45<24:01:52, 3.12it/s] 27%|██▋ | 101395/371472 [8:03:46<22:26:39, 3.34it/s] 27%|██▋ | 101396/371472 [8:03:46<23:43:38, 3.16it/s] 27%|██▋ | 101397/371472 [8:03:46<23:40:56, 3.17it/s] 27%|██▋ | 101398/371472 [8:03:47<23:55:24, 3.14it/s] 27%|██▋ | 101399/371472 [8:03:47<22:46:21, 3.29it/s] 27%|██▋ | 101400/371472 [8:03:47<22:13:34, 3.38it/s] {'loss': 3.4828, 'learning_rate': 7.546812040767955e-07, 'epoch': 4.37} + 27%|██▋ | 101400/371472 [8:03:47<22:13:34, 3.38it/s] 27%|██▋ | 101401/371472 [8:03:47<22:03:23, 3.40it/s] 27%|██▋ | 101402/371472 [8:03:48<22:07:47, 3.39it/s] 27%|██▋ | 101403/371472 [8:03:48<20:53:33, 3.59it/s] 27%|██▋ | 101404/371472 [8:03:48<20:39:07, 3.63it/s] 27%|██▋ | 101405/371472 [8:03:49<21:00:56, 3.57it/s] 27%|██▋ | 101406/371472 [8:03:49<21:19:43, 3.52it/s] 27%|██▋ | 101407/371472 [8:03:49<20:57:29, 3.58it/s] 27%|██▋ | 101408/371472 [8:03:49<22:00:38, 3.41it/s] 27%|██▋ | 101409/371472 [8:03:50<21:37:13, 3.47it/s] 27%|██▋ | 101410/371472 [8:03:50<22:20:35, 3.36it/s] 27%|██▋ | 101411/371472 [8:03:50<21:06:26, 3.55it/s] 27%|██▋ | 101412/371472 [8:03:51<20:36:30, 3.64it/s] 27%|██▋ | 101413/371472 [8:03:51<21:52:08, 3.43it/s] 27%|██▋ | 101414/371472 [8:03:51<22:43:38, 3.30it/s] 27%|██▋ | 101415/371472 [8:03:51<21:28:34, 3.49it/s] 27%|██▋ | 101416/371472 [8:03:52<21:22:49, 3.51it/s] 27%|██▋ | 101417/371472 [8:03:52<20:46:15, 3.61it/s] 27%|██▋ | 101418/371472 [8:03:52<20:03:35, 3.74it/s] 27%|██▋ | 101419/371472 [8:03:53<20:32:47, 3.65it/s] 27%|██▋ | 101420/371472 [8:03:53<20:05:14, 3.73it/s] {'loss': 3.5509, 'learning_rate': 7.546327221013165e-07, 'epoch': 4.37} + 27%|██▋ | 101420/371472 [8:03:53<20:05:14, 3.73it/s] 27%|██▋ | 101421/371472 [8:03:53<19:55:59, 3.76it/s] 27%|██▋ | 101422/371472 [8:03:53<19:36:00, 3.83it/s] 27%|██▋ | 101423/371472 [8:03:54<19:42:47, 3.81it/s] 27%|██▋ | 101424/371472 [8:03:54<19:46:33, 3.79it/s] 27%|██▋ | 101425/371472 [8:03:54<19:06:13, 3.93it/s] 27%|██▋ | 101426/371472 [8:03:54<19:35:02, 3.83it/s] 27%|██▋ | 101427/371472 [8:03:55<19:55:34, 3.76it/s] 27%|██▋ | 101428/371472 [8:03:55<19:32:29, 3.84it/s] 27%|██▋ | 101429/371472 [8:03:55<19:18:02, 3.89it/s] 27%|██▋ | 101430/371472 [8:03:55<19:18:58, 3.88it/s] 27%|██▋ | 101431/371472 [8:03:56<21:19:32, 3.52it/s] 27%|██▋ | 101432/371472 [8:03:56<22:34:19, 3.32it/s] 27%|██▋ | 101433/371472 [8:03:56<21:21:15, 3.51it/s] 27%|██▋ | 101434/371472 [8:03:57<20:57:16, 3.58it/s] 27%|██▋ | 101435/371472 [8:03:57<20:36:55, 3.64it/s] 27%|██▋ | 101436/371472 [8:03:57<20:22:04, 3.68it/s] 27%|██▋ | 101437/371472 [8:03:57<19:39:50, 3.81it/s] 27%|██▋ | 101438/371472 [8:03:58<19:13:56, 3.90it/s] 27%|██▋ | 101439/371472 [8:03:58<19:34:38, 3.83it/s] 27%|██▋ | 101440/371472 [8:03:58<19:44:41, 3.80it/s] {'loss': 3.6032, 'learning_rate': 7.545842401258376e-07, 'epoch': 4.37} + 27%|██▋ | 101440/371472 [8:03:58<19:44:41, 3.80it/s] 27%|██▋ | 101441/371472 [8:03:58<21:32:23, 3.48it/s] 27%|██▋ | 101442/371472 [8:03:59<23:10:32, 3.24it/s] 27%|██▋ | 101443/371472 [8:03:59<21:53:08, 3.43it/s] 27%|██▋ | 101444/371472 [8:03:59<21:16:50, 3.52it/s] 27%|██▋ | 101445/371472 [8:04:00<20:30:36, 3.66it/s] 27%|██▋ | 101446/371472 [8:04:00<20:44:28, 3.62it/s] 27%|██▋ | 101447/371472 [8:04:00<20:11:26, 3.71it/s] 27%|██▋ | 101448/371472 [8:04:00<19:49:09, 3.78it/s] 27%|██▋ | 101449/371472 [8:04:01<20:15:51, 3.70it/s] 27%|██▋ | 101450/371472 [8:04:01<19:25:34, 3.86it/s] 27%|██▋ | 101451/371472 [8:04:01<20:06:20, 3.73it/s] 27%|██▋ | 101452/371472 [8:04:01<20:19:54, 3.69it/s] 27%|██▋ | 101453/371472 [8:04:02<20:22:08, 3.68it/s] 27%|██▋ | 101454/371472 [8:04:02<20:56:58, 3.58it/s] 27%|██▋ | 101455/371472 [8:04:02<19:58:37, 3.75it/s] 27%|██▋ | 101456/371472 [8:04:03<20:43:28, 3.62it/s] 27%|██▋ | 101457/371472 [8:04:03<20:34:34, 3.65it/s] 27%|██▋ | 101458/371472 [8:04:03<20:40:16, 3.63it/s] 27%|██▋ | 101459/371472 [8:04:03<20:38:23, 3.63it/s] 27%|██▋ | 101460/371472 [8:04:04<20:32:20, 3.65it/s] {'loss': 3.6337, 'learning_rate': 7.545357581503587e-07, 'epoch': 4.37} + 27%|██▋ | 101460/371472 [8:04:04<20:32:20, 3.65it/s] 27%|██▋ | 101461/371472 [8:04:04<20:44:52, 3.61it/s] 27%|██▋ | 101462/371472 [8:04:04<20:52:33, 3.59it/s] 27%|██▋ | 101463/371472 [8:04:05<21:16:42, 3.52it/s] 27%|██▋ | 101464/371472 [8:04:05<21:16:49, 3.52it/s] 27%|██▋ | 101465/371472 [8:04:05<22:51:59, 3.28it/s] 27%|██▋ | 101466/371472 [8:04:05<22:34:38, 3.32it/s] 27%|██▋ | 101467/371472 [8:04:06<22:03:34, 3.40it/s] 27%|██▋ | 101468/371472 [8:04:06<22:53:09, 3.28it/s] 27%|██▋ | 101469/371472 [8:04:06<22:25:39, 3.34it/s] 27%|██▋ | 101470/371472 [8:04:07<22:54:45, 3.27it/s] 27%|██▋ | 101471/371472 [8:04:07<22:36:44, 3.32it/s] 27%|██▋ | 101472/371472 [8:04:07<22:01:10, 3.41it/s] 27%|██▋ | 101473/371472 [8:04:08<21:27:05, 3.50it/s] 27%|██▋ | 101474/371472 [8:04:08<21:08:07, 3.55it/s] 27%|██▋ | 101475/371472 [8:04:08<21:16:50, 3.52it/s] 27%|██▋ | 101476/371472 [8:04:08<22:47:07, 3.29it/s] 27%|██▋ | 101477/371472 [8:04:09<23:15:18, 3.23it/s] 27%|██▋ | 101478/371472 [8:04:09<23:44:14, 3.16it/s] 27%|██▋ | 101479/371472 [8:04:09<23:03:27, 3.25it/s] 27%|██▋ | 101480/371472 [8:04:10<22:40:35, 3.31it/s] {'loss': 3.51, 'learning_rate': 7.544872761748798e-07, 'epoch': 4.37} + 27%|██▋ | 101480/371472 [8:04:10<22:40:35, 3.31it/s] 27%|██▋ | 101481/371472 [8:04:10<23:05:54, 3.25it/s] 27%|██▋ | 101482/371472 [8:04:10<22:05:20, 3.40it/s] 27%|██▋ | 101483/371472 [8:04:11<23:27:10, 3.20it/s] 27%|██▋ | 101484/371472 [8:04:11<22:33:26, 3.32it/s] 27%|██▋ | 101485/371472 [8:04:11<23:05:40, 3.25it/s] 27%|██▋ | 101486/371472 [8:04:11<22:38:41, 3.31it/s] 27%|██▋ | 101487/371472 [8:04:12<21:38:58, 3.46it/s] 27%|██▋ | 101488/371472 [8:04:12<23:16:53, 3.22it/s] 27%|██▋ | 101489/371472 [8:04:12<22:34:51, 3.32it/s] 27%|██▋ | 101490/371472 [8:04:13<23:10:16, 3.24it/s] 27%|██▋ | 101491/371472 [8:04:13<22:45:38, 3.29it/s] 27%|██▋ | 101492/371472 [8:04:13<22:02:17, 3.40it/s] 27%|██▋ | 101493/371472 [8:04:14<21:42:03, 3.46it/s] 27%|██▋ | 101494/371472 [8:04:14<21:13:53, 3.53it/s] 27%|██▋ | 101495/371472 [8:04:14<20:42:28, 3.62it/s] 27%|██▋ | 101496/371472 [8:04:14<20:12:02, 3.71it/s] 27%|██▋ | 101497/371472 [8:04:15<19:56:12, 3.76it/s] 27%|██▋ | 101498/371472 [8:04:15<20:28:47, 3.66it/s] 27%|██▋ | 101499/371472 [8:04:15<20:32:32, 3.65it/s] 27%|██▋ | 101500/371472 [8:04:15<20:50:34, 3.60it/s] {'loss': 3.5567, 'learning_rate': 7.54438794199401e-07, 'epoch': 4.37} + 27%|██▋ | 101500/371472 [8:04:15<20:50:34, 3.60it/s] 27%|██▋ | 101501/371472 [8:04:16<21:10:13, 3.54it/s] 27%|██▋ | 101502/371472 [8:04:16<21:49:51, 3.44it/s] 27%|██▋ | 101503/371472 [8:04:16<21:45:35, 3.45it/s] 27%|██▋ | 101504/371472 [8:04:17<21:44:46, 3.45it/s] 27%|██▋ | 101505/371472 [8:04:17<22:21:09, 3.35it/s] 27%|██▋ | 101506/371472 [8:04:17<21:59:46, 3.41it/s] 27%|██▋ | 101507/371472 [8:04:17<21:21:19, 3.51it/s] 27%|██▋ | 101508/371472 [8:04:18<20:36:15, 3.64it/s] 27%|██▋ | 101509/371472 [8:04:18<20:05:58, 3.73it/s] 27%|██▋ | 101510/371472 [8:04:18<21:01:18, 3.57it/s] 27%|██▋ | 101511/371472 [8:04:19<20:09:07, 3.72it/s] 27%|██▋ | 101512/371472 [8:04:19<20:11:35, 3.71it/s] 27%|██▋ | 101513/371472 [8:04:19<20:02:02, 3.74it/s] 27%|██▋ | 101514/371472 [8:04:19<21:35:50, 3.47it/s] 27%|██▋ | 101515/371472 [8:04:20<20:46:29, 3.61it/s] 27%|██▋ | 101516/371472 [8:04:20<20:30:08, 3.66it/s] 27%|██▋ | 101517/371472 [8:04:20<20:25:25, 3.67it/s] 27%|██▋ | 101518/371472 [8:04:20<19:39:47, 3.81it/s] 27%|██▋ | 101519/371472 [8:04:21<20:24:57, 3.67it/s] 27%|██▋ | 101520/371472 [8:04:21<20:24:42, 3.67it/s] {'loss': 3.5941, 'learning_rate': 7.543903122239221e-07, 'epoch': 4.37} + 27%|██▋ | 101520/371472 [8:04:21<20:24:42, 3.67it/s] 27%|██▋ | 101521/371472 [8:04:21<21:04:29, 3.56it/s] 27%|██▋ | 101522/371472 [8:04:22<21:00:44, 3.57it/s] 27%|██▋ | 101523/371472 [8:04:22<20:29:17, 3.66it/s] 27%|██▋ | 101524/371472 [8:04:22<21:34:56, 3.47it/s] 27%|██▋ | 101525/371472 [8:04:22<21:44:13, 3.45it/s] 27%|██▋ | 101526/371472 [8:04:23<20:23:35, 3.68it/s] 27%|██▋ | 101527/371472 [8:04:23<21:37:17, 3.47it/s] 27%|██▋ | 101528/371472 [8:04:23<20:43:16, 3.62it/s] 27%|██▋ | 101529/371472 [8:04:24<20:02:58, 3.74it/s] 27%|██▋ | 101530/371472 [8:04:24<20:23:49, 3.68it/s] 27%|██▋ | 101531/371472 [8:04:24<19:49:57, 3.78it/s] 27%|██▋ | 101532/371472 [8:04:24<22:14:01, 3.37it/s] 27%|██▋ | 101533/371472 [8:04:25<21:08:08, 3.55it/s] 27%|██▋ | 101534/371472 [8:04:25<20:30:44, 3.66it/s] 27%|██▋ | 101535/371472 [8:04:25<20:35:35, 3.64it/s] 27%|██▋ | 101536/371472 [8:04:25<20:37:16, 3.64it/s] 27%|██▋ | 101537/371472 [8:04:26<19:45:35, 3.79it/s] 27%|██▋ | 101538/371472 [8:04:26<19:18:35, 3.88it/s] 27%|██▋ | 101539/371472 [8:04:26<19:58:54, 3.75it/s] 27%|██▋ | 101540/371472 [8:04:27<20:17:44, 3.69it/s] {'loss': 3.6253, 'learning_rate': 7.543418302484431e-07, 'epoch': 4.37} + 27%|██▋ | 101540/371472 [8:04:27<20:17:44, 3.69it/s] 27%|██▋ | 101541/371472 [8:04:27<19:51:41, 3.78it/s] 27%|██▋ | 101542/371472 [8:04:27<20:11:11, 3.71it/s] 27%|██▋ | 101543/371472 [8:04:27<20:53:49, 3.59it/s] 27%|██▋ | 101544/371472 [8:04:28<20:25:54, 3.67it/s] 27%|██▋ | 101545/371472 [8:04:28<21:00:57, 3.57it/s] 27%|██▋ | 101546/371472 [8:04:28<21:09:21, 3.54it/s] 27%|██▋ | 101547/371472 [8:04:28<21:13:54, 3.53it/s] 27%|██▋ | 101548/371472 [8:04:29<20:23:53, 3.68it/s] 27%|██▋ | 101549/371472 [8:04:29<20:07:27, 3.73it/s] 27%|██▋ | 101550/371472 [8:04:29<20:01:50, 3.74it/s] 27%|██▋ | 101551/371472 [8:04:30<20:53:14, 3.59it/s] 27%|██▋ | 101552/371472 [8:04:30<20:46:19, 3.61it/s] 27%|██▋ | 101553/371472 [8:04:30<20:46:44, 3.61it/s] 27%|██▋ | 101554/371472 [8:04:30<20:34:29, 3.64it/s] 27%|██▋ | 101555/371472 [8:04:31<20:09:52, 3.72it/s] 27%|██▋ | 101556/371472 [8:04:31<20:01:52, 3.74it/s] 27%|██▋ | 101557/371472 [8:04:31<19:38:56, 3.82it/s] 27%|██▋ | 101558/371472 [8:04:31<20:10:08, 3.72it/s] 27%|██▋ | 101559/371472 [8:04:32<20:13:54, 3.71it/s] 27%|██▋ | 101560/371472 [8:04:32<19:45:03, 3.80it/s] {'loss': 3.6321, 'learning_rate': 7.542933482729642e-07, 'epoch': 4.37} + 27%|██▋ | 101560/371472 [8:04:32<19:45:03, 3.80it/s] 27%|██▋ | 101561/371472 [8:04:32<20:48:51, 3.60it/s] 27%|██▋ | 101562/371472 [8:04:33<20:58:35, 3.57it/s] 27%|██▋ | 101563/371472 [8:04:33<21:39:34, 3.46it/s] 27%|██▋ | 101564/371472 [8:04:33<20:55:07, 3.58it/s] 27%|██▋ | 101565/371472 [8:04:33<20:55:09, 3.58it/s] 27%|██▋ | 101566/371472 [8:04:34<19:58:24, 3.75it/s] 27%|██▋ | 101567/371472 [8:04:34<20:29:33, 3.66it/s] 27%|██▋ | 101568/371472 [8:04:34<19:42:19, 3.80it/s] 27%|██▋ | 101569/371472 [8:04:34<19:52:01, 3.77it/s] 27%|█���▋ | 101570/371472 [8:04:35<19:53:00, 3.77it/s] 27%|██▋ | 101571/371472 [8:04:35<19:43:42, 3.80it/s] 27%|██▋ | 101572/371472 [8:04:35<19:57:13, 3.76it/s] 27%|██▋ | 101573/371472 [8:04:36<20:50:46, 3.60it/s] 27%|██▋ | 101574/371472 [8:04:36<20:13:24, 3.71it/s] 27%|██▋ | 101575/371472 [8:04:36<20:39:08, 3.63it/s] 27%|██▋ | 101576/371472 [8:04:36<20:29:57, 3.66it/s] 27%|██▋ | 101577/371472 [8:04:37<20:09:31, 3.72it/s] 27%|██▋ | 101578/371472 [8:04:37<20:03:40, 3.74it/s] 27%|██▋ | 101579/371472 [8:04:37<20:29:26, 3.66it/s] 27%|██▋ | 101580/371472 [8:04:37<20:24:02, 3.67it/s] {'loss': 3.685, 'learning_rate': 7.542448662974854e-07, 'epoch': 4.38} + 27%|██▋ | 101580/371472 [8:04:37<20:24:02, 3.67it/s] 27%|██▋ | 101581/371472 [8:04:38<20:05:10, 3.73it/s] 27%|██▋ | 101582/371472 [8:04:38<19:32:52, 3.84it/s] 27%|██▋ | 101583/371472 [8:04:38<19:14:31, 3.90it/s] 27%|██▋ | 101584/371472 [8:04:38<19:37:13, 3.82it/s] 27%|██▋ | 101585/371472 [8:04:39<19:01:04, 3.94it/s] 27%|██▋ | 101586/371472 [8:04:39<19:32:06, 3.84it/s] 27%|██▋ | 101587/371472 [8:04:39<19:46:54, 3.79it/s] 27%|██▋ | 101588/371472 [8:04:40<20:46:33, 3.61it/s] 27%|██▋ | 101589/371472 [8:04:40<19:47:59, 3.79it/s] 27%|██▋ | 101590/371472 [8:04:40<20:20:03, 3.69it/s] 27%|██▋ | 101591/371472 [8:04:40<21:28:31, 3.49it/s] 27%|██▋ | 101592/371472 [8:04:41<22:01:39, 3.40it/s] 27%|██▋ | 101593/371472 [8:04:41<22:02:18, 3.40it/s] 27%|██▋ | 101594/371472 [8:04:41<21:53:43, 3.42it/s] 27%|██▋ | 101595/371472 [8:04:42<21:29:14, 3.49it/s] 27%|██▋ | 101596/371472 [8:04:42<21:28:43, 3.49it/s] 27%|██▋ | 101597/371472 [8:04:42<20:43:33, 3.62it/s] 27%|██▋ | 101598/371472 [8:04:42<20:44:43, 3.61it/s] 27%|██▋ | 101599/371472 [8:04:43<21:53:30, 3.42it/s] 27%|██▋ | 101600/371472 [8:04:43<20:57:10, 3.58it/s] {'loss': 3.5317, 'learning_rate': 7.541963843220065e-07, 'epoch': 4.38} + 27%|██▋ | 101600/371472 [8:04:43<20:57:10, 3.58it/s] 27%|██▋ | 101601/371472 [8:04:43<20:36:42, 3.64it/s] 27%|██▋ | 101602/371472 [8:04:43<20:41:42, 3.62it/s] 27%|██▋ | 101603/371472 [8:04:44<19:54:09, 3.77it/s] 27%|██▋ | 101604/371472 [8:04:44<19:20:58, 3.87it/s] 27%|██▋ | 101605/371472 [8:04:44<20:00:40, 3.75it/s] 27%|██▋ | 101606/371472 [8:04:44<19:25:53, 3.86it/s] 27%|██▋ | 101607/371472 [8:04:45<20:39:27, 3.63it/s] 27%|██▋ | 101608/371472 [8:04:45<20:44:02, 3.62it/s] 27%|██▋ | 101609/371472 [8:04:45<20:14:15, 3.70it/s] 27%|██▋ | 101610/371472 [8:04:46<20:52:11, 3.59it/s] 27%|██▋ | 101611/371472 [8:04:46<22:04:06, 3.40it/s] 27%|██▋ | 101612/371472 [8:04:46<21:34:49, 3.47it/s] 27%|██▋ | 101613/371472 [8:04:47<23:03:10, 3.25it/s] 27%|██▋ | 101614/371472 [8:04:47<21:48:15, 3.44it/s] 27%|██▋ | 101615/371472 [8:04:47<21:21:47, 3.51it/s] 27%|██▋ | 101616/371472 [8:04:47<21:11:26, 3.54it/s] 27%|██▋ | 101617/371472 [8:04:48<21:10:59, 3.54it/s] 27%|██▋ | 101618/371472 [8:04:48<22:27:51, 3.34it/s] 27%|██▋ | 101619/371472 [8:04:48<22:27:19, 3.34it/s] 27%|██▋ | 101620/371472 [8:04:49<22:41:24, 3.30it/s] {'loss': 3.3984, 'learning_rate': 7.541479023465276e-07, 'epoch': 4.38} + 27%|██▋ | 101620/371472 [8:04:49<22:41:24, 3.30it/s] 27%|██▋ | 101621/371472 [8:04:49<22:57:20, 3.27it/s] 27%|██▋ | 101622/371472 [8:04:49<22:26:17, 3.34it/s] 27%|██▋ | 101623/371472 [8:04:49<21:39:08, 3.46it/s] 27%|██▋ | 101624/371472 [8:04:50<21:30:14, 3.49it/s] 27%|██▋ | 101625/371472 [8:04:50<22:22:34, 3.35it/s] 27%|██▋ | 101626/371472 [8:04:50<22:49:11, 3.28it/s] 27%|██▋ | 101627/371472 [8:04:51<21:26:25, 3.50it/s] 27%|██▋ | 101628/371472 [8:04:51<20:49:12, 3.60it/s] 27%|██▋ | 101629/371472 [8:04:51<20:35:49, 3.64it/s] 27%|██▋ | 101630/371472 [8:04:51<20:00:46, 3.75it/s] 27%|██▋ | 101631/371472 [8:04:52<20:10:14, 3.72it/s] 27%|██▋ | 101632/371472 [8:04:52<21:44:52, 3.45it/s] 27%|██▋ | 101633/371472 [8:04:52<20:35:42, 3.64it/s] 27%|██▋ | 101634/371472 [8:04:53<20:08:21, 3.72it/s] 27%|██▋ | 101635/371472 [8:04:53<20:28:08, 3.66it/s] 27%|██▋ | 101636/371472 [8:04:53<21:04:54, 3.56it/s] 27%|██▋ | 101637/371472 [8:04:53<21:24:41, 3.50it/s] 27%|██▋ | 101638/371472 [8:04:54<20:39:37, 3.63it/s] 27%|██▋ | 101639/371472 [8:04:54<20:15:49, 3.70it/s] 27%|██▋ | 101640/371472 [8:04:54<20:14:14, 3.70it/s] {'loss': 3.6187, 'learning_rate': 7.540994203710488e-07, 'epoch': 4.38} + 27%|██▋ | 101640/371472 [8:04:54<20:14:14, 3.70it/s] 27%|██▋ | 101641/371472 [8:04:54<20:17:05, 3.70it/s] 27%|██▋ | 101642/371472 [8:04:55<19:57:06, 3.76it/s] 27%|██▋ | 101643/371472 [8:04:55<20:55:10, 3.58it/s] 27%|██▋ | 101644/371472 [8:04:55<20:33:20, 3.65it/s] 27%|██▋ | 101645/371472 [8:04:56<20:38:02, 3.63it/s] 27%|██▋ | 101646/371472 [8:04:56<20:17:27, 3.69it/s] 27%|██▋ | 101647/371472 [8:04:56<19:42:13, 3.80it/s] 27%|██▋ | 101648/371472 [8:04:56<19:42:26, 3.80it/s] 27%|██▋ | 101649/371472 [8:04:57<21:14:06, 3.53it/s] 27%|██▋ | 101650/371472 [8:04:57<22:35:12, 3.32it/s] 27%|██▋ | 101651/371472 [8:04:57<21:45:14, 3.45it/s] 27%|██▋ | 101652/371472 [8:04:58<21:11:20, 3.54it/s] 27%|██▋ | 101653/371472 [8:04:58<21:03:07, 3.56it/s] 27%|██▋ | 101654/371472 [8:04:58<20:58:41, 3.57it/s] 27%|██▋ | 101655/371472 [8:04:58<23:26:27, 3.20it/s] 27%|██▋ | 101656/371472 [8:04:59<21:55:22, 3.42it/s] 27%|██▋ | 101657/371472 [8:04:59<21:54:31, 3.42it/s] 27%|██▋ | 101658/371472 [8:04:59<21:09:49, 3.54it/s] 27%|██▋ | 101659/371472 [8:05:00<20:33:19, 3.65it/s] 27%|██▋ | 101660/371472 [8:05:00<22:16:32, 3.36it/s] {'loss': 3.5354, 'learning_rate': 7.540509383955698e-07, 'epoch': 4.38} + 27%|██▋ | 101660/371472 [8:05:00<22:16:32, 3.36it/s] 27%|██▋ | 101661/371472 [8:05:00<22:05:12, 3.39it/s] 27%|██▋ | 101662/371472 [8:05:00<21:12:44, 3.53it/s] 27%|██▋ | 101663/371472 [8:05:01<21:08:19, 3.55it/s] 27%|██▋ | 101664/371472 [8:05:01<21:07:33, 3.55it/s] 27%|██▋ | 101665/371472 [8:05:01<20:38:13, 3.63it/s] 27%|██▋ | 101666/371472 [8:05:02<19:59:32, 3.75it/s] 27%|██▋ | 101667/371472 [8:05:02<19:44:50, 3.80it/s] 27%|██▋ | 101668/371472 [8:05:02<20:39:37, 3.63it/s] 27%|██▋ | 101669/371472 [8:05:02<20:12:15, 3.71it/s] 27%|██▋ | 101670/371472 [8:05:03<19:32:38, 3.83it/s] 27%|██▋ | 101671/371472 [8:05:03<20:41:42, 3.62it/s] 27%|██▋ | 101672/371472 [8:05:03<21:01:16, 3.57it/s] 27%|██▋ | 101673/371472 [8:05:03<20:22:39, 3.68it/s] 27%|██▋ | 101674/371472 [8:05:04<20:03:40, 3.74it/s] 27%|██▋ | 101675/371472 [8:05:04<20:16:03, 3.70it/s] 27%|██▋ | 101676/371472 [8:05:04<20:12:42, 3.71it/s] 27%|██▋ | 101677/371472 [8:05:05<21:00:58, 3.57it/s] 27%|██▋ | 101678/371472 [8:05:05<21:01:20, 3.56it/s] 27%|██▋ | 101679/371472 [8:05:05<21:43:49, 3.45it/s] 27%|██▋ | 101680/371472 [8:05:05<20:42:25, 3.62it/s] {'loss': 3.5014, 'learning_rate': 7.540024564200909e-07, 'epoch': 4.38} + 27%|██▋ | 101680/371472 [8:05:05<20:42:25, 3.62it/s] 27%|██▋ | 101681/371472 [8:05:06<20:15:41, 3.70it/s] 27%|██▋ | 101682/371472 [8:05:06<20:56:05, 3.58it/s] 27%|██▋ | 101683/371472 [8:05:06<21:00:37, 3.57it/s] 27%|██▋ | 101684/371472 [8:05:07<23:59:18, 3.12it/s] 27%|██▋ | 101685/371472 [8:05:07<22:57:51, 3.26it/s] 27%|██▋ | 101686/371472 [8:05:07<22:39:53, 3.31it/s] 27%|██▋ | 101687/371472 [8:05:07<22:19:05, 3.36it/s] 27%|██▋ | 101688/371472 [8:05:08<21:33:25, 3.48it/s] 27%|██▋ | 101689/371472 [8:05:08<21:03:08, 3.56it/s] 27%|██▋ | 101690/371472 [8:05:08<21:22:18, 3.51it/s] 27%|██▋ | 101691/371472 [8:05:09<20:43:08, 3.62it/s] 27%|██▋ | 101692/371472 [8:05:09<20:53:54, 3.59it/s] 27%|██▋ | 101693/371472 [8:05:09<23:41:06, 3.16it/s] 27%|██▋ | 101694/371472 [8:05:10<24:11:56, 3.10it/s] 27%|██▋ | 101695/371472 [8:05:10<22:51:54, 3.28it/s] 27%|██▋ | 101696/371472 [8:05:10<22:56:56, 3.27it/s] 27%|██▋ | 101697/371472 [8:05:10<21:30:11, 3.48it/s] 27%|██▋ | 101698/371472 [8:05:11<22:21:53, 3.35it/s] 27%|██▋ | 101699/371472 [8:05:11<21:14:00, 3.53it/s] 27%|██▋ | 101700/371472 [8:05:11<21:35:59, 3.47it/s] {'loss': 3.5726, 'learning_rate': 7.53953974444612e-07, 'epoch': 4.38} + 27%|██▋ | 101700/371472 [8:05:11<21:35:59, 3.47it/s] 27%|██▋ | 101701/371472 [8:05:12<23:23:14, 3.20it/s] 27%|██▋ | 101702/371472 [8:05:12<22:16:58, 3.36it/s] 27%|██▋ | 101703/371472 [8:05:12<22:37:18, 3.31it/s] 27%|██▋ | 101704/371472 [8:05:13<23:17:20, 3.22it/s] 27%|██▋ | 101705/371472 [8:05:13<24:08:05, 3.10it/s] 27%|██▋ | 101706/371472 [8:05:13<24:36:09, 3.05it/s] 27%|██▋ | 101707/371472 [8:05:14<23:29:52, 3.19it/s] 27%|██▋ | 101708/371472 [8:05:14<22:36:32, 3.31it/s] 27%|██▋ | 101709/371472 [8:05:14<22:51:37, 3.28it/s] 27%|██▋ | 101710/371472 [8:05:14<22:12:00, 3.38it/s] 27%|██▋ | 101711/371472 [8:05:15<22:33:41, 3.32it/s] 27%|██▋ | 101712/371472 [8:05:15<21:59:34, 3.41it/s] 27%|██▋ | 101713/371472 [8:05:15<22:23:18, 3.35it/s] 27%|██▋ | 101714/371472 [8:05:16<23:14:54, 3.22it/s] 27%|██▋ | 101715/371472 [8:05:16<23:46:36, 3.15it/s] 27%|██▋ | 101716/371472 [8:05:16<23:03:39, 3.25it/s] 27%|██▋ | 101717/371472 [8:05:17<22:44:44, 3.29it/s] 27%|██▋ | 101718/371472 [8:05:17<23:14:48, 3.22it/s] 27%|██▋ | 101719/371472 [8:05:17<23:51:18, 3.14it/s] 27%|██▋ | 101720/371472 [8:05:17<22:35:02, 3.32it/s] {'loss': 3.6799, 'learning_rate': 7.539054924691331e-07, 'epoch': 4.38} + 27%|██▋ | 101720/371472 [8:05:17<22:35:02, 3.32it/s] 27%|██▋ | 101721/371472 [8:05:18<21:07:59, 3.55it/s] 27%|██▋ | 101722/371472 [8:05:18<22:23:31, 3.35it/s] 27%|██▋ | 101723/371472 [8:05:18<22:13:23, 3.37it/s] 27%|██▋ | 101724/371472 [8:05:19<23:27:46, 3.19it/s] 27%|██▋ | 101725/371472 [8:05:19<22:24:52, 3.34it/s] 27%|██▋ | 101726/371472 [8:05:19<21:56:55, 3.41it/s] 27%|██▋ | 101727/371472 [8:05:20<22:35:56, 3.32it/s] 27%|██▋ | 101728/371472 [8:05:20<21:32:19, 3.48it/s] 27%|██▋ | 101729/371472 [8:05:20<21:06:47, 3.55it/s] 27%|██▋ | 101730/371472 [8:05:20<20:50:18, 3.60it/s] 27%|██▋ | 101731/371472 [8:05:21<21:25:20, 3.50it/s] 27%|██▋ | 101732/371472 [8:05:21<22:42:19, 3.30it/s] 27%|██▋ | 101733/371472 [8:05:21<22:32:04, 3.32it/s] 27%|██▋ | 101734/371472 [8:05:22<21:30:37, 3.48it/s] 27%|██▋ | 101735/371472 [8:05:22<20:48:47, 3.60it/s] 27%|██▋ | 101736/371472 [8:05:22<21:23:51, 3.50it/s] 27%|██▋ | 101737/371472 [8:05:22<20:52:25, 3.59it/s] 27%|██▋ | 101738/371472 [8:05:23<20:24:56, 3.67it/s] 27%|██▋ | 101739/371472 [8:05:23<21:36:04, 3.47it/s] 27%|██▋ | 101740/371472 [8:05:23<21:33:58, 3.47it/s] {'loss': 3.6045, 'learning_rate': 7.538570104936543e-07, 'epoch': 4.38} + 27%|██▋ | 101740/371472 [8:05:23<21:33:58, 3.47it/s] 27%|██▋ | 101741/371472 [8:05:23<21:19:08, 3.51it/s] 27%|██▋ | 101742/371472 [8:05:24<20:37:45, 3.63it/s] 27%|██▋ | 101743/371472 [8:05:24<20:10:24, 3.71it/s] 27%|██▋ | 101744/371472 [8:05:24<22:18:25, 3.36it/s] 27%|██▋ | 101745/371472 [8:05:25<24:53:57, 3.01it/s] 27%|██▋ | 101746/371472 [8:05:25<23:11:01, 3.23it/s] 27%|██▋ | 101747/371472 [8:05:25<21:57:32, 3.41it/s] 27%|██▋ | 101748/371472 [8:05:26<21:16:25, 3.52it/s] 27%|██▋ | 101749/371472 [8:05:26<20:44:53, 3.61it/s] 27%|██▋ | 101750/371472 [8:05:26<20:53:21, 3.59it/s] 27%|██▋ | 101751/371472 [8:05:26<20:08:22, 3.72it/s] 27%|██▋ | 101752/371472 [8:05:27<22:23:13, 3.35it/s] 27%|██▋ | 101753/371472 [8:05:27<22:25:29, 3.34it/s] 27%|██▋ | 101754/371472 [8:05:27<22:19:08, 3.36it/s] 27%|██▋ | 101755/371472 [8:05:28<21:09:49, 3.54it/s] 27%|██▋ | 101756/371472 [8:05:28<21:48:21, 3.44it/s] 27%|██▋ | 101757/371472 [8:05:28<22:21:39, 3.35it/s] 27%|██▋ | 101758/371472 [8:05:28<21:43:30, 3.45it/s] 27%|██▋ | 101759/371472 [8:05:29<20:49:34, 3.60it/s] 27%|██▋ | 101760/371472 [8:05:29<20:12:26, 3.71it/s] {'loss': 3.5273, 'learning_rate': 7.538085285181753e-07, 'epoch': 4.38} + 27%|██▋ | 101760/371472 [8:05:29<20:12:26, 3.71it/s] 27%|██▋ | 101761/371472 [8:05:29<22:10:44, 3.38it/s] 27%|██▋ | 101762/371472 [8:05:30<20:58:51, 3.57it/s] 27%|██▋ | 101763/371472 [8:05:30<20:39:29, 3.63it/s] 27%|██▋ | 101764/371472 [8:05:30<20:23:12, 3.67it/s] 27%|██▋ | 101765/371472 [8:05:30<20:40:36, 3.62it/s] 27%|██▋ | 101766/371472 [8:05:31<21:56:50, 3.41it/s] 27%|██▋ | 101767/371472 [8:05:31<21:06:47, 3.55it/s] 27%|██▋ | 101768/371472 [8:05:31<20:15:13, 3.70it/s] 27%|██▋ | 101769/371472 [8:05:31<20:23:02, 3.68it/s] 27%|██▋ | 101770/371472 [8:05:32<20:59:07, 3.57it/s] 27%|██▋ | 101771/371472 [8:05:32<21:03:19, 3.56it/s] 27%|██▋ | 101772/371472 [8:05:32<20:19:08, 3.69it/s] 27%|██▋ | 101773/371472 [8:05:33<19:56:41, 3.76it/s] 27%|██▋ | 101774/371472 [8:05:33<19:23:00, 3.86it/s] 27%|██▋ | 101775/371472 [8:05:33<20:25:13, 3.67it/s] 27%|██▋ | 101776/371472 [8:05:33<20:17:26, 3.69it/s] 27%|██▋ | 101777/371472 [8:05:34<20:32:56, 3.65it/s] 27%|██▋ | 101778/371472 [8:05:34<20:03:55, 3.73it/s] 27%|██▋ | 101779/371472 [8:05:34<20:33:11, 3.64it/s] 27%|██▋ | 101780/371472 [8:05:34<21:23:39, 3.50it/s] {'loss': 3.5461, 'learning_rate': 7.537600465426964e-07, 'epoch': 4.38} + 27%|██▋ | 101780/371472 [8:05:34<21:23:39, 3.50it/s] 27%|██▋ | 101781/371472 [8:05:35<21:15:03, 3.53it/s] 27%|██▋ | 101782/371472 [8:05:35<21:28:37, 3.49it/s] 27%|██▋ | 101783/371472 [8:05:35<23:22:27, 3.20it/s] 27%|██▋ | 101784/371472 [8:05:36<22:11:20, 3.38it/s] 27%|██▋ | 101785/371472 [8:05:36<23:01:39, 3.25it/s] 27%|██▋ | 101786/371472 [8:05:36<21:31:13, 3.48it/s] 27%|██▋ | 101787/371472 [8:05:37<25:03:28, 2.99it/s] 27%|██▋ | 101788/371472 [8:05:37<23:12:37, 3.23it/s] 27%|██▋ | 101789/371472 [8:05:37<23:44:51, 3.15it/s] 27%|██▋ | 101790/371472 [8:05:38<22:13:35, 3.37it/s] 27%|██▋ | 101791/371472 [8:05:38<21:47:50, 3.44it/s] 27%|██▋ | 101792/371472 [8:05:38<22:25:51, 3.34it/s] 27%|██▋ | 101793/371472 [8:05:38<23:07:16, 3.24it/s] 27%|██▋ | 101794/371472 [8:05:39<21:39:15, 3.46it/s] 27%|██▋ | 101795/371472 [8:05:39<21:25:03, 3.50it/s] 27%|██▋ | 101796/371472 [8:05:39<21:48:17, 3.44it/s] 27%|██▋ | 101797/371472 [8:05:40<20:48:11, 3.60it/s] 27%|██▋ | 101798/371472 [8:05:40<21:54:13, 3.42it/s] 27%|██▋ | 101799/371472 [8:05:40<20:49:45, 3.60it/s] 27%|██▋ | 101800/371472 [8:05:40<20:20:48, 3.68it/s] {'loss': 3.6679, 'learning_rate': 7.537115645672175e-07, 'epoch': 4.38} + 27%|██▋ | 101800/371472 [8:05:40<20:20:48, 3.68it/s] 27%|██▋ | 101801/371472 [8:05:41<20:40:33, 3.62it/s] 27%|██▋ | 101802/371472 [8:05:41<20:47:35, 3.60it/s] 27%|██▋ | 101803/371472 [8:05:41<20:28:55, 3.66it/s] 27%|██▋ | 101804/371472 [8:05:41<20:39:59, 3.62it/s] 27%|██▋ | 101805/371472 [8:05:42<20:43:02, 3.62it/s] 27%|██▋ | 101806/371472 [8:05:42<20:13:00, 3.71it/s] 27%|██▋ | 101807/371472 [8:05:42<19:52:46, 3.77it/s] 27%|██▋ | 101808/371472 [8:05:43<20:24:03, 3.67it/s] 27%|██▋ | 101809/371472 [8:05:43<19:57:50, 3.75it/s] 27%|██▋ | 101810/371472 [8:05:43<19:54:51, 3.76it/s] 27%|██▋ | 101811/371472 [8:05:43<19:27:27, 3.85it/s] 27%|██▋ | 101812/371472 [8:05:44<19:36:53, 3.82it/s] 27%|██▋ | 101813/371472 [8:05:44<20:19:05, 3.69it/s] 27%|██▋ | 101814/371472 [8:05:44<20:40:12, 3.62it/s] 27%|██▋ | 101815/371472 [8:05:44<20:50:42, 3.59it/s] 27%|██▋ | 101816/371472 [8:05:45<21:15:32, 3.52it/s] 27%|██▋ | 101817/371472 [8:05:45<21:10:19, 3.54it/s] 27%|██▋ | 101818/371472 [8:05:45<20:38:57, 3.63it/s] 27%|██▋ | 101819/371472 [8:05:46<20:10:56, 3.71it/s] 27%|██▋ | 101820/371472 [8:05:46<19:38:31, 3.81it/s] {'loss': 3.5037, 'learning_rate': 7.536630825917386e-07, 'epoch': 4.39} + 27%|██▋ | 101820/371472 [8:05:46<19:38:31, 3.81it/s] 27%|██▋ | 101821/371472 [8:05:46<19:23:45, 3.86it/s] 27%|██▋ | 101822/371472 [8:05:46<19:51:20, 3.77it/s] 27%|██▋ | 101823/371472 [8:05:47<20:06:05, 3.73it/s] 27%|██▋ | 101824/371472 [8:05:47<22:48:32, 3.28it/s] 27%|██▋ | 101825/371472 [8:05:47<21:52:31, 3.42it/s] 27%|██▋ | 101826/371472 [8:05:48<21:39:29, 3.46it/s] 27%|██▋ | 101827/371472 [8:05:48<22:15:40, 3.36it/s] 27%|██▋ | 101828/371472 [8:05:48<21:45:19, 3.44it/s] 27%|██▋ | 101829/371472 [8:05:48<21:25:06, 3.50it/s] 27%|██▋ | 101830/371472 [8:05:49<21:42:24, 3.45it/s] 27%|██▋ | 101831/371472 [8:05:49<21:32:11, 3.48it/s] 27%|██▋ | 101832/371472 [8:05:49<21:44:49, 3.44it/s] 27%|██▋ | 101833/371472 [8:05:50<21:58:33, 3.41it/s] 27%|██▋ | 101834/371472 [8:05:50<21:21:40, 3.51it/s] 27%|██▋ | 101835/371472 [8:05:50<21:03:36, 3.56it/s] 27%|██▋ | 101836/371472 [8:05:50<22:44:00, 3.29it/s] 27%|██▋ | 101837/371472 [8:05:51<21:53:26, 3.42it/s] 27%|██▋ | 101838/371472 [8:05:51<21:29:46, 3.48it/s] 27%|██▋ | 101839/371472 [8:05:51<20:25:25, 3.67it/s] 27%|██▋ | 101840/371472 [8:05:52<21:25:31, 3.50it/s] {'loss': 3.6358, 'learning_rate': 7.536146006162597e-07, 'epoch': 4.39} + 27%|██▋ | 101840/371472 [8:05:52<21:25:31, 3.50it/s] 27%|██▋ | 101841/371472 [8:05:52<21:53:20, 3.42it/s] 27%|██▋ | 101842/371472 [8:05:52<21:50:06, 3.43it/s] 27%|██▋ | 101843/371472 [8:05:52<20:52:47, 3.59it/s] 27%|██▋ | 101844/371472 [8:05:53<20:41:32, 3.62it/s] 27%|██▋ | 101845/371472 [8:05:53<20:39:20, 3.63it/s] 27%|██▋ | 101846/371472 [8:05:53<21:04:53, 3.55it/s] 27%|██▋ | 101847/371472 [8:05:54<20:47:53, 3.60it/s] 27%|██▋ | 101848/371472 [8:05:54<21:09:38, 3.54it/s] 27%|██▋ | 101849/371472 [8:05:54<21:37:57, 3.46it/s] 27%|██▋ | 101850/371472 [8:05:54<22:12:13, 3.37it/s] 27%|██▋ | 101851/371472 [8:05:55<22:23:38, 3.34it/s] 27%|██▋ | 101852/371472 [8:05:55<24:01:07, 3.12it/s] 27%|██▋ | 101853/371472 [8:05:55<24:45:29, 3.03it/s] 27%|██▋ | 101854/371472 [8:05:56<23:02:48, 3.25it/s] 27%|██▋ | 101855/371472 [8:05:56<21:54:03, 3.42it/s] 27%|██▋ | 101856/371472 [8:05:56<20:54:51, 3.58it/s] 27%|██▋ | 101857/371472 [8:05:57<21:20:58, 3.51it/s] 27%|██▋ | 101858/371472 [8:05:57<20:36:08, 3.64it/s] 27%|██▋ | 101859/371472 [8:05:57<20:50:03, 3.59it/s] 27%|██▋ | 101860/371472 [8:05:57<20:53:54, 3.58it/s] {'loss': 3.568, 'learning_rate': 7.535661186407808e-07, 'epoch': 4.39} + 27%|██▋ | 101860/371472 [8:05:57<20:53:54, 3.58it/s] 27%|██▋ | 101861/371472 [8:05:58<20:43:08, 3.61it/s] 27%|██▋ | 101862/371472 [8:05:58<22:04:31, 3.39it/s] 27%|██▋ | 101863/371472 [8:05:58<21:14:46, 3.52it/s] 27%|██▋ | 101864/371472 [8:05:59<22:37:44, 3.31it/s] 27%|██▋ | 101865/371472 [8:05:59<22:27:00, 3.34it/s] 27%|██▋ | 101866/371472 [8:05:59<21:28:00, 3.49it/s] 27%|██▋ | 101867/371472 [8:05:59<20:41:41, 3.62it/s] 27%|██▋ | 101868/371472 [8:06:00<21:06:55, 3.55it/s] 27%|██▋ | 101869/371472 [8:06:00<21:11:16, 3.53it/s] 27%|██▋ | 101870/371472 [8:06:00<21:15:01, 3.52it/s] 27%|██▋ | 101871/371472 [8:06:00<20:40:06, 3.62it/s] 27%|██▋ | 101872/371472 [8:06:01<20:45:09, 3.61it/s] 27%|██▋ | 101873/371472 [8:06:01<20:53:05, 3.59it/s] 27%|██▋ | 101874/371472 [8:06:02<27:58:34, 2.68it/s] 27%|██▋ | 101875/371472 [8:06:02<25:10:48, 2.97it/s] 27%|██▋ | 101876/371472 [8:06:02<23:29:34, 3.19it/s] 27%|██▋ | 101877/371472 [8:06:02<22:14:02, 3.37it/s] 27%|██▋ | 101878/371472 [8:06:03<25:02:53, 2.99it/s] 27%|██▋ | 101879/371472 [8:06:03<23:23:17, 3.20it/s] 27%|██▋ | 101880/371472 [8:06:03<21:55:09, 3.42it/s] {'loss': 3.5031, 'learning_rate': 7.53517636665302e-07, 'epoch': 4.39} + 27%|██▋ | 101880/371472 [8:06:03<21:55:09, 3.42it/s] 27%|██▋ | 101881/371472 [8:06:04<21:43:49, 3.45it/s] 27%|██▋ | 101882/371472 [8:06:04<20:52:33, 3.59it/s] 27%|██▋ | 101883/371472 [8:06:04<21:51:41, 3.43it/s] 27%|██▋ | 101884/371472 [8:06:04<21:05:34, 3.55it/s] 27%|██▋ | 101885/371472 [8:06:05<21:53:13, 3.42it/s] 27%|██▋ | 101886/371472 [8:06:05<22:29:02, 3.33it/s] 27%|██▋ | 101887/371472 [8:06:05<21:51:08, 3.43it/s] 27%|██▋ | 101888/371472 [8:06:06<21:31:24, 3.48it/s] 27%|██▋ | 101889/371472 [8:06:06<22:38:30, 3.31it/s] 27%|██▋ | 101890/371472 [8:06:06<23:11:24, 3.23it/s] 27%|██▋ | 101891/371472 [8:06:07<26:32:22, 2.82it/s] 27%|██▋ | 101892/371472 [8:06:07<25:35:02, 2.93it/s] 27%|██▋ | 101893/371472 [8:06:07<24:47:12, 3.02it/s] 27%|██▋ | 101894/371472 [8:06:08<25:27:22, 2.94it/s] 27%|██▋ | 101895/371472 [8:06:08<24:17:09, 3.08it/s] 27%|██▋ | 101896/371472 [8:06:08<23:01:55, 3.25it/s] 27%|██▋ | 101897/371472 [8:06:09<21:35:12, 3.47it/s] 27%|██▋ | 101898/371472 [8:06:09<20:45:52, 3.61it/s] 27%|██▋ | 101899/371472 [8:06:09<21:36:22, 3.47it/s] 27%|██▋ | 101900/371472 [8:06:09<21:15:48, 3.52it/s] {'loss': 3.3478, 'learning_rate': 7.534691546898231e-07, 'epoch': 4.39} + 27%|██▋ | 101900/371472 [8:06:09<21:15:48, 3.52it/s] 27%|██▋ | 101901/371472 [8:06:10<20:48:35, 3.60it/s] 27%|██▋ | 101902/371472 [8:06:10<21:04:59, 3.55it/s] 27%|██▋ | 101903/371472 [8:06:10<22:35:06, 3.32it/s] 27%|██▋ | 101904/371472 [8:06:11<22:22:12, 3.35it/s] 27%|██▋ | 101905/371472 [8:06:11<22:16:55, 3.36it/s] 27%|██▋ | 101906/371472 [8:06:11<22:48:36, 3.28it/s] 27%|██▋ | 101907/371472 [8:06:11<22:50:34, 3.28it/s] 27%|██▋ | 101908/371472 [8:06:12<21:22:08, 3.50it/s] 27%|██▋ | 101909/371472 [8:06:12<20:40:56, 3.62it/s] 27%|██▋ | 101910/371472 [8:06:12<21:58:31, 3.41it/s] 27%|██▋ | 101911/371472 [8:06:13<21:12:53, 3.53it/s] 27%|██▋ | 101912/371472 [8:06:13<22:03:35, 3.39it/s] 27%|██▋ | 101913/371472 [8:06:13<21:22:27, 3.50it/s] 27%|██▋ | 101914/371472 [8:06:13<21:37:00, 3.46it/s] 27%|██▋ | 101915/371472 [8:06:14<21:15:21, 3.52it/s] 27%|██▋ | 101916/371472 [8:06:14<20:37:37, 3.63it/s] 27%|██▋ | 101917/371472 [8:06:14<20:54:13, 3.58it/s] 27%|██▋ | 101918/371472 [8:06:15<20:28:22, 3.66it/s] 27%|██▋ | 101919/371472 [8:06:15<20:07:05, 3.72it/s] 27%|██▋ | 101920/371472 [8:06:15<21:52:51, 3.42it/s] {'loss': 3.6285, 'learning_rate': 7.534206727143442e-07, 'epoch': 4.39} + 27%|██▋ | 101920/371472 [8:06:15<21:52:51, 3.42it/s] 27%|██▋ | 101921/371472 [8:06:15<22:37:00, 3.31it/s] 27%|██▋ | 101922/371472 [8:06:16<23:08:27, 3.24it/s] 27%|██▋ | 101923/371472 [8:06:16<23:16:07, 3.22it/s] 27%|██▋ | 101924/371472 [8:06:16<22:55:19, 3.27it/s] 27%|██▋ | 101925/371472 [8:06:17<22:07:48, 3.38it/s] 27%|██▋ | 101926/371472 [8:06:17<22:14:05, 3.37it/s] 27%|██▋ | 101927/371472 [8:06:17<21:48:25, 3.43it/s] 27%|██▋ | 101928/371472 [8:06:18<21:10:20, 3.54it/s] 27%|██▋ | 101929/371472 [8:06:18<20:40:24, 3.62it/s] 27%|██▋ | 101930/371472 [8:06:18<21:56:24, 3.41it/s] 27%|██▋ | 101931/371472 [8:06:18<21:29:22, 3.48it/s] 27%|██▋ | 101932/371472 [8:06:19<20:57:15, 3.57it/s] 27%|██▋ | 101933/371472 [8:06:19<21:55:09, 3.42it/s] 27%|██▋ | 101934/371472 [8:06:19<21:05:51, 3.55it/s] 27%|██▋ | 101935/371472 [8:06:20<21:06:38, 3.55it/s] 27%|██▋ | 101936/371472 [8:06:20<21:37:49, 3.46it/s] 27%|██▋ | 101937/371472 [8:06:20<20:54:47, 3.58it/s] 27%|██▋ | 101938/371472 [8:06:20<22:45:05, 3.29it/s] 27%|██▋ | 101939/371472 [8:06:21<23:15:46, 3.22it/s] 27%|██▋ | 101940/371472 [8:06:21<21:58:41, 3.41it/s] {'loss': 3.549, 'learning_rate': 7.533721907388652e-07, 'epoch': 4.39} + 27%|██▋ | 101940/371472 [8:06:21<21:58:41, 3.41it/s] 27%|██▋ | 101941/371472 [8:06:21<22:51:36, 3.28it/s] 27%|██▋ | 101942/371472 [8:06:22<23:16:43, 3.22it/s] 27%|██▋ | 101943/371472 [8:06:22<24:52:33, 3.01it/s] 27%|██▋ | 101944/371472 [8:06:22<23:16:20, 3.22it/s] 27%|██▋ | 101945/371472 [8:06:23<22:07:02, 3.39it/s] 27%|██▋ | 101946/371472 [8:06:23<21:33:39, 3.47it/s] 27%|██▋ | 101947/371472 [8:06:23<21:45:59, 3.44it/s] 27%|██▋ | 101948/371472 [8:06:23<22:07:16, 3.38it/s] 27%|██▋ | 101949/371472 [8:06:24<22:56:16, 3.26it/s] 27%|██▋ | 101950/371472 [8:06:24<22:39:03, 3.31it/s] 27%|██▋ | 101951/371472 [8:06:24<22:12:34, 3.37it/s] 27%|██▋ | 101952/371472 [8:06:25<21:34:36, 3.47it/s] 27%|██▋ | 101953/371472 [8:06:25<21:49:49, 3.43it/s] 27%|██▋ | 101954/371472 [8:06:25<21:32:20, 3.48it/s] 27%|██▋ | 101955/371472 [8:06:26<21:47:26, 3.44it/s] 27%|██▋ | 101956/371472 [8:06:26<21:43:24, 3.45it/s] 27%|██▋ | 101957/371472 [8:06:26<22:05:24, 3.39it/s] 27%|██▋ | 101958/371472 [8:06:26<22:15:17, 3.36it/s] 27%|██▋ | 101959/371472 [8:06:27<21:54:35, 3.42it/s] 27%|██▋ | 101960/371472 [8:06:27<22:57:43, 3.26it/s] {'loss': 3.5341, 'learning_rate': 7.533237087633864e-07, 'epoch': 4.39} + 27%|██▋ | 101960/371472 [8:06:27<22:57:43, 3.26it/s] 27%|██▋ | 101961/371472 [8:06:27<22:15:55, 3.36it/s] 27%|██▋ | 101962/371472 [8:06:28<21:37:12, 3.46it/s] 27%|██▋ | 101963/371472 [8:06:28<20:48:26, 3.60it/s] 27%|██▋ | 101964/371472 [8:06:28<21:42:26, 3.45it/s] 27%|██▋ | 101965/371472 [8:06:28<22:16:41, 3.36it/s] 27%|██▋ | 101966/371472 [8:06:29<23:21:56, 3.20it/s] 27%|██▋ | 101967/371472 [8:06:29<22:45:49, 3.29it/s] 27%|██▋ | 101968/371472 [8:06:29<22:53:07, 3.27it/s] 27%|██▋ | 101969/371472 [8:06:30<22:09:40, 3.38it/s] 27%|██▋ | 101970/371472 [8:06:30<21:32:57, 3.47it/s] 27%|██▋ | 101971/371472 [8:06:30<21:12:00, 3.53it/s] 27%|██▋ | 101972/371472 [8:06:30<21:19:17, 3.51it/s] 27%|██▋ | 101973/371472 [8:06:31<21:01:52, 3.56it/s] 27%|██▋ | 101974/371472 [8:06:31<21:17:04, 3.52it/s] 27%|██▋ | 101975/371472 [8:06:31<21:27:41, 3.49it/s] 27%|██▋ | 101976/371472 [8:06:32<24:03:04, 3.11it/s] 27%|██▋ | 101977/371472 [8:06:32<22:49:08, 3.28it/s] 27%|██▋ | 101978/371472 [8:06:32<21:42:52, 3.45it/s] 27%|██▋ | 101979/371472 [8:06:33<21:53:18, 3.42it/s] 27%|██▋ | 101980/371472 [8:06:33<22:38:14, 3.31it/s] {'loss': 3.6116, 'learning_rate': 7.532752267879075e-07, 'epoch': 4.39} + 27%|██▋ | 101980/371472 [8:06:33<22:38:14, 3.31it/s] 27%|██▋ | 101981/371472 [8:06:33<22:23:42, 3.34it/s] 27%|██▋ | 101982/371472 [8:06:33<21:50:18, 3.43it/s] 27%|██▋ | 101983/371472 [8:06:34<21:17:27, 3.52it/s] 27%|██▋ | 101984/371472 [8:06:34<20:41:03, 3.62it/s] 27%|██▋ | 101985/371472 [8:06:34<21:01:18, 3.56it/s] 27%|██▋ | 101986/371472 [8:06:35<20:44:42, 3.61it/s] 27%|██▋ | 101987/371472 [8:06:35<22:01:48, 3.40it/s] 27%|██▋ | 101988/371472 [8:06:35<22:24:26, 3.34it/s] 27%|██▋ | 101989/371472 [8:06:35<21:06:45, 3.55it/s] 27%|██▋ | 101990/371472 [8:06:36<21:36:53, 3.46it/s] 27%|██▋ | 101991/371472 [8:06:36<24:15:24, 3.09it/s] 27%|██▋ | 101992/371472 [8:06:36<23:38:21, 3.17it/s] 27%|██▋ | 101993/371472 [8:06:37<22:54:42, 3.27it/s] 27%|██▋ | 101994/371472 [8:06:37<22:32:57, 3.32it/s] 27%|██▋ | 101995/371472 [8:06:37<22:32:49, 3.32it/s] 27%|██▋ | 101996/371472 [8:06:38<22:23:12, 3.34it/s] 27%|██▋ | 101997/371472 [8:06:38<21:17:47, 3.51it/s] 27%|██▋ | 101998/371472 [8:06:38<21:17:37, 3.52it/s] 27%|██▋ | 101999/371472 [8:06:39<24:07:10, 3.10it/s] 27%|██▋ | 102000/371472 [8:06:39<22:33:49, 3.32it/s] {'loss': 3.4513, 'learning_rate': 7.532267448124286e-07, 'epoch': 4.39} + 27%|██▋ | 102000/371472 [8:06:39<22:33:49, 3.32it/s] 27%|██▋ | 102001/371472 [8:06:39<21:31:37, 3.48it/s] 27%|██▋ | 102002/371472 [8:06:39<20:51:21, 3.59it/s] 27%|██▋ | 102003/371472 [8:06:40<20:04:29, 3.73it/s] 27%|██▋ | 102004/371472 [8:06:40<19:57:22, 3.75it/s] 27%|██▋ | 102005/371472 [8:06:40<21:06:21, 3.55it/s] 27%|██▋ | 102006/371472 [8:06:40<21:07:25, 3.54it/s] 27%|██▋ | 102007/371472 [8:06:41<21:19:55, 3.51it/s] 27%|██▋ | 102008/371472 [8:06:41<21:11:45, 3.53it/s] 27%|██▋ | 102009/371472 [8:06:41<20:17:51, 3.69it/s] 27%|██▋ | 102010/371472 [8:06:42<20:52:05, 3.59it/s] 27%|██▋ | 102011/371472 [8:06:42<21:22:18, 3.50it/s] 27%|██▋ | 102012/371472 [8:06:42<21:36:47, 3.46it/s] 27%|██▋ | 102013/371472 [8:06:42<21:48:56, 3.43it/s] 27%|██▋ | 102014/371472 [8:06:43<20:46:18, 3.60it/s] 27%|██▋ | 102015/371472 [8:06:43<20:05:48, 3.72it/s] 27%|██▋ | 102016/371472 [8:06:43<19:38:31, 3.81it/s] 27%|██▋ | 102017/371472 [8:06:43<20:06:52, 3.72it/s] 27%|██▋ | 102018/371472 [8:06:44<22:22:39, 3.34it/s] 27%|██▋ | 102019/371472 [8:06:44<21:07:25, 3.54it/s] 27%|██▋ | 102020/371472 [8:06:44<22:05:34, 3.39it/s] {'loss': 3.6678, 'learning_rate': 7.531782628369497e-07, 'epoch': 4.39} + 27%|██▋ | 102020/371472 [8:06:44<22:05:34, 3.39it/s] 27%|██▋ | 102021/371472 [8:06:45<21:39:00, 3.46it/s] 27%|██▋ | 102022/371472 [8:06:45<20:56:34, 3.57it/s] 27%|██▋ | 102023/371472 [8:06:45<21:46:53, 3.44it/s] 27%|██▋ | 102024/371472 [8:06:46<21:11:11, 3.53it/s] 27%|██▋ | 102025/371472 [8:06:46<21:00:56, 3.56it/s] 27%|██▋ | 102026/371472 [8:06:46<21:27:31, 3.49it/s] 27%|██▋ | 102027/371472 [8:06:46<21:23:33, 3.50it/s] 27%|██▋ | 102028/371472 [8:06:47<20:33:01, 3.64it/s] 27%|██▋ | 102029/371472 [8:06:47<20:20:37, 3.68it/s] 27%|██▋ | 102030/371472 [8:06:47<20:35:45, 3.63it/s] 27%|██▋ | 102031/371472 [8:06:47<21:38:48, 3.46it/s] 27%|██▋ | 102032/371472 [8:06:48<21:27:57, 3.49it/s] 27%|██▋ | 102033/371472 [8:06:48<20:47:42, 3.60it/s] 27%|██▋ | 102034/371472 [8:06:48<21:14:46, 3.52it/s] 27%|██▋ | 102035/371472 [8:06:49<21:49:18, 3.43it/s] 27%|██▋ | 102036/371472 [8:06:49<21:10:58, 3.53it/s] 27%|██▋ | 102037/371472 [8:06:49<20:42:35, 3.61it/s] 27%|██▋ | 102038/371472 [8:06:49<20:25:12, 3.67it/s] 27%|██▋ | 102039/371472 [8:06:50<20:02:31, 3.73it/s] 27%|██▋ | 102040/371472 [8:06:50<21:00:32, 3.56it/s] {'loss': 3.5044, 'learning_rate': 7.531297808614709e-07, 'epoch': 4.4} + 27%|██▋ | 102040/371472 [8:06:50<21:00:32, 3.56it/s] 27%|██▋ | 102041/371472 [8:06:50<21:04:10, 3.55it/s] 27%|██▋ | 102042/371472 [8:06:51<20:29:52, 3.65it/s] 27%|██▋ | 102043/371472 [8:06:51<20:12:33, 3.70it/s] 27%|██▋ | 102044/371472 [8:06:51<19:50:58, 3.77it/s] 27%|██▋ | 102045/371472 [8:06:51<20:16:22, 3.69it/s] 27%|██▋ | 102046/371472 [8:06:52<20:02:51, 3.73it/s] 27%|██▋ | 102047/371472 [8:06:52<20:33:40, 3.64it/s] 27%|██▋ | 102048/371472 [8:06:52<20:19:00, 3.68it/s] 27%|██▋ | 102049/371472 [8:06:52<19:58:13, 3.75it/s] 27%|██▋ | 102050/371472 [8:06:53<20:52:42, 3.58it/s] 27%|██▋ | 102051/371472 [8:06:53<21:56:17, 3.41it/s] 27%|██▋ | 102052/371472 [8:06:53<21:38:32, 3.46it/s] 27%|██▋ | 102053/371472 [8:06:54<21:48:39, 3.43it/s] 27%|██▋ | 102054/371472 [8:06:54<21:44:52, 3.44it/s] 27%|██▋ | 102055/371472 [8:06:54<21:47:28, 3.43it/s] 27%|██▋ | 102056/371472 [8:06:55<22:36:56, 3.31it/s] 27%|██▋ | 102057/371472 [8:06:55<21:33:27, 3.47it/s] 27%|██▋ | 102058/371472 [8:06:55<22:52:14, 3.27it/s] 27%|██▋ | 102059/371472 [8:06:55<22:30:18, 3.33it/s] 27%|██▋ | 102060/371472 [8:06:56<21:50:44, 3.43it/s] {'loss': 3.4782, 'learning_rate': 7.530812988859919e-07, 'epoch': 4.4} + 27%|██▋ | 102060/371472 [8:06:56<21:50:44, 3.43it/s] 27%|██▋ | 102061/371472 [8:06:56<21:32:14, 3.47it/s] 27%|██▋ | 102062/371472 [8:06:56<20:54:01, 3.58it/s] 27%|██▋ | 102063/371472 [8:06:57<24:56:46, 3.00it/s] 27%|██▋ | 102064/371472 [8:06:57<23:49:55, 3.14it/s] 27%|██▋ | 102065/371472 [8:06:57<22:17:43, 3.36it/s] 27%|██▋ | 102066/371472 [8:06:58<22:18:35, 3.35it/s] 27%|██▋ | 102067/371472 [8:06:58<21:13:17, 3.53it/s] 27%|██▋ | 102068/371472 [8:06:58<21:18:10, 3.51it/s] 27%|██▋ | 102069/371472 [8:06:58<21:19:47, 3.51it/s] 27%|██▋ | 102070/371472 [8:06:59<21:45:07, 3.44it/s] 27%|██▋ | 102071/371472 [8:06:59<21:48:48, 3.43it/s] 27%|██▋ | 102072/371472 [8:06:59<20:57:20, 3.57it/s] 27%|██▋ | 102073/371472 [8:06:59<21:17:47, 3.51it/s] 27%|██▋ | 102074/371472 [8:07:00<20:37:52, 3.63it/s] 27%|██▋ | 102075/371472 [8:07:00<22:12:40, 3.37it/s] 27%|██▋ | 102076/371472 [8:07:00<20:56:42, 3.57it/s] 27%|██▋ | 102077/371472 [8:07:01<20:30:40, 3.65it/s] 27%|██▋ | 102078/371472 [8:07:01<20:16:37, 3.69it/s] 27%|██▋ | 102079/371472 [8:07:01<20:19:28, 3.68it/s] 27%|██▋ | 102080/371472 [8:07:01<20:27:59, 3.66it/s] {'loss': 3.4979, 'learning_rate': 7.530328169105129e-07, 'epoch': 4.4} + 27%|██▋ | 102080/371472 [8:07:01<20:27:59, 3.66it/s] 27%|██▋ | 102081/371472 [8:07:02<20:31:22, 3.65it/s] 27%|██▋ | 102082/371472 [8:07:02<20:43:06, 3.61it/s] 27%|██▋ | 102083/371472 [8:07:02<22:30:33, 3.32it/s] 27%|██▋ | 102084/371472 [8:07:03<22:24:58, 3.34it/s] 27%|██▋ | 102085/371472 [8:07:03<21:57:03, 3.41it/s] 27%|██▋ | 102086/371472 [8:07:03<21:33:49, 3.47it/s] 27%|██▋ | 102087/371472 [8:07:03<21:11:23, 3.53it/s] 27%|██▋ | 102088/371472 [8:07:04<21:30:31, 3.48it/s] 27%|██▋ | 102089/371472 [8:07:04<21:56:05, 3.41it/s] 27%|██▋ | 102090/371472 [8:07:04<21:52:16, 3.42it/s] 27%|██▋ | 102091/371472 [8:07:05<21:48:22, 3.43it/s] 27%|██▋ | 102092/371472 [8:07:05<21:08:51, 3.54it/s] 27%|██▋ | 102093/371472 [8:07:05<21:58:26, 3.41it/s] 27%|██▋ | 102094/371472 [8:07:06<22:10:32, 3.37it/s] 27%|██▋ | 102095/371472 [8:07:06<21:02:28, 3.56it/s] 27%|██▋ | 102096/371472 [8:07:06<20:46:03, 3.60it/s] 27%|██▋ | 102097/371472 [8:07:06<20:16:13, 3.69it/s] 27%|██▋ | 102098/371472 [8:07:07<20:38:45, 3.62it/s] 27%|██▋ | 102099/371472 [8:07:07<20:52:24, 3.58it/s] 27%|██▋ | 102100/371472 [8:07:07<21:08:25, 3.54it/s] {'loss': 3.4867, 'learning_rate': 7.529843349350341e-07, 'epoch': 4.4} + 27%|██▋ | 102100/371472 [8:07:07<21:08:25, 3.54it/s] 27%|██▋ | 102101/371472 [8:07:07<21:08:57, 3.54it/s] 27%|██▋ | 102102/371472 [8:07:08<21:04:46, 3.55it/s] 27%|██▋ | 102103/371472 [8:07:08<20:47:18, 3.60it/s] 27%|██▋ | 102104/371472 [8:07:08<21:41:45, 3.45it/s] 27%|██▋ | 102105/371472 [8:07:09<22:10:16, 3.37it/s] 27%|██▋ | 102106/371472 [8:07:09<21:58:00, 3.41it/s] 27%|██▋ | 102107/371472 [8:07:09<21:27:06, 3.49it/s] 27%|██▋ | 102108/371472 [8:07:09<21:10:16, 3.53it/s] 27%|██▋ | 102109/371472 [8:07:10<20:30:26, 3.65it/s] 27%|██▋ | 102110/371472 [8:07:10<21:01:38, 3.56it/s] 27%|██▋ | 102111/371472 [8:07:10<21:48:33, 3.43it/s] 27%|██▋ | 102112/371472 [8:07:11<21:29:10, 3.48it/s] 27%|██▋ | 102113/371472 [8:07:11<20:47:46, 3.60it/s] 27%|██▋ | 102114/371472 [8:07:11<20:12:02, 3.70it/s] 27%|██▋ | 102115/371472 [8:07:11<21:42:03, 3.45it/s] 27%|██▋ | 102116/371472 [8:07:12<21:16:00, 3.52it/s] 27%|██▋ | 102117/371472 [8:07:12<21:22:11, 3.50it/s] 27%|██▋ | 102118/371472 [8:07:12<21:49:10, 3.43it/s] 27%|██▋ | 102119/371472 [8:07:13<21:13:57, 3.52it/s] 27%|██▋ | 102120/371472 [8:07:13<20:42:49, 3.61it/s] {'loss': 3.4821, 'learning_rate': 7.529358529595553e-07, 'epoch': 4.4} + 27%|██▋ | 102120/371472 [8:07:13<20:42:49, 3.61it/s] 27%|██▋ | 102121/371472 [8:07:13<20:02:36, 3.73it/s] 27%|██▋ | 102122/371472 [8:07:13<20:08:29, 3.71it/s] 27%|██▋ | 102123/371472 [8:07:14<20:33:48, 3.64it/s] 27%|██▋ | 102124/371472 [8:07:14<20:18:53, 3.68it/s] 27%|██▋ | 102125/371472 [8:07:14<21:22:14, 3.50it/s] 27%|██▋ | 102126/371472 [8:07:14<20:52:27, 3.58it/s] 27%|██▋ | 102127/371472 [8:07:15<21:07:26, 3.54it/s] 27%|██▋ | 102128/371472 [8:07:15<21:29:51, 3.48it/s] 27%|██▋ | 102129/371472 [8:07:15<21:25:19, 3.49it/s] 27%|██▋ | 102130/371472 [8:07:16<20:53:43, 3.58it/s] 27%|██▋ | 102131/371472 [8:07:16<25:18:44, 2.96it/s] 27%|██▋ | 102132/371472 [8:07:16<24:41:08, 3.03it/s] 27%|██▋ | 102133/371472 [8:07:17<24:34:30, 3.04it/s] 27%|██▋ | 102134/371472 [8:07:17<22:37:19, 3.31it/s] 27%|██▋ | 102135/371472 [8:07:17<21:43:45, 3.44it/s] 27%|██▋ | 102136/371472 [8:07:17<21:20:01, 3.51it/s] 27%|██▋ | 102137/371472 [8:07:18<21:27:04, 3.49it/s] 27%|██▋ | 102138/371472 [8:07:18<20:55:08, 3.58it/s] 27%|██▋ | 102139/371472 [8:07:18<20:48:41, 3.59it/s] 27%|██▋ | 102140/371472 [8:07:19<21:49:21, 3.43it/s] {'loss': 3.6658, 'learning_rate': 7.528873709840764e-07, 'epoch': 4.4} + 27%|██▋ | 102140/371472 [8:07:19<21:49:21, 3.43it/s] 27%|██▋ | 102141/371472 [8:07:19<22:03:22, 3.39it/s] 27%|██▋ | 102142/371472 [8:07:19<21:25:16, 3.49it/s] 27%|██▋ | 102143/371472 [8:07:20<22:56:41, 3.26it/s] 27%|██▋ | 102144/371472 [8:07:20<21:56:42, 3.41it/s] 27%|██▋ | 102145/371472 [8:07:20<21:27:06, 3.49it/s] 27%|██▋ | 102146/371472 [8:07:20<21:30:46, 3.48it/s] 27%|██▋ | 102147/371472 [8:07:21<22:45:21, 3.29it/s] 27%|██▋ | 102148/371472 [8:07:21<22:06:07, 3.38it/s] 27%|██▋ | 102149/371472 [8:07:21<20:46:05, 3.60it/s] 27%|██▋ | 102150/371472 [8:07:21<20:12:33, 3.70it/s] 27%|██▋ | 102151/371472 [8:07:22<20:51:28, 3.59it/s] 27%|██▋ | 102152/371472 [8:07:22<20:20:51, 3.68it/s] 27%|██▋ | 102153/371472 [8:07:22<22:22:39, 3.34it/s] 27%|██▋ | 102154/371472 [8:07:23<25:25:42, 2.94it/s] 28%|██▊ | 102155/371472 [8:07:23<24:23:52, 3.07it/s] 28%|██▊ | 102156/371472 [8:07:23<23:22:45, 3.20it/s] 28%|██▊ | 102157/371472 [8:07:24<24:50:48, 3.01it/s] 28%|██▊ | 102158/371472 [8:07:24<23:38:22, 3.16it/s] 28%|██▊ | 102159/371472 [8:07:24<23:13:08, 3.22it/s] 28%|██▊ | 102160/371472 [8:07:25<22:00:56, 3.40it/s] {'loss': 3.3317, 'learning_rate': 7.528388890085974e-07, 'epoch': 4.4} + 28%|██▊ | 102160/371472 [8:07:25<22:00:56, 3.40it/s] 28%|██▊ | 102161/371472 [8:07:25<20:59:39, 3.56it/s] 28%|██▊ | 102162/371472 [8:07:25<21:03:34, 3.55it/s] 28%|██▊ | 102163/371472 [8:07:25<21:00:31, 3.56it/s] 28%|██▊ | 102164/371472 [8:07:26<21:04:45, 3.55it/s] 28%|██▊ | 102165/371472 [8:07:26<21:01:10, 3.56it/s] 28%|██▊ | 102166/371472 [8:07:26<21:21:54, 3.50it/s] 28%|██▊ | 102167/371472 [8:07:27<20:41:22, 3.62it/s] 28%|██▊ | 102168/371472 [8:07:27<19:59:29, 3.74it/s] 28%|██▊ | 102169/371472 [8:07:27<20:24:33, 3.67it/s] 28%|██▊ | 102170/371472 [8:07:27<20:31:18, 3.65it/s] 28%|██▊ | 102171/371472 [8:07:28<21:09:58, 3.53it/s] 28%|██▊ | 102172/371472 [8:07:28<20:58:39, 3.57it/s] 28%|██▊ | 102173/371472 [8:07:28<21:14:01, 3.52it/s] 28%|██▊ | 102174/371472 [8:07:29<22:28:21, 3.33it/s] 28%|██▊ | 102175/371472 [8:07:29<21:45:49, 3.44it/s] 28%|██▊ | 102176/371472 [8:07:29<21:08:34, 3.54it/s] 28%|██▊ | 102177/371472 [8:07:29<22:12:40, 3.37it/s] 28%|██▊ | 102178/371472 [8:07:30<21:34:07, 3.47it/s] 28%|██▊ | 102179/371472 [8:07:30<22:05:19, 3.39it/s] 28%|██▊ | 102180/371472 [8:07:30<22:48:32, 3.28it/s] {'loss': 3.6214, 'learning_rate': 7.527904070331185e-07, 'epoch': 4.4} + 28%|██▊ | 102180/371472 [8:07:30<22:48:32, 3.28it/s] 28%|██▊ | 102181/371472 [8:07:31<21:45:16, 3.44it/s] 28%|██▊ | 102182/371472 [8:07:31<20:50:23, 3.59it/s] 28%|██▊ | 102183/371472 [8:07:31<21:21:32, 3.50it/s] 28%|██▊ | 102184/371472 [8:07:31<21:15:56, 3.52it/s] 28%|██▊ | 102185/371472 [8:07:32<20:34:58, 3.63it/s] 28%|██▊ | 102186/371472 [8:07:32<21:01:13, 3.56it/s] 28%|██▊ | 102187/371472 [8:07:32<20:07:01, 3.72it/s] 28%|██▊ | 102188/371472 [8:07:33<20:22:40, 3.67it/s] 28%|██▊ | 102189/371472 [8:07:33<21:23:56, 3.50it/s] 28%|██▊ | 102190/371472 [8:07:33<20:22:30, 3.67it/s] 28%|██▊ | 102191/371472 [8:07:33<20:43:33, 3.61it/s] 28%|██▊ | 102192/371472 [8:07:34<21:30:30, 3.48it/s] 28%|██▊ | 102193/371472 [8:07:34<21:03:12, 3.55it/s] 28%|██▊ | 102194/371472 [8:07:34<21:00:06, 3.56it/s] 28%|██▊ | 102195/371472 [8:07:34<20:37:05, 3.63it/s] 28%|██▊ | 102196/371472 [8:07:35<20:43:52, 3.61it/s] 28%|██▊ | 102197/371472 [8:07:35<20:42:20, 3.61it/s] 28%|██▊ | 102198/371472 [8:07:35<22:05:22, 3.39it/s] 28%|██▊ | 102199/371472 [8:07:36<22:26:49, 3.33it/s] 28%|██▊ | 102200/371472 [8:07:36<22:35:45, 3.31it/s] {'loss': 3.5743, 'learning_rate': 7.527419250576397e-07, 'epoch': 4.4} + 28%|██▊ | 102200/371472 [8:07:36<22:35:45, 3.31it/s] 28%|██▊ | 102201/371472 [8:07:36<22:45:46, 3.29it/s] 28%|██▊ | 102202/371472 [8:07:37<21:46:04, 3.44it/s] 28%|██▊ | 102203/371472 [8:07:37<21:05:45, 3.55it/s] 28%|██▊ | 102204/371472 [8:07:37<20:55:30, 3.57it/s] 28%|██▊ | 102205/371472 [8:07:37<20:44:07, 3.61it/s] 28%|██▊ | 102206/371472 [8:07:38<20:41:38, 3.61it/s] 28%|██▊ | 102207/371472 [8:07:38<20:45:56, 3.60it/s] 28%|██▊ | 102208/371472 [8:07:38<20:57:29, 3.57it/s] 28%|██▊ | 102209/371472 [8:07:39<21:52:18, 3.42it/s] 28%|██▊ | 102210/371472 [8:07:39<21:04:11, 3.55it/s] 28%|██▊ | 102211/371472 [8:07:39<20:49:53, 3.59it/s] 28%|██▊ | 102212/371472 [8:07:39<20:42:50, 3.61it/s] 28%|██▊ | 102213/371472 [8:07:40<20:54:28, 3.58it/s] 28%|██▊ | 102214/371472 [8:07:40<21:36:12, 3.46it/s] 28%|██▊ | 102215/371472 [8:07:40<22:05:36, 3.39it/s] 28%|██▊ | 102216/371472 [8:07:40<20:54:21, 3.58it/s] 28%|██▊ | 102217/371472 [8:07:41<21:32:51, 3.47it/s] 28%|██▊ | 102218/371472 [8:07:41<21:10:10, 3.53it/s] 28%|██▊ | 102219/371472 [8:07:41<20:27:41, 3.66it/s] 28%|██▊ | 102220/371472 [8:07:42<22:13:40, 3.36it/s] {'loss': 3.5693, 'learning_rate': 7.526934430821608e-07, 'epoch': 4.4} + 28%|██▊ | 102220/371472 [8:07:42<22:13:40, 3.36it/s] 28%|██▊ | 102221/371472 [8:07:42<25:19:47, 2.95it/s] 28%|██▊ | 102222/371472 [8:07:42<24:15:11, 3.08it/s] 28%|██▊ | 102223/371472 [8:07:43<23:53:38, 3.13it/s] 28%|██▊ | 102224/371472 [8:07:43<22:15:51, 3.36it/s] 28%|██▊ | 102225/371472 [8:07:43<24:16:49, 3.08it/s] 28%|██▊ | 102226/371472 [8:07:44<23:00:29, 3.25it/s] 28%|██▊ | 102227/371472 [8:07:44<22:39:33, 3.30it/s] 28%|██▊ | 102228/371472 [8:07:44<24:02:06, 3.11it/s] 28%|██▊ | 102229/371472 [8:07:45<22:39:12, 3.30it/s] 28%|██▊ | 102230/371472 [8:07:45<21:25:59, 3.49it/s] 28%|██▊ | 102231/371472 [8:07:45<21:28:00, 3.48it/s] 28%|██▊ | 102232/371472 [8:07:45<21:01:23, 3.56it/s] 28%|██▊ | 102233/371472 [8:07:46<21:23:11, 3.50it/s] 28%|██▊ | 102234/371472 [8:07:46<21:04:07, 3.55it/s] 28%|██▊ | 102235/371472 [8:07:46<20:57:06, 3.57it/s] 28%|██▊ | 102236/371472 [8:07:46<21:30:17, 3.48it/s] 28%|██▊ | 102237/371472 [8:07:47<22:46:23, 3.28it/s] 28%|██▊ | 102238/371472 [8:07:47<21:49:20, 3.43it/s] 28%|██▊ | 102239/371472 [8:07:47<21:49:42, 3.43it/s] 28%|██▊ | 102240/371472 [8:07:48<23:01:10, 3.25it/s] {'loss': 3.7025, 'learning_rate': 7.526449611066818e-07, 'epoch': 4.4} + 28%|██▊ | 102240/371472 [8:07:48<23:01:10, 3.25it/s] 28%|██▊ | 102241/371472 [8:07:48<23:37:16, 3.17it/s] 28%|██▊ | 102242/371472 [8:07:48<22:56:40, 3.26it/s] 28%|██▊ | 102243/371472 [8:07:49<22:09:55, 3.37it/s] 28%|██▊ | 102244/371472 [8:07:49<21:22:28, 3.50it/s] 28%|██▊ | 102245/371472 [8:07:49<21:29:35, 3.48it/s] 28%|██▊ | 102246/371472 [8:07:50<22:42:51, 3.29it/s] 28%|██▊ | 102247/371472 [8:07:50<21:34:55, 3.47it/s] 28%|██▊ | 102248/371472 [8:07:50<21:06:22, 3.54it/s] 28%|██▊ | 102249/371472 [8:07:50<20:44:41, 3.60it/s] 28%|██▊ | 102250/371472 [8:07:51<21:30:31, 3.48it/s] 28%|██▊ | 102251/371472 [8:07:51<23:28:03, 3.19it/s] 28%|██▊ | 102252/371472 [8:07:51<22:40:37, 3.30it/s] 28%|██▊ | 102253/371472 [8:07:52<22:23:14, 3.34it/s] 28%|██▊ | 102254/371472 [8:07:52<21:57:55, 3.40it/s] 28%|██▊ | 102255/371472 [8:07:52<22:30:25, 3.32it/s] 28%|██▊ | 102256/371472 [8:07:52<21:05:50, 3.54it/s] 28%|██▊ | 102257/371472 [8:07:53<20:39:41, 3.62it/s] 28%|██▊ | 102258/371472 [8:07:53<20:46:50, 3.60it/s] 28%|██▊ | 102259/371472 [8:07:53<20:11:36, 3.70it/s] 28%|██▊ | 102260/371472 [8:07:53<20:28:53, 3.65it/s] {'loss': 3.5446, 'learning_rate': 7.52596479131203e-07, 'epoch': 4.4} + 28%|██▊ | 102260/371472 [8:07:53<20:28:53, 3.65it/s] 28%|██▊ | 102261/371472 [8:07:54<21:28:31, 3.48it/s] 28%|██▊ | 102262/371472 [8:07:54<22:00:49, 3.40it/s] 28%|██▊ | 102263/371472 [8:07:54<23:01:24, 3.25it/s] 28%|██▊ | 102264/371472 [8:07:55<22:03:20, 3.39it/s] 28%|██▊ | 102265/371472 [8:07:55<21:49:29, 3.43it/s] 28%|██▊ | 102266/371472 [8:07:55<21:00:03, 3.56it/s] 28%|██▊ | 102267/371472 [8:07:56<21:08:52, 3.54it/s] 28%|██▊ | 102268/371472 [8:07:56<21:03:54, 3.55it/s] 28%|██▊ | 102269/371472 [8:07:56<21:10:56, 3.53it/s] 28%|██▊ | 102270/371472 [8:07:56<20:38:16, 3.62it/s] 28%|██▊ | 102271/371472 [8:07:57<21:26:04, 3.49it/s] 28%|██▊ | 102272/371472 [8:07:57<20:34:52, 3.63it/s] 28%|██▊ | 102273/371472 [8:07:57<20:53:44, 3.58it/s] 28%|██▊ | 102274/371472 [8:07:57<21:13:19, 3.52it/s] 28%|██▊ | 102275/371472 [8:07:58<21:03:00, 3.55it/s] 28%|██▊ | 102276/371472 [8:07:58<23:43:18, 3.15it/s] 28%|██▊ | 102277/371472 [8:07:58<22:45:26, 3.29it/s] 28%|██▊ | 102278/371472 [8:07:59<22:50:14, 3.27it/s] 28%|██▊ | 102279/371472 [8:07:59<21:54:27, 3.41it/s] 28%|██▊ | 102280/371472 [8:07:59<25:00:30, 2.99it/s] {'loss': 3.689, 'learning_rate': 7.525479971557242e-07, 'epoch': 4.41} + 28%|██▊ | 102280/371472 [8:07:59<25:00:30, 2.99it/s] 28%|██▊ | 102281/371472 [8:08:00<24:20:40, 3.07it/s] 28%|██▊ | 102282/371472 [8:08:00<22:42:03, 3.29it/s] 28%|██▊ | 102283/371472 [8:08:00<21:48:34, 3.43it/s] 28%|██▊ | 102284/371472 [8:08:01<21:33:39, 3.47it/s] 28%|██▊ | 102285/371472 [8:08:01<20:45:32, 3.60it/s] 28%|██▊ | 102286/371472 [8:08:01<21:08:00, 3.54it/s] 28%|██▊ | 102287/371472 [8:08:01<20:53:03, 3.58it/s] 28%|██▊ | 102288/371472 [8:08:02<20:49:41, 3.59it/s] 28%|██▊ | 102289/371472 [8:08:02<19:52:28, 3.76it/s] 28%|██▊ | 102290/371472 [8:08:02<20:57:12, 3.57it/s] 28%|██▊ | 102291/371472 [8:08:02<21:27:56, 3.48it/s] 28%|██▊ | 102292/371472 [8:08:03<23:39:51, 3.16it/s] 28%|██▊ | 102293/371472 [8:08:03<22:14:31, 3.36it/s] 28%|██▊ | 102294/371472 [8:08:03<21:48:49, 3.43it/s] 28%|██▊ | 102295/371472 [8:08:04<21:21:35, 3.50it/s] 28%|██▊ | 102296/371472 [8:08:04<21:27:32, 3.48it/s] 28%|██▊ | 102297/371472 [8:08:04<20:39:26, 3.62it/s] 28%|██▊ | 102298/371472 [8:08:04<20:10:31, 3.71it/s] 28%|██▊ | 102299/371472 [8:08:05<20:49:27, 3.59it/s] 28%|██▊ | 102300/371472 [8:08:05<21:08:42, 3.54it/s] {'loss': 3.4581, 'learning_rate': 7.524995151802452e-07, 'epoch': 4.41} + 28%|██▊ | 102300/371472 [8:08:05<21:08:42, 3.54it/s] 28%|██▊ | 102301/371472 [8:08:05<23:00:07, 3.25it/s] 28%|██▊ | 102302/371472 [8:08:06<21:59:13, 3.40it/s] 28%|██▊ | 102303/371472 [8:08:06<20:46:10, 3.60it/s] 28%|██▊ | 102304/371472 [8:08:06<21:35:18, 3.46it/s] 28%|██▊ | 102305/371472 [8:08:07<21:16:15, 3.52it/s] 28%|██▊ | 102306/371472 [8:08:07<20:28:29, 3.65it/s] 28%|██▊ | 102307/371472 [8:08:07<22:23:52, 3.34it/s] 28%|██▊ | 102308/371472 [8:08:07<23:09:44, 3.23it/s] 28%|██▊ | 102309/371472 [8:08:08<22:25:16, 3.33it/s] 28%|██▊ | 102310/371472 [8:08:08<21:55:15, 3.41it/s] 28%|██▊ | 102311/371472 [8:08:08<22:04:05, 3.39it/s] 28%|██▊ | 102312/371472 [8:08:09<21:38:40, 3.45it/s] 28%|██▊ | 102313/371472 [8:08:09<21:02:20, 3.55it/s] 28%|██▊ | 102314/371472 [8:08:09<20:28:23, 3.65it/s] 28%|██▊ | 102315/371472 [8:08:09<20:20:05, 3.68it/s] 28%|██▊ | 102316/371472 [8:08:10<20:17:49, 3.68it/s] 28%|██▊ | 102317/371472 [8:08:10<20:09:55, 3.71it/s] 28%|██▊ | 102318/371472 [8:08:10<19:40:40, 3.80it/s] 28%|██▊ | 102319/371472 [8:08:10<19:30:00, 3.83it/s] 28%|██▊ | 102320/371472 [8:08:11<19:06:52, 3.91it/s] {'loss': 3.4008, 'learning_rate': 7.524510332047662e-07, 'epoch': 4.41} + 28%|██▊ | 102320/371472 [8:08:11<19:06:52, 3.91it/s] 28%|██▊ | 102321/371472 [8:08:11<19:43:01, 3.79it/s] 28%|██▊ | 102322/371472 [8:08:11<20:47:18, 3.60it/s] 28%|██▊ | 102323/371472 [8:08:12<21:31:28, 3.47it/s] 28%|██▊ | 102324/371472 [8:08:12<21:59:19, 3.40it/s] 28%|██▊ | 102325/371472 [8:08:12<21:37:48, 3.46it/s] 28%|██▊ | 102326/371472 [8:08:12<21:24:10, 3.49it/s] 28%|██▊ | 102327/371472 [8:08:13<22:25:55, 3.33it/s] 28%|██▊ | 102328/371472 [8:08:13<21:37:32, 3.46it/s] 28%|██▊ | 102329/371472 [8:08:13<21:19:04, 3.51it/s] 28%|██▊ | 102330/371472 [8:08:14<22:01:04, 3.40it/s] 28%|██▊ | 102331/371472 [8:08:14<21:03:22, 3.55it/s] 28%|██▊ | 102332/371472 [8:08:14<20:19:21, 3.68it/s] 28%|██▊ | 102333/371472 [8:08:14<19:33:02, 3.82it/s] 28%|██▊ | 102334/371472 [8:08:15<21:07:55, 3.54it/s] 28%|██▊ | 102335/371472 [8:08:15<21:05:08, 3.55it/s] 28%|██▊ | 102336/371472 [8:08:15<21:07:26, 3.54it/s] 28%|██▊ | 102337/371472 [8:08:16<20:42:29, 3.61it/s] 28%|██▊ | 102338/371472 [8:08:16<21:30:00, 3.48it/s] 28%|██▊ | 102339/371472 [8:08:16<22:46:04, 3.28it/s] 28%|██▊ | 102340/371472 [8:08:16<21:46:38, 3.43it/s] {'loss': 3.4142, 'learning_rate': 7.524025512292874e-07, 'epoch': 4.41} + 28%|██▊ | 102340/371472 [8:08:16<21:46:38, 3.43it/s] 28%|██▊ | 102341/371472 [8:08:17<21:41:27, 3.45it/s] 28%|██▊ | 102342/371472 [8:08:17<20:52:44, 3.58it/s] 28%|██▊ | 102343/371472 [8:08:17<21:57:48, 3.40it/s] 28%|██▊ | 102344/371472 [8:08:18<21:26:15, 3.49it/s] 28%|██▊ | 102345/371472 [8:08:18<21:54:18, 3.41it/s] 28%|██▊ | 102346/371472 [8:08:18<22:19:48, 3.35it/s] 28%|██▊ | 102347/371472 [8:08:18<21:36:12, 3.46it/s] 28%|██▊ | 102348/371472 [8:08:19<21:14:30, 3.52it/s] 28%|██▊ | 102349/371472 [8:08:19<21:14:17, 3.52it/s] 28%|██▊ | 102350/371472 [8:08:19<21:16:49, 3.51it/s] 28%|██▊ | 102351/371472 [8:08:20<21:37:24, 3.46it/s] 28%|██▊ | 102352/371472 [8:08:20<20:46:38, 3.60it/s] 28%|██▊ | 102353/371472 [8:08:20<20:33:48, 3.64it/s] 28%|██▊ | 102354/371472 [8:08:20<20:17:46, 3.68it/s] 28%|██▊ | 102355/371472 [8:08:21<19:58:37, 3.74it/s] 28%|██▊ | 102356/371472 [8:08:21<20:32:45, 3.64it/s] 28%|██▊ | 102357/371472 [8:08:21<20:11:10, 3.70it/s] 28%|██▊ | 102358/371472 [8:08:21<19:57:11, 3.75it/s] 28%|██▊ | 102359/371472 [8:08:22<20:42:02, 3.61it/s] 28%|██▊ | 102360/371472 [8:08:22<20:49:04, 3.59it/s] {'loss': 3.3853, 'learning_rate': 7.523540692538085e-07, 'epoch': 4.41} + 28%|██▊ | 102360/371472 [8:08:22<20:49:04, 3.59it/s] 28%|██▊ | 102361/371472 [8:08:22<20:52:09, 3.58it/s] 28%|██▊ | 102362/371472 [8:08:23<20:44:21, 3.60it/s] 28%|██▊ | 102363/371472 [8:08:23<20:46:45, 3.60it/s] 28%|██▊ | 102364/371472 [8:08:23<21:15:02, 3.52it/s] 28%|██▊ | 102365/371472 [8:08:23<22:04:04, 3.39it/s] 28%|██▊ | 102366/371472 [8:08:24<22:26:51, 3.33it/s] 28%|██▊ | 102367/371472 [8:08:24<21:46:14, 3.43it/s] 28%|██▊ | 102368/371472 [8:08:24<21:44:14, 3.44it/s] 28%|██▊ | 102369/371472 [8:08:25<21:56:34, 3.41it/s] 28%|██▊ | 102370/371472 [8:08:25<21:34:30, 3.46it/s] 28%|██▊ | 102371/371472 [8:08:25<23:39:21, 3.16it/s] 28%|██▊ | 102372/371472 [8:08:26<23:09:38, 3.23it/s] 28%|██▊ | 102373/371472 [8:08:26<22:31:32, 3.32it/s] 28%|██▊ | 102374/371472 [8:08:26<24:38:31, 3.03it/s] 28%|██▊ | 102375/371472 [8:08:27<24:26:05, 3.06it/s] 28%|██▊ | 102376/371472 [8:08:27<24:39:16, 3.03it/s] 28%|██▊ | 102377/371472 [8:08:27<24:00:33, 3.11it/s] 28%|██▊ | 102378/371472 [8:08:28<22:20:35, 3.35it/s] 28%|██▊ | 102379/371472 [8:08:28<21:29:31, 3.48it/s] 28%|██▊ | 102380/371472 [8:08:28<21:23:52, 3.49it/s] {'loss': 3.4031, 'learning_rate': 7.523055872783294e-07, 'epoch': 4.41} + 28%|██▊ | 102380/371472 [8:08:28<21:23:52, 3.49it/s] 28%|██▊ | 102381/371472 [8:08:28<22:49:06, 3.28it/s] 28%|██▊ | 102382/371472 [8:08:29<21:33:49, 3.47it/s] 28%|██▊ | 102383/371472 [8:08:29<20:56:53, 3.57it/s] 28%|██▊ | 102384/371472 [8:08:29<20:50:48, 3.59it/s] 28%|██▊ | 102385/371472 [8:08:29<20:48:14, 3.59it/s] 28%|██▊ | 102386/371472 [8:08:30<22:33:55, 3.31it/s] 28%|██▊ | 102387/371472 [8:08:30<22:02:32, 3.39it/s] 28%|██▊ | 102388/371472 [8:08:30<21:36:15, 3.46it/s] 28%|██▊ | 102389/371472 [8:08:31<22:52:42, 3.27it/s] 28%|██▊ | 102390/371472 [8:08:31<22:07:19, 3.38it/s] 28%|██▊ | 102391/371472 [8:08:31<21:48:00, 3.43it/s] 28%|██▊ | 102392/371472 [8:08:32<24:19:56, 3.07it/s] 28%|██▊ | 102393/371472 [8:08:32<23:03:28, 3.24it/s] 28%|██▊ | 102394/371472 [8:08:32<22:58:58, 3.25it/s] 28%|██▊ | 102395/371472 [8:08:33<22:27:54, 3.33it/s] 28%|██▊ | 102396/371472 [8:08:33<22:13:33, 3.36it/s] 28%|██▊ | 102397/371472 [8:08:33<21:06:22, 3.54it/s] 28%|██▊ | 102398/371472 [8:08:33<22:32:23, 3.32it/s] 28%|██▊ | 102399/371472 [8:08:34<21:20:47, 3.50it/s] 28%|██▊ | 102400/371472 [8:08:34<21:08:40, 3.53it/s] {'loss': 3.4219, 'learning_rate': 7.522571053028507e-07, 'epoch': 4.41} + 28%|██▊ | 102400/371472 [8:08:34<21:08:40, 3.53it/s] 28%|██▊ | 102401/371472 [8:08:34<22:15:45, 3.36it/s] 28%|██▊ | 102402/371472 [8:08:35<22:18:56, 3.35it/s] 28%|██▊ | 102403/371472 [8:08:35<21:39:54, 3.45it/s] 28%|██▊ | 102404/371472 [8:08:35<21:13:12, 3.52it/s] 28%|██▊ | 102405/371472 [8:08:35<21:05:46, 3.54it/s] 28%|██▊ | 102406/371472 [8:08:36<20:36:10, 3.63it/s] 28%|██▊ | 102407/371472 [8:08:36<20:57:38, 3.57it/s] 28%|██▊ | 102408/371472 [8:08:36<20:39:00, 3.62it/s] 28%|██▊ | 102409/371472 [8:08:37<21:27:51, 3.48it/s] 28%|██▊ | 102410/371472 [8:08:37<21:31:42, 3.47it/s] 28%|██▊ | 102411/371472 [8:08:37<21:33:46, 3.47it/s] 28%|██▊ | 102412/371472 [8:08:37<21:16:05, 3.51it/s] 28%|██▊ | 102413/371472 [8:08:38<20:58:29, 3.56it/s] 28%|██▊ | 102414/371472 [8:08:38<20:46:23, 3.60it/s] 28%|██▊ | 102415/371472 [8:08:38<20:46:30, 3.60it/s] 28%|██▊ | 102416/371472 [8:08:38<20:43:57, 3.60it/s] 28%|██▊ | 102417/371472 [8:08:39<20:10:35, 3.70it/s] 28%|██▊ | 102418/371472 [8:08:39<22:57:04, 3.26it/s] 28%|██▊ | 102419/371472 [8:08:39<22:12:30, 3.37it/s] 28%|██▊ | 102420/371472 [8:08:40<21:14:19, 3.52it/s] {'loss': 3.6066, 'learning_rate': 7.522086233273719e-07, 'epoch': 4.41} + 28%|██▊ | 102420/371472 [8:08:40<21:14:19, 3.52it/s] 28%|██▊ | 102421/371472 [8:08:40<21:28:02, 3.48it/s] 28%|██▊ | 102422/371472 [8:08:40<21:11:30, 3.53it/s] 28%|██▊ | 102423/371472 [8:08:41<21:46:28, 3.43it/s] 28%|██▊ | 102424/371472 [8:08:41<21:16:47, 3.51it/s] 28%|██▊ | 102425/371472 [8:08:41<20:50:43, 3.59it/s] 28%|██▊ | 102426/371472 [8:08:41<20:31:59, 3.64it/s] 28%|██▊ | 102427/371472 [8:08:42<21:34:18, 3.46it/s] 28%|██▊ | 102428/371472 [8:08:42<21:01:10, 3.56it/s] 28%|██▊ | 102429/371472 [8:08:42<21:03:20, 3.55it/s] 28%|██▊ | 102430/371472 [8:08:43<22:18:59, 3.35it/s] 28%|██▊ | 102431/371472 [8:08:43<21:17:04, 3.51it/s] 28%|██▊ | 102432/371472 [8:08:43<20:30:36, 3.64it/s] 28%|██▊ | 102433/371472 [8:08:43<20:25:59, 3.66it/s] 28%|██▊ | 102434/371472 [8:08:44<20:47:34, 3.59it/s] 28%|██▊ | 102435/371472 [8:08:44<21:13:17, 3.52it/s] 28%|██▊ | 102436/371472 [8:08:44<21:08:22, 3.54it/s] 28%|██▊ | 102437/371472 [8:08:44<21:18:11, 3.51it/s] 28%|██▊ | 102438/371472 [8:08:45<22:29:32, 3.32it/s] 28%|██▊ | 102439/371472 [8:08:45<22:18:49, 3.35it/s] 28%|██▊ | 102440/371472 [8:08:45<21:45:03, 3.44it/s] {'loss': 3.4492, 'learning_rate': 7.521601413518929e-07, 'epoch': 4.41} + 28%|██▊ | 102440/371472 [8:08:45<21:45:03, 3.44it/s] 28%|██▊ | 102441/371472 [8:08:46<22:40:08, 3.30it/s] 28%|██▊ | 102442/371472 [8:08:46<22:24:21, 3.34it/s] 28%|██▊ | 102443/371472 [8:08:46<21:10:39, 3.53it/s] 28%|██▊ | 102444/371472 [8:08:46<20:22:56, 3.67it/s] 28%|██▊ | 102445/371472 [8:08:47<21:17:56, 3.51it/s] 28%|██▊ | 102446/371472 [8:08:47<20:49:23, 3.59it/s] 28%|██▊ | 102447/371472 [8:08:47<21:20:27, 3.50it/s] 28%|██▊ | 102448/371472 [8:08:48<21:36:02, 3.46it/s] 28%|██▊ | 102449/371472 [8:08:48<22:22:39, 3.34it/s] 28%|██▊ | 102450/371472 [8:08:48<24:10:09, 3.09it/s] 28%|██▊ | 102451/371472 [8:08:49<23:38:27, 3.16it/s] 28%|██▊ | 102452/371472 [8:08:49<22:25:33, 3.33it/s] 28%|██▊ | 102453/371472 [8:08:49<21:14:13, 3.52it/s] 28%|██▊ | 102454/371472 [8:08:49<20:50:52, 3.58it/s] 28%|██▊ | 102455/371472 [8:08:50<21:27:23, 3.48it/s] 28%|██▊ | 102456/371472 [8:08:50<21:02:22, 3.55it/s] 28%|██▊ | 102457/371472 [8:08:50<22:00:23, 3.40it/s] 28%|██▊ | 102458/371472 [8:08:51<21:14:29, 3.52it/s] 28%|██▊ | 102459/371472 [8:08:51<23:05:54, 3.24it/s] 28%|██▊ | 102460/371472 [8:08:51<22:56:21, 3.26it/s] {'loss': 3.4515, 'learning_rate': 7.521116593764139e-07, 'epoch': 4.41} + 28%|██▊ | 102460/371472 [8:08:51<22:56:21, 3.26it/s] 28%|██▊ | 102461/371472 [8:08:52<22:07:01, 3.38it/s] 28%|██▊ | 102462/371472 [8:08:52<21:17:53, 3.51it/s] 28%|██▊ | 102463/371472 [8:08:52<21:56:39, 3.41it/s] 28%|██▊ | 102464/371472 [8:08:52<21:12:57, 3.52it/s] 28%|██▊ | 102465/371472 [8:08:53<20:53:39, 3.58it/s] 28%|██▊ | 102466/371472 [8:08:53<21:51:50, 3.42it/s] 28%|██▊ | 102467/371472 [8:08:53<21:59:02, 3.40it/s] 28%|██▊ | 102468/371472 [8:08:54<21:46:42, 3.43it/s] 28%|██▊ | 102469/371472 [8:08:54<21:07:27, 3.54it/s] 28%|██▊ | 102470/371472 [8:08:54<21:55:11, 3.41it/s] 28%|██▊ | 102471/371472 [8:08:54<21:48:26, 3.43it/s] 28%|██▊ | 102472/371472 [8:08:55<21:11:29, 3.53it/s] 28%|██▊ | 102473/371472 [8:08:55<20:11:35, 3.70it/s] 28%|██▊ | 102474/371472 [8:08:55<20:18:42, 3.68it/s] 28%|██▊ | 102475/371472 [8:08:55<20:05:34, 3.72it/s] 28%|██▊ | 102476/371472 [8:08:56<19:52:01, 3.76it/s] 28%|██▊ | 102477/371472 [8:08:56<19:36:24, 3.81it/s] 28%|██▊ | 102478/371472 [8:08:56<19:45:27, 3.78it/s] 28%|██▊ | 102479/371472 [8:08:57<21:04:54, 3.54it/s] 28%|██▊ | 102480/371472 [8:08:57<23:53:40, 3.13it/s] {'loss': 3.7738, 'learning_rate': 7.520631774009352e-07, 'epoch': 4.41} + 28%|██▊ | 102480/371472 [8:08:57<23:53:40, 3.13it/s] 28%|██▊ | 102481/371472 [8:08:57<22:56:19, 3.26it/s] 28%|██▊ | 102482/371472 [8:08:58<24:14:55, 3.08it/s] 28%|██▊ | 102483/371472 [8:08:58<22:47:29, 3.28it/s] 28%|██▊ | 102484/371472 [8:08:58<21:46:40, 3.43it/s] 28%|██▊ | 102485/371472 [8:08:58<21:45:26, 3.43it/s] 28%|██▊ | 102486/371472 [8:08:59<22:06:40, 3.38it/s] 28%|██▊ | 102487/371472 [8:08:59<22:21:31, 3.34it/s] 28%|██▊ | 102488/371472 [8:08:59<22:46:53, 3.28it/s] 28%|██▊ | 102489/371472 [8:09:00<23:58:05, 3.12it/s] 28%|██▊ | 102490/371472 [8:09:00<22:39:47, 3.30it/s] 28%|██▊ | 102491/371472 [8:09:00<21:18:59, 3.51it/s] 28%|██▊ | 102492/371472 [8:09:01<21:31:13, 3.47it/s] 28%|██▊ | 102493/371472 [8:09:01<20:21:04, 3.67it/s] 28%|██▊ | 102494/371472 [8:09:01<21:48:45, 3.43it/s] 28%|██▊ | 102495/371472 [8:09:01<21:08:34, 3.53it/s] 28%|██▊ | 102496/371472 [8:09:02<20:53:47, 3.58it/s] 28%|██▊ | 102497/371472 [8:09:02<20:38:43, 3.62it/s] 28%|██▊ | 102498/371472 [8:09:02<20:55:12, 3.57it/s] 28%|██▊ | 102499/371472 [8:09:02<21:45:37, 3.43it/s] 28%|██▊ | 102500/371472 [8:09:03<21:19:11, 3.50it/s] {'loss': 3.4874, 'learning_rate': 7.520146954254563e-07, 'epoch': 4.41} + 28%|██▊ | 102500/371472 [8:09:03<21:19:11, 3.50it/s] 28%|██▊ | 102501/371472 [8:09:03<23:23:17, 3.19it/s] 28%|██▊ | 102502/371472 [8:09:03<22:15:30, 3.36it/s] 28%|██▊ | 102503/371472 [8:09:04<22:02:21, 3.39it/s] 28%|██▊ | 102504/371472 [8:09:04<22:20:44, 3.34it/s] 28%|██▊ | 102505/371472 [8:09:04<24:02:45, 3.11it/s] 28%|██▊ | 102506/371472 [8:09:05<24:02:32, 3.11it/s] 28%|██▊ | 102507/371472 [8:09:05<24:24:32, 3.06it/s] 28%|██▊ | 102508/371472 [8:09:05<23:13:27, 3.22it/s] 28%|██▊ | 102509/371472 [8:09:06<22:03:54, 3.39it/s] 28%|██▊ | 102510/371472 [8:09:06<23:46:08, 3.14it/s] 28%|██▊ | 102511/371472 [8:09:06<22:46:05, 3.28it/s] 28%|██▊ | 102512/371472 [8:09:06<21:37:42, 3.45it/s] 28%|██▊ | 102513/371472 [8:09:07<22:08:45, 3.37it/s] 28%|██▊ | 102514/371472 [8:09:07<21:20:43, 3.50it/s] 28%|██▊ | 102515/371472 [8:09:07<22:37:49, 3.30it/s] 28%|██▊ | 102516/371472 [8:09:08<22:27:15, 3.33it/s] 28%|██▊ | 102517/371472 [8:09:08<22:07:09, 3.38it/s] 28%|██▊ | 102518/371472 [8:09:08<22:06:27, 3.38it/s] 28%|██▊ | 102519/371472 [8:09:09<20:58:38, 3.56it/s] 28%|██▊ | 102520/371472 [8:09:09<20:42:38, 3.61it/s] {'loss': 3.3105, 'learning_rate': 7.519662134499774e-07, 'epoch': 4.42} + 28%|██▊ | 102520/371472 [8:09:09<20:42:38, 3.61it/s] 28%|██▊ | 102521/371472 [8:09:09<20:58:53, 3.56it/s] 28%|██▊ | 102522/371472 [8:09:09<21:12:56, 3.52it/s] 28%|██▊ | 102523/371472 [8:09:10<20:22:08, 3.67it/s] 28%|██▊ | 102524/371472 [8:09:10<20:32:44, 3.64it/s] 28%|██▊ | 102525/371472 [8:09:10<20:33:24, 3.63it/s] 28%|██▊ | 102526/371472 [8:09:11<22:32:02, 3.32it/s] 28%|██▊ | 102527/371472 [8:09:11<23:23:39, 3.19it/s] 28%|██▊ | 102528/371472 [8:09:11<22:58:15, 3.25it/s] 28%|██▊ | 102529/371472 [8:09:11<23:10:08, 3.22it/s] 28%|██▊ | 102530/371472 [8:09:12<21:59:47, 3.40it/s] 28%|██▊ | 102531/371472 [8:09:12<21:50:31, 3.42it/s] 28%|██▊ | 102532/371472 [8:09:12<20:58:40, 3.56it/s] 28%|██▊ | 102533/371472 [8:09:13<21:24:41, 3.49it/s] 28%|██▊ | 102534/371472 [8:09:13<22:30:07, 3.32it/s] 28%|██▊ | 102535/371472 [8:09:13<22:39:27, 3.30it/s] 28%|██▊ | 102536/371472 [8:09:13<22:04:25, 3.38it/s] 28%|██▊ | 102537/371472 [8:09:14<22:10:06, 3.37it/s] 28%|██▊ | 102538/371472 [8:09:14<21:40:56, 3.45it/s] 28%|██▊ | 102539/371472 [8:09:14<21:22:40, 3.49it/s] 28%|██▊ | 102540/371472 [8:09:15<21:18:22, 3.51it/s] {'loss': 3.6158, 'learning_rate': 7.519177314744984e-07, 'epoch': 4.42} + 28%|██▊ | 102540/371472 [8:09:15<21:18:22, 3.51it/s] 28%|██▊ | 102541/371472 [8:09:15<23:15:17, 3.21it/s] 28%|██▊ | 102542/371472 [8:09:15<22:22:40, 3.34it/s] 28%|██▊ | 102543/371472 [8:09:16<21:32:28, 3.47it/s] 28%|██▊ | 102544/371472 [8:09:16<21:36:13, 3.46it/s] 28%|██▊ | 102545/371472 [8:09:16<23:07:21, 3.23it/s] 28%|██▊ | 102546/371472 [8:09:16<21:46:25, 3.43it/s] 28%|██▊ | 102547/371472 [8:09:17<21:31:08, 3.47it/s] 28%|██▊ | 102548/371472 [8:09:17<21:06:05, 3.54it/s] 28%|██▊ | 102549/371472 [8:09:17<22:13:33, 3.36it/s] 28%|██▊ | 102550/371472 [8:09:18<21:56:24, 3.40it/s] 28%|██▊ | 102551/371472 [8:09:18<21:29:00, 3.48it/s] 28%|██▊ | 102552/371472 [8:09:18<21:22:16, 3.50it/s] 28%|██▊ | 102553/371472 [8:09:19<23:05:16, 3.24it/s] 28%|██▊ | 102554/371472 [8:09:19<21:46:10, 3.43it/s] 28%|██▊ | 102555/371472 [8:09:19<22:19:06, 3.35it/s] 28%|██▊ | 102556/371472 [8:09:19<21:55:57, 3.41it/s] 28%|██▊ | 102557/371472 [8:09:20<21:52:47, 3.41it/s] 28%|██▊ | 102558/371472 [8:09:20<21:20:10, 3.50it/s] 28%|██▊ | 102559/371472 [8:09:20<20:44:23, 3.60it/s] 28%|██▊ | 102560/371472 [8:09:21<22:31:15, 3.32it/s] {'loss': 3.5226, 'learning_rate': 7.518692494990195e-07, 'epoch': 4.42} + 28%|██▊ | 102560/371472 [8:09:21<22:31:15, 3.32it/s] 28%|██▊ | 102561/371472 [8:09:21<22:18:19, 3.35it/s] 28%|██▊ | 102562/371472 [8:09:21<23:32:07, 3.17it/s] 28%|██▊ | 102563/371472 [8:09:21<23:07:16, 3.23it/s] 28%|██▊ | 102564/371472 [8:09:22<22:02:25, 3.39it/s] 28%|██▊ | 102565/371472 [8:09:22<21:14:58, 3.52it/s] 28%|██▊ | 102566/371472 [8:09:22<21:54:47, 3.41it/s] 28%|██▊ | 102567/371472 [8:09:23<23:36:29, 3.16it/s] 28%|██▊ | 102568/371472 [8:09:23<22:45:55, 3.28it/s] 28%|██▊ | 102569/371472 [8:09:23<23:00:45, 3.25it/s] 28%|██▊ | 102570/371472 [8:09:24<22:50:02, 3.27it/s] 28%|██▊ | 102571/371472 [8:09:24<22:33:17, 3.31it/s] 28%|██▊ | 102572/371472 [8:09:24<23:19:11, 3.20it/s] 28%|██▊ | 102573/371472 [8:09:25<23:23:57, 3.19it/s] 28%|██▊ | 102574/371472 [8:09:25<23:04:49, 3.24it/s] 28%|██▊ | 102575/371472 [8:09:25<23:04:26, 3.24it/s] 28%|██▊ | 102576/371472 [8:09:25<23:28:16, 3.18it/s] 28%|██▊ | 102577/371472 [8:09:26<22:34:05, 3.31it/s] 28%|██▊ | 102578/371472 [8:09:26<22:31:39, 3.32it/s] 28%|██▊ | 102579/371472 [8:09:26<23:03:01, 3.24it/s] 28%|██▊ | 102580/371472 [8:09:27<22:02:30, 3.39it/s] {'loss': 3.4994, 'learning_rate': 7.518207675235407e-07, 'epoch': 4.42} + 28%|██▊ | 102580/371472 [8:09:27<22:02:30, 3.39it/s] 28%|██▊ | 102581/371472 [8:09:27<21:30:20, 3.47it/s] 28%|██▊ | 102582/371472 [8:09:27<21:37:58, 3.45it/s] 28%|██▊ | 102583/371472 [8:09:27<21:16:24, 3.51it/s] 28%|██▊ | 102584/371472 [8:09:28<20:34:48, 3.63it/s] 28%|██▊ | 102585/371472 [8:09:28<21:40:32, 3.45it/s] 28%|██▊ | 102586/371472 [8:09:28<23:13:01, 3.22it/s] 28%|██▊ | 102587/371472 [8:09:29<23:45:39, 3.14it/s] 28%|██▊ | 102588/371472 [8:09:29<22:49:12, 3.27it/s] 28%|██▊ | 102589/371472 [8:09:29<22:06:49, 3.38it/s] 28%|██▊ | 102590/371472 [8:09:30<21:22:14, 3.49it/s] 28%|██▊ | 102591/371472 [8:09:30<21:28:44, 3.48it/s] 28%|██▊ | 102592/371472 [8:09:30<20:52:24, 3.58it/s] 28%|██▊ | 102593/371472 [8:09:30<20:44:19, 3.60it/s] 28%|██▊ | 102594/371472 [8:09:31<21:20:52, 3.50it/s] 28%|██▊ | 102595/371472 [8:09:31<20:32:56, 3.63it/s] 28%|██▊ | 102596/371472 [8:09:31<20:06:05, 3.72it/s] 28%|██▊ | 102597/371472 [8:09:32<26:02:06, 2.87it/s] 28%|██▊ | 102598/371472 [8:09:32<23:48:40, 3.14it/s] 28%|██▊ | 102599/371472 [8:09:32<22:57:59, 3.25it/s] 28%|██▊ | 102600/371472 [8:09:33<23:49:36, 3.13it/s] {'loss': 3.4928, 'learning_rate': 7.517722855480618e-07, 'epoch': 4.42} + 28%|██▊ | 102600/371472 [8:09:33<23:49:36, 3.13it/s] 28%|██▊ | 102601/371472 [8:09:33<23:58:54, 3.11it/s] 28%|██▊ | 102602/371472 [8:09:33<23:52:37, 3.13it/s] 28%|██▊ | 102603/371472 [8:09:34<22:37:58, 3.30it/s] 28%|██▊ | 102604/371472 [8:09:34<21:52:58, 3.41it/s] 28%|██▊ | 102605/371472 [8:09:34<20:39:09, 3.62it/s] 28%|██▊ | 102606/371472 [8:09:34<20:04:23, 3.72it/s] 28%|██▊ | 102607/371472 [8:09:35<19:53:34, 3.75it/s] 28%|██▊ | 102608/371472 [8:09:35<20:50:00, 3.58it/s] 28%|██▊ | 102609/371472 [8:09:35<22:11:21, 3.37it/s] 28%|██▊ | 102610/371472 [8:09:35<22:01:23, 3.39it/s] 28%|██▊ | 102611/371472 [8:09:36<22:33:52, 3.31it/s] 28%|██▊ | 102612/371472 [8:09:36<21:53:26, 3.41it/s] 28%|██▊ | 102613/371472 [8:09:36<21:48:40, 3.42it/s] 28%|██▊ | 102614/371472 [8:09:37<20:33:38, 3.63it/s] 28%|██▊ | 102615/371472 [8:09:37<20:35:15, 3.63it/s] 28%|██▊ | 102616/371472 [8:09:37<25:49:54, 2.89it/s] 28%|██▊ | 102617/371472 [8:09:38<24:56:50, 2.99it/s] 28%|██▊ | 102618/371472 [8:09:38<22:50:51, 3.27it/s] 28%|██▊ | 102619/371472 [8:09:38<22:28:13, 3.32it/s] 28%|██▊ | 102620/371472 [8:09:38<21:28:14, 3.48it/s] {'loss': 3.6281, 'learning_rate': 7.517238035725828e-07, 'epoch': 4.42} + 28%|██▊ | 102620/371472 [8:09:38<21:28:14, 3.48it/s] 28%|██▊ | 102621/371472 [8:09:39<22:52:07, 3.27it/s] 28%|██▊ | 102622/371472 [8:09:39<22:42:16, 3.29it/s] 28%|██▊ | 102623/371472 [8:09:39<22:14:23, 3.36it/s] 28%|██▊ | 102624/371472 [8:09:40<22:21:04, 3.34it/s] 28%|██▊ | 102625/371472 [8:09:40<22:55:59, 3.26it/s] 28%|██▊ | 102626/371472 [8:09:40<22:18:24, 3.35it/s] 28%|██▊ | 102627/371472 [8:09:41<21:14:24, 3.52it/s] 28%|██▊ | 102628/371472 [8:09:41<21:38:06, 3.45it/s] 28%|██▊ | 102629/371472 [8:09:41<21:40:38, 3.45it/s] 28%|██▊ | 102630/371472 [8:09:41<21:03:08, 3.55it/s] 28%|██▊ | 102631/371472 [8:09:42<20:40:44, 3.61it/s] 28%|██▊ | 102632/371472 [8:09:42<20:30:13, 3.64it/s] 28%|██▊ | 102633/371472 [8:09:42<19:40:26, 3.80it/s] 28%|██▊ | 102634/371472 [8:09:42<20:04:20, 3.72it/s] 28%|██▊ | 102635/371472 [8:09:43<20:21:21, 3.67it/s] 28%|██▊ | 102636/371472 [8:09:43<20:12:32, 3.70it/s] 28%|██▊ | 102637/371472 [8:09:43<19:59:53, 3.73it/s] 28%|██▊ | 102638/371472 [8:09:44<21:04:07, 3.54it/s] 28%|██▊ | 102639/371472 [8:09:44<20:32:05, 3.64it/s] 28%|██▊ | 102640/371472 [8:09:44<21:09:40, 3.53it/s] {'loss': 3.4549, 'learning_rate': 7.51675321597104e-07, 'epoch': 4.42} + 28%|██▊ | 102640/371472 [8:09:44<21:09:40, 3.53it/s] 28%|██▊ | 102641/371472 [8:09:44<22:07:26, 3.38it/s] 28%|██▊ | 102642/371472 [8:09:45<21:47:23, 3.43it/s] 28%|██▊ | 102643/371472 [8:09:45<21:48:36, 3.42it/s] 28%|██▊ | 102644/371472 [8:09:45<21:04:42, 3.54it/s] 28%|██▊ | 102645/371472 [8:09:46<20:20:37, 3.67it/s] 28%|██▊ | 102646/371472 [8:09:46<19:59:28, 3.74it/s] 28%|██▊ | 102647/371472 [8:09:46<20:13:20, 3.69it/s] 28%|██▊ | 102648/371472 [8:09:46<20:43:47, 3.60it/s] 28%|██▊ | 102649/371472 [8:09:47<20:14:12, 3.69it/s] 28%|██▊ | 102650/371472 [8:09:47<20:25:07, 3.66it/s] 28%|██▊ | 102651/371472 [8:09:47<20:30:58, 3.64it/s] 28%|██▊ | 102652/371472 [8:09:47<20:17:23, 3.68it/s] 28%|██▊ | 102653/371472 [8:09:48<20:56:41, 3.57it/s] 28%|██▊ | 102654/371472 [8:09:48<20:29:33, 3.64it/s] 28%|██▊ | 102655/371472 [8:09:48<21:17:48, 3.51it/s] 28%|██▊ | 102656/371472 [8:09:49<21:15:02, 3.51it/s] 28%|██▊ | 102657/371472 [8:09:49<21:52:00, 3.41it/s] 28%|██▊ | 102658/371472 [8:09:49<21:41:22, 3.44it/s] 28%|██▊ | 102659/371472 [8:09:49<21:29:42, 3.47it/s] 28%|██▊ | 102660/371472 [8:09:50<21:55:35, 3.41it/s] {'loss': 3.5594, 'learning_rate': 7.516268396216252e-07, 'epoch': 4.42} + 28%|██▊ | 102660/371472 [8:09:50<21:55:35, 3.41it/s] 28%|██▊ | 102661/371472 [8:09:50<25:21:37, 2.94it/s] 28%|██▊ | 102662/371472 [8:09:51<24:40:44, 3.03it/s] 28%|██▊ | 102663/371472 [8:09:51<23:16:35, 3.21it/s] 28%|██▊ | 102664/371472 [8:09:51<22:06:14, 3.38it/s] 28%|██▊ | 102665/371472 [8:09:51<21:51:43, 3.42it/s] 28%|██▊ | 102666/371472 [8:09:52<21:32:34, 3.47it/s] 28%|██▊ | 102667/371472 [8:09:52<21:38:40, 3.45it/s] 28%|██▊ | 102668/371472 [8:09:52<22:03:45, 3.38it/s] 28%|██▊ | 102669/371472 [8:09:53<21:47:19, 3.43it/s] 28%|██▊ | 102670/371472 [8:09:53<21:27:04, 3.48it/s] 28%|██▊ | 102671/371472 [8:09:53<21:31:47, 3.47it/s] 28%|██▊ | 102672/371472 [8:09:53<21:26:49, 3.48it/s] 28%|██▊ | 102673/371472 [8:09:54<21:35:10, 3.46it/s] 28%|██▊ | 102674/371472 [8:09:54<23:12:32, 3.22it/s] 28%|██▊ | 102675/371472 [8:09:54<23:23:10, 3.19it/s] 28%|██▊ | 102676/371472 [8:09:55<25:32:32, 2.92it/s] 28%|██▊ | 102677/371472 [8:09:55<24:05:19, 3.10it/s] 28%|██▊ | 102678/371472 [8:09:55<23:25:22, 3.19it/s] 28%|██▊ | 102679/371472 [8:09:56<22:34:12, 3.31it/s] 28%|██▊ | 102680/371472 [8:09:56<22:04:09, 3.38it/s] {'loss': 3.4348, 'learning_rate': 7.515783576461461e-07, 'epoch': 4.42} + 28%|██▊ | 102680/371472 [8:09:56<22:04:09, 3.38it/s] 28%|██▊ | 102681/371472 [8:09:56<22:17:40, 3.35it/s] 28%|██▊ | 102682/371472 [8:09:57<22:47:30, 3.28it/s] 28%|██▊ | 102683/371472 [8:09:57<23:41:36, 3.15it/s] 28%|██▊ | 102684/371472 [8:09:57<23:07:27, 3.23it/s] 28%|██▊ | 102685/371472 [8:09:57<22:31:27, 3.31it/s] 28%|██▊ | 102686/371472 [8:09:58<22:27:48, 3.32it/s] 28%|██▊ | 102687/371472 [8:09:58<22:24:06, 3.33it/s] 28%|██▊ | 102688/371472 [8:09:58<22:50:20, 3.27it/s] 28%|██▊ | 102689/371472 [8:09:59<22:06:09, 3.38it/s] 28%|██▊ | 102690/371472 [8:09:59<21:59:52, 3.39it/s] 28%|██▊ | 102691/371472 [8:09:59<21:48:04, 3.42it/s] 28%|██▊ | 102692/371472 [8:09:59<21:27:22, 3.48it/s] 28%|██▊ | 102693/371472 [8:10:00<24:41:28, 3.02it/s] 28%|██▊ | 102694/371472 [8:10:00<23:16:22, 3.21it/s] 28%|██▊ | 102695/371472 [8:10:00<22:32:38, 3.31it/s] 28%|██▊ | 102696/371472 [8:10:01<21:22:28, 3.49it/s] 28%|██▊ | 102697/371472 [8:10:01<20:53:08, 3.57it/s] 28%|██▊ | 102698/371472 [8:10:01<23:08:21, 3.23it/s] 28%|██▊ | 102699/371472 [8:10:02<21:59:38, 3.39it/s] 28%|██▊ | 102700/371472 [8:10:02<21:55:04, 3.41it/s] {'loss': 3.4455, 'learning_rate': 7.515298756706672e-07, 'epoch': 4.42} + 28%|██▊ | 102700/371472 [8:10:02<21:55:04, 3.41it/s] 28%|██▊ | 102701/371472 [8:10:02<21:31:45, 3.47it/s] 28%|██▊ | 102702/371472 [8:10:02<21:01:58, 3.55it/s] 28%|██▊ | 102703/371472 [8:10:03<20:54:13, 3.57it/s] 28%|██▊ | 102704/371472 [8:10:03<20:29:54, 3.64it/s] 28%|██▊ | 102705/371472 [8:10:03<24:33:39, 3.04it/s] 28%|██▊ | 102706/371472 [8:10:04<23:06:23, 3.23it/s] 28%|██▊ | 102707/371472 [8:10:04<23:52:32, 3.13it/s] 28%|██▊ | 102708/371472 [8:10:04<22:35:21, 3.30it/s] 28%|██▊ | 102709/371472 [8:10:05<21:32:13, 3.47it/s] 28%|██▊ | 102710/371472 [8:10:05<21:11:40, 3.52it/s] 28%|██▊ | 102711/371472 [8:10:05<20:57:31, 3.56it/s] 28%|██▊ | 102712/371472 [8:10:05<20:43:56, 3.60it/s] 28%|██▊ | 102713/371472 [8:10:06<21:30:46, 3.47it/s] 28%|██▊ | 102714/371472 [8:10:06<20:48:46, 3.59it/s] 28%|██▊ | 102715/371472 [8:10:06<20:26:19, 3.65it/s] 28%|██▊ | 102716/371472 [8:10:07<21:22:30, 3.49it/s] 28%|██▊ | 102717/371472 [8:10:07<20:41:39, 3.61it/s] 28%|██▊ | 102718/371472 [8:10:07<20:42:46, 3.60it/s] 28%|██▊ | 102719/371472 [8:10:07<21:30:52, 3.47it/s] 28%|██▊ | 102720/371472 [8:10:08<21:13:33, 3.52it/s] {'loss': 3.5326, 'learning_rate': 7.514813936951884e-07, 'epoch': 4.42} + 28%|██▊ | 102720/371472 [8:10:08<21:13:33, 3.52it/s] 28%|██▊ | 102721/371472 [8:10:08<22:08:01, 3.37it/s] 28%|██▊ | 102722/371472 [8:10:08<21:54:20, 3.41it/s] 28%|██▊ | 102723/371472 [8:10:09<22:01:53, 3.39it/s] 28%|██▊ | 102724/371472 [8:10:09<21:52:16, 3.41it/s] 28%|██▊ | 102725/371472 [8:10:09<22:49:11, 3.27it/s] 28%|██▊ | 102726/371472 [8:10:09<22:13:11, 3.36it/s] 28%|██▊ | 102727/371472 [8:10:10<21:16:21, 3.51it/s] 28%|██▊ | 102728/371472 [8:10:10<21:17:06, 3.51it/s] 28%|██▊ | 102729/371472 [8:10:10<20:33:27, 3.63it/s] 28%|██▊ | 102730/371472 [8:10:11<21:02:09, 3.55it/s] 28%|██▊ | 102731/371472 [8:10:11<21:25:57, 3.48it/s] 28%|██▊ | 102732/371472 [8:10:11<21:50:07, 3.42it/s] 28%|██▊ | 102733/371472 [8:10:11<20:48:51, 3.59it/s] 28%|██▊ | 102734/371472 [8:10:12<21:38:21, 3.45it/s] 28%|██▊ | 102735/371472 [8:10:12<21:50:45, 3.42it/s] 28%|██▊ | 102736/371472 [8:10:12<21:02:47, 3.55it/s] 28%|██▊ | 102737/371472 [8:10:13<21:12:35, 3.52it/s] 28%|██▊ | 102738/371472 [8:10:13<20:34:15, 3.63it/s] 28%|██▊ | 102739/371472 [8:10:13<20:46:19, 3.59it/s] 28%|██▊ | 102740/371472 [8:10:13<20:31:36, 3.64it/s] {'loss': 3.8389, 'learning_rate': 7.514329117197096e-07, 'epoch': 4.43} + 28%|██▊ | 102740/371472 [8:10:13<20:31:36, 3.64it/s] 28%|██▊ | 102741/371472 [8:10:14<20:23:16, 3.66it/s] 28%|██▊ | 102742/371472 [8:10:14<20:20:39, 3.67it/s] 28%|██▊ | 102743/371472 [8:10:14<19:37:14, 3.80it/s] 28%|██▊ | 102744/371472 [8:10:15<21:44:40, 3.43it/s] 28%|██▊ | 102745/371472 [8:10:15<22:09:37, 3.37it/s] 28%|██▊ | 102746/371472 [8:10:15<21:23:04, 3.49it/s] 28%|██▊ | 102747/371472 [8:10:15<21:08:14, 3.53it/s] 28%|██▊ | 102748/371472 [8:10:16<20:45:46, 3.60it/s] 28%|██▊ | 102749/371472 [8:10:16<20:27:30, 3.65it/s] 28%|██▊ | 102750/371472 [8:10:16<20:45:18, 3.60it/s] 28%|██▊ | 102751/371472 [8:10:16<20:37:46, 3.62it/s] 28%|██▊ | 102752/371472 [8:10:17<19:52:34, 3.76it/s] 28%|██▊ | 102753/371472 [8:10:17<19:43:15, 3.79it/s] 28%|██▊ | 102754/371472 [8:10:17<19:43:36, 3.78it/s] 28%|██▊ | 102755/371472 [8:10:17<19:57:26, 3.74it/s] 28%|██▊ | 102756/371472 [8:10:18<20:01:02, 3.73it/s] 28%|██▊ | 102757/371472 [8:10:18<20:31:18, 3.64it/s] 28%|██▊ | 102758/371472 [8:10:18<20:57:29, 3.56it/s] 28%|██▊ | 102759/371472 [8:10:19<20:37:23, 3.62it/s] 28%|██▊ | 102760/371472 [8:10:19<20:01:49, 3.73it/s] {'loss': 3.6775, 'learning_rate': 7.513844297442306e-07, 'epoch': 4.43} + 28%|██▊ | 102760/371472 [8:10:19<20:01:49, 3.73it/s] 28%|██▊ | 102761/371472 [8:10:19<19:57:46, 3.74it/s] 28%|██▊ | 102762/371472 [8:10:19<20:11:38, 3.70it/s] 28%|██▊ | 102763/371472 [8:10:20<20:23:14, 3.66it/s] 28%|██▊ | 102764/371472 [8:10:20<20:01:58, 3.73it/s] 28%|██▊ | 102765/371472 [8:10:20<20:00:35, 3.73it/s] 28%|██▊ | 102766/371472 [8:10:21<20:33:36, 3.63it/s] 28%|██▊ | 102767/371472 [8:10:21<20:40:43, 3.61it/s] 28%|██▊ | 102768/371472 [8:10:21<21:05:31, 3.54it/s] 28%|██▊ | 102769/371472 [8:10:21<22:10:15, 3.37it/s] 28%|██▊ | 102770/371472 [8:10:22<21:17:03, 3.51it/s] 28%|██▊ | 102771/371472 [8:10:22<20:47:35, 3.59it/s] 28%|██▊ | 102772/371472 [8:10:22<20:06:28, 3.71it/s] 28%|██▊ | 102773/371472 [8:10:22<20:36:59, 3.62it/s] 28%|██▊ | 102774/371472 [8:10:23<20:30:49, 3.64it/s] 28%|██▊ | 102775/371472 [8:10:23<20:26:52, 3.65it/s] 28%|██▊ | 102776/371472 [8:10:23<20:37:23, 3.62it/s] 28%|██▊ | 102777/371472 [8:10:24<21:30:04, 3.47it/s] 28%|██▊ | 102778/371472 [8:10:24<22:02:38, 3.39it/s] 28%|██▊ | 102779/371472 [8:10:24<21:47:46, 3.42it/s] 28%|██▊ | 102780/371472 [8:10:25<23:06:08, 3.23it/s] {'loss': 3.6713, 'learning_rate': 7.513359477687517e-07, 'epoch': 4.43} + 28%|██▊ | 102780/371472 [8:10:25<23:06:08, 3.23it/s] 28%|██▊ | 102781/371472 [8:10:25<21:57:52, 3.40it/s] 28%|██▊ | 102782/371472 [8:10:25<21:09:47, 3.53it/s] 28%|██▊ | 102783/371472 [8:10:25<22:16:26, 3.35it/s] 28%|██▊ | 102784/371472 [8:10:26<21:19:25, 3.50it/s] 28%|██▊ | 102785/371472 [8:10:26<22:29:41, 3.32it/s] 28%|██▊ | 102786/371472 [8:10:26<21:25:19, 3.48it/s] 28%|██▊ | 102787/371472 [8:10:27<22:18:45, 3.34it/s] 28%|██▊ | 102788/371472 [8:10:27<21:03:03, 3.55it/s] 28%|██▊ | 102789/371472 [8:10:27<21:17:27, 3.51it/s] 28%|██▊ | 102790/371472 [8:10:27<22:38:40, 3.30it/s] 28%|██▊ | 102791/371472 [8:10:28<21:51:02, 3.42it/s] 28%|██▊ | 102792/371472 [8:10:28<22:57:20, 3.25it/s] 28%|██▊ | 102793/371472 [8:10:28<22:17:59, 3.35it/s] 28%|██▊ | 102794/371472 [8:10:29<22:20:18, 3.34it/s] 28%|██▊ | 102795/371472 [8:10:29<22:09:10, 3.37it/s] 28%|██▊ | 102796/371472 [8:10:29<22:11:21, 3.36it/s] 28%|██▊ | 102797/371472 [8:10:30<22:57:46, 3.25it/s] 28%|██▊ | 102798/371472 [8:10:30<21:52:28, 3.41it/s] 28%|██▊ | 102799/371472 [8:10:30<21:48:14, 3.42it/s] 28%|██▊ | 102800/371472 [8:10:30<21:10:35, 3.52it/s] {'loss': 3.5122, 'learning_rate': 7.512874657932729e-07, 'epoch': 4.43} + 28%|██▊ | 102800/371472 [8:10:30<21:10:35, 3.52it/s] 28%|██▊ | 102801/371472 [8:10:31<20:45:24, 3.60it/s] 28%|██▊ | 102802/371472 [8:10:31<20:34:41, 3.63it/s] 28%|██▊ | 102803/371472 [8:10:31<20:16:35, 3.68it/s] 28%|██▊ | 102804/371472 [8:10:31<20:19:02, 3.67it/s] 28%|██▊ | 102805/371472 [8:10:32<21:22:13, 3.49it/s] 28%|██▊ | 102806/371472 [8:10:32<20:55:53, 3.57it/s] 28%|██▊ | 102807/371472 [8:10:32<20:39:53, 3.61it/s] 28%|██▊ | 102808/371472 [8:10:33<21:20:36, 3.50it/s] 28%|██▊ | 102809/371472 [8:10:33<20:39:33, 3.61it/s] 28%|██▊ | 102810/371472 [8:10:33<20:04:52, 3.72it/s] 28%|██▊ | 102811/371472 [8:10:33<20:05:25, 3.71it/s] 28%|██▊ | 102812/371472 [8:10:34<20:02:47, 3.72it/s] 28%|██▊ | 102813/371472 [8:10:34<23:05:02, 3.23it/s] 28%|██▊ | 102814/371472 [8:10:34<22:02:05, 3.39it/s] 28%|██▊ | 102815/371472 [8:10:35<21:20:49, 3.50it/s] 28%|██▊ | 102816/371472 [8:10:35<20:53:18, 3.57it/s] 28%|██▊ | 102817/371472 [8:10:35<20:43:45, 3.60it/s] 28%|██▊ | 102818/371472 [8:10:35<21:11:51, 3.52it/s] 28%|██▊ | 102819/371472 [8:10:36<20:21:25, 3.67it/s] 28%|██▊ | 102820/371472 [8:10:36<20:27:22, 3.65it/s] {'loss': 3.4725, 'learning_rate': 7.51238983817794e-07, 'epoch': 4.43} + 28%|██▊ | 102820/371472 [8:10:36<20:27:22, 3.65it/s] 28%|██▊ | 102821/371472 [8:10:36<21:09:19, 3.53it/s] 28%|██▊ | 102822/371472 [8:10:37<21:04:06, 3.54it/s] 28%|██▊ | 102823/371472 [8:10:37<20:57:57, 3.56it/s] 28%|██▊ | 102824/371472 [8:10:37<20:21:07, 3.67it/s] 28%|██▊ | 102825/371472 [8:10:37<20:20:22, 3.67it/s] 28%|██▊ | 102826/371472 [8:10:38<19:53:27, 3.75it/s] 28%|██▊ | 102827/371472 [8:10:38<19:45:06, 3.78it/s] 28%|██▊ | 102828/371472 [8:10:38<19:42:43, 3.79it/s] 28%|██▊ | 102829/371472 [8:10:38<20:18:07, 3.68it/s] 28%|██▊ | 102830/371472 [8:10:39<19:27:55, 3.83it/s] 28%|██▊ | 102831/371472 [8:10:39<19:59:48, 3.73it/s] 28%|██▊ | 102832/371472 [8:10:39<19:49:43, 3.76it/s] 28%|██▊ | 102833/371472 [8:10:40<21:14:59, 3.51it/s] 28%|██▊ | 102834/371472 [8:10:40<20:34:31, 3.63it/s] 28%|██▊ | 102835/371472 [8:10:40<23:55:23, 3.12it/s] 28%|██▊ | 102836/371472 [8:10:40<23:05:21, 3.23it/s] 28%|██▊ | 102837/371472 [8:10:41<22:52:26, 3.26it/s] 28%|██▊ | 102838/371472 [8:10:41<22:31:30, 3.31it/s] 28%|██▊ | 102839/371472 [8:10:41<21:19:23, 3.50it/s] 28%|██▊ | 102840/371472 [8:10:42<20:30:10, 3.64it/s] {'loss': 3.4536, 'learning_rate': 7.511905018423149e-07, 'epoch': 4.43} + 28%|██▊ | 102840/371472 [8:10:42<20:30:10, 3.64it/s] 28%|██▊ | 102841/371472 [8:10:42<20:40:34, 3.61it/s] 28%|██▊ | 102842/371472 [8:10:42<21:17:39, 3.50it/s] 28%|██▊ | 102843/371472 [8:10:42<20:43:57, 3.60it/s] 28%|██▊ | 102844/371472 [8:10:43<20:39:58, 3.61it/s] 28%|██▊ | 102845/371472 [8:10:43<21:09:48, 3.53it/s] 28%|██▊ | 102846/371472 [8:10:43<20:34:50, 3.63it/s] 28%|██▊ | 102847/371472 [8:10:44<20:45:54, 3.59it/s] 28%|██▊ | 102848/371472 [8:10:44<19:53:23, 3.75it/s] 28%|██▊ | 102849/371472 [8:10:44<20:14:05, 3.69it/s] 28%|██▊ | 102850/371472 [8:10:44<20:04:41, 3.72it/s] 28%|██▊ | 102851/371472 [8:10:45<22:32:51, 3.31it/s] 28%|██▊ | 102852/371472 [8:10:45<21:30:15, 3.47it/s] 28%|██▊ | 102853/371472 [8:10:45<21:51:43, 3.41it/s] 28%|██▊ | 102854/371472 [8:10:46<22:04:07, 3.38it/s] 28%|██▊ | 102855/371472 [8:10:46<22:51:13, 3.26it/s] 28%|██▊ | 102856/371472 [8:10:46<22:56:37, 3.25it/s] 28%|██▊ | 102857/371472 [8:10:46<22:34:48, 3.30it/s] 28%|██▊ | 102858/371472 [8:10:47<21:36:57, 3.45it/s] 28%|██▊ | 102859/371472 [8:10:47<20:42:06, 3.60it/s] 28%|██▊ | 102860/371472 [8:10:47<20:51:56, 3.58it/s] {'loss': 3.4621, 'learning_rate': 7.511420198668361e-07, 'epoch': 4.43} + 28%|██▊ | 102860/371472 [8:10:47<20:51:56, 3.58it/s] 28%|██▊ | 102861/371472 [8:10:48<21:25:22, 3.48it/s] 28%|██▊ | 102862/371472 [8:10:48<21:20:58, 3.49it/s] 28%|██▊ | 102863/371472 [8:10:48<21:22:14, 3.49it/s] 28%|██▊ | 102864/371472 [8:10:48<21:50:48, 3.42it/s] 28%|██▊ | 102865/371472 [8:10:49<21:31:10, 3.47it/s] 28%|██▊ | 102866/371472 [8:10:49<21:21:31, 3.49it/s] 28%|██▊ | 102867/371472 [8:10:49<21:17:15, 3.50it/s] 28%|██▊ | 102868/371472 [8:10:50<21:53:11, 3.41it/s] 28%|██▊ | 102869/371472 [8:10:50<23:49:21, 3.13it/s] 28%|██▊ | 102870/371472 [8:10:50<23:49:22, 3.13it/s] 28%|██▊ | 102871/371472 [8:10:51<22:20:04, 3.34it/s] 28%|██▊ | 102872/371472 [8:10:51<21:47:46, 3.42it/s] 28%|██▊ | 102873/371472 [8:10:51<20:59:06, 3.56it/s] 28%|██▊ | 102874/371472 [8:10:51<21:09:00, 3.53it/s] 28%|██▊ | 102875/371472 [8:10:52<21:00:48, 3.55it/s] 28%|██▊ | 102876/371472 [8:10:52<20:42:35, 3.60it/s] 28%|██▊ | 102877/371472 [8:10:52<20:30:47, 3.64it/s] 28%|██▊ | 102878/371472 [8:10:52<20:14:56, 3.68it/s] 28%|██▊ | 102879/371472 [8:10:53<20:09:24, 3.70it/s] 28%|██▊ | 102880/371472 [8:10:53<19:32:27, 3.82it/s] {'loss': 3.6124, 'learning_rate': 7.510935378913573e-07, 'epoch': 4.43} + 28%|██▊ | 102880/371472 [8:10:53<19:32:27, 3.82it/s] 28%|██▊ | 102881/371472 [8:10:53<19:37:16, 3.80it/s] 28%|██▊ | 102882/371472 [8:10:54<22:02:10, 3.39it/s] 28%|██▊ | 102883/371472 [8:10:54<21:42:49, 3.44it/s] 28%|██▊ | 102884/371472 [8:10:54<21:44:23, 3.43it/s] 28%|██▊ | 102885/371472 [8:10:54<21:12:49, 3.52it/s] 28%|██▊ | 102886/371472 [8:10:55<21:16:12, 3.51it/s] 28%|██▊ | 102887/371472 [8:10:55<20:41:37, 3.61it/s] 28%|██▊ | 102888/371472 [8:10:55<20:54:43, 3.57it/s] 28%|██▊ | 102889/371472 [8:10:56<20:57:46, 3.56it/s] 28%|██▊ | 102890/371472 [8:10:56<21:34:49, 3.46it/s] 28%|██▊ | 102891/371472 [8:10:56<21:26:22, 3.48it/s] 28%|██▊ | 102892/371472 [8:10:56<20:51:22, 3.58it/s] 28%|██▊ | 102893/371472 [8:10:57<20:35:19, 3.62it/s] 28%|██▊ | 102894/371472 [8:10:57<19:44:26, 3.78it/s] 28%|██▊ | 102895/371472 [8:10:57<20:20:11, 3.67it/s] 28%|██▊ | 102896/371472 [8:10:57<20:02:28, 3.72it/s] 28%|██▊ | 102897/371472 [8:10:58<19:06:25, 3.90it/s] 28%|██▊ | 102898/371472 [8:10:58<20:08:42, 3.70it/s] 28%|██▊ | 102899/371472 [8:10:58<21:03:56, 3.54it/s] 28%|██▊ | 102900/371472 [8:10:59<24:05:47, 3.10it/s] {'loss': 3.4475, 'learning_rate': 7.510450559158785e-07, 'epoch': 4.43} + 28%|██▊ | 102900/371472 [8:10:59<24:05:47, 3.10it/s] 28%|██▊ | 102901/371472 [8:10:59<22:48:57, 3.27it/s] 28%|██▊ | 102902/371472 [8:10:59<22:24:19, 3.33it/s] 28%|██▊ | 102903/371472 [8:11:00<22:32:32, 3.31it/s] 28%|██▊ | 102904/371472 [8:11:00<22:46:05, 3.28it/s] 28%|██▊ | 102905/371472 [8:11:00<22:10:30, 3.36it/s] 28%|██▊ | 102906/371472 [8:11:01<23:16:19, 3.21it/s] 28%|██▊ | 102907/371472 [8:11:01<23:04:27, 3.23it/s] 28%|██▊ | 102908/371472 [8:11:01<23:34:09, 3.17it/s] 28%|██▊ | 102909/371472 [8:11:01<23:25:05, 3.19it/s] 28%|██▊ | 102910/371472 [8:11:02<21:51:55, 3.41it/s] 28%|██▊ | 102911/371472 [8:11:02<23:00:14, 3.24it/s] 28%|██▊ | 102912/371472 [8:11:02<21:25:00, 3.48it/s] 28%|██▊ | 102913/371472 [8:11:03<20:39:49, 3.61it/s] 28%|██▊ | 102914/371472 [8:11:03<20:32:47, 3.63it/s] 28%|██▊ | 102915/371472 [8:11:03<21:18:32, 3.50it/s] 28%|██▊ | 102916/371472 [8:11:03<20:55:42, 3.56it/s] 28%|██▊ | 102917/371472 [8:11:04<21:43:26, 3.43it/s] 28%|██▊ | 102918/371472 [8:11:04<21:39:15, 3.44it/s] 28%|██▊ | 102919/371472 [8:11:04<21:26:54, 3.48it/s] 28%|██▊ | 102920/371472 [8:11:05<24:52:05, 3.00it/s] {'loss': 3.5734, 'learning_rate': 7.509965739403994e-07, 'epoch': 4.43} + 28%|██▊ | 102920/371472 [8:11:05<24:52:05, 3.00it/s] 28%|██▊ | 102921/371472 [8:11:05<23:12:12, 3.21it/s] 28%|██▊ | 102922/371472 [8:11:05<23:11:26, 3.22it/s] 28%|██▊ | 102923/371472 [8:11:06<22:30:21, 3.31it/s] 28%|██▊ | 102924/371472 [8:11:06<22:26:45, 3.32it/s] 28%|██▊ | 102925/371472 [8:11:06<21:31:25, 3.47it/s] 28%|██▊ | 102926/371472 [8:11:06<21:44:02, 3.43it/s] 28%|██▊ | 102927/371472 [8:11:07<23:09:07, 3.22it/s] 28%|██▊ | 102928/371472 [8:11:07<22:50:22, 3.27it/s] 28%|██▊ | 102929/371472 [8:11:07<24:30:44, 3.04it/s] 28%|██▊ | 102930/371472 [8:11:08<23:44:52, 3.14it/s] 28%|██▊ | 102931/371472 [8:11:08<24:13:45, 3.08it/s] 28%|██▊ | 102932/371472 [8:11:08<24:36:34, 3.03it/s] 28%|██▊ | 102933/371472 [8:11:09<23:30:27, 3.17it/s] 28%|██▊ | 102934/371472 [8:11:09<23:54:34, 3.12it/s] 28%|██▊ | 102935/371472 [8:11:09<23:03:06, 3.24it/s] 28%|██▊ | 102936/371472 [8:11:10<23:02:40, 3.24it/s] 28%|██▊ | 102937/371472 [8:11:10<22:28:40, 3.32it/s] 28%|██▊ | 102938/371472 [8:11:10<22:02:26, 3.38it/s] 28%|██▊ | 102939/371472 [8:11:11<22:23:02, 3.33it/s] 28%|██▊ | 102940/371472 [8:11:11<24:43:27, 3.02it/s] {'loss': 3.3826, 'learning_rate': 7.509480919649205e-07, 'epoch': 4.43} + 28%|██▊ | 102940/371472 [8:11:11<24:43:27, 3.02it/s] 28%|██▊ | 102941/371472 [8:11:11<23:22:39, 3.19it/s] 28%|██▊ | 102942/371472 [8:11:12<23:02:15, 3.24it/s] 28%|██▊ | 102943/371472 [8:11:12<25:10:23, 2.96it/s] 28%|██▊ | 102944/371472 [8:11:12<23:22:51, 3.19it/s] 28%|██▊ | 102945/371472 [8:11:12<22:26:45, 3.32it/s] 28%|██▊ | 102946/371472 [8:11:13<21:25:33, 3.48it/s] 28%|██▊ | 102947/371472 [8:11:13<21:30:42, 3.47it/s] 28%|██▊ | 102948/371472 [8:11:13<20:54:46, 3.57it/s] 28%|██▊ | 102949/371472 [8:11:14<21:04:03, 3.54it/s] 28%|██▊ | 102950/371472 [8:11:14<20:43:20, 3.60it/s] 28%|██▊ | 102951/371472 [8:11:14<20:22:26, 3.66it/s] 28%|██▊ | 102952/371472 [8:11:14<21:10:49, 3.52it/s] 28%|██▊ | 102953/371472 [8:11:15<20:43:55, 3.60it/s] 28%|██▊ | 102954/371472 [8:11:15<20:36:10, 3.62it/s] 28%|██▊ | 102955/371472 [8:11:15<19:53:40, 3.75it/s] 28%|██▊ | 102956/371472 [8:11:15<20:39:09, 3.61it/s] 28%|██▊ | 102957/371472 [8:11:16<20:50:11, 3.58it/s] 28%|██▊ | 102958/371472 [8:11:16<21:34:45, 3.46it/s] 28%|██▊ | 102959/371472 [8:11:16<22:25:49, 3.33it/s] 28%|██▊ | 102960/371472 [8:11:17<21:24:37, 3.48it/s] {'loss': 3.6288, 'learning_rate': 7.508996099894417e-07, 'epoch': 4.43} + 28%|██▊ | 102960/371472 [8:11:17<21:24:37, 3.48it/s] 28%|██▊ | 102961/371472 [8:11:17<20:51:17, 3.58it/s] 28%|██▊ | 102962/371472 [8:11:17<21:42:38, 3.44it/s] 28%|██▊ | 102963/371472 [8:11:18<21:53:27, 3.41it/s] 28%|██��� | 102964/371472 [8:11:18<22:21:43, 3.34it/s] 28%|██▊ | 102965/371472 [8:11:18<21:40:03, 3.44it/s] 28%|██▊ | 102966/371472 [8:11:18<22:18:49, 3.34it/s] 28%|██▊ | 102967/371472 [8:11:19<22:06:41, 3.37it/s] 28%|██▊ | 102968/371472 [8:11:19<21:26:01, 3.48it/s] 28%|██▊ | 102969/371472 [8:11:19<21:34:27, 3.46it/s] 28%|██▊ | 102970/371472 [8:11:20<20:40:23, 3.61it/s] 28%|██▊ | 102971/371472 [8:11:20<20:08:02, 3.70it/s] 28%|██▊ | 102972/371472 [8:11:20<21:02:00, 3.55it/s] 28%|██▊ | 102973/371472 [8:11:20<22:19:42, 3.34it/s] 28%|██▊ | 102974/371472 [8:11:21<21:34:38, 3.46it/s] 28%|██▊ | 102975/371472 [8:11:21<21:50:53, 3.41it/s] 28%|██▊ | 102976/371472 [8:11:21<20:37:10, 3.62it/s] 28%|██▊ | 102977/371472 [8:11:21<20:22:15, 3.66it/s] 28%|██▊ | 102978/371472 [8:11:22<19:59:20, 3.73it/s] 28%|██▊ | 102979/371472 [8:11:22<20:53:22, 3.57it/s] 28%|██▊ | 102980/371472 [8:11:22<20:38:24, 3.61it/s] {'loss': 3.5509, 'learning_rate': 7.508511280139627e-07, 'epoch': 4.44} + 28%|██▊ | 102980/371472 [8:11:22<20:38:24, 3.61it/s] 28%|██▊ | 102981/371472 [8:11:23<20:55:50, 3.56it/s] 28%|██▊ | 102982/371472 [8:11:23<21:52:09, 3.41it/s] 28%|██▊ | 102983/371472 [8:11:23<21:52:34, 3.41it/s] 28%|██▊ | 102984/371472 [8:11:24<21:56:08, 3.40it/s] 28%|██▊ | 102985/371472 [8:11:24<21:28:05, 3.47it/s] 28%|██▊ | 102986/371472 [8:11:24<21:36:10, 3.45it/s] 28%|██▊ | 102987/371472 [8:11:24<21:46:55, 3.42it/s] 28%|██▊ | 102988/371472 [8:11:25<21:37:24, 3.45it/s] 28%|██▊ | 102989/371472 [8:11:25<20:46:07, 3.59it/s] 28%|██▊ | 102990/371472 [8:11:25<20:19:52, 3.67it/s] 28%|██▊ | 102991/371472 [8:11:25<20:23:12, 3.66it/s] 28%|██▊ | 102992/371472 [8:11:26<19:49:55, 3.76it/s] 28%|██▊ | 102993/371472 [8:11:26<22:38:23, 3.29it/s] 28%|██▊ | 102994/371472 [8:11:26<22:46:00, 3.28it/s] 28%|██▊ | 102995/371472 [8:11:27<22:07:56, 3.37it/s] 28%|██▊ | 102996/371472 [8:11:27<21:26:46, 3.48it/s] 28%|██▊ | 102997/371472 [8:11:27<21:03:57, 3.54it/s] 28%|██▊ | 102998/371472 [8:11:27<20:51:08, 3.58it/s] 28%|██▊ | 102999/371472 [8:11:28<19:56:16, 3.74it/s] 28%|██▊ | 103000/371472 [8:11:28<21:21:24, 3.49it/s] {'loss': 3.5255, 'learning_rate': 7.508026460384838e-07, 'epoch': 4.44} + 28%|██▊ | 103000/371472 [8:11:28<21:21:24, 3.49it/s] 28%|██▊ | 103001/371472 [8:11:28<22:05:08, 3.38it/s] 28%|██▊ | 103002/371472 [8:11:29<21:10:08, 3.52it/s] 28%|██▊ | 103003/371472 [8:11:29<21:07:57, 3.53it/s] 28%|██▊ | 103004/371472 [8:11:29<22:10:02, 3.36it/s] 28%|██▊ | 103005/371472 [8:11:30<21:24:38, 3.48it/s] 28%|██▊ | 103006/371472 [8:11:30<22:16:09, 3.35it/s] 28%|██▊ | 103007/371472 [8:11:30<21:42:26, 3.44it/s] 28%|██▊ | 103008/371472 [8:11:30<21:35:11, 3.45it/s] 28%|██▊ | 103009/371472 [8:11:31<21:25:48, 3.48it/s] 28%|██▊ | 103010/371472 [8:11:31<21:31:50, 3.46it/s] 28%|██▊ | 103011/371472 [8:11:31<20:50:51, 3.58it/s] 28%|██▊ | 103012/371472 [8:11:31<20:24:48, 3.65it/s] 28%|██▊ | 103013/371472 [8:11:32<20:07:48, 3.70it/s] 28%|██▊ | 103014/371472 [8:11:32<20:57:05, 3.56it/s] 28%|██▊ | 103015/371472 [8:11:32<21:01:14, 3.55it/s] 28%|██▊ | 103016/371472 [8:11:33<19:58:03, 3.73it/s] 28%|██▊ | 103017/371472 [8:11:33<19:21:08, 3.85it/s] 28%|██▊ | 103018/371472 [8:11:33<19:37:06, 3.80it/s] 28%|██▊ | 103019/371472 [8:11:33<19:04:16, 3.91it/s] 28%|██▊ | 103020/371472 [8:11:34<19:18:50, 3.86it/s] {'loss': 3.4593, 'learning_rate': 7.50754164063005e-07, 'epoch': 4.44} + 28%|██▊ | 103020/371472 [8:11:34<19:18:50, 3.86it/s] 28%|██▊ | 103021/371472 [8:11:34<19:29:19, 3.83it/s] 28%|██▊ | 103022/371472 [8:11:34<19:37:13, 3.80it/s] 28%|██▊ | 103023/371472 [8:11:34<19:48:40, 3.76it/s] 28%|██▊ | 103024/371472 [8:11:35<19:52:26, 3.75it/s] 28%|██▊ | 103025/371472 [8:11:35<20:17:41, 3.67it/s] 28%|██▊ | 103026/371472 [8:11:35<20:21:09, 3.66it/s] 28%|██▊ | 103027/371472 [8:11:36<20:43:15, 3.60it/s] 28%|██▊ | 103028/371472 [8:11:36<21:44:38, 3.43it/s] 28%|██▊ | 103029/371472 [8:11:36<21:53:35, 3.41it/s] 28%|██▊ | 103030/371472 [8:11:36<21:31:46, 3.46it/s] 28%|██▊ | 103031/371472 [8:11:37<20:26:52, 3.65it/s] 28%|██▊ | 103032/371472 [8:11:37<21:45:04, 3.43it/s] 28%|██▊ | 103033/371472 [8:11:37<20:46:05, 3.59it/s] 28%|██▊ | 103034/371472 [8:11:38<20:40:19, 3.61it/s] 28%|██▊ | 103035/371472 [8:11:38<20:13:34, 3.69it/s] 28%|██▊ | 103036/371472 [8:11:38<19:43:22, 3.78it/s] 28%|██▊ | 103037/371472 [8:11:38<21:17:03, 3.50it/s] 28%|██▊ | 103038/371472 [8:11:39<21:27:12, 3.48it/s] 28%|██▊ | 103039/371472 [8:11:39<20:28:29, 3.64it/s] 28%|██▊ | 103040/371472 [8:11:39<20:08:12, 3.70it/s] {'loss': 3.2955, 'learning_rate': 7.507056820875262e-07, 'epoch': 4.44} + 28%|██▊ | 103040/371472 [8:11:39<20:08:12, 3.70it/s] 28%|██▊ | 103041/371472 [8:11:39<20:03:08, 3.72it/s] 28%|██▊ | 103042/371472 [8:11:40<19:36:22, 3.80it/s] 28%|██▊ | 103043/371472 [8:11:40<19:49:33, 3.76it/s] 28%|██▊ | 103044/371472 [8:11:40<20:25:22, 3.65it/s] 28%|██▊ | 103045/371472 [8:11:41<21:00:32, 3.55it/s] 28%|██▊ | 103046/371472 [8:11:41<21:17:05, 3.50it/s] 28%|██▊ | 103047/371472 [8:11:41<21:19:30, 3.50it/s] 28%|██▊ | 103048/371472 [8:11:41<20:43:08, 3.60it/s] 28%|██▊ | 103049/371472 [8:11:42<21:15:52, 3.51it/s] 28%|██▊ | 103050/371472 [8:11:42<23:13:23, 3.21it/s] 28%|██▊ | 103051/371472 [8:11:42<21:51:29, 3.41it/s] 28%|██▊ | 103052/371472 [8:11:43<21:07:09, 3.53it/s] 28%|██▊ | 103053/371472 [8:11:43<21:24:48, 3.48it/s] 28%|██▊ | 103054/371472 [8:11:43<21:31:24, 3.46it/s] 28%|██▊ | 103055/371472 [8:11:44<23:57:13, 3.11it/s] 28%|██▊ | 103056/371472 [8:11:44<23:01:23, 3.24it/s] 28%|██▊ | 103057/371472 [8:11:44<22:02:08, 3.38it/s] 28%|██▊ | 103058/371472 [8:11:44<20:37:37, 3.61it/s] 28%|██▊ | 103059/371472 [8:11:45<20:28:34, 3.64it/s] 28%|██▊ | 103060/371472 [8:11:45<20:03:24, 3.72it/s] {'loss': 3.6332, 'learning_rate': 7.506572001120472e-07, 'epoch': 4.44} + 28%|██▊ | 103060/371472 [8:11:45<20:03:24, 3.72it/s] 28%|██▊ | 103061/371472 [8:11:45<19:29:28, 3.83it/s] 28%|██▊ | 103062/371472 [8:11:45<19:11:31, 3.88it/s] 28%|██▊ | 103063/371472 [8:11:46<19:21:09, 3.85it/s] 28%|██▊ | 103064/371472 [8:11:46<19:59:27, 3.73it/s] 28%|██▊ | 103065/371472 [8:11:46<19:20:40, 3.85it/s] 28%|██▊ | 103066/371472 [8:11:46<19:49:42, 3.76it/s] 28%|██▊ | 103067/371472 [8:11:47<19:43:37, 3.78it/s] 28%|██▊ | 103068/371472 [8:11:47<21:50:06, 3.41it/s] 28%|██▊ | 103069/371472 [8:11:47<21:00:11, 3.55it/s] 28%|██▊ | 103070/371472 [8:11:48<20:06:29, 3.71it/s] 28%|██▊ | 103071/371472 [8:11:48<20:19:01, 3.67it/s] 28%|██▊ | 103072/371472 [8:11:48<20:06:16, 3.71it/s] 28%|██▊ | 103073/371472 [8:11:48<20:20:32, 3.67it/s] 28%|██▊ | 103074/371472 [8:11:49<20:36:59, 3.62it/s] 28%|██▊ | 103075/371472 [8:11:49<21:59:51, 3.39it/s] 28%|██▊ | 103076/371472 [8:11:49<21:19:06, 3.50it/s] 28%|██▊ | 103077/371472 [8:11:49<20:41:18, 3.60it/s] 28%|██▊ | 103078/371472 [8:11:50<21:55:38, 3.40it/s] 28%|██▊ | 103079/371472 [8:11:50<21:41:18, 3.44it/s] 28%|██▊ | 103080/371472 [8:11:50<21:07:57, 3.53it/s] {'loss': 3.3554, 'learning_rate': 7.506087181365682e-07, 'epoch': 4.44} + 28%|██▊ | 103080/371472 [8:11:50<21:07:57, 3.53it/s] 28%|██▊ | 103081/371472 [8:11:51<20:32:31, 3.63it/s] 28%|██▊ | 103082/371472 [8:11:51<20:10:10, 3.70it/s] 28%|██▊ | 103083/371472 [8:11:51<20:03:49, 3.72it/s] 28%|██▊ | 103084/371472 [8:11:51<20:12:08, 3.69it/s] 28%|██▊ | 103085/371472 [8:11:52<20:14:00, 3.68it/s] 28%|██▊ | 103086/371472 [8:11:52<20:36:05, 3.62it/s] 28%|██▊ | 103087/371472 [8:11:52<19:43:20, 3.78it/s] 28%|██▊ | 103088/371472 [8:11:52<19:21:49, 3.85it/s] 28%|██▊ | 103089/371472 [8:11:53<19:51:38, 3.75it/s] 28%|██▊ | 103090/371472 [8:11:53<19:48:28, 3.76it/s] 28%|██▊ | 103091/371472 [8:11:53<20:19:42, 3.67it/s] 28%|██▊ | 103092/371472 [8:11:54<19:57:34, 3.74it/s] 28%|██▊ | 103093/371472 [8:11:54<20:16:16, 3.68it/s] 28%|██▊ | 103094/371472 [8:11:54<20:46:44, 3.59it/s] 28%|██▊ | 103095/371472 [8:11:54<20:36:23, 3.62it/s] 28%|██▊ | 103096/371472 [8:11:55<20:42:32, 3.60it/s] 28%|██▊ | 103097/371472 [8:11:55<20:07:56, 3.70it/s] 28%|██▊ | 103098/371472 [8:11:55<20:18:58, 3.67it/s] 28%|██▊ | 103099/371472 [8:11:56<20:12:56, 3.69it/s] 28%|██▊ | 103100/371472 [8:11:56<20:04:57, 3.71it/s] {'loss': 3.6188, 'learning_rate': 7.505602361610894e-07, 'epoch': 4.44} + 28%|██▊ | 103100/371472 [8:11:56<20:04:57, 3.71it/s] 28%|██▊ | 103101/371472 [8:11:56<20:19:12, 3.67it/s] 28%|██▊ | 103102/371472 [8:11:56<20:18:10, 3.67it/s] 28%|██▊ | 103103/371472 [8:11:57<20:21:56, 3.66it/s] 28%|██▊ | 103104/371472 [8:11:57<20:10:38, 3.69it/s] 28%|██▊ | 103105/371472 [8:11:57<20:07:45, 3.70it/s] 28%|██▊ | 103106/371472 [8:11:57<20:21:02, 3.66it/s] 28%|██▊ | 103107/371472 [8:11:58<20:08:49, 3.70it/s] 28%|██▊ | 103108/371472 [8:11:58<19:49:58, 3.76it/s] 28%|██▊ | 103109/371472 [8:11:58<22:22:53, 3.33it/s] 28%|██▊ | 103110/371472 [8:11:59<21:55:28, 3.40it/s] 28%|██▊ | 103111/371472 [8:11:59<21:29:47, 3.47it/s] 28%|██▊ | 103112/371472 [8:11:59<21:18:42, 3.50it/s] 28%|██▊ | 103113/371472 [8:11:59<21:38:24, 3.44it/s] 28%|██▊ | 103114/371472 [8:12:00<21:58:26, 3.39it/s] 28%|██▊ | 103115/371472 [8:12:00<22:19:38, 3.34it/s] 28%|██▊ | 103116/371472 [8:12:00<21:43:44, 3.43it/s] 28%|██▊ | 103117/371472 [8:12:01<22:20:59, 3.34it/s] 28%|██▊ | 103118/371472 [8:12:01<22:39:04, 3.29it/s] 28%|██▊ | 103119/371472 [8:12:01<23:02:16, 3.24it/s] 28%|██▊ | 103120/371472 [8:12:02<23:37:40, 3.15it/s] {'loss': 3.4414, 'learning_rate': 7.505117541856106e-07, 'epoch': 4.44} + 28%|██▊ | 103120/371472 [8:12:02<23:37:40, 3.15it/s] 28%|██▊ | 103121/371472 [8:12:02<22:44:06, 3.28it/s] 28%|██▊ | 103122/371472 [8:12:02<22:03:43, 3.38it/s] 28%|██▊ | 103123/371472 [8:12:02<22:04:38, 3.38it/s] 28%|██▊ | 103124/371472 [8:12:03<21:20:04, 3.49it/s] 28%|██▊ | 103125/371472 [8:12:03<20:55:25, 3.56it/s] 28%|██▊ | 103126/371472 [8:12:03<20:43:14, 3.60it/s] 28%|██▊ | 103127/371472 [8:12:04<21:16:42, 3.50it/s] 28%|██▊ | 103128/371472 [8:12:04<21:04:43, 3.54it/s] 28%|██▊ | 103129/371472 [8:12:04<21:12:17, 3.52it/s] 28%|██▊ | 103130/371472 [8:12:04<22:06:29, 3.37it/s] 28%|██▊ | 103131/371472 [8:12:05<23:05:50, 3.23it/s] 28%|██▊ | 103132/371472 [8:12:05<24:41:17, 3.02it/s] 28%|██▊ | 103133/371472 [8:12:05<22:59:43, 3.24it/s] 28%|██▊ | 103134/371472 [8:12:06<22:09:31, 3.36it/s] 28%|██▊ | 103135/371472 [8:12:06<21:19:46, 3.49it/s] 28%|██▊ | 103136/371472 [8:12:06<21:22:18, 3.49it/s] 28%|██▊ | 103137/371472 [8:12:07<21:42:22, 3.43it/s] 28%|██▊ | 103138/371472 [8:12:07<21:10:02, 3.52it/s] 28%|██▊ | 103139/371472 [8:12:07<21:27:46, 3.47it/s] 28%|██▊ | 103140/371472 [8:12:07<22:24:45, 3.33it/s] {'loss': 3.546, 'learning_rate': 7.504632722101316e-07, 'epoch': 4.44} + 28%|██▊ | 103140/371472 [8:12:07<22:24:45, 3.33it/s] 28%|██▊ | 103141/371472 [8:12:08<21:53:49, 3.40it/s] 28%|██▊ | 103142/371472 [8:12:08<21:43:42, 3.43it/s] 28%|██▊ | 103143/371472 [8:12:08<21:52:07, 3.41it/s] 28%|██▊ | 103144/371472 [8:12:09<20:46:48, 3.59it/s] 28%|██▊ | 103145/371472 [8:12:09<22:01:21, 3.38it/s] 28%|██▊ | 103146/371472 [8:12:09<21:08:30, 3.53it/s] 28%|██▊ | 103147/371472 [8:12:09<21:15:02, 3.51it/s] 28%|██▊ | 103148/371472 [8:12:10<21:48:05, 3.42it/s] 28%|██▊ | 103149/371472 [8:12:10<20:52:55, 3.57it/s] 28%|██▊ | 103150/371472 [8:12:10<20:01:13, 3.72it/s] 28%|██▊ | 103151/371472 [8:12:11<21:21:59, 3.49it/s] 28%|██▊ | 103152/371472 [8:12:11<20:48:04, 3.58it/s] 28%|██▊ | 103153/371472 [8:12:11<20:22:00, 3.66it/s] 28%|██▊ | 103154/371472 [8:12:11<20:19:01, 3.67it/s] 28%|██▊ | 103155/371472 [8:12:12<20:16:17, 3.68it/s] 28%|██▊ | 103156/371472 [8:12:12<21:57:37, 3.39it/s] 28%|██▊ | 103157/371472 [8:12:12<21:29:34, 3.47it/s] 28%|██▊ | 103158/371472 [8:12:13<21:10:30, 3.52it/s] 28%|██▊ | 103159/371472 [8:12:13<21:39:55, 3.44it/s] 28%|██▊ | 103160/371472 [8:12:13<21:12:09, 3.52it/s] {'loss': 3.3908, 'learning_rate': 7.504147902346527e-07, 'epoch': 4.44} + 28%|██▊ | 103160/371472 [8:12:13<21:12:09, 3.52it/s] 28%|██▊ | 103161/371472 [8:12:13<21:21:42, 3.49it/s] 28%|██▊ | 103162/371472 [8:12:14<22:41:27, 3.28it/s] 28%|██▊ | 103163/371472 [8:12:14<22:36:37, 3.30it/s] 28%|██▊ | 103164/371472 [8:12:14<22:20:24, 3.34it/s] 28%|██▊ | 103165/371472 [8:12:15<21:59:24, 3.39it/s] 28%|██▊ | 103166/371472 [8:12:15<21:52:48, 3.41it/s] 28%|██▊ | 103167/371472 [8:12:15<21:08:59, 3.52it/s] 28%|██▊ | 103168/371472 [8:12:15<21:01:57, 3.54it/s] 28%|██▊ | 103169/371472 [8:12:16<21:03:04, 3.54it/s] 28%|██▊ | 103170/371472 [8:12:16<20:57:39, 3.56it/s] 28%|██▊ | 103171/371472 [8:12:16<20:23:27, 3.65it/s] 28%|██▊ | 103172/371472 [8:12:17<20:11:02, 3.69it/s] 28%|██▊ | 103173/371472 [8:12:17<20:34:40, 3.62it/s] 28%|██▊ | 103174/371472 [8:12:17<21:22:20, 3.49it/s] 28%|██▊ | 103175/371472 [8:12:17<20:55:50, 3.56it/s] 28%|██▊ | 103176/371472 [8:12:18<20:38:10, 3.61it/s] 28%|██▊ | 103177/371472 [8:12:18<21:50:25, 3.41it/s] 28%|██▊ | 103178/371472 [8:12:18<22:12:10, 3.36it/s] 28%|██▊ | 103179/371472 [8:12:19<21:13:36, 3.51it/s] 28%|██▊ | 103180/371472 [8:12:19<21:03:33, 3.54it/s] {'loss': 3.5382, 'learning_rate': 7.503663082591739e-07, 'epoch': 4.44} + 28%|██▊ | 103180/371472 [8:12:19<21:03:33, 3.54it/s] 28%|██▊ | 103181/371472 [8:12:19<23:43:47, 3.14it/s] 28%|██▊ | 103182/371472 [8:12:19<22:05:03, 3.37it/s] 28%|██▊ | 103183/371472 [8:12:20<21:20:32, 3.49it/s] 28%|██▊ | 103184/371472 [8:12:20<20:28:16, 3.64it/s] 28%|██▊ | 103185/371472 [8:12:20<20:01:02, 3.72it/s] 28%|██▊ | 103186/371472 [8:12:21<21:01:00, 3.55it/s] 28%|██▊ | 103187/371472 [8:12:21<20:42:26, 3.60it/s] 28%|██▊ | 103188/371472 [8:12:21<20:09:14, 3.70it/s] 28%|██▊ | 103189/371472 [8:12:21<21:00:31, 3.55it/s] 28%|██▊ | 103190/371472 [8:12:22<20:35:21, 3.62it/s] 28%|██▊ | 103191/371472 [8:12:22<21:52:43, 3.41it/s] 28%|██▊ | 103192/371472 [8:12:22<21:50:11, 3.41it/s] 28%|██▊ | 103193/371472 [8:12:23<20:30:21, 3.63it/s] 28%|██▊ | 103194/371472 [8:12:23<20:04:32, 3.71it/s] 28%|██▊ | 103195/371472 [8:12:23<19:51:28, 3.75it/s] 28%|██▊ | 103196/371472 [8:12:23<19:19:06, 3.86it/s] 28%|██▊ | 103197/371472 [8:12:24<19:09:17, 3.89it/s] 28%|██▊ | 103198/371472 [8:12:24<19:22:23, 3.85it/s] 28%|██▊ | 103199/371472 [8:12:24<19:29:57, 3.82it/s] 28%|██▊ | 103200/371472 [8:12:24<19:34:23, 3.81it/s] {'loss': 3.52, 'learning_rate': 7.50317826283695e-07, 'epoch': 4.45} + 28%|██▊ | 103200/371472 [8:12:24<19:34:23, 3.81it/s] 28%|██▊ | 103201/371472 [8:12:25<20:10:41, 3.69it/s] 28%|██▊ | 103202/371472 [8:12:25<20:23:09, 3.66it/s] 28%|██▊ | 103203/371472 [8:12:25<20:14:03, 3.68it/s] 28%|██▊ | 103204/371472 [8:12:25<20:01:46, 3.72it/s] 28%|██▊ | 103205/371472 [8:12:26<19:49:17, 3.76it/s] 28%|██▊ | 103206/371472 [8:12:26<19:54:29, 3.74it/s] 28%|██▊ | 103207/371472 [8:12:26<20:39:44, 3.61it/s] 28%|██▊ | 103208/371472 [8:12:27<22:06:14, 3.37it/s] 28%|██▊ | 103209/371472 [8:12:27<21:18:03, 3.50it/s] 28%|██▊ | 103210/371472 [8:12:27<22:39:54, 3.29it/s] 28%|██▊ | 103211/371472 [8:12:27<21:25:03, 3.48it/s] 28%|██▊ | 103212/371472 [8:12:28<20:52:26, 3.57it/s] 28%|██▊ | 103213/371472 [8:12:28<20:26:05, 3.65it/s] 28%|██▊ | 103214/371472 [8:12:28<20:28:34, 3.64it/s] 28%|██▊ | 103215/371472 [8:12:29<20:31:04, 3.63it/s] 28%|██▊ | 103216/371472 [8:12:29<20:32:10, 3.63it/s] 28%|██▊ | 103217/371472 [8:12:29<20:39:20, 3.61it/s] 28%|██▊ | 103218/371472 [8:12:29<21:27:05, 3.47it/s] 28%|██▊ | 103219/371472 [8:12:30<20:38:28, 3.61it/s] 28%|██▊ | 103220/371472 [8:12:30<20:44:45, 3.59it/s] {'loss': 3.5026, 'learning_rate': 7.50269344308216e-07, 'epoch': 4.45} + 28%|██▊ | 103220/371472 [8:12:30<20:44:45, 3.59it/s] 28%|██▊ | 103221/371472 [8:12:30<21:06:29, 3.53it/s] 28%|██▊ | 103222/371472 [8:12:31<21:03:26, 3.54it/s] 28%|██▊ | 103223/371472 [8:12:31<20:58:59, 3.55it/s] 28%|██▊ | 103224/371472 [8:12:31<21:16:12, 3.50it/s] 28%|██▊ | 103225/371472 [8:12:31<22:07:27, 3.37it/s] 28%|██▊ | 103226/371472 [8:12:32<21:50:15, 3.41it/s] 28%|██▊ | 103227/371472 [8:12:32<22:37:33, 3.29it/s] 28%|██▊ | 103228/371472 [8:12:32<21:47:38, 3.42it/s] 28%|██▊ | 103229/371472 [8:12:33<21:15:11, 3.51it/s] 28%|██▊ | 103230/371472 [8:12:33<20:55:08, 3.56it/s] 28%|██▊ | 103231/371472 [8:12:33<20:32:22, 3.63it/s] 28%|██▊ | 103232/371472 [8:12:33<23:05:19, 3.23it/s] 28%|██▊ | 103233/371472 [8:12:34<21:35:41, 3.45it/s] 28%|██▊ | 103234/371472 [8:12:34<20:19:20, 3.67it/s] 28%|██▊ | 103235/371472 [8:12:34<20:27:48, 3.64it/s] 28%|██▊ | 103236/371472 [8:12:35<20:36:05, 3.62it/s] 28%|██▊ | 103237/371472 [8:12:35<20:10:18, 3.69it/s] 28%|██▊ | 103238/371472 [8:12:35<20:22:17, 3.66it/s] 28%|██▊ | 103239/371472 [8:12:35<21:13:45, 3.51it/s] 28%|██▊ | 103240/371472 [8:12:36<21:21:44, 3.49it/s] {'loss': 3.4411, 'learning_rate': 7.502208623327371e-07, 'epoch': 4.45} + 28%|██▊ | 103240/371472 [8:12:36<21:21:44, 3.49it/s] 28%|██▊ | 103241/371472 [8:12:36<21:50:05, 3.41it/s] 28%|██▊ | 103242/371472 [8:12:36<22:08:59, 3.36it/s] 28%|██▊ | 103243/371472 [8:12:37<22:29:11, 3.31it/s] 28%|██▊ | 103244/371472 [8:12:37<22:20:38, 3.33it/s] 28%|██▊ | 103245/371472 [8:12:37<21:53:55, 3.40it/s] 28%|██▊ | 103246/371472 [8:12:37<21:09:53, 3.52it/s] 28%|██▊ | 103247/371472 [8:12:38<23:00:58, 3.24it/s] 28%|██▊ | 103248/371472 [8:12:38<24:01:19, 3.10it/s] 28%|██▊ | 103249/371472 [8:12:38<22:44:21, 3.28it/s] 28%|██▊ | 103250/371472 [8:12:39<21:15:45, 3.50it/s] 28%|██▊ | 103251/371472 [8:12:39<22:02:37, 3.38it/s] 28%|██▊ | 103252/371472 [8:12:39<21:20:58, 3.49it/s] 28%|██▊ | 103253/371472 [8:12:39<20:27:13, 3.64it/s] 28%|██▊ | 103254/371472 [8:12:40<20:00:36, 3.72it/s] 28%|██▊ | 103255/371472 [8:12:40<19:42:46, 3.78it/s] 28%|██▊ | 103256/371472 [8:12:40<19:30:13, 3.82it/s] 28%|██▊ | 103257/371472 [8:12:41<19:48:32, 3.76it/s] 28%|██▊ | 103258/371472 [8:12:41<20:49:20, 3.58it/s] 28%|██▊ | 103259/371472 [8:12:41<20:07:35, 3.70it/s] 28%|██▊ | 103260/371472 [8:12:41<20:15:15, 3.68it/s] {'loss': 3.6578, 'learning_rate': 7.501723803572583e-07, 'epoch': 4.45} + 28%|██▊ | 103260/371472 [8:12:41<20:15:15, 3.68it/s] 28%|██▊ | 103261/371472 [8:12:42<19:23:28, 3.84it/s] 28%|██▊ | 103262/371472 [8:12:42<19:14:02, 3.87it/s] 28%|██▊ | 103263/371472 [8:12:42<19:11:28, 3.88it/s] 28%|██▊ | 103264/371472 [8:12:42<19:10:11, 3.89it/s] 28%|██▊ | 103265/371472 [8:12:43<18:50:03, 3.96it/s] 28%|██▊ | 103266/371472 [8:12:43<19:17:37, 3.86it/s] 28%|██▊ | 103267/371472 [8:12:43<23:33:49, 3.16it/s] 28%|██▊ | 103268/371472 [8:12:44<23:10:54, 3.21it/s] 28%|██▊ | 103269/371472 [8:12:44<21:36:40, 3.45it/s] 28%|██▊ | 103270/371472 [8:12:44<21:54:24, 3.40it/s] 28%|██▊ | 103271/371472 [8:12:44<21:17:38, 3.50it/s] 28%|██▊ | 103272/371472 [8:12:45<22:02:29, 3.38it/s] 28%|██▊ | 103273/371472 [8:12:45<22:55:31, 3.25it/s] 28%|██▊ | 103274/371472 [8:12:45<24:40:47, 3.02it/s] 28%|██▊ | 103275/371472 [8:12:46<24:07:51, 3.09it/s] 28%|██▊ | 103276/371472 [8:12:46<23:17:32, 3.20it/s] 28%|██▊ | 103277/371472 [8:12:46<23:28:43, 3.17it/s] 28%|██▊ | 103278/371472 [8:12:47<24:49:54, 3.00it/s] 28%|██▊ | 103279/371472 [8:12:47<22:53:46, 3.25it/s] 28%|██▊ | 103280/371472 [8:12:47<23:12:47, 3.21it/s] {'loss': 3.5964, 'learning_rate': 7.501238983817795e-07, 'epoch': 4.45} + 28%|██▊ | 103280/371472 [8:12:47<23:12:47, 3.21it/s] 28%|██▊ | 103281/371472 [8:12:48<21:59:35, 3.39it/s] 28%|██▊ | 103282/371472 [8:12:48<20:53:20, 3.57it/s] 28%|██▊ | 103283/371472 [8:12:48<22:06:54, 3.37it/s] 28%|██▊ | 103284/371472 [8:12:48<21:39:58, 3.44it/s] 28%|██▊ | 103285/371472 [8:12:49<22:02:01, 3.38it/s] 28%|██▊ | 103286/371472 [8:12:49<21:16:15, 3.50it/s] 28%|██▊ | 103287/371472 [8:12:49<20:55:44, 3.56it/s] 28%|██▊ | 103288/371472 [8:12:50<22:09:21, 3.36it/s] 28%|██▊ | 103289/371472 [8:12:50<21:04:42, 3.53it/s] 28%|██▊ | 103290/371472 [8:12:50<20:19:38, 3.66it/s] 28%|██▊ | 103291/371472 [8:12:50<21:38:43, 3.44it/s] 28%|██▊ | 103292/371472 [8:12:51<21:09:19, 3.52it/s] 28%|██▊ | 103293/371472 [8:12:51<20:04:49, 3.71it/s] 28%|██▊ | 103294/371472 [8:12:51<20:21:43, 3.66it/s] 28%|██▊ | 103295/371472 [8:12:52<22:01:03, 3.38it/s] 28%|██▊ | 103296/371472 [8:12:52<22:19:24, 3.34it/s] 28%|██▊ | 103297/371472 [8:12:52<21:09:54, 3.52it/s] 28%|██▊ | 103298/371472 [8:12:52<21:25:23, 3.48it/s] 28%|██▊ | 103299/371472 [8:12:53<22:06:27, 3.37it/s] 28%|██▊ | 103300/371472 [8:12:53<21:00:09, 3.55it/s] {'loss': 3.4745, 'learning_rate': 7.500754164063005e-07, 'epoch': 4.45} + 28%|██▊ | 103300/371472 [8:12:53<21:00:09, 3.55it/s] 28%|██▊ | 103301/371472 [8:12:53<20:38:36, 3.61it/s] 28%|██▊ | 103302/371472 [8:12:54<20:05:21, 3.71it/s] 28%|██▊ | 103303/371472 [8:12:54<19:38:03, 3.79it/s] 28%|██▊ | 103304/371472 [8:12:54<19:42:39, 3.78it/s] 28%|██▊ | 103305/371472 [8:12:54<21:39:35, 3.44it/s] 28%|██▊ | 103306/371472 [8:12:55<20:44:08, 3.59it/s] 28%|██▊ | 103307/371472 [8:12:55<20:34:59, 3.62it/s] 28%|██▊ | 103308/371472 [8:12:55<20:38:59, 3.61it/s] 28%|██▊ | 103309/371472 [8:12:55<19:51:21, 3.75it/s] 28%|██▊ | 103310/371472 [8:12:56<19:37:08, 3.80it/s] 28%|██▊ | 103311/371472 [8:12:56<19:53:59, 3.74it/s] 28%|██▊ | 103312/371472 [8:12:56<19:05:41, 3.90it/s] 28%|██▊ | 103313/371472 [8:12:56<19:31:50, 3.81it/s] 28%|██▊ | 103314/371472 [8:12:57<19:05:56, 3.90it/s] 28%|██▊ | 103315/371472 [8:12:57<19:52:11, 3.75it/s] 28%|██▊ | 103316/371472 [8:12:57<19:54:19, 3.74it/s] 28%|██▊ | 103317/371472 [8:12:58<20:06:49, 3.70it/s] 28%|██▊ | 103318/371472 [8:12:58<20:19:05, 3.67it/s] 28%|██▊ | 103319/371472 [8:12:58<20:26:29, 3.64it/s] 28%|██▊ | 103320/371472 [8:12:58<22:09:55, 3.36it/s] {'loss': 3.4408, 'learning_rate': 7.500269344308215e-07, 'epoch': 4.45} + 28%|██▊ | 103320/371472 [8:12:58<22:09:55, 3.36it/s] 28%|██▊ | 103321/371472 [8:12:59<21:29:56, 3.46it/s] 28%|██▊ | 103322/371472 [8:12:59<20:57:27, 3.55it/s] 28%|██▊ | 103323/371472 [8:12:59<21:18:52, 3.49it/s] 28%|██▊ | 103324/371472 [8:13:00<21:12:40, 3.51it/s] 28%|██▊ | 103325/371472 [8:13:00<20:40:41, 3.60it/s] 28%|██▊ | 103326/371472 [8:13:00<19:57:26, 3.73it/s] 28%|██▊ | 103327/371472 [8:13:00<20:42:42, 3.60it/s] 28%|██▊ | 103328/371472 [8:13:01<20:09:24, 3.70it/s] 28%|██▊ | 103329/371472 [8:13:01<20:15:25, 3.68it/s] 28%|██▊ | 103330/371472 [8:13:01<20:49:06, 3.58it/s] 28%|██▊ | 103331/371472 [8:13:01<20:12:48, 3.68it/s] 28%|██▊ | 103332/371472 [8:13:02<20:55:36, 3.56it/s] 28%|██▊ | 103333/371472 [8:13:02<20:29:30, 3.63it/s] 28%|██▊ | 103334/371472 [8:13:02<20:18:39, 3.67it/s] 28%|██▊ | 103335/371472 [8:13:03<19:29:34, 3.82it/s] 28%|██▊ | 103336/371472 [8:13:03<18:58:59, 3.92it/s] 28%|██▊ | 103337/371472 [8:13:03<20:39:07, 3.61it/s] 28%|██▊ | 103338/371472 [8:13:03<20:49:41, 3.58it/s] 28%|██▊ | 103339/371472 [8:13:04<20:02:54, 3.72it/s] 28%|██▊ | 103340/371472 [8:13:04<19:40:52, 3.78it/s] {'loss': 3.5472, 'learning_rate': 7.499784524553427e-07, 'epoch': 4.45} + 28%|██▊ | 103340/371472 [8:13:04<19:40:52, 3.78it/s] 28%|██▊ | 103341/371472 [8:13:04<19:24:20, 3.84it/s] 28%|██▊ | 103342/371472 [8:13:04<19:33:40, 3.81it/s] 28%|██▊ | 103343/371472 [8:13:05<20:23:16, 3.65it/s] 28%|██▊ | 103344/371472 [8:13:05<20:12:18, 3.69it/s] 28%|██▊ | 103345/371472 [8:13:05<24:04:35, 3.09it/s] 28%|██▊ | 103346/371472 [8:13:06<23:25:37, 3.18it/s] 28%|██▊ | 103347/371472 [8:13:06<23:25:19, 3.18it/s] 28%|██▊ | 103348/371472 [8:13:06<22:49:00, 3.26it/s] 28%|██▊ | 103349/371472 [8:13:07<23:00:12, 3.24it/s] 28%|██▊ | 103350/371472 [8:13:07<21:42:58, 3.43it/s] 28%|██▊ | 103351/371472 [8:13:07<21:20:37, 3.49it/s] 28%|██▊ | 103352/371472 [8:13:07<20:59:44, 3.55it/s] 28%|██▊ | 103353/371472 [8:13:08<20:54:35, 3.56it/s] 28%|██▊ | 103354/371472 [8:13:08<21:01:43, 3.54it/s] 28%|██▊ | 103355/371472 [8:13:08<21:31:21, 3.46it/s] 28%|██▊ | 103356/371472 [8:13:09<22:11:44, 3.36it/s] 28%|██▊ | 103357/371472 [8:13:09<21:07:11, 3.53it/s] 28%|██▊ | 103358/371472 [8:13:09<22:50:50, 3.26it/s] 28%|██▊ | 103359/371472 [8:13:10<23:40:19, 3.15it/s] 28%|██▊ | 103360/371472 [8:13:10<22:39:47, 3.29it/s] {'loss': 3.3716, 'learning_rate': 7.499299704798638e-07, 'epoch': 4.45} + 28%|██▊ | 103360/371472 [8:13:10<22:39:47, 3.29it/s] 28%|██▊ | 103361/371472 [8:13:10<22:33:13, 3.30it/s] 28%|██▊ | 103362/371472 [8:13:10<22:11:49, 3.36it/s] 28%|██▊ | 103363/371472 [8:13:11<21:34:56, 3.45it/s] 28%|██▊ | 103364/371472 [8:13:11<20:29:33, 3.63it/s] 28%|██▊ | 103365/371472 [8:13:11<20:27:57, 3.64it/s] 28%|██▊ | 103366/371472 [8:13:11<20:04:12, 3.71it/s] 28%|██▊ | 103367/371472 [8:13:12<20:09:45, 3.69it/s] 28%|██▊ | 103368/371472 [8:13:12<20:20:50, 3.66it/s] 28%|██▊ | 103369/371472 [8:13:12<19:30:12, 3.82it/s] 28%|██▊ | 103370/371472 [8:13:13<19:44:07, 3.77it/s] 28%|██▊ | 103371/371472 [8:13:13<19:50:26, 3.75it/s] 28%|██▊ | 103372/371472 [8:13:13<19:34:52, 3.80it/s] 28%|██▊ | 103373/371472 [8:13:13<19:47:36, 3.76it/s] 28%|██▊ | 103374/371472 [8:13:14<19:53:56, 3.74it/s] 28%|██▊ | 103375/371472 [8:13:14<20:12:46, 3.68it/s] 28%|██▊ | 103376/371472 [8:13:14<20:23:02, 3.65it/s] 28%|██▊ | 103377/371472 [8:13:14<19:26:28, 3.83it/s] 28%|██▊ | 103378/371472 [8:13:15<20:03:15, 3.71it/s] 28%|██▊ | 103379/371472 [8:13:15<20:34:25, 3.62it/s] 28%|██▊ | 103380/371472 [8:13:15<20:42:25, 3.60it/s] {'loss': 3.6598, 'learning_rate': 7.498814885043848e-07, 'epoch': 4.45} + 28%|██▊ | 103380/371472 [8:13:15<20:42:25, 3.60it/s] 28%|██▊ | 103381/371472 [8:13:16<21:46:58, 3.42it/s] 28%|██▊ | 103382/371472 [8:13:16<23:40:11, 3.15it/s] 28%|██▊ | 103383/371472 [8:13:16<22:22:01, 3.33it/s] 28%|██▊ | 103384/371472 [8:13:17<22:48:46, 3.26it/s] 28%|██▊ | 103385/371472 [8:13:17<21:25:07, 3.48it/s] 28%|██▊ | 103386/371472 [8:13:17<21:07:24, 3.53it/s] 28%|██▊ | 103387/371472 [8:13:17<20:27:40, 3.64it/s] 28%|██▊ | 103388/371472 [8:13:18<20:51:34, 3.57it/s] 28%|██▊ | 103389/371472 [8:13:18<21:39:18, 3.44it/s] 28%|██▊ | 103390/371472 [8:13:18<26:04:19, 2.86it/s] 28%|██▊ | 103391/371472 [8:13:19<24:27:59, 3.04it/s] 28%|██▊ | 103392/371472 [8:13:19<22:33:19, 3.30it/s] 28%|██▊ | 103393/371472 [8:13:19<21:54:10, 3.40it/s] 28%|██▊ | 103394/371472 [8:13:19<21:20:40, 3.49it/s] 28%|██▊ | 103395/371472 [8:13:20<22:58:53, 3.24it/s] 28%|██▊ | 103396/371472 [8:13:20<22:07:06, 3.37it/s] 28%|██▊ | 103397/371472 [8:13:20<21:58:39, 3.39it/s] 28%|██▊ | 103398/371472 [8:13:21<21:33:54, 3.45it/s] 28%|██▊ | 103399/371472 [8:13:21<21:09:53, 3.52it/s] 28%|██▊ | 103400/371472 [8:13:21<22:04:37, 3.37it/s] {'loss': 3.5674, 'learning_rate': 7.49833006528906e-07, 'epoch': 4.45} + 28%|██▊ | 103400/371472 [8:13:21<22:04:37, 3.37it/s] 28%|██▊ | 103401/371472 [8:13:22<21:31:20, 3.46it/s] 28%|██▊ | 103402/371472 [8:13:22<21:31:43, 3.46it/s] 28%|██▊ | 103403/371472 [8:13:22<21:18:58, 3.49it/s] 28%|██▊ | 103404/371472 [8:13:22<21:11:31, 3.51it/s] 28%|██▊ | 103405/371472 [8:13:23<20:38:01, 3.61it/s] 28%|██▊ | 103406/371472 [8:13:23<20:59:28, 3.55it/s] 28%|██▊ | 103407/371472 [8:13:23<22:11:37, 3.36it/s] 28%|██▊ | 103408/371472 [8:13:24<21:28:02, 3.47it/s] 28%|██▊ | 103409/371472 [8:13:24<21:22:17, 3.48it/s] 28%|██▊ | 103410/371472 [8:13:24<22:52:32, 3.26it/s] 28%|██▊ | 103411/371472 [8:13:24<22:04:55, 3.37it/s] 28%|██▊ | 103412/371472 [8:13:25<22:03:55, 3.37it/s] 28%|██▊ | 103413/371472 [8:13:25<21:09:03, 3.52it/s] 28%|██▊ | 103414/371472 [8:13:25<20:58:08, 3.55it/s] 28%|██▊ | 103415/371472 [8:13:26<20:26:06, 3.64it/s] 28%|██▊ | 103416/371472 [8:13:26<20:46:04, 3.59it/s] 28%|██▊ | 103417/371472 [8:13:26<20:35:02, 3.62it/s] 28%|██▊ | 103418/371472 [8:13:26<21:37:24, 3.44it/s] 28%|██▊ | 103419/371472 [8:13:27<21:46:40, 3.42it/s] 28%|██▊ | 103420/371472 [8:13:27<21:21:23, 3.49it/s] {'loss': 3.402, 'learning_rate': 7.497845245534273e-07, 'epoch': 4.45} + 28%|██▊ | 103420/371472 [8:13:27<21:21:23, 3.49it/s] 28%|██▊ | 103421/371472 [8:13:27<22:40:06, 3.28it/s] 28%|██▊ | 103422/371472 [8:13:28<22:42:57, 3.28it/s] 28%|██▊ | 103423/371472 [8:13:28<21:41:24, 3.43it/s] 28%|██▊ | 103424/371472 [8:13:28<22:53:36, 3.25it/s] 28%|██▊ | 103425/371472 [8:13:29<22:48:07, 3.27it/s] 28%|██▊ | 103426/371472 [8:13:29<22:24:37, 3.32it/s] 28%|██▊ | 103427/371472 [8:13:29<21:09:58, 3.52it/s] 28%|██▊ | 103428/371472 [8:13:29<21:48:14, 3.41it/s] 28%|██▊ | 103429/371472 [8:13:30<21:18:39, 3.49it/s] 28%|██▊ | 103430/371472 [8:13:30<20:10:31, 3.69it/s] 28%|██▊ | 103431/371472 [8:13:30<21:04:42, 3.53it/s] 28%|██▊ | 103432/371472 [8:13:30<20:47:06, 3.58it/s] 28%|██▊ | 103433/371472 [8:13:31<21:26:50, 3.47it/s] 28%|██▊ | 103434/371472 [8:13:31<21:35:28, 3.45it/s] 28%|██▊ | 103435/371472 [8:13:31<21:39:21, 3.44it/s] 28%|██▊ | 103436/371472 [8:13:32<22:16:20, 3.34it/s] 28%|██▊ | 103437/371472 [8:13:32<23:27:59, 3.17it/s] 28%|██▊ | 103438/371472 [8:13:32<23:01:48, 3.23it/s] 28%|██▊ | 103439/371472 [8:13:33<21:50:13, 3.41it/s] 28%|██▊ | 103440/371472 [8:13:33<21:46:27, 3.42it/s] {'loss': 3.4623, 'learning_rate': 7.497360425779482e-07, 'epoch': 4.46} + 28%|██▊ | 103440/371472 [8:13:33<21:46:27, 3.42it/s] 28%|██▊ | 103441/371472 [8:13:33<22:43:02, 3.28it/s] 28%|██▊ | 103442/371472 [8:13:33<22:00:35, 3.38it/s] 28%|██▊ | 103443/371472 [8:13:34<20:55:50, 3.56it/s] 28%|██▊ | 103444/371472 [8:13:34<20:40:48, 3.60it/s] 28%|██▊ | 103445/371472 [8:13:34<21:15:03, 3.50it/s] 28%|██▊ | 103446/371472 [8:13:35<21:45:44, 3.42it/s] 28%|██▊ | 103447/371472 [8:13:35<21:07:43, 3.52it/s] 28%|██▊ | 103448/371472 [8:13:35<20:57:55, 3.55it/s] 28%|██▊ | 103449/371472 [8:13:35<22:21:49, 3.33it/s] 28%|██▊ | 103450/371472 [8:13:36<21:03:20, 3.54it/s] 28%|██▊ | 103451/371472 [8:13:36<20:46:35, 3.58it/s] 28%|██▊ | 103452/371472 [8:13:36<21:06:37, 3.53it/s] 28%|██▊ | 103453/371472 [8:13:37<20:35:17, 3.62it/s] 28%|██▊ | 103454/371472 [8:13:37<20:30:20, 3.63it/s] 28%|██▊ | 103455/371472 [8:13:37<20:39:50, 3.60it/s] 28%|██▊ | 103456/371472 [8:13:37<21:49:19, 3.41it/s] 28%|██▊ | 103457/371472 [8:13:38<21:05:14, 3.53it/s] 28%|██▊ | 103458/371472 [8:13:38<22:14:30, 3.35it/s] 28%|██▊ | 103459/371472 [8:13:38<23:06:35, 3.22it/s] 28%|██▊ | 103460/371472 [8:13:39<22:16:38, 3.34it/s] {'loss': 3.4413, 'learning_rate': 7.496875606024692e-07, 'epoch': 4.46} + 28%|██▊ | 103460/371472 [8:13:39<22:16:38, 3.34it/s] 28%|██▊ | 103461/371472 [8:13:39<23:11:48, 3.21it/s] 28%|██▊ | 103462/371472 [8:13:39<24:05:39, 3.09it/s] 28%|██▊ | 103463/371472 [8:13:40<22:26:54, 3.32it/s] 28%|██▊ | 103464/371472 [8:13:40<21:20:53, 3.49it/s] 28%|██▊ | 103465/371472 [8:13:40<21:52:50, 3.40it/s] 28%|██▊ | 103466/371472 [8:13:40<20:59:16, 3.55it/s] 28%|██▊ | 103467/371472 [8:13:41<21:26:30, 3.47it/s] 28%|██▊ | 103468/371472 [8:13:41<20:58:31, 3.55it/s] 28%|██▊ | 103469/371472 [8:13:41<20:35:44, 3.61it/s] 28%|██▊ | 103470/371472 [8:13:42<21:11:55, 3.51it/s] 28%|██▊ | 103471/371472 [8:13:42<20:37:36, 3.61it/s] 28%|██▊ | 103472/371472 [8:13:42<20:04:03, 3.71it/s] 28%|██▊ | 103473/371472 [8:13:42<20:17:34, 3.67it/s] 28%|██▊ | 103474/371472 [8:13:43<20:17:11, 3.67it/s] 28%|██▊ | 103475/371472 [8:13:43<20:52:56, 3.56it/s] 28%|██▊ | 103476/371472 [8:13:43<20:19:09, 3.66it/s] 28%|██▊ | 103477/371472 [8:13:44<22:19:13, 3.34it/s] 28%|██▊ | 103478/371472 [8:13:44<21:14:14, 3.51it/s] 28%|██▊ | 103479/371472 [8:13:44<22:34:56, 3.30it/s] 28%|██▊ | 103480/371472 [8:13:44<22:20:16, 3.33it/s] {'loss': 3.5012, 'learning_rate': 7.496390786269904e-07, 'epoch': 4.46} + 28%|██▊ | 103480/371472 [8:13:44<22:20:16, 3.33it/s] 28%|██▊ | 103481/371472 [8:13:45<21:12:32, 3.51it/s] 28%|██▊ | 103482/371472 [8:13:45<21:18:04, 3.49it/s] 28%|██▊ | 103483/371472 [8:13:45<21:06:52, 3.53it/s] 28%|██▊ | 103484/371472 [8:13:46<21:40:08, 3.44it/s] 28%|██▊ | 103485/371472 [8:13:46<20:43:16, 3.59it/s] 28%|██▊ | 103486/371472 [8:13:46<20:35:26, 3.62it/s] 28%|██▊ | 103487/371472 [8:13:46<22:57:12, 3.24it/s] 28%|██▊ | 103488/371472 [8:13:47<22:22:02, 3.33it/s] 28%|██▊ | 103489/371472 [8:13:47<21:36:43, 3.44it/s] 28%|██▊ | 103490/371472 [8:13:47<21:24:14, 3.48it/s] 28%|██▊ | 103491/371472 [8:13:48<20:14:01, 3.68it/s] 28%|██▊ | 103492/371472 [8:13:48<20:44:59, 3.59it/s] 28%|██▊ | 103493/371472 [8:13:48<20:29:05, 3.63it/s] 28%|██▊ | 103494/371472 [8:13:48<20:50:44, 3.57it/s] 28%|██▊ | 103495/371472 [8:13:49<20:12:55, 3.68it/s] 28%|██▊ | 103496/371472 [8:13:49<22:47:22, 3.27it/s] 28%|██▊ | 103497/371472 [8:13:49<23:19:06, 3.19it/s] 28%|██▊ | 103498/371472 [8:13:50<22:46:02, 3.27it/s] 28%|██▊ | 103499/371472 [8:13:50<22:41:08, 3.28it/s] 28%|██▊ | 103500/371472 [8:13:50<21:32:42, 3.45it/s] {'loss': 3.5767, 'learning_rate': 7.495905966515116e-07, 'epoch': 4.46} + 28%|██▊ | 103500/371472 [8:13:50<21:32:42, 3.45it/s] 28%|██▊ | 103501/371472 [8:13:50<21:04:28, 3.53it/s] 28%|██▊ | 103502/371472 [8:13:51<20:58:58, 3.55it/s] 28%|██▊ | 103503/371472 [8:13:51<20:36:42, 3.61it/s] 28%|██▊ | 103504/371472 [8:13:51<23:11:06, 3.21it/s] 28%|██▊ | 103505/371472 [8:13:52<21:58:05, 3.39it/s] 28%|██▊ | 103506/371472 [8:13:52<21:01:34, 3.54it/s] 28%|██▊ | 103507/371472 [8:13:52<21:24:30, 3.48it/s] 28%|██▊ | 103508/371472 [8:13:52<21:28:02, 3.47it/s] 28%|██▊ | 103509/371472 [8:13:53<21:42:49, 3.43it/s] 28%|██▊ | 103510/371472 [8:13:53<20:48:36, 3.58it/s] 28%|██▊ | 103511/371472 [8:13:53<20:35:38, 3.61it/s] 28%|██▊ | 103512/371472 [8:13:54<20:24:52, 3.65it/s] 28%|██▊ | 103513/371472 [8:13:54<20:16:31, 3.67it/s] 28%|██▊ | 103514/371472 [8:13:54<21:30:42, 3.46it/s] 28%|██▊ | 103515/371472 [8:13:55<22:44:18, 3.27it/s] 28%|██▊ | 103516/371472 [8:13:55<22:14:17, 3.35it/s] 28%|██▊ | 103517/371472 [8:13:55<21:41:10, 3.43it/s] 28%|██▊ | 103518/371472 [8:13:55<20:48:07, 3.58it/s] 28%|██▊ | 103519/371472 [8:13:56<21:08:11, 3.52it/s] 28%|██▊ | 103520/371472 [8:13:56<20:50:24, 3.57it/s] {'loss': 3.4629, 'learning_rate': 7.495421146760326e-07, 'epoch': 4.46} + 28%|██▊ | 103520/371472 [8:13:56<20:50:24, 3.57it/s] 28%|██▊ | 103521/371472 [8:13:56<21:36:53, 3.44it/s] 28%|██▊ | 103522/371472 [8:13:57<21:57:25, 3.39it/s] 28%|██▊ | 103523/371472 [8:13:57<21:18:25, 3.49it/s] 28%|██▊ | 103524/371472 [8:13:57<21:42:43, 3.43it/s] 28%|██▊ | 103525/371472 [8:13:57<20:32:42, 3.62it/s] 28%|██▊ | 103526/371472 [8:13:58<21:14:02, 3.51it/s] 28%|██▊ | 103527/371472 [8:13:58<20:57:32, 3.55it/s] 28%|██▊ | 103528/371472 [8:13:58<20:35:40, 3.61it/s] 28%|██▊ | 103529/371472 [8:13:58<20:28:11, 3.64it/s] 28%|██▊ | 103530/371472 [8:13:59<20:14:47, 3.68it/s] 28%|██▊ | 103531/371472 [8:13:59<19:54:04, 3.74it/s] 28%|██▊ | 103532/371472 [8:13:59<19:45:41, 3.77it/s] 28%|██▊ | 103533/371472 [8:14:00<23:43:08, 3.14it/s] 28%|██▊ | 103534/371472 [8:14:00<23:09:56, 3.21it/s] 28%|██▊ | 103535/371472 [8:14:00<22:15:53, 3.34it/s] 28%|██▊ | 103536/371472 [8:14:00<21:15:38, 3.50it/s] 28%|██▊ | 103537/371472 [8:14:01<21:29:54, 3.46it/s] 28%|██▊ | 103538/371472 [8:14:01<20:27:15, 3.64it/s] 28%|██▊ | 103539/371472 [8:14:01<20:46:41, 3.58it/s] 28%|██▊ | 103540/371472 [8:14:02<20:36:49, 3.61it/s] {'loss': 3.4782, 'learning_rate': 7.494936327005537e-07, 'epoch': 4.46} + 28%|██▊ | 103540/371472 [8:14:02<20:36:49, 3.61it/s] 28%|██▊ | 103541/371472 [8:14:02<20:47:36, 3.58it/s] 28%|██▊ | 103542/371472 [8:14:02<20:22:05, 3.65it/s] 28%|██▊ | 103543/371472 [8:14:02<21:31:10, 3.46it/s] 28%|██▊ | 103544/371472 [8:14:03<20:28:55, 3.63it/s] 28%|██▊ | 103545/371472 [8:14:03<20:49:35, 3.57it/s] 28%|██▊ | 103546/371472 [8:14:03<21:38:07, 3.44it/s] 28%|██▊ | 103547/371472 [8:14:04<20:49:13, 3.57it/s] 28%|██▊ | 103548/371472 [8:14:04<20:12:42, 3.68it/s] 28%|██▊ | 103549/371472 [8:14:04<20:15:37, 3.67it/s] 28%|██▊ | 103550/371472 [8:14:04<20:07:30, 3.70it/s] 28%|██▊ | 103551/371472 [8:14:05<20:09:01, 3.69it/s] 28%|██▊ | 103552/371472 [8:14:05<20:16:38, 3.67it/s] 28%|██▊ | 103553/371472 [8:14:05<20:12:34, 3.68it/s] 28%|██▊ | 103554/371472 [8:14:05<20:16:45, 3.67it/s] 28%|██▊ | 103555/371472 [8:14:06<20:17:23, 3.67it/s] 28%|██▊ | 103556/371472 [8:14:06<21:21:52, 3.48it/s] 28%|██▊ | 103557/371472 [8:14:06<20:52:36, 3.56it/s] 28%|██▊ | 103558/371472 [8:14:07<22:30:53, 3.31it/s] 28%|██▊ | 103559/371472 [8:14:07<22:07:32, 3.36it/s] 28%|██▊ | 103560/371472 [8:14:07<21:19:58, 3.49it/s] {'loss': 3.5955, 'learning_rate': 7.494451507250749e-07, 'epoch': 4.46} + 28%|██▊ | 103560/371472 [8:14:07<21:19:58, 3.49it/s] 28%|██▊ | 103561/371472 [8:14:07<21:27:20, 3.47it/s] 28%|██▊ | 103562/371472 [8:14:08<20:29:39, 3.63it/s] 28%|██▊ | 103563/371472 [8:14:08<21:03:53, 3.53it/s] 28%|██▊ | 103564/371472 [8:14:08<22:17:35, 3.34it/s] 28%|██▊ | 103565/371472 [8:14:09<23:44:08, 3.14it/s] 28%|██▊ | 103566/371472 [8:14:09<22:02:30, 3.38it/s] 28%|██▊ | 103567/371472 [8:14:09<24:06:27, 3.09it/s] 28%|██▊ | 103568/371472 [8:14:10<23:15:14, 3.20it/s] 28%|██▊ | 103569/371472 [8:14:10<24:49:34, 3.00it/s] 28%|██▊ | 103570/371472 [8:14:10<25:42:40, 2.89it/s] 28%|██▊ | 103571/371472 [8:14:11<24:00:48, 3.10it/s] 28%|██▊ | 103572/371472 [8:14:11<24:12:58, 3.07it/s] 28%|██▊ | 103573/371472 [8:14:11<24:00:11, 3.10it/s] 28%|██▊ | 103574/371472 [8:14:12<25:43:04, 2.89it/s] 28%|██▊ | 103575/371472 [8:14:12<23:34:01, 3.16it/s] 28%|██▊ | 103576/371472 [8:14:12<22:32:25, 3.30it/s] 28%|██▊ | 103577/371472 [8:14:12<21:24:30, 3.48it/s] 28%|██▊ | 103578/371472 [8:14:13<21:19:25, 3.49it/s] 28%|██▊ | 103579/371472 [8:14:13<21:17:11, 3.50it/s] 28%|██▊ | 103580/371472 [8:14:13<21:43:14, 3.43it/s] {'loss': 3.3298, 'learning_rate': 7.49396668749596e-07, 'epoch': 4.46} + 28%|██▊ | 103580/371472 [8:14:13<21:43:14, 3.43it/s] 28%|██▊ | 103581/371472 [8:14:14<22:28:59, 3.31it/s] 28%|██▊ | 103582/371472 [8:14:14<21:11:05, 3.51it/s] 28%|██▊ | 103583/371472 [8:14:14<20:13:00, 3.68it/s] 28%|██▊ | 103584/371472 [8:14:14<20:10:07, 3.69it/s] 28%|██▊ | 103585/371472 [8:14:15<20:38:48, 3.60it/s] 28%|██▊ | 103586/371472 [8:14:15<20:26:25, 3.64it/s] 28%|██▊ | 103587/371472 [8:14:15<19:36:48, 3.79it/s] 28%|██▊ | 103588/371472 [8:14:16<20:00:36, 3.72it/s] 28%|██▊ | 103589/371472 [8:14:16<20:25:21, 3.64it/s] 28%|██▊ | 103590/371472 [8:14:16<19:51:57, 3.75it/s] 28%|██▊ | 103591/371472 [8:14:16<20:03:33, 3.71it/s] 28%|██▊ | 103592/371472 [8:14:17<19:58:48, 3.72it/s] 28%|██▊ | 103593/371472 [8:14:17<20:17:08, 3.67it/s] 28%|██▊ | 103594/371472 [8:14:17<20:14:27, 3.68it/s] 28%|██▊ | 103595/371472 [8:14:17<20:10:44, 3.69it/s] 28%|██▊ | 103596/371472 [8:14:18<19:36:30, 3.79it/s] 28%|██▊ | 103597/371472 [8:14:18<20:05:06, 3.70it/s] 28%|██▊ | 103598/371472 [8:14:18<20:48:17, 3.58it/s] 28%|██▊ | 103599/371472 [8:14:19<20:13:35, 3.68it/s] 28%|██▊ | 103600/371472 [8:14:19<19:59:47, 3.72it/s] {'loss': 3.5234, 'learning_rate': 7.49348186774117e-07, 'epoch': 4.46} + 28%|██▊ | 103600/371472 [8:14:19<19:59:47, 3.72it/s] 28%|██▊ | 103601/371472 [8:14:19<21:02:01, 3.54it/s] 28%|██▊ | 103602/371472 [8:14:19<22:12:46, 3.35it/s] 28%|██▊ | 103603/371472 [8:14:20<22:20:57, 3.33it/s] 28%|██▊ | 103604/371472 [8:14:20<22:08:26, 3.36it/s] 28%|██▊ | 103605/371472 [8:14:20<21:33:01, 3.45it/s] 28%|██▊ | 103606/371472 [8:14:21<22:02:55, 3.37it/s] 28%|██▊ | 103607/371472 [8:14:21<21:27:48, 3.47it/s] 28%|██▊ | 103608/371472 [8:14:21<20:41:54, 3.59it/s] 28%|██▊ | 103609/371472 [8:14:21<21:30:10, 3.46it/s] 28%|██▊ | 103610/371472 [8:14:22<23:04:45, 3.22it/s] 28%|██▊ | 103611/371472 [8:14:22<22:01:55, 3.38it/s] 28%|██▊ | 103612/371472 [8:14:22<22:47:42, 3.26it/s] 28%|██▊ | 103613/371472 [8:14:23<21:14:36, 3.50it/s] 28%|██▊ | 103614/371472 [8:14:23<21:03:16, 3.53it/s] 28%|██▊ | 103615/371472 [8:14:23<22:16:04, 3.34it/s] 28%|██▊ | 103616/371472 [8:14:24<22:10:37, 3.35it/s] 28%|██▊ | 103617/371472 [8:14:24<21:25:37, 3.47it/s] 28%|██▊ | 103618/371472 [8:14:24<21:54:27, 3.40it/s] 28%|██▊ | 103619/371472 [8:14:24<21:12:46, 3.51it/s] 28%|██▊ | 103620/371472 [8:14:25<21:18:53, 3.49it/s] {'loss': 3.4356, 'learning_rate': 7.492997047986381e-07, 'epoch': 4.46} + 28%|██▊ | 103620/371472 [8:14:25<21:18:53, 3.49it/s] 28%|██▊ | 103621/371472 [8:14:25<20:58:56, 3.55it/s] 28%|██▊ | 103622/371472 [8:14:25<20:59:46, 3.54it/s] 28%|██▊ | 103623/371472 [8:14:25<20:20:43, 3.66it/s] 28%|██▊ | 103624/371472 [8:14:26<20:36:00, 3.61it/s] 28%|██▊ | 103625/371472 [8:14:26<22:07:56, 3.36it/s] 28%|██▊ | 103626/371472 [8:14:26<22:04:04, 3.37it/s] 28%|██▊ | 103627/371472 [8:14:27<22:00:54, 3.38it/s] 28%|██▊ | 103628/371472 [8:14:27<20:39:48, 3.60it/s] 28%|██▊ | 103629/371472 [8:14:27<20:31:15, 3.63it/s] 28%|██▊ | 103630/371472 [8:14:27<19:41:10, 3.78it/s] 28%|██▊ | 103631/371472 [8:14:28<19:20:29, 3.85it/s] 28%|██▊ | 103632/371472 [8:14:28<22:05:01, 3.37it/s] 28%|██▊ | 103633/371472 [8:14:28<21:24:43, 3.47it/s] 28%|██▊ | 103634/371472 [8:14:29<20:39:19, 3.60it/s] 28%|██▊ | 103635/371472 [8:14:29<20:14:59, 3.67it/s] 28%|██▊ | 103636/371472 [8:14:29<21:58:06, 3.39it/s] 28%|██▊ | 103637/371472 [8:14:29<21:44:18, 3.42it/s] 28%|██▊ | 103638/371472 [8:14:30<21:09:45, 3.52it/s] 28%|██▊ | 103639/371472 [8:14:30<20:49:53, 3.57it/s] 28%|██▊ | 103640/371472 [8:14:30<23:09:59, 3.21it/s] {'loss': 3.3151, 'learning_rate': 7.492512228231593e-07, 'epoch': 4.46} + 28%|██▊ | 103640/371472 [8:14:30<23:09:59, 3.21it/s] 28%|██▊ | 103641/371472 [8:14:31<22:48:20, 3.26it/s] 28%|██▊ | 103642/371472 [8:14:31<24:03:19, 3.09it/s] 28%|██▊ | 103643/371472 [8:14:31<24:04:08, 3.09it/s] 28%|██▊ | 103644/371472 [8:14:32<23:57:39, 3.10it/s] 28%|██▊ | 103645/371472 [8:14:32<22:21:09, 3.33it/s] 28%|██▊ | 103646/371472 [8:14:32<21:21:33, 3.48it/s] 28%|██▊ | 103647/371472 [8:14:32<21:15:44, 3.50it/s] 28%|██▊ | 103648/371472 [8:14:33<20:52:33, 3.56it/s] 28%|██▊ | 103649/371472 [8:14:33<20:10:59, 3.69it/s] 28%|██▊ | 103650/371472 [8:14:33<22:35:32, 3.29it/s] 28%|██▊ | 103651/371472 [8:14:34<21:13:37, 3.50it/s] 28%|██▊ | 103652/371472 [8:14:34<20:31:17, 3.63it/s] 28%|██▊ | 103653/371472 [8:14:34<22:00:54, 3.38it/s] 28%|██▊ | 103654/371472 [8:14:35<22:33:30, 3.30it/s] 28%|██▊ | 103655/371472 [8:14:35<21:37:18, 3.44it/s] 28%|██▊ | 103656/371472 [8:14:35<20:21:24, 3.65it/s] 28%|██▊ | 103657/371472 [8:14:35<19:58:34, 3.72it/s] 28%|██▊ | 103658/371472 [8:14:36<20:34:47, 3.61it/s] 28%|██▊ | 103659/371472 [8:14:36<20:51:46, 3.57it/s] 28%|██▊ | 103660/371472 [8:14:36<20:31:52, 3.62it/s] {'loss': 3.4003, 'learning_rate': 7.492027408476804e-07, 'epoch': 4.46} + 28%|██▊ | 103660/371472 [8:14:36<20:31:52, 3.62it/s] 28%|██▊ | 103661/371472 [8:14:36<20:00:07, 3.72it/s] 28%|██▊ | 103662/371472 [8:14:37<20:19:11, 3.66it/s] 28%|██▊ | 103663/371472 [8:14:37<21:29:09, 3.46it/s] 28%|██▊ | 103664/371472 [8:14:37<20:23:21, 3.65it/s] 28%|██▊ | 103665/371472 [8:14:38<20:22:11, 3.65it/s] 28%|██▊ | 103666/371472 [8:14:38<20:50:14, 3.57it/s] 28%|██▊ | 103667/371472 [8:14:38<21:10:59, 3.51it/s] 28%|██▊ | 103668/371472 [8:14:38<20:58:06, 3.55it/s] 28%|██▊ | 103669/371472 [8:14:39<20:43:54, 3.59it/s] 28%|██▊ | 103670/371472 [8:14:39<21:50:49, 3.41it/s] 28%|██▊ | 103671/371472 [8:14:39<21:07:33, 3.52it/s] 28%|██▊ | 103672/371472 [8:14:40<22:14:37, 3.34it/s] 28%|██▊ | 103673/371472 [8:14:40<21:18:47, 3.49it/s] 28%|██▊ | 103674/371472 [8:14:40<20:50:22, 3.57it/s] 28%|██▊ | 103675/371472 [8:14:40<20:59:03, 3.54it/s] 28%|██▊ | 103676/371472 [8:14:41<21:29:50, 3.46it/s] 28%|██▊ | 103677/371472 [8:14:41<21:05:11, 3.53it/s] 28%|██▊ | 103678/371472 [8:14:41<20:51:36, 3.57it/s] 28%|██▊ | 103679/371472 [8:14:42<22:01:35, 3.38it/s] 28%|██▊ | 103680/371472 [8:14:42<21:50:16, 3.41it/s] {'loss': 3.4551, 'learning_rate': 7.491542588722015e-07, 'epoch': 4.47} + 28%|██▊ | 103680/371472 [8:14:42<21:50:16, 3.41it/s] 28%|██▊ | 103681/371472 [8:14:42<22:13:18, 3.35it/s] 28%|██▊ | 103682/371472 [8:14:42<22:37:08, 3.29it/s] 28%|██▊ | 103683/371472 [8:14:43<22:30:20, 3.31it/s] 28%|██▊ | 103684/371472 [8:14:43<21:17:25, 3.49it/s] 28%|██▊ | 103685/371472 [8:14:43<20:08:15, 3.69it/s] 28%|██▊ | 103686/371472 [8:14:44<21:01:40, 3.54it/s] 28%|██▊ | 103687/371472 [8:14:44<21:22:02, 3.48it/s] 28%|██▊ | 103688/371472 [8:14:44<21:58:23, 3.39it/s] 28%|██▊ | 103689/371472 [8:14:44<21:07:58, 3.52it/s] 28%|██▊ | 103690/371472 [8:14:45<20:50:35, 3.57it/s] 28%|██▊ | 103691/371472 [8:14:45<20:16:08, 3.67it/s] 28%|██▊ | 103692/371472 [8:14:45<20:11:08, 3.68it/s] 28%|██▊ | 103693/371472 [8:14:45<19:27:45, 3.82it/s] 28%|██▊ | 103694/371472 [8:14:46<19:28:06, 3.82it/s] 28%|██▊ | 103695/371472 [8:14:46<19:23:35, 3.84it/s] 28%|██▊ | 103696/371472 [8:14:46<19:11:18, 3.88it/s] 28%|██▊ | 103697/371472 [8:14:46<18:33:32, 4.01it/s] 28%|██▊ | 103698/371472 [8:14:47<18:36:46, 4.00it/s] 28%|██▊ | 103699/371472 [8:14:47<18:34:01, 4.01it/s] 28%|██▊ | 103700/371472 [8:14:47<19:07:14, 3.89it/s] {'loss': 3.4606, 'learning_rate': 7.491057768967226e-07, 'epoch': 4.47} + 28%|██▊ | 103700/371472 [8:14:47<19:07:14, 3.89it/s] 28%|██▊ | 103701/371472 [8:14:48<19:45:53, 3.76it/s] 28%|██▊ | 103702/371472 [8:14:48<20:31:03, 3.63it/s] 28%|██▊ | 103703/371472 [8:14:48<20:04:51, 3.70it/s] 28%|██▊ | 103704/371472 [8:14:48<21:28:35, 3.46it/s] 28%|██▊ | 103705/371472 [8:14:49<22:33:04, 3.30it/s] 28%|██▊ | 103706/371472 [8:14:49<23:53:27, 3.11it/s] 28%|██▊ | 103707/371472 [8:14:49<22:22:57, 3.32it/s] 28%|██▊ | 103708/371472 [8:14:50<23:13:16, 3.20it/s] 28%|██▊ | 103709/371472 [8:14:50<22:19:54, 3.33it/s] 28%|██▊ | 103710/371472 [8:14:50<21:40:53, 3.43it/s] 28%|██▊ | 103711/371472 [8:14:51<21:48:23, 3.41it/s] 28%|██▊ | 103712/371472 [8:14:51<22:25:34, 3.32it/s] 28%|██▊ | 103713/371472 [8:14:51<23:29:31, 3.17it/s] 28%|██▊ | 103714/371472 [8:14:52<22:41:38, 3.28it/s] 28%|██▊ | 103715/371472 [8:14:52<21:51:32, 3.40it/s] 28%|██▊ | 103716/371472 [8:14:52<21:02:14, 3.54it/s] 28%|██▊ | 103717/371472 [8:14:52<20:35:40, 3.61it/s] 28%|██▊ | 103718/371472 [8:14:53<20:17:36, 3.67it/s] 28%|██▊ | 103719/371472 [8:14:53<19:31:31, 3.81it/s] 28%|██▊ | 103720/371472 [8:14:53<20:04:16, 3.71it/s] {'loss': 3.4888, 'learning_rate': 7.490572949212437e-07, 'epoch': 4.47} + 28%|██▊ | 103720/371472 [8:14:53<20:04:16, 3.71it/s] 28%|██▊ | 103721/371472 [8:14:53<19:28:25, 3.82it/s] 28%|██▊ | 103722/371472 [8:14:54<21:28:16, 3.46it/s] 28%|██▊ | 103723/371472 [8:14:54<22:25:07, 3.32it/s] 28%|██▊ | 103724/371472 [8:14:54<21:35:31, 3.44it/s] 28%|██▊ | 103725/371472 [8:14:55<21:13:43, 3.50it/s] 28%|██▊ | 103726/371472 [8:14:55<20:45:26, 3.58it/s] 28%|██▊ | 103727/371472 [8:14:55<20:43:15, 3.59it/s] 28%|██▊ | 103728/371472 [8:14:55<21:09:49, 3.51it/s] 28%|██▊ | 103729/371472 [8:14:56<20:06:00, 3.70it/s] 28%|██▊ | 103730/371472 [8:14:56<20:01:51, 3.71it/s] 28%|██▊ | 103731/371472 [8:14:56<19:37:18, 3.79it/s] 28%|██▊ | 103732/371472 [8:14:56<19:34:05, 3.80it/s] 28%|██▊ | 103733/371472 [8:14:57<19:45:12, 3.76it/s] 28%|██▊ | 103734/371472 [8:14:57<19:19:05, 3.85it/s] 28%|██▊ | 103735/371472 [8:14:57<20:16:45, 3.67it/s] 28%|██▊ | 103736/371472 [8:14:58<20:17:51, 3.66it/s] 28%|██▊ | 103737/371472 [8:14:58<19:45:07, 3.77it/s] 28%|██▊ | 103738/371472 [8:14:58<19:42:54, 3.77it/s] 28%|██▊ | 103739/371472 [8:14:58<20:25:13, 3.64it/s] 28%|██▊ | 103740/371472 [8:14:59<20:07:44, 3.69it/s] {'loss': 3.5808, 'learning_rate': 7.490088129457648e-07, 'epoch': 4.47} + 28%|██▊ | 103740/371472 [8:14:59<20:07:44, 3.69it/s] 28%|██▊ | 103741/371472 [8:14:59<19:27:29, 3.82it/s] 28%|██▊ | 103742/371472 [8:14:59<20:04:26, 3.70it/s] 28%|██▊ | 103743/371472 [8:14:59<20:33:53, 3.62it/s] 28%|██▊ | 103744/371472 [8:15:00<20:49:25, 3.57it/s] 28%|██▊ | 103745/371472 [8:15:00<20:48:13, 3.57it/s] 28%|██▊ | 103746/371472 [8:15:00<20:33:40, 3.62it/s] 28%|██▊ | 103747/371472 [8:15:01<20:07:31, 3.70it/s] 28%|██▊ | 103748/371472 [8:15:01<21:43:39, 3.42it/s] 28%|██▊ | 103749/371472 [8:15:01<21:04:01, 3.53it/s] 28%|██▊ | 103750/371472 [8:15:01<21:03:27, 3.53it/s] 28%|██▊ | 103751/371472 [8:15:02<21:09:57, 3.51it/s] 28%|██▊ | 103752/371472 [8:15:02<20:35:22, 3.61it/s] 28%|██▊ | 103753/371472 [8:15:02<20:21:57, 3.65it/s] 28%|██▊ | 103754/371472 [8:15:02<19:53:27, 3.74it/s] 28%|██▊ | 103755/371472 [8:15:03<19:58:08, 3.72it/s] 28%|██▊ | 103756/371472 [8:15:03<19:35:20, 3.80it/s] 28%|██▊ | 103757/371472 [8:15:03<19:19:00, 3.85it/s] 28%|██▊ | 103758/371472 [8:15:03<18:52:32, 3.94it/s] 28%|██▊ | 103759/371472 [8:15:04<18:45:50, 3.96it/s] 28%|██▊ | 103760/371472 [8:15:04<19:40:29, 3.78it/s] {'loss': 3.5023, 'learning_rate': 7.489603309702859e-07, 'epoch': 4.47} + 28%|██▊ | 103760/371472 [8:15:04<19:40:29, 3.78it/s] 28%|██▊ | 103761/371472 [8:15:04<19:46:49, 3.76it/s] 28%|██▊ | 103762/371472 [8:15:05<21:10:27, 3.51it/s] 28%|██▊ | 103763/371472 [8:15:05<21:46:06, 3.42it/s] 28%|██▊ | 103764/371472 [8:15:05<21:53:02, 3.40it/s] 28%|██▊ | 103765/371472 [8:15:06<21:46:20, 3.42it/s] 28%|██▊ | 103766/371472 [8:15:06<20:39:57, 3.60it/s] 28%|██▊ | 103767/371472 [8:15:06<21:46:16, 3.42it/s] 28%|██▊ | 103768/371472 [8:15:06<20:56:18, 3.55it/s] 28%|██▊ | 103769/371472 [8:15:07<21:29:42, 3.46it/s] 28%|██▊ | 103770/371472 [8:15:07<22:19:26, 3.33it/s] 28%|██▊ | 103771/371472 [8:15:07<21:57:35, 3.39it/s] 28%|██▊ | 103772/371472 [8:15:08<21:24:00, 3.47it/s] 28%|██▊ | 103773/371472 [8:15:08<20:59:31, 3.54it/s] 28%|██▊ | 103774/371472 [8:15:08<20:29:05, 3.63it/s] 28%|██▊ | 103775/371472 [8:15:08<21:29:30, 3.46it/s] 28%|██▊ | 103776/371472 [8:15:09<21:19:25, 3.49it/s] 28%|██▊ | 103777/371472 [8:15:09<20:12:48, 3.68it/s] 28%|██▊ | 103778/371472 [8:15:09<19:55:54, 3.73it/s] 28%|██▊ | 103779/371472 [8:15:09<19:45:43, 3.76it/s] 28%|██▊ | 103780/371472 [8:15:10<19:35:01, 3.80it/s] {'loss': 3.4667, 'learning_rate': 7.48911848994807e-07, 'epoch': 4.47} + 28%|██▊ | 103780/371472 [8:15:10<19:35:01, 3.80it/s] 28%|██▊ | 103781/371472 [8:15:10<19:06:55, 3.89it/s] 28%|██▊ | 103782/371472 [8:15:10<18:50:36, 3.95it/s] 28%|██▊ | 103783/371472 [8:15:10<19:09:42, 3.88it/s] 28%|██▊ | 103784/371472 [8:15:11<19:49:32, 3.75it/s] 28%|██▊ | 103785/371472 [8:15:11<19:25:00, 3.83it/s] 28%|██▊ | 103786/371472 [8:15:11<19:39:13, 3.78it/s] 28%|██▊ | 103787/371472 [8:15:12<21:08:34, 3.52it/s] 28%|██▊ | 103788/371472 [8:15:12<20:37:15, 3.61it/s] 28%|██▊ | 103789/371472 [8:15:12<21:24:43, 3.47it/s] 28%|██▊ | 103790/371472 [8:15:12<21:25:45, 3.47it/s] 28%|██▊ | 103791/371472 [8:15:13<20:58:04, 3.55it/s] 28%|██▊ | 103792/371472 [8:15:13<20:59:15, 3.54it/s] 28%|██▊ | 103793/371472 [8:15:13<22:07:18, 3.36it/s] 28%|██▊ | 103794/371472 [8:15:14<23:16:02, 3.20it/s] 28%|██▊ | 103795/371472 [8:15:14<22:05:49, 3.36it/s] 28%|██▊ | 103796/371472 [8:15:14<21:28:12, 3.46it/s] 28%|██▊ | 103797/371472 [8:15:14<21:03:00, 3.53it/s] 28%|██▊ | 103798/371472 [8:15:15<20:19:48, 3.66it/s] 28%|██▊ | 103799/371472 [8:15:15<20:54:51, 3.56it/s] 28%|██▊ | 103800/371472 [8:15:15<20:27:02, 3.64it/s] {'loss': 3.421, 'learning_rate': 7.488633670193282e-07, 'epoch': 4.47} + 28%|██▊ | 103800/371472 [8:15:15<20:27:02, 3.64it/s] 28%|██▊ | 103801/371472 [8:15:16<21:05:30, 3.53it/s] 28%|██▊ | 103802/371472 [8:15:16<20:10:38, 3.68it/s] 28%|██▊ | 103803/371472 [8:15:16<21:37:56, 3.44it/s] 28%|██▊ | 103804/371472 [8:15:16<20:41:52, 3.59it/s] 28%|██▊ | 103805/371472 [8:15:17<21:02:56, 3.53it/s] 28%|██▊ | 103806/371472 [8:15:17<20:16:10, 3.67it/s] 28%|██▊ | 103807/371472 [8:15:17<19:52:41, 3.74it/s] 28%|██▊ | 103808/371472 [8:15:17<20:21:07, 3.65it/s] 28%|██▊ | 103809/371472 [8:15:18<19:26:56, 3.82it/s] 28%|██▊ | 103810/371472 [8:15:18<21:19:08, 3.49it/s] 28%|██▊ | 103811/371472 [8:15:18<21:52:27, 3.40it/s] 28%|██▊ | 103812/371472 [8:15:19<21:31:12, 3.45it/s] 28%|██▊ | 103813/371472 [8:15:19<21:30:03, 3.46it/s] 28%|██▊ | 103814/371472 [8:15:19<22:30:03, 3.30it/s] 28%|██▊ | 103815/371472 [8:15:20<21:19:42, 3.49it/s] 28%|██▊ | 103816/371472 [8:15:20<23:29:41, 3.16it/s] 28%|██▊ | 103817/371472 [8:15:20<24:11:47, 3.07it/s] 28%|██▊ | 103818/371472 [8:15:20<22:17:28, 3.34it/s] 28%|██▊ | 103819/371472 [8:15:21<22:59:09, 3.23it/s] 28%|██▊ | 103820/371472 [8:15:21<22:00:40, 3.38it/s] {'loss': 3.4601, 'learning_rate': 7.488148850438493e-07, 'epoch': 4.47} + 28%|██▊ | 103820/371472 [8:15:21<22:00:40, 3.38it/s] 28%|██▊ | 103821/371472 [8:15:21<22:05:07, 3.37it/s] 28%|██▊ | 103822/371472 [8:15:22<22:21:00, 3.33it/s] 28%|██▊ | 103823/371472 [8:15:22<22:26:22, 3.31it/s] 28%|██▊ | 103824/371472 [8:15:22<22:10:51, 3.35it/s] 28%|██▊ | 103825/371472 [8:15:23<21:46:04, 3.42it/s] 28%|██▊ | 103826/371472 [8:15:23<23:13:24, 3.20it/s] 28%|██▊ | 103827/371472 [8:15:23<22:13:24, 3.35it/s] 28%|█��▊ | 103828/371472 [8:15:24<22:49:57, 3.26it/s] 28%|██▊ | 103829/371472 [8:15:24<21:26:01, 3.47it/s] 28%|██▊ | 103830/371472 [8:15:24<20:33:49, 3.62it/s] 28%|██▊ | 103831/371472 [8:15:24<20:59:45, 3.54it/s] 28%|██▊ | 103832/371472 [8:15:25<20:28:15, 3.63it/s] 28%|██▊ | 103833/371472 [8:15:25<20:20:57, 3.65it/s] 28%|██▊ | 103834/371472 [8:15:25<20:16:17, 3.67it/s] 28%|██▊ | 103835/371472 [8:15:25<20:28:50, 3.63it/s] 28%|██▊ | 103836/371472 [8:15:26<24:48:47, 3.00it/s] 28%|██▊ | 103837/371472 [8:15:26<26:17:24, 2.83it/s] 28%|██▊ | 103838/371472 [8:15:27<23:55:12, 3.11it/s] 28%|██▊ | 103839/371472 [8:15:27<22:44:29, 3.27it/s] 28%|██▊ | 103840/371472 [8:15:27<22:19:07, 3.33it/s] {'loss': 3.4548, 'learning_rate': 7.487664030683703e-07, 'epoch': 4.47} + 28%|██▊ | 103840/371472 [8:15:27<22:19:07, 3.33it/s] 28%|██▊ | 103841/371472 [8:15:27<21:27:49, 3.46it/s] 28%|██▊ | 103842/371472 [8:15:28<23:16:44, 3.19it/s] 28%|██▊ | 103843/371472 [8:15:28<22:24:08, 3.32it/s] 28%|██▊ | 103844/371472 [8:15:28<21:47:37, 3.41it/s] 28%|██▊ | 103845/371472 [8:15:29<22:13:24, 3.35it/s] 28%|██▊ | 103846/371472 [8:15:29<21:17:00, 3.49it/s] 28%|██▊ | 103847/371472 [8:15:29<20:40:43, 3.60it/s] 28%|██▊ | 103848/371472 [8:15:29<22:00:09, 3.38it/s] 28%|██▊ | 103849/371472 [8:15:30<21:57:08, 3.39it/s] 28%|██▊ | 103850/371472 [8:15:30<21:13:32, 3.50it/s] 28%|██▊ | 103851/371472 [8:15:30<20:43:38, 3.59it/s] 28%|██▊ | 103852/371472 [8:15:31<20:24:23, 3.64it/s] 28%|██▊ | 103853/371472 [8:15:31<20:08:43, 3.69it/s] 28%|██▊ | 103854/371472 [8:15:31<20:14:12, 3.67it/s] 28%|██▊ | 103855/371472 [8:15:31<20:44:42, 3.58it/s] 28%|██▊ | 103856/371472 [8:15:32<21:29:09, 3.46it/s] 28%|██▊ | 103857/371472 [8:15:32<20:49:42, 3.57it/s] 28%|██▊ | 103858/371472 [8:15:32<20:25:03, 3.64it/s] 28%|██▊ | 103859/371472 [8:15:32<19:55:09, 3.73it/s] 28%|██▊ | 103860/371472 [8:15:33<20:29:37, 3.63it/s] {'loss': 3.4377, 'learning_rate': 7.487179210928914e-07, 'epoch': 4.47} + 28%|██▊ | 103860/371472 [8:15:33<20:29:37, 3.63it/s] 28%|██▊ | 103861/371472 [8:15:33<20:52:35, 3.56it/s] 28%|██▊ | 103862/371472 [8:15:33<22:21:49, 3.32it/s] 28%|██▊ | 103863/371472 [8:15:34<22:30:33, 3.30it/s] 28%|██▊ | 103864/371472 [8:15:34<23:04:34, 3.22it/s] 28%|██▊ | 103865/371472 [8:15:34<22:44:03, 3.27it/s] 28%|██▊ | 103866/371472 [8:15:35<22:02:18, 3.37it/s] 28%|██▊ | 103867/371472 [8:15:35<21:59:02, 3.38it/s] 28%|██▊ | 103868/371472 [8:15:35<21:05:27, 3.52it/s] 28%|██▊ | 103869/371472 [8:15:35<22:05:17, 3.37it/s] 28%|██▊ | 103870/371472 [8:15:36<22:30:01, 3.30it/s] 28%|██▊ | 103871/371472 [8:15:36<21:59:36, 3.38it/s] 28%|██▊ | 103872/371472 [8:15:36<21:07:19, 3.52it/s] 28%|██▊ | 103873/371472 [8:15:37<20:49:43, 3.57it/s] 28%|██▊ | 103874/371472 [8:15:37<20:25:46, 3.64it/s] 28%|██▊ | 103875/371472 [8:15:37<21:44:42, 3.42it/s] 28%|██▊ | 103876/371472 [8:15:37<20:36:33, 3.61it/s] 28%|██▊ | 103877/371472 [8:15:38<21:03:37, 3.53it/s] 28%|██▊ | 103878/371472 [8:15:38<19:56:35, 3.73it/s] 28%|██▊ | 103879/371472 [8:15:38<19:38:04, 3.79it/s] 28%|██▊ | 103880/371472 [8:15:39<20:53:09, 3.56it/s] {'loss': 3.2797, 'learning_rate': 7.486694391174126e-07, 'epoch': 4.47} + 28%|██▊ | 103880/371472 [8:15:39<20:53:09, 3.56it/s] 28%|██▊ | 103881/371472 [8:15:39<20:33:49, 3.61it/s] 28%|██▊ | 103882/371472 [8:15:39<21:12:08, 3.51it/s] 28%|██▊ | 103883/371472 [8:15:39<20:48:01, 3.57it/s] 28%|██▊ | 103884/371472 [8:15:40<20:14:29, 3.67it/s] 28%|██▊ | 103885/371472 [8:15:40<20:02:31, 3.71it/s] 28%|██▊ | 103886/371472 [8:15:40<19:42:22, 3.77it/s] 28%|██▊ | 103887/371472 [8:15:40<19:25:41, 3.83it/s] 28%|██▊ | 103888/371472 [8:15:41<20:31:52, 3.62it/s] 28%|██▊ | 103889/371472 [8:15:41<20:14:07, 3.67it/s] 28%|██▊ | 103890/371472 [8:15:41<20:14:59, 3.67it/s] 28%|██▊ | 103891/371472 [8:15:41<19:31:57, 3.81it/s] 28%|██▊ | 103892/371472 [8:15:42<20:28:02, 3.63it/s] 28%|██▊ | 103893/371472 [8:15:42<20:14:22, 3.67it/s] 28%|██▊ | 103894/371472 [8:15:42<19:34:56, 3.80it/s] 28%|██▊ | 103895/371472 [8:15:43<19:46:30, 3.76it/s] 28%|██▊ | 103896/371472 [8:15:43<21:27:10, 3.46it/s] 28%|██▊ | 103897/371472 [8:15:43<21:07:24, 3.52it/s] 28%|██▊ | 103898/371472 [8:15:43<21:23:40, 3.47it/s] 28%|██▊ | 103899/371472 [8:15:44<20:27:53, 3.63it/s] 28%|██▊ | 103900/371472 [8:15:44<20:01:22, 3.71it/s] {'loss': 3.6023, 'learning_rate': 7.486209571419336e-07, 'epoch': 4.48} + 28%|██▊ | 103900/371472 [8:15:44<20:01:22, 3.71it/s] 28%|██▊ | 103901/371472 [8:15:44<19:40:57, 3.78it/s] 28%|██▊ | 103902/371472 [8:15:44<19:48:34, 3.75it/s] 28%|██▊ | 103903/371472 [8:15:45<21:06:17, 3.52it/s] 28%|██▊ | 103904/371472 [8:15:45<21:00:37, 3.54it/s] 28%|██▊ | 103905/371472 [8:15:45<22:25:48, 3.31it/s] 28%|██▊ | 103906/371472 [8:15:46<23:16:30, 3.19it/s] 28%|██▊ | 103907/371472 [8:15:46<22:03:23, 3.37it/s] 28%|██▊ | 103908/371472 [8:15:46<23:35:55, 3.15it/s] 28%|██▊ | 103909/371472 [8:15:47<22:30:08, 3.30it/s] 28%|██▊ | 103910/371472 [8:15:47<23:18:58, 3.19it/s] 28%|██▊ | 103911/371472 [8:15:47<23:01:02, 3.23it/s] 28%|██▊ | 103912/371472 [8:15:48<23:17:39, 3.19it/s] 28%|██▊ | 103913/371472 [8:15:48<22:27:29, 3.31it/s] 28%|██▊ | 103914/371472 [8:15:48<22:05:51, 3.36it/s] 28%|██▊ | 103915/371472 [8:15:49<24:17:09, 3.06it/s] 28%|██▊ | 103916/371472 [8:15:49<25:12:26, 2.95it/s] 28%|██▊ | 103917/371472 [8:15:49<23:09:59, 3.21it/s] 28%|██▊ | 103918/371472 [8:15:49<21:44:19, 3.42it/s] 28%|██▊ | 103919/371472 [8:15:50<20:47:13, 3.58it/s] 28%|██▊ | 103920/371472 [8:15:50<21:03:14, 3.53it/s] {'loss': 3.3117, 'learning_rate': 7.485724751664547e-07, 'epoch': 4.48} + 28%|██▊ | 103920/371472 [8:15:50<21:03:14, 3.53it/s] 28%|██▊ | 103921/371472 [8:15:50<20:18:44, 3.66it/s] 28%|██▊ | 103922/371472 [8:15:50<20:02:06, 3.71it/s] 28%|██▊ | 103923/371472 [8:15:51<20:18:07, 3.66it/s] 28%|██▊ | 103924/371472 [8:15:51<20:21:45, 3.65it/s] 28%|██▊ | 103925/371472 [8:15:51<20:20:25, 3.65it/s] 28%|██▊ | 103926/371472 [8:15:52<20:12:06, 3.68it/s] 28%|██▊ | 103927/371472 [8:15:52<20:01:41, 3.71it/s] 28%|██▊ | 103928/371472 [8:15:52<20:38:39, 3.60it/s] 28%|██▊ | 103929/371472 [8:15:52<19:58:01, 3.72it/s] 28%|██▊ | 103930/371472 [8:15:53<22:20:32, 3.33it/s] 28%|██▊ | 103931/371472 [8:15:53<22:39:15, 3.28it/s] 28%|██▊ | 103932/371472 [8:15:53<21:23:50, 3.47it/s] 28%|██▊ | 103933/371472 [8:15:54<20:53:47, 3.56it/s] 28%|██▊ | 103934/371472 [8:15:54<20:57:18, 3.55it/s] 28%|██▊ | 103935/371472 [8:15:54<21:52:13, 3.40it/s] 28%|██▊ | 103936/371472 [8:15:54<21:17:24, 3.49it/s] 28%|██▊ | 103937/371472 [8:15:55<21:29:48, 3.46it/s] 28%|██▊ | 103938/371472 [8:15:55<20:29:28, 3.63it/s] 28%|██▊ | 103939/371472 [8:15:55<20:09:15, 3.69it/s] 28%|██▊ | 103940/371472 [8:15:56<19:30:56, 3.81it/s] {'loss': 3.352, 'learning_rate': 7.485239931909759e-07, 'epoch': 4.48} + 28%|██▊ | 103940/371472 [8:15:56<19:30:56, 3.81it/s] 28%|██▊ | 103941/371472 [8:15:56<19:20:18, 3.84it/s] 28%|██▊ | 103942/371472 [8:15:56<19:00:05, 3.91it/s] 28%|██▊ | 103943/371472 [8:15:56<19:45:47, 3.76it/s] 28%|██▊ | 103944/371472 [8:15:57<21:07:05, 3.52it/s] 28%|██▊ | 103945/371472 [8:15:57<21:28:03, 3.46it/s] 28%|██▊ | 103946/371472 [8:15:57<20:25:59, 3.64it/s] 28%|██▊ | 103947/371472 [8:15:57<19:58:56, 3.72it/s] 28%|██▊ | 103948/371472 [8:15:58<22:39:52, 3.28it/s] 28%|██▊ | 103949/371472 [8:15:58<22:24:38, 3.32it/s] 28%|██▊ | 103950/371472 [8:15:58<22:29:46, 3.30it/s] 28%|██▊ | 103951/371472 [8:15:59<22:05:10, 3.36it/s] 28%|██▊ | 103952/371472 [8:15:59<21:45:05, 3.42it/s] 28%|██▊ | 103953/371472 [8:15:59<20:39:05, 3.60it/s] 28%|██▊ | 103954/371472 [8:16:00<20:27:58, 3.63it/s] 28%|██▊ | 103955/371472 [8:16:00<20:31:10, 3.62it/s] 28%|██▊ | 103956/371472 [8:16:00<19:56:19, 3.73it/s] 28%|██▊ | 103957/371472 [8:16:00<20:09:18, 3.69it/s] 28%|██▊ | 103958/371472 [8:16:01<21:17:36, 3.49it/s] 28%|██▊ | 103959/371472 [8:16:01<21:34:33, 3.44it/s] 28%|██▊ | 103960/371472 [8:16:01<21:32:49, 3.45it/s] {'loss': 3.6262, 'learning_rate': 7.484755112154969e-07, 'epoch': 4.48} + 28%|██▊ | 103960/371472 [8:16:01<21:32:49, 3.45it/s] 28%|██▊ | 103961/371472 [8:16:02<22:04:28, 3.37it/s] 28%|██▊ | 103962/371472 [8:16:02<22:05:56, 3.36it/s] 28%|██▊ | 103963/371472 [8:16:02<20:50:02, 3.57it/s] 28%|██▊ | 103964/371472 [8:16:02<21:10:06, 3.51it/s] 28%|██▊ | 103965/371472 [8:16:03<21:48:11, 3.41it/s] 28%|██▊ | 103966/371472 [8:16:03<20:35:46, 3.61it/s] 28%|██▊ | 103967/371472 [8:16:03<20:36:30, 3.61it/s] 28%|██▊ | 103968/371472 [8:16:03<20:36:11, 3.61it/s] 28%|██▊ | 103969/371472 [8:16:04<21:21:02, 3.48it/s] 28%|██▊ | 103970/371472 [8:16:04<21:14:04, 3.50it/s] 28%|██▊ | 103971/371472 [8:16:04<20:33:53, 3.61it/s] 28%|██▊ | 103972/371472 [8:16:05<20:23:19, 3.64it/s] 28%|██▊ | 103973/371472 [8:16:05<22:02:22, 3.37it/s] 28%|██▊ | 103974/371472 [8:16:05<21:37:07, 3.44it/s] 28%|██▊ | 103975/371472 [8:16:05<20:33:48, 3.61it/s] 28%|██▊ | 103976/371472 [8:16:06<20:26:05, 3.64it/s] 28%|██▊ | 103977/371472 [8:16:06<19:53:44, 3.73it/s] 28%|██▊ | 103978/371472 [8:16:06<19:37:56, 3.78it/s] 28%|██▊ | 103979/371472 [8:16:07<19:54:48, 3.73it/s] 28%|██▊ | 103980/371472 [8:16:07<19:35:36, 3.79it/s] {'loss': 3.3985, 'learning_rate': 7.48427029240018e-07, 'epoch': 4.48} + 28%|██▊ | 103980/371472 [8:16:07<19:35:36, 3.79it/s] 28%|██▊ | 103981/371472 [8:16:07<19:39:50, 3.78it/s] 28%|██▊ | 103982/371472 [8:16:07<20:12:28, 3.68it/s] 28%|██▊ | 103983/371472 [8:16:08<21:57:28, 3.38it/s] 28%|██▊ | 103984/371472 [8:16:08<21:20:19, 3.48it/s] 28%|██▊ | 103985/371472 [8:16:08<20:59:53, 3.54it/s] 28%|██▊ | 103986/371472 [8:16:08<20:14:14, 3.67it/s] 28%|██▊ | 103987/371472 [8:16:09<21:07:46, 3.52it/s] 28%|██▊ | 103988/371472 [8:16:09<20:26:16, 3.64it/s] 28%|██▊ | 103989/371472 [8:16:09<19:27:43, 3.82it/s] 28%|██▊ | 103990/371472 [8:16:10<19:12:02, 3.87it/s] 28%|██▊ | 103991/371472 [8:16:10<19:32:58, 3.80it/s] 28%|██▊ | 103992/371472 [8:16:10<25:51:18, 2.87it/s] 28%|██▊ | 103993/371472 [8:16:11<24:23:11, 3.05it/s] 28%|██▊ | 103994/371472 [8:16:11<23:43:05, 3.13it/s] 28%|██▊ | 103995/371472 [8:16:11<22:42:51, 3.27it/s] 28%|██▊ | 103996/371472 [8:16:11<21:43:35, 3.42it/s] 28%|██▊ | 103997/371472 [8:16:12<21:16:22, 3.49it/s] 28%|██▊ | 103998/371472 [8:16:12<21:14:38, 3.50it/s] 28%|██▊ | 103999/371472 [8:16:12<21:58:07, 3.38it/s] 28%|██▊ | 104000/371472 [8:16:13<23:44:44, 3.13it/s] {'loss': 3.5367, 'learning_rate': 7.483785472645391e-07, 'epoch': 4.48} + 28%|██▊ | 104000/371472 [8:16:13<23:44:44, 3.13it/s] 28%|██▊ | 104001/371472 [8:16:13<23:21:50, 3.18it/s] 28%|██▊ | 104002/371472 [8:16:13<21:51:46, 3.40it/s] 28%|██▊ | 104003/371472 [8:16:14<22:13:15, 3.34it/s] 28%|██▊ | 104004/371472 [8:16:14<21:58:58, 3.38it/s] 28%|██▊ | 104005/371472 [8:16:14<21:44:02, 3.42it/s] 28%|██▊ | 104006/371472 [8:16:14<20:38:47, 3.60it/s] 28%|██▊ | 104007/371472 [8:16:15<20:39:03, 3.60it/s] 28%|██▊ | 104008/371472 [8:16:15<21:41:14, 3.43it/s] 28%|██▊ | 104009/371472 [8:16:15<20:55:51, 3.55it/s] 28%|██▊ | 104010/371472 [8:16:16<20:25:42, 3.64it/s] 28%|██▊ | 104011/371472 [8:16:16<19:31:18, 3.81it/s] 28%|██▊ | 104012/371472 [8:16:16<19:48:14, 3.75it/s] 28%|██▊ | 104013/371472 [8:16:16<19:48:46, 3.75it/s] 28%|██▊ | 104014/371472 [8:16:17<20:36:31, 3.60it/s] 28%|██▊ | 104015/371472 [8:16:17<20:15:49, 3.67it/s] 28%|██▊ | 104016/371472 [8:16:17<20:57:57, 3.54it/s] 28%|██▊ | 104017/371472 [8:16:17<20:23:44, 3.64it/s] 28%|██▊ | 104018/371472 [8:16:18<19:37:28, 3.79it/s] 28%|██▊ | 104019/371472 [8:16:18<19:52:06, 3.74it/s] 28%|██▊ | 104020/371472 [8:16:18<20:04:30, 3.70it/s] {'loss': 3.353, 'learning_rate': 7.483300652890603e-07, 'epoch': 4.48} + 28%|██▊ | 104020/371472 [8:16:18<20:04:30, 3.70it/s] 28%|██▊ | 104021/371472 [8:16:18<20:27:56, 3.63it/s] 28%|██▊ | 104022/371472 [8:16:19<19:55:37, 3.73it/s] 28%|██▊ | 104023/371472 [8:16:19<20:04:29, 3.70it/s] 28%|██▊ | 104024/371472 [8:16:19<20:19:04, 3.66it/s] 28%|██▊ | 104025/371472 [8:16:20<20:20:00, 3.65it/s] 28%|██▊ | 104026/371472 [8:16:20<20:38:30, 3.60it/s] 28%|██▊ | 104027/371472 [8:16:20<20:24:08, 3.64it/s] 28%|██▊ | 104028/371472 [8:16:20<20:34:43, 3.61it/s] 28%|██▊ | 104029/371472 [8:16:21<20:18:58, 3.66it/s] 28%|██▊ | 104030/371472 [8:16:21<19:41:57, 3.77it/s] 28%|██▊ | 104031/371472 [8:16:21<20:17:15, 3.66it/s] 28%|██▊ | 104032/371472 [8:16:22<21:34:57, 3.44it/s] 28%|██▊ | 104033/371472 [8:16:22<21:14:04, 3.50it/s] 28%|██▊ | 104034/371472 [8:16:22<20:35:19, 3.61it/s] 28%|██▊ | 104035/371472 [8:16:22<20:27:27, 3.63it/s] 28%|██▊ | 104036/371472 [8:16:23<20:04:12, 3.70it/s] 28%|██▊ | 104037/371472 [8:16:23<19:40:13, 3.78it/s] 28%|██▊ | 104038/371472 [8:16:23<19:27:11, 3.82it/s] 28%|██▊ | 104039/371472 [8:16:23<20:57:36, 3.54it/s] 28%|██▊ | 104040/371472 [8:16:24<21:24:41, 3.47it/s] {'loss': 3.5371, 'learning_rate': 7.482815833135814e-07, 'epoch': 4.48} + 28%|██▊ | 104040/371472 [8:16:24<21:24:41, 3.47it/s] 28%|██▊ | 104041/371472 [8:16:24<21:23:19, 3.47it/s] 28%|██▊ | 104042/371472 [8:16:24<20:26:14, 3.63it/s] 28%|██▊ | 104043/371472 [8:16:25<20:51:06, 3.56it/s] 28%|██▊ | 104044/371472 [8:16:25<20:41:03, 3.59it/s] 28%|██▊ | 104045/371472 [8:16:25<22:29:08, 3.30it/s] 28%|██▊ | 104046/371472 [8:16:25<21:10:59, 3.51it/s] 28%|██▊ | 104047/371472 [8:16:26<21:31:10, 3.45it/s] 28%|██▊ | 104048/371472 [8:16:26<21:05:16, 3.52it/s] 28%|██▊ | 104049/371472 [8:16:26<20:10:26, 3.68it/s] 28%|██▊ | 104050/371472 [8:16:27<20:36:00, 3.61it/s] 28%|██▊ | 104051/371472 [8:16:27<21:34:27, 3.44it/s] 28%|██▊ | 104052/371472 [8:16:27<21:33:39, 3.45it/s] 28%|██▊ | 104053/371472 [8:16:27<20:42:44, 3.59it/s] 28%|██▊ | 104054/371472 [8:16:28<20:40:36, 3.59it/s] 28%|██▊ | 104055/371472 [8:16:28<19:55:14, 3.73it/s] 28%|██▊ | 104056/371472 [8:16:28<19:44:18, 3.76it/s] 28%|██▊ | 104057/371472 [8:16:28<19:28:11, 3.82it/s] 28%|██▊ | 104058/371472 [8:16:29<19:55:32, 3.73it/s] 28%|██▊ | 104059/371472 [8:16:29<19:07:20, 3.88it/s] 28%|██▊ | 104060/371472 [8:16:29<19:35:10, 3.79it/s] {'loss': 3.4642, 'learning_rate': 7.482331013381025e-07, 'epoch': 4.48} + 28%|██▊ | 104060/371472 [8:16:29<19:35:10, 3.79it/s] 28%|██▊ | 104061/371472 [8:16:29<19:12:55, 3.87it/s] 28%|██▊ | 104062/371472 [8:16:30<19:20:55, 3.84it/s] 28%|██▊ | 104063/371472 [8:16:30<19:52:53, 3.74it/s] 28%|██▊ | 104064/371472 [8:16:30<19:24:43, 3.83it/s] 28%|██▊ | 104065/371472 [8:16:31<20:21:30, 3.65it/s] 28%|██▊ | 104066/371472 [8:16:31<19:55:54, 3.73it/s] 28%|██▊ | 104067/371472 [8:16:31<19:48:25, 3.75it/s] 28%|██▊ | 104068/371472 [8:16:31<22:41:22, 3.27it/s] 28%|██▊ | 104069/371472 [8:16:32<23:40:41, 3.14it/s] 28%|██▊ | 104070/371472 [8:16:32<22:29:01, 3.30it/s] 28%|██▊ | 104071/371472 [8:16:32<22:28:55, 3.30it/s] 28%|██▊ | 104072/371472 [8:16:33<21:23:46, 3.47it/s] 28%|██▊ | 104073/371472 [8:16:33<20:44:33, 3.58it/s] 28%|██▊ | 104074/371472 [8:16:33<21:39:17, 3.43it/s] 28%|██▊ | 104075/371472 [8:16:34<21:03:41, 3.53it/s] 28%|██▊ | 104076/371472 [8:16:34<21:07:36, 3.52it/s] 28%|██▊ | 104077/371472 [8:16:34<20:39:21, 3.60it/s] 28%|██▊ | 104078/371472 [8:16:34<20:38:11, 3.60it/s] 28%|██▊ | 104079/371472 [8:16:35<20:50:12, 3.56it/s] 28%|██▊ | 104080/371472 [8:16:35<21:26:27, 3.46it/s] {'loss': 3.4866, 'learning_rate': 7.481846193626236e-07, 'epoch': 4.48} + 28%|██▊ | 104080/371472 [8:16:35<21:26:27, 3.46it/s] 28%|██▊ | 104081/371472 [8:16:35<22:32:13, 3.30it/s] 28%|██▊ | 104082/371472 [8:16:36<22:04:01, 3.37it/s] 28%|██▊ | 104083/371472 [8:16:36<22:02:46, 3.37it/s] 28%|██▊ | 104084/371472 [8:16:36<22:47:51, 3.26it/s] 28%|██▊ | 104085/371472 [8:16:37<23:10:36, 3.20it/s] 28%|██▊ | 104086/371472 [8:16:37<23:00:18, 3.23it/s] 28%|██▊ | 104087/371472 [8:16:37<22:14:13, 3.34it/s] 28%|██▊ | 104088/371472 [8:16:37<21:55:10, 3.39it/s] 28%|██▊ | 104089/371472 [8:16:38<22:03:56, 3.37it/s] 28%|██▊ | 104090/371472 [8:16:38<21:12:26, 3.50it/s] 28%|██▊ | 104091/371472 [8:16:38<20:58:54, 3.54it/s] 28%|██▊ | 104092/371472 [8:16:38<20:48:56, 3.57it/s] 28%|██▊ | 104093/371472 [8:16:39<21:31:24, 3.45it/s] 28%|██▊ | 104094/371472 [8:16:39<22:14:21, 3.34it/s] 28%|██▊ | 104095/371472 [8:16:39<22:18:18, 3.33it/s] 28%|██▊ | 104096/371472 [8:16:40<22:13:27, 3.34it/s] 28%|██▊ | 104097/371472 [8:16:40<21:35:05, 3.44it/s] 28%|██▊ | 104098/371472 [8:16:40<23:15:14, 3.19it/s] 28%|██▊ | 104099/371472 [8:16:41<22:01:37, 3.37it/s] 28%|██▊ | 104100/371472 [8:16:41<21:09:17, 3.51it/s] {'loss': 3.5181, 'learning_rate': 7.481361373871447e-07, 'epoch': 4.48} + 28%|██▊ | 104100/371472 [8:16:41<21:09:17, 3.51it/s] 28%|██▊ | 104101/371472 [8:16:41<21:21:13, 3.48it/s] 28%|██▊ | 104102/371472 [8:16:41<20:47:50, 3.57it/s] 28%|██▊ | 104103/371472 [8:16:42<21:33:15, 3.45it/s] 28%|██▊ | 104104/371472 [8:16:42<22:12:43, 3.34it/s] 28%|██▊ | 104105/371472 [8:16:42<21:51:27, 3.40it/s] 28%|██▊ | 104106/371472 [8:16:43<21:50:09, 3.40it/s] 28%|██▊ | 104107/371472 [8:16:43<22:28:46, 3.30it/s] 28%|██▊ | 104108/371472 [8:16:43<22:47:05, 3.26it/s] 28%|██▊ | 104109/371472 [8:16:44<22:04:09, 3.37it/s] 28%|██▊ | 104110/371472 [8:16:44<21:16:28, 3.49it/s] 28%|██▊ | 104111/371472 [8:16:44<21:44:24, 3.42it/s] 28%|██▊ | 104112/371472 [8:16:44<21:35:15, 3.44it/s] 28%|██▊ | 104113/371472 [8:16:45<21:34:53, 3.44it/s] 28%|██▊ | 104114/371472 [8:16:45<21:18:55, 3.48it/s] 28%|██▊ | 104115/371472 [8:16:45<21:40:15, 3.43it/s] 28%|██▊ | 104116/371472 [8:16:46<20:28:21, 3.63it/s] 28%|██▊ | 104117/371472 [8:16:46<21:42:01, 3.42it/s] 28%|██▊ | 104118/371472 [8:16:46<21:02:34, 3.53it/s] 28%|██▊ | 104119/371472 [8:16:46<20:19:50, 3.65it/s] 28%|██▊ | 104120/371472 [8:16:47<20:07:24, 3.69it/s] {'loss': 3.6749, 'learning_rate': 7.480876554116658e-07, 'epoch': 4.48} + 28%|██▊ | 104120/371472 [8:16:47<20:07:24, 3.69it/s] 28%|██▊ | 104121/371472 [8:16:47<20:48:35, 3.57it/s] 28%|██▊ | 104122/371472 [8:16:47<21:38:23, 3.43it/s] 28%|██▊ | 104123/371472 [8:16:48<22:27:07, 3.31it/s] 28%|██▊ | 104124/371472 [8:16:48<21:51:44, 3.40it/s] 28%|██▊ | 104125/371472 [8:16:48<22:36:27, 3.28it/s] 28%|██▊ | 104126/371472 [8:16:49<23:45:30, 3.13it/s] 28%|██▊ | 104127/371472 [8:16:49<23:50:39, 3.11it/s] 28%|██▊ | 104128/371472 [8:16:49<23:57:11, 3.10it/s] 28%|██▊ | 104129/371472 [8:16:49<23:47:19, 3.12it/s] 28%|██▊ | 104130/371472 [8:16:50<25:37:43, 2.90it/s] 28%|██▊ | 104131/371472 [8:16:50<23:38:24, 3.14it/s] 28%|██▊ | 104132/371472 [8:16:50<22:16:00, 3.34it/s] 28%|██▊ | 104133/371472 [8:16:51<21:18:32, 3.48it/s] 28%|██▊ | 104134/371472 [8:16:51<21:01:46, 3.53it/s] 28%|██▊ | 104135/371472 [8:16:51<20:48:58, 3.57it/s] 28%|██▊ | 104136/371472 [8:16:52<21:42:11, 3.42it/s] 28%|██▊ | 104137/371472 [8:16:52<21:13:36, 3.50it/s] 28%|██▊ | 104138/371472 [8:16:52<21:35:16, 3.44it/s] 28%|██▊ | 104139/371472 [8:16:52<21:30:35, 3.45it/s] 28%|██▊ | 104140/371472 [8:16:53<20:53:32, 3.55it/s] {'loss': 3.4921, 'learning_rate': 7.480391734361869e-07, 'epoch': 4.49} + 28%|██▊ | 104140/371472 [8:16:53<20:53:32, 3.55it/s] 28%|██▊ | 104141/371472 [8:16:53<21:29:26, 3.46it/s] 28%|██▊ | 104142/371472 [8:16:53<22:52:52, 3.25it/s] 28%|██▊ | 104143/371472 [8:16:54<21:44:36, 3.42it/s] 28%|██▊ | 104144/371472 [8:16:54<21:18:13, 3.49it/s] 28%|██▊ | 104145/371472 [8:16:54<21:52:31, 3.39it/s] 28%|██▊ | 104146/371472 [8:16:54<21:35:34, 3.44it/s] 28%|██▊ | 104147/371472 [8:16:55<23:41:49, 3.13it/s] 28%|██▊ | 104148/371472 [8:16:55<22:10:31, 3.35it/s] 28%|██▊ | 104149/371472 [8:16:55<22:05:16, 3.36it/s] 28%|██▊ | 104150/371472 [8:16:56<23:22:16, 3.18it/s] 28%|██▊ | 104151/371472 [8:16:56<22:00:24, 3.37it/s] 28%|██▊ | 104152/371472 [8:16:56<22:00:50, 3.37it/s] 28%|██▊ | 104153/371472 [8:16:57<21:49:00, 3.40it/s] 28%|██▊ | 104154/371472 [8:16:57<21:00:23, 3.53it/s] 28%|██▊ | 104155/371472 [8:16:57<20:23:30, 3.64it/s] 28%|██▊ | 104156/371472 [8:16:57<22:26:44, 3.31it/s] 28%|██▊ | 104157/371472 [8:16:58<21:51:10, 3.40it/s] 28%|██▊ | 104158/371472 [8:16:58<22:23:42, 3.32it/s] 28%|██▊ | 104159/371472 [8:16:58<21:51:06, 3.40it/s] 28%|██▊ | 104160/371472 [8:16:59<20:53:40, 3.55it/s] {'loss': 3.4176, 'learning_rate': 7.479906914607079e-07, 'epoch': 4.49} + 28%|██▊ | 104160/371472 [8:16:59<20:53:40, 3.55it/s] 28%|██▊ | 104161/371472 [8:16:59<21:21:39, 3.48it/s] 28%|██▊ | 104162/371472 [8:16:59<20:50:18, 3.56it/s] 28%|██▊ | 104163/371472 [8:16:59<20:28:53, 3.63it/s] 28%|██▊ | 104164/371472 [8:17:00<20:00:59, 3.71it/s] 28%|██▊ | 104165/371472 [8:17:00<20:12:24, 3.67it/s] 28%|██▊ | 104166/371472 [8:17:00<23:02:31, 3.22it/s] 28%|██▊ | 104167/371472 [8:17:01<21:55:45, 3.39it/s] 28%|██▊ | 104168/371472 [8:17:01<21:02:34, 3.53it/s] 28%|██▊ | 104169/371472 [8:17:01<21:54:54, 3.39it/s] 28%|██▊ | 104170/371472 [8:17:01<21:10:07, 3.51it/s] 28%|██▊ | 104171/371472 [8:17:02<21:32:24, 3.45it/s] 28%|██▊ | 104172/371472 [8:17:02<23:33:42, 3.15it/s] 28%|██▊ | 104173/371472 [8:17:02<22:15:03, 3.34it/s] 28%|██▊ | 104174/371472 [8:17:03<22:38:31, 3.28it/s] 28%|██▊ | 104175/371472 [8:17:03<21:39:16, 3.43it/s] 28%|██▊ | 104176/371472 [8:17:03<20:32:03, 3.62it/s] 28%|██▊ | 104177/371472 [8:17:03<21:02:43, 3.53it/s] 28%|██▊ | 104178/371472 [8:17:04<21:07:16, 3.52it/s] 28%|██▊ | 104179/371472 [8:17:04<20:59:01, 3.54it/s] 28%|██▊ | 104180/371472 [8:17:04<20:19:37, 3.65it/s] {'loss': 3.5571, 'learning_rate': 7.479422094852292e-07, 'epoch': 4.49} + 28%|██▊ | 104180/371472 [8:17:04<20:19:37, 3.65it/s] 28%|██▊ | 104181/371472 [8:17:05<20:42:38, 3.58it/s] 28%|██▊ | 104182/371472 [8:17:05<19:54:55, 3.73it/s] 28%|██▊ | 104183/371472 [8:17:05<22:07:35, 3.36it/s] 28%|██▊ | 104184/371472 [8:17:05<21:50:28, 3.40it/s] 28%|██▊ | 104185/371472 [8:17:06<22:44:12, 3.27it/s] 28%|██▊ | 104186/371472 [8:17:06<22:02:41, 3.37it/s] 28%|██▊ | 104187/371472 [8:17:06<20:52:31, 3.56it/s] 28%|██▊ | 104188/371472 [8:17:07<20:10:02, 3.68it/s] 28%|██▊ | 104189/371472 [8:17:07<21:11:55, 3.50it/s] 28%|██▊ | 104190/371472 [8:17:07<21:06:24, 3.52it/s] 28%|██▊ | 104191/371472 [8:17:07<20:57:29, 3.54it/s] 28%|██▊ | 104192/371472 [8:17:08<20:18:48, 3.65it/s] 28%|██▊ | 104193/371472 [8:17:08<19:46:48, 3.75it/s] 28%|██▊ | 104194/371472 [8:17:08<19:43:17, 3.76it/s] 28%|██▊ | 104195/371472 [8:17:09<20:09:59, 3.68it/s] 28%|██▊ | 104196/371472 [8:17:09<20:32:34, 3.61it/s] 28%|██▊ | 104197/371472 [8:17:09<20:00:25, 3.71it/s] 28%|██▊ | 104198/371472 [8:17:09<20:23:45, 3.64it/s] 28%|██▊ | 104199/371472 [8:17:10<20:38:32, 3.60it/s] 28%|██▊ | 104200/371472 [8:17:10<20:16:04, 3.66it/s] {'loss': 3.4492, 'learning_rate': 7.478937275097503e-07, 'epoch': 4.49} + 28%|██▊ | 104200/371472 [8:17:10<20:16:04, 3.66it/s] 28%|██▊ | 104201/371472 [8:17:10<19:54:23, 3.73it/s] 28%|██▊ | 104202/371472 [8:17:10<21:12:28, 3.50it/s] 28%|██▊ | 104203/371472 [8:17:11<21:55:40, 3.39it/s] 28%|██▊ | 104204/371472 [8:17:11<22:08:37, 3.35it/s] 28%|██▊ | 104205/371472 [8:17:11<21:15:22, 3.49it/s] 28%|██▊ | 104206/371472 [8:17:12<21:34:43, 3.44it/s] 28%|██▊ | 104207/371472 [8:17:12<20:34:09, 3.61it/s] 28%|██▊ | 104208/371472 [8:17:12<20:41:21, 3.59it/s] 28%|██▊ | 104209/371472 [8:17:12<19:51:42, 3.74it/s] 28%|██▊ | 104210/371472 [8:17:13<19:46:47, 3.75it/s] 28%|██▊ | 104211/371472 [8:17:13<19:12:52, 3.86it/s] 28%|██▊ | 104212/371472 [8:17:13<19:55:04, 3.73it/s] 28%|██▊ | 104213/371472 [8:17:13<19:58:49, 3.72it/s] 28%|██▊ | 104214/371472 [8:17:14<20:52:07, 3.56it/s] 28%|██▊ | 104215/371472 [8:17:14<20:37:48, 3.60it/s] 28%|██▊ | 104216/371472 [8:17:14<21:14:53, 3.49it/s] 28%|██▊ | 104217/371472 [8:17:15<21:49:16, 3.40it/s] 28%|██▊ | 104218/371472 [8:17:15<21:01:37, 3.53it/s] 28%|██▊ | 104219/371472 [8:17:15<20:17:07, 3.66it/s] 28%|██▊ | 104220/371472 [8:17:15<19:36:26, 3.79it/s] {'loss': 3.4424, 'learning_rate': 7.478452455342713e-07, 'epoch': 4.49} + 28%|██▊ | 104220/371472 [8:17:15<19:36:26, 3.79it/s] 28%|██▊ | 104221/371472 [8:17:16<19:31:49, 3.80it/s] 28%|██▊ | 104222/371472 [8:17:16<19:38:05, 3.78it/s] 28%|██▊ | 104223/371472 [8:17:16<20:09:16, 3.68it/s] 28%|██▊ | 104224/371472 [8:17:17<21:46:02, 3.41it/s] 28%|██▊ | 104225/371472 [8:17:17<21:24:26, 3.47it/s] 28%|██▊ | 104226/371472 [8:17:17<22:02:50, 3.37it/s] 28%|██▊ | 104227/371472 [8:17:17<21:20:43, 3.48it/s] 28%|██▊ | 104228/371472 [8:17:18<21:17:24, 3.49it/s] 28%|██▊ | 104229/371472 [8:17:18<20:57:45, 3.54it/s] 28%|██▊ | 104230/371472 [8:17:18<21:01:34, 3.53it/s] 28%|██▊ | 104231/371472 [8:17:19<21:07:31, 3.51it/s] 28%|██▊ | 104232/371472 [8:17:19<22:47:48, 3.26it/s] 28%|██▊ | 104233/371472 [8:17:19<21:36:06, 3.44it/s] 28%|██▊ | 104234/371472 [8:17:20<21:33:37, 3.44it/s] 28%|██▊ | 104235/371472 [8:17:20<21:41:21, 3.42it/s] 28%|██▊ | 104236/371472 [8:17:20<21:44:46, 3.41it/s] 28%|██▊ | 104237/371472 [8:17:20<22:21:07, 3.32it/s] 28%|██▊ | 104238/371472 [8:17:21<22:13:54, 3.34it/s] 28%|██▊ | 104239/371472 [8:17:21<22:04:42, 3.36it/s] 28%|██▊ | 104240/371472 [8:17:21<21:16:33, 3.49it/s] {'loss': 3.5695, 'learning_rate': 7.477967635587924e-07, 'epoch': 4.49} + 28%|██▊ | 104240/371472 [8:17:21<21:16:33, 3.49it/s] 28%|██▊ | 104241/371472 [8:17:22<21:00:15, 3.53it/s] 28%|██▊ | 104242/371472 [8:17:22<20:27:29, 3.63it/s] 28%|██▊ | 104243/371472 [8:17:22<20:22:49, 3.64it/s] 28%|██▊ | 104244/371472 [8:17:22<20:40:08, 3.59it/s] 28%|██▊ | 104245/371472 [8:17:23<20:22:10, 3.64it/s] 28%|██▊ | 104246/371472 [8:17:23<20:45:12, 3.58it/s] 28%|██▊ | 104247/371472 [8:17:23<21:49:13, 3.40it/s] 28%|██▊ | 104248/371472 [8:17:24<21:30:23, 3.45it/s] 28%|██▊ | 104249/371472 [8:17:24<22:08:13, 3.35it/s] 28%|██▊ | 104250/371472 [8:17:24<21:48:06, 3.40it/s] 28%|██▊ | 104251/371472 [8:17:24<20:44:30, 3.58it/s] 28%|██▊ | 104252/371472 [8:17:25<20:00:30, 3.71it/s] 28%|██▊ | 104253/371472 [8:17:25<20:08:18, 3.69it/s] 28%|██▊ | 104254/371472 [8:17:25<21:16:37, 3.49it/s] 28%|██▊ | 104255/371472 [8:17:26<21:36:54, 3.43it/s] 28%|██▊ | 104256/371472 [8:17:26<21:34:54, 3.44it/s] 28%|██▊ | 104257/371472 [8:17:26<21:42:54, 3.42it/s] 28%|██▊ | 104258/371472 [8:17:26<21:49:46, 3.40it/s] 28%|██▊ | 104259/371472 [8:17:27<21:09:51, 3.51it/s] 28%|██▊ | 104260/371472 [8:17:27<21:05:46, 3.52it/s] {'loss': 3.4163, 'learning_rate': 7.477482815833137e-07, 'epoch': 4.49} + 28%|██▊ | 104260/371472 [8:17:27<21:05:46, 3.52it/s] 28%|██▊ | 104261/371472 [8:17:27<20:57:00, 3.54it/s] 28%|██▊ | 104262/371472 [8:17:28<21:03:21, 3.53it/s] 28%|██▊ | 104263/371472 [8:17:28<20:47:30, 3.57it/s] 28%|██▊ | 104264/371472 [8:17:28<20:55:26, 3.55it/s] 28%|██▊ | 104265/371472 [8:17:28<20:58:54, 3.54it/s] 28%|██▊ | 104266/371472 [8:17:29<20:52:09, 3.56it/s] 28%|██▊ | 104267/371472 [8:17:29<22:35:15, 3.29it/s] 28%|██▊ | 104268/371472 [8:17:29<21:30:33, 3.45it/s] 28%|██▊ | 104269/371472 [8:17:30<23:05:10, 3.22it/s] 28%|██▊ | 104270/371472 [8:17:30<22:38:18, 3.28it/s] 28%|██▊ | 104271/371472 [8:17:30<21:40:48, 3.42it/s] 28%|██▊ | 104272/371472 [8:17:30<20:59:48, 3.53it/s] 28%|██▊ | 104273/371472 [8:17:31<21:08:05, 3.51it/s] 28%|██▊ | 104274/371472 [8:17:31<22:11:38, 3.34it/s] 28%|██▊ | 104275/371472 [8:17:31<21:01:15, 3.53it/s] 28%|██▊ | 104276/371472 [8:17:32<20:07:50, 3.69it/s] 28%|██▊ | 104277/371472 [8:17:32<20:13:15, 3.67it/s] 28%|██▊ | 104278/371472 [8:17:32<20:48:00, 3.57it/s] 28%|██▊ | 104279/371472 [8:17:32<21:10:14, 3.51it/s] 28%|██▊ | 104280/371472 [8:17:33<23:20:52, 3.18it/s] {'loss': 3.4495, 'learning_rate': 7.476997996078347e-07, 'epoch': 4.49} + 28%|██▊ | 104280/371472 [8:17:33<23:20:52, 3.18it/s] 28%|██▊ | 104281/371472 [8:17:33<24:42:31, 3.00it/s] 28%|██▊ | 104282/371472 [8:17:33<23:19:37, 3.18it/s] 28%|██▊ | 104283/371472 [8:17:34<22:34:14, 3.29it/s] 28%|██▊ | 104284/371472 [8:17:34<22:32:31, 3.29it/s] 28%|██▊ | 104285/371472 [8:17:34<21:36:57, 3.43it/s] 28%|██▊ | 104286/371472 [8:17:35<20:59:59, 3.53it/s] 28%|██▊ | 104287/371472 [8:17:35<20:11:42, 3.68it/s] 28%|██▊ | 104288/371472 [8:17:35<20:58:04, 3.54it/s] 28%|██▊ | 104289/371472 [8:17:35<20:38:10, 3.60it/s] 28%|██▊ | 104290/371472 [8:17:36<21:10:34, 3.50it/s] 28%|██▊ | 104291/371472 [8:17:36<20:50:14, 3.56it/s] 28%|██▊ | 104292/371472 [8:17:36<20:26:14, 3.63it/s] 28%|██▊ | 104293/371472 [8:17:36<20:31:33, 3.62it/s] 28%|██▊ | 104294/371472 [8:17:37<21:29:18, 3.45it/s] 28%|██▊ | 104295/371472 [8:17:37<21:22:07, 3.47it/s] 28%|██▊ | 104296/371472 [8:17:37<20:12:09, 3.67it/s] 28%|██▊ | 104297/371472 [8:17:38<20:44:47, 3.58it/s] 28%|██▊ | 104298/371472 [8:17:38<20:23:36, 3.64it/s] 28%|██▊ | 104299/371472 [8:17:38<20:41:11, 3.59it/s] 28%|██▊ | 104300/371472 [8:17:38<21:15:09, 3.49it/s] {'loss': 3.5052, 'learning_rate': 7.476513176323558e-07, 'epoch': 4.49} + 28%|██▊ | 104300/371472 [8:17:38<21:15:09, 3.49it/s] 28%|██▊ | 104301/371472 [8:17:39<20:21:40, 3.64it/s] 28%|██▊ | 104302/371472 [8:17:39<19:36:46, 3.78it/s] 28%|██▊ | 104303/371472 [8:17:39<19:35:50, 3.79it/s] 28%|██▊ | 104304/371472 [8:17:40<20:07:04, 3.69it/s] 28%|██▊ | 104305/371472 [8:17:40<21:44:25, 3.41it/s] 28%|██▊ | 104306/371472 [8:17:40<21:22:28, 3.47it/s] 28%|██▊ | 104307/371472 [8:17:40<21:42:29, 3.42it/s] 28%|██▊ | 104308/371472 [8:17:41<21:12:54, 3.50it/s] 28%|██▊ | 104309/371472 [8:17:41<20:59:38, 3.53it/s] 28%|██▊ | 104310/371472 [8:17:41<20:08:32, 3.68it/s] 28%|██▊ | 104311/371472 [8:17:41<19:39:45, 3.77it/s] 28%|██▊ | 104312/371472 [8:17:42<19:20:34, 3.84it/s] 28%|██▊ | 104313/371472 [8:17:42<19:08:09, 3.88it/s] 28%|██▊ | 104314/371472 [8:17:42<18:57:58, 3.91it/s] 28%|██▊ | 104315/371472 [8:17:42<19:09:23, 3.87it/s] 28%|██▊ | 104316/371472 [8:17:43<20:24:40, 3.64it/s] 28%|██▊ | 104317/371472 [8:17:43<20:13:02, 3.67it/s] 28%|██▊ | 104318/371472 [8:17:43<21:22:46, 3.47it/s] 28%|██▊ | 104319/371472 [8:17:44<21:49:30, 3.40it/s] 28%|██▊ | 104320/371472 [8:17:44<21:46:28, 3.41it/s] {'loss': 3.5367, 'learning_rate': 7.476028356568769e-07, 'epoch': 4.49} + 28%|██▊ | 104320/371472 [8:17:44<21:46:28, 3.41it/s] 28%|██▊ | 104321/371472 [8:17:44<22:24:59, 3.31it/s] 28%|██▊ | 104322/371472 [8:17:45<21:46:25, 3.41it/s] 28%|██▊ | 104323/371472 [8:17:45<21:28:34, 3.46it/s] 28%|██▊ | 104324/371472 [8:17:45<20:38:16, 3.60it/s] 28%|██▊ | 104325/371472 [8:17:45<21:34:57, 3.44it/s] 28%|██▊ | 104326/371472 [8:17:46<21:50:08, 3.40it/s] 28%|██▊ | 104327/371472 [8:17:46<21:22:47, 3.47it/s] 28%|██▊ | 104328/371472 [8:17:46<20:58:54, 3.54it/s] 28%|██▊ | 104329/371472 [8:17:47<22:04:14, 3.36it/s] 28%|██▊ | 104330/371472 [8:17:47<21:16:47, 3.49it/s] 28%|██▊ | 104331/371472 [8:17:47<20:58:33, 3.54it/s] 28%|██▊ | 104332/371472 [8:17:47<20:52:35, 3.55it/s] 28%|██▊ | 104333/371472 [8:17:48<21:33:27, 3.44it/s] 28%|██▊ | 104334/371472 [8:17:48<21:22:55, 3.47it/s] 28%|██▊ | 104335/371472 [8:17:48<21:38:48, 3.43it/s] 28%|██▊ | 104336/371472 [8:17:49<20:51:33, 3.56it/s] 28%|██▊ | 104337/371472 [8:17:49<20:35:11, 3.60it/s] 28%|██▊ | 104338/371472 [8:17:49<21:33:21, 3.44it/s] 28%|██▊ | 104339/371472 [8:17:49<20:32:57, 3.61it/s] 28%|██▊ | 104340/371472 [8:17:50<20:26:06, 3.63it/s] {'loss': 3.341, 'learning_rate': 7.475543536813979e-07, 'epoch': 4.49} + 28%|██▊ | 104340/371472 [8:17:50<20:26:06, 3.63it/s] 28%|██▊ | 104341/371472 [8:17:50<20:16:10, 3.66it/s] 28%|██▊ | 104342/371472 [8:17:50<19:55:56, 3.72it/s] 28%|██▊ | 104343/371472 [8:17:50<19:56:36, 3.72it/s] 28%|██▊ | 104344/371472 [8:17:51<20:08:21, 3.68it/s] 28%|██▊ | 104345/371472 [8:17:51<19:49:23, 3.74it/s] 28%|██▊ | 104346/371472 [8:17:51<20:05:50, 3.69it/s] 28%|██▊ | 104347/371472 [8:17:52<20:59:12, 3.54it/s] 28%|██▊ | 104348/371472 [8:17:52<21:24:17, 3.47it/s] 28%|██▊ | 104349/371472 [8:17:52<21:01:09, 3.53it/s] 28%|██▊ | 104350/371472 [8:17:52<20:36:21, 3.60it/s] 28%|██▊ | 104351/371472 [8:17:53<20:34:47, 3.61it/s] 28%|██▊ | 104352/371472 [8:17:53<20:34:53, 3.61it/s] 28%|██▊ | 104353/371472 [8:17:53<21:27:23, 3.46it/s] 28%|██▊ | 104354/371472 [8:17:54<20:39:10, 3.59it/s] 28%|██▊ | 104355/371472 [8:17:54<22:06:33, 3.36it/s] 28%|██▊ | 104356/371472 [8:17:54<21:35:46, 3.44it/s] 28%|██▊ | 104357/371472 [8:17:54<21:03:50, 3.52it/s] 28%|██▊ | 104358/371472 [8:17:55<20:39:29, 3.59it/s] 28%|██▊ | 104359/371472 [8:17:55<21:05:03, 3.52it/s] 28%|██▊ | 104360/371472 [8:17:55<22:38:05, 3.28it/s] {'loss': 3.4593, 'learning_rate': 7.475058717059191e-07, 'epoch': 4.49} + 28%|██▊ | 104360/371472 [8:17:55<22:38:05, 3.28it/s] 28%|██▊ | 104361/371472 [8:17:56<21:55:25, 3.38it/s] 28%|██▊ | 104362/371472 [8:17:56<20:54:17, 3.55it/s] 28%|██▊ | 104363/371472 [8:17:56<20:47:39, 3.57it/s] 28%|██▊ | 104364/371472 [8:17:56<20:25:48, 3.63it/s] 28%|██▊ | 104365/371472 [8:17:57<20:12:50, 3.67it/s] 28%|██▊ | 104366/371472 [8:17:57<20:58:07, 3.54it/s] 28%|██▊ | 104367/371472 [8:17:57<20:53:28, 3.55it/s] 28%|██▊ | 104368/371472 [8:17:58<20:27:25, 3.63it/s] 28%|██▊ | 104369/371472 [8:17:58<19:33:29, 3.79it/s] 28%|██▊ | 104370/371472 [8:17:58<20:17:03, 3.66it/s] 28%|██▊ | 104371/371472 [8:17:58<21:05:06, 3.52it/s] 28%|██▊ | 104372/371472 [8:17:59<20:58:05, 3.54it/s] 28%|██▊ | 104373/371472 [8:17:59<21:25:10, 3.46it/s] 28%|██▊ | 104374/371472 [8:17:59<20:43:35, 3.58it/s] 28%|██▊ | 104375/371472 [8:18:00<22:16:59, 3.33it/s] 28%|██▊ | 104376/371472 [8:18:00<23:01:36, 3.22it/s] 28%|██▊ | 104377/371472 [8:18:00<21:53:39, 3.39it/s] 28%|██▊ | 104378/371472 [8:18:00<21:11:41, 3.50it/s] 28%|██▊ | 104379/371472 [8:18:01<21:39:17, 3.43it/s] 28%|██▊ | 104380/371472 [8:18:01<21:07:31, 3.51it/s] {'loss': 3.4561, 'learning_rate': 7.474573897304402e-07, 'epoch': 4.5} + 28%|██▊ | 104380/371472 [8:18:01<21:07:31, 3.51it/s] 28%|██▊ | 104381/371472 [8:18:01<20:38:35, 3.59it/s] 28%|██▊ | 104382/371472 [8:18:02<20:01:36, 3.70it/s] 28%|██▊ | 104383/371472 [8:18:02<19:57:31, 3.72it/s] 28%|██▊ | 104384/371472 [8:18:02<19:38:58, 3.78it/s] 28%|██▊ | 104385/371472 [8:18:02<21:05:03, 3.52it/s] 28%|██▊ | 104386/371472 [8:18:03<21:33:31, 3.44it/s] 28%|██▊ | 104387/371472 [8:18:03<20:56:05, 3.54it/s] 28%|██▊ | 104388/371472 [8:18:03<20:52:37, 3.55it/s] 28%|██▊ | 104389/371472 [8:18:03<20:31:32, 3.61it/s] 28%|██▊ | 104390/371472 [8:18:04<21:57:05, 3.38it/s] 28%|██▊ | 104391/371472 [8:18:04<21:09:10, 3.51it/s] 28%|██▊ | 104392/371472 [8:18:04<21:45:35, 3.41it/s] 28%|██▊ | 104393/371472 [8:18:05<21:20:04, 3.48it/s] 28%|██▊ | 104394/371472 [8:18:05<20:40:05, 3.59it/s] 28%|██▊ | 104395/371472 [8:18:05<20:22:44, 3.64it/s] 28%|██▊ | 104396/371472 [8:18:05<20:23:36, 3.64it/s] 28%|██▊ | 104397/371472 [8:18:06<19:49:17, 3.74it/s] 28%|██▊ | 104398/371472 [8:18:06<20:50:32, 3.56it/s] 28%|██▊ | 104399/371472 [8:18:06<20:56:59, 3.54it/s] 28%|██▊ | 104400/371472 [8:18:07<20:19:22, 3.65it/s] {'loss': 3.6461, 'learning_rate': 7.474089077549613e-07, 'epoch': 4.5} + 28%|██▊ | 104400/371472 [8:18:07<20:19:22, 3.65it/s] 28%|██▊ | 104401/371472 [8:18:07<20:06:01, 3.69it/s] 28%|██▊ | 104402/371472 [8:18:07<19:38:15, 3.78it/s] 28%|██▊ | 104403/371472 [8:18:07<19:24:25, 3.82it/s] 28%|██▊ | 104404/371472 [8:18:08<20:29:52, 3.62it/s] 28%|██▊ | 104405/371472 [8:18:08<20:24:17, 3.64it/s] 28%|██▊ | 104406/371472 [8:18:08<20:12:22, 3.67it/s] 28%|██▊ | 104407/371472 [8:18:09<21:49:56, 3.40it/s] 28%|██▊ | 104408/371472 [8:18:09<21:07:44, 3.51it/s] 28%|██▊ | 104409/371472 [8:18:09<20:40:48, 3.59it/s] 28%|██▊ | 104410/371472 [8:18:09<20:33:05, 3.61it/s] 28%|██▊ | 104411/371472 [8:18:10<24:14:47, 3.06it/s] 28%|██▊ | 104412/371472 [8:18:10<24:35:16, 3.02it/s] 28%|██▊ | 104413/371472 [8:18:10<23:02:40, 3.22it/s] 28%|██▊ | 104414/371472 [8:18:11<24:33:08, 3.02it/s] 28%|██▊ | 104415/371472 [8:18:11<23:25:55, 3.17it/s] 28%|██▊ | 104416/371472 [8:18:11<21:51:07, 3.39it/s] 28%|██▊ | 104417/371472 [8:18:12<21:15:29, 3.49it/s] 28%|██▊ | 104418/371472 [8:18:12<21:28:32, 3.45it/s] 28%|██▊ | 104419/371472 [8:18:12<20:46:43, 3.57it/s] 28%|██▊ | 104420/371472 [8:18:12<21:01:26, 3.53it/s] {'loss': 3.4264, 'learning_rate': 7.473604257794824e-07, 'epoch': 4.5} + 28%|██▊ | 104420/371472 [8:18:12<21:01:26, 3.53it/s] 28%|██▊ | 104421/371472 [8:18:13<22:05:43, 3.36it/s] 28%|██▊ | 104422/371472 [8:18:13<22:19:50, 3.32it/s] 28%|██▊ | 104423/371472 [8:18:13<21:18:50, 3.48it/s] 28%|██▊ | 104424/371472 [8:18:14<21:20:06, 3.48it/s] 28%|██▊ | 104425/371472 [8:18:14<21:05:27, 3.52it/s] 28%|██▊ | 104426/371472 [8:18:14<21:02:43, 3.52it/s] 28%|██▊ | 104427/371472 [8:18:14<21:50:27, 3.40it/s] 28%|██▊ | 104428/371472 [8:18:15<24:41:38, 3.00it/s] 28%|██▊ | 104429/371472 [8:18:15<23:07:11, 3.21it/s] 28%|██▊ | 104430/371472 [8:18:15<22:03:46, 3.36it/s] 28%|██▊ | 104431/371472 [8:18:16<21:12:14, 3.50it/s] 28%|██▊ | 104432/371472 [8:18:16<20:41:56, 3.58it/s] 28%|██▊ | 104433/371472 [8:18:16<20:29:19, 3.62it/s] 28%|██▊ | 104434/371472 [8:18:17<21:06:47, 3.51it/s] 28%|██▊ | 104435/371472 [8:18:17<21:45:39, 3.41it/s] 28%|██▊ | 104436/371472 [8:18:17<20:45:09, 3.57it/s] 28%|██▊ | 104437/371472 [8:18:17<21:00:58, 3.53it/s] 28%|██▊ | 104438/371472 [8:18:18<20:28:32, 3.62it/s] 28%|██▊ | 104439/371472 [8:18:18<21:39:00, 3.43it/s] 28%|██▊ | 104440/371472 [8:18:18<21:30:07, 3.45it/s] {'loss': 3.4126, 'learning_rate': 7.473119438040035e-07, 'epoch': 4.5} + 28%|██▊ | 104440/371472 [8:18:18<21:30:07, 3.45it/s] 28%|██▊ | 104441/371472 [8:18:18<20:55:32, 3.54it/s] 28%|██▊ | 104442/371472 [8:18:19<20:46:26, 3.57it/s] 28%|██▊ | 104443/371472 [8:18:19<21:11:38, 3.50it/s] 28%|██▊ | 104444/371472 [8:18:19<20:38:10, 3.59it/s] 28%|██▊ | 104445/371472 [8:18:20<20:58:32, 3.54it/s] 28%|██▊ | 104446/371472 [8:18:20<21:38:51, 3.43it/s] 28%|██▊ | 104447/371472 [8:18:20<21:00:16, 3.53it/s] 28%|██▊ | 104448/371472 [8:18:20<20:36:55, 3.60it/s] 28%|██▊ | 104449/371472 [8:18:21<20:53:59, 3.55it/s] 28%|██▊ | 104450/371472 [8:18:21<20:54:01, 3.55it/s] 28%|██▊ | 104451/371472 [8:18:21<20:19:35, 3.65it/s] 28%|██▊ | 104452/371472 [8:18:22<22:05:47, 3.36it/s] 28%|██▊ | 104453/371472 [8:18:22<21:24:12, 3.47it/s] 28%|██▊ | 104454/371472 [8:18:22<21:30:45, 3.45it/s] 28%|██▊ | 104455/371472 [8:18:23<21:31:21, 3.45it/s] 28%|██▊ | 104456/371472 [8:18:23<22:48:01, 3.25it/s] 28%|██▊ | 104457/371472 [8:18:23<22:37:44, 3.28it/s] 28%|██▊ | 104458/371472 [8:18:23<22:58:35, 3.23it/s] 28%|██▊ | 104459/371472 [8:18:24<22:17:55, 3.33it/s] 28%|██▊ | 104460/371472 [8:18:24<21:59:58, 3.37it/s] {'loss': 3.4079, 'learning_rate': 7.472634618285246e-07, 'epoch': 4.5} + 28%|██▊ | 104460/371472 [8:18:24<21:59:58, 3.37it/s] 28%|██▊ | 104461/371472 [8:18:24<22:05:29, 3.36it/s] 28%|██▊ | 104462/371472 [8:18:25<22:13:43, 3.34it/s] 28%|██▊ | 104463/371472 [8:18:25<21:20:36, 3.48it/s] 28%|██▊ | 104464/371472 [8:18:25<20:56:05, 3.54it/s] 28%|██▊ | 104465/371472 [8:18:26<22:25:17, 3.31it/s] 28%|██▊ | 104466/371472 [8:18:26<22:26:19, 3.31it/s] 28%|██▊ | 104467/371472 [8:18:26<23:48:30, 3.12it/s] 28%|██▊ | 104468/371472 [8:18:27<23:50:10, 3.11it/s] 28%|██▊ | 104469/371472 [8:18:27<25:03:40, 2.96it/s] 28%|██▊ | 104470/371472 [8:18:27<24:16:05, 3.06it/s] 28%|██▊ | 104471/371472 [8:18:27<22:47:21, 3.25it/s] 28%|██▊ | 104472/371472 [8:18:28<22:19:10, 3.32it/s] 28%|██▊ | 104473/371472 [8:18:28<23:12:23, 3.20it/s] 28%|██▊ | 104474/371472 [8:18:28<22:24:01, 3.31it/s] 28%|██▊ | 104475/371472 [8:18:29<21:31:08, 3.45it/s] 28%|██▊ | 104476/371472 [8:18:29<20:55:35, 3.54it/s] 28%|██▊ | 104477/371472 [8:18:29<20:42:58, 3.58it/s] 28%|██▊ | 104478/371472 [8:18:29<20:29:58, 3.62it/s] 28%|██▊ | 104479/371472 [8:18:30<19:59:49, 3.71it/s] 28%|██▊ | 104480/371472 [8:18:30<19:30:41, 3.80it/s] {'loss': 3.5648, 'learning_rate': 7.472149798530457e-07, 'epoch': 4.5} + 28%|██▊ | 104480/371472 [8:18:30<19:30:41, 3.80it/s] 28%|██▊ | 104481/371472 [8:18:30<21:17:49, 3.48it/s] 28%|██▊ | 104482/371472 [8:18:31<20:37:04, 3.60it/s] 28%|██▊ | 104483/371472 [8:18:31<22:24:01, 3.31it/s] 28%|██▊ | 104484/371472 [8:18:31<22:25:00, 3.31it/s] 28%|██▊ | 104485/371472 [8:18:32<23:39:34, 3.13it/s] 28%|██▊ | 104486/371472 [8:18:32<23:11:57, 3.20it/s] 28%|██▊ | 104487/371472 [8:18:32<22:38:24, 3.28it/s] 28%|██▊ | 104488/371472 [8:18:32<22:58:51, 3.23it/s] 28%|██▊ | 104489/371472 [8:18:33<22:09:36, 3.35it/s] 28%|██▊ | 104490/371472 [8:18:33<20:57:11, 3.54it/s] 28%|██▊ | 104491/371472 [8:18:33<21:03:04, 3.52it/s] 28%|██▊ | 104492/371472 [8:18:34<21:55:23, 3.38it/s] 28%|██▊ | 104493/371472 [8:18:34<20:43:52, 3.58it/s] 28%|██▊ | 104494/371472 [8:18:34<20:30:46, 3.62it/s] 28%|██▊ | 104495/371472 [8:18:34<22:30:15, 3.30it/s] 28%|██▊ | 104496/371472 [8:18:35<23:32:59, 3.15it/s] 28%|██▊ | 104497/371472 [8:18:35<22:52:24, 3.24it/s] 28%|██▊ | 104498/371472 [8:18:35<22:27:57, 3.30it/s] 28%|██▊ | 104499/371472 [8:18:36<22:01:39, 3.37it/s] 28%|██▊ | 104500/371472 [8:18:36<22:49:52, 3.25it/s] {'loss': 3.6219, 'learning_rate': 7.471664978775668e-07, 'epoch': 4.5} + 28%|██▊ | 104500/371472 [8:18:36<22:49:52, 3.25it/s] 28%|██▊ | 104501/371472 [8:18:36<21:58:25, 3.37it/s] 28%|██▊ | 104502/371472 [8:18:37<21:31:45, 3.44it/s] 28%|██▊ | 104503/371472 [8:18:37<20:42:01, 3.58it/s] 28%|██▊ | 104504/371472 [8:18:37<21:18:01, 3.48it/s] 28%|██▊ | 104505/371472 [8:18:37<21:19:38, 3.48it/s] 28%|██▊ | 104506/371472 [8:18:38<21:07:11, 3.51it/s] 28%|██▊ | 104507/371472 [8:18:38<20:54:16, 3.55it/s] 28%|██▊ | 104508/371472 [8:18:38<20:25:31, 3.63it/s] 28%|██▊ | 104509/371472 [8:18:39<21:21:33, 3.47it/s] 28%|██▊ | 104510/371472 [8:18:39<22:13:33, 3.34it/s] 28%|██▊ | 104511/371472 [8:18:39<21:12:57, 3.50it/s] 28%|██▊ | 104512/371472 [8:18:39<20:57:54, 3.54it/s] 28%|██▊ | 104513/371472 [8:18:40<20:12:58, 3.67it/s] 28%|██▊ | 104514/371472 [8:18:40<19:55:43, 3.72it/s] 28%|██▊ | 104515/371472 [8:18:40<19:55:57, 3.72it/s] 28%|██▊ | 104516/371472 [8:18:40<20:33:01, 3.61it/s] 28%|██▊ | 104517/371472 [8:18:41<21:11:19, 3.50it/s] 28%|██▊ | 104518/371472 [8:18:41<22:06:29, 3.35it/s] 28%|██▊ | 104519/371472 [8:18:41<22:24:32, 3.31it/s] 28%|██▊ | 104520/371472 [8:18:42<21:19:41, 3.48it/s] {'loss': 3.5651, 'learning_rate': 7.471180159020879e-07, 'epoch': 4.5} + 28%|██▊ | 104520/371472 [8:18:42<21:19:41, 3.48it/s] 28%|██▊ | 104521/371472 [8:18:42<21:40:39, 3.42it/s] 28%|██▊ | 104522/371472 [8:18:42<21:36:17, 3.43it/s] 28%|██▊ | 104523/371472 [8:18:43<20:53:23, 3.55it/s] 28%|██▊ | 104524/371472 [8:18:43<20:40:01, 3.59it/s] 28%|██▊ | 104525/371472 [8:18:43<19:52:18, 3.73it/s] 28%|██▊ | 104526/371472 [8:18:43<21:06:39, 3.51it/s] 28%|██▊ | 104527/371472 [8:18:44<20:34:15, 3.60it/s] 28%|██▊ | 104528/371472 [8:18:44<20:25:36, 3.63it/s] 28%|██▊ | 104529/371472 [8:18:44<19:34:38, 3.79it/s] 28%|██▊ | 104530/371472 [8:18:44<19:27:27, 3.81it/s] 28%|██▊ | 104531/371472 [8:18:45<19:05:51, 3.88it/s] 28%|██▊ | 104532/371472 [8:18:45<20:36:33, 3.60it/s] 28%|██▊ | 104533/371472 [8:18:45<20:49:57, 3.56it/s] 28%|██▊ | 104534/371472 [8:18:45<20:35:17, 3.60it/s] 28%|██▊ | 104535/371472 [8:18:46<20:19:25, 3.65it/s] 28%|██▊ | 104536/371472 [8:18:46<21:24:17, 3.46it/s] 28%|██▊ | 104537/371472 [8:18:47<25:25:30, 2.92it/s] 28%|██▊ | 104538/371472 [8:18:47<23:23:53, 3.17it/s] 28%|██▊ | 104539/371472 [8:18:47<22:34:49, 3.28it/s] 28%|██▊ | 104540/371472 [8:18:47<22:20:39, 3.32it/s] {'loss': 3.5358, 'learning_rate': 7.47069533926609e-07, 'epoch': 4.5} + 28%|██▊ | 104540/371472 [8:18:47<22:20:39, 3.32it/s] 28%|██▊ | 104541/371472 [8:18:48<21:51:53, 3.39it/s] 28%|██▊ | 104542/371472 [8:18:48<21:26:19, 3.46it/s] 28%|██▊ | 104543/371472 [8:18:48<20:53:09, 3.55it/s] 28%|██▊ | 104544/371472 [8:18:48<20:46:10, 3.57it/s] 28%|██▊ | 104545/371472 [8:18:49<20:43:03, 3.58it/s] 28%|██▊ | 104546/371472 [8:18:49<20:09:26, 3.68it/s] 28%|██▊ | 104547/371472 [8:18:49<20:21:49, 3.64it/s] 28%|██▊ | 104548/371472 [8:18:50<22:27:36, 3.30it/s] 28%|██▊ | 104549/371472 [8:18:50<22:22:32, 3.31it/s] 28%|██▊ | 104550/371472 [8:18:50<22:20:08, 3.32it/s] 28%|██▊ | 104551/371472 [8:18:51<21:15:04, 3.49it/s] 28%|██▊ | 104552/371472 [8:18:51<20:50:09, 3.56it/s] 28%|██▊ | 104553/371472 [8:18:51<20:46:39, 3.57it/s] 28%|██▊ | 104554/371472 [8:18:51<20:13:38, 3.67it/s] 28%|██▊ | 104555/371472 [8:18:52<19:53:16, 3.73it/s] 28%|██▊ | 104556/371472 [8:18:52<20:10:18, 3.68it/s] 28%|██▊ | 104557/371472 [8:18:52<20:48:08, 3.56it/s] 28%|██▊ | 104558/371472 [8:18:52<20:20:11, 3.65it/s] 28%|██▊ | 104559/371472 [8:18:53<20:30:28, 3.62it/s] 28%|██▊ | 104560/371472 [8:18:53<21:43:23, 3.41it/s] {'loss': 3.3053, 'learning_rate': 7.470210519511301e-07, 'epoch': 4.5} + 28%|██▊ | 104560/371472 [8:18:53<21:43:23, 3.41it/s] 28%|██▊ | 104561/371472 [8:18:53<21:27:38, 3.45it/s] 28%|██▊ | 104562/371472 [8:18:54<20:39:18, 3.59it/s] 28%|██▊ | 104563/371472 [8:18:54<21:41:11, 3.42it/s] 28%|██▊ | 104564/371472 [8:18:54<22:30:25, 3.29it/s] 28%|██▊ | 104565/371472 [8:18:55<29:59:18, 2.47it/s] 28%|██▊ | 104566/371472 [8:18:55<28:27:41, 2.60it/s] 28%|██▊ | 104567/371472 [8:18:55<25:52:41, 2.86it/s] 28%|██▊ | 104568/371472 [8:18:56<24:53:59, 2.98it/s] 28%|██▊ | 104569/371472 [8:18:56<23:16:06, 3.19it/s] 28%|██▊ | 104570/371472 [8:18:56<22:30:12, 3.29it/s] 28%|██▊ | 104571/371472 [8:18:57<22:55:59, 3.23it/s] 28%|██▊ | 104572/371472 [8:18:57<21:29:13, 3.45it/s] 28%|██▊ | 104573/371472 [8:18:57<20:36:51, 3.60it/s] 28%|██▊ | 104574/371472 [8:18:57<19:56:19, 3.72it/s] 28%|██▊ | 104575/371472 [8:18:58<19:22:32, 3.83it/s] 28%|██▊ | 104576/371472 [8:18:58<20:25:25, 3.63it/s] 28%|██▊ | 104577/371472 [8:18:58<20:36:38, 3.60it/s] 28%|██▊ | 104578/371472 [8:18:58<19:59:31, 3.71it/s] 28%|██▊ | 104579/371472 [8:18:59<20:03:47, 3.70it/s] 28%|██▊ | 104580/371472 [8:18:59<19:56:28, 3.72it/s] {'loss': 3.467, 'learning_rate': 7.469725699756513e-07, 'epoch': 4.5} + 28%|██▊ | 104580/371472 [8:18:59<19:56:28, 3.72it/s] 28%|██▊ | 104581/371472 [8:18:59<20:04:04, 3.69it/s] 28%|██▊ | 104582/371472 [8:19:00<19:43:02, 3.76it/s] 28%|██▊ | 104583/371472 [8:19:00<19:22:13, 3.83it/s] 28%|██▊ | 104584/371472 [8:19:00<18:54:35, 3.92it/s] 28%|██▊ | 104585/371472 [8:19:00<19:17:25, 3.84it/s] 28%|██▊ | 104586/371472 [8:19:01<18:58:07, 3.91it/s] 28%|██▊ | 104587/371472 [8:19:01<19:12:29, 3.86it/s] 28%|██▊ | 104588/371472 [8:19:01<19:18:19, 3.84it/s] 28%|██▊ | 104589/371472 [8:19:01<19:24:59, 3.82it/s] 28%|██▊ | 104590/371472 [8:19:02<19:12:14, 3.86it/s] 28%|██▊ | 104591/371472 [8:19:02<20:06:27, 3.69it/s] 28%|██▊ | 104592/371472 [8:19:02<20:20:52, 3.64it/s] 28%|██▊ | 104593/371472 [8:19:02<20:27:25, 3.62it/s] 28%|██▊ | 104594/371472 [8:19:03<20:48:52, 3.56it/s] 28%|██▊ | 104595/371472 [8:19:03<20:40:21, 3.59it/s] 28%|██▊ | 104596/371472 [8:19:03<20:19:16, 3.65it/s] 28%|██▊ | 104597/371472 [8:19:04<19:53:32, 3.73it/s] 28%|██▊ | 104598/371472 [8:19:04<20:01:29, 3.70it/s] 28%|██▊ | 104599/371472 [8:19:04<20:10:44, 3.67it/s] 28%|██▊ | 104600/371472 [8:19:04<19:37:45, 3.78it/s] {'loss': 3.3649, 'learning_rate': 7.469240880001723e-07, 'epoch': 4.51} + 28%|██▊ | 104600/371472 [8:19:04<19:37:45, 3.78it/s] 28%|██▊ | 104601/371472 [8:19:05<20:00:04, 3.71it/s] 28%|██▊ | 104602/371472 [8:19:05<20:20:11, 3.65it/s] 28%|██▊ | 104603/371472 [8:19:05<20:29:04, 3.62it/s] 28%|██▊ | 104604/371472 [8:19:06<21:48:13, 3.40it/s] 28%|██▊ | 104605/371472 [8:19:06<23:29:07, 3.16it/s] 28%|██▊ | 104606/371472 [8:19:06<22:29:34, 3.30it/s] 28%|██▊ | 104607/371472 [8:19:06<21:11:09, 3.50it/s] 28%|██▊ | 104608/371472 [8:19:07<20:36:26, 3.60it/s] 28%|██▊ | 104609/371472 [8:19:07<19:58:42, 3.71it/s] 28%|██▊ | 104610/371472 [8:19:07<21:28:55, 3.45it/s] 28%|██▊ | 104611/371472 [8:19:08<22:17:21, 3.33it/s] 28%|██▊ | 104612/371472 [8:19:08<25:28:20, 2.91it/s] 28%|██▊ | 104613/371472 [8:19:08<24:41:45, 3.00it/s] 28%|██▊ | 104614/371472 [8:19:09<23:30:03, 3.15it/s] 28%|██▊ | 104615/371472 [8:19:09<23:02:44, 3.22it/s] 28%|██▊ | 104616/371472 [8:19:09<21:33:41, 3.44it/s] 28%|██▊ | 104617/371472 [8:19:09<20:40:46, 3.58it/s] 28%|██▊ | 104618/371472 [8:19:10<22:47:45, 3.25it/s] 28%|██▊ | 104619/371472 [8:19:10<22:23:37, 3.31it/s] 28%|██▊ | 104620/371472 [8:19:10<22:03:23, 3.36it/s] {'loss': 3.5051, 'learning_rate': 7.468756060246934e-07, 'epoch': 4.51} + 28%|██▊ | 104620/371472 [8:19:10<22:03:23, 3.36it/s] 28%|██▊ | 104621/371472 [8:19:11<21:21:31, 3.47it/s] 28%|██▊ | 104622/371472 [8:19:11<21:18:22, 3.48it/s] 28%|██▊ | 104623/371472 [8:19:11<20:17:58, 3.65it/s] 28%|██▊ | 104624/371472 [8:19:11<19:42:11, 3.76it/s] 28%|██▊ | 104625/371472 [8:19:12<20:08:36, 3.68it/s] 28%|██▊ | 104626/371472 [8:19:12<21:07:20, 3.51it/s] 28%|██▊ | 104627/371472 [8:19:12<20:50:00, 3.56it/s] 28%|██▊ | 104628/371472 [8:19:13<21:29:02, 3.45it/s] 28%|██▊ | 104629/371472 [8:19:13<21:20:59, 3.47it/s] 28%|██▊ | 104630/371472 [8:19:13<21:55:48, 3.38it/s] 28%|██▊ | 104631/371472 [8:19:13<21:26:33, 3.46it/s] 28%|██▊ | 104632/371472 [8:19:14<22:48:38, 3.25it/s] 28%|██▊ | 104633/371472 [8:19:14<21:36:32, 3.43it/s] 28%|██▊ | 104634/371472 [8:19:14<22:27:09, 3.30it/s] 28%|██▊ | 104635/371472 [8:19:15<21:29:59, 3.45it/s] 28%|██▊ | 104636/371472 [8:19:15<21:46:49, 3.40it/s] 28%|██▊ | 104637/371472 [8:19:15<21:09:48, 3.50it/s] 28%|██▊ | 104638/371472 [8:19:15<20:57:22, 3.54it/s] 28%|██▊ | 104639/371472 [8:19:16<21:15:18, 3.49it/s] 28%|██▊ | 104640/371472 [8:19:16<21:25:59, 3.46it/s] {'loss': 3.2979, 'learning_rate': 7.468271240492145e-07, 'epoch': 4.51} + 28%|██▊ | 104640/371472 [8:19:16<21:25:59, 3.46it/s] 28%|██▊ | 104641/371472 [8:19:16<20:45:28, 3.57it/s] 28%|██▊ | 104642/371472 [8:19:17<22:05:31, 3.36it/s] 28%|██▊ | 104643/371472 [8:19:17<21:53:26, 3.39it/s] 28%|██▊ | 104644/371472 [8:19:17<21:10:11, 3.50it/s] 28%|██▊ | 104645/371472 [8:19:18<22:48:27, 3.25it/s] 28%|██▊ | 104646/371472 [8:19:18<21:42:16, 3.41it/s] 28%|██▊ | 104647/371472 [8:19:18<20:58:09, 3.53it/s] 28%|██▊ | 104648/371472 [8:19:18<21:19:40, 3.48it/s] 28%|██▊ | 104649/371472 [8:19:19<20:34:51, 3.60it/s] 28%|██▊ | 104650/371472 [8:19:19<20:40:46, 3.58it/s] 28%|██▊ | 104651/371472 [8:19:19<20:57:52, 3.54it/s] 28%|██▊ | 104652/371472 [8:19:19<20:40:26, 3.58it/s] 28%|██▊ | 104653/371472 [8:19:20<19:57:00, 3.72it/s] 28%|██▊ | 104654/371472 [8:19:20<20:06:03, 3.69it/s] 28%|██▊ | 104655/371472 [8:19:20<20:48:29, 3.56it/s] 28%|██▊ | 104656/371472 [8:19:21<20:49:05, 3.56it/s] 28%|██▊ | 104657/371472 [8:19:21<20:19:03, 3.65it/s] 28%|██▊ | 104658/371472 [8:19:21<20:34:06, 3.60it/s] 28%|██▊ | 104659/371472 [8:19:21<20:39:56, 3.59it/s] 28%|██▊ | 104660/371472 [8:19:22<20:22:46, 3.64it/s] {'loss': 3.5733, 'learning_rate': 7.467786420737357e-07, 'epoch': 4.51} + 28%|██▊ | 104660/371472 [8:19:22<20:22:46, 3.64it/s] 28%|██▊ | 104661/371472 [8:19:22<20:06:46, 3.68it/s] 28%|██▊ | 104662/371472 [8:19:22<20:47:36, 3.56it/s] 28%|██▊ | 104663/371472 [8:19:23<20:20:56, 3.64it/s] 28%|██▊ | 104664/371472 [8:19:23<20:23:54, 3.63it/s] 28%|██▊ | 104665/371472 [8:19:23<20:33:31, 3.60it/s] 28%|██▊ | 104666/371472 [8:19:23<22:12:25, 3.34it/s] 28%|██▊ | 104667/371472 [8:19:24<21:45:04, 3.41it/s] 28%|██▊ | 104668/371472 [8:19:24<21:28:54, 3.45it/s] 28%|██▊ | 104669/371472 [8:19:24<20:52:34, 3.55it/s] 28%|██▊ | 104670/371472 [8:19:25<20:45:12, 3.57it/s] 28%|██▊ | 104671/371472 [8:19:25<21:34:25, 3.44it/s] 28%|██▊ | 104672/371472 [8:19:25<21:28:02, 3.45it/s] 28%|██▊ | 104673/371472 [8:19:25<21:12:03, 3.50it/s] 28%|██▊ | 104674/371472 [8:19:26<22:19:08, 3.32it/s] 28%|██▊ | 104675/371472 [8:19:26<21:45:06, 3.41it/s] 28%|██▊ | 104676/371472 [8:19:26<22:22:40, 3.31it/s] 28%|██▊ | 104677/371472 [8:19:27<22:02:18, 3.36it/s] 28%|██▊ | 104678/371472 [8:19:27<21:38:56, 3.42it/s] 28%|██▊ | 104679/371472 [8:19:27<23:07:19, 3.21it/s] 28%|██▊ | 104680/371472 [8:19:28<21:57:32, 3.37it/s] {'loss': 3.5325, 'learning_rate': 7.467301600982569e-07, 'epoch': 4.51} + 28%|██▊ | 104680/371472 [8:19:28<21:57:32, 3.37it/s] 28%|██▊ | 104681/371472 [8:19:28<20:56:45, 3.54it/s] 28%|██▊ | 104682/371472 [8:19:28<20:55:17, 3.54it/s] 28%|██▊ | 104683/371472 [8:19:28<20:52:02, 3.55it/s] 28%|██▊ | 104684/371472 [8:19:29<21:30:04, 3.45it/s] 28%|██▊ | 104685/371472 [8:19:29<20:24:49, 3.63it/s] 28%|██▊ | 104686/371472 [8:19:29<19:38:41, 3.77it/s] 28%|██▊ | 104687/371472 [8:19:29<19:28:36, 3.80it/s] 28%|██▊ | 104688/371472 [8:19:30<19:37:44, 3.78it/s] 28%|██▊ | 104689/371472 [8:19:30<19:47:52, 3.74it/s] 28%|██▊ | 104690/371472 [8:19:30<19:31:04, 3.80it/s] 28%|██▊ | 104691/371472 [8:19:30<19:44:33, 3.75it/s] 28%|██�� | 104692/371472 [8:19:31<20:35:21, 3.60it/s] 28%|██▊ | 104693/371472 [8:19:31<20:45:02, 3.57it/s] 28%|██▊ | 104694/371472 [8:19:31<22:19:33, 3.32it/s] 28%|██▊ | 104695/371472 [8:19:32<22:14:27, 3.33it/s] 28%|██▊ | 104696/371472 [8:19:32<21:15:25, 3.49it/s] 28%|██▊ | 104697/371472 [8:19:32<22:27:19, 3.30it/s] 28%|██▊ | 104698/371472 [8:19:33<21:19:42, 3.47it/s] 28%|██▊ | 104699/371472 [8:19:33<20:48:26, 3.56it/s] 28%|██▊ | 104700/371472 [8:19:33<20:36:12, 3.60it/s] {'loss': 3.4373, 'learning_rate': 7.466816781227779e-07, 'epoch': 4.51} + 28%|██▊ | 104700/371472 [8:19:33<20:36:12, 3.60it/s] 28%|██▊ | 104701/371472 [8:19:33<20:58:36, 3.53it/s] 28%|██▊ | 104702/371472 [8:19:34<20:27:48, 3.62it/s] 28%|██▊ | 104703/371472 [8:19:34<20:05:56, 3.69it/s] 28%|██▊ | 104704/371472 [8:19:34<21:19:58, 3.47it/s] 28%|██▊ | 104705/371472 [8:19:35<22:21:10, 3.32it/s] 28%|██▊ | 104706/371472 [8:19:35<22:02:26, 3.36it/s] 28%|██▊ | 104707/371472 [8:19:35<22:04:35, 3.36it/s] 28%|██▊ | 104708/371472 [8:19:35<21:04:21, 3.52it/s] 28%|██▊ | 104709/371472 [8:19:36<21:33:08, 3.44it/s] 28%|██▊ | 104710/371472 [8:19:36<22:57:48, 3.23it/s] 28%|██▊ | 104711/371472 [8:19:36<22:04:15, 3.36it/s] 28%|██▊ | 104712/371472 [8:19:37<22:33:04, 3.29it/s] 28%|██▊ | 104713/371472 [8:19:37<21:57:08, 3.38it/s] 28%|██▊ | 104714/371472 [8:19:37<21:34:43, 3.43it/s] 28%|██▊ | 104715/371472 [8:19:37<21:39:31, 3.42it/s] 28%|██▊ | 104716/371472 [8:19:38<22:38:03, 3.27it/s] 28%|██▊ | 104717/371472 [8:19:38<21:41:28, 3.42it/s] 28%|██▊ | 104718/371472 [8:19:38<22:21:37, 3.31it/s] 28%|██▊ | 104719/371472 [8:19:39<21:09:35, 3.50it/s] 28%|██▊ | 104720/371472 [8:19:39<22:07:34, 3.35it/s] {'loss': 3.3197, 'learning_rate': 7.466331961472989e-07, 'epoch': 4.51} + 28%|██▊ | 104720/371472 [8:19:39<22:07:34, 3.35it/s] 28%|██▊ | 104721/371472 [8:19:39<21:07:12, 3.51it/s] 28%|██▊ | 104722/371472 [8:19:40<20:53:37, 3.55it/s] 28%|██▊ | 104723/371472 [8:19:40<22:21:51, 3.31it/s] 28%|██▊ | 104724/371472 [8:19:40<21:50:20, 3.39it/s] 28%|██▊ | 104725/371472 [8:19:40<21:31:27, 3.44it/s] 28%|██▊ | 104726/371472 [8:19:41<21:45:30, 3.41it/s] 28%|██▊ | 104727/371472 [8:19:41<20:36:13, 3.60it/s] 28%|██▊ | 104728/371472 [8:19:41<22:03:33, 3.36it/s] 28%|██▊ | 104729/371472 [8:19:42<21:40:21, 3.42it/s] 28%|██▊ | 104730/371472 [8:19:42<22:05:24, 3.35it/s] 28%|██▊ | 104731/371472 [8:19:42<21:49:51, 3.39it/s] 28%|██▊ | 104732/371472 [8:19:42<21:13:33, 3.49it/s] 28%|██▊ | 104733/371472 [8:19:43<21:48:46, 3.40it/s] 28%|██▊ | 104734/371472 [8:19:43<22:02:21, 3.36it/s] 28%|██▊ | 104735/371472 [8:19:43<20:56:14, 3.54it/s] 28%|██▊ | 104736/371472 [8:19:44<21:00:59, 3.53it/s] 28%|██▊ | 104737/371472 [8:19:44<20:00:33, 3.70it/s] 28%|██▊ | 104738/371472 [8:19:44<20:01:45, 3.70it/s] 28%|██▊ | 104739/371472 [8:19:44<19:56:50, 3.71it/s] 28%|██▊ | 104740/371472 [8:19:45<20:35:44, 3.60it/s] {'loss': 3.4607, 'learning_rate': 7.465847141718201e-07, 'epoch': 4.51} + 28%|██▊ | 104740/371472 [8:19:45<20:35:44, 3.60it/s] 28%|██▊ | 104741/371472 [8:19:45<20:21:14, 3.64it/s] 28%|██▊ | 104742/371472 [8:19:45<20:14:52, 3.66it/s] 28%|██▊ | 104743/371472 [8:19:45<20:12:18, 3.67it/s] 28%|██▊ | 104744/371472 [8:19:46<20:30:17, 3.61it/s] 28%|██▊ | 104745/371472 [8:19:46<20:19:47, 3.64it/s] 28%|██▊ | 104746/371472 [8:19:46<20:22:00, 3.64it/s] 28%|██▊ | 104747/371472 [8:19:47<20:31:43, 3.61it/s] 28%|██▊ | 104748/371472 [8:19:47<19:47:26, 3.74it/s] 28%|██▊ | 104749/371472 [8:19:47<19:22:59, 3.82it/s] 28%|██▊ | 104750/371472 [8:19:47<21:32:23, 3.44it/s] 28%|██▊ | 104751/371472 [8:19:48<22:06:50, 3.35it/s] 28%|██▊ | 104752/371472 [8:19:48<21:40:25, 3.42it/s] 28%|██▊ | 104753/371472 [8:19:48<21:52:31, 3.39it/s] 28%|██▊ | 104754/371472 [8:19:49<21:10:28, 3.50it/s] 28%|██▊ | 104755/371472 [8:19:49<20:14:35, 3.66it/s] 28%|██▊ | 104756/371472 [8:19:49<19:43:25, 3.76it/s] 28%|██▊ | 104757/371472 [8:19:49<19:24:00, 3.82it/s] 28%|██▊ | 104758/371472 [8:19:50<18:57:38, 3.91it/s] 28%|██▊ | 104759/371472 [8:19:50<18:48:40, 3.94it/s] 28%|██▊ | 104760/371472 [8:19:50<19:11:26, 3.86it/s] {'loss': 3.6503, 'learning_rate': 7.465362321963412e-07, 'epoch': 4.51} + 28%|██▊ | 104760/371472 [8:19:50<19:11:26, 3.86it/s] 28%|██▊ | 104761/371472 [8:19:50<18:57:14, 3.91it/s] 28%|██▊ | 104762/371472 [8:19:51<18:41:07, 3.96it/s] 28%|██▊ | 104763/371472 [8:19:51<18:17:27, 4.05it/s] 28%|██▊ | 104764/371472 [8:19:51<19:38:56, 3.77it/s] 28%|██▊ | 104765/371472 [8:19:51<19:50:09, 3.73it/s] 28%|██▊ | 104766/371472 [8:19:52<21:15:02, 3.49it/s] 28%|██▊ | 104767/371472 [8:19:52<23:24:21, 3.17it/s] 28%|██▊ | 104768/371472 [8:19:52<22:59:19, 3.22it/s] 28%|██▊ | 104769/371472 [8:19:53<21:58:31, 3.37it/s] 28%|██▊ | 104770/371472 [8:19:53<21:40:33, 3.42it/s] 28%|██▊ | 104771/371472 [8:19:53<21:08:19, 3.50it/s] 28%|██▊ | 104772/371472 [8:19:54<23:24:29, 3.16it/s] 28%|██▊ | 104773/371472 [8:19:54<22:10:48, 3.34it/s] 28%|██▊ | 104774/371472 [8:19:54<21:19:57, 3.47it/s] 28%|██▊ | 104775/371472 [8:19:54<22:07:12, 3.35it/s] 28%|██▊ | 104776/371472 [8:19:55<21:44:51, 3.41it/s] 28%|██▊ | 104777/371472 [8:19:55<24:15:45, 3.05it/s] 28%|██▊ | 104778/371472 [8:19:56<24:09:14, 3.07it/s] 28%|██▊ | 104779/371472 [8:19:56<23:21:17, 3.17it/s] 28%|██▊ | 104780/371472 [8:19:56<22:23:04, 3.31it/s] {'loss': 3.5161, 'learning_rate': 7.464877502208623e-07, 'epoch': 4.51} + 28%|██▊ | 104780/371472 [8:19:56<22:23:04, 3.31it/s] 28%|██▊ | 104781/371472 [8:19:56<22:32:44, 3.29it/s] 28%|██▊ | 104782/371472 [8:19:57<22:28:53, 3.30it/s] 28%|██▊ | 104783/371472 [8:19:57<22:05:31, 3.35it/s] 28%|██▊ | 104784/371472 [8:19:57<21:37:14, 3.43it/s] 28%|██▊ | 104785/371472 [8:19:58<23:38:05, 3.13it/s] 28%|██▊ | 104786/371472 [8:19:58<23:26:53, 3.16it/s] 28%|██▊ | 104787/371472 [8:19:58<23:22:36, 3.17it/s] 28%|██▊ | 104788/371472 [8:19:59<22:22:48, 3.31it/s] 28%|██▊ | 104789/371472 [8:19:59<21:09:15, 3.50it/s] 28%|██▊ | 104790/371472 [8:19:59<21:23:27, 3.46it/s] 28%|██▊ | 104791/371472 [8:19:59<20:56:13, 3.54it/s] 28%|██▊ | 104792/371472 [8:20:00<20:14:42, 3.66it/s] 28%|██▊ | 104793/371472 [8:20:00<20:10:36, 3.67it/s] 28%|██▊ | 104794/371472 [8:20:00<20:06:52, 3.68it/s] 28%|██▊ | 104795/371472 [8:20:00<19:52:52, 3.73it/s] 28%|██▊ | 104796/371472 [8:20:01<19:39:11, 3.77it/s] 28%|██▊ | 104797/371472 [8:20:01<19:50:52, 3.73it/s] 28%|██▊ | 104798/371472 [8:20:01<19:50:27, 3.73it/s] 28%|██▊ | 104799/371472 [8:20:01<19:48:35, 3.74it/s] 28%|██▊ | 104800/371472 [8:20:02<19:15:35, 3.85it/s] {'loss': 3.4362, 'learning_rate': 7.464392682453834e-07, 'epoch': 4.51} + 28%|██▊ | 104800/371472 [8:20:02<19:15:35, 3.85it/s] 28%|██▊ | 104801/371472 [8:20:02<19:22:12, 3.82it/s] 28%|██▊ | 104802/371472 [8:20:02<19:35:57, 3.78it/s] 28%|██▊ | 104803/371472 [8:20:03<20:58:42, 3.53it/s] 28%|██▊ | 104804/371472 [8:20:03<21:17:51, 3.48it/s] 28%|██▊ | 104805/371472 [8:20:03<23:15:25, 3.19it/s] 28%|██▊ | 104806/371472 [8:20:03<22:13:12, 3.33it/s] 28%|██▊ | 104807/371472 [8:20:04<23:45:56, 3.12it/s] 28%|██▊ | 104808/371472 [8:20:04<23:31:15, 3.15it/s] 28%|██▊ | 104809/371472 [8:20:04<22:07:11, 3.35it/s] 28%|██▊ | 104810/371472 [8:20:05<21:09:34, 3.50it/s] 28%|██▊ | 104811/371472 [8:20:05<21:25:00, 3.46it/s] 28%|██▊ | 104812/371472 [8:20:05<21:23:02, 3.46it/s] 28%|██▊ | 104813/371472 [8:20:06<20:29:36, 3.61it/s] 28%|██▊ | 104814/371472 [8:20:06<20:15:47, 3.66it/s] 28%|██▊ | 104815/371472 [8:20:06<20:02:21, 3.70it/s] 28%|██▊ | 104816/371472 [8:20:06<19:24:52, 3.82it/s] 28%|██▊ | 104817/371472 [8:20:07<19:00:53, 3.90it/s] 28%|██▊ | 104818/371472 [8:20:07<20:09:46, 3.67it/s] 28%|██▊ | 104819/371472 [8:20:07<20:59:32, 3.53it/s] 28%|██▊ | 104820/371472 [8:20:07<22:12:07, 3.34it/s] {'loss': 3.389, 'learning_rate': 7.463907862699046e-07, 'epoch': 4.51} + 28%|██▊ | 104820/371472 [8:20:07<22:12:07, 3.34it/s] 28%|██▊ | 104821/371472 [8:20:08<21:32:50, 3.44it/s] 28%|██▊ | 104822/371472 [8:20:08<21:02:49, 3.52it/s] 28%|██▊ | 104823/371472 [8:20:08<20:52:22, 3.55it/s] 28%|██▊ | 104824/371472 [8:20:09<20:09:19, 3.67it/s] 28%|██▊ | 104825/371472 [8:20:09<20:00:10, 3.70it/s] 28%|██▊ | 104826/371472 [8:20:09<20:25:26, 3.63it/s] 28%|██▊ | 104827/371472 [8:20:09<20:39:10, 3.59it/s] 28%|██▊ | 104828/371472 [8:20:10<21:09:34, 3.50it/s] 28%|██▊ | 104829/371472 [8:20:10<22:16:40, 3.32it/s] 28%|██▊ | 104830/371472 [8:20:10<21:17:50, 3.48it/s] 28%|██▊ | 104831/371472 [8:20:11<20:56:28, 3.54it/s] 28%|██▊ | 104832/371472 [8:20:11<21:30:14, 3.44it/s] 28%|██▊ | 104833/371472 [8:20:11<20:22:26, 3.64it/s] 28%|██▊ | 104834/371472 [8:20:11<20:07:10, 3.68it/s] 28%|██▊ | 104835/371472 [8:20:12<19:38:41, 3.77it/s] 28%|██▊ | 104836/371472 [8:20:12<19:42:05, 3.76it/s] 28%|██▊ | 104837/371472 [8:20:12<20:57:35, 3.53it/s] 28%|██▊ | 104838/371472 [8:20:12<20:51:57, 3.55it/s] 28%|██▊ | 104839/371472 [8:20:13<20:23:06, 3.63it/s] 28%|██▊ | 104840/371472 [8:20:13<20:56:37, 3.54it/s] {'loss': 3.6639, 'learning_rate': 7.463423042944257e-07, 'epoch': 4.52} + 28%|██▊ | 104840/371472 [8:20:13<20:56:37, 3.54it/s] 28%|██▊ | 104841/371472 [8:20:13<20:40:44, 3.58it/s] 28%|██▊ | 104842/371472 [8:20:14<21:05:03, 3.51it/s] 28%|██▊ | 104843/371472 [8:20:14<20:04:30, 3.69it/s] 28%|██▊ | 104844/371472 [8:20:14<21:06:10, 3.51it/s] 28%|██▊ | 104845/371472 [8:20:14<21:15:17, 3.48it/s] 28%|██▊ | 104846/371472 [8:20:15<21:38:32, 3.42it/s] 28%|██▊ | 104847/371472 [8:20:15<20:46:54, 3.56it/s] 28%|██▊ | 104848/371472 [8:20:15<20:36:02, 3.60it/s] 28%|██▊ | 104849/371472 [8:20:16<20:08:21, 3.68it/s] 28%|██▊ | 104850/371472 [8:20:16<20:23:37, 3.63it/s] 28%|██▊ | 104851/371472 [8:20:16<26:30:30, 2.79it/s] 28%|██▊ | 104852/371472 [8:20:17<25:00:56, 2.96it/s] 28%|██▊ | 104853/371472 [8:20:17<24:17:28, 3.05it/s] 28%|██▊ | 104854/371472 [8:20:17<24:29:42, 3.02it/s] 28%|██▊ | 104855/371472 [8:20:18<22:57:16, 3.23it/s] 28%|██▊ | 104856/371472 [8:20:18<22:00:23, 3.37it/s] 28%|██▊ | 104857/371472 [8:20:18<22:15:22, 3.33it/s] 28%|██▊ | 104858/371472 [8:20:18<22:15:26, 3.33it/s] 28%|██▊ | 104859/371472 [8:20:19<21:30:23, 3.44it/s] 28%|██▊ | 104860/371472 [8:20:19<21:27:40, 3.45it/s] {'loss': 3.4774, 'learning_rate': 7.462938223189466e-07, 'epoch': 4.52} + 28%|██▊ | 104860/371472 [8:20:19<21:27:40, 3.45it/s] 28%|██▊ | 104861/371472 [8:20:19<21:17:50, 3.48it/s] 28%|██▊ | 104862/371472 [8:20:20<23:08:13, 3.20it/s] 28%|██▊ | 104863/371472 [8:20:20<21:53:21, 3.38it/s] 28%|██▊ | 104864/371472 [8:20:20<22:37:59, 3.27it/s] 28%|██▊ | 104865/371472 [8:20:21<21:30:37, 3.44it/s] 28%|██▊ | 104866/371472 [8:20:21<21:47:33, 3.40it/s] 28%|██▊ | 104867/371472 [8:20:21<26:43:06, 2.77it/s] 28%|██▊ | 104868/371472 [8:20:22<26:30:40, 2.79it/s] 28%|██▊ | 104869/371472 [8:20:22<23:56:59, 3.09it/s] 28%|██▊ | 104870/371472 [8:20:22<24:15:32, 3.05it/s] 28%|██▊ | 104871/371472 [8:20:23<22:50:13, 3.24it/s] 28%|██▊ | 104872/371472 [8:20:23<21:51:06, 3.39it/s] 28%|██▊ | 104873/371472 [8:20:23<22:32:26, 3.29it/s] 28%|██▊ | 104874/371472 [8:20:23<21:24:49, 3.46it/s] 28%|██▊ | 104875/371472 [8:20:24<20:17:34, 3.65it/s] 28%|██▊ | 104876/371472 [8:20:24<20:00:32, 3.70it/s] 28%|██▊ | 104877/371472 [8:20:24<19:52:14, 3.73it/s] 28%|██▊ | 104878/371472 [8:20:24<20:42:14, 3.58it/s] 28%|██▊ | 104879/371472 [8:20:25<20:38:32, 3.59it/s] 28%|██▊ | 104880/371472 [8:20:25<21:16:49, 3.48it/s] {'loss': 3.2268, 'learning_rate': 7.462453403434678e-07, 'epoch': 4.52} + 28%|██▊ | 104880/371472 [8:20:25<21:16:49, 3.48it/s] 28%|██▊ | 104881/371472 [8:20:25<22:06:17, 3.35it/s] 28%|██▊ | 104882/371472 [8:20:26<22:19:56, 3.32it/s] 28%|██▊ | 104883/371472 [8:20:26<21:03:21, 3.52it/s] 28%|██▊ | 104884/371472 [8:20:26<21:12:41, 3.49it/s] 28%|██▊ | 104885/371472 [8:20:26<21:33:51, 3.43it/s] 28%|██▊ | 104886/371472 [8:20:27<21:11:34, 3.49it/s] 28%|██▊ | 104887/371472 [8:20:27<20:30:50, 3.61it/s] 28%|██▊ | 104888/371472 [8:20:27<22:39:34, 3.27it/s] 28%|██▊ | 104889/371472 [8:20:28<21:56:09, 3.38it/s] 28%|██▊ | 104890/371472 [8:20:28<22:19:35, 3.32it/s] 28%|██▊ | 104891/371472 [8:20:28<21:36:39, 3.43it/s] 28%|██▊ | 104892/371472 [8:20:29<21:15:08, 3.48it/s] 28%|██▊ | 104893/371472 [8:20:29<21:48:26, 3.40it/s] 28%|██▊ | 104894/371472 [8:20:29<22:14:39, 3.33it/s] 28%|██▊ | 104895/371472 [8:20:29<22:50:49, 3.24it/s] 28%|██▊ | 104896/371472 [8:20:30<22:23:37, 3.31it/s] 28%|██▊ | 104897/371472 [8:20:30<22:14:31, 3.33it/s] 28%|██▊ | 104898/371472 [8:20:30<21:49:29, 3.39it/s] 28%|██▊ | 104899/371472 [8:20:31<20:57:40, 3.53it/s] 28%|██▊ | 104900/371472 [8:20:31<21:03:27, 3.52it/s] {'loss': 3.5172, 'learning_rate': 7.46196858367989e-07, 'epoch': 4.52} + 28%|██▊ | 104900/371472 [8:20:31<21:03:27, 3.52it/s] 28%|██▊ | 104901/371472 [8:20:31<20:52:14, 3.55it/s] 28%|██▊ | 104902/371472 [8:20:31<20:45:07, 3.57it/s] 28%|██▊ | 104903/371472 [8:20:32<21:35:04, 3.43it/s] 28%|██▊ | 104904/371472 [8:20:32<21:14:18, 3.49it/s] 28%|██▊ | 104905/371472 [8:20:32<21:16:29, 3.48it/s] 28%|██▊ | 104906/371472 [8:20:33<20:48:44, 3.56it/s] 28%|██▊ | 104907/371472 [8:20:33<20:21:07, 3.64it/s] 28%|██▊ | 104908/371472 [8:20:33<20:35:26, 3.60it/s] 28%|██▊ | 104909/371472 [8:20:33<19:44:44, 3.75it/s] 28%|██▊ | 104910/371472 [8:20:34<19:39:34, 3.77it/s] 28%|██▊ | 104911/371472 [8:20:34<20:13:19, 3.66it/s] 28%|██▊ | 104912/371472 [8:20:34<20:39:10, 3.59it/s] 28%|██▊ | 104913/371472 [8:20:35<20:38:15, 3.59it/s] 28%|██▊ | 104914/371472 [8:20:35<21:19:17, 3.47it/s] 28%|██▊ | 104915/371472 [8:20:35<20:30:29, 3.61it/s] 28%|██▊ | 104916/371472 [8:20:35<20:07:20, 3.68it/s] 28%|██▊ | 104917/371472 [8:20:36<20:50:25, 3.55it/s] 28%|██▊ | 104918/371472 [8:20:36<20:11:06, 3.67it/s] 28%|██▊ | 104919/371472 [8:20:36<20:17:31, 3.65it/s] 28%|██▊ | 104920/371472 [8:20:37<21:52:16, 3.39it/s] {'loss': 3.5388, 'learning_rate': 7.461483763925101e-07, 'epoch': 4.52} + 28%|██▊ | 104920/371472 [8:20:37<21:52:16, 3.39it/s] 28%|██▊ | 104921/371472 [8:20:37<21:12:02, 3.49it/s] 28%|██▊ | 104922/371472 [8:20:37<22:58:22, 3.22it/s] 28%|██▊ | 104923/371472 [8:20:37<22:14:39, 3.33it/s] 28%|██▊ | 104924/371472 [8:20:38<22:23:03, 3.31it/s] 28%|██▊ | 104925/371472 [8:20:38<21:08:42, 3.50it/s] 28%|██▊ | 104926/371472 [8:20:39<30:55:12, 2.39it/s] 28%|██▊ | 104927/371472 [8:20:39<27:07:39, 2.73it/s] 28%|██▊ | 104928/371472 [8:20:39<25:20:25, 2.92it/s] 28%|██▊ | 104929/371472 [8:20:39<23:23:32, 3.17it/s] 28%|██▊ | 104930/371472 [8:20:40<23:20:54, 3.17it/s] 28%|██▊ | 104931/371472 [8:20:40<22:22:53, 3.31it/s] 28%|██▊ | 104932/371472 [8:20:40<21:18:56, 3.47it/s] 28%|██▊ | 104933/371472 [8:20:41<21:18:14, 3.48it/s] 28%|██▊ | 104934/371472 [8:20:41<21:43:41, 3.41it/s] 28%|██▊ | 104935/371472 [8:20:41<21:06:21, 3.51it/s] 28%|██▊ | 104936/371472 [8:20:42<22:49:05, 3.24it/s] 28%|██▊ | 104937/371472 [8:20:42<22:00:27, 3.36it/s] 28%|██▊ | 104938/371472 [8:20:42<21:59:53, 3.37it/s] 28%|██▊ | 104939/371472 [8:20:42<21:24:14, 3.46it/s] 28%|██▊ | 104940/371472 [8:20:43<21:18:38, 3.47it/s] {'loss': 3.5466, 'learning_rate': 7.460998944170311e-07, 'epoch': 4.52} + 28%|██▊ | 104940/371472 [8:20:43<21:18:38, 3.47it/s] 28%|██▊ | 104941/371472 [8:20:43<20:33:38, 3.60it/s] 28%|██▊ | 104942/371472 [8:20:43<20:04:26, 3.69it/s] 28%|██▊ | 104943/371472 [8:20:43<19:21:19, 3.83it/s] 28%|██▊ | 104944/371472 [8:20:44<19:33:58, 3.78it/s] 28%|██▊ | 104945/371472 [8:20:44<19:27:43, 3.80it/s] 28%|██▊ | 104946/371472 [8:20:44<19:40:41, 3.76it/s] 28%|██▊ | 104947/371472 [8:20:44<19:07:53, 3.87it/s] 28%|██▊ | 104948/371472 [8:20:45<19:39:57, 3.76it/s] 28%|██▊ | 104949/371472 [8:20:45<19:32:16, 3.79it/s] 28%|██▊ | 104950/371472 [8:20:45<20:25:34, 3.62it/s] 28%|██▊ | 104951/371472 [8:20:46<21:51:12, 3.39it/s] 28%|██▊ | 104952/371472 [8:20:46<21:06:51, 3.51it/s] 28%|██▊ | 104953/371472 [8:20:46<21:43:35, 3.41it/s] 28%|██▊ | 104954/371472 [8:20:46<20:38:20, 3.59it/s] 28%|██▊ | 104955/371472 [8:20:47<20:14:22, 3.66it/s] 28%|██▊ | 104956/371472 [8:20:47<20:40:48, 3.58it/s] 28%|██▊ | 104957/371472 [8:20:47<20:07:22, 3.68it/s] 28%|██▊ | 104958/371472 [8:20:48<20:27:41, 3.62it/s] 28%|██▊ | 104959/371472 [8:20:48<20:39:00, 3.59it/s] 28%|██▊ | 104960/371472 [8:20:48<21:09:38, 3.50it/s] {'loss': 3.3623, 'learning_rate': 7.460514124415523e-07, 'epoch': 4.52} + 28%|██▊ | 104960/371472 [8:20:48<21:09:38, 3.50it/s] 28%|██▊ | 104961/371472 [8:20:49<22:42:41, 3.26it/s] 28%|██▊ | 104962/371472 [8:20:49<22:05:36, 3.35it/s] 28%|██▊ | 104963/371472 [8:20:49<20:58:16, 3.53it/s] 28%|██▊ | 104964/371472 [8:20:49<20:17:29, 3.65it/s] 28%|██▊ | 104965/371472 [8:20:50<19:49:15, 3.73it/s] 28%|██▊ | 104966/371472 [8:20:50<19:53:43, 3.72it/s] 28%|██▊ | 104967/371472 [8:20:50<21:15:21, 3.48it/s] 28%|██▊ | 104968/371472 [8:20:50<21:13:41, 3.49it/s] 28%|██▊ | 104969/371472 [8:20:51<20:54:34, 3.54it/s] 28%|██▊ | 104970/371472 [8:20:51<21:27:38, 3.45it/s] 28%|██▊ | 104971/371472 [8:20:51<21:35:15, 3.43it/s] 28%|██▊ | 104972/371472 [8:20:52<22:08:40, 3.34it/s] 28%|██▊ | 104973/371472 [8:20:52<21:36:05, 3.43it/s] 28%|██▊ | 104974/371472 [8:20:52<23:12:59, 3.19it/s] 28%|██▊ | 104975/371472 [8:20:53<22:15:43, 3.33it/s] 28%|██▊ | 104976/371472 [8:20:53<21:02:45, 3.52it/s] 28%|██▊ | 104977/371472 [8:20:53<21:07:23, 3.50it/s] 28%|██▊ | 104978/371472 [8:20:53<22:02:24, 3.36it/s] 28%|██▊ | 104979/371472 [8:20:54<21:12:23, 3.49it/s] 28%|██▊ | 104980/371472 [8:20:54<20:37:24, 3.59it/s] {'loss': 3.479, 'learning_rate': 7.460029304660733e-07, 'epoch': 4.52} + 28%|██▊ | 104980/371472 [8:20:54<20:37:24, 3.59it/s] 28%|██▊ | 104981/371472 [8:20:54<20:56:13, 3.54it/s] 28%|██▊ | 104982/371472 [8:20:54<20:11:59, 3.66it/s] 28%|██▊ | 104983/371472 [8:20:55<20:46:07, 3.56it/s] 28%|██▊ | 104984/371472 [8:20:55<21:23:34, 3.46it/s] 28%|██▊ | 104985/371472 [8:20:55<20:39:39, 3.58it/s] 28%|██▊ | 104986/371472 [8:20:56<20:12:24, 3.66it/s] 28%|██▊ | 104987/371472 [8:20:56<19:42:14, 3.76it/s] 28%|██▊ | 104988/371472 [8:20:56<20:00:14, 3.70it/s] 28%|██▊ | 104989/371472 [8:20:56<20:47:15, 3.56it/s] 28%|██▊ | 104990/371472 [8:20:57<21:36:23, 3.43it/s] 28%|██▊ | 104991/371472 [8:20:57<22:54:49, 3.23it/s] 28%|██▊ | 104992/371472 [8:20:57<22:24:56, 3.30it/s] 28%|██▊ | 104993/371472 [8:20:58<23:10:13, 3.19it/s] 28%|██▊ | 104994/371472 [8:20:58<21:49:49, 3.39it/s] 28%|██▊ | 104995/371472 [8:20:58<21:18:53, 3.47it/s] 28%|██▊ | 104996/371472 [8:20:58<21:01:41, 3.52it/s] 28%|██▊ | 104997/371472 [8:20:59<20:33:26, 3.60it/s] 28%|██▊ | 104998/371472 [8:20:59<20:15:25, 3.65it/s] 28%|██▊ | 104999/371472 [8:20:59<19:47:41, 3.74it/s] 28%|██▊ | 105000/371472 [8:21:00<19:12:51, 3.85it/s] {'loss': 3.4183, 'learning_rate': 7.459544484905945e-07, 'epoch': 4.52} + 28%|██▊ | 105000/371472 [8:21:00<19:12:51, 3.85it/s] 28%|██▊ | 105001/371472 [8:21:00<20:16:35, 3.65it/s] 28%|██▊ | 105002/371472 [8:21:00<19:53:21, 3.72it/s] 28%|██▊ | 105003/371472 [8:21:00<19:46:36, 3.74it/s] 28%|██▊ | 105004/371472 [8:21:01<19:52:50, 3.72it/s] 28%|██▊ | 105005/371472 [8:21:01<19:24:20, 3.81it/s] 28%|██▊ | 105006/371472 [8:21:01<19:47:04, 3.74it/s] 28%|██▊ | 105007/371472 [8:21:01<20:11:47, 3.66it/s] 28%|██▊ | 105008/371472 [8:21:02<20:26:50, 3.62it/s] 28%|██▊ | 105009/371472 [8:21:02<21:11:54, 3.49it/s] 28%|██▊ | 105010/371472 [8:21:02<20:49:29, 3.55it/s] 28%|██▊ | 105011/371472 [8:21:03<22:49:46, 3.24it/s] 28%|██▊ | 105012/371472 [8:21:03<22:11:40, 3.33it/s] 28%|██▊ | 105013/371472 [8:21:03<21:40:15, 3.42it/s] 28%|██▊ | 105014/371472 [8:21:04<22:26:20, 3.30it/s] 28%|██▊ | 105015/371472 [8:21:04<22:44:49, 3.25it/s] 28%|██▊ | 105016/371472 [8:21:04<21:26:24, 3.45it/s] 28%|██▊ | 105017/371472 [8:21:04<21:19:03, 3.47it/s] 28%|██▊ | 105018/371472 [8:21:05<20:34:01, 3.60it/s] 28%|██▊ | 105019/371472 [8:21:05<19:40:40, 3.76it/s] 28%|██▊ | 105020/371472 [8:21:05<19:34:06, 3.78it/s] {'loss': 3.3884, 'learning_rate': 7.459059665151155e-07, 'epoch': 4.52} + 28%|██▊ | 105020/371472 [8:21:05<19:34:06, 3.78it/s] 28%|██▊ | 105021/371472 [8:21:05<20:16:16, 3.65it/s] 28%|██▊ | 105022/371472 [8:21:06<20:34:55, 3.60it/s] 28%|██▊ | 105023/371472 [8:21:06<21:20:48, 3.47it/s] 28%|██▊ | 105024/371472 [8:21:06<20:24:50, 3.63it/s] 28%|██▊ | 105025/371472 [8:21:07<21:27:23, 3.45it/s] 28%|██▊ | 105026/371472 [8:21:07<21:26:38, 3.45it/s] 28%|██▊ | 105027/371472 [8:21:07<20:42:52, 3.57it/s] 28%|██▊ | 105028/371472 [8:21:07<20:15:20, 3.65it/s] 28%|██▊ | 105029/371472 [8:21:08<20:26:24, 3.62it/s] 28%|██▊ | 105030/371472 [8:21:08<20:24:36, 3.63it/s] 28%|██▊ | 105031/371472 [8:21:08<21:52:22, 3.38it/s] 28%|██▊ | 105032/371472 [8:21:09<21:23:06, 3.46it/s] 28%|██▊ | 105033/371472 [8:21:09<21:03:51, 3.51it/s] 28%|██▊ | 105034/371472 [8:21:09<20:44:56, 3.57it/s] 28%|██▊ | 105035/371472 [8:21:09<21:24:25, 3.46it/s] 28%|██▊ | 105036/371472 [8:21:10<20:26:18, 3.62it/s] 28%|██▊ | 105037/371472 [8:21:10<19:48:59, 3.73it/s] 28%|██▊ | 105038/371472 [8:21:10<21:20:38, 3.47it/s] 28%|██▊ | 105039/371472 [8:21:11<21:24:43, 3.46it/s] 28%|██▊ | 105040/371472 [8:21:11<20:40:48, 3.58it/s] {'loss': 3.6151, 'learning_rate': 7.458574845396367e-07, 'epoch': 4.52} + 28%|██▊ | 105040/371472 [8:21:11<20:40:48, 3.58it/s] 28%|██▊ | 105041/371472 [8:21:11<19:58:09, 3.71it/s] 28%|██▊ | 105042/371472 [8:21:11<20:42:09, 3.57it/s] 28%|██▊ | 105043/371472 [8:21:12<20:41:13, 3.58it/s] 28%|██▊ | 105044/371472 [8:21:12<20:41:07, 3.58it/s] 28%|██▊ | 105045/371472 [8:21:12<20:09:18, 3.67it/s] 28%|██▊ | 105046/371472 [8:21:12<20:37:44, 3.59it/s] 28%|██▊ | 105047/371472 [8:21:13<20:03:47, 3.69it/s] 28%|██▊ | 105048/371472 [8:21:13<20:30:58, 3.61it/s] 28%|██▊ | 105049/371472 [8:21:13<20:56:53, 3.53it/s] 28%|██▊ | 105050/371472 [8:21:14<20:44:55, 3.57it/s] 28%|██▊ | 105051/371472 [8:21:14<20:35:46, 3.59it/s] 28%|██▊ | 105052/371472 [8:21:14<20:19:06, 3.64it/s] 28%|██▊ | 105053/371472 [8:21:14<20:01:43, 3.69it/s] 28%|██▊ | 105054/371472 [8:21:15<21:23:29, 3.46it/s] 28%|██▊ | 105055/371472 [8:21:15<21:14:22, 3.48it/s] 28%|██▊ | 105056/371472 [8:21:15<20:56:55, 3.53it/s] 28%|██▊ | 105057/371472 [8:21:16<20:20:52, 3.64it/s] 28%|██▊ | 105058/371472 [8:21:16<19:42:30, 3.75it/s] 28%|██▊ | 105059/371472 [8:21:16<19:23:41, 3.82it/s] 28%|██▊ | 105060/371472 [8:21:16<22:05:52, 3.35it/s] {'loss': 3.3566, 'learning_rate': 7.458090025641578e-07, 'epoch': 4.53} + 28%|██▊ | 105060/371472 [8:21:16<22:05:52, 3.35it/s] 28%|██▊ | 105061/371472 [8:21:17<20:58:55, 3.53it/s] 28%|██▊ | 105062/371472 [8:21:17<20:17:53, 3.65it/s] 28%|██▊ | 105063/371472 [8:21:17<20:01:12, 3.70it/s] 28%|██▊ | 105064/371472 [8:21:17<20:17:07, 3.65it/s] 28%|██▊ | 105065/371472 [8:21:18<20:02:52, 3.69it/s] 28%|██▊ | 105066/371472 [8:21:18<19:58:35, 3.70it/s] 28%|██▊ | 105067/371472 [8:21:18<20:23:09, 3.63it/s] 28%|██▊ | 105068/371472 [8:21:19<20:56:45, 3.53it/s] 28%|██▊ | 105069/371472 [8:21:19<20:19:22, 3.64it/s] 28%|██▊ | 105070/371472 [8:21:19<20:07:41, 3.68it/s] 28%|██▊ | 105071/371472 [8:21:19<20:08:32, 3.67it/s] 28%|██▊ | 105072/371472 [8:21:20<21:56:48, 3.37it/s] 28%|██▊ | 105073/371472 [8:21:20<20:57:31, 3.53it/s] 28%|██▊ | 105074/371472 [8:21:20<20:37:53, 3.59it/s] 28%|██▊ | 105075/371472 [8:21:21<20:41:34, 3.58it/s] 28%|██▊ | 105076/371472 [8:21:21<20:14:25, 3.66it/s] 28%|██▊ | 105077/371472 [8:21:21<19:54:07, 3.72it/s] 28%|██▊ | 105078/371472 [8:21:21<20:33:17, 3.60it/s] 28%|██▊ | 105079/371472 [8:21:22<20:05:33, 3.68it/s] 28%|██▊ | 105080/371472 [8:21:22<20:01:29, 3.70it/s] {'loss': 3.4119, 'learning_rate': 7.457605205886789e-07, 'epoch': 4.53} + 28%|██▊ | 105080/371472 [8:21:22<20:01:29, 3.70it/s] 28%|██▊ | 105081/371472 [8:21:22<20:16:41, 3.65it/s] 28%|██▊ | 105082/371472 [8:21:22<19:49:17, 3.73it/s] 28%|██▊ | 105083/371472 [8:21:23<22:16:49, 3.32it/s] 28%|██▊ | 105084/371472 [8:21:23<21:41:13, 3.41it/s] 28%|██▊ | 105085/371472 [8:21:23<20:49:06, 3.55it/s] 28%|██▊ | 105086/371472 [8:21:24<21:10:36, 3.49it/s] 28%|██▊ | 105087/371472 [8:21:24<20:34:41, 3.60it/s] 28%|██▊ | 105088/371472 [8:21:24<20:34:42, 3.60it/s] 28%|██▊ | 105089/371472 [8:21:24<20:28:18, 3.61it/s] 28%|██▊ | 105090/371472 [8:21:25<21:07:40, 3.50it/s] 28%|██▊ | 105091/371472 [8:21:25<20:42:03, 3.57it/s] 28%|██▊ | 105092/371472 [8:21:25<20:10:13, 3.67it/s] 28%|██▊ | 105093/371472 [8:21:26<20:04:29, 3.69it/s] 28%|██▊ | 105094/371472 [8:21:26<20:17:47, 3.65it/s] 28%|██▊ | 105095/371472 [8:21:26<19:39:25, 3.76it/s] 28%|██▊ | 105096/371472 [8:21:26<20:47:58, 3.56it/s] 28%|██▊ | 105097/371472 [8:21:27<20:45:16, 3.57it/s] 28%|██▊ | 105098/371472 [8:21:27<20:13:11, 3.66it/s] 28%|██▊ | 105099/371472 [8:21:27<20:02:36, 3.69it/s] 28%|██▊ | 105100/371472 [8:21:27<19:32:13, 3.79it/s] {'loss': 3.5635, 'learning_rate': 7.457120386131999e-07, 'epoch': 4.53} + 28%|██▊ | 105100/371472 [8:21:27<19:32:13, 3.79it/s] 28%|██▊ | 105101/371472 [8:21:28<20:00:09, 3.70it/s] 28%|██▊ | 105102/371472 [8:21:28<20:05:16, 3.68it/s] 28%|██▊ | 105103/371472 [8:21:28<21:30:46, 3.44it/s] 28%|██▊ | 105104/371472 [8:21:29<21:26:38, 3.45it/s] 28%|██▊ | 105105/371472 [8:21:29<21:28:45, 3.44it/s] 28%|██▊ | 105106/371472 [8:21:29<20:54:03, 3.54it/s] 28%|██▊ | 105107/371472 [8:21:29<21:14:33, 3.48it/s] 28%|██▊ | 105108/371472 [8:21:30<21:19:03, 3.47it/s] 28%|██▊ | 105109/371472 [8:21:30<21:33:46, 3.43it/s] 28%|██▊ | 105110/371472 [8:21:30<20:54:44, 3.54it/s] 28%|██▊ | 105111/371472 [8:21:31<21:26:42, 3.45it/s] 28%|██▊ | 105112/371472 [8:21:31<21:03:39, 3.51it/s] 28%|██▊ | 105113/371472 [8:21:31<21:22:15, 3.46it/s] 28%|██▊ | 105114/371472 [8:21:31<21:30:40, 3.44it/s] 28%|██▊ | 105115/371472 [8:21:32<21:13:04, 3.49it/s] 28%|██▊ | 105116/371472 [8:21:32<21:24:25, 3.46it/s] 28%|██▊ | 105117/371472 [8:21:32<21:36:39, 3.42it/s] 28%|██▊ | 105118/371472 [8:21:33<22:29:03, 3.29it/s] 28%|██▊ | 105119/371472 [8:21:33<23:13:10, 3.19it/s] 28%|██▊ | 105120/371472 [8:21:33<23:31:22, 3.15it/s] {'loss': 3.4142, 'learning_rate': 7.456635566377211e-07, 'epoch': 4.53} + 28%|██▊ | 105120/371472 [8:21:33<23:31:22, 3.15it/s] 28%|██▊ | 105121/371472 [8:21:34<23:03:21, 3.21it/s] 28%|██▊ | 105122/371472 [8:21:34<22:38:18, 3.27it/s] 28%|██▊ | 105123/371472 [8:21:34<22:09:40, 3.34it/s] 28%|██▊ | 105124/371472 [8:21:35<22:27:27, 3.29it/s] 28%|██▊ | 105125/371472 [8:21:35<23:37:39, 3.13it/s] 28%|██▊ | 105126/371472 [8:21:35<24:26:41, 3.03it/s] 28%|██▊ | 105127/371472 [8:21:36<23:53:17, 3.10it/s] 28%|██▊ | 105128/371472 [8:21:36<23:52:22, 3.10it/s] 28%|██▊ | 105129/371472 [8:21:36<23:57:29, 3.09it/s] 28%|██▊ | 105130/371472 [8:21:36<22:49:29, 3.24it/s] 28%|██▊ | 105131/371472 [8:21:37<22:01:09, 3.36it/s] 28%|██▊ | 105132/371472 [8:21:37<21:23:02, 3.46it/s] 28%|██▊ | 105133/371472 [8:21:37<20:44:10, 3.57it/s] 28%|██▊ | 105134/371472 [8:21:38<20:23:22, 3.63it/s] 28%|██▊ | 105135/371472 [8:21:38<20:28:38, 3.61it/s] 28%|██▊ | 105136/371472 [8:21:38<19:56:27, 3.71it/s] 28%|██▊ | 105137/371472 [8:21:38<20:13:51, 3.66it/s] 28%|██▊ | 105138/371472 [8:21:39<19:58:17, 3.70it/s] 28%|██▊ | 105139/371472 [8:21:39<21:05:00, 3.51it/s] 28%|██▊ | 105140/371472 [8:21:39<20:19:36, 3.64it/s] {'loss': 3.5333, 'learning_rate': 7.456150746622422e-07, 'epoch': 4.53} + 28%|██▊ | 105140/371472 [8:21:39<20:19:36, 3.64it/s] 28%|██▊ | 105141/371472 [8:21:39<19:56:12, 3.71it/s] 28%|██▊ | 105142/371472 [8:21:40<19:32:41, 3.79it/s] 28%|██▊ | 105143/371472 [8:21:40<19:30:04, 3.79it/s] 28%|██▊ | 105144/371472 [8:21:40<20:06:48, 3.68it/s] 28%|██▊ | 105145/371472 [8:21:41<19:53:45, 3.72it/s] 28%|██▊ | 105146/371472 [8:21:41<20:13:23, 3.66it/s] 28%|██▊ | 105147/371472 [8:21:41<20:30:52, 3.61it/s] 28%|██▊ | 105148/371472 [8:21:41<20:53:48, 3.54it/s] 28%|██▊ | 105149/371472 [8:21:42<20:34:38, 3.60it/s] 28%|██▊ | 105150/371472 [8:21:42<20:44:55, 3.57it/s] 28%|██▊ | 105151/371472 [8:21:42<21:34:57, 3.43it/s] 28%|██▊ | 105152/371472 [8:21:43<23:32:20, 3.14it/s] 28%|██▊ | 105153/371472 [8:21:43<22:27:47, 3.29it/s] 28%|██▊ | 105154/371472 [8:21:43<21:04:17, 3.51it/s] 28%|██▊ | 105155/371472 [8:21:43<20:37:15, 3.59it/s] 28%|██▊ | 105156/371472 [8:21:44<20:19:18, 3.64it/s] 28%|██▊ | 105157/371472 [8:21:44<21:16:46, 3.48it/s] 28%|██▊ | 105158/371472 [8:21:44<21:17:20, 3.47it/s] 28%|██▊ | 105159/371472 [8:21:45<20:42:21, 3.57it/s] 28%|██▊ | 105160/371472 [8:21:45<20:16:22, 3.65it/s] {'loss': 3.6108, 'learning_rate': 7.455665926867632e-07, 'epoch': 4.53} + 28%|██▊ | 105160/371472 [8:21:45<20:16:22, 3.65it/s] 28%|██▊ | 105161/371472 [8:21:45<21:20:37, 3.47it/s] 28%|██▊ | 105162/371472 [8:21:45<20:39:08, 3.58it/s] 28%|██▊ | 105163/371472 [8:21:46<19:55:01, 3.71it/s] 28%|██▊ | 105164/371472 [8:21:46<19:32:58, 3.78it/s] 28%|██▊ | 105165/371472 [8:21:46<19:22:16, 3.82it/s] 28%|██▊ | 105166/371472 [8:21:46<20:08:38, 3.67it/s] 28%|██▊ | 105167/371472 [8:21:47<19:58:45, 3.70it/s] 28%|██▊ | 105168/371472 [8:21:47<19:51:07, 3.73it/s] 28%|██▊ | 105169/371472 [8:21:47<19:27:19, 3.80it/s] 28%|██▊ | 105170/371472 [8:21:47<19:53:08, 3.72it/s] 28%|██▊ | 105171/371472 [8:21:48<19:40:12, 3.76it/s] 28%|██▊ | 105172/371472 [8:21:48<19:03:50, 3.88it/s] 28%|██▊ | 105173/371472 [8:21:48<19:44:38, 3.75it/s] 28%|██▊ | 105174/371472 [8:21:49<21:22:51, 3.46it/s] 28%|██▊ | 105175/371472 [8:21:49<20:43:35, 3.57it/s] 28%|██▊ | 105176/371472 [8:21:49<20:17:10, 3.65it/s] 28%|██▊ | 105177/371472 [8:21:49<19:15:38, 3.84it/s] 28%|██▊ | 105178/371472 [8:21:50<19:08:39, 3.86it/s] 28%|██▊ | 105179/371472 [8:21:50<19:39:23, 3.76it/s] 28%|██▊ | 105180/371472 [8:21:50<19:48:08, 3.74it/s] {'loss': 3.5274, 'learning_rate': 7.455181107112844e-07, 'epoch': 4.53} + 28%|██▊ | 105180/371472 [8:21:50<19:48:08, 3.74it/s] 28%|██▊ | 105181/371472 [8:21:50<20:35:45, 3.59it/s] 28%|██▊ | 105182/371472 [8:21:51<21:15:47, 3.48it/s] 28%|██▊ | 105183/371472 [8:21:51<20:21:18, 3.63it/s] 28%|██▊ | 105184/371472 [8:21:51<20:06:57, 3.68it/s] 28%|██▊ | 105185/371472 [8:21:52<20:15:33, 3.65it/s] 28%|██▊ | 105186/371472 [8:21:52<19:49:53, 3.73it/s] 28%|██▊ | 105187/371472 [8:21:52<20:29:00, 3.61it/s] 28%|██▊ | 105188/371472 [8:21:52<20:35:03, 3.59it/s] 28%|██▊ | 105189/371472 [8:21:53<19:57:42, 3.71it/s] 28%|██▊ | 105190/371472 [8:21:53<20:01:30, 3.69it/s] 28%|██▊ | 105191/371472 [8:21:53<20:18:28, 3.64it/s] 28%|██▊ | 105192/371472 [8:21:54<21:32:48, 3.43it/s] 28%|██▊ | 105193/371472 [8:21:54<22:12:58, 3.33it/s] 28%|██▊ | 105194/371472 [8:21:54<21:11:17, 3.49it/s] 28%|██▊ | 105195/371472 [8:21:54<20:32:02, 3.60it/s] 28%|██▊ | 105196/371472 [8:21:55<21:55:13, 3.37it/s] 28%|██▊ | 105197/371472 [8:21:55<22:39:24, 3.26it/s] 28%|██▊ | 105198/371472 [8:21:55<22:13:46, 3.33it/s] 28%|██▊ | 105199/371472 [8:21:56<21:08:55, 3.50it/s] 28%|██▊ | 105200/371472 [8:21:56<20:35:19, 3.59it/s] {'loss': 3.3089, 'learning_rate': 7.454696287358057e-07, 'epoch': 4.53} + 28%|██▊ | 105200/371472 [8:21:56<20:35:19, 3.59it/s] 28%|██▊ | 105201/371472 [8:21:56<19:58:26, 3.70it/s] 28%|██▊ | 105202/371472 [8:21:56<20:08:36, 3.67it/s] 28%|██▊ | 105203/371472 [8:21:57<20:01:03, 3.69it/s] 28%|██▊ | 105204/371472 [8:21:57<19:52:14, 3.72it/s] 28%|██▊ | 105205/371472 [8:21:57<20:31:22, 3.60it/s] 28%|██▊ | 105206/371472 [8:21:57<20:03:21, 3.69it/s] 28%|██▊ | 105207/371472 [8:21:58<19:48:28, 3.73it/s] 28%|██▊ | 105208/371472 [8:21:58<20:37:19, 3.59it/s] 28%|██▊ | 105209/371472 [8:21:58<21:32:33, 3.43it/s] 28%|██▊ | 105210/371472 [8:21:59<22:06:16, 3.35it/s] 28%|██▊ | 105211/371472 [8:21:59<21:50:38, 3.39it/s] 28%|██▊ | 105212/371472 [8:21:59<20:57:26, 3.53it/s] 28%|██▊ | 105213/371472 [8:22:00<21:33:48, 3.43it/s] 28%|██▊ | 105214/371472 [8:22:00<22:20:04, 3.31it/s] 28%|██▊ | 105215/371472 [8:22:00<21:26:55, 3.45it/s] 28%|██▊ | 105216/371472 [8:22:00<20:14:36, 3.65it/s] 28%|██▊ | 105217/371472 [8:22:01<21:21:36, 3.46it/s] 28%|██▊ | 105218/371472 [8:22:01<20:48:22, 3.55it/s] 28%|██▊ | 105219/371472 [8:22:01<21:36:31, 3.42it/s] 28%|██▊ | 105220/371472 [8:22:02<21:30:34, 3.44it/s] {'loss': 3.4432, 'learning_rate': 7.454211467603267e-07, 'epoch': 4.53} + 28%|██▊ | 105220/371472 [8:22:02<21:30:34, 3.44it/s] 28%|██▊ | 105221/371472 [8:22:02<26:23:51, 2.80it/s] 28%|██��� | 105222/371472 [8:22:02<24:20:20, 3.04it/s] 28%|██▊ | 105223/371472 [8:22:03<23:36:40, 3.13it/s] 28%|██▊ | 105224/371472 [8:22:03<22:11:04, 3.33it/s] 28%|██▊ | 105225/371472 [8:22:03<21:15:58, 3.48it/s] 28%|██▊ | 105226/371472 [8:22:03<20:41:34, 3.57it/s] 28%|██▊ | 105227/371472 [8:22:04<20:04:33, 3.68it/s] 28%|██▊ | 105228/371472 [8:22:04<19:57:49, 3.70it/s] 28%|██▊ | 105229/371472 [8:22:04<19:37:31, 3.77it/s] 28%|██▊ | 105230/371472 [8:22:04<21:14:38, 3.48it/s] 28%|██▊ | 105231/371472 [8:22:05<21:11:47, 3.49it/s] 28%|██▊ | 105232/371472 [8:22:05<21:22:27, 3.46it/s] 28%|██▊ | 105233/371472 [8:22:05<22:24:52, 3.30it/s] 28%|██▊ | 105234/371472 [8:22:06<22:16:44, 3.32it/s] 28%|██▊ | 105235/371472 [8:22:06<21:18:05, 3.47it/s] 28%|██▊ | 105236/371472 [8:22:06<21:12:37, 3.49it/s] 28%|██▊ | 105237/371472 [8:22:07<21:26:17, 3.45it/s] 28%|██▊ | 105238/371472 [8:22:07<21:31:28, 3.44it/s] 28%|██▊ | 105239/371472 [8:22:07<20:18:54, 3.64it/s] 28%|██▊ | 105240/371472 [8:22:07<20:00:45, 3.70it/s] {'loss': 3.4167, 'learning_rate': 7.453726647848476e-07, 'epoch': 4.53} + 28%|██▊ | 105240/371472 [8:22:07<20:00:45, 3.70it/s] 28%|██▊ | 105241/371472 [8:22:08<19:27:55, 3.80it/s] 28%|██▊ | 105242/371472 [8:22:08<20:25:58, 3.62it/s] 28%|██▊ | 105243/371472 [8:22:08<20:05:20, 3.68it/s] 28%|██▊ | 105244/371472 [8:22:08<19:47:52, 3.74it/s] 28%|██▊ | 105245/371472 [8:22:09<19:37:37, 3.77it/s] 28%|██▊ | 105246/371472 [8:22:09<19:46:38, 3.74it/s] 28%|██▊ | 105247/371472 [8:22:09<20:50:44, 3.55it/s] 28%|██▊ | 105248/371472 [8:22:09<19:56:56, 3.71it/s] 28%|██▊ | 105249/371472 [8:22:10<21:18:25, 3.47it/s] 28%|██▊ | 105250/371472 [8:22:10<21:09:59, 3.49it/s] 28%|██▊ | 105251/371472 [8:22:10<21:37:31, 3.42it/s] 28%|██▊ | 105252/371472 [8:22:11<20:56:39, 3.53it/s] 28%|██▊ | 105253/371472 [8:22:11<19:52:41, 3.72it/s] 28%|██▊ | 105254/371472 [8:22:11<19:17:26, 3.83it/s] 28%|██▊ | 105255/371472 [8:22:11<19:53:34, 3.72it/s] 28%|██▊ | 105256/371472 [8:22:12<19:39:13, 3.76it/s] 28%|██▊ | 105257/371472 [8:22:12<20:11:53, 3.66it/s] 28%|██▊ | 105258/371472 [8:22:12<20:59:54, 3.52it/s] 28%|██▊ | 105259/371472 [8:22:13<21:04:13, 3.51it/s] 28%|██▊ | 105260/371472 [8:22:13<21:00:13, 3.52it/s] {'loss': 3.4467, 'learning_rate': 7.453241828093688e-07, 'epoch': 4.53} + 28%|██▊ | 105260/371472 [8:22:13<21:00:13, 3.52it/s] 28%|██▊ | 105261/371472 [8:22:13<22:09:04, 3.34it/s] 28%|██▊ | 105262/371472 [8:22:13<21:39:02, 3.42it/s] 28%|██▊ | 105263/371472 [8:22:14<22:07:18, 3.34it/s] 28%|██▊ | 105264/371472 [8:22:14<21:07:22, 3.50it/s] 28%|██▊ | 105265/371472 [8:22:14<20:19:19, 3.64it/s] 28%|██▊ | 105266/371472 [8:22:15<21:53:06, 3.38it/s] 28%|██▊ | 105267/371472 [8:22:15<21:33:16, 3.43it/s] 28%|██▊ | 105268/371472 [8:22:15<25:13:39, 2.93it/s] 28%|██▊ | 105269/371472 [8:22:16<23:10:16, 3.19it/s] 28%|██▊ | 105270/371472 [8:22:16<22:17:23, 3.32it/s] 28%|██▊ | 105271/371472 [8:22:16<21:26:38, 3.45it/s] 28%|██▊ | 105272/371472 [8:22:16<20:59:13, 3.52it/s] 28%|██▊ | 105273/371472 [8:22:17<20:32:31, 3.60it/s] 28%|██▊ | 105274/371472 [8:22:17<21:17:52, 3.47it/s] 28%|██▊ | 105275/371472 [8:22:17<21:34:50, 3.43it/s] 28%|██▊ | 105276/371472 [8:22:18<21:22:27, 3.46it/s] 28%|██▊ | 105277/371472 [8:22:18<20:27:32, 3.61it/s] 28%|██▊ | 105278/371472 [8:22:18<19:53:33, 3.72it/s] 28%|██▊ | 105279/371472 [8:22:18<19:52:33, 3.72it/s] 28%|██▊ | 105280/371472 [8:22:19<20:04:20, 3.68it/s] {'loss': 3.5601, 'learning_rate': 7.4527570083389e-07, 'epoch': 4.53} + 28%|██▊ | 105280/371472 [8:22:19<20:04:20, 3.68it/s] 28%|██▊ | 105281/371472 [8:22:19<19:57:54, 3.70it/s] 28%|██▊ | 105282/371472 [8:22:19<19:48:23, 3.73it/s] 28%|██▊ | 105283/371472 [8:22:19<19:18:02, 3.83it/s] 28%|██▊ | 105284/371472 [8:22:20<19:23:13, 3.81it/s] 28%|██▊ | 105285/371472 [8:22:20<19:35:49, 3.77it/s] 28%|██▊ | 105286/371472 [8:22:20<19:38:28, 3.76it/s] 28%|██▊ | 105287/371472 [8:22:20<19:08:21, 3.86it/s] 28%|██▊ | 105288/371472 [8:22:21<19:41:42, 3.75it/s] 28%|██▊ | 105289/371472 [8:22:21<19:53:55, 3.72it/s] 28%|██▊ | 105290/371472 [8:22:21<19:41:56, 3.75it/s] 28%|██▊ | 105291/371472 [8:22:22<19:37:48, 3.77it/s] 28%|██▊ | 105292/371472 [8:22:22<19:43:20, 3.75it/s] 28%|██▊ | 105293/371472 [8:22:22<20:28:13, 3.61it/s] 28%|██▊ | 105294/371472 [8:22:22<19:59:20, 3.70it/s] 28%|██▊ | 105295/371472 [8:22:23<19:51:58, 3.72it/s] 28%|██▊ | 105296/371472 [8:22:23<19:40:56, 3.76it/s] 28%|██▊ | 105297/371472 [8:22:23<20:22:06, 3.63it/s] 28%|██▊ | 105298/371472 [8:22:23<19:45:59, 3.74it/s] 28%|██▊ | 105299/371472 [8:22:24<20:16:35, 3.65it/s] 28%|██▊ | 105300/371472 [8:22:24<20:38:12, 3.58it/s] {'loss': 3.5308, 'learning_rate': 7.452272188584111e-07, 'epoch': 4.54} + 28%|██▊ | 105300/371472 [8:22:24<20:38:12, 3.58it/s] 28%|██▊ | 105301/371472 [8:22:24<19:59:15, 3.70it/s] 28%|██▊ | 105302/371472 [8:22:25<20:20:07, 3.64it/s] 28%|██▊ | 105303/371472 [8:22:25<21:10:48, 3.49it/s] 28%|██▊ | 105304/371472 [8:22:25<21:28:08, 3.44it/s] 28%|██▊ | 105305/371472 [8:22:25<21:26:24, 3.45it/s] 28%|██▊ | 105306/371472 [8:22:26<20:18:06, 3.64it/s] 28%|██▊ | 105307/371472 [8:22:26<20:47:18, 3.56it/s] 28%|██▊ | 105308/371472 [8:22:26<20:53:18, 3.54it/s] 28%|██▊ | 105309/371472 [8:22:27<22:36:53, 3.27it/s] 28%|██▊ | 105310/371472 [8:22:27<21:35:53, 3.42it/s] 28%|██▊ | 105311/371472 [8:22:27<21:24:56, 3.45it/s] 28%|██▊ | 105312/371472 [8:22:27<20:47:43, 3.56it/s] 28%|██▊ | 105313/371472 [8:22:28<20:08:28, 3.67it/s] 28%|██▊ | 105314/371472 [8:22:28<21:08:45, 3.50it/s] 28%|██▊ | 105315/371472 [8:22:28<21:03:40, 3.51it/s] 28%|██▊ | 105316/371472 [8:22:29<20:32:12, 3.60it/s] 28%|██▊ | 105317/371472 [8:22:29<21:12:26, 3.49it/s] 28%|██▊ | 105318/371472 [8:22:29<21:24:46, 3.45it/s] 28%|██▊ | 105319/371472 [8:22:29<20:28:51, 3.61it/s] 28%|██▊ | 105320/371472 [8:22:30<20:11:39, 3.66it/s] {'loss': 3.3293, 'learning_rate': 7.451787368829321e-07, 'epoch': 4.54} + 28%|██▊ | 105320/371472 [8:22:30<20:11:39, 3.66it/s] 28%|██▊ | 105321/371472 [8:22:30<19:38:49, 3.76it/s] 28%|██▊ | 105322/371472 [8:22:30<19:29:38, 3.79it/s] 28%|██▊ | 105323/371472 [8:22:31<21:00:31, 3.52it/s] 28%|██▊ | 105324/371472 [8:22:31<20:53:10, 3.54it/s] 28%|██▊ | 105325/371472 [8:22:31<20:27:37, 3.61it/s] 28%|██▊ | 105326/371472 [8:22:31<20:24:53, 3.62it/s] 28%|██▊ | 105327/371472 [8:22:32<20:26:01, 3.62it/s] 28%|██▊ | 105328/371472 [8:22:32<20:06:58, 3.68it/s] 28%|██▊ | 105329/371472 [8:22:32<21:10:42, 3.49it/s] 28%|██▊ | 105330/371472 [8:22:32<20:02:56, 3.69it/s] 28%|██▊ | 105331/371472 [8:22:33<20:08:56, 3.67it/s] 28%|██▊ | 105332/371472 [8:22:33<21:56:05, 3.37it/s] 28%|██▊ | 105333/371472 [8:22:33<21:59:13, 3.36it/s] 28%|██▊ | 105334/371472 [8:22:34<22:03:38, 3.35it/s] 28%|██▊ | 105335/371472 [8:22:34<21:29:41, 3.44it/s] 28%|██▊ | 105336/371472 [8:22:34<21:31:22, 3.43it/s] 28%|██▊ | 105337/371472 [8:22:35<21:25:36, 3.45it/s] 28%|██▊ | 105338/371472 [8:22:35<22:13:55, 3.33it/s] 28%|██▊ | 105339/371472 [8:22:35<21:18:19, 3.47it/s] 28%|██▊ | 105340/371472 [8:22:35<20:32:19, 3.60it/s] {'loss': 3.4035, 'learning_rate': 7.451302549074533e-07, 'epoch': 4.54} + 28%|██▊ | 105340/371472 [8:22:35<20:32:19, 3.60it/s] 28%|██▊ | 105341/371472 [8:22:36<20:53:04, 3.54it/s] 28%|██▊ | 105342/371472 [8:22:36<23:55:22, 3.09it/s] 28%|██▊ | 105343/371472 [8:22:36<24:24:58, 3.03it/s] 28%|██▊ | 105344/371472 [8:22:37<23:31:02, 3.14it/s] 28%|██▊ | 105345/371472 [8:22:37<24:30:02, 3.02it/s] 28%|██▊ | 105346/371472 [8:22:37<22:58:34, 3.22it/s] 28%|██▊ | 105347/371472 [8:22:38<22:31:43, 3.28it/s] 28%|██▊ | 105348/371472 [8:22:38<21:46:27, 3.39it/s] 28%|██▊ | 105349/371472 [8:22:38<23:44:38, 3.11it/s] 28%|██▊ | 105350/371472 [8:22:39<22:24:04, 3.30it/s] 28%|██▊ | 105351/371472 [8:22:39<21:19:47, 3.47it/s] 28%|██▊ | 105352/371472 [8:22:39<20:41:53, 3.57it/s] 28%|██▊ | 105353/371472 [8:22:39<22:07:28, 3.34it/s] 28%|██▊ | 105354/371472 [8:22:40<21:22:06, 3.46it/s] 28%|██▊ | 105355/371472 [8:22:40<20:44:47, 3.56it/s] 28%|██▊ | 105356/371472 [8:22:40<20:08:42, 3.67it/s] 28%|██▊ | 105357/371472 [8:22:40<19:48:44, 3.73it/s] 28%|██▊ | 105358/371472 [8:22:41<19:58:57, 3.70it/s] 28%|██▊ | 105359/371472 [8:22:41<20:30:28, 3.60it/s] 28%|██▊ | 105360/371472 [8:22:41<21:24:29, 3.45it/s] {'loss': 3.4777, 'learning_rate': 7.450817729319744e-07, 'epoch': 4.54} + 28%|██▊ | 105360/371472 [8:22:41<21:24:29, 3.45it/s] 28%|██▊ | 105361/371472 [8:22:42<22:15:16, 3.32it/s] 28%|██▊ | 105362/371472 [8:22:42<21:22:35, 3.46it/s] 28%|██▊ | 105363/371472 [8:22:42<20:47:05, 3.56it/s] 28%|██▊ | 105364/371472 [8:22:42<21:27:58, 3.44it/s] 28%|██▊ | 105365/371472 [8:22:43<21:21:44, 3.46it/s] 28%|██▊ | 105366/371472 [8:22:43<20:46:30, 3.56it/s] 28%|██▊ | 105367/371472 [8:22:43<20:25:55, 3.62it/s] 28%|██▊ | 105368/371472 [8:22:44<19:53:16, 3.72it/s] 28%|██▊ | 105369/371472 [8:22:44<20:30:01, 3.61it/s] 28%|██▊ | 105370/371472 [8:22:44<21:18:30, 3.47it/s] 28%|██▊ | 105371/371472 [8:22:44<21:34:38, 3.43it/s] 28%|██▊ | 105372/371472 [8:22:45<22:54:21, 3.23it/s] 28%|██▊ | 105373/371472 [8:22:45<21:48:01, 3.39it/s] 28%|██▊ | 105374/371472 [8:22:45<21:34:01, 3.43it/s] 28%|██▊ | 105375/371472 [8:22:46<20:58:10, 3.52it/s] 28%|██▊ | 105376/371472 [8:22:46<21:28:01, 3.44it/s] 28%|██▊ | 105377/371472 [8:22:46<21:45:06, 3.40it/s] 28%|██▊ | 105378/371472 [8:22:46<21:08:34, 3.50it/s] 28%|██▊ | 105379/371472 [8:22:47<22:36:18, 3.27it/s] 28%|██▊ | 105380/371472 [8:22:47<21:29:30, 3.44it/s] {'loss': 3.5998, 'learning_rate': 7.450332909564955e-07, 'epoch': 4.54} + 28%|██▊ | 105380/371472 [8:22:47<21:29:30, 3.44it/s] 28%|██▊ | 105381/371472 [8:22:47<20:49:25, 3.55it/s] 28%|██▊ | 105382/371472 [8:22:48<19:55:44, 3.71it/s] 28%|██▊ | 105383/371472 [8:22:48<19:49:09, 3.73it/s] 28%|██▊ | 105384/371472 [8:22:48<20:00:35, 3.69it/s] 28%|██▊ | 105385/371472 [8:22:48<21:45:44, 3.40it/s] 28%|██▊ | 105386/371472 [8:22:49<21:15:20, 3.48it/s] 28%|██▊ | 105387/371472 [8:22:49<21:16:12, 3.47it/s] 28%|██▊ | 105388/371472 [8:22:49<20:38:49, 3.58it/s] 28%|██▊ | 105389/371472 [8:22:50<21:08:51, 3.50it/s] 28%|██▊ | 105390/371472 [8:22:50<20:38:04, 3.58it/s] 28%|██▊ | 105391/371472 [8:22:50<20:37:26, 3.58it/s] 28%|██▊ | 105392/371472 [8:22:50<19:47:59, 3.73it/s] 28%|██▊ | 105393/371472 [8:22:51<20:02:06, 3.69it/s] 28%|██▊ | 105394/371472 [8:22:51<20:22:48, 3.63it/s] 28%|██▊ | 105395/371472 [8:22:51<20:35:21, 3.59it/s] 28%|██▊ | 105396/371472 [8:22:52<20:36:39, 3.59it/s] 28%|██▊ | 105397/371472 [8:22:52<22:08:03, 3.34it/s] 28%|██▊ | 105398/371472 [8:22:52<21:17:16, 3.47it/s] 28%|██▊ | 105399/371472 [8:22:52<22:24:56, 3.30it/s] 28%|██▊ | 105400/371472 [8:22:53<21:58:08, 3.36it/s] {'loss': 3.367, 'learning_rate': 7.449848089810165e-07, 'epoch': 4.54} + 28%|██▊ | 105400/371472 [8:22:53<21:58:08, 3.36it/s] 28%|██▊ | 105401/371472 [8:22:53<22:48:30, 3.24it/s] 28%|██▊ | 105402/371472 [8:22:53<23:16:38, 3.18it/s] 28%|██▊ | 105403/371472 [8:22:54<21:55:42, 3.37it/s] 28%|██▊ | 105404/371472 [8:22:54<21:08:40, 3.50it/s] 28%|██▊ | 105405/371472 [8:22:54<21:06:16, 3.50it/s] 28%|██▊ | 105406/371472 [8:22:55<22:51:12, 3.23it/s] 28%|██▊ | 105407/371472 [8:22:55<22:04:59, 3.35it/s] 28%|██▊ | 105408/371472 [8:22:55<21:52:14, 3.38it/s] 28%|██▊ | 105409/371472 [8:22:55<21:18:22, 3.47it/s] 28%|██▊ | 105410/371472 [8:22:56<20:40:35, 3.57it/s] 28%|██▊ | 105411/371472 [8:22:56<21:19:47, 3.46it/s] 28%|██▊ | 105412/371472 [8:22:56<20:27:54, 3.61it/s] 28%|██▊ | 105413/371472 [8:22:57<20:14:40, 3.65it/s] 28%|██▊ | 105414/371472 [8:22:57<21:42:56, 3.40it/s] 28%|██▊ | 105415/371472 [8:22:57<22:23:47, 3.30it/s] 28%|██▊ | 105416/371472 [8:22:57<22:19:37, 3.31it/s] 28%|██▊ | 105417/371472 [8:22:58<21:01:03, 3.52it/s] 28%|██▊ | 105418/371472 [8:22:58<20:35:42, 3.59it/s] 28%|██▊ | 105419/371472 [8:22:58<20:34:13, 3.59it/s] 28%|██▊ | 105420/371472 [8:22:59<20:14:42, 3.65it/s] {'loss': 3.638, 'learning_rate': 7.449363270055377e-07, 'epoch': 4.54} + 28%|██▊ | 105420/371472 [8:22:59<20:14:42, 3.65it/s] 28%|██▊ | 105421/371472 [8:22:59<19:59:36, 3.70it/s] 28%|██▊ | 105422/371472 [8:22:59<20:11:03, 3.66it/s] 28%|██▊ | 105423/371472 [8:22:59<19:59:26, 3.70it/s] 28%|██▊ | 105424/371472 [8:23:00<19:51:59, 3.72it/s] 28%|██▊ | 105425/371472 [8:23:00<20:12:36, 3.66it/s] 28%|██▊ | 105426/371472 [8:23:00<19:24:02, 3.81it/s] 28%|██▊ | 105427/371472 [8:23:00<19:52:16, 3.72it/s] 28%|██▊ | 105428/371472 [8:23:01<20:09:16, 3.67it/s] 28%|██▊ | 105429/371472 [8:23:01<20:59:40, 3.52it/s] 28%|██▊ | 105430/371472 [8:23:01<20:39:43, 3.58it/s] 28%|██▊ | 105431/371472 [8:23:02<19:51:06, 3.72it/s] 28%|██▊ | 105432/371472 [8:23:02<19:11:49, 3.85it/s] 28%|██▊ | 105433/371472 [8:23:02<19:29:20, 3.79it/s] 28%|██▊ | 105434/371472 [8:23:02<20:02:43, 3.69it/s] 28%|██▊ | 105435/371472 [8:23:03<19:59:36, 3.70it/s] 28%|██▊ | 105436/371472 [8:23:03<22:45:17, 3.25it/s] 28%|██▊ | 105437/371472 [8:23:03<22:56:13, 3.22it/s] 28%|██▊ | 105438/371472 [8:23:04<22:45:13, 3.25it/s] 28%|██▊ | 105439/371472 [8:23:04<24:24:13, 3.03it/s] 28%|██▊ | 105440/371472 [8:23:04<22:34:20, 3.27it/s] {'loss': 3.4904, 'learning_rate': 7.448878450300589e-07, 'epoch': 4.54} + 28%|██▊ | 105440/371472 [8:23:04<22:34:20, 3.27it/s] 28%|██▊ | 105441/371472 [8:23:05<22:43:10, 3.25it/s] 28%|██▊ | 105442/371472 [8:23:05<22:52:53, 3.23it/s] 28%|██▊ | 105443/371472 [8:23:05<21:41:50, 3.41it/s] 28%|██▊ | 105444/371472 [8:23:05<22:09:40, 3.33it/s] 28%|██▊ | 105445/371472 [8:23:06<23:43:47, 3.11it/s] 28%|██▊ | 105446/371472 [8:23:06<23:03:53, 3.20it/s] 28%|██▊ | 105447/371472 [8:23:06<22:57:37, 3.22it/s] 28%|██▊ | 105448/371472 [8:23:07<23:36:50, 3.13it/s] 28%|██▊ | 105449/371472 [8:23:07<22:13:40, 3.32it/s] 28%|██▊ | 105450/371472 [8:23:07<20:58:15, 3.52it/s] 28%|██▊ | 105451/371472 [8:23:08<22:24:15, 3.30it/s] 28%|██▊ | 105452/371472 [8:23:08<22:38:35, 3.26it/s] 28%|██▊ | 105453/371472 [8:23:08<22:14:57, 3.32it/s] 28%|██▊ | 105454/371472 [8:23:08<22:28:31, 3.29it/s] 28%|██▊ | 105455/371472 [8:23:09<21:52:22, 3.38it/s] 28%|██▊ | 105456/371472 [8:23:09<21:36:10, 3.42it/s] 28%|██▊ | 105457/371472 [8:23:09<20:48:51, 3.55it/s] 28%|██▊ | 105458/371472 [8:23:10<20:41:23, 3.57it/s] 28%|██▊ | 105459/371472 [8:23:10<21:48:47, 3.39it/s] 28%|██▊ | 105460/371472 [8:23:10<21:56:48, 3.37it/s] {'loss': 3.4241, 'learning_rate': 7.4483936305458e-07, 'epoch': 4.54} + 28%|██▊ | 105460/371472 [8:23:10<21:56:48, 3.37it/s] 28%|██▊ | 105461/371472 [8:23:10<21:11:10, 3.49it/s] 28%|██▊ | 105462/371472 [8:23:11<21:33:26, 3.43it/s] 28%|██▊ | 105463/371472 [8:23:11<22:00:22, 3.36it/s] 28%|██▊ | 105464/371472 [8:23:11<22:27:16, 3.29it/s] 28%|██▊ | 105465/371472 [8:23:12<21:29:40, 3.44it/s] 28%|██▊ | 105466/371472 [8:23:12<20:36:23, 3.59it/s] 28%|██▊ | 105467/371472 [8:23:12<20:10:33, 3.66it/s] 28%|██▊ | 105468/371472 [8:23:12<19:31:40, 3.78it/s] 28%|██▊ | 105469/371472 [8:23:13<19:26:49, 3.80it/s] 28%|██▊ | 105470/371472 [8:23:13<20:30:41, 3.60it/s] 28%|██▊ | 105471/371472 [8:23:13<19:56:20, 3.71it/s] 28%|██▊ | 105472/371472 [8:23:13<19:22:27, 3.81it/s] 28%|██▊ | 105473/371472 [8:23:14<19:16:11, 3.83it/s] 28%|██▊ | 105474/371472 [8:23:14<21:16:04, 3.47it/s] 28%|██▊ | 105475/371472 [8:23:14<22:15:36, 3.32it/s] 28%|██▊ | 105476/371472 [8:23:15<21:42:01, 3.40it/s] 28%|██▊ | 105477/371472 [8:23:15<20:48:53, 3.55it/s] 28%|██▊ | 105478/371472 [8:23:15<20:20:20, 3.63it/s] 28%|██▊ | 105479/371472 [8:23:15<20:16:18, 3.64it/s] 28%|██▊ | 105480/371472 [8:23:16<20:12:34, 3.66it/s] {'loss': 3.319, 'learning_rate': 7.44790881079101e-07, 'epoch': 4.54} + 28%|██▊ | 105480/371472 [8:23:16<20:12:34, 3.66it/s] 28%|██▊ | 105481/371472 [8:23:16<20:55:45, 3.53it/s] 28%|██▊ | 105482/371472 [8:23:16<20:46:24, 3.56it/s] 28%|██▊ | 105483/371472 [8:23:17<21:13:33, 3.48it/s] 28%|██▊ | 105484/371472 [8:23:17<21:50:37, 3.38it/s] 28%|██▊ | 105485/371472 [8:23:17<20:52:11, 3.54it/s] 28%|██▊ | 105486/371472 [8:23:18<21:31:12, 3.43it/s] 28%|██▊ | 105487/371472 [8:23:18<21:23:22, 3.45it/s] 28%|██▊ | 105488/371472 [8:23:18<20:19:53, 3.63it/s] 28%|██▊ | 105489/371472 [8:23:18<21:30:11, 3.44it/s] 28%|██▊ | 105490/371472 [8:23:19<21:34:51, 3.42it/s] 28%|██▊ | 105491/371472 [8:23:19<20:32:35, 3.60it/s] 28%|██▊ | 105492/371472 [8:23:19<19:54:47, 3.71it/s] 28%|██▊ | 105493/371472 [8:23:19<20:51:25, 3.54it/s] 28%|██▊ | 105494/371472 [8:23:20<21:40:15, 3.41it/s] 28%|██▊ | 105495/371472 [8:23:20<21:34:57, 3.42it/s] 28%|██▊ | 105496/371472 [8:23:20<21:52:46, 3.38it/s] 28%|██▊ | 105497/371472 [8:23:21<21:02:57, 3.51it/s] 28%|██▊ | 105498/371472 [8:23:21<20:46:28, 3.56it/s] 28%|██▊ | 105499/371472 [8:23:21<20:28:39, 3.61it/s] 28%|██▊ | 105500/371472 [8:23:22<21:15:58, 3.47it/s] {'loss': 3.4746, 'learning_rate': 7.447423991036221e-07, 'epoch': 4.54} + 28%|██▊ | 105500/371472 [8:23:22<21:15:58, 3.47it/s] 28%|██▊ | 105501/371472 [8:23:22<23:17:39, 3.17it/s] 28%|██▊ | 105502/371472 [8:23:22<22:32:59, 3.28it/s] 28%|██▊ | 105503/371472 [8:23:23<23:46:11, 3.11it/s] 28%|██▊ | 105504/371472 [8:23:23<22:35:29, 3.27it/s] 28%|██▊ | 105505/371472 [8:23:23<21:52:58, 3.38it/s] 28%|██▊ | 105506/371472 [8:23:23<20:52:25, 3.54it/s] 28%|██▊ | 105507/371472 [8:23:24<20:08:53, 3.67it/s] 28%|██▊ | 105508/371472 [8:23:24<20:13:45, 3.65it/s] 28%|██▊ | 105509/371472 [8:23:24<20:17:34, 3.64it/s] 28%|██▊ | 105510/371472 [8:23:24<21:03:55, 3.51it/s] 28%|██▊ | 105511/371472 [8:23:25<20:54:18, 3.53it/s] 28%|██▊ | 105512/371472 [8:23:25<21:00:38, 3.52it/s] 28%|██▊ | 105513/371472 [8:23:25<21:15:59, 3.47it/s] 28%|██▊ | 105514/371472 [8:23:26<20:16:49, 3.64it/s] 28%|██▊ | 105515/371472 [8:23:26<22:21:43, 3.30it/s] 28%|██▊ | 105516/371472 [8:23:26<21:31:52, 3.43it/s] 28%|██▊ | 105517/371472 [8:23:26<21:40:16, 3.41it/s] 28%|██▊ | 105518/371472 [8:23:27<20:49:48, 3.55it/s] 28%|██▊ | 105519/371472 [8:23:27<20:33:31, 3.59it/s] 28%|██▊ | 105520/371472 [8:23:27<20:56:35, 3.53it/s] {'loss': 3.5823, 'learning_rate': 7.446939171281433e-07, 'epoch': 4.54} + 28%|██▊ | 105520/371472 [8:23:27<20:56:35, 3.53it/s] 28%|██▊ | 105521/371472 [8:23:28<21:06:29, 3.50it/s] 28%|██▊ | 105522/371472 [8:23:28<20:15:32, 3.65it/s] 28%|██▊ | 105523/371472 [8:23:28<19:56:10, 3.71it/s] 28%|██▊ | 105524/371472 [8:23:28<21:46:53, 3.39it/s] 28%|██▊ | 105525/371472 [8:23:29<22:40:34, 3.26it/s] 28%|██▊ | 105526/371472 [8:23:29<21:15:26, 3.48it/s] 28%|██▊ | 105527/371472 [8:23:29<20:52:56, 3.54it/s] 28%|██▊ | 105528/371472 [8:23:30<20:45:38, 3.56it/s] 28%|██▊ | 105529/371472 [8:23:30<20:54:15, 3.53it/s] 28%|██▊ | 105530/371472 [8:23:30<21:05:37, 3.50it/s] 28%|██▊ | 105531/371472 [8:23:30<20:10:05, 3.66it/s] 28%|██▊ | 105532/371472 [8:23:31<22:36:52, 3.27it/s] 28%|██▊ | 105533/371472 [8:23:31<21:54:09, 3.37it/s] 28%|██▊ | 105534/371472 [8:23:31<21:21:45, 3.46it/s] 28%|██▊ | 105535/371472 [8:23:32<20:55:46, 3.53it/s] 28%|██▊ | 105536/371472 [8:23:32<21:15:55, 3.47it/s] 28%|██▊ | 105537/371472 [8:23:32<20:33:01, 3.59it/s] 28%|██▊ | 105538/371472 [8:23:32<20:26:03, 3.62it/s] 28%|██▊ | 105539/371472 [8:23:33<20:28:58, 3.61it/s] 28%|██▊ | 105540/371472 [8:23:33<20:31:14, 3.60it/s] {'loss': 3.3827, 'learning_rate': 7.446454351526642e-07, 'epoch': 4.55} + 28%|██▊ | 105540/371472 [8:23:33<20:31:14, 3.60it/s] 28%|██▊ | 105541/371472 [8:23:33<20:13:28, 3.65it/s] 28%|██▊ | 105542/371472 [8:23:33<19:29:52, 3.79it/s] 28%|██▊ | 105543/371472 [8:23:34<21:21:56, 3.46it/s] 28%|██▊ | 105544/371472 [8:23:34<21:58:03, 3.36it/s] 28%|██▊ | 105545/371472 [8:23:34<22:22:02, 3.30it/s] 28%|██▊ | 105546/371472 [8:23:35<22:59:03, 3.21it/s] 28%|██▊ | 105547/371472 [8:23:35<22:28:18, 3.29it/s] 28%|██▊ | 105548/371472 [8:23:35<22:01:10, 3.35it/s] 28%|██▊ | 105549/371472 [8:23:36<21:58:36, 3.36it/s] 28%|██▊ | 105550/371472 [8:23:36<22:28:24, 3.29it/s] 28%|██▊ | 105551/371472 [8:23:36<21:22:40, 3.46it/s] 28%|██▊ | 105552/371472 [8:23:37<21:23:33, 3.45it/s] 28%|██▊ | 105553/371472 [8:23:37<21:04:23, 3.51it/s] 28%|██▊ | 105554/371472 [8:23:37<21:57:46, 3.36it/s] 28%|██▊ | 105555/371472 [8:23:37<21:35:34, 3.42it/s] 28%|█��▊ | 105556/371472 [8:23:38<21:21:14, 3.46it/s] 28%|██▊ | 105557/371472 [8:23:38<20:25:06, 3.62it/s] 28%|██▊ | 105558/371472 [8:23:38<20:13:54, 3.65it/s] 28%|██▊ | 105559/371472 [8:23:39<20:46:50, 3.55it/s] 28%|██▊ | 105560/371472 [8:23:39<20:18:07, 3.64it/s] {'loss': 3.3021, 'learning_rate': 7.445969531771854e-07, 'epoch': 4.55} + 28%|██▊ | 105560/371472 [8:23:39<20:18:07, 3.64it/s] 28%|██▊ | 105561/371472 [8:23:39<20:26:13, 3.61it/s] 28%|██▊ | 105562/371472 [8:23:39<19:49:36, 3.73it/s] 28%|██▊ | 105563/371472 [8:23:40<20:28:34, 3.61it/s] 28%|██▊ | 105564/371472 [8:23:40<20:53:51, 3.53it/s] 28%|██▊ | 105565/371472 [8:23:40<20:44:16, 3.56it/s] 28%|██▊ | 105566/371472 [8:23:40<21:14:21, 3.48it/s] 28%|██▊ | 105567/371472 [8:23:41<23:16:39, 3.17it/s] 28%|██▊ | 105568/371472 [8:23:41<21:48:12, 3.39it/s] 28%|██▊ | 105569/371472 [8:23:41<21:00:18, 3.52it/s] 28%|██▊ | 105570/371472 [8:23:42<20:34:03, 3.59it/s] 28%|██▊ | 105571/371472 [8:23:42<20:54:04, 3.53it/s] 28%|██▊ | 105572/371472 [8:23:42<20:18:37, 3.64it/s] 28%|██▊ | 105573/371472 [8:23:42<19:40:49, 3.75it/s] 28%|██▊ | 105574/371472 [8:23:43<19:27:11, 3.80it/s] 28%|██▊ | 105575/371472 [8:23:43<19:41:02, 3.75it/s] 28%|██▊ | 105576/371472 [8:23:43<21:53:35, 3.37it/s] 28%|██▊ | 105577/371472 [8:23:44<22:46:44, 3.24it/s] 28%|██▊ | 105578/371472 [8:23:44<21:34:07, 3.42it/s] 28%|██▊ | 105579/371472 [8:23:44<20:39:09, 3.58it/s] 28%|██▊ | 105580/371472 [8:23:44<19:32:45, 3.78it/s] {'loss': 3.3844, 'learning_rate': 7.445484712017066e-07, 'epoch': 4.55} + 28%|██▊ | 105580/371472 [8:23:44<19:32:45, 3.78it/s] 28%|██▊ | 105581/371472 [8:23:45<20:46:02, 3.56it/s] 28%|██▊ | 105582/371472 [8:23:45<20:11:57, 3.66it/s] 28%|██▊ | 105583/371472 [8:23:45<20:14:20, 3.65it/s] 28%|██▊ | 105584/371472 [8:23:46<20:17:47, 3.64it/s] 28%|██▊ | 105585/371472 [8:23:46<19:28:58, 3.79it/s] 28%|██▊ | 105586/371472 [8:23:46<20:46:29, 3.56it/s] 28%|██▊ | 105587/371472 [8:23:46<20:06:50, 3.67it/s] 28%|██▊ | 105588/371472 [8:23:47<19:47:28, 3.73it/s] 28%|██▊ | 105589/371472 [8:23:47<19:49:12, 3.73it/s] 28%|██▊ | 105590/371472 [8:23:47<19:49:30, 3.73it/s] 28%|██▊ | 105591/371472 [8:23:47<19:45:19, 3.74it/s] 28%|██▊ | 105592/371472 [8:23:48<19:17:47, 3.83it/s] 28%|██▊ | 105593/371472 [8:23:48<19:40:11, 3.75it/s] 28%|██▊ | 105594/371472 [8:23:48<20:38:25, 3.58it/s] 28%|██▊ | 105595/371472 [8:23:49<20:49:04, 3.55it/s] 28%|██▊ | 105596/371472 [8:23:49<21:44:03, 3.40it/s] 28%|██▊ | 105597/371472 [8:23:49<21:16:19, 3.47it/s] 28%|██▊ | 105598/371472 [8:23:49<22:09:31, 3.33it/s] 28%|██▊ | 105599/371472 [8:23:50<21:34:51, 3.42it/s] 28%|██▊ | 105600/371472 [8:23:50<20:28:10, 3.61it/s] {'loss': 3.4303, 'learning_rate': 7.444999892262277e-07, 'epoch': 4.55} + 28%|██▊ | 105600/371472 [8:23:50<20:28:10, 3.61it/s] 28%|██▊ | 105601/371472 [8:23:50<20:13:28, 3.65it/s] 28%|██▊ | 105602/371472 [8:23:50<19:26:50, 3.80it/s] 28%|██▊ | 105603/371472 [8:23:51<19:46:39, 3.73it/s] 28%|██▊ | 105604/371472 [8:23:51<19:20:59, 3.82it/s] 28%|██▊ | 105605/371472 [8:23:51<19:52:23, 3.72it/s] 28%|██▊ | 105606/371472 [8:23:52<19:44:55, 3.74it/s] 28%|██▊ | 105607/371472 [8:23:52<19:59:16, 3.69it/s] 28%|██▊ | 105608/371472 [8:23:52<20:41:12, 3.57it/s] 28%|██▊ | 105609/371472 [8:23:52<20:57:11, 3.52it/s] 28%|██▊ | 105610/371472 [8:23:53<20:54:41, 3.53it/s] 28%|██▊ | 105611/371472 [8:23:53<21:14:30, 3.48it/s] 28%|██▊ | 105612/371472 [8:23:53<20:10:27, 3.66it/s] 28%|██▊ | 105613/371472 [8:23:54<20:34:17, 3.59it/s] 28%|██▊ | 105614/371472 [8:23:54<21:38:39, 3.41it/s] 28%|██▊ | 105615/371472 [8:23:54<21:37:49, 3.41it/s] 28%|██▊ | 105616/371472 [8:23:54<21:06:46, 3.50it/s] 28%|██▊ | 105617/371472 [8:23:55<20:19:56, 3.63it/s] 28%|██▊ | 105618/371472 [8:23:55<21:41:53, 3.40it/s] 28%|██▊ | 105619/371472 [8:23:55<21:28:22, 3.44it/s] 28%|██▊ | 105620/371472 [8:23:56<22:06:57, 3.34it/s] {'loss': 3.2746, 'learning_rate': 7.444515072507486e-07, 'epoch': 4.55} + 28%|██▊ | 105620/371472 [8:23:56<22:06:57, 3.34it/s] 28%|██▊ | 105621/371472 [8:23:56<21:33:57, 3.42it/s] 28%|██▊ | 105622/371472 [8:23:56<20:26:17, 3.61it/s] 28%|██▊ | 105623/371472 [8:23:56<20:15:48, 3.64it/s] 28%|██▊ | 105624/371472 [8:23:57<20:04:05, 3.68it/s] 28%|██▊ | 105625/371472 [8:23:57<21:47:54, 3.39it/s] 28%|██▊ | 105626/371472 [8:23:57<20:53:10, 3.54it/s] 28%|██▊ | 105627/371472 [8:23:58<20:24:43, 3.62it/s] 28%|██▊ | 105628/371472 [8:23:58<20:11:08, 3.66it/s] 28%|██▊ | 105629/371472 [8:23:58<20:45:30, 3.56it/s] 28%|██▊ | 105630/371472 [8:23:58<20:17:14, 3.64it/s] 28%|██▊ | 105631/371472 [8:23:59<20:22:42, 3.62it/s] 28%|██▊ | 105632/371472 [8:23:59<20:57:09, 3.52it/s] 28%|██▊ | 105633/371472 [8:23:59<20:57:52, 3.52it/s] 28%|██▊ | 105634/371472 [8:23:59<20:33:02, 3.59it/s] 28%|██▊ | 105635/371472 [8:24:00<20:51:42, 3.54it/s] 28%|██▊ | 105636/371472 [8:24:00<21:09:53, 3.49it/s] 28%|██▊ | 105637/371472 [8:24:00<20:22:28, 3.62it/s] 28%|██▊ | 105638/371472 [8:24:01<21:01:22, 3.51it/s] 28%|██▊ | 105639/371472 [8:24:01<20:08:36, 3.67it/s] 28%|██▊ | 105640/371472 [8:24:01<20:53:59, 3.53it/s] {'loss': 3.4267, 'learning_rate': 7.444030252752698e-07, 'epoch': 4.55} + 28%|██▊ | 105640/371472 [8:24:01<20:53:59, 3.53it/s] 28%|██▊ | 105641/371472 [8:24:01<20:47:03, 3.55it/s] 28%|██▊ | 105642/371472 [8:24:02<20:18:32, 3.64it/s] 28%|██▊ | 105643/371472 [8:24:02<20:46:37, 3.55it/s] 28%|██▊ | 105644/371472 [8:24:02<21:40:57, 3.41it/s] 28%|██▊ | 105645/371472 [8:24:03<21:45:22, 3.39it/s] 28%|██▊ | 105646/371472 [8:24:03<20:52:44, 3.54it/s] 28%|██▊ | 105647/371472 [8:24:03<21:15:20, 3.47it/s] 28%|██▊ | 105648/371472 [8:24:03<20:33:37, 3.59it/s] 28%|██▊ | 105649/371472 [8:24:04<21:16:50, 3.47it/s] 28%|██▊ | 105650/371472 [8:24:04<21:15:43, 3.47it/s] 28%|██▊ | 105651/371472 [8:24:04<21:00:12, 3.52it/s] 28%|██▊ | 105652/371472 [8:24:05<21:33:31, 3.43it/s] 28%|██▊ | 105653/371472 [8:24:05<21:47:40, 3.39it/s] 28%|██▊ | 105654/371472 [8:24:05<20:36:11, 3.58it/s] 28%|██▊ | 105655/371472 [8:24:06<22:20:29, 3.30it/s] 28%|██▊ | 105656/371472 [8:24:06<21:59:55, 3.36it/s] 28%|██▊ | 105657/371472 [8:24:06<21:14:06, 3.48it/s] 28%|██▊ | 105658/371472 [8:24:06<20:35:29, 3.59it/s] 28%|██▊ | 105659/371472 [8:24:07<20:37:41, 3.58it/s] 28%|██▊ | 105660/371472 [8:24:07<20:08:42, 3.67it/s] {'loss': 3.4473, 'learning_rate': 7.44354543299791e-07, 'epoch': 4.55} + 28%|██▊ | 105660/371472 [8:24:07<20:08:42, 3.67it/s] 28%|██▊ | 105661/371472 [8:24:07<20:17:30, 3.64it/s] 28%|██▊ | 105662/371472 [8:24:07<20:52:03, 3.54it/s] 28%|██▊ | 105663/371472 [8:24:08<20:43:50, 3.56it/s] 28%|██▊ | 105664/371472 [8:24:08<20:01:38, 3.69it/s] 28%|██▊ | 105665/371472 [8:24:08<20:02:21, 3.68it/s] 28%|██▊ | 105666/371472 [8:24:08<19:40:20, 3.75it/s] 28%|██▊ | 105667/371472 [8:24:09<19:57:39, 3.70it/s] 28%|██▊ | 105668/371472 [8:24:09<21:03:44, 3.51it/s] 28%|██▊ | 105669/371472 [8:24:09<20:36:35, 3.58it/s] 28%|██▊ | 105670/371472 [8:24:10<19:53:57, 3.71it/s] 28%|██▊ | 105671/371472 [8:24:10<20:56:31, 3.53it/s] 28%|██▊ | 105672/371472 [8:24:10<20:14:55, 3.65it/s] 28%|██▊ | 105673/371472 [8:24:10<19:36:40, 3.76it/s] 28%|██▊ | 105674/371472 [8:24:11<19:33:11, 3.78it/s] 28%|██▊ | 105675/371472 [8:24:11<19:13:13, 3.84it/s] 28%|██▊ | 105676/371472 [8:24:11<21:04:29, 3.50it/s] 28%|██▊ | 105677/371472 [8:24:12<20:56:15, 3.53it/s] 28%|██▊ | 105678/371472 [8:24:12<20:02:38, 3.68it/s] 28%|██▊ | 105679/371472 [8:24:12<19:37:58, 3.76it/s] 28%|██▊ | 105680/371472 [8:24:12<20:49:39, 3.54it/s] {'loss': 3.4104, 'learning_rate': 7.443060613243121e-07, 'epoch': 4.55} + 28%|██▊ | 105680/371472 [8:24:12<20:49:39, 3.54it/s] 28%|██▊ | 105681/371472 [8:24:13<20:53:03, 3.54it/s] 28%|██▊ | 105682/371472 [8:24:13<21:10:41, 3.49it/s] 28%|██▊ | 105683/371472 [8:24:13<23:04:56, 3.20it/s] 28%|██▊ | 105684/371472 [8:24:14<23:58:10, 3.08it/s] 28%|██▊ | 105685/371472 [8:24:14<22:51:45, 3.23it/s] 28%|██▊ | 105686/371472 [8:24:14<21:23:20, 3.45it/s] 28%|██▊ | 105687/371472 [8:24:14<21:13:43, 3.48it/s] 28%|██▊ | 105688/371472 [8:24:15<20:31:20, 3.60it/s] 28%|██▊ | 105689/371472 [8:24:15<20:36:16, 3.58it/s] 28%|██▊ | 105690/371472 [8:24:15<19:57:39, 3.70it/s] 28%|██▊ | 105691/371472 [8:24:16<20:12:30, 3.65it/s] 28%|██▊ | 105692/371472 [8:24:16<20:00:45, 3.69it/s] 28%|██▊ | 105693/371472 [8:24:16<20:53:42, 3.53it/s] 28%|██▊ | 105694/371472 [8:24:16<20:43:17, 3.56it/s] 28%|██▊ | 105695/371472 [8:24:17<21:24:27, 3.45it/s] 28%|██▊ | 105696/371472 [8:24:17<20:48:19, 3.55it/s] 28%|██▊ | 105697/371472 [8:24:17<20:37:29, 3.58it/s] 28%|██▊ | 105698/371472 [8:24:18<20:20:12, 3.63it/s] 28%|██▊ | 105699/371472 [8:24:18<19:58:10, 3.70it/s] 28%|██▊ | 105700/371472 [8:24:18<19:50:09, 3.72it/s] {'loss': 3.5344, 'learning_rate': 7.442575793488331e-07, 'epoch': 4.55} + 28%|██▊ | 105700/371472 [8:24:18<19:50:09, 3.72it/s] 28%|██▊ | 105701/371472 [8:24:18<20:00:01, 3.69it/s] 28%|██▊ | 105702/371472 [8:24:19<19:31:16, 3.78it/s] 28%|██▊ | 105703/371472 [8:24:19<19:41:07, 3.75it/s] 28%|██▊ | 105704/371472 [8:24:19<19:30:41, 3.78it/s] 28%|██▊ | 105705/371472 [8:24:19<19:37:16, 3.76it/s] 28%|██▊ | 105706/371472 [8:24:20<19:48:36, 3.73it/s] 28%|██▊ | 105707/371472 [8:24:20<21:39:40, 3.41it/s] 28%|██▊ | 105708/371472 [8:24:20<20:46:30, 3.55it/s] 28%|██▊ | 105709/371472 [8:24:20<19:48:05, 3.73it/s] 28%|██▊ | 105710/371472 [8:24:21<21:19:31, 3.46it/s] 28%|██▊ | 105711/371472 [8:24:21<20:21:54, 3.62it/s] 28%|██▊ | 105712/371472 [8:24:21<21:14:01, 3.48it/s] 28%|██▊ | 105713/371472 [8:24:22<20:10:12, 3.66it/s] 28%|██▊ | 105714/371472 [8:24:22<19:50:14, 3.72it/s] 28%|██▊ | 105715/371472 [8:24:22<20:38:52, 3.58it/s] 28%|██▊ | 105716/371472 [8:24:22<20:29:55, 3.60it/s] 28%|██▊ | 105717/371472 [8:24:23<20:09:27, 3.66it/s] 28%|██▊ | 105718/371472 [8:24:23<20:25:10, 3.62it/s] 28%|██▊ | 105719/371472 [8:24:23<21:09:18, 3.49it/s] 28%|██▊ | 105720/371472 [8:24:24<21:34:24, 3.42it/s] {'loss': 3.445, 'learning_rate': 7.442090973733543e-07, 'epoch': 4.55} + 28%|██▊ | 105720/371472 [8:24:24<21:34:24, 3.42it/s] 28%|██▊ | 105721/371472 [8:24:24<21:33:55, 3.42it/s] 28%|██▊ | 105722/371472 [8:24:24<21:02:18, 3.51it/s] 28%|██▊ | 105723/371472 [8:24:24<21:53:25, 3.37it/s] 28%|██▊ | 105724/371472 [8:24:25<21:03:50, 3.50it/s] 28%|██▊ | 105725/371472 [8:24:25<21:04:55, 3.50it/s] 28%|██▊ | 105726/371472 [8:24:25<20:23:30, 3.62it/s] 28%|██▊ | 105727/371472 [8:24:26<19:35:08, 3.77it/s] 28%|██▊ | 105728/371472 [8:24:26<19:31:30, 3.78it/s] 28%|██▊ | 105729/371472 [8:24:26<19:18:27, 3.82it/s] 28%|██▊ | 105730/371472 [8:24:26<20:43:22, 3.56it/s] 28%|██▊ | 105731/371472 [8:24:27<21:45:54, 3.39it/s] 28%|██▊ | 105732/371472 [8:24:27<21:00:10, 3.51it/s] 28%|██▊ | 105733/371472 [8:24:27<20:38:56, 3.57it/s] 28%|██▊ | 105734/371472 [8:24:28<22:38:18, 3.26it/s] 28%|██▊ | 105735/371472 [8:24:28<22:24:13, 3.29it/s] 28%|██▊ | 105736/371472 [8:24:28<22:05:43, 3.34it/s] 28%|██▊ | 105737/371472 [8:24:28<21:03:34, 3.51it/s] 28%|██▊ | 105738/371472 [8:24:29<21:37:46, 3.41it/s] 28%|██▊ | 105739/371472 [8:24:29<20:38:29, 3.58it/s] 28%|██▊ | 105740/371472 [8:24:29<20:45:26, 3.56it/s] {'loss': 3.5019, 'learning_rate': 7.441606153978754e-07, 'epoch': 4.55} + 28%|██▊ | 105740/371472 [8:24:29<20:45:26, 3.56it/s] 28%|██▊ | 105741/371472 [8:24:30<20:45:28, 3.56it/s] 28%|██▊ | 105742/371472 [8:24:30<21:37:02, 3.41it/s] 28%|██▊ | 105743/371472 [8:24:30<21:10:47, 3.49it/s] 28%|██▊ | 105744/371472 [8:24:30<20:15:30, 3.64it/s] 28%|██▊ | 105745/371472 [8:24:31<20:18:46, 3.63it/s] 28%|██▊ | 105746/371472 [8:24:31<22:00:24, 3.35it/s] 28%|██▊ | 105747/371472 [8:24:31<21:13:27, 3.48it/s] 28%|██▊ | 105748/371472 [8:24:32<22:38:23, 3.26it/s] 28%|██▊ | 105749/371472 [8:24:32<22:23:59, 3.30it/s] 28%|██▊ | 105750/371472 [8:24:32<20:56:37, 3.52it/s] 28%|██▊ | 105751/371472 [8:24:32<21:29:22, 3.43it/s] 28%|██▊ | 105752/371472 [8:24:33<20:45:54, 3.55it/s] 28%|██▊ | 105753/371472 [8:24:33<20:14:17, 3.65it/s] 28%|██▊ | 105754/371472 [8:24:33<19:59:58, 3.69it/s] 28%|██▊ | 105755/371472 [8:24:34<20:31:47, 3.60it/s] 28%|██▊ | 105756/371472 [8:24:34<20:03:04, 3.68it/s] 28%|██▊ | 105757/371472 [8:24:34<19:36:44, 3.76it/s] 28%|██▊ | 105758/371472 [8:24:34<20:20:31, 3.63it/s] 28%|██▊ | 105759/371472 [8:24:35<21:36:03, 3.42it/s] 28%|██▊ | 105760/371472 [8:24:35<21:26:49, 3.44it/s] {'loss': 3.3507, 'learning_rate': 7.441121334223965e-07, 'epoch': 4.56} + 28%|██▊ | 105760/371472 [8:24:35<21:26:49, 3.44it/s] 28%|██▊ | 105761/371472 [8:24:35<21:33:45, 3.42it/s] 28%|██▊ | 105762/371472 [8:24:36<20:31:38, 3.60it/s] 28%|██▊ | 105763/371472 [8:24:36<20:15:56, 3.64it/s] 28%|██▊ | 105764/371472 [8:24:36<20:13:06, 3.65it/s] 28%|██▊ | 105765/371472 [8:24:36<20:09:18, 3.66it/s] 28%|██▊ | 105766/371472 [8:24:37<19:26:34, 3.80it/s] 28%|██▊ | 105767/371472 [8:24:37<20:01:01, 3.69it/s] 28%|██▊ | 105768/371472 [8:24:37<19:45:37, 3.74it/s] 28%|██▊ | 105769/371472 [8:24:37<20:10:50, 3.66it/s] 28%|██▊ | 105770/371472 [8:24:38<19:44:56, 3.74it/s] 28%|██▊ | 105771/371472 [8:24:38<19:18:54, 3.82it/s] 28%|██▊ | 105772/371472 [8:24:38<20:26:06, 3.61it/s] 28%|██▊ | 105773/371472 [8:24:39<20:35:12, 3.59it/s] 28%|██▊ | 105774/371472 [8:24:39<20:34:30, 3.59it/s] 28%|██▊ | 105775/371472 [8:24:39<19:42:56, 3.74it/s] 28%|██▊ | 105776/371472 [8:24:39<20:27:08, 3.61it/s] 28%|██▊ | 105777/371472 [8:24:40<20:00:22, 3.69it/s] 28%|██▊ | 105778/371472 [8:24:40<20:31:04, 3.60it/s] 28%|██▊ | 105779/371472 [8:24:40<20:46:32, 3.55it/s] 28%|██▊ | 105780/371472 [8:24:40<20:23:41, 3.62it/s] {'loss': 3.4429, 'learning_rate': 7.440636514469175e-07, 'epoch': 4.56} + 28%|██▊ | 105780/371472 [8:24:40<20:23:41, 3.62it/s] 28%|██▊ | 105781/371472 [8:24:41<20:57:08, 3.52it/s] 28%|██▊ | 105782/371472 [8:24:41<21:05:20, 3.50it/s] 28%|██▊ | 105783/371472 [8:24:41<20:18:59, 3.63it/s] 28%|██▊ | 105784/371472 [8:24:42<20:11:31, 3.66it/s] 28%|██▊ | 105785/371472 [8:24:42<19:42:05, 3.75it/s] 28%|██▊ | 105786/371472 [8:24:42<20:09:29, 3.66it/s] 28%|██▊ | 105787/371472 [8:24:42<20:27:59, 3.61it/s] 28%|██▊ | 105788/371472 [8:24:43<21:03:54, 3.50it/s] 28%|██▊ | 105789/371472 [8:24:43<20:53:18, 3.53it/s] 28%|██▊ | 105790/371472 [8:24:43<21:01:16, 3.51it/s] 28%|██▊ | 105791/371472 [8:24:44<21:07:07, 3.49it/s] 28%|██▊ | 105792/371472 [8:24:44<21:58:49, 3.36it/s] 28%|██▊ | 105793/371472 [8:24:44<21:12:53, 3.48it/s] 28%|██▊ | 105794/371472 [8:24:44<20:28:38, 3.60it/s] 28%|██▊ | 105795/371472 [8:24:45<19:31:08, 3.78it/s] 28%|██▊ | 105796/371472 [8:24:45<20:35:43, 3.58it/s] 28%|██▊ | 105797/371472 [8:24:45<21:23:53, 3.45it/s] 28%|██▊ | 105798/371472 [8:24:46<22:01:45, 3.35it/s] 28%|██▊ | 105799/371472 [8:24:46<20:43:43, 3.56it/s] 28%|██▊ | 105800/371472 [8:24:46<20:33:02, 3.59it/s] {'loss': 3.4688, 'learning_rate': 7.440151694714387e-07, 'epoch': 4.56} + 28%|██▊ | 105800/371472 [8:24:46<20:33:02, 3.59it/s] 28%|██▊ | 105801/371472 [8:24:46<21:44:44, 3.39it/s] 28%|██▊ | 105802/371472 [8:24:47<21:22:48, 3.45it/s] 28%|██▊ | 105803/371472 [8:24:47<20:35:15, 3.58it/s] 28%|██▊ | 105804/371472 [8:24:47<20:13:57, 3.65it/s] 28%|██▊ | 105805/371472 [8:24:47<20:27:14, 3.61it/s] 28%|██▊ | 105806/371472 [8:24:48<24:01:16, 3.07it/s] 28%|██▊ | 105807/371472 [8:24:48<22:41:02, 3.25it/s] 28%|██▊ | 105808/371472 [8:24:49<23:19:28, 3.16it/s] 28%|██▊ | 105809/371472 [8:24:49<23:03:49, 3.20it/s] 28%|██▊ | 105810/371472 [8:24:49<22:27:21, 3.29it/s] 28%|██▊ | 105811/371472 [8:24:49<22:09:33, 3.33it/s] 28%|██▊ | 105812/371472 [8:24:50<21:32:04, 3.43it/s] 28%|██▊ | 105813/371472 [8:24:50<20:40:25, 3.57it/s] 28%|██▊ | 105814/371472 [8:24:50<19:51:26, 3.72it/s] 28%|██▊ | 105815/371472 [8:24:50<19:29:26, 3.79it/s] 28%|██▊ | 105816/371472 [8:24:51<19:56:57, 3.70it/s] 28%|██▊ | 105817/371472 [8:24:51<19:33:31, 3.77it/s] 28%|██▊ | 105818/371472 [8:24:51<20:20:45, 3.63it/s] 28%|██▊ | 105819/371472 [8:24:52<20:43:37, 3.56it/s] 28%|██▊ | 105820/371472 [8:24:52<20:11:31, 3.65it/s] {'loss': 3.2943, 'learning_rate': 7.439666874959599e-07, 'epoch': 4.56} + 28%|██▊ | 105820/371472 [8:24:52<20:11:31, 3.65it/s] 28%|██▊ | 105821/371472 [8:24:52<19:50:14, 3.72it/s] 28%|██▊ | 105822/371472 [8:24:52<20:27:00, 3.61it/s] 28%|██▊ | 105823/371472 [8:24:53<19:29:04, 3.79it/s] 28%|██▊ | 105824/371472 [8:24:53<20:12:26, 3.65it/s] 28%|██▊ | 105825/371472 [8:24:53<20:53:57, 3.53it/s] 28%|██▊ | 105826/371472 [8:24:53<20:12:02, 3.65it/s] 28%|██▊ | 105827/371472 [8:24:54<20:08:44, 3.66it/s] 28%|██▊ | 105828/371472 [8:24:54<20:00:34, 3.69it/s] 28%|██▊ | 105829/371472 [8:24:54<19:12:11, 3.84it/s] 28%|██▊ | 105830/371472 [8:24:55<20:05:27, 3.67it/s] 28%|██▊ | 105831/371472 [8:24:55<19:31:38, 3.78it/s] 28%|██▊ | 105832/371472 [8:24:55<19:47:11, 3.73it/s] 28%|██▊ | 105833/371472 [8:24:55<22:13:41, 3.32it/s] 28%|██▊ | 105834/371472 [8:24:56<22:01:38, 3.35it/s] 28%|██▊ | 105835/371472 [8:24:56<20:42:47, 3.56it/s] 28%|██▊ | 105836/371472 [8:24:56<20:06:40, 3.67it/s] 28%|██▊ | 105837/371472 [8:24:57<21:20:38, 3.46it/s] 28%|██▊ | 105838/371472 [8:24:57<21:27:08, 3.44it/s] 28%|██▊ | 105839/371472 [8:24:57<21:02:10, 3.51it/s] 28%|██▊ | 105840/371472 [8:24:57<22:48:40, 3.23it/s] {'loss': 3.5317, 'learning_rate': 7.43918205520481e-07, 'epoch': 4.56} + 28%|██▊ | 105840/371472 [8:24:57<22:48:40, 3.23it/s] 28%|██▊ | 105841/371472 [8:24:58<22:37:18, 3.26it/s] 28%|██▊ | 105842/371472 [8:24:58<23:12:39, 3.18it/s] 28%|██▊ | 105843/371472 [8:24:58<24:33:29, 3.00it/s] 28%|██▊ | 105844/371472 [8:24:59<23:32:43, 3.13it/s] 28%|██▊ | 105845/371472 [8:24:59<25:37:19, 2.88it/s] 28%|██▊ | 105846/371472 [8:24:59<23:45:05, 3.11it/s] 28%|██▊ | 105847/371472 [8:25:00<22:22:00, 3.30it/s] 28%|██▊ | 105848/371472 [8:25:00<21:36:21, 3.41it/s] 28%|██▊ | 105849/371472 [8:25:00<22:04:21, 3.34it/s] 28%|██▊ | 105850/371472 [8:25:01<21:46:13, 3.39it/s] 28%|██▊ | 105851/371472 [8:25:01<21:54:10, 3.37it/s] 28%|██▊ | 105852/371472 [8:25:01<22:19:39, 3.30it/s] 28%|██▊ | 105853/371472 [8:25:01<21:49:22, 3.38it/s] 28%|██▊ | 105854/371472 [8:25:02<20:32:54, 3.59it/s] 28%|██▊ | 105855/371472 [8:25:02<22:16:46, 3.31it/s] 28%|██▊ | 105856/371472 [8:25:02<21:08:26, 3.49it/s] 28%|██▊ | 105857/371472 [8:25:03<20:46:49, 3.55it/s] 28%|██▊ | 105858/371472 [8:25:03<20:57:47, 3.52it/s] 28%|██▊ | 105859/371472 [8:25:03<20:18:06, 3.63it/s] 28%|██▊ | 105860/371472 [8:25:03<20:15:51, 3.64it/s] {'loss': 3.4314, 'learning_rate': 7.43869723545002e-07, 'epoch': 4.56} + 28%|██▊ | 105860/371472 [8:25:03<20:15:51, 3.64it/s] 28%|██▊ | 105861/371472 [8:25:04<19:58:46, 3.69it/s] 28%|██▊ | 105862/371472 [8:25:04<19:37:34, 3.76it/s] 28%|██▊ | 105863/371472 [8:25:04<19:26:24, 3.80it/s] 28%|██▊ | 105864/371472 [8:25:04<20:34:13, 3.59it/s] 28%|██▊ | 105865/371472 [8:25:05<19:32:07, 3.78it/s] 28%|██▊ | 105866/371472 [8:25:05<20:24:51, 3.61it/s] 28%|██▊ | 105867/371472 [8:25:05<23:04:44, 3.20it/s] 28%|██▊ | 105868/371472 [8:25:06<21:35:26, 3.42it/s] 28%|██▊ | 105869/371472 [8:25:06<21:55:00, 3.37it/s] 29%|██▊ | 105870/371472 [8:25:06<22:32:36, 3.27it/s] 29%|██▊ | 105871/371472 [8:25:07<22:34:15, 3.27it/s] 29%|██▊ | 105872/371472 [8:25:07<21:40:56, 3.40it/s] 29%|██▊ | 105873/371472 [8:25:07<20:49:32, 3.54it/s] 29%|██▊ | 105874/371472 [8:25:07<21:59:37, 3.35it/s] 29%|██▊ | 105875/371472 [8:25:08<22:28:18, 3.28it/s] 29%|██▊ | 105876/371472 [8:25:08<22:13:13, 3.32it/s] 29%|██▊ | 105877/371472 [8:25:08<21:17:34, 3.46it/s] 29%|██▊ | 105878/371472 [8:25:09<20:41:16, 3.57it/s] 29%|██▊ | 105879/371472 [8:25:09<21:03:35, 3.50it/s] 29%|██▊ | 105880/371472 [8:25:09<21:23:18, 3.45it/s] {'loss': 3.6768, 'learning_rate': 7.438212415695231e-07, 'epoch': 4.56} + 29%|██▊ | 105880/371472 [8:25:09<21:23:18, 3.45it/s] 29%|██▊ | 105881/371472 [8:25:09<20:40:27, 3.57it/s] 29%|██▊ | 105882/371472 [8:25:10<21:35:35, 3.42it/s] 29%|██▊ | 105883/371472 [8:25:10<21:02:36, 3.51it/s] 29%|██▊ | 105884/371472 [8:25:10<21:08:54, 3.49it/s] 29%|██▊ | 105885/371472 [8:25:11<20:59:07, 3.52it/s] 29%|██▊ | 105886/371472 [8:25:11<20:32:08, 3.59it/s] 29%|██▊ | 105887/371472 [8:25:11<20:37:37, 3.58it/s] 29%|██▊ | 105888/371472 [8:25:12<22:19:12, 3.31it/s] 29%|██▊ | 105889/371472 [8:25:12<21:23:22, 3.45it/s] 29%|██▊ | 105890/371472 [8:25:12<23:26:31, 3.15it/s] 29%|██▊ | 105891/371472 [8:25:13<24:23:41, 3.02it/s] 29%|██▊ | 105892/371472 [8:25:13<22:54:19, 3.22it/s] 29%|██▊ | 105893/371472 [8:25:13<22:46:58, 3.24it/s] 29%|██▊ | 105894/371472 [8:25:13<21:32:06, 3.43it/s] 29%|██▊ | 105895/371472 [8:25:14<20:44:30, 3.56it/s] 29%|██▊ | 105896/371472 [8:25:14<20:50:16, 3.54it/s] 29%|██▊ | 105897/371472 [8:25:14<20:29:26, 3.60it/s] 29%|██▊ | 105898/371472 [8:25:14<20:09:20, 3.66it/s] 29%|██▊ | 105899/371472 [8:25:15<20:30:47, 3.60it/s] 29%|██▊ | 105900/371472 [8:25:15<20:56:12, 3.52it/s] {'loss': 3.3843, 'learning_rate': 7.437727595940443e-07, 'epoch': 4.56} + 29%|██▊ | 105900/371472 [8:25:15<20:56:12, 3.52it/s] 29%|██▊ | 105901/371472 [8:25:15<20:28:12, 3.60it/s] 29%|██▊ | 105902/371472 [8:25:15<19:35:28, 3.77it/s] 29%|██▊ | 105903/371472 [8:25:16<21:12:53, 3.48it/s] 29%|██▊ | 105904/371472 [8:25:16<20:37:40, 3.58it/s] 29%|██▊ | 105905/371472 [8:25:16<21:47:54, 3.38it/s] 29%|██▊ | 105906/371472 [8:25:17<22:43:05, 3.25it/s] 29%|██▊ | 105907/371472 [8:25:17<21:43:18, 3.40it/s] 29%|██▊ | 105908/371472 [8:25:17<21:38:28, 3.41it/s] 29%|██▊ | 105909/371472 [8:25:18<24:57:27, 2.96it/s] 29%|██▊ | 105910/371472 [8:25:18<23:40:41, 3.12it/s] 29%|██▊ | 105911/371472 [8:25:18<23:51:14, 3.09it/s] 29%|██▊ | 105912/371472 [8:25:19<23:35:47, 3.13it/s] 29%|██▊ | 105913/371472 [8:25:19<21:45:54, 3.39it/s] 29%|██▊ | 105914/371472 [8:25:19<21:09:54, 3.49it/s] 29%|██▊ | 105915/371472 [8:25:19<20:44:45, 3.56it/s] 29%|██▊ | 105916/371472 [8:25:20<20:04:11, 3.68it/s] 29%|██▊ | 105917/371472 [8:25:20<20:51:44, 3.54it/s] 29%|██▊ | 105918/371472 [8:25:20<21:06:57, 3.49it/s] 29%|██▊ | 105919/371472 [8:25:21<20:45:58, 3.55it/s] 29%|██▊ | 105920/371472 [8:25:21<21:20:28, 3.46it/s] {'loss': 3.4105, 'learning_rate': 7.437242776185653e-07, 'epoch': 4.56} + 29%|██▊ | 105920/371472 [8:25:21<21:20:28, 3.46it/s] 29%|██▊ | 105921/371472 [8:25:21<21:10:20, 3.48it/s] 29%|██▊ | 105922/371472 [8:25:21<21:19:45, 3.46it/s] 29%|██▊ | 105923/371472 [8:25:22<20:49:48, 3.54it/s] 29%|██▊ | 105924/371472 [8:25:22<20:52:32, 3.53it/s] 29%|██▊ | 105925/371472 [8:25:22<20:01:54, 3.68it/s] 29%|██▊ | 105926/371472 [8:25:23<19:55:56, 3.70it/s] 29%|██▊ | 105927/371472 [8:25:23<21:53:50, 3.37it/s] 29%|██▊ | 105928/371472 [8:25:23<22:28:06, 3.28it/s] 29%|██▊ | 105929/371472 [8:25:23<21:16:21, 3.47it/s] 29%|██▊ | 105930/371472 [8:25:24<21:52:42, 3.37it/s] 29%|██▊ | 105931/371472 [8:25:24<21:33:42, 3.42it/s] 29%|██▊ | 105932/371472 [8:25:24<22:01:09, 3.35it/s] 29%|██▊ | 105933/371472 [8:25:25<22:17:04, 3.31it/s] 29%|██▊ | 105934/371472 [8:25:25<21:58:01, 3.36it/s] 29%|██▊ | 105935/371472 [8:25:25<21:24:27, 3.45it/s] 29%|██▊ | 105936/371472 [8:25:26<20:52:53, 3.53it/s] 29%|██▊ | 105937/371472 [8:25:26<20:25:42, 3.61it/s] 29%|██▊ | 105938/371472 [8:25:26<20:16:16, 3.64it/s] 29%|██▊ | 105939/371472 [8:25:26<20:00:12, 3.69it/s] 29%|██▊ | 105940/371472 [8:25:27<19:57:13, 3.70it/s] {'loss': 3.4629, 'learning_rate': 7.436757956430863e-07, 'epoch': 4.56} + 29%|██▊ | 105940/371472 [8:25:27<19:57:13, 3.70it/s] 29%|██▊ | 105941/371472 [8:25:27<19:29:56, 3.78it/s] 29%|██▊ | 105942/371472 [8:25:27<20:12:56, 3.65it/s] 29%|██▊ | 105943/371472 [8:25:27<20:24:01, 3.62it/s] 29%|██▊ | 105944/371472 [8:25:28<19:55:50, 3.70it/s] 29%|██▊ | 105945/371472 [8:25:28<19:12:05, 3.84it/s] 29%|██▊ | 105946/371472 [8:25:28<19:36:52, 3.76it/s] 29%|██▊ | 105947/371472 [8:25:28<19:56:01, 3.70it/s] 29%|██▊ | 105948/371472 [8:25:29<20:04:24, 3.67it/s] 29%|██▊ | 105949/371472 [8:25:29<19:35:00, 3.77it/s] 29%|██▊ | 105950/371472 [8:25:29<19:47:46, 3.73it/s] 29%|██▊ | 105951/371472 [8:25:30<19:42:28, 3.74it/s] 29%|██▊ | 105952/371472 [8:25:30<20:21:28, 3.62it/s] 29%|██▊ | 105953/371472 [8:25:30<20:27:15, 3.61it/s] 29%|██▊ | 105954/371472 [8:25:30<20:57:22, 3.52it/s] 29%|██▊ | 105955/371472 [8:25:31<20:49:13, 3.54it/s] 29%|██▊ | 105956/371472 [8:25:31<20:57:44, 3.52it/s] 29%|██▊ | 105957/371472 [8:25:31<21:49:02, 3.38it/s] 29%|██▊ | 105958/371472 [8:25:32<21:09:22, 3.49it/s] 29%|██▊ | 105959/371472 [8:25:32<21:58:35, 3.36it/s] 29%|██▊ | 105960/371472 [8:25:32<21:37:34, 3.41it/s] {'loss': 3.4537, 'learning_rate': 7.436273136676076e-07, 'epoch': 4.56} + 29%|██▊ | 105960/371472 [8:25:32<21:37:34, 3.41it/s] 29%|██▊ | 105961/371472 [8:25:32<20:45:10, 3.55it/s] 29%|██▊ | 105962/371472 [8:25:33<20:39:34, 3.57it/s] 29%|██▊ | 105963/371472 [8:25:33<21:12:40, 3.48it/s] 29%|██▊ | 105964/371472 [8:25:33<20:10:37, 3.66it/s] 29%|██▊ | 105965/371472 [8:25:34<20:52:43, 3.53it/s] 29%|██▊ | 105966/371472 [8:25:34<21:30:06, 3.43it/s] 29%|██▊ | 105967/371472 [8:25:34<20:42:18, 3.56it/s] 29%|██▊ | 105968/371472 [8:25:34<20:23:59, 3.62it/s] 29%|██▊ | 105969/371472 [8:25:35<21:04:29, 3.50it/s] 29%|██▊ | 105970/371472 [8:25:35<21:12:21, 3.48it/s] 29%|██▊ | 105971/371472 [8:25:35<22:14:51, 3.31it/s] 29%|██▊ | 105972/371472 [8:25:36<22:42:10, 3.25it/s] 29%|██▊ | 105973/371472 [8:25:36<22:07:14, 3.33it/s] 29%|██▊ | 105974/371472 [8:25:36<22:17:49, 3.31it/s] 29%|██▊ | 105975/371472 [8:25:36<21:18:25, 3.46it/s] 29%|██▊ | 105976/371472 [8:25:37<22:03:01, 3.34it/s] 29%|██▊ | 105977/371472 [8:25:37<23:08:27, 3.19it/s] 29%|██▊ | 105978/371472 [8:25:37<23:59:48, 3.07it/s] 29%|██▊ | 105979/371472 [8:25:38<22:43:52, 3.24it/s] 29%|██▊ | 105980/371472 [8:25:38<21:38:15, 3.41it/s] {'loss': 3.4705, 'learning_rate': 7.435788316921288e-07, 'epoch': 4.56} + 29%|██▊ | 105980/371472 [8:25:38<21:38:15, 3.41it/s] 29%|██▊ | 105981/371472 [8:25:38<21:41:02, 3.40it/s] 29%|██▊ | 105982/371472 [8:25:39<20:30:02, 3.60it/s] 29%|██▊ | 105983/371472 [8:25:39<20:07:41, 3.66it/s] 29%|██▊ | 105984/371472 [8:25:39<20:53:48, 3.53it/s] 29%|██▊ | 105985/371472 [8:25:39<21:34:02, 3.42it/s] 29%|██▊ | 105986/371472 [8:25:40<20:35:19, 3.58it/s] 29%|██▊ | 105987/371472 [8:25:40<20:20:18, 3.63it/s] 29%|██▊ | 105988/371472 [8:25:40<19:33:43, 3.77it/s] 29%|██▊ | 105989/371472 [8:25:40<20:00:20, 3.69it/s] 29%|██▊ | 105990/371472 [8:25:41<20:57:43, 3.52it/s] 29%|██▊ | 105991/371472 [8:25:41<21:30:21, 3.43it/s] 29%|██▊ | 105992/371472 [8:25:41<21:01:32, 3.51it/s] 29%|██▊ | 105993/371472 [8:25:42<21:11:10, 3.48it/s] 29%|██▊ | 105994/371472 [8:25:42<20:23:34, 3.62it/s] 29%|██▊ | 105995/371472 [8:25:42<20:16:35, 3.64it/s] 29%|██▊ | 105996/371472 [8:25:42<19:59:19, 3.69it/s] 29%|██▊ | 105997/371472 [8:25:43<20:29:29, 3.60it/s] 29%|██▊ | 105998/371472 [8:25:43<20:14:24, 3.64it/s] 29%|██▊ | 105999/371472 [8:25:43<21:13:41, 3.47it/s] 29%|██▊ | 106000/371472 [8:25:44<22:05:56, 3.34it/s] {'loss': 3.4859, 'learning_rate': 7.435303497166497e-07, 'epoch': 4.57} + 29%|██▊ | 106000/371472 [8:25:44<22:05:56, 3.34it/s] 29%|██▊ | 106001/371472 [8:25:44<21:21:37, 3.45it/s] 29%|██▊ | 106002/371472 [8:25:44<20:43:53, 3.56it/s] 29%|██▊ | 106003/371472 [8:25:45<21:27:57, 3.44it/s] 29%|██▊ | 106004/371472 [8:25:45<24:37:24, 2.99it/s] 29%|██▊ | 106005/371472 [8:25:45<23:02:49, 3.20it/s] 29%|██▊ | 106006/371472 [8:25:45<22:09:28, 3.33it/s] 29%|██▊ | 106007/371472 [8:25:46<21:16:49, 3.47it/s] 29%|██▊ | 106008/371472 [8:25:46<21:18:26, 3.46it/s] 29%|██▊ | 106009/371472 [8:25:46<21:25:43, 3.44it/s] 29%|██▊ | 106010/371472 [8:25:47<21:04:26, 3.50it/s] 29%|██▊ | 106011/371472 [8:25:47<20:41:41, 3.56it/s] 29%|██▊ | 106012/371472 [8:25:47<20:39:40, 3.57it/s] 29%|██▊ | 106013/371472 [8:25:47<20:05:43, 3.67it/s] 29%|██▊ | 106014/371472 [8:25:48<20:55:41, 3.52it/s] 29%|██▊ | 106015/371472 [8:25:48<20:19:46, 3.63it/s] 29%|██▊ | 106016/371472 [8:25:48<20:46:33, 3.55it/s] 29%|██▊ | 106017/371472 [8:25:49<20:01:56, 3.68it/s] 29%|██▊ | 106018/371472 [8:25:49<19:32:28, 3.77it/s] 29%|██▊ | 106019/371472 [8:25:49<19:15:47, 3.83it/s] 29%|██▊ | 106020/371472 [8:25:49<20:44:45, 3.55it/s] {'loss': 3.5219, 'learning_rate': 7.434818677411708e-07, 'epoch': 4.57} + 29%|██▊ | 106020/371472 [8:25:49<20:44:45, 3.55it/s] 29%|██▊ | 106021/371472 [8:25:50<19:59:33, 3.69it/s] 29%|██▊ | 106022/371472 [8:25:50<19:57:04, 3.70it/s] 29%|██▊ | 106023/371472 [8:25:50<19:45:08, 3.73it/s] 29%|██▊ | 106024/371472 [8:25:50<19:24:11, 3.80it/s] 29%|██▊ | 106025/371472 [8:25:51<18:53:39, 3.90it/s] 29%|██▊ | 106026/371472 [8:25:51<23:09:14, 3.18it/s] 29%|██▊ | 106027/371472 [8:25:51<22:08:26, 3.33it/s] 29%|██▊ | 106028/371472 [8:25:52<22:22:17, 3.30it/s] 29%|██▊ | 106029/371472 [8:25:52<21:25:37, 3.44it/s] 29%|██▊ | 106030/371472 [8:25:52<21:56:02, 3.36it/s] 29%|██▊ | 106031/371472 [8:25:52<21:31:05, 3.43it/s] 29%|██▊ | 106032/371472 [8:25:53<22:48:59, 3.23it/s] 29%|██▊ | 106033/371472 [8:25:53<21:22:47, 3.45it/s] 29%|██▊ | 106034/371472 [8:25:53<20:22:51, 3.62it/s] 29%|██▊ | 106035/371472 [8:25:54<21:39:32, 3.40it/s] 29%|██▊ | 106036/371472 [8:25:54<20:48:43, 3.54it/s] 29%|██▊ | 106037/371472 [8:25:54<21:46:00, 3.39it/s] 29%|██▊ | 106038/371472 [8:25:55<21:24:22, 3.44it/s] 29%|██▊ | 106039/371472 [8:25:55<20:31:20, 3.59it/s] 29%|██▊ | 106040/371472 [8:25:55<19:51:37, 3.71it/s] {'loss': 3.4374, 'learning_rate': 7.434333857656921e-07, 'epoch': 4.57} + 29%|██▊ | 106040/371472 [8:25:55<19:51:37, 3.71it/s] 29%|██▊ | 106041/371472 [8:25:55<19:18:22, 3.82it/s] 29%|██▊ | 106042/371472 [8:25:56<19:29:03, 3.78it/s] 29%|██▊ | 106043/371472 [8:25:56<19:21:25, 3.81it/s] 29%|██▊ | 106044/371472 [8:25:56<18:48:59, 3.92it/s] 29%|██▊ | 106045/371472 [8:25:56<18:41:53, 3.94it/s] 29%|██▊ | 106046/371472 [8:25:57<19:16:41, 3.82it/s] 29%|██▊ | 106047/371472 [8:25:57<19:58:43, 3.69it/s] 29%|██▊ | 106048/371472 [8:25:57<20:45:01, 3.55it/s] 29%|██▊ | 106049/371472 [8:25:57<20:43:30, 3.56it/s] 29%|██▊ | 106050/371472 [8:25:58<22:39:59, 3.25it/s] 29%|██▊ | 106051/371472 [8:25:58<22:14:46, 3.31it/s] 29%|██▊ | 106052/371472 [8:25:58<21:41:37, 3.40it/s] 29%|██▊ | 106053/371472 [8:25:59<21:20:03, 3.46it/s] 29%|██▊ | 106054/371472 [8:25:59<21:33:53, 3.42it/s] 29%|██▊ | 106055/371472 [8:25:59<20:29:50, 3.60it/s] 29%|██▊ | 106056/371472 [8:25:59<21:01:06, 3.51it/s] 29%|██▊ | 106057/371472 [8:26:00<20:17:10, 3.63it/s] 29%|██▊ | 106058/371472 [8:26:00<20:51:44, 3.53it/s] 29%|██▊ | 106059/371472 [8:26:00<20:24:09, 3.61it/s] 29%|██▊ | 106060/371472 [8:26:01<20:05:35, 3.67it/s] {'loss': 3.3109, 'learning_rate': 7.43384903790213e-07, 'epoch': 4.57} + 29%|██▊ | 106060/371472 [8:26:01<20:05:35, 3.67it/s] 29%|██▊ | 106061/371472 [8:26:01<20:12:16, 3.65it/s] 29%|██▊ | 106062/371472 [8:26:01<20:12:25, 3.65it/s] 29%|██▊ | 106063/371472 [8:26:01<20:24:56, 3.61it/s] 29%|██▊ | 106064/371472 [8:26:02<20:46:55, 3.55it/s] 29%|██▊ | 106065/371472 [8:26:02<20:26:34, 3.61it/s] 29%|██▊ | 106066/371472 [8:26:02<21:41:55, 3.40it/s] 29%|██▊ | 106067/371472 [8:26:03<21:14:21, 3.47it/s] 29%|██▊ | 106068/371472 [8:26:03<23:02:03, 3.20it/s] 29%|██▊ | 106069/371472 [8:26:03<21:29:41, 3.43it/s] 29%|██▊ | 106070/371472 [8:26:03<21:19:24, 3.46it/s] 29%|██▊ | 106071/371472 [8:26:04<20:38:54, 3.57it/s] 29%|██▊ | 106072/371472 [8:26:04<20:21:44, 3.62it/s] 29%|██▊ | 106073/371472 [8:26:04<20:14:10, 3.64it/s] 29%|██▊ | 106074/371472 [8:26:05<20:23:06, 3.62it/s] 29%|██▊ | 106075/371472 [8:26:05<20:16:19, 3.64it/s] 29%|██▊ | 106076/371472 [8:26:05<20:04:29, 3.67it/s] 29%|██▊ | 106077/371472 [8:26:05<20:11:56, 3.65it/s] 29%|██▊ | 106078/371472 [8:26:06<20:21:00, 3.62it/s] 29%|██▊ | 106079/371472 [8:26:06<20:11:26, 3.65it/s] 29%|██▊ | 106080/371472 [8:26:06<19:59:09, 3.69it/s] {'loss': 3.5075, 'learning_rate': 7.433364218147341e-07, 'epoch': 4.57} + 29%|██▊ | 106080/371472 [8:26:06<19:59:09, 3.69it/s] 29%|██▊ | 106081/371472 [8:26:06<20:45:41, 3.55it/s] 29%|██▊ | 106082/371472 [8:26:07<21:15:08, 3.47it/s] 29%|██▊ | 106083/371472 [8:26:07<21:15:17, 3.47it/s] 29%|██▊ | 106084/371472 [8:26:07<20:19:31, 3.63it/s] 29%|██▊ | 106085/371472 [8:26:08<20:19:45, 3.63it/s] 29%|██▊ | 106086/371472 [8:26:08<21:27:42, 3.43it/s] 29%|██▊ | 106087/371472 [8:26:08<21:05:01, 3.50it/s] 29%|██▊ | 106088/371472 [8:26:08<21:11:59, 3.48it/s] 29%|██▊ | 106089/371472 [8:26:09<20:44:54, 3.55it/s] 29%|██▊ | 106090/371472 [8:26:09<23:13:05, 3.17it/s] 29%|██▊ | 106091/371472 [8:26:09<22:37:39, 3.26it/s] 29%|██▊ | 106092/371472 [8:26:10<24:31:05, 3.01it/s] 29%|██▊ | 106093/371472 [8:26:10<23:33:18, 3.13it/s] 29%|██▊ | 106094/371472 [8:26:10<22:42:07, 3.25it/s] 29%|██▊ | 106095/371472 [8:26:11<22:55:19, 3.22it/s] 29%|██▊ | 106096/371472 [8:26:11<22:51:45, 3.22it/s] 29%|██▊ | 106097/371472 [8:26:11<21:56:59, 3.36it/s] 29%|██▊ | 106098/371472 [8:26:12<21:36:22, 3.41it/s] 29%|██▊ | 106099/371472 [8:26:12<21:01:41, 3.51it/s] 29%|██▊ | 106100/371472 [8:26:12<20:50:59, 3.54it/s] {'loss': 3.3777, 'learning_rate': 7.432879398392553e-07, 'epoch': 4.57} + 29%|██▊ | 106100/371472 [8:26:12<20:50:59, 3.54it/s] 29%|██▊ | 106101/371472 [8:26:12<20:22:17, 3.62it/s] 29%|██▊ | 106102/371472 [8:26:13<20:36:19, 3.58it/s] 29%|██▊ | 106103/371472 [8:26:13<22:43:48, 3.24it/s] 29%|██▊ | 106104/371472 [8:26:13<22:54:20, 3.22it/s] 29%|██▊ | 106105/371472 [8:26:14<22:07:33, 3.33it/s] 29%|██▊ | 106106/371472 [8:26:14<22:26:45, 3.28it/s] 29%|██▊ | 106107/371472 [8:26:14<21:24:45, 3.44it/s] 29%|██▊ | 106108/371472 [8:26:15<21:20:28, 3.45it/s] 29%|██▊ | 106109/371472 [8:26:15<21:09:13, 3.48it/s] 29%|██▊ | 106110/371472 [8:26:15<20:16:48, 3.63it/s] 29%|██▊ | 106111/371472 [8:26:15<19:46:54, 3.73it/s] 29%|██▊ | 106112/371472 [8:26:16<19:52:04, 3.71it/s] 29%|██▊ | 106113/371472 [8:26:16<19:30:27, 3.78it/s] 29%|██▊ | 106114/371472 [8:26:16<20:05:50, 3.67it/s] 29%|██▊ | 106115/371472 [8:26:16<19:22:50, 3.80it/s] 29%|██▊ | 106116/371472 [8:26:17<21:09:20, 3.48it/s] 29%|██▊ | 106117/371472 [8:26:17<22:21:13, 3.30it/s] 29%|██▊ | 106118/371472 [8:26:17<23:22:24, 3.15it/s] 29%|██▊ | 106119/371472 [8:26:18<22:27:27, 3.28it/s] 29%|██▊ | 106120/371472 [8:26:18<21:54:47, 3.36it/s] {'loss': 3.4756, 'learning_rate': 7.432394578637764e-07, 'epoch': 4.57} + 29%|██▊ | 106120/371472 [8:26:18<21:54:47, 3.36it/s] 29%|██▊ | 106121/371472 [8:26:18<22:20:42, 3.30it/s] 29%|██▊ | 106122/371472 [8:26:19<21:43:36, 3.39it/s] 29%|██▊ | 106123/371472 [8:26:19<22:41:29, 3.25it/s] 29%|██▊ | 106124/371472 [8:26:19<22:09:15, 3.33it/s] 29%|██▊ | 106125/371472 [8:26:19<21:56:22, 3.36it/s] 29%|██▊ | 106126/371472 [8:26:20<21:56:40, 3.36it/s] 29%|██▊ | 106127/371472 [8:26:20<21:31:57, 3.42it/s] 29%|██▊ | 106128/371472 [8:26:20<21:03:57, 3.50it/s] 29%|██▊ | 106129/371472 [8:26:21<22:03:00, 3.34it/s] 29%|██▊ | 106130/371472 [8:26:21<20:38:34, 3.57it/s] 29%|██▊ | 106131/371472 [8:26:21<20:48:05, 3.54it/s] 29%|██▊ | 106132/371472 [8:26:21<21:04:47, 3.50it/s] 29%|██▊ | 106133/371472 [8:26:22<21:18:40, 3.46it/s] 29%|██▊ | 106134/371472 [8:26:22<21:32:29, 3.42it/s] 29%|██▊ | 106135/371472 [8:26:22<21:07:42, 3.49it/s] 29%|██▊ | 106136/371472 [8:26:23<23:03:54, 3.20it/s] 29%|██▊ | 106137/371472 [8:26:23<21:46:24, 3.39it/s] 29%|██▊ | 106138/371472 [8:26:23<20:58:55, 3.51it/s] 29%|██▊ | 106139/371472 [8:26:23<21:20:01, 3.45it/s] 29%|██▊ | 106140/371472 [8:26:24<22:02:44, 3.34it/s] {'loss': 3.4662, 'learning_rate': 7.431909758882974e-07, 'epoch': 4.57} + 29%|██▊ | 106140/371472 [8:26:24<22:02:44, 3.34it/s] 29%|██▊ | 106141/371472 [8:26:24<21:33:05, 3.42it/s] 29%|██▊ | 106142/371472 [8:26:24<20:43:10, 3.56it/s] 29%|██▊ | 106143/371472 [8:26:25<20:07:56, 3.66it/s] 29%|██▊ | 106144/371472 [8:26:25<20:20:48, 3.62it/s] 29%|██▊ | 106145/371472 [8:26:25<20:20:47, 3.62it/s] 29%|██▊ | 106146/371472 [8:26:26<22:05:11, 3.34it/s] 29%|██▊ | 106147/371472 [8:26:26<21:12:10, 3.48it/s] 29%|██▊ | 106148/371472 [8:26:26<20:58:50, 3.51it/s] 29%|██▊ | 106149/371472 [8:26:26<21:13:34, 3.47it/s] 29%|██▊ | 106150/371472 [8:26:27<20:42:54, 3.56it/s] 29%|██▊ | 106151/371472 [8:26:27<20:24:30, 3.61it/s] 29%|██▊ | 106152/371472 [8:26:27<20:01:19, 3.68it/s] 29%|██▊ | 106153/371472 [8:26:27<20:17:42, 3.63it/s] 29%|██▊ | 106154/371472 [8:26:28<20:20:21, 3.62it/s] 29%|██▊ | 106155/371472 [8:26:28<21:47:52, 3.38it/s] 29%|██▊ | 106156/371472 [8:26:28<23:02:21, 3.20it/s] 29%|██▊ | 106157/371472 [8:26:29<21:44:20, 3.39it/s] 29%|██▊ | 106158/371472 [8:26:29<21:21:06, 3.45it/s] 29%|██▊ | 106159/371472 [8:26:29<20:34:22, 3.58it/s] 29%|██▊ | 106160/371472 [8:26:29<20:26:45, 3.60it/s] {'loss': 3.4017, 'learning_rate': 7.431424939128185e-07, 'epoch': 4.57} + 29%|██▊ | 106160/371472 [8:26:29<20:26:45, 3.60it/s] 29%|██▊ | 106161/371472 [8:26:30<21:22:56, 3.45it/s] 29%|██▊ | 106162/371472 [8:26:30<22:14:21, 3.31it/s] 29%|██▊ | 106163/371472 [8:26:30<22:45:19, 3.24it/s] 29%|██▊ | 106164/371472 [8:26:31<22:17:26, 3.31it/s] 29%|██▊ | 106165/371472 [8:26:31<21:39:45, 3.40it/s] 29%|██▊ | 106166/371472 [8:26:31<22:05:53, 3.33it/s] 29%|██▊ | 106167/371472 [8:26:32<22:06:48, 3.33it/s] 29%|██▊ | 106168/371472 [8:26:32<21:50:25, 3.37it/s] 29%|██▊ | 106169/371472 [8:26:32<21:34:24, 3.42it/s] 29%|██▊ | 106170/371472 [8:26:32<21:25:49, 3.44it/s] 29%|██▊ | 106171/371472 [8:26:33<23:39:26, 3.12it/s] 29%|██▊ | 106172/371472 [8:26:33<22:44:42, 3.24it/s] 29%|██▊ | 106173/371472 [8:26:33<22:07:10, 3.33it/s] 29%|██▊ | 106174/371472 [8:26:34<22:04:39, 3.34it/s] 29%|██▊ | 106175/371472 [8:26:34<21:29:50, 3.43it/s] 29%|██▊ | 106176/371472 [8:26:34<22:47:47, 3.23it/s] 29%|██▊ | 106177/371472 [8:26:35<22:23:16, 3.29it/s] 29%|██▊ | 106178/371472 [8:26:35<22:21:24, 3.30it/s] 29%|██▊ | 106179/371472 [8:26:35<21:08:12, 3.49it/s] 29%|██▊ | 106180/371472 [8:26:35<20:04:00, 3.67it/s] {'loss': 3.2848, 'learning_rate': 7.430940119373397e-07, 'epoch': 4.57} + 29%|██▊ | 106180/371472 [8:26:35<20:04:00, 3.67it/s] 29%|██▊ | 106181/371472 [8:26:36<20:19:50, 3.62it/s] 29%|██▊ | 106182/371472 [8:26:36<20:22:42, 3.62it/s] 29%|██▊ | 106183/371472 [8:26:36<21:21:56, 3.45it/s] 29%|██▊ | 106184/371472 [8:26:37<21:47:16, 3.38it/s] 29%|██▊ | 106185/371472 [8:26:37<22:15:28, 3.31it/s] 29%|██▊ | 106186/371472 [8:26:37<21:14:47, 3.47it/s] 29%|██▊ | 106187/371472 [8:26:37<21:28:13, 3.43it/s] 29%|██▊ | 106188/371472 [8:26:38<20:29:09, 3.60it/s] 29%|██▊ | 106189/371472 [8:26:38<20:49:01, 3.54it/s] 29%|██▊ | 106190/371472 [8:26:38<21:26:38, 3.44it/s] 29%|██▊ | 106191/371472 [8:26:39<20:55:32, 3.52it/s] 29%|██▊ | 106192/371472 [8:26:39<21:01:20, 3.51it/s] 29%|██▊ | 106193/371472 [8:26:39<20:45:42, 3.55it/s] 29%|██▊ | 106194/371472 [8:26:39<20:27:23, 3.60it/s] 29%|██▊ | 106195/371472 [8:26:40<21:10:59, 3.48it/s] 29%|██▊ | 106196/371472 [8:26:40<20:21:11, 3.62it/s] 29%|██▊ | 106197/371472 [8:26:40<20:33:34, 3.58it/s] 29%|██▊ | 106198/371472 [8:26:41<20:08:17, 3.66it/s] 29%|██▊ | 106199/371472 [8:26:41<21:05:07, 3.49it/s] 29%|██▊ | 106200/371472 [8:26:41<20:20:28, 3.62it/s] {'loss': 3.2424, 'learning_rate': 7.430455299618609e-07, 'epoch': 4.57} + 29%|██▊ | 106200/371472 [8:26:41<20:20:28, 3.62it/s] 29%|██▊ | 106201/371472 [8:26:41<22:34:17, 3.26it/s] 29%|██▊ | 106202/371472 [8:26:42<21:39:06, 3.40it/s] 29%|██▊ | 106203/371472 [8:26:42<20:50:15, 3.54it/s] 29%|██▊ | 106204/371472 [8:26:42<20:34:49, 3.58it/s] 29%|██▊ | 106205/371472 [8:26:43<21:06:17, 3.49it/s] 29%|██▊ | 106206/371472 [8:26:43<20:35:24, 3.58it/s] 29%|██▊ | 106207/371472 [8:26:43<20:40:37, 3.56it/s] 29%|██▊ | 106208/371472 [8:26:43<19:59:49, 3.68it/s] 29%|██▊ | 106209/371472 [8:26:44<20:12:56, 3.64it/s] 29%|██▊ | 106210/371472 [8:26:44<20:54:49, 3.52it/s] 29%|██▊ | 106211/371472 [8:26:44<20:39:58, 3.57it/s] 29%|██▊ | 106212/371472 [8:26:44<20:18:03, 3.63it/s] 29%|██▊ | 106213/371472 [8:26:45<21:08:57, 3.48it/s] 29%|██▊ | 106214/371472 [8:26:45<21:30:47, 3.43it/s] 29%|██▊ | 106215/371472 [8:26:45<21:11:45, 3.48it/s] 29%|██▊ | 106216/371472 [8:26:46<22:40:37, 3.25it/s] 29%|██▊ | 106217/371472 [8:26:46<23:37:22, 3.12it/s] 29%|██▊ | 106218/371472 [8:26:47<27:41:49, 2.66it/s] 29%|██▊ | 106219/371472 [8:26:47<28:32:09, 2.58it/s] 29%|██▊ | 106220/371472 [8:26:47<26:15:33, 2.81it/s] {'loss': 3.3857, 'learning_rate': 7.429970479863819e-07, 'epoch': 4.58} + 29%|██▊ | 106220/371472 [8:26:47<26:15:33, 2.81it/s] 29%|██▊ | 106221/371472 [8:26:48<23:43:35, 3.11it/s] 29%|██▊ | 106222/371472 [8:26:48<24:51:37, 2.96it/s] 29%|██▊ | 106223/371472 [8:26:48<23:33:15, 3.13it/s] 29%|██▊ | 106224/371472 [8:26:48<22:34:21, 3.26it/s] 29%|██▊ | 106225/371472 [8:26:49<21:36:40, 3.41it/s] 29%|██▊ | 106226/371472 [8:26:49<22:09:22, 3.33it/s] 29%|██▊ | 106227/371472 [8:26:49<23:47:09, 3.10it/s] 29%|██▊ | 106228/371472 [8:26:50<22:50:23, 3.23it/s] 29%|██▊ | 106229/371472 [8:26:50<22:14:40, 3.31it/s] 29%|██▊ | 106230/371472 [8:26:50<22:17:12, 3.31it/s] 29%|██▊ | 106231/371472 [8:26:51<21:42:16, 3.39it/s] 29%|██▊ | 106232/371472 [8:26:51<21:06:24, 3.49it/s] 29%|██▊ | 106233/371472 [8:26:51<21:19:58, 3.45it/s] 29%|██▊ | 106234/371472 [8:26:51<21:27:12, 3.43it/s] 29%|██▊ | 106235/371472 [8:26:52<20:28:32, 3.60it/s] 29%|██▊ | 106236/371472 [8:26:52<20:22:36, 3.62it/s] 29%|██▊ | 106237/371472 [8:26:53<26:45:23, 2.75it/s] 29%|██▊ | 106238/371472 [8:26:53<24:50:29, 2.97it/s] 29%|██▊ | 106239/371472 [8:26:53<23:06:06, 3.19it/s] 29%|██▊ | 106240/371472 [8:26:53<21:53:29, 3.37it/s] {'loss': 3.5344, 'learning_rate': 7.42948566010903e-07, 'epoch': 4.58} + 29%|██▊ | 106240/371472 [8:26:53<21:53:29, 3.37it/s] 29%|██▊ | 106241/371472 [8:26:54<25:29:10, 2.89it/s] 29%|██▊ | 106242/371472 [8:26:54<25:07:22, 2.93it/s] 29%|██▊ | 106243/371472 [8:26:54<24:29:57, 3.01it/s] 29%|██▊ | 106244/371472 [8:26:55<22:56:31, 3.21it/s] 29%|██▊ | 106245/371472 [8:26:55<23:06:53, 3.19it/s] 29%|██▊ | 106246/371472 [8:26:55<23:22:58, 3.15it/s] 29%|██▊ | 106247/371472 [8:26:56<22:56:29, 3.21it/s] 29%|██▊ | 106248/371472 [8:26:56<22:46:38, 3.23it/s] 29%|██▊ | 106249/371472 [8:26:56<23:04:36, 3.19it/s] 29%|██▊ | 106250/371472 [8:26:57<23:01:39, 3.20it/s] 29%|██▊ | 106251/371472 [8:26:57<21:59:15, 3.35it/s] 29%|██▊ | 106252/371472 [8:26:57<22:28:06, 3.28it/s] 29%|██▊ | 106253/371472 [8:26:57<21:34:06, 3.42it/s] 29%|██▊ | 106254/371472 [8:26:58<21:07:50, 3.49it/s] 29%|██▊ | 106255/371472 [8:26:58<21:00:06, 3.51it/s] 29%|██▊ | 106256/371472 [8:26:58<20:42:10, 3.56it/s] 29%|██▊ | 106257/371472 [8:26:58<20:30:43, 3.59it/s] 29%|██▊ | 106258/371472 [8:26:59<21:26:00, 3.44it/s] 29%|██▊ | 106259/371472 [8:26:59<22:41:24, 3.25it/s] 29%|██▊ | 106260/371472 [8:26:59<21:51:12, 3.37it/s] {'loss': 3.4852, 'learning_rate': 7.429000840354241e-07, 'epoch': 4.58} + 29%|██▊ | 106260/371472 [8:26:59<21:51:12, 3.37it/s] 29%|██▊ | 106261/371472 [8:27:00<21:55:39, 3.36it/s] 29%|██▊ | 106262/371472 [8:27:00<21:31:51, 3.42it/s] 29%|██▊ | 106263/371472 [8:27:00<21:09:09, 3.48it/s] 29%|██▊ | 106264/371472 [8:27:01<21:46:16, 3.38it/s] 29%|██▊ | 106265/371472 [8:27:01<21:17:41, 3.46it/s] 29%|██▊ | 106266/371472 [8:27:01<20:44:40, 3.55it/s] 29%|██▊ | 106267/371472 [8:27:01<21:31:12, 3.42it/s] 29%|██▊ | 106268/371472 [8:27:02<21:06:38, 3.49it/s] 29%|██▊ | 106269/371472 [8:27:02<22:30:29, 3.27it/s] 29%|██▊ | 106270/371472 [8:27:02<21:42:13, 3.39it/s] 29%|██▊ | 106271/371472 [8:27:03<21:59:50, 3.35it/s] 29%|██▊ | 106272/371472 [8:27:03<21:09:24, 3.48it/s] 29%|██▊ | 106273/371472 [8:27:03<20:17:40, 3.63it/s] 29%|██▊ | 106274/371472 [8:27:03<20:11:18, 3.65it/s] 29%|██▊ | 106275/371472 [8:27:04<19:46:03, 3.73it/s] 29%|██▊ | 106276/371472 [8:27:04<21:07:59, 3.49it/s] 29%|██▊ | 106277/371472 [8:27:04<20:38:31, 3.57it/s] 29%|██▊ | 106278/371472 [8:27:05<21:34:14, 3.42it/s] 29%|██▊ | 106279/371472 [8:27:05<21:03:24, 3.50it/s] 29%|██▊ | 106280/371472 [8:27:05<21:46:52, 3.38it/s] {'loss': 3.2511, 'learning_rate': 7.428516020599453e-07, 'epoch': 4.58} + 29%|██▊ | 106280/371472 [8:27:05<21:46:52, 3.38it/s] 29%|██▊ | 106281/371472 [8:27:05<21:18:03, 3.46it/s] 29%|██▊ | 106282/371472 [8:27:06<20:44:39, 3.55it/s] 29%|██▊ | 106283/371472 [8:27:06<20:25:17, 3.61it/s] 29%|██▊ | 106284/371472 [8:27:06<20:04:59, 3.67it/s] 29%|██▊ | 106285/371472 [8:27:07<20:34:59, 3.58it/s] 29%|██▊ | 106286/371472 [8:27:07<21:04:14, 3.50it/s] 29%|██▊ | 106287/371472 [8:27:07<20:07:53, 3.66it/s] 29%|██▊ | 106288/371472 [8:27:07<21:22:02, 3.45it/s] 29%|██▊ | 106289/371472 [8:27:08<22:28:42, 3.28it/s] 29%|██▊ | 106290/371472 [8:27:08<21:44:37, 3.39it/s] 29%|██▊ | 106291/371472 [8:27:08<21:05:18, 3.49it/s] 29%|██▊ | 106292/371472 [8:27:09<21:48:29, 3.38it/s] 29%|██▊ | 106293/371472 [8:27:09<22:07:22, 3.33it/s] 29%|██▊ | 106294/371472 [8:27:09<22:05:56, 3.33it/s] 29%|██▊ | 106295/371472 [8:27:10<21:43:20, 3.39it/s] 29%|██▊ | 106296/371472 [8:27:10<20:54:58, 3.52it/s] 29%|██▊ | 106297/371472 [8:27:10<20:52:59, 3.53it/s] 29%|██▊ | 106298/371472 [8:27:10<21:20:51, 3.45it/s] 29%|██▊ | 106299/371472 [8:27:11<22:42:42, 3.24it/s] 29%|██▊ | 106300/371472 [8:27:11<22:51:46, 3.22it/s] {'loss': 3.3752, 'learning_rate': 7.428031200844663e-07, 'epoch': 4.58} + 29%|██▊ | 106300/371472 [8:27:11<22:51:46, 3.22it/s] 29%|██▊ | 106301/371472 [8:27:11<22:45:35, 3.24it/s] 29%|██▊ | 106302/371472 [8:27:12<22:13:58, 3.31it/s] 29%|██▊ | 106303/371472 [8:27:12<22:11:52, 3.32it/s] 29%|██▊ | 106304/371472 [8:27:12<22:36:51, 3.26it/s] 29%|██▊ | 106305/371472 [8:27:13<22:26:02, 3.28it/s] 29%|██▊ | 106306/371472 [8:27:13<21:34:32, 3.41it/s] 29%|██▊ | 106307/371472 [8:27:13<21:58:46, 3.35it/s] 29%|██▊ | 106308/371472 [8:27:13<22:28:34, 3.28it/s] 29%|██▊ | 106309/371472 [8:27:14<22:02:36, 3.34it/s] 29%|██▊ | 106310/371472 [8:27:14<22:03:45, 3.34it/s] 29%|██▊ | 106311/371472 [8:27:14<22:14:30, 3.31it/s] 29%|██▊ | 106312/371472 [8:27:15<21:09:40, 3.48it/s] 29%|██▊ | 106313/371472 [8:27:15<20:19:40, 3.62it/s] 29%|██▊ | 106314/371472 [8:27:15<20:09:41, 3.65it/s] 29%|██▊ | 106315/371472 [8:27:15<20:10:08, 3.65it/s] 29%|██▊ | 106316/371472 [8:27:16<19:54:29, 3.70it/s] 29%|██▊ | 106317/371472 [8:27:16<20:21:05, 3.62it/s] 29%|██▊ | 106318/371472 [8:27:16<19:49:28, 3.72it/s] 29%|██▊ | 106319/371472 [8:27:16<19:15:27, 3.82it/s] 29%|██▊ | 106320/371472 [8:27:17<20:16:34, 3.63it/s] {'loss': 3.5539, 'learning_rate': 7.427546381089874e-07, 'epoch': 4.58} + 29%|██▊ | 106320/371472 [8:27:17<20:16:34, 3.63it/s] 29%|██▊ | 106321/371472 [8:27:17<21:02:22, 3.50it/s] 29%|██▊ | 106322/371472 [8:27:17<22:02:39, 3.34it/s] 29%|██▊ | 106323/371472 [8:27:18<22:06:09, 3.33it/s] 29%|██▊ | 106324/371472 [8:27:18<21:36:44, 3.41it/s] 29%|██▊ | 106325/371472 [8:27:18<22:44:11, 3.24it/s] 29%|██▊ | 106326/371472 [8:27:19<26:13:31, 2.81it/s] 29%|██▊ | 106327/371472 [8:27:19<24:28:58, 3.01it/s] 29%|██▊ | 106328/371472 [8:27:19<22:46:53, 3.23it/s] 29%|██▊ | 106329/371472 [8:27:20<23:14:41, 3.17it/s] 29%|██▊ | 106330/371472 [8:27:20<22:20:17, 3.30it/s] 29%|██▊ | 106331/371472 [8:27:20<20:50:25, 3.53it/s] 29%|██▊ | 106332/371472 [8:27:20<20:09:22, 3.65it/s] 29%|██▊ | 106333/371472 [8:27:21<20:41:42, 3.56it/s] 29%|██▊ | 106334/371472 [8:27:21<21:48:16, 3.38it/s] 29%|██▊ | 106335/371472 [8:27:21<21:44:39, 3.39it/s] 29%|██▊ | 106336/371472 [8:27:22<21:43:20, 3.39it/s] 29%|██▊ | 106337/371472 [8:27:22<21:59:23, 3.35it/s] 29%|██▊ | 106338/371472 [8:27:22<21:10:28, 3.48it/s] 29%|██▊ | 106339/371472 [8:27:22<20:54:26, 3.52it/s] 29%|██▊ | 106340/371472 [8:27:23<21:27:27, 3.43it/s] {'loss': 3.4688, 'learning_rate': 7.427061561335086e-07, 'epoch': 4.58} + 29%|██▊ | 106340/371472 [8:27:23<21:27:27, 3.43it/s] 29%|██▊ | 106341/371472 [8:27:23<20:39:07, 3.57it/s] 29%|██▊ | 106342/371472 [8:27:23<20:32:34, 3.59it/s] 29%|██▊ | 106343/371472 [8:27:24<20:22:16, 3.62it/s] 29%|██▊ | 106344/371472 [8:27:24<20:33:44, 3.58it/s] 29%|██▊ | 106345/371472 [8:27:24<22:09:11, 3.32it/s] 29%|██▊ | 106346/371472 [8:27:24<21:19:29, 3.45it/s] 29%|██▊ | 106347/371472 [8:27:25<21:05:04, 3.49it/s] 29%|██▊ | 106348/371472 [8:27:25<20:02:26, 3.67it/s] 29%|██▊ | 106349/371472 [8:27:25<19:52:29, 3.71it/s] 29%|██▊ | 106350/371472 [8:27:26<19:30:45, 3.77it/s] 29%|██▊ | 106351/371472 [8:27:26<21:01:32, 3.50it/s] 29%|██▊ | 106352/371472 [8:27:26<21:08:11, 3.48it/s] 29%|██▊ | 106353/371472 [8:27:26<21:31:09, 3.42it/s] 29%|██▊ | 106354/371472 [8:27:27<21:04:32, 3.49it/s] 29%|██▊ | 106355/371472 [8:27:27<21:12:09, 3.47it/s] 29%|██▊ | 106356/371472 [8:27:27<20:52:41, 3.53it/s] 29%|██▊ | 106357/371472 [8:27:28<20:54:18, 3.52it/s] 29%|██▊ | 106358/371472 [8:27:28<25:05:15, 2.94it/s] 29%|██▊ | 106359/371472 [8:27:28<25:34:58, 2.88it/s] 29%|██▊ | 106360/371472 [8:27:29<24:07:12, 3.05it/s] {'loss': 3.4532, 'learning_rate': 7.426576741580298e-07, 'epoch': 4.58} + 29%|██▊ | 106360/371472 [8:27:29<24:07:12, 3.05it/s] 29%|██▊ | 106361/371472 [8:27:29<23:00:14, 3.20it/s] 29%|██▊ | 106362/371472 [8:27:29<21:57:00, 3.35it/s] 29%|██▊ | 106363/371472 [8:27:30<21:51:19, 3.37it/s] 29%|██▊ | 106364/371472 [8:27:30<21:02:51, 3.50it/s] 29%|██▊ | 106365/371472 [8:27:30<21:33:27, 3.42it/s] 29%|██▊ | 106366/371472 [8:27:30<22:31:32, 3.27it/s] 29%|██▊ | 106367/371472 [8:27:31<22:36:31, 3.26it/s] 29%|██▊ | 106368/371472 [8:27:31<21:24:40, 3.44it/s] 29%|██▊ | 106369/371472 [8:27:31<20:50:16, 3.53it/s] 29%|██▊ | 106370/371472 [8:27:31<20:12:44, 3.64it/s] 29%|██▊ | 106371/371472 [8:27:32<20:20:46, 3.62it/s] 29%|██▊ | 106372/371472 [8:27:32<22:14:34, 3.31it/s] 29%|██▊ | 106373/371472 [8:27:32<21:27:27, 3.43it/s] 29%|██▊ | 106374/371472 [8:27:33<22:12:43, 3.32it/s] 29%|██▊ | 106375/371472 [8:27:33<22:29:01, 3.28it/s] 29%|██▊ | 106376/371472 [8:27:33<22:25:59, 3.28it/s] 29%|██▊ | 106377/371472 [8:27:34<23:55:53, 3.08it/s] 29%|██▊ | 106378/371472 [8:27:34<23:46:51, 3.10it/s] 29%|██▊ | 106379/371472 [8:27:34<22:21:52, 3.29it/s] 29%|██▊ | 106380/371472 [8:27:35<21:48:16, 3.38it/s] {'loss': 3.3261, 'learning_rate': 7.426091921825507e-07, 'epoch': 4.58} + 29%|██▊ | 106380/371472 [8:27:35<21:48:16, 3.38it/s] 29%|██▊ | 106381/371472 [8:27:35<21:47:37, 3.38it/s] 29%|██▊ | 106382/371472 [8:27:35<20:48:57, 3.54it/s] 29%|██▊ | 106383/371472 [8:27:35<21:15:45, 3.46it/s] 29%|██▊ | 106384/371472 [8:27:36<21:09:24, 3.48it/s] 29%|██▊ | 106385/371472 [8:27:36<22:27:01, 3.28it/s] 29%|██▊ | 106386/371472 [8:27:36<23:23:06, 3.15it/s] 29%|██▊ | 106387/371472 [8:27:37<24:32:05, 3.00it/s] 29%|██▊ | 106388/371472 [8:27:37<23:14:23, 3.17it/s] 29%|██▊ | 106389/371472 [8:27:37<22:35:08, 3.26it/s] 29%|██▊ | 106390/371472 [8:27:38<21:17:57, 3.46it/s] 29%|██▊ | 106391/371472 [8:27:38<20:42:13, 3.56it/s] 29%|██▊ | 106392/371472 [8:27:38<20:44:49, 3.55it/s] 29%|██▊ | 106393/371472 [8:27:38<22:27:12, 3.28it/s] 29%|██▊ | 106394/371472 [8:27:39<22:02:08, 3.34it/s] 29%|██▊ | 106395/371472 [8:27:39<21:09:22, 3.48it/s] 29%|██▊ | 106396/371472 [8:27:39<20:26:04, 3.60it/s] 29%|██▊ | 106397/371472 [8:27:40<19:27:43, 3.78it/s] 29%|██▊ | 106398/371472 [8:27:40<19:38:00, 3.75it/s] 29%|██▊ | 106399/371472 [8:27:40<20:05:59, 3.66it/s] 29%|██▊ | 106400/371472 [8:27:40<20:02:22, 3.67it/s] {'loss': 3.3305, 'learning_rate': 7.425607102070718e-07, 'epoch': 4.58} + 29%|██▊ | 106400/371472 [8:27:40<20:02:22, 3.67it/s] 29%|██▊ | 106401/371472 [8:27:41<20:03:21, 3.67it/s] 29%|██▊ | 106402/371472 [8:27:41<20:53:26, 3.52it/s] 29%|██▊ | 106403/371472 [8:27:41<21:37:34, 3.40it/s] 29%|██▊ | 106404/371472 [8:27:42<22:08:14, 3.33it/s] 29%|██▊ | 106405/371472 [8:27:42<22:07:15, 3.33it/s] 29%|██▊ | 106406/371472 [8:27:42<21:29:35, 3.43it/s] 29%|██▊ | 106407/371472 [8:27:42<22:01:29, 3.34it/s] 29%|██▊ | 106408/371472 [8:27:43<22:29:46, 3.27it/s] 29%|██▊ | 106409/371472 [8:27:43<21:44:51, 3.39it/s] 29%|██▊ | 106410/371472 [8:27:43<20:59:45, 3.51it/s] 29%|██▊ | 106411/371472 [8:27:44<20:18:17, 3.63it/s] 29%|██▊ | 106412/371472 [8:27:44<20:01:55, 3.68it/s] 29%|██▊ | 106413/371472 [8:27:44<19:44:49, 3.73it/s] 29%|██▊ | 106414/371472 [8:27:44<20:37:25, 3.57it/s] 29%|██▊ | 106415/371472 [8:27:45<20:24:22, 3.61it/s] 29%|██▊ | 106416/371472 [8:27:45<21:08:46, 3.48it/s] 29%|██▊ | 106417/371472 [8:27:45<20:38:03, 3.57it/s] 29%|██▊ | 106418/371472 [8:27:46<20:23:45, 3.61it/s] 29%|██▊ | 106419/371472 [8:27:46<20:31:00, 3.59it/s] 29%|██▊ | 106420/371472 [8:27:46<20:15:28, 3.63it/s] {'loss': 3.6248, 'learning_rate': 7.42512228231593e-07, 'epoch': 4.58} + 29%|██▊ | 106420/371472 [8:27:46<20:15:28, 3.63it/s] 29%|██▊ | 106421/371472 [8:27:46<20:01:23, 3.68it/s] 29%|██▊ | 106422/371472 [8:27:47<19:59:56, 3.68it/s] 29%|██▊ | 106423/371472 [8:27:47<19:40:09, 3.74it/s] 29%|██▊ | 106424/371472 [8:27:47<19:43:10, 3.73it/s] 29%|██▊ | 106425/371472 [8:27:47<19:51:17, 3.71it/s] 29%|██▊ | 106426/371472 [8:27:48<20:08:21, 3.66it/s] 29%|██▊ | 106427/371472 [8:27:48<20:19:58, 3.62it/s] 29%|██▊ | 106428/371472 [8:27:48<20:06:39, 3.66it/s] 29%|██▊ | 106429/371472 [8:27:49<20:34:29, 3.58it/s] 29%|██▊ | 106430/371472 [8:27:49<21:09:28, 3.48it/s] 29%|██▊ | 106431/371472 [8:27:49<22:48:41, 3.23it/s] 29%|██▊ | 106432/371472 [8:27:49<22:43:19, 3.24it/s] 29%|██▊ | 106433/371472 [8:27:50<22:36:33, 3.26it/s] 29%|██▊ | 106434/371472 [8:27:50<21:49:08, 3.37it/s] 29%|██▊ | 106435/371472 [8:27:50<23:31:38, 3.13it/s] 29%|██▊ | 106436/371472 [8:27:51<22:39:14, 3.25it/s] 29%|██▊ | 106437/371472 [8:27:51<22:12:39, 3.31it/s] 29%|██▊ | 106438/371472 [8:27:51<22:03:07, 3.34it/s] 29%|██▊ | 106439/371472 [8:27:52<21:58:07, 3.35it/s] 29%|██▊ | 106440/371472 [8:27:52<22:38:17, 3.25it/s] {'loss': 3.5392, 'learning_rate': 7.424637462561142e-07, 'epoch': 4.58} + 29%|██▊ | 106440/371472 [8:27:52<22:38:17, 3.25it/s] 29%|██▊ | 106441/371472 [8:27:52<22:07:39, 3.33it/s] 29%|██▊ | 106442/371472 [8:27:53<22:38:39, 3.25it/s] 29%|██▊ | 106443/371472 [8:27:53<22:27:40, 3.28it/s] 29%|██▊ | 106444/371472 [8:27:53<22:23:32, 3.29it/s] 29%|██▊ | 106445/371472 [8:27:53<21:49:14, 3.37it/s] 29%|██▊ | 106446/371472 [8:27:54<21:46:21, 3.38it/s] 29%|██▊ | 106447/371472 [8:27:54<20:53:39, 3.52it/s] 29%|██▊ | 106448/371472 [8:27:54<20:20:24, 3.62it/s] 29%|██▊ | 106449/371472 [8:27:54<19:57:28, 3.69it/s] 29%|██▊ | 106450/371472 [8:27:55<19:52:08, 3.71it/s] 29%|██▊ | 106451/371472 [8:27:55<20:07:15, 3.66it/s] 29%|██▊ | 106452/371472 [8:27:55<21:09:06, 3.48it/s] 29%|██▊ | 106453/371472 [8:27:56<20:20:47, 3.62it/s] 29%|██▊ | 106454/371472 [8:27:56<20:01:53, 3.67it/s] 29%|██▊ | 106455/371472 [8:27:56<20:39:32, 3.56it/s] 29%|██▊ | 106456/371472 [8:27:56<21:31:27, 3.42it/s] 29%|██▊ | 106457/371472 [8:27:57<20:49:07, 3.54it/s] 29%|██▊ | 106458/371472 [8:27:57<20:58:42, 3.51it/s] 29%|██▊ | 106459/371472 [8:27:57<21:07:55, 3.48it/s] 29%|██▊ | 106460/371472 [8:27:58<22:37:54, 3.25it/s] {'loss': 3.1391, 'learning_rate': 7.424152642806353e-07, 'epoch': 4.59} + 29%|██▊ | 106460/371472 [8:27:58<22:37:54, 3.25it/s] 29%|██▊ | 106461/371472 [8:27:58<21:54:31, 3.36it/s] 29%|██▊ | 106462/371472 [8:27:58<21:30:41, 3.42it/s] 29%|██▊ | 106463/371472 [8:27:59<21:38:33, 3.40it/s] 29%|██▊ | 106464/371472 [8:27:59<21:20:29, 3.45it/s] 29%|██▊ | 106465/371472 [8:27:59<21:55:12, 3.36it/s] 29%|██▊ | 106466/371472 [8:27:59<22:04:31, 3.33it/s] 29%|██▊ | 106467/371472 [8:28:00<21:29:21, 3.43it/s] 29%|██▊ | 106468/371472 [8:28:00<22:14:17, 3.31it/s] 29%|██▊ | 106469/371472 [8:28:00<22:23:09, 3.29it/s] 29%|██▊ | 106470/371472 [8:28:01<21:23:23, 3.44it/s] 29%|██▊ | 106471/371472 [8:28:01<21:37:39, 3.40it/s] 29%|██▊ | 106472/371472 [8:28:01<22:02:09, 3.34it/s] 29%|██▊ | 106473/371472 [8:28:02<22:03:47, 3.34it/s] 29%|██▊ | 106474/371472 [8:28:02<21:08:04, 3.48it/s] 29%|██▊ | 106475/371472 [8:28:02<20:19:45, 3.62it/s] 29%|██▊ | 106476/371472 [8:28:02<20:39:28, 3.56it/s] 29%|██▊ | 106477/371472 [8:28:03<20:22:49, 3.61it/s] 29%|██▊ | 106478/371472 [8:28:03<19:24:07, 3.79it/s] 29%|██▊ | 106479/371472 [8:28:03<22:07:43, 3.33it/s] 29%|██▊ | 106480/371472 [8:28:03<20:42:45, 3.55it/s] {'loss': 3.2933, 'learning_rate': 7.423667823051563e-07, 'epoch': 4.59} + 29%|██▊ | 106480/371472 [8:28:03<20:42:45, 3.55it/s] 29%|██▊ | 106481/371472 [8:28:04<20:12:51, 3.64it/s] 29%|██▊ | 106482/371472 [8:28:04<20:44:49, 3.55it/s] 29%|██▊ | 106483/371472 [8:28:04<22:44:52, 3.24it/s] 29%|██▊ | 106484/371472 [8:28:05<21:51:32, 3.37it/s] 29%|██▊ | 106485/371472 [8:28:05<21:46:56, 3.38it/s] 29%|██▊ | 106486/371472 [8:28:05<21:40:39, 3.40it/s] 29%|██▊ | 106487/371472 [8:28:05<20:49:05, 3.54it/s] 29%|██▊ | 106488/371472 [8:28:06<21:01:38, 3.50it/s] 29%|██▊ | 106489/371472 [8:28:06<21:36:06, 3.41it/s] 29%|██▊ | 106490/371472 [8:28:06<20:39:50, 3.56it/s] 29%|██▊ | 106491/371472 [8:28:07<21:28:47, 3.43it/s] 29%|██▊ | 106492/371472 [8:28:07<21:39:29, 3.40it/s] 29%|██▊ | 106493/371472 [8:28:07<21:22:40, 3.44it/s] 29%|██▊ | 106494/371472 [8:28:08<22:22:46, 3.29it/s] 29%|██▊ | 106495/371472 [8:28:08<21:53:27, 3.36it/s] 29%|██▊ | 106496/371472 [8:28:08<21:58:26, 3.35it/s] 29%|██▊ | 106497/371472 [8:28:08<20:58:14, 3.51it/s] 29%|██▊ | 106498/371472 [8:28:09<22:38:41, 3.25it/s] 29%|██▊ | 106499/371472 [8:28:09<22:21:46, 3.29it/s] 29%|██▊ | 106500/371472 [8:28:09<21:26:14, 3.43it/s] {'loss': 3.3931, 'learning_rate': 7.423183003296774e-07, 'epoch': 4.59} + 29%|██▊ | 106500/371472 [8:28:09<21:26:14, 3.43it/s] 29%|██▊ | 106501/371472 [8:28:10<22:28:56, 3.27it/s] 29%|██▊ | 106502/371472 [8:28:10<21:48:34, 3.37it/s] 29%|██▊ | 106503/371472 [8:28:10<20:41:40, 3.56it/s] 29%|██▊ | 106504/371472 [8:28:10<20:12:01, 3.64it/s] 29%|██▊ | 106505/371472 [8:28:11<21:12:21, 3.47it/s] 29%|██▊ | 106506/371472 [8:28:11<20:47:53, 3.54it/s] 29%|██▊ | 106507/371472 [8:28:11<20:09:28, 3.65it/s] 29%|██▊ | 106508/371472 [8:28:12<20:32:25, 3.58it/s] 29%|██▊ | 106509/371472 [8:28:12<20:38:56, 3.56it/s] 29%|██▊ | 106510/371472 [8:28:12<22:15:48, 3.31it/s] 29%|██▊ | 106511/371472 [8:28:13<22:02:18, 3.34it/s] 29%|██▊ | 106512/371472 [8:28:13<21:33:44, 3.41it/s] 29%|██▊ | 106513/371472 [8:28:13<22:01:12, 3.34it/s] 29%|██▊ | 106514/371472 [8:28:13<23:48:43, 3.09it/s] 29%|██▊ | 106515/371472 [8:28:14<23:38:24, 3.11it/s] 29%|██▊ | 106516/371472 [8:28:14<24:44:00, 2.98it/s] 29%|██▊ | 106517/371472 [8:28:14<23:19:47, 3.15it/s] 29%|██▊ | 106518/371472 [8:28:15<22:43:35, 3.24it/s] 29%|██▊ | 106519/371472 [8:28:15<22:00:05, 3.35it/s] 29%|██▊ | 106520/371472 [8:28:15<21:47:23, 3.38it/s] {'loss': 3.3831, 'learning_rate': 7.422698183541985e-07, 'epoch': 4.59} + 29%|██▊ | 106520/371472 [8:28:15<21:47:23, 3.38it/s] 29%|██▊ | 106521/371472 [8:28:16<21:02:01, 3.50it/s] 29%|██▊ | 106522/371472 [8:28:16<20:42:42, 3.55it/s] 29%|██▊ | 106523/371472 [8:28:16<20:45:46, 3.54it/s] 29%|██▊ | 106524/371472 [8:28:16<20:20:27, 3.62it/s] 29%|██▊ | 106525/371472 [8:28:17<20:56:44, 3.51it/s] 29%|██▊ | 106526/371472 [8:28:17<20:38:12, 3.57it/s] 29%|██▊ | 106527/371472 [8:28:17<21:07:56, 3.48it/s] 29%|██▊ | 106528/371472 [8:28:18<20:36:14, 3.57it/s] 29%|██▊ | 106529/371472 [8:28:18<20:58:00, 3.51it/s] 29%|██▊ | 106530/371472 [8:28:18<21:01:22, 3.50it/s] 29%|██▊ | 106531/371472 [8:28:18<21:14:56, 3.46it/s] 29%|██▊ | 106532/371472 [8:28:19<21:03:26, 3.49it/s] 29%|██▊ | 106533/371472 [8:28:19<20:32:47, 3.58it/s] 29%|██▊ | 106534/371472 [8:28:19<20:27:51, 3.60it/s] 29%|██▊ | 106535/371472 [8:28:19<19:56:30, 3.69it/s] 29%|██▊ | 106536/371472 [8:28:20<19:56:51, 3.69it/s] 29%|██▊ | 106537/371472 [8:28:20<20:18:57, 3.62it/s] 29%|██▊ | 106538/371472 [8:28:20<20:03:44, 3.67it/s] 29%|██▊ | 106539/371472 [8:28:21<19:34:24, 3.76it/s] 29%|██▊ | 106540/371472 [8:28:21<22:43:37, 3.24it/s] {'loss': 3.3134, 'learning_rate': 7.422213363787196e-07, 'epoch': 4.59} + 29%|██▊ | 106540/371472 [8:28:21<22:43:37, 3.24it/s] 29%|██▊ | 106541/371472 [8:28:21<22:22:30, 3.29it/s] 29%|██▊ | 106542/371472 [8:28:22<21:58:40, 3.35it/s] 29%|██▊ | 106543/371472 [8:28:22<22:13:01, 3.31it/s] 29%|██▊ | 106544/371472 [8:28:22<21:21:21, 3.45it/s] 29%|██▊ | 106545/371472 [8:28:22<23:34:52, 3.12it/s] 29%|██▊ | 106546/371472 [8:28:23<22:45:38, 3.23it/s] 29%|██▊ | 106547/371472 [8:28:23<21:59:09, 3.35it/s] 29%|██▊ | 106548/371472 [8:28:23<21:35:59, 3.41it/s] 29%|██▊ | 106549/371472 [8:28:24<21:35:31, 3.41it/s] 29%|██▊ | 106550/371472 [8:28:24<21:50:36, 3.37it/s] 29%|██▊ | 106551/371472 [8:28:24<22:02:42, 3.34it/s] 29%|██▊ | 106552/371472 [8:28:25<23:20:52, 3.15it/s] 29%|██▊ | 106553/371472 [8:28:25<24:11:20, 3.04it/s] 29%|██▊ | 106554/371472 [8:28:25<24:44:23, 2.97it/s] 29%|██▊ | 106555/371472 [8:28:26<23:12:40, 3.17it/s] 29%|██▊ | 106556/371472 [8:28:26<22:25:35, 3.28it/s] 29%|██▊ | 106557/371472 [8:28:26<23:13:50, 3.17it/s] 29%|██▊ | 106558/371472 [8:28:26<22:24:54, 3.28it/s] 29%|██▊ | 106559/371472 [8:28:27<22:27:57, 3.28it/s] 29%|██▊ | 106560/371472 [8:28:27<22:07:49, 3.33it/s] {'loss': 3.3329, 'learning_rate': 7.421728544032407e-07, 'epoch': 4.59} + 29%|██▊ | 106560/371472 [8:28:27<22:07:49, 3.33it/s] 29%|██▊ | 106561/371472 [8:28:27<23:29:35, 3.13it/s] 29%|██▊ | 106562/371472 [8:28:28<22:29:13, 3.27it/s] 29%|██▊ | 106563/371472 [8:28:28<21:22:15, 3.44it/s] 29%|██▊ | 106564/371472 [8:28:28<21:54:57, 3.36it/s] 29%|██▊ | 106565/371472 [8:28:29<21:31:46, 3.42it/s] 29%|██▊ | 106566/371472 [8:28:29<21:08:20, 3.48it/s] 29%|██▊ | 106567/371472 [8:28:29<20:43:06, 3.55it/s] 29%|██▊ | 106568/371472 [8:28:29<20:12:39, 3.64it/s] 29%|██▊ | 106569/371472 [8:28:30<20:14:20, 3.64it/s] 29%|██▊ | 106570/371472 [8:28:30<21:13:27, 3.47it/s] 29%|██▊ | 106571/371472 [8:28:30<20:33:59, 3.58it/s] 29%|██▊ | 106572/371472 [8:28:30<20:10:26, 3.65it/s] 29%|██▊ | 106573/371472 [8:28:31<19:50:45, 3.71it/s] 29%|██▊ | 106574/371472 [8:28:31<19:13:55, 3.83it/s] 29%|██▊ | 106575/371472 [8:28:31<21:58:18, 3.35it/s] 29%|██▊ | 106576/371472 [8:28:32<20:51:35, 3.53it/s] 29%|██▊ | 106577/371472 [8:28:32<20:47:07, 3.54it/s] 29%|██▊ | 106578/371472 [8:28:32<21:06:43, 3.49it/s] 29%|██▊ | 106579/371472 [8:28:32<21:20:20, 3.45it/s] 29%|██▊ | 106580/371472 [8:28:33<21:35:44, 3.41it/s] {'loss': 3.6775, 'learning_rate': 7.421243724277619e-07, 'epoch': 4.59} + 29%|██▊ | 106580/371472 [8:28:33<21:35:44, 3.41it/s] 29%|██▊ | 106581/371472 [8:28:33<21:18:09, 3.45it/s] 29%|██▊ | 106582/371472 [8:28:33<20:29:26, 3.59it/s] 29%|██▊ | 106583/371472 [8:28:34<20:11:51, 3.64it/s] 29%|██▊ | 106584/371472 [8:28:34<19:56:42, 3.69it/s] 29%|██▊ | 106585/371472 [8:28:34<20:14:52, 3.63it/s] 29%|██▊ | 106586/371472 [8:28:34<20:17:50, 3.63it/s] 29%|██▊ | 106587/371472 [8:28:35<20:58:04, 3.51it/s] 29%|██▊ | 106588/371472 [8:28:35<21:13:42, 3.47it/s] 29%|██▊ | 106589/371472 [8:28:35<20:57:59, 3.51it/s] 29%|██▊ | 106590/371472 [8:28:36<21:30:33, 3.42it/s] 29%|██▊ | 106591/371472 [8:28:36<21:06:24, 3.49it/s] 29%|██▊ | 106592/371472 [8:28:36<21:14:24, 3.46it/s] 29%|██▊ | 106593/371472 [8:28:37<22:39:34, 3.25it/s] 29%|██▊ | 106594/371472 [8:28:37<21:35:14, 3.41it/s] 29%|██▊ | 106595/371472 [8:28:37<20:58:50, 3.51it/s] 29%|██▊ | 106596/371472 [8:28:37<22:12:06, 3.31it/s] 29%|██▊ | 106597/371472 [8:28:38<22:13:26, 3.31it/s] 29%|██▊ | 106598/371472 [8:28:38<21:21:55, 3.44it/s] 29%|██▊ | 106599/371472 [8:28:38<21:37:33, 3.40it/s] 29%|██▊ | 106600/371472 [8:28:39<21:24:10, 3.44it/s] {'loss': 3.4037, 'learning_rate': 7.420758904522829e-07, 'epoch': 4.59} + 29%|██▊ | 106600/371472 [8:28:39<21:24:10, 3.44it/s] 29%|██▊ | 106601/371472 [8:28:39<21:00:56, 3.50it/s] 29%|██▊ | 106602/371472 [8:28:39<21:03:59, 3.49it/s] 29%|██▊ | 106603/371472 [8:28:39<22:01:26, 3.34it/s] 29%|██▊ | 106604/371472 [8:28:40<20:54:01, 3.52it/s] 29%|██▊ | 106605/371472 [8:28:40<20:24:49, 3.60it/s] 29%|██▊ | 106606/371472 [8:28:40<21:12:38, 3.47it/s] 29%|██▊ | 106607/371472 [8:28:41<20:33:27, 3.58it/s] 29%|██▊ | 106608/371472 [8:28:41<20:31:09, 3.59it/s] 29%|██▊ | 106609/371472 [8:28:41<20:13:59, 3.64it/s] 29%|██▊ | 106610/371472 [8:28:41<20:40:42, 3.56it/s] 29%|██▊ | 106611/371472 [8:28:42<20:10:28, 3.65it/s] 29%|██▊ | 106612/371472 [8:28:42<20:22:39, 3.61it/s] 29%|██▊ | 106613/371472 [8:28:42<20:44:20, 3.55it/s] 29%|██▊ | 106614/371472 [8:28:42<20:04:36, 3.66it/s] 29%|██▊ | 106615/371472 [8:28:43<19:55:51, 3.69it/s] 29%|██▊ | 106616/371472 [8:28:43<20:38:39, 3.56it/s] 29%|██▊ | 106617/371472 [8:28:43<20:10:00, 3.65it/s] 29%|██▊ | 106618/371472 [8:28:44<19:41:11, 3.74it/s] 29%|█��▊ | 106619/371472 [8:28:44<19:13:19, 3.83it/s] 29%|██▊ | 106620/371472 [8:28:44<18:50:31, 3.90it/s] {'loss': 3.4664, 'learning_rate': 7.42027408476804e-07, 'epoch': 4.59} + 29%|██▊ | 106620/371472 [8:28:44<18:50:31, 3.90it/s] 29%|██▊ | 106621/371472 [8:28:44<19:26:36, 3.78it/s] 29%|██▊ | 106622/371472 [8:28:45<19:58:24, 3.68it/s] 29%|██▊ | 106623/371472 [8:28:45<20:24:21, 3.61it/s] 29%|██▊ | 106624/371472 [8:28:45<20:17:46, 3.62it/s] 29%|██▊ | 106625/371472 [8:28:45<19:56:25, 3.69it/s] 29%|██▊ | 106626/371472 [8:28:46<19:41:47, 3.74it/s] 29%|██▊ | 106627/371472 [8:28:46<19:47:52, 3.72it/s] 29%|██▊ | 106628/371472 [8:28:46<20:30:42, 3.59it/s] 29%|██▊ | 106629/371472 [8:28:46<19:59:30, 3.68it/s] 29%|██▊ | 106630/371472 [8:28:47<20:34:24, 3.58it/s] 29%|██▊ | 106631/371472 [8:28:47<20:48:02, 3.54it/s] 29%|██▊ | 106632/371472 [8:28:47<20:37:43, 3.57it/s] 29%|██▊ | 106633/371472 [8:28:48<20:17:21, 3.63it/s] 29%|██▊ | 106634/371472 [8:28:48<20:04:05, 3.67it/s] 29%|██▊ | 106635/371472 [8:28:48<21:48:19, 3.37it/s] 29%|██▊ | 106636/371472 [8:28:48<21:04:20, 3.49it/s] 29%|██▊ | 106637/371472 [8:28:49<21:08:02, 3.48it/s] 29%|██▊ | 106638/371472 [8:28:49<21:26:29, 3.43it/s] 29%|██▊ | 106639/371472 [8:28:49<20:55:40, 3.52it/s] 29%|██▊ | 106640/371472 [8:28:50<21:17:00, 3.46it/s] {'loss': 3.4854, 'learning_rate': 7.419789265013251e-07, 'epoch': 4.59} + 29%|██▊ | 106640/371472 [8:28:50<21:17:00, 3.46it/s] 29%|██▊ | 106641/371472 [8:28:50<23:49:28, 3.09it/s] 29%|██▊ | 106642/371472 [8:28:50<22:45:03, 3.23it/s] 29%|██▊ | 106643/371472 [8:28:51<21:26:12, 3.43it/s] 29%|██▊ | 106644/371472 [8:28:51<22:06:45, 3.33it/s] 29%|██▊ | 106645/371472 [8:28:51<23:01:32, 3.19it/s] 29%|██▊ | 106646/371472 [8:28:52<22:31:51, 3.26it/s] 29%|██▊ | 106647/371472 [8:28:52<21:18:59, 3.45it/s] 29%|██▊ | 106648/371472 [8:28:52<20:48:25, 3.54it/s] 29%|██▊ | 106649/371472 [8:28:52<21:14:51, 3.46it/s] 29%|██▊ | 106650/371472 [8:28:53<20:15:42, 3.63it/s] 29%|██▊ | 106651/371472 [8:28:53<20:12:59, 3.64it/s] 29%|██▊ | 106652/371472 [8:28:53<20:34:44, 3.57it/s] 29%|██▊ | 106653/371472 [8:28:54<22:49:59, 3.22it/s] 29%|██▊ | 106654/371472 [8:28:54<22:06:11, 3.33it/s] 29%|██▊ | 106655/371472 [8:28:54<20:43:41, 3.55it/s] 29%|██▊ | 106656/371472 [8:28:54<20:13:03, 3.64it/s] 29%|██▊ | 106657/371472 [8:28:55<19:46:24, 3.72it/s] 29%|██▊ | 106658/371472 [8:28:55<20:46:21, 3.54it/s] 29%|██▊ | 106659/371472 [8:28:55<21:38:04, 3.40it/s] 29%|██▊ | 106660/371472 [8:28:55<21:22:53, 3.44it/s] {'loss': 3.4455, 'learning_rate': 7.419304445258463e-07, 'epoch': 4.59} + 29%|██▊ | 106660/371472 [8:28:55<21:22:53, 3.44it/s] 29%|██▊ | 106661/371472 [8:28:56<20:24:10, 3.61it/s] 29%|██▊ | 106662/371472 [8:28:56<20:34:48, 3.57it/s] 29%|██▊ | 106663/371472 [8:28:56<21:11:55, 3.47it/s] 29%|██▊ | 106664/371472 [8:28:57<21:08:38, 3.48it/s] 29%|██▊ | 106665/371472 [8:28:57<21:20:43, 3.45it/s] 29%|██▊ | 106666/371472 [8:28:57<21:54:07, 3.36it/s] 29%|██▊ | 106667/371472 [8:28:58<21:52:40, 3.36it/s] 29%|██▊ | 106668/371472 [8:28:58<21:19:28, 3.45it/s] 29%|██▊ | 106669/371472 [8:28:58<21:06:59, 3.48it/s] 29%|██▊ | 106670/371472 [8:28:58<22:26:33, 3.28it/s] 29%|██▊ | 106671/371472 [8:28:59<21:26:02, 3.43it/s] 29%|██▊ | 106672/371472 [8:28:59<21:16:12, 3.46it/s] 29%|██▊ | 106673/371472 [8:28:59<21:14:19, 3.46it/s] 29%|██▊ | 106674/371472 [8:29:00<21:28:31, 3.43it/s] 29%|██▊ | 106675/371472 [8:29:00<23:00:04, 3.20it/s] 29%|██▊ | 106676/371472 [8:29:00<22:04:15, 3.33it/s] 29%|██▊ | 106677/371472 [8:29:00<21:58:53, 3.35it/s] 29%|██▊ | 106678/371472 [8:29:01<21:13:17, 3.47it/s] 29%|██▊ | 106679/371472 [8:29:01<20:10:34, 3.65it/s] 29%|██▊ | 106680/371472 [8:29:01<19:49:39, 3.71it/s] {'loss': 3.2571, 'learning_rate': 7.418819625503673e-07, 'epoch': 4.59} + 29%|██▊ | 106680/371472 [8:29:01<19:49:39, 3.71it/s] 29%|██▊ | 106681/371472 [8:29:02<19:48:51, 3.71it/s] 29%|██▊ | 106682/371472 [8:29:02<20:15:54, 3.63it/s] 29%|██▊ | 106683/371472 [8:29:02<19:43:19, 3.73it/s] 29%|██▊ | 106684/371472 [8:29:02<19:35:54, 3.75it/s] 29%|██▊ | 106685/371472 [8:29:03<20:17:14, 3.63it/s] 29%|██▊ | 106686/371472 [8:29:03<20:37:43, 3.57it/s] 29%|██▊ | 106687/371472 [8:29:03<20:00:03, 3.68it/s] 29%|██▊ | 106688/371472 [8:29:03<19:46:40, 3.72it/s] 29%|██▊ | 106689/371472 [8:29:04<19:48:46, 3.71it/s] 29%|██▊ | 106690/371472 [8:29:04<20:09:38, 3.65it/s] 29%|██▊ | 106691/371472 [8:29:04<20:25:50, 3.60it/s] 29%|██▊ | 106692/371472 [8:29:05<20:46:55, 3.54it/s] 29%|██▊ | 106693/371472 [8:29:05<19:56:30, 3.69it/s] 29%|██▊ | 106694/371472 [8:29:05<19:27:22, 3.78it/s] 29%|██▊ | 106695/371472 [8:29:05<19:01:44, 3.87it/s] 29%|██▊ | 106696/371472 [8:29:06<19:09:18, 3.84it/s] 29%|██▊ | 106697/371472 [8:29:06<19:03:15, 3.86it/s] 29%|██▊ | 106698/371472 [8:29:06<20:07:05, 3.66it/s] 29%|██▊ | 106699/371472 [8:29:06<20:03:29, 3.67it/s] 29%|██▊ | 106700/371472 [8:29:07<19:59:40, 3.68it/s] {'loss': 3.3615, 'learning_rate': 7.418334805748884e-07, 'epoch': 4.6} + 29%|██▊ | 106700/371472 [8:29:07<19:59:40, 3.68it/s] 29%|██▊ | 106701/371472 [8:29:07<19:46:21, 3.72it/s] 29%|██▊ | 106702/371472 [8:29:07<19:45:01, 3.72it/s] 29%|██▊ | 106703/371472 [8:29:07<19:21:20, 3.80it/s] 29%|██▊ | 106704/371472 [8:29:08<19:34:50, 3.76it/s] 29%|██▊ | 106705/371472 [8:29:08<19:24:34, 3.79it/s] 29%|██▊ | 106706/371472 [8:29:08<19:37:03, 3.75it/s] 29%|██▊ | 106707/371472 [8:29:09<20:00:55, 3.67it/s] 29%|██▊ | 106708/371472 [8:29:09<21:39:37, 3.40it/s] 29%|██▊ | 106709/371472 [8:29:09<21:14:10, 3.46it/s] 29%|██▊ | 106710/371472 [8:29:10<22:27:50, 3.27it/s] 29%|██▊ | 106711/371472 [8:29:10<21:26:57, 3.43it/s] 29%|██▊ | 106712/371472 [8:29:10<20:48:01, 3.54it/s] 29%|██▊ | 106713/371472 [8:29:10<20:34:54, 3.57it/s] 29%|██▊ | 106714/371472 [8:29:11<20:44:25, 3.55it/s] 29%|██▊ | 106715/371472 [8:29:11<20:42:27, 3.55it/s] 29%|██▊ | 106716/371472 [8:29:11<21:46:21, 3.38it/s] 29%|██▊ | 106717/371472 [8:29:12<22:32:48, 3.26it/s] 29%|██▊ | 106718/371472 [8:29:12<22:25:30, 3.28it/s] 29%|██▊ | 106719/371472 [8:29:12<22:24:02, 3.28it/s] 29%|██▊ | 106720/371472 [8:29:12<21:52:04, 3.36it/s] {'loss': 3.5081, 'learning_rate': 7.417849985994096e-07, 'epoch': 4.6} + 29%|██▊ | 106720/371472 [8:29:12<21:52:04, 3.36it/s] 29%|██▊ | 106721/371472 [8:29:13<21:08:43, 3.48it/s] 29%|██▊ | 106722/371472 [8:29:13<20:56:27, 3.51it/s] 29%|██▊ | 106723/371472 [8:29:13<20:50:12, 3.53it/s] 29%|██▊ | 106724/371472 [8:29:14<21:00:06, 3.50it/s] 29%|██▊ | 106725/371472 [8:29:14<20:17:37, 3.62it/s] 29%|██▊ | 106726/371472 [8:29:14<19:36:32, 3.75it/s] 29%|██▊ | 106727/371472 [8:29:14<20:06:42, 3.66it/s] 29%|██▊ | 106728/371472 [8:29:15<20:07:05, 3.66it/s] 29%|██▊ | 106729/371472 [8:29:15<20:10:44, 3.64it/s] 29%|██▊ | 106730/371472 [8:29:15<19:41:48, 3.73it/s] 29%|██▊ | 106731/371472 [8:29:15<20:37:39, 3.57it/s] 29%|██▊ | 106732/371472 [8:29:16<20:43:50, 3.55it/s] 29%|██▊ | 106733/371472 [8:29:16<21:46:55, 3.38it/s] 29%|██▊ | 106734/371472 [8:29:16<21:09:35, 3.48it/s] 29%|██▊ | 106735/371472 [8:29:17<20:06:20, 3.66it/s] 29%|██▊ | 106736/371472 [8:29:17<19:41:06, 3.74it/s] 29%|██▊ | 106737/371472 [8:29:17<20:05:33, 3.66it/s] 29%|██▊ | 106738/371472 [8:29:17<20:31:18, 3.58it/s] 29%|██▊ | 106739/371472 [8:29:18<20:04:24, 3.66it/s] 29%|██▊ | 106740/371472 [8:29:18<19:18:50, 3.81it/s] {'loss': 3.5212, 'learning_rate': 7.417365166239308e-07, 'epoch': 4.6} + 29%|██▊ | 106740/371472 [8:29:18<19:18:50, 3.81it/s] 29%|██▊ | 106741/371472 [8:29:18<20:07:54, 3.65it/s] 29%|██▊ | 106742/371472 [8:29:18<20:12:09, 3.64it/s] 29%|██▊ | 106743/371472 [8:29:19<19:58:39, 3.68it/s] 29%|██▊ | 106744/371472 [8:29:19<21:21:13, 3.44it/s] 29%|██▊ | 106745/371472 [8:29:19<20:48:37, 3.53it/s] 29%|██▊ | 106746/371472 [8:29:20<20:58:22, 3.51it/s] 29%|██▊ | 106747/371472 [8:29:20<21:43:59, 3.38it/s] 29%|██▊ | 106748/371472 [8:29:20<26:04:53, 2.82it/s] 29%|██▊ | 106749/371472 [8:29:21<24:34:22, 2.99it/s] 29%|██▊ | 106750/371472 [8:29:21<23:16:35, 3.16it/s] 29%|██▊ | 106751/371472 [8:29:21<23:10:49, 3.17it/s] 29%|██▊ | 106752/371472 [8:29:22<21:38:26, 3.40it/s] 29%|██▊ | 106753/371472 [8:29:22<21:00:11, 3.50it/s] 29%|██▊ | 106754/371472 [8:29:22<22:20:42, 3.29it/s] 29%|██▊ | 106755/371472 [8:29:22<21:18:11, 3.45it/s] 29%|██▊ | 106756/371472 [8:29:23<20:38:12, 3.56it/s] 29%|██▊ | 106757/371472 [8:29:23<21:01:49, 3.50it/s] 29%|██▊ | 106758/371472 [8:29:23<21:13:29, 3.46it/s] 29%|██▊ | 106759/371472 [8:29:24<21:04:54, 3.49it/s] 29%|██▊ | 106760/371472 [8:29:24<21:24:31, 3.43it/s] {'loss': 3.4033, 'learning_rate': 7.416880346484517e-07, 'epoch': 4.6} + 29%|██▊ | 106760/371472 [8:29:24<21:24:31, 3.43it/s] 29%|██▊ | 106761/371472 [8:29:24<21:04:47, 3.49it/s] 29%|██▊ | 106762/371472 [8:29:24<21:08:52, 3.48it/s] 29%|██▊ | 106763/371472 [8:29:25<21:19:43, 3.45it/s] 29%|██▊ | 106764/371472 [8:29:25<22:04:47, 3.33it/s] 29%|██▊ | 106765/371472 [8:29:25<21:21:11, 3.44it/s] 29%|██▊ | 106766/371472 [8:29:26<20:18:46, 3.62it/s] 29%|██▊ | 106767/371472 [8:29:26<21:12:02, 3.47it/s] 29%|██▊ | 106768/371472 [8:29:26<20:58:24, 3.51it/s] 29%|██▊ | 106769/371472 [8:29:26<20:26:49, 3.60it/s] 29%|██▊ | 106770/371472 [8:29:27<21:06:32, 3.48it/s] 29%|██▊ | 106771/371472 [8:29:27<21:03:00, 3.49it/s] 29%|██▊ | 106772/371472 [8:29:27<22:03:41, 3.33it/s] 29%|██▊ | 106773/371472 [8:29:28<22:00:03, 3.34it/s] 29%|██▊ | 106774/371472 [8:29:28<22:13:31, 3.31it/s] 29%|██▊ | 106775/371472 [8:29:28<21:59:19, 3.34it/s] 29%|██▊ | 106776/371472 [8:29:29<22:00:55, 3.34it/s] 29%|██▊ | 106777/371472 [8:29:29<20:59:03, 3.50it/s] 29%|██▊ | 106778/371472 [8:29:29<19:52:20, 3.70it/s] 29%|██▊ | 106779/371472 [8:29:29<19:56:43, 3.69it/s] 29%|██▊ | 106780/371472 [8:29:30<21:33:10, 3.41it/s] {'loss': 3.528, 'learning_rate': 7.416395526729729e-07, 'epoch': 4.6} + 29%|██▊ | 106780/371472 [8:29:30<21:33:10, 3.41it/s] 29%|██▊ | 106781/371472 [8:29:30<20:27:24, 3.59it/s] 29%|██▊ | 106782/371472 [8:29:30<20:10:59, 3.64it/s] 29%|██▊ | 106783/371472 [8:29:30<19:49:41, 3.71it/s] 29%|██▊ | 106784/371472 [8:29:31<19:26:24, 3.78it/s] 29%|██▊ | 106785/371472 [8:29:31<20:39:11, 3.56it/s] 29%|██▊ | 106786/371472 [8:29:31<19:53:05, 3.70it/s] 29%|██▊ | 106787/371472 [8:29:31<19:18:47, 3.81it/s] 29%|██▊ | 106788/371472 [8:29:32<19:17:52, 3.81it/s] 29%|██▊ | 106789/371472 [8:29:32<18:52:20, 3.90it/s] 29%|██▊ | 106790/371472 [8:29:32<19:04:24, 3.85it/s] 29%|██▊ | 106791/371472 [8:29:33<19:50:08, 3.71it/s] 29%|██▊ | 106792/371472 [8:29:33<19:28:35, 3.77it/s] 29%|██▊ | 106793/371472 [8:29:33<19:59:30, 3.68it/s] 29%|██▊ | 106794/371472 [8:29:33<19:48:11, 3.71it/s] 29%|██▊ | 106795/371472 [8:29:34<19:54:57, 3.69it/s] 29%|██▊ | 106796/371472 [8:29:34<19:27:23, 3.78it/s] 29%|██▊ | 106797/371472 [8:29:34<21:15:56, 3.46it/s] 29%|██▊ | 106798/371472 [8:29:34<20:31:59, 3.58it/s] 29%|██▉ | 106799/371472 [8:29:35<20:51:52, 3.52it/s] 29%|██▉ | 106800/371472 [8:29:35<21:58:57, 3.34it/s] {'loss': 3.4061, 'learning_rate': 7.41591070697494e-07, 'epoch': 4.6} + 29%|██▉ | 106800/371472 [8:29:35<21:58:57, 3.34it/s] 29%|██▉ | 106801/371472 [8:29:35<24:14:24, 3.03it/s] 29%|██▉ | 106802/371472 [8:29:36<24:00:56, 3.06it/s] 29%|██▉ | 106803/371472 [8:29:36<22:43:47, 3.23it/s] 29%|██▉ | 106804/371472 [8:29:36<22:50:31, 3.22it/s] 29%|██▉ | 106805/371472 [8:29:37<21:28:46, 3.42it/s] 29%|██▉ | 106806/371472 [8:29:37<21:46:53, 3.38it/s] 29%|██▉ | 106807/371472 [8:29:37<22:04:50, 3.33it/s] 29%|██▉ | 106808/371472 [8:29:38<22:02:52, 3.33it/s] 29%|██▉ | 106809/371472 [8:29:38<21:26:48, 3.43it/s] 29%|██▉ | 106810/371472 [8:29:38<21:35:14, 3.41it/s] 29%|██▉ | 106811/371472 [8:29:38<22:36:52, 3.25it/s] 29%|██▉ | 106812/371472 [8:29:39<23:31:38, 3.12it/s] 29%|██▉ | 106813/371472 [8:29:39<25:07:54, 2.93it/s] 29%|██▉ | 106814/371472 [8:29:40<24:12:10, 3.04it/s] 29%|██▉ | 106815/371472 [8:29:40<22:57:49, 3.20it/s] 29%|██▉ | 106816/371472 [8:29:40<21:45:58, 3.38it/s] 29%|██▉ | 106817/371472 [8:29:40<20:53:11, 3.52it/s] 29%|██▉ | 106818/371472 [8:29:41<22:46:31, 3.23it/s] 29%|██▉ | 106819/371472 [8:29:41<21:12:43, 3.47it/s] 29%|██▉ | 106820/371472 [8:29:41<20:23:49, 3.60it/s] {'loss': 3.3811, 'learning_rate': 7.415425887220151e-07, 'epoch': 4.6} + 29%|██▉ | 106820/371472 [8:29:41<20:23:49, 3.60it/s] 29%|██▉ | 106821/371472 [8:29:41<21:29:50, 3.42it/s] 29%|██▉ | 106822/371472 [8:29:42<22:30:16, 3.27it/s] 29%|██▉ | 106823/371472 [8:29:42<21:41:12, 3.39it/s] 29%|██▉ | 106824/371472 [8:29:42<20:45:37, 3.54it/s] 29%|██▉ | 106825/371472 [8:29:43<20:14:33, 3.63it/s] 29%|██▉ | 106826/371472 [8:29:43<19:59:29, 3.68it/s] 29%|██▉ | 106827/371472 [8:29:43<20:11:10, 3.64it/s] 29%|██▉ | 106828/371472 [8:29:43<20:46:00, 3.54it/s] 29%|██▉ | 106829/371472 [8:29:44<20:16:10, 3.63it/s] 29%|██▉ | 106830/371472 [8:29:44<19:56:36, 3.69it/s] 29%|██▉ | 106831/371472 [8:29:44<20:37:00, 3.57it/s] 29%|██▉ | 106832/371472 [8:29:45<19:58:01, 3.68it/s] 29%|██▉ | 106833/371472 [8:29:45<20:42:21, 3.55it/s] 29%|██▉ | 106834/371472 [8:29:45<19:49:58, 3.71it/s] 29%|██▉ | 106835/371472 [8:29:45<20:21:38, 3.61it/s] 29%|██▉ | 106836/371472 [8:29:46<19:54:47, 3.69it/s] 29%|██▉ | 106837/371472 [8:29:46<19:57:55, 3.68it/s] 29%|██▉ | 106838/371472 [8:29:46<19:19:22, 3.80it/s] 29%|██▉ | 106839/371472 [8:29:46<20:03:57, 3.66it/s] 29%|██▉ | 106840/371472 [8:29:47<20:17:49, 3.62it/s] {'loss': 3.4855, 'learning_rate': 7.414941067465362e-07, 'epoch': 4.6} + 29%|██▉ | 106840/371472 [8:29:47<20:17:49, 3.62it/s] 29%|██▉ | 106841/371472 [8:29:47<20:59:54, 3.50it/s] 29%|██▉ | 106842/371472 [8:29:47<20:06:38, 3.66it/s] 29%|██▉ | 106843/371472 [8:29:48<19:33:46, 3.76it/s] 29%|██▉ | 106844/371472 [8:29:48<20:58:43, 3.50it/s] 29%|██▉ | 106845/371472 [8:29:48<20:43:03, 3.55it/s] 29%|██▉ | 106846/371472 [8:29:48<20:16:43, 3.62it/s] 29%|██▉ | 106847/371472 [8:29:49<20:24:43, 3.60it/s] 29%|██▉ | 106848/371472 [8:29:49<20:53:05, 3.52it/s] 29%|██▉ | 106849/371472 [8:29:49<20:40:10, 3.56it/s] 29%|██▉ | 106850/371472 [8:29:49<19:59:10, 3.68it/s] 29%|██▉ | 106851/371472 [8:29:50<20:41:28, 3.55it/s] 29%|██▉ | 106852/371472 [8:29:50<20:26:15, 3.60it/s] 29%|██▉ | 106853/371472 [8:29:50<20:13:33, 3.63it/s] 29%|██▉ | 106854/371472 [8:29:51<20:54:26, 3.52it/s] 29%|██▉ | 106855/371472 [8:29:51<21:28:50, 3.42it/s] 29%|██▉ | 106856/371472 [8:29:51<20:47:00, 3.54it/s] 29%|██▉ | 106857/371472 [8:29:51<20:47:40, 3.53it/s] 29%|██▉ | 106858/371472 [8:29:52<20:53:37, 3.52it/s] 29%|██▉ | 106859/371472 [8:29:52<21:12:46, 3.47it/s] 29%|██▉ | 106860/371472 [8:29:52<20:28:47, 3.59it/s] {'loss': 3.3583, 'learning_rate': 7.414456247710573e-07, 'epoch': 4.6} + 29%|██▉ | 106860/371472 [8:29:52<20:28:47, 3.59it/s] 29%|██▉ | 106861/371472 [8:29:53<20:52:15, 3.52it/s] 29%|██▉ | 106862/371472 [8:29:53<20:21:06, 3.61it/s] 29%|██▉ | 106863/371472 [8:29:53<20:39:49, 3.56it/s] 29%|██▉ | 106864/371472 [8:29:53<21:33:25, 3.41it/s] 29%|██▉ | 106865/371472 [8:29:54<20:59:53, 3.50it/s] 29%|██▉ | 106866/371472 [8:29:54<20:15:36, 3.63it/s] 29%|██▉ | 106867/371472 [8:29:54<19:31:58, 3.76it/s] 29%|██▉ | 106868/371472 [8:29:55<20:08:45, 3.65it/s] 29%|██▉ | 106869/371472 [8:29:55<20:09:19, 3.65it/s] 29%|██▉ | 106870/371472 [8:29:55<20:03:43, 3.66it/s] 29%|██▉ | 106871/371472 [8:29:55<19:57:02, 3.68it/s] 29%|██▉ | 106872/371472 [8:29:56<19:58:14, 3.68it/s] 29%|██▉ | 106873/371472 [8:29:56<20:14:01, 3.63it/s] 29%|██▉ | 106874/371472 [8:29:56<20:15:56, 3.63it/s] 29%|██▉ | 106875/371472 [8:29:56<19:52:08, 3.70it/s] 29%|██▉ | 106876/371472 [8:29:57<19:24:36, 3.79it/s] 29%|██▉ | 106877/371472 [8:29:57<20:32:17, 3.58it/s] 29%|██▉ | 106878/371472 [8:29:57<20:50:13, 3.53it/s] 29%|██▉ | 106879/371472 [8:29:58<20:41:26, 3.55it/s] 29%|██▉ | 106880/371472 [8:29:58<20:22:58, 3.61it/s] {'loss': 3.3726, 'learning_rate': 7.413971427955784e-07, 'epoch': 4.6} + 29%|██▉ | 106880/371472 [8:29:58<20:22:58, 3.61it/s] 29%|██▉ | 106881/371472 [8:29:58<21:37:48, 3.40it/s] 29%|██▉ | 106882/371472 [8:29:58<20:57:31, 3.51it/s] 29%|██▉ | 106883/371472 [8:29:59<20:26:14, 3.60it/s] 29%|██▉ | 106884/371472 [8:29:59<19:36:47, 3.75it/s] 29%|██▉ | 106885/371472 [8:29:59<19:27:35, 3.78it/s] 29%|██▉ | 106886/371472 [8:30:00<19:52:44, 3.70it/s] 29%|██▉ | 106887/371472 [8:30:00<22:02:45, 3.33it/s] 29%|██▉ | 106888/371472 [8:30:00<21:21:52, 3.44it/s] 29%|██▉ | 106889/371472 [8:30:00<21:10:29, 3.47it/s] 29%|██▉ | 106890/371472 [8:30:01<21:26:03, 3.43it/s] 29%|██▉ | 106891/371472 [8:30:01<20:40:22, 3.56it/s] 29%|██▉ | 106892/371472 [8:30:01<19:51:11, 3.70it/s] 29%|██▉ | 106893/371472 [8:30:02<20:39:06, 3.56it/s] 29%|██▉ | 106894/371472 [8:30:02<20:20:00, 3.61it/s] 29%|██▉ | 106895/371472 [8:30:02<20:43:10, 3.55it/s] 29%|██▉ | 106896/371472 [8:30:02<20:43:12, 3.55it/s] 29%|██▉ | 106897/371472 [8:30:03<21:37:18, 3.40it/s] 29%|██▉ | 106898/371472 [8:30:03<22:38:32, 3.25it/s] 29%|██▉ | 106899/371472 [8:30:03<22:17:46, 3.30it/s] 29%|██▉ | 106900/371472 [8:30:04<22:39:41, 3.24it/s] {'loss': 3.5457, 'learning_rate': 7.413486608200995e-07, 'epoch': 4.6} + 29%|██▉ | 106900/371472 [8:30:04<22:39:41, 3.24it/s] 29%|██▉ | 106901/371472 [8:30:04<22:07:19, 3.32it/s] 29%|██▉ | 106902/371472 [8:30:04<21:50:53, 3.36it/s] 29%|██▉ | 106903/371472 [8:30:05<22:18:16, 3.29it/s] 29%|██▉ | 106904/371472 [8:30:05<23:57:35, 3.07it/s] 29%|██▉ | 106905/371472 [8:30:05<23:20:45, 3.15it/s] 29%|██▉ | 106906/371472 [8:30:05<21:58:51, 3.34it/s] 29%|██▉ | 106907/371472 [8:30:06<20:56:58, 3.51it/s] 29%|██▉ | 106908/371472 [8:30:06<21:34:27, 3.41it/s] 29%|██▉ | 106909/371472 [8:30:06<20:58:57, 3.50it/s] 29%|██▉ | 106910/371472 [8:30:07<20:46:58, 3.54it/s] 29%|██▉ | 106911/371472 [8:30:07<20:09:09, 3.65it/s] 29%|██▉ | 106912/371472 [8:30:07<20:20:24, 3.61it/s] 29%|██▉ | 106913/371472 [8:30:07<19:50:44, 3.70it/s] 29%|██▉ | 106914/371472 [8:30:08<20:09:15, 3.65it/s] 29%|██▉ | 106915/371472 [8:30:08<20:17:58, 3.62it/s] 29%|██▉ | 106916/371472 [8:30:08<22:03:54, 3.33it/s] 29%|██▉ | 106917/371472 [8:30:09<21:18:38, 3.45it/s] 29%|██▉ | 106918/371472 [8:30:09<20:29:03, 3.59it/s] 29%|██▉ | 106919/371472 [8:30:09<19:43:12, 3.73it/s] 29%|██▉ | 106920/371472 [8:30:09<20:00:22, 3.67it/s] {'loss': 3.3274, 'learning_rate': 7.413001788446206e-07, 'epoch': 4.61} + 29%|██▉ | 106920/371472 [8:30:09<20:00:22, 3.67it/s] 29%|██▉ | 106921/371472 [8:30:10<20:51:46, 3.52it/s] 29%|██▉ | 106922/371472 [8:30:10<20:26:21, 3.60it/s] 29%|██▉ | 106923/371472 [8:30:10<21:02:50, 3.49it/s] 29%|██▉ | 106924/371472 [8:30:10<20:16:38, 3.62it/s] 29%|██▉ | 106925/371472 [8:30:11<19:46:55, 3.71it/s] 29%|██▉ | 106926/371472 [8:30:11<22:41:32, 3.24it/s] 29%|██▉ | 106927/371472 [8:30:11<21:41:03, 3.39it/s] 29%|██▉ | 106928/371472 [8:30:12<20:45:28, 3.54it/s] 29%|██▉ | 106929/371472 [8:30:12<20:33:25, 3.57it/s] 29%|██▉ | 106930/371472 [8:30:12<22:11:49, 3.31it/s] 29%|██▉ | 106931/371472 [8:30:13<20:54:25, 3.51it/s] 29%|██▉ | 106932/371472 [8:30:13<20:28:24, 3.59it/s] 29%|██▉ | 106933/371472 [8:30:13<20:10:28, 3.64it/s] 29%|██▉ | 106934/371472 [8:30:13<19:44:10, 3.72it/s] 29%|██▉ | 106935/371472 [8:30:14<21:57:04, 3.35it/s] 29%|██▉ | 106936/371472 [8:30:14<22:22:55, 3.28it/s] 29%|██▉ | 106937/371472 [8:30:14<22:32:25, 3.26it/s] 29%|██▉ | 106938/371472 [8:30:15<21:32:21, 3.41it/s] 29%|██▉ | 106939/371472 [8:30:15<20:49:14, 3.53it/s] 29%|██▉ | 106940/371472 [8:30:15<22:00:12, 3.34it/s] {'loss': 3.5479, 'learning_rate': 7.412516968691417e-07, 'epoch': 4.61} + 29%|██▉ | 106940/371472 [8:30:15<22:00:12, 3.34it/s] 29%|██▉ | 106941/371472 [8:30:15<21:32:00, 3.41it/s] 29%|██▉ | 106942/371472 [8:30:16<21:03:29, 3.49it/s] 29%|██▉ | 106943/371472 [8:30:16<21:13:47, 3.46it/s] 29%|██▉ | 106944/371472 [8:30:16<20:42:13, 3.55it/s] 29%|██▉ | 106945/371472 [8:30:17<21:02:14, 3.49it/s] 29%|██▉ | 106946/371472 [8:30:17<20:24:25, 3.60it/s] 29%|██▉ | 106947/371472 [8:30:17<20:35:32, 3.57it/s] 29%|██▉ | 106948/371472 [8:30:17<20:43:32, 3.55it/s] 29%|██▉ | 106949/371472 [8:30:18<20:45:59, 3.54it/s] 29%|██�� | 106950/371472 [8:30:18<19:51:20, 3.70it/s] 29%|██▉ | 106951/371472 [8:30:18<20:14:26, 3.63it/s] 29%|██▉ | 106952/371472 [8:30:18<20:23:38, 3.60it/s] 29%|██▉ | 106953/371472 [8:30:19<21:21:14, 3.44it/s] 29%|██▉ | 106954/371472 [8:30:19<21:02:25, 3.49it/s] 29%|██▉ | 106955/371472 [8:30:19<20:46:58, 3.54it/s] 29%|██▉ | 106956/371472 [8:30:20<20:20:01, 3.61it/s] 29%|██▉ | 106957/371472 [8:30:20<20:39:22, 3.56it/s] 29%|██▉ | 106958/371472 [8:30:20<20:11:51, 3.64it/s] 29%|██▉ | 106959/371472 [8:30:20<19:34:36, 3.75it/s] 29%|██▉ | 106960/371472 [8:30:21<20:37:47, 3.56it/s] {'loss': 3.4976, 'learning_rate': 7.412032148936629e-07, 'epoch': 4.61} + 29%|██▉ | 106960/371472 [8:30:21<20:37:47, 3.56it/s] 29%|██▉ | 106961/371472 [8:30:21<19:47:56, 3.71it/s] 29%|██▉ | 106962/371472 [8:30:21<20:43:14, 3.55it/s] 29%|██▉ | 106963/371472 [8:30:22<21:05:53, 3.48it/s] 29%|██▉ | 106964/371472 [8:30:22<20:25:11, 3.60it/s] 29%|██▉ | 106965/371472 [8:30:22<19:52:38, 3.70it/s] 29%|██▉ | 106966/371472 [8:30:22<19:46:23, 3.72it/s] 29%|██▉ | 106967/371472 [8:30:23<20:24:14, 3.60it/s] 29%|██▉ | 106968/371472 [8:30:23<19:29:59, 3.77it/s] 29%|██▉ | 106969/371472 [8:30:23<19:33:08, 3.76it/s] 29%|██▉ | 106970/371472 [8:30:23<19:32:50, 3.76it/s] 29%|██▉ | 106971/371472 [8:30:24<21:44:33, 3.38it/s] 29%|██▉ | 106972/371472 [8:30:24<21:14:44, 3.46it/s] 29%|██▉ | 106973/371472 [8:30:24<20:38:28, 3.56it/s] 29%|██▉ | 106974/371472 [8:30:25<20:34:35, 3.57it/s] 29%|██▉ | 106975/371472 [8:30:25<20:57:57, 3.50it/s] 29%|██▉ | 106976/371472 [8:30:25<20:48:32, 3.53it/s] 29%|██▉ | 106977/371472 [8:30:25<20:03:04, 3.66it/s] 29%|██▉ | 106978/371472 [8:30:26<20:39:11, 3.56it/s] 29%|██▉ | 106979/371472 [8:30:26<24:02:29, 3.06it/s] 29%|██▉ | 106980/371472 [8:30:27<25:12:54, 2.91it/s] {'loss': 3.3176, 'learning_rate': 7.411547329181841e-07, 'epoch': 4.61} + 29%|██▉ | 106980/371472 [8:30:27<25:12:54, 2.91it/s] 29%|██▉ | 106981/371472 [8:30:27<23:11:09, 3.17it/s] 29%|██▉ | 106982/371472 [8:30:27<22:48:11, 3.22it/s] 29%|██▉ | 106983/371472 [8:30:27<21:07:37, 3.48it/s] 29%|██▉ | 106984/371472 [8:30:28<20:30:24, 3.58it/s] 29%|██▉ | 106985/371472 [8:30:28<19:55:29, 3.69it/s] 29%|██▉ | 106986/371472 [8:30:28<19:43:07, 3.73it/s] 29%|██▉ | 106987/371472 [8:30:28<20:02:16, 3.67it/s] 29%|██▉ | 106988/371472 [8:30:29<20:28:40, 3.59it/s] 29%|██▉ | 106989/371472 [8:30:29<20:55:43, 3.51it/s] 29%|██▉ | 106990/371472 [8:30:29<20:52:32, 3.52it/s] 29%|██▉ | 106991/371472 [8:30:30<20:54:41, 3.51it/s] 29%|██▉ | 106992/371472 [8:30:30<20:39:15, 3.56it/s] 29%|██▉ | 106993/371472 [8:30:30<20:35:24, 3.57it/s] 29%|██▉ | 106994/371472 [8:30:30<21:43:58, 3.38it/s] 29%|██▉ | 106995/371472 [8:30:31<20:59:43, 3.50it/s] 29%|██▉ | 106996/371472 [8:30:31<20:49:30, 3.53it/s] 29%|██▉ | 106997/371472 [8:30:31<21:16:16, 3.45it/s] 29%|██▉ | 106998/371472 [8:30:32<20:23:40, 3.60it/s] 29%|██▉ | 106999/371472 [8:30:32<20:19:36, 3.61it/s] 29%|██▉ | 107000/371472 [8:30:32<19:39:49, 3.74it/s] {'loss': 3.4915, 'learning_rate': 7.411062509427051e-07, 'epoch': 4.61} + 29%|██▉ | 107000/371472 [8:30:32<19:39:49, 3.74it/s] 29%|██▉ | 107001/371472 [8:30:32<20:30:49, 3.58it/s] 29%|██▉ | 107002/371472 [8:30:33<20:40:48, 3.55it/s] 29%|██▉ | 107003/371472 [8:30:33<20:46:03, 3.54it/s] 29%|██▉ | 107004/371472 [8:30:33<21:35:43, 3.40it/s] 29%|██▉ | 107005/371472 [8:30:34<21:31:10, 3.41it/s] 29%|██▉ | 107006/371472 [8:30:34<20:56:05, 3.51it/s] 29%|██▉ | 107007/371472 [8:30:34<20:19:43, 3.61it/s] 29%|██▉ | 107008/371472 [8:30:34<20:39:54, 3.55it/s] 29%|██▉ | 107009/371472 [8:30:35<20:12:51, 3.63it/s] 29%|██▉ | 107010/371472 [8:30:35<19:56:19, 3.68it/s] 29%|██▉ | 107011/371472 [8:30:35<19:44:16, 3.72it/s] 29%|██▉ | 107012/371472 [8:30:35<20:55:13, 3.51it/s] 29%|██▉ | 107013/371472 [8:30:36<21:12:32, 3.46it/s] 29%|██▉ | 107014/371472 [8:30:36<20:57:11, 3.51it/s] 29%|██▉ | 107015/371472 [8:30:36<20:15:33, 3.63it/s] 29%|██▉ | 107016/371472 [8:30:37<20:38:06, 3.56it/s] 29%|██▉ | 107017/371472 [8:30:37<20:30:47, 3.58it/s] 29%|██▉ | 107018/371472 [8:30:37<20:55:29, 3.51it/s] 29%|██▉ | 107019/371472 [8:30:37<21:19:00, 3.45it/s] 29%|██▉ | 107020/371472 [8:30:38<20:44:19, 3.54it/s] {'loss': 3.5571, 'learning_rate': 7.410577689672261e-07, 'epoch': 4.61} + 29%|██▉ | 107020/371472 [8:30:38<20:44:19, 3.54it/s] 29%|██▉ | 107021/371472 [8:30:38<20:31:14, 3.58it/s] 29%|██▉ | 107022/371472 [8:30:38<20:41:28, 3.55it/s] 29%|██▉ | 107023/371472 [8:30:39<20:56:32, 3.51it/s] 29%|██▉ | 107024/371472 [8:30:39<20:58:33, 3.50it/s] 29%|██▉ | 107025/371472 [8:30:39<21:19:56, 3.44it/s] 29%|██▉ | 107026/371472 [8:30:39<20:33:59, 3.57it/s] 29%|██▉ | 107027/371472 [8:30:40<20:32:22, 3.58it/s] 29%|██▉ | 107028/371472 [8:30:40<22:25:10, 3.28it/s] 29%|██▉ | 107029/371472 [8:30:40<21:27:49, 3.42it/s] 29%|██▉ | 107030/371472 [8:30:41<20:51:42, 3.52it/s] 29%|██▉ | 107031/371472 [8:30:41<22:28:31, 3.27it/s] 29%|██▉ | 107032/371472 [8:30:41<21:15:54, 3.45it/s] 29%|██▉ | 107033/371472 [8:30:42<22:17:54, 3.29it/s] 29%|██▉ | 107034/371472 [8:30:42<21:44:41, 3.38it/s] 29%|██▉ | 107035/371472 [8:30:42<22:24:24, 3.28it/s] 29%|██▉ | 107036/371472 [8:30:42<21:03:50, 3.49it/s] 29%|██▉ | 107037/371472 [8:30:43<20:57:46, 3.50it/s] 29%|██▉ | 107038/371472 [8:30:43<21:22:09, 3.44it/s] 29%|██▉ | 107039/371472 [8:30:43<21:05:16, 3.48it/s] 29%|██▉ | 107040/371472 [8:30:44<21:26:03, 3.43it/s] {'loss': 3.5125, 'learning_rate': 7.410092869917472e-07, 'epoch': 4.61} + 29%|██▉ | 107040/371472 [8:30:44<21:26:03, 3.43it/s] 29%|██▉ | 107041/371472 [8:30:44<21:59:39, 3.34it/s] 29%|██▉ | 107042/371472 [8:30:44<21:00:28, 3.50it/s] 29%|██▉ | 107043/371472 [8:30:44<20:53:14, 3.52it/s] 29%|██▉ | 107044/371472 [8:30:45<20:26:01, 3.59it/s] 29%|██▉ | 107045/371472 [8:30:45<20:53:01, 3.52it/s] 29%|██▉ | 107046/371472 [8:30:45<22:00:11, 3.34it/s] 29%|██▉ | 107047/371472 [8:30:46<23:29:50, 3.13it/s] 29%|██▉ | 107048/371472 [8:30:46<22:56:40, 3.20it/s] 29%|██▉ | 107049/371472 [8:30:46<21:48:10, 3.37it/s] 29%|██▉ | 107050/371472 [8:30:47<21:44:58, 3.38it/s] 29%|██▉ | 107051/371472 [8:30:47<21:58:05, 3.34it/s] 29%|██▉ | 107052/371472 [8:30:47<23:01:04, 3.19it/s] 29%|██▉ | 107053/371472 [8:30:47<22:50:30, 3.22it/s] 29%|██▉ | 107054/371472 [8:30:48<22:42:59, 3.23it/s] 29%|██▉ | 107055/371472 [8:30:48<22:18:35, 3.29it/s] 29%|██▉ | 107056/371472 [8:30:48<23:36:38, 3.11it/s] 29%|██▉ | 107057/371472 [8:30:49<23:03:32, 3.19it/s] 29%|██▉ | 107058/371472 [8:30:49<22:16:59, 3.30it/s] 29%|██▉ | 107059/371472 [8:30:49<21:57:06, 3.35it/s] 29%|██▉ | 107060/371472 [8:30:50<22:51:59, 3.21it/s] {'loss': 3.4239, 'learning_rate': 7.409608050162684e-07, 'epoch': 4.61} + 29%|██▉ | 107060/371472 [8:30:50<22:51:59, 3.21it/s] 29%|██▉ | 107061/371472 [8:30:50<22:04:36, 3.33it/s] 29%|██▉ | 107062/371472 [8:30:50<21:45:37, 3.38it/s] 29%|██▉ | 107063/371472 [8:30:50<21:23:04, 3.43it/s] 29%|██▉ | 107064/371472 [8:30:51<21:26:29, 3.43it/s] 29%|██▉ | 107065/371472 [8:30:51<21:51:20, 3.36it/s] 29%|██▉ | 107066/371472 [8:30:51<21:05:57, 3.48it/s] 29%|██▉ | 107067/371472 [8:30:52<21:29:48, 3.42it/s] 29%|██▉ | 107068/371472 [8:30:52<20:51:04, 3.52it/s] 29%|██▉ | 107069/371472 [8:30:52<20:26:50, 3.59it/s] 29%|██▉ | 107070/371472 [8:30:52<20:22:30, 3.60it/s] 29%|██▉ | 107071/371472 [8:30:53<20:14:19, 3.63it/s] 29%|██▉ | 107072/371472 [8:30:53<20:41:16, 3.55it/s] 29%|██▉ | 107073/371472 [8:30:53<20:46:28, 3.54it/s] 29%|██▉ | 107074/371472 [8:30:54<21:25:43, 3.43it/s] 29%|██▉ | 107075/371472 [8:30:54<21:08:23, 3.47it/s] 29%|██▉ | 107076/371472 [8:30:54<22:27:57, 3.27it/s] 29%|██▉ | 107077/371472 [8:30:55<23:41:59, 3.10it/s] 29%|██▉ | 107078/371472 [8:30:55<22:01:47, 3.33it/s] 29%|██▉ | 107079/371472 [8:30:55<22:25:21, 3.28it/s] 29%|██▉ | 107080/371472 [8:30:55<21:17:30, 3.45it/s] {'loss': 3.4943, 'learning_rate': 7.409123230407895e-07, 'epoch': 4.61} + 29%|██▉ | 107080/371472 [8:30:55<21:17:30, 3.45it/s] 29%|██▉ | 107081/371472 [8:30:56<21:12:02, 3.46it/s] 29%|██▉ | 107082/371472 [8:30:56<20:40:44, 3.55it/s] 29%|██▉ | 107083/371472 [8:30:56<20:03:43, 3.66it/s] 29%|██▉ | 107084/371472 [8:30:57<21:00:03, 3.50it/s] 29%|██▉ | 107085/371472 [8:30:57<21:05:22, 3.48it/s] 29%|██▉ | 107086/371472 [8:30:57<21:15:55, 3.45it/s] 29%|██▉ | 107087/371472 [8:30:57<21:09:16, 3.47it/s] 29%|██▉ | 107088/371472 [8:30:58<19:59:58, 3.67it/s] 29%|██▉ | 107089/371472 [8:30:58<19:50:48, 3.70it/s] 29%|██▉ | 107090/371472 [8:30:58<22:44:09, 3.23it/s] 29%|██▉ | 107091/371472 [8:30:59<21:23:43, 3.43it/s] 29%|██▉ | 107092/371472 [8:30:59<20:36:57, 3.56it/s] 29%|██▉ | 107093/371472 [8:30:59<21:14:59, 3.46it/s] 29%|██▉ | 107094/371472 [8:30:59<22:10:58, 3.31it/s] 29%|██▉ | 107095/371472 [8:31:00<21:10:26, 3.47it/s] 29%|██▉ | 107096/371472 [8:31:00<21:15:43, 3.45it/s] 29%|██▉ | 107097/371472 [8:31:00<20:40:22, 3.55it/s] 29%|██▉ | 107098/371472 [8:31:01<20:34:34, 3.57it/s] 29%|██▉ | 107099/371472 [8:31:01<20:50:33, 3.52it/s] 29%|██▉ | 107100/371472 [8:31:01<20:05:41, 3.65it/s] {'loss': 3.5271, 'learning_rate': 7.408638410653106e-07, 'epoch': 4.61} + 29%|██▉ | 107100/371472 [8:31:01<20:05:41, 3.65it/s] 29%|██▉ | 107101/371472 [8:31:01<19:43:49, 3.72it/s] 29%|██▉ | 107102/371472 [8:31:02<19:39:09, 3.74it/s] 29%|██▉ | 107103/371472 [8:31:02<19:25:53, 3.78it/s] 29%|██▉ | 107104/371472 [8:31:02<22:08:46, 3.32it/s] 29%|██▉ | 107105/371472 [8:31:03<22:34:23, 3.25it/s] 29%|██▉ | 107106/371472 [8:31:03<22:15:04, 3.30it/s] 29%|██▉ | 107107/371472 [8:31:03<23:02:45, 3.19it/s] 29%|██▉ | 107108/371472 [8:31:03<22:06:06, 3.32it/s] 29%|██▉ | 107109/371472 [8:31:04<21:03:00, 3.49it/s] 29%|██▉ | 107110/371472 [8:31:04<22:22:34, 3.28it/s] 29%|██▉ | 107111/371472 [8:31:04<21:47:46, 3.37it/s] 29%|██▉ | 107112/371472 [8:31:05<21:19:15, 3.44it/s] 29%|██▉ | 107113/371472 [8:31:05<20:45:25, 3.54it/s] 29%|██▉ | 107114/371472 [8:31:05<20:55:08, 3.51it/s] 29%|██▉ | 107115/371472 [8:31:05<20:27:24, 3.59it/s] 29%|██▉ | 107116/371472 [8:31:06<20:49:00, 3.53it/s] 29%|██▉ | 107117/371472 [8:31:06<20:35:30, 3.57it/s] 29%|██▉ | 107118/371472 [8:31:06<20:13:00, 3.63it/s] 29%|██▉ | 107119/371472 [8:31:07<24:27:46, 3.00it/s] 29%|██▉ | 107120/371472 [8:31:07<22:59:09, 3.19it/s] {'loss': 3.3703, 'learning_rate': 7.408153590898317e-07, 'epoch': 4.61} + 29%|██▉ | 107120/371472 [8:31:07<22:59:09, 3.19it/s] 29%|██▉ | 107121/371472 [8:31:07<23:14:05, 3.16it/s] 29%|██▉ | 107122/371472 [8:31:08<22:47:14, 3.22it/s] 29%|██▉ | 107123/371472 [8:31:08<23:27:14, 3.13it/s] 29%|██▉ | 107124/371472 [8:31:08<22:38:22, 3.24it/s] 29%|██▉ | 107125/371472 [8:31:09<22:12:07, 3.31it/s] 29%|██▉ | 107126/371472 [8:31:09<21:58:41, 3.34it/s] 29%|██▉ | 107127/371472 [8:31:09<21:03:46, 3.49it/s] 29%|██▉ | 107128/371472 [8:31:09<22:10:26, 3.31it/s] 29%|██▉ | 107129/371472 [8:31:10<21:01:43, 3.49it/s] 29%|██▉ | 107130/371472 [8:31:10<20:58:27, 3.50it/s] 29%|██▉ | 107131/371472 [8:31:10<20:36:44, 3.56it/s] 29%|██▉ | 107132/371472 [8:31:11<20:45:17, 3.54it/s] 29%|██▉ | 107133/371472 [8:31:11<19:46:34, 3.71it/s] 29%|██▉ | 107134/371472 [8:31:11<20:14:31, 3.63it/s] 29%|██▉ | 107135/371472 [8:31:11<20:41:12, 3.55it/s] 29%|██▉ | 107136/371472 [8:31:12<20:16:47, 3.62it/s] 29%|██▉ | 107137/371472 [8:31:12<19:48:57, 3.71it/s] 29%|██▉ | 107138/371472 [8:31:12<19:48:24, 3.71it/s] 29%|██▉ | 107139/371472 [8:31:12<20:20:42, 3.61it/s] 29%|██▉ | 107140/371472 [8:31:13<20:02:43, 3.66it/s] {'loss': 3.4195, 'learning_rate': 7.407668771143527e-07, 'epoch': 4.61} + 29%|██▉ | 107140/371472 [8:31:13<20:02:43, 3.66it/s] 29%|██▉ | 107141/371472 [8:31:13<19:47:09, 3.71it/s] 29%|██▉ | 107142/371472 [8:31:13<19:55:54, 3.68it/s] 29%|██▉ | 107143/371472 [8:31:14<20:11:22, 3.64it/s] 29%|██▉ | 107144/371472 [8:31:14<19:56:31, 3.68it/s] 29%|██▉ | 107145/371472 [8:31:14<19:31:25, 3.76it/s] 29%|██▉ | 107146/371472 [8:31:14<21:19:20, 3.44it/s] 29%|██▉ | 107147/371472 [8:31:15<20:57:53, 3.50it/s] 29%|██▉ | 107148/371472 [8:31:15<20:02:44, 3.66it/s] 29%|██▉ | 107149/371472 [8:31:15<20:21:44, 3.61it/s] 29%|██▉ | 107150/371472 [8:31:15<19:53:04, 3.69it/s] 29%|██▉ | 107151/371472 [8:31:16<22:25:37, 3.27it/s] 29%|██▉ | 107152/371472 [8:31:16<21:33:10, 3.41it/s] 29%|██▉ | 107153/371472 [8:31:16<20:41:17, 3.55it/s] 29%|██▉ | 107154/371472 [8:31:17<20:12:44, 3.63it/s] 29%|██▉ | 107155/371472 [8:31:17<20:49:03, 3.53it/s] 29%|██▉ | 107156/371472 [8:31:17<21:40:07, 3.39it/s] 29%|██▉ | 107157/371472 [8:31:18<23:38:12, 3.11it/s] 29%|██▉ | 107158/371472 [8:31:18<22:33:01, 3.26it/s] 29%|██▉ | 107159/371472 [8:31:18<22:12:02, 3.31it/s] 29%|██▉ | 107160/371472 [8:31:18<21:28:40, 3.42it/s] {'loss': 3.5447, 'learning_rate': 7.407183951388738e-07, 'epoch': 4.62} + 29%|██▉ | 107160/371472 [8:31:18<21:28:40, 3.42it/s] 29%|██▉ | 107161/371472 [8:31:19<21:06:10, 3.48it/s] 29%|██▉ | 107162/371472 [8:31:19<20:11:19, 3.64it/s] 29%|██▉ | 107163/371472 [8:31:19<19:48:44, 3.71it/s] 29%|██▉ | 107164/371472 [8:31:20<21:14:25, 3.46it/s] 29%|██▉ | 107165/371472 [8:31:20<21:25:00, 3.43it/s] 29%|██▉ | 107166/371472 [8:31:20<20:48:49, 3.53it/s] 29%|██▉ | 107167/371472 [8:31:20<20:45:43, 3.54it/s] 29%|██▉ | 107168/371472 [8:31:21<20:07:05, 3.65it/s] 29%|██▉ | 107169/371472 [8:31:21<19:47:22, 3.71it/s] 29%|██▉ | 107170/371472 [8:31:21<19:09:57, 3.83it/s] 29%|██▉ | 107171/371472 [8:31:21<19:44:04, 3.72it/s] 29%|██▉ | 107172/371472 [8:31:22<19:53:02, 3.69it/s] 29%|██▉ | 107173/371472 [8:31:22<19:41:07, 3.73it/s] 29%|██▉ | 107174/371472 [8:31:22<19:46:44, 3.71it/s] 29%|██▉ | 107175/371472 [8:31:23<21:54:25, 3.35it/s] 29%|██▉ | 107176/371472 [8:31:23<21:34:45, 3.40it/s] 29%|██▉ | 107177/371472 [8:31:23<21:05:04, 3.48it/s] 29%|██▉ | 107178/371472 [8:31:23<20:37:41, 3.56it/s] 29%|██▉ | 107179/371472 [8:31:24<21:42:54, 3.38it/s] 29%|██▉ | 107180/371472 [8:31:24<20:41:10, 3.55it/s] {'loss': 3.468, 'learning_rate': 7.40669913163395e-07, 'epoch': 4.62} + 29%|██▉ | 107180/371472 [8:31:24<20:41:10, 3.55it/s] 29%|██▉ | 107181/371472 [8:31:24<19:47:46, 3.71it/s] 29%|██▉ | 107182/371472 [8:31:25<19:28:36, 3.77it/s] 29%|██▉ | 107183/371472 [8:31:25<21:47:55, 3.37it/s] 29%|██▉ | 107184/371472 [8:31:25<21:01:16, 3.49it/s] 29%|██▉ | 107185/371472 [8:31:25<21:07:54, 3.47it/s] 29%|██▉ | 107186/371472 [8:31:26<20:28:57, 3.58it/s] 29%|██▉ | 107187/371472 [8:31:26<21:32:10, 3.41it/s] 29%|██▉ | 107188/371472 [8:31:26<22:28:55, 3.27it/s] 29%|██▉ | 107189/371472 [8:31:27<21:25:59, 3.43it/s] 29%|██▉ | 107190/371472 [8:31:27<23:16:52, 3.15it/s] 29%|██▉ | 107191/371472 [8:31:27<21:54:23, 3.35it/s] 29%|██▉ | 107192/371472 [8:31:28<21:40:57, 3.39it/s] 29%|██▉ | 107193/371472 [8:31:28<20:54:56, 3.51it/s] 29%|██▉ | 107194/371472 [8:31:28<21:06:03, 3.48it/s] 29%|██▉ | 107195/371472 [8:31:28<21:28:49, 3.42it/s] 29%|██▉ | 107196/371472 [8:31:29<21:04:52, 3.48it/s] 29%|██▉ | 107197/371472 [8:31:29<22:11:16, 3.31it/s] 29%|██▉ | 107198/371472 [8:31:29<21:51:53, 3.36it/s] 29%|██▉ | 107199/371472 [8:31:30<22:49:17, 3.22it/s] 29%|██▉ | 107200/371472 [8:31:30<21:13:13, 3.46it/s] {'loss': 3.6039, 'learning_rate': 7.406214311879161e-07, 'epoch': 4.62} + 29%|██▉ | 107200/371472 [8:31:30<21:13:13, 3.46it/s] 29%|██▉ | 107201/371472 [8:31:30<20:46:36, 3.53it/s] 29%|██▉ | 107202/371472 [8:31:30<19:48:16, 3.71it/s] 29%|██▉ | 107203/371472 [8:31:31<21:10:10, 3.47it/s] 29%|██▉ | 107204/371472 [8:31:31<20:22:30, 3.60it/s] 29%|██▉ | 107205/371472 [8:31:31<19:39:14, 3.73it/s] 29%|██▉ | 107206/371472 [8:31:32<19:59:40, 3.67it/s] 29%|██▉ | 107207/371472 [8:31:32<19:43:52, 3.72it/s] 29%|██▉ | 107208/371472 [8:31:32<19:30:21, 3.76it/s] 29%|██▉ | 107209/371472 [8:31:32<19:09:40, 3.83it/s] 29%|██▉ | 107210/371472 [8:31:33<19:09:42, 3.83it/s] 29%|██▉ | 107211/371472 [8:31:33<19:14:54, 3.81it/s] 29%|██▉ | 107212/371472 [8:31:33<19:51:44, 3.70it/s] 29%|██▉ | 107213/371472 [8:31:33<20:36:06, 3.56it/s] 29%|██▉ | 107214/371472 [8:31:34<20:13:19, 3.63it/s] 29%|██▉ | 107215/371472 [8:31:34<19:55:16, 3.68it/s] 29%|██▉ | 107216/371472 [8:31:34<19:59:45, 3.67it/s] 29%|██▉ | 107217/371472 [8:31:35<20:36:20, 3.56it/s] 29%|██▉ | 107218/371472 [8:31:35<20:09:50, 3.64it/s] 29%|██▉ | 107219/371472 [8:31:35<20:01:07, 3.67it/s] 29%|██▉ | 107220/371472 [8:31:35<20:18:13, 3.62it/s] {'loss': 3.4525, 'learning_rate': 7.405729492124372e-07, 'epoch': 4.62} + 29%|██▉ | 107220/371472 [8:31:35<20:18:13, 3.62it/s] 29%|██▉ | 107221/371472 [8:31:36<20:41:53, 3.55it/s] 29%|██▉ | 107222/371472 [8:31:36<22:39:35, 3.24it/s] 29%|██▉ | 107223/371472 [8:31:36<21:47:45, 3.37it/s] 29%|██▉ | 107224/371472 [8:31:37<20:47:59, 3.53it/s] 29%|██▉ | 107225/371472 [8:31:37<20:51:41, 3.52it/s] 29%|██▉ | 107226/371472 [8:31:37<20:32:15, 3.57it/s] 29%|██▉ | 107227/371472 [8:31:37<19:40:38, 3.73it/s] 29%|██▉ | 107228/371472 [8:31:38<20:19:23, 3.61it/s] 29%|██▉ | 107229/371472 [8:31:38<19:42:39, 3.72it/s] 29%|██▉ | 107230/371472 [8:31:38<19:08:18, 3.84it/s] 29%|██▉ | 107231/371472 [8:31:38<21:30:54, 3.41it/s] 29%|██▉ | 107232/371472 [8:31:39<20:17:38, 3.62it/s] 29%|██▉ | 107233/371472 [8:31:39<20:11:33, 3.63it/s] 29%|██▉ | 107234/371472 [8:31:39<20:33:53, 3.57it/s] 29%|██▉ | 107235/371472 [8:31:40<19:55:21, 3.68it/s] 29%|██▉ | 107236/371472 [8:31:40<20:14:43, 3.63it/s] 29%|██▉ | 107237/371472 [8:31:40<19:22:35, 3.79it/s] 29%|██▉ | 107238/371472 [8:31:40<19:08:31, 3.83it/s] 29%|██▉ | 107239/371472 [8:31:41<18:48:40, 3.90it/s] 29%|██▉ | 107240/371472 [8:31:41<19:59:12, 3.67it/s] {'loss': 3.4267, 'learning_rate': 7.405244672369583e-07, 'epoch': 4.62} + 29%|██▉ | 107240/371472 [8:31:41<19:59:12, 3.67it/s] 29%|██▉ | 107241/371472 [8:31:41<19:55:25, 3.68it/s] 29%|██▉ | 107242/371472 [8:31:41<20:08:53, 3.64it/s] 29%|██▉ | 107243/371472 [8:31:42<20:25:59, 3.59it/s] 29%|██▉ | 107244/371472 [8:31:42<21:15:15, 3.45it/s] 29%|██▉ | 107245/371472 [8:31:42<20:12:26, 3.63it/s] 29%|██▉ | 107246/371472 [8:31:42<19:32:42, 3.76it/s] 29%|██▉ | 107247/371472 [8:31:43<19:32:38, 3.76it/s] 29%|██▉ | 107248/371472 [8:31:43<19:02:30, 3.85it/s] 29%|██▉ | 107249/371472 [8:31:43<19:41:18, 3.73it/s] 29%|██▉ | 107250/371472 [8:31:44<20:28:32, 3.58it/s] 29%|██▉ | 107251/371472 [8:31:44<19:46:53, 3.71it/s] 29%|██▉ | 107252/371472 [8:31:44<20:27:57, 3.59it/s] 29%|██▉ | 107253/371472 [8:31:44<19:34:14, 3.75it/s] 29%|██▉ | 107254/371472 [8:31:45<21:46:13, 3.37it/s] 29%|██▉ | 107255/371472 [8:31:45<21:28:59, 3.42it/s] 29%|██▉ | 107256/371472 [8:31:45<20:54:08, 3.51it/s] 29%|██▉ | 107257/371472 [8:31:46<20:07:51, 3.65it/s] 29%|██▉ | 107258/371472 [8:31:46<21:46:16, 3.37it/s] 29%|██▉ | 107259/371472 [8:31:46<21:43:29, 3.38it/s] 29%|██▉ | 107260/371472 [8:31:47<22:24:45, 3.27it/s] {'loss': 3.5692, 'learning_rate': 7.404759852614795e-07, 'epoch': 4.62} + 29%|██▉ | 107260/371472 [8:31:47<22:24:45, 3.27it/s] 29%|██▉ | 107261/371472 [8:31:47<21:23:40, 3.43it/s] 29%|██▉ | 107262/371472 [8:31:47<20:44:59, 3.54it/s] 29%|██▉ | 107263/371472 [8:31:47<20:27:11, 3.59it/s] 29%|██▉ | 107264/371472 [8:31:48<20:16:13, 3.62it/s] 29%|██▉ | 107265/371472 [8:31:48<19:18:16, 3.80it/s] 29%|██▉ | 107266/371472 [8:31:48<18:51:54, 3.89it/s] 29%|██▉ | 107267/371472 [8:31:48<19:00:31, 3.86it/s] 29%|██▉ | 107268/371472 [8:31:49<18:51:59, 3.89it/s] 29%|██▉ | 107269/371472 [8:31:49<19:00:58, 3.86it/s] 29%|██▉ | 107270/371472 [8:31:49<19:35:08, 3.75it/s] 29%|██▉ | 107271/371472 [8:31:49<19:36:44, 3.74it/s] 29%|██▉ | 107272/371472 [8:31:50<19:14:53, 3.81it/s] 29%|██▉ | 107273/371472 [8:31:50<19:45:06, 3.72it/s] 29%|██▉ | 107274/371472 [8:31:50<19:49:48, 3.70it/s] 29%|██▉ | 107275/371472 [8:31:50<20:18:15, 3.61it/s] 29%|██▉ | 107276/371472 [8:31:51<19:54:48, 3.69it/s] 29%|██▉ | 107277/371472 [8:31:51<20:56:36, 3.50it/s] 29%|██▉ | 107278/371472 [8:31:51<20:18:57, 3.61it/s] 29%|██▉ | 107279/371472 [8:31:52<20:32:45, 3.57it/s] 29%|██▉ | 107280/371472 [8:31:52<20:49:23, 3.52it/s] {'loss': 3.4588, 'learning_rate': 7.404275032860005e-07, 'epoch': 4.62} + 29%|██▉ | 107280/371472 [8:31:52<20:49:23, 3.52it/s] 29%|██▉ | 107281/371472 [8:31:52<20:11:46, 3.63it/s] 29%|██▉ | 107282/371472 [8:31:52<19:22:41, 3.79it/s] 29%|██▉ | 107283/371472 [8:31:53<20:03:35, 3.66it/s] 29%|██▉ | 107284/371472 [8:31:53<20:15:15, 3.62it/s] 29%|██▉ | 107285/371472 [8:31:53<21:02:02, 3.49it/s] 29%|██▉ | 107286/371472 [8:31:54<20:32:36, 3.57it/s] 29%|██▉ | 107287/371472 [8:31:54<19:37:46, 3.74it/s] 29%|██▉ | 107288/371472 [8:31:54<20:33:02, 3.57it/s] 29%|██▉ | 107289/371472 [8:31:54<20:04:21, 3.66it/s] 29%|██▉ | 107290/371472 [8:31:55<20:39:17, 3.55it/s] 29%|██▉ | 107291/371472 [8:31:55<20:54:51, 3.51it/s] 29%|██▉ | 107292/371472 [8:31:55<20:58:05, 3.50it/s] 29%|██▉ | 107293/371472 [8:31:55<20:21:58, 3.60it/s] 29%|██▉ | 107294/371472 [8:31:56<21:44:33, 3.38it/s] 29%|██▉ | 107295/371472 [8:31:56<20:41:42, 3.55it/s] 29%|██▉ | 107296/371472 [8:31:56<20:36:46, 3.56it/s] 29%|██▉ | 107297/371472 [8:31:57<20:37:13, 3.56it/s] 29%|██▉ | 107298/371472 [8:31:57<22:44:42, 3.23it/s] 29%|██▉ | 107299/371472 [8:31:57<22:35:26, 3.25it/s] 29%|██▉ | 107300/371472 [8:31:58<22:01:10, 3.33it/s] {'loss': 3.4214, 'learning_rate': 7.403790213105217e-07, 'epoch': 4.62} + 29%|██▉ | 107300/371472 [8:31:58<22:01:10, 3.33it/s] 29%|██▉ | 107301/371472 [8:31:58<20:57:46, 3.50it/s] 29%|██▉ | 107302/371472 [8:31:58<20:56:52, 3.50it/s] 29%|██▉ | 107303/371472 [8:31:58<21:36:09, 3.40it/s] 29%|██▉ | 107304/371472 [8:31:59<21:34:50, 3.40it/s] 29%|██▉ | 107305/371472 [8:31:59<20:54:09, 3.51it/s] 29%|██▉ | 107306/371472 [8:31:59<20:59:03, 3.50it/s] 29%|██▉ | 107307/371472 [8:32:00<23:21:29, 3.14it/s] 29%|██▉ | 107308/371472 [8:32:00<22:18:12, 3.29it/s] 29%|██▉ | 107309/371472 [8:32:00<21:18:45, 3.44it/s] 29%|██▉ | 107310/371472 [8:32:01<23:03:06, 3.18it/s] 29%|██▉ | 107311/371472 [8:32:01<21:47:11, 3.37it/s] 29%|██▉ | 107312/371472 [8:32:01<21:43:11, 3.38it/s] 29%|██▉ | 107313/371472 [8:32:01<21:02:27, 3.49it/s] 29%|██▉ | 107314/371472 [8:32:02<21:01:59, 3.49it/s] 29%|██▉ | 107315/371472 [8:32:02<20:55:33, 3.51it/s] 29%|██▉ | 107316/371472 [8:32:02<20:28:19, 3.58it/s] 29%|██▉ | 107317/371472 [8:32:03<21:17:56, 3.45it/s] 29%|██▉ | 107318/371472 [8:32:03<21:40:00, 3.39it/s] 29%|██▉ | 107319/371472 [8:32:03<21:38:37, 3.39it/s] 29%|██▉ | 107320/371472 [8:32:03<21:11:06, 3.46it/s] {'loss': 3.273, 'learning_rate': 7.403305393350427e-07, 'epoch': 4.62} + 29%|██▉ | 107320/371472 [8:32:03<21:11:06, 3.46it/s] 29%|██▉ | 107321/371472 [8:32:04<20:37:15, 3.56it/s] 29%|██▉ | 107322/371472 [8:32:04<20:09:22, 3.64it/s] 29%|██▉ | 107323/371472 [8:32:04<19:13:18, 3.82it/s] 29%|██▉ | 107324/371472 [8:32:04<18:52:04, 3.89it/s] 29%|██▉ | 107325/371472 [8:32:05<19:11:52, 3.82it/s] 29%|██▉ | 107326/371472 [8:32:05<19:22:50, 3.79it/s] 29%|██▉ | 107327/371472 [8:32:05<19:23:40, 3.78it/s] 29%|██▉ | 107328/371472 [8:32:06<19:34:33, 3.75it/s] 29%|██▉ | 107329/371472 [8:32:06<20:37:52, 3.56it/s] 29%|██▉ | 107330/371472 [8:32:06<19:57:46, 3.68it/s] 29%|██▉ | 107331/371472 [8:32:06<21:16:14, 3.45it/s] 29%|██▉ | 107332/371472 [8:32:07<21:45:20, 3.37it/s] 29%|██▉ | 107333/371472 [8:32:07<21:05:02, 3.48it/s] 29%|██▉ | 107334/371472 [8:32:07<20:42:48, 3.54it/s] 29%|██▉ | 107335/371472 [8:32:08<20:38:50, 3.55it/s] 29%|██▉ | 107336/371472 [8:32:08<20:14:15, 3.63it/s] 29%|██▉ | 107337/371472 [8:32:08<21:47:22, 3.37it/s] 29%|██▉ | 107338/371472 [8:32:08<21:36:34, 3.40it/s] 29%|██▉ | 107339/371472 [8:32:09<20:53:51, 3.51it/s] 29%|██▉ | 107340/371472 [8:32:09<20:34:01, 3.57it/s] {'loss': 3.5492, 'learning_rate': 7.402820573595639e-07, 'epoch': 4.62} + 29%|██▉ | 107340/371472 [8:32:09<20:34:01, 3.57it/s] 29%|██▉ | 107341/371472 [8:32:09<20:27:27, 3.59it/s] 29%|██▉ | 107342/371472 [8:32:09<19:56:11, 3.68it/s] 29%|██▉ | 107343/371472 [8:32:10<20:18:50, 3.61it/s] 29%|██▉ | 107344/371472 [8:32:10<20:17:08, 3.62it/s] 29%|██▉ | 107345/371472 [8:32:10<20:15:12, 3.62it/s] 29%|██▉ | 107346/371472 [8:32:11<19:31:52, 3.76it/s] 29%|██▉ | 107347/371472 [8:32:11<19:17:37, 3.80it/s] 29%|██▉ | 107348/371472 [8:32:11<22:09:41, 3.31it/s] 29%|██▉ | 107349/371472 [8:32:12<22:26:55, 3.27it/s] 29%|██▉ | 107350/371472 [8:32:12<22:11:23, 3.31it/s] 29%|██▉ | 107351/371472 [8:32:12<22:00:11, 3.33it/s] 29%|██▉ | 107352/371472 [8:32:12<22:07:40, 3.32it/s] 29%|██▉ | 107353/371472 [8:32:13<24:18:39, 3.02it/s] 29%|██▉ | 107354/371472 [8:32:13<22:55:59, 3.20it/s] 29%|██▉ | 107355/371472 [8:32:13<22:34:07, 3.25it/s] 29%|██▉ | 107356/371472 [8:32:14<21:38:04, 3.39it/s] 29%|██▉ | 107357/371472 [8:32:14<20:22:56, 3.60it/s] 29%|██▉ | 107358/371472 [8:32:14<19:34:12, 3.75it/s] 29%|██▉ | 107359/371472 [8:32:14<18:52:45, 3.89it/s] 29%|██▉ | 107360/371472 [8:32:15<18:33:45, 3.95it/s] {'loss': 3.3676, 'learning_rate': 7.40233575384085e-07, 'epoch': 4.62} + 29%|██▉ | 107360/371472 [8:32:15<18:33:45, 3.95it/s] 29%|██▉ | 107361/371472 [8:32:15<19:15:29, 3.81it/s] 29%|██▉ | 107362/371472 [8:32:15<19:01:07, 3.86it/s] 29%|██▉ | 107363/371472 [8:32:15<19:30:56, 3.76it/s] 29%|██▉ | 107364/371472 [8:32:16<20:10:18, 3.64it/s] 29%|██▉ | 107365/371472 [8:32:16<20:09:14, 3.64it/s] 29%|██▉ | 107366/371472 [8:32:16<19:49:27, 3.70it/s] 29%|██▉ | 107367/371472 [8:32:17<19:35:21, 3.75it/s] 29%|██▉ | 107368/371472 [8:32:17<19:51:56, 3.69it/s] 29%|██▉ | 107369/371472 [8:32:17<20:18:10, 3.61it/s] 29%|██▉ | 107370/371472 [8:32:17<20:22:21, 3.60it/s] 29%|██▉ | 107371/371472 [8:32:18<22:50:37, 3.21it/s] 29%|██▉ | 107372/371472 [8:32:18<22:14:02, 3.30it/s] 29%|██▉ | 107373/371472 [8:32:18<21:48:26, 3.36it/s] 29%|██▉ | 107374/371472 [8:32:19<21:18:01, 3.44it/s] 29%|██▉ | 107375/371472 [8:32:19<20:19:49, 3.61it/s] 29%|██▉ | 107376/371472 [8:32:19<19:55:08, 3.68it/s] 29%|██▉ | 107377/371472 [8:32:19<20:46:52, 3.53it/s] 29%|██▉ | 107378/371472 [8:32:20<21:20:14, 3.44it/s] 29%|██▉ | 107379/371472 [8:32:20<21:29:14, 3.41it/s] 29%|██▉ | 107380/371472 [8:32:20<20:20:41, 3.61it/s] {'loss': 3.4707, 'learning_rate': 7.401850934086061e-07, 'epoch': 4.63} + 29%|██▉ | 107380/371472 [8:32:20<20:20:41, 3.61it/s] 29%|██▉ | 107381/371472 [8:32:21<24:14:10, 3.03it/s] 29%|██▉ | 107382/371472 [8:32:21<22:43:48, 3.23it/s] 29%|██▉ | 107383/371472 [8:32:21<21:26:19, 3.42it/s] 29%|██▉ | 107384/371472 [8:32:22<21:11:58, 3.46it/s] 29%|██▉ | 107385/371472 [8:32:22<20:50:23, 3.52it/s] 29%|██▉ | 107386/371472 [8:32:22<21:07:32, 3.47it/s] 29%|██▉ | 107387/371472 [8:32:22<21:26:58, 3.42it/s] 29%|██▉ | 107388/371472 [8:32:23<21:19:04, 3.44it/s] 29%|██▉ | 107389/371472 [8:32:23<20:28:58, 3.58it/s] 29%|██▉ | 107390/371472 [8:32:23<20:23:41, 3.60it/s] 29%|██▉ | 107391/371472 [8:32:24<20:46:10, 3.53it/s] 29%|██▉ | 107392/371472 [8:32:24<21:07:49, 3.47it/s] 29%|██▉ | 107393/371472 [8:32:24<20:15:40, 3.62it/s] 29%|██▉ | 107394/371472 [8:32:24<19:22:03, 3.79it/s] 29%|██▉ | 107395/371472 [8:32:25<19:11:44, 3.82it/s] 29%|██▉ | 107396/371472 [8:32:25<19:39:51, 3.73it/s] 29%|██▉ | 107397/371472 [8:32:25<19:30:45, 3.76it/s] 29%|██▉ | 107398/371472 [8:32:25<21:11:56, 3.46it/s] 29%|██▉ | 107399/371472 [8:32:26<21:38:49, 3.39it/s] 29%|██▉ | 107400/371472 [8:32:26<21:51:29, 3.36it/s] {'loss': 3.3834, 'learning_rate': 7.401366114331271e-07, 'epoch': 4.63} + 29%|██▉ | 107400/371472 [8:32:26<21:51:29, 3.36it/s] 29%|██▉ | 107401/371472 [8:32:26<21:22:33, 3.43it/s] 29%|██▉ | 107402/371472 [8:32:27<21:13:16, 3.46it/s] 29%|██▉ | 107403/371472 [8:32:27<20:33:24, 3.57it/s] 29%|██▉ | 107404/371472 [8:32:27<21:12:11, 3.46it/s] 29%|██▉ | 107405/371472 [8:32:27<20:02:39, 3.66it/s] 29%|██▉ | 107406/371472 [8:32:28<19:39:39, 3.73it/s] 29%|██▉ | 107407/371472 [8:32:28<19:41:20, 3.73it/s] 29%|██▉ | 107408/371472 [8:32:28<20:14:46, 3.62it/s] 29%|██▉ | 107409/371472 [8:32:29<21:15:51, 3.45it/s] 29%|██▉ | 107410/371472 [8:32:29<20:53:01, 3.51it/s] 29%|██▉ | 107411/371472 [8:32:29<20:56:45, 3.50it/s] 29%|██▉ | 107412/371472 [8:32:29<22:02:38, 3.33it/s] 29%|██▉ | 107413/371472 [8:32:30<20:45:45, 3.53it/s] 29%|██▉ | 107414/371472 [8:32:30<20:26:20, 3.59it/s] 29%|██▉ | 107415/371472 [8:32:30<20:51:00, 3.52it/s] 29%|██▉ | 107416/371472 [8:32:31<20:33:57, 3.57it/s] 29%|██▉ | 107417/371472 [8:32:31<19:47:30, 3.71it/s] 29%|██▉ | 107418/371472 [8:32:31<19:14:30, 3.81it/s] 29%|██▉ | 107419/371472 [8:32:31<19:23:55, 3.78it/s] 29%|██▉ | 107420/371472 [8:32:32<19:04:22, 3.85it/s] {'loss': 3.5304, 'learning_rate': 7.400881294576482e-07, 'epoch': 4.63} + 29%|██▉ | 107420/371472 [8:32:32<19:04:22, 3.85it/s] 29%|██▉ | 107421/371472 [8:32:32<19:04:25, 3.85it/s] 29%|██▉ | 107422/371472 [8:32:32<20:07:49, 3.64it/s] 29%|██▉ | 107423/371472 [8:32:32<19:34:06, 3.75it/s] 29%|██▉ | 107424/371472 [8:32:33<21:16:16, 3.45it/s] 29%|██▉ | 107425/371472 [8:32:33<20:27:58, 3.58it/s] 29%|██▉ | 107426/371472 [8:32:33<20:13:34, 3.63it/s] 29%|██▉ | 107427/371472 [8:32:33<20:03:43, 3.66it/s] 29%|██▉ | 107428/371472 [8:32:34<19:16:58, 3.80it/s] 29%|██▉ | 107429/371472 [8:32:34<19:08:12, 3.83it/s] 29%|██▉ | 107430/371472 [8:32:34<19:19:54, 3.79it/s] 29%|██▉ | 107431/371472 [8:32:35<21:04:29, 3.48it/s] 29%|██▉ | 107432/371472 [8:32:35<21:45:55, 3.37it/s] 29%|██▉ | 107433/371472 [8:32:35<22:31:27, 3.26it/s] 29%|██▉ | 107434/371472 [8:32:35<21:23:30, 3.43it/s] 29%|██▉ | 107435/371472 [8:32:36<20:15:57, 3.62it/s] 29%|██▉ | 107436/371472 [8:32:36<20:30:32, 3.58it/s] 29%|██▉ | 107437/371472 [8:32:36<21:05:23, 3.48it/s] 29%|██▉ | 107438/371472 [8:32:37<21:04:05, 3.48it/s] 29%|██▉ | 107439/371472 [8:32:37<20:26:42, 3.59it/s] 29%|██▉ | 107440/371472 [8:32:37<20:31:07, 3.57it/s] {'loss': 3.4311, 'learning_rate': 7.400396474821694e-07, 'epoch': 4.63} + 29%|██▉ | 107440/371472 [8:32:37<20:31:07, 3.57it/s] 29%|██▉ | 107441/371472 [8:32:37<21:48:57, 3.36it/s] 29%|██▉ | 107442/371472 [8:32:38<22:11:46, 3.30it/s] 29%|██▉ | 107443/371472 [8:32:38<21:28:13, 3.42it/s] 29%|██▉ | 107444/371472 [8:32:38<23:50:04, 3.08it/s] 29%|██▉ | 107445/371472 [8:32:39<22:13:26, 3.30it/s] 29%|██▉ | 107446/371472 [8:32:39<21:33:28, 3.40it/s] 29%|██▉ | 107447/371472 [8:32:39<20:50:16, 3.52it/s] 29%|██▉ | 107448/371472 [8:32:40<20:06:12, 3.65it/s] 29%|██▉ | 107449/371472 [8:32:40<19:25:02, 3.78it/s] 29%|██▉ | 107450/371472 [8:32:40<20:16:31, 3.62it/s] 29%|██▉ | 107451/371472 [8:32:40<19:47:52, 3.70it/s] 29%|██▉ | 107452/371472 [8:32:41<22:33:25, 3.25it/s] 29%|██▉ | 107453/371472 [8:32:41<21:27:02, 3.42it/s] 29%|██▉ | 107454/371472 [8:32:41<21:31:03, 3.41it/s] 29%|██▉ | 107455/371472 [8:32:42<20:33:11, 3.57it/s] 29%|██▉ | 107456/371472 [8:32:42<20:30:59, 3.57it/s] 29%|██▉ | 107457/371472 [8:32:42<19:47:38, 3.71it/s] 29%|██▉ | 107458/371472 [8:32:42<19:26:44, 3.77it/s] 29%|██▉ | 107459/371472 [8:32:43<19:12:44, 3.82it/s] 29%|██▉ | 107460/371472 [8:32:43<20:01:52, 3.66it/s] {'loss': 3.5663, 'learning_rate': 7.399911655066905e-07, 'epoch': 4.63} + 29%|██▉ | 107460/371472 [8:32:43<20:01:52, 3.66it/s] 29%|██▉ | 107461/371472 [8:32:43<19:46:08, 3.71it/s] 29%|██▉ | 107462/371472 [8:32:43<19:38:17, 3.73it/s] 29%|██▉ | 107463/371472 [8:32:44<19:40:16, 3.73it/s] 29%|██▉ | 107464/371472 [8:32:44<19:10:23, 3.82it/s] 29%|██▉ | 107465/371472 [8:32:44<19:22:05, 3.79it/s] 29%|██▉ | 107466/371472 [8:32:44<19:42:39, 3.72it/s] 29%|██▉ | 107467/371472 [8:32:45<20:33:59, 3.57it/s] 29%|██▉ | 107468/371472 [8:32:45<20:44:07, 3.54it/s] 29%|██▉ | 107469/371472 [8:32:45<19:51:19, 3.69it/s] 29%|██▉ | 107470/371472 [8:32:46<19:18:39, 3.80it/s] 29%|██▉ | 107471/371472 [8:32:46<20:24:06, 3.59it/s] 29%|██▉ | 107472/371472 [8:32:46<19:34:16, 3.75it/s] 29%|██▉ | 107473/371472 [8:32:46<19:28:24, 3.77it/s] 29%|██▉ | 107474/371472 [8:32:47<19:33:52, 3.75it/s] 29%|██▉ | 107475/371472 [8:32:47<18:51:40, 3.89it/s] 29%|██▉ | 107476/371472 [8:32:47<19:58:22, 3.67it/s] 29%|██▉ | 107477/371472 [8:32:47<20:19:24, 3.61it/s] 29%|██▉ | 107478/371472 [8:32:48<19:59:54, 3.67it/s] 29%|██▉ | 107479/371472 [8:32:48<20:03:33, 3.66it/s] 29%|██▉ | 107480/371472 [8:32:48<20:27:52, 3.58it/s] {'loss': 3.4356, 'learning_rate': 7.399426835312116e-07, 'epoch': 4.63} + 29%|██�� | 107480/371472 [8:32:48<20:27:52, 3.58it/s] 29%|██▉ | 107481/371472 [8:32:49<20:58:57, 3.49it/s] 29%|██▉ | 107482/371472 [8:32:49<21:59:08, 3.34it/s] 29%|██▉ | 107483/371472 [8:32:49<21:01:27, 3.49it/s] 29%|██▉ | 107484/371472 [8:32:49<19:54:18, 3.68it/s] 29%|██▉ | 107485/371472 [8:32:50<20:28:49, 3.58it/s] 29%|██▉ | 107486/371472 [8:32:50<21:50:33, 3.36it/s] 29%|██▉ | 107487/371472 [8:32:50<21:33:42, 3.40it/s] 29%|██▉ | 107488/371472 [8:32:51<20:23:48, 3.60it/s] 29%|██▉ | 107489/371472 [8:32:51<21:32:23, 3.40it/s] 29%|██▉ | 107490/371472 [8:32:51<23:02:59, 3.18it/s] 29%|██▉ | 107491/371472 [8:32:52<22:35:57, 3.24it/s] 29%|██▉ | 107492/371472 [8:32:52<22:52:53, 3.20it/s] 29%|██▉ | 107493/371472 [8:32:52<23:53:07, 3.07it/s] 29%|██▉ | 107494/371472 [8:32:52<22:39:38, 3.24it/s] 29%|██▉ | 107495/371472 [8:32:53<23:03:00, 3.18it/s] 29%|██▉ | 107496/371472 [8:32:53<22:09:45, 3.31it/s] 29%|██▉ | 107497/371472 [8:32:53<22:12:34, 3.30it/s] 29%|██▉ | 107498/371472 [8:32:54<21:28:55, 3.41it/s] 29%|██▉ | 107499/371472 [8:32:54<20:30:18, 3.58it/s] 29%|██▉ | 107500/371472 [8:32:54<20:49:14, 3.52it/s] {'loss': 3.5589, 'learning_rate': 7.398942015557327e-07, 'epoch': 4.63} + 29%|██▉ | 107500/371472 [8:32:54<20:49:14, 3.52it/s] 29%|██▉ | 107501/371472 [8:32:54<20:01:21, 3.66it/s] 29%|██▉ | 107502/371472 [8:32:55<21:09:17, 3.47it/s] 29%|██▉ | 107503/371472 [8:32:55<20:33:18, 3.57it/s] 29%|██▉ | 107504/371472 [8:32:55<20:21:56, 3.60it/s] 29%|██▉ | 107505/371472 [8:32:56<20:17:51, 3.61it/s] 29%|██▉ | 107506/371472 [8:32:56<20:33:10, 3.57it/s] 29%|██▉ | 107507/371472 [8:32:56<20:18:43, 3.61it/s] 29%|██▉ | 107508/371472 [8:32:56<19:59:39, 3.67it/s] 29%|██▉ | 107509/371472 [8:32:57<20:03:43, 3.65it/s] 29%|██▉ | 107510/371472 [8:32:57<19:32:22, 3.75it/s] 29%|██▉ | 107511/371472 [8:32:57<19:47:18, 3.71it/s] 29%|██▉ | 107512/371472 [8:32:58<20:13:50, 3.62it/s] 29%|██▉ | 107513/371472 [8:32:58<20:27:13, 3.58it/s] 29%|██▉ | 107514/371472 [8:32:58<21:52:26, 3.35it/s] 29%|██▉ | 107515/371472 [8:32:58<21:35:40, 3.40it/s] 29%|██▉ | 107516/371472 [8:32:59<21:50:29, 3.36it/s] 29%|██▉ | 107517/371472 [8:32:59<21:13:30, 3.45it/s] 29%|██▉ | 107518/371472 [8:32:59<20:10:14, 3.63it/s] 29%|██▉ | 107519/371472 [8:32:59<19:32:43, 3.75it/s] 29%|██▉ | 107520/371472 [8:33:00<19:14:53, 3.81it/s] {'loss': 3.4957, 'learning_rate': 7.398457195802538e-07, 'epoch': 4.63} + 29%|██▉ | 107520/371472 [8:33:00<19:14:53, 3.81it/s] 29%|██▉ | 107521/371472 [8:33:00<19:55:49, 3.68it/s] 29%|██▉ | 107522/371472 [8:33:00<19:48:03, 3.70it/s] 29%|██▉ | 107523/371472 [8:33:01<19:30:41, 3.76it/s] 29%|██▉ | 107524/371472 [8:33:01<19:58:03, 3.67it/s] 29%|██▉ | 107525/371472 [8:33:01<19:34:42, 3.74it/s] 29%|██▉ | 107526/371472 [8:33:01<19:16:17, 3.80it/s] 29%|██▉ | 107527/371472 [8:33:02<19:07:43, 3.83it/s] 29%|██▉ | 107528/371472 [8:33:02<18:43:43, 3.91it/s] 29%|██▉ | 107529/371472 [8:33:02<19:19:13, 3.79it/s] 29%|██▉ | 107530/371472 [8:33:02<21:22:45, 3.43it/s] 29%|██▉ | 107531/371472 [8:33:03<21:37:12, 3.39it/s] 29%|██▉ | 107532/371472 [8:33:03<21:55:52, 3.34it/s] 29%|██▉ | 107533/371472 [8:33:03<21:19:13, 3.44it/s] 29%|██▉ | 107534/371472 [8:33:04<21:19:57, 3.44it/s] 29%|██▉ | 107535/371472 [8:33:04<21:07:38, 3.47it/s] 29%|██▉ | 107536/371472 [8:33:04<21:15:12, 3.45it/s] 29%|██▉ | 107537/371472 [8:33:05<22:45:41, 3.22it/s] 29%|██▉ | 107538/371472 [8:33:05<22:17:54, 3.29it/s] 29%|██▉ | 107539/371472 [8:33:05<22:11:10, 3.30it/s] 29%|██▉ | 107540/371472 [8:33:05<21:31:20, 3.41it/s] {'loss': 3.3148, 'learning_rate': 7.397972376047749e-07, 'epoch': 4.63} + 29%|██▉ | 107540/371472 [8:33:05<21:31:20, 3.41it/s] 29%|██▉ | 107541/371472 [8:33:06<21:31:59, 3.40it/s] 29%|██▉ | 107542/371472 [8:33:06<21:19:25, 3.44it/s] 29%|██▉ | 107543/371472 [8:33:06<21:50:37, 3.36it/s] 29%|██▉ | 107544/371472 [8:33:07<20:31:31, 3.57it/s] 29%|██▉ | 107545/371472 [8:33:07<20:55:19, 3.50it/s] 29%|██▉ | 107546/371472 [8:33:07<20:36:22, 3.56it/s] 29%|██▉ | 107547/371472 [8:33:07<20:46:52, 3.53it/s] 29%|██▉ | 107548/371472 [8:33:08<20:35:04, 3.56it/s] 29%|██▉ | 107549/371472 [8:33:08<22:53:49, 3.20it/s] 29%|██▉ | 107550/371472 [8:33:08<22:49:26, 3.21it/s] 29%|██▉ | 107551/371472 [8:33:09<22:45:23, 3.22it/s] 29%|██▉ | 107552/371472 [8:33:09<23:44:42, 3.09it/s] 29%|██▉ | 107553/371472 [8:33:09<22:51:48, 3.21it/s] 29%|██▉ | 107554/371472 [8:33:10<22:45:40, 3.22it/s] 29%|██▉ | 107555/371472 [8:33:10<22:35:17, 3.25it/s] 29%|██▉ | 107556/371472 [8:33:10<21:28:11, 3.41it/s] 29%|██▉ | 107557/371472 [8:33:10<20:13:28, 3.62it/s] 29%|██▉ | 107558/371472 [8:33:11<19:58:04, 3.67it/s] 29%|██▉ | 107559/371472 [8:33:11<21:30:58, 3.41it/s] 29%|██▉ | 107560/371472 [8:33:11<21:19:13, 3.44it/s] {'loss': 3.3393, 'learning_rate': 7.39748755629296e-07, 'epoch': 4.63} + 29%|██▉ | 107560/371472 [8:33:11<21:19:13, 3.44it/s] 29%|██▉ | 107561/371472 [8:33:12<20:28:30, 3.58it/s] 29%|██▉ | 107562/371472 [8:33:12<20:28:57, 3.58it/s] 29%|██▉ | 107563/371472 [8:33:12<20:18:55, 3.61it/s] 29%|██▉ | 107564/371472 [8:33:12<20:47:25, 3.53it/s] 29%|██▉ | 107565/371472 [8:33:13<19:52:06, 3.69it/s] 29%|██▉ | 107566/371472 [8:33:13<19:39:16, 3.73it/s] 29%|██▉ | 107567/371472 [8:33:13<20:18:06, 3.61it/s] 29%|██▉ | 107568/371472 [8:33:14<20:43:22, 3.54it/s] 29%|██▉ | 107569/371472 [8:33:14<22:18:51, 3.29it/s] 29%|██▉ | 107570/371472 [8:33:14<22:35:07, 3.25it/s] 29%|██▉ | 107571/371472 [8:33:14<21:38:23, 3.39it/s] 29%|██▉ | 107572/371472 [8:33:15<20:49:28, 3.52it/s] 29%|██▉ | 107573/371472 [8:33:15<20:09:48, 3.64it/s] 29%|██▉ | 107574/371472 [8:33:15<20:02:00, 3.66it/s] 29%|██▉ | 107575/371472 [8:33:16<20:43:32, 3.54it/s] 29%|██▉ | 107576/371472 [8:33:16<20:57:37, 3.50it/s] 29%|██▉ | 107577/371472 [8:33:16<20:29:30, 3.58it/s] 29%|██▉ | 107578/371472 [8:33:16<21:08:39, 3.47it/s] 29%|██▉ | 107579/371472 [8:33:17<21:45:34, 3.37it/s] 29%|██▉ | 107580/371472 [8:33:17<20:39:26, 3.55it/s] {'loss': 3.3857, 'learning_rate': 7.397002736538171e-07, 'epoch': 4.63} + 29%|██▉ | 107580/371472 [8:33:17<20:39:26, 3.55it/s] 29%|██▉ | 107581/371472 [8:33:17<22:17:43, 3.29it/s] 29%|██▉ | 107582/371472 [8:33:18<21:11:44, 3.46it/s] 29%|██▉ | 107583/371472 [8:33:18<20:56:11, 3.50it/s] 29%|██▉ | 107584/371472 [8:33:18<21:08:00, 3.47it/s] 29%|██▉ | 107585/371472 [8:33:18<20:15:03, 3.62it/s] 29%|██▉ | 107586/371472 [8:33:19<20:29:49, 3.58it/s] 29%|██▉ | 107587/371472 [8:33:19<19:53:09, 3.69it/s] 29%|██▉ | 107588/371472 [8:33:19<19:37:39, 3.73it/s] 29%|██▉ | 107589/371472 [8:33:20<20:11:00, 3.63it/s] 29%|██▉ | 107590/371472 [8:33:20<20:22:38, 3.60it/s] 29%|██▉ | 107591/371472 [8:33:20<20:49:48, 3.52it/s] 29%|██▉ | 107592/371472 [8:33:20<21:47:49, 3.36it/s] 29%|██▉ | 107593/371472 [8:33:21<21:36:41, 3.39it/s] 29%|██▉ | 107594/371472 [8:33:21<20:44:32, 3.53it/s] 29%|██▉ | 107595/371472 [8:33:21<20:39:09, 3.55it/s] 29%|██▉ | 107596/371472 [8:33:22<20:05:21, 3.65it/s] 29%|██▉ | 107597/371472 [8:33:22<20:32:06, 3.57it/s] 29%|██▉ | 107598/371472 [8:33:22<20:33:34, 3.57it/s] 29%|██▉ | 107599/371472 [8:33:22<20:30:07, 3.58it/s] 29%|██▉ | 107600/371472 [8:33:23<20:35:28, 3.56it/s] {'loss': 3.2937, 'learning_rate': 7.396517916783383e-07, 'epoch': 4.63} + 29%|██▉ | 107600/371472 [8:33:23<20:35:28, 3.56it/s] 29%|██▉ | 107601/371472 [8:33:23<22:07:44, 3.31it/s] 29%|██▉ | 107602/371472 [8:33:23<21:15:22, 3.45it/s] 29%|██▉ | 107603/371472 [8:33:24<21:09:41, 3.46it/s] 29%|██▉ | 107604/371472 [8:33:24<20:20:38, 3.60it/s] 29%|██▉ | 107605/371472 [8:33:24<19:44:17, 3.71it/s] 29%|██▉ | 107606/371472 [8:33:24<20:12:05, 3.63it/s] 29%|██▉ | 107607/371472 [8:33:25<20:00:32, 3.66it/s] 29%|██▉ | 107608/371472 [8:33:25<20:30:28, 3.57it/s] 29%|██▉ | 107609/371472 [8:33:25<20:47:13, 3.53it/s] 29%|██▉ | 107610/371472 [8:33:25<20:47:13, 3.53it/s] 29%|██▉ | 107611/371472 [8:33:26<21:04:51, 3.48it/s] 29%|██▉ | 107612/371472 [8:33:26<20:14:05, 3.62it/s] 29%|██▉ | 107613/371472 [8:33:26<20:28:57, 3.58it/s] 29%|██▉ | 107614/371472 [8:33:27<21:23:11, 3.43it/s] 29%|██▉ | 107615/371472 [8:33:27<20:56:07, 3.50it/s] 29%|██▉ | 107616/371472 [8:33:27<20:34:55, 3.56it/s] 29%|██▉ | 107617/371472 [8:33:27<20:03:29, 3.65it/s] 29%|██▉ | 107618/371472 [8:33:28<19:35:54, 3.74it/s] 29%|██▉ | 107619/371472 [8:33:28<20:20:07, 3.60it/s] 29%|██▉ | 107620/371472 [8:33:28<21:00:23, 3.49it/s] {'loss': 3.1365, 'learning_rate': 7.396033097028594e-07, 'epoch': 4.64} + 29%|██▉ | 107620/371472 [8:33:28<21:00:23, 3.49it/s] 29%|██▉ | 107621/371472 [8:33:29<21:02:36, 3.48it/s] 29%|██▉ | 107622/371472 [8:33:29<20:52:45, 3.51it/s] 29%|██▉ | 107623/371472 [8:33:29<21:01:59, 3.48it/s] 29%|██▉ | 107624/371472 [8:33:29<20:30:10, 3.57it/s] 29%|██▉ | 107625/371472 [8:33:30<19:47:40, 3.70it/s] 29%|██▉ | 107626/371472 [8:33:30<22:11:14, 3.30it/s] 29%|██▉ | 107627/371472 [8:33:30<21:13:33, 3.45it/s] 29%|██▉ | 107628/371472 [8:33:31<21:23:10, 3.43it/s] 29%|██▉ | 107629/371472 [8:33:31<22:38:59, 3.24it/s] 29%|██▉ | 107630/371472 [8:33:31<21:30:48, 3.41it/s] 29%|██▉ | 107631/371472 [8:33:31<21:09:45, 3.46it/s] 29%|██▉ | 107632/371472 [8:33:32<21:16:07, 3.45it/s] 29%|██▉ | 107633/371472 [8:33:32<20:33:32, 3.56it/s] 29%|██▉ | 107634/371472 [8:33:32<21:37:18, 3.39it/s] 29%|██▉ | 107635/371472 [8:33:33<20:39:32, 3.55it/s] 29%|██▉ | 107636/371472 [8:33:33<20:17:20, 3.61it/s] 29%|██▉ | 107637/371472 [8:33:33<21:16:26, 3.44it/s] 29%|██▉ | 107638/371472 [8:33:34<21:48:24, 3.36it/s] 29%|██▉ | 107639/371472 [8:33:34<20:44:28, 3.53it/s] 29%|██▉ | 107640/371472 [8:33:34<20:52:07, 3.51it/s] {'loss': 3.2864, 'learning_rate': 7.395548277273805e-07, 'epoch': 4.64} + 29%|██▉ | 107640/371472 [8:33:34<20:52:07, 3.51it/s] 29%|██▉ | 107641/371472 [8:33:34<21:59:22, 3.33it/s] 29%|██▉ | 107642/371472 [8:33:35<21:57:09, 3.34it/s] 29%|██▉ | 107643/371472 [8:33:35<21:15:24, 3.45it/s] 29%|██▉ | 107644/371472 [8:33:35<20:01:17, 3.66it/s] 29%|██▉ | 107645/371472 [8:33:36<22:05:23, 3.32it/s] 29%|██▉ | 107646/371472 [8:33:36<21:17:27, 3.44it/s] 29%|██▉ | 107647/371472 [8:33:36<21:18:42, 3.44it/s] 29%|██▉ | 107648/371472 [8:33:36<20:58:30, 3.49it/s] 29%|██▉ | 107649/371472 [8:33:37<22:16:09, 3.29it/s] 29%|██▉ | 107650/371472 [8:33:37<21:38:32, 3.39it/s] 29%|██▉ | 107651/371472 [8:33:37<20:51:05, 3.51it/s] 29%|██▉ | 107652/371472 [8:33:38<20:40:14, 3.55it/s] 29%|██▉ | 107653/371472 [8:33:38<20:50:04, 3.52it/s] 29%|██▉ | 107654/371472 [8:33:38<20:07:47, 3.64it/s] 29%|██▉ | 107655/371472 [8:33:38<20:38:07, 3.55it/s] 29%|██▉ | 107656/371472 [8:33:39<20:28:21, 3.58it/s] 29%|██▉ | 107657/371472 [8:33:39<20:42:09, 3.54it/s] 29%|██▉ | 107658/371472 [8:33:39<21:08:52, 3.47it/s] 29%|██▉ | 107659/371472 [8:33:40<20:40:33, 3.54it/s] 29%|██▉ | 107660/371472 [8:33:40<20:54:48, 3.50it/s] {'loss': 3.4913, 'learning_rate': 7.395063457519015e-07, 'epoch': 4.64} + 29%|██▉ | 107660/371472 [8:33:40<20:54:48, 3.50it/s] 29%|██▉ | 107661/371472 [8:33:40<20:26:11, 3.59it/s] 29%|██▉ | 107662/371472 [8:33:40<20:34:43, 3.56it/s] 29%|██▉ | 107663/371472 [8:33:41<20:35:27, 3.56it/s] 29%|██▉ | 107664/371472 [8:33:41<20:21:58, 3.60it/s] 29%|██▉ | 107665/371472 [8:33:41<20:53:28, 3.51it/s] 29%|██▉ | 107666/371472 [8:33:42<20:56:11, 3.50it/s] 29%|██▉ | 107667/371472 [8:33:42<20:45:05, 3.53it/s] 29%|██▉ | 107668/371472 [8:33:42<21:51:40, 3.35it/s] 29%|██▉ | 107669/371472 [8:33:42<20:57:31, 3.50it/s] 29%|██▉ | 107670/371472 [8:33:43<22:19:46, 3.28it/s] 29%|██▉ | 107671/371472 [8:33:43<23:42:03, 3.09it/s] 29%|██▉ | 107672/371472 [8:33:43<22:08:26, 3.31it/s] 29%|██▉ | 107673/371472 [8:33:44<21:10:20, 3.46it/s] 29%|██▉ | 107674/371472 [8:33:44<20:12:06, 3.63it/s] 29%|██▉ | 107675/371472 [8:33:44<20:15:41, 3.62it/s] 29%|██▉ | 107676/371472 [8:33:45<22:59:01, 3.19it/s] 29%|██▉ | 107677/371472 [8:33:45<22:24:52, 3.27it/s] 29%|██▉ | 107678/371472 [8:33:45<21:15:12, 3.45it/s] 29%|██▉ | 107679/371472 [8:33:45<21:04:51, 3.48it/s] 29%|██▉ | 107680/371472 [8:33:46<20:23:31, 3.59it/s] {'loss': 3.3579, 'learning_rate': 7.394578637764226e-07, 'epoch': 4.64} + 29%|██▉ | 107680/371472 [8:33:46<20:23:31, 3.59it/s] 29%|██▉ | 107681/371472 [8:33:46<21:43:21, 3.37it/s] 29%|██▉ | 107682/371472 [8:33:46<21:08:16, 3.47it/s] 29%|██▉ | 107683/371472 [8:33:46<20:16:16, 3.61it/s] 29%|██▉ | 107684/371472 [8:33:47<21:35:34, 3.39it/s] 29%|██▉ | 107685/371472 [8:33:47<20:33:20, 3.56it/s] 29%|██▉ | 107686/371472 [8:33:47<20:12:00, 3.63it/s] 29%|██▉ | 107687/371472 [8:33:48<19:16:37, 3.80it/s] 29%|██▉ | 107688/371472 [8:33:48<21:44:12, 3.37it/s] 29%|██▉ | 107689/371472 [8:33:48<21:07:00, 3.47it/s] 29%|██▉ | 107690/371472 [8:33:48<21:04:50, 3.48it/s] 29%|██▉ | 107691/371472 [8:33:49<20:34:52, 3.56it/s] 29%|██▉ | 107692/371472 [8:33:49<24:25:01, 3.00it/s] 29%|██▉ | 107693/371472 [8:33:49<22:38:07, 3.24it/s] 29%|██▉ | 107694/371472 [8:33:50<21:41:03, 3.38it/s] 29%|██▉ | 107695/371472 [8:33:50<21:42:05, 3.38it/s] 29%|██▉ | 107696/371472 [8:33:50<21:20:10, 3.43it/s] 29%|██▉ | 107697/371472 [8:33:51<20:12:34, 3.63it/s] 29%|██▉ | 107698/371472 [8:33:51<19:47:58, 3.70it/s] 29%|██▉ | 107699/371472 [8:33:51<19:19:14, 3.79it/s] 29%|██▉ | 107700/371472 [8:33:51<20:47:57, 3.52it/s] {'loss': 3.3795, 'learning_rate': 7.394093818009437e-07, 'epoch': 4.64} + 29%|██▉ | 107700/371472 [8:33:51<20:47:57, 3.52it/s] 29%|██▉ | 107701/371472 [8:33:52<20:46:46, 3.53it/s] 29%|██▉ | 107702/371472 [8:33:52<21:09:45, 3.46it/s] 29%|██▉ | 107703/371472 [8:33:52<20:28:45, 3.58it/s] 29%|██▉ | 107704/371472 [8:33:52<19:49:16, 3.70it/s] 29%|██▉ | 107705/371472 [8:33:53<20:48:39, 3.52it/s] 29%|██▉ | 107706/371472 [8:33:53<20:09:51, 3.63it/s] 29%|██▉ | 107707/371472 [8:33:53<20:38:23, 3.55it/s] 29%|██▉ | 107708/371472 [8:33:54<20:47:26, 3.52it/s] 29%|██▉ | 107709/371472 [8:33:54<20:03:25, 3.65it/s] 29%|██▉ | 107710/371472 [8:33:54<20:08:07, 3.64it/s] 29%|██▉ | 107711/371472 [8:33:54<19:39:52, 3.73it/s] 29%|██▉ | 107712/371472 [8:33:55<20:12:30, 3.63it/s] 29%|██▉ | 107713/371472 [8:33:55<20:07:51, 3.64it/s] 29%|██▉ | 107714/371472 [8:33:55<19:31:15, 3.75it/s] 29%|██▉ | 107715/371472 [8:33:56<20:02:30, 3.66it/s] 29%|██▉ | 107716/371472 [8:33:56<20:53:48, 3.51it/s] 29%|██▉ | 107717/371472 [8:33:56<20:23:48, 3.59it/s] 29%|██▉ | 107718/371472 [8:33:56<19:57:40, 3.67it/s] 29%|██▉ | 107719/371472 [8:33:57<19:58:57, 3.67it/s] 29%|██▉ | 107720/371472 [8:33:57<20:21:46, 3.60it/s] {'loss': 3.4284, 'learning_rate': 7.393608998254647e-07, 'epoch': 4.64} + 29%|██▉ | 107720/371472 [8:33:57<20:21:46, 3.60it/s] 29%|██▉ | 107721/371472 [8:33:57<20:46:24, 3.53it/s] 29%|██▉ | 107722/371472 [8:33:57<21:09:11, 3.46it/s] 29%|██▉ | 107723/371472 [8:33:58<20:24:06, 3.59it/s] 29%|██▉ | 107724/371472 [8:33:58<23:10:34, 3.16it/s] 29%|██▉ | 107725/371472 [8:33:58<22:52:07, 3.20it/s] 29%|██▉ | 107726/371472 [8:33:59<22:09:07, 3.31it/s] 29%|██▉ | 107727/371472 [8:33:59<20:55:47, 3.50it/s] 29%|██▉ | 107728/371472 [8:33:59<20:36:50, 3.55it/s] 29%|██▉ | 107729/371472 [8:34:00<21:40:03, 3.38it/s] 29%|██▉ | 107730/371472 [8:34:00<21:45:38, 3.37it/s] 29%|██▉ | 107731/371472 [8:34:00<21:42:41, 3.37it/s] 29%|██▉ | 107732/371472 [8:34:00<21:52:25, 3.35it/s] 29%|██▉ | 107733/371472 [8:34:01<20:43:52, 3.53it/s] 29%|██▉ | 107734/371472 [8:34:01<19:49:03, 3.70it/s] 29%|██▉ | 107735/371472 [8:34:01<20:49:01, 3.52it/s] 29%|██▉ | 107736/371472 [8:34:02<22:51:23, 3.21it/s] 29%|██▉ | 107737/371472 [8:34:02<21:47:09, 3.36it/s] 29%|██▉ | 107738/371472 [8:34:02<22:33:29, 3.25it/s] 29%|██▉ | 107739/371472 [8:34:03<24:33:26, 2.98it/s] 29%|██▉ | 107740/371472 [8:34:03<25:29:46, 2.87it/s] {'loss': 3.5511, 'learning_rate': 7.39312417849986e-07, 'epoch': 4.64} + 29%|██▉ | 107740/371472 [8:34:03<25:29:46, 2.87it/s] 29%|██▉ | 107741/371472 [8:34:03<23:41:39, 3.09it/s] 29%|██▉ | 107742/371472 [8:34:04<22:20:50, 3.28it/s] 29%|██▉ | 107743/371472 [8:34:04<21:11:48, 3.46it/s] 29%|██▉ | 107744/371472 [8:34:04<20:43:32, 3.53it/s] 29%|██▉ | 107745/371472 [8:34:04<19:44:51, 3.71it/s] 29%|██▉ | 107746/371472 [8:34:05<19:11:47, 3.82it/s] 29%|██▉ | 107747/371472 [8:34:05<19:06:11, 3.83it/s] 29%|██▉ | 107748/371472 [8:34:05<20:34:11, 3.56it/s] 29%|██▉ | 107749/371472 [8:34:05<20:52:34, 3.51it/s] 29%|██▉ | 107750/371472 [8:34:06<20:30:22, 3.57it/s] 29%|██▉ | 107751/371472 [8:34:06<22:45:10, 3.22it/s] 29%|██▉ | 107752/371472 [8:34:06<21:28:17, 3.41it/s] 29%|██▉ | 107753/371472 [8:34:07<21:22:29, 3.43it/s] 29%|██▉ | 107754/371472 [8:34:07<21:09:00, 3.46it/s] 29%|██▉ | 107755/371472 [8:34:07<20:38:33, 3.55it/s] 29%|██▉ | 107756/371472 [8:34:08<21:58:28, 3.33it/s] 29%|██▉ | 107757/371472 [8:34:08<21:40:52, 3.38it/s] 29%|██▉ | 107758/371472 [8:34:08<21:14:25, 3.45it/s] 29%|██▉ | 107759/371472 [8:34:08<21:25:00, 3.42it/s] 29%|██▉ | 107760/371472 [8:34:09<21:18:25, 3.44it/s] {'loss': 3.5303, 'learning_rate': 7.392639358745071e-07, 'epoch': 4.64} + 29%|██▉ | 107760/371472 [8:34:09<21:18:25, 3.44it/s] 29%|██▉ | 107761/371472 [8:34:09<20:39:30, 3.55it/s] 29%|██▉ | 107762/371472 [8:34:09<20:14:14, 3.62it/s] 29%|██▉ | 107763/371472 [8:34:09<20:35:56, 3.56it/s] 29%|██▉ | 107764/371472 [8:34:10<20:44:59, 3.53it/s] 29%|██▉ | 107765/371472 [8:34:10<20:02:37, 3.65it/s] 29%|██▉ | 107766/371472 [8:34:10<19:59:19, 3.66it/s] 29%|██▉ | 107767/371472 [8:34:11<20:30:28, 3.57it/s] 29%|██▉ | 107768/371472 [8:34:11<19:55:14, 3.68it/s] 29%|██▉ | 107769/371472 [8:34:11<19:48:52, 3.70it/s] 29%|██▉ | 107770/371472 [8:34:11<19:47:14, 3.70it/s] 29%|██▉ | 107771/371472 [8:34:12<20:53:19, 3.51it/s] 29%|██▉ | 107772/371472 [8:34:12<21:06:14, 3.47it/s] 29%|██▉ | 107773/371472 [8:34:12<20:38:05, 3.55it/s] 29%|██▉ | 107774/371472 [8:34:13<22:05:51, 3.31it/s] 29%|██▉ | 107775/371472 [8:34:13<22:31:32, 3.25it/s] 29%|██▉ | 107776/371472 [8:34:13<23:06:37, 3.17it/s] 29%|██▉ | 107777/371472 [8:34:14<22:31:28, 3.25it/s] 29%|██▉ | 107778/371472 [8:34:14<22:20:11, 3.28it/s] 29%|██▉ | 107779/371472 [8:34:14<21:31:49, 3.40it/s] 29%|██▉ | 107780/371472 [8:34:14<20:36:10, 3.56it/s] {'loss': 3.4329, 'learning_rate': 7.392154538990281e-07, 'epoch': 4.64} + 29%|██▉ | 107780/371472 [8:34:14<20:36:10, 3.56it/s] 29%|██▉ | 107781/371472 [8:34:15<19:58:03, 3.67it/s] 29%|██▉ | 107782/371472 [8:34:15<19:53:15, 3.68it/s] 29%|██▉ | 107783/371472 [8:34:15<21:18:33, 3.44it/s] 29%|██▉ | 107784/371472 [8:34:16<21:13:01, 3.45it/s] 29%|██▉ | 107785/371472 [8:34:16<20:48:58, 3.52it/s] 29%|██▉ | 107786/371472 [8:34:16<22:00:06, 3.33it/s] 29%|██▉ | 107787/371472 [8:34:16<20:52:22, 3.51it/s] 29%|██▉ | 107788/371472 [8:34:17<20:24:29, 3.59it/s] 29%|██▉ | 107789/371472 [8:34:17<20:20:44, 3.60it/s] 29%|██▉ | 107790/371472 [8:34:17<21:14:34, 3.45it/s] 29%|██▉ | 107791/371472 [8:34:18<21:43:24, 3.37it/s] 29%|██▉ | 107792/371472 [8:34:18<20:33:54, 3.56it/s] 29%|██▉ | 107793/371472 [8:34:18<21:26:41, 3.42it/s] 29%|██▉ | 107794/371472 [8:34:18<22:26:11, 3.26it/s] 29%|██▉ | 107795/371472 [8:34:19<21:00:28, 3.49it/s] 29%|██▉ | 107796/371472 [8:34:19<23:02:56, 3.18it/s] 29%|██▉ | 107797/371472 [8:34:19<23:13:47, 3.15it/s] 29%|██▉ | 107798/371472 [8:34:20<24:40:17, 2.97it/s] 29%|██▉ | 107799/371472 [8:34:20<23:09:18, 3.16it/s] 29%|██▉ | 107800/371472 [8:34:20<22:12:24, 3.30it/s] {'loss': 3.2984, 'learning_rate': 7.391669719235492e-07, 'epoch': 4.64} + 29%|██▉ | 107800/371472 [8:34:20<22:12:24, 3.30it/s] 29%|██▉ | 107801/371472 [8:34:21<21:45:45, 3.37it/s] 29%|██▉ | 107802/371472 [8:34:21<21:23:22, 3.42it/s] 29%|██▉ | 107803/371472 [8:34:21<21:46:16, 3.36it/s] 29%|██▉ | 107804/371472 [8:34:22<23:04:55, 3.17it/s] 29%|██▉ | 107805/371472 [8:34:22<21:24:41, 3.42it/s] 29%|██▉ | 107806/371472 [8:34:22<21:24:52, 3.42it/s] 29%|██▉ | 107807/371472 [8:34:22<20:30:45, 3.57it/s] 29%|██▉ | 107808/371472 [8:34:23<21:36:16, 3.39it/s] 29%|██▉ | 107809/371472 [8:34:23<20:55:13, 3.50it/s] 29%|██▉ | 107810/371472 [8:34:23<20:16:24, 3.61it/s] 29%|██▉ | 107811/371472 [8:34:23<20:26:26, 3.58it/s] 29%|██▉ | 107812/371472 [8:34:24<21:13:23, 3.45it/s] 29%|██▉ | 107813/371472 [8:34:24<21:23:28, 3.42it/s] 29%|██▉ | 107814/371472 [8:34:24<20:26:57, 3.58it/s] 29%|██▉ | 107815/371472 [8:34:25<19:47:01, 3.70it/s] 29%|██▉ | 107816/371472 [8:34:25<19:47:36, 3.70it/s] 29%|██▉ | 107817/371472 [8:34:25<20:42:47, 3.54it/s] 29%|██▉ | 107818/371472 [8:34:25<20:09:45, 3.63it/s] 29%|██▉ | 107819/371472 [8:34:26<19:44:06, 3.71it/s] 29%|██▉ | 107820/371472 [8:34:26<19:31:47, 3.75it/s] {'loss': 3.2471, 'learning_rate': 7.391184899480705e-07, 'epoch': 4.64} + 29%|██▉ | 107820/371472 [8:34:26<19:31:47, 3.75it/s] 29%|██▉ | 107821/371472 [8:34:26<19:35:37, 3.74it/s] 29%|██▉ | 107822/371472 [8:34:27<20:35:37, 3.56it/s] 29%|██▉ | 107823/371472 [8:34:27<20:08:10, 3.64it/s] 29%|██▉ | 107824/371472 [8:34:27<20:18:39, 3.61it/s] 29%|██▉ | 107825/371472 [8:34:27<20:12:34, 3.62it/s] 29%|██▉ | 107826/371472 [8:34:28<20:20:19, 3.60it/s] 29%|██▉ | 107827/371472 [8:34:28<19:27:33, 3.76it/s] 29%|██▉ | 107828/371472 [8:34:28<19:23:08, 3.78it/s] 29%|██▉ | 107829/371472 [8:34:28<20:36:43, 3.55it/s] 29%|██▉ | 107830/371472 [8:34:29<20:21:02, 3.60it/s] 29%|██▉ | 107831/371472 [8:34:29<20:44:09, 3.53it/s] 29%|██▉ | 107832/371472 [8:34:29<21:33:28, 3.40it/s] 29%|██▉ | 107833/371472 [8:34:30<20:50:18, 3.51it/s] 29%|██▉ | 107834/371472 [8:34:30<20:58:24, 3.49it/s] 29%|██▉ | 107835/371472 [8:34:30<20:24:02, 3.59it/s] 29%|██▉ | 107836/371472 [8:34:30<20:39:21, 3.55it/s] 29%|██▉ | 107837/371472 [8:34:31<20:40:15, 3.54it/s] 29%|██▉ | 107838/371472 [8:34:31<19:56:30, 3.67it/s] 29%|██▉ | 107839/371472 [8:34:31<20:15:57, 3.61it/s] 29%|██▉ | 107840/371472 [8:34:32<20:15:02, 3.62it/s] {'loss': 3.5918, 'learning_rate': 7.390700079725915e-07, 'epoch': 4.64} + 29%|██▉ | 107840/371472 [8:34:32<20:15:02, 3.62it/s] 29%|██▉ | 107841/371472 [8:34:32<20:49:47, 3.52it/s] 29%|██▉ | 107842/371472 [8:34:32<20:48:00, 3.52it/s] 29%|██▉ | 107843/371472 [8:34:32<20:33:56, 3.56it/s] 29%|██▉ | 107844/371472 [8:34:33<20:26:10, 3.58it/s] 29%|██▉ | 107845/371472 [8:34:33<20:18:18, 3.61it/s] 29%|██▉ | 107846/371472 [8:34:33<20:30:20, 3.57it/s] 29%|██▉ | 107847/371472 [8:34:34<20:29:30, 3.57it/s] 29%|██▉ | 107848/371472 [8:34:34<21:02:46, 3.48it/s] 29%|██▉ | 107849/371472 [8:34:34<22:08:23, 3.31it/s] 29%|██▉ | 107850/371472 [8:34:34<21:08:02, 3.46it/s] 29%|██▉ | 107851/371472 [8:34:35<21:57:26, 3.34it/s] 29%|██▉ | 107852/371472 [8:34:35<20:52:30, 3.51it/s] 29%|██▉ | 107853/371472 [8:34:35<21:51:19, 3.35it/s] 29%|██▉ | 107854/371472 [8:34:36<21:01:29, 3.48it/s] 29%|██▉ | 107855/371472 [8:34:36<21:32:26, 3.40it/s] 29%|██▉ | 107856/371472 [8:34:36<20:51:48, 3.51it/s] 29%|██▉ | 107857/371472 [8:34:36<20:17:26, 3.61it/s] 29%|██▉ | 107858/371472 [8:34:37<20:31:28, 3.57it/s] 29%|██▉ | 107859/371472 [8:34:37<20:23:54, 3.59it/s] 29%|██▉ | 107860/371472 [8:34:37<20:09:38, 3.63it/s] {'loss': 3.3016, 'learning_rate': 7.390215259971126e-07, 'epoch': 4.65} + 29%|██▉ | 107860/371472 [8:34:37<20:09:38, 3.63it/s] 29%|██▉ | 107861/371472 [8:34:38<20:29:30, 3.57it/s] 29%|██▉ | 107862/371472 [8:34:38<19:41:45, 3.72it/s] 29%|██▉ | 107863/371472 [8:34:38<20:08:27, 3.64it/s] 29%|██▉ | 107864/371472 [8:34:38<20:01:44, 3.66it/s] 29%|██▉ | 107865/371472 [8:34:39<19:17:08, 3.80it/s] 29%|██▉ | 107866/371472 [8:34:39<19:23:52, 3.77it/s] 29%|██▉ | 107867/371472 [8:34:39<19:35:43, 3.74it/s] 29%|██▉ | 107868/371472 [8:34:39<18:54:21, 3.87it/s] 29%|██▉ | 107869/371472 [8:34:40<18:30:20, 3.96it/s] 29%|██▉ | 107870/371472 [8:34:40<18:31:39, 3.95it/s] 29%|██▉ | 107871/371472 [8:34:40<18:35:48, 3.94it/s] 29%|██▉ | 107872/371472 [8:34:40<19:48:38, 3.70it/s] 29%|██▉ | 107873/371472 [8:34:41<19:27:10, 3.76it/s] 29%|██▉ | 107874/371472 [8:34:41<19:02:54, 3.84it/s] 29%|██▉ | 107875/371472 [8:34:41<18:57:30, 3.86it/s] 29%|██▉ | 107876/371472 [8:34:41<19:41:32, 3.72it/s] 29%|██▉ | 107877/371472 [8:34:42<20:43:48, 3.53it/s] 29%|██▉ | 107878/371472 [8:34:42<20:25:47, 3.58it/s] 29%|██▉ | 107879/371472 [8:34:42<20:45:43, 3.53it/s] 29%|██▉ | 107880/371472 [8:34:43<19:42:34, 3.71it/s] {'loss': 3.4703, 'learning_rate': 7.389730440216337e-07, 'epoch': 4.65} + 29%|██▉ | 107880/371472 [8:34:43<19:42:34, 3.71it/s] 29%|██▉ | 107881/371472 [8:34:43<20:00:00, 3.66it/s] 29%|██▉ | 107882/371472 [8:34:43<20:04:42, 3.65it/s] 29%|██▉ | 107883/371472 [8:34:43<20:06:41, 3.64it/s] 29%|██▉ | 107884/371472 [8:34:44<20:36:38, 3.55it/s] 29%|██▉ | 107885/371472 [8:34:44<21:25:47, 3.42it/s] 29%|██▉ | 107886/371472 [8:34:44<21:04:07, 3.48it/s] 29%|██▉ | 107887/371472 [8:34:45<20:25:48, 3.58it/s] 29%|██▉ | 107888/371472 [8:34:45<21:46:12, 3.36it/s] 29%|██▉ | 107889/371472 [8:34:45<23:05:07, 3.17it/s] 29%|██▉ | 107890/371472 [8:34:46<22:44:10, 3.22it/s] 29%|██▉ | 107891/371472 [8:34:46<21:16:09, 3.44it/s] 29%|██▉ | 107892/371472 [8:34:46<20:32:33, 3.56it/s] 29%|██▉ | 107893/371472 [8:34:46<21:25:25, 3.42it/s] 29%|██▉ | 107894/371472 [8:34:47<20:12:54, 3.62it/s] 29%|██▉ | 107895/371472 [8:34:47<21:23:11, 3.42it/s] 29%|██▉ | 107896/371472 [8:34:47<21:12:56, 3.45it/s] 29%|██▉ | 107897/371472 [8:34:48<20:58:59, 3.49it/s] 29%|██▉ | 107898/371472 [8:34:48<21:15:10, 3.44it/s] 29%|██▉ | 107899/371472 [8:34:48<20:10:03, 3.63it/s] 29%|██▉ | 107900/371472 [8:34:48<21:25:46, 3.42it/s] {'loss': 3.5508, 'learning_rate': 7.389245620461548e-07, 'epoch': 4.65} + 29%|██▉ | 107900/371472 [8:34:48<21:25:46, 3.42it/s] 29%|██▉ | 107901/371472 [8:34:49<21:31:42, 3.40it/s] 29%|██▉ | 107902/371472 [8:34:49<20:50:28, 3.51it/s] 29%|██▉ | 107903/371472 [8:34:49<20:15:41, 3.61it/s] 29%|██▉ | 107904/371472 [8:34:49<20:37:03, 3.55it/s] 29%|██▉ | 107905/371472 [8:34:50<20:30:54, 3.57it/s] 29%|██▉ | 107906/371472 [8:34:50<20:23:01, 3.59it/s] 29%|██▉ | 107907/371472 [8:34:50<20:34:41, 3.56it/s] 29%|██▉ | 107908/371472 [8:34:51<20:32:07, 3.57it/s] 29%|██▉ | 107909/371472 [8:34:51<21:14:27, 3.45it/s] 29%|██▉ | 107910/371472 [8:34:51<20:25:34, 3.58it/s] 29%|██▉ | 107911/371472 [8:34:51<20:52:52, 3.51it/s] 29%|██▉ | 107912/371472 [8:34:52<20:19:42, 3.60it/s] 29%|██▉ | 107913/371472 [8:34:52<20:25:54, 3.58it/s] 29%|██▉ | 107914/371472 [8:34:52<20:31:37, 3.57it/s] 29%|██▉ | 107915/371472 [8:34:53<21:27:20, 3.41it/s] 29%|██▉ | 107916/371472 [8:34:53<20:51:12, 3.51it/s] 29%|██▉ | 107917/371472 [8:34:53<21:00:41, 3.48it/s] 29%|██▉ | 107918/371472 [8:34:53<20:15:19, 3.61it/s] 29%|██▉ | 107919/371472 [8:34:54<21:04:38, 3.47it/s] 29%|██▉ | 107920/371472 [8:34:54<22:09:29, 3.30it/s] {'loss': 3.2261, 'learning_rate': 7.388760800706759e-07, 'epoch': 4.65} + 29%|██▉ | 107920/371472 [8:34:54<22:09:29, 3.30it/s] 29%|██▉ | 107921/371472 [8:34:54<21:39:06, 3.38it/s] 29%|██▉ | 107922/371472 [8:34:55<22:44:15, 3.22it/s] 29%|██▉ | 107923/371472 [8:34:55<21:29:13, 3.41it/s] 29%|██▉ | 107924/371472 [8:34:55<20:26:54, 3.58it/s] 29%|██▉ | 107925/371472 [8:34:55<19:24:09, 3.77it/s] 29%|██▉ | 107926/371472 [8:34:56<19:18:16, 3.79it/s] 29%|██▉ | 107927/371472 [8:34:56<19:18:09, 3.79it/s] 29%|██▉ | 107928/371472 [8:34:56<18:58:11, 3.86it/s] 29%|██▉ | 107929/371472 [8:34:56<18:59:30, 3.85it/s] 29%|██▉ | 107930/371472 [8:34:57<21:16:57, 3.44it/s] 29%|██▉ | 107931/371472 [8:34:57<20:22:31, 3.59it/s] 29%|██▉ | 107932/371472 [8:34:57<20:28:41, 3.57it/s] 29%|██▉ | 107933/371472 [8:34:58<20:11:09, 3.63it/s] 29%|██▉ | 107934/371472 [8:34:58<19:53:57, 3.68it/s] 29%|██▉ | 107935/371472 [8:34:58<20:18:04, 3.61it/s] 29%|██▉ | 107936/371472 [8:34:58<20:17:47, 3.61it/s] 29%|██▉ | 107937/371472 [8:34:59<19:45:33, 3.70it/s] 29%|██▉ | 107938/371472 [8:34:59<20:07:23, 3.64it/s] 29%|██▉ | 107939/371472 [8:34:59<19:12:52, 3.81it/s] 29%|██▉ | 107940/371472 [8:34:59<18:36:34, 3.93it/s] {'loss': 3.5461, 'learning_rate': 7.38827598095197e-07, 'epoch': 4.65} + 29%|██▉ | 107940/371472 [8:34:59<18:36:34, 3.93it/s] 29%|██▉ | 107941/371472 [8:35:00<20:57:51, 3.49it/s] 29%|██▉ | 107942/371472 [8:35:00<20:38:25, 3.55it/s] 29%|██▉ | 107943/371472 [8:35:00<22:55:09, 3.19it/s] 29%|██▉ | 107944/371472 [8:35:01<21:15:53, 3.44it/s] 29%|██▉ | 107945/371472 [8:35:01<21:39:22, 3.38it/s] 29%|██▉ | 107946/371472 [8:35:01<22:16:50, 3.29it/s] 29%|██▉ | 107947/371472 [8:35:02<23:07:05, 3.17it/s] 29%|██▉ | 107948/371472 [8:35:02<21:25:22, 3.42it/s] 29%|██▉ | 107949/371472 [8:35:02<20:25:52, 3.58it/s] 29%|██▉ | 107950/371472 [8:35:02<19:51:13, 3.69it/s] 29%|██▉ | 107951/371472 [8:35:03<19:51:15, 3.69it/s] 29%|██▉ | 107952/371472 [8:35:03<20:09:28, 3.63it/s] 29%|██▉ | 107953/371472 [8:35:03<21:10:59, 3.46it/s] 29%|██▉ | 107954/371472 [8:35:04<20:47:33, 3.52it/s] 29%|██▉ | 107955/371472 [8:35:04<19:58:58, 3.66it/s] 29%|██▉ | 107956/371472 [8:35:04<20:12:51, 3.62it/s] 29%|██▉ | 107957/371472 [8:35:04<19:57:56, 3.67it/s] 29%|██▉ | 107958/371472 [8:35:05<19:37:26, 3.73it/s] 29%|██▉ | 107959/371472 [8:35:05<22:41:51, 3.22it/s] 29%|██▉ | 107960/371472 [8:35:05<21:08:47, 3.46it/s] {'loss': 3.4937, 'learning_rate': 7.387791161197181e-07, 'epoch': 4.65} + 29%|██▉ | 107960/371472 [8:35:05<21:08:47, 3.46it/s] 29%|██▉ | 107961/371472 [8:35:06<21:00:30, 3.48it/s] 29%|██▉ | 107962/371472 [8:35:06<20:24:22, 3.59it/s] 29%|██▉ | 107963/371472 [8:35:06<19:24:58, 3.77it/s] 29%|██▉ | 107964/371472 [8:35:06<19:16:02, 3.80it/s] 29%|██▉ | 107965/371472 [8:35:07<20:02:20, 3.65it/s] 29%|██▉ | 107966/371472 [8:35:07<19:28:06, 3.76it/s] 29%|██▉ | 107967/371472 [8:35:07<20:02:57, 3.65it/s] 29%|██▉ | 107968/371472 [8:35:07<19:48:38, 3.69it/s] 29%|██▉ | 107969/371472 [8:35:08<22:26:02, 3.26it/s] 29%|██▉ | 107970/371472 [8:35:08<23:13:07, 3.15it/s] 29%|██▉ | 107971/371472 [8:35:08<22:42:46, 3.22it/s] 29%|██▉ | 107972/371472 [8:35:09<22:48:12, 3.21it/s] 29%|██▉ | 107973/371472 [8:35:09<22:00:19, 3.33it/s] 29%|██▉ | 107974/371472 [8:35:09<21:05:24, 3.47it/s] 29%|██▉ | 107975/371472 [8:35:10<20:49:26, 3.51it/s] 29%|██▉ | 107976/371472 [8:35:10<22:34:21, 3.24it/s] 29%|██▉ | 107977/371472 [8:35:10<23:01:02, 3.18it/s] 29%|██▉ | 107978/371472 [8:35:11<24:41:12, 2.96it/s] 29%|██▉ | 107979/371472 [8:35:11<23:33:11, 3.11it/s] 29%|██▉ | 107980/371472 [8:35:11<21:56:47, 3.34it/s] {'loss': 3.2862, 'learning_rate': 7.387306341442393e-07, 'epoch': 4.65} + 29%|██▉ | 107980/371472 [8:35:11<21:56:47, 3.34it/s] 29%|██▉ | 107981/371472 [8:35:12<23:46:31, 3.08it/s] 29%|██▉ | 107982/371472 [8:35:12<22:05:53, 3.31it/s] 29%|██▉ | 107983/371472 [8:35:12<21:16:57, 3.44it/s] 29%|██▉ | 107984/371472 [8:35:12<20:35:17, 3.56it/s] 29%|██▉ | 107985/371472 [8:35:13<20:11:38, 3.62it/s] 29%|██▉ | 107986/371472 [8:35:13<20:33:57, 3.56it/s] 29%|██▉ | 107987/371472 [8:35:13<21:05:45, 3.47it/s] 29%|██▉ | 107988/371472 [8:35:13<21:01:09, 3.48it/s] 29%|██▉ | 107989/371472 [8:35:14<20:59:16, 3.49it/s] 29%|██▉ | 107990/371472 [8:35:14<20:06:32, 3.64it/s] 29%|██▉ | 107991/371472 [8:35:14<20:13:38, 3.62it/s] 29%|██▉ | 107992/371472 [8:35:15<19:50:56, 3.69it/s] 29%|██▉ | 107993/371472 [8:35:15<19:06:55, 3.83it/s] 29%|██▉ | 107994/371472 [8:35:15<19:48:43, 3.69it/s] 29%|██▉ | 107995/371472 [8:35:15<20:11:33, 3.62it/s] 29%|██▉ | 107996/371472 [8:35:16<20:38:36, 3.55it/s] 29%|██▉ | 107997/371472 [8:35:16<20:49:01, 3.52it/s] 29%|██▉ | 107998/371472 [8:35:16<20:22:20, 3.59it/s] 29%|██▉ | 107999/371472 [8:35:17<20:43:17, 3.53it/s] 29%|██▉ | 108000/371472 [8:35:17<20:19:22, 3.60it/s] {'loss': 3.2475, 'learning_rate': 7.386821521687604e-07, 'epoch': 4.65} + 29%|██▉ | 108000/371472 [8:35:17<20:19:22, 3.60it/s] 29%|██▉ | 108001/371472 [8:35:17<20:14:20, 3.62it/s] 29%|██▉ | 108002/371472 [8:35:17<21:57:09, 3.33it/s] 29%|██▉ | 108003/371472 [8:35:18<21:44:41, 3.37it/s] 29%|██▉ | 108004/371472 [8:35:18<21:16:01, 3.44it/s] 29%|██▉ | 108005/371472 [8:35:18<22:01:10, 3.32it/s] 29%|██▉ | 108006/371472 [8:35:19<22:23:37, 3.27it/s] 29%|██▉ | 108007/371472 [8:35:19<21:14:46, 3.44it/s] 29%|██▉ | 108008/371472 [8:35:19<20:50:36, 3.51it/s] 29%|██▉ | 108009/371472 [8:35:19<20:39:37, 3.54it/s] 29%|██▉ | 108010/371472 [8:35:20<20:34:50, 3.56it/s] 29%|██▉ | 108011/371472 [8:35:20<19:58:41, 3.66it/s] 29%|██▉ | 108012/371472 [8:35:20<19:59:04, 3.66it/s] 29%|██�� | 108013/371472 [8:35:20<19:07:36, 3.83it/s] 29%|██▉ | 108014/371472 [8:35:21<19:03:16, 3.84it/s] 29%|██▉ | 108015/371472 [8:35:21<19:13:43, 3.81it/s] 29%|██▉ | 108016/371472 [8:35:21<19:20:59, 3.78it/s] 29%|██▉ | 108017/371472 [8:35:22<19:17:41, 3.79it/s] 29%|██▉ | 108018/371472 [8:35:22<21:51:22, 3.35it/s] 29%|██▉ | 108019/371472 [8:35:22<20:50:13, 3.51it/s] 29%|██▉ | 108020/371472 [8:35:22<20:39:45, 3.54it/s] {'loss': 3.4575, 'learning_rate': 7.386336701932814e-07, 'epoch': 4.65} + 29%|██▉ | 108020/371472 [8:35:22<20:39:45, 3.54it/s] 29%|██▉ | 108021/371472 [8:35:23<21:32:56, 3.40it/s] 29%|██▉ | 108022/371472 [8:35:23<21:02:05, 3.48it/s] 29%|██▉ | 108023/371472 [8:35:23<21:07:49, 3.46it/s] 29%|██▉ | 108024/371472 [8:35:24<22:38:09, 3.23it/s] 29%|██▉ | 108025/371472 [8:35:24<23:18:45, 3.14it/s] 29%|██▉ | 108026/371472 [8:35:24<23:13:51, 3.15it/s] 29%|██▉ | 108027/371472 [8:35:25<22:07:45, 3.31it/s] 29%|██▉ | 108028/371472 [8:35:25<22:51:35, 3.20it/s] 29%|██▉ | 108029/371472 [8:35:25<21:43:52, 3.37it/s] 29%|██▉ | 108030/371472 [8:35:25<21:06:32, 3.47it/s] 29%|██▉ | 108031/371472 [8:35:26<20:54:43, 3.50it/s] 29%|██▉ | 108032/371472 [8:35:26<20:53:50, 3.50it/s] 29%|██▉ | 108033/371472 [8:35:26<20:39:20, 3.54it/s] 29%|██▉ | 108034/371472 [8:35:27<20:45:52, 3.52it/s] 29%|██▉ | 108035/371472 [8:35:27<21:06:04, 3.47it/s] 29%|██▉ | 108036/371472 [8:35:27<20:47:19, 3.52it/s] 29%|██▉ | 108037/371472 [8:35:27<20:02:54, 3.65it/s] 29%|██▉ | 108038/371472 [8:35:28<20:33:50, 3.56it/s] 29%|██▉ | 108039/371472 [8:35:28<22:33:32, 3.24it/s] 29%|██▉ | 108040/371472 [8:35:28<22:15:33, 3.29it/s] {'loss': 3.234, 'learning_rate': 7.385851882178025e-07, 'epoch': 4.65} + 29%|██▉ | 108040/371472 [8:35:28<22:15:33, 3.29it/s] 29%|██▉ | 108041/371472 [8:35:29<21:24:52, 3.42it/s] 29%|██▉ | 108042/371472 [8:35:29<21:14:57, 3.44it/s] 29%|██▉ | 108043/371472 [8:35:29<21:06:04, 3.47it/s] 29%|██▉ | 108044/371472 [8:35:29<20:27:03, 3.58it/s] 29%|██▉ | 108045/371472 [8:35:30<21:09:25, 3.46it/s] 29%|██▉ | 108046/371472 [8:35:30<20:40:25, 3.54it/s] 29%|██▉ | 108047/371472 [8:35:30<20:41:24, 3.54it/s] 29%|██▉ | 108048/371472 [8:35:31<20:29:42, 3.57it/s] 29%|██▉ | 108049/371472 [8:35:31<20:39:21, 3.54it/s] 29%|██▉ | 108050/371472 [8:35:31<20:39:59, 3.54it/s] 29%|██▉ | 108051/371472 [8:35:31<19:44:18, 3.71it/s] 29%|██▉ | 108052/371472 [8:35:32<21:02:31, 3.48it/s] 29%|██▉ | 108053/371472 [8:35:32<21:55:51, 3.34it/s] 29%|██▉ | 108054/371472 [8:35:32<23:03:40, 3.17it/s] 29%|██▉ | 108055/371472 [8:35:33<22:42:41, 3.22it/s] 29%|██▉ | 108056/371472 [8:35:33<21:45:19, 3.36it/s] 29%|██▉ | 108057/371472 [8:35:33<21:23:31, 3.42it/s] 29%|██▉ | 108058/371472 [8:35:34<22:29:18, 3.25it/s] 29%|██▉ | 108059/371472 [8:35:34<21:28:58, 3.41it/s] 29%|██▉ | 108060/371472 [8:35:34<20:32:06, 3.56it/s] {'loss': 3.3128, 'learning_rate': 7.385367062423237e-07, 'epoch': 4.65} + 29%|██▉ | 108060/371472 [8:35:34<20:32:06, 3.56it/s] 29%|██▉ | 108061/371472 [8:35:34<20:10:00, 3.63it/s] 29%|██▉ | 108062/371472 [8:35:35<20:07:42, 3.64it/s] 29%|██▉ | 108063/371472 [8:35:35<19:40:54, 3.72it/s] 29%|██▉ | 108064/371472 [8:35:35<20:27:04, 3.58it/s] 29%|██▉ | 108065/371472 [8:35:36<20:32:25, 3.56it/s] 29%|██▉ | 108066/371472 [8:35:36<20:44:30, 3.53it/s] 29%|██▉ | 108067/371472 [8:35:36<20:06:34, 3.64it/s] 29%|██▉ | 108068/371472 [8:35:36<20:05:35, 3.64it/s] 29%|██▉ | 108069/371472 [8:35:37<21:02:15, 3.48it/s] 29%|██▉ | 108070/371472 [8:35:37<20:01:37, 3.65it/s] 29%|██▉ | 108071/371472 [8:35:37<19:27:59, 3.76it/s] 29%|██▉ | 108072/371472 [8:35:37<19:03:52, 3.84it/s] 29%|██▉ | 108073/371472 [8:35:38<20:47:06, 3.52it/s] 29%|██▉ | 108074/371472 [8:35:38<20:31:19, 3.57it/s] 29%|██▉ | 108075/371472 [8:35:38<20:41:49, 3.54it/s] 29%|██▉ | 108076/371472 [8:35:39<20:42:15, 3.53it/s] 29%|██▉ | 108077/371472 [8:35:39<21:49:42, 3.35it/s] 29%|██▉ | 108078/371472 [8:35:39<21:46:14, 3.36it/s] 29%|██▉ | 108079/371472 [8:35:40<22:14:44, 3.29it/s] 29%|██▉ | 108080/371472 [8:35:40<21:36:41, 3.39it/s] {'loss': 3.3031, 'learning_rate': 7.384882242668448e-07, 'epoch': 4.66} + 29%|██▉ | 108080/371472 [8:35:40<21:36:41, 3.39it/s] 29%|██▉ | 108081/371472 [8:35:40<20:50:40, 3.51it/s] 29%|██▉ | 108082/371472 [8:35:40<20:06:48, 3.64it/s] 29%|██▉ | 108083/371472 [8:35:41<20:13:52, 3.62it/s] 29%|██▉ | 108084/371472 [8:35:41<20:20:48, 3.60it/s] 29%|██▉ | 108085/371472 [8:35:41<20:42:40, 3.53it/s] 29%|██▉ | 108086/371472 [8:35:41<20:36:52, 3.55it/s] 29%|██▉ | 108087/371472 [8:35:42<20:16:37, 3.61it/s] 29%|██▉ | 108088/371472 [8:35:42<21:02:46, 3.48it/s] 29%|██▉ | 108089/371472 [8:35:42<21:28:24, 3.41it/s] 29%|██▉ | 108090/371472 [8:35:43<20:30:11, 3.57it/s] 29%|██▉ | 108091/371472 [8:35:43<20:11:42, 3.62it/s] 29%|██▉ | 108092/371472 [8:35:43<19:31:34, 3.75it/s] 29%|██▉ | 108093/371472 [8:35:43<19:17:35, 3.79it/s] 29%|██▉ | 108094/371472 [8:35:44<19:36:08, 3.73it/s] 29%|██▉ | 108095/371472 [8:35:44<19:04:48, 3.83it/s] 29%|██▉ | 108096/371472 [8:35:44<18:30:18, 3.95it/s] 29%|██▉ | 108097/371472 [8:35:44<19:19:44, 3.78it/s] 29%|██▉ | 108098/371472 [8:35:45<18:49:56, 3.88it/s] 29%|██▉ | 108099/371472 [8:35:45<20:32:01, 3.56it/s] 29%|██▉ | 108100/371472 [8:35:45<21:52:16, 3.34it/s] {'loss': 3.4557, 'learning_rate': 7.384397422913658e-07, 'epoch': 4.66} + 29%|██▉ | 108100/371472 [8:35:45<21:52:16, 3.34it/s] 29%|██▉ | 108101/371472 [8:35:46<20:48:39, 3.52it/s] 29%|██▉ | 108102/371472 [8:35:46<20:30:49, 3.57it/s] 29%|██▉ | 108103/371472 [8:35:46<21:27:07, 3.41it/s] 29%|██▉ | 108104/371472 [8:35:46<20:33:45, 3.56it/s] 29%|██▉ | 108105/371472 [8:35:47<19:51:44, 3.68it/s] 29%|██▉ | 108106/371472 [8:35:47<19:47:35, 3.70it/s] 29%|██▉ | 108107/371472 [8:35:47<20:14:35, 3.61it/s] 29%|██▉ | 108108/371472 [8:35:47<20:15:06, 3.61it/s] 29%|██▉ | 108109/371472 [8:35:48<19:54:25, 3.67it/s] 29%|██▉ | 108110/371472 [8:35:48<20:41:41, 3.53it/s] 29%|██▉ | 108111/371472 [8:35:48<20:45:10, 3.53it/s] 29%|██▉ | 108112/371472 [8:35:49<20:49:32, 3.51it/s] 29%|██▉ | 108113/371472 [8:35:49<22:10:17, 3.30it/s] 29%|██▉ | 108114/371472 [8:35:49<22:40:38, 3.23it/s] 29%|██▉ | 108115/371472 [8:35:50<21:20:53, 3.43it/s] 29%|██▉ | 108116/371472 [8:35:50<21:57:12, 3.33it/s] 29%|██▉ | 108117/371472 [8:35:50<21:39:16, 3.38it/s] 29%|██▉ | 108118/371472 [8:35:50<21:17:44, 3.44it/s] 29%|██▉ | 108119/371472 [8:35:51<21:45:32, 3.36it/s] 29%|██▉ | 108120/371472 [8:35:51<21:34:38, 3.39it/s] {'loss': 3.4523, 'learning_rate': 7.38391260315887e-07, 'epoch': 4.66} + 29%|██▉ | 108120/371472 [8:35:51<21:34:38, 3.39it/s] 29%|██▉ | 108121/371472 [8:35:51<22:04:05, 3.31it/s] 29%|██▉ | 108122/371472 [8:35:52<21:20:12, 3.43it/s] 29%|██▉ | 108123/371472 [8:35:52<20:27:34, 3.58it/s] 29%|██▉ | 108124/371472 [8:35:52<21:05:44, 3.47it/s] 29%|██▉ | 108125/371472 [8:35:52<20:11:44, 3.62it/s] 29%|██▉ | 108126/371472 [8:35:53<22:43:48, 3.22it/s] 29%|██▉ | 108127/371472 [8:35:53<21:59:37, 3.33it/s] 29%|██▉ | 108128/371472 [8:35:53<21:59:20, 3.33it/s] 29%|██▉ | 108129/371472 [8:35:54<21:21:21, 3.43it/s] 29%|██▉ | 108130/371472 [8:35:54<20:25:11, 3.58it/s] 29%|██▉ | 108131/371472 [8:35:54<20:37:55, 3.55it/s] 29%|██▉ | 108132/371472 [8:35:54<19:59:29, 3.66it/s] 29%|██▉ | 108133/371472 [8:35:55<21:05:00, 3.47it/s] 29%|██▉ | 108134/371472 [8:35:55<22:56:34, 3.19it/s] 29%|██▉ | 108135/371472 [8:35:55<22:20:23, 3.27it/s] 29%|██▉ | 108136/371472 [8:35:56<21:54:16, 3.34it/s] 29%|██▉ | 108137/371472 [8:35:56<21:55:24, 3.34it/s] 29%|██▉ | 108138/371472 [8:35:56<21:16:59, 3.44it/s] 29%|██▉ | 108139/371472 [8:35:57<20:21:02, 3.59it/s] 29%|██▉ | 108140/371472 [8:35:57<22:33:53, 3.24it/s] {'loss': 3.3763, 'learning_rate': 7.383427783404082e-07, 'epoch': 4.66} + 29%|██▉ | 108140/371472 [8:35:57<22:33:53, 3.24it/s] 29%|██▉ | 108141/371472 [8:35:57<21:44:21, 3.36it/s] 29%|██▉ | 108142/371472 [8:35:57<21:05:13, 3.47it/s] 29%|██▉ | 108143/371472 [8:35:58<21:18:33, 3.43it/s] 29%|██▉ | 108144/371472 [8:35:58<20:39:12, 3.54it/s] 29%|██▉ | 108145/371472 [8:35:58<20:48:02, 3.52it/s] 29%|██▉ | 108146/371472 [8:35:59<20:21:41, 3.59it/s] 29%|██▉ | 108147/371472 [8:35:59<19:58:16, 3.66it/s] 29%|██▉ | 108148/371472 [8:35:59<23:08:00, 3.16it/s] 29%|██▉ | 108149/371472 [8:36:00<23:45:49, 3.08it/s] 29%|██▉ | 108150/371472 [8:36:00<23:53:08, 3.06it/s] 29%|██▉ | 108151/371472 [8:36:00<22:59:32, 3.18it/s] 29%|██▉ | 108152/371472 [8:36:00<21:55:38, 3.34it/s] 29%|██▉ | 108153/371472 [8:36:01<21:28:46, 3.41it/s] 29%|██▉ | 108154/371472 [8:36:01<21:18:31, 3.43it/s] 29%|██▉ | 108155/371472 [8:36:01<21:11:30, 3.45it/s] 29%|██▉ | 108156/371472 [8:36:02<20:17:30, 3.60it/s] 29%|██▉ | 108157/371472 [8:36:02<19:43:16, 3.71it/s] 29%|██▉ | 108158/371472 [8:36:02<19:19:39, 3.78it/s] 29%|██▉ | 108159/371472 [8:36:02<19:18:10, 3.79it/s] 29%|██▉ | 108160/371472 [8:36:03<19:04:30, 3.83it/s] {'loss': 3.3701, 'learning_rate': 7.382942963649292e-07, 'epoch': 4.66} + 29%|██▉ | 108160/371472 [8:36:03<19:04:30, 3.83it/s] 29%|██▉ | 108161/371472 [8:36:03<19:29:36, 3.75it/s] 29%|██▉ | 108162/371472 [8:36:03<19:10:49, 3.81it/s] 29%|██▉ | 108163/371472 [8:36:03<19:28:14, 3.76it/s] 29%|██▉ | 108164/371472 [8:36:04<19:54:07, 3.68it/s] 29%|██▉ | 108165/371472 [8:36:04<19:33:24, 3.74it/s] 29%|██▉ | 108166/371472 [8:36:04<21:35:54, 3.39it/s] 29%|██▉ | 108167/371472 [8:36:05<21:22:38, 3.42it/s] 29%|██▉ | 108168/371472 [8:36:05<20:22:36, 3.59it/s] 29%|██▉ | 108169/371472 [8:36:05<20:31:50, 3.56it/s] 29%|██▉ | 108170/371472 [8:36:05<19:53:40, 3.68it/s] 29%|██▉ | 108171/371472 [8:36:06<19:36:28, 3.73it/s] 29%|██▉ | 108172/371472 [8:36:06<19:25:49, 3.76it/s] 29%|██▉ | 108173/371472 [8:36:06<19:16:45, 3.79it/s] 29%|██▉ | 108174/371472 [8:36:06<19:23:37, 3.77it/s] 29%|██▉ | 108175/371472 [8:36:07<20:27:27, 3.58it/s] 29%|██▉ | 108176/371472 [8:36:07<21:04:41, 3.47it/s] 29%|██▉ | 108177/371472 [8:36:07<21:40:08, 3.38it/s] 29%|██▉ | 108178/371472 [8:36:08<21:04:43, 3.47it/s] 29%|██▉ | 108179/371472 [8:36:08<20:21:58, 3.59it/s] 29%|██▉ | 108180/371472 [8:36:08<21:54:12, 3.34it/s] {'loss': 3.5039, 'learning_rate': 7.382458143894502e-07, 'epoch': 4.66} + 29%|██▉ | 108180/371472 [8:36:08<21:54:12, 3.34it/s] 29%|██▉ | 108181/371472 [8:36:09<20:53:48, 3.50it/s] 29%|██▉ | 108182/371472 [8:36:09<20:21:42, 3.59it/s] 29%|██▉ | 108183/371472 [8:36:09<20:05:59, 3.64it/s] 29%|██▉ | 108184/371472 [8:36:09<20:56:33, 3.49it/s] 29%|██▉ | 108185/371472 [8:36:10<20:43:59, 3.53it/s] 29%|██▉ | 108186/371472 [8:36:10<21:31:13, 3.40it/s] 29%|██▉ | 108187/371472 [8:36:10<21:00:37, 3.48it/s] 29%|██▉ | 108188/371472 [8:36:11<21:54:07, 3.34it/s] 29%|██▉ | 108189/371472 [8:36:11<22:10:37, 3.30it/s] 29%|██▉ | 108190/371472 [8:36:11<21:35:19, 3.39it/s] 29%|██▉ | 108191/371472 [8:36:11<20:58:19, 3.49it/s] 29%|██▉ | 108192/371472 [8:36:12<21:08:33, 3.46it/s] 29%|██▉ | 108193/371472 [8:36:12<21:32:40, 3.39it/s] 29%|██▉ | 108194/371472 [8:36:12<21:40:16, 3.37it/s] 29%|██▉ | 108195/371472 [8:36:13<21:11:00, 3.45it/s] 29%|██▉ | 108196/371472 [8:36:13<20:08:27, 3.63it/s] 29%|██▉ | 108197/371472 [8:36:13<20:04:18, 3.64it/s] 29%|██▉ | 108198/371472 [8:36:13<20:11:26, 3.62it/s] 29%|██▉ | 108199/371472 [8:36:14<20:59:20, 3.48it/s] 29%|██▉ | 108200/371472 [8:36:14<20:57:31, 3.49it/s] {'loss': 3.4298, 'learning_rate': 7.381973324139714e-07, 'epoch': 4.66} + 29%|██▉ | 108200/371472 [8:36:14<20:57:31, 3.49it/s] 29%|██▉ | 108201/371472 [8:36:14<21:31:04, 3.40it/s] 29%|██▉ | 108202/371472 [8:36:15<21:55:22, 3.34it/s] 29%|██▉ | 108203/371472 [8:36:15<21:13:08, 3.45it/s] 29%|██▉ | 108204/371472 [8:36:15<21:42:37, 3.37it/s] 29%|██▉ | 108205/371472 [8:36:15<21:39:26, 3.38it/s] 29%|██▉ | 108206/371472 [8:36:16<21:06:24, 3.46it/s] 29%|██▉ | 108207/371472 [8:36:16<20:16:14, 3.61it/s] 29%|██▉ | 108208/371472 [8:36:16<20:06:54, 3.64it/s] 29%|██▉ | 108209/371472 [8:36:17<19:57:07, 3.67it/s] 29%|██▉ | 108210/371472 [8:36:17<20:03:40, 3.65it/s] 29%|██▉ | 108211/371472 [8:36:17<20:24:23, 3.58it/s] 29%|██▉ | 108212/371472 [8:36:17<19:46:26, 3.70it/s] 29%|██▉ | 108213/371472 [8:36:18<20:07:56, 3.63it/s] 29%|██▉ | 108214/371472 [8:36:18<20:16:07, 3.61it/s] 29%|██▉ | 108215/371472 [8:36:18<20:17:06, 3.60it/s] 29%|██▉ | 108216/371472 [8:36:18<19:42:44, 3.71it/s] 29%|██▉ | 108217/371472 [8:36:19<20:40:26, 3.54it/s] 29%|██▉ | 108218/371472 [8:36:19<21:30:14, 3.40it/s] 29%|██▉ | 108219/371472 [8:36:19<20:39:34, 3.54it/s] 29%|██▉ | 108220/371472 [8:36:20<20:14:57, 3.61it/s] {'loss': 3.5846, 'learning_rate': 7.381488504384925e-07, 'epoch': 4.66} + 29%|██▉ | 108220/371472 [8:36:20<20:14:57, 3.61it/s] 29%|██▉ | 108221/371472 [8:36:20<19:30:23, 3.75it/s] 29%|██▉ | 108222/371472 [8:36:20<22:31:25, 3.25it/s] 29%|██▉ | 108223/371472 [8:36:20<21:25:18, 3.41it/s] 29%|██▉ | 108224/371472 [8:36:21<20:20:12, 3.60it/s] 29%|██▉ | 108225/371472 [8:36:21<19:49:28, 3.69it/s] 29%|██▉ | 108226/371472 [8:36:21<19:49:21, 3.69it/s] 29%|██▉ | 108227/371472 [8:36:22<19:51:09, 3.68it/s] 29%|██▉ | 108228/371472 [8:36:22<19:42:26, 3.71it/s] 29%|██▉ | 108229/371472 [8:36:22<19:51:46, 3.68it/s] 29%|██▉ | 108230/371472 [8:36:22<21:53:55, 3.34it/s] 29%|██▉ | 108231/371472 [8:36:23<21:23:22, 3.42it/s] 29%|██▉ | 108232/371472 [8:36:23<21:45:52, 3.36it/s] 29%|██▉ | 108233/371472 [8:36:23<20:38:20, 3.54it/s] 29%|██▉ | 108234/371472 [8:36:24<20:24:08, 3.58it/s] 29%|██▉ | 108235/371472 [8:36:24<20:10:03, 3.63it/s] 29%|██▉ | 108236/371472 [8:36:24<19:42:01, 3.71it/s] 29%|██▉ | 108237/371472 [8:36:24<20:24:09, 3.58it/s] 29%|██▉ | 108238/371472 [8:36:25<20:11:24, 3.62it/s] 29%|██▉ | 108239/371472 [8:36:25<20:51:04, 3.51it/s] 29%|██▉ | 108240/371472 [8:36:25<20:19:26, 3.60it/s] {'loss': 3.3374, 'learning_rate': 7.381003684630136e-07, 'epoch': 4.66} + 29%|██▉ | 108240/371472 [8:36:25<20:19:26, 3.60it/s] 29%|██▉ | 108241/371472 [8:36:25<20:16:56, 3.61it/s] 29%|██▉ | 108242/371472 [8:36:26<19:50:17, 3.69it/s] 29%|██▉ | 108243/371472 [8:36:26<20:02:15, 3.65it/s] 29%|██▉ | 108244/371472 [8:36:26<20:15:02, 3.61it/s] 29%|██▉ | 108245/371472 [8:36:27<19:38:34, 3.72it/s] 29%|██▉ | 108246/371472 [8:36:27<19:44:05, 3.71it/s] 29%|██▉ | 108247/371472 [8:36:27<19:35:55, 3.73it/s] 29%|██▉ | 108248/371472 [8:36:27<19:13:04, 3.80it/s] 29%|██▉ | 108249/371472 [8:36:28<19:32:28, 3.74it/s] 29%|██▉ | 108250/371472 [8:36:28<19:49:49, 3.69it/s] 29%|██▉ | 108251/371472 [8:36:28<20:10:04, 3.63it/s] 29%|██▉ | 108252/371472 [8:36:28<20:26:29, 3.58it/s] 29%|██▉ | 108253/371472 [8:36:29<21:01:40, 3.48it/s] 29%|██▉ | 108254/371472 [8:36:29<21:30:05, 3.40it/s] 29%|██▉ | 108255/371472 [8:36:29<21:26:26, 3.41it/s] 29%|██▉ | 108256/371472 [8:36:30<20:55:04, 3.50it/s] 29%|██▉ | 108257/371472 [8:36:30<20:35:33, 3.55it/s] 29%|██▉ | 108258/371472 [8:36:30<20:14:57, 3.61it/s] 29%|██▉ | 108259/371472 [8:36:30<19:43:24, 3.71it/s] 29%|██▉ | 108260/371472 [8:36:31<21:10:19, 3.45it/s] {'loss': 3.6548, 'learning_rate': 7.380518864875347e-07, 'epoch': 4.66} + 29%|██▉ | 108260/371472 [8:36:31<21:10:19, 3.45it/s] 29%|██▉ | 108261/371472 [8:36:31<21:37:25, 3.38it/s] 29%|██▉ | 108262/371472 [8:36:31<22:49:28, 3.20it/s] 29%|██▉ | 108263/371472 [8:36:32<23:13:36, 3.15it/s] 29%|██▉ | 108264/371472 [8:36:32<22:22:30, 3.27it/s] 29%|██▉ | 108265/371472 [8:36:32<21:27:34, 3.41it/s] 29%|██▉ | 108266/371472 [8:36:33<21:01:14, 3.48it/s] 29%|██▉ | 108267/371472 [8:36:33<20:37:01, 3.55it/s] 29%|██▉ | 108268/371472 [8:36:33<23:17:08, 3.14it/s] 29%|██▉ | 108269/371472 [8:36:34<23:46:05, 3.08it/s] 29%|██▉ | 108270/371472 [8:36:34<23:43:20, 3.08it/s] 29%|██▉ | 108271/371472 [8:36:34<22:20:59, 3.27it/s] 29%|██▉ | 108272/371472 [8:36:34<21:30:40, 3.40it/s] 29%|██▉ | 108273/371472 [8:36:35<20:27:36, 3.57it/s] 29%|██▉ | 108274/371472 [8:36:35<20:14:21, 3.61it/s] 29%|██▉ | 108275/371472 [8:36:35<22:29:42, 3.25it/s] 29%|██▉ | 108276/371472 [8:36:36<21:44:48, 3.36it/s] 29%|██▉ | 108277/371472 [8:36:36<20:48:11, 3.51it/s] 29%|██▉ | 108278/371472 [8:36:36<20:51:16, 3.51it/s] 29%|██▉ | 108279/371472 [8:36:36<20:40:09, 3.54it/s] 29%|██▉ | 108280/371472 [8:36:37<21:01:33, 3.48it/s] {'loss': 3.428, 'learning_rate': 7.380034045120558e-07, 'epoch': 4.66} + 29%|██▉ | 108280/371472 [8:36:37<21:01:33, 3.48it/s] 29%|██▉ | 108281/371472 [8:36:37<20:16:23, 3.61it/s] 29%|██▉ | 108282/371472 [8:36:37<22:20:26, 3.27it/s] 29%|██▉ | 108283/371472 [8:36:38<22:18:14, 3.28it/s] 29%|██▉ | 108284/371472 [8:36:38<21:17:55, 3.43it/s] 29%|██▉ | 108285/371472 [8:36:38<22:56:14, 3.19it/s] 29%|██▉ | 108286/371472 [8:36:39<22:57:05, 3.19it/s] 29%|██▉ | 108287/371472 [8:36:39<22:28:55, 3.25it/s] 29%|██▉ | 108288/371472 [8:36:39<21:07:24, 3.46it/s] 29%|██▉ | 108289/371472 [8:36:39<20:34:45, 3.55it/s] 29%|██▉ | 108290/371472 [8:36:40<22:53:59, 3.19it/s] 29%|██▉ | 108291/371472 [8:36:40<23:22:46, 3.13it/s] 29%|██▉ | 108292/371472 [8:36:40<22:45:21, 3.21it/s] 29%|██▉ | 108293/371472 [8:36:41<22:35:53, 3.24it/s] 29%|██▉ | 108294/371472 [8:36:41<22:43:54, 3.22it/s] 29%|██▉ | 108295/371472 [8:36:41<21:37:22, 3.38it/s] 29%|██▉ | 108296/371472 [8:36:42<22:22:19, 3.27it/s] 29%|██▉ | 108297/371472 [8:36:42<22:07:59, 3.30it/s] 29%|██▉ | 108298/371472 [8:36:42<22:40:08, 3.22it/s] 29%|██▉ | 108299/371472 [8:36:43<22:12:28, 3.29it/s] 29%|██▉ | 108300/371472 [8:36:43<21:32:00, 3.39it/s] {'loss': 3.5412, 'learning_rate': 7.379549225365769e-07, 'epoch': 4.66} + 29%|██▉ | 108300/371472 [8:36:43<21:32:00, 3.39it/s] 29%|██▉ | 108301/371472 [8:36:43<21:21:46, 3.42it/s] 29%|██▉ | 108302/371472 [8:36:43<21:46:52, 3.36it/s] 29%|██▉ | 108303/371472 [8:36:44<21:21:46, 3.42it/s] 29%|██▉ | 108304/371472 [8:36:44<20:53:40, 3.50it/s] 29%|██▉ | 108305/371472 [8:36:44<20:07:39, 3.63it/s] 29%|██▉ | 108306/371472 [8:36:45<20:30:56, 3.56it/s] 29%|██▉ | 108307/371472 [8:36:45<20:46:41, 3.52it/s] 29%|██▉ | 108308/371472 [8:36:45<20:29:52, 3.57it/s] 29%|██▉ | 108309/371472 [8:36:45<22:06:18, 3.31it/s] 29%|██▉ | 108310/371472 [8:36:46<20:43:14, 3.53it/s] 29%|██▉ | 108311/371472 [8:36:46<20:10:19, 3.62it/s] 29%|██▉ | 108312/371472 [8:36:46<20:12:23, 3.62it/s] 29%|██▉ | 108313/371472 [8:36:46<19:43:08, 3.71it/s] 29%|██▉ | 108314/371472 [8:36:47<19:59:57, 3.66it/s] 29%|██▉ | 108315/371472 [8:36:47<20:27:23, 3.57it/s] 29%|██▉ | 108316/371472 [8:36:47<20:08:50, 3.63it/s] 29%|██▉ | 108317/371472 [8:36:48<20:43:08, 3.53it/s] 29%|██▉ | 108318/371472 [8:36:48<20:16:13, 3.61it/s] 29%|██▉ | 108319/371472 [8:36:48<20:13:00, 3.62it/s] 29%|██▉ | 108320/371472 [8:36:48<20:57:07, 3.49it/s] {'loss': 3.4767, 'learning_rate': 7.379064405610979e-07, 'epoch': 4.67} + 29%|██▉ | 108320/371472 [8:36:48<20:57:07, 3.49it/s] 29%|██▉ | 108321/371472 [8:36:49<19:53:21, 3.68it/s] 29%|██▉ | 108322/371472 [8:36:49<20:06:47, 3.63it/s] 29%|██▉ | 108323/371472 [8:36:49<19:54:52, 3.67it/s] 29%|██▉ | 108324/371472 [8:36:50<21:23:31, 3.42it/s] 29%|██▉ | 108325/371472 [8:36:50<20:53:48, 3.50it/s] 29%|██▉ | 108326/371472 [8:36:50<20:33:19, 3.56it/s] 29%|██▉ | 108327/371472 [8:36:50<21:10:01, 3.45it/s] 29%|██▉ | 108328/371472 [8:36:51<22:28:10, 3.25it/s] 29%|██▉ | 108329/371472 [8:36:51<21:54:04, 3.34it/s] 29%|██▉ | 108330/371472 [8:36:51<21:10:17, 3.45it/s] 29%|██▉ | 108331/371472 [8:36:52<22:16:37, 3.28it/s] 29%|██▉ | 108332/371472 [8:36:52<21:33:02, 3.39it/s] 29%|██▉ | 108333/371472 [8:36:52<20:55:52, 3.49it/s] 29%|██▉ | 108334/371472 [8:36:52<20:48:37, 3.51it/s] 29%|██▉ | 108335/371472 [8:36:53<22:13:40, 3.29it/s] 29%|██▉ | 108336/371472 [8:36:53<20:50:23, 3.51it/s] 29%|██▉ | 108337/371472 [8:36:53<21:38:11, 3.38it/s] 29%|██▉ | 108338/371472 [8:36:54<21:26:23, 3.41it/s] 29%|██▉ | 108339/371472 [8:36:54<20:59:17, 3.48it/s] 29%|██▉ | 108340/371472 [8:36:54<20:06:42, 3.63it/s] {'loss': 3.317, 'learning_rate': 7.378579585856191e-07, 'epoch': 4.67} + 29%|██▉ | 108340/371472 [8:36:54<20:06:42, 3.63it/s] 29%|██▉ | 108341/371472 [8:36:54<20:23:04, 3.59it/s] 29%|██▉ | 108342/371472 [8:36:55<19:40:15, 3.72it/s] 29%|██▉ | 108343/371472 [8:36:55<20:01:10, 3.65it/s] 29%|���█▉ | 108344/371472 [8:36:55<20:27:39, 3.57it/s] 29%|██▉ | 108345/371472 [8:36:56<20:13:58, 3.61it/s] 29%|██▉ | 108346/371472 [8:36:56<19:46:52, 3.69it/s] 29%|██▉ | 108347/371472 [8:36:56<20:26:11, 3.58it/s] 29%|██▉ | 108348/371472 [8:36:56<21:14:25, 3.44it/s] 29%|██▉ | 108349/371472 [8:36:57<20:42:41, 3.53it/s] 29%|██▉ | 108350/371472 [8:36:57<20:19:32, 3.60it/s] 29%|██▉ | 108351/371472 [8:36:57<21:23:30, 3.42it/s] 29%|██▉ | 108352/371472 [8:36:58<20:31:09, 3.56it/s] 29%|██▉ | 108353/371472 [8:36:58<20:18:53, 3.60it/s] 29%|██▉ | 108354/371472 [8:36:58<20:43:55, 3.53it/s] 29%|██▉ | 108355/371472 [8:36:58<20:20:23, 3.59it/s] 29%|██▉ | 108356/371472 [8:36:59<19:47:02, 3.69it/s] 29%|██▉ | 108357/371472 [8:36:59<19:50:05, 3.68it/s] 29%|██▉ | 108358/371472 [8:36:59<20:12:53, 3.62it/s] 29%|██▉ | 108359/371472 [8:36:59<20:18:39, 3.60it/s] 29%|██▉ | 108360/371472 [8:37:00<19:27:06, 3.76it/s] {'loss': 3.4289, 'learning_rate': 7.378094766101403e-07, 'epoch': 4.67} + 29%|██▉ | 108360/371472 [8:37:00<19:27:06, 3.76it/s] 29%|██▉ | 108361/371472 [8:37:00<21:17:39, 3.43it/s] 29%|██▉ | 108362/371472 [8:37:00<21:53:15, 3.34it/s] 29%|██▉ | 108363/371472 [8:37:01<21:00:26, 3.48it/s] 29%|██▉ | 108364/371472 [8:37:01<22:34:26, 3.24it/s] 29%|██▉ | 108365/371472 [8:37:01<21:06:51, 3.46it/s] 29%|██▉ | 108366/371472 [8:37:02<22:24:52, 3.26it/s] 29%|██▉ | 108367/371472 [8:37:02<21:29:54, 3.40it/s] 29%|██▉ | 108368/371472 [8:37:02<21:47:50, 3.35it/s] 29%|██▉ | 108369/371472 [8:37:02<20:53:52, 3.50it/s] 29%|██▉ | 108370/371472 [8:37:03<20:49:37, 3.51it/s] 29%|██▉ | 108371/371472 [8:37:03<19:49:55, 3.69it/s] 29%|██▉ | 108372/371472 [8:37:03<19:52:40, 3.68it/s] 29%|██▉ | 108373/371472 [8:37:04<19:53:27, 3.67it/s] 29%|██▉ | 108374/371472 [8:37:04<19:57:37, 3.66it/s] 29%|██▉ | 108375/371472 [8:37:04<19:10:34, 3.81it/s] 29%|██▉ | 108376/371472 [8:37:04<18:43:33, 3.90it/s] 29%|██▉ | 108377/371472 [8:37:05<19:17:48, 3.79it/s] 29%|██▉ | 108378/371472 [8:37:05<19:16:40, 3.79it/s] 29%|██▉ | 108379/371472 [8:37:05<19:23:14, 3.77it/s] 29%|██▉ | 108380/371472 [8:37:05<18:49:23, 3.88it/s] {'loss': 3.5949, 'learning_rate': 7.377609946346614e-07, 'epoch': 4.67} + 29%|██▉ | 108380/371472 [8:37:05<18:49:23, 3.88it/s] 29%|██▉ | 108381/371472 [8:37:06<18:44:55, 3.90it/s] 29%|██▉ | 108382/371472 [8:37:06<18:49:29, 3.88it/s] 29%|██▉ | 108383/371472 [8:37:06<19:12:45, 3.80it/s] 29%|██▉ | 108384/371472 [8:37:06<19:44:28, 3.70it/s] 29%|██▉ | 108385/371472 [8:37:07<19:02:42, 3.84it/s] 29%|██▉ | 108386/371472 [8:37:07<19:02:44, 3.84it/s] 29%|██▉ | 108387/371472 [8:37:07<19:06:17, 3.83it/s] 29%|██▉ | 108388/371472 [8:37:07<19:14:16, 3.80it/s] 29%|██▉ | 108389/371472 [8:37:08<19:42:50, 3.71it/s] 29%|██▉ | 108390/371472 [8:37:08<20:26:33, 3.57it/s] 29%|██▉ | 108391/371472 [8:37:08<20:08:06, 3.63it/s] 29%|██▉ | 108392/371472 [8:37:09<19:36:47, 3.73it/s] 29%|██▉ | 108393/371472 [8:37:09<20:24:21, 3.58it/s] 29%|██▉ | 108394/371472 [8:37:09<20:31:34, 3.56it/s] 29%|██▉ | 108395/371472 [8:37:09<20:42:39, 3.53it/s] 29%|██▉ | 108396/371472 [8:37:10<20:45:16, 3.52it/s] 29%|██▉ | 108397/371472 [8:37:10<20:32:04, 3.56it/s] 29%|██▉ | 108398/371472 [8:37:10<20:37:45, 3.54it/s] 29%|██▉ | 108399/371472 [8:37:11<22:35:47, 3.23it/s] 29%|██▉ | 108400/371472 [8:37:11<22:02:22, 3.32it/s] {'loss': 3.3951, 'learning_rate': 7.377125126591824e-07, 'epoch': 4.67} + 29%|██▉ | 108400/371472 [8:37:11<22:02:22, 3.32it/s] 29%|██▉ | 108401/371472 [8:37:11<21:10:25, 3.45it/s] 29%|██▉ | 108402/371472 [8:37:12<22:27:56, 3.25it/s] 29%|██▉ | 108403/371472 [8:37:12<22:03:40, 3.31it/s] 29%|██▉ | 108404/371472 [8:37:12<21:56:39, 3.33it/s] 29%|██▉ | 108405/371472 [8:37:12<20:54:00, 3.50it/s] 29%|██▉ | 108406/371472 [8:37:13<20:23:42, 3.58it/s] 29%|██▉ | 108407/371472 [8:37:13<22:29:00, 3.25it/s] 29%|██▉ | 108408/371472 [8:37:13<21:01:59, 3.47it/s] 29%|██▉ | 108409/371472 [8:37:14<21:01:31, 3.48it/s] 29%|██▉ | 108410/371472 [8:37:14<21:08:23, 3.46it/s] 29%|██▉ | 108411/371472 [8:37:14<21:02:04, 3.47it/s] 29%|██▉ | 108412/371472 [8:37:14<21:33:07, 3.39it/s] 29%|██▉ | 108413/371472 [8:37:15<20:40:32, 3.53it/s] 29%|██▉ | 108414/371472 [8:37:15<20:41:12, 3.53it/s] 29%|██▉ | 108415/371472 [8:37:15<20:03:37, 3.64it/s] 29%|██▉ | 108416/371472 [8:37:15<19:53:15, 3.67it/s] 29%|██▉ | 108417/371472 [8:37:16<19:50:17, 3.68it/s] 29%|██▉ | 108418/371472 [8:37:16<19:30:14, 3.75it/s] 29%|██▉ | 108419/371472 [8:37:16<19:17:08, 3.79it/s] 29%|██▉ | 108420/371472 [8:37:17<20:53:19, 3.50it/s] {'loss': 3.2444, 'learning_rate': 7.376640306837035e-07, 'epoch': 4.67} + 29%|██▉ | 108420/371472 [8:37:17<20:53:19, 3.50it/s] 29%|██▉ | 108421/371472 [8:37:17<20:53:01, 3.50it/s] 29%|██▉ | 108422/371472 [8:37:17<22:02:22, 3.32it/s] 29%|██▉ | 108423/371472 [8:37:17<21:16:49, 3.43it/s] 29%|██▉ | 108424/371472 [8:37:18<21:25:11, 3.41it/s] 29%|██▉ | 108425/371472 [8:37:18<21:10:13, 3.45it/s] 29%|██▉ | 108426/371472 [8:37:18<21:01:21, 3.48it/s] 29%|██▉ | 108427/371472 [8:37:19<21:37:04, 3.38it/s] 29%|██▉ | 108428/371472 [8:37:19<21:31:40, 3.39it/s] 29%|██▉ | 108429/371472 [8:37:19<20:56:32, 3.49it/s] 29%|██▉ | 108430/371472 [8:37:19<20:30:23, 3.56it/s] 29%|██▉ | 108431/371472 [8:37:20<21:11:02, 3.45it/s] 29%|██▉ | 108432/371472 [8:37:20<20:46:57, 3.52it/s] 29%|██▉ | 108433/371472 [8:37:20<22:44:41, 3.21it/s] 29%|██▉ | 108434/371472 [8:37:21<21:59:33, 3.32it/s] 29%|██▉ | 108435/371472 [8:37:21<20:58:29, 3.48it/s] 29%|██▉ | 108436/371472 [8:37:21<21:54:32, 3.33it/s] 29%|██▉ | 108437/371472 [8:37:22<21:08:24, 3.46it/s] 29%|██▉ | 108438/371472 [8:37:22<21:41:11, 3.37it/s] 29%|██▉ | 108439/371472 [8:37:22<21:41:51, 3.37it/s] 29%|██▉ | 108440/371472 [8:37:22<20:52:58, 3.50it/s] {'loss': 3.4278, 'learning_rate': 7.376155487082247e-07, 'epoch': 4.67} + 29%|██▉ | 108440/371472 [8:37:22<20:52:58, 3.50it/s] 29%|██▉ | 108441/371472 [8:37:23<20:56:09, 3.49it/s] 29%|██▉ | 108442/371472 [8:37:23<23:07:54, 3.16it/s] 29%|██▉ | 108443/371472 [8:37:23<22:10:12, 3.30it/s] 29%|██▉ | 108444/371472 [8:37:24<21:30:26, 3.40it/s] 29%|██▉ | 108445/371472 [8:37:24<21:14:21, 3.44it/s] 29%|██▉ | 108446/371472 [8:37:24<21:11:25, 3.45it/s] 29%|██▉ | 108447/371472 [8:37:25<21:13:34, 3.44it/s] 29%|██▉ | 108448/371472 [8:37:25<21:04:01, 3.47it/s] 29%|██▉ | 108449/371472 [8:37:25<21:50:18, 3.35it/s] 29%|██▉ | 108450/371472 [8:37:25<21:10:50, 3.45it/s] 29%|██▉ | 108451/371472 [8:37:26<20:19:38, 3.59it/s] 29%|██▉ | 108452/371472 [8:37:26<20:14:49, 3.61it/s] 29%|██▉ | 108453/371472 [8:37:26<21:09:56, 3.45it/s] 29%|██▉ | 108454/371472 [8:37:27<20:46:28, 3.52it/s] 29%|██▉ | 108455/371472 [8:37:27<21:45:02, 3.36it/s] 29%|██▉ | 108456/371472 [8:37:27<20:28:11, 3.57it/s] 29%|██▉ | 108457/371472 [8:37:27<20:30:20, 3.56it/s] 29%|██▉ | 108458/371472 [8:37:28<20:49:11, 3.51it/s] 29%|██▉ | 108459/371472 [8:37:28<21:28:09, 3.40it/s] 29%|██▉ | 108460/371472 [8:37:28<21:33:07, 3.39it/s] {'loss': 3.3376, 'learning_rate': 7.375670667327458e-07, 'epoch': 4.67} + 29%|██▉ | 108460/371472 [8:37:28<21:33:07, 3.39it/s] 29%|██▉ | 108461/371472 [8:37:29<21:02:54, 3.47it/s] 29%|██▉ | 108462/371472 [8:37:29<20:11:31, 3.62it/s] 29%|██▉ | 108463/371472 [8:37:29<20:12:39, 3.61it/s] 29%|██▉ | 108464/371472 [8:37:29<20:02:36, 3.64it/s] 29%|██▉ | 108465/371472 [8:37:30<20:53:55, 3.50it/s] 29%|██▉ | 108466/371472 [8:37:30<20:31:28, 3.56it/s] 29%|██▉ | 108467/371472 [8:37:30<22:11:17, 3.29it/s] 29%|██▉ | 108468/371472 [8:37:31<21:52:55, 3.34it/s] 29%|██▉ | 108469/371472 [8:37:31<21:40:11, 3.37it/s] 29%|██▉ | 108470/371472 [8:37:31<23:28:14, 3.11it/s] 29%|██▉ | 108471/371472 [8:37:31<22:03:18, 3.31it/s] 29%|██▉ | 108472/371472 [8:37:32<21:25:41, 3.41it/s] 29%|██▉ | 108473/371472 [8:37:32<21:34:46, 3.39it/s] 29%|██▉ | 108474/371472 [8:37:32<21:47:27, 3.35it/s] 29%|██▉ | 108475/371472 [8:37:33<21:52:26, 3.34it/s] 29%|██▉ | 108476/371472 [8:37:33<21:42:09, 3.37it/s] 29%|██▉ | 108477/371472 [8:37:33<20:43:55, 3.52it/s] 29%|██▉ | 108478/371472 [8:37:33<20:33:31, 3.55it/s] 29%|██▉ | 108479/371472 [8:37:34<19:34:19, 3.73it/s] 29%|██▉ | 108480/371472 [8:37:34<19:19:47, 3.78it/s] {'loss': 3.3744, 'learning_rate': 7.375185847572668e-07, 'epoch': 4.67} + 29%|██▉ | 108480/371472 [8:37:34<19:19:47, 3.78it/s] 29%|██▉ | 108481/371472 [8:37:34<19:47:12, 3.69it/s] 29%|██▉ | 108482/371472 [8:37:35<19:27:47, 3.75it/s] 29%|██▉ | 108483/371472 [8:37:35<19:54:24, 3.67it/s] 29%|██▉ | 108484/371472 [8:37:35<19:53:13, 3.67it/s] 29%|██▉ | 108485/371472 [8:37:35<20:15:24, 3.61it/s] 29%|██▉ | 108486/371472 [8:37:36<20:52:05, 3.50it/s] 29%|██▉ | 108487/371472 [8:37:36<23:19:28, 3.13it/s] 29%|██▉ | 108488/371472 [8:37:36<22:33:13, 3.24it/s] 29%|██▉ | 108489/371472 [8:37:37<21:55:42, 3.33it/s] 29%|██▉ | 108490/371472 [8:37:37<20:48:29, 3.51it/s] 29%|██▉ | 108491/371472 [8:37:37<20:45:39, 3.52it/s] 29%|██▉ | 108492/371472 [8:37:37<20:03:23, 3.64it/s] 29%|██▉ | 108493/371472 [8:37:38<20:47:22, 3.51it/s] 29%|██▉ | 108494/371472 [8:37:38<20:13:20, 3.61it/s] 29%|██▉ | 108495/371472 [8:37:38<20:17:25, 3.60it/s] 29%|██▉ | 108496/371472 [8:37:39<19:36:03, 3.73it/s] 29%|██▉ | 108497/371472 [8:37:39<19:22:49, 3.77it/s] 29%|██▉ | 108498/371472 [8:37:39<20:12:42, 3.61it/s] 29%|██▉ | 108499/371472 [8:37:39<20:45:56, 3.52it/s] 29%|██▉ | 108500/371472 [8:37:40<20:17:11, 3.60it/s] {'loss': 3.5111, 'learning_rate': 7.37470102781788e-07, 'epoch': 4.67} + 29%|██▉ | 108500/371472 [8:37:40<20:17:11, 3.60it/s] 29%|██▉ | 108501/371472 [8:37:40<21:34:28, 3.39it/s] 29%|██▉ | 108502/371472 [8:37:40<21:23:55, 3.41it/s] 29%|██▉ | 108503/371472 [8:37:41<21:09:35, 3.45it/s] 29%|██▉ | 108504/371472 [8:37:41<21:57:22, 3.33it/s] 29%|██▉ | 108505/371472 [8:37:41<22:51:36, 3.20it/s] 29%|██▉ | 108506/371472 [8:37:41<21:15:52, 3.44it/s] 29%|██▉ | 108507/371472 [8:37:42<20:42:47, 3.53it/s] 29%|██▉ | 108508/371472 [8:37:42<21:13:23, 3.44it/s] 29%|██▉ | 108509/371472 [8:37:42<21:36:34, 3.38it/s] 29%|██▉ | 108510/371472 [8:37:43<21:28:43, 3.40it/s] 29%|██▉ | 108511/371472 [8:37:43<21:26:37, 3.41it/s] 29%|██▉ | 108512/371472 [8:37:43<21:21:07, 3.42it/s] 29%|██▉ | 108513/371472 [8:37:44<23:07:58, 3.16it/s] 29%|██▉ | 108514/371472 [8:37:44<21:16:22, 3.43it/s] 29%|██▉ | 108515/371472 [8:37:44<20:54:23, 3.49it/s] 29%|██▉ | 108516/371472 [8:37:44<20:14:21, 3.61it/s] 29%|██▉ | 108517/371472 [8:37:45<19:20:38, 3.78it/s] 29%|██▉ | 108518/371472 [8:37:45<20:06:45, 3.63it/s] 29%|██▉ | 108519/371472 [8:37:45<19:49:36, 3.68it/s] 29%|██▉ | 108520/371472 [8:37:45<19:31:32, 3.74it/s] {'loss': 3.6147, 'learning_rate': 7.374216208063092e-07, 'epoch': 4.67} + 29%|██▉ | 108520/371472 [8:37:45<19:31:32, 3.74it/s] 29%|██▉ | 108521/371472 [8:37:46<19:38:41, 3.72it/s] 29%|██▉ | 108522/371472 [8:37:46<19:55:57, 3.66it/s] 29%|██▉ | 108523/371472 [8:37:46<20:21:50, 3.59it/s] 29%|██▉ | 108524/371472 [8:37:47<20:03:06, 3.64it/s] 29%|██▉ | 108525/371472 [8:37:47<19:41:01, 3.71it/s] 29%|██▉ | 108526/371472 [8:37:47<19:57:47, 3.66it/s] 29%|██▉ | 108527/371472 [8:37:47<20:54:38, 3.49it/s] 29%|██▉ | 108528/371472 [8:37:48<20:17:04, 3.60it/s] 29%|██▉ | 108529/371472 [8:37:48<21:02:00, 3.47it/s] 29%|██▉ | 108530/371472 [8:37:48<20:59:56, 3.48it/s] 29%|██▉ | 108531/371472 [8:37:49<21:20:37, 3.42it/s] 29%|██▉ | 108532/371472 [8:37:49<20:54:17, 3.49it/s] 29%|██▉ | 108533/371472 [8:37:49<20:28:34, 3.57it/s] 29%|██▉ | 108534/371472 [8:37:49<20:25:37, 3.58it/s] 29%|██▉ | 108535/371472 [8:37:50<19:55:02, 3.67it/s] 29%|██▉ | 108536/371472 [8:37:50<21:24:43, 3.41it/s] 29%|██▉ | 108537/371472 [8:37:50<21:11:22, 3.45it/s] 29%|██▉ | 108538/371472 [8:37:50<20:23:43, 3.58it/s] 29%|██▉ | 108539/371472 [8:37:51<21:11:51, 3.45it/s] 29%|██▉ | 108540/371472 [8:37:51<20:04:41, 3.64it/s] {'loss': 3.454, 'learning_rate': 7.373731388308302e-07, 'epoch': 4.68} + 29%|██▉ | 108540/371472 [8:37:51<20:04:41, 3.64it/s] 29%|██▉ | 108541/371472 [8:37:51<20:04:01, 3.64it/s] 29%|██▉ | 108542/371472 [8:37:52<20:29:46, 3.56it/s] 29%|██▉ | 108543/371472 [8:37:52<20:31:55, 3.56it/s] 29%|██▉ | 108544/371472 [8:37:52<20:54:54, 3.49it/s] 29%|██▉ | 108545/371472 [8:37:52<20:14:18, 3.61it/s] 29%|██▉ | 108546/371472 [8:37:53<20:19:48, 3.59it/s] 29%|██▉ | 108547/371472 [8:37:53<19:35:46, 3.73it/s] 29%|██▉ | 108548/371472 [8:37:53<21:16:48, 3.43it/s] 29%|██▉ | 108549/371472 [8:37:54<20:45:07, 3.52it/s] 29%|██▉ | 108550/371472 [8:37:54<21:18:36, 3.43it/s] 29%|██▉ | 108551/371472 [8:37:54<21:35:12, 3.38it/s] 29%|██▉ | 108552/371472 [8:37:55<22:23:51, 3.26it/s] 29%|██▉ | 108553/371472 [8:37:55<21:19:32, 3.42it/s] 29%|██▉ | 108554/371472 [8:37:55<20:37:05, 3.54it/s] 29%|██▉ | 108555/371472 [8:37:55<20:02:04, 3.65it/s] 29%|██▉ | 108556/371472 [8:37:56<20:20:47, 3.59it/s] 29%|██▉ | 108557/371472 [8:37:56<20:20:25, 3.59it/s] 29%|██▉ | 108558/371472 [8:37:56<20:17:06, 3.60it/s] 29%|██▉ | 108559/371472 [8:37:56<21:35:01, 3.38it/s] 29%|██▉ | 108560/371472 [8:37:57<20:29:15, 3.56it/s] {'loss': 3.3569, 'learning_rate': 7.373246568553513e-07, 'epoch': 4.68} + 29%|██▉ | 108560/371472 [8:37:57<20:29:15, 3.56it/s] 29%|██▉ | 108561/371472 [8:37:57<21:06:43, 3.46it/s] 29%|██▉ | 108562/371472 [8:37:57<20:37:09, 3.54it/s] 29%|██▉ | 108563/371472 [8:37:58<20:25:31, 3.58it/s] 29%|██▉ | 108564/371472 [8:37:58<20:41:17, 3.53it/s] 29%|██▉ | 108565/371472 [8:37:58<20:42:19, 3.53it/s] 29%|██▉ | 108566/371472 [8:37:58<20:08:45, 3.63it/s] 29%|██▉ | 108567/371472 [8:37:59<19:40:45, 3.71it/s] 29%|██▉ | 108568/371472 [8:37:59<20:38:24, 3.54it/s] 29%|██▉ | 108569/371472 [8:37:59<19:43:27, 3.70it/s] 29%|██▉ | 108570/371472 [8:37:59<19:30:37, 3.74it/s] 29%|██▉ | 108571/371472 [8:38:00<19:39:13, 3.72it/s] 29%|██▉ | 108572/371472 [8:38:00<19:46:09, 3.69it/s] 29%|██▉ | 108573/371472 [8:38:00<19:37:34, 3.72it/s] 29%|██▉ | 108574/371472 [8:38:01<20:18:11, 3.60it/s] 29%|██▉ | 108575/371472 [8:38:01<20:58:50, 3.48it/s] 29%|██▉ | 108576/371472 [8:38:01<20:10:40, 3.62it/s] 29%|██▉ | 108577/371472 [8:38:01<20:40:05, 3.53it/s] 29%|██▉ | 108578/371472 [8:38:02<20:09:52, 3.62it/s] 29%|██▉ | 108579/371472 [8:38:02<19:40:11, 3.71it/s] 29%|██▉ | 108580/371472 [8:38:02<19:28:09, 3.75it/s] {'loss': 3.5428, 'learning_rate': 7.372761748798724e-07, 'epoch': 4.68} + 29%|██▉ | 108580/371472 [8:38:02<19:28:09, 3.75it/s] 29%|██▉ | 108581/371472 [8:38:03<20:07:37, 3.63it/s] 29%|██▉ | 108582/371472 [8:38:03<21:12:52, 3.44it/s] 29%|██▉ | 108583/371472 [8:38:03<20:33:05, 3.55it/s] 29%|██▉ | 108584/371472 [8:38:03<21:23:40, 3.41it/s] 29%|██▉ | 108585/371472 [8:38:04<20:52:42, 3.50it/s] 29%|██▉ | 108586/371472 [8:38:04<21:04:56, 3.46it/s] 29%|██▉ | 108587/371472 [8:38:04<20:52:03, 3.50it/s] 29%|██▉ | 108588/371472 [8:38:05<20:30:28, 3.56it/s] 29%|██▉ | 108589/371472 [8:38:05<20:12:46, 3.61it/s] 29%|██▉ | 108590/371472 [8:38:05<19:45:28, 3.70it/s] 29%|██▉ | 108591/371472 [8:38:05<20:25:15, 3.58it/s] 29%|██▉ | 108592/371472 [8:38:06<20:18:45, 3.59it/s] 29%|██▉ | 108593/371472 [8:38:06<20:00:44, 3.65it/s] 29%|██▉ | 108594/371472 [8:38:06<19:47:29, 3.69it/s] 29%|██▉ | 108595/371472 [8:38:06<19:22:39, 3.77it/s] 29%|██▉ | 108596/371472 [8:38:07<19:21:32, 3.77it/s] 29%|██▉ | 108597/371472 [8:38:07<20:18:58, 3.59it/s] 29%|██▉ | 108598/371472 [8:38:07<20:32:42, 3.55it/s] 29%|██▉ | 108599/371472 [8:38:08<21:02:59, 3.47it/s] 29%|██▉ | 108600/371472 [8:38:08<20:43:55, 3.52it/s] {'loss': 3.4574, 'learning_rate': 7.372276929043936e-07, 'epoch': 4.68} + 29%|██▉ | 108600/371472 [8:38:08<20:43:55, 3.52it/s] 29%|██▉ | 108601/371472 [8:38:08<20:09:42, 3.62it/s] 29%|██▉ | 108602/371472 [8:38:08<20:36:11, 3.54it/s] 29%|██▉ | 108603/371472 [8:38:09<21:18:19, 3.43it/s] 29%|██▉ | 108604/371472 [8:38:09<20:00:30, 3.65it/s] 29%|██▉ | 108605/371472 [8:38:09<20:46:30, 3.51it/s] 29%|██▉ | 108606/371472 [8:38:10<21:05:58, 3.46it/s] 29%|██▉ | 108607/371472 [8:38:10<20:26:53, 3.57it/s] 29%|██▉ | 108608/371472 [8:38:10<19:59:45, 3.65it/s] 29%|██▉ | 108609/371472 [8:38:10<21:35:54, 3.38it/s] 29%|██▉ | 108610/371472 [8:38:11<20:25:45, 3.57it/s] 29%|██▉ | 108611/371472 [8:38:11<20:33:36, 3.55it/s] 29%|██▉ | 108612/371472 [8:38:11<20:26:23, 3.57it/s] 29%|██▉ | 108613/371472 [8:38:12<20:39:05, 3.54it/s] 29%|██▉ | 108614/371472 [8:38:12<21:04:47, 3.46it/s] 29%|██▉ | 108615/371472 [8:38:12<20:05:00, 3.64it/s] 29%|██▉ | 108616/371472 [8:38:12<20:33:12, 3.55it/s] 29%|██▉ | 108617/371472 [8:38:13<20:14:28, 3.61it/s] 29%|██▉ | 108618/371472 [8:38:13<20:12:57, 3.61it/s] 29%|██▉ | 108619/371472 [8:38:13<20:00:12, 3.65it/s] 29%|██▉ | 108620/371472 [8:38:13<20:37:38, 3.54it/s] {'loss': 3.4242, 'learning_rate': 7.371792109289146e-07, 'epoch': 4.68} + 29%|██▉ | 108620/371472 [8:38:13<20:37:38, 3.54it/s] 29%|██▉ | 108621/371472 [8:38:14<20:15:46, 3.60it/s] 29%|██▉ | 108622/371472 [8:38:14<22:21:42, 3.27it/s] 29%|██▉ | 108623/371472 [8:38:14<21:20:14, 3.42it/s] 29%|██▉ | 108624/371472 [8:38:15<20:36:54, 3.54it/s] 29%|██▉ | 108625/371472 [8:38:15<20:48:27, 3.51it/s] 29%|██▉ | 108626/371472 [8:38:15<19:56:13, 3.66it/s] 29%|██▉ | 108627/371472 [8:38:15<19:34:18, 3.73it/s] 29%|██▉ | 108628/371472 [8:38:16<20:58:31, 3.48it/s] 29%|██▉ | 108629/371472 [8:38:16<20:30:00, 3.56it/s] 29%|██▉ | 108630/371472 [8:38:16<19:43:09, 3.70it/s] 29%|██▉ | 108631/371472 [8:38:17<22:37:28, 3.23it/s] 29%|██▉ | 108632/371472 [8:38:17<21:51:10, 3.34it/s] 29%|██▉ | 108633/371472 [8:38:17<20:57:08, 3.48it/s] 29%|██▉ | 108634/371472 [8:38:18<22:12:36, 3.29it/s] 29%|██▉ | 108635/371472 [8:38:18<22:08:30, 3.30it/s] 29%|██▉ | 108636/371472 [8:38:18<22:11:10, 3.29it/s] 29%|██▉ | 108637/371472 [8:38:18<21:46:02, 3.35it/s] 29%|██▉ | 108638/371472 [8:38:19<20:47:29, 3.51it/s] 29%|██▉ | 108639/371472 [8:38:19<20:36:24, 3.54it/s] 29%|██▉ | 108640/371472 [8:38:19<20:57:35, 3.48it/s] {'loss': 3.2827, 'learning_rate': 7.371307289534357e-07, 'epoch': 4.68} + 29%|██▉ | 108640/371472 [8:38:19<20:57:35, 3.48it/s] 29%|██▉ | 108641/371472 [8:38:20<19:56:47, 3.66it/s] 29%|██▉ | 108642/371472 [8:38:20<20:28:39, 3.57it/s] 29%|██▉ | 108643/371472 [8:38:20<20:45:14, 3.52it/s] 29%|██▉ | 108644/371472 [8:38:20<21:16:46, 3.43it/s] 29%|██▉ | 108645/371472 [8:38:21<20:31:52, 3.56it/s] 29%|██▉ | 108646/371472 [8:38:21<20:47:03, 3.51it/s] 29%|██▉ | 108647/371472 [8:38:21<21:25:59, 3.41it/s] 29%|██▉ | 108648/371472 [8:38:22<21:42:14, 3.36it/s] 29%|██▉ | 108649/371472 [8:38:22<21:09:03, 3.45it/s] 29%|██▉ | 108650/371472 [8:38:22<20:55:13, 3.49it/s] 29%|██▉ | 108651/371472 [8:38:22<20:19:48, 3.59it/s] 29%|██▉ | 108652/371472 [8:38:23<20:26:46, 3.57it/s] 29%|██▉ | 108653/371472 [8:38:23<20:01:55, 3.64it/s] 29%|██▉ | 108654/371472 [8:38:23<20:03:57, 3.64it/s] 29%|██▉ | 108655/371472 [8:38:23<20:00:46, 3.65it/s] 29%|██▉ | 108656/371472 [8:38:24<19:26:44, 3.75it/s] 29%|██▉ | 108657/371472 [8:38:24<20:15:18, 3.60it/s] 29%|██▉ | 108658/371472 [8:38:24<20:34:07, 3.55it/s] 29%|██▉ | 108659/371472 [8:38:25<19:51:03, 3.68it/s] 29%|██▉ | 108660/371472 [8:38:25<20:31:11, 3.56it/s] {'loss': 3.209, 'learning_rate': 7.370822469779568e-07, 'epoch': 4.68} + 29%|██▉ | 108660/371472 [8:38:25<20:31:11, 3.56it/s] 29%|██▉ | 108661/371472 [8:38:25<20:44:39, 3.52it/s] 29%|██▉ | 108662/371472 [8:38:26<21:38:40, 3.37it/s] 29%|██▉ | 108663/371472 [8:38:26<21:30:49, 3.39it/s] 29%|██▉ | 108664/371472 [8:38:26<20:51:34, 3.50it/s] 29%|██▉ | 108665/371472 [8:38:26<20:38:19, 3.54it/s] 29%|██▉ | 108666/371472 [8:38:27<20:20:20, 3.59it/s] 29%|██▉ | 108667/371472 [8:38:27<20:25:30, 3.57it/s] 29%|██▉ | 108668/371472 [8:38:27<21:05:42, 3.46it/s] 29%|██▉ | 108669/371472 [8:38:27<19:52:21, 3.67it/s] 29%|██▉ | 108670/371472 [8:38:28<20:02:23, 3.64it/s] 29%|██▉ | 108671/371472 [8:38:28<19:37:27, 3.72it/s] 29%|██▉ | 108672/371472 [8:38:28<20:46:08, 3.51it/s] 29%|██▉ | 108673/371472 [8:38:29<20:05:01, 3.63it/s] 29%|██▉ | 108674/371472 [8:38:29<19:51:49, 3.67it/s] 29%|██▉ | 108675/371472 [8:38:29<20:18:34, 3.59it/s] 29%|██▉ | 108676/371472 [8:38:29<19:55:57, 3.66it/s] 29%|██▉ | 108677/371472 [8:38:30<20:12:46, 3.61it/s] 29%|██▉ | 108678/371472 [8:38:30<19:40:46, 3.71it/s] 29%|██▉ | 108679/371472 [8:38:30<22:35:35, 3.23it/s] 29%|██▉ | 108680/371472 [8:38:31<21:56:31, 3.33it/s] {'loss': 3.3644, 'learning_rate': 7.37033765002478e-07, 'epoch': 4.68} + 29%|██▉ | 108680/371472 [8:38:31<21:56:31, 3.33it/s] 29%|██▉ | 108681/371472 [8:38:31<23:00:57, 3.17it/s] 29%|██▉ | 108682/371472 [8:38:31<22:30:12, 3.24it/s] 29%|██▉ | 108683/371472 [8:38:32<22:16:33, 3.28it/s] 29%|██▉ | 108684/371472 [8:38:32<21:31:32, 3.39it/s] 29%|██▉ | 108685/371472 [8:38:32<21:01:05, 3.47it/s] 29%|██▉ | 108686/371472 [8:38:32<21:08:57, 3.45it/s] 29%|██▉ | 108687/371472 [8:38:33<20:34:01, 3.55it/s] 29%|██▉ | 108688/371472 [8:38:33<19:47:15, 3.69it/s] 29%|██▉ | 108689/371472 [8:38:33<19:40:47, 3.71it/s] 29%|██▉ | 108690/371472 [8:38:33<20:12:23, 3.61it/s] 29%|██▉ | 108691/371472 [8:38:34<20:05:32, 3.63it/s] 29%|██▉ | 108692/371472 [8:38:34<20:29:32, 3.56it/s] 29%|██▉ | 108693/371472 [8:38:34<23:30:30, 3.11it/s] 29%|██▉ | 108694/371472 [8:38:35<23:55:24, 3.05it/s] 29%|██▉ | 108695/371472 [8:38:35<23:49:24, 3.06it/s] 29%|██▉ | 108696/371472 [8:38:35<22:20:01, 3.27it/s] 29%|██▉ | 108697/371472 [8:38:36<21:46:05, 3.35it/s] 29%|██▉ | 108698/371472 [8:38:36<21:52:14, 3.34it/s] 29%|██▉ | 108699/371472 [8:38:36<22:43:11, 3.21it/s] 29%|██▉ | 108700/371472 [8:38:37<21:36:32, 3.38it/s] {'loss': 3.4966, 'learning_rate': 7.36985283026999e-07, 'epoch': 4.68} + 29%|██▉ | 108700/371472 [8:38:37<21:36:32, 3.38it/s] 29%|██▉ | 108701/371472 [8:38:37<21:53:05, 3.34it/s] 29%|██▉ | 108702/371472 [8:38:37<21:55:17, 3.33it/s] 29%|██▉ | 108703/371472 [8:38:37<21:07:38, 3.45it/s] 29%|██▉ | 108704/371472 [8:38:38<20:34:52, 3.55it/s] 29%|██▉ | 108705/371472 [8:38:38<20:36:01, 3.54it/s] 29%|██▉ | 108706/371472 [8:38:38<20:25:14, 3.57it/s] 29%|██▉ | 108707/371472 [8:38:39<20:58:32, 3.48it/s] 29%|██▉ | 108708/371472 [8:38:39<20:24:00, 3.58it/s] 29%|██▉ | 108709/371472 [8:38:39<21:37:34, 3.38it/s] 29%|██▉ | 108710/371472 [8:38:39<21:04:07, 3.46it/s] 29%|██▉ | 108711/371472 [8:38:40<20:51:04, 3.50it/s] 29%|██▉ | 108712/371472 [8:38:40<21:03:54, 3.46it/s] 29%|██▉ | 108713/371472 [8:38:40<20:19:35, 3.59it/s] 29%|██▉ | 108714/371472 [8:38:40<19:24:04, 3.76it/s] 29%|██▉ | 108715/371472 [8:38:41<20:16:56, 3.60it/s] 29%|██▉ | 108716/371472 [8:38:41<20:17:56, 3.60it/s] 29%|██▉ | 108717/371472 [8:38:41<19:51:29, 3.68it/s] 29%|██▉ | 108718/371472 [8:38:42<20:02:00, 3.64it/s] 29%|██▉ | 108719/371472 [8:38:42<21:52:35, 3.34it/s] 29%|██▉ | 108720/371472 [8:38:42<21:59:57, 3.32it/s] {'loss': 3.1675, 'learning_rate': 7.369368010515201e-07, 'epoch': 4.68} + 29%|██▉ | 108720/371472 [8:38:42<21:59:57, 3.32it/s] 29%|██▉ | 108721/371472 [8:38:43<22:38:00, 3.22it/s] 29%|██▉ | 108722/371472 [8:38:43<21:51:03, 3.34it/s] 29%|██▉ | 108723/371472 [8:38:43<21:00:48, 3.47it/s] 29%|██▉ | 108724/371472 [8:38:43<20:17:44, 3.60it/s] 29%|██▉ | 108725/371472 [8:38:44<19:43:38, 3.70it/s] 29%|██▉ | 108726/371472 [8:38:44<20:23:08, 3.58it/s] 29%|██▉ | 108727/371472 [8:38:44<19:44:39, 3.70it/s] 29%|██▉ | 108728/371472 [8:38:44<20:00:37, 3.65it/s] 29%|██▉ | 108729/371472 [8:38:45<19:38:06, 3.72it/s] 29%|██▉ | 108730/371472 [8:38:45<21:00:39, 3.47it/s] 29%|██▉ | 108731/371472 [8:38:45<21:07:37, 3.45it/s] 29%|██▉ | 108732/371472 [8:38:46<21:41:46, 3.36it/s] 29%|██▉ | 108733/371472 [8:38:46<21:41:24, 3.36it/s] 29%|██▉ | 108734/371472 [8:38:46<21:09:22, 3.45it/s] 29%|██▉ | 108735/371472 [8:38:46<20:41:25, 3.53it/s] 29%|██▉ | 108736/371472 [8:38:47<20:14:15, 3.61it/s] 29%|██▉ | 108737/371472 [8:38:47<20:26:48, 3.57it/s] 29%|██▉ | 108738/371472 [8:38:47<21:55:27, 3.33it/s] 29%|██▉ | 108739/371472 [8:38:48<20:55:09, 3.49it/s] 29%|██▉ | 108740/371472 [8:38:48<20:32:41, 3.55it/s] {'loss': 3.3223, 'learning_rate': 7.368883190760413e-07, 'epoch': 4.68} + 29%|██▉ | 108740/371472 [8:38:48<20:32:41, 3.55it/s] 29%|██▉ | 108741/371472 [8:38:48<20:56:46, 3.48it/s] 29%|██▉ | 108742/371472 [8:38:49<23:08:49, 3.15it/s] 29%|██▉ | 108743/371472 [8:38:49<21:37:57, 3.37it/s] 29%|██▉ | 108744/371472 [8:38:49<20:07:52, 3.63it/s] 29%|██▉ | 108745/371472 [8:38:49<21:17:18, 3.43it/s] 29%|██▉ | 108746/371472 [8:38:50<20:40:15, 3.53it/s] 29%|██▉ | 108747/371472 [8:38:50<20:02:55, 3.64it/s] 29%|██▉ | 108748/371472 [8:38:50<20:24:04, 3.58it/s] 29%|██▉ | 108749/371472 [8:38:51<21:42:35, 3.36it/s] 29%|██▉ | 108750/371472 [8:38:51<21:53:44, 3.33it/s] 29%|██▉ | 108751/371472 [8:38:51<21:26:57, 3.40it/s] 29%|██▉ | 108752/371472 [8:38:51<22:04:39, 3.31it/s] 29%|██▉ | 108753/371472 [8:38:52<21:22:45, 3.41it/s] 29%|██▉ | 108754/371472 [8:38:52<20:47:25, 3.51it/s] 29%|██▉ | 108755/371472 [8:38:52<21:04:45, 3.46it/s] 29%|██▉ | 108756/371472 [8:38:53<20:47:34, 3.51it/s] 29%|██▉ | 108757/371472 [8:38:53<20:33:34, 3.55it/s] 29%|██▉ | 108758/371472 [8:38:53<20:38:36, 3.54it/s] 29%|██▉ | 108759/371472 [8:38:53<19:47:48, 3.69it/s] 29%|██▉ | 108760/371472 [8:38:54<20:42:59, 3.52it/s] {'loss': 3.3081, 'learning_rate': 7.368398371005624e-07, 'epoch': 4.68} + 29%|██▉ | 108760/371472 [8:38:54<20:42:59, 3.52it/s] 29%|██▉ | 108761/371472 [8:38:54<20:57:12, 3.48it/s] 29%|██▉ | 108762/371472 [8:38:54<20:38:51, 3.53it/s] 29%|██▉ | 108763/371472 [8:38:55<20:12:07, 3.61it/s] 29%|██▉ | 108764/371472 [8:38:55<21:12:28, 3.44it/s] 29%|██▉ | 108765/371472 [8:38:55<20:29:09, 3.56it/s] 29%|██▉ | 108766/371472 [8:38:55<19:53:28, 3.67it/s] 29%|██▉ | 108767/371472 [8:38:56<19:32:21, 3.73it/s] 29%|██▉ | 108768/371472 [8:38:56<20:12:43, 3.61it/s] 29%|██▉ | 108769/371472 [8:38:56<20:24:29, 3.58it/s] 29%|██▉ | 108770/371472 [8:38:56<20:06:07, 3.63it/s] 29%|██▉ | 108771/371472 [8:38:57<21:41:48, 3.36it/s] 29%|██▉ | 108772/371472 [8:38:57<23:20:10, 3.13it/s] 29%|██▉ | 108773/371472 [8:38:57<23:10:50, 3.15it/s] 29%|██▉ | 108774/371472 [8:38:58<22:46:48, 3.20it/s] 29%|██▉ | 108775/371472 [8:38:58<22:34:53, 3.23it/s] 29%|██▉ | 108776/371472 [8:38:58<22:20:28, 3.27it/s] 29%|██▉ | 108777/371472 [8:38:59<21:21:53, 3.42it/s] 29%|██▉ | 108778/371472 [8:38:59<20:53:13, 3.49it/s] 29%|██▉ | 108779/371472 [8:38:59<20:33:11, 3.55it/s] 29%|██▉ | 108780/371472 [8:38:59<19:59:40, 3.65it/s] {'loss': 3.4824, 'learning_rate': 7.367913551250834e-07, 'epoch': 4.69} + 29%|██▉ | 108780/371472 [8:38:59<19:59:40, 3.65it/s] 29%|██▉ | 108781/371472 [8:39:00<20:17:32, 3.60it/s] 29%|██▉ | 108782/371472 [8:39:00<21:07:53, 3.45it/s] 29%|██▉ | 108783/371472 [8:39:00<20:50:57, 3.50it/s] 29%|██▉ | 108784/371472 [8:39:01<20:29:11, 3.56it/s] 29%|██▉ | 108785/371472 [8:39:01<20:22:11, 3.58it/s] 29%|██▉ | 108786/371472 [8:39:01<19:22:27, 3.77it/s] 29%|██▉ | 108787/371472 [8:39:01<19:10:09, 3.81it/s] 29%|██▉ | 108788/371472 [8:39:02<19:49:30, 3.68it/s] 29%|██▉ | 108789/371472 [8:39:02<19:42:18, 3.70it/s] 29%|██▉ | 108790/371472 [8:39:02<19:40:58, 3.71it/s] 29%|██▉ | 108791/371472 [8:39:02<20:31:15, 3.56it/s] 29%|██▉ | 108792/371472 [8:39:03<21:25:29, 3.41it/s] 29%|██▉ | 108793/371472 [8:39:03<20:56:29, 3.48it/s] 29%|██▉ | 108794/371472 [8:39:03<20:24:09, 3.58it/s] 29%|██▉ | 108795/371472 [8:39:04<21:30:43, 3.39it/s] 29%|██▉ | 108796/371472 [8:39:04<20:19:44, 3.59it/s] 29%|██▉ | 108797/371472 [8:39:04<20:16:11, 3.60it/s] 29%|██▉ | 108798/371472 [8:39:04<19:54:17, 3.67it/s] 29%|██▉ | 108799/371472 [8:39:05<19:28:14, 3.75it/s] 29%|██▉ | 108800/371472 [8:39:05<19:57:23, 3.66it/s] {'loss': 3.4848, 'learning_rate': 7.367428731496045e-07, 'epoch': 4.69} + 29%|██▉ | 108800/371472 [8:39:05<19:57:23, 3.66it/s] 29%|██▉ | 108801/371472 [8:39:05<20:21:52, 3.58it/s] 29%|██▉ | 108802/371472 [8:39:06<19:36:32, 3.72it/s] 29%|██▉ | 108803/371472 [8:39:06<19:54:46, 3.66it/s] 29%|██▉ | 108804/371472 [8:39:06<19:58:25, 3.65it/s] 29%|██▉ | 108805/371472 [8:39:06<19:33:41, 3.73it/s] 29%|██▉ | 108806/371472 [8:39:07<19:57:59, 3.65it/s] 29%|██▉ | 108807/371472 [8:39:07<19:45:02, 3.69it/s] 29%|██▉ | 108808/371472 [8:39:07<19:48:01, 3.68it/s] 29%|██▉ | 108809/371472 [8:39:07<20:20:30, 3.59it/s] 29%|██▉ | 108810/371472 [8:39:08<19:32:35, 3.73it/s] 29%|██▉ | 108811/371472 [8:39:08<19:16:13, 3.79it/s] 29%|██▉ | 108812/371472 [8:39:08<19:13:45, 3.79it/s] 29%|██▉ | 108813/371472 [8:39:09<19:40:45, 3.71it/s] 29%|██▉ | 108814/371472 [8:39:09<20:07:14, 3.63it/s] 29%|██▉ | 108815/371472 [8:39:09<20:01:52, 3.64it/s] 29%|██▉ | 108816/371472 [8:39:09<21:08:41, 3.45it/s] 29%|██▉ | 108817/371472 [8:39:10<20:41:02, 3.53it/s] 29%|██▉ | 108818/371472 [8:39:10<20:02:10, 3.64it/s] 29%|██▉ | 108819/371472 [8:39:10<19:41:45, 3.70it/s] 29%|██▉ | 108820/371472 [8:39:10<19:07:02, 3.82it/s] {'loss': 3.5141, 'learning_rate': 7.366943911741257e-07, 'epoch': 4.69} + 29%|██▉ | 108820/371472 [8:39:10<19:07:02, 3.82it/s] 29%|██▉ | 108821/371472 [8:39:11<20:01:58, 3.64it/s] 29%|██▉ | 108822/371472 [8:39:11<19:47:40, 3.69it/s] 29%|██▉ | 108823/371472 [8:39:11<19:33:16, 3.73it/s] 29%|██▉ | 108824/371472 [8:39:12<19:36:04, 3.72it/s] 29%|██▉ | 108825/371472 [8:39:12<19:34:02, 3.73it/s] 29%|██▉ | 108826/371472 [8:39:12<19:55:43, 3.66it/s] 29%|██▉ | 108827/371472 [8:39:12<19:48:39, 3.68it/s] 29%|██▉ | 108828/371472 [8:39:13<19:38:32, 3.71it/s] 29%|██▉ | 108829/371472 [8:39:13<20:08:27, 3.62it/s] 29%|██▉ | 108830/371472 [8:39:13<20:58:41, 3.48it/s] 29%|██▉ | 108831/371472 [8:39:13<20:20:30, 3.59it/s] 29%|██▉ | 108832/371472 [8:39:14<20:25:12, 3.57it/s] 29%|██▉ | 108833/371472 [8:39:14<20:23:29, 3.58it/s] 29%|██▉ | 108834/371472 [8:39:14<19:54:17, 3.67it/s] 29%|██▉ | 108835/371472 [8:39:15<19:59:21, 3.65it/s] 29%|██▉ | 108836/371472 [8:39:15<19:42:31, 3.70it/s] 29%|██▉ | 108837/371472 [8:39:15<19:53:18, 3.67it/s] 29%|██▉ | 108838/371472 [8:39:15<20:53:54, 3.49it/s] 29%|██▉ | 108839/371472 [8:39:16<20:55:21, 3.49it/s] 29%|██▉ | 108840/371472 [8:39:16<20:12:54, 3.61it/s] {'loss': 3.2749, 'learning_rate': 7.366459091986468e-07, 'epoch': 4.69} + 29%|██▉ | 108840/371472 [8:39:16<20:12:54, 3.61it/s] 29%|██▉ | 108841/371472 [8:39:16<19:50:04, 3.68it/s] 29%|██▉ | 108842/371472 [8:39:17<20:31:14, 3.56it/s] 29%|██▉ | 108843/371472 [8:39:17<21:00:14, 3.47it/s] 29%|██▉ | 108844/371472 [8:39:17<20:44:17, 3.52it/s] 29%|██▉ | 108845/371472 [8:39:17<21:56:56, 3.32it/s] 29%|██▉ | 108846/371472 [8:39:18<21:40:55, 3.36it/s] 29%|██▉ | 108847/371472 [8:39:18<23:59:06, 3.04it/s] 29%|██▉ | 108848/371472 [8:39:18<22:47:55, 3.20it/s] 29%|██▉ | 108849/371472 [8:39:19<22:15:20, 3.28it/s] 29%|██▉ | 108850/371472 [8:39:19<21:43:12, 3.36it/s] 29%|██▉ | 108851/371472 [8:39:19<20:22:55, 3.58it/s] 29%|██▉ | 108852/371472 [8:39:19<20:11:21, 3.61it/s] 29%|██▉ | 108853/371472 [8:39:20<19:33:45, 3.73it/s] 29%|██▉ | 108854/371472 [8:39:20<20:01:38, 3.64it/s] 29%|██▉ | 108855/371472 [8:39:20<20:30:21, 3.56it/s] 29%|██▉ | 108856/371472 [8:39:21<20:48:05, 3.51it/s] 29%|██▉ | 108857/371472 [8:39:21<20:16:13, 3.60it/s] 29%|██▉ | 108858/371472 [8:39:21<19:23:27, 3.76it/s] 29%|██▉ | 108859/371472 [8:39:21<19:27:53, 3.75it/s] 29%|██▉ | 108860/371472 [8:39:22<19:57:10, 3.66it/s] {'loss': 3.3197, 'learning_rate': 7.365974272231678e-07, 'epoch': 4.69} + 29%|██▉ | 108860/371472 [8:39:22<19:57:10, 3.66it/s] 29%|██▉ | 108861/371472 [8:39:22<20:16:59, 3.60it/s] 29%|██▉ | 108862/371472 [8:39:22<20:09:50, 3.62it/s] 29%|██▉ | 108863/371472 [8:39:22<19:31:30, 3.74it/s] 29%|██▉ | 108864/371472 [8:39:23<19:32:45, 3.73it/s] 29%|██▉ | 108865/371472 [8:39:23<21:15:36, 3.43it/s] 29%|██▉ | 108866/371472 [8:39:23<21:06:42, 3.46it/s] 29%|██▉ | 108867/371472 [8:39:24<20:30:11, 3.56it/s] 29%|██▉ | 108868/371472 [8:39:24<21:01:17, 3.47it/s] 29%|██▉ | 108869/371472 [8:39:24<22:42:57, 3.21it/s] 29%|██▉ | 108870/371472 [8:39:25<22:49:15, 3.20it/s] 29%|██▉ | 108871/371472 [8:39:25<21:31:37, 3.39it/s] 29%|██▉ | 108872/371472 [8:39:25<21:43:36, 3.36it/s] 29%|██▉ | 108873/371472 [8:39:25<21:06:15, 3.46it/s] 29%|██▉ | 108874/371472 [8:39:26<23:40:14, 3.08it/s] 29%|██▉ | 108875/371472 [8:39:26<21:50:22, 3.34it/s] 29%|██▉ | 108876/371472 [8:39:26<21:21:26, 3.42it/s] 29%|██▉ | 108877/371472 [8:39:27<20:19:40, 3.59it/s] 29%|██▉ | 108878/371472 [8:39:27<19:41:31, 3.70it/s] 29%|██▉ | 108879/371472 [8:39:27<19:41:03, 3.71it/s] 29%|██▉ | 108880/371472 [8:39:27<20:11:54, 3.61it/s] {'loss': 3.41, 'learning_rate': 7.36548945247689e-07, 'epoch': 4.69} + 29%|██▉ | 108880/371472 [8:39:27<20:11:54, 3.61it/s] 29%|██▉ | 108881/371472 [8:39:28<19:33:47, 3.73it/s] 29%|██▉ | 108882/371472 [8:39:28<19:23:19, 3.76it/s] 29%|██▉ | 108883/371472 [8:39:28<20:38:45, 3.53it/s] 29%|██▉ | 108884/371472 [8:39:29<20:51:08, 3.50it/s] 29%|██▉ | 108885/371472 [8:39:29<20:42:44, 3.52it/s] 29%|██▉ | 108886/371472 [8:39:29<20:19:02, 3.59it/s] 29%|██▉ | 108887/371472 [8:39:29<20:12:00, 3.61it/s] 29%|██▉ | 108888/371472 [8:39:30<20:26:20, 3.57it/s] 29%|██▉ | 108889/371472 [8:39:30<20:30:41, 3.56it/s] 29%|██▉ | 108890/371472 [8:39:30<20:14:02, 3.60it/s] 29%|██▉ | 108891/371472 [8:39:31<22:28:08, 3.25it/s] 29%|██▉ | 108892/371472 [8:39:31<21:05:20, 3.46it/s] 29%|██▉ | 108893/371472 [8:39:31<21:19:48, 3.42it/s] 29%|██▉ | 108894/371472 [8:39:31<20:12:16, 3.61it/s] 29%|██▉ | 108895/371472 [8:39:32<20:41:13, 3.53it/s] 29%|██▉ | 108896/371472 [8:39:32<20:31:53, 3.55it/s] 29%|██▉ | 108897/371472 [8:39:32<20:54:54, 3.49it/s] 29%|██▉ | 108898/371472 [8:39:33<21:49:15, 3.34it/s] 29%|██▉ | 108899/371472 [8:39:33<20:57:19, 3.48it/s] 29%|██▉ | 108900/371472 [8:39:33<21:28:20, 3.40it/s] {'loss': 3.5239, 'learning_rate': 7.365004632722102e-07, 'epoch': 4.69} + 29%|██▉ | 108900/371472 [8:39:33<21:28:20, 3.40it/s] 29%|██▉ | 108901/371472 [8:39:34<22:30:08, 3.24it/s] 29%|██▉ | 108902/371472 [8:39:34<21:44:37, 3.35it/s] 29%|██▉ | 108903/371472 [8:39:34<20:45:45, 3.51it/s] 29%|██▉ | 108904/371472 [8:39:34<21:11:51, 3.44it/s] 29%|██▉ | 108905/371472 [8:39:35<21:12:30, 3.44it/s] 29%|██▉ | 108906/371472 [8:39:35<21:15:41, 3.43it/s] 29%|██▉ | 108907/371472 [8:39:35<20:52:07, 3.49it/s] 29%|██▉ | 108908/371472 [8:39:36<21:45:43, 3.35it/s] 29%|██▉ | 108909/371472 [8:39:36<20:50:01, 3.50it/s] 29%|██▉ | 108910/371472 [8:39:36<20:48:49, 3.50it/s] 29%|██▉ | 108911/371472 [8:39:36<20:13:51, 3.61it/s] 29%|██▉ | 108912/371472 [8:39:37<21:05:26, 3.46it/s] 29%|██▉ | 108913/371472 [8:39:37<20:23:20, 3.58it/s] 29%|██▉ | 108914/371472 [8:39:37<20:36:49, 3.54it/s] 29%|██▉ | 108915/371472 [8:39:38<21:28:55, 3.40it/s] 29%|██▉ | 108916/371472 [8:39:38<23:02:14, 3.17it/s] 29%|██▉ | 108917/371472 [8:39:38<21:54:56, 3.33it/s] 29%|██▉ | 108918/371472 [8:39:39<23:37:30, 3.09it/s] 29%|██▉ | 108919/371472 [8:39:39<21:46:36, 3.35it/s] 29%|██▉ | 108920/371472 [8:39:39<26:43:14, 2.73it/s] {'loss': 3.3409, 'learning_rate': 7.364519812967311e-07, 'epoch': 4.69} + 29%|██▉ | 108920/371472 [8:39:39<26:43:14, 2.73it/s] 29%|██▉ | 108921/371472 [8:39:40<24:09:59, 3.02it/s] 29%|██▉ | 108922/371472 [8:39:40<23:22:09, 3.12it/s] 29%|██▉ | 108923/371472 [8:39:40<24:05:04, 3.03it/s] 29%|██▉ | 108924/371472 [8:39:40<23:17:04, 3.13it/s] 29%|██▉ | 108925/371472 [8:39:41<22:25:03, 3.25it/s] 29%|██▉ | 108926/371472 [8:39:41<23:41:27, 3.08it/s] 29%|██▉ | 108927/371472 [8:39:41<23:47:32, 3.07it/s] 29%|██▉ | 108928/371472 [8:39:42<22:12:20, 3.28it/s] 29%|██▉ | 108929/371472 [8:39:42<21:12:58, 3.44it/s] 29%|██▉ | 108930/371472 [8:39:42<20:22:52, 3.58it/s] 29%|██▉ | 108931/371472 [8:39:42<19:56:09, 3.66it/s] 29%|██▉ | 108932/371472 [8:39:43<19:21:42, 3.77it/s] 29%|██▉ | 108933/371472 [8:39:43<19:10:14, 3.80it/s] 29%|██▉ | 108934/371472 [8:39:43<18:44:58, 3.89it/s] 29%|██▉ | 108935/371472 [8:39:43<18:25:25, 3.96it/s] 29%|██▉ | 108936/371472 [8:39:44<18:04:31, 4.03it/s] 29%|██▉ | 108937/371472 [8:39:44<19:27:34, 3.75it/s] 29%|██▉ | 108938/371472 [8:39:44<20:03:33, 3.64it/s] 29%|██▉ | 108939/371472 [8:39:45<19:49:44, 3.68it/s] 29%|██▉ | 108940/371472 [8:39:45<20:06:11, 3.63it/s] {'loss': 3.4581, 'learning_rate': 7.364034993212522e-07, 'epoch': 4.69} + 29%|██▉ | 108940/371472 [8:39:45<20:06:11, 3.63it/s] 29%|██▉ | 108941/371472 [8:39:45<20:40:56, 3.53it/s] 29%|██▉ | 108942/371472 [8:39:45<20:15:55, 3.60it/s] 29%|██▉ | 108943/371472 [8:39:46<20:16:22, 3.60it/s] 29%|██▉ | 108944/371472 [8:39:46<21:37:53, 3.37it/s] 29%|██▉ | 108945/371472 [8:39:46<21:22:48, 3.41it/s] 29%|██▉ | 108946/371472 [8:39:47<21:19:26, 3.42it/s] 29%|██▉ | 108947/371472 [8:39:47<20:23:13, 3.58it/s] 29%|██▉ | 108948/371472 [8:39:47<20:38:19, 3.53it/s] 29%|██▉ | 108949/371472 [8:39:47<20:22:40, 3.58it/s] 29%|██▉ | 108950/371472 [8:39:48<21:29:39, 3.39it/s] 29%|██▉ | 108951/371472 [8:39:48<20:50:59, 3.50it/s] 29%|██▉ | 108952/371472 [8:39:48<20:12:50, 3.61it/s] 29%|██▉ | 108953/371472 [8:39:49<19:51:59, 3.67it/s] 29%|██▉ | 108954/371472 [8:39:49<19:52:42, 3.67it/s] 29%|██▉ | 108955/371472 [8:39:49<22:12:08, 3.28it/s] 29%|██▉ | 108956/371472 [8:39:49<21:08:08, 3.45it/s] 29%|██▉ | 108957/371472 [8:39:50<20:36:44, 3.54it/s] 29%|██▉ | 108958/371472 [8:39:50<19:49:27, 3.68it/s] 29%|██▉ | 108959/371472 [8:39:50<20:30:47, 3.55it/s] 29%|██▉ | 108960/371472 [8:39:51<19:44:17, 3.69it/s] {'loss': 3.4348, 'learning_rate': 7.363550173457734e-07, 'epoch': 4.69} + 29%|██▉ | 108960/371472 [8:39:51<19:44:17, 3.69it/s] 29%|██▉ | 108961/371472 [8:39:51<20:25:39, 3.57it/s] 29%|██▉ | 108962/371472 [8:39:51<19:58:49, 3.65it/s] 29%|██▉ | 108963/371472 [8:39:51<19:31:26, 3.73it/s] 29%|██▉ | 108964/371472 [8:39:52<20:04:18, 3.63it/s] 29%|██▉ | 108965/371472 [8:39:52<19:45:13, 3.69it/s] 29%|██▉ | 108966/371472 [8:39:52<19:09:39, 3.81it/s] 29%|██▉ | 108967/371472 [8:39:52<20:16:28, 3.60it/s] 29%|██▉ | 108968/371472 [8:39:53<20:25:51, 3.57it/s] 29%|██▉ | 108969/371472 [8:39:53<20:15:07, 3.60it/s] 29%|██▉ | 108970/371472 [8:39:53<20:55:43, 3.48it/s] 29%|██▉ | 108971/371472 [8:39:54<20:41:24, 3.52it/s] 29%|██▉ | 108972/371472 [8:39:54<20:02:21, 3.64it/s] 29%|██▉ | 108973/371472 [8:39:54<20:31:47, 3.55it/s] 29%|██▉ | 108974/371472 [8:39:54<20:27:39, 3.56it/s] 29%|██▉ | 108975/371472 [8:39:55<21:25:40, 3.40it/s] 29%|██▉ | 108976/371472 [8:39:55<22:20:34, 3.26it/s] 29%|██▉ | 108977/371472 [8:39:55<23:05:15, 3.16it/s] 29%|██▉ | 108978/371472 [8:39:56<22:33:38, 3.23it/s] 29%|██▉ | 108979/371472 [8:39:56<21:41:48, 3.36it/s] 29%|██▉ | 108980/371472 [8:39:56<21:30:53, 3.39it/s] {'loss': 3.3815, 'learning_rate': 7.363065353702946e-07, 'epoch': 4.69} + 29%|██▉ | 108980/371472 [8:39:56<21:30:53, 3.39it/s] 29%|██▉ | 108981/371472 [8:39:57<21:10:39, 3.44it/s] 29%|██▉ | 108982/371472 [8:39:57<20:57:26, 3.48it/s] 29%|██▉ | 108983/371472 [8:39:57<20:28:45, 3.56it/s] 29%|██▉ | 108984/371472 [8:39:57<21:52:43, 3.33it/s] 29%|██▉ | 108985/371472 [8:39:58<21:30:55, 3.39it/s] 29%|██▉ | 108986/371472 [8:39:58<20:53:03, 3.49it/s] 29%|██▉ | 108987/371472 [8:39:58<21:16:34, 3.43it/s] 29%|██▉ | 108988/371472 [8:39:59<21:26:17, 3.40it/s] 29%|██▉ | 108989/371472 [8:39:59<21:43:59, 3.35it/s] 29%|██▉ | 108990/371472 [8:39:59<21:11:56, 3.44it/s] 29%|██▉ | 108991/371472 [8:40:00<23:22:25, 3.12it/s] 29%|██▉ | 108992/371472 [8:40:00<21:43:49, 3.36it/s] 29%|██▉ | 108993/371472 [8:40:00<21:36:35, 3.37it/s] 29%|██▉ | 108994/371472 [8:40:00<20:56:37, 3.48it/s] 29%|██▉ | 108995/371472 [8:40:01<21:51:14, 3.34it/s] 29%|██▉ | 108996/371472 [8:40:01<21:28:29, 3.40it/s] 29%|██▉ | 108997/371472 [8:40:01<21:14:00, 3.43it/s] 29%|██▉ | 108998/371472 [8:40:02<21:49:50, 3.34it/s] 29%|██▉ | 108999/371472 [8:40:02<21:21:19, 3.41it/s] 29%|██▉ | 109000/371472 [8:40:02<21:19:26, 3.42it/s] {'loss': 3.4902, 'learning_rate': 7.362580533948156e-07, 'epoch': 4.69} + 29%|██▉ | 109000/371472 [8:40:02<21:19:26, 3.42it/s] 29%|██▉ | 109001/371472 [8:40:02<22:28:41, 3.24it/s] 29%|██▉ | 109002/371472 [8:40:03<21:41:57, 3.36it/s] 29%|██▉ | 109003/371472 [8:40:03<31:16:45, 2.33it/s] 29%|██▉ | 109004/371472 [8:40:04<27:27:17, 2.66it/s] 29%|██▉ | 109005/371472 [8:40:04<25:33:22, 2.85it/s] 29%|██▉ | 109006/371472 [8:40:04<27:55:49, 2.61it/s] 29%|██▉ | 109007/371472 [8:40:05<26:41:15, 2.73it/s] 29%|██▉ | 109008/371472 [8:40:05<25:04:11, 2.91it/s] 29%|██▉ | 109009/371472 [8:40:05<25:58:15, 2.81it/s] 29%|██▉ | 109010/371472 [8:40:06<24:01:07, 3.04it/s] 29%|██▉ | 109011/371472 [8:40:06<22:12:23, 3.28it/s] 29%|██▉ | 109012/371472 [8:40:06<21:49:22, 3.34it/s] 29%|██▉ | 109013/371472 [8:40:07<21:53:16, 3.33it/s] 29%|██▉ | 109014/371472 [8:40:07<20:36:09, 3.54it/s] 29%|██▉ | 109015/371472 [8:40:07<20:06:14, 3.63it/s] 29%|██▉ | 109016/371472 [8:40:07<20:37:53, 3.53it/s] 29%|██▉ | 109017/371472 [8:40:08<20:13:01, 3.61it/s] 29%|██▉ | 109018/371472 [8:40:08<20:17:15, 3.59it/s] 29%|██▉ | 109019/371472 [8:40:08<19:54:52, 3.66it/s] 29%|██▉ | 109020/371472 [8:40:08<19:53:23, 3.67it/s] {'loss': 3.4949, 'learning_rate': 7.362095714193367e-07, 'epoch': 4.7} + 29%|██▉ | 109020/371472 [8:40:08<19:53:23, 3.67it/s] 29%|██▉ | 109021/371472 [8:40:09<20:00:23, 3.64it/s] 29%|██▉ | 109022/371472 [8:40:09<19:49:25, 3.68it/s] 29%|██▉ | 109023/371472 [8:40:09<19:18:17, 3.78it/s] 29%|██▉ | 109024/371472 [8:40:10<21:05:49, 3.46it/s] 29%|██▉ | 109025/371472 [8:40:10<21:35:58, 3.38it/s] 29%|██▉ | 109026/371472 [8:40:10<20:59:27, 3.47it/s] 29%|██▉ | 109027/371472 [8:40:10<20:30:15, 3.56it/s] 29%|██▉ | 109028/371472 [8:40:11<20:12:57, 3.61it/s] 29%|██▉ | 109029/371472 [8:40:11<19:34:38, 3.72it/s] 29%|██▉ | 109030/371472 [8:40:11<20:11:34, 3.61it/s] 29%|██▉ | 109031/371472 [8:40:12<20:37:52, 3.53it/s] 29%|██▉ | 109032/371472 [8:40:12<20:42:47, 3.52it/s] 29%|██▉ | 109033/371472 [8:40:12<20:21:53, 3.58it/s] 29%|██▉ | 109034/371472 [8:40:12<21:19:29, 3.42it/s] 29%|██▉ | 109035/371472 [8:40:13<23:03:22, 3.16it/s] 29%|██▉ | 109036/371472 [8:40:13<22:13:34, 3.28it/s] 29%|██▉ | 109037/371472 [8:40:13<22:28:34, 3.24it/s] 29%|██▉ | 109038/371472 [8:40:14<22:08:41, 3.29it/s] 29%|██▉ | 109039/371472 [8:40:14<21:04:32, 3.46it/s] 29%|██▉ | 109040/371472 [8:40:14<20:51:41, 3.49it/s] {'loss': 3.4152, 'learning_rate': 7.361610894438579e-07, 'epoch': 4.7} + 29%|██▉ | 109040/371472 [8:40:14<20:51:41, 3.49it/s] 29%|██▉ | 109041/371472 [8:40:15<20:20:22, 3.58it/s] 29%|██▉ | 109042/371472 [8:40:15<21:22:40, 3.41it/s] 29%|██▉ | 109043/371472 [8:40:15<20:20:00, 3.59it/s] 29%|██▉ | 109044/371472 [8:40:15<19:53:35, 3.66it/s] 29%|██▉ | 109045/371472 [8:40:16<19:50:47, 3.67it/s] 29%|██▉ | 109046/371472 [8:40:16<20:36:43, 3.54it/s] 29%|██▉ | 109047/371472 [8:40:16<21:47:39, 3.34it/s] 29%|██▉ | 109048/371472 [8:40:17<20:49:32, 3.50it/s] 29%|██▉ | 109049/371472 [8:40:17<19:47:09, 3.68it/s] 29%|██▉ | 109050/371472 [8:40:17<19:17:32, 3.78it/s] 29%|██▉ | 109051/371472 [8:40:17<19:30:26, 3.74it/s] 29%|██▉ | 109052/371472 [8:40:18<19:26:09, 3.75it/s] 29%|██▉ | 109053/371472 [8:40:18<19:06:42, 3.81it/s] 29%|██▉ | 109054/371472 [8:40:18<20:52:38, 3.49it/s] 29%|██▉ | 109055/371472 [8:40:18<20:47:01, 3.51it/s] 29%|██▉ | 109056/371472 [8:40:19<20:12:12, 3.61it/s] 29%|██▉ | 109057/371472 [8:40:19<20:54:10, 3.49it/s] 29%|██▉ | 109058/371472 [8:40:19<20:52:01, 3.49it/s] 29%|██▉ | 109059/371472 [8:40:20<23:16:57, 3.13it/s] 29%|██▉ | 109060/371472 [8:40:20<23:41:04, 3.08it/s] {'loss': 3.4191, 'learning_rate': 7.36112607468379e-07, 'epoch': 4.7} + 29%|██▉ | 109060/371472 [8:40:20<23:41:04, 3.08it/s] 29%|██▉ | 109061/371472 [8:40:20<22:11:09, 3.29it/s] 29%|██▉ | 109062/371472 [8:40:21<21:13:43, 3.43it/s] 29%|██▉ | 109063/371472 [8:40:21<21:21:42, 3.41it/s] 29%|██▉ | 109064/371472 [8:40:21<20:34:38, 3.54it/s] 29%|██▉ | 109065/371472 [8:40:21<20:57:59, 3.48it/s] 29%|██▉ | 109066/371472 [8:40:22<20:10:45, 3.61it/s] 29%|██▉ | 109067/371472 [8:40:22<19:40:08, 3.71it/s] 29%|██▉ | 109068/371472 [8:40:22<19:15:23, 3.79it/s] 29%|██▉ | 109069/371472 [8:40:22<21:33:21, 3.38it/s] 29%|██▉ | 109070/371472 [8:40:23<20:26:40, 3.57it/s] 29%|██▉ | 109071/371472 [8:40:23<19:54:08, 3.66it/s] 29%|██▉ | 109072/371472 [8:40:23<19:24:42, 3.75it/s] 29%|██▉ | 109073/371472 [8:40:23<18:53:46, 3.86it/s] 29%|██▉ | 109074/371472 [8:40:24<19:23:27, 3.76it/s] 29%|██▉ | 109075/371472 [8:40:24<19:17:06, 3.78it/s] 29%|██▉ | 109076/371472 [8:40:24<19:58:01, 3.65it/s] 29%|██▉ | 109077/371472 [8:40:25<19:28:29, 3.74it/s] 29%|██▉ | 109078/371472 [8:40:25<19:43:20, 3.70it/s] 29%|██▉ | 109079/371472 [8:40:25<19:29:06, 3.74it/s] 29%|██▉ | 109080/371472 [8:40:25<20:00:03, 3.64it/s] {'loss': 3.4569, 'learning_rate': 7.360641254929001e-07, 'epoch': 4.7} + 29%|██▉ | 109080/371472 [8:40:25<20:00:03, 3.64it/s] 29%|██▉ | 109081/371472 [8:40:26<19:38:46, 3.71it/s] 29%|██▉ | 109082/371472 [8:40:26<19:23:11, 3.76it/s] 29%|██▉ | 109083/371472 [8:40:26<19:39:22, 3.71it/s] 29%|██▉ | 109084/371472 [8:40:26<19:54:20, 3.66it/s] 29%|██▉ | 109085/371472 [8:40:27<20:25:59, 3.57it/s] 29%|██▉ | 109086/371472 [8:40:27<20:44:24, 3.51it/s] 29%|██▉ | 109087/371472 [8:40:27<20:57:57, 3.48it/s] 29%|██▉ | 109088/371472 [8:40:28<21:41:11, 3.36it/s] 29%|██▉ | 109089/371472 [8:40:28<21:06:35, 3.45it/s] 29%|██▉ | 109090/371472 [8:40:28<20:56:05, 3.48it/s] 29%|██▉ | 109091/371472 [8:40:29<20:26:44, 3.56it/s] 29%|██▉ | 109092/371472 [8:40:29<19:53:34, 3.66it/s] 29%|██▉ | 109093/371472 [8:40:29<19:21:34, 3.76it/s] 29%|██▉ | 109094/371472 [8:40:29<19:32:46, 3.73it/s] 29%|██▉ | 109095/371472 [8:40:30<19:04:17, 3.82it/s] 29%|██▉ | 109096/371472 [8:40:30<20:14:08, 3.60it/s] 29%|██▉ | 109097/371472 [8:40:30<19:54:56, 3.66it/s] 29%|██▉ | 109098/371472 [8:40:30<20:07:40, 3.62it/s] 29%|██▉ | 109099/371472 [8:40:31<20:16:36, 3.59it/s] 29%|██▉ | 109100/371472 [8:40:31<19:33:25, 3.73it/s] {'loss': 3.2765, 'learning_rate': 7.360156435174211e-07, 'epoch': 4.7} + 29%|██▉ | 109100/371472 [8:40:31<19:33:25, 3.73it/s] 29%|██▉ | 109101/371472 [8:40:31<19:39:10, 3.71it/s] 29%|██▉ | 109102/371472 [8:40:31<19:45:10, 3.69it/s] 29%|██▉ | 109103/371472 [8:40:32<20:00:51, 3.64it/s] 29%|██▉ | 109104/371472 [8:40:32<19:50:05, 3.67it/s] 29%|██▉ | 109105/371472 [8:40:32<19:20:44, 3.77it/s] 29%|██▉ | 109106/371472 [8:40:33<19:52:05, 3.67it/s] 29%|██▉ | 109107/371472 [8:40:33<19:17:53, 3.78it/s] 29%|██▉ | 109108/371472 [8:40:33<20:22:38, 3.58it/s] 29%|██▉ | 109109/371472 [8:40:33<20:33:18, 3.55it/s] 29%|██▉ | 109110/371472 [8:40:34<22:00:30, 3.31it/s] 29%|██▉ | 109111/371472 [8:40:34<21:12:13, 3.44it/s] 29%|██▉ | 109112/371472 [8:40:34<20:21:25, 3.58it/s] 29%|██▉ | 109113/371472 [8:40:35<20:16:48, 3.59it/s] 29%|██▉ | 109114/371472 [8:40:35<20:14:04, 3.60it/s] 29%|██▉ | 109115/371472 [8:40:35<24:01:29, 3.03it/s] 29%|██▉ | 109116/371472 [8:40:36<23:18:15, 3.13it/s] 29%|██▉ | 109117/371472 [8:40:36<22:26:36, 3.25it/s] 29%|██▉ | 109118/371472 [8:40:36<23:29:53, 3.10it/s] 29%|██▉ | 109119/371472 [8:40:36<22:27:34, 3.24it/s] 29%|██▉ | 109120/371472 [8:40:37<21:23:28, 3.41it/s] {'loss': 3.3831, 'learning_rate': 7.359671615419423e-07, 'epoch': 4.7} + 29%|██▉ | 109120/371472 [8:40:37<21:23:28, 3.41it/s] 29%|██▉ | 109121/371472 [8:40:37<22:00:04, 3.31it/s] 29%|██▉ | 109122/371472 [8:40:37<20:28:13, 3.56it/s] 29%|██▉ | 109123/371472 [8:40:38<20:32:52, 3.55it/s] 29%|██▉ | 109124/371472 [8:40:38<20:22:45, 3.58it/s] 29%|██▉ | 109125/371472 [8:40:38<20:30:47, 3.55it/s] 29%|██▉ | 109126/371472 [8:40:38<20:23:36, 3.57it/s] 29%|██▉ | 109127/371472 [8:40:39<20:56:04, 3.48it/s] 29%|██▉ | 109128/371472 [8:40:39<20:49:41, 3.50it/s] 29%|██▉ | 109129/371472 [8:40:39<19:35:51, 3.72it/s] 29%|██▉ | 109130/371472 [8:40:39<19:25:45, 3.75it/s] 29%|██▉ | 109131/371472 [8:40:40<19:52:19, 3.67it/s] 29%|██▉ | 109132/371472 [8:40:40<19:52:03, 3.67it/s] 29%|██▉ | 109133/371472 [8:40:40<19:39:50, 3.71it/s] 29%|██▉ | 109134/371472 [8:40:41<19:20:45, 3.77it/s] 29%|██▉ | 109135/371472 [8:40:41<19:53:41, 3.66it/s] 29%|██▉ | 109136/371472 [8:40:41<20:27:31, 3.56it/s] 29%|██▉ | 109137/371472 [8:40:41<20:05:56, 3.63it/s] 29%|██▉ | 109138/371472 [8:40:42<20:04:46, 3.63it/s] 29%|██▉ | 109139/371472 [8:40:42<21:05:50, 3.45it/s] 29%|██▉ | 109140/371472 [8:40:42<20:20:02, 3.58it/s] {'loss': 3.6725, 'learning_rate': 7.359186795664635e-07, 'epoch': 4.7} + 29%|██▉ | 109140/371472 [8:40:42<20:20:02, 3.58it/s] 29%|██▉ | 109141/371472 [8:40:43<20:45:16, 3.51it/s] 29%|██▉ | 109142/371472 [8:40:43<20:46:27, 3.51it/s] 29%|██▉ | 109143/371472 [8:40:43<21:52:03, 3.33it/s] 29%|██▉ | 109144/371472 [8:40:44<21:57:23, 3.32it/s] 29%|██▉ | 109145/371472 [8:40:44<22:57:01, 3.18it/s] 29%|██▉ | 109146/371472 [8:40:44<22:13:27, 3.28it/s] 29%|██▉ | 109147/371472 [8:40:44<22:01:20, 3.31it/s] 29%|██▉ | 109148/371472 [8:40:45<22:57:51, 3.17it/s] 29%|██▉ | 109149/371472 [8:40:45<22:59:57, 3.17it/s] 29%|██▉ | 109150/371472 [8:40:45<21:42:48, 3.36it/s] 29%|██▉ | 109151/371472 [8:40:46<22:36:12, 3.22it/s] 29%|██▉ | 109152/371472 [8:40:46<21:33:57, 3.38it/s] 29%|██▉ | 109153/371472 [8:40:46<21:55:05, 3.32it/s] 29%|██▉ | 109154/371472 [8:40:47<21:25:16, 3.40it/s] 29%|██▉ | 109155/371472 [8:40:47<20:15:00, 3.60it/s] 29%|██▉ | 109156/371472 [8:40:47<20:39:04, 3.53it/s] 29%|██▉ | 109157/371472 [8:40:47<20:28:58, 3.56it/s] 29%|██▉ | 109158/371472 [8:40:48<20:23:54, 3.57it/s] 29%|██▉ | 109159/371472 [8:40:48<22:18:49, 3.27it/s] 29%|██▉ | 109160/371472 [8:40:48<23:32:58, 3.09it/s] {'loss': 3.3772, 'learning_rate': 7.358701975909845e-07, 'epoch': 4.7} + 29%|██▉ | 109160/371472 [8:40:48<23:32:58, 3.09it/s] 29%|██▉ | 109161/371472 [8:40:49<22:09:02, 3.29it/s] 29%|██▉ | 109162/371472 [8:40:49<21:09:00, 3.45it/s] 29%|██▉ | 109163/371472 [8:40:49<20:20:19, 3.58it/s] 29%|██▉ | 109164/371472 [8:40:49<19:38:14, 3.71it/s] 29%|██▉ | 109165/371472 [8:40:50<20:37:27, 3.53it/s] 29%|██▉ | 109166/371472 [8:40:50<20:00:51, 3.64it/s] 29%|██▉ | 109167/371472 [8:40:50<20:01:53, 3.64it/s] 29%|██▉ | 109168/371472 [8:40:51<20:21:08, 3.58it/s] 29%|██▉ | 109169/371472 [8:40:51<19:57:18, 3.65it/s] 29%|██▉ | 109170/371472 [8:40:51<19:04:33, 3.82it/s] 29%|██▉ | 109171/371472 [8:40:51<19:16:40, 3.78it/s] 29%|██▉ | 109172/371472 [8:40:52<20:01:36, 3.64it/s] 29%|██▉ | 109173/371472 [8:40:52<19:48:49, 3.68it/s] 29%|██▉ | 109174/371472 [8:40:52<19:47:40, 3.68it/s] 29%|██▉ | 109175/371472 [8:40:52<20:00:36, 3.64it/s] 29%|██▉ | 109176/371472 [8:40:53<20:16:00, 3.60it/s] 29%|██▉ | 109177/371472 [8:40:53<20:02:04, 3.64it/s] 29%|██▉ | 109178/371472 [8:40:53<19:34:31, 3.72it/s] 29%|██▉ | 109179/371472 [8:40:53<19:39:37, 3.71it/s] 29%|██▉ | 109180/371472 [8:40:54<20:18:51, 3.59it/s] {'loss': 3.482, 'learning_rate': 7.358217156155055e-07, 'epoch': 4.7} + 29%|██▉ | 109180/371472 [8:40:54<20:18:51, 3.59it/s] 29%|██▉ | 109181/371472 [8:40:54<20:48:56, 3.50it/s] 29%|██▉ | 109182/371472 [8:40:54<21:32:50, 3.38it/s] 29%|██▉ | 109183/371472 [8:40:55<21:07:42, 3.45it/s] 29%|██▉ | 109184/371472 [8:40:55<20:22:40, 3.58it/s] 29%|██▉ | 109185/371472 [8:40:55<19:42:48, 3.70it/s] 29%|██▉ | 109186/371472 [8:40:55<20:22:34, 3.58it/s] 29%|██▉ | 109187/371472 [8:40:56<20:17:24, 3.59it/s] 29%|██▉ | 109188/371472 [8:40:56<20:43:32, 3.52it/s] 29%|██▉ | 109189/371472 [8:40:56<20:06:51, 3.62it/s] 29%|██▉ | 109190/371472 [8:40:57<19:43:12, 3.69it/s] 29%|██▉ | 109191/371472 [8:40:57<19:58:45, 3.65it/s] 29%|██▉ | 109192/371472 [8:40:57<20:09:05, 3.62it/s] 29%|██▉ | 109193/371472 [8:40:57<19:41:50, 3.70it/s] 29%|██▉ | 109194/371472 [8:40:58<19:29:25, 3.74it/s] 29%|██▉ | 109195/371472 [8:40:58<19:17:53, 3.78it/s] 29%|██▉ | 109196/371472 [8:40:58<19:39:19, 3.71it/s] 29%|██▉ | 109197/371472 [8:40:58<20:24:13, 3.57it/s] 29%|██▉ | 109198/371472 [8:40:59<20:02:50, 3.63it/s] 29%|██▉ | 109199/371472 [8:40:59<20:53:48, 3.49it/s] 29%|██▉ | 109200/371472 [8:40:59<22:00:26, 3.31it/s] {'loss': 3.4736, 'learning_rate': 7.357732336400267e-07, 'epoch': 4.7} + 29%|██▉ | 109200/371472 [8:40:59<22:00:26, 3.31it/s] 29%|██▉ | 109201/371472 [8:41:00<20:59:13, 3.47it/s] 29%|██▉ | 109202/371472 [8:41:00<19:48:05, 3.68it/s] 29%|██▉ | 109203/371472 [8:41:00<20:17:08, 3.59it/s] 29%|██▉ | 109204/371472 [8:41:00<20:01:27, 3.64it/s] 29%|██▉ | 109205/371472 [8:41:01<19:48:18, 3.68it/s] 29%|██▉ | 109206/371472 [8:41:01<20:27:21, 3.56it/s] 29%|██▉ | 109207/371472 [8:41:01<20:29:12, 3.56it/s] 29%|██��� | 109208/371472 [8:41:02<20:14:10, 3.60it/s] 29%|██▉ | 109209/371472 [8:41:02<19:58:58, 3.65it/s] 29%|██▉ | 109210/371472 [8:41:02<20:10:46, 3.61it/s] 29%|██▉ | 109211/371472 [8:41:02<20:32:33, 3.55it/s] 29%|██▉ | 109212/371472 [8:41:03<19:48:33, 3.68it/s] 29%|██▉ | 109213/371472 [8:41:03<19:20:23, 3.77it/s] 29%|██▉ | 109214/371472 [8:41:03<19:27:42, 3.74it/s] 29%|██▉ | 109215/371472 [8:41:03<20:11:36, 3.61it/s] 29%|██▉ | 109216/371472 [8:41:04<21:19:45, 3.42it/s] 29%|██▉ | 109217/371472 [8:41:04<21:10:57, 3.44it/s] 29%|██▉ | 109218/371472 [8:41:04<23:23:41, 3.11it/s] 29%|██▉ | 109219/371472 [8:41:05<23:03:10, 3.16it/s] 29%|██▉ | 109220/371472 [8:41:05<21:15:41, 3.43it/s] {'loss': 3.4527, 'learning_rate': 7.357247516645479e-07, 'epoch': 4.7} + 29%|██▉ | 109220/371472 [8:41:05<21:15:41, 3.43it/s] 29%|██▉ | 109221/371472 [8:41:05<20:41:20, 3.52it/s] 29%|██▉ | 109222/371472 [8:41:06<21:01:16, 3.47it/s] 29%|██▉ | 109223/371472 [8:41:06<20:46:35, 3.51it/s] 29%|██▉ | 109224/371472 [8:41:06<20:06:23, 3.62it/s] 29%|██▉ | 109225/371472 [8:41:06<19:54:54, 3.66it/s] 29%|██▉ | 109226/371472 [8:41:07<20:57:14, 3.48it/s] 29%|██▉ | 109227/371472 [8:41:07<21:59:28, 3.31it/s] 29%|██▉ | 109228/371472 [8:41:07<22:08:49, 3.29it/s] 29%|██▉ | 109229/371472 [8:41:08<22:13:04, 3.28it/s] 29%|██▉ | 109230/371472 [8:41:08<22:35:06, 3.23it/s] 29%|██▉ | 109231/371472 [8:41:08<21:40:42, 3.36it/s] 29%|██▉ | 109232/371472 [8:41:09<20:38:34, 3.53it/s] 29%|██▉ | 109233/371472 [8:41:09<21:04:45, 3.46it/s] 29%|██▉ | 109234/371472 [8:41:09<20:52:45, 3.49it/s] 29%|██▉ | 109235/371472 [8:41:10<24:53:06, 2.93it/s] 29%|██▉ | 109236/371472 [8:41:10<32:10:27, 2.26it/s] 29%|██▉ | 109237/371472 [8:41:11<28:14:49, 2.58it/s] 29%|██▉ | 109238/371472 [8:41:11<25:58:40, 2.80it/s] 29%|██▉ | 109239/371472 [8:41:11<24:12:48, 3.01it/s] 29%|██▉ | 109240/371472 [8:41:11<23:21:08, 3.12it/s] {'loss': 3.362, 'learning_rate': 7.356762696890689e-07, 'epoch': 4.71} + 29%|██▉ | 109240/371472 [8:41:11<23:21:08, 3.12it/s] 29%|██▉ | 109241/371472 [8:41:12<22:15:14, 3.27it/s] 29%|██▉ | 109242/371472 [8:41:12<21:48:19, 3.34it/s] 29%|██▉ | 109243/371472 [8:41:12<22:29:39, 3.24it/s] 29%|██▉ | 109244/371472 [8:41:13<21:51:40, 3.33it/s] 29%|██▉ | 109245/371472 [8:41:13<21:29:08, 3.39it/s] 29%|██▉ | 109246/371472 [8:41:13<21:30:19, 3.39it/s] 29%|██▉ | 109247/371472 [8:41:13<21:06:38, 3.45it/s] 29%|██▉ | 109248/371472 [8:41:14<21:04:50, 3.46it/s] 29%|██▉ | 109249/371472 [8:41:14<20:42:05, 3.52it/s] 29%|██▉ | 109250/371472 [8:41:14<20:21:25, 3.58it/s] 29%|██▉ | 109251/371472 [8:41:14<20:28:00, 3.56it/s] 29%|██▉ | 109252/371472 [8:41:15<20:21:07, 3.58it/s] 29%|██▉ | 109253/371472 [8:41:15<20:32:03, 3.55it/s] 29%|██▉ | 109254/371472 [8:41:15<20:18:59, 3.59it/s] 29%|██▉ | 109255/371472 [8:41:16<20:20:17, 3.58it/s] 29%|██▉ | 109256/371472 [8:41:16<20:35:26, 3.54it/s] 29%|██▉ | 109257/371472 [8:41:16<21:13:57, 3.43it/s] 29%|██▉ | 109258/371472 [8:41:17<21:23:42, 3.40it/s] 29%|██▉ | 109259/371472 [8:41:17<24:35:18, 2.96it/s] 29%|██▉ | 109260/371472 [8:41:17<23:18:40, 3.12it/s] {'loss': 3.3077, 'learning_rate': 7.3562778771359e-07, 'epoch': 4.71} + 29%|██▉ | 109260/371472 [8:41:17<23:18:40, 3.12it/s] 29%|██▉ | 109261/371472 [8:41:18<23:37:43, 3.08it/s] 29%|██▉ | 109262/371472 [8:41:18<22:42:54, 3.21it/s] 29%|██▉ | 109263/371472 [8:41:18<22:07:30, 3.29it/s] 29%|██▉ | 109264/371472 [8:41:18<22:24:01, 3.25it/s] 29%|██▉ | 109265/371472 [8:41:19<21:57:45, 3.32it/s] 29%|██▉ | 109266/371472 [8:41:19<21:43:35, 3.35it/s] 29%|██▉ | 109267/371472 [8:41:19<21:26:31, 3.40it/s] 29%|██▉ | 109268/371472 [8:41:20<21:57:13, 3.32it/s] 29%|██▉ | 109269/371472 [8:41:20<20:46:25, 3.51it/s] 29%|██▉ | 109270/371472 [8:41:20<21:09:08, 3.44it/s] 29%|██▉ | 109271/371472 [8:41:20<21:18:30, 3.42it/s] 29%|██▉ | 109272/371472 [8:41:21<22:17:38, 3.27it/s] 29%|██▉ | 109273/371472 [8:41:21<22:29:08, 3.24it/s] 29%|██▉ | 109274/371472 [8:41:21<21:25:29, 3.40it/s] 29%|██▉ | 109275/371472 [8:41:22<20:43:22, 3.51it/s] 29%|██▉ | 109276/371472 [8:41:22<20:22:54, 3.57it/s] 29%|██▉ | 109277/371472 [8:41:22<20:40:20, 3.52it/s] 29%|██▉ | 109278/371472 [8:41:22<20:09:14, 3.61it/s] 29%|██▉ | 109279/371472 [8:41:23<20:27:52, 3.56it/s] 29%|██▉ | 109280/371472 [8:41:23<21:23:06, 3.41it/s] {'loss': 3.3716, 'learning_rate': 7.355793057381112e-07, 'epoch': 4.71} + 29%|██▉ | 109280/371472 [8:41:23<21:23:06, 3.41it/s] 29%|██▉ | 109281/371472 [8:41:23<20:51:09, 3.49it/s] 29%|██▉ | 109282/371472 [8:41:24<20:31:08, 3.55it/s] 29%|██▉ | 109283/371472 [8:41:24<19:44:50, 3.69it/s] 29%|██▉ | 109284/371472 [8:41:24<19:40:07, 3.70it/s] 29%|██▉ | 109285/371472 [8:41:24<19:29:50, 3.74it/s] 29%|██▉ | 109286/371472 [8:41:25<19:34:05, 3.72it/s] 29%|██▉ | 109287/371472 [8:41:25<20:42:48, 3.52it/s] 29%|██▉ | 109288/371472 [8:41:25<20:45:40, 3.51it/s] 29%|██▉ | 109289/371472 [8:41:26<22:55:01, 3.18it/s] 29%|██▉ | 109290/371472 [8:41:26<22:04:21, 3.30it/s] 29%|██▉ | 109291/371472 [8:41:26<21:58:52, 3.31it/s] 29%|██▉ | 109292/371472 [8:41:26<20:52:40, 3.49it/s] 29%|██▉ | 109293/371472 [8:41:27<20:17:07, 3.59it/s] 29%|██▉ | 109294/371472 [8:41:27<19:44:06, 3.69it/s] 29%|██▉ | 109295/371472 [8:41:27<19:39:57, 3.70it/s] 29%|██▉ | 109296/371472 [8:41:28<21:06:56, 3.45it/s] 29%|██▉ | 109297/371472 [8:41:28<20:36:29, 3.53it/s] 29%|██▉ | 109298/371472 [8:41:28<22:13:17, 3.28it/s] 29%|██▉ | 109299/371472 [8:41:28<21:23:33, 3.40it/s] 29%|██▉ | 109300/371472 [8:41:29<20:22:20, 3.57it/s] {'loss': 3.4546, 'learning_rate': 7.355308237626321e-07, 'epoch': 4.71} + 29%|██▉ | 109300/371472 [8:41:29<20:22:20, 3.57it/s] 29%|██▉ | 109301/371472 [8:41:29<20:27:41, 3.56it/s] 29%|██▉ | 109302/371472 [8:41:29<20:42:58, 3.52it/s] 29%|██▉ | 109303/371472 [8:41:30<20:53:32, 3.49it/s] 29%|██▉ | 109304/371472 [8:41:30<20:32:07, 3.55it/s] 29%|██▉ | 109305/371472 [8:41:30<20:09:19, 3.61it/s] 29%|██▉ | 109306/371472 [8:41:30<19:46:55, 3.68it/s] 29%|██▉ | 109307/371472 [8:41:31<19:47:49, 3.68it/s] 29%|██▉ | 109308/371472 [8:41:31<19:13:12, 3.79it/s] 29%|██▉ | 109309/371472 [8:41:31<21:00:22, 3.47it/s] 29%|██▉ | 109310/371472 [8:41:32<20:20:46, 3.58it/s] 29%|██▉ | 109311/371472 [8:41:32<19:48:08, 3.68it/s] 29%|██▉ | 109312/371472 [8:41:32<20:42:48, 3.52it/s] 29%|██▉ | 109313/371472 [8:41:32<20:17:15, 3.59it/s] 29%|██▉ | 109314/371472 [8:41:33<19:52:00, 3.67it/s] 29%|██▉ | 109315/371472 [8:41:33<20:02:52, 3.63it/s] 29%|██▉ | 109316/371472 [8:41:33<19:37:51, 3.71it/s] 29%|██▉ | 109317/371472 [8:41:33<19:57:03, 3.65it/s] 29%|██▉ | 109318/371472 [8:41:34<19:28:54, 3.74it/s] 29%|██▉ | 109319/371472 [8:41:34<20:27:26, 3.56it/s] 29%|██▉ | 109320/371472 [8:41:34<22:02:12, 3.30it/s] {'loss': 3.2998, 'learning_rate': 7.354823417871532e-07, 'epoch': 4.71} + 29%|██▉ | 109320/371472 [8:41:34<22:02:12, 3.30it/s] 29%|██▉ | 109321/371472 [8:41:35<20:47:03, 3.50it/s] 29%|██▉ | 109322/371472 [8:41:35<22:02:46, 3.30it/s] 29%|██▉ | 109323/371472 [8:41:35<21:35:22, 3.37it/s] 29%|██▉ | 109324/371472 [8:41:35<20:48:57, 3.50it/s] 29%|██▉ | 109325/371472 [8:41:36<20:33:05, 3.54it/s] 29%|██▉ | 109326/371472 [8:41:36<20:25:52, 3.56it/s] 29%|██▉ | 109327/371472 [8:41:36<20:31:33, 3.55it/s] 29%|██▉ | 109328/371472 [8:41:37<19:42:50, 3.69it/s] 29%|██▉ | 109329/371472 [8:41:37<19:32:15, 3.73it/s] 29%|██▉ | 109330/371472 [8:41:37<19:16:53, 3.78it/s] 29%|██▉ | 109331/371472 [8:41:37<19:19:40, 3.77it/s] 29%|██▉ | 109332/371472 [8:41:38<18:48:26, 3.87it/s] 29%|██▉ | 109333/371472 [8:41:38<20:19:02, 3.58it/s] 29%|██▉ | 109334/371472 [8:41:38<20:42:06, 3.52it/s] 29%|██▉ | 109335/371472 [8:41:39<20:44:08, 3.51it/s] 29%|██▉ | 109336/371472 [8:41:39<20:36:58, 3.53it/s] 29%|██▉ | 109337/371472 [8:41:39<19:55:40, 3.65it/s] 29%|██▉ | 109338/371472 [8:41:39<20:05:35, 3.62it/s] 29%|██▉ | 109339/371472 [8:41:40<21:02:14, 3.46it/s] 29%|██▉ | 109340/371472 [8:41:40<20:58:47, 3.47it/s] {'loss': 3.4644, 'learning_rate': 7.354338598116744e-07, 'epoch': 4.71} + 29%|██▉ | 109340/371472 [8:41:40<20:58:47, 3.47it/s] 29%|██▉ | 109341/371472 [8:41:40<20:55:11, 3.48it/s] 29%|██▉ | 109342/371472 [8:41:41<20:53:30, 3.49it/s] 29%|██▉ | 109343/371472 [8:41:41<21:36:18, 3.37it/s] 29%|██▉ | 109344/371472 [8:41:41<22:27:08, 3.24it/s] 29%|██▉ | 109345/371472 [8:41:41<23:03:55, 3.16it/s] 29%|██▉ | 109346/371472 [8:41:42<21:47:42, 3.34it/s] 29%|██▉ | 109347/371472 [8:41:42<21:33:30, 3.38it/s] 29%|██▉ | 109348/371472 [8:41:42<20:45:27, 3.51it/s] 29%|██▉ | 109349/371472 [8:41:43<20:55:17, 3.48it/s] 29%|██▉ | 109350/371472 [8:41:43<20:15:31, 3.59it/s] 29%|██▉ | 109351/371472 [8:41:43<19:21:51, 3.76it/s] 29%|██▉ | 109352/371472 [8:41:43<20:09:58, 3.61it/s] 29%|██▉ | 109353/371472 [8:41:44<19:45:31, 3.68it/s] 29%|██▉ | 109354/371472 [8:41:44<19:30:47, 3.73it/s] 29%|██▉ | 109355/371472 [8:41:44<19:42:26, 3.69it/s] 29%|██▉ | 109356/371472 [8:41:44<19:13:49, 3.79it/s] 29%|██▉ | 109357/371472 [8:41:45<19:35:01, 3.72it/s] 29%|██▉ | 109358/371472 [8:41:45<18:55:58, 3.85it/s] 29%|██▉ | 109359/371472 [8:41:45<18:26:38, 3.95it/s] 29%|██▉ | 109360/371472 [8:41:45<19:26:54, 3.74it/s] {'loss': 3.4126, 'learning_rate': 7.353853778361956e-07, 'epoch': 4.71} + 29%|██▉ | 109360/371472 [8:41:45<19:26:54, 3.74it/s] 29%|██▉ | 109361/371472 [8:41:46<20:10:31, 3.61it/s] 29%|██▉ | 109362/371472 [8:41:46<19:53:49, 3.66it/s] 29%|██▉ | 109363/371472 [8:41:46<20:39:09, 3.53it/s] 29%|██▉ | 109364/371472 [8:41:47<20:45:49, 3.51it/s] 29%|██▉ | 109365/371472 [8:41:47<19:55:56, 3.65it/s] 29%|██▉ | 109366/371472 [8:41:47<19:34:14, 3.72it/s] 29%|██▉ | 109367/371472 [8:41:47<19:27:36, 3.74it/s] 29%|██▉ | 109368/371472 [8:41:48<19:03:31, 3.82it/s] 29%|██▉ | 109369/371472 [8:41:48<19:04:32, 3.82it/s] 29%|██▉ | 109370/371472 [8:41:48<20:05:41, 3.62it/s] 29%|██▉ | 109371/371472 [8:41:48<19:44:16, 3.69it/s] 29%|██▉ | 109372/371472 [8:41:49<19:12:35, 3.79it/s] 29%|██▉ | 109373/371472 [8:41:49<19:07:59, 3.81it/s] 29%|██▉ | 109374/371472 [8:41:49<18:56:51, 3.84it/s] 29%|██▉ | 109375/371472 [8:41:50<19:42:10, 3.70it/s] 29%|██▉ | 109376/371472 [8:41:50<19:23:08, 3.76it/s] 29%|██▉ | 109377/371472 [8:41:50<18:48:39, 3.87it/s] 29%|██▉ | 109378/371472 [8:41:50<19:25:45, 3.75it/s] 29%|██▉ | 109379/371472 [8:41:51<19:26:16, 3.75it/s] 29%|██▉ | 109380/371472 [8:41:51<19:40:46, 3.70it/s] {'loss': 3.3607, 'learning_rate': 7.353368958607166e-07, 'epoch': 4.71} + 29%|██▉ | 109380/371472 [8:41:51<19:40:46, 3.70it/s] 29%|██▉ | 109381/371472 [8:41:51<23:07:50, 3.15it/s] 29%|██▉ | 109382/371472 [8:41:52<22:09:21, 3.29it/s] 29%|██▉ | 109383/371472 [8:41:52<21:39:32, 3.36it/s] 29%|██▉ | 109384/371472 [8:41:52<20:45:45, 3.51it/s] 29%|██▉ | 109385/371472 [8:41:52<21:10:29, 3.44it/s] 29%|██▉ | 109386/371472 [8:41:53<21:43:05, 3.35it/s] 29%|██▉ | 109387/371472 [8:41:53<23:43:59, 3.07it/s] 29%|██▉ | 109388/371472 [8:41:53<22:28:18, 3.24it/s] 29%|██▉ | 109389/371472 [8:41:54<21:52:24, 3.33it/s] 29%|██▉ | 109390/371472 [8:41:54<21:14:00, 3.43it/s] 29%|██▉ | 109391/371472 [8:41:54<20:22:09, 3.57it/s] 29%|██▉ | 109392/371472 [8:41:54<19:59:18, 3.64it/s] 29%|██▉ | 109393/371472 [8:41:55<20:00:08, 3.64it/s] 29%|██▉ | 109394/371472 [8:41:55<20:06:02, 3.62it/s] 29%|██▉ | 109395/371472 [8:41:55<20:36:20, 3.53it/s] 29%|██▉ | 109396/371472 [8:41:56<23:36:09, 3.08it/s] 29%|██▉ | 109397/371472 [8:41:56<23:01:59, 3.16it/s] 29%|██▉ | 109398/371472 [8:41:56<21:48:25, 3.34it/s] 29%|██▉ | 109399/371472 [8:41:57<21:31:36, 3.38it/s] 29%|██▉ | 109400/371472 [8:41:57<20:49:15, 3.50it/s] {'loss': 3.3738, 'learning_rate': 7.352884138852377e-07, 'epoch': 4.71} + 29%|██▉ | 109400/371472 [8:41:57<20:49:15, 3.50it/s] 29%|██▉ | 109401/371472 [8:41:57<20:07:17, 3.62it/s] 29%|██▉ | 109402/371472 [8:41:57<20:07:11, 3.62it/s] 29%|██▉ | 109403/371472 [8:41:58<20:44:27, 3.51it/s] 29%|██▉ | 109404/371472 [8:41:58<20:08:20, 3.61it/s] 29%|██▉ | 109405/371472 [8:41:58<21:34:04, 3.38it/s] 29%|██▉ | 109406/371472 [8:41:59<21:53:12, 3.33it/s] 29%|██▉ | 109407/371472 [8:41:59<25:11:18, 2.89it/s] 29%|██▉ | 109408/371472 [8:41:59<24:36:28, 2.96it/s] 29%|██▉ | 109409/371472 [8:42:00<22:27:37, 3.24it/s] 29%|██▉ | 109410/371472 [8:42:00<22:58:46, 3.17it/s] 29%|██▉ | 109411/371472 [8:42:00<22:12:17, 3.28it/s] 29%|██▉ | 109412/371472 [8:42:00<21:39:18, 3.36it/s] 29%|██▉ | 109413/371472 [8:42:01<21:41:48, 3.36it/s] 29%|██▉ | 109414/371472 [8:42:01<23:34:34, 3.09it/s] 29%|██▉ | 109415/371472 [8:42:01<21:49:41, 3.33it/s] 29%|██▉ | 109416/371472 [8:42:02<21:41:52, 3.35it/s] 29%|██▉ | 109417/371472 [8:42:02<21:34:38, 3.37it/s] 29%|██▉ | 109418/371472 [8:42:02<20:56:53, 3.47it/s] 29%|██▉ | 109419/371472 [8:42:03<20:48:11, 3.50it/s] 29%|██▉ | 109420/371472 [8:42:03<20:38:08, 3.53it/s] {'loss': 3.4028, 'learning_rate': 7.352399319097589e-07, 'epoch': 4.71} + 29%|██▉ | 109420/371472 [8:42:03<20:38:08, 3.53it/s] 29%|██▉ | 109421/371472 [8:42:03<20:06:23, 3.62it/s] 29%|██▉ | 109422/371472 [8:42:03<19:36:25, 3.71it/s] 29%|██▉ | 109423/371472 [8:42:04<20:23:56, 3.57it/s] 29%|██▉ | 109424/371472 [8:42:04<19:47:26, 3.68it/s] 29%|██▉ | 109425/371472 [8:42:04<19:43:02, 3.69it/s] 29%|██▉ | 109426/371472 [8:42:04<19:41:55, 3.70it/s] 29%|██▉ | 109427/371472 [8:42:05<20:16:40, 3.59it/s] 29%|██▉ | 109428/371472 [8:42:05<20:08:04, 3.62it/s] 29%|██▉ | 109429/371472 [8:42:05<19:27:57, 3.74it/s] 29%|██▉ | 109430/371472 [8:42:06<22:07:15, 3.29it/s] 29%|██▉ | 109431/371472 [8:42:06<21:39:01, 3.36it/s] 29%|██▉ | 109432/371472 [8:42:06<22:16:30, 3.27it/s] 29%|██▉ | 109433/371472 [8:42:07<21:28:06, 3.39it/s] 29%|██▉ | 109434/371472 [8:42:07<21:12:17, 3.43it/s] 29%|██▉ | 109435/371472 [8:42:07<21:35:47, 3.37it/s] 29%|██▉ | 109436/371472 [8:42:07<20:55:42, 3.48it/s] 29%|██▉ | 109437/371472 [8:42:08<20:12:32, 3.60it/s] 29%|██▉ | 109438/371472 [8:42:08<19:43:17, 3.69it/s] 29%|██▉ | 109439/371472 [8:42:08<19:36:52, 3.71it/s] 29%|██▉ | 109440/371472 [8:42:08<19:16:34, 3.78it/s] {'loss': 3.467, 'learning_rate': 7.3519144993428e-07, 'epoch': 4.71} + 29%|██▉ | 109440/371472 [8:42:08<19:16:34, 3.78it/s] 29%|██▉ | 109441/371472 [8:42:09<20:05:46, 3.62it/s] 29%|██▉ | 109442/371472 [8:42:09<21:15:29, 3.42it/s] 29%|██▉ | 109443/371472 [8:42:09<20:27:57, 3.56it/s] 29%|██▉ | 109444/371472 [8:42:10<21:57:05, 3.32it/s] 29%|██▉ | 109445/371472 [8:42:10<20:38:34, 3.53it/s] 29%|██▉ | 109446/371472 [8:42:10<20:21:27, 3.58it/s] 29%|██▉ | 109447/371472 [8:42:10<19:34:47, 3.72it/s] 29%|██▉ | 109448/371472 [8:42:11<20:44:28, 3.51it/s] 29%|██▉ | 109449/371472 [8:42:11<22:34:02, 3.23it/s] 29%|██▉ | 109450/371472 [8:42:11<21:06:25, 3.45it/s] 29%|██▉ | 109451/371472 [8:42:12<20:45:41, 3.51it/s] 29%|██▉ | 109452/371472 [8:42:12<23:19:17, 3.12it/s] 29%|██▉ | 109453/371472 [8:42:12<22:28:16, 3.24it/s] 29%|██▉ | 109454/371472 [8:42:13<20:49:46, 3.49it/s] 29%|██▉ | 109455/371472 [8:42:13<21:03:34, 3.46it/s] 29%|██▉ | 109456/371472 [8:42:13<20:03:56, 3.63it/s] 29%|██▉ | 109457/371472 [8:42:13<20:02:17, 3.63it/s] 29%|██▉ | 109458/371472 [8:42:14<22:39:27, 3.21it/s] 29%|██▉ | 109459/371472 [8:42:14<22:15:51, 3.27it/s] 29%|██▉ | 109460/371472 [8:42:14<21:30:53, 3.38it/s] {'loss': 3.607, 'learning_rate': 7.35142967958801e-07, 'epoch': 4.71} + 29%|██▉ | 109460/371472 [8:42:14<21:30:53, 3.38it/s] 29%|██▉ | 109461/371472 [8:42:15<20:15:53, 3.59it/s] 29%|██▉ | 109462/371472 [8:42:15<19:36:13, 3.71it/s] 29%|██▉ | 109463/371472 [8:42:15<19:40:34, 3.70it/s] 29%|██▉ | 109464/371472 [8:42:15<19:27:09, 3.74it/s] 29%|██▉ | 109465/371472 [8:42:16<19:22:07, 3.76it/s] 29%|██▉ | 109466/371472 [8:42:16<19:00:59, 3.83it/s] 29%|██▉ | 109467/371472 [8:42:16<19:13:58, 3.78it/s] 29%|██▉ | 109468/371472 [8:42:16<18:57:24, 3.84it/s] 29%|██▉ | 109469/371472 [8:42:17<20:11:06, 3.61it/s] 29%|██▉ | 109470/371472 [8:42:17<19:24:50, 3.75it/s] 29%|██▉ | 109471/371472 [8:42:17<19:11:25, 3.79it/s] 29%|██▉ | 109472/371472 [8:42:17<19:54:02, 3.66it/s] 29%|██▉ | 109473/371472 [8:42:18<21:09:29, 3.44it/s] 29%|██▉ | 109474/371472 [8:42:18<20:43:00, 3.51it/s] 29%|██▉ | 109475/371472 [8:42:18<20:23:14, 3.57it/s] 29%|██▉ | 109476/371472 [8:42:19<20:23:24, 3.57it/s] 29%|██▉ | 109477/371472 [8:42:19<19:41:24, 3.70it/s] 29%|██▉ | 109478/371472 [8:42:19<18:56:43, 3.84it/s] 29%|██▉ | 109479/371472 [8:42:19<18:56:55, 3.84it/s] 29%|██▉ | 109480/371472 [8:42:20<19:18:55, 3.77it/s] {'loss': 3.4806, 'learning_rate': 7.350944859833221e-07, 'epoch': 4.72} + 29%|██▉ | 109480/371472 [8:42:20<19:18:55, 3.77it/s] 29%|██▉ | 109481/371472 [8:42:20<20:29:44, 3.55it/s] 29%|██▉ | 109482/371472 [8:42:20<21:10:16, 3.44it/s] 29%|██▉ | 109483/371472 [8:42:21<21:17:07, 3.42it/s] 29%|██▉ | 109484/371472 [8:42:21<21:10:03, 3.44it/s] 29%|██▉ | 109485/371472 [8:42:21<20:39:57, 3.52it/s] 29%|██▉ | 109486/371472 [8:42:21<20:58:31, 3.47it/s] 29%|██▉ | 109487/371472 [8:42:22<20:43:58, 3.51it/s] 29%|██▉ | 109488/371472 [8:42:22<20:43:33, 3.51it/s] 29%|██▉ | 109489/371472 [8:42:22<20:50:06, 3.49it/s] 29%|██▉ | 109490/371472 [8:42:23<20:21:40, 3.57it/s] 29%|██▉ | 109491/371472 [8:42:23<19:52:28, 3.66it/s] 29%|██▉ | 109492/371472 [8:42:23<19:22:47, 3.76it/s] 29%|██▉ | 109493/371472 [8:42:23<19:06:36, 3.81it/s] 29%|██▉ | 109494/371472 [8:42:24<18:57:32, 3.84it/s] 29%|██▉ | 109495/371472 [8:42:24<19:28:18, 3.74it/s] 29%|██▉ | 109496/371472 [8:42:24<22:14:55, 3.27it/s] 29%|██▉ | 109497/371472 [8:42:25<22:06:24, 3.29it/s] 29%|██▉ | 109498/371472 [8:42:25<22:51:13, 3.18it/s] 29%|██▉ | 109499/371472 [8:42:25<21:41:00, 3.36it/s] 29%|██▉ | 109500/371472 [8:42:25<21:25:24, 3.40it/s] {'loss': 3.5371, 'learning_rate': 7.350460040078432e-07, 'epoch': 4.72} + 29%|██▉ | 109500/371472 [8:42:25<21:25:24, 3.40it/s] 29%|██▉ | 109501/371472 [8:42:26<22:31:22, 3.23it/s] 29%|██▉ | 109502/371472 [8:42:26<21:39:06, 3.36it/s] 29%|██▉ | 109503/371472 [8:42:26<21:09:21, 3.44it/s] 29%|██▉ | 109504/371472 [8:42:27<20:46:45, 3.50it/s] 29%|██▉ | 109505/371472 [8:42:27<21:10:51, 3.44it/s] 29%|██▉ | 109506/371472 [8:42:27<21:10:04, 3.44it/s] 29%|██▉ | 109507/371472 [8:42:27<21:05:24, 3.45it/s] 29%|██▉ | 109508/371472 [8:42:28<20:27:47, 3.56it/s] 29%|██▉ | 109509/371472 [8:42:28<20:21:38, 3.57it/s] 29%|██▉ | 109510/371472 [8:42:28<20:30:55, 3.55it/s] 29%|██▉ | 109511/371472 [8:42:29<20:40:02, 3.52it/s] 29%|██▉ | 109512/371472 [8:42:29<20:07:33, 3.62it/s] 29%|██▉ | 109513/371472 [8:42:29<20:16:03, 3.59it/s] 29%|██▉ | 109514/371472 [8:42:29<19:48:12, 3.67it/s] 29%|██▉ | 109515/371472 [8:42:30<19:10:31, 3.79it/s] 29%|██▉ | 109516/371472 [8:42:30<19:06:23, 3.81it/s] 29%|██▉ | 109517/371472 [8:42:30<18:56:53, 3.84it/s] 29%|██▉ | 109518/371472 [8:42:30<19:20:59, 3.76it/s] 29%|██▉ | 109519/371472 [8:42:31<19:18:54, 3.77it/s] 29%|██▉ | 109520/371472 [8:42:31<19:01:00, 3.83it/s] {'loss': 3.4783, 'learning_rate': 7.349975220323645e-07, 'epoch': 4.72} + 29%|██▉ | 109520/371472 [8:42:31<19:01:00, 3.83it/s] 29%|██▉ | 109521/371472 [8:42:31<19:07:50, 3.80it/s] 29%|██▉ | 109522/371472 [8:42:31<18:52:34, 3.85it/s] 29%|██▉ | 109523/371472 [8:42:32<20:27:17, 3.56it/s] 29%|██▉ | 109524/371472 [8:42:32<20:17:04, 3.59it/s] 29%|██▉ | 109525/371472 [8:42:32<22:18:26, 3.26it/s] 29%|██▉ | 109526/371472 [8:42:33<21:31:43, 3.38it/s] 29%|██▉ | 109527/371472 [8:42:33<20:56:35, 3.47it/s] 29%|██▉ | 109528/371472 [8:42:33<20:21:49, 3.57it/s] 29%|██▉ | 109529/371472 [8:42:33<19:43:57, 3.69it/s] 29%|██▉ | 109530/371472 [8:42:34<19:38:13, 3.71it/s] 29%|██▉ | 109531/371472 [8:42:34<20:36:52, 3.53it/s] 29%|██▉ | 109532/371472 [8:42:34<20:03:03, 3.63it/s] 29%|██▉ | 109533/371472 [8:42:35<20:12:52, 3.60it/s] 29%|██▉ | 109534/371472 [8:42:35<20:57:46, 3.47it/s] 29%|██▉ | 109535/371472 [8:42:35<20:26:50, 3.56it/s] 29%|██▉ | 109536/371472 [8:42:36<22:14:48, 3.27it/s] 29%|██▉ | 109537/371472 [8:42:36<22:19:30, 3.26it/s] 29%|██▉ | 109538/371472 [8:42:36<21:01:36, 3.46it/s] 29%|██▉ | 109539/371472 [8:42:36<20:23:44, 3.57it/s] 29%|██▉ | 109540/371472 [8:42:37<21:23:45, 3.40it/s] {'loss': 3.2858, 'learning_rate': 7.349490400568855e-07, 'epoch': 4.72} + 29%|██▉ | 109540/371472 [8:42:37<21:23:45, 3.40it/s] 29%|██▉ | 109541/371472 [8:42:37<21:14:48, 3.42it/s] 29%|██▉ | 109542/371472 [8:42:37<20:24:09, 3.57it/s] 29%|██▉ | 109543/371472 [8:42:38<20:27:34, 3.56it/s] 29%|██▉ | 109544/371472 [8:42:38<19:57:15, 3.65it/s] 29%|██▉ | 109545/371472 [8:42:38<20:00:38, 3.64it/s] 29%|██▉ | 109546/371472 [8:42:38<20:40:22, 3.52it/s] 29%|██▉ | 109547/371472 [8:42:39<20:01:40, 3.63it/s] 29%|██▉ | 109548/371472 [8:42:39<19:35:00, 3.72it/s] 29%|██▉ | 109549/371472 [8:42:39<19:46:02, 3.68it/s] 29%|██▉ | 109550/371472 [8:42:39<20:17:24, 3.59it/s] 29%|██▉ | 109551/371472 [8:42:40<19:43:43, 3.69it/s] 29%|██▉ | 109552/371472 [8:42:40<20:16:44, 3.59it/s] 29%|██▉ | 109553/371472 [8:42:40<20:30:27, 3.55it/s] 29%|██▉ | 109554/371472 [8:42:41<20:10:42, 3.61it/s] 29%|██▉ | 109555/371472 [8:42:41<20:42:41, 3.51it/s] 29%|██▉ | 109556/371472 [8:42:41<19:55:38, 3.65it/s] 29%|██▉ | 109557/371472 [8:42:41<20:27:15, 3.56it/s] 29%|██▉ | 109558/371472 [8:42:42<20:13:28, 3.60it/s] 29%|██▉ | 109559/371472 [8:42:42<19:36:30, 3.71it/s] 29%|██▉ | 109560/371472 [8:42:42<19:26:04, 3.74it/s] {'loss': 3.6001, 'learning_rate': 7.349005580814065e-07, 'epoch': 4.72} + 29%|██▉ | 109560/371472 [8:42:42<19:26:04, 3.74it/s] 29%|██▉ | 109561/371472 [8:42:42<19:13:47, 3.78it/s] 29%|██▉ | 109562/371472 [8:42:43<19:02:49, 3.82it/s] 29%|██▉ | 109563/371472 [8:42:43<20:10:26, 3.61it/s] 29%|██▉ | 109564/371472 [8:42:43<20:45:23, 3.51it/s] 29%|██▉ | 109565/371472 [8:42:44<19:59:14, 3.64it/s] 29%|██▉ | 109566/371472 [8:42:44<19:13:01, 3.79it/s] 29%|██▉ | 109567/371472 [8:42:44<19:26:41, 3.74it/s] 29%|██▉ | 109568/371472 [8:42:44<19:16:49, 3.77it/s] 29%|██▉ | 109569/371472 [8:42:45<19:42:29, 3.69it/s] 29%|██▉ | 109570/371472 [8:42:45<20:03:43, 3.63it/s] 29%|██▉ | 109571/371472 [8:42:45<20:21:07, 3.57it/s] 29%|██▉ | 109572/371472 [8:42:45<19:44:14, 3.69it/s] 29%|██▉ | 109573/371472 [8:42:46<21:38:18, 3.36it/s] 29%|██▉ | 109574/371472 [8:42:46<21:23:21, 3.40it/s] 29%|██▉ | 109575/371472 [8:42:46<22:13:54, 3.27it/s] 29%|██▉ | 109576/371472 [8:42:47<22:10:53, 3.28it/s] 29%|██▉ | 109577/371472 [8:42:47<23:25:45, 3.11it/s] 29%|██▉ | 109578/371472 [8:42:47<22:37:55, 3.21it/s] 29%|██▉ | 109579/371472 [8:42:48<21:14:51, 3.42it/s] 29%|██▉ | 109580/371472 [8:42:48<20:10:33, 3.61it/s] {'loss': 3.567, 'learning_rate': 7.348520761059277e-07, 'epoch': 4.72} + 29%|██▉ | 109580/371472 [8:42:48<20:10:33, 3.61it/s] 29%|██▉ | 109581/371472 [8:42:48<20:44:32, 3.51it/s] 29%|██▉ | 109582/371472 [8:42:48<20:38:07, 3.53it/s] 29%|██▉ | 109583/371472 [8:42:49<19:55:57, 3.65it/s] 29%|██▉ | 109584/371472 [8:42:49<19:05:59, 3.81it/s] 30%|██▉ | 109585/371472 [8:42:49<18:55:35, 3.84it/s] 30%|██▉ | 109586/371472 [8:42:49<19:35:40, 3.71it/s] 30%|██▉ | 109587/371472 [8:42:50<20:42:36, 3.51it/s] 30%|██▉ | 109588/371472 [8:42:50<21:08:24, 3.44it/s] 30%|██▉ | 109589/371472 [8:42:50<20:14:16, 3.59it/s] 30%|██▉ | 109590/371472 [8:42:51<21:11:30, 3.43it/s] 30%|██▉ | 109591/371472 [8:42:51<21:10:10, 3.44it/s] 30%|██▉ | 109592/371472 [8:42:51<20:16:41, 3.59it/s] 30%|██▉ | 109593/371472 [8:42:52<23:52:40, 3.05it/s] 30%|██▉ | 109594/371472 [8:42:52<23:44:12, 3.06it/s] 30%|██▉ | 109595/371472 [8:42:52<22:00:06, 3.31it/s] 30%|██▉ | 109596/371472 [8:42:53<21:24:35, 3.40it/s] 30%|██▉ | 109597/371472 [8:42:53<20:51:33, 3.49it/s] 30%|██▉ | 109598/371472 [8:42:53<21:15:04, 3.42it/s] 30%|██▉ | 109599/371472 [8:42:53<22:29:33, 3.23it/s] 30%|██▉ | 109600/371472 [8:42:54<22:07:37, 3.29it/s] {'loss': 3.5085, 'learning_rate': 7.34803594130449e-07, 'epoch': 4.72} + 30%|██▉ | 109600/371472 [8:42:54<22:07:37, 3.29it/s] 30%|██▉ | 109601/371472 [8:42:54<23:09:31, 3.14it/s] 30%|██▉ | 109602/371472 [8:42:54<24:09:43, 3.01it/s] 30%|██▉ | 109603/371472 [8:42:55<22:06:22, 3.29it/s] 30%|██▉ | 109604/371472 [8:42:55<20:45:45, 3.50it/s] 30%|██▉ | 109605/371472 [8:42:55<20:28:31, 3.55it/s] 30%|██▉ | 109606/371472 [8:42:55<20:44:51, 3.51it/s] 30%|██▉ | 109607/371472 [8:42:56<19:55:59, 3.65it/s] 30%|██▉ | 109608/371472 [8:42:56<21:17:36, 3.42it/s] 30%|██▉ | 109609/371472 [8:42:56<23:07:46, 3.14it/s] 30%|██▉ | 109610/371472 [8:42:57<23:00:21, 3.16it/s] 30%|██▉ | 109611/371472 [8:42:57<21:46:50, 3.34it/s] 30%|██▉ | 109612/371472 [8:42:57<20:37:41, 3.53it/s] 30%|██▉ | 109613/371472 [8:42:58<19:47:00, 3.68it/s] 30%|██▉ | 109614/371472 [8:42:58<19:25:20, 3.75it/s] 30%|██▉ | 109615/371472 [8:42:58<19:30:37, 3.73it/s] 30%|██▉ | 109616/371472 [8:42:58<21:21:02, 3.41it/s] 30%|██▉ | 109617/371472 [8:42:59<22:31:04, 3.23it/s] 30%|██▉ | 109618/371472 [8:42:59<23:05:29, 3.15it/s] 30%|██▉ | 109619/371472 [8:42:59<22:06:07, 3.29it/s] 30%|██▉ | 109620/371472 [8:43:00<22:34:56, 3.22it/s] {'loss': 3.4442, 'learning_rate': 7.347551121549699e-07, 'epoch': 4.72} + 30%|██▉ | 109620/371472 [8:43:00<22:34:56, 3.22it/s] 30%|██▉ | 109621/371472 [8:43:00<21:06:23, 3.45it/s] 30%|██▉ | 109622/371472 [8:43:00<22:14:18, 3.27it/s] 30%|██▉ | 109623/371472 [8:43:01<21:21:36, 3.41it/s] 30%|██▉ | 109624/371472 [8:43:01<20:22:13, 3.57it/s] 30%|██▉ | 109625/371472 [8:43:01<21:31:25, 3.38it/s] 30%|██▉ | 109626/371472 [8:43:01<20:43:23, 3.51it/s] 30%|██▉ | 109627/371472 [8:43:02<19:39:07, 3.70it/s] 30%|██▉ | 109628/371472 [8:43:02<22:53:29, 3.18it/s] 30%|██▉ | 109629/371472 [8:43:02<22:03:57, 3.30it/s] 30%|██▉ | 109630/371472 [8:43:03<21:18:08, 3.41it/s] 30%|██▉ | 109631/371472 [8:43:03<20:31:32, 3.54it/s] 30%|██▉ | 109632/371472 [8:43:03<20:22:18, 3.57it/s] 30%|██▉ | 109633/371472 [8:43:03<20:25:45, 3.56it/s] 30%|██▉ | 109634/371472 [8:43:04<20:46:28, 3.50it/s] 30%|██▉ | 109635/371472 [8:43:04<23:39:54, 3.07it/s] 30%|██▉ | 109636/371472 [8:43:04<22:37:56, 3.21it/s] 30%|██▉ | 109637/371472 [8:43:05<21:39:28, 3.36it/s] 30%|██▉ | 109638/371472 [8:43:05<22:04:16, 3.30it/s] 30%|██▉ | 109639/371472 [8:43:05<21:02:32, 3.46it/s] 30%|██▉ | 109640/371472 [8:43:06<22:19:08, 3.26it/s] {'loss': 3.1194, 'learning_rate': 7.34706630179491e-07, 'epoch': 4.72} + 30%|██▉ | 109640/371472 [8:43:06<22:19:08, 3.26it/s] 30%|██▉ | 109641/371472 [8:43:06<21:34:14, 3.37it/s] 30%|██▉ | 109642/371472 [8:43:06<20:28:28, 3.55it/s] 30%|██▉ | 109643/371472 [8:43:06<19:52:27, 3.66it/s] 30%|██▉ | 109644/371472 [8:43:07<19:15:16, 3.78it/s] 30%|██▉ | 109645/371472 [8:43:07<19:05:36, 3.81it/s] 30%|██▉ | 109646/371472 [8:43:07<19:35:48, 3.71it/s] 30%|██▉ | 109647/371472 [8:43:07<19:49:42, 3.67it/s] 30%|██▉ | 109648/371472 [8:43:08<19:59:44, 3.64it/s] 30%|██▉ | 109649/371472 [8:43:08<20:11:36, 3.60it/s] 30%|██▉ | 109650/371472 [8:43:08<20:10:02, 3.61it/s] 30%|██▉ | 109651/371472 [8:43:09<20:14:21, 3.59it/s] 30%|██▉ | 109652/371472 [8:43:09<19:52:21, 3.66it/s] 30%|██▉ | 109653/371472 [8:43:09<19:35:05, 3.71it/s] 30%|██▉ | 109654/371472 [8:43:09<19:19:31, 3.76it/s] 30%|██▉ | 109655/371472 [8:43:10<19:50:44, 3.66it/s] 30%|██▉ | 109656/371472 [8:43:10<21:12:13, 3.43it/s] 30%|██▉ | 109657/371472 [8:43:10<22:37:28, 3.21it/s] 30%|██▉ | 109658/371472 [8:43:11<21:53:57, 3.32it/s] 30%|██▉ | 109659/371472 [8:43:11<21:44:16, 3.35it/s] 30%|██▉ | 109660/371472 [8:43:11<20:52:54, 3.48it/s] {'loss': 3.2859, 'learning_rate': 7.346581482040122e-07, 'epoch': 4.72} + 30%|██▉ | 109660/371472 [8:43:11<20:52:54, 3.48it/s] 30%|██▉ | 109661/371472 [8:43:11<20:55:30, 3.48it/s] 30%|██▉ | 109662/371472 [8:43:12<20:22:05, 3.57it/s] 30%|██▉ | 109663/371472 [8:43:12<20:36:54, 3.53it/s] 30%|██▉ | 109664/371472 [8:43:12<20:43:51, 3.51it/s] 30%|██▉ | 109665/371472 [8:43:12<20:04:11, 3.62it/s] 30%|██▉ | 109666/371472 [8:43:13<20:03:13, 3.63it/s] 30%|██▉ | 109667/371472 [8:43:13<19:28:22, 3.73it/s] 30%|██▉ | 109668/371472 [8:43:13<19:23:52, 3.75it/s] 30%|██▉ | 109669/371472 [8:43:14<19:27:19, 3.74it/s] 30%|██▉ | 109670/371472 [8:43:14<19:50:28, 3.67it/s] 30%|██▉ | 109671/371472 [8:43:14<19:30:07, 3.73it/s] 30%|██▉ | 109672/371472 [8:43:14<18:42:55, 3.89it/s] 30%|██▉ | 109673/371472 [8:43:15<19:47:04, 3.68it/s] 30%|██▉ | 109674/371472 [8:43:15<19:39:52, 3.70it/s] 30%|██▉ | 109675/371472 [8:43:15<21:33:49, 3.37it/s] 30%|██▉ | 109676/371472 [8:43:16<22:29:14, 3.23it/s] 30%|██▉ | 109677/371472 [8:43:16<21:55:08, 3.32it/s] 30%|██▉ | 109678/371472 [8:43:16<20:54:08, 3.48it/s] 30%|██▉ | 109679/371472 [8:43:16<20:25:30, 3.56it/s] 30%|██▉ | 109680/371472 [8:43:17<19:22:06, 3.75it/s] {'loss': 3.3333, 'learning_rate': 7.346096662285332e-07, 'epoch': 4.72} + 30%|██▉ | 109680/371472 [8:43:17<19:22:06, 3.75it/s] 30%|██▉ | 109681/371472 [8:43:17<19:15:42, 3.78it/s] 30%|██▉ | 109682/371472 [8:43:17<20:27:07, 3.56it/s] 30%|██▉ | 109683/371472 [8:43:17<20:12:20, 3.60it/s] 30%|██▉ | 109684/371472 [8:43:18<21:23:10, 3.40it/s] 30%|██▉ | 109685/371472 [8:43:18<21:28:51, 3.39it/s] 30%|██▉ | 109686/371472 [8:43:18<22:12:05, 3.28it/s] 30%|██▉ | 109687/371472 [8:43:19<21:32:40, 3.38it/s] 30%|██▉ | 109688/371472 [8:43:19<21:11:19, 3.43it/s] 30%|██▉ | 109689/371472 [8:43:19<20:28:47, 3.55it/s] 30%|██▉ | 109690/371472 [8:43:20<20:55:55, 3.47it/s] 30%|██▉ | 109691/371472 [8:43:20<22:15:10, 3.27it/s] 30%|██▉ | 109692/371472 [8:43:20<22:09:26, 3.28it/s] 30%|██▉ | 109693/371472 [8:43:21<22:59:10, 3.16it/s] 30%|██▉ | 109694/371472 [8:43:21<21:40:34, 3.35it/s] 30%|██▉ | 109695/371472 [8:43:21<20:45:07, 3.50it/s] 30%|██▉ | 109696/371472 [8:43:21<19:33:47, 3.72it/s] 30%|██▉ | 109697/371472 [8:43:22<20:37:11, 3.53it/s] 30%|██▉ | 109698/371472 [8:43:22<20:17:54, 3.58it/s] 30%|██▉ | 109699/371472 [8:43:22<19:25:41, 3.74it/s] 30%|██▉ | 109700/371472 [8:43:22<20:33:57, 3.54it/s] {'loss': 3.3976, 'learning_rate': 7.345611842530543e-07, 'epoch': 4.72} + 30%|██▉ | 109700/371472 [8:43:22<20:33:57, 3.54it/s] 30%|██▉ | 109701/371472 [8:43:23<20:21:49, 3.57it/s] 30%|██▉ | 109702/371472 [8:43:23<19:51:00, 3.66it/s] 30%|██▉ | 109703/371472 [8:43:23<20:02:38, 3.63it/s] 30%|██▉ | 109704/371472 [8:43:24<19:46:45, 3.68it/s] 30%|██▉ | 109705/371472 [8:43:24<19:57:41, 3.64it/s] 30%|██▉ | 109706/371472 [8:43:24<19:42:50, 3.69it/s] 30%|██▉ | 109707/371472 [8:43:24<19:40:38, 3.70it/s] 30%|██▉ | 109708/371472 [8:43:25<19:26:14, 3.74it/s] 30%|██▉ | 109709/371472 [8:43:25<19:21:03, 3.76it/s] 30%|██▉ | 109710/371472 [8:43:25<19:30:36, 3.73it/s] 30%|██▉ | 109711/371472 [8:43:25<20:38:27, 3.52it/s] 30%|██▉ | 109712/371472 [8:43:26<20:09:43, 3.61it/s] 30%|██▉ | 109713/371472 [8:43:26<20:52:13, 3.48it/s] 30%|██▉ | 109714/371472 [8:43:26<20:06:51, 3.61it/s] 30%|██▉ | 109715/371472 [8:43:27<20:47:39, 3.50it/s] 30%|██▉ | 109716/371472 [8:43:27<21:24:31, 3.40it/s] 30%|██▉ | 109717/371472 [8:43:27<20:14:25, 3.59it/s] 30%|██▉ | 109718/371472 [8:43:27<20:48:14, 3.49it/s] 30%|██▉ | 109719/371472 [8:43:28<22:35:23, 3.22it/s] 30%|██▉ | 109720/371472 [8:43:28<22:44:36, 3.20it/s] {'loss': 3.5718, 'learning_rate': 7.345127022775754e-07, 'epoch': 4.73} + 30%|██▉ | 109720/371472 [8:43:28<22:44:36, 3.20it/s] 30%|██▉ | 109721/371472 [8:43:28<21:40:30, 3.35it/s] 30%|██▉ | 109722/371472 [8:43:29<21:46:34, 3.34it/s] 30%|██▉ | 109723/371472 [8:43:29<20:38:20, 3.52it/s] 30%|██▉ | 109724/371472 [8:43:29<20:55:40, 3.47it/s] 30%|██▉ | 109725/371472 [8:43:30<20:48:55, 3.49it/s] 30%|██▉ | 109726/371472 [8:43:30<20:24:54, 3.56it/s] 30%|██▉ | 109727/371472 [8:43:30<22:10:04, 3.28it/s] 30%|██▉ | 109728/371472 [8:43:30<20:56:05, 3.47it/s] 30%|██▉ | 109729/371472 [8:43:31<20:43:06, 3.51it/s] 30%|██▉ | 109730/371472 [8:43:31<19:59:30, 3.64it/s] 30%|██▉ | 109731/371472 [8:43:31<21:18:22, 3.41it/s] 30%|██▉ | 109732/371472 [8:43:32<21:25:34, 3.39it/s] 30%|██▉ | 109733/371472 [8:43:32<20:58:46, 3.47it/s] 30%|██▉ | 109734/371472 [8:43:32<22:11:35, 3.28it/s] 30%|██▉ | 109735/371472 [8:43:32<21:25:20, 3.39it/s] 30%|██▉ | 109736/371472 [8:43:33<22:13:58, 3.27it/s] 30%|██▉ | 109737/371472 [8:43:33<21:44:04, 3.35it/s] 30%|██▉ | 109738/371472 [8:43:33<20:47:31, 3.50it/s] 30%|██▉ | 109739/371472 [8:43:34<24:30:40, 2.97it/s] 30%|██▉ | 109740/371472 [8:43:34<22:51:52, 3.18it/s] {'loss': 3.234, 'learning_rate': 7.344642203020966e-07, 'epoch': 4.73} + 30%|██▉ | 109740/371472 [8:43:34<22:51:52, 3.18it/s] 30%|██▉ | 109741/371472 [8:43:34<22:26:49, 3.24it/s] 30%|██▉ | 109742/371472 [8:43:35<22:06:26, 3.29it/s] 30%|██▉ | 109743/371472 [8:43:35<22:39:08, 3.21it/s] 30%|██▉ | 109744/371472 [8:43:35<21:18:41, 3.41it/s] 30%|██▉ | 109745/371472 [8:43:36<21:38:07, 3.36it/s] 30%|██▉ | 109746/371472 [8:43:36<22:07:45, 3.29it/s] 30%|██▉ | 109747/371472 [8:43:36<21:25:35, 3.39it/s] 30%|██▉ | 109748/371472 [8:43:36<20:54:57, 3.48it/s] 30%|██▉ | 109749/371472 [8:43:37<21:43:00, 3.35it/s] 30%|██▉ | 109750/371472 [8:43:37<21:57:48, 3.31it/s] 30%|██▉ | 109751/371472 [8:43:37<23:02:21, 3.16it/s] 30%|██▉ | 109752/371472 [8:43:38<22:54:56, 3.17it/s] 30%|██▉ | 109753/371472 [8:43:38<22:49:48, 3.18it/s] 30%|██▉ | 109754/371472 [8:43:38<23:39:55, 3.07it/s] 30%|██▉ | 109755/371472 [8:43:39<22:26:32, 3.24it/s] 30%|██▉ | 109756/371472 [8:43:39<22:13:32, 3.27it/s] 30%|██▉ | 109757/371472 [8:43:39<22:01:33, 3.30it/s] 30%|██▉ | 109758/371472 [8:43:40<22:41:04, 3.20it/s] 30%|██▉ | 109759/371472 [8:43:40<22:12:35, 3.27it/s] 30%|██▉ | 109760/371472 [8:43:40<21:38:31, 3.36it/s] {'loss': 3.247, 'learning_rate': 7.344157383266177e-07, 'epoch': 4.73} + 30%|██▉ | 109760/371472 [8:43:40<21:38:31, 3.36it/s] 30%|██▉ | 109761/371472 [8:43:40<20:59:31, 3.46it/s] 30%|██▉ | 109762/371472 [8:43:41<21:00:15, 3.46it/s] 30%|██▉ | 109763/371472 [8:43:41<21:00:11, 3.46it/s] 30%|██▉ | 109764/371472 [8:43:41<23:07:38, 3.14it/s] 30%|██▉ | 109765/371472 [8:43:42<22:25:50, 3.24it/s] 30%|██▉ | 109766/371472 [8:43:42<21:42:59, 3.35it/s] 30%|██▉ | 109767/371472 [8:43:42<21:20:13, 3.41it/s] 30%|██▉ | 109768/371472 [8:43:43<23:07:12, 3.14it/s] 30%|██▉ | 109769/371472 [8:43:43<22:49:00, 3.19it/s] 30%|██▉ | 109770/371472 [8:43:43<21:48:33, 3.33it/s] 30%|██▉ | 109771/371472 [8:43:43<21:24:43, 3.40it/s] 30%|██▉ | 109772/371472 [8:43:44<22:21:51, 3.25it/s] 30%|██▉ | 109773/371472 [8:43:44<21:08:28, 3.44it/s] 30%|██▉ | 109774/371472 [8:43:44<20:32:46, 3.54it/s] 30%|██▉ | 109775/371472 [8:43:45<20:04:03, 3.62it/s] 30%|██▉ | 109776/371472 [8:43:45<19:24:14, 3.75it/s] 30%|██▉ | 109777/371472 [8:43:45<19:49:39, 3.67it/s] 30%|██▉ | 109778/371472 [8:43:45<19:58:35, 3.64it/s] 30%|██▉ | 109779/371472 [8:43:46<19:48:19, 3.67it/s] 30%|██▉ | 109780/371472 [8:43:46<19:28:10, 3.73it/s] {'loss': 3.3834, 'learning_rate': 7.343672563511388e-07, 'epoch': 4.73} + 30%|██▉ | 109780/371472 [8:43:46<19:28:10, 3.73it/s] 30%|██▉ | 109781/371472 [8:43:46<19:56:58, 3.64it/s] 30%|██▉ | 109782/371472 [8:43:46<19:23:27, 3.75it/s] 30%|██▉ | 109783/371472 [8:43:47<19:00:34, 3.82it/s] 30%|██▉ | 109784/371472 [8:43:47<19:04:01, 3.81it/s] 30%|██▉ | 109785/371472 [8:43:47<19:09:39, 3.79it/s] 30%|██▉ | 109786/371472 [8:43:47<19:56:43, 3.64it/s] 30%|██▉ | 109787/371472 [8:43:48<19:56:40, 3.64it/s] 30%|██▉ | 109788/371472 [8:43:48<20:17:41, 3.58it/s] 30%|██▉ | 109789/371472 [8:43:48<20:47:07, 3.50it/s] 30%|██▉ | 109790/371472 [8:43:49<20:59:15, 3.46it/s] 30%|██▉ | 109791/371472 [8:43:49<20:17:26, 3.58it/s] 30%|██▉ | 109792/371472 [8:43:49<19:40:46, 3.69it/s] 30%|██▉ | 109793/371472 [8:43:49<19:27:24, 3.74it/s] 30%|██▉ | 109794/371472 [8:43:50<19:55:23, 3.65it/s] 30%|██▉ | 109795/371472 [8:43:50<19:21:56, 3.75it/s] 30%|██▉ | 109796/371472 [8:43:50<19:38:01, 3.70it/s] 30%|██▉ | 109797/371472 [8:43:50<19:25:36, 3.74it/s] 30%|██▉ | 109798/371472 [8:43:51<19:17:22, 3.77it/s] 30%|██▉ | 109799/371472 [8:43:51<20:21:13, 3.57it/s] 30%|██▉ | 109800/371472 [8:43:51<20:38:15, 3.52it/s] {'loss': 3.4639, 'learning_rate': 7.343187743756599e-07, 'epoch': 4.73} + 30%|██▉ | 109800/371472 [8:43:51<20:38:15, 3.52it/s] 30%|██▉ | 109801/371472 [8:43:52<20:23:21, 3.56it/s] 30%|██▉ | 109802/371472 [8:43:52<19:43:14, 3.69it/s] 30%|██▉ | 109803/371472 [8:43:52<19:44:09, 3.68it/s] 30%|██▉ | 109804/371472 [8:43:52<19:11:58, 3.79it/s] 30%|██▉ | 109805/371472 [8:43:53<25:58:05, 2.80it/s] 30%|██▉ | 109806/371472 [8:43:53<24:00:10, 3.03it/s] 30%|██▉ | 109807/371472 [8:43:53<22:15:53, 3.26it/s] 30%|██▉ | 109808/371472 [8:43:54<22:08:29, 3.28it/s] 30%|██▉ | 109809/371472 [8:43:54<21:03:45, 3.45it/s] 30%|██▉ | 109810/371472 [8:43:54<21:08:23, 3.44it/s] 30%|██▉ | 109811/371472 [8:43:55<20:36:11, 3.53it/s] 30%|██▉ | 109812/371472 [8:43:55<22:57:47, 3.17it/s] 30%|██▉ | 109813/371472 [8:43:55<22:23:58, 3.24it/s] 30%|██▉ | 109814/371472 [8:43:56<21:04:58, 3.45it/s] 30%|██▉ | 109815/371472 [8:43:56<21:07:49, 3.44it/s] 30%|██▉ | 109816/371472 [8:43:56<20:24:08, 3.56it/s] 30%|██▉ | 109817/371472 [8:43:56<21:03:37, 3.45it/s] 30%|██▉ | 109818/371472 [8:43:57<20:27:07, 3.55it/s] 30%|██▉ | 109819/371472 [8:43:57<23:37:00, 3.08it/s] 30%|██▉ | 109820/371472 [8:43:57<21:41:52, 3.35it/s] {'loss': 3.4121, 'learning_rate': 7.34270292400181e-07, 'epoch': 4.73} + 30%|██▉ | 109820/371472 [8:43:57<21:41:52, 3.35it/s] 30%|██▉ | 109821/371472 [8:43:58<21:12:36, 3.43it/s] 30%|██▉ | 109822/371472 [8:43:58<20:37:17, 3.52it/s] 30%|██▉ | 109823/371472 [8:43:58<21:31:45, 3.38it/s] 30%|██▉ | 109824/371472 [8:43:58<20:52:58, 3.48it/s] 30%|██▉ | 109825/371472 [8:43:59<20:21:21, 3.57it/s] 30%|██▉ | 109826/371472 [8:43:59<21:22:38, 3.40it/s] 30%|██▉ | 109827/371472 [8:43:59<20:30:27, 3.54it/s] 30%|██▉ | 109828/371472 [8:44:00<21:28:23, 3.38it/s] 30%|██▉ | 109829/371472 [8:44:00<20:44:34, 3.50it/s] 30%|██▉ | 109830/371472 [8:44:00<20:41:14, 3.51it/s] 30%|██▉ | 109831/371472 [8:44:00<20:20:42, 3.57it/s] 30%|██▉ | 109832/371472 [8:44:01<23:08:41, 3.14it/s] 30%|██▉ | 109833/371472 [8:44:01<23:56:35, 3.04it/s] 30%|██▉ | 109834/371472 [8:44:02<23:31:49, 3.09it/s] 30%|██▉ | 109835/371472 [8:44:02<23:34:25, 3.08it/s] 30%|██▉ | 109836/371472 [8:44:02<22:47:13, 3.19it/s] 30%|██▉ | 109837/371472 [8:44:02<23:32:39, 3.09it/s] 30%|██▉ | 109838/371472 [8:44:03<22:25:45, 3.24it/s] 30%|██▉ | 109839/371472 [8:44:03<21:39:04, 3.36it/s] 30%|██▉ | 109840/371472 [8:44:03<20:57:10, 3.47it/s] {'loss': 3.4958, 'learning_rate': 7.34221810424702e-07, 'epoch': 4.73} + 30%|██▉ | 109840/371472 [8:44:03<20:57:10, 3.47it/s] 30%|██▉ | 109841/371472 [8:44:04<20:54:12, 3.48it/s] 30%|██▉ | 109842/371472 [8:44:04<21:08:42, 3.44it/s] 30%|██▉ | 109843/371472 [8:44:04<20:24:52, 3.56it/s] 30%|██▉ | 109844/371472 [8:44:04<20:38:10, 3.52it/s] 30%|██▉ | 109845/371472 [8:44:05<20:27:54, 3.55it/s] 30%|██▉ | 109846/371472 [8:44:05<20:38:09, 3.52it/s] 30%|██▉ | 109847/371472 [8:44:05<20:32:39, 3.54it/s] 30%|██▉ | 109848/371472 [8:44:06<19:46:31, 3.67it/s] 30%|██▉ | 109849/371472 [8:44:06<19:12:32, 3.78it/s] 30%|██▉ | 109850/371472 [8:44:06<19:42:37, 3.69it/s] 30%|██▉ | 109851/371472 [8:44:06<19:29:44, 3.73it/s] 30%|██▉ | 109852/371472 [8:44:07<20:00:00, 3.63it/s] 30%|██▉ | 109853/371472 [8:44:07<20:13:21, 3.59it/s] 30%|██▉ | 109854/371472 [8:44:07<20:44:42, 3.50it/s] 30%|██▉ | 109855/371472 [8:44:07<20:46:00, 3.50it/s] 30%|██▉ | 109856/371472 [8:44:08<21:11:55, 3.43it/s] 30%|██▉ | 109857/371472 [8:44:08<21:44:28, 3.34it/s] 30%|██▉ | 109858/371472 [8:44:08<21:24:38, 3.39it/s] 30%|██▉ | 109859/371472 [8:44:09<24:21:54, 2.98it/s] 30%|██▉ | 109860/371472 [8:44:09<23:24:14, 3.11it/s] {'loss': 3.4193, 'learning_rate': 7.341733284492231e-07, 'epoch': 4.73} + 30%|██▉ | 109860/371472 [8:44:09<23:24:14, 3.11it/s] 30%|██▉ | 109861/371472 [8:44:09<22:28:36, 3.23it/s] 30%|██▉ | 109862/371472 [8:44:10<23:02:26, 3.15it/s] 30%|██▉ | 109863/371472 [8:44:10<22:44:17, 3.20it/s] 30%|██▉ | 109864/371472 [8:44:10<22:10:10, 3.28it/s] 30%|██▉ | 109865/371472 [8:44:11<20:48:37, 3.49it/s] 30%|██▉ | 109866/371472 [8:44:11<19:40:07, 3.69it/s] 30%|██▉ | 109867/371472 [8:44:11<19:08:02, 3.80it/s] 30%|██▉ | 109868/371472 [8:44:11<20:21:13, 3.57it/s] 30%|██▉ | 109869/371472 [8:44:12<20:39:04, 3.52it/s] 30%|██▉ | 109870/371472 [8:44:12<19:56:33, 3.64it/s] 30%|██▉ | 109871/371472 [8:44:12<20:25:41, 3.56it/s] 30%|██▉ | 109872/371472 [8:44:13<21:21:48, 3.40it/s] 30%|��█▉ | 109873/371472 [8:44:13<22:31:57, 3.22it/s] 30%|██▉ | 109874/371472 [8:44:13<22:08:25, 3.28it/s] 30%|██▉ | 109875/371472 [8:44:13<21:39:02, 3.36it/s] 30%|██▉ | 109876/371472 [8:44:14<20:36:28, 3.53it/s] 30%|██▉ | 109877/371472 [8:44:14<20:38:08, 3.52it/s] 30%|██▉ | 109878/371472 [8:44:14<19:41:21, 3.69it/s] 30%|██▉ | 109879/371472 [8:44:15<20:35:39, 3.53it/s] 30%|██▉ | 109880/371472 [8:44:15<21:51:56, 3.32it/s] {'loss': 3.4574, 'learning_rate': 7.341248464737443e-07, 'epoch': 4.73} + 30%|██▉ | 109880/371472 [8:44:15<21:51:56, 3.32it/s] 30%|██▉ | 109881/371472 [8:44:15<21:07:33, 3.44it/s] 30%|██▉ | 109882/371472 [8:44:15<20:55:53, 3.47it/s] 30%|██▉ | 109883/371472 [8:44:16<20:07:13, 3.61it/s] 30%|██▉ | 109884/371472 [8:44:16<19:47:44, 3.67it/s] 30%|██▉ | 109885/371472 [8:44:16<19:52:42, 3.66it/s] 30%|██▉ | 109886/371472 [8:44:16<19:22:42, 3.75it/s] 30%|██▉ | 109887/371472 [8:44:17<20:11:08, 3.60it/s] 30%|██▉ | 109888/371472 [8:44:17<21:00:24, 3.46it/s] 30%|██▉ | 109889/371472 [8:44:17<20:56:57, 3.47it/s] 30%|██▉ | 109890/371472 [8:44:18<20:09:37, 3.60it/s] 30%|██▉ | 109891/371472 [8:44:18<19:58:37, 3.64it/s] 30%|██▉ | 109892/371472 [8:44:18<19:57:25, 3.64it/s] 30%|██▉ | 109893/371472 [8:44:18<20:33:07, 3.54it/s] 30%|██▉ | 109894/371472 [8:44:19<20:06:25, 3.61it/s] 30%|██▉ | 109895/371472 [8:44:19<19:58:12, 3.64it/s] 30%|██▉ | 109896/371472 [8:44:19<20:42:02, 3.51it/s] 30%|██▉ | 109897/371472 [8:44:20<21:08:54, 3.44it/s] 30%|██▉ | 109898/371472 [8:44:20<20:31:16, 3.54it/s] 30%|██▉ | 109899/371472 [8:44:20<19:53:04, 3.65it/s] 30%|██▉ | 109900/371472 [8:44:20<19:50:04, 3.66it/s] {'loss': 3.3919, 'learning_rate': 7.340763644982654e-07, 'epoch': 4.73} + 30%|██▉ | 109900/371472 [8:44:20<19:50:04, 3.66it/s] 30%|██▉ | 109901/371472 [8:44:21<19:32:18, 3.72it/s] 30%|██▉ | 109902/371472 [8:44:21<20:34:22, 3.53it/s] 30%|██▉ | 109903/371472 [8:44:21<21:34:12, 3.37it/s] 30%|██▉ | 109904/371472 [8:44:22<21:00:12, 3.46it/s] 30%|██▉ | 109905/371472 [8:44:22<20:54:53, 3.47it/s] 30%|██▉ | 109906/371472 [8:44:22<20:15:24, 3.59it/s] 30%|██▉ | 109907/371472 [8:44:22<20:04:50, 3.62it/s] 30%|██▉ | 109908/371472 [8:44:23<19:44:24, 3.68it/s] 30%|██▉ | 109909/371472 [8:44:23<19:58:37, 3.64it/s] 30%|██▉ | 109910/371472 [8:44:23<19:48:20, 3.67it/s] 30%|██▉ | 109911/371472 [8:44:23<19:35:11, 3.71it/s] 30%|██▉ | 109912/371472 [8:44:24<19:27:19, 3.73it/s] 30%|██▉ | 109913/371472 [8:44:24<19:15:12, 3.77it/s] 30%|██▉ | 109914/371472 [8:44:24<19:42:24, 3.69it/s] 30%|██▉ | 109915/371472 [8:44:25<21:02:26, 3.45it/s] 30%|██▉ | 109916/371472 [8:44:25<22:52:39, 3.18it/s] 30%|██▉ | 109917/371472 [8:44:25<22:09:21, 3.28it/s] 30%|██▉ | 109918/371472 [8:44:26<23:18:43, 3.12it/s] 30%|██▉ | 109919/371472 [8:44:26<22:07:49, 3.28it/s] 30%|██▉ | 109920/371472 [8:44:26<24:16:07, 2.99it/s] {'loss': 3.4558, 'learning_rate': 7.340278825227865e-07, 'epoch': 4.73} + 30%|██▉ | 109920/371472 [8:44:26<24:16:07, 2.99it/s] 30%|██▉ | 109921/371472 [8:44:27<23:24:48, 3.10it/s] 30%|██▉ | 109922/371472 [8:44:27<21:49:03, 3.33it/s] 30%|██▉ | 109923/371472 [8:44:27<20:52:42, 3.48it/s] 30%|██▉ | 109924/371472 [8:44:27<20:28:57, 3.55it/s] 30%|██▉ | 109925/371472 [8:44:28<20:08:06, 3.61it/s] 30%|██▉ | 109926/371472 [8:44:28<19:53:28, 3.65it/s] 30%|██▉ | 109927/371472 [8:44:28<20:24:23, 3.56it/s] 30%|██▉ | 109928/371472 [8:44:28<20:43:20, 3.51it/s] 30%|██▉ | 109929/371472 [8:44:29<21:21:31, 3.40it/s] 30%|██▉ | 109930/371472 [8:44:29<22:57:55, 3.16it/s] 30%|██▉ | 109931/371472 [8:44:29<22:04:17, 3.29it/s] 30%|██▉ | 109932/371472 [8:44:30<22:24:37, 3.24it/s] 30%|██▉ | 109933/371472 [8:44:30<21:20:15, 3.40it/s] 30%|██▉ | 109934/371472 [8:44:30<21:07:43, 3.44it/s] 30%|██▉ | 109935/371472 [8:44:31<21:18:05, 3.41it/s] 30%|██▉ | 109936/371472 [8:44:31<21:13:15, 3.42it/s] 30%|██▉ | 109937/371472 [8:44:31<20:28:55, 3.55it/s] 30%|██▉ | 109938/371472 [8:44:31<21:13:10, 3.42it/s] 30%|██▉ | 109939/371472 [8:44:32<20:46:43, 3.50it/s] 30%|██▉ | 109940/371472 [8:44:32<21:08:28, 3.44it/s] {'loss': 3.3465, 'learning_rate': 7.339794005473075e-07, 'epoch': 4.74} + 30%|██▉ | 109940/371472 [8:44:32<21:08:28, 3.44it/s] 30%|██▉ | 109941/371472 [8:44:32<20:02:13, 3.63it/s] 30%|██▉ | 109942/371472 [8:44:33<20:09:26, 3.60it/s] 30%|██▉ | 109943/371472 [8:44:33<20:59:32, 3.46it/s] 30%|██▉ | 109944/371472 [8:44:33<20:37:22, 3.52it/s] 30%|██▉ | 109945/371472 [8:44:33<19:50:49, 3.66it/s] 30%|██▉ | 109946/371472 [8:44:34<20:10:11, 3.60it/s] 30%|██▉ | 109947/371472 [8:44:34<20:43:31, 3.51it/s] 30%|██▉ | 109948/371472 [8:44:34<20:24:51, 3.56it/s] 30%|██▉ | 109949/371472 [8:44:35<20:05:51, 3.61it/s] 30%|██▉ | 109950/371472 [8:44:35<19:39:42, 3.69it/s] 30%|██▉ | 109951/371472 [8:44:35<19:29:30, 3.73it/s] 30%|██▉ | 109952/371472 [8:44:35<21:53:10, 3.32it/s] 30%|██▉ | 109953/371472 [8:44:36<21:18:00, 3.41it/s] 30%|██▉ | 109954/371472 [8:44:36<21:23:42, 3.40it/s] 30%|██▉ | 109955/371472 [8:44:36<20:55:34, 3.47it/s] 30%|██▉ | 109956/371472 [8:44:36<19:53:24, 3.65it/s] 30%|██▉ | 109957/371472 [8:44:37<19:28:25, 3.73it/s] 30%|██▉ | 109958/371472 [8:44:37<19:41:39, 3.69it/s] 30%|██▉ | 109959/371472 [8:44:37<19:24:34, 3.74it/s] 30%|██▉ | 109960/371472 [8:44:38<20:53:11, 3.48it/s] {'loss': 3.4276, 'learning_rate': 7.339309185718287e-07, 'epoch': 4.74} + 30%|██▉ | 109960/371472 [8:44:38<20:53:11, 3.48it/s] 30%|██▉ | 109961/371472 [8:44:38<21:16:54, 3.41it/s] 30%|██▉ | 109962/371472 [8:44:38<20:57:01, 3.47it/s] 30%|██▉ | 109963/371472 [8:44:38<21:04:08, 3.45it/s] 30%|██▉ | 109964/371472 [8:44:39<20:08:34, 3.61it/s] 30%|██▉ | 109965/371472 [8:44:39<20:05:24, 3.62it/s] 30%|██▉ | 109966/371472 [8:44:39<19:29:27, 3.73it/s] 30%|██▉ | 109967/371472 [8:44:40<19:59:29, 3.63it/s] 30%|██▉ | 109968/371472 [8:44:40<20:31:53, 3.54it/s] 30%|██▉ | 109969/371472 [8:44:40<20:08:24, 3.61it/s] 30%|██▉ | 109970/371472 [8:44:40<20:15:29, 3.59it/s] 30%|██▉ | 109971/371472 [8:44:41<19:48:17, 3.67it/s] 30%|██▉ | 109972/371472 [8:44:41<21:06:56, 3.44it/s] 30%|██▉ | 109973/371472 [8:44:41<20:45:29, 3.50it/s] 30%|██▉ | 109974/371472 [8:44:42<21:09:05, 3.43it/s] 30%|██▉ | 109975/371472 [8:44:42<23:07:40, 3.14it/s] 30%|██▉ | 109976/371472 [8:44:42<22:36:02, 3.21it/s] 30%|██▉ | 109977/371472 [8:44:43<22:08:56, 3.28it/s] 30%|██▉ | 109978/371472 [8:44:43<21:06:48, 3.44it/s] 30%|██▉ | 109979/371472 [8:44:43<21:25:28, 3.39it/s] 30%|██▉ | 109980/371472 [8:44:43<20:23:27, 3.56it/s] {'loss': 3.4679, 'learning_rate': 7.338824365963498e-07, 'epoch': 4.74} + 30%|██▉ | 109980/371472 [8:44:43<20:23:27, 3.56it/s] 30%|██▉ | 109981/371472 [8:44:44<20:22:14, 3.57it/s] 30%|██▉ | 109982/371472 [8:44:44<20:41:01, 3.51it/s] 30%|██▉ | 109983/371472 [8:44:44<20:12:22, 3.59it/s] 30%|██▉ | 109984/371472 [8:44:45<21:12:24, 3.43it/s] 30%|██▉ | 109985/371472 [8:44:45<20:46:05, 3.50it/s] 30%|██▉ | 109986/371472 [8:44:45<22:00:35, 3.30it/s] 30%|██▉ | 109987/371472 [8:44:45<22:03:13, 3.29it/s] 30%|██▉ | 109988/371472 [8:44:46<22:15:35, 3.26it/s] 30%|██▉ | 109989/371472 [8:44:46<21:10:42, 3.43it/s] 30%|██▉ | 109990/371472 [8:44:46<20:42:26, 3.51it/s] 30%|██▉ | 109991/371472 [8:44:47<21:41:45, 3.35it/s] 30%|██▉ | 109992/371472 [8:44:47<20:42:20, 3.51it/s] 30%|██▉ | 109993/371472 [8:44:47<20:28:49, 3.55it/s] 30%|██▉ | 109994/371472 [8:44:47<20:43:38, 3.50it/s] 30%|██▉ | 109995/371472 [8:44:48<19:57:46, 3.64it/s] 30%|██▉ | 109996/371472 [8:44:48<22:50:15, 3.18it/s] 30%|██▉ | 109997/371472 [8:44:48<22:15:52, 3.26it/s] 30%|██▉ | 109998/371472 [8:44:49<22:33:41, 3.22it/s] 30%|██▉ | 109999/371472 [8:44:49<21:58:14, 3.31it/s] 30%|██▉ | 110000/371472 [8:44:49<21:17:41, 3.41it/s] {'loss': 3.3457, 'learning_rate': 7.338339546208709e-07, 'epoch': 4.74} + 30%|██▉ | 110000/371472 [8:44:49<21:17:41, 3.41it/s] 30%|██▉ | 110001/371472 [8:44:49<20:34:30, 3.53it/s] 30%|██▉ | 110002/371472 [8:44:50<21:12:34, 3.42it/s] 30%|██▉ | 110003/371472 [8:44:50<20:21:51, 3.57it/s] 30%|██▉ | 110004/371472 [8:44:50<21:41:24, 3.35it/s] 30%|██▉ | 110005/371472 [8:44:51<23:17:12, 3.12it/s] 30%|██▉ | 110006/371472 [8:44:51<21:24:08, 3.39it/s] 30%|██▉ | 110007/371472 [8:44:51<21:12:02, 3.43it/s] 30%|██▉ | 110008/371472 [8:44:52<21:37:05, 3.36it/s] 30%|██▉ | 110009/371472 [8:44:52<21:04:25, 3.45it/s] 30%|██▉ | 110010/371472 [8:44:52<21:43:47, 3.34it/s] 30%|██▉ | 110011/371472 [8:44:53<21:43:38, 3.34it/s] 30%|██▉ | 110012/371472 [8:44:53<21:28:04, 3.38it/s] 30%|██▉ | 110013/371472 [8:44:53<20:09:08, 3.60it/s] 30%|██▉ | 110014/371472 [8:44:53<19:17:40, 3.76it/s] 30%|██▉ | 110015/371472 [8:44:54<19:57:21, 3.64it/s] 30%|██▉ | 110016/371472 [8:44:54<20:26:02, 3.55it/s] 30%|██▉ | 110017/371472 [8:44:54<19:26:17, 3.74it/s] 30%|██▉ | 110018/371472 [8:44:54<18:54:01, 3.84it/s] 30%|██▉ | 110019/371472 [8:44:55<19:19:27, 3.76it/s] 30%|██▉ | 110020/371472 [8:44:55<19:18:43, 3.76it/s] {'loss': 3.1823, 'learning_rate': 7.33785472645392e-07, 'epoch': 4.74} + 30%|██▉ | 110020/371472 [8:44:55<19:18:43, 3.76it/s] 30%|██▉ | 110021/371472 [8:44:55<20:47:44, 3.49it/s] 30%|██▉ | 110022/371472 [8:44:56<21:15:41, 3.42it/s] 30%|██▉ | 110023/371472 [8:44:56<21:04:17, 3.45it/s] 30%|██▉ | 110024/371472 [8:44:56<20:16:17, 3.58it/s] 30%|██▉ | 110025/371472 [8:44:56<22:40:27, 3.20it/s] 30%|██▉ | 110026/371472 [8:44:57<22:23:57, 3.24it/s] 30%|██▉ | 110027/371472 [8:44:57<22:06:04, 3.29it/s] 30%|██▉ | 110028/371472 [8:44:57<22:28:10, 3.23it/s] 30%|██▉ | 110029/371472 [8:44:58<21:09:10, 3.43it/s] 30%|██▉ | 110030/371472 [8:44:58<20:12:36, 3.59it/s] 30%|██▉ | 110031/371472 [8:44:58<22:42:39, 3.20it/s] 30%|██▉ | 110032/371472 [8:44:59<22:38:25, 3.21it/s] 30%|██▉ | 110033/371472 [8:44:59<21:31:22, 3.37it/s] 30%|██▉ | 110034/371472 [8:44:59<20:21:52, 3.57it/s] 30%|██▉ | 110035/371472 [8:44:59<20:09:20, 3.60it/s] 30%|██▉ | 110036/371472 [8:45:00<19:58:38, 3.64it/s] 30%|██▉ | 110037/371472 [8:45:00<20:59:12, 3.46it/s] 30%|██▉ | 110038/371472 [8:45:00<20:23:45, 3.56it/s] 30%|██▉ | 110039/371472 [8:45:00<20:25:56, 3.55it/s] 30%|██▉ | 110040/371472 [8:45:01<20:30:40, 3.54it/s] {'loss': 3.2189, 'learning_rate': 7.337369906699132e-07, 'epoch': 4.74} + 30%|██▉ | 110040/371472 [8:45:01<20:30:40, 3.54it/s] 30%|██▉ | 110041/371472 [8:45:01<20:28:52, 3.55it/s] 30%|██▉ | 110042/371472 [8:45:01<20:28:38, 3.55it/s] 30%|██▉ | 110043/371472 [8:45:02<21:53:56, 3.32it/s] 30%|██▉ | 110044/371472 [8:45:02<21:26:37, 3.39it/s] 30%|██▉ | 110045/371472 [8:45:02<21:24:11, 3.39it/s] 30%|██▉ | 110046/371472 [8:45:03<22:13:38, 3.27it/s] 30%|██▉ | 110047/371472 [8:45:03<21:35:42, 3.36it/s] 30%|██▉ | 110048/371472 [8:45:03<21:09:28, 3.43it/s] 30%|██▉ | 110049/371472 [8:45:03<20:51:10, 3.48it/s] 30%|██▉ | 110050/371472 [8:45:04<20:50:57, 3.48it/s] 30%|██▉ | 110051/371472 [8:45:04<20:04:15, 3.62it/s] 30%|██▉ | 110052/371472 [8:45:04<19:32:57, 3.71it/s] 30%|██▉ | 110053/371472 [8:45:05<21:25:44, 3.39it/s] 30%|██▉ | 110054/371472 [8:45:05<20:53:26, 3.48it/s] 30%|██▉ | 110055/371472 [8:45:05<20:51:53, 3.48it/s] 30%|██▉ | 110056/371472 [8:45:05<20:12:42, 3.59it/s] 30%|██▉ | 110057/371472 [8:45:06<20:25:34, 3.56it/s] 30%|██▉ | 110058/371472 [8:45:06<21:27:17, 3.38it/s] 30%|██▉ | 110059/371472 [8:45:06<21:05:01, 3.44it/s] 30%|██▉ | 110060/371472 [8:45:07<22:52:33, 3.17it/s] {'loss': 3.4356, 'learning_rate': 7.336885086944342e-07, 'epoch': 4.74} + 30%|██▉ | 110060/371472 [8:45:07<22:52:33, 3.17it/s] 30%|██▉ | 110061/371472 [8:45:07<21:13:20, 3.42it/s] 30%|██▉ | 110062/371472 [8:45:07<23:01:56, 3.15it/s] 30%|██▉ | 110063/371472 [8:45:08<22:32:38, 3.22it/s] 30%|██▉ | 110064/371472 [8:45:08<21:23:55, 3.39it/s] 30%|██▉ | 110065/371472 [8:45:08<22:20:29, 3.25it/s] 30%|██▉ | 110066/371472 [8:45:08<21:53:39, 3.32it/s] 30%|██▉ | 110067/371472 [8:45:09<21:41:49, 3.35it/s] 30%|██▉ | 110068/371472 [8:45:09<21:20:32, 3.40it/s] 30%|██▉ | 110069/371472 [8:45:09<20:51:18, 3.48it/s] 30%|██▉ | 110070/371472 [8:45:10<19:53:46, 3.65it/s] 30%|██▉ | 110071/371472 [8:45:10<29:57:41, 2.42it/s] 30%|██▉ | 110072/371472 [8:45:11<27:34:17, 2.63it/s] 30%|██▉ | 110073/371472 [8:45:11<24:43:16, 2.94it/s] 30%|██▉ | 110074/371472 [8:45:11<23:24:50, 3.10it/s] 30%|██▉ | 110075/371472 [8:45:11<21:20:28, 3.40it/s] 30%|██▉ | 110076/371472 [8:45:12<20:48:28, 3.49it/s] 30%|██▉ | 110077/371472 [8:45:12<20:54:21, 3.47it/s] 30%|██▉ | 110078/371472 [8:45:12<20:08:23, 3.61it/s] 30%|██▉ | 110079/371472 [8:45:12<19:52:41, 3.65it/s] 30%|██▉ | 110080/371472 [8:45:13<19:55:38, 3.64it/s] {'loss': 3.3379, 'learning_rate': 7.336400267189553e-07, 'epoch': 4.74} + 30%|██▉ | 110080/371472 [8:45:13<19:55:38, 3.64it/s] 30%|██▉ | 110081/371472 [8:45:13<20:08:07, 3.61it/s] 30%|██▉ | 110082/371472 [8:45:13<20:41:49, 3.51it/s] 30%|██▉ | 110083/371472 [8:45:14<20:16:22, 3.58it/s] 30%|██▉ | 110084/371472 [8:45:14<20:40:03, 3.51it/s] 30%|██▉ | 110085/371472 [8:45:14<20:37:59, 3.52it/s] 30%|██▉ | 110086/371472 [8:45:14<20:07:35, 3.61it/s] 30%|██▉ | 110087/371472 [8:45:15<19:35:34, 3.71it/s] 30%|██▉ | 110088/371472 [8:45:15<19:42:37, 3.68it/s] 30%|██▉ | 110089/371472 [8:45:15<19:28:29, 3.73it/s] 30%|██▉ | 110090/371472 [8:45:15<20:35:28, 3.53it/s] 30%|██▉ | 110091/371472 [8:45:16<21:59:29, 3.30it/s] 30%|██▉ | 110092/371472 [8:45:16<22:13:43, 3.27it/s] 30%|██▉ | 110093/371472 [8:45:16<23:10:42, 3.13it/s] 30%|██▉ | 110094/371472 [8:45:17<22:47:51, 3.18it/s] 30%|██▉ | 110095/371472 [8:45:17<21:58:04, 3.31it/s] 30%|██▉ | 110096/371472 [8:45:17<22:12:49, 3.27it/s] 30%|██▉ | 110097/371472 [8:45:18<22:52:34, 3.17it/s] 30%|██▉ | 110098/371472 [8:45:18<23:03:08, 3.15it/s] 30%|██▉ | 110099/371472 [8:45:18<22:41:13, 3.20it/s] 30%|██▉ | 110100/371472 [8:45:19<21:42:05, 3.35it/s] {'loss': 3.3428, 'learning_rate': 7.335915447434764e-07, 'epoch': 4.74} + 30%|██▉ | 110100/371472 [8:45:19<21:42:05, 3.35it/s] 30%|██▉ | 110101/371472 [8:45:19<20:31:43, 3.54it/s] 30%|██▉ | 110102/371472 [8:45:19<20:16:37, 3.58it/s] 30%|██▉ | 110103/371472 [8:45:19<20:13:36, 3.59it/s] 30%|██▉ | 110104/371472 [8:45:20<21:13:52, 3.42it/s] 30%|██▉ | 110105/371472 [8:45:20<22:20:00, 3.25it/s] 30%|██▉ | 110106/371472 [8:45:20<21:06:02, 3.44it/s] 30%|██▉ | 110107/371472 [8:45:21<20:28:40, 3.55it/s] 30%|██▉ | 110108/371472 [8:45:21<19:24:29, 3.74it/s] 30%|██▉ | 110109/371472 [8:45:21<19:32:30, 3.72it/s] 30%|██▉ | 110110/371472 [8:45:21<19:02:57, 3.81it/s] 30%|██▉ | 110111/371472 [8:45:22<18:28:50, 3.93it/s] 30%|██▉ | 110112/371472 [8:45:22<18:10:45, 3.99it/s] 30%|██▉ | 110113/371472 [8:45:22<18:08:33, 4.00it/s] 30%|██▉ | 110114/371472 [8:45:22<18:01:07, 4.03it/s] 30%|██▉ | 110115/371472 [8:45:23<18:00:29, 4.03it/s] 30%|██▉ | 110116/371472 [8:45:23<19:22:43, 3.75it/s] 30%|██▉ | 110117/371472 [8:45:23<19:07:52, 3.79it/s] 30%|██▉ | 110118/371472 [8:45:23<18:59:24, 3.82it/s] 30%|██▉ | 110119/371472 [8:45:24<19:43:26, 3.68it/s] 30%|██▉ | 110120/371472 [8:45:24<20:47:08, 3.49it/s] {'loss': 3.3437, 'learning_rate': 7.335430627679976e-07, 'epoch': 4.74} + 30%|██▉ | 110120/371472 [8:45:24<20:47:08, 3.49it/s] 30%|██▉ | 110121/371472 [8:45:24<19:52:21, 3.65it/s] 30%|██▉ | 110122/371472 [8:45:25<21:58:01, 3.30it/s] 30%|██▉ | 110123/371472 [8:45:25<20:45:36, 3.50it/s] 30%|██▉ | 110124/371472 [8:45:25<20:48:59, 3.49it/s] 30%|██▉ | 110125/371472 [8:45:26<22:59:07, 3.16it/s] 30%|██▉ | 110126/371472 [8:45:26<22:40:01, 3.20it/s] 30%|██▉ | 110127/371472 [8:45:26<22:12:53, 3.27it/s] 30%|██▉ | 110128/371472 [8:45:26<21:04:51, 3.44it/s] 30%|██▉ | 110129/371472 [8:45:27<21:54:59, 3.31it/s] 30%|██▉ | 110130/371472 [8:45:27<20:53:45, 3.47it/s] 30%|██▉ | 110131/371472 [8:45:27<21:07:44, 3.44it/s] 30%|██▉ | 110132/371472 [8:45:28<20:25:47, 3.55it/s] 30%|██▉ | 110133/371472 [8:45:28<20:20:09, 3.57it/s] 30%|██▉ | 110134/371472 [8:45:28<20:11:15, 3.60it/s] 30%|██▉ | 110135/371472 [8:45:28<20:04:00, 3.62it/s] 30%|██▉ | 110136/371472 [8:45:29<21:37:12, 3.36it/s] 30%|██▉ | 110137/371472 [8:45:29<21:14:08, 3.42it/s] 30%|██▉ | 110138/371472 [8:45:29<21:16:11, 3.41it/s] 30%|██▉ | 110139/371472 [8:45:30<20:44:54, 3.50it/s] 30%|██▉ | 110140/371472 [8:45:30<19:50:56, 3.66it/s] {'loss': 3.4678, 'learning_rate': 7.334945807925187e-07, 'epoch': 4.74} + 30%|██▉ | 110140/371472 [8:45:30<19:50:56, 3.66it/s] 30%|██▉ | 110141/371472 [8:45:30<19:19:00, 3.76it/s] 30%|██▉ | 110142/371472 [8:45:30<18:55:42, 3.84it/s] 30%|██▉ | 110143/371472 [8:45:31<20:04:00, 3.62it/s] 30%|██▉ | 110144/371472 [8:45:31<20:14:10, 3.59it/s] 30%|██▉ | 110145/371472 [8:45:31<20:38:23, 3.52it/s] 30%|██▉ | 110146/371472 [8:45:31<20:31:21, 3.54it/s] 30%|██▉ | 110147/371472 [8:45:32<21:26:37, 3.39it/s] 30%|██▉ | 110148/371472 [8:45:32<20:21:13, 3.57it/s] 30%|██▉ | 110149/371472 [8:45:32<20:14:26, 3.59it/s] 30%|██▉ | 110150/371472 [8:45:33<19:56:00, 3.64it/s] 30%|██▉ | 110151/371472 [8:45:33<20:45:51, 3.50it/s] 30%|██▉ | 110152/371472 [8:45:33<20:10:08, 3.60it/s] 30%|██▉ | 110153/371472 [8:45:33<19:50:01, 3.66it/s] 30%|██▉ | 110154/371472 [8:45:34<20:05:09, 3.61it/s] 30%|██▉ | 110155/371472 [8:45:34<20:17:42, 3.58it/s] 30%|██▉ | 110156/371472 [8:45:34<19:50:08, 3.66it/s] 30%|██▉ | 110157/371472 [8:45:34<19:29:01, 3.73it/s] 30%|██▉ | 110158/371472 [8:45:35<19:02:18, 3.81it/s] 30%|██▉ | 110159/371472 [8:45:35<20:37:05, 3.52it/s] 30%|██▉ | 110160/371472 [8:45:35<20:38:29, 3.52it/s] {'loss': 3.4782, 'learning_rate': 7.334460988170398e-07, 'epoch': 4.74} + 30%|██▉ | 110160/371472 [8:45:35<20:38:29, 3.52it/s] 30%|██▉ | 110161/371472 [8:45:36<19:37:35, 3.70it/s] 30%|██▉ | 110162/371472 [8:45:36<19:20:00, 3.75it/s] 30%|██▉ | 110163/371472 [8:45:36<18:55:36, 3.84it/s] 30%|██▉ | 110164/371472 [8:45:36<19:41:54, 3.68it/s] 30%|██▉ | 110165/371472 [8:45:37<18:55:53, 3.83it/s] 30%|██▉ | 110166/371472 [8:45:37<21:51:02, 3.32it/s] 30%|██▉ | 110167/371472 [8:45:37<20:45:08, 3.50it/s] 30%|██▉ | 110168/371472 [8:45:38<20:45:19, 3.50it/s] 30%|██▉ | 110169/371472 [8:45:38<20:44:08, 3.50it/s] 30%|██▉ | 110170/371472 [8:45:38<21:15:16, 3.41it/s] 30%|██▉ | 110171/371472 [8:45:38<21:27:50, 3.38it/s] 30%|██▉ | 110172/371472 [8:45:39<20:15:07, 3.58it/s] 30%|██▉ | 110173/371472 [8:45:39<19:45:17, 3.67it/s] 30%|██▉ | 110174/371472 [8:45:39<20:03:36, 3.62it/s] 30%|██▉ | 110175/371472 [8:45:40<20:10:30, 3.60it/s] 30%|██▉ | 110176/371472 [8:45:40<19:33:46, 3.71it/s] 30%|██▉ | 110177/371472 [8:45:40<20:12:34, 3.59it/s] 30%|██▉ | 110178/371472 [8:45:40<19:53:39, 3.65it/s] 30%|██▉ | 110179/371472 [8:45:41<20:23:12, 3.56it/s] 30%|██▉ | 110180/371472 [8:45:41<20:32:32, 3.53it/s] {'loss': 3.4444, 'learning_rate': 7.333976168415609e-07, 'epoch': 4.75} + 30%|██▉ | 110180/371472 [8:45:41<20:32:32, 3.53it/s] 30%|██▉ | 110181/371472 [8:45:41<19:35:51, 3.70it/s] 30%|██▉ | 110182/371472 [8:45:41<20:07:05, 3.61it/s] 30%|██▉ | 110183/371472 [8:45:42<20:39:20, 3.51it/s] 30%|██▉ | 110184/371472 [8:45:42<20:30:03, 3.54it/s] 30%|██▉ | 110185/371472 [8:45:42<20:43:10, 3.50it/s] 30%|██▉ | 110186/371472 [8:45:43<20:16:28, 3.58it/s] 30%|██▉ | 110187/371472 [8:45:43<19:42:31, 3.68it/s] 30%|██▉ | 110188/371472 [8:45:43<19:38:38, 3.69it/s] 30%|██▉ | 110189/371472 [8:45:43<19:18:51, 3.76it/s] 30%|██▉ | 110190/371472 [8:45:44<18:56:55, 3.83it/s] 30%|██▉ | 110191/371472 [8:45:44<18:26:22, 3.94it/s] 30%|██▉ | 110192/371472 [8:45:44<19:45:27, 3.67it/s] 30%|██▉ | 110193/371472 [8:45:44<20:22:45, 3.56it/s] 30%|██▉ | 110194/371472 [8:45:45<19:50:19, 3.66it/s] 30%|██▉ | 110195/371472 [8:45:45<20:40:08, 3.51it/s] 30%|██▉ | 110196/371472 [8:45:45<19:57:58, 3.63it/s] 30%|██▉ | 110197/371472 [8:45:46<19:39:28, 3.69it/s] 30%|██▉ | 110198/371472 [8:45:46<20:05:34, 3.61it/s] 30%|██▉ | 110199/371472 [8:45:46<20:18:13, 3.57it/s] 30%|██▉ | 110200/371472 [8:45:46<20:02:38, 3.62it/s] {'loss': 3.4967, 'learning_rate': 7.333491348660819e-07, 'epoch': 4.75} + 30%|██▉ | 110200/371472 [8:45:46<20:02:38, 3.62it/s] 30%|██▉ | 110201/371472 [8:45:47<20:55:12, 3.47it/s] 30%|██▉ | 110202/371472 [8:45:47<20:11:37, 3.59it/s] 30%|██▉ | 110203/371472 [8:45:47<19:43:53, 3.68it/s] 30%|██▉ | 110204/371472 [8:45:47<19:38:04, 3.70it/s] 30%|██▉ | 110205/371472 [8:45:48<19:11:03, 3.78it/s] 30%|██▉ | 110206/371472 [8:45:48<19:31:28, 3.72it/s] 30%|██▉ | 110207/371472 [8:45:48<21:06:54, 3.44it/s] 30%|██▉ | 110208/371472 [8:45:49<22:26:33, 3.23it/s] 30%|██▉ | 110209/371472 [8:45:49<21:31:23, 3.37it/s] 30%|██▉ | 110210/371472 [8:45:49<21:22:38, 3.39it/s] 30%|██▉ | 110211/371472 [8:45:49<20:05:21, 3.61it/s] 30%|██▉ | 110212/371472 [8:45:50<20:36:40, 3.52it/s] 30%|██▉ | 110213/371472 [8:45:50<20:01:27, 3.62it/s] 30%|██▉ | 110214/371472 [8:45:50<19:34:16, 3.71it/s] 30%|██▉ | 110215/371472 [8:45:51<19:37:02, 3.70it/s] 30%|██▉ | 110216/371472 [8:45:51<19:23:31, 3.74it/s] 30%|██▉ | 110217/371472 [8:45:51<20:01:16, 3.62it/s] 30%|██▉ | 110218/371472 [8:45:51<20:17:51, 3.58it/s] 30%|██▉ | 110219/371472 [8:45:52<20:24:13, 3.56it/s] 30%|██▉ | 110220/371472 [8:45:52<21:21:10, 3.40it/s] {'loss': 3.1003, 'learning_rate': 7.333006528906031e-07, 'epoch': 4.75} + 30%|██▉ | 110220/371472 [8:45:52<21:21:10, 3.40it/s] 30%|██▉ | 110221/371472 [8:45:52<20:44:45, 3.50it/s] 30%|██▉ | 110222/371472 [8:45:53<23:50:46, 3.04it/s] 30%|██▉ | 110223/371472 [8:45:53<22:30:56, 3.22it/s] 30%|██▉ | 110224/371472 [8:45:53<21:28:21, 3.38it/s] 30%|██▉ | 110225/371472 [8:45:54<21:36:56, 3.36it/s] 30%|██▉ | 110226/371472 [8:45:54<22:06:24, 3.28it/s] 30%|██▉ | 110227/371472 [8:45:54<22:59:24, 3.16it/s] 30%|██▉ | 110228/371472 [8:45:54<21:55:38, 3.31it/s] 30%|██▉ | 110229/371472 [8:45:55<22:39:14, 3.20it/s] 30%|██▉ | 110230/371472 [8:45:55<21:45:51, 3.33it/s] 30%|██▉ | 110231/371472 [8:45:55<21:12:08, 3.42it/s] 30%|██▉ | 110232/371472 [8:45:56<20:19:48, 3.57it/s] 30%|██▉ | 110233/371472 [8:45:56<20:34:52, 3.53it/s] 30%|██▉ | 110234/371472 [8:45:56<20:37:06, 3.52it/s] 30%|██▉ | 110235/371472 [8:45:56<19:54:30, 3.64it/s] 30%|██▉ | 110236/371472 [8:45:57<20:34:56, 3.53it/s] 30%|██▉ | 110237/371472 [8:45:57<20:42:25, 3.50it/s] 30%|██▉ | 110238/371472 [8:45:57<20:22:01, 3.56it/s] 30%|██▉ | 110239/371472 [8:45:58<20:30:46, 3.54it/s] 30%|██▉ | 110240/371472 [8:45:58<19:39:48, 3.69it/s] {'loss': 3.2818, 'learning_rate': 7.332521709151242e-07, 'epoch': 4.75} + 30%|██▉ | 110240/371472 [8:45:58<19:39:48, 3.69it/s] 30%|██▉ | 110241/371472 [8:45:58<20:36:18, 3.52it/s] 30%|██▉ | 110242/371472 [8:45:58<20:47:51, 3.49it/s] 30%|██▉ | 110243/371472 [8:45:59<22:02:27, 3.29it/s] 30%|██▉ | 110244/371472 [8:45:59<21:16:51, 3.41it/s] 30%|██▉ | 110245/371472 [8:45:59<21:02:10, 3.45it/s] 30%|██▉ | 110246/371472 [8:46:00<21:30:07, 3.37it/s] 30%|██▉ | 110247/371472 [8:46:00<21:37:15, 3.36it/s] 30%|██▉ | 110248/371472 [8:46:00<20:56:11, 3.47it/s] 30%|██▉ | 110249/371472 [8:46:00<20:27:44, 3.55it/s] 30%|██▉ | 110250/371472 [8:46:01<20:42:16, 3.50it/s] 30%|██▉ | 110251/371472 [8:46:01<21:57:48, 3.30it/s] 30%|██▉ | 110252/371472 [8:46:01<22:38:28, 3.20it/s] 30%|██▉ | 110253/371472 [8:46:02<21:12:13, 3.42it/s] 30%|██▉ | 110254/371472 [8:46:02<21:21:54, 3.40it/s] 30%|██▉ | 110255/371472 [8:46:02<21:30:13, 3.37it/s] 30%|██▉ | 110256/371472 [8:46:03<21:29:07, 3.38it/s] 30%|██▉ | 110257/371472 [8:46:03<21:17:56, 3.41it/s] 30%|██▉ | 110258/371472 [8:46:03<20:16:07, 3.58it/s] 30%|██▉ | 110259/371472 [8:46:04<22:03:17, 3.29it/s] 30%|██▉ | 110260/371472 [8:46:04<21:10:11, 3.43it/s] {'loss': 3.4107, 'learning_rate': 7.332036889396453e-07, 'epoch': 4.75} + 30%|██▉ | 110260/371472 [8:46:04<21:10:11, 3.43it/s] 30%|██▉ | 110261/371472 [8:46:04<20:24:39, 3.55it/s] 30%|██▉ | 110262/371472 [8:46:04<19:54:44, 3.64it/s] 30%|██▉ | 110263/371472 [8:46:05<19:47:34, 3.67it/s] 30%|██▉ | 110264/371472 [8:46:05<20:03:04, 3.62it/s] 30%|██▉ | 110265/371472 [8:46:05<19:43:39, 3.68it/s] 30%|██▉ | 110266/371472 [8:46:05<19:39:34, 3.69it/s] 30%|██▉ | 110267/371472 [8:46:06<19:05:33, 3.80it/s] 30%|██▉ | 110268/371472 [8:46:06<19:38:43, 3.69it/s] 30%|██▉ | 110269/371472 [8:46:06<20:11:36, 3.59it/s] 30%|██▉ | 110270/371472 [8:46:07<22:56:17, 3.16it/s] 30%|██▉ | 110271/371472 [8:46:07<22:46:48, 3.19it/s] 30%|██▉ | 110272/371472 [8:46:07<22:25:57, 3.23it/s] 30%|██▉ | 110273/371472 [8:46:07<21:18:43, 3.40it/s] 30%|██▉ | 110274/371472 [8:46:08<20:58:48, 3.46it/s] 30%|██▉ | 110275/371472 [8:46:08<21:18:49, 3.40it/s] 30%|██▉ | 110276/371472 [8:46:08<20:55:42, 3.47it/s] 30%|██▉ | 110277/371472 [8:46:09<20:31:14, 3.54it/s] 30%|██▉ | 110278/371472 [8:46:09<20:31:35, 3.53it/s] 30%|██▉ | 110279/371472 [8:46:09<21:23:13, 3.39it/s] 30%|██▉ | 110280/371472 [8:46:10<22:09:59, 3.27it/s] {'loss': 3.4498, 'learning_rate': 7.331552069641664e-07, 'epoch': 4.75} + 30%|██▉ | 110280/371472 [8:46:10<22:09:59, 3.27it/s] 30%|██▉ | 110281/371472 [8:46:10<22:15:51, 3.26it/s] 30%|██▉ | 110282/371472 [8:46:10<21:16:23, 3.41it/s] 30%|██▉ | 110283/371472 [8:46:10<21:34:52, 3.36it/s] 30%|██▉ | 110284/371472 [8:46:11<21:22:10, 3.40it/s] 30%|██▉ | 110285/371472 [8:46:11<20:40:47, 3.51it/s] 30%|██▉ | 110286/371472 [8:46:11<22:59:11, 3.16it/s] 30%|██▉ | 110287/371472 [8:46:12<21:54:56, 3.31it/s] 30%|██▉ | 110288/371472 [8:46:12<20:47:56, 3.49it/s] 30%|██▉ | 110289/371472 [8:46:12<20:47:55, 3.49it/s] 30%|██▉ | 110290/371472 [8:46:12<20:45:45, 3.49it/s] 30%|██▉ | 110291/371472 [8:46:13<20:19:42, 3.57it/s] 30%|██▉ | 110292/371472 [8:46:13<19:32:13, 3.71it/s] 30%|██▉ | 110293/371472 [8:46:13<19:53:01, 3.65it/s] 30%|██▉ | 110294/371472 [8:46:14<22:16:20, 3.26it/s] 30%|██▉ | 110295/371472 [8:46:14<21:03:02, 3.45it/s] 30%|██▉ | 110296/371472 [8:46:14<22:36:28, 3.21it/s] 30%|██▉ | 110297/371472 [8:46:14<21:08:02, 3.43it/s] 30%|██▉ | 110298/371472 [8:46:15<21:01:25, 3.45it/s] 30%|██▉ | 110299/371472 [8:46:15<20:54:06, 3.47it/s] 30%|██▉ | 110300/371472 [8:46:15<20:07:18, 3.61it/s] {'loss': 3.5525, 'learning_rate': 7.331067249886876e-07, 'epoch': 4.75} + 30%|██▉ | 110300/371472 [8:46:15<20:07:18, 3.61it/s] 30%|██▉ | 110301/371472 [8:46:16<19:57:42, 3.63it/s] 30%|██▉ | 110302/371472 [8:46:16<19:46:33, 3.67it/s] 30%|██▉ | 110303/371472 [8:46:16<19:35:55, 3.70it/s] 30%|██▉ | 110304/371472 [8:46:16<19:41:35, 3.68it/s] 30%|██▉ | 110305/371472 [8:46:17<20:07:15, 3.61it/s] 30%|██▉ | 110306/371472 [8:46:17<20:34:53, 3.52it/s] 30%|██▉ | 110307/371472 [8:46:17<19:51:50, 3.65it/s] 30%|██▉ | 110308/371472 [8:46:17<19:45:15, 3.67it/s] 30%|██▉ | 110309/371472 [8:46:18<19:31:46, 3.71it/s] 30%|██▉ | 110310/371472 [8:46:18<18:49:43, 3.85it/s] 30%|██▉ | 110311/371472 [8:46:18<19:28:44, 3.72it/s] 30%|██▉ | 110312/371472 [8:46:19<19:08:30, 3.79it/s] 30%|██▉ | 110313/371472 [8:46:19<19:48:47, 3.66it/s] 30%|██▉ | 110314/371472 [8:46:19<19:37:12, 3.70it/s] 30%|██▉ | 110315/371472 [8:46:19<19:50:21, 3.66it/s] 30%|██▉ | 110316/371472 [8:46:20<19:29:48, 3.72it/s] 30%|██▉ | 110317/371472 [8:46:20<20:23:46, 3.56it/s] 30%|██▉ | 110318/371472 [8:46:20<20:50:38, 3.48it/s] 30%|██▉ | 110319/371472 [8:46:21<21:34:22, 3.36it/s] 30%|██▉ | 110320/371472 [8:46:21<23:01:50, 3.15it/s] {'loss': 3.2837, 'learning_rate': 7.330582430132086e-07, 'epoch': 4.75} + 30%|██▉ | 110320/371472 [8:46:21<23:01:50, 3.15it/s] 30%|██▉ | 110321/371472 [8:46:21<22:23:52, 3.24it/s] 30%|██▉ | 110322/371472 [8:46:21<20:58:44, 3.46it/s] 30%|██▉ | 110323/371472 [8:46:22<20:56:17, 3.46it/s] 30%|██▉ | 110324/371472 [8:46:22<20:36:59, 3.52it/s] 30%|██▉ | 110325/371472 [8:46:22<20:16:21, 3.58it/s] 30%|██▉ | 110326/371472 [8:46:23<19:25:58, 3.73it/s] 30%|██▉ | 110327/371472 [8:46:23<20:06:06, 3.61it/s] 30%|██▉ | 110328/371472 [8:46:23<21:04:23, 3.44it/s] 30%|██▉ | 110329/371472 [8:46:23<22:00:58, 3.29it/s] 30%|██▉ | 110330/371472 [8:46:24<24:05:59, 3.01it/s] 30%|██▉ | 110331/371472 [8:46:24<22:53:21, 3.17it/s] 30%|██▉ | 110332/371472 [8:46:24<22:31:50, 3.22it/s] 30%|██▉ | 110333/371472 [8:46:25<22:58:44, 3.16it/s] 30%|██▉ | 110334/371472 [8:46:25<21:56:17, 3.31it/s] 30%|██▉ | 110335/371472 [8:46:25<21:16:55, 3.41it/s] 30%|██▉ | 110336/371472 [8:46:26<20:34:07, 3.53it/s] 30%|██▉ | 110337/371472 [8:46:26<19:36:49, 3.70it/s] 30%|██▉ | 110338/371472 [8:46:26<19:36:42, 3.70it/s] 30%|██▉ | 110339/371472 [8:46:26<19:11:18, 3.78it/s] 30%|██▉ | 110340/371472 [8:46:27<18:38:55, 3.89it/s] {'loss': 3.3246, 'learning_rate': 7.330097610377298e-07, 'epoch': 4.75} + 30%|██▉ | 110340/371472 [8:46:27<18:38:55, 3.89it/s] 30%|██▉ | 110341/371472 [8:46:27<18:32:18, 3.91it/s] 30%|██▉ | 110342/371472 [8:46:27<19:19:11, 3.75it/s] 30%|██▉ | 110343/371472 [8:46:27<20:23:39, 3.56it/s] 30%|██▉ | 110344/371472 [8:46:28<19:46:07, 3.67it/s] 30%|██▉ | 110345/371472 [8:46:28<19:58:16, 3.63it/s] 30%|██▉ | 110346/371472 [8:46:28<19:26:14, 3.73it/s] 30%|██▉ | 110347/371472 [8:46:29<19:55:58, 3.64it/s] 30%|██▉ | 110348/371472 [8:46:29<19:46:17, 3.67it/s] 30%|██▉ | 110349/371472 [8:46:29<22:14:23, 3.26it/s] 30%|██▉ | 110350/371472 [8:46:29<20:42:46, 3.50it/s] 30%|██▉ | 110351/371472 [8:46:30<20:17:51, 3.57it/s] 30%|██▉ | 110352/371472 [8:46:30<19:57:35, 3.63it/s] 30%|██▉ | 110353/371472 [8:46:30<19:27:24, 3.73it/s] 30%|██▉ | 110354/371472 [8:46:30<19:03:51, 3.80it/s] 30%|██▉ | 110355/371472 [8:46:31<20:30:00, 3.54it/s] 30%|██▉ | 110356/371472 [8:46:31<20:04:09, 3.61it/s] 30%|██▉ | 110357/371472 [8:46:31<19:20:10, 3.75it/s] 30%|██▉ | 110358/371472 [8:46:32<19:40:56, 3.69it/s] 30%|██▉ | 110359/371472 [8:46:32<19:45:18, 3.67it/s] 30%|██▉ | 110360/371472 [8:46:32<19:52:10, 3.65it/s] {'loss': 3.3768, 'learning_rate': 7.329612790622508e-07, 'epoch': 4.75} + 30%|██▉ | 110360/371472 [8:46:32<19:52:10, 3.65it/s] 30%|██▉ | 110361/371472 [8:46:32<20:59:02, 3.46it/s] 30%|██▉ | 110362/371472 [8:46:33<20:39:13, 3.51it/s] 30%|██▉ | 110363/371472 [8:46:33<20:17:19, 3.57it/s] 30%|██▉ | 110364/371472 [8:46:33<20:23:16, 3.56it/s] 30%|██▉ | 110365/371472 [8:46:34<20:16:51, 3.58it/s] 30%|██▉ | 110366/371472 [8:46:34<19:52:31, 3.65it/s] 30%|██▉ | 110367/371472 [8:46:34<20:08:41, 3.60it/s] 30%|██▉ | 110368/371472 [8:46:34<20:24:44, 3.55it/s] 30%|██▉ | 110369/371472 [8:46:35<20:30:53, 3.54it/s] 30%|██▉ | 110370/371472 [8:46:35<20:07:04, 3.61it/s] 30%|██▉ | 110371/371472 [8:46:35<21:07:01, 3.43it/s] 30%|██▉ | 110372/371472 [8:46:36<20:43:11, 3.50it/s] 30%|██▉ | 110373/371472 [8:46:36<20:45:13, 3.49it/s] 30%|██▉ | 110374/371472 [8:46:36<20:47:47, 3.49it/s] 30%|██▉ | 110375/371472 [8:46:36<21:25:44, 3.38it/s] 30%|██▉ | 110376/371472 [8:46:37<21:08:34, 3.43it/s] 30%|██▉ | 110377/371472 [8:46:37<21:03:38, 3.44it/s] 30%|██▉ | 110378/371472 [8:46:37<21:01:12, 3.45it/s] 30%|██▉ | 110379/371472 [8:46:38<20:03:37, 3.62it/s] 30%|██▉ | 110380/371472 [8:46:38<19:30:17, 3.72it/s] {'loss': 3.3292, 'learning_rate': 7.329127970867719e-07, 'epoch': 4.75} + 30%|██▉ | 110380/371472 [8:46:38<19:30:17, 3.72it/s] 30%|██▉ | 110381/371472 [8:46:38<19:39:06, 3.69it/s] 30%|██▉ | 110382/371472 [8:46:38<19:47:08, 3.67it/s] 30%|██▉ | 110383/371472 [8:46:39<19:08:39, 3.79it/s] 30%|██▉ | 110384/371472 [8:46:39<20:06:16, 3.61it/s] 30%|██▉ | 110385/371472 [8:46:39<20:54:22, 3.47it/s] 30%|██▉ | 110386/371472 [8:46:39<20:01:45, 3.62it/s] 30%|██▉ | 110387/371472 [8:46:40<21:16:51, 3.41it/s] 30%|██▉ | 110388/371472 [8:46:40<21:21:55, 3.39it/s] 30%|██▉ | 110389/371472 [8:46:40<20:17:34, 3.57it/s] 30%|██▉ | 110390/371472 [8:46:41<22:01:09, 3.29it/s] 30%|██▉ | 110391/371472 [8:46:41<22:21:51, 3.24it/s] 30%|██▉ | 110392/371472 [8:46:41<22:15:59, 3.26it/s] 30%|██▉ | 110393/371472 [8:46:42<21:43:49, 3.34it/s] 30%|██▉ | 110394/371472 [8:46:42<21:27:54, 3.38it/s] 30%|██▉ | 110395/371472 [8:46:42<20:26:56, 3.55it/s] 30%|██▉ | 110396/371472 [8:46:42<19:37:59, 3.69it/s] 30%|██▉ | 110397/371472 [8:46:43<19:39:40, 3.69it/s] 30%|██▉ | 110398/371472 [8:46:43<19:53:28, 3.65it/s] 30%|██▉ | 110399/371472 [8:46:43<19:40:48, 3.68it/s] 30%|██▉ | 110400/371472 [8:46:43<19:56:03, 3.64it/s] {'loss': 3.496, 'learning_rate': 7.32864315111293e-07, 'epoch': 4.76} + 30%|██▉ | 110400/371472 [8:46:43<19:56:03, 3.64it/s] 30%|██▉ | 110401/371472 [8:46:44<21:00:14, 3.45it/s] 30%|██▉ | 110402/371472 [8:46:44<20:32:58, 3.53it/s] 30%|���█▉ | 110403/371472 [8:46:44<19:35:54, 3.70it/s] 30%|██▉ | 110404/371472 [8:46:45<20:04:56, 3.61it/s] 30%|██▉ | 110405/371472 [8:46:45<19:25:56, 3.73it/s] 30%|██▉ | 110406/371472 [8:46:45<20:53:54, 3.47it/s] 30%|██▉ | 110407/371472 [8:46:45<19:57:06, 3.63it/s] 30%|██▉ | 110408/371472 [8:46:46<20:16:28, 3.58it/s] 30%|██▉ | 110409/371472 [8:46:46<20:12:26, 3.59it/s] 30%|██▉ | 110410/371472 [8:46:46<19:45:47, 3.67it/s] 30%|██▉ | 110411/371472 [8:46:47<20:23:40, 3.56it/s] 30%|██▉ | 110412/371472 [8:46:47<20:50:17, 3.48it/s] 30%|██▉ | 110413/371472 [8:46:47<20:03:12, 3.62it/s] 30%|██▉ | 110414/371472 [8:46:47<20:24:52, 3.55it/s] 30%|██▉ | 110415/371472 [8:46:48<20:42:51, 3.50it/s] 30%|██▉ | 110416/371472 [8:46:48<20:15:24, 3.58it/s] 30%|██▉ | 110417/371472 [8:46:48<19:54:42, 3.64it/s] 30%|██▉ | 110418/371472 [8:46:48<19:27:32, 3.73it/s] 30%|██▉ | 110419/371472 [8:46:49<19:36:02, 3.70it/s] 30%|██▉ | 110420/371472 [8:46:49<18:58:00, 3.82it/s] {'loss': 3.3893, 'learning_rate': 7.328158331358142e-07, 'epoch': 4.76} + 30%|██▉ | 110420/371472 [8:46:49<18:58:00, 3.82it/s] 30%|██▉ | 110421/371472 [8:46:49<19:25:19, 3.73it/s] 30%|██▉ | 110422/371472 [8:46:50<19:48:18, 3.66it/s] 30%|██▉ | 110423/371472 [8:46:50<20:05:40, 3.61it/s] 30%|██▉ | 110424/371472 [8:46:50<21:03:26, 3.44it/s] 30%|██▉ | 110425/371472 [8:46:50<20:31:57, 3.53it/s] 30%|██▉ | 110426/371472 [8:46:51<20:02:49, 3.62it/s] 30%|██▉ | 110427/371472 [8:46:51<19:29:21, 3.72it/s] 30%|██▉ | 110428/371472 [8:46:51<20:50:36, 3.48it/s] 30%|██▉ | 110429/371472 [8:46:52<20:55:31, 3.47it/s] 30%|██▉ | 110430/371472 [8:46:52<20:19:54, 3.57it/s] 30%|██▉ | 110431/371472 [8:46:52<20:52:53, 3.47it/s] 30%|██▉ | 110432/371472 [8:46:52<20:17:20, 3.57it/s] 30%|██▉ | 110433/371472 [8:46:53<19:38:07, 3.69it/s] 30%|██▉ | 110434/371472 [8:46:53<19:03:35, 3.80it/s] 30%|██▉ | 110435/371472 [8:46:53<18:33:15, 3.91it/s] 30%|██▉ | 110436/371472 [8:46:53<18:35:27, 3.90it/s] 30%|██▉ | 110437/371472 [8:46:54<19:52:15, 3.65it/s] 30%|██▉ | 110438/371472 [8:46:54<19:46:30, 3.67it/s] 30%|██▉ | 110439/371472 [8:46:54<20:08:51, 3.60it/s] 30%|██▉ | 110440/371472 [8:46:55<20:26:38, 3.55it/s] {'loss': 3.3441, 'learning_rate': 7.327673511603352e-07, 'epoch': 4.76} + 30%|██▉ | 110440/371472 [8:46:55<20:26:38, 3.55it/s] 30%|██▉ | 110441/371472 [8:46:55<19:56:53, 3.63it/s] 30%|██▉ | 110442/371472 [8:46:55<19:03:41, 3.80it/s] 30%|██▉ | 110443/371472 [8:46:55<19:28:24, 3.72it/s] 30%|██▉ | 110444/371472 [8:46:56<19:30:59, 3.72it/s] 30%|██▉ | 110445/371472 [8:46:56<19:12:16, 3.78it/s] 30%|██▉ | 110446/371472 [8:46:56<20:20:52, 3.56it/s] 30%|██▉ | 110447/371472 [8:46:56<20:43:26, 3.50it/s] 30%|██▉ | 110448/371472 [8:46:57<20:15:45, 3.58it/s] 30%|██▉ | 110449/371472 [8:46:57<20:05:39, 3.61it/s] 30%|██▉ | 110450/371472 [8:46:57<19:43:01, 3.68it/s] 30%|██▉ | 110451/371472 [8:46:58<20:36:40, 3.52it/s] 30%|██▉ | 110452/371472 [8:46:58<20:41:28, 3.50it/s] 30%|██▉ | 110453/371472 [8:46:58<21:58:50, 3.30it/s] 30%|██▉ | 110454/371472 [8:46:59<23:00:46, 3.15it/s] 30%|██▉ | 110455/371472 [8:46:59<22:03:53, 3.29it/s] 30%|██▉ | 110456/371472 [8:46:59<20:57:24, 3.46it/s] 30%|██▉ | 110457/371472 [8:46:59<21:08:15, 3.43it/s] 30%|██▉ | 110458/371472 [8:47:00<20:05:52, 3.61it/s] 30%|██▉ | 110459/371472 [8:47:00<19:58:25, 3.63it/s] 30%|██▉ | 110460/371472 [8:47:00<19:42:02, 3.68it/s] {'loss': 3.4125, 'learning_rate': 7.327188691848563e-07, 'epoch': 4.76} + 30%|██▉ | 110460/371472 [8:47:00<19:42:02, 3.68it/s] 30%|██▉ | 110461/371472 [8:47:00<19:54:54, 3.64it/s] 30%|██▉ | 110462/371472 [8:47:01<19:25:37, 3.73it/s] 30%|██▉ | 110463/371472 [8:47:01<19:09:17, 3.79it/s] 30%|██▉ | 110464/371472 [8:47:01<19:13:41, 3.77it/s] 30%|██▉ | 110465/371472 [8:47:01<19:45:06, 3.67it/s] 30%|██▉ | 110466/371472 [8:47:02<21:34:13, 3.36it/s] 30%|██▉ | 110467/371472 [8:47:02<20:25:50, 3.55it/s] 30%|██▉ | 110468/371472 [8:47:02<19:50:17, 3.65it/s] 30%|██▉ | 110469/371472 [8:47:03<19:39:06, 3.69it/s] 30%|██▉ | 110470/371472 [8:47:03<19:35:24, 3.70it/s] 30%|██▉ | 110471/371472 [8:47:03<19:46:47, 3.67it/s] 30%|██▉ | 110472/371472 [8:47:03<19:47:42, 3.66it/s] 30%|██▉ | 110473/371472 [8:47:04<19:45:54, 3.67it/s] 30%|██▉ | 110474/371472 [8:47:04<20:00:55, 3.62it/s] 30%|██▉ | 110475/371472 [8:47:04<19:49:07, 3.66it/s] 30%|██▉ | 110476/371472 [8:47:05<19:39:09, 3.69it/s] 30%|██▉ | 110477/371472 [8:47:05<19:37:06, 3.70it/s] 30%|██▉ | 110478/371472 [8:47:05<19:26:18, 3.73it/s] 30%|██▉ | 110479/371472 [8:47:05<19:53:07, 3.65it/s] 30%|██▉ | 110480/371472 [8:47:06<20:17:47, 3.57it/s] {'loss': 3.4208, 'learning_rate': 7.326703872093774e-07, 'epoch': 4.76} + 30%|██▉ | 110480/371472 [8:47:06<20:17:47, 3.57it/s] 30%|██▉ | 110481/371472 [8:47:06<21:13:55, 3.41it/s] 30%|██▉ | 110482/371472 [8:47:06<20:21:47, 3.56it/s] 30%|██▉ | 110483/371472 [8:47:06<19:54:26, 3.64it/s] 30%|██▉ | 110484/371472 [8:47:07<19:45:54, 3.67it/s] 30%|██▉ | 110485/371472 [8:47:07<20:54:56, 3.47it/s] 30%|██▉ | 110486/371472 [8:47:07<21:30:55, 3.37it/s] 30%|██▉ | 110487/371472 [8:47:08<22:07:05, 3.28it/s] 30%|██▉ | 110488/371472 [8:47:08<22:35:42, 3.21it/s] 30%|██▉ | 110489/371472 [8:47:08<22:00:26, 3.29it/s] 30%|██▉ | 110490/371472 [8:47:09<21:33:13, 3.36it/s] 30%|██▉ | 110491/371472 [8:47:09<22:01:02, 3.29it/s] 30%|██▉ | 110492/371472 [8:47:09<21:30:49, 3.37it/s] 30%|██▉ | 110493/371472 [8:47:09<20:25:06, 3.55it/s] 30%|██▉ | 110494/371472 [8:47:10<20:38:18, 3.51it/s] 30%|██▉ | 110495/371472 [8:47:10<19:53:56, 3.64it/s] 30%|██▉ | 110496/371472 [8:47:10<20:51:23, 3.48it/s] 30%|██▉ | 110497/371472 [8:47:11<20:41:49, 3.50it/s] 30%|██▉ | 110498/371472 [8:47:11<21:06:32, 3.43it/s] 30%|██▉ | 110499/371472 [8:47:11<20:15:20, 3.58it/s] 30%|██▉ | 110500/371472 [8:47:11<20:50:46, 3.48it/s] {'loss': 3.4095, 'learning_rate': 7.326219052338985e-07, 'epoch': 4.76} + 30%|██▉ | 110500/371472 [8:47:11<20:50:46, 3.48it/s] 30%|██▉ | 110501/371472 [8:47:12<20:36:53, 3.52it/s] 30%|██▉ | 110502/371472 [8:47:12<22:50:44, 3.17it/s] 30%|██▉ | 110503/371472 [8:47:12<22:55:13, 3.16it/s] 30%|██▉ | 110504/371472 [8:47:13<22:37:22, 3.20it/s] 30%|██▉ | 110505/371472 [8:47:13<23:36:26, 3.07it/s] 30%|██▉ | 110506/371472 [8:47:13<22:38:41, 3.20it/s] 30%|██▉ | 110507/371472 [8:47:14<21:55:04, 3.31it/s] 30%|██▉ | 110508/371472 [8:47:14<22:06:14, 3.28it/s] 30%|██▉ | 110509/371472 [8:47:14<21:44:14, 3.33it/s] 30%|██▉ | 110510/371472 [8:47:15<21:14:56, 3.41it/s] 30%|██▉ | 110511/371472 [8:47:15<21:12:54, 3.42it/s] 30%|██▉ | 110512/371472 [8:47:15<20:38:57, 3.51it/s] 30%|██▉ | 110513/371472 [8:47:15<20:05:28, 3.61it/s] 30%|██▉ | 110514/371472 [8:47:16<19:22:05, 3.74it/s] 30%|██▉ | 110515/371472 [8:47:16<19:11:46, 3.78it/s] 30%|██▉ | 110516/371472 [8:47:16<19:49:01, 3.66it/s] 30%|██▉ | 110517/371472 [8:47:16<20:28:44, 3.54it/s] 30%|██▉ | 110518/371472 [8:47:17<20:24:12, 3.55it/s] 30%|██▉ | 110519/371472 [8:47:17<20:57:16, 3.46it/s] 30%|██▉ | 110520/371472 [8:47:17<21:15:10, 3.41it/s] {'loss': 3.3821, 'learning_rate': 7.325734232584197e-07, 'epoch': 4.76} + 30%|██▉ | 110520/371472 [8:47:17<21:15:10, 3.41it/s] 30%|██▉ | 110521/371472 [8:47:18<20:12:57, 3.59it/s] 30%|██▉ | 110522/371472 [8:47:18<19:35:02, 3.70it/s] 30%|██▉ | 110523/371472 [8:47:18<19:18:47, 3.75it/s] 30%|██▉ | 110524/371472 [8:47:18<19:40:02, 3.69it/s] 30%|██▉ | 110525/371472 [8:47:19<19:38:52, 3.69it/s] 30%|██▉ | 110526/371472 [8:47:19<20:33:02, 3.53it/s] 30%|██▉ | 110527/371472 [8:47:19<20:24:35, 3.55it/s] 30%|██▉ | 110528/371472 [8:47:20<20:10:25, 3.59it/s] 30%|██▉ | 110529/371472 [8:47:20<19:48:07, 3.66it/s] 30%|██▉ | 110530/371472 [8:47:20<22:04:58, 3.28it/s] 30%|██▉ | 110531/371472 [8:47:20<21:14:21, 3.41it/s] 30%|██▉ | 110532/371472 [8:47:21<20:41:01, 3.50it/s] 30%|██▉ | 110533/371472 [8:47:21<20:34:41, 3.52it/s] 30%|██▉ | 110534/371472 [8:47:21<21:31:32, 3.37it/s] 30%|██▉ | 110535/371472 [8:47:22<20:26:32, 3.55it/s] 30%|██▉ | 110536/371472 [8:47:22<20:58:16, 3.46it/s] 30%|██▉ | 110537/371472 [8:47:22<20:34:10, 3.52it/s] 30%|██▉ | 110538/371472 [8:47:22<19:43:11, 3.68it/s] 30%|██▉ | 110539/371472 [8:47:23<20:14:10, 3.58it/s] 30%|██▉ | 110540/371472 [8:47:23<20:38:13, 3.51it/s] {'loss': 3.5809, 'learning_rate': 7.325249412829408e-07, 'epoch': 4.76} + 30%|██▉ | 110540/371472 [8:47:23<20:38:13, 3.51it/s] 30%|██▉ | 110541/371472 [8:47:23<20:11:37, 3.59it/s] 30%|██▉ | 110542/371472 [8:47:23<20:09:42, 3.59it/s] 30%|██▉ | 110543/371472 [8:47:24<19:39:50, 3.69it/s] 30%|██▉ | 110544/371472 [8:47:24<20:09:44, 3.59it/s] 30%|██▉ | 110545/371472 [8:47:24<19:51:13, 3.65it/s] 30%|██▉ | 110546/371472 [8:47:25<19:44:17, 3.67it/s] 30%|██▉ | 110547/371472 [8:47:25<20:54:32, 3.47it/s] 30%|██▉ | 110548/371472 [8:47:25<20:14:39, 3.58it/s] 30%|██▉ | 110549/371472 [8:47:25<21:21:00, 3.39it/s] 30%|██▉ | 110550/371472 [8:47:26<20:48:51, 3.48it/s] 30%|██▉ | 110551/371472 [8:47:26<20:42:41, 3.50it/s] 30%|██▉ | 110552/371472 [8:47:26<21:09:14, 3.43it/s] 30%|██▉ | 110553/371472 [8:47:27<20:17:21, 3.57it/s] 30%|██▉ | 110554/371472 [8:47:27<20:17:00, 3.57it/s] 30%|██▉ | 110555/371472 [8:47:27<20:16:32, 3.57it/s] 30%|██▉ | 110556/371472 [8:47:27<19:44:08, 3.67it/s] 30%|██▉ | 110557/371472 [8:47:28<19:10:32, 3.78it/s] 30%|██▉ | 110558/371472 [8:47:28<19:09:35, 3.78it/s] 30%|██▉ | 110559/371472 [8:47:28<20:13:30, 3.58it/s] 30%|██▉ | 110560/371472 [8:47:29<21:01:21, 3.45it/s] {'loss': 3.3793, 'learning_rate': 7.324764593074619e-07, 'epoch': 4.76} + 30%|██▉ | 110560/371472 [8:47:29<21:01:21, 3.45it/s] 30%|██▉ | 110561/371472 [8:47:29<21:16:36, 3.41it/s] 30%|██▉ | 110562/371472 [8:47:29<20:07:46, 3.60it/s] 30%|██▉ | 110563/371472 [8:47:29<19:55:36, 3.64it/s] 30%|██▉ | 110564/371472 [8:47:30<19:26:23, 3.73it/s] 30%|██▉ | 110565/371472 [8:47:30<19:19:00, 3.75it/s] 30%|██▉ | 110566/371472 [8:47:30<19:07:07, 3.79it/s] 30%|██▉ | 110567/371472 [8:47:30<18:34:26, 3.90it/s] 30%|██▉ | 110568/371472 [8:47:31<19:10:48, 3.78it/s] 30%|██▉ | 110569/371472 [8:47:31<19:28:31, 3.72it/s] 30%|██▉ | 110570/371472 [8:47:31<20:50:44, 3.48it/s] 30%|██▉ | 110571/371472 [8:47:31<19:39:58, 3.69it/s] 30%|██▉ | 110572/371472 [8:47:32<19:26:01, 3.73it/s] 30%|██▉ | 110573/371472 [8:47:32<19:28:31, 3.72it/s] 30%|██▉ | 110574/371472 [8:47:32<19:25:30, 3.73it/s] 30%|██▉ | 110575/371472 [8:47:33<19:32:21, 3.71it/s] 30%|██▉ | 110576/371472 [8:47:33<19:29:59, 3.72it/s] 30%|██▉ | 110577/371472 [8:47:33<18:52:56, 3.84it/s] 30%|██▉ | 110578/371472 [8:47:33<18:38:52, 3.89it/s] 30%|██▉ | 110579/371472 [8:47:34<19:58:34, 3.63it/s] 30%|██▉ | 110580/371472 [8:47:34<19:29:26, 3.72it/s] {'loss': 3.4014, 'learning_rate': 7.324279773319829e-07, 'epoch': 4.76} + 30%|██▉ | 110580/371472 [8:47:34<19:29:26, 3.72it/s] 30%|██▉ | 110581/371472 [8:47:34<19:51:40, 3.65it/s] 30%|██▉ | 110582/371472 [8:47:34<20:08:13, 3.60it/s] 30%|██▉ | 110583/371472 [8:47:35<20:31:23, 3.53it/s] 30%|██▉ | 110584/371472 [8:47:35<20:14:02, 3.58it/s] 30%|██▉ | 110585/371472 [8:47:35<20:43:29, 3.50it/s] 30%|██▉ | 110586/371472 [8:47:36<20:00:44, 3.62it/s] 30%|██▉ | 110587/371472 [8:47:36<21:45:29, 3.33it/s] 30%|██▉ | 110588/371472 [8:47:36<20:42:49, 3.50it/s] 30%|██▉ | 110589/371472 [8:47:37<21:29:49, 3.37it/s] 30%|██▉ | 110590/371472 [8:47:37<21:05:28, 3.44it/s] 30%|██▉ | 110591/371472 [8:47:37<20:16:34, 3.57it/s] 30%|██▉ | 110592/371472 [8:47:37<20:48:36, 3.48it/s] 30%|██▉ | 110593/371472 [8:47:38<21:23:04, 3.39it/s] 30%|██▉ | 110594/371472 [8:47:38<21:22:43, 3.39it/s] 30%|██▉ | 110595/371472 [8:47:38<21:24:43, 3.38it/s] 30%|██▉ | 110596/371472 [8:47:39<21:15:03, 3.41it/s] 30%|██▉ | 110597/371472 [8:47:39<21:14:13, 3.41it/s] 30%|██▉ | 110598/371472 [8:47:39<20:54:29, 3.47it/s] 30%|██▉ | 110599/371472 [8:47:39<20:23:53, 3.55it/s] 30%|██▉ | 110600/371472 [8:47:40<19:48:03, 3.66it/s] {'loss': 3.3375, 'learning_rate': 7.323794953565041e-07, 'epoch': 4.76} + 30%|██▉ | 110600/371472 [8:47:40<19:48:03, 3.66it/s] 30%|██▉ | 110601/371472 [8:47:40<19:27:06, 3.73it/s] 30%|██�� | 110602/371472 [8:47:40<20:18:15, 3.57it/s] 30%|██▉ | 110603/371472 [8:47:40<20:34:39, 3.52it/s] 30%|██▉ | 110604/371472 [8:47:41<21:01:29, 3.45it/s] 30%|██▉ | 110605/371472 [8:47:41<20:30:17, 3.53it/s] 30%|██▉ | 110606/371472 [8:47:41<20:33:20, 3.53it/s] 30%|██▉ | 110607/371472 [8:47:42<20:28:34, 3.54it/s] 30%|██▉ | 110608/371472 [8:47:42<20:09:25, 3.59it/s] 30%|██▉ | 110609/371472 [8:47:42<21:16:23, 3.41it/s] 30%|██▉ | 110610/371472 [8:47:43<20:51:30, 3.47it/s] 30%|██▉ | 110611/371472 [8:47:43<20:40:45, 3.50it/s] 30%|██▉ | 110612/371472 [8:47:43<22:15:06, 3.26it/s] 30%|██▉ | 110613/371472 [8:47:43<22:26:38, 3.23it/s] 30%|██▉ | 110614/371472 [8:47:44<21:16:22, 3.41it/s] 30%|██▉ | 110615/371472 [8:47:44<20:53:41, 3.47it/s] 30%|██▉ | 110616/371472 [8:47:44<20:16:54, 3.57it/s] 30%|██▉ | 110617/371472 [8:47:45<19:41:08, 3.68it/s] 30%|██▉ | 110618/371472 [8:47:45<19:18:28, 3.75it/s] 30%|██▉ | 110619/371472 [8:47:45<19:06:45, 3.79it/s] 30%|██▉ | 110620/371472 [8:47:45<19:02:11, 3.81it/s] {'loss': 3.4503, 'learning_rate': 7.323310133810252e-07, 'epoch': 4.76} + 30%|██▉ | 110620/371472 [8:47:45<19:02:11, 3.81it/s] 30%|██▉ | 110621/371472 [8:47:46<20:19:58, 3.56it/s] 30%|██▉ | 110622/371472 [8:47:46<19:11:24, 3.78it/s] 30%|██▉ | 110623/371472 [8:47:46<20:27:12, 3.54it/s] 30%|██▉ | 110624/371472 [8:47:46<20:26:29, 3.54it/s] 30%|██▉ | 110625/371472 [8:47:47<19:19:53, 3.75it/s] 30%|██▉ | 110626/371472 [8:47:47<18:56:51, 3.82it/s] 30%|██▉ | 110627/371472 [8:47:47<19:22:05, 3.74it/s] 30%|██▉ | 110628/371472 [8:47:47<19:14:55, 3.76it/s] 30%|██▉ | 110629/371472 [8:47:48<19:21:05, 3.74it/s] 30%|██▉ | 110630/371472 [8:47:48<19:48:00, 3.66it/s] 30%|██▉ | 110631/371472 [8:47:48<20:15:51, 3.58it/s] 30%|██▉ | 110632/371472 [8:47:49<21:10:29, 3.42it/s] 30%|██▉ | 110633/371472 [8:47:49<21:28:52, 3.37it/s] 30%|██▉ | 110634/371472 [8:47:49<21:02:59, 3.44it/s] 30%|██▉ | 110635/371472 [8:47:50<21:33:09, 3.36it/s] 30%|██▉ | 110636/371472 [8:47:50<20:30:08, 3.53it/s] 30%|██▉ | 110637/371472 [8:47:50<20:45:29, 3.49it/s] 30%|██▉ | 110638/371472 [8:47:50<20:38:30, 3.51it/s] 30%|██▉ | 110639/371472 [8:47:51<19:45:43, 3.67it/s] 30%|██▉ | 110640/371472 [8:47:51<19:51:01, 3.65it/s] {'loss': 3.4172, 'learning_rate': 7.322825314055463e-07, 'epoch': 4.77} + 30%|██▉ | 110640/371472 [8:47:51<19:51:01, 3.65it/s] 30%|██▉ | 110641/371472 [8:47:51<19:26:30, 3.73it/s] 30%|██▉ | 110642/371472 [8:47:51<19:11:07, 3.78it/s] 30%|██▉ | 110643/371472 [8:47:52<19:29:20, 3.72it/s] 30%|██▉ | 110644/371472 [8:47:52<19:16:45, 3.76it/s] 30%|██▉ | 110645/371472 [8:47:52<19:06:58, 3.79it/s] 30%|██▉ | 110646/371472 [8:47:52<19:54:30, 3.64it/s] 30%|██▉ | 110647/371472 [8:47:53<19:27:05, 3.72it/s] 30%|██▉ | 110648/371472 [8:47:53<19:24:38, 3.73it/s] 30%|██▉ | 110649/371472 [8:47:53<19:07:36, 3.79it/s] 30%|██▉ | 110650/371472 [8:47:54<20:32:27, 3.53it/s] 30%|██▉ | 110651/371472 [8:47:54<20:58:25, 3.45it/s] 30%|██▉ | 110652/371472 [8:47:54<21:05:16, 3.44it/s] 30%|██▉ | 110653/371472 [8:47:54<21:05:58, 3.43it/s] 30%|██▉ | 110654/371472 [8:47:55<20:36:40, 3.52it/s] 30%|██▉ | 110655/371472 [8:47:55<20:20:49, 3.56it/s] 30%|██▉ | 110656/371472 [8:47:55<19:57:56, 3.63it/s] 30%|██▉ | 110657/371472 [8:47:56<19:54:50, 3.64it/s] 30%|██▉ | 110658/371472 [8:47:56<19:23:25, 3.74it/s] 30%|██▉ | 110659/371472 [8:47:56<19:21:26, 3.74it/s] 30%|██▉ | 110660/371472 [8:47:56<20:19:44, 3.56it/s] {'loss': 3.4392, 'learning_rate': 7.322340494300674e-07, 'epoch': 4.77} + 30%|██▉ | 110660/371472 [8:47:56<20:19:44, 3.56it/s] 30%|██▉ | 110661/371472 [8:47:57<20:11:16, 3.59it/s] 30%|██▉ | 110662/371472 [8:47:57<20:04:28, 3.61it/s] 30%|██▉ | 110663/371472 [8:47:57<20:24:02, 3.55it/s] 30%|██▉ | 110664/371472 [8:47:57<20:09:52, 3.59it/s] 30%|██▉ | 110665/371472 [8:47:58<20:08:32, 3.60it/s] 30%|██▉ | 110666/371472 [8:47:58<20:53:33, 3.47it/s] 30%|██▉ | 110667/371472 [8:47:58<20:54:41, 3.46it/s] 30%|██▉ | 110668/371472 [8:47:59<20:29:11, 3.54it/s] 30%|██▉ | 110669/371472 [8:47:59<19:43:30, 3.67it/s] 30%|██▉ | 110670/371472 [8:47:59<20:36:30, 3.52it/s] 30%|██▉ | 110671/371472 [8:48:00<21:43:47, 3.33it/s] 30%|██▉ | 110672/371472 [8:48:00<20:38:54, 3.51it/s] 30%|██▉ | 110673/371472 [8:48:00<20:42:04, 3.50it/s] 30%|██▉ | 110674/371472 [8:48:00<20:18:15, 3.57it/s] 30%|██▉ | 110675/371472 [8:48:01<20:04:19, 3.61it/s] 30%|██▉ | 110676/371472 [8:48:01<20:48:29, 3.48it/s] 30%|██▉ | 110677/371472 [8:48:01<22:58:03, 3.15it/s] 30%|██▉ | 110678/371472 [8:48:02<21:26:27, 3.38it/s] 30%|██▉ | 110679/371472 [8:48:02<21:18:00, 3.40it/s] 30%|██▉ | 110680/371472 [8:48:02<20:17:59, 3.57it/s] {'loss': 3.4987, 'learning_rate': 7.321855674545886e-07, 'epoch': 4.77} + 30%|██▉ | 110680/371472 [8:48:02<20:17:59, 3.57it/s] 30%|██▉ | 110681/371472 [8:48:02<20:41:47, 3.50it/s] 30%|██▉ | 110682/371472 [8:48:03<19:46:51, 3.66it/s] 30%|██▉ | 110683/371472 [8:48:03<20:27:30, 3.54it/s] 30%|██▉ | 110684/371472 [8:48:03<20:20:03, 3.56it/s] 30%|██▉ | 110685/371472 [8:48:03<19:50:14, 3.65it/s] 30%|██▉ | 110686/371472 [8:48:04<19:46:20, 3.66it/s] 30%|██▉ | 110687/371472 [8:48:04<20:45:44, 3.49it/s] 30%|██▉ | 110688/371472 [8:48:04<20:33:53, 3.52it/s] 30%|██▉ | 110689/371472 [8:48:05<19:53:41, 3.64it/s] 30%|██▉ | 110690/371472 [8:48:05<19:48:27, 3.66it/s] 30%|██▉ | 110691/371472 [8:48:05<19:46:25, 3.66it/s] 30%|██▉ | 110692/371472 [8:48:05<19:33:24, 3.70it/s] 30%|██▉ | 110693/371472 [8:48:06<23:22:45, 3.10it/s] 30%|██▉ | 110694/371472 [8:48:06<23:22:14, 3.10it/s] 30%|██▉ | 110695/371472 [8:48:06<22:06:59, 3.28it/s] 30%|██▉ | 110696/371472 [8:48:07<21:30:12, 3.37it/s] 30%|██▉ | 110697/371472 [8:48:07<22:31:11, 3.22it/s] 30%|██▉ | 110698/371472 [8:48:07<20:59:40, 3.45it/s] 30%|██▉ | 110699/371472 [8:48:08<20:07:12, 3.60it/s] 30%|██▉ | 110700/371472 [8:48:08<19:18:38, 3.75it/s] {'loss': 3.2126, 'learning_rate': 7.321370854791096e-07, 'epoch': 4.77} + 30%|██▉ | 110700/371472 [8:48:08<19:18:38, 3.75it/s] 30%|██▉ | 110701/371472 [8:48:08<18:46:20, 3.86it/s] 30%|██▉ | 110702/371472 [8:48:08<19:14:40, 3.76it/s] 30%|██▉ | 110703/371472 [8:48:09<18:40:08, 3.88it/s] 30%|██▉ | 110704/371472 [8:48:09<19:07:18, 3.79it/s] 30%|██▉ | 110705/371472 [8:48:09<19:44:51, 3.67it/s] 30%|██▉ | 110706/371472 [8:48:09<19:53:47, 3.64it/s] 30%|██▉ | 110707/371472 [8:48:10<21:29:46, 3.37it/s] 30%|██▉ | 110708/371472 [8:48:10<21:15:17, 3.41it/s] 30%|██▉ | 110709/371472 [8:48:10<22:11:22, 3.26it/s] 30%|██▉ | 110710/371472 [8:48:11<21:16:43, 3.40it/s] 30%|██▉ | 110711/371472 [8:48:11<20:33:19, 3.52it/s] 30%|██▉ | 110712/371472 [8:48:11<21:43:50, 3.33it/s] 30%|██▉ | 110713/371472 [8:48:12<22:49:03, 3.17it/s] 30%|██▉ | 110714/371472 [8:48:12<22:16:48, 3.25it/s] 30%|██▉ | 110715/371472 [8:48:12<22:30:44, 3.22it/s] 30%|██▉ | 110716/371472 [8:48:12<22:24:55, 3.23it/s] 30%|██▉ | 110717/371472 [8:48:13<21:59:27, 3.29it/s] 30%|██▉ | 110718/371472 [8:48:13<22:20:52, 3.24it/s] 30%|██▉ | 110719/371472 [8:48:13<22:03:19, 3.28it/s] 30%|██▉ | 110720/371472 [8:48:14<20:47:17, 3.48it/s] {'loss': 3.4632, 'learning_rate': 7.320886035036307e-07, 'epoch': 4.77} + 30%|██▉ | 110720/371472 [8:48:14<20:47:17, 3.48it/s] 30%|██▉ | 110721/371472 [8:48:14<20:08:07, 3.60it/s] 30%|██▉ | 110722/371472 [8:48:14<19:30:05, 3.71it/s] 30%|██▉ | 110723/371472 [8:48:14<19:38:40, 3.69it/s] 30%|██▉ | 110724/371472 [8:48:15<19:24:06, 3.73it/s] 30%|██▉ | 110725/371472 [8:48:15<19:52:17, 3.64it/s] 30%|██▉ | 110726/371472 [8:48:15<20:15:38, 3.57it/s] 30%|██▉ | 110727/371472 [8:48:16<19:44:43, 3.67it/s] 30%|██▉ | 110728/371472 [8:48:16<20:18:57, 3.57it/s] 30%|██▉ | 110729/371472 [8:48:16<20:16:39, 3.57it/s] 30%|██▉ | 110730/371472 [8:48:16<19:36:32, 3.69it/s] 30%|██▉ | 110731/371472 [8:48:17<20:41:32, 3.50it/s] 30%|██▉ | 110732/371472 [8:48:17<20:57:39, 3.46it/s] 30%|██▉ | 110733/371472 [8:48:17<24:13:42, 2.99it/s] 30%|██▉ | 110734/371472 [8:48:18<22:43:50, 3.19it/s] 30%|██▉ | 110735/371472 [8:48:18<22:03:22, 3.28it/s] 30%|██▉ | 110736/371472 [8:48:18<20:57:59, 3.45it/s] 30%|██▉ | 110737/371472 [8:48:19<22:21:21, 3.24it/s] 30%|██▉ | 110738/371472 [8:48:19<22:09:59, 3.27it/s] 30%|██▉ | 110739/371472 [8:48:19<21:06:53, 3.43it/s] 30%|██▉ | 110740/371472 [8:48:19<21:06:25, 3.43it/s] {'loss': 3.527, 'learning_rate': 7.320401215281518e-07, 'epoch': 4.77} + 30%|██▉ | 110740/371472 [8:48:19<21:06:25, 3.43it/s] 30%|██▉ | 110741/371472 [8:48:20<21:07:52, 3.43it/s] 30%|██▉ | 110742/371472 [8:48:20<20:44:56, 3.49it/s] 30%|██▉ | 110743/371472 [8:48:20<21:17:26, 3.40it/s] 30%|██▉ | 110744/371472 [8:48:21<20:34:12, 3.52it/s] 30%|██▉ | 110745/371472 [8:48:21<19:39:38, 3.68it/s] 30%|██▉ | 110746/371472 [8:48:21<19:40:21, 3.68it/s] 30%|██▉ | 110747/371472 [8:48:21<20:10:30, 3.59it/s] 30%|██▉ | 110748/371472 [8:48:22<20:00:49, 3.62it/s] 30%|██▉ | 110749/371472 [8:48:22<19:38:46, 3.69it/s] 30%|██▉ | 110750/371472 [8:48:22<20:54:23, 3.46it/s] 30%|██▉ | 110751/371472 [8:48:22<20:26:12, 3.54it/s] 30%|██▉ | 110752/371472 [8:48:23<20:36:15, 3.51it/s] 30%|██▉ | 110753/371472 [8:48:23<19:58:14, 3.63it/s] 30%|██▉ | 110754/371472 [8:48:23<19:35:55, 3.70it/s] 30%|██▉ | 110755/371472 [8:48:24<21:17:41, 3.40it/s] 30%|██▉ | 110756/371472 [8:48:24<20:34:04, 3.52it/s] 30%|██▉ | 110757/371472 [8:48:24<20:09:59, 3.59it/s] 30%|██▉ | 110758/371472 [8:48:24<19:41:00, 3.68it/s] 30%|██▉ | 110759/371472 [8:48:25<20:15:24, 3.58it/s] 30%|██▉ | 110760/371472 [8:48:25<21:02:29, 3.44it/s] {'loss': 3.5748, 'learning_rate': 7.31991639552673e-07, 'epoch': 4.77} + 30%|██▉ | 110760/371472 [8:48:25<21:02:29, 3.44it/s] 30%|██▉ | 110761/371472 [8:48:25<20:15:57, 3.57it/s] 30%|██▉ | 110762/371472 [8:48:26<20:06:53, 3.60it/s] 30%|██▉ | 110763/371472 [8:48:26<20:21:39, 3.56it/s] 30%|██▉ | 110764/371472 [8:48:26<21:34:44, 3.36it/s] 30%|██▉ | 110765/371472 [8:48:27<23:30:42, 3.08it/s] 30%|██▉ | 110766/371472 [8:48:27<22:12:55, 3.26it/s] 30%|██▉ | 110767/371472 [8:48:27<22:01:24, 3.29it/s] 30%|██▉ | 110768/371472 [8:48:27<21:45:47, 3.33it/s] 30%|██▉ | 110769/371472 [8:48:28<22:39:26, 3.20it/s] 30%|██▉ | 110770/371472 [8:48:28<21:47:20, 3.32it/s] 30%|██▉ | 110771/371472 [8:48:28<22:13:08, 3.26it/s] 30%|██▉ | 110772/371472 [8:48:29<22:20:55, 3.24it/s] 30%|██▉ | 110773/371472 [8:48:29<21:07:20, 3.43it/s] 30%|██▉ | 110774/371472 [8:48:29<21:13:21, 3.41it/s] 30%|██▉ | 110775/371472 [8:48:29<20:36:00, 3.52it/s] 30%|██▉ | 110776/371472 [8:48:30<20:15:32, 3.57it/s] 30%|██▉ | 110777/371472 [8:48:30<19:48:53, 3.65it/s] 30%|██▉ | 110778/371472 [8:48:30<19:13:03, 3.77it/s] 30%|██▉ | 110779/371472 [8:48:31<19:57:22, 3.63it/s] 30%|██▉ | 110780/371472 [8:48:31<20:17:44, 3.57it/s] {'loss': 3.5437, 'learning_rate': 7.319431575771941e-07, 'epoch': 4.77} + 30%|██▉ | 110780/371472 [8:48:31<20:17:44, 3.57it/s] 30%|██▉ | 110781/371472 [8:48:31<20:18:22, 3.57it/s] 30%|██▉ | 110782/371472 [8:48:31<20:12:35, 3.58it/s] 30%|██▉ | 110783/371472 [8:48:32<19:20:06, 3.75it/s] 30%|██▉ | 110784/371472 [8:48:32<19:51:24, 3.65it/s] 30%|██▉ | 110785/371472 [8:48:32<18:56:53, 3.82it/s] 30%|██▉ | 110786/371472 [8:48:32<19:18:17, 3.75it/s] 30%|██▉ | 110787/371472 [8:48:33<19:21:50, 3.74it/s] 30%|██▉ | 110788/371472 [8:48:33<19:55:12, 3.64it/s] 30%|██▉ | 110789/371472 [8:48:33<20:13:51, 3.58it/s] 30%|██▉ | 110790/371472 [8:48:34<19:58:57, 3.62it/s] 30%|██▉ | 110791/371472 [8:48:34<19:34:33, 3.70it/s] 30%|██▉ | 110792/371472 [8:48:34<19:29:15, 3.72it/s] 30%|██▉ | 110793/371472 [8:48:34<19:18:13, 3.75it/s] 30%|██▉ | 110794/371472 [8:48:35<20:10:15, 3.59it/s] 30%|██▉ | 110795/371472 [8:48:35<20:29:04, 3.53it/s] 30%|██▉ | 110796/371472 [8:48:35<20:40:16, 3.50it/s] 30%|██▉ | 110797/371472 [8:48:36<21:34:05, 3.36it/s] 30%|██▉ | 110798/371472 [8:48:36<20:48:24, 3.48it/s] 30%|██▉ | 110799/371472 [8:48:36<20:36:48, 3.51it/s] 30%|██▉ | 110800/371472 [8:48:36<20:33:03, 3.52it/s] {'loss': 3.3332, 'learning_rate': 7.318946756017151e-07, 'epoch': 4.77} + 30%|██▉ | 110800/371472 [8:48:36<20:33:03, 3.52it/s] 30%|██▉ | 110801/371472 [8:48:37<20:04:49, 3.61it/s] 30%|██▉ | 110802/371472 [8:48:37<19:37:31, 3.69it/s] 30%|██▉ | 110803/371472 [8:48:37<18:50:53, 3.84it/s] 30%|██▉ | 110804/371472 [8:48:38<20:44:49, 3.49it/s] 30%|██▉ | 110805/371472 [8:48:38<20:28:36, 3.54it/s] 30%|██▉ | 110806/371472 [8:48:38<23:01:12, 3.15it/s] 30%|██▉ | 110807/371472 [8:48:38<21:54:56, 3.30it/s] 30%|██▉ | 110808/371472 [8:48:39<21:40:41, 3.34it/s] 30%|██▉ | 110809/371472 [8:48:39<20:56:01, 3.46it/s] 30%|██▉ | 110810/371472 [8:48:39<20:26:00, 3.54it/s] 30%|██▉ | 110811/371472 [8:48:40<20:43:21, 3.49it/s] 30%|██▉ | 110812/371472 [8:48:40<21:16:23, 3.40it/s] 30%|██▉ | 110813/371472 [8:48:40<20:30:08, 3.53it/s] 30%|██▉ | 110814/371472 [8:48:40<21:24:06, 3.38it/s] 30%|██▉ | 110815/371472 [8:48:41<20:07:04, 3.60it/s] 30%|██▉ | 110816/371472 [8:48:41<20:02:59, 3.61it/s] 30%|██▉ | 110817/371472 [8:48:41<19:59:55, 3.62it/s] 30%|██▉ | 110818/371472 [8:48:42<20:09:58, 3.59it/s] 30%|██▉ | 110819/371472 [8:48:42<19:31:44, 3.71it/s] 30%|██▉ | 110820/371472 [8:48:42<19:49:10, 3.65it/s] {'loss': 3.5089, 'learning_rate': 7.318461936262363e-07, 'epoch': 4.77} + 30%|██▉ | 110820/371472 [8:48:42<19:49:10, 3.65it/s] 30%|██▉ | 110821/371472 [8:48:42<20:22:32, 3.55it/s] 30%|██▉ | 110822/371472 [8:48:43<19:56:44, 3.63it/s] 30%|██▉ | 110823/371472 [8:48:43<19:54:06, 3.64it/s] 30%|██▉ | 110824/371472 [8:48:43<19:41:38, 3.68it/s] 30%|██▉ | 110825/371472 [8:48:43<19:40:31, 3.68it/s] 30%|██▉ | 110826/371472 [8:48:44<19:23:29, 3.73it/s] 30%|██▉ | 110827/371472 [8:48:44<19:53:32, 3.64it/s] 30%|██▉ | 110828/371472 [8:48:44<19:25:06, 3.73it/s] 30%|██▉ | 110829/371472 [8:48:45<20:16:37, 3.57it/s] 30%|██▉ | 110830/371472 [8:48:45<19:44:56, 3.67it/s] 30%|██▉ | 110831/371472 [8:48:45<20:38:38, 3.51it/s] 30%|██▉ | 110832/371472 [8:48:45<22:24:14, 3.23it/s] 30%|██▉ | 110833/371472 [8:48:46<21:07:28, 3.43it/s] 30%|██▉ | 110834/371472 [8:48:46<20:17:17, 3.57it/s] 30%|██▉ | 110835/371472 [8:48:46<20:36:37, 3.51it/s] 30%|██▉ | 110836/371472 [8:48:47<20:31:20, 3.53it/s] 30%|██▉ | 110837/371472 [8:48:47<20:36:00, 3.51it/s] 30%|██▉ | 110838/371472 [8:48:47<20:10:42, 3.59it/s] 30%|██▉ | 110839/371472 [8:48:47<19:24:03, 3.73it/s] 30%|██▉ | 110840/371472 [8:48:48<19:38:08, 3.69it/s] {'loss': 3.2424, 'learning_rate': 7.317977116507574e-07, 'epoch': 4.77} + 30%|██▉ | 110840/371472 [8:48:48<19:38:08, 3.69it/s] 30%|██▉ | 110841/371472 [8:48:48<20:26:50, 3.54it/s] 30%|██▉ | 110842/371472 [8:48:48<19:48:14, 3.66it/s] 30%|██▉ | 110843/371472 [8:48:48<20:18:05, 3.57it/s] 30%|██▉ | 110844/371472 [8:48:49<19:40:06, 3.68it/s] 30%|██▉ | 110845/371472 [8:48:49<20:09:34, 3.59it/s] 30%|██▉ | 110846/371472 [8:48:49<20:13:41, 3.58it/s] 30%|██▉ | 110847/371472 [8:48:50<19:34:40, 3.70it/s] 30%|██▉ | 110848/371472 [8:48:50<19:56:23, 3.63it/s] 30%|██▉ | 110849/371472 [8:48:50<19:46:09, 3.66it/s] 30%|██▉ | 110850/371472 [8:48:50<19:38:50, 3.68it/s] 30%|██▉ | 110851/371472 [8:48:51<20:40:11, 3.50it/s] 30%|██▉ | 110852/371472 [8:48:51<20:38:36, 3.51it/s] 30%|██▉ | 110853/371472 [8:48:51<20:25:39, 3.54it/s] 30%|██▉ | 110854/371472 [8:48:52<20:22:45, 3.55it/s] 30%|██▉ | 110855/371472 [8:48:52<19:54:43, 3.64it/s] 30%|██▉ | 110856/371472 [8:48:52<20:06:42, 3.60it/s] 30%|██▉ | 110857/371472 [8:48:52<20:21:47, 3.56it/s] 30%|██▉ | 110858/371472 [8:48:53<19:58:08, 3.63it/s] 30%|██▉ | 110859/371472 [8:48:53<19:21:43, 3.74it/s] 30%|██▉ | 110860/371472 [8:48:53<18:46:11, 3.86it/s] {'loss': 3.4568, 'learning_rate': 7.317492296752786e-07, 'epoch': 4.77} + 30%|██▉ | 110860/371472 [8:48:53<18:46:11, 3.86it/s] 30%|██▉ | 110861/371472 [8:48:53<19:57:39, 3.63it/s] 30%|██▉ | 110862/371472 [8:48:54<19:21:46, 3.74it/s] 30%|██▉ | 110863/371472 [8:48:54<19:34:12, 3.70it/s] 30%|██▉ | 110864/371472 [8:48:54<20:02:02, 3.61it/s] 30%|██▉ | 110865/371472 [8:48:55<19:41:04, 3.68it/s] 30%|██▉ | 110866/371472 [8:48:55<19:35:37, 3.69it/s] 30%|██▉ | 110867/371472 [8:48:55<19:45:15, 3.66it/s] 30%|██▉ | 110868/371472 [8:48:55<20:18:12, 3.57it/s] 30%|██▉ | 110869/371472 [8:48:56<20:08:00, 3.60it/s] 30%|██▉ | 110870/371472 [8:48:56<20:14:22, 3.58it/s] 30%|██▉ | 110871/371472 [8:48:56<20:22:51, 3.55it/s] 30%|██▉ | 110872/371472 [8:48:56<20:00:02, 3.62it/s] 30%|██▉ | 110873/371472 [8:48:57<19:06:57, 3.79it/s] 30%|██▉ | 110874/371472 [8:48:57<18:50:38, 3.84it/s] 30%|██▉ | 110875/371472 [8:48:57<19:13:35, 3.76it/s] 30%|██▉ | 110876/371472 [8:48:58<19:18:31, 3.75it/s] 30%|██▉ | 110877/371472 [8:48:58<19:20:41, 3.74it/s] 30%|██▉ | 110878/371472 [8:48:58<19:59:28, 3.62it/s] 30%|██▉ | 110879/371472 [8:48:58<20:06:32, 3.60it/s] 30%|██▉ | 110880/371472 [8:48:59<20:22:34, 3.55it/s] {'loss': 3.2172, 'learning_rate': 7.317007476997995e-07, 'epoch': 4.78} + 30%|██▉ | 110880/371472 [8:48:59<20:22:34, 3.55it/s] 30%|██▉ | 110881/371472 [8:48:59<20:29:40, 3.53it/s] 30%|██▉ | 110882/371472 [8:48:59<20:15:51, 3.57it/s] 30%|██▉ | 110883/371472 [8:48:59<19:46:45, 3.66it/s] 30%|██▉ | 110884/371472 [8:49:00<19:19:06, 3.75it/s] 30%|██▉ | 110885/371472 [8:49:00<20:08:08, 3.59it/s] 30%|██▉ | 110886/371472 [8:49:00<21:48:04, 3.32it/s] 30%|██▉ | 110887/371472 [8:49:01<21:55:03, 3.30it/s] 30%|██▉ | 110888/371472 [8:49:01<23:57:19, 3.02it/s] 30%|██▉ | 110889/371472 [8:49:01<22:34:38, 3.21it/s] 30%|██▉ | 110890/371472 [8:49:02<21:50:29, 3.31it/s] 30%|██▉ | 110891/371472 [8:49:02<21:44:51, 3.33it/s] 30%|██▉ | 110892/371472 [8:49:02<21:21:50, 3.39it/s] 30%|██▉ | 110893/371472 [8:49:02<21:24:16, 3.38it/s] 30%|██▉ | 110894/371472 [8:49:03<20:17:33, 3.57it/s] 30%|██▉ | 110895/371472 [8:49:03<20:09:57, 3.59it/s] 30%|██▉ | 110896/371472 [8:49:03<20:08:39, 3.59it/s] 30%|██▉ | 110897/371472 [8:49:04<20:55:43, 3.46it/s] 30%|██▉ | 110898/371472 [8:49:04<20:38:10, 3.51it/s] 30%|██▉ | 110899/371472 [8:49:04<21:14:11, 3.41it/s] 30%|██▉ | 110900/371472 [8:49:04<20:09:00, 3.59it/s] {'loss': 3.3651, 'learning_rate': 7.316522657243207e-07, 'epoch': 4.78} + 30%|██▉ | 110900/371472 [8:49:04<20:09:00, 3.59it/s] 30%|██▉ | 110901/371472 [8:49:05<19:58:52, 3.62it/s] 30%|██▉ | 110902/371472 [8:49:05<19:35:40, 3.69it/s] 30%|██▉ | 110903/371472 [8:49:05<18:57:22, 3.82it/s] 30%|██▉ | 110904/371472 [8:49:06<19:42:59, 3.67it/s] 30%|██▉ | 110905/371472 [8:49:06<20:13:20, 3.58it/s] 30%|██▉ | 110906/371472 [8:49:06<21:17:17, 3.40it/s] 30%|██▉ | 110907/371472 [8:49:06<20:10:31, 3.59it/s] 30%|██▉ | 110908/371472 [8:49:07<20:10:48, 3.59it/s] 30%|██▉ | 110909/371472 [8:49:07<19:52:12, 3.64it/s] 30%|██▉ | 110910/371472 [8:49:07<19:21:03, 3.74it/s] 30%|██▉ | 110911/371472 [8:49:07<19:20:08, 3.74it/s] 30%|██▉ | 110912/371472 [8:49:08<21:02:54, 3.44it/s] 30%|██▉ | 110913/371472 [8:49:08<20:37:24, 3.51it/s] 30%|██▉ | 110914/371472 [8:49:08<21:19:51, 3.39it/s] 30%|██▉ | 110915/371472 [8:49:09<21:14:14, 3.41it/s] 30%|██▉ | 110916/371472 [8:49:09<21:28:52, 3.37it/s] 30%|██▉ | 110917/371472 [8:49:09<21:11:22, 3.42it/s] 30%|██▉ | 110918/371472 [8:49:10<21:50:22, 3.31it/s] 30%|██▉ | 110919/371472 [8:49:10<21:00:05, 3.45it/s] 30%|██▉ | 110920/371472 [8:49:10<20:47:06, 3.48it/s] {'loss': 3.3589, 'learning_rate': 7.316037837488418e-07, 'epoch': 4.78} + 30%|██▉ | 110920/371472 [8:49:10<20:47:06, 3.48it/s] 30%|██▉ | 110921/371472 [8:49:10<19:51:58, 3.64it/s] 30%|██▉ | 110922/371472 [8:49:11<19:30:11, 3.71it/s] 30%|██▉ | 110923/371472 [8:49:11<19:13:06, 3.77it/s] 30%|██▉ | 110924/371472 [8:49:11<18:48:33, 3.85it/s] 30%|██▉ | 110925/371472 [8:49:11<19:01:57, 3.80it/s] 30%|██▉ | 110926/371472 [8:49:12<19:49:05, 3.65it/s] 30%|██▉ | 110927/371472 [8:49:12<20:46:04, 3.48it/s] 30%|██▉ | 110928/371472 [8:49:12<20:17:27, 3.57it/s] 30%|██▉ | 110929/371472 [8:49:13<21:01:10, 3.44it/s] 30%|██▉ | 110930/371472 [8:49:13<20:28:23, 3.54it/s] 30%|██▉ | 110931/371472 [8:49:13<20:01:17, 3.61it/s] 30%|██▉ | 110932/371472 [8:49:14<23:54:36, 3.03it/s] 30%|██▉ | 110933/371472 [8:49:14<22:46:36, 3.18it/s] 30%|██▉ | 110934/371472 [8:49:14<21:56:14, 3.30it/s] 30%|██▉ | 110935/371472 [8:49:14<21:26:59, 3.37it/s] 30%|██▉ | 110936/371472 [8:49:15<21:03:25, 3.44it/s] 30%|██▉ | 110937/371472 [8:49:15<22:11:46, 3.26it/s] 30%|██▉ | 110938/371472 [8:49:15<21:15:46, 3.40it/s] 30%|██▉ | 110939/371472 [8:49:16<20:51:25, 3.47it/s] 30%|██▉ | 110940/371472 [8:49:16<19:45:16, 3.66it/s] {'loss': 3.3583, 'learning_rate': 7.315553017733629e-07, 'epoch': 4.78} + 30%|██▉ | 110940/371472 [8:49:16<19:45:16, 3.66it/s] 30%|██▉ | 110941/371472 [8:49:16<19:18:55, 3.75it/s] 30%|██▉ | 110942/371472 [8:49:16<21:12:52, 3.41it/s] 30%|██▉ | 110943/371472 [8:49:17<20:45:26, 3.49it/s] 30%|██▉ | 110944/371472 [8:49:17<19:56:34, 3.63it/s] 30%|██▉ | 110945/371472 [8:49:17<19:34:06, 3.70it/s] 30%|██▉ | 110946/371472 [8:49:17<19:24:22, 3.73it/s] 30%|██▉ | 110947/371472 [8:49:18<20:05:40, 3.60it/s] 30%|██▉ | 110948/371472 [8:49:18<21:36:32, 3.35it/s] 30%|██▉ | 110949/371472 [8:49:18<21:32:22, 3.36it/s] 30%|██▉ | 110950/371472 [8:49:19<20:24:36, 3.55it/s] 30%|██▉ | 110951/371472 [8:49:19<22:09:56, 3.26it/s] 30%|██▉ | 110952/371472 [8:49:19<21:06:50, 3.43it/s] 30%|██▉ | 110953/371472 [8:49:20<20:38:22, 3.51it/s] 30%|██▉ | 110954/371472 [8:49:20<20:19:11, 3.56it/s] 30%|██▉ | 110955/371472 [8:49:20<21:40:44, 3.34it/s] 30%|██▉ | 110956/371472 [8:49:20<21:02:49, 3.44it/s] 30%|██▉ | 110957/371472 [8:49:21<20:05:59, 3.60it/s] 30%|██▉ | 110958/371472 [8:49:21<19:28:13, 3.72it/s] 30%|██▉ | 110959/371472 [8:49:21<19:11:05, 3.77it/s] 30%|██▉ | 110960/371472 [8:49:21<18:50:41, 3.84it/s] {'loss': 3.1732, 'learning_rate': 7.315068197978839e-07, 'epoch': 4.78} + 30%|██▉ | 110960/371472 [8:49:21<18:50:41, 3.84it/s] 30%|██▉ | 110961/371472 [8:49:22<19:05:45, 3.79it/s] 30%|██▉ | 110962/371472 [8:49:22<22:31:42, 3.21it/s] 30%|██▉ | 110963/371472 [8:49:22<21:55:28, 3.30it/s] 30%|██▉ | 110964/371472 [8:49:23<22:33:14, 3.21it/s] 30%|██▉ | 110965/371472 [8:49:23<22:10:05, 3.26it/s] 30%|██▉ | 110966/371472 [8:49:23<22:10:15, 3.26it/s] 30%|██▉ | 110967/371472 [8:49:24<21:18:26, 3.40it/s] 30%|██▉ | 110968/371472 [8:49:24<20:32:41, 3.52it/s] 30%|██▉ | 110969/371472 [8:49:24<21:30:10, 3.37it/s] 30%|██▉ | 110970/371472 [8:49:24<21:02:28, 3.44it/s] 30%|██▉ | 110971/371472 [8:49:25<20:48:07, 3.48it/s] 30%|██▉ | 110972/371472 [8:49:25<20:54:55, 3.46it/s] 30%|██▉ | 110973/371472 [8:49:25<21:16:50, 3.40it/s] 30%|██▉ | 110974/371472 [8:49:26<21:31:22, 3.36it/s] 30%|██▉ | 110975/371472 [8:49:26<21:17:45, 3.40it/s] 30%|██▉ | 110976/371472 [8:49:26<22:00:36, 3.29it/s] 30%|██▉ | 110977/371472 [8:49:27<21:47:42, 3.32it/s] 30%|██▉ | 110978/371472 [8:49:27<21:16:55, 3.40it/s] 30%|██▉ | 110979/371472 [8:49:27<21:32:01, 3.36it/s] 30%|██▉ | 110980/371472 [8:49:27<20:34:57, 3.52it/s] {'loss': 3.4154, 'learning_rate': 7.314583378224051e-07, 'epoch': 4.78} + 30%|██▉ | 110980/371472 [8:49:27<20:34:57, 3.52it/s] 30%|██▉ | 110981/371472 [8:49:28<20:58:42, 3.45it/s] 30%|██▉ | 110982/371472 [8:49:28<22:02:34, 3.28it/s] 30%|██▉ | 110983/371472 [8:49:28<21:31:04, 3.36it/s] 30%|██▉ | 110984/371472 [8:49:29<20:49:31, 3.47it/s] 30%|██▉ | 110985/371472 [8:49:29<21:26:29, 3.37it/s] 30%|██▉ | 110986/371472 [8:49:29<24:53:56, 2.91it/s] 30%|██▉ | 110987/371472 [8:49:30<24:47:00, 2.92it/s] 30%|██▉ | 110988/371472 [8:49:30<23:47:50, 3.04it/s] 30%|██▉ | 110989/371472 [8:49:30<24:21:54, 2.97it/s] 30%|██▉ | 110990/371472 [8:49:31<22:21:42, 3.24it/s] 30%|██▉ | 110991/371472 [8:49:31<21:50:54, 3.31it/s] 30%|██▉ | 110992/371472 [8:49:31<20:37:21, 3.51it/s] 30%|██▉ | 110993/371472 [8:49:31<20:48:38, 3.48it/s] 30%|██▉ | 110994/371472 [8:49:32<20:17:31, 3.57it/s] 30%|██▉ | 110995/371472 [8:49:32<19:37:36, 3.69it/s] 30%|██▉ | 110996/371472 [8:49:32<19:10:22, 3.77it/s] 30%|██▉ | 110997/371472 [8:49:32<19:32:33, 3.70it/s] 30%|██▉ | 110998/371472 [8:49:33<19:10:37, 3.77it/s] 30%|██▉ | 110999/371472 [8:49:33<18:53:39, 3.83it/s] 30%|██▉ | 111000/371472 [8:49:33<18:40:24, 3.87it/s] {'loss': 3.5414, 'learning_rate': 7.314098558469262e-07, 'epoch': 4.78} + 30%|██▉ | 111000/371472 [8:49:33<18:40:24, 3.87it/s] 30%|██▉ | 111001/371472 [8:49:33<19:20:14, 3.74it/s] 30%|██▉ | 111002/371472 [8:49:34<19:04:34, 3.79it/s] 30%|██▉ | 111003/371472 [8:49:34<18:51:35, 3.84it/s] 30%|██▉ | 111004/371472 [8:49:34<19:39:10, 3.68it/s] 30%|██▉ | 111005/371472 [8:49:35<19:09:32, 3.78it/s] 30%|██▉ | 111006/371472 [8:49:35<20:07:53, 3.59it/s] 30%|██▉ | 111007/371472 [8:49:35<19:34:45, 3.70it/s] 30%|██▉ | 111008/371472 [8:49:35<19:32:40, 3.70it/s] 30%|██▉ | 111009/371472 [8:49:36<21:13:46, 3.41it/s] 30%|██▉ | 111010/371472 [8:49:36<21:18:08, 3.40it/s] 30%|██▉ | 111011/371472 [8:49:36<20:13:24, 3.58it/s] 30%|██▉ | 111012/371472 [8:49:37<19:53:20, 3.64it/s] 30%|██▉ | 111013/371472 [8:49:37<19:38:11, 3.68it/s] 30%|██▉ | 111014/371472 [8:49:37<19:00:12, 3.81it/s] 30%|██▉ | 111015/371472 [8:49:37<18:50:25, 3.84it/s] 30%|██▉ | 111016/371472 [8:49:38<19:04:16, 3.79it/s] 30%|██▉ | 111017/371472 [8:49:38<19:54:40, 3.63it/s] 30%|██▉ | 111018/371472 [8:49:38<20:27:59, 3.53it/s] 30%|██▉ | 111019/371472 [8:49:38<20:16:41, 3.57it/s] 30%|██▉ | 111020/371472 [8:49:39<19:36:04, 3.69it/s] {'loss': 3.4901, 'learning_rate': 7.313613738714473e-07, 'epoch': 4.78} + 30%|██▉ | 111020/371472 [8:49:39<19:36:04, 3.69it/s] 30%|██▉ | 111021/371472 [8:49:39<19:11:51, 3.77it/s] 30%|██▉ | 111022/371472 [8:49:39<20:51:04, 3.47it/s] 30%|██▉ | 111023/371472 [8:49:40<20:34:11, 3.52it/s] 30%|██▉ | 111024/371472 [8:49:40<23:35:46, 3.07it/s] 30%|██▉ | 111025/371472 [8:49:40<21:28:22, 3.37it/s] 30%|██▉ | 111026/371472 [8:49:40<20:48:52, 3.48it/s] 30%|██▉ | 111027/371472 [8:49:41<20:36:28, 3.51it/s] 30%|██▉ | 111028/371472 [8:49:41<20:36:28, 3.51it/s] 30%|██▉ | 111029/371472 [8:49:41<21:05:14, 3.43it/s] 30%|██▉ | 111030/371472 [8:49:42<20:20:01, 3.56it/s] 30%|██▉ | 111031/371472 [8:49:42<19:40:58, 3.68it/s] 30%|██▉ | 111032/371472 [8:49:42<19:33:37, 3.70it/s] 30%|██▉ | 111033/371472 [8:49:42<19:58:18, 3.62it/s] 30%|██▉ | 111034/371472 [8:49:43<20:59:03, 3.45it/s] 30%|██▉ | 111035/371472 [8:49:43<21:50:24, 3.31it/s] 30%|██▉ | 111036/371472 [8:49:43<21:51:36, 3.31it/s] 30%|██▉ | 111037/371472 [8:49:44<22:52:02, 3.16it/s] 30%|██▉ | 111038/371472 [8:49:44<21:56:59, 3.30it/s] 30%|██▉ | 111039/371472 [8:49:44<21:13:41, 3.41it/s] 30%|██▉ | 111040/371472 [8:49:45<20:33:20, 3.52it/s] {'loss': 3.3706, 'learning_rate': 7.313128918959684e-07, 'epoch': 4.78} + 30%|██▉ | 111040/371472 [8:49:45<20:33:20, 3.52it/s] 30%|██▉ | 111041/371472 [8:49:45<20:01:36, 3.61it/s] 30%|██▉ | 111042/371472 [8:49:45<19:23:26, 3.73it/s] 30%|██▉ | 111043/371472 [8:49:45<19:52:23, 3.64it/s] 30%|██▉ | 111044/371472 [8:49:46<20:02:38, 3.61it/s] 30%|██▉ | 111045/371472 [8:49:46<20:42:56, 3.49it/s] 30%|██▉ | 111046/371472 [8:49:46<26:32:53, 2.72it/s] 30%|██▉ | 111047/371472 [8:49:47<24:31:00, 2.95it/s] 30%|██▉ | 111048/371472 [8:49:47<24:32:08, 2.95it/s] 30%|██▉ | 111049/371472 [8:49:47<23:03:55, 3.14it/s] 30%|██▉ | 111050/371472 [8:49:48<21:45:52, 3.32it/s] 30%|██▉ | 111051/371472 [8:49:48<21:09:16, 3.42it/s] 30%|██▉ | 111052/371472 [8:49:48<22:09:10, 3.27it/s] 30%|██▉ | 111053/371472 [8:49:48<20:42:59, 3.49it/s] 30%|██▉ | 111054/371472 [8:49:49<19:49:15, 3.65it/s] 30%|██▉ | 111055/371472 [8:49:49<19:49:42, 3.65it/s] 30%|██▉ | 111056/371472 [8:49:49<19:13:08, 3.76it/s] 30%|██▉ | 111057/371472 [8:49:50<20:16:41, 3.57it/s] 30%|██▉ | 111058/371472 [8:49:50<20:03:03, 3.61it/s] 30%|██▉ | 111059/371472 [8:49:50<19:52:10, 3.64it/s] 30%|██▉ | 111060/371472 [8:49:50<20:32:45, 3.52it/s] {'loss': 3.2446, 'learning_rate': 7.312644099204896e-07, 'epoch': 4.78} + 30%|██▉ | 111060/371472 [8:49:50<20:32:45, 3.52it/s] 30%|██▉ | 111061/371472 [8:49:51<20:17:25, 3.57it/s] 30%|██▉ | 111062/371472 [8:49:51<20:40:20, 3.50it/s] 30%|██▉ | 111063/371472 [8:49:51<19:39:59, 3.68it/s] 30%|██▉ | 111064/371472 [8:49:51<19:07:54, 3.78it/s] 30%|██▉ | 111065/371472 [8:49:52<18:55:25, 3.82it/s] 30%|██▉ | 111066/371472 [8:49:52<19:12:05, 3.77it/s] 30%|██▉ | 111067/371472 [8:49:52<19:30:02, 3.71it/s] 30%|██▉ | 111068/371472 [8:49:53<19:14:28, 3.76it/s] 30%|██▉ | 111069/371472 [8:49:53<18:58:18, 3.81it/s] 30%|██▉ | 111070/371472 [8:49:53<19:48:04, 3.65it/s] 30%|██▉ | 111071/371472 [8:49:53<19:01:32, 3.80it/s] 30%|██▉ | 111072/371472 [8:49:54<20:38:44, 3.50it/s] 30%|██▉ | 111073/371472 [8:49:54<20:23:40, 3.55it/s] 30%|██▉ | 111074/371472 [8:49:54<20:22:29, 3.55it/s] 30%|██▉ | 111075/371472 [8:49:54<20:47:39, 3.48it/s] 30%|██▉ | 111076/371472 [8:49:55<20:46:59, 3.48it/s] 30%|██▉ | 111077/371472 [8:49:55<20:21:35, 3.55it/s] 30%|██▉ | 111078/371472 [8:49:55<19:43:31, 3.67it/s] 30%|██▉ | 111079/371472 [8:49:56<21:20:54, 3.39it/s] 30%|██▉ | 111080/371472 [8:49:56<20:31:57, 3.52it/s] {'loss': 3.5023, 'learning_rate': 7.312159279450106e-07, 'epoch': 4.78} + 30%|██▉ | 111080/371472 [8:49:56<20:31:57, 3.52it/s] 30%|██▉ | 111081/371472 [8:49:56<19:30:06, 3.71it/s] 30%|██▉ | 111082/371472 [8:49:56<19:04:40, 3.79it/s] 30%|██▉ | 111083/371472 [8:49:57<20:40:06, 3.50it/s] 30%|██▉ | 111084/371472 [8:49:57<20:17:04, 3.57it/s] 30%|██▉ | 111085/371472 [8:49:57<20:46:22, 3.48it/s] 30%|██▉ | 111086/371472 [8:49:58<19:44:40, 3.66it/s] 30%|██▉ | 111087/371472 [8:49:58<19:21:55, 3.73it/s] 30%|██▉ | 111088/371472 [8:49:58<18:50:30, 3.84it/s] 30%|██▉ | 111089/371472 [8:49:58<19:54:43, 3.63it/s] 30%|██▉ | 111090/371472 [8:49:59<19:38:47, 3.68it/s] 30%|██▉ | 111091/371472 [8:49:59<20:33:32, 3.52it/s] 30%|██▉ | 111092/371472 [8:49:59<19:52:46, 3.64it/s] 30%|██▉ | 111093/371472 [8:49:59<19:32:30, 3.70it/s] 30%|██▉ | 111094/371472 [8:50:00<19:38:15, 3.68it/s] 30%|██▉ | 111095/371472 [8:50:00<20:58:39, 3.45it/s] 30%|██▉ | 111096/371472 [8:50:00<21:59:41, 3.29it/s] 30%|██▉ | 111097/371472 [8:50:01<25:01:59, 2.89it/s] 30%|██▉ | 111098/371472 [8:50:01<23:42:10, 3.05it/s] 30%|██▉ | 111099/371472 [8:50:01<21:54:44, 3.30it/s] 30%|██▉ | 111100/371472 [8:50:02<21:16:40, 3.40it/s] {'loss': 3.4189, 'learning_rate': 7.311674459695316e-07, 'epoch': 4.79} + 30%|██▉ | 111100/371472 [8:50:02<21:16:40, 3.40it/s] 30%|██▉ | 111101/371472 [8:50:02<23:36:28, 3.06it/s] 30%|██▉ | 111102/371472 [8:50:02<23:06:11, 3.13it/s] 30%|██▉ | 111103/371472 [8:50:03<23:14:38, 3.11it/s] 30%|██▉ | 111104/371472 [8:50:03<22:22:04, 3.23it/s] 30%|██▉ | 111105/371472 [8:50:03<21:16:22, 3.40it/s] 30%|██▉ | 111106/371472 [8:50:03<20:23:52, 3.55it/s] 30%|██▉ | 111107/371472 [8:50:04<19:41:35, 3.67it/s] 30%|██▉ | 111108/371472 [8:50:04<19:58:30, 3.62it/s] 30%|██▉ | 111109/371472 [8:50:04<19:50:38, 3.64it/s] 30%|██▉ | 111110/371472 [8:50:05<20:32:45, 3.52it/s] 30%|██▉ | 111111/371472 [8:50:05<20:07:05, 3.59it/s] 30%|██▉ | 111112/371472 [8:50:05<24:57:33, 2.90it/s] 30%|██▉ | 111113/371472 [8:50:06<25:36:49, 2.82it/s] 30%|██▉ | 111114/371472 [8:50:06<23:22:48, 3.09it/s] 30%|██▉ | 111115/371472 [8:50:06<25:15:09, 2.86it/s] 30%|██▉ | 111116/371472 [8:50:07<23:46:46, 3.04it/s] 30%|██▉ | 111117/371472 [8:50:07<24:39:53, 2.93it/s] 30%|██▉ | 111118/371472 [8:50:07<24:06:00, 3.00it/s] 30%|██▉ | 111119/371472 [8:50:08<24:10:56, 2.99it/s] 30%|██▉ | 111120/371472 [8:50:08<22:44:33, 3.18it/s] {'loss': 3.2795, 'learning_rate': 7.311189639940528e-07, 'epoch': 4.79} + 30%|██▉ | 111120/371472 [8:50:08<22:44:33, 3.18it/s] 30%|██▉ | 111121/371472 [8:50:08<22:04:17, 3.28it/s] 30%|██▉ | 111122/371472 [8:50:09<21:25:52, 3.37it/s] 30%|██▉ | 111123/371472 [8:50:09<22:07:58, 3.27it/s] 30%|██▉ | 111124/371472 [8:50:09<21:52:59, 3.30it/s] 30%|██▉ | 111125/371472 [8:50:09<22:17:05, 3.25it/s] 30%|██▉ | 111126/371472 [8:50:10<21:32:04, 3.36it/s] 30%|██▉ | 111127/371472 [8:50:10<23:45:06, 3.04it/s] 30%|██▉ | 111128/371472 [8:50:10<22:07:04, 3.27it/s] 30%|██▉ | 111129/371472 [8:50:11<22:06:14, 3.27it/s] 30%|██▉ | 111130/371472 [8:50:11<22:29:57, 3.21it/s] 30%|██▉ | 111131/371472 [8:50:11<21:57:37, 3.29it/s] 30%|██▉ | 111132/371472 [8:50:12<21:17:51, 3.40it/s] 30%|██▉ | 111133/371472 [8:50:12<20:46:47, 3.48it/s] 30%|██▉ | 111134/371472 [8:50:12<19:53:13, 3.64it/s] 30%|██��� | 111135/371472 [8:50:12<19:25:02, 3.72it/s] 30%|██▉ | 111136/371472 [8:50:13<20:07:36, 3.59it/s] 30%|██▉ | 111137/371472 [8:50:13<19:38:37, 3.68it/s] 30%|██▉ | 111138/371472 [8:50:13<19:48:58, 3.65it/s] 30%|██▉ | 111139/371472 [8:50:13<19:42:20, 3.67it/s] 30%|██▉ | 111140/371472 [8:50:14<19:51:46, 3.64it/s] {'loss': 3.2671, 'learning_rate': 7.31070482018574e-07, 'epoch': 4.79} + 30%|██▉ | 111140/371472 [8:50:14<19:51:46, 3.64it/s] 30%|██▉ | 111141/371472 [8:50:14<19:44:45, 3.66it/s] 30%|██▉ | 111142/371472 [8:50:14<19:36:29, 3.69it/s] 30%|██▉ | 111143/371472 [8:50:14<18:53:39, 3.83it/s] 30%|██▉ | 111144/371472 [8:50:15<19:20:19, 3.74it/s] 30%|██▉ | 111145/371472 [8:50:15<19:07:09, 3.78it/s] 30%|██▉ | 111146/371472 [8:50:15<19:01:53, 3.80it/s] 30%|██▉ | 111147/371472 [8:50:16<19:35:08, 3.69it/s] 30%|██▉ | 111148/371472 [8:50:16<19:37:40, 3.68it/s] 30%|██▉ | 111149/371472 [8:50:16<19:30:50, 3.71it/s] 30%|██▉ | 111150/371472 [8:50:16<19:43:27, 3.67it/s] 30%|██▉ | 111151/371472 [8:50:17<19:50:08, 3.65it/s] 30%|██▉ | 111152/371472 [8:50:17<19:23:27, 3.73it/s] 30%|██▉ | 111153/371472 [8:50:17<19:14:01, 3.76it/s] 30%|██▉ | 111154/371472 [8:50:17<19:25:05, 3.72it/s] 30%|██▉ | 111155/371472 [8:50:18<20:26:00, 3.54it/s] 30%|██▉ | 111156/371472 [8:50:18<20:49:42, 3.47it/s] 30%|██▉ | 111157/371472 [8:50:18<20:50:42, 3.47it/s] 30%|██▉ | 111158/371472 [8:50:19<20:13:32, 3.58it/s] 30%|██▉ | 111159/371472 [8:50:19<21:36:00, 3.35it/s] 30%|██▉ | 111160/371472 [8:50:19<20:40:42, 3.50it/s] {'loss': 3.2637, 'learning_rate': 7.310220000430951e-07, 'epoch': 4.79} + 30%|██▉ | 111160/371472 [8:50:19<20:40:42, 3.50it/s] 30%|██▉ | 111161/371472 [8:50:19<19:57:34, 3.62it/s] 30%|██▉ | 111162/371472 [8:50:20<19:49:07, 3.65it/s] 30%|██▉ | 111163/371472 [8:50:20<19:30:04, 3.71it/s] 30%|██▉ | 111164/371472 [8:50:20<19:56:49, 3.62it/s] 30%|██▉ | 111165/371472 [8:50:21<20:21:42, 3.55it/s] 30%|██▉ | 111166/371472 [8:50:21<21:39:28, 3.34it/s] 30%|██▉ | 111167/371472 [8:50:21<20:40:29, 3.50it/s] 30%|██▉ | 111168/371472 [8:50:21<20:35:25, 3.51it/s] 30%|██▉ | 111169/371472 [8:50:22<22:20:06, 3.24it/s] 30%|██▉ | 111170/371472 [8:50:22<21:27:34, 3.37it/s] 30%|██▉ | 111171/371472 [8:50:22<20:41:13, 3.50it/s] 30%|██▉ | 111172/371472 [8:50:23<20:02:45, 3.61it/s] 30%|██▉ | 111173/371472 [8:50:23<19:48:06, 3.65it/s] 30%|██▉ | 111174/371472 [8:50:23<19:13:04, 3.76it/s] 30%|██▉ | 111175/371472 [8:50:23<20:22:19, 3.55it/s] 30%|██▉ | 111176/371472 [8:50:24<20:09:51, 3.59it/s] 30%|██▉ | 111177/371472 [8:50:24<19:13:01, 3.76it/s] 30%|██▉ | 111178/371472 [8:50:24<20:23:57, 3.54it/s] 30%|██▉ | 111179/371472 [8:50:25<20:09:00, 3.59it/s] 30%|██▉ | 111180/371472 [8:50:25<21:06:11, 3.43it/s] {'loss': 3.2527, 'learning_rate': 7.309735180676161e-07, 'epoch': 4.79} + 30%|██▉ | 111180/371472 [8:50:25<21:06:11, 3.43it/s] 30%|██▉ | 111181/371472 [8:50:25<20:10:17, 3.58it/s] 30%|██▉ | 111182/371472 [8:50:25<20:02:42, 3.61it/s] 30%|██▉ | 111183/371472 [8:50:26<19:43:02, 3.67it/s] 30%|██▉ | 111184/371472 [8:50:26<19:14:07, 3.76it/s] 30%|██▉ | 111185/371472 [8:50:26<19:04:52, 3.79it/s] 30%|██▉ | 111186/371472 [8:50:26<19:24:24, 3.73it/s] 30%|██▉ | 111187/371472 [8:50:27<21:19:19, 3.39it/s] 30%|██▉ | 111188/371472 [8:50:27<20:32:03, 3.52it/s] 30%|██▉ | 111189/371472 [8:50:27<20:19:40, 3.56it/s] 30%|██▉ | 111190/371472 [8:50:28<20:29:53, 3.53it/s] 30%|██▉ | 111191/371472 [8:50:28<20:17:01, 3.56it/s] 30%|██▉ | 111192/371472 [8:50:28<20:39:11, 3.50it/s] 30%|██▉ | 111193/371472 [8:50:28<20:03:19, 3.60it/s] 30%|██▉ | 111194/371472 [8:50:29<19:39:36, 3.68it/s] 30%|██▉ | 111195/371472 [8:50:29<19:40:28, 3.67it/s] 30%|██▉ | 111196/371472 [8:50:29<20:31:45, 3.52it/s] 30%|██▉ | 111197/371472 [8:50:30<19:39:11, 3.68it/s] 30%|██▉ | 111198/371472 [8:50:30<20:27:07, 3.54it/s] 30%|██▉ | 111199/371472 [8:50:30<20:28:46, 3.53it/s] 30%|██▉ | 111200/371472 [8:50:30<20:10:05, 3.58it/s] {'loss': 3.4921, 'learning_rate': 7.309250360921373e-07, 'epoch': 4.79} + 30%|██▉ | 111200/371472 [8:50:30<20:10:05, 3.58it/s] 30%|██▉ | 111201/371472 [8:50:31<20:16:32, 3.57it/s] 30%|██▉ | 111202/371472 [8:50:31<21:57:07, 3.29it/s] 30%|██▉ | 111203/371472 [8:50:31<21:39:02, 3.34it/s] 30%|██▉ | 111204/371472 [8:50:32<21:43:53, 3.33it/s] 30%|██▉ | 111205/371472 [8:50:32<21:34:01, 3.35it/s] 30%|██▉ | 111206/371472 [8:50:32<21:58:16, 3.29it/s] 30%|██▉ | 111207/371472 [8:50:33<22:42:32, 3.18it/s] 30%|██▉ | 111208/371472 [8:50:33<21:22:56, 3.38it/s] 30%|██▉ | 111209/371472 [8:50:33<20:50:21, 3.47it/s] 30%|██▉ | 111210/371472 [8:50:33<20:26:33, 3.54it/s] 30%|██▉ | 111211/371472 [8:50:34<21:02:34, 3.44it/s] 30%|██▉ | 111212/371472 [8:50:34<20:09:50, 3.59it/s] 30%|██▉ | 111213/371472 [8:50:34<20:08:06, 3.59it/s] 30%|██▉ | 111214/371472 [8:50:34<19:34:43, 3.69it/s] 30%|██▉ | 111215/371472 [8:50:35<19:22:27, 3.73it/s] 30%|██▉ | 111216/371472 [8:50:35<20:05:11, 3.60it/s] 30%|██▉ | 111217/371472 [8:50:35<20:20:14, 3.55it/s] 30%|██▉ | 111218/371472 [8:50:36<21:06:32, 3.42it/s] 30%|██▉ | 111219/371472 [8:50:36<20:56:22, 3.45it/s] 30%|██▉ | 111220/371472 [8:50:36<21:37:20, 3.34it/s] {'loss': 3.5235, 'learning_rate': 7.308765541166584e-07, 'epoch': 4.79} + 30%|██▉ | 111220/371472 [8:50:36<21:37:20, 3.34it/s] 30%|██▉ | 111221/371472 [8:50:37<20:42:06, 3.49it/s] 30%|██▉ | 111222/371472 [8:50:37<20:47:11, 3.48it/s] 30%|██▉ | 111223/371472 [8:50:37<20:22:11, 3.55it/s] 30%|██▉ | 111224/371472 [8:50:37<20:10:56, 3.58it/s] 30%|██▉ | 111225/371472 [8:50:38<19:56:32, 3.62it/s] 30%|██▉ | 111226/371472 [8:50:38<20:26:15, 3.54it/s] 30%|██▉ | 111227/371472 [8:50:38<20:43:58, 3.49it/s] 30%|██▉ | 111228/371472 [8:50:38<20:28:36, 3.53it/s] 30%|██▉ | 111229/371472 [8:50:39<20:13:12, 3.58it/s] 30%|██▉ | 111230/371472 [8:50:39<19:50:52, 3.64it/s] 30%|██▉ | 111231/371472 [8:50:39<20:10:05, 3.58it/s] 30%|██▉ | 111232/371472 [8:50:40<22:16:11, 3.25it/s] 30%|██▉ | 111233/371472 [8:50:40<21:49:57, 3.31it/s] 30%|██▉ | 111234/371472 [8:50:40<21:16:17, 3.40it/s] 30%|██▉ | 111235/371472 [8:50:41<21:33:39, 3.35it/s] 30%|██▉ | 111236/371472 [8:50:41<20:53:09, 3.46it/s] 30%|██▉ | 111237/371472 [8:50:41<20:19:45, 3.56it/s] 30%|██▉ | 111238/371472 [8:50:41<20:48:37, 3.47it/s] 30%|██▉ | 111239/371472 [8:50:42<20:21:33, 3.55it/s] 30%|██▉ | 111240/371472 [8:50:42<19:34:46, 3.69it/s] {'loss': 3.3067, 'learning_rate': 7.308280721411795e-07, 'epoch': 4.79} + 30%|██▉ | 111240/371472 [8:50:42<19:34:46, 3.69it/s] 30%|██▉ | 111241/371472 [8:50:42<23:05:16, 3.13it/s] 30%|██▉ | 111242/371472 [8:50:43<21:55:52, 3.30it/s] 30%|██▉ | 111243/371472 [8:50:43<21:41:37, 3.33it/s] 30%|██▉ | 111244/371472 [8:50:43<20:52:35, 3.46it/s] 30%|██▉ | 111245/371472 [8:50:43<19:48:56, 3.65it/s] 30%|██▉ | 111246/371472 [8:50:44<19:13:43, 3.76it/s] 30%|██▉ | 111247/371472 [8:50:44<19:48:10, 3.65it/s] 30%|██▉ | 111248/371472 [8:50:44<22:25:34, 3.22it/s] 30%|██▉ | 111249/371472 [8:50:45<21:25:54, 3.37it/s] 30%|██▉ | 111250/371472 [8:50:45<21:33:05, 3.35it/s] 30%|██▉ | 111251/371472 [8:50:45<23:38:57, 3.06it/s] 30%|██▉ | 111252/371472 [8:50:46<24:46:01, 2.92it/s] 30%|██▉ | 111253/371472 [8:50:46<22:56:19, 3.15it/s] 30%|██▉ | 111254/371472 [8:50:46<21:24:27, 3.38it/s] 30%|██▉ | 111255/371472 [8:50:46<20:56:53, 3.45it/s] 30%|██▉ | 111256/371472 [8:50:47<20:46:43, 3.48it/s] 30%|██▉ | 111257/371472 [8:50:47<19:46:03, 3.66it/s] 30%|██▉ | 111258/371472 [8:50:47<19:53:53, 3.63it/s] 30%|██▉ | 111259/371472 [8:50:48<20:29:28, 3.53it/s] 30%|██▉ | 111260/371472 [8:50:48<21:05:30, 3.43it/s] {'loss': 3.3875, 'learning_rate': 7.307795901657005e-07, 'epoch': 4.79} + 30%|██▉ | 111260/371472 [8:50:48<21:05:30, 3.43it/s] 30%|██▉ | 111261/371472 [8:50:48<20:54:06, 3.46it/s] 30%|██▉ | 111262/371472 [8:50:48<21:55:35, 3.30it/s] 30%|██▉ | 111263/371472 [8:50:49<21:38:32, 3.34it/s] 30%|██▉ | 111264/371472 [8:50:49<20:43:02, 3.49it/s] 30%|██▉ | 111265/371472 [8:50:49<20:00:25, 3.61it/s] 30%|██▉ | 111266/371472 [8:50:50<19:24:02, 3.73it/s] 30%|██▉ | 111267/371472 [8:50:50<19:58:25, 3.62it/s] 30%|██▉ | 111268/371472 [8:50:50<20:11:39, 3.58it/s] 30%|██▉ | 111269/371472 [8:50:51<23:22:21, 3.09it/s] 30%|██▉ | 111270/371472 [8:50:51<21:59:15, 3.29it/s] 30%|██▉ | 111271/371472 [8:50:51<21:14:29, 3.40it/s] 30%|██▉ | 111272/371472 [8:50:51<20:02:46, 3.61it/s] 30%|██▉ | 111273/371472 [8:50:52<19:54:20, 3.63it/s] 30%|██▉ | 111274/371472 [8:50:52<19:29:24, 3.71it/s] 30%|██▉ | 111275/371472 [8:50:52<19:23:53, 3.73it/s] 30%|██▉ | 111276/371472 [8:50:52<19:49:35, 3.65it/s] 30%|██▉ | 111277/371472 [8:50:53<19:30:43, 3.70it/s] 30%|██▉ | 111278/371472 [8:50:53<20:21:28, 3.55it/s] 30%|██▉ | 111279/371472 [8:50:53<19:34:44, 3.69it/s] 30%|██▉ | 111280/371472 [8:50:53<19:14:02, 3.76it/s] {'loss': 3.4976, 'learning_rate': 7.307311081902216e-07, 'epoch': 4.79} + 30%|██▉ | 111280/371472 [8:50:53<19:14:02, 3.76it/s] 30%|██▉ | 111281/371472 [8:50:54<19:19:34, 3.74it/s] 30%|██▉ | 111282/371472 [8:50:54<19:26:05, 3.72it/s] 30%|██▉ | 111283/371472 [8:50:54<18:56:00, 3.82it/s] 30%|██▉ | 111284/371472 [8:50:55<19:23:42, 3.73it/s] 30%|██▉ | 111285/371472 [8:50:55<19:36:55, 3.68it/s] 30%|██▉ | 111286/371472 [8:50:55<19:27:24, 3.71it/s] 30%|██▉ | 111287/371472 [8:50:55<19:05:56, 3.78it/s] 30%|██▉ | 111288/371472 [8:50:56<18:57:13, 3.81it/s] 30%|██▉ | 111289/371472 [8:50:56<20:23:30, 3.54it/s] 30%|██▉ | 111290/371472 [8:50:56<20:13:00, 3.57it/s] 30%|██▉ | 111291/371472 [8:50:56<20:29:15, 3.53it/s] 30%|██▉ | 111292/371472 [8:50:57<19:56:30, 3.62it/s] 30%|██▉ | 111293/371472 [8:50:57<21:11:10, 3.41it/s] 30%|██▉ | 111294/371472 [8:50:57<22:49:18, 3.17it/s] 30%|██▉ | 111295/371472 [8:50:58<21:59:47, 3.29it/s] 30%|██▉ | 111296/371472 [8:50:58<24:13:51, 2.98it/s] 30%|██▉ | 111297/371472 [8:50:58<22:56:55, 3.15it/s] 30%|██▉ | 111298/371472 [8:50:59<21:31:03, 3.36it/s] 30%|██▉ | 111299/371472 [8:50:59<20:38:29, 3.50it/s] 30%|██▉ | 111300/371472 [8:50:59<20:16:51, 3.56it/s] {'loss': 3.3921, 'learning_rate': 7.306826262147429e-07, 'epoch': 4.79} + 30%|██▉ | 111300/371472 [8:50:59<20:16:51, 3.56it/s] 30%|██▉ | 111301/371472 [8:51:00<22:33:37, 3.20it/s] 30%|██▉ | 111302/371472 [8:51:00<21:29:51, 3.36it/s] 30%|██▉ | 111303/371472 [8:51:00<22:33:12, 3.20it/s] 30%|██▉ | 111304/371472 [8:51:01<25:05:04, 2.88it/s] 30%|██▉ | 111305/371472 [8:51:01<22:53:35, 3.16it/s] 30%|██▉ | 111306/371472 [8:51:01<21:25:24, 3.37it/s] 30%|██▉ | 111307/371472 [8:51:01<21:04:14, 3.43it/s] 30%|██▉ | 111308/371472 [8:51:02<20:11:24, 3.58it/s] 30%|██▉ | 111309/371472 [8:51:02<21:01:11, 3.44it/s] 30%|██▉ | 111310/371472 [8:51:02<22:30:08, 3.21it/s] 30%|██▉ | 111311/371472 [8:51:03<22:45:05, 3.18it/s] 30%|██▉ | 111312/371472 [8:51:03<21:20:03, 3.39it/s] 30%|██▉ | 111313/371472 [8:51:03<22:38:07, 3.19it/s] 30%|██▉ | 111314/371472 [8:51:04<22:35:09, 3.20it/s] 30%|██▉ | 111315/371472 [8:51:04<22:23:00, 3.23it/s] 30%|██▉ | 111316/371472 [8:51:04<21:28:34, 3.36it/s] 30%|██▉ | 111317/371472 [8:51:04<21:14:13, 3.40it/s] 30%|██▉ | 111318/371472 [8:51:05<20:42:56, 3.49it/s] 30%|██▉ | 111319/371472 [8:51:05<20:02:12, 3.61it/s] 30%|██▉ | 111320/371472 [8:51:05<20:05:54, 3.60it/s] {'loss': 3.4683, 'learning_rate': 7.30634144239264e-07, 'epoch': 4.79} + 30%|██▉ | 111320/371472 [8:51:05<20:05:54, 3.60it/s] 30%|██▉ | 111321/371472 [8:51:05<19:32:56, 3.70it/s] 30%|██▉ | 111322/371472 [8:51:06<20:10:49, 3.58it/s] 30%|██▉ | 111323/371472 [8:51:06<19:26:37, 3.72it/s] 30%|██▉ | 111324/371472 [8:51:06<19:42:06, 3.67it/s] 30%|██▉ | 111325/371472 [8:51:07<19:55:19, 3.63it/s] 30%|██▉ | 111326/371472 [8:51:07<19:26:37, 3.72it/s] 30%|██▉ | 111327/371472 [8:51:07<19:19:35, 3.74it/s] 30%|██▉ | 111328/371472 [8:51:08<23:05:53, 3.13it/s] 30%|██▉ | 111329/371472 [8:51:08<23:22:58, 3.09it/s] 30%|██▉ | 111330/371472 [8:51:08<22:44:59, 3.18it/s] 30%|██▉ | 111331/371472 [8:51:08<21:48:50, 3.31it/s] 30%|██▉ | 111332/371472 [8:51:09<21:04:49, 3.43it/s] 30%|██▉ | 111333/371472 [8:51:09<21:55:41, 3.30it/s] 30%|██▉ | 111334/371472 [8:51:09<23:05:16, 3.13it/s] 30%|██▉ | 111335/371472 [8:51:10<22:17:19, 3.24it/s] 30%|██▉ | 111336/371472 [8:51:10<21:57:27, 3.29it/s] 30%|██▉ | 111337/371472 [8:51:10<22:01:07, 3.28it/s] 30%|██▉ | 111338/371472 [8:51:11<21:33:20, 3.35it/s] 30%|██▉ | 111339/371472 [8:51:11<21:09:29, 3.42it/s] 30%|██▉ | 111340/371472 [8:51:11<22:40:13, 3.19it/s] {'loss': 3.4451, 'learning_rate': 7.305856622637849e-07, 'epoch': 4.8} + 30%|██▉ | 111340/371472 [8:51:11<22:40:13, 3.19it/s] 30%|██▉ | 111341/371472 [8:51:11<22:02:47, 3.28it/s] 30%|██▉ | 111342/371472 [8:51:12<20:30:22, 3.52it/s] 30%|██▉ | 111343/371472 [8:51:12<20:00:21, 3.61it/s] 30%|██▉ | 111344/371472 [8:51:12<19:53:16, 3.63it/s] 30%|██▉ | 111345/371472 [8:51:12<19:25:21, 3.72it/s] 30%|██▉ | 111346/371472 [8:51:13<19:41:50, 3.67it/s] 30%|██▉ | 111347/371472 [8:51:13<19:36:31, 3.68it/s] 30%|██▉ | 111348/371472 [8:51:13<21:47:33, 3.32it/s] 30%|██▉ | 111349/371472 [8:51:14<21:46:38, 3.32it/s] 30%|██▉ | 111350/371472 [8:51:14<21:34:44, 3.35it/s] 30%|██▉ | 111351/371472 [8:51:14<20:49:50, 3.47it/s] 30%|██▉ | 111352/371472 [8:51:15<21:19:35, 3.39it/s] 30%|██▉ | 111353/371472 [8:51:15<20:22:28, 3.55it/s] 30%|██▉ | 111354/371472 [8:51:15<19:35:44, 3.69it/s] 30%|██▉ | 111355/371472 [8:51:15<19:26:11, 3.72it/s] 30%|██▉ | 111356/371472 [8:51:16<19:41:49, 3.67it/s] 30%|██▉ | 111357/371472 [8:51:16<19:33:52, 3.69it/s] 30%|██▉ | 111358/371472 [8:51:16<19:17:54, 3.74it/s] 30%|██▉ | 111359/371472 [8:51:16<19:53:26, 3.63it/s] 30%|██▉ | 111360/371472 [8:51:17<20:56:30, 3.45it/s] {'loss': 3.5001, 'learning_rate': 7.305371802883061e-07, 'epoch': 4.8} + 30%|██▉ | 111360/371472 [8:51:17<20:56:30, 3.45it/s] 30%|██▉ | 111361/371472 [8:51:17<20:35:38, 3.51it/s] 30%|██▉ | 111362/371472 [8:51:17<20:33:22, 3.51it/s] 30%|██▉ | 111363/371472 [8:51:18<21:27:34, 3.37it/s] 30%|██▉ | 111364/371472 [8:51:18<20:52:33, 3.46it/s] 30%|██▉ | 111365/371472 [8:51:18<21:59:28, 3.29it/s] 30%|██▉ | 111366/371472 [8:51:19<20:46:40, 3.48it/s] 30%|██▉ | 111367/371472 [8:51:19<20:21:14, 3.55it/s] 30%|██▉ | 111368/371472 [8:51:19<20:17:48, 3.56it/s] 30%|██▉ | 111369/371472 [8:51:19<20:15:51, 3.57it/s] 30%|██▉ | 111370/371472 [8:51:20<27:58:09, 2.58it/s] 30%|██▉ | 111371/371472 [8:51:20<25:34:58, 2.82it/s] 30%|██▉ | 111372/371472 [8:51:21<25:05:24, 2.88it/s] 30%|██▉ | 111373/371472 [8:51:21<23:40:11, 3.05it/s] 30%|██▉ | 111374/371472 [8:51:21<22:57:47, 3.15it/s] 30%|██▉ | 111375/371472 [8:51:21<21:44:41, 3.32it/s] 30%|██▉ | 111376/371472 [8:51:22<20:48:41, 3.47it/s] 30%|██▉ | 111377/371472 [8:51:22<21:25:32, 3.37it/s] 30%|██▉ | 111378/371472 [8:51:22<21:31:41, 3.36it/s] 30%|██▉ | 111379/371472 [8:51:23<20:44:20, 3.48it/s] 30%|██▉ | 111380/371472 [8:51:23<20:45:30, 3.48it/s] {'loss': 3.4202, 'learning_rate': 7.304886983128274e-07, 'epoch': 4.8} + 30%|██▉ | 111380/371472 [8:51:23<20:45:30, 3.48it/s] 30%|██▉ | 111381/371472 [8:51:23<20:14:03, 3.57it/s] 30%|██▉ | 111382/371472 [8:51:23<20:10:04, 3.58it/s] 30%|██▉ | 111383/371472 [8:51:24<21:00:08, 3.44it/s] 30%|██▉ | 111384/371472 [8:51:24<22:12:11, 3.25it/s] 30%|██▉ | 111385/371472 [8:51:24<21:59:35, 3.28it/s] 30%|██▉ | 111386/371472 [8:51:25<21:32:58, 3.35it/s] 30%|██▉ | 111387/371472 [8:51:25<21:04:42, 3.43it/s] 30%|██▉ | 111388/371472 [8:51:25<22:20:00, 3.23it/s] 30%|██▉ | 111389/371472 [8:51:26<21:14:21, 3.40it/s] 30%|██▉ | 111390/371472 [8:51:26<20:30:22, 3.52it/s] 30%|██▉ | 111391/371472 [8:51:26<20:17:51, 3.56it/s] 30%|██▉ | 111392/371472 [8:51:26<19:40:44, 3.67it/s] 30%|██▉ | 111393/371472 [8:51:27<20:14:42, 3.57it/s] 30%|██▉ | 111394/371472 [8:51:27<20:54:16, 3.46it/s] 30%|██▉ | 111395/371472 [8:51:27<19:58:15, 3.62it/s] 30%|██▉ | 111396/371472 [8:51:27<19:48:38, 3.65it/s] 30%|██▉ | 111397/371472 [8:51:28<19:29:25, 3.71it/s] 30%|██▉ | 111398/371472 [8:51:28<19:31:05, 3.70it/s] 30%|██▉ | 111399/371472 [8:51:28<19:50:06, 3.64it/s] 30%|██▉ | 111400/371472 [8:51:29<19:44:54, 3.66it/s] {'loss': 3.4148, 'learning_rate': 7.304402163373484e-07, 'epoch': 4.8} + 30%|██▉ | 111400/371472 [8:51:29<19:44:54, 3.66it/s] 30%|██▉ | 111401/371472 [8:51:29<19:51:41, 3.64it/s] 30%|██▉ | 111402/371472 [8:51:29<19:42:53, 3.66it/s] 30%|██▉ | 111403/371472 [8:51:29<20:47:30, 3.47it/s] 30%|██▉ | 111404/371472 [8:51:30<22:05:43, 3.27it/s] 30%|██▉ | 111405/371472 [8:51:30<21:20:36, 3.38it/s] 30%|██▉ | 111406/371472 [8:51:30<21:27:48, 3.37it/s] 30%|██▉ | 111407/371472 [8:51:31<20:47:45, 3.47it/s] 30%|██▉ | 111408/371472 [8:51:31<20:20:34, 3.55it/s] 30%|██▉ | 111409/371472 [8:51:31<20:27:03, 3.53it/s] 30%|██▉ | 111410/371472 [8:51:31<22:29:38, 3.21it/s] 30%|██▉ | 111411/371472 [8:51:32<22:06:22, 3.27it/s] 30%|██▉ | 111412/371472 [8:51:32<22:18:20, 3.24it/s] 30%|██▉ | 111413/371472 [8:51:32<22:05:04, 3.27it/s] 30%|██▉ | 111414/371472 [8:51:33<21:21:25, 3.38it/s] 30%|██▉ | 111415/371472 [8:51:33<21:03:49, 3.43it/s] 30%|██▉ | 111416/371472 [8:51:33<19:53:34, 3.63it/s] 30%|██▉ | 111417/371472 [8:51:34<21:14:55, 3.40it/s] 30%|██▉ | 111418/371472 [8:51:34<21:19:56, 3.39it/s] 30%|██▉ | 111419/371472 [8:51:34<20:30:47, 3.52it/s] 30%|██▉ | 111420/371472 [8:51:34<21:30:06, 3.36it/s] {'loss': 3.3397, 'learning_rate': 7.303917343618694e-07, 'epoch': 4.8} + 30%|██▉ | 111420/371472 [8:51:34<21:30:06, 3.36it/s] 30%|██▉ | 111421/371472 [8:51:35<21:35:13, 3.35it/s] 30%|██▉ | 111422/371472 [8:51:35<20:43:45, 3.48it/s] 30%|██▉ | 111423/371472 [8:51:35<20:29:09, 3.53it/s] 30%|██▉ | 111424/371472 [8:51:36<20:02:33, 3.60it/s] 30%|██▉ | 111425/371472 [8:51:36<20:12:36, 3.57it/s] 30%|██▉ | 111426/371472 [8:51:36<20:12:46, 3.57it/s] 30%|██▉ | 111427/371472 [8:51:36<19:32:56, 3.70it/s] 30%|██▉ | 111428/371472 [8:51:37<19:44:12, 3.66it/s] 30%|██▉ | 111429/371472 [8:51:37<19:53:15, 3.63it/s] 30%|██▉ | 111430/371472 [8:51:37<19:02:12, 3.79it/s] 30%|██▉ | 111431/371472 [8:51:37<20:18:23, 3.56it/s] 30%|██▉ | 111432/371472 [8:51:38<19:57:29, 3.62it/s] 30%|██▉ | 111433/371472 [8:51:38<19:07:24, 3.78it/s] 30%|██▉ | 111434/371472 [8:51:38<20:01:55, 3.61it/s] 30%|██▉ | 111435/371472 [8:51:39<20:11:55, 3.58it/s] 30%|██▉ | 111436/371472 [8:51:39<19:09:41, 3.77it/s] 30%|██▉ | 111437/371472 [8:51:39<19:16:55, 3.75it/s] 30%|██▉ | 111438/371472 [8:51:39<19:17:01, 3.75it/s] 30%|██▉ | 111439/371472 [8:51:40<20:52:00, 3.46it/s] 30%|██▉ | 111440/371472 [8:51:40<20:21:37, 3.55it/s] {'loss': 3.4448, 'learning_rate': 7.303432523863906e-07, 'epoch': 4.8} + 30%|██▉ | 111440/371472 [8:51:40<20:21:37, 3.55it/s] 30%|██▉ | 111441/371472 [8:51:40<21:22:00, 3.38it/s] 30%|███ | 111442/371472 [8:51:41<20:38:20, 3.50it/s] 30%|███ | 111443/371472 [8:51:41<20:17:59, 3.56it/s] 30%|███ | 111444/371472 [8:51:41<19:57:11, 3.62it/s] 30%|███ | 111445/371472 [8:51:41<19:48:47, 3.65it/s] 30%|███ | 111446/371472 [8:51:42<19:25:21, 3.72it/s] 30%|███ | 111447/371472 [8:51:42<19:25:22, 3.72it/s] 30%|███ | 111448/371472 [8:51:42<19:07:28, 3.78it/s] 30%|███ | 111449/371472 [8:51:42<18:45:08, 3.85it/s] 30%|███ | 111450/371472 [8:51:43<19:17:18, 3.74it/s] 30%|███ | 111451/371472 [8:51:43<19:56:18, 3.62it/s] 30%|███ | 111452/371472 [8:51:43<19:44:53, 3.66it/s] 30%|███ | 111453/371472 [8:51:44<21:04:44, 3.43it/s] 30%|███ | 111454/371472 [8:51:44<21:35:09, 3.35it/s] 30%|███ | 111455/371472 [8:51:44<22:31:46, 3.21it/s] 30%|███ | 111456/371472 [8:51:44<21:59:43, 3.28it/s] 30%|███ | 111457/371472 [8:51:45<21:55:53, 3.29it/s] 30%|███ | 111458/371472 [8:51:45<23:19:52, 3.10it/s] 30%|███ | 111459/371472 [8:51:45<22:56:36, 3.15it/s] 30%|███ | 111460/371472 [8:51:46<23:18:48, 3.10it/s] {'loss': 3.3335, 'learning_rate': 7.302947704109117e-07, 'epoch': 4.8} + 30%|███ | 111460/371472 [8:51:46<23:18:48, 3.10it/s] 30%|███ | 111461/371472 [8:51:46<23:38:02, 3.06it/s] 30%|███ | 111462/371472 [8:51:46<22:44:33, 3.18it/s] 30%|███ | 111463/371472 [8:51:47<23:09:06, 3.12it/s] 30%|███ | 111464/371472 [8:51:47<22:18:43, 3.24it/s] 30%|███ | 111465/371472 [8:51:47<21:28:42, 3.36it/s] 30%|█��█ | 111466/371472 [8:51:48<21:05:00, 3.43it/s] 30%|███ | 111467/371472 [8:51:48<20:39:25, 3.50it/s] 30%|███ | 111468/371472 [8:51:48<20:34:01, 3.51it/s] 30%|███ | 111469/371472 [8:51:48<20:38:54, 3.50it/s] 30%|███ | 111470/371472 [8:51:49<20:32:21, 3.52it/s] 30%|███ | 111471/371472 [8:51:49<20:01:20, 3.61it/s] 30%|███ | 111472/371472 [8:51:49<20:12:15, 3.57it/s] 30%|███ | 111473/371472 [8:51:49<19:37:08, 3.68it/s] 30%|███ | 111474/371472 [8:51:50<19:53:35, 3.63it/s] 30%|███ | 111475/371472 [8:51:50<19:41:20, 3.67it/s] 30%|███ | 111476/371472 [8:51:50<19:49:24, 3.64it/s] 30%|███ | 111477/371472 [8:51:51<19:41:20, 3.67it/s] 30%|███ | 111478/371472 [8:51:51<19:24:44, 3.72it/s] 30%|███ | 111479/371472 [8:51:51<19:01:37, 3.80it/s] 30%|███ | 111480/371472 [8:51:51<19:08:37, 3.77it/s] {'loss': 3.4823, 'learning_rate': 7.302462884354326e-07, 'epoch': 4.8} + 30%|███ | 111480/371472 [8:51:51<19:08:37, 3.77it/s] 30%|███ | 111481/371472 [8:51:52<19:12:31, 3.76it/s] 30%|███ | 111482/371472 [8:51:52<20:46:17, 3.48it/s] 30%|███ | 111483/371472 [8:51:52<20:24:21, 3.54it/s] 30%|███ | 111484/371472 [8:51:53<20:55:48, 3.45it/s] 30%|███ | 111485/371472 [8:51:53<20:49:55, 3.47it/s] 30%|███ | 111486/371472 [8:51:53<20:33:46, 3.51it/s] 30%|███ | 111487/371472 [8:51:53<19:38:26, 3.68it/s] 30%|███ | 111488/371472 [8:51:54<18:40:07, 3.87it/s] 30%|███ | 111489/371472 [8:51:54<19:06:49, 3.78it/s] 30%|███ | 111490/371472 [8:51:54<19:38:24, 3.68it/s] 30%|███ | 111491/371472 [8:51:54<20:09:31, 3.58it/s] 30%|███ | 111492/371472 [8:51:55<21:25:06, 3.37it/s] 30%|███ | 111493/371472 [8:51:55<21:17:46, 3.39it/s] 30%|███ | 111494/371472 [8:51:55<21:01:58, 3.43it/s] 30%|███ | 111495/371472 [8:51:56<21:02:51, 3.43it/s] 30%|███ | 111496/371472 [8:51:56<20:20:55, 3.55it/s] 30%|███ | 111497/371472 [8:51:56<21:33:31, 3.35it/s] 30%|███ | 111498/371472 [8:51:57<20:49:18, 3.47it/s] 30%|███ | 111499/371472 [8:51:57<21:52:48, 3.30it/s] 30%|███ | 111500/371472 [8:51:57<21:57:19, 3.29it/s] {'loss': 3.3321, 'learning_rate': 7.301978064599538e-07, 'epoch': 4.8} + 30%|███ | 111500/371472 [8:51:57<21:57:19, 3.29it/s] 30%|███ | 111501/371472 [8:51:57<22:26:09, 3.22it/s] 30%|███ | 111502/371472 [8:51:58<20:55:31, 3.45it/s] 30%|███ | 111503/371472 [8:51:58<20:07:17, 3.59it/s] 30%|███ | 111504/371472 [8:51:58<20:09:05, 3.58it/s] 30%|███ | 111505/371472 [8:51:59<21:06:23, 3.42it/s] 30%|███ | 111506/371472 [8:51:59<20:57:26, 3.45it/s] 30%|███ | 111507/371472 [8:51:59<20:56:08, 3.45it/s] 30%|███ | 111508/371472 [8:51:59<20:34:46, 3.51it/s] 30%|███ | 111509/371472 [8:52:00<19:51:15, 3.64it/s] 30%|███ | 111510/371472 [8:52:00<19:00:45, 3.80it/s] 30%|███ | 111511/371472 [8:52:00<19:40:18, 3.67it/s] 30%|███ | 111512/371472 [8:52:00<19:34:48, 3.69it/s] 30%|███ | 111513/371472 [8:52:01<19:22:33, 3.73it/s] 30%|███ | 111514/371472 [8:52:01<19:03:10, 3.79it/s] 30%|███ | 111515/371472 [8:52:01<19:10:24, 3.77it/s] 30%|███ | 111516/371472 [8:52:02<18:56:04, 3.81it/s] 30%|███ | 111517/371472 [8:52:02<19:23:27, 3.72it/s] 30%|███ | 111518/371472 [8:52:02<19:46:03, 3.65it/s] 30%|███ | 111519/371472 [8:52:02<20:12:05, 3.57it/s] 30%|███ | 111520/371472 [8:52:03<19:34:09, 3.69it/s] {'loss': 3.4012, 'learning_rate': 7.30149324484475e-07, 'epoch': 4.8} + 30%|███ | 111520/371472 [8:52:03<19:34:09, 3.69it/s] 30%|███ | 111521/371472 [8:52:03<20:59:06, 3.44it/s] 30%|███ | 111522/371472 [8:52:03<20:13:23, 3.57it/s] 30%|███ | 111523/371472 [8:52:04<20:21:04, 3.55it/s] 30%|███ | 111524/371472 [8:52:04<21:27:49, 3.36it/s] 30%|███ | 111525/371472 [8:52:04<21:19:05, 3.39it/s] 30%|███ | 111526/371472 [8:52:04<20:20:42, 3.55it/s] 30%|███ | 111527/371472 [8:52:05<20:21:39, 3.55it/s] 30%|███ | 111528/371472 [8:52:05<19:59:57, 3.61it/s] 30%|███ | 111529/371472 [8:52:05<20:30:51, 3.52it/s] 30%|███ | 111530/371472 [8:52:05<20:09:23, 3.58it/s] 30%|███ | 111531/371472 [8:52:06<19:46:46, 3.65it/s] 30%|███ | 111532/371472 [8:52:06<19:27:02, 3.71it/s] 30%|███ | 111533/371472 [8:52:06<20:00:27, 3.61it/s] 30%|███ | 111534/371472 [8:52:07<19:26:10, 3.71it/s] 30%|███ | 111535/371472 [8:52:07<19:02:00, 3.79it/s] 30%|███ | 111536/371472 [8:52:07<19:13:30, 3.76it/s] 30%|███ | 111537/371472 [8:52:07<20:14:52, 3.57it/s] 30%|███ | 111538/371472 [8:52:08<20:11:49, 3.57it/s] 30%|███ | 111539/371472 [8:52:08<19:44:58, 3.66it/s] 30%|███ | 111540/371472 [8:52:08<20:17:44, 3.56it/s] {'loss': 3.4922, 'learning_rate': 7.301008425089961e-07, 'epoch': 4.8} + 30%|███ | 111540/371472 [8:52:08<20:17:44, 3.56it/s] 30%|███ | 111541/371472 [8:52:09<20:43:50, 3.48it/s] 30%|███ | 111542/371472 [8:52:09<19:34:32, 3.69it/s] 30%|███ | 111543/371472 [8:52:09<20:23:56, 3.54it/s] 30%|███ | 111544/371472 [8:52:09<20:04:21, 3.60it/s] 30%|███ | 111545/371472 [8:52:10<19:49:57, 3.64it/s] 30%|███ | 111546/371472 [8:52:10<20:24:51, 3.54it/s] 30%|███ | 111547/371472 [8:52:10<21:08:46, 3.41it/s] 30%|███ | 111548/371472 [8:52:11<20:39:08, 3.50it/s] 30%|███ | 111549/371472 [8:52:11<22:48:49, 3.16it/s] 30%|███ | 111550/371472 [8:52:11<21:57:53, 3.29it/s] 30%|███ | 111551/371472 [8:52:11<20:59:59, 3.44it/s] 30%|███ | 111552/371472 [8:52:12<20:42:46, 3.49it/s] 30%|███ | 111553/371472 [8:52:12<20:47:31, 3.47it/s] 30%|███ | 111554/371472 [8:52:12<20:41:21, 3.49it/s] 30%|███ | 111555/371472 [8:52:13<19:44:45, 3.66it/s] 30%|███ | 111556/371472 [8:52:13<19:13:37, 3.76it/s] 30%|███ | 111557/371472 [8:52:13<19:24:22, 3.72it/s] 30%|███ | 111558/371472 [8:52:13<20:33:00, 3.51it/s] 30%|███ | 111559/371472 [8:52:14<20:48:12, 3.47it/s] 30%|███ | 111560/371472 [8:52:14<21:04:00, 3.43it/s] {'loss': 3.2925, 'learning_rate': 7.300523605335171e-07, 'epoch': 4.81} + 30%|███ | 111560/371472 [8:52:14<21:04:00, 3.43it/s] 30%|███ | 111561/371472 [8:52:14<20:06:01, 3.59it/s] 30%|███ | 111562/371472 [8:52:14<19:58:25, 3.61it/s] 30%|███ | 111563/371472 [8:52:15<21:19:19, 3.39it/s] 30%|███ | 111564/371472 [8:52:15<20:58:39, 3.44it/s] 30%|███ | 111565/371472 [8:52:15<20:15:13, 3.56it/s] 30%|███ | 111566/371472 [8:52:16<20:07:33, 3.59it/s] 30%|███ | 111567/371472 [8:52:16<19:54:18, 3.63it/s] 30%|███ | 111568/371472 [8:52:16<20:23:37, 3.54it/s] 30%|███ | 111569/371472 [8:52:16<19:53:04, 3.63it/s] 30%|███ | 111570/371472 [8:52:17<19:32:17, 3.70it/s] 30%|███ | 111571/371472 [8:52:17<19:52:37, 3.63it/s] 30%|███ | 111572/371472 [8:52:17<19:44:48, 3.66it/s] 30%|███ | 111573/371472 [8:52:18<21:33:14, 3.35it/s] 30%|███ | 111574/371472 [8:52:18<22:10:00, 3.26it/s] 30%|███ | 111575/371472 [8:52:18<21:54:07, 3.30it/s] 30%|███ | 111576/371472 [8:52:19<21:15:01, 3.40it/s] 30%|███ | 111577/371472 [8:52:19<20:38:08, 3.50it/s] 30%|███ | 111578/371472 [8:52:19<20:09:15, 3.58it/s] 30%|███ | 111579/371472 [8:52:19<21:28:45, 3.36it/s] 30%|███ | 111580/371472 [8:52:20<21:43:10, 3.32it/s] {'loss': 3.3143, 'learning_rate': 7.300038785580383e-07, 'epoch': 4.81} + 30%|███ | 111580/371472 [8:52:20<21:43:10, 3.32it/s] 30%|███ | 111581/371472 [8:52:20<20:43:14, 3.48it/s] 30%|███ | 111582/371472 [8:52:20<21:28:54, 3.36it/s] 30%|███ | 111583/371472 [8:52:21<20:43:45, 3.48it/s] 30%|███ | 111584/371472 [8:52:21<20:37:25, 3.50it/s] 30%|███ | 111585/371472 [8:52:21<20:17:37, 3.56it/s] 30%|███ | 111586/371472 [8:52:21<21:14:02, 3.40it/s] 30%|███ | 111587/371472 [8:52:22<20:56:21, 3.45it/s] 30%|███ | 111588/371472 [8:52:22<20:35:50, 3.50it/s] 30%|███ | 111589/371472 [8:52:22<20:42:44, 3.49it/s] 30%|███ | 111590/371472 [8:52:23<19:54:23, 3.63it/s] 30%|███ | 111591/371472 [8:52:23<21:36:41, 3.34it/s] 30%|███ | 111592/371472 [8:52:23<20:44:47, 3.48it/s] 30%|███ | 111593/371472 [8:52:23<21:00:10, 3.44it/s] 30%|███ | 111594/371472 [8:52:24<21:12:16, 3.40it/s] 30%|███ | 111595/371472 [8:52:24<21:48:41, 3.31it/s] 30%|███ | 111596/371472 [8:52:24<20:54:53, 3.45it/s] 30%|███ | 111597/371472 [8:52:25<20:04:03, 3.60it/s] 30%|███ | 111598/371472 [8:52:25<20:45:33, 3.48it/s] 30%|███ | 111599/371472 [8:52:25<20:19:59, 3.55it/s] 30%|███ | 111600/371472 [8:52:25<19:19:21, 3.74it/s] {'loss': 3.3931, 'learning_rate': 7.299553965825594e-07, 'epoch': 4.81} + 30%|███ | 111600/371472 [8:52:25<19:19:21, 3.74it/s] 30%|███ | 111601/371472 [8:52:26<21:19:22, 3.39it/s] 30%|███ | 111602/371472 [8:52:26<22:41:15, 3.18it/s] 30%|███ | 111603/371472 [8:52:26<21:42:11, 3.33it/s] 30%|███ | 111604/371472 [8:52:27<22:18:23, 3.24it/s] 30%|███ | 111605/371472 [8:52:27<21:12:55, 3.40it/s] 30%|███ | 111606/371472 [8:52:27<20:41:01, 3.49it/s] 30%|███ | 111607/371472 [8:52:27<20:27:56, 3.53it/s] 30%|███ | 111608/371472 [8:52:28<23:31:37, 3.07it/s] 30%|███ | 111609/371472 [8:52:28<23:10:29, 3.11it/s] 30%|███ | 111610/371472 [8:52:29<23:09:34, 3.12it/s] 30%|███ | 111611/371472 [8:52:29<27:55:55, 2.58it/s] 30%|███ | 111612/371472 [8:52:29<25:13:59, 2.86it/s] 30%|███ | 111613/371472 [8:52:30<23:50:20, 3.03it/s] 30%|███ | 111614/371472 [8:52:30<22:19:05, 3.23it/s] 30%|███ | 111615/371472 [8:52:30<21:13:26, 3.40it/s] 30%|███ | 111616/371472 [8:52:30<21:23:17, 3.37it/s] 30%|███ | 111617/371472 [8:52:31<20:58:05, 3.44it/s] 30%|███ | 111618/371472 [8:52:31<20:30:25, 3.52it/s] 30%|███ | 111619/371472 [8:52:31<19:55:23, 3.62it/s] 30%|███ | 111620/371472 [8:52:32<20:29:19, 3.52it/s] {'loss': 3.3061, 'learning_rate': 7.299069146070805e-07, 'epoch': 4.81} + 30%|███ | 111620/371472 [8:52:32<20:29:19, 3.52it/s] 30%|███ | 111621/371472 [8:52:32<20:39:03, 3.50it/s] 30%|███ | 111622/371472 [8:52:32<20:26:16, 3.53it/s] 30%|███ | 111623/371472 [8:52:32<19:59:19, 3.61it/s] 30%|███ | 111624/371472 [8:52:33<20:28:39, 3.52it/s] 30%|███ | 111625/371472 [8:52:33<20:07:54, 3.59it/s] 30%|███ | 111626/371472 [8:52:33<21:56:54, 3.29it/s] 30%|███ | 111627/371472 [8:52:34<21:15:16, 3.40it/s] 30%|███ | 111628/371472 [8:52:34<20:32:23, 3.51it/s] 30%|███ | 111629/371472 [8:52:34<20:29:30, 3.52it/s] 30%|███ | 111630/371472 [8:52:34<20:31:47, 3.52it/s] 30%|███ | 111631/371472 [8:52:35<19:47:56, 3.65it/s] 30%|███ | 111632/371472 [8:52:35<20:37:52, 3.50it/s] 30%|███ | 111633/371472 [8:52:35<20:06:45, 3.59it/s] 30%|███ | 111634/371472 [8:52:36<19:41:13, 3.67it/s] 30%|███ | 111635/371472 [8:52:36<20:18:34, 3.55it/s] 30%|███ | 111636/371472 [8:52:36<19:44:18, 3.66it/s] 30%|███ | 111637/371472 [8:52:36<20:03:20, 3.60it/s] 30%|███ | 111638/371472 [8:52:37<19:58:57, 3.61it/s] 30%|███ | 111639/371472 [8:52:37<21:17:36, 3.39it/s] 30%|███ | 111640/371472 [8:52:37<20:59:31, 3.44it/s] {'loss': 3.2464, 'learning_rate': 7.298584326316015e-07, 'epoch': 4.81} + 30%|███ | 111640/371472 [8:52:37<20:59:31, 3.44it/s] 30%|███ | 111641/371472 [8:52:37<20:00:04, 3.61it/s] 30%|███ | 111642/371472 [8:52:38<19:22:07, 3.73it/s] 30%|███ | 111643/371472 [8:52:38<19:41:40, 3.66it/s] 30%|███ | 111644/371472 [8:52:38<19:48:42, 3.64it/s] 30%|███ | 111645/371472 [8:52:39<19:35:17, 3.68it/s] 30%|███ | 111646/371472 [8:52:39<19:53:07, 3.63it/s] 30%|███ | 111647/371472 [8:52:39<19:47:32, 3.65it/s] 30%|███ | 111648/371472 [8:52:39<20:39:28, 3.49it/s] 30%|███ | 111649/371472 [8:52:40<21:24:58, 3.37it/s] 30%|███ | 111650/371472 [8:52:40<21:09:27, 3.41it/s] 30%|███ | 111651/371472 [8:52:40<21:04:25, 3.42it/s] 30%|███ | 111652/371472 [8:52:41<20:54:07, 3.45it/s] 30%|███ | 111653/371472 [8:52:41<20:40:46, 3.49it/s] 30%|███ | 111654/371472 [8:52:41<20:12:27, 3.57it/s] 30%|███ | 111655/371472 [8:52:41<20:17:11, 3.56it/s] 30%|███ | 111656/371472 [8:52:42<20:28:03, 3.53it/s] 30%|███ | 111657/371472 [8:52:42<20:43:58, 3.48it/s] 30%|███ | 111658/371472 [8:52:42<21:57:51, 3.29it/s] 30%|███ | 111659/371472 [8:52:43<21:25:45, 3.37it/s] 30%|███ | 111660/371472 [8:52:43<21:53:57, 3.30it/s] {'loss': 3.4188, 'learning_rate': 7.298099506561227e-07, 'epoch': 4.81} + 30%|███ | 111660/371472 [8:52:43<21:53:57, 3.30it/s] 30%|███ | 111661/371472 [8:52:43<21:01:38, 3.43it/s] 30%|███ | 111662/371472 [8:52:43<19:58:23, 3.61it/s] 30%|███ | 111663/371472 [8:52:44<19:44:38, 3.66it/s] 30%|███ | 111664/371472 [8:52:44<19:25:36, 3.71it/s] 30%|███ | 111665/371472 [8:52:44<19:54:53, 3.62it/s] 30%|███ | 111666/371472 [8:52:45<21:09:27, 3.41it/s] 30%|███ | 111667/371472 [8:52:45<20:10:07, 3.58it/s] 30%|███ | 111668/371472 [8:52:45<19:21:40, 3.73it/s] 30%|███ | 111669/371472 [8:52:45<19:14:00, 3.75it/s] 30%|███ | 111670/371472 [8:52:46<19:32:51, 3.69it/s] 30%|███ | 111671/371472 [8:52:46<19:29:09, 3.70it/s] 30%|███ | 111672/371472 [8:52:46<18:58:24, 3.80it/s] 30%|███ | 111673/371472 [8:52:46<19:52:36, 3.63it/s] 30%|███ | 111674/371472 [8:52:47<20:04:04, 3.60it/s] 30%|███ | 111675/371472 [8:52:47<19:51:09, 3.64it/s] 30%|███ | 111676/371472 [8:52:47<19:14:26, 3.75it/s] 30%|███ | 111677/371472 [8:52:48<19:54:24, 3.63it/s] 30%|███ | 111678/371472 [8:52:48<19:51:39, 3.63it/s] 30%|███ | 111679/371472 [8:52:48<23:27:22, 3.08it/s] 30%|███ | 111680/371472 [8:52:49<23:29:44, 3.07it/s] {'loss': 3.2762, 'learning_rate': 7.297614686806439e-07, 'epoch': 4.81} + 30%|███ | 111680/371472 [8:52:49<23:29:44, 3.07it/s] 30%|███ | 111681/371472 [8:52:49<23:14:09, 3.11it/s] 30%|███ | 111682/371472 [8:52:49<23:57:54, 3.01it/s] 30%|███ | 111683/371472 [8:52:50<23:14:13, 3.11it/s] 30%|███ | 111684/371472 [8:52:50<21:45:16, 3.32it/s] 30%|███ | 111685/371472 [8:52:50<21:12:21, 3.40it/s] 30%|███ | 111686/371472 [8:52:50<20:01:38, 3.60it/s] 30%|███ | 111687/371472 [8:52:51<19:58:30, 3.61it/s] 30%|███ | 111688/371472 [8:52:51<19:32:37, 3.69it/s] 30%|███ | 111689/371472 [8:52:51<19:27:43, 3.71it/s] 30%|███ | 111690/371472 [8:52:51<19:29:12, 3.70it/s] 30%|███ | 111691/371472 [8:52:52<19:51:56, 3.63it/s] 30%|███ | 111692/371472 [8:52:52<20:26:12, 3.53it/s] 30%|███ | 111693/371472 [8:52:52<19:58:57, 3.61it/s] 30%|███ | 111694/371472 [8:52:53<21:25:04, 3.37it/s] 30%|███ | 111695/371472 [8:52:53<20:29:39, 3.52it/s] 30%|███ | 111696/371472 [8:52:53<21:25:53, 3.37it/s] 30%|███ | 111697/371472 [8:52:53<20:57:57, 3.44it/s] 30%|███ | 111698/371472 [8:52:54<20:40:04, 3.49it/s] 30%|███ | 111699/371472 [8:52:54<20:37:13, 3.50it/s] 30%|███ | 111700/371472 [8:52:54<20:37:14, 3.50it/s] {'loss': 3.4127, 'learning_rate': 7.29712986705165e-07, 'epoch': 4.81} + 30%|███ | 111700/371472 [8:52:54<20:37:14, 3.50it/s] 30%|███ | 111701/371472 [8:52:55<22:25:53, 3.22it/s] 30%|███ | 111702/371472 [8:52:55<22:35:50, 3.19it/s] 30%|███ | 111703/371472 [8:52:55<21:44:27, 3.32it/s] 30%|███ | 111704/371472 [8:52:56<21:08:49, 3.41it/s] 30%|███ | 111705/371472 [8:52:56<22:54:58, 3.15it/s] 30%|███ | 111706/371472 [8:52:56<26:49:44, 2.69it/s] 30%|███ | 111707/371472 [8:52:57<24:14:08, 2.98it/s] 30%|███ | 111708/371472 [8:52:57<23:33:55, 3.06it/s] 30%|███ | 111709/371472 [8:52:57<22:25:08, 3.22it/s] 30%|███ | 111710/371472 [8:52:58<21:39:44, 3.33it/s] 30%|███ | 111711/371472 [8:52:58<22:26:03, 3.22it/s] 30%|███ | 111712/371472 [8:52:58<21:34:42, 3.34it/s] 30%|███ | 111713/371472 [8:52:58<21:38:50, 3.33it/s] 30%|███ | 111714/371472 [8:52:59<21:40:08, 3.33it/s] 30%|███ | 111715/371472 [8:52:59<20:58:27, 3.44it/s] 30%|███ | 111716/371472 [8:52:59<21:28:01, 3.36it/s] 30%|███ | 111717/371472 [8:53:00<21:24:05, 3.37it/s] 30%|███ | 111718/371472 [8:53:00<20:55:01, 3.45it/s] 30%|███ | 111719/371472 [8:53:00<22:14:22, 3.24it/s] 30%|███ | 111720/371472 [8:53:01<21:31:18, 3.35it/s] {'loss': 3.3055, 'learning_rate': 7.296645047296859e-07, 'epoch': 4.81} + 30%|███ | 111720/371472 [8:53:01<21:31:18, 3.35it/s] 30%|███ | 111721/371472 [8:53:01<21:48:40, 3.31it/s] 30%|███ | 111722/371472 [8:53:01<20:42:09, 3.49it/s] 30%|███ | 111723/371472 [8:53:01<20:04:26, 3.59it/s] 30%|███ | 111724/371472 [8:53:02<19:37:13, 3.68it/s] 30%|███ | 111725/371472 [8:53:02<20:00:40, 3.61it/s] 30%|███ | 111726/371472 [8:53:02<21:00:12, 3.44it/s] 30%|███ | 111727/371472 [8:53:03<20:53:12, 3.45it/s] 30%|███ | 111728/371472 [8:53:03<21:15:24, 3.39it/s] 30%|███ | 111729/371472 [8:53:03<22:41:31, 3.18it/s] 30%|███ | 111730/371472 [8:53:03<21:22:04, 3.38it/s] 30%|███ | 111731/371472 [8:53:04<20:39:16, 3.49it/s] 30%|███ | 111732/371472 [8:53:04<20:42:13, 3.48it/s] 30%|███ | 111733/371472 [8:53:04<19:54:07, 3.63it/s] 30%|███ | 111734/371472 [8:53:05<20:09:08, 3.58it/s] 30%|███ | 111735/371472 [8:53:05<21:53:18, 3.30it/s] 30%|███ | 111736/371472 [8:53:05<21:54:03, 3.29it/s] 30%|███ | 111737/371472 [8:53:05<20:59:58, 3.44it/s] 30%|███ | 111738/371472 [8:53:06<20:34:46, 3.51it/s] 30%|███ | 111739/371472 [8:53:06<20:01:43, 3.60it/s] 30%|███ | 111740/371472 [8:53:06<20:12:15, 3.57it/s] {'loss': 3.383, 'learning_rate': 7.296160227542071e-07, 'epoch': 4.81} + 30%|███ | 111740/371472 [8:53:06<20:12:15, 3.57it/s] 30%|███ | 111741/371472 [8:53:07<21:59:15, 3.28it/s] 30%|███ | 111742/371472 [8:53:07<20:58:28, 3.44it/s] 30%|███ | 111743/371472 [8:53:07<20:42:02, 3.49it/s] 30%|███ | 111744/371472 [8:53:07<20:23:02, 3.54it/s] 30%|███ | 111745/371472 [8:53:08<22:07:43, 3.26it/s] 30%|███ | 111746/371472 [8:53:08<20:48:23, 3.47it/s] 30%|███ | 111747/371472 [8:53:08<20:23:58, 3.54it/s] 30%|███ | 111748/371472 [8:53:09<21:06:50, 3.42it/s] 30%|███ | 111749/371472 [8:53:09<20:49:17, 3.46it/s] 30%|███ | 111750/371472 [8:53:09<20:07:04, 3.59it/s] 30%|███ | 111751/371472 [8:53:09<20:04:41, 3.59it/s] 30%|███ | 111752/371472 [8:53:10<19:54:20, 3.62it/s] 30%|███ | 111753/371472 [8:53:10<19:46:20, 3.65it/s] 30%|███ | 111754/371472 [8:53:10<20:05:12, 3.59it/s] 30%|███ | 111755/371472 [8:53:11<19:58:34, 3.61it/s] 30%|███ | 111756/371472 [8:53:11<19:33:44, 3.69it/s] 30%|███ | 111757/371472 [8:53:11<19:43:00, 3.66it/s] 30%|███ | 111758/371472 [8:53:11<20:28:52, 3.52it/s] 30%|███ | 111759/371472 [8:53:12<24:56:01, 2.89it/s] 30%|███ | 111760/371472 [8:53:12<24:03:48, 3.00it/s] {'loss': 3.5977, 'learning_rate': 7.295675407787283e-07, 'epoch': 4.81} + 30%|███ | 111760/371472 [8:53:12<24:03:48, 3.00it/s] 30%|███ | 111761/371472 [8:53:12<22:36:15, 3.19it/s] 30%|███ | 111762/371472 [8:53:13<23:18:37, 3.09it/s] 30%|███ | 111763/371472 [8:53:13<22:00:23, 3.28it/s] 30%|███ | 111764/371472 [8:53:13<23:25:00, 3.08it/s] 30%|███ | 111765/371472 [8:53:14<22:01:14, 3.28it/s] 30%|███ | 111766/371472 [8:53:14<21:48:11, 3.31it/s] 30%|███ | 111767/371472 [8:53:14<20:29:50, 3.52it/s] 30%|███ | 111768/371472 [8:53:15<20:33:22, 3.51it/s] 30%|███ | 111769/371472 [8:53:15<21:52:01, 3.30it/s] 30%|███ | 111770/371472 [8:53:15<21:41:57, 3.32it/s] 30%|███ | 111771/371472 [8:53:15<21:07:41, 3.41it/s] 30%|███ | 111772/371472 [8:53:16<20:15:59, 3.56it/s] 30%|███ | 111773/371472 [8:53:16<20:01:04, 3.60it/s] 30%|███ | 111774/371472 [8:53:16<19:47:46, 3.64it/s] 30%|███ | 111775/371472 [8:53:16<19:42:00, 3.66it/s] 30%|███ | 111776/371472 [8:53:17<21:10:21, 3.41it/s] 30%|███ | 111777/371472 [8:53:17<21:55:01, 3.29it/s] 30%|███ | 111778/371472 [8:53:17<21:10:04, 3.41it/s] 30%|███ | 111779/371472 [8:53:18<20:45:20, 3.48it/s] 30%|███ | 111780/371472 [8:53:18<20:31:09, 3.52it/s] {'loss': 3.2123, 'learning_rate': 7.295190588032493e-07, 'epoch': 4.81} + 30%|███ | 111780/371472 [8:53:18<20:31:09, 3.52it/s] 30%|███ | 111781/371472 [8:53:18<22:23:38, 3.22it/s] 30%|███ | 111782/371472 [8:53:19<22:29:43, 3.21it/s] 30%|███ | 111783/371472 [8:53:19<21:31:04, 3.35it/s] 30%|███ | 111784/371472 [8:53:19<20:33:09, 3.51it/s] 30%|███ | 111785/371472 [8:53:19<20:00:27, 3.61it/s] 30%|███ | 111786/371472 [8:53:20<20:20:46, 3.55it/s] 30%|███ | 111787/371472 [8:53:20<20:59:17, 3.44it/s] 30%|███ | 111788/371472 [8:53:20<20:17:42, 3.55it/s] 30%|███ | 111789/371472 [8:53:21<19:50:56, 3.63it/s] 30%|███ | 111790/371472 [8:53:21<19:23:58, 3.72it/s] 30%|███ | 111791/371472 [8:53:21<19:01:17, 3.79it/s] 30%|███ | 111792/371472 [8:53:21<19:21:01, 3.73it/s] 30%|███ | 111793/371472 [8:53:22<19:57:27, 3.61it/s] 30%|███ | 111794/371472 [8:53:22<19:47:26, 3.64it/s] 30%|███ | 111795/371472 [8:53:22<19:00:41, 3.79it/s] 30%|███ | 111796/371472 [8:53:22<19:30:05, 3.70it/s] 30%|███ | 111797/371472 [8:53:23<20:15:34, 3.56it/s] 30%|███ | 111798/371472 [8:53:23<20:28:09, 3.52it/s] 30%|███ | 111799/371472 [8:53:23<19:57:43, 3.61it/s] 30%|��██ | 111800/371472 [8:53:24<19:46:51, 3.65it/s] {'loss': 3.4144, 'learning_rate': 7.294705768277704e-07, 'epoch': 4.82} + 30%|███ | 111800/371472 [8:53:24<19:46:51, 3.65it/s] 30%|███ | 111801/371472 [8:53:24<20:00:47, 3.60it/s] 30%|███ | 111802/371472 [8:53:24<19:39:33, 3.67it/s] 30%|███ | 111803/371472 [8:53:24<19:09:43, 3.76it/s] 30%|███ | 111804/371472 [8:53:25<19:08:31, 3.77it/s] 30%|███ | 111805/371472 [8:53:25<18:53:06, 3.82it/s] 30%|███ | 111806/371472 [8:53:25<19:09:46, 3.76it/s] 30%|███ | 111807/371472 [8:53:25<18:56:35, 3.81it/s] 30%|███ | 111808/371472 [8:53:26<19:30:36, 3.70it/s] 30%|███ | 111809/371472 [8:53:26<19:46:52, 3.65it/s] 30%|███ | 111810/371472 [8:53:26<19:32:33, 3.69it/s] 30%|███ | 111811/371472 [8:53:27<19:53:58, 3.62it/s] 30%|███ | 111812/371472 [8:53:27<20:11:46, 3.57it/s] 30%|███ | 111813/371472 [8:53:27<20:17:25, 3.55it/s] 30%|███ | 111814/371472 [8:53:27<20:10:04, 3.58it/s] 30%|███ | 111815/371472 [8:53:28<20:09:42, 3.58it/s] 30%|███ | 111816/371472 [8:53:28<21:02:34, 3.43it/s] 30%|███ | 111817/371472 [8:53:28<22:44:01, 3.17it/s] 30%|███ | 111818/371472 [8:53:29<21:06:35, 3.42it/s] 30%|███ | 111819/371472 [8:53:29<21:29:39, 3.36it/s] 30%|███ | 111820/371472 [8:53:29<21:44:37, 3.32it/s] {'loss': 3.3147, 'learning_rate': 7.294220948522916e-07, 'epoch': 4.82} + 30%|███ | 111820/371472 [8:53:29<21:44:37, 3.32it/s] 30%|███ | 111821/371472 [8:53:30<22:33:56, 3.20it/s] 30%|███ | 111822/371472 [8:53:30<24:53:28, 2.90it/s] 30%|███ | 111823/371472 [8:53:30<22:56:09, 3.14it/s] 30%|███ | 111824/371472 [8:53:30<21:42:57, 3.32it/s] 30%|███ | 111825/371472 [8:53:31<21:26:56, 3.36it/s] 30%|███ | 111826/371472 [8:53:31<21:41:13, 3.33it/s] 30%|███ | 111827/371472 [8:53:31<20:34:11, 3.51it/s] 30%|███ | 111828/371472 [8:53:32<20:10:08, 3.58it/s] 30%|███ | 111829/371472 [8:53:32<20:21:10, 3.54it/s] 30%|███ | 111830/371472 [8:53:32<20:49:08, 3.46it/s] 30%|███ | 111831/371472 [8:53:32<20:00:10, 3.61it/s] 30%|███ | 111832/371472 [8:53:33<21:38:45, 3.33it/s] 30%|███ | 111833/371472 [8:53:33<20:45:00, 3.48it/s] 30%|███ | 111834/371472 [8:53:33<20:15:15, 3.56it/s] 30%|███ | 111835/371472 [8:53:34<19:16:17, 3.74it/s] 30%|███ | 111836/371472 [8:53:34<18:53:43, 3.82it/s] 30%|███ | 111837/371472 [8:53:34<19:07:29, 3.77it/s] 30%|███ | 111838/371472 [8:53:34<20:11:45, 3.57it/s] 30%|███ | 111839/371472 [8:53:35<22:08:20, 3.26it/s] 30%|███ | 111840/371472 [8:53:35<21:07:32, 3.41it/s] {'loss': 3.3394, 'learning_rate': 7.293736128768127e-07, 'epoch': 4.82} + 30%|███ | 111840/371472 [8:53:35<21:07:32, 3.41it/s] 30%|███ | 111841/371472 [8:53:35<21:07:38, 3.41it/s] 30%|███ | 111842/371472 [8:53:36<20:44:40, 3.48it/s] 30%|███ | 111843/371472 [8:53:36<19:44:49, 3.65it/s] 30%|███ | 111844/371472 [8:53:36<19:06:19, 3.77it/s] 30%|███ | 111845/371472 [8:53:36<19:02:40, 3.79it/s] 30%|███ | 111846/371472 [8:53:37<19:14:49, 3.75it/s] 30%|███ | 111847/371472 [8:53:37<19:29:39, 3.70it/s] 30%|███ | 111848/371472 [8:53:37<19:04:53, 3.78it/s] 30%|███ | 111849/371472 [8:53:37<18:57:28, 3.80it/s] 30%|███ | 111850/371472 [8:53:38<19:38:55, 3.67it/s] 30%|███ | 111851/371472 [8:53:38<19:37:41, 3.67it/s] 30%|███ | 111852/371472 [8:53:38<20:40:40, 3.49it/s] 30%|███ | 111853/371472 [8:53:39<21:40:35, 3.33it/s] 30%|███ | 111854/371472 [8:53:39<20:51:58, 3.46it/s] 30%|███ | 111855/371472 [8:53:39<20:39:00, 3.49it/s] 30%|███ | 111856/371472 [8:53:39<21:29:58, 3.35it/s] 30%|███ | 111857/371472 [8:53:40<21:03:57, 3.42it/s] 30%|███ | 111858/371472 [8:53:40<21:21:35, 3.38it/s] 30%|███ | 111859/371472 [8:53:40<20:43:19, 3.48it/s] 30%|███ | 111860/371472 [8:53:41<20:23:17, 3.54it/s] {'loss': 3.4783, 'learning_rate': 7.293251309013337e-07, 'epoch': 4.82} + 30%|███ | 111860/371472 [8:53:41<20:23:17, 3.54it/s] 30%|███ | 111861/371472 [8:53:41<21:09:08, 3.41it/s] 30%|███ | 111862/371472 [8:53:41<20:58:11, 3.44it/s] 30%|███ | 111863/371472 [8:53:42<21:43:14, 3.32it/s] 30%|███ | 111864/371472 [8:53:42<21:15:19, 3.39it/s] 30%|███ | 111865/371472 [8:53:42<21:09:25, 3.41it/s] 30%|███ | 111866/371472 [8:53:42<22:18:00, 3.23it/s] 30%|███ | 111867/371472 [8:53:43<21:30:21, 3.35it/s] 30%|███ | 111868/371472 [8:53:43<21:47:00, 3.31it/s] 30%|███ | 111869/371472 [8:53:43<21:10:17, 3.41it/s] 30%|███ | 111870/371472 [8:53:44<21:22:15, 3.37it/s] 30%|███ | 111871/371472 [8:53:44<20:52:16, 3.46it/s] 30%|███ | 111872/371472 [8:53:44<21:11:49, 3.40it/s] 30%|███ | 111873/371472 [8:53:44<20:29:24, 3.52it/s] 30%|███ | 111874/371472 [8:53:45<20:03:45, 3.59it/s] 30%|███ | 111875/371472 [8:53:45<19:57:20, 3.61it/s] 30%|███ | 111876/371472 [8:53:45<19:53:15, 3.63it/s] 30%|███ | 111877/371472 [8:53:46<21:25:42, 3.37it/s] 30%|███ | 111878/371472 [8:53:46<20:35:25, 3.50it/s] 30%|███ | 111879/371472 [8:53:46<19:34:47, 3.68it/s] 30%|███ | 111880/371472 [8:53:46<21:12:39, 3.40it/s] {'loss': 3.1755, 'learning_rate': 7.292766489258548e-07, 'epoch': 4.82} + 30%|███ | 111880/371472 [8:53:46<21:12:39, 3.40it/s] 30%|███ | 111881/371472 [8:53:47<20:20:41, 3.54it/s] 30%|███ | 111882/371472 [8:53:47<20:00:14, 3.60it/s] 30%|███ | 111883/371472 [8:53:47<21:00:24, 3.43it/s] 30%|███ | 111884/371472 [8:53:48<20:30:50, 3.52it/s] 30%|███ | 111885/371472 [8:53:48<20:08:47, 3.58it/s] 30%|███ | 111886/371472 [8:53:48<19:49:00, 3.64it/s] 30%|███ | 111887/371472 [8:53:48<21:08:01, 3.41it/s] 30%|███ | 111888/371472 [8:53:49<23:22:30, 3.08it/s] 30%|███ | 111889/371472 [8:53:49<21:57:28, 3.28it/s] 30%|███ | 111890/371472 [8:53:49<22:23:40, 3.22it/s] 30%|███ | 111891/371472 [8:53:50<22:38:08, 3.19it/s] 30%|███ | 111892/371472 [8:53:50<21:22:41, 3.37it/s] 30%|███ | 111893/371472 [8:53:50<21:49:30, 3.30it/s] 30%|███ | 111894/371472 [8:53:51<20:43:06, 3.48it/s] 30%|███ | 111895/371472 [8:53:51<20:12:25, 3.57it/s] 30%|███ | 111896/371472 [8:53:51<22:07:52, 3.26it/s] 30%|███ | 111897/371472 [8:53:51<21:30:28, 3.35it/s] 30%|███ | 111898/371472 [8:53:52<21:29:05, 3.36it/s] 30%|███ | 111899/371472 [8:53:52<21:08:17, 3.41it/s] 30%|███ | 111900/371472 [8:53:52<21:45:51, 3.31it/s] {'loss': 3.2937, 'learning_rate': 7.29228166950376e-07, 'epoch': 4.82} + 30%|███ | 111900/371472 [8:53:52<21:45:51, 3.31it/s] 30%|███ | 111901/371472 [8:53:53<20:38:47, 3.49it/s] 30%|███ | 111902/371472 [8:53:53<20:02:24, 3.60it/s] 30%|███ | 111903/371472 [8:53:53<20:54:33, 3.45it/s] 30%|███ | 111904/371472 [8:53:53<21:02:22, 3.43it/s] 30%|███ | 111905/371472 [8:53:54<21:19:59, 3.38it/s] 30%|███ | 111906/371472 [8:53:54<21:22:09, 3.37it/s] 30%|███ | 111907/371472 [8:53:54<20:25:51, 3.53it/s] 30%|███ | 111908/371472 [8:53:55<19:48:49, 3.64it/s] 30%|███ | 111909/371472 [8:53:55<20:24:21, 3.53it/s] 30%|███ | 111910/371472 [8:53:55<20:17:16, 3.55it/s] 30%|███ | 111911/371472 [8:53:55<20:08:01, 3.58it/s] 30%|███ | 111912/371472 [8:53:56<19:50:37, 3.63it/s] 30%|███ | 111913/371472 [8:53:56<19:16:17, 3.74it/s] 30%|███ | 111914/371472 [8:53:56<19:37:31, 3.67it/s] 30%|███ | 111915/371472 [8:53:57<19:34:00, 3.68it/s] 30%|███ | 111916/371472 [8:53:57<20:36:15, 3.50it/s] 30%|███ | 111917/371472 [8:53:57<19:43:30, 3.66it/s] 30%|███ | 111918/371472 [8:53:57<19:14:11, 3.75it/s] 30%|███ | 111919/371472 [8:53:58<19:11:41, 3.76it/s] 30%|███ | 111920/371472 [8:53:58<19:52:29, 3.63it/s] {'loss': 3.2458, 'learning_rate': 7.291796849748972e-07, 'epoch': 4.82} + 30%|███ | 111920/371472 [8:53:58<19:52:29, 3.63it/s] 30%|███ | 111921/371472 [8:53:58<20:10:10, 3.57it/s] 30%|███ | 111922/371472 [8:53:58<20:00:12, 3.60it/s] 30%|███ | 111923/371472 [8:53:59<19:28:07, 3.70it/s] 30%|███ | 111924/371472 [8:53:59<21:20:18, 3.38it/s] 30%|███ | 111925/371472 [8:53:59<20:49:09, 3.46it/s] 30%|███ | 111926/371472 [8:54:00<20:25:44, 3.53it/s] 30%|███ | 111927/371472 [8:54:00<20:23:17, 3.54it/s] 30%|███ | 111928/371472 [8:54:00<20:39:25, 3.49it/s] 30%|███ | 111929/371472 [8:54:00<20:28:59, 3.52it/s] 30%|███ | 111930/371472 [8:54:01<20:03:51, 3.59it/s] 30%|███ | 111931/371472 [8:54:01<19:30:23, 3.70it/s] 30%|███ | 111932/371472 [8:54:01<20:42:14, 3.48it/s] 30%|███ | 111933/371472 [8:54:02<20:03:54, 3.59it/s] 30%|███ | 111934/371472 [8:54:02<20:10:02, 3.57it/s] 30%|███ | 111935/371472 [8:54:02<19:45:22, 3.65it/s] 30%|███ | 111936/371472 [8:54:02<19:33:43, 3.69it/s] 30%|███ | 111937/371472 [8:54:03<19:24:28, 3.71it/s] 30%|███ | 111938/371472 [8:54:03<20:00:45, 3.60it/s] 30%|███ | 111939/371472 [8:54:03<22:07:53, 3.26it/s] 30%|███ | 111940/371472 [8:54:04<21:20:22, 3.38it/s] {'loss': 3.3099, 'learning_rate': 7.291312029994182e-07, 'epoch': 4.82} + 30%|███ | 111940/371472 [8:54:04<21:20:22, 3.38it/s] 30%|███ | 111941/371472 [8:54:04<21:14:21, 3.39it/s] 30%|███ | 111942/371472 [8:54:04<21:54:15, 3.29it/s] 30%|███ | 111943/371472 [8:54:04<20:57:03, 3.44it/s] 30%|███ | 111944/371472 [8:54:05<21:00:30, 3.43it/s] 30%|███ | 111945/371472 [8:54:05<19:56:02, 3.62it/s] 30%|███ | 111946/371472 [8:54:05<21:14:15, 3.39it/s] 30%|███ | 111947/371472 [8:54:06<20:23:58, 3.53it/s] 30%|███ | 111948/371472 [8:54:06<20:17:51, 3.55it/s] 30%|███ | 111949/371472 [8:54:06<20:01:48, 3.60it/s] 30%|███ | 111950/371472 [8:54:06<19:16:02, 3.74it/s] 30%|███ | 111951/371472 [8:54:07<20:26:44, 3.53it/s] 30%|███ | 111952/371472 [8:54:07<20:05:23, 3.59it/s] 30%|███ | 111953/371472 [8:54:07<20:31:13, 3.51it/s] 30%|███ | 111954/371472 [8:54:08<20:28:39, 3.52it/s] 30%|███ | 111955/371472 [8:54:08<19:53:34, 3.62it/s] 30%|███ | 111956/371472 [8:54:08<19:16:36, 3.74it/s] 30%|███ | 111957/371472 [8:54:08<20:07:15, 3.58it/s] 30%|███ | 111958/371472 [8:54:09<20:05:10, 3.59it/s] 30%|███ | 111959/371472 [8:54:09<19:38:35, 3.67it/s] 30%|███ | 111960/371472 [8:54:09<19:48:28, 3.64it/s] {'loss': 3.531, 'learning_rate': 7.290827210239393e-07, 'epoch': 4.82} + 30%|███ | 111960/371472 [8:54:09<19:48:28, 3.64it/s] 30%|███ | 111961/371472 [8:54:09<19:33:53, 3.68it/s] 30%|███ | 111962/371472 [8:54:10<20:08:11, 3.58it/s] 30%|███ | 111963/371472 [8:54:10<19:58:07, 3.61it/s] 30%|███ | 111964/371472 [8:54:10<21:06:08, 3.42it/s] 30%|███ | 111965/371472 [8:54:11<20:23:19, 3.54it/s] 30%|███ | 111966/371472 [8:54:11<19:51:20, 3.63it/s] 30%|███ | 111967/371472 [8:54:11<20:38:07, 3.49it/s] 30%|███ | 111968/371472 [8:54:11<19:49:49, 3.64it/s] 30%|███ | 111969/371472 [8:54:12<21:32:51, 3.35it/s] 30%|███ | 111970/371472 [8:54:12<20:48:11, 3.47it/s] 30%|███ | 111971/371472 [8:54:12<20:22:43, 3.54it/s] 30%|███ | 111972/371472 [8:54:13<20:11:00, 3.57it/s] 30%|███ | 111973/371472 [8:54:13<21:34:09, 3.34it/s] 30%|███ | 111974/371472 [8:54:13<20:58:16, 3.44it/s] 30%|███ | 111975/371472 [8:54:13<21:10:25, 3.40it/s] 30%|███ | 111976/371472 [8:54:14<21:37:09, 3.33it/s] 30%|███ | 111977/371472 [8:54:14<21:00:35, 3.43it/s] 30%|███ | 111978/371472 [8:54:14<20:25:26, 3.53it/s] 30%|███ | 111979/371472 [8:54:15<20:35:03, 3.50it/s] 30%|███ | 111980/371472 [8:54:15<21:17:42, 3.38it/s] {'loss': 3.1037, 'learning_rate': 7.290342390484604e-07, 'epoch': 4.82} + 30%|███ | 111980/371472 [8:54:15<21:17:42, 3.38it/s] 30%|███ | 111981/371472 [8:54:15<20:30:06, 3.52it/s] 30%|███ | 111982/371472 [8:54:15<20:18:15, 3.55it/s] 30%|███ | 111983/371472 [8:54:16<21:22:47, 3.37it/s] 30%|███ | 111984/371472 [8:54:16<21:50:15, 3.30it/s] 30%|███ | 111985/371472 [8:54:16<21:02:54, 3.42it/s] 30%|███ | 111986/371472 [8:54:17<21:25:34, 3.36it/s] 30%|███ | 111987/371472 [8:54:17<21:17:08, 3.39it/s] 30%|███ | 111988/371472 [8:54:17<21:04:38, 3.42it/s] 30%|███ | 111989/371472 [8:54:18<20:33:55, 3.50it/s] 30%|███ | 111990/371472 [8:54:18<22:33:10, 3.20it/s] 30%|███ | 111991/371472 [8:54:18<21:25:32, 3.36it/s] 30%|███ | 111992/371472 [8:54:18<20:20:55, 3.54it/s] 30%|███ | 111993/371472 [8:54:19<19:16:02, 3.74it/s] 30%|███ | 111994/371472 [8:54:19<19:04:44, 3.78it/s] 30%|███ | 111995/371472 [8:54:19<19:24:45, 3.71it/s] 30%|███ | 111996/371472 [8:54:19<19:29:37, 3.70it/s] 30%|███ | 111997/371472 [8:54:20<20:11:41, 3.57it/s] 30%|███ | 111998/371472 [8:54:20<19:44:48, 3.65it/s] 30%|███ | 111999/371472 [8:54:20<19:38:33, 3.67it/s] 30%|███ | 112000/371472 [8:54:21<19:21:23, 3.72it/s] {'loss': 3.4912, 'learning_rate': 7.289857570729814e-07, 'epoch': 4.82} + 30%|███ | 112000/371472 [8:54:21<19:21:23, 3.72it/s] 30%|███ | 112001/371472 [8:54:21<20:48:04, 3.46it/s] 30%|███ | 112002/371472 [8:54:21<19:39:43, 3.67it/s] 30%|███ | 112003/371472 [8:54:21<20:11:59, 3.57it/s] 30%|███ | 112004/371472 [8:54:22<20:25:52, 3.53it/s] 30%|███ | 112005/371472 [8:54:22<20:21:02, 3.54it/s] 30%|███ | 112006/371472 [8:54:22<20:15:04, 3.56it/s] 30%|███ | 112007/371472 [8:54:23<20:54:45, 3.45it/s] 30%|███ | 112008/371472 [8:54:23<20:07:22, 3.58it/s] 30%|███ | 112009/371472 [8:54:23<20:47:04, 3.47it/s] 30%|███ | 112010/371472 [8:54:23<19:51:24, 3.63it/s] 30%|███ | 112011/371472 [8:54:24<19:29:47, 3.70it/s] 30%|███ | 112012/371472 [8:54:24<19:38:21, 3.67it/s] 30%|███ | 112013/371472 [8:54:24<19:22:12, 3.72it/s] 30%|███ | 112014/371472 [8:54:24<18:40:12, 3.86it/s] 30%|███ | 112015/371472 [8:54:25<18:54:26, 3.81it/s] 30%|███ | 112016/371472 [8:54:25<18:47:23, 3.84it/s] 30%|███ | 112017/371472 [8:54:25<18:47:53, 3.83it/s] 30%|███ | 112018/371472 [8:54:26<22:44:58, 3.17it/s] 30%|███ | 112019/371472 [8:54:26<21:31:44, 3.35it/s] 30%|███ | 112020/371472 [8:54:26<20:14:43, 3.56it/s] {'loss': 3.4789, 'learning_rate': 7.289372750975025e-07, 'epoch': 4.82} + 30%|███ | 112020/371472 [8:54:26<20:14:43, 3.56it/s] 30%|███ | 112021/371472 [8:54:26<20:17:34, 3.55it/s] 30%|███ | 112022/371472 [8:54:27<21:25:59, 3.36it/s] 30%|███ | 112023/371472 [8:54:27<21:31:48, 3.35it/s] 30%|███ | 112024/371472 [8:54:27<21:20:10, 3.38it/s] 30%|███ | 112025/371472 [8:54:28<21:11:36, 3.40it/s] 30%|███ | 112026/371472 [8:54:28<20:48:14, 3.46it/s] 30%|███ | 112027/371472 [8:54:28<20:33:04, 3.51it/s] 30%|███ | 112028/371472 [8:54:29<20:13:37, 3.56it/s] 30%|███ | 112029/371472 [8:54:29<19:54:19, 3.62it/s] 30%|███ | 112030/371472 [8:54:29<18:55:10, 3.81it/s] 30%|███ | 112031/371472 [8:54:29<18:29:08, 3.90it/s] 30%|███ | 112032/371472 [8:54:29<18:21:00, 3.93it/s] 30%|███ | 112033/371472 [8:54:30<18:15:22, 3.95it/s] 30%|███ | 112034/371472 [8:54:30<18:46:44, 3.84it/s] 30%|███ | 112035/371472 [8:54:30<18:34:17, 3.88it/s] 30%|███ | 112036/371472 [8:54:31<18:39:01, 3.86it/s] 30%|███ | 112037/371472 [8:54:31<18:34:01, 3.88it/s] 30%|███ | 112038/371472 [8:54:31<18:47:36, 3.83it/s] 30%|███ | 112039/371472 [8:54:31<18:56:16, 3.81it/s] 30%|███ | 112040/371472 [8:54:32<19:34:40, 3.68it/s] {'loss': 3.5027, 'learning_rate': 7.288887931220237e-07, 'epoch': 4.83} + 30%|███ | 112040/371472 [8:54:32<19:34:40, 3.68it/s] 30%|███ | 112041/371472 [8:54:32<19:05:18, 3.78it/s] 30%|███ | 112042/371472 [8:54:32<19:21:18, 3.72it/s] 30%|███ | 112043/371472 [8:54:32<20:20:14, 3.54it/s] 30%|███ | 112044/371472 [8:54:33<19:57:11, 3.61it/s] 30%|███ | 112045/371472 [8:54:33<20:00:57, 3.60it/s] 30%|███ | 112046/371472 [8:54:33<19:29:00, 3.70it/s] 30%|███ | 112047/371472 [8:54:34<19:38:13, 3.67it/s] 30%|███ | 112048/371472 [8:54:34<20:54:41, 3.45it/s] 30%|███ | 112049/371472 [8:54:34<19:58:49, 3.61it/s] 30%|███ | 112050/371472 [8:54:34<19:22:13, 3.72it/s] 30%|███ | 112051/371472 [8:54:35<19:56:52, 3.61it/s] 30%|███ | 112052/371472 [8:54:35<20:03:56, 3.59it/s] 30%|███ | 112053/371472 [8:54:35<19:47:27, 3.64it/s] 30%|███ | 112054/371472 [8:54:36<20:42:22, 3.48it/s] 30%|███ | 112055/371472 [8:54:36<19:41:06, 3.66it/s] 30%|███ | 112056/371472 [8:54:36<19:28:47, 3.70it/s] 30%|███ | 112057/371472 [8:54:36<19:07:39, 3.77it/s] 30%|███ | 112058/371472 [8:54:37<18:59:53, 3.79it/s] 30%|███ | 112059/371472 [8:54:37<19:01:06, 3.79it/s] 30%|███ | 112060/371472 [8:54:37<22:36:10, 3.19it/s] {'loss': 3.4467, 'learning_rate': 7.288403111465449e-07, 'epoch': 4.83} + 30%|███ | 112060/371472 [8:54:37<22:36:10, 3.19it/s] 30%|███ | 112061/371472 [8:54:38<21:38:00, 3.33it/s] 30%|███ | 112062/371472 [8:54:38<20:36:17, 3.50it/s] 30%|███ | 112063/371472 [8:54:38<20:31:41, 3.51it/s] 30%|███ | 112064/371472 [8:54:38<20:00:47, 3.60it/s] 30%|███ | 112065/371472 [8:54:39<20:11:54, 3.57it/s] 30%|███ | 112066/371472 [8:54:39<21:53:45, 3.29it/s] 30%|███ | 112067/371472 [8:54:39<22:14:36, 3.24it/s] 30%|███ | 112068/371472 [8:54:40<21:02:15, 3.43it/s] 30%|███ | 112069/371472 [8:54:40<21:21:51, 3.37it/s] 30%|███ | 112070/371472 [8:54:40<22:47:14, 3.16it/s] 30%|███ | 112071/371472 [8:54:40<22:25:31, 3.21it/s] 30%|███ | 112072/371472 [8:54:41<21:45:55, 3.31it/s] 30%|███ | 112073/371472 [8:54:41<22:50:22, 3.15it/s] 30%|███ | 112074/371472 [8:54:41<21:35:13, 3.34it/s] 30%|███ | 112075/371472 [8:54:42<21:33:35, 3.34it/s] 30%|███ | 112076/371472 [8:54:42<20:37:49, 3.49it/s] 30%|███ | 112077/371472 [8:54:42<19:56:01, 3.61it/s] 30%|███ | 112078/371472 [8:54:42<19:40:53, 3.66it/s] 30%|███ | 112079/371472 [8:54:43<19:19:37, 3.73it/s] 30%|███ | 112080/371472 [8:54:43<19:57:44, 3.61it/s] {'loss': 3.3026, 'learning_rate': 7.28791829171066e-07, 'epoch': 4.83} + 30%|███ | 112080/371472 [8:54:43<19:57:44, 3.61it/s] 30%|███ | 112081/371472 [8:54:43<20:10:06, 3.57it/s] 30%|███ | 112082/371472 [8:54:44<19:20:14, 3.73it/s] 30%|███ | 112083/371472 [8:54:44<19:30:13, 3.69it/s] 30%|███ | 112084/371472 [8:54:44<19:13:09, 3.75it/s] 30%|███ | 112085/371472 [8:54:44<18:40:05, 3.86it/s] 30%|███ | 112086/371472 [8:54:45<18:28:25, 3.90it/s] 30%|███ | 112087/371472 [8:54:45<18:43:45, 3.85it/s] 30%|███ | 112088/371472 [8:54:45<19:02:59, 3.78it/s] 30%|███ | 112089/371472 [8:54:45<19:27:54, 3.70it/s] 30%|███ | 112090/371472 [8:54:46<19:27:17, 3.70it/s] 30%|███ | 112091/371472 [8:54:46<19:24:09, 3.71it/s] 30%|███ | 112092/371472 [8:54:46<20:30:07, 3.51it/s] 30%|███ | 112093/371472 [8:54:47<23:29:48, 3.07it/s] 30%|███ | 112094/371472 [8:54:47<21:51:46, 3.30it/s] 30%|███ | 112095/371472 [8:54:47<20:48:50, 3.46it/s] 30%|███ | 112096/371472 [8:54:48<22:02:47, 3.27it/s] 30%|███ | 112097/371472 [8:54:48<21:22:09, 3.37it/s] 30%|███ | 112098/371472 [8:54:48<20:55:13, 3.44it/s] 30%|███ | 112099/371472 [8:54:48<20:15:16, 3.56it/s] 30%|███ | 112100/371472 [8:54:49<20:17:48, 3.55it/s] {'loss': 3.316, 'learning_rate': 7.28743347195587e-07, 'epoch': 4.83} + 30%|███ | 112100/371472 [8:54:49<20:17:48, 3.55it/s] 30%|███ | 112101/371472 [8:54:49<20:32:40, 3.51it/s] 30%|███ | 112102/371472 [8:54:49<20:16:48, 3.55it/s] 30%|███ | 112103/371472 [8:54:49<20:48:38, 3.46it/s] 30%|███ | 112104/371472 [8:54:50<19:55:32, 3.62it/s] 30%|███ | 112105/371472 [8:54:50<20:48:24, 3.46it/s] 30%|███ | 112106/371472 [8:54:50<20:29:16, 3.52it/s] 30%|███ | 112107/371472 [8:54:51<21:56:31, 3.28it/s] 30%|███ | 112108/371472 [8:54:51<21:24:20, 3.37it/s] 30%|███ | 112109/371472 [8:54:51<21:52:37, 3.29it/s] 30%|███ | 112110/371472 [8:54:52<22:13:26, 3.24it/s] 30%|███ | 112111/371472 [8:54:52<21:41:18, 3.32it/s] 30%|███ | 112112/371472 [8:54:52<21:17:38, 3.38it/s] 30%|███ | 112113/371472 [8:54:53<24:34:38, 2.93it/s] 30%|███ | 112114/371472 [8:54:53<22:43:29, 3.17it/s] 30%|███ | 112115/371472 [8:54:53<21:34:51, 3.34it/s] 30%|███ | 112116/371472 [8:54:53<20:46:52, 3.47it/s] 30%|███ | 112117/371472 [8:54:54<20:00:20, 3.60it/s] 30%|███ | 112118/371472 [8:54:54<20:47:01, 3.47it/s] 30%|███ | 112119/371472 [8:54:54<20:49:50, 3.46it/s] 30%|███ | 112120/371472 [8:54:54<19:41:03, 3.66it/s] {'loss': 3.2805, 'learning_rate': 7.28694865220108e-07, 'epoch': 4.83} + 30%|███ | 112120/371472 [8:54:54<19:41:03, 3.66it/s] 30%|███ | 112121/371472 [8:54:55<19:47:05, 3.64it/s] 30%|███ | 112122/371472 [8:54:55<22:13:06, 3.24it/s] 30%|███ | 112123/371472 [8:54:55<21:22:47, 3.37it/s] 30%|███ | 112124/371472 [8:54:56<20:17:18, 3.55it/s] 30%|███ | 112125/371472 [8:54:56<20:17:02, 3.55it/s] 30%|███ | 112126/371472 [8:54:56<19:51:59, 3.63it/s] 30%|███ | 112127/371472 [8:54:57<20:29:02, 3.52it/s] 30%|███ | 112128/371472 [8:54:57<19:33:25, 3.68it/s] 30%|███ | 112129/371472 [8:54:57<20:51:49, 3.45it/s] 30%|███ | 112130/371472 [8:54:57<20:06:51, 3.58it/s] 30%|███ | 112131/371472 [8:54:58<20:03:20, 3.59it/s] 30%|███ | 112132/371472 [8:54:58<19:19:04, 3.73it/s] 30%|███ | 112133/371472 [8:54:58<19:07:01, 3.77it/s] 30%|███ | 112134/371472 [8:54:58<19:31:19, 3.69it/s] 30%|███ | 112135/371472 [8:54:59<19:03:36, 3.78it/s] 30%|███ | 112136/371472 [8:54:59<19:11:56, 3.75it/s] 30%|███ | 112137/371472 [8:54:59<19:18:42, 3.73it/s] 30%|███ | 112138/371472 [8:54:59<19:41:56, 3.66it/s] 30%|███ | 112139/371472 [8:55:00<20:33:54, 3.50it/s] 30%|███ | 112140/371472 [8:55:00<20:00:58, 3.60it/s] {'loss': 3.3764, 'learning_rate': 7.286463832446293e-07, 'epoch': 4.83} + 30%|███ | 112140/371472 [8:55:00<20:00:58, 3.60it/s] 30%|███ | 112141/371472 [8:55:00<19:46:55, 3.64it/s] 30%|███ | 112142/371472 [8:55:01<21:28:18, 3.35it/s] 30%|███ | 112143/371472 [8:55:01<20:52:39, 3.45it/s] 30%|███ | 112144/371472 [8:55:01<20:27:47, 3.52it/s] 30%|███ | 112145/371472 [8:55:02<21:00:03, 3.43it/s] 30%|███ | 112146/371472 [8:55:02<24:26:25, 2.95it/s] 30%|███ | 112147/371472 [8:55:02<23:53:24, 3.02it/s] 30%|███ | 112148/371472 [8:55:03<22:21:39, 3.22it/s] 30%|███ | 112149/371472 [8:55:03<21:10:28, 3.40it/s] 30%|███ | 112150/371472 [8:55:03<21:49:45, 3.30it/s] 30%|███ | 112151/371472 [8:55:03<21:38:35, 3.33it/s] 30%|███ | 112152/371472 [8:55:04<21:26:32, 3.36it/s] 30%|███ | 112153/371472 [8:55:04<21:51:14, 3.30it/s] 30%|███ | 112154/371472 [8:55:04<20:54:17, 3.45it/s] 30%|███ | 112155/371472 [8:55:05<20:35:10, 3.50it/s] 30%|███ | 112156/371472 [8:55:05<20:16:00, 3.55it/s] 30%|███ | 112157/371472 [8:55:05<19:08:21, 3.76it/s] 30%|███ | 112158/371472 [8:55:05<20:50:18, 3.46it/s] 30%|███ | 112159/371472 [8:55:06<20:40:54, 3.48it/s] 30%|███ | 112160/371472 [8:55:06<19:54:02, 3.62it/s] {'loss': 3.384, 'learning_rate': 7.285979012691503e-07, 'epoch': 4.83} + 30%|███ | 112160/371472 [8:55:06<19:54:02, 3.62it/s] 30%|███ | 112161/371472 [8:55:06<20:25:52, 3.53it/s] 30%|███ | 112162/371472 [8:55:06<19:16:41, 3.74it/s] 30%|███ | 112163/371472 [8:55:07<20:28:10, 3.52it/s] 30%|███ | 112164/371472 [8:55:07<21:26:57, 3.36it/s] 30%|███ | 112165/371472 [8:55:07<20:11:10, 3.57it/s] 30%|███ | 112166/371472 [8:55:08<19:36:42, 3.67it/s] 30%|███ | 112167/371472 [8:55:08<19:48:37, 3.64it/s] 30%|███ | 112168/371472 [8:55:08<19:55:52, 3.61it/s] 30%|███ | 112169/371472 [8:55:08<19:35:35, 3.68it/s] 30%|███ | 112170/371472 [8:55:09<20:17:55, 3.55it/s] 30%|███ | 112171/371472 [8:55:09<20:10:59, 3.57it/s] 30%|███ | 112172/371472 [8:55:09<19:22:58, 3.72it/s] 30%|███ | 112173/371472 [8:55:10<20:37:29, 3.49it/s] 30%|███ | 112174/371472 [8:55:10<21:10:57, 3.40it/s] 30%|███ | 112175/371472 [8:55:10<20:51:06, 3.45it/s] 30%|███ | 112176/371472 [8:55:10<19:52:44, 3.62it/s] 30%|███ | 112177/371472 [8:55:11<19:46:17, 3.64it/s] 30%|███ | 112178/371472 [8:55:11<20:22:44, 3.53it/s] 30%|███ | 112179/371472 [8:55:11<21:34:12, 3.34it/s] 30%|███ | 112180/371472 [8:55:12<22:52:42, 3.15it/s] {'loss': 3.4574, 'learning_rate': 7.285494192936714e-07, 'epoch': 4.83} + 30%|███ | 112180/371472 [8:55:12<22:52:42, 3.15it/s] 30%|███ | 112181/371472 [8:55:12<21:28:55, 3.35it/s] 30%|███ | 112182/371472 [8:55:12<23:32:05, 3.06it/s] 30%|███ | 112183/371472 [8:55:13<22:40:32, 3.18it/s] 30%|███ | 112184/371472 [8:55:13<21:34:13, 3.34it/s] 30%|███ | 112185/371472 [8:55:13<20:57:36, 3.44it/s] 30%|███ | 112186/371472 [8:55:13<20:46:30, 3.47it/s] 30%|███ | 112187/371472 [8:55:14<19:53:52, 3.62it/s] 30%|███ | 112188/371472 [8:55:14<19:03:26, 3.78it/s] 30%|███ | 112189/371472 [8:55:14<18:50:48, 3.82it/s] 30%|███ | 112190/371472 [8:55:15<20:08:28, 3.58it/s] 30%|███ | 112191/371472 [8:55:15<21:41:16, 3.32it/s] 30%|███ | 112192/371472 [8:55:15<21:29:36, 3.35it/s] 30%|███ | 112193/371472 [8:55:15<21:41:42, 3.32it/s] 30%|███ | 112194/371472 [8:55:16<21:08:45, 3.41it/s] 30%|███ | 112195/371472 [8:55:16<20:01:38, 3.60it/s] 30%|███ | 112196/371472 [8:55:16<20:46:59, 3.47it/s] 30%|███ | 112197/371472 [8:55:17<19:39:41, 3.66it/s] 30%|███ | 112198/371472 [8:55:17<20:45:46, 3.47it/s] 30%|███ | 112199/371472 [8:55:17<21:25:59, 3.36it/s] 30%|███ | 112200/371472 [8:55:17<21:37:01, 3.33it/s] {'loss': 3.3161, 'learning_rate': 7.285009373181926e-07, 'epoch': 4.83} + 30%|███ | 112200/371472 [8:55:17<21:37:01, 3.33it/s] 30%|███ | 112201/371472 [8:55:18<21:38:16, 3.33it/s] 30%|███ | 112202/371472 [8:55:18<21:44:04, 3.31it/s] 30%|███ | 112203/371472 [8:55:18<21:46:02, 3.31it/s] 30%|███ | 112204/371472 [8:55:19<20:44:35, 3.47it/s] 30%|███ | 112205/371472 [8:55:19<19:58:12, 3.61it/s] 30%|███ | 112206/371472 [8:55:19<21:45:37, 3.31it/s] 30%|███ | 112207/371472 [8:55:20<20:50:05, 3.46it/s] 30%|███ | 112208/371472 [8:55:20<21:12:43, 3.40it/s] 30%|███ | 112209/371472 [8:55:20<21:13:37, 3.39it/s] 30%|███ | 112210/371472 [8:55:20<20:41:15, 3.48it/s] 30%|███ | 112211/371472 [8:55:21<20:23:21, 3.53it/s] 30%|███ | 112212/371472 [8:55:21<19:40:28, 3.66it/s] 30%|███ | 112213/371472 [8:55:21<19:20:34, 3.72it/s] 30%|███ | 112214/371472 [8:55:21<19:39:23, 3.66it/s] 30%|███ | 112215/371472 [8:55:22<19:01:17, 3.79it/s] 30%|███ | 112216/371472 [8:55:22<20:19:07, 3.54it/s] 30%|███ | 112217/371472 [8:55:22<21:25:55, 3.36it/s] 30%|███ | 112218/371472 [8:55:23<20:45:41, 3.47it/s] 30%|███ | 112219/371472 [8:55:23<20:10:35, 3.57it/s] 30%|███ | 112220/371472 [8:55:23<19:50:26, 3.63it/s] {'loss': 3.4587, 'learning_rate': 7.284524553427137e-07, 'epoch': 4.83} + 30%|███ | 112220/371472 [8:55:23<19:50:26, 3.63it/s] 30%|███ | 112221/371472 [8:55:23<19:15:17, 3.74it/s] 30%|███ | 112222/371472 [8:55:24<18:59:57, 3.79it/s] 30%|███ | 112223/371472 [8:55:24<19:16:29, 3.74it/s] 30%|███ | 112224/371472 [8:55:24<19:14:57, 3.74it/s] 30%|███ | 112225/371472 [8:55:24<19:36:04, 3.67it/s] 30%|███ | 112226/371472 [8:55:25<18:54:47, 3.81it/s] 30%|███ | 112227/371472 [8:55:25<21:55:00, 3.29it/s] 30%|███ | 112228/371472 [8:55:25<21:47:52, 3.30it/s] 30%|███ | 112229/371472 [8:55:26<22:22:09, 3.22it/s] 30%|███ | 112230/371472 [8:55:26<21:26:32, 3.36it/s] 30%|███ | 112231/371472 [8:55:26<20:21:09, 3.54it/s] 30%|███ | 112232/371472 [8:55:27<19:39:00, 3.66it/s] 30%|███ | 112233/371472 [8:55:27<19:15:43, 3.74it/s] 30%|███ | 112234/371472 [8:55:27<20:16:10, 3.55it/s] 30%|███ | 112235/371472 [8:55:27<19:57:28, 3.61it/s] 30%|███ | 112236/371472 [8:55:28<19:42:39, 3.65it/s] 30%|███ | 112237/371472 [8:55:28<21:54:54, 3.29it/s] 30%|███ | 112238/371472 [8:55:28<21:28:55, 3.35it/s] 30%|███ | 112239/371472 [8:55:29<21:51:12, 3.30it/s] 30%|███ | 112240/371472 [8:55:29<21:17:15, 3.38it/s] {'loss': 3.3729, 'learning_rate': 7.284039733672347e-07, 'epoch': 4.83} + 30%|███ | 112240/371472 [8:55:29<21:17:15, 3.38it/s] 30%|███ | 112241/371472 [8:55:29<21:08:51, 3.41it/s] 30%|███ | 112242/371472 [8:55:29<20:09:38, 3.57it/s] 30%|███ | 112243/371472 [8:55:30<19:15:04, 3.74it/s] 30%|███ | 112244/371472 [8:55:30<19:55:20, 3.61it/s] 30%|███ | 112245/371472 [8:55:30<19:49:54, 3.63it/s] 30%|███ | 112246/371472 [8:55:30<19:31:07, 3.69it/s] 30%|███ | 112247/371472 [8:55:31<18:59:24, 3.79it/s] 30%|███ | 112248/371472 [8:55:31<21:36:48, 3.33it/s] 30%|███ | 112249/371472 [8:55:31<21:26:44, 3.36it/s] 30%|███ | 112250/371472 [8:55:32<20:15:41, 3.55it/s] 30%|███ | 112251/371472 [8:55:32<20:25:20, 3.53it/s] 30%|███ | 112252/371472 [8:55:32<20:10:18, 3.57it/s] 30%|███ | 112253/371472 [8:55:33<20:58:41, 3.43it/s] 30%|███ | 112254/371472 [8:55:33<19:40:17, 3.66it/s] 30%|███ | 112255/371472 [8:55:33<20:02:42, 3.59it/s] 30%|███ | 112256/371472 [8:55:33<19:46:47, 3.64it/s] 30%|███ | 112257/371472 [8:55:34<20:05:15, 3.58it/s] 30%|███ | 112258/371472 [8:55:34<20:02:34, 3.59it/s] 30%|███ | 112259/371472 [8:55:34<21:47:21, 3.30it/s] 30%|███ | 112260/371472 [8:55:35<22:01:33, 3.27it/s] {'loss': 3.4642, 'learning_rate': 7.283554913917558e-07, 'epoch': 4.84} + 30%|███ | 112260/371472 [8:55:35<22:01:33, 3.27it/s] 30%|███ | 112261/371472 [8:55:35<22:39:12, 3.18it/s] 30%|███ | 112262/371472 [8:55:35<21:14:18, 3.39it/s] 30%|███ | 112263/371472 [8:55:35<21:32:26, 3.34it/s] 30%|███ | 112264/371472 [8:55:36<22:07:16, 3.25it/s] 30%|███ | 112265/371472 [8:55:36<21:34:30, 3.34it/s] 30%|███ | 112266/371472 [8:55:36<22:05:26, 3.26it/s] 30%|███ | 112267/371472 [8:55:37<23:24:56, 3.07it/s] 30%|███ | 112268/371472 [8:55:37<22:30:51, 3.20it/s] 30%|███ | 112269/371472 [8:55:37<21:19:31, 3.38it/s] 30%|███ | 112270/371472 [8:55:38<21:16:57, 3.38it/s] 30%|███ | 112271/371472 [8:55:38<22:55:51, 3.14it/s] 30%|███ | 112272/371472 [8:55:38<22:02:07, 3.27it/s] 30%|███ | 112273/371472 [8:55:39<21:31:39, 3.34it/s] 30%|███ | 112274/371472 [8:55:39<20:28:00, 3.52it/s] 30%|███ | 112275/371472 [8:55:39<20:20:20, 3.54it/s] 30%|███ | 112276/371472 [8:55:39<20:53:43, 3.45it/s] 30%|███ | 112277/371472 [8:55:40<20:31:58, 3.51it/s] 30%|███ | 112278/371472 [8:55:40<20:12:31, 3.56it/s] 30%|███ | 112279/371472 [8:55:40<19:38:25, 3.67it/s] 30%|███ | 112280/371472 [8:55:40<19:38:42, 3.66it/s] {'loss': 3.3543, 'learning_rate': 7.28307009416277e-07, 'epoch': 4.84} + 30%|███ | 112280/371472 [8:55:40<19:38:42, 3.66it/s] 30%|███ | 112281/371472 [8:55:41<18:58:53, 3.79it/s] 30%|███ | 112282/371472 [8:55:41<19:01:36, 3.78it/s] 30%|███ | 112283/371472 [8:55:41<21:05:08, 3.41it/s] 30%|███ | 112284/371472 [8:55:42<20:37:15, 3.49it/s] 30%|███ | 112285/371472 [8:55:42<19:41:05, 3.66it/s] 30%|███ | 112286/371472 [8:55:42<20:25:26, 3.53it/s] 30%|███ | 112287/371472 [8:55:42<20:21:14, 3.54it/s] 30%|███ | 112288/371472 [8:55:43<20:25:25, 3.53it/s] 30%|███ | 112289/371472 [8:55:43<19:56:12, 3.61it/s] 30%|███ | 112290/371472 [8:55:43<19:24:10, 3.71it/s] 30%|███ | 112291/371472 [8:55:44<21:56:53, 3.28it/s] 30%|███ | 112292/371472 [8:55:44<21:43:17, 3.31it/s] 30%|███ | 112293/371472 [8:55:44<20:51:26, 3.45it/s] 30%|███ | 112294/371472 [8:55:44<20:30:09, 3.51it/s] 30%|███ | 112295/371472 [8:55:45<20:33:21, 3.50it/s] 30%|███ | 112296/371472 [8:55:45<20:24:03, 3.53it/s] 30%|███ | 112297/371472 [8:55:45<21:03:09, 3.42it/s] 30%|███ | 112298/371472 [8:55:46<21:07:36, 3.41it/s] 30%|███ | 112299/371472 [8:55:46<20:22:25, 3.53it/s] 30%|███ | 112300/371472 [8:55:46<20:40:37, 3.48it/s] {'loss': 3.3695, 'learning_rate': 7.282585274407982e-07, 'epoch': 4.84} + 30%|███ | 112300/371472 [8:55:46<20:40:37, 3.48it/s] 30%|███ | 112301/371472 [8:55:46<21:19:49, 3.38it/s] 30%|███ | 112302/371472 [8:55:47<22:05:21, 3.26it/s] 30%|███ | 112303/371472 [8:55:47<21:22:17, 3.37it/s] 30%|███ | 112304/371472 [8:55:47<20:39:57, 3.48it/s] 30%|███ | 112305/371472 [8:55:48<19:37:20, 3.67it/s] 30%|███ | 112306/371472 [8:55:48<19:06:36, 3.77it/s] 30%|███ | 112307/371472 [8:55:48<21:32:55, 3.34it/s] 30%|███ | 112308/371472 [8:55:49<22:43:52, 3.17it/s] 30%|███ | 112309/371472 [8:55:49<22:12:07, 3.24it/s] 30%|███ | 112310/371472 [8:55:49<20:54:38, 3.44it/s] 30%|███ | 112311/371472 [8:55:49<20:10:15, 3.57it/s] 30%|███ | 112312/371472 [8:55:50<19:54:57, 3.61it/s] 30%|███ | 112313/371472 [8:55:50<20:06:07, 3.58it/s] 30%|███ | 112314/371472 [8:55:50<20:15:41, 3.55it/s] 30%|███ | 112315/371472 [8:55:50<19:53:18, 3.62it/s] 30%|███ | 112316/371472 [8:55:51<20:02:20, 3.59it/s] 30%|███ | 112317/371472 [8:55:51<21:54:44, 3.29it/s] 30%|███ | 112318/371472 [8:55:51<21:56:29, 3.28it/s] 30%|███ | 112319/371472 [8:55:52<20:29:58, 3.51it/s] 30%|███ | 112320/371472 [8:55:52<20:43:22, 3.47it/s] {'loss': 3.4284, 'learning_rate': 7.282100454653192e-07, 'epoch': 4.84} + 30%|███ | 112320/371472 [8:55:52<20:43:22, 3.47it/s] 30%|███ | 112321/371472 [8:55:52<21:21:19, 3.37it/s] 30%|███ | 112322/371472 [8:55:53<20:34:55, 3.50it/s] 30%|███ | 112323/371472 [8:55:53<20:24:58, 3.53it/s] 30%|███ | 112324/371472 [8:55:53<20:32:27, 3.50it/s] 30%|███ | 112325/371472 [8:55:53<19:47:48, 3.64it/s] 30%|███ | 112326/371472 [8:55:54<23:34:10, 3.05it/s] 30%|███ | 112327/371472 [8:55:54<23:33:52, 3.05it/s] 30%|███ | 112328/371472 [8:55:54<23:34:39, 3.05it/s] 30%|███ | 112329/371472 [8:55:55<21:56:22, 3.28it/s] 30%|███ | 112330/371472 [8:55:55<22:00:40, 3.27it/s] 30%|███ | 112331/371472 [8:55:55<21:30:01, 3.35it/s] 30%|███ | 112332/371472 [8:55:56<20:42:07, 3.48it/s] 30%|███ | 112333/371472 [8:55:56<21:49:29, 3.30it/s] 30%|███ | 112334/371472 [8:55:56<20:58:53, 3.43it/s] 30%|███ | 112335/371472 [8:55:56<19:53:18, 3.62it/s] 30%|███ | 112336/371472 [8:55:57<19:52:03, 3.62it/s] 30%|███ | 112337/371472 [8:55:57<20:10:28, 3.57it/s] 30%|███ | 112338/371472 [8:55:57<20:17:29, 3.55it/s] 30%|███ | 112339/371472 [8:55:58<20:13:20, 3.56it/s] 30%|███ | 112340/371472 [8:55:58<19:27:56, 3.70it/s] {'loss': 3.3576, 'learning_rate': 7.281615634898403e-07, 'epoch': 4.84} + 30%|███ | 112340/371472 [8:55:58<19:27:56, 3.70it/s] 30%|███ | 112341/371472 [8:55:58<19:07:23, 3.76it/s] 30%|███ | 112342/371472 [8:55:58<20:19:50, 3.54it/s] 30%|███ | 112343/371472 [8:55:59<19:35:21, 3.67it/s] 30%|███ | 112344/371472 [8:55:59<19:15:35, 3.74it/s] 30%|███ | 112345/371472 [8:55:59<19:41:46, 3.65it/s] 30%|███ | 112346/371472 [8:55:59<18:57:22, 3.80it/s] 30%|███ | 112347/371472 [8:56:00<19:30:42, 3.69it/s] 30%|███ | 112348/371472 [8:56:00<19:28:44, 3.70it/s] 30%|███ | 112349/371472 [8:56:00<19:48:20, 3.63it/s] 30%|███ | 112350/371472 [8:56:01<21:15:24, 3.39it/s] 30%|███ | 112351/371472 [8:56:01<20:14:14, 3.56it/s] 30%|███ | 112352/371472 [8:56:01<21:06:28, 3.41it/s] 30%|███ | 112353/371472 [8:56:01<21:27:46, 3.35it/s] 30%|███ | 112354/371472 [8:56:02<20:10:45, 3.57it/s] 30%|███ | 112355/371472 [8:56:02<19:47:25, 3.64it/s] 30%|███ | 112356/371472 [8:56:02<19:41:16, 3.66it/s] 30%|███ | 112357/371472 [8:56:02<20:10:16, 3.57it/s] 30%|███ | 112358/371472 [8:56:03<19:51:05, 3.63it/s] 30%|███ | 112359/371472 [8:56:03<19:22:28, 3.71it/s] 30%|███ | 112360/371472 [8:56:03<21:10:53, 3.40it/s] {'loss': 3.4828, 'learning_rate': 7.281130815143614e-07, 'epoch': 4.84} + 30%|███ | 112360/371472 [8:56:03<21:10:53, 3.40it/s] 30%|███ | 112361/371472 [8:56:04<24:21:21, 2.96it/s] 30%|███ | 112362/371472 [8:56:04<26:35:02, 2.71it/s] 30%|███ | 112363/371472 [8:56:04<23:55:50, 3.01it/s] 30%|███ | 112364/371472 [8:56:05<21:52:56, 3.29it/s] 30%|███ | 112365/371472 [8:56:05<21:35:41, 3.33it/s] 30%|███ | 112366/371472 [8:56:05<20:22:26, 3.53it/s] 30%|███ | 112367/371472 [8:56:06<21:30:17, 3.35it/s] 30%|███ | 112368/371472 [8:56:06<20:30:00, 3.51it/s] 30%|███ | 112369/371472 [8:56:06<19:49:31, 3.63it/s] 30%|███ | 112370/371472 [8:56:06<20:18:28, 3.54it/s] 30%|███ | 112371/371472 [8:56:07<19:22:39, 3.71it/s] 30%|███ | 112372/371472 [8:56:07<20:15:40, 3.55it/s] 30%|███ | 112373/371472 [8:56:07<19:37:35, 3.67it/s] 30%|███ | 112374/371472 [8:56:07<18:52:06, 3.81it/s] 30%|███ | 112375/371472 [8:56:08<18:52:07, 3.81it/s] 30%|███ | 112376/371472 [8:56:08<18:55:07, 3.80it/s] 30%|███ | 112377/371472 [8:56:08<19:27:45, 3.70it/s] 30%|███ | 112378/371472 [8:56:09<18:56:14, 3.80it/s] 30%|███ | 112379/371472 [8:56:09<18:43:37, 3.84it/s] 30%|███ | 112380/371472 [8:56:09<18:55:58, 3.80it/s] {'loss': 3.4506, 'learning_rate': 7.280645995388825e-07, 'epoch': 4.84} + 30%|███ | 112380/371472 [8:56:09<18:55:58, 3.80it/s] 30%|███ | 112381/371472 [8:56:09<18:35:25, 3.87it/s] 30%|███ | 112382/371472 [8:56:10<19:15:56, 3.74it/s] 30%|███ | 112383/371472 [8:56:10<19:29:42, 3.69it/s] 30%|███ | 112384/371472 [8:56:10<20:08:51, 3.57it/s] 30%|███ | 112385/371472 [8:56:10<20:21:11, 3.54it/s] 30%|███ | 112386/371472 [8:56:11<19:53:22, 3.62it/s] 30%|███ | 112387/371472 [8:56:11<19:29:43, 3.69it/s] 30%|███ | 112388/371472 [8:56:11<20:31:35, 3.51it/s] 30%|███ | 112389/371472 [8:56:12<22:14:51, 3.23it/s] 30%|███ | 112390/371472 [8:56:12<20:56:49, 3.44it/s] 30%|███ | 112391/371472 [8:56:12<19:53:56, 3.62it/s] 30%|███ | 112392/371472 [8:56:12<20:27:10, 3.52it/s] 30%|███ | 112393/371472 [8:56:13<21:36:46, 3.33it/s] 30%|███ | 112394/371472 [8:56:13<20:33:17, 3.50it/s] 30%|███ | 112395/371472 [8:56:13<21:26:53, 3.36it/s] 30%|███ | 112396/371472 [8:56:14<21:19:15, 3.38it/s] 30%|███ | 112397/371472 [8:56:14<21:08:18, 3.40it/s] 30%|███ | 112398/371472 [8:56:14<20:01:16, 3.59it/s] 30%|███ | 112399/371472 [8:56:14<20:49:33, 3.46it/s] 30%|███ | 112400/371472 [8:56:15<21:12:01, 3.39it/s] {'loss': 3.2465, 'learning_rate': 7.280161175634036e-07, 'epoch': 4.84} + 30%|███ | 112400/371472 [8:56:15<21:12:01, 3.39it/s] 30%|███ | 112401/371472 [8:56:15<21:50:15, 3.30it/s] 30%|███ | 112402/371472 [8:56:15<21:50:53, 3.29it/s] 30%|███ | 112403/371472 [8:56:16<20:49:10, 3.46it/s] 30%|███ | 112404/371472 [8:56:16<19:48:40, 3.63it/s] 30%|███ | 112405/371472 [8:56:16<19:52:49, 3.62it/s] 30%|███ | 112406/371472 [8:56:16<19:54:27, 3.61it/s] 30%|███ | 112407/371472 [8:56:17<20:03:58, 3.59it/s] 30%|███ | 112408/371472 [8:56:17<19:57:51, 3.60it/s] 30%|███ | 112409/371472 [8:56:17<19:29:37, 3.69it/s] 30%|███ | 112410/371472 [8:56:18<20:38:16, 3.49it/s] 30%|███ | 112411/371472 [8:56:18<19:57:49, 3.60it/s] 30%|███ | 112412/371472 [8:56:18<20:32:30, 3.50it/s] 30%|███ | 112413/371472 [8:56:18<20:00:48, 3.60it/s] 30%|███ | 112414/371472 [8:56:19<20:08:00, 3.57it/s] 30%|███ | 112415/371472 [8:56:19<20:14:11, 3.56it/s] 30%|███ | 112416/371472 [8:56:19<20:12:57, 3.56it/s] 30%|███ | 112417/371472 [8:56:20<21:14:15, 3.39it/s] 30%|███ | 112418/371472 [8:56:20<20:32:05, 3.50it/s] 30%|███ | 112419/371472 [8:56:20<20:58:47, 3.43it/s] 30%|███ | 112420/371472 [8:56:20<20:51:07, 3.45it/s] {'loss': 3.4101, 'learning_rate': 7.279676355879247e-07, 'epoch': 4.84} + 30%|███ | 112420/371472 [8:56:20<20:51:07, 3.45it/s] 30%|███ | 112421/371472 [8:56:21<21:11:01, 3.40it/s] 30%|███ | 112422/371472 [8:56:21<21:09:01, 3.40it/s] 30%|███ | 112423/371472 [8:56:21<20:15:14, 3.55it/s] 30%|███ | 112424/371472 [8:56:22<19:59:54, 3.60it/s] 30%|███ | 112425/371472 [8:56:22<19:58:30, 3.60it/s] 30%|███ | 112426/371472 [8:56:22<20:41:06, 3.48it/s] 30%|███ | 112427/371472 [8:56:22<21:30:22, 3.35it/s] 30%|███ | 112428/371472 [8:56:23<20:45:36, 3.47it/s] 30%|███ | 112429/371472 [8:56:23<20:41:46, 3.48it/s] 30%|███ | 112430/371472 [8:56:23<20:01:06, 3.59it/s] 30%|███ | 112431/371472 [8:56:24<19:30:00, 3.69it/s] 30%|███ | 112432/371472 [8:56:24<19:52:27, 3.62it/s] 30%|███ | 112433/371472 [8:56:24<20:36:25, 3.49it/s] 30%|███ | 112434/371472 [8:56:24<21:06:01, 3.41it/s] 30%|███ | 112435/371472 [8:56:25<20:51:18, 3.45it/s] 30%|███ | 112436/371472 [8:56:25<20:20:08, 3.54it/s] 30%|███ | 112437/371472 [8:56:25<19:54:36, 3.61it/s] 30%|███ | 112438/371472 [8:56:26<19:28:47, 3.69it/s] 30%|███ | 112439/371472 [8:56:26<19:57:14, 3.61it/s] 30%|███ | 112440/371472 [8:56:26<21:35:02, 3.33it/s] {'loss': 3.4494, 'learning_rate': 7.279191536124459e-07, 'epoch': 4.84} + 30%|███ | 112440/371472 [8:56:26<21:35:02, 3.33it/s] 30%|███ | 112441/371472 [8:56:26<20:41:32, 3.48it/s] 30%|███ | 112442/371472 [8:56:27<19:36:25, 3.67it/s] 30%|███ | 112443/371472 [8:56:27<20:08:01, 3.57it/s] 30%|███ | 112444/371472 [8:56:27<19:45:46, 3.64it/s] 30%|███ | 112445/371472 [8:56:27<19:19:45, 3.72it/s] 30%|███ | 112446/371472 [8:56:28<18:53:28, 3.81it/s] 30%|███ | 112447/371472 [8:56:28<18:26:40, 3.90it/s] 30%|███ | 112448/371472 [8:56:28<19:23:59, 3.71it/s] 30%|███ | 112449/371472 [8:56:29<19:57:59, 3.60it/s] 30%|███ | 112450/371472 [8:56:29<19:39:50, 3.66it/s] 30%|███ | 112451/371472 [8:56:29<20:34:22, 3.50it/s] 30%|███ | 112452/371472 [8:56:29<21:16:00, 3.38it/s] 30%|███ | 112453/371472 [8:56:30<22:13:09, 3.24it/s] 30%|███ | 112454/371472 [8:56:30<21:16:53, 3.38it/s] 30%|███ | 112455/371472 [8:56:30<21:08:42, 3.40it/s] 30%|███ | 112456/371472 [8:56:31<22:07:23, 3.25it/s] 30%|███ | 112457/371472 [8:56:31<22:54:20, 3.14it/s] 30%|███ | 112458/371472 [8:56:31<22:51:46, 3.15it/s] 30%|███ | 112459/371472 [8:56:32<22:22:33, 3.22it/s] 30%|███ | 112460/371472 [8:56:32<22:21:33, 3.22it/s] {'loss': 3.6476, 'learning_rate': 7.27870671636967e-07, 'epoch': 4.84} + 30%|███ | 112460/371472 [8:56:32<22:21:33, 3.22it/s] 30%|███ | 112461/371472 [8:56:32<21:30:34, 3.34it/s] 30%|███ | 112462/371472 [8:56:32<20:27:11, 3.52it/s] 30%|███ | 112463/371472 [8:56:33<20:06:31, 3.58it/s] 30%|███ | 112464/371472 [8:56:33<20:18:39, 3.54it/s] 30%|███ | 112465/371472 [8:56:33<19:48:27, 3.63it/s] 30%|███ | 112466/371472 [8:56:34<20:46:41, 3.46it/s] 30%|███ | 112467/371472 [8:56:34<21:36:06, 3.33it/s] 30%|███ | 112468/371472 [8:56:34<20:24:23, 3.53it/s] 30%|███ | 112469/371472 [8:56:34<19:54:08, 3.61it/s] 30%|███ | 112470/371472 [8:56:35<19:37:37, 3.67it/s] 30%|███ | 112471/371472 [8:56:35<19:37:35, 3.67it/s] 30%|███ | 112472/371472 [8:56:35<19:25:38, 3.70it/s] 30%|███ | 112473/371472 [8:56:36<19:40:46, 3.66it/s] 30%|███ | 112474/371472 [8:56:36<20:11:08, 3.56it/s] 30%|███ | 112475/371472 [8:56:36<20:51:15, 3.45it/s] 30%|███ | 112476/371472 [8:56:36<21:04:15, 3.41it/s] 30%|███ | 112477/371472 [8:56:37<20:03:38, 3.59it/s] 30%|███ | 112478/371472 [8:56:37<21:00:53, 3.42it/s] 30%|███ | 112479/371472 [8:56:37<20:52:23, 3.45it/s] 30%|███ | 112480/371472 [8:56:38<20:20:37, 3.54it/s] {'loss': 3.1845, 'learning_rate': 7.27822189661488e-07, 'epoch': 4.84} + 30%|███ | 112480/371472 [8:56:38<20:20:37, 3.54it/s] 30%|███ | 112481/371472 [8:56:38<19:54:40, 3.61it/s] 30%|███ | 112482/371472 [8:56:38<20:43:05, 3.47it/s] 30%|███ | 112483/371472 [8:56:38<21:52:09, 3.29it/s] 30%|███ | 112484/371472 [8:56:39<21:33:06, 3.34it/s] 30%|███ | 112485/371472 [8:56:39<21:41:29, 3.32it/s] 30%|███ | 112486/371472 [8:56:39<21:24:56, 3.36it/s] 30%|███ | 112487/371472 [8:56:40<22:16:22, 3.23it/s] 30%|███ | 112488/371472 [8:56:40<21:17:24, 3.38it/s] 30%|███ | 112489/371472 [8:56:40<22:42:38, 3.17it/s] 30%|███ | 112490/371472 [8:56:41<21:28:32, 3.35it/s] 30%|███ | 112491/371472 [8:56:41<20:20:39, 3.54it/s] 30%|███ | 112492/371472 [8:56:41<20:30:49, 3.51it/s] 30%|███ | 112493/371472 [8:56:41<20:21:57, 3.53it/s] 30%|███ | 112494/371472 [8:56:42<22:04:55, 3.26it/s] 30%|███ | 112495/371472 [8:56:42<21:18:01, 3.38it/s] 30%|███ | 112496/371472 [8:56:42<21:21:18, 3.37it/s] 30%|███ | 112497/371472 [8:56:43<20:41:32, 3.48it/s] 30%|███ | 112498/371472 [8:56:43<20:08:39, 3.57it/s] 30%|███ | 112499/371472 [8:56:43<20:29:44, 3.51it/s] 30%|███ | 112500/371472 [8:56:43<20:37:49, 3.49it/s] {'loss': 3.324, 'learning_rate': 7.277737076860091e-07, 'epoch': 4.85} + 30%|███ | 112500/371472 [8:56:43<20:37:49, 3.49it/s] 30%|███ | 112501/371472 [8:56:44<20:36:54, 3.49it/s] 30%|███ | 112502/371472 [8:56:44<19:50:01, 3.63it/s] 30%|███ | 112503/371472 [8:56:44<20:46:40, 3.46it/s] 30%|███ | 112504/371472 [8:56:45<20:33:48, 3.50it/s] 30%|███ | 112505/371472 [8:56:45<20:48:55, 3.46it/s] 30%|███ | 112506/371472 [8:56:45<19:57:36, 3.60it/s] 30%|███ | 112507/371472 [8:56:45<19:40:46, 3.66it/s] 30%|███ | 112508/371472 [8:56:46<19:41:55, 3.65it/s] 30%|███ | 112509/371472 [8:56:46<19:04:18, 3.77it/s] 30%|███ | 112510/371472 [8:56:46<19:13:32, 3.74it/s] 30%|███ | 112511/371472 [8:56:46<18:59:52, 3.79it/s] 30%|███ | 112512/371472 [8:56:47<19:01:44, 3.78it/s] 30%|███ | 112513/371472 [8:56:47<18:35:38, 3.87it/s] 30%|███ | 112514/371472 [8:56:47<19:53:54, 3.61it/s] 30%|███ | 112515/371472 [8:56:48<19:45:05, 3.64it/s] 30%|███ | 112516/371472 [8:56:48<19:38:36, 3.66it/s] 30%|███ | 112517/371472 [8:56:48<19:30:15, 3.69it/s] 30%|███ | 112518/371472 [8:56:48<19:59:02, 3.60it/s] 30%|███ | 112519/371472 [8:56:49<20:21:59, 3.53it/s] 30%|███ | 112520/371472 [8:56:49<20:24:40, 3.52it/s] {'loss': 3.3586, 'learning_rate': 7.277252257105303e-07, 'epoch': 4.85} + 30%|███ | 112520/371472 [8:56:49<20:24:40, 3.52it/s] 30%|███ | 112521/371472 [8:56:49<20:48:45, 3.46it/s] 30%|███ | 112522/371472 [8:56:50<20:29:17, 3.51it/s] 30%|███ | 112523/371472 [8:56:50<20:00:03, 3.60it/s] 30%|███ | 112524/371472 [8:56:50<19:36:46, 3.67it/s] 30%|███ | 112525/371472 [8:56:50<20:13:09, 3.56it/s] 30%|███ | 112526/371472 [8:56:51<19:51:03, 3.62it/s] 30%|███ | 112527/371472 [8:56:51<20:18:54, 3.54it/s] 30%|███ | 112528/371472 [8:56:51<19:57:24, 3.60it/s] 30%|███ | 112529/371472 [8:56:52<21:31:39, 3.34it/s] 30%|███ | 112530/371472 [8:56:52<21:22:39, 3.36it/s] 30%|███ | 112531/371472 [8:56:52<21:22:57, 3.36it/s] 30%|███ | 112532/371472 [8:56:52<21:23:03, 3.36it/s] 30%|███ | 112533/371472 [8:56:53<20:49:11, 3.45it/s] 30%|███ | 112534/371472 [8:56:53<20:38:43, 3.48it/s] 30%|███ | 112535/371472 [8:56:53<20:50:15, 3.45it/s] 30%|███ | 112536/371472 [8:56:54<20:30:33, 3.51it/s] 30%|███ | 112537/371472 [8:56:54<20:24:34, 3.52it/s] 30%|███ | 112538/371472 [8:56:54<21:33:14, 3.34it/s] 30%|███ | 112539/371472 [8:56:54<20:53:20, 3.44it/s] 30%|███ | 112540/371472 [8:56:55<22:19:31, 3.22it/s] {'loss': 3.4245, 'learning_rate': 7.276767437350512e-07, 'epoch': 4.85} + 30%|███ | 112540/371472 [8:56:55<22:19:31, 3.22it/s] 30%|███ | 112541/371472 [8:56:55<22:55:23, 3.14it/s] 30%|███ | 112542/371472 [8:56:55<22:26:24, 3.21it/s] 30%|███ | 112543/371472 [8:56:56<21:09:23, 3.40it/s] 30%|███ | 112544/371472 [8:56:56<22:11:01, 3.24it/s] 30%|███ | 112545/371472 [8:56:56<21:57:49, 3.27it/s] 30%|███ | 112546/371472 [8:56:57<21:56:46, 3.28it/s] 30%|███ | 112547/371472 [8:56:57<21:49:29, 3.30it/s] 30%|███ | 112548/371472 [8:56:57<21:11:59, 3.39it/s] 30%|███ | 112549/371472 [8:56:58<21:37:04, 3.33it/s] 30%|███ | 112550/371472 [8:56:58<20:39:25, 3.48it/s] 30%|███ | 112551/371472 [8:56:58<21:39:18, 3.32it/s] 30%|███ | 112552/371472 [8:56:58<21:06:38, 3.41it/s] 30%|███ | 112553/371472 [8:56:59<20:26:05, 3.52it/s] 30%|███ | 112554/371472 [8:56:59<19:33:01, 3.68it/s] 30%|███ | 112555/371472 [8:56:59<20:36:24, 3.49it/s] 30%|███ | 112556/371472 [8:56:59<20:23:42, 3.53it/s] 30%|███ | 112557/371472 [8:57:00<20:03:20, 3.59it/s] 30%|███ | 112558/371472 [8:57:00<19:06:46, 3.76it/s] 30%|███ | 112559/371472 [8:57:00<19:10:54, 3.75it/s] 30%|███ | 112560/371472 [8:57:01<19:04:19, 3.77it/s] {'loss': 3.3284, 'learning_rate': 7.276282617595724e-07, 'epoch': 4.85} + 30%|███ | 112560/371472 [8:57:01<19:04:19, 3.77it/s] 30%|███ | 112561/371472 [8:57:01<18:38:09, 3.86it/s] 30%|███ | 112562/371472 [8:57:01<19:26:00, 3.70it/s] 30%|███ | 112563/371472 [8:57:01<19:05:59, 3.77it/s] 30%|███ | 112564/371472 [8:57:02<18:57:52, 3.79it/s] 30%|███ | 112565/371472 [8:57:02<19:38:09, 3.66it/s] 30%|███ | 112566/371472 [8:57:02<19:35:50, 3.67it/s] 30%|███ | 112567/371472 [8:57:02<19:27:09, 3.70it/s] 30%|███ | 112568/371472 [8:57:03<19:22:36, 3.71it/s] 30%|███ | 112569/371472 [8:57:03<20:21:23, 3.53it/s] 30%|███ | 112570/371472 [8:57:03<19:47:35, 3.63it/s] 30%|███ | 112571/371472 [8:57:04<19:33:22, 3.68it/s] 30%|███ | 112572/371472 [8:57:04<19:39:03, 3.66it/s] 30%|███ | 112573/371472 [8:57:04<19:44:33, 3.64it/s] 30%|███ | 112574/371472 [8:57:04<19:43:02, 3.65it/s] 30%|███ | 112575/371472 [8:57:05<19:49:50, 3.63it/s] 30%|███ | 112576/371472 [8:57:05<19:37:26, 3.66it/s] 30%|███ | 112577/371472 [8:57:05<19:45:56, 3.64it/s] 30%|███ | 112578/371472 [8:57:05<19:27:31, 3.70it/s] 30%|███ | 112579/371472 [8:57:06<20:22:18, 3.53it/s] 30%|███ | 112580/371472 [8:57:06<20:35:57, 3.49it/s] {'loss': 3.291, 'learning_rate': 7.275797797840936e-07, 'epoch': 4.85} + 30%|███ | 112580/371472 [8:57:06<20:35:57, 3.49it/s] 30%|███ | 112581/371472 [8:57:06<21:03:46, 3.41it/s] 30%|███ | 112582/371472 [8:57:07<21:29:07, 3.35it/s] 30%|███ | 112583/371472 [8:57:07<21:11:41, 3.39it/s] 30%|███ | 112584/371472 [8:57:07<20:21:42, 3.53it/s] 30%|███ | 112585/371472 [8:57:07<20:42:10, 3.47it/s] 30%|███ | 112586/371472 [8:57:08<20:30:29, 3.51it/s] 30%|███ | 112587/371472 [8:57:08<20:28:33, 3.51it/s] 30%|███ | 112588/371472 [8:57:08<20:13:46, 3.55it/s] 30%|███ | 112589/371472 [8:57:09<19:55:10, 3.61it/s] 30%|███ | 112590/371472 [8:57:09<20:26:46, 3.52it/s] 30%|███ | 112591/371472 [8:57:09<19:44:12, 3.64it/s] 30%|███ | 112592/371472 [8:57:09<19:49:33, 3.63it/s] 30%|███ | 112593/371472 [8:57:10<19:21:01, 3.72it/s] 30%|███ | 112594/371472 [8:57:10<19:39:27, 3.66it/s] 30%|███ | 112595/371472 [8:57:10<19:57:34, 3.60it/s] 30%|███ | 112596/371472 [8:57:11<20:01:31, 3.59it/s] 30%|███ | 112597/371472 [8:57:11<20:24:44, 3.52it/s] 30%|███ | 112598/371472 [8:57:11<20:23:07, 3.53it/s] 30%|███ | 112599/371472 [8:57:11<22:46:16, 3.16it/s] 30%|███ | 112600/371472 [8:57:12<21:49:15, 3.30it/s] {'loss': 3.4461, 'learning_rate': 7.275312978086148e-07, 'epoch': 4.85} + 30%|███ | 112600/371472 [8:57:12<21:49:15, 3.30it/s] 30%|███ | 112601/371472 [8:57:12<21:55:38, 3.28it/s] 30%|███ | 112602/371472 [8:57:12<23:02:20, 3.12it/s] 30%|███ | 112603/371472 [8:57:13<22:36:16, 3.18it/s] 30%|███ | 112604/371472 [8:57:13<21:28:33, 3.35it/s] 30%|███ | 112605/371472 [8:57:13<21:17:55, 3.38it/s] 30%|███ | 112606/371472 [8:57:14<20:20:40, 3.53it/s] 30%|███ | 112607/371472 [8:57:14<20:11:07, 3.56it/s] 30%|███ | 112608/371472 [8:57:14<19:59:13, 3.60it/s] 30%|███ | 112609/371472 [8:57:14<20:02:43, 3.59it/s] 30%|███ | 112610/371472 [8:57:15<19:34:27, 3.67it/s] 30%|███ | 112611/371472 [8:57:15<20:19:58, 3.54it/s] 30%|███ | 112612/371472 [8:57:15<20:13:19, 3.56it/s] 30%|███ | 112613/371472 [8:57:15<19:57:25, 3.60it/s] 30%|███ | 112614/371472 [8:57:16<21:00:20, 3.42it/s] 30%|███ | 112615/371472 [8:57:16<21:28:41, 3.35it/s] 30%|███ | 112616/371472 [8:57:16<20:40:36, 3.48it/s] 30%|███ | 112617/371472 [8:57:17<19:49:39, 3.63it/s] 30%|███ | 112618/371472 [8:57:17<19:41:39, 3.65it/s] 30%|███ | 112619/371472 [8:57:17<18:58:14, 3.79it/s] 30%|███ | 112620/371472 [8:57:17<18:47:29, 3.83it/s] {'loss': 3.4262, 'learning_rate': 7.274828158331357e-07, 'epoch': 4.85} + 30%|███ | 112620/371472 [8:57:17<18:47:29, 3.83it/s] 30%|███ | 112621/371472 [8:57:18<20:17:28, 3.54it/s] 30%|███ | 112622/371472 [8:57:18<20:17:18, 3.54it/s] 30%|███ | 112623/371472 [8:57:18<21:44:04, 3.31it/s] 30%|███ | 112624/371472 [8:57:19<21:04:06, 3.41it/s] 30%|███ | 112625/371472 [8:57:19<21:31:41, 3.34it/s] 30%|███ | 112626/371472 [8:57:19<20:29:38, 3.51it/s] 30%|███ | 112627/371472 [8:57:19<20:14:15, 3.55it/s] 30%|███ | 112628/371472 [8:57:20<19:20:27, 3.72it/s] 30%|███ | 112629/371472 [8:57:20<19:22:07, 3.71it/s] 30%|███ | 112630/371472 [8:57:20<21:07:03, 3.40it/s] 30%|███ | 112631/371472 [8:57:21<21:51:59, 3.29it/s] 30%|███ | 112632/371472 [8:57:21<20:58:35, 3.43it/s] 30%|███ | 112633/371472 [8:57:21<22:07:56, 3.25it/s] 30%|███ | 112634/371472 [8:57:22<21:02:09, 3.42it/s] 30%|███ | 112635/371472 [8:57:22<20:53:24, 3.44it/s] 30%|███ | 112636/371472 [8:57:22<21:39:11, 3.32it/s] 30%|███ | 112637/371472 [8:57:22<20:48:33, 3.46it/s] 30%|███ | 112638/371472 [8:57:23<20:56:49, 3.43it/s] 30%|███ | 112639/371472 [8:57:23<20:11:26, 3.56it/s] 30%|███ | 112640/371472 [8:57:23<20:23:17, 3.53it/s] {'loss': 3.3892, 'learning_rate': 7.274343338576568e-07, 'epoch': 4.85} + 30%|███ | 112640/371472 [8:57:23<20:23:17, 3.53it/s] 30%|███ | 112641/371472 [8:57:24<20:35:38, 3.49it/s] 30%|███ | 112642/371472 [8:57:24<20:19:13, 3.54it/s] 30%|███ | 112643/371472 [8:57:24<20:09:31, 3.57it/s] 30%|███ | 112644/371472 [8:57:24<20:33:02, 3.50it/s] 30%|███ | 112645/371472 [8:57:25<20:33:34, 3.50it/s] 30%|███ | 112646/371472 [8:57:25<22:37:35, 3.18it/s] 30%|███ | 112647/371472 [8:57:25<21:22:41, 3.36it/s] 30%|███ | 112648/371472 [8:57:26<20:46:12, 3.46it/s] 30%|███ | 112649/371472 [8:57:26<20:27:04, 3.52it/s] 30%|███ | 112650/371472 [8:57:26<20:22:37, 3.53it/s] 30%|███ | 112651/371472 [8:57:26<19:59:36, 3.60it/s] 30%|███ | 112652/371472 [8:57:27<21:15:14, 3.38it/s] 30%|███ | 112653/371472 [8:57:27<21:45:51, 3.30it/s] 30%|███ | 112654/371472 [8:57:27<21:04:25, 3.41it/s] 30%|███ | 112655/371472 [8:57:28<20:25:21, 3.52it/s] 30%|███ | 112656/371472 [8:57:28<19:55:27, 3.61it/s] 30%|███ | 112657/371472 [8:57:28<19:56:19, 3.61it/s] 30%|███ | 112658/371472 [8:57:28<20:32:52, 3.50it/s] 30%|███ | 112659/371472 [8:57:29<20:50:41, 3.45it/s] 30%|███ | 112660/371472 [8:57:29<20:17:50, 3.54it/s] {'loss': 3.3796, 'learning_rate': 7.27385851882178e-07, 'epoch': 4.85} + 30%|███ | 112660/371472 [8:57:29<20:17:50, 3.54it/s] 30%|███ | 112661/371472 [8:57:29<19:51:03, 3.62it/s] 30%|███ | 112662/371472 [8:57:30<21:40:50, 3.32it/s] 30%|███ | 112663/371472 [8:57:30<20:51:01, 3.45it/s] 30%|███ | 112664/371472 [8:57:30<20:37:35, 3.49it/s] 30%|███ | 112665/371472 [8:57:30<20:15:58, 3.55it/s] 30%|███ | 112666/371472 [8:57:31<20:28:39, 3.51it/s] 30%|███ | 112667/371472 [8:57:31<21:25:56, 3.35it/s] 30%|███ | 112668/371472 [8:57:31<20:20:45, 3.53it/s] 30%|███ | 112669/371472 [8:57:32<21:13:32, 3.39it/s] 30%|███ | 112670/371472 [8:57:32<20:22:42, 3.53it/s] 30%|███ | 112671/371472 [8:57:32<21:22:44, 3.36it/s] 30%|███ | 112672/371472 [8:57:32<21:06:30, 3.41it/s] 30%|███ | 112673/371472 [8:57:33<22:04:46, 3.26it/s] 30%|███ | 112674/371472 [8:57:33<21:33:11, 3.34it/s] 30%|███ | 112675/371472 [8:57:33<21:07:00, 3.40it/s] 30%|███ | 112676/371472 [8:57:34<20:58:22, 3.43it/s] 30%|███ | 112677/371472 [8:57:34<20:47:45, 3.46it/s] 30%|███ | 112678/371472 [8:57:34<20:47:09, 3.46it/s] 30%|███ | 112679/371472 [8:57:35<20:37:53, 3.48it/s] 30%|███ | 112680/371472 [8:57:35<20:05:32, 3.58it/s] {'loss': 3.3873, 'learning_rate': 7.273373699066992e-07, 'epoch': 4.85} + 30%|███ | 112680/371472 [8:57:35<20:05:32, 3.58it/s] 30%|███ | 112681/371472 [8:57:35<19:56:06, 3.61it/s] 30%|███ | 112682/371472 [8:57:35<21:12:26, 3.39it/s] 30%|███ | 112683/371472 [8:57:36<21:20:26, 3.37it/s] 30%|███ | 112684/371472 [8:57:36<20:50:49, 3.45it/s] 30%|███ | 112685/371472 [8:57:36<21:32:42, 3.34it/s] 30%|███ | 112686/371472 [8:57:37<21:13:53, 3.39it/s] 30%|███ | 112687/371472 [8:57:37<21:02:17, 3.42it/s] 30%|███ | 112688/371472 [8:57:37<21:11:31, 3.39it/s] 30%|███ | 112689/371472 [8:57:37<21:08:16, 3.40it/s] 30%|███ | 112690/371472 [8:57:38<20:39:19, 3.48it/s] 30%|███ | 112691/371472 [8:57:38<21:06:12, 3.41it/s] 30%|███ | 112692/371472 [8:57:38<21:24:59, 3.36it/s] 30%|███ | 112693/371472 [8:57:39<20:32:34, 3.50it/s] 30%|███ | 112694/371472 [8:57:39<20:35:18, 3.49it/s] 30%|███ | 112695/371472 [8:57:39<21:02:47, 3.42it/s] 30%|███ | 112696/371472 [8:57:39<21:11:01, 3.39it/s] 30%|███ | 112697/371472 [8:57:40<20:41:42, 3.47it/s] 30%|███ | 112698/371472 [8:57:40<20:10:35, 3.56it/s] 30%|███ | 112699/371472 [8:57:40<20:03:22, 3.58it/s] 30%|███ | 112700/371472 [8:57:41<20:05:48, 3.58it/s] {'loss': 3.3284, 'learning_rate': 7.272888879312202e-07, 'epoch': 4.85} + 30%|███ | 112700/371472 [8:57:41<20:05:48, 3.58it/s] 30%|███ | 112701/371472 [8:57:41<19:30:51, 3.68it/s] 30%|███ | 112702/371472 [8:57:41<20:39:30, 3.48it/s] 30%|███ | 112703/371472 [8:57:41<20:01:01, 3.59it/s] 30%|███ | 112704/371472 [8:57:42<22:23:20, 3.21it/s] 30%|███ | 112705/371472 [8:57:42<20:58:14, 3.43it/s] 30%|███ | 112706/371472 [8:57:42<21:29:40, 3.34it/s] 30%|███ | 112707/371472 [8:57:43<23:05:34, 3.11it/s] 30%|███ | 112708/371472 [8:57:43<22:41:16, 3.17it/s] 30%|███ | 112709/371472 [8:57:43<22:22:52, 3.21it/s] 30%|███ | 112710/371472 [8:57:44<21:39:27, 3.32it/s] 30%|███ | 112711/371472 [8:57:44<20:49:49, 3.45it/s] 30%|███ | 112712/371472 [8:57:44<19:44:43, 3.64it/s] 30%|███ | 112713/371472 [8:57:44<19:04:43, 3.77it/s] 30%|███ | 112714/371472 [8:57:45<19:19:44, 3.72it/s] 30%|███ | 112715/371472 [8:57:45<18:55:53, 3.80it/s] 30%|███ | 112716/371472 [8:57:45<19:05:24, 3.77it/s] 30%|███ | 112717/371472 [8:57:45<19:41:35, 3.65it/s] 30%|███ | 112718/371472 [8:57:46<19:22:34, 3.71it/s] 30%|███ | 112719/371472 [8:57:46<19:11:08, 3.75it/s] 30%|███ | 112720/371472 [8:57:46<19:33:29, 3.67it/s] {'loss': 3.3175, 'learning_rate': 7.272404059557413e-07, 'epoch': 4.86} + 30%|███ | 112720/371472 [8:57:46<19:33:29, 3.67it/s] 30%|███ | 112721/371472 [8:57:47<19:51:34, 3.62it/s] 30%|███ | 112722/371472 [8:57:47<21:20:19, 3.37it/s] 30%|███ | 112723/371472 [8:57:47<22:17:20, 3.22it/s] 30%|███ | 112724/371472 [8:57:48<22:36:10, 3.18it/s] 30%|███ | 112725/371472 [8:57:48<21:25:12, 3.36it/s] 30%|███ | 112726/371472 [8:57:48<22:46:09, 3.16it/s] 30%|███ | 112727/371472 [8:57:48<21:07:59, 3.40it/s] 30%|███ | 112728/371472 [8:57:49<20:00:04, 3.59it/s] 30%|███ | 112729/371472 [8:57:49<19:23:28, 3.71it/s] 30%|███ | 112730/371472 [8:57:49<21:09:13, 3.40it/s] 30%|███ | 112731/371472 [8:57:50<20:12:31, 3.56it/s] 30%|███ | 112732/371472 [8:57:50<19:52:02, 3.62it/s] 30%|███ | 112733/371472 [8:57:50<20:13:47, 3.55it/s] 30%|███ | 112734/371472 [8:57:50<20:05:50, 3.58it/s] 30%|███ | 112735/371472 [8:57:51<21:18:04, 3.37it/s] 30%|███ | 112736/371472 [8:57:51<20:21:43, 3.53it/s] 30%|███ | 112737/371472 [8:57:51<19:49:00, 3.63it/s] 30%|███ | 112738/371472 [8:57:51<20:04:23, 3.58it/s] 30%|███ | 112739/371472 [8:57:52<19:51:08, 3.62it/s] 30%|███ | 112740/371472 [8:57:52<19:40:32, 3.65it/s] {'loss': 3.4371, 'learning_rate': 7.271919239802624e-07, 'epoch': 4.86} + 30%|███ | 112740/371472 [8:57:52<19:40:32, 3.65it/s] 30%|███ | 112741/371472 [8:57:52<20:10:02, 3.56it/s] 30%|███ | 112742/371472 [8:57:53<20:05:52, 3.58it/s] 30%|███ | 112743/371472 [8:57:53<19:23:33, 3.71it/s] 30%|███ | 112744/371472 [8:57:53<19:11:41, 3.74it/s] 30%|███ | 112745/371472 [8:57:53<20:57:02, 3.43it/s] 30%|███ | 112746/371472 [8:57:54<20:03:47, 3.58it/s] 30%|███ | 112747/371472 [8:57:54<21:12:41, 3.39it/s] 30%|███ | 112748/371472 [8:57:54<20:33:53, 3.49it/s] 30%|███ | 112749/371472 [8:57:55<20:21:40, 3.53it/s] 30%|███ | 112750/371472 [8:57:55<20:32:32, 3.50it/s] 30%|███ | 112751/371472 [8:57:55<21:01:30, 3.42it/s] 30%|███ | 112752/371472 [8:57:56<21:41:47, 3.31it/s] 30%|███ | 112753/371472 [8:57:56<20:55:37, 3.43it/s] 30%|███ | 112754/371472 [8:57:56<22:21:08, 3.22it/s] 30%|███ | 112755/371472 [8:57:56<21:51:10, 3.29it/s] 30%|███ | 112756/371472 [8:57:57<20:56:18, 3.43it/s] 30%|███ | 112757/371472 [8:57:57<20:45:33, 3.46it/s] 30%|███ | 112758/371472 [8:57:57<20:28:54, 3.51it/s] 30%|███ | 112759/371472 [8:57:58<21:18:15, 3.37it/s] 30%|███ | 112760/371472 [8:57:58<20:41:08, 3.47it/s] {'loss': 3.2728, 'learning_rate': 7.271434420047835e-07, 'epoch': 4.86} + 30%|███ | 112760/371472 [8:57:58<20:41:08, 3.47it/s] 30%|███ | 112761/371472 [8:57:58<20:45:52, 3.46it/s] 30%|███ | 112762/371472 [8:57:58<20:47:51, 3.46it/s] 30%|███ | 112763/371472 [8:57:59<20:45:54, 3.46it/s] 30%|███ | 112764/371472 [8:57:59<20:27:02, 3.51it/s] 30%|███ | 112765/371472 [8:57:59<21:39:27, 3.32it/s] 30%|███ | 112766/371472 [8:58:00<22:28:29, 3.20it/s] 30%|███ | 112767/371472 [8:58:00<21:40:42, 3.31it/s] 30%|███ | 112768/371472 [8:58:00<21:12:57, 3.39it/s] 30%|███ | 112769/371472 [8:58:00<21:04:59, 3.41it/s] 30%|███ | 112770/371472 [8:58:01<20:04:21, 3.58it/s] 30%|███ | 112771/371472 [8:58:01<20:11:32, 3.56it/s] 30%|███ | 112772/371472 [8:58:01<21:51:51, 3.29it/s] 30%|███ | 112773/371472 [8:58:02<21:33:34, 3.33it/s] 30%|███ | 112774/371472 [8:58:02<22:19:11, 3.22it/s] 30%|███ | 112775/371472 [8:58:02<21:35:08, 3.33it/s] 30%|███ | 112776/371472 [8:58:03<22:08:40, 3.25it/s] 30%|███ | 112777/371472 [8:58:03<22:23:40, 3.21it/s] 30%|███ | 112778/371472 [8:58:03<23:20:57, 3.08it/s] 30%|███ | 112779/371472 [8:58:04<23:12:23, 3.10it/s] 30%|███ | 112780/371472 [8:58:04<21:48:19, 3.30it/s] {'loss': 3.3408, 'learning_rate': 7.270949600293046e-07, 'epoch': 4.86} + 30%|███ | 112780/371472 [8:58:04<21:48:19, 3.30it/s] 30%|███ | 112781/371472 [8:58:04<20:38:40, 3.48it/s] 30%|███ | 112782/371472 [8:58:04<21:08:08, 3.40it/s] 30%|███ | 112783/371472 [8:58:05<20:17:59, 3.54it/s] 30%|███ | 112784/371472 [8:58:05<20:58:00, 3.43it/s] 30%|███ | 112785/371472 [8:58:05<20:38:28, 3.48it/s] 30%|███ | 112786/371472 [8:58:06<20:37:46, 3.48it/s] 30%|███ | 112787/371472 [8:58:06<20:43:05, 3.47it/s] 30%|███ | 112788/371472 [8:58:06<20:53:03, 3.44it/s] 30%|███ | 112789/371472 [8:58:06<20:16:48, 3.54it/s] 30%|███ | 112790/371472 [8:58:07<19:19:34, 3.72it/s] 30%|███ | 112791/371472 [8:58:07<19:48:03, 3.63it/s] 30%|███ | 112792/371472 [8:58:07<19:45:08, 3.64it/s] 30%|███ | 112793/371472 [8:58:08<20:56:54, 3.43it/s] 30%|███ | 112794/371472 [8:58:08<20:16:49, 3.54it/s] 30%|███ | 112795/371472 [8:58:08<19:52:33, 3.62it/s] 30%|███ | 112796/371472 [8:58:08<20:27:59, 3.51it/s] 30%|███ | 112797/371472 [8:58:09<20:01:27, 3.59it/s] 30%|███ | 112798/371472 [8:58:09<24:10:05, 2.97it/s] 30%|███ | 112799/371472 [8:58:09<23:41:40, 3.03it/s] 30%|███ | 112800/371472 [8:58:10<21:59:50, 3.27it/s] {'loss': 3.4124, 'learning_rate': 7.270464780538257e-07, 'epoch': 4.86} + 30%|███ | 112800/371472 [8:58:10<21:59:50, 3.27it/s] 30%|███ | 112801/371472 [8:58:10<21:38:29, 3.32it/s] 30%|███ | 112802/371472 [8:58:10<21:31:36, 3.34it/s] 30%|███ | 112803/371472 [8:58:11<22:39:11, 3.17it/s] 30%|███ | 112804/371472 [8:58:11<21:56:16, 3.28it/s] 30%|███ | 112805/371472 [8:58:11<21:19:16, 3.37it/s] 30%|███ | 112806/371472 [8:58:11<20:40:22, 3.48it/s] 30%|███ | 112807/371472 [8:58:12<21:45:30, 3.30it/s] 30%|███ | 112808/371472 [8:58:12<21:15:58, 3.38it/s] 30%|███ | 112809/371472 [8:58:12<21:01:12, 3.42it/s] 30%|███ | 112810/371472 [8:58:13<20:26:33, 3.51it/s] 30%|███ | 112811/371472 [8:58:13<20:52:58, 3.44it/s] 30%|███ | 112812/371472 [8:58:13<21:22:05, 3.36it/s] 30%|███ | 112813/371472 [8:58:13<20:34:33, 3.49it/s] 30%|███ | 112814/371472 [8:58:14<20:55:21, 3.43it/s] 30%|███ | 112815/371472 [8:58:14<20:24:23, 3.52it/s] 30%|███ | 112816/371472 [8:58:14<20:39:45, 3.48it/s] 30%|███ | 112817/371472 [8:58:15<20:07:56, 3.57it/s] 30%|███ | 112818/371472 [8:58:15<22:31:52, 3.19it/s] 30%|███ | 112819/371472 [8:58:15<22:25:01, 3.21it/s] 30%|███ | 112820/371472 [8:58:16<23:16:47, 3.09it/s] {'loss': 3.3237, 'learning_rate': 7.269979960783469e-07, 'epoch': 4.86} + 30%|███ | 112820/371472 [8:58:16<23:16:47, 3.09it/s] 30%|███ | 112821/371472 [8:58:16<21:46:52, 3.30it/s] 30%|███ | 112822/371472 [8:58:16<21:03:28, 3.41it/s] 30%|███ | 112823/371472 [8:58:16<20:18:04, 3.54it/s] 30%|███ | 112824/371472 [8:58:17<19:50:25, 3.62it/s] 30%|███ | 112825/371472 [8:58:17<19:44:11, 3.64it/s] 30%|███ | 112826/371472 [8:58:17<20:11:19, 3.56it/s] 30%|███ | 112827/371472 [8:58:18<19:55:01, 3.61it/s] 30%|███ | 112828/371472 [8:58:18<19:56:41, 3.60it/s] 30%|███ | 112829/371472 [8:58:18<19:12:50, 3.74it/s] 30%|███ | 112830/371472 [8:58:18<19:54:17, 3.61it/s] 30%|███ | 112831/371472 [8:58:19<19:58:39, 3.60it/s] 30%|███ | 112832/371472 [8:58:19<19:16:47, 3.73it/s] 30%|███ | 112833/371472 [8:58:19<18:48:42, 3.82it/s] 30%|███ | 112834/371472 [8:58:19<19:14:14, 3.73it/s] 30%|███ | 112835/371472 [8:58:20<18:47:04, 3.82it/s] 30%|███ | 112836/371472 [8:58:20<20:15:45, 3.55it/s] 30%|███ | 112837/371472 [8:58:20<23:03:29, 3.12it/s] 30%|███ | 112838/371472 [8:58:21<21:36:46, 3.32it/s] 30%|███ | 112839/371472 [8:58:21<21:06:42, 3.40it/s] 30%|███ | 112840/371472 [8:58:21<20:49:07, 3.45it/s] {'loss': 3.2809, 'learning_rate': 7.26949514102868e-07, 'epoch': 4.86} + 30%|███ | 112840/371472 [8:58:21<20:49:07, 3.45it/s] 30%|███ | 112841/371472 [8:58:21<20:26:04, 3.52it/s] 30%|███ | 112842/371472 [8:58:22<19:43:07, 3.64it/s] 30%|███ | 112843/371472 [8:58:22<19:30:24, 3.68it/s] 30%|███ | 112844/371472 [8:58:22<19:45:31, 3.64it/s] 30%|███ | 112845/371472 [8:58:23<19:35:47, 3.67it/s] 30%|███ | 112846/371472 [8:58:23<19:29:21, 3.69it/s] 30%|███ | 112847/371472 [8:58:23<20:15:24, 3.55it/s] 30%|███ | 112848/371472 [8:58:23<20:08:23, 3.57it/s] 30%|███ | 112849/371472 [8:58:24<21:09:30, 3.40it/s] 30%|███ | 112850/371472 [8:58:24<19:50:45, 3.62it/s] 30%|███ | 112851/371472 [8:58:24<20:03:10, 3.58it/s] 30%|███ | 112852/371472 [8:58:25<19:40:47, 3.65it/s] 30%|███ | 112853/371472 [8:58:25<20:02:59, 3.58it/s] 30%|███ | 112854/371472 [8:58:25<20:53:10, 3.44it/s] 30%|███ | 112855/371472 [8:58:25<20:48:20, 3.45it/s] 30%|███ | 112856/371472 [8:58:26<22:04:16, 3.25it/s] 30%|███ | 112857/371472 [8:58:26<21:00:34, 3.42it/s] 30%|███ | 112858/371472 [8:58:26<20:22:59, 3.52it/s] 30%|███ | 112859/371472 [8:58:27<21:18:20, 3.37it/s] 30%|███ | 112860/371472 [8:58:27<21:24:08, 3.36it/s] {'loss': 3.1672, 'learning_rate': 7.26901032127389e-07, 'epoch': 4.86} + 30%|███ | 112860/371472 [8:58:27<21:24:08, 3.36it/s] 30%|███ | 112861/371472 [8:58:27<21:24:47, 3.35it/s] 30%|███ | 112862/371472 [8:58:27<20:34:47, 3.49it/s] 30%|███ | 112863/371472 [8:58:28<20:12:37, 3.55it/s] 30%|███ | 112864/371472 [8:58:28<19:40:53, 3.65it/s] 30%|███ | 112865/371472 [8:58:28<20:25:45, 3.52it/s] 30%|███ | 112866/371472 [8:58:29<21:20:30, 3.37it/s] 30%|███ | 112867/371472 [8:58:29<21:39:23, 3.32it/s] 30%|███ | 112868/371472 [8:58:29<20:30:49, 3.50it/s] 30%|███ | 112869/371472 [8:58:30<21:07:01, 3.40it/s] 30%|███ | 112870/371472 [8:58:30<21:35:15, 3.33it/s] 30%|███ | 112871/371472 [8:58:30<20:45:51, 3.46it/s] 30%|███ | 112872/371472 [8:58:30<20:21:19, 3.53it/s] 30%|███ | 112873/371472 [8:58:31<19:32:08, 3.68it/s] 30%|███ | 112874/371472 [8:58:31<19:23:27, 3.70it/s] 30%|███ | 112875/371472 [8:58:31<18:46:19, 3.83it/s] 30%|███ | 112876/371472 [8:58:31<19:54:49, 3.61it/s] 30%|███ | 112877/371472 [8:58:32<22:46:03, 3.16it/s] 30%|███ | 112878/371472 [8:58:32<24:23:21, 2.95it/s] 30%|███ | 112879/371472 [8:58:33<23:39:00, 3.04it/s] 30%|███ | 112880/371472 [8:58:33<24:50:37, 2.89it/s] {'loss': 3.1231, 'learning_rate': 7.268525501519101e-07, 'epoch': 4.86} + 30%|███ | 112880/371472 [8:58:33<24:50:37, 2.89it/s] 30%|███ | 112881/371472 [8:58:33<23:17:21, 3.08it/s] 30%|███ | 112882/371472 [8:58:33<23:02:29, 3.12it/s] 30%|███ | 112883/371472 [8:58:34<22:52:31, 3.14it/s] 30%|███ | 112884/371472 [8:58:34<21:50:10, 3.29it/s] 30%|███ | 112885/371472 [8:58:34<22:11:35, 3.24it/s] 30%|███ | 112886/371472 [8:58:35<21:17:30, 3.37it/s] 30%|███ | 112887/371472 [8:58:35<21:31:58, 3.34it/s] 30%|███ | 112888/371472 [8:58:35<22:16:17, 3.23it/s] 30%|███ | 112889/371472 [8:58:36<21:43:00, 3.31it/s] 30%|███ | 112890/371472 [8:58:36<21:31:10, 3.34it/s] 30%|███ | 112891/371472 [8:58:36<21:28:02, 3.35it/s] 30%|███ | 112892/371472 [8:58:36<21:03:58, 3.41it/s] 30%|███ | 112893/371472 [8:58:37<20:18:32, 3.54it/s] 30%|███ | 112894/371472 [8:58:37<19:59:09, 3.59it/s] 30%|███ | 112895/371472 [8:58:37<19:43:45, 3.64it/s] 30%|███ | 112896/371472 [8:58:38<19:38:28, 3.66it/s] 30%|███ | 112897/371472 [8:58:38<19:27:17, 3.69it/s] 30%|███ | 112898/371472 [8:58:38<19:11:56, 3.74it/s] 30%|███ | 112899/371472 [8:58:38<19:11:03, 3.74it/s] 30%|███ | 112900/371472 [8:58:39<19:26:04, 3.70it/s] {'loss': 3.291, 'learning_rate': 7.268040681764313e-07, 'epoch': 4.86} + 30%|███ | 112900/371472 [8:58:39<19:26:04, 3.70it/s] 30%|███ | 112901/371472 [8:58:39<20:04:29, 3.58it/s] 30%|███ | 112902/371472 [8:58:39<19:41:59, 3.65it/s] 30%|███ | 112903/371472 [8:58:39<19:20:28, 3.71it/s] 30%|███ | 112904/371472 [8:58:40<18:51:19, 3.81it/s] 30%|███ | 112905/371472 [8:58:40<18:47:29, 3.82it/s] 30%|███ | 112906/371472 [8:58:40<20:01:06, 3.59it/s] 30%|███ | 112907/371472 [8:58:41<20:04:32, 3.58it/s] 30%|███ | 112908/371472 [8:58:41<19:34:28, 3.67it/s] 30%|███ | 112909/371472 [8:58:41<20:06:22, 3.57it/s] 30%|███ | 112910/371472 [8:58:41<20:05:44, 3.57it/s] 30%|███ | 112911/371472 [8:58:42<19:24:20, 3.70it/s] 30%|███ | 112912/371472 [8:58:42<19:43:03, 3.64it/s] 30%|███ | 112913/371472 [8:58:42<19:01:36, 3.77it/s] 30%|███ | 112914/371472 [8:58:42<19:05:12, 3.76it/s] 30%|███ | 112915/371472 [8:58:43<19:03:25, 3.77it/s] 30%|███ | 112916/371472 [8:58:43<21:04:10, 3.41it/s] 30%|███ | 112917/371472 [8:58:43<20:25:44, 3.52it/s] 30%|███ | 112918/371472 [8:58:44<19:33:28, 3.67it/s] 30%|███ | 112919/371472 [8:58:44<19:16:27, 3.73it/s] 30%|███ | 112920/371472 [8:58:44<19:23:21, 3.70it/s] {'loss': 3.5074, 'learning_rate': 7.267555862009524e-07, 'epoch': 4.86} + 30%|███ | 112920/371472 [8:58:44<19:23:21, 3.70it/s] 30%|███ | 112921/371472 [8:58:44<19:39:43, 3.65it/s] 30%|███ | 112922/371472 [8:58:45<20:12:25, 3.55it/s] 30%|███ | 112923/371472 [8:58:45<20:33:52, 3.49it/s] 30%|███ | 112924/371472 [8:58:45<22:01:24, 3.26it/s] 30%|███ | 112925/371472 [8:58:46<21:26:41, 3.35it/s] 30%|███ | 112926/371472 [8:58:46<21:50:48, 3.29it/s] 30%|███ | 112927/371472 [8:58:46<21:15:40, 3.38it/s] 30%|███ | 112928/371472 [8:58:46<21:31:37, 3.34it/s] 30%|███ | 112929/371472 [8:58:47<21:13:24, 3.38it/s] 30%|███ | 112930/371472 [8:58:47<21:20:56, 3.36it/s] 30%|███ | 112931/371472 [8:58:47<21:34:45, 3.33it/s] 30%|███ | 112932/371472 [8:58:48<20:48:15, 3.45it/s] 30%|███ | 112933/371472 [8:58:48<20:19:45, 3.53it/s] 30%|███ | 112934/371472 [8:58:48<19:49:41, 3.62it/s] 30%|███ | 112935/371472 [8:58:49<21:44:52, 3.30it/s] 30%|███ | 112936/371472 [8:58:49<20:53:35, 3.44it/s] 30%|███ | 112937/371472 [8:58:49<19:58:38, 3.59it/s] 30%|███ | 112938/371472 [8:58:49<20:06:40, 3.57it/s] 30%|███ | 112939/371472 [8:58:50<20:27:59, 3.51it/s] 30%|███ | 112940/371472 [8:58:50<20:28:27, 3.51it/s] {'loss': 3.4051, 'learning_rate': 7.267071042254735e-07, 'epoch': 4.86} + 30%|███ | 112940/371472 [8:58:50<20:28:27, 3.51it/s] 30%|███ | 112941/371472 [8:58:50<19:55:52, 3.60it/s] 30%|███ | 112942/371472 [8:58:50<20:05:23, 3.57it/s] 30%|███ | 112943/371472 [8:58:51<19:58:04, 3.60it/s] 30%|███ | 112944/371472 [8:58:51<20:07:05, 3.57it/s] 30%|███ | 112945/371472 [8:58:51<19:40:01, 3.65it/s] 30%|███ | 112946/371472 [8:58:52<19:35:25, 3.67it/s] 30%|███ | 112947/371472 [8:58:52<19:27:53, 3.69it/s] 30%|███ | 112948/371472 [8:58:52<22:14:44, 3.23it/s] 30%|███ | 112949/371472 [8:58:53<22:22:24, 3.21it/s] 30%|███ | 112950/371472 [8:58:53<22:08:17, 3.24it/s] 30%|███ | 112951/371472 [8:58:53<21:29:22, 3.34it/s] 30%|███ | 112952/371472 [8:58:53<20:50:25, 3.45it/s] 30%|███ | 112953/371472 [8:58:54<20:09:34, 3.56it/s] 30%|███ | 112954/371472 [8:58:54<19:48:36, 3.62it/s] 30%|███ | 112955/371472 [8:58:54<19:36:52, 3.66it/s] 30%|███ | 112956/371472 [8:58:55<21:06:28, 3.40it/s] 30%|███ | 112957/371472 [8:58:55<22:54:23, 3.13it/s] 30%|███ | 112958/371472 [8:58:55<22:35:26, 3.18it/s] 30%|███ | 112959/371472 [8:58:55<21:34:34, 3.33it/s] 30%|███ | 112960/371472 [8:58:56<20:26:41, 3.51it/s] {'loss': 3.3751, 'learning_rate': 7.266586222499946e-07, 'epoch': 4.87} + 30%|███ | 112960/371472 [8:58:56<20:26:41, 3.51it/s] 30%|███ | 112961/371472 [8:58:56<19:49:51, 3.62it/s] 30%|███ | 112962/371472 [8:58:56<20:16:39, 3.54it/s] 30%|███ | 112963/371472 [8:58:57<21:05:28, 3.40it/s] 30%|███ | 112964/371472 [8:58:57<21:27:16, 3.35it/s] 30%|███ | 112965/371472 [8:58:57<21:00:21, 3.42it/s] 30%|███ | 112966/371472 [8:58:57<21:56:41, 3.27it/s] 30%|███ | 112967/371472 [8:58:58<20:46:04, 3.46it/s] 30%|███ | 112968/371472 [8:58:58<20:43:05, 3.47it/s] 30%|███ | 112969/371472 [8:58:58<21:50:36, 3.29it/s] 30%|███ | 112970/371472 [8:58:59<21:32:24, 3.33it/s] 30%|███ | 112971/371472 [8:58:59<21:04:01, 3.41it/s] 30%|███ | 112972/371472 [8:58:59<23:02:26, 3.12it/s] 30%|███ | 112973/371472 [8:59:00<24:19:26, 2.95it/s] 30%|███ | 112974/371472 [8:59:00<23:16:32, 3.08it/s] 30%|███ | 112975/371472 [8:59:00<21:31:44, 3.34it/s] 30%|███ | 112976/371472 [8:59:00<20:09:09, 3.56it/s] 30%|███ | 112977/371472 [8:59:01<19:54:24, 3.61it/s] 30%|███ | 112978/371472 [8:59:01<20:10:28, 3.56it/s] 30%|███ | 112979/371472 [8:59:01<19:55:32, 3.60it/s] 30%|███ | 112980/371472 [8:59:02<19:16:36, 3.72it/s] {'loss': 3.481, 'learning_rate': 7.266101402745158e-07, 'epoch': 4.87} + 30%|███ | 112980/371472 [8:59:02<19:16:36, 3.72it/s] 30%|███ | 112981/371472 [8:59:02<20:11:55, 3.55it/s] 30%|███ | 112982/371472 [8:59:02<19:35:50, 3.66it/s] 30%|███ | 112983/371472 [8:59:02<19:42:10, 3.64it/s] 30%|███ | 112984/371472 [8:59:03<19:40:35, 3.65it/s] 30%|███ | 112985/371472 [8:59:03<19:08:07, 3.75it/s] 30%|███ | 112986/371472 [8:59:03<18:57:53, 3.79it/s] 30%|███ | 112987/371472 [8:59:03<18:53:03, 3.80it/s] 30%|███ | 112988/371472 [8:59:04<19:05:24, 3.76it/s] 30%|███ | 112989/371472 [8:59:04<18:56:51, 3.79it/s] 30%|███ | 112990/371472 [8:59:04<19:48:03, 3.63it/s] 30%|███ | 112991/371472 [8:59:05<21:51:26, 3.28it/s] 30%|███ | 112992/371472 [8:59:05<20:56:42, 3.43it/s] 30%|███ | 112993/371472 [8:59:05<21:09:11, 3.39it/s] 30%|███ | 112994/371472 [8:59:05<20:09:39, 3.56it/s] 30%|███ | 112995/371472 [8:59:06<20:16:39, 3.54it/s] 30%|███ | 112996/371472 [8:59:06<20:05:43, 3.57it/s] 30%|███ | 112997/371472 [8:59:06<19:48:51, 3.62it/s] 30%|███ | 112998/371472 [8:59:07<19:36:53, 3.66it/s] 30%|███ | 112999/371472 [8:59:07<20:31:28, 3.50it/s] 30%|███ | 113000/371472 [8:59:07<20:49:35, 3.45it/s] {'loss': 3.502, 'learning_rate': 7.265616582990368e-07, 'epoch': 4.87} + 30%|███ | 113000/371472 [8:59:07<20:49:35, 3.45it/s] 30%|███ | 113001/371472 [8:59:07<20:32:23, 3.50it/s] 30%|███ | 113002/371472 [8:59:08<21:52:05, 3.28it/s] 30%|███ | 113003/371472 [8:59:08<20:50:40, 3.44it/s] 30%|███ | 113004/371472 [8:59:08<20:09:30, 3.56it/s] 30%|███ | 113005/371472 [8:59:09<20:51:05, 3.44it/s] 30%|███ | 113006/371472 [8:59:09<20:10:01, 3.56it/s] 30%|███ | 113007/371472 [8:59:09<21:13:29, 3.38it/s] 30%|███ | 113008/371472 [8:59:09<21:03:53, 3.41it/s] 30%|███ | 113009/371472 [8:59:10<20:54:49, 3.43it/s] 30%|███ | 113010/371472 [8:59:10<20:15:17, 3.54it/s] 30%|███ | 113011/371472 [8:59:10<21:01:36, 3.41it/s] 30%|███ | 113012/371472 [8:59:11<20:54:52, 3.43it/s] 30%|███ | 113013/371472 [8:59:11<20:42:06, 3.47it/s] 30%|███ | 113014/371472 [8:59:11<20:47:36, 3.45it/s] 30%|███ | 113015/371472 [8:59:11<20:24:11, 3.52it/s] 30%|███ | 113016/371472 [8:59:12<19:44:29, 3.64it/s] 30%|███ | 113017/371472 [8:59:12<19:09:25, 3.75it/s] 30%|███ | 113018/371472 [8:59:12<19:04:52, 3.76it/s] 30%|███ | 113019/371472 [8:59:13<19:00:26, 3.78it/s] 30%|███ | 113020/371472 [8:59:13<18:54:23, 3.80it/s] {'loss': 3.4113, 'learning_rate': 7.265131763235579e-07, 'epoch': 4.87} + 30%|███ | 113020/371472 [8:59:13<18:54:23, 3.80it/s] 30%|███ | 113021/371472 [8:59:13<20:06:54, 3.57it/s] 30%|███ | 113022/371472 [8:59:13<19:28:14, 3.69it/s] 30%|███ | 113023/371472 [8:59:14<19:00:37, 3.78it/s] 30%|███ | 113024/371472 [8:59:14<18:54:31, 3.80it/s] 30%|███ | 113025/371472 [8:59:14<21:01:50, 3.41it/s] 30%|███ | 113026/371472 [8:59:15<21:35:37, 3.32it/s] 30%|███ | 113027/371472 [8:59:15<24:21:04, 2.95it/s] 30%|███ | 113028/371472 [8:59:15<22:41:22, 3.16it/s] 30%|███ | 113029/371472 [8:59:15<21:21:38, 3.36it/s] 30%|███ | 113030/371472 [8:59:16<21:44:56, 3.30it/s] 30%|███ | 113031/371472 [8:59:16<22:11:24, 3.24it/s] 30%|███ | 113032/371472 [8:59:16<21:57:31, 3.27it/s] 30%|███ | 113033/371472 [8:59:17<21:31:17, 3.34it/s] 30%|███ | 113034/371472 [8:59:17<22:01:17, 3.26it/s] 30%|███ | 113035/371472 [8:59:17<20:52:14, 3.44it/s] 30%|███ | 113036/371472 [8:59:18<20:14:31, 3.55it/s] 30%|███ | 113037/371472 [8:59:18<19:47:58, 3.63it/s] 30%|███ | 113038/371472 [8:59:18<19:41:50, 3.64it/s] 30%|███ | 113039/371472 [8:59:18<21:11:23, 3.39it/s] 30%|███ | 113040/371472 [8:59:19<19:56:51, 3.60it/s] {'loss': 3.4011, 'learning_rate': 7.26464694348079e-07, 'epoch': 4.87} + 30%|███ | 113040/371472 [8:59:19<19:56:51, 3.60it/s] 30%|███ | 113041/371472 [8:59:19<19:07:12, 3.75it/s] 30%|███ | 113042/371472 [8:59:19<18:43:39, 3.83it/s] 30%|███ | 113043/371472 [8:59:19<19:34:52, 3.67it/s] 30%|███ | 113044/371472 [8:59:20<20:02:33, 3.58it/s] 30%|███ | 113045/371472 [8:59:20<19:59:19, 3.59it/s] 30%|███ | 113046/371472 [8:59:20<19:19:20, 3.72it/s] 30%|███ | 113047/371472 [8:59:21<19:13:07, 3.74it/s] 30%|███ | 113048/371472 [8:59:21<19:14:47, 3.73it/s] 30%|███ | 113049/371472 [8:59:21<19:28:10, 3.69it/s] 30%|███ | 113050/371472 [8:59:21<19:58:22, 3.59it/s] 30%|███ | 113051/371472 [8:59:22<20:57:34, 3.42it/s] 30%|███ | 113052/371472 [8:59:22<21:27:57, 3.34it/s] 30%|███ | 113053/371472 [8:59:22<22:41:12, 3.16it/s] 30%|███ | 113054/371472 [8:59:23<23:01:57, 3.12it/s] 30%|███ | 113055/371472 [8:59:23<21:27:45, 3.34it/s] 30%|███ | 113056/371472 [8:59:23<21:52:38, 3.28it/s] 30%|███ | 113057/371472 [8:59:24<21:32:28, 3.33it/s] 30%|███ | 113058/371472 [8:59:24<20:08:03, 3.57it/s] 30%|███ | 113059/371472 [8:59:24<19:43:09, 3.64it/s] 30%|███ | 113060/371472 [8:59:24<19:38:25, 3.65it/s] {'loss': 3.3976, 'learning_rate': 7.264162123726e-07, 'epoch': 4.87} + 30%|███ | 113060/371472 [8:59:24<19:38:25, 3.65it/s] 30%|███ | 113061/371472 [8:59:25<19:14:52, 3.73it/s] 30%|███ | 113062/371472 [8:59:25<19:05:01, 3.76it/s] 30%|███ | 113063/371472 [8:59:25<19:24:14, 3.70it/s] 30%|███ | 113064/371472 [8:59:25<20:11:26, 3.56it/s] 30%|███ | 113065/371472 [8:59:26<19:51:24, 3.61it/s] 30%|███ | 113066/371472 [8:59:26<19:49:03, 3.62it/s] 30%|███ | 113067/371472 [8:59:26<19:50:54, 3.62it/s] 30%|███ | 113068/371472 [8:59:26<19:15:21, 3.73it/s] 30%|███ | 113069/371472 [8:59:27<19:26:27, 3.69it/s] 30%|███ | 113070/371472 [8:59:27<20:22:05, 3.52it/s] 30%|███ | 113071/371472 [8:59:27<20:45:32, 3.46it/s] 30%|███ | 113072/371472 [8:59:28<22:20:33, 3.21it/s] 30%|███ | 113073/371472 [8:59:28<21:04:29, 3.41it/s] 30%|███ | 113074/371472 [8:59:28<21:49:28, 3.29it/s] 30%|███ | 113075/371472 [8:59:29<21:22:43, 3.36it/s] 30%|███ | 113076/371472 [8:59:29<20:16:54, 3.54it/s] 30%|███ | 113077/371472 [8:59:29<19:29:50, 3.68it/s] 30%|███ | 113078/371472 [8:59:29<19:30:33, 3.68it/s] 30%|███ | 113079/371472 [8:59:30<20:16:25, 3.54it/s] 30%|███ | 113080/371472 [8:59:30<20:00:04, 3.59it/s] {'loss': 3.4237, 'learning_rate': 7.263677303971212e-07, 'epoch': 4.87} + 30%|███ | 113080/371472 [8:59:30<20:00:04, 3.59it/s] 30%|███ | 113081/371472 [8:59:30<20:06:15, 3.57it/s] 30%|███ | 113082/371472 [8:59:31<19:45:12, 3.63it/s] 30%|███ | 113083/371472 [8:59:31<20:43:33, 3.46it/s] 30%|███ | 113084/371472 [8:59:31<21:34:02, 3.33it/s] 30%|███ | 113085/371472 [8:59:31<21:02:38, 3.41it/s] 30%|███ | 113086/371472 [8:59:32<20:52:14, 3.44it/s] 30%|███ | 113087/371472 [8:59:32<20:50:14, 3.44it/s] 30%|███ | 113088/371472 [8:59:32<20:31:19, 3.50it/s] 30%|███ | 113089/371472 [8:59:33<20:31:15, 3.50it/s] 30%|███ | 113090/371472 [8:59:33<20:18:23, 3.53it/s] 30%|███ | 113091/371472 [8:59:33<20:05:35, 3.57it/s] 30%|███ | 113092/371472 [8:59:33<19:14:58, 3.73it/s] 30%|███ | 113093/371472 [8:59:34<19:19:44, 3.71it/s] 30%|███ | 113094/371472 [8:59:34<20:07:32, 3.57it/s] 30%|███ | 113095/371472 [8:59:34<20:13:17, 3.55it/s] 30%|███ | 113096/371472 [8:59:35<20:47:23, 3.45it/s] 30%|███ | 113097/371472 [8:59:35<19:53:02, 3.61it/s] 30%|███ | 113098/371472 [8:59:35<19:30:53, 3.68it/s] 30%|███ | 113099/371472 [8:59:35<20:17:54, 3.54it/s] 30%|███ | 113100/371472 [8:59:36<19:47:48, 3.63it/s] {'loss': 3.4112, 'learning_rate': 7.263192484216423e-07, 'epoch': 4.87} + 30%|███ | 113100/371472 [8:59:36<19:47:48, 3.63it/s] 30%|███ | 113101/371472 [8:59:36<20:28:45, 3.50it/s] 30%|███ | 113102/371472 [8:59:36<20:41:26, 3.47it/s] 30%|███ | 113103/371472 [8:59:36<19:58:38, 3.59it/s] 30%|███ | 113104/371472 [8:59:37<19:21:33, 3.71it/s] 30%|███ | 113105/371472 [8:59:37<19:29:49, 3.68it/s] 30%|███ | 113106/371472 [8:59:37<18:49:04, 3.81it/s] 30%|███ | 113107/371472 [8:59:38<19:10:14, 3.74it/s] 30%|███ | 113108/371472 [8:59:38<18:36:54, 3.86it/s] 30%|███ | 113109/371472 [8:59:38<19:03:40, 3.77it/s] 30%|███ | 113110/371472 [8:59:38<20:25:00, 3.52it/s] 30%|███ | 113111/371472 [8:59:39<20:31:02, 3.50it/s] 30%|███ | 113112/371472 [8:59:39<19:41:23, 3.64it/s] 30%|███ | 113113/371472 [8:59:39<19:34:39, 3.67it/s] 30%|███ | 113114/371472 [8:59:39<19:27:10, 3.69it/s] 30%|███ | 113115/371472 [8:59:40<19:14:59, 3.73it/s] 30%|███ | 113116/371472 [8:59:40<19:03:00, 3.77it/s] 30%|███ | 113117/371472 [8:59:40<19:11:45, 3.74it/s] 30%|███ | 113118/371472 [8:59:41<19:47:40, 3.63it/s] 30%|███ | 113119/371472 [8:59:41<21:19:09, 3.37it/s] 30%|███ | 113120/371472 [8:59:41<20:22:30, 3.52it/s] {'loss': 3.3411, 'learning_rate': 7.262707664461634e-07, 'epoch': 4.87} + 30%|███ | 113120/371472 [8:59:41<20:22:30, 3.52it/s] 30%|███ | 113121/371472 [8:59:41<21:01:03, 3.41it/s] 30%|███ | 113122/371472 [8:59:42<20:19:10, 3.53it/s] 30%|███ | 113123/371472 [8:59:42<19:47:02, 3.63it/s] 30%|███ | 113124/371472 [8:59:42<19:44:02, 3.64it/s] 30%|███ | 113125/371472 [8:59:42<19:44:58, 3.63it/s] 30%|███ | 113126/371472 [8:59:43<19:28:37, 3.68it/s] 30%|███ | 113127/371472 [8:59:43<19:49:31, 3.62it/s] 30%|███ | 113128/371472 [8:59:43<19:29:07, 3.68it/s] 30%|███ | 113129/371472 [8:59:44<21:36:52, 3.32it/s] 30%|███ | 113130/371472 [8:59:44<20:55:02, 3.43it/s] 30%|███ | 113131/371472 [8:59:44<19:48:40, 3.62it/s] 30%|███ | 113132/371472 [8:59:44<20:03:16, 3.58it/s] 30%|███ | 113133/371472 [8:59:45<19:37:15, 3.66it/s] 30%|███ | 113134/371472 [8:59:45<19:31:58, 3.67it/s] 30%|███ | 113135/371472 [8:59:45<19:44:07, 3.64it/s] 30%|███ | 113136/371472 [8:59:46<18:58:51, 3.78it/s] 30%|███ | 113137/371472 [8:59:46<18:58:04, 3.78it/s] 30%|███ | 113138/371472 [8:59:46<20:10:47, 3.56it/s] 30%|███ | 113139/371472 [8:59:46<19:37:57, 3.66it/s] 30%|███ | 113140/371472 [8:59:47<20:38:09, 3.48it/s] {'loss': 3.5716, 'learning_rate': 7.262222844706845e-07, 'epoch': 4.87} + 30%|███ | 113140/371472 [8:59:47<20:38:09, 3.48it/s] 30%|███ | 113141/371472 [8:59:47<19:57:18, 3.60it/s] 30%|███ | 113142/371472 [8:59:47<20:22:40, 3.52it/s] 30%|███ | 113143/371472 [8:59:48<21:54:21, 3.28it/s] 30%|███ | 113144/371472 [8:59:48<20:53:00, 3.44it/s] 30%|███ | 113145/371472 [8:59:48<20:16:20, 3.54it/s] 30%|███ | 113146/371472 [8:59:48<20:46:09, 3.45it/s] 30%|███ | 113147/371472 [8:59:49<20:26:04, 3.51it/s] 30%|███ | 113148/371472 [8:59:49<19:43:42, 3.64it/s] 30%|███ | 113149/371472 [8:59:49<19:26:41, 3.69it/s] 30%|███ | 113150/371472 [8:59:49<19:35:36, 3.66it/s] 30%|███ | 113151/371472 [8:59:50<20:11:23, 3.55it/s] 30%|███ | 113152/371472 [8:59:50<21:28:49, 3.34it/s] 30%|███ | 113153/371472 [8:59:50<21:03:54, 3.41it/s] 30%|███ | 113154/371472 [8:59:51<20:26:07, 3.51it/s] 30%|███ | 113155/371472 [8:59:51<19:39:25, 3.65it/s] 30%|███ | 113156/371472 [8:59:51<21:22:30, 3.36it/s] 30%|███ | 113157/371472 [8:59:52<21:34:37, 3.33it/s] 30%|███ | 113158/371472 [8:59:52<20:34:28, 3.49it/s] 30%|███ | 113159/371472 [8:59:52<22:02:30, 3.26it/s] 30%|███ | 113160/371472 [8:59:52<21:32:41, 3.33it/s] {'loss': 3.301, 'learning_rate': 7.261738024952056e-07, 'epoch': 4.87} + 30%|███ | 113160/371472 [8:59:52<21:32:41, 3.33it/s] 30%|███ | 113161/371472 [8:59:53<22:01:53, 3.26it/s] 30%|███ | 113162/371472 [8:59:53<21:27:09, 3.34it/s] 30%|███ | 113163/371472 [8:59:53<21:54:40, 3.27it/s] 30%|███ | 113164/371472 [8:59:54<22:40:59, 3.16it/s] 30%|███ | 113165/371472 [8:59:54<22:02:07, 3.26it/s] 30%|███ | 113166/371472 [8:59:54<22:21:39, 3.21it/s] 30%|███ | 113167/371472 [8:59:55<21:38:15, 3.32it/s] 30%|███ | 113168/371472 [8:59:55<21:05:58, 3.40it/s] 30%|███ | 113169/371472 [8:59:55<20:42:36, 3.46it/s] 30%|███ | 113170/371472 [8:59:55<20:27:33, 3.51it/s] 30%|███ | 113171/371472 [8:59:56<20:25:18, 3.51it/s] 30%|███ | 113172/371472 [8:59:56<20:33:25, 3.49it/s] 30%|███ | 113173/371472 [8:59:56<21:05:05, 3.40it/s] 30%|███ | 113174/371472 [8:59:57<21:28:05, 3.34it/s] 30%|███ | 113175/371472 [8:59:57<20:51:51, 3.44it/s] 30%|███ | 113176/371472 [8:59:57<21:10:18, 3.39it/s] 30%|███ | 113177/371472 [8:59:58<21:12:46, 3.38it/s] 30%|███ | 113178/371472 [8:59:58<20:55:33, 3.43it/s] 30%|███ | 113179/371472 [8:59:58<20:35:40, 3.48it/s] 30%|███ | 113180/371472 [8:59:58<20:57:53, 3.42it/s] {'loss': 3.4115, 'learning_rate': 7.261253205197267e-07, 'epoch': 4.87} + 30%|███ | 113180/371472 [8:59:58<20:57:53, 3.42it/s] 30%|███ | 113181/371472 [8:59:59<21:44:03, 3.30it/s] 30%|███ | 113182/371472 [8:59:59<21:21:54, 3.36it/s] 30%|███ | 113183/371472 [8:59:59<20:31:09, 3.50it/s] 30%|███ | 113184/371472 [9:00:00<21:16:31, 3.37it/s] 30%|███ | 113185/371472 [9:00:00<20:42:12, 3.47it/s] 30%|███ | 113186/371472 [9:00:00<20:08:09, 3.56it/s] 30%|███ | 113187/371472 [9:00:00<19:27:03, 3.69it/s] 30%|███ | 113188/371472 [9:00:01<19:43:10, 3.64it/s] 30%|███ | 113189/371472 [9:00:01<20:24:07, 3.52it/s] 30%|███ | 113190/371472 [9:00:01<20:21:30, 3.52it/s] 30%|███ | 113191/371472 [9:00:02<21:04:38, 3.40it/s] 30%|███ | 113192/371472 [9:00:02<21:09:27, 3.39it/s] 30%|███ | 113193/371472 [9:00:02<21:20:29, 3.36it/s] 30%|██��� | 113194/371472 [9:00:02<20:18:59, 3.53it/s] 30%|███ | 113195/371472 [9:00:03<21:10:19, 3.39it/s] 30%|███ | 113196/371472 [9:00:03<20:09:28, 3.56it/s] 30%|███ | 113197/371472 [9:00:03<19:54:47, 3.60it/s] 30%|███ | 113198/371472 [9:00:03<19:24:55, 3.70it/s] 30%|███ | 113199/371472 [9:00:04<19:10:13, 3.74it/s] 30%|███ | 113200/371472 [9:00:04<18:48:59, 3.81it/s] {'loss': 3.2897, 'learning_rate': 7.260768385442479e-07, 'epoch': 4.88} + 30%|███ | 113200/371472 [9:00:04<18:48:59, 3.81it/s] 30%|███ | 113201/371472 [9:00:04<19:39:02, 3.65it/s] 30%|███ | 113202/371472 [9:00:05<19:20:34, 3.71it/s] 30%|███ | 113203/371472 [9:00:05<19:17:22, 3.72it/s] 30%|███ | 113204/371472 [9:00:05<18:53:44, 3.80it/s] 30%|███ | 113205/371472 [9:00:05<19:01:58, 3.77it/s] 30%|███ | 113206/371472 [9:00:06<18:39:30, 3.84it/s] 30%|███ | 113207/371472 [9:00:06<19:37:19, 3.66it/s] 30%|███ | 113208/371472 [9:00:06<22:40:33, 3.16it/s] 30%|███ | 113209/371472 [9:00:07<21:36:53, 3.32it/s] 30%|███ | 113210/371472 [9:00:07<21:13:08, 3.38it/s] 30%|███ | 113211/371472 [9:00:07<20:07:53, 3.56it/s] 30%|███ | 113212/371472 [9:00:07<20:37:13, 3.48it/s] 30%|███ | 113213/371472 [9:00:08<21:30:37, 3.34it/s] 30%|███ | 113214/371472 [9:00:08<21:40:26, 3.31it/s] 30%|███ | 113215/371472 [9:00:08<22:24:06, 3.20it/s] 30%|███ | 113216/371472 [9:00:09<21:23:52, 3.35it/s] 30%|███ | 113217/371472 [9:00:09<23:22:11, 3.07it/s] 30%|███ | 113218/371472 [9:00:09<21:22:54, 3.36it/s] 30%|███ | 113219/371472 [9:00:10<21:51:46, 3.28it/s] 30%|███ | 113220/371472 [9:00:10<20:49:04, 3.45it/s] {'loss': 3.2583, 'learning_rate': 7.26028356568769e-07, 'epoch': 4.88} + 30%|███ | 113220/371472 [9:00:10<20:49:04, 3.45it/s] 30%|███ | 113221/371472 [9:00:10<20:42:32, 3.46it/s] 30%|███ | 113222/371472 [9:00:10<20:07:11, 3.57it/s] 30%|███ | 113223/371472 [9:00:11<19:29:32, 3.68it/s] 30%|███ | 113224/371472 [9:00:11<19:23:17, 3.70it/s] 30%|███ | 113225/371472 [9:00:11<18:45:10, 3.83it/s] 30%|███ | 113226/371472 [9:00:11<19:23:59, 3.70it/s] 30%|███ | 113227/371472 [9:00:12<19:10:55, 3.74it/s] 30%|███ | 113228/371472 [9:00:12<19:30:36, 3.68it/s] 30%|███ | 113229/371472 [9:00:12<19:25:09, 3.69it/s] 30%|███ | 113230/371472 [9:00:13<20:00:47, 3.58it/s] 30%|███ | 113231/371472 [9:00:13<19:54:24, 3.60it/s] 30%|███ | 113232/371472 [9:00:13<19:27:55, 3.69it/s] 30%|███ | 113233/371472 [9:00:13<19:10:29, 3.74it/s] 30%|███ | 113234/371472 [9:00:14<19:29:48, 3.68it/s] 30%|███ | 113235/371472 [9:00:14<20:20:13, 3.53it/s] 30%|███ | 113236/371472 [9:00:14<19:42:34, 3.64it/s] 30%|███ | 113237/371472 [9:00:14<19:18:13, 3.72it/s] 30%|███ | 113238/371472 [9:00:15<18:32:39, 3.87it/s] 30%|███ | 113239/371472 [9:00:15<18:46:06, 3.82it/s] 30%|███ | 113240/371472 [9:00:15<18:30:55, 3.87it/s] {'loss': 3.3965, 'learning_rate': 7.259798745932901e-07, 'epoch': 4.88} + 30%|███ | 113240/371472 [9:00:15<18:30:55, 3.87it/s] 30%|███ | 113241/371472 [9:00:15<17:59:00, 3.99it/s] 30%|███ | 113242/371472 [9:00:16<18:02:47, 3.97it/s] 30%|███ | 113243/371472 [9:00:16<18:47:13, 3.82it/s] 30%|███ | 113244/371472 [9:00:16<19:01:31, 3.77it/s] 30%|███ | 113245/371472 [9:00:17<18:59:04, 3.78it/s] 30%|███ | 113246/371472 [9:00:17<18:58:05, 3.78it/s] 30%|███ | 113247/371472 [9:00:17<18:37:22, 3.85it/s] 30%|███ | 113248/371472 [9:00:17<18:43:47, 3.83it/s] 30%|███ | 113249/371472 [9:00:18<24:53:41, 2.88it/s] 30%|███ | 113250/371472 [9:00:18<23:34:34, 3.04it/s] 30%|███ | 113251/371472 [9:00:18<22:46:15, 3.15it/s] 30%|███ | 113252/371472 [9:00:19<21:49:44, 3.29it/s] 30%|███ | 113253/371472 [9:00:19<21:16:12, 3.37it/s] 30%|███ | 113254/371472 [9:00:19<20:36:45, 3.48it/s] 30%|███ | 113255/371472 [9:00:20<23:20:16, 3.07it/s] 30%|███ | 113256/371472 [9:00:20<22:10:07, 3.24it/s] 30%|███ | 113257/371472 [9:00:20<22:06:11, 3.25it/s] 30%|███ | 113258/371472 [9:00:21<21:45:54, 3.30it/s] 30%|███ | 113259/371472 [9:00:21<23:48:07, 3.01it/s] 30%|███ | 113260/371472 [9:00:21<21:48:27, 3.29it/s] {'loss': 3.3279, 'learning_rate': 7.259313926178111e-07, 'epoch': 4.88} + 30%|███ | 113260/371472 [9:00:21<21:48:27, 3.29it/s] 30%|███ | 113261/371472 [9:00:21<21:34:19, 3.32it/s] 30%|███ | 113262/371472 [9:00:22<21:12:15, 3.38it/s] 30%|███ | 113263/371472 [9:00:22<21:39:35, 3.31it/s] 30%|███ | 113264/371472 [9:00:22<20:47:42, 3.45it/s] 30%|███ | 113265/371472 [9:00:23<20:27:36, 3.51it/s] 30%|███ | 113266/371472 [9:00:23<20:24:33, 3.51it/s] 30%|███ | 113267/371472 [9:00:23<20:13:09, 3.55it/s] 30%|███ | 113268/371472 [9:00:23<19:29:31, 3.68it/s] 30%|███ | 113269/371472 [9:00:24<19:32:11, 3.67it/s] 30%|███ | 113270/371472 [9:00:24<19:33:50, 3.67it/s] 30%|███ | 113271/371472 [9:00:24<19:09:05, 3.74it/s] 30%|███ | 113272/371472 [9:00:24<19:16:40, 3.72it/s] 30%|███ | 113273/371472 [9:00:25<19:22:48, 3.70it/s] 30%|███ | 113274/371472 [9:00:25<20:34:32, 3.49it/s] 30%|███ | 113275/371472 [9:00:25<23:11:51, 3.09it/s] 30%|███ | 113276/371472 [9:00:26<22:04:59, 3.25it/s] 30%|███ | 113277/371472 [9:00:26<21:26:46, 3.34it/s] 30%|███ | 113278/371472 [9:00:26<22:26:59, 3.19it/s] 30%|███ | 113279/371472 [9:00:27<21:15:49, 3.37it/s] 30%|███ | 113280/371472 [9:00:27<26:04:55, 2.75it/s] {'loss': 3.5487, 'learning_rate': 7.258829106423322e-07, 'epoch': 4.88} + 30%|███ | 113280/371472 [9:00:27<26:04:55, 2.75it/s] 30%|███ | 113281/371472 [9:00:27<23:50:10, 3.01it/s] 30%|███ | 113282/371472 [9:00:28<22:15:18, 3.22it/s] 30%|███ | 113283/371472 [9:00:28<20:56:39, 3.42it/s] 30%|███ | 113284/371472 [9:00:28<20:42:18, 3.46it/s] 30%|███ | 113285/371472 [9:00:28<20:07:55, 3.56it/s] 30%|███ | 113286/371472 [9:00:29<19:51:10, 3.61it/s] 30%|███ | 113287/371472 [9:00:29<19:22:04, 3.70it/s] 30%|███ | 113288/371472 [9:00:29<20:03:25, 3.58it/s] 30%|███ | 113289/371472 [9:00:30<19:31:11, 3.67it/s] 30%|███ | 113290/371472 [9:00:30<20:20:06, 3.53it/s] 30%|███ | 113291/371472 [9:00:30<20:19:11, 3.53it/s] 30%|███ | 113292/371472 [9:00:30<20:36:40, 3.48it/s] 30%|███ | 113293/371472 [9:00:31<19:52:59, 3.61it/s] 30%|███ | 113294/371472 [9:00:31<20:53:28, 3.43it/s] 30%|███ | 113295/371472 [9:00:31<20:48:59, 3.45it/s] 30%|███ | 113296/371472 [9:00:32<20:28:10, 3.50it/s] 30%|███ | 113297/371472 [9:00:32<20:11:13, 3.55it/s] 30%|███ | 113298/371472 [9:00:32<19:57:42, 3.59it/s] 31%|███ | 113299/371472 [9:00:32<19:21:07, 3.71it/s] 31%|███ | 113300/371472 [9:00:33<20:21:42, 3.52it/s] {'loss': 3.3615, 'learning_rate': 7.258344286668534e-07, 'epoch': 4.88} + 31%|███ | 113300/371472 [9:00:33<20:21:42, 3.52it/s] 31%|███ | 113301/371472 [9:00:33<20:09:44, 3.56it/s] 31%|███ | 113302/371472 [9:00:33<19:50:45, 3.61it/s] 31%|███ | 113303/371472 [9:00:33<19:32:17, 3.67it/s] 31%|███ | 113304/371472 [9:00:34<19:18:25, 3.71it/s] 31%|███ | 113305/371472 [9:00:34<20:32:58, 3.49it/s] 31%|███ | 113306/371472 [9:00:34<21:30:55, 3.33it/s] 31%|███ | 113307/371472 [9:00:35<21:16:25, 3.37it/s] 31%|███ | 113308/371472 [9:00:35<20:10:01, 3.56it/s] 31%|███ | 113309/371472 [9:00:35<20:15:34, 3.54it/s] 31%|███ | 113310/371472 [9:00:35<19:30:41, 3.68it/s] 31%|███ | 113311/371472 [9:00:36<19:24:49, 3.69it/s] 31%|███ | 113312/371472 [9:00:36<19:29:19, 3.68it/s] 31%|███ | 113313/371472 [9:00:36<20:56:34, 3.42it/s] 31%|███ | 113314/371472 [9:00:37<23:25:26, 3.06it/s] 31%|███ | 113315/371472 [9:00:37<22:22:13, 3.21it/s] 31%|███ | 113316/371472 [9:00:37<21:59:04, 3.26it/s] 31%|███ | 113317/371472 [9:00:38<21:39:50, 3.31it/s] 31%|███ | 113318/371472 [9:00:38<22:04:43, 3.25it/s] 31%|███ | 113319/371472 [9:00:38<21:41:48, 3.31it/s] 31%|███ | 113320/371472 [9:00:39<22:07:37, 3.24it/s] {'loss': 3.2451, 'learning_rate': 7.257859466913745e-07, 'epoch': 4.88} + 31%|███ | 113320/371472 [9:00:39<22:07:37, 3.24it/s] 31%|███ | 113321/371472 [9:00:39<23:24:02, 3.06it/s] 31%|███ | 113322/371472 [9:00:39<22:11:21, 3.23it/s] 31%|███ | 113323/371472 [9:00:39<21:51:34, 3.28it/s] 31%|███ | 113324/371472 [9:00:40<21:26:11, 3.35it/s] 31%|███ | 113325/371472 [9:00:40<21:41:05, 3.31it/s] 31%|███ | 113326/371472 [9:00:40<20:35:09, 3.48it/s] 31%|███ | 113327/371472 [9:00:41<20:15:57, 3.54it/s] 31%|███ | 113328/371472 [9:00:41<21:57:25, 3.27it/s] 31%|███ | 113329/371472 [9:00:41<20:25:09, 3.51it/s] 31%|███ | 113330/371472 [9:00:41<19:39:18, 3.65it/s] 31%|███ | 113331/371472 [9:00:42<19:45:23, 3.63it/s] 31%|███ | 113332/371472 [9:00:42<19:57:59, 3.59it/s] 31%|███ | 113333/371472 [9:00:42<19:58:44, 3.59it/s] 31%|███ | 113334/371472 [9:00:43<19:18:36, 3.71it/s] 31%|███ | 113335/371472 [9:00:43<19:13:03, 3.73it/s] 31%|███ | 113336/371472 [9:00:43<20:06:10, 3.57it/s] 31%|███ | 113337/371472 [9:00:43<21:39:37, 3.31it/s] 31%|███ | 113338/371472 [9:00:44<22:11:09, 3.23it/s] 31%|███ | 113339/371472 [9:00:44<22:08:25, 3.24it/s] 31%|███ | 113340/371472 [9:00:44<22:02:22, 3.25it/s] {'loss': 3.2804, 'learning_rate': 7.257374647158956e-07, 'epoch': 4.88} + 31%|███ | 113340/371472 [9:00:44<22:02:22, 3.25it/s] 31%|███ | 113341/371472 [9:00:45<22:39:20, 3.16it/s] 31%|███ | 113342/371472 [9:00:45<21:24:59, 3.35it/s] 31%|███ | 113343/371472 [9:00:45<19:59:33, 3.59it/s] 31%|███ | 113344/371472 [9:00:46<19:54:53, 3.60it/s] 31%|███ | 113345/371472 [9:00:46<19:59:45, 3.59it/s] 31%|███ | 113346/371472 [9:00:46<19:47:07, 3.62it/s] 31%|███ | 113347/371472 [9:00:46<19:06:18, 3.75it/s] 31%|███ | 113348/371472 [9:00:47<18:46:54, 3.82it/s] 31%|███ | 113349/371472 [9:00:47<19:20:48, 3.71it/s] 31%|███ | 113350/371472 [9:00:47<19:58:40, 3.59it/s] 31%|███ | 113351/371472 [9:00:47<19:33:05, 3.67it/s] 31%|███ | 113352/371472 [9:00:48<19:37:39, 3.65it/s] 31%|███ | 113353/371472 [9:00:48<20:23:13, 3.52it/s] 31%|███ | 113354/371472 [9:00:48<21:24:19, 3.35it/s] 31%|███ | 113355/371472 [9:00:49<21:12:26, 3.38it/s] 31%|███ | 113356/371472 [9:00:49<20:14:03, 3.54it/s] 31%|███ | 113357/371472 [9:00:49<21:34:53, 3.32it/s] 31%|███ | 113358/371472 [9:00:50<21:56:17, 3.27it/s] 31%|███ | 113359/371472 [9:00:50<21:09:37, 3.39it/s] 31%|███ | 113360/371472 [9:00:50<20:55:21, 3.43it/s] {'loss': 3.2816, 'learning_rate': 7.256889827404167e-07, 'epoch': 4.88} + 31%|███ | 113360/371472 [9:00:50<20:55:21, 3.43it/s] 31%|███ | 113361/371472 [9:00:50<20:40:34, 3.47it/s] 31%|███ | 113362/371472 [9:00:51<22:14:42, 3.22it/s] 31%|███ | 113363/371472 [9:00:51<21:11:16, 3.38it/s] 31%|███ | 113364/371472 [9:00:51<20:38:14, 3.47it/s] 31%|███ | 113365/371472 [9:00:52<19:53:18, 3.60it/s] 31%|███ | 113366/371472 [9:00:52<19:55:23, 3.60it/s] 31%|███ | 113367/371472 [9:00:52<19:39:56, 3.65it/s] 31%|███ | 113368/371472 [9:00:52<19:36:40, 3.66it/s] 31%|███ | 113369/371472 [9:00:53<19:35:11, 3.66it/s] 31%|███ | 113370/371472 [9:00:53<19:34:19, 3.66it/s] 31%|███ | 113371/371472 [9:00:53<20:27:06, 3.51it/s] 31%|███ | 113372/371472 [9:00:54<21:25:04, 3.35it/s] 31%|███ | 113373/371472 [9:00:54<21:02:39, 3.41it/s] 31%|███ | 113374/371472 [9:00:54<20:38:53, 3.47it/s] 31%|███ | 113375/371472 [9:00:54<20:33:55, 3.49it/s] 31%|███ | 113376/371472 [9:00:55<22:35:47, 3.17it/s] 31%|███ | 113377/371472 [9:00:55<21:20:42, 3.36it/s] 31%|███ | 113378/371472 [9:00:55<21:56:56, 3.27it/s] 31%|███ | 113379/371472 [9:00:56<22:07:11, 3.24it/s] 31%|███ | 113380/371472 [9:00:56<20:37:05, 3.48it/s] {'loss': 3.1812, 'learning_rate': 7.256405007649378e-07, 'epoch': 4.88} + 31%|███ | 113380/371472 [9:00:56<20:37:05, 3.48it/s] 31%|███ | 113381/371472 [9:00:56<20:31:41, 3.49it/s] 31%|███ | 113382/371472 [9:00:56<21:20:55, 3.36it/s] 31%|███ | 113383/371472 [9:00:57<20:52:30, 3.43it/s] 31%|███ | 113384/371472 [9:00:57<20:11:08, 3.55it/s] 31%|███ | 113385/371472 [9:00:57<19:33:37, 3.67it/s] 31%|███ | 113386/371472 [9:00:58<20:22:43, 3.52it/s] 31%|███ | 113387/371472 [9:00:58<19:45:22, 3.63it/s] 31%|███ | 113388/371472 [9:00:58<20:03:54, 3.57it/s] 31%|███ | 113389/371472 [9:00:58<20:10:24, 3.55it/s] 31%|███ | 113390/371472 [9:00:59<20:24:31, 3.51it/s] 31%|███ | 113391/371472 [9:00:59<20:19:05, 3.53it/s] 31%|███ | 113392/371472 [9:00:59<20:04:00, 3.57it/s] 31%|███ | 113393/371472 [9:01:00<20:48:04, 3.45it/s] 31%|███ | 113394/371472 [9:01:00<20:59:37, 3.41it/s] 31%|███ | 113395/371472 [9:01:00<21:01:49, 3.41it/s] 31%|███ | 113396/371472 [9:01:00<21:00:52, 3.41it/s] 31%|███ | 113397/371472 [9:01:01<19:56:11, 3.60it/s] 31%|███ | 113398/371472 [9:01:01<20:46:58, 3.45it/s] 31%|███ | 113399/371472 [9:01:01<19:59:53, 3.58it/s] 31%|███ | 113400/371472 [9:01:02<20:30:22, 3.50it/s] {'loss': 3.4344, 'learning_rate': 7.255920187894589e-07, 'epoch': 4.88} + 31%|███ | 113400/371472 [9:01:02<20:30:22, 3.50it/s] 31%|███ | 113401/371472 [9:01:02<20:32:48, 3.49it/s] 31%|███ | 113402/371472 [9:01:02<20:05:51, 3.57it/s] 31%|███ | 113403/371472 [9:01:02<20:57:00, 3.42it/s] 31%|███ | 113404/371472 [9:01:03<20:55:18, 3.43it/s] 31%|███ | 113405/371472 [9:01:03<20:14:40, 3.54it/s] 31%|███ | 113406/371472 [9:01:03<19:55:31, 3.60it/s] 31%|███ | 113407/371472 [9:01:04<20:01:04, 3.58it/s] 31%|███ | 113408/371472 [9:01:04<20:18:16, 3.53it/s] 31%|███ | 113409/371472 [9:01:04<19:35:18, 3.66it/s] 31%|███ | 113410/371472 [9:01:04<20:37:27, 3.48it/s] 31%|███ | 113411/371472 [9:01:05<20:35:46, 3.48it/s] 31%|███ | 113412/371472 [9:01:05<23:14:08, 3.09it/s] 31%|███ | 113413/371472 [9:01:05<23:11:08, 3.09it/s] 31%|███ | 113414/371472 [9:01:06<23:31:43, 3.05it/s] 31%|███ | 113415/371472 [9:01:06<22:52:50, 3.13it/s] 31%|███ | 113416/371472 [9:01:06<22:05:24, 3.24it/s] 31%|███ | 113417/371472 [9:01:07<20:56:09, 3.42it/s] 31%|███ | 113418/371472 [9:01:07<20:03:00, 3.58it/s] 31%|███ | 113419/371472 [9:01:07<19:20:20, 3.71it/s] 31%|███ | 113420/371472 [9:01:07<19:03:57, 3.76it/s] {'loss': 3.5236, 'learning_rate': 7.2554353681398e-07, 'epoch': 4.89} + 31%|███ | 113420/371472 [9:01:07<19:03:57, 3.76it/s] 31%|███ | 113421/371472 [9:01:08<19:00:06, 3.77it/s] 31%|███ | 113422/371472 [9:01:08<19:23:24, 3.70it/s] 31%|███ | 113423/371472 [9:01:08<20:00:54, 3.58it/s] 31%|███ | 113424/371472 [9:01:09<21:03:27, 3.40it/s] 31%|███ | 113425/371472 [9:01:09<20:34:38, 3.48it/s] 31%|███ | 113426/371472 [9:01:09<21:13:35, 3.38it/s] 31%|███ | 113427/371472 [9:01:09<20:53:59, 3.43it/s] 31%|███ | 113428/371472 [9:01:10<20:23:11, 3.52it/s] 31%|███ | 113429/371472 [9:01:10<20:12:31, 3.55it/s] 31%|███ | 113430/371472 [9:01:10<20:44:30, 3.46it/s] 31%|███ | 113431/371472 [9:01:11<20:25:16, 3.51it/s] 31%|███ | 113432/371472 [9:01:11<20:10:17, 3.55it/s] 31%|███ | 113433/371472 [9:01:11<22:11:13, 3.23it/s] 31%|███ | 113434/371472 [9:01:11<20:57:55, 3.42it/s] 31%|███ | 113435/371472 [9:01:12<21:27:59, 3.34it/s] 31%|███ | 113436/371472 [9:01:12<21:10:37, 3.38it/s] 31%|███ | 113437/371472 [9:01:12<22:06:00, 3.24it/s] 31%|███ | 113438/371472 [9:01:13<21:21:11, 3.36it/s] 31%|███ | 113439/371472 [9:01:13<21:48:16, 3.29it/s] 31%|███ | 113440/371472 [9:01:13<22:17:48, 3.21it/s] {'loss': 3.4669, 'learning_rate': 7.254950548385011e-07, 'epoch': 4.89} + 31%|███ | 113440/371472 [9:01:13<22:17:48, 3.21it/s] 31%|███ | 113441/371472 [9:01:14<21:39:46, 3.31it/s] 31%|███ | 113442/371472 [9:01:14<20:26:32, 3.51it/s] 31%|███ | 113443/371472 [9:01:14<20:37:26, 3.48it/s] 31%|███ | 113444/371472 [9:01:14<20:46:15, 3.45it/s] 31%|███ | 113445/371472 [9:01:15<20:55:10, 3.43it/s] 31%|███ | 113446/371472 [9:01:15<20:11:14, 3.55it/s] 31%|███ | 113447/371472 [9:01:15<19:17:46, 3.71it/s] 31%|███ | 113448/371472 [9:01:15<19:04:10, 3.76it/s] 31%|███ | 113449/371472 [9:01:16<19:05:19, 3.75it/s] 31%|███ | 113450/371472 [9:01:16<19:21:40, 3.70it/s] 31%|███ | 113451/371472 [9:01:16<18:54:53, 3.79it/s] 31%|███ | 113452/371472 [9:01:16<18:28:41, 3.88it/s] 31%|███ | 113453/371472 [9:01:17<18:20:08, 3.91it/s] 31%|███ | 113454/371472 [9:01:17<18:06:19, 3.96it/s] 31%|███ | 113455/371472 [9:01:17<20:14:51, 3.54it/s] 31%|███ | 113456/371472 [9:01:18<19:47:21, 3.62it/s] 31%|███ | 113457/371472 [9:01:18<21:45:23, 3.29it/s] 31%|███ | 113458/371472 [9:01:18<21:37:01, 3.32it/s] 31%|███ | 113459/371472 [9:01:19<20:51:18, 3.44it/s] 31%|███ | 113460/371472 [9:01:19<20:16:18, 3.54it/s] {'loss': 3.7011, 'learning_rate': 7.254465728630223e-07, 'epoch': 4.89} + 31%|███ | 113460/371472 [9:01:19<20:16:18, 3.54it/s] 31%|███ | 113461/371472 [9:01:19<20:41:45, 3.46it/s] 31%|███ | 113462/371472 [9:01:19<20:19:18, 3.53it/s] 31%|███ | 113463/371472 [9:01:20<21:00:30, 3.41it/s] 31%|███ | 113464/371472 [9:01:20<19:48:56, 3.62it/s] 31%|███ | 113465/371472 [9:01:20<21:24:52, 3.35it/s] 31%|███ | 113466/371472 [9:01:21<20:24:41, 3.51it/s] 31%|███ | 113467/371472 [9:01:21<19:04:39, 3.76it/s] 31%|███ | 113468/371472 [9:01:21<18:52:58, 3.80it/s] 31%|███ | 113469/371472 [9:01:21<19:23:49, 3.69it/s] 31%|███ | 113470/371472 [9:01:22<19:46:56, 3.62it/s] 31%|███ | 113471/371472 [9:01:22<19:14:04, 3.73it/s] 31%|███ | 113472/371472 [9:01:22<19:04:59, 3.76it/s] 31%|███ | 113473/371472 [9:01:22<19:24:05, 3.69it/s] 31%|███ | 113474/371472 [9:01:23<19:03:07, 3.76it/s] 31%|███ | 113475/371472 [9:01:23<19:17:34, 3.71it/s] 31%|███ | 113476/371472 [9:01:23<18:58:53, 3.78it/s] 31%|███ | 113477/371472 [9:01:23<19:40:32, 3.64it/s] 31%|███ | 113478/371472 [9:01:24<19:03:15, 3.76it/s] 31%|███ | 113479/371472 [9:01:24<19:33:25, 3.66it/s] 31%|███ | 113480/371472 [9:01:24<19:30:42, 3.67it/s] {'loss': 3.3601, 'learning_rate': 7.253980908875434e-07, 'epoch': 4.89} + 31%|███ | 113480/371472 [9:01:24<19:30:42, 3.67it/s] 31%|███ | 113481/371472 [9:01:25<19:04:42, 3.76it/s] 31%|███ | 113482/371472 [9:01:25<18:54:01, 3.79it/s] 31%|███ | 113483/371472 [9:01:25<19:04:11, 3.76it/s] 31%|███ | 113484/371472 [9:01:25<18:58:14, 3.78it/s] 31%|███ | 113485/371472 [9:01:26<18:42:13, 3.83it/s] 31%|███ | 113486/371472 [9:01:26<18:44:32, 3.82it/s] 31%|███ | 113487/371472 [9:01:26<18:19:44, 3.91it/s] 31%|███ | 113488/371472 [9:01:26<19:13:47, 3.73it/s] 31%|███ | 113489/371472 [9:01:27<18:59:38, 3.77it/s] 31%|███ | 113490/371472 [9:01:27<19:08:48, 3.74it/s] 31%|███ | 113491/371472 [9:01:27<19:06:41, 3.75it/s] 31%|███ | 113492/371472 [9:01:27<18:44:41, 3.82it/s] 31%|███ | 113493/371472 [9:01:28<18:31:17, 3.87it/s] 31%|███ | 113494/371472 [9:01:28<18:13:02, 3.93it/s] 31%|███ | 113495/371472 [9:01:28<18:47:56, 3.81it/s] 31%|███ | 113496/371472 [9:01:28<18:50:00, 3.80it/s] 31%|███ | 113497/371472 [9:01:29<18:48:58, 3.81it/s] 31%|███ | 113498/371472 [9:01:29<18:33:37, 3.86it/s] 31%|███ | 113499/371472 [9:01:29<19:13:03, 3.73it/s] 31%|███ | 113500/371472 [9:01:30<20:49:45, 3.44it/s] {'loss': 3.3322, 'learning_rate': 7.253496089120644e-07, 'epoch': 4.89} + 31%|███ | 113500/371472 [9:01:30<20:49:45, 3.44it/s] 31%|███ | 113501/371472 [9:01:30<20:49:07, 3.44it/s] 31%|███ | 113502/371472 [9:01:30<20:00:31, 3.58it/s] 31%|███ | 113503/371472 [9:01:30<20:14:58, 3.54it/s] 31%|███ | 113504/371472 [9:01:31<20:14:15, 3.54it/s] 31%|███ | 113505/371472 [9:01:31<19:24:25, 3.69it/s] 31%|███ | 113506/371472 [9:01:31<19:39:37, 3.64it/s] 31%|███ | 113507/371472 [9:01:31<18:55:12, 3.79it/s] 31%|███ | 113508/371472 [9:01:32<18:51:51, 3.80it/s] 31%|███ | 113509/371472 [9:01:32<20:00:08, 3.58it/s] 31%|███ | 113510/371472 [9:01:32<19:54:16, 3.60it/s] 31%|███ | 113511/371472 [9:01:33<19:41:31, 3.64it/s] 31%|███ | 113512/371472 [9:01:33<19:04:08, 3.76it/s] 31%|███ | 113513/371472 [9:01:33<19:50:15, 3.61it/s] 31%|███ | 113514/371472 [9:01:33<20:45:24, 3.45it/s] 31%|███ | 113515/371472 [9:01:34<21:10:52, 3.38it/s] 31%|███ | 113516/371472 [9:01:34<21:56:03, 3.27it/s] 31%|███ | 113517/371472 [9:01:34<21:06:02, 3.40it/s] 31%|███ | 113518/371472 [9:01:35<20:06:04, 3.56it/s] 31%|███ | 113519/371472 [9:01:35<20:22:11, 3.52it/s] 31%|███ | 113520/371472 [9:01:35<20:11:39, 3.55it/s] {'loss': 3.3331, 'learning_rate': 7.253011269365855e-07, 'epoch': 4.89} + 31%|███ | 113520/371472 [9:01:35<20:11:39, 3.55it/s] 31%|███ | 113521/371472 [9:01:35<19:27:36, 3.68it/s] 31%|███ | 113522/371472 [9:01:36<19:00:44, 3.77it/s] 31%|███ | 113523/371472 [9:01:36<19:58:32, 3.59it/s] 31%|███ | 113524/371472 [9:01:36<19:46:23, 3.62it/s] 31%|███ | 113525/371472 [9:01:37<22:29:47, 3.19it/s] 31%|███ | 113526/371472 [9:01:37<21:57:18, 3.26it/s] 31%|███ | 113527/371472 [9:01:37<20:51:50, 3.43it/s] 31%|███ | 113528/371472 [9:01:37<20:41:20, 3.46it/s] 31%|███ | 113529/371472 [9:01:38<21:47:51, 3.29it/s] 31%|███ | 113530/371472 [9:01:38<22:32:19, 3.18it/s] 31%|███ | 113531/371472 [9:01:38<21:33:03, 3.32it/s] 31%|███ | 113532/371472 [9:01:39<21:08:34, 3.39it/s] 31%|███ | 113533/371472 [9:01:39<21:05:13, 3.40it/s] 31%|███ | 113534/371472 [9:01:39<21:59:42, 3.26it/s] 31%|███ | 113535/371472 [9:01:40<21:43:52, 3.30it/s] 31%|███ | 113536/371472 [9:01:40<20:55:03, 3.43it/s] 31%|███ | 113537/371472 [9:01:40<21:44:14, 3.30it/s] 31%|███ | 113538/371472 [9:01:41<22:27:44, 3.19it/s] 31%|███ | 113539/371472 [9:01:41<22:14:46, 3.22it/s] 31%|███ | 113540/371472 [9:01:41<21:19:04, 3.36it/s] {'loss': 3.4049, 'learning_rate': 7.252526449611067e-07, 'epoch': 4.89} + 31%|███ | 113540/371472 [9:01:41<21:19:04, 3.36it/s] 31%|███ | 113541/371472 [9:01:41<20:33:34, 3.48it/s] 31%|███ | 113542/371472 [9:01:42<21:46:52, 3.29it/s] 31%|███ | 113543/371472 [9:01:42<22:02:32, 3.25it/s] 31%|███ | 113544/371472 [9:01:42<21:33:33, 3.32it/s] 31%|███ | 113545/371472 [9:01:43<21:02:37, 3.40it/s] 31%|███ | 113546/371472 [9:01:43<22:24:21, 3.20it/s] 31%|███ | 113547/371472 [9:01:43<23:42:02, 3.02it/s] 31%|███ | 113548/371472 [9:01:44<22:03:43, 3.25it/s] 31%|███ | 113549/371472 [9:01:44<21:42:00, 3.30it/s] 31%|███ | 113550/371472 [9:01:44<22:22:17, 3.20it/s] 31%|███ | 113551/371472 [9:01:45<21:47:05, 3.29it/s] 31%|███ | 113552/371472 [9:01:45<23:27:41, 3.05it/s] 31%|███ | 113553/371472 [9:01:45<23:00:10, 3.11it/s] 31%|███ | 113554/371472 [9:01:45<21:20:15, 3.36it/s] 31%|███ | 113555/371472 [9:01:46<20:14:49, 3.54it/s] 31%|███ | 113556/371472 [9:01:46<19:42:43, 3.63it/s] 31%|███ | 113557/371472 [9:01:46<19:31:27, 3.67it/s] 31%|███ | 113558/371472 [9:01:46<18:33:02, 3.86it/s] 31%|███ | 113559/371472 [9:01:47<18:23:25, 3.90it/s] 31%|███ | 113560/371472 [9:01:47<19:48:23, 3.62it/s] {'loss': 3.3272, 'learning_rate': 7.252041629856278e-07, 'epoch': 4.89} + 31%|███ | 113560/371472 [9:01:47<19:48:23, 3.62it/s] 31%|███ | 113561/371472 [9:01:47<19:30:38, 3.67it/s] 31%|███ | 113562/371472 [9:01:48<19:00:20, 3.77it/s] 31%|███ | 113563/371472 [9:01:48<19:14:58, 3.72it/s] 31%|███ | 113564/371472 [9:01:48<20:07:50, 3.56it/s] 31%|███ | 113565/371472 [9:01:48<20:03:13, 3.57it/s] 31%|███ | 113566/371472 [9:01:49<19:23:14, 3.70it/s] 31%|███ | 113567/371472 [9:01:49<19:39:02, 3.65it/s] 31%|███ | 113568/371472 [9:01:49<21:21:36, 3.35it/s] 31%|███ | 113569/371472 [9:01:50<22:30:20, 3.18it/s] 31%|███ | 113570/371472 [9:01:50<22:03:24, 3.25it/s] 31%|███ | 113571/371472 [9:01:50<20:47:53, 3.44it/s] 31%|███ | 113572/371472 [9:01:50<20:35:17, 3.48it/s] 31%|███ | 113573/371472 [9:01:51<20:02:39, 3.57it/s] 31%|███ | 113574/371472 [9:01:51<20:12:31, 3.54it/s] 31%|███ | 113575/371472 [9:01:51<19:29:00, 3.68it/s] 31%|███ | 113576/371472 [9:01:52<19:21:26, 3.70it/s] 31%|███ | 113577/371472 [9:01:52<19:26:26, 3.68it/s] 31%|███ | 113578/371472 [9:01:52<19:19:08, 3.71it/s] 31%|███ | 113579/371472 [9:01:52<21:01:22, 3.41it/s] 31%|███ | 113580/371472 [9:01:53<20:04:25, 3.57it/s] {'loss': 3.217, 'learning_rate': 7.251556810101489e-07, 'epoch': 4.89} + 31%|███ | 113580/371472 [9:01:53<20:04:25, 3.57it/s] 31%|███ | 113581/371472 [9:01:53<20:17:07, 3.53it/s] 31%|███ | 113582/371472 [9:01:53<20:01:13, 3.58it/s] 31%|███ | 113583/371472 [9:01:53<19:38:49, 3.65it/s] 31%|███ | 113584/371472 [9:01:54<19:04:42, 3.75it/s] 31%|███ | 113585/371472 [9:01:54<20:38:20, 3.47it/s] 31%|███ | 113586/371472 [9:01:54<21:47:56, 3.29it/s] 31%|███ | 113587/371472 [9:01:55<20:58:17, 3.42it/s] 31%|███ | 113588/371472 [9:01:55<20:17:00, 3.53it/s] 31%|███ | 113589/371472 [9:01:55<19:38:52, 3.65it/s] 31%|███ | 113590/371472 [9:01:55<19:02:01, 3.76it/s] 31%|███ | 113591/371472 [9:01:56<18:42:10, 3.83it/s] 31%|███ | 113592/371472 [9:01:56<19:01:35, 3.76it/s] 31%|███ | 113593/371472 [9:01:56<19:37:13, 3.65it/s] 31%|███ | 113594/371472 [9:01:57<19:36:26, 3.65it/s] 31%|███ | 113595/371472 [9:01:57<19:23:05, 3.70it/s] 31%|███ | 113596/371472 [9:01:57<20:12:20, 3.55it/s] 31%|███ | 113597/371472 [9:01:57<19:45:19, 3.63it/s] 31%|███ | 113598/371472 [9:01:58<19:01:51, 3.76it/s] 31%|███ | 113599/371472 [9:01:58<19:56:35, 3.59it/s] 31%|███ | 113600/371472 [9:01:58<19:53:11, 3.60it/s] {'loss': 3.275, 'learning_rate': 7.2510719903467e-07, 'epoch': 4.89} + 31%|███ | 113600/371472 [9:01:58<19:53:11, 3.60it/s] 31%|███ | 113601/371472 [9:01:59<21:08:22, 3.39it/s] 31%|███ | 113602/371472 [9:01:59<20:10:59, 3.55it/s] 31%|███ | 113603/371472 [9:01:59<19:13:43, 3.73it/s] 31%|███ | 113604/371472 [9:01:59<19:28:02, 3.68it/s] 31%|███ | 113605/371472 [9:02:00<18:44:47, 3.82it/s] 31%|███ | 113606/371472 [9:02:00<19:04:15, 3.76it/s] 31%|███ | 113607/371472 [9:02:00<19:27:53, 3.68it/s] 31%|███ | 113608/371472 [9:02:00<19:03:04, 3.76it/s] 31%|███ | 113609/371472 [9:02:01<18:55:45, 3.78it/s] 31%|███ | 113610/371472 [9:02:01<19:07:22, 3.75it/s] 31%|███ | 113611/371472 [9:02:01<18:40:22, 3.84it/s] 31%|███ | 113612/371472 [9:02:01<18:43:46, 3.82it/s] 31%|███ | 113613/371472 [9:02:02<20:17:36, 3.53it/s] 31%|███ | 113614/371472 [9:02:02<19:50:57, 3.61it/s] 31%|███ | 113615/371472 [9:02:02<20:12:15, 3.55it/s] 31%|███ | 113616/371472 [9:02:03<20:12:34, 3.54it/s] 31%|███ | 113617/371472 [9:02:03<19:16:53, 3.71it/s] 31%|███ | 113618/371472 [9:02:03<18:54:58, 3.79it/s] 31%|███ | 113619/371472 [9:02:03<20:22:17, 3.52it/s] 31%|███ | 113620/371472 [9:02:04<20:04:54, 3.57it/s] {'loss': 3.5163, 'learning_rate': 7.250587170591911e-07, 'epoch': 4.89} + 31%|███ | 113620/371472 [9:02:04<20:04:54, 3.57it/s] 31%|███ | 113621/371472 [9:02:04<19:38:23, 3.65it/s] 31%|███ | 113622/371472 [9:02:04<19:55:15, 3.60it/s] 31%|███ | 113623/371472 [9:02:04<19:34:20, 3.66it/s] 31%|███ | 113624/371472 [9:02:05<19:40:33, 3.64it/s] 31%|███ | 113625/371472 [9:02:05<19:02:11, 3.76it/s] 31%|███ | 113626/371472 [9:02:05<18:42:13, 3.83it/s] 31%|███ | 113627/371472 [9:02:06<19:25:26, 3.69it/s] 31%|███ | 113628/371472 [9:02:06<19:07:49, 3.74it/s] 31%|███ | 113629/371472 [9:02:06<19:21:06, 3.70it/s] 31%|███ | 113630/371472 [9:02:06<19:12:15, 3.73it/s] 31%|███ | 113631/371472 [9:02:07<20:33:14, 3.48it/s] 31%|███ | 113632/371472 [9:02:07<20:53:28, 3.43it/s] 31%|███ | 113633/371472 [9:02:07<20:30:22, 3.49it/s] 31%|███ | 113634/371472 [9:02:08<20:37:56, 3.47it/s] 31%|███ | 113635/371472 [9:02:08<19:58:45, 3.58it/s] 31%|███ | 113636/371472 [9:02:08<20:35:51, 3.48it/s] 31%|███ | 113637/371472 [9:02:08<21:34:52, 3.32it/s] 31%|███ | 113638/371472 [9:02:09<20:40:55, 3.46it/s] 31%|███ | 113639/371472 [9:02:09<20:15:55, 3.53it/s] 31%|███ | 113640/371472 [9:02:09<19:58:38, 3.59it/s] {'loss': 3.372, 'learning_rate': 7.250102350837121e-07, 'epoch': 4.89} + 31%|███ | 113640/371472 [9:02:09<19:58:38, 3.59it/s] 31%|███ | 113641/371472 [9:02:09<19:23:21, 3.69it/s] 31%|███ | 113642/371472 [9:02:10<19:09:14, 3.74it/s] 31%|███ | 113643/371472 [9:02:10<19:01:50, 3.76it/s] 31%|███ | 113644/371472 [9:02:10<19:09:20, 3.74it/s] 31%|███ | 113645/371472 [9:02:11<18:49:19, 3.81it/s] 31%|███ | 113646/371472 [9:02:11<18:44:44, 3.82it/s] 31%|███ | 113647/371472 [9:02:11<18:41:42, 3.83it/s] 31%|███ | 113648/371472 [9:02:11<20:06:25, 3.56it/s] 31%|███ | 113649/371472 [9:02:12<20:17:33, 3.53it/s] 31%|███ | 113650/371472 [9:02:12<19:31:04, 3.67it/s] 31%|███ | 113651/371472 [9:02:12<19:29:33, 3.67it/s] 31%|███ | 113652/371472 [9:02:12<19:56:54, 3.59it/s] 31%|███ | 113653/371472 [9:02:13<21:05:52, 3.39it/s] 31%|███ | 113654/371472 [9:02:13<20:42:33, 3.46it/s] 31%|███ | 113655/371472 [9:02:13<20:17:02, 3.53it/s] 31%|███ | 113656/371472 [9:02:14<19:33:19, 3.66it/s] 31%|███ | 113657/371472 [9:02:14<19:22:13, 3.70it/s] 31%|███ | 113658/371472 [9:02:14<19:25:33, 3.69it/s] 31%|███ | 113659/371472 [9:02:14<19:53:14, 3.60it/s] 31%|███ | 113660/371472 [9:02:15<19:36:41, 3.65it/s] {'loss': 3.4274, 'learning_rate': 7.249617531082332e-07, 'epoch': 4.9} + 31%|███ | 113660/371472 [9:02:15<19:36:41, 3.65it/s] 31%|███ | 113661/371472 [9:02:15<19:02:26, 3.76it/s] 31%|███ | 113662/371472 [9:02:15<19:48:54, 3.61it/s] 31%|███ | 113663/371472 [9:02:16<20:03:47, 3.57it/s] 31%|███ | 113664/371472 [9:02:16<21:36:49, 3.31it/s] 31%|███ | 113665/371472 [9:02:16<20:32:20, 3.49it/s] 31%|███ | 113666/371472 [9:02:16<20:05:18, 3.56it/s] 31%|███ | 113667/371472 [9:02:17<19:54:19, 3.60it/s] 31%|███ | 113668/371472 [9:02:17<20:33:58, 3.48it/s] 31%|███ | 113669/371472 [9:02:17<19:48:50, 3.61it/s] 31%|███ | 113670/371472 [9:02:18<19:56:19, 3.59it/s] 31%|███ | 113671/371472 [9:02:18<19:48:10, 3.62it/s] 31%|███ | 113672/371472 [9:02:18<18:58:39, 3.77it/s] 31%|███ | 113673/371472 [9:02:18<19:33:27, 3.66it/s] 31%|███ | 113674/371472 [9:02:19<19:42:38, 3.63it/s] 31%|███ | 113675/371472 [9:02:19<20:04:10, 3.57it/s] 31%|███ | 113676/371472 [9:02:19<20:10:06, 3.55it/s] 31%|███ | 113677/371472 [9:02:19<19:30:37, 3.67it/s] 31%|███ | 113678/371472 [9:02:20<19:17:01, 3.71it/s] 31%|███ | 113679/371472 [9:02:20<20:43:10, 3.46it/s] 31%|███ | 113680/371472 [9:02:20<20:27:40, 3.50it/s] {'loss': 3.2712, 'learning_rate': 7.249132711327544e-07, 'epoch': 4.9} + 31%|███ | 113680/371472 [9:02:20<20:27:40, 3.50it/s] 31%|███ | 113681/371472 [9:02:21<21:26:25, 3.34it/s] 31%|███ | 113682/371472 [9:02:21<20:53:50, 3.43it/s] 31%|███ | 113683/371472 [9:02:21<19:56:35, 3.59it/s] 31%|███ | 113684/371472 [9:02:21<20:21:01, 3.52it/s] 31%|███ | 113685/371472 [9:02:22<20:21:31, 3.52it/s] 31%|███ | 113686/371472 [9:02:22<20:19:26, 3.52it/s] 31%|███ | 113687/371472 [9:02:22<21:07:55, 3.39it/s] 31%|███ | 113688/371472 [9:02:23<20:27:24, 3.50it/s] 31%|███ | 113689/371472 [9:02:23<19:55:46, 3.59it/s] 31%|███ | 113690/371472 [9:02:23<21:04:38, 3.40it/s] 31%|███ | 113691/371472 [9:02:24<21:38:38, 3.31it/s] 31%|███ | 113692/371472 [9:02:24<21:01:05, 3.41it/s] 31%|███ | 113693/371472 [9:02:24<20:03:26, 3.57it/s] 31%|███ | 113694/371472 [9:02:24<19:17:42, 3.71it/s] 31%|███ | 113695/371472 [9:02:25<19:10:04, 3.74it/s] 31%|███ | 113696/371472 [9:02:25<20:26:01, 3.50it/s] 31%|███ | 113697/371472 [9:02:25<20:33:07, 3.48it/s] 31%|███ | 113698/371472 [9:02:25<21:18:04, 3.36it/s] 31%|███ | 113699/371472 [9:02:26<20:46:20, 3.45it/s] 31%|███ | 113700/371472 [9:02:26<20:47:04, 3.45it/s] {'loss': 3.2933, 'learning_rate': 7.248647891572755e-07, 'epoch': 4.9} + 31%|███ | 113700/371472 [9:02:26<20:47:04, 3.45it/s] 31%|███ | 113701/371472 [9:02:26<20:51:02, 3.43it/s] 31%|███ | 113702/371472 [9:02:27<20:56:57, 3.42it/s] 31%|███ | 113703/371472 [9:02:27<21:08:29, 3.39it/s] 31%|███ | 113704/371472 [9:02:27<20:33:08, 3.48it/s] 31%|███ | 113705/371472 [9:02:28<20:28:19, 3.50it/s] 31%|███ | 113706/371472 [9:02:28<20:47:35, 3.44it/s] 31%|███ | 113707/371472 [9:02:28<20:28:33, 3.50it/s] 31%|███ | 113708/371472 [9:02:28<21:45:37, 3.29it/s] 31%|███ | 113709/371472 [9:02:29<21:02:32, 3.40it/s] 31%|███ | 113710/371472 [9:02:29<20:11:47, 3.55it/s] 31%|███ | 113711/371472 [9:02:29<21:15:28, 3.37it/s] 31%|███ | 113712/371472 [9:02:30<20:53:12, 3.43it/s] 31%|███ | 113713/371472 [9:02:30<21:38:26, 3.31it/s] 31%|███ | 113714/371472 [9:02:30<20:23:48, 3.51it/s] 31%|███ | 113715/371472 [9:02:30<19:33:10, 3.66it/s] 31%|███ | 113716/371472 [9:02:31<20:57:50, 3.42it/s] 31%|███ | 113717/371472 [9:02:31<20:44:49, 3.45it/s] 31%|███ | 113718/371472 [9:02:31<20:20:36, 3.52it/s] 31%|███ | 113719/371472 [9:02:32<20:13:54, 3.54it/s] 31%|███ | 113720/371472 [9:02:32<22:45:48, 3.15it/s] {'loss': 3.2578, 'learning_rate': 7.248163071817966e-07, 'epoch': 4.9} + 31%|███ | 113720/371472 [9:02:32<22:45:48, 3.15it/s] 31%|███ | 113721/371472 [9:02:32<21:25:05, 3.34it/s] 31%|███ | 113722/371472 [9:02:33<22:19:25, 3.21it/s] 31%|███ | 113723/371472 [9:02:33<21:06:42, 3.39it/s] 31%|███ | 113724/371472 [9:02:33<20:31:03, 3.49it/s] 31%|███ | 113725/371472 [9:02:33<19:43:39, 3.63it/s] 31%|███ | 113726/371472 [9:02:34<19:43:39, 3.63it/s] 31%|███ | 113727/371472 [9:02:34<19:07:40, 3.74it/s] 31%|███ | 113728/371472 [9:02:34<19:45:39, 3.62it/s] 31%|███ | 113729/371472 [9:02:34<19:28:25, 3.68it/s] 31%|███ | 113730/371472 [9:02:35<18:57:28, 3.78it/s] 31%|███ | 113731/371472 [9:02:35<19:15:07, 3.72it/s] 31%|███ | 113732/371472 [9:02:35<19:24:21, 3.69it/s] 31%|███ | 113733/371472 [9:02:36<19:57:53, 3.59it/s] 31%|███ | 113734/371472 [9:02:36<21:17:13, 3.36it/s] 31%|███ | 113735/371472 [9:02:36<20:29:57, 3.49it/s] 31%|███ | 113736/371472 [9:02:36<21:37:34, 3.31it/s] 31%|███ | 113737/371472 [9:02:37<20:41:49, 3.46it/s] 31%|███ | 113738/371472 [9:02:37<20:09:57, 3.55it/s] 31%|███ | 113739/371472 [9:02:37<19:32:39, 3.66it/s] 31%|███ | 113740/371472 [9:02:38<20:49:29, 3.44it/s] {'loss': 3.2918, 'learning_rate': 7.247678252063177e-07, 'epoch': 4.9} + 31%|███ | 113740/371472 [9:02:38<20:49:29, 3.44it/s] 31%|███ | 113741/371472 [9:02:38<20:46:59, 3.44it/s] 31%|███ | 113742/371472 [9:02:38<20:20:24, 3.52it/s] 31%|███ | 113743/371472 [9:02:38<21:56:00, 3.26it/s] 31%|███ | 113744/371472 [9:02:39<21:08:02, 3.39it/s] 31%|███ | 113745/371472 [9:02:39<20:25:16, 3.51it/s] 31%|███ | 113746/371472 [9:02:39<20:04:34, 3.57it/s] 31%|███ | 113747/371472 [9:02:40<19:42:21, 3.63it/s] 31%|███ | 113748/371472 [9:02:40<20:11:03, 3.55it/s] 31%|███ | 113749/371472 [9:02:40<19:59:08, 3.58it/s] 31%|███ | 113750/371472 [9:02:40<19:57:48, 3.59it/s] 31%|███ | 113751/371472 [9:02:41<19:43:24, 3.63it/s] 31%|███ | 113752/371472 [9:02:41<19:01:57, 3.76it/s] 31%|███ | 113753/371472 [9:02:41<19:17:39, 3.71it/s] 31%|███ | 113754/371472 [9:02:41<19:11:55, 3.73it/s] 31%|███ | 113755/371472 [9:02:42<21:09:50, 3.38it/s] 31%|███ | 113756/371472 [9:02:42<20:15:52, 3.53it/s] 31%|███ | 113757/371472 [9:02:42<19:57:02, 3.59it/s] 31%|███ | 113758/371472 [9:02:43<19:41:49, 3.63it/s] 31%|███ | 113759/371472 [9:02:43<19:58:34, 3.58it/s] 31%|███ | 113760/371472 [9:02:43<19:58:05, 3.59it/s] {'loss': 3.1761, 'learning_rate': 7.247193432308388e-07, 'epoch': 4.9} + 31%|███ | 113760/371472 [9:02:43<19:58:05, 3.59it/s] 31%|███ | 113761/371472 [9:02:43<20:56:02, 3.42it/s] 31%|███ | 113762/371472 [9:02:44<21:07:35, 3.39it/s] 31%|███ | 113763/371472 [9:02:44<22:43:25, 3.15it/s] 31%|███ | 113764/371472 [9:02:44<21:55:48, 3.26it/s] 31%|███ | 113765/371472 [9:02:45<22:30:21, 3.18it/s] 31%|███ | 113766/371472 [9:02:45<21:58:38, 3.26it/s] 31%|███ | 113767/371472 [9:02:45<20:29:34, 3.49it/s] 31%|███ | 113768/371472 [9:02:46<21:50:27, 3.28it/s] 31%|███ | 113769/371472 [9:02:46<21:16:14, 3.37it/s] 31%|███ | 113770/371472 [9:02:46<20:30:13, 3.49it/s] 31%|███ | 113771/371472 [9:02:46<20:56:03, 3.42it/s] 31%|███ | 113772/371472 [9:02:47<21:15:16, 3.37it/s] 31%|███ | 113773/371472 [9:02:47<21:37:03, 3.31it/s] 31%|███ | 113774/371472 [9:02:47<20:30:02, 3.49it/s] 31%|███ | 113775/371472 [9:02:48<20:07:04, 3.56it/s] 31%|███ | 113776/371472 [9:02:48<20:28:26, 3.50it/s] 31%|███ | 113777/371472 [9:02:48<19:42:53, 3.63it/s] 31%|███ | 113778/371472 [9:02:49<22:11:34, 3.23it/s] 31%|███ | 113779/371472 [9:02:49<20:45:58, 3.45it/s] 31%|███ | 113780/371472 [9:02:49<19:53:59, 3.60it/s] {'loss': 3.3231, 'learning_rate': 7.246708612553599e-07, 'epoch': 4.9} + 31%|███ | 113780/371472 [9:02:49<19:53:59, 3.60it/s] 31%|███ | 113781/371472 [9:02:49<19:01:52, 3.76it/s] 31%|███ | 113782/371472 [9:02:50<19:00:15, 3.77it/s] 31%|███ | 113783/371472 [9:02:50<22:59:03, 3.11it/s] 31%|███ | 113784/371472 [9:02:50<24:06:07, 2.97it/s] 31%|███ | 113785/371472 [9:02:51<22:19:53, 3.21it/s] 31%|███ | 113786/371472 [9:02:51<23:26:06, 3.05it/s] 31%|███ | 113787/371472 [9:02:51<23:48:42, 3.01it/s] 31%|███ | 113788/371472 [9:02:52<22:56:54, 3.12it/s] 31%|███ | 113789/371472 [9:02:52<25:30:07, 2.81it/s] 31%|███ | 113790/371472 [9:02:52<24:50:38, 2.88it/s] 31%|███ | 113791/371472 [9:02:53<25:38:36, 2.79it/s] 31%|███ | 113792/371472 [9:02:53<23:19:49, 3.07it/s] 31%|███ | 113793/371472 [9:02:53<21:54:32, 3.27it/s] 31%|███ | 113794/371472 [9:02:54<20:49:44, 3.44it/s] 31%|███ | 113795/371472 [9:02:54<19:57:57, 3.58it/s] 31%|███ | 113796/371472 [9:02:54<20:12:44, 3.54it/s] 31%|███ | 113797/371472 [9:02:54<19:56:39, 3.59it/s] 31%|███ | 113798/371472 [9:02:55<19:23:12, 3.69it/s] 31%|███ | 113799/371472 [9:02:55<19:53:52, 3.60it/s] 31%|███ | 113800/371472 [9:02:55<20:07:48, 3.56it/s] {'loss': 3.2292, 'learning_rate': 7.24622379279881e-07, 'epoch': 4.9} + 31%|███ | 113800/371472 [9:02:55<20:07:48, 3.56it/s] 31%|███ | 113801/371472 [9:02:55<19:56:26, 3.59it/s] 31%|███ | 113802/371472 [9:02:56<19:46:23, 3.62it/s] 31%|███ | 113803/371472 [9:02:56<20:22:10, 3.51it/s] 31%|███ | 113804/371472 [9:02:56<19:43:55, 3.63it/s] 31%|███ | 113805/371472 [9:02:57<20:51:09, 3.43it/s] 31%|███ | 113806/371472 [9:02:57<20:19:49, 3.52it/s] 31%|███ | 113807/371472 [9:02:57<20:01:28, 3.57it/s] 31%|███ | 113808/371472 [9:02:57<19:37:19, 3.65it/s] 31%|███ | 113809/371472 [9:02:58<19:41:10, 3.64it/s] 31%|███ | 113810/371472 [9:02:58<19:57:41, 3.59it/s] 31%|███ | 113811/371472 [9:02:58<20:38:23, 3.47it/s] 31%|███ | 113812/371472 [9:02:59<20:55:17, 3.42it/s] 31%|███ | 113813/371472 [9:02:59<20:30:37, 3.49it/s] 31%|███ | 113814/371472 [9:02:59<19:53:19, 3.60it/s] 31%|███ | 113815/371472 [9:02:59<19:28:52, 3.67it/s] 31%|███ | 113816/371472 [9:03:00<19:38:10, 3.64it/s] 31%|███ | 113817/371472 [9:03:00<20:12:04, 3.54it/s] 31%|███ | 113818/371472 [9:03:00<19:47:22, 3.62it/s] 31%|███ | 113819/371472 [9:03:01<20:50:05, 3.44it/s] 31%|███ | 113820/371472 [9:03:01<20:45:57, 3.45it/s] {'loss': 3.5422, 'learning_rate': 7.245738973044021e-07, 'epoch': 4.9} + 31%|███ | 113820/371472 [9:03:01<20:45:57, 3.45it/s] 31%|███ | 113821/371472 [9:03:01<20:18:33, 3.52it/s] 31%|███ | 113822/371472 [9:03:01<21:09:38, 3.38it/s] 31%|███ | 113823/371472 [9:03:02<21:21:13, 3.35it/s] 31%|███ | 113824/371472 [9:03:02<20:35:52, 3.47it/s] 31%|███ | 113825/371472 [9:03:02<20:38:44, 3.47it/s] 31%|███ | 113826/371472 [9:03:03<20:03:48, 3.57it/s] 31%|███ | 113827/371472 [9:03:03<19:30:08, 3.67it/s] 31%|███ | 113828/371472 [9:03:03<20:26:43, 3.50it/s] 31%|███ | 113829/371472 [9:03:03<20:12:55, 3.54it/s] 31%|███ | 113830/371472 [9:03:04<21:48:12, 3.28it/s] 31%|███ | 113831/371472 [9:03:04<21:31:31, 3.32it/s] 31%|███ | 113832/371472 [9:03:04<20:54:26, 3.42it/s] 31%|███ | 113833/371472 [9:03:05<20:09:05, 3.55it/s] 31%|███ | 113834/371472 [9:03:05<20:05:20, 3.56it/s] 31%|███ | 113835/371472 [9:03:05<19:59:58, 3.58it/s] 31%|███ | 113836/371472 [9:03:05<19:46:27, 3.62it/s] 31%|███ | 113837/371472 [9:03:06<20:04:47, 3.56it/s] 31%|███ | 113838/371472 [9:03:06<21:29:48, 3.33it/s] 31%|███ | 113839/371472 [9:03:06<21:44:38, 3.29it/s] 31%|███ | 113840/371472 [9:03:07<21:18:32, 3.36it/s] {'loss': 3.4631, 'learning_rate': 7.245254153289233e-07, 'epoch': 4.9} + 31%|███ | 113840/371472 [9:03:07<21:18:32, 3.36it/s] 31%|███ | 113841/371472 [9:03:07<21:14:57, 3.37it/s] 31%|███ | 113842/371472 [9:03:07<22:17:20, 3.21it/s] 31%|███ | 113843/371472 [9:03:08<22:13:53, 3.22it/s] 31%|███ | 113844/371472 [9:03:08<21:25:05, 3.34it/s] 31%|███ | 113845/371472 [9:03:08<20:25:13, 3.50it/s] 31%|███ | 113846/371472 [9:03:08<20:39:55, 3.46it/s] 31%|███ | 113847/371472 [9:03:09<20:08:10, 3.55it/s] 31%|███ | 113848/371472 [9:03:09<19:48:49, 3.61it/s] 31%|███ | 113849/371472 [9:03:09<18:53:34, 3.79it/s] 31%|███ | 113850/371472 [9:03:09<19:01:25, 3.76it/s] 31%|███ | 113851/371472 [9:03:10<18:51:59, 3.79it/s] 31%|███ | 113852/371472 [9:03:10<18:55:58, 3.78it/s] 31%|███ | 113853/371472 [9:03:10<19:04:24, 3.75it/s] 31%|███ | 113854/371472 [9:03:11<20:02:26, 3.57it/s] 31%|███ | 113855/371472 [9:03:11<20:34:25, 3.48it/s] 31%|███ | 113856/371472 [9:03:11<20:24:38, 3.51it/s] 31%|███ | 113857/371472 [9:03:11<20:40:45, 3.46it/s] 31%|███ | 113858/371472 [9:03:12<20:43:37, 3.45it/s] 31%|█��█ | 113859/371472 [9:03:12<20:19:48, 3.52it/s] 31%|███ | 113860/371472 [9:03:12<19:54:29, 3.59it/s] {'loss': 3.4453, 'learning_rate': 7.244769333534444e-07, 'epoch': 4.9} + 31%|███ | 113860/371472 [9:03:12<19:54:29, 3.59it/s] 31%|███ | 113861/371472 [9:03:13<20:16:14, 3.53it/s] 31%|███ | 113862/371472 [9:03:13<20:26:10, 3.50it/s] 31%|███ | 113863/371472 [9:03:13<20:29:57, 3.49it/s] 31%|███ | 113864/371472 [9:03:13<21:02:56, 3.40it/s] 31%|███ | 113865/371472 [9:03:14<20:06:11, 3.56it/s] 31%|███ | 113866/371472 [9:03:14<20:07:19, 3.56it/s] 31%|███ | 113867/371472 [9:03:14<20:45:22, 3.45it/s] 31%|███ | 113868/371472 [9:03:15<21:36:10, 3.31it/s] 31%|███ | 113869/371472 [9:03:15<23:16:44, 3.07it/s] 31%|███ | 113870/371472 [9:03:15<21:37:49, 3.31it/s] 31%|███ | 113871/371472 [9:03:16<21:14:14, 3.37it/s] 31%|███ | 113872/371472 [9:03:16<20:15:49, 3.53it/s] 31%|███ | 113873/371472 [9:03:16<20:33:44, 3.48it/s] 31%|███ | 113874/371472 [9:03:16<21:23:51, 3.34it/s] 31%|███ | 113875/371472 [9:03:17<21:38:15, 3.31it/s] 31%|███ | 113876/371472 [9:03:17<22:13:40, 3.22it/s] 31%|███ | 113877/371472 [9:03:17<21:17:38, 3.36it/s] 31%|███ | 113878/371472 [9:03:18<21:34:15, 3.32it/s] 31%|███ | 113879/371472 [9:03:18<21:04:53, 3.39it/s] 31%|███ | 113880/371472 [9:03:18<20:07:36, 3.56it/s] {'loss': 3.3112, 'learning_rate': 7.244284513779655e-07, 'epoch': 4.91} + 31%|███ | 113880/371472 [9:03:18<20:07:36, 3.56it/s] 31%|███ | 113881/371472 [9:03:18<20:04:14, 3.57it/s] 31%|███ | 113882/371472 [9:03:19<19:34:50, 3.65it/s] 31%|███ | 113883/371472 [9:03:19<18:45:00, 3.82it/s] 31%|███ | 113884/371472 [9:03:19<18:40:35, 3.83it/s] 31%|███ | 113885/371472 [9:03:20<20:48:58, 3.44it/s] 31%|███ | 113886/371472 [9:03:20<21:46:38, 3.29it/s] 31%|███ | 113887/371472 [9:03:20<21:18:57, 3.36it/s] 31%|███ | 113888/371472 [9:03:20<20:44:48, 3.45it/s] 31%|███ | 113889/371472 [9:03:21<20:06:50, 3.56it/s] 31%|███ | 113890/371472 [9:03:21<21:07:06, 3.39it/s] 31%|███ | 113891/371472 [9:03:21<20:01:58, 3.57it/s] 31%|███ | 113892/371472 [9:03:22<21:03:19, 3.40it/s] 31%|███ | 113893/371472 [9:03:22<20:08:17, 3.55it/s] 31%|███ | 113894/371472 [9:03:22<24:06:27, 2.97it/s] 31%|███ | 113895/371472 [9:03:23<22:47:39, 3.14it/s] 31%|███ | 113896/371472 [9:03:23<24:18:01, 2.94it/s] 31%|███ | 113897/371472 [9:03:23<22:38:15, 3.16it/s] 31%|███ | 113898/371472 [9:03:24<21:50:30, 3.28it/s] 31%|███ | 113899/371472 [9:03:24<23:33:15, 3.04it/s] 31%|███ | 113900/371472 [9:03:24<22:12:57, 3.22it/s] {'loss': 3.4012, 'learning_rate': 7.243799694024864e-07, 'epoch': 4.91} + 31%|███ | 113900/371472 [9:03:24<22:12:57, 3.22it/s] 31%|███ | 113901/371472 [9:03:24<21:01:06, 3.40it/s] 31%|███ | 113902/371472 [9:03:25<20:55:21, 3.42it/s] 31%|███ | 113903/371472 [9:03:25<20:33:39, 3.48it/s] 31%|███ | 113904/371472 [9:03:25<22:01:50, 3.25it/s] 31%|███ | 113905/371472 [9:03:26<21:39:27, 3.30it/s] 31%|███ | 113906/371472 [9:03:26<20:56:18, 3.42it/s] 31%|███ | 113907/371472 [9:03:26<20:40:52, 3.46it/s] 31%|███ | 113908/371472 [9:03:26<20:48:43, 3.44it/s] 31%|███ | 113909/371472 [9:03:27<20:23:13, 3.51it/s] 31%|███ | 113910/371472 [9:03:27<20:16:22, 3.53it/s] 31%|███ | 113911/371472 [9:03:27<19:46:55, 3.62it/s] 31%|███ | 113912/371472 [9:03:28<23:07:07, 3.09it/s] 31%|███ | 113913/371472 [9:03:28<22:18:37, 3.21it/s] 31%|███ | 113914/371472 [9:03:28<22:46:52, 3.14it/s] 31%|███ | 113915/371472 [9:03:29<21:52:48, 3.27it/s] 31%|███ | 113916/371472 [9:03:29<20:42:22, 3.46it/s] 31%|███ | 113917/371472 [9:03:29<20:26:52, 3.50it/s] 31%|███ | 113918/371472 [9:03:29<20:56:41, 3.42it/s] 31%|███ | 113919/371472 [9:03:30<22:32:36, 3.17it/s] 31%|███ | 113920/371472 [9:03:30<21:35:02, 3.31it/s] {'loss': 3.5263, 'learning_rate': 7.243314874270077e-07, 'epoch': 4.91} + 31%|███ | 113920/371472 [9:03:30<21:35:02, 3.31it/s] 31%|███ | 113921/371472 [9:03:30<21:41:35, 3.30it/s] 31%|███ | 113922/371472 [9:03:31<21:11:11, 3.38it/s] 31%|███ | 113923/371472 [9:03:31<20:56:51, 3.42it/s] 31%|███ | 113924/371472 [9:03:31<20:06:50, 3.56it/s] 31%|███ | 113925/371472 [9:03:32<20:14:42, 3.53it/s] 31%|███ | 113926/371472 [9:03:32<20:04:39, 3.56it/s] 31%|███ | 113927/371472 [9:03:32<19:57:09, 3.59it/s] 31%|███ | 113928/371472 [9:03:32<20:03:17, 3.57it/s] 31%|███ | 113929/371472 [9:03:33<19:31:32, 3.66it/s] 31%|███ | 113930/371472 [9:03:33<19:26:43, 3.68it/s] 31%|███ | 113931/371472 [9:03:33<20:47:08, 3.44it/s] 31%|███ | 113932/371472 [9:03:33<20:11:50, 3.54it/s] 31%|███ | 113933/371472 [9:03:34<19:53:34, 3.60it/s] 31%|███ | 113934/371472 [9:03:34<19:44:45, 3.62it/s] 31%|███ | 113935/371472 [9:03:34<20:15:34, 3.53it/s] 31%|███ | 113936/371472 [9:03:35<21:50:42, 3.27it/s] 31%|███ | 113937/371472 [9:03:35<21:20:23, 3.35it/s] 31%|███ | 113938/371472 [9:03:35<20:53:43, 3.42it/s] 31%|███ | 113939/371472 [9:03:35<20:29:41, 3.49it/s] 31%|███ | 113940/371472 [9:03:36<20:03:54, 3.57it/s] {'loss': 3.3166, 'learning_rate': 7.242830054515288e-07, 'epoch': 4.91} + 31%|███ | 113940/371472 [9:03:36<20:03:54, 3.57it/s] 31%|███ | 113941/371472 [9:03:36<20:31:03, 3.49it/s] 31%|███ | 113942/371472 [9:03:36<20:12:18, 3.54it/s] 31%|███ | 113943/371472 [9:03:37<19:16:39, 3.71it/s] 31%|███ | 113944/371472 [9:03:37<19:37:47, 3.64it/s] 31%|███ | 113945/371472 [9:03:37<19:09:11, 3.73it/s] 31%|███ | 113946/371472 [9:03:37<19:01:40, 3.76it/s] 31%|███ | 113947/371472 [9:03:38<19:31:31, 3.66it/s] 31%|███ | 113948/371472 [9:03:38<20:01:06, 3.57it/s] 31%|███ | 113949/371472 [9:03:38<21:51:39, 3.27it/s] 31%|███ | 113950/371472 [9:03:39<20:57:25, 3.41it/s] 31%|███ | 113951/371472 [9:03:39<21:08:09, 3.38it/s] 31%|███ | 113952/371472 [9:03:39<20:27:56, 3.50it/s] 31%|███ | 113953/371472 [9:03:39<21:06:42, 3.39it/s] 31%|███ | 113954/371472 [9:03:40<20:09:46, 3.55it/s] 31%|███ | 113955/371472 [9:03:40<19:24:03, 3.69it/s] 31%|███ | 113956/371472 [9:03:40<19:39:19, 3.64it/s] 31%|███ | 113957/371472 [9:03:41<19:51:04, 3.60it/s] 31%|███ | 113958/371472 [9:03:41<19:14:48, 3.72it/s] 31%|███ | 113959/371472 [9:03:41<18:52:32, 3.79it/s] 31%|███ | 113960/371472 [9:03:41<19:58:33, 3.58it/s] {'loss': 3.2745, 'learning_rate': 7.242345234760498e-07, 'epoch': 4.91} + 31%|███ | 113960/371472 [9:03:41<19:58:33, 3.58it/s] 31%|███ | 113961/371472 [9:03:42<19:48:57, 3.61it/s] 31%|███ | 113962/371472 [9:03:42<19:51:00, 3.60it/s] 31%|███ | 113963/371472 [9:03:42<19:03:40, 3.75it/s] 31%|███ | 113964/371472 [9:03:42<20:12:17, 3.54it/s] 31%|███ | 113965/371472 [9:03:43<20:16:09, 3.53it/s] 31%|███ | 113966/371472 [9:03:43<21:16:08, 3.36it/s] 31%|███ | 113967/371472 [9:03:43<22:08:17, 3.23it/s] 31%|███ | 113968/371472 [9:03:44<21:50:44, 3.27it/s] 31%|███ | 113969/371472 [9:03:44<21:16:18, 3.36it/s] 31%|███ | 113970/371472 [9:03:44<20:21:49, 3.51it/s] 31%|███ | 113971/371472 [9:03:45<20:08:57, 3.55it/s] 31%|███ | 113972/371472 [9:03:45<20:23:10, 3.51it/s] 31%|███ | 113973/371472 [9:03:45<20:45:04, 3.45it/s] 31%|███ | 113974/371472 [9:03:45<20:21:39, 3.51it/s] 31%|███ | 113975/371472 [9:03:46<20:56:49, 3.41it/s] 31%|███ | 113976/371472 [9:03:46<20:19:55, 3.52it/s] 31%|███ | 113977/371472 [9:03:46<20:53:48, 3.42it/s] 31%|███ | 113978/371472 [9:03:47<21:41:18, 3.30it/s] 31%|███ | 113979/371472 [9:03:47<20:55:44, 3.42it/s] 31%|███ | 113980/371472 [9:03:47<21:12:05, 3.37it/s] {'loss': 3.2829, 'learning_rate': 7.24186041500571e-07, 'epoch': 4.91} + 31%|███ | 113980/371472 [9:03:47<21:12:05, 3.37it/s] 31%|███ | 113981/371472 [9:03:48<22:06:28, 3.24it/s] 31%|███ | 113982/371472 [9:03:48<21:04:58, 3.39it/s] 31%|███ | 113983/371472 [9:03:48<21:33:11, 3.32it/s] 31%|███ | 113984/371472 [9:03:48<20:57:58, 3.41it/s] 31%|███ | 113985/371472 [9:03:49<20:36:38, 3.47it/s] 31%|███ | 113986/371472 [9:03:49<20:49:23, 3.43it/s] 31%|███ | 113987/371472 [9:03:49<21:36:13, 3.31it/s] 31%|███ | 113988/371472 [9:03:50<21:48:20, 3.28it/s] 31%|███ | 113989/371472 [9:03:50<20:48:26, 3.44it/s] 31%|███ | 113990/371472 [9:03:50<19:39:05, 3.64it/s] 31%|███ | 113991/371472 [9:03:50<18:53:08, 3.79it/s] 31%|███ | 113992/371472 [9:03:51<18:45:45, 3.81it/s] 31%|███ | 113993/371472 [9:03:51<19:11:39, 3.73it/s] 31%|███ | 113994/371472 [9:03:51<18:45:09, 3.81it/s] 31%|███ | 113995/371472 [9:03:51<19:39:24, 3.64it/s] 31%|███ | 113996/371472 [9:03:52<19:15:39, 3.71it/s] 31%|███ | 113997/371472 [9:03:52<18:53:06, 3.79it/s] 31%|███ | 113998/371472 [9:03:52<19:01:13, 3.76it/s] 31%|███ | 113999/371472 [9:03:52<18:59:58, 3.76it/s] 31%|███ | 114000/371472 [9:03:53<18:50:44, 3.80it/s] {'loss': 3.2522, 'learning_rate': 7.241375595250921e-07, 'epoch': 4.91} + 31%|███ | 114000/371472 [9:03:53<18:50:44, 3.80it/s] 31%|███ | 114001/371472 [9:03:53<19:31:59, 3.66it/s] 31%|███ | 114002/371472 [9:03:53<20:27:48, 3.49it/s] 31%|███ | 114003/371472 [9:03:54<20:50:59, 3.43it/s] 31%|███ | 114004/371472 [9:03:54<24:23:48, 2.93it/s] 31%|███ | 114005/371472 [9:03:54<24:21:38, 2.94it/s] 31%|███ | 114006/371472 [9:03:55<22:44:49, 3.14it/s] 31%|███ | 114007/371472 [9:03:55<22:09:41, 3.23it/s] 31%|███ | 114008/371472 [9:03:55<20:49:45, 3.43it/s] 31%|███ | 114009/371472 [9:03:55<19:49:51, 3.61it/s] 31%|███ | 114010/371472 [9:03:56<22:20:22, 3.20it/s] 31%|███ | 114011/371472 [9:03:56<21:36:37, 3.31it/s] 31%|███ | 114012/371472 [9:03:56<20:23:20, 3.51it/s] 31%|███ | 114013/371472 [9:03:57<20:29:37, 3.49it/s] 31%|███ | 114014/371472 [9:03:57<20:06:49, 3.56it/s] 31%|███ | 114015/371472 [9:03:57<19:28:05, 3.67it/s] 31%|███ | 114016/371472 [9:03:57<19:35:16, 3.65it/s] 31%|███ | 114017/371472 [9:03:58<20:13:59, 3.53it/s] 31%|███ | 114018/371472 [9:03:58<22:18:15, 3.21it/s] 31%|███ | 114019/371472 [9:03:58<21:17:28, 3.36it/s] 31%|███ | 114020/371472 [9:03:59<20:36:46, 3.47it/s] {'loss': 3.0851, 'learning_rate': 7.240890775496132e-07, 'epoch': 4.91} + 31%|███ | 114020/371472 [9:03:59<20:36:46, 3.47it/s] 31%|███ | 114021/371472 [9:03:59<20:23:05, 3.51it/s] 31%|███ | 114022/371472 [9:03:59<19:57:43, 3.58it/s] 31%|███ | 114023/371472 [9:03:59<19:37:49, 3.64it/s] 31%|███ | 114024/371472 [9:04:00<20:13:22, 3.54it/s] 31%|███ | 114025/371472 [9:04:00<19:24:20, 3.69it/s] 31%|███ | 114026/371472 [9:04:00<19:09:41, 3.73it/s] 31%|███ | 114027/371472 [9:04:01<19:13:52, 3.72it/s] 31%|███ | 114028/371472 [9:04:01<20:00:07, 3.58it/s] 31%|███ | 114029/371472 [9:04:01<20:40:05, 3.46it/s] 31%|███ | 114030/371472 [9:04:02<21:13:02, 3.37it/s] 31%|███ | 114031/371472 [9:04:02<20:44:05, 3.45it/s] 31%|███ | 114032/371472 [9:04:02<20:33:47, 3.48it/s] 31%|███ | 114033/371472 [9:04:02<20:28:41, 3.49it/s] 31%|███ | 114034/371472 [9:04:03<21:07:59, 3.38it/s] 31%|███ | 114035/371472 [9:04:03<20:11:56, 3.54it/s] 31%|███ | 114036/371472 [9:04:03<20:13:20, 3.54it/s] 31%|███ | 114037/371472 [9:04:04<21:50:07, 3.27it/s] 31%|███ | 114038/371472 [9:04:04<21:16:49, 3.36it/s] 31%|███ | 114039/371472 [9:04:04<20:42:04, 3.45it/s] 31%|███ | 114040/371472 [9:04:04<20:38:08, 3.47it/s] {'loss': 3.4579, 'learning_rate': 7.240405955741342e-07, 'epoch': 4.91} + 31%|███ | 114040/371472 [9:04:04<20:38:08, 3.47it/s] 31%|███ | 114041/371472 [9:04:05<21:42:22, 3.29it/s] 31%|███ | 114042/371472 [9:04:05<20:41:58, 3.45it/s] 31%|███ | 114043/371472 [9:04:05<20:36:57, 3.47it/s] 31%|███ | 114044/371472 [9:04:06<20:11:34, 3.54it/s] 31%|███ | 114045/371472 [9:04:06<19:51:02, 3.60it/s] 31%|███ | 114046/371472 [9:04:06<20:28:45, 3.49it/s] 31%|███ | 114047/371472 [9:04:06<20:08:19, 3.55it/s] 31%|███ | 114048/371472 [9:04:07<20:55:48, 3.42it/s] 31%|███ | 114049/371472 [9:04:07<22:15:30, 3.21it/s] 31%|███ | 114050/371472 [9:04:07<20:58:00, 3.41it/s] 31%|███ | 114051/371472 [9:04:08<20:47:18, 3.44it/s] 31%|███ | 114052/371472 [9:04:08<20:01:29, 3.57it/s] 31%|███ | 114053/371472 [9:04:08<20:01:31, 3.57it/s] 31%|███ | 114054/371472 [9:04:08<20:27:05, 3.50it/s] 31%|███ | 114055/371472 [9:04:09<19:55:39, 3.59it/s] 31%|███ | 114056/371472 [9:04:09<19:36:00, 3.65it/s] 31%|███ | 114057/371472 [9:04:09<20:05:18, 3.56it/s] 31%|███ | 114058/371472 [9:04:10<20:07:58, 3.55it/s] 31%|███ | 114059/371472 [9:04:10<22:33:53, 3.17it/s] 31%|███ | 114060/371472 [9:04:10<22:54:50, 3.12it/s] {'loss': 3.5174, 'learning_rate': 7.239921135986554e-07, 'epoch': 4.91} + 31%|███ | 114060/371472 [9:04:10<22:54:50, 3.12it/s] 31%|███ | 114061/371472 [9:04:11<21:51:43, 3.27it/s] 31%|███ | 114062/371472 [9:04:11<21:34:12, 3.31it/s] 31%|███ | 114063/371472 [9:04:11<20:34:11, 3.48it/s] 31%|███ | 114064/371472 [9:04:11<19:52:47, 3.60it/s] 31%|███ | 114065/371472 [9:04:12<20:21:58, 3.51it/s] 31%|███ | 114066/371472 [9:04:12<19:29:16, 3.67it/s] 31%|███ | 114067/371472 [9:04:12<19:00:25, 3.76it/s] 31%|███ | 114068/371472 [9:04:12<19:11:40, 3.73it/s] 31%|███ | 114069/371472 [9:04:13<18:54:05, 3.78it/s] 31%|███ | 114070/371472 [9:04:13<18:24:52, 3.88it/s] 31%|███ | 114071/371472 [9:04:13<19:19:42, 3.70it/s] 31%|███ | 114072/371472 [9:04:13<19:29:36, 3.67it/s] 31%|███ | 114073/371472 [9:04:14<19:16:50, 3.71it/s] 31%|███ | 114074/371472 [9:04:14<18:52:28, 3.79it/s] 31%|███ | 114075/371472 [9:04:14<19:32:15, 3.66it/s] 31%|███ | 114076/371472 [9:04:15<20:06:26, 3.56it/s] 31%|███ | 114077/371472 [9:04:15<21:00:32, 3.40it/s] 31%|███ | 114078/371472 [9:04:15<23:05:42, 3.10it/s] 31%|███ | 114079/371472 [9:04:16<23:13:05, 3.08it/s] 31%|███ | 114080/371472 [9:04:16<22:26:02, 3.19it/s] {'loss': 3.2439, 'learning_rate': 7.239436316231766e-07, 'epoch': 4.91} + 31%|███ | 114080/371472 [9:04:16<22:26:02, 3.19it/s] 31%|███ | 114081/371472 [9:04:16<21:35:04, 3.31it/s] 31%|███ | 114082/371472 [9:04:17<23:11:56, 3.08it/s] 31%|███ | 114083/371472 [9:04:17<21:44:38, 3.29it/s] 31%|███ | 114084/371472 [9:04:17<20:45:31, 3.44it/s] 31%|███ | 114085/371472 [9:04:17<19:33:48, 3.65it/s] 31%|███ | 114086/371472 [9:04:18<20:28:34, 3.49it/s] 31%|███ | 114087/371472 [9:04:18<21:01:42, 3.40it/s] 31%|███ | 114088/371472 [9:04:18<21:41:27, 3.30it/s] 31%|███ | 114089/371472 [9:04:19<21:32:17, 3.32it/s] 31%|███ | 114090/371472 [9:04:19<21:29:27, 3.33it/s] 31%|███ | 114091/371472 [9:04:19<20:45:21, 3.44it/s] 31%|███ | 114092/371472 [9:04:19<20:38:38, 3.46it/s] 31%|███ | 114093/371472 [9:04:20<19:54:05, 3.59it/s] 31%|███ | 114094/371472 [9:04:20<19:19:19, 3.70it/s] 31%|███ | 114095/371472 [9:04:20<19:00:48, 3.76it/s] 31%|███ | 114096/371472 [9:04:21<23:16:44, 3.07it/s] 31%|███ | 114097/371472 [9:04:21<21:21:27, 3.35it/s] 31%|███ | 114098/371472 [9:04:21<20:06:06, 3.56it/s] 31%|███ | 114099/371472 [9:04:21<21:04:47, 3.39it/s] 31%|███ | 114100/371472 [9:04:22<23:33:14, 3.04it/s] {'loss': 3.3721, 'learning_rate': 7.238951496476977e-07, 'epoch': 4.91} + 31%|███ | 114100/371472 [9:04:22<23:33:14, 3.04it/s] 31%|███ | 114101/371472 [9:04:22<22:34:13, 3.17it/s] 31%|███ | 114102/371472 [9:04:22<22:22:15, 3.20it/s] 31%|███ | 114103/371472 [9:04:23<21:12:21, 3.37it/s] 31%|███ | 114104/371472 [9:04:23<21:19:37, 3.35it/s] 31%|███ | 114105/371472 [9:04:23<20:48:46, 3.43it/s] 31%|███ | 114106/371472 [9:04:24<20:04:33, 3.56it/s] 31%|███ | 114107/371472 [9:04:24<20:50:17, 3.43it/s] 31%|███ | 114108/371472 [9:04:24<21:10:27, 3.38it/s] 31%|███ | 114109/371472 [9:04:24<20:42:37, 3.45it/s] 31%|███ | 114110/371472 [9:04:25<20:37:54, 3.47it/s] 31%|███ | 114111/371472 [9:04:25<20:46:15, 3.44it/s] 31%|███ | 114112/371472 [9:04:25<21:01:42, 3.40it/s] 31%|███ | 114113/371472 [9:04:26<19:52:06, 3.60it/s] 31%|███ | 114114/371472 [9:04:26<19:13:25, 3.72it/s] 31%|███ | 114115/371472 [9:04:26<18:36:48, 3.84it/s] 31%|███ | 114116/371472 [9:04:26<19:00:01, 3.76it/s] 31%|███ | 114117/371472 [9:04:27<18:47:33, 3.80it/s] 31%|███ | 114118/371472 [9:04:27<20:06:56, 3.55it/s] 31%|███ | 114119/371472 [9:04:27<19:44:08, 3.62it/s] 31%|███ | 114120/371472 [9:04:27<20:12:23, 3.54it/s] {'loss': 3.34, 'learning_rate': 7.238466676722187e-07, 'epoch': 4.92} + 31%|███ | 114120/371472 [9:04:27<20:12:23, 3.54it/s] 31%|███ | 114121/371472 [9:04:28<20:30:08, 3.49it/s] 31%|███ | 114122/371472 [9:04:28<20:11:32, 3.54it/s] 31%|███ | 114123/371472 [9:04:28<20:30:15, 3.49it/s] 31%|███ | 114124/371472 [9:04:29<21:05:00, 3.39it/s] 31%|███ | 114125/371472 [9:04:29<20:20:00, 3.52it/s] 31%|███ | 114126/371472 [9:04:29<21:05:41, 3.39it/s] 31%|███ | 114127/371472 [9:04:29<20:19:59, 3.52it/s] 31%|███ | 114128/371472 [9:04:30<19:30:39, 3.66it/s] 31%|███ | 114129/371472 [9:04:30<19:21:02, 3.69it/s] 31%|███ | 114130/371472 [9:04:30<19:14:42, 3.71it/s] 31%|███ | 114131/371472 [9:04:31<19:03:19, 3.75it/s] 31%|███ | 114132/371472 [9:04:31<21:10:04, 3.38it/s] 31%|███ | 114133/371472 [9:04:31<21:09:11, 3.38it/s] 31%|███ | 114134/371472 [9:04:31<20:13:07, 3.54it/s] 31%|███ | 114135/371472 [9:04:32<20:12:58, 3.54it/s] 31%|███ | 114136/371472 [9:04:32<19:23:31, 3.69it/s] 31%|███ | 114137/371472 [9:04:32<19:05:18, 3.74it/s] 31%|███ | 114138/371472 [9:04:32<19:04:30, 3.75it/s] 31%|███ | 114139/371472 [9:04:33<19:36:46, 3.64it/s] 31%|███ | 114140/371472 [9:04:33<20:29:26, 3.49it/s] {'loss': 3.4718, 'learning_rate': 7.237981856967398e-07, 'epoch': 4.92} + 31%|███ | 114140/371472 [9:04:33<20:29:26, 3.49it/s] 31%|███ | 114141/371472 [9:04:33<21:10:50, 3.37it/s] 31%|███ | 114142/371472 [9:04:34<20:13:35, 3.53it/s] 31%|███ | 114143/371472 [9:04:34<20:21:50, 3.51it/s] 31%|███ | 114144/371472 [9:04:34<19:59:15, 3.58it/s] 31%|███ | 114145/371472 [9:04:35<19:50:55, 3.60it/s] 31%|███ | 114146/371472 [9:04:35<19:54:52, 3.59it/s] 31%|███ | 114147/371472 [9:04:35<19:45:07, 3.62it/s] 31%|███ | 114148/371472 [9:04:36<23:28:46, 3.04it/s] 31%|███ | 114149/371472 [9:04:36<22:38:41, 3.16it/s] 31%|███ | 114150/371472 [9:04:36<21:19:16, 3.35it/s] 31%|███ | 114151/371472 [9:04:36<20:25:52, 3.50it/s] 31%|███ | 114152/371472 [9:04:37<19:38:49, 3.64it/s] 31%|███ | 114153/371472 [9:04:37<19:19:42, 3.70it/s] 31%|███ | 114154/371472 [9:04:37<18:57:32, 3.77it/s] 31%|███ | 114155/371472 [9:04:37<19:01:57, 3.76it/s] 31%|███ | 114156/371472 [9:04:38<18:59:29, 3.76it/s] 31%|███ | 114157/371472 [9:04:38<19:18:31, 3.70it/s] 31%|███ | 114158/371472 [9:04:38<19:53:04, 3.59it/s] 31%|███ | 114159/371472 [9:04:38<19:30:50, 3.66it/s] 31%|███ | 114160/371472 [9:04:39<19:41:12, 3.63it/s] {'loss': 3.4232, 'learning_rate': 7.237497037212609e-07, 'epoch': 4.92} + 31%|███ | 114160/371472 [9:04:39<19:41:12, 3.63it/s] 31%|███ | 114161/371472 [9:04:39<20:27:13, 3.49it/s] 31%|███ | 114162/371472 [9:04:39<22:50:55, 3.13it/s] 31%|███ | 114163/371472 [9:04:40<21:41:29, 3.30it/s] 31%|███ | 114164/371472 [9:04:40<20:50:52, 3.43it/s] 31%|███ | 114165/371472 [9:04:40<20:13:34, 3.53it/s] 31%|███ | 114166/371472 [9:04:40<19:44:56, 3.62it/s] 31%|███ | 114167/371472 [9:04:41<19:35:13, 3.65it/s] 31%|███ | 114168/371472 [9:04:41<20:03:40, 3.56it/s] 31%|███ | 114169/371472 [9:04:41<20:00:52, 3.57it/s] 31%|███ | 114170/371472 [9:04:42<21:32:59, 3.32it/s] 31%|███ | 114171/371472 [9:04:42<21:29:55, 3.32it/s] 31%|███ | 114172/371472 [9:04:42<20:38:21, 3.46it/s] 31%|███ | 114173/371472 [9:04:42<19:51:19, 3.60it/s] 31%|███ | 114174/371472 [9:04:43<19:23:30, 3.69it/s] 31%|███ | 114175/371472 [9:04:43<19:13:32, 3.72it/s] 31%|███ | 114176/371472 [9:04:43<20:00:14, 3.57it/s] 31%|███ | 114177/371472 [9:04:44<19:36:11, 3.65it/s] 31%|███ | 114178/371472 [9:04:44<19:45:52, 3.62it/s] 31%|███ | 114179/371472 [9:04:44<19:59:51, 3.57it/s] 31%|███ | 114180/371472 [9:04:44<20:57:23, 3.41it/s] {'loss': 3.2935, 'learning_rate': 7.23701221745782e-07, 'epoch': 4.92} + 31%|███ | 114180/371472 [9:04:44<20:57:23, 3.41it/s] 31%|███ | 114181/371472 [9:04:45<20:18:52, 3.52it/s] 31%|███ | 114182/371472 [9:04:45<21:06:11, 3.39it/s] 31%|███ | 114183/371472 [9:04:45<20:06:06, 3.56it/s] 31%|███ | 114184/371472 [9:04:46<19:26:27, 3.68it/s] 31%|███ | 114185/371472 [9:04:46<18:42:52, 3.82it/s] 31%|███ | 114186/371472 [9:04:46<19:25:02, 3.68it/s] 31%|███ | 114187/371472 [9:04:46<19:49:22, 3.61it/s] 31%|███ | 114188/371472 [9:04:47<20:26:51, 3.50it/s] 31%|███ | 114189/371472 [9:04:47<19:51:21, 3.60it/s] 31%|███ | 114190/371472 [9:04:47<20:37:45, 3.46it/s] 31%|███ | 114191/371472 [9:04:48<21:13:55, 3.37it/s] 31%|███ | 114192/371472 [9:04:48<21:01:48, 3.40it/s] 31%|███ | 114193/371472 [9:04:48<20:34:04, 3.47it/s] 31%|███ | 114194/371472 [9:04:48<21:16:46, 3.36it/s] 31%|███ | 114195/371472 [9:04:49<20:29:46, 3.49it/s] 31%|███ | 114196/371472 [9:04:49<20:03:17, 3.56it/s] 31%|███ | 114197/371472 [9:04:49<19:31:11, 3.66it/s] 31%|███ | 114198/371472 [9:04:50<20:26:25, 3.50it/s] 31%|███ | 114199/371472 [9:04:50<19:49:02, 3.61it/s] 31%|███ | 114200/371472 [9:04:50<20:40:18, 3.46it/s] {'loss': 3.3965, 'learning_rate': 7.236527397703031e-07, 'epoch': 4.92} + 31%|███ | 114200/371472 [9:04:50<20:40:18, 3.46it/s] 31%|███ | 114201/371472 [9:04:50<21:22:13, 3.34it/s] 31%|███ | 114202/371472 [9:04:51<20:18:59, 3.52it/s] 31%|███ | 114203/371472 [9:04:51<19:46:48, 3.61it/s] 31%|███ | 114204/371472 [9:04:51<19:05:21, 3.74it/s] 31%|███ | 114205/371472 [9:04:51<19:29:30, 3.67it/s] 31%|███ | 114206/371472 [9:04:52<19:26:51, 3.67it/s] 31%|███ | 114207/371472 [9:04:52<19:45:40, 3.62it/s] 31%|███ | 114208/371472 [9:04:52<19:50:33, 3.60it/s] 31%|███ | 114209/371472 [9:04:53<19:26:15, 3.68it/s] 31%|███ | 114210/371472 [9:04:53<20:18:12, 3.52it/s] 31%|███ | 114211/371472 [9:04:53<20:52:19, 3.42it/s] 31%|███ | 114212/371472 [9:04:54<21:57:29, 3.25it/s] 31%|███ | 114213/371472 [9:04:54<20:46:30, 3.44it/s] 31%|███ | 114214/371472 [9:04:54<21:03:43, 3.39it/s] 31%|███ | 114215/371472 [9:04:54<21:37:49, 3.30it/s] 31%|███ | 114216/371472 [9:04:55<21:29:00, 3.33it/s] 31%|███ | 114217/371472 [9:04:55<20:33:28, 3.48it/s] 31%|███ | 114218/371472 [9:04:55<20:08:00, 3.55it/s] 31%|███ | 114219/371472 [9:04:56<19:37:09, 3.64it/s] 31%|███ | 114220/371472 [9:04:56<19:45:26, 3.62it/s] {'loss': 3.4413, 'learning_rate': 7.236042577948243e-07, 'epoch': 4.92} + 31%|███ | 114220/371472 [9:04:56<19:45:26, 3.62it/s] 31%|███ | 114221/371472 [9:04:56<20:13:56, 3.53it/s] 31%|███ | 114222/371472 [9:04:56<20:00:01, 3.57it/s] 31%|███ | 114223/371472 [9:04:57<19:16:56, 3.71it/s] 31%|███ | 114224/371472 [9:04:57<21:47:33, 3.28it/s] 31%|███ | 114225/371472 [9:04:57<22:49:19, 3.13it/s] 31%|███ | 114226/371472 [9:04:58<21:47:06, 3.28it/s] 31%|███ | 114227/371472 [9:04:58<20:22:23, 3.51it/s] 31%|███ | 114228/371472 [9:04:58<20:49:56, 3.43it/s] 31%|███ | 114229/371472 [9:04:58<21:24:12, 3.34it/s] 31%|███ | 114230/371472 [9:04:59<22:01:09, 3.25it/s] 31%|███ | 114231/371472 [9:04:59<20:56:40, 3.41it/s] 31%|███ | 114232/371472 [9:04:59<19:55:55, 3.58it/s] 31%|███ | 114233/371472 [9:05:00<20:24:17, 3.50it/s] 31%|███ | 114234/371472 [9:05:00<20:36:38, 3.47it/s] 31%|███ | 114235/371472 [9:05:00<20:14:50, 3.53it/s] 31%|███ | 114236/371472 [9:05:00<19:24:04, 3.68it/s] 31%|███ | 114237/371472 [9:05:01<19:45:03, 3.62it/s] 31%|███ | 114238/371472 [9:05:01<19:49:11, 3.61it/s] 31%|███ | 114239/371472 [9:05:01<19:38:04, 3.64it/s] 31%|███ | 114240/371472 [9:05:02<19:35:05, 3.65it/s] {'loss': 3.1762, 'learning_rate': 7.235557758193454e-07, 'epoch': 4.92} + 31%|███ | 114240/371472 [9:05:02<19:35:05, 3.65it/s] 31%|███ | 114241/371472 [9:05:02<19:54:59, 3.59it/s] 31%|███ | 114242/371472 [9:05:02<20:42:18, 3.45it/s] 31%|███ | 114243/371472 [9:05:02<20:06:17, 3.55it/s] 31%|███ | 114244/371472 [9:05:03<22:25:48, 3.19it/s] 31%|███ | 114245/371472 [9:05:03<21:33:28, 3.31it/s] 31%|███ | 114246/371472 [9:05:03<20:21:04, 3.51it/s] 31%|███ | 114247/371472 [9:05:04<21:22:52, 3.34it/s] 31%|███ | 114248/371472 [9:05:04<20:54:09, 3.42it/s] 31%|███ | 114249/371472 [9:05:04<20:03:23, 3.56it/s] 31%|███ | 114250/371472 [9:05:04<20:33:32, 3.48it/s] 31%|███ | 114251/371472 [9:05:05<21:54:58, 3.26it/s] 31%|███ | 114252/371472 [9:05:05<23:36:17, 3.03it/s] 31%|███ | 114253/371472 [9:05:05<21:42:42, 3.29it/s] 31%|███ | 114254/371472 [9:05:06<20:55:15, 3.42it/s] 31%|███ | 114255/371472 [9:05:06<20:22:52, 3.51it/s] 31%|███ | 114256/371472 [9:05:06<19:20:03, 3.70it/s] 31%|███ | 114257/371472 [9:05:06<19:22:31, 3.69it/s] 31%|███ | 114258/371472 [9:05:07<20:11:47, 3.54it/s] 31%|███ | 114259/371472 [9:05:07<19:59:50, 3.57it/s] 31%|███ | 114260/371472 [9:05:07<19:45:49, 3.62it/s] {'loss': 3.4838, 'learning_rate': 7.235072938438664e-07, 'epoch': 4.92} + 31%|███ | 114260/371472 [9:05:07<19:45:49, 3.62it/s] 31%|███ | 114261/371472 [9:05:08<20:36:05, 3.47it/s] 31%|███ | 114262/371472 [9:05:08<20:22:01, 3.51it/s] 31%|███ | 114263/371472 [9:05:08<20:22:13, 3.51it/s] 31%|███ | 114264/371472 [9:05:09<20:26:06, 3.50it/s] 31%|███ | 114265/371472 [9:05:09<19:40:05, 3.63it/s] 31%|███ | 114266/371472 [9:05:09<23:51:52, 2.99it/s] 31%|███ | 114267/371472 [9:05:10<22:26:10, 3.18it/s] 31%|███ | 114268/371472 [9:05:10<22:08:16, 3.23it/s] 31%|███ | 114269/371472 [9:05:10<21:18:23, 3.35it/s] 31%|███ | 114270/371472 [9:05:10<20:52:33, 3.42it/s] 31%|███ | 114271/371472 [9:05:11<20:27:21, 3.49it/s] 31%|███ | 114272/371472 [9:05:11<20:14:17, 3.53it/s] 31%|███ | 114273/371472 [9:05:11<20:09:24, 3.54it/s] 31%|███ | 114274/371472 [9:05:11<19:46:27, 3.61it/s] 31%|███ | 114275/371472 [9:05:12<20:06:27, 3.55it/s] 31%|███ | 114276/371472 [9:05:12<19:52:41, 3.59it/s] 31%|███ | 114277/371472 [9:05:12<19:50:51, 3.60it/s] 31%|███ | 114278/371472 [9:05:13<20:01:44, 3.57it/s] 31%|███ | 114279/371472 [9:05:13<19:56:16, 3.58it/s] 31%|███ | 114280/371472 [9:05:13<21:15:17, 3.36it/s] {'loss': 3.3305, 'learning_rate': 7.234588118683875e-07, 'epoch': 4.92} + 31%|███ | 114280/371472 [9:05:13<21:15:17, 3.36it/s] 31%|███ | 114281/371472 [9:05:13<20:09:25, 3.54it/s] 31%|███ | 114282/371472 [9:05:14<19:57:18, 3.58it/s] 31%|███ | 114283/371472 [9:05:14<19:49:20, 3.60it/s] 31%|███ | 114284/371472 [9:05:14<19:45:29, 3.62it/s] 31%|███ | 114285/371472 [9:05:15<19:48:18, 3.61it/s] 31%|███ | 114286/371472 [9:05:15<19:37:46, 3.64it/s] 31%|███ | 114287/371472 [9:05:15<20:28:04, 3.49it/s] 31%|███ | 114288/371472 [9:05:15<19:53:23, 3.59it/s] 31%|███ | 114289/371472 [9:05:16<20:09:21, 3.54it/s] 31%|███ | 114290/371472 [9:05:16<19:44:09, 3.62it/s] 31%|███ | 114291/371472 [9:05:16<19:13:55, 3.71it/s] 31%|███ | 114292/371472 [9:05:16<18:55:03, 3.78it/s] 31%|███ | 114293/371472 [9:05:17<19:23:02, 3.69it/s] 31%|███ | 114294/371472 [9:05:17<20:26:09, 3.50it/s] 31%|███ | 114295/371472 [9:05:17<20:06:19, 3.55it/s] 31%|███ | 114296/371472 [9:05:18<20:15:06, 3.53it/s] 31%|███ | 114297/371472 [9:05:18<19:59:57, 3.57it/s] 31%|███ | 114298/371472 [9:05:18<21:09:10, 3.38it/s] 31%|███ | 114299/371472 [9:05:18<19:54:25, 3.59it/s] 31%|███ | 114300/371472 [9:05:19<20:03:36, 3.56it/s] {'loss': 3.4817, 'learning_rate': 7.234103298929087e-07, 'epoch': 4.92} + 31%|███ | 114300/371472 [9:05:19<20:03:36, 3.56it/s] 31%|███ | 114301/371472 [9:05:19<19:24:15, 3.68it/s] 31%|███ | 114302/371472 [9:05:19<20:24:09, 3.50it/s] 31%|███ | 114303/371472 [9:05:20<20:09:51, 3.54it/s] 31%|███ | 114304/371472 [9:05:20<19:39:00, 3.64it/s] 31%|███ | 114305/371472 [9:05:20<19:58:10, 3.58it/s] 31%|███ | 114306/371472 [9:05:20<19:58:16, 3.58it/s] 31%|███ | 114307/371472 [9:05:21<19:52:21, 3.59it/s] 31%|███ | 114308/371472 [9:05:21<19:02:37, 3.75it/s] 31%|███ | 114309/371472 [9:05:21<18:57:31, 3.77it/s] 31%|███ | 114310/371472 [9:05:21<19:26:05, 3.68it/s] 31%|███ | 114311/371472 [9:05:22<19:25:06, 3.68it/s] 31%|███ | 114312/371472 [9:05:22<19:27:40, 3.67it/s] 31%|███ | 114313/371472 [9:05:22<19:33:03, 3.65it/s] 31%|███ | 114314/371472 [9:05:23<20:26:59, 3.49it/s] 31%|███ | 114315/371472 [9:05:23<19:50:02, 3.60it/s] 31%|███ | 114316/371472 [9:05:23<20:06:03, 3.55it/s] 31%|███ | 114317/371472 [9:05:23<19:51:43, 3.60it/s] 31%|███ | 114318/371472 [9:05:24<19:17:08, 3.70it/s] 31%|███ | 114319/371472 [9:05:24<18:39:20, 3.83it/s] 31%|███ | 114320/371472 [9:05:24<20:17:53, 3.52it/s] {'loss': 3.4203, 'learning_rate': 7.233618479174297e-07, 'epoch': 4.92} + 31%|███ | 114320/371472 [9:05:24<20:17:53, 3.52it/s] 31%|███ | 114321/371472 [9:05:24<19:27:53, 3.67it/s] 31%|███ | 114322/371472 [9:05:25<18:59:37, 3.76it/s] 31%|███ | 114323/371472 [9:05:25<19:19:17, 3.70it/s] 31%|███ | 114324/371472 [9:05:25<20:12:28, 3.53it/s] 31%|███ | 114325/371472 [9:05:26<20:42:56, 3.45it/s] 31%|███ | 114326/371472 [9:05:26<21:39:11, 3.30it/s] 31%|███ | 114327/371472 [9:05:26<20:53:27, 3.42it/s] 31%|███ | 114328/371472 [9:05:27<20:18:52, 3.52it/s] 31%|███ | 114329/371472 [9:05:27<20:08:09, 3.55it/s] 31%|███ | 114330/371472 [9:05:27<19:25:43, 3.68it/s] 31%|███ | 114331/371472 [9:05:27<21:30:36, 3.32it/s] 31%|███ | 114332/371472 [9:05:28<20:24:48, 3.50it/s] 31%|███ | 114333/371472 [9:05:28<20:06:47, 3.55it/s] 31%|███ | 114334/371472 [9:05:28<19:39:14, 3.63it/s] 31%|███ | 114335/371472 [9:05:28<19:56:35, 3.58it/s] 31%|███ | 114336/371472 [9:05:29<19:07:52, 3.73it/s] 31%|███ | 114337/371472 [9:05:29<19:36:22, 3.64it/s] 31%|███ | 114338/371472 [9:05:29<19:50:36, 3.60it/s] 31%|███ | 114339/371472 [9:05:30<20:56:05, 3.41it/s] 31%|███ | 114340/371472 [9:05:30<20:34:23, 3.47it/s] {'loss': 3.3875, 'learning_rate': 7.233133659419508e-07, 'epoch': 4.92} + 31%|███ | 114340/371472 [9:05:30<20:34:23, 3.47it/s] 31%|███ | 114341/371472 [9:05:30<20:05:26, 3.56it/s] 31%|███ | 114342/371472 [9:05:30<20:05:21, 3.56it/s] 31%|███ | 114343/371472 [9:05:31<19:23:07, 3.68it/s] 31%|███ | 114344/371472 [9:05:31<20:11:46, 3.54it/s] 31%|███ | 114345/371472 [9:05:31<20:04:53, 3.56it/s] 31%|███ | 114346/371472 [9:05:32<19:49:14, 3.60it/s] 31%|███ | 114347/371472 [9:05:32<20:23:30, 3.50it/s] 31%|███ | 114348/371472 [9:05:32<19:44:53, 3.62it/s] 31%|███ | 114349/371472 [9:05:32<19:28:01, 3.67it/s] 31%|███ | 114350/371472 [9:05:33<19:15:40, 3.71it/s] 31%|███ | 114351/371472 [9:05:33<19:30:37, 3.66it/s] 31%|███ | 114352/371472 [9:05:33<20:01:41, 3.57it/s] 31%|███ | 114353/371472 [9:05:33<19:55:56, 3.58it/s] 31%|███ | 114354/371472 [9:05:34<20:30:05, 3.48it/s] 31%|███ | 114355/371472 [9:05:34<19:58:52, 3.57it/s] 31%|███ | 114356/371472 [9:05:34<19:48:17, 3.61it/s] 31%|███ | 114357/371472 [9:05:35<20:09:06, 3.54it/s] 31%|███ | 114358/371472 [9:05:35<22:08:11, 3.23it/s] 31%|███ | 114359/371472 [9:05:35<21:07:26, 3.38it/s] 31%|███ | 114360/371472 [9:05:36<20:07:00, 3.55it/s] {'loss': 3.5222, 'learning_rate': 7.23264883966472e-07, 'epoch': 4.93} + 31%|███ | 114360/371472 [9:05:36<20:07:00, 3.55it/s] 31%|███ | 114361/371472 [9:05:36<19:27:56, 3.67it/s] 31%|███ | 114362/371472 [9:05:36<18:55:25, 3.77it/s] 31%|███ | 114363/371472 [9:05:36<18:53:45, 3.78it/s] 31%|███ | 114364/371472 [9:05:37<19:34:11, 3.65it/s] 31%|███ | 114365/371472 [9:05:37<19:26:39, 3.67it/s] 31%|███ | 114366/371472 [9:05:37<19:21:40, 3.69it/s] 31%|███ | 114367/371472 [9:05:37<19:05:15, 3.74it/s] 31%|███ | 114368/371472 [9:05:38<18:35:07, 3.84it/s] 31%|███ | 114369/371472 [9:05:38<18:54:37, 3.78it/s] 31%|███ | 114370/371472 [9:05:38<19:56:04, 3.58it/s] 31%|███ | 114371/371472 [9:05:38<19:51:43, 3.60it/s] 31%|███ | 114372/371472 [9:05:39<20:13:11, 3.53it/s] 31%|███ | 114373/371472 [9:05:39<19:56:53, 3.58it/s] 31%|███ | 114374/371472 [9:05:39<20:24:41, 3.50it/s] 31%|███ | 114375/371472 [9:05:40<21:45:39, 3.28it/s] 31%|███ | 114376/371472 [9:05:40<20:26:20, 3.49it/s] 31%|███ | 114377/371472 [9:05:40<19:56:28, 3.58it/s] 31%|███ | 114378/371472 [9:05:41<21:07:50, 3.38it/s] 31%|███ | 114379/371472 [9:05:41<22:43:22, 3.14it/s] 31%|███ | 114380/371472 [9:05:41<21:17:18, 3.35it/s] {'loss': 3.327, 'learning_rate': 7.232164019909932e-07, 'epoch': 4.93} + 31%|███ | 114380/371472 [9:05:41<21:17:18, 3.35it/s] 31%|███ | 114381/371472 [9:05:41<20:37:44, 3.46it/s] 31%|███ | 114382/371472 [9:05:42<20:09:36, 3.54it/s] 31%|███ | 114383/371472 [9:05:42<20:20:21, 3.51it/s] 31%|███ | 114384/371472 [9:05:42<20:10:44, 3.54it/s] 31%|███ | 114385/371472 [9:05:43<19:48:09, 3.61it/s] 31%|███ | 114386/371472 [9:05:43<18:51:28, 3.79it/s] 31%|███ | 114387/371472 [9:05:43<18:52:46, 3.78it/s] 31%|███ | 114388/371472 [9:05:43<18:43:32, 3.81it/s] 31%|██�� | 114389/371472 [9:05:44<19:13:49, 3.71it/s] 31%|███ | 114390/371472 [9:05:44<20:33:56, 3.47it/s] 31%|███ | 114391/371472 [9:05:44<20:08:53, 3.54it/s] 31%|███ | 114392/371472 [9:05:44<19:56:55, 3.58it/s] 31%|███ | 114393/371472 [9:05:45<20:47:02, 3.44it/s] 31%|███ | 114394/371472 [9:05:45<21:05:40, 3.39it/s] 31%|███ | 114395/371472 [9:05:45<20:21:36, 3.51it/s] 31%|███ | 114396/371472 [9:05:46<20:10:05, 3.54it/s] 31%|███ | 114397/371472 [9:05:46<19:50:37, 3.60it/s] 31%|███ | 114398/371472 [9:05:46<19:14:26, 3.71it/s] 31%|███ | 114399/371472 [9:05:46<19:13:17, 3.72it/s] 31%|███ | 114400/371472 [9:05:47<20:05:03, 3.56it/s] {'loss': 3.2285, 'learning_rate': 7.231679200155142e-07, 'epoch': 4.93} + 31%|███ | 114400/371472 [9:05:47<20:05:03, 3.56it/s] 31%|███ | 114401/371472 [9:05:47<19:38:38, 3.64it/s] 31%|███ | 114402/371472 [9:05:47<19:09:40, 3.73it/s] 31%|███ | 114403/371472 [9:05:47<19:15:39, 3.71it/s] 31%|███ | 114404/371472 [9:05:48<20:36:24, 3.47it/s] 31%|███ | 114405/371472 [9:05:48<20:18:00, 3.52it/s] 31%|███ | 114406/371472 [9:05:48<20:27:12, 3.49it/s] 31%|███ | 114407/371472 [9:05:49<20:08:00, 3.55it/s] 31%|███ | 114408/371472 [9:05:49<19:10:35, 3.72it/s] 31%|███ | 114409/371472 [9:05:49<19:16:36, 3.70it/s] 31%|███ | 114410/371472 [9:05:50<21:05:25, 3.39it/s] 31%|███ | 114411/371472 [9:05:50<21:19:46, 3.35it/s] 31%|███ | 114412/371472 [9:05:50<21:28:35, 3.32it/s] 31%|███ | 114413/371472 [9:05:50<21:54:08, 3.26it/s] 31%|███ | 114414/371472 [9:05:51<21:24:40, 3.33it/s] 31%|███ | 114415/371472 [9:05:51<21:01:13, 3.40it/s] 31%|███ | 114416/371472 [9:05:51<20:56:08, 3.41it/s] 31%|███ | 114417/371472 [9:05:52<20:22:05, 3.51it/s] 31%|███ | 114418/371472 [9:05:52<19:39:38, 3.63it/s] 31%|███ | 114419/371472 [9:05:52<22:32:52, 3.17it/s] 31%|███ | 114420/371472 [9:05:53<22:01:03, 3.24it/s] {'loss': 3.2453, 'learning_rate': 7.231194380400352e-07, 'epoch': 4.93} + 31%|███ | 114420/371472 [9:05:53<22:01:03, 3.24it/s] 31%|███ | 114421/371472 [9:05:53<25:06:02, 2.84it/s] 31%|███ | 114422/371472 [9:05:53<23:01:15, 3.10it/s] 31%|███ | 114423/371472 [9:05:53<21:53:49, 3.26it/s] 31%|███ | 114424/371472 [9:05:54<20:53:54, 3.42it/s] 31%|███ | 114425/371472 [9:05:54<20:33:19, 3.47it/s] 31%|███ | 114426/371472 [9:05:54<21:33:22, 3.31it/s] 31%|███ | 114427/371472 [9:05:55<21:49:18, 3.27it/s] 31%|███ | 114428/371472 [9:05:55<21:05:59, 3.38it/s] 31%|███ | 114429/371472 [9:05:55<20:01:58, 3.56it/s] 31%|███ | 114430/371472 [9:05:55<19:34:51, 3.65it/s] 31%|███ | 114431/371472 [9:05:56<21:29:06, 3.32it/s] 31%|███ | 114432/371472 [9:05:56<20:41:46, 3.45it/s] 31%|███ | 114433/371472 [9:05:56<20:57:39, 3.41it/s] 31%|███ | 114434/371472 [9:05:57<20:39:39, 3.46it/s] 31%|███ | 114435/371472 [9:05:57<20:08:42, 3.54it/s] 31%|███ | 114436/371472 [9:05:57<20:08:03, 3.55it/s] 31%|███ | 114437/371472 [9:05:57<19:42:33, 3.62it/s] 31%|███ | 114438/371472 [9:05:58<18:58:34, 3.76it/s] 31%|███ | 114439/371472 [9:05:58<20:33:02, 3.47it/s] 31%|███ | 114440/371472 [9:05:58<20:44:56, 3.44it/s] {'loss': 3.3993, 'learning_rate': 7.230709560645564e-07, 'epoch': 4.93} + 31%|███ | 114440/371472 [9:05:58<20:44:56, 3.44it/s] 31%|███ | 114441/371472 [9:05:59<20:42:27, 3.45it/s] 31%|███ | 114442/371472 [9:05:59<19:53:33, 3.59it/s] 31%|███ | 114443/371472 [9:05:59<20:11:58, 3.53it/s] 31%|███ | 114444/371472 [9:05:59<19:59:24, 3.57it/s] 31%|███ | 114445/371472 [9:06:00<21:10:23, 3.37it/s] 31%|███ | 114446/371472 [9:06:00<20:26:33, 3.49it/s] 31%|███ | 114447/371472 [9:06:00<19:37:13, 3.64it/s] 31%|███ | 114448/371472 [9:06:01<19:50:07, 3.60it/s] 31%|███ | 114449/371472 [9:06:01<20:32:06, 3.48it/s] 31%|███ | 114450/371472 [9:06:01<19:55:30, 3.58it/s] 31%|███ | 114451/371472 [9:06:01<19:28:53, 3.66it/s] 31%|███ | 114452/371472 [9:06:02<20:10:42, 3.54it/s] 31%|███ | 114453/371472 [9:06:02<21:01:53, 3.39it/s] 31%|███ | 114454/371472 [9:06:02<20:22:32, 3.50it/s] 31%|███ | 114455/371472 [9:06:03<20:56:14, 3.41it/s] 31%|███ | 114456/371472 [9:06:03<20:52:22, 3.42it/s] 31%|███ | 114457/371472 [9:06:03<21:30:51, 3.32it/s] 31%|███ | 114458/371472 [9:06:04<21:03:05, 3.39it/s] 31%|███ | 114459/371472 [9:06:04<20:03:43, 3.56it/s] 31%|███ | 114460/371472 [9:06:04<19:53:56, 3.59it/s] {'loss': 3.4082, 'learning_rate': 7.230224740890776e-07, 'epoch': 4.93} + 31%|███ | 114460/371472 [9:06:04<19:53:56, 3.59it/s] 31%|███ | 114461/371472 [9:06:04<20:57:11, 3.41it/s] 31%|███ | 114462/371472 [9:06:05<20:25:42, 3.49it/s] 31%|███ | 114463/371472 [9:06:05<20:34:03, 3.47it/s] 31%|███ | 114464/371472 [9:06:05<21:40:58, 3.29it/s] 31%|███ | 114465/371472 [9:06:06<20:44:29, 3.44it/s] 31%|███ | 114466/371472 [9:06:06<20:13:41, 3.53it/s] 31%|███ | 114467/371472 [9:06:06<19:57:20, 3.58it/s] 31%|███ | 114468/371472 [9:06:06<21:49:05, 3.27it/s] 31%|███ | 114469/371472 [9:06:07<20:35:34, 3.47it/s] 31%|███ | 114470/371472 [9:06:07<21:20:21, 3.35it/s] 31%|███ | 114471/371472 [9:06:07<20:47:00, 3.43it/s] 31%|███ | 114472/371472 [9:06:08<21:23:10, 3.34it/s] 31%|███ | 114473/371472 [9:06:08<20:39:32, 3.46it/s] 31%|███ | 114474/371472 [9:06:08<20:44:28, 3.44it/s] 31%|███ | 114475/371472 [9:06:08<20:33:09, 3.47it/s] 31%|███ | 114476/371472 [9:06:09<20:11:12, 3.54it/s] 31%|███ | 114477/371472 [9:06:09<19:13:57, 3.71it/s] 31%|███ | 114478/371472 [9:06:09<18:22:35, 3.88it/s] 31%|███ | 114479/371472 [9:06:09<18:47:33, 3.80it/s] 31%|███ | 114480/371472 [9:06:10<18:49:57, 3.79it/s] {'loss': 3.4327, 'learning_rate': 7.229739921135987e-07, 'epoch': 4.93} + 31%|███ | 114480/371472 [9:06:10<18:49:57, 3.79it/s] 31%|███ | 114481/371472 [9:06:10<18:48:02, 3.80it/s] 31%|███ | 114482/371472 [9:06:10<19:10:47, 3.72it/s] 31%|███ | 114483/371472 [9:06:11<18:39:07, 3.83it/s] 31%|███ | 114484/371472 [9:06:11<18:26:18, 3.87it/s] 31%|███ | 114485/371472 [9:06:11<18:12:32, 3.92it/s] 31%|███ | 114486/371472 [9:06:11<18:56:58, 3.77it/s] 31%|███ | 114487/371472 [9:06:12<18:27:14, 3.87it/s] 31%|███ | 114488/371472 [9:06:12<19:13:38, 3.71it/s] 31%|███ | 114489/371472 [9:06:12<18:46:12, 3.80it/s] 31%|███ | 114490/371472 [9:06:12<18:38:50, 3.83it/s] 31%|███ | 114491/371472 [9:06:13<19:29:22, 3.66it/s] 31%|███ | 114492/371472 [9:06:13<18:59:12, 3.76it/s] 31%|███ | 114493/371472 [9:06:13<19:04:47, 3.74it/s] 31%|███ | 114494/371472 [9:06:13<18:36:09, 3.84it/s] 31%|███ | 114495/371472 [9:06:14<19:26:35, 3.67it/s] 31%|███ | 114496/371472 [9:06:14<18:59:13, 3.76it/s] 31%|███ | 114497/371472 [9:06:14<18:51:06, 3.79it/s] 31%|███ | 114498/371472 [9:06:14<18:40:58, 3.82it/s] 31%|███ | 114499/371472 [9:06:15<19:07:11, 3.73it/s] 31%|███ | 114500/371472 [9:06:15<18:43:23, 3.81it/s] {'loss': 3.3565, 'learning_rate': 7.229255101381197e-07, 'epoch': 4.93} + 31%|███ | 114500/371472 [9:06:15<18:43:23, 3.81it/s] 31%|███ | 114501/371472 [9:06:15<18:21:04, 3.89it/s] 31%|███ | 114502/371472 [9:06:16<18:32:09, 3.85it/s] 31%|███ | 114503/371472 [9:06:16<20:08:58, 3.54it/s] 31%|███ | 114504/371472 [9:06:16<19:36:18, 3.64it/s] 31%|███ | 114505/371472 [9:06:16<19:22:03, 3.69it/s] 31%|███ | 114506/371472 [9:06:17<20:05:59, 3.55it/s] 31%|███ | 114507/371472 [9:06:17<19:28:42, 3.66it/s] 31%|███ | 114508/371472 [9:06:17<21:15:48, 3.36it/s] 31%|███ | 114509/371472 [9:06:18<21:22:44, 3.34it/s] 31%|███ | 114510/371472 [9:06:18<22:23:09, 3.19it/s] 31%|███ | 114511/371472 [9:06:18<21:30:15, 3.32it/s] 31%|███ | 114512/371472 [9:06:18<20:37:30, 3.46it/s] 31%|███ | 114513/371472 [9:06:19<19:25:17, 3.68it/s] 31%|███ | 114514/371472 [9:06:19<20:11:19, 3.54it/s] 31%|███ | 114515/371472 [9:06:19<20:02:49, 3.56it/s] 31%|███ | 114516/371472 [9:06:20<19:49:54, 3.60it/s] 31%|███ | 114517/371472 [9:06:20<22:13:02, 3.21it/s] 31%|███ | 114518/371472 [9:06:20<22:42:38, 3.14it/s] 31%|███ | 114519/371472 [9:06:21<20:51:57, 3.42it/s] 31%|███ | 114520/371472 [9:06:21<21:58:42, 3.25it/s] {'loss': 3.1696, 'learning_rate': 7.228770281626408e-07, 'epoch': 4.93} + 31%|███ | 114520/371472 [9:06:21<21:58:42, 3.25it/s] 31%|███ | 114521/371472 [9:06:21<22:01:59, 3.24it/s] 31%|███ | 114522/371472 [9:06:21<21:56:59, 3.25it/s] 31%|███ | 114523/371472 [9:06:22<21:47:27, 3.28it/s] 31%|███ | 114524/371472 [9:06:22<21:12:38, 3.37it/s] 31%|███ | 114525/371472 [9:06:22<21:08:54, 3.37it/s] 31%|███ | 114526/371472 [9:06:23<19:51:15, 3.59it/s] 31%|███ | 114527/371472 [9:06:23<19:23:42, 3.68it/s] 31%|███ | 114528/371472 [9:06:23<19:19:20, 3.69it/s] 31%|███ | 114529/371472 [9:06:23<19:47:20, 3.61it/s] 31%|███ | 114530/371472 [9:06:24<22:24:28, 3.19it/s] 31%|███ | 114531/371472 [9:06:24<21:47:30, 3.28it/s] 31%|███ | 114532/371472 [9:06:24<21:11:05, 3.37it/s] 31%|███ | 114533/371472 [9:06:25<20:34:32, 3.47it/s] 31%|███ | 114534/371472 [9:06:25<20:14:11, 3.53it/s] 31%|███ | 114535/371472 [9:06:25<19:43:07, 3.62it/s] 31%|███ | 114536/371472 [9:06:25<20:38:29, 3.46it/s] 31%|███ | 114537/371472 [9:06:26<21:04:53, 3.39it/s] 31%|███ | 114538/371472 [9:06:26<22:51:50, 3.12it/s] 31%|███ | 114539/371472 [9:06:26<21:48:25, 3.27it/s] 31%|███ | 114540/371472 [9:06:27<20:46:21, 3.44it/s] {'loss': 3.3463, 'learning_rate': 7.22828546187162e-07, 'epoch': 4.93} + 31%|███ | 114540/371472 [9:06:27<20:46:21, 3.44it/s] 31%|███ | 114541/371472 [9:06:27<20:10:17, 3.54it/s] 31%|███ | 114542/371472 [9:06:27<19:45:11, 3.61it/s] 31%|███ | 114543/371472 [9:06:28<21:35:20, 3.31it/s] 31%|███ | 114544/371472 [9:06:28<20:18:43, 3.51it/s] 31%|███ | 114545/371472 [9:06:28<19:50:47, 3.60it/s] 31%|███ | 114546/371472 [9:06:28<20:36:09, 3.46it/s] 31%|███ | 114547/371472 [9:06:29<20:02:46, 3.56it/s] 31%|███ | 114548/371472 [9:06:29<20:44:37, 3.44it/s] 31%|███ | 114549/371472 [9:06:29<19:56:51, 3.58it/s] 31%|███ | 114550/371472 [9:06:29<19:22:53, 3.68it/s] 31%|███ | 114551/371472 [9:06:30<18:47:59, 3.80it/s] 31%|███ | 114552/371472 [9:06:30<18:29:15, 3.86it/s] 31%|███ | 114553/371472 [9:06:30<19:29:00, 3.66it/s] 31%|███ | 114554/371472 [9:06:31<19:33:58, 3.65it/s] 31%|███ | 114555/371472 [9:06:31<19:12:22, 3.72it/s] 31%|███ | 114556/371472 [9:06:31<18:44:15, 3.81it/s] 31%|███ | 114557/371472 [9:06:31<18:39:06, 3.83it/s] 31%|███ | 114558/371472 [9:06:32<18:49:53, 3.79it/s] 31%|███ | 114559/371472 [9:06:32<18:50:19, 3.79it/s] 31%|███ | 114560/371472 [9:06:32<20:41:44, 3.45it/s] {'loss': 3.3488, 'learning_rate': 7.22780064211683e-07, 'epoch': 4.93} + 31%|███ | 114560/371472 [9:06:32<20:41:44, 3.45it/s] 31%|███ | 114561/371472 [9:06:32<20:00:17, 3.57it/s] 31%|███ | 114562/371472 [9:06:33<19:20:18, 3.69it/s] 31%|███ | 114563/371472 [9:06:33<19:22:10, 3.68it/s] 31%|███ | 114564/371472 [9:06:33<19:53:14, 3.59it/s] 31%|███ | 114565/371472 [9:06:34<20:46:06, 3.44it/s] 31%|███ | 114566/371472 [9:06:34<20:44:08, 3.44it/s] 31%|███ | 114567/371472 [9:06:34<20:43:53, 3.44it/s] 31%|███ | 114568/371472 [9:06:34<20:13:55, 3.53it/s] 31%|███ | 114569/371472 [9:06:35<19:53:26, 3.59it/s] 31%|███ | 114570/371472 [9:06:35<19:09:24, 3.73it/s] 31%|███ | 114571/371472 [9:06:35<20:15:51, 3.52it/s] 31%|███ | 114572/371472 [9:06:36<20:15:19, 3.52it/s] 31%|███ | 114573/371472 [9:06:36<19:58:55, 3.57it/s] 31%|███ | 114574/371472 [9:06:36<20:04:45, 3.55it/s] 31%|███ | 114575/371472 [9:06:36<20:27:12, 3.49it/s] 31%|███ | 114576/371472 [9:06:37<20:30:44, 3.48it/s] 31%|███ | 114577/371472 [9:06:37<21:52:51, 3.26it/s] 31%|███ | 114578/371472 [9:06:37<21:17:04, 3.35it/s] 31%|███ | 114579/371472 [9:06:38<20:14:58, 3.52it/s] 31%|███ | 114580/371472 [9:06:38<22:32:43, 3.17it/s] {'loss': 3.3541, 'learning_rate': 7.227315822362041e-07, 'epoch': 4.94} + 31%|███ | 114580/371472 [9:06:38<22:32:43, 3.17it/s] 31%|███ | 114581/371472 [9:06:38<22:12:25, 3.21it/s] 31%|███ | 114582/371472 [9:06:39<23:04:32, 3.09it/s] 31%|███ | 114583/371472 [9:06:39<22:52:21, 3.12it/s] 31%|███ | 114584/371472 [9:06:39<22:22:05, 3.19it/s] 31%|███ | 114585/371472 [9:06:40<21:51:22, 3.26it/s] 31%|███ | 114586/371472 [9:06:40<20:52:52, 3.42it/s] 31%|███ | 114587/371472 [9:06:40<20:24:20, 3.50it/s] 31%|███ | 114588/371472 [9:06:40<19:42:59, 3.62it/s] 31%|███ | 114589/371472 [9:06:41<19:12:54, 3.71it/s] 31%|███ | 114590/371472 [9:06:41<19:38:19, 3.63it/s] 31%|███ | 114591/371472 [9:06:41<20:06:52, 3.55it/s] 31%|███ | 114592/371472 [9:06:42<22:56:38, 3.11it/s] 31%|███ | 114593/371472 [9:06:42<21:31:58, 3.31it/s] 31%|███ | 114594/371472 [9:06:42<21:59:35, 3.24it/s] 31%|███ | 114595/371472 [9:06:42<21:36:17, 3.30it/s] 31%|███ | 114596/371472 [9:06:43<20:43:00, 3.44it/s] 31%|███ | 114597/371472 [9:06:43<21:09:11, 3.37it/s] 31%|███ | 114598/371472 [9:06:43<20:00:22, 3.57it/s] 31%|███ | 114599/371472 [9:06:44<19:55:33, 3.58it/s] 31%|███ | 114600/371472 [9:06:44<19:56:20, 3.58it/s] {'loss': 3.255, 'learning_rate': 7.226831002607253e-07, 'epoch': 4.94} + 31%|███ | 114600/371472 [9:06:44<19:56:20, 3.58it/s] 31%|███ | 114601/371472 [9:06:44<19:24:47, 3.68it/s] 31%|███ | 114602/371472 [9:06:44<20:30:48, 3.48it/s] 31%|███ | 114603/371472 [9:06:45<20:06:53, 3.55it/s] 31%|███ | 114604/371472 [9:06:45<19:52:44, 3.59it/s] 31%|███ | 114605/371472 [9:06:45<19:44:40, 3.61it/s] 31%|███ | 114606/371472 [9:06:45<19:08:37, 3.73it/s] 31%|███ | 114607/371472 [9:06:46<19:19:52, 3.69it/s] 31%|███ | 114608/371472 [9:06:46<20:51:00, 3.42it/s] 31%|███ | 114609/371472 [9:06:46<19:57:12, 3.58it/s] 31%|███ | 114610/371472 [9:06:47<19:48:21, 3.60it/s] 31%|███ | 114611/371472 [9:06:47<21:50:52, 3.27it/s] 31%|███ | 114612/371472 [9:06:47<20:45:30, 3.44it/s] 31%|███ | 114613/371472 [9:06:47<20:33:06, 3.47it/s] 31%|███ | 114614/371472 [9:06:48<20:24:49, 3.50it/s] 31%|███ | 114615/371472 [9:06:48<20:43:43, 3.44it/s] 31%|███ | 114616/371472 [9:06:48<20:58:32, 3.40it/s] 31%|███ | 114617/371472 [9:06:49<20:49:09, 3.43it/s] 31%|███ | 114618/371472 [9:06:49<20:22:41, 3.50it/s] 31%|███ | 114619/371472 [9:06:49<19:46:51, 3.61it/s] 31%|███ | 114620/371472 [9:06:49<19:21:37, 3.69it/s] {'loss': 3.399, 'learning_rate': 7.226346182852465e-07, 'epoch': 4.94} + 31%|███ | 114620/371472 [9:06:49<19:21:37, 3.69it/s] 31%|███ | 114621/371472 [9:06:50<20:18:46, 3.51it/s] 31%|███ | 114622/371472 [9:06:50<20:37:45, 3.46it/s] 31%|███ | 114623/371472 [9:06:50<20:41:57, 3.45it/s] 31%|███ | 114624/371472 [9:06:51<20:29:26, 3.48it/s] 31%|███ | 114625/371472 [9:06:51<19:38:07, 3.63it/s] 31%|███ | 114626/371472 [9:06:51<19:10:09, 3.72it/s] 31%|███ | 114627/371472 [9:06:51<18:26:32, 3.87it/s] 31%|███ | 114628/371472 [9:06:52<19:25:12, 3.67it/s] 31%|███ | 114629/371472 [9:06:52<20:13:56, 3.53it/s] 31%|███ | 114630/371472 [9:06:52<20:05:08, 3.55it/s] 31%|███ | 114631/371472 [9:06:53<19:34:49, 3.64it/s] 31%|███ | 114632/371472 [9:06:53<20:10:07, 3.54it/s] 31%|███ | 114633/371472 [9:06:53<21:03:19, 3.39it/s] 31%|███ | 114634/371472 [9:06:53<21:26:58, 3.33it/s] 31%|███ | 114635/371472 [9:06:54<21:35:47, 3.30it/s] 31%|███ | 114636/371472 [9:06:54<21:19:15, 3.35it/s] 31%|███ | 114637/371472 [9:06:54<21:13:41, 3.36it/s] 31%|███ | 114638/371472 [9:06:55<20:37:20, 3.46it/s] 31%|███ | 114639/371472 [9:06:55<20:18:42, 3.51it/s] 31%|███ | 114640/371472 [9:06:55<19:41:09, 3.62it/s] {'loss': 3.5777, 'learning_rate': 7.225861363097674e-07, 'epoch': 4.94} + 31%|███ | 114640/371472 [9:06:55<19:41:09, 3.62it/s] 31%|███ | 114641/371472 [9:06:56<21:46:10, 3.28it/s] 31%|███ | 114642/371472 [9:06:56<21:54:39, 3.26it/s] 31%|███ | 114643/371472 [9:06:56<20:35:41, 3.46it/s] 31%|███ | 114644/371472 [9:06:56<20:50:19, 3.42it/s] 31%|███ | 114645/371472 [9:06:57<20:07:12, 3.55it/s] 31%|███ | 114646/371472 [9:06:57<20:21:27, 3.50it/s] 31%|███ | 114647/371472 [9:06:57<19:43:34, 3.62it/s] 31%|███ | 114648/371472 [9:06:57<19:51:33, 3.59it/s] 31%|███ | 114649/371472 [9:06:58<19:56:12, 3.58it/s] 31%|███ | 114650/371472 [9:06:58<19:01:36, 3.75it/s] 31%|███ | 114651/371472 [9:06:58<20:30:52, 3.48it/s] 31%|███ | 114652/371472 [9:06:59<19:49:21, 3.60it/s] 31%|███ | 114653/371472 [9:06:59<20:34:15, 3.47it/s] 31%|███ | 114654/371472 [9:06:59<20:15:16, 3.52it/s] 31%|███ | 114655/371472 [9:06:59<20:28:26, 3.48it/s] 31%|███ | 114656/371472 [9:07:00<21:14:59, 3.36it/s] 31%|███ | 114657/371472 [9:07:00<20:33:43, 3.47it/s] 31%|███ | 114658/371472 [9:07:00<20:21:43, 3.50it/s] 31%|███ | 114659/371472 [9:07:01<20:11:21, 3.53it/s] 31%|███ | 114660/371472 [9:07:01<19:43:34, 3.62it/s] {'loss': 3.3915, 'learning_rate': 7.225376543342885e-07, 'epoch': 4.94} + 31%|███ | 114660/371472 [9:07:01<19:43:34, 3.62it/s] 31%|███ | 114661/371472 [9:07:01<22:41:07, 3.14it/s] 31%|███ | 114662/371472 [9:07:02<21:40:16, 3.29it/s] 31%|███ | 114663/371472 [9:07:02<21:13:55, 3.36it/s] 31%|███ | 114664/371472 [9:07:02<20:28:09, 3.48it/s] 31%|███ | 114665/371472 [9:07:02<21:04:14, 3.39it/s] 31%|███ | 114666/371472 [9:07:03<20:57:09, 3.40it/s] 31%|███ | 114667/371472 [9:07:03<21:14:25, 3.36it/s] 31%|███ | 114668/371472 [9:07:03<20:40:15, 3.45it/s] 31%|███ | 114669/371472 [9:07:04<20:21:19, 3.50it/s] 31%|███ | 114670/371472 [9:07:04<20:35:05, 3.47it/s] 31%|███ | 114671/371472 [9:07:04<20:51:39, 3.42it/s] 31%|███ | 114672/371472 [9:07:04<20:21:33, 3.50it/s] 31%|███ | 114673/371472 [9:07:05<21:58:27, 3.25it/s] 31%|███ | 114674/371472 [9:07:05<21:15:16, 3.36it/s] 31%|███ | 114675/371472 [9:07:05<19:55:04, 3.58it/s] 31%|███ | 114676/371472 [9:07:06<21:02:56, 3.39it/s] 31%|███ | 114677/371472 [9:07:06<21:47:46, 3.27it/s] 31%|███ | 114678/371472 [9:07:06<20:35:33, 3.46it/s] 31%|███ | 114679/371472 [9:07:07<20:50:53, 3.42it/s] 31%|███ | 114680/371472 [9:07:07<20:45:35, 3.44it/s] {'loss': 3.3361, 'learning_rate': 7.224891723588097e-07, 'epoch': 4.94} + 31%|███ | 114680/371472 [9:07:07<20:45:35, 3.44it/s] 31%|███ | 114681/371472 [9:07:07<20:33:58, 3.47it/s] 31%|███ | 114682/371472 [9:07:07<20:24:07, 3.50it/s] 31%|███ | 114683/371472 [9:07:08<19:30:44, 3.66it/s] 31%|███ | 114684/371472 [9:07:08<19:15:50, 3.70it/s] 31%|███ | 114685/371472 [9:07:08<19:33:53, 3.65it/s] 31%|███ | 114686/371472 [9:07:08<20:46:54, 3.43it/s] 31%|███ | 114687/371472 [9:07:09<20:27:43, 3.49it/s] 31%|███ | 114688/371472 [9:07:09<20:14:09, 3.52it/s] 31%|███ | 114689/371472 [9:07:09<19:45:05, 3.61it/s] 31%|███ | 114690/371472 [9:07:10<22:17:42, 3.20it/s] 31%|███ | 114691/371472 [9:07:10<22:08:27, 3.22it/s] 31%|███ | 114692/371472 [9:07:10<21:01:43, 3.39it/s] 31%|███ | 114693/371472 [9:07:11<19:58:12, 3.57it/s] 31%|███ | 114694/371472 [9:07:11<19:53:53, 3.58it/s] 31%|███ | 114695/371472 [9:07:11<19:19:42, 3.69it/s] 31%|███ | 114696/371472 [9:07:11<18:55:22, 3.77it/s] 31%|███ | 114697/371472 [9:07:12<20:34:23, 3.47it/s] 31%|███ | 114698/371472 [9:07:12<20:17:49, 3.51it/s] 31%|███ | 114699/371472 [9:07:12<19:42:54, 3.62it/s] 31%|███ | 114700/371472 [9:07:13<21:11:23, 3.37it/s] {'loss': 3.2038, 'learning_rate': 7.224406903833308e-07, 'epoch': 4.94} + 31%|███ | 114700/371472 [9:07:13<21:11:23, 3.37it/s] 31%|███ | 114701/371472 [9:07:13<20:14:51, 3.52it/s] 31%|███ | 114702/371472 [9:07:13<19:35:35, 3.64it/s] 31%|███ | 114703/371472 [9:07:13<19:03:54, 3.74it/s] 31%|███ | 114704/371472 [9:07:14<19:38:15, 3.63it/s] 31%|███ | 114705/371472 [9:07:14<19:46:21, 3.61it/s] 31%|███ | 114706/371472 [9:07:14<19:03:53, 3.74it/s] 31%|███ | 114707/371472 [9:07:14<18:44:11, 3.81it/s] 31%|███ | 114708/371472 [9:07:15<19:09:34, 3.72it/s] 31%|███ | 114709/371472 [9:07:15<19:47:10, 3.60it/s] 31%|███ | 114710/371472 [9:07:15<20:11:53, 3.53it/s] 31%|███ | 114711/371472 [9:07:15<20:05:18, 3.55it/s] 31%|███ | 114712/371472 [9:07:16<19:30:24, 3.66it/s] 31%|███ | 114713/371472 [9:07:16<18:52:05, 3.78it/s] 31%|███ | 114714/371472 [9:07:16<18:40:50, 3.82it/s] 31%|███ | 114715/371472 [9:07:17<18:35:20, 3.84it/s] 31%|███ | 114716/371472 [9:07:17<19:54:13, 3.58it/s] 31%|███ | 114717/371472 [9:07:17<20:13:34, 3.53it/s] 31%|███ | 114718/371472 [9:07:17<20:51:59, 3.42it/s] 31%|███ | 114719/371472 [9:07:18<20:44:18, 3.44it/s] 31%|███ | 114720/371472 [9:07:18<20:36:54, 3.46it/s] {'loss': 3.4828, 'learning_rate': 7.223922084078518e-07, 'epoch': 4.94} + 31%|���██ | 114720/371472 [9:07:18<20:36:54, 3.46it/s] 31%|███ | 114721/371472 [9:07:18<20:58:34, 3.40it/s] 31%|███ | 114722/371472 [9:07:19<20:28:57, 3.48it/s] 31%|███ | 114723/371472 [9:07:19<22:23:48, 3.18it/s] 31%|███ | 114724/371472 [9:07:19<21:41:45, 3.29it/s] 31%|███ | 114725/371472 [9:07:20<21:34:03, 3.31it/s] 31%|███ | 114726/371472 [9:07:20<21:25:04, 3.33it/s] 31%|███ | 114727/371472 [9:07:20<20:47:40, 3.43it/s] 31%|███ | 114728/371472 [9:07:20<19:55:26, 3.58it/s] 31%|███ | 114729/371472 [9:07:21<20:36:56, 3.46it/s] 31%|███ | 114730/371472 [9:07:21<20:37:37, 3.46it/s] 31%|███ | 114731/371472 [9:07:21<21:10:17, 3.37it/s] 31%|███ | 114732/371472 [9:07:22<20:03:39, 3.55it/s] 31%|███ | 114733/371472 [9:07:22<19:57:11, 3.57it/s] 31%|███ | 114734/371472 [9:07:22<19:39:16, 3.63it/s] 31%|███ | 114735/371472 [9:07:22<20:27:19, 3.49it/s] 31%|███ | 114736/371472 [9:07:23<19:46:17, 3.61it/s] 31%|███ | 114737/371472 [9:07:23<19:45:53, 3.61it/s] 31%|███ | 114738/371472 [9:07:23<19:22:00, 3.68it/s] 31%|███ | 114739/371472 [9:07:24<21:30:36, 3.32it/s] 31%|███ | 114740/371472 [9:07:24<20:59:36, 3.40it/s] {'loss': 3.4013, 'learning_rate': 7.22343726432373e-07, 'epoch': 4.94} + 31%|███ | 114740/371472 [9:07:24<20:59:36, 3.40it/s] 31%|███ | 114741/371472 [9:07:24<21:12:21, 3.36it/s] 31%|███ | 114742/371472 [9:07:24<21:09:28, 3.37it/s] 31%|███ | 114743/371472 [9:07:25<20:25:17, 3.49it/s] 31%|███ | 114744/371472 [9:07:25<21:15:46, 3.35it/s] 31%|███ | 114745/371472 [9:07:25<20:57:09, 3.40it/s] 31%|███ | 114746/371472 [9:07:26<20:22:46, 3.50it/s] 31%|███ | 114747/371472 [9:07:26<19:56:02, 3.58it/s] 31%|███ | 114748/371472 [9:07:26<20:23:22, 3.50it/s] 31%|███ | 114749/371472 [9:07:26<20:34:22, 3.47it/s] 31%|███ | 114750/371472 [9:07:27<19:58:22, 3.57it/s] 31%|███ | 114751/371472 [9:07:27<19:02:55, 3.74it/s] 31%|███ | 114752/371472 [9:07:27<18:50:02, 3.79it/s] 31%|███ | 114753/371472 [9:07:27<19:20:10, 3.69it/s] 31%|███ | 114754/371472 [9:07:28<20:17:10, 3.52it/s] 31%|███ | 114755/371472 [9:07:28<20:20:43, 3.50it/s] 31%|███ | 114756/371472 [9:07:28<20:02:41, 3.56it/s] 31%|███ | 114757/371472 [9:07:29<20:51:09, 3.42it/s] 31%|███ | 114758/371472 [9:07:29<20:22:54, 3.50it/s] 31%|███ | 114759/371472 [9:07:29<19:36:59, 3.64it/s] 31%|███ | 114760/371472 [9:07:29<20:21:42, 3.50it/s] {'loss': 3.4448, 'learning_rate': 7.222952444568942e-07, 'epoch': 4.94} + 31%|███ | 114760/371472 [9:07:29<20:21:42, 3.50it/s] 31%|███ | 114761/371472 [9:07:30<20:27:17, 3.49it/s] 31%|███ | 114762/371472 [9:07:30<19:59:21, 3.57it/s] 31%|███ | 114763/371472 [9:07:30<19:33:08, 3.65it/s] 31%|███ | 114764/371472 [9:07:31<19:00:31, 3.75it/s] 31%|███ | 114765/371472 [9:07:31<18:18:50, 3.89it/s] 31%|███ | 114766/371472 [9:07:31<17:56:39, 3.97it/s] 31%|███ | 114767/371472 [9:07:31<18:15:13, 3.91it/s] 31%|███ | 114768/371472 [9:07:32<18:41:58, 3.81it/s] 31%|███ | 114769/371472 [9:07:32<19:55:54, 3.58it/s] 31%|███ | 114770/371472 [9:07:32<20:58:33, 3.40it/s] 31%|███ | 114771/371472 [9:07:33<23:16:15, 3.06it/s] 31%|███ | 114772/371472 [9:07:33<21:30:50, 3.31it/s] 31%|███ | 114773/371472 [9:07:33<20:31:58, 3.47it/s] 31%|███ | 114774/371472 [9:07:33<20:36:40, 3.46it/s] 31%|███ | 114775/371472 [9:07:34<21:09:41, 3.37it/s] 31%|███ | 114776/371472 [9:07:34<20:33:40, 3.47it/s] 31%|███ | 114777/371472 [9:07:34<19:38:09, 3.63it/s] 31%|███ | 114778/371472 [9:07:34<19:21:35, 3.68it/s] 31%|███ | 114779/371472 [9:07:35<19:11:08, 3.72it/s] 31%|███ | 114780/371472 [9:07:35<19:45:38, 3.61it/s] {'loss': 3.2879, 'learning_rate': 7.222467624814152e-07, 'epoch': 4.94} + 31%|███ | 114780/371472 [9:07:35<19:45:38, 3.61it/s] 31%|███ | 114781/371472 [9:07:35<19:35:24, 3.64it/s] 31%|███ | 114782/371472 [9:07:36<19:46:51, 3.60it/s] 31%|███ | 114783/371472 [9:07:36<21:41:20, 3.29it/s] 31%|███ | 114784/371472 [9:07:36<20:09:47, 3.54it/s] 31%|███ | 114785/371472 [9:07:36<20:06:08, 3.55it/s] 31%|███ | 114786/371472 [9:07:37<20:31:59, 3.47it/s] 31%|███ | 114787/371472 [9:07:37<20:26:17, 3.49it/s] 31%|███ | 114788/371472 [9:07:37<20:31:50, 3.47it/s] 31%|███ | 114789/371472 [9:07:38<20:45:22, 3.44it/s] 31%|███ | 114790/371472 [9:07:38<22:27:15, 3.18it/s] 31%|███ | 114791/371472 [9:07:38<21:47:55, 3.27it/s] 31%|███ | 114792/371472 [9:07:39<21:18:17, 3.35it/s] 31%|███ | 114793/371472 [9:07:39<20:36:06, 3.46it/s] 31%|███ | 114794/371472 [9:07:39<21:54:22, 3.25it/s] 31%|███ | 114795/371472 [9:07:39<20:30:12, 3.48it/s] 31%|███ | 114796/371472 [9:07:40<19:53:54, 3.58it/s] 31%|███ | 114797/371472 [9:07:40<20:06:16, 3.55it/s] 31%|███ | 114798/371472 [9:07:40<20:25:36, 3.49it/s] 31%|███ | 114799/371472 [9:07:41<19:44:08, 3.61it/s] 31%|███ | 114800/371472 [9:07:41<20:50:43, 3.42it/s] {'loss': 3.2429, 'learning_rate': 7.221982805059362e-07, 'epoch': 4.94} + 31%|███ | 114800/371472 [9:07:41<20:50:43, 3.42it/s] 31%|███ | 114801/371472 [9:07:41<21:15:10, 3.35it/s] 31%|███ | 114802/371472 [9:07:41<20:20:25, 3.51it/s] 31%|███ | 114803/371472 [9:07:42<19:20:21, 3.69it/s] 31%|███ | 114804/371472 [9:07:42<19:11:25, 3.72it/s] 31%|███ | 114805/371472 [9:07:42<19:26:22, 3.67it/s] 31%|███ | 114806/371472 [9:07:42<19:19:02, 3.69it/s] 31%|███ | 114807/371472 [9:07:43<20:58:45, 3.40it/s] 31%|███ | 114808/371472 [9:07:43<21:01:31, 3.39it/s] 31%|███ | 114809/371472 [9:07:43<20:36:36, 3.46it/s] 31%|███ | 114810/371472 [9:07:44<20:34:09, 3.47it/s] 31%|███ | 114811/371472 [9:07:44<21:35:03, 3.30it/s] 31%|███ | 114812/371472 [9:07:44<20:08:38, 3.54it/s] 31%|███ | 114813/371472 [9:07:45<20:32:07, 3.47it/s] 31%|███ | 114814/371472 [9:07:45<21:18:32, 3.35it/s] 31%|███ | 114815/371472 [9:07:45<21:26:31, 3.32it/s] 31%|███ | 114816/371472 [9:07:45<21:15:25, 3.35it/s] 31%|███ | 114817/371472 [9:07:46<21:20:00, 3.34it/s] 31%|███ | 114818/371472 [9:07:46<21:20:07, 3.34it/s] 31%|███ | 114819/371472 [9:07:46<21:04:33, 3.38it/s] 31%|███ | 114820/371472 [9:07:47<20:03:11, 3.56it/s] {'loss': 3.3428, 'learning_rate': 7.221497985304574e-07, 'epoch': 4.95} + 31%|███ | 114820/371472 [9:07:47<20:03:11, 3.56it/s] 31%|███ | 114821/371472 [9:07:47<19:49:02, 3.60it/s] 31%|███ | 114822/371472 [9:07:47<19:55:27, 3.58it/s] 31%|███ | 114823/371472 [9:07:47<20:12:19, 3.53it/s] 31%|███ | 114824/371472 [9:07:48<19:31:48, 3.65it/s] 31%|███ | 114825/371472 [9:07:48<18:48:12, 3.79it/s] 31%|███ | 114826/371472 [9:07:48<19:02:43, 3.74it/s] 31%|███ | 114827/371472 [9:07:49<19:35:44, 3.64it/s] 31%|███ | 114828/371472 [9:07:49<19:12:56, 3.71it/s] 31%|███ | 114829/371472 [9:07:49<19:10:46, 3.72it/s] 31%|███ | 114830/371472 [9:07:49<19:17:26, 3.70it/s] 31%|███ | 114831/371472 [9:07:50<18:42:26, 3.81it/s] 31%|███ | 114832/371472 [9:07:50<20:11:33, 3.53it/s] 31%|███ | 114833/371472 [9:07:50<19:37:58, 3.63it/s] 31%|███ | 114834/371472 [9:07:50<18:58:08, 3.76it/s] 31%|███ | 114835/371472 [9:07:51<19:32:34, 3.65it/s] 31%|███ | 114836/371472 [9:07:51<20:24:59, 3.49it/s] 31%|███ | 114837/371472 [9:07:51<19:34:28, 3.64it/s] 31%|███ | 114838/371472 [9:07:52<19:13:38, 3.71it/s] 31%|███ | 114839/371472 [9:07:52<19:27:50, 3.66it/s] 31%|███ | 114840/371472 [9:07:52<18:54:13, 3.77it/s] {'loss': 3.3784, 'learning_rate': 7.221013165549785e-07, 'epoch': 4.95} + 31%|███ | 114840/371472 [9:07:52<18:54:13, 3.77it/s] 31%|███ | 114841/371472 [9:07:52<19:04:11, 3.74it/s] 31%|███ | 114842/371472 [9:07:53<19:14:11, 3.71it/s] 31%|███ | 114843/371472 [9:07:53<18:44:04, 3.81it/s] 31%|███ | 114844/371472 [9:07:53<18:52:36, 3.78it/s] 31%|███ | 114845/371472 [9:07:53<18:24:52, 3.87it/s] 31%|███ | 114846/371472 [9:07:54<18:32:13, 3.85it/s] 31%|███ | 114847/371472 [9:07:54<18:59:43, 3.75it/s] 31%|███ | 114848/371472 [9:07:54<18:47:10, 3.79it/s] 31%|███ | 114849/371472 [9:07:54<18:27:59, 3.86it/s] 31%|███ | 114850/371472 [9:07:55<20:43:58, 3.44it/s] 31%|███ | 114851/371472 [9:07:55<20:22:36, 3.50it/s] 31%|███ | 114852/371472 [9:07:55<19:40:05, 3.62it/s] 31%|███ | 114853/371472 [9:07:56<20:03:45, 3.55it/s] 31%|███ | 114854/371472 [9:07:56<19:57:24, 3.57it/s] 31%|███ | 114855/371472 [9:07:56<19:52:05, 3.59it/s] 31%|███ | 114856/371472 [9:07:56<20:50:38, 3.42it/s] 31%|███ | 114857/371472 [9:07:57<20:44:28, 3.44it/s] 31%|███ | 114858/371472 [9:07:57<20:15:58, 3.52it/s] 31%|███ | 114859/371472 [9:07:57<19:14:56, 3.70it/s] 31%|███ | 114860/371472 [9:07:58<19:09:16, 3.72it/s] {'loss': 3.3063, 'learning_rate': 7.220528345794996e-07, 'epoch': 4.95} + 31%|███ | 114860/371472 [9:07:58<19:09:16, 3.72it/s] 31%|███ | 114861/371472 [9:07:58<18:47:18, 3.79it/s] 31%|███ | 114862/371472 [9:07:58<18:42:17, 3.81it/s] 31%|███ | 114863/371472 [9:07:58<19:35:11, 3.64it/s] 31%|███ | 114864/371472 [9:07:59<19:55:57, 3.58it/s] 31%|███ | 114865/371472 [9:07:59<19:51:25, 3.59it/s] 31%|███ | 114866/371472 [9:07:59<19:28:09, 3.66it/s] 31%|███ | 114867/371472 [9:08:00<22:44:09, 3.14it/s] 31%|███ | 114868/371472 [9:08:00<22:06:57, 3.22it/s] 31%|███ | 114869/371472 [9:08:00<21:35:04, 3.30it/s] 31%|███ | 114870/371472 [9:08:00<20:49:12, 3.42it/s] 31%|███ | 114871/371472 [9:08:01<20:12:08, 3.53it/s] 31%|███ | 114872/371472 [9:08:01<19:55:59, 3.58it/s] 31%|███ | 114873/371472 [9:08:01<20:02:00, 3.56it/s] 31%|███ | 114874/371472 [9:08:02<19:53:25, 3.58it/s] 31%|███ | 114875/371472 [9:08:02<21:04:40, 3.38it/s] 31%|███ | 114876/371472 [9:08:02<20:14:41, 3.52it/s] 31%|███ | 114877/371472 [9:08:02<19:39:49, 3.62it/s] 31%|███ | 114878/371472 [9:08:03<20:14:18, 3.52it/s] 31%|███ | 114879/371472 [9:08:03<20:16:09, 3.52it/s] 31%|███ | 114880/371472 [9:08:03<23:28:42, 3.04it/s] {'loss': 3.4032, 'learning_rate': 7.220043526040207e-07, 'epoch': 4.95} + 31%|███ | 114880/371472 [9:08:03<23:28:42, 3.04it/s] 31%|███ | 114881/371472 [9:08:04<23:16:11, 3.06it/s] 31%|███ | 114882/371472 [9:08:04<21:40:14, 3.29it/s] 31%|███ | 114883/371472 [9:08:04<20:25:47, 3.49it/s] 31%|███ | 114884/371472 [9:08:04<19:39:06, 3.63it/s] 31%|███ | 114885/371472 [9:08:05<19:14:06, 3.71it/s] 31%|███ | 114886/371472 [9:08:05<19:26:37, 3.67it/s] 31%|███ | 114887/371472 [9:08:05<19:14:31, 3.70it/s] 31%|███ | 114888/371472 [9:08:06<19:03:41, 3.74it/s] 31%|███ | 114889/371472 [9:08:06<18:56:50, 3.76it/s] 31%|███ | 114890/371472 [9:08:06<19:32:08, 3.65it/s] 31%|███ | 114891/371472 [9:08:06<18:56:12, 3.76it/s] 31%|███ | 114892/371472 [9:08:07<18:46:23, 3.80it/s] 31%|███ | 114893/371472 [9:08:07<19:44:09, 3.61it/s] 31%|███ | 114894/371472 [9:08:07<22:51:03, 3.12it/s] 31%|███ | 114895/371472 [9:08:08<21:24:28, 3.33it/s] 31%|███ | 114896/371472 [9:08:08<20:24:50, 3.49it/s] 31%|███ | 114897/371472 [9:08:08<20:53:16, 3.41it/s] 31%|███ | 114898/371472 [9:08:08<20:39:21, 3.45it/s] 31%|███ | 114899/371472 [9:08:09<20:09:59, 3.53it/s] 31%|███ | 114900/371472 [9:08:09<20:21:38, 3.50it/s] {'loss': 3.3629, 'learning_rate': 7.219558706285418e-07, 'epoch': 4.95} + 31%|███ | 114900/371472 [9:08:09<20:21:38, 3.50it/s] 31%|███ | 114901/371472 [9:08:09<19:26:55, 3.66it/s] 31%|███ | 114902/371472 [9:08:09<19:14:25, 3.70it/s] 31%|███ | 114903/371472 [9:08:10<19:18:10, 3.69it/s] 31%|███ | 114904/371472 [9:08:10<19:06:44, 3.73it/s] 31%|███ | 114905/371472 [9:08:10<19:33:48, 3.64it/s] 31%|███ | 114906/371472 [9:08:11<19:41:12, 3.62it/s] 31%|███ | 114907/371472 [9:08:11<19:19:43, 3.69it/s] 31%|███ | 114908/371472 [9:08:11<19:11:15, 3.71it/s] 31%|███ | 114909/371472 [9:08:11<19:10:29, 3.72it/s] 31%|███ | 114910/371472 [9:08:12<19:37:46, 3.63it/s] 31%|███ | 114911/371472 [9:08:12<19:40:58, 3.62it/s] 31%|███ | 114912/371472 [9:08:12<20:01:49, 3.56it/s] 31%|███ | 114913/371472 [9:08:13<20:07:05, 3.54it/s] 31%|███ | 114914/371472 [9:08:13<20:37:14, 3.46it/s] 31%|███ | 114915/371472 [9:08:13<19:58:11, 3.57it/s] 31%|███ | 114916/371472 [9:08:13<19:51:52, 3.59it/s] 31%|███ | 114917/371472 [9:08:14<19:07:44, 3.73it/s] 31%|███ | 114918/371472 [9:08:14<20:30:25, 3.48it/s] 31%|███ | 114919/371472 [9:08:14<20:14:41, 3.52it/s] 31%|███ | 114920/371472 [9:08:15<20:13:52, 3.52it/s] {'loss': 3.3282, 'learning_rate': 7.21907388653063e-07, 'epoch': 4.95} + 31%|███ | 114920/371472 [9:08:15<20:13:52, 3.52it/s] 31%|███ | 114921/371472 [9:08:15<20:23:16, 3.50it/s] 31%|███ | 114922/371472 [9:08:15<19:31:48, 3.65it/s] 31%|███ | 114923/371472 [9:08:15<18:58:05, 3.76it/s] 31%|███ | 114924/371472 [9:08:16<18:36:43, 3.83it/s] 31%|███ | 114925/371472 [9:08:16<20:14:25, 3.52it/s] 31%|███ | 114926/371472 [9:08:16<20:03:57, 3.55it/s] 31%|███ | 114927/371472 [9:08:16<19:37:50, 3.63it/s] 31%|███ | 114928/371472 [9:08:17<19:38:28, 3.63it/s] 31%|███ | 114929/371472 [9:08:17<20:19:55, 3.50it/s] 31%|███ | 114930/371472 [9:08:17<19:41:09, 3.62it/s] 31%|███ | 114931/371472 [9:08:18<21:25:43, 3.33it/s] 31%|███ | 114932/371472 [9:08:18<20:54:51, 3.41it/s] 31%|███ | 114933/371472 [9:08:18<20:23:33, 3.49it/s] 31%|███ | 114934/371472 [9:08:18<21:21:44, 3.34it/s] 31%|███ | 114935/371472 [9:08:19<20:18:30, 3.51it/s] 31%|███ | 114936/371472 [9:08:19<19:19:02, 3.69it/s] 31%|███ | 114937/371472 [9:08:19<18:53:55, 3.77it/s] 31%|███ | 114938/371472 [9:08:19<18:23:25, 3.87it/s] 31%|███ | 114939/371472 [9:08:20<19:20:23, 3.68it/s] 31%|███ | 114940/371472 [9:08:20<19:16:12, 3.70it/s] {'loss': 3.2692, 'learning_rate': 7.21858906677584e-07, 'epoch': 4.95} + 31%|███ | 114940/371472 [9:08:20<19:16:12, 3.70it/s] 31%|███ | 114941/371472 [9:08:20<19:42:35, 3.62it/s] 31%|███ | 114942/371472 [9:08:21<19:36:52, 3.63it/s] 31%|███ | 114943/371472 [9:08:21<19:21:10, 3.68it/s] 31%|███ | 114944/371472 [9:08:21<18:48:03, 3.79it/s] 31%|███ | 114945/371472 [9:08:21<20:56:01, 3.40it/s] 31%|███ | 114946/371472 [9:08:22<20:45:56, 3.43it/s] 31%|███ | 114947/371472 [9:08:22<20:02:17, 3.56it/s] 31%|███ | 114948/371472 [9:08:22<19:14:32, 3.70it/s] 31%|███ | 114949/371472 [9:08:23<18:48:25, 3.79it/s] 31%|███ | 114950/371472 [9:08:23<19:44:03, 3.61it/s] 31%|███ | 114951/371472 [9:08:23<18:53:29, 3.77it/s] 31%|███ | 114952/371472 [9:08:23<18:49:39, 3.78it/s] 31%|███ | 114953/371472 [9:08:24<18:55:56, 3.76it/s] 31%|███ | 114954/371472 [9:08:24<19:06:55, 3.73it/s] 31%|███ | 114955/371472 [9:08:24<18:56:58, 3.76it/s] 31%|███ | 114956/371472 [9:08:24<19:25:33, 3.67it/s] 31%|███ | 114957/371472 [9:08:25<19:35:17, 3.64it/s] 31%|███ | 114958/371472 [9:08:25<19:29:54, 3.65it/s] 31%|███ | 114959/371472 [9:08:25<19:33:10, 3.64it/s] 31%|███ | 114960/371472 [9:08:26<19:41:37, 3.62it/s] {'loss': 3.3157, 'learning_rate': 7.218104247021051e-07, 'epoch': 4.95} + 31%|███ | 114960/371472 [9:08:26<19:41:37, 3.62it/s] 31%|███ | 114961/371472 [9:08:26<19:13:29, 3.71it/s] 31%|███ | 114962/371472 [9:08:26<20:42:26, 3.44it/s] 31%|███ | 114963/371472 [9:08:26<20:46:46, 3.43it/s] 31%|███ | 114964/371472 [9:08:27<24:23:56, 2.92it/s] 31%|███ | 114965/371472 [9:08:27<22:18:03, 3.20it/s] 31%|███ | 114966/371472 [9:08:27<22:38:51, 3.15it/s] 31%|███ | 114967/371472 [9:08:28<22:07:59, 3.22it/s] 31%|███ | 114968/371472 [9:08:28<21:22:38, 3.33it/s] 31%|███ | 114969/371472 [9:08:28<21:51:01, 3.26it/s] 31%|███ | 114970/371472 [9:08:29<20:50:41, 3.42it/s] 31%|███ | 114971/371472 [9:08:29<19:42:38, 3.61it/s] 31%|███ | 114972/371472 [9:08:29<19:35:50, 3.64it/s] 31%|███ | 114973/371472 [9:08:29<19:10:47, 3.71it/s] 31%|███ | 114974/371472 [9:08:30<19:14:09, 3.70it/s] 31%|███ | 114975/371472 [9:08:30<19:09:50, 3.72it/s] 31%|███ | 114976/371472 [9:08:30<19:08:30, 3.72it/s] 31%|███ | 114977/371472 [9:08:30<19:12:55, 3.71it/s] 31%|███ | 114978/371472 [9:08:31<19:16:32, 3.70it/s] 31%|███ | 114979/371472 [9:08:31<21:37:49, 3.29it/s] 31%|███ | 114980/371472 [9:08:31<20:39:22, 3.45it/s] {'loss': 3.3772, 'learning_rate': 7.217619427266263e-07, 'epoch': 4.95} + 31%|███ | 114980/371472 [9:08:31<20:39:22, 3.45it/s] 31%|███ | 114981/371472 [9:08:32<20:45:13, 3.43it/s] 31%|███ | 114982/371472 [9:08:32<20:48:25, 3.42it/s] 31%|███ | 114983/371472 [9:08:32<21:57:17, 3.25it/s] 31%|███ | 114984/371472 [9:08:33<21:17:54, 3.35it/s] 31%|███ | 114985/371472 [9:08:33<20:31:50, 3.47it/s] 31%|███ | 114986/371472 [9:08:33<19:26:40, 3.66it/s] 31%|███ | 114987/371472 [9:08:33<19:22:03, 3.68it/s] 31%|███ | 114988/371472 [9:08:34<20:08:35, 3.54it/s] 31%|███ | 114989/371472 [9:08:34<20:28:26, 3.48it/s] 31%|███ | 114990/371472 [9:08:34<19:33:43, 3.64it/s] 31%|███ | 114991/371472 [9:08:35<20:44:10, 3.44it/s] 31%|███ | 114992/371472 [9:08:35<21:21:10, 3.34it/s] 31%|███ | 114993/371472 [9:08:35<20:58:02, 3.40it/s] 31%|███ | 114994/371472 [9:08:35<19:50:05, 3.59it/s] 31%|███ | 114995/371472 [9:08:36<19:31:50, 3.65it/s] 31%|███ | 114996/371472 [9:08:36<19:23:23, 3.67it/s] 31%|███ | 114997/371472 [9:08:36<18:56:14, 3.76it/s] 31%|███ | 114998/371472 [9:08:37<21:01:24, 3.39it/s] 31%|███ | 114999/371472 [9:08:37<20:44:03, 3.44it/s] 31%|███ | 115000/371472 [9:08:37<19:52:10, 3.59it/s] {'loss': 3.334, 'learning_rate': 7.217134607511475e-07, 'epoch': 4.95} + 31%|███ | 115000/371472 [9:08:37<19:52:10, 3.59it/s] 31%|███ | 115001/371472 [9:08:37<20:15:33, 3.52it/s] 31%|███ | 115002/371472 [9:08:38<19:52:17, 3.59it/s] 31%|███ | 115003/371472 [9:08:38<21:18:55, 3.34it/s] 31%|███ | 115004/371472 [9:08:38<20:23:22, 3.49it/s] 31%|███ | 115005/371472 [9:08:38<20:10:08, 3.53it/s] 31%|███ | 115006/371472 [9:08:39<19:37:12, 3.63it/s] 31%|███ | 115007/371472 [9:08:39<18:48:33, 3.79it/s] 31%|███ | 115008/371472 [9:08:39<20:57:32, 3.40it/s] 31%|███ | 115009/371472 [9:08:40<21:37:50, 3.29it/s] 31%|███ | 115010/371472 [9:08:40<20:25:45, 3.49it/s] 31%|███ | 115011/371472 [9:08:40<21:03:38, 3.38it/s] 31%|███ | 115012/371472 [9:08:41<21:08:45, 3.37it/s] 31%|███ | 115013/371472 [9:08:41<20:11:57, 3.53it/s] 31%|███ | 115014/371472 [9:08:41<19:24:14, 3.67it/s] 31%|███ | 115015/371472 [9:08:41<19:01:12, 3.75it/s] 31%|███ | 115016/371472 [9:08:42<18:34:32, 3.83it/s] 31%|███ | 115017/371472 [9:08:42<18:22:05, 3.88it/s] 31%|███ | 115018/371472 [9:08:42<18:49:54, 3.78it/s] 31%|███ | 115019/371472 [9:08:42<19:06:57, 3.73it/s] 31%|███ | 115020/371472 [9:08:43<19:01:46, 3.74it/s] {'loss': 3.4466, 'learning_rate': 7.216649787756685e-07, 'epoch': 4.95} + 31%|███ | 115020/371472 [9:08:43<19:01:46, 3.74it/s] 31%|███ | 115021/371472 [9:08:43<18:52:25, 3.77it/s] 31%|███ | 115022/371472 [9:08:43<23:59:12, 2.97it/s] 31%|███ | 115023/371472 [9:08:44<22:44:32, 3.13it/s] 31%|███ | 115024/371472 [9:08:44<21:39:15, 3.29it/s] 31%|███ | 115025/371472 [9:08:44<22:27:08, 3.17it/s] 31%|███ | 115026/371472 [9:08:44<20:45:35, 3.43it/s] 31%|███ | 115027/371472 [9:08:45<19:58:47, 3.57it/s] 31%|███ | 115028/371472 [9:08:45<20:30:46, 3.47it/s] 31%|███ | 115029/371472 [9:08:45<21:11:35, 3.36it/s] 31%|███ | 115030/371472 [9:08:46<20:06:23, 3.54it/s] 31%|███ | 115031/371472 [9:08:46<19:27:25, 3.66it/s] 31%|███ | 115032/371472 [9:08:46<19:11:20, 3.71it/s] 31%|███ | 115033/371472 [9:08:46<20:20:24, 3.50it/s] 31%|███ | 115034/371472 [9:08:47<19:30:35, 3.65it/s] 31%|███ | 115035/371472 [9:08:47<20:47:17, 3.43it/s] 31%|███ | 115036/371472 [9:08:47<20:30:36, 3.47it/s] 31%|███ | 115037/371472 [9:08:48<20:37:46, 3.45it/s] 31%|███ | 115038/371472 [9:08:48<20:20:05, 3.50it/s] 31%|███ | 115039/371472 [9:08:48<20:48:24, 3.42it/s] 31%|███ | 115040/371472 [9:08:48<20:19:43, 3.50it/s] {'loss': 3.3218, 'learning_rate': 7.216164968001895e-07, 'epoch': 4.95} + 31%|███ | 115040/371472 [9:08:48<20:19:43, 3.50it/s] 31%|███ | 115041/371472 [9:08:49<20:16:53, 3.51it/s] 31%|███ | 115042/371472 [9:08:49<20:26:23, 3.48it/s] 31%|███ | 115043/371472 [9:08:49<20:34:10, 3.46it/s] 31%|███ | 115044/371472 [9:08:50<21:07:22, 3.37it/s] 31%|███ | 115045/371472 [9:08:50<20:23:49, 3.49it/s] 31%|███ | 115046/371472 [9:08:50<19:27:01, 3.66it/s] 31%|███ | 115047/371472 [9:08:50<21:07:24, 3.37it/s] 31%|███ | 115048/371472 [9:08:51<20:09:26, 3.53it/s] 31%|███ | 115049/371472 [9:08:51<20:03:58, 3.55it/s] 31%|███ | 115050/371472 [9:08:51<21:02:51, 3.38it/s] 31%|███ | 115051/371472 [9:08:52<19:58:52, 3.56it/s] 31%|███ | 115052/371472 [9:08:52<20:43:20, 3.44it/s] 31%|███ | 115053/371472 [9:08:52<21:20:47, 3.34it/s] 31%|███ | 115054/371472 [9:08:52<20:33:51, 3.46it/s] 31%|███ | 115055/371472 [9:08:53<21:27:14, 3.32it/s] 31%|███ | 115056/371472 [9:08:53<22:01:25, 3.23it/s] 31%|███ | 115057/371472 [9:08:54<24:05:08, 2.96it/s] 31%|███ | 115058/371472 [9:08:54<22:56:00, 3.11it/s] 31%|███ | 115059/371472 [9:08:54<22:50:12, 3.12it/s] 31%|███ | 115060/371472 [9:08:54<22:46:24, 3.13it/s] {'loss': 3.2133, 'learning_rate': 7.215680148247107e-07, 'epoch': 4.96} + 31%|███ | 115060/371472 [9:08:54<22:46:24, 3.13it/s] 31%|███ | 115061/371472 [9:08:55<22:29:41, 3.17it/s] 31%|███ | 115062/371472 [9:08:55<22:03:06, 3.23it/s] 31%|███ | 115063/371472 [9:08:55<21:10:05, 3.36it/s] 31%|███ | 115064/371472 [9:08:56<22:17:01, 3.20it/s] 31%|███ | 115065/371472 [9:08:56<20:59:24, 3.39it/s] 31%|███ | 115066/371472 [9:08:56<20:05:39, 3.54it/s] 31%|███ | 115067/371472 [9:08:57<20:27:46, 3.48it/s] 31%|███ | 115068/371472 [9:08:57<21:40:32, 3.29it/s] 31%|███ | 115069/371472 [9:08:57<21:38:52, 3.29it/s] 31%|███ | 115070/371472 [9:08:57<20:43:24, 3.44it/s] 31%|███ | 115071/371472 [9:08:58<21:29:01, 3.32it/s] 31%|███ | 115072/371472 [9:08:58<21:38:29, 3.29it/s] 31%|███ | 115073/371472 [9:08:58<21:19:56, 3.34it/s] 31%|███ | 115074/371472 [9:08:59<21:28:30, 3.32it/s] 31%|███ | 115075/371472 [9:08:59<20:30:50, 3.47it/s] 31%|███ | 115076/371472 [9:08:59<20:41:30, 3.44it/s] 31%|███ | 115077/371472 [9:08:59<20:24:56, 3.49it/s] 31%|███ | 115078/371472 [9:09:00<19:52:48, 3.58it/s] 31%|███ | 115079/371472 [9:09:00<19:29:17, 3.65it/s] 31%|███ | 115080/371472 [9:09:00<19:13:41, 3.70it/s] {'loss': 3.3602, 'learning_rate': 7.215195328492319e-07, 'epoch': 4.96} + 31%|███ | 115080/371472 [9:09:00<19:13:41, 3.70it/s] 31%|███ | 115081/371472 [9:09:01<19:11:45, 3.71it/s] 31%|███ | 115082/371472 [9:09:01<20:01:40, 3.56it/s] 31%|███ | 115083/371472 [9:09:01<19:52:32, 3.58it/s] 31%|███ | 115084/371472 [9:09:01<19:35:49, 3.63it/s] 31%|███ | 115085/371472 [9:09:02<19:13:51, 3.70it/s] 31%|███ | 115086/371472 [9:09:02<18:39:36, 3.82it/s] 31%|███ | 115087/371472 [9:09:02<18:09:24, 3.92it/s] 31%|███ | 115088/371472 [9:09:02<18:22:48, 3.87it/s] 31%|███ | 115089/371472 [9:09:03<19:15:48, 3.70it/s] 31%|███ | 115090/371472 [9:09:03<19:35:12, 3.64it/s] 31%|███ | 115091/371472 [9:09:03<18:56:25, 3.76it/s] 31%|███ | 115092/371472 [9:09:03<18:13:46, 3.91it/s] 31%|███ | 115093/371472 [9:09:04<20:07:47, 3.54it/s] 31%|███ | 115094/371472 [9:09:04<20:32:09, 3.47it/s] 31%|███ | 115095/371472 [9:09:04<19:43:51, 3.61it/s] 31%|███ | 115096/371472 [9:09:05<20:26:02, 3.49it/s] 31%|███ | 115097/371472 [9:09:05<20:39:34, 3.45it/s] 31%|███ | 115098/371472 [9:09:05<22:47:57, 3.12it/s] 31%|███ | 115099/371472 [9:09:06<21:12:06, 3.36it/s] 31%|███ | 115100/371472 [9:09:06<23:27:43, 3.04it/s] {'loss': 3.3511, 'learning_rate': 7.214710508737529e-07, 'epoch': 4.96} + 31%|███ | 115100/371472 [9:09:06<23:27:43, 3.04it/s] 31%|███ | 115101/371472 [9:09:06<22:23:22, 3.18it/s] 31%|███ | 115102/371472 [9:09:07<20:56:13, 3.40it/s] 31%|███ | 115103/371472 [9:09:07<20:34:16, 3.46it/s] 31%|███ | 115104/371472 [9:09:07<19:53:36, 3.58it/s] 31%|███ | 115105/371472 [9:09:07<19:51:41, 3.59it/s] 31%|███ | 115106/371472 [9:09:08<20:52:07, 3.41it/s] 31%|███ | 115107/371472 [9:09:08<21:09:12, 3.37it/s] 31%|███ | 115108/371472 [9:09:08<20:43:55, 3.43it/s] 31%|███ | 115109/371472 [9:09:09<20:32:33, 3.47it/s] 31%|███ | 115110/371472 [9:09:09<21:09:27, 3.37it/s] 31%|███ | 115111/371472 [9:09:09<20:35:50, 3.46it/s] 31%|███ | 115112/371472 [9:09:09<21:58:49, 3.24it/s] 31%|███ | 115113/371472 [9:09:10<21:27:47, 3.32it/s] 31%|███ | 115114/371472 [9:09:10<20:48:35, 3.42it/s] 31%|███ | 115115/371472 [9:09:10<20:00:04, 3.56it/s] 31%|███ | 115116/371472 [9:09:11<19:43:10, 3.61it/s] 31%|███ | 115117/371472 [9:09:11<19:45:00, 3.61it/s] 31%|███ | 115118/371472 [9:09:11<19:02:37, 3.74it/s] 31%|███ | 115119/371472 [9:09:11<19:08:51, 3.72it/s] 31%|███ | 115120/371472 [9:09:12<19:10:22, 3.71it/s] {'loss': 3.5421, 'learning_rate': 7.21422568898274e-07, 'epoch': 4.96} + 31%|███ | 115120/371472 [9:09:12<19:10:22, 3.71it/s] 31%|███ | 115121/371472 [9:09:12<19:13:06, 3.71it/s] 31%|███ | 115122/371472 [9:09:12<19:10:18, 3.71it/s] 31%|███ | 115123/371472 [9:09:12<18:52:59, 3.77it/s] 31%|███ | 115124/371472 [9:09:13<19:25:09, 3.67it/s] 31%|███ | 115125/371472 [9:09:13<19:54:04, 3.58it/s] 31%|███ | 115126/371472 [9:09:13<19:37:54, 3.63it/s] 31%|███ | 115127/371472 [9:09:14<19:30:15, 3.65it/s] 31%|███ | 115128/371472 [9:09:14<19:47:07, 3.60it/s] 31%|███ | 115129/371472 [9:09:14<19:19:42, 3.68it/s] 31%|███ | 115130/371472 [9:09:14<21:17:09, 3.35it/s] 31%|███ | 115131/371472 [9:09:15<21:55:34, 3.25it/s] 31%|███ | 115132/371472 [9:09:15<21:13:43, 3.35it/s] 31%|███ | 115133/371472 [9:09:15<20:54:58, 3.40it/s] 31%|███ | 115134/371472 [9:09:16<22:38:56, 3.14it/s] 31%|███ | 115135/371472 [9:09:16<21:32:02, 3.31it/s] 31%|███ | 115136/371472 [9:09:16<20:17:45, 3.51it/s] 31%|███ | 115137/371472 [9:09:17<20:48:36, 3.42it/s] 31%|███ | 115138/371472 [9:09:17<20:18:04, 3.51it/s] 31%|███ | 115139/371472 [9:09:17<20:09:42, 3.53it/s] 31%|███ | 115140/371472 [9:09:17<20:01:11, 3.56it/s] {'loss': 3.4279, 'learning_rate': 7.213740869227952e-07, 'epoch': 4.96} + 31%|███ | 115140/371472 [9:09:17<20:01:11, 3.56it/s] 31%|███ | 115141/371472 [9:09:18<20:14:34, 3.52it/s] 31%|███ | 115142/371472 [9:09:18<20:26:08, 3.48it/s] 31%|███ | 115143/371472 [9:09:18<20:06:29, 3.54it/s] 31%|███ | 115144/371472 [9:09:18<19:44:46, 3.61it/s] 31%|███ | 115145/371472 [9:09:19<19:56:17, 3.57it/s] 31%|███ | 115146/371472 [9:09:19<20:01:49, 3.55it/s] 31%|███ | 115147/371472 [9:09:19<20:00:10, 3.56it/s] 31%|███ | 115148/371472 [9:09:20<20:06:06, 3.54it/s] 31%|███ | 115149/371472 [9:09:20<20:02:17, 3.55it/s] 31%|███ | 115150/371472 [9:09:20<20:38:47, 3.45it/s] 31%|███ | 115151/371472 [9:09:20<20:42:59, 3.44it/s] 31%|███ | 115152/371472 [9:09:21<20:30:06, 3.47it/s] 31%|███ | 115153/371472 [9:09:21<21:20:31, 3.34it/s] 31%|███ | 115154/371472 [9:09:21<21:47:42, 3.27it/s] 31%|███ | 115155/371472 [9:09:22<21:15:43, 3.35it/s] 31%|███ | 115156/371472 [9:09:22<20:39:49, 3.45it/s] 31%|███ | 115157/371472 [9:09:22<20:10:10, 3.53it/s] 31%|███ | 115158/371472 [9:09:23<20:38:54, 3.45it/s] 31%|███ | 115159/371472 [9:09:23<19:31:55, 3.65it/s] 31%|███ | 115160/371472 [9:09:23<19:28:45, 3.66it/s] {'loss': 3.2522, 'learning_rate': 7.213256049473163e-07, 'epoch': 4.96} + 31%|███ | 115160/371472 [9:09:23<19:28:45, 3.66it/s] 31%|███ | 115161/371472 [9:09:23<20:37:10, 3.45it/s] 31%|███ | 115162/371472 [9:09:24<20:12:46, 3.52it/s] 31%|███ | 115163/371472 [9:09:24<21:22:32, 3.33it/s] 31%|███ | 115164/371472 [9:09:24<20:31:53, 3.47it/s] 31%|███ | 115165/371472 [9:09:24<19:54:32, 3.58it/s] 31%|███ | 115166/371472 [9:09:25<19:53:41, 3.58it/s] 31%|███ | 115167/371472 [9:09:25<20:57:51, 3.40it/s] 31%|███ | 115168/371472 [9:09:25<20:43:31, 3.44it/s] 31%|███ | 115169/371472 [9:09:26<19:54:06, 3.58it/s] 31%|███ | 115170/371472 [9:09:26<19:24:16, 3.67it/s] 31%|███ | 115171/371472 [9:09:26<19:36:05, 3.63it/s] 31%|███ | 115172/371472 [9:09:26<19:46:54, 3.60it/s] 31%|███ | 115173/371472 [9:09:27<19:33:13, 3.64it/s] 31%|███ | 115174/371472 [9:09:27<19:08:02, 3.72it/s] 31%|███ | 115175/371472 [9:09:27<20:28:05, 3.48it/s] 31%|███ | 115176/371472 [9:09:28<19:55:24, 3.57it/s] 31%|███ | 115177/371472 [9:09:28<20:05:24, 3.54it/s] 31%|███ | 115178/371472 [9:09:28<19:51:41, 3.58it/s] 31%|███ | 115179/371472 [9:09:28<19:44:50, 3.61it/s] 31%|███ | 115180/371472 [9:09:29<20:37:36, 3.45it/s] {'loss': 3.4242, 'learning_rate': 7.212771229718373e-07, 'epoch': 4.96} + 31%|███ | 115180/371472 [9:09:29<20:37:36, 3.45it/s] 31%|███ | 115181/371472 [9:09:29<23:54:19, 2.98it/s] 31%|███ | 115182/371472 [9:09:29<23:26:38, 3.04it/s] 31%|███ | 115183/371472 [9:09:30<22:28:30, 3.17it/s] 31%|███ | 115184/371472 [9:09:30<21:42:42, 3.28it/s] 31%|███ | 115185/371472 [9:09:30<20:40:32, 3.44it/s] 31%|███ | 115186/371472 [9:09:31<20:13:15, 3.52it/s] 31%|███ | 115187/371472 [9:09:31<20:18:28, 3.51it/s] 31%|███ | 115188/371472 [9:09:31<20:18:59, 3.50it/s] 31%|███ | 115189/371472 [9:09:31<20:02:27, 3.55it/s] 31%|███ | 115190/371472 [9:09:32<20:11:37, 3.53it/s] 31%|███ | 115191/371472 [9:09:32<19:46:08, 3.60it/s] 31%|███ | 115192/371472 [9:09:32<20:19:11, 3.50it/s] 31%|███ | 115193/371472 [9:09:33<20:02:13, 3.55it/s] 31%|███ | 115194/371472 [9:09:33<20:16:14, 3.51it/s] 31%|███ | 115195/371472 [9:09:33<20:31:48, 3.47it/s] 31%|███ | 115196/371472 [9:09:33<21:01:51, 3.38it/s] 31%|███ | 115197/371472 [9:09:34<23:16:49, 3.06it/s] 31%|███ | 115198/371472 [9:09:34<21:39:48, 3.29it/s] 31%|███ | 115199/371472 [9:09:34<20:18:57, 3.50it/s] 31%|███ | 115200/371472 [9:09:35<20:18:06, 3.51it/s] {'loss': 3.3116, 'learning_rate': 7.212286409963584e-07, 'epoch': 4.96} + 31%|███ | 115200/371472 [9:09:35<20:18:06, 3.51it/s] 31%|███ | 115201/371472 [9:09:35<20:43:46, 3.43it/s] 31%|███ | 115202/371472 [9:09:35<20:10:38, 3.53it/s] 31%|███ | 115203/371472 [9:09:35<20:05:25, 3.54it/s] 31%|███ | 115204/371472 [9:09:36<20:03:38, 3.55it/s] 31%|███ | 115205/371472 [9:09:36<19:21:35, 3.68it/s] 31%|███ | 115206/371472 [9:09:36<19:13:32, 3.70it/s] 31%|███ | 115207/371472 [9:09:37<19:42:03, 3.61it/s] 31%|███ | 115208/371472 [9:09:37<19:46:13, 3.60it/s] 31%|███ | 115209/371472 [9:09:37<19:59:41, 3.56it/s] 31%|███ | 115210/371472 [9:09:37<20:31:54, 3.47it/s] 31%|███ | 115211/371472 [9:09:38<19:57:19, 3.57it/s] 31%|███ | 115212/371472 [9:09:38<18:54:49, 3.76it/s] 31%|███ | 115213/371472 [9:09:38<21:42:48, 3.28it/s] 31%|███ | 115214/371472 [9:09:39<21:05:18, 3.38it/s] 31%|███ | 115215/371472 [9:09:39<20:28:12, 3.48it/s] 31%|███ | 115216/371472 [9:09:39<19:55:32, 3.57it/s] 31%|███ | 115217/371472 [9:09:39<19:20:07, 3.68it/s] 31%|███ | 115218/371472 [9:09:40<18:44:15, 3.80it/s] 31%|███ | 115219/371472 [9:09:40<18:40:58, 3.81it/s] 31%|███ | 115220/371472 [9:09:40<18:53:21, 3.77it/s] {'loss': 3.3607, 'learning_rate': 7.211801590208796e-07, 'epoch': 4.96} + 31%|███ | 115220/371472 [9:09:40<18:53:21, 3.77it/s] 31%|███ | 115221/371472 [9:09:40<18:37:39, 3.82it/s] 31%|███ | 115222/371472 [9:09:41<17:57:31, 3.96it/s] 31%|███ | 115223/371472 [9:09:41<18:25:55, 3.86it/s] 31%|███ | 115224/371472 [9:09:41<19:45:14, 3.60it/s] 31%|███ | 115225/371472 [9:09:42<19:44:31, 3.61it/s] 31%|███ | 115226/371472 [9:09:42<19:34:08, 3.64it/s] 31%|███ | 115227/371472 [9:09:42<19:39:38, 3.62it/s] 31%|███ | 115228/371472 [9:09:42<19:16:44, 3.69it/s] 31%|███ | 115229/371472 [9:09:43<19:05:26, 3.73it/s] 31%|███ | 115230/371472 [9:09:43<18:56:30, 3.76it/s] 31%|███ | 115231/371472 [9:09:43<19:11:53, 3.71it/s] 31%|███ | 115232/371472 [9:09:43<20:02:30, 3.55it/s] 31%|███ | 115233/371472 [9:09:44<20:21:46, 3.50it/s] 31%|███ | 115234/371472 [9:09:44<19:16:14, 3.69it/s] 31%|███ | 115235/371472 [9:09:44<18:40:55, 3.81it/s] 31%|███ | 115236/371472 [9:09:44<19:04:19, 3.73it/s] 31%|███ | 115237/371472 [9:09:45<21:37:28, 3.29it/s] 31%|███ | 115238/371472 [9:09:45<21:53:19, 3.25it/s] 31%|███ | 115239/371472 [9:09:46<22:11:17, 3.21it/s] 31%|███ | 115240/371472 [9:09:46<21:01:57, 3.38it/s] {'loss': 3.3513, 'learning_rate': 7.211316770454006e-07, 'epoch': 4.96} + 31%|███ | 115240/371472 [9:09:46<21:01:57, 3.38it/s] 31%|███ | 115241/371472 [9:09:46<20:43:45, 3.43it/s] 31%|███ | 115242/371472 [9:09:46<20:14:44, 3.52it/s] 31%|███ | 115243/371472 [9:09:47<20:15:07, 3.51it/s] 31%|███ | 115244/371472 [9:09:47<20:13:35, 3.52it/s] 31%|███ | 115245/371472 [9:09:47<19:55:48, 3.57it/s] 31%|███ | 115246/371472 [9:09:47<20:11:38, 3.52it/s] 31%|███ | 115247/371472 [9:09:48<19:52:01, 3.58it/s] 31%|███ | 115248/371472 [9:09:48<20:34:55, 3.46it/s] 31%|███ | 115249/371472 [9:09:48<20:33:35, 3.46it/s] 31%|███ | 115250/371472 [9:09:49<19:58:11, 3.56it/s] 31%|███ | 115251/371472 [9:09:49<19:49:34, 3.59it/s] 31%|███ | 115252/371472 [9:09:49<20:31:05, 3.47it/s] 31%|███ | 115253/371472 [9:09:50<21:46:50, 3.27it/s] 31%|███ | 115254/371472 [9:09:50<21:24:02, 3.33it/s] 31%|███ | 115255/371472 [9:09:50<21:55:03, 3.25it/s] 31%|███ | 115256/371472 [9:09:50<21:41:33, 3.28it/s] 31%|███ | 115257/371472 [9:09:51<20:56:33, 3.40it/s] 31%|███ | 115258/371472 [9:09:51<19:49:30, 3.59it/s] 31%|███ | 115259/371472 [9:09:51<20:55:15, 3.40it/s] 31%|███ | 115260/371472 [9:09:52<20:38:35, 3.45it/s] {'loss': 3.2433, 'learning_rate': 7.210831950699218e-07, 'epoch': 4.96} + 31%|███ | 115260/371472 [9:09:52<20:38:35, 3.45it/s] 31%|███ | 115261/371472 [9:09:52<20:23:33, 3.49it/s] 31%|███ | 115262/371472 [9:09:52<21:19:07, 3.34it/s] 31%|███ | 115263/371472 [9:09:52<21:34:20, 3.30it/s] 31%|███ | 115264/371472 [9:09:53<20:13:31, 3.52it/s] 31%|███ | 115265/371472 [9:09:53<20:11:47, 3.52it/s] 31%|███ | 115266/371472 [9:09:53<21:33:25, 3.30it/s] 31%|███ | 115267/371472 [9:09:54<20:38:44, 3.45it/s] 31%|███ | 115268/371472 [9:09:54<21:11:44, 3.36it/s] 31%|███ | 115269/371472 [9:09:54<20:15:40, 3.51it/s] 31%|███ | 115270/371472 [9:09:54<20:06:34, 3.54it/s] 31%|███ | 115271/371472 [9:09:55<21:06:03, 3.37it/s] 31%|███ | 115272/371472 [9:09:55<21:53:18, 3.25it/s] 31%|███ | 115273/371472 [9:09:55<20:31:47, 3.47it/s] 31%|███ | 115274/371472 [9:09:56<20:48:10, 3.42it/s] 31%|███ | 115275/371472 [9:09:56<20:11:52, 3.52it/s] 31%|███ | 115276/371472 [9:09:56<20:00:51, 3.56it/s] 31%|███ | 115277/371472 [9:09:56<19:39:39, 3.62it/s] 31%|███ | 115278/371472 [9:09:57<19:24:46, 3.67it/s] 31%|███ | 115279/371472 [9:09:57<19:18:44, 3.68it/s] 31%|███ | 115280/371472 [9:09:57<18:45:46, 3.79it/s] {'loss': 3.2373, 'learning_rate': 7.210347130944428e-07, 'epoch': 4.97} + 31%|███ | 115280/371472 [9:09:57<18:45:46, 3.79it/s] 31%|███ | 115281/371472 [9:09:58<20:05:14, 3.54it/s] 31%|███ | 115282/371472 [9:09:58<19:52:08, 3.58it/s] 31%|███ | 115283/371472 [9:09:58<18:59:57, 3.75it/s] 31%|███ | 115284/371472 [9:09:58<19:03:39, 3.73it/s] 31%|███ | 115285/371472 [9:09:59<20:03:46, 3.55it/s] 31%|███ | 115286/371472 [9:09:59<20:53:43, 3.41it/s] 31%|███ | 115287/371472 [9:09:59<20:37:24, 3.45it/s] 31%|███ | 115288/371472 [9:10:00<19:54:07, 3.58it/s] 31%|███ | 115289/371472 [9:10:00<19:57:59, 3.56it/s] 31%|███ | 115290/371472 [9:10:00<19:32:24, 3.64it/s] 31%|███ | 115291/371472 [9:10:00<19:22:49, 3.67it/s] 31%|███ | 115292/371472 [9:10:01<20:45:47, 3.43it/s] 31%|███ | 115293/371472 [9:10:01<20:28:13, 3.48it/s] 31%|███ | 115294/371472 [9:10:01<20:12:19, 3.52it/s] 31%|███ | 115295/371472 [9:10:02<21:17:56, 3.34it/s] 31%|███ | 115296/371472 [9:10:02<21:44:55, 3.27it/s] 31%|███ | 115297/371472 [9:10:02<21:09:57, 3.36it/s] 31%|███ | 115298/371472 [9:10:02<21:12:06, 3.36it/s] 31%|███ | 115299/371472 [9:10:03<21:26:57, 3.32it/s] 31%|███ | 115300/371472 [9:10:03<22:16:28, 3.19it/s] {'loss': 3.5493, 'learning_rate': 7.20986231118964e-07, 'epoch': 4.97} + 31%|███ | 115300/371472 [9:10:03<22:16:28, 3.19it/s] 31%|███ | 115301/371472 [9:10:03<22:46:26, 3.12it/s] 31%|███ | 115302/371472 [9:10:04<21:36:53, 3.29it/s] 31%|███ | 115303/371472 [9:10:04<22:52:53, 3.11it/s] 31%|███ | 115304/371472 [9:10:04<24:11:49, 2.94it/s] 31%|███ | 115305/371472 [9:10:05<22:28:15, 3.17it/s] 31%|███ | 115306/371472 [9:10:05<21:25:02, 3.32it/s] 31%|███ | 115307/371472 [9:10:05<21:54:08, 3.25it/s] 31%|███ | 115308/371472 [9:10:06<20:16:17, 3.51it/s] 31%|███ | 115309/371472 [9:10:06<19:59:55, 3.56it/s] 31%|███ | 115310/371472 [9:10:06<19:26:37, 3.66it/s] 31%|███ | 115311/371472 [9:10:06<20:23:38, 3.49it/s] 31%|███ | 115312/371472 [9:10:07<20:08:42, 3.53it/s] 31%|███ | 115313/371472 [9:10:07<21:59:01, 3.24it/s] 31%|███ | 115314/371472 [9:10:07<20:53:24, 3.41it/s] 31%|███ | 115315/371472 [9:10:08<20:50:22, 3.41it/s] 31%|███ | 115316/371472 [9:10:08<22:46:37, 3.12it/s] 31%|███ | 115317/371472 [9:10:08<22:25:16, 3.17it/s] 31%|███ | 115318/371472 [9:10:09<21:36:02, 3.29it/s] 31%|███ | 115319/371472 [9:10:09<20:07:58, 3.53it/s] 31%|███ | 115320/371472 [9:10:09<19:25:38, 3.66it/s] {'loss': 3.2338, 'learning_rate': 7.20937749143485e-07, 'epoch': 4.97} + 31%|███ | 115320/371472 [9:10:09<19:25:38, 3.66it/s] 31%|███ | 115321/371472 [9:10:09<19:03:17, 3.73it/s] 31%|███ | 115322/371472 [9:10:10<18:42:58, 3.80it/s] 31%|███ | 115323/371472 [9:10:10<19:34:27, 3.63it/s] 31%|███ | 115324/371472 [9:10:10<20:10:40, 3.53it/s] 31%|███ | 115325/371472 [9:10:10<19:12:47, 3.70it/s] 31%|███ | 115326/371472 [9:10:11<18:50:40, 3.78it/s] 31%|███ | 115327/371472 [9:10:11<19:21:00, 3.68it/s] 31%|███ | 115328/371472 [9:10:11<19:20:51, 3.68it/s] 31%|███ | 115329/371472 [9:10:11<20:21:29, 3.49it/s] 31%|███ | 115330/371472 [9:10:12<20:24:25, 3.49it/s] 31%|███ | 115331/371472 [9:10:12<21:22:46, 3.33it/s] 31%|███ | 115332/371472 [9:10:12<20:19:24, 3.50it/s] 31%|███ | 115333/371472 [9:10:13<19:09:21, 3.71it/s] 31%|███ | 115334/371472 [9:10:13<19:43:56, 3.61it/s] 31%|███ | 115335/371472 [9:10:13<19:55:06, 3.57it/s] 31%|███ | 115336/371472 [9:10:14<23:52:28, 2.98it/s] 31%|███ | 115337/371472 [9:10:14<22:09:49, 3.21it/s] 31%|███ | 115338/371472 [9:10:14<22:31:17, 3.16it/s] 31%|███ | 115339/371472 [9:10:15<22:29:40, 3.16it/s] 31%|███ | 115340/371472 [9:10:15<21:48:48, 3.26it/s] {'loss': 3.4842, 'learning_rate': 7.208892671680061e-07, 'epoch': 4.97} + 31%|███ | 115340/371472 [9:10:15<21:48:48, 3.26it/s] 31%|███ | 115341/371472 [9:10:15<21:39:44, 3.28it/s] 31%|███ | 115342/371472 [9:10:15<21:06:15, 3.37it/s] 31%|███ | 115343/371472 [9:10:16<22:26:12, 3.17it/s] 31%|███ | 115344/371472 [9:10:16<20:57:26, 3.39it/s] 31%|███ | 115345/371472 [9:10:16<19:57:20, 3.57it/s] 31%|███ | 115346/371472 [9:10:16<18:56:12, 3.76it/s] 31%|███ | 115347/371472 [9:10:17<20:08:07, 3.53it/s] 31%|███ | 115348/371472 [9:10:17<19:56:39, 3.57it/s] 31%|███ | 115349/371472 [9:10:17<20:04:59, 3.54it/s] 31%|███ | 115350/371472 [9:10:18<20:05:24, 3.54it/s] 31%|███ | 115351/371472 [9:10:18<19:16:28, 3.69it/s] 31%|███ | 115352/371472 [9:10:18<19:31:40, 3.64it/s] 31%|███ | 115353/371472 [9:10:18<19:36:54, 3.63it/s] 31%|███ | 115354/371472 [9:10:19<21:24:41, 3.32it/s] 31%|███ | 115355/371472 [9:10:19<20:27:35, 3.48it/s] 31%|███ | 115356/371472 [9:10:19<19:57:26, 3.56it/s] 31%|███ | 115357/371472 [9:10:20<19:46:24, 3.60it/s] 31%|███ | 115358/371472 [9:10:20<19:40:32, 3.62it/s] 31%|███ | 115359/371472 [9:10:20<18:58:18, 3.75it/s] 31%|███ | 115360/371472 [9:10:20<18:52:05, 3.77it/s] {'loss': 3.2719, 'learning_rate': 7.208407851925273e-07, 'epoch': 4.97} + 31%|███ | 115360/371472 [9:10:20<18:52:05, 3.77it/s] 31%|███ | 115361/371472 [9:10:21<20:04:48, 3.54it/s] 31%|███ | 115362/371472 [9:10:21<19:26:10, 3.66it/s] 31%|███ | 115363/371472 [9:10:21<19:26:17, 3.66it/s] 31%|███ | 115364/371472 [9:10:22<20:41:02, 3.44it/s] 31%|███ | 115365/371472 [9:10:22<19:34:21, 3.63it/s] 31%|███ | 115366/371472 [9:10:22<19:36:53, 3.63it/s] 31%|███ | 115367/371472 [9:10:22<20:20:31, 3.50it/s] 31%|███ | 115368/371472 [9:10:23<21:41:05, 3.28it/s] 31%|███ | 115369/371472 [9:10:23<20:55:36, 3.40it/s] 31%|███ | 115370/371472 [9:10:23<20:08:28, 3.53it/s] 31%|███ | 115371/371472 [9:10:24<20:39:08, 3.44it/s] 31%|███ | 115372/371472 [9:10:24<19:33:53, 3.64it/s] 31%|███ | 115373/371472 [9:10:24<19:03:43, 3.73it/s] 31%|███ | 115374/371472 [9:10:24<19:27:43, 3.66it/s] 31%|███ | 115375/371472 [9:10:25<19:18:39, 3.68it/s] 31%|███ | 115376/371472 [9:10:25<19:32:13, 3.64it/s] 31%|███ | 115377/371472 [9:10:25<19:05:52, 3.72it/s] 31%|███ | 115378/371472 [9:10:26<20:48:38, 3.42it/s] 31%|███ | 115379/371472 [9:10:26<20:54:57, 3.40it/s] 31%|███ | 115380/371472 [9:10:26<22:36:59, 3.15it/s] {'loss': 3.3858, 'learning_rate': 7.207923032170485e-07, 'epoch': 4.97} + 31%|███ | 115380/371472 [9:10:26<22:36:59, 3.15it/s] 31%|███ | 115381/371472 [9:10:26<22:07:54, 3.21it/s] 31%|███ | 115382/371472 [9:10:27<21:48:27, 3.26it/s] 31%|███ | 115383/371472 [9:10:27<21:10:41, 3.36it/s] 31%|███ | 115384/371472 [9:10:27<20:22:03, 3.49it/s] 31%|███ | 115385/371472 [9:10:28<20:10:38, 3.53it/s] 31%|███ | 115386/371472 [9:10:28<20:12:27, 3.52it/s] 31%|███ | 115387/371472 [9:10:28<20:10:21, 3.53it/s] 31%|███ | 115388/371472 [9:10:29<21:29:10, 3.31it/s] 31%|███ | 115389/371472 [9:10:29<22:18:07, 3.19it/s] 31%|███ | 115390/371472 [9:10:29<21:15:11, 3.35it/s] 31%|███ | 115391/371472 [9:10:29<21:33:55, 3.30it/s] 31%|███ | 115392/371472 [9:10:30<24:02:31, 2.96it/s] 31%|███ | 115393/371472 [9:10:30<22:41:46, 3.13it/s] 31%|███ | 115394/371472 [9:10:30<21:07:48, 3.37it/s] 31%|███ | 115395/371472 [9:10:31<21:53:03, 3.25it/s] 31%|███ | 115396/371472 [9:10:31<21:10:43, 3.36it/s] 31%|███ | 115397/371472 [9:10:31<20:13:54, 3.52it/s] 31%|███ | 115398/371472 [9:10:31<19:16:22, 3.69it/s] 31%|███ | 115399/371472 [9:10:32<19:42:58, 3.61it/s] 31%|███ | 115400/371472 [9:10:32<19:46:56, 3.60it/s] {'loss': 3.2511, 'learning_rate': 7.207438212415695e-07, 'epoch': 4.97} + 31%|███ | 115400/371472 [9:10:32<19:46:56, 3.60it/s] 31%|███ | 115401/371472 [9:10:32<19:53:09, 3.58it/s] 31%|███ | 115402/371472 [9:10:33<19:16:27, 3.69it/s] 31%|███ | 115403/371472 [9:10:33<19:04:13, 3.73it/s] 31%|███ | 115404/371472 [9:10:33<19:08:55, 3.71it/s] 31%|███ | 115405/371472 [9:10:33<18:49:11, 3.78it/s] 31%|███ | 115406/371472 [9:10:34<18:22:31, 3.87it/s] 31%|███ | 115407/371472 [9:10:34<18:28:08, 3.85it/s] 31%|███ | 115408/371472 [9:10:34<19:39:20, 3.62it/s] 31%|███ | 115409/371472 [9:10:34<20:05:00, 3.54it/s] 31%|███ | 115410/371472 [9:10:35<21:51:21, 3.25it/s] 31%|███ | 115411/371472 [9:10:35<22:19:48, 3.19it/s] 31%|███ | 115412/371472 [9:10:35<20:57:57, 3.39it/s] 31%|███ | 115413/371472 [9:10:36<20:26:19, 3.48it/s] 31%|███ | 115414/371472 [9:10:36<21:18:56, 3.34it/s] 31%|███ | 115415/371472 [9:10:36<20:24:53, 3.48it/s] 31%|███ | 115416/371472 [9:10:37<19:24:27, 3.66it/s] 31%|███ | 115417/371472 [9:10:37<18:51:25, 3.77it/s] 31%|███ | 115418/371472 [9:10:37<20:47:51, 3.42it/s] 31%|███ | 115419/371472 [9:10:37<19:46:07, 3.60it/s] 31%|███ | 115420/371472 [9:10:38<20:43:43, 3.43it/s] {'loss': 3.2809, 'learning_rate': 7.206953392660905e-07, 'epoch': 4.97} + 31%|███ | 115420/371472 [9:10:38<20:43:43, 3.43it/s] 31%|███ | 115421/371472 [9:10:38<19:45:59, 3.60it/s] 31%|███ | 115422/371472 [9:10:38<19:27:14, 3.66it/s] 31%|███ | 115423/371472 [9:10:38<19:57:03, 3.56it/s] 31%|███ | 115424/371472 [9:10:39<20:29:00, 3.47it/s] 31%|███ | 115425/371472 [9:10:39<19:38:56, 3.62it/s] 31%|███ | 115426/371472 [9:10:39<19:30:48, 3.64it/s] 31%|███ | 115427/371472 [9:10:40<19:38:53, 3.62it/s] 31%|███ | 115428/371472 [9:10:40<19:08:27, 3.72it/s] 31%|███ | 115429/371472 [9:10:40<18:36:11, 3.82it/s] 31%|███ | 115430/371472 [9:10:40<19:16:19, 3.69it/s] 31%|███ | 115431/371472 [9:10:41<19:30:21, 3.65it/s] 31%|███ | 115432/371472 [9:10:41<20:18:28, 3.50it/s] 31%|███ | 115433/371472 [9:10:41<20:33:56, 3.46it/s] 31%|███ | 115434/371472 [9:10:42<19:55:19, 3.57it/s] 31%|███ | 115435/371472 [9:10:42<19:43:48, 3.60it/s] 31%|███ | 115436/371472 [9:10:42<21:13:32, 3.35it/s] 31%|███ | 115437/371472 [9:10:42<20:15:35, 3.51it/s] 31%|███ | 115438/371472 [9:10:43<19:55:18, 3.57it/s] 31%|███ | 115439/371472 [9:10:43<20:39:30, 3.44it/s] 31%|███ | 115440/371472 [9:10:43<19:36:43, 3.63it/s] {'loss': 3.453, 'learning_rate': 7.206468572906117e-07, 'epoch': 4.97} + 31%|███ | 115440/371472 [9:10:43<19:36:43, 3.63it/s] 31%|███ | 115441/371472 [9:10:43<18:42:49, 3.80it/s] 31%|███ | 115442/371472 [9:10:44<18:29:58, 3.84it/s] 31%|███ | 115443/371472 [9:10:44<18:38:09, 3.82it/s] 31%|███ | 115444/371472 [9:10:44<18:16:40, 3.89it/s] 31%|███ | 115445/371472 [9:10:44<18:21:56, 3.87it/s] 31%|███ | 115446/371472 [9:10:45<19:13:29, 3.70it/s] 31%|███ | 115447/371472 [9:10:45<19:06:08, 3.72it/s] 31%|███ | 115448/371472 [9:10:45<19:26:00, 3.66it/s] 31%|███ | 115449/371472 [9:10:46<19:08:32, 3.72it/s] 31%|███ | 115450/371472 [9:10:46<19:26:27, 3.66it/s] 31%|███ | 115451/371472 [9:10:46<18:51:31, 3.77it/s] 31%|███ | 115452/371472 [9:10:46<19:59:54, 3.56it/s] 31%|███ | 115453/371472 [9:10:47<19:53:01, 3.58it/s] 31%|███ | 115454/371472 [9:10:47<19:54:18, 3.57it/s] 31%|███ | 115455/371472 [9:10:47<19:48:49, 3.59it/s] 31%|███ | 115456/371472 [9:10:48<19:26:59, 3.66it/s] 31%|███ | 115457/371472 [9:10:48<19:46:25, 3.60it/s] 31%|███ | 115458/371472 [9:10:48<19:48:45, 3.59it/s] 31%|███ | 115459/371472 [9:10:48<20:17:03, 3.51it/s] 31%|███ | 115460/371472 [9:10:49<19:48:58, 3.59it/s] {'loss': 3.3949, 'learning_rate': 7.205983753151329e-07, 'epoch': 4.97} + 31%|███ | 115460/371472 [9:10:49<19:48:58, 3.59it/s] 31%|███ | 115461/371472 [9:10:49<19:23:17, 3.67it/s] 31%|███ | 115462/371472 [9:10:49<20:35:13, 3.45it/s] 31%|███ | 115463/371472 [9:10:50<20:27:12, 3.48it/s] 31%|███ | 115464/371472 [9:10:50<19:49:27, 3.59it/s] 31%|███ | 115465/371472 [9:10:50<21:29:38, 3.31it/s] 31%|███ | 115466/371472 [9:10:50<21:06:12, 3.37it/s] 31%|███ | 115467/371472 [9:10:51<21:13:11, 3.35it/s] 31%|███ | 115468/371472 [9:10:51<22:12:25, 3.20it/s] 31%|███ | 115469/371472 [9:10:51<21:23:49, 3.32it/s] 31%|███ | 115470/371472 [9:10:52<20:43:39, 3.43it/s] 31%|███ | 115471/371472 [9:10:52<19:50:29, 3.58it/s] 31%|███ | 115472/371472 [9:10:52<19:11:16, 3.71it/s] 31%|███ | 115473/371472 [9:10:52<18:58:12, 3.75it/s] 31%|███ | 115474/371472 [9:10:53<19:01:46, 3.74it/s] 31%|███ | 115475/371472 [9:10:53<19:36:26, 3.63it/s] 31%|███ | 115476/371472 [9:10:53<19:01:17, 3.74it/s] 31%|███ | 115477/371472 [9:10:53<18:51:48, 3.77it/s] 31%|███ | 115478/371472 [9:10:54<20:36:20, 3.45it/s] 31%|███ | 115479/371472 [9:10:54<19:59:10, 3.56it/s] 31%|███ | 115480/371472 [9:10:54<20:44:52, 3.43it/s] {'loss': 3.473, 'learning_rate': 7.205498933396539e-07, 'epoch': 4.97} + 31%|███ | 115480/371472 [9:10:54<20:44:52, 3.43it/s] 31%|███ | 115481/371472 [9:10:55<19:55:06, 3.57it/s] 31%|███ | 115482/371472 [9:10:55<19:24:17, 3.66it/s] 31%|███ | 115483/371472 [9:10:55<19:30:35, 3.64it/s] 31%|███ | 115484/371472 [9:10:56<22:11:35, 3.20it/s] 31%|███ | 115485/371472 [9:10:56<21:16:40, 3.34it/s] 31%|███ | 115486/371472 [9:10:56<20:34:34, 3.46it/s] 31%|███ | 115487/371472 [9:10:56<20:08:32, 3.53it/s] 31%|███ | 115488/371472 [9:10:57<19:30:49, 3.64it/s] 31%|███ | 115489/371472 [9:10:57<19:01:43, 3.74it/s] 31%|███ | 115490/371472 [9:10:57<19:05:02, 3.73it/s] 31%|███ | 115491/371472 [9:10:57<19:15:11, 3.69it/s] 31%|███ | 115492/371472 [9:10:58<19:11:11, 3.71it/s] 31%|███ | 115493/371472 [9:10:58<18:39:01, 3.81it/s] 31%|███ | 115494/371472 [9:10:58<19:07:06, 3.72it/s] 31%|███ | 115495/371472 [9:10:59<21:07:33, 3.37it/s] 31%|███ | 115496/371472 [9:10:59<20:13:05, 3.52it/s] 31%|███ | 115497/371472 [9:10:59<20:24:01, 3.49it/s] 31%|███ | 115498/371472 [9:11:00<25:35:37, 2.78it/s] 31%|███ | 115499/371472 [9:11:00<24:20:17, 2.92it/s] 31%|███ | 115500/371472 [9:11:00<26:25:23, 2.69it/s] {'loss': 3.3837, 'learning_rate': 7.20501411364175e-07, 'epoch': 4.97} + 31%|███ | 115500/371472 [9:11:00<26:25:23, 2.69it/s] 31%|███ | 115501/371472 [9:11:01<23:43:26, 3.00it/s] 31%|███ | 115502/371472 [9:11:01<22:15:00, 3.20it/s] 31%|███ | 115503/371472 [9:11:01<24:34:19, 2.89it/s] 31%|███ | 115504/371472 [9:11:02<23:21:09, 3.04it/s] 31%|███ | 115505/371472 [9:11:02<23:58:12, 2.97it/s] 31%|███ | 115506/371472 [9:11:02<23:23:08, 3.04it/s] 31%|███ | 115507/371472 [9:11:03<21:23:15, 3.32it/s] 31%|███ | 115508/371472 [9:11:03<20:19:34, 3.50it/s] 31%|███ | 115509/371472 [9:11:03<20:12:20, 3.52it/s] 31%|███ | 115510/371472 [9:11:03<19:51:51, 3.58it/s] 31%|███ | 115511/371472 [9:11:04<19:48:08, 3.59it/s] 31%|███ | 115512/371472 [9:11:04<19:20:05, 3.68it/s] 31%|███ | 115513/371472 [9:11:04<18:49:57, 3.78it/s] 31%|███ | 115514/371472 [9:11:04<19:04:44, 3.73it/s] 31%|███ | 115515/371472 [9:11:05<18:52:11, 3.77it/s] 31%|███ | 115516/371472 [9:11:05<18:23:39, 3.87it/s] 31%|███ | 115517/371472 [9:11:05<18:44:17, 3.79it/s] 31%|███ | 115518/371472 [9:11:05<18:52:42, 3.77it/s] 31%|███ | 115519/371472 [9:11:06<19:04:49, 3.73it/s] 31%|███ | 115520/371472 [9:11:06<20:42:56, 3.43it/s] {'loss': 3.153, 'learning_rate': 7.204529293886962e-07, 'epoch': 4.98} + 31%|███ | 115520/371472 [9:11:06<20:42:56, 3.43it/s] 31%|███ | 115521/371472 [9:11:06<20:51:20, 3.41it/s] 31%|███ | 115522/371472 [9:11:07<19:41:15, 3.61it/s] 31%|███ | 115523/371472 [9:11:07<19:40:10, 3.61it/s] 31%|███ | 115524/371472 [9:11:07<22:45:13, 3.12it/s] 31%|███ | 115525/371472 [9:11:08<21:48:44, 3.26it/s] 31%|███ | 115526/371472 [9:11:08<21:28:49, 3.31it/s] 31%|███ | 115527/371472 [9:11:08<20:44:06, 3.43it/s] 31%|███ | 115528/371472 [9:11:08<20:09:03, 3.53it/s] 31%|███ | 115529/371472 [9:11:09<21:22:36, 3.33it/s] 31%|███ | 115530/371472 [9:11:09<20:54:16, 3.40it/s] 31%|███ | 115531/371472 [9:11:09<22:23:55, 3.17it/s] 31%|███ | 115532/371472 [9:11:10<23:27:22, 3.03it/s] 31%|███ | 115533/371472 [9:11:10<21:50:39, 3.25it/s] 31%|███ | 115534/371472 [9:11:10<22:01:51, 3.23it/s] 31%|███ | 115535/371472 [9:11:11<23:17:45, 3.05it/s] 31%|███ | 115536/371472 [9:11:11<21:39:24, 3.28it/s] 31%|███ | 115537/371472 [9:11:11<22:04:40, 3.22it/s] 31%|███ | 115538/371472 [9:11:12<21:32:50, 3.30it/s] 31%|███ | 115539/371472 [9:11:12<20:44:19, 3.43it/s] 31%|███ | 115540/371472 [9:11:12<20:53:06, 3.40it/s] {'loss': 3.3058, 'learning_rate': 7.204044474132172e-07, 'epoch': 4.98} + 31%|███ | 115540/371472 [9:11:12<20:53:06, 3.40it/s] 31%|███ | 115541/371472 [9:11:12<21:11:14, 3.36it/s] 31%|███ | 115542/371472 [9:11:13<20:37:35, 3.45it/s] 31%|███ | 115543/371472 [9:11:13<19:34:02, 3.63it/s] 31%|███ | 115544/371472 [9:11:13<18:41:24, 3.80it/s] 31%|███ | 115545/371472 [9:11:13<19:07:22, 3.72it/s] 31%|███ | 115546/371472 [9:11:14<19:07:37, 3.72it/s] 31%|███ | 115547/371472 [9:11:14<20:26:22, 3.48it/s] 31%|███ | 115548/371472 [9:11:14<20:20:05, 3.50it/s] 31%|███ | 115549/371472 [9:11:15<19:33:49, 3.63it/s] 31%|███ | 115550/371472 [9:11:15<19:41:50, 3.61it/s] 31%|███ | 115551/371472 [9:11:15<21:52:01, 3.25it/s] 31%|███ | 115552/371472 [9:11:16<21:29:16, 3.31it/s] 31%|███ | 115553/371472 [9:11:16<20:12:42, 3.52it/s] 31%|███ | 115554/371472 [9:11:16<19:06:40, 3.72it/s] 31%|███ | 115555/371472 [9:11:16<18:23:13, 3.87it/s] 31%|███ | 115556/371472 [9:11:17<19:10:36, 3.71it/s] 31%|███ | 115557/371472 [9:11:17<18:37:50, 3.82it/s] 31%|███ | 115558/371472 [9:11:17<19:12:47, 3.70it/s] 31%|███ | 115559/371472 [9:11:17<19:22:20, 3.67it/s] 31%|███ | 115560/371472 [9:11:18<19:17:05, 3.69it/s] {'loss': 3.1242, 'learning_rate': 7.203559654377383e-07, 'epoch': 4.98} + 31%|███ | 115560/371472 [9:11:18<19:17:05, 3.69it/s] 31%|███ | 115561/371472 [9:11:18<19:07:22, 3.72it/s] 31%|███ | 115562/371472 [9:11:18<19:29:58, 3.65it/s] 31%|███ | 115563/371472 [9:11:18<19:25:51, 3.66it/s] 31%|███ | 115564/371472 [9:11:19<19:33:29, 3.63it/s] 31%|███ | 115565/371472 [9:11:19<19:13:18, 3.70it/s] 31%|███ | 115566/371472 [9:11:19<20:27:58, 3.47it/s] 31%|███ | 115567/371472 [9:11:20<20:59:31, 3.39it/s] 31%|███ | 115568/371472 [9:11:20<20:02:58, 3.55it/s] 31%|███ | 115569/371472 [9:11:20<19:48:44, 3.59it/s] 31%|███ | 115570/371472 [9:11:20<20:25:26, 3.48it/s] 31%|███ | 115571/371472 [9:11:21<20:22:12, 3.49it/s] 31%|███ | 115572/371472 [9:11:21<21:46:53, 3.26it/s] 31%|███ | 115573/371472 [9:11:21<21:59:12, 3.23it/s] 31%|███ | 115574/371472 [9:11:22<21:39:18, 3.28it/s] 31%|███ | 115575/371472 [9:11:22<20:28:28, 3.47it/s] 31%|███ | 115576/371472 [9:11:22<19:49:39, 3.58it/s] 31%|███ | 115577/371472 [9:11:22<19:51:48, 3.58it/s] 31%|███ | 115578/371472 [9:11:23<20:15:11, 3.51it/s] 31%|███ | 115579/371472 [9:11:23<21:30:15, 3.31it/s] 31%|███ | 115580/371472 [9:11:23<20:34:46, 3.45it/s] {'loss': 3.2933, 'learning_rate': 7.203074834622594e-07, 'epoch': 4.98} + 31%|███ | 115580/371472 [9:11:23<20:34:46, 3.45it/s] 31%|███ | 115581/371472 [9:11:24<20:02:56, 3.55it/s] 31%|███ | 115582/371472 [9:11:24<21:03:13, 3.38it/s] 31%|███ | 115583/371472 [9:11:24<20:51:23, 3.41it/s] 31%|███ | 115584/371472 [9:11:25<20:07:51, 3.53it/s] 31%|███ | 115585/371472 [9:11:25<20:09:23, 3.53it/s] 31%|███ | 115586/371472 [9:11:25<20:02:16, 3.55it/s] 31%|███ | 115587/371472 [9:11:25<19:46:49, 3.59it/s] 31%|███ | 115588/371472 [9:11:26<20:58:54, 3.39it/s] 31%|███ | 115589/371472 [9:11:26<21:10:43, 3.36it/s] 31%|███ | 115590/371472 [9:11:26<21:46:11, 3.27it/s] 31%|███ | 115591/371472 [9:11:27<20:45:46, 3.42it/s] 31%|███ | 115592/371472 [9:11:27<22:47:09, 3.12it/s] 31%|███ | 115593/371472 [9:11:27<21:33:45, 3.30it/s] 31%|███ | 115594/371472 [9:11:27<20:50:47, 3.41it/s] 31%|███ | 115595/371472 [9:11:28<20:41:21, 3.44it/s] 31%|███ | 115596/371472 [9:11:28<20:25:15, 3.48it/s] 31%|███ | 115597/371472 [9:11:28<21:43:14, 3.27it/s] 31%|███ | 115598/371472 [9:11:29<20:47:59, 3.42it/s] 31%|███ | 115599/371472 [9:11:29<21:21:27, 3.33it/s] 31%|███ | 115600/371472 [9:11:29<20:37:53, 3.44it/s] {'loss': 3.1943, 'learning_rate': 7.202590014867806e-07, 'epoch': 4.98} + 31%|███ | 115600/371472 [9:11:29<20:37:53, 3.44it/s] 31%|███ | 115601/371472 [9:11:30<20:09:06, 3.53it/s] 31%|███ | 115602/371472 [9:11:30<19:48:14, 3.59it/s] 31%|███ | 115603/371472 [9:11:30<19:38:25, 3.62it/s] 31%|███ | 115604/371472 [9:11:30<19:36:57, 3.62it/s] 31%|███ | 115605/371472 [9:11:31<18:53:23, 3.76it/s] 31%|███ | 115606/371472 [9:11:31<18:10:37, 3.91it/s] 31%|███ | 115607/371472 [9:11:31<18:19:16, 3.88it/s] 31%|███ | 115608/371472 [9:11:31<19:07:52, 3.72it/s] 31%|███ | 115609/371472 [9:11:32<19:20:17, 3.68it/s] 31%|███ | 115610/371472 [9:11:32<19:54:48, 3.57it/s] 31%|███ | 115611/371472 [9:11:32<19:57:50, 3.56it/s] 31%|███ | 115612/371472 [9:11:33<20:18:17, 3.50it/s] 31%|███ | 115613/371472 [9:11:33<20:51:53, 3.41it/s] 31%|███ | 115614/371472 [9:11:33<20:01:27, 3.55it/s] 31%|███ | 115615/371472 [9:11:33<19:54:48, 3.57it/s] 31%|███ | 115616/371472 [9:11:34<19:55:07, 3.57it/s] 31%|███ | 115617/371472 [9:11:34<20:33:53, 3.46it/s] 31%|███ | 115618/371472 [9:11:34<20:17:58, 3.50it/s] 31%|███ | 115619/371472 [9:11:35<19:58:18, 3.56it/s] 31%|███ | 115620/371472 [9:11:35<20:53:59, 3.40it/s] {'loss': 3.2513, 'learning_rate': 7.202105195113017e-07, 'epoch': 4.98} + 31%|███ | 115620/371472 [9:11:35<20:53:59, 3.40it/s] 31%|███ | 115621/371472 [9:11:35<19:55:38, 3.57it/s] 31%|███ | 115622/371472 [9:11:35<23:01:21, 3.09it/s] 31%|███ | 115623/371472 [9:11:36<21:37:32, 3.29it/s] 31%|███ | 115624/371472 [9:11:36<21:00:08, 3.38it/s] 31%|███ | 115625/371472 [9:11:36<20:06:36, 3.53it/s] 31%|███ | 115626/371472 [9:11:37<19:47:50, 3.59it/s] 31%|███ | 115627/371472 [9:11:37<19:13:44, 3.70it/s] 31%|███ | 115628/371472 [9:11:37<19:30:12, 3.64it/s] 31%|███ | 115629/371472 [9:11:37<20:42:39, 3.43it/s] 31%|███ | 115630/371472 [9:11:38<21:39:14, 3.28it/s] 31%|███ | 115631/371472 [9:11:38<21:48:49, 3.26it/s] 31%|███ | 115632/371472 [9:11:38<22:46:11, 3.12it/s] 31%|███ | 115633/371472 [9:11:39<21:45:14, 3.27it/s] 31%|███ | 115634/371472 [9:11:39<20:45:09, 3.42it/s] 31%|███ | 115635/371472 [9:11:39<20:13:10, 3.51it/s] 31%|███ | 115636/371472 [9:11:40<20:14:01, 3.51it/s] 31%|███ | 115637/371472 [9:11:40<19:28:36, 3.65it/s] 31%|███ | 115638/371472 [9:11:40<19:07:56, 3.71it/s] 31%|███ | 115639/371472 [9:11:40<19:33:20, 3.63it/s] 31%|███ | 115640/371472 [9:11:41<19:37:02, 3.62it/s] {'loss': 3.4284, 'learning_rate': 7.201620375358228e-07, 'epoch': 4.98} + 31%|███ | 115640/371472 [9:11:41<19:37:02, 3.62it/s] 31%|███ | 115641/371472 [9:11:41<20:20:57, 3.49it/s] 31%|███ | 115642/371472 [9:11:41<19:34:04, 3.63it/s] 31%|███ | 115643/371472 [9:11:41<20:35:18, 3.45it/s] 31%|███ | 115644/371472 [9:11:42<20:18:51, 3.50it/s] 31%|███ | 115645/371472 [9:11:42<20:25:41, 3.48it/s] 31%|███ | 115646/371472 [9:11:42<20:02:05, 3.55it/s] 31%|███ | 115647/371472 [9:11:43<22:29:20, 3.16it/s] 31%|███ | 115648/371472 [9:11:43<22:11:29, 3.20it/s] 31%|███ | 115649/371472 [9:11:43<21:23:59, 3.32it/s] 31%|███ | 115650/371472 [9:11:44<20:58:42, 3.39it/s] 31%|███ | 115651/371472 [9:11:44<20:17:20, 3.50it/s] 31%|███ | 115652/371472 [9:11:44<19:49:50, 3.58it/s] 31%|███ | 115653/371472 [9:11:44<19:41:23, 3.61it/s] 31%|███ | 115654/371472 [9:11:45<19:29:21, 3.65it/s] 31%|███ | 115655/371472 [9:11:45<18:57:58, 3.75it/s] 31%|███ | 115656/371472 [9:11:45<19:07:16, 3.72it/s] 31%|███ | 115657/371472 [9:11:45<19:24:26, 3.66it/s] 31%|███ | 115658/371472 [9:11:46<19:30:17, 3.64it/s] 31%|███ | 115659/371472 [9:11:46<19:15:41, 3.69it/s] 31%|███ | 115660/371472 [9:11:46<19:22:54, 3.67it/s] {'loss': 3.3555, 'learning_rate': 7.201135555603439e-07, 'epoch': 4.98} + 31%|███ | 115660/371472 [9:11:46<19:22:54, 3.67it/s] 31%|███ | 115661/371472 [9:11:47<19:05:41, 3.72it/s] 31%|███ | 115662/371472 [9:11:47<19:04:38, 3.72it/s] 31%|███ | 115663/371472 [9:11:47<19:13:56, 3.69it/s] 31%|███ | 115664/371472 [9:11:47<19:22:48, 3.67it/s] 31%|███ | 115665/371472 [9:11:48<18:44:12, 3.79it/s] 31%|███ | 115666/371472 [9:11:48<19:07:01, 3.72it/s] 31%|███ | 115667/371472 [9:11:48<18:46:44, 3.78it/s] 31%|███ | 115668/371472 [9:11:48<18:52:32, 3.76it/s] 31%|███ | 115669/371472 [9:11:49<19:16:05, 3.69it/s] 31%|███ | 115670/371472 [9:11:49<19:40:52, 3.61it/s] 31%|███ | 115671/371472 [9:11:49<19:25:42, 3.66it/s] 31%|███ | 115672/371472 [9:11:49<19:09:01, 3.71it/s] 31%|███ | 115673/371472 [9:11:50<19:17:17, 3.68it/s] 31%|███ | 115674/371472 [9:11:50<19:47:05, 3.59it/s] 31%|███ | 115675/371472 [9:11:50<20:17:01, 3.50it/s] 31%|███ | 115676/371472 [9:11:51<20:22:26, 3.49it/s] 31%|███ | 115677/371472 [9:11:51<19:27:36, 3.65it/s] 31%|███ | 115678/371472 [9:11:51<19:39:02, 3.62it/s] 31%|███ | 115679/371472 [9:11:51<18:40:47, 3.80it/s] 31%|███ | 115680/371472 [9:11:52<19:57:37, 3.56it/s] {'loss': 3.4221, 'learning_rate': 7.200650735848649e-07, 'epoch': 4.98} + 31%|███ | 115680/371472 [9:11:52<19:57:37, 3.56it/s] 31%|███ | 115681/371472 [9:11:52<19:32:24, 3.64it/s] 31%|███ | 115682/371472 [9:11:52<18:59:50, 3.74it/s] 31%|███ | 115683/371472 [9:11:52<18:22:49, 3.87it/s] 31%|███ | 115684/371472 [9:11:53<19:23:45, 3.66it/s] 31%|███ | 115685/371472 [9:11:53<19:18:54, 3.68it/s] 31%|███ | 115686/371472 [9:11:53<19:22:37, 3.67it/s] 31%|███ | 115687/371472 [9:11:54<19:16:25, 3.69it/s] 31%|███ | 115688/371472 [9:11:54<20:23:34, 3.48it/s] 31%|███ | 115689/371472 [9:11:54<19:27:35, 3.65it/s] 31%|███ | 115690/371472 [9:11:54<19:52:08, 3.58it/s] 31%|███ | 115691/371472 [9:11:55<19:38:20, 3.62it/s] 31%|███ | 115692/371472 [9:11:55<19:33:30, 3.63it/s] 31%|███ | 115693/371472 [9:11:55<19:52:07, 3.58it/s] 31%|███ | 115694/371472 [9:11:56<19:50:31, 3.58it/s] 31%|███ | 115695/371472 [9:11:56<19:26:29, 3.65it/s] 31%|███ | 115696/371472 [9:11:56<19:48:00, 3.59it/s] 31%|███ | 115697/371472 [9:11:56<19:08:56, 3.71it/s] 31%|███ | 115698/371472 [9:11:57<18:57:39, 3.75it/s] 31%|███ | 115699/371472 [9:11:57<18:51:58, 3.77it/s] 31%|███ | 115700/371472 [9:11:57<19:00:41, 3.74it/s] {'loss': 3.3952, 'learning_rate': 7.200165916093861e-07, 'epoch': 4.98} + 31%|███ | 115700/371472 [9:11:57<19:00:41, 3.74it/s] 31%|███ | 115701/371472 [9:11:57<18:34:40, 3.82it/s] 31%|███ | 115702/371472 [9:11:58<19:21:58, 3.67it/s] 31%|███ | 115703/371472 [9:11:58<21:15:26, 3.34it/s] 31%|███ | 115704/371472 [9:11:58<20:45:04, 3.42it/s] 31%|███ | 115705/371472 [9:11:59<19:43:11, 3.60it/s] 31%|███ | 115706/371472 [9:11:59<19:15:04, 3.69it/s] 31%|███ | 115707/371472 [9:11:59<18:45:30, 3.79it/s] 31%|███ | 115708/371472 [9:11:59<19:44:36, 3.60it/s] 31%|███ | 115709/371472 [9:12:00<21:05:06, 3.37it/s] 31%|███ | 115710/371472 [9:12:00<20:58:16, 3.39it/s] 31%|███ | 115711/371472 [9:12:00<20:58:47, 3.39it/s] 31%|███ | 115712/371472 [9:12:01<22:04:33, 3.22it/s] 31%|███ | 115713/371472 [9:12:01<22:01:20, 3.23it/s] 31%|███ | 115714/371472 [9:12:01<21:59:22, 3.23it/s] 31%|███ | 115715/371472 [9:12:02<21:02:53, 3.38it/s] 31%|███ | 115716/371472 [9:12:02<20:02:24, 3.55it/s] 31%|███ | 115717/371472 [9:12:02<19:26:38, 3.65it/s] 31%|███ | 115718/371472 [9:12:02<19:26:24, 3.65it/s] 31%|███ | 115719/371472 [9:12:03<20:10:20, 3.52it/s] 31%|███ | 115720/371472 [9:12:03<19:33:56, 3.63it/s] {'loss': 3.2709, 'learning_rate': 7.199681096339072e-07, 'epoch': 4.98} + 31%|███ | 115720/371472 [9:12:03<19:33:56, 3.63it/s] 31%|███ | 115721/371472 [9:12:03<21:13:55, 3.35it/s] 31%|███ | 115722/371472 [9:12:04<20:56:26, 3.39it/s] 31%|███ | 115723/371472 [9:12:04<19:57:29, 3.56it/s] 31%|███ | 115724/371472 [9:12:04<19:55:55, 3.56it/s] 31%|███ | 115725/371472 [9:12:04<19:43:29, 3.60it/s] 31%|███ | 115726/371472 [9:12:05<19:10:28, 3.70it/s] 31%|███ | 115727/371472 [9:12:05<18:44:00, 3.79it/s] 31%|███ | 115728/371472 [9:12:05<19:09:57, 3.71it/s] 31%|███ | 115729/371472 [9:12:05<19:51:17, 3.58it/s] 31%|███ | 115730/371472 [9:12:06<20:58:45, 3.39it/s] 31%|███ | 115731/371472 [9:12:06<22:12:01, 3.20it/s] 31%|███ | 115732/371472 [9:12:06<21:26:31, 3.31it/s] 31%|███ | 115733/371472 [9:12:07<20:49:12, 3.41it/s] 31%|███ | 115734/371472 [9:12:07<19:34:40, 3.63it/s] 31%|███ | 115735/371472 [9:12:07<20:58:29, 3.39it/s] 31%|███ | 115736/371472 [9:12:07<19:46:26, 3.59it/s] 31%|███ | 115737/371472 [9:12:08<19:10:20, 3.71it/s] 31%|███ | 115738/371472 [9:12:08<18:50:55, 3.77it/s] 31%|███ | 115739/371472 [9:12:08<20:42:21, 3.43it/s] 31%|███ | 115740/371472 [9:12:09<20:42:22, 3.43it/s] {'loss': 3.309, 'learning_rate': 7.199196276584283e-07, 'epoch': 4.99} + 31%|███ | 115740/371472 [9:12:09<20:42:22, 3.43it/s] 31%|███ | 115741/371472 [9:12:09<21:53:44, 3.24it/s] 31%|███ | 115742/371472 [9:12:09<21:58:42, 3.23it/s] 31%|███ | 115743/371472 [9:12:10<22:16:51, 3.19it/s] 31%|███ | 115744/371472 [9:12:10<22:21:37, 3.18it/s] 31%|███ | 115745/371472 [9:12:10<22:48:45, 3.11it/s] 31%|███ | 115746/371472 [9:12:11<22:02:36, 3.22it/s] 31%|███ | 115747/371472 [9:12:11<22:04:55, 3.22it/s] 31%|███ | 115748/371472 [9:12:11<20:58:57, 3.39it/s] 31%|███ | 115749/371472 [9:12:11<19:47:05, 3.59it/s] 31%|███ | 115750/371472 [9:12:12<20:10:09, 3.52it/s] 31%|███ | 115751/371472 [9:12:12<19:41:49, 3.61it/s] 31%|███ | 115752/371472 [9:12:12<19:16:59, 3.68it/s] 31%|███ | 115753/371472 [9:12:12<18:57:20, 3.75it/s] 31%|███ | 115754/371472 [9:12:13<20:03:23, 3.54it/s] 31%|███ | 115755/371472 [9:12:13<19:42:02, 3.61it/s] 31%|███ | 115756/371472 [9:12:13<19:29:50, 3.64it/s] 31%|███ | 115757/371472 [9:12:14<18:40:26, 3.80it/s] 31%|███ | 115758/371472 [9:12:14<19:53:03, 3.57it/s] 31%|███ | 115759/371472 [9:12:14<19:17:33, 3.68it/s] 31%|███ | 115760/371472 [9:12:14<19:04:22, 3.72it/s] {'loss': 3.2481, 'learning_rate': 7.198711456829495e-07, 'epoch': 4.99} + 31%|███ | 115760/371472 [9:12:14<19:04:22, 3.72it/s] 31%|███ | 115761/371472 [9:12:15<19:10:39, 3.70it/s] 31%|███ | 115762/371472 [9:12:15<18:55:25, 3.75it/s] 31%|███ | 115763/371472 [9:12:15<19:05:36, 3.72it/s] 31%|███ | 115764/371472 [9:12:15<18:42:49, 3.80it/s] 31%|███ | 115765/371472 [9:12:16<18:32:11, 3.83it/s] 31%|███ | 115766/371472 [9:12:16<18:30:38, 3.84it/s] 31%|███ | 115767/371472 [9:12:16<19:30:07, 3.64it/s] 31%|███ | 115768/371472 [9:12:17<21:29:11, 3.31it/s] 31%|███ | 115769/371472 [9:12:17<20:34:00, 3.45it/s] 31%|███ | 115770/371472 [9:12:17<20:10:59, 3.52it/s] 31%|███ | 115771/371472 [9:12:17<19:34:05, 3.63it/s] 31%|███ | 115772/371472 [9:12:18<19:16:12, 3.69it/s] 31%|███ | 115773/371472 [9:12:18<18:43:59, 3.79it/s] 31%|███ | 115774/371472 [9:12:18<19:10:58, 3.70it/s] 31%|███ | 115775/371472 [9:12:18<18:53:52, 3.76it/s] 31%|███ | 115776/371472 [9:12:19<19:10:59, 3.70it/s] 31%|███ | 115777/371472 [9:12:19<19:15:10, 3.69it/s] 31%|███ | 115778/371472 [9:12:19<18:55:51, 3.75it/s] 31%|███ | 115779/371472 [9:12:20<20:49:45, 3.41it/s] 31%|███ | 115780/371472 [9:12:20<20:10:20, 3.52it/s] {'loss': 3.462, 'learning_rate': 7.198226637074705e-07, 'epoch': 4.99} + 31%|███ | 115780/371472 [9:12:20<20:10:20, 3.52it/s] 31%|███ | 115781/371472 [9:12:20<21:24:34, 3.32it/s] 31%|███ | 115782/371472 [9:12:20<20:24:52, 3.48it/s] 31%|██��� | 115783/371472 [9:12:21<21:58:45, 3.23it/s] 31%|███ | 115784/371472 [9:12:21<21:36:33, 3.29it/s] 31%|███ | 115785/371472 [9:12:21<21:14:37, 3.34it/s] 31%|███ | 115786/371472 [9:12:22<21:09:36, 3.36it/s] 31%|███ | 115787/371472 [9:12:22<21:37:48, 3.28it/s] 31%|███ | 115788/371472 [9:12:22<20:43:39, 3.43it/s] 31%|███ | 115789/371472 [9:12:23<20:23:47, 3.48it/s] 31%|███ | 115790/371472 [9:12:23<20:21:27, 3.49it/s] 31%|███ | 115791/371472 [9:12:23<19:58:10, 3.56it/s] 31%|███ | 115792/371472 [9:12:23<19:03:29, 3.73it/s] 31%|███ | 115793/371472 [9:12:24<18:38:51, 3.81it/s] 31%|███ | 115794/371472 [9:12:24<18:39:09, 3.81it/s] 31%|███ | 115795/371472 [9:12:24<18:35:45, 3.82it/s] 31%|███ | 115796/371472 [9:12:24<18:37:50, 3.81it/s] 31%|███ | 115797/371472 [9:12:25<18:41:25, 3.80it/s] 31%|███ | 115798/371472 [9:12:25<19:00:19, 3.74it/s] 31%|███ | 115799/371472 [9:12:25<18:35:46, 3.82it/s] 31%|███ | 115800/371472 [9:12:25<18:59:52, 3.74it/s] {'loss': 3.2731, 'learning_rate': 7.197741817319915e-07, 'epoch': 4.99} + 31%|███ | 115800/371472 [9:12:25<18:59:52, 3.74it/s] 31%|███ | 115801/371472 [9:12:26<21:02:52, 3.37it/s] 31%|███ | 115802/371472 [9:12:26<22:31:47, 3.15it/s] 31%|███ | 115803/371472 [9:12:26<22:10:35, 3.20it/s] 31%|███ | 115804/371472 [9:12:27<21:40:06, 3.28it/s] 31%|███ | 115805/371472 [9:12:27<20:56:12, 3.39it/s] 31%|███ | 115806/371472 [9:12:27<20:27:26, 3.47it/s] 31%|███ | 115807/371472 [9:12:28<20:26:52, 3.47it/s] 31%|███ | 115808/371472 [9:12:28<21:08:58, 3.36it/s] 31%|███ | 115809/371472 [9:12:28<21:02:40, 3.37it/s] 31%|███ | 115810/371472 [9:12:28<20:49:59, 3.41it/s] 31%|███ | 115811/371472 [9:12:29<20:20:18, 3.49it/s] 31%|███ | 115812/371472 [9:12:29<19:30:33, 3.64it/s] 31%|███ | 115813/371472 [9:12:29<19:15:39, 3.69it/s] 31%|███ | 115814/371472 [9:12:30<19:20:41, 3.67it/s] 31%|███ | 115815/371472 [9:12:30<19:53:51, 3.57it/s] 31%|███ | 115816/371472 [9:12:30<19:10:32, 3.70it/s] 31%|███ | 115817/371472 [9:12:30<18:49:05, 3.77it/s] 31%|███ | 115818/371472 [9:12:31<18:29:12, 3.84it/s] 31%|███ | 115819/371472 [9:12:31<19:04:01, 3.72it/s] 31%|███ | 115820/371472 [9:12:31<20:11:51, 3.52it/s] {'loss': 3.309, 'learning_rate': 7.197256997565127e-07, 'epoch': 4.99} + 31%|███ | 115820/371472 [9:12:31<20:11:51, 3.52it/s] 31%|███ | 115821/371472 [9:12:31<19:26:37, 3.65it/s] 31%|███ | 115822/371472 [9:12:32<19:27:12, 3.65it/s] 31%|███ | 115823/371472 [9:12:32<19:07:41, 3.71it/s] 31%|███ | 115824/371472 [9:12:32<19:04:01, 3.72it/s] 31%|███ | 115825/371472 [9:12:33<19:23:46, 3.66it/s] 31%|███ | 115826/371472 [9:12:33<19:55:40, 3.56it/s] 31%|███ | 115827/371472 [9:12:33<19:39:20, 3.61it/s] 31%|███ | 115828/371472 [9:12:33<18:59:25, 3.74it/s] 31%|███ | 115829/371472 [9:12:34<19:05:44, 3.72it/s] 31%|███ | 115830/371472 [9:12:34<19:03:06, 3.73it/s] 31%|███ | 115831/371472 [9:12:34<18:45:03, 3.79it/s] 31%|███ | 115832/371472 [9:12:34<19:20:13, 3.67it/s] 31%|███ | 115833/371472 [9:12:35<19:40:45, 3.61it/s] 31%|███ | 115834/371472 [9:12:35<20:37:14, 3.44it/s] 31%|███ | 115835/371472 [9:12:35<20:45:19, 3.42it/s] 31%|███ | 115836/371472 [9:12:36<20:04:04, 3.54it/s] 31%|███ | 115837/371472 [9:12:36<20:02:51, 3.54it/s] 31%|███ | 115838/371472 [9:12:36<20:11:51, 3.52it/s] 31%|███ | 115839/371472 [9:12:36<20:08:41, 3.52it/s] 31%|███ | 115840/371472 [9:12:37<19:48:28, 3.58it/s] {'loss': 3.2293, 'learning_rate': 7.196772177810338e-07, 'epoch': 4.99} + 31%|███ | 115840/371472 [9:12:37<19:48:28, 3.58it/s] 31%|███ | 115841/371472 [9:12:37<19:53:59, 3.57it/s] 31%|███ | 115842/371472 [9:12:37<20:14:06, 3.51it/s] 31%|███ | 115843/371472 [9:12:38<22:20:46, 3.18it/s] 31%|███ | 115844/371472 [9:12:38<21:15:39, 3.34it/s] 31%|███ | 115845/371472 [9:12:38<21:07:06, 3.36it/s] 31%|███ | 115846/371472 [9:12:38<19:47:03, 3.59it/s] 31%|███ | 115847/371472 [9:12:39<20:38:42, 3.44it/s] 31%|███ | 115848/371472 [9:12:39<20:31:02, 3.46it/s] 31%|███ | 115849/371472 [9:12:39<19:48:35, 3.58it/s] 31%|███ | 115850/371472 [9:12:40<19:15:23, 3.69it/s] 31%|███ | 115851/371472 [9:12:40<18:34:43, 3.82it/s] 31%|███ | 115852/371472 [9:12:40<18:49:35, 3.77it/s] 31%|███ | 115853/371472 [9:12:40<19:38:45, 3.61it/s] 31%|███ | 115854/371472 [9:12:41<19:30:37, 3.64it/s] 31%|███ | 115855/371472 [9:12:41<19:10:56, 3.70it/s] 31%|███ | 115856/371472 [9:12:41<19:24:56, 3.66it/s] 31%|███ | 115857/371472 [9:12:41<19:43:54, 3.60it/s] 31%|███ | 115858/371472 [9:12:42<19:32:21, 3.63it/s] 31%|███ | 115859/371472 [9:12:42<19:14:53, 3.69it/s] 31%|███ | 115860/371472 [9:12:42<18:30:14, 3.84it/s] {'loss': 3.5308, 'learning_rate': 7.196287358055549e-07, 'epoch': 4.99} + 31%|███ | 115860/371472 [9:12:42<18:30:14, 3.84it/s] 31%|███ | 115861/371472 [9:12:43<20:38:00, 3.44it/s] 31%|███ | 115862/371472 [9:12:43<22:19:17, 3.18it/s] 31%|███ | 115863/371472 [9:12:43<21:05:06, 3.37it/s] 31%|███ | 115864/371472 [9:12:44<20:42:23, 3.43it/s] 31%|███ | 115865/371472 [9:12:44<20:50:51, 3.41it/s] 31%|███ | 115866/371472 [9:12:44<20:42:50, 3.43it/s] 31%|███ | 115867/371472 [9:12:44<20:18:47, 3.50it/s] 31%|███ | 115868/371472 [9:12:45<20:24:29, 3.48it/s] 31%|███ | 115869/371472 [9:12:45<20:25:33, 3.48it/s] 31%|███ | 115870/371472 [9:12:45<19:49:18, 3.58it/s] 31%|███ | 115871/371472 [9:12:45<18:56:37, 3.75it/s] 31%|███ | 115872/371472 [9:12:46<19:33:20, 3.63it/s] 31%|███ | 115873/371472 [9:12:46<19:16:09, 3.68it/s] 31%|███ | 115874/371472 [9:12:46<21:39:15, 3.28it/s] 31%|███ | 115875/371472 [9:12:47<20:34:43, 3.45it/s] 31%|███ | 115876/371472 [9:12:47<19:58:37, 3.55it/s] 31%|███ | 115877/371472 [9:12:47<19:24:30, 3.66it/s] 31%|███ | 115878/371472 [9:12:47<19:42:56, 3.60it/s] 31%|███ | 115879/371472 [9:12:48<20:09:39, 3.52it/s] 31%|███ | 115880/371472 [9:12:48<21:27:58, 3.31it/s] {'loss': 3.4282, 'learning_rate': 7.19580253830076e-07, 'epoch': 4.99} + 31%|███ | 115880/371472 [9:12:48<21:27:58, 3.31it/s] 31%|███ | 115881/371472 [9:12:48<21:40:09, 3.28it/s] 31%|███ | 115882/371472 [9:12:49<20:11:25, 3.52it/s] 31%|███ | 115883/371472 [9:12:49<20:33:00, 3.45it/s] 31%|███ | 115884/371472 [9:12:49<20:11:38, 3.52it/s] 31%|███ | 115885/371472 [9:12:50<20:19:01, 3.49it/s] 31%|███ | 115886/371472 [9:12:50<19:42:25, 3.60it/s] 31%|███ | 115887/371472 [9:12:50<19:24:03, 3.66it/s] 31%|███ | 115888/371472 [9:12:50<20:27:31, 3.47it/s] 31%|███ | 115889/371472 [9:12:51<20:58:37, 3.38it/s] 31%|███ | 115890/371472 [9:12:51<21:46:14, 3.26it/s] 31%|███ | 115891/371472 [9:12:51<20:58:55, 3.38it/s] 31%|███ | 115892/371472 [9:12:52<21:43:59, 3.27it/s] 31%|███ | 115893/371472 [9:12:52<20:43:21, 3.43it/s] 31%|███ | 115894/371472 [9:12:52<21:41:25, 3.27it/s] 31%|███ | 115895/371472 [9:12:52<21:22:07, 3.32it/s] 31%|███ | 115896/371472 [9:12:53<21:29:12, 3.30it/s] 31%|███ | 115897/371472 [9:12:53<23:20:07, 3.04it/s] 31%|███ | 115898/371472 [9:12:53<22:35:23, 3.14it/s] 31%|███ | 115899/371472 [9:12:54<21:53:14, 3.24it/s] 31%|███ | 115900/371472 [9:12:54<21:17:51, 3.33it/s] {'loss': 3.2874, 'learning_rate': 7.195317718545972e-07, 'epoch': 4.99} + 31%|███ | 115900/371472 [9:12:54<21:17:51, 3.33it/s] 31%|███ | 115901/371472 [9:12:54<20:43:03, 3.43it/s] 31%|███ | 115902/371472 [9:12:55<20:38:06, 3.44it/s] 31%|███ | 115903/371472 [9:12:55<20:26:17, 3.47it/s] 31%|███ | 115904/371472 [9:12:55<21:09:01, 3.36it/s] 31%|███ | 115905/371472 [9:12:56<21:58:32, 3.23it/s] 31%|███ | 115906/371472 [9:12:56<21:28:26, 3.31it/s] 31%|███ | 115907/371472 [9:12:56<21:09:29, 3.36it/s] 31%|███ | 115908/371472 [9:12:56<20:29:08, 3.47it/s] 31%|███ | 115909/371472 [9:12:57<20:20:11, 3.49it/s] 31%|███ | 115910/371472 [9:12:57<21:37:56, 3.28it/s] 31%|███ | 115911/371472 [9:12:57<23:30:24, 3.02it/s] 31%|███ | 115912/371472 [9:12:58<22:18:17, 3.18it/s] 31%|███ | 115913/371472 [9:12:58<21:01:33, 3.38it/s] 31%|███ | 115914/371472 [9:12:58<21:21:43, 3.32it/s] 31%|███ | 115915/371472 [9:12:59<24:33:29, 2.89it/s] 31%|███ | 115916/371472 [9:12:59<22:52:52, 3.10it/s] 31%|███ | 115917/371472 [9:12:59<22:10:39, 3.20it/s] 31%|███ | 115918/371472 [9:13:00<21:53:45, 3.24it/s] 31%|███ | 115919/371472 [9:13:00<21:41:45, 3.27it/s] 31%|███ | 115920/371472 [9:13:00<20:09:04, 3.52it/s] {'loss': 3.2489, 'learning_rate': 7.194832898791182e-07, 'epoch': 4.99} + 31%|███ | 115920/371472 [9:13:00<20:09:04, 3.52it/s] 31%|███ | 115921/371472 [9:13:00<19:12:54, 3.69it/s] 31%|███ | 115922/371472 [9:13:01<19:51:08, 3.58it/s] 31%|███ | 115923/371472 [9:13:01<19:50:32, 3.58it/s] 31%|███ | 115924/371472 [9:13:01<19:33:38, 3.63it/s] 31%|███ | 115925/371472 [9:13:01<18:52:49, 3.76it/s] 31%|███ | 115926/371472 [9:13:02<19:47:23, 3.59it/s] 31%|███ | 115927/371472 [9:13:02<19:10:10, 3.70it/s] 31%|███ | 115928/371472 [9:13:02<19:33:25, 3.63it/s] 31%|███ | 115929/371472 [9:13:03<20:31:00, 3.46it/s] 31%|███ | 115930/371472 [9:13:03<19:31:11, 3.64it/s] 31%|███ | 115931/371472 [9:13:03<19:46:00, 3.59it/s] 31%|███ | 115932/371472 [9:13:03<20:03:20, 3.54it/s] 31%|███ | 115933/371472 [9:13:04<19:46:36, 3.59it/s] 31%|███ | 115934/371472 [9:13:04<20:10:30, 3.52it/s] 31%|███ | 115935/371472 [9:13:04<19:28:32, 3.64it/s] 31%|███ | 115936/371472 [9:13:05<19:42:44, 3.60it/s] 31%|███ | 115937/371472 [9:13:05<21:09:34, 3.35it/s] 31%|███ | 115938/371472 [9:13:05<20:41:08, 3.43it/s] 31%|███ | 115939/371472 [9:13:05<20:21:51, 3.49it/s] 31%|███ | 115940/371472 [9:13:06<21:23:24, 3.32it/s] {'loss': 3.3017, 'learning_rate': 7.194348079036393e-07, 'epoch': 4.99} + 31%|███ | 115940/371472 [9:13:06<21:23:24, 3.32it/s] 31%|███ | 115941/371472 [9:13:06<21:02:01, 3.37it/s] 31%|███ | 115942/371472 [9:13:06<20:53:45, 3.40it/s] 31%|███ | 115943/371472 [9:13:07<19:53:13, 3.57it/s] 31%|███ | 115944/371472 [9:13:07<19:30:26, 3.64it/s] 31%|███ | 115945/371472 [9:13:07<19:53:34, 3.57it/s] 31%|███ | 115946/371472 [9:13:07<19:18:04, 3.68it/s] 31%|███ | 115947/371472 [9:13:08<18:36:34, 3.81it/s] 31%|███ | 115948/371472 [9:13:08<18:02:13, 3.94it/s] 31%|███ | 115949/371472 [9:13:08<18:22:02, 3.86it/s] 31%|███ | 115950/371472 [9:13:08<20:26:07, 3.47it/s] 31%|███ | 115951/371472 [9:13:09<20:17:29, 3.50it/s] 31%|███ | 115952/371472 [9:13:09<21:17:29, 3.33it/s] 31%|███ | 115953/371472 [9:13:09<20:29:55, 3.46it/s] 31%|███ | 115954/371472 [9:13:10<20:12:47, 3.51it/s] 31%|███ | 115955/371472 [9:13:10<20:16:10, 3.50it/s] 31%|███ | 115956/371472 [9:13:10<20:33:31, 3.45it/s] 31%|███ | 115957/371472 [9:13:10<20:13:26, 3.51it/s] 31%|███ | 115958/371472 [9:13:11<19:31:12, 3.64it/s] 31%|███ | 115959/371472 [9:13:11<20:34:37, 3.45it/s] 31%|███ | 115960/371472 [9:13:11<19:54:39, 3.56it/s] {'loss': 3.54, 'learning_rate': 7.193863259281604e-07, 'epoch': 4.99} + 31%|███ | 115960/371472 [9:13:11<19:54:39, 3.56it/s] 31%|███ | 115961/371472 [9:13:12<19:52:39, 3.57it/s] 31%|███ | 115962/371472 [9:13:12<19:44:42, 3.59it/s] 31%|███ | 115963/371472 [9:13:12<19:29:25, 3.64it/s] 31%|███ | 115964/371472 [9:13:12<19:42:09, 3.60it/s] 31%|███ | 115965/371472 [9:13:13<19:08:36, 3.71it/s] 31%|███ | 115966/371472 [9:13:13<19:35:22, 3.62it/s] 31%|███ | 115967/371472 [9:13:13<19:28:04, 3.65it/s] 31%|███ | 115968/371472 [9:13:14<20:23:53, 3.48it/s] 31%|███ | 115969/371472 [9:13:14<19:45:15, 3.59it/s] 31%|███ | 115970/371472 [9:13:14<19:13:26, 3.69it/s] 31%|███ | 115971/371472 [9:13:14<19:10:41, 3.70it/s] 31%|███ | 115972/371472 [9:13:15<20:17:51, 3.50it/s] 31%|███ | 115973/371472 [9:13:15<20:00:37, 3.55it/s] 31%|███ | 115974/371472 [9:13:15<19:57:03, 3.56it/s] 31%|███ | 115975/371472 [9:13:15<19:32:40, 3.63it/s] 31%|███ | 115976/371472 [9:13:16<19:48:11, 3.58it/s] 31%|███ | 115977/371472 [9:13:16<19:03:21, 3.72it/s] 31%|███ | 115978/371472 [9:13:16<19:13:23, 3.69it/s] 31%|███ | 115979/371472 [9:13:17<18:39:33, 3.80it/s] 31%|███ | 115980/371472 [9:13:17<18:32:04, 3.83it/s] {'loss': 3.5398, 'learning_rate': 7.193378439526816e-07, 'epoch': 5.0} + 31%|███ | 115980/371472 [9:13:17<18:32:04, 3.83it/s] 31%|███ | 115981/371472 [9:13:17<19:12:41, 3.69it/s] 31%|███ | 115982/371472 [9:13:17<18:39:54, 3.80it/s] 31%|███ | 115983/371472 [9:13:18<18:40:46, 3.80it/s] 31%|███ | 115984/371472 [9:13:18<19:41:00, 3.61it/s] 31%|███ | 115985/371472 [9:13:18<20:02:57, 3.54it/s] 31%|███ | 115986/371472 [9:13:18<19:15:08, 3.69it/s] 31%|███ | 115987/371472 [9:13:19<20:17:06, 3.50it/s] 31%|███ | 115988/371472 [9:13:19<19:56:46, 3.56it/s] 31%|███ | 115989/371472 [9:13:19<20:50:49, 3.40it/s] 31%|███ | 115990/371472 [9:13:20<20:50:35, 3.40it/s] 31%|███ | 115991/371472 [9:13:20<19:56:09, 3.56it/s] 31%|███ | 115992/371472 [9:13:20<19:34:38, 3.62it/s] 31%|███ | 115993/371472 [9:13:20<19:08:20, 3.71it/s] 31%|███ | 115994/371472 [9:13:21<19:50:17, 3.58it/s] 31%|███ | 115995/371472 [9:13:21<20:16:54, 3.50it/s] 31%|███ | 115996/371472 [9:13:21<20:29:53, 3.46it/s] 31%|███ | 115997/371472 [9:13:22<20:26:01, 3.47it/s] 31%|███ | 115998/371472 [9:13:22<20:14:44, 3.51it/s] 31%|███ | 115999/371472 [9:13:22<20:19:36, 3.49it/s] 31%|███ | 116000/371472 [9:13:22<20:18:58, 3.49it/s] {'loss': 3.3978, 'learning_rate': 7.192893619772027e-07, 'epoch': 5.0} + 31%|███ | 116000/371472 [9:13:22<20:18:58, 3.49it/s] 31%|███ | 116001/371472 [9:13:23<20:28:41, 3.47it/s] 31%|███ | 116002/371472 [9:13:23<19:36:24, 3.62it/s] 31%|███ | 116003/371472 [9:13:23<19:50:21, 3.58it/s] 31%|███ | 116004/371472 [9:13:24<19:47:54, 3.58it/s] 31%|███ | 116005/371472 [9:13:24<19:51:42, 3.57it/s] 31%|███ | 116006/371472 [9:13:24<19:15:38, 3.68it/s] 31%|███ | 116007/371472 [9:13:24<20:23:49, 3.48it/s] 31%|███ | 116008/371472 [9:13:25<20:16:00, 3.50it/s] 31%|███ | 116009/371472 [9:13:25<19:48:49, 3.58it/s] 31%|███ | 116010/371472 [9:13:25<19:51:38, 3.57it/s] 31%|███ | 116011/371472 [9:13:25<19:15:00, 3.69it/s] 31%|███ | 116012/371472 [9:13:26<19:11:26, 3.70it/s] 31%|███ | 116013/371472 [9:13:26<19:33:39, 3.63it/s] 31%|███ | 116014/371472 [9:13:26<22:06:42, 3.21it/s] 31%|███ | 116015/371472 [9:13:27<22:03:19, 3.22it/s] 31%|███ | 116016/371472 [9:13:27<20:43:40, 3.42it/s] 31%|███ | 116017/371472 [9:13:27<21:07:44, 3.36it/s] 31%|███ | 116018/371472 [9:13:28<21:14:43, 3.34it/s] 31%|███ | 116019/371472 [9:13:28<22:01:03, 3.22it/s] 31%|███ | 116020/371472 [9:13:28<20:42:52, 3.43it/s] {'loss': 3.2752, 'learning_rate': 7.192408800017238e-07, 'epoch': 5.0} + 31%|███ | 116020/371472 [9:13:28<20:42:52, 3.43it/s] 31%|███ | 116021/371472 [9:13:28<20:06:16, 3.53it/s] 31%|███ | 116022/371472 [9:13:29<19:58:24, 3.55it/s] 31%|███ | 116023/371472 [9:13:29<19:04:08, 3.72it/s] 31%|███ | 116024/371472 [9:13:29<19:10:50, 3.70it/s] 31%|███ | 116025/371472 [9:13:30<18:46:39, 3.78it/s] 31%|███ | 116026/371472 [9:13:30<18:51:21, 3.76it/s] 31%|███ | 116027/371472 [9:13:30<18:32:39, 3.83it/s] 31%|███ | 116028/371472 [9:13:30<18:24:47, 3.85it/s] 31%|███ | 116029/371472 [9:13:31<18:55:36, 3.75it/s] 31%|███ | 116030/371472 [9:13:31<18:23:10, 3.86it/s] 31%|███ | 116031/371472 [9:13:31<19:19:13, 3.67it/s] 31%|███ | 116032/371472 [9:13:31<20:02:55, 3.54it/s] 31%|███ | 116033/371472 [9:13:32<20:03:07, 3.54it/s] 31%|███ | 116034/371472 [9:13:32<20:18:31, 3.49it/s] 31%|███ | 116035/371472 [9:13:32<20:12:47, 3.51it/s] 31%|███ | 116036/371472 [9:13:33<21:56:59, 3.23it/s] 31%|███ | 116037/371472 [9:13:33<21:45:08, 3.26it/s] 31%|███ | 116038/371472 [9:13:33<20:54:14, 3.39it/s] 31%|███ | 116039/371472 [9:13:33<20:38:42, 3.44it/s] 31%|███ | 116040/371472 [9:13:34<21:39:32, 3.28it/s] {'loss': 3.2816, 'learning_rate': 7.191923980262449e-07, 'epoch': 5.0} + 31%|███ | 116040/371472 [9:13:34<21:39:32, 3.28it/s] 31%|███ | 116041/371472 [9:13:34<20:16:08, 3.50it/s] 31%|███ | 116042/371472 [9:13:34<21:54:43, 3.24it/s] 31%|███ | 116043/371472 [9:13:35<23:45:32, 2.99it/s] 31%|███ | 116044/371472 [9:13:35<22:09:25, 3.20it/s] 31%|███ | 116045/371472 [9:13:35<20:55:38, 3.39it/s] 31%|███ | 116046/371472 [9:13:36<20:43:26, 3.42it/s] 31%|███ | 116047/371472 [9:13:36<19:44:29, 3.59it/s] 31%|███ | 116048/371472 [9:13:36<19:40:58, 3.60it/s] 31%|███ | 116049/371472 [9:13:36<19:22:27, 3.66it/s] 31%|███ | 116050/371472 [9:13:37<18:55:53, 3.75it/s] 31%|███ | 116051/371472 [9:13:37<18:27:14, 3.84it/s] 31%|███ | 116052/371472 [9:13:37<18:00:27, 3.94it/s] 31%|███ | 116053/371472 [9:13:38<20:13:07, 3.51it/s] 31%|███ | 116054/371472 [9:13:38<21:33:13, 3.29it/s] 31%|███ | 116055/371472 [9:13:38<20:32:05, 3.46it/s] 31%|███ | 116056/371472 [9:13:38<20:36:39, 3.44it/s] 31%|███ | 116057/371472 [9:13:39<20:35:09, 3.45it/s] 31%|███ | 116058/371472 [9:13:39<19:33:31, 3.63it/s] 31%|███ | 116059/371472 [9:13:39<20:04:59, 3.53it/s] 31%|███ | 116060/371472 [9:13:40<20:15:50, 3.50it/s] {'loss': 3.1059, 'learning_rate': 7.19143916050766e-07, 'epoch': 5.0} + 31%|███ | 116060/371472 [9:13:40<20:15:50, 3.50it/s] 31%|███ | 116061/371472 [9:13:40<20:32:53, 3.45it/s] 31%|███ | 116062/371472 [9:13:40<19:36:20, 3.62it/s] 31%|███ | 116063/371472 [9:13:40<19:15:14, 3.68it/s] 31%|███ | 116064/371472 [9:13:41<18:57:33, 3.74it/s] 31%|███ | 116065/371472 [9:13:41<18:44:38, 3.79it/s] 31%|███ | 116066/371472 [9:13:41<18:12:28, 3.90it/s] 31%|███ | 116067/371472 [9:13:41<19:06:38, 3.71it/s] 31%|███ | 116068/371472 [9:13:42<19:00:40, 3.73it/s] 31%|███ | 116069/371472 [9:13:42<18:30:01, 3.83it/s] 31%|███ | 116070/371472 [9:13:42<18:15:52, 3.88it/s] 31%|███ | 116071/371472 [9:13:42<19:47:33, 3.58it/s] 31%|███ | 116072/371472 [9:13:43<20:31:26, 3.46it/s] 31%|███ | 116073/371472 [9:13:43<20:05:49, 3.53it/s] 31%|███ | 116074/371472 [9:13:43<19:22:30, 3.66it/s] 31%|███ | 116075/371472 [9:13:44<19:25:59, 3.65it/s] 31%|███ | 116076/371472 [9:13:44<20:31:19, 3.46it/s] 31%|███ | 116077/371472 [9:13:44<20:19:25, 3.49it/s] 31%|███ | 116078/371472 [9:13:44<20:39:49, 3.43it/s] 31%|███ | 116079/371472 [9:13:45<20:04:27, 3.53it/s] 31%|███ | 116080/371472 [9:13:45<19:34:51, 3.62it/s] {'loss': 3.4993, 'learning_rate': 7.190954340752871e-07, 'epoch': 5.0} + 31%|███ | 116080/371472 [9:13:45<19:34:51, 3.62it/s] 31%|███ | 116081/371472 [9:13:45<20:09:59, 3.52it/s] 31%|███ | 116082/371472 [9:13:46<20:32:05, 3.45it/s] 31%|███ | 116083/371472 [9:13:46<20:45:21, 3.42it/s] 31%|███ | 116084/371472 [9:13:46<19:37:59, 3.61it/s] 31%|███▏ | 116085/371472 [9:13:46<20:19:48, 3.49it/s]Some non-default generation parameters are set in the model config. These should go into a GenerationConfig file (https://huggingface.co/docs/transformers/generation_strategies#save-a-custom-decoding-strategy-with-your-model) instead. This warning will be raised to an exception in v4.41. +Non-default generation parameters: {'max_length': 200, 'early_stopping': True, 'num_beams': 5, 'forced_eos_token_id': 2} +/opt/conda/lib/python3.10/multiprocessing/popen_fork.py:66: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock. + self.pid = os.fork() + 31%|███▏ | 116086/371472 [9:14:18<686:49:52, 9.68s/it] 31%|███▏ | 116087/371472 [9:14:18<487:28:16, 6.87s/it] 31%|███▏ | 116088/371472 [9:14:19<347:37:58, 4.90s/it] 31%|███▏ | 116089/371472 [9:14:19<251:15:02, 3.54s/it] 31%|███▏ | 116090/371472 [9:14:19<184:02:30, 2.59s/it] 31%|███▏ | 116091/371472 [9:14:20<135:20:58, 1.91s/it] 31%|███▏ | 116092/371472 [9:14:20<100:43:41, 1.42s/it] 31%|███▏ | 116093/371472 [9:14:20<76:17:26, 1.08s/it] 31%|███▏ | 116094/371472 [9:14:21<60:23:48, 1.17it/s] 31%|███▏ | 116095/371472 [9:14:21<49:18:02, 1.44it/s] 31%|███▏ | 116096/371472 [9:14:21<40:44:58, 1.74it/s] 31%|███▏ | 116097/371472 [9:14:22<34:45:11, 2.04it/s] 31%|███▏ | 116098/371472 [9:14:22<32:35:25, 2.18it/s] 31%|███▏ | 116099/371472 [9:14:22<31:09:37, 2.28it/s] 31%|███▏ | 116100/371472 [9:14:23<27:26:10, 2.59it/s] {'loss': 3.2164, 'learning_rate': 7.190469520998081e-07, 'epoch': 5.0} + 31%|███▏ | 116100/371472 [9:14:23<27:26:10, 2.59it/s] 31%|███▏ | 116101/371472 [9:14:23<26:33:22, 2.67it/s] 31%|███▏ | 116102/371472 [9:14:23<27:41:44, 2.56it/s] 31%|███▏ | 116103/371472 [9:14:24<25:27:39, 2.79it/s] 31%|███▏ | 116104/371472 [9:14:24<23:33:51, 3.01it/s] 31%|███▏ | 116105/371472 [9:14:24<21:56:01, 3.23it/s] 31%|███▏ | 116106/371472 [9:14:24<20:35:34, 3.44it/s] 31%|███▏ | 116107/371472 [9:14:25<20:16:46, 3.50it/s] 31%|███▏ | 116108/371472 [9:14:25<20:20:35, 3.49it/s] 31%|███▏ | 116109/371472 [9:14:25<20:48:20, 3.41it/s] 31%|███▏ | 116110/371472 [9:14:26<19:45:23, 3.59it/s] 31%|███▏ | 116111/371472 [9:14:26<19:01:32, 3.73it/s] 31%|███▏ | 116112/371472 [9:14:26<20:22:53, 3.48it/s] 31%|███▏ | 116113/371472 [9:14:26<19:43:34, 3.60it/s] 31%|███▏ | 116114/371472 [9:14:27<20:01:45, 3.54it/s] 31%|███▏ | 116115/371472 [9:14:27<19:50:43, 3.57it/s] 31%|███▏ | 116116/371472 [9:14:27<20:41:51, 3.43it/s] 31%|███▏ | 116117/371472 [9:14:28<20:38:41, 3.44it/s] 31%|███▏ | 116118/371472 [9:14:28<20:48:43, 3.41it/s] 31%|███▏ | 116119/371472 [9:14:28<20:40:34, 3.43it/s] 31%|███▏ | 116120/371472 [9:14:28<21:15:25, 3.34it/s] {'loss': 3.1924, 'learning_rate': 7.189984701243293e-07, 'epoch': 5.0} + 31%|███▏ | 116120/371472 [9:14:28<21:15:25, 3.34it/s] 31%|███▏ | 116121/371472 [9:14:29<22:09:00, 3.20it/s] 31%|███▏ | 116122/371472 [9:14:29<21:53:21, 3.24it/s] 31%|███▏ | 116123/371472 [9:14:29<20:38:25, 3.44it/s] 31%|███▏ | 116124/371472 [9:14:30<20:25:21, 3.47it/s] 31%|███▏ | 116125/371472 [9:14:30<20:07:45, 3.52it/s] 31%|███▏ | 116126/371472 [9:14:30<19:49:36, 3.58it/s] 31%|███▏ | 116127/371472 [9:14:31<22:20:29, 3.17it/s] 31%|███▏ | 116128/371472 [9:14:31<20:55:11, 3.39it/s] 31%|███▏ | 116129/371472 [9:14:31<20:04:38, 3.53it/s] 31%|███▏ | 116130/371472 [9:14:31<19:49:05, 3.58it/s] 31%|███▏ | 116131/371472 [9:14:32<19:20:06, 3.67it/s] 31%|███▏ | 116132/371472 [9:14:32<19:10:42, 3.70it/s] 31%|███▏ | 116133/371472 [9:14:32<18:49:43, 3.77it/s] 31%|███▏ | 116134/371472 [9:14:32<19:50:10, 3.58it/s] 31%|███▏ | 116135/371472 [9:14:33<19:17:13, 3.68it/s] 31%|███▏ | 116136/371472 [9:14:33<20:23:01, 3.48it/s] 31%|███▏ | 116137/371472 [9:14:33<20:48:18, 3.41it/s] 31%|███▏ | 116138/371472 [9:14:34<20:39:43, 3.43it/s] 31%|███▏ | 116139/371472 [9:14:34<20:48:16, 3.41it/s] 31%|███▏ | 116140/371472 [9:14:34<19:48:33, 3.58it/s] {'loss': 3.4916, 'learning_rate': 7.189499881488504e-07, 'epoch': 5.0} + 31%|███▏ | 116140/371472 [9:14:34<19:48:33, 3.58it/s] 31%|███▏ | 116141/371472 [9:14:34<19:17:12, 3.68it/s] 31%|███▏ | 116142/371472 [9:14:35<19:15:27, 3.68it/s] 31%|███▏ | 116143/371472 [9:14:35<19:18:49, 3.67it/s] 31%|███▏ | 116144/371472 [9:14:35<19:26:04, 3.65it/s] 31%|███▏ | 116145/371472 [9:14:35<18:49:16, 3.77it/s] 31%|███▏ | 116146/371472 [9:14:36<18:43:13, 3.79it/s] 31%|███▏ | 116147/371472 [9:14:36<18:36:37, 3.81it/s] 31%|███▏ | 116148/371472 [9:14:36<19:31:24, 3.63it/s] 31%|███▏ | 116149/371472 [9:14:37<19:02:47, 3.72it/s] 31%|███▏ | 116150/371472 [9:14:37<19:15:55, 3.68it/s] 31%|███▏ | 116151/371472 [9:14:37<18:36:13, 3.81it/s] 31%|███▏ | 116152/371472 [9:14:37<18:18:06, 3.88it/s] 31%|███▏ | 116153/371472 [9:14:38<18:07:33, 3.91it/s] 31%|███▏ | 116154/371472 [9:14:38<18:41:06, 3.80it/s] 31%|███▏ | 116155/371472 [9:14:38<18:54:20, 3.75it/s] 31%|███▏ | 116156/371472 [9:14:38<21:17:17, 3.33it/s] 31%|███▏ | 116157/371472 [9:14:39<20:36:34, 3.44it/s] 31%|███▏ | 116158/371472 [9:14:39<21:45:56, 3.26it/s] 31%|███▏ | 116159/371472 [9:14:39<22:36:30, 3.14it/s] 31%|███▏ | 116160/371472 [9:14:40<22:42:19, 3.12it/s] {'loss': 3.3075, 'learning_rate': 7.189015061733716e-07, 'epoch': 5.0} + 31%|███▏ | 116160/371472 [9:14:40<22:42:19, 3.12it/s] 31%|███▏ | 116161/371472 [9:14:40<22:18:00, 3.18it/s] 31%|███▏ | 116162/371472 [9:14:40<22:18:29, 3.18it/s] 31%|███▏ | 116163/371472 [9:14:41<21:11:50, 3.35it/s] 31%|███▏ | 116164/371472 [9:14:41<20:21:23, 3.48it/s] 31%|███▏ | 116165/371472 [9:14:41<20:42:01, 3.43it/s] 31%|███▏ | 116166/371472 [9:14:42<21:03:48, 3.37it/s] 31%|███▏ | 116167/371472 [9:14:42<20:28:25, 3.46it/s] 31%|███▏ | 116168/371472 [9:14:42<20:49:30, 3.41it/s] 31%|███▏ | 116169/371472 [9:14:42<20:27:08, 3.47it/s] 31%|███▏ | 116170/371472 [9:14:43<20:44:33, 3.42it/s] 31%|███▏ | 116171/371472 [9:14:43<20:36:25, 3.44it/s] 31%|███▏ | 116172/371472 [9:14:43<21:02:05, 3.37it/s] 31%|███▏ | 116173/371472 [9:14:44<20:14:05, 3.50it/s] 31%|███▏ | 116174/371472 [9:14:44<19:29:34, 3.64it/s] 31%|███▏ | 116175/371472 [9:14:44<19:43:34, 3.59it/s] 31%|███▏ | 116176/371472 [9:14:44<19:21:37, 3.66it/s] 31%|███▏ | 116177/371472 [9:14:45<19:44:18, 3.59it/s] 31%|███▏ | 116178/371472 [9:14:45<20:51:51, 3.40it/s] 31%|███▏ | 116179/371472 [9:14:45<21:33:12, 3.29it/s] 31%|███▏ | 116180/371472 [9:14:46<21:42:19, 3.27it/s] {'loss': 3.4159, 'learning_rate': 7.188530241978926e-07, 'epoch': 5.0} + 31%|███▏ | 116180/371472 [9:14:46<21:42:19, 3.27it/s] 31%|███▏ | 116181/371472 [9:14:46<21:58:40, 3.23it/s] 31%|███▏ | 116182/371472 [9:14:46<21:17:44, 3.33it/s] 31%|███▏ | 116183/371472 [9:14:46<20:06:06, 3.53it/s] 31%|███▏ | 116184/371472 [9:14:47<20:30:31, 3.46it/s] 31%|███▏ | 116185/371472 [9:14:47<22:11:24, 3.20it/s] 31%|███▏ | 116186/371472 [9:14:47<22:32:58, 3.14it/s] 31%|███▏ | 116187/371472 [9:14:48<23:40:54, 2.99it/s] 31%|███▏ | 116188/371472 [9:14:48<23:20:52, 3.04it/s] 31%|███▏ | 116189/371472 [9:14:48<23:12:06, 3.06it/s] 31%|███▏ | 116190/371472 [9:14:49<22:04:20, 3.21it/s] 31%|███▏ | 116191/371472 [9:14:49<20:46:42, 3.41it/s] 31%|███▏ | 116192/371472 [9:14:49<21:34:30, 3.29it/s] 31%|███▏ | 116193/371472 [9:14:50<21:08:54, 3.35it/s] 31%|███▏ | 116194/371472 [9:14:50<20:01:13, 3.54it/s] 31%|███▏ | 116195/371472 [9:14:50<22:09:52, 3.20it/s] 31%|███▏ | 116196/371472 [9:14:50<21:12:33, 3.34it/s] 31%|███▏ | 116197/371472 [9:14:51<21:07:10, 3.36it/s] 31%|███▏ | 116198/371472 [9:14:51<22:11:28, 3.20it/s] 31%|███▏ | 116199/371472 [9:14:51<22:17:01, 3.18it/s] 31%|███▏ | 116200/371472 [9:14:52<20:55:02, 3.39it/s] {'loss': 3.3504, 'learning_rate': 7.188045422224137e-07, 'epoch': 5.0} + 31%|███▏ | 116200/371472 [9:14:52<20:55:02, 3.39it/s] 31%|███▏ | 116201/371472 [9:14:52<22:06:47, 3.21it/s] 31%|███▏ | 116202/371472 [9:14:52<20:41:30, 3.43it/s] 31%|███▏ | 116203/371472 [9:14:53<20:05:19, 3.53it/s] 31%|███▏ | 116204/371472 [9:14:53<20:06:26, 3.53it/s] 31%|███▏ | 116205/371472 [9:14:53<19:57:53, 3.55it/s] 31%|███▏ | 116206/371472 [9:14:53<20:21:35, 3.48it/s] 31%|███▏ | 116207/371472 [9:14:54<20:28:39, 3.46it/s] 31%|███▏ | 116208/371472 [9:14:54<20:10:26, 3.51it/s] 31%|███▏ | 116209/371472 [9:14:54<20:20:26, 3.49it/s] 31%|███▏ | 116210/371472 [9:14:55<19:48:43, 3.58it/s] 31%|███▏ | 116211/371472 [9:14:55<19:36:18, 3.62it/s] 31%|███▏ | 116212/371472 [9:14:55<19:26:30, 3.65it/s] 31%|███▏ | 116213/371472 [9:14:55<20:19:17, 3.49it/s] 31%|███▏ | 116214/371472 [9:14:56<20:02:09, 3.54it/s] 31%|███▏ | 116215/371472 [9:14:56<20:54:31, 3.39it/s] 31%|███▏ | 116216/371472 [9:14:56<20:44:53, 3.42it/s] 31%|███▏ | 116217/371472 [9:14:57<19:51:18, 3.57it/s] 31%|███▏ | 116218/371472 [9:14:57<19:17:38, 3.67it/s] 31%|███▏ | 116219/371472 [9:14:57<18:58:40, 3.74it/s] 31%|███▏ | 116220/371472 [9:14:57<19:19:09, 3.67it/s] {'loss': 3.1897, 'learning_rate': 7.187560602469348e-07, 'epoch': 5.01} + 31%|███▏ | 116220/371472 [9:14:57<19:19:09, 3.67it/s] 31%|███▏ | 116221/371472 [9:14:58<19:38:52, 3.61it/s] 31%|███▏ | 116222/371472 [9:14:58<19:11:05, 3.70it/s] 31%|███▏ | 116223/371472 [9:14:58<19:26:40, 3.65it/s] 31%|███▏ | 116224/371472 [9:14:58<20:18:00, 3.49it/s] 31%|███▏ | 116225/371472 [9:14:59<21:00:19, 3.38it/s] 31%|███▏ | 116226/371472 [9:14:59<20:42:39, 3.42it/s] 31%|███▏ | 116227/371472 [9:14:59<20:40:49, 3.43it/s] 31%|███▏ | 116228/371472 [9:15:00<21:24:54, 3.31it/s] 31%|███▏ | 116229/371472 [9:15:00<22:27:53, 3.16it/s] 31%|███▏ | 116230/371472 [9:15:00<21:58:04, 3.23it/s] 31%|███▏ | 116231/371472 [9:15:01<21:59:38, 3.22it/s] 31%|███▏ | 116232/371472 [9:15:01<21:51:05, 3.24it/s] 31%|███▏ | 116233/371472 [9:15:01<21:33:36, 3.29it/s] 31%|███▏ | 116234/371472 [9:15:02<22:38:46, 3.13it/s] 31%|███▏ | 116235/371472 [9:15:02<21:56:03, 3.23it/s] 31%|███▏ | 116236/371472 [9:15:02<21:58:22, 3.23it/s] 31%|███▏ | 116237/371472 [9:15:02<22:14:16, 3.19it/s] 31%|███▏ | 116238/371472 [9:15:03<21:30:35, 3.30it/s] 31%|███▏ | 116239/371472 [9:15:03<20:12:35, 3.51it/s] 31%|███▏ | 116240/371472 [9:15:03<23:43:34, 2.99it/s] {'loss': 3.1461, 'learning_rate': 7.18707578271456e-07, 'epoch': 5.01} + 31%|███▏ | 116240/371472 [9:15:03<23:43:34, 2.99it/s] 31%|███▏ | 116241/371472 [9:15:04<21:55:15, 3.23it/s] 31%|███▏ | 116242/371472 [9:15:04<21:19:03, 3.33it/s] 31%|███▏ | 116243/371472 [9:15:04<21:40:07, 3.27it/s] 31%|███▏ | 116244/371472 [9:15:05<20:47:50, 3.41it/s] 31%|███▏ | 116245/371472 [9:15:05<21:06:47, 3.36it/s] 31%|███▏ | 116246/371472 [9:15:05<21:09:03, 3.35it/s] 31%|███▏ | 116247/371472 [9:15:05<21:02:19, 3.37it/s] 31%|███▏ | 116248/371472 [9:15:06<20:27:45, 3.46it/s] 31%|███▏ | 116249/371472 [9:15:06<20:20:22, 3.49it/s] 31%|███▏ | 116250/371472 [9:15:06<19:59:56, 3.54it/s] 31%|███▏ | 116251/371472 [9:15:07<19:25:30, 3.65it/s] 31%|███▏ | 116252/371472 [9:15:07<20:04:44, 3.53it/s] 31%|███▏ | 116253/371472 [9:15:07<20:00:23, 3.54it/s] 31%|███▏ | 116254/371472 [9:15:07<21:04:41, 3.36it/s] 31%|███▏ | 116255/371472 [9:15:08<20:20:56, 3.48it/s] 31%|███▏ | 116256/371472 [9:15:08<20:31:50, 3.45it/s] 31%|███▏ | 116257/371472 [9:15:08<20:53:50, 3.39it/s] 31%|███▏ | 116258/371472 [9:15:09<20:53:59, 3.39it/s] 31%|███▏ | 116259/371472 [9:15:09<20:47:46, 3.41it/s] 31%|███▏ | 116260/371472 [9:15:09<20:51:34, 3.40it/s] {'loss': 3.3035, 'learning_rate': 7.186590962959771e-07, 'epoch': 5.01} + 31%|███▏ | 116260/371472 [9:15:09<20:51:34, 3.40it/s] 31%|███▏ | 116261/371472 [9:15:09<20:02:55, 3.54it/s] 31%|███▏ | 116262/371472 [9:15:10<20:35:09, 3.44it/s] 31%|███▏ | 116263/371472 [9:15:10<19:56:53, 3.55it/s] 31%|███▏ | 116264/371472 [9:15:10<19:12:56, 3.69it/s] 31%|███▏ | 116265/371472 [9:15:11<19:08:01, 3.71it/s] 31%|███▏ | 116266/371472 [9:15:11<19:21:38, 3.66it/s] 31%|███▏ | 116267/371472 [9:15:11<19:17:02, 3.68it/s] 31%|███▏ | 116268/371472 [9:15:11<19:00:57, 3.73it/s] 31%|███▏ | 116269/371472 [9:15:12<19:46:22, 3.59it/s] 31%|███▏ | 116270/371472 [9:15:12<22:02:42, 3.22it/s] 31%|███▏ | 116271/371472 [9:15:12<22:20:10, 3.17it/s] 31%|███▏ | 116272/371472 [9:15:13<22:12:49, 3.19it/s] 31%|███▏ | 116273/371472 [9:15:13<21:47:29, 3.25it/s] 31%|███▏ | 116274/371472 [9:15:13<21:27:13, 3.30it/s] 31%|███▏ | 116275/371472 [9:15:14<21:14:08, 3.34it/s] 31%|███▏ | 116276/371472 [9:15:14<22:39:33, 3.13it/s] 31%|███▏ | 116277/371472 [9:15:14<21:01:36, 3.37it/s] 31%|███▏ | 116278/371472 [9:15:14<19:53:15, 3.56it/s] 31%|███▏ | 116279/371472 [9:15:15<20:11:40, 3.51it/s] 31%|███▏ | 116280/371472 [9:15:15<20:50:38, 3.40it/s] {'loss': 3.0551, 'learning_rate': 7.186106143204982e-07, 'epoch': 5.01} + 31%|███▏ | 116280/371472 [9:15:15<20:50:38, 3.40it/s] 31%|███▏ | 116281/371472 [9:15:15<21:04:43, 3.36it/s] 31%|███▏ | 116282/371472 [9:15:16<20:24:16, 3.47it/s] 31%|███▏ | 116283/371472 [9:15:16<20:48:44, 3.41it/s] 31%|███▏ | 116284/371472 [9:15:16<20:07:56, 3.52it/s] 31%|███▏ | 116285/371472 [9:15:17<21:02:57, 3.37it/s] 31%|███▏ | 116286/371472 [9:15:17<20:37:59, 3.44it/s] 31%|███▏ | 116287/371472 [9:15:17<22:36:51, 3.13it/s] 31%|███▏ | 116288/371472 [9:15:17<22:31:30, 3.15it/s] 31%|███▏ | 116289/371472 [9:15:18<22:08:13, 3.20it/s] 31%|███▏ | 116290/371472 [9:15:18<21:12:58, 3.34it/s] 31%|███▏ | 116291/371472 [9:15:18<21:30:25, 3.30it/s] 31%|███▏ | 116292/371472 [9:15:19<21:13:08, 3.34it/s] \ No newline at end of file